|
|
@@ -0,0 +1,438 @@ |
|
|
|
|
|
From d743a2c12e889f7605a56f5144ae2e3899c9dd4f Mon Sep 17 00:00:00 2001 |
|
|
|
|
|
From: DRC <information@libjpeg-turbo.org> |
|
|
|
|
|
Date: Thu, 2 Feb 2023 08:55:37 -0600 |
|
|
|
|
|
Subject: [PATCH] SIMD/x86: Initialize simd_support before every use |
|
|
|
|
|
|
|
|
|
|
|
As long as a libjpeg instance is only used by one thread at a time, a |
|
|
|
|
|
program is technically within its rights to call jpeg_start_*compress() |
|
|
|
|
|
in one thread and jpeg_(read|write)_*(), with the same libjpeg instance, |
|
|
|
|
|
in a second thread. However, because the various jsimd_can*() functions |
|
|
|
|
|
are called within the body of jpeg_start_*compress() and simd_support is |
|
|
|
|
|
now thread-local (due to f579cc11b33e5bfeb9931e37cc74b4a33c95d2e6), that |
|
|
|
|
|
led to a situation in which simd_support was initialized in the first |
|
|
|
|
|
thread but not the second. The uninitialized value of simd_support is |
|
|
|
|
|
0xFFFFFFFF, which the second thread interpreted to mean that it could |
|
|
|
|
|
use any instruction set, and when it attempted to use AVX2 instructions |
|
|
|
|
|
on a CPU that didn't support them, an illegal instruction error |
|
|
|
|
|
occurred. |
|
|
|
|
|
|
|
|
|
|
|
This issue was known to affect libvips. |
|
|
|
|
|
|
|
|
|
|
|
This commit modifies the i386 and x86-64 SIMD dispatchers so that the |
|
|
|
|
|
various jsimd_*() functions always call init_simd(), if simd_support is |
|
|
|
|
|
uninitialized, prior to dispatching based on the value of simd_support. |
|
|
|
|
|
Note that the other SIMD dispatchers don't need this, because only the |
|
|
|
|
|
x86 SIMD extensions currently support multiple instruction sets. |
|
|
|
|
|
|
|
|
|
|
|
This patch has been verified to be performance-neutral to within |
|
|
|
|
|
+/- 0.4% with 32-bit and 64-bit code running on a 2.8 GHz Intel Xeon |
|
|
|
|
|
W3530 and a 3.6 GHz Intel Xeon W2123. |
|
|
|
|
|
|
|
|
|
|
|
Fixes #649 |
|
|
|
|
|
--- |
|
|
|
|
|
simd/i386/jsimd.c | 71 ++++++++++++++++++++++++++++++++++++++++++++- |
|
|
|
|
|
simd/x86_64/jsimd.c | 47 +++++++++++++++++++++++++++++- |
|
|
|
|
|
2 files changed, 116 insertions(+), 2 deletions(-) |
|
|
|
|
|
|
|
|
|
|
|
--- a/simd/i386/jsimd.c |
|
|
|
|
|
+++ b/simd/i386/jsimd.c |
|
|
|
|
|
@@ -2,7 +2,7 @@ |
|
|
|
|
|
* jsimd_i386.c |
|
|
|
|
|
* |
|
|
|
|
|
* Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB |
|
|
|
|
|
- * Copyright (C) 2009-2011, 2013-2014, 2016, 2018, 2022, D. R. Commander. |
|
|
|
|
|
+ * Copyright (C) 2009-2011, 2013-2014, 2016, 2018, 2022-2023, D. R. Commander. |
|
|
|
|
|
* Copyright (C) 2015-2016, 2018, 2022, Matthieu Darbois. |
|
|
|
|
|
* |
|
|
|
|
|
* Based on the x86 SIMD extension for IJG JPEG library, |
|
|
|
|
|
@@ -158,6 +158,9 @@ |
|
|
|
|
|
void (*sse2fct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int); |
|
|
|
|
|
void (*mmxfct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int); |
|
|
|
|
|
|
|
|
|
|
|
+ if (simd_support == ~0U) |
|
|
|
|
|
+ init_simd(); |
|
|
|
|
|
+ |
|
|
|
|
|
switch (cinfo->in_color_space) { |
|
|
|
|
|
case JCS_EXT_RGB: |
|
|
|
|
|
avx2fct = jsimd_extrgb_ycc_convert_avx2; |
|
|
|
|
|
@@ -217,6 +220,9 @@ |
|
|
|
|
|
void (*sse2fct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int); |
|
|
|
|
|
void (*mmxfct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int); |
|
|
|
|
|
|
|
|
|
|
|
+ if (simd_support == ~0U) |
|
|
|
|
|
+ init_simd(); |
|
|
|
|
|
+ |
|
|
|
|
|
switch (cinfo->in_color_space) { |
|
|
|
|
|
case JCS_EXT_RGB: |
|
|
|
|
|
avx2fct = jsimd_extrgb_gray_convert_avx2; |
|
|
|
|
|
@@ -276,6 +282,9 @@ |
|
|
|
|
|
void (*sse2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int); |
|
|
|
|
|
void (*mmxfct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int); |
|
|
|
|
|
|
|
|
|
|
|
+ if (simd_support == ~0U) |
|
|
|
|
|
+ init_simd(); |
|
|
|
|
|
+ |
|
|
|
|
|
switch (cinfo->out_color_space) { |
|
|
|
|
|
case JCS_EXT_RGB: |
|
|
|
|
|
avx2fct = jsimd_ycc_extrgb_convert_avx2; |
|
|
|
|
|
@@ -379,6 +388,9 @@ |
|
|
|
|
|
jsimd_h2v2_downsample(j_compress_ptr cinfo, jpeg_component_info *compptr, |
|
|
|
|
|
JSAMPARRAY input_data, JSAMPARRAY output_data) |
|
|
|
|
|
{ |
|
|
|
|
|
+ if (simd_support == ~0U) |
|
|
|
|
|
+ init_simd(); |
|
|
|
|
|
+ |
|
|
|
|
|
if (simd_support & JSIMD_AVX2) |
|
|
|
|
|
jsimd_h2v2_downsample_avx2(cinfo->image_width, cinfo->max_v_samp_factor, |
|
|
|
|
|
compptr->v_samp_factor, |
|
|
|
|
|
@@ -399,6 +411,9 @@ |
|
|
|
|
|
jsimd_h2v1_downsample(j_compress_ptr cinfo, jpeg_component_info *compptr, |
|
|
|
|
|
JSAMPARRAY input_data, JSAMPARRAY output_data) |
|
|
|
|
|
{ |
|
|
|
|
|
+ if (simd_support == ~0U) |
|
|
|
|
|
+ init_simd(); |
|
|
|
|
|
+ |
|
|
|
|
|
if (simd_support & JSIMD_AVX2) |
|
|
|
|
|
jsimd_h2v1_downsample_avx2(cinfo->image_width, cinfo->max_v_samp_factor, |
|
|
|
|
|
compptr->v_samp_factor, |
|
|
|
|
|
@@ -461,6 +476,9 @@ |
|
|
|
|
|
jsimd_h2v2_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr, |
|
|
|
|
|
JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr) |
|
|
|
|
|
{ |
|
|
|
|
|
+ if (simd_support == ~0U) |
|
|
|
|
|
+ init_simd(); |
|
|
|
|
|
+ |
|
|
|
|
|
if (simd_support & JSIMD_AVX2) |
|
|
|
|
|
jsimd_h2v2_upsample_avx2(cinfo->max_v_samp_factor, cinfo->output_width, |
|
|
|
|
|
input_data, output_data_ptr); |
|
|
|
|
|
@@ -476,6 +494,9 @@ |
|
|
|
|
|
jsimd_h2v1_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr, |
|
|
|
|
|
JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr) |
|
|
|
|
|
{ |
|
|
|
|
|
+ if (simd_support == ~0U) |
|
|
|
|
|
+ init_simd(); |
|
|
|
|
|
+ |
|
|
|
|
|
if (simd_support & JSIMD_AVX2) |
|
|
|
|
|
jsimd_h2v1_upsample_avx2(cinfo->max_v_samp_factor, cinfo->output_width, |
|
|
|
|
|
input_data, output_data_ptr); |
|
|
|
|
|
@@ -537,6 +558,9 @@ |
|
|
|
|
|
jsimd_h2v2_fancy_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr, |
|
|
|
|
|
JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr) |
|
|
|
|
|
{ |
|
|
|
|
|
+ if (simd_support == ~0U) |
|
|
|
|
|
+ init_simd(); |
|
|
|
|
|
+ |
|
|
|
|
|
if (simd_support & JSIMD_AVX2) |
|
|
|
|
|
jsimd_h2v2_fancy_upsample_avx2(cinfo->max_v_samp_factor, |
|
|
|
|
|
compptr->downsampled_width, input_data, |
|
|
|
|
|
@@ -555,6 +579,9 @@ |
|
|
|
|
|
jsimd_h2v1_fancy_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr, |
|
|
|
|
|
JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr) |
|
|
|
|
|
{ |
|
|
|
|
|
+ if (simd_support == ~0U) |
|
|
|
|
|
+ init_simd(); |
|
|
|
|
|
+ |
|
|
|
|
|
if (simd_support & JSIMD_AVX2) |
|
|
|
|
|
jsimd_h2v1_fancy_upsample_avx2(cinfo->max_v_samp_factor, |
|
|
|
|
|
compptr->downsampled_width, input_data, |
|
|
|
|
|
@@ -623,6 +650,9 @@ |
|
|
|
|
|
void (*sse2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY); |
|
|
|
|
|
void (*mmxfct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY); |
|
|
|
|
|
|
|
|
|
|
|
+ if (simd_support == ~0U) |
|
|
|
|
|
+ init_simd(); |
|
|
|
|
|
+ |
|
|
|
|
|
switch (cinfo->out_color_space) { |
|
|
|
|
|
case JCS_EXT_RGB: |
|
|
|
|
|
avx2fct = jsimd_h2v2_extrgb_merged_upsample_avx2; |
|
|
|
|
|
@@ -681,6 +711,9 @@ |
|
|
|
|
|
void (*sse2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY); |
|
|
|
|
|
void (*mmxfct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY); |
|
|
|
|
|
|
|
|
|
|
|
+ if (simd_support == ~0U) |
|
|
|
|
|
+ init_simd(); |
|
|
|
|
|
+ |
|
|
|
|
|
switch (cinfo->out_color_space) { |
|
|
|
|
|
case JCS_EXT_RGB: |
|
|
|
|
|
avx2fct = jsimd_h2v1_extrgb_merged_upsample_avx2; |
|
|
|
|
|
@@ -785,6 +818,9 @@ |
|
|
|
|
|
jsimd_convsamp(JSAMPARRAY sample_data, JDIMENSION start_col, |
|
|
|
|
|
DCTELEM *workspace) |
|
|
|
|
|
{ |
|
|
|
|
|
+ if (simd_support == ~0U) |
|
|
|
|
|
+ init_simd(); |
|
|
|
|
|
+ |
|
|
|
|
|
if (simd_support & JSIMD_AVX2) |
|
|
|
|
|
jsimd_convsamp_avx2(sample_data, start_col, workspace); |
|
|
|
|
|
else if (simd_support & JSIMD_SSE2) |
|
|
|
|
|
@@ -797,6 +833,9 @@ |
|
|
|
|
|
jsimd_convsamp_float(JSAMPARRAY sample_data, JDIMENSION start_col, |
|
|
|
|
|
FAST_FLOAT *workspace) |
|
|
|
|
|
{ |
|
|
|
|
|
+ if (simd_support == ~0U) |
|
|
|
|
|
+ init_simd(); |
|
|
|
|
|
+ |
|
|
|
|
|
if (simd_support & JSIMD_SSE2) |
|
|
|
|
|
jsimd_convsamp_float_sse2(sample_data, start_col, workspace); |
|
|
|
|
|
else if (simd_support & JSIMD_SSE) |
|
|
|
|
|
@@ -867,6 +906,9 @@ |
|
|
|
|
|
GLOBAL(void) |
|
|
|
|
|
jsimd_fdct_islow(DCTELEM *data) |
|
|
|
|
|
{ |
|
|
|
|
|
+ if (simd_support == ~0U) |
|
|
|
|
|
+ init_simd(); |
|
|
|
|
|
+ |
|
|
|
|
|
if (simd_support & JSIMD_AVX2) |
|
|
|
|
|
jsimd_fdct_islow_avx2(data); |
|
|
|
|
|
else if (simd_support & JSIMD_SSE2) |
|
|
|
|
|
@@ -878,6 +920,9 @@ |
|
|
|
|
|
GLOBAL(void) |
|
|
|
|
|
jsimd_fdct_ifast(DCTELEM *data) |
|
|
|
|
|
{ |
|
|
|
|
|
+ if (simd_support == ~0U) |
|
|
|
|
|
+ init_simd(); |
|
|
|
|
|
+ |
|
|
|
|
|
if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_fdct_islow_sse2)) |
|
|
|
|
|
jsimd_fdct_ifast_sse2(data); |
|
|
|
|
|
else |
|
|
|
|
|
@@ -887,6 +932,9 @@ |
|
|
|
|
|
GLOBAL(void) |
|
|
|
|
|
jsimd_fdct_float(FAST_FLOAT *data) |
|
|
|
|
|
{ |
|
|
|
|
|
+ if (simd_support == ~0U) |
|
|
|
|
|
+ init_simd(); |
|
|
|
|
|
+ |
|
|
|
|
|
if ((simd_support & JSIMD_SSE) && IS_ALIGNED_SSE(jconst_fdct_float_sse)) |
|
|
|
|
|
jsimd_fdct_float_sse(data); |
|
|
|
|
|
else if (simd_support & JSIMD_3DNOW) |
|
|
|
|
|
@@ -942,6 +990,9 @@ |
|
|
|
|
|
GLOBAL(void) |
|
|
|
|
|
jsimd_quantize(JCOEFPTR coef_block, DCTELEM *divisors, DCTELEM *workspace) |
|
|
|
|
|
{ |
|
|
|
|
|
+ if (simd_support == ~0U) |
|
|
|
|
|
+ init_simd(); |
|
|
|
|
|
+ |
|
|
|
|
|
if (simd_support & JSIMD_AVX2) |
|
|
|
|
|
jsimd_quantize_avx2(coef_block, divisors, workspace); |
|
|
|
|
|
else if (simd_support & JSIMD_SSE2) |
|
|
|
|
|
@@ -954,6 +1005,9 @@ |
|
|
|
|
|
jsimd_quantize_float(JCOEFPTR coef_block, FAST_FLOAT *divisors, |
|
|
|
|
|
FAST_FLOAT *workspace) |
|
|
|
|
|
{ |
|
|
|
|
|
+ if (simd_support == ~0U) |
|
|
|
|
|
+ init_simd(); |
|
|
|
|
|
+ |
|
|
|
|
|
if (simd_support & JSIMD_SSE2) |
|
|
|
|
|
jsimd_quantize_float_sse2(coef_block, divisors, workspace); |
|
|
|
|
|
else if (simd_support & JSIMD_SSE) |
|
|
|
|
|
@@ -1017,6 +1071,9 @@ |
|
|
|
|
|
JCOEFPTR coef_block, JSAMPARRAY output_buf, |
|
|
|
|
|
JDIMENSION output_col) |
|
|
|
|
|
{ |
|
|
|
|
|
+ if (simd_support == ~0U) |
|
|
|
|
|
+ init_simd(); |
|
|
|
|
|
+ |
|
|
|
|
|
if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_red_sse2)) |
|
|
|
|
|
jsimd_idct_2x2_sse2(compptr->dct_table, coef_block, output_buf, |
|
|
|
|
|
output_col); |
|
|
|
|
|
@@ -1029,6 +1086,9 @@ |
|
|
|
|
|
JCOEFPTR coef_block, JSAMPARRAY output_buf, |
|
|
|
|
|
JDIMENSION output_col) |
|
|
|
|
|
{ |
|
|
|
|
|
+ if (simd_support == ~0U) |
|
|
|
|
|
+ init_simd(); |
|
|
|
|
|
+ |
|
|
|
|
|
if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_red_sse2)) |
|
|
|
|
|
jsimd_idct_4x4_sse2(compptr->dct_table, coef_block, output_buf, |
|
|
|
|
|
output_col); |
|
|
|
|
|
@@ -1123,6 +1183,9 @@ |
|
|
|
|
|
JCOEFPTR coef_block, JSAMPARRAY output_buf, |
|
|
|
|
|
JDIMENSION output_col) |
|
|
|
|
|
{ |
|
|
|
|
|
+ if (simd_support == ~0U) |
|
|
|
|
|
+ init_simd(); |
|
|
|
|
|
+ |
|
|
|
|
|
if (simd_support & JSIMD_AVX2) |
|
|
|
|
|
jsimd_idct_islow_avx2(compptr->dct_table, coef_block, output_buf, |
|
|
|
|
|
output_col); |
|
|
|
|
|
@@ -1139,6 +1202,9 @@ |
|
|
|
|
|
JCOEFPTR coef_block, JSAMPARRAY output_buf, |
|
|
|
|
|
JDIMENSION output_col) |
|
|
|
|
|
{ |
|
|
|
|
|
+ if (simd_support == ~0U) |
|
|
|
|
|
+ init_simd(); |
|
|
|
|
|
+ |
|
|
|
|
|
if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_ifast_sse2)) |
|
|
|
|
|
jsimd_idct_ifast_sse2(compptr->dct_table, coef_block, output_buf, |
|
|
|
|
|
output_col); |
|
|
|
|
|
@@ -1152,6 +1218,9 @@ |
|
|
|
|
|
JCOEFPTR coef_block, JSAMPARRAY output_buf, |
|
|
|
|
|
JDIMENSION output_col) |
|
|
|
|
|
{ |
|
|
|
|
|
+ if (simd_support == ~0U) |
|
|
|
|
|
+ init_simd(); |
|
|
|
|
|
+ |
|
|
|
|
|
if ((simd_support & JSIMD_SSE2) && IS_ALIGNED_SSE(jconst_idct_float_sse2)) |
|
|
|
|
|
jsimd_idct_float_sse2(compptr->dct_table, coef_block, output_buf, |
|
|
|
|
|
output_col); |
|
|
|
|
|
--- a/simd/x86_64/jsimd.c |
|
|
|
|
|
+++ b/simd/x86_64/jsimd.c |
|
|
|
|
|
@@ -2,7 +2,7 @@ |
|
|
|
|
|
* jsimd_x86_64.c |
|
|
|
|
|
* |
|
|
|
|
|
* Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB |
|
|
|
|
|
- * Copyright (C) 2009-2011, 2014, 2016, 2018, 2022, D. R. Commander. |
|
|
|
|
|
+ * Copyright (C) 2009-2011, 2014, 2016, 2018, 2022-2023, D. R. Commander. |
|
|
|
|
|
* Copyright (C) 2015-2016, 2018, 2022, Matthieu Darbois. |
|
|
|
|
|
* |
|
|
|
|
|
* Based on the x86 SIMD extension for IJG JPEG library, |
|
|
|
|
|
@@ -145,6 +145,9 @@ |
|
|
|
|
|
void (*avx2fct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int); |
|
|
|
|
|
void (*sse2fct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int); |
|
|
|
|
|
|
|
|
|
|
|
+ if (simd_support == ~0U) |
|
|
|
|
|
+ init_simd(); |
|
|
|
|
|
+ |
|
|
|
|
|
switch (cinfo->in_color_space) { |
|
|
|
|
|
case JCS_EXT_RGB: |
|
|
|
|
|
avx2fct = jsimd_extrgb_ycc_convert_avx2; |
|
|
|
|
|
@@ -194,6 +197,9 @@ |
|
|
|
|
|
void (*avx2fct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int); |
|
|
|
|
|
void (*sse2fct) (JDIMENSION, JSAMPARRAY, JSAMPIMAGE, JDIMENSION, int); |
|
|
|
|
|
|
|
|
|
|
|
+ if (simd_support == ~0U) |
|
|
|
|
|
+ init_simd(); |
|
|
|
|
|
+ |
|
|
|
|
|
switch (cinfo->in_color_space) { |
|
|
|
|
|
case JCS_EXT_RGB: |
|
|
|
|
|
avx2fct = jsimd_extrgb_gray_convert_avx2; |
|
|
|
|
|
@@ -243,6 +249,9 @@ |
|
|
|
|
|
void (*avx2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int); |
|
|
|
|
|
void (*sse2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY, int); |
|
|
|
|
|
|
|
|
|
|
|
+ if (simd_support == ~0U) |
|
|
|
|
|
+ init_simd(); |
|
|
|
|
|
+ |
|
|
|
|
|
switch (cinfo->out_color_space) { |
|
|
|
|
|
case JCS_EXT_RGB: |
|
|
|
|
|
avx2fct = jsimd_ycc_extrgb_convert_avx2; |
|
|
|
|
|
@@ -333,6 +342,9 @@ |
|
|
|
|
|
jsimd_h2v2_downsample(j_compress_ptr cinfo, jpeg_component_info *compptr, |
|
|
|
|
|
JSAMPARRAY input_data, JSAMPARRAY output_data) |
|
|
|
|
|
{ |
|
|
|
|
|
+ if (simd_support == ~0U) |
|
|
|
|
|
+ init_simd(); |
|
|
|
|
|
+ |
|
|
|
|
|
if (simd_support & JSIMD_AVX2) |
|
|
|
|
|
jsimd_h2v2_downsample_avx2(cinfo->image_width, cinfo->max_v_samp_factor, |
|
|
|
|
|
compptr->v_samp_factor, |
|
|
|
|
|
@@ -349,6 +361,9 @@ |
|
|
|
|
|
jsimd_h2v1_downsample(j_compress_ptr cinfo, jpeg_component_info *compptr, |
|
|
|
|
|
JSAMPARRAY input_data, JSAMPARRAY output_data) |
|
|
|
|
|
{ |
|
|
|
|
|
+ if (simd_support == ~0U) |
|
|
|
|
|
+ init_simd(); |
|
|
|
|
|
+ |
|
|
|
|
|
if (simd_support & JSIMD_AVX2) |
|
|
|
|
|
jsimd_h2v1_downsample_avx2(cinfo->image_width, cinfo->max_v_samp_factor, |
|
|
|
|
|
compptr->v_samp_factor, |
|
|
|
|
|
@@ -403,6 +418,9 @@ |
|
|
|
|
|
jsimd_h2v2_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr, |
|
|
|
|
|
JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr) |
|
|
|
|
|
{ |
|
|
|
|
|
+ if (simd_support == ~0U) |
|
|
|
|
|
+ init_simd(); |
|
|
|
|
|
+ |
|
|
|
|
|
if (simd_support & JSIMD_AVX2) |
|
|
|
|
|
jsimd_h2v2_upsample_avx2(cinfo->max_v_samp_factor, cinfo->output_width, |
|
|
|
|
|
input_data, output_data_ptr); |
|
|
|
|
|
@@ -415,6 +433,9 @@ |
|
|
|
|
|
jsimd_h2v1_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr, |
|
|
|
|
|
JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr) |
|
|
|
|
|
{ |
|
|
|
|
|
+ if (simd_support == ~0U) |
|
|
|
|
|
+ init_simd(); |
|
|
|
|
|
+ |
|
|
|
|
|
if (simd_support & JSIMD_AVX2) |
|
|
|
|
|
jsimd_h2v1_upsample_avx2(cinfo->max_v_samp_factor, cinfo->output_width, |
|
|
|
|
|
input_data, output_data_ptr); |
|
|
|
|
|
@@ -469,6 +490,9 @@ |
|
|
|
|
|
jsimd_h2v2_fancy_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr, |
|
|
|
|
|
JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr) |
|
|
|
|
|
{ |
|
|
|
|
|
+ if (simd_support == ~0U) |
|
|
|
|
|
+ init_simd(); |
|
|
|
|
|
+ |
|
|
|
|
|
if (simd_support & JSIMD_AVX2) |
|
|
|
|
|
jsimd_h2v2_fancy_upsample_avx2(cinfo->max_v_samp_factor, |
|
|
|
|
|
compptr->downsampled_width, input_data, |
|
|
|
|
|
@@ -483,6 +507,9 @@ |
|
|
|
|
|
jsimd_h2v1_fancy_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr, |
|
|
|
|
|
JSAMPARRAY input_data, JSAMPARRAY *output_data_ptr) |
|
|
|
|
|
{ |
|
|
|
|
|
+ if (simd_support == ~0U) |
|
|
|
|
|
+ init_simd(); |
|
|
|
|
|
+ |
|
|
|
|
|
if (simd_support & JSIMD_AVX2) |
|
|
|
|
|
jsimd_h2v1_fancy_upsample_avx2(cinfo->max_v_samp_factor, |
|
|
|
|
|
compptr->downsampled_width, input_data, |
|
|
|
|
|
@@ -542,6 +569,9 @@ |
|
|
|
|
|
void (*avx2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY); |
|
|
|
|
|
void (*sse2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY); |
|
|
|
|
|
|
|
|
|
|
|
+ if (simd_support == ~0U) |
|
|
|
|
|
+ init_simd(); |
|
|
|
|
|
+ |
|
|
|
|
|
switch (cinfo->out_color_space) { |
|
|
|
|
|
case JCS_EXT_RGB: |
|
|
|
|
|
avx2fct = jsimd_h2v2_extrgb_merged_upsample_avx2; |
|
|
|
|
|
@@ -590,6 +620,9 @@ |
|
|
|
|
|
void (*avx2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY); |
|
|
|
|
|
void (*sse2fct) (JDIMENSION, JSAMPIMAGE, JDIMENSION, JSAMPARRAY); |
|
|
|
|
|
|
|
|
|
|
|
+ if (simd_support == ~0U) |
|
|
|
|
|
+ init_simd(); |
|
|
|
|
|
+ |
|
|
|
|
|
switch (cinfo->out_color_space) { |
|
|
|
|
|
case JCS_EXT_RGB: |
|
|
|
|
|
avx2fct = jsimd_h2v1_extrgb_merged_upsample_avx2; |
|
|
|
|
|
@@ -679,6 +712,9 @@ |
|
|
|
|
|
jsimd_convsamp(JSAMPARRAY sample_data, JDIMENSION start_col, |
|
|
|
|
|
DCTELEM *workspace) |
|
|
|
|
|
{ |
|
|
|
|
|
+ if (simd_support == ~0U) |
|
|
|
|
|
+ init_simd(); |
|
|
|
|
|
+ |
|
|
|
|
|
if (simd_support & JSIMD_AVX2) |
|
|
|
|
|
jsimd_convsamp_avx2(sample_data, start_col, workspace); |
|
|
|
|
|
else |
|
|
|
|
|
@@ -748,6 +784,9 @@ |
|
|
|
|
|
GLOBAL(void) |
|
|
|
|
|
jsimd_fdct_islow(DCTELEM *data) |
|
|
|
|
|
{ |
|
|
|
|
|
+ if (simd_support == ~0U) |
|
|
|
|
|
+ init_simd(); |
|
|
|
|
|
+ |
|
|
|
|
|
if (simd_support & JSIMD_AVX2) |
|
|
|
|
|
jsimd_fdct_islow_avx2(data); |
|
|
|
|
|
else |
|
|
|
|
|
@@ -809,6 +848,9 @@ |
|
|
|
|
|
GLOBAL(void) |
|
|
|
|
|
jsimd_quantize(JCOEFPTR coef_block, DCTELEM *divisors, DCTELEM *workspace) |
|
|
|
|
|
{ |
|
|
|
|
|
+ if (simd_support == ~0U) |
|
|
|
|
|
+ init_simd(); |
|
|
|
|
|
+ |
|
|
|
|
|
if (simd_support & JSIMD_AVX2) |
|
|
|
|
|
jsimd_quantize_avx2(coef_block, divisors, workspace); |
|
|
|
|
|
else |
|
|
|
|
|
@@ -963,6 +1005,9 @@ |
|
|
|
|
|
JCOEFPTR coef_block, JSAMPARRAY output_buf, |
|
|
|
|
|
JDIMENSION output_col) |
|
|
|
|
|
{ |
|
|
|
|
|
+ if (simd_support == ~0U) |
|
|
|
|
|
+ init_simd(); |
|
|
|
|
|
+ |
|
|
|
|
|
if (simd_support & JSIMD_AVX2) |
|
|
|
|
|
jsimd_idct_islow_avx2(compptr->dct_table, coef_block, output_buf, |
|
|
|
|
|
output_col); |