Also add some documentation for each function to colorspacedsp.h. [tags/n3.1]
| @@ -100,45 +100,45 @@ static void multiply3x3_c(int16_t *buf[3], ptrdiff_t stride, | |||
| void ff_colorspacedsp_init(ColorSpaceDSPContext *dsp) | |||
| { | |||
| #define init_yuv2rgb_fn(idx, bit) \ | |||
| dsp->yuv2rgb[idx][0] = yuv2rgb_444p##bit##_c; \ | |||
| dsp->yuv2rgb[idx][1] = yuv2rgb_422p##bit##_c; \ | |||
| dsp->yuv2rgb[idx][2] = yuv2rgb_420p##bit##_c | |||
| init_yuv2rgb_fn(0, 8); | |||
| init_yuv2rgb_fn(1, 10); | |||
| init_yuv2rgb_fn(2, 12); | |||
| #define init_rgb2yuv_fn(idx, bit) \ | |||
| dsp->rgb2yuv[idx][0] = rgb2yuv_444p##bit##_c; \ | |||
| dsp->rgb2yuv[idx][1] = rgb2yuv_422p##bit##_c; \ | |||
| dsp->rgb2yuv[idx][2] = rgb2yuv_420p##bit##_c | |||
| init_rgb2yuv_fn(0, 8); | |||
| init_rgb2yuv_fn(1, 10); | |||
| init_rgb2yuv_fn(2, 12); | |||
| #define init_rgb2yuv_fsb_fn(idx, bit) \ | |||
| dsp->rgb2yuv_fsb[idx][0] = rgb2yuv_fsb_444p##bit##_c; \ | |||
| dsp->rgb2yuv_fsb[idx][1] = rgb2yuv_fsb_422p##bit##_c; \ | |||
| dsp->rgb2yuv_fsb[idx][2] = rgb2yuv_fsb_420p##bit##_c | |||
| init_rgb2yuv_fsb_fn(0, 8); | |||
| init_rgb2yuv_fsb_fn(1, 10); | |||
| init_rgb2yuv_fsb_fn(2, 12); | |||
| #define init_yuv2yuv_fn(idx1, idx2, bit1, bit2) \ | |||
| dsp->yuv2yuv[idx1][idx2][0] = yuv2yuv_444p##bit1##to##bit2##_c; \ | |||
| dsp->yuv2yuv[idx1][idx2][1] = yuv2yuv_422p##bit1##to##bit2##_c; \ | |||
| dsp->yuv2yuv[idx1][idx2][2] = yuv2yuv_420p##bit1##to##bit2##_c | |||
| #define init_yuv2yuv_fns(idx1, bit1) \ | |||
| init_yuv2yuv_fn(idx1, 0, bit1, 8); \ | |||
| init_yuv2yuv_fn(idx1, 1, bit1, 10); \ | |||
| init_yuv2yuv_fn(idx1, 2, bit1, 12) | |||
| init_yuv2yuv_fns(0, 8); | |||
| init_yuv2yuv_fns(1, 10); | |||
| init_yuv2yuv_fns(2, 12); | |||
| #define init_yuv2rgb_fn(bit) \ | |||
| dsp->yuv2rgb[BPP_##bit][SS_444] = yuv2rgb_444p##bit##_c; \ | |||
| dsp->yuv2rgb[BPP_##bit][SS_422] = yuv2rgb_422p##bit##_c; \ | |||
| dsp->yuv2rgb[BPP_##bit][SS_420] = yuv2rgb_420p##bit##_c | |||
| init_yuv2rgb_fn( 8); | |||
| init_yuv2rgb_fn(10); | |||
| init_yuv2rgb_fn(12); | |||
| #define init_rgb2yuv_fn(bit) \ | |||
| dsp->rgb2yuv[BPP_##bit][SS_444] = rgb2yuv_444p##bit##_c; \ | |||
| dsp->rgb2yuv[BPP_##bit][SS_422] = rgb2yuv_422p##bit##_c; \ | |||
| dsp->rgb2yuv[BPP_##bit][SS_420] = rgb2yuv_420p##bit##_c | |||
| init_rgb2yuv_fn( 8); | |||
| init_rgb2yuv_fn(10); | |||
| init_rgb2yuv_fn(12); | |||
| #define init_rgb2yuv_fsb_fn(bit) \ | |||
| dsp->rgb2yuv_fsb[BPP_##bit][SS_444] = rgb2yuv_fsb_444p##bit##_c; \ | |||
| dsp->rgb2yuv_fsb[BPP_##bit][SS_422] = rgb2yuv_fsb_422p##bit##_c; \ | |||
| dsp->rgb2yuv_fsb[BPP_##bit][SS_420] = rgb2yuv_fsb_420p##bit##_c | |||
| init_rgb2yuv_fsb_fn( 8); | |||
| init_rgb2yuv_fsb_fn(10); | |||
| init_rgb2yuv_fsb_fn(12); | |||
| #define init_yuv2yuv_fn(idx1, bit1, bit2) \ | |||
| dsp->yuv2yuv[idx1][BPP_##bit2][SS_444] = yuv2yuv_444p##bit1##to##bit2##_c; \ | |||
| dsp->yuv2yuv[idx1][BPP_##bit2][SS_422] = yuv2yuv_422p##bit1##to##bit2##_c; \ | |||
| dsp->yuv2yuv[idx1][BPP_##bit2][SS_420] = yuv2yuv_420p##bit1##to##bit2##_c | |||
| #define init_yuv2yuv_fns(bit1) \ | |||
| init_yuv2yuv_fn(BPP_##bit1, bit1, 8); \ | |||
| init_yuv2yuv_fn(BPP_##bit1, bit1, 10); \ | |||
| init_yuv2yuv_fn(BPP_##bit1, bit1, 12) | |||
| init_yuv2yuv_fns( 8); | |||
| init_yuv2yuv_fns(10); | |||
| init_yuv2yuv_fns(12); | |||
| dsp->multiply3x3 = multiply3x3_c; | |||
| @@ -42,12 +42,35 @@ typedef void (*yuv2yuv_fn)(uint8_t *yuv_out[3], const ptrdiff_t yuv_out_stride[3 | |||
| int w, int h, const int16_t yuv2yuv_coeffs[3][3][8], | |||
| const int16_t yuv_offset[2][8]); | |||
| enum BitDepthIndex { /* Symbolic indices for the bit-depth dimension of the ColorSpaceDSPContext function tables. */ | |||
| BPP_8, /* slot holding the 8-bit variants (e.g. yuv2rgb_444p8_c) */ | |||
| BPP_10, /* slot holding the 10-bit variants */ | |||
| BPP_12, /* slot holding the 12-bit variants */ | |||
| NB_BPP, /* count of bit-depth slots; used as an array dimension, not as an index */ | |||
| }; | |||
| enum ChromaSubsamplingIndex { /* Symbolic indices for the chroma-subsampling dimension of the ColorSpaceDSPContext function tables. */ | |||
| SS_444, /* slot holding the 444 variants (e.g. yuv2rgb_444p8_c) */ | |||
| SS_422, /* slot holding the 422 variants */ | |||
| SS_420, /* slot holding the 420 variants */ | |||
| NB_SS, /* count of subsampling slots; used as an array dimension, not as an index */ | |||
| }; | |||
| typedef struct ColorSpaceDSPContext { /* Table of conversion function pointers; entries are assigned by ff_colorspacedsp_init() and ff_colorspacedsp_x86_init(). */ | |||
| yuv2rgb_fn yuv2rgb[3 /* 0: 8bit, 1: 10bit, 2: 12bit */][3 /* 0: 444, 1: 422, 2: 420 */]; | |||
| rgb2yuv_fn rgb2yuv[3 /* 0: 8bit, 1: 10bit, 2: 12bit */][3 /* 0: 444, 1: 422, 2: 420 */]; | |||
| rgb2yuv_fsb_fn rgb2yuv_fsb[3 /* 0: 8bit, 1: 10bit, 2: 12bit */][3 /* 0: 444, 1: 422, 2: 420 */]; | |||
| yuv2yuv_fn yuv2yuv[3 /* in_depth */][3 /* out_depth */][3 /* 0: 444, 1: 422, 2: 420 */]; | |||
| /* Convert an input YUV pixel buffer from the user into an internal, 15bpp | |||
| * array of intermediate RGB data. Indexed by [bit depth][subsampling]. */ | |||
| yuv2rgb_fn yuv2rgb[NB_BPP][NB_SS]; | |||
| /* Convert intermediate RGB data (15bpp, internal format) into YUV data and | |||
| * store it into the user-provided output buffer. */ | |||
| rgb2yuv_fn rgb2yuv[NB_BPP][NB_SS]; | |||
| /* Same as rgb2yuv(), but uses Floyd-Steinberg dithering. */ | |||
| rgb2yuv_fsb_fn rgb2yuv_fsb[NB_BPP][NB_SS]; | |||
| /* Direct YUV-to-YUV conversion (input and output are both user-provided | |||
| * buffers). Indexed by [input bit depth][output bit depth][subsampling]. */ | |||
| yuv2yuv_fn yuv2yuv[NB_BPP /* in */][NB_BPP /* out */][NB_SS]; | |||
| /* In-place 3x3 matrix multiplication. Input and output are both 15bpp | |||
| * (our internal data format). */ | |||
| void (*multiply3x3)(int16_t *data[3], ptrdiff_t stride, | |||
| int w, int h, const int16_t m[3][3][8]); | |||
| } ColorSpaceDSPContext; | |||
| @@ -81,38 +81,38 @@ void ff_colorspacedsp_x86_init(ColorSpaceDSPContext *dsp) | |||
| int cpu_flags = av_get_cpu_flags(); | |||
| if (ARCH_X86_64 && EXTERNAL_SSE2(cpu_flags)) { | |||
| #define assign_yuv2yuv_fns(idx, ss) \ | |||
| dsp->yuv2yuv[0][0][idx] = ff_yuv2yuv_##ss##p8to8_sse2; \ | |||
| dsp->yuv2yuv[0][1][idx] = ff_yuv2yuv_##ss##p8to10_sse2; \ | |||
| dsp->yuv2yuv[0][2][idx] = ff_yuv2yuv_##ss##p8to12_sse2; \ | |||
| dsp->yuv2yuv[1][0][idx] = ff_yuv2yuv_##ss##p10to8_sse2; \ | |||
| dsp->yuv2yuv[1][1][idx] = ff_yuv2yuv_##ss##p10to10_sse2; \ | |||
| dsp->yuv2yuv[1][2][idx] = ff_yuv2yuv_##ss##p10to12_sse2; \ | |||
| dsp->yuv2yuv[2][0][idx] = ff_yuv2yuv_##ss##p12to8_sse2; \ | |||
| dsp->yuv2yuv[2][1][idx] = ff_yuv2yuv_##ss##p12to10_sse2; \ | |||
| dsp->yuv2yuv[2][2][idx] = ff_yuv2yuv_##ss##p12to12_sse2 | |||
| assign_yuv2yuv_fns(2, 420); | |||
| assign_yuv2yuv_fns(1, 422); | |||
| assign_yuv2yuv_fns(0, 444); | |||
| #define assign_yuv2rgb_fns(idx, ss) \ | |||
| dsp->yuv2rgb[0][idx] = ff_yuv2rgb_##ss##p8_sse2; \ | |||
| dsp->yuv2rgb[1][idx] = ff_yuv2rgb_##ss##p10_sse2; \ | |||
| dsp->yuv2rgb[2][idx] = ff_yuv2rgb_##ss##p12_sse2 | |||
| assign_yuv2rgb_fns(2, 420); | |||
| assign_yuv2rgb_fns(1, 422); | |||
| assign_yuv2rgb_fns(0, 444); | |||
| #define assign_rgb2yuv_fns(idx, ss) \ | |||
| dsp->rgb2yuv[0][idx] = ff_rgb2yuv_##ss##p8_sse2; \ | |||
| dsp->rgb2yuv[1][idx] = ff_rgb2yuv_##ss##p10_sse2; \ | |||
| dsp->rgb2yuv[2][idx] = ff_rgb2yuv_##ss##p12_sse2 | |||
| assign_rgb2yuv_fns(2, 420); | |||
| assign_rgb2yuv_fns(1, 422); | |||
| assign_rgb2yuv_fns(0, 444); | |||
| #define assign_yuv2yuv_fns(ss) \ | |||
| dsp->yuv2yuv[BPP_8 ][BPP_8 ][SS_##ss] = ff_yuv2yuv_##ss##p8to8_sse2; \ | |||
| dsp->yuv2yuv[BPP_8 ][BPP_10][SS_##ss] = ff_yuv2yuv_##ss##p8to10_sse2; \ | |||
| dsp->yuv2yuv[BPP_8 ][BPP_12][SS_##ss] = ff_yuv2yuv_##ss##p8to12_sse2; \ | |||
| dsp->yuv2yuv[BPP_10][BPP_8 ][SS_##ss] = ff_yuv2yuv_##ss##p10to8_sse2; \ | |||
| dsp->yuv2yuv[BPP_10][BPP_10][SS_##ss] = ff_yuv2yuv_##ss##p10to10_sse2; \ | |||
| dsp->yuv2yuv[BPP_10][BPP_12][SS_##ss] = ff_yuv2yuv_##ss##p10to12_sse2; \ | |||
| dsp->yuv2yuv[BPP_12][BPP_8 ][SS_##ss] = ff_yuv2yuv_##ss##p12to8_sse2; \ | |||
| dsp->yuv2yuv[BPP_12][BPP_10][SS_##ss] = ff_yuv2yuv_##ss##p12to10_sse2; \ | |||
| dsp->yuv2yuv[BPP_12][BPP_12][SS_##ss] = ff_yuv2yuv_##ss##p12to12_sse2 | |||
| assign_yuv2yuv_fns(420); | |||
| assign_yuv2yuv_fns(422); | |||
| assign_yuv2yuv_fns(444); | |||
| #define assign_yuv2rgb_fns(ss) \ | |||
| dsp->yuv2rgb[BPP_8 ][SS_##ss] = ff_yuv2rgb_##ss##p8_sse2; \ | |||
| dsp->yuv2rgb[BPP_10][SS_##ss] = ff_yuv2rgb_##ss##p10_sse2; \ | |||
| dsp->yuv2rgb[BPP_12][SS_##ss] = ff_yuv2rgb_##ss##p12_sse2 | |||
| assign_yuv2rgb_fns(420); | |||
| assign_yuv2rgb_fns(422); | |||
| assign_yuv2rgb_fns(444); | |||
| #define assign_rgb2yuv_fns(ss) \ | |||
| dsp->rgb2yuv[BPP_8 ][SS_##ss] = ff_rgb2yuv_##ss##p8_sse2; \ | |||
| dsp->rgb2yuv[BPP_10][SS_##ss] = ff_rgb2yuv_##ss##p10_sse2; \ | |||
| dsp->rgb2yuv[BPP_12][SS_##ss] = ff_rgb2yuv_##ss##p12_sse2 | |||
| assign_rgb2yuv_fns(420); | |||
| assign_rgb2yuv_fns(422); | |||
| assign_rgb2yuv_fns(444); | |||
| dsp->multiply3x3 = ff_multiply3x3_sse2; | |||
| } | |||