Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
tags/n2.0
| @@ -358,7 +358,7 @@ typedef struct SwsContext { | |||
| uint8_t *table_gU[256 + 2*YUVRGB_TABLE_HEADROOM]; | |||
| int table_gV[256 + 2*YUVRGB_TABLE_HEADROOM]; | |||
| uint8_t *table_bU[256 + 2*YUVRGB_TABLE_HEADROOM]; | |||
| int32_t input_rgb2yuv_table[16+32*4]; // This table can contain both C and SIMD formatted values; the C values are always at the XY_IDX points | |||
| int32_t input_rgb2yuv_table[16+40*4]; // This table can contain both C and SIMD formatted values; the C values are always at the XY_IDX points | |||
| #define RY_IDX 0 | |||
| #define GY_IDX 1 | |||
| #define BY_IDX 2 | |||
| @@ -834,7 +834,18 @@ static void fill_rgb2yuv_table(SwsContext *c, const int table[4], int dstRange) | |||
| RV_IDX, BV_IDX, RV_IDX, BV_IDX, RV_IDX, BV_IDX, RV_IDX, BV_IDX, | |||
| BV_IDX, RV_IDX, BV_IDX, RV_IDX, BV_IDX, RV_IDX, BV_IDX, RV_IDX, | |||
| GV_IDX, -1 , GV_IDX, -1 , GV_IDX, -1 , GV_IDX, -1 , | |||
| -1 , GV_IDX, -1 , GV_IDX, -1 , GV_IDX, -1 , GV_IDX, | |||
| -1 , GV_IDX, -1 , GV_IDX, -1 , GV_IDX, -1 , GV_IDX, //23 | |||
| -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , //24 | |||
| -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , //25 | |||
| -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , //26 | |||
| -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , //27 | |||
| -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , //28 | |||
| -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , //29 | |||
| -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , //30 | |||
| -1 , -1 , -1 , -1 , -1 , -1 , -1 , -1 , //31 | |||
| BY_IDX, GY_IDX, RY_IDX, -1 , -1 , -1 , -1 , -1 , //32 | |||
| BU_IDX, GU_IDX, RU_IDX, -1 , -1 , -1 , -1 , -1 , //33 | |||
| BV_IDX, GV_IDX, RV_IDX, -1 , -1 , -1 , -1 , -1 , //34 | |||
| }; | |||
| dstRange = 0; //FIXME range = 1 is handled elsewhere | |||
| @@ -874,7 +885,6 @@ static void fill_rgb2yuv_table(SwsContext *c, const int table[4], int dstRange) | |||
| c->input_rgb2yuv_table[RV_IDX] = ((int)(0.500 * 224 / 255 * (1 << RGB2YUV_SHIFT) + 0.5)); | |||
| c->input_rgb2yuv_table[RU_IDX] = (-(int)(0.169 * 224 / 255 * (1 << RGB2YUV_SHIFT) + 0.5)); | |||
| } | |||
| for(i=0; i<FF_ARRAY_ELEMS(map); i++) | |||
| AV_WL16(p + 16*4 + 2*i, map[i] >= 0 ? c->input_rgb2yuv_table[map[i]] : 0); | |||
| } | |||
| @@ -1615,6 +1615,9 @@ static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_ | |||
| int lumStride, int chromStride, int srcStride, | |||
| int32_t *rgb2yuv) | |||
| { | |||
| #define BGR2Y_IDX "16*4+16*32" | |||
| #define BGR2U_IDX "16*4+16*33" | |||
| #define BGR2V_IDX "16*4+16*34" | |||
| int y; | |||
| const x86_reg chromWidth= width>>1; | |||
| for (y=0; y<height-2; y+=2) { | |||
| @@ -1622,7 +1625,7 @@ static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_ | |||
| for (i=0; i<2; i++) { | |||
| __asm__ volatile( | |||
| "mov %2, %%"REG_a" \n\t" | |||
| "movq "MANGLE(ff_bgr2YCoeff)", %%mm6 \n\t" | |||
| "movq "BGR2Y_IDX"(%3), %%mm6 \n\t" | |||
| "movq "MANGLE(ff_w1111)", %%mm5 \n\t" | |||
| "pxor %%mm7, %%mm7 \n\t" | |||
| "lea (%%"REG_a", %%"REG_a", 2), %%"REG_d" \n\t" | |||
| @@ -1682,7 +1685,7 @@ static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_ | |||
| MOVNTQ" %%mm0, (%1, %%"REG_a") \n\t" | |||
| "add $8, %%"REG_a" \n\t" | |||
| " js 1b \n\t" | |||
| : : "r" (src+width*3), "r" (ydst+width), "g" ((x86_reg)-width) | |||
| : : "r" (src+width*3), "r" (ydst+width), "g" ((x86_reg)-width), "r"(rgb2yuv) | |||
| : "%"REG_a, "%"REG_d | |||
| ); | |||
| ydst += lumStride; | |||
| @@ -1692,7 +1695,7 @@ static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_ | |||
| __asm__ volatile( | |||
| "mov %4, %%"REG_a" \n\t" | |||
| "movq "MANGLE(ff_w1111)", %%mm5 \n\t" | |||
| "movq "MANGLE(ff_bgr2UCoeff)", %%mm6 \n\t" | |||
| "movq "BGR2U_IDX"(%5), %%mm6 \n\t" | |||
| "pxor %%mm7, %%mm7 \n\t" | |||
| "lea (%%"REG_a", %%"REG_a", 2), %%"REG_d" \n\t" | |||
| "add %%"REG_d", %%"REG_d" \n\t" | |||
| @@ -1741,8 +1744,8 @@ static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_ | |||
| "psrlw $2, %%mm0 \n\t" | |||
| "psrlw $2, %%mm2 \n\t" | |||
| #endif | |||
| "movq "MANGLE(ff_bgr2VCoeff)", %%mm1 \n\t" | |||
| "movq "MANGLE(ff_bgr2VCoeff)", %%mm3 \n\t" | |||
| "movq "BGR2V_IDX"(%5), %%mm1 \n\t" | |||
| "movq "BGR2V_IDX"(%5), %%mm3 \n\t" | |||
| "pmaddwd %%mm0, %%mm1 \n\t" | |||
| "pmaddwd %%mm2, %%mm3 \n\t" | |||
| @@ -1801,8 +1804,8 @@ static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_ | |||
| "psrlw $2, %%mm4 \n\t" | |||
| "psrlw $2, %%mm2 \n\t" | |||
| #endif | |||
| "movq "MANGLE(ff_bgr2VCoeff)", %%mm1 \n\t" | |||
| "movq "MANGLE(ff_bgr2VCoeff)", %%mm3 \n\t" | |||
| "movq "BGR2V_IDX"(%5), %%mm1 \n\t" | |||
| "movq "BGR2V_IDX"(%5), %%mm3 \n\t" | |||
| "pmaddwd %%mm4, %%mm1 \n\t" | |||
| "pmaddwd %%mm2, %%mm3 \n\t" | |||
| @@ -1830,7 +1833,7 @@ static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_ | |||
| "movd %%mm0, (%3, %%"REG_a") \n\t" | |||
| "add $4, %%"REG_a" \n\t" | |||
| " js 1b \n\t" | |||
| : : "r" (src+chromWidth*6), "r" (src+srcStride+chromWidth*6), "r" (udst+chromWidth), "r" (vdst+chromWidth), "g" (-chromWidth) | |||
| : : "r" (src+chromWidth*6), "r" (src+srcStride+chromWidth*6), "r" (udst+chromWidth), "r" (vdst+chromWidth), "g" (-chromWidth), "r"(rgb2yuv) | |||
| : "%"REG_a, "%"REG_d | |||
| ); | |||