commit 93681fbd50 (tags/n0.8)
Author: Ronald S. Bultje <rsbultje@gmail.com>
Date: Thu May 26 11:32:32 2011 -0400

    swscale: fix compile on ppc.

commit e758573a88
Author: Ronald S. Bultje <rsbultje@gmail.com>
Date: Thu May 26 10:36:47 2011 -0400

    swscale: fix compile on x86-32.

commit 0f4eb8b043
Author: Ronald S. Bultje <rsbultje@gmail.com>
Date: Thu May 26 09:17:52 2011 -0400

    swscale: remove VOF/VOFW.

commit b4a224c5e4
Author: Ronald S. Bultje <rsbultje@gmail.com>
Date: Wed May 25 14:30:09 2011 -0400

    swscale: split chroma buffers into separate U/V planes.

    Preparatory step to implement support for sizes > VOFW.
| @@ -86,9 +86,11 @@ altivec_packIntArrayToCharArray(int *val, uint8_t* dest, int dstW) | |||
| } | |||
| static inline void | |||
| yuv2yuvX_altivec_real(const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize, | |||
| const int16_t *chrFilter, const int16_t **chrSrc, int chrFilterSize, | |||
| uint8_t *dest, uint8_t *uDest, uint8_t *vDest, int dstW, int chrDstW) | |||
| yuv2yuvX_altivec_real(const int16_t *lumFilter, const int16_t **lumSrc, | |||
| int lumFilterSize, const int16_t *chrFilter, | |||
| const int16_t **chrUSrc, const int16_t **chrVSrc, | |||
| int chrFilterSize, uint8_t *dest, uint8_t *uDest, | |||
| uint8_t *vDest, int dstW, int chrDstW) | |||
| { | |||
| const vector signed int vini = {(1 << 18), (1 << 18), (1 << 18), (1 << 18)}; | |||
| register int i, j; | |||
| @@ -159,22 +161,22 @@ yuv2yuvX_altivec_real(const int16_t *lumFilter, const int16_t **lumSrc, int lumF | |||
| vChrFilter = vec_perm(vChrFilter, vChrFilter, perm0); | |||
| vChrFilter = vec_splat(vChrFilter, 0); // chrFilter[j] is loaded 8 times in vChrFilter | |||
| perm = vec_lvsl(0, chrSrc[j]); | |||
| l1 = vec_ld(0, chrSrc[j]); | |||
| l1_V = vec_ld(VOFW << 1, chrSrc[j]); | |||
| perm = vec_lvsl(0, chrUSrc[j]); | |||
| l1 = vec_ld(0, chrUSrc[j]); | |||
| l1_V = vec_ld(0, chrVSrc[j]); | |||
| for (i = 0; i < (chrDstW - 7); i+=8) { | |||
| int offset = i << 2; | |||
| vector signed short l2 = vec_ld((i << 1) + 16, chrSrc[j]); | |||
| vector signed short l2_V = vec_ld(((i + VOFW) << 1) + 16, chrSrc[j]); | |||
| vector signed short l2 = vec_ld((i << 1) + 16, chrUSrc[j]); | |||
| vector signed short l2_V = vec_ld((i << 1) + 16, chrVSrc[j]); | |||
| vector signed int v1 = vec_ld(offset, u); | |||
| vector signed int v2 = vec_ld(offset + 16, u); | |||
| vector signed int v1_V = vec_ld(offset, v); | |||
| vector signed int v2_V = vec_ld(offset + 16, v); | |||
| vector signed short ls = vec_perm(l1, l2, perm); // chrSrc[j][i] ... chrSrc[j][i+7] | |||
| vector signed short ls_V = vec_perm(l1_V, l2_V, perm); // chrSrc[j][i+VOFW] ... chrSrc[j][i+2055] | |||
| vector signed short ls = vec_perm(l1, l2, perm); // chrUSrc[j][i] ... chrUSrc[j][i+7] | |||
| vector signed short ls_V = vec_perm(l1_V, l2_V, perm); // chrVSrc[j][i] ... chrVSrc[j][i+7] | |||
| vector signed int i1 = vec_mule(vChrFilter, ls); | |||
| vector signed int i2 = vec_mulo(vChrFilter, ls); | |||
| @@ -182,9 +184,9 @@ yuv2yuvX_altivec_real(const int16_t *lumFilter, const int16_t **lumSrc, int lumF | |||
| vector signed int i2_V = vec_mulo(vChrFilter, ls_V); | |||
| vector signed int vf1 = vec_mergeh(i1, i2); | |||
| vector signed int vf2 = vec_mergel(i1, i2); // chrSrc[j][i] * chrFilter[j] ... chrSrc[j][i+7] * chrFilter[j] | |||
| vector signed int vf2 = vec_mergel(i1, i2); // chrUSrc[j][i] * chrFilter[j] ... chrUSrc[j][i+7] * chrFilter[j] | |||
| vector signed int vf1_V = vec_mergeh(i1_V, i2_V); | |||
| vector signed int vf2_V = vec_mergel(i1_V, i2_V); // chrSrc[j][i] * chrFilter[j] ... chrSrc[j][i+7] * chrFilter[j] | |||
| vector signed int vf2_V = vec_mergel(i1_V, i2_V); // chrVSrc[j][i] * chrFilter[j] ... chrVSrc[j][i+7] * chrFilter[j] | |||
| vector signed int vo1 = vec_add(v1, vf1); | |||
| vector signed int vo2 = vec_add(v2, vf2); | |||
| @@ -200,8 +202,8 @@ yuv2yuvX_altivec_real(const int16_t *lumFilter, const int16_t **lumSrc, int lumF | |||
| l1_V = l2_V; | |||
| } | |||
| for ( ; i < chrDstW; i++) { | |||
| u[i] += chrSrc[j][i] * chrFilter[j]; | |||
| v[i] += chrSrc[j][i + VOFW] * chrFilter[j]; | |||
| u[i] += chrUSrc[j][i] * chrFilter[j]; | |||
| v[i] += chrVSrc[j][i] * chrFilter[j]; | |||
| } | |||
| } | |||
| altivec_packIntArrayToCharArray(u, uDest, chrDstW); | |||
| @@ -24,21 +24,28 @@ | |||
| #endif | |||
| #if COMPILE_TEMPLATE_ALTIVEC | |||
| static inline void RENAME(yuv2yuvX)(SwsContext *c, const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize, | |||
| const int16_t *chrFilter, const int16_t **chrSrc, int chrFilterSize, const int16_t **alpSrc, | |||
| uint8_t *dest, uint8_t *uDest, uint8_t *vDest, uint8_t *aDest, long dstW, long chrDstW) | |||
| static inline void RENAME(yuv2yuvX)(SwsContext *c, const int16_t *lumFilter, | |||
| const int16_t **lumSrc, int lumFilterSize, | |||
| const int16_t *chrFilter, const int16_t **chrUSrc, | |||
| const int16_t **chrVSrc, int chrFilterSize, | |||
| const int16_t **alpSrc, | |||
| uint8_t *dest, uint8_t *uDest, uint8_t *vDest, | |||
| uint8_t *aDest, long dstW, long chrDstW) | |||
| { | |||
| yuv2yuvX_altivec_real(lumFilter, lumSrc, lumFilterSize, | |||
| chrFilter, chrSrc, chrFilterSize, | |||
| chrFilter, chrUSrc, chrVSrc, chrFilterSize, | |||
| dest, uDest, vDest, dstW, chrDstW); | |||
| } | |||
| /** | |||
| * vertical scale YV12 to RGB | |||
| */ | |||
| static inline void RENAME(yuv2packedX)(SwsContext *c, const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize, | |||
| const int16_t *chrFilter, const int16_t **chrSrc, int chrFilterSize, | |||
| const int16_t **alpSrc, uint8_t *dest, long dstW, long dstY) | |||
| static inline void RENAME(yuv2packedX)(SwsContext *c, const int16_t *lumFilter, | |||
| const int16_t **lumSrc, int lumFilterSize, | |||
| const int16_t *chrFilter, const int16_t **chrUSrc, | |||
| const int16_t **chrVSrc, int chrFilterSize, | |||
| const int16_t **alpSrc, uint8_t *dest, | |||
| long dstW, long dstY) | |||
| { | |||
| /* The following list of supported dstFormat values should | |||
| match what's found in the body of ff_yuv2packedX_altivec() */ | |||
| @@ -47,11 +54,11 @@ static inline void RENAME(yuv2packedX)(SwsContext *c, const int16_t *lumFilter, | |||
| c->dstFormat==PIX_FMT_BGR24 || c->dstFormat==PIX_FMT_RGB24 || | |||
| c->dstFormat==PIX_FMT_RGBA || c->dstFormat==PIX_FMT_ARGB)) | |||
| ff_yuv2packedX_altivec(c, lumFilter, lumSrc, lumFilterSize, | |||
| chrFilter, chrSrc, chrFilterSize, | |||
| chrFilter, chrUSrc, chrVSrc, chrFilterSize, | |||
| dest, dstW, dstY); | |||
| else | |||
| yuv2packedXinC(c, lumFilter, lumSrc, lumFilterSize, | |||
| chrFilter, chrSrc, chrFilterSize, | |||
| chrFilter, chrUSrc, chrVSrc, chrFilterSize, | |||
| alpSrc, dest, dstW, dstY); | |||
| } | |||
| #endif | |||
| @@ -778,10 +778,11 @@ void ff_yuv2rgb_init_tables_altivec(SwsContext *c, const int inv_table[4], int b | |||
| void | |||
| ff_yuv2packedX_altivec(SwsContext *c, | |||
| const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize, | |||
| const int16_t *chrFilter, const int16_t **chrSrc, int chrFilterSize, | |||
| uint8_t *dest, int dstW, int dstY) | |||
| ff_yuv2packedX_altivec(SwsContext *c, const int16_t *lumFilter, | |||
| const int16_t **lumSrc, int lumFilterSize, | |||
| const int16_t *chrFilter, const int16_t **chrUSrc, | |||
| const int16_t **chrVSrc, int chrFilterSize, | |||
| uint8_t *dest, int dstW, int dstY) | |||
| { | |||
| int i,j; | |||
| vector signed short X,X0,X1,Y0,U0,V0,Y1,U1,V1,U,V; | |||
| @@ -816,9 +817,9 @@ ff_yuv2packedX_altivec(SwsContext *c, | |||
| V = RND; | |||
| /* extract 8 coeffs from U,V */ | |||
| for (j=0; j<chrFilterSize; j++) { | |||
| X = vec_ld (0, &chrSrc[j][i/2]); | |||
| X = vec_ld (0, &chrUSrc[j][i/2]); | |||
| U = vec_mradds (X, CCoeffs[j], U); | |||
| X = vec_ld (0, &chrSrc[j][i/2+VOFW]); | |||
| X = vec_ld (0, &chrVSrc[j][i/2]); | |||
| V = vec_mradds (X, CCoeffs[j], V); | |||
| } | |||
| @@ -894,9 +895,9 @@ ff_yuv2packedX_altivec(SwsContext *c, | |||
| V = RND; | |||
| /* extract 8 coeffs from U,V */ | |||
| for (j=0; j<chrFilterSize; j++) { | |||
| X = vec_ld (0, &chrSrc[j][i/2]); | |||
| X = vec_ld (0, &chrUSrc[j][i/2]); | |||
| U = vec_mradds (X, CCoeffs[j], U); | |||
| X = vec_ld (0, &chrSrc[j][i/2+VOFW]); | |||
| X = vec_ld (0, &chrVSrc[j][i/2]); | |||
| V = vec_mradds (X, CCoeffs[j], V); | |||
| } | |||
| @@ -301,7 +301,8 @@ uint16_t dither_scale[15][16]={ | |||
| }; | |||
| static av_always_inline void yuv2yuvX16inC_template(const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize, | |||
| const int16_t *chrFilter, const int16_t **chrSrc, int chrFilterSize, | |||
| const int16_t *chrFilter, const int16_t **chrUSrc, | |||
| const int16_t **chrVSrc, int chrFilterSize, | |||
| const int16_t **alpSrc, uint16_t *dest, uint16_t *uDest, uint16_t *vDest, uint16_t *aDest, | |||
| int dstW, int chrDstW, int big_endian, int output_bits) | |||
| { | |||
| @@ -340,8 +341,8 @@ static av_always_inline void yuv2yuvX16inC_template(const int16_t *lumFilter, co | |||
| int j; | |||
| for (j = 0; j < chrFilterSize; j++) { | |||
| u += chrSrc[j][i ] * chrFilter[j]; | |||
| v += chrSrc[j][i + VOFW] * chrFilter[j]; | |||
| u += chrUSrc[j][i] * chrFilter[j]; | |||
| v += chrVSrc[j][i] * chrFilter[j]; | |||
| } | |||
| output_pixel(&uDest[i], u); | |||
| @@ -362,28 +363,50 @@ static av_always_inline void yuv2yuvX16inC_template(const int16_t *lumFilter, co | |||
| } | |||
| } | |||
| #define yuv2NBPS(bits, BE_LE, is_be) \ | |||
| static void yuv2yuvX ## bits ## BE_LE ## _c(const int16_t *lumFilter, \ | |||
| const int16_t **lumSrc, int lumFilterSize, \ | |||
| const int16_t *chrFilter, const int16_t **chrUSrc, \ | |||
| const int16_t **chrVSrc, \ | |||
| int chrFilterSize, const int16_t **alpSrc, \ | |||
| uint16_t *dest, uint16_t *uDest, uint16_t *vDest, \ | |||
| uint16_t *aDest, int dstW, int chrDstW) \ | |||
| { \ | |||
| yuv2yuvX16inC_template(lumFilter, lumSrc, lumFilterSize, \ | |||
| chrFilter, chrUSrc, chrVSrc, chrFilterSize, \ | |||
| alpSrc, \ | |||
| dest, uDest, vDest, aDest, \ | |||
| dstW, chrDstW, is_be, bits); \ | |||
| } | |||
| yuv2NBPS( 9, BE, 1); | |||
| yuv2NBPS( 9, LE, 0); | |||
| yuv2NBPS(10, BE, 1); | |||
| yuv2NBPS(10, LE, 0); | |||
| yuv2NBPS(16, BE, 1); | |||
| yuv2NBPS(16, LE, 0); | |||
| static inline void yuv2yuvX16inC(const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize, | |||
| const int16_t *chrFilter, const int16_t **chrSrc, int chrFilterSize, | |||
| const int16_t *chrFilter, const int16_t **chrUSrc, const int16_t **chrVSrc, int chrFilterSize, | |||
| const int16_t **alpSrc, uint16_t *dest, uint16_t *uDest, uint16_t *vDest, uint16_t *aDest, int dstW, int chrDstW, | |||
| enum PixelFormat dstFormat) | |||
| { | |||
| if (isNBPS(dstFormat)) { | |||
| const int depth = av_pix_fmt_descriptors[dstFormat].comp[0].depth_minus1+1; | |||
| yuv2yuvX16inC_template(lumFilter, lumSrc, lumFilterSize, | |||
| chrFilter, chrSrc, chrFilterSize, | |||
| chrFilter, chrUSrc, chrVSrc, chrFilterSize, | |||
| alpSrc, | |||
| dest, uDest, vDest, aDest, | |||
| dstW, chrDstW, isBE(dstFormat), depth); | |||
| } else { | |||
| if (isBE(dstFormat)) { | |||
| yuv2yuvX16inC_template(lumFilter, lumSrc, lumFilterSize, | |||
| chrFilter, chrSrc, chrFilterSize, | |||
| chrFilter, chrUSrc, chrVSrc, chrFilterSize, | |||
| alpSrc, | |||
| dest, uDest, vDest, aDest, | |||
| dstW, chrDstW, 1, 16); | |||
| } else { | |||
| yuv2yuvX16inC_template(lumFilter, lumSrc, lumFilterSize, | |||
| chrFilter, chrSrc, chrFilterSize, | |||
| chrFilter, chrUSrc, chrVSrc, chrFilterSize, | |||
| alpSrc, | |||
| dest, uDest, vDest, aDest, | |||
| dstW, chrDstW, 0, 16); | |||
| @@ -392,7 +415,8 @@ static inline void yuv2yuvX16inC(const int16_t *lumFilter, const int16_t **lumSr | |||
| } | |||
| static inline void yuv2yuvXinC(const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize, | |||
| const int16_t *chrFilter, const int16_t **chrSrc, int chrFilterSize, | |||
| const int16_t *chrFilter, const int16_t **chrUSrc, | |||
| const int16_t **chrVSrc, int chrFilterSize, | |||
| const int16_t **alpSrc, uint8_t *dest, uint8_t *uDest, uint8_t *vDest, uint8_t *aDest, int dstW, int chrDstW) | |||
| { | |||
| //FIXME Optimize (just quickly written not optimized..) | |||
| @@ -412,8 +436,8 @@ static inline void yuv2yuvXinC(const int16_t *lumFilter, const int16_t **lumSrc, | |||
| int v=1<<18; | |||
| int j; | |||
| for (j=0; j<chrFilterSize; j++) { | |||
| u += chrSrc[j][i] * chrFilter[j]; | |||
| v += chrSrc[j][i + VOFW] * chrFilter[j]; | |||
| u += chrUSrc[j][i] * chrFilter[j]; | |||
| v += chrVSrc[j][i] * chrFilter[j]; | |||
| } | |||
| uDest[i]= av_clip_uint8(u>>19); | |||
| @@ -433,7 +457,8 @@ static inline void yuv2yuvXinC(const int16_t *lumFilter, const int16_t **lumSrc, | |||
| } | |||
| static inline void yuv2nv12XinC(const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize, | |||
| const int16_t *chrFilter, const int16_t **chrSrc, int chrFilterSize, | |||
| const int16_t *chrFilter, const int16_t **chrUSrc, | |||
| const int16_t **chrVSrc, int chrFilterSize, | |||
| uint8_t *dest, uint8_t *uDest, int dstW, int chrDstW, int dstFormat) | |||
| { | |||
| //FIXME Optimize (just quickly written not optimized..) | |||
| @@ -456,8 +481,8 @@ static inline void yuv2nv12XinC(const int16_t *lumFilter, const int16_t **lumSrc | |||
| int v=1<<18; | |||
| int j; | |||
| for (j=0; j<chrFilterSize; j++) { | |||
| u += chrSrc[j][i] * chrFilter[j]; | |||
| v += chrSrc[j][i + VOFW] * chrFilter[j]; | |||
| u += chrUSrc[j][i] * chrFilter[j]; | |||
| v += chrVSrc[j][i] * chrFilter[j]; | |||
| } | |||
| uDest[2*i]= av_clip_uint8(u>>19); | |||
| @@ -469,8 +494,8 @@ static inline void yuv2nv12XinC(const int16_t *lumFilter, const int16_t **lumSrc | |||
| int v=1<<18; | |||
| int j; | |||
| for (j=0; j<chrFilterSize; j++) { | |||
| u += chrSrc[j][i] * chrFilter[j]; | |||
| v += chrSrc[j][i + VOFW] * chrFilter[j]; | |||
| u += chrUSrc[j][i] * chrFilter[j]; | |||
| v += chrVSrc[j][i] * chrFilter[j]; | |||
| } | |||
| uDest[2*i]= av_clip_uint8(v>>19); | |||
| @@ -494,8 +519,8 @@ static inline void yuv2nv12XinC(const int16_t *lumFilter, const int16_t **lumSrc | |||
| Y2 += lumSrc[j][i2+1] * lumFilter[j];\ | |||
| }\ | |||
| for (j=0; j<chrFilterSize; j++) {\ | |||
| U += chrSrc[j][i] * chrFilter[j];\ | |||
| V += chrSrc[j][i+VOFW] * chrFilter[j];\ | |||
| U += chrUSrc[j][i] * chrFilter[j];\ | |||
| V += chrVSrc[j][i] * chrFilter[j];\ | |||
| }\ | |||
| Y1>>=19;\ | |||
| Y2>>=19;\ | |||
| @@ -542,8 +567,8 @@ static inline void yuv2nv12XinC(const int16_t *lumFilter, const int16_t **lumSrc | |||
| Y += lumSrc[j][i ] * lumFilter[j];\ | |||
| }\ | |||
| for (j=0; j<chrFilterSize; j++) {\ | |||
| U += chrSrc[j][i ] * chrFilter[j];\ | |||
| V += chrSrc[j][i+VOFW] * chrFilter[j];\ | |||
| U += chrUSrc[j][i] * chrFilter[j];\ | |||
| V += chrVSrc[j][i] * chrFilter[j];\ | |||
| }\ | |||
| Y >>=10;\ | |||
| U >>=10;\ | |||
| @@ -608,8 +633,8 @@ static inline void yuv2nv12XinC(const int16_t *lumFilter, const int16_t **lumSrc | |||
| const int i2= 2*i; \ | |||
| int Y1= (buf0[i2 ]*yalpha1+buf1[i2 ]*yalpha)>>19; \ | |||
| int Y2= (buf0[i2+1]*yalpha1+buf1[i2+1]*yalpha)>>19; \ | |||
| int U= (uvbuf0[i ]*uvalpha1+uvbuf1[i ]*uvalpha)>>19; \ | |||
| int V= (uvbuf0[i+VOFW]*uvalpha1+uvbuf1[i+VOFW]*uvalpha)>>19; \ | |||
| int U= (ubuf0[i]*uvalpha1+ubuf1[i]*uvalpha)>>19; \ | |||
| int V= (vbuf0[i]*uvalpha1+vbuf1[i]*uvalpha)>>19; \ | |||
| type av_unused *r, *b, *g; \ | |||
| int av_unused A1, A2; \ | |||
| if (alpha) {\ | |||
| @@ -634,8 +659,8 @@ static inline void yuv2nv12XinC(const int16_t *lumFilter, const int16_t **lumSrc | |||
| const int i2= 2*i;\ | |||
| int Y1= buf0[i2 ]>>7;\ | |||
| int Y2= buf0[i2+1]>>7;\ | |||
| int U= (uvbuf1[i ])>>7;\ | |||
| int V= (uvbuf1[i+VOFW])>>7;\ | |||
| int U= (ubuf1[i])>>7;\ | |||
| int V= (vbuf1[i])>>7;\ | |||
| type av_unused *r, *b, *g;\ | |||
| int av_unused A1, A2;\ | |||
| if (alpha) {\ | |||
| @@ -660,8 +685,8 @@ static inline void yuv2nv12XinC(const int16_t *lumFilter, const int16_t **lumSrc | |||
| const int i2= 2*i;\ | |||
| int Y1= buf0[i2 ]>>7;\ | |||
| int Y2= buf0[i2+1]>>7;\ | |||
| int U= (uvbuf0[i ] + uvbuf1[i ])>>8;\ | |||
| int V= (uvbuf0[i+VOFW] + uvbuf1[i+VOFW])>>8;\ | |||
| int U= (ubuf0[i] + ubuf1[i])>>8;\ | |||
| int V= (vbuf0[i] + vbuf1[i])>>8;\ | |||
| type av_unused *r, *b, *g;\ | |||
| int av_unused A1, A2;\ | |||
| if (alpha) {\ | |||
| @@ -943,16 +968,20 @@ static inline void yuv2nv12XinC(const int16_t *lumFilter, const int16_t **lumSrc | |||
| break;\ | |||
| } | |||
| static inline void yuv2packedXinC(SwsContext *c, const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize, | |||
| const int16_t *chrFilter, const int16_t **chrSrc, int chrFilterSize, | |||
| static inline void yuv2packedXinC(SwsContext *c, const int16_t *lumFilter, | |||
| const int16_t **lumSrc, int lumFilterSize, | |||
| const int16_t *chrFilter, const int16_t **chrUSrc, | |||
| const int16_t **chrVSrc, int chrFilterSize, | |||
| const int16_t **alpSrc, uint8_t *dest, int dstW, int y) | |||
| { | |||
| int i; | |||
| YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGBX_C, YSCALE_YUV_2_PACKEDX_C(void,0), YSCALE_YUV_2_GRAY16_C, YSCALE_YUV_2_MONOX_C) | |||
| } | |||
| static inline void yuv2rgbXinC_full(SwsContext *c, const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize, | |||
| const int16_t *chrFilter, const int16_t **chrSrc, int chrFilterSize, | |||
| static inline void yuv2rgbXinC_full(SwsContext *c, const int16_t *lumFilter, | |||
| const int16_t **lumSrc, int lumFilterSize, | |||
| const int16_t *chrFilter, const int16_t **chrUSrc, | |||
| const int16_t **chrVSrc, int chrFilterSize, | |||
| const int16_t **alpSrc, uint8_t *dest, int dstW, int y) | |||
| { | |||
| int i; | |||
| @@ -35,10 +35,6 @@ | |||
| #define MAX_FILTER_SIZE 256 | |||
| #define VOFW 21504 | |||
| #define VOF (VOFW*2) | |||
| #if HAVE_BIGENDIAN | |||
| #define ALT32_CORR (-1) | |||
| #else | |||
| @@ -108,7 +104,8 @@ typedef struct SwsContext { | |||
| */ | |||
| //@{ | |||
| int16_t **lumPixBuf; ///< Ring buffer for scaled horizontal luma plane lines to be fed to the vertical scaler. | |||
| int16_t **chrPixBuf; ///< Ring buffer for scaled horizontal chroma plane lines to be fed to the vertical scaler. | |||
| int16_t **chrUPixBuf; ///< Ring buffer for scaled horizontal chroma U plane lines to be fed to the vertical scaler. | |||
| int16_t **chrVPixBuf; ///< Ring buffer for scaled horizontal chroma V plane lines to be fed to the vertical scaler. | |||
| int16_t **alpPixBuf; ///< Ring buffer for scaled horizontal alpha plane lines to be fed to the vertical scaler. | |||
| int vLumBufSize; ///< Number of vertical luma/alpha lines allocated in the ring buffer. | |||
| int vChrBufSize; ///< Number of vertical chroma lines allocated in the ring buffer. | |||
| @@ -196,6 +193,7 @@ typedef struct SwsContext { | |||
| #define V_TEMP "11*8+4*4*256*2+32" | |||
| #define Y_TEMP "11*8+4*4*256*2+40" | |||
| #define ALP_MMX_FILTER_OFFSET "11*8+4*4*256*2+48" | |||
| #define UV_OFF "11*8+4*4*256*3+48" | |||
| DECLARE_ALIGNED(8, uint64_t, redDither); | |||
| DECLARE_ALIGNED(8, uint64_t, greenDither); | |||
| @@ -218,6 +216,7 @@ typedef struct SwsContext { | |||
| DECLARE_ALIGNED(8, uint64_t, v_temp); | |||
| DECLARE_ALIGNED(8, uint64_t, y_temp); | |||
| int32_t alpMmxFilter[4*MAX_FILTER_SIZE]; | |||
| DECLARE_ALIGNED(8, ptrdiff_t, uv_off); ///< offset (in pixels) between u and v planes | |||
| #if HAVE_ALTIVEC | |||
| vector signed short CY; | |||
| @@ -251,36 +250,42 @@ typedef struct SwsContext { | |||
| /* function pointers for swScale() */ | |||
| void (*yuv2nv12X )(struct SwsContext *c, | |||
| const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize, | |||
| const int16_t *chrFilter, const int16_t **chrSrc, int chrFilterSize, | |||
| const int16_t *chrFilter, const int16_t **chrUSrc, | |||
| const int16_t **chrVSrc, int chrFilterSize, | |||
| uint8_t *dest, uint8_t *uDest, | |||
| int dstW, int chrDstW, int dstFormat); | |||
| void (*yuv2yuv1 )(struct SwsContext *c, | |||
| const int16_t *lumSrc, const int16_t *chrSrc, const int16_t *alpSrc, | |||
| const int16_t *lumSrc, const int16_t *chrUSrc, | |||
| const int16_t *chrVSrc, const int16_t *alpSrc, | |||
| uint8_t *dest, | |||
| uint8_t *uDest, uint8_t *vDest, uint8_t *aDest, | |||
| long dstW, long chrDstW); | |||
| void (*yuv2yuvX )(struct SwsContext *c, | |||
| const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize, | |||
| const int16_t *chrFilter, const int16_t **chrSrc, int chrFilterSize, | |||
| const int16_t *chrFilter, const int16_t **chrUSrc, | |||
| const int16_t **chrVSrc, int chrFilterSize, | |||
| const int16_t **alpSrc, | |||
| uint8_t *dest, | |||
| uint8_t *uDest, uint8_t *vDest, uint8_t *aDest, | |||
| long dstW, long chrDstW); | |||
| void (*yuv2packed1)(struct SwsContext *c, | |||
| const uint16_t *buf0, | |||
| const uint16_t *uvbuf0, const uint16_t *uvbuf1, | |||
| const uint16_t *ubuf0, const uint16_t *ubuf1, | |||
| const uint16_t *vbuf0, const uint16_t *vbuf1, | |||
| const uint16_t *abuf0, | |||
| uint8_t *dest, | |||
| int dstW, int uvalpha, int dstFormat, int flags, int y); | |||
| void (*yuv2packed2)(struct SwsContext *c, | |||
| const uint16_t *buf0, const uint16_t *buf1, | |||
| const uint16_t *uvbuf0, const uint16_t *uvbuf1, | |||
| const uint16_t *ubuf0, const uint16_t *ubuf1, | |||
| const uint16_t *vbuf0, const uint16_t *vbuf1, | |||
| const uint16_t *abuf0, const uint16_t *abuf1, | |||
| uint8_t *dest, | |||
| int dstW, int yalpha, int uvalpha, int y); | |||
| void (*yuv2packedX)(struct SwsContext *c, | |||
| const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize, | |||
| const int16_t *chrFilter, const int16_t **chrSrc, int chrFilterSize, | |||
| const int16_t *chrFilter, const int16_t **chrUSrc, | |||
| const int16_t **chrVSrc, int chrFilterSize, | |||
| const int16_t **alpSrc, uint8_t *dest, | |||
| long dstW, long dstY); | |||
| @@ -295,7 +300,7 @@ typedef struct SwsContext { | |||
| int16_t *dst, long dstWidth, | |||
| const uint8_t *src, int srcW, int xInc); | |||
| void (*hcscale_fast)(struct SwsContext *c, | |||
| int16_t *dst, long dstWidth, | |||
| int16_t *dst1, int16_t *dst2, long dstWidth, | |||
| const uint8_t *src1, const uint8_t *src2, | |||
| int srcW, int xInc); | |||
| @@ -308,7 +313,7 @@ typedef struct SwsContext { | |||
| long filterSize, int shift); | |||
| void (*lumConvertRange)(int16_t *dst, int width); ///< Color range conversion function for luma plane if needed. | |||
| void (*chrConvertRange)(int16_t *dst, int width); ///< Color range conversion function for chroma planes if needed. | |||
| void (*chrConvertRange)(int16_t *dst1, int16_t *dst2, int width); ///< Color range conversion function for chroma planes if needed. | |||
| int lumSrcOffset; ///< Offset given to luma src pointers passed to horizontal input functions. | |||
| int chrSrcOffset; ///< Offset given to chroma src pointers passed to horizontal input functions. | |||
| @@ -332,9 +337,10 @@ SwsFunc ff_yuv2rgb_init_mlib(SwsContext *c); | |||
| SwsFunc ff_yuv2rgb_init_altivec(SwsContext *c); | |||
| SwsFunc ff_yuv2rgb_get_func_ptr_bfin(SwsContext *c); | |||
| void ff_bfin_get_unscaled_swscale(SwsContext *c); | |||
| void ff_yuv2packedX_altivec(SwsContext *c, | |||
| const int16_t *lumFilter, const int16_t **lumSrc, int lumFilterSize, | |||
| const int16_t *chrFilter, const int16_t **chrSrc, int chrFilterSize, | |||
| void ff_yuv2packedX_altivec(SwsContext *c, const int16_t *lumFilter, | |||
| const int16_t **lumSrc, int lumFilterSize, | |||
| const int16_t *chrFilter, const int16_t **chrUSrc, | |||
| const int16_t **chrVSrc, int chrFilterSize, | |||
| uint8_t *dest, int dstW, int dstY); | |||
| const char *sws_format_name(enum PixelFormat format); | |||
| @@ -20,29 +20,32 @@ | |||
| static inline void yuv2yuvX_c(SwsContext *c, const int16_t *lumFilter, | |||
| const int16_t **lumSrc, int lumFilterSize, | |||
| const int16_t *chrFilter, const int16_t **chrSrc, | |||
| const int16_t *chrFilter, const int16_t **chrUSrc, | |||
| const int16_t **chrVSrc, | |||
| int chrFilterSize, const int16_t **alpSrc, | |||
| uint8_t *dest, uint8_t *uDest, uint8_t *vDest, | |||
| uint8_t *aDest, long dstW, long chrDstW) | |||
| { | |||
| yuv2yuvXinC(lumFilter, lumSrc, lumFilterSize, | |||
| chrFilter, chrSrc, chrFilterSize, | |||
| chrFilter, chrUSrc, chrVSrc, chrFilterSize, | |||
| alpSrc, dest, uDest, vDest, aDest, dstW, chrDstW); | |||
| } | |||
| static inline void yuv2nv12X_c(SwsContext *c, const int16_t *lumFilter, | |||
| const int16_t **lumSrc, int lumFilterSize, | |||
| const int16_t *chrFilter, const int16_t **chrSrc, | |||
| const int16_t *chrFilter, const int16_t **chrUSrc, | |||
| const int16_t **chrVSrc, | |||
| int chrFilterSize, uint8_t *dest, uint8_t *uDest, | |||
| int dstW, int chrDstW, enum PixelFormat dstFormat) | |||
| { | |||
| yuv2nv12XinC(lumFilter, lumSrc, lumFilterSize, | |||
| chrFilter, chrSrc, chrFilterSize, | |||
| chrFilter, chrUSrc, chrVSrc, chrFilterSize, | |||
| dest, uDest, dstW, chrDstW, dstFormat); | |||
| } | |||
| static inline void yuv2yuv1_c(SwsContext *c, const int16_t *lumSrc, | |||
| const int16_t *chrSrc, const int16_t *alpSrc, | |||
| const int16_t *chrUSrc, const int16_t *chrVSrc, | |||
| const int16_t *alpSrc, | |||
| uint8_t *dest, uint8_t *uDest, uint8_t *vDest, | |||
| uint8_t *aDest, long dstW, long chrDstW) | |||
| { | |||
| @@ -54,8 +57,8 @@ static inline void yuv2yuv1_c(SwsContext *c, const int16_t *lumSrc, | |||
| if (uDest) | |||
| for (i=0; i<chrDstW; i++) { | |||
| int u=(chrSrc[i ]+64)>>7; | |||
| int v=(chrSrc[i + VOFW]+64)>>7; | |||
| int u=(chrUSrc[i]+64)>>7; | |||
| int v=(chrVSrc[i]+64)>>7; | |||
| uDest[i]= av_clip_uint8(u); | |||
| vDest[i]= av_clip_uint8(v); | |||
| } | |||
| @@ -73,12 +76,13 @@ static inline void yuv2yuv1_c(SwsContext *c, const int16_t *lumSrc, | |||
| */ | |||
| static inline void yuv2packedX_c(SwsContext *c, const int16_t *lumFilter, | |||
| const int16_t **lumSrc, int lumFilterSize, | |||
| const int16_t *chrFilter, const int16_t **chrSrc, | |||
| const int16_t *chrFilter, const int16_t **chrUSrc, | |||
| const int16_t **chrVSrc, | |||
| int chrFilterSize, const int16_t **alpSrc, | |||
| uint8_t *dest, long dstW, long dstY) | |||
| { | |||
| yuv2packedXinC(c, lumFilter, lumSrc, lumFilterSize, | |||
| chrFilter, chrSrc, chrFilterSize, | |||
| chrFilter, chrUSrc, chrVSrc, chrFilterSize, | |||
| alpSrc, dest, dstW, dstY); | |||
| } | |||
| @@ -86,8 +90,9 @@ static inline void yuv2packedX_c(SwsContext *c, const int16_t *lumFilter, | |||
| * vertical bilinear scale YV12 to RGB | |||
| */ | |||
| static inline void yuv2packed2_c(SwsContext *c, const uint16_t *buf0, | |||
| const uint16_t *buf1, const uint16_t *uvbuf0, | |||
| const uint16_t *uvbuf1, const uint16_t *abuf0, | |||
| const uint16_t *buf1, const uint16_t *ubuf0, | |||
| const uint16_t *ubuf1, const uint16_t *vbuf0, | |||
| const uint16_t *vbuf1, const uint16_t *abuf0, | |||
| const uint16_t *abuf1, uint8_t *dest, int dstW, | |||
| int yalpha, int uvalpha, int y) | |||
| { | |||
| @@ -102,7 +107,8 @@ static inline void yuv2packed2_c(SwsContext *c, const uint16_t *buf0, | |||
| * YV12 to RGB without scaling or interpolating | |||
| */ | |||
| static inline void yuv2packed1_c(SwsContext *c, const uint16_t *buf0, | |||
| const uint16_t *uvbuf0, const uint16_t *uvbuf1, | |||
| const uint16_t *ubuf0, const uint16_t *ubuf1, | |||
| const uint16_t *vbuf0, const uint16_t *vbuf1, | |||
| const uint16_t *abuf0, uint8_t *dest, int dstW, | |||
| int uvalpha, enum PixelFormat dstFormat, | |||
| int flags, int y) | |||
| @@ -373,20 +379,20 @@ static inline void hScale16X_c(int16_t *dst, int dstW, const uint16_t *src, int | |||
| //FIXME all pal and rgb srcFormats could do this conversion as well | |||
| //FIXME all scalers more complex than bilinear could do half of this transform | |||
| static void chrRangeToJpeg_c(int16_t *dst, int width) | |||
| static void chrRangeToJpeg_c(int16_t *dstU, int16_t *dstV, int width) | |||
| { | |||
| int i; | |||
| for (i = 0; i < width; i++) { | |||
| dst[i ] = (FFMIN(dst[i ],30775)*4663 - 9289992)>>12; //-264 | |||
| dst[i+VOFW] = (FFMIN(dst[i+VOFW],30775)*4663 - 9289992)>>12; //-264 | |||
| dstU[i] = (FFMIN(dstU[i],30775)*4663 - 9289992)>>12; //-264 | |||
| dstV[i] = (FFMIN(dstV[i],30775)*4663 - 9289992)>>12; //-264 | |||
| } | |||
| } | |||
| static void chrRangeFromJpeg_c(int16_t *dst, int width) | |||
| static void chrRangeFromJpeg_c(int16_t *dstU, int16_t *dstV, int width) | |||
| { | |||
| int i; | |||
| for (i = 0; i < width; i++) { | |||
| dst[i ] = (dst[i ]*1799 + 4081085)>>11; //1469 | |||
| dst[i+VOFW] = (dst[i+VOFW]*1799 + 4081085)>>11; //1469 | |||
| dstU[i] = (dstU[i]*1799 + 4081085)>>11; //1469 | |||
| dstV[i] = (dstV[i]*1799 + 4081085)>>11; //1469 | |||
| } | |||
| } | |||
| static void lumRangeToJpeg_c(int16_t *dst, int width) | |||
| @@ -446,7 +452,7 @@ static inline void hyscale_c(SwsContext *c, uint16_t *dst, long dstWidth, | |||
| convertRange(dst, dstWidth); | |||
| } | |||
| static inline void hcscale_fast_c(SwsContext *c, int16_t *dst, | |||
| static inline void hcscale_fast_c(SwsContext *c, int16_t *dst1, int16_t *dst2, | |||
| long dstWidth, const uint8_t *src1, | |||
| const uint8_t *src2, int srcW, int xInc) | |||
| { | |||
| @@ -455,17 +461,13 @@ static inline void hcscale_fast_c(SwsContext *c, int16_t *dst, | |||
| for (i=0;i<dstWidth;i++) { | |||
| register unsigned int xx=xpos>>16; | |||
| register unsigned int xalpha=(xpos&0xFFFF)>>9; | |||
| dst[i]=(src1[xx]*(xalpha^127)+src1[xx+1]*xalpha); | |||
| dst[i+VOFW]=(src2[xx]*(xalpha^127)+src2[xx+1]*xalpha); | |||
| /* slower | |||
| dst[i]= (src1[xx]<<7) + (src1[xx+1] - src1[xx])*xalpha; | |||
| dst[i+VOFW]=(src2[xx]<<7) + (src2[xx+1] - src2[xx])*xalpha; | |||
| */ | |||
| dst1[i]=(src1[xx]*(xalpha^127)+src1[xx+1]*xalpha); | |||
| dst2[i]=(src2[xx]*(xalpha^127)+src2[xx+1]*xalpha); | |||
| xpos+=xInc; | |||
| } | |||
| } | |||
| inline static void hcscale_c(SwsContext *c, uint16_t *dst, long dstWidth, | |||
| inline static void hcscale_c(SwsContext *c, uint16_t *dst1, uint16_t *dst2, long dstWidth, | |||
| const uint8_t *src1, const uint8_t *src2, | |||
| int srcW, int xInc, const int16_t *hChrFilter, | |||
| const int16_t *hChrFilterPos, int hChrFilterSize, | |||
| @@ -484,17 +486,17 @@ inline static void hcscale_c(SwsContext *c, uint16_t *dst, long dstWidth, | |||
| if (c->hScale16) { | |||
| int shift= isAnyRGB(c->srcFormat) || c->srcFormat==PIX_FMT_PAL8 ? 13 : av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1; | |||
| c->hScale16(dst , dstWidth, (uint16_t*)src1, srcW, xInc, hChrFilter, hChrFilterPos, hChrFilterSize, shift); | |||
| c->hScale16(dst+VOFW, dstWidth, (uint16_t*)src2, srcW, xInc, hChrFilter, hChrFilterPos, hChrFilterSize, shift); | |||
| c->hScale16(dst1, dstWidth, (uint16_t*)src1, srcW, xInc, hChrFilter, hChrFilterPos, hChrFilterSize, shift); | |||
| c->hScale16(dst2, dstWidth, (uint16_t*)src2, srcW, xInc, hChrFilter, hChrFilterPos, hChrFilterSize, shift); | |||
| } else if (!c->hcscale_fast) { | |||
| c->hScale(dst , dstWidth, src1, srcW, xInc, hChrFilter, hChrFilterPos, hChrFilterSize); | |||
| c->hScale(dst+VOFW, dstWidth, src2, srcW, xInc, hChrFilter, hChrFilterPos, hChrFilterSize); | |||
| c->hScale(dst1, dstWidth, src1, srcW, xInc, hChrFilter, hChrFilterPos, hChrFilterSize); | |||
| c->hScale(dst2, dstWidth, src2, srcW, xInc, hChrFilter, hChrFilterPos, hChrFilterSize); | |||
| } else { // fast bilinear upscale / crap downscale | |||
| c->hcscale_fast(c, dst, dstWidth, src1, src2, srcW, xInc); | |||
| c->hcscale_fast(c, dst1, dst2, dstWidth, src1, src2, srcW, xInc); | |||
| } | |||
| if (c->chrConvertRange) | |||
| c->chrConvertRange(dst, dstWidth); | |||
| c->chrConvertRange(dst1, dst2, dstWidth); | |||
| } | |||
| #define DEBUG_SWSCALE_BUFFERS 0 | |||
| @@ -534,7 +536,8 @@ static int swScale_c(SwsContext *c, const uint8_t* src[], int srcStride[], | |||
| const int hLumFilterSize= c->hLumFilterSize; | |||
| const int hChrFilterSize= c->hChrFilterSize; | |||
| int16_t **lumPixBuf= c->lumPixBuf; | |||
| int16_t **chrPixBuf= c->chrPixBuf; | |||
| int16_t **chrUPixBuf= c->chrUPixBuf; | |||
| int16_t **chrVPixBuf= c->chrVPixBuf; | |||
| int16_t **alpPixBuf= c->alpPixBuf; | |||
| const int vLumBufSize= c->vLumBufSize; | |||
| const int vChrBufSize= c->vChrBufSize; | |||
| @@ -662,10 +665,10 @@ static int swScale_c(SwsContext *c, const uint8_t* src[], int srcStride[], | |||
| //FIXME replace parameters through context struct (some at least) | |||
| if (c->needs_hcscale) | |||
| hcscale_c(c, chrPixBuf[ chrBufIndex ], chrDstW, src1, src2, chrSrcW, chrXInc, | |||
| hChrFilter, hChrFilterPos, hChrFilterSize, | |||
| formatConvBuffer, | |||
| pal); | |||
| hcscale_c(c, chrUPixBuf[chrBufIndex], chrVPixBuf[chrBufIndex], | |||
| chrDstW, src1, src2, chrSrcW, chrXInc, | |||
| hChrFilter, hChrFilterPos, hChrFilterSize, | |||
| formatConvBuffer, pal); | |||
| lastInChrBuf++; | |||
| DEBUG_BUFFERS("\t\tchrBufIndex %d: lastInChrBuf: %d\n", | |||
| chrBufIndex, lastInChrBuf); | |||
| @@ -681,47 +684,54 @@ static int swScale_c(SwsContext *c, const uint8_t* src[], int srcStride[], | |||
| #endif | |||
| if (dstY < dstH-2) { | |||
| const int16_t **lumSrcPtr= (const int16_t **) lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize; | |||
| const int16_t **chrSrcPtr= (const int16_t **) chrPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize; | |||
| const int16_t **chrUSrcPtr= (const int16_t **) chrUPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize; | |||
| const int16_t **chrVSrcPtr= (const int16_t **) chrVPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize; | |||
| const int16_t **alpSrcPtr= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? (const int16_t **) alpPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize : NULL; | |||
| if (dstFormat == PIX_FMT_NV12 || dstFormat == PIX_FMT_NV21) { | |||
| const int chrSkipMask= (1<<c->chrDstVSubSample)-1; | |||
| if (dstY&chrSkipMask) uDest= NULL; //FIXME split functions in lumi / chromi | |||
| c->yuv2nv12X(c, | |||
| vLumFilter+dstY*vLumFilterSize , lumSrcPtr, vLumFilterSize, | |||
| vChrFilter+chrDstY*vChrFilterSize, chrSrcPtr, vChrFilterSize, | |||
| vChrFilter+chrDstY*vChrFilterSize, chrUSrcPtr, chrVSrcPtr, vChrFilterSize, | |||
| dest, uDest, dstW, chrDstW, dstFormat); | |||
| } else if (isPlanarYUV(dstFormat) || dstFormat==PIX_FMT_GRAY8) { //YV12 like | |||
| const int chrSkipMask= (1<<c->chrDstVSubSample)-1; | |||
| if ((dstY&chrSkipMask) || isGray(dstFormat)) uDest=vDest= NULL; //FIXME split functions in lumi / chromi | |||
| if (is16BPS(dstFormat) || isNBPS(dstFormat)) { | |||
| yuv2yuvX16inC( | |||
| vLumFilter+dstY*vLumFilterSize , lumSrcPtr, vLumFilterSize, | |||
| vChrFilter+chrDstY*vChrFilterSize, chrSrcPtr, vChrFilterSize, | |||
| alpSrcPtr, (uint16_t *) dest, (uint16_t *) uDest, (uint16_t *) vDest, (uint16_t *) aDest, dstW, chrDstW, | |||
| yuv2yuvX16inC(vLumFilter+dstY*vLumFilterSize , lumSrcPtr, vLumFilterSize, | |||
| vChrFilter+chrDstY*vChrFilterSize, chrUSrcPtr, | |||
| chrVSrcPtr, vChrFilterSize, | |||
| alpSrcPtr, (uint16_t *) dest, (uint16_t *) uDest, | |||
| (uint16_t *) vDest, (uint16_t *) aDest, dstW, chrDstW, | |||
| dstFormat); | |||
| } else if (vLumFilterSize == 1 && vChrFilterSize == 1) { // unscaled YV12 | |||
| const int16_t *lumBuf = lumSrcPtr[0]; | |||
| const int16_t *chrBuf= chrSrcPtr[0]; | |||
| const int16_t *chrUBuf= chrUSrcPtr[0]; | |||
| const int16_t *chrVBuf= chrVSrcPtr[0]; | |||
| const int16_t *alpBuf= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? alpSrcPtr[0] : NULL; | |||
| c->yuv2yuv1(c, lumBuf, chrBuf, alpBuf, dest, uDest, vDest, aDest, dstW, chrDstW); | |||
| c->yuv2yuv1(c, lumBuf, chrUBuf, chrVBuf, alpBuf, dest, | |||
| uDest, vDest, aDest, dstW, chrDstW); | |||
| } else { //General YV12 | |||
| c->yuv2yuvX(c, | |||
| vLumFilter+dstY*vLumFilterSize , lumSrcPtr, vLumFilterSize, | |||
| vChrFilter+chrDstY*vChrFilterSize, chrSrcPtr, vChrFilterSize, | |||
| vChrFilter+chrDstY*vChrFilterSize, chrUSrcPtr, | |||
| chrVSrcPtr, vChrFilterSize, | |||
| alpSrcPtr, dest, uDest, vDest, aDest, dstW, chrDstW); | |||
| } | |||
| } else { | |||
| assert(lumSrcPtr + vLumFilterSize - 1 < lumPixBuf + vLumBufSize*2); | |||
| assert(chrSrcPtr + vChrFilterSize - 1 < chrPixBuf + vChrBufSize*2); | |||
| assert(lumSrcPtr + vLumFilterSize - 1 < lumPixBuf + vLumBufSize*2); | |||
| assert(chrUSrcPtr + vChrFilterSize - 1 < chrUPixBuf + vChrBufSize*2); | |||
| if (vLumFilterSize == 1 && vChrFilterSize == 2) { //unscaled RGB | |||
| int chrAlpha= vChrFilter[2*dstY+1]; | |||
| if(flags & SWS_FULL_CHR_H_INT) { | |||
| yuv2rgbXinC_full(c, //FIXME write a packed1_full function | |||
| vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize, | |||
| vChrFilter+dstY*vChrFilterSize, chrSrcPtr, vChrFilterSize, | |||
| vChrFilter+dstY*vChrFilterSize, chrUSrcPtr, | |||
| chrVSrcPtr, vChrFilterSize, | |||
| alpSrcPtr, dest, dstW, dstY); | |||
| } else { | |||
| c->yuv2packed1(c, *lumSrcPtr, *chrSrcPtr, *(chrSrcPtr+1), | |||
| c->yuv2packed1(c, *lumSrcPtr, *chrUSrcPtr, *(chrUSrcPtr+1), | |||
| *chrVSrcPtr, *(chrVSrcPtr+1), | |||
| alpPixBuf ? *alpSrcPtr : NULL, | |||
| dest, dstW, chrAlpha, dstFormat, flags, dstY); | |||
| } | |||
| @@ -735,10 +745,11 @@ static int swScale_c(SwsContext *c, const uint8_t* src[], int srcStride[], | |||
| if(flags & SWS_FULL_CHR_H_INT) { | |||
| yuv2rgbXinC_full(c, //FIXME write a packed2_full function | |||
| vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize, | |||
| vChrFilter+dstY*vChrFilterSize, chrSrcPtr, vChrFilterSize, | |||
| vChrFilter+dstY*vChrFilterSize, chrUSrcPtr, chrVSrcPtr, vChrFilterSize, | |||
| alpSrcPtr, dest, dstW, dstY); | |||
| } else { | |||
| c->yuv2packed2(c, *lumSrcPtr, *(lumSrcPtr+1), *chrSrcPtr, *(chrSrcPtr+1), | |||
| c->yuv2packed2(c, *lumSrcPtr, *(lumSrcPtr+1), *chrUSrcPtr, *(chrUSrcPtr+1), | |||
| *chrVSrcPtr, *(chrVSrcPtr+1), | |||
| alpPixBuf ? *alpSrcPtr : NULL, alpPixBuf ? *(alpSrcPtr+1) : NULL, | |||
| dest, dstW, lumAlpha, chrAlpha, dstY); | |||
| } | |||
| @@ -746,26 +757,27 @@ static int swScale_c(SwsContext *c, const uint8_t* src[], int srcStride[], | |||
| if(flags & SWS_FULL_CHR_H_INT) { | |||
| yuv2rgbXinC_full(c, | |||
| vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize, | |||
| vChrFilter+dstY*vChrFilterSize, chrSrcPtr, vChrFilterSize, | |||
| vChrFilter+dstY*vChrFilterSize, chrUSrcPtr, chrVSrcPtr, vChrFilterSize, | |||
| alpSrcPtr, dest, dstW, dstY); | |||
| } else { | |||
| c->yuv2packedX(c, | |||
| vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize, | |||
| vChrFilter+dstY*vChrFilterSize, chrSrcPtr, vChrFilterSize, | |||
| vChrFilter+dstY*vChrFilterSize, chrUSrcPtr, chrVSrcPtr, vChrFilterSize, | |||
| alpSrcPtr, dest, dstW, dstY); | |||
| } | |||
| } | |||
| } | |||
| } else { // hmm looks like we can't use MMX here without overwriting this array's tail | |||
| const int16_t **lumSrcPtr= (const int16_t **)lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize; | |||
| const int16_t **chrSrcPtr= (const int16_t **)chrPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize; | |||
| const int16_t **chrUSrcPtr= (const int16_t **)chrUPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize; | |||
| const int16_t **chrVSrcPtr= (const int16_t **)chrVPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize; | |||
| const int16_t **alpSrcPtr= (CONFIG_SWSCALE_ALPHA && alpPixBuf) ? (const int16_t **)alpPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize : NULL; | |||
| if (dstFormat == PIX_FMT_NV12 || dstFormat == PIX_FMT_NV21) { | |||
| const int chrSkipMask= (1<<c->chrDstVSubSample)-1; | |||
| if (dstY&chrSkipMask) uDest= NULL; //FIXME split functions in lumi / chromi | |||
| yuv2nv12XinC( | |||
| vLumFilter+dstY*vLumFilterSize , lumSrcPtr, vLumFilterSize, | |||
| vChrFilter+chrDstY*vChrFilterSize, chrSrcPtr, vChrFilterSize, | |||
| vChrFilter+chrDstY*vChrFilterSize, chrUSrcPtr, chrVSrcPtr, vChrFilterSize, | |||
| dest, uDest, dstW, chrDstW, dstFormat); | |||
| } else if (isPlanarYUV(dstFormat) || dstFormat==PIX_FMT_GRAY8) { //YV12 | |||
| const int chrSkipMask= (1<<c->chrDstVSubSample)-1; | |||
| @@ -773,27 +785,27 @@ static int swScale_c(SwsContext *c, const uint8_t* src[], int srcStride[], | |||
| if (is16BPS(dstFormat) || isNBPS(dstFormat)) { | |||
| yuv2yuvX16inC( | |||
| vLumFilter+dstY*vLumFilterSize , lumSrcPtr, vLumFilterSize, | |||
| vChrFilter+chrDstY*vChrFilterSize, chrSrcPtr, vChrFilterSize, | |||
| vChrFilter+chrDstY*vChrFilterSize, chrUSrcPtr, chrVSrcPtr, vChrFilterSize, | |||
| alpSrcPtr, (uint16_t *) dest, (uint16_t *) uDest, (uint16_t *) vDest, (uint16_t *) aDest, dstW, chrDstW, | |||
| dstFormat); | |||
| } else { | |||
| yuv2yuvXinC( | |||
| vLumFilter+dstY*vLumFilterSize , lumSrcPtr, vLumFilterSize, | |||
| vChrFilter+chrDstY*vChrFilterSize, chrSrcPtr, vChrFilterSize, | |||
| vChrFilter+chrDstY*vChrFilterSize, chrUSrcPtr, chrVSrcPtr, vChrFilterSize, | |||
| alpSrcPtr, dest, uDest, vDest, aDest, dstW, chrDstW); | |||
| } | |||
| } else { | |||
| assert(lumSrcPtr + vLumFilterSize - 1 < lumPixBuf + vLumBufSize*2); | |||
| assert(chrSrcPtr + vChrFilterSize - 1 < chrPixBuf + vChrBufSize*2); | |||
| assert(chrUSrcPtr + vChrFilterSize - 1 < chrUPixBuf + vChrBufSize*2); | |||
| if(flags & SWS_FULL_CHR_H_INT) { | |||
| yuv2rgbXinC_full(c, | |||
| vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize, | |||
| vChrFilter+dstY*vChrFilterSize, chrSrcPtr, vChrFilterSize, | |||
| vChrFilter+dstY*vChrFilterSize, chrUSrcPtr, chrVSrcPtr, vChrFilterSize, | |||
| alpSrcPtr, dest, dstW, dstY); | |||
| } else { | |||
| yuv2packedXinC(c, | |||
| vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize, | |||
| vChrFilter+dstY*vChrFilterSize, chrSrcPtr, vChrFilterSize, | |||
| vChrFilter+dstY*vChrFilterSize, chrUSrcPtr, chrVSrcPtr, vChrFilterSize, | |||
| alpSrcPtr, dest, dstW, dstY); | |||
| } | |||
| } | |||
| @@ -753,6 +753,7 @@ int sws_init_context(SwsContext *c, SwsFilter *srcFilter, SwsFilter *dstFilter) | |||
| int srcH= c->srcH; | |||
| int dstW= c->dstW; | |||
| int dstH= c->dstH; | |||
| int dst_stride = FFALIGN(dstW * sizeof(int16_t)+66, 16), dst_stride_px = dst_stride >> 1; | |||
| int flags, cpu_flags; | |||
| enum PixelFormat srcFormat= c->srcFormat; | |||
| enum PixelFormat dstFormat= c->dstFormat; | |||
| @@ -794,10 +795,6 @@ int sws_init_context(SwsContext *c, SwsFilter *srcFilter, SwsFilter *dstFilter) | |||
| srcW, srcH, dstW, dstH); | |||
| return AVERROR(EINVAL); | |||
| } | |||
| if(srcW > VOFW || dstW > VOFW) { | |||
| av_log(NULL, AV_LOG_ERROR, "swScaler: Compile-time maximum width is "AV_STRINGIFY(VOFW)" change VOF/VOFW and recompile\n"); | |||
| return AVERROR(EINVAL); | |||
| } | |||
| FF_ALLOC_OR_GOTO(c, c->formatConvBuffer, FFALIGN(srcW*2+78, 16) * 2, fail); | |||
| if (!dstFilter) dstFilter= &dummyFilter; | |||
| @@ -1001,29 +998,31 @@ int sws_init_context(SwsContext *c, SwsFilter *srcFilter, SwsFilter *dstFilter) | |||
| // allocate pixbufs (we use dynamic allocation because otherwise we would need to | |||
| // allocate several megabytes to handle all possible cases) | |||
| FF_ALLOC_OR_GOTO(c, c->lumPixBuf, c->vLumBufSize*2*sizeof(int16_t*), fail); | |||
| FF_ALLOC_OR_GOTO(c, c->chrPixBuf, c->vChrBufSize*2*sizeof(int16_t*), fail); | |||
| FF_ALLOC_OR_GOTO(c, c->chrUPixBuf, c->vChrBufSize*2*sizeof(int16_t*), fail); | |||
| FF_ALLOC_OR_GOTO(c, c->chrVPixBuf, c->vChrBufSize*2*sizeof(int16_t*), fail); | |||
| if (CONFIG_SWSCALE_ALPHA && isALPHA(c->srcFormat) && isALPHA(c->dstFormat)) | |||
| FF_ALLOCZ_OR_GOTO(c, c->alpPixBuf, c->vLumBufSize*2*sizeof(int16_t*), fail); | |||
| //Note we need at least one pixel more at the end because of the MMX code (just in case someone wanna replace the 4000/8000) | |||
| /* align at 16 bytes for AltiVec */ | |||
| for (i=0; i<c->vLumBufSize; i++) { | |||
| FF_ALLOCZ_OR_GOTO(c, c->lumPixBuf[i+c->vLumBufSize], VOF+1, fail); | |||
| FF_ALLOCZ_OR_GOTO(c, c->lumPixBuf[i+c->vLumBufSize], dst_stride+1, fail); | |||
| c->lumPixBuf[i] = c->lumPixBuf[i+c->vLumBufSize]; | |||
| } | |||
| c->uv_off = dst_stride_px; | |||
| for (i=0; i<c->vChrBufSize; i++) { | |||
| FF_ALLOC_OR_GOTO(c, c->chrPixBuf[i+c->vChrBufSize], (VOF+1)*2, fail); | |||
| c->chrPixBuf[i] = c->chrPixBuf[i+c->vChrBufSize]; | |||
| FF_ALLOC_OR_GOTO(c, c->chrUPixBuf[i+c->vChrBufSize], dst_stride*2+1, fail); | |||
| c->chrUPixBuf[i] = c->chrUPixBuf[i+c->vChrBufSize]; | |||
| c->chrVPixBuf[i] = c->chrVPixBuf[i+c->vChrBufSize] = c->chrUPixBuf[i] + dst_stride_px; | |||
| } | |||
| if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) | |||
| for (i=0; i<c->vLumBufSize; i++) { | |||
| FF_ALLOCZ_OR_GOTO(c, c->alpPixBuf[i+c->vLumBufSize], VOF+1, fail); | |||
| FF_ALLOCZ_OR_GOTO(c, c->alpPixBuf[i+c->vLumBufSize], dst_stride+1, fail); | |||
| c->alpPixBuf[i] = c->alpPixBuf[i+c->vLumBufSize]; | |||
| } | |||
| //try to avoid drawing green stuff between the right end and the stride end | |||
| for (i=0; i<c->vChrBufSize; i++) memset(c->chrPixBuf[i], 64, (VOF+1)*2); | |||
| assert(2*VOFW == VOF); | |||
| for (i=0; i<c->vChrBufSize; i++) | |||
| memset(c->chrUPixBuf[i], 64, dst_stride*2+1); | |||
| assert(c->chrDstH <= dstH); | |||
| @@ -1481,10 +1480,11 @@ void sws_freeContext(SwsContext *c) | |||
| av_freep(&c->lumPixBuf); | |||
| } | |||
| if (c->chrPixBuf) { | |||
| if (c->chrUPixBuf) { | |||
| for (i=0; i<c->vChrBufSize; i++) | |||
| av_freep(&c->chrPixBuf[i]); | |||
| av_freep(&c->chrPixBuf); | |||
| av_freep(&c->chrUPixBuf[i]); | |||
| av_freep(&c->chrUPixBuf); | |||
| av_freep(&c->chrVPixBuf); | |||
| } | |||
| if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) { | |||