Fixes overflows for large image sizes. Found-by: Mateusz "j00ru" Jurczyk and Gynvael Coldwind CC: libav-stable@libav.orgtags/n0.11
| @@ -144,7 +144,7 @@ static void yuv2planeX_altivec(const int16_t *filter, int filterSize, | |||
| static void hScale_altivec_real(SwsContext *c, int16_t *dst, int dstW, | |||
| const uint8_t *src, const int16_t *filter, | |||
| const int16_t *filterPos, int filterSize) | |||
| const int32_t *filterPos, int filterSize) | |||
| { | |||
| register int i; | |||
| DECLARE_ALIGNED(16, int, tempo)[4]; | |||
| @@ -61,7 +61,7 @@ static av_always_inline void fillPlane(uint8_t* plane, int stride, | |||
| static void hScale16To19_c(SwsContext *c, int16_t *_dst, int dstW, const uint8_t *_src, | |||
| const int16_t *filter, | |||
| const int16_t *filterPos, int filterSize) | |||
| const int32_t *filterPos, int filterSize) | |||
| { | |||
| int i; | |||
| int32_t *dst = (int32_t *) _dst; | |||
| @@ -84,7 +84,7 @@ static void hScale16To19_c(SwsContext *c, int16_t *_dst, int dstW, const uint8_t | |||
| static void hScale16To15_c(SwsContext *c, int16_t *dst, int dstW, const uint8_t *_src, | |||
| const int16_t *filter, | |||
| const int16_t *filterPos, int filterSize) | |||
| const int32_t *filterPos, int filterSize) | |||
| { | |||
| int i; | |||
| const uint16_t *src = (const uint16_t *) _src; | |||
| @@ -105,7 +105,7 @@ static void hScale16To15_c(SwsContext *c, int16_t *dst, int dstW, const uint8_t | |||
| // bilinear / bicubic scaling | |||
| static void hScale8To15_c(SwsContext *c, int16_t *dst, int dstW, const uint8_t *src, | |||
| const int16_t *filter, const int16_t *filterPos, | |||
| const int16_t *filter, const int32_t *filterPos, | |||
| int filterSize) | |||
| { | |||
| int i; | |||
| @@ -123,7 +123,7 @@ static void hScale8To15_c(SwsContext *c, int16_t *dst, int dstW, const uint8_t * | |||
| } | |||
| static void hScale8To19_c(SwsContext *c, int16_t *_dst, int dstW, const uint8_t *src, | |||
| const int16_t *filter, const int16_t *filterPos, | |||
| const int16_t *filter, const int32_t *filterPos, | |||
| int filterSize) | |||
| { | |||
| int i; | |||
| @@ -224,7 +224,7 @@ static void hyscale_fast_c(SwsContext *c, int16_t *dst, int dstWidth, | |||
| static av_always_inline void hyscale(SwsContext *c, int16_t *dst, int dstWidth, | |||
| const uint8_t *src_in[4], int srcW, int xInc, | |||
| const int16_t *hLumFilter, | |||
| const int16_t *hLumFilterPos, int hLumFilterSize, | |||
| const int32_t *hLumFilterPos, int hLumFilterSize, | |||
| uint8_t *formatConvBuffer, | |||
| uint32_t *pal, int isAlpha) | |||
| { | |||
| @@ -268,7 +268,7 @@ static void hcscale_fast_c(SwsContext *c, int16_t *dst1, int16_t *dst2, | |||
| static av_always_inline void hcscale(SwsContext *c, int16_t *dst1, int16_t *dst2, int dstWidth, | |||
| const uint8_t *src_in[4], | |||
| int srcW, int xInc, const int16_t *hChrFilter, | |||
| const int16_t *hChrFilterPos, int hChrFilterSize, | |||
| const int32_t *hChrFilterPos, int hChrFilterSize, | |||
| uint8_t *formatConvBuffer, uint32_t *pal) | |||
| { | |||
| const uint8_t *src1 = src_in[1], *src2 = src_in[2]; | |||
| @@ -312,10 +312,10 @@ static int swScale(SwsContext *c, const uint8_t* src[], | |||
| const int chrXInc= c->chrXInc; | |||
| const enum PixelFormat dstFormat= c->dstFormat; | |||
| const int flags= c->flags; | |||
| int16_t *vLumFilterPos= c->vLumFilterPos; | |||
| int16_t *vChrFilterPos= c->vChrFilterPos; | |||
| int16_t *hLumFilterPos= c->hLumFilterPos; | |||
| int16_t *hChrFilterPos= c->hChrFilterPos; | |||
| int32_t *vLumFilterPos= c->vLumFilterPos; | |||
| int32_t *vChrFilterPos= c->vChrFilterPos; | |||
| int32_t *hLumFilterPos= c->hLumFilterPos; | |||
| int32_t *hChrFilterPos= c->hChrFilterPos; | |||
| int16_t *vLumFilter= c->vLumFilter; | |||
| int16_t *vChrFilter= c->vChrFilter; | |||
| int16_t *hLumFilter= c->hLumFilter; | |||
| @@ -295,10 +295,10 @@ typedef struct SwsContext { | |||
| int16_t *hChrFilter; ///< Array of horizontal filter coefficients for chroma planes. | |||
| int16_t *vLumFilter; ///< Array of vertical filter coefficients for luma/alpha planes. | |||
| int16_t *vChrFilter; ///< Array of vertical filter coefficients for chroma planes. | |||
| int16_t *hLumFilterPos; ///< Array of horizontal filter starting positions for each dst[i] for luma/alpha planes. | |||
| int16_t *hChrFilterPos; ///< Array of horizontal filter starting positions for each dst[i] for chroma planes. | |||
| int16_t *vLumFilterPos; ///< Array of vertical filter starting positions for each dst[i] for luma/alpha planes. | |||
| int16_t *vChrFilterPos; ///< Array of vertical filter starting positions for each dst[i] for chroma planes. | |||
| int32_t *hLumFilterPos; ///< Array of horizontal filter starting positions for each dst[i] for luma/alpha planes. | |||
| int32_t *hChrFilterPos; ///< Array of horizontal filter starting positions for each dst[i] for chroma planes. | |||
| int32_t *vLumFilterPos; ///< Array of vertical filter starting positions for each dst[i] for luma/alpha planes. | |||
| int32_t *vChrFilterPos; ///< Array of vertical filter starting positions for each dst[i] for chroma planes. | |||
| int hLumFilterSize; ///< Horizontal filter size for luma/alpha pixels. | |||
| int hChrFilterSize; ///< Horizontal filter size for chroma pixels. | |||
| int vLumFilterSize; ///< Vertical filter size for luma/alpha pixels. | |||
| @@ -508,10 +508,10 @@ typedef struct SwsContext { | |||
| /** @{ */ | |||
| void (*hyScale)(struct SwsContext *c, int16_t *dst, int dstW, | |||
| const uint8_t *src, const int16_t *filter, | |||
| const int16_t *filterPos, int filterSize); | |||
| const int32_t *filterPos, int filterSize); | |||
| void (*hcScale)(struct SwsContext *c, int16_t *dst, int dstW, | |||
| const uint8_t *src, const int16_t *filter, | |||
| const int16_t *filterPos, int filterSize); | |||
| const int32_t *filterPos, int filterSize); | |||
| /** @} */ | |||
| /// Color range conversion function for luma plane if needed. | |||
| @@ -180,7 +180,7 @@ static double getSplineCoeff(double a, double b, double c, double d, double dist | |||
| dist-1.0); | |||
| } | |||
| static int initFilter(int16_t **outFilter, int16_t **filterPos, int *outFilterSize, int xInc, | |||
| static int initFilter(int16_t **outFilter, int32_t **filterPos, int *outFilterSize, int xInc, | |||
| int srcW, int dstW, int filterAlign, int one, int flags, int cpu_flags, | |||
| SwsVector *srcFilter, SwsVector *dstFilter, double param[2], int is_horizontal) | |||
| { | |||
| @@ -196,7 +196,7 @@ static int initFilter(int16_t **outFilter, int16_t **filterPos, int *outFilterSi | |||
| emms_c(); //FIXME this should not be required but it IS (even for non-MMX versions) | |||
| // NOTE: the +3 is for the MMX(+1)/SSE(+3) scaler which reads over the end | |||
| FF_ALLOC_OR_GOTO(NULL, *filterPos, (dstW+3)*sizeof(int16_t), fail); | |||
| FF_ALLOC_OR_GOTO(NULL, *filterPos, (dstW+3)*sizeof(**filterPos), fail); | |||
| if (FFABS(xInc - 0x10000) <10) { // unscaled | |||
| int i; | |||
| @@ -38,7 +38,7 @@ SECTION .text | |||
| ; (SwsContext *c, int{16,32}_t *dst, | |||
| ; int dstW, const uint{8,16}_t *src, | |||
| ; const int16_t *filter, | |||
| ; const int16_t *filterPos, int filterSize); | |||
| ; const int32_t *filterPos, int filterSize); | |||
| ; | |||
| ; Scale one horizontal line. Input is either 8-bits width or 16-bits width | |||
| ; ($source_width can be either 8, 9, 10 or 16, difference is whether we have to | |||
| @@ -53,6 +53,9 @@ SECTION .text | |||
| cglobal hscale%1to%2_%4_%5, %6, 7, %7 | |||
| %if ARCH_X86_64 | |||
| movsxd r2, r2d | |||
| %define mov32 movsxd | |||
| %else ; x86-32 | |||
| %define mov32 mov | |||
| %endif ; x86-64 | |||
| %if %2 == 19 | |||
| %if mmsize == 8 ; mmx | |||
| @@ -95,14 +98,14 @@ cglobal hscale%1to%2_%4_%5, %6, 7, %7 | |||
| %else ; %2 == 19 | |||
| lea r1, [r1+r2*(4>>r2shr)] | |||
| %endif ; %2 == 15/19 | |||
| lea r5, [r5+r2*(2>>r2shr)] | |||
| lea r5, [r5+r2*(4>>r2shr)] | |||
| neg r2 | |||
| .loop: | |||
| %if %3 == 4 ; filterSize == 4 scaling | |||
| ; load 2x4 or 4x4 source pixels into m0/m1 | |||
| movsx r0, word [r5+r2*2+0] ; filterPos[0] | |||
| movsx r6, word [r5+r2*2+2] ; filterPos[1] | |||
| mov32 r0, dword [r5+r2*4+0] ; filterPos[0] | |||
| mov32 r6, dword [r5+r2*4+4] ; filterPos[1] | |||
| movlh m0, [r3+r0*srcmul] ; src[filterPos[0] + {0,1,2,3}] | |||
| %if mmsize == 8 | |||
| movlh m1, [r3+r6*srcmul] ; src[filterPos[1] + {0,1,2,3}] | |||
| @@ -112,8 +115,8 @@ cglobal hscale%1to%2_%4_%5, %6, 7, %7 | |||
| %else ; %1 == 8 | |||
| movd m4, [r3+r6*srcmul] ; src[filterPos[1] + {0,1,2,3}] | |||
| %endif | |||
| movsx r0, word [r5+r2*2+4] ; filterPos[2] | |||
| movsx r6, word [r5+r2*2+6] ; filterPos[3] | |||
| mov32 r0, dword [r5+r2*4+8] ; filterPos[2] | |||
| mov32 r6, dword [r5+r2*4+12] ; filterPos[3] | |||
| movlh m1, [r3+r0*srcmul] ; src[filterPos[2] + {0,1,2,3}] | |||
| %if %1 > 8 | |||
| movhps m1, [r3+r6*srcmul] ; src[filterPos[3] + {0,1,2,3}] | |||
| @@ -156,8 +159,8 @@ cglobal hscale%1to%2_%4_%5, %6, 7, %7 | |||
| %endif ; mmx/sse2/ssse3/sse4 | |||
| %else ; %3 == 8, i.e. filterSize == 8 scaling | |||
| ; load 2x8 or 4x8 source pixels into m0, m1, m4 and m5 | |||
| movsx r0, word [r5+r2*1+0] ; filterPos[0] | |||
| movsx r6, word [r5+r2*1+2] ; filterPos[1] | |||
| mov32 r0, dword [r5+r2*2+0] ; filterPos[0] | |||
| mov32 r6, dword [r5+r2*2+4] ; filterPos[1] | |||
| movbh m0, [r3+ r0 *srcmul] ; src[filterPos[0] + {0,1,2,3,4,5,6,7}] | |||
| %if mmsize == 8 | |||
| movbh m1, [r3+(r0+4)*srcmul] ; src[filterPos[0] + {4,5,6,7}] | |||
| @@ -165,8 +168,8 @@ cglobal hscale%1to%2_%4_%5, %6, 7, %7 | |||
| movbh m5, [r3+(r6+4)*srcmul] ; src[filterPos[1] + {4,5,6,7}] | |||
| %else ; mmsize == 16 | |||
| movbh m1, [r3+ r6 *srcmul] ; src[filterPos[1] + {0,1,2,3,4,5,6,7}] | |||
| movsx r0, word [r5+r2*1+4] ; filterPos[2] | |||
| movsx r6, word [r5+r2*1+6] ; filterPos[3] | |||
| mov32 r0, dword [r5+r2*2+8] ; filterPos[2] | |||
| mov32 r6, dword [r5+r2*2+12] ; filterPos[3] | |||
| movbh m4, [r3+ r0 *srcmul] ; src[filterPos[2] + {0,1,2,3,4,5,6,7}] | |||
| movbh m5, [r3+ r6 *srcmul] ; src[filterPos[3] + {0,1,2,3,4,5,6,7}] | |||
| %endif ; mmsize == 8/16 | |||
| @@ -251,7 +254,7 @@ cglobal hscale%1to%2_%4_%5, %6, 7, %7 | |||
| %define r1x r1 | |||
| %define filter2 r6m | |||
| %endif ; x86-32/64 | |||
| lea r5, [r5+r2*2] | |||
| lea r5, [r5+r2*4] | |||
| %if %2 == 15 | |||
| lea r1, [r1+r2*2] | |||
| %else ; %2 == 19 | |||
| @@ -261,8 +264,8 @@ cglobal hscale%1to%2_%4_%5, %6, 7, %7 | |||
| neg r2 | |||
| .loop: | |||
| movsx r0, word [r5+r2*2+0] ; filterPos[0] | |||
| movsx r1x, word [r5+r2*2+2] ; filterPos[1] | |||
| mov32 r0, dword [r5+r2*4+0] ; filterPos[0] | |||
| mov32 r1x, dword [r5+r2*4+4] ; filterPos[1] | |||
| ; FIXME maybe do 4px/iteration on x86-64 (x86-32 wouldn't have enough regs)? | |||
| pxor m4, m4 | |||
| pxor m5, m5 | |||
| @@ -293,7 +296,7 @@ cglobal hscale%1to%2_%4_%5, %6, 7, %7 | |||
| jl .innerloop | |||
| %ifidn %4, X4 | |||
| movsx r1x, word [r5+r2*2+2] ; filterPos[1] | |||
| mov32 r1x, dword [r5+r2*4+4] ; filterPos[1] | |||
| movlh m0, [src_reg+r0 *srcmul] ; split last 4 srcpx of dstpx[0] | |||
| sub r1x, r6 ; and first 4 srcpx of dstpx[1] | |||
| %if %1 > 8 | |||
| @@ -93,8 +93,8 @@ void updateMMXDitherTables(SwsContext *c, int dstY, int lumBufIndex, int chrBufI | |||
| int16_t **alpPixBuf= c->alpPixBuf; | |||
| const int vLumBufSize= c->vLumBufSize; | |||
| const int vChrBufSize= c->vChrBufSize; | |||
| int16_t *vLumFilterPos= c->vLumFilterPos; | |||
| int16_t *vChrFilterPos= c->vChrFilterPos; | |||
| int32_t *vLumFilterPos= c->vLumFilterPos; | |||
| int32_t *vChrFilterPos= c->vChrFilterPos; | |||
| int16_t *vLumFilter= c->vLumFilter; | |||
| int16_t *vChrFilter= c->vChrFilter; | |||
| int32_t *lumMmxFilter= c->lumMmxFilter; | |||
| @@ -204,7 +204,7 @@ extern void ff_hscale ## from_bpc ## to ## to_bpc ## _ ## filter_n ## _ ## opt( | |||
| SwsContext *c, int16_t *data, \ | |||
| int dstW, const uint8_t *src, \ | |||
| const int16_t *filter, \ | |||
| const int16_t *filterPos, int filterSize) | |||
| const int32_t *filterPos, int filterSize) | |||
| #define SCALE_FUNCS(filter_n, opt) \ | |||
| SCALE_FUNC(filter_n, 8, 15, opt); \ | |||
| @@ -1376,7 +1376,7 @@ static void RENAME(hyscale_fast)(SwsContext *c, int16_t *dst, | |||
| int dstWidth, const uint8_t *src, | |||
| int srcW, int xInc) | |||
| { | |||
| int16_t *filterPos = c->hLumFilterPos; | |||
| int32_t *filterPos = c->hLumFilterPos; | |||
| int16_t *filter = c->hLumFilter; | |||
| void *mmx2FilterCode= c->lumMmx2FilterCode; | |||
| int i; | |||
| @@ -1472,7 +1472,7 @@ static void RENAME(hcscale_fast)(SwsContext *c, int16_t *dst1, int16_t *dst2, | |||
| int dstWidth, const uint8_t *src1, | |||
| const uint8_t *src2, int srcW, int xInc) | |||
| { | |||
| int16_t *filterPos = c->hChrFilterPos; | |||
| int32_t *filterPos = c->hChrFilterPos; | |||
| int16_t *filter = c->hChrFilter; | |||
| void *mmx2FilterCode= c->chrMmx2FilterCode; | |||
| int i; | |||