This reverts commit ac0fb59348. That
commit causes valgrind errors, which I want to investigate before
resubmitting it.
tags/n0.9
| @@ -211,69 +211,17 @@ yuv2yuvX16_c_template(const int16_t *lumFilter, const int32_t **lumSrc, | |||||
| #define output_pixel(pos, val) \ | #define output_pixel(pos, val) \ | ||||
| if (big_endian) { \ | if (big_endian) { \ | ||||
| AV_WB16(pos, av_clip_uint16(val >> shift)); \ | |||||
| } else { \ | |||||
| AV_WL16(pos, av_clip_uint16(val >> shift)); \ | |||||
| } | |||||
| for (i = 0; i < dstW; i++) { | |||||
| int val = 1 << (30-output_bits - 1); | |||||
| int j; | |||||
| for (j = 0; j < lumFilterSize; j++) | |||||
| val += (lumSrc[j][i] * lumFilter[j]) >> 1; | |||||
| output_pixel(&yDest[i], val); | |||||
| } | |||||
| if (uDest) { | |||||
| for (i = 0; i < chrDstW; i++) { | |||||
| int u = 1 << (30-output_bits - 1); | |||||
| int v = 1 << (30-output_bits - 1); | |||||
| int j; | |||||
| for (j = 0; j < chrFilterSize; j++) { | |||||
| u += (chrUSrc[j][i] * chrFilter[j]) >> 1; | |||||
| v += (chrVSrc[j][i] * chrFilter[j]) >> 1; | |||||
| } | |||||
| output_pixel(&uDest[i], u); | |||||
| output_pixel(&vDest[i], v); | |||||
| } | |||||
| } | |||||
| if (CONFIG_SWSCALE_ALPHA && aDest) { | |||||
| for (i = 0; i < dstW; i++) { | |||||
| int val = 1 << (30-output_bits - 1); | |||||
| int j; | |||||
| for (j = 0; j < lumFilterSize; j++) | |||||
| val += (alpSrc[j][i] * lumFilter[j]) >> 1; | |||||
| output_pixel(&aDest[i], val); | |||||
| } | |||||
| } | |||||
| #undef output_pixel | |||||
| } | |||||
| static av_always_inline void | |||||
| yuv2yuvX10_c_template(const int16_t *lumFilter, const int16_t **lumSrc, | |||||
| int lumFilterSize, const int16_t *chrFilter, | |||||
| const int16_t **chrUSrc, const int16_t **chrVSrc, | |||||
| int chrFilterSize, const int16_t **alpSrc, | |||||
| uint16_t *dest[4], int dstW, int chrDstW, | |||||
| int big_endian, int output_bits) | |||||
| { | |||||
| //FIXME Optimize (just quickly written not optimized..) | |||||
| int i; | |||||
| uint16_t *yDest = dest[0], *uDest = dest[1], *vDest = dest[2], | |||||
| *aDest = CONFIG_SWSCALE_ALPHA ? dest[3] : NULL; | |||||
| int shift = 15 + 16 - output_bits - 1; | |||||
| #define output_pixel(pos, val) \ | |||||
| if (big_endian) { \ | |||||
| AV_WB16(pos, av_clip_uintp2(val >> shift, output_bits)); \ | |||||
| if (output_bits == 16) { \ | |||||
| AV_WB16(pos, av_clip_uint16(val >> shift)); \ | |||||
| } else { \ | |||||
| AV_WB16(pos, av_clip_uintp2(val >> shift, output_bits)); \ | |||||
| } \ | |||||
| } else { \ | } else { \ | ||||
| AV_WL16(pos, av_clip_uintp2(val >> shift, output_bits)); \ | |||||
| if (output_bits == 16) { \ | |||||
| AV_WL16(pos, av_clip_uint16(val >> shift)); \ | |||||
| } else { \ | |||||
| AV_WL16(pos, av_clip_uintp2(val >> shift, output_bits)); \ | |||||
| } \ | |||||
| } | } | ||||
| for (i = 0; i < dstW; i++) { | for (i = 0; i < dstW; i++) { | ||||
| int val = 1 << (30-output_bits - 1); | int val = 1 << (30-output_bits - 1); | ||||
| @@ -315,7 +263,7 @@ yuv2yuvX10_c_template(const int16_t *lumFilter, const int16_t **lumSrc, | |||||
| #undef output_pixel | #undef output_pixel | ||||
| } | } | ||||
| #define yuv2NBPS(bits, BE_LE, is_be, yuv2yuvX_template_fn, typeX_t) \ | |||||
| #define yuv2NBPS(bits, BE_LE, is_be) \ | |||||
| static void yuv2yuvX ## bits ## BE_LE ## _c(SwsContext *c, const int16_t *lumFilter, \ | static void yuv2yuvX ## bits ## BE_LE ## _c(SwsContext *c, const int16_t *lumFilter, \ | ||||
| const int16_t **_lumSrc, int lumFilterSize, \ | const int16_t **_lumSrc, int lumFilterSize, \ | ||||
| const int16_t *chrFilter, const int16_t **_chrUSrc, \ | const int16_t *chrFilter, const int16_t **_chrUSrc, \ | ||||
| @@ -323,21 +271,21 @@ static void yuv2yuvX ## bits ## BE_LE ## _c(SwsContext *c, const int16_t *lumFil | |||||
| int chrFilterSize, const int16_t **_alpSrc, \ | int chrFilterSize, const int16_t **_alpSrc, \ | ||||
| uint8_t *_dest[4], int dstW, int chrDstW) \ | uint8_t *_dest[4], int dstW, int chrDstW) \ | ||||
| { \ | { \ | ||||
| const typeX_t **lumSrc = (const typeX_t **) _lumSrc, \ | |||||
| **chrUSrc = (const typeX_t **) _chrUSrc, \ | |||||
| **chrVSrc = (const typeX_t **) _chrVSrc, \ | |||||
| **alpSrc = (const typeX_t **) _alpSrc; \ | |||||
| yuv2yuvX_template_fn(lumFilter, lumSrc, lumFilterSize, \ | |||||
| chrFilter, chrUSrc, chrVSrc, chrFilterSize, \ | |||||
| alpSrc, (uint16_t **) _dest, \ | |||||
| dstW, chrDstW, is_be, bits); \ | |||||
| } | |||||
| yuv2NBPS( 9, BE, 1, yuv2yuvX10_c_template, int16_t); | |||||
| yuv2NBPS( 9, LE, 0, yuv2yuvX10_c_template, int16_t); | |||||
| yuv2NBPS(10, BE, 1, yuv2yuvX10_c_template, int16_t); | |||||
| yuv2NBPS(10, LE, 0, yuv2yuvX10_c_template, int16_t); | |||||
| yuv2NBPS(16, BE, 1, yuv2yuvX16_c_template, int32_t); | |||||
| yuv2NBPS(16, LE, 0, yuv2yuvX16_c_template, int32_t); | |||||
| const int32_t **lumSrc = (const int32_t **) _lumSrc, \ | |||||
| **chrUSrc = (const int32_t **) _chrUSrc, \ | |||||
| **chrVSrc = (const int32_t **) _chrVSrc, \ | |||||
| **alpSrc = (const int32_t **) _alpSrc; \ | |||||
| yuv2yuvX16_c_template(lumFilter, lumSrc, lumFilterSize, \ | |||||
| chrFilter, chrUSrc, chrVSrc, chrFilterSize, \ | |||||
| alpSrc, (uint16_t **) _dest, \ | |||||
| dstW, chrDstW, is_be, bits); \ | |||||
| } | |||||
| yuv2NBPS( 9, BE, 1); | |||||
| yuv2NBPS( 9, LE, 0); | |||||
| yuv2NBPS(10, BE, 1); | |||||
| yuv2NBPS(10, LE, 0); | |||||
| yuv2NBPS(16, BE, 1); | |||||
| yuv2NBPS(16, LE, 0); | |||||
| static void yuv2yuvX_c(SwsContext *c, const int16_t *lumFilter, | static void yuv2yuvX_c(SwsContext *c, const int16_t *lumFilter, | ||||
| const int16_t **lumSrc, int lumFilterSize, | const int16_t **lumSrc, int lumFilterSize, | ||||
| @@ -1932,27 +1880,6 @@ static void hScale16_c(SwsContext *c, int16_t *_dst, int dstW, const uint8_t *_s | |||||
| } | } | ||||
| } | } | ||||
| static void hScale10_c(SwsContext *c, int16_t *dst, int dstW, const uint8_t *_src, | |||||
| const int16_t *filter, | |||||
| const int16_t *filterPos, int filterSize) | |||||
| { | |||||
| int i; | |||||
| const uint16_t *src = (const uint16_t *) _src; | |||||
| int sh = av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1; | |||||
| for (i = 0; i < dstW; i++) { | |||||
| int j; | |||||
| int srcPos = filterPos[i]; | |||||
| int val = 0; | |||||
| for (j = 0; j < filterSize; j++) { | |||||
| val += src[srcPos + j] * filter[filterSize * i + j]; | |||||
| } | |||||
| // filter=14 bit, input=16 bit, output=30 bit, >> 11 makes 19 bit | |||||
| dst[i] = FFMIN(val >> sh, (1 << 15) - 1); | |||||
| } | |||||
| } | |||||
| // bilinear / bicubic scaling | // bilinear / bicubic scaling | ||||
| static void hScale_c(SwsContext *c, int16_t *dst, int dstW, const uint8_t *src, | static void hScale_c(SwsContext *c, int16_t *dst, int dstW, const uint8_t *src, | ||||
| const int16_t *filter, const int16_t *filterPos, | const int16_t *filter, const int16_t *filterPos, | ||||
| @@ -2098,7 +2025,7 @@ static av_always_inline void hyscale(SwsContext *c, int16_t *dst, int dstWidth, | |||||
| if (convertRange) | if (convertRange) | ||||
| convertRange(dst, dstWidth); | convertRange(dst, dstWidth); | ||||
| if (av_pix_fmt_descriptors[c->dstFormat].comp[0].depth_minus1 < 10 && c->scalingBpp == 16) { | |||||
| if (av_pix_fmt_descriptors[c->dstFormat].comp[0].depth_minus1 < 8 && c->scalingBpp == 16) { | |||||
| c->scale19To15Fw(dst, (int32_t *) dst, dstWidth); | c->scale19To15Fw(dst, (int32_t *) dst, dstWidth); | ||||
| } | } | ||||
| } | } | ||||
| @@ -2125,7 +2052,7 @@ static av_always_inline void hcscale(SwsContext *c, int16_t *dst1, int16_t *dst2 | |||||
| uint8_t *formatConvBuffer, uint32_t *pal) | uint8_t *formatConvBuffer, uint32_t *pal) | ||||
| { | { | ||||
| if (c->chrToYV12) { | if (c->chrToYV12) { | ||||
| uint8_t *buf2 = formatConvBuffer + FFALIGN(srcW * FFALIGN(c->scalingBpp, 8) >> 3, 16); | |||||
| uint8_t *buf2 = formatConvBuffer + FFALIGN(srcW * c->scalingBpp >> 3, 16); | |||||
| c->chrToYV12(formatConvBuffer, buf2, src1, src2, srcW, pal); | c->chrToYV12(formatConvBuffer, buf2, src1, src2, srcW, pal); | ||||
| src1= formatConvBuffer; | src1= formatConvBuffer; | ||||
| src2= buf2; | src2= buf2; | ||||
| @@ -2149,7 +2076,7 @@ static av_always_inline void hcscale(SwsContext *c, int16_t *dst1, int16_t *dst2 | |||||
| if (c->chrConvertRange) | if (c->chrConvertRange) | ||||
| c->chrConvertRange(dst1, dst2, dstWidth); | c->chrConvertRange(dst1, dst2, dstWidth); | ||||
| if (av_pix_fmt_descriptors[c->dstFormat].comp[0].depth_minus1 < 10 && c->scalingBpp == 16) { | |||||
| if (av_pix_fmt_descriptors[c->dstFormat].comp[0].depth_minus1 < 8 && c->scalingBpp == 16) { | |||||
| c->scale19To15Fw(dst1, (int32_t *) dst1, dstWidth); | c->scale19To15Fw(dst1, (int32_t *) dst1, dstWidth); | ||||
| c->scale19To15Fw(dst2, (int32_t *) dst2, dstWidth); | c->scale19To15Fw(dst2, (int32_t *) dst2, dstWidth); | ||||
| } | } | ||||
| @@ -2808,27 +2735,27 @@ static av_cold void sws_init_swScale_c(SwsContext *c) | |||||
| } | } | ||||
| if (c->scalingBpp == 8) { | if (c->scalingBpp == 8) { | ||||
| c->hScale = hScale_c; | |||||
| if (c->flags & SWS_FAST_BILINEAR) { | |||||
| c->hyscale_fast = hyscale_fast_c; | |||||
| c->hcscale_fast = hcscale_fast_c; | |||||
| c->hScale = hScale_c; | |||||
| if (c->flags & SWS_FAST_BILINEAR) { | |||||
| c->hyscale_fast = hyscale_fast_c; | |||||
| c->hcscale_fast = hcscale_fast_c; | |||||
| } | |||||
| if (c->srcRange != c->dstRange && !isAnyRGB(c->dstFormat)) { | |||||
| if (c->srcRange) { | |||||
| c->lumConvertRange = lumRangeFromJpeg_c; | |||||
| c->chrConvertRange = chrRangeFromJpeg_c; | |||||
| } else { | |||||
| c->lumConvertRange = lumRangeToJpeg_c; | |||||
| c->chrConvertRange = chrRangeToJpeg_c; | |||||
| } | } | ||||
| } | |||||
| } else { | } else { | ||||
| c->hScale = c->scalingBpp == 16 ? hScale16_c : hScale10_c; | |||||
| c->hScale = hScale16_c; | |||||
| c->scale19To15Fw = scale19To15Fw_c; | c->scale19To15Fw = scale19To15Fw_c; | ||||
| c->scale8To16Rv = scale8To16Rv_c; | c->scale8To16Rv = scale8To16Rv_c; | ||||
| } | |||||
| if (c->srcRange != c->dstRange && !isAnyRGB(c->dstFormat)) { | |||||
| if (c->scalingBpp <= 10) { | |||||
| if (c->srcRange) { | |||||
| c->lumConvertRange = lumRangeFromJpeg_c; | |||||
| c->chrConvertRange = chrRangeFromJpeg_c; | |||||
| } else { | |||||
| c->lumConvertRange = lumRangeToJpeg_c; | |||||
| c->chrConvertRange = chrRangeToJpeg_c; | |||||
| } | |||||
| } else { | |||||
| if (c->srcRange != c->dstRange && !isAnyRGB(c->dstFormat)) { | |||||
| if (c->srcRange) { | if (c->srcRange) { | ||||
| c->lumConvertRange = lumRangeFromJpeg16_c; | c->lumConvertRange = lumRangeFromJpeg16_c; | ||||
| c->chrConvertRange = chrRangeFromJpeg16_c; | c->chrConvertRange = chrRangeFromJpeg16_c; | ||||
| @@ -64,16 +64,11 @@ typedef int (*SwsFunc)(struct SwsContext *context, const uint8_t* src[], | |||||
| * without any additional vertical scaling (or point-scaling). | * without any additional vertical scaling (or point-scaling). | ||||
| * | * | ||||
| * @param c SWS scaling context | * @param c SWS scaling context | ||||
| * @param lumSrc scaled luma (Y) source data, 15bit for 8-10bit output, | |||||
| * 19-bit for 16bit output (in int32_t) | |||||
| * @param chrUSrc scaled chroma (U) source data, 15bit for 8-10bit output, | |||||
| * 19-bit for 16bit output (in int32_t) | |||||
| * @param chrVSrc scaled chroma (V) source data, 15bit for 8-10bit output, | |||||
| * 19-bit for 16bit output (in int32_t) | |||||
| * @param alpSrc scaled alpha (A) source data, 15bit for 8-10bit output, | |||||
| * 19-bit for 16bit output (in int32_t) | |||||
| * @param dest pointer to the 4 output planes (Y/U/V/A). For >8bit | |||||
| * output, this is in uint16_t | |||||
| * @param lumSrc scaled luma (Y) source data, 15bit for 8bit output | |||||
| * @param chrUSrc scaled chroma (U) source data, 15bit for 8bit output | |||||
| * @param chrVSrc scaled chroma (V) source data, 15bit for 8bit output | |||||
| * @param alpSrc scaled alpha (A) source data, 15bit for 8bit output | |||||
| * @param dest pointer to the 4 output planes (Y/U/V/A) | |||||
| * @param dstW width of dest[0], dest[3], lumSrc and alpSrc in pixels | * @param dstW width of dest[0], dest[3], lumSrc and alpSrc in pixels | ||||
| * @param chrDstW width of dest[1], dest[2], chrUSrc and chrVSrc | * @param chrDstW width of dest[1], dest[2], chrUSrc and chrVSrc | ||||
| */ | */ | ||||
| @@ -87,19 +82,14 @@ typedef void (*yuv2planar1_fn) (struct SwsContext *c, | |||||
| * | * | ||||
| * @param c SWS scaling context | * @param c SWS scaling context | ||||
| * @param lumFilter vertical luma/alpha scaling coefficients, 12bit [0,4096] | * @param lumFilter vertical luma/alpha scaling coefficients, 12bit [0,4096] | ||||
| * @param lumSrc scaled luma (Y) source data, 15bit for 8-10bit output, | |||||
| * 19-bit for 16bit output (in int32_t) | |||||
| * @param lumSrc scaled luma (Y) source data, 15bit for 8bit output | |||||
| * @param lumFilterSize number of vertical luma/alpha input lines to scale | * @param lumFilterSize number of vertical luma/alpha input lines to scale | ||||
| * @param chrFilter vertical chroma scaling coefficients, 12bit [0,4096] | * @param chrFilter vertical chroma scaling coefficients, 12bit [0,4096] | ||||
| * @param chrUSrc scaled chroma (U) source data, 15bit for 8-10bit output, | |||||
| * 19-bit for 16bit output (in int32_t) | |||||
| * @param chrVSrc scaled chroma (V) source data, 15bit for 8-10bit output, | |||||
| * 19-bit for 16bit output (in int32_t) | |||||
| * @param chrUSrc scaled chroma (U) source data, 15bit for 8bit output | |||||
| * @param chrVSrc scaled chroma (V) source data, 15bit for 8bit output | |||||
| * @param chrFilterSize number of vertical chroma input lines to scale | * @param chrFilterSize number of vertical chroma input lines to scale | ||||
| * @param alpSrc scaled alpha (A) source data, 15bit for 8-10bit output, | |||||
| * 19-bit for 16bit output (in int32_t) | |||||
| * @param dest pointer to the 4 output planes (Y/U/V/A). For >8bit | |||||
| * output, this is in uint16_t | |||||
| * @param alpSrc scaled alpha (A) source data, 15bit for 8bit output | |||||
| * @param dest pointer to the 4 output planes (Y/U/V/A) | |||||
| * @param dstW width of dest[0], dest[3], lumSrc and alpSrc in pixels | * @param dstW width of dest[0], dest[3], lumSrc and alpSrc in pixels | ||||
| * @param chrDstW width of dest[1], dest[2], chrUSrc and chrVSrc | * @param chrDstW width of dest[1], dest[2], chrUSrc and chrVSrc | ||||
| */ | */ | ||||
| @@ -115,16 +105,11 @@ typedef void (*yuv2planarX_fn) (struct SwsContext *c, const int16_t *lumFilter, | |||||
| * that this function may do chroma scaling, see the "uvalpha" argument. | * that this function may do chroma scaling, see the "uvalpha" argument. | ||||
| * | * | ||||
| * @param c SWS scaling context | * @param c SWS scaling context | ||||
| * @param lumSrc scaled luma (Y) source data, 15bit for 8-10bit output, | |||||
| * 19-bit for 16bit output (in int32_t) | |||||
| * @param chrUSrc scaled chroma (U) source data, 15bit for 8-10bit output, | |||||
| * 19-bit for 16bit output (in int32_t) | |||||
| * @param chrVSrc scaled chroma (V) source data, 15bit for 8-10bit output, | |||||
| * 19-bit for 16bit output (in int32_t) | |||||
| * @param alpSrc scaled alpha (A) source data, 15bit for 8-10bit output, | |||||
| * 19-bit for 16bit output (in int32_t) | |||||
| * @param dest pointer to the output plane. For 16bit output, this is | |||||
| * uint16_t | |||||
| * @param lumSrc scaled luma (Y) source data, 15bit for 8bit output | |||||
| * @param chrUSrc scaled chroma (U) source data, 15bit for 8bit output | |||||
| * @param chrVSrc scaled chroma (V) source data, 15bit for 8bit output | |||||
| * @param alpSrc scaled alpha (A) source data, 15bit for 8bit output | |||||
| * @param dest pointer to the output plane | |||||
| * @param dstW width of lumSrc and alpSrc in pixels, number of pixels | * @param dstW width of lumSrc and alpSrc in pixels, number of pixels | ||||
| * to write into dest[] | * to write into dest[] | ||||
| * @param uvalpha chroma scaling coefficient for the second line of chroma | * @param uvalpha chroma scaling coefficient for the second line of chroma | ||||
| @@ -147,16 +132,11 @@ typedef void (*yuv2packed1_fn) (struct SwsContext *c, const int16_t *lumSrc, | |||||
| * output by doing bilinear scaling between two input lines. | * output by doing bilinear scaling between two input lines. | ||||
| * | * | ||||
| * @param c SWS scaling context | * @param c SWS scaling context | ||||
| * @param lumSrc scaled luma (Y) source data, 15bit for 8-10bit output, | |||||
| * 19-bit for 16bit output (in int32_t) | |||||
| * @param chrUSrc scaled chroma (U) source data, 15bit for 8-10bit output, | |||||
| * 19-bit for 16bit output (in int32_t) | |||||
| * @param chrVSrc scaled chroma (V) source data, 15bit for 8-10bit output, | |||||
| * 19-bit for 16bit output (in int32_t) | |||||
| * @param alpSrc scaled alpha (A) source data, 15bit for 8-10bit output, | |||||
| * 19-bit for 16bit output (in int32_t) | |||||
| * @param dest pointer to the output plane. For 16bit output, this is | |||||
| * uint16_t | |||||
| * @param lumSrc scaled luma (Y) source data, 15bit for 8bit output | |||||
| * @param chrUSrc scaled chroma (U) source data, 15bit for 8bit output | |||||
| * @param chrVSrc scaled chroma (V) source data, 15bit for 8bit output | |||||
| * @param alpSrc scaled alpha (A) source data, 15bit for 8bit output | |||||
| * @param dest pointer to the output plane | |||||
| * @param dstW width of lumSrc and alpSrc in pixels, number of pixels | * @param dstW width of lumSrc and alpSrc in pixels, number of pixels | ||||
| * to write into dest[] | * to write into dest[] | ||||
| * @param yalpha luma/alpha scaling coefficients for the second input line. | * @param yalpha luma/alpha scaling coefficients for the second input line. | ||||
| @@ -180,19 +160,14 @@ typedef void (*yuv2packed2_fn) (struct SwsContext *c, const int16_t *lumSrc[2], | |||||
| * | * | ||||
| * @param c SWS scaling context | * @param c SWS scaling context | ||||
| * @param lumFilter vertical luma/alpha scaling coefficients, 12bit [0,4096] | * @param lumFilter vertical luma/alpha scaling coefficients, 12bit [0,4096] | ||||
| * @param lumSrc scaled luma (Y) source data, 15bit for 8-10bit output, | |||||
| * 19-bit for 16bit output (in int32_t) | |||||
| * @param lumSrc scaled luma (Y) source data, 15bit for 8bit output | |||||
| * @param lumFilterSize number of vertical luma/alpha input lines to scale | * @param lumFilterSize number of vertical luma/alpha input lines to scale | ||||
| * @param chrFilter vertical chroma scaling coefficients, 12bit [0,4096] | * @param chrFilter vertical chroma scaling coefficients, 12bit [0,4096] | ||||
| * @param chrUSrc scaled chroma (U) source data, 15bit for 8-10bit output, | |||||
| * 19-bit for 16bit output (in int32_t) | |||||
| * @param chrVSrc scaled chroma (V) source data, 15bit for 8-10bit output, | |||||
| * 19-bit for 16bit output (in int32_t) | |||||
| * @param chrUSrc scaled chroma (U) source data, 15bit for 8bit output | |||||
| * @param chrVSrc scaled chroma (V) source data, 15bit for 8bit output | |||||
| * @param chrFilterSize number of vertical chroma input lines to scale | * @param chrFilterSize number of vertical chroma input lines to scale | ||||
| * @param alpSrc scaled alpha (A) source data, 15bit for 8-10bit output, | |||||
| * 19-bit for 16bit output (in int32_t) | |||||
| * @param dest pointer to the output plane. For 16bit output, this is | |||||
| * uint16_t | |||||
| * @param alpSrc scaled alpha (A) source data, 15bit for 8bit output | |||||
| * @param dest pointer to the output plane | |||||
| * @param dstW width of lumSrc and alpSrc in pixels, number of pixels | * @param dstW width of lumSrc and alpSrc in pixels, number of pixels | ||||
| * to write into dest[] | * to write into dest[] | ||||
| * @param y vertical line number for this output. This does not need | * @param y vertical line number for this output. This does not need | ||||
| @@ -896,15 +896,11 @@ int sws_init_context(SwsContext *c, SwsFilter *srcFilter, SwsFilter *dstFilter) | |||||
| } | } | ||||
| } | } | ||||
| c->scalingBpp = 1 + FFMAX(av_pix_fmt_descriptors[srcFormat].comp[0].depth_minus1, | |||||
| av_pix_fmt_descriptors[dstFormat].comp[0].depth_minus1); | |||||
| if (c->scalingBpp <= 8) | |||||
| c->scalingBpp = 8; | |||||
| c->scalingBpp = FFMAX(av_pix_fmt_descriptors[srcFormat].comp[0].depth_minus1, | |||||
| av_pix_fmt_descriptors[dstFormat].comp[0].depth_minus1) >= 8 ? 16 : 8; | |||||
| if (c->scalingBpp == 16) | if (c->scalingBpp == 16) | ||||
| dst_stride <<= 1; | dst_stride <<= 1; | ||||
| FF_ALLOC_OR_GOTO(c, c->formatConvBuffer, | |||||
| FFALIGN(srcW, 16) * 2 * FFALIGN(c->scalingBpp, 8) >> 3, | |||||
| fail); | |||||
| FF_ALLOC_OR_GOTO(c, c->formatConvBuffer, FFALIGN(srcW, 16) * 2 * c->scalingBpp >> 3, fail); | |||||
| if (HAVE_MMX2 && cpu_flags & AV_CPU_FLAG_MMX2 && c->scalingBpp == 8) { | if (HAVE_MMX2 && cpu_flags & AV_CPU_FLAG_MMX2 && c->scalingBpp == 8) { | ||||
| c->canMMX2BeUsed= (dstW >=srcW && (dstW&31)==0 && (srcW&15)==0) ? 1 : 0; | c->canMMX2BeUsed= (dstW >=srcW && (dstW&31)==0 && (srcW&15)==0) ? 1 : 0; | ||||
| if (!c->canMMX2BeUsed && dstW >=srcW && (srcW&15)==0 && (flags&SWS_FAST_BILINEAR)) { | if (!c->canMMX2BeUsed && dstW >=srcW && (srcW&15)==0 && (flags&SWS_FAST_BILINEAR)) { | ||||
| @@ -1059,7 +1055,7 @@ int sws_init_context(SwsContext *c, SwsFilter *srcFilter, SwsFilter *dstFilter) | |||||
| c->lumPixBuf[i] = c->lumPixBuf[i+c->vLumBufSize]; | c->lumPixBuf[i] = c->lumPixBuf[i+c->vLumBufSize]; | ||||
| } | } | ||||
| // 64 / c->scalingBpp is the same as 16 / sizeof(scaling_intermediate) | // 64 / c->scalingBpp is the same as 16 / sizeof(scaling_intermediate) | ||||
| c->uv_off_px = dst_stride_px + 64 / (c->scalingBpp &~ 7); | |||||
| c->uv_off_px = dst_stride_px + 64 / c->scalingBpp; | |||||
| c->uv_off_byte = dst_stride + 16; | c->uv_off_byte = dst_stride + 16; | ||||
| for (i=0; i<c->vChrBufSize; i++) { | for (i=0; i<c->vChrBufSize; i++) { | ||||
| FF_ALLOC_OR_GOTO(c, c->chrUPixBuf[i+c->vChrBufSize], dst_stride*2+32, fail); | FF_ALLOC_OR_GOTO(c, c->chrUPixBuf[i+c->vChrBufSize], dst_stride*2+32, fail); | ||||
| @@ -38,8 +38,8 @@ yuv420p16le 2d59c4f1d0314a5a957a7cfc4b6fabcc | |||||
| yuv420p9be ce880fa07830e5297c22acf6e20555ce | yuv420p9be ce880fa07830e5297c22acf6e20555ce | ||||
| yuv420p9le 16543fda8f87d94a6cf857d2e8d4461a | yuv420p9le 16543fda8f87d94a6cf857d2e8d4461a | ||||
| yuv422p c9bba4529821d796a6ab09f6a5fd355a | yuv422p c9bba4529821d796a6ab09f6a5fd355a | ||||
| yuv422p10be a4a83d0811280eff7405d94a7de21596 | |||||
| yuv422p10le 23717b6c73a59912c605f27877ae2fb6 | |||||
| yuv422p10be 107c6e31a3d4d598bca1d8426aaa54f5 | |||||
| yuv422p10le 3f478be644add24b6cc77e718a6e2afa | |||||
| yuv422p16be dc9886f2fccf87cc54b27e071a2c251e | yuv422p16be dc9886f2fccf87cc54b27e071a2c251e | ||||
| yuv422p16le f181c8d8436f1233ba566d9bc88005ec | yuv422p16le f181c8d8436f1233ba566d9bc88005ec | ||||
| yuv440p 5a064afe2b453bb52cdb3f176b1aa1cf | yuv440p 5a064afe2b453bb52cdb3f176b1aa1cf | ||||
| @@ -38,8 +38,8 @@ yuv420p16le 2d59c4f1d0314a5a957a7cfc4b6fabcc | |||||
| yuv420p9be ce880fa07830e5297c22acf6e20555ce | yuv420p9be ce880fa07830e5297c22acf6e20555ce | ||||
| yuv420p9le 16543fda8f87d94a6cf857d2e8d4461a | yuv420p9le 16543fda8f87d94a6cf857d2e8d4461a | ||||
| yuv422p c9bba4529821d796a6ab09f6a5fd355a | yuv422p c9bba4529821d796a6ab09f6a5fd355a | ||||
| yuv422p10be a4a83d0811280eff7405d94a7de21596 | |||||
| yuv422p10le 23717b6c73a59912c605f27877ae2fb6 | |||||
| yuv422p10be 107c6e31a3d4d598bca1d8426aaa54f5 | |||||
| yuv422p10le 3f478be644add24b6cc77e718a6e2afa | |||||
| yuv422p16be dc9886f2fccf87cc54b27e071a2c251e | yuv422p16be dc9886f2fccf87cc54b27e071a2c251e | ||||
| yuv422p16le f181c8d8436f1233ba566d9bc88005ec | yuv422p16le f181c8d8436f1233ba566d9bc88005ec | ||||
| yuv440p 5a064afe2b453bb52cdb3f176b1aa1cf | yuv440p 5a064afe2b453bb52cdb3f176b1aa1cf | ||||
| @@ -38,8 +38,8 @@ yuv420p16le 2d59c4f1d0314a5a957a7cfc4b6fabcc | |||||
| yuv420p9be ce880fa07830e5297c22acf6e20555ce | yuv420p9be ce880fa07830e5297c22acf6e20555ce | ||||
| yuv420p9le 16543fda8f87d94a6cf857d2e8d4461a | yuv420p9le 16543fda8f87d94a6cf857d2e8d4461a | ||||
| yuv422p c9bba4529821d796a6ab09f6a5fd355a | yuv422p c9bba4529821d796a6ab09f6a5fd355a | ||||
| yuv422p10be a4a83d0811280eff7405d94a7de21596 | |||||
| yuv422p10le 23717b6c73a59912c605f27877ae2fb6 | |||||
| yuv422p10be 107c6e31a3d4d598bca1d8426aaa54f5 | |||||
| yuv422p10le 3f478be644add24b6cc77e718a6e2afa | |||||
| yuv422p16be dc9886f2fccf87cc54b27e071a2c251e | yuv422p16be dc9886f2fccf87cc54b27e071a2c251e | ||||
| yuv422p16le f181c8d8436f1233ba566d9bc88005ec | yuv422p16le f181c8d8436f1233ba566d9bc88005ec | ||||
| yuv440p 5a064afe2b453bb52cdb3f176b1aa1cf | yuv440p 5a064afe2b453bb52cdb3f176b1aa1cf | ||||
| @@ -31,15 +31,15 @@ uyvy422 314bd486277111a95d9369b944fa0400 | |||||
| yuv410p 7df8f6d69b56a8dcb6c7ee908e5018b5 | yuv410p 7df8f6d69b56a8dcb6c7ee908e5018b5 | ||||
| yuv411p 1143e7c5cc28fe0922b051b17733bc4c | yuv411p 1143e7c5cc28fe0922b051b17733bc4c | ||||
| yuv420p fdad2d8df8985e3d17e73c71f713cb14 | yuv420p fdad2d8df8985e3d17e73c71f713cb14 | ||||
| yuv420p10be 04663d400e44692fe8a622a067f838da | |||||
| yuv420p10le 6171850f66df7a727b4bed1c87ef9188 | |||||
| yuv420p10be af5429f27b9f95bf955e795921c65cdc | |||||
| yuv420p10le d0b47e6a8a44e6b5ca0fe4349a4e393b | |||||
| yuv420p16be 9688e33e03b8c8275ab2fb1df0f06bee | yuv420p16be 9688e33e03b8c8275ab2fb1df0f06bee | ||||
| yuv420p16le cba8b390ad5e7b8678e419b8ce79c008 | yuv420p16le cba8b390ad5e7b8678e419b8ce79c008 | ||||
| yuv420p9be ab163dfef03c4d563aca99b24276b9fd | |||||
| yuv420p9le cd56c5a76ce74e504dd59d25a5e4389c | |||||
| yuv420p9be a073b2d93b2a7dce2069ba252bc43175 | |||||
| yuv420p9le b67233c3c7d93763d07d88f697c145e1 | |||||
| yuv422p 918e37701ee7377d16a8a6c119c56a40 | yuv422p 918e37701ee7377d16a8a6c119c56a40 | ||||
| yuv422p10be e8e80fed7121f3afac994f2afac42cd2 | |||||
| yuv422p10le 370866666f4889ee0928345b16d68fb4 | |||||
| yuv422p10be 533fd21e7943c20a1026b19069b3b867 | |||||
| yuv422p10le 59b20a4a8609f5da2dc54c78aea11e6c | |||||
| yuv422p16be 2cf502d7d386db1f1b3b946679d897b1 | yuv422p16be 2cf502d7d386db1f1b3b946679d897b1 | ||||
| yuv422p16le 3002a4e47520731dcee5929aff49eb74 | yuv422p16le 3002a4e47520731dcee5929aff49eb74 | ||||
| yuv440p 461503fdb9b90451020aa3b25ddf041c | yuv440p 461503fdb9b90451020aa3b25ddf041c | ||||
| @@ -38,8 +38,8 @@ yuv420p16le 0f609e588e5a258644ef85170d70e030 | |||||
| yuv420p9be be40ec975fb2873891643cbbbddbc3b0 | yuv420p9be be40ec975fb2873891643cbbbddbc3b0 | ||||
| yuv420p9le 7e606310d3f5ff12badf911e8f333471 | yuv420p9le 7e606310d3f5ff12badf911e8f333471 | ||||
| yuv422p d7f5cb44d9b0210d66d6a8762640ab34 | yuv422p d7f5cb44d9b0210d66d6a8762640ab34 | ||||
| yuv422p10be 51d9ef13fe43ea9549b3792bfd449bf7 | |||||
| yuv422p10le 4b286a243ee0e715b2961bc1b469e629 | |||||
| yuv422p10be a28b051168af49435c04af5f58dce47b | |||||
| yuv422p10le 35936ffff30df2697f47b9b8d2cb7dea | |||||
| yuv422p16be 51d9aa4e78d121c226d919ce97976fe4 | yuv422p16be 51d9aa4e78d121c226d919ce97976fe4 | ||||
| yuv422p16le 12965c54bda8932ca72da194419a9908 | yuv422p16le 12965c54bda8932ca72da194419a9908 | ||||
| yuv440p 876385e96165acf51271b20e5d85a416 | yuv440p 876385e96165acf51271b20e5d85a416 | ||||