* commit '05b0998f511ffa699407465d48c7d5805f746ad2':
    dsputil: Fix error by not using redzone and register name
    swscale: GBRP output support

Conflicts:
    libswscale/output.c
    libswscale/swscale.c
    libswscale/swscale_internal.h
    libswscale/utils.c
    tests/ref/lavfi/pixdesc
    tests/ref/lavfi/pixfmts_copy
    tests/ref/lavfi/pixfmts_null
    tests/ref/lavfi/pixfmts_scale
    tests/ref/lavfi/pixfmts_vflip

Merged-by: Michael Niedermayer <michaelni@gmx.at>

tags/n1.2
@@ -318,8 +318,8 @@ PUT_NO_RND_PIXELS8_Y2_EXACT | |||||
; avg_pixels8(uint8_t *block, const uint8_t *pixels, int line_size, int h) | ; avg_pixels8(uint8_t *block, const uint8_t *pixels, int line_size, int h) | ||||
%macro AVG_PIXELS8 0 | %macro AVG_PIXELS8 0 | ||||
cglobal avg_pixels8, 4,5 | cglobal avg_pixels8, 4,5 | ||||
movsxdifnidn r2, edx | |||||
lea r4, [r2+r2] | |||||
movsxdifnidn r2, r2d | |||||
lea r4, [r2*2] | |||||
.loop: | .loop: | ||||
mova m0, [r0] | mova m0, [r0] | ||||
mova m1, [r0+r2] | mova m1, [r0+r2] | ||||
@@ -349,7 +349,7 @@ AVG_PIXELS8 | |||||
; avg_pixels8_x2(uint8_t *block, const uint8_t *pixels, int line_size, int h) | ; avg_pixels8_x2(uint8_t *block, const uint8_t *pixels, int line_size, int h) | ||||
%macro AVG_PIXELS8_X2 0 | %macro AVG_PIXELS8_X2 0 | ||||
cglobal avg_pixels8_x2, 4,5 | cglobal avg_pixels8_x2, 4,5 | ||||
movsxdifnidn r2, edx | |||||
movsxdifnidn r2, r2d | |||||
lea r4, [r2*2] | lea r4, [r2*2] | ||||
.loop: | .loop: | ||||
mova m0, [r1] | mova m0, [r1] | ||||
@@ -169,7 +169,7 @@ INIT_MMX 3dnow | |||||
PUT_NO_RND_PIXELS16_l2 | PUT_NO_RND_PIXELS16_l2 | ||||
%macro MPEG4_QPEL16_H_LOWPASS 1 | %macro MPEG4_QPEL16_H_LOWPASS 1 | ||||
cglobal %1_mpeg4_qpel16_h_lowpass, 5, 5, 0, 8 | |||||
cglobal %1_mpeg4_qpel16_h_lowpass, 5, 5, 0, 16 | |||||
movsxdifnidn r2, r2d | movsxdifnidn r2, r2d | ||||
movsxdifnidn r3, r3d | movsxdifnidn r3, r3d | ||||
pxor m7, m7 | pxor m7, m7 | ||||
@@ -202,7 +202,7 @@ cglobal %1_mpeg4_qpel16_h_lowpass, 5, 5, 0, 8 | |||||
paddw m6, [PW_ROUND] | paddw m6, [PW_ROUND] | ||||
paddw m0, m6 | paddw m0, m6 | ||||
psraw m0, 5 | psraw m0, 5 | ||||
mova [rsp-8], m0 | |||||
mova [rsp+8], m0 | |||||
mova m0, [r1+5] | mova m0, [r1+5] | ||||
mova m5, m0 | mova m5, m0 | ||||
mova m6, m0 | mova m6, m0 | ||||
@@ -226,7 +226,7 @@ cglobal %1_mpeg4_qpel16_h_lowpass, 5, 5, 0, 8 | |||||
paddw m1, [PW_ROUND] | paddw m1, [PW_ROUND] | ||||
paddw m3, m1 | paddw m3, m1 | ||||
psraw m3, 5 | psraw m3, 5 | ||||
mova m1, [rsp-8] | |||||
mova m1, [rsp+8] | |||||
packuswb m1, m3 | packuswb m1, m3 | ||||
OP_MOV [r0], m1, m4 | OP_MOV [r0], m1, m4 | ||||
mova m1, [r1+9] | mova m1, [r1+9] | ||||
@@ -1374,11 +1374,11 @@ YUV2RGBWRAPPERX(yuv2, rgb_full, rgb8_full, AV_PIX_FMT_RGB8, 0) | |||||
static void | static void | ||||
yuv2gbrp_full_X_c(SwsContext *c, const int16_t *lumFilter, | yuv2gbrp_full_X_c(SwsContext *c, const int16_t *lumFilter, | ||||
const int16_t **lumSrc, int lumFilterSize, | |||||
const int16_t *chrFilter, const int16_t **chrUSrc, | |||||
const int16_t **chrVSrc, int chrFilterSize, | |||||
const int16_t **alpSrc, uint8_t **dest, | |||||
int dstW, int y) | |||||
const int16_t **lumSrc, int lumFilterSize, | |||||
const int16_t *chrFilter, const int16_t **chrUSrc, | |||||
const int16_t **chrVSrc, int chrFilterSize, | |||||
const int16_t **alpSrc, uint8_t **dest, | |||||
int dstW, int y) | |||||
{ | { | ||||
const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(c->dstFormat); | const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(c->dstFormat); | ||||
int i; | int i; | ||||
@@ -1388,36 +1388,42 @@ yuv2gbrp_full_X_c(SwsContext *c, const int16_t *lumFilter, | |||||
for (i = 0; i < dstW; i++) { | for (i = 0; i < dstW; i++) { | ||||
int j; | int j; | ||||
int Y = 1<<9; | |||||
int U = (1<<9)-(128 << 19); | |||||
int V = (1<<9)-(128 << 19); | |||||
int Y = 1 << 9; | |||||
int U = (1 << 9) - (128 << 19); | |||||
int V = (1 << 9) - (128 << 19); | |||||
int R, G, B, A; | int R, G, B, A; | ||||
for (j = 0; j < lumFilterSize; j++) { | |||||
for (j = 0; j < lumFilterSize; j++) | |||||
Y += lumSrc[j][i] * lumFilter[j]; | Y += lumSrc[j][i] * lumFilter[j]; | ||||
} | |||||
for (j = 0; j < chrFilterSize; j++) { | for (j = 0; j < chrFilterSize; j++) { | ||||
U += chrUSrc[j][i] * chrFilter[j]; | U += chrUSrc[j][i] * chrFilter[j]; | ||||
V += chrVSrc[j][i] * chrFilter[j]; | V += chrVSrc[j][i] * chrFilter[j]; | ||||
} | } | ||||
Y >>= 10; | Y >>= 10; | ||||
U >>= 10; | U >>= 10; | ||||
V >>= 10; | V >>= 10; | ||||
if (hasAlpha) { | if (hasAlpha) { | ||||
A = 1 << 18; | A = 1 << 18; | ||||
for (j = 0; j < lumFilterSize; j++) { | |||||
for (j = 0; j < lumFilterSize; j++) | |||||
A += alpSrc[j][i] * lumFilter[j]; | A += alpSrc[j][i] * lumFilter[j]; | ||||
} | |||||
A >>= 19; | A >>= 19; | ||||
if (A & 0x100) | if (A & 0x100) | ||||
A = av_clip_uint8(A); | A = av_clip_uint8(A); | ||||
} | } | ||||
Y -= c->yuv2rgb_y_offset; | Y -= c->yuv2rgb_y_offset; | ||||
Y *= c->yuv2rgb_y_coeff; | Y *= c->yuv2rgb_y_coeff; | ||||
Y += 1 << 21; | Y += 1 << 21; | ||||
R = Y + V*c->yuv2rgb_v2r_coeff; | |||||
G = Y + V*c->yuv2rgb_v2g_coeff + U*c->yuv2rgb_u2g_coeff; | |||||
B = Y + U*c->yuv2rgb_u2b_coeff; | |||||
R = Y + V * c->yuv2rgb_v2r_coeff; | |||||
G = Y + V * c->yuv2rgb_v2g_coeff + U * c->yuv2rgb_u2g_coeff; | |||||
B = Y + U * c->yuv2rgb_u2b_coeff; | |||||
if ((R | G | B) & 0xC0000000) { | if ((R | G | B) & 0xC0000000) { | ||||
R = av_clip_uintp2(R, 30); | R = av_clip_uintp2(R, 30); | ||||
G = av_clip_uintp2(G, 30); | G = av_clip_uintp2(G, 30); | ||||
@@ -250,12 +250,12 @@ typedef void (*yuv2packedX_fn)(struct SwsContext *c, const int16_t *lumFilter, | |||||
* or some output formats. | * or some output formats. | ||||
*/ | */ | ||||
typedef void (*yuv2anyX_fn)(struct SwsContext *c, const int16_t *lumFilter, | typedef void (*yuv2anyX_fn)(struct SwsContext *c, const int16_t *lumFilter, | ||||
const int16_t **lumSrc, int lumFilterSize, | |||||
const int16_t *chrFilter, | |||||
const int16_t **chrUSrc, | |||||
const int16_t **chrVSrc, int chrFilterSize, | |||||
const int16_t **alpSrc, uint8_t **dest, | |||||
int dstW, int y); | |||||
const int16_t **lumSrc, int lumFilterSize, | |||||
const int16_t *chrFilter, | |||||
const int16_t **chrUSrc, | |||||
const int16_t **chrVSrc, int chrFilterSize, | |||||
const int16_t **alpSrc, uint8_t **dest, | |||||
int dstW, int y); | |||||
/* This struct should be aligned on at least a 32-byte boundary. */ | /* This struct should be aligned on at least a 32-byte boundary. */ | ||||
typedef struct SwsContext { | typedef struct SwsContext { | ||||
@@ -1022,8 +1022,8 @@ av_cold int sws_init_context(SwsContext *c, SwsFilter *srcFilter, | |||||
if (isPlanarRGB(dstFormat)) { | if (isPlanarRGB(dstFormat)) { | ||||
if (!(flags & SWS_FULL_CHR_H_INT)) { | if (!(flags & SWS_FULL_CHR_H_INT)) { | ||||
av_log(c, AV_LOG_DEBUG, | av_log(c, AV_LOG_DEBUG, | ||||
"%s output is not supported with half chroma resolution, switching to full\n", | |||||
av_get_pix_fmt_name(dstFormat)); | |||||
"%s output is not supported with half chroma resolution, switching to full\n", | |||||
av_get_pix_fmt_name(dstFormat)); | |||||
flags |= SWS_FULL_CHR_H_INT; | flags |= SWS_FULL_CHR_H_INT; | ||||
c->flags = flags; | c->flags = flags; | ||||
} | } | ||||