Adds a diff_pixels_unaligned() function pointer.

Fixes: https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=872503

Signed-off-by: Michael Niedermayer <michael@niedermayer.cc>

tags/n3.4
@@ -628,7 +628,7 @@ static int dct_sad8x8_c(MpegEncContext *s, uint8_t *src1, | |||||
av_assert2(h == 8); | av_assert2(h == 8); | ||||
s->pdsp.diff_pixels(temp, src1, src2, stride); | |||||
s->pdsp.diff_pixels_unaligned(temp, src1, src2, stride); | |||||
s->fdsp.fdct(temp); | s->fdsp.fdct(temp); | ||||
return s->mecc.sum_abs_dctelem(temp); | return s->mecc.sum_abs_dctelem(temp); | ||||
} | } | ||||
@@ -668,7 +668,7 @@ static int dct264_sad8x8_c(MpegEncContext *s, uint8_t *src1, | |||||
int16_t dct[8][8]; | int16_t dct[8][8]; | ||||
int i, sum = 0; | int i, sum = 0; | ||||
s->pdsp.diff_pixels(dct[0], src1, src2, stride); | |||||
s->pdsp.diff_pixels_unaligned(dct[0], src1, src2, stride); | |||||
#define SRC(x) dct[i][x] | #define SRC(x) dct[i][x] | ||||
#define DST(x, v) dct[i][x] = v | #define DST(x, v) dct[i][x] = v | ||||
@@ -695,7 +695,7 @@ static int dct_max8x8_c(MpegEncContext *s, uint8_t *src1, | |||||
av_assert2(h == 8); | av_assert2(h == 8); | ||||
s->pdsp.diff_pixels(temp, src1, src2, stride); | |||||
s->pdsp.diff_pixels_unaligned(temp, src1, src2, stride); | |||||
s->fdsp.fdct(temp); | s->fdsp.fdct(temp); | ||||
for (i = 0; i < 64; i++) | for (i = 0; i < 64; i++) | ||||
@@ -714,7 +714,7 @@ static int quant_psnr8x8_c(MpegEncContext *s, uint8_t *src1, | |||||
av_assert2(h == 8); | av_assert2(h == 8); | ||||
s->mb_intra = 0; | s->mb_intra = 0; | ||||
s->pdsp.diff_pixels(temp, src1, src2, stride); | |||||
s->pdsp.diff_pixels_unaligned(temp, src1, src2, stride); | |||||
memcpy(bak, temp, 64 * sizeof(int16_t)); | memcpy(bak, temp, 64 * sizeof(int16_t)); | ||||
@@ -817,7 +817,7 @@ static int bit8x8_c(MpegEncContext *s, uint8_t *src1, uint8_t *src2, | |||||
av_assert2(h == 8); | av_assert2(h == 8); | ||||
s->pdsp.diff_pixels(temp, src1, src2, stride); | |||||
s->pdsp.diff_pixels_unaligned(temp, src1, src2, stride); | |||||
s->block_last_index[0 /* FIXME */] = | s->block_last_index[0 /* FIXME */] = | ||||
last = | last = | ||||
@@ -82,6 +82,7 @@ av_cold void ff_pixblockdsp_init(PixblockDSPContext *c, AVCodecContext *avctx) | |||||
{ | { | ||||
const unsigned high_bit_depth = avctx->bits_per_raw_sample > 8; | const unsigned high_bit_depth = avctx->bits_per_raw_sample > 8; | ||||
c->diff_pixels_unaligned = | |||||
c->diff_pixels = diff_pixels_c; | c->diff_pixels = diff_pixels_c; | ||||
switch (avctx->bits_per_raw_sample) { | switch (avctx->bits_per_raw_sample) { | ||||
@@ -33,6 +33,11 @@ typedef struct PixblockDSPContext { | |||||
const uint8_t *s1 /* align 8 */, | const uint8_t *s1 /* align 8 */, | ||||
const uint8_t *s2 /* align 8 */, | const uint8_t *s2 /* align 8 */, | ||||
ptrdiff_t stride); | ptrdiff_t stride); | ||||
void (*diff_pixels_unaligned)(int16_t *av_restrict block /* align 16 */, | |||||
const uint8_t *s1, | |||||
const uint8_t *s2, | |||||
ptrdiff_t stride); | |||||
} PixblockDSPContext; | } PixblockDSPContext; | ||||
void ff_pixblockdsp_init(PixblockDSPContext *c, AVCodecContext *avctx); | void ff_pixblockdsp_init(PixblockDSPContext *c, AVCodecContext *avctx); | ||||
@@ -39,12 +39,14 @@ av_cold void ff_pixblockdsp_init_x86(PixblockDSPContext *c, | |||||
if (EXTERNAL_MMX(cpu_flags)) { | if (EXTERNAL_MMX(cpu_flags)) { | ||||
if (!high_bit_depth) | if (!high_bit_depth) | ||||
c->get_pixels = ff_get_pixels_mmx; | c->get_pixels = ff_get_pixels_mmx; | ||||
c->diff_pixels_unaligned = | |||||
c->diff_pixels = ff_diff_pixels_mmx; | c->diff_pixels = ff_diff_pixels_mmx; | ||||
} | } | ||||
if (EXTERNAL_SSE2(cpu_flags)) { | if (EXTERNAL_SSE2(cpu_flags)) { | ||||
if (!high_bit_depth) | if (!high_bit_depth) | ||||
c->get_pixels = ff_get_pixels_sse2; | c->get_pixels = ff_get_pixels_sse2; | ||||
c->diff_pixels_unaligned = | |||||
c->diff_pixels = ff_diff_pixels_sse2; | c->diff_pixels = ff_diff_pixels_sse2; | ||||
} | } | ||||
} | } |