Fixed at least: CVFI1_Sony_D.jsv, CVFI1_SVA_C.264, MR6_BT_B.h264.
Originally committed as revision 14310 to svn://svn.ffmpeg.org/ffmpeg/trunk
Tags: v0.5
@@ -347,7 +347,7 @@ typedef struct DSPContext { | |||||
void (*h264_h_loop_filter_chroma_intra)(uint8_t *pix, int stride, int alpha, int beta); | void (*h264_h_loop_filter_chroma_intra)(uint8_t *pix, int stride, int alpha, int beta); | ||||
// h264_loop_filter_strength: simd only. the C version is inlined in h264.c | // h264_loop_filter_strength: simd only. the C version is inlined in h264.c | ||||
void (*h264_loop_filter_strength)(int16_t bS[2][4][4], uint8_t nnz[40], int8_t ref[2][40], int16_t mv[2][40][2], | void (*h264_loop_filter_strength)(int16_t bS[2][4][4], uint8_t nnz[40], int8_t ref[2][40], int16_t mv[2][40][2], | ||||
int bidir, int edges, int step, int mask_mv0, int mask_mv1); | |||||
int bidir, int edges, int step, int mask_mv0, int mask_mv1, int field); | |||||
void (*h263_v_loop_filter)(uint8_t *src, int stride, int qscale); | void (*h263_v_loop_filter)(uint8_t *src, int stride, int qscale); | ||||
void (*h263_h_loop_filter)(uint8_t *src, int stride, int qscale); | void (*h263_h_loop_filter)(uint8_t *src, int stride, int qscale); | ||||
@@ -6495,7 +6495,7 @@ static void filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, | |||||
int step = IS_8x8DCT(mb_type) ? 2 : 1; | int step = IS_8x8DCT(mb_type) ? 2 : 1; | ||||
edges = (mb_type & MB_TYPE_16x16) && !(h->cbp & 15) ? 1 : 4; | edges = (mb_type & MB_TYPE_16x16) && !(h->cbp & 15) ? 1 : 4; | ||||
s->dsp.h264_loop_filter_strength( bS, h->non_zero_count_cache, h->ref_cache, h->mv_cache, | s->dsp.h264_loop_filter_strength( bS, h->non_zero_count_cache, h->ref_cache, h->mv_cache, | ||||
(h->slice_type == FF_B_TYPE), edges, step, mask_edge0, mask_edge1 ); | |||||
(h->slice_type == FF_B_TYPE), edges, step, mask_edge0, mask_edge1, FIELD_PICTURE); | |||||
} | } | ||||
if( IS_INTRA(s->current_picture.mb_type[mb_xy-1]) ) | if( IS_INTRA(s->current_picture.mb_type[mb_xy-1]) ) | ||||
bSv[0][0] = 0x0004000400040004ULL; | bSv[0][0] = 0x0004000400040004ULL; | ||||
@@ -20,6 +20,9 @@ | |||||
#include "dsputil_mmx.h" | #include "dsputil_mmx.h" | ||||
DECLARE_ALIGNED_8 (static const uint64_t, ff_pb_3_1 ) = 0x0103010301030103ULL; | |||||
DECLARE_ALIGNED_8 (static const uint64_t, ff_pb_7_3 ) = 0x0307030703070307ULL; | |||||
/***********************************/ | /***********************************/ | ||||
/* IDCT */ | /* IDCT */ | ||||
@@ -623,7 +626,7 @@ static void h264_h_loop_filter_chroma_intra_mmx2(uint8_t *pix, int stride, int a | |||||
} | } | ||||
static void h264_loop_filter_strength_mmx2( int16_t bS[2][4][4], uint8_t nnz[40], int8_t ref[2][40], int16_t mv[2][40][2], | static void h264_loop_filter_strength_mmx2( int16_t bS[2][4][4], uint8_t nnz[40], int8_t ref[2][40], int16_t mv[2][40][2], | ||||
int bidir, int edges, int step, int mask_mv0, int mask_mv1 ) { | |||||
int bidir, int edges, int step, int mask_mv0, int mask_mv1, int field ) { | |||||
int dir; | int dir; | ||||
asm volatile( | asm volatile( | ||||
"pxor %%mm7, %%mm7 \n\t" | "pxor %%mm7, %%mm7 \n\t" | ||||
@@ -632,6 +635,13 @@ static void h264_loop_filter_strength_mmx2( int16_t bS[2][4][4], uint8_t nnz[40] | |||||
"movq %2, %%mm4 \n\t" | "movq %2, %%mm4 \n\t" | ||||
::"m"(ff_pb_1), "m"(ff_pb_3), "m"(ff_pb_7) | ::"m"(ff_pb_1), "m"(ff_pb_3), "m"(ff_pb_7) | ||||
); | ); | ||||
if(field) | |||||
asm volatile( | |||||
"movq %0, %%mm5 \n\t" | |||||
"movq %1, %%mm4 \n\t" | |||||
::"m"(ff_pb_3_1), "m"(ff_pb_7_3) | |||||
); | |||||
// could do a special case for dir==0 && edges==1, but it only reduces the | // could do a special case for dir==0 && edges==1, but it only reduces the | ||||
// average filter time by 1.2% | // average filter time by 1.2% | ||||
for( dir=1; dir>=0; dir-- ) { | for( dir=1; dir>=0; dir-- ) { | ||||