Originally committed as revision 5203 to svn://svn.ffmpeg.org/ffmpeg/trunk (tags/v0.5)
| @@ -3773,6 +3773,8 @@ static void ff_jref_idct1_add(uint8_t *dest, int line_size, DCTELEM *block) | |||||
| dest[0] = cm[dest[0] + ((block[0] + 4)>>3)]; | dest[0] = cm[dest[0] + ((block[0] + 4)>>3)]; | ||||
| } | } | ||||
/**
 * No-op prefetch routine.
 *
 * Installed as the default value of DSPContext.prefetch so callers can invoke
 * the hook unconditionally; CPU-specific init code may replace it.
 * The signature must match the DSPContext.prefetch function pointer exactly:
 * calling a function through a pointer of an incompatible type is undefined
 * behaviour.
 *
 * @param mem    address that would be prefetched (ignored)
 * @param stride byte distance between successive lines (ignored)
 * @param h      number of lines (ignored)
 */
static void just_return(void *mem, int stride, int h)
{
    /* Nothing to do; silence unused-parameter warnings. */
    (void)mem;
    (void)stride;
    (void)h;
}
| /* init static data */ | /* init static data */ | ||||
| void dsputil_static_init(void) | void dsputil_static_init(void) | ||||
| { | { | ||||
| @@ -4054,6 +4056,8 @@ void dsputil_init(DSPContext* c, AVCodecContext *avctx) | |||||
| c->inner_add_yblock = ff_snow_inner_add_yblock; | c->inner_add_yblock = ff_snow_inner_add_yblock; | ||||
| #endif | #endif | ||||
| c->prefetch= just_return; | |||||
| #ifdef HAVE_MMX | #ifdef HAVE_MMX | ||||
| dsputil_init_mmx(c, avctx); | dsputil_init_mmx(c, avctx); | ||||
| #endif | #endif | ||||
| @@ -343,6 +343,8 @@ typedef struct DSPContext { | |||||
| void (*vertical_compose97i)(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, DWTELEM *b3, DWTELEM *b4, DWTELEM *b5, int width); | void (*vertical_compose97i)(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, DWTELEM *b3, DWTELEM *b4, DWTELEM *b5, int width); | ||||
| void (*horizontal_compose97i)(DWTELEM *b, int width); | void (*horizontal_compose97i)(DWTELEM *b, int width); | ||||
| void (*inner_add_yblock)(uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h, int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8); | void (*inner_add_yblock)(uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h, int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8); | ||||
| void (*prefetch)(void *mem, int stride, int h); | |||||
| } DSPContext; | } DSPContext; | ||||
| void dsputil_static_init(void); | void dsputil_static_init(void); | ||||
| @@ -2752,6 +2752,22 @@ static inline void mc_part(H264Context *h, int n, int square, int chroma_height, | |||||
| x_offset, y_offset, qpix_put, chroma_put, qpix_avg, chroma_avg, list0, list1); | x_offset, y_offset, qpix_put, chroma_put, qpix_avg, chroma_avg, list0, list1); | ||||
| } | } | ||||
| static inline void prefetch_motion(H264Context *h, int list){ | |||||
| /* fetch pixels for estimated mv 4 macroblocks ahead | |||||
| * optimized for 64byte cache lines */ | |||||
| MpegEncContext * const s = &h->s; | |||||
| const int refn = h->ref_cache[list][scan8[0]]; | |||||
| if(refn >= 0){ | |||||
| const int mx= (h->mv_cache[list][scan8[0]][0]>>2) + 16*s->mb_x + 8; | |||||
| const int my= (h->mv_cache[list][scan8[0]][1]>>2) + 16*s->mb_y; | |||||
| uint8_t **src= h->ref_list[list][refn].data; | |||||
| int off= mx + (my + (s->mb_x&3)*4)*s->linesize + 64; | |||||
| s->dsp.prefetch(src[0]+off, s->linesize, 4); | |||||
| off= (mx>>1) + ((my>>1) + (s->mb_x&7))*s->uvlinesize + 64; | |||||
| s->dsp.prefetch(src[1]+off, src[2]-src[1], 2); | |||||
| } | |||||
| } | |||||
| static void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr, | static void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr, | ||||
| qpel_mc_func (*qpix_put)[16], h264_chroma_mc_func (*chroma_put), | qpel_mc_func (*qpix_put)[16], h264_chroma_mc_func (*chroma_put), | ||||
| qpel_mc_func (*qpix_avg)[16], h264_chroma_mc_func (*chroma_avg), | qpel_mc_func (*qpix_avg)[16], h264_chroma_mc_func (*chroma_avg), | ||||
| @@ -2762,6 +2778,8 @@ static void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t | |||||
| assert(IS_INTER(mb_type)); | assert(IS_INTER(mb_type)); | ||||
| prefetch_motion(h, 0); | |||||
| if(IS_16X16(mb_type)){ | if(IS_16X16(mb_type)){ | ||||
| mc_part(h, 0, 1, 8, 0, dest_y, dest_cb, dest_cr, 0, 0, | mc_part(h, 0, 1, 8, 0, dest_y, dest_cb, dest_cr, 0, 0, | ||||
| qpix_put[0], chroma_put[0], qpix_avg[0], chroma_avg[0], | qpix_put[0], chroma_put[0], qpix_avg[0], chroma_avg[0], | ||||
| @@ -2833,6 +2851,8 @@ static void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t | |||||
| } | } | ||||
| } | } | ||||
| } | } | ||||
| prefetch_motion(h, 1); | |||||
| } | } | ||||
| static void decode_init_vlc(H264Context *h){ | static void decode_init_vlc(H264Context *h){ | ||||
| @@ -2489,6 +2489,18 @@ static void add_8x8basis_mmx(int16_t rem[64], int16_t basis[64], int scale){ | |||||
| } | } | ||||
| } | } | ||||
/**
 * Define a prefetch routine `name` that issues the x86 instruction `op`
 * once per line.
 *
 * Generated function: void name(void *mem, int stride, int h)
 *   mem    - address of the first line to prefetch
 *   stride - byte distance added to the address after each prefetch
 *   h      - number of lines; values <= 0 prefetch nothing
 *
 * The loop tests h before the first iteration so that a zero or negative
 * count returns immediately instead of wrapping the counter around.
 * __asm__ is used instead of asm so the code also builds in strict ISO C
 * modes, where the plain asm keyword is not available.
 */
#define PREFETCH(name, op) \
void name(void *mem, int stride, int h){\
    const uint8_t *p= mem;\
    for(; h > 0; h--){\
        __asm__ volatile(#op" %0" :: "m"(*p));\
        p+= stride;\
    }\
}
PREFETCH(prefetch_mmx2, prefetcht0)
PREFETCH(prefetch_3dnow, prefetch)
#undef PREFETCH
| #include "h264dsp_mmx.c" | #include "h264dsp_mmx.c" | ||||
| /* external functions, from idct_mmx.c */ | /* external functions, from idct_mmx.c */ | ||||
| @@ -2749,6 +2761,8 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx) | |||||
| c->h264_idct8_add= ff_h264_idct8_add_mmx; | c->h264_idct8_add= ff_h264_idct8_add_mmx; | ||||
| if (mm_flags & MM_MMXEXT) { | if (mm_flags & MM_MMXEXT) { | ||||
| c->prefetch = prefetch_mmx2; | |||||
| c->put_pixels_tab[0][1] = put_pixels16_x2_mmx2; | c->put_pixels_tab[0][1] = put_pixels16_x2_mmx2; | ||||
| c->put_pixels_tab[0][2] = put_pixels16_y2_mmx2; | c->put_pixels_tab[0][2] = put_pixels16_y2_mmx2; | ||||
| @@ -2879,6 +2893,8 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx) | |||||
| c->sub_hfyu_median_prediction= sub_hfyu_median_prediction_mmx2; | c->sub_hfyu_median_prediction= sub_hfyu_median_prediction_mmx2; | ||||
| #endif //CONFIG_ENCODERS | #endif //CONFIG_ENCODERS | ||||
| } else if (mm_flags & MM_3DNOW) { | } else if (mm_flags & MM_3DNOW) { | ||||
| c->prefetch = prefetch_3dnow; | |||||
| c->put_pixels_tab[0][1] = put_pixels16_x2_3dnow; | c->put_pixels_tab[0][1] = put_pixels16_x2_3dnow; | ||||
| c->put_pixels_tab[0][2] = put_pixels16_y2_3dnow; | c->put_pixels_tab[0][2] = put_pixels16_y2_3dnow; | ||||