Originally committed as revision 2372 to svn://svn.ffmpeg.org/ffmpeg/trunk (tags/v0.5)
| @@ -2526,6 +2526,24 @@ static void diff_bytes_c(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w){ | |||
| dst[i+0] = src1[i+0]-src2[i+0]; | |||
| } | |||
/**
 * Portable C version of huffyuv's median-prediction subtraction.
 *
 * For every column x in [0, w) the residual
 *     dst[x] = src2[x] - mid_pred(L, T, (L + T - LT) & 0xFF)
 * is stored, where T is src1[x], L is the previous src2 sample and LT is the
 * previous src1 sample. For x == 0, L and LT come from *left and *left_top.
 * All neighbour values are handled as 8-bit quantities.
 *
 * @param dst      receives w residual bytes
 * @param src1     row used as the "top" neighbour
 * @param src2     row being predicted
 * @param w        number of samples to process
 * @param left     in: initial left sample; out: last src2 sample processed
 * @param left_top in: initial top-left sample; out: last src1 sample processed
 */
static void sub_hfyu_median_prediction_c(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w, int *left, int *left_top){
    uint8_t prev_cur   = *left;     /* L  : sample to the left in src2 */
    uint8_t prev_above = *left_top; /* LT : sample to the left in src1 */
    int x;

    for(x = 0; x < w; x++){
        const uint8_t above = src1[x];
        const uint8_t cur   = src2[x];
        /* gradient predictor, wrapped to 8 bits */
        const int gradient  = (prev_cur + above - prev_above) & 0xFF;

        dst[x] = cur - mid_pred(prev_cur, above, gradient);

        /* slide the neighbourhood one sample to the right */
        prev_above = above;
        prev_cur   = cur;
    }

    *left     = prev_cur;
    *left_top = prev_above;
}
/* Two-point butterfly: sum = a + b and diff = a - b.
 * Expands to two complete statements (each already terminated by ';'), so it
 * must not be used as the unbraced body of an if/for/while. */
#define BUTTERFLY2(sum,diff,a,b) \
sum= (a)+(b);\
diff= (a)-(b);
| @@ -3007,6 +3025,7 @@ void dsputil_init(DSPContext* c, AVCodecContext *avctx) | |||
| c->add_bytes= add_bytes_c; | |||
| c->diff_bytes= diff_bytes_c; | |||
| c->sub_hfyu_median_prediction= sub_hfyu_median_prediction_c; | |||
| c->bswap_buf= bswap_buf; | |||
| #ifdef HAVE_MMX | |||
| @@ -234,6 +234,11 @@ typedef struct DSPContext { | |||
| /* huffyuv specific */ | |||
| void (*add_bytes)(uint8_t *dst/*align 16*/, uint8_t *src/*align 16*/, int w); | |||
| void (*diff_bytes)(uint8_t *dst/*align 16*/, uint8_t *src1/*align 16*/, uint8_t *src2/*align 1*/,int w); | |||
| /** | |||
| * subtract huffyuv's variant of median prediction | |||
| * note, this might read from src1[-1], src2[-1] | |||
| */ | |||
| void (*sub_hfyu_median_prediction)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w, int *left, int *left_top); | |||
| void (*bswap_buf)(uint32_t *dst, uint32_t *src, int w); | |||
| /* (I)DCT */ | |||
| @@ -153,25 +153,6 @@ static inline void add_median_prediction(uint8_t *dst, uint8_t *src1, uint8_t *d | |||
| *left_top= lt; | |||
| } | |||
//FIXME optimize
/**
 * Write the median-prediction residual of one row into dst.
 *
 * Each output byte is src2[col] minus mid_pred(west, north, gradient), with
 * north = src1[col], west = the previous src2 sample, and
 * gradient = (west + north - northwest) & 0xFF, northwest being the previous
 * src1 sample. The first column takes west/northwest from *left / *left_top;
 * on return both are updated to the last samples consumed (as 8-bit values).
 */
static inline void sub_median_prediction(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w, int *left, int *left_top){
    uint8_t west      = *left;
    uint8_t northwest = *left_top;
    int col = 0;

    while(col < w){
        const uint8_t north = src1[col];
        const int estimate  = mid_pred(west, north, (west + north - northwest) & 0xFF);

        northwest = north;
        west      = src2[col];
        dst[col]  = west - estimate;
        col++;
    }

    *left     = west;
    *left_top = northwest;
}
| static inline void add_left_prediction_bgr32(uint8_t *dst, uint8_t *src, int w, int *red, int *green, int *blue){ | |||
| int i; | |||
| int r,g,b; | |||
| @@ -999,9 +980,9 @@ static int encode_frame(AVCodecContext *avctx, unsigned char *buf, int buf_size, | |||
| lefttopy= p->data[0][3]; | |||
| lefttopu= p->data[1][1]; | |||
| lefttopv= p->data[2][1]; | |||
| sub_median_prediction(s->temp[0], p->data[0]+4, p->data[0] + fake_ystride+4, width-4 , &lefty, &lefttopy); | |||
| sub_median_prediction(s->temp[1], p->data[1]+2, p->data[1] + fake_ustride+2, width2-2, &leftu, &lefttopu); | |||
| sub_median_prediction(s->temp[2], p->data[2]+2, p->data[2] + fake_vstride+2, width2-2, &leftv, &lefttopv); | |||
| s->dsp.sub_hfyu_median_prediction(s->temp[0], p->data[0]+4, p->data[0] + fake_ystride+4, width-4 , &lefty, &lefttopy); | |||
| s->dsp.sub_hfyu_median_prediction(s->temp[1], p->data[1]+2, p->data[1] + fake_ustride+2, width2-2, &leftu, &lefttopu); | |||
| s->dsp.sub_hfyu_median_prediction(s->temp[2], p->data[2]+2, p->data[2] + fake_vstride+2, width2-2, &leftv, &lefttopv); | |||
| encode_422_bitstream(s, width-4); | |||
| y++; cy++; | |||
| @@ -1011,7 +992,7 @@ static int encode_frame(AVCodecContext *avctx, unsigned char *buf, int buf_size, | |||
| if(s->bitstream_bpp==12){ | |||
| while(2*cy > y){ | |||
| ydst= p->data[0] + p->linesize[0]*y; | |||
| sub_median_prediction(s->temp[0], ydst - fake_ystride, ydst, width , &lefty, &lefttopy); | |||
| s->dsp.sub_hfyu_median_prediction(s->temp[0], ydst - fake_ystride, ydst, width , &lefty, &lefttopy); | |||
| encode_gray_bitstream(s, width); | |||
| y++; | |||
| } | |||
| @@ -1021,9 +1002,9 @@ static int encode_frame(AVCodecContext *avctx, unsigned char *buf, int buf_size, | |||
| udst= p->data[1] + p->linesize[1]*cy; | |||
| vdst= p->data[2] + p->linesize[2]*cy; | |||
| sub_median_prediction(s->temp[0], ydst - fake_ystride, ydst, width , &lefty, &lefttopy); | |||
| sub_median_prediction(s->temp[1], udst - fake_ustride, udst, width2, &leftu, &lefttopu); | |||
| sub_median_prediction(s->temp[2], vdst - fake_vstride, vdst, width2, &leftv, &lefttopv); | |||
| s->dsp.sub_hfyu_median_prediction(s->temp[0], ydst - fake_ystride, ydst, width , &lefty, &lefttopy); | |||
| s->dsp.sub_hfyu_median_prediction(s->temp[1], udst - fake_ustride, udst, width2, &leftu, &lefttopu); | |||
| s->dsp.sub_hfyu_median_prediction(s->temp[2], vdst - fake_vstride, vdst, width2, &leftv, &lefttopv); | |||
| encode_422_bitstream(s, width); | |||
| } | |||
| @@ -583,6 +583,43 @@ static void diff_bytes_mmx(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w){ | |||
| for(; i<w; i++) | |||
| dst[i+0] = src1[i+0]-src2[i+0]; | |||
| } | |||
| static void sub_hfyu_median_prediction_mmx2(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w, int *left, int *left_top){ | |||
| int i=0; | |||
| uint8_t l, lt; | |||
| asm volatile( | |||
| "1: \n\t" | |||
| "movq -1(%1, %0), %%mm0 \n\t" // LT | |||
| "movq (%1, %0), %%mm1 \n\t" // T | |||
| "movq -1(%2, %0), %%mm2 \n\t" // L | |||
| "movq (%2, %0), %%mm3 \n\t" // X | |||
| "movq %%mm2, %%mm4 \n\t" // L | |||
| "psubb %%mm0, %%mm2 \n\t" | |||
| "paddb %%mm1, %%mm2 \n\t" // L + T - LT | |||
| "movq %%mm4, %%mm5 \n\t" // L | |||
| "pmaxub %%mm1, %%mm4 \n\t" // max(T, L) | |||
| "pminub %%mm5, %%mm1 \n\t" // min(T, L) | |||
| "pminub %%mm2, %%mm4 \n\t" | |||
| "pmaxub %%mm1, %%mm4 \n\t" | |||
| "psubb %%mm4, %%mm3 \n\t" // dst - pred | |||
| "movq %%mm3, (%3, %0) \n\t" | |||
| "addl $8, %0 \n\t" | |||
| "cmpl %4, %0 \n\t" | |||
| " jb 1b \n\t" | |||
| : "+r" (i) | |||
| : "r"(src1), "r"(src2), "r"(dst), "r"(w) | |||
| ); | |||
| l= *left; | |||
| lt= *left_top; | |||
| dst[0]= src2[0] - mid_pred(l, src1[0], (l + src1[0] - lt)&0xFF); | |||
| *left_top= src1[w-1]; | |||
| *left = src2[w-1]; | |||
| } | |||
| #define LBUTTERFLY2(a1,b1,a2,b2)\ | |||
| "paddw " #b1 ", " #a1 " \n\t"\ | |||
| "paddw " #b2 ", " #a2 " \n\t"\ | |||
| @@ -1699,6 +1736,8 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx) | |||
| SET_QPEL_FUNC(qpel_pixels_tab[1][14], qpel8_mc23_mmx2) | |||
| SET_QPEL_FUNC(qpel_pixels_tab[1][15], qpel8_mc33_mmx2) | |||
| #endif | |||
| c->sub_hfyu_median_prediction= sub_hfyu_median_prediction_mmx2; | |||
| } else if (mm_flags & MM_3DNOW) { | |||
| c->put_pixels_tab[0][1] = put_pixels16_x2_3dnow; | |||
| c->put_pixels_tab[0][2] = put_pixels16_y2_3dnow; | |||