Originally committed as revision 4824 to svn://svn.ffmpeg.org/ffmpeg/trunktags/v0.5
| @@ -1487,6 +1487,17 @@ H264_CHROMA_MC(avg_ , op_avg) | |||||
| #undef op_avg | #undef op_avg | ||||
| #undef op_put | #undef op_put | ||||
| static inline void copy_block2(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h) /* Copies h rows from src to dst, one LD16/ST16 pair per row. */ | |||||
| { | |||||
| int i; | |||||
| for(i=0; i<h; i++) /* one iteration per row */ | |||||
| { | |||||
| ST16(dst , LD16(src )); /* LD16 is not shown here; presumably the 16-bit unaligned load matching ST16 - confirm */ | |||||
| dst+=dstStride; /* each pointer advances by its own row stride */ | |||||
| src+=srcStride; | |||||
| } | |||||
| } | |||||
| static inline void copy_block4(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h) | static inline void copy_block4(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h) | ||||
| { | { | ||||
| int i; | int i; | ||||
| @@ -2052,6 +2063,68 @@ QPEL_MC(0, avg_ , _ , op_avg) | |||||
| #if 1 | #if 1 | ||||
| #define H264_LOWPASS(OPNAME, OP, OP2) \ | #define H264_LOWPASS(OPNAME, OP, OP2) \ | ||||
| static void OPNAME ## h264_qpel2_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){ /* Horizontal 6-tap filter producing a 2x2 block; each row reads src[-2]..src[4]. */\ | |||||
| const int h=2; /* number of output rows */\ | |||||
| uint8_t *cm = cropTbl + MAX_NEG_CROP; /* not referenced directly below; presumably used inside the OP expansion - confirm */\ | |||||
| int i;\ | |||||
| for(i=0; i<h; i++)\ | |||||
| {\ | |||||
| OP(dst[0], (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3])); /* weights (1,-5,20,20,-5,1) centred between src[0] and src[1]; the raw sum is handed to OP */\ | |||||
| OP(dst[1], (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4])); /* same weights shifted one sample to the right */\ | |||||
| dst+=dstStride;\ | |||||
| src+=srcStride;\ | |||||
| }\ | |||||
| }\ | |||||
| \ | |||||
| static void OPNAME ## h264_qpel2_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){ /* Vertical 6-tap filter producing a 2x2 block; each column reads rows -2..4 relative to src. */\ | |||||
| const int w=2; /* number of output columns */\ | |||||
| uint8_t *cm = cropTbl + MAX_NEG_CROP; /* not referenced directly below; presumably used inside the OP expansion - confirm */\ | |||||
| int i;\ | |||||
| for(i=0; i<w; i++) /* one iteration per column */\ | |||||
| {\ | |||||
| const int srcB= src[-2*srcStride]; /* srcB and srcA are the two rows above src0 */\ | |||||
| const int srcA= src[-1*srcStride];\ | |||||
| const int src0= src[0 *srcStride];\ | |||||
| const int src1= src[1 *srcStride];\ | |||||
| const int src2= src[2 *srcStride];\ | |||||
| const int src3= src[3 *srcStride];\ | |||||
| const int src4= src[4 *srcStride];\ | |||||
| OP(dst[0*dstStride], (src0+src1)*20 - (srcA+src2)*5 + (srcB+src3)); /* weights (1,-5,20,20,-5,1) over rows B..3; the raw sum is handed to OP */\ | |||||
| OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*5 + (srcA+src4)); /* same weights shifted one row down (rows A..4) */\ | |||||
| dst++; /* step to the next column */\ | |||||
| src++;\ | |||||
| }\ | |||||
| }\ | |||||
| \ | |||||
| static void OPNAME ## h264_qpel2_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){ /* Two-pass filter for a 2x2 block: horizontal pass into tmp, then vertical pass over tmp handed to OP2. */\ | |||||
| const int h=2;\ | |||||
| const int w=2;\ | |||||
| uint8_t *cm = cropTbl + MAX_NEG_CROP; /* not referenced directly below; presumably used inside the OP2 expansion - confirm */\ | |||||
| int i;\ | |||||
| src -= 2*srcStride; /* start two rows above the block so the vertical pass has its upper taps */\ | |||||
| for(i=0; i<h+5; i++) /* first pass: h+5 = 7 rows filtered horizontally */\ | |||||
| {\ | |||||
| tmp[0]= (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]); /* raw weighted sum stored in the int16_t scratch buffer, no OP applied here */\ | |||||
| tmp[1]= (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]);\ | |||||
| tmp+=tmpStride;\ | |||||
| src+=srcStride;\ | |||||
| }\ | |||||
| tmp -= tmpStride*(h+5-2); /* rewind to the third filtered row, so tmp[-2*tmpStride] is the first row written above */\ | |||||
| for(i=0; i<w; i++) /* second pass: one iteration per column of tmp */\ | |||||
| {\ | |||||
| const int tmpB= tmp[-2*tmpStride];\ | |||||
| const int tmpA= tmp[-1*tmpStride];\ | |||||
| const int tmp0= tmp[0 *tmpStride];\ | |||||
| const int tmp1= tmp[1 *tmpStride];\ | |||||
| const int tmp2= tmp[2 *tmpStride];\ | |||||
| const int tmp3= tmp[3 *tmpStride];\ | |||||
| const int tmp4= tmp[4 *tmpStride];\ | |||||
| OP2(dst[0*dstStride], (tmp0+tmp1)*20 - (tmpA+tmp2)*5 + (tmpB+tmp3)); /* same (1,-5,20,20,-5,1) weights applied vertically to the intermediate sums */\ | |||||
| OP2(dst[1*dstStride], (tmp1+tmp2)*20 - (tmp0+tmp3)*5 + (tmpA+tmp4));\ | |||||
| dst++;\ | |||||
| tmp++;\ | |||||
| }\ | |||||
| }\ | |||||
| static void OPNAME ## h264_qpel4_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\ | static void OPNAME ## h264_qpel4_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\ | ||||
| const int h=4;\ | const int h=4;\ | ||||
| uint8_t *cm = cropTbl + MAX_NEG_CROP;\ | uint8_t *cm = cropTbl + MAX_NEG_CROP;\ | ||||
| @@ -2398,6 +2471,7 @@ static void OPNAME ## h264_qpel ## SIZE ## _mc32_c(uint8_t *dst, uint8_t *src, i | |||||
| H264_LOWPASS(put_ , op_put, op2_put) | H264_LOWPASS(put_ , op_put, op2_put) | ||||
| H264_LOWPASS(avg_ , op_avg, op2_avg) | H264_LOWPASS(avg_ , op_avg, op2_avg) | ||||
| H264_MC(put_, 2) | |||||
| H264_MC(put_, 4) | H264_MC(put_, 4) | ||||
| H264_MC(put_, 8) | H264_MC(put_, 8) | ||||
| H264_MC(put_, 16) | H264_MC(put_, 16) | ||||
| @@ -3879,6 +3953,7 @@ void dsputil_init(DSPContext* c, AVCodecContext *avctx) | |||||
| dspfunc(put_h264_qpel, 0, 16); | dspfunc(put_h264_qpel, 0, 16); | ||||
| dspfunc(put_h264_qpel, 1, 8); | dspfunc(put_h264_qpel, 1, 8); | ||||
| dspfunc(put_h264_qpel, 2, 4); | dspfunc(put_h264_qpel, 2, 4); | ||||
| dspfunc(put_h264_qpel, 3, 2); | |||||
| dspfunc(avg_h264_qpel, 0, 16); | dspfunc(avg_h264_qpel, 0, 16); | ||||
| dspfunc(avg_h264_qpel, 1, 8); | dspfunc(avg_h264_qpel, 1, 8); | ||||
| dspfunc(avg_h264_qpel, 2, 4); | dspfunc(avg_h264_qpel, 2, 4); | ||||
| @@ -253,8 +253,8 @@ typedef struct DSPContext { | |||||
| h264_chroma_mc_func put_h264_chroma_pixels_tab[3]; | h264_chroma_mc_func put_h264_chroma_pixels_tab[3]; | ||||
| h264_chroma_mc_func avg_h264_chroma_pixels_tab[3]; | h264_chroma_mc_func avg_h264_chroma_pixels_tab[3]; | ||||
| qpel_mc_func put_h264_qpel_pixels_tab[3][16]; | |||||
| qpel_mc_func avg_h264_qpel_pixels_tab[3][16]; | |||||
| qpel_mc_func put_h264_qpel_pixels_tab[4][16]; | |||||
| qpel_mc_func avg_h264_qpel_pixels_tab[4][16]; | |||||
| h264_weight_func weight_h264_pixels_tab[10]; | h264_weight_func weight_h264_pixels_tab[10]; | ||||
| h264_biweight_func biweight_h264_pixels_tab[10]; | h264_biweight_func biweight_h264_pixels_tab[10]; | ||||
| @@ -510,6 +510,7 @@ struct unaligned_16 { uint16_t l; } __attribute__((packed)); | |||||
| #define LD32(a) (((const struct unaligned_32 *) (a))->l) | #define LD32(a) (((const struct unaligned_32 *) (a))->l) | ||||
| #define LD64(a) (((const struct unaligned_64 *) (a))->l) | #define LD64(a) (((const struct unaligned_64 *) (a))->l) | ||||
| #define ST16(a, b) (((struct unaligned_16 *) (a))->l) = (b) /* stores b into the uint16_t member of a packed struct unaligned_16 overlaid at address a */ | |||||
| #define ST32(a, b) (((struct unaligned_32 *) (a))->l) = (b) | #define ST32(a, b) (((struct unaligned_32 *) (a))->l) = (b) | ||||
| #else /* __GNUC__ */ | #else /* __GNUC__ */ | ||||
| @@ -2500,6 +2500,7 @@ static void pred_block(SnowContext *s, uint8_t *dst, uint8_t *src, uint8_t *tmp, | |||||
| int my= block->my*scale; | int my= block->my*scale; | ||||
| const int dx= mx&15; | const int dx= mx&15; | ||||
| const int dy= my&15; | const int dy= my&15; | ||||
| const int tab_index= 3 - (b_w>>2) + (b_w>>4); | |||||
| sx += (mx>>4) - 2; | sx += (mx>>4) - 2; | ||||
| sy += (my>>4) - 2; | sy += (my>>4) - 2; | ||||
| src += sx + sy*stride; | src += sx + sy*stride; | ||||
| @@ -2511,17 +2512,18 @@ static void pred_block(SnowContext *s, uint8_t *dst, uint8_t *src, uint8_t *tmp, | |||||
| assert(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h); | assert(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h); | ||||
| assert(!(b_w&(b_w-1))); | assert(!(b_w&(b_w-1))); | ||||
| assert(b_w>1 && b_h>1); | assert(b_w>1 && b_h>1); | ||||
| if((dx&3) || (dy&3) || b_w==2 || b_h==2) | |||||
| assert(tab_index>=0 && tab_index<4); | |||||
| if((dx&3) || (dy&3)) | |||||
| mc_block(dst, src, tmp, stride, b_w, b_h, dx, dy); | mc_block(dst, src, tmp, stride, b_w, b_h, dx, dy); | ||||
| else if(b_w==b_h) | else if(b_w==b_h) | ||||
| s->dsp.put_h264_qpel_pixels_tab[2-(b_w>>3)][dy+(dx>>2)](dst,src + 2 + 2*stride,stride); | |||||
| s->dsp.put_h264_qpel_pixels_tab[tab_index ][dy+(dx>>2)](dst,src + 2 + 2*stride,stride); | |||||
| else if(b_w==2*b_h){ | else if(b_w==2*b_h){ | ||||
| s->dsp.put_h264_qpel_pixels_tab[2-(b_h>>3)][dy+(dx>>2)](dst ,src + 2 + 2*stride,stride); | |||||
| s->dsp.put_h264_qpel_pixels_tab[2-(b_h>>3)][dy+(dx>>2)](dst+b_h,src + 2 + b_h + 2*stride,stride); | |||||
| s->dsp.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst ,src + 2 + 2*stride,stride); | |||||
| s->dsp.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst+b_h,src + 2 + b_h + 2*stride,stride); | |||||
| }else{ | }else{ | ||||
| assert(2*b_w==b_h); | assert(2*b_w==b_h); | ||||
| s->dsp.put_h264_qpel_pixels_tab[2-(b_w>>3)][dy+(dx>>2)](dst ,src + 2 + 2*stride ,stride); | |||||
| s->dsp.put_h264_qpel_pixels_tab[2-(b_w>>3)][dy+(dx>>2)](dst+b_w*stride,src + 2 + 2*stride+b_w*stride,stride); | |||||
| s->dsp.put_h264_qpel_pixels_tab[tab_index ][dy+(dx>>2)](dst ,src + 2 + 2*stride ,stride); | |||||
| s->dsp.put_h264_qpel_pixels_tab[tab_index ][dy+(dx>>2)](dst+b_w*stride,src + 2 + 2*stride+b_w*stride,stride); | |||||
| } | } | ||||
| } | } | ||||
| } | } | ||||
| @@ -123,8 +123,8 @@ stddev: 0.00 PSNR:99.99 bytes:7602176 | |||||
| 1197138 ./data/a-snow.avi | 1197138 ./data/a-snow.avi | ||||
| e7c746171b092266b0cf55bb5de2a40a *./data/out.yuv | e7c746171b092266b0cf55bb5de2a40a *./data/out.yuv | ||||
| stddev: 2.89 PSNR:38.87 bytes:7602176 | stddev: 2.89 PSNR:38.87 bytes:7602176 | ||||
| 94bedf8929178a8202ae3b5dbcdb84dd *./data/a-snow53.avi | |||||
| 3533696 ./data/a-snow53.avi | |||||
| 11fd61ee7e67ef7a7b2a3df973691305 *./data/a-snow53.avi | |||||
| 3533710 ./data/a-snow53.avi | |||||
| 799d3db687f6cdd7a837ec156efc171f *./data/out.yuv | 799d3db687f6cdd7a837ec156efc171f *./data/out.yuv | ||||
| stddev: 0.00 PSNR:99.99 bytes:7602176 | stddev: 0.00 PSNR:99.99 bytes:7602176 | ||||
| e1da20e3f52f4d2aa18e9486986161fc *./data/a-dv.dv | e1da20e3f52f4d2aa18e9486986161fc *./data/a-dv.dv | ||||
| @@ -123,8 +123,8 @@ b926518ac399c7af0f218a7115315b4f *./data/a-snow.avi | |||||
| 286800 ./data/a-snow.avi | 286800 ./data/a-snow.avi | ||||
| 6c59db71d950610f854d05e2cef18609 *./data/out.yuv | 6c59db71d950610f854d05e2cef18609 *./data/out.yuv | ||||
| stddev: 2.32 PSNR:40.80 bytes:7602176 | stddev: 2.32 PSNR:40.80 bytes:7602176 | ||||
| 4d2bcc832e318fad3c25614e31daa6fe *./data/a-snow53.avi | |||||
| 2725630 ./data/a-snow53.avi | |||||
| 3f20642bb789dfb75ae3e8c03f9b425c *./data/a-snow53.avi | |||||
| 2725570 ./data/a-snow53.avi | |||||
| dde5895817ad9d219f79a52d0bdfb001 *./data/out.yuv | dde5895817ad9d219f79a52d0bdfb001 *./data/out.yuv | ||||
| stddev: 0.00 PSNR:99.99 bytes:7602176 | stddev: 0.00 PSNR:99.99 bytes:7602176 | ||||
| a553532dcd54c1c421b52c3b6fece6ef *./data/a-dv.dv | a553532dcd54c1c421b52c3b6fece6ef *./data/a-dv.dv | ||||