* commit '57f09608e1600d1cf1679885a46f5004d522d68f': dsputil: Move thirdpel-related bits into their own context. Conflicts: libavcodec/svq3.c. Merged-by: Michael Niedermayer <michaelni@gmx.at> (tags/n2.3)
| @@ -1780,6 +1780,7 @@ CONFIG_EXTRA=" | |||
| rtpdec | |||
| rtpenc_chain | |||
| sinewin | |||
| tpeldsp | |||
| videodsp | |||
| vp3dsp | |||
| " | |||
| @@ -2090,7 +2091,7 @@ sonic_ls_encoder_select="golomb" | |||
| sp5x_decoder_select="mjpeg_decoder" | |||
| svq1_decoder_select="hpeldsp" | |||
| svq1_encoder_select="aandcttables dsputil hpeldsp mpegvideoenc" | |||
| svq3_decoder_select="h264_decoder hpeldsp" | |||
| svq3_decoder_select="h264_decoder hpeldsp tpeldsp" | |||
| svq3_decoder_suggest="zlib" | |||
| tak_decoder_select="dsputil" | |||
| theora_decoder_select="vp3_decoder" | |||
| @@ -79,9 +79,6 @@ qpel{8,16}_mc??_old_c / *pixels{8,16}_l4 | |||
| Just used to work around a bug in an old libavcodec encoder version. | |||
| Don't optimize them. | |||
| tpel_mc_func {put,avg}_tpel_pixels_tab | |||
| Used only for SVQ3, so only optimize them if you need fast SVQ3 decoding. | |||
| add_bytes/diff_bytes | |||
| For huffyuv only, optimize if you want a faster ffhuffyuv codec. | |||
| @@ -76,6 +76,7 @@ RDFT-OBJS-$(CONFIG_HARDCODED_TABLES) += sin_tables.o | |||
| OBJS-$(CONFIG_RDFT) += rdft.o $(RDFT-OBJS-yes) | |||
| OBJS-$(CONFIG_SHARED) += log2_tab.o | |||
| OBJS-$(CONFIG_SINEWIN) += sinewin.o | |||
| OBJS-$(CONFIG_TPELDSP) += tpeldsp.o | |||
| OBJS-$(CONFIG_VAAPI) += vaapi.o | |||
| OBJS-$(CONFIG_VDPAU) += vdpau.o | |||
| OBJS-$(CONFIG_VIDEODSP) += videodsp.o | |||
| @@ -50,6 +50,7 @@ uint32_t ff_square_tab[512] = { 0, }; | |||
| #undef BIT_DEPTH | |||
| #define BIT_DEPTH 8 | |||
| #include "tpel_template.c" | |||
| #include "dsputil_template.c" | |||
| // 0x7f7f7f7f or 0x7f7f7f7f7f7f7f7f or whatever, depending on the cpu's native arithmetic size | |||
| @@ -604,284 +605,6 @@ void ff_gmc_c(uint8_t *dst, uint8_t *src, int stride, int h, int ox, int oy, | |||
| } | |||
| } | |||
/**
 * Full-pel "put" case: forwards to the fixed-width put_pixelsN_8_c helper
 * matching the requested block width.
 *
 * Widths other than 2, 4, 8 and 16 are silently ignored (nothing is written).
 *
 * @param dst    destination block
 * @param src    source block
 * @param stride row pitch in bytes, handed on unchanged to the helper
 * @param width  block width in pixels, selects the helper
 * @param height number of rows, handed on unchanged to the helper
 */
static inline void put_tpel_pixels_mc00_c(uint8_t *dst, const uint8_t *src,
                                          int stride, int width, int height)
{
    if (width == 2)
        put_pixels2_8_c(dst, src, stride, height);
    else if (width == 4)
        put_pixels4_8_c(dst, src, stride, height);
    else if (width == 8)
        put_pixels8_8_c(dst, src, stride, height);
    else if (width == 16)
        put_pixels16_8_c(dst, src, stride, height);
}
/**
 * Thirdpel put, current pixel weighted 2/3 and its right neighbour 1/3:
 * dst[x] = (2 * src[x] + src[x + 1] + 1) / 3.
 * The division by 3 is performed as (n * 683) >> 11.
 *
 * @param dst    destination, advanced by stride after every row
 * @param src    source, advanced by stride after every row; one pixel past
 *               the right edge of each row is read
 * @param stride row pitch in bytes shared by dst and src
 * @param width  pixels written per row
 * @param height rows written
 */
static inline void put_tpel_pixels_mc10_c(uint8_t *dst, const uint8_t *src,
                                          int stride, int width, int height)
{
    int x, y;

    for (y = 0; y < height; y++, src += stride, dst += stride) {
        for (x = 0; x < width; x++) {
            const int weighted = 2 * src[x] + src[x + 1] + 1;

            dst[x] = (weighted * 683) >> 11;
        }
    }
}
/**
 * Thirdpel put, current pixel weighted 1/3 and its right neighbour 2/3:
 * dst[x] = (src[x] + 2 * src[x + 1] + 1) / 3.
 * The division by 3 is performed as (n * 683) >> 11.
 *
 * @param dst    destination, advanced by stride after every row
 * @param src    source, advanced by stride after every row; one pixel past
 *               the right edge of each row is read
 * @param stride row pitch in bytes shared by dst and src
 * @param width  pixels written per row
 * @param height rows written
 */
static inline void put_tpel_pixels_mc20_c(uint8_t *dst, const uint8_t *src,
                                          int stride, int width, int height)
{
    int x, y;

    for (y = 0; y < height; y++, src += stride, dst += stride) {
        for (x = 0; x < width; x++) {
            const int weighted = src[x] + 2 * src[x + 1] + 1;

            dst[x] = (weighted * 683) >> 11;
        }
    }
}
/**
 * Thirdpel put, current pixel weighted 2/3 and the pixel below it 1/3:
 * dst[x] = (2 * src[x] + src[x + stride] + 1) / 3.
 * The division by 3 is performed as (n * 683) >> 11.
 *
 * @param dst    destination, advanced by stride after every row
 * @param src    source, advanced by stride after every row; one row below
 *               the last output row is read
 * @param stride row pitch in bytes shared by dst and src
 * @param width  pixels written per row
 * @param height rows written
 */
static inline void put_tpel_pixels_mc01_c(uint8_t *dst, const uint8_t *src,
                                          int stride, int width, int height)
{
    int x, y;

    for (y = 0; y < height; y++) {
        const uint8_t *below = src + stride;

        for (x = 0; x < width; x++)
            dst[x] = ((2 * src[x] + below[x] + 1) * 683) >> 11;

        src  = below;
        dst += stride;
    }
}
/**
 * Thirdpel put over a 2x2 neighbourhood with weights 4, 3 (current row) and
 * 3, 2 (row below):
 * dst[x] = (4 * a + 3 * b + 3 * c + 2 * d + 6) / 12, where a = src[x],
 * b = src[x + 1] and c, d are the same columns one row down.
 * The division by 12 is performed as (n * 2731) >> 15.
 *
 * @param dst    destination, advanced by stride after every row
 * @param src    source; one extra column and one extra row are read
 * @param stride row pitch in bytes shared by dst and src
 * @param width  pixels written per row
 * @param height rows written
 */
static inline void put_tpel_pixels_mc11_c(uint8_t *dst, const uint8_t *src,
                                          int stride, int width, int height)
{
    int x, y;

    for (y = 0; y < height; y++) {
        const uint8_t *top = src;
        const uint8_t *bot = src + stride;

        for (x = 0; x < width; x++) {
            const int sum = 4 * top[x] + 3 * top[x + 1] +
                            3 * bot[x] + 2 * bot[x + 1] + 6;

            dst[x] = (sum * 2731) >> 15;
        }
        src  = bot;
        dst += stride;
    }
}
/**
 * Thirdpel put over a 2x2 neighbourhood with weights 3, 2 (current row) and
 * 4, 3 (row below):
 * dst[x] = (3 * a + 2 * b + 4 * c + 3 * d + 6) / 12, where a = src[x],
 * b = src[x + 1] and c, d are the same columns one row down.
 * The division by 12 is performed as (n * 2731) >> 15.
 *
 * @param dst    destination, advanced by stride after every row
 * @param src    source; one extra column and one extra row are read
 * @param stride row pitch in bytes shared by dst and src
 * @param width  pixels written per row
 * @param height rows written
 */
static inline void put_tpel_pixels_mc12_c(uint8_t *dst, const uint8_t *src,
                                          int stride, int width, int height)
{
    int x, y;

    for (y = 0; y < height; y++) {
        const uint8_t *top = src;
        const uint8_t *bot = src + stride;

        for (x = 0; x < width; x++) {
            const int sum = 3 * top[x] + 2 * top[x + 1] +
                            4 * bot[x] + 3 * bot[x + 1] + 6;

            dst[x] = (sum * 2731) >> 15;
        }
        src  = bot;
        dst += stride;
    }
}
/**
 * Thirdpel put, current pixel weighted 1/3 and the pixel below it 2/3:
 * dst[x] = (src[x] + 2 * src[x + stride] + 1) / 3.
 * The division by 3 is performed as (n * 683) >> 11.
 *
 * @param dst    destination, advanced by stride after every row
 * @param src    source, advanced by stride after every row; one row below
 *               the last output row is read
 * @param stride row pitch in bytes shared by dst and src
 * @param width  pixels written per row
 * @param height rows written
 */
static inline void put_tpel_pixels_mc02_c(uint8_t *dst, const uint8_t *src,
                                          int stride, int width, int height)
{
    int x, y;

    for (y = 0; y < height; y++) {
        const uint8_t *below = src + stride;

        for (x = 0; x < width; x++)
            dst[x] = ((src[x] + 2 * below[x] + 1) * 683) >> 11;

        src  = below;
        dst += stride;
    }
}
/**
 * Thirdpel put over a 2x2 neighbourhood with weights 3, 4 (current row) and
 * 2, 3 (row below):
 * dst[x] = (3 * a + 4 * b + 2 * c + 3 * d + 6) / 12, where a = src[x],
 * b = src[x + 1] and c, d are the same columns one row down.
 * The division by 12 is performed as (n * 2731) >> 15.
 *
 * @param dst    destination, advanced by stride after every row
 * @param src    source; one extra column and one extra row are read
 * @param stride row pitch in bytes shared by dst and src
 * @param width  pixels written per row
 * @param height rows written
 */
static inline void put_tpel_pixels_mc21_c(uint8_t *dst, const uint8_t *src,
                                          int stride, int width, int height)
{
    int x, y;

    for (y = 0; y < height; y++) {
        const uint8_t *top = src;
        const uint8_t *bot = src + stride;

        for (x = 0; x < width; x++) {
            const int sum = 3 * top[x] + 4 * top[x + 1] +
                            2 * bot[x] + 3 * bot[x + 1] + 6;

            dst[x] = (sum * 2731) >> 15;
        }
        src  = bot;
        dst += stride;
    }
}
/**
 * Thirdpel put over a 2x2 neighbourhood with weights 2, 3 (current row) and
 * 3, 4 (row below):
 * dst[x] = (2 * a + 3 * b + 3 * c + 4 * d + 6) / 12, where a = src[x],
 * b = src[x + 1] and c, d are the same columns one row down.
 * The division by 12 is performed as (n * 2731) >> 15.
 *
 * @param dst    destination, advanced by stride after every row
 * @param src    source; one extra column and one extra row are read
 * @param stride row pitch in bytes shared by dst and src
 * @param width  pixels written per row
 * @param height rows written
 */
static inline void put_tpel_pixels_mc22_c(uint8_t *dst, const uint8_t *src,
                                          int stride, int width, int height)
{
    int x, y;

    for (y = 0; y < height; y++) {
        const uint8_t *top = src;
        const uint8_t *bot = src + stride;

        for (x = 0; x < width; x++) {
            const int sum = 2 * top[x] + 3 * top[x + 1] +
                            3 * bot[x] + 4 * bot[x + 1] + 6;

            dst[x] = (sum * 2731) >> 15;
        }
        src  = bot;
        dst += stride;
    }
}
/**
 * Full-pel "avg" case: forwards to the fixed-width avg_pixelsN_8_c helper
 * matching the requested block width.
 *
 * Widths other than 2, 4, 8 and 16 are silently ignored (dst is left as is).
 *
 * @param dst    destination block
 * @param src    source block
 * @param stride row pitch in bytes, handed on unchanged to the helper
 * @param width  block width in pixels, selects the helper
 * @param height number of rows, handed on unchanged to the helper
 */
static inline void avg_tpel_pixels_mc00_c(uint8_t *dst, const uint8_t *src,
                                          int stride, int width, int height)
{
    if (width == 2)
        avg_pixels2_8_c(dst, src, stride, height);
    else if (width == 4)
        avg_pixels4_8_c(dst, src, stride, height);
    else if (width == 8)
        avg_pixels8_8_c(dst, src, stride, height);
    else if (width == 16)
        avg_pixels16_8_c(dst, src, stride, height);
}
/**
 * Thirdpel avg, current pixel weighted 2/3 and its right neighbour 1/3.
 * The prediction p = (2 * src[x] + src[x + 1] + 1) / 3 (division done as
 * (n * 683) >> 11) is blended into the destination with upward rounding:
 * dst[x] = (dst[x] + p + 1) >> 1.
 *
 * @param dst    destination, read and rewritten; advanced by stride per row
 * @param src    source; one pixel past the right edge of each row is read
 * @param stride row pitch in bytes shared by dst and src
 * @param width  pixels processed per row
 * @param height rows processed
 */
static inline void avg_tpel_pixels_mc10_c(uint8_t *dst, const uint8_t *src,
                                          int stride, int width, int height)
{
    int x, y;

    for (y = 0; y < height; y++, src += stride, dst += stride) {
        for (x = 0; x < width; x++) {
            const int pred = ((2 * src[x] + src[x + 1] + 1) * 683) >> 11;

            dst[x] = (dst[x] + pred + 1) >> 1;
        }
    }
}
/**
 * Thirdpel avg, current pixel weighted 1/3 and its right neighbour 2/3.
 * The prediction p = (src[x] + 2 * src[x + 1] + 1) / 3 (division done as
 * (n * 683) >> 11) is blended into the destination with upward rounding:
 * dst[x] = (dst[x] + p + 1) >> 1.
 *
 * @param dst    destination, read and rewritten; advanced by stride per row
 * @param src    source; one pixel past the right edge of each row is read
 * @param stride row pitch in bytes shared by dst and src
 * @param width  pixels processed per row
 * @param height rows processed
 */
static inline void avg_tpel_pixels_mc20_c(uint8_t *dst, const uint8_t *src,
                                          int stride, int width, int height)
{
    int x, y;

    for (y = 0; y < height; y++, src += stride, dst += stride) {
        for (x = 0; x < width; x++) {
            const int pred = ((src[x] + 2 * src[x + 1] + 1) * 683) >> 11;

            dst[x] = (dst[x] + pred + 1) >> 1;
        }
    }
}
/**
 * Thirdpel avg, current pixel weighted 2/3 and the pixel below it 1/3.
 * The prediction p = (2 * src[x] + src[x + stride] + 1) / 3 (division done
 * as (n * 683) >> 11) is blended into the destination with upward rounding:
 * dst[x] = (dst[x] + p + 1) >> 1.
 *
 * @param dst    destination, read and rewritten; advanced by stride per row
 * @param src    source; one row below the last processed row is read
 * @param stride row pitch in bytes shared by dst and src
 * @param width  pixels processed per row
 * @param height rows processed
 */
static inline void avg_tpel_pixels_mc01_c(uint8_t *dst, const uint8_t *src,
                                          int stride, int width, int height)
{
    int x, y;

    for (y = 0; y < height; y++) {
        const uint8_t *below = src + stride;

        for (x = 0; x < width; x++) {
            const int pred = ((2 * src[x] + below[x] + 1) * 683) >> 11;

            dst[x] = (dst[x] + pred + 1) >> 1;
        }
        src  = below;
        dst += stride;
    }
}
/**
 * Thirdpel avg over a 2x2 neighbourhood with weights 4, 3 (current row) and
 * 3, 2 (row below).
 * The prediction p = (4 * a + 3 * b + 3 * c + 2 * d + 6) / 12 (division done
 * as (n * 2731) >> 15) is blended into the destination with upward rounding:
 * dst[x] = (dst[x] + p + 1) >> 1.
 *
 * @param dst    destination, read and rewritten; advanced by stride per row
 * @param src    source; one extra column and one extra row are read
 * @param stride row pitch in bytes shared by dst and src
 * @param width  pixels processed per row
 * @param height rows processed
 */
static inline void avg_tpel_pixels_mc11_c(uint8_t *dst, const uint8_t *src,
                                          int stride, int width, int height)
{
    int x, y;

    for (y = 0; y < height; y++) {
        const uint8_t *top = src;
        const uint8_t *bot = src + stride;

        for (x = 0; x < width; x++) {
            const int sum  = 4 * top[x] + 3 * top[x + 1] +
                             3 * bot[x] + 2 * bot[x + 1] + 6;
            const int pred = (sum * 2731) >> 15;

            dst[x] = (dst[x] + pred + 1) >> 1;
        }
        src  = bot;
        dst += stride;
    }
}
/**
 * Thirdpel avg over a 2x2 neighbourhood with weights 3, 2 (current row) and
 * 4, 3 (row below).
 * The prediction p = (3 * a + 2 * b + 4 * c + 3 * d + 6) / 12 (division done
 * as (n * 2731) >> 15) is blended into the destination with upward rounding:
 * dst[x] = (dst[x] + p + 1) >> 1.
 *
 * @param dst    destination, read and rewritten; advanced by stride per row
 * @param src    source; one extra column and one extra row are read
 * @param stride row pitch in bytes shared by dst and src
 * @param width  pixels processed per row
 * @param height rows processed
 */
static inline void avg_tpel_pixels_mc12_c(uint8_t *dst, const uint8_t *src,
                                          int stride, int width, int height)
{
    int x, y;

    for (y = 0; y < height; y++) {
        const uint8_t *top = src;
        const uint8_t *bot = src + stride;

        for (x = 0; x < width; x++) {
            const int sum  = 3 * top[x] + 2 * top[x + 1] +
                             4 * bot[x] + 3 * bot[x + 1] + 6;
            const int pred = (sum * 2731) >> 15;

            dst[x] = (dst[x] + pred + 1) >> 1;
        }
        src  = bot;
        dst += stride;
    }
}
/**
 * Thirdpel avg, current pixel weighted 1/3 and the pixel below it 2/3.
 * The prediction p = (src[x] + 2 * src[x + stride] + 1) / 3 (division done
 * as (n * 683) >> 11) is blended into the destination with upward rounding:
 * dst[x] = (dst[x] + p + 1) >> 1.
 *
 * @param dst    destination, read and rewritten; advanced by stride per row
 * @param src    source; one row below the last processed row is read
 * @param stride row pitch in bytes shared by dst and src
 * @param width  pixels processed per row
 * @param height rows processed
 */
static inline void avg_tpel_pixels_mc02_c(uint8_t *dst, const uint8_t *src,
                                          int stride, int width, int height)
{
    int x, y;

    for (y = 0; y < height; y++) {
        const uint8_t *below = src + stride;

        for (x = 0; x < width; x++) {
            const int pred = ((src[x] + 2 * below[x] + 1) * 683) >> 11;

            dst[x] = (dst[x] + pred + 1) >> 1;
        }
        src  = below;
        dst += stride;
    }
}
/**
 * Thirdpel avg over a 2x2 neighbourhood with weights 3, 4 (current row) and
 * 2, 3 (row below).
 * The prediction p = (3 * a + 4 * b + 2 * c + 3 * d + 6) / 12 (division done
 * as (n * 2731) >> 15) is blended into the destination with upward rounding:
 * dst[x] = (dst[x] + p + 1) >> 1.
 *
 * @param dst    destination, read and rewritten; advanced by stride per row
 * @param src    source; one extra column and one extra row are read
 * @param stride row pitch in bytes shared by dst and src
 * @param width  pixels processed per row
 * @param height rows processed
 */
static inline void avg_tpel_pixels_mc21_c(uint8_t *dst, const uint8_t *src,
                                          int stride, int width, int height)
{
    int x, y;

    for (y = 0; y < height; y++) {
        const uint8_t *top = src;
        const uint8_t *bot = src + stride;

        for (x = 0; x < width; x++) {
            const int sum  = 3 * top[x] + 4 * top[x + 1] +
                             2 * bot[x] + 3 * bot[x + 1] + 6;
            const int pred = (sum * 2731) >> 15;

            dst[x] = (dst[x] + pred + 1) >> 1;
        }
        src  = bot;
        dst += stride;
    }
}
/**
 * Thirdpel avg over a 2x2 neighbourhood with weights 2, 3 (current row) and
 * 3, 4 (row below).
 * The prediction p = (2 * a + 3 * b + 3 * c + 4 * d + 6) / 12 (division done
 * as (n * 2731) >> 15) is blended into the destination with upward rounding:
 * dst[x] = (dst[x] + p + 1) >> 1.
 *
 * @param dst    destination, read and rewritten; advanced by stride per row
 * @param src    source; one extra column and one extra row are read
 * @param stride row pitch in bytes shared by dst and src
 * @param width  pixels processed per row
 * @param height rows processed
 */
static inline void avg_tpel_pixels_mc22_c(uint8_t *dst, const uint8_t *src,
                                          int stride, int width, int height)
{
    int x, y;

    for (y = 0; y < height; y++) {
        const uint8_t *top = src;
        const uint8_t *bot = src + stride;

        for (x = 0; x < width; x++) {
            const int sum  = 2 * top[x] + 3 * top[x + 1] +
                             3 * bot[x] + 4 * bot[x + 1] + 6;
            const int pred = (sum * 2731) >> 15;

            dst[x] = (dst[x] + pred + 1) >> 1;
        }
        src  = bot;
        dst += stride;
    }
}
| #define QPEL_MC(r, OPNAME, RND, OP) \ | |||
| static void OPNAME ## mpeg4_qpel8_h_lowpass(uint8_t *dst, uint8_t *src, \ | |||
| int dstStride, int srcStride, \ | |||
| @@ -2974,26 +2697,6 @@ av_cold void ff_dsputil_init(DSPContext *c, AVCodecContext *avctx) | |||
| c->pix_abs[1][2] = pix_abs8_y2_c; | |||
| c->pix_abs[1][3] = pix_abs8_xy2_c; | |||
| c->put_tpel_pixels_tab[0] = put_tpel_pixels_mc00_c; | |||
| c->put_tpel_pixels_tab[1] = put_tpel_pixels_mc10_c; | |||
| c->put_tpel_pixels_tab[2] = put_tpel_pixels_mc20_c; | |||
| c->put_tpel_pixels_tab[4] = put_tpel_pixels_mc01_c; | |||
| c->put_tpel_pixels_tab[5] = put_tpel_pixels_mc11_c; | |||
| c->put_tpel_pixels_tab[6] = put_tpel_pixels_mc21_c; | |||
| c->put_tpel_pixels_tab[8] = put_tpel_pixels_mc02_c; | |||
| c->put_tpel_pixels_tab[9] = put_tpel_pixels_mc12_c; | |||
| c->put_tpel_pixels_tab[10] = put_tpel_pixels_mc22_c; | |||
| c->avg_tpel_pixels_tab[0] = avg_tpel_pixels_mc00_c; | |||
| c->avg_tpel_pixels_tab[1] = avg_tpel_pixels_mc10_c; | |||
| c->avg_tpel_pixels_tab[2] = avg_tpel_pixels_mc20_c; | |||
| c->avg_tpel_pixels_tab[4] = avg_tpel_pixels_mc01_c; | |||
| c->avg_tpel_pixels_tab[5] = avg_tpel_pixels_mc11_c; | |||
| c->avg_tpel_pixels_tab[6] = avg_tpel_pixels_mc21_c; | |||
| c->avg_tpel_pixels_tab[8] = avg_tpel_pixels_mc02_c; | |||
| c->avg_tpel_pixels_tab[9] = avg_tpel_pixels_mc12_c; | |||
| c->avg_tpel_pixels_tab[10] = avg_tpel_pixels_mc22_c; | |||
| #define dspfunc(PFX, IDX, NUM) \ | |||
| c->PFX ## _pixels_tab[IDX][0] = PFX ## NUM ## _mc00_c; \ | |||
| c->PFX ## _pixels_tab[IDX][1] = PFX ## NUM ## _mc10_c; \ | |||
| @@ -71,9 +71,6 @@ void ff_gmc_c(uint8_t *dst, uint8_t *src, int stride, int h, int ox, int oy, | |||
| * Block sizes for op_pixels_func are 8x4,8x8 16x8 16x16. | |||
| * h for op_pixels_func is limited to { width / 2, width }, | |||
| * but never larger than 16 and never smaller than 4. */ | |||
/**
 * Thirdpel motion compensation function.
 *
 * @param block     destination where the result is stored
 * @param pixels    source
 * @param line_size number of bytes in a horizontal line of block
 * @param w         block width in pixels
 * @param h         block height in rows
 */
typedef void (*tpel_mc_func)(uint8_t *block /* align width (8 or 16) */,
                             const uint8_t *pixels /* align 1 */,
                             int line_size, int w, int h);
| typedef void (*qpel_mc_func)(uint8_t *dst /* align width (8 or 16) */, | |||
| uint8_t *src /* align 1 */, ptrdiff_t stride); | |||
| @@ -190,19 +187,6 @@ typedef struct DSPContext { | |||
| int (*ssd_int8_vs_int16)(const int8_t *pix1, const int16_t *pix2, | |||
| int size); | |||
| /** | |||
| * Thirdpel motion compensation with rounding (a + b + 1) >> 1. | |||
| * this is an array[12] of motion compensation functions for the | |||
| * 9 thirdpel positions<br> | |||
| * *pixels_tab[xthirdpel + 4 * ythirdpel] | |||
| * @param block destination where the result is stored | |||
| * @param pixels source | |||
| * @param line_size number of bytes in a horizontal line of block | |||
| * @param h height | |||
| */ | |||
| tpel_mc_func put_tpel_pixels_tab[11]; // FIXME individual func ptr per width? | |||
| tpel_mc_func avg_tpel_pixels_tab[11]; // FIXME individual func ptr per width? | |||
| qpel_mc_func put_qpel_pixels_tab[2][16]; | |||
| qpel_mc_func avg_qpel_pixels_tab[2][16]; | |||
| qpel_mc_func put_no_rnd_qpel_pixels_tab[2][16]; | |||
| @@ -24,6 +24,7 @@ | |||
| #include "bit_depth_template.c" | |||
| #include "hpel_template.c" | |||
| #include "tpel_template.c" | |||
| static inline void FUNC(copy_block2)(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride, int h) | |||
| { | |||
| @@ -22,47 +22,6 @@ | |||
| #include "pixels.h" | |||
| #define DEF_HPEL(OPNAME, OP) \ | |||
| static inline void FUNCC(OPNAME ## _pixels2)(uint8_t *block, \ | |||
| const uint8_t *pixels, \ | |||
| ptrdiff_t line_size, \ | |||
| int h) \ | |||
| { \ | |||
| int i; \ | |||
| for (i = 0; i < h; i++) { \ | |||
| OP(*((pixel2 *) block), AV_RN2P(pixels)); \ | |||
| pixels += line_size; \ | |||
| block += line_size; \ | |||
| } \ | |||
| } \ | |||
| \ | |||
| static inline void FUNCC(OPNAME ## _pixels4)(uint8_t *block, \ | |||
| const uint8_t *pixels, \ | |||
| ptrdiff_t line_size, \ | |||
| int h) \ | |||
| { \ | |||
| int i; \ | |||
| for (i = 0; i < h; i++) { \ | |||
| OP(*((pixel4 *) block), AV_RN4P(pixels)); \ | |||
| pixels += line_size; \ | |||
| block += line_size; \ | |||
| } \ | |||
| } \ | |||
| \ | |||
| static inline void FUNCC(OPNAME ## _pixels8)(uint8_t *block, \ | |||
| const uint8_t *pixels, \ | |||
| ptrdiff_t line_size, \ | |||
| int h) \ | |||
| { \ | |||
| int i; \ | |||
| for (i = 0; i < h; i++) { \ | |||
| OP(*((pixel4 *) block), AV_RN4P(pixels)); \ | |||
| OP(*((pixel4 *) (block + 4 * sizeof(pixel))), \ | |||
| AV_RN4P(pixels + 4 * sizeof(pixel))); \ | |||
| pixels += line_size; \ | |||
| block += line_size; \ | |||
| } \ | |||
| } \ | |||
| \ | |||
| static inline void FUNC(OPNAME ## _pixels8_l2)(uint8_t *dst, \ | |||
| const uint8_t *src1, \ | |||
| const uint8_t *src2, \ | |||
| @@ -134,10 +93,6 @@ static inline void FUNC(OPNAME ## _pixels16_l2)(uint8_t *dst, \ | |||
| dst_stride, src_stride1, \ | |||
| src_stride2, h); \ | |||
| } \ | |||
| \ | |||
| CALL_2X_PIXELS(FUNCC(OPNAME ## _pixels16), \ | |||
| FUNCC(OPNAME ## _pixels8), \ | |||
| 8 * sizeof(pixel)) | |||
| #define op_avg(a, b) a = rnd_avg_pixel4(a, b) | |||
| #define op_put(a, b) a = b | |||
| @@ -33,6 +33,7 @@ | |||
| #include "bit_depth_template.c" | |||
| #include "hpel_template.c" | |||
| #include "tpel_template.c" | |||
| #define PIXOP2(OPNAME, OP) \ | |||
| static inline void FUNC(OPNAME ## _no_rnd_pixels8_l2)(uint8_t *dst, \ | |||
| @@ -54,6 +54,7 @@ | |||
| #include "golomb.h" | |||
| #include "hpeldsp.h" | |||
| #include "rectangle.h" | |||
| #include "tpeldsp.h" | |||
| #include "vdpau_internal.h" | |||
| #if CONFIG_ZLIB | |||
| @@ -71,6 +72,7 @@ | |||
| typedef struct { | |||
| H264Context h; | |||
| HpelDSPContext hdsp; | |||
| TpelDSPContext tdsp; | |||
| H264Picture *cur_pic; | |||
| H264Picture *next_pic; | |||
| H264Picture *last_pic; | |||
| @@ -328,9 +330,9 @@ static inline void svq3_mc_dir_part(SVQ3Context *s, | |||
| src = h->edge_emu_buffer; | |||
| } | |||
| if (thirdpel) | |||
| (avg ? h->dsp.avg_tpel_pixels_tab | |||
| : h->dsp.put_tpel_pixels_tab)[dxy](dest, src, h->linesize, | |||
| width, height); | |||
| (avg ? s->tdsp.avg_tpel_pixels_tab | |||
| : s->tdsp.put_tpel_pixels_tab)[dxy](dest, src, h->linesize, | |||
| width, height); | |||
| else | |||
| (avg ? s->hdsp.avg_pixels_tab | |||
| : s->hdsp.put_pixels_tab)[blocksize][dxy](dest, src, h->linesize, | |||
| @@ -356,10 +358,10 @@ static inline void svq3_mc_dir_part(SVQ3Context *s, | |||
| src = h->edge_emu_buffer; | |||
| } | |||
| if (thirdpel) | |||
| (avg ? h->dsp.avg_tpel_pixels_tab | |||
| : h->dsp.put_tpel_pixels_tab)[dxy](dest, src, | |||
| h->uvlinesize, | |||
| width, height); | |||
| (avg ? s->tdsp.avg_tpel_pixels_tab | |||
| : s->tdsp.put_tpel_pixels_tab)[dxy](dest, src, | |||
| h->uvlinesize, | |||
| width, height); | |||
| else | |||
| (avg ? s->hdsp.avg_pixels_tab | |||
| : s->hdsp.put_pixels_tab)[blocksize][dxy](dest, src, | |||
| @@ -887,6 +889,8 @@ static av_cold int svq3_decode_init(AVCodecContext *avctx) | |||
| goto fail; | |||
| ff_hpeldsp_init(&s->hdsp, avctx->flags); | |||
| ff_tpeldsp_init(&s->tdsp); | |||
| h->flags = avctx->flags; | |||
| h->is_complex = 1; | |||
| h->sps.chroma_format_idc = 1; | |||
| @@ -0,0 +1,80 @@ | |||
| /* | |||
| * This file is part of FFmpeg. | |||
| * | |||
| * FFmpeg is free software; you can redistribute it and/or | |||
| * modify it under the terms of the GNU Lesser General Public | |||
| * License as published by the Free Software Foundation; either | |||
| * version 2.1 of the License, or (at your option) any later version. | |||
| * | |||
| * FFmpeg is distributed in the hope that it will be useful, | |||
| * but WITHOUT ANY WARRANTY; without even the implied warranty of | |||
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |||
| * Lesser General Public License for more details. | |||
| * | |||
| * You should have received a copy of the GNU Lesser General Public | |||
| * License along with FFmpeg; if not, write to the Free Software | |||
| * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |||
| */ | |||
| #include <stddef.h> | |||
| #include <stdint.h> | |||
| #include "libavutil/intreadwrite.h" | |||
| #include "pixels.h" | |||
| #include "rnd_avg.h" | |||
| #include "bit_depth_template.c" | |||
| #define DEF_TPEL(OPNAME, OP) \ | |||
| static inline void FUNCC(OPNAME ## _pixels2)(uint8_t *block, \ | |||
| const uint8_t *pixels, \ | |||
| ptrdiff_t line_size, \ | |||
| int h) \ | |||
| { \ | |||
| int i; \ | |||
| for (i = 0; i < h; i++) { \ | |||
| OP(*((pixel2 *) block), AV_RN2P(pixels)); \ | |||
| pixels += line_size; \ | |||
| block += line_size; \ | |||
| } \ | |||
| } \ | |||
| \ | |||
| static inline void FUNCC(OPNAME ## _pixels4)(uint8_t *block, \ | |||
| const uint8_t *pixels, \ | |||
| ptrdiff_t line_size, \ | |||
| int h) \ | |||
| { \ | |||
| int i; \ | |||
| for (i = 0; i < h; i++) { \ | |||
| OP(*((pixel4 *) block), AV_RN4P(pixels)); \ | |||
| pixels += line_size; \ | |||
| block += line_size; \ | |||
| } \ | |||
| } \ | |||
| \ | |||
| static inline void FUNCC(OPNAME ## _pixels8)(uint8_t *block, \ | |||
| const uint8_t *pixels, \ | |||
| ptrdiff_t line_size, \ | |||
| int h) \ | |||
| { \ | |||
| int i; \ | |||
| for (i = 0; i < h; i++) { \ | |||
| OP(*((pixel4 *) block), AV_RN4P(pixels)); \ | |||
| OP(*((pixel4 *) (block + 4 * sizeof(pixel))), \ | |||
| AV_RN4P(pixels + 4 * sizeof(pixel))); \ | |||
| pixels += line_size; \ | |||
| block += line_size; \ | |||
| } \ | |||
| } \ | |||
| \ | |||
| CALL_2X_PIXELS(FUNCC(OPNAME ## _pixels16), \ | |||
| FUNCC(OPNAME ## _pixels8), \ | |||
| 8 * sizeof(pixel)) | |||
| #define op_avg(a, b) a = rnd_avg_pixel4(a, b) | |||
| #define op_put(a, b) a = b | |||
| DEF_TPEL(avg, op_avg) | |||
| DEF_TPEL(put, op_put) | |||
| #undef op_avg | |||
| #undef op_put | |||
| @@ -0,0 +1,333 @@ | |||
| /* | |||
| * thirdpel DSP functions | |||
| * | |||
| * This file is part of FFmpeg. | |||
| * | |||
| * FFmpeg is free software; you can redistribute it and/or | |||
| * modify it under the terms of the GNU Lesser General Public | |||
| * License as published by the Free Software Foundation; either | |||
| * version 2.1 of the License, or (at your option) any later version. | |||
| * | |||
| * FFmpeg is distributed in the hope that it will be useful, | |||
| * but WITHOUT ANY WARRANTY; without even the implied warranty of | |||
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |||
| * Lesser General Public License for more details. | |||
| * | |||
| * You should have received a copy of the GNU Lesser General Public | |||
| * License along with FFmpeg; if not, write to the Free Software | |||
| * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |||
| */ | |||
| /** | |||
| * @file | |||
| * thirdpel DSP functions | |||
| */ | |||
| #include <stdint.h> | |||
| #include "libavutil/attributes.h" | |||
| #include "tpeldsp.h" | |||
| #define BIT_DEPTH 8 | |||
| #include "tpel_template.c" | |||
/**
 * Full-pel "put" case: forwards to the fixed-width put_pixelsN_8_c helper
 * matching the requested block width.
 *
 * Widths other than 2, 4, 8 and 16 are silently ignored (nothing is written).
 *
 * @param dst    destination block
 * @param src    source block
 * @param stride row pitch in bytes, handed on unchanged to the helper
 * @param width  block width in pixels, selects the helper
 * @param height number of rows, handed on unchanged to the helper
 */
static inline void put_tpel_pixels_mc00_c(uint8_t *dst, const uint8_t *src,
                                          int stride, int width, int height)
{
    if (width == 2)
        put_pixels2_8_c(dst, src, stride, height);
    else if (width == 4)
        put_pixels4_8_c(dst, src, stride, height);
    else if (width == 8)
        put_pixels8_8_c(dst, src, stride, height);
    else if (width == 16)
        put_pixels16_8_c(dst, src, stride, height);
}
/**
 * Thirdpel put, current pixel weighted 2/3 and its right neighbour 1/3:
 * dst[x] = (2 * src[x] + src[x + 1] + 1) / 3.
 * The division by 3 is performed as (n * 683) >> 11.
 *
 * @param dst    destination, advanced by stride after every row
 * @param src    source, advanced by stride after every row; one pixel past
 *               the right edge of each row is read
 * @param stride row pitch in bytes shared by dst and src
 * @param width  pixels written per row
 * @param height rows written
 */
static inline void put_tpel_pixels_mc10_c(uint8_t *dst, const uint8_t *src,
                                          int stride, int width, int height)
{
    int x, y;

    for (y = 0; y < height; y++, src += stride, dst += stride) {
        for (x = 0; x < width; x++) {
            const int weighted = 2 * src[x] + src[x + 1] + 1;

            dst[x] = (weighted * 683) >> 11;
        }
    }
}
/**
 * Thirdpel put, current pixel weighted 1/3 and its right neighbour 2/3:
 * dst[x] = (src[x] + 2 * src[x + 1] + 1) / 3.
 * The division by 3 is performed as (n * 683) >> 11.
 *
 * @param dst    destination, advanced by stride after every row
 * @param src    source, advanced by stride after every row; one pixel past
 *               the right edge of each row is read
 * @param stride row pitch in bytes shared by dst and src
 * @param width  pixels written per row
 * @param height rows written
 */
static inline void put_tpel_pixels_mc20_c(uint8_t *dst, const uint8_t *src,
                                          int stride, int width, int height)
{
    int x, y;

    for (y = 0; y < height; y++, src += stride, dst += stride) {
        for (x = 0; x < width; x++) {
            const int weighted = src[x] + 2 * src[x + 1] + 1;

            dst[x] = (weighted * 683) >> 11;
        }
    }
}
/**
 * Thirdpel put, current pixel weighted 2/3 and the pixel below it 1/3:
 * dst[x] = (2 * src[x] + src[x + stride] + 1) / 3.
 * The division by 3 is performed as (n * 683) >> 11.
 *
 * @param dst    destination, advanced by stride after every row
 * @param src    source, advanced by stride after every row; one row below
 *               the last output row is read
 * @param stride row pitch in bytes shared by dst and src
 * @param width  pixels written per row
 * @param height rows written
 */
static inline void put_tpel_pixels_mc01_c(uint8_t *dst, const uint8_t *src,
                                          int stride, int width, int height)
{
    int x, y;

    for (y = 0; y < height; y++) {
        const uint8_t *below = src + stride;

        for (x = 0; x < width; x++)
            dst[x] = ((2 * src[x] + below[x] + 1) * 683) >> 11;

        src  = below;
        dst += stride;
    }
}
/**
 * Thirdpel put over a 2x2 neighbourhood with weights 4, 3 (current row) and
 * 3, 2 (row below):
 * dst[x] = (4 * a + 3 * b + 3 * c + 2 * d + 6) / 12, where a = src[x],
 * b = src[x + 1] and c, d are the same columns one row down.
 * The division by 12 is performed as (n * 2731) >> 15.
 *
 * @param dst    destination, advanced by stride after every row
 * @param src    source; one extra column and one extra row are read
 * @param stride row pitch in bytes shared by dst and src
 * @param width  pixels written per row
 * @param height rows written
 */
static inline void put_tpel_pixels_mc11_c(uint8_t *dst, const uint8_t *src,
                                          int stride, int width, int height)
{
    int x, y;

    for (y = 0; y < height; y++) {
        const uint8_t *top = src;
        const uint8_t *bot = src + stride;

        for (x = 0; x < width; x++) {
            const int sum = 4 * top[x] + 3 * top[x + 1] +
                            3 * bot[x] + 2 * bot[x + 1] + 6;

            dst[x] = (sum * 2731) >> 15;
        }
        src  = bot;
        dst += stride;
    }
}
/**
 * Thirdpel put over a 2x2 neighbourhood with weights 3, 2 (current row) and
 * 4, 3 (row below):
 * dst[x] = (3 * a + 2 * b + 4 * c + 3 * d + 6) / 12, where a = src[x],
 * b = src[x + 1] and c, d are the same columns one row down.
 * The division by 12 is performed as (n * 2731) >> 15.
 *
 * @param dst    destination, advanced by stride after every row
 * @param src    source; one extra column and one extra row are read
 * @param stride row pitch in bytes shared by dst and src
 * @param width  pixels written per row
 * @param height rows written
 */
static inline void put_tpel_pixels_mc12_c(uint8_t *dst, const uint8_t *src,
                                          int stride, int width, int height)
{
    int x, y;

    for (y = 0; y < height; y++) {
        const uint8_t *top = src;
        const uint8_t *bot = src + stride;

        for (x = 0; x < width; x++) {
            const int sum = 3 * top[x] + 2 * top[x + 1] +
                            4 * bot[x] + 3 * bot[x + 1] + 6;

            dst[x] = (sum * 2731) >> 15;
        }
        src  = bot;
        dst += stride;
    }
}
/**
 * Thirdpel put, current pixel weighted 1/3 and the pixel below it 2/3:
 * dst[x] = (src[x] + 2 * src[x + stride] + 1) / 3.
 * The division by 3 is performed as (n * 683) >> 11.
 *
 * @param dst    destination, advanced by stride after every row
 * @param src    source, advanced by stride after every row; one row below
 *               the last output row is read
 * @param stride row pitch in bytes shared by dst and src
 * @param width  pixels written per row
 * @param height rows written
 */
static inline void put_tpel_pixels_mc02_c(uint8_t *dst, const uint8_t *src,
                                          int stride, int width, int height)
{
    int x, y;

    for (y = 0; y < height; y++) {
        const uint8_t *below = src + stride;

        for (x = 0; x < width; x++)
            dst[x] = ((src[x] + 2 * below[x] + 1) * 683) >> 11;

        src  = below;
        dst += stride;
    }
}
/**
 * Thirdpel put over a 2x2 neighbourhood with weights 3, 4 (current row) and
 * 2, 3 (row below):
 * dst[x] = (3 * a + 4 * b + 2 * c + 3 * d + 6) / 12, where a = src[x],
 * b = src[x + 1] and c, d are the same columns one row down.
 * The division by 12 is performed as (n * 2731) >> 15.
 *
 * @param dst    destination, advanced by stride after every row
 * @param src    source; one extra column and one extra row are read
 * @param stride row pitch in bytes shared by dst and src
 * @param width  pixels written per row
 * @param height rows written
 */
static inline void put_tpel_pixels_mc21_c(uint8_t *dst, const uint8_t *src,
                                          int stride, int width, int height)
{
    int x, y;

    for (y = 0; y < height; y++) {
        const uint8_t *top = src;
        const uint8_t *bot = src + stride;

        for (x = 0; x < width; x++) {
            const int sum = 3 * top[x] + 4 * top[x + 1] +
                            2 * bot[x] + 3 * bot[x + 1] + 6;

            dst[x] = (sum * 2731) >> 15;
        }
        src  = bot;
        dst += stride;
    }
}
/**
 * Thirdpel put over a 2x2 neighbourhood with weights 2, 3 (current row) and
 * 3, 4 (row below):
 * dst[x] = (2 * a + 3 * b + 3 * c + 4 * d + 6) / 12, where a = src[x],
 * b = src[x + 1] and c, d are the same columns one row down.
 * The division by 12 is performed as (n * 2731) >> 15.
 *
 * @param dst    destination, advanced by stride after every row
 * @param src    source; one extra column and one extra row are read
 * @param stride row pitch in bytes shared by dst and src
 * @param width  pixels written per row
 * @param height rows written
 */
static inline void put_tpel_pixels_mc22_c(uint8_t *dst, const uint8_t *src,
                                          int stride, int width, int height)
{
    int x, y;

    for (y = 0; y < height; y++) {
        const uint8_t *top = src;
        const uint8_t *bot = src + stride;

        for (x = 0; x < width; x++) {
            const int sum = 2 * top[x] + 3 * top[x + 1] +
                            3 * bot[x] + 4 * bot[x + 1] + 6;

            dst[x] = (sum * 2731) >> 15;
        }
        src  = bot;
        dst += stride;
    }
}
/**
 * Full-pel "avg" case: forwards to the fixed-width avg_pixelsN_8_c helper
 * matching the requested block width.
 *
 * Widths other than 2, 4, 8 and 16 are silently ignored (dst is left as is).
 *
 * @param dst    destination block
 * @param src    source block
 * @param stride row pitch in bytes, handed on unchanged to the helper
 * @param width  block width in pixels, selects the helper
 * @param height number of rows, handed on unchanged to the helper
 */
static inline void avg_tpel_pixels_mc00_c(uint8_t *dst, const uint8_t *src,
                                          int stride, int width, int height)
{
    if (width == 2)
        avg_pixels2_8_c(dst, src, stride, height);
    else if (width == 4)
        avg_pixels4_8_c(dst, src, stride, height);
    else if (width == 8)
        avg_pixels8_8_c(dst, src, stride, height);
    else if (width == 16)
        avg_pixels16_8_c(dst, src, stride, height);
}
/**
 * Thirdpel avg, current pixel weighted 2/3 and its right neighbour 1/3.
 * The prediction p = (2 * src[x] + src[x + 1] + 1) / 3 (division done as
 * (n * 683) >> 11) is blended into the destination with upward rounding:
 * dst[x] = (dst[x] + p + 1) >> 1.
 *
 * @param dst    destination, read and rewritten; advanced by stride per row
 * @param src    source; one pixel past the right edge of each row is read
 * @param stride row pitch in bytes shared by dst and src
 * @param width  pixels processed per row
 * @param height rows processed
 */
static inline void avg_tpel_pixels_mc10_c(uint8_t *dst, const uint8_t *src,
                                          int stride, int width, int height)
{
    int x, y;

    for (y = 0; y < height; y++, src += stride, dst += stride) {
        for (x = 0; x < width; x++) {
            const int pred = ((2 * src[x] + src[x + 1] + 1) * 683) >> 11;

            dst[x] = (dst[x] + pred + 1) >> 1;
        }
    }
}
/**
 * Thirdpel avg, current pixel weighted 1/3 and its right neighbour 2/3.
 * The prediction p = (src[x] + 2 * src[x + 1] + 1) / 3 (division done as
 * (n * 683) >> 11) is blended into the destination with upward rounding:
 * dst[x] = (dst[x] + p + 1) >> 1.
 *
 * @param dst    destination, read and rewritten; advanced by stride per row
 * @param src    source; one pixel past the right edge of each row is read
 * @param stride row pitch in bytes shared by dst and src
 * @param width  pixels processed per row
 * @param height rows processed
 */
static inline void avg_tpel_pixels_mc20_c(uint8_t *dst, const uint8_t *src,
                                          int stride, int width, int height)
{
    int x, y;

    for (y = 0; y < height; y++, src += stride, dst += stride) {
        for (x = 0; x < width; x++) {
            const int pred = ((src[x] + 2 * src[x + 1] + 1) * 683) >> 11;

            dst[x] = (dst[x] + pred + 1) >> 1;
        }
    }
}
/**
 * Thirdpel avg, current pixel weighted 2/3 and the pixel below it 1/3.
 * The prediction p = (2 * src[x] + src[x + stride] + 1) / 3 (division done
 * as (n * 683) >> 11) is blended into the destination with upward rounding:
 * dst[x] = (dst[x] + p + 1) >> 1.
 *
 * @param dst    destination, read and rewritten; advanced by stride per row
 * @param src    source; one row below the last processed row is read
 * @param stride row pitch in bytes shared by dst and src
 * @param width  pixels processed per row
 * @param height rows processed
 */
static inline void avg_tpel_pixels_mc01_c(uint8_t *dst, const uint8_t *src,
                                          int stride, int width, int height)
{
    int x, y;

    for (y = 0; y < height; y++) {
        const uint8_t *below = src + stride;

        for (x = 0; x < width; x++) {
            const int pred = ((2 * src[x] + below[x] + 1) * 683) >> 11;

            dst[x] = (dst[x] + pred + 1) >> 1;
        }
        src  = below;
        dst += stride;
    }
}
/**
 * Thirdpel avg over a 2x2 neighbourhood with weights 4, 3 (current row) and
 * 3, 2 (row below).
 * The prediction p = (4 * a + 3 * b + 3 * c + 2 * d + 6) / 12 (division done
 * as (n * 2731) >> 15) is blended into the destination with upward rounding:
 * dst[x] = (dst[x] + p + 1) >> 1.
 *
 * @param dst    destination, read and rewritten; advanced by stride per row
 * @param src    source; one extra column and one extra row are read
 * @param stride row pitch in bytes shared by dst and src
 * @param width  pixels processed per row
 * @param height rows processed
 */
static inline void avg_tpel_pixels_mc11_c(uint8_t *dst, const uint8_t *src,
                                          int stride, int width, int height)
{
    int x, y;

    for (y = 0; y < height; y++) {
        const uint8_t *top = src;
        const uint8_t *bot = src + stride;

        for (x = 0; x < width; x++) {
            const int sum  = 4 * top[x] + 3 * top[x + 1] +
                             3 * bot[x] + 2 * bot[x + 1] + 6;
            const int pred = (sum * 2731) >> 15;

            dst[x] = (dst[x] + pred + 1) >> 1;
        }
        src  = bot;
        dst += stride;
    }
}
/**
 * Thirdpel avg over a 2x2 neighbourhood with weights 3, 2 (current row) and
 * 4, 3 (row below).
 * The prediction p = (3 * a + 2 * b + 4 * c + 3 * d + 6) / 12 (division done
 * as (n * 2731) >> 15) is blended into the destination with upward rounding:
 * dst[x] = (dst[x] + p + 1) >> 1.
 *
 * @param dst    destination, read and rewritten; advanced by stride per row
 * @param src    source; one extra column and one extra row are read
 * @param stride row pitch in bytes shared by dst and src
 * @param width  pixels processed per row
 * @param height rows processed
 */
static inline void avg_tpel_pixels_mc12_c(uint8_t *dst, const uint8_t *src,
                                          int stride, int width, int height)
{
    int x, y;

    for (y = 0; y < height; y++) {
        const uint8_t *top = src;
        const uint8_t *bot = src + stride;

        for (x = 0; x < width; x++) {
            const int sum  = 3 * top[x] + 2 * top[x + 1] +
                             4 * bot[x] + 3 * bot[x + 1] + 6;
            const int pred = (sum * 2731) >> 15;

            dst[x] = (dst[x] + pred + 1) >> 1;
        }
        src  = bot;
        dst += stride;
    }
}
/**
 * Thirdpel avg, current pixel weighted 1/3 and the pixel below it 2/3.
 * The prediction p = (src[x] + 2 * src[x + stride] + 1) / 3 (division done
 * as (n * 683) >> 11) is blended into the destination with upward rounding:
 * dst[x] = (dst[x] + p + 1) >> 1.
 *
 * @param dst    destination, read and rewritten; advanced by stride per row
 * @param src    source; one row below the last processed row is read
 * @param stride row pitch in bytes shared by dst and src
 * @param width  pixels processed per row
 * @param height rows processed
 */
static inline void avg_tpel_pixels_mc02_c(uint8_t *dst, const uint8_t *src,
                                          int stride, int width, int height)
{
    int x, y;

    for (y = 0; y < height; y++) {
        const uint8_t *below = src + stride;

        for (x = 0; x < width; x++) {
            const int pred = ((src[x] + 2 * below[x] + 1) * 683) >> 11;

            dst[x] = (dst[x] + pred + 1) >> 1;
        }
        src  = below;
        dst += stride;
    }
}
/**
 * Thirdpel avg over a 2x2 neighbourhood with weights 3, 4 (current row) and
 * 2, 3 (row below).
 * The prediction p = (3 * a + 4 * b + 2 * c + 3 * d + 6) / 12 (division done
 * as (n * 2731) >> 15) is blended into the destination with upward rounding:
 * dst[x] = (dst[x] + p + 1) >> 1.
 *
 * @param dst    destination, read and rewritten; advanced by stride per row
 * @param src    source; one extra column and one extra row are read
 * @param stride row pitch in bytes shared by dst and src
 * @param width  pixels processed per row
 * @param height rows processed
 */
static inline void avg_tpel_pixels_mc21_c(uint8_t *dst, const uint8_t *src,
                                          int stride, int width, int height)
{
    int x, y;

    for (y = 0; y < height; y++) {
        const uint8_t *top = src;
        const uint8_t *bot = src + stride;

        for (x = 0; x < width; x++) {
            const int sum  = 3 * top[x] + 4 * top[x + 1] +
                             2 * bot[x] + 3 * bot[x + 1] + 6;
            const int pred = (sum * 2731) >> 15;

            dst[x] = (dst[x] + pred + 1) >> 1;
        }
        src  = bot;
        dst += stride;
    }
}
/**
 * Thirdpel avg over a 2x2 neighbourhood with weights 2, 3 (current row) and
 * 3, 4 (row below).
 * The prediction p = (2 * a + 3 * b + 3 * c + 4 * d + 6) / 12 (division done
 * as (n * 2731) >> 15) is blended into the destination with upward rounding:
 * dst[x] = (dst[x] + p + 1) >> 1.
 *
 * @param dst    destination, read and rewritten; advanced by stride per row
 * @param src    source; one extra column and one extra row are read
 * @param stride row pitch in bytes shared by dst and src
 * @param width  pixels processed per row
 * @param height rows processed
 */
static inline void avg_tpel_pixels_mc22_c(uint8_t *dst, const uint8_t *src,
                                          int stride, int width, int height)
{
    int x, y;

    for (y = 0; y < height; y++) {
        const uint8_t *top = src;
        const uint8_t *bot = src + stride;

        for (x = 0; x < width; x++) {
            const int sum  = 2 * top[x] + 3 * top[x + 1] +
                             3 * bot[x] + 4 * bot[x + 1] + 6;
            const int pred = (sum * 2731) >> 15;

            dst[x] = (dst[x] + pred + 1) >> 1;
        }
        src  = bot;
        dst += stride;
    }
}
| av_cold void ff_tpeldsp_init(TpelDSPContext *c) | |||
| { | |||
| c->put_tpel_pixels_tab[ 0] = put_tpel_pixels_mc00_c; | |||
| c->put_tpel_pixels_tab[ 1] = put_tpel_pixels_mc10_c; | |||
| c->put_tpel_pixels_tab[ 2] = put_tpel_pixels_mc20_c; | |||
| c->put_tpel_pixels_tab[ 4] = put_tpel_pixels_mc01_c; | |||
| c->put_tpel_pixels_tab[ 5] = put_tpel_pixels_mc11_c; | |||
| c->put_tpel_pixels_tab[ 6] = put_tpel_pixels_mc21_c; | |||
| c->put_tpel_pixels_tab[ 8] = put_tpel_pixels_mc02_c; | |||
| c->put_tpel_pixels_tab[ 9] = put_tpel_pixels_mc12_c; | |||
| c->put_tpel_pixels_tab[10] = put_tpel_pixels_mc22_c; | |||
| c->avg_tpel_pixels_tab[ 0] = avg_tpel_pixels_mc00_c; | |||
| c->avg_tpel_pixels_tab[ 1] = avg_tpel_pixels_mc10_c; | |||
| c->avg_tpel_pixels_tab[ 2] = avg_tpel_pixels_mc20_c; | |||
| c->avg_tpel_pixels_tab[ 4] = avg_tpel_pixels_mc01_c; | |||
| c->avg_tpel_pixels_tab[ 5] = avg_tpel_pixels_mc11_c; | |||
| c->avg_tpel_pixels_tab[ 6] = avg_tpel_pixels_mc21_c; | |||
| c->avg_tpel_pixels_tab[ 8] = avg_tpel_pixels_mc02_c; | |||
| c->avg_tpel_pixels_tab[ 9] = avg_tpel_pixels_mc12_c; | |||
| c->avg_tpel_pixels_tab[10] = avg_tpel_pixels_mc22_c; | |||
| } | |||
| @@ -0,0 +1,59 @@ | |||
| /* | |||
| * thirdpel DSP functions | |||
| * | |||
| * This file is part of FFmpeg. | |||
| * | |||
| * FFmpeg is free software; you can redistribute it and/or | |||
| * modify it under the terms of the GNU Lesser General Public | |||
| * License as published by the Free Software Foundation; either | |||
| * version 2.1 of the License, or (at your option) any later version. | |||
| * | |||
| * FFmpeg is distributed in the hope that it will be useful, | |||
| * but WITHOUT ANY WARRANTY; without even the implied warranty of | |||
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |||
| * Lesser General Public License for more details. | |||
| * | |||
| * You should have received a copy of the GNU Lesser General Public | |||
| * License along with FFmpeg; if not, write to the Free Software | |||
| * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |||
| */ | |||
| /** | |||
| * @file | |||
| * thirdpel DSP functions | |||
| */ | |||
#ifndef AVCODEC_TPELDSP_H
#define AVCODEC_TPELDSP_H

#include <stdint.h>

/**
 * Thirdpel motion compensation function (add and put pixel, decoding).
 *
 * The C reference implementation of the full-pel position dispatches on
 * widths 2, 4, 8 and 16 and does nothing for any other width.
 *
 * @param block     destination where the result is stored
 * @param pixels    source
 * @param line_size number of bytes in a horizontal line of block
 * @param w         block width in pixels
 * @param h         block height in rows
 */
typedef void (*tpel_mc_func)(uint8_t *block /* align width (8 or 16) */,
                             const uint8_t *pixels /* align 1 */,
                             int line_size, int w, int h);

/**
 * thirdpel DSP context
 */
typedef struct TpelDSPContext {
    /**
     * Thirdpel motion compensation tables; the avg variants blend into the
     * destination with rounding (a + b + 1) >> 1.
     * Each table has 11 slots holding the motion compensation functions for
     * the 9 thirdpel positions<br>
     * *pixels_tab[xthirdpel + 4 * ythirdpel]
     * with xthirdpel and ythirdpel in 0..2, so slots 3 and 7 are unused and
     * are not assigned by ff_tpeldsp_init().
     */
    tpel_mc_func put_tpel_pixels_tab[11]; // FIXME individual func ptr per width?
    tpel_mc_func avg_tpel_pixels_tab[11]; // FIXME individual func ptr per width?
} TpelDSPContext;

/**
 * Fill both tables of the context with the C implementations.
 * @param c context to initialise
 */
void ff_tpeldsp_init(TpelDSPContext *c);

#endif /* AVCODEC_TPELDSP_H */