dsputil: Move thirdpel-related bits into their own context

12 years ago · 57f09608e1
--- a/configure
+++ b/configure
@@ -1537,6 +1537,7 @@ CONFIG_EXTRA="
    rtpdec
    rtpenc_chain
    sinewin
    tpeldsp
    videodsp
    vp3dsp
 "
@@ -1820,7 +1821,7 @@ sipr_decoder_select="lsp"
 sp5x_decoder_select="mjpeg_decoder"
 svq1_decoder_select="hpeldsp"
 svq1_encoder_select="aandcttables dsputil hpeldsp mpegvideoenc"
 svq3_decoder_select="h264_decoder hpeldsp"
 svq3_decoder_select="h264_decoder hpeldsp tpeldsp"
 svq3_decoder_suggest="zlib"
 tak_decoder_select="dsputil"
 theora_decoder_select="vp3_decoder"
--- a/doc/optimization.txt
+++ b/doc/optimization.txt
@@ -79,9 +79,6 @@ qpel{8,16}_mc??_old_c / *pixels{8,16}_l4
    Just used to work around a bug in an old libavcodec encoder version.
    Don't optimize them.

 tpel_mc_func {put,avg}_tpel_pixels_tab
    Used only for SVQ3, so only optimize them if you need fast SVQ3 decoding.

 add_bytes/diff_bytes
    For huffyuv only, optimize if you want a faster ffhuffyuv codec.

--- a/libavcodec/Makefile
+++ b/libavcodec/Makefile
@@ -65,6 +65,7 @@ OBJS-$(CONFIG_RANGECODER)              += rangecoder.o
 RDFT-OBJS-$(CONFIG_HARDCODED_TABLES)   += sin_tables.o
 OBJS-$(CONFIG_RDFT)                    += rdft.o $(RDFT-OBJS-yes)
 OBJS-$(CONFIG_SINEWIN)                 += sinewin.o
 OBJS-$(CONFIG_TPELDSP)                 += tpeldsp.o
 OBJS-$(CONFIG_VAAPI)                   += vaapi.o
 OBJS-$(CONFIG_VDPAU)                   += vdpau.o
 OBJS-$(CONFIG_VIDEODSP)                += videodsp.o
--- a/libavcodec/dsputil.c
+++ b/libavcodec/dsputil.c
@@ -48,6 +48,7 @@ uint32_t ff_square_tab[512] = { 0, };
 #undef BIT_DEPTH

 #define BIT_DEPTH 8
 #include "tpel_template.c"
 #include "dsputil_template.c"

 // 0x7f7f7f7f or 0x7f7f7f7f7f7f7f7f or whatever, depending on the cpu's native arithmetic size
@@ -540,284 +541,6 @@ void ff_gmc_c(uint8_t *dst, uint8_t *src, int stride, int h, int ox, int oy,
    }
 }

 static inline void put_tpel_pixels_mc00_c(uint8_t *dst, const uint8_t *src,
                                          int stride, int width, int height)
 {
    switch (width) {
    case 2:
        put_pixels2_8_c(dst, src, stride, height);
        break;
    case 4:
        put_pixels4_8_c(dst, src, stride, height);
        break;
    case 8:
        put_pixels8_8_c(dst, src, stride, height);
        break;
    case 16:
        put_pixels16_8_c(dst, src, stride, height);
        break;
    }
 }

 static inline void put_tpel_pixels_mc10_c(uint8_t *dst, const uint8_t *src,
                                          int stride, int width, int height)
 {
    int i, j;

    for (i = 0; i < height; i++) {
        for (j = 0; j < width; j++)
            dst[j] = ((2 * src[j] + src[j + 1] + 1) *
                      683) >> 11;
        src += stride;
        dst += stride;
    }
 }

 static inline void put_tpel_pixels_mc20_c(uint8_t *dst, const uint8_t *src,
                                          int stride, int width, int height)
 {
    int i, j;

    for (i = 0; i < height; i++) {
        for (j = 0; j < width; j++)
            dst[j] = ((src[j] + 2 * src[j + 1] + 1) *
                      683) >> 11;
        src += stride;
        dst += stride;
    }
 }

 static inline void put_tpel_pixels_mc01_c(uint8_t *dst, const uint8_t *src,
                                          int stride, int width, int height)
 {
    int i, j;

    for (i = 0; i < height; i++) {
        for (j = 0; j < width; j++)
            dst[j] = ((2 * src[j] + src[j + stride] + 1) *
                      683) >> 11;
        src += stride;
        dst += stride;
    }
 }

 static inline void put_tpel_pixels_mc11_c(uint8_t *dst, const uint8_t *src,
                                          int stride, int width, int height)
 {
    int i, j;

    for (i = 0; i < height; i++) {
        for (j = 0; j < width; j++)
            dst[j] = ((4 * src[j]          + 3 * src[j + 1] +
                       3 * src[j + stride] + 2 * src[j + stride + 1] + 6) *
                      2731) >> 15;
        src += stride;
        dst += stride;
    }
 }

 static inline void put_tpel_pixels_mc12_c(uint8_t *dst, const uint8_t *src,
                                          int stride, int width, int height)
 {
    int i, j;

    for (i = 0; i < height; i++) {
        for (j = 0; j < width; j++)
            dst[j] = ((3 * src[j]          + 2 * src[j + 1] +
                       4 * src[j + stride] + 3 * src[j + stride + 1] + 6) *
                      2731) >> 15;
        src += stride;
        dst += stride;
    }
 }

 static inline void put_tpel_pixels_mc02_c(uint8_t *dst, const uint8_t *src,
                                          int stride, int width, int height)
 {
    int i, j;

    for (i = 0; i < height; i++) {
        for (j = 0; j < width; j++)
            dst[j] = ((src[j] + 2 * src[j + stride] + 1) *
                      683) >> 11;
        src += stride;
        dst += stride;
    }
 }

 static inline void put_tpel_pixels_mc21_c(uint8_t *dst, const uint8_t *src,
                                          int stride, int width, int height)
 {
    int i, j;

    for (i = 0; i < height; i++) {
        for (j = 0; j < width; j++)
            dst[j] = ((3 * src[j]          + 4 * src[j + 1] +
                       2 * src[j + stride] + 3 * src[j + stride + 1] + 6) *
                      2731) >> 15;
        src += stride;
        dst += stride;
    }
 }

 static inline void put_tpel_pixels_mc22_c(uint8_t *dst, const uint8_t *src,
                                          int stride, int width, int height)
 {
    int i, j;

    for (i = 0; i < height; i++) {
        for (j = 0; j < width; j++)
            dst[j] = ((2 * src[j]          + 3 * src[j + 1] +
                       3 * src[j + stride] + 4 * src[j + stride + 1] + 6) *
                      2731) >> 15;
        src += stride;
        dst += stride;
    }
 }

 static inline void avg_tpel_pixels_mc00_c(uint8_t *dst, const uint8_t *src,
                                          int stride, int width, int height)
 {
    switch (width) {
    case 2:
        avg_pixels2_8_c(dst, src, stride, height);
        break;
    case 4:
        avg_pixels4_8_c(dst, src, stride, height);
        break;
    case 8:
        avg_pixels8_8_c(dst, src, stride, height);
        break;
    case 16:
        avg_pixels16_8_c(dst, src, stride, height);
        break;
    }
 }

 static inline void avg_tpel_pixels_mc10_c(uint8_t *dst, const uint8_t *src,
                                          int stride, int width, int height)
 {
    int i, j;

    for (i = 0; i < height; i++) {
        for (j = 0; j < width; j++)
            dst[j] = (dst[j] +
                      (((2 * src[j] + src[j + 1] + 1) *
                        683) >> 11) + 1) >> 1;
        src += stride;
        dst += stride;
    }
 }

 static inline void avg_tpel_pixels_mc20_c(uint8_t *dst, const uint8_t *src,
                                          int stride, int width, int height)
 {
    int i, j;

    for (i = 0; i < height; i++) {
        for (j = 0; j < width; j++)
            dst[j] = (dst[j] +
                      (((src[j] + 2 * src[j + 1] + 1) *
                        683) >> 11) + 1) >> 1;
        src += stride;
        dst += stride;
    }
 }

 static inline void avg_tpel_pixels_mc01_c(uint8_t *dst, const uint8_t *src,
                                          int stride, int width, int height)
 {
    int i, j;

    for (i = 0; i < height; i++) {
        for (j = 0; j < width; j++)
            dst[j] = (dst[j] +
                      (((2 * src[j] + src[j + stride] + 1) *
                        683) >> 11) + 1) >> 1;
        src += stride;
        dst += stride;
    }
 }

 static inline void avg_tpel_pixels_mc11_c(uint8_t *dst, const uint8_t *src,
                                          int stride, int width, int height)
 {
    int i, j;

    for (i = 0; i < height; i++) {
        for (j = 0; j < width; j++)
            dst[j] = (dst[j] +
                      (((4 * src[j]          + 3 * src[j + 1] +
                         3 * src[j + stride] + 2 * src[j + stride + 1] + 6) *
                        2731) >> 15) + 1) >> 1;
        src += stride;
        dst += stride;
    }
 }

 static inline void avg_tpel_pixels_mc12_c(uint8_t *dst, const uint8_t *src,
                                          int stride, int width, int height)
 {
    int i, j;

    for (i = 0; i < height; i++) {
        for (j = 0; j < width; j++)
            dst[j] = (dst[j] +
                      (((3 * src[j]          + 2 * src[j + 1] +
                         4 * src[j + stride] + 3 * src[j + stride + 1] + 6) *
                        2731) >> 15) + 1) >> 1;
        src += stride;
        dst += stride;
    }
 }

 static inline void avg_tpel_pixels_mc02_c(uint8_t *dst, const uint8_t *src,
                                          int stride, int width, int height)
 {
    int i, j;

    for (i = 0; i < height; i++) {
        for (j = 0; j < width; j++)
            dst[j] = (dst[j] +
                      (((src[j] + 2 * src[j + stride] + 1) *
                        683) >> 11) + 1) >> 1;
        src += stride;
        dst += stride;
    }
 }

 static inline void avg_tpel_pixels_mc21_c(uint8_t *dst, const uint8_t *src,
                                          int stride, int width, int height)
 {
    int i, j;

    for (i = 0; i < height; i++) {
        for (j = 0; j < width; j++)
            dst[j] = (dst[j] +
                      (((3 * src[j]          + 4 * src[j + 1] +
                         2 * src[j + stride] + 3 * src[j + stride + 1] + 6) *
                        2731) >> 15) + 1) >> 1;
        src += stride;
        dst += stride;
    }
 }

 static inline void avg_tpel_pixels_mc22_c(uint8_t *dst, const uint8_t *src,
                                          int stride, int width, int height)
 {
    int i, j;

    for (i = 0; i < height; i++) {
        for (j = 0; j < width; j++)
            dst[j] = (dst[j] +
                      (((2 * src[j]          + 3 * src[j + 1] +
                         3 * src[j + stride] + 4 * src[j + stride + 1] + 6) *
                        2731) >> 15) + 1) >> 1;
        src += stride;
        dst += stride;
    }
 }

 #define QPEL_MC(r, OPNAME, RND, OP)                                           \
 static void OPNAME ## mpeg4_qpel8_h_lowpass(uint8_t *dst, uint8_t *src,       \
                                            int dstStride, int srcStride,     \
@@ -2781,26 +2504,6 @@ av_cold void ff_dsputil_init(DSPContext *c, AVCodecContext *avctx)
    c->pix_abs[1][2] = pix_abs8_y2_c;
    c->pix_abs[1][3] = pix_abs8_xy2_c;

    c->put_tpel_pixels_tab[0]  = put_tpel_pixels_mc00_c;
    c->put_tpel_pixels_tab[1]  = put_tpel_pixels_mc10_c;
    c->put_tpel_pixels_tab[2]  = put_tpel_pixels_mc20_c;
    c->put_tpel_pixels_tab[4]  = put_tpel_pixels_mc01_c;
    c->put_tpel_pixels_tab[5]  = put_tpel_pixels_mc11_c;
    c->put_tpel_pixels_tab[6]  = put_tpel_pixels_mc21_c;
    c->put_tpel_pixels_tab[8]  = put_tpel_pixels_mc02_c;
    c->put_tpel_pixels_tab[9]  = put_tpel_pixels_mc12_c;
    c->put_tpel_pixels_tab[10] = put_tpel_pixels_mc22_c;

    c->avg_tpel_pixels_tab[0]  = avg_tpel_pixels_mc00_c;
    c->avg_tpel_pixels_tab[1]  = avg_tpel_pixels_mc10_c;
    c->avg_tpel_pixels_tab[2]  = avg_tpel_pixels_mc20_c;
    c->avg_tpel_pixels_tab[4]  = avg_tpel_pixels_mc01_c;
    c->avg_tpel_pixels_tab[5]  = avg_tpel_pixels_mc11_c;
    c->avg_tpel_pixels_tab[6]  = avg_tpel_pixels_mc21_c;
    c->avg_tpel_pixels_tab[8]  = avg_tpel_pixels_mc02_c;
    c->avg_tpel_pixels_tab[9]  = avg_tpel_pixels_mc12_c;
    c->avg_tpel_pixels_tab[10] = avg_tpel_pixels_mc22_c;

 #define dspfunc(PFX, IDX, NUM)                              \
    c->PFX ## _pixels_tab[IDX][0]  = PFX ## NUM ## _mc00_c; \
    c->PFX ## _pixels_tab[IDX][1]  = PFX ## NUM ## _mc10_c; \
--- a/libavcodec/dsputil.h
+++ b/libavcodec/dsputil.h
@@ -71,9 +71,6 @@ void ff_gmc_c(uint8_t *dst, uint8_t *src, int stride, int h, int ox, int oy,
 * Block sizes for op_pixels_func are 8x4,8x8 16x8 16x16.
 * h for op_pixels_func is limited to { width / 2, width },
 * but never larger than 16 and never smaller than 4. */
 typedef void (*tpel_mc_func)(uint8_t *block /* align width (8 or 16) */,
                             const uint8_t *pixels /* align 1 */,
                             int line_size, int w, int h);
 typedef void (*qpel_mc_func)(uint8_t *dst /* align width (8 or 16) */,
                             uint8_t *src /* align 1 */, ptrdiff_t stride);

@@ -188,19 +185,6 @@ typedef struct DSPContext {
    int (*ssd_int8_vs_int16)(const int8_t *pix1, const int16_t *pix2,
                             int size);

    /**
     * Thirdpel motion compensation with rounding (a + b + 1) >> 1.
     * this is an array[12] of motion compensation functions for the
     * 9 thirdpel positions<br>
     * *pixels_tab[xthirdpel + 4 * ythirdpel]
     * @param block destination where the result is stored
     * @param pixels source
     * @param line_size number of bytes in a horizontal line of block
     * @param h height
     */
    tpel_mc_func put_tpel_pixels_tab[11]; // FIXME individual func ptr per width?
    tpel_mc_func avg_tpel_pixels_tab[11]; // FIXME individual func ptr per width?

    qpel_mc_func put_qpel_pixels_tab[2][16];
    qpel_mc_func avg_qpel_pixels_tab[2][16];
    qpel_mc_func put_no_rnd_qpel_pixels_tab[2][16];
--- a/libavcodec/h264qpel_template.c
+++ b/libavcodec/h264qpel_template.c
@@ -24,6 +24,7 @@

 #include "bit_depth_template.c"
 #include "hpel_template.c"
 #include "tpel_template.c"

 static inline void FUNC(copy_block2)(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride, int h)
 {
--- a/libavcodec/hpel_template.c
+++ b/libavcodec/hpel_template.c
@@ -22,47 +22,6 @@
 #include "pixels.h"

 #define DEF_HPEL(OPNAME, OP)                                            \
 static inline void FUNCC(OPNAME ## _pixels2)(uint8_t *block,            \
                                             const uint8_t *pixels,     \
                                             ptrdiff_t line_size,       \
                                             int h)                     \
 {                                                                       \
    int i;                                                              \
    for (i = 0; i < h; i++) {                                           \
        OP(*((pixel2 *) block), AV_RN2P(pixels));                       \
        pixels += line_size;                                            \
        block  += line_size;                                            \
    }                                                                   \
 }                                                                       \
                                                                        \
 static inline void FUNCC(OPNAME ## _pixels4)(uint8_t *block,            \
                                             const uint8_t *pixels,     \
                                             ptrdiff_t line_size,       \
                                             int h)                     \
 {                                                                       \
    int i;                                                              \
    for (i = 0; i < h; i++) {                                           \
        OP(*((pixel4 *) block), AV_RN4P(pixels));                       \
        pixels += line_size;                                            \
        block  += line_size;                                            \
    }                                                                   \
 }                                                                       \
                                                                        \
 static inline void FUNCC(OPNAME ## _pixels8)(uint8_t *block,            \
                                             const uint8_t *pixels,     \
                                             ptrdiff_t line_size,       \
                                             int h)                     \
 {                                                                       \
    int i;                                                              \
    for (i = 0; i < h; i++) {                                           \
        OP(*((pixel4 *) block), AV_RN4P(pixels));                       \
        OP(*((pixel4 *) (block + 4 * sizeof(pixel))),                   \
           AV_RN4P(pixels + 4 * sizeof(pixel)));                        \
        pixels += line_size;                                            \
        block  += line_size;                                            \
    }                                                                   \
 }                                                                       \
                                                                        \
 static inline void FUNC(OPNAME ## _pixels8_l2)(uint8_t *dst,            \
                                               const uint8_t *src1,     \
                                               const uint8_t *src2,     \
@@ -134,10 +93,6 @@ static inline void FUNC(OPNAME ## _pixels16_l2)(uint8_t *dst,           \
                                dst_stride, src_stride1,                \
                                src_stride2, h);                        \
 }                                                                       \
                                                                        \
 CALL_2X_PIXELS(FUNCC(OPNAME ## _pixels16),                              \
               FUNCC(OPNAME ## _pixels8),                               \
               8 * sizeof(pixel))

 #define op_avg(a, b) a = rnd_avg_pixel4(a, b)
 #define op_put(a, b) a = b
--- a/libavcodec/hpeldsp_template.c
+++ b/libavcodec/hpeldsp_template.c
@@ -33,6 +33,7 @@
 #include "bit_depth_template.c"

 #include "hpel_template.c"
 #include "tpel_template.c"

 #define PIXOP2(OPNAME, OP)                                              \
 static inline void FUNC(OPNAME ## _no_rnd_pixels8_l2)(uint8_t *dst,     \
--- a/libavcodec/svq3.c
+++ b/libavcodec/svq3.c
@@ -54,6 +54,7 @@
 #include "golomb.h"
 #include "hpeldsp.h"
 #include "rectangle.h"
 #include "tpeldsp.h"

 #if CONFIG_ZLIB
 #include <zlib.h>
@@ -70,6 +71,7 @@
 typedef struct {
    H264Context h;
    HpelDSPContext hdsp;
    TpelDSPContext tdsp;
    H264Picture *cur_pic;
    H264Picture *next_pic;
    H264Picture *last_pic;
@@ -321,9 +323,9 @@ static inline void svq3_mc_dir_part(SVQ3Context *s,
        src = h->edge_emu_buffer;
    }
    if (thirdpel)
        (avg ? h->dsp.avg_tpel_pixels_tab
             : h->dsp.put_tpel_pixels_tab)[dxy](dest, src, h->linesize,
                                                width, height);
        (avg ? s->tdsp.avg_tpel_pixels_tab
             : s->tdsp.put_tpel_pixels_tab)[dxy](dest, src, h->linesize,
                                                 width, height);
    else
        (avg ? s->hdsp.avg_pixels_tab
             : s->hdsp.put_pixels_tab)[blocksize][dxy](dest, src, h->linesize,
@@ -349,10 +351,10 @@ static inline void svq3_mc_dir_part(SVQ3Context *s,
                src = h->edge_emu_buffer;
            }
            if (thirdpel)
                (avg ? h->dsp.avg_tpel_pixels_tab
                     : h->dsp.put_tpel_pixels_tab)[dxy](dest, src,
                                                        h->uvlinesize,
                                                        width, height);
                (avg ? s->tdsp.avg_tpel_pixels_tab
                     : s->tdsp.put_tpel_pixels_tab)[dxy](dest, src,
                                                         h->uvlinesize,
                                                         width, height);
            else
                (avg ? s->hdsp.avg_pixels_tab
                     : s->hdsp.put_pixels_tab)[blocksize][dxy](dest, src,
@@ -881,6 +883,8 @@ static av_cold int svq3_decode_init(AVCodecContext *avctx)
        return -1;

    ff_hpeldsp_init(&s->hdsp, avctx->flags);
    ff_tpeldsp_init(&s->tdsp);

    h->flags           = avctx->flags;
    h->is_complex      = 1;
    h->picture_structure = PICT_FRAME;
--- a/libavcodec/tpel_template.c
+++ b/libavcodec/tpel_template.c
@@ -0,0 +1,80 @@
 /*
 * This file is part of Libav.
 *
 * Libav is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * Libav is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with Libav; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

 #include <stddef.h>
 #include <stdint.h>

 #include "libavutil/intreadwrite.h"
 #include "pixels.h"
 #include "rnd_avg.h"

 #include "bit_depth_template.c"

 /*
  * DEF_TPEL(OPNAME, OP) generates the full-pel helpers used by the thirdpel
  * code for the current bit depth:
  *
  *   OPNAME_pixels2 / OPNAME_pixels4 / OPNAME_pixels8 (names decorated by FUNCC)
  *
  * Each helper walks h rows. Per row it applies OP(dst_word, src_word), where
  * dst_word is the pixel2/pixel4-sized lvalue at block and src_word is loaded
  * from pixels with AV_RN2P/AV_RN4P; both pointers then advance by line_size.
  * The 8-wide helper processes two pixel4 words per row.
  *
  * An OPNAME_pixels16 helper is produced from OPNAME_pixels8 through
  * CALL_2X_PIXELS with an offset of 8 * sizeof(pixel). CALL_2X_PIXELS is not
  * defined in this file (presumably it comes from pixels.h -- TODO confirm).
  *
  * OP must be a macro taking (destination lvalue, source value).
  */
 #define DEF_TPEL(OPNAME, OP)                                            \
 static inline void FUNCC(OPNAME ## _pixels2)(uint8_t *block,            \
                                             const uint8_t *pixels,     \
                                             ptrdiff_t line_size,       \
                                             int h)                     \
 {                                                                       \
    int i;                                                              \
    for (i = 0; i < h; i++) {                                           \
        OP(*((pixel2 *) block), AV_RN2P(pixels));                       \
        pixels += line_size;                                            \
        block  += line_size;                                            \
    }                                                                   \
 }                                                                       \
                                                                        \
 static inline void FUNCC(OPNAME ## _pixels4)(uint8_t *block,            \
                                             const uint8_t *pixels,     \
                                             ptrdiff_t line_size,       \
                                             int h)                     \
 {                                                                       \
    int i;                                                              \
    for (i = 0; i < h; i++) {                                           \
        OP(*((pixel4 *) block), AV_RN4P(pixels));                       \
        pixels += line_size;                                            \
        block  += line_size;                                            \
    }                                                                   \
 }                                                                       \
                                                                        \
 static inline void FUNCC(OPNAME ## _pixels8)(uint8_t *block,            \
                                             const uint8_t *pixels,     \
                                             ptrdiff_t line_size,       \
                                             int h)                     \
 {                                                                       \
    int i;                                                              \
    for (i = 0; i < h; i++) {                                           \
        OP(*((pixel4 *) block), AV_RN4P(pixels));                       \
        OP(*((pixel4 *) (block + 4 * sizeof(pixel))),                   \
           AV_RN4P(pixels + 4 * sizeof(pixel)));                        \
        pixels += line_size;                                            \
        block  += line_size;                                            \
    }                                                                   \
 }                                                                       \
                                                                        \
 CALL_2X_PIXELS(FUNCC(OPNAME ## _pixels16),                              \
                FUNCC(OPNAME ## _pixels8),                               \
                8 * sizeof(pixel))

 /* op_avg stores rnd_avg_pixel4(dst, src) into dst; op_put overwrites dst. */
 #define op_avg(a, b) a = rnd_avg_pixel4(a, b)
 #define op_put(a, b) a = b

 /* Instantiate the avg_* and put_* helper families, then drop the operator
  * macros again so they do not leak into the including file. */
 DEF_TPEL(avg, op_avg)
 DEF_TPEL(put, op_put)
 #undef op_avg
 #undef op_put
--- a/libavcodec/tpeldsp.c
+++ b/libavcodec/tpeldsp.c
@@ -0,0 +1,333 @@
 /*
 * thirdpel DSP functions
 *
 * This file is part of Libav.
 *
 * Libav is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * Libav is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with Libav; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

 /**
 * @file
 * thirdpel DSP functions
 */

 #include <stdint.h>

 #include "libavutil/attributes.h"
 #include "tpeldsp.h"

 #define BIT_DEPTH 8
 #include "tpel_template.c"

 /* Full-pel position (x = 0, y = 0): plain copy, forwarded to the fixed-width
  * put_pixels helper matching the block width. Widths other than 2, 4, 8 and
  * 16 leave dst untouched. */
 static inline void put_tpel_pixels_mc00_c(uint8_t *dst, const uint8_t *src,
                                           int stride, int width, int height)
 {
    if (width == 2)
        put_pixels2_8_c(dst, src, stride, height);
    else if (width == 4)
        put_pixels4_8_c(dst, src, stride, height);
    else if (width == 8)
        put_pixels8_8_c(dst, src, stride, height);
    else if (width == 16)
        put_pixels16_8_c(dst, src, stride, height);
 }

 /* Thirdpel position x = 1, y = 0: horizontal blend weighting the current
  * pixel 2:1 against its right neighbour; (v * 683) >> 11 approximates v / 3. */
 static inline void put_tpel_pixels_mc10_c(uint8_t *dst, const uint8_t *src,
                                           int stride, int width, int height)
 {
    int x, y;

    for (y = 0; y < height; y++, src += stride, dst += stride) {
        for (x = 0; x < width; x++) {
            const int acc = 2 * src[x] + src[x + 1] + 1;

            dst[x] = (acc * 683) >> 11;
        }
    }
 }

 /* Thirdpel position x = 2, y = 0: horizontal blend weighting the current
  * pixel 1:2 against its right neighbour; (v * 683) >> 11 approximates v / 3. */
 static inline void put_tpel_pixels_mc20_c(uint8_t *dst, const uint8_t *src,
                                           int stride, int width, int height)
 {
    int x, y;

    for (y = 0; y < height; y++, src += stride, dst += stride) {
        for (x = 0; x < width; x++) {
            const int acc = src[x] + 2 * src[x + 1] + 1;

            dst[x] = (acc * 683) >> 11;
        }
    }
 }

 /* Thirdpel position x = 0, y = 1: vertical blend weighting the current pixel
  * 2:1 against the pixel one row below; (v * 683) >> 11 approximates v / 3. */
 static inline void put_tpel_pixels_mc01_c(uint8_t *dst, const uint8_t *src,
                                           int stride, int width, int height)
 {
    int x, y;

    for (y = 0; y < height; y++, src += stride, dst += stride) {
        const uint8_t *below = src + stride;

        for (x = 0; x < width; x++) {
            const int acc = 2 * src[x] + below[x] + 1;

            dst[x] = (acc * 683) >> 11;
        }
    }
 }

 /* Thirdpel position x = 1, y = 1: 2x2 blend with weights 4/3/3/2 (sum 12);
  * (v * 2731) >> 15 approximates v / 12. */
 static inline void put_tpel_pixels_mc11_c(uint8_t *dst, const uint8_t *src,
                                           int stride, int width, int height)
 {
    int x, y;

    for (y = 0; y < height; y++, src += stride, dst += stride) {
        const uint8_t *below = src + stride;

        for (x = 0; x < width; x++) {
            const int acc = 4 * src[x]   + 3 * src[x + 1] +
                            3 * below[x] + 2 * below[x + 1] + 6;

            dst[x] = (acc * 2731) >> 15;
        }
    }
 }

 /* Thirdpel position x = 1, y = 2: 2x2 blend with weights 3/2/4/3 (sum 12);
  * (v * 2731) >> 15 approximates v / 12. */
 static inline void put_tpel_pixels_mc12_c(uint8_t *dst, const uint8_t *src,
                                           int stride, int width, int height)
 {
    int x, y;

    for (y = 0; y < height; y++, src += stride, dst += stride) {
        const uint8_t *below = src + stride;

        for (x = 0; x < width; x++) {
            const int acc = 3 * src[x]   + 2 * src[x + 1] +
                            4 * below[x] + 3 * below[x + 1] + 6;

            dst[x] = (acc * 2731) >> 15;
        }
    }
 }

 /* Thirdpel position x = 0, y = 2: vertical blend weighting the current pixel
  * 1:2 against the pixel one row below; (v * 683) >> 11 approximates v / 3. */
 static inline void put_tpel_pixels_mc02_c(uint8_t *dst, const uint8_t *src,
                                           int stride, int width, int height)
 {
    int x, y;

    for (y = 0; y < height; y++, src += stride, dst += stride) {
        const uint8_t *below = src + stride;

        for (x = 0; x < width; x++) {
            const int acc = src[x] + 2 * below[x] + 1;

            dst[x] = (acc * 683) >> 11;
        }
    }
 }

 /* Thirdpel position x = 2, y = 1: 2x2 blend with weights 3/4/2/3 (sum 12);
  * (v * 2731) >> 15 approximates v / 12. */
 static inline void put_tpel_pixels_mc21_c(uint8_t *dst, const uint8_t *src,
                                           int stride, int width, int height)
 {
    int x, y;

    for (y = 0; y < height; y++, src += stride, dst += stride) {
        const uint8_t *below = src + stride;

        for (x = 0; x < width; x++) {
            const int acc = 3 * src[x]   + 4 * src[x + 1] +
                            2 * below[x] + 3 * below[x + 1] + 6;

            dst[x] = (acc * 2731) >> 15;
        }
    }
 }

 /* Thirdpel position x = 2, y = 2: 2x2 blend with weights 2/3/3/4 (sum 12);
  * (v * 2731) >> 15 approximates v / 12. */
 static inline void put_tpel_pixels_mc22_c(uint8_t *dst, const uint8_t *src,
                                           int stride, int width, int height)
 {
    int x, y;

    for (y = 0; y < height; y++, src += stride, dst += stride) {
        const uint8_t *below = src + stride;

        for (x = 0; x < width; x++) {
            const int acc = 2 * src[x]   + 3 * src[x + 1] +
                            3 * below[x] + 4 * below[x + 1] + 6;

            dst[x] = (acc * 2731) >> 15;
        }
    }
 }

 /* Full-pel position (x = 0, y = 0), averaging variant: forwarded to the
  * fixed-width avg_pixels helper matching the block width. Widths other than
  * 2, 4, 8 and 16 leave dst untouched. */
 static inline void avg_tpel_pixels_mc00_c(uint8_t *dst, const uint8_t *src,
                                           int stride, int width, int height)
 {
    if (width == 2)
        avg_pixels2_8_c(dst, src, stride, height);
    else if (width == 4)
        avg_pixels4_8_c(dst, src, stride, height);
    else if (width == 8)
        avg_pixels8_8_c(dst, src, stride, height);
    else if (width == 16)
        avg_pixels16_8_c(dst, src, stride, height);
 }

 /* Thirdpel position x = 1, y = 0, averaging variant: the 2:1 horizontal
  * prediction is combined with the existing dst pixel as (d + p + 1) >> 1. */
 static inline void avg_tpel_pixels_mc10_c(uint8_t *dst, const uint8_t *src,
                                           int stride, int width, int height)
 {
    int x, y;

    for (y = 0; y < height; y++, src += stride, dst += stride) {
        for (x = 0; x < width; x++) {
            const int pred = ((2 * src[x] + src[x + 1] + 1) * 683) >> 11;

            dst[x] = (dst[x] + pred + 1) >> 1;
        }
    }
 }

 /* Thirdpel position x = 2, y = 0, averaging variant: the 1:2 horizontal
  * prediction is combined with the existing dst pixel as (d + p + 1) >> 1. */
 static inline void avg_tpel_pixels_mc20_c(uint8_t *dst, const uint8_t *src,
                                           int stride, int width, int height)
 {
    int x, y;

    for (y = 0; y < height; y++, src += stride, dst += stride) {
        for (x = 0; x < width; x++) {
            const int pred = ((src[x] + 2 * src[x + 1] + 1) * 683) >> 11;

            dst[x] = (dst[x] + pred + 1) >> 1;
        }
    }
 }

 /* Thirdpel position x = 0, y = 1, averaging variant: the 2:1 vertical
  * prediction is combined with the existing dst pixel as (d + p + 1) >> 1. */
 static inline void avg_tpel_pixels_mc01_c(uint8_t *dst, const uint8_t *src,
                                           int stride, int width, int height)
 {
    int x, y;

    for (y = 0; y < height; y++, src += stride, dst += stride) {
        const uint8_t *below = src + stride;

        for (x = 0; x < width; x++) {
            const int pred = ((2 * src[x] + below[x] + 1) * 683) >> 11;

            dst[x] = (dst[x] + pred + 1) >> 1;
        }
    }
 }

 /* Thirdpel position x = 1, y = 1, averaging variant: the 4/3/3/2 weighted
  * 2x2 prediction is combined with the existing dst pixel as (d + p + 1) >> 1. */
 static inline void avg_tpel_pixels_mc11_c(uint8_t *dst, const uint8_t *src,
                                           int stride, int width, int height)
 {
    int x, y;

    for (y = 0; y < height; y++, src += stride, dst += stride) {
        const uint8_t *below = src + stride;

        for (x = 0; x < width; x++) {
            const int acc  = 4 * src[x]   + 3 * src[x + 1] +
                             3 * below[x] + 2 * below[x + 1] + 6;
            const int pred = (acc * 2731) >> 15;

            dst[x] = (dst[x] + pred + 1) >> 1;
        }
    }
 }

 /* Thirdpel position x = 1, y = 2, averaging variant: the 3/2/4/3 weighted
  * 2x2 prediction is combined with the existing dst pixel as (d + p + 1) >> 1. */
 static inline void avg_tpel_pixels_mc12_c(uint8_t *dst, const uint8_t *src,
                                           int stride, int width, int height)
 {
    int x, y;

    for (y = 0; y < height; y++, src += stride, dst += stride) {
        const uint8_t *below = src + stride;

        for (x = 0; x < width; x++) {
            const int acc  = 3 * src[x]   + 2 * src[x + 1] +
                             4 * below[x] + 3 * below[x + 1] + 6;
            const int pred = (acc * 2731) >> 15;

            dst[x] = (dst[x] + pred + 1) >> 1;
        }
    }
 }

 /* Thirdpel position x = 0, y = 2, averaging variant: the 1:2 vertical
  * prediction is combined with the existing dst pixel as (d + p + 1) >> 1. */
 static inline void avg_tpel_pixels_mc02_c(uint8_t *dst, const uint8_t *src,
                                           int stride, int width, int height)
 {
    int x, y;

    for (y = 0; y < height; y++, src += stride, dst += stride) {
        const uint8_t *below = src + stride;

        for (x = 0; x < width; x++) {
            const int pred = ((src[x] + 2 * below[x] + 1) * 683) >> 11;

            dst[x] = (dst[x] + pred + 1) >> 1;
        }
    }
 }

 /* Thirdpel position x = 2, y = 1, averaging variant: the 3/4/2/3 weighted
  * 2x2 prediction is combined with the existing dst pixel as (d + p + 1) >> 1. */
 static inline void avg_tpel_pixels_mc21_c(uint8_t *dst, const uint8_t *src,
                                           int stride, int width, int height)
 {
    int x, y;

    for (y = 0; y < height; y++, src += stride, dst += stride) {
        const uint8_t *below = src + stride;

        for (x = 0; x < width; x++) {
            const int acc  = 3 * src[x]   + 4 * src[x + 1] +
                             2 * below[x] + 3 * below[x + 1] + 6;
            const int pred = (acc * 2731) >> 15;

            dst[x] = (dst[x] + pred + 1) >> 1;
        }
    }
 }

 /* Thirdpel position x = 2, y = 2, averaging variant: the 2/3/3/4 weighted
  * 2x2 prediction is combined with the existing dst pixel as (d + p + 1) >> 1. */
 static inline void avg_tpel_pixels_mc22_c(uint8_t *dst, const uint8_t *src,
                                           int stride, int width, int height)
 {
    int x, y;

    for (y = 0; y < height; y++, src += stride, dst += stride) {
        const uint8_t *below = src + stride;

        for (x = 0; x < width; x++) {
            const int acc  = 2 * src[x]   + 3 * src[x + 1] +
                             3 * below[x] + 4 * below[x + 1] + 6;
            const int pred = (acc * 2731) >> 15;

            dst[x] = (dst[x] + pred + 1) >> 1;
        }
    }
 }

 /**
  * Fill a thirdpel DSP context with the C implementations from this file.
  *
  * Table entries are addressed as xthirdpel + 4 * ythirdpel with both
  * coordinates in 0..2, so only 9 of the 11 slots map to a position.
  * The two remaining slots (3 and 7) are set to NULL so that the context is
  * fully defined even when the caller did not zero it beforehand.
  *
  * @param c context to initialize; every entry of both tables is written
  */
 av_cold void ff_tpeldsp_init(TpelDSPContext *c)
 {
    c->put_tpel_pixels_tab[ 0] = put_tpel_pixels_mc00_c;
    c->put_tpel_pixels_tab[ 1] = put_tpel_pixels_mc10_c;
    c->put_tpel_pixels_tab[ 2] = put_tpel_pixels_mc20_c;
    c->put_tpel_pixels_tab[ 3] = NULL; /* no thirdpel position maps here */
    c->put_tpel_pixels_tab[ 4] = put_tpel_pixels_mc01_c;
    c->put_tpel_pixels_tab[ 5] = put_tpel_pixels_mc11_c;
    c->put_tpel_pixels_tab[ 6] = put_tpel_pixels_mc21_c;
    c->put_tpel_pixels_tab[ 7] = NULL; /* no thirdpel position maps here */
    c->put_tpel_pixels_tab[ 8] = put_tpel_pixels_mc02_c;
    c->put_tpel_pixels_tab[ 9] = put_tpel_pixels_mc12_c;
    c->put_tpel_pixels_tab[10] = put_tpel_pixels_mc22_c;

    c->avg_tpel_pixels_tab[ 0] = avg_tpel_pixels_mc00_c;
    c->avg_tpel_pixels_tab[ 1] = avg_tpel_pixels_mc10_c;
    c->avg_tpel_pixels_tab[ 2] = avg_tpel_pixels_mc20_c;
    c->avg_tpel_pixels_tab[ 3] = NULL; /* no thirdpel position maps here */
    c->avg_tpel_pixels_tab[ 4] = avg_tpel_pixels_mc01_c;
    c->avg_tpel_pixels_tab[ 5] = avg_tpel_pixels_mc11_c;
    c->avg_tpel_pixels_tab[ 6] = avg_tpel_pixels_mc21_c;
    c->avg_tpel_pixels_tab[ 7] = NULL; /* no thirdpel position maps here */
    c->avg_tpel_pixels_tab[ 8] = avg_tpel_pixels_mc02_c;
    c->avg_tpel_pixels_tab[ 9] = avg_tpel_pixels_mc12_c;
    c->avg_tpel_pixels_tab[10] = avg_tpel_pixels_mc22_c;
 }
--- a/libavcodec/tpeldsp.h
+++ b/libavcodec/tpeldsp.h
@@ -0,0 +1,59 @@
 /*
 * thirdpel DSP functions
 *
 * This file is part of Libav.
 *
 * Libav is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * Libav is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with Libav; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

 /**
 * @file
 * thirdpel DSP functions
 */

 #ifndef AVCODEC_TPELDSP_H
 #define AVCODEC_TPELDSP_H

 #include <stdint.h>

 /* add and put pixel (decoding) */
 // NOTE(review): the three lines below still name hpel_pixels_func and look
 // like they were carried over from the halfpel code; confirm that these
 // size limits actually hold for callers of tpel_mc_func.
 // blocksizes for hpel_pixels_func are 8x4,8x8 16x8 16x16
 // h for hpel_pixels_func is limited to {width/2, width} but never larger
 // than 16 and never smaller than 4
 /**
 * Thirdpel motion compensation function.
 *
 * @param block     destination block
 * @param pixels    source samples
 * @param line_size distance between successive rows
 * @param w         block width
 * @param h         block height
 */
 typedef void (*tpel_mc_func)(uint8_t *block /* align width (8 or 16) */,
                             const uint8_t *pixels /* align 1 */,
                             int line_size, int w, int h);

 /**
 * thirdpel DSP context
 *
 * Holds one table of "put" functions and one table of "avg" functions.
 */
 typedef struct TpelDSPContext {
    /**
     * Thirdpel motion compensation with rounding (a + b + 1) >> 1.
     * Each table is an array[11] of motion compensation functions for the
     * 9 thirdpel positions, indexed as<br>
     * *pixels_tab[xthirdpel + 4 * ythirdpel]<br>
     * Slots 3 and 7 do not correspond to any of the 9 positions and are
     * not set by ff_tpeldsp_init().
     * @param block destination where the result is stored
     * @param pixels source
     * @param line_size number of bytes in a horizontal line of block
     * @param w width
     * @param h height
     */
    tpel_mc_func put_tpel_pixels_tab[11]; // FIXME individual func ptr per width?
    tpel_mc_func avg_tpel_pixels_tab[11]; // FIXME individual func ptr per width?
 } TpelDSPContext;

 /**
 * Fill the put and avg thirdpel function tables of a context with the
 * C implementations.
 *
 * @param c context to initialize
 */
 void ff_tpeldsp_init(TpelDSPContext *c);

 #endif /* AVCODEC_TPELDSP_H */