Add add_pixels4/8() to h264dsp, and remove add_pixels4 from dsputil.

These functions are mostly H264-specific (the only other user I can spot is bink), and this allows us to special-case some functionality for H264. Also remove the 16-bit-coeff with >8bpp versions (unused) and merge the duplicate 32-bit-coeff for >8bpp (identical). Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
12 years ago · 7ff1a4b10f
--- a/libavcodec/dsputil.c
+++ b/libavcodec/dsputil.c
@@ -443,6 +443,27 @@ static void put_signed_pixels_clamped_c(const int16_t *block,
    }
 }

 /**
  * Add an 8x8 block of 16-bit values to an 8x8 area of 8-bit pixels.
  *
  * No clamping is performed: each sum is stored back into a uint8_t and
  * therefore wraps modulo 256.
  *
  * @param pixels    top-left pixel of the destination area
  * @param block     64 values in row-major order (8 per row)
  * @param line_size distance between two destination rows, in pixels
  */
 static void add_pixels8_c(uint8_t *av_restrict pixels,
                           int16_t *block,
                           int line_size)
 {
    int row, col;

    for (row = 0; row < 8; row++) {
        /* one destination row receives eight consecutive block values */
        for (col = 0; col < 8; col++)
            pixels[col] += block[col];

        pixels += line_size;
        block  += 8;
    }
 }


 static void add_pixels_clamped_c(const int16_t *block, uint8_t *av_restrict pixels,
                                 int line_size)
 {
@@ -2852,6 +2873,8 @@ av_cold void ff_dsputil_init(DSPContext* c, AVCodecContext *avctx)
    c->shrink[2]= ff_shrink44;
    c->shrink[3]= ff_shrink88;

    c->add_pixels8 = add_pixels8_c;

 #define hpel_funcs(prefix, idx, num) \
    c->prefix ## _pixels_tab idx [0] = prefix ## _pixels ## num ## _8_c; \
    c->prefix ## _pixels_tab idx [1] = prefix ## _pixels ## num ## _x2_8_c; \
@@ -2879,9 +2902,7 @@ av_cold void ff_dsputil_init(DSPContext* c, AVCodecContext *avctx)
    c->get_pixels                    = FUNCC(get_pixels   ## dct   , depth);\
    c->draw_edges                    = FUNCC(draw_edges            , depth);\
    c->clear_block                   = FUNCC(clear_block  ## dct   , depth);\
    c->clear_blocks                  = FUNCC(clear_blocks ## dct   , depth);\
    c->add_pixels8                   = FUNCC(add_pixels8  ## dct   , depth);\
    c->add_pixels4                   = FUNCC(add_pixels4  ## dct   , depth);\
    c->clear_blocks                  = FUNCC(clear_blocks ## dct   , depth)

    switch (avctx->bits_per_raw_sample) {
    case 9:
--- a/libavcodec/dsputil.h
+++ b/libavcodec/dsputil.h
@@ -155,7 +155,6 @@ typedef struct DSPContext {
    void (*put_signed_pixels_clamped)(const int16_t *block/*align 16*/, uint8_t *pixels/*align 8*/, int line_size);
    void (*add_pixels_clamped)(const int16_t *block/*align 16*/, uint8_t *pixels/*align 8*/, int line_size);
    void (*add_pixels8)(uint8_t *pixels, int16_t *block, int line_size);
    void (*add_pixels4)(uint8_t *pixels, int16_t *block, int line_size);
    int (*sum_abs_dctelem)(int16_t *block/*align 16*/);
    /**
     * translational global motion compensation.
--- a/libavcodec/dsputil_template.c
+++ b/libavcodec/dsputil_template.c
@@ -89,48 +89,6 @@ static void FUNCC(get_pixels ## suffix)(int16_t *av_restrict _block,    \
    }                                                                   \
 }                                                                       \
                                                                        \
 static void FUNCC(add_pixels8 ## suffix)(uint8_t *av_restrict _pixels,  \
                                         int16_t *_block,               \
                                         int line_size)                 \
 {                                                                       \
    int i;                                                              \
    pixel *av_restrict pixels = (pixel *av_restrict)_pixels;            \
    dctcoef *block = (dctcoef*)_block;                                  \
    line_size /= sizeof(pixel);                                         \
                                                                        \
    for(i=0;i<8;i++) {                                                  \
        pixels[0] += block[0];                                          \
        pixels[1] += block[1];                                          \
        pixels[2] += block[2];                                          \
        pixels[3] += block[3];                                          \
        pixels[4] += block[4];                                          \
        pixels[5] += block[5];                                          \
        pixels[6] += block[6];                                          \
        pixels[7] += block[7];                                          \
        pixels += line_size;                                            \
        block += 8;                                                     \
    }                                                                   \
 }                                                                       \
                                                                        \
 static void FUNCC(add_pixels4 ## suffix)(uint8_t *av_restrict _pixels,  \
                                         int16_t *_block,               \
                                         int line_size)                 \
 {                                                                       \
    int i;                                                              \
    pixel *av_restrict pixels = (pixel *av_restrict)_pixels;            \
    dctcoef *block = (dctcoef*)_block;                                  \
    line_size /= sizeof(pixel);                                         \
                                                                        \
    for(i=0;i<4;i++) {                                                  \
        pixels[0] += block[0];                                          \
        pixels[1] += block[1];                                          \
        pixels[2] += block[2];                                          \
        pixels[3] += block[3];                                          \
        pixels += line_size;                                            \
        block += 4;                                                     \
    }                                                                   \
 }                                                                       \
                                                                        \
 static void FUNCC(clear_block ## suffix)(int16_t *block)                \
 {                                                                       \
    memset(block, 0, sizeof(dctcoef)*64);                               \
--- a/libavcodec/h264.c
+++ b/libavcodec/h264.c
@@ -1818,7 +1818,7 @@ static av_always_inline void hl_decode_mb_predict_luma(H264Context *h,
            if (IS_8x8DCT(mb_type)) {
                if (transform_bypass) {
                    idct_dc_add  =
                    idct_add     = s->dsp.add_pixels8;
                    idct_add     = h->h264dsp.h264_add_pixels8;
                } else {
                    idct_dc_add = h->h264dsp.h264_idct8_dc_add;
                    idct_add    = h->h264dsp.h264_idct8_add;
@@ -1843,7 +1843,7 @@ static av_always_inline void hl_decode_mb_predict_luma(H264Context *h,
            } else {
                if (transform_bypass) {
                    idct_dc_add  =
                        idct_add = s->dsp.add_pixels4;
                        idct_add = h->h264dsp.h264_add_pixels4;
                } else {
                    idct_dc_add = h->h264dsp.h264_idct_dc_add;
                    idct_add    = h->h264dsp.h264_idct_add;
@@ -1942,9 +1942,9 @@ static av_always_inline void hl_decode_mb_idct_luma(H264Context *h, int mb_type,
                        for (i = 0; i < 16; i++)
                            if (h->non_zero_count_cache[scan8[i + p * 16]] ||
                                dctcoef_get(h->mb, pixel_shift, i * 16 + p * 256))
                                s->dsp.add_pixels4(dest_y + block_offset[i],
                                                   h->mb + (i * 16 + p * 256 << pixel_shift),
                                                   linesize);
                                h->h264dsp.h264_add_pixels4(dest_y + block_offset[i],
                                                            h->mb + (i * 16 + p * 256 << pixel_shift),
                                                            linesize);
                    }
                } else {
                    h->h264dsp.h264_idct_add16intra(dest_y, block_offset,
@@ -1955,8 +1955,8 @@ static av_always_inline void hl_decode_mb_idct_luma(H264Context *h, int mb_type,
            } else if (h->cbp & 15) {
                if (transform_bypass) {
                    const int di = IS_8x8DCT(mb_type) ? 4 : 1;
                    idct_add = IS_8x8DCT(mb_type) ? s->dsp.add_pixels8
                                                  : s->dsp.add_pixels4;
                    idct_add = IS_8x8DCT(mb_type) ? h->h264dsp.h264_add_pixels8
                                                  : h->h264dsp.h264_add_pixels4;
                    for (i = 0; i < 16; i += di)
                        if (h->non_zero_count_cache[scan8[i + p * 16]])
                            idct_add(dest_y + block_offset[i],
--- a/libavcodec/h264_mb_template.c
+++ b/libavcodec/h264_mb_template.c
@@ -205,7 +205,7 @@ static av_noinline void FUNC(hl_decode_mb)(H264Context *h)
                                                            h->mb + (16 * 16 * 2 << PIXEL_SHIFT),
                                                            uvlinesize);
                } else {
                    idct_add = s->dsp.add_pixels4;
                    idct_add = h->h264dsp.h264_add_pixels4;
                    for (j = 1; j < 3; j++) {
                        for (i = j * 16; i < j * 16 + 4; i++)
                            if (h->non_zero_count_cache[scan8[i]] ||
--- a/libavcodec/h264addpx_template.c
+++ b/libavcodec/h264addpx_template.c
@@ -0,0 +1,68 @@
 /*
 * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder
 * Copyright (c) 2003-2011 Michael Niedermayer <michaelni@gmx.at>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

 /**
 * @file
 * H.264 / AVC / MPEG4 part10 DSP functions.
 * @author Michael Niedermayer <michaelni@gmx.at>
 */

 #include "bit_depth_template.c"

 /**
  * Add a 4x4 block of coefficients to the destination samples without
  * clamping (the transform-bypass "idct" replacement).
  *
  * Sums are stored back into a pixel and therefore wrap to the pixel width.
  *
  * @param _dst   top-left destination sample, reinterpreted as pixel *
  * @param _src   16 coefficients in row-major order, reinterpreted as
  *               dctcoef *
  * @param stride distance between two destination rows, in bytes
  */
 static void FUNCC(ff_h264_add_pixels4)(uint8_t *_dst, int16_t *_src, int stride)
 {
    int i;
    pixel *dst = (pixel *) _dst;
    dctcoef *src = (dctcoef *) _src;

    /* Convert the byte stride into a stride in pixels. The divisor is cast
     * to int so the division stays signed; dividing by a bare sizeof would
     * first convert a negative stride to size_t. */
    stride /= (int) sizeof(pixel);

    for (i = 0; i < 4; i++) {
        /* Accumulate in unsigned arithmetic: the stored result is the same
         * (truncated to the pixel width) but a large coefficient can no
         * longer overflow a signed int. */
        dst[0] += (unsigned) src[0];
        dst[1] += (unsigned) src[1];
        dst[2] += (unsigned) src[2];
        dst[3] += (unsigned) src[3];

        dst += stride;
        src += 4;
    }
 }

 /**
  * Add an 8x8 block of coefficients to the destination samples without
  * clamping (the transform-bypass "idct" replacement).
  *
  * Sums are stored back into a pixel and therefore wrap to the pixel width.
  *
  * @param _dst   top-left destination sample, reinterpreted as pixel *
  * @param _src   64 coefficients in row-major order, reinterpreted as
  *               dctcoef *
  * @param stride distance between two destination rows, in bytes
  */
 static void FUNCC(ff_h264_add_pixels8)(uint8_t *_dst, int16_t *_src, int stride)
 {
    int i;
    pixel *dst = (pixel *) _dst;
    dctcoef *src = (dctcoef *) _src;

    /* Convert the byte stride into a stride in pixels. The divisor is cast
     * to int so the division stays signed; dividing by a bare sizeof would
     * first convert a negative stride to size_t. */
    stride /= (int) sizeof(pixel);

    for (i = 0; i < 8; i++) {
        /* Accumulate in unsigned arithmetic: the stored result is the same
         * (truncated to the pixel width) but a large coefficient can no
         * longer overflow a signed int. */
        dst[0] += (unsigned) src[0];
        dst[1] += (unsigned) src[1];
        dst[2] += (unsigned) src[2];
        dst[3] += (unsigned) src[3];
        dst[4] += (unsigned) src[4];
        dst[5] += (unsigned) src[5];
        dst[6] += (unsigned) src[6];
        dst[7] += (unsigned) src[7];

        dst += stride;
        src += 8;
    }
 }
--- a/libavcodec/h264dsp.c
+++ b/libavcodec/h264dsp.c
@@ -52,11 +52,29 @@
 #include "h264dsp_template.c"
 #undef BIT_DEPTH

 /* Compile the add_pixels template twice, once with BIT_DEPTH 8 and once
  * with BIT_DEPTH 16. ff_h264dsp_init() selects the 16 variant for every
  * bit depth above 8 (up to 16) and the 8 variant otherwise. */
 #define BIT_DEPTH 8
 #include "h264addpx_template.c"
 #undef BIT_DEPTH

 #define BIT_DEPTH 16
 #include "h264addpx_template.c"
 #undef BIT_DEPTH

 void ff_h264dsp_init(H264DSPContext *c, const int bit_depth, const int chroma_format_idc)
 {
 #undef FUNC
 #define FUNC(a, depth) a ## _ ## depth ## _c

 #define ADDPX_DSP(depth) \
    c->h264_add_pixels4 = FUNC(ff_h264_add_pixels4, depth);\
    c->h264_add_pixels8 = FUNC(ff_h264_add_pixels8, depth)

    if (bit_depth > 8 && bit_depth <= 16) {
        ADDPX_DSP(16);
    } else {
        ADDPX_DSP(8);
    }

 #define H264_DSP(depth) \
    c->h264_idct_add= FUNC(ff_h264_idct_add, depth);\
    c->h264_idct8_add= FUNC(ff_h264_idct8_add, depth);\
--- a/libavcodec/h264dsp.h
+++ b/libavcodec/h264dsp.h
@@ -101,6 +101,10 @@ typedef struct H264DSPContext {
    void (*h264_luma_dc_dequant_idct)(int16_t *output,
                                      int16_t *input /*align 16*/, int qmul);
    void (*h264_chroma_dc_dequant_idct)(int16_t *block, int qmul);

    /* bypass-transform */
    void (*h264_add_pixels8)(uint8_t *dst, int16_t *block, int stride);
    void (*h264_add_pixels4)(uint8_t *dst, int16_t *block, int stride);
 } H264DSPContext;

 void ff_h264dsp_init(H264DSPContext *c, const int bit_depth,