Browse Source

Add weighted motion compensation for RV40 B-frames

Signed-off-by: Ronald S. Bultje <rsbultje@gmail.com>
tags/n0.9
Kostya Shishkov Ronald S. Bultje 14 years ago
parent
commit
b86ab38137
5 changed files with 189 additions and 104 deletions
  1. +65
    -11
      libavcodec/rv34.c
  2. +5
    -0
      libavcodec/rv34.h
  3. +6
    -0
      libavcodec/rv34dsp.h
  4. +20
    -0
      libavcodec/rv40dsp.c
  5. +93
    -93
      tests/ref/fate/real-rv40

+ 65
- 11
libavcodec/rv34.c View File

@@ -717,7 +717,7 @@ static const int chroma_coeffs[3] = { 0, 3, 5 };
static inline void rv34_mc(RV34DecContext *r, const int block_type, static inline void rv34_mc(RV34DecContext *r, const int block_type,
const int xoff, const int yoff, int mv_off, const int xoff, const int yoff, int mv_off,
const int width, const int height, int dir, const int width, const int height, int dir,
const int thirdpel,
const int thirdpel, int weighted,
qpel_mc_func (*qpel_mc)[16], qpel_mc_func (*qpel_mc)[16],
h264_chroma_mc_func (*chroma_mc)) h264_chroma_mc_func (*chroma_mc))
{ {
@@ -781,9 +781,15 @@ static inline void rv34_mc(RV34DecContext *r, const int block_type,
srcU = uvbuf; srcU = uvbuf;
srcV = uvbuf + 16; srcV = uvbuf + 16;
} }
Y = s->dest[0] + xoff + yoff *s->linesize;
U = s->dest[1] + (xoff>>1) + (yoff>>1)*s->uvlinesize;
V = s->dest[2] + (xoff>>1) + (yoff>>1)*s->uvlinesize;
if(!weighted){
Y = s->dest[0] + xoff + yoff *s->linesize;
U = s->dest[1] + (xoff>>1) + (yoff>>1)*s->uvlinesize;
V = s->dest[2] + (xoff>>1) + (yoff>>1)*s->uvlinesize;
}else{
Y = r->tmp_b_block_y [dir] + xoff + yoff *s->linesize;
U = r->tmp_b_block_uv[dir*2] + (xoff>>1) + (yoff>>1)*s->uvlinesize;
V = r->tmp_b_block_uv[dir*2+1] + (xoff>>1) + (yoff>>1)*s->uvlinesize;
}


if(block_type == RV34_MB_P_16x8){ if(block_type == RV34_MB_P_16x8){
qpel_mc[1][dxy](Y, srcY, s->linesize); qpel_mc[1][dxy](Y, srcY, s->linesize);
@@ -804,33 +810,70 @@ static void rv34_mc_1mv(RV34DecContext *r, const int block_type,
const int xoff, const int yoff, int mv_off, const int xoff, const int yoff, int mv_off,
const int width, const int height, int dir) const int width, const int height, int dir)
{ {
rv34_mc(r, block_type, xoff, yoff, mv_off, width, height, dir, r->rv30,
rv34_mc(r, block_type, xoff, yoff, mv_off, width, height, dir, r->rv30, 0,
r->rdsp.put_pixels_tab, r->rdsp.put_pixels_tab,
r->rdsp.put_chroma_pixels_tab); r->rdsp.put_chroma_pixels_tab);
} }


/**
 * Blend the two temporary prediction blocks into the current destination.
 *
 * Entry 0 of rv40_weight_pixels_tab is run on the luma plane and entry 1 on
 * each of the two chroma planes. The sources are the tmp_b_block_y /
 * tmp_b_block_uv buffers, the output goes to s.dest[], and every call
 * receives r->weight1 and r->weight2 as its weights.
 */
static void rv4_weight(RV34DecContext *r)
{
    int plane;

    /* luma: tmp_b_block_y[0] and tmp_b_block_y[1] -> dest[0] */
    r->rdsp.rv40_weight_pixels_tab[0](r->s.dest[0],
                                      r->tmp_b_block_y[0], r->tmp_b_block_y[1],
                                      r->weight1, r->weight2,
                                      r->s.linesize);

    /* chroma: dest[1] blends tmp_b_block_uv[0] with [2],
     *         dest[2] blends tmp_b_block_uv[1] with [3] */
    for (plane = 1; plane <= 2; plane++)
        r->rdsp.rv40_weight_pixels_tab[1](r->s.dest[plane],
                                          r->tmp_b_block_uv[plane - 1],
                                          r->tmp_b_block_uv[plane + 1],
                                          r->weight1, r->weight2,
                                          r->s.uvlinesize);
}

static void rv34_mc_2mv(RV34DecContext *r, const int block_type) static void rv34_mc_2mv(RV34DecContext *r, const int block_type)
{ {
rv34_mc(r, block_type, 0, 0, 0, 2, 2, 0, r->rv30,
int weighted = !r->rv30 && block_type != RV34_MB_B_BIDIR && r->weight1 != 8192;

rv34_mc(r, block_type, 0, 0, 0, 2, 2, 0, r->rv30, weighted,
r->rdsp.put_pixels_tab, r->rdsp.put_pixels_tab,
r->rdsp.put_chroma_pixels_tab); r->rdsp.put_chroma_pixels_tab);
rv34_mc(r, block_type, 0, 0, 0, 2, 2, 1, r->rv30,
r->rdsp.avg_pixels_tab,
r->rdsp.avg_chroma_pixels_tab);
if(!weighted){
rv34_mc(r, block_type, 0, 0, 0, 2, 2, 1, r->rv30, 0,
r->rdsp.avg_pixels_tab,
r->rdsp.avg_chroma_pixels_tab);
}else{
rv34_mc(r, block_type, 0, 0, 0, 2, 2, 1, r->rv30, 1,
r->rdsp.put_pixels_tab,
r->rdsp.put_chroma_pixels_tab);
rv4_weight(r);
}
} }


static void rv34_mc_2mv_skip(RV34DecContext *r) static void rv34_mc_2mv_skip(RV34DecContext *r)
{ {
int i, j; int i, j;
int weighted = !r->rv30 && r->weight1 != 8192;

for(j = 0; j < 2; j++) for(j = 0; j < 2; j++)
for(i = 0; i < 2; i++){ for(i = 0; i < 2; i++){
rv34_mc(r, RV34_MB_P_8x8, i*8, j*8, i+j*r->s.b8_stride, 1, 1, 0, r->rv30, rv34_mc(r, RV34_MB_P_8x8, i*8, j*8, i+j*r->s.b8_stride, 1, 1, 0, r->rv30,
weighted,
r->rdsp.put_pixels_tab, r->rdsp.put_pixels_tab,
r->rdsp.put_chroma_pixels_tab); r->rdsp.put_chroma_pixels_tab);
rv34_mc(r, RV34_MB_P_8x8, i*8, j*8, i+j*r->s.b8_stride, 1, 1, 1, r->rv30, rv34_mc(r, RV34_MB_P_8x8, i*8, j*8, i+j*r->s.b8_stride, 1, 1, 1, r->rv30,
r->rdsp.avg_pixels_tab,
r->rdsp.avg_chroma_pixels_tab);
weighted,
weighted ? r->rdsp.put_pixels_tab : r->rdsp.avg_pixels_tab,
weighted ? r->rdsp.put_chroma_pixels_tab : r->rdsp.avg_chroma_pixels_tab);
} }
if(weighted)
rv4_weight(r);
} }


/** number of motion vectors in each macroblock type */ /** number of motion vectors in each macroblock type */
@@ -1265,6 +1308,16 @@ static int rv34_decode_slice(RV34DecContext *r, int end, const uint8_t* buf, int
if(MPV_frame_start(s, s->avctx) < 0) if(MPV_frame_start(s, s->avctx) < 0)
return -1; return -1;
ff_er_frame_start(s); ff_er_frame_start(s);
if (!r->tmp_b_block_base || s->width != r->si.width || s->height != r->si.height) {
int i;

r->tmp_b_block_base = av_realloc(r->tmp_b_block_base, s->linesize * 48);
for (i = 0; i < 2; i++)
r->tmp_b_block_y[i] = r->tmp_b_block_base + i * 16 * s->linesize;
for (i = 0; i < 4; i++)
r->tmp_b_block_uv[i] = r->tmp_b_block_base + 32 * s->linesize
+ (i >> 1) * 8 * s->uvlinesize + (i & 1) * 16;
}
r->cur_pts = r->si.pts; r->cur_pts = r->si.pts;
if(s->pict_type != AV_PICTURE_TYPE_B){ if(s->pict_type != AV_PICTURE_TYPE_B){
r->last_pts = r->next_pts; r->last_pts = r->next_pts;
@@ -1500,6 +1553,7 @@ av_cold int ff_rv34_decode_end(AVCodecContext *avctx)


av_freep(&r->intra_types_hist); av_freep(&r->intra_types_hist);
r->intra_types = NULL; r->intra_types = NULL;
av_freep(&r->tmp_b_block_base);
av_freep(&r->mb_type); av_freep(&r->mb_type);
av_freep(&r->cbp_luma); av_freep(&r->cbp_luma);
av_freep(&r->cbp_chroma); av_freep(&r->cbp_chroma);


+ 5
- 0
libavcodec/rv34.h View File

@@ -116,6 +116,11 @@ typedef struct RV34DecContext{
/** 8x8 block available flags (for MV prediction) */ /** 8x8 block available flags (for MV prediction) */
DECLARE_ALIGNED(8, uint32_t, avail_cache)[3*4]; DECLARE_ALIGNED(8, uint32_t, avail_cache)[3*4];


/** temporary blocks for RV4 weighted MC */
uint8_t *tmp_b_block_y[2];
uint8_t *tmp_b_block_uv[4];
uint8_t *tmp_b_block_base;

int (*parse_slice_header)(struct RV34DecContext *r, GetBitContext *gb, SliceInfo *si); int (*parse_slice_header)(struct RV34DecContext *r, GetBitContext *gb, SliceInfo *si);
int (*decode_mb_info)(struct RV34DecContext *r); int (*decode_mb_info)(struct RV34DecContext *r);
int (*decode_intra_types)(struct RV34DecContext *r, GetBitContext *gb, int8_t *dst); int (*decode_intra_types)(struct RV34DecContext *r, GetBitContext *gb, int8_t *dst);


+ 6
- 0
libavcodec/rv34dsp.h View File

@@ -29,11 +29,17 @@


#include "dsputil.h" #include "dsputil.h"


/**
 * Signature of the weighted-blend routines stored in
 * RV34DSPContext.rv40_weight_pixels_tab.
 *
 * @param dst    output block
 * @param src1   first input block
 * @param src2   second input block
 * @param w1     weight; the C implementation in rv40dsp.c multiplies src2 by it
 * @param w2     weight; the C implementation in rv40dsp.c multiplies src1 by it
 * @param stride distance between the starts of consecutive rows, applied to
 *               dst, src1 and src2 alike
 */
typedef void (*rv40_weight_func)(uint8_t *dst/*align width (8 or 16)*/,
uint8_t *src1/*align width (8 or 16)*/,
uint8_t *src2/*align width (8 or 16)*/,
int w1, int w2, int stride);

typedef struct RV34DSPContext { typedef struct RV34DSPContext {
qpel_mc_func put_pixels_tab[4][16]; qpel_mc_func put_pixels_tab[4][16];
qpel_mc_func avg_pixels_tab[4][16]; qpel_mc_func avg_pixels_tab[4][16];
h264_chroma_mc_func put_chroma_pixels_tab[3]; h264_chroma_mc_func put_chroma_pixels_tab[3];
h264_chroma_mc_func avg_chroma_pixels_tab[3]; h264_chroma_mc_func avg_chroma_pixels_tab[3];
rv40_weight_func rv40_weight_pixels_tab[2];
} RV34DSPContext; } RV34DSPContext;


void ff_rv30dsp_init(RV34DSPContext *c, DSPContext* dsp); void ff_rv30dsp_init(RV34DSPContext *c, DSPContext* dsp);


+ 20
- 0
libavcodec/rv40dsp.c View File

@@ -285,6 +285,23 @@ static void OPNAME ## rv40_chroma_mc8_c(uint8_t *dst/*align 8*/, uint8_t *src/*a
RV40_CHROMA_MC(put_, op_put) RV40_CHROMA_MC(put_, op_put)
RV40_CHROMA_MC(avg_, op_avg) RV40_CHROMA_MC(avg_, op_avg)


/**
 * Generate rv40_weight_func_<size>(), which blends two size x size blocks.
 *
 * Each output pixel is
 *     (((w2 * src1) >> 9) + ((w1 * src2) >> 9) + 0x10) >> 5
 * so w2 scales src1 and w1 scales src2. All three pointers are advanced by
 * stride after every row.
 */
#define RV40_WEIGHT_FUNC(size) \
static void rv40_weight_func_ ## size (uint8_t *dst, uint8_t *src1, uint8_t *src2, int w1, int w2, int stride)\
{\
    int rows_left = size;\
\
    while (rows_left--) {\
        int col;\
\
        for (col = 0; col < size; col++) {\
            const int part1 = (w2 * src1[col]) >> 9;\
            const int part2 = (w1 * src2[col]) >> 9;\
\
            dst[col] = (part1 + part2 + 0x10) >> 5;\
        }\
        dst  += stride;\
        src1 += stride;\
        src2 += stride;\
    }\
}

RV40_WEIGHT_FUNC(16)
RV40_WEIGHT_FUNC(8)

av_cold void ff_rv40dsp_init(RV34DSPContext *c, DSPContext* dsp) { av_cold void ff_rv40dsp_init(RV34DSPContext *c, DSPContext* dsp) {
c->put_pixels_tab[0][ 0] = dsp->put_h264_qpel_pixels_tab[0][0]; c->put_pixels_tab[0][ 0] = dsp->put_h264_qpel_pixels_tab[0][0];
c->put_pixels_tab[0][ 1] = put_rv40_qpel16_mc10_c; c->put_pixels_tab[0][ 1] = put_rv40_qpel16_mc10_c;
@@ -356,6 +373,9 @@ av_cold void ff_rv40dsp_init(RV34DSPContext *c, DSPContext* dsp) {
c->avg_chroma_pixels_tab[0] = avg_rv40_chroma_mc8_c; c->avg_chroma_pixels_tab[0] = avg_rv40_chroma_mc8_c;
c->avg_chroma_pixels_tab[1] = avg_rv40_chroma_mc4_c; c->avg_chroma_pixels_tab[1] = avg_rv40_chroma_mc4_c;


c->rv40_weight_pixels_tab[0] = rv40_weight_func_16;
c->rv40_weight_pixels_tab[1] = rv40_weight_func_8;

if (HAVE_MMX) if (HAVE_MMX)
ff_rv40dsp_init_x86(c, dsp); ff_rv40dsp_init_x86(c, dsp);
} }

+ 93
- 93
tests/ref/fate/real-rv40 View File

@@ -16,106 +16,106 @@
0, 112500, 276480, 0x5f7a0d4f 0, 112500, 276480, 0x5f7a0d4f
0, 120000, 276480, 0x5f7a0d4f 0, 120000, 276480, 0x5f7a0d4f
0, 127500, 276480, 0x5f7a0d4f 0, 127500, 276480, 0x5f7a0d4f
0, 135000, 276480, 0x2d722f8a
0, 142500, 276480, 0xebbb3c8f
0, 150000, 276480, 0x8574c868
0, 135000, 276480, 0x75641594
0, 142500, 276480, 0x32ee3526
0, 150000, 276480, 0x5ce39368
0, 157500, 276480, 0x4ec1e418 0, 157500, 276480, 0x4ec1e418
0, 165000, 276480, 0x95f22651
0, 172500, 276480, 0x071d897e
0, 180000, 276480, 0x9f7623f9
0, 187500, 276480, 0x86d4dedf
0, 195000, 276480, 0xc0a0be22
0, 202500, 276480, 0xc5902aec
0, 210000, 276480, 0xe000f066
0, 217500, 276480, 0x0b2a48d5
0, 225000, 276480, 0xa1565256
0, 232500, 276480, 0x8de3ceb3
0, 240000, 276480, 0x654b564a
0, 165000, 276480, 0x85cbc3b5
0, 172500, 276480, 0x377c7b46
0, 180000, 276480, 0x756a4a2e
0, 187500, 276480, 0xcb379547
0, 195000, 276480, 0x99c085be
0, 202500, 276480, 0xe479ffed
0, 210000, 276480, 0x1e4fae19
0, 217500, 276480, 0x776412ef
0, 225000, 276480, 0x58ce0f38
0, 232500, 276480, 0x5ab69b27
0, 240000, 276480, 0xc3db9706
0, 247500, 276480, 0xc9c57884 0, 247500, 276480, 0xc9c57884
0, 255000, 276480, 0x89cdcdd4
0, 262500, 276480, 0x3594fe61
0, 270000, 276480, 0x9d082a81
0, 277500, 276480, 0x4e6cd0c3
0, 285000, 276480, 0xc129765f
0, 292500, 276480, 0x92a04c99
0, 300000, 276480, 0x5ca62953
0, 307500, 276480, 0xb7e478aa
0, 315000, 276480, 0x932735d5
0, 322500, 276480, 0xaaa2d7aa
0, 330000, 276480, 0xd1329996
0, 255000, 276480, 0x000b5269
0, 262500, 276480, 0x27ff7a5d
0, 270000, 276480, 0x70647530
0, 277500, 276480, 0x97612c4b
0, 285000, 276480, 0xdf4e04d7
0, 292500, 276480, 0xbd98f57c
0, 300000, 276480, 0x5163b29b
0, 307500, 276480, 0x99170e64
0, 315000, 276480, 0x8a4e991f
0, 322500, 276480, 0x6a45425f
0, 330000, 276480, 0x7bf6b1ef
0, 337500, 276480, 0x6de1e34b 0, 337500, 276480, 0x6de1e34b
0, 345000, 276480, 0x8c963c9b
0, 352500, 276480, 0xce6eff29
0, 360000, 276480, 0x25412f7e
0, 367500, 276480, 0x11a5ad85
0, 375000, 276480, 0x26ea3248
0, 382500, 276480, 0x86c35fa4
0, 390000, 276480, 0xa98a2d38
0, 397500, 276480, 0xed827333
0, 405000, 276480, 0x5d44a824
0, 412500, 276480, 0x46d54d04
0, 420000, 276480, 0x413fd26a
0, 345000, 276480, 0xdcaaa99a
0, 352500, 276480, 0xd1e98808
0, 360000, 276480, 0x6e2d524e
0, 367500, 276480, 0x22c50a3d
0, 375000, 276480, 0x62b76407
0, 382500, 276480, 0x51e9b3eb
0, 390000, 276480, 0x441f7afd
0, 397500, 276480, 0xfb01efc6
0, 405000, 276480, 0x294bb441
0, 412500, 276480, 0xe04ac45e
0, 420000, 276480, 0x58f275ea
0, 427500, 276480, 0xf0b3b71b 0, 427500, 276480, 0xf0b3b71b
0, 435000, 276480, 0x459bc06d
0, 442500, 276480, 0x4199cd45
0, 450000, 276480, 0xa8d35683
0, 457500, 276480, 0x9a3e7de0
0, 465000, 276480, 0x5a30f666
0, 472500, 276480, 0x40152668
0, 480000, 276480, 0x90c4d22c
0, 487500, 276480, 0x5cbaacc9
0, 495000, 276480, 0x72b658f1
0, 502500, 276480, 0x0ba3dcc9
0, 510000, 276480, 0x259ed5c1
0, 435000, 276480, 0x674e34e4
0, 442500, 276480, 0x41dda2d9
0, 450000, 276480, 0xf46ba7fb
0, 457500, 276480, 0x28b54815
0, 465000, 276480, 0xaf2b5d89
0, 472500, 276480, 0x8facba58
0, 480000, 276480, 0x28a63236
0, 487500, 276480, 0x1ad43fd7
0, 495000, 276480, 0x71507bd2
0, 502500, 276480, 0x35626022
0, 510000, 276480, 0x7c1139b3
0, 517500, 276480, 0x7fd73a99 0, 517500, 276480, 0x7fd73a99
0, 525000, 276480, 0x488980c5
0, 532500, 276480, 0x1d4c96a5
0, 540000, 276480, 0x41ced7f2
0, 547500, 276480, 0xd62d1837
0, 555000, 276480, 0xf5fd9d20
0, 562500, 276480, 0x2af91fda
0, 570000, 276480, 0x38ce229d
0, 577500, 276480, 0xf3a712c0
0, 585000, 276480, 0x57b111d2
0, 592500, 276480, 0x8556b792
0, 600000, 276480, 0xb32d0896
0, 525000, 276480, 0xb52e1aa2
0, 532500, 276480, 0xd6f82cae
0, 540000, 276480, 0xf88f75d4
0, 547500, 276480, 0x04a8e3ee
0, 555000, 276480, 0xa29f5b01
0, 562500, 276480, 0x754ceaf5
0, 570000, 276480, 0x5a38b4af
0, 577500, 276480, 0xfcebc261
0, 585000, 276480, 0x3d3ca985
0, 592500, 276480, 0x94a03c75
0, 600000, 276480, 0x2f98911c
0, 607500, 276480, 0x923b9937 0, 607500, 276480, 0x923b9937
0, 615000, 276480, 0x0da1e7e3
0, 622500, 276480, 0x7f172382
0, 630000, 276480, 0x93622b88
0, 637500, 276480, 0x2599d540
0, 645000, 276480, 0xed20c105
0, 652500, 276480, 0x62ce256e
0, 660000, 276480, 0x286a04bb
0, 667500, 276480, 0x423f7e7c
0, 675000, 276480, 0x21fc252a
0, 682500, 276480, 0xf8a8e8ee
0, 690000, 276480, 0x770d4a8d
0, 615000, 276480, 0xefab7ffd
0, 622500, 276480, 0x6b9fbc80
0, 630000, 276480, 0xe4bdbd1e
0, 637500, 276480, 0x225a56c0
0, 645000, 276480, 0xf58b1b7c
0, 652500, 276480, 0xbaffcdcc
0, 660000, 276480, 0xeb6eb88f
0, 667500, 276480, 0xdb753d35
0, 675000, 276480, 0xea80a82e
0, 682500, 276480, 0x2aae902a
0, 690000, 276480, 0x9b9ee961
0, 697500, 276480, 0xaa12b6fd 0, 697500, 276480, 0xaa12b6fd
0, 705000, 276480, 0xdc7221a8
0, 712500, 276480, 0x487eeb30
0, 720000, 276480, 0x1e74f2db
0, 727500, 276480, 0x40ae2bc3
0, 735000, 276480, 0x9ca9b930
0, 742500, 276480, 0x9fb19b0f
0, 750000, 276480, 0x7bdf836c
0, 757500, 276480, 0x1e607ba7
0, 765000, 276480, 0xbd96578b
0, 772500, 276480, 0x2124bf07
0, 780000, 276480, 0x4895e27a
0, 705000, 276480, 0x50c31e73
0, 712500, 276480, 0xdd9fb89f
0, 720000, 276480, 0xaf82399a
0, 727500, 276480, 0x7ce5f23c
0, 735000, 276480, 0x5aaa7519
0, 742500, 276480, 0xe45a5599
0, 750000, 276480, 0x704411fb
0, 757500, 276480, 0x9d7430a1
0, 765000, 276480, 0x2c230702
0, 772500, 276480, 0x4a4f76cd
0, 780000, 276480, 0x27f54854
0, 787500, 276480, 0x694d76e3 0, 787500, 276480, 0x694d76e3
0, 795000, 276480, 0xe70df513
0, 802500, 276480, 0xcacafe6b
0, 810000, 276480, 0x64087748
0, 817500, 276480, 0x571fda23
0, 825000, 276480, 0x8c86cbe9
0, 832500, 276480, 0xc8ea4671
0, 840000, 276480, 0xbfb74300
0, 847500, 276480, 0xbe1e3770
0, 855000, 276480, 0x757a0232
0, 862500, 276480, 0xa5f50c84
0, 870000, 276480, 0x6d95f808
0, 795000, 276480, 0x525463e2
0, 802500, 276480, 0x819898f9
0, 810000, 276480, 0xeeed00fc
0, 817500, 276480, 0xb6f99ee3
0, 825000, 276480, 0xefc83107
0, 832500, 276480, 0xbb22e024
0, 840000, 276480, 0x300f922a
0, 847500, 276480, 0x826fc3bd
0, 855000, 276480, 0x679a53f8
0, 862500, 276480, 0x976c9e93
0, 870000, 276480, 0xb194656e
0, 877500, 276480, 0xf002c5ca 0, 877500, 276480, 0xf002c5ca
0, 885000, 276480, 0x1a2abb26
0, 892500, 276480, 0x6cf69bf2
0, 885000, 276480, 0xb243dda5
0, 892500, 276480, 0x1700efbb
0, 900000, 276480, 0x8f316c66 0, 900000, 276480, 0x8f316c66

Loading…
Cancel
Save