Originally committed as revision 11188 to svn://svn.ffmpeg.org/ffmpeg/trunk (tags/v0.5)
@@ -407,9 +407,9 @@ typedef struct DSPContext { | |||||
/* vc1 functions */ | /* vc1 functions */ | ||||
void (*vc1_inv_trans_8x8)(DCTELEM *b); | void (*vc1_inv_trans_8x8)(DCTELEM *b); | ||||
void (*vc1_inv_trans_8x4)(DCTELEM *b, int n); | |||||
void (*vc1_inv_trans_4x8)(DCTELEM *b, int n); | |||||
void (*vc1_inv_trans_4x4)(DCTELEM *b, int n); | |||||
void (*vc1_inv_trans_8x4)(uint8_t *dest, int line_size, DCTELEM *block); | |||||
void (*vc1_inv_trans_4x8)(uint8_t *dest, int line_size, DCTELEM *block); | |||||
void (*vc1_inv_trans_4x4)(uint8_t *dest, int line_size, DCTELEM *block); | |||||
void (*vc1_v_overlap)(uint8_t* src, int stride); | void (*vc1_v_overlap)(uint8_t* src, int stride); | ||||
void (*vc1_h_overlap)(uint8_t* src, int stride); | void (*vc1_h_overlap)(uint8_t* src, int stride); | ||||
/* put 8x8 block with bicubic interpolation and quarterpel precision | /* put 8x8 block with bicubic interpolation and quarterpel precision | ||||
@@ -332,5 +332,5 @@ static void vc1_inv_trans_8x4_altivec(DCTELEM block[64], int n) | |||||
void vc1dsp_init_altivec(DSPContext* dsp, AVCodecContext *avctx) { | void vc1dsp_init_altivec(DSPContext* dsp, AVCodecContext *avctx) { | ||||
dsp->vc1_inv_trans_8x8 = vc1_inv_trans_8x8_altivec; | dsp->vc1_inv_trans_8x8 = vc1_inv_trans_8x8_altivec; | ||||
dsp->vc1_inv_trans_8x4 = vc1_inv_trans_8x4_altivec; | |||||
//dsp->vc1_inv_trans_8x4 = vc1_inv_trans_8x4_altivec; | |||||
} | } |
@@ -2884,7 +2884,8 @@ static int vc1_decode_intra_block(VC1Context *v, DCTELEM block[64], int n, int c | |||||
/** Decode P block | /** Decode P block | ||||
*/ | */ | ||||
static int vc1_decode_p_block(VC1Context *v, DCTELEM block[64], int n, int mquant, int ttmb, int first_block) | |||||
static int vc1_decode_p_block(VC1Context *v, DCTELEM block[64], int n, int mquant, int ttmb, int first_block, | |||||
uint8_t *dst, int linesize, int skip_block) | |||||
{ | { | ||||
MpegEncContext *s = &v->s; | MpegEncContext *s = &v->s; | ||||
GetBitContext *gb = &s->gb; | GetBitContext *gb = &s->gb; | ||||
@@ -2930,7 +2931,10 @@ static int vc1_decode_p_block(VC1Context *v, DCTELEM block[64], int n, int mquan | |||||
if(!v->pquantizer) | if(!v->pquantizer) | ||||
block[idx] += (block[idx] < 0) ? -mquant : mquant; | block[idx] += (block[idx] < 0) ? -mquant : mquant; | ||||
} | } | ||||
s->dsp.vc1_inv_trans_8x8(block); | |||||
if(!skip_block){ | |||||
s->dsp.vc1_inv_trans_8x8(block); | |||||
s->dsp.add_pixels_clamped(block, dst, linesize); | |||||
} | |||||
break; | break; | ||||
case TT_4X4: | case TT_4X4: | ||||
for(j = 0; j < 4; j++) { | for(j = 0; j < 4; j++) { | ||||
@@ -2947,8 +2951,8 @@ static int vc1_decode_p_block(VC1Context *v, DCTELEM block[64], int n, int mquan | |||||
if(!v->pquantizer) | if(!v->pquantizer) | ||||
block[idx + off] += (block[idx + off] < 0) ? -mquant : mquant; | block[idx + off] += (block[idx + off] < 0) ? -mquant : mquant; | ||||
} | } | ||||
if(!(subblkpat & (1 << (3 - j)))) | |||||
s->dsp.vc1_inv_trans_4x4(block, j); | |||||
if(!(subblkpat & (1 << (3 - j))) && !skip_block) | |||||
s->dsp.vc1_inv_trans_4x4(dst + (j&1)*4 + (j&2)*2*linesize, linesize, block + off); | |||||
} | } | ||||
break; | break; | ||||
case TT_8X4: | case TT_8X4: | ||||
@@ -2969,8 +2973,8 @@ static int vc1_decode_p_block(VC1Context *v, DCTELEM block[64], int n, int mquan | |||||
if(!v->pquantizer) | if(!v->pquantizer) | ||||
block[idx + off] += (block[idx + off] < 0) ? -mquant : mquant; | block[idx + off] += (block[idx + off] < 0) ? -mquant : mquant; | ||||
} | } | ||||
if(!(subblkpat & (1 << (1 - j)))) | |||||
s->dsp.vc1_inv_trans_8x4(block, j); | |||||
if(!(subblkpat & (1 << (1 - j))) && !skip_block) | |||||
s->dsp.vc1_inv_trans_8x4(dst + j*4*linesize, linesize, block + off); | |||||
} | } | ||||
break; | break; | ||||
case TT_4X8: | case TT_4X8: | ||||
@@ -2991,8 +2995,8 @@ static int vc1_decode_p_block(VC1Context *v, DCTELEM block[64], int n, int mquan | |||||
if(!v->pquantizer) | if(!v->pquantizer) | ||||
block[idx + off] += (block[idx + off] < 0) ? -mquant : mquant; | block[idx + off] += (block[idx + off] < 0) ? -mquant : mquant; | ||||
} | } | ||||
if(!(subblkpat & (1 << (1 - j)))) | |||||
s->dsp.vc1_inv_trans_4x8(block, j); | |||||
if(!(subblkpat & (1 << (1 - j))) && !skip_block) | |||||
s->dsp.vc1_inv_trans_4x8(dst + j*4, linesize, block + off); | |||||
} | } | ||||
break; | break; | ||||
} | } | ||||
@@ -3101,11 +3105,9 @@ static int vc1_decode_p_mb(VC1Context *v) | |||||
s->dsp.vc1_v_overlap(s->dest[dst_idx] + off, s->linesize >> ((i & 4) >> 2)); | s->dsp.vc1_v_overlap(s->dest[dst_idx] + off, s->linesize >> ((i & 4) >> 2)); | ||||
} | } | ||||
} else if(val) { | } else if(val) { | ||||
vc1_decode_p_block(v, s->block[i], i, mquant, ttmb, first_block); | |||||
vc1_decode_p_block(v, s->block[i], i, mquant, ttmb, first_block, s->dest[dst_idx] + off, (i&4)?s->uvlinesize:s->linesize, (i&4) && (s->flags & CODEC_FLAG_GRAY)); | |||||
if(!v->ttmbf && ttmb < 8) ttmb = -1; | if(!v->ttmbf && ttmb < 8) ttmb = -1; | ||||
first_block = 0; | first_block = 0; | ||||
if((i<4) || !(s->flags & CODEC_FLAG_GRAY)) | |||||
s->dsp.add_pixels_clamped(s->block[i], s->dest[dst_idx] + off, (i&4)?s->uvlinesize:s->linesize); | |||||
} | } | ||||
} | } | ||||
} | } | ||||
@@ -3203,11 +3205,9 @@ static int vc1_decode_p_mb(VC1Context *v) | |||||
s->dsp.vc1_v_overlap(s->dest[dst_idx] + off, s->linesize >> ((i & 4) >> 2)); | s->dsp.vc1_v_overlap(s->dest[dst_idx] + off, s->linesize >> ((i & 4) >> 2)); | ||||
} | } | ||||
} else if(is_coded[i]) { | } else if(is_coded[i]) { | ||||
status = vc1_decode_p_block(v, s->block[i], i, mquant, ttmb, first_block); | |||||
status = vc1_decode_p_block(v, s->block[i], i, mquant, ttmb, first_block, s->dest[dst_idx] + off, (i&4)?s->uvlinesize:s->linesize, (i&4) && (s->flags & CODEC_FLAG_GRAY)); | |||||
if(!v->ttmbf && ttmb < 8) ttmb = -1; | if(!v->ttmbf && ttmb < 8) ttmb = -1; | ||||
first_block = 0; | first_block = 0; | ||||
if((i<4) || !(s->flags & CODEC_FLAG_GRAY)) | |||||
s->dsp.add_pixels_clamped(s->block[i], s->dest[dst_idx] + off, (i&4)?s->uvlinesize:s->linesize); | |||||
} | } | ||||
} | } | ||||
return status; | return status; | ||||
@@ -3377,11 +3377,9 @@ static void vc1_decode_b_mb(VC1Context *v) | |||||
if(v->rangeredfrm) for(j = 0; j < 64; j++) s->block[i][j] <<= 1; | if(v->rangeredfrm) for(j = 0; j < 64; j++) s->block[i][j] <<= 1; | ||||
s->dsp.put_signed_pixels_clamped(s->block[i], s->dest[dst_idx] + off, s->linesize >> ((i & 4) >> 2)); | s->dsp.put_signed_pixels_clamped(s->block[i], s->dest[dst_idx] + off, s->linesize >> ((i & 4) >> 2)); | ||||
} else if(val) { | } else if(val) { | ||||
vc1_decode_p_block(v, s->block[i], i, mquant, ttmb, first_block); | |||||
vc1_decode_p_block(v, s->block[i], i, mquant, ttmb, first_block, s->dest[dst_idx] + off, (i&4)?s->uvlinesize:s->linesize, (i&4) && (s->flags & CODEC_FLAG_GRAY)); | |||||
if(!v->ttmbf && ttmb < 8) ttmb = -1; | if(!v->ttmbf && ttmb < 8) ttmb = -1; | ||||
first_block = 0; | first_block = 0; | ||||
if((i<4) || !(s->flags & CODEC_FLAG_GRAY)) | |||||
s->dsp.add_pixels_clamped(s->block[i], s->dest[dst_idx] + off, (i&4)?s->uvlinesize:s->linesize); | |||||
} | } | ||||
} | } | ||||
} | } | ||||
@@ -152,16 +152,15 @@ static void vc1_inv_trans_8x8_c(DCTELEM block[64]) | |||||
/** Do inverse transform on 8x4 part of block | /** Do inverse transform on 8x4 part of block | ||||
*/ | */ | ||||
static void vc1_inv_trans_8x4_c(DCTELEM block[64], int n) | |||||
static void vc1_inv_trans_8x4_c(uint8_t *dest, int linesize, DCTELEM *block) | |||||
{ | { | ||||
int i; | int i; | ||||
register int t1,t2,t3,t4,t5,t6,t7,t8; | register int t1,t2,t3,t4,t5,t6,t7,t8; | ||||
DCTELEM *src, *dst; | DCTELEM *src, *dst; | ||||
int off; | |||||
const uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; | |||||
off = n * 32; | |||||
src = block + off; | |||||
dst = block + off; | |||||
src = block; | |||||
dst = block; | |||||
for(i = 0; i < 4; i++){ | for(i = 0; i < 4; i++){ | ||||
t1 = 12 * (src[0] + src[4]); | t1 = 12 * (src[0] + src[4]); | ||||
t2 = 12 * (src[0] - src[4]); | t2 = 12 * (src[0] - src[4]); | ||||
@@ -191,8 +190,7 @@ static void vc1_inv_trans_8x4_c(DCTELEM block[64], int n) | |||||
dst += 8; | dst += 8; | ||||
} | } | ||||
src = block + off; | |||||
dst = block + off; | |||||
src = block; | |||||
for(i = 0; i < 8; i++){ | for(i = 0; i < 8; i++){ | ||||
t1 = 17 * (src[ 0] + src[16]); | t1 = 17 * (src[ 0] + src[16]); | ||||
t2 = 17 * (src[ 0] - src[16]); | t2 = 17 * (src[ 0] - src[16]); | ||||
@@ -201,28 +199,27 @@ static void vc1_inv_trans_8x4_c(DCTELEM block[64], int n) | |||||
t5 = 10 * src[ 8]; | t5 = 10 * src[ 8]; | ||||
t6 = 10 * src[24]; | t6 = 10 * src[24]; | ||||
dst[ 0] = (t1 + t3 + t6 + 64) >> 7; | |||||
dst[ 8] = (t2 - t4 + t5 + 64) >> 7; | |||||
dst[16] = (t2 + t4 - t5 + 64) >> 7; | |||||
dst[24] = (t1 - t3 - t6 + 64) >> 7; | |||||
dest[0*linesize] = cm[dest[0*linesize] + ((t1 + t3 + t6 + 64) >> 7)]; | |||||
dest[1*linesize] = cm[dest[1*linesize] + ((t2 - t4 + t5 + 64) >> 7)]; | |||||
dest[2*linesize] = cm[dest[2*linesize] + ((t2 + t4 - t5 + 64) >> 7)]; | |||||
dest[3*linesize] = cm[dest[3*linesize] + ((t1 - t3 - t6 + 64) >> 7)]; | |||||
src ++; | src ++; | ||||
dst ++; | |||||
dest++; | |||||
} | } | ||||
} | } | ||||
/** Do inverse transform on 4x8 parts of block | /** Do inverse transform on 4x8 parts of block | ||||
*/ | */ | ||||
static void vc1_inv_trans_4x8_c(DCTELEM block[64], int n) | |||||
static void vc1_inv_trans_4x8_c(uint8_t *dest, int linesize, DCTELEM *block) | |||||
{ | { | ||||
int i; | int i; | ||||
register int t1,t2,t3,t4,t5,t6,t7,t8; | register int t1,t2,t3,t4,t5,t6,t7,t8; | ||||
DCTELEM *src, *dst; | DCTELEM *src, *dst; | ||||
int off; | |||||
const uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; | |||||
off = n * 4; | |||||
src = block + off; | |||||
dst = block + off; | |||||
src = block; | |||||
dst = block; | |||||
for(i = 0; i < 8; i++){ | for(i = 0; i < 8; i++){ | ||||
t1 = 17 * (src[0] + src[2]); | t1 = 17 * (src[0] + src[2]); | ||||
t2 = 17 * (src[0] - src[2]); | t2 = 17 * (src[0] - src[2]); | ||||
@@ -240,8 +237,7 @@ static void vc1_inv_trans_4x8_c(DCTELEM block[64], int n) | |||||
dst += 8; | dst += 8; | ||||
} | } | ||||
src = block + off; | |||||
dst = block + off; | |||||
src = block; | |||||
for(i = 0; i < 4; i++){ | for(i = 0; i < 4; i++){ | ||||
t1 = 12 * (src[ 0] + src[32]); | t1 = 12 * (src[ 0] + src[32]); | ||||
t2 = 12 * (src[ 0] - src[32]); | t2 = 12 * (src[ 0] - src[32]); | ||||
@@ -258,32 +254,31 @@ static void vc1_inv_trans_4x8_c(DCTELEM block[64], int n) | |||||
t3 = 9 * src[ 8] - 16 * src[24] + 4 * src[40] + 15 * src[56]; | t3 = 9 * src[ 8] - 16 * src[24] + 4 * src[40] + 15 * src[56]; | ||||
t4 = 4 * src[ 8] - 9 * src[24] + 15 * src[40] - 16 * src[56]; | t4 = 4 * src[ 8] - 9 * src[24] + 15 * src[40] - 16 * src[56]; | ||||
dst[ 0] = (t5 + t1 + 64) >> 7; | |||||
dst[ 8] = (t6 + t2 + 64) >> 7; | |||||
dst[16] = (t7 + t3 + 64) >> 7; | |||||
dst[24] = (t8 + t4 + 64) >> 7; | |||||
dst[32] = (t8 - t4 + 64 + 1) >> 7; | |||||
dst[40] = (t7 - t3 + 64 + 1) >> 7; | |||||
dst[48] = (t6 - t2 + 64 + 1) >> 7; | |||||
dst[56] = (t5 - t1 + 64 + 1) >> 7; | |||||
dest[0*linesize] = cm[dest[0*linesize] + ((t5 + t1 + 64) >> 7)]; | |||||
dest[1*linesize] = cm[dest[1*linesize] + ((t6 + t2 + 64) >> 7)]; | |||||
dest[2*linesize] = cm[dest[2*linesize] + ((t7 + t3 + 64) >> 7)]; | |||||
dest[3*linesize] = cm[dest[3*linesize] + ((t8 + t4 + 64) >> 7)]; | |||||
dest[4*linesize] = cm[dest[4*linesize] + ((t8 - t4 + 64 + 1) >> 7)]; | |||||
dest[5*linesize] = cm[dest[5*linesize] + ((t7 - t3 + 64 + 1) >> 7)]; | |||||
dest[6*linesize] = cm[dest[6*linesize] + ((t6 - t2 + 64 + 1) >> 7)]; | |||||
dest[7*linesize] = cm[dest[7*linesize] + ((t5 - t1 + 64 + 1) >> 7)]; | |||||
src++; | |||||
dst++; | |||||
src ++; | |||||
dest++; | |||||
} | } | ||||
} | } | ||||
/** Do inverse transform on 4x4 part of block | /** Do inverse transform on 4x4 part of block | ||||
*/ | */ | ||||
static void vc1_inv_trans_4x4_c(DCTELEM block[64], int n) | |||||
static void vc1_inv_trans_4x4_c(uint8_t *dest, int linesize, DCTELEM *block) | |||||
{ | { | ||||
int i; | int i; | ||||
register int t1,t2,t3,t4,t5,t6; | register int t1,t2,t3,t4,t5,t6; | ||||
DCTELEM *src, *dst; | DCTELEM *src, *dst; | ||||
int off; | |||||
const uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; | |||||
off = (n&1) * 4 + (n&2) * 16; | |||||
src = block + off; | |||||
dst = block + off; | |||||
src = block; | |||||
dst = block; | |||||
for(i = 0; i < 4; i++){ | for(i = 0; i < 4; i++){ | ||||
t1 = 17 * (src[0] + src[2]); | t1 = 17 * (src[0] + src[2]); | ||||
t2 = 17 * (src[0] - src[2]); | t2 = 17 * (src[0] - src[2]); | ||||
@@ -301,8 +296,7 @@ static void vc1_inv_trans_4x4_c(DCTELEM block[64], int n) | |||||
dst += 8; | dst += 8; | ||||
} | } | ||||
src = block + off; | |||||
dst = block + off; | |||||
src = block; | |||||
for(i = 0; i < 4; i++){ | for(i = 0; i < 4; i++){ | ||||
t1 = 17 * (src[ 0] + src[16]); | t1 = 17 * (src[ 0] + src[16]); | ||||
t2 = 17 * (src[ 0] - src[16]); | t2 = 17 * (src[ 0] - src[16]); | ||||
@@ -311,13 +305,13 @@ static void vc1_inv_trans_4x4_c(DCTELEM block[64], int n) | |||||
t5 = 10 * src[ 8]; | t5 = 10 * src[ 8]; | ||||
t6 = 10 * src[24]; | t6 = 10 * src[24]; | ||||
dst[ 0] = (t1 + t3 + t6 + 64) >> 7; | |||||
dst[ 8] = (t2 - t4 + t5 + 64) >> 7; | |||||
dst[16] = (t2 + t4 - t5 + 64) >> 7; | |||||
dst[24] = (t1 - t3 - t6 + 64) >> 7; | |||||
dest[0*linesize] = cm[dest[0*linesize] + ((t1 + t3 + t6 + 64) >> 7)]; | |||||
dest[1*linesize] = cm[dest[1*linesize] + ((t2 - t4 + t5 + 64) >> 7)]; | |||||
dest[2*linesize] = cm[dest[2*linesize] + ((t2 + t4 - t5 + 64) >> 7)]; | |||||
dest[3*linesize] = cm[dest[3*linesize] + ((t1 - t3 - t6 + 64) >> 7)]; | |||||
src ++; | src ++; | ||||
dst ++; | |||||
dest++; | |||||
} | } | ||||
} | } | ||||