allow h264 idct to be used for lowres=1. Originally committed as revision 3524 to svn://svn.ffmpeg.org/ffmpeg/trunk (tags/v0.5)
@@ -1113,6 +1113,7 @@ typedef struct AVCodecContext { | |||||
#define FF_IDCT_ALTIVEC 8 | #define FF_IDCT_ALTIVEC 8 | ||||
#define FF_IDCT_SH4 9 | #define FF_IDCT_SH4 9 | ||||
#define FF_IDCT_SIMPLEARM 10 | #define FF_IDCT_SIMPLEARM 10 | ||||
#define FF_IDCT_H264 11 | |||||
/** | /** | ||||
* slice count. | * slice count. | ||||
@@ -3434,8 +3434,13 @@ void dsputil_init(DSPContext* c, AVCodecContext *avctx) | |||||
#endif //CONFIG_ENCODERS | #endif //CONFIG_ENCODERS | ||||
if(avctx->lowres==1){ | if(avctx->lowres==1){ | ||||
c->idct_put= ff_jref_idct4_put; | |||||
c->idct_add= ff_jref_idct4_add; | |||||
if(avctx->idct_algo==FF_IDCT_INT || avctx->idct_algo==FF_IDCT_AUTO){ | |||||
c->idct_put= ff_jref_idct4_put; | |||||
c->idct_add= ff_jref_idct4_add; | |||||
}else{ | |||||
c->idct_put= ff_h264_lowres_idct_put_c; | |||||
c->idct_add= ff_h264_lowres_idct_add_c; | |||||
} | |||||
c->idct = j_rev_dct4; | c->idct = j_rev_dct4; | ||||
c->idct_permutation_type= FF_NO_IDCT_PERM; | c->idct_permutation_type= FF_NO_IDCT_PERM; | ||||
}else if(avctx->lowres==2){ | }else if(avctx->lowres==2){ | ||||
@@ -3462,6 +3467,8 @@ void dsputil_init(DSPContext* c, AVCodecContext *avctx) | |||||
} | } | ||||
} | } | ||||
c->h264_idct_add= ff_h264_idct_add_c; | |||||
/* VP3 DSP support */ | /* VP3 DSP support */ | ||||
c->vp3_dsp_init = vp3_dsp_init_c; | c->vp3_dsp_init = vp3_dsp_init_c; | ||||
c->vp3_idct = vp3_idct_c; | c->vp3_idct = vp3_idct_c; | ||||
@@ -50,6 +50,10 @@ void ff_fdct_mmx(DCTELEM *block); | |||||
void ff_fdct_mmx2(DCTELEM *block); | void ff_fdct_mmx2(DCTELEM *block); | ||||
void ff_fdct_sse2(DCTELEM *block); | void ff_fdct_sse2(DCTELEM *block); | ||||
/* 4x4 H.264 IDCT entry point; argument order is (dst, block, stride). */
void ff_h264_idct_add_c(uint8_t *dst, DCTELEM *block, int stride); | |||||
/* Lowres entry points; argument order is (dst, stride, block), i.e. stride and
 * block are swapped relative to ff_h264_idct_add_c. */
void ff_h264_lowres_idct_add_c(uint8_t *dst, int stride, DCTELEM *block); | |||||
void ff_h264_lowres_idct_put_c(uint8_t *dst, int stride, DCTELEM *block); | |||||
/* encoding scans */ | /* encoding scans */ | ||||
extern const uint8_t ff_alternate_horizontal_scan[64]; | extern const uint8_t ff_alternate_horizontal_scan[64]; | ||||
extern const uint8_t ff_alternate_vertical_scan[64]; | extern const uint8_t ff_alternate_vertical_scan[64]; | ||||
@@ -330,7 +334,8 @@ typedef struct DSPContext { | |||||
*/ | */ | ||||
void (*vp3_idct)(int16_t *input_data, int16_t *dequant_matrix, | void (*vp3_idct)(int16_t *input_data, int16_t *dequant_matrix, | ||||
int coeff_count, DCTELEM *output_samples); | int coeff_count, DCTELEM *output_samples); | ||||
void (*h264_idct_add)(uint8_t *dst, DCTELEM *block, int stride); | |||||
} DSPContext; | } DSPContext; | ||||
void dsputil_static_init(void); | void dsputil_static_init(void); | ||||
@@ -1323,40 +1323,6 @@ static inline int get_chroma_qp(H264Context *h, int qscale){ | |||||
} | } | ||||
/** | |||||
* | |||||
*/ | |||||
static void h264_add_idct_c(uint8_t *dst, DCTELEM *block, int stride){ | |||||
int i; | |||||
uint8_t *cm = cropTbl + MAX_NEG_CROP; | |||||
block[0] += 32; | |||||
for(i=0; i<4; i++){ | |||||
const int z0= block[0 + 4*i] + block[2 + 4*i]; | |||||
const int z1= block[0 + 4*i] - block[2 + 4*i]; | |||||
const int z2= (block[1 + 4*i]>>1) - block[3 + 4*i]; | |||||
const int z3= block[1 + 4*i] + (block[3 + 4*i]>>1); | |||||
block[0 + 4*i]= z0 + z3; | |||||
block[1 + 4*i]= z1 + z2; | |||||
block[2 + 4*i]= z1 - z2; | |||||
block[3 + 4*i]= z0 - z3; | |||||
} | |||||
for(i=0; i<4; i++){ | |||||
const int z0= block[i + 4*0] + block[i + 4*2]; | |||||
const int z1= block[i + 4*0] - block[i + 4*2]; | |||||
const int z2= (block[i + 4*1]>>1) - block[i + 4*3]; | |||||
const int z3= block[i + 4*1] + (block[i + 4*3]>>1); | |||||
dst[i + 0*stride]= cm[ dst[i + 0*stride] + ((z0 + z3) >> 6) ]; | |||||
dst[i + 1*stride]= cm[ dst[i + 1*stride] + ((z1 + z2) >> 6) ]; | |||||
dst[i + 2*stride]= cm[ dst[i + 2*stride] + ((z1 - z2) >> 6) ]; | |||||
dst[i + 3*stride]= cm[ dst[i + 3*stride] + ((z0 - z3) >> 6) ]; | |||||
} | |||||
} | |||||
#if 0 | #if 0 | ||||
static void h264_diff_dct_c(DCTELEM *block, uint8_t *src1, uint8_t *src2, int stride){ | static void h264_diff_dct_c(DCTELEM *block, uint8_t *src1, uint8_t *src2, int stride){ | ||||
int i; | int i; | ||||
@@ -2440,7 +2406,7 @@ static void hl_decode_mb(H264Context *h){ | |||||
h->pred4x4[ dir ](ptr, topright, linesize); | h->pred4x4[ dir ](ptr, topright, linesize); | ||||
if(h->non_zero_count_cache[ scan8[i] ]){ | if(h->non_zero_count_cache[ scan8[i] ]){ | ||||
if(s->codec_id == CODEC_ID_H264) | if(s->codec_id == CODEC_ID_H264) | ||||
h264_add_idct_c(ptr, h->mb + i*16, linesize); | |||||
s->dsp.h264_idct_add(ptr, h->mb + i*16, linesize); | |||||
else | else | ||||
svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, 0); | svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, 0); | ||||
} | } | ||||
@@ -2467,7 +2433,7 @@ static void hl_decode_mb(H264Context *h){ | |||||
for(i=0; i<16; i++){ | for(i=0; i<16; i++){ | ||||
if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){ //FIXME benchmark weird rule, & below | if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){ //FIXME benchmark weird rule, & below | ||||
uint8_t * const ptr= dest_y + h->block_offset[i]; | uint8_t * const ptr= dest_y + h->block_offset[i]; | ||||
h264_add_idct_c(ptr, h->mb + i*16, linesize); | |||||
s->dsp.h264_idct_add(ptr, h->mb + i*16, linesize); | |||||
} | } | ||||
} | } | ||||
}else{ | }else{ | ||||
@@ -2487,13 +2453,13 @@ static void hl_decode_mb(H264Context *h){ | |||||
for(i=16; i<16+4; i++){ | for(i=16; i<16+4; i++){ | ||||
if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){ | if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){ | ||||
uint8_t * const ptr= dest_cb + h->block_offset[i]; | uint8_t * const ptr= dest_cb + h->block_offset[i]; | ||||
h264_add_idct_c(ptr, h->mb + i*16, uvlinesize); | |||||
s->dsp.h264_idct_add(ptr, h->mb + i*16, uvlinesize); | |||||
} | } | ||||
} | } | ||||
for(i=20; i<20+4; i++){ | for(i=20; i<20+4; i++){ | ||||
if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){ | if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){ | ||||
uint8_t * const ptr= dest_cr + h->block_offset[i]; | uint8_t * const ptr= dest_cr + h->block_offset[i]; | ||||
h264_add_idct_c(ptr, h->mb + i*16, uvlinesize); | |||||
s->dsp.h264_idct_add(ptr, h->mb + i*16, uvlinesize); | |||||
} | } | ||||
} | } | ||||
}else{ | }else{ | ||||
@@ -3232,7 +3198,7 @@ static inline int get_level_prefix(GetBitContext *gb){ | |||||
log= 32 - av_log2(buf); | log= 32 - av_log2(buf); | ||||
#ifdef TRACE | #ifdef TRACE | ||||
print_bin(buf>>(32-log), log); | print_bin(buf>>(32-log), log); | ||||
printf("%5d %2d %3d lpr @%5d in %s get_level_prefix\n", buf>>(32-log), log, log-1, get_bits_count(gb), __FILE__); | |||||
av_log(NULL, AV_LOG_DEBUG, "%5d %2d %3d lpr @%5d in %s get_level_prefix\n", buf>>(32-log), log, log-1, get_bits_count(gb), __FILE__); | |||||
#endif | #endif | ||||
LAST_SKIP_BITS(re, gb, log); | LAST_SKIP_BITS(re, gb, log); | ||||
@@ -5975,7 +5941,7 @@ int main(){ | |||||
} | } | ||||
// printf("\n"); | // printf("\n"); | ||||
h264_add_idct_c(ref, block, 4); | |||||
s->dsp.h264_idct_add(ref, block, 4); | |||||
/* for(j=0; j<16; j++){ | /* for(j=0; j<16; j++){ | ||||
printf("%d ", ref[j]); | printf("%d ", ref[j]); | ||||
} | } | ||||
@@ -0,0 +1,70 @@ | |||||
/* | |||||
* H.264 IDCT | |||||
* Copyright (c) 2004 Michael Niedermayer <michaelni@gmx.at> | |||||
* | |||||
* This library is free software; you can redistribute it and/or | |||||
* modify it under the terms of the GNU Lesser General Public | |||||
* License as published by the Free Software Foundation; either | |||||
* version 2 of the License, or (at your option) any later version. | |||||
* | |||||
* This library is distributed in the hope that it will be useful, | |||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of | |||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |||||
* Lesser General Public License for more details. | |||||
* | |||||
* You should have received a copy of the GNU Lesser General Public | |||||
* License along with this library; if not, write to the Free Software | |||||
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | |||||
* | |||||
*/ | |||||
/** | |||||
* @file h264-idct.c | |||||
* H.264 IDCT. | |||||
* @author Michael Niedermayer <michaelni@gmx.at> | |||||
*/ | |||||
#include "dsputil.h" | |||||
/**
 * Shared kernel for the 4x4 H.264 inverse transform.
 *
 * Runs a row pass in place on block, then a column pass whose results are
 * shifted down, optionally combined with the existing dst samples, and
 * written to dst through the cropTbl lookup.
 *
 * @param dst          top-left sample of the 4x4 destination area
 * @param block        coefficients; the 4x4 region read starts at block[0]
 *                     and is overwritten by the row pass
 * @param stride       distance between vertically adjacent samples in dst
 * @param block_stride distance between consecutive coefficient rows in block
 * @param shift        right shift applied to each column-pass result
 * @param add          factor applied to the existing dst sample before the
 *                     residual is added: 1 accumulates onto dst, 0 makes the
 *                     old sample contribute nothing
 */
static always_inline void idct_internal(uint8_t *dst, DCTELEM *block, int stride, int block_stride, int shift, int add){
    /* Lookup table applied to every output sample (presumably clamps to the
     * 8-bit range -- confirm against the cropTbl definition). */
    uint8_t *const clip = cropTbl + MAX_NEG_CROP;
    int n;

    /* Half of the final divisor, added to the DC coefficient once; it ends up
     * in every output of both butterfly passes, so the later >> shift rounds. */
    block[0] += 1 << (shift - 1);

    /* Pass 1: butterflies along each coefficient row, stored back in place. */
    for (n = 0; n < 4; n++) {
        DCTELEM *const r = block + block_stride * n;
        const int even_sum  = r[0] + r[2];
        const int even_diff = r[0] - r[2];
        const int odd_lo    = (r[1] >> 1) - r[3];
        const int odd_hi    = r[1] + (r[3] >> 1);

        r[0] = even_sum  + odd_hi;
        r[1] = even_diff + odd_lo;
        r[2] = even_diff - odd_lo;
        r[3] = even_sum  - odd_hi;
    }

    /* Pass 2: butterflies down each column, scaled and written to dst. */
    for (n = 0; n < 4; n++) {
        const DCTELEM *const c = block + n;
        uint8_t *const out     = dst + n;
        const int even_sum  = c[block_stride * 0] + c[block_stride * 2];
        const int even_diff = c[block_stride * 0] - c[block_stride * 2];
        const int odd_lo    = (c[block_stride * 1] >> 1) - c[block_stride * 3];
        const int odd_hi    = c[block_stride * 1] + (c[block_stride * 3] >> 1);

        out[0 * stride] = clip[add * out[0 * stride] + ((even_sum  + odd_hi) >> shift)];
        out[1 * stride] = clip[add * out[1 * stride] + ((even_diff + odd_lo) >> shift)];
        out[2 * stride] = clip[add * out[2 * stride] + ((even_diff - odd_lo) >> shift)];
        out[3 * stride] = clip[add * out[3 * stride] + ((even_sum  - odd_hi) >> shift)];
    }
}
/**
 * Inverse-transforms a 4x4 block whose rows are 4 coefficients apart and adds
 * the result, shifted down by 6 bits, onto dst.
 *
 * @param dst    top-left sample of the 4x4 destination area
 * @param block  coefficients; overwritten by the transform
 * @param stride distance between vertically adjacent samples in dst
 */
void ff_h264_idct_add_c(uint8_t *dst, DCTELEM *block, int stride){
    const int coeff_row_stride = 4;
    const int downshift        = 6;
    const int keep_dst         = 1; /* accumulate onto the existing samples */

    idct_internal(dst, block, stride, coeff_row_stride, downshift, keep_dst);
}
/**
 * Lowres variant: inverse-transforms the 4x4 region at the start of a block
 * whose rows are 8 coefficients apart and adds the result, shifted down by
 * 3 bits, onto dst.
 *
 * Note the argument order (dst, stride, block), which differs from
 * ff_h264_idct_add_c.
 *
 * @param dst    top-left sample of the 4x4 destination area
 * @param stride distance between vertically adjacent samples in dst
 * @param block  coefficients; the region read is overwritten
 */
void ff_h264_lowres_idct_add_c(uint8_t *dst, int stride, DCTELEM *block){
    const int coeff_row_stride = 8;
    const int downshift        = 3;
    const int keep_dst         = 1; /* accumulate onto the existing samples */

    idct_internal(dst, block, stride, coeff_row_stride, downshift, keep_dst);
}
/**
 * Lowres variant: inverse-transforms the 4x4 region at the start of a block
 * whose rows are 8 coefficients apart and stores the result, shifted down by
 * 3 bits, into dst; the previous dst samples do not contribute.
 *
 * Note the argument order (dst, stride, block), which differs from
 * ff_h264_idct_add_c.
 *
 * @param dst    top-left sample of the 4x4 destination area
 * @param stride distance between vertically adjacent samples in dst
 * @param block  coefficients; the region read is overwritten
 */
void ff_h264_lowres_idct_put_c(uint8_t *dst, int stride, DCTELEM *block){
    const int coeff_row_stride = 8;
    const int downshift        = 3;
    const int keep_dst         = 0; /* old samples are multiplied by zero */

    idct_internal(dst, block, stride, coeff_row_stride, downshift, keep_dst);
}