Signed-off-by: Diego Biurrun <diego@biurrun.de> Signed-off-by: Ronald S. Bultje <rsbultje@gmail.com>tags/n0.9
@@ -54,6 +54,7 @@ easier to use. The changes are: | |||||
- boxblur filter | - boxblur filter | ||||
- Ut Video decoder | - Ut Video decoder | ||||
- Speex encoding via libspeex | - Speex encoding via libspeex | ||||
- 4:2:2 H.264 decoding support | |||||
version 0.7: | version 0.7: | ||||
@@ -92,7 +92,7 @@ void ff_h264_idct8_add4_neon(uint8_t *dst, const int *block_offset, | |||||
DCTELEM *block, int stride, | DCTELEM *block, int stride, | ||||
const uint8_t nnzc[6*8]); | const uint8_t nnzc[6*8]); | ||||
static void ff_h264dsp_init_neon(H264DSPContext *c, const int bit_depth) | |||||
static void ff_h264dsp_init_neon(H264DSPContext *c, const int bit_depth, const int chroma_format_idc) | |||||
{ | { | ||||
if (bit_depth == 8) { | if (bit_depth == 8) { | ||||
c->h264_v_loop_filter_luma = ff_h264_v_loop_filter_luma_neon; | c->h264_v_loop_filter_luma = ff_h264_v_loop_filter_luma_neon; | ||||
@@ -122,14 +122,15 @@ static void ff_h264dsp_init_neon(H264DSPContext *c, const int bit_depth) | |||||
c->h264_idct_dc_add = ff_h264_idct_dc_add_neon; | c->h264_idct_dc_add = ff_h264_idct_dc_add_neon; | ||||
c->h264_idct_add16 = ff_h264_idct_add16_neon; | c->h264_idct_add16 = ff_h264_idct_add16_neon; | ||||
c->h264_idct_add16intra = ff_h264_idct_add16intra_neon; | c->h264_idct_add16intra = ff_h264_idct_add16intra_neon; | ||||
c->h264_idct_add8 = ff_h264_idct_add8_neon; | |||||
if (chroma_format_idc == 1) | |||||
c->h264_idct_add8 = ff_h264_idct_add8_neon; | |||||
c->h264_idct8_add = ff_h264_idct8_add_neon; | c->h264_idct8_add = ff_h264_idct8_add_neon; | ||||
c->h264_idct8_dc_add = ff_h264_idct8_dc_add_neon; | c->h264_idct8_dc_add = ff_h264_idct8_dc_add_neon; | ||||
c->h264_idct8_add4 = ff_h264_idct8_add4_neon; | c->h264_idct8_add4 = ff_h264_idct8_add4_neon; | ||||
} | } | ||||
} | } | ||||
void ff_h264dsp_init_arm(H264DSPContext *c, const int bit_depth) | |||||
void ff_h264dsp_init_arm(H264DSPContext *c, const int bit_depth, const int chroma_format_idc) | |||||
{ | { | ||||
if (HAVE_NEON) ff_h264dsp_init_neon(c, bit_depth); | |||||
if (HAVE_NEON) ff_h264dsp_init_neon(c, bit_depth, chroma_format_idc); | |||||
} | } |
@@ -42,7 +42,7 @@ void ff_pred8x8_0lt_dc_neon(uint8_t *src, int stride); | |||||
void ff_pred8x8_l00_dc_neon(uint8_t *src, int stride); | void ff_pred8x8_l00_dc_neon(uint8_t *src, int stride); | ||||
void ff_pred8x8_0l0_dc_neon(uint8_t *src, int stride); | void ff_pred8x8_0l0_dc_neon(uint8_t *src, int stride); | ||||
static void ff_h264_pred_init_neon(H264PredContext *h, int codec_id, const int bit_depth) | |||||
static void ff_h264_pred_init_neon(H264PredContext *h, int codec_id, const int bit_depth, const int chroma_format_idc) | |||||
{ | { | ||||
const int high_depth = bit_depth > 8; | const int high_depth = bit_depth > 8; | ||||
@@ -74,7 +74,7 @@ static void ff_h264_pred_init_neon(H264PredContext *h, int codec_id, const int b | |||||
h->pred16x16[PLANE_PRED8x8 ] = ff_pred16x16_plane_neon; | h->pred16x16[PLANE_PRED8x8 ] = ff_pred16x16_plane_neon; | ||||
} | } | ||||
void ff_h264_pred_init_arm(H264PredContext *h, int codec_id, int bit_depth) | |||||
void ff_h264_pred_init_arm(H264PredContext *h, int codec_id, int bit_depth, const int chroma_format_idc) | |||||
{ | { | ||||
if (HAVE_NEON) ff_h264_pred_init_neon(h, codec_id, bit_depth); | |||||
if (HAVE_NEON) ff_h264_pred_init_neon(h, codec_id, bit_depth, chroma_format_idc); | |||||
} | } |
@@ -63,8 +63,10 @@ void ff_h264_idct_dc_add_ ## depth ## _c(uint8_t *dst, DCTELEM *block, int strid | |||||
void ff_h264_idct_add16_ ## depth ## _c(uint8_t *dst, const int *blockoffset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]);\ | void ff_h264_idct_add16_ ## depth ## _c(uint8_t *dst, const int *blockoffset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]);\ | ||||
void ff_h264_idct_add16intra_ ## depth ## _c(uint8_t *dst, const int *blockoffset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]);\ | void ff_h264_idct_add16intra_ ## depth ## _c(uint8_t *dst, const int *blockoffset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]);\ | ||||
void ff_h264_idct8_add4_ ## depth ## _c(uint8_t *dst, const int *blockoffset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]);\ | void ff_h264_idct8_add4_ ## depth ## _c(uint8_t *dst, const int *blockoffset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]);\ | ||||
void ff_h264_idct_add8_422_ ## depth ## _c(uint8_t **dest, const int *blockoffset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]);\ | |||||
void ff_h264_idct_add8_ ## depth ## _c(uint8_t **dest, const int *blockoffset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]);\ | void ff_h264_idct_add8_ ## depth ## _c(uint8_t **dest, const int *blockoffset, DCTELEM *block, int stride, const uint8_t nnzc[6*8]);\ | ||||
void ff_h264_luma_dc_dequant_idct_ ## depth ## _c(DCTELEM *output, DCTELEM *input, int qmul);\ | void ff_h264_luma_dc_dequant_idct_ ## depth ## _c(DCTELEM *output, DCTELEM *input, int qmul);\ | ||||
void ff_h264_chroma422_dc_dequant_idct_ ## depth ## _c(DCTELEM *block, int qmul);\ | |||||
void ff_h264_chroma_dc_dequant_idct_ ## depth ## _c(DCTELEM *block, int qmul); | void ff_h264_chroma_dc_dequant_idct_ ## depth ## _c(DCTELEM *block, int qmul); | ||||
H264_IDCT( 8) | H264_IDCT( 8) | ||||
@@ -942,7 +942,7 @@ static void clone_tables(H264Context *dst, H264Context *src, int i){ | |||||
dst->list_counts = src->list_counts; | dst->list_counts = src->list_counts; | ||||
dst->s.obmc_scratchpad = NULL; | dst->s.obmc_scratchpad = NULL; | ||||
ff_h264_pred_init(&dst->hpc, src->s.codec_id, src->sps.bit_depth_luma); | |||||
ff_h264_pred_init(&dst->hpc, src->s.codec_id, src->sps.bit_depth_luma, src->sps.chroma_format_idc); | |||||
} | } | ||||
/** | /** | ||||
@@ -970,8 +970,8 @@ static av_cold void common_init(H264Context *h){ | |||||
s->height = s->avctx->height; | s->height = s->avctx->height; | ||||
s->codec_id= s->avctx->codec->id; | s->codec_id= s->avctx->codec->id; | ||||
ff_h264dsp_init(&h->h264dsp, 8); | |||||
ff_h264_pred_init(&h->hpc, s->codec_id, 8); | |||||
ff_h264dsp_init(&h->h264dsp, 8, 1); | |||||
ff_h264_pred_init(&h->hpc, s->codec_id, 8, 1); | |||||
h->dequant_coeff_pps= -1; | h->dequant_coeff_pps= -1; | ||||
s->unrestricted_mv=1; | s->unrestricted_mv=1; | ||||
@@ -1432,11 +1432,16 @@ static void decode_postinit(H264Context *h, int setup_finished){ | |||||
ff_thread_finish_setup(s->avctx); | ff_thread_finish_setup(s->avctx); | ||||
} | } | ||||
static av_always_inline void backup_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize, int chroma444, int simple){ | |||||
static av_always_inline void backup_mb_border(H264Context *h, uint8_t *src_y, | |||||
uint8_t *src_cb, uint8_t *src_cr, | |||||
int linesize, int uvlinesize, int simple) | |||||
{ | |||||
MpegEncContext * const s = &h->s; | MpegEncContext * const s = &h->s; | ||||
uint8_t *top_border; | uint8_t *top_border; | ||||
int top_idx = 1; | int top_idx = 1; | ||||
const int pixel_shift = h->pixel_shift; | const int pixel_shift = h->pixel_shift; | ||||
int chroma444 = CHROMA444; | |||||
int chroma422 = CHROMA422; | |||||
src_y -= linesize; | src_y -= linesize; | ||||
src_cb -= uvlinesize; | src_cb -= uvlinesize; | ||||
@@ -1460,6 +1465,14 @@ static av_always_inline void backup_mb_border(H264Context *h, uint8_t *src_y, ui | |||||
AV_COPY128(top_border+16, src_cb + 15*uvlinesize); | AV_COPY128(top_border+16, src_cb + 15*uvlinesize); | ||||
AV_COPY128(top_border+32, src_cr + 15*uvlinesize); | AV_COPY128(top_border+32, src_cr + 15*uvlinesize); | ||||
} | } | ||||
} else if(chroma422) { | |||||
if (pixel_shift) { | |||||
AV_COPY128(top_border+32, src_cb + 15*uvlinesize); | |||||
AV_COPY128(top_border+48, src_cr + 15*uvlinesize); | |||||
} else { | |||||
AV_COPY64(top_border+16, src_cb + 15*uvlinesize); | |||||
AV_COPY64(top_border+24, src_cr + 15*uvlinesize); | |||||
} | |||||
} else { | } else { | ||||
if (pixel_shift) { | if (pixel_shift) { | ||||
AV_COPY128(top_border+32, src_cb+7*uvlinesize); | AV_COPY128(top_border+32, src_cb+7*uvlinesize); | ||||
@@ -1495,6 +1508,14 @@ static av_always_inline void backup_mb_border(H264Context *h, uint8_t *src_y, ui | |||||
AV_COPY128(top_border+16, src_cb + 16*linesize); | AV_COPY128(top_border+16, src_cb + 16*linesize); | ||||
AV_COPY128(top_border+32, src_cr + 16*linesize); | AV_COPY128(top_border+32, src_cr + 16*linesize); | ||||
} | } | ||||
} else if(chroma422) { | |||||
if (pixel_shift) { | |||||
AV_COPY128(top_border+32, src_cb+16*uvlinesize); | |||||
AV_COPY128(top_border+48, src_cr+16*uvlinesize); | |||||
} else { | |||||
AV_COPY64(top_border+16, src_cb+16*uvlinesize); | |||||
AV_COPY64(top_border+24, src_cr+16*uvlinesize); | |||||
} | |||||
} else { | } else { | ||||
if (pixel_shift) { | if (pixel_shift) { | ||||
AV_COPY128(top_border+32, src_cb+8*uvlinesize); | AV_COPY128(top_border+32, src_cb+8*uvlinesize); | ||||
@@ -1773,10 +1794,11 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple, i | |||||
/* is_h264 should always be true if SVQ3 is disabled. */ | /* is_h264 should always be true if SVQ3 is disabled. */ | ||||
const int is_h264 = !CONFIG_SVQ3_DECODER || simple || s->codec_id == CODEC_ID_H264; | const int is_h264 = !CONFIG_SVQ3_DECODER || simple || s->codec_id == CODEC_ID_H264; | ||||
void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride); | void (*idct_add)(uint8_t *dst, DCTELEM *block, int stride); | ||||
const int block_h = 16 >> s->chroma_y_shift; | |||||
dest_y = s->current_picture.f.data[0] + ((mb_x << pixel_shift) + mb_y * s->linesize ) * 16; | dest_y = s->current_picture.f.data[0] + ((mb_x << pixel_shift) + mb_y * s->linesize ) * 16; | ||||
dest_cb = s->current_picture.f.data[1] + ((mb_x << pixel_shift) + mb_y * s->uvlinesize) * 8; | |||||
dest_cr = s->current_picture.f.data[2] + ((mb_x << pixel_shift) + mb_y * s->uvlinesize) * 8; | |||||
dest_cb = s->current_picture.f.data[1] + (mb_x << pixel_shift)*8 + mb_y * s->uvlinesize * block_h; | |||||
dest_cr = s->current_picture.f.data[2] + (mb_x << pixel_shift)*8 + mb_y * s->uvlinesize * block_h; | |||||
s->dsp.prefetch(dest_y + (s->mb_x&3)*4*s->linesize + (64 << pixel_shift), s->linesize, 4); | s->dsp.prefetch(dest_y + (s->mb_x&3)*4*s->linesize + (64 << pixel_shift), s->linesize, 4); | ||||
s->dsp.prefetch(dest_cb + (s->mb_x&7)*s->uvlinesize + (64 << pixel_shift), dest_cr - dest_cb, 2); | s->dsp.prefetch(dest_cb + (s->mb_x&7)*s->uvlinesize + (64 << pixel_shift), dest_cr - dest_cb, 2); | ||||
@@ -1789,8 +1811,8 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple, i | |||||
block_offset = &h->block_offset[48]; | block_offset = &h->block_offset[48]; | ||||
if(mb_y&1){ //FIXME move out of this function? | if(mb_y&1){ //FIXME move out of this function? | ||||
dest_y -= s->linesize*15; | dest_y -= s->linesize*15; | ||||
dest_cb-= s->uvlinesize*7; | |||||
dest_cr-= s->uvlinesize*7; | |||||
dest_cb-= s->uvlinesize * (block_h - 1); | |||||
dest_cr-= s->uvlinesize * (block_h - 1); | |||||
} | } | ||||
if(FRAME_MBAFF) { | if(FRAME_MBAFF) { | ||||
int list; | int list; | ||||
@@ -1842,12 +1864,12 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple, i | |||||
} | } | ||||
} | } | ||||
} else { | } else { | ||||
for (i = 0; i < 8; i++) { | |||||
for (i = 0; i < block_h; i++) { | |||||
uint16_t *tmp_cb = (uint16_t*)(dest_cb + i*uvlinesize); | uint16_t *tmp_cb = (uint16_t*)(dest_cb + i*uvlinesize); | ||||
for (j = 0; j < 8; j++) | for (j = 0; j < 8; j++) | ||||
tmp_cb[j] = get_bits(&gb, bit_depth); | tmp_cb[j] = get_bits(&gb, bit_depth); | ||||
} | } | ||||
for (i = 0; i < 8; i++) { | |||||
for (i = 0; i < block_h; i++) { | |||||
uint16_t *tmp_cr = (uint16_t*)(dest_cr + i*uvlinesize); | uint16_t *tmp_cr = (uint16_t*)(dest_cr + i*uvlinesize); | ||||
for (j = 0; j < 8; j++) | for (j = 0; j < 8; j++) | ||||
tmp_cr[j] = get_bits(&gb, bit_depth); | tmp_cr[j] = get_bits(&gb, bit_depth); | ||||
@@ -1865,7 +1887,7 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple, i | |||||
memset(dest_cr + i*uvlinesize, 128, 8); | memset(dest_cr + i*uvlinesize, 128, 8); | ||||
} | } | ||||
} else { | } else { | ||||
for (i = 0; i < 8; i++) { | |||||
for (i = 0; i < block_h; i++) { | |||||
memcpy(dest_cb + i*uvlinesize, h->mb + 128 + i*4, 8); | memcpy(dest_cb + i*uvlinesize, h->mb + 128 + i*4, 8); | ||||
memcpy(dest_cr + i*uvlinesize, h->mb + 160 + i*4, 8); | memcpy(dest_cr + i*uvlinesize, h->mb + 160 + i*4, 8); | ||||
} | } | ||||
@@ -1913,10 +1935,18 @@ static av_always_inline void hl_decode_mb_internal(H264Context *h, int simple, i | |||||
} | } | ||||
}else{ | }else{ | ||||
if(is_h264){ | if(is_h264){ | ||||
int qp[2]; | |||||
if (CHROMA422) { | |||||
qp[0] = h->chroma_qp[0] + 3; | |||||
qp[1] = h->chroma_qp[1] + 3; | |||||
} else { | |||||
qp[0] = h->chroma_qp[0]; | |||||
qp[1] = h->chroma_qp[1]; | |||||
} | |||||
if(h->non_zero_count_cache[ scan8[CHROMA_DC_BLOCK_INDEX+0] ]) | if(h->non_zero_count_cache[ scan8[CHROMA_DC_BLOCK_INDEX+0] ]) | ||||
h->h264dsp.h264_chroma_dc_dequant_idct(h->mb + (16*16*1 << pixel_shift), h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp[0]][0]); | |||||
h->h264dsp.h264_chroma_dc_dequant_idct(h->mb + (16*16*1 << pixel_shift), h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][qp[0]][0]); | |||||
if(h->non_zero_count_cache[ scan8[CHROMA_DC_BLOCK_INDEX+1] ]) | if(h->non_zero_count_cache[ scan8[CHROMA_DC_BLOCK_INDEX+1] ]) | ||||
h->h264dsp.h264_chroma_dc_dequant_idct(h->mb + (16*16*2 << pixel_shift), h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp[1]][0]); | |||||
h->h264dsp.h264_chroma_dc_dequant_idct(h->mb + (16*16*2 << pixel_shift), h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][qp[1]][0]); | |||||
h->h264dsp.h264_idct_add8(dest, block_offset, | h->h264dsp.h264_idct_add8(dest, block_offset, | ||||
h->mb, uvlinesize, | h->mb, uvlinesize, | ||||
h->non_zero_count_cache); | h->non_zero_count_cache); | ||||
@@ -2555,11 +2585,13 @@ static int decode_slice_header(H264Context *h, H264Context *h0){ | |||||
h->b_stride= s->mb_width*4; | h->b_stride= s->mb_width*4; | ||||
s->chroma_y_shift = h->sps.chroma_format_idc <= 1; // 400 uses yuv420p | |||||
s->width = 16*s->mb_width - (2>>CHROMA444)*FFMIN(h->sps.crop_right, (8<<CHROMA444)-1); | s->width = 16*s->mb_width - (2>>CHROMA444)*FFMIN(h->sps.crop_right, (8<<CHROMA444)-1); | ||||
if(h->sps.frame_mbs_only_flag) | if(h->sps.frame_mbs_only_flag) | ||||
s->height= 16*s->mb_height - (2>>CHROMA444)*FFMIN(h->sps.crop_bottom, (8<<CHROMA444)-1); | |||||
s->height= 16*s->mb_height - (1<<s->chroma_y_shift)*FFMIN(h->sps.crop_bottom, (16>>s->chroma_y_shift)-1); | |||||
else | else | ||||
s->height= 16*s->mb_height - (4>>CHROMA444)*FFMIN(h->sps.crop_bottom, (8<<CHROMA444)-1); | |||||
s->height= 16*s->mb_height - (2<<s->chroma_y_shift)*FFMIN(h->sps.crop_bottom, (16>>s->chroma_y_shift)-1); | |||||
if (s->context_initialized | if (s->context_initialized | ||||
&& ( s->width != s->avctx->width || s->height != s->avctx->height | && ( s->width != s->avctx->width || s->height != s->avctx->height | ||||
@@ -2601,14 +2633,26 @@ static int decode_slice_header(H264Context *h, H264Context *h0){ | |||||
switch (h->sps.bit_depth_luma) { | switch (h->sps.bit_depth_luma) { | ||||
case 9 : | case 9 : | ||||
s->avctx->pix_fmt = CHROMA444 ? PIX_FMT_YUV444P9 : PIX_FMT_YUV420P9; | |||||
if (CHROMA444) | |||||
s->avctx->pix_fmt = PIX_FMT_YUV444P9; | |||||
else if (CHROMA422) | |||||
s->avctx->pix_fmt = PIX_FMT_YUV422P9; | |||||
else | |||||
s->avctx->pix_fmt = PIX_FMT_YUV420P9; | |||||
break; | break; | ||||
case 10 : | case 10 : | ||||
s->avctx->pix_fmt = CHROMA444 ? PIX_FMT_YUV444P10 : PIX_FMT_YUV420P10; | |||||
if (CHROMA444) | |||||
s->avctx->pix_fmt = PIX_FMT_YUV444P10; | |||||
else if (CHROMA422) | |||||
s->avctx->pix_fmt = PIX_FMT_YUV422P10; | |||||
else | |||||
s->avctx->pix_fmt = PIX_FMT_YUV420P10; | |||||
break; | break; | ||||
default: | default: | ||||
if (CHROMA444){ | if (CHROMA444){ | ||||
s->avctx->pix_fmt = s->avctx->color_range == AVCOL_RANGE_JPEG ? PIX_FMT_YUVJ444P : PIX_FMT_YUV444P; | s->avctx->pix_fmt = s->avctx->color_range == AVCOL_RANGE_JPEG ? PIX_FMT_YUVJ444P : PIX_FMT_YUV444P; | ||||
} else if (CHROMA422) { | |||||
s->avctx->pix_fmt = s->avctx->color_range == AVCOL_RANGE_JPEG ? PIX_FMT_YUVJ422P : PIX_FMT_YUV422P; | |||||
}else{ | }else{ | ||||
s->avctx->pix_fmt = s->avctx->get_format(s->avctx, | s->avctx->pix_fmt = s->avctx->get_format(s->avctx, | ||||
s->avctx->codec->pix_fmts ? | s->avctx->codec->pix_fmts ? | ||||
@@ -3272,6 +3316,7 @@ static void loop_filter(H264Context *h, int start_x, int end_x){ | |||||
const int end_mb_y= s->mb_y + FRAME_MBAFF; | const int end_mb_y= s->mb_y + FRAME_MBAFF; | ||||
const int old_slice_type= h->slice_type; | const int old_slice_type= h->slice_type; | ||||
const int pixel_shift = h->pixel_shift; | const int pixel_shift = h->pixel_shift; | ||||
const int block_h = 16 >> s->chroma_y_shift; | |||||
if(h->deblocking_filter) { | if(h->deblocking_filter) { | ||||
for(mb_x= start_x; mb_x<end_x; mb_x++){ | for(mb_x= start_x; mb_x<end_x; mb_x++){ | ||||
@@ -3288,8 +3333,8 @@ static void loop_filter(H264Context *h, int start_x, int end_x){ | |||||
s->mb_x= mb_x; | s->mb_x= mb_x; | ||||
s->mb_y= mb_y; | s->mb_y= mb_y; | ||||
dest_y = s->current_picture.f.data[0] + ((mb_x << pixel_shift) + mb_y * s->linesize ) * 16; | dest_y = s->current_picture.f.data[0] + ((mb_x << pixel_shift) + mb_y * s->linesize ) * 16; | ||||
dest_cb = s->current_picture.f.data[1] + ((mb_x << pixel_shift) + mb_y * s->uvlinesize) * (8 << CHROMA444); | |||||
dest_cr = s->current_picture.f.data[2] + ((mb_x << pixel_shift) + mb_y * s->uvlinesize) * (8 << CHROMA444); | |||||
dest_cb = s->current_picture.f.data[1] + (mb_x << pixel_shift) * (8 << CHROMA444) + mb_y * s->uvlinesize * block_h; | |||||
dest_cr = s->current_picture.f.data[2] + (mb_x << pixel_shift) * (8 << CHROMA444) + mb_y * s->uvlinesize * block_h; | |||||
//FIXME simplify above | //FIXME simplify above | ||||
if (MB_FIELD) { | if (MB_FIELD) { | ||||
@@ -3297,14 +3342,14 @@ static void loop_filter(H264Context *h, int start_x, int end_x){ | |||||
uvlinesize = h->mb_uvlinesize = s->uvlinesize * 2; | uvlinesize = h->mb_uvlinesize = s->uvlinesize * 2; | ||||
if(mb_y&1){ //FIXME move out of this function? | if(mb_y&1){ //FIXME move out of this function? | ||||
dest_y -= s->linesize*15; | dest_y -= s->linesize*15; | ||||
dest_cb-= s->uvlinesize*((8 << CHROMA444)-1); | |||||
dest_cr-= s->uvlinesize*((8 << CHROMA444)-1); | |||||
dest_cb-= s->uvlinesize * (block_h - 1); | |||||
dest_cr-= s->uvlinesize * (block_h - 1); | |||||
} | } | ||||
} else { | } else { | ||||
linesize = h->mb_linesize = s->linesize; | linesize = h->mb_linesize = s->linesize; | ||||
uvlinesize = h->mb_uvlinesize = s->uvlinesize; | uvlinesize = h->mb_uvlinesize = s->uvlinesize; | ||||
} | } | ||||
backup_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, CHROMA444, 0); | |||||
backup_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0); | |||||
if(fill_filter_caches(h, mb_type)) | if(fill_filter_caches(h, mb_type)) | ||||
continue; | continue; | ||||
h->chroma_qp[0] = get_chroma_qp(h, 0, s->current_picture.f.qscale_table[mb_xy]); | h->chroma_qp[0] = get_chroma_qp(h, 0, s->current_picture.f.qscale_table[mb_xy]); | ||||
@@ -3742,13 +3787,15 @@ static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size){ | |||||
if(avctx->has_b_frames < 2) | if(avctx->has_b_frames < 2) | ||||
avctx->has_b_frames= !s->low_delay; | avctx->has_b_frames= !s->low_delay; | ||||
if (avctx->bits_per_raw_sample != h->sps.bit_depth_luma) { | |||||
if (avctx->bits_per_raw_sample != h->sps.bit_depth_luma || | |||||
h->cur_chroma_format_idc != h->sps.chroma_format_idc) { | |||||
if (h->sps.bit_depth_luma >= 8 && h->sps.bit_depth_luma <= 10) { | if (h->sps.bit_depth_luma >= 8 && h->sps.bit_depth_luma <= 10) { | ||||
avctx->bits_per_raw_sample = h->sps.bit_depth_luma; | avctx->bits_per_raw_sample = h->sps.bit_depth_luma; | ||||
h->cur_chroma_format_idc = h->sps.chroma_format_idc; | |||||
h->pixel_shift = h->sps.bit_depth_luma > 8; | h->pixel_shift = h->sps.bit_depth_luma > 8; | ||||
ff_h264dsp_init(&h->h264dsp, h->sps.bit_depth_luma); | |||||
ff_h264_pred_init(&h->hpc, s->codec_id, h->sps.bit_depth_luma); | |||||
ff_h264dsp_init(&h->h264dsp, h->sps.bit_depth_luma, h->sps.chroma_format_idc); | |||||
ff_h264_pred_init(&h->hpc, s->codec_id, h->sps.bit_depth_luma, h->sps.chroma_format_idc); | |||||
s->dsp.dct_bits = h->sps.bit_depth_luma > 8 ? 32 : 16; | s->dsp.dct_bits = h->sps.bit_depth_luma > 8 ? 32 : 16; | ||||
dsputil_init(&s->dsp, s->avctx); | dsputil_init(&s->dsp, s->avctx); | ||||
} else { | } else { | ||||
@@ -39,13 +39,6 @@ | |||||
#define interlaced_dct interlaced_dct_is_a_bad_name | #define interlaced_dct interlaced_dct_is_a_bad_name | ||||
#define mb_intra mb_intra_is_not_initialized_see_mb_type | #define mb_intra mb_intra_is_not_initialized_see_mb_type | ||||
#define CHROMA_DC_COEFF_TOKEN_VLC_BITS 8 | |||||
#define COEFF_TOKEN_VLC_BITS 8 | |||||
#define TOTAL_ZEROS_VLC_BITS 9 | |||||
#define CHROMA_DC_TOTAL_ZEROS_VLC_BITS 3 | |||||
#define RUN_VLC_BITS 3 | |||||
#define RUN7_VLC_BITS 6 | |||||
#define MAX_SPS_COUNT 32 | #define MAX_SPS_COUNT 32 | ||||
#define MAX_PPS_COUNT 256 | #define MAX_PPS_COUNT 256 | ||||
@@ -92,6 +85,7 @@ | |||||
#define CABAC h->pps.cabac | #define CABAC h->pps.cabac | ||||
#endif | #endif | ||||
#define CHROMA422 (h->sps.chroma_format_idc == 2) | |||||
#define CHROMA444 (h->sps.chroma_format_idc == 3) | #define CHROMA444 (h->sps.chroma_format_idc == 3) | ||||
#define EXTENDED_SAR 255 | #define EXTENDED_SAR 255 | ||||
@@ -582,6 +576,8 @@ typedef struct H264Context{ | |||||
// Timestamp stuff | // Timestamp stuff | ||||
int sei_buffering_period_present; ///< Buffering period SEI flag | int sei_buffering_period_present; ///< Buffering period SEI flag | ||||
int initial_cpb_removal_delay[32]; ///< Initial timestamps for CPBs | int initial_cpb_removal_delay[32]; ///< Initial timestamps for CPBs | ||||
int cur_chroma_format_idc; | |||||
}H264Context; | }H264Context; | ||||
@@ -809,7 +805,7 @@ static av_always_inline void write_back_non_zero_count(H264Context *h){ | |||||
AV_COPY32(&nnz[32], &nnz_cache[4+8*11]); | AV_COPY32(&nnz[32], &nnz_cache[4+8*11]); | ||||
AV_COPY32(&nnz[36], &nnz_cache[4+8*12]); | AV_COPY32(&nnz[36], &nnz_cache[4+8*12]); | ||||
if(CHROMA444){ | |||||
if(!h->s.chroma_y_shift){ | |||||
AV_COPY32(&nnz[24], &nnz_cache[4+8* 8]); | AV_COPY32(&nnz[24], &nnz_cache[4+8* 8]); | ||||
AV_COPY32(&nnz[28], &nnz_cache[4+8* 9]); | AV_COPY32(&nnz[28], &nnz_cache[4+8* 9]); | ||||
AV_COPY32(&nnz[40], &nnz_cache[4+8*13]); | AV_COPY32(&nnz[40], &nnz_cache[4+8*13]); | ||||
@@ -1565,7 +1565,12 @@ DECLARE_ASM_CONST(1, uint8_t, last_coeff_flag_offset_8x8)[63] = { | |||||
5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8 | 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8 | ||||
}; | }; | ||||
static av_always_inline void decode_cabac_residual_internal( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff, int is_dc ) { | |||||
static av_always_inline void | |||||
decode_cabac_residual_internal(H264Context *h, DCTELEM *block, | |||||
int cat, int n, const uint8_t *scantable, | |||||
const uint32_t *qmul, int max_coeff, | |||||
int is_dc, int chroma422) | |||||
{ | |||||
static const int significant_coeff_flag_offset[2][14] = { | static const int significant_coeff_flag_offset[2][14] = { | ||||
{ 105+0, 105+15, 105+29, 105+44, 105+47, 402, 484+0, 484+15, 484+29, 660, 528+0, 528+15, 528+29, 718 }, | { 105+0, 105+15, 105+29, 105+44, 105+47, 402, 484+0, 484+15, 484+29, 660, 528+0, 528+15, 528+29, 718 }, | ||||
{ 277+0, 277+15, 277+29, 277+44, 277+47, 436, 776+0, 776+15, 776+29, 675, 820+0, 820+15, 820+29, 733 } | { 277+0, 277+15, 277+29, 277+44, 277+47, 436, 776+0, 776+15, 776+29, 675, 820+0, 820+15, 820+29, 733 } | ||||
@@ -1587,12 +1592,16 @@ static av_always_inline void decode_cabac_residual_internal( H264Context *h, DCT | |||||
9, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,13,13, 9, | 9, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,13,13, 9, | ||||
9,10,10, 8,13,13, 9, 9,10,10,14,14,14,14,14 } | 9,10,10, 8,13,13, 9, 9,10,10,14,14,14,14,14 } | ||||
}; | }; | ||||
static const uint8_t sig_coeff_offset_dc[7] = { 0, 0, 1, 1, 2, 2, 2 }; | |||||
/* node ctx: 0..3: abslevel1 (with abslevelgt1 == 0). | /* node ctx: 0..3: abslevel1 (with abslevelgt1 == 0). | ||||
* 4..7: abslevelgt1 + 3 (and abslevel1 doesn't matter). | * 4..7: abslevelgt1 + 3 (and abslevel1 doesn't matter). | ||||
* map node ctx => cabac ctx for level=1 */ | * map node ctx => cabac ctx for level=1 */ | ||||
static const uint8_t coeff_abs_level1_ctx[8] = { 1, 2, 3, 4, 0, 0, 0, 0 }; | static const uint8_t coeff_abs_level1_ctx[8] = { 1, 2, 3, 4, 0, 0, 0, 0 }; | ||||
/* map node ctx => cabac ctx for level>1 */ | /* map node ctx => cabac ctx for level>1 */ | ||||
static const uint8_t coeff_abs_levelgt1_ctx[8] = { 5, 5, 5, 5, 6, 7, 8, 9 }; | |||||
static const uint8_t coeff_abs_levelgt1_ctx[2][8] = { | |||||
{ 5, 5, 5, 5, 6, 7, 8, 9 }, | |||||
{ 5, 5, 5, 5, 6, 7, 8, 8 }, // 422/dc case | |||||
}; | |||||
static const uint8_t coeff_abs_level_transition[2][8] = { | static const uint8_t coeff_abs_level_transition[2][8] = { | ||||
/* update node ctx after decoding a level=1 */ | /* update node ctx after decoding a level=1 */ | ||||
{ 1, 2, 3, 3, 4, 5, 6, 7 }, | { 1, 2, 3, 3, 4, 5, 6, 7 }, | ||||
@@ -1651,12 +1660,20 @@ static av_always_inline void decode_cabac_residual_internal( H264Context *h, DCT | |||||
coeff_count= decode_significance_8x8_x86(CC, significant_coeff_ctx_base, index, | coeff_count= decode_significance_8x8_x86(CC, significant_coeff_ctx_base, index, | ||||
last_coeff_ctx_base, sig_off); | last_coeff_ctx_base, sig_off); | ||||
} else { | } else { | ||||
coeff_count= decode_significance_x86(CC, max_coeff, significant_coeff_ctx_base, index, | |||||
last_coeff_ctx_base-significant_coeff_ctx_base); | |||||
if (is_dc && chroma422) { // dc 422 | |||||
DECODE_SIGNIFICANCE(7, sig_coeff_offset_dc[last], sig_coeff_offset_dc[last]); | |||||
} else { | |||||
coeff_count= decode_significance_x86(CC, max_coeff, significant_coeff_ctx_base, index, | |||||
last_coeff_ctx_base-significant_coeff_ctx_base); | |||||
} | |||||
#else | #else | ||||
DECODE_SIGNIFICANCE( 63, sig_off[last], last_coeff_flag_offset_8x8[last] ); | DECODE_SIGNIFICANCE( 63, sig_off[last], last_coeff_flag_offset_8x8[last] ); | ||||
} else { | } else { | ||||
DECODE_SIGNIFICANCE( max_coeff - 1, last, last ); | |||||
if (is_dc && chroma422) { // dc 422 | |||||
DECODE_SIGNIFICANCE(7, sig_coeff_offset_dc[last], sig_coeff_offset_dc[last]); | |||||
} else { | |||||
DECODE_SIGNIFICANCE(max_coeff - 1, last, last); | |||||
} | |||||
#endif | #endif | ||||
} | } | ||||
assert(coeff_count > 0); | assert(coeff_count > 0); | ||||
@@ -1691,7 +1708,7 @@ static av_always_inline void decode_cabac_residual_internal( H264Context *h, DCT | |||||
} \ | } \ | ||||
} else { \ | } else { \ | ||||
int coeff_abs = 2; \ | int coeff_abs = 2; \ | ||||
ctx = coeff_abs_levelgt1_ctx[node_ctx] + abs_level_m1_ctx_base; \ | |||||
ctx = coeff_abs_levelgt1_ctx[is_dc && chroma422][node_ctx] + abs_level_m1_ctx_base; \ | |||||
node_ctx = coeff_abs_level_transition[1][node_ctx]; \ | node_ctx = coeff_abs_level_transition[1][node_ctx]; \ | ||||
\ | \ | ||||
while( coeff_abs < 15 && get_cabac( CC, ctx ) ) { \ | while( coeff_abs < 15 && get_cabac( CC, ctx ) ) { \ | ||||
@@ -1733,11 +1750,18 @@ static av_always_inline void decode_cabac_residual_internal( H264Context *h, DCT | |||||
} | } | ||||
static void decode_cabac_residual_dc_internal( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, int max_coeff ) { | static void decode_cabac_residual_dc_internal( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, int max_coeff ) { | ||||
decode_cabac_residual_internal(h, block, cat, n, scantable, NULL, max_coeff, 1); | |||||
decode_cabac_residual_internal(h, block, cat, n, scantable, NULL, max_coeff, 1, 0); | |||||
} | |||||
static void decode_cabac_residual_dc_internal_422(H264Context *h, DCTELEM *block, | |||||
int cat, int n, const uint8_t *scantable, | |||||
int max_coeff) | |||||
{ | |||||
decode_cabac_residual_internal(h, block, cat, n, scantable, NULL, max_coeff, 1, 1); | |||||
} | } | ||||
static void decode_cabac_residual_nondc_internal( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) { | static void decode_cabac_residual_nondc_internal( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) { | ||||
decode_cabac_residual_internal(h, block, cat, n, scantable, qmul, max_coeff, 0); | |||||
decode_cabac_residual_internal(h, block, cat, n, scantable, qmul, max_coeff, 0, 0); | |||||
} | } | ||||
/* cat: 0-> DC 16x16 n = 0 | /* cat: 0-> DC 16x16 n = 0 | ||||
@@ -1761,6 +1785,19 @@ static av_always_inline void decode_cabac_residual_dc( H264Context *h, DCTELEM * | |||||
decode_cabac_residual_dc_internal( h, block, cat, n, scantable, max_coeff ); | decode_cabac_residual_dc_internal( h, block, cat, n, scantable, max_coeff ); | ||||
} | } | ||||
static av_always_inline void | |||||
decode_cabac_residual_dc_422(H264Context *h, DCTELEM *block, | |||||
int cat, int n, const uint8_t *scantable, | |||||
int max_coeff) | |||||
{ | |||||
/* read coded block flag */ | |||||
if (get_cabac(&h->cabac, &h->cabac_state[get_cabac_cbf_ctx(h, cat, n, max_coeff, 1)]) == 0) { | |||||
h->non_zero_count_cache[scan8[n]] = 0; | |||||
return; | |||||
} | |||||
decode_cabac_residual_dc_internal_422(h, block, cat, n, scantable, max_coeff); | |||||
} | |||||
static av_always_inline void decode_cabac_residual_nondc( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) { | static av_always_inline void decode_cabac_residual_nondc( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) { | ||||
/* read coded block flag */ | /* read coded block flag */ | ||||
if( (cat != 5 || CHROMA444) && get_cabac( &h->cabac, &h->cabac_state[get_cabac_cbf_ctx( h, cat, n, max_coeff, 0 ) ] ) == 0 ) { | if( (cat != 5 || CHROMA444) && get_cabac( &h->cabac, &h->cabac_state[get_cabac_cbf_ctx( h, cat, n, max_coeff, 0 ) ] ) == 0 ) { | ||||
@@ -2313,7 +2350,36 @@ decode_intra_mb: | |||||
if(CHROMA444){ | if(CHROMA444){ | ||||
decode_cabac_luma_residual(h, scan, scan8x8, pixel_shift, mb_type, cbp, 1); | decode_cabac_luma_residual(h, scan, scan8x8, pixel_shift, mb_type, cbp, 1); | ||||
decode_cabac_luma_residual(h, scan, scan8x8, pixel_shift, mb_type, cbp, 2); | decode_cabac_luma_residual(h, scan, scan8x8, pixel_shift, mb_type, cbp, 2); | ||||
} else { | |||||
} else if (CHROMA422) { | |||||
if( cbp&0x30 ){ | |||||
int c; | |||||
for( c = 0; c < 2; c++ ) { | |||||
//av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-DC\n",c ); | |||||
decode_cabac_residual_dc_422(h, h->mb + ((256 + 16*16*c) << pixel_shift), 3, | |||||
CHROMA_DC_BLOCK_INDEX + c, | |||||
chroma422_dc_scan, 8); | |||||
} | |||||
} | |||||
if( cbp&0x20 ) { | |||||
int c, i, i8x8; | |||||
for( c = 0; c < 2; c++ ) { | |||||
DCTELEM *mb = h->mb + (16*(16 + 16*c) << pixel_shift); | |||||
qmul = h->dequant4_coeff[c+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[c]]; | |||||
for (i8x8 = 0; i8x8 < 2; i8x8++) { | |||||
for (i = 0; i < 4; i++) { | |||||
const int index = 16 + 16 * c + 8*i8x8 + i; | |||||
//av_log(s->avctx, AV_LOG_ERROR, "INTRA C%d-AC %d\n",c, index - 16); | |||||
decode_cabac_residual_nondc(h, mb, 4, index, scan + 1, qmul, 15); | |||||
mb += 16<<pixel_shift; | |||||
} | |||||
} | |||||
} | |||||
} else { | |||||
fill_rectangle(&h->non_zero_count_cache[scan8[16]], 4, 4, 8, 0, 1); | |||||
fill_rectangle(&h->non_zero_count_cache[scan8[32]], 4, 4, 8, 0, 1); | |||||
} | |||||
} else /* yuv420 */ { | |||||
if( cbp&0x30 ){ | if( cbp&0x30 ){ | ||||
int c; | int c; | ||||
for( c = 0; c < 2; c++ ) { | for( c = 0; c < 2; c++ ) { | ||||
@@ -62,6 +62,30 @@ static const uint8_t chroma_dc_coeff_token_bits[4*5]={ | |||||
2, 3, 2, 0, | 2, 3, 2, 0, | ||||
}; | }; | ||||
/*
 * Code lengths (in bits) for the 4:2:2 chroma DC coeff_token VLC.
 * Passed to init_vlc() together with chroma422_dc_coeff_token_bits, with
 * 4*9 entries. decode_residual() computes total_coeff as (symbol >> 2), so
 * each row of four entries belongs to one total_coeff value (0..8).
 * NOTE(review): the low two bits of the symbol presumably select the number
 * of trailing ones, and a length of 0 presumably marks an unused slot --
 * confirm against the H.264 CAVLC tables.
 * The longest code is 13 bits, which equals CHROMA422_DC_COEFF_TOKEN_VLC_BITS.
 */
static const uint8_t chroma422_dc_coeff_token_len[4*9]={ | |||||
1, 0, 0, 0, | |||||
7, 2, 0, 0, | |||||
7, 7, 3, 0, | |||||
9, 7, 7, 5, | |||||
9, 9, 7, 6, | |||||
10, 10, 9, 7, | |||||
11, 11, 10, 7, | |||||
12, 12, 11, 10, | |||||
13, 12, 12, 11, | |||||
}; | |||||
/*
 * Code words for the 4:2:2 chroma DC coeff_token VLC, entry-for-entry
 * parallel to chroma422_dc_coeff_token_len (same 4*9 layout; the symbol
 * index is the array position, and decode_residual() uses symbol >> 2 as
 * total_coeff). Passed to init_vlc() as the codes array.
 */
static const uint8_t chroma422_dc_coeff_token_bits[4*9]={ | |||||
1, 0, 0, 0, | |||||
15, 1, 0, 0, | |||||
14, 13, 1, 0, | |||||
7, 12, 11, 1, | |||||
6, 5, 10, 1, | |||||
7, 6, 4, 9, | |||||
7, 6, 5, 8, | |||||
7, 6, 5, 4, | |||||
7, 5, 4, 4, | |||||
}; | |||||
static const uint8_t coeff_token_len[4][4*17]={ | static const uint8_t coeff_token_len[4][4*17]={ | ||||
{ | { | ||||
1, 0, 0, 0, | 1, 0, 0, 0, | ||||
@@ -172,6 +196,26 @@ static const uint8_t chroma_dc_total_zeros_bits[3][4]= { | |||||
{ 1, 0, 0, 0,}, | { 1, 0, 0, 0,}, | ||||
}; | }; | ||||
/*
 * Code lengths (in bits) for the 4:2:2 chroma DC total_zeros VLCs.
 * One row per table; ff_h264_decode_init_vlc() builds seven VLCs from these
 * rows (8 entries each) and decode_residual() selects the table with
 * (chroma422_dc_total_zeros_vlc-1)[total_coeff], i.e. row total_coeff-1.
 * Rows with fewer than 8 initialisers are zero-filled by the compiler.
 * The longest code is 5 bits, which equals CHROMA422_DC_TOTAL_ZEROS_VLC_BITS.
 */
static const uint8_t chroma422_dc_total_zeros_len[7][8]= { | |||||
{ 1, 3, 3, 4, 4, 4, 5, 5 }, | |||||
{ 3, 2, 3, 3, 3, 3, 3 }, | |||||
{ 3, 3, 2, 2, 3, 3 }, | |||||
{ 3, 2, 2, 2, 3 }, | |||||
{ 2, 2, 2, 2 }, | |||||
{ 2, 2, 1 }, | |||||
{ 1, 1 }, | |||||
}; | |||||
/*
 * Code words for the 4:2:2 chroma DC total_zeros VLCs, entry-for-entry
 * parallel to chroma422_dc_total_zeros_len (row = total_coeff-1, position
 * in the row = decoded symbol). Passed to init_vlc() as the codes array.
 */
static const uint8_t chroma422_dc_total_zeros_bits[7][8]= { | |||||
{ 1, 2, 3, 2, 3, 1, 1, 0 }, | |||||
{ 0, 1, 1, 4, 5, 6, 7 }, | |||||
{ 0, 1, 1, 2, 6, 7 }, | |||||
{ 6, 0, 1, 2, 7 }, | |||||
{ 0, 1, 2, 3 }, | |||||
{ 0, 1, 1 }, | |||||
{ 0, 1 }, | |||||
}; | |||||
static const uint8_t run_len[7][16]={ | static const uint8_t run_len[7][16]={ | ||||
{1,1}, | {1,1}, | ||||
{1,2,2}, | {1,2,2}, | ||||
@@ -200,6 +244,10 @@ static VLC chroma_dc_coeff_token_vlc; | |||||
static VLC_TYPE chroma_dc_coeff_token_vlc_table[256][2]; | static VLC_TYPE chroma_dc_coeff_token_vlc_table[256][2]; | ||||
static const int chroma_dc_coeff_token_vlc_table_size = 256; | static const int chroma_dc_coeff_token_vlc_table_size = 256; | ||||
/*
 * Static storage for the 4:2:2 chroma DC coeff_token VLC.
 * ff_h264_decode_init_vlc() points .table at this array and sets
 * .table_allocated to the size constant before calling init_vlc() with
 * INIT_VLC_USE_NEW_STATIC. 8192 == 1 << CHROMA422_DC_COEFF_TOKEN_VLC_BITS.
 */
static VLC chroma422_dc_coeff_token_vlc; | |||||
static VLC_TYPE chroma422_dc_coeff_token_vlc_table[8192][2]; | |||||
static const int chroma422_dc_coeff_token_vlc_table_size = 8192; | |||||
static VLC total_zeros_vlc[15]; | static VLC total_zeros_vlc[15]; | ||||
static VLC_TYPE total_zeros_vlc_tables[15][512][2]; | static VLC_TYPE total_zeros_vlc_tables[15][512][2]; | ||||
static const int total_zeros_vlc_tables_size = 512; | static const int total_zeros_vlc_tables_size = 512; | ||||
@@ -208,6 +256,10 @@ static VLC chroma_dc_total_zeros_vlc[3]; | |||||
static VLC_TYPE chroma_dc_total_zeros_vlc_tables[3][8][2]; | static VLC_TYPE chroma_dc_total_zeros_vlc_tables[3][8][2]; | ||||
static const int chroma_dc_total_zeros_vlc_tables_size = 8; | static const int chroma_dc_total_zeros_vlc_tables_size = 8; | ||||
/*
 * Static storage for the seven 4:2:2 chroma DC total_zeros VLCs.
 * ff_h264_decode_init_vlc() assigns one [32][2] slice to each VLC and sets
 * .table_allocated to the size constant before calling init_vlc() with
 * INIT_VLC_USE_NEW_STATIC. 32 == 1 << CHROMA422_DC_TOTAL_ZEROS_VLC_BITS.
 */
static VLC chroma422_dc_total_zeros_vlc[7]; | |||||
static VLC_TYPE chroma422_dc_total_zeros_vlc_tables[7][32][2]; | |||||
static const int chroma422_dc_total_zeros_vlc_tables_size = 32; | |||||
static VLC run_vlc[6]; | static VLC run_vlc[6]; | ||||
static VLC_TYPE run_vlc_tables[6][8][2]; | static VLC_TYPE run_vlc_tables[6][8][2]; | ||||
static const int run_vlc_tables_size = 8; | static const int run_vlc_tables_size = 8; | ||||
@@ -219,6 +271,14 @@ static const int run7_vlc_table_size = 96; | |||||
#define LEVEL_TAB_BITS 8 | #define LEVEL_TAB_BITS 8 | ||||
static int8_t cavlc_level_tab[7][1<<LEVEL_TAB_BITS][2]; | static int8_t cavlc_level_tab[7][1<<LEVEL_TAB_BITS][2]; | ||||
/*
 * Bit widths passed as the lookup-size argument of init_vlc()/get_vlc2()
 * for the CAVLC tables. The chroma DC and total_zeros widths are used that
 * way in ff_h264_decode_init_vlc() and decode_residual(); the remaining
 * ones presumably follow the same pattern -- confirm at their use sites.
 * The CHROMA422_* widths match the longest code in the corresponding
 * chroma422_dc_* length tables (13 and 5 bits).
 */
#define CHROMA_DC_COEFF_TOKEN_VLC_BITS 8 | |||||
#define CHROMA422_DC_COEFF_TOKEN_VLC_BITS 13 | |||||
#define COEFF_TOKEN_VLC_BITS 8 | |||||
#define TOTAL_ZEROS_VLC_BITS 9 | |||||
#define CHROMA_DC_TOTAL_ZEROS_VLC_BITS 3 | |||||
#define CHROMA422_DC_TOTAL_ZEROS_VLC_BITS 5 | |||||
#define RUN_VLC_BITS 3 | |||||
#define RUN7_VLC_BITS 6 | |||||
/** | /** | ||||
* gets the predicted number of non-zero coefficients. | * gets the predicted number of non-zero coefficients. | ||||
@@ -278,6 +338,13 @@ av_cold void ff_h264_decode_init_vlc(void){ | |||||
&chroma_dc_coeff_token_bits[0], 1, 1, | &chroma_dc_coeff_token_bits[0], 1, 1, | ||||
INIT_VLC_USE_NEW_STATIC); | INIT_VLC_USE_NEW_STATIC); | ||||
chroma422_dc_coeff_token_vlc.table = chroma422_dc_coeff_token_vlc_table; | |||||
chroma422_dc_coeff_token_vlc.table_allocated = chroma422_dc_coeff_token_vlc_table_size; | |||||
init_vlc(&chroma422_dc_coeff_token_vlc, CHROMA422_DC_COEFF_TOKEN_VLC_BITS, 4*9, | |||||
&chroma422_dc_coeff_token_len [0], 1, 1, | |||||
&chroma422_dc_coeff_token_bits[0], 1, 1, | |||||
INIT_VLC_USE_NEW_STATIC); | |||||
offset = 0; | offset = 0; | ||||
for(i=0; i<4; i++){ | for(i=0; i<4; i++){ | ||||
coeff_token_vlc[i].table = coeff_token_vlc_tables+offset; | coeff_token_vlc[i].table = coeff_token_vlc_tables+offset; | ||||
@@ -304,6 +371,17 @@ av_cold void ff_h264_decode_init_vlc(void){ | |||||
&chroma_dc_total_zeros_bits[i][0], 1, 1, | &chroma_dc_total_zeros_bits[i][0], 1, 1, | ||||
INIT_VLC_USE_NEW_STATIC); | INIT_VLC_USE_NEW_STATIC); | ||||
} | } | ||||
for(i=0; i<7; i++){ | |||||
chroma422_dc_total_zeros_vlc[i].table = chroma422_dc_total_zeros_vlc_tables[i]; | |||||
chroma422_dc_total_zeros_vlc[i].table_allocated = chroma422_dc_total_zeros_vlc_tables_size; | |||||
init_vlc(&chroma422_dc_total_zeros_vlc[i], | |||||
CHROMA422_DC_TOTAL_ZEROS_VLC_BITS, 8, | |||||
&chroma422_dc_total_zeros_len [i][0], 1, 1, | |||||
&chroma422_dc_total_zeros_bits[i][0], 1, 1, | |||||
INIT_VLC_USE_NEW_STATIC); | |||||
} | |||||
for(i=0; i<15; i++){ | for(i=0; i<15; i++){ | ||||
total_zeros_vlc[i].table = total_zeros_vlc_tables[i]; | total_zeros_vlc[i].table = total_zeros_vlc_tables[i]; | ||||
total_zeros_vlc[i].table_allocated = total_zeros_vlc_tables_size; | total_zeros_vlc[i].table_allocated = total_zeros_vlc_tables_size; | ||||
@@ -373,7 +451,10 @@ static int decode_residual(H264Context *h, GetBitContext *gb, DCTELEM *block, in | |||||
//FIXME put trailing_onex into the context | //FIXME put trailing_onex into the context | ||||
if(max_coeff <= 8){ | if(max_coeff <= 8){ | ||||
coeff_token= get_vlc2(gb, chroma_dc_coeff_token_vlc.table, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 1); | |||||
if (max_coeff == 4) | |||||
coeff_token = get_vlc2(gb, chroma_dc_coeff_token_vlc.table, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 1); | |||||
else | |||||
coeff_token = get_vlc2(gb, chroma422_dc_coeff_token_vlc.table, CHROMA422_DC_COEFF_TOKEN_VLC_BITS, 1); | |||||
total_coeff= coeff_token>>2; | total_coeff= coeff_token>>2; | ||||
}else{ | }else{ | ||||
if(n >= LUMA_DC_BLOCK_INDEX){ | if(n >= LUMA_DC_BLOCK_INDEX){ | ||||
@@ -483,11 +564,16 @@ static int decode_residual(H264Context *h, GetBitContext *gb, DCTELEM *block, in | |||||
if(total_coeff == max_coeff) | if(total_coeff == max_coeff) | ||||
zeros_left=0; | zeros_left=0; | ||||
else{ | else{ | ||||
/* FIXME: we don't actually support 4:2:2 yet. */ | |||||
if(max_coeff <= 8) | |||||
zeros_left= get_vlc2(gb, (chroma_dc_total_zeros_vlc-1)[ total_coeff ].table, CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 1); | |||||
else | |||||
if (max_coeff <= 8) { | |||||
if (max_coeff == 4) | |||||
zeros_left = get_vlc2(gb, (chroma_dc_total_zeros_vlc-1)[total_coeff].table, | |||||
CHROMA_DC_TOTAL_ZEROS_VLC_BITS, 1); | |||||
else | |||||
zeros_left = get_vlc2(gb, (chroma422_dc_total_zeros_vlc-1)[total_coeff].table, | |||||
CHROMA422_DC_TOTAL_ZEROS_VLC_BITS, 1); | |||||
} else { | |||||
zeros_left= get_vlc2(gb, (total_zeros_vlc-1)[ total_coeff ].table, TOTAL_ZEROS_VLC_BITS, 1); | zeros_left= get_vlc2(gb, (total_zeros_vlc-1)[ total_coeff ].table, TOTAL_ZEROS_VLC_BITS, 1); | ||||
} | |||||
} | } | ||||
#define STORE_BLOCK(type) \ | #define STORE_BLOCK(type) \ | ||||
@@ -994,7 +1080,7 @@ decode_intra_mb: | |||||
s->current_picture.f.mb_type[mb_xy] = mb_type; | s->current_picture.f.mb_type[mb_xy] = mb_type; | ||||
if(cbp || IS_INTRA16x16(mb_type)){ | if(cbp || IS_INTRA16x16(mb_type)){ | ||||
int i4x4, chroma_idx; | |||||
int i4x4, i8x8, chroma_idx; | |||||
int dquant; | int dquant; | ||||
int ret; | int ret; | ||||
GetBitContext *gb= IS_INTRA(mb_type) ? h->intra_gb_ptr : h->inter_gb_ptr; | GetBitContext *gb= IS_INTRA(mb_type) ? h->intra_gb_ptr : h->inter_gb_ptr; | ||||
@@ -1036,7 +1122,34 @@ decode_intra_mb: | |||||
if( decode_luma_residual(h, gb, scan, scan8x8, pixel_shift, mb_type, cbp, 2) < 0 ){ | if( decode_luma_residual(h, gb, scan, scan8x8, pixel_shift, mb_type, cbp, 2) < 0 ){ | ||||
return -1; | return -1; | ||||
} | } | ||||
} else { | |||||
} else if (CHROMA422) { | |||||
if(cbp&0x30){ | |||||
for(chroma_idx=0; chroma_idx<2; chroma_idx++) | |||||
if (decode_residual(h, gb, h->mb + ((256 + 16*16*chroma_idx) << pixel_shift), | |||||
CHROMA_DC_BLOCK_INDEX+chroma_idx, chroma422_dc_scan, | |||||
NULL, 8) < 0) { | |||||
return -1; | |||||
} | |||||
} | |||||
if(cbp&0x20){ | |||||
for(chroma_idx=0; chroma_idx<2; chroma_idx++){ | |||||
const uint32_t *qmul = h->dequant4_coeff[chroma_idx+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[chroma_idx]]; | |||||
DCTELEM *mb = h->mb + (16*(16 + 16*chroma_idx) << pixel_shift); | |||||
for (i8x8 = 0; i8x8 < 2; i8x8++) { | |||||
for (i4x4 = 0; i4x4 < 4; i4x4++) { | |||||
const int index = 16 + 16*chroma_idx + 8*i8x8 + i4x4; | |||||
if (decode_residual(h, gb, mb, index, scan + 1, qmul, 15) < 0) | |||||
return -1; | |||||
mb += 16 << pixel_shift; | |||||
} | |||||
} | |||||
} | |||||
}else{ | |||||
fill_rectangle(&h->non_zero_count_cache[scan8[16]], 4, 4, 8, 0, 1); | |||||
fill_rectangle(&h->non_zero_count_cache[scan8[32]], 4, 4, 8, 0, 1); | |||||
} | |||||
} else /* yuv420 */ { | |||||
if(cbp&0x30){ | if(cbp&0x30){ | ||||
for(chroma_idx=0; chroma_idx<2; chroma_idx++) | for(chroma_idx=0; chroma_idx<2; chroma_idx++) | ||||
if( decode_residual(h, gb, h->mb + ((256 + 16*16*chroma_idx) << pixel_shift), CHROMA_DC_BLOCK_INDEX+chroma_idx, chroma_dc_scan, NULL, 4) < 0){ | if( decode_residual(h, gb, h->mb + ((256 + 16*16*chroma_idx) << pixel_shift), CHROMA_DC_BLOCK_INDEX+chroma_idx, chroma_dc_scan, NULL, 4) < 0){ | ||||
@@ -212,6 +212,7 @@ static void av_always_inline h264_filter_mb_fast_internal( H264Context *h, int m | |||||
MpegEncContext * const s = &h->s; | MpegEncContext * const s = &h->s; | ||||
int chroma = !(CONFIG_GRAY && (s->flags&CODEC_FLAG_GRAY)); | int chroma = !(CONFIG_GRAY && (s->flags&CODEC_FLAG_GRAY)); | ||||
int chroma444 = CHROMA444; | int chroma444 = CHROMA444; | ||||
int chroma422 = CHROMA422; | |||||
int mb_xy = h->mb_xy; | int mb_xy = h->mb_xy; | ||||
int left_type= h->left_type[LTOP]; | int left_type= h->left_type[LTOP]; | ||||
@@ -289,6 +290,23 @@ static void av_always_inline h264_filter_mb_fast_internal( H264Context *h, int m | |||||
filter_mb_edgeh( &img_cb[4*3*linesize], linesize, bS3, qpc, a, b, h, 0); | filter_mb_edgeh( &img_cb[4*3*linesize], linesize, bS3, qpc, a, b, h, 0); | ||||
filter_mb_edgeh( &img_cr[4*3*linesize], linesize, bS3, qpc, a, b, h, 0); | filter_mb_edgeh( &img_cr[4*3*linesize], linesize, bS3, qpc, a, b, h, 0); | ||||
} | } | ||||
}else if(chroma422){ | |||||
if(left_type){ | |||||
filter_mb_edgecv(&img_cb[2*0<<pixel_shift], uvlinesize, bS4, qpc0, a, b, h, 1); | |||||
filter_mb_edgecv(&img_cr[2*0<<pixel_shift], uvlinesize, bS4, qpc0, a, b, h, 1); | |||||
} | |||||
filter_mb_edgecv(&img_cb[2*2<<pixel_shift], uvlinesize, bS3, qpc, a, b, h, 0); | |||||
filter_mb_edgecv(&img_cr[2*2<<pixel_shift], uvlinesize, bS3, qpc, a, b, h, 0); | |||||
if(top_type){ | |||||
filter_mb_edgech(&img_cb[4*0*uvlinesize], uvlinesize, bSH, qpc1, a, b, h, 1); | |||||
filter_mb_edgech(&img_cr[4*0*uvlinesize], uvlinesize, bSH, qpc1, a, b, h, 1); | |||||
} | |||||
filter_mb_edgech(&img_cb[4*1*uvlinesize], uvlinesize, bS3, qpc, a, b, h, 0); | |||||
filter_mb_edgech(&img_cr[4*1*uvlinesize], uvlinesize, bS3, qpc, a, b, h, 0); | |||||
filter_mb_edgech(&img_cb[4*2*uvlinesize], uvlinesize, bS3, qpc, a, b, h, 0); | |||||
filter_mb_edgech(&img_cr[4*2*uvlinesize], uvlinesize, bS3, qpc, a, b, h, 0); | |||||
filter_mb_edgech(&img_cb[4*3*uvlinesize], uvlinesize, bS3, qpc, a, b, h, 0); | |||||
filter_mb_edgech(&img_cr[4*3*uvlinesize], uvlinesize, bS3, qpc, a, b, h, 0); | |||||
}else{ | }else{ | ||||
if(left_type){ | if(left_type){ | ||||
filter_mb_edgecv( &img_cb[2*0<<pixel_shift], uvlinesize, bS4, qpc0, a, b, h, 1); | filter_mb_edgecv( &img_cb[2*0<<pixel_shift], uvlinesize, bS4, qpc0, a, b, h, 1); | ||||
@@ -411,10 +429,12 @@ static int check_mv(H264Context *h, long b_idx, long bn_idx, int mvy_limit){ | |||||
return v; | return v; | ||||
} | } | ||||
static av_always_inline void filter_mb_dir(H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize, int mb_xy, int mb_type, int mvy_limit, int first_vertical_edge_done, int a, int b, int chroma, int chroma444, int dir) { | |||||
static av_always_inline void filter_mb_dir(H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize, int mb_xy, int mb_type, int mvy_limit, int first_vertical_edge_done, int a, int b, int chroma, int dir) { | |||||
MpegEncContext * const s = &h->s; | MpegEncContext * const s = &h->s; | ||||
int edge; | int edge; | ||||
int chroma_qp_avg[2]; | int chroma_qp_avg[2]; | ||||
int chroma444 = CHROMA444; | |||||
int chroma422 = CHROMA422; | |||||
const int mbm_xy = dir == 0 ? mb_xy -1 : h->top_mb_xy; | const int mbm_xy = dir == 0 ? mb_xy -1 : h->top_mb_xy; | ||||
const int mbm_type = dir == 0 ? h->left_type[LTOP] : h->top_type; | const int mbm_type = dir == 0 ? h->left_type[LTOP] : h->top_type; | ||||
@@ -564,8 +584,9 @@ static av_always_inline void filter_mb_dir(H264Context *h, int mb_x, int mb_y, u | |||||
for( edge = 1; edge < edges; edge++ ) { | for( edge = 1; edge < edges; edge++ ) { | ||||
DECLARE_ALIGNED(8, int16_t, bS)[4]; | DECLARE_ALIGNED(8, int16_t, bS)[4]; | ||||
int qp; | int qp; | ||||
const int deblock_edge = !IS_8x8DCT(mb_type & (edge<<24)); // (edge&1) && IS_8x8DCT(mb_type) | |||||
if( IS_8x8DCT(mb_type & (edge<<24)) ) // (edge&1) && IS_8x8DCT(mb_type) | |||||
if (!deblock_edge && (!chroma422 || dir == 0)) | |||||
continue; | continue; | ||||
if( IS_INTRA(mb_type)) { | if( IS_INTRA(mb_type)) { | ||||
@@ -627,14 +648,23 @@ static av_always_inline void filter_mb_dir(H264Context *h, int mb_x, int mb_y, u | |||||
} | } | ||||
} | } | ||||
} else { | } else { | ||||
filter_mb_edgeh( &img_y[4*edge*linesize], linesize, bS, qp, a, b, h, 0 ); | |||||
if (chroma) { | |||||
if (chroma444) { | |||||
filter_mb_edgeh ( &img_cb[4*edge*uvlinesize], uvlinesize, bS, h->chroma_qp[0], a, b, h, 0); | |||||
filter_mb_edgeh ( &img_cr[4*edge*uvlinesize], uvlinesize, bS, h->chroma_qp[1], a, b, h, 0); | |||||
} else if( (edge&1) == 0 ) { | |||||
filter_mb_edgech( &img_cb[2*edge*uvlinesize], uvlinesize, bS, h->chroma_qp[0], a, b, h, 0); | |||||
filter_mb_edgech( &img_cr[2*edge*uvlinesize], uvlinesize, bS, h->chroma_qp[1], a, b, h, 0); | |||||
if (chroma422) { | |||||
if (deblock_edge) | |||||
filter_mb_edgeh(&img_y[4*edge*linesize], linesize, bS, qp, a, b, h, 0); | |||||
if (chroma) { | |||||
filter_mb_edgech(&img_cb[4*edge*uvlinesize], uvlinesize, bS, h->chroma_qp[0], a, b, h, 0); | |||||
filter_mb_edgech(&img_cr[4*edge*uvlinesize], uvlinesize, bS, h->chroma_qp[1], a, b, h, 0); | |||||
} | |||||
} else { | |||||
filter_mb_edgeh(&img_y[4*edge*linesize], linesize, bS, qp, a, b, h, 0); | |||||
if (chroma) { | |||||
if (chroma444) { | |||||
filter_mb_edgeh (&img_cb[4*edge*uvlinesize], uvlinesize, bS, h->chroma_qp[0], a, b, h, 0); | |||||
filter_mb_edgeh (&img_cr[4*edge*uvlinesize], uvlinesize, bS, h->chroma_qp[1], a, b, h, 0); | |||||
} else if ((edge&1) == 0) { | |||||
filter_mb_edgech(&img_cb[2*edge*uvlinesize], uvlinesize, bS, h->chroma_qp[0], a, b, h, 0); | |||||
filter_mb_edgech(&img_cr[2*edge*uvlinesize], uvlinesize, bS, h->chroma_qp[1], a, b, h, 0); | |||||
} | |||||
} | } | ||||
} | } | ||||
} | } | ||||
@@ -726,6 +756,11 @@ void ff_h264_filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint | |||||
filter_mb_mbaff_edgev ( h, img_cb + 8*uvlinesize, uvlinesize, bS+4, 1, bqp[1], a, b, 1 ); | filter_mb_mbaff_edgev ( h, img_cb + 8*uvlinesize, uvlinesize, bS+4, 1, bqp[1], a, b, 1 ); | ||||
filter_mb_mbaff_edgev ( h, img_cr, uvlinesize, bS , 1, rqp[0], a, b, 1 ); | filter_mb_mbaff_edgev ( h, img_cr, uvlinesize, bS , 1, rqp[0], a, b, 1 ); | ||||
filter_mb_mbaff_edgev ( h, img_cr + 8*uvlinesize, uvlinesize, bS+4, 1, rqp[1], a, b, 1 ); | filter_mb_mbaff_edgev ( h, img_cr + 8*uvlinesize, uvlinesize, bS+4, 1, rqp[1], a, b, 1 ); | ||||
} else if (CHROMA422) { | |||||
filter_mb_mbaff_edgecv(h, img_cb, uvlinesize, bS , 1, bqp[0], a, b, 1); | |||||
filter_mb_mbaff_edgecv(h, img_cb + 8*uvlinesize, uvlinesize, bS+4, 1, bqp[1], a, b, 1); | |||||
filter_mb_mbaff_edgecv(h, img_cr, uvlinesize, bS , 1, rqp[0], a, b, 1); | |||||
filter_mb_mbaff_edgecv(h, img_cr + 8*uvlinesize, uvlinesize, bS+4, 1, rqp[1], a, b, 1); | |||||
}else{ | }else{ | ||||
filter_mb_mbaff_edgecv( h, img_cb, uvlinesize, bS , 1, bqp[0], a, b, 1 ); | filter_mb_mbaff_edgecv( h, img_cb, uvlinesize, bS , 1, bqp[0], a, b, 1 ); | ||||
filter_mb_mbaff_edgecv( h, img_cb + 4*uvlinesize, uvlinesize, bS+4, 1, bqp[1], a, b, 1 ); | filter_mb_mbaff_edgecv( h, img_cb + 4*uvlinesize, uvlinesize, bS+4, 1, bqp[1], a, b, 1 ); | ||||
@@ -754,9 +789,9 @@ void ff_h264_filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint | |||||
#if CONFIG_SMALL | #if CONFIG_SMALL | ||||
for( dir = 0; dir < 2; dir++ ) | for( dir = 0; dir < 2; dir++ ) | ||||
filter_mb_dir(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize, mb_xy, mb_type, mvy_limit, dir ? 0 : first_vertical_edge_done, a, b, chroma, CHROMA444, dir); | |||||
filter_mb_dir(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize, mb_xy, mb_type, mvy_limit, dir ? 0 : first_vertical_edge_done, a, b, chroma, dir); | |||||
#else | #else | ||||
filter_mb_dir(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize, mb_xy, mb_type, mvy_limit, first_vertical_edge_done, a, b, chroma, CHROMA444, 0); | |||||
filter_mb_dir(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize, mb_xy, mb_type, mvy_limit, 0, a, b, chroma, CHROMA444, 1); | |||||
filter_mb_dir(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize, mb_xy, mb_type, mvy_limit, first_vertical_edge_done, a, b, chroma, 0); | |||||
filter_mb_dir(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize, mb_xy, mb_type, mvy_limit, 0, a, b, chroma, 1); | |||||
#endif | #endif | ||||
} | } |
@@ -510,7 +510,7 @@ static void fill_decode_caches(H264Context *h, int mb_type){ | |||||
if(top_type){ | if(top_type){ | ||||
nnz = h->non_zero_count[top_xy]; | nnz = h->non_zero_count[top_xy]; | ||||
AV_COPY32(&nnz_cache[4+8* 0], &nnz[4*3]); | AV_COPY32(&nnz_cache[4+8* 0], &nnz[4*3]); | ||||
if(CHROMA444){ | |||||
if(!s->chroma_y_shift){ | |||||
AV_COPY32(&nnz_cache[4+8* 5], &nnz[4* 7]); | AV_COPY32(&nnz_cache[4+8* 5], &nnz[4* 7]); | ||||
AV_COPY32(&nnz_cache[4+8*10], &nnz[4*11]); | AV_COPY32(&nnz_cache[4+8*10], &nnz[4*11]); | ||||
}else{ | }else{ | ||||
@@ -534,6 +534,11 @@ static void fill_decode_caches(H264Context *h, int mb_type){ | |||||
nnz_cache[3+8* 7 + 2*8*i]= nnz[left_block[8+1+2*i]+4*4]; | nnz_cache[3+8* 7 + 2*8*i]= nnz[left_block[8+1+2*i]+4*4]; | ||||
nnz_cache[3+8*11 + 2*8*i]= nnz[left_block[8+0+2*i]+8*4]; | nnz_cache[3+8*11 + 2*8*i]= nnz[left_block[8+0+2*i]+8*4]; | ||||
nnz_cache[3+8*12 + 2*8*i]= nnz[left_block[8+1+2*i]+8*4]; | nnz_cache[3+8*12 + 2*8*i]= nnz[left_block[8+1+2*i]+8*4]; | ||||
}else if(CHROMA422) { | |||||
nnz_cache[3+8* 6 + 2*8*i]= nnz[left_block[8+0+2*i]-2+4*4]; | |||||
nnz_cache[3+8* 7 + 2*8*i]= nnz[left_block[8+1+2*i]-2+4*4]; | |||||
nnz_cache[3+8*11 + 2*8*i]= nnz[left_block[8+0+2*i]-2+8*4]; | |||||
nnz_cache[3+8*12 + 2*8*i]= nnz[left_block[8+1+2*i]-2+8*4]; | |||||
}else{ | }else{ | ||||
nnz_cache[3+8* 6 + 8*i]= nnz[left_block[8+4+2*i]]; | nnz_cache[3+8* 6 + 8*i]= nnz[left_block[8+4+2*i]]; | ||||
nnz_cache[3+8*11 + 8*i]= nnz[left_block[8+5+2*i]]; | nnz_cache[3+8*11 + 8*i]= nnz[left_block[8+5+2*i]]; | ||||
@@ -396,7 +396,8 @@ int ff_h264_decode_seq_parameter_set(H264Context *h){ | |||||
#endif | #endif | ||||
sps->crop= get_bits1(&s->gb); | sps->crop= get_bits1(&s->gb); | ||||
if(sps->crop){ | if(sps->crop){ | ||||
int crop_limit = sps->chroma_format_idc == 3 ? 16 : 8; | |||||
int crop_vertical_limit = sps->chroma_format_idc & 2 ? 16 : 8; | |||||
int crop_horizontal_limit = sps->chroma_format_idc == 3 ? 16 : 8; | |||||
sps->crop_left = get_ue_golomb(&s->gb); | sps->crop_left = get_ue_golomb(&s->gb); | ||||
sps->crop_right = get_ue_golomb(&s->gb); | sps->crop_right = get_ue_golomb(&s->gb); | ||||
sps->crop_top = get_ue_golomb(&s->gb); | sps->crop_top = get_ue_golomb(&s->gb); | ||||
@@ -404,7 +405,7 @@ int ff_h264_decode_seq_parameter_set(H264Context *h){ | |||||
if(sps->crop_left || sps->crop_top){ | if(sps->crop_left || sps->crop_top){ | ||||
av_log(h->s.avctx, AV_LOG_ERROR, "insane cropping not completely supported, this could look slightly wrong ...\n"); | av_log(h->s.avctx, AV_LOG_ERROR, "insane cropping not completely supported, this could look slightly wrong ...\n"); | ||||
} | } | ||||
if(sps->crop_right >= crop_limit || sps->crop_bottom >= crop_limit){ | |||||
if(sps->crop_right >= crop_horizontal_limit || sps->crop_bottom >= crop_vertical_limit){ | |||||
av_log(h->s.avctx, AV_LOG_ERROR, "brainfart cropping not supported, this could look slightly wrong ...\n"); | av_log(h->s.avctx, AV_LOG_ERROR, "brainfart cropping not supported, this could look slightly wrong ...\n"); | ||||
} | } | ||||
}else{ | }else{ | ||||
@@ -80,7 +80,14 @@ static const uint8_t luma_dc_field_scan[16]={ | |||||
static const uint8_t chroma_dc_scan[4]={ | static const uint8_t chroma_dc_scan[4]={ | ||||
(0+0*2)*16, (1+0*2)*16, | (0+0*2)*16, (1+0*2)*16, | ||||
(0+1*2)*16, (1+1*2)*16, //FIXME | |||||
(0+1*2)*16, (1+1*2)*16, | |||||
}; | |||||
/*
 * Scan table for the eight chroma DC coefficients of a 4:2:2 macroblock.
 * Each entry has the form (x + y*2)*16 with x in 0..1 and y in 0..3, i.e.
 * the offset of the first coefficient of chroma block (x, y) when blocks
 * are 16 coefficients apart. This is the same addressing that
 * ff_h264_chroma422_dc_dequant_idct uses (32*row + 16*column).
 * Passed as the scantable for the 8-coefficient chroma DC residual in both
 * the CAVLC and the CABAC decoder.
 */
static const uint8_t chroma422_dc_scan[8]={ | |||||
(0+0*2)*16, (0+1*2)*16, | |||||
(1+0*2)*16, (0+2*2)*16, | |||||
(0+3*2)*16, (1+1*2)*16, | |||||
(1+2*2)*16, (1+3*2)*16, | |||||
}; | |||||
// zigzag_scan8x8_cavlc[i] = zigzag_scan8x8[(i/4) + 16*(i%4)] | // zigzag_scan8x8_cavlc[i] = zigzag_scan8x8[(i/4) + 16*(i%4)] | ||||
@@ -41,7 +41,7 @@ | |||||
#include "h264dsp_template.c" | #include "h264dsp_template.c" | ||||
#undef BIT_DEPTH | #undef BIT_DEPTH | ||||
void ff_h264dsp_init(H264DSPContext *c, const int bit_depth) | |||||
void ff_h264dsp_init(H264DSPContext *c, const int bit_depth, const int chroma_format_idc) | |||||
{ | { | ||||
#undef FUNC | #undef FUNC | ||||
#define FUNC(a, depth) a ## _ ## depth ## _c | #define FUNC(a, depth) a ## _ ## depth ## _c | ||||
@@ -53,10 +53,16 @@ void ff_h264dsp_init(H264DSPContext *c, const int bit_depth) | |||||
c->h264_idct8_dc_add= FUNC(ff_h264_idct8_dc_add, depth);\ | c->h264_idct8_dc_add= FUNC(ff_h264_idct8_dc_add, depth);\ | ||||
c->h264_idct_add16 = FUNC(ff_h264_idct_add16, depth);\ | c->h264_idct_add16 = FUNC(ff_h264_idct_add16, depth);\ | ||||
c->h264_idct8_add4 = FUNC(ff_h264_idct8_add4, depth);\ | c->h264_idct8_add4 = FUNC(ff_h264_idct8_add4, depth);\ | ||||
c->h264_idct_add8 = FUNC(ff_h264_idct_add8, depth);\ | |||||
if (chroma_format_idc == 1)\ | |||||
c->h264_idct_add8 = FUNC(ff_h264_idct_add8, depth);\ | |||||
else\ | |||||
c->h264_idct_add8 = FUNC(ff_h264_idct_add8_422, depth);\ | |||||
c->h264_idct_add16intra= FUNC(ff_h264_idct_add16intra, depth);\ | c->h264_idct_add16intra= FUNC(ff_h264_idct_add16intra, depth);\ | ||||
c->h264_luma_dc_dequant_idct= FUNC(ff_h264_luma_dc_dequant_idct, depth);\ | c->h264_luma_dc_dequant_idct= FUNC(ff_h264_luma_dc_dequant_idct, depth);\ | ||||
c->h264_chroma_dc_dequant_idct= FUNC(ff_h264_chroma_dc_dequant_idct, depth);\ | |||||
if (chroma_format_idc == 1)\ | |||||
c->h264_chroma_dc_dequant_idct= FUNC(ff_h264_chroma_dc_dequant_idct, depth);\ | |||||
else\ | |||||
c->h264_chroma_dc_dequant_idct= FUNC(ff_h264_chroma422_dc_dequant_idct, depth);\ | |||||
\ | \ | ||||
c->weight_h264_pixels_tab[0]= FUNC(weight_h264_pixels16x16, depth);\ | c->weight_h264_pixels_tab[0]= FUNC(weight_h264_pixels16x16, depth);\ | ||||
c->weight_h264_pixels_tab[1]= FUNC(weight_h264_pixels16x8, depth);\ | c->weight_h264_pixels_tab[1]= FUNC(weight_h264_pixels16x8, depth);\ | ||||
@@ -86,11 +92,23 @@ void ff_h264dsp_init(H264DSPContext *c, const int bit_depth) | |||||
c->h264_h_loop_filter_luma_intra= FUNC(h264_h_loop_filter_luma_intra, depth);\ | c->h264_h_loop_filter_luma_intra= FUNC(h264_h_loop_filter_luma_intra, depth);\ | ||||
c->h264_h_loop_filter_luma_mbaff_intra= FUNC(h264_h_loop_filter_luma_mbaff_intra, depth);\ | c->h264_h_loop_filter_luma_mbaff_intra= FUNC(h264_h_loop_filter_luma_mbaff_intra, depth);\ | ||||
c->h264_v_loop_filter_chroma= FUNC(h264_v_loop_filter_chroma, depth);\ | c->h264_v_loop_filter_chroma= FUNC(h264_v_loop_filter_chroma, depth);\ | ||||
c->h264_h_loop_filter_chroma= FUNC(h264_h_loop_filter_chroma, depth);\ | |||||
c->h264_h_loop_filter_chroma_mbaff= FUNC(h264_h_loop_filter_chroma_mbaff, depth);\ | |||||
if (chroma_format_idc == 1)\ | |||||
c->h264_h_loop_filter_chroma= FUNC(h264_h_loop_filter_chroma, depth);\ | |||||
else\ | |||||
c->h264_h_loop_filter_chroma= FUNC(h264_h_loop_filter_chroma422, depth);\ | |||||
if (chroma_format_idc == 1)\ | |||||
c->h264_h_loop_filter_chroma_mbaff= FUNC(h264_h_loop_filter_chroma_mbaff, depth);\ | |||||
else\ | |||||
c->h264_h_loop_filter_chroma_mbaff= FUNC(h264_h_loop_filter_chroma422_mbaff, depth);\ | |||||
c->h264_v_loop_filter_chroma_intra= FUNC(h264_v_loop_filter_chroma_intra, depth);\ | c->h264_v_loop_filter_chroma_intra= FUNC(h264_v_loop_filter_chroma_intra, depth);\ | ||||
c->h264_h_loop_filter_chroma_intra= FUNC(h264_h_loop_filter_chroma_intra, depth);\ | |||||
c->h264_h_loop_filter_chroma_mbaff_intra= FUNC(h264_h_loop_filter_chroma_mbaff_intra, depth);\ | |||||
if (chroma_format_idc == 1)\ | |||||
c->h264_h_loop_filter_chroma_intra= FUNC(h264_h_loop_filter_chroma_intra, depth);\ | |||||
else\ | |||||
c->h264_h_loop_filter_chroma_intra= FUNC(h264_h_loop_filter_chroma422_intra, depth);\ | |||||
if (chroma_format_idc == 1)\ | |||||
c->h264_h_loop_filter_chroma_mbaff_intra= FUNC(h264_h_loop_filter_chroma_mbaff_intra, depth);\ | |||||
else\ | |||||
c->h264_h_loop_filter_chroma_mbaff_intra= FUNC(h264_h_loop_filter_chroma422_mbaff_intra, depth);\ | |||||
c->h264_loop_filter_strength= NULL; | c->h264_loop_filter_strength= NULL; | ||||
switch (bit_depth) { | switch (bit_depth) { | ||||
@@ -105,7 +123,7 @@ void ff_h264dsp_init(H264DSPContext *c, const int bit_depth) | |||||
break; | break; | ||||
} | } | ||||
if (ARCH_ARM) ff_h264dsp_init_arm(c, bit_depth); | |||||
if (HAVE_ALTIVEC) ff_h264dsp_init_ppc(c, bit_depth); | |||||
if (HAVE_MMX) ff_h264dsp_init_x86(c, bit_depth); | |||||
if (ARCH_ARM) ff_h264dsp_init_arm(c, bit_depth, chroma_format_idc); | |||||
if (HAVE_ALTIVEC) ff_h264dsp_init_ppc(c, bit_depth, chroma_format_idc); | |||||
if (HAVE_MMX) ff_h264dsp_init_x86(c, bit_depth, chroma_format_idc); | |||||
} | } |
@@ -74,9 +74,9 @@ typedef struct H264DSPContext{ | |||||
void (*h264_chroma_dc_dequant_idct)(DCTELEM *block, int qmul); | void (*h264_chroma_dc_dequant_idct)(DCTELEM *block, int qmul); | ||||
}H264DSPContext; | }H264DSPContext; | ||||
void ff_h264dsp_init(H264DSPContext *c, const int bit_depth); | |||||
void ff_h264dsp_init_arm(H264DSPContext *c, const int bit_depth); | |||||
void ff_h264dsp_init_ppc(H264DSPContext *c, const int bit_depth); | |||||
void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth); | |||||
/*
 * H.264 DSP initialisers.
 * ff_h264dsp_init() fills the function-pointer table in c for the given
 * bit depth and chroma format, then calls the architecture-specific
 * initialisers (guarded by ARCH_ARM, HAVE_ALTIVEC and HAVE_MMX) with the
 * same arguments.
 * chroma_format_idc == 1 selects the existing chroma IDCT / DC dequant /
 * horizontal chroma loop-filter functions; any other value selects the
 * *422 variants.
 */
void ff_h264dsp_init(H264DSPContext *c, const int bit_depth, const int chroma_format_idc); | |||||
void ff_h264dsp_init_arm(H264DSPContext *c, const int bit_depth, const int chroma_format_idc); | |||||
void ff_h264dsp_init_ppc(H264DSPContext *c, const int bit_depth, const int chroma_format_idc); | |||||
void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth, const int chroma_format_idc); | |||||
#endif /* AVCODEC_H264DSP_H */ | #endif /* AVCODEC_H264DSP_H */ |
@@ -275,6 +275,14 @@ static void FUNCC(h264_h_loop_filter_chroma_mbaff)(uint8_t *pix, int stride, int | |||||
{ | { | ||||
FUNCC(h264_loop_filter_chroma)(pix, sizeof(pixel), stride, 1, alpha, beta, tc0); | FUNCC(h264_loop_filter_chroma)(pix, sizeof(pixel), stride, 1, alpha, beta, tc0); | ||||
} | } | ||||
/*
 * Horizontal-direction chroma loop filter used for 4:2:2 content
 * (installed as h264_h_loop_filter_chroma when chroma_format_idc != 1).
 * Forwards to the shared chroma filter with sizeof(pixel) and stride as the
 * two step arguments and 4 as the iteration-count argument.
 * NOTE(review): the shared helper's parameter names are not visible here;
 * the intra helper calls the same positions xstride, ystride, inner_iters.
 */
static void FUNCC(h264_h_loop_filter_chroma422)(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0) | |||||
{ | |||||
FUNCC(h264_loop_filter_chroma)(pix, sizeof(pixel), stride, 4, alpha, beta, tc0); | |||||
} | |||||
/*
 * MBAFF counterpart of the 4:2:2 horizontal-direction chroma loop filter
 * (installed as h264_h_loop_filter_chroma_mbaff when chroma_format_idc != 1).
 * Same forwarding as the non-MBAFF 4:2:2 wrapper, but with an
 * iteration-count argument of 2 (the existing non-4:2:2 MBAFF wrapper
 * passes 1).
 */
static void FUNCC(h264_h_loop_filter_chroma422_mbaff)(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0) | |||||
{ | |||||
FUNCC(h264_loop_filter_chroma)(pix, sizeof(pixel), stride, 2, alpha, beta, tc0); | |||||
} | |||||
static av_always_inline av_flatten void FUNCC(h264_loop_filter_chroma_intra)(uint8_t *_pix, int xstride, int ystride, int inner_iters, int alpha, int beta) | static av_always_inline av_flatten void FUNCC(h264_loop_filter_chroma_intra)(uint8_t *_pix, int xstride, int ystride, int inner_iters, int alpha, int beta) | ||||
{ | { | ||||
@@ -312,3 +320,11 @@ static void FUNCC(h264_h_loop_filter_chroma_mbaff_intra)(uint8_t *pix, int strid | |||||
{ | { | ||||
FUNCC(h264_loop_filter_chroma_intra)(pix, sizeof(pixel), stride, 1, alpha, beta); | FUNCC(h264_loop_filter_chroma_intra)(pix, sizeof(pixel), stride, 1, alpha, beta); | ||||
} | } | ||||
/*
 * Intra horizontal-direction chroma loop filter used for 4:2:2 content
 * (installed as h264_h_loop_filter_chroma_intra when chroma_format_idc != 1).
 * Calls the shared intra chroma filter with xstride = sizeof(pixel),
 * ystride = stride and inner_iters = 4.
 */
static void FUNCC(h264_h_loop_filter_chroma422_intra)(uint8_t *pix, int stride, int alpha, int beta) | |||||
{ | |||||
FUNCC(h264_loop_filter_chroma_intra)(pix, sizeof(pixel), stride, 4, alpha, beta); | |||||
} | |||||
/*
 * MBAFF intra horizontal-direction chroma loop filter used for 4:2:2
 * content (installed as h264_h_loop_filter_chroma_mbaff_intra when
 * chroma_format_idc != 1).
 * Calls the shared intra chroma filter with xstride = sizeof(pixel),
 * ystride = stride and inner_iters = 2 (the existing non-4:2:2 MBAFF intra
 * wrapper passes 1).
 */
static void FUNCC(h264_h_loop_filter_chroma422_mbaff_intra)(uint8_t *pix, int stride, int alpha, int beta) | |||||
{ | |||||
FUNCC(h264_loop_filter_chroma_intra)(pix, sizeof(pixel), stride, 2, alpha, beta); | |||||
} | |||||
@@ -224,6 +224,29 @@ void FUNCC(ff_h264_idct_add8)(uint8_t **dest, const int *block_offset, DCTELEM * | |||||
} | } | ||||
} | } | ||||
} | } | ||||
/*
 * Inverse-transform and add the chroma residual of a 4:2:2 macroblock.
 *
 * dest         two plane pointers; dest[0] is used for j == 1 and dest[1]
 *              for j == 2
 * block_offset per-block offset added to the plane pointer
 * block        coefficient storage; block number i is addressed as
 *              block + i*16*sizeof(pixel)
 * stride       forwarded unchanged to the per-block IDCT functions
 * nnzc         non-zero-count cache, indexed through scan8[]
 *
 * For each block: if its nnzc entry is non-zero the full 4x4 IDCT is added;
 * otherwise, if only the first (DC) coefficient is non-zero, the DC-only
 * add is used; otherwise nothing is done.
 */
void FUNCC(ff_h264_idct_add8_422)(uint8_t **dest, const int *block_offset, DCTELEM *block, int stride, const uint8_t nnzc[15*8]){ | |||||
int i, j; | |||||
/* First four blocks of each plane: same index i for nnzc, block_offset and coefficients. */
for(j=1; j<3; j++){ | |||||
for(i=j*16; i<j*16+4; i++){ | |||||
if(nnzc[ scan8[i] ]) | |||||
FUNCC(ff_h264_idct_add )(dest[j-1] + block_offset[i], block + i*16*sizeof(pixel), stride); | |||||
else if(((dctcoef*)block)[i*16]) | |||||
FUNCC(ff_h264_idct_dc_add)(dest[j-1] + block_offset[i], block + i*16*sizeof(pixel), stride); | |||||
} | |||||
} | |||||
/*
 * Next four blocks of each plane: coefficients are still read at index i,
 * but nnzc and block_offset are looked up at i+4. This matches the 4:2:2
 * residual decoders, which advance the coefficient pointer contiguously
 * while using index 16 + 16*c + 8*i8x8 + i for the bookkeeping.
 */
for(j=1; j<3; j++){ | |||||
for(i=j*16+4; i<j*16+8; i++){ | |||||
if(nnzc[ scan8[i+4] ]) | |||||
FUNCC(ff_h264_idct_add )(dest[j-1] + block_offset[i+4], block + i*16*sizeof(pixel), stride); | |||||
else if(((dctcoef*)block)[i*16]) | |||||
FUNCC(ff_h264_idct_dc_add)(dest[j-1] + block_offset[i+4], block + i*16*sizeof(pixel), stride); | |||||
} | |||||
} | |||||
} | |||||
/** | /** | ||||
* IDCT transforms the 16 dc values and dequantizes them. | * IDCT transforms the 16 dc values and dequantizes them. | ||||
* @param qmul quantization parameter | * @param qmul quantization parameter | ||||
@@ -263,6 +286,33 @@ void FUNCC(ff_h264_luma_dc_dequant_idct)(DCTELEM *_output, DCTELEM *_input, int | |||||
#undef stride | #undef stride | ||||
} | } | ||||
/*
 * In-place transform and dequantisation of the eight chroma DC values of a
 * 4:2:2 macroblock.
 *
 * The values are read from and written back to _block (viewed as dctcoef)
 * at index 32*row + 16*column, with row in 0..3 and column in 0..1.
 *
 * _block  coefficient buffer holding the DC values at the positions above
 * qmul    multiplier applied to every output; 128 is added before the
 *         final right shift by 8
 */
void FUNCC(ff_h264_chroma422_dc_dequant_idct)(DCTELEM *_block, int qmul){ | |||||
const int stride= 16*2; | |||||
const int xStride= 16; | |||||
int i; | |||||
int temp[8]; | |||||
static const uint8_t x_offset[2]={0, 16}; | |||||
dctcoef *block = (dctcoef*)_block; | |||||
/* Per row: sum and difference of the two columns -> temp[2*row + 0/1]. */
for(i=0; i<4; i++){ | |||||
temp[2*i+0] = block[stride*i + xStride*0] + block[stride*i + xStride*1]; | |||||
temp[2*i+1] = block[stride*i + xStride*0] - block[stride*i + xStride*1]; | |||||
} | |||||
/* Per column: 4-point butterfly across the rows, then scale by qmul and shift. */
for(i=0; i<2; i++){ | |||||
const int offset= x_offset[i]; | |||||
const int z0= temp[2*0+i] + temp[2*2+i]; | |||||
const int z1= temp[2*0+i] - temp[2*2+i]; | |||||
const int z2= temp[2*1+i] - temp[2*3+i]; | |||||
const int z3= temp[2*1+i] + temp[2*3+i]; | |||||
block[stride*0+offset]= ((z0 + z3)*qmul + 128) >> 8; | |||||
block[stride*1+offset]= ((z1 + z2)*qmul + 128) >> 8; | |||||
block[stride*2+offset]= ((z1 - z2)*qmul + 128) >> 8; | |||||
block[stride*3+offset]= ((z0 - z3)*qmul + 128) >> 8; | |||||
} | |||||
} | |||||
void FUNCC(ff_h264_chroma_dc_dequant_idct)(DCTELEM *_block, int qmul){ | void FUNCC(ff_h264_chroma_dc_dequant_idct)(DCTELEM *_block, int qmul){ | ||||
const int stride= 16*2; | const int stride= 16*2; | ||||
const int xStride= 16; | const int xStride= 16; | ||||
@@ -361,7 +361,7 @@ static void pred8x8_tm_vp8_c(uint8_t *src, int stride){ | |||||
/** | /** | ||||
* Set the intra prediction function pointers. | * Set the intra prediction function pointers. | ||||
*/ | */ | ||||
void ff_h264_pred_init(H264PredContext *h, int codec_id, const int bit_depth){ | |||||
void ff_h264_pred_init(H264PredContext *h, int codec_id, const int bit_depth, const int chroma_format_idc){ | |||||
// MpegEncContext * const s = &h->s; | // MpegEncContext * const s = &h->s; | ||||
#undef FUNC | #undef FUNC | ||||
@@ -434,20 +434,39 @@ void ff_h264_pred_init(H264PredContext *h, int codec_id, const int bit_depth){ | |||||
h->pred8x8l[TOP_DC_PRED ]= FUNCC(pred8x8l_top_dc , depth);\ | h->pred8x8l[TOP_DC_PRED ]= FUNCC(pred8x8l_top_dc , depth);\ | ||||
h->pred8x8l[DC_128_PRED ]= FUNCC(pred8x8l_128_dc , depth);\ | h->pred8x8l[DC_128_PRED ]= FUNCC(pred8x8l_128_dc , depth);\ | ||||
\ | \ | ||||
h->pred8x8[VERT_PRED8x8 ]= FUNCC(pred8x8_vertical , depth);\ | |||||
h->pred8x8[HOR_PRED8x8 ]= FUNCC(pred8x8_horizontal , depth);\ | |||||
if (chroma_format_idc == 1) {\ | |||||
h->pred8x8[VERT_PRED8x8 ]= FUNCC(pred8x8_vertical , depth);\ | |||||
h->pred8x8[HOR_PRED8x8 ]= FUNCC(pred8x8_horizontal , depth);\ | |||||
} else {\ | |||||
h->pred8x8[VERT_PRED8x8 ]= FUNCC(pred8x16_vertical , depth);\ | |||||
h->pred8x8[HOR_PRED8x8 ]= FUNCC(pred8x16_horizontal , depth);\ | |||||
}\ | |||||
if (codec_id != CODEC_ID_VP8) {\ | if (codec_id != CODEC_ID_VP8) {\ | ||||
h->pred8x8[PLANE_PRED8x8]= FUNCC(pred8x8_plane , depth);\ | |||||
if (chroma_format_idc == 1) {\ | |||||
h->pred8x8[PLANE_PRED8x8]= FUNCC(pred8x8_plane , depth);\ | |||||
} else {\ | |||||
h->pred8x8[PLANE_PRED8x8]= FUNCC(pred8x16_plane , depth);\ | |||||
}\ | |||||
} else\ | } else\ | ||||
h->pred8x8[PLANE_PRED8x8]= FUNCD(pred8x8_tm_vp8);\ | h->pred8x8[PLANE_PRED8x8]= FUNCD(pred8x8_tm_vp8);\ | ||||
if(codec_id != CODEC_ID_RV40 && codec_id != CODEC_ID_VP8){\ | if(codec_id != CODEC_ID_RV40 && codec_id != CODEC_ID_VP8){\ | ||||
h->pred8x8[DC_PRED8x8 ]= FUNCC(pred8x8_dc , depth);\ | |||||
h->pred8x8[LEFT_DC_PRED8x8]= FUNCC(pred8x8_left_dc , depth);\ | |||||
h->pred8x8[TOP_DC_PRED8x8 ]= FUNCC(pred8x8_top_dc , depth);\ | |||||
h->pred8x8[ALZHEIMER_DC_L0T_PRED8x8 ]= FUNC(pred8x8_mad_cow_dc_l0t, depth);\ | |||||
h->pred8x8[ALZHEIMER_DC_0LT_PRED8x8 ]= FUNC(pred8x8_mad_cow_dc_0lt, depth);\ | |||||
h->pred8x8[ALZHEIMER_DC_L00_PRED8x8 ]= FUNC(pred8x8_mad_cow_dc_l00, depth);\ | |||||
h->pred8x8[ALZHEIMER_DC_0L0_PRED8x8 ]= FUNC(pred8x8_mad_cow_dc_0l0, depth);\ | |||||
if (chroma_format_idc == 1) {\ | |||||
h->pred8x8[DC_PRED8x8 ]= FUNCC(pred8x8_dc , depth);\ | |||||
h->pred8x8[LEFT_DC_PRED8x8]= FUNCC(pred8x8_left_dc , depth);\ | |||||
h->pred8x8[TOP_DC_PRED8x8 ]= FUNCC(pred8x8_top_dc , depth);\ | |||||
h->pred8x8[ALZHEIMER_DC_L0T_PRED8x8 ]= FUNC(pred8x8_mad_cow_dc_l0t, depth);\ | |||||
h->pred8x8[ALZHEIMER_DC_0LT_PRED8x8 ]= FUNC(pred8x8_mad_cow_dc_0lt, depth);\ | |||||
h->pred8x8[ALZHEIMER_DC_L00_PRED8x8 ]= FUNC(pred8x8_mad_cow_dc_l00, depth);\ | |||||
h->pred8x8[ALZHEIMER_DC_0L0_PRED8x8 ]= FUNC(pred8x8_mad_cow_dc_0l0, depth);\ | |||||
} else {\ | |||||
h->pred8x8[DC_PRED8x8 ]= FUNCC(pred8x16_dc , depth);\ | |||||
h->pred8x8[LEFT_DC_PRED8x8]= FUNCC(pred8x16_left_dc , depth);\ | |||||
h->pred8x8[TOP_DC_PRED8x8 ]= FUNCC(pred8x16_top_dc , depth);\ | |||||
h->pred8x8[ALZHEIMER_DC_L0T_PRED8x8 ]= FUNC(pred8x8_mad_cow_dc_l0t, depth);\ | |||||
h->pred8x8[ALZHEIMER_DC_0LT_PRED8x8 ]= FUNC(pred8x8_mad_cow_dc_0lt, depth);\ | |||||
h->pred8x8[ALZHEIMER_DC_L00_PRED8x8 ]= FUNC(pred8x8_mad_cow_dc_l00, depth);\ | |||||
h->pred8x8[ALZHEIMER_DC_0L0_PRED8x8 ]= FUNC(pred8x8_mad_cow_dc_0l0, depth);\ | |||||
}\ | |||||
}else{\ | }else{\ | ||||
h->pred8x8[DC_PRED8x8 ]= FUNCD(pred8x8_dc_rv40);\ | h->pred8x8[DC_PRED8x8 ]= FUNCD(pred8x8_dc_rv40);\ | ||||
h->pred8x8[LEFT_DC_PRED8x8]= FUNCD(pred8x8_left_dc_rv40);\ | h->pred8x8[LEFT_DC_PRED8x8]= FUNCD(pred8x8_left_dc_rv40);\ | ||||
@@ -457,7 +476,11 @@ void ff_h264_pred_init(H264PredContext *h, int codec_id, const int bit_depth){ | |||||
h->pred8x8[DC_129_PRED8x8]= FUNCC(pred8x8_129_dc , depth);\ | h->pred8x8[DC_129_PRED8x8]= FUNCC(pred8x8_129_dc , depth);\ | ||||
}\ | }\ | ||||
}\ | }\ | ||||
h->pred8x8[DC_128_PRED8x8 ]= FUNCC(pred8x8_128_dc , depth);\ | |||||
if (chroma_format_idc == 1) {\ | |||||
h->pred8x8[DC_128_PRED8x8 ]= FUNCC(pred8x8_128_dc , depth);\ | |||||
} else {\ | |||||
h->pred8x8[DC_128_PRED8x8 ]= FUNCC(pred8x16_128_dc , depth);\ | |||||
}\ | |||||
\ | \ | ||||
h->pred16x16[DC_PRED8x8 ]= FUNCC(pred16x16_dc , depth);\ | h->pred16x16[DC_PRED8x8 ]= FUNCC(pred16x16_dc , depth);\ | ||||
h->pred16x16[VERT_PRED8x8 ]= FUNCC(pred16x16_vertical , depth);\ | h->pred16x16[VERT_PRED8x8 ]= FUNCC(pred16x16_vertical , depth);\ | ||||
@@ -504,6 +527,6 @@ void ff_h264_pred_init(H264PredContext *h, int codec_id, const int bit_depth){ | |||||
break; | break; | ||||
} | } | ||||
if (ARCH_ARM) ff_h264_pred_init_arm(h, codec_id, bit_depth); | |||||
if (HAVE_MMX) ff_h264_pred_init_x86(h, codec_id, bit_depth); | |||||
if (ARCH_ARM) ff_h264_pred_init_arm(h, codec_id, bit_depth, chroma_format_idc); | |||||
if (HAVE_MMX) ff_h264_pred_init_x86(h, codec_id, bit_depth, chroma_format_idc); | |||||
} | } |
@@ -101,8 +101,8 @@ typedef struct H264PredContext{ | |||||
void (*pred16x16_add[3])(uint8_t *pix/*align 16*/, const int *block_offset, const DCTELEM *block/*align 16*/, int stride); | void (*pred16x16_add[3])(uint8_t *pix/*align 16*/, const int *block_offset, const DCTELEM *block/*align 16*/, int stride); | ||||
}H264PredContext; | }H264PredContext; | ||||
void ff_h264_pred_init(H264PredContext *h, int codec_id, const int bit_depth); | |||||
void ff_h264_pred_init_arm(H264PredContext *h, int codec_id, const int bit_depth); | |||||
void ff_h264_pred_init_x86(H264PredContext *h, int codec_id, const int bit_depth); | |||||
void ff_h264_pred_init(H264PredContext *h, int codec_id, const int bit_depth, const int chroma_format_idc); | |||||
void ff_h264_pred_init_arm(H264PredContext *h, int codec_id, const int bit_depth, const int chroma_format_idc); | |||||
void ff_h264_pred_init_x86(H264PredContext *h, int codec_id, const int bit_depth, const int chroma_format_idc); | |||||
#endif /* AVCODEC_H264PRED_H */ | #endif /* AVCODEC_H264PRED_H */ |
@@ -454,6 +454,19 @@ static void FUNCC(pred8x8_vertical)(uint8_t *_src, int _stride){ | |||||
} | } | ||||
} | } | ||||
/**
 * Vertical prediction for an 8-wide, 16-row block: the 8 pixels of the row
 * directly above the block are copied into each of the 16 rows.
 *
 * @param _src    pointer to the top-left pixel of the block
 * @param _stride line stride; shifted right by sizeof(pixel)-1 to index pixels
 *                (assumes a byte stride and sizeof(pixel) of 1 or 2 -- confirm)
 */
static void FUNCC(pred8x16_vertical)(uint8_t *_src, int _stride){ | |||||
int i; | |||||
pixel *src = (pixel*)_src; | |||||
int stride = _stride>>(sizeof(pixel)-1); | |||||
/* a/b: left and right groups of four pixels from the row above the block. */
const pixel4 a= AV_RN4PA(((pixel4*)(src-stride))+0); | |||||
const pixel4 b= AV_RN4PA(((pixel4*)(src-stride))+1); | |||||
for(i=0; i<16; i++){ | |||||
AV_WN4PA(((pixel4*)(src+i*stride))+0, a); | |||||
AV_WN4PA(((pixel4*)(src+i*stride))+1, b); | |||||
} | |||||
} | |||||
static void FUNCC(pred8x8_horizontal)(uint8_t *_src, int stride){ | static void FUNCC(pred8x8_horizontal)(uint8_t *_src, int stride){ | ||||
int i; | int i; | ||||
pixel *src = (pixel*)_src; | pixel *src = (pixel*)_src; | ||||
@@ -466,6 +479,17 @@ static void FUNCC(pred8x8_horizontal)(uint8_t *_src, int stride){ | |||||
} | } | ||||
} | } | ||||
/**
 * Horizontal prediction for an 8-wide, 16-row block: each row is filled
 * from the pixel immediately to the left of that row.
 *
 * @param _src   pointer to the top-left pixel of the block
 * @param stride line stride; shifted right by sizeof(pixel)-1 to index pixels
 *               (assumes a byte stride and sizeof(pixel) of 1 or 2 -- confirm)
 */
static void FUNCC(pred8x16_horizontal)(uint8_t *_src, int stride){ | |||||
int i; | |||||
pixel *src = (pixel*)_src; | |||||
stride >>= sizeof(pixel)-1; | |||||
for(i=0; i<16; i++){ | |||||
/* PIXEL_SPLAT_X4 presumably replicates the left neighbour into four pixels;
 * the same value is then written to both halves of the row. */
const pixel4 a = PIXEL_SPLAT_X4(src[-1+i*stride]); | |||||
AV_WN4PA(((pixel4*)(src+i*stride))+0, a); | |||||
AV_WN4PA(((pixel4*)(src+i*stride))+1, a); | |||||
} | |||||
} | |||||
#define PRED8x8_X(n, v)\ | #define PRED8x8_X(n, v)\ | ||||
static void FUNCC(pred8x8_##n##_dc)(uint8_t *_src, int stride){\ | static void FUNCC(pred8x8_##n##_dc)(uint8_t *_src, int stride){\ | ||||
int i;\ | int i;\ | ||||
@@ -482,6 +506,11 @@ PRED8x8_X(127, (1<<(BIT_DEPTH-1))-1); | |||||
PRED8x8_X(128, (1<<(BIT_DEPTH-1))+0); | PRED8x8_X(128, (1<<(BIT_DEPTH-1))+0); | ||||
PRED8x8_X(129, (1<<(BIT_DEPTH-1))+1); | PRED8x8_X(129, (1<<(BIT_DEPTH-1))+1); | ||||
/**
 * 8x16 variant of pred8x8_128_dc, built by applying the 8x8 predictor to
 * the upper half and again to the lower half of the block.
 *
 * @param _src   pointer to the top-left pixel of the block
 * @param stride line stride, forwarded unchanged; the lower half starts at
 *               _src + 8*stride, so stride is in the same unit as _src (uint8_t)
 */
static void FUNCC(pred8x16_128_dc)(uint8_t *_src, int stride){ | |||||
FUNCC(pred8x8_128_dc)(_src, stride); | |||||
FUNCC(pred8x8_128_dc)(_src+8*stride, stride); | |||||
} | |||||
static void FUNCC(pred8x8_left_dc)(uint8_t *_src, int stride){ | static void FUNCC(pred8x8_left_dc)(uint8_t *_src, int stride){ | ||||
int i; | int i; | ||||
int dc0, dc2; | int dc0, dc2; | ||||
@@ -507,6 +536,11 @@ static void FUNCC(pred8x8_left_dc)(uint8_t *_src, int stride){ | |||||
} | } | ||||
} | } | ||||
/**
 * 8x16 variant of pred8x8_left_dc, built by applying the 8x8 predictor to
 * the upper half and again to the lower half of the block.
 *
 * @param _src   pointer to the top-left pixel of the block
 * @param stride line stride, forwarded unchanged; the lower half starts at
 *               _src + 8*stride, so stride is in the same unit as _src (uint8_t)
 */
static void FUNCC(pred8x16_left_dc)(uint8_t *_src, int stride){ | |||||
FUNCC(pred8x8_left_dc)(_src, stride); | |||||
FUNCC(pred8x8_left_dc)(_src+8*stride, stride); | |||||
} | |||||
static void FUNCC(pred8x8_top_dc)(uint8_t *_src, int stride){ | static void FUNCC(pred8x8_top_dc)(uint8_t *_src, int stride){ | ||||
int i; | int i; | ||||
int dc0, dc1; | int dc0, dc1; | ||||
@@ -532,6 +566,27 @@ static void FUNCC(pred8x8_top_dc)(uint8_t *_src, int stride){ | |||||
} | } | ||||
} | } | ||||
/**
 * Top-only DC prediction for an 8-wide, 16-row block. The left four columns
 * of every row get the rounded mean of the four pixels above them, and the
 * right four columns get the rounded mean of the four pixels above those.
 *
 * @param _src   pointer to the top-left pixel of the block
 * @param stride line stride; shifted right by sizeof(pixel)-1 to index pixels
 *               (assumes a byte stride and sizeof(pixel) of 1 or 2 -- confirm)
 */
static void FUNCC(pred8x16_top_dc)(uint8_t *_src, int stride){ | |||||
int i; | |||||
int dc0, dc1; | |||||
pixel4 dc0splat, dc1splat; | |||||
pixel *src = (pixel*)_src; | |||||
stride >>= sizeof(pixel)-1; | |||||
dc0=dc1=0; | |||||
/* dc0: sum of top neighbours of columns 0-3; dc1: of columns 4-7. */
for(i=0;i<4; i++){ | |||||
dc0+= src[i-stride]; | |||||
dc1+= src[4+i-stride]; | |||||
} | |||||
/* (sum + 2) >> 2 is the rounded average of four samples. */
dc0splat = PIXEL_SPLAT_X4((dc0 + 2)>>2); | |||||
dc1splat = PIXEL_SPLAT_X4((dc1 + 2)>>2); | |||||
for(i=0; i<16; i++){ | |||||
AV_WN4PA(((pixel4*)(src+i*stride))+0, dc0splat); | |||||
AV_WN4PA(((pixel4*)(src+i*stride))+1, dc1splat); | |||||
} | |||||
} | |||||
static void FUNCC(pred8x8_dc)(uint8_t *_src, int stride){ | static void FUNCC(pred8x8_dc)(uint8_t *_src, int stride){ | ||||
int i; | int i; | ||||
int dc0, dc1, dc2; | int dc0, dc1, dc2; | ||||
@@ -560,6 +615,48 @@ static void FUNCC(pred8x8_dc)(uint8_t *_src, int stride){ | |||||
} | } | ||||
} | } | ||||
/**
 * DC prediction for an 8-wide, 16-row block using both the top row and the
 * left column. The block is handled as a grid of eight 4x4 sub-blocks
 * (2 across, 4 down), each filled with its own DC value.
 *
 * @param _src   pointer to the top-left pixel of the block
 * @param stride line stride; shifted right by sizeof(pixel)-1 to index pixels
 *               (assumes a byte stride and sizeof(pixel) of 1 or 2 -- confirm)
 */
static void FUNCC(pred8x16_dc)(uint8_t *_src, int stride){ | |||||
int i; | |||||
int dc0, dc1, dc2, dc3, dc4; | |||||
pixel4 dc0splat, dc1splat, dc2splat, dc3splat, dc4splat, dc5splat, dc6splat, dc7splat; | |||||
pixel *src = (pixel*)_src; | |||||
stride >>= sizeof(pixel)-1; | |||||
dc0=dc1=dc2=dc3=dc4=0; | |||||
/* Partial sums over the neighbours:
 *   dc0 = left rows 0-3 plus top columns 0-3 (eight samples)
 *   dc1 = top columns 4-7
 *   dc2 = left rows 4-7, dc3 = left rows 8-11, dc4 = left rows 12-15 */
for(i=0;i<4; i++){ | |||||
dc0+= src[-1+i*stride] + src[i-stride]; | |||||
dc1+= src[4+i-stride]; | |||||
dc2+= src[-1+(i+4)*stride]; | |||||
dc3+= src[-1+(i+8)*stride]; | |||||
dc4+= src[-1+(i+12)*stride]; | |||||
} | |||||
/* Even-numbered splats are for the left 4x4 of a row band, odd-numbered for
 * the right 4x4. Four-sample sums round with (+2)>>2, eight-sample sums with
 * (+4)>>3. Right-hand sub-blocks below the first band combine the top-right
 * sum (dc1) with the band's left sum. */
dc0splat = PIXEL_SPLAT_X4((dc0 + 4)>>3); | |||||
dc1splat = PIXEL_SPLAT_X4((dc1 + 2)>>2); | |||||
dc2splat = PIXEL_SPLAT_X4((dc2 + 2)>>2); | |||||
dc3splat = PIXEL_SPLAT_X4((dc1 + dc2 + 4)>>3); | |||||
dc4splat = PIXEL_SPLAT_X4((dc3 + 2)>>2); | |||||
dc5splat = PIXEL_SPLAT_X4((dc1 + dc3 + 4)>>3); | |||||
dc6splat = PIXEL_SPLAT_X4((dc4 + 2)>>2); | |||||
dc7splat = PIXEL_SPLAT_X4((dc1 + dc4 + 4)>>3); | |||||
/* Rows 0-3. */
for(i=0; i<4; i++){ | |||||
AV_WN4PA(((pixel4*)(src+i*stride))+0, dc0splat); | |||||
AV_WN4PA(((pixel4*)(src+i*stride))+1, dc1splat); | |||||
} | |||||
/* Rows 4-7. */
for(i=4; i<8; i++){ | |||||
AV_WN4PA(((pixel4*)(src+i*stride))+0, dc2splat); | |||||
AV_WN4PA(((pixel4*)(src+i*stride))+1, dc3splat); | |||||
} | |||||
/* Rows 8-11. */
for(i=8; i<12; i++){ | |||||
AV_WN4PA(((pixel4*)(src+i*stride))+0, dc4splat); | |||||
AV_WN4PA(((pixel4*)(src+i*stride))+1, dc5splat); | |||||
} | |||||
/* Rows 12-15. */
for(i=12; i<16; i++){ | |||||
AV_WN4PA(((pixel4*)(src+i*stride))+0, dc6splat); | |||||
AV_WN4PA(((pixel4*)(src+i*stride))+1, dc7splat); | |||||
} | |||||
} | |||||
//the following 4 function should not be optimized! | //the following 4 function should not be optimized! | ||||
static void FUNC(pred8x8_mad_cow_dc_l0t)(uint8_t *src, int stride){ | static void FUNC(pred8x8_mad_cow_dc_l0t)(uint8_t *src, int stride){ | ||||
FUNCC(pred8x8_top_dc)(src, stride); | FUNCC(pred8x8_top_dc)(src, stride); | ||||
@@ -618,6 +715,47 @@ static void FUNCC(pred8x8_plane)(uint8_t *_src, int _stride){ | |||||
} | } | ||||
} | } | ||||
/**
 * Plane prediction for an 8-wide, 16-row block. A horizontal gradient H is
 * derived from the row above the block and a vertical gradient V from the
 * column to its left; every output pixel is a clipped linear function of
 * its position.
 *
 * @param _src    pointer to the top-left pixel of the block
 * @param _stride line stride; shifted right by sizeof(pixel)-1 to index pixels
 *                (assumes a byte stride and sizeof(pixel) of 1 or 2 -- confirm)
 */
static void FUNCC(pred8x16_plane)(uint8_t *_src, int _stride){ | |||||
int j, k; | |||||
int a; | |||||
INIT_CLIP | |||||
pixel *src = (pixel*)_src; | |||||
int stride = _stride>>(sizeof(pixel)-1); | |||||
/* src0: centre (column 3) of the row above the block.
 * src1/src2: left-neighbour column, starting at rows 8 and 6 and walking
 * outwards (down / up) as k grows. */
const pixel * const src0 = src +3-stride; | |||||
const pixel * src1 = src +8*stride-1; | |||||
const pixel * src2 = src1-2*stride; // == src+6*stride-1; | |||||
int H = src0[1] - src0[-1]; | |||||
int V = src1[0] - src2[ 0]; | |||||
/* k = 2..4 contributes to both gradients. */
for (k = 2; k <= 4; ++k) { | |||||
src1 += stride; src2 -= stride; | |||||
H += k*(src0[k] - src0[-k]); | |||||
V += k*(src1[0] - src2[ 0]); | |||||
} | |||||
/* k = 5..8 contributes to the vertical gradient only (the block is 16 rows
 * tall but only 8 wide). */
for (; k <= 8; ++k) { | |||||
src1 += stride; src2 -= stride; | |||||
V += k*(src1[0] - src2[0]); | |||||
} | |||||
H = (17*H+16) >> 5; | |||||
V = (5*V+32) >> 6; | |||||
/* After the loops src1 is the left neighbour of row 15 and src2 is the
 * pixel above-left of the block, so src2[8] is the top neighbour of
 * column 7. 'a' is the unshifted value for pixel (0,0). */
a = 16*(src1[0] + src2[8] + 1) - 7*V - 3*H; | |||||
/* One row per iteration: advance by H per column and by V per row. */
for(j=16; j>0; --j) { | |||||
int b = a; | |||||
a += V; | |||||
src[0] = CLIP((b ) >> 5); | |||||
src[1] = CLIP((b+ H) >> 5); | |||||
src[2] = CLIP((b+2*H) >> 5); | |||||
src[3] = CLIP((b+3*H) >> 5); | |||||
src[4] = CLIP((b+4*H) >> 5); | |||||
src[5] = CLIP((b+5*H) >> 5); | |||||
src[6] = CLIP((b+6*H) >> 5); | |||||
src[7] = CLIP((b+7*H) >> 5); | |||||
src += stride; | |||||
} | |||||
} | |||||
#define SRC(x,y) src[(x)+(y)*stride] | #define SRC(x,y) src[(x)+(y)*stride] | ||||
#define PL(y) \ | #define PL(y) \ | ||||
const int l##y = (SRC(-1,y-1) + 2*SRC(-1,y) + SRC(-1,y+1) + 2) >> 2; | const int l##y = (SRC(-1,y-1) + 2*SRC(-1,y) + SRC(-1,y+1) + 2) >> 2; | ||||
@@ -999,12 +999,13 @@ void dsputil_h264_init_ppc(DSPContext* c, AVCodecContext *avctx) { | |||||
} | } | ||||
} | } | ||||
void ff_h264dsp_init_ppc(H264DSPContext *c, const int bit_depth) | |||||
void ff_h264dsp_init_ppc(H264DSPContext *c, const int bit_depth, const int chroma_format_idc) | |||||
{ | { | ||||
if (av_get_cpu_flags() & AV_CPU_FLAG_ALTIVEC) { | if (av_get_cpu_flags() & AV_CPU_FLAG_ALTIVEC) { | ||||
if (bit_depth == 8) { | if (bit_depth == 8) { | ||||
c->h264_idct_add = ff_h264_idct_add_altivec; | c->h264_idct_add = ff_h264_idct_add_altivec; | ||||
c->h264_idct_add8 = ff_h264_idct_add8_altivec; | |||||
if (chroma_format_idc == 1) | |||||
c->h264_idct_add8 = ff_h264_idct_add8_altivec; | |||||
c->h264_idct_add16 = ff_h264_idct_add16_altivec; | c->h264_idct_add16 = ff_h264_idct_add16_altivec; | ||||
c->h264_idct_add16intra = ff_h264_idct_add16intra_altivec; | c->h264_idct_add16intra = ff_h264_idct_add16intra_altivec; | ||||
c->h264_idct_dc_add= h264_idct_dc_add_altivec; | c->h264_idct_dc_add= h264_idct_dc_add_altivec; | ||||
@@ -1343,7 +1343,7 @@ av_cold int ff_rv34_decode_init(AVCodecContext *avctx) | |||||
if (MPV_common_init(s) < 0) | if (MPV_common_init(s) < 0) | ||||
return -1; | return -1; | ||||
ff_h264_pred_init(&r->h, CODEC_ID_RV40, 8); | |||||
ff_h264_pred_init(&r->h, CODEC_ID_RV40, 8, 1); | |||||
#if CONFIG_RV30_DECODER | #if CONFIG_RV30_DECODER | ||||
if (avctx->codec_id == CODEC_ID_RV30) | if (avctx->codec_id == CODEC_ID_RV30) | ||||
@@ -1769,7 +1769,7 @@ static av_cold int vp8_decode_init(AVCodecContext *avctx) | |||||
avctx->pix_fmt = PIX_FMT_YUV420P; | avctx->pix_fmt = PIX_FMT_YUV420P; | ||||
dsputil_init(&s->dsp, avctx); | dsputil_init(&s->dsp, avctx); | ||||
ff_h264_pred_init(&s->hpc, CODEC_ID_VP8, 8); | |||||
ff_h264_pred_init(&s->hpc, CODEC_ID_VP8, 8, 1); | |||||
ff_vp8dsp_init(&s->vp8dsp); | ff_vp8dsp_init(&s->vp8dsp); | ||||
return 0; | return 0; | ||||
@@ -167,7 +167,7 @@ void ff_pred4x4_tm_vp8_mmxext (uint8_t *src, const uint8_t *topright, int s | |||||
void ff_pred4x4_tm_vp8_ssse3 (uint8_t *src, const uint8_t *topright, int stride); | void ff_pred4x4_tm_vp8_ssse3 (uint8_t *src, const uint8_t *topright, int stride); | ||||
void ff_pred4x4_vertical_vp8_mmxext(uint8_t *src, const uint8_t *topright, int stride); | void ff_pred4x4_vertical_vp8_mmxext(uint8_t *src, const uint8_t *topright, int stride); | ||||
void ff_h264_pred_init_x86(H264PredContext *h, int codec_id, const int bit_depth) | |||||
void ff_h264_pred_init_x86(H264PredContext *h, int codec_id, const int bit_depth, const int chroma_format_idc) | |||||
{ | { | ||||
#if HAVE_YASM | #if HAVE_YASM | ||||
int mm_flags = av_get_cpu_flags(); | int mm_flags = av_get_cpu_flags(); | ||||
@@ -176,14 +176,17 @@ void ff_h264_pred_init_x86(H264PredContext *h, int codec_id, const int bit_depth | |||||
if (mm_flags & AV_CPU_FLAG_MMX) { | if (mm_flags & AV_CPU_FLAG_MMX) { | ||||
h->pred16x16[VERT_PRED8x8 ] = ff_pred16x16_vertical_mmx; | h->pred16x16[VERT_PRED8x8 ] = ff_pred16x16_vertical_mmx; | ||||
h->pred16x16[HOR_PRED8x8 ] = ff_pred16x16_horizontal_mmx; | h->pred16x16[HOR_PRED8x8 ] = ff_pred16x16_horizontal_mmx; | ||||
h->pred8x8 [VERT_PRED8x8 ] = ff_pred8x8_vertical_mmx; | |||||
h->pred8x8 [HOR_PRED8x8 ] = ff_pred8x8_horizontal_mmx; | |||||
if (chroma_format_idc == 1) { | |||||
h->pred8x8 [VERT_PRED8x8 ] = ff_pred8x8_vertical_mmx; | |||||
h->pred8x8 [HOR_PRED8x8 ] = ff_pred8x8_horizontal_mmx; | |||||
} | |||||
if (codec_id == CODEC_ID_VP8) { | if (codec_id == CODEC_ID_VP8) { | ||||
h->pred16x16[PLANE_PRED8x8 ] = ff_pred16x16_tm_vp8_mmx; | h->pred16x16[PLANE_PRED8x8 ] = ff_pred16x16_tm_vp8_mmx; | ||||
h->pred8x8 [PLANE_PRED8x8 ] = ff_pred8x8_tm_vp8_mmx; | h->pred8x8 [PLANE_PRED8x8 ] = ff_pred8x8_tm_vp8_mmx; | ||||
h->pred4x4 [TM_VP8_PRED ] = ff_pred4x4_tm_vp8_mmx; | h->pred4x4 [TM_VP8_PRED ] = ff_pred4x4_tm_vp8_mmx; | ||||
} else { | } else { | ||||
h->pred8x8 [PLANE_PRED8x8] = ff_pred8x8_plane_mmx; | |||||
if (chroma_format_idc == 1) | |||||
h->pred8x8 [PLANE_PRED8x8] = ff_pred8x8_plane_mmx; | |||||
if (codec_id == CODEC_ID_SVQ3) { | if (codec_id == CODEC_ID_SVQ3) { | ||||
h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_plane_svq3_mmx; | h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_plane_svq3_mmx; | ||||
} else if (codec_id == CODEC_ID_RV40) { | } else if (codec_id == CODEC_ID_RV40) { | ||||
@@ -197,7 +200,8 @@ void ff_h264_pred_init_x86(H264PredContext *h, int codec_id, const int bit_depth | |||||
if (mm_flags & AV_CPU_FLAG_MMX2) { | if (mm_flags & AV_CPU_FLAG_MMX2) { | ||||
h->pred16x16[HOR_PRED8x8 ] = ff_pred16x16_horizontal_mmxext; | h->pred16x16[HOR_PRED8x8 ] = ff_pred16x16_horizontal_mmxext; | ||||
h->pred16x16[DC_PRED8x8 ] = ff_pred16x16_dc_mmxext; | h->pred16x16[DC_PRED8x8 ] = ff_pred16x16_dc_mmxext; | ||||
h->pred8x8 [HOR_PRED8x8 ] = ff_pred8x8_horizontal_mmxext; | |||||
if (chroma_format_idc == 1) | |||||
h->pred8x8[HOR_PRED8x8 ] = ff_pred8x8_horizontal_mmxext; | |||||
h->pred8x8l [TOP_DC_PRED ] = ff_pred8x8l_top_dc_mmxext; | h->pred8x8l [TOP_DC_PRED ] = ff_pred8x8l_top_dc_mmxext; | ||||
h->pred8x8l [DC_PRED ] = ff_pred8x8l_dc_mmxext; | h->pred8x8l [DC_PRED ] = ff_pred8x8l_dc_mmxext; | ||||
h->pred8x8l [HOR_PRED ] = ff_pred8x8l_horizontal_mmxext; | h->pred8x8l [HOR_PRED ] = ff_pred8x8l_horizontal_mmxext; | ||||
@@ -221,8 +225,10 @@ void ff_h264_pred_init_x86(H264PredContext *h, int codec_id, const int bit_depth | |||||
h->pred4x4 [HOR_UP_PRED ] = ff_pred4x4_horizontal_up_mmxext; | h->pred4x4 [HOR_UP_PRED ] = ff_pred4x4_horizontal_up_mmxext; | ||||
} | } | ||||
if (codec_id == CODEC_ID_SVQ3 || codec_id == CODEC_ID_H264) { | if (codec_id == CODEC_ID_SVQ3 || codec_id == CODEC_ID_H264) { | ||||
h->pred8x8 [TOP_DC_PRED8x8 ] = ff_pred8x8_top_dc_mmxext; | |||||
h->pred8x8 [DC_PRED8x8 ] = ff_pred8x8_dc_mmxext; | |||||
if (chroma_format_idc == 1) { | |||||
h->pred8x8[TOP_DC_PRED8x8 ] = ff_pred8x8_top_dc_mmxext; | |||||
h->pred8x8[DC_PRED8x8 ] = ff_pred8x8_dc_mmxext; | |||||
} | |||||
} | } | ||||
if (codec_id == CODEC_ID_VP8) { | if (codec_id == CODEC_ID_VP8) { | ||||
h->pred16x16[PLANE_PRED8x8 ] = ff_pred16x16_tm_vp8_mmxext; | h->pred16x16[PLANE_PRED8x8 ] = ff_pred16x16_tm_vp8_mmxext; | ||||
@@ -231,7 +237,8 @@ void ff_h264_pred_init_x86(H264PredContext *h, int codec_id, const int bit_depth | |||||
h->pred4x4 [TM_VP8_PRED ] = ff_pred4x4_tm_vp8_mmxext; | h->pred4x4 [TM_VP8_PRED ] = ff_pred4x4_tm_vp8_mmxext; | ||||
h->pred4x4 [VERT_PRED ] = ff_pred4x4_vertical_vp8_mmxext; | h->pred4x4 [VERT_PRED ] = ff_pred4x4_vertical_vp8_mmxext; | ||||
} else { | } else { | ||||
h->pred8x8 [PLANE_PRED8x8] = ff_pred8x8_plane_mmx2; | |||||
if (chroma_format_idc == 1) | |||||
h->pred8x8 [PLANE_PRED8x8] = ff_pred8x8_plane_mmx2; | |||||
if (codec_id == CODEC_ID_SVQ3) { | if (codec_id == CODEC_ID_SVQ3) { | ||||
h->pred16x16[PLANE_PRED8x8 ] = ff_pred16x16_plane_svq3_mmx2; | h->pred16x16[PLANE_PRED8x8 ] = ff_pred16x16_plane_svq3_mmx2; | ||||
} else if (codec_id == CODEC_ID_RV40) { | } else if (codec_id == CODEC_ID_RV40) { | ||||
@@ -257,7 +264,8 @@ void ff_h264_pred_init_x86(H264PredContext *h, int codec_id, const int bit_depth | |||||
h->pred16x16[PLANE_PRED8x8 ] = ff_pred16x16_tm_vp8_sse2; | h->pred16x16[PLANE_PRED8x8 ] = ff_pred16x16_tm_vp8_sse2; | ||||
h->pred8x8 [PLANE_PRED8x8 ] = ff_pred8x8_tm_vp8_sse2; | h->pred8x8 [PLANE_PRED8x8 ] = ff_pred8x8_tm_vp8_sse2; | ||||
} else { | } else { | ||||
h->pred8x8 [PLANE_PRED8x8 ] = ff_pred8x8_plane_sse2; | |||||
if (chroma_format_idc == 1) | |||||
h->pred8x8 [PLANE_PRED8x8] = ff_pred8x8_plane_sse2; | |||||
if (codec_id == CODEC_ID_SVQ3) { | if (codec_id == CODEC_ID_SVQ3) { | ||||
h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_plane_svq3_sse2; | h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_plane_svq3_sse2; | ||||
} else if (codec_id == CODEC_ID_RV40) { | } else if (codec_id == CODEC_ID_RV40) { | ||||
@@ -271,7 +279,8 @@ void ff_h264_pred_init_x86(H264PredContext *h, int codec_id, const int bit_depth | |||||
if (mm_flags & AV_CPU_FLAG_SSSE3) { | if (mm_flags & AV_CPU_FLAG_SSSE3) { | ||||
h->pred16x16[HOR_PRED8x8 ] = ff_pred16x16_horizontal_ssse3; | h->pred16x16[HOR_PRED8x8 ] = ff_pred16x16_horizontal_ssse3; | ||||
h->pred16x16[DC_PRED8x8 ] = ff_pred16x16_dc_ssse3; | h->pred16x16[DC_PRED8x8 ] = ff_pred16x16_dc_ssse3; | ||||
h->pred8x8 [HOR_PRED8x8 ] = ff_pred8x8_horizontal_ssse3; | |||||
if (chroma_format_idc == 1) | |||||
h->pred8x8 [HOR_PRED8x8 ] = ff_pred8x8_horizontal_ssse3; | |||||
h->pred8x8l [TOP_DC_PRED ] = ff_pred8x8l_top_dc_ssse3; | h->pred8x8l [TOP_DC_PRED ] = ff_pred8x8l_top_dc_ssse3; | ||||
h->pred8x8l [DC_PRED ] = ff_pred8x8l_dc_ssse3; | h->pred8x8l [DC_PRED ] = ff_pred8x8l_dc_ssse3; | ||||
h->pred8x8l [HOR_PRED ] = ff_pred8x8l_horizontal_ssse3; | h->pred8x8l [HOR_PRED ] = ff_pred8x8l_horizontal_ssse3; | ||||
@@ -286,7 +295,8 @@ void ff_h264_pred_init_x86(H264PredContext *h, int codec_id, const int bit_depth | |||||
h->pred8x8 [PLANE_PRED8x8 ] = ff_pred8x8_tm_vp8_ssse3; | h->pred8x8 [PLANE_PRED8x8 ] = ff_pred8x8_tm_vp8_ssse3; | ||||
h->pred4x4 [TM_VP8_PRED ] = ff_pred4x4_tm_vp8_ssse3; | h->pred4x4 [TM_VP8_PRED ] = ff_pred4x4_tm_vp8_ssse3; | ||||
} else { | } else { | ||||
h->pred8x8 [PLANE_PRED8x8] = ff_pred8x8_plane_ssse3; | |||||
if (chroma_format_idc == 1) | |||||
h->pred8x8 [PLANE_PRED8x8] = ff_pred8x8_plane_ssse3; | |||||
if (codec_id == CODEC_ID_SVQ3) { | if (codec_id == CODEC_ID_SVQ3) { | ||||
h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_plane_svq3_ssse3; | h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_plane_svq3_ssse3; | ||||
} else if (codec_id == CODEC_ID_RV40) { | } else if (codec_id == CODEC_ID_RV40) { | ||||
@@ -301,7 +311,8 @@ void ff_h264_pred_init_x86(H264PredContext *h, int codec_id, const int bit_depth | |||||
h->pred4x4[DC_PRED ] = ff_pred4x4_dc_10_mmxext; | h->pred4x4[DC_PRED ] = ff_pred4x4_dc_10_mmxext; | ||||
h->pred4x4[HOR_UP_PRED ] = ff_pred4x4_horizontal_up_10_mmxext; | h->pred4x4[HOR_UP_PRED ] = ff_pred4x4_horizontal_up_10_mmxext; | ||||
h->pred8x8[DC_PRED8x8 ] = ff_pred8x8_dc_10_mmxext; | |||||
if (chroma_format_idc == 1) | |||||
h->pred8x8[DC_PRED8x8 ] = ff_pred8x8_dc_10_mmxext; | |||||
h->pred8x8l[DC_128_PRED ] = ff_pred8x8l_128_dc_10_mmxext; | h->pred8x8l[DC_128_PRED ] = ff_pred8x8l_128_dc_10_mmxext; | ||||
@@ -319,11 +330,13 @@ void ff_h264_pred_init_x86(H264PredContext *h, int codec_id, const int bit_depth | |||||
h->pred4x4[VERT_RIGHT_PRED ] = ff_pred4x4_vertical_right_10_sse2; | h->pred4x4[VERT_RIGHT_PRED ] = ff_pred4x4_vertical_right_10_sse2; | ||||
h->pred4x4[HOR_DOWN_PRED ] = ff_pred4x4_horizontal_down_10_sse2; | h->pred4x4[HOR_DOWN_PRED ] = ff_pred4x4_horizontal_down_10_sse2; | ||||
h->pred8x8[DC_PRED8x8 ] = ff_pred8x8_dc_10_sse2; | |||||
h->pred8x8[TOP_DC_PRED8x8 ] = ff_pred8x8_top_dc_10_sse2; | |||||
h->pred8x8[PLANE_PRED8x8 ] = ff_pred8x8_plane_10_sse2; | |||||
h->pred8x8[VERT_PRED8x8 ] = ff_pred8x8_vertical_10_sse2; | |||||
h->pred8x8[HOR_PRED8x8 ] = ff_pred8x8_horizontal_10_sse2; | |||||
if (chroma_format_idc == 1) { | |||||
h->pred8x8[DC_PRED8x8 ] = ff_pred8x8_dc_10_sse2; | |||||
h->pred8x8[TOP_DC_PRED8x8 ] = ff_pred8x8_top_dc_10_sse2; | |||||
h->pred8x8[PLANE_PRED8x8 ] = ff_pred8x8_plane_10_sse2; | |||||
h->pred8x8[VERT_PRED8x8 ] = ff_pred8x8_vertical_10_sse2; | |||||
h->pred8x8[HOR_PRED8x8 ] = ff_pred8x8_horizontal_10_sse2; | |||||
} | |||||
h->pred8x8l[VERT_PRED ] = ff_pred8x8l_vertical_10_sse2; | h->pred8x8l[VERT_PRED ] = ff_pred8x8l_vertical_10_sse2; | ||||
h->pred8x8l[HOR_PRED ] = ff_pred8x8l_horizontal_10_sse2; | h->pred8x8l[HOR_PRED ] = ff_pred8x8l_horizontal_10_sse2; | ||||
@@ -350,7 +350,7 @@ H264_BIWEIGHT_10_SSE( 4, 8, 10) | |||||
H264_BIWEIGHT_10_SSE( 4, 4, 10) | H264_BIWEIGHT_10_SSE( 4, 4, 10) | ||||
H264_BIWEIGHT_10_SSE( 4, 2, 10) | H264_BIWEIGHT_10_SSE( 4, 2, 10) | ||||
void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth) | |||||
void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth, const int chroma_format_idc) | |||||
{ | { | ||||
int mm_flags = av_get_cpu_flags(); | int mm_flags = av_get_cpu_flags(); | ||||
@@ -368,7 +368,8 @@ void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth) | |||||
c->h264_idct_add16 = ff_h264_idct_add16_8_mmx; | c->h264_idct_add16 = ff_h264_idct_add16_8_mmx; | ||||
c->h264_idct8_add4 = ff_h264_idct8_add4_8_mmx; | c->h264_idct8_add4 = ff_h264_idct8_add4_8_mmx; | ||||
c->h264_idct_add8 = ff_h264_idct_add8_8_mmx; | |||||
if (chroma_format_idc == 1) | |||||
c->h264_idct_add8 = ff_h264_idct_add8_8_mmx; | |||||
c->h264_idct_add16intra = ff_h264_idct_add16intra_8_mmx; | c->h264_idct_add16intra = ff_h264_idct_add16intra_8_mmx; | ||||
c->h264_luma_dc_dequant_idct= ff_h264_luma_dc_dequant_idct_mmx; | c->h264_luma_dc_dequant_idct= ff_h264_luma_dc_dequant_idct_mmx; | ||||
@@ -377,13 +378,16 @@ void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth) | |||||
c->h264_idct8_dc_add = ff_h264_idct8_dc_add_8_mmx2; | c->h264_idct8_dc_add = ff_h264_idct8_dc_add_8_mmx2; | ||||
c->h264_idct_add16 = ff_h264_idct_add16_8_mmx2; | c->h264_idct_add16 = ff_h264_idct_add16_8_mmx2; | ||||
c->h264_idct8_add4 = ff_h264_idct8_add4_8_mmx2; | c->h264_idct8_add4 = ff_h264_idct8_add4_8_mmx2; | ||||
c->h264_idct_add8 = ff_h264_idct_add8_8_mmx2; | |||||
if (chroma_format_idc == 1) | |||||
c->h264_idct_add8 = ff_h264_idct_add8_8_mmx2; | |||||
c->h264_idct_add16intra= ff_h264_idct_add16intra_8_mmx2; | c->h264_idct_add16intra= ff_h264_idct_add16intra_8_mmx2; | ||||
c->h264_v_loop_filter_chroma= ff_deblock_v_chroma_8_mmxext; | c->h264_v_loop_filter_chroma= ff_deblock_v_chroma_8_mmxext; | ||||
c->h264_h_loop_filter_chroma= ff_deblock_h_chroma_8_mmxext; | |||||
c->h264_v_loop_filter_chroma_intra= ff_deblock_v_chroma_intra_8_mmxext; | c->h264_v_loop_filter_chroma_intra= ff_deblock_v_chroma_intra_8_mmxext; | ||||
c->h264_h_loop_filter_chroma_intra= ff_deblock_h_chroma_intra_8_mmxext; | |||||
if (chroma_format_idc == 1) { | |||||
c->h264_h_loop_filter_chroma= ff_deblock_h_chroma_8_mmxext; | |||||
c->h264_h_loop_filter_chroma_intra= ff_deblock_h_chroma_intra_8_mmxext; | |||||
} | |||||
#if ARCH_X86_32 | #if ARCH_X86_32 | ||||
c->h264_v_loop_filter_luma= ff_deblock_v_luma_8_mmxext; | c->h264_v_loop_filter_luma= ff_deblock_v_luma_8_mmxext; | ||||
c->h264_h_loop_filter_luma= ff_deblock_h_luma_8_mmxext; | c->h264_h_loop_filter_luma= ff_deblock_h_luma_8_mmxext; | ||||
@@ -413,7 +417,8 @@ void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth) | |||||
c->h264_idct_add16 = ff_h264_idct_add16_8_sse2; | c->h264_idct_add16 = ff_h264_idct_add16_8_sse2; | ||||
c->h264_idct8_add4 = ff_h264_idct8_add4_8_sse2; | c->h264_idct8_add4 = ff_h264_idct8_add4_8_sse2; | ||||
c->h264_idct_add8 = ff_h264_idct_add8_8_sse2; | |||||
if (chroma_format_idc == 1) | |||||
c->h264_idct_add8 = ff_h264_idct_add8_8_sse2; | |||||
c->h264_idct_add16intra = ff_h264_idct_add16intra_8_sse2; | c->h264_idct_add16intra = ff_h264_idct_add16intra_8_sse2; | ||||
c->h264_luma_dc_dequant_idct= ff_h264_luma_dc_dequant_idct_sse2; | c->h264_luma_dc_dequant_idct= ff_h264_luma_dc_dequant_idct_sse2; | ||||
@@ -472,7 +477,8 @@ void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth) | |||||
c->h264_idct8_dc_add = ff_h264_idct8_dc_add_10_sse2; | c->h264_idct8_dc_add = ff_h264_idct8_dc_add_10_sse2; | ||||
c->h264_idct_add16 = ff_h264_idct_add16_10_sse2; | c->h264_idct_add16 = ff_h264_idct_add16_10_sse2; | ||||
c->h264_idct_add8 = ff_h264_idct_add8_10_sse2; | |||||
if (chroma_format_idc == 1) | |||||
c->h264_idct_add8 = ff_h264_idct_add8_10_sse2; | |||||
c->h264_idct_add16intra= ff_h264_idct_add16intra_10_sse2; | c->h264_idct_add16intra= ff_h264_idct_add16intra_10_sse2; | ||||
#if HAVE_ALIGNED_STACK | #if HAVE_ALIGNED_STACK | ||||
c->h264_idct8_add = ff_h264_idct8_add_10_sse2; | c->h264_idct8_add = ff_h264_idct8_add_10_sse2; | ||||
@@ -532,7 +538,8 @@ void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth) | |||||
c->h264_idct8_dc_add = ff_h264_idct8_dc_add_10_avx; | c->h264_idct8_dc_add = ff_h264_idct8_dc_add_10_avx; | ||||
c->h264_idct_add16 = ff_h264_idct_add16_10_avx; | c->h264_idct_add16 = ff_h264_idct_add16_10_avx; | ||||
c->h264_idct_add8 = ff_h264_idct_add8_10_avx; | |||||
if (chroma_format_idc == 1) | |||||
c->h264_idct_add8 = ff_h264_idct_add8_10_avx; | |||||
c->h264_idct_add16intra= ff_h264_idct_add16intra_10_avx; | c->h264_idct_add16intra= ff_h264_idct_add16intra_10_avx; | ||||
#if HAVE_ALIGNED_STACK | #if HAVE_ALIGNED_STACK | ||||
c->h264_idct8_add = ff_h264_idct8_add_10_avx; | c->h264_idct8_add = ff_h264_idct8_add_10_avx; | ||||