From 79c6477c2abd8cfa41eef0c4ac39779dd8a9ec8e Mon Sep 17 00:00:00 2001 From: Yogender Kumar Gupta Date: Thu, 15 Jun 2017 18:56:13 -0400 Subject: [PATCH 1/3] h264dec: fix Lossless Decoding (Profile 244) for 8x8 Intra Prediction CC: libav-stable@libav.org Signed-off-by: Anton Khirnov --- libavcodec/h264_mb.c | 7 +++- libavcodec/h264pred.c | 2 + libavcodec/h264pred.h | 3 ++ libavcodec/h264pred_template.c | 73 ++++++++++++++++++++++++++++++++++ 4 files changed, 84 insertions(+), 1 deletion(-) diff --git a/libavcodec/h264_mb.c b/libavcodec/h264_mb.c index f037bd5163..51d73ce710 100644 --- a/libavcodec/h264_mb.c +++ b/libavcodec/h264_mb.c @@ -636,7 +636,12 @@ static av_always_inline void hl_decode_mb_predict_luma(const H264Context *h, uint8_t *const ptr = dest_y + block_offset[i]; const int dir = sl->intra4x4_pred_mode_cache[scan8[i]]; if (transform_bypass && h->ps.sps->profile_idc == 244 && dir <= 1) { - h->hpc.pred8x8l_add[dir](ptr, sl->mb + (i * 16 + p * 256 << pixel_shift), linesize); + if (h->x264_build < 151U) { + h->hpc.pred8x8l_add[dir](ptr, sl->mb + (i * 16 + p * 256 << pixel_shift), linesize); + } else + h->hpc.pred8x8l_filter_add[dir](ptr, sl->mb + (i * 16 + p * 256 << pixel_shift), + (sl-> topleft_samples_available << i) & 0x8000, + (sl->topright_samples_available << i) & 0x4000, linesize); } else { const int nnz = sl->non_zero_count_cache[scan8[i + p * 16]]; h->hpc.pred8x8l[dir](ptr, (sl->topleft_samples_available << i) & 0x8000, diff --git a/libavcodec/h264pred.c b/libavcodec/h264pred.c index 7627eb076d..135babcab4 100644 --- a/libavcodec/h264pred.c +++ b/libavcodec/h264pred.c @@ -552,6 +552,8 @@ av_cold void ff_h264_pred_init(H264PredContext *h, int codec_id, h->pred4x4_add [ HOR_PRED ]= FUNCC(pred4x4_horizontal_add , depth);\ h->pred8x8l_add [VERT_PRED ]= FUNCC(pred8x8l_vertical_add , depth);\ h->pred8x8l_add [ HOR_PRED ]= FUNCC(pred8x8l_horizontal_add , depth);\ + h->pred8x8l_filter_add [VERT_PRED ]= FUNCC(pred8x8l_vertical_filter_add , depth);\ + h->pred8x8l_filter_add [ HOR_PRED ]= FUNCC(pred8x8l_horizontal_filter_add , depth);\ if (chroma_format_idc <= 1) {\ h->pred8x8_add [VERT_PRED8x8]= FUNCC(pred8x8_vertical_add , depth);\ h->pred8x8_add [ HOR_PRED8x8]= FUNCC(pred8x8_horizontal_add , depth);\ diff --git a/libavcodec/h264pred.h b/libavcodec/h264pred.h index 60e74349c9..795d8f3eff 100644 --- a/libavcodec/h264pred.h +++ b/libavcodec/h264pred.h @@ -101,6 +101,9 @@ typedef struct H264PredContext { int16_t *block /*align 16*/, ptrdiff_t stride); void(*pred8x8l_add[2])(uint8_t *pix /*align 8*/, int16_t *block /*align 16*/, ptrdiff_t stride); + void(*pred8x8l_filter_add[2])(uint8_t *pix /*align 8*/, + int16_t *block /*align 16*/, + int topleft, int topright, ptrdiff_t stride); void(*pred8x8_add[3])(uint8_t *pix /*align 8*/, const int *block_offset, int16_t *block /*align 16*/, ptrdiff_t stride); diff --git a/libavcodec/h264pred_template.c b/libavcodec/h264pred_template.c index 8492b2b0a2..02494aaff9 100644 --- a/libavcodec/h264pred_template.c +++ b/libavcodec/h264pred_template.c @@ -1123,6 +1123,79 @@ static void FUNCC(pred8x8l_horizontal_up)(uint8_t *_src, int has_topleft, SRC(5,6)=SRC(5,7)=SRC(6,4)=SRC(6,5)=SRC(6,6)= SRC(6,7)=SRC(7,4)=SRC(7,5)=SRC(7,6)=SRC(7,7)= l7; } + +static void FUNCC(pred8x8l_vertical_filter_add)(uint8_t *_src, int16_t *_block, int has_topleft, + int has_topright, ptrdiff_t _stride) +{ + int i; + pixel *src = (pixel*)_src; + const dctcoef *block = (const dctcoef*)_block; + pixel pix[8]; + int stride = _stride/sizeof(pixel); + PREDICT_8x8_LOAD_TOP; + + pix[0] = t0; + pix[1] = t1; + pix[2] = t2; + pix[3] = t3; + pix[4] = t4; + pix[5] = t5; + pix[6] = t6; + pix[7] = t7; + + for (i = 0; i < 8; i++) { + pixel v = pix[i]; + src[0 * stride] = v += block[0]; + src[1 * stride] = v += block[8]; + src[2 * stride] = v += block[16]; + src[3 * stride] = v += block[24]; + src[4 * stride] = v += block[32]; + src[5 * stride] = v += block[40]; + src[6 * stride] = v += block[48]; + src[7 * stride] = v + block[56]; + src++; + block++; + } + + memset(_block, 0, sizeof(dctcoef) * 64); +} + +static void FUNCC(pred8x8l_horizontal_filter_add)(uint8_t *_src, int16_t *_block, int has_topleft, + int has_topright, ptrdiff_t _stride) +{ + int i; + pixel *src = (pixel*)_src; + const dctcoef *block = (const dctcoef*)_block; + pixel pix[8]; + int stride = _stride/sizeof(pixel); + PREDICT_8x8_LOAD_LEFT; + + pix[0] = l0; + pix[1] = l1; + pix[2] = l2; + pix[3] = l3; + pix[4] = l4; + pix[5] = l5; + pix[6] = l6; + pix[7] = l7; + + for (i = 0; i < 8; i++) { + pixel v = pix[i]; + src[0] = v += block[0]; + src[1] = v += block[1]; + src[2] = v += block[2]; + src[3] = v += block[3]; + src[4] = v += block[4]; + src[5] = v += block[5]; + src[6] = v += block[6]; + src[7] = v + block[7]; + src += stride; + block += 8; + } + + memset(_block, 0, sizeof(dctcoef) * 64); +} + #undef PREDICT_8x8_LOAD_LEFT #undef PREDICT_8x8_LOAD_TOP #undef PREDICT_8x8_LOAD_TOPLEFT From 18d3f36d3c4d0f2c3e702f970ff8b457d7d5e39c Mon Sep 17 00:00:00 2001 From: Anton Mitrofanov Date: Thu, 15 Jun 2017 18:56:14 -0400 Subject: [PATCH 2/3] h264_cabac: Fix CABAC+8x8dct in 4:4:4 Use the correct ctxIdxInc calculation for coded_block_flag. Keep old behavior for old versions of x264 for backward compatibility. CC: libav-stable@libav.org Signed-off-by: Anton Khirnov --- libavcodec/h264_cabac.c | 47 +++++++++++++++++++++++++++++------------ 1 file changed, 33 insertions(+), 14 deletions(-) diff --git a/libavcodec/h264_cabac.c b/libavcodec/h264_cabac.c index b28e486e52..5dd285c3ea 100644 --- a/libavcodec/h264_cabac.c +++ b/libavcodec/h264_cabac.c @@ -2329,21 +2329,40 @@ decode_intra_mb: if (CHROMA444(h) && IS_8x8DCT(mb_type)){ int i; uint8_t *nnz_cache = sl->non_zero_count_cache; - for (i = 0; i < 2; i++){ - if (sl->left_type[LEFT(i)] && !IS_8x8DCT(sl->left_type[LEFT(i)])) { - nnz_cache[3+8* 1 + 2*8*i]= - nnz_cache[3+8* 2 + 2*8*i]= - nnz_cache[3+8* 6 + 2*8*i]= - nnz_cache[3+8* 7 + 2*8*i]= - nnz_cache[3+8*11 + 2*8*i]= - nnz_cache[3+8*12 + 2*8*i]= IS_INTRA(mb_type) ? 64 : 0; + if (h->x264_build < 151U) { + for (i = 0; i < 2; i++){ + if (sl->left_type[LEFT(i)] && !IS_8x8DCT(sl->left_type[LEFT(i)])) { + nnz_cache[3+8* 1 + 2*8*i]= + nnz_cache[3+8* 2 + 2*8*i]= + nnz_cache[3+8* 6 + 2*8*i]= + nnz_cache[3+8* 7 + 2*8*i]= + nnz_cache[3+8*11 + 2*8*i]= + nnz_cache[3+8*12 + 2*8*i]= IS_INTRA(mb_type) ? 64 : 0; + } + } + if (sl->top_type && !IS_8x8DCT(sl->top_type)){ + uint32_t top_empty = !IS_INTRA(mb_type) ? 0 : 0x40404040; + AV_WN32A(&nnz_cache[4+8* 0], top_empty); + AV_WN32A(&nnz_cache[4+8* 5], top_empty); + AV_WN32A(&nnz_cache[4+8*10], top_empty); + } + } else { + for (i = 0; i < 2; i++){ + if (sl->left_type[LEFT(i)] && !IS_8x8DCT(sl->left_type[LEFT(i)])) { + nnz_cache[3+8* 1 + 2*8*i]= + nnz_cache[3+8* 2 + 2*8*i]= + nnz_cache[3+8* 6 + 2*8*i]= + nnz_cache[3+8* 7 + 2*8*i]= + nnz_cache[3+8*11 + 2*8*i]= + nnz_cache[3+8*12 + 2*8*i]= !IS_INTRA_PCM(sl->left_type[LEFT(i)]) ? 0 : 64; + } + } + if (sl->top_type && !IS_8x8DCT(sl->top_type)){ + uint32_t top_empty = !IS_INTRA_PCM(sl->top_type) ? 0 : 0x40404040; + AV_WN32A(&nnz_cache[4+8* 0], top_empty); + AV_WN32A(&nnz_cache[4+8* 5], top_empty); + AV_WN32A(&nnz_cache[4+8*10], top_empty); } - } - if (sl->top_type && !IS_8x8DCT(sl->top_type)){ - uint32_t top_empty = !IS_INTRA(mb_type) ? 0 : 0x40404040; - AV_WN32A(&nnz_cache[4+8* 0], top_empty); - AV_WN32A(&nnz_cache[4+8* 5], top_empty); - AV_WN32A(&nnz_cache[4+8*10], top_empty); } } h->cur_pic.mb_type[mb_xy] = mb_type; From 70946e605924e2108c39f96faa369c220177f301 Mon Sep 17 00:00:00 2001 From: Anton Mitrofanov Date: Thu, 15 Jun 2017 18:56:16 -0400 Subject: [PATCH 3/3] h264dec: Fix mix of lossless and lossy MBs decoding CC: libav-stable@libav.org Signed-off-by: Anton Khirnov --- libavcodec/h264_cabac.c | 16 ++++++++-------- libavcodec/h264_cavlc.c | 16 ++++++++-------- 2 files changed, 16 insertions(+), 16 deletions(-) diff --git a/libavcodec/h264_cabac.c b/libavcodec/h264_cabac.c index 5dd285c3ea..c0b9e30e61 100644 --- a/libavcodec/h264_cabac.c +++ b/libavcodec/h264_cabac.c @@ -2371,14 +2371,6 @@ decode_intra_mb: const uint8_t *scan, *scan8x8; const uint32_t *qmul; - if(IS_INTERLACED(mb_type)){ - scan8x8 = sl->qscale ? h->field_scan8x8 : h->field_scan8x8_q0; - scan = sl->qscale ? h->field_scan : h->field_scan_q0; - }else{ - scan8x8 = sl->qscale ? h->zigzag_scan8x8 : h->zigzag_scan8x8_q0; - scan = sl->qscale ? h->zigzag_scan : h->zigzag_scan_q0; - } - // decode_cabac_mb_dqp if(get_cabac_noinline( &sl->cabac, &sl->cabac_state[60 + (sl->last_qscale_diff != 0)])){ int val = 1; @@ -2409,6 +2401,14 @@ decode_intra_mb: }else sl->last_qscale_diff=0; + if(IS_INTERLACED(mb_type)){ + scan8x8 = sl->qscale ? h->field_scan8x8 : h->field_scan8x8_q0; + scan = sl->qscale ? h->field_scan : h->field_scan_q0; + }else{ + scan8x8 = sl->qscale ? h->zigzag_scan8x8 : h->zigzag_scan8x8_q0; + scan = sl->qscale ? h->zigzag_scan : h->zigzag_scan_q0; + } + decode_cabac_luma_residual(h, sl, scan, scan8x8, pixel_shift, mb_type, cbp, 0); if (CHROMA444(h)) { decode_cabac_luma_residual(h, sl, scan, scan8x8, pixel_shift, mb_type, cbp, 1); diff --git a/libavcodec/h264_cavlc.c b/libavcodec/h264_cavlc.c index c11e211bd8..d57062bc56 100644 --- a/libavcodec/h264_cavlc.c +++ b/libavcodec/h264_cavlc.c @@ -1093,14 +1093,6 @@ decode_intra_mb: const uint8_t *scan, *scan8x8; const int max_qp = 51 + 6 * (h->ps.sps->bit_depth_luma - 8); - if(IS_INTERLACED(mb_type)){ - scan8x8 = sl->qscale ? h->field_scan8x8_cavlc : h->field_scan8x8_cavlc_q0; - scan = sl->qscale ? h->field_scan : h->field_scan_q0; - }else{ - scan8x8 = sl->qscale ? h->zigzag_scan8x8_cavlc : h->zigzag_scan8x8_cavlc_q0; - scan = sl->qscale ? h->zigzag_scan : h->zigzag_scan_q0; - } - dquant= get_se_golomb(&sl->gb); sl->qscale += dquant; @@ -1117,6 +1109,14 @@ decode_intra_mb: sl->chroma_qp[0] = get_chroma_qp(h->ps.pps, 0, sl->qscale); sl->chroma_qp[1] = get_chroma_qp(h->ps.pps, 1, sl->qscale); + if(IS_INTERLACED(mb_type)){ + scan8x8 = sl->qscale ? h->field_scan8x8_cavlc : h->field_scan8x8_cavlc_q0; + scan = sl->qscale ? h->field_scan : h->field_scan_q0; + }else{ + scan8x8 = sl->qscale ? h->zigzag_scan8x8_cavlc : h->zigzag_scan8x8_cavlc_q0; + scan = sl->qscale ? h->zigzag_scan : h->zigzag_scan_q0; + } + if ((ret = decode_luma_residual(h, sl, gb, scan, scan8x8, pixel_shift, mb_type, cbp, 0)) < 0 ) { return -1; }