High bitdepth H.264 needs 32-bit transform coefficients, whereas dnxhd does not. This creates a conflict with the templated functions operating on DCTELEM data. This patch adds a field to DSPContext that lets the caller choose the coefficient size used by dsputil_init(), and adds the required 16-bit and 32-bit function variants.

Signed-off-by: Mans Rullgard <mans@mansr.com>

tags/n0.9
| @@ -3159,13 +3159,13 @@ av_cold void dsputil_init(DSPContext* c, AVCodecContext *avctx) | |||||
| c->PFX ## _pixels_tab[IDX][15] = FUNCC(PFX ## NUM ## _mc33, depth) | c->PFX ## _pixels_tab[IDX][15] = FUNCC(PFX ## NUM ## _mc33, depth) | ||||
| #define BIT_DEPTH_FUNCS(depth)\ | |||||
| #define BIT_DEPTH_FUNCS(depth, dct)\ | |||||
| c->draw_edges = FUNCC(draw_edges , depth);\ | c->draw_edges = FUNCC(draw_edges , depth);\ | ||||
| c->emulated_edge_mc = FUNC (ff_emulated_edge_mc , depth);\ | c->emulated_edge_mc = FUNC (ff_emulated_edge_mc , depth);\ | ||||
| c->clear_block = FUNCC(clear_block , depth);\ | |||||
| c->clear_blocks = FUNCC(clear_blocks , depth);\ | |||||
| c->add_pixels8 = FUNCC(add_pixels8 , depth);\ | |||||
| c->add_pixels4 = FUNCC(add_pixels4 , depth);\ | |||||
| c->clear_block = FUNCC(clear_block ## dct , depth);\ | |||||
| c->clear_blocks = FUNCC(clear_blocks ## dct , depth);\ | |||||
| c->add_pixels8 = FUNCC(add_pixels8 ## dct , depth);\ | |||||
| c->add_pixels4 = FUNCC(add_pixels4 ## dct , depth);\ | |||||
| c->put_no_rnd_pixels_l2[0] = FUNCC(put_no_rnd_pixels16_l2, depth);\ | c->put_no_rnd_pixels_l2[0] = FUNCC(put_no_rnd_pixels16_l2, depth);\ | ||||
| c->put_no_rnd_pixels_l2[1] = FUNCC(put_no_rnd_pixels8_l2 , depth);\ | c->put_no_rnd_pixels_l2[1] = FUNCC(put_no_rnd_pixels8_l2 , depth);\ | ||||
| \ | \ | ||||
| @@ -3199,15 +3199,23 @@ av_cold void dsputil_init(DSPContext* c, AVCodecContext *avctx) | |||||
| switch (avctx->bits_per_raw_sample) { | switch (avctx->bits_per_raw_sample) { | ||||
| case 9: | case 9: | ||||
| BIT_DEPTH_FUNCS(9); | |||||
| if (c->dct_bits == 32) { | |||||
| BIT_DEPTH_FUNCS(9, _32); | |||||
| } else { | |||||
| BIT_DEPTH_FUNCS(9, _16); | |||||
| } | |||||
| break; | break; | ||||
| case 10: | case 10: | ||||
| BIT_DEPTH_FUNCS(10); | |||||
| if (c->dct_bits == 32) { | |||||
| BIT_DEPTH_FUNCS(10, _32); | |||||
| } else { | |||||
| BIT_DEPTH_FUNCS(10, _16); | |||||
| } | |||||
| break; | break; | ||||
| default: | default: | ||||
| av_log(avctx, AV_LOG_DEBUG, "Unsupported bit depth: %d\n", avctx->bits_per_raw_sample); | av_log(avctx, AV_LOG_DEBUG, "Unsupported bit depth: %d\n", avctx->bits_per_raw_sample); | ||||
| case 8: | case 8: | ||||
| BIT_DEPTH_FUNCS(8); | |||||
| BIT_DEPTH_FUNCS(8, _16); | |||||
| break; | break; | ||||
| } | } | ||||
| @@ -219,6 +219,11 @@ void ff_put_signed_pixels_clamped_c(const DCTELEM *block, uint8_t *dest, int lin | |||||
| * DSPContext. | * DSPContext. | ||||
| */ | */ | ||||
| typedef struct DSPContext { | typedef struct DSPContext { | ||||
| /** | |||||
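| * Size of DCT coefficients, in bits. | |||||
| * dsputil_init() installs the 32-bit coefficient functions only when this | |||||
| * is 32 and the bit depth is 9 or 10; otherwise the 16-bit ones are used. | |||||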
| */ | |||||
| int dct_bits; | |||||
| /* pixel ops : interface with DCT */ | /* pixel ops : interface with DCT */ | ||||
| void (*get_pixels)(DCTELEM *block/*align 16*/, const uint8_t *pixels/*align 8*/, int line_size); | void (*get_pixels)(DCTELEM *block/*align 16*/, const uint8_t *pixels/*align 8*/, int line_size); | ||||
| void (*diff_pixels)(DCTELEM *block/*align 16*/, const uint8_t *s1/*align 8*/, const uint8_t *s2/*align 8*/, int stride); | void (*diff_pixels)(DCTELEM *block/*align 16*/, const uint8_t *s1/*align 8*/, const uint8_t *s2/*align 8*/, int stride); | ||||
| @@ -192,43 +192,66 @@ void FUNC(ff_emulated_edge_mc)(uint8_t *buf, const uint8_t *src, int linesize, i | |||||
| } | } | ||||
| } | } | ||||
| static void FUNCC(add_pixels8)(uint8_t *restrict _pixels, DCTELEM *_block, int line_size) | |||||
| { | |||||
| int i; | |||||
| pixel *restrict pixels = (pixel *restrict)_pixels; | |||||
| dctcoef *block = (dctcoef*)_block; | |||||
| line_size /= sizeof(pixel); | |||||
| for(i=0;i<8;i++) { | |||||
| pixels[0] += block[0]; | |||||
| pixels[1] += block[1]; | |||||
| pixels[2] += block[2]; | |||||
| pixels[3] += block[3]; | |||||
| pixels[4] += block[4]; | |||||
| pixels[5] += block[5]; | |||||
| pixels[6] += block[6]; | |||||
| pixels[7] += block[7]; | |||||
| pixels += line_size; | |||||
| block += 8; | |||||
| } | |||||
| #define DCTELEM_FUNCS(dctcoef, suffix) \ | |||||
| static void FUNCC(add_pixels8 ## suffix)(uint8_t *restrict _pixels, \ | |||||
| DCTELEM *_block, \ | |||||
| int line_size) \ | |||||
| { \ | |||||
| int i; \ | |||||
| pixel *restrict pixels = (pixel *restrict)_pixels; \ | |||||
| dctcoef *block = (dctcoef*)_block; \ | |||||
| line_size /= sizeof(pixel); \ | |||||
| \ | |||||
| for(i=0;i<8;i++) { \ | |||||
| pixels[0] += block[0]; \ | |||||
| pixels[1] += block[1]; \ | |||||
| pixels[2] += block[2]; \ | |||||
| pixels[3] += block[3]; \ | |||||
| pixels[4] += block[4]; \ | |||||
| pixels[5] += block[5]; \ | |||||
| pixels[6] += block[6]; \ | |||||
| pixels[7] += block[7]; \ | |||||
| pixels += line_size; \ | |||||
| block += 8; \ | |||||
| } \ | |||||
| } \ | |||||
| \ | |||||
| static void FUNCC(add_pixels4 ## suffix)(uint8_t *restrict _pixels, \ | |||||
| DCTELEM *_block, \ | |||||
| int line_size) \ | |||||
| { \ | |||||
| int i; \ | |||||
| pixel *restrict pixels = (pixel *restrict)_pixels; \ | |||||
| dctcoef *block = (dctcoef*)_block; \ | |||||
| line_size /= sizeof(pixel); \ | |||||
| \ | |||||
| for(i=0;i<4;i++) { \ | |||||
| pixels[0] += block[0]; \ | |||||
| pixels[1] += block[1]; \ | |||||
| pixels[2] += block[2]; \ | |||||
| pixels[3] += block[3]; \ | |||||
| pixels += line_size; \ | |||||
| block += 4; \ | |||||
| } \ | |||||
| } \ | |||||
| \ | |||||
| static void FUNCC(clear_block ## suffix)(DCTELEM *block) \ | |||||
| { \ | |||||
| memset(block, 0, sizeof(dctcoef)*64); \ | |||||
| } \ | |||||
| \ | |||||
| /** \ | |||||
| * memset(blocks, 0, sizeof(dctcoef)*6*64) \ | |||||
| */ \ | |||||
| static void FUNCC(clear_blocks ## suffix)(DCTELEM *blocks) \ | |||||
| { \ | |||||
| memset(blocks, 0, sizeof(dctcoef)*6*64); \ | |||||
| } | } | ||||
| static void FUNCC(add_pixels4)(uint8_t *restrict _pixels, DCTELEM *_block, int line_size) | |||||
| { | |||||
| int i; | |||||
| pixel *restrict pixels = (pixel *restrict)_pixels; | |||||
| dctcoef *block = (dctcoef*)_block; | |||||
| line_size /= sizeof(pixel); | |||||
| for(i=0;i<4;i++) { | |||||
| pixels[0] += block[0]; | |||||
| pixels[1] += block[1]; | |||||
| pixels[2] += block[2]; | |||||
| pixels[3] += block[3]; | |||||
| pixels += line_size; | |||||
| block += 4; | |||||
| } | |||||
| } | |||||
| DCTELEM_FUNCS(DCTELEM, _16) | |||||
| #if BIT_DEPTH > 8 | |||||
| DCTELEM_FUNCS(dctcoef, _32) | |||||
| #endif | |||||
| #define PIXOP2(OPNAME, OP) \ | #define PIXOP2(OPNAME, OP) \ | ||||
| static void FUNCC(OPNAME ## _pixels2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ | static void FUNCC(OPNAME ## _pixels2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ | ||||
| @@ -1231,16 +1254,3 @@ void FUNCC(ff_put_pixels16x16)(uint8_t *dst, uint8_t *src, int stride) { | |||||
| void FUNCC(ff_avg_pixels16x16)(uint8_t *dst, uint8_t *src, int stride) { | void FUNCC(ff_avg_pixels16x16)(uint8_t *dst, uint8_t *src, int stride) { | ||||
| FUNCC(avg_pixels16)(dst, src, stride, 16); | FUNCC(avg_pixels16)(dst, src, stride, 16); | ||||
| } | } | ||||
| static void FUNCC(clear_block)(DCTELEM *block) | |||||
| { | |||||
| memset(block, 0, sizeof(dctcoef)*64); | |||||
| } | |||||
| /** | |||||
| * memset(blocks, 0, sizeof(DCTELEM)*6*64) | |||||
| */ | |||||
| static void FUNCC(clear_blocks)(DCTELEM *blocks) | |||||
| { | |||||
| memset(blocks, 0, sizeof(dctcoef)*6*64); | |||||
| } | |||||
| @@ -3702,6 +3702,7 @@ static int decode_nal_units(H264Context *h, const uint8_t *buf, int buf_size){ | |||||
| ff_h264dsp_init(&h->h264dsp, h->sps.bit_depth_luma); | ff_h264dsp_init(&h->h264dsp, h->sps.bit_depth_luma); | ||||
| ff_h264_pred_init(&h->hpc, s->codec_id, h->sps.bit_depth_luma); | ff_h264_pred_init(&h->hpc, s->codec_id, h->sps.bit_depth_luma); | ||||
| s->dsp.dct_bits = h->sps.bit_depth_luma > 8 ? 32 : 16; | |||||
| dsputil_init(&s->dsp, s->avctx); | dsputil_init(&s->dsp, s->avctx); | ||||
| } else { | } else { | ||||
| av_log(avctx, AV_LOG_DEBUG, "Unsupported bit depth: %d\n", h->sps.bit_depth_luma); | av_log(avctx, AV_LOG_DEBUG, "Unsupported bit depth: %d\n", h->sps.bit_depth_luma); | ||||