| @@ -20,13 +20,14 @@ | |||
| */ | |||
| /** | |||
| * @file libavcodec/dirac_arith.c | |||
| * @file | |||
| * Arithmetic decoder for Dirac | |||
| * @author Marco Gerards <marco@gnu.org> | |||
| */ | |||
| #include "dirac_arith.h" | |||
| const uint16_t ff_dirac_prob[256] = { | |||
| 0, 2, 5, 8, 11, 15, 20, 24, | |||
| 29, 35, 41, 47, 53, 60, 67, 74, | |||
| @@ -20,7 +20,7 @@ | |||
| */ | |||
| /** | |||
| * @file libavcodec/dirac_arith.h | |||
| * @file | |||
| * Arithmetic decoder for Dirac | |||
| * @author Marco Gerards <marco@gnu.org> | |||
| */ | |||
| @@ -37,8 +37,6 @@ | |||
| #include "dirac.h" | |||
| #include "diracdsp.h" | |||
| #undef printf | |||
| /** | |||
| * The spec limits the number of wavelet decompositions to 4 for both | |||
| * level 1 (VC-2) and 128 (long-gop default). | |||
| @@ -74,17 +72,16 @@ | |||
| #define ff_emulated_edge_mc ff_emulated_edge_mc_8 /* Fix: change the calls to this function regarding bit depth */ | |||
/**
 * Round size up to the next multiple of (1 << depth).
 * depth is expanded several times, so it must not have side effects.
 */
#define CALC_PADDING(size, depth) \
    ((((size) + (1 << (depth)) - 1) >> (depth)) << (depth))

/** Divide a by b, rounding the quotient up (for a >= 0 and b > 0). */
#define DIVRNDUP(a, b) (((a) + (b) - 1) / (b))
| typedef struct { | |||
| AVFrame avframe; | |||
| AVFrame avframe; | |||
| int interpolated[3]; /* 1 if hpel[] is valid */ | |||
| uint8_t *hpel[3][4]; | |||
| uint8_t *hpel_base[3][4]; | |||
| uint8_t *hpel[3][4]; | |||
| uint8_t *hpel_base[3][4]; | |||
| } DiracFrame; | |||
| typedef struct { | |||
| @@ -248,35 +245,35 @@ static const uint8_t default_qmat[][4][4] = { | |||
| }; | |||
| static const int qscale_tab[MAX_QUANT+1] = { | |||
| 4, 5, 6, 7, 8, 10, 11, 13, | |||
| 16, 19, 23, 27, 32, 38, 45, 54, | |||
| 64, 76, 91, 108, 128, 152, 181, 215, | |||
| 256, 304, 362, 431, 512, 609, 724, 861, | |||
| 1024, 1218, 1448, 1722, 2048, 2435, 2896, 3444, | |||
| 4096, 4871, 5793, 6889, 8192, 9742, 11585, 13777, | |||
| 4, 5, 6, 7, 8, 10, 11, 13, | |||
| 16, 19, 23, 27, 32, 38, 45, 54, | |||
| 64, 76, 91, 108, 128, 152, 181, 215, | |||
| 256, 304, 362, 431, 512, 609, 724, 861, | |||
| 1024, 1218, 1448, 1722, 2048, 2435, 2896, 3444, | |||
| 4096, 4871, 5793, 6889, 8192, 9742, 11585, 13777, | |||
| 16384, 19484, 23170, 27554, 32768, 38968, 46341, 55109, | |||
| 65536, 77936 | |||
| }; | |||
| static const int qoffset_intra_tab[MAX_QUANT+1] = { | |||
| 1, 2, 3, 4, 4, 5, 6, 7, | |||
| 8, 10, 12, 14, 16, 19, 23, 27, | |||
| 32, 38, 46, 54, 64, 76, 91, 108, | |||
| 128, 152, 181, 216, 256, 305, 362, 431, | |||
| 512, 609, 724, 861, 1024, 1218, 1448, 1722, | |||
| 2048, 2436, 2897, 3445, 4096, 4871, 5793, 6889, | |||
| 8192, 9742, 11585, 13777, 16384, 19484, 23171, 27555, | |||
| 1, 2, 3, 4, 4, 5, 6, 7, | |||
| 8, 10, 12, 14, 16, 19, 23, 27, | |||
| 32, 38, 46, 54, 64, 76, 91, 108, | |||
| 128, 152, 181, 216, 256, 305, 362, 431, | |||
| 512, 609, 724, 861, 1024, 1218, 1448, 1722, | |||
| 2048, 2436, 2897, 3445, 4096, 4871, 5793, 6889, | |||
| 8192, 9742, 11585, 13777, 16384, 19484, 23171, 27555, | |||
| 32768, 38968 | |||
| }; | |||
| static const int qoffset_inter_tab[MAX_QUANT+1] = { | |||
| 1, 2, 2, 3, 3, 4, 4, 5, | |||
| 6, 7, 9, 10, 12, 14, 17, 20, | |||
| 24, 29, 34, 41, 48, 57, 68, 81, | |||
| 96, 114, 136, 162, 192, 228, 272, 323, | |||
| 384, 457, 543, 646, 768, 913, 1086, 1292, | |||
| 1536, 1827, 2172, 2583, 3072, 3653, 4344, 5166, | |||
| 6144, 7307, 8689, 10333, 12288, 14613, 17378, 20666, | |||
| 1, 2, 2, 3, 3, 4, 4, 5, | |||
| 6, 7, 9, 10, 12, 14, 17, 20, | |||
| 24, 29, 34, 41, 48, 57, 68, 81, | |||
| 96, 114, 136, 162, 192, 228, 272, 323, | |||
| 384, 457, 543, 646, 768, 913, 1086, 1292, | |||
| 1536, 1827, 2172, 2583, 3072, 3653, 4344, 5166, | |||
| 6144, 7307, 8689, 10333, 12288, 14613, 17378, 20666, | |||
| 24576, 29226 | |||
| }; | |||
| @@ -20,13 +20,13 @@ | |||
| #include "dsputil.h" | |||
| #include "diracdsp.h" | |||
| //MMX_DISABLE #include "libavcodec/x86/diracdsp_mmx.h" | |||
| /* MMX_DISABLE #include "libavcodec/x86/diracdsp_mmx.h" */ | |||
/**
 * 8-tap filter over the samples src[-3*stride] .. src[4*stride], symmetric
 * about the midpoint between src[0] and src[stride].
 * Taps are (-1, 3, -7, 21, 21, -7, 3, -1); the sum is rounded (+16) and
 * shifted right by 5. The result is NOT clipped here.
 * src and stride are expanded several times, so neither may have side effects.
 */
#define FILTER(src, stride)                                     \
    ((21*((src)[ 0*(stride)] + (src)[1*(stride)])               \
      -7*((src)[-1*(stride)] + (src)[2*(stride)])               \
      +3*((src)[-2*(stride)] + (src)[3*(stride)])               \
      -1*((src)[-3*(stride)] + (src)[4*(stride)]) + 16) >> 5)
| static void dirac_hpel_filter(uint8_t *dsth, uint8_t *dstv, uint8_t *dstc, uint8_t *src, | |||
| int stride, int width, int height) | |||
| @@ -50,28 +50,28 @@ static void dirac_hpel_filter(uint8_t *dsth, uint8_t *dstv, uint8_t *dstc, uint8 | |||
| } | |||
| } | |||
/*
 * PIXOP_BILINEAR(PFX, OP, WIDTH) defines
 * ff_<PFX>_dirac_pixels<WIDTH>_bilinear_c(): for h rows of WIDTH pixels it
 * blends the four sources src[0..3] with the four byte weights found at
 * src[4], adds 8, shifts right by 4 and hands the value to OP together with
 * the destination pixel. dst and all four sources advance by stride per row.
 */
#define PIXOP_BILINEAR(PFX, OP, WIDTH)                                          \
    static void ff_ ## PFX ## _dirac_pixels ## WIDTH ## _bilinear_c(            \
        uint8_t *dst, const uint8_t *src[5], int stride, int h)                 \
    {                                                                           \
        const uint8_t *ref0 = src[0], *ref1 = src[1];                           \
        const uint8_t *ref2 = src[2], *ref3 = src[3];                           \
        const uint8_t *wt   = src[4];                                           \
        int col;                                                                \
                                                                                \
        for (; h != 0; h--) {                                                   \
            for (col = 0; col < WIDTH; col++) {                                 \
                OP(dst[col], (ref0[col]*wt[0] + ref1[col]*wt[1] +               \
                              ref2[col]*wt[2] + ref3[col]*wt[3] + 8) >> 4);     \
            }                                                                   \
                                                                                \
            ref3 += stride;                                                     \
            ref2 += stride;                                                     \
            ref1 += stride;                                                     \
            ref0 += stride;                                                     \
            dst  += stride;                                                     \
        }                                                                       \
    }

/* Store val into dst. */
#define OP_PUT(dst, val) ((dst) = (val))
/* Replace dst with the average of dst and val, rounding halves up. */
#define OP_AVG(dst, val) ((dst) = ((dst) + (val) + 1) >> 1)
| @@ -86,50 +86,50 @@ PIXOP_BILINEAR(avg, OP_AVG, 32) | |||
/*
 * Per-pixel helpers used by DIRAC_WEIGHT. They pick up names from the
 * enclosing function body: block/weight for op_scale1, dst/src/weights/
 * weightd for op_scale2, and log2_denom for both. The result is clipped to
 * 0..255 by av_clip_uint8().
 * NOTE(review): (1<<(log2_denom-1)) is not a valid shift when log2_denom is
 * 0; callers presumably never pass 0 - confirm.
 */
#define op_scale1(x)  block[x] = av_clip_uint8( (block[x]*weight + (1<<(log2_denom-1))) >> log2_denom)
#define op_scale2(x)  dst[x] = av_clip_uint8( (src[x]*weights + dst[x]*weightd + (1<<(log2_denom-1))) >> log2_denom)

/*
 * DIRAC_WEIGHT(W) defines two functions working on h rows of W pixels:
 *   weight_dirac_pixels<W>_c()   scales block[] in place by weight;
 *   biweight_dirac_pixels<W>_c() replaces dst[] with the weighted sum of
 *                                src[] (weights) and dst[] (weightd).
 * Both advance their pointers by stride per row.
 *
 * Each column 0..W-1 is updated exactly once per row. The loops used to
 * step by one while updating both x and x+1, which applied the weight twice
 * to columns 1..W-1 and wrote one pixel past the block (column W).
 */
#define DIRAC_WEIGHT(W)                                                                      \
    static void weight_dirac_pixels ## W ## _c(uint8_t *block, int stride, int log2_denom,   \
                                               int weight, int h) {                          \
        int x;                                                                               \
        while (h--) {                                                                        \
            for (x = 0; x < W; x++) {                                                        \
                op_scale1(x);                                                                \
            }                                                                                \
            block += stride;                                                                 \
        }                                                                                    \
    }                                                                                        \
    static void biweight_dirac_pixels ## W ## _c(uint8_t *dst, uint8_t *src, int stride, int log2_denom, \
                                                 int weightd, int weights, int h) {          \
        int x;                                                                               \
        while (h--) {                                                                        \
            for (x = 0; x < W; x++) {                                                        \
                op_scale2(x);                                                                \
            }                                                                                \
            dst += stride;                                                                   \
            src += stride;                                                                   \
        }                                                                                    \
    }
| DIRAC_WEIGHT(8) | |||
| DIRAC_WEIGHT(16) | |||
| DIRAC_WEIGHT(32) | |||
/*
 * ADD_OBMC(xblen) defines add_obmc<xblen>_c(): for each of yblen rows it
 * accumulates src[x] * obmc_weight[x] into dst[x] across the block's columns.
 * dst and src both advance by stride elements per row, while the weight
 * pointer advances by a fixed 32 entries per row.
 * Columns are processed in pairs, so xblen is expected to be even.
 */
#define ADD_OBMC(xblen)                                                                  \
    static void add_obmc ## xblen ## _c(uint16_t *dst, const uint8_t *src, int stride,   \
                                        const uint8_t *obmc_weight, int yblen)           \
    {                                                                                    \
        int col;                                                                         \
        for (; yblen != 0; yblen--) {                                                    \
            for (col = 0; col < xblen; col += 2) {                                       \
                dst[col]     += src[col]     * obmc_weight[col];                         \
                dst[col + 1] += src[col + 1] * obmc_weight[col + 1];                     \
            }                                                                            \
            obmc_weight += 32;                                                           \
            src         += stride;                                                       \
            dst         += stride;                                                       \
        }                                                                                \
    }
| ADD_OBMC(8) | |||
| ADD_OBMC(16) | |||
| @@ -167,7 +167,7 @@ static void add_rect_clamped_c(uint8_t *dst, const uint16_t *src, int stride, | |||
| } | |||
| } | |||
| #define PIXFUNC(PFX, WIDTH) \ | |||
| #define PIXFUNC(PFX, WIDTH) \ | |||
| c->PFX ## _dirac_pixels_tab[WIDTH>>4][0] = ff_ ## PFX ## _dirac_pixels ## WIDTH ## _c; \ | |||
| c->PFX ## _dirac_pixels_tab[WIDTH>>4][1] = ff_ ## PFX ## _dirac_pixels ## WIDTH ## _l2_c; \ | |||
| c->PFX ## _dirac_pixels_tab[WIDTH>>4][2] = ff_ ## PFX ## _dirac_pixels ## WIDTH ## _l4_c; \ | |||
| @@ -197,5 +197,5 @@ void ff_diracdsp_init(DiracDSPContext *c) | |||
| PIXFUNC(avg, 16); | |||
| PIXFUNC(avg, 32); | |||
| //MMX_DISABLE if (HAVE_MMX) ff_diracdsp_init_mmx(c); | |||
| /* MMX_DISABLE if (HAVE_MMX) ff_diracdsp_init_mmx(c); */ | |||
| } | |||
| @@ -48,7 +48,7 @@ typedef struct { | |||
| dirac_biweight_func biweight_dirac_pixels_tab[3]; | |||
| } DiracDSPContext; | |||
/**
 * Declare the 8-, 16- and 32-pixel variants of
 * ff_<PFX>_dirac_pixels<N>_<EXT>(dst, src, stride, h).
 * The expansion deliberately ends without a semicolon; the invocation
 * supplies the final one.
 */
#define DECL_DIRAC_PIXOP(PFX, EXT)                                                                          \
    void ff_ ## PFX ## _dirac_pixels8_  ## EXT(uint8_t *dst, const uint8_t *src[5], int stride, int h);     \
    void ff_ ## PFX ## _dirac_pixels16_ ## EXT(uint8_t *dst, const uint8_t *src[5], int stride, int h);     \
    void ff_ ## PFX ## _dirac_pixels32_ ## EXT(uint8_t *dst, const uint8_t *src[5], int stride, int h)
| @@ -26,39 +26,39 @@ void ff_put_rect_clamped_sse2(uint8_t *dst, int dst_stride, const int16_t *src, | |||
| void ff_put_signed_rect_clamped_mmx(uint8_t *dst, int dst_stride, const int16_t *src, int src_stride, int width, int height); | |||
| void ff_put_signed_rect_clamped_sse2(uint8_t *dst, int dst_stride, const int16_t *src, int src_stride, int width, int height); | |||
/*
 * HPEL_FILTER(MMSIZE, EXT) declares the external row filters
 * ff_dirac_hpel_filter_v_<EXT> and ff_dirac_hpel_filter_h_<EXT> and defines
 * dirac_hpel_filter_<EXT>(), which runs them once per row for height rows:
 *   1. the v filter writes dstv, starting MMSIZE bytes to the left of the
 *      row and covering width + MMSIZE + 5 bytes;
 *   2. the h filter writes dsth from src;
 *   3. the h filter writes dstc from the dstv row produced in step 1.
 * Step 3 reads what step 1 wrote, so the call order must be kept.
 */
#define HPEL_FILTER(MMSIZE, EXT)                                                              \
    void ff_dirac_hpel_filter_v_ ## EXT(uint8_t *, uint8_t *, int, int);                      \
    void ff_dirac_hpel_filter_h_ ## EXT(uint8_t *, uint8_t *, int);                           \
                                                                                              \
    static void dirac_hpel_filter_ ## EXT(uint8_t *dsth, uint8_t *dstv, uint8_t *dstc,        \
                                          uint8_t *src, int stride, int width, int height)    \
    {                                                                                         \
        for (; height != 0; height--) {                                                       \
            ff_dirac_hpel_filter_v_ ## EXT(dstv - MMSIZE, src - MMSIZE,                       \
                                           stride, width + MMSIZE + 5);                       \
            ff_dirac_hpel_filter_h_ ## EXT(dsth, src,  width);                                \
            ff_dirac_hpel_filter_h_ ## EXT(dstc, dstv, width);                                \
                                                                                              \
            src  += stride;                                                                   \
            dstc += stride;                                                                   \
            dstv += stride;                                                                   \
            dsth += stride;                                                                   \
        }                                                                                     \
    }
| #if !ARCH_X86_64 | |||
| HPEL_FILTER(8, mmx) | |||
| #endif | |||
| HPEL_FILTER(16, sse2) | |||
/*
 * Install the 8-, 16- and 32-pixel <EXT> implementations of the <PFX> pixel
 * operation into slot IDX of rows 0, 1 and 2 of c-><PFX>_dirac_pixels_tab.
 * Expands to three statements and expects a pointer named c in scope; the
 * final semicolon comes from the invocation.
 */
#define PIXFUNC(PFX, IDX, EXT)                                                      \
    c->PFX ## _dirac_pixels_tab[0][IDX] = ff_ ## PFX ## _dirac_pixels8_  ## EXT;    \
    c->PFX ## _dirac_pixels_tab[1][IDX] = ff_ ## PFX ## _dirac_pixels16_ ## EXT;    \
    c->PFX ## _dirac_pixels_tab[2][IDX] = ff_ ## PFX ## _dirac_pixels32_ ## EXT
| void ff_diracdsp_init_mmx(DiracDSPContext* c) | |||
| { | |||
| int mm_flags = av_get_cpu_flags();; | |||
| int mm_flags = av_get_cpu_flags();; | |||
| #if HAVE_YASM | |||
| c->add_dirac_obmc[0] = ff_add_dirac_obmc8_mmx; | |||