| @@ -1537,6 +1537,7 @@ CONFIG_EXTRA=" | |||||
| rtpdec | rtpdec | ||||
| rtpenc_chain | rtpenc_chain | ||||
| sinewin | sinewin | ||||
| tpeldsp | |||||
| videodsp | videodsp | ||||
| vp3dsp | vp3dsp | ||||
| " | " | ||||
| @@ -1820,7 +1821,7 @@ sipr_decoder_select="lsp" | |||||
| sp5x_decoder_select="mjpeg_decoder" | sp5x_decoder_select="mjpeg_decoder" | ||||
| svq1_decoder_select="hpeldsp" | svq1_decoder_select="hpeldsp" | ||||
| svq1_encoder_select="aandcttables dsputil hpeldsp mpegvideoenc" | svq1_encoder_select="aandcttables dsputil hpeldsp mpegvideoenc" | ||||
| svq3_decoder_select="h264_decoder hpeldsp" | |||||
| svq3_decoder_select="h264_decoder hpeldsp tpeldsp" | |||||
| svq3_decoder_suggest="zlib" | svq3_decoder_suggest="zlib" | ||||
| tak_decoder_select="dsputil" | tak_decoder_select="dsputil" | ||||
| theora_decoder_select="vp3_decoder" | theora_decoder_select="vp3_decoder" | ||||
| @@ -79,9 +79,6 @@ qpel{8,16}_mc??_old_c / *pixels{8,16}_l4 | |||||
| Just used to work around a bug in an old libavcodec encoder version. | Just used to work around a bug in an old libavcodec encoder version. | ||||
| Don't optimize them. | Don't optimize them. | ||||
| tpel_mc_func {put,avg}_tpel_pixels_tab | |||||
| Used only for SVQ3, so only optimize them if you need fast SVQ3 decoding. | |||||
| add_bytes/diff_bytes | add_bytes/diff_bytes | ||||
| For huffyuv only, optimize if you want a faster ffhuffyuv codec. | For huffyuv only, optimize if you want a faster ffhuffyuv codec. | ||||
| @@ -65,6 +65,7 @@ OBJS-$(CONFIG_RANGECODER) += rangecoder.o | |||||
| RDFT-OBJS-$(CONFIG_HARDCODED_TABLES) += sin_tables.o | RDFT-OBJS-$(CONFIG_HARDCODED_TABLES) += sin_tables.o | ||||
| OBJS-$(CONFIG_RDFT) += rdft.o $(RDFT-OBJS-yes) | OBJS-$(CONFIG_RDFT) += rdft.o $(RDFT-OBJS-yes) | ||||
| OBJS-$(CONFIG_SINEWIN) += sinewin.o | OBJS-$(CONFIG_SINEWIN) += sinewin.o | ||||
| OBJS-$(CONFIG_TPELDSP) += tpeldsp.o | |||||
| OBJS-$(CONFIG_VAAPI) += vaapi.o | OBJS-$(CONFIG_VAAPI) += vaapi.o | ||||
| OBJS-$(CONFIG_VDPAU) += vdpau.o | OBJS-$(CONFIG_VDPAU) += vdpau.o | ||||
| OBJS-$(CONFIG_VIDEODSP) += videodsp.o | OBJS-$(CONFIG_VIDEODSP) += videodsp.o | ||||
| @@ -48,6 +48,7 @@ uint32_t ff_square_tab[512] = { 0, }; | |||||
| #undef BIT_DEPTH | #undef BIT_DEPTH | ||||
| #define BIT_DEPTH 8 | #define BIT_DEPTH 8 | ||||
| #include "tpel_template.c" | |||||
| #include "dsputil_template.c" | #include "dsputil_template.c" | ||||
| // 0x7f7f7f7f or 0x7f7f7f7f7f7f7f7f or whatever, depending on the cpu's native arithmetic size | // 0x7f7f7f7f or 0x7f7f7f7f7f7f7f7f or whatever, depending on the cpu's native arithmetic size | ||||
| @@ -540,284 +541,6 @@ void ff_gmc_c(uint8_t *dst, uint8_t *src, int stride, int h, int ox, int oy, | |||||
| } | } | ||||
| } | } | ||||
/**
 * Thirdpel "put" for the full-pel position (no interpolation).
 *
 * Dispatches on the block width to the matching fixed-width copy helper.
 * Widths other than 2, 4, 8 and 16 leave dst untouched.
 */
static inline void put_tpel_pixels_mc00_c(uint8_t *dst, const uint8_t *src,
                                          int stride, int width, int height)
{
    if (width == 2)
        put_pixels2_8_c(dst, src, stride, height);
    else if (width == 4)
        put_pixels4_8_c(dst, src, stride, height);
    else if (width == 8)
        put_pixels8_8_c(dst, src, stride, height);
    else if (width == 16)
        put_pixels16_8_c(dst, src, stride, height);
}
/**
 * Thirdpel "put", horizontal-only blend: 2 parts current sample,
 * 1 part right-hand neighbour.
 *
 * "(sum + 1) * 683 >> 11" is a fixed-point stand-in for a rounded
 * division by three (683 / 2048 is roughly 1/3).
 */
static inline void put_tpel_pixels_mc10_c(uint8_t *dst, const uint8_t *src,
                                          int stride, int width, int height)
{
    int y, x;

    for (y = 0; y < height; y++, src += stride, dst += stride) {
        for (x = 0; x < width; x++) {
            const int sum = 2 * src[x] + src[x + 1] + 1;

            dst[x] = (sum * 683) >> 11;
        }
    }
}
/**
 * Thirdpel "put", horizontal-only blend: 1 part current sample,
 * 2 parts right-hand neighbour.
 *
 * "(sum + 1) * 683 >> 11" is a fixed-point stand-in for a rounded
 * division by three (683 / 2048 is roughly 1/3).
 */
static inline void put_tpel_pixels_mc20_c(uint8_t *dst, const uint8_t *src,
                                          int stride, int width, int height)
{
    int y, x;

    for (y = 0; y < height; y++, src += stride, dst += stride) {
        for (x = 0; x < width; x++) {
            const int sum = src[x] + 2 * src[x + 1] + 1;

            dst[x] = (sum * 683) >> 11;
        }
    }
}
/**
 * Thirdpel "put", vertical-only blend: 2 parts current sample,
 * 1 part the sample one line below.
 *
 * "(sum + 1) * 683 >> 11" is a fixed-point stand-in for a rounded
 * division by three (683 / 2048 is roughly 1/3).
 */
static inline void put_tpel_pixels_mc01_c(uint8_t *dst, const uint8_t *src,
                                          int stride, int width, int height)
{
    int y, x;

    for (y = 0; y < height; y++, src += stride, dst += stride) {
        const uint8_t *below = src + stride;

        for (x = 0; x < width; x++) {
            const int sum = 2 * src[x] + below[x] + 1;

            dst[x] = (sum * 683) >> 11;
        }
    }
}
/**
 * Thirdpel "put", two-dimensional blend of a 2x2 neighbourhood with
 * weights 4 (current), 3 (right), 3 (below), 2 (below-right).
 *
 * The weights add up to 12; "(sum + 6) * 2731 >> 15" is a fixed-point
 * stand-in for a rounded division by twelve (2731 / 32768 is roughly 1/12).
 */
static inline void put_tpel_pixels_mc11_c(uint8_t *dst, const uint8_t *src,
                                          int stride, int width, int height)
{
    int y, x;

    for (y = 0; y < height; y++, src += stride, dst += stride) {
        const uint8_t *below = src + stride;

        for (x = 0; x < width; x++) {
            const int sum = 4 * src[x]   + 3 * src[x + 1] +
                            3 * below[x] + 2 * below[x + 1] + 6;

            dst[x] = (sum * 2731) >> 15;
        }
    }
}
/**
 * Thirdpel "put", two-dimensional blend of a 2x2 neighbourhood with
 * weights 3 (current), 2 (right), 4 (below), 3 (below-right).
 *
 * The weights add up to 12; "(sum + 6) * 2731 >> 15" is a fixed-point
 * stand-in for a rounded division by twelve (2731 / 32768 is roughly 1/12).
 */
static inline void put_tpel_pixels_mc12_c(uint8_t *dst, const uint8_t *src,
                                          int stride, int width, int height)
{
    int y, x;

    for (y = 0; y < height; y++, src += stride, dst += stride) {
        const uint8_t *below = src + stride;

        for (x = 0; x < width; x++) {
            const int sum = 3 * src[x]   + 2 * src[x + 1] +
                            4 * below[x] + 3 * below[x + 1] + 6;

            dst[x] = (sum * 2731) >> 15;
        }
    }
}
/**
 * Thirdpel "put", vertical-only blend: 1 part current sample,
 * 2 parts the sample one line below.
 *
 * "(sum + 1) * 683 >> 11" is a fixed-point stand-in for a rounded
 * division by three (683 / 2048 is roughly 1/3).
 */
static inline void put_tpel_pixels_mc02_c(uint8_t *dst, const uint8_t *src,
                                          int stride, int width, int height)
{
    int y, x;

    for (y = 0; y < height; y++, src += stride, dst += stride) {
        const uint8_t *below = src + stride;

        for (x = 0; x < width; x++) {
            const int sum = src[x] + 2 * below[x] + 1;

            dst[x] = (sum * 683) >> 11;
        }
    }
}
/**
 * Thirdpel "put", two-dimensional blend of a 2x2 neighbourhood with
 * weights 3 (current), 4 (right), 2 (below), 3 (below-right).
 *
 * The weights add up to 12; "(sum + 6) * 2731 >> 15" is a fixed-point
 * stand-in for a rounded division by twelve (2731 / 32768 is roughly 1/12).
 */
static inline void put_tpel_pixels_mc21_c(uint8_t *dst, const uint8_t *src,
                                          int stride, int width, int height)
{
    int y, x;

    for (y = 0; y < height; y++, src += stride, dst += stride) {
        const uint8_t *below = src + stride;

        for (x = 0; x < width; x++) {
            const int sum = 3 * src[x]   + 4 * src[x + 1] +
                            2 * below[x] + 3 * below[x + 1] + 6;

            dst[x] = (sum * 2731) >> 15;
        }
    }
}
/**
 * Thirdpel "put", two-dimensional blend of a 2x2 neighbourhood with
 * weights 2 (current), 3 (right), 3 (below), 4 (below-right).
 *
 * The weights add up to 12; "(sum + 6) * 2731 >> 15" is a fixed-point
 * stand-in for a rounded division by twelve (2731 / 32768 is roughly 1/12).
 */
static inline void put_tpel_pixels_mc22_c(uint8_t *dst, const uint8_t *src,
                                          int stride, int width, int height)
{
    int y, x;

    for (y = 0; y < height; y++, src += stride, dst += stride) {
        const uint8_t *below = src + stride;

        for (x = 0; x < width; x++) {
            const int sum = 2 * src[x]   + 3 * src[x + 1] +
                            3 * below[x] + 4 * below[x + 1] + 6;

            dst[x] = (sum * 2731) >> 15;
        }
    }
}
/**
 * Thirdpel "avg" for the full-pel position (no interpolation).
 *
 * Dispatches on the block width to the matching fixed-width averaging
 * helper. Widths other than 2, 4, 8 and 16 leave dst untouched.
 */
static inline void avg_tpel_pixels_mc00_c(uint8_t *dst, const uint8_t *src,
                                          int stride, int width, int height)
{
    if (width == 2)
        avg_pixels2_8_c(dst, src, stride, height);
    else if (width == 4)
        avg_pixels4_8_c(dst, src, stride, height);
    else if (width == 8)
        avg_pixels8_8_c(dst, src, stride, height);
    else if (width == 16)
        avg_pixels16_8_c(dst, src, stride, height);
}
/**
 * Thirdpel "avg", horizontal-only blend: 2 parts current sample,
 * 1 part right-hand neighbour, then a rounded average with the value
 * already stored in dst.
 *
 * "(sum + 1) * 683 >> 11" is a fixed-point stand-in for a rounded
 * division by three (683 / 2048 is roughly 1/3).
 */
static inline void avg_tpel_pixels_mc10_c(uint8_t *dst, const uint8_t *src,
                                          int stride, int width, int height)
{
    int y, x;

    for (y = 0; y < height; y++, src += stride, dst += stride) {
        for (x = 0; x < width; x++) {
            const int pred = ((2 * src[x] + src[x + 1] + 1) * 683) >> 11;

            dst[x] = (dst[x] + pred + 1) >> 1;
        }
    }
}
/**
 * Thirdpel "avg", horizontal-only blend: 1 part current sample,
 * 2 parts right-hand neighbour, then a rounded average with the value
 * already stored in dst.
 *
 * "(sum + 1) * 683 >> 11" is a fixed-point stand-in for a rounded
 * division by three (683 / 2048 is roughly 1/3).
 */
static inline void avg_tpel_pixels_mc20_c(uint8_t *dst, const uint8_t *src,
                                          int stride, int width, int height)
{
    int y, x;

    for (y = 0; y < height; y++, src += stride, dst += stride) {
        for (x = 0; x < width; x++) {
            const int pred = ((src[x] + 2 * src[x + 1] + 1) * 683) >> 11;

            dst[x] = (dst[x] + pred + 1) >> 1;
        }
    }
}
/**
 * Thirdpel "avg", vertical-only blend: 2 parts current sample,
 * 1 part the sample one line below, then a rounded average with the
 * value already stored in dst.
 *
 * "(sum + 1) * 683 >> 11" is a fixed-point stand-in for a rounded
 * division by three (683 / 2048 is roughly 1/3).
 */
static inline void avg_tpel_pixels_mc01_c(uint8_t *dst, const uint8_t *src,
                                          int stride, int width, int height)
{
    int y, x;

    for (y = 0; y < height; y++, src += stride, dst += stride) {
        const uint8_t *below = src + stride;

        for (x = 0; x < width; x++) {
            const int pred = ((2 * src[x] + below[x] + 1) * 683) >> 11;

            dst[x] = (dst[x] + pred + 1) >> 1;
        }
    }
}
/**
 * Thirdpel "avg", two-dimensional blend of a 2x2 neighbourhood with
 * weights 4 (current), 3 (right), 3 (below), 2 (below-right), then a
 * rounded average with the value already stored in dst.
 *
 * The weights add up to 12; "(sum + 6) * 2731 >> 15" is a fixed-point
 * stand-in for a rounded division by twelve (2731 / 32768 is roughly 1/12).
 */
static inline void avg_tpel_pixels_mc11_c(uint8_t *dst, const uint8_t *src,
                                          int stride, int width, int height)
{
    int y, x;

    for (y = 0; y < height; y++, src += stride, dst += stride) {
        const uint8_t *below = src + stride;

        for (x = 0; x < width; x++) {
            const int sum  = 4 * src[x]   + 3 * src[x + 1] +
                             3 * below[x] + 2 * below[x + 1] + 6;
            const int pred = (sum * 2731) >> 15;

            dst[x] = (dst[x] + pred + 1) >> 1;
        }
    }
}
/**
 * Thirdpel "avg", two-dimensional blend of a 2x2 neighbourhood with
 * weights 3 (current), 2 (right), 4 (below), 3 (below-right), then a
 * rounded average with the value already stored in dst.
 *
 * The weights add up to 12; "(sum + 6) * 2731 >> 15" is a fixed-point
 * stand-in for a rounded division by twelve (2731 / 32768 is roughly 1/12).
 */
static inline void avg_tpel_pixels_mc12_c(uint8_t *dst, const uint8_t *src,
                                          int stride, int width, int height)
{
    int y, x;

    for (y = 0; y < height; y++, src += stride, dst += stride) {
        const uint8_t *below = src + stride;

        for (x = 0; x < width; x++) {
            const int sum  = 3 * src[x]   + 2 * src[x + 1] +
                             4 * below[x] + 3 * below[x + 1] + 6;
            const int pred = (sum * 2731) >> 15;

            dst[x] = (dst[x] + pred + 1) >> 1;
        }
    }
}
/**
 * Thirdpel "avg", vertical-only blend: 1 part current sample,
 * 2 parts the sample one line below, then a rounded average with the
 * value already stored in dst.
 *
 * "(sum + 1) * 683 >> 11" is a fixed-point stand-in for a rounded
 * division by three (683 / 2048 is roughly 1/3).
 */
static inline void avg_tpel_pixels_mc02_c(uint8_t *dst, const uint8_t *src,
                                          int stride, int width, int height)
{
    int y, x;

    for (y = 0; y < height; y++, src += stride, dst += stride) {
        const uint8_t *below = src + stride;

        for (x = 0; x < width; x++) {
            const int pred = ((src[x] + 2 * below[x] + 1) * 683) >> 11;

            dst[x] = (dst[x] + pred + 1) >> 1;
        }
    }
}
/**
 * Thirdpel "avg", two-dimensional blend of a 2x2 neighbourhood with
 * weights 3 (current), 4 (right), 2 (below), 3 (below-right), then a
 * rounded average with the value already stored in dst.
 *
 * The weights add up to 12; "(sum + 6) * 2731 >> 15" is a fixed-point
 * stand-in for a rounded division by twelve (2731 / 32768 is roughly 1/12).
 */
static inline void avg_tpel_pixels_mc21_c(uint8_t *dst, const uint8_t *src,
                                          int stride, int width, int height)
{
    int y, x;

    for (y = 0; y < height; y++, src += stride, dst += stride) {
        const uint8_t *below = src + stride;

        for (x = 0; x < width; x++) {
            const int sum  = 3 * src[x]   + 4 * src[x + 1] +
                             2 * below[x] + 3 * below[x + 1] + 6;
            const int pred = (sum * 2731) >> 15;

            dst[x] = (dst[x] + pred + 1) >> 1;
        }
    }
}
/**
 * Thirdpel "avg", two-dimensional blend of a 2x2 neighbourhood with
 * weights 2 (current), 3 (right), 3 (below), 4 (below-right), then a
 * rounded average with the value already stored in dst.
 *
 * The weights add up to 12; "(sum + 6) * 2731 >> 15" is a fixed-point
 * stand-in for a rounded division by twelve (2731 / 32768 is roughly 1/12).
 */
static inline void avg_tpel_pixels_mc22_c(uint8_t *dst, const uint8_t *src,
                                          int stride, int width, int height)
{
    int y, x;

    for (y = 0; y < height; y++, src += stride, dst += stride) {
        const uint8_t *below = src + stride;

        for (x = 0; x < width; x++) {
            const int sum  = 2 * src[x]   + 3 * src[x + 1] +
                             3 * below[x] + 4 * below[x + 1] + 6;
            const int pred = (sum * 2731) >> 15;

            dst[x] = (dst[x] + pred + 1) >> 1;
        }
    }
}
| #define QPEL_MC(r, OPNAME, RND, OP) \ | #define QPEL_MC(r, OPNAME, RND, OP) \ | ||||
| static void OPNAME ## mpeg4_qpel8_h_lowpass(uint8_t *dst, uint8_t *src, \ | static void OPNAME ## mpeg4_qpel8_h_lowpass(uint8_t *dst, uint8_t *src, \ | ||||
| int dstStride, int srcStride, \ | int dstStride, int srcStride, \ | ||||
| @@ -2781,26 +2504,6 @@ av_cold void ff_dsputil_init(DSPContext *c, AVCodecContext *avctx) | |||||
| c->pix_abs[1][2] = pix_abs8_y2_c; | c->pix_abs[1][2] = pix_abs8_y2_c; | ||||
| c->pix_abs[1][3] = pix_abs8_xy2_c; | c->pix_abs[1][3] = pix_abs8_xy2_c; | ||||
| c->put_tpel_pixels_tab[0] = put_tpel_pixels_mc00_c; | |||||
| c->put_tpel_pixels_tab[1] = put_tpel_pixels_mc10_c; | |||||
| c->put_tpel_pixels_tab[2] = put_tpel_pixels_mc20_c; | |||||
| c->put_tpel_pixels_tab[4] = put_tpel_pixels_mc01_c; | |||||
| c->put_tpel_pixels_tab[5] = put_tpel_pixels_mc11_c; | |||||
| c->put_tpel_pixels_tab[6] = put_tpel_pixels_mc21_c; | |||||
| c->put_tpel_pixels_tab[8] = put_tpel_pixels_mc02_c; | |||||
| c->put_tpel_pixels_tab[9] = put_tpel_pixels_mc12_c; | |||||
| c->put_tpel_pixels_tab[10] = put_tpel_pixels_mc22_c; | |||||
| c->avg_tpel_pixels_tab[0] = avg_tpel_pixels_mc00_c; | |||||
| c->avg_tpel_pixels_tab[1] = avg_tpel_pixels_mc10_c; | |||||
| c->avg_tpel_pixels_tab[2] = avg_tpel_pixels_mc20_c; | |||||
| c->avg_tpel_pixels_tab[4] = avg_tpel_pixels_mc01_c; | |||||
| c->avg_tpel_pixels_tab[5] = avg_tpel_pixels_mc11_c; | |||||
| c->avg_tpel_pixels_tab[6] = avg_tpel_pixels_mc21_c; | |||||
| c->avg_tpel_pixels_tab[8] = avg_tpel_pixels_mc02_c; | |||||
| c->avg_tpel_pixels_tab[9] = avg_tpel_pixels_mc12_c; | |||||
| c->avg_tpel_pixels_tab[10] = avg_tpel_pixels_mc22_c; | |||||
| #define dspfunc(PFX, IDX, NUM) \ | #define dspfunc(PFX, IDX, NUM) \ | ||||
| c->PFX ## _pixels_tab[IDX][0] = PFX ## NUM ## _mc00_c; \ | c->PFX ## _pixels_tab[IDX][0] = PFX ## NUM ## _mc00_c; \ | ||||
| c->PFX ## _pixels_tab[IDX][1] = PFX ## NUM ## _mc10_c; \ | c->PFX ## _pixels_tab[IDX][1] = PFX ## NUM ## _mc10_c; \ | ||||
| @@ -71,9 +71,6 @@ void ff_gmc_c(uint8_t *dst, uint8_t *src, int stride, int h, int ox, int oy, | |||||
| * Block sizes for op_pixels_func are 8x4,8x8 16x8 16x16. | * Block sizes for op_pixels_func are 8x4,8x8 16x8 16x16. | ||||
| * h for op_pixels_func is limited to { width / 2, width }, | * h for op_pixels_func is limited to { width / 2, width }, | ||||
| * but never larger than 16 and never smaller than 4. */ | * but never larger than 16 and never smaller than 4. */ | ||||
| typedef void (*tpel_mc_func)(uint8_t *block /* align width (8 or 16) */, | |||||
| const uint8_t *pixels /* align 1 */, | |||||
| int line_size, int w, int h); | |||||
| typedef void (*qpel_mc_func)(uint8_t *dst /* align width (8 or 16) */, | typedef void (*qpel_mc_func)(uint8_t *dst /* align width (8 or 16) */, | ||||
| uint8_t *src /* align 1 */, ptrdiff_t stride); | uint8_t *src /* align 1 */, ptrdiff_t stride); | ||||
| @@ -188,19 +185,6 @@ typedef struct DSPContext { | |||||
| int (*ssd_int8_vs_int16)(const int8_t *pix1, const int16_t *pix2, | int (*ssd_int8_vs_int16)(const int8_t *pix1, const int16_t *pix2, | ||||
| int size); | int size); | ||||
| /** | |||||
| * Thirdpel motion compensation with rounding (a + b + 1) >> 1. | |||||
| * this is an array[12] of motion compensation functions for the | |||||
| * 9 thirdpel positions<br> | |||||
| * *pixels_tab[xthirdpel + 4 * ythirdpel] | |||||
| * @param block destination where the result is stored | |||||
| * @param pixels source | |||||
| * @param line_size number of bytes in a horizontal line of block | |||||
| * @param h height | |||||
| */ | |||||
| tpel_mc_func put_tpel_pixels_tab[11]; // FIXME individual func ptr per width? | |||||
| tpel_mc_func avg_tpel_pixels_tab[11]; // FIXME individual func ptr per width? | |||||
| qpel_mc_func put_qpel_pixels_tab[2][16]; | qpel_mc_func put_qpel_pixels_tab[2][16]; | ||||
| qpel_mc_func avg_qpel_pixels_tab[2][16]; | qpel_mc_func avg_qpel_pixels_tab[2][16]; | ||||
| qpel_mc_func put_no_rnd_qpel_pixels_tab[2][16]; | qpel_mc_func put_no_rnd_qpel_pixels_tab[2][16]; | ||||
| @@ -24,6 +24,7 @@ | |||||
| #include "bit_depth_template.c" | #include "bit_depth_template.c" | ||||
| #include "hpel_template.c" | #include "hpel_template.c" | ||||
| #include "tpel_template.c" | |||||
| static inline void FUNC(copy_block2)(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride, int h) | static inline void FUNC(copy_block2)(uint8_t *dst, const uint8_t *src, int dstStride, int srcStride, int h) | ||||
| { | { | ||||
| @@ -22,47 +22,6 @@ | |||||
| #include "pixels.h" | #include "pixels.h" | ||||
| #define DEF_HPEL(OPNAME, OP) \ | #define DEF_HPEL(OPNAME, OP) \ | ||||
| static inline void FUNCC(OPNAME ## _pixels2)(uint8_t *block, \ | |||||
| const uint8_t *pixels, \ | |||||
| ptrdiff_t line_size, \ | |||||
| int h) \ | |||||
| { \ | |||||
| int i; \ | |||||
| for (i = 0; i < h; i++) { \ | |||||
| OP(*((pixel2 *) block), AV_RN2P(pixels)); \ | |||||
| pixels += line_size; \ | |||||
| block += line_size; \ | |||||
| } \ | |||||
| } \ | |||||
| \ | |||||
| static inline void FUNCC(OPNAME ## _pixels4)(uint8_t *block, \ | |||||
| const uint8_t *pixels, \ | |||||
| ptrdiff_t line_size, \ | |||||
| int h) \ | |||||
| { \ | |||||
| int i; \ | |||||
| for (i = 0; i < h; i++) { \ | |||||
| OP(*((pixel4 *) block), AV_RN4P(pixels)); \ | |||||
| pixels += line_size; \ | |||||
| block += line_size; \ | |||||
| } \ | |||||
| } \ | |||||
| \ | |||||
| static inline void FUNCC(OPNAME ## _pixels8)(uint8_t *block, \ | |||||
| const uint8_t *pixels, \ | |||||
| ptrdiff_t line_size, \ | |||||
| int h) \ | |||||
| { \ | |||||
| int i; \ | |||||
| for (i = 0; i < h; i++) { \ | |||||
| OP(*((pixel4 *) block), AV_RN4P(pixels)); \ | |||||
| OP(*((pixel4 *) (block + 4 * sizeof(pixel))), \ | |||||
| AV_RN4P(pixels + 4 * sizeof(pixel))); \ | |||||
| pixels += line_size; \ | |||||
| block += line_size; \ | |||||
| } \ | |||||
| } \ | |||||
| \ | |||||
| static inline void FUNC(OPNAME ## _pixels8_l2)(uint8_t *dst, \ | static inline void FUNC(OPNAME ## _pixels8_l2)(uint8_t *dst, \ | ||||
| const uint8_t *src1, \ | const uint8_t *src1, \ | ||||
| const uint8_t *src2, \ | const uint8_t *src2, \ | ||||
| @@ -134,10 +93,6 @@ static inline void FUNC(OPNAME ## _pixels16_l2)(uint8_t *dst, \ | |||||
| dst_stride, src_stride1, \ | dst_stride, src_stride1, \ | ||||
| src_stride2, h); \ | src_stride2, h); \ | ||||
| } \ | } \ | ||||
| \ | |||||
| CALL_2X_PIXELS(FUNCC(OPNAME ## _pixels16), \ | |||||
| FUNCC(OPNAME ## _pixels8), \ | |||||
| 8 * sizeof(pixel)) | |||||
| #define op_avg(a, b) a = rnd_avg_pixel4(a, b) | #define op_avg(a, b) a = rnd_avg_pixel4(a, b) | ||||
| #define op_put(a, b) a = b | #define op_put(a, b) a = b | ||||
| @@ -33,6 +33,7 @@ | |||||
| #include "bit_depth_template.c" | #include "bit_depth_template.c" | ||||
| #include "hpel_template.c" | #include "hpel_template.c" | ||||
| #include "tpel_template.c" | |||||
| #define PIXOP2(OPNAME, OP) \ | #define PIXOP2(OPNAME, OP) \ | ||||
| static inline void FUNC(OPNAME ## _no_rnd_pixels8_l2)(uint8_t *dst, \ | static inline void FUNC(OPNAME ## _no_rnd_pixels8_l2)(uint8_t *dst, \ | ||||
| @@ -54,6 +54,7 @@ | |||||
| #include "golomb.h" | #include "golomb.h" | ||||
| #include "hpeldsp.h" | #include "hpeldsp.h" | ||||
| #include "rectangle.h" | #include "rectangle.h" | ||||
| #include "tpeldsp.h" | |||||
| #if CONFIG_ZLIB | #if CONFIG_ZLIB | ||||
| #include <zlib.h> | #include <zlib.h> | ||||
| @@ -70,6 +71,7 @@ | |||||
| typedef struct { | typedef struct { | ||||
| H264Context h; | H264Context h; | ||||
| HpelDSPContext hdsp; | HpelDSPContext hdsp; | ||||
| TpelDSPContext tdsp; | |||||
| H264Picture *cur_pic; | H264Picture *cur_pic; | ||||
| H264Picture *next_pic; | H264Picture *next_pic; | ||||
| H264Picture *last_pic; | H264Picture *last_pic; | ||||
| @@ -321,9 +323,9 @@ static inline void svq3_mc_dir_part(SVQ3Context *s, | |||||
| src = h->edge_emu_buffer; | src = h->edge_emu_buffer; | ||||
| } | } | ||||
| if (thirdpel) | if (thirdpel) | ||||
| (avg ? h->dsp.avg_tpel_pixels_tab | |||||
| : h->dsp.put_tpel_pixels_tab)[dxy](dest, src, h->linesize, | |||||
| width, height); | |||||
| (avg ? s->tdsp.avg_tpel_pixels_tab | |||||
| : s->tdsp.put_tpel_pixels_tab)[dxy](dest, src, h->linesize, | |||||
| width, height); | |||||
| else | else | ||||
| (avg ? s->hdsp.avg_pixels_tab | (avg ? s->hdsp.avg_pixels_tab | ||||
| : s->hdsp.put_pixels_tab)[blocksize][dxy](dest, src, h->linesize, | : s->hdsp.put_pixels_tab)[blocksize][dxy](dest, src, h->linesize, | ||||
| @@ -349,10 +351,10 @@ static inline void svq3_mc_dir_part(SVQ3Context *s, | |||||
| src = h->edge_emu_buffer; | src = h->edge_emu_buffer; | ||||
| } | } | ||||
| if (thirdpel) | if (thirdpel) | ||||
| (avg ? h->dsp.avg_tpel_pixels_tab | |||||
| : h->dsp.put_tpel_pixels_tab)[dxy](dest, src, | |||||
| h->uvlinesize, | |||||
| width, height); | |||||
| (avg ? s->tdsp.avg_tpel_pixels_tab | |||||
| : s->tdsp.put_tpel_pixels_tab)[dxy](dest, src, | |||||
| h->uvlinesize, | |||||
| width, height); | |||||
| else | else | ||||
| (avg ? s->hdsp.avg_pixels_tab | (avg ? s->hdsp.avg_pixels_tab | ||||
| : s->hdsp.put_pixels_tab)[blocksize][dxy](dest, src, | : s->hdsp.put_pixels_tab)[blocksize][dxy](dest, src, | ||||
| @@ -881,6 +883,8 @@ static av_cold int svq3_decode_init(AVCodecContext *avctx) | |||||
| return -1; | return -1; | ||||
| ff_hpeldsp_init(&s->hdsp, avctx->flags); | ff_hpeldsp_init(&s->hdsp, avctx->flags); | ||||
| ff_tpeldsp_init(&s->tdsp); | |||||
| h->flags = avctx->flags; | h->flags = avctx->flags; | ||||
| h->is_complex = 1; | h->is_complex = 1; | ||||
| h->picture_structure = PICT_FRAME; | h->picture_structure = PICT_FRAME; | ||||
| @@ -0,0 +1,80 @@ | |||||
| /* | |||||
| * This file is part of Libav. | |||||
| * | |||||
| * Libav is free software; you can redistribute it and/or | |||||
| * modify it under the terms of the GNU Lesser General Public | |||||
| * License as published by the Free Software Foundation; either | |||||
| * version 2.1 of the License, or (at your option) any later version. | |||||
| * | |||||
| * Libav is distributed in the hope that it will be useful, | |||||
| * but WITHOUT ANY WARRANTY; without even the implied warranty of | |||||
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |||||
| * Lesser General Public License for more details. | |||||
| * | |||||
| * You should have received a copy of the GNU Lesser General Public | |||||
| * License along with Libav; if not, write to the Free Software | |||||
| * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |||||
| */ | |||||
| #include <stddef.h> | |||||
| #include <stdint.h> | |||||
| #include "libavutil/intreadwrite.h" | |||||
| #include "pixels.h" | |||||
| #include "rnd_avg.h" | |||||
| #include "bit_depth_template.c" | |||||
| #define DEF_TPEL(OPNAME, OP) \ | |||||
| static inline void FUNCC(OPNAME ## _pixels2)(uint8_t *block, \ | |||||
| const uint8_t *pixels, \ | |||||
| ptrdiff_t line_size, \ | |||||
| int h) \ | |||||
| { \ | |||||
| int i; \ | |||||
| for (i = 0; i < h; i++) { \ | |||||
| OP(*((pixel2 *) block), AV_RN2P(pixels)); \ | |||||
| pixels += line_size; \ | |||||
| block += line_size; \ | |||||
| } \ | |||||
| } \ | |||||
| \ | |||||
| static inline void FUNCC(OPNAME ## _pixels4)(uint8_t *block, \ | |||||
| const uint8_t *pixels, \ | |||||
| ptrdiff_t line_size, \ | |||||
| int h) \ | |||||
| { \ | |||||
| int i; \ | |||||
| for (i = 0; i < h; i++) { \ | |||||
| OP(*((pixel4 *) block), AV_RN4P(pixels)); \ | |||||
| pixels += line_size; \ | |||||
| block += line_size; \ | |||||
| } \ | |||||
| } \ | |||||
| \ | |||||
| static inline void FUNCC(OPNAME ## _pixels8)(uint8_t *block, \ | |||||
| const uint8_t *pixels, \ | |||||
| ptrdiff_t line_size, \ | |||||
| int h) \ | |||||
| { \ | |||||
| int i; \ | |||||
| for (i = 0; i < h; i++) { \ | |||||
| OP(*((pixel4 *) block), AV_RN4P(pixels)); \ | |||||
| OP(*((pixel4 *) (block + 4 * sizeof(pixel))), \ | |||||
| AV_RN4P(pixels + 4 * sizeof(pixel))); \ | |||||
| pixels += line_size; \ | |||||
| block += line_size; \ | |||||
| } \ | |||||
| } \ | |||||
| \ | |||||
| CALL_2X_PIXELS(FUNCC(OPNAME ## _pixels16), \ | |||||
| FUNCC(OPNAME ## _pixels8), \ | |||||
| 8 * sizeof(pixel)) | |||||
| #define op_avg(a, b) a = rnd_avg_pixel4(a, b) | |||||
| #define op_put(a, b) a = b | |||||
| DEF_TPEL(avg, op_avg) | |||||
| DEF_TPEL(put, op_put) | |||||
| #undef op_avg | |||||
| #undef op_put | |||||
| @@ -0,0 +1,333 @@ | |||||
| /* | |||||
| * thirdpel DSP functions | |||||
| * | |||||
| * This file is part of Libav. | |||||
| * | |||||
| * Libav is free software; you can redistribute it and/or | |||||
| * modify it under the terms of the GNU Lesser General Public | |||||
| * License as published by the Free Software Foundation; either | |||||
| * version 2.1 of the License, or (at your option) any later version. | |||||
| * | |||||
| * Libav is distributed in the hope that it will be useful, | |||||
| * but WITHOUT ANY WARRANTY; without even the implied warranty of | |||||
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |||||
| * Lesser General Public License for more details. | |||||
| * | |||||
| * You should have received a copy of the GNU Lesser General Public | |||||
| * License along with Libav; if not, write to the Free Software | |||||
| * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |||||
| */ | |||||
| /** | |||||
| * @file | |||||
| * thirdpel DSP functions | |||||
| */ | |||||
| #include <stdint.h> | |||||
| #include "libavutil/attributes.h" | |||||
| #include "tpeldsp.h" | |||||
| #define BIT_DEPTH 8 | |||||
| #include "tpel_template.c" | |||||
/**
 * Thirdpel "put" for the full-pel position (no interpolation).
 *
 * Dispatches on the block width to the matching fixed-width copy helper.
 * Widths other than 2, 4, 8 and 16 leave dst untouched.
 */
static inline void put_tpel_pixels_mc00_c(uint8_t *dst, const uint8_t *src,
                                          int stride, int width, int height)
{
    if (width == 2)
        put_pixels2_8_c(dst, src, stride, height);
    else if (width == 4)
        put_pixels4_8_c(dst, src, stride, height);
    else if (width == 8)
        put_pixels8_8_c(dst, src, stride, height);
    else if (width == 16)
        put_pixels16_8_c(dst, src, stride, height);
}
/**
 * Thirdpel "put", horizontal-only blend: 2 parts current sample,
 * 1 part right-hand neighbour.
 *
 * "(sum + 1) * 683 >> 11" is a fixed-point stand-in for a rounded
 * division by three (683 / 2048 is roughly 1/3).
 */
static inline void put_tpel_pixels_mc10_c(uint8_t *dst, const uint8_t *src,
                                          int stride, int width, int height)
{
    int y, x;

    for (y = 0; y < height; y++, src += stride, dst += stride) {
        for (x = 0; x < width; x++) {
            const int sum = 2 * src[x] + src[x + 1] + 1;

            dst[x] = (sum * 683) >> 11;
        }
    }
}
/**
 * Thirdpel "put", horizontal-only blend: 1 part current sample,
 * 2 parts right-hand neighbour.
 *
 * "(sum + 1) * 683 >> 11" is a fixed-point stand-in for a rounded
 * division by three (683 / 2048 is roughly 1/3).
 */
static inline void put_tpel_pixels_mc20_c(uint8_t *dst, const uint8_t *src,
                                          int stride, int width, int height)
{
    int y, x;

    for (y = 0; y < height; y++, src += stride, dst += stride) {
        for (x = 0; x < width; x++) {
            const int sum = src[x] + 2 * src[x + 1] + 1;

            dst[x] = (sum * 683) >> 11;
        }
    }
}
/**
 * Thirdpel "put", vertical-only blend: 2 parts current sample,
 * 1 part the sample one line below.
 *
 * "(sum + 1) * 683 >> 11" is a fixed-point stand-in for a rounded
 * division by three (683 / 2048 is roughly 1/3).
 */
static inline void put_tpel_pixels_mc01_c(uint8_t *dst, const uint8_t *src,
                                          int stride, int width, int height)
{
    int y, x;

    for (y = 0; y < height; y++, src += stride, dst += stride) {
        const uint8_t *below = src + stride;

        for (x = 0; x < width; x++) {
            const int sum = 2 * src[x] + below[x] + 1;

            dst[x] = (sum * 683) >> 11;
        }
    }
}
/**
 * Thirdpel "put", two-dimensional blend of a 2x2 neighbourhood with
 * weights 4 (current), 3 (right), 3 (below), 2 (below-right).
 *
 * The weights add up to 12; "(sum + 6) * 2731 >> 15" is a fixed-point
 * stand-in for a rounded division by twelve (2731 / 32768 is roughly 1/12).
 */
static inline void put_tpel_pixels_mc11_c(uint8_t *dst, const uint8_t *src,
                                          int stride, int width, int height)
{
    int y, x;

    for (y = 0; y < height; y++, src += stride, dst += stride) {
        const uint8_t *below = src + stride;

        for (x = 0; x < width; x++) {
            const int sum = 4 * src[x]   + 3 * src[x + 1] +
                            3 * below[x] + 2 * below[x + 1] + 6;

            dst[x] = (sum * 2731) >> 15;
        }
    }
}
/**
 * Thirdpel "put", two-dimensional blend of a 2x2 neighbourhood with
 * weights 3 (current), 2 (right), 4 (below), 3 (below-right).
 *
 * The weights add up to 12; "(sum + 6) * 2731 >> 15" is a fixed-point
 * stand-in for a rounded division by twelve (2731 / 32768 is roughly 1/12).
 */
static inline void put_tpel_pixels_mc12_c(uint8_t *dst, const uint8_t *src,
                                          int stride, int width, int height)
{
    int y, x;

    for (y = 0; y < height; y++, src += stride, dst += stride) {
        const uint8_t *below = src + stride;

        for (x = 0; x < width; x++) {
            const int sum = 3 * src[x]   + 2 * src[x + 1] +
                            4 * below[x] + 3 * below[x + 1] + 6;

            dst[x] = (sum * 2731) >> 15;
        }
    }
}
/**
 * Thirdpel "put", vertical-only blend: 1 part current sample,
 * 2 parts the sample one line below.
 *
 * "(sum + 1) * 683 >> 11" is a fixed-point stand-in for a rounded
 * division by three (683 / 2048 is roughly 1/3).
 */
static inline void put_tpel_pixels_mc02_c(uint8_t *dst, const uint8_t *src,
                                          int stride, int width, int height)
{
    int y, x;

    for (y = 0; y < height; y++, src += stride, dst += stride) {
        const uint8_t *below = src + stride;

        for (x = 0; x < width; x++) {
            const int sum = src[x] + 2 * below[x] + 1;

            dst[x] = (sum * 683) >> 11;
        }
    }
}
/**
 * Thirdpel "put", two-dimensional blend of a 2x2 neighbourhood with
 * weights 3 (current), 4 (right), 2 (below), 3 (below-right).
 *
 * The weights add up to 12; "(sum + 6) * 2731 >> 15" is a fixed-point
 * stand-in for a rounded division by twelve (2731 / 32768 is roughly 1/12).
 */
static inline void put_tpel_pixels_mc21_c(uint8_t *dst, const uint8_t *src,
                                          int stride, int width, int height)
{
    int y, x;

    for (y = 0; y < height; y++, src += stride, dst += stride) {
        const uint8_t *below = src + stride;

        for (x = 0; x < width; x++) {
            const int sum = 3 * src[x]   + 4 * src[x + 1] +
                            2 * below[x] + 3 * below[x + 1] + 6;

            dst[x] = (sum * 2731) >> 15;
        }
    }
}
/**
 * Thirdpel "put", two-dimensional blend of a 2x2 neighbourhood with
 * weights 2 (current), 3 (right), 3 (below), 4 (below-right).
 *
 * The weights add up to 12; "(sum + 6) * 2731 >> 15" is a fixed-point
 * stand-in for a rounded division by twelve (2731 / 32768 is roughly 1/12).
 */
static inline void put_tpel_pixels_mc22_c(uint8_t *dst, const uint8_t *src,
                                          int stride, int width, int height)
{
    int y, x;

    for (y = 0; y < height; y++, src += stride, dst += stride) {
        const uint8_t *below = src + stride;

        for (x = 0; x < width; x++) {
            const int sum = 2 * src[x]   + 3 * src[x + 1] +
                            3 * below[x] + 4 * below[x + 1] + 6;

            dst[x] = (sum * 2731) >> 15;
        }
    }
}
/**
 * Thirdpel "avg" for the full-pel position (no interpolation).
 *
 * Dispatches on the block width to the matching fixed-width averaging
 * helper. Widths other than 2, 4, 8 and 16 leave dst untouched.
 */
static inline void avg_tpel_pixels_mc00_c(uint8_t *dst, const uint8_t *src,
                                          int stride, int width, int height)
{
    if (width == 2)
        avg_pixels2_8_c(dst, src, stride, height);
    else if (width == 4)
        avg_pixels4_8_c(dst, src, stride, height);
    else if (width == 8)
        avg_pixels8_8_c(dst, src, stride, height);
    else if (width == 16)
        avg_pixels16_8_c(dst, src, stride, height);
}
/**
 * Thirdpel "avg", horizontal-only blend: 2 parts current sample,
 * 1 part right-hand neighbour, then a rounded average with the value
 * already stored in dst.
 *
 * "(sum + 1) * 683 >> 11" is a fixed-point stand-in for a rounded
 * division by three (683 / 2048 is roughly 1/3).
 */
static inline void avg_tpel_pixels_mc10_c(uint8_t *dst, const uint8_t *src,
                                          int stride, int width, int height)
{
    int y, x;

    for (y = 0; y < height; y++, src += stride, dst += stride) {
        for (x = 0; x < width; x++) {
            const int pred = ((2 * src[x] + src[x + 1] + 1) * 683) >> 11;

            dst[x] = (dst[x] + pred + 1) >> 1;
        }
    }
}
/**
 * Thirdpel "avg", horizontal-only blend: 1 part current sample,
 * 2 parts right-hand neighbour, then a rounded average with the value
 * already stored in dst.
 *
 * "(sum + 1) * 683 >> 11" is a fixed-point stand-in for a rounded
 * division by three (683 / 2048 is roughly 1/3).
 */
static inline void avg_tpel_pixels_mc20_c(uint8_t *dst, const uint8_t *src,
                                          int stride, int width, int height)
{
    int y, x;

    for (y = 0; y < height; y++, src += stride, dst += stride) {
        for (x = 0; x < width; x++) {
            const int pred = ((src[x] + 2 * src[x + 1] + 1) * 683) >> 11;

            dst[x] = (dst[x] + pred + 1) >> 1;
        }
    }
}
/**
 * Thirdpel "avg", vertical-only blend: 2 parts current sample,
 * 1 part the sample one line below, then a rounded average with the
 * value already stored in dst.
 *
 * "(sum + 1) * 683 >> 11" is a fixed-point stand-in for a rounded
 * division by three (683 / 2048 is roughly 1/3).
 */
static inline void avg_tpel_pixels_mc01_c(uint8_t *dst, const uint8_t *src,
                                          int stride, int width, int height)
{
    int y, x;

    for (y = 0; y < height; y++, src += stride, dst += stride) {
        const uint8_t *below = src + stride;

        for (x = 0; x < width; x++) {
            const int pred = ((2 * src[x] + below[x] + 1) * 683) >> 11;

            dst[x] = (dst[x] + pred + 1) >> 1;
        }
    }
}
/**
 * Thirdpel "avg", two-dimensional blend of a 2x2 neighbourhood with
 * weights 4 (current), 3 (right), 3 (below), 2 (below-right), then a
 * rounded average with the value already stored in dst.
 *
 * The weights add up to 12; "(sum + 6) * 2731 >> 15" is a fixed-point
 * stand-in for a rounded division by twelve (2731 / 32768 is roughly 1/12).
 */
static inline void avg_tpel_pixels_mc11_c(uint8_t *dst, const uint8_t *src,
                                          int stride, int width, int height)
{
    int y, x;

    for (y = 0; y < height; y++, src += stride, dst += stride) {
        const uint8_t *below = src + stride;

        for (x = 0; x < width; x++) {
            const int sum  = 4 * src[x]   + 3 * src[x + 1] +
                             3 * below[x] + 2 * below[x + 1] + 6;
            const int pred = (sum * 2731) >> 15;

            dst[x] = (dst[x] + pred + 1) >> 1;
        }
    }
}
/**
 * Thirdpel "avg", two-dimensional blend of a 2x2 neighbourhood with
 * weights 3 (current), 2 (right), 4 (below), 3 (below-right), then a
 * rounded average with the value already stored in dst.
 *
 * The weights add up to 12; "(sum + 6) * 2731 >> 15" is a fixed-point
 * stand-in for a rounded division by twelve (2731 / 32768 is roughly 1/12).
 */
static inline void avg_tpel_pixels_mc12_c(uint8_t *dst, const uint8_t *src,
                                          int stride, int width, int height)
{
    int y, x;

    for (y = 0; y < height; y++, src += stride, dst += stride) {
        const uint8_t *below = src + stride;

        for (x = 0; x < width; x++) {
            const int sum  = 3 * src[x]   + 2 * src[x + 1] +
                             4 * below[x] + 3 * below[x + 1] + 6;
            const int pred = (sum * 2731) >> 15;

            dst[x] = (dst[x] + pred + 1) >> 1;
        }
    }
}
/**
 * Thirdpel "avg", vertical-only blend: 1 part current sample,
 * 2 parts the sample one line below, then a rounded average with the
 * value already stored in dst.
 *
 * "(sum + 1) * 683 >> 11" is a fixed-point stand-in for a rounded
 * division by three (683 / 2048 is roughly 1/3).
 */
static inline void avg_tpel_pixels_mc02_c(uint8_t *dst, const uint8_t *src,
                                          int stride, int width, int height)
{
    int y, x;

    for (y = 0; y < height; y++, src += stride, dst += stride) {
        const uint8_t *below = src + stride;

        for (x = 0; x < width; x++) {
            const int pred = ((src[x] + 2 * below[x] + 1) * 683) >> 11;

            dst[x] = (dst[x] + pred + 1) >> 1;
        }
    }
}
/**
 * Thirdpel "avg", two-dimensional blend of a 2x2 neighbourhood with
 * weights 3 (current), 4 (right), 2 (below), 3 (below-right), then a
 * rounded average with the value already stored in dst.
 *
 * The weights add up to 12; "(sum + 6) * 2731 >> 15" is a fixed-point
 * stand-in for a rounded division by twelve (2731 / 32768 is roughly 1/12).
 */
static inline void avg_tpel_pixels_mc21_c(uint8_t *dst, const uint8_t *src,
                                          int stride, int width, int height)
{
    int y, x;

    for (y = 0; y < height; y++, src += stride, dst += stride) {
        const uint8_t *below = src + stride;

        for (x = 0; x < width; x++) {
            const int sum  = 3 * src[x]   + 4 * src[x + 1] +
                             2 * below[x] + 3 * below[x + 1] + 6;
            const int pred = (sum * 2731) >> 15;

            dst[x] = (dst[x] + pred + 1) >> 1;
        }
    }
}
/**
 * Average a diagonally interpolated thirdpel prediction into dst.
 *
 * The prediction blends the 2x2 source neighbourhood with weights
 * 2 (current), 3 (right), 3 (below) and 4 (below right); the result is then
 * averaged with the value already in dst using (a + b + 1) >> 1 rounding.
 * Installed by ff_tpeldsp_init() at table index 10 (x = 2, y = 2).
 *
 * @param dst    destination block, read and overwritten in place
 * @param src    source pixels; rows 0..height and columns 0..width are read
 * @param stride distance in bytes between rows, used for both dst and src
 * @param width  number of pixels processed per row
 * @param height number of rows processed
 */
static inline void avg_tpel_pixels_mc22_c(uint8_t *dst, const uint8_t *src,
                                          int stride, int width, int height)
{
    int x, y;

    for (y = 0; y < height; y++) {
        for (x = 0; x < width; x++) {
            /* 2731 / 32768 approximates 1 / 12 (sum of the four weights). */
            const int pred = ((2 * src[x]          + 3 * src[x + 1] +
                               3 * src[x + stride] + 4 * src[x + stride + 1] +
                               6) * 2731) >> 15;

            dst[x] = (dst[x] + pred + 1) >> 1;
        }
        src += stride;
        dst += stride;
    }
}
| av_cold void ff_tpeldsp_init(TpelDSPContext *c) | |||||
| { | |||||
| c->put_tpel_pixels_tab[ 0] = put_tpel_pixels_mc00_c; | |||||
| c->put_tpel_pixels_tab[ 1] = put_tpel_pixels_mc10_c; | |||||
| c->put_tpel_pixels_tab[ 2] = put_tpel_pixels_mc20_c; | |||||
| c->put_tpel_pixels_tab[ 4] = put_tpel_pixels_mc01_c; | |||||
| c->put_tpel_pixels_tab[ 5] = put_tpel_pixels_mc11_c; | |||||
| c->put_tpel_pixels_tab[ 6] = put_tpel_pixels_mc21_c; | |||||
| c->put_tpel_pixels_tab[ 8] = put_tpel_pixels_mc02_c; | |||||
| c->put_tpel_pixels_tab[ 9] = put_tpel_pixels_mc12_c; | |||||
| c->put_tpel_pixels_tab[10] = put_tpel_pixels_mc22_c; | |||||
| c->avg_tpel_pixels_tab[ 0] = avg_tpel_pixels_mc00_c; | |||||
| c->avg_tpel_pixels_tab[ 1] = avg_tpel_pixels_mc10_c; | |||||
| c->avg_tpel_pixels_tab[ 2] = avg_tpel_pixels_mc20_c; | |||||
| c->avg_tpel_pixels_tab[ 4] = avg_tpel_pixels_mc01_c; | |||||
| c->avg_tpel_pixels_tab[ 5] = avg_tpel_pixels_mc11_c; | |||||
| c->avg_tpel_pixels_tab[ 6] = avg_tpel_pixels_mc21_c; | |||||
| c->avg_tpel_pixels_tab[ 8] = avg_tpel_pixels_mc02_c; | |||||
| c->avg_tpel_pixels_tab[ 9] = avg_tpel_pixels_mc12_c; | |||||
| c->avg_tpel_pixels_tab[10] = avg_tpel_pixels_mc22_c; | |||||
| } | |||||
| @@ -0,0 +1,59 @@ | |||||
| /* | |||||
| * thirdpel DSP functions | |||||
| * | |||||
| * This file is part of Libav. | |||||
| * | |||||
| * Libav is free software; you can redistribute it and/or | |||||
| * modify it under the terms of the GNU Lesser General Public | |||||
| * License as published by the Free Software Foundation; either | |||||
| * version 2.1 of the License, or (at your option) any later version. | |||||
| * | |||||
| * Libav is distributed in the hope that it will be useful, | |||||
| * but WITHOUT ANY WARRANTY; without even the implied warranty of | |||||
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |||||
| * Lesser General Public License for more details. | |||||
| * | |||||
| * You should have received a copy of the GNU Lesser General Public | |||||
| * License along with Libav; if not, write to the Free Software | |||||
| * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |||||
| */ | |||||
| /** | |||||
| * @file | |||||
| * thirdpel DSP functions | |||||
| */ | |||||
| #ifndef AVCODEC_TPELDSP_H | |||||
| #define AVCODEC_TPELDSP_H | |||||
| #include <stdint.h> | |||||
/**
 * Thirdpel motion compensation function (put or avg variant).
 *
 * NOTE(review): the remarks previously attached to this typedef described
 * hpel_pixels_func (block sizes 8x4, 8x8, 16x8, 16x16; h limited to
 * {width/2, width}, never larger than 16 and never smaller than 4). They
 * appear to be inherited from the halfpel header; confirm which block sizes
 * the thirdpel callers actually use before relying on them. The alignment
 * notes on the parameters below are kept from the original declaration.
 *
 * @param block     destination where the result is stored
 * @param pixels    source
 * @param line_size number of bytes in a horizontal line of block
 * @param w         width of the block in pixels
 * @param h         height of the block in rows
 */
typedef void (*tpel_mc_func)(uint8_t *block /* align width (8 or 16) */,
                             const uint8_t *pixels /* align 1 */,
                             int line_size, int w, int h);

/**
 * thirdpel DSP context
 */
typedef struct TpelDSPContext {
    /**
     * Thirdpel motion compensation with rounding (a + b + 1) >> 1.
     * Each table has 11 slots and is indexed as
     * *pixels_tab[xthirdpel + 4 * ythirdpel]; with both offsets in 0..2
     * this addresses the 9 thirdpel positions, so slots 3 and 7 do not
     * correspond to any position.
     * @param block destination where the result is stored
     * @param pixels source
     * @param line_size number of bytes in a horizontal line of block
     * @param w width
     * @param h height
     */
    tpel_mc_func put_tpel_pixels_tab[11]; // FIXME individual func ptr per width?
    tpel_mc_func avg_tpel_pixels_tab[11]; // FIXME individual func ptr per width?
} TpelDSPContext;

/**
 * Fill the function tables of a thirdpel DSP context.
 *
 * @param c context to initialize
 */
void ff_tpeldsp_init(TpelDSPContext *c);
| #endif /* AVCODEC_TPELDSP_H */ | |||||