Signed-off-by: James Almer <jamrial@gmail.com>
tags/n3.3
| @@ -2115,6 +2115,7 @@ CONFIG_EXTRA=" | |||
| libx262 | |||
| llauddsp | |||
| llviddsp | |||
| llvidencdsp | |||
| lpc | |||
| lzf | |||
| me_cmp | |||
| @@ -2366,7 +2367,7 @@ amv_decoder_select="sp5x_decoder exif" | |||
| amv_encoder_select="aandcttables jpegtables mpegvideoenc" | |||
| ape_decoder_select="bswapdsp llauddsp" | |||
| apng_decoder_select="zlib" | |||
| apng_encoder_select="huffyuvencdsp zlib" | |||
| apng_encoder_select="llvidencdsp zlib" | |||
| asv1_decoder_select="blockdsp bswapdsp idctdsp" | |||
| asv1_encoder_select="bswapdsp fdctdsp pixblockdsp" | |||
| asv2_decoder_select="blockdsp bswapdsp idctdsp" | |||
| @@ -2430,7 +2431,7 @@ hap_encoder_deps="libsnappy" | |||
| hap_encoder_select="texturedspenc" | |||
| hevc_decoder_select="bswapdsp cabac golomb videodsp" | |||
| huffyuv_decoder_select="bswapdsp huffyuvdsp llviddsp" | |||
| huffyuv_encoder_select="bswapdsp huffman huffyuvencdsp" | |||
| huffyuv_encoder_select="bswapdsp huffman huffyuvencdsp llvidencdsp" | |||
| iac_decoder_select="imc_decoder" | |||
| imc_decoder_select="bswapdsp fft mdct sinewin" | |||
| indeo3_decoder_select="hpeldsp" | |||
| @@ -2491,7 +2492,7 @@ on2avc_decoder_select="mdct" | |||
| opus_decoder_deps="swresample" | |||
| opus_decoder_select="imdct15" | |||
| png_decoder_select="zlib" | |||
| png_encoder_select="huffyuvencdsp zlib" | |||
| png_encoder_select="llvidencdsp zlib" | |||
| prores_decoder_select="blockdsp idctdsp" | |||
| prores_encoder_select="fdctdsp" | |||
| qcelp_decoder_select="lsp" | |||
| @@ -2534,7 +2535,7 @@ tscc_decoder_select="zlib" | |||
| twinvq_decoder_select="mdct lsp sinewin" | |||
| txd_decoder_select="texturedsp" | |||
| utvideo_decoder_select="bswapdsp llviddsp" | |||
| utvideo_encoder_select="bswapdsp huffman huffyuvencdsp" | |||
| utvideo_encoder_select="bswapdsp huffman llvidencdsp" | |||
| vble_decoder_select="llviddsp" | |||
| vc1_decoder_select="blockdsp h263_decoder h264qpel intrax8 mpegvideo vc1dsp" | |||
| vc1_qsv_decoder_deps="libmfx" | |||
| @@ -91,6 +91,7 @@ OBJS-$(CONFIG_JPEGTABLES) += jpegtables.o | |||
| OBJS-$(CONFIG_LIBXVID) += libxvid_rc.o | |||
| OBJS-$(CONFIG_LLAUDDSP) += lossless_audiodsp.o | |||
| OBJS-$(CONFIG_LLVIDDSP) += lossless_videodsp.o | |||
| OBJS-$(CONFIG_LLVIDENCDSP) += lossless_videoencdsp.o | |||
| OBJS-$(CONFIG_LPC) += lpc.o | |||
| OBJS-$(CONFIG_LSP) += lsp.o | |||
| OBJS-$(CONFIG_LZF) += lzf.o | |||
| @@ -38,6 +38,7 @@ | |||
| #include "huffyuvencdsp.h" | |||
| #include "put_bits.h" | |||
| #include "lossless_videodsp.h" | |||
| #include "lossless_videoencdsp.h" | |||
| #define VLC_BITS 12 | |||
| @@ -89,6 +90,7 @@ typedef struct HYuvContext { | |||
| HuffYUVDSPContext hdsp; | |||
| HuffYUVEncDSPContext hencdsp; | |||
| LLVidDSPContext llviddsp; | |||
| LLVidEncDSPContext llvidencdsp; | |||
| int non_determ; // non-deterministic, multi-threaded encoder allowed | |||
| } HYuvContext; | |||
| @@ -33,6 +33,7 @@ | |||
| #include "huffman.h" | |||
| #include "huffyuvencdsp.h" | |||
| #include "internal.h" | |||
| #include "lossless_videoencdsp.h" | |||
| #include "put_bits.h" | |||
| #include "libavutil/opt.h" | |||
| #include "libavutil/pixdesc.h" | |||
| @@ -41,7 +42,7 @@ static inline void diff_bytes(HYuvContext *s, uint8_t *dst, | |||
| const uint8_t *src0, const uint8_t *src1, int w) | |||
| { | |||
| if (s->bps <= 8) { | |||
| s->hencdsp.diff_bytes(dst, src0, src1, w); | |||
| s->llvidencdsp.diff_bytes(dst, src0, src1, w); | |||
| } else { | |||
| s->hencdsp.diff_int16((uint16_t *)dst, (const uint16_t *)src0, (const uint16_t *)src1, s->n - 1, w); | |||
| } | |||
| @@ -65,7 +66,7 @@ static inline int sub_left_prediction(HYuvContext *s, uint8_t *dst, | |||
| dst[i] = temp - left; | |||
| left = temp; | |||
| } | |||
| s->hencdsp.diff_bytes(dst + 32, src + 32, src + 31, w - 32); | |||
| s->llvidencdsp.diff_bytes(dst + 32, src + 32, src + 31, w - 32); | |||
| return src[w-1]; | |||
| } | |||
| } else { | |||
| @@ -117,7 +118,7 @@ static inline void sub_left_prediction_bgr32(HYuvContext *s, uint8_t *dst, | |||
| a = at; | |||
| } | |||
| s->hencdsp.diff_bytes(dst + 16, src + 16, src + 12, w * 4 - 16); | |||
| s->llvidencdsp.diff_bytes(dst + 16, src + 16, src + 12, w * 4 - 16); | |||
| *red = src[(w - 1) * 4 + R]; | |||
| *green = src[(w - 1) * 4 + G]; | |||
| @@ -146,7 +147,7 @@ static inline void sub_left_prediction_rgb24(HYuvContext *s, uint8_t *dst, | |||
| b = bt; | |||
| } | |||
| s->hencdsp.diff_bytes(dst + 48, src + 48, src + 48 - 3, w * 3 - 48); | |||
| s->llvidencdsp.diff_bytes(dst + 48, src + 48, src + 48 - 3, w * 3 - 48); | |||
| *red = src[(w - 1) * 3 + 0]; | |||
| *green = src[(w - 1) * 3 + 1]; | |||
| @@ -156,7 +157,7 @@ static inline void sub_left_prediction_rgb24(HYuvContext *s, uint8_t *dst, | |||
| static void sub_median_prediction(HYuvContext *s, uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int w, int *left, int *left_top) | |||
| { | |||
| if (s->bps <= 8) { | |||
| s->hencdsp.sub_hfyu_median_pred(dst, src1, src2, w , left, left_top); | |||
| s->llvidencdsp.sub_median_pred(dst, src1, src2, w , left, left_top); | |||
| } else { | |||
| s->hencdsp.sub_hfyu_median_pred_int16((uint16_t *)dst, (const uint16_t *)src1, (const uint16_t *)src2, s->n - 1, w , left, left_top); | |||
| } | |||
| @@ -218,6 +219,7 @@ static av_cold int encode_init(AVCodecContext *avctx) | |||
| ff_huffyuv_common_init(avctx); | |||
| ff_huffyuvencdsp_init(&s->hencdsp, avctx); | |||
| ff_llvidencdsp_init(&s->llvidencdsp); | |||
| avctx->extradata = av_mallocz(3*MAX_N + 4); | |||
| if (s->flags&AV_CODEC_FLAG_PASS1) { | |||
| @@ -823,9 +825,9 @@ static int encode_frame(AVCodecContext *avctx, AVPacket *pkt, | |||
| lefttopy = p->data[0][3]; | |||
| lefttopu = p->data[1][1]; | |||
| lefttopv = p->data[2][1]; | |||
| s->hencdsp.sub_hfyu_median_pred(s->temp[0], p->data[0] + 4, p->data[0] + fake_ystride + 4, width - 4, &lefty, &lefttopy); | |||
| s->hencdsp.sub_hfyu_median_pred(s->temp[1], p->data[1] + 2, p->data[1] + fake_ustride + 2, width2 - 2, &leftu, &lefttopu); | |||
| s->hencdsp.sub_hfyu_median_pred(s->temp[2], p->data[2] + 2, p->data[2] + fake_vstride + 2, width2 - 2, &leftv, &lefttopv); | |||
| s->llvidencdsp.sub_median_pred(s->temp[0], p->data[0] + 4, p->data[0] + fake_ystride + 4, width - 4, &lefty, &lefttopy); | |||
| s->llvidencdsp.sub_median_pred(s->temp[1], p->data[1] + 2, p->data[1] + fake_ustride + 2, width2 - 2, &leftu, &lefttopu); | |||
| s->llvidencdsp.sub_median_pred(s->temp[2], p->data[2] + 2, p->data[2] + fake_vstride + 2, width2 - 2, &leftv, &lefttopv); | |||
| encode_422_bitstream(s, 0, width - 4); | |||
| y++; cy++; | |||
| @@ -835,7 +837,7 @@ static int encode_frame(AVCodecContext *avctx, AVPacket *pkt, | |||
| if (s->bitstream_bpp == 12) { | |||
| while (2 * cy > y) { | |||
| ydst = p->data[0] + p->linesize[0] * y; | |||
| s->hencdsp.sub_hfyu_median_pred(s->temp[0], ydst - fake_ystride, ydst, width, &lefty, &lefttopy); | |||
| s->llvidencdsp.sub_median_pred(s->temp[0], ydst - fake_ystride, ydst, width, &lefty, &lefttopy); | |||
| encode_gray_bitstream(s, width); | |||
| y++; | |||
| } | |||
| @@ -845,9 +847,9 @@ static int encode_frame(AVCodecContext *avctx, AVPacket *pkt, | |||
| udst = p->data[1] + p->linesize[1] * cy; | |||
| vdst = p->data[2] + p->linesize[2] * cy; | |||
| s->hencdsp.sub_hfyu_median_pred(s->temp[0], ydst - fake_ystride, ydst, width, &lefty, &lefttopy); | |||
| s->hencdsp.sub_hfyu_median_pred(s->temp[1], udst - fake_ustride, udst, width2, &leftu, &lefttopu); | |||
| s->hencdsp.sub_hfyu_median_pred(s->temp[2], vdst - fake_vstride, vdst, width2, &leftv, &lefttopv); | |||
| s->llvidencdsp.sub_median_pred(s->temp[0], ydst - fake_ystride, ydst, width, &lefty, &lefttopy); | |||
| s->llvidencdsp.sub_median_pred(s->temp[1], udst - fake_ustride, udst, width2, &leftu, &lefttopu); | |||
| s->llvidencdsp.sub_median_pred(s->temp[2], vdst - fake_vstride, vdst, width2, &leftv, &lefttopv); | |||
| encode_422_bitstream(s, 0, width); | |||
| } | |||
| @@ -860,7 +862,7 @@ static int encode_frame(AVCodecContext *avctx, AVPacket *pkt, | |||
| ydst = p->data[0] + p->linesize[0] * y; | |||
| if (s->predictor == PLANE && s->interlaced < y) { | |||
| s->hencdsp.diff_bytes(s->temp[1], ydst, ydst - fake_ystride, width); | |||
| s->llvidencdsp.diff_bytes(s->temp[1], ydst, ydst - fake_ystride, width); | |||
| lefty = sub_left_prediction(s, s->temp[0], s->temp[1], width , lefty); | |||
| } else { | |||
| @@ -876,9 +878,9 @@ static int encode_frame(AVCodecContext *avctx, AVPacket *pkt, | |||
| vdst = p->data[2] + p->linesize[2] * cy; | |||
| if (s->predictor == PLANE && s->interlaced < cy) { | |||
| s->hencdsp.diff_bytes(s->temp[1], ydst, ydst - fake_ystride, width); | |||
| s->hencdsp.diff_bytes(s->temp[2], udst, udst - fake_ustride, width2); | |||
| s->hencdsp.diff_bytes(s->temp[2] + width2, vdst, vdst - fake_vstride, width2); | |||
| s->llvidencdsp.diff_bytes(s->temp[1], ydst, ydst - fake_ystride, width); | |||
| s->llvidencdsp.diff_bytes(s->temp[2], udst, udst - fake_ustride, width2); | |||
| s->llvidencdsp.diff_bytes(s->temp[2] + width2, vdst, vdst - fake_vstride, width2); | |||
| lefty = sub_left_prediction(s, s->temp[0], s->temp[1], width , lefty); | |||
| leftu = sub_left_prediction(s, s->temp[1], s->temp[2], width2, leftu); | |||
| @@ -911,7 +913,7 @@ static int encode_frame(AVCodecContext *avctx, AVPacket *pkt, | |||
| for (y = 1; y < s->height; y++) { | |||
| uint8_t *dst = data + y*stride; | |||
| if (s->predictor == PLANE && s->interlaced < y) { | |||
| s->hencdsp.diff_bytes(s->temp[1], dst, dst - fake_stride, width * 4); | |||
| s->llvidencdsp.diff_bytes(s->temp[1], dst, dst - fake_stride, width * 4); | |||
| sub_left_prediction_bgr32(s, s->temp[0], s->temp[1], width, | |||
| &leftr, &leftg, &leftb, &lefta); | |||
| } else { | |||
| @@ -939,7 +941,7 @@ static int encode_frame(AVCodecContext *avctx, AVPacket *pkt, | |||
| for (y = 1; y < s->height; y++) { | |||
| uint8_t *dst = data + y * stride; | |||
| if (s->predictor == PLANE && s->interlaced < y) { | |||
| s->hencdsp.diff_bytes(s->temp[1], dst, dst - fake_stride, | |||
| s->llvidencdsp.diff_bytes(s->temp[1], dst, dst - fake_stride, | |||
| width * 3); | |||
| sub_left_prediction_rgb24(s, s->temp[0], s->temp[1], width, | |||
| &leftr, &leftg, &leftb); | |||
| @@ -21,38 +21,6 @@ | |||
| #include "huffyuvencdsp.h" | |||
| #include "mathops.h" | |||
| // 0x7f7f7f7f or 0x7f7f7f7f7f7f7f7f or whatever, depending on the cpu's native arithmetic size | |||
| #define pb_7f (~0UL / 255 * 0x7f) | |||
| #define pb_80 (~0UL / 255 * 0x80) | |||
| static void diff_bytes_c(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, intptr_t w) | |||
| { | |||
| long i; | |||
| #if !HAVE_FAST_UNALIGNED | |||
| if (((long)src1 | (long)src2) & (sizeof(long) - 1)) { | |||
| for (i = 0; i + 7 < w; i += 8) { | |||
| dst[i + 0] = src1[i + 0] - src2[i + 0]; | |||
| dst[i + 1] = src1[i + 1] - src2[i + 1]; | |||
| dst[i + 2] = src1[i + 2] - src2[i + 2]; | |||
| dst[i + 3] = src1[i + 3] - src2[i + 3]; | |||
| dst[i + 4] = src1[i + 4] - src2[i + 4]; | |||
| dst[i + 5] = src1[i + 5] - src2[i + 5]; | |||
| dst[i + 6] = src1[i + 6] - src2[i + 6]; | |||
| dst[i + 7] = src1[i + 7] - src2[i + 7]; | |||
| } | |||
| } else | |||
| #endif | |||
| for (i = 0; i <= w - (int) sizeof(long); i += sizeof(long)) { | |||
| long a = *(long *) (src1 + i); | |||
| long b = *(long *) (src2 + i); | |||
| *(long *) (dst + i) = ((a | pb_80) - (b & pb_7f)) ^ | |||
| ((a ^ b ^ pb_80) & pb_80); | |||
| } | |||
| for (; i < w; i++) | |||
| dst[i + 0] = src1[i + 0] - src2[i + 0]; | |||
| } | |||
| static void diff_int16_c(uint16_t *dst, const uint16_t *src1, const uint16_t *src2, unsigned mask, int w){ | |||
| long i; | |||
| #if !HAVE_FAST_UNALIGNED | |||
| @@ -79,27 +47,6 @@ static void diff_int16_c(uint16_t *dst, const uint16_t *src1, const uint16_t *sr | |||
| dst[i] = (src1[i] - src2[i]) & mask; | |||
| } | |||
| static void sub_hfyu_median_pred_c(uint8_t *dst, const uint8_t *src1, | |||
| const uint8_t *src2, intptr_t w, | |||
| int *left, int *left_top) | |||
| { | |||
| int i; | |||
| uint8_t l, lt; | |||
| l = *left; | |||
| lt = *left_top; | |||
| for (i = 0; i < w; i++) { | |||
| const int pred = mid_pred(l, src1[i], (l + src1[i] - lt) & 0xFF); | |||
| lt = src1[i]; | |||
| l = src2[i]; | |||
| dst[i] = l - pred; | |||
| } | |||
| *left = l; | |||
| *left_top = lt; | |||
| } | |||
| static void sub_hfyu_median_pred_int16_c(uint16_t *dst, const uint16_t *src1, const uint16_t *src2, unsigned mask, int w, int *left, int *left_top){ | |||
| int i; | |||
| uint16_t l, lt; | |||
| @@ -120,9 +67,7 @@ static void sub_hfyu_median_pred_int16_c(uint16_t *dst, const uint16_t *src1, co | |||
| av_cold void ff_huffyuvencdsp_init(HuffYUVEncDSPContext *c, AVCodecContext *avctx) | |||
| { | |||
| c->diff_bytes = diff_bytes_c; | |||
| c->diff_int16 = diff_int16_c; | |||
| c->sub_hfyu_median_pred = sub_hfyu_median_pred_c; | |||
| c->sub_hfyu_median_pred_int16 = sub_hfyu_median_pred_int16_c; | |||
| if (ARCH_X86) | |||
| @@ -24,22 +24,11 @@ | |||
| #include "avcodec.h" | |||
| typedef struct HuffYUVEncDSPContext { | |||
| void (*diff_bytes)(uint8_t *dst /* align 16 */, | |||
| const uint8_t *src1 /* align 16 */, | |||
| const uint8_t *src2 /* align 1 */, | |||
| intptr_t w); | |||
| void (*diff_int16)(uint16_t *dst /* align 16 */, | |||
| const uint16_t *src1 /* align 16 */, | |||
| const uint16_t *src2 /* align 1 */, | |||
| unsigned mask, int w); | |||
| /** | |||
| * Subtract HuffYUV's variant of median prediction. | |||
| * Note, this might read from src1[-1], src2[-1]. | |||
| */ | |||
| void (*sub_hfyu_median_pred)(uint8_t *dst, const uint8_t *src1, | |||
| const uint8_t *src2, intptr_t w, | |||
| int *left, int *left_top); | |||
| void (*sub_hfyu_median_pred_int16)(uint16_t *dst, const uint16_t *src1, | |||
| const uint16_t *src2, unsigned mask, | |||
| int w, int *left, int *left_top); | |||
| @@ -0,0 +1,84 @@ | |||
| /* | |||
| * This file is part of FFmpeg. | |||
| * | |||
| * FFmpeg is free software; you can redistribute it and/or | |||
| * modify it under the terms of the GNU Lesser General Public | |||
| * License as published by the Free Software Foundation; either | |||
| * version 2.1 of the License, or (at your option) any later version. | |||
| * | |||
| * FFmpeg is distributed in the hope that it will be useful, | |||
| * but WITHOUT ANY WARRANTY; without even the implied warranty of | |||
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |||
| * Lesser General Public License for more details. | |||
| * | |||
| * You should have received a copy of the GNU Lesser General Public | |||
| * License along with FFmpeg; if not, write to the Free Software | |||
| * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |||
| */ | |||
| #include "config.h" | |||
| #include "libavutil/attributes.h" | |||
| #include "lossless_videoencdsp.h" | |||
| #include "mathops.h" | |||
| // 0x7f7f7f7f or 0x7f7f7f7f7f7f7f7f or whatever, depending on the cpu's native arithmetic size | |||
| #define pb_7f (~0UL / 255 * 0x7f) | |||
| #define pb_80 (~0UL / 255 * 0x80) | |||
| /* Byte-wise subtraction: dst[i] = src1[i] - src2[i] for every i in [0, w). */ static void diff_bytes_c(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, intptr_t w) | |||
| { | |||
| long i; | |||
| #if !HAVE_FAST_UNALIGNED | |||
| if (((long)src1 | (long)src2) & (sizeof(long) - 1)) { /* at least one source pointer is not long-aligned: use byte accesses only */ | |||
| for (i = 0; i + 7 < w; i += 8) { /* hand-unrolled, 8 bytes per iteration */ | |||
| dst[i + 0] = src1[i + 0] - src2[i + 0]; | |||
| dst[i + 1] = src1[i + 1] - src2[i + 1]; | |||
| dst[i + 2] = src1[i + 2] - src2[i + 2]; | |||
| dst[i + 3] = src1[i + 3] - src2[i + 3]; | |||
| dst[i + 4] = src1[i + 4] - src2[i + 4]; | |||
| dst[i + 5] = src1[i + 5] - src2[i + 5]; | |||
| dst[i + 6] = src1[i + 6] - src2[i + 6]; | |||
| dst[i + 7] = src1[i + 7] - src2[i + 7]; | |||
| } | |||
| } else /* the word-wise loop below is the else branch, so it is skipped after the byte loop above */ | |||
| #endif | |||
| for (i = 0; i <= w - (int) sizeof(long); i += sizeof(long)) { /* process sizeof(long) bytes per iteration */ | |||
| long a = *(long *) (src1 + i); | |||
| long b = *(long *) (src2 + i); | |||
| *(long *) (dst + i) = ((a | pb_80) - (b & pb_7f)) ^ /* subtract the low 7 bits of every byte; bit 7 is forced on in a so no borrow crosses a byte boundary */ | |||
| ((a ^ b ^ pb_80) & pb_80); /* then correct bit 7 of every byte */ | |||
| } | |||
| for (; i < w; i++) /* leftover tail bytes, one at a time */ | |||
| dst[i + 0] = src1[i + 0] - src2[i + 0]; | |||
| } | |||
| /* Stores dst[i] = src2[i] - pred for i in [0, w); *left and *left_top carry the predictor state in and out. */ static void sub_median_pred_c(uint8_t *dst, const uint8_t *src1, | |||
| const uint8_t *src2, intptr_t w, | |||
| int *left, int *left_top) | |||
| { | |||
| int i; | |||
| uint8_t l, lt; | |||
| l = *left; /* caller-supplied start value, truncated to 8 bits; later holds the previous src2 sample */ | |||
| lt = *left_top; /* caller-supplied start value, truncated to 8 bits; later holds the previous src1 sample */ | |||
| for (i = 0; i < w; i++) { | |||
| const int pred = mid_pred(l, src1[i], (l + src1[i] - lt) & 0xFF); /* mid_pred is not defined in this file; presumably the median of its three arguments - confirm in mathops.h */ | |||
| lt = src1[i]; | |||
| l = src2[i]; | |||
| dst[i] = l - pred; /* residual of the current src2 sample against the prediction */ | |||
| } | |||
| *left = l; /* hand the final state back to the caller */ | |||
| *left_top = lt; | |||
| } | |||
| av_cold void ff_llvidencdsp_init(LLVidEncDSPContext *c) /* fill both function pointers of c with the C implementations from this file */ | |||
| { | |||
| c->diff_bytes = diff_bytes_c; | |||
| c->sub_median_pred = sub_median_pred_c; | |||
| if (ARCH_X86) | |||
| ff_llvidencdsp_init_x86(c); /* defined elsewhere; presumably replaces the pointers with SIMD versions - confirm in the x86 init file */ | |||
| } | |||
| @@ -0,0 +1,41 @@ | |||
| /* | |||
| * This file is part of FFmpeg. | |||
| * | |||
| * FFmpeg is free software; you can redistribute it and/or | |||
| * modify it under the terms of the GNU Lesser General Public | |||
| * License as published by the Free Software Foundation; either | |||
| * version 2.1 of the License, or (at your option) any later version. | |||
| * | |||
| * FFmpeg is distributed in the hope that it will be useful, | |||
| * but WITHOUT ANY WARRANTY; without even the implied warranty of | |||
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |||
| * Lesser General Public License for more details. | |||
| * | |||
| * You should have received a copy of the GNU Lesser General Public | |||
| * License along with FFmpeg; if not, write to the Free Software | |||
| * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |||
| */ | |||
| #ifndef AVCODEC_LOSSLESS_VIDEOENCDSP_H | |||
| #define AVCODEC_LOSSLESS_VIDEOENCDSP_H | |||
| #include <stdint.h> | |||
| typedef struct LLVidEncDSPContext { /* function-pointer table; both members are assigned by ff_llvidencdsp_init() */ | |||
| void (*diff_bytes)(uint8_t *dst /* align 16 */, | |||
| const uint8_t *src1 /* align 16 */, | |||
| const uint8_t *src2 /* align 1 */, | |||
| intptr_t w); /* the C version writes dst[i] = src1[i] - src2[i] for i in [0, w) */ | |||
| /** | |||
| * Subtract HuffYUV's variant of median prediction. | |||
| * Note, this might read from src1[-1], src2[-1]. | |||
| */ | |||
| void (*sub_median_pred)(uint8_t *dst, const uint8_t *src1, | |||
| const uint8_t *src2, intptr_t w, | |||
| int *left, int *left_top); /* the C version reads and updates *left and *left_top */ | |||
| } LLVidEncDSPContext; | |||
| void ff_llvidencdsp_init(LLVidEncDSPContext *c); | |||
| void ff_llvidencdsp_init_x86(LLVidEncDSPContext *c); | |||
| #endif /* AVCODEC_LOSSLESS_VIDEOENCDSP_H */ | |||
| @@ -22,7 +22,7 @@ | |||
| #include "avcodec.h" | |||
| #include "internal.h" | |||
| #include "bytestream.h" | |||
| #include "huffyuvencdsp.h" | |||
| #include "lossless_videoencdsp.h" | |||
| #include "png.h" | |||
| #include "apng.h" | |||
| @@ -47,7 +47,7 @@ typedef struct APNGFctlChunk { | |||
| typedef struct PNGEncContext { | |||
| AVClass *class; | |||
| HuffYUVEncDSPContext hdsp; | |||
| LLVidEncDSPContext llvidencdsp; | |||
| uint8_t *bytestream; | |||
| uint8_t *bytestream_start; | |||
| @@ -159,7 +159,7 @@ static void sub_left_prediction(PNGEncContext *c, uint8_t *dst, const uint8_t *s | |||
| for (x = 0; x < unaligned_w; x++) | |||
| *dst++ = *src1++ - *src2++; | |||
| size -= unaligned_w; | |||
| c->hdsp.diff_bytes(dst, src1, src2, size); | |||
| c->llvidencdsp.diff_bytes(dst, src1, src2, size); | |||
| } | |||
| static void png_filter_row(PNGEncContext *c, uint8_t *dst, int filter_type, | |||
| @@ -175,7 +175,7 @@ static void png_filter_row(PNGEncContext *c, uint8_t *dst, int filter_type, | |||
| sub_left_prediction(c, dst, src, bpp, size); | |||
| break; | |||
| case PNG_FILTER_VALUE_UP: | |||
| c->hdsp.diff_bytes(dst, src, top, size); | |||
| c->llvidencdsp.diff_bytes(dst, src, top, size); | |||
| break; | |||
| case PNG_FILTER_VALUE_AVG: | |||
| for (i = 0; i < bpp; i++) | |||
| @@ -1015,7 +1015,7 @@ FF_DISABLE_DEPRECATION_WARNINGS | |||
| FF_ENABLE_DEPRECATION_WARNINGS | |||
| #endif | |||
| ff_huffyuvencdsp_init(&s->hdsp, avctx); | |||
| ff_llvidencdsp_init(&s->llvidencdsp); | |||
| #if FF_API_PRIVATE_OPT | |||
| FF_DISABLE_DEPRECATION_WARNINGS | |||
| @@ -30,8 +30,8 @@ | |||
| #include "libavutil/common.h" | |||
| #include "avcodec.h" | |||
| #include "bswapdsp.h" | |||
| #include "huffyuvencdsp.h" | |||
| #include "lossless_videodsp.h" | |||
| #include "lossless_videoencdsp.h" | |||
| enum { | |||
| PRED_NONE = 0, | |||
| @@ -70,8 +70,8 @@ typedef struct UtvideoContext { | |||
| const AVClass *class; | |||
| AVCodecContext *avctx; | |||
| BswapDSPContext bdsp; | |||
| HuffYUVEncDSPContext hdsp; | |||
| LLVidDSPContext llviddsp; | |||
| LLVidEncDSPContext llvidencdsp; | |||
| uint32_t frame_info_size, flags, frame_info; | |||
| int planes; | |||
| @@ -33,7 +33,6 @@ | |||
| #include "bswapdsp.h" | |||
| #include "bytestream.h" | |||
| #include "put_bits.h" | |||
| #include "huffyuvencdsp.h" | |||
| #include "mathops.h" | |||
| #include "utvideo.h" | |||
| #include "huffman.h" | |||
| @@ -120,7 +119,7 @@ static av_cold int utvideo_encode_init(AVCodecContext *avctx) | |||
| } | |||
| ff_bswapdsp_init(&c->bdsp); | |||
| ff_huffyuvencdsp_init(&c->hdsp, avctx); | |||
| ff_llvidencdsp_init(&c->llvidencdsp); | |||
| #if FF_API_PRIVATE_OPT | |||
| FF_DISABLE_DEPRECATION_WARNINGS | |||
| @@ -324,7 +323,7 @@ static void median_predict(UtvideoContext *c, uint8_t *src, uint8_t *dst, int st | |||
| /* Rest of the coded part uses median prediction */ | |||
| for (j = 1; j < height; j++) { | |||
| c->hdsp.sub_hfyu_median_pred(dst, src - stride, src, width, &A, &B); | |||
| c->llvidencdsp.sub_median_pred(dst, src - stride, src, width, &A, &B); | |||
| dst += width; | |||
| src += stride; | |||
| } | |||
| @@ -20,8 +20,9 @@ OBJS-$(CONFIG_H264QPEL) += x86/h264_qpel.o | |||
| OBJS-$(CONFIG_HPELDSP) += x86/hpeldsp_init.o | |||
| OBJS-$(CONFIG_LLAUDDSP) += x86/lossless_audiodsp_init.o | |||
| OBJS-$(CONFIG_LLVIDDSP) += x86/lossless_videodsp_init.o | |||
| OBJS-$(CONFIG_LLVIDENCDSP) += x86/lossless_videoencdsp_init.o | |||
| OBJS-$(CONFIG_HUFFYUVDSP) += x86/huffyuvdsp_init.o | |||
| OBJS-$(CONFIG_HUFFYUVENCDSP) += x86/huffyuvencdsp_mmx.o | |||
| OBJS-$(CONFIG_HUFFYUVENCDSP) += x86/huffyuvencdsp_init.o | |||
| OBJS-$(CONFIG_IDCTDSP) += x86/idctdsp_init.o | |||
| OBJS-$(CONFIG_LPC) += x86/lpc.o | |||
| OBJS-$(CONFIG_ME_CMP) += x86/me_cmp_init.o | |||
| @@ -114,6 +115,7 @@ YASM-OBJS-$(CONFIG_HUFFYUVENCDSP) += x86/huffyuvencdsp.o | |||
| YASM-OBJS-$(CONFIG_IDCTDSP) += x86/idctdsp.o | |||
| YASM-OBJS-$(CONFIG_LLAUDDSP) += x86/lossless_audiodsp.o | |||
| YASM-OBJS-$(CONFIG_LLVIDDSP) += x86/lossless_videodsp.o | |||
| YASM-OBJS-$(CONFIG_LLVIDENCDSP) += x86/lossless_videoencdsp.o | |||
| YASM-OBJS-$(CONFIG_ME_CMP) += x86/me_cmp.o | |||
| YASM-OBJS-$(CONFIG_MPEGAUDIODSP) += x86/imdct36.o | |||
| YASM-OBJS-$(CONFIG_MPEGVIDEOENC) += x86/mpegvideoencdsp.o | |||
| @@ -27,128 +27,8 @@ | |||
| section .text | |||
| ; void ff_diff_bytes(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, | |||
| ; intptr_t w); | |||
| %macro DIFF_BYTES_PROLOGUE 0 | |||
| %if ARCH_X86_32 | |||
| cglobal diff_bytes, 3,5,2, dst, src1, src2 | |||
| %define wq r4q | |||
| DECLARE_REG_TMP 3 | |||
| mov wq, r3mp | |||
| %else | |||
| cglobal diff_bytes, 4,5,2, dst, src1, src2, w | |||
| DECLARE_REG_TMP 4 | |||
| %endif ; ARCH_X86_32 | |||
| %define i t0q | |||
| %endmacro | |||
| ; label to jump to if w < regsize | |||
| %macro DIFF_BYTES_LOOP_PREP 1 | |||
| mov i, wq | |||
| and i, -2 * regsize | |||
| jz %1 | |||
| add dstq, i | |||
| add src1q, i | |||
| add src2q, i | |||
| neg i | |||
| %endmacro | |||
| ; mov type used for src1q, dstq, first reg, second reg | |||
| %macro DIFF_BYTES_LOOP_CORE 4 | |||
| %if mmsize != 16 | |||
| mov%1 %3, [src1q + i] | |||
| mov%1 %4, [src1q + i + regsize] | |||
| psubb %3, [src2q + i] | |||
| psubb %4, [src2q + i + regsize] | |||
| mov%2 [dstq + i], %3 | |||
| mov%2 [regsize + dstq + i], %4 | |||
| %else | |||
| ; SSE enforces alignment of psubb operand | |||
| mov%1 %3, [src1q + i] | |||
| movu %4, [src2q + i] | |||
| psubb %3, %4 | |||
| mov%2 [dstq + i], %3 | |||
| mov%1 %3, [src1q + i + regsize] | |||
| movu %4, [src2q + i + regsize] | |||
| psubb %3, %4 | |||
| mov%2 [regsize + dstq + i], %3 | |||
| %endif | |||
| %endmacro | |||
| %macro DIFF_BYTES_BODY 2 ; mov type used for src1q, for dstq | |||
| %define regsize mmsize | |||
| .loop_%1%2: | |||
| DIFF_BYTES_LOOP_CORE %1, %2, m0, m1 | |||
| add i, 2 * regsize | |||
| jl .loop_%1%2 | |||
| .skip_main_%1%2: | |||
| and wq, 2 * regsize - 1 | |||
| jz .end_%1%2 | |||
| %if mmsize > 16 | |||
| ; fall back to narrower xmm | |||
| %define regsize mmsize / 2 | |||
| DIFF_BYTES_LOOP_PREP .setup_loop_gpr_aa | |||
| .loop2_%1%2: | |||
| DIFF_BYTES_LOOP_CORE %1, %2, xm0, xm1 | |||
| add i, 2 * regsize | |||
| jl .loop2_%1%2 | |||
| .setup_loop_gpr_%1%2: | |||
| and wq, 2 * regsize - 1 | |||
| jz .end_%1%2 | |||
| %endif | |||
| add dstq, wq | |||
| add src1q, wq | |||
| add src2q, wq | |||
| neg wq | |||
| .loop_gpr_%1%2: | |||
| mov t0b, [src1q + wq] | |||
| sub t0b, [src2q + wq] | |||
| mov [dstq + wq], t0b | |||
| inc wq | |||
| jl .loop_gpr_%1%2 | |||
| .end_%1%2: | |||
| REP_RET | |||
| %endmacro | |||
| %if ARCH_X86_32 | |||
| INIT_MMX mmx | |||
| DIFF_BYTES_PROLOGUE | |||
| %define regsize mmsize | |||
| DIFF_BYTES_LOOP_PREP .skip_main_aa | |||
| DIFF_BYTES_BODY a, a | |||
| %undef i | |||
| %endif | |||
| INIT_XMM sse2 | |||
| DIFF_BYTES_PROLOGUE | |||
| %define regsize mmsize | |||
| DIFF_BYTES_LOOP_PREP .skip_main_aa | |||
| test dstq, regsize - 1 | |||
| jnz .loop_uu | |||
| test src1q, regsize - 1 | |||
| jnz .loop_ua | |||
| DIFF_BYTES_BODY a, a | |||
| DIFF_BYTES_BODY u, a | |||
| DIFF_BYTES_BODY u, u | |||
| %undef i | |||
| %if HAVE_AVX2_EXTERNAL | |||
| INIT_YMM avx2 | |||
| DIFF_BYTES_PROLOGUE | |||
| %define regsize mmsize | |||
| ; Directly using unaligned SSE2 version is marginally faster than | |||
| ; branching based on arguments. | |||
| DIFF_BYTES_LOOP_PREP .skip_main_uu | |||
| test dstq, regsize - 1 | |||
| jnz .loop_uu | |||
| test src1q, regsize - 1 | |||
| jnz .loop_ua | |||
| DIFF_BYTES_BODY a, a | |||
| DIFF_BYTES_BODY u, a | |||
| DIFF_BYTES_BODY u, u | |||
| %undef i | |||
| %endif | |||
| ; void ff_diff_int16(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, | |||
| ; unsigned mask, int w); | |||
| %macro INT16_LOOP 2 ; %1 = a/u (aligned/unaligned), %2 = add/sub | |||
| movd m4, maskd | |||
| SPLATW m4, m4 | |||
| @@ -0,0 +1,54 @@ | |||
| /* | |||
| * SIMD-optimized HuffYUV encoding functions | |||
| * Copyright (c) 2000, 2001 Fabrice Bellard | |||
| * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at> | |||
| * | |||
| * MMX optimization by Nick Kurshev <nickols_k@mail.ru> | |||
| * | |||
| * This file is part of FFmpeg. | |||
| * | |||
| * FFmpeg is free software; you can redistribute it and/or | |||
| * modify it under the terms of the GNU Lesser General Public | |||
| * License as published by the Free Software Foundation; either | |||
| * version 2.1 of the License, or (at your option) any later version. | |||
| * | |||
| * FFmpeg is distributed in the hope that it will be useful, | |||
| * but WITHOUT ANY WARRANTY; without even the implied warranty of | |||
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |||
| * Lesser General Public License for more details. | |||
| * | |||
| * You should have received a copy of the GNU Lesser General Public | |||
| * License along with FFmpeg; if not, write to the Free Software | |||
| * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |||
| */ | |||
| #include "libavutil/attributes.h" | |||
| #include "libavutil/cpu.h" | |||
| #include "libavutil/pixdesc.h" | |||
| #include "libavutil/x86/cpu.h" | |||
| #include "libavcodec/huffyuvencdsp.h" | |||
| void ff_diff_int16_mmx (uint16_t *dst, const uint16_t *src1, const uint16_t *src2, | |||
| unsigned mask, int w); | |||
| void ff_diff_int16_sse2(uint16_t *dst, const uint16_t *src1, const uint16_t *src2, | |||
| unsigned mask, int w); | |||
| void ff_sub_hfyu_median_pred_int16_mmxext(uint16_t *dst, const uint16_t *src1, const uint16_t *src2, | |||
| unsigned mask, int w, int *left, int *left_top); | |||
| av_cold void ff_huffyuvencdsp_init_x86(HuffYUVEncDSPContext *c, AVCodecContext *avctx) /* install x86 versions of the 16-bit helpers into c, selected by the flags from av_get_cpu_flags() */ | |||
| { | |||
| av_unused int cpu_flags = av_get_cpu_flags(); | |||
| const AVPixFmtDescriptor *pix_desc = av_pix_fmt_desc_get(avctx->pix_fmt); /* result is NULL-checked before it is dereferenced below */ | |||
| if (ARCH_X86_32 && EXTERNAL_MMX(cpu_flags)) { /* the MMX diff_int16 is only selected on 32-bit x86 builds */ | |||
| c->diff_int16 = ff_diff_int16_mmx; | |||
| } | |||
| if (EXTERNAL_MMXEXT(cpu_flags) && pix_desc && pix_desc->comp[0].depth<16) { /* MMXEXT median predictor only when component 0 is less than 16 bits deep */ | |||
| c->sub_hfyu_median_pred_int16 = ff_sub_hfyu_median_pred_int16_mmxext; | |||
| } | |||
| if (EXTERNAL_SSE2(cpu_flags)) { /* tested last, so SSE2 replaces the MMX choice when both apply */ | |||
| c->diff_int16 = ff_diff_int16_sse2; | |||
| } | |||
| } | |||
| @@ -0,0 +1,150 @@ | |||
| ;************************************************************************ | |||
| ;* SIMD-optimized lossless video encoding functions | |||
| ;* Copyright (c) 2000, 2001 Fabrice Bellard | |||
| ;* Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at> | |||
| ;* | |||
| ;* MMX optimization by Nick Kurshev <nickols_k@mail.ru> | |||
| ;* Conversion to NASM format by Tiancheng "Timothy" Gu <timothygu99@gmail.com> | |||
| ;* | |||
| ;* This file is part of FFmpeg. | |||
| ;* | |||
| ;* FFmpeg is free software; you can redistribute it and/or | |||
| ;* modify it under the terms of the GNU Lesser General Public | |||
| ;* License as published by the Free Software Foundation; either | |||
| ;* version 2.1 of the License, or (at your option) any later version. | |||
| ;* | |||
| ;* FFmpeg is distributed in the hope that it will be useful, | |||
| ;* but WITHOUT ANY WARRANTY; without even the implied warranty of | |||
| ;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |||
| ;* Lesser General Public License for more details. | |||
| ;* | |||
| ;* You should have received a copy of the GNU Lesser General Public | |||
| ;* License along with FFmpeg; if not, write to the Free Software | |||
| ;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |||
| ;****************************************************************************** | |||
| %include "libavutil/x86/x86util.asm" | |||
| section .text | |||
| ; void ff_diff_bytes(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, | |||
| ; intptr_t w); | |||
| %macro DIFF_BYTES_PROLOGUE 0 | |||
| %if ARCH_X86_32 | |||
| cglobal diff_bytes, 3,5,2, dst, src1, src2 | |||
| %define wq r4q | |||
| DECLARE_REG_TMP 3 | |||
| mov wq, r3mp | |||
| %else | |||
| cglobal diff_bytes, 4,5,2, dst, src1, src2, w | |||
| DECLARE_REG_TMP 4 | |||
| %endif ; ARCH_X86_32 | |||
| %define i t0q | |||
| %endmacro | |||
| ; label to jump to if w < regsize | |||
| %macro DIFF_BYTES_LOOP_PREP 1 | |||
| mov i, wq | |||
| and i, -2 * regsize | |||
| jz %1 | |||
| add dstq, i | |||
| add src1q, i | |||
| add src2q, i | |||
| neg i | |||
| %endmacro | |||
| ; mov type used for src1q, dstq, first reg, second reg | |||
| %macro DIFF_BYTES_LOOP_CORE 4 | |||
| %if mmsize != 16 | |||
| mov%1 %3, [src1q + i] | |||
| mov%1 %4, [src1q + i + regsize] | |||
| psubb %3, [src2q + i] | |||
| psubb %4, [src2q + i + regsize] | |||
| mov%2 [dstq + i], %3 | |||
| mov%2 [regsize + dstq + i], %4 | |||
| %else | |||
| ; SSE enforces alignment of psubb operand | |||
| mov%1 %3, [src1q + i] | |||
| movu %4, [src2q + i] | |||
| psubb %3, %4 | |||
| mov%2 [dstq + i], %3 | |||
| mov%1 %3, [src1q + i + regsize] | |||
| movu %4, [src2q + i + regsize] | |||
| psubb %3, %4 | |||
| mov%2 [regsize + dstq + i], %3 | |||
| %endif | |||
| %endmacro | |||
| ; Emits the main vector loop, the optional half-width loop, the byte-wise | |||
| ; tail loop and the return. All labels are suffixed with %1%2 so the macro can | |||
| ; be expanded several times inside one function. | |||
| %macro DIFF_BYTES_BODY 2 ; mov type used for src1q, for dstq | |||
| %define regsize mmsize | |||
| ; Main loop: i is negative on entry and advances by one chunk until it hits 0. | |||
| .loop_%1%2: | |||
| DIFF_BYTES_LOOP_CORE %1, %2, m0, m1 | |||
| add i, 2 * regsize | |||
| jl .loop_%1%2 | |||
| .skip_main_%1%2: | |||
| ; Keep only the bytes the main loop did not cover; return if none are left. | |||
| and wq, 2 * regsize - 1 | |||
| jz .end_%1%2 | |||
| %if mmsize > 16 | |||
| ; fall back to narrower xmm | |||
| %define regsize mmsize / 2 | |||
| ; NOTE(review): the skip target is the fixed _aa label rather than _%1%2; | |||
| ; the code from .setup_loop_gpr_* onwards does not use %1 or %2. | |||
| DIFF_BYTES_LOOP_PREP .setup_loop_gpr_aa | |||
| .loop2_%1%2: | |||
| DIFF_BYTES_LOOP_CORE %1, %2, xm0, xm1 | |||
| add i, 2 * regsize | |||
| jl .loop2_%1%2 | |||
| .setup_loop_gpr_%1%2: | |||
| ; Remainder after the half-width loop, using the halved regsize. | |||
| and wq, 2 * regsize - 1 | |||
| jz .end_%1%2 | |||
| %endif | |||
| ; Byte-at-a-time tail: point past the remaining bytes and index with -w..-1. | |||
| add dstq, wq | |||
| add src1q, wq | |||
| add src2q, wq | |||
| neg wq | |||
| .loop_gpr_%1%2: | |||
| mov t0b, [src1q + wq] | |||
| sub t0b, [src2q + wq] | |||
| mov [dstq + wq], t0b | |||
| inc wq | |||
| jl .loop_gpr_%1%2 | |||
| .end_%1%2: | |||
| REP_RET | |||
| %endmacro | |||
| ; MMX version, assembled only when ARCH_X86_32 is set. It uses the "a" move | |||
| ; type for both src1 and dst and performs no alignment dispatch. | |||
| %if ARCH_X86_32 | |||
| INIT_MMX mmx | |||
| DIFF_BYTES_PROLOGUE | |||
| %define regsize mmsize | |||
| ; With fewer than 2 * regsize bytes, go straight to the tail handling. | |||
| DIFF_BYTES_LOOP_PREP .skip_main_aa | |||
| DIFF_BYTES_BODY a, a | |||
| %undef i | |||
| %endif | |||
| ; SSE2 version. Three bodies are emitted back to back and selected by the | |||
| ; alignment of dst and src1 relative to regsize. | |||
| INIT_XMM sse2 | |||
| DIFF_BYTES_PROLOGUE | |||
| %define regsize mmsize | |||
| DIFF_BYTES_LOOP_PREP .skip_main_aa | |||
| ; dst misaligned: unaligned moves for both src1 and dst | |||
| test dstq, regsize - 1 | |||
| jnz .loop_uu | |||
| ; dst aligned but src1 misaligned: unaligned loads, aligned stores | |||
| test src1q, regsize - 1 | |||
| jnz .loop_ua | |||
| ; both aligned: fall through into the a,a body | |||
| DIFF_BYTES_BODY a, a | |||
| DIFF_BYTES_BODY u, a | |||
| DIFF_BYTES_BODY u, u | |||
| %undef i | |||
| ; AVX2 version, assembled only when HAVE_AVX2_EXTERNAL is set. Because mmsize | |||
| ; is above 16 here, DIFF_BYTES_BODY also emits its half-width (xmm) loop. | |||
| %if HAVE_AVX2_EXTERNAL | |||
| INIT_YMM avx2 | |||
| DIFF_BYTES_PROLOGUE | |||
| %define regsize mmsize | |||
| ; Directly using unaligned SSE2 version is marginally faster than | |||
| ; branching based on arguments. | |||
| ; NOTE(review): this appears to describe only the short-input skip target | |||
| ; (.skip_main_uu); the full-chunk path below still branches on alignment. | |||
| DIFF_BYTES_LOOP_PREP .skip_main_uu | |||
| test dstq, regsize - 1 | |||
| jnz .loop_uu | |||
| test src1q, regsize - 1 | |||
| jnz .loop_ua | |||
| DIFF_BYTES_BODY a, a | |||
| DIFF_BYTES_BODY u, a | |||
| DIFF_BYTES_BODY u, u | |||
| %undef i | |||
| %endif | |||
| @@ -1,5 +1,5 @@ | |||
| /* | |||
| * SIMD-optimized HuffYUV encoding functions | |||
| * SIMD-optimized lossless video encoding functions | |||
| * Copyright (c) 2000, 2001 Fabrice Bellard | |||
| * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at> | |||
| * | |||
| @@ -24,10 +24,9 @@ | |||
| #include "libavutil/attributes.h" | |||
| #include "libavutil/cpu.h" | |||
| #include "libavutil/pixdesc.h" | |||
| #include "libavutil/x86/asm.h" | |||
| #include "libavutil/x86/cpu.h" | |||
| #include "libavcodec/huffyuvencdsp.h" | |||
| #include "libavcodec/lossless_videoencdsp.h" | |||
| #include "libavcodec/mathops.h" | |||
| void ff_diff_bytes_mmx(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, | |||
| @@ -36,18 +35,12 @@ void ff_diff_bytes_sse2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, | |||
| intptr_t w); | |||
| void ff_diff_bytes_avx2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, | |||
| intptr_t w); | |||
| void ff_diff_int16_mmx (uint16_t *dst, const uint16_t *src1, const uint16_t *src2, | |||
| unsigned mask, int w); | |||
| void ff_diff_int16_sse2(uint16_t *dst, const uint16_t *src1, const uint16_t *src2, | |||
| unsigned mask, int w); | |||
| void ff_sub_hfyu_median_pred_int16_mmxext(uint16_t *dst, const uint16_t *src1, const uint16_t *src2, | |||
| unsigned mask, int w, int *left, int *left_top); | |||
| #if HAVE_INLINE_ASM | |||
| static void sub_hfyu_median_pred_mmxext(uint8_t *dst, const uint8_t *src1, | |||
| const uint8_t *src2, intptr_t w, | |||
| int *left, int *left_top) | |||
| static void sub_median_pred_mmxext(uint8_t *dst, const uint8_t *src1, | |||
| const uint8_t *src2, intptr_t w, | |||
| int *left, int *left_top) | |||
| { | |||
| x86_reg i = 0; | |||
| uint8_t l, lt; | |||
| @@ -87,29 +80,22 @@ static void sub_hfyu_median_pred_mmxext(uint8_t *dst, const uint8_t *src1, | |||
| #endif /* HAVE_INLINE_ASM */ | |||
| av_cold void ff_huffyuvencdsp_init_x86(HuffYUVEncDSPContext *c, AVCodecContext *avctx) | |||
| av_cold void ff_llvidencdsp_init_x86(LLVidEncDSPContext *c) | |||
| { | |||
| av_unused int cpu_flags = av_get_cpu_flags(); | |||
| const AVPixFmtDescriptor *pix_desc = av_pix_fmt_desc_get(avctx->pix_fmt); | |||
| if (ARCH_X86_32 && EXTERNAL_MMX(cpu_flags)) { | |||
| c->diff_bytes = ff_diff_bytes_mmx; | |||
| c->diff_int16 = ff_diff_int16_mmx; | |||
| } | |||
| #if HAVE_INLINE_ASM | |||
| if (INLINE_MMXEXT(cpu_flags)) { | |||
| c->sub_hfyu_median_pred = sub_hfyu_median_pred_mmxext; | |||
| c->sub_median_pred = sub_median_pred_mmxext; | |||
| } | |||
| #endif /* HAVE_INLINE_ASM */ | |||
| if (EXTERNAL_MMXEXT(cpu_flags) && pix_desc && pix_desc->comp[0].depth<16) { | |||
| c->sub_hfyu_median_pred_int16 = ff_sub_hfyu_median_pred_int16_mmxext; | |||
| } | |||
| if (EXTERNAL_SSE2(cpu_flags)) { | |||
| c->diff_bytes = ff_diff_bytes_sse2; | |||
| c->diff_int16 = ff_diff_int16_sse2; | |||
| } | |||
| if (EXTERNAL_AVX2_FAST(cpu_flags)) { | |||