* commit '0d439fbede03854eac8a978cccf21a3425a3c82d': dsputil: Split off HuffYUV decoding bits into their own context. Conflicts: configure, libavcodec/dsputil.c, libavcodec/dsputil.h, libavcodec/huffyuv.h, libavcodec/huffyuvdec.c, libavcodec/lagarith.c, libavcodec/vble.c, libavcodec/x86/Makefile, libavcodec/x86/dsputil.asm, libavcodec/x86/dsputil_init.c, libavcodec/x86/dsputil_mmx.c. Merged-by: Michael Niedermayer <michaelni@gmx.at> (tags/n2.3)
| @@ -1805,6 +1805,7 @@ CONFIG_EXTRA=" | |||
| h264qpel | |||
| hpeldsp | |||
| huffman | |||
| huffyuvdsp | |||
| intrax8 | |||
| lgplv3 | |||
| llviddsp | |||
| @@ -2059,7 +2060,7 @@ h263p_encoder_select="h263_encoder" | |||
| h264_decoder_select="cabac golomb h264chroma h264dsp h264pred h264qpel videodsp" | |||
| h264_decoder_suggest="error_resilience" | |||
| hevc_decoder_select="cabac dsputil golomb videodsp" | |||
| huffyuv_decoder_select="dsputil llviddsp" | |||
| huffyuv_decoder_select="dsputil huffyuvdsp llviddsp" | |||
| huffyuv_encoder_select="dsputil huffman llviddsp" | |||
| iac_decoder_select="imc_decoder" | |||
| imc_decoder_select="dsputil fft mdct sinewin" | |||
| @@ -2068,7 +2069,7 @@ interplay_video_decoder_select="hpeldsp" | |||
| jpegls_decoder_select="golomb mjpeg_decoder" | |||
| jpegls_encoder_select="golomb" | |||
| jv_decoder_select="dsputil" | |||
| lagarith_decoder_select="dsputil" | |||
| lagarith_decoder_select="huffyuvdsp" | |||
| ljpeg_encoder_select="aandcttables mpegvideoenc" | |||
| loco_decoder_select="golomb" | |||
| mdec_decoder_select="dsputil error_resilience mpegvideo" | |||
| @@ -2151,7 +2152,7 @@ tscc_decoder_select="zlib" | |||
| twinvq_decoder_select="mdct lsp sinewin" | |||
| utvideo_decoder_select="dsputil" | |||
| utvideo_encoder_select="dsputil huffman" | |||
| vble_decoder_select="dsputil" | |||
| vble_decoder_select="huffyuvdsp" | |||
| vc1_decoder_select="error_resilience h263_decoder h264chroma h264qpel intrax8" | |||
| vc1image_decoder_select="vc1_decoder" | |||
| vorbis_decoder_select="mdct" | |||
| @@ -54,6 +54,7 @@ OBJS-$(CONFIG_H264PRED) += h264pred.o | |||
| OBJS-$(CONFIG_H264QPEL) += h264qpel.o | |||
| OBJS-$(CONFIG_HPELDSP) += hpeldsp.o | |||
| OBJS-$(CONFIG_HUFFMAN) += huffman.o | |||
| OBJS-$(CONFIG_HUFFYUVDSP) += huffyuvdsp.o | |||
| OBJS-$(CONFIG_INTRAX8) += intrax8.o intrax8dsp.o | |||
| OBJS-$(CONFIG_LIBXVID) += libxvid_rc.o | |||
| OBJS-$(CONFIG_LLVIDDSP) += lossless_videodsp.o | |||
| @@ -1780,19 +1780,6 @@ void ff_set_cmp(DSPContext *c, me_cmp_func *cmp, int type) | |||
| } | |||
| } | |||
| static void add_bytes_c(uint8_t *dst, uint8_t *src, int w) | |||
| { | |||
| long i; | |||
| for (i = 0; i <= w - (int) sizeof(long); i += sizeof(long)) { | |||
| long a = *(long *) (src + i); | |||
| long b = *(long *) (dst + i); | |||
| *(long *) (dst + i) = ((a & pb_7f) + (b & pb_7f)) ^ ((a ^ b) & pb_80); | |||
| } | |||
| for (; i < w; i++) | |||
| dst[i + 0] += src[i + 0]; | |||
| } | |||
| static void diff_bytes_c(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int w) | |||
| { | |||
| long i; | |||
| @@ -1821,26 +1808,6 @@ static void diff_bytes_c(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, | |||
| dst[i + 0] = src1[i + 0] - src2[i + 0]; | |||
| } | |||
| static void add_hfyu_median_prediction_c(uint8_t *dst, const uint8_t *src1, | |||
| const uint8_t *diff, int w, | |||
| int *left, int *left_top) | |||
| { | |||
| int i; | |||
| uint8_t l, lt; | |||
| l = *left; | |||
| lt = *left_top; | |||
| for (i = 0; i < w; i++) { | |||
| l = mid_pred(l, src1[i], (l + src1[i] - lt) & 0xFF) + diff[i]; | |||
| lt = src1[i]; | |||
| dst[i] = l; | |||
| } | |||
| *left = l; | |||
| *left_top = lt; | |||
| } | |||
| static void sub_hfyu_median_prediction_c(uint8_t *dst, const uint8_t *src1, | |||
| const uint8_t *src2, int w, | |||
| int *left, int *left_top) | |||
| @@ -1862,66 +1829,6 @@ static void sub_hfyu_median_prediction_c(uint8_t *dst, const uint8_t *src1, | |||
| *left_top = lt; | |||
| } | |||
| static int add_hfyu_left_prediction_c(uint8_t *dst, const uint8_t *src, | |||
| int w, int acc) | |||
| { | |||
| int i; | |||
| for (i = 0; i < w - 1; i++) { | |||
| acc += src[i]; | |||
| dst[i] = acc; | |||
| i++; | |||
| acc += src[i]; | |||
| dst[i] = acc; | |||
| } | |||
| for (; i < w; i++) { | |||
| acc += src[i]; | |||
| dst[i] = acc; | |||
| } | |||
| return acc; | |||
| } | |||
| #if HAVE_BIGENDIAN | |||
| #define B 3 | |||
| #define G 2 | |||
| #define R 1 | |||
| #define A 0 | |||
| #else | |||
| #define B 0 | |||
| #define G 1 | |||
| #define R 2 | |||
| #define A 3 | |||
| #endif | |||
| static void add_hfyu_left_prediction_bgr32_c(uint8_t *dst, const uint8_t *src, | |||
| int w, int *red, int *green, | |||
| int *blue, int *alpha) | |||
| { | |||
| int i, r = *red, g = *green, b = *blue, a = *alpha; | |||
| for (i = 0; i < w; i++) { | |||
| b += src[4 * i + B]; | |||
| g += src[4 * i + G]; | |||
| r += src[4 * i + R]; | |||
| a += src[4 * i + A]; | |||
| dst[4 * i + B] = b; | |||
| dst[4 * i + G] = g; | |||
| dst[4 * i + R] = r; | |||
| dst[4 * i + A] = a; | |||
| } | |||
| *red = r; | |||
| *green = g; | |||
| *blue = b; | |||
| *alpha = a; | |||
| } | |||
| #undef B | |||
| #undef G | |||
| #undef R | |||
| #undef A | |||
| #define BUTTERFLY2(o1, o2, i1, i2) \ | |||
| o1 = (i1) + (i2); \ | |||
| o2 = (i1) - (i2); | |||
| @@ -2774,11 +2681,6 @@ av_cold void ff_dsputil_init(DSPContext *c, AVCodecContext *avctx) | |||
| c->ssd_int8_vs_int16 = ssd_int8_vs_int16_c; | |||
| c->add_bytes = add_bytes_c; | |||
| c->add_hfyu_median_prediction = add_hfyu_median_prediction_c; | |||
| c->add_hfyu_left_prediction = add_hfyu_left_prediction_c; | |||
| c->add_hfyu_left_prediction_bgr32 = add_hfyu_left_prediction_bgr32_c; | |||
| c->diff_bytes = diff_bytes_c; | |||
| c->sub_hfyu_median_prediction = sub_hfyu_median_prediction_c; | |||
| @@ -188,9 +188,6 @@ typedef struct DSPContext { | |||
| me_cmp_func pix_abs[2][4]; | |||
| /* HuffYUV specific */ | |||
| void (*add_bytes)(uint8_t *dst /* align 16 */, | |||
| uint8_t *src /* align 16 */, | |||
| int w); | |||
| void (*diff_bytes)(uint8_t *dst /* align 16 */, | |||
| const uint8_t *src1 /* align 16 */, | |||
| const uint8_t *src2 /* align 1 */, | |||
| @@ -202,15 +199,7 @@ typedef struct DSPContext { | |||
| void (*sub_hfyu_median_prediction)(uint8_t *dst, const uint8_t *src1, | |||
| const uint8_t *src2, int w, | |||
| int *left, int *left_top); | |||
| void (*add_hfyu_median_prediction)(uint8_t *dst, const uint8_t *top, | |||
| const uint8_t *diff, int w, | |||
| int *left, int *left_top); | |||
| int (*add_hfyu_left_prediction)(uint8_t *dst, const uint8_t *src, | |||
| int w, int left); | |||
| void (*add_hfyu_left_prediction_bgr32)(uint8_t *dst, const uint8_t *src, | |||
| int w, int *red, int *green, | |||
| int *blue, int *alpha); | |||
| /* this might write to dst[w] */ | |||
| void (*bswap_buf)(uint32_t *dst, const uint32_t *src, int w); | |||
| void (*bswap16_buf)(uint16_t *dst, const uint16_t *src, int len); | |||
| @@ -33,6 +33,7 @@ | |||
| #include "libavutil/mem.h" | |||
| #include "avcodec.h" | |||
| #include "dsputil.h" | |||
| #include "huffyuv.h" | |||
| int ff_huffyuv_generate_bits_table(uint32_t *dst, const uint8_t *len_table, int n) | |||
| @@ -34,6 +34,7 @@ | |||
| #include "avcodec.h" | |||
| #include "dsputil.h" | |||
| #include "get_bits.h" | |||
| #include "huffyuvdsp.h" | |||
| #include "put_bits.h" | |||
| #include "lossless_videodsp.h" | |||
| @@ -95,6 +96,7 @@ typedef struct HYuvContext { | |||
| uint8_t *bitstream_buffer; | |||
| unsigned int bitstream_buffer_size; | |||
| DSPContext dsp; | |||
| HuffYUVDSPContext hdsp; | |||
| LLVidDSPContext llviddsp; | |||
| } HYuvContext; | |||
| @@ -33,6 +33,7 @@ | |||
| #include "avcodec.h" | |||
| #include "get_bits.h" | |||
| #include "huffyuv.h" | |||
| #include "huffyuvdsp.h" | |||
| #include "thread.h" | |||
| #include "libavutil/pixdesc.h" | |||
| @@ -265,6 +266,7 @@ static av_cold int decode_init(AVCodecContext *avctx) | |||
| { | |||
| HYuvContext *s = avctx->priv_data; | |||
| ff_huffyuvdsp_init(&s->hdsp); | |||
| memset(s->vlc, 0, 4 * sizeof(VLC)); | |||
| s->interlaced = avctx->height > 288; | |||
| @@ -746,7 +748,7 @@ static void draw_slice(HYuvContext *s, AVFrame *frame, int y) | |||
| static int left_prediction(HYuvContext *s, uint8_t *dst, const uint8_t *src, int w, int acc) | |||
| { | |||
| if (s->bps <= 8) { | |||
| return s->dsp.add_hfyu_left_prediction(dst, src, w, acc); | |||
| return s->hdsp.add_hfyu_left_pred(dst, src, w, acc); | |||
| } else { | |||
| return s->llviddsp.add_hfyu_left_prediction_int16(( uint16_t *)dst, (const uint16_t *)src, s->n-1, w, acc); | |||
| } | |||
| @@ -755,7 +757,7 @@ static int left_prediction(HYuvContext *s, uint8_t *dst, const uint8_t *src, int | |||
| static void add_bytes(HYuvContext *s, uint8_t *dst, uint8_t *src, int w) | |||
| { | |||
| if (s->bps <= 8) { | |||
| s->dsp.add_bytes(dst, src, w); | |||
| s->hdsp.add_bytes(dst, src, w); | |||
| } else { | |||
| s->llviddsp.add_int16((uint16_t*)dst, (const uint16_t*)src, s->n - 1, w); | |||
| } | |||
| @@ -764,7 +766,7 @@ static void add_bytes(HYuvContext *s, uint8_t *dst, uint8_t *src, int w) | |||
| static void add_median_prediction(HYuvContext *s, uint8_t *dst, const uint8_t *src, const uint8_t *diff, int w, int *left, int *left_top) | |||
| { | |||
| if (s->bps <= 8) { | |||
| s->dsp.add_hfyu_median_prediction(dst, src, diff, w, left, left_top); | |||
| s->hdsp.add_hfyu_median_pred(dst, src, diff, w, left, left_top); | |||
| } else { | |||
| s->llviddsp.add_hfyu_median_prediction_int16((uint16_t *)dst, (const uint16_t *)src, (const uint16_t *)diff, s->n-1, w, left, left_top); | |||
| } | |||
| @@ -903,10 +905,10 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame, | |||
| case LEFT: | |||
| case PLANE: | |||
| decode_422_bitstream(s, width-2); | |||
| lefty = s->dsp.add_hfyu_left_prediction(p->data[0] + 2, s->temp[0], width-2, lefty); | |||
| lefty = s->hdsp.add_hfyu_left_pred(p->data[0] + 2, s->temp[0], width - 2, lefty); | |||
| if (!(s->flags&CODEC_FLAG_GRAY)) { | |||
| leftu = s->dsp.add_hfyu_left_prediction(p->data[1] + 1, s->temp[1], width2 - 1, leftu); | |||
| leftv = s->dsp.add_hfyu_left_prediction(p->data[2] + 1, s->temp[2], width2 - 1, leftv); | |||
| leftu = s->hdsp.add_hfyu_left_pred(p->data[1] + 1, s->temp[1], width2 - 1, leftu); | |||
| leftv = s->hdsp.add_hfyu_left_pred(p->data[2] + 1, s->temp[2], width2 - 1, leftv); | |||
| } | |||
| for (cy = y = 1; y < s->height; y++, cy++) { | |||
| @@ -917,10 +919,10 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame, | |||
| ydst = p->data[0] + p->linesize[0] * y; | |||
| lefty = s->dsp.add_hfyu_left_prediction(ydst, s->temp[0], width, lefty); | |||
| lefty = s->hdsp.add_hfyu_left_pred(ydst, s->temp[0], width, lefty); | |||
| if (s->predictor == PLANE) { | |||
| if (y > s->interlaced) | |||
| s->dsp.add_bytes(ydst, ydst - fake_ystride, width); | |||
| s->hdsp.add_bytes(ydst, ydst - fake_ystride, width); | |||
| } | |||
| y++; | |||
| if (y >= s->height) break; | |||
| @@ -933,17 +935,17 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame, | |||
| vdst = p->data[2] + p->linesize[2]*cy; | |||
| decode_422_bitstream(s, width); | |||
| lefty = s->dsp.add_hfyu_left_prediction(ydst, s->temp[0], width, lefty); | |||
| lefty = s->hdsp.add_hfyu_left_pred(ydst, s->temp[0], width, lefty); | |||
| if (!(s->flags & CODEC_FLAG_GRAY)) { | |||
| leftu= s->dsp.add_hfyu_left_prediction(udst, s->temp[1], width2, leftu); | |||
| leftv= s->dsp.add_hfyu_left_prediction(vdst, s->temp[2], width2, leftv); | |||
| leftu = s->hdsp.add_hfyu_left_pred(udst, s->temp[1], width2, leftu); | |||
| leftv = s->hdsp.add_hfyu_left_pred(vdst, s->temp[2], width2, leftv); | |||
| } | |||
| if (s->predictor == PLANE) { | |||
| if (cy > s->interlaced) { | |||
| s->dsp.add_bytes(ydst, ydst - fake_ystride, width); | |||
| s->hdsp.add_bytes(ydst, ydst - fake_ystride, width); | |||
| if (!(s->flags & CODEC_FLAG_GRAY)) { | |||
| s->dsp.add_bytes(udst, udst - fake_ustride, width2); | |||
| s->dsp.add_bytes(vdst, vdst - fake_vstride, width2); | |||
| s->hdsp.add_bytes(udst, udst - fake_ustride, width2); | |||
| s->hdsp.add_bytes(vdst, vdst - fake_vstride, width2); | |||
| } | |||
| } | |||
| } | |||
| @@ -954,10 +956,10 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame, | |||
| case MEDIAN: | |||
| /* first line except first 2 pixels is left predicted */ | |||
| decode_422_bitstream(s, width - 2); | |||
| lefty= s->dsp.add_hfyu_left_prediction(p->data[0] + 2, s->temp[0], width - 2, lefty); | |||
| lefty = s->hdsp.add_hfyu_left_pred(p->data[0] + 2, s->temp[0], width - 2, lefty); | |||
| if (!(s->flags & CODEC_FLAG_GRAY)) { | |||
| leftu = s->dsp.add_hfyu_left_prediction(p->data[1] + 1, s->temp[1], width2 - 1, leftu); | |||
| leftv = s->dsp.add_hfyu_left_prediction(p->data[2] + 1, s->temp[2], width2 - 1, leftv); | |||
| leftu = s->hdsp.add_hfyu_left_pred(p->data[1] + 1, s->temp[1], width2 - 1, leftu); | |||
| leftv = s->hdsp.add_hfyu_left_pred(p->data[2] + 1, s->temp[2], width2 - 1, leftv); | |||
| } | |||
| cy = y = 1; | |||
| @@ -965,31 +967,31 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame, | |||
| /* second line is left predicted for interlaced case */ | |||
| if (s->interlaced) { | |||
| decode_422_bitstream(s, width); | |||
| lefty = s->dsp.add_hfyu_left_prediction(p->data[0] + p->linesize[0], s->temp[0], width, lefty); | |||
| lefty = s->hdsp.add_hfyu_left_pred(p->data[0] + p->linesize[0], s->temp[0], width, lefty); | |||
| if (!(s->flags & CODEC_FLAG_GRAY)) { | |||
| leftu = s->dsp.add_hfyu_left_prediction(p->data[1] + p->linesize[2], s->temp[1], width2, leftu); | |||
| leftv = s->dsp.add_hfyu_left_prediction(p->data[2] + p->linesize[1], s->temp[2], width2, leftv); | |||
| leftu = s->hdsp.add_hfyu_left_pred(p->data[1] + p->linesize[2], s->temp[1], width2, leftu); | |||
| leftv = s->hdsp.add_hfyu_left_pred(p->data[2] + p->linesize[1], s->temp[2], width2, leftv); | |||
| } | |||
| y++; cy++; | |||
| } | |||
| /* next 4 pixels are left predicted too */ | |||
| decode_422_bitstream(s, 4); | |||
| lefty = s->dsp.add_hfyu_left_prediction(p->data[0] + fake_ystride, s->temp[0], 4, lefty); | |||
| lefty = s->hdsp.add_hfyu_left_pred(p->data[0] + fake_ystride, s->temp[0], 4, lefty); | |||
| if (!(s->flags&CODEC_FLAG_GRAY)) { | |||
| leftu = s->dsp.add_hfyu_left_prediction(p->data[1] + fake_ustride, s->temp[1], 2, leftu); | |||
| leftv = s->dsp.add_hfyu_left_prediction(p->data[2] + fake_vstride, s->temp[2], 2, leftv); | |||
| leftu = s->hdsp.add_hfyu_left_pred(p->data[1] + fake_ustride, s->temp[1], 2, leftu); | |||
| leftv = s->hdsp.add_hfyu_left_pred(p->data[2] + fake_vstride, s->temp[2], 2, leftv); | |||
| } | |||
| /* next line except the first 4 pixels is median predicted */ | |||
| lefttopy = p->data[0][3]; | |||
| decode_422_bitstream(s, width - 4); | |||
| s->dsp.add_hfyu_median_prediction(p->data[0] + fake_ystride+4, p->data[0]+4, s->temp[0], width-4, &lefty, &lefttopy); | |||
| s->hdsp.add_hfyu_median_pred(p->data[0] + fake_ystride + 4, p->data[0] + 4, s->temp[0], width - 4, &lefty, &lefttopy); | |||
| if (!(s->flags&CODEC_FLAG_GRAY)) { | |||
| lefttopu = p->data[1][1]; | |||
| lefttopv = p->data[2][1]; | |||
| s->dsp.add_hfyu_median_prediction(p->data[1] + fake_ustride+2, p->data[1] + 2, s->temp[1], width2 - 2, &leftu, &lefttopu); | |||
| s->dsp.add_hfyu_median_prediction(p->data[2] + fake_vstride+2, p->data[2] + 2, s->temp[2], width2 - 2, &leftv, &lefttopv); | |||
| s->hdsp.add_hfyu_median_pred(p->data[1] + fake_ustride + 2, p->data[1] + 2, s->temp[1], width2 - 2, &leftu, &lefttopu); | |||
| s->hdsp.add_hfyu_median_pred(p->data[2] + fake_vstride + 2, p->data[2] + 2, s->temp[2], width2 - 2, &leftv, &lefttopv); | |||
| } | |||
| y++; cy++; | |||
| @@ -1000,7 +1002,7 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame, | |||
| while (2 * cy > y) { | |||
| decode_gray_bitstream(s, width); | |||
| ydst = p->data[0] + p->linesize[0] * y; | |||
| s->dsp.add_hfyu_median_prediction(ydst, ydst - fake_ystride, s->temp[0], width, &lefty, &lefttopy); | |||
| s->hdsp.add_hfyu_median_pred(ydst, ydst - fake_ystride, s->temp[0], width, &lefty, &lefttopy); | |||
| y++; | |||
| } | |||
| if (y >= height) break; | |||
| @@ -1013,10 +1015,10 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame, | |||
| udst = p->data[1] + p->linesize[1] * cy; | |||
| vdst = p->data[2] + p->linesize[2] * cy; | |||
| s->dsp.add_hfyu_median_prediction(ydst, ydst - fake_ystride, s->temp[0], width, &lefty, &lefttopy); | |||
| s->hdsp.add_hfyu_median_pred(ydst, ydst - fake_ystride, s->temp[0], width, &lefty, &lefttopy); | |||
| if (!(s->flags & CODEC_FLAG_GRAY)) { | |||
| s->dsp.add_hfyu_median_prediction(udst, udst - fake_ustride, s->temp[1], width2, &leftu, &lefttopu); | |||
| s->dsp.add_hfyu_median_prediction(vdst, vdst - fake_vstride, s->temp[2], width2, &leftv, &lefttopv); | |||
| s->hdsp.add_hfyu_median_pred(udst, udst - fake_ustride, s->temp[1], width2, &leftu, &lefttopu); | |||
| s->hdsp.add_hfyu_median_pred(vdst, vdst - fake_vstride, s->temp[2], width2, &leftv, &lefttopv); | |||
| } | |||
| } | |||
| @@ -1047,19 +1049,19 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame, | |||
| case LEFT: | |||
| case PLANE: | |||
| decode_bgr_bitstream(s, width - 1); | |||
| s->dsp.add_hfyu_left_prediction_bgr32(p->data[0] + last_line+4, s->temp[0], width - 1, &leftr, &leftg, &leftb, &lefta); | |||
| s->hdsp.add_hfyu_left_pred_bgr32(p->data[0] + last_line + 4, s->temp[0], width - 1, &leftr, &leftg, &leftb, &lefta); | |||
| for (y = s->height - 2; y >= 0; y--) { //Yes it is stored upside down. | |||
| decode_bgr_bitstream(s, width); | |||
| s->dsp.add_hfyu_left_prediction_bgr32(p->data[0] + p->linesize[0]*y, s->temp[0], width, &leftr, &leftg, &leftb, &lefta); | |||
| s->hdsp.add_hfyu_left_pred_bgr32(p->data[0] + p->linesize[0] * y, s->temp[0], width, &leftr, &leftg, &leftb, &lefta); | |||
| if (s->predictor == PLANE) { | |||
| if (s->bitstream_bpp != 32) lefta = 0; | |||
| if ((y & s->interlaced) == 0 && | |||
| y < s->height - 1 - s->interlaced) { | |||
| s->dsp.add_bytes(p->data[0] + p->linesize[0] * y, | |||
| p->data[0] + p->linesize[0] * y + | |||
| fake_ystride, fake_ystride); | |||
| s->hdsp.add_bytes(p->data[0] + p->linesize[0] * y, | |||
| p->data[0] + p->linesize[0] * y + | |||
| fake_ystride, fake_ystride); | |||
| } | |||
| } | |||
| } | |||
| @@ -0,0 +1,132 @@ | |||
| /* | |||
| * This file is part of FFmpeg. | |||
| * | |||
| * FFmpeg is free software; you can redistribute it and/or | |||
| * modify it under the terms of the GNU Lesser General Public | |||
| * License as published by the Free Software Foundation; either | |||
| * version 2.1 of the License, or (at your option) any later version. | |||
| * | |||
| * FFmpeg is distributed in the hope that it will be useful, | |||
| * but WITHOUT ANY WARRANTY; without even the implied warranty of | |||
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |||
| * Lesser General Public License for more details. | |||
| * | |||
| * You should have received a copy of the GNU Lesser General Public | |||
| * License along with FFmpeg; if not, write to the Free Software | |||
| * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |||
| */ | |||
| #include <stdint.h> | |||
| #include "config.h" | |||
| #include "libavutil/attributes.h" | |||
| #include "mathops.h" | |||
| #include "huffyuvdsp.h" | |||
/* Per-byte masks replicated across one native unsigned long:
 * pb_7f is 0x7f7f7f7f or 0x7f7f7f7f7f7f7f7f (low seven bits of every byte),
 * pb_80 is 0x80808080 or 0x8080808080808080 (top bit of every byte),
 * depending on the width of long on the target. */
#define pb_7f (~0UL / 255 * 0x7f)
#define pb_80 (~0UL / 255 * 0x80)

/**
 * Add src to dst in place, byte by byte, each byte wrapping modulo 256.
 *
 * The bulk of the row is handled one long at a time: the low seven bits of
 * every byte are summed (so no carry can leak into the neighbouring byte) and
 * the top bit of every byte is then patched in with an XOR. Whatever does not
 * fill a whole long is added one byte at a time.
 *
 * @param dst destination row, updated in place; accessed through long
 *            pointers, so it must be suitably aligned for that
 * @param src row to add; same access pattern as dst
 * @param w   number of bytes to process
 */
static void add_bytes_c(uint8_t *dst, uint8_t *src, int w)
{
    const int step = (int) sizeof(long);
    long pos = 0;

    while (pos <= w - step) {
        long *out = (long *) (dst + pos);
        long s    = *(long *) (src + pos);
        long d    = *out;

        *out = ((s & pb_7f) + (d & pb_7f)) ^ ((s ^ d) & pb_80);
        pos += step;
    }

    while (pos < w) {
        dst[pos] += src[pos];
        pos++;
    }
}
/**
 * Reconstruct one row from its residuals using the HuffYUV median predictor.
 *
 * For every position the predictor is mid_pred() (from mathops.h; presumably
 * the median of its three arguments) applied to the left sample, the sample
 * from the row above, and their gradient (left + top - top_left) & 0xFF.
 * The residual is added to that prediction and the result wraps to 8 bits.
 *
 * @param dst      output row
 * @param src1     row above the one being reconstructed
 * @param diff     residuals for this row
 * @param w        number of samples to process
 * @param left     in: sample to the left of dst[0]; out: last sample written
 * @param left_top in: sample to the left of src1[0]; out: last sample read
 *                 from src1
 */
static void add_hfyu_median_pred_c(uint8_t *dst, const uint8_t *src1,
                                   const uint8_t *diff, int w,
                                   int *left, int *left_top)
{
    /* Both running values are deliberately 8 bits wide so they wrap. */
    uint8_t cur      = *left;
    uint8_t top_left = *left_top;
    int x;

    for (x = 0; x < w; x++) {
        const uint8_t top      = src1[x];
        const int     gradient = (cur + top - top_left) & 0xFF;

        cur      = mid_pred(cur, top, gradient) + diff[x];
        top_left = top;
        dst[x]   = cur;
    }

    *left     = cur;
    *left_top = top_left;
}
/**
 * Reconstruct one row from left-prediction residuals (a running sum).
 *
 * Each output byte is the low 8 bits of the accumulator after the
 * corresponding residual has been added. The accumulator itself is an int
 * and is not masked. Every src[x] is read before dst[x] is written, so the
 * function may be used in place (dst == src).
 *
 * @param dst output row
 * @param src residuals
 * @param w   number of samples to process
 * @param acc starting value of the accumulator (the sample left of dst[0])
 * @return the accumulator after the last sample
 */
static int add_hfyu_left_pred_c(uint8_t *dst, const uint8_t *src, int w,
                                int acc)
{
    int x = 0;

    while (x < w) {
        acc   += src[x];
        dst[x] = acc;
        x++;
    }

    return acc;
}
/* Byte offset of each channel inside one 4-byte pixel; the order is mirrored
 * on big-endian targets. The names are local to this function and are
 * undefined again right after it. */
#if HAVE_BIGENDIAN
#define B 3
#define G 2
#define R 1
#define A 0
#else
#define B 0
#define G 1
#define R 2
#define A 3
#endif

/**
 * Reconstruct a row of 4-byte pixels from left-prediction residuals, keeping
 * an independent running sum per channel.
 *
 * Each output byte is the low 8 bits of its channel's accumulator; the
 * accumulators themselves are ints and are handed back unmasked.
 *
 * @param dst   output row, 4 bytes per pixel
 * @param src   residuals, 4 bytes per pixel
 * @param w     number of pixels to process
 * @param red   in: starting red accumulator; out: value after the last pixel
 * @param green same, for the green channel
 * @param blue  same, for the blue channel
 * @param alpha same, for the alpha channel
 */
static void add_hfyu_left_pred_bgr32_c(uint8_t *dst, const uint8_t *src,
                                       int w, int *red, int *green,
                                       int *blue, int *alpha)
{
    int sum_r = *red;
    int sum_g = *green;
    int sum_b = *blue;
    int sum_a = *alpha;
    int x;

    for (x = 0; x < w; x++) {
        const uint8_t *in  = src + 4 * x;
        uint8_t       *out = dst + 4 * x;

        /* Read the whole input pixel before storing anything. */
        sum_b += in[B];
        sum_g += in[G];
        sum_r += in[R];
        sum_a += in[A];

        out[B] = sum_b;
        out[G] = sum_g;
        out[R] = sum_r;
        out[A] = sum_a;
    }

    *red   = sum_r;
    *green = sum_g;
    *blue  = sum_b;
    *alpha = sum_a;
}
#undef B
#undef G
#undef R
#undef A
| av_cold void ff_huffyuvdsp_init(HuffYUVDSPContext *c) | |||
| { | |||
| c->add_bytes = add_bytes_c; | |||
| c->add_hfyu_median_pred = add_hfyu_median_pred_c; | |||
| c->add_hfyu_left_pred = add_hfyu_left_pred_c; | |||
| c->add_hfyu_left_pred_bgr32 = add_hfyu_left_pred_bgr32_c; | |||
| if (ARCH_X86) | |||
| ff_huffyuvdsp_init_x86(c); | |||
| } | |||
| @@ -0,0 +1,41 @@ | |||
| /* | |||
| * This file is part of FFmpeg. | |||
| * | |||
| * FFmpeg is free software; you can redistribute it and/or | |||
| * modify it under the terms of the GNU Lesser General Public | |||
| * License as published by the Free Software Foundation; either | |||
| * version 2.1 of the License, or (at your option) any later version. | |||
| * | |||
| * FFmpeg is distributed in the hope that it will be useful, | |||
| * but WITHOUT ANY WARRANTY; without even the implied warranty of | |||
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |||
| * Lesser General Public License for more details. | |||
| * | |||
| * You should have received a copy of the GNU Lesser General Public | |||
| * License along with FFmpeg; if not, write to the Free Software | |||
| * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |||
| */ | |||
| #ifndef AVCODEC_HUFFYUVDSP_H | |||
| #define AVCODEC_HUFFYUVDSP_H | |||
| #include <stdint.h> | |||
/**
 * Table of the row primitives shared by the HuffYUV-style decoders.
 * ff_huffyuvdsp_init() fills in every entry.
 */
typedef struct HuffYUVDSPContext {
    /**
     * In-place byte-wise addition: dst[i] += src[i] for i in [0, w),
     * each byte wrapping modulo 256.
     */
    void (*add_bytes)(uint8_t *dst /* align 16 */,
                      uint8_t *src /* align 16 */,
                      int w);

    /**
     * Median-predicted reconstruction of one row: dst is rebuilt from the
     * residuals in diff using the row above (top). *left and *left_top carry
     * the predictor state in and are updated on return.
     */
    void (*add_hfyu_median_pred)(uint8_t *dst, const uint8_t *top,
                                 const uint8_t *diff, int w,
                                 int *left, int *left_top);

    /**
     * Left-predicted reconstruction of one row: dst is the running sum of
     * the residuals in src, seeded with left.
     * @return the accumulator after the last sample
     */
    int (*add_hfyu_left_pred)(uint8_t *dst, const uint8_t *src,
                              int w, int left);

    /**
     * Left-predicted reconstruction of a row of 4-byte pixels, with one
     * running sum per channel; w counts pixels. The four accumulators are
     * passed in and updated through the pointers.
     */
    void (*add_hfyu_left_pred_bgr32)(uint8_t *dst, const uint8_t *src,
                                     int w, int *red, int *green,
                                     int *blue, int *alpha);
} HuffYUVDSPContext;
| void ff_huffyuvdsp_init(HuffYUVDSPContext *c); | |||
| void ff_huffyuvdsp_init_ppc(HuffYUVDSPContext *c); | |||
| void ff_huffyuvdsp_init_x86(HuffYUVDSPContext *c); | |||
| #endif /* AVCODEC_HUFFYUVDSP_H */ | |||
| @@ -30,7 +30,7 @@ | |||
| #include "avcodec.h" | |||
| #include "get_bits.h" | |||
| #include "mathops.h" | |||
| #include "dsputil.h" | |||
| #include "huffyuvdsp.h" | |||
| #include "lagarithrac.h" | |||
| #include "thread.h" | |||
| @@ -50,7 +50,7 @@ enum LagarithFrameType { | |||
| typedef struct LagarithContext { | |||
| AVCodecContext *avctx; | |||
| DSPContext dsp; | |||
| HuffYUVDSPContext hdsp; | |||
| int zeros; /**< number of consecutive zero bytes encountered */ | |||
| int zeros_rem; /**< number of zero bytes remaining to output */ | |||
| uint8_t *rgb_planes; | |||
| @@ -233,7 +233,7 @@ static void add_lag_median_prediction(uint8_t *dst, uint8_t *src1, | |||
| uint8_t *diff, int w, int *left, | |||
| int *left_top) | |||
| { | |||
| /* This is almost identical to add_hfyu_median_prediction in dsputil.h. | |||
| /* This is almost identical to add_hfyu_median_pred in huffyuvdsp.h. | |||
| * However the &0xFF on the gradient predictor yields incorrect output | |||
| * for lagarith. | |||
| */ | |||
| @@ -260,8 +260,7 @@ static void lag_pred_line(LagarithContext *l, uint8_t *buf, | |||
| if (!line) { | |||
| /* Left prediction only for first line */ | |||
| L = l->dsp.add_hfyu_left_prediction(buf, buf, | |||
| width, 0); | |||
| L = l->hdsp.add_hfyu_left_pred(buf, buf, width, 0); | |||
| } else { | |||
| /* Left pixel is actually prev_row[width] */ | |||
| L = buf[width - stride - 1]; | |||
| @@ -290,7 +289,7 @@ static void lag_pred_line_yuy2(LagarithContext *l, uint8_t *buf, | |||
| L= buf[0]; | |||
| if (is_luma) | |||
| buf[0] = 0; | |||
| l->dsp.add_hfyu_left_prediction(buf, buf, width, 0); | |||
| l->hdsp.add_hfyu_left_pred(buf, buf, width, 0); | |||
| if (is_luma) | |||
| buf[0] = L; | |||
| return; | |||
| @@ -313,8 +312,7 @@ static void lag_pred_line_yuy2(LagarithContext *l, uint8_t *buf, | |||
| } else { | |||
| TL = buf[width - (2 * stride) - 1]; | |||
| L = buf[width - stride - 1]; | |||
| l->dsp.add_hfyu_median_prediction(buf, buf - stride, buf, width, | |||
| &L, &TL); | |||
| l->hdsp.add_hfyu_median_pred(buf, buf - stride, buf, width, &L, &TL); | |||
| } | |||
| } | |||
| @@ -727,7 +725,7 @@ static av_cold int lag_decode_init(AVCodecContext *avctx) | |||
| LagarithContext *l = avctx->priv_data; | |||
| l->avctx = avctx; | |||
| ff_dsputil_init(&l->dsp, avctx); | |||
| ff_huffyuvdsp_init(&l->hdsp); | |||
| return 0; | |||
| } | |||
| @@ -6,6 +6,7 @@ OBJS-$(CONFIG_H264CHROMA) += ppc/h264chroma_init.o | |||
| OBJS-$(CONFIG_H264DSP) += ppc/h264dsp.o | |||
| OBJS-$(CONFIG_H264QPEL) += ppc/h264qpel.o | |||
| OBJS-$(CONFIG_HPELDSP) += ppc/hpeldsp_altivec.o | |||
| OBJS-$(CONFIG_HUFFYUVDSP) += ppc/huffyuvdsp_altivec.o | |||
| OBJS-$(CONFIG_MPEGAUDIODSP) += ppc/mpegaudiodsp_altivec.o | |||
| OBJS-$(CONFIG_MPEGVIDEO) += ppc/mpegvideo_altivec.o | |||
| OBJS-$(CONFIG_VIDEODSP) += ppc/videodsp_ppc.o | |||
| @@ -571,23 +571,6 @@ static void clear_block_altivec(int16_t *block) | |||
| vec_st(zero_s16v, 112, block); | |||
| } | |||
| static void add_bytes_altivec(uint8_t *dst, uint8_t *src, int w) | |||
| { | |||
| register int i; | |||
| register vector unsigned char vdst, vsrc; | |||
| /* dst and src are 16 bytes-aligned (guaranteed). */ | |||
| for (i = 0; i + 15 < w; i += 16) { | |||
| vdst = vec_ld(i, (unsigned char *) dst); | |||
| vsrc = vec_ld(i, (unsigned char *) src); | |||
| vdst = vec_add(vsrc, vdst); | |||
| vec_st(vdst, i, (unsigned char *) dst); | |||
| } | |||
| /* If w is not a multiple of 16. */ | |||
| for (; i < w; i++) | |||
| dst[i] = src[i]; | |||
| } | |||
| static int hadamard8_diff8x8_altivec(MpegEncContext *s, uint8_t *dst, | |||
| uint8_t *src, int stride, int h) | |||
| { | |||
| @@ -945,7 +928,6 @@ av_cold void ff_dsputil_init_altivec(DSPContext *c, AVCodecContext *avctx, | |||
| c->pix_sum = pix_sum_altivec; | |||
| c->diff_pixels = diff_pixels_altivec; | |||
| c->add_bytes = add_bytes_altivec; | |||
| if (!high_bit_depth) { | |||
| c->get_pixels = get_pixels_altivec; | |||
| @@ -0,0 +1,57 @@ | |||
| /* | |||
| * Copyright (c) 2002 Brian Foley | |||
| * Copyright (c) 2002 Dieter Shirley | |||
| * Copyright (c) 2003-2004 Romain Dolbeau <romain@dolbeau.org> | |||
| * | |||
| * This file is part of FFmpeg. | |||
| * | |||
| * FFmpeg is free software; you can redistribute it and/or | |||
| * modify it under the terms of the GNU Lesser General Public | |||
| * License as published by the Free Software Foundation; either | |||
| * version 2.1 of the License, or (at your option) any later version. | |||
| * | |||
| * FFmpeg is distributed in the hope that it will be useful, | |||
| * but WITHOUT ANY WARRANTY; without even the implied warranty of | |||
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |||
| * Lesser General Public License for more details. | |||
| * | |||
| * You should have received a copy of the GNU Lesser General Public | |||
| * License along with FFmpeg; if not, write to the Free Software | |||
| * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |||
| */ | |||
| #include "config.h" | |||
| #if HAVE_ALTIVEC_H | |||
| #include <altivec.h> | |||
| #endif | |||
| #include "libavutil/attributes.h" | |||
| #include "libavutil/ppc/types_altivec.h" | |||
| #include "libavutil/ppc/util_altivec.h" | |||
| #include "libavcodec/huffyuvdsp.h" | |||
#if HAVE_ALTIVEC
/**
 * AltiVec implementation of add_bytes: dst[i] += src[i] for i in [0, w),
 * each byte wrapping modulo 256.
 *
 * Whole 16-byte vectors are added with vec_add; the remaining w % 16 bytes
 * are handled by a scalar loop.
 *
 * @param dst destination row, updated in place; must be 16-byte aligned
 * @param src row to add; must be 16-byte aligned
 * @param w   number of bytes to process
 */
static void add_bytes_altivec(uint8_t *dst, uint8_t *src, int w)
{
    register int i;
    register vector unsigned char vdst, vsrc;

    /* dst and src are 16 bytes-aligned (guaranteed). */
    for (i = 0; i + 15 < w; i += 16) {
        vdst = vec_ld(i, (unsigned char *) dst);
        vsrc = vec_ld(i, (unsigned char *) src);
        vdst = vec_add(vsrc, vdst);
        vec_st(vdst, i, (unsigned char *) dst);
    }
    /* If w is not a multiple of 16: the tail has to accumulate exactly like
     * the vector loop does. A plain copy here would overwrite the last
     * w % 16 bytes of dst with src instead of adding to them. */
    for (; i < w; i++)
        dst[i] += src[i];
}
#endif /* HAVE_ALTIVEC */
| av_cold void ff_huffyuvdsp_init_ppc(HuffYUVDSPContext *c) | |||
| { | |||
| #if HAVE_ALTIVEC | |||
| c->add_bytes = add_bytes_altivec; | |||
| #endif /* HAVE_ALTIVEC */ | |||
| } | |||
| @@ -27,14 +27,14 @@ | |||
| #define BITSTREAM_READER_LE | |||
| #include "avcodec.h" | |||
| #include "dsputil.h" | |||
| #include "get_bits.h" | |||
| #include "huffyuvdsp.h" | |||
| #include "internal.h" | |||
| #include "mathops.h" | |||
| typedef struct { | |||
| AVCodecContext *avctx; | |||
| DSPContext dsp; | |||
| HuffYUVDSPContext hdsp; | |||
| int size; | |||
| uint8_t *val; ///< This array first holds the lengths of vlc symbols and then their value. | |||
| @@ -100,7 +100,8 @@ static void vble_restore_plane(VBLEContext *ctx, AVFrame *pic, | |||
| if (i) { | |||
| left = 0; | |||
| left_top = dst[-stride]; | |||
| ctx->dsp.add_hfyu_median_prediction(dst, dst-stride, val, width, &left, &left_top); | |||
| ctx->hdsp.add_hfyu_median_pred(dst, dst - stride, val, | |||
| width, &left, &left_top); | |||
| } else { | |||
| dst[0] = val[0]; | |||
| for (j = 1; j < width; j++) | |||
| @@ -181,7 +182,7 @@ static av_cold int vble_decode_init(AVCodecContext *avctx) | |||
| /* Stash for later use */ | |||
| ctx->avctx = avctx; | |||
| ff_dsputil_init(&ctx->dsp, avctx); | |||
| ff_huffyuvdsp_init(&ctx->hdsp); | |||
| avctx->pix_fmt = AV_PIX_FMT_YUV420P; | |||
| avctx->bits_per_raw_sample = 8; | |||
| @@ -3,8 +3,7 @@ OBJS += x86/constants.o \ | |||
| OBJS-$(CONFIG_AC3DSP) += x86/ac3dsp_init.o | |||
| OBJS-$(CONFIG_DCT) += x86/dct_init.o | |||
| OBJS-$(CONFIG_DSPUTIL) += x86/dsputil_init.o \ | |||
| x86/dsputil_x86.o | |||
| OBJS-$(CONFIG_DSPUTIL) += x86/dsputil_init.o | |||
| OBJS-$(CONFIG_ENCODERS) += x86/dsputilenc_mmx.o \ | |||
| x86/fdct.o \ | |||
| x86/motion_est.o | |||
| @@ -19,6 +18,7 @@ OBJS-$(CONFIG_H264QPEL) += x86/h264_qpel.o | |||
| OBJS-$(CONFIG_HEVC_DECODER) += x86/hevcdsp_init.o | |||
| OBJS-$(CONFIG_HPELDSP) += x86/hpeldsp_init.o | |||
| OBJS-$(CONFIG_LLVIDDSP) += x86/lossless_videodsp_init.o | |||
| OBJS-$(CONFIG_HUFFYUVDSP) += x86/huffyuvdsp_init.o | |||
| OBJS-$(CONFIG_LPC) += x86/lpc.o | |||
| OBJS-$(CONFIG_MPEGAUDIODSP) += x86/mpegaudiodsp.o | |||
| OBJS-$(CONFIG_MPEGVIDEO) += x86/mpegvideo.o | |||
| @@ -54,6 +54,7 @@ MMX-OBJS-$(CONFIG_DSPUTIL) += x86/dsputil_mmx.o \ | |||
| x86/idct_sse2_xvid.o \ | |||
| x86/simple_idct.o | |||
| MMX-OBJS-$(CONFIG_DIRAC_DECODER) += x86/dirac_dwt.o | |||
| MMX-OBJS-$(CONFIG_HUFFYUVDSP) += x86/huffyuvdsp_mmx.o | |||
| MMX-OBJS-$(CONFIG_SNOW_DECODER) += x86/snowdsp.o | |||
| MMX-OBJS-$(CONFIG_SNOW_ENCODER) += x86/snowdsp.o | |||
| @@ -93,6 +94,7 @@ YASM-OBJS-$(CONFIG_HEVC_DECODER) += x86/hevc_mc.o \ | |||
| x86/hevc_deblock.o | |||
| YASM-OBJS-$(CONFIG_HPELDSP) += x86/fpel.o \ | |||
| x86/hpeldsp.o | |||
| YASM-OBJS-$(CONFIG_HUFFYUVDSP) += x86/huffyuvdsp.o | |||
| YASM-OBJS-$(CONFIG_LLVIDDSP) += x86/lossless_videodsp.o | |||
| YASM-OBJS-$(CONFIG_MPEGAUDIODSP) += x86/imdct36.o | |||
| YASM-OBJS-$(CONFIG_VIDEODSP) += x86/videodsp.o | |||
| @@ -24,11 +24,6 @@ | |||
| %include "libavutil/x86/x86util.asm" | |||
| SECTION_RODATA | |||
| pb_f: times 16 db 15 | |||
| pb_zzzzzzzz77777777: times 8 db -1 | |||
| pb_7: times 8 db 7 | |||
| pb_zzzz3333zzzzbbbb: db -1,-1,-1,-1,3,3,3,3,-1,-1,-1,-1,11,11,11,11 | |||
| pb_zz11zz55zz99zzdd: db -1,-1,1,1,-1,-1,5,5,-1,-1,9,9,-1,-1,13,13 | |||
| pb_bswap32: db 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 | |||
| cextern pb_80 | |||
| @@ -193,142 +188,6 @@ SCALARPRODUCT_LOOP 0 | |||
| RET | |||
| ; void ff_add_hfyu_median_prediction_mmxext(uint8_t *dst, const uint8_t *top, | |||
| ; const uint8_t *diff, int w, | |||
| ; int *left, int *left_top) | |||
| INIT_MMX mmxext | |||
| cglobal add_hfyu_median_prediction, 6,6,0, dst, top, diff, w, left, left_top | |||
| movq mm0, [topq] | |||
| movq mm2, mm0 | |||
| movd mm4, [left_topq] | |||
| psllq mm2, 8 | |||
| movq mm1, mm0 | |||
| por mm4, mm2 | |||
| movd mm3, [leftq] | |||
| psubb mm0, mm4 ; t-tl | |||
| add dstq, wq | |||
| add topq, wq | |||
| add diffq, wq | |||
| neg wq | |||
| jmp .skip | |||
| .loop: | |||
| movq mm4, [topq+wq] | |||
| movq mm0, mm4 | |||
| psllq mm4, 8 | |||
| por mm4, mm1 | |||
| movq mm1, mm0 ; t | |||
| psubb mm0, mm4 ; t-tl | |||
| .skip: | |||
| movq mm2, [diffq+wq] | |||
| %assign i 0 | |||
| %rep 8 | |||
| movq mm4, mm0 | |||
| paddb mm4, mm3 ; t-tl+l | |||
| movq mm5, mm3 | |||
| pmaxub mm3, mm1 | |||
| pminub mm5, mm1 | |||
| pminub mm3, mm4 | |||
| pmaxub mm3, mm5 ; median | |||
| paddb mm3, mm2 ; +residual | |||
| %if i==0 | |||
| movq mm7, mm3 | |||
| psllq mm7, 56 | |||
| %else | |||
| movq mm6, mm3 | |||
| psrlq mm7, 8 | |||
| psllq mm6, 56 | |||
| por mm7, mm6 | |||
| %endif | |||
| %if i<7 | |||
| psrlq mm0, 8 | |||
| psrlq mm1, 8 | |||
| psrlq mm2, 8 | |||
| %endif | |||
| %assign i i+1 | |||
| %endrep | |||
| movq [dstq+wq], mm7 | |||
| add wq, 8 | |||
| jl .loop | |||
| movzx r2d, byte [dstq-1] | |||
| mov [leftq], r2d | |||
| movzx r2d, byte [topq-1] | |||
| mov [left_topq], r2d | |||
| RET | |||
| %macro ADD_HFYU_LEFT_LOOP 2 ; %1 = dst_is_aligned, %2 = src_is_aligned | |||
| add srcq, wq | |||
| add dstq, wq | |||
| neg wq | |||
| %%.loop: | |||
| %if %2 | |||
| mova m1, [srcq+wq] | |||
| %else | |||
| movu m1, [srcq+wq] | |||
| %endif | |||
| mova m2, m1 | |||
| psllw m1, 8 | |||
| paddb m1, m2 | |||
| mova m2, m1 | |||
| pshufb m1, m3 | |||
| paddb m1, m2 | |||
| pshufb m0, m5 | |||
| mova m2, m1 | |||
| pshufb m1, m4 | |||
| paddb m1, m2 | |||
| %if mmsize == 16 | |||
| mova m2, m1 | |||
| pshufb m1, m6 | |||
| paddb m1, m2 | |||
| %endif | |||
| paddb m0, m1 | |||
| %if %1 | |||
| mova [dstq+wq], m0 | |||
| %else | |||
| movq [dstq+wq], m0 | |||
| movhps [dstq+wq+8], m0 | |||
| %endif | |||
| add wq, mmsize | |||
| jl %%.loop | |||
| mov eax, mmsize-1 | |||
| sub eax, wd | |||
| movd m1, eax | |||
| pshufb m0, m1 | |||
| movd eax, m0 | |||
| RET | |||
| %endmacro | |||
| ; int ff_add_hfyu_left_prediction(uint8_t *dst, const uint8_t *src, | |||
| ; int w, int left) | |||
| INIT_MMX ssse3 | |||
| cglobal add_hfyu_left_prediction, 3,3,7, dst, src, w, left | |||
| .skip_prologue: | |||
| mova m5, [pb_7] | |||
| mova m4, [pb_zzzz3333zzzzbbbb] | |||
| mova m3, [pb_zz11zz55zz99zzdd] | |||
| movd m0, leftm | |||
| psllq m0, 56 | |||
| ADD_HFYU_LEFT_LOOP 1, 1 | |||
| INIT_XMM sse4 | |||
| cglobal add_hfyu_left_prediction, 3,3,7, dst, src, w, left | |||
| mova m5, [pb_f] | |||
| mova m6, [pb_zzzzzzzz77777777] | |||
| mova m4, [pb_zzzz3333zzzzbbbb] | |||
| mova m3, [pb_zz11zz55zz99zzdd] | |||
| movd m0, leftm | |||
| pslldq m0, 15 | |||
| test srcq, 15 | |||
| jnz .src_unaligned | |||
| test dstq, 15 | |||
| jnz .dst_unaligned | |||
| ADD_HFYU_LEFT_LOOP 1, 1 | |||
| .dst_unaligned: | |||
| ADD_HFYU_LEFT_LOOP 0, 1 | |||
| .src_unaligned: | |||
| ADD_HFYU_LEFT_LOOP 0, 0 | |||
| ;----------------------------------------------------------------------------- | |||
| ; void ff_vector_clip_int32(int32_t *dst, const int32_t *src, int32_t min, | |||
| ; int32_t max, unsigned int len) | |||
| @@ -23,7 +23,6 @@ | |||
| #include "libavutil/attributes.h" | |||
| #include "libavutil/cpu.h" | |||
| #include "libavutil/internal.h" | |||
| #include "libavutil/x86/asm.h" | |||
| #include "libavutil/x86/cpu.h" | |||
| #include "libavcodec/avcodec.h" | |||
| #include "libavcodec/dsputil.h" | |||
| @@ -93,14 +92,6 @@ int32_t ff_scalarproduct_and_madd_int16_ssse3(int16_t *v1, const int16_t *v2, | |||
| void ff_bswap32_buf_ssse3(uint32_t *dst, const uint32_t *src, int w); | |||
| void ff_bswap32_buf_sse2(uint32_t *dst, const uint32_t *src, int w); | |||
| void ff_add_hfyu_median_prediction_mmxext(uint8_t *dst, const uint8_t *top, | |||
| const uint8_t *diff, int w, | |||
| int *left, int *left_top); | |||
| int ff_add_hfyu_left_prediction_ssse3(uint8_t *dst, const uint8_t *src, | |||
| int w, int left); | |||
| int ff_add_hfyu_left_prediction_sse4(uint8_t *dst, const uint8_t *src, | |||
| int w, int left); | |||
| void ff_vector_clip_int32_mmx(int32_t *dst, const int32_t *src, | |||
| int32_t min, int32_t max, unsigned int len); | |||
| void ff_vector_clip_int32_sse2(int32_t *dst, const int32_t *src, | |||
| @@ -536,8 +527,6 @@ static av_cold void dsputil_init_mmx(DSPContext *c, AVCodecContext *avctx, | |||
| #if CONFIG_VIDEODSP && (ARCH_X86_32 || !HAVE_YASM) | |||
| c->gmc = ff_gmc_mmx; | |||
| #endif | |||
| c->add_bytes = ff_add_bytes_mmx; | |||
| #endif /* HAVE_MMX_INLINE */ | |||
| #if HAVE_MMX_EXTERNAL | |||
| @@ -570,10 +559,6 @@ static av_cold void dsputil_init_mmxext(DSPContext *c, AVCodecContext *avctx, | |||
| SET_QPEL_FUNCS(put_no_rnd_qpel, 0, 16, mmxext, ); | |||
| SET_QPEL_FUNCS(put_no_rnd_qpel, 1, 8, mmxext, ); | |||
| /* slower than cmov version on AMD */ | |||
| if (!(cpu_flags & AV_CPU_FLAG_3DNOW)) | |||
| c->add_hfyu_median_prediction = ff_add_hfyu_median_prediction_mmxext; | |||
| c->scalarproduct_int16 = ff_scalarproduct_int16_mmxext; | |||
| c->scalarproduct_and_madd_int16 = ff_scalarproduct_and_madd_int16_mmxext; | |||
| #endif /* HAVE_MMXEXT_EXTERNAL */ | |||
| @@ -630,10 +615,6 @@ static av_cold void dsputil_init_ssse3(DSPContext *c, AVCodecContext *avctx, | |||
| int cpu_flags, unsigned high_bit_depth) | |||
| { | |||
| #if HAVE_SSSE3_EXTERNAL | |||
| c->add_hfyu_left_prediction = ff_add_hfyu_left_prediction_ssse3; | |||
| if (cpu_flags & AV_CPU_FLAG_SSE4) // not really SSE4, just slow on Conroe | |||
| c->add_hfyu_left_prediction = ff_add_hfyu_left_prediction_sse4; | |||
| if (!(cpu_flags & (AV_CPU_FLAG_SSE42 | AV_CPU_FLAG_3DNOW))) // cachesplit | |||
| c->scalarproduct_and_madd_int16 = ff_scalarproduct_and_madd_int16_ssse3; | |||
| c->bswap_buf = ff_bswap32_buf_ssse3; | |||
| @@ -653,11 +634,6 @@ av_cold void ff_dsputil_init_x86(DSPContext *c, AVCodecContext *avctx, | |||
| { | |||
| int cpu_flags = av_get_cpu_flags(); | |||
| #if HAVE_7REGS && HAVE_INLINE_ASM | |||
| if (HAVE_MMX && cpu_flags & AV_CPU_FLAG_CMOV) | |||
| c->add_hfyu_median_prediction = ff_add_hfyu_median_prediction_cmov; | |||
| #endif | |||
| if (X86_MMX(cpu_flags)) { | |||
| #if HAVE_INLINE_ASM | |||
| const int idct_algo = avctx->idct_algo; | |||
| @@ -134,31 +134,6 @@ void ff_add_pixels_clamped_mmx(const int16_t *block, uint8_t *pixels, | |||
| } while (--i); | |||
| } | |||
| void ff_add_bytes_mmx(uint8_t *dst, uint8_t *src, int w) | |||
| { | |||
| x86_reg i = 0; | |||
| __asm__ volatile ( | |||
| "jmp 2f \n\t" | |||
| "1: \n\t" | |||
| "movq (%1, %0), %%mm0 \n\t" | |||
| "movq (%2, %0), %%mm1 \n\t" | |||
| "paddb %%mm0, %%mm1 \n\t" | |||
| "movq %%mm1, (%2, %0) \n\t" | |||
| "movq 8(%1, %0), %%mm0 \n\t" | |||
| "movq 8(%2, %0), %%mm1 \n\t" | |||
| "paddb %%mm0, %%mm1 \n\t" | |||
| "movq %%mm1, 8(%2, %0) \n\t" | |||
| "add $16, %0 \n\t" | |||
| "2: \n\t" | |||
| "cmp %3, %0 \n\t" | |||
| "js 1b \n\t" | |||
| : "+r" (i) | |||
| : "r" (src), "r" (dst), "r" ((x86_reg) w - 15)); | |||
| for (; i < w; i++) | |||
| dst[i + 0] += src[i + 0]; | |||
| } | |||
| /* Draw the edges of width 'w' of an image of size width, height | |||
| * this MMX version can only handle w == 8 || w == 16. */ | |||
| @@ -45,12 +45,6 @@ void ff_clear_block_sse(int16_t *block); | |||
| void ff_clear_blocks_mmx(int16_t *blocks); | |||
| void ff_clear_blocks_sse(int16_t *blocks); | |||
| void ff_add_bytes_mmx(uint8_t *dst, uint8_t *src, int w); | |||
| void ff_add_hfyu_median_prediction_cmov(uint8_t *dst, const uint8_t *top, | |||
| const uint8_t *diff, int w, | |||
| int *left, int *left_top); | |||
| void ff_draw_edges_mmx(uint8_t *buf, int wrap, int width, int height, | |||
| int w, int h, int sides); | |||
| @@ -0,0 +1,165 @@ | |||
| ;****************************************************************************** | |||
| ;* SIMD-optimized HuffYUV functions | |||
| ;* Copyright (c) 2008 Loren Merritt | |||
| ;* | |||
| ;* This file is part of FFmpeg. | |||
| ;* | |||
| ;* FFmpeg is free software; you can redistribute it and/or | |||
| ;* modify it under the terms of the GNU Lesser General Public | |||
| ;* License as published by the Free Software Foundation; either | |||
| ;* version 2.1 of the License, or (at your option) any later version. | |||
| ;* | |||
| ;* FFmpeg is distributed in the hope that it will be useful, | |||
| ;* but WITHOUT ANY WARRANTY; without even the implied warranty of | |||
| ;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |||
| ;* Lesser General Public License for more details. | |||
| ;* | |||
| ;* You should have received a copy of the GNU Lesser General Public | |||
| ;* License along with FFmpeg; if not, write to the Free Software | |||
| ;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |||
| ;****************************************************************************** | |||
| %include "libavutil/x86/x86util.asm" | |||
| SECTION_RODATA | |||
| pb_f: times 16 db 15 | |||
| pb_zzzzzzzz77777777: times 8 db -1 | |||
| pb_7: times 8 db 7 | |||
| pb_zzzz3333zzzzbbbb: db -1,-1,-1,-1,3,3,3,3,-1,-1,-1,-1,11,11,11,11 | |||
| pb_zz11zz55zz99zzdd: db -1,-1,1,1,-1,-1,5,5,-1,-1,9,9,-1,-1,13,13 | |||
| SECTION_TEXT | |||
| ; void ff_add_hfyu_median_pred_mmxext(uint8_t *dst, const uint8_t *top, | |||
| ; const uint8_t *diff, int w, | |||
| ; int *left, int *left_top) -- dst[i] = median(l, t, l + t - tl) + diff[i], 8 bytes per loop iteration; *left and *left_top are read on entry and rewritten on exit with the last dst byte and the last top byte | |||
| INIT_MMX mmxext | |||
| cglobal add_hfyu_median_pred, 6,6,0, dst, top, diff, w, left, left_top | |||
| movq mm0, [topq] | |||
| movq mm2, mm0 | |||
| movd mm4, [left_topq] | |||
| psllq mm2, 8 | |||
| movq mm1, mm0 | |||
| por mm4, mm2 | |||
| movd mm3, [leftq] | |||
| psubb mm0, mm4 ; t-tl (top minus top-left, per byte; first tl comes from *left_top) | |||
| add dstq, wq | |||
| add topq, wq | |||
| add diffq, wq | |||
| neg wq | |||
| jmp .skip | |||
| .loop: | |||
| movq mm4, [topq+wq] | |||
| movq mm0, mm4 | |||
| psllq mm4, 8 | |||
| por mm4, mm1 | |||
| movq mm1, mm0 ; t (top bytes for this group of 8) | |||
| psubb mm0, mm4 ; t-tl (tl of the first byte is the previous group's last top byte, left in mm1) | |||
| .skip: | |||
| movq mm2, [diffq+wq] | |||
| %assign i 0 | |||
| %rep 8 | |||
| movq mm4, mm0 | |||
| paddb mm4, mm3 ; t-tl+l (gradient candidate) | |||
| movq mm5, mm3 | |||
| pmaxub mm3, mm1 | |||
| pminub mm5, mm1 | |||
| pminub mm3, mm4 | |||
| pmaxub mm3, mm5 ; median of l, t and t-tl+l | |||
| paddb mm3, mm2 ; +residual: low byte is the decoded pixel and becomes l for the next byte | |||
| %if i==0 | |||
| movq mm7, mm3 | |||
| psllq mm7, 56 | |||
| %else | |||
| movq mm6, mm3 | |||
| psrlq mm7, 8 | |||
| psllq mm6, 56 | |||
| por mm7, mm6 | |||
| %endif | |||
| %if i<7 | |||
| psrlq mm0, 8 | |||
| psrlq mm1, 8 | |||
| psrlq mm2, 8 | |||
| %endif | |||
| %assign i i+1 | |||
| %endrep | |||
| movq [dstq+wq], mm7 | |||
| add wq, 8 | |||
| jl .loop | |||
| movzx r2d, byte [dstq-1] | |||
| mov [leftq], r2d | |||
| movzx r2d, byte [topq-1] | |||
| mov [left_topq], r2d | |||
| RET | |||
| %macro ADD_HFYU_LEFT_LOOP 2 ; %1 = dst_is_aligned, %2 = src_is_aligned; expects m0 = running left value in its top byte, m5 = mask that broadcasts that byte, m3/m4 (and m6 when mmsize == 16) = prefix-sum shuffle masks; each iteration stores mmsize running sums and the macro ends with RET, leaving the last output byte in the low byte of eax | |||
| add srcq, wq | |||
| add dstq, wq | |||
| neg wq | |||
| %%.loop: | |||
| %if %2 | |||
| mova m1, [srcq+wq] | |||
| %else | |||
| movu m1, [srcq+wq] | |||
| %endif | |||
| mova m2, m1 | |||
| psllw m1, 8 | |||
| paddb m1, m2 | |||
| mova m2, m1 | |||
| pshufb m1, m3 | |||
| paddb m1, m2 | |||
| pshufb m0, m5 | |||
| mova m2, m1 | |||
| pshufb m1, m4 | |||
| paddb m1, m2 | |||
| %if mmsize == 16 | |||
| mova m2, m1 | |||
| pshufb m1, m6 | |||
| paddb m1, m2 | |||
| %endif | |||
| paddb m0, m1 | |||
| %if %1 | |||
| mova [dstq+wq], m0 | |||
| %else | |||
| movq [dstq+wq], m0 | |||
| movhps [dstq+wq+8], m0 | |||
| %endif | |||
| add wq, mmsize | |||
| jl %%.loop | |||
| mov eax, mmsize-1 | |||
| sub eax, wd | |||
| movd m1, eax | |||
| pshufb m0, m1 | |||
| movd eax, m0 | |||
| RET | |||
| %endmacro | |||
| ; int ff_add_hfyu_left_pred(uint8_t *dst, const uint8_t *src, int w, int left) -- writes the running byte-wise sum of src, seeded with left, to dst; the ssse3 entry works on 8-byte MMX registers, the sse4 entry on 16-byte XMM registers and selects a loop variant from the 16-byte alignment of src and dst (an unaligned src also takes the unaligned-dst path) | |||
| INIT_MMX ssse3 | |||
| cglobal add_hfyu_left_pred, 3,3,7, dst, src, w, left | |||
| .skip_prologue: | |||
| mova m5, [pb_7] | |||
| mova m4, [pb_zzzz3333zzzzbbbb] | |||
| mova m3, [pb_zz11zz55zz99zzdd] | |||
| movd m0, leftm | |||
| psllq m0, 56 | |||
| ADD_HFYU_LEFT_LOOP 1, 1 | |||
| INIT_XMM sse4 | |||
| cglobal add_hfyu_left_pred, 3,3,7, dst, src, w, left | |||
| mova m5, [pb_f] | |||
| mova m6, [pb_zzzzzzzz77777777] | |||
| mova m4, [pb_zzzz3333zzzzbbbb] | |||
| mova m3, [pb_zz11zz55zz99zzdd] | |||
| movd m0, leftm | |||
| pslldq m0, 15 | |||
| test srcq, 15 | |||
| jnz .src_unaligned | |||
| test dstq, 15 | |||
| jnz .dst_unaligned | |||
| ADD_HFYU_LEFT_LOOP 1, 1 | |||
| .dst_unaligned: | |||
| ADD_HFYU_LEFT_LOOP 0, 1 | |||
| .src_unaligned: | |||
| ADD_HFYU_LEFT_LOOP 0, 0 | |||
| @@ -0,0 +1,30 @@ | |||
| /* | |||
| * This file is part of FFmpeg. | |||
| * | |||
| * FFmpeg is free software; you can redistribute it and/or | |||
| * modify it under the terms of the GNU Lesser General Public | |||
| * License as published by the Free Software Foundation; either | |||
| * version 2.1 of the License, or (at your option) any later version. | |||
| * | |||
| * FFmpeg is distributed in the hope that it will be useful, | |||
| * but WITHOUT ANY WARRANTY; without even the implied warranty of | |||
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |||
| * Lesser General Public License for more details. | |||
| * | |||
| * You should have received a copy of the GNU Lesser General Public | |||
| * License along with FFmpeg; if not, write to the Free Software | |||
| * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |||
| */ | |||
| #ifndef AVCODEC_X86_HUFFYUVDSP_H | |||
| #define AVCODEC_X86_HUFFYUVDSP_H | |||
| #include <stdint.h> | |||
| void ff_add_bytes_mmx(uint8_t *dst, uint8_t *src, int w); | |||
| void ff_add_hfyu_median_pred_cmov(uint8_t *dst, const uint8_t *top, | |||
| const uint8_t *diff, int w, | |||
| int *left, int *left_top); | |||
| #endif /* AVCODEC_X86_HUFFYUVDSP_H */ | |||
| @@ -0,0 +1,63 @@ | |||
| /* | |||
| * This file is part of FFmpeg. | |||
| * | |||
| * FFmpeg is free software; you can redistribute it and/or | |||
| * modify it under the terms of the GNU Lesser General Public | |||
| * License as published by the Free Software Foundation; either | |||
| * version 2.1 of the License, or (at your option) any later version. | |||
| * | |||
| * FFmpeg is distributed in the hope that it will be useful, | |||
| * but WITHOUT ANY WARRANTY; without even the implied warranty of | |||
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |||
| * Lesser General Public License for more details. | |||
| * | |||
| * You should have received a copy of the GNU Lesser General Public | |||
| * License along with FFmpeg; if not, write to the Free Software | |||
| * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |||
| */ | |||
| #include "config.h" | |||
| #include "libavutil/attributes.h" | |||
| #include "libavutil/cpu.h" | |||
| #include "libavutil/x86/asm.h" | |||
| #include "libavutil/x86/cpu.h" | |||
| #include "libavcodec/huffyuvdsp.h" | |||
| void ff_add_bytes_mmx(uint8_t *dst, uint8_t *src, int w); | |||
| void ff_add_hfyu_median_pred_cmov(uint8_t *dst, const uint8_t *top, | |||
| const uint8_t *diff, int w, | |||
| int *left, int *left_top); | |||
| void ff_add_hfyu_median_pred_mmxext(uint8_t *dst, const uint8_t *top, | |||
| const uint8_t *diff, int w, | |||
| int *left, int *left_top); | |||
| int ff_add_hfyu_left_pred_ssse3(uint8_t *dst, const uint8_t *src, | |||
| int w, int left); | |||
| int ff_add_hfyu_left_pred_sse4(uint8_t *dst, const uint8_t *src, | |||
| int w, int left); | |||
| /** | |||
| * Install the x86-optimized HuffYUV DSP routines supported by the running CPU. | |||
| * | |||
| * Assignments are made in order, so a later match overrides an earlier one | |||
| * for the same function pointer (e.g. mmxext replaces cmov for the median | |||
| * predictor, sse4 replaces ssse3 for the left predictor). | |||
| * | |||
| * @param c DSP context whose function pointers are overridden | |||
| */ | |||
| av_cold void ff_huffyuvdsp_init_x86(HuffYUVDSPContext *c) | |||
| { | |||
| int cpu_flags = av_get_cpu_flags(); | |||
| #if HAVE_7REGS && HAVE_INLINE_ASM | |||
| /* The cmov routine is listed with the MMX objects in the build, so it may | |||
| * only be referenced when MMX support is compiled in; the HAVE_MMX test | |||
| * matches the guard this assignment had before it was moved here. */ | |||
| if (HAVE_MMX && cpu_flags & AV_CPU_FLAG_CMOV) | |||
| c->add_hfyu_median_pred = ff_add_hfyu_median_pred_cmov; | |||
| #endif | |||
| if (INLINE_MMX(cpu_flags)) | |||
| c->add_bytes = ff_add_bytes_mmx; | |||
| if (EXTERNAL_MMXEXT(cpu_flags)) { | |||
| /* slower than cmov version on AMD */ | |||
| if (!(cpu_flags & AV_CPU_FLAG_3DNOW)) | |||
| c->add_hfyu_median_pred = ff_add_hfyu_median_pred_mmxext; | |||
| } | |||
| if (EXTERNAL_SSSE3(cpu_flags)) { | |||
| c->add_hfyu_left_pred = ff_add_hfyu_left_pred_ssse3; | |||
| if (cpu_flags & AV_CPU_FLAG_SSE4) // not really SSE4, just slow on Conroe | |||
| c->add_hfyu_left_pred = ff_add_hfyu_left_pred_sse4; | |||
| } | |||
| } | |||
| @@ -20,14 +20,14 @@ | |||
| #include "config.h" | |||
| #include "libavutil/x86/asm.h" | |||
| #include "dsputil_x86.h" | |||
| #include "huffyuvdsp.h" | |||
| #if HAVE_INLINE_ASM | |||
| #if HAVE_7REGS | |||
| void ff_add_hfyu_median_prediction_cmov(uint8_t *dst, const uint8_t *top, | |||
| const uint8_t *diff, int w, | |||
| int *left, int *left_top) | |||
| void ff_add_hfyu_median_pred_cmov(uint8_t *dst, const uint8_t *top, | |||
| const uint8_t *diff, int w, | |||
| int *left, int *left_top) | |||
| { | |||
| x86_reg w2 = -w; | |||
| x86_reg x; | |||
| @@ -62,4 +62,30 @@ void ff_add_hfyu_median_prediction_cmov(uint8_t *dst, const uint8_t *top, | |||
| } | |||
| #endif | |||
| /** | |||
| * Add src to dst byte by byte: dst[i] += src[i] for 0 <= i < w, wrapping | |||
| * modulo 256. | |||
| * | |||
| * The MMX loop handles 16 bytes per iteration for as long as at least 16 | |||
| * bytes remain; the scalar loop then finishes the tail. No emms is issued | |||
| * here. | |||
| * | |||
| * @param dst buffer that is both read and written | |||
| * @param src buffer whose bytes are added to dst | |||
| * @param w number of bytes to process | |||
| */ | |||
| void ff_add_bytes_mmx(uint8_t *dst, uint8_t *src, int w) | |||
| { | |||
| x86_reg i = 0; | |||
| __asm__ volatile ( | |||
| "jmp 2f \n\t" | |||
| "1: \n\t" | |||
| "movq (%1, %0), %%mm0 \n\t" | |||
| "movq (%2, %0), %%mm1 \n\t" | |||
| "paddb %%mm0, %%mm1 \n\t" | |||
| "movq %%mm1, (%2, %0) \n\t" | |||
| "movq 8(%1, %0), %%mm0 \n\t" | |||
| "movq 8(%2, %0), %%mm1 \n\t" | |||
| "paddb %%mm0, %%mm1 \n\t" | |||
| "movq %%mm1, 8(%2, %0) \n\t" | |||
| "add $16, %0 \n\t" | |||
| "2: \n\t" | |||
| "cmp %3, %0 \n\t" | |||
| "js 1b \n\t" | |||
| : "+r" (i) | |||
| : "r" (src), "r" (dst), "r" ((x86_reg) w - 15) | |||
| /* The asm loads from src/dst and stores through dst, but only the | |||
| * pointers are operands; the clobber tells the compiler that memory | |||
| * changes so it cannot cache or reorder accesses across the asm. */ | |||
| : "memory"); | |||
| for (; i < w; i++) | |||
| dst[i + 0] += src[i + 0]; | |||
| } | |||
| #endif /* HAVE_INLINE_ASM */ | |||