Also shorten HuffYUV context member names to avoid clutter.
tags/n2.3
| @@ -1541,6 +1541,7 @@ CONFIG_EXTRA=" | |||||
| h264qpel | h264qpel | ||||
| hpeldsp | hpeldsp | ||||
| huffman | huffman | ||||
| huffyuvdsp | |||||
| intrax8 | intrax8 | ||||
| lgplv3 | lgplv3 | ||||
| lpc | lpc | ||||
| @@ -1771,7 +1772,7 @@ h263p_encoder_select="h263_encoder" | |||||
| h264_decoder_select="cabac golomb h264chroma h264dsp h264pred h264qpel videodsp" | h264_decoder_select="cabac golomb h264chroma h264dsp h264pred h264qpel videodsp" | ||||
| h264_decoder_suggest="error_resilience" | h264_decoder_suggest="error_resilience" | ||||
| hevc_decoder_select="cabac dsputil golomb videodsp" | hevc_decoder_select="cabac dsputil golomb videodsp" | ||||
| huffyuv_decoder_select="dsputil" | |||||
| huffyuv_decoder_select="dsputil huffyuvdsp" | |||||
| huffyuv_encoder_select="dsputil huffman" | huffyuv_encoder_select="dsputil huffman" | ||||
| iac_decoder_select="imc_decoder" | iac_decoder_select="imc_decoder" | ||||
| imc_decoder_select="dsputil fft mdct sinewin" | imc_decoder_select="dsputil fft mdct sinewin" | ||||
| @@ -1780,7 +1781,7 @@ interplay_video_decoder_select="hpeldsp" | |||||
| jpegls_decoder_select="golomb mjpeg_decoder" | jpegls_decoder_select="golomb mjpeg_decoder" | ||||
| jpegls_encoder_select="golomb" | jpegls_encoder_select="golomb" | ||||
| jv_decoder_select="dsputil" | jv_decoder_select="dsputil" | ||||
| lagarith_decoder_select="dsputil" | |||||
| lagarith_decoder_select="huffyuvdsp" | |||||
| ljpeg_encoder_select="aandcttables mpegvideoenc" | ljpeg_encoder_select="aandcttables mpegvideoenc" | ||||
| loco_decoder_select="golomb" | loco_decoder_select="golomb" | ||||
| mdec_decoder_select="dsputil error_resilience mpegvideo" | mdec_decoder_select="dsputil error_resilience mpegvideo" | ||||
| @@ -1857,7 +1858,7 @@ tscc_decoder_deps="zlib" | |||||
| twinvq_decoder_select="mdct lsp sinewin" | twinvq_decoder_select="mdct lsp sinewin" | ||||
| utvideo_decoder_select="dsputil" | utvideo_decoder_select="dsputil" | ||||
| utvideo_encoder_select="dsputil huffman" | utvideo_encoder_select="dsputil huffman" | ||||
| vble_decoder_select="dsputil" | |||||
| vble_decoder_select="huffyuvdsp" | |||||
| vc1_decoder_select="error_resilience h263_decoder h264chroma h264qpel intrax8" | vc1_decoder_select="error_resilience h263_decoder h264chroma h264qpel intrax8" | ||||
| vc1image_decoder_select="vc1_decoder" | vc1image_decoder_select="vc1_decoder" | ||||
| vorbis_decoder_select="mdct" | vorbis_decoder_select="mdct" | ||||
| @@ -46,6 +46,7 @@ OBJS-$(CONFIG_H264PRED) += h264pred.o | |||||
| OBJS-$(CONFIG_H264QPEL) += h264qpel.o | OBJS-$(CONFIG_H264QPEL) += h264qpel.o | ||||
| OBJS-$(CONFIG_HPELDSP) += hpeldsp.o | OBJS-$(CONFIG_HPELDSP) += hpeldsp.o | ||||
| OBJS-$(CONFIG_HUFFMAN) += huffman.o | OBJS-$(CONFIG_HUFFMAN) += huffman.o | ||||
| OBJS-$(CONFIG_HUFFYUVDSP) += huffyuvdsp.o | |||||
| OBJS-$(CONFIG_INTRAX8) += intrax8.o intrax8dsp.o | OBJS-$(CONFIG_INTRAX8) += intrax8.o intrax8dsp.o | ||||
| OBJS-$(CONFIG_LIBXVID) += libxvid_rc.o | OBJS-$(CONFIG_LIBXVID) += libxvid_rc.o | ||||
| OBJS-$(CONFIG_LPC) += lpc.o | OBJS-$(CONFIG_LPC) += lpc.o | ||||
| @@ -1663,19 +1663,6 @@ void ff_set_cmp(DSPContext *c, me_cmp_func *cmp, int type) | |||||
| } | } | ||||
| } | } | ||||
| static void add_bytes_c(uint8_t *dst, uint8_t *src, int w) | |||||
| { | |||||
| long i; | |||||
| for (i = 0; i <= w - (int) sizeof(long); i += sizeof(long)) { | |||||
| long a = *(long *) (src + i); | |||||
| long b = *(long *) (dst + i); | |||||
| *(long *) (dst + i) = ((a & pb_7f) + (b & pb_7f)) ^ ((a ^ b) & pb_80); | |||||
| } | |||||
| for (; i < w; i++) | |||||
| dst[i + 0] += src[i + 0]; | |||||
| } | |||||
| static void diff_bytes_c(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w) | static void diff_bytes_c(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w) | ||||
| { | { | ||||
| long i; | long i; | ||||
| @@ -1704,26 +1691,6 @@ static void diff_bytes_c(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w) | |||||
| dst[i + 0] = src1[i + 0] - src2[i + 0]; | dst[i + 0] = src1[i + 0] - src2[i + 0]; | ||||
| } | } | ||||
/**
 * Undo median prediction for one row.
 *
 * Every output byte is mid_pred(left, top, (left + top - top_left) & 0xFF)
 * plus the matching residual from diff, truncated to 8 bits.
 *
 * @param dst      reconstructed row
 * @param src1     row used as the "top" neighbour
 * @param diff     residuals to add to the prediction
 * @param w        number of bytes in the row
 * @param left     in: left neighbour of the first byte; out: last byte written
 * @param left_top in: top-left neighbour of the first byte; out: last top byte
 */
static void add_hfyu_median_prediction_c(uint8_t *dst, const uint8_t *src1,
                                         const uint8_t *diff, int w,
                                         int *left, int *left_top)
{
    uint8_t cur      = *left;
    uint8_t top_left = *left_top;
    int x;

    for (x = 0; x < w; x++) {
        const uint8_t top      = src1[x];
        const int     gradient = (cur + top - top_left) & 0xFF;

        cur      = mid_pred(cur, top, gradient) + diff[x];
        top_left = top;
        dst[x]   = cur;
    }

    *left     = cur;
    *left_top = top_left;
}
| static void sub_hfyu_median_prediction_c(uint8_t *dst, const uint8_t *src1, | static void sub_hfyu_median_prediction_c(uint8_t *dst, const uint8_t *src1, | ||||
| const uint8_t *src2, int w, | const uint8_t *src2, int w, | ||||
| int *left, int *left_top) | int *left, int *left_top) | ||||
| @@ -1745,66 +1712,6 @@ static void sub_hfyu_median_prediction_c(uint8_t *dst, const uint8_t *src1, | |||||
| *left_top = lt; | *left_top = lt; | ||||
| } | } | ||||
/**
 * Undo left prediction for one row: a running sum over src.
 *
 * Each output byte is the low 8 bits of the accumulator after the matching
 * source byte has been added to it.
 *
 * @param dst destination row (may be the same buffer as src)
 * @param src residual bytes
 * @param w   number of bytes to process
 * @param acc starting value of the accumulator
 * @return the accumulator after the last byte, not reduced to 8 bits
 */
static int add_hfyu_left_prediction_c(uint8_t *dst, const uint8_t *src,
                                      int w, int acc)
{
    const uint8_t *in  = src;
    uint8_t       *out = dst;
    int remaining;

    for (remaining = w; remaining > 0; remaining--) {
        acc   += *in++;
        *out++ = acc;
    }

    return acc;
}
/* Byte offset of each channel inside a 4-byte pixel; depends on endianness. */
#if HAVE_BIGENDIAN
#define B 3
#define G 2
#define R 1
#define A 0
#else
#define B 0
#define G 1
#define R 2
#define A 3
#endif

/**
 * Undo left prediction on a row of 4-byte pixels, one running sum per channel.
 *
 * @param dst   destination row, 4 bytes per pixel
 * @param src   residual row, 4 bytes per pixel
 * @param w     number of pixels
 * @param red   in: starting red accumulator; out: accumulator after the row
 * @param green same for the green channel
 * @param blue  same for the blue channel
 * @param alpha same for the alpha channel
 */
static void add_hfyu_left_prediction_bgr32_c(uint8_t *dst, const uint8_t *src,
                                             int w, int *red, int *green,
                                             int *blue, int *alpha)
{
    int sum_r = *red;
    int sum_g = *green;
    int sum_b = *blue;
    int sum_a = *alpha;
    int px;

    for (px = 0; px < w; px++) {
        const uint8_t *in  = src + 4 * px;
        uint8_t       *out = dst + 4 * px;

        /* read the whole pixel before writing so in-place use keeps working */
        sum_b += in[B];
        sum_g += in[G];
        sum_r += in[R];
        sum_a += in[A];

        out[B] = sum_b;
        out[G] = sum_g;
        out[R] = sum_r;
        out[A] = sum_a;
    }

    *red   = sum_r;
    *green = sum_g;
    *blue  = sum_b;
    *alpha = sum_a;
}
#undef B
#undef G
#undef R
#undef A
| #define BUTTERFLY2(o1, o2, i1, i2) \ | #define BUTTERFLY2(o1, o2, i1, i2) \ | ||||
| o1 = (i1) + (i2); \ | o1 = (i1) + (i2); \ | ||||
| o2 = (i1) - (i2); | o2 = (i1) - (i2); | ||||
| @@ -2578,11 +2485,6 @@ av_cold void ff_dsputil_init(DSPContext *c, AVCodecContext *avctx) | |||||
| c->ssd_int8_vs_int16 = ssd_int8_vs_int16_c; | c->ssd_int8_vs_int16 = ssd_int8_vs_int16_c; | ||||
| c->add_bytes = add_bytes_c; | |||||
| c->add_hfyu_median_prediction = add_hfyu_median_prediction_c; | |||||
| c->add_hfyu_left_prediction = add_hfyu_left_prediction_c; | |||||
| c->add_hfyu_left_prediction_bgr32 = add_hfyu_left_prediction_bgr32_c; | |||||
| c->diff_bytes = diff_bytes_c; | c->diff_bytes = diff_bytes_c; | ||||
| c->sub_hfyu_median_prediction = sub_hfyu_median_prediction_c; | c->sub_hfyu_median_prediction = sub_hfyu_median_prediction_c; | ||||
| @@ -186,9 +186,6 @@ typedef struct DSPContext { | |||||
| me_cmp_func pix_abs[2][4]; | me_cmp_func pix_abs[2][4]; | ||||
| /* HuffYUV specific */ | /* HuffYUV specific */ | ||||
| void (*add_bytes)(uint8_t *dst /* align 16 */, | |||||
| uint8_t *src /* align 16 */, | |||||
| int w); | |||||
| void (*diff_bytes)(uint8_t *dst /* align 16 */, | void (*diff_bytes)(uint8_t *dst /* align 16 */, | ||||
| uint8_t *src1 /* align 16 */, | uint8_t *src1 /* align 16 */, | ||||
| uint8_t *src2 /* align 1 */, | uint8_t *src2 /* align 1 */, | ||||
| @@ -200,14 +197,7 @@ typedef struct DSPContext { | |||||
| void (*sub_hfyu_median_prediction)(uint8_t *dst, const uint8_t *src1, | void (*sub_hfyu_median_prediction)(uint8_t *dst, const uint8_t *src1, | ||||
| const uint8_t *src2, int w, | const uint8_t *src2, int w, | ||||
| int *left, int *left_top); | int *left, int *left_top); | ||||
| void (*add_hfyu_median_prediction)(uint8_t *dst, const uint8_t *top, | |||||
| const uint8_t *diff, int w, | |||||
| int *left, int *left_top); | |||||
| int (*add_hfyu_left_prediction)(uint8_t *dst, const uint8_t *src, | |||||
| int w, int left); | |||||
| void (*add_hfyu_left_prediction_bgr32)(uint8_t *dst, const uint8_t *src, | |||||
| int w, int *red, int *green, | |||||
| int *blue, int *alpha); | |||||
| void (*bswap_buf)(uint32_t *dst, const uint32_t *src, int w); | void (*bswap_buf)(uint32_t *dst, const uint32_t *src, int w); | ||||
| void (*bswap16_buf)(uint16_t *dst, const uint16_t *src, int len); | void (*bswap16_buf)(uint16_t *dst, const uint16_t *src, int len); | ||||
| @@ -33,6 +33,7 @@ | |||||
| #include "libavutil/mem.h" | #include "libavutil/mem.h" | ||||
| #include "avcodec.h" | #include "avcodec.h" | ||||
| #include "dsputil.h" | |||||
| #include "huffyuv.h" | #include "huffyuv.h" | ||||
| int ff_huffyuv_generate_bits_table(uint32_t *dst, const uint8_t *len_table) | int ff_huffyuv_generate_bits_table(uint32_t *dst, const uint8_t *len_table) | ||||
| @@ -34,6 +34,7 @@ | |||||
| #include "avcodec.h" | #include "avcodec.h" | ||||
| #include "dsputil.h" | #include "dsputil.h" | ||||
| #include "get_bits.h" | #include "get_bits.h" | ||||
| #include "huffyuvdsp.h" | |||||
| #include "put_bits.h" | #include "put_bits.h" | ||||
| #define VLC_BITS 11 | #define VLC_BITS 11 | ||||
| @@ -81,6 +82,7 @@ typedef struct HYuvContext { | |||||
| uint8_t *bitstream_buffer; | uint8_t *bitstream_buffer; | ||||
| unsigned int bitstream_buffer_size; | unsigned int bitstream_buffer_size; | ||||
| DSPContext dsp; | DSPContext dsp; | ||||
| HuffYUVDSPContext hdsp; | |||||
| } HYuvContext; | } HYuvContext; | ||||
| void ff_huffyuv_common_init(AVCodecContext *s); | void ff_huffyuv_common_init(AVCodecContext *s); | ||||
| @@ -31,6 +31,7 @@ | |||||
| #include "avcodec.h" | #include "avcodec.h" | ||||
| #include "get_bits.h" | #include "get_bits.h" | ||||
| #include "huffyuv.h" | #include "huffyuv.h" | ||||
| #include "huffyuvdsp.h" | |||||
| #include "thread.h" | #include "thread.h" | ||||
| #define classic_shift_luma_table_size 42 | #define classic_shift_luma_table_size 42 | ||||
| @@ -239,6 +240,7 @@ static av_cold int decode_init(AVCodecContext *avctx) | |||||
| HYuvContext *s = avctx->priv_data; | HYuvContext *s = avctx->priv_data; | ||||
| ff_huffyuv_common_init(avctx); | ff_huffyuv_common_init(avctx); | ||||
| ff_huffyuvdsp_init(&s->hdsp); | |||||
| memset(s->vlc, 0, 3 * sizeof(VLC)); | memset(s->vlc, 0, 3 * sizeof(VLC)); | ||||
| s->interlaced = s->height > 288; | s->interlaced = s->height > 288; | ||||
| @@ -542,10 +544,10 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame, | |||||
| case LEFT: | case LEFT: | ||||
| case PLANE: | case PLANE: | ||||
| decode_422_bitstream(s, width-2); | decode_422_bitstream(s, width-2); | ||||
| lefty = s->dsp.add_hfyu_left_prediction(p->data[0] + 2, s->temp[0], width-2, lefty); | |||||
| lefty = s->hdsp.add_hfyu_left_pred(p->data[0] + 2, s->temp[0], width - 2, lefty); | |||||
| if (!(s->flags&CODEC_FLAG_GRAY)) { | if (!(s->flags&CODEC_FLAG_GRAY)) { | ||||
| leftu = s->dsp.add_hfyu_left_prediction(p->data[1] + 1, s->temp[1], width2 - 1, leftu); | |||||
| leftv = s->dsp.add_hfyu_left_prediction(p->data[2] + 1, s->temp[2], width2 - 1, leftv); | |||||
| leftu = s->hdsp.add_hfyu_left_pred(p->data[1] + 1, s->temp[1], width2 - 1, leftu); | |||||
| leftv = s->hdsp.add_hfyu_left_pred(p->data[2] + 1, s->temp[2], width2 - 1, leftv); | |||||
| } | } | ||||
| for (cy = y = 1; y < s->height; y++, cy++) { | for (cy = y = 1; y < s->height; y++, cy++) { | ||||
| @@ -556,10 +558,10 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame, | |||||
| ydst = p->data[0] + p->linesize[0] * y; | ydst = p->data[0] + p->linesize[0] * y; | ||||
| lefty = s->dsp.add_hfyu_left_prediction(ydst, s->temp[0], width, lefty); | |||||
| lefty = s->hdsp.add_hfyu_left_pred(ydst, s->temp[0], width, lefty); | |||||
| if (s->predictor == PLANE) { | if (s->predictor == PLANE) { | ||||
| if (y > s->interlaced) | if (y > s->interlaced) | ||||
| s->dsp.add_bytes(ydst, ydst - fake_ystride, width); | |||||
| s->hdsp.add_bytes(ydst, ydst - fake_ystride, width); | |||||
| } | } | ||||
| y++; | y++; | ||||
| if (y >= s->height) break; | if (y >= s->height) break; | ||||
| @@ -572,17 +574,17 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame, | |||||
| vdst = p->data[2] + p->linesize[2]*cy; | vdst = p->data[2] + p->linesize[2]*cy; | ||||
| decode_422_bitstream(s, width); | decode_422_bitstream(s, width); | ||||
| lefty = s->dsp.add_hfyu_left_prediction(ydst, s->temp[0], width, lefty); | |||||
| lefty = s->hdsp.add_hfyu_left_pred(ydst, s->temp[0], width, lefty); | |||||
| if (!(s->flags & CODEC_FLAG_GRAY)) { | if (!(s->flags & CODEC_FLAG_GRAY)) { | ||||
| leftu= s->dsp.add_hfyu_left_prediction(udst, s->temp[1], width2, leftu); | |||||
| leftv= s->dsp.add_hfyu_left_prediction(vdst, s->temp[2], width2, leftv); | |||||
| leftu = s->hdsp.add_hfyu_left_pred(udst, s->temp[1], width2, leftu); | |||||
| leftv = s->hdsp.add_hfyu_left_pred(vdst, s->temp[2], width2, leftv); | |||||
| } | } | ||||
| if (s->predictor == PLANE) { | if (s->predictor == PLANE) { | ||||
| if (cy > s->interlaced) { | if (cy > s->interlaced) { | ||||
| s->dsp.add_bytes(ydst, ydst - fake_ystride, width); | |||||
| s->hdsp.add_bytes(ydst, ydst - fake_ystride, width); | |||||
| if (!(s->flags & CODEC_FLAG_GRAY)) { | if (!(s->flags & CODEC_FLAG_GRAY)) { | ||||
| s->dsp.add_bytes(udst, udst - fake_ustride, width2); | |||||
| s->dsp.add_bytes(vdst, vdst - fake_vstride, width2); | |||||
| s->hdsp.add_bytes(udst, udst - fake_ustride, width2); | |||||
| s->hdsp.add_bytes(vdst, vdst - fake_vstride, width2); | |||||
| } | } | ||||
| } | } | ||||
| } | } | ||||
| @@ -593,10 +595,10 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame, | |||||
| case MEDIAN: | case MEDIAN: | ||||
| /* first line except first 2 pixels is left predicted */ | /* first line except first 2 pixels is left predicted */ | ||||
| decode_422_bitstream(s, width - 2); | decode_422_bitstream(s, width - 2); | ||||
| lefty= s->dsp.add_hfyu_left_prediction(p->data[0] + 2, s->temp[0], width - 2, lefty); | |||||
| lefty = s->hdsp.add_hfyu_left_pred(p->data[0] + 2, s->temp[0], width - 2, lefty); | |||||
| if (!(s->flags & CODEC_FLAG_GRAY)) { | if (!(s->flags & CODEC_FLAG_GRAY)) { | ||||
| leftu = s->dsp.add_hfyu_left_prediction(p->data[1] + 1, s->temp[1], width2 - 1, leftu); | |||||
| leftv = s->dsp.add_hfyu_left_prediction(p->data[2] + 1, s->temp[2], width2 - 1, leftv); | |||||
| leftu = s->hdsp.add_hfyu_left_pred(p->data[1] + 1, s->temp[1], width2 - 1, leftu); | |||||
| leftv = s->hdsp.add_hfyu_left_pred(p->data[2] + 1, s->temp[2], width2 - 1, leftv); | |||||
| } | } | ||||
| cy = y = 1; | cy = y = 1; | ||||
| @@ -604,31 +606,31 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame, | |||||
| /* second line is left predicted for interlaced case */ | /* second line is left predicted for interlaced case */ | ||||
| if (s->interlaced) { | if (s->interlaced) { | ||||
| decode_422_bitstream(s, width); | decode_422_bitstream(s, width); | ||||
| lefty = s->dsp.add_hfyu_left_prediction(p->data[0] + p->linesize[0], s->temp[0], width, lefty); | |||||
| lefty = s->hdsp.add_hfyu_left_pred(p->data[0] + p->linesize[0], s->temp[0], width, lefty); | |||||
| if (!(s->flags & CODEC_FLAG_GRAY)) { | if (!(s->flags & CODEC_FLAG_GRAY)) { | ||||
| leftu = s->dsp.add_hfyu_left_prediction(p->data[1] + p->linesize[2], s->temp[1], width2, leftu); | |||||
| leftv = s->dsp.add_hfyu_left_prediction(p->data[2] + p->linesize[1], s->temp[2], width2, leftv); | |||||
| leftu = s->hdsp.add_hfyu_left_pred(p->data[1] + p->linesize[2], s->temp[1], width2, leftu); | |||||
| leftv = s->hdsp.add_hfyu_left_pred(p->data[2] + p->linesize[1], s->temp[2], width2, leftv); | |||||
| } | } | ||||
| y++; cy++; | y++; cy++; | ||||
| } | } | ||||
| /* next 4 pixels are left predicted too */ | /* next 4 pixels are left predicted too */ | ||||
| decode_422_bitstream(s, 4); | decode_422_bitstream(s, 4); | ||||
| lefty = s->dsp.add_hfyu_left_prediction(p->data[0] + fake_ystride, s->temp[0], 4, lefty); | |||||
| lefty = s->hdsp.add_hfyu_left_pred(p->data[0] + fake_ystride, s->temp[0], 4, lefty); | |||||
| if (!(s->flags&CODEC_FLAG_GRAY)) { | if (!(s->flags&CODEC_FLAG_GRAY)) { | ||||
| leftu = s->dsp.add_hfyu_left_prediction(p->data[1] + fake_ustride, s->temp[1], 2, leftu); | |||||
| leftv = s->dsp.add_hfyu_left_prediction(p->data[2] + fake_vstride, s->temp[2], 2, leftv); | |||||
| leftu = s->hdsp.add_hfyu_left_pred(p->data[1] + fake_ustride, s->temp[1], 2, leftu); | |||||
| leftv = s->hdsp.add_hfyu_left_pred(p->data[2] + fake_vstride, s->temp[2], 2, leftv); | |||||
| } | } | ||||
| /* next line except the first 4 pixels is median predicted */ | /* next line except the first 4 pixels is median predicted */ | ||||
| lefttopy = p->data[0][3]; | lefttopy = p->data[0][3]; | ||||
| decode_422_bitstream(s, width - 4); | decode_422_bitstream(s, width - 4); | ||||
| s->dsp.add_hfyu_median_prediction(p->data[0] + fake_ystride+4, p->data[0]+4, s->temp[0], width-4, &lefty, &lefttopy); | |||||
| s->hdsp.add_hfyu_median_pred(p->data[0] + fake_ystride + 4, p->data[0] + 4, s->temp[0], width - 4, &lefty, &lefttopy); | |||||
| if (!(s->flags&CODEC_FLAG_GRAY)) { | if (!(s->flags&CODEC_FLAG_GRAY)) { | ||||
| lefttopu = p->data[1][1]; | lefttopu = p->data[1][1]; | ||||
| lefttopv = p->data[2][1]; | lefttopv = p->data[2][1]; | ||||
| s->dsp.add_hfyu_median_prediction(p->data[1] + fake_ustride+2, p->data[1] + 2, s->temp[1], width2 - 2, &leftu, &lefttopu); | |||||
| s->dsp.add_hfyu_median_prediction(p->data[2] + fake_vstride+2, p->data[2] + 2, s->temp[2], width2 - 2, &leftv, &lefttopv); | |||||
| s->hdsp.add_hfyu_median_pred(p->data[1] + fake_ustride + 2, p->data[1] + 2, s->temp[1], width2 - 2, &leftu, &lefttopu); | |||||
| s->hdsp.add_hfyu_median_pred(p->data[2] + fake_vstride + 2, p->data[2] + 2, s->temp[2], width2 - 2, &leftv, &lefttopv); | |||||
| } | } | ||||
| y++; cy++; | y++; cy++; | ||||
| @@ -639,7 +641,7 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame, | |||||
| while (2 * cy > y) { | while (2 * cy > y) { | ||||
| decode_gray_bitstream(s, width); | decode_gray_bitstream(s, width); | ||||
| ydst = p->data[0] + p->linesize[0] * y; | ydst = p->data[0] + p->linesize[0] * y; | ||||
| s->dsp.add_hfyu_median_prediction(ydst, ydst - fake_ystride, s->temp[0], width, &lefty, &lefttopy); | |||||
| s->hdsp.add_hfyu_median_pred(ydst, ydst - fake_ystride, s->temp[0], width, &lefty, &lefttopy); | |||||
| y++; | y++; | ||||
| } | } | ||||
| if (y >= height) break; | if (y >= height) break; | ||||
| @@ -652,10 +654,10 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame, | |||||
| udst = p->data[1] + p->linesize[1] * cy; | udst = p->data[1] + p->linesize[1] * cy; | ||||
| vdst = p->data[2] + p->linesize[2] * cy; | vdst = p->data[2] + p->linesize[2] * cy; | ||||
| s->dsp.add_hfyu_median_prediction(ydst, ydst - fake_ystride, s->temp[0], width, &lefty, &lefttopy); | |||||
| s->hdsp.add_hfyu_median_pred(ydst, ydst - fake_ystride, s->temp[0], width, &lefty, &lefttopy); | |||||
| if (!(s->flags & CODEC_FLAG_GRAY)) { | if (!(s->flags & CODEC_FLAG_GRAY)) { | ||||
| s->dsp.add_hfyu_median_prediction(udst, udst - fake_ustride, s->temp[1], width2, &leftu, &lefttopu); | |||||
| s->dsp.add_hfyu_median_prediction(vdst, vdst - fake_vstride, s->temp[2], width2, &leftv, &lefttopv); | |||||
| s->hdsp.add_hfyu_median_pred(udst, udst - fake_ustride, s->temp[1], width2, &leftu, &lefttopu); | |||||
| s->hdsp.add_hfyu_median_pred(vdst, vdst - fake_vstride, s->temp[2], width2, &leftv, &lefttopv); | |||||
| } | } | ||||
| } | } | ||||
| @@ -686,19 +688,19 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *got_frame, | |||||
| case LEFT: | case LEFT: | ||||
| case PLANE: | case PLANE: | ||||
| decode_bgr_bitstream(s, width - 1); | decode_bgr_bitstream(s, width - 1); | ||||
| s->dsp.add_hfyu_left_prediction_bgr32(p->data[0] + last_line+4, s->temp[0], width - 1, &leftr, &leftg, &leftb, &lefta); | |||||
| s->hdsp.add_hfyu_left_pred_bgr32(p->data[0] + last_line + 4, s->temp[0], width - 1, &leftr, &leftg, &leftb, &lefta); | |||||
| for (y = s->height - 2; y >= 0; y--) { //Yes it is stored upside down. | for (y = s->height - 2; y >= 0; y--) { //Yes it is stored upside down. | ||||
| decode_bgr_bitstream(s, width); | decode_bgr_bitstream(s, width); | ||||
| s->dsp.add_hfyu_left_prediction_bgr32(p->data[0] + p->linesize[0]*y, s->temp[0], width, &leftr, &leftg, &leftb, &lefta); | |||||
| s->hdsp.add_hfyu_left_pred_bgr32(p->data[0] + p->linesize[0] * y, s->temp[0], width, &leftr, &leftg, &leftb, &lefta); | |||||
| if (s->predictor == PLANE) { | if (s->predictor == PLANE) { | ||||
| if (s->bitstream_bpp != 32) lefta = 0; | if (s->bitstream_bpp != 32) lefta = 0; | ||||
| if ((y & s->interlaced) == 0 && | if ((y & s->interlaced) == 0 && | ||||
| y < s->height - 1 - s->interlaced) { | y < s->height - 1 - s->interlaced) { | ||||
| s->dsp.add_bytes(p->data[0] + p->linesize[0] * y, | |||||
| p->data[0] + p->linesize[0] * y + | |||||
| fake_ystride, fake_ystride); | |||||
| s->hdsp.add_bytes(p->data[0] + p->linesize[0] * y, | |||||
| p->data[0] + p->linesize[0] * y + | |||||
| fake_ystride, fake_ystride); | |||||
| } | } | ||||
| } | } | ||||
| } | } | ||||
| @@ -0,0 +1,132 @@ | |||||
| /* | |||||
| * This file is part of Libav. | |||||
| * | |||||
| * Libav is free software; you can redistribute it and/or | |||||
| * modify it under the terms of the GNU Lesser General Public | |||||
| * License as published by the Free Software Foundation; either | |||||
| * version 2.1 of the License, or (at your option) any later version. | |||||
| * | |||||
| * Libav is distributed in the hope that it will be useful, | |||||
| * but WITHOUT ANY WARRANTY; without even the implied warranty of | |||||
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |||||
| * Lesser General Public License for more details. | |||||
| * | |||||
| * You should have received a copy of the GNU Lesser General Public | |||||
| * License along with Libav; if not, write to the Free Software | |||||
| * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |||||
| */ | |||||
| #include <stdint.h> | |||||
| #include "config.h" | |||||
| #include "libavutil/attributes.h" | |||||
| #include "mathops.h" | |||||
| #include "huffyuvdsp.h" | |||||
/* The byte 0x7f (resp. 0x80) repeated across one unsigned long, i.e.
 * 0x7f7f7f7f or 0x7f7f7f7f7f7f7f7f depending on the CPU's native
 * arithmetic size. */
#define pb_7f (~0UL / 255 * 0x7f)
#define pb_80 (~0UL / 255 * 0x80)

/**
 * Add the bytes of src onto dst, each byte wrapping modulo 256.
 *
 * As long as a whole native long still fits, one long worth of bytes is
 * handled per step: the low seven bits of every byte are summed with the top
 * bit masked off, so no carry can reach the neighbouring byte, and the top
 * bit of each byte is then folded back in with an XOR. The bytes that do not
 * fill a whole long are added one at a time.
 *
 * @param dst buffer that is updated in place
 * @param src buffer that is added to dst
 * @param w   number of bytes to process
 */
static void add_bytes_c(uint8_t *dst, uint8_t *src, int w)
{
    const int last_word = w - (int) sizeof(long);
    long pos = 0;

    while (pos <= last_word) {
        const long s = *(long *) (src + pos);
        const long d = *(long *) (dst + pos);

        *(long *) (dst + pos) = ((s & pb_7f) + (d & pb_7f)) ^ ((s ^ d) & pb_80);
        pos += sizeof(long);
    }

    /* leftover bytes that did not fill a whole long */
    while (pos < w) {
        dst[pos] += src[pos];
        pos++;
    }
}
/**
 * Undo median prediction for one row.
 *
 * Every output byte is mid_pred(left, top, (left + top - top_left) & 0xFF)
 * plus the matching residual from diff, truncated to 8 bits.
 *
 * @param dst      reconstructed row
 * @param src1     row used as the "top" neighbour
 * @param diff     residuals to add to the prediction
 * @param w        number of bytes in the row
 * @param left     in: left neighbour of the first byte; out: last byte written
 * @param left_top in: top-left neighbour of the first byte; out: last top byte
 */
static void add_hfyu_median_pred_c(uint8_t *dst, const uint8_t *src1,
                                   const uint8_t *diff, int w,
                                   int *left, int *left_top)
{
    uint8_t cur      = *left;
    uint8_t top_left = *left_top;
    int x;

    for (x = 0; x < w; x++) {
        const uint8_t top      = src1[x];
        const int     gradient = (cur + top - top_left) & 0xFF;

        cur      = mid_pred(cur, top, gradient) + diff[x];
        top_left = top;
        dst[x]   = cur;
    }

    *left     = cur;
    *left_top = top_left;
}
/**
 * Undo left prediction for one row: a running sum over src.
 *
 * Each output byte is the low 8 bits of the accumulator after the matching
 * source byte has been added to it.
 *
 * @param dst destination row (may be the same buffer as src)
 * @param src residual bytes
 * @param w   number of bytes to process
 * @param acc starting value of the accumulator
 * @return the accumulator after the last byte, not reduced to 8 bits
 */
static int add_hfyu_left_pred_c(uint8_t *dst, const uint8_t *src, int w,
                                int acc)
{
    const uint8_t *in  = src;
    uint8_t       *out = dst;
    int remaining;

    for (remaining = w; remaining > 0; remaining--) {
        acc   += *in++;
        *out++ = acc;
    }

    return acc;
}
/* Byte offset of each channel inside a 4-byte pixel; depends on endianness. */
#if HAVE_BIGENDIAN
#define B 3
#define G 2
#define R 1
#define A 0
#else
#define B 0
#define G 1
#define R 2
#define A 3
#endif

/**
 * Undo left prediction on a row of 4-byte pixels, one running sum per channel.
 *
 * @param dst   destination row, 4 bytes per pixel
 * @param src   residual row, 4 bytes per pixel
 * @param w     number of pixels
 * @param red   in: starting red accumulator; out: accumulator after the row
 * @param green same for the green channel
 * @param blue  same for the blue channel
 * @param alpha same for the alpha channel
 */
static void add_hfyu_left_pred_bgr32_c(uint8_t *dst, const uint8_t *src,
                                       int w, int *red, int *green,
                                       int *blue, int *alpha)
{
    int sum_r = *red;
    int sum_g = *green;
    int sum_b = *blue;
    int sum_a = *alpha;
    int px;

    for (px = 0; px < w; px++) {
        const uint8_t *in  = src + 4 * px;
        uint8_t       *out = dst + 4 * px;

        /* read the whole pixel before writing so in-place use keeps working */
        sum_b += in[B];
        sum_g += in[G];
        sum_r += in[R];
        sum_a += in[A];

        out[B] = sum_b;
        out[G] = sum_g;
        out[R] = sum_r;
        out[A] = sum_a;
    }

    *red   = sum_r;
    *green = sum_g;
    *blue  = sum_b;
    *alpha = sum_a;
}
#undef B
#undef G
#undef R
#undef A
| av_cold void ff_huffyuvdsp_init(HuffYUVDSPContext *c) | |||||
| { | |||||
| c->add_bytes = add_bytes_c; | |||||
| c->add_hfyu_median_pred = add_hfyu_median_pred_c; | |||||
| c->add_hfyu_left_pred = add_hfyu_left_pred_c; | |||||
| c->add_hfyu_left_pred_bgr32 = add_hfyu_left_pred_bgr32_c; | |||||
| if (ARCH_X86) | |||||
| ff_huffyuvdsp_init_x86(c); | |||||
| } | |||||
| @@ -0,0 +1,41 @@ | |||||
| /* | |||||
| * This file is part of Libav. | |||||
| * | |||||
| * Libav is free software; you can redistribute it and/or | |||||
| * modify it under the terms of the GNU Lesser General Public | |||||
| * License as published by the Free Software Foundation; either | |||||
| * version 2.1 of the License, or (at your option) any later version. | |||||
| * | |||||
| * Libav is distributed in the hope that it will be useful, | |||||
| * but WITHOUT ANY WARRANTY; without even the implied warranty of | |||||
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |||||
| * Lesser General Public License for more details. | |||||
| * | |||||
| * You should have received a copy of the GNU Lesser General Public | |||||
| * License along with Libav; if not, write to the Free Software | |||||
| * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |||||
| */ | |||||
| #ifndef AVCODEC_HUFFYUVDSP_H | |||||
| #define AVCODEC_HUFFYUVDSP_H | |||||
| #include <stdint.h> | |||||
/**
 * Function table with the routines used to undo HuffYUV-style prediction.
 * ff_huffyuvdsp_init() fills in every member.
 */
typedef struct HuffYUVDSPContext {
    /** Add w bytes of src onto dst in place, byte by byte. */
    void (*add_bytes)(uint8_t *dst /* align 16 */,
                      uint8_t *src /* align 16 */,
                      int w);

    /**
     * Undo median prediction on a row of w bytes, using top as the row
     * above and diff as the residuals; *left and *left_top carry the
     * neighbour state into and out of the call.
     */
    void (*add_hfyu_median_pred)(uint8_t *dst,
                                 const uint8_t *top,
                                 const uint8_t *diff,
                                 int w,
                                 int *left,
                                 int *left_top);

    /**
     * Undo left prediction on a row of w bytes, starting from left.
     * @return the accumulator after the last byte
     */
    int (*add_hfyu_left_pred)(uint8_t *dst,
                              const uint8_t *src,
                              int w,
                              int left);

    /**
     * Undo left prediction on a row of w 4-byte pixels; the four int
     * pointers carry the per-channel accumulators into and out of the call.
     */
    void (*add_hfyu_left_pred_bgr32)(uint8_t *dst,
                                     const uint8_t *src,
                                     int w,
                                     int *red,
                                     int *green,
                                     int *blue,
                                     int *alpha);
} HuffYUVDSPContext;
| void ff_huffyuvdsp_init(HuffYUVDSPContext *c); | |||||
| void ff_huffyuvdsp_init_ppc(HuffYUVDSPContext *c); | |||||
| void ff_huffyuvdsp_init_x86(HuffYUVDSPContext *c); | |||||
| #endif /* AVCODEC_HUFFYUVDSP_H */ | |||||
| @@ -30,7 +30,7 @@ | |||||
| #include "avcodec.h" | #include "avcodec.h" | ||||
| #include "get_bits.h" | #include "get_bits.h" | ||||
| #include "mathops.h" | #include "mathops.h" | ||||
| #include "dsputil.h" | |||||
| #include "huffyuvdsp.h" | |||||
| #include "lagarithrac.h" | #include "lagarithrac.h" | ||||
| #include "thread.h" | #include "thread.h" | ||||
| @@ -50,7 +50,7 @@ enum LagarithFrameType { | |||||
| typedef struct LagarithContext { | typedef struct LagarithContext { | ||||
| AVCodecContext *avctx; | AVCodecContext *avctx; | ||||
| DSPContext dsp; | |||||
| HuffYUVDSPContext hdsp; | |||||
| int zeros; /**< number of consecutive zero bytes encountered */ | int zeros; /**< number of consecutive zero bytes encountered */ | ||||
| int zeros_rem; /**< number of zero bytes remaining to output */ | int zeros_rem; /**< number of zero bytes remaining to output */ | ||||
| uint8_t *rgb_planes; | uint8_t *rgb_planes; | ||||
| @@ -225,7 +225,7 @@ static void add_lag_median_prediction(uint8_t *dst, uint8_t *src1, | |||||
| uint8_t *diff, int w, int *left, | uint8_t *diff, int w, int *left, | ||||
| int *left_top) | int *left_top) | ||||
| { | { | ||||
| /* This is almost identical to add_hfyu_median_prediction in dsputil.h. | |||||
| /* This is almost identical to add_hfyu_median_pred in huffyuvdsp.h. | |||||
| * However the &0xFF on the gradient predictor yields incorrect output | * However the &0xFF on the gradient predictor yields incorrect output | ||||
| * for lagarith. | * for lagarith. | ||||
| */ | */ | ||||
| @@ -253,8 +253,7 @@ static void lag_pred_line(LagarithContext *l, uint8_t *buf, | |||||
| if (!line) { | if (!line) { | ||||
| int i, align_width = (width - 1) & ~31; | int i, align_width = (width - 1) & ~31; | ||||
| /* Left prediction only for first line */ | /* Left prediction only for first line */ | ||||
| L = l->dsp.add_hfyu_left_prediction(buf + 1, buf + 1, | |||||
| align_width, buf[0]); | |||||
| L = l->hdsp.add_hfyu_left_pred(buf + 1, buf + 1, align_width, buf[0]); | |||||
| for (i = align_width + 1; i < width; i++) | for (i = align_width + 1; i < width; i++) | ||||
| buf[i] += buf[i - 1]; | buf[i] += buf[i - 1]; | ||||
| } else { | } else { | ||||
| @@ -289,7 +288,7 @@ static void lag_pred_line_yuy2(LagarithContext *l, uint8_t *buf, | |||||
| } | } | ||||
| align_width = (width - 1) & ~31; | align_width = (width - 1) & ~31; | ||||
| l->dsp.add_hfyu_left_prediction(buf + 1, buf + 1, align_width, buf[0]); | |||||
| l->hdsp.add_hfyu_left_pred(buf + 1, buf + 1, align_width, buf[0]); | |||||
| for (i = align_width + 1; i < width; i++) | for (i = align_width + 1; i < width; i++) | ||||
| buf[i] += buf[i - 1]; | buf[i] += buf[i - 1]; | ||||
| @@ -314,8 +313,7 @@ static void lag_pred_line_yuy2(LagarithContext *l, uint8_t *buf, | |||||
| } else { | } else { | ||||
| TL = buf[width - (2 * stride) - 1]; | TL = buf[width - (2 * stride) - 1]; | ||||
| L = buf[width - stride - 1]; | L = buf[width - stride - 1]; | ||||
| l->dsp.add_hfyu_median_prediction(buf, buf - stride, buf, width, | |||||
| &L, &TL); | |||||
| l->hdsp.add_hfyu_median_pred(buf, buf - stride, buf, width, &L, &TL); | |||||
| } | } | ||||
| } | } | ||||
| @@ -682,7 +680,7 @@ static av_cold int lag_decode_init(AVCodecContext *avctx) | |||||
| LagarithContext *l = avctx->priv_data; | LagarithContext *l = avctx->priv_data; | ||||
| l->avctx = avctx; | l->avctx = avctx; | ||||
| ff_dsputil_init(&l->dsp, avctx); | |||||
| ff_huffyuvdsp_init(&l->hdsp); | |||||
| return 0; | return 0; | ||||
| } | } | ||||
| @@ -6,6 +6,7 @@ OBJS-$(CONFIG_H264CHROMA) += ppc/h264chroma_init.o | |||||
| OBJS-$(CONFIG_H264DSP) += ppc/h264dsp.o | OBJS-$(CONFIG_H264DSP) += ppc/h264dsp.o | ||||
| OBJS-$(CONFIG_H264QPEL) += ppc/h264qpel.o | OBJS-$(CONFIG_H264QPEL) += ppc/h264qpel.o | ||||
| OBJS-$(CONFIG_HPELDSP) += ppc/hpeldsp_altivec.o | OBJS-$(CONFIG_HPELDSP) += ppc/hpeldsp_altivec.o | ||||
| OBJS-$(CONFIG_HUFFYUVDSP) += ppc/huffyuvdsp_altivec.o | |||||
| OBJS-$(CONFIG_MPEGAUDIODSP) += ppc/mpegaudiodsp_altivec.o | OBJS-$(CONFIG_MPEGAUDIODSP) += ppc/mpegaudiodsp_altivec.o | ||||
| OBJS-$(CONFIG_MPEGVIDEO) += ppc/mpegvideo_altivec.o | OBJS-$(CONFIG_MPEGVIDEO) += ppc/mpegvideo_altivec.o | ||||
| OBJS-$(CONFIG_VIDEODSP) += ppc/videodsp_ppc.o | OBJS-$(CONFIG_VIDEODSP) += ppc/videodsp_ppc.o | ||||
| @@ -571,23 +571,6 @@ static void clear_block_altivec(int16_t *block) | |||||
| vec_st(zero_s16v, 112, block); | vec_st(zero_s16v, 112, block); | ||||
| } | } | ||||
| static void add_bytes_altivec(uint8_t *dst, uint8_t *src, int w) | |||||
| { | |||||
| register int i; | |||||
| register vector unsigned char vdst, vsrc; | |||||
| /* dst and src are 16 bytes-aligned (guaranteed). */ | |||||
| for (i = 0; i + 15 < w; i += 16) { | |||||
| vdst = vec_ld(i, (unsigned char *) dst); | |||||
| vsrc = vec_ld(i, (unsigned char *) src); | |||||
| vdst = vec_add(vsrc, vdst); | |||||
| vec_st(vdst, i, (unsigned char *) dst); | |||||
| } | |||||
| /* If w is not a multiple of 16. */ | |||||
| for (; i < w; i++) | |||||
| dst[i] = src[i]; | |||||
| } | |||||
| static int hadamard8_diff8x8_altivec(MpegEncContext *s, uint8_t *dst, | static int hadamard8_diff8x8_altivec(MpegEncContext *s, uint8_t *dst, | ||||
| uint8_t *src, int stride, int h) | uint8_t *src, int stride, int h) | ||||
| { | { | ||||
| @@ -945,7 +928,6 @@ av_cold void ff_dsputil_init_altivec(DSPContext *c, AVCodecContext *avctx, | |||||
| c->pix_sum = pix_sum_altivec; | c->pix_sum = pix_sum_altivec; | ||||
| c->diff_pixels = diff_pixels_altivec; | c->diff_pixels = diff_pixels_altivec; | ||||
| c->add_bytes = add_bytes_altivec; | |||||
| if (!high_bit_depth) { | if (!high_bit_depth) { | ||||
| c->get_pixels = get_pixels_altivec; | c->get_pixels = get_pixels_altivec; | ||||
| @@ -0,0 +1,57 @@ | |||||
| /* | |||||
| * Copyright (c) 2002 Brian Foley | |||||
| * Copyright (c) 2002 Dieter Shirley | |||||
| * Copyright (c) 2003-2004 Romain Dolbeau <romain@dolbeau.org> | |||||
| * | |||||
| * This file is part of Libav. | |||||
| * | |||||
| * Libav is free software; you can redistribute it and/or | |||||
| * modify it under the terms of the GNU Lesser General Public | |||||
| * License as published by the Free Software Foundation; either | |||||
| * version 2.1 of the License, or (at your option) any later version. | |||||
| * | |||||
| * Libav is distributed in the hope that it will be useful, | |||||
| * but WITHOUT ANY WARRANTY; without even the implied warranty of | |||||
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |||||
| * Lesser General Public License for more details. | |||||
| * | |||||
| * You should have received a copy of the GNU Lesser General Public | |||||
| * License along with Libav; if not, write to the Free Software | |||||
| * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |||||
| */ | |||||
| #include "config.h" | |||||
| #if HAVE_ALTIVEC_H | |||||
| #include <altivec.h> | |||||
| #endif | |||||
| #include "libavutil/attributes.h" | |||||
| #include "libavutil/ppc/types_altivec.h" | |||||
| #include "libavutil/ppc/util_altivec.h" | |||||
| #include "libavcodec/huffyuvdsp.h" | |||||
| #if HAVE_ALTIVEC | |||||
| /* | |||||
| * In-place byte-wise addition: dst[i] += src[i] for 0 <= i < w. | |||||
| * Whole 16-byte groups go through vec_ld/vec_add/vec_st; whatever is left | |||||
| * when w is not a multiple of 16 is finished by the scalar loop. | |||||
| */ | |||||
| static void add_bytes_altivec(uint8_t *dst, uint8_t *src, int w) | |||||
| { | |||||
| register int i; | |||||
| register vector unsigned char vdst, vsrc; | |||||
| /* dst and src are 16 bytes-aligned (guaranteed). */ | |||||
| for (i = 0; i + 15 < w; i += 16) { | |||||
| vdst = vec_ld(i, (unsigned char *) dst); | |||||
| vsrc = vec_ld(i, (unsigned char *) src); | |||||
| vdst = vec_add(vsrc, vdst); | |||||
| vec_st(vdst, i, (unsigned char *) dst); | |||||
| } | |||||
| /* If w is not a multiple of 16: the tail has to accumulate into dst just | |||||
| * like the vector loop does, not overwrite it with src. */ | |||||
| for (; i < w; i++) | |||||
| dst[i] += src[i]; | |||||
| } | |||||
| #endif /* HAVE_ALTIVEC */ | |||||
| av_cold void ff_huffyuvdsp_init_ppc(HuffYUVDSPContext *c) /* PPC hook: stores the AltiVec routine in c when built with HAVE_ALTIVEC, otherwise leaves c untouched */ | |||||
| { | |||||
| #if HAVE_ALTIVEC | |||||
| c->add_bytes = add_bytes_altivec; /* NOTE(review): selected on the build-time HAVE_ALTIVEC switch alone; no runtime CPU-flag test is visible in this function -- confirm one is not needed */ | |||||
| #endif /* HAVE_ALTIVEC */ | |||||
| } | |||||
| @@ -27,14 +27,14 @@ | |||||
| #define BITSTREAM_READER_LE | #define BITSTREAM_READER_LE | ||||
| #include "avcodec.h" | #include "avcodec.h" | ||||
| #include "dsputil.h" | |||||
| #include "get_bits.h" | #include "get_bits.h" | ||||
| #include "huffyuvdsp.h" | |||||
| #include "internal.h" | #include "internal.h" | ||||
| #include "mathops.h" | #include "mathops.h" | ||||
| typedef struct { | typedef struct { | ||||
| AVCodecContext *avctx; | AVCodecContext *avctx; | ||||
| DSPContext dsp; | |||||
| HuffYUVDSPContext hdsp; | |||||
| int size; | int size; | ||||
| uint8_t *val; /* First holds the lengths of vlc symbols and then their values */ | uint8_t *val; /* First holds the lengths of vlc symbols and then their values */ | ||||
| @@ -100,8 +100,8 @@ static void vble_restore_plane(VBLEContext *ctx, AVFrame *pic, | |||||
| if (i) { | if (i) { | ||||
| left = 0; | left = 0; | ||||
| left_top = dst[-stride]; | left_top = dst[-stride]; | ||||
| ctx->dsp.add_hfyu_median_prediction(dst, dst-stride, val, | |||||
| width, &left, &left_top); | |||||
| ctx->hdsp.add_hfyu_median_pred(dst, dst - stride, val, | |||||
| width, &left, &left_top); | |||||
| } else { | } else { | ||||
| dst[0] = val[0]; | dst[0] = val[0]; | ||||
| for (j = 1; j < width; j++) | for (j = 1; j < width; j++) | ||||
| @@ -178,7 +178,7 @@ static av_cold int vble_decode_init(AVCodecContext *avctx) | |||||
| /* Stash for later use */ | /* Stash for later use */ | ||||
| ctx->avctx = avctx; | ctx->avctx = avctx; | ||||
| ff_dsputil_init(&ctx->dsp, avctx); | |||||
| ff_huffyuvdsp_init(&ctx->hdsp); | |||||
| avctx->pix_fmt = AV_PIX_FMT_YUV420P; | avctx->pix_fmt = AV_PIX_FMT_YUV420P; | ||||
| avctx->bits_per_raw_sample = 8; | avctx->bits_per_raw_sample = 8; | ||||
| @@ -3,8 +3,7 @@ OBJS += x86/constants.o \ | |||||
| OBJS-$(CONFIG_AC3DSP) += x86/ac3dsp_init.o | OBJS-$(CONFIG_AC3DSP) += x86/ac3dsp_init.o | ||||
| OBJS-$(CONFIG_DCT) += x86/dct_init.o | OBJS-$(CONFIG_DCT) += x86/dct_init.o | ||||
| OBJS-$(CONFIG_DSPUTIL) += x86/dsputil_init.o \ | |||||
| x86/dsputil_x86.o | |||||
| OBJS-$(CONFIG_DSPUTIL) += x86/dsputil_init.o | |||||
| OBJS-$(CONFIG_ENCODERS) += x86/dsputilenc_mmx.o \ | OBJS-$(CONFIG_ENCODERS) += x86/dsputilenc_mmx.o \ | ||||
| x86/fdct.o \ | x86/fdct.o \ | ||||
| x86/motion_est.o | x86/motion_est.o | ||||
| @@ -15,6 +14,7 @@ OBJS-$(CONFIG_H264DSP) += x86/h264dsp_init.o | |||||
| OBJS-$(CONFIG_H264PRED) += x86/h264_intrapred_init.o | OBJS-$(CONFIG_H264PRED) += x86/h264_intrapred_init.o | ||||
| OBJS-$(CONFIG_H264QPEL) += x86/h264_qpel.o | OBJS-$(CONFIG_H264QPEL) += x86/h264_qpel.o | ||||
| OBJS-$(CONFIG_HPELDSP) += x86/hpeldsp_init.o | OBJS-$(CONFIG_HPELDSP) += x86/hpeldsp_init.o | ||||
| OBJS-$(CONFIG_HUFFYUVDSP) += x86/huffyuvdsp_init.o | |||||
| OBJS-$(CONFIG_LPC) += x86/lpc.o | OBJS-$(CONFIG_LPC) += x86/lpc.o | ||||
| OBJS-$(CONFIG_MPEGAUDIODSP) += x86/mpegaudiodsp.o | OBJS-$(CONFIG_MPEGAUDIODSP) += x86/mpegaudiodsp.o | ||||
| OBJS-$(CONFIG_MPEGVIDEO) += x86/mpegvideo.o | OBJS-$(CONFIG_MPEGVIDEO) += x86/mpegvideo.o | ||||
| @@ -48,6 +48,7 @@ MMX-OBJS-$(CONFIG_DSPUTIL) += x86/dsputil_mmx.o \ | |||||
| x86/simple_idct.o | x86/simple_idct.o | ||||
| MMX-OBJS-$(CONFIG_HPELDSP) += x86/fpel_mmx.o \ | MMX-OBJS-$(CONFIG_HPELDSP) += x86/fpel_mmx.o \ | ||||
| x86/hpeldsp_mmx.o | x86/hpeldsp_mmx.o | ||||
| MMX-OBJS-$(CONFIG_HUFFYUVDSP) += x86/huffyuvdsp_mmx.o | |||||
| MMX-OBJS-$(CONFIG_VC1_DECODER) += x86/vc1dsp_mmx.o | MMX-OBJS-$(CONFIG_VC1_DECODER) += x86/vc1dsp_mmx.o | ||||
| @@ -80,6 +81,7 @@ YASM-OBJS-$(CONFIG_H264QPEL) += x86/h264_qpel_8bit.o \ | |||||
| x86/qpel.o | x86/qpel.o | ||||
| YASM-OBJS-$(CONFIG_HPELDSP) += x86/fpel.o \ | YASM-OBJS-$(CONFIG_HPELDSP) += x86/fpel.o \ | ||||
| x86/hpeldsp.o | x86/hpeldsp.o | ||||
| YASM-OBJS-$(CONFIG_HUFFYUVDSP) += x86/huffyuvdsp.o | |||||
| YASM-OBJS-$(CONFIG_MPEGAUDIODSP) += x86/imdct36.o | YASM-OBJS-$(CONFIG_MPEGAUDIODSP) += x86/imdct36.o | ||||
| YASM-OBJS-$(CONFIG_VIDEODSP) += x86/videodsp.o | YASM-OBJS-$(CONFIG_VIDEODSP) += x86/videodsp.o | ||||
| YASM-OBJS-$(CONFIG_VP3DSP) += x86/vp3dsp.o | YASM-OBJS-$(CONFIG_VP3DSP) += x86/vp3dsp.o | ||||
| @@ -22,11 +22,6 @@ | |||||
| %include "libavutil/x86/x86util.asm" | %include "libavutil/x86/x86util.asm" | ||||
| SECTION_RODATA | SECTION_RODATA | ||||
| pb_f: times 16 db 15 | |||||
| pb_zzzzzzzz77777777: times 8 db -1 | |||||
| pb_7: times 8 db 7 | |||||
| pb_zzzz3333zzzzbbbb: db -1,-1,-1,-1,3,3,3,3,-1,-1,-1,-1,11,11,11,11 | |||||
| pb_zz11zz55zz99zzdd: db -1,-1,1,1,-1,-1,5,5,-1,-1,9,9,-1,-1,13,13 | |||||
| pb_bswap32: db 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 | pb_bswap32: db 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 | ||||
| SECTION_TEXT | SECTION_TEXT | ||||
| @@ -203,141 +198,6 @@ SCALARPRODUCT_LOOP 0 | |||||
| RET | RET | ||||
| ; void ff_add_hfyu_median_prediction_mmxext(uint8_t *dst, const uint8_t *top, | |||||
| ; const uint8_t *diff, int w, | |||||
| ; int *left, int *left_top) | |||||
| INIT_MMX mmxext | |||||
| cglobal add_hfyu_median_prediction, 6,6,0, dst, top, diff, w, left, left_top | |||||
| movq mm0, [topq] | |||||
| movq mm2, mm0 | |||||
| movd mm4, [left_topq] | |||||
| psllq mm2, 8 | |||||
| movq mm1, mm0 | |||||
| por mm4, mm2 | |||||
| movd mm3, [leftq] | |||||
| psubb mm0, mm4 ; t-tl | |||||
| add dstq, wq | |||||
| add topq, wq | |||||
| add diffq, wq | |||||
| neg wq | |||||
| jmp .skip | |||||
| .loop: | |||||
| movq mm4, [topq+wq] | |||||
| movq mm0, mm4 | |||||
| psllq mm4, 8 | |||||
| por mm4, mm1 | |||||
| movq mm1, mm0 ; t | |||||
| psubb mm0, mm4 ; t-tl | |||||
| .skip: | |||||
| movq mm2, [diffq+wq] | |||||
| %assign i 0 | |||||
| %rep 8 | |||||
| movq mm4, mm0 | |||||
| paddb mm4, mm3 ; t-tl+l | |||||
| movq mm5, mm3 | |||||
| pmaxub mm3, mm1 | |||||
| pminub mm5, mm1 | |||||
| pminub mm3, mm4 | |||||
| pmaxub mm3, mm5 ; median | |||||
| paddb mm3, mm2 ; +residual | |||||
| %if i==0 | |||||
| movq mm7, mm3 | |||||
| psllq mm7, 56 | |||||
| %else | |||||
| movq mm6, mm3 | |||||
| psrlq mm7, 8 | |||||
| psllq mm6, 56 | |||||
| por mm7, mm6 | |||||
| %endif | |||||
| %if i<7 | |||||
| psrlq mm0, 8 | |||||
| psrlq mm1, 8 | |||||
| psrlq mm2, 8 | |||||
| %endif | |||||
| %assign i i+1 | |||||
| %endrep | |||||
| movq [dstq+wq], mm7 | |||||
| add wq, 8 | |||||
| jl .loop | |||||
| movzx r2d, byte [dstq-1] | |||||
| mov [leftq], r2d | |||||
| movzx r2d, byte [topq-1] | |||||
| mov [left_topq], r2d | |||||
| RET | |||||
| %macro ADD_HFYU_LEFT_LOOP 2 ; %1 = dst_is_aligned, %2 = src_is_aligned | |||||
| add srcq, wq | |||||
| add dstq, wq | |||||
| neg wq | |||||
| %%.loop: | |||||
| %if %2 | |||||
| mova m1, [srcq+wq] | |||||
| %else | |||||
| movu m1, [srcq+wq] | |||||
| %endif | |||||
| mova m2, m1 | |||||
| psllw m1, 8 | |||||
| paddb m1, m2 | |||||
| mova m2, m1 | |||||
| pshufb m1, m3 | |||||
| paddb m1, m2 | |||||
| pshufb m0, m5 | |||||
| mova m2, m1 | |||||
| pshufb m1, m4 | |||||
| paddb m1, m2 | |||||
| %if mmsize == 16 | |||||
| mova m2, m1 | |||||
| pshufb m1, m6 | |||||
| paddb m1, m2 | |||||
| %endif | |||||
| paddb m0, m1 | |||||
| %if %1 | |||||
| mova [dstq+wq], m0 | |||||
| %else | |||||
| movq [dstq+wq], m0 | |||||
| movhps [dstq+wq+8], m0 | |||||
| %endif | |||||
| add wq, mmsize | |||||
| jl %%.loop | |||||
| mov eax, mmsize-1 | |||||
| sub eax, wd | |||||
| movd m1, eax | |||||
| pshufb m0, m1 | |||||
| movd eax, m0 | |||||
| RET | |||||
| %endmacro | |||||
| ; int ff_add_hfyu_left_prediction(uint8_t *dst, const uint8_t *src, | |||||
| ; int w, int left) | |||||
| INIT_MMX ssse3 | |||||
| cglobal add_hfyu_left_prediction, 3,3,7, dst, src, w, left | |||||
| .skip_prologue: | |||||
| mova m5, [pb_7] | |||||
| mova m4, [pb_zzzz3333zzzzbbbb] | |||||
| mova m3, [pb_zz11zz55zz99zzdd] | |||||
| movd m0, leftm | |||||
| psllq m0, 56 | |||||
| ADD_HFYU_LEFT_LOOP 1, 1 | |||||
| INIT_XMM sse4 | |||||
| cglobal add_hfyu_left_prediction, 3,3,7, dst, src, w, left | |||||
| mova m5, [pb_f] | |||||
| mova m6, [pb_zzzzzzzz77777777] | |||||
| mova m4, [pb_zzzz3333zzzzbbbb] | |||||
| mova m3, [pb_zz11zz55zz99zzdd] | |||||
| movd m0, leftm | |||||
| pslldq m0, 15 | |||||
| test srcq, 15 | |||||
| jnz .src_unaligned | |||||
| test dstq, 15 | |||||
| jnz .dst_unaligned | |||||
| ADD_HFYU_LEFT_LOOP 1, 1 | |||||
| .dst_unaligned: | |||||
| ADD_HFYU_LEFT_LOOP 0, 1 | |||||
| .src_unaligned: | |||||
| ADD_HFYU_LEFT_LOOP 0, 0 | |||||
| ;----------------------------------------------------------------------------- | ;----------------------------------------------------------------------------- | ||||
| ; void ff_vector_clip_int32(int32_t *dst, const int32_t *src, int32_t min, | ; void ff_vector_clip_int32(int32_t *dst, const int32_t *src, int32_t min, | ||||
| ; int32_t max, unsigned int len) | ; int32_t max, unsigned int len) | ||||
| @@ -20,7 +20,6 @@ | |||||
| #include "libavutil/attributes.h" | #include "libavutil/attributes.h" | ||||
| #include "libavutil/cpu.h" | #include "libavutil/cpu.h" | ||||
| #include "libavutil/internal.h" | #include "libavutil/internal.h" | ||||
| #include "libavutil/x86/asm.h" | |||||
| #include "libavutil/x86/cpu.h" | #include "libavutil/x86/cpu.h" | ||||
| #include "libavcodec/avcodec.h" | #include "libavcodec/avcodec.h" | ||||
| #include "libavcodec/dsputil.h" | #include "libavcodec/dsputil.h" | ||||
| @@ -90,14 +89,6 @@ int32_t ff_scalarproduct_and_madd_int16_ssse3(int16_t *v1, const int16_t *v2, | |||||
| void ff_bswap32_buf_ssse3(uint32_t *dst, const uint32_t *src, int w); | void ff_bswap32_buf_ssse3(uint32_t *dst, const uint32_t *src, int w); | ||||
| void ff_bswap32_buf_sse2(uint32_t *dst, const uint32_t *src, int w); | void ff_bswap32_buf_sse2(uint32_t *dst, const uint32_t *src, int w); | ||||
| void ff_add_hfyu_median_prediction_mmxext(uint8_t *dst, const uint8_t *top, | |||||
| const uint8_t *diff, int w, | |||||
| int *left, int *left_top); | |||||
| int ff_add_hfyu_left_prediction_ssse3(uint8_t *dst, const uint8_t *src, | |||||
| int w, int left); | |||||
| int ff_add_hfyu_left_prediction_sse4(uint8_t *dst, const uint8_t *src, | |||||
| int w, int left); | |||||
| void ff_vector_clip_int32_mmx(int32_t *dst, const int32_t *src, | void ff_vector_clip_int32_mmx(int32_t *dst, const int32_t *src, | ||||
| int32_t min, int32_t max, unsigned int len); | int32_t min, int32_t max, unsigned int len); | ||||
| void ff_vector_clip_int32_sse2(int32_t *dst, const int32_t *src, | void ff_vector_clip_int32_sse2(int32_t *dst, const int32_t *src, | ||||
| @@ -549,8 +540,6 @@ static av_cold void dsputil_init_mmx(DSPContext *c, AVCodecContext *avctx, | |||||
| } | } | ||||
| c->gmc = ff_gmc_mmx; | c->gmc = ff_gmc_mmx; | ||||
| c->add_bytes = ff_add_bytes_mmx; | |||||
| #endif /* HAVE_MMX_INLINE */ | #endif /* HAVE_MMX_INLINE */ | ||||
| #if HAVE_MMX_EXTERNAL | #if HAVE_MMX_EXTERNAL | ||||
| @@ -578,10 +567,6 @@ static av_cold void dsputil_init_mmxext(DSPContext *c, AVCodecContext *avctx, | |||||
| SET_QPEL_FUNCS(put_no_rnd_qpel, 0, 16, mmxext, ); | SET_QPEL_FUNCS(put_no_rnd_qpel, 0, 16, mmxext, ); | ||||
| SET_QPEL_FUNCS(put_no_rnd_qpel, 1, 8, mmxext, ); | SET_QPEL_FUNCS(put_no_rnd_qpel, 1, 8, mmxext, ); | ||||
| /* slower than cmov version on AMD */ | |||||
| if (!(cpu_flags & AV_CPU_FLAG_3DNOW)) | |||||
| c->add_hfyu_median_prediction = ff_add_hfyu_median_prediction_mmxext; | |||||
| c->scalarproduct_int16 = ff_scalarproduct_int16_mmxext; | c->scalarproduct_int16 = ff_scalarproduct_int16_mmxext; | ||||
| c->scalarproduct_and_madd_int16 = ff_scalarproduct_and_madd_int16_mmxext; | c->scalarproduct_and_madd_int16 = ff_scalarproduct_and_madd_int16_mmxext; | ||||
| #endif /* HAVE_MMXEXT_EXTERNAL */ | #endif /* HAVE_MMXEXT_EXTERNAL */ | ||||
| @@ -636,10 +621,6 @@ static av_cold void dsputil_init_ssse3(DSPContext *c, AVCodecContext *avctx, | |||||
| int cpu_flags, unsigned high_bit_depth) | int cpu_flags, unsigned high_bit_depth) | ||||
| { | { | ||||
| #if HAVE_SSSE3_EXTERNAL | #if HAVE_SSSE3_EXTERNAL | ||||
| c->add_hfyu_left_prediction = ff_add_hfyu_left_prediction_ssse3; | |||||
| if (cpu_flags & AV_CPU_FLAG_SSE4) // not really SSE4, just slow on Conroe | |||||
| c->add_hfyu_left_prediction = ff_add_hfyu_left_prediction_sse4; | |||||
| if (!(cpu_flags & (AV_CPU_FLAG_SSE42 | AV_CPU_FLAG_3DNOW))) // cachesplit | if (!(cpu_flags & (AV_CPU_FLAG_SSE42 | AV_CPU_FLAG_3DNOW))) // cachesplit | ||||
| c->scalarproduct_and_madd_int16 = ff_scalarproduct_and_madd_int16_ssse3; | c->scalarproduct_and_madd_int16 = ff_scalarproduct_and_madd_int16_ssse3; | ||||
| c->bswap_buf = ff_bswap32_buf_ssse3; | c->bswap_buf = ff_bswap32_buf_ssse3; | ||||
| @@ -659,11 +640,6 @@ av_cold void ff_dsputil_init_x86(DSPContext *c, AVCodecContext *avctx, | |||||
| { | { | ||||
| int cpu_flags = av_get_cpu_flags(); | int cpu_flags = av_get_cpu_flags(); | ||||
| #if HAVE_7REGS && HAVE_INLINE_ASM | |||||
| if (cpu_flags & AV_CPU_FLAG_CMOV) | |||||
| c->add_hfyu_median_prediction = ff_add_hfyu_median_prediction_cmov; | |||||
| #endif | |||||
| if (X86_MMX(cpu_flags)) | if (X86_MMX(cpu_flags)) | ||||
| dsputil_init_mmx(c, avctx, cpu_flags, high_bit_depth); | dsputil_init_mmx(c, avctx, cpu_flags, high_bit_depth); | ||||
| @@ -222,32 +222,6 @@ void ff_clear_blocks_sse(int16_t *blocks) | |||||
| : "%"REG_a); | : "%"REG_a); | ||||
| } | } | ||||
| void ff_add_bytes_mmx(uint8_t *dst, uint8_t *src, int w) | |||||
| { | |||||
| x86_reg i = 0; | |||||
| __asm__ volatile ( | |||||
| "jmp 2f \n\t" | |||||
| "1: \n\t" | |||||
| "movq (%1, %0), %%mm0 \n\t" | |||||
| "movq (%2, %0), %%mm1 \n\t" | |||||
| "paddb %%mm0, %%mm1 \n\t" | |||||
| "movq %%mm1, (%2, %0) \n\t" | |||||
| "movq 8(%1, %0), %%mm0 \n\t" | |||||
| "movq 8(%2, %0), %%mm1 \n\t" | |||||
| "paddb %%mm0, %%mm1 \n\t" | |||||
| "movq %%mm1, 8(%2, %0) \n\t" | |||||
| "add $16, %0 \n\t" | |||||
| "2: \n\t" | |||||
| "cmp %3, %0 \n\t" | |||||
| "js 1b \n\t" | |||||
| : "+r" (i) | |||||
| : "r" (src), "r" (dst), "r" ((x86_reg) w - 15)); | |||||
| for (; i < w; i++) | |||||
| dst[i + 0] += src[i + 0]; | |||||
| } | |||||
| /* Draw the edges of width 'w' of an image of size width, height | /* Draw the edges of width 'w' of an image of size width, height | ||||
| * this MMX version can only handle w == 8 || w == 16. */ | * this MMX version can only handle w == 8 || w == 16. */ | ||||
| void ff_draw_edges_mmx(uint8_t *buf, int wrap, int width, int height, | void ff_draw_edges_mmx(uint8_t *buf, int wrap, int width, int height, | ||||
| @@ -43,12 +43,6 @@ void ff_clear_block_sse(int16_t *block); | |||||
| void ff_clear_blocks_mmx(int16_t *blocks); | void ff_clear_blocks_mmx(int16_t *blocks); | ||||
| void ff_clear_blocks_sse(int16_t *blocks); | void ff_clear_blocks_sse(int16_t *blocks); | ||||
| void ff_add_bytes_mmx(uint8_t *dst, uint8_t *src, int w); | |||||
| void ff_add_hfyu_median_prediction_cmov(uint8_t *dst, const uint8_t *top, | |||||
| const uint8_t *diff, int w, | |||||
| int *left, int *left_top); | |||||
| void ff_draw_edges_mmx(uint8_t *buf, int wrap, int width, int height, | void ff_draw_edges_mmx(uint8_t *buf, int wrap, int width, int height, | ||||
| int w, int h, int sides); | int w, int h, int sides); | ||||
| @@ -0,0 +1,165 @@ | |||||
| ;****************************************************************************** | |||||
| ;* SIMD-optimized HuffYUV functions | |||||
| ;* Copyright (c) 2008 Loren Merritt | |||||
| ;* | |||||
| ;* This file is part of Libav. | |||||
| ;* | |||||
| ;* Libav is free software; you can redistribute it and/or | |||||
| ;* modify it under the terms of the GNU Lesser General Public | |||||
| ;* License as published by the Free Software Foundation; either | |||||
| ;* version 2.1 of the License, or (at your option) any later version. | |||||
| ;* | |||||
| ;* Libav is distributed in the hope that it will be useful, | |||||
| ;* but WITHOUT ANY WARRANTY; without even the implied warranty of | |||||
| ;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |||||
| ;* Lesser General Public License for more details. | |||||
| ;* | |||||
| ;* You should have received a copy of the GNU Lesser General Public | |||||
| ;* License along with Libav; if not, write to the Free Software | |||||
| ;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |||||
| ;****************************************************************************** | |||||
| %include "libavutil/x86/x86util.asm" | |||||
| SECTION_RODATA | |||||
| pb_f: times 16 db 15 ; loaded into m5 by the sse4 entry point: pshufb index 15 in every byte | |||||
| pb_zzzzzzzz77777777: times 8 db -1 ; only 8 bytes are defined here; the sse4 entry point loads 16, so the upper half is pb_7 below -- keep these two adjacent and in this order | |||||
| pb_7: times 8 db 7 ; loaded into m5 by the ssse3 (MMX register) entry point: pshufb index 7 in every byte | |||||
| pb_zzzz3333zzzzbbbb: db -1,-1,-1,-1,3,3,3,3,-1,-1,-1,-1,11,11,11,11 ; "z" = -1 index (pshufb writes zero there); digits are source byte indices in hex | |||||
| pb_zz11zz55zz99zzdd: db -1,-1,1,1,-1,-1,5,5,-1,-1,9,9,-1,-1,13,13 | |||||
| SECTION_TEXT | |||||
| ; void ff_add_hfyu_median_pred_mmxext(uint8_t *dst, const uint8_t *top, | |||||
| ; const uint8_t *diff, int w, | |||||
| ; int *left, int *left_top) | |||||
| INIT_MMX mmxext | |||||
| cglobal add_hfyu_median_pred, 6,6,0, dst, top, diff, w, left, left_top ; dst = median(l, t, l+t-tl) + diff per byte; *left and *left_top are read on entry and rewritten on exit | |||||
| movq mm0, [topq] ; first 8 top bytes | |||||
| movq mm2, mm0 | |||||
| movd mm4, [left_topq] ; 32-bit load of *left_top | |||||
| psllq mm2, 8 | |||||
| movq mm1, mm0 ; t | |||||
| por mm4, mm2 ; tl = top shifted up one byte with *left_top ORed into the low end | |||||
| movd mm3, [leftq] ; l = *left | |||||
| psubb mm0, mm4 ; t-tl | |||||
| add dstq, wq ; point all three buffers at their end ... | |||||
| add topq, wq | |||||
| add diffq, wq | |||||
| neg wq ; ... and index them with wq running from -w upwards | |||||
| jmp .skip ; the first group's t and t-tl were set up above | |||||
| .loop: | |||||
| movq mm4, [topq+wq] | |||||
| movq mm0, mm4 | |||||
| psllq mm4, 8 | |||||
| por mm4, mm1 ; mm1 was shifted right 7 times below, so it carries the previous group's last top byte | |||||
| movq mm1, mm0 ; t | |||||
| psubb mm0, mm4 ; t-tl | |||||
| .skip: | |||||
| movq mm2, [diffq+wq] ; 8 residual bytes | |||||
| %assign i 0 | |||||
| %rep 8 | |||||
| movq mm4, mm0 | |||||
| paddb mm4, mm3 ; t-tl+l | |||||
| movq mm5, mm3 | |||||
| pmaxub mm3, mm1 ; max(l, t) | |||||
| pminub mm5, mm1 ; min(l, t) | |||||
| pminub mm3, mm4 | |||||
| pmaxub mm3, mm5 ; median | |||||
| paddb mm3, mm2 ; +residual | |||||
| %if i==0 | |||||
| movq mm7, mm3 | |||||
| psllq mm7, 56 ; low byte of mm3 becomes the top byte of the output accumulator mm7 | |||||
| %else | |||||
| movq mm6, mm3 | |||||
| psrlq mm7, 8 ; make room, then insert the new byte at the top | |||||
| psllq mm6, 56 | |||||
| por mm7, mm6 | |||||
| %endif | |||||
| %if i<7 | |||||
| psrlq mm0, 8 ; bring the next byte lane down to the low byte | |||||
| psrlq mm1, 8 | |||||
| psrlq mm2, 8 | |||||
| %endif | |||||
| %assign i i+1 | |||||
| %endrep | |||||
| movq [dstq+wq], mm7 ; always a full 8-byte store; NOTE(review): if w is not a multiple of 8 this reads and writes past w -- confirm buffers are padded | |||||
| add wq, 8 | |||||
| jl .loop | |||||
| movzx r2d, byte [dstq-1] ; last dst byte (dstq points at dst + w here) | |||||
| mov [leftq], r2d | |||||
| movzx r2d, byte [topq-1] ; last top byte | |||||
| mov [left_topq], r2d | |||||
| RET | |||||
| %macro ADD_HFYU_LEFT_LOOP 2 ; %1 = dst_is_aligned, %2 = src_is_aligned; expects m0 = running left value, m3..m6 = pshufb masks loaded by the entry point; ends in RET | |||||
| add srcq, wq ; point both buffers at their end ... | |||||
| add dstq, wq | |||||
| neg wq ; ... and index them with wq running from -w upwards | |||||
| %%.loop: | |||||
| %if %2 | |||||
| mova m1, [srcq+wq] | |||||
| %else | |||||
| movu m1, [srcq+wq] | |||||
| %endif | |||||
| mova m2, m1 | |||||
| psllw m1, 8 | |||||
| paddb m1, m2 ; each odd byte now holds the sum of its byte pair | |||||
| mova m2, m1 | |||||
| pshufb m1, m3 ; pb_zz11zz55zz99zzdd: copy each first-pair sum onto the following pair | |||||
| paddb m1, m2 ; running sums within every 4-byte group | |||||
| pshufb m0, m5 ; broadcast the last byte of the previous result (pb_7 / pb_f) | |||||
| mova m2, m1 | |||||
| pshufb m1, m4 ; pb_zzzz3333zzzzbbbb: copy byte 3 / 11 onto bytes 4-7 / 12-15 | |||||
| paddb m1, m2 ; running sums within every 8-byte group | |||||
| %if mmsize == 16 | |||||
| mova m2, m1 | |||||
| pshufb m1, m6 ; copy byte 7 onto bytes 8-15 | |||||
| paddb m1, m2 ; running sums across all 16 bytes | |||||
| %endif | |||||
| paddb m0, m1 ; add the carried-in left value | |||||
| %if %1 | |||||
| mova [dstq+wq], m0 | |||||
| %else | |||||
| movq [dstq+wq], m0 ; two 8-byte stores for an unaligned dst | |||||
| movhps [dstq+wq+8], m0 | |||||
| %endif | |||||
| add wq, mmsize | |||||
| jl %%.loop | |||||
| mov eax, mmsize-1 | |||||
| sub eax, wd ; wd >= 0 here: how far the last full-register step ran past w | |||||
| movd m1, eax | |||||
| pshufb m0, m1 ; move the byte at that index (last byte inside w) down to byte 0 | |||||
| movd eax, m0 ; return value; only its low byte is that last output byte | |||||
| RET | |||||
| %endmacro | |||||
| ; int ff_add_hfyu_left_pred(uint8_t *dst, const uint8_t *src, int w, int left) | |||||
| INIT_MMX ssse3 | |||||
| cglobal add_hfyu_left_pred, 3,3,7, dst, src, w, left ; 8-byte (MMX register) variant using pshufb | |||||
| .skip_prologue: ; not referenced by any code shown in this file | |||||
| mova m5, [pb_7] ; broadcast mask for the carried left byte | |||||
| mova m4, [pb_zzzz3333zzzzbbbb] | |||||
| mova m3, [pb_zz11zz55zz99zzdd] | |||||
| movd m0, leftm | |||||
| psllq m0, 56 ; put left in byte 7, where the loop's pb_7 broadcast picks it up | |||||
| ADD_HFYU_LEFT_LOOP 1, 1 | |||||
| INIT_XMM sse4 | |||||
| cglobal add_hfyu_left_pred, 3,3,7, dst, src, w, left ; 16-byte (XMM register) variant; picks a loop by pointer alignment | |||||
| mova m5, [pb_f] ; broadcast mask for the carried left byte | |||||
| mova m6, [pb_zzzzzzzz77777777] ; 16-byte load: the upper 8 bytes come from pb_7, which follows it in SECTION_RODATA | |||||
| mova m4, [pb_zzzz3333zzzzbbbb] | |||||
| mova m3, [pb_zz11zz55zz99zzdd] | |||||
| movd m0, leftm | |||||
| pslldq m0, 15 ; put left in byte 15, where the loop's pb_f broadcast picks it up | |||||
| test srcq, 15 | |||||
| jnz .src_unaligned | |||||
| test dstq, 15 | |||||
| jnz .dst_unaligned | |||||
| ADD_HFYU_LEFT_LOOP 1, 1 ; each expansion ends in RET, so the variants do not fall through | |||||
| .dst_unaligned: | |||||
| ADD_HFYU_LEFT_LOOP 0, 1 | |||||
| .src_unaligned: | |||||
| ADD_HFYU_LEFT_LOOP 0, 0 ; taken whenever src is unaligned; dst alignment is not tested on this path | |||||
| @@ -0,0 +1,30 @@ | |||||
| /* | |||||
| * This file is part of Libav. | |||||
| * | |||||
| * Libav is free software; you can redistribute it and/or | |||||
| * modify it under the terms of the GNU Lesser General Public | |||||
| * License as published by the Free Software Foundation; either | |||||
| * version 2.1 of the License, or (at your option) any later version. | |||||
| * | |||||
| * Libav is distributed in the hope that it will be useful, | |||||
| * but WITHOUT ANY WARRANTY; without even the implied warranty of | |||||
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |||||
| * Lesser General Public License for more details. | |||||
| * | |||||
| * You should have received a copy of the GNU Lesser General Public | |||||
| * License along with Libav; if not, write to the Free Software | |||||
| * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |||||
| */ | |||||
| #ifndef AVCODEC_X86_HUFFYUVDSP_H | |||||
| #define AVCODEC_X86_HUFFYUVDSP_H | |||||
| #include <stdint.h> | |||||
| void ff_add_bytes_mmx(uint8_t *dst, uint8_t *src, int w); | |||||
| void ff_add_hfyu_median_pred_cmov(uint8_t *dst, const uint8_t *top, | |||||
| const uint8_t *diff, int w, | |||||
| int *left, int *left_top); | |||||
| #endif /* AVCODEC_X86_HUFFYUVDSP_H */ | |||||
| @@ -0,0 +1,63 @@ | |||||
| /* | |||||
| * This file is part of Libav. | |||||
| * | |||||
| * Libav is free software; you can redistribute it and/or | |||||
| * modify it under the terms of the GNU Lesser General Public | |||||
| * License as published by the Free Software Foundation; either | |||||
| * version 2.1 of the License, or (at your option) any later version. | |||||
| * | |||||
| * Libav is distributed in the hope that it will be useful, | |||||
| * but WITHOUT ANY WARRANTY; without even the implied warranty of | |||||
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |||||
| * Lesser General Public License for more details. | |||||
| * | |||||
| * You should have received a copy of the GNU Lesser General Public | |||||
| * License along with Libav; if not, write to the Free Software | |||||
| * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |||||
| */ | |||||
| #include "config.h" | |||||
| #include "libavutil/attributes.h" | |||||
| #include "libavutil/cpu.h" | |||||
| #include "libavutil/x86/asm.h" | |||||
| #include "libavutil/x86/cpu.h" | |||||
| #include "libavcodec/huffyuvdsp.h" | |||||
| void ff_add_bytes_mmx(uint8_t *dst, uint8_t *src, int w); | |||||
| void ff_add_hfyu_median_pred_cmov(uint8_t *dst, const uint8_t *top, | |||||
| const uint8_t *diff, int w, | |||||
| int *left, int *left_top); | |||||
| void ff_add_hfyu_median_pred_mmxext(uint8_t *dst, const uint8_t *top, | |||||
| const uint8_t *diff, int w, | |||||
| int *left, int *left_top); | |||||
| int ff_add_hfyu_left_pred_ssse3(uint8_t *dst, const uint8_t *src, | |||||
| int w, int left); | |||||
| int ff_add_hfyu_left_pred_sse4(uint8_t *dst, const uint8_t *src, | |||||
| int w, int left); | |||||
| av_cold void ff_huffyuvdsp_init_x86(HuffYUVDSPContext *c) /* x86 hook: overrides function pointers in c according to the runtime CPU flags; later assignments win */ | |||||
| { | |||||
| int cpu_flags = av_get_cpu_flags(); | |||||
| #if HAVE_7REGS && HAVE_INLINE_ASM | |||||
| if (cpu_flags & AV_CPU_FLAG_CMOV) | |||||
| c->add_hfyu_median_pred = ff_add_hfyu_median_pred_cmov; /* may be replaced by the MMXEXT version further down */ | |||||
| #endif | |||||
| if (INLINE_MMX(cpu_flags)) | |||||
| c->add_bytes = ff_add_bytes_mmx; | |||||
| if (EXTERNAL_MMXEXT(cpu_flags)) { | |||||
| /* slower than cmov version on AMD (the 3DNow! flag is presumably the stand-in for "AMD CPU"), so any earlier choice is kept there */ | |||||
| if (!(cpu_flags & AV_CPU_FLAG_3DNOW)) | |||||
| c->add_hfyu_median_pred = ff_add_hfyu_median_pred_mmxext; | |||||
| } | |||||
| if (EXTERNAL_SSSE3(cpu_flags)) { | |||||
| c->add_hfyu_left_pred = ff_add_hfyu_left_pred_ssse3; | |||||
| if (cpu_flags & AV_CPU_FLAG_SSE4) // not really SSE4, just slow on Conroe | |||||
| c->add_hfyu_left_pred = ff_add_hfyu_left_pred_sse4; | |||||
| } | |||||
| } | |||||
| @@ -20,14 +20,14 @@ | |||||
| #include "config.h" | #include "config.h" | ||||
| #include "libavutil/x86/asm.h" | #include "libavutil/x86/asm.h" | ||||
| #include "dsputil_x86.h" | |||||
| #include "huffyuvdsp.h" | |||||
| #if HAVE_INLINE_ASM | #if HAVE_INLINE_ASM | ||||
| #if HAVE_7REGS | #if HAVE_7REGS | ||||
| void ff_add_hfyu_median_prediction_cmov(uint8_t *dst, const uint8_t *top, | |||||
| const uint8_t *diff, int w, | |||||
| int *left, int *left_top) | |||||
| void ff_add_hfyu_median_pred_cmov(uint8_t *dst, const uint8_t *top, | |||||
| const uint8_t *diff, int w, | |||||
| int *left, int *left_top) | |||||
| { | { | ||||
| x86_reg w2 = -w; | x86_reg w2 = -w; | ||||
| x86_reg x; | x86_reg x; | ||||
| @@ -62,4 +62,30 @@ void ff_add_hfyu_median_prediction_cmov(uint8_t *dst, const uint8_t *top, | |||||
| } | } | ||||
| #endif | #endif | ||||
| void ff_add_bytes_mmx(uint8_t *dst, uint8_t *src, int w) /* dst[i] += src[i] for 0 <= i < w: 16 bytes per MMX loop iteration, then a scalar tail */ | |||||
| { | |||||
| x86_reg i = 0; | |||||
| __asm__ volatile ( | |||||
| "jmp 2f \n\t" /* evaluate the loop condition before the first iteration */ | |||||
| "1: \n\t" | |||||
| "movq (%1, %0), %%mm0 \n\t" /* mm0 = src[i .. i+7] */ | |||||
| "movq (%2, %0), %%mm1 \n\t" /* mm1 = dst[i .. i+7] */ | |||||
| "paddb %%mm0, %%mm1 \n\t" /* byte-wise add */ | |||||
| "movq %%mm1, (%2, %0) \n\t" | |||||
| "movq 8(%1, %0), %%mm0 \n\t" /* same again for bytes i+8 .. i+15 */ | |||||
| "movq 8(%2, %0), %%mm1 \n\t" | |||||
| "paddb %%mm0, %%mm1 \n\t" | |||||
| "movq %%mm1, 8(%2, %0) \n\t" | |||||
| "add $16, %0 \n\t" | |||||
| "2: \n\t" | |||||
| "cmp %3, %0 \n\t" /* i compared against w - 15 */ | |||||
| "js 1b \n\t" /* keep looping while i - (w - 15) is negative */ | |||||
| : "+r" (i) | |||||
| : "r" (src), "r" (dst), "r" ((x86_reg) w - 15)); /* NOTE(review): no clobber list is given although memory at dst and mm0/mm1 are written -- confirm this is intended */ | |||||
| for (; i < w; i++) /* bytes the 16-byte loop did not reach */ | |||||
| dst[i + 0] += src[i + 0]; | |||||
| } | |||||
| #endif /* HAVE_INLINE_ASM */ | #endif /* HAVE_INLINE_ASM */ | ||||