* commit '3c650efb81aaa3b395ba4606ee68a47ee4efb57b': dsputil: Move draw_edges() to mpegvideoencdsp. Conflicts: libavcodec/mpegvideo_enc.c, libavcodec/x86/Makefile, libavcodec/x86/dsputil_init.c, libavcodec/x86/dsputil_mmx.c, libavcodec/x86/dsputil_x86.h. Merged-by: Michael Niedermayer <michaelni@gmx.at> (tags/n2.3)
| @@ -34,6 +34,8 @@ | |||
| #include "golomb.h" | |||
| #include "dirac_arith.h" | |||
| #include "mpeg12data.h" | |||
| #include "libavcodec/mpegvideo.h" | |||
| #include "mpegvideoencdsp.h" | |||
| #include "dirac_dwt.h" | |||
| #include "dirac.h" | |||
| #include "diracdsp.h" | |||
| @@ -137,6 +139,7 @@ typedef struct Plane { | |||
| typedef struct DiracContext { | |||
| AVCodecContext *avctx; | |||
| DSPContext dsp; | |||
| MpegvideoEncDSPContext mpvencdsp; | |||
| DiracDSPContext diracdsp; | |||
| GetBitContext gb; | |||
| dirac_source_params source; | |||
| @@ -424,6 +427,7 @@ static av_cold int dirac_decode_init(AVCodecContext *avctx) | |||
| ff_dsputil_init(&s->dsp, avctx); | |||
| ff_diracdsp_init(&s->diracdsp); | |||
| ff_mpegvideoencdsp_init(&s->mpvencdsp, avctx); | |||
| for (i = 0; i < MAX_FRAMES; i++) { | |||
| s->all_frames[i].avframe = av_frame_alloc(); | |||
| @@ -1556,7 +1560,7 @@ static void interpolate_refplane(DiracContext *s, DiracFrame *ref, int plane, in | |||
| int i, edge = EDGE_WIDTH/2; | |||
| ref->hpel[plane][0] = ref->avframe->data[plane]; | |||
| s->dsp.draw_edges(ref->hpel[plane][0], ref->avframe->linesize[plane], width, height, edge, edge, EDGE_TOP | EDGE_BOTTOM); /* EDGE_TOP | EDGE_BOTTOM values just copied to make it build, this needs to be ensured */ | |||
| s->mpvencdsp.draw_edges(ref->hpel[plane][0], ref->avframe->linesize[plane], width, height, edge, edge, EDGE_TOP | EDGE_BOTTOM); /* EDGE_TOP | EDGE_BOTTOM values just copied to make it build, this needs to be ensured */ | |||
| /* no need for hpel if we only have fpel vectors */ | |||
| if (!s->mv_precision) | |||
| @@ -1573,9 +1577,9 @@ static void interpolate_refplane(DiracContext *s, DiracFrame *ref, int plane, in | |||
| s->diracdsp.dirac_hpel_filter(ref->hpel[plane][1], ref->hpel[plane][2], | |||
| ref->hpel[plane][3], ref->hpel[plane][0], | |||
| ref->avframe->linesize[plane], width, height); | |||
| s->dsp.draw_edges(ref->hpel[plane][1], ref->avframe->linesize[plane], width, height, edge, edge, EDGE_TOP | EDGE_BOTTOM); | |||
| s->dsp.draw_edges(ref->hpel[plane][2], ref->avframe->linesize[plane], width, height, edge, edge, EDGE_TOP | EDGE_BOTTOM); | |||
| s->dsp.draw_edges(ref->hpel[plane][3], ref->avframe->linesize[plane], width, height, edge, edge, EDGE_TOP | EDGE_BOTTOM); | |||
| s->mpvencdsp.draw_edges(ref->hpel[plane][1], ref->avframe->linesize[plane], width, height, edge, edge, EDGE_TOP | EDGE_BOTTOM); | |||
| s->mpvencdsp.draw_edges(ref->hpel[plane][2], ref->avframe->linesize[plane], width, height, edge, edge, EDGE_TOP | EDGE_BOTTOM); | |||
| s->mpvencdsp.draw_edges(ref->hpel[plane][3], ref->avframe->linesize[plane], width, height, edge, edge, EDGE_TOP | EDGE_BOTTOM); | |||
| } | |||
| ref->interpolated[plane] = 1; | |||
| } | |||
| @@ -937,34 +937,6 @@ WRAPPER8_16_SQ(quant_psnr8x8_c, quant_psnr16_c) | |||
| WRAPPER8_16_SQ(rd8x8_c, rd16_c) | |||
| WRAPPER8_16_SQ(bit8x8_c, bit16_c) | |||
| /* Draw the edges of width 'w' of an image of size width, height. | |||
| * buf points at the top-left pixel of a uint8_t plane and wrap is the | |||
| * distance in bytes from one row to the next. Every row gets w bytes | |||
| * before it filled with its first pixel and w bytes after it filled with | |||
| * its last pixel. If sides contains EDGE_TOP the widened first row is | |||
| * copied into the h rows above it; if sides contains EDGE_BOTTOM the | |||
| * widened last row is copied into the h rows below it. | |||
| * Nothing here checks that this margin exists around buf; the caller has | |||
| * to provide it. */ | |||
| // FIXME: Check that this is OK for MPEG-4 interlaced. | |||
| static void draw_edges_8_c(uint8_t *buf, int wrap, int width, int height, | |||
| int w, int h, int sides) | |||
| { | |||
| uint8_t *ptr = buf, *last_line; | |||
| int i; | |||
| /* left and right: replicate the first and last pixel of every row */ | |||
| for (i = 0; i < height; i++) { | |||
| memset(ptr - w, ptr[0], w); | |||
| memset(ptr + width, ptr[width - 1], w); | |||
| ptr += wrap; | |||
| } | |||
| /* top and bottom + corners: buf is moved w bytes to the left so that the | |||
| * row copies below (width + 2 * w bytes each) carry the side padding, | |||
| * which is what fills the corners */ | |||
| buf -= w; | |||
| last_line = buf + (height - 1) * wrap; | |||
| if (sides & EDGE_TOP) | |||
| for (i = 0; i < h; i++) | |||
| // top: copy the first row into row i + 1 above the image | |||
| memcpy(buf - (i + 1) * wrap, buf, width + w + w); | |||
| if (sides & EDGE_BOTTOM) | |||
| for (i = 0; i < h; i++) | |||
| // bottom: copy the last row into row i + 1 below the image | |||
| memcpy(last_line + (i + 1) * wrap, last_line, width + w + w); | |||
| } | |||
| /* init static data */ | |||
| av_cold void ff_dsputil_static_init(void) | |||
| { | |||
| @@ -1067,8 +1039,6 @@ av_cold void ff_dsputil_init(DSPContext *c, AVCodecContext *avctx) | |||
| ff_dsputil_init_dwt(c); | |||
| #endif | |||
| c->draw_edges = draw_edges_8_c; | |||
| switch (avctx->bits_per_raw_sample) { | |||
| case 9: | |||
| case 10: | |||
| @@ -99,12 +99,6 @@ typedef struct DSPContext { | |||
| /* (I)DCT */ | |||
| void (*fdct)(int16_t *block /* align 16 */); | |||
| void (*fdct248)(int16_t *block /* align 16 */); | |||
| void (*draw_edges)(uint8_t *buf, int wrap, int width, int height, | |||
| int w, int h, int sides); | |||
| #define EDGE_WIDTH 16 | |||
| #define EDGE_TOP 1 | |||
| #define EDGE_BOTTOM 2 | |||
| } DSPContext; | |||
| void ff_dsputil_static_init(void); | |||
| @@ -81,6 +81,8 @@ enum OutputFormat { | |||
| #define INPLACE_OFFSET 16 | |||
| #define EDGE_WIDTH 16 | |||
| /* Start codes. */ | |||
| #define SEQ_END_CODE 0x000001b7 | |||
| #define SEQ_START_CODE 0x000001b3 | |||
| @@ -1145,11 +1145,11 @@ static int load_input_picture(MpegEncContext *s, const AVFrame *pic_arg) | |||
| } | |||
| } | |||
| if ((s->width & 15) || (s->height & (vpad-1))) { | |||
| s->dsp.draw_edges(dst, dst_stride, | |||
| w, h, | |||
| 16>>h_shift, | |||
| vpad>>v_shift, | |||
| EDGE_BOTTOM); | |||
| s->mpvencdsp.draw_edges(dst, dst_stride, | |||
| w, h, | |||
| 16>>h_shift, | |||
| vpad>>v_shift, | |||
| EDGE_BOTTOM); | |||
| } | |||
| } | |||
| } | |||
| @@ -1529,18 +1529,25 @@ static void frame_end(MpegEncContext *s) | |||
| const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(s->avctx->pix_fmt); | |||
| int hshift = desc->log2_chroma_w; | |||
| int vshift = desc->log2_chroma_h; | |||
| s->dsp.draw_edges(s->current_picture.f->data[0], s->current_picture.f->linesize[0], | |||
| s->h_edge_pos, s->v_edge_pos, | |||
| EDGE_WIDTH, EDGE_WIDTH, | |||
| EDGE_TOP | EDGE_BOTTOM); | |||
| s->dsp.draw_edges(s->current_picture.f->data[1], s->current_picture.f->linesize[1], | |||
| s->h_edge_pos >> hshift, s->v_edge_pos >> vshift, | |||
| EDGE_WIDTH >> hshift, EDGE_WIDTH >> vshift, | |||
| EDGE_TOP | EDGE_BOTTOM); | |||
| s->dsp.draw_edges(s->current_picture.f->data[2], s->current_picture.f->linesize[2], | |||
| s->h_edge_pos >> hshift, s->v_edge_pos >> vshift, | |||
| EDGE_WIDTH >> hshift, EDGE_WIDTH >> vshift, | |||
| EDGE_TOP | EDGE_BOTTOM); | |||
| s->mpvencdsp.draw_edges(s->current_picture.f->data[0], | |||
| s->current_picture.f->linesize[0], | |||
| s->h_edge_pos, s->v_edge_pos, | |||
| EDGE_WIDTH, EDGE_WIDTH, | |||
| EDGE_TOP | EDGE_BOTTOM); | |||
| s->mpvencdsp.draw_edges(s->current_picture.f->data[1], | |||
| s->current_picture.f->linesize[1], | |||
| s->h_edge_pos >> hshift, | |||
| s->v_edge_pos >> vshift, | |||
| EDGE_WIDTH >> hshift, | |||
| EDGE_WIDTH >> vshift, | |||
| EDGE_TOP | EDGE_BOTTOM); | |||
| s->mpvencdsp.draw_edges(s->current_picture.f->data[2], | |||
| s->current_picture.f->linesize[2], | |||
| s->h_edge_pos >> hshift, | |||
| s->v_edge_pos >> vshift, | |||
| EDGE_WIDTH >> hshift, | |||
| EDGE_WIDTH >> vshift, | |||
| EDGE_TOP | EDGE_BOTTOM); | |||
| } | |||
| emms_c(); | |||
| @@ -18,6 +18,7 @@ | |||
| #include <assert.h> | |||
| #include <stdint.h> | |||
| #include <string.h> | |||
| #include "config.h" | |||
| #include "libavutil/avassert.h" | |||
| @@ -125,6 +126,34 @@ static int pix_norm1_c(uint8_t *pix, int line_size) | |||
| return s; | |||
| } | |||
| /* Draw the edges of width 'w' of an image of size width, height. | |||
| * buf points at the top-left pixel of a uint8_t plane and wrap is the | |||
| * distance in bytes from one row to the next. Every row gets w bytes | |||
| * before it filled with its first pixel and w bytes after it filled with | |||
| * its last pixel. If sides contains EDGE_TOP the widened first row is | |||
| * copied into the h rows above it; if sides contains EDGE_BOTTOM the | |||
| * widened last row is copied into the h rows below it. | |||
| * Nothing here checks that this margin exists around buf; the caller has | |||
| * to provide it. */ | |||
| // FIXME: Check that this is OK for MPEG-4 interlaced. | |||
| static void draw_edges_8_c(uint8_t *buf, int wrap, int width, int height, | |||
| int w, int h, int sides) | |||
| { | |||
| uint8_t *ptr = buf, *last_line; | |||
| int i; | |||
| /* left and right: replicate the first and last pixel of every row */ | |||
| for (i = 0; i < height; i++) { | |||
| memset(ptr - w, ptr[0], w); | |||
| memset(ptr + width, ptr[width - 1], w); | |||
| ptr += wrap; | |||
| } | |||
| /* top and bottom + corners: buf is moved w bytes to the left so that the | |||
| * row copies below (width + 2 * w bytes each) carry the side padding, | |||
| * which is what fills the corners */ | |||
| buf -= w; | |||
| last_line = buf + (height - 1) * wrap; | |||
| if (sides & EDGE_TOP) | |||
| for (i = 0; i < h; i++) | |||
| // top: copy the first row into row i + 1 above the image | |||
| memcpy(buf - (i + 1) * wrap, buf, width + w + w); | |||
| if (sides & EDGE_BOTTOM) | |||
| for (i = 0; i < h; i++) | |||
| // bottom: copy the last row into row i + 1 below the image | |||
| memcpy(last_line + (i + 1) * wrap, last_line, width + w + w); | |||
| } | |||
| av_cold void ff_mpegvideoencdsp_init(MpegvideoEncDSPContext *c, | |||
| AVCodecContext *avctx) | |||
| { | |||
| @@ -139,6 +168,8 @@ av_cold void ff_mpegvideoencdsp_init(MpegvideoEncDSPContext *c, | |||
| c->pix_sum = pix_sum_c; | |||
| c->pix_norm1 = pix_norm1_c; | |||
| c->draw_edges = draw_edges_8_c; | |||
| if (ARCH_ARM) | |||
| ff_mpegvideoencdsp_init_arm(c, avctx); | |||
| if (ARCH_PPC) | |||
| @@ -26,6 +26,9 @@ | |||
| #define BASIS_SHIFT 16 | |||
| #define RECON_SHIFT 6 | |||
| #define EDGE_TOP 1 | |||
| #define EDGE_BOTTOM 2 | |||
| typedef struct MpegvideoEncDSPContext { | |||
| int (*try_8x8basis)(int16_t rem[64], int16_t weight[64], | |||
| int16_t basis[64], int scale); | |||
| @@ -36,6 +39,9 @@ typedef struct MpegvideoEncDSPContext { | |||
| void (*shrink[4])(uint8_t *dst, int dst_wrap, const uint8_t *src, | |||
| int src_wrap, int width, int height); | |||
| void (*draw_edges)(uint8_t *buf, int wrap, int width, int height, | |||
| int w, int h, int sides); | |||
| } MpegvideoEncDSPContext; | |||
| void ff_mpegvideoencdsp_init(MpegvideoEncDSPContext *c, | |||
| @@ -433,6 +433,7 @@ av_cold int ff_snow_common_init(AVCodecContext *avctx){ | |||
| ff_videodsp_init(&s->vdsp, 8); | |||
| ff_dwt_init(&s->dwt); | |||
| ff_h264qpel_init(&s->h264qpel, 8); | |||
| ff_mpegvideoencdsp_init(&s->mpvencdsp, avctx); | |||
| #define mcf(dx,dy)\ | |||
| s->qdsp.put_qpel_pixels_tab [0][dy+dx/4]=\ | |||
| @@ -642,16 +643,16 @@ int ff_snow_frame_start(SnowContext *s){ | |||
| int h= s->avctx->height; | |||
| if (s->current_picture->data[0] && !(s->avctx->flags&CODEC_FLAG_EMU_EDGE)) { | |||
| s->dsp.draw_edges(s->current_picture->data[0], | |||
| s->current_picture->linesize[0], w , h , | |||
| EDGE_WIDTH , EDGE_WIDTH , EDGE_TOP | EDGE_BOTTOM); | |||
| s->mpvencdsp.draw_edges(s->current_picture->data[0], | |||
| s->current_picture->linesize[0], w , h , | |||
| EDGE_WIDTH , EDGE_WIDTH , EDGE_TOP | EDGE_BOTTOM); | |||
| if (s->current_picture->data[2]) { | |||
| s->dsp.draw_edges(s->current_picture->data[1], | |||
| s->current_picture->linesize[1], w>>s->chroma_h_shift, h>>s->chroma_v_shift, | |||
| EDGE_WIDTH>>s->chroma_h_shift, EDGE_WIDTH>>s->chroma_v_shift, EDGE_TOP | EDGE_BOTTOM); | |||
| s->dsp.draw_edges(s->current_picture->data[2], | |||
| s->current_picture->linesize[2], w>>s->chroma_h_shift, h>>s->chroma_v_shift, | |||
| EDGE_WIDTH>>s->chroma_h_shift, EDGE_WIDTH>>s->chroma_v_shift, EDGE_TOP | EDGE_BOTTOM); | |||
| s->mpvencdsp.draw_edges(s->current_picture->data[1], | |||
| s->current_picture->linesize[1], w>>s->chroma_h_shift, h>>s->chroma_v_shift, | |||
| EDGE_WIDTH>>s->chroma_h_shift, EDGE_WIDTH>>s->chroma_v_shift, EDGE_TOP | EDGE_BOTTOM); | |||
| s->mpvencdsp.draw_edges(s->current_picture->data[2], | |||
| s->current_picture->linesize[2], w>>s->chroma_h_shift, h>>s->chroma_v_shift, | |||
| EDGE_WIDTH>>s->chroma_h_shift, EDGE_WIDTH>>s->chroma_v_shift, EDGE_TOP | EDGE_BOTTOM); | |||
| } | |||
| } | |||
| @@ -115,6 +115,7 @@ typedef struct SnowContext{ | |||
| QpelDSPContext qdsp; | |||
| VideoDSPContext vdsp; | |||
| H264QpelContext h264qpel; | |||
| MpegvideoEncDSPContext mpvencdsp; | |||
| SnowDWTContext dwt; | |||
| AVFrame *new_picture; | |||
| AVFrame *input_picture; ///< new_picture with the internal linesizes | |||
| @@ -1568,10 +1568,10 @@ static int encode_frame(AVCodecContext *avctx, AVPacket *pkt, | |||
| memcpy(&s->input_picture->data[i][y * s->input_picture->linesize[i]], | |||
| &pict->data[i][y * pict->linesize[i]], | |||
| width>>hshift); | |||
| s->dsp.draw_edges(s->input_picture->data[i], s->input_picture->linesize[i], | |||
| width >> hshift, height >> vshift, | |||
| EDGE_WIDTH >> hshift, EDGE_WIDTH >> vshift, | |||
| EDGE_TOP | EDGE_BOTTOM); | |||
| s->mpvencdsp.draw_edges(s->input_picture->data[i], s->input_picture->linesize[i], | |||
| width >> hshift, height >> vshift, | |||
| EDGE_WIDTH >> hshift, EDGE_WIDTH >> vshift, | |||
| EDGE_TOP | EDGE_BOTTOM); | |||
| } | |||
| emms_c(); | |||
| @@ -43,6 +43,7 @@ | |||
| #include "avcodec.h" | |||
| #include "dsputil.h" | |||
| #include "libavutil/opt.h" | |||
| #include "mpegvideo.h" | |||
| #include "thread.h" | |||
| #include "frame_thread_encoder.h" | |||
| #include "internal.h" | |||
| @@ -58,7 +58,6 @@ OBJS-$(CONFIG_VP8_DECODER) += x86/vp8dsp_init.o | |||
| OBJS-$(CONFIG_VP9_DECODER) += x86/vp9dsp_init.o | |||
| OBJS-$(CONFIG_WEBP_DECODER) += x86/vp8dsp_init.o | |||
| MMX-OBJS-$(CONFIG_DSPUTIL) += x86/dsputil_mmx.o | |||
| MMX-OBJS-$(CONFIG_DIRAC_DECODER) += x86/dirac_dwt.o | |||
| MMX-OBJS-$(CONFIG_ENCODERS) += x86/fdct.o | |||
| MMX-OBJS-$(CONFIG_IDCTDSP) += x86/idctdsp_mmx.o \ | |||
| @@ -27,25 +27,9 @@ | |||
| #include "libavcodec/dsputil.h" | |||
| #include "dsputil_x86.h" | |||
| /* Install ff_draw_edges_mmx as c->draw_edges when HAVE_MMX_INLINE is set | |||
| * and high_bit_depth is zero; avctx and cpu_flags are not read here. */ | |||
| static av_cold void dsputil_init_mmx(DSPContext *c, AVCodecContext *avctx, | |||
| int cpu_flags, unsigned high_bit_depth) | |||
| { | |||
| #if HAVE_MMX_INLINE | |||
| if (!high_bit_depth) { | |||
| c->draw_edges = ff_draw_edges_mmx; | |||
| } | |||
| #endif /* HAVE_MMX_INLINE */ | |||
| } | |||
| /* x86 entry point for DSPContext setup: reads the CPU flags, calls | |||
| * dsputil_init_mmx() when X86_MMX(cpu_flags) holds, and calls | |||
| * ff_dsputilenc_init_mmx() when CONFIG_ENCODERS is set. */ | |||
| av_cold void ff_dsputil_init_x86(DSPContext *c, AVCodecContext *avctx, | |||
| unsigned high_bit_depth) | |||
| { | |||
| int cpu_flags = av_get_cpu_flags(); | |||
| if (X86_MMX(cpu_flags)) | |||
| dsputil_init_mmx(c, avctx, cpu_flags, high_bit_depth); | |||
| if (CONFIG_ENCODERS) | |||
| ff_dsputilenc_init_mmx(c, avctx, high_bit_depth); | |||
| } | |||
| @@ -1,150 +0,0 @@ | |||
| /* | |||
| * MMX optimized DSP utils | |||
| * Copyright (c) 2000, 2001 Fabrice Bellard | |||
| * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at> | |||
| * | |||
| * This file is part of FFmpeg. | |||
| * | |||
| * FFmpeg is free software; you can redistribute it and/or | |||
| * modify it under the terms of the GNU Lesser General Public | |||
| * License as published by the Free Software Foundation; either | |||
| * version 2.1 of the License, or (at your option) any later version. | |||
| * | |||
| * FFmpeg is distributed in the hope that it will be useful, | |||
| * but WITHOUT ANY WARRANTY; without even the implied warranty of | |||
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |||
| * Lesser General Public License for more details. | |||
| * | |||
| * You should have received a copy of the GNU Lesser General Public | |||
| * License along with FFmpeg; if not, write to the Free Software | |||
| * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |||
| * | |||
| * MMX optimization by Nick Kurshev <nickols_k@mail.ru> | |||
| */ | |||
| #include "config.h" | |||
| #include "libavutil/avassert.h" | |||
| #include "libavutil/cpu.h" | |||
| #include "libavutil/x86/asm.h" | |||
| #include "libavcodec/pixels.h" | |||
| #include "libavcodec/videodsp.h" | |||
| #include "dsputil_x86.h" | |||
| #include "inline_asm.h" | |||
| #if HAVE_INLINE_ASM | |||
| /* Draw the edges of width 'w' of an image of size width, height. | |||
| * This MMX version can only handle w == 4, w == 8 or w == 16: w == 8 and | |||
| * w == 16 have dedicated branches and every other value falls into the | |||
| * final branch, which asserts w == 4. | |||
| * 'h' is the number of rows requested above/below the image and 'sides' | |||
| * selects EDGE_TOP and/or EDGE_BOTTOM. */ | |||
| void ff_draw_edges_mmx(uint8_t *buf, int wrap, int width, int height, | |||
| int w, int h, int sides) | |||
| { | |||
| uint8_t *ptr, *last_line; | |||
| int i; | |||
| last_line = buf + (height - 1) * wrap; | |||
| /* left and right: for each of the height rows, the first pixel is | |||
| * replicated into the w bytes before the row and the last pixel into the | |||
| * w bytes after it */ | |||
| ptr = buf; | |||
| if (w == 8) { | |||
| __asm__ volatile ( | |||
| "1: \n\t" | |||
| "movd (%0), %%mm0 \n\t" | |||
| "punpcklbw %%mm0, %%mm0 \n\t" | |||
| "punpcklwd %%mm0, %%mm0 \n\t" | |||
| "punpckldq %%mm0, %%mm0 \n\t" | |||
| "movq %%mm0, -8(%0) \n\t" | |||
| "movq -8(%0, %2), %%mm1 \n\t" | |||
| "punpckhbw %%mm1, %%mm1 \n\t" | |||
| "punpckhwd %%mm1, %%mm1 \n\t" | |||
| "punpckhdq %%mm1, %%mm1 \n\t" | |||
| "movq %%mm1, (%0, %2) \n\t" | |||
| "add %1, %0 \n\t" | |||
| "cmp %3, %0 \n\t" | |||
| "jb 1b \n\t" | |||
| : "+r" (ptr) | |||
| : "r" ((x86_reg) wrap), "r" ((x86_reg) width), | |||
| "r" (ptr + wrap * height)); | |||
| } else if (w == 16) { | |||
| __asm__ volatile ( | |||
| "1: \n\t" | |||
| "movd (%0), %%mm0 \n\t" | |||
| "punpcklbw %%mm0, %%mm0 \n\t" | |||
| "punpcklwd %%mm0, %%mm0 \n\t" | |||
| "punpckldq %%mm0, %%mm0 \n\t" | |||
| "movq %%mm0, -8(%0) \n\t" | |||
| "movq %%mm0, -16(%0) \n\t" | |||
| "movq -8(%0, %2), %%mm1 \n\t" | |||
| "punpckhbw %%mm1, %%mm1 \n\t" | |||
| "punpckhwd %%mm1, %%mm1 \n\t" | |||
| "punpckhdq %%mm1, %%mm1 \n\t" | |||
| "movq %%mm1, (%0, %2) \n\t" | |||
| "movq %%mm1, 8(%0, %2) \n\t" | |||
| "add %1, %0 \n\t" | |||
| "cmp %3, %0 \n\t" | |||
| "jb 1b \n\t" | |||
| : "+r"(ptr) | |||
| : "r"((x86_reg)wrap), "r"((x86_reg)width), "r"(ptr + wrap * height) | |||
| ); | |||
| } else { | |||
| av_assert1(w == 4); | |||
| __asm__ volatile ( | |||
| "1: \n\t" | |||
| "movd (%0), %%mm0 \n\t" | |||
| "punpcklbw %%mm0, %%mm0 \n\t" | |||
| "punpcklwd %%mm0, %%mm0 \n\t" | |||
| "movd %%mm0, -4(%0) \n\t" | |||
| "movd -4(%0, %2), %%mm1 \n\t" | |||
| "punpcklbw %%mm1, %%mm1 \n\t" | |||
| "punpckhwd %%mm1, %%mm1 \n\t" | |||
| "punpckhdq %%mm1, %%mm1 \n\t" | |||
| "movd %%mm1, (%0, %2) \n\t" | |||
| "add %1, %0 \n\t" | |||
| "cmp %3, %0 \n\t" | |||
| "jb 1b \n\t" | |||
| : "+r" (ptr) | |||
| : "r" ((x86_reg) wrap), "r" ((x86_reg) width), | |||
| "r" (ptr + wrap * height)); | |||
| } | |||
| /* top and bottom (and hopefully also the corners): the padded first/last | |||
| * row is copied outwards starting w bytes left of the image. Each outer | |||
| * iteration stores four rows (i += 4) and each inner step stores 8 bytes, | |||
| * so h is effectively rounded up to a multiple of 4 and width + 2 * w to | |||
| * a multiple of 8. */ | |||
| if (sides & EDGE_TOP) { | |||
| for (i = 0; i < h; i += 4) { | |||
| ptr = buf - (i + 1) * wrap - w; | |||
| __asm__ volatile ( | |||
| "1: \n\t" | |||
| "movq (%1, %0), %%mm0 \n\t" | |||
| "movq %%mm0, (%0) \n\t" | |||
| "movq %%mm0, (%0, %2) \n\t" | |||
| "movq %%mm0, (%0, %2, 2) \n\t" | |||
| "movq %%mm0, (%0, %3) \n\t" | |||
| "add $8, %0 \n\t" | |||
| "cmp %4, %0 \n\t" | |||
| "jb 1b \n\t" | |||
| : "+r" (ptr) | |||
| : "r" ((x86_reg) buf - (x86_reg) ptr - w), | |||
| "r" ((x86_reg) - wrap), "r" ((x86_reg) - wrap * 3), | |||
| "r" (ptr + width + 2 * w)); | |||
| } | |||
| } | |||
| if (sides & EDGE_BOTTOM) { | |||
| for (i = 0; i < h; i += 4) { | |||
| ptr = last_line + (i + 1) * wrap - w; | |||
| __asm__ volatile ( | |||
| "1: \n\t" | |||
| "movq (%1, %0), %%mm0 \n\t" | |||
| "movq %%mm0, (%0) \n\t" | |||
| "movq %%mm0, (%0, %2) \n\t" | |||
| "movq %%mm0, (%0, %2, 2) \n\t" | |||
| "movq %%mm0, (%0, %3) \n\t" | |||
| "add $8, %0 \n\t" | |||
| "cmp %4, %0 \n\t" | |||
| "jb 1b \n\t" | |||
| : "+r" (ptr) | |||
| : "r" ((x86_reg) last_line - (x86_reg) ptr - w), | |||
| "r" ((x86_reg) wrap), "r" ((x86_reg) wrap * 3), | |||
| "r" (ptr + width + 2 * w)); | |||
| } | |||
| } | |||
| } | |||
| #endif /* HAVE_INLINE_ASM */ | |||
| @@ -31,9 +31,6 @@ void ff_dsputilenc_init_mmx(DSPContext *c, AVCodecContext *avctx, | |||
| unsigned high_bit_depth); | |||
| void ff_dsputil_init_pix_mmx(DSPContext *c, AVCodecContext *avctx); | |||
| void ff_draw_edges_mmx(uint8_t *buf, int wrap, int width, int height, | |||
| int w, int h, int sides); | |||
| void ff_mmx_idct(int16_t *block); | |||
| void ff_mmxext_idct(int16_t *block); | |||
| @@ -17,6 +17,7 @@ | |||
| */ | |||
| #include "libavutil/attributes.h" | |||
| #include "libavutil/avassert.h" | |||
| #include "libavutil/cpu.h" | |||
| #include "libavutil/x86/cpu.h" | |||
| #include "libavcodec/avcodec.h" | |||
| @@ -96,6 +97,120 @@ int ff_pix_norm1_sse2(uint8_t *pix, int line_size); | |||
| #undef PHADDD | |||
| #endif /* HAVE_SSSE3_INLINE */ | |||
| /* Draw the edges of width 'w' of an image of size width, height. | |||
| * This MMX version can only handle w == 4, w == 8 or w == 16: w == 8 and | |||
| * w == 16 have dedicated branches and every other value falls into the | |||
| * final branch, which asserts w == 4. | |||
| * 'h' is the number of rows requested above/below the image and 'sides' | |||
| * selects EDGE_TOP and/or EDGE_BOTTOM. */ | |||
| static void draw_edges_mmx(uint8_t *buf, int wrap, int width, int height, | |||
| int w, int h, int sides) | |||
| { | |||
| uint8_t *ptr, *last_line; | |||
| int i; | |||
| last_line = buf + (height - 1) * wrap; | |||
| /* left and right: for each of the height rows, the first pixel is | |||
| * replicated into the w bytes before the row and the last pixel into the | |||
| * w bytes after it */ | |||
| ptr = buf; | |||
| if (w == 8) { | |||
| __asm__ volatile ( | |||
| "1: \n\t" | |||
| "movd (%0), %%mm0 \n\t" | |||
| "punpcklbw %%mm0, %%mm0 \n\t" | |||
| "punpcklwd %%mm0, %%mm0 \n\t" | |||
| "punpckldq %%mm0, %%mm0 \n\t" | |||
| "movq %%mm0, -8(%0) \n\t" | |||
| "movq -8(%0, %2), %%mm1 \n\t" | |||
| "punpckhbw %%mm1, %%mm1 \n\t" | |||
| "punpckhwd %%mm1, %%mm1 \n\t" | |||
| "punpckhdq %%mm1, %%mm1 \n\t" | |||
| "movq %%mm1, (%0, %2) \n\t" | |||
| "add %1, %0 \n\t" | |||
| "cmp %3, %0 \n\t" | |||
| "jb 1b \n\t" | |||
| : "+r" (ptr) | |||
| : "r" ((x86_reg) wrap), "r" ((x86_reg) width), | |||
| "r" (ptr + wrap * height)); | |||
| } else if (w == 16) { | |||
| __asm__ volatile ( | |||
| "1: \n\t" | |||
| "movd (%0), %%mm0 \n\t" | |||
| "punpcklbw %%mm0, %%mm0 \n\t" | |||
| "punpcklwd %%mm0, %%mm0 \n\t" | |||
| "punpckldq %%mm0, %%mm0 \n\t" | |||
| "movq %%mm0, -8(%0) \n\t" | |||
| "movq %%mm0, -16(%0) \n\t" | |||
| "movq -8(%0, %2), %%mm1 \n\t" | |||
| "punpckhbw %%mm1, %%mm1 \n\t" | |||
| "punpckhwd %%mm1, %%mm1 \n\t" | |||
| "punpckhdq %%mm1, %%mm1 \n\t" | |||
| "movq %%mm1, (%0, %2) \n\t" | |||
| "movq %%mm1, 8(%0, %2) \n\t" | |||
| "add %1, %0 \n\t" | |||
| "cmp %3, %0 \n\t" | |||
| "jb 1b \n\t" | |||
| : "+r"(ptr) | |||
| : "r"((x86_reg)wrap), "r"((x86_reg)width), "r"(ptr + wrap * height) | |||
| ); | |||
| } else { | |||
| av_assert1(w == 4); | |||
| __asm__ volatile ( | |||
| "1: \n\t" | |||
| "movd (%0), %%mm0 \n\t" | |||
| "punpcklbw %%mm0, %%mm0 \n\t" | |||
| "punpcklwd %%mm0, %%mm0 \n\t" | |||
| "movd %%mm0, -4(%0) \n\t" | |||
| "movd -4(%0, %2), %%mm1 \n\t" | |||
| "punpcklbw %%mm1, %%mm1 \n\t" | |||
| "punpckhwd %%mm1, %%mm1 \n\t" | |||
| "punpckhdq %%mm1, %%mm1 \n\t" | |||
| "movd %%mm1, (%0, %2) \n\t" | |||
| "add %1, %0 \n\t" | |||
| "cmp %3, %0 \n\t" | |||
| "jb 1b \n\t" | |||
| : "+r" (ptr) | |||
| : "r" ((x86_reg) wrap), "r" ((x86_reg) width), | |||
| "r" (ptr + wrap * height)); | |||
| } | |||
| /* top and bottom (and hopefully also the corners): the padded first/last | |||
| * row is copied outwards starting w bytes left of the image. Each outer | |||
| * iteration stores four rows (i += 4) and each inner step stores 8 bytes, | |||
| * so h is effectively rounded up to a multiple of 4 and width + 2 * w to | |||
| * a multiple of 8. */ | |||
| if (sides & EDGE_TOP) { | |||
| for (i = 0; i < h; i += 4) { | |||
| ptr = buf - (i + 1) * wrap - w; | |||
| __asm__ volatile ( | |||
| "1: \n\t" | |||
| "movq (%1, %0), %%mm0 \n\t" | |||
| "movq %%mm0, (%0) \n\t" | |||
| "movq %%mm0, (%0, %2) \n\t" | |||
| "movq %%mm0, (%0, %2, 2) \n\t" | |||
| "movq %%mm0, (%0, %3) \n\t" | |||
| "add $8, %0 \n\t" | |||
| "cmp %4, %0 \n\t" | |||
| "jb 1b \n\t" | |||
| : "+r" (ptr) | |||
| : "r" ((x86_reg) buf - (x86_reg) ptr - w), | |||
| "r" ((x86_reg) - wrap), "r" ((x86_reg) - wrap * 3), | |||
| "r" (ptr + width + 2 * w)); | |||
| } | |||
| } | |||
| if (sides & EDGE_BOTTOM) { | |||
| for (i = 0; i < h; i += 4) { | |||
| ptr = last_line + (i + 1) * wrap - w; | |||
| __asm__ volatile ( | |||
| "1: \n\t" | |||
| "movq (%1, %0), %%mm0 \n\t" | |||
| "movq %%mm0, (%0) \n\t" | |||
| "movq %%mm0, (%0, %2) \n\t" | |||
| "movq %%mm0, (%0, %2, 2) \n\t" | |||
| "movq %%mm0, (%0, %3) \n\t" | |||
| "add $8, %0 \n\t" | |||
| "cmp %4, %0 \n\t" | |||
| "jb 1b \n\t" | |||
| : "+r" (ptr) | |||
| : "r" ((x86_reg) last_line - (x86_reg) ptr - w), | |||
| "r" ((x86_reg) wrap), "r" ((x86_reg) wrap * 3), | |||
| "r" (ptr + width + 2 * w)); | |||
| } | |||
| } | |||
| } | |||
| #endif /* HAVE_INLINE_ASM */ | |||
| av_cold void ff_mpegvideoencdsp_init_x86(MpegvideoEncDSPContext *c, | |||
| @@ -124,6 +239,10 @@ av_cold void ff_mpegvideoencdsp_init_x86(MpegvideoEncDSPContext *c, | |||
| c->try_8x8basis = try_8x8basis_mmx; | |||
| } | |||
| c->add_8x8basis = add_8x8basis_mmx; | |||
| if (avctx->bits_per_raw_sample <= 8) { | |||
| c->draw_edges = draw_edges_mmx; | |||
| } | |||
| } | |||
| if (INLINE_AMD3DNOW(cpu_flags)) { | |||