* commit '8d686ca59db14900ad5c12b547fb8a7afc8b0b94': dsputil: Split off *_8x8basis to a separate context. Conflicts: libavcodec/dsputil.c, libavcodec/mpegvideo_enc.c, libavcodec/x86/dsputilenc_mmx.c. Merged-by: Michael Niedermayer <michaelni@gmx.at> (tags/n2.3)
| @@ -77,7 +77,8 @@ OBJS-$(CONFIG_MPEGAUDIODSP) += mpegaudiodsp.o \ | |||||
| OBJS-$(CONFIG_MPEGVIDEO) += mpegvideo.o mpegvideodsp.o \ | OBJS-$(CONFIG_MPEGVIDEO) += mpegvideo.o mpegvideodsp.o \ | ||||
| mpegvideo_motion.o mpegutils.o | mpegvideo_motion.o mpegutils.o | ||||
| OBJS-$(CONFIG_MPEGVIDEOENC) += mpegvideo_enc.o mpeg12data.o \ | OBJS-$(CONFIG_MPEGVIDEOENC) += mpegvideo_enc.o mpeg12data.o \ | ||||
| motion_est.o ratecontrol.o | |||||
| motion_est.o ratecontrol.o \ | |||||
| mpegvideoencdsp.o | |||||
| OBJS-$(CONFIG_QPELDSP) += qpeldsp.o | OBJS-$(CONFIG_QPELDSP) += qpeldsp.o | ||||
| OBJS-$(CONFIG_RANGECODER) += rangecoder.o | OBJS-$(CONFIG_RANGECODER) += rangecoder.o | ||||
| RDFT-OBJS-$(CONFIG_HARDCODED_TABLES) += sin_tables.o | RDFT-OBJS-$(CONFIG_HARDCODED_TABLES) += sin_tables.o | ||||
| @@ -465,35 +465,6 @@ static int nsse8_c(MpegEncContext *c, uint8_t *s1, uint8_t *s2, int stride, int | |||||
| return score1 + FFABS(score2) * 8; | return score1 + FFABS(score2) * 8; | ||||
| } | } | ||||
| static int try_8x8basis_c(int16_t rem[64], int16_t weight[64], | |||||
| int16_t basis[64], int scale) | |||||
| { | |||||
| int i; | |||||
| unsigned int sum = 0; | |||||
| for (i = 0; i < 8 * 8; i++) { | |||||
| int b = rem[i] + ((basis[i] * scale + | |||||
| (1 << (BASIS_SHIFT - RECON_SHIFT - 1))) >> | |||||
| (BASIS_SHIFT - RECON_SHIFT)); | |||||
| int w = weight[i]; | |||||
| b >>= RECON_SHIFT; | |||||
| av_assert2(-512 < b && b < 512); | |||||
| sum += (w * b) * (w * b) >> 4; | |||||
| } | |||||
| return sum >> 2; | |||||
| } | |||||
| static void add_8x8basis_c(int16_t rem[64], int16_t basis[64], int scale) | |||||
| { | |||||
| int i; | |||||
| for (i = 0; i < 8 * 8; i++) | |||||
| rem[i] += (basis[i] * scale + | |||||
| (1 << (BASIS_SHIFT - RECON_SHIFT - 1))) >> | |||||
| (BASIS_SHIFT - RECON_SHIFT); | |||||
| } | |||||
| static int zero_cmp(MpegEncContext *s, uint8_t *a, uint8_t *b, | static int zero_cmp(MpegEncContext *s, uint8_t *a, uint8_t *b, | ||||
| int stride, int h) | int stride, int h) | ||||
| { | { | ||||
| @@ -1170,9 +1141,6 @@ av_cold void ff_dsputil_init(DSPContext *c, AVCodecContext *avctx) | |||||
| ff_dsputil_init_dwt(c); | ff_dsputil_init_dwt(c); | ||||
| #endif | #endif | ||||
| c->try_8x8basis = try_8x8basis_c; | |||||
| c->add_8x8basis = add_8x8basis_c; | |||||
| c->shrink[0] = av_image_copy_plane; | c->shrink[0] = av_image_copy_plane; | ||||
| c->shrink[1] = ff_shrink22; | c->shrink[1] = ff_shrink22; | ||||
| c->shrink[2] = ff_shrink44; | c->shrink[2] = ff_shrink44; | ||||
| @@ -103,12 +103,6 @@ typedef struct DSPContext { | |||||
| void (*fdct)(int16_t *block /* align 16 */); | void (*fdct)(int16_t *block /* align 16 */); | ||||
| void (*fdct248)(int16_t *block /* align 16 */); | void (*fdct248)(int16_t *block /* align 16 */); | ||||
| int (*try_8x8basis)(int16_t rem[64], int16_t weight[64], | |||||
| int16_t basis[64], int scale); | |||||
| void (*add_8x8basis)(int16_t rem[64], int16_t basis[64], int scale); | |||||
| #define BASIS_SHIFT 16 | |||||
| #define RECON_SHIFT 6 | |||||
| void (*draw_edges)(uint8_t *buf, int wrap, int width, int height, | void (*draw_edges)(uint8_t *buf, int wrap, int width, int height, | ||||
| int w, int h, int sides); | int w, int h, int sides); | ||||
| #define EDGE_WIDTH 16 | #define EDGE_WIDTH 16 | ||||
| @@ -38,6 +38,7 @@ | |||||
| #include "hpeldsp.h" | #include "hpeldsp.h" | ||||
| #include "idctdsp.h" | #include "idctdsp.h" | ||||
| #include "mpegvideodsp.h" | #include "mpegvideodsp.h" | ||||
| #include "mpegvideoencdsp.h" | |||||
| #include "put_bits.h" | #include "put_bits.h" | ||||
| #include "ratecontrol.h" | #include "ratecontrol.h" | ||||
| #include "parser.h" | #include "parser.h" | ||||
| @@ -365,6 +366,7 @@ typedef struct MpegEncContext { | |||||
| HpelDSPContext hdsp; | HpelDSPContext hdsp; | ||||
| IDCTDSPContext idsp; | IDCTDSPContext idsp; | ||||
| MpegVideoDSPContext mdsp; | MpegVideoDSPContext mdsp; | ||||
| MpegvideoEncDSPContext mpvencdsp; | |||||
| QpelDSPContext qdsp; | QpelDSPContext qdsp; | ||||
| VideoDSPContext vdsp; | VideoDSPContext vdsp; | ||||
| H263DSPContext h263dsp; | H263DSPContext h263dsp; | ||||
| @@ -818,6 +818,7 @@ av_cold int ff_MPV_encode_init(AVCodecContext *avctx) | |||||
| if (ff_MPV_common_init(s) < 0) | if (ff_MPV_common_init(s) < 0) | ||||
| return -1; | return -1; | ||||
| ff_mpegvideoencdsp_init(&s->mpvencdsp, avctx); | |||||
| ff_qpeldsp_init(&s->qdsp); | ff_qpeldsp_init(&s->qdsp); | ||||
| s->avctx->coded_frame = s->current_picture.f; | s->avctx->coded_frame = s->current_picture.f; | ||||
| @@ -4090,7 +4091,7 @@ STOP_TIMER("memset rem[]")} | |||||
| run_tab[rle_index++]=run; | run_tab[rle_index++]=run; | ||||
| run=0; | run=0; | ||||
| s->dsp.add_8x8basis(rem, basis[j], coeff); | |||||
| s->mpvencdsp.add_8x8basis(rem, basis[j], coeff); | |||||
| }else{ | }else{ | ||||
| run++; | run++; | ||||
| } | } | ||||
| @@ -4104,7 +4105,7 @@ STOP_TIMER("init rem[]") | |||||
| {START_TIMER | {START_TIMER | ||||
| #endif | #endif | ||||
| for(;;){ | for(;;){ | ||||
| int best_score=s->dsp.try_8x8basis(rem, weight, basis[0], 0); | |||||
| int best_score = s->mpvencdsp.try_8x8basis(rem, weight, basis[0], 0); | |||||
| int best_coeff=0; | int best_coeff=0; | ||||
| int best_change=0; | int best_change=0; | ||||
| int run2, best_unquant_change=0, analyze_gradient; | int run2, best_unquant_change=0, analyze_gradient; | ||||
| @@ -4148,7 +4149,8 @@ STOP_TIMER("dct")} | |||||
| if(new_coeff >= 2048 || new_coeff < 0) | if(new_coeff >= 2048 || new_coeff < 0) | ||||
| continue; | continue; | ||||
| score= s->dsp.try_8x8basis(rem, weight, basis[0], new_coeff - old_coeff); | |||||
| score = s->mpvencdsp.try_8x8basis(rem, weight, basis[0], | |||||
| new_coeff - old_coeff); | |||||
| if(score<best_score){ | if(score<best_score){ | ||||
| best_score= score; | best_score= score; | ||||
| best_coeff= 0; | best_coeff= 0; | ||||
| @@ -4271,7 +4273,8 @@ STOP_TIMER("dct")} | |||||
| unquant_change= new_coeff - old_coeff; | unquant_change= new_coeff - old_coeff; | ||||
| av_assert2((score < 100*lambda && score > -100*lambda) || lambda==0); | av_assert2((score < 100*lambda && score > -100*lambda) || lambda==0); | ||||
| score+= s->dsp.try_8x8basis(rem, weight, basis[j], unquant_change); | |||||
| score += s->mpvencdsp.try_8x8basis(rem, weight, basis[j], | |||||
| unquant_change); | |||||
| if(score<best_score){ | if(score<best_score){ | ||||
| best_score= score; | best_score= score; | ||||
| best_coeff= i; | best_coeff= i; | ||||
| @@ -4345,7 +4348,7 @@ if(256*256*256*64 % count == 0){ | |||||
| } | } | ||||
| } | } | ||||
| s->dsp.add_8x8basis(rem, basis[j], best_unquant_change); | |||||
| s->mpvencdsp.add_8x8basis(rem, basis[j], best_unquant_change); | |||||
| }else{ | }else{ | ||||
| break; | break; | ||||
| } | } | ||||
| @@ -0,0 +1,65 @@ | |||||
| /* | |||||
| * This file is part of FFmpeg. | |||||
| * | |||||
| * FFmpeg is free software; you can redistribute it and/or | |||||
| * modify it under the terms of the GNU Lesser General Public | |||||
| * License as published by the Free Software Foundation; either | |||||
| * version 2.1 of the License, or (at your option) any later version. | |||||
| * | |||||
| * FFmpeg is distributed in the hope that it will be useful, | |||||
| * but WITHOUT ANY WARRANTY; without even the implied warranty of | |||||
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |||||
| * Lesser General Public License for more details. | |||||
| * | |||||
| * You should have received a copy of the GNU Lesser General Public | |||||
| * License along with FFmpeg; if not, write to the Free Software | |||||
| * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |||||
| */ | |||||
| #include <assert.h> | |||||
| #include <stdint.h> | |||||
| #include "config.h" | |||||
| #include "libavutil/avassert.h" | |||||
| #include "libavutil/attributes.h" | |||||
| #include "avcodec.h" | |||||
| #include "mpegvideoencdsp.h" | |||||
| static int try_8x8basis_c(int16_t rem[64], int16_t weight[64], /* C version: scores rem[] with scale * basis[] added, weighted by weight[]; none of the arrays is written */ | |||||
| int16_t basis[64], int scale) | |||||
| { | |||||
| int i; | |||||
| unsigned int sum = 0; /* unsigned accumulator for the per-entry terms */ | |||||
| for (i = 0; i < 8 * 8; i++) { /* visit all 64 entries */ | |||||
| int b = rem[i] + ((basis[i] * scale + /* rem[i] plus the scaled basis value ... */ | |||||
| (1 << (BASIS_SHIFT - RECON_SHIFT - 1))) >> /* ... with half of 1 << (BASIS_SHIFT - RECON_SHIFT) added as a rounding term before the shift */ | |||||
| (BASIS_SHIFT - RECON_SHIFT)); | |||||
| int w = weight[i]; | |||||
| b >>= RECON_SHIFT; /* second shift, by RECON_SHIFT, with no rounding term */ | |||||
| av_assert2(-512 < b && b < 512); /* b is expected to lie strictly between -512 and 512 at this point */ | |||||
| sum += (w * b) * (w * b) >> 4; /* '*' binds tighter than '>>': the squared weighted value is shifted right by 4, then accumulated */ | |||||
| } | |||||
| return sum >> 2; /* the score is the accumulated sum shifted right by 2 */ | |||||
| } | |||||
| static void add_8x8basis_c(int16_t rem[64], int16_t basis[64], int scale) /* C version: adds scale * basis[] into rem[] in place; basis[] is only read */ | |||||
| { | |||||
| int i; | |||||
| for (i = 0; i < 8 * 8; i++) /* visit all 64 entries */ | |||||
| rem[i] += (basis[i] * scale + /* scaled basis value ... */ | |||||
| (1 << (BASIS_SHIFT - RECON_SHIFT - 1))) >> /* ... plus half of 1 << (BASIS_SHIFT - RECON_SHIFT) as a rounding term */ | |||||
| (BASIS_SHIFT - RECON_SHIFT); /* same shift amount as the first shift in try_8x8basis_c; the result is stored back into the int16_t entry */ | |||||
| } | |||||
| av_cold void ff_mpegvideoencdsp_init(MpegvideoEncDSPContext *c, /* fills *c with the C implementations, then lets the x86 hook replace them */ | |||||
| AVCodecContext *avctx) | |||||
| { | |||||
| c->try_8x8basis = try_8x8basis_c; /* defaults, installed before any architecture-specific override */ | |||||
| c->add_8x8basis = add_8x8basis_c; | |||||
| if (ARCH_X86) /* NOTE(review): ARCH_X86 is presumably a 0/1 build constant from config.h - confirm */ | |||||
| ff_mpegvideoencdsp_init_x86(c, avctx); /* avctx is not used in this function other than being forwarded */ | |||||
| } | |||||
| @@ -0,0 +1,41 @@ | |||||
| /* | |||||
| * This file is part of FFmpeg. | |||||
| * | |||||
| * FFmpeg is free software; you can redistribute it and/or | |||||
| * modify it under the terms of the GNU Lesser General Public | |||||
| * License as published by the Free Software Foundation; either | |||||
| * version 2.1 of the License, or (at your option) any later version. | |||||
| * | |||||
| * FFmpeg is distributed in the hope that it will be useful, | |||||
| * but WITHOUT ANY WARRANTY; without even the implied warranty of | |||||
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |||||
| * Lesser General Public License for more details. | |||||
| * | |||||
| * You should have received a copy of the GNU Lesser General Public | |||||
| * License along with FFmpeg; if not, write to the Free Software | |||||
| * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |||||
| */ | |||||
| #ifndef AVCODEC_MPEGVIDEOENCDSP_H | |||||
| #define AVCODEC_MPEGVIDEOENCDSP_H | |||||
| #include <stdint.h> | |||||
| #include "avcodec.h" | |||||
| #define BASIS_SHIFT 16 | |||||
| #define RECON_SHIFT 6 | |||||
| typedef struct MpegvideoEncDSPContext { /* function-pointer table for the *_8x8basis routines; filled in by ff_mpegvideoencdsp_init() */ | |||||
| int (*try_8x8basis)(int16_t rem[64], int16_t weight[64], /* returns a score for rem[] with scale * basis[] added; the C implementation does not write to its arguments */ | |||||
| int16_t basis[64], int scale); | |||||
| void (*add_8x8basis)(int16_t rem[64], int16_t basis[64], int scale); /* adds the scaled, rounded basis[] into rem[] in place (as the C implementation does) */ | |||||
| } MpegvideoEncDSPContext; | |||||
| void ff_mpegvideoencdsp_init(MpegvideoEncDSPContext *c, | |||||
| AVCodecContext *avctx); | |||||
| void ff_mpegvideoencdsp_init_x86(MpegvideoEncDSPContext *c, | |||||
| AVCodecContext *avctx); | |||||
| #endif /* AVCODEC_MPEGVIDEOENCDSP_H */ | |||||
| @@ -28,7 +28,8 @@ OBJS-$(CONFIG_LPC) += x86/lpc.o | |||||
| OBJS-$(CONFIG_MPEGAUDIODSP) += x86/mpegaudiodsp.o | OBJS-$(CONFIG_MPEGAUDIODSP) += x86/mpegaudiodsp.o | ||||
| OBJS-$(CONFIG_MPEGVIDEO) += x86/mpegvideo.o \ | OBJS-$(CONFIG_MPEGVIDEO) += x86/mpegvideo.o \ | ||||
| x86/mpegvideodsp.o | x86/mpegvideodsp.o | ||||
| OBJS-$(CONFIG_MPEGVIDEOENC) += x86/mpegvideoenc.o | |||||
| OBJS-$(CONFIG_MPEGVIDEOENC) += x86/mpegvideoenc.o \ | |||||
| x86/mpegvideoencdsp_init.o | |||||
| OBJS-$(CONFIG_QPELDSP) += x86/qpeldsp_init.o | OBJS-$(CONFIG_QPELDSP) += x86/qpeldsp_init.o | ||||
| OBJS-$(CONFIG_VIDEODSP) += x86/videodsp_init.o | OBJS-$(CONFIG_VIDEODSP) += x86/videodsp_init.o | ||||
| OBJS-$(CONFIG_VP3DSP) += x86/vp3dsp_init.o | OBJS-$(CONFIG_VP3DSP) += x86/vp3dsp_init.o | ||||
| @@ -352,72 +352,6 @@ static int vsad16_mmxext(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2, | |||||
| #undef SUM | #undef SUM | ||||
| #define PHADDD(a, t) \ | |||||
| "movq " #a ", " #t " \n\t" \ | |||||
| "psrlq $32, " #a " \n\t" \ | |||||
| "paddd " #t ", " #a " \n\t" | |||||
| /* | |||||
| * pmulhw: dst[0 - 15] = (src[0 - 15] * dst[0 - 15])[16 - 31] | |||||
| * pmulhrw: dst[0 - 15] = (src[0 - 15] * dst[0 - 15] + 0x8000)[16 - 31] | |||||
| * pmulhrsw: dst[0 - 15] = (src[0 - 15] * dst[0 - 15] + 0x4000)[15 - 30] | |||||
| */ | |||||
| #define PMULHRW(x, y, s, o) \ | |||||
| "pmulhw " #s ", " #x " \n\t" \ | |||||
| "pmulhw " #s ", " #y " \n\t" \ | |||||
| "paddw " #o ", " #x " \n\t" \ | |||||
| "paddw " #o ", " #y " \n\t" \ | |||||
| "psraw $1, " #x " \n\t" \ | |||||
| "psraw $1, " #y " \n\t" | |||||
| #define DEF(x) x ## _mmx | |||||
| #define SET_RND MOVQ_WONE | |||||
| #define SCALE_OFFSET 1 | |||||
| #include "dsputil_qns_template.c" | |||||
| #undef DEF | |||||
| #undef SET_RND | |||||
| #undef SCALE_OFFSET | |||||
| #undef PMULHRW | |||||
| #define DEF(x) x ## _3dnow | |||||
| #define SET_RND(x) | |||||
| #define SCALE_OFFSET 0 | |||||
| #define PMULHRW(x, y, s, o) \ | |||||
| "pmulhrw " #s ", " #x " \n\t" \ | |||||
| "pmulhrw " #s ", " #y " \n\t" | |||||
| #include "dsputil_qns_template.c" | |||||
| #undef DEF | |||||
| #undef SET_RND | |||||
| #undef SCALE_OFFSET | |||||
| #undef PMULHRW | |||||
| #if HAVE_SSSE3_INLINE | |||||
| #undef PHADDD | |||||
| #define DEF(x) x ## _ssse3 | |||||
| #define SET_RND(x) | |||||
| #define SCALE_OFFSET -1 | |||||
| #define PHADDD(a, t) \ | |||||
| "pshufw $0x0E, " #a ", " #t " \n\t" \ | |||||
| /* faster than phaddd on core2 */ \ | |||||
| "paddd " #t ", " #a " \n\t" | |||||
| #define PMULHRW(x, y, s, o) \ | |||||
| "pmulhrsw " #s ", " #x " \n\t" \ | |||||
| "pmulhrsw " #s ", " #y " \n\t" | |||||
| #include "dsputil_qns_template.c" | |||||
| #undef DEF | |||||
| #undef SET_RND | |||||
| #undef SCALE_OFFSET | |||||
| #undef PMULHRW | |||||
| #undef PHADDD | |||||
| #endif /* HAVE_SSSE3_INLINE */ | |||||
| #endif /* HAVE_INLINE_ASM */ | #endif /* HAVE_INLINE_ASM */ | ||||
| av_cold void ff_dsputilenc_init_mmx(DSPContext *c, AVCodecContext *avctx, | av_cold void ff_dsputilenc_init_mmx(DSPContext *c, AVCodecContext *avctx, | ||||
| @@ -448,16 +382,7 @@ av_cold void ff_dsputilenc_init_mmx(DSPContext *c, AVCodecContext *avctx, | |||||
| if (!(avctx->flags & CODEC_FLAG_BITEXACT)) { | if (!(avctx->flags & CODEC_FLAG_BITEXACT)) { | ||||
| c->vsad[0] = vsad16_mmx; | c->vsad[0] = vsad16_mmx; | ||||
| c->try_8x8basis = try_8x8basis_mmx; | |||||
| } | |||||
| c->add_8x8basis = add_8x8basis_mmx; | |||||
| } | |||||
| if (INLINE_AMD3DNOW(cpu_flags)) { | |||||
| if (!(avctx->flags & CODEC_FLAG_BITEXACT)) { | |||||
| c->try_8x8basis = try_8x8basis_3dnow; | |||||
| } | } | ||||
| c->add_8x8basis = add_8x8basis_3dnow; | |||||
| } | } | ||||
| if (INLINE_MMXEXT(cpu_flags)) { | if (INLINE_MMXEXT(cpu_flags)) { | ||||
| @@ -480,10 +405,6 @@ av_cold void ff_dsputilenc_init_mmx(DSPContext *c, AVCodecContext *avctx, | |||||
| #if HAVE_SSSE3_INLINE | #if HAVE_SSSE3_INLINE | ||||
| if (INLINE_SSSE3(cpu_flags)) { | if (INLINE_SSSE3(cpu_flags)) { | ||||
| if (!(avctx->flags & CODEC_FLAG_BITEXACT)) { | |||||
| c->try_8x8basis = try_8x8basis_ssse3; | |||||
| } | |||||
| c->add_8x8basis = add_8x8basis_ssse3; | |||||
| } | } | ||||
| #endif | #endif | ||||
| #endif /* HAVE_INLINE_ASM */ | #endif /* HAVE_INLINE_ASM */ | ||||
| @@ -1,5 +1,5 @@ | |||||
| /* | /* | ||||
| * DSP utils : QNS functions are compiled 3 times for mmx/3dnow/ssse3 | |||||
| * QNS functions are compiled 3 times for MMX/3DNOW/SSSE3 | |||||
| * Copyright (c) 2004 Michael Niedermayer | * Copyright (c) 2004 Michael Niedermayer | ||||
| * | * | ||||
| * MMX optimization by Michael Niedermayer <michaelni@gmx.at> | * MMX optimization by Michael Niedermayer <michaelni@gmx.at> | ||||
| @@ -22,9 +22,9 @@ | |||||
| * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | ||||
| */ | */ | ||||
| #include <assert.h> | |||||
| #include <stdint.h> | #include <stdint.h> | ||||
| #include "libavutil/avassert.h" | |||||
| #include "libavutil/common.h" | #include "libavutil/common.h" | ||||
| #include "libavutil/x86/asm.h" | #include "libavutil/x86/asm.h" | ||||
| @@ -0,0 +1,125 @@ | |||||
| /* | |||||
| * This file is part of FFmpeg. | |||||
| * | |||||
| * FFmpeg is free software; you can redistribute it and/or | |||||
| * modify it under the terms of the GNU Lesser General Public | |||||
| * License as published by the Free Software Foundation; either | |||||
| * version 2.1 of the License, or (at your option) any later version. | |||||
| * | |||||
| * FFmpeg is distributed in the hope that it will be useful, | |||||
| * but WITHOUT ANY WARRANTY; without even the implied warranty of | |||||
| * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |||||
| * Lesser General Public License for more details. | |||||
| * | |||||
| * You should have received a copy of the GNU Lesser General Public | |||||
| * License along with FFmpeg; if not, write to the Free Software | |||||
| * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |||||
| */ | |||||
| #include "libavutil/attributes.h" | |||||
| #include "libavutil/cpu.h" | |||||
| #include "libavutil/x86/cpu.h" | |||||
| #include "libavcodec/avcodec.h" | |||||
| #include "libavcodec/mpegvideoencdsp.h" | |||||
| #if HAVE_INLINE_ASM /* everything up to the matching #endif is compiled only with inline-asm support */ | |||||
| #define PHADDD(a, t) \ | |||||
| "movq " #a ", " #t " \n\t" \ | |||||
| "psrlq $32, " #a " \n\t" \ | |||||
| "paddd " #t ", " #a " \n\t" /* default PHADDD (AT&T operand order): copy a to t, shift a right by 32, add t into a, leaving the sum of both dwords in the low dword of a */ | |||||
| /* | |||||
| * pmulhw: dst[0 - 15] = (src[0 - 15] * dst[0 - 15])[16 - 31] | |||||
| * pmulhrw: dst[0 - 15] = (src[0 - 15] * dst[0 - 15] + 0x8000)[16 - 31] | |||||
| * pmulhrsw: dst[0 - 15] = (src[0 - 15] * dst[0 - 15] + 0x4000)[15 - 30] | |||||
| */ | |||||
| #define PMULHRW(x, y, s, o) \ | |||||
| "pmulhw " #s ", " #x " \n\t" \ | |||||
| "pmulhw " #s ", " #y " \n\t" \ | |||||
| "paddw " #o ", " #x " \n\t" \ | |||||
| "paddw " #o ", " #y " \n\t" \ | |||||
| "psraw $1, " #x " \n\t" \ | |||||
| "psraw $1, " #y " \n\t" /* MMX variant: plain pmulhw by s, then add offset o and shift right by 1, applied to both x and y */ | |||||
| #define DEF(x) x ## _mmx /* DEF() appends _mmx; the init function below refers to try_8x8basis_mmx / add_8x8basis_mmx */ | |||||
| #define SET_RND MOVQ_WONE | |||||
| #define SCALE_OFFSET 1 | |||||
| #include "mpegvideoenc_qns_template.c" /* first of three inclusions of the same template, one per instruction-set variant */ | |||||
| #undef DEF | |||||
| #undef SET_RND | |||||
| #undef SCALE_OFFSET | |||||
| #undef PMULHRW /* PHADDD is deliberately kept: the 3DNow! pass below reuses it */ | |||||
| #define DEF(x) x ## _3dnow /* second pass: names get the _3dnow suffix */ | |||||
| #define SET_RND(x) /* expands to nothing in this variant */ | |||||
| #define SCALE_OFFSET 0 | |||||
| #define PMULHRW(x, y, s, o) \ | |||||
| "pmulhrw " #s ", " #x " \n\t" \ | |||||
| "pmulhrw " #s ", " #y " \n\t" /* uses the pmulhrw instruction directly; the o argument is unused here */ | |||||
| #include "mpegvideoenc_qns_template.c" | |||||
| #undef DEF | |||||
| #undef SET_RND | |||||
| #undef SCALE_OFFSET | |||||
| #undef PMULHRW | |||||
| #if HAVE_SSSE3_INLINE /* third pass is built only when SSSE3 inline asm is available */ | |||||
| #undef PHADDD /* the SSSE3 pass supplies its own PHADDD below */ | |||||
| #define DEF(x) x ## _ssse3 | |||||
| #define SET_RND(x) | |||||
| #define SCALE_OFFSET -1 | |||||
| #define PHADDD(a, t) \ | |||||
| "pshufw $0x0E, " #a ", " #t " \n\t" \ | |||||
| /* faster than phaddd on core2 */ \ | |||||
| "paddd " #t ", " #a " \n\t" | |||||
| #define PMULHRW(x, y, s, o) \ | |||||
| "pmulhrsw " #s ", " #x " \n\t" \ | |||||
| "pmulhrsw " #s ", " #y " \n\t" /* uses pmulhrsw directly; the o argument is unused here */ | |||||
| #include "mpegvideoenc_qns_template.c" | |||||
| #undef DEF | |||||
| #undef SET_RND | |||||
| #undef SCALE_OFFSET | |||||
| #undef PMULHRW | |||||
| #undef PHADDD | |||||
| #endif /* HAVE_SSSE3_INLINE */ | |||||
| #endif /* HAVE_INLINE_ASM */ | |||||
| av_cold void ff_mpegvideoencdsp_init_x86(MpegvideoEncDSPContext *c, /* x86 hook: replaces the function pointers in *c according to the detected CPU flags */ | |||||
| AVCodecContext *avctx) | |||||
| { | |||||
| #if HAVE_INLINE_ASM /* without inline asm the function body is empty and *c is left untouched */ | |||||
| int cpu_flags = av_get_cpu_flags(); | |||||
| if (INLINE_MMX(cpu_flags)) { | |||||
| if (!(avctx->flags & CODEC_FLAG_BITEXACT)) { /* try_8x8basis is replaced only when CODEC_FLAG_BITEXACT is not set */ | |||||
| c->try_8x8basis = try_8x8basis_mmx; | |||||
| } | |||||
| c->add_8x8basis = add_8x8basis_mmx; /* add_8x8basis is replaced regardless of the bit-exact flag */ | |||||
| } | |||||
| if (INLINE_AMD3DNOW(cpu_flags)) { /* tested after MMX, so these assignments overwrite the MMX ones when both apply */ | |||||
| if (!(avctx->flags & CODEC_FLAG_BITEXACT)) { | |||||
| c->try_8x8basis = try_8x8basis_3dnow; | |||||
| } | |||||
| c->add_8x8basis = add_8x8basis_3dnow; | |||||
| } | |||||
| #if HAVE_SSSE3_INLINE | |||||
| if (INLINE_SSSE3(cpu_flags)) { /* tested last, so SSSE3 overwrites any earlier choice */ | |||||
| if (!(avctx->flags & CODEC_FLAG_BITEXACT)) { | |||||
| c->try_8x8basis = try_8x8basis_ssse3; | |||||
| } | |||||
| c->add_8x8basis = add_8x8basis_ssse3; | |||||
| } | |||||
| #endif /* HAVE_SSSE3_INLINE */ | |||||
| #endif /* HAVE_INLINE_ASM */ | |||||
| } | |||||