@@ -67,7 +67,8 @@ OBJS-$(CONFIG_MPEGAUDIODSP) += mpegaudiodsp.o \ | |||
OBJS-$(CONFIG_MPEGVIDEO) += mpegvideo.o mpegvideodsp.o \ | |||
mpegvideo_motion.o mpegutils.o | |||
OBJS-$(CONFIG_MPEGVIDEOENC) += mpegvideo_enc.o mpeg12data.o \ | |||
motion_est.o ratecontrol.o | |||
motion_est.o ratecontrol.o \ | |||
mpegvideoencdsp.o | |||
OBJS-$(CONFIG_QPELDSP) += qpeldsp.o | |||
OBJS-$(CONFIG_RANGECODER) += rangecoder.o | |||
RDFT-OBJS-$(CONFIG_HARDCODED_TABLES) += sin_tables.o | |||
@@ -464,35 +464,6 @@ static int nsse8_c(MpegEncContext *c, uint8_t *s1, uint8_t *s2, int stride, int | |||
return score1 + FFABS(score2) * 8; | |||
} | |||
static int try_8x8basis_c(int16_t rem[64], int16_t weight[64], | |||
int16_t basis[64], int scale) | |||
{ | |||
int i; | |||
unsigned int sum = 0; | |||
for (i = 0; i < 8 * 8; i++) { | |||
int b = rem[i] + ((basis[i] * scale + | |||
(1 << (BASIS_SHIFT - RECON_SHIFT - 1))) >> | |||
(BASIS_SHIFT - RECON_SHIFT)); | |||
int w = weight[i]; | |||
b >>= RECON_SHIFT; | |||
assert(-512 < b && b < 512); | |||
sum += (w * b) * (w * b) >> 4; | |||
} | |||
return sum >> 2; | |||
} | |||
static void add_8x8basis_c(int16_t rem[64], int16_t basis[64], int scale) | |||
{ | |||
int i; | |||
for (i = 0; i < 8 * 8; i++) | |||
rem[i] += (basis[i] * scale + | |||
(1 << (BASIS_SHIFT - RECON_SHIFT - 1))) >> | |||
(BASIS_SHIFT - RECON_SHIFT); | |||
} | |||
static int zero_cmp(MpegEncContext *s, uint8_t *a, uint8_t *b, | |||
int stride, int h) | |||
{ | |||
@@ -1126,9 +1097,6 @@ av_cold void ff_dsputil_init(DSPContext *c, AVCodecContext *avctx) | |||
c->nsse[0] = nsse16_c; | |||
c->nsse[1] = nsse8_c; | |||
c->try_8x8basis = try_8x8basis_c; | |||
c->add_8x8basis = add_8x8basis_c; | |||
c->shrink[0] = av_image_copy_plane; | |||
c->shrink[1] = ff_shrink22; | |||
c->shrink[2] = ff_shrink44; | |||
@@ -87,12 +87,6 @@ typedef struct DSPContext { | |||
void (*fdct)(int16_t *block /* align 16 */); | |||
void (*fdct248)(int16_t *block /* align 16 */); | |||
int (*try_8x8basis)(int16_t rem[64], int16_t weight[64], | |||
int16_t basis[64], int scale); | |||
void (*add_8x8basis)(int16_t rem[64], int16_t basis[64], int scale); | |||
#define BASIS_SHIFT 16 | |||
#define RECON_SHIFT 6 | |||
void (*draw_edges)(uint8_t *buf, int wrap, int width, int height, | |||
int w, int h, int sides); | |||
#define EDGE_WIDTH 16 | |||
@@ -37,6 +37,7 @@ | |||
#include "hpeldsp.h" | |||
#include "idctdsp.h" | |||
#include "mpegvideodsp.h" | |||
#include "mpegvideoencdsp.h" | |||
#include "put_bits.h" | |||
#include "ratecontrol.h" | |||
#include "parser.h" | |||
@@ -355,6 +356,7 @@ typedef struct MpegEncContext { | |||
HpelDSPContext hdsp; | |||
IDCTDSPContext idsp; | |||
MpegVideoDSPContext mdsp; | |||
MpegvideoEncDSPContext mpvencdsp; | |||
QpelDSPContext qdsp; | |||
VideoDSPContext vdsp; | |||
H263DSPContext h263dsp; | |||
@@ -701,6 +701,7 @@ av_cold int ff_MPV_encode_init(AVCodecContext *avctx) | |||
if (ARCH_X86) | |||
ff_MPV_encode_init_x86(s); | |||
ff_mpegvideoencdsp_init(&s->mpvencdsp, avctx); | |||
ff_qpeldsp_init(&s->qdsp); | |||
s->avctx->coded_frame = s->current_picture.f; | |||
@@ -3871,7 +3872,7 @@ STOP_TIMER("memset rem[]")} | |||
run_tab[rle_index++]=run; | |||
run=0; | |||
s->dsp.add_8x8basis(rem, basis[j], coeff); | |||
s->mpvencdsp.add_8x8basis(rem, basis[j], coeff); | |||
}else{ | |||
run++; | |||
} | |||
@@ -3885,7 +3886,7 @@ STOP_TIMER("init rem[]") | |||
{START_TIMER | |||
#endif | |||
for(;;){ | |||
int best_score=s->dsp.try_8x8basis(rem, weight, basis[0], 0); | |||
int best_score = s->mpvencdsp.try_8x8basis(rem, weight, basis[0], 0); | |||
int best_coeff=0; | |||
int best_change=0; | |||
int run2, best_unquant_change=0, analyze_gradient; | |||
@@ -3929,7 +3930,8 @@ STOP_TIMER("dct")} | |||
if(new_coeff >= 2048 || new_coeff < 0) | |||
continue; | |||
score= s->dsp.try_8x8basis(rem, weight, basis[0], new_coeff - old_coeff); | |||
score = s->mpvencdsp.try_8x8basis(rem, weight, basis[0], | |||
new_coeff - old_coeff); | |||
if(score<best_score){ | |||
best_score= score; | |||
best_coeff= 0; | |||
@@ -4052,7 +4054,8 @@ STOP_TIMER("dct")} | |||
unquant_change= new_coeff - old_coeff; | |||
assert((score < 100*lambda && score > -100*lambda) || lambda==0); | |||
score+= s->dsp.try_8x8basis(rem, weight, basis[j], unquant_change); | |||
score += s->mpvencdsp.try_8x8basis(rem, weight, basis[j], | |||
unquant_change); | |||
if(score<best_score){ | |||
best_score= score; | |||
best_coeff= i; | |||
@@ -4126,7 +4129,7 @@ if(256*256*256*64 % count == 0){ | |||
} | |||
} | |||
s->dsp.add_8x8basis(rem, basis[j], best_unquant_change); | |||
s->mpvencdsp.add_8x8basis(rem, basis[j], best_unquant_change); | |||
}else{ | |||
break; | |||
} | |||
@@ -0,0 +1,64 @@ | |||
/* | |||
* This file is part of Libav. | |||
* | |||
* Libav is free software; you can redistribute it and/or | |||
* modify it under the terms of the GNU Lesser General Public | |||
* License as published by the Free Software Foundation; either | |||
* version 2.1 of the License, or (at your option) any later version. | |||
* | |||
* Libav is distributed in the hope that it will be useful, | |||
* but WITHOUT ANY WARRANTY; without even the implied warranty of | |||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |||
* Lesser General Public License for more details. | |||
* | |||
* You should have received a copy of the GNU Lesser General Public | |||
* License along with Libav; if not, write to the Free Software | |||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |||
*/ | |||
#include <assert.h> | |||
#include <stdint.h> | |||
#include "config.h" | |||
#include "libavutil/attributes.h" | |||
#include "avcodec.h" | |||
#include "mpegvideoencdsp.h" | |||
static int try_8x8basis_c(int16_t rem[64], int16_t weight[64], | |||
int16_t basis[64], int scale) | |||
{ | |||
int i; | |||
unsigned int sum = 0; | |||
for (i = 0; i < 8 * 8; i++) { | |||
int b = rem[i] + ((basis[i] * scale + | |||
(1 << (BASIS_SHIFT - RECON_SHIFT - 1))) >> | |||
(BASIS_SHIFT - RECON_SHIFT)); | |||
int w = weight[i]; | |||
b >>= RECON_SHIFT; | |||
assert(-512 < b && b < 512); | |||
sum += (w * b) * (w * b) >> 4; | |||
} | |||
return sum >> 2; | |||
} | |||
static void add_8x8basis_c(int16_t rem[64], int16_t basis[64], int scale) | |||
{ | |||
int i; | |||
for (i = 0; i < 8 * 8; i++) | |||
rem[i] += (basis[i] * scale + | |||
(1 << (BASIS_SHIFT - RECON_SHIFT - 1))) >> | |||
(BASIS_SHIFT - RECON_SHIFT); | |||
} | |||
av_cold void ff_mpegvideoencdsp_init(MpegvideoEncDSPContext *c, | |||
AVCodecContext *avctx) | |||
{ | |||
c->try_8x8basis = try_8x8basis_c; | |||
c->add_8x8basis = add_8x8basis_c; | |||
if (ARCH_X86) | |||
ff_mpegvideoencdsp_init_x86(c, avctx); | |||
} |
@@ -0,0 +1,41 @@ | |||
/* | |||
* This file is part of Libav. | |||
* | |||
* Libav is free software; you can redistribute it and/or | |||
* modify it under the terms of the GNU Lesser General Public | |||
* License as published by the Free Software Foundation; either | |||
* version 2.1 of the License, or (at your option) any later version. | |||
* | |||
* Libav is distributed in the hope that it will be useful, | |||
* but WITHOUT ANY WARRANTY; without even the implied warranty of | |||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |||
* Lesser General Public License for more details. | |||
* | |||
* You should have received a copy of the GNU Lesser General Public | |||
* License along with Libav; if not, write to the Free Software | |||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |||
*/ | |||
#ifndef AVCODEC_MPEGVIDEOENCDSP_H | |||
#define AVCODEC_MPEGVIDEOENCDSP_H | |||
#include <stdint.h> | |||
#include "avcodec.h" | |||
#define BASIS_SHIFT 16 | |||
#define RECON_SHIFT 6 | |||
/**
 * Function table for the 8x8 basis helpers.
 * The entries are filled in by ff_mpegvideoencdsp_init().
 */
typedef struct MpegvideoEncDSPContext { | |||
/* Score rem[] as if basis[] * scale had been added to it; the C reference
 * implementation returns a weighted sum of squares and leaves rem[]
 * untouched. */
int (*try_8x8basis)(int16_t rem[64], int16_t weight[64], | |||
int16_t basis[64], int scale); | |||
/* Add basis[] * scale (rounded, shifted down by BASIS_SHIFT - RECON_SHIFT
 * in the C reference implementation) into rem[] in place. */
void (*add_8x8basis)(int16_t rem[64], int16_t basis[64], int scale); | |||
} MpegvideoEncDSPContext; | |||
/* Install the C implementations into c, then call the x86 init when
 * ARCH_X86 is set. */
void ff_mpegvideoencdsp_init(MpegvideoEncDSPContext *c, | |||
AVCodecContext *avctx); | |||
/* x86 part of the init: may replace entries in c with inline-asm versions
 * chosen from the runtime CPU flags. */
void ff_mpegvideoencdsp_init_x86(MpegvideoEncDSPContext *c, | |||
AVCodecContext *avctx); | |||
#endif /* AVCODEC_MPEGVIDEOENCDSP_H */ |
@@ -23,7 +23,8 @@ OBJS-$(CONFIG_LPC) += x86/lpc.o | |||
OBJS-$(CONFIG_MPEGAUDIODSP) += x86/mpegaudiodsp.o | |||
OBJS-$(CONFIG_MPEGVIDEO) += x86/mpegvideo.o \ | |||
x86/mpegvideodsp.o | |||
OBJS-$(CONFIG_MPEGVIDEOENC) += x86/mpegvideoenc.o | |||
OBJS-$(CONFIG_MPEGVIDEOENC) += x86/mpegvideoenc.o \ | |||
x86/mpegvideoencdsp_init.o | |||
OBJS-$(CONFIG_QPELDSP) += x86/qpeldsp_init.o | |||
OBJS-$(CONFIG_VIDEODSP) += x86/videodsp_init.o | |||
OBJS-$(CONFIG_VP3DSP) += x86/vp3dsp_init.o | |||
@@ -805,72 +805,6 @@ DCT_SAD_FUNC(ssse3) | |||
#undef HSUM | |||
#undef DCT_SAD | |||
#define PHADDD(a, t) \ | |||
"movq " #a ", " #t " \n\t" \ | |||
"psrlq $32, " #a " \n\t" \ | |||
"paddd " #t ", " #a " \n\t" | |||
/* | |||
* pmulhw: dst[0 - 15] = (src[0 - 15] * dst[0 - 15])[16 - 31] | |||
* pmulhrw: dst[0 - 15] = (src[0 - 15] * dst[0 - 15] + 0x8000)[16 - 31] | |||
* pmulhrsw: dst[0 - 15] = (src[0 - 15] * dst[0 - 15] + 0x4000)[15 - 30] | |||
*/ | |||
#define PMULHRW(x, y, s, o) \ | |||
"pmulhw " #s ", " #x " \n\t" \ | |||
"pmulhw " #s ", " #y " \n\t" \ | |||
"paddw " #o ", " #x " \n\t" \ | |||
"paddw " #o ", " #y " \n\t" \ | |||
"psraw $1, " #x " \n\t" \ | |||
"psraw $1, " #y " \n\t" | |||
#define DEF(x) x ## _mmx | |||
#define SET_RND MOVQ_WONE | |||
#define SCALE_OFFSET 1 | |||
#include "dsputil_qns_template.c" | |||
#undef DEF | |||
#undef SET_RND | |||
#undef SCALE_OFFSET | |||
#undef PMULHRW | |||
#define DEF(x) x ## _3dnow | |||
#define SET_RND(x) | |||
#define SCALE_OFFSET 0 | |||
#define PMULHRW(x, y, s, o) \ | |||
"pmulhrw " #s ", " #x " \n\t" \ | |||
"pmulhrw " #s ", " #y " \n\t" | |||
#include "dsputil_qns_template.c" | |||
#undef DEF | |||
#undef SET_RND | |||
#undef SCALE_OFFSET | |||
#undef PMULHRW | |||
#if HAVE_SSSE3_INLINE | |||
#undef PHADDD | |||
#define DEF(x) x ## _ssse3 | |||
#define SET_RND(x) | |||
#define SCALE_OFFSET -1 | |||
#define PHADDD(a, t) \ | |||
"pshufw $0x0E, " #a ", " #t " \n\t" \ | |||
/* faster than phaddd on core2 */ \ | |||
"paddd " #t ", " #a " \n\t" | |||
#define PMULHRW(x, y, s, o) \ | |||
"pmulhrsw " #s ", " #x " \n\t" \ | |||
"pmulhrsw " #s ", " #y " \n\t" | |||
#include "dsputil_qns_template.c" | |||
#undef DEF | |||
#undef SET_RND | |||
#undef SCALE_OFFSET | |||
#undef PMULHRW | |||
#undef PHADDD | |||
#endif /* HAVE_SSSE3_INLINE */ | |||
#endif /* HAVE_INLINE_ASM */ | |||
int ff_sse16_sse2(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2, | |||
@@ -921,16 +855,7 @@ av_cold void ff_dsputilenc_init_mmx(DSPContext *c, AVCodecContext *avctx, | |||
c->nsse[1] = nsse8_mmx; | |||
if (!(avctx->flags & CODEC_FLAG_BITEXACT)) { | |||
c->vsad[0] = vsad16_mmx; | |||
c->try_8x8basis = try_8x8basis_mmx; | |||
} | |||
c->add_8x8basis = add_8x8basis_mmx; | |||
} | |||
if (INLINE_AMD3DNOW(cpu_flags)) { | |||
if (!(avctx->flags & CODEC_FLAG_BITEXACT)) { | |||
c->try_8x8basis = try_8x8basis_3dnow; | |||
} | |||
c->add_8x8basis = add_8x8basis_3dnow; | |||
} | |||
if (INLINE_MMXEXT(cpu_flags)) { | |||
@@ -956,10 +881,6 @@ av_cold void ff_dsputilenc_init_mmx(DSPContext *c, AVCodecContext *avctx, | |||
#if HAVE_SSSE3_INLINE | |||
if (INLINE_SSSE3(cpu_flags)) { | |||
if (!(avctx->flags & CODEC_FLAG_BITEXACT)) { | |||
c->try_8x8basis = try_8x8basis_ssse3; | |||
} | |||
c->add_8x8basis = add_8x8basis_ssse3; | |||
c->sum_abs_dctelem = sum_abs_dctelem_ssse3; | |||
} | |||
#endif | |||
@@ -1,5 +1,5 @@ | |||
/* | |||
* DSP utils : QNS functions are compiled 3 times for mmx/3dnow/ssse3 | |||
* QNS functions are compiled 3 times for MMX/3DNOW/SSSE3 | |||
* Copyright (c) 2004 Michael Niedermayer | |||
* | |||
* MMX optimization by Michael Niedermayer <michaelni@gmx.at> |
@@ -0,0 +1,125 @@ | |||
/* | |||
* This file is part of Libav. | |||
* | |||
* Libav is free software; you can redistribute it and/or | |||
* modify it under the terms of the GNU Lesser General Public | |||
* License as published by the Free Software Foundation; either | |||
* version 2.1 of the License, or (at your option) any later version. | |||
* | |||
* Libav is distributed in the hope that it will be useful, | |||
* but WITHOUT ANY WARRANTY; without even the implied warranty of | |||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |||
* Lesser General Public License for more details. | |||
* | |||
* You should have received a copy of the GNU Lesser General Public | |||
* License along with Libav; if not, write to the Free Software | |||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |||
*/ | |||
#include "libavutil/attributes.h" | |||
#include "libavutil/cpu.h" | |||
#include "libavutil/x86/cpu.h" | |||
#include "libavcodec/avcodec.h" | |||
#include "libavcodec/mpegvideoencdsp.h" | |||
/*
 * The QNS template is included up to three times below, each time with a
 * different DEF() suffix (_mmx, _3dnow, _ssse3) and a different set of
 * helper macros.  DEF, SET_RND, SCALE_OFFSET, PMULHRW and PHADDD are
 * consumed by the included template (not shown here), so the define /
 * include / undef order matters.
 */
#if HAVE_INLINE_ASM | |||
/* Default PHADDD: copy a to t, shift a right by 32 bits and add t back, so
 * the low dword of a ends up holding the sum of both dwords.  t is
 * clobbered. */
#define PHADDD(a, t) \ | |||
"movq " #a ", " #t " \n\t" \ | |||
"psrlq $32, " #a " \n\t" \ | |||
"paddd " #t ", " #a " \n\t" | |||
/* | |||
* pmulhw: dst[0 - 15] = (src[0 - 15] * dst[0 - 15])[16 - 31] | |||
* pmulhrw: dst[0 - 15] = (src[0 - 15] * dst[0 - 15] + 0x8000)[16 - 31] | |||
* pmulhrsw: dst[0 - 15] = (src[0 - 15] * dst[0 - 15] + 0x4000)[15 - 30] | |||
*/ | |||
/* MMX variant of PMULHRW: pmulhw on x and y by s, add o to both, then
 * arithmetic shift right by one. */
#define PMULHRW(x, y, s, o) \ | |||
"pmulhw " #s ", " #x " \n\t" \ | |||
"pmulhw " #s ", " #y " \n\t" \ | |||
"paddw " #o ", " #x " \n\t" \ | |||
"paddw " #o ", " #y " \n\t" \ | |||
"psraw $1, " #x " \n\t" \ | |||
"psraw $1, " #y " \n\t" | |||
/* First instantiation: functions get the _mmx suffix. */
#define DEF(x) x ## _mmx | |||
#define SET_RND MOVQ_WONE | |||
#define SCALE_OFFSET 1 | |||
#include "mpegvideoenc_qns_template.c" | |||
#undef DEF | |||
#undef SET_RND | |||
#undef SCALE_OFFSET | |||
#undef PMULHRW | |||
/* Second instantiation: _3dnow suffix, PMULHRW maps directly onto the
 * pmulhrw instruction and SET_RND expands to nothing.  PHADDD is still the
 * default definition from above. */
#define DEF(x) x ## _3dnow | |||
#define SET_RND(x) | |||
#define SCALE_OFFSET 0 | |||
#define PMULHRW(x, y, s, o) \ | |||
"pmulhrw " #s ", " #x " \n\t" \ | |||
"pmulhrw " #s ", " #y " \n\t" | |||
#include "mpegvideoenc_qns_template.c" | |||
#undef DEF | |||
#undef SET_RND | |||
#undef SCALE_OFFSET | |||
#undef PMULHRW | |||
/* Third instantiation, only when SSSE3 inline asm is available: _ssse3
 * suffix, PHADDD is redefined using pshufw and PMULHRW uses pmulhrsw. */
#if HAVE_SSSE3_INLINE | |||
#undef PHADDD | |||
#define DEF(x) x ## _ssse3 | |||
#define SET_RND(x) | |||
#define SCALE_OFFSET -1 | |||
#define PHADDD(a, t) \ | |||
"pshufw $0x0E, " #a ", " #t " \n\t" \ | |||
/* faster than phaddd on core2 */ \ | |||
"paddd " #t ", " #a " \n\t" | |||
#define PMULHRW(x, y, s, o) \ | |||
"pmulhrsw " #s ", " #x " \n\t" \ | |||
"pmulhrsw " #s ", " #y " \n\t" | |||
#include "mpegvideoenc_qns_template.c" | |||
#undef DEF | |||
#undef SET_RND | |||
#undef SCALE_OFFSET | |||
#undef PMULHRW | |||
#undef PHADDD | |||
#endif /* HAVE_SSSE3_INLINE */ | |||
#endif /* HAVE_INLINE_ASM */ | |||
av_cold void ff_mpegvideoencdsp_init_x86(MpegvideoEncDSPContext *c, | |||
AVCodecContext *avctx) | |||
{ | |||
#if HAVE_INLINE_ASM | |||
int cpu_flags = av_get_cpu_flags(); | |||
if (INLINE_MMX(cpu_flags)) { | |||
if (!(avctx->flags & CODEC_FLAG_BITEXACT)) { | |||
c->try_8x8basis = try_8x8basis_mmx; | |||
} | |||
c->add_8x8basis = add_8x8basis_mmx; | |||
} | |||
if (INLINE_AMD3DNOW(cpu_flags)) { | |||
if (!(avctx->flags & CODEC_FLAG_BITEXACT)) { | |||
c->try_8x8basis = try_8x8basis_3dnow; | |||
} | |||
c->add_8x8basis = add_8x8basis_3dnow; | |||
} | |||
#if HAVE_SSSE3_INLINE | |||
if (INLINE_SSSE3(cpu_flags)) { | |||
if (!(avctx->flags & CODEC_FLAG_BITEXACT)) { | |||
c->try_8x8basis = try_8x8basis_ssse3; | |||
} | |||
c->add_8x8basis = add_8x8basis_ssse3; | |||
} | |||
#endif /* HAVE_SSSE3_INLINE */ | |||
#endif /* HAVE_INLINE_ASM */ | |||
} |