* qatar/master:
  vc1dec: Remove separate scaling function for interlaced field MVs
  vc1dec: Invoke edge_emulation regardless of MV precision
  x86: Use consistent 3dnowext function and macro name suffixes
  g723_1: scale output as supposed for the case with postfilter disabled
  g723_1: increase excitation storage by 4
  g723_1: fix upper bound parameter from inverse maximum autocorrelation
  g723_1: make scale_vector() behave like the reference
  g723_1: fix off-by-one error in normalize_bits()
  g723_1: save/restore excitation with offset to store LPC history
  wmapro: prevent division by zero when sample rate is unspecified
  x86: proresdsp: improve SIGNEXTEND macro comments
  x86: h264dsp: K&R formatting cosmetics
  LICENSE: Document all GPL files

Conflicts:
  libavcodec/g723_1.c
  libavcodec/wmaprodec.c
  libavcodec/x86/h264dsp_mmx.c

Merged-by: Michael Niedermayer <michaelni@gmx.at>
tags/n1.0
@@ -17,6 +17,14 @@ Specifically, the GPL parts of FFmpeg are | |||||
- optional x86 optimizations in the files | - optional x86 optimizations in the files | ||||
libavcodec/x86/idct_mmx.c | libavcodec/x86/idct_mmx.c | ||||
- the X11 grabber in libavdevice/x11grab.c | - the X11 grabber in libavdevice/x11grab.c | ||||
- the texi2pod.pl tool | |||||
- the following filters in libavfilter: | |||||
- vf_blackframe.c | |||||
- vf_boxblur.c | |||||
- vf_cropdetect.c | |||||
- vf_delogo.c | |||||
- vf_hqdn3d.c | |||||
- vf_yadif.c | |||||
There are a handful of files under other licensing terms, namely: | There are a handful of files under other licensing terms, namely: | ||||
@@ -52,7 +52,7 @@ typedef struct g723_1_context { | |||||
int16_t prev_lsp[LPC_ORDER]; | int16_t prev_lsp[LPC_ORDER]; | ||||
int16_t prev_excitation[PITCH_MAX]; | int16_t prev_excitation[PITCH_MAX]; | ||||
int16_t excitation[PITCH_MAX + FRAME_LEN]; | |||||
int16_t excitation[PITCH_MAX + FRAME_LEN + 4]; | |||||
int16_t synth_mem[LPC_ORDER]; | int16_t synth_mem[LPC_ORDER]; | ||||
int16_t fir_mem[LPC_ORDER]; | int16_t fir_mem[LPC_ORDER]; | ||||
int iir_mem[LPC_ORDER]; | int iir_mem[LPC_ORDER]; | ||||
@@ -267,8 +267,10 @@ static int scale_vector(int16_t *vector, int length) | |||||
bits = normalize_bits(max, 15); | bits = normalize_bits(max, 15); | ||||
scale = shift_table[bits]; | scale = shift_table[bits]; | ||||
for (i = 0; i < length; i++) | |||||
for (i = 0; i < length; i++) { | |||||
av_assert2(av_clipl_int32(vector[i] * (int64_t)scale << 1) == vector[i] * (int64_t)scale << 1); | |||||
vector[i] = (vector[i] * scale) >> 3; | vector[i] = (vector[i] * scale) >> 3; | ||||
} | |||||
return bits - 3; | return bits - 3; | ||||
} | } | ||||
@@ -592,7 +594,10 @@ static int autocorr_max(G723_1_Context *p, int offset, int *ccr_max, | |||||
int i; | int i; | ||||
pitch_lag = FFMIN(PITCH_MAX - 3, pitch_lag); | pitch_lag = FFMIN(PITCH_MAX - 3, pitch_lag); | ||||
limit = FFMIN(FRAME_LEN + PITCH_MAX - offset - length, pitch_lag + 3); | |||||
if (dir > 0) | |||||
limit = FFMIN(FRAME_LEN + PITCH_MAX - offset - length, pitch_lag + 3); | |||||
else | |||||
limit = pitch_lag + 3; | |||||
for (i = pitch_lag - 3; i <= limit; i++) { | for (i = pitch_lag - 3; i <= limit; i++) { | ||||
ccr = ff_dot_product(buf, buf + dir * i, length)<<1; | ccr = ff_dot_product(buf, buf + dir * i, length)<<1; | ||||
@@ -967,7 +972,6 @@ static int g723_1_decode_frame(AVCodecContext *avctx, void *data, | |||||
G723_1_Context *p = avctx->priv_data; | G723_1_Context *p = avctx->priv_data; | ||||
const uint8_t *buf = avpkt->data; | const uint8_t *buf = avpkt->data; | ||||
int buf_size = avpkt->size; | int buf_size = avpkt->size; | ||||
int16_t *out; | |||||
int dec_mode = buf[0] & 3; | int dec_mode = buf[0] & 3; | ||||
PPFParam ppf[SUBFRAMES]; | PPFParam ppf[SUBFRAMES]; | ||||
@@ -975,6 +979,7 @@ static int g723_1_decode_frame(AVCodecContext *avctx, void *data, | |||||
int16_t lpc[SUBFRAMES * LPC_ORDER]; | int16_t lpc[SUBFRAMES * LPC_ORDER]; | ||||
int16_t acb_vector[SUBFRAME_LEN]; | int16_t acb_vector[SUBFRAME_LEN]; | ||||
int16_t *vector_ptr; | int16_t *vector_ptr; | ||||
int16_t *out; | |||||
int bad_frame = 0, i, j, ret; | int bad_frame = 0, i, j, ret; | ||||
if (!buf_size || buf_size < frame_size[dec_mode]) { | if (!buf_size || buf_size < frame_size[dec_mode]) { | ||||
@@ -995,8 +1000,8 @@ static int g723_1_decode_frame(AVCodecContext *avctx, void *data, | |||||
av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n"); | av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n"); | ||||
return ret; | return ret; | ||||
} | } | ||||
out= (int16_t*)p->frame.data[0]; | |||||
out = (int16_t *)p->frame.data[0]; | |||||
if (p->cur_frame_type == ACTIVE_FRAME) { | if (p->cur_frame_type == ACTIVE_FRAME) { | ||||
if (!bad_frame) | if (!bad_frame) | ||||
@@ -1079,7 +1084,7 @@ static int g723_1_decode_frame(AVCodecContext *avctx, void *data, | |||||
memcpy(p->prev_excitation, p->excitation + FRAME_LEN, | memcpy(p->prev_excitation, p->excitation + FRAME_LEN, | ||||
PITCH_MAX * sizeof(*p->excitation)); | PITCH_MAX * sizeof(*p->excitation)); | ||||
} else { | } else { | ||||
memset(out, 0, sizeof(int16_t)*FRAME_LEN); | |||||
memset(out, 0, FRAME_LEN * 2); | |||||
av_log(avctx, AV_LOG_WARNING, | av_log(avctx, AV_LOG_WARNING, | ||||
"G.723.1: Comfort noise generation not supported yet\n"); | "G.723.1: Comfort noise generation not supported yet\n"); | ||||
return frame_size[dec_mode]; | return frame_size[dec_mode]; | ||||
@@ -1094,13 +1099,18 @@ static int g723_1_decode_frame(AVCodecContext *avctx, void *data, | |||||
0, 1, 1 << 12); | 0, 1, 1 << 12); | ||||
memcpy(p->synth_mem, out + FRAME_LEN, LPC_ORDER * sizeof(int16_t)); | memcpy(p->synth_mem, out + FRAME_LEN, LPC_ORDER * sizeof(int16_t)); | ||||
if (p->postfilter) | |||||
if (p->postfilter) { | |||||
formant_postfilter(p, lpc, out); | formant_postfilter(p, lpc, out); | ||||
} else { // if output is not postfiltered it should be scaled by 2 | |||||
for (i = 0; i < FRAME_LEN; i++) | |||||
out[LPC_ORDER + i] = av_clip_int16(out[LPC_ORDER + i] << 1); | |||||
} | |||||
memmove(out, out + LPC_ORDER, sizeof(int16_t)*FRAME_LEN); | memmove(out, out + LPC_ORDER, sizeof(int16_t)*FRAME_LEN); | ||||
p->frame.nb_samples = FRAME_LEN; | p->frame.nb_samples = FRAME_LEN; | ||||
*(AVFrame*)data = p->frame; | |||||
*got_frame_ptr = 1; | |||||
*got_frame_ptr = 1; | |||||
*(AVFrame *)data = p->frame; | |||||
return frame_size[dec_mode]; | return frame_size[dec_mode]; | ||||
} | } | ||||
@@ -1882,8 +1882,8 @@ static void vc1_interp_mc(VC1Context *v) | |||||
} | } | ||||
if (v->rangeredfrm || s->h_edge_pos < 22 || v_edge_pos < 22 | if (v->rangeredfrm || s->h_edge_pos < 22 || v_edge_pos < 22 | ||||
|| (unsigned)(src_x - s->mspel) > s->h_edge_pos - (mx & 3) - 16 - s->mspel * 3 | |||||
|| (unsigned)(src_y - s->mspel) > v_edge_pos - (my & 3) - 16 - s->mspel * 3) { | |||||
|| (unsigned)(src_x - 1) > s->h_edge_pos - (mx & 3) - 16 - 3 | |||||
|| (unsigned)(src_y - 1) > v_edge_pos - (my & 3) - 16 - 3) { | |||||
uint8_t *uvbuf = s->edge_emu_buffer + 19 * s->linesize; | uint8_t *uvbuf = s->edge_emu_buffer + 19 * s->linesize; | ||||
srcY -= s->mspel * (1 + s->linesize); | srcY -= s->mspel * (1 + s->linesize); | ||||
@@ -1979,20 +1979,6 @@ static av_always_inline int scale_mv(int value, int bfrac, int inv, int qs) | |||||
#endif | #endif | ||||
} | } | ||||
static av_always_inline int scale_mv_intfi(int value, int bfrac, int inv, | |||||
int qs, int qs_last) | |||||
{ | |||||
int n = bfrac; | |||||
if (inv) | |||||
n -= 256; | |||||
n <<= !qs_last; | |||||
if (!qs) | |||||
return (value * n + 255) >> 9; | |||||
else | |||||
return (value * n + 128) >> 8; | |||||
} | |||||
/** Reconstruct motion vector for B-frame and do motion compensation | /** Reconstruct motion vector for B-frame and do motion compensation | ||||
*/ | */ | ||||
static inline void vc1_b_mc(VC1Context *v, int dmv_x[2], int dmv_y[2], | static inline void vc1_b_mc(VC1Context *v, int dmv_x[2], int dmv_y[2], | ||||
@@ -2246,14 +2232,14 @@ static inline void vc1_pred_b_mv_intfi(VC1Context *v, int n, int *dmv_x, int *dm | |||||
if (v->bmvtype == BMV_TYPE_DIRECT) { | if (v->bmvtype == BMV_TYPE_DIRECT) { | ||||
int total_opp, k, f; | int total_opp, k, f; | ||||
if (s->next_picture.f.mb_type[mb_pos + v->mb_off] != MB_TYPE_INTRA) { | if (s->next_picture.f.mb_type[mb_pos + v->mb_off] != MB_TYPE_INTRA) { | ||||
s->mv[0][0][0] = scale_mv_intfi(s->next_picture.f.motion_val[1][s->block_index[0] + v->blocks_off][0], | |||||
v->bfraction, 0, s->quarter_sample, v->qs_last); | |||||
s->mv[0][0][1] = scale_mv_intfi(s->next_picture.f.motion_val[1][s->block_index[0] + v->blocks_off][1], | |||||
v->bfraction, 0, s->quarter_sample, v->qs_last); | |||||
s->mv[1][0][0] = scale_mv_intfi(s->next_picture.f.motion_val[1][s->block_index[0] + v->blocks_off][0], | |||||
v->bfraction, 1, s->quarter_sample, v->qs_last); | |||||
s->mv[1][0][1] = scale_mv_intfi(s->next_picture.f.motion_val[1][s->block_index[0] + v->blocks_off][1], | |||||
v->bfraction, 1, s->quarter_sample, v->qs_last); | |||||
s->mv[0][0][0] = scale_mv(s->next_picture.f.motion_val[1][s->block_index[0] + v->blocks_off][0], | |||||
v->bfraction, 0, s->quarter_sample); | |||||
s->mv[0][0][1] = scale_mv(s->next_picture.f.motion_val[1][s->block_index[0] + v->blocks_off][1], | |||||
v->bfraction, 0, s->quarter_sample); | |||||
s->mv[1][0][0] = scale_mv(s->next_picture.f.motion_val[1][s->block_index[0] + v->blocks_off][0], | |||||
v->bfraction, 1, s->quarter_sample); | |||||
s->mv[1][0][1] = scale_mv(s->next_picture.f.motion_val[1][s->block_index[0] + v->blocks_off][1], | |||||
v->bfraction, 1, s->quarter_sample); | |||||
total_opp = v->mv_f_next[0][s->block_index[0] + v->blocks_off] | total_opp = v->mv_f_next[0][s->block_index[0] + v->blocks_off] | ||||
+ v->mv_f_next[0][s->block_index[1] + v->blocks_off] | + v->mv_f_next[0][s->block_index[1] + v->blocks_off] | ||||
@@ -341,6 +341,11 @@ static av_cold int decode_init(AVCodecContext *avctx) | |||||
return AVERROR_INVALIDDATA; | return AVERROR_INVALIDDATA; | ||||
} | } | ||||
if (s->avctx->sample_rate <= 0) { | |||||
av_log(avctx, AV_LOG_ERROR, "invalid sample rate\n"); | |||||
return AVERROR_INVALIDDATA; | |||||
} | |||||
s->num_channels = avctx->channels; | s->num_channels = avctx->channels; | ||||
if (s->num_channels < 0) { | if (s->num_channels < 0) { | ||||
@@ -2485,9 +2485,9 @@ static void ac3_downmix_sse(float (*samples)[256], float (*matrix)[2], | |||||
} | } | ||||
#if HAVE_6REGS | #if HAVE_6REGS | ||||
static void vector_fmul_window_3dnow2(float *dst, const float *src0, | |||||
const float *src1, const float *win, | |||||
int len) | |||||
static void vector_fmul_window_3dnowext(float *dst, const float *src0, | |||||
const float *src1, const float *win, | |||||
int len) | |||||
{ | { | ||||
x86_reg i = -len * 4; | x86_reg i = -len * 4; | ||||
x86_reg j = len * 4 - 8; | x86_reg j = len * 4 - 8; | ||||
@@ -2939,11 +2939,11 @@ static void dsputil_init_3dnow(DSPContext *c, AVCodecContext *avctx, | |||||
#endif | #endif | ||||
} | } | ||||
static void dsputil_init_3dnow2(DSPContext *c, AVCodecContext *avctx, | |||||
int mm_flags) | |||||
static void dsputil_init_3dnowext(DSPContext *c, AVCodecContext *avctx, | |||||
int mm_flags) | |||||
{ | { | ||||
#if HAVE_6REGS && HAVE_INLINE_ASM | #if HAVE_6REGS && HAVE_INLINE_ASM | ||||
c->vector_fmul_window = vector_fmul_window_3dnow2; | |||||
c->vector_fmul_window = vector_fmul_window_3dnowext; | |||||
#endif | #endif | ||||
} | } | ||||
@@ -3194,7 +3194,7 @@ void ff_dsputil_init_mmx(DSPContext *c, AVCodecContext *avctx) | |||||
dsputil_init_3dnow(c, avctx, mm_flags); | dsputil_init_3dnow(c, avctx, mm_flags); | ||||
if (mm_flags & AV_CPU_FLAG_3DNOWEXT && HAVE_AMD3DNOWEXT) | if (mm_flags & AV_CPU_FLAG_3DNOWEXT && HAVE_AMD3DNOWEXT) | ||||
dsputil_init_3dnow2(c, avctx, mm_flags); | |||||
dsputil_init_3dnowext(c, avctx, mm_flags); | |||||
if (mm_flags & AV_CPU_FLAG_SSE && HAVE_SSE) | if (mm_flags & AV_CPU_FLAG_SSE && HAVE_SSE) | ||||
dsputil_init_sse(c, avctx, mm_flags); | dsputil_init_sse(c, avctx, mm_flags); | ||||
@@ -34,9 +34,9 @@ av_cold void ff_fft_init_mmx(FFTContext *s) | |||||
} | } | ||||
if (has_vectors & AV_CPU_FLAG_3DNOWEXT && HAVE_AMD3DNOWEXT) { | if (has_vectors & AV_CPU_FLAG_3DNOWEXT && HAVE_AMD3DNOWEXT) { | ||||
/* 3DNowEx for K7 */ | /* 3DNowEx for K7 */ | ||||
s->imdct_calc = ff_imdct_calc_3dnow2; | |||||
s->imdct_half = ff_imdct_half_3dnow2; | |||||
s->fft_calc = ff_fft_calc_3dnow2; | |||||
s->imdct_calc = ff_imdct_calc_3dnowext; | |||||
s->imdct_half = ff_imdct_half_3dnowext; | |||||
s->fft_calc = ff_fft_calc_3dnowext; | |||||
} | } | ||||
#endif | #endif | ||||
if (has_vectors & AV_CPU_FLAG_SSE && HAVE_SSE) { | if (has_vectors & AV_CPU_FLAG_SSE && HAVE_SSE) { | ||||
@@ -25,12 +25,12 @@ void ff_fft_permute_sse(FFTContext *s, FFTComplex *z); | |||||
void ff_fft_calc_avx(FFTContext *s, FFTComplex *z); | void ff_fft_calc_avx(FFTContext *s, FFTComplex *z); | ||||
void ff_fft_calc_sse(FFTContext *s, FFTComplex *z); | void ff_fft_calc_sse(FFTContext *s, FFTComplex *z); | ||||
void ff_fft_calc_3dnow(FFTContext *s, FFTComplex *z); | void ff_fft_calc_3dnow(FFTContext *s, FFTComplex *z); | ||||
void ff_fft_calc_3dnow2(FFTContext *s, FFTComplex *z); | |||||
void ff_fft_calc_3dnowext(FFTContext *s, FFTComplex *z); | |||||
void ff_imdct_calc_3dnow(FFTContext *s, FFTSample *output, const FFTSample *input); | void ff_imdct_calc_3dnow(FFTContext *s, FFTSample *output, const FFTSample *input); | ||||
void ff_imdct_half_3dnow(FFTContext *s, FFTSample *output, const FFTSample *input); | void ff_imdct_half_3dnow(FFTContext *s, FFTSample *output, const FFTSample *input); | ||||
void ff_imdct_calc_3dnow2(FFTContext *s, FFTSample *output, const FFTSample *input); | |||||
void ff_imdct_half_3dnow2(FFTContext *s, FFTSample *output, const FFTSample *input); | |||||
void ff_imdct_calc_3dnowext(FFTContext *s, FFTSample *output, const FFTSample *input); | |||||
void ff_imdct_half_3dnowext(FFTContext *s, FFTSample *output, const FFTSample *input); | |||||
void ff_imdct_calc_sse(FFTContext *s, FFTSample *output, const FFTSample *input); | void ff_imdct_calc_sse(FFTContext *s, FFTSample *output, const FFTSample *input); | ||||
void ff_imdct_half_sse(FFTContext *s, FFTSample *output, const FFTSample *input); | void ff_imdct_half_sse(FFTContext *s, FFTSample *output, const FFTSample *input); | ||||
void ff_imdct_half_avx(FFTContext *s, FFTSample *output, const FFTSample *input); | void ff_imdct_half_avx(FFTContext *s, FFTSample *output, const FFTSample *input); | ||||
@@ -93,14 +93,14 @@ cextern cos_ %+ i | |||||
SECTION_TEXT | SECTION_TEXT | ||||
%macro T2_3DN 4 ; z0, z1, mem0, mem1 | |||||
%macro T2_3DNOW 4 ; z0, z1, mem0, mem1 | |||||
mova %1, %3 | mova %1, %3 | ||||
mova %2, %1 | mova %2, %1 | ||||
pfadd %1, %4 | pfadd %1, %4 | ||||
pfsub %2, %4 | pfsub %2, %4 | ||||
%endmacro | %endmacro | ||||
%macro T4_3DN 6 ; z0, z1, z2, z3, tmp0, tmp1 | |||||
%macro T4_3DNOW 6 ; z0, z1, z2, z3, tmp0, tmp1 | |||||
mova %5, %3 | mova %5, %3 | ||||
pfsub %3, %4 | pfsub %3, %4 | ||||
pfadd %5, %4 ; {t6,t5} | pfadd %5, %4 ; {t6,t5} | ||||
@@ -445,13 +445,13 @@ fft16_sse: | |||||
ret | ret | ||||
%macro FFT48_3DN 0 | |||||
%macro FFT48_3DNOW 0 | |||||
align 16 | align 16 | ||||
fft4 %+ SUFFIX: | fft4 %+ SUFFIX: | ||||
T2_3DN m0, m1, Z(0), Z(1) | |||||
T2_3DNOW m0, m1, Z(0), Z(1) | |||||
mova m2, Z(2) | mova m2, Z(2) | ||||
mova m3, Z(3) | mova m3, Z(3) | ||||
T4_3DN m0, m1, m2, m3, m4, m5 | |||||
T4_3DNOW m0, m1, m2, m3, m4, m5 | |||||
PUNPCK m0, m1, m4 | PUNPCK m0, m1, m4 | ||||
PUNPCK m2, m3, m5 | PUNPCK m2, m3, m5 | ||||
mova Z(0), m0 | mova Z(0), m0 | ||||
@@ -462,14 +462,14 @@ fft4 %+ SUFFIX: | |||||
align 16 | align 16 | ||||
fft8 %+ SUFFIX: | fft8 %+ SUFFIX: | ||||
T2_3DN m0, m1, Z(0), Z(1) | |||||
T2_3DNOW m0, m1, Z(0), Z(1) | |||||
mova m2, Z(2) | mova m2, Z(2) | ||||
mova m3, Z(3) | mova m3, Z(3) | ||||
T4_3DN m0, m1, m2, m3, m4, m5 | |||||
T4_3DNOW m0, m1, m2, m3, m4, m5 | |||||
mova Z(0), m0 | mova Z(0), m0 | ||||
mova Z(2), m2 | mova Z(2), m2 | ||||
T2_3DN m4, m5, Z(4), Z(5) | |||||
T2_3DN m6, m7, Z2(6), Z2(7) | |||||
T2_3DNOW m4, m5, Z(4), Z(5) | |||||
T2_3DNOW m6, m7, Z2(6), Z2(7) | |||||
PSWAPD m0, m5 | PSWAPD m0, m5 | ||||
PSWAPD m2, m7 | PSWAPD m2, m7 | ||||
pxor m0, [ps_m1p1] | pxor m0, [ps_m1p1] | ||||
@@ -478,12 +478,12 @@ fft8 %+ SUFFIX: | |||||
pfadd m7, m2 | pfadd m7, m2 | ||||
pfmul m5, [ps_root2] | pfmul m5, [ps_root2] | ||||
pfmul m7, [ps_root2] | pfmul m7, [ps_root2] | ||||
T4_3DN m1, m3, m5, m7, m0, m2 | |||||
T4_3DNOW m1, m3, m5, m7, m0, m2 | |||||
mova Z(5), m5 | mova Z(5), m5 | ||||
mova Z2(7), m7 | mova Z2(7), m7 | ||||
mova m0, Z(0) | mova m0, Z(0) | ||||
mova m2, Z(2) | mova m2, Z(2) | ||||
T4_3DN m0, m2, m4, m6, m5, m7 | |||||
T4_3DNOW m0, m2, m4, m6, m5, m7 | |||||
PUNPCK m0, m1, m5 | PUNPCK m0, m1, m5 | ||||
PUNPCK m2, m3, m7 | PUNPCK m2, m3, m7 | ||||
mova Z(0), m0 | mova Z(0), m0 | ||||
@@ -501,7 +501,7 @@ fft8 %+ SUFFIX: | |||||
%if ARCH_X86_32 | %if ARCH_X86_32 | ||||
%macro PSWAPD 2 | %macro PSWAPD 2 | ||||
%if cpuflag(3dnow2) | |||||
%if cpuflag(3dnowext) | |||||
pswapd %1, %2 | pswapd %1, %2 | ||||
%elifidn %1, %2 | %elifidn %1, %2 | ||||
movd [r0+12], %1 | movd [r0+12], %1 | ||||
@@ -513,11 +513,11 @@ fft8 %+ SUFFIX: | |||||
%endif | %endif | ||||
%endmacro | %endmacro | ||||
INIT_MMX 3dnow2 | |||||
FFT48_3DN | |||||
INIT_MMX 3dnowext | |||||
FFT48_3DNOW | |||||
INIT_MMX 3dnow | INIT_MMX 3dnow | ||||
FFT48_3DN | |||||
FFT48_3DNOW | |||||
%endif | %endif | ||||
%define Z(x) [zcq + o1q*(x&6) + mmsize*(x&1)] | %define Z(x) [zcq + o1q*(x&6) + mmsize*(x&1)] | ||||
@@ -634,7 +634,7 @@ cglobal fft_calc, 2,5,8 | |||||
%if ARCH_X86_32 | %if ARCH_X86_32 | ||||
INIT_MMX 3dnow | INIT_MMX 3dnow | ||||
FFT_CALC_FUNC | FFT_CALC_FUNC | ||||
INIT_MMX 3dnow2 | |||||
INIT_MMX 3dnowext | |||||
FFT_CALC_FUNC | FFT_CALC_FUNC | ||||
%endif | %endif | ||||
INIT_XMM sse | INIT_XMM sse | ||||
@@ -728,7 +728,7 @@ cglobal imdct_calc, 3,5,3 | |||||
%if ARCH_X86_32 | %if ARCH_X86_32 | ||||
INIT_MMX 3dnow | INIT_MMX 3dnow | ||||
IMDCT_CALC_FUNC | IMDCT_CALC_FUNC | ||||
INIT_MMX 3dnow2 | |||||
INIT_MMX 3dnowext | |||||
IMDCT_CALC_FUNC | IMDCT_CALC_FUNC | ||||
%endif | %endif | ||||
@@ -744,8 +744,8 @@ INIT_MMX 3dnow | |||||
%define unpckhps punpckhdq | %define unpckhps punpckhdq | ||||
DECL_PASS pass_3dnow, PASS_SMALL 1, [wq], [wq+o1q] | DECL_PASS pass_3dnow, PASS_SMALL 1, [wq], [wq+o1q] | ||||
DECL_PASS pass_interleave_3dnow, PASS_BIG 0 | DECL_PASS pass_interleave_3dnow, PASS_BIG 0 | ||||
%define pass_3dnow2 pass_3dnow | |||||
%define pass_interleave_3dnow2 pass_interleave_3dnow | |||||
%define pass_3dnowext pass_3dnow | |||||
%define pass_interleave_3dnowext pass_interleave_3dnow | |||||
%endif | %endif | ||||
%ifdef PIC | %ifdef PIC | ||||
@@ -814,7 +814,7 @@ DECL_FFT 5, _interleave | |||||
INIT_MMX 3dnow | INIT_MMX 3dnow | ||||
DECL_FFT 4 | DECL_FFT 4 | ||||
DECL_FFT 4, _interleave | DECL_FFT 4, _interleave | ||||
INIT_MMX 3dnow2 | |||||
INIT_MMX 3dnowext | |||||
DECL_FFT 4 | DECL_FFT 4 | ||||
DECL_FFT 4, _interleave | DECL_FFT 4, _interleave | ||||
%endif | %endif | ||||
@@ -846,7 +846,7 @@ INIT_XMM sse | |||||
PSWAPD m5, m3 | PSWAPD m5, m3 | ||||
pfmul m2, m3 | pfmul m2, m3 | ||||
pfmul m6, m5 | pfmul m6, m5 | ||||
%if cpuflag(3dnow2) | |||||
%if cpuflag(3dnowext) | |||||
pfpnacc m0, m4 | pfpnacc m0, m4 | ||||
pfpnacc m2, m6 | pfpnacc m2, m6 | ||||
%else | %else | ||||
@@ -1019,7 +1019,7 @@ cglobal imdct_half, 3,12,8; FFTContext *s, FFTSample *output, const FFTSample *i | |||||
xor r4, r4 | xor r4, r4 | ||||
sub r4, r3 | sub r4, r3 | ||||
%endif | %endif | ||||
%if notcpuflag(3dnow2) && mmsize == 8 | |||||
%if notcpuflag(3dnowext) && mmsize == 8 | |||||
movd m7, [ps_m1m1m1m1] | movd m7, [ps_m1m1m1m1] | ||||
%endif | %endif | ||||
.pre: | .pre: | ||||
@@ -1103,7 +1103,7 @@ DECL_IMDCT POSROTATESHUF | |||||
INIT_MMX 3dnow | INIT_MMX 3dnow | ||||
DECL_IMDCT POSROTATESHUF_3DNOW | DECL_IMDCT POSROTATESHUF_3DNOW | ||||
INIT_MMX 3dnow2 | |||||
INIT_MMX 3dnowext | |||||
DECL_IMDCT POSROTATESHUF_3DNOW | DECL_IMDCT POSROTATESHUF_3DNOW | ||||
%endif | %endif | ||||
@@ -249,7 +249,7 @@ FLOAT_TO_INT16_INTERLEAVE2 sse2 | |||||
%macro PSWAPD_SSE 2 | %macro PSWAPD_SSE 2 | ||||
pshufw %1, %2, 0x4e | pshufw %1, %2, 0x4e | ||||
%endmacro | %endmacro | ||||
%macro PSWAPD_3DN1 2 | |||||
%macro PSWAPD_3DNOW 2 | |||||
movq %1, %2 | movq %1, %2 | ||||
psrlq %1, 32 | psrlq %1, 32 | ||||
punpckldq %1, %2 | punpckldq %1, %2 | ||||
@@ -306,10 +306,10 @@ cglobal float_to_int16_interleave6_%1, 2,8,0, dst, src, src1, src2, src3, src4, | |||||
%define pswapd PSWAPD_SSE | %define pswapd PSWAPD_SSE | ||||
FLOAT_TO_INT16_INTERLEAVE6 sse | FLOAT_TO_INT16_INTERLEAVE6 sse | ||||
%define cvtps2pi pf2id | %define cvtps2pi pf2id | ||||
%define pswapd PSWAPD_3DN1 | |||||
%define pswapd PSWAPD_3DNOW | |||||
FLOAT_TO_INT16_INTERLEAVE6 3dnow | FLOAT_TO_INT16_INTERLEAVE6 3dnow | ||||
%undef pswapd | %undef pswapd | ||||
FLOAT_TO_INT16_INTERLEAVE6 3dn2 | |||||
FLOAT_TO_INT16_INTERLEAVE6 3dnowext | |||||
%undef cvtps2pi | %undef cvtps2pi | ||||
;----------------------------------------------------------------------------- | ;----------------------------------------------------------------------------- | ||||
@@ -46,7 +46,7 @@ void ff_float_to_int16_interleave2_sse2 (int16_t *dst, const float **src, long l | |||||
void ff_float_to_int16_interleave6_sse(int16_t *dst, const float **src, int len); | void ff_float_to_int16_interleave6_sse(int16_t *dst, const float **src, int len); | ||||
void ff_float_to_int16_interleave6_3dnow(int16_t *dst, const float **src, int len); | void ff_float_to_int16_interleave6_3dnow(int16_t *dst, const float **src, int len); | ||||
void ff_float_to_int16_interleave6_3dn2(int16_t *dst, const float **src, int len); | |||||
void ff_float_to_int16_interleave6_3dnowext(int16_t *dst, const float **src, int len); | |||||
#define ff_float_to_int16_interleave6_sse2 ff_float_to_int16_interleave6_sse | #define ff_float_to_int16_interleave6_sse2 ff_float_to_int16_interleave6_sse | ||||
@@ -74,9 +74,11 @@ FLOAT_TO_INT16_INTERLEAVE(3dnow) | |||||
FLOAT_TO_INT16_INTERLEAVE(sse) | FLOAT_TO_INT16_INTERLEAVE(sse) | ||||
FLOAT_TO_INT16_INTERLEAVE(sse2) | FLOAT_TO_INT16_INTERLEAVE(sse2) | ||||
static void float_to_int16_interleave_3dn2(int16_t *dst, const float **src, long len, int channels){ | |||||
static void float_to_int16_interleave_3dnowext(int16_t *dst, const float **src, | |||||
long len, int channels) | |||||
{ | |||||
if(channels==6) | if(channels==6) | ||||
ff_float_to_int16_interleave6_3dn2(dst, src, len); | |||||
ff_float_to_int16_interleave6_3dnowext(dst, src, len); | |||||
else | else | ||||
float_to_int16_interleave_3dnow(dst, src, len, channels); | float_to_int16_interleave_3dnow(dst, src, len, channels); | ||||
} | } | ||||
@@ -126,7 +128,7 @@ void ff_fmt_convert_init_x86(FmtConvertContext *c, AVCodecContext *avctx) | |||||
} | } | ||||
if (HAVE_AMD3DNOWEXT && mm_flags & AV_CPU_FLAG_3DNOWEXT) { | if (HAVE_AMD3DNOWEXT && mm_flags & AV_CPU_FLAG_3DNOWEXT) { | ||||
if(!(avctx->flags & CODEC_FLAG_BITEXACT)){ | if(!(avctx->flags & CODEC_FLAG_BITEXACT)){ | ||||
c->float_to_int16_interleave = float_to_int16_interleave_3dn2; | |||||
c->float_to_int16_interleave = float_to_int16_interleave_3dnowext; | |||||
} | } | ||||
} | } | ||||
if (HAVE_SSE && mm_flags & AV_CPU_FLAG_SSE) { | if (HAVE_SSE && mm_flags & AV_CPU_FLAG_SSE) { | ||||
@@ -25,8 +25,10 @@ | |||||
/***********************************/ | /***********************************/ | ||||
/* IDCT */ | /* IDCT */ | ||||
#define IDCT_ADD_FUNC(NUM, DEPTH, OPT) \ | |||||
void ff_h264_idct ## NUM ## _add_ ## DEPTH ## _ ## OPT (uint8_t *dst, int16_t *block, int stride); | |||||
#define IDCT_ADD_FUNC(NUM, DEPTH, OPT) \ | |||||
void ff_h264_idct ## NUM ## _add_ ## DEPTH ## _ ## OPT(uint8_t *dst, \ | |||||
int16_t *block, \ | |||||
int stride); | |||||
IDCT_ADD_FUNC(, 8, mmx) | IDCT_ADD_FUNC(, 8, mmx) | ||||
IDCT_ADD_FUNC(, 10, sse2) | IDCT_ADD_FUNC(, 10, sse2) | ||||
@@ -44,10 +46,10 @@ IDCT_ADD_FUNC(8, 10, avx) | |||||
#endif | #endif | ||||
#define IDCT_ADD_REP_FUNC(NUM, REP, DEPTH, OPT) \ | |||||
void ff_h264_idct ## NUM ## _add ## REP ## _ ## DEPTH ## _ ## OPT \ | |||||
(uint8_t *dst, const int *block_offset, \ | |||||
DCTELEM *block, int stride, const uint8_t nnzc[6*8]); | |||||
#define IDCT_ADD_REP_FUNC(NUM, REP, DEPTH, OPT) \ | |||||
void ff_h264_idct ## NUM ## _add ## REP ## _ ## DEPTH ## _ ## OPT \ | |||||
(uint8_t *dst, const int *block_offset, \ | |||||
DCTELEM *block, int stride, const uint8_t nnzc[6 * 8]); | |||||
IDCT_ADD_REP_FUNC(8, 4, 8, mmx) | IDCT_ADD_REP_FUNC(8, 4, 8, mmx) | ||||
IDCT_ADD_REP_FUNC(8, 4, 8, mmx2) | IDCT_ADD_REP_FUNC(8, 4, 8, mmx2) | ||||
@@ -68,10 +70,11 @@ IDCT_ADD_REP_FUNC(, 16intra, 10, avx) | |||||
#endif | #endif | ||||
#define IDCT_ADD_REP_FUNC2(NUM, REP, DEPTH, OPT) \ | |||||
void ff_h264_idct ## NUM ## _add ## REP ## _ ## DEPTH ## _ ## OPT \ | |||||
(uint8_t **dst, const int *block_offset, \ | |||||
DCTELEM *block, int stride, const uint8_t nnzc[6*8]); | |||||
#define IDCT_ADD_REP_FUNC2(NUM, REP, DEPTH, OPT) \ | |||||
void ff_h264_idct ## NUM ## _add ## REP ## _ ## DEPTH ## _ ## OPT \ | |||||
(uint8_t **dst, const int *block_offset, \ | |||||
DCTELEM *block, int stride, const uint8_t nnzc[6 * 8]); | |||||
IDCT_ADD_REP_FUNC2(, 8, 8, mmx) | IDCT_ADD_REP_FUNC2(, 8, 8, mmx) | ||||
IDCT_ADD_REP_FUNC2(, 8, 8, mmx2) | IDCT_ADD_REP_FUNC2(, 8, 8, mmx2) | ||||
IDCT_ADD_REP_FUNC2(, 8, 8, sse2) | IDCT_ADD_REP_FUNC2(, 8, 8, sse2) | ||||
@@ -80,7 +83,7 @@ IDCT_ADD_REP_FUNC2(, 8, 10, sse2) | |||||
IDCT_ADD_REP_FUNC2(, 8, 10, avx) | IDCT_ADD_REP_FUNC2(, 8, 10, avx) | ||||
#endif | #endif | ||||
void ff_h264_luma_dc_dequant_idct_mmx (DCTELEM *output, DCTELEM *input, int qmul); | |||||
void ff_h264_luma_dc_dequant_idct_mmx(DCTELEM *output, DCTELEM *input, int qmul); | |||||
void ff_h264_luma_dc_dequant_idct_sse2(DCTELEM *output, DCTELEM *input, int qmul); | void ff_h264_luma_dc_dequant_idct_sse2(DCTELEM *output, DCTELEM *input, int qmul); | ||||
/***********************************/ | /***********************************/ | ||||
@@ -91,273 +94,292 @@ void ff_h264_loop_filter_strength_mmx2(int16_t bS[2][4][4], uint8_t nnz[40], | |||||
int bidir, int edges, int step, | int bidir, int edges, int step, | ||||
int mask_mv0, int mask_mv1, int field); | int mask_mv0, int mask_mv1, int field); | ||||
#define LF_FUNC(DIR, TYPE, DEPTH, OPT) \ | |||||
void ff_deblock_ ## DIR ## _ ## TYPE ## _ ## DEPTH ## _ ## OPT (uint8_t *pix, int stride, \ | |||||
int alpha, int beta, int8_t *tc0); | |||||
#define LF_FUNC(DIR, TYPE, DEPTH, OPT) \ | |||||
void ff_deblock_ ## DIR ## _ ## TYPE ## _ ## DEPTH ## _ ## OPT(uint8_t *pix, \ | |||||
int stride, \ | |||||
int alpha, \ | |||||
int beta, \ | |||||
int8_t *tc0); | |||||
#define LF_IFUNC(DIR, TYPE, DEPTH, OPT) \ | #define LF_IFUNC(DIR, TYPE, DEPTH, OPT) \ | ||||
void ff_deblock_ ## DIR ## _ ## TYPE ## _ ## DEPTH ## _ ## OPT (uint8_t *pix, int stride, \ | |||||
int alpha, int beta); | |||||
#define LF_FUNCS(type, depth)\ | |||||
LF_FUNC (h, chroma, depth, mmx2)\ | |||||
LF_IFUNC(h, chroma_intra, depth, mmx2)\ | |||||
LF_FUNC (v, chroma, depth, mmx2)\ | |||||
LF_IFUNC(v, chroma_intra, depth, mmx2)\ | |||||
LF_FUNC (h, luma, depth, mmx2)\ | |||||
LF_IFUNC(h, luma_intra, depth, mmx2)\ | |||||
LF_FUNC (h, luma, depth, sse2)\ | |||||
LF_IFUNC(h, luma_intra, depth, sse2)\ | |||||
LF_FUNC (v, luma, depth, sse2)\ | |||||
LF_IFUNC(v, luma_intra, depth, sse2)\ | |||||
LF_FUNC (h, chroma, depth, sse2)\ | |||||
LF_IFUNC(h, chroma_intra, depth, sse2)\ | |||||
LF_FUNC (v, chroma, depth, sse2)\ | |||||
LF_IFUNC(v, chroma_intra, depth, sse2)\ | |||||
LF_FUNC (h, luma, depth, avx)\ | |||||
LF_IFUNC(h, luma_intra, depth, avx)\ | |||||
LF_FUNC (v, luma, depth, avx)\ | |||||
LF_IFUNC(v, luma_intra, depth, avx)\ | |||||
LF_FUNC (h, chroma, depth, avx)\ | |||||
LF_IFUNC(h, chroma_intra, depth, avx)\ | |||||
LF_FUNC (v, chroma, depth, avx)\ | |||||
LF_IFUNC(v, chroma_intra, depth, avx) | |||||
LF_FUNCS( uint8_t, 8) | |||||
void ff_deblock_ ## DIR ## _ ## TYPE ## _ ## DEPTH ## _ ## OPT(uint8_t *pix, \ | |||||
int stride, \ | |||||
int alpha, \ | |||||
int beta); | |||||
#define LF_FUNCS(type, depth) \ | |||||
LF_FUNC(h, chroma, depth, mmx2) \ | |||||
LF_IFUNC(h, chroma_intra, depth, mmx2) \ | |||||
LF_FUNC(v, chroma, depth, mmx2) \ | |||||
LF_IFUNC(v, chroma_intra, depth, mmx2) \ | |||||
LF_FUNC(h, luma, depth, mmx2) \ | |||||
LF_IFUNC(h, luma_intra, depth, mmx2) \ | |||||
LF_FUNC(h, luma, depth, sse2) \ | |||||
LF_IFUNC(h, luma_intra, depth, sse2) \ | |||||
LF_FUNC(v, luma, depth, sse2) \ | |||||
LF_IFUNC(v, luma_intra, depth, sse2) \ | |||||
LF_FUNC(h, chroma, depth, sse2) \ | |||||
LF_IFUNC(h, chroma_intra, depth, sse2) \ | |||||
LF_FUNC(v, chroma, depth, sse2) \ | |||||
LF_IFUNC(v, chroma_intra, depth, sse2) \ | |||||
LF_FUNC(h, luma, depth, avx) \ | |||||
LF_IFUNC(h, luma_intra, depth, avx) \ | |||||
LF_FUNC(v, luma, depth, avx) \ | |||||
LF_IFUNC(v, luma_intra, depth, avx) \ | |||||
LF_FUNC(h, chroma, depth, avx) \ | |||||
LF_IFUNC(h, chroma_intra, depth, avx) \ | |||||
LF_FUNC(v, chroma, depth, avx) \ | |||||
LF_IFUNC(v, chroma_intra, depth, avx) | |||||
LF_FUNCS(uint8_t, 8) | |||||
LF_FUNCS(uint16_t, 10) | LF_FUNCS(uint16_t, 10) | ||||
#if ARCH_X86_32 && HAVE_YASM | #if ARCH_X86_32 && HAVE_YASM | ||||
LF_FUNC (v8, luma, 8, mmx2) | |||||
static void ff_deblock_v_luma_8_mmx2(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0) | |||||
LF_FUNC(v8, luma, 8, mmx2) | |||||
static void ff_deblock_v_luma_8_mmx2(uint8_t *pix, int stride, int alpha, | |||||
int beta, int8_t *tc0) | |||||
{ | { | ||||
if((tc0[0] & tc0[1]) >= 0) | |||||
ff_deblock_v8_luma_8_mmx2(pix+0, stride, alpha, beta, tc0); | |||||
if((tc0[2] & tc0[3]) >= 0) | |||||
ff_deblock_v8_luma_8_mmx2(pix+8, stride, alpha, beta, tc0+2); | |||||
if ((tc0[0] & tc0[1]) >= 0) | |||||
ff_deblock_v8_luma_8_mmx2(pix + 0, stride, alpha, beta, tc0); | |||||
if ((tc0[2] & tc0[3]) >= 0) | |||||
ff_deblock_v8_luma_8_mmx2(pix + 8, stride, alpha, beta, tc0 + 2); | |||||
} | } | ||||
LF_IFUNC(v8, luma_intra, 8, mmx2) | |||||
static void ff_deblock_v_luma_intra_8_mmx2(uint8_t *pix, int stride, int alpha, int beta) | |||||
LF_IFUNC(v8, luma_intra, 8, mmx2) | |||||
static void ff_deblock_v_luma_intra_8_mmx2(uint8_t *pix, int stride, | |||||
int alpha, int beta) | |||||
{ | { | ||||
ff_deblock_v8_luma_intra_8_mmx2(pix+0, stride, alpha, beta); | |||||
ff_deblock_v8_luma_intra_8_mmx2(pix+8, stride, alpha, beta); | |||||
ff_deblock_v8_luma_intra_8_mmx2(pix + 0, stride, alpha, beta); | |||||
ff_deblock_v8_luma_intra_8_mmx2(pix + 8, stride, alpha, beta); | |||||
} | } | ||||
#endif /* ARCH_X86_32 */ | #endif /* ARCH_X86_32 */ | ||||
LF_FUNC (v, luma, 10, mmx2) | |||||
LF_IFUNC(v, luma_intra, 10, mmx2) | |||||
LF_FUNC(v, luma, 10, mmx2) | |||||
LF_IFUNC(v, luma_intra, 10, mmx2) | |||||
/***********************************/
/* weighted prediction */

/* Declare ff_h264_weight_<W>_<OPT>(), the 8-bit weighting routine. */
#define H264_WEIGHT(W, OPT)                                             \
    void ff_h264_weight_ ## W ## _ ## OPT(uint8_t *dst, int stride,     \
                                          int height, int log2_denom,   \
                                          int weight, int offset);

/* Declare ff_h264_biweight_<W>_<OPT>(), the 8-bit bi-weighting routine. */
#define H264_BIWEIGHT(W, OPT)                                           \
    void ff_h264_biweight_ ## W ## _ ## OPT(uint8_t *dst, uint8_t *src, \
                                            int stride, int height,     \
                                            int log2_denom, int weightd,\
                                            int weights, int offset);

/* Declare the mmx2 weight and biweight routines for width W. */
#define H264_BIWEIGHT_MMX(W)                                            \
    H264_WEIGHT(W, mmx2)                                                \
    H264_BIWEIGHT(W, mmx2)

/* As H264_BIWEIGHT_MMX, plus the sse2 pair and the ssse3 biweight. */
#define H264_BIWEIGHT_MMX_SSE(W)                                        \
    H264_BIWEIGHT_MMX(W)                                                \
    H264_WEIGHT(W, sse2)                                                \
    H264_BIWEIGHT(W, sse2)                                              \
    H264_BIWEIGHT(W, ssse3)

H264_BIWEIGHT_MMX_SSE(16)
H264_BIWEIGHT_MMX_SSE(8)
H264_BIWEIGHT_MMX(4)

/* Declare ff_h264_weight_<W>_<DEPTH>_<OPT>() for high bit depth. */
#define H264_WEIGHT_10(W, DEPTH, OPT)                                   \
    void ff_h264_weight_ ## W ## _ ## DEPTH ## _ ## OPT(uint8_t *dst,   \
                                                        int stride,     \
                                                        int height,     \
                                                        int log2_denom, \
                                                        int weight,     \
                                                        int offset);

/* Declare ff_h264_biweight_<W>_<DEPTH>_<OPT>() for high bit depth. */
#define H264_BIWEIGHT_10(W, DEPTH, OPT)                                 \
    void ff_h264_biweight_ ## W ## _ ## DEPTH ## _ ## OPT(uint8_t *dst, \
                                                          uint8_t *src, \
                                                          int stride,   \
                                                          int height,   \
                                                          int log2_denom, \
                                                          int weightd,  \
                                                          int weights,  \
                                                          int offset);

/* Declare the sse2 and sse4 weight/biweight routines for width W. */
#define H264_BIWEIGHT_10_SSE(W, DEPTH)                                  \
    H264_WEIGHT_10(W, DEPTH, sse2)                                      \
    H264_WEIGHT_10(W, DEPTH, sse4)                                      \
    H264_BIWEIGHT_10(W, DEPTH, sse2)                                    \
    H264_BIWEIGHT_10(W, DEPTH, sse4)

H264_BIWEIGHT_10_SSE(16, 10)
H264_BIWEIGHT_10_SSE(8, 10)
H264_BIWEIGHT_10_SSE(4, 10)
/**
 * Install x86 SIMD implementations into an H264DSPContext.
 *
 * Function pointers are chosen from the CPU flags reported by
 * av_get_cpu_flags(), the bit depth and the chroma format. Within each
 * bit-depth branch the checks run from older to newer instruction sets, and
 * a later assignment overwrites an earlier one. Nothing is done unless
 * HAVE_YASM is set.
 *
 * @param c                 context whose function pointers are filled in
 * @param bit_depth         only 8 and 10 select optimized routines here
 * @param chroma_format_idc several chroma routines are only installed
 *                          when this equals 1
 */
void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth,
                         const int chroma_format_idc)
{
#if HAVE_YASM
    int mm_flags = av_get_cpu_flags();

    if (chroma_format_idc == 1 && mm_flags & AV_CPU_FLAG_MMX2)
        c->h264_loop_filter_strength = ff_h264_loop_filter_strength_mmx2;

    if (bit_depth == 8) {
        if (mm_flags & AV_CPU_FLAG_MMX) {
            /* The plain MMX IDCTs also serve as the DC-only entry points
             * until a dedicated DC routine is installed below. */
            c->h264_idct_dc_add         =
            c->h264_idct_add            = ff_h264_idct_add_8_mmx;
            c->h264_idct8_dc_add        =
            c->h264_idct8_add           = ff_h264_idct8_add_8_mmx;

            c->h264_idct_add16          = ff_h264_idct_add16_8_mmx;
            c->h264_idct8_add4          = ff_h264_idct8_add4_8_mmx;
            if (chroma_format_idc == 1)
                c->h264_idct_add8       = ff_h264_idct_add8_8_mmx;
            c->h264_idct_add16intra     = ff_h264_idct_add16intra_8_mmx;
            if (mm_flags & AV_CPU_FLAG_CMOV)
                c->h264_luma_dc_dequant_idct = ff_h264_luma_dc_dequant_idct_mmx;

            if (mm_flags & AV_CPU_FLAG_MMX2) {
                c->h264_idct_dc_add     = ff_h264_idct_dc_add_8_mmx2;
                c->h264_idct8_dc_add    = ff_h264_idct8_dc_add_8_mmx2;
                c->h264_idct_add16      = ff_h264_idct_add16_8_mmx2;
                c->h264_idct8_add4      = ff_h264_idct8_add4_8_mmx2;
                if (chroma_format_idc == 1)
                    c->h264_idct_add8   = ff_h264_idct_add8_8_mmx2;
                c->h264_idct_add16intra = ff_h264_idct_add16intra_8_mmx2;

                c->h264_v_loop_filter_chroma       = ff_deblock_v_chroma_8_mmx2;
                c->h264_v_loop_filter_chroma_intra = ff_deblock_v_chroma_intra_8_mmx2;
                if (chroma_format_idc == 1) {
                    c->h264_h_loop_filter_chroma       = ff_deblock_h_chroma_8_mmx2;
                    c->h264_h_loop_filter_chroma_intra = ff_deblock_h_chroma_intra_8_mmx2;
                }
#if ARCH_X86_32
                c->h264_v_loop_filter_luma       = ff_deblock_v_luma_8_mmx2;
                c->h264_h_loop_filter_luma       = ff_deblock_h_luma_8_mmx2;
                c->h264_v_loop_filter_luma_intra = ff_deblock_v_luma_intra_8_mmx2;
                c->h264_h_loop_filter_luma_intra = ff_deblock_h_luma_intra_8_mmx2;
#endif /* ARCH_X86_32 */
                c->weight_h264_pixels_tab[0] = ff_h264_weight_16_mmx2;
                c->weight_h264_pixels_tab[1] = ff_h264_weight_8_mmx2;
                c->weight_h264_pixels_tab[2] = ff_h264_weight_4_mmx2;

                c->biweight_h264_pixels_tab[0] = ff_h264_biweight_16_mmx2;
                c->biweight_h264_pixels_tab[1] = ff_h264_biweight_8_mmx2;
                c->biweight_h264_pixels_tab[2] = ff_h264_biweight_4_mmx2;

                if (mm_flags & AV_CPU_FLAG_SSE2) {
                    c->h264_idct8_add           = ff_h264_idct8_add_8_sse2;

                    c->h264_idct_add16          = ff_h264_idct_add16_8_sse2;
                    c->h264_idct8_add4          = ff_h264_idct8_add4_8_sse2;
                    if (chroma_format_idc == 1)
                        c->h264_idct_add8       = ff_h264_idct_add8_8_sse2;
                    c->h264_idct_add16intra     = ff_h264_idct_add16intra_8_sse2;
                    c->h264_luma_dc_dequant_idct = ff_h264_luma_dc_dequant_idct_sse2;

                    /* Slot 2 keeps the mmx2 routine: no sse2 variant is
                     * assigned for it here. */
                    c->weight_h264_pixels_tab[0] = ff_h264_weight_16_sse2;
                    c->weight_h264_pixels_tab[1] = ff_h264_weight_8_sse2;

                    c->biweight_h264_pixels_tab[0] = ff_h264_biweight_16_sse2;
                    c->biweight_h264_pixels_tab[1] = ff_h264_biweight_8_sse2;

#if HAVE_ALIGNED_STACK
                    c->h264_v_loop_filter_luma       = ff_deblock_v_luma_8_sse2;
                    c->h264_h_loop_filter_luma       = ff_deblock_h_luma_8_sse2;
                    c->h264_v_loop_filter_luma_intra = ff_deblock_v_luma_intra_8_sse2;
                    c->h264_h_loop_filter_luma_intra = ff_deblock_h_luma_intra_8_sse2;
#endif /* HAVE_ALIGNED_STACK */
                }
                if (mm_flags & AV_CPU_FLAG_SSSE3) {
                    c->biweight_h264_pixels_tab[0] = ff_h264_biweight_16_ssse3;
                    c->biweight_h264_pixels_tab[1] = ff_h264_biweight_8_ssse3;
                }
                if (HAVE_AVX && mm_flags & AV_CPU_FLAG_AVX) {
#if HAVE_ALIGNED_STACK
                    c->h264_v_loop_filter_luma       = ff_deblock_v_luma_8_avx;
                    c->h264_h_loop_filter_luma       = ff_deblock_h_luma_8_avx;
                    c->h264_v_loop_filter_luma_intra = ff_deblock_v_luma_intra_8_avx;
                    c->h264_h_loop_filter_luma_intra = ff_deblock_h_luma_intra_8_avx;
#endif /* HAVE_ALIGNED_STACK */
                }
            }
        }
    } else if (bit_depth == 10) {
        if (mm_flags & AV_CPU_FLAG_MMX) {
            if (mm_flags & AV_CPU_FLAG_MMX2) {
#if ARCH_X86_32
                c->h264_v_loop_filter_chroma       = ff_deblock_v_chroma_10_mmx2;
                c->h264_v_loop_filter_chroma_intra = ff_deblock_v_chroma_intra_10_mmx2;
                c->h264_v_loop_filter_luma         = ff_deblock_v_luma_10_mmx2;
                c->h264_h_loop_filter_luma         = ff_deblock_h_luma_10_mmx2;
                c->h264_v_loop_filter_luma_intra   = ff_deblock_v_luma_intra_10_mmx2;
                c->h264_h_loop_filter_luma_intra   = ff_deblock_h_luma_intra_10_mmx2;
#endif /* ARCH_X86_32 */
                c->h264_idct_dc_add = ff_h264_idct_dc_add_10_mmx2;
                if (mm_flags & AV_CPU_FLAG_SSE2) {
                    c->h264_idct_add        = ff_h264_idct_add_10_sse2;
                    c->h264_idct8_dc_add    = ff_h264_idct8_dc_add_10_sse2;

                    c->h264_idct_add16      = ff_h264_idct_add16_10_sse2;
                    if (chroma_format_idc == 1)
                        c->h264_idct_add8   = ff_h264_idct_add8_10_sse2;
                    c->h264_idct_add16intra = ff_h264_idct_add16intra_10_sse2;
#if HAVE_ALIGNED_STACK
                    c->h264_idct8_add       = ff_h264_idct8_add_10_sse2;
                    c->h264_idct8_add4      = ff_h264_idct8_add4_10_sse2;
#endif /* HAVE_ALIGNED_STACK */

                    c->weight_h264_pixels_tab[0] = ff_h264_weight_16_10_sse2;
                    c->weight_h264_pixels_tab[1] = ff_h264_weight_8_10_sse2;
                    c->weight_h264_pixels_tab[2] = ff_h264_weight_4_10_sse2;

                    c->biweight_h264_pixels_tab[0] = ff_h264_biweight_16_10_sse2;
                    c->biweight_h264_pixels_tab[1] = ff_h264_biweight_8_10_sse2;
                    c->biweight_h264_pixels_tab[2] = ff_h264_biweight_4_10_sse2;

                    c->h264_v_loop_filter_chroma       = ff_deblock_v_chroma_10_sse2;
                    c->h264_v_loop_filter_chroma_intra = ff_deblock_v_chroma_intra_10_sse2;
#if HAVE_ALIGNED_STACK
                    c->h264_v_loop_filter_luma       = ff_deblock_v_luma_10_sse2;
                    c->h264_h_loop_filter_luma       = ff_deblock_h_luma_10_sse2;
                    c->h264_v_loop_filter_luma_intra = ff_deblock_v_luma_intra_10_sse2;
                    c->h264_h_loop_filter_luma_intra = ff_deblock_h_luma_intra_10_sse2;
#endif /* HAVE_ALIGNED_STACK */
                }
                if (mm_flags & AV_CPU_FLAG_SSE4) {
                    c->weight_h264_pixels_tab[0] = ff_h264_weight_16_10_sse4;
                    c->weight_h264_pixels_tab[1] = ff_h264_weight_8_10_sse4;
                    c->weight_h264_pixels_tab[2] = ff_h264_weight_4_10_sse4;

                    c->biweight_h264_pixels_tab[0] = ff_h264_biweight_16_10_sse4;
                    c->biweight_h264_pixels_tab[1] = ff_h264_biweight_8_10_sse4;
                    c->biweight_h264_pixels_tab[2] = ff_h264_biweight_4_10_sse4;
                }
#if HAVE_AVX
                if (mm_flags & AV_CPU_FLAG_AVX) {
                    /* The AVX build points the DC-only 4x4 slot at the
                     * regular 4x4 IDCT routine. */
                    c->h264_idct_dc_add     =
                    c->h264_idct_add        = ff_h264_idct_add_10_avx;
                    c->h264_idct8_dc_add    = ff_h264_idct8_dc_add_10_avx;

                    c->h264_idct_add16      = ff_h264_idct_add16_10_avx;
                    if (chroma_format_idc == 1)
                        c->h264_idct_add8   = ff_h264_idct_add8_10_avx;
                    c->h264_idct_add16intra = ff_h264_idct_add16intra_10_avx;
#if HAVE_ALIGNED_STACK
                    c->h264_idct8_add       = ff_h264_idct8_add_10_avx;
                    c->h264_idct8_add4      = ff_h264_idct8_add4_10_avx;
#endif /* HAVE_ALIGNED_STACK */

                    c->h264_v_loop_filter_chroma       = ff_deblock_v_chroma_10_avx;
                    c->h264_v_loop_filter_chroma_intra = ff_deblock_v_chroma_intra_10_avx;
#if HAVE_ALIGNED_STACK
                    c->h264_v_loop_filter_luma       = ff_deblock_v_luma_10_avx;
                    c->h264_h_loop_filter_luma       = ff_deblock_h_luma_10_avx;
                    c->h264_v_loop_filter_luma_intra = ff_deblock_v_luma_intra_10_avx;
                    c->h264_h_loop_filter_luma_intra = ff_deblock_h_luma_intra_10_avx;
#endif /* HAVE_ALIGNED_STACK */
                }
#endif /* HAVE_AVX */
            }
        }
    }
#endif /* HAVE_YASM */
}
@@ -301,12 +301,12 @@ cglobal prores_idct_put_10, 4, 4, %1 | |||||
RET | RET | ||||
%endmacro | %endmacro | ||||
%macro SIGNEXTEND 2-3 ; dstlow, dsthigh, tmp | |||||
%if cpuflag(sse4) | |||||
%macro SIGNEXTEND 2-3 | |||||
%if cpuflag(sse4) ; dstlow, dsthigh | |||||
movhlps %2, %1 | movhlps %2, %1 | ||||
pmovsxwd %1, %1 | pmovsxwd %1, %1 | ||||
pmovsxwd %2, %2 | pmovsxwd %2, %2 | ||||
%else ; sse2 | |||||
%elif cpuflag(sse2) ; dstlow, dsthigh, tmp | |||||
pxor %3, %3 | pxor %3, %3 | ||||
pcmpgtw %3, %1 | pcmpgtw %3, %1 | ||||
mova %2, %1 | mova %2, %1 | ||||
@@ -590,7 +590,7 @@ SECTION .note.GNU-stack noalloc noexec nowrite progbits | |||||
; CPU feature bitmasks. Each flag ORs in the mask of the flag it builds on,
; so testing for a flag also implies every flag it depends on.
%assign cpuflags_mmx      (1<<0)
%assign cpuflags_mmx2     (1<<1) | cpuflags_mmx
%assign cpuflags_3dnow    (1<<2) | cpuflags_mmx
%assign cpuflags_3dnowext (1<<3) | cpuflags_3dnow
%assign cpuflags_sse      (1<<4) | cpuflags_mmx2
%assign cpuflags_sse2     (1<<5) | cpuflags_sse
%assign cpuflags_sse2slow (1<<6) | cpuflags_sse2
@@ -1,4 +1,4 @@ | |||||
dec0deb2425e908d232d2471acff04a3 *tests/data/fate/acodec-g723_1.g723_1 | dec0deb2425e908d232d2471acff04a3 *tests/data/fate/acodec-g723_1.g723_1 | ||||
4800 tests/data/fate/acodec-g723_1.g723_1 | 4800 tests/data/fate/acodec-g723_1.g723_1 | ||||
90b20555c962b638dad0e98ac2c05b25 *tests/data/fate/acodec-g723_1.out.wav | |||||
stddev: 8418.34 PSNR: 17.82 MAXDIFF:52968 bytes: 95992/ 96000 | |||||
87fd529c9e41914f73a865d147cc9516 *tests/data/fate/acodec-g723_1.out.wav | |||||
stddev: 8425.98 PSNR: 17.82 MAXDIFF:53268 bytes: 95992/ 96000 |