Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
tags/n1.0
@@ -340,6 +340,9 @@ int swri_rematrix_init(SwrContext *s){ | |||||
} | } | ||||
s->matrix_ch[i][0]= ch_in; | s->matrix_ch[i][0]= ch_in; | ||||
} | } | ||||
if(HAVE_YASM && HAVE_MMX) swri_rematrix_init_x86(s); | |||||
return 0; | return 0; | ||||
} | } | ||||
@@ -351,12 +354,19 @@ void swri_rematrix_free(SwrContext *s){ | |||||
int swri_rematrix(SwrContext *s, AudioData *out, AudioData *in, int len, int mustcopy){ | int swri_rematrix(SwrContext *s, AudioData *out, AudioData *in, int len, int mustcopy){ | ||||
int out_i, in_i, i, j; | int out_i, in_i, i, j; | ||||
int len1 = 0; | |||||
int off = 0; | |||||
if(s->mix_any_f) { | if(s->mix_any_f) { | ||||
s->mix_any_f(out->ch, in->ch, s->native_matrix, len); | s->mix_any_f(out->ch, in->ch, s->native_matrix, len); | ||||
return 0; | return 0; | ||||
} | } | ||||
if(s->mix_2_1_simd || s->mix_1_1_simd){ | |||||
len1= len&~15; | |||||
off = len1 * out->bps; | |||||
} | |||||
av_assert0(out->ch_count == av_get_channel_layout_nb_channels(s->out_ch_layout)); | av_assert0(out->ch_count == av_get_channel_layout_nb_channels(s->out_ch_layout)); | ||||
av_assert0(in ->ch_count == av_get_channel_layout_nb_channels(s-> in_ch_layout)); | av_assert0(in ->ch_count == av_get_channel_layout_nb_channels(s-> in_ch_layout)); | ||||
@@ -369,7 +379,10 @@ int swri_rematrix(SwrContext *s, AudioData *out, AudioData *in, int len, int mus | |||||
case 1: | case 1: | ||||
in_i= s->matrix_ch[out_i][1]; | in_i= s->matrix_ch[out_i][1]; | ||||
if(s->matrix[out_i][in_i]!=1.0){ | if(s->matrix[out_i][in_i]!=1.0){ | ||||
s->mix_1_1_f(out->ch[out_i], in->ch[in_i], s->native_matrix, in->ch_count*out_i + in_i, len); | |||||
if(s->mix_1_1_simd && len1) | |||||
s->mix_1_1_simd(out->ch[out_i] , in->ch[in_i] , s->native_matrix, in->ch_count*out_i + in_i, len1); | |||||
if(len != len1) | |||||
s->mix_1_1_f (out->ch[out_i]+off, in->ch[in_i]+off, s->native_matrix, in->ch_count*out_i + in_i, len-len1); | |||||
}else if(mustcopy){ | }else if(mustcopy){ | ||||
memcpy(out->ch[out_i], in->ch[in_i], len*out->bps); | memcpy(out->ch[out_i], in->ch[in_i], len*out->bps); | ||||
}else{ | }else{ | ||||
@@ -379,7 +392,12 @@ int swri_rematrix(SwrContext *s, AudioData *out, AudioData *in, int len, int mus | |||||
case 2: { | case 2: { | ||||
int in_i1 = s->matrix_ch[out_i][1]; | int in_i1 = s->matrix_ch[out_i][1]; | ||||
int in_i2 = s->matrix_ch[out_i][2]; | int in_i2 = s->matrix_ch[out_i][2]; | ||||
s->mix_2_1_f(out->ch[out_i], in->ch[in_i1], in->ch[in_i2], s->native_matrix, in->ch_count*out_i + in_i1, in->ch_count*out_i + in_i2, len); | |||||
if(s->mix_2_1_simd && len1) | |||||
s->mix_2_1_simd(out->ch[out_i] , in->ch[in_i1] , in->ch[in_i2] , s->native_matrix, in->ch_count*out_i + in_i1, in->ch_count*out_i + in_i2, len1); | |||||
else | |||||
s->mix_2_1_f (out->ch[out_i] , in->ch[in_i1] , in->ch[in_i2] , s->native_matrix, in->ch_count*out_i + in_i1, in->ch_count*out_i + in_i2, len1); | |||||
if(len != len1) | |||||
s->mix_2_1_f (out->ch[out_i]+off, in->ch[in_i1]+off, in->ch[in_i2]+off, s->native_matrix, in->ch_count*out_i + in_i1, in->ch_count*out_i + in_i2, len-len1); | |||||
break;} | break;} | ||||
default: | default: | ||||
if(s->int_sample_fmt == AV_SAMPLE_FMT_FLTP){ | if(s->int_sample_fmt == AV_SAMPLE_FMT_FLTP){ | ||||
@@ -121,6 +121,7 @@ int swri_resample_double(struct ResampleContext *c,double *dst, const double * | |||||
int swri_rematrix_init(SwrContext *s); | int swri_rematrix_init(SwrContext *s); | ||||
void swri_rematrix_free(SwrContext *s); | void swri_rematrix_free(SwrContext *s); | ||||
int swri_rematrix(SwrContext *s, AudioData *out, AudioData *in, int len, int mustcopy); | int swri_rematrix(SwrContext *s, AudioData *out, AudioData *in, int len, int mustcopy); | ||||
void swri_rematrix_init_x86(struct SwrContext *s); | |||||
void swri_get_dither(SwrContext *s, void *dst, int len, unsigned seed, enum AVSampleFormat out_fmt, enum AVSampleFormat in_fmt); | void swri_get_dither(SwrContext *s, void *dst, int len, unsigned seed, enum AVSampleFormat out_fmt, enum AVSampleFormat in_fmt); | ||||
@@ -1,2 +1,3 @@ | |||||
YASM-OBJS += x86/swresample_x86.o\ | YASM-OBJS += x86/swresample_x86.o\ | ||||
x86/audio_convert.o\ | x86/audio_convert.o\ | ||||
x86/rematrix.o\ |
@@ -0,0 +1,66 @@ | |||||
;****************************************************************************** | |||||
;* Copyright (c) 2012 Michael Niedermayer | |||||
;* | |||||
;* This file is part of FFmpeg. | |||||
;* | |||||
;* FFmpeg is free software; you can redistribute it and/or | |||||
;* modify it under the terms of the GNU Lesser General Public | |||||
;* License as published by the Free Software Foundation; either | |||||
;* version 2.1 of the License, or (at your option) any later version. | |||||
;* | |||||
;* FFmpeg is distributed in the hope that it will be useful, | |||||
;* but WITHOUT ANY WARRANTY; without even the implied warranty of | |||||
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |||||
;* Lesser General Public License for more details. | |||||
;* | |||||
;* You should have received a copy of the GNU Lesser General Public | |||||
;* License along with FFmpeg; if not, write to the Free Software | |||||
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |||||
;****************************************************************************** | |||||
%include "libavutil/x86/x86inc.asm" | |||||
%include "libavutil/x86/x86util.asm" | |||||
SECTION .text | |||||
%macro MIX1_FLT 1 | |||||
cglobal mix_1_1_%1_float, 5, 5, 3, out, in, coeffp, index, len | |||||
%ifidn %1, a | |||||
test inq, mmsize-1 | |||||
jne mix_1_1_float_u_int %+ SUFFIX | |||||
test outq, mmsize-1 | |||||
jne mix_1_1_float_u_int %+ SUFFIX | |||||
%else | |||||
mix_1_1_float_u_int %+ SUFFIX | |||||
%endif | |||||
VBROADCASTSS m2, [coeffpq + 4*indexq] | |||||
shl lenq , 2 | |||||
add inq , lenq | |||||
add outq , lenq | |||||
neg lenq | |||||
.next: | |||||
%ifidn %1, a | |||||
mulps m0, m2, [inq + lenq ] | |||||
mulps m1, m2, [inq + lenq + mmsize] | |||||
%else | |||||
movu m0, [inq + lenq ] | |||||
movu m1, [inq + lenq + mmsize] | |||||
mulps m0, m0, m2 | |||||
mulps m1, m1, m2 | |||||
%endif | |||||
mov%1 [outq + lenq ], m0 | |||||
mov%1 [outq + lenq + mmsize], m1 | |||||
add lenq, mmsize*2 | |||||
jl .next | |||||
REP_RET | |||||
%endmacro | |||||
; Instantiate the float mixers for SSE and, when HAVE_AVX is set, for AVX.
; In each group the `u` variant is expanded before the `a` variant; the `u`
; expansion emits the mix_1_1_float_u_int label that the `a` expansion
; jumps to.
INIT_XMM sse | |||||
MIX1_FLT u | |||||
MIX1_FLT a | |||||
%if HAVE_AVX | |||||
INIT_YMM avx | |||||
MIX1_FLT u | |||||
MIX1_FLT a | |||||
%endif |
@@ -142,3 +142,35 @@ MULTI_CAPS_FUNC(AV_CPU_FLAG_SSE, sse) | |||||
} | } | ||||
} | } | ||||
} | } | ||||
/*
 * D(type, simd) declares the pair of symbols
 *   ff_mix_1_1_a_<type>_<simd>  (of type mix_1_1_func_type)
 *   ff_mix_2_1_a_<type>_<simd>  (of type mix_2_1_func_type)
 * NOTE(review): these are presumably prototypes for routines implemented in
 * assembly; only the mix_1_1 float variants are referenced by the init
 * function that follows -- confirm that the remaining symbols exist before
 * using them.
 */
#define D(type, simd) \ | |||||
mix_1_1_func_type ff_mix_1_1_a_## type ## _ ## simd;\ | |||||
mix_2_1_func_type ff_mix_2_1_a_## type ## _ ## simd; | |||||
D(float, sse) | |||||
D(float, avx) | |||||
D(int16, mmx) | |||||
void swri_rematrix_init_x86(struct SwrContext *s){ | |||||
int mm_flags = av_get_cpu_flags(); | |||||
int nb_in = av_get_channel_layout_nb_channels(s->in_ch_layout); | |||||
int nb_out = av_get_channel_layout_nb_channels(s->out_ch_layout); | |||||
int num = nb_in * nb_out; | |||||
int i,j; | |||||
s->mix_1_1_simd = NULL; | |||||
s->mix_2_1_simd = NULL; | |||||
if (s->midbuf.fmt == AV_SAMPLE_FMT_S16P){ | |||||
} else if(s->midbuf.fmt == AV_SAMPLE_FMT_FLTP){ | |||||
if(mm_flags & AV_CPU_FLAG_SSE) { | |||||
s->mix_1_1_simd = ff_mix_1_1_a_float_sse; | |||||
} | |||||
if(HAVE_AVX && mm_flags & AV_CPU_FLAG_AVX) { | |||||
s->mix_1_1_simd = ff_mix_1_1_a_float_avx; | |||||
} | |||||
s->native_simd_matrix = av_mallocz(num * sizeof(float)); | |||||
memcpy(s->native_simd_matrix, s->native_matrix, num * sizeof(float)); | |||||
} | |||||
} |