* qatar/master: proresdsp: port x86 assembly to cpuflags. lavr: x86: improve non-SSE4 version of S16_TO_S32_SX macro lavfi: better channel layout negotiation alac: check for truncated packets alac: reverse lpc coeff order, simplify filter lavr: add x86-optimized mixing functions x86: add support for fmaddps fma4 instruction with abstraction to avx/sse tscc2: fix typo in array index build: use COMPILE template for HOSTOBJS build: do full flag handling for all compiler-type tools eval: fix printing of NaN in eval fate test. build: Rename aandct component to more descriptive aandcttables mpegaudio: bury inline asm under HAVE_INLINE_ASM. x86inc: automatically insert vzeroupper for YMM functions. rtmp: Check the buffer length of ping packets rtmp: Allow having more unknown data at the end of a chunk size packet without failing rtmp: Prevent reading outside of an allocate buffer when receiving server bandwidth packets Conflicts: Makefile configure libavcodec/x86/proresdsp.asm libavutil/eval.c Merged-by: Michael Niedermayer <michaelni@gmx.at>tags/n1.0
| @@ -11,7 +11,7 @@ ifndef V | |||
| Q = @ | |||
| ECHO = printf "$(1)\t%s\n" $(2) | |||
| BRIEF = CC CXX AS YASM AR LD HOSTCC STRIP CP | |||
| SILENT = DEPCC YASMDEP RM RANLIB | |||
| SILENT = DEPCC DEPAS DEPHOSTCC YASMDEP RM RANLIB | |||
| MSG = $@ | |||
| M = @$(call ECHO,$(TAG),$@); | |||
| $(foreach VAR,$(BRIEF), \ | |||
| @@ -26,15 +26,16 @@ ALLFFLIBS = avcodec avdevice avfilter avformat avresample avutil postproc swscal | |||
| IFLAGS := -I. -I$(SRC_PATH)/ | |||
| CPPFLAGS := $(IFLAGS) $(CPPFLAGS) | |||
| CFLAGS += $(ECFLAGS) | |||
| CCFLAGS = $(CFLAGS) | |||
| CCFLAGS = $(CPPFLAGS) $(CFLAGS) | |||
| ASFLAGS := $(CPPFLAGS) $(ASFLAGS) | |||
| CXXFLAGS := $(CFLAGS) $(CXXFLAGS) | |||
| YASMFLAGS += $(IFLAGS) -I$(SRC_PATH)/libavutil/x86/ -Pconfig.asm | |||
| HOSTCFLAGS += $(IFLAGS) | |||
| HOSTCCFLAGS = $(IFLAGS) $(HOSTCFLAGS) | |||
| LDFLAGS := $(ALLFFLIBS:%=-Llib%) $(LDFLAGS) | |||
| define COMPILE | |||
| $($(1)DEP) | |||
| $($(1)) $(CPPFLAGS) $($(1)FLAGS) $($(1)_DEPFLAGS) -c $($(1)_O) $< | |||
| $(call $(1)DEP,$(1)) | |||
| $($(1)) $($(1)FLAGS) $($(1)_DEPFLAGS) -c $($(1)_O) $< | |||
| endef | |||
| COMPILE_C = $(call COMPILE,CC) | |||
| @@ -101,7 +102,7 @@ checkheaders: $(filter-out $(SKIPHEADERS:.h=.ho),$(ALLHEADERS:.h=.ho)) | |||
| alltools: $(TOOLS) | |||
| $(HOSTOBJS): %.o: %.c | |||
| $(HOSTCC) $(HOSTCFLAGS) -c -o $@ $< | |||
| $(call COMPILE,HOSTCC) | |||
| $(HOSTPROGS): %$(HOSTEXESUF): %.o | |||
| $(HOSTCC) $(HOSTLDFLAGS) -o $@ $< $(HOSTLIBS) | |||
| @@ -117,4 +118,4 @@ CLEANSUFFIXES = *.d *.o *~ *.ho *.map *.ver *.gcno *.gcda | |||
| DISTCLEANSUFFIXES = *.pc | |||
| LIBSUFFIXES = *.a *.lib *.so *.so.* *.dylib *.dll *.def *.dll.a | |||
| -include $(wildcard $(OBJS:.o=.d) $(TESTOBJS:.o=.d)) | |||
| -include $(wildcard $(OBJS:.o=.d) $(HOSTOBJS:.o=.d) $(TESTOBJS:.o=.d)) | |||
| @@ -265,6 +265,7 @@ Optimization options (experts only): | |||
| --disable-sse disable SSE optimizations | |||
| --disable-ssse3 disable SSSE3 optimizations | |||
| --disable-avx disable AVX optimizations | |||
| --disable-fma4 disable FMA4 optimizations | |||
| --disable-armv5te disable armv5te optimizations | |||
| --disable-armv6 disable armv6 optimizations | |||
| --disable-armv6t2 disable armv6t2 optimizations | |||
| @@ -1173,6 +1174,7 @@ ARCH_EXT_LIST=' | |||
| armv6t2 | |||
| armvfp | |||
| avx | |||
| fma4 | |||
| mmi | |||
| mmx | |||
| mmx2 | |||
| @@ -1336,7 +1338,7 @@ HAVE_LIST=" | |||
| # options emitted with CONFIG_ prefix but not available on command line | |||
| CONFIG_EXTRA=" | |||
| aandct | |||
| aandcttables | |||
| avutil | |||
| golomb | |||
| gplv3 | |||
| @@ -1450,6 +1452,7 @@ mmx2_deps="mmx" | |||
| sse_deps="mmx" | |||
| ssse3_deps="sse" | |||
| avx_deps="ssse3" | |||
| fma4_deps="avx" | |||
| aligned_stack_if_any="ppc x86" | |||
| fast_64bit_if_any="alpha ia64 mips64 parisc64 ppc64 sparc64 x86_64" | |||
| @@ -1477,7 +1480,7 @@ ac3_fixed_encoder_select="mdct ac3dsp" | |||
| alac_encoder_select="lpc" | |||
| amrnb_decoder_select="lsp" | |||
| amrwb_decoder_select="lsp" | |||
| amv_encoder_select="aandct" | |||
| amv_encoder_select="aandcttables" | |||
| atrac1_decoder_select="mdct sinewin" | |||
| atrac3_decoder_select="mdct" | |||
| binkaudio_dct_decoder_select="mdct rdft dct sinewin" | |||
| @@ -1487,13 +1490,13 @@ cook_decoder_select="mdct sinewin" | |||
| cscd_decoder_suggest="zlib" | |||
| dca_decoder_select="mdct" | |||
| dirac_decoder_select="dwt golomb" | |||
| dnxhd_encoder_select="aandct" | |||
| dnxhd_encoder_select="aandcttables" | |||
| dxa_decoder_select="zlib" | |||
| eac3_decoder_select="ac3_decoder" | |||
| eac3_encoder_select="mdct ac3dsp" | |||
| eamad_decoder_select="aandct" | |||
| eatgq_decoder_select="aandct" | |||
| eatqi_decoder_select="aandct" | |||
| eamad_decoder_select="aandcttables" | |||
| eatgq_decoder_select="aandcttables" | |||
| eatqi_decoder_select="aandcttables" | |||
| exr_decoder_select="zlib" | |||
| ffv1_decoder_select="golomb" | |||
| flac_decoder_select="golomb" | |||
| @@ -1505,9 +1508,9 @@ flashsv2_decoder_select="zlib" | |||
| flv_decoder_select="h263_decoder" | |||
| flv_encoder_select="h263_encoder" | |||
| fraps_decoder_select="huffman" | |||
| h261_encoder_select="aandct" | |||
| h261_encoder_select="aandcttables" | |||
| h263_decoder_select="h263_parser" | |||
| h263_encoder_select="aandct" | |||
| h263_encoder_select="aandcttables" | |||
| h263_vaapi_hwaccel_select="vaapi h263_decoder" | |||
| h263i_decoder_select="h263_decoder" | |||
| h263p_encoder_select="h263_encoder" | |||
| @@ -1523,9 +1526,9 @@ iac_decoder_select="fft mdct sinewin" | |||
| imc_decoder_select="fft mdct sinewin" | |||
| jpegls_decoder_select="golomb" | |||
| jpegls_encoder_select="golomb" | |||
| ljpeg_encoder_select="aandct" | |||
| ljpeg_encoder_select="aandcttables" | |||
| loco_decoder_select="golomb" | |||
| mjpeg_encoder_select="aandct" | |||
| mjpeg_encoder_select="aandcttables" | |||
| mlp_decoder_select="mlp_parser" | |||
| mp1_decoder_select="mpegaudiodsp" | |||
| mp1float_decoder_select="mpegaudiodsp" | |||
| @@ -1544,13 +1547,13 @@ mpeg_xvmc_decoder_deps="X11_extensions_XvMClib_h" | |||
| mpeg_xvmc_decoder_select="mpegvideo_decoder" | |||
| mpeg1_vdpau_decoder_select="vdpau mpeg1video_decoder" | |||
| mpeg1_vdpau_hwaccel_select="vdpau mpeg1video_decoder" | |||
| mpeg1video_encoder_select="aandct" | |||
| mpeg1video_encoder_select="aandcttables" | |||
| mpeg2_crystalhd_decoder_select="crystalhd" | |||
| mpeg2_dxva2_hwaccel_deps="dxva2api_h" | |||
| mpeg2_dxva2_hwaccel_select="dxva2 mpeg2video_decoder" | |||
| mpeg2_vdpau_hwaccel_select="vdpau mpeg2video_decoder" | |||
| mpeg2_vaapi_hwaccel_select="vaapi mpeg2video_decoder" | |||
| mpeg2video_encoder_select="aandct" | |||
| mpeg2video_encoder_select="aandcttables" | |||
| mpeg4_crystalhd_decoder_select="crystalhd" | |||
| mpeg4_decoder_select="h263_decoder mpeg4video_parser" | |||
| mpeg4_encoder_select="h263_encoder" | |||
| @@ -1580,11 +1583,11 @@ rv40_decoder_select="golomb h264chroma h264pred h264qpel" | |||
| shorten_decoder_select="golomb" | |||
| sipr_decoder_select="lsp" | |||
| snow_decoder_select="dwt" | |||
| snow_encoder_select="aandct dwt" | |||
| snow_encoder_select="aandcttables dwt" | |||
| sonic_decoder_select="golomb" | |||
| sonic_encoder_select="golomb" | |||
| sonic_ls_encoder_select="golomb" | |||
| svq1_encoder_select="aandct" | |||
| svq1_encoder_select="aandcttables" | |||
| svq3_decoder_select="golomb h264chroma h264dsp h264pred h264qpel" | |||
| svq3_decoder_suggest="zlib" | |||
| theora_decoder_select="vp3_decoder" | |||
| @@ -1965,6 +1968,8 @@ ldflags_filter=echo | |||
| AS_O='-o $@' | |||
| CC_O='-o $@' | |||
| CXX_O='-o $@' | |||
| LD_O='-o $@' | |||
| HOSTCC_O='-o $@' | |||
| host_cflags='-D_ISOC99_SOURCE -D_XOPEN_SOURCE=600 -O3 -g' | |||
| host_libs='-lm' | |||
| @@ -1975,8 +1980,8 @@ target_path='$(CURDIR)' | |||
| # since the object filename is not given with the -MM flag, the compiler | |||
| # is only able to print the basename, and we must add the path ourselves | |||
| DEPEND_CMD='$(DEPCC) $(DEPFLAGS) $< | sed -e "/^\#.*/d" -e "s,^[[:space:]]*$(*F)\\.o,$(@D)/$(*F).o," > $(@:.o=.d)' | |||
| DEPFLAGS='$(CPPFLAGS) $(CFLAGS) -MM' | |||
| DEPCMD='$(DEP$(1)) $(DEP$(1)FLAGS) $($(1)DEP_FLAGS) $< | sed -e "/^\#.*/d" -e "s,^[[:space:]]*$(*F)\\.o,$(@D)/$(*F).o," > $(@:.o=.d)' | |||
| DEPFLAGS='-MM' | |||
| # find source path | |||
| if test -f configure; then | |||
| @@ -2319,120 +2324,150 @@ tms470_flags(){ | |||
| done | |||
| } | |||
| if $cc -v 2>&1 | grep -q '^gcc.*LLVM'; then | |||
| cc_type=llvm_gcc | |||
| gcc_extra_ver=$(expr "$($cc --version | head -n1)" : '.*\((.*)\)') | |||
| cc_ident="llvm-gcc $($cc -dumpversion) $gcc_extra_ver" | |||
| CC_DEPFLAGS='-MMD -MF $(@:.o=.d) -MT $@' | |||
| AS_DEPFLAGS='-MMD -MF $(@:.o=.d) -MT $@' | |||
| cflags_speed='-O3' | |||
| cflags_size='-Os' | |||
| elif $cc -v 2>&1 | grep -qi ^gcc; then | |||
| cc_type=gcc | |||
| gcc_version=$($cc --version | head -n1) | |||
| gcc_basever=$($cc -dumpversion) | |||
| gcc_pkg_ver=$(expr "$gcc_version" : '[^ ]* \(([^)]*)\)') | |||
| gcc_ext_ver=$(expr "$gcc_version" : ".*$gcc_pkg_ver $gcc_basever \\(.*\\)") | |||
| cc_ident=$(cleanws "gcc $gcc_basever $gcc_pkg_ver $gcc_ext_ver") | |||
| if ! $cc -dumpversion | grep -q '^2\.'; then | |||
| CC_DEPFLAGS='-MMD -MF $(@:.o=.d) -MT $@' | |||
| AS_DEPFLAGS='-MMD -MF $(@:.o=.d) -MT $@' | |||
| probe_cc(){ | |||
| pfx=$1 | |||
| _cc=$2 | |||
| unset _type _ident _cc_o _flags _cflags _ldflags _depflags _DEPCMD _DEPFLAGS | |||
| _flags_filter=echo | |||
| if $_cc -v 2>&1 | grep -q '^gcc.*LLVM'; then | |||
| _type=llvm_gcc | |||
| gcc_extra_ver=$(expr "$($_cc --version | head -n1)" : '.*\((.*)\)') | |||
| _ident="llvm-gcc $($_cc -dumpversion) $gcc_extra_ver" | |||
| _depflags='-MMD -MF $(@:.o=.d) -MT $@' | |||
| _cflags_speed='-O3' | |||
| _cflags_size='-Os' | |||
| elif $_cc -v 2>&1 | grep -qi ^gcc; then | |||
| _type=gcc | |||
| gcc_version=$($_cc --version | head -n1) | |||
| gcc_basever=$($_cc -dumpversion) | |||
| gcc_pkg_ver=$(expr "$gcc_version" : '[^ ]* \(([^)]*)\)') | |||
| gcc_ext_ver=$(expr "$gcc_version" : ".*$gcc_pkg_ver $gcc_basever \\(.*\\)") | |||
| _ident=$(cleanws "gcc $gcc_basever $gcc_pkg_ver $gcc_ext_ver") | |||
| if ! $_cc -dumpversion | grep -q '^2\.'; then | |||
| _depflags='-MMD -MF $(@:.o=.d) -MT $@' | |||
| fi | |||
| _cflags_speed='-O3' | |||
| _cflags_size='-Os' | |||
| elif $_cc --version 2>/dev/null | grep -q Intel; then | |||
| _type=icc | |||
| _ident=$($_cc --version | head -n1) | |||
| _depflags='-MMD' | |||
| _cflags_speed='-O3' | |||
| _cflags_size='-Os' | |||
| _cflags_noopt='-O1' | |||
| elif $_cc -v 2>&1 | grep -q xlc; then | |||
| _type=xlc | |||
| _ident=$($_cc -qversion 2>/dev/null | head -n1) | |||
| _cflags_speed='-O5' | |||
| _cflags_size='-O5 -qcompact' | |||
| elif $_cc -V 2>/dev/null | grep -q Compaq; then | |||
| _type=ccc | |||
| _ident=$($_cc -V | head -n1 | cut -d' ' -f1-3) | |||
| _DEPFLAGS='-M' | |||
| debuglevel=3 | |||
| _ldflags='-Wl,-z,now' # calls to libots crash without this | |||
| _cflags_speed='-fast' | |||
| _cflags_size='-O1' | |||
| elif $_cc --vsn 2>/dev/null | grep -q "ARM C/C++ Compiler"; then | |||
| test -d "$sysroot" || die "No valid sysroot specified." | |||
| _type=armcc | |||
| _ident=$($_cc --vsn | head -n1) | |||
| armcc_conf="$PWD/armcc.conf" | |||
| $_cc --arm_linux_configure \ | |||
| --arm_linux_config_file="$armcc_conf" \ | |||
| --configure_sysroot="$sysroot" \ | |||
| --configure_cpp_headers="$sysinclude" >>$logfile 2>&1 || | |||
| die "Error creating armcc configuration file." | |||
| $_cc --vsn | grep -q RVCT && armcc_opt=rvct || armcc_opt=armcc | |||
| _flags="--arm_linux_config_file=$armcc_conf --translate_gcc" | |||
| as_default="${cross_prefix}gcc" | |||
| _depflags='-MMD' | |||
| _cflags_speed='-O3' | |||
| _cflags_size='-Os' | |||
| elif $_cc -version 2>/dev/null | grep -q TMS470; then | |||
| _type=tms470 | |||
| _ident=$($_cc -version | head -n1 | tr -s ' ') | |||
| _flags='--gcc --abi=eabi -me' | |||
| _cflags='-D__gnuc_va_list=va_list -D__USER_LABEL_PREFIX__=' | |||
| _cc_o='-fe=$@' | |||
| as_default="${cross_prefix}gcc" | |||
| ld_default="${cross_prefix}gcc" | |||
| _depflags='-ppa -ppd=$(@:.o=.d)' | |||
| _cflags_speed='-O3 -mf=5' | |||
| _cflags_size='-O3 -mf=2' | |||
| _flags_filter=tms470_flags | |||
| elif $_cc -v 2>&1 | grep -q clang; then | |||
| _type=clang | |||
| _ident=$($_cc --version | head -n1) | |||
| _depflags='-MMD' | |||
| _cflags_speed='-O3' | |||
| _cflags_size='-Os' | |||
| elif $_cc -V 2>&1 | grep -q Sun; then | |||
| _type=suncc | |||
| _ident=$($_cc -V 2>&1 | head -n1 | cut -d' ' -f 2-) | |||
| _DEPCMD='$(DEP$(1)) $(DEP$(1)FLAGS) $($(1)DEP_FLAGS) $< | sed -e "1s,^.*: ,$@: ," -e "\$$!s,\$$, \\\," -e "1!s,^.*: , ," > $(@:.o=.d)' | |||
| _DEPFLAGS='-xM1' | |||
| _ldflags='-std=c99' | |||
| _cflags_speed='-O5' | |||
| _cflags_size='-O5 -xspace' | |||
| _flags_filter=suncc_flags | |||
| elif $_cc -v 2>&1 | grep -q 'PathScale\|Path64'; then | |||
| _type=pathscale | |||
| _ident=$($_cc -v 2>&1 | head -n1 | tr -d :) | |||
| _depflags='-MMD -MF $(@:.o=.d) -MT $@' | |||
| _cflags_speed='-O2' | |||
| _cflags_size='-Os' | |||
| _flags_filter='filter_out -Wdisabled-optimization' | |||
| elif $_cc -v 2>&1 | grep -q Open64; then | |||
| _type=open64 | |||
| _ident=$($_cc -v 2>&1 | head -n1 | tr -d :) | |||
| _depflags='-MMD -MF $(@:.o=.d) -MT $@' | |||
| _cflags_speed='-O2' | |||
| _cflags_size='-Os' | |||
| _flags_filter='filter_out -Wdisabled-optimization|-Wtype-limits|-fno-signed-zeros' | |||
| elif $_cc -V 2>&1 | grep -q Portland; then | |||
| _type=pgi | |||
| _ident="PGI $($_cc -V 2>&1 | awk '/^pgcc/ { print $2; exit }')" | |||
| opt_common='-alias=ansi -Mlre -Mpre' | |||
| _cflags_speed="-O3 -Mautoinline -Munroll=c:4 $opt_common" | |||
| _cflags_size="-O2 -Munroll=c:1 $opt_common" | |||
| _cflags_noopt="-O1" | |||
| _flags_filter=pgi_flags | |||
| fi | |||
| cflags_speed='-O3' | |||
| cflags_size='-Os' | |||
| elif $cc --version 2>/dev/null | grep -q Intel; then | |||
| cc_type=icc | |||
| cc_ident=$($cc --version | head -n1) | |||
| CC_DEPFLAGS='-MMD' | |||
| AS_DEPFLAGS='-MMD' | |||
| cflags_speed='-O3' | |||
| cflags_size='-Os' | |||
| cflags_noopt='-O1' | |||
| elif $cc -v 2>&1 | grep -q xlc; then | |||
| cc_type=xlc | |||
| cc_ident=$($cc -qversion 2>/dev/null | head -n1) | |||
| cflags_speed='-O5' | |||
| cflags_size='-O5 -qcompact' | |||
| elif $cc -V 2>/dev/null | grep -q Compaq; then | |||
| cc_type=ccc | |||
| cc_ident=$($cc -V | head -n1 | cut -d' ' -f1-3) | |||
| DEPFLAGS='$(CPPFLAGS) $(CFLAGS) -M' | |||
| debuglevel=3 | |||
| add_ldflags -Wl,-z,now # calls to libots crash without this | |||
| cflags_speed='-fast' | |||
| cflags_size='-O1' | |||
| elif $cc --vsn 2>/dev/null | grep -q "ARM C/C++ Compiler"; then | |||
| test -d "$sysroot" || die "No valid sysroot specified." | |||
| cc_type=armcc | |||
| cc_ident=$($cc --vsn | head -n1) | |||
| armcc_conf="$PWD/armcc.conf" | |||
| $cc --arm_linux_configure \ | |||
| --arm_linux_config_file="$armcc_conf" \ | |||
| --configure_sysroot="$sysroot" \ | |||
| --configure_cpp_headers="$sysinclude" >>$logfile 2>&1 || | |||
| die "Error creating armcc configuration file." | |||
| $cc --vsn | grep -q RVCT && armcc_opt=rvct || armcc_opt=armcc | |||
| cc="$cc --arm_linux_config_file=$armcc_conf --translate_gcc" | |||
| as_default="${cross_prefix}gcc" | |||
| CC_DEPFLAGS='-MMD' | |||
| AS_DEPFLAGS='-MMD' | |||
| cflags_speed='-O3' | |||
| cflags_size='-Os' | |||
| asflags_filter="filter_out -W${armcc_opt}*" | |||
| elif $cc -version 2>/dev/null | grep -q TMS470; then | |||
| cc_type=tms470 | |||
| cc_ident=$($cc -version | head -n1 | tr -s ' ') | |||
| cc="$cc --gcc --abi=eabi -me" | |||
| CC_O='-fe=$@' | |||
| as_default="${cross_prefix}gcc" | |||
| ld_default="${cross_prefix}gcc" | |||
| add_cflags -D__gnuc_va_list=va_list -D__USER_LABEL_PREFIX__= | |||
| CC_DEPFLAGS='-ppa -ppd=$(@:.o=.d)' | |||
| AS_DEPFLAGS='-MMD' | |||
| cflags_speed='-O3 -mf=5' | |||
| cflags_size='-O3 -mf=2' | |||
| cflags_filter=tms470_flags | |||
| elif $cc -v 2>&1 | grep -q clang; then | |||
| cc_type=clang | |||
| cc_ident=$($cc --version | head -n1) | |||
| CC_DEPFLAGS='-MMD' | |||
| AS_DEPFLAGS='-MMD' | |||
| cflags_speed='-O3' | |||
| cflags_size='-Os' | |||
| elif $cc -V 2>&1 | grep -q Sun; then | |||
| cc_type=suncc | |||
| cc_ident=$($cc -V 2>&1 | head -n1 | cut -d' ' -f 2-) | |||
| DEPEND_CMD='$(DEPCC) $(DEPFLAGS) $< | sed -e "1s,^.*: ,$@: ," -e "\$$!s,\$$, \\\," -e "1!s,^.*: , ," > $(@:.o=.d)' | |||
| DEPFLAGS='$(CPPFLAGS) $(CFLAGS) -xM1' | |||
| add_ldflags -xc99 | |||
| cflags_speed='-O5' | |||
| cflags_size='-O5 -xspace' | |||
| cflags_filter=suncc_flags | |||
| elif $cc -v 2>&1 | grep -q 'PathScale\|Path64'; then | |||
| cc_type=pathscale | |||
| cc_ident=$($cc -v 2>&1 | head -n1 | tr -d :) | |||
| CC_DEPFLAGS='-MMD -MF $(@:.o=.d) -MT $@' | |||
| AS_DEPFLAGS='-MMD -MF $(@:.o=.d) -MT $@' | |||
| cflags_speed='-O2' | |||
| cflags_size='-Os' | |||
| cflags_filter='filter_out -Wdisabled-optimization' | |||
| elif $cc -v 2>&1 | grep -q Open64; then | |||
| cc_type=open64 | |||
| cc_ident=$($cc -v 2>&1 | head -n1 | tr -d :) | |||
| CC_DEPFLAGS='-MMD -MF $(@:.o=.d) -MT $@' | |||
| AS_DEPFLAGS='-MMD -MF $(@:.o=.d) -MT $@' | |||
| cflags_speed='-O2' | |||
| cflags_size='-Os' | |||
| cflags_filter='filter_out -Wdisabled-optimization|-Wtype-limits|-fno-signed-zeros' | |||
| elif $cc -V 2>&1 | grep -q Portland; then | |||
| cc_type=pgi | |||
| cc_ident="PGI $($cc -V 2>&1 | awk '/^pgcc/ { print $2; exit }')" | |||
| opt_common='-alias=ansi -Mlre -Mpre' | |||
| cflags_speed="-O3 -Mautoinline -Munroll=c:4 $opt_common" | |||
| cflags_size="-O2 -Munroll=c:1 $opt_common" | |||
| cflags_noopt="-O1" | |||
| cflags_filter=pgi_flags | |||
| fi | |||
| eval ${pfx}_type=\$_type | |||
| eval ${pfx}_ident=\$_ident | |||
| } | |||
| set_ccvars(){ | |||
| eval ${1}_O=\${_cc_o-\${${1}_O}} | |||
| if [ -n "$_depflags" ]; then | |||
| eval ${1}_DEPFLAGS=\$_depflags | |||
| else | |||
| eval ${1}DEP=\${_DEPCMD:-\$DEPCMD} | |||
| eval ${1}DEP_FLAGS=\${_DEPFLAGS:-\$DEPFLAGS} | |||
| eval DEP${1}FLAGS=\$_flags | |||
| fi | |||
| } | |||
| probe_cc cc "$cc" | |||
| cflags_filter=$_flags_filter | |||
| cflags_speed=$_cflags_speed | |||
| cflags_size=$_cflags_size | |||
| cflags_noopt=$_cflags_noopt | |||
| add_cflags $_flags $_cflags | |||
| cc_ldflags=$_ldflags | |||
| set_ccvars CC | |||
| probe_cc hostcc "$host_cc" | |||
| host_cflags_filter=$_flags_filter | |||
| host_ldflags_filter=$_flags_filter | |||
| add_host_cflags $_flags $_cflags | |||
| add_host_ldflags $_flags $_ldflags | |||
| set_ccvars HOSTCC | |||
| test -n "$cc_type" && enable $cc_type || | |||
| warn "Unknown C compiler $cc, unable to select optimal CFLAGS" | |||
| @@ -2442,9 +2477,23 @@ test -n "$cc_type" && enable $cc_type || | |||
| : ${ld_default:=$cc} | |||
| set_default ar as dep_cc ld | |||
| test -n "$CC_DEPFLAGS" || CCDEP=$DEPEND_CMD | |||
| test -n "$CXX_DEPFLAGS" || CXXDEP=$DEPEND_CMD | |||
| test -n "$AS_DEPFLAGS" || ASDEP=$DEPEND_CMD | |||
| probe_cc as "$as" | |||
| asflags_filter=$_flags_filter | |||
| add_asflags $_flags $_cflags | |||
| set_ccvars AS | |||
| probe_cc ld "$ld" | |||
| ldflags_filter=$_flags_filter | |||
| add_ldflags $_flags $_ldflags | |||
| test "$cc_type" != "$ld_type" && add_ldflags $cc_ldflags | |||
| LD_O=${_cc_o-$LD_O} | |||
| if [ -z "$CC_DEPFLAGS" ] && [ "$dep_cc" != "$cc" ]; then | |||
| probe_cc depcc "$dep_cc" | |||
| CCDEP=${_DEPCMD:-$DEPCMD} | |||
| CCDEP_FLAGS=${_DEPFLAGS:=$DEPFLAGS} | |||
| DEPCCFLAGS=$_flags | |||
| fi | |||
| add_cflags $extra_cflags | |||
| add_cxxflags $extra_cxxflags | |||
| @@ -3140,6 +3189,7 @@ EOF | |||
| check_yasm "pextrd [eax], xmm0, 1" && enable yasm || | |||
| die "yasm not found, use --disable-yasm for a crippled build" | |||
| check_yasm "vextractf128 xmm0, ymm0, 0" || disable avx | |||
| check_yasm "vfmaddps ymm0, ymm1, ymm2, ymm3" || disable fma4 | |||
| fi | |||
| case "$cpu" in | |||
| @@ -3673,6 +3723,7 @@ if enabled x86; then | |||
| echo "SSE enabled ${sse-no}" | |||
| echo "SSSE3 enabled ${ssse3-no}" | |||
| echo "AVX enabled ${avx-no}" | |||
| echo "FMA4 enabled ${fma4-no}" | |||
| echo "CMOV enabled ${cmov-no}" | |||
| echo "CMOV is fast ${fast_cmov-no}" | |||
| echo "EBX available ${ebx_available-no}" | |||
| @@ -3814,6 +3865,9 @@ CXX=$cxx | |||
| AS=$as | |||
| LD=$ld | |||
| DEPCC=$dep_cc | |||
| DEPCCFLAGS=$DEPCCFLAGS \$(CPPFLAGS) | |||
| DEPAS=$as | |||
| DEPASFLAGS=$DEPASFLAGS \$(CPPFLAGS) | |||
| YASM=$yasmexe | |||
| YASMDEP=$yasmexe | |||
| AR=$ar | |||
| @@ -3825,9 +3879,10 @@ CPPFLAGS=$CPPFLAGS | |||
| CFLAGS=$CFLAGS | |||
| CXXFLAGS=$CXXFLAGS | |||
| ASFLAGS=$ASFLAGS | |||
| AS_O=$CC_O | |||
| AS_O=$AS_O | |||
| CC_O=$CC_O | |||
| CXX_O=$CXX_O | |||
| LD_O=$LD_O | |||
| LDFLAGS=$LDFLAGS | |||
| FFSERVERLDFLAGS=$FFSERVERLDFLAGS | |||
| SHFLAGS=$SHFLAGS | |||
| @@ -3842,10 +3897,11 @@ SLIBPREF=$SLIBPREF | |||
| SLIBSUF=$SLIBSUF | |||
| EXESUF=$EXESUF | |||
| EXTRA_VERSION=$extra_version | |||
| DEPFLAGS=$DEPFLAGS | |||
| CCDEP=$CCDEP | |||
| CXXDEP=$CXXDEP | |||
| CCDEP_FLAGS=$CCDEP_FLAGS | |||
| ASDEP=$ASDEP | |||
| ASDEP_FLAGS=$ASDEP_FLAGS | |||
| CC_DEPFLAGS=$CC_DEPFLAGS | |||
| AS_DEPFLAGS=$AS_DEPFLAGS | |||
| HOSTCC=$host_cc | |||
| @@ -3853,6 +3909,12 @@ HOSTCFLAGS=$host_cflags | |||
| HOSTEXESUF=$HOSTEXESUF | |||
| HOSTLDFLAGS=$host_ldflags | |||
| HOSTLIBS=$host_libs | |||
| DEPHOSTCC=$host_cc | |||
| DEPHOSTCCFLAGS=$DEPHOSTCCFLAGS \$(HOSTCCFLAGS) | |||
| HOSTCCDEP=$HOSTCCDEP | |||
| HOSTCCDEP_FLAGS=$HOSTCCDEP_FLAGS | |||
| HOSTCC_DEPFLAGS=$HOSTCC_DEPFLAGS | |||
| HOSTCC_O=$HOSTCC_O | |||
| TARGET_EXEC=$target_exec | |||
| TARGET_PATH=$target_path | |||
| SDL_LIBS=$sdl_libs | |||
| @@ -28,8 +28,6 @@ doc/%.txt: doc/%.texi | |||
| $(Q)$(TEXIDEP) | |||
| $(M)makeinfo --force --no-headers -o $@ $< 2>/dev/null | |||
| doc/print_options.o: libavformat/options_table.h libavcodec/options_table.h | |||
| GENTEXI = format codec | |||
| GENTEXI := $(GENTEXI:%=doc/avoptions_%.texi) | |||
| @@ -32,7 +32,7 @@ OBJS = allcodecs.o \ | |||
| utils.o \ | |||
| # parts needed for many different codecs | |||
| OBJS-$(CONFIG_AANDCT) += aandcttab.o | |||
| OBJS-$(CONFIG_AANDCTTABLES) += aandcttab.o | |||
| OBJS-$(CONFIG_AC3DSP) += ac3dsp.o | |||
| OBJS-$(CONFIG_CRYSTALHD) += crystalhd.o | |||
| OBJS-$(CONFIG_ENCODERS) += faandct.o jfdctfst.o jfdctint.o | |||
| @@ -200,6 +200,7 @@ static void lpc_prediction(int32_t *error_buffer, int32_t *buffer_out, | |||
| int lpc_order, int lpc_quant) | |||
| { | |||
| int i; | |||
| int32_t *pred = buffer_out; | |||
| /* first sample always copies */ | |||
| *buffer_out = *error_buffer; | |||
| @@ -223,37 +224,35 @@ static void lpc_prediction(int32_t *error_buffer, int32_t *buffer_out, | |||
| } | |||
| /* read warm-up samples */ | |||
| for (i = 0; i < lpc_order; i++) { | |||
| buffer_out[i + 1] = sign_extend(buffer_out[i] + error_buffer[i + 1], | |||
| bps); | |||
| } | |||
| for (i = 1; i <= lpc_order; i++) | |||
| buffer_out[i] = sign_extend(buffer_out[i - 1] + error_buffer[i], bps); | |||
| /* NOTE: 4 and 8 are very common cases that could be optimized. */ | |||
| for (i = lpc_order; i < nb_samples - 1; i++) { | |||
| for (; i < nb_samples; i++) { | |||
| int j; | |||
| int val = 0; | |||
| int error_val = error_buffer[i + 1]; | |||
| int error_val = error_buffer[i]; | |||
| int error_sign; | |||
| int d = buffer_out[i - lpc_order]; | |||
| int d = *pred++; | |||
| /* LPC prediction */ | |||
| for (j = 0; j < lpc_order; j++) | |||
| val += (buffer_out[i - j] - d) * lpc_coefs[j]; | |||
| val += (pred[j] - d) * lpc_coefs[j]; | |||
| val = (val + (1 << (lpc_quant - 1))) >> lpc_quant; | |||
| val += d + error_val; | |||
| buffer_out[i + 1] = sign_extend(val, bps); | |||
| buffer_out[i] = sign_extend(val, bps); | |||
| /* adapt LPC coefficients */ | |||
| error_sign = sign_only(error_val); | |||
| if (error_sign) { | |||
| for (j = lpc_order - 1; j >= 0 && error_val * error_sign > 0; j--) { | |||
| for (j = 0; j < lpc_order && error_val * error_sign > 0; j++) { | |||
| int sign; | |||
| val = d - buffer_out[i - j]; | |||
| val = d - pred[j]; | |||
| sign = sign_only(val) * error_sign; | |||
| lpc_coefs[j] -= sign; | |||
| val *= sign; | |||
| error_val -= (val >> lpc_quant) * (lpc_order - j); | |||
| error_val -= (val >> lpc_quant) * (j + 1); | |||
| } | |||
| } | |||
| } | |||
| @@ -356,7 +355,7 @@ static int decode_element(AVCodecContext *avctx, void *data, int ch_index, | |||
| lpc_order[ch] = get_bits(&alac->gb, 5); | |||
| /* read the predictor table */ | |||
| for (i = 0; i < lpc_order[ch]; i++) | |||
| for (i = lpc_order[ch] - 1; i >= 0; i--) | |||
| lpc_coefs[ch][i] = get_sbits(&alac->gb, 16); | |||
| } | |||
| @@ -477,16 +476,19 @@ static int alac_decode_frame(AVCodecContext *avctx, void *data, | |||
| ALACContext *alac = avctx->priv_data; | |||
| enum RawDataBlockType element; | |||
| int channels; | |||
| int ch, ret; | |||
| int ch, ret, got_end; | |||
| init_get_bits(&alac->gb, avpkt->data, avpkt->size * 8); | |||
| got_end = 0; | |||
| alac->nb_samples = 0; | |||
| ch = 0; | |||
| while (get_bits_left(&alac->gb)) { | |||
| while (get_bits_left(&alac->gb) >= 3) { | |||
| element = get_bits(&alac->gb, 3); | |||
| if (element == TYPE_END) | |||
| if (element == TYPE_END) { | |||
| got_end = 1; | |||
| break; | |||
| } | |||
| if (element > TYPE_CPE && element != TYPE_LFE) { | |||
| av_log(avctx, AV_LOG_ERROR, "syntax element unsupported: %d", element); | |||
| return AVERROR_PATCHWELCOME; | |||
| @@ -501,11 +503,15 @@ static int alac_decode_frame(AVCodecContext *avctx, void *data, | |||
| ret = decode_element(avctx, data, | |||
| alac_channel_layout_offsets[alac->channels - 1][ch], | |||
| channels); | |||
| if (ret < 0) | |||
| if (ret < 0 && get_bits_left(&alac->gb)) | |||
| return ret; | |||
| ch += channels; | |||
| } | |||
| if (!got_end) { | |||
| av_log(avctx, AV_LOG_ERROR, "no end tag found. incomplete packet.\n"); | |||
| return AVERROR_INVALIDDATA; | |||
| } | |||
| if (avpkt->size * 8 - get_bits_count(&alac->gb) > 8) { | |||
| av_log(avctx, AV_LOG_ERROR, "Error : %d bits left\n", | |||
| @@ -298,8 +298,8 @@ static int tscc2_decode_frame(AVCodecContext *avctx, void *data, | |||
| if (!size) { | |||
| int skip_row = 1, j, off = i * c->mb_width; | |||
| for (j = 0; j < c->mb_width; j++) { | |||
| if (c->slice_quants[off + i] == 1 || | |||
| c->slice_quants[off + i] == 2) { | |||
| if (c->slice_quants[off + j] == 1 || | |||
| c->slice_quants[off + j] == 2) { | |||
| skip_row = 0; | |||
| break; | |||
| } | |||
| @@ -1158,12 +1158,7 @@ ALIGN 16 | |||
| add src1q, 2*mmsize | |||
| sub lenq, 2*mmsize | |||
| jge .loop | |||
| %if mmsize == 32 | |||
| vzeroupper | |||
| RET | |||
| %else | |||
| REP_RET | |||
| %endif | |||
| %endmacro | |||
| INIT_XMM sse | |||
| @@ -1193,12 +1188,7 @@ ALIGN 16 | |||
| sub lenq, 2*mmsize | |||
| jge .loop | |||
| %if mmsize == 32 | |||
| vzeroupper | |||
| RET | |||
| %else | |||
| REP_RET | |||
| %endif | |||
| %endmacro | |||
| INIT_XMM sse | |||
| @@ -1243,10 +1233,6 @@ cglobal butterflies_float_interleave, 4,4,3, dst, src0, src1, len | |||
| %endif | |||
| add lenq, mmsize | |||
| jl .loop | |||
| %if mmsize == 32 | |||
| vzeroupper | |||
| RET | |||
| %endif | |||
| .end: | |||
| REP_RET | |||
| %endmacro | |||
| @@ -750,9 +750,6 @@ section .text | |||
| ; The others pass args in registers and don't spill anything. | |||
| cglobal fft_dispatch%2, 2,5,8, z, nbits | |||
| FFT_DISPATCH fullsuffix, nbits | |||
| %if mmsize == 32 | |||
| vzeroupper | |||
| %endif | |||
| RET | |||
| %endmacro ; DECL_FFT | |||
| @@ -957,9 +954,6 @@ cglobal imdct_half, 3,12,8; FFTContext *s, FFTSample *output, const FFTSample *i | |||
| %1 r0, r1, r6, rtcos, rtsin | |||
| %if ARCH_X86_64 == 0 | |||
| add esp, 12 | |||
| %endif | |||
| %if mmsize == 32 | |||
| vzeroupper | |||
| %endif | |||
| RET | |||
| %endmacro | |||
| @@ -36,6 +36,8 @@ void ff_four_imdct36_float_avx(float *out, float *buf, float *in, float *win, | |||
| DECLARE_ALIGNED(16, static float, mdct_win_sse)[2][4][4*40]; | |||
| #if HAVE_INLINE_ASM | |||
| #define MACS(rt, ra, rb) rt+=(ra)*(rb) | |||
| #define MLSS(rt, ra, rb) rt-=(ra)*(rb) | |||
| @@ -178,6 +180,7 @@ static void apply_window_mp3(float *in, float *win, int *unused, float *out, | |||
| *out = sum; | |||
| } | |||
| #endif /* HAVE_INLINE_ASM */ | |||
| #define DECL_IMDCT_BLOCKS(CPU1, CPU2) \ | |||
| static void imdct36_blocks_ ## CPU1(float *out, float *buf, float *in, \ | |||
| @@ -241,9 +244,11 @@ void ff_mpadsp_init_mmx(MPADSPContext *s) | |||
| } | |||
| } | |||
| #if HAVE_INLINE_ASM | |||
| if (mm_flags & AV_CPU_FLAG_SSE2) { | |||
| s->apply_window_float = apply_window_mp3; | |||
| } | |||
| #endif /* HAVE_INLINE_ASM */ | |||
| #if HAVE_YASM | |||
| if (0) { | |||
| #if HAVE_AVX | |||
| @@ -83,8 +83,7 @@ section .text align=16 | |||
| ; %1 = row or col (for rounding variable) | |||
| ; %2 = number of bits to shift at the end | |||
| ; %3 = optimization | |||
| %macro IDCT_1D 3 | |||
| %macro IDCT_1D 2 | |||
| ; a0 = (W4 * row[0]) + (1 << (15 - 1)); | |||
| ; a1 = a0; | |||
| ; a2 = a0; | |||
| @@ -235,8 +234,8 @@ section .text align=16 | |||
| ; void prores_idct_put_10_<opt>(uint8_t *pixels, int stride, | |||
| ; DCTELEM *block, const int16_t *qmat); | |||
| %macro idct_put_fn 2 | |||
| cglobal prores_idct_put_10_%1, 4, 4, %2 | |||
| %macro idct_put_fn 1 | |||
| cglobal prores_idct_put_10, 4, 4, %1 | |||
| movsxd r1, r1d | |||
| pxor m15, m15 ; zero | |||
| @@ -252,7 +251,7 @@ cglobal prores_idct_put_10_%1, 4, 4, %2 | |||
| pmullw m13,[r3+64] | |||
| pmullw m12,[r3+96] | |||
| IDCT_1D row, 15, %1 | |||
| IDCT_1D row, 15 | |||
| ; transpose for second part of IDCT | |||
| TRANSPOSE8x8W 8, 0, 1, 2, 4, 11, 9, 10, 3 | |||
| @@ -267,7 +266,7 @@ cglobal prores_idct_put_10_%1, 4, 4, %2 | |||
| ; for (i = 0; i < 8; i++) | |||
| ; idctSparseColAdd(dest + i, line_size, block + i); | |||
| IDCT_1D col, 18, %1 | |||
| IDCT_1D col, 18 | |||
| ; clip/store | |||
| mova m3, [pw_4] | |||
| @@ -302,13 +301,27 @@ cglobal prores_idct_put_10_%1, 4, 4, %2 | |||
| RET | |||
| %endmacro | |||
| INIT_XMM | |||
| idct_put_fn sse2, 16 | |||
| INIT_XMM | |||
| idct_put_fn sse4, 16 | |||
| %macro SIGNEXTEND 2-3 ; dstlow, dsthigh, tmp | |||
| %if cpuflag(sse4) | |||
| movhlps %2, %1 | |||
| pmovsxwd %1, %1 | |||
| pmovsxwd %2, %2 | |||
| %else ; sse2 | |||
| pxor %3, %3 | |||
| pcmpgtw %3, %1 | |||
| mova %2, %1 | |||
| punpcklwd %1, %3 | |||
| punpckhwd %2, %3 | |||
| %endif | |||
| %endmacro | |||
| INIT_XMM sse2 | |||
| idct_put_fn 16 | |||
| INIT_XMM sse4 | |||
| idct_put_fn 16 | |||
| %if HAVE_AVX | |||
| INIT_AVX | |||
| idct_put_fn avx, 16 | |||
| INIT_XMM avx | |||
| idct_put_fn 16 | |||
| %endif | |||
| %endif | |||
| @@ -578,11 +578,44 @@ static void swap_samplerates(AVFilterGraph *graph) | |||
| swap_samplerates_on_filter(graph->filters[i]); | |||
| } | |||
| #define CH_CENTER_PAIR (AV_CH_FRONT_LEFT_OF_CENTER | AV_CH_FRONT_RIGHT_OF_CENTER) | |||
| #define CH_FRONT_PAIR (AV_CH_FRONT_LEFT | AV_CH_FRONT_RIGHT) | |||
| #define CH_STEREO_PAIR (AV_CH_STEREO_LEFT | AV_CH_STEREO_RIGHT) | |||
| #define CH_WIDE_PAIR (AV_CH_WIDE_LEFT | AV_CH_WIDE_RIGHT) | |||
| #define CH_SIDE_PAIR (AV_CH_SIDE_LEFT | AV_CH_SIDE_RIGHT) | |||
| #define CH_DIRECT_PAIR (AV_CH_SURROUND_DIRECT_LEFT | AV_CH_SURROUND_DIRECT_RIGHT) | |||
| #define CH_BACK_PAIR (AV_CH_BACK_LEFT | AV_CH_BACK_RIGHT) | |||
| /* Allowable substitutions for channel pairs when comparing layouts, | |||
| * ordered by priority for both values. Each row is { cmp0, cmp1 }: when the input layout has bits of cmp0 that the output layout lacks entirely, and the output layout has bits of cmp1 that the input layout lacks entirely, the layout scoring loop strips both masks and counts the pair as a match with a small deduction. Rows are tried in array order, so earlier rows win. */ | |||
| static const uint64_t ch_subst[][2] = { | |||
| { CH_FRONT_PAIR, CH_CENTER_PAIR }, | |||
| { CH_FRONT_PAIR, CH_WIDE_PAIR }, | |||
| { CH_FRONT_PAIR, AV_CH_FRONT_CENTER }, | |||
| { CH_CENTER_PAIR, CH_FRONT_PAIR }, | |||
| { CH_CENTER_PAIR, CH_WIDE_PAIR }, | |||
| { CH_CENTER_PAIR, AV_CH_FRONT_CENTER }, | |||
| { CH_WIDE_PAIR, CH_FRONT_PAIR }, | |||
| { CH_WIDE_PAIR, CH_CENTER_PAIR }, | |||
| { CH_WIDE_PAIR, AV_CH_FRONT_CENTER }, | |||
| { AV_CH_FRONT_CENTER, CH_FRONT_PAIR }, | |||
| { AV_CH_FRONT_CENTER, CH_CENTER_PAIR }, | |||
| { AV_CH_FRONT_CENTER, CH_WIDE_PAIR }, | |||
| { CH_SIDE_PAIR, CH_DIRECT_PAIR }, | |||
| { CH_SIDE_PAIR, CH_BACK_PAIR }, | |||
| { CH_SIDE_PAIR, AV_CH_BACK_CENTER }, | |||
| { CH_BACK_PAIR, CH_DIRECT_PAIR }, | |||
| { CH_BACK_PAIR, CH_SIDE_PAIR }, | |||
| { CH_BACK_PAIR, AV_CH_BACK_CENTER }, | |||
| { AV_CH_BACK_CENTER, CH_BACK_PAIR }, | |||
| { AV_CH_BACK_CENTER, CH_DIRECT_PAIR }, | |||
| { AV_CH_BACK_CENTER, CH_SIDE_PAIR }, | |||
| }; | |||
| static void swap_channel_layouts_on_filter(AVFilterContext *filter) | |||
| { | |||
| AVFilterLink *link = NULL; | |||
| uint64_t chlayout; | |||
| int i, j; | |||
| int i, j, k; | |||
| for (i = 0; i < filter->nb_inputs; i++) { | |||
| link = filter->inputs[i]; | |||
| @@ -594,27 +627,55 @@ static void swap_channel_layouts_on_filter(AVFilterContext *filter) | |||
| if (i == filter->nb_inputs) | |||
| return; | |||
| chlayout = link->out_channel_layouts->channel_layouts[0]; | |||
| for (i = 0; i < filter->nb_outputs; i++) { | |||
| AVFilterLink *outlink = filter->outputs[i]; | |||
| int best_idx, best_score = INT_MIN; | |||
| int best_idx, best_score = INT_MIN, best_count_diff = INT_MAX; | |||
| if (outlink->type != AVMEDIA_TYPE_AUDIO || | |||
| outlink->in_channel_layouts->nb_channel_layouts < 2) | |||
| continue; | |||
| for (j = 0; j < outlink->in_channel_layouts->nb_channel_layouts; j++) { | |||
| uint64_t in_chlayout = link->out_channel_layouts->channel_layouts[0]; | |||
| uint64_t out_chlayout = outlink->in_channel_layouts->channel_layouts[j]; | |||
| int matched_channels = av_get_channel_layout_nb_channels(chlayout & | |||
| out_chlayout); | |||
| int extra_channels = av_get_channel_layout_nb_channels(out_chlayout & | |||
| (~chlayout)); | |||
| int score = matched_channels - extra_channels; | |||
| int in_channels = av_get_channel_layout_nb_channels(in_chlayout); | |||
| int out_channels = av_get_channel_layout_nb_channels(out_chlayout); | |||
| int count_diff = out_channels - in_channels; | |||
| int matched_channels, extra_channels; | |||
| int score = 0; | |||
| /* channel substitution */ | |||
| for (k = 0; k < FF_ARRAY_ELEMS(ch_subst); k++) { | |||
| uint64_t cmp0 = ch_subst[k][0]; | |||
| uint64_t cmp1 = ch_subst[k][1]; | |||
| if (( in_chlayout & cmp0) && (!(out_chlayout & cmp0)) && | |||
| (out_chlayout & cmp1) && (!( in_chlayout & cmp1))) { | |||
| in_chlayout &= ~cmp0; | |||
| out_chlayout &= ~cmp1; | |||
| /* add score for channel match, minus a deduction for | |||
| having to do the substitution */ | |||
| score += 10 * av_get_channel_layout_nb_channels(cmp1) - 2; | |||
| } | |||
| } | |||
| if (score > best_score) { | |||
| /* no penalty for LFE channel mismatch */ | |||
| if ( (in_chlayout & AV_CH_LOW_FREQUENCY) && | |||
| (out_chlayout & AV_CH_LOW_FREQUENCY)) | |||
| score += 10; | |||
| in_chlayout &= ~AV_CH_LOW_FREQUENCY; | |||
| out_chlayout &= ~AV_CH_LOW_FREQUENCY; | |||
| matched_channels = av_get_channel_layout_nb_channels(in_chlayout & | |||
| out_chlayout); | |||
| extra_channels = av_get_channel_layout_nb_channels(out_chlayout & | |||
| (~in_chlayout)); | |||
| score += 10 * matched_channels - 5 * extra_channels; | |||
| if (score > best_score || | |||
| (count_diff < best_count_diff && score == best_score)) { | |||
| best_score = score; | |||
| best_idx = j; | |||
| best_count_diff = count_diff; | |||
| } | |||
| } | |||
| FFSWAP(uint64_t, outlink->in_channel_layouts->channel_layouts[0], | |||
| @@ -515,6 +515,12 @@ static int gen_pong(URLContext *s, RTMPContext *rt, RTMPPacket *ppkt) | |||
| uint8_t *p; | |||
| int ret; | |||
| if (ppkt->data_size < 6) { | |||
| av_log(s, AV_LOG_ERROR, "Too short ping packet (%d)\n", | |||
| ppkt->data_size); | |||
| return AVERROR_INVALIDDATA; | |||
| } | |||
| if ((ret = ff_rtmp_packet_create(&pkt, RTMP_NETWORK_CHANNEL, RTMP_PT_PING, | |||
| ppkt->timestamp + 1, 6)) < 0) | |||
| return ret; | |||
| @@ -885,9 +891,9 @@ static int handle_chunk_size(URLContext *s, RTMPPacket *pkt) | |||
| RTMPContext *rt = s->priv_data; | |||
| int ret; | |||
| if (pkt->data_size != 4) { | |||
| if (pkt->data_size < 4) { | |||
| av_log(s, AV_LOG_ERROR, | |||
| "Chunk size change packet is not 4 bytes long (%d)\n", | |||
| "Too short chunk size change packet (%d)\n", | |||
| pkt->data_size); | |||
| return AVERROR_INVALIDDATA; | |||
| } | |||
| @@ -913,6 +919,12 @@ static int handle_ping(URLContext *s, RTMPPacket *pkt) | |||
| RTMPContext *rt = s->priv_data; | |||
| int t, ret; | |||
| if (pkt->data_size < 2) { | |||
| av_log(s, AV_LOG_ERROR, "Too short ping packet (%d)\n", | |||
| pkt->data_size); | |||
| return AVERROR_INVALIDDATA; | |||
| } | |||
| t = AV_RB16(pkt->data); | |||
| if (t == 6) { | |||
| if ((ret = gen_pong(s, rt, pkt)) < 0) | |||
| @@ -950,6 +962,13 @@ static int handle_server_bw(URLContext *s, RTMPPacket *pkt) | |||
| { | |||
| RTMPContext *rt = s->priv_data; | |||
| if (pkt->data_size < 4) { | |||
| av_log(s, AV_LOG_ERROR, | |||
| "Too short server bandwidth report packet (%d)\n", | |||
| pkt->data_size); | |||
| return AVERROR_INVALIDDATA; | |||
| } | |||
| rt->server_bw = AV_RB32(pkt->data); | |||
| if (rt->server_bw <= 0) { | |||
| av_log(s, AV_LOG_ERROR, "Incorrect server bandwidth %d\n", | |||
| @@ -246,9 +246,10 @@ static int handle_buffered_output(AVAudioResampleContext *avr, | |||
| return 0; | |||
| } | |||
| int avresample_convert(AVAudioResampleContext *avr, void **output, | |||
| int out_plane_size, int out_samples, void **input, | |||
| int in_plane_size, int in_samples) | |||
| int attribute_align_arg avresample_convert(AVAudioResampleContext *avr, | |||
| void **output, int out_plane_size, | |||
| int out_samples, void **input, | |||
| int in_plane_size, int in_samples) | |||
| { | |||
| AudioData input_buffer; | |||
| AudioData output_buffer; | |||
| @@ -145,12 +145,7 @@ cglobal conv_s32_to_flt, 3,3,3, dst, src, len | |||
| mova [dstq+lenq+mmsize], m2 | |||
| add lenq, mmsize*2 | |||
| jl .loop | |||
| %if mmsize == 32 | |||
| vzeroupper | |||
| RET | |||
| %else | |||
| REP_RET | |||
| %endif | |||
| %endmacro | |||
| INIT_XMM sse2 | |||
| @@ -218,12 +213,7 @@ cglobal conv_flt_to_s32, 3,3,5, dst, src, len | |||
| mova [dstq+lenq+3*mmsize], m3 | |||
| add lenq, mmsize*4 | |||
| jl .loop | |||
| %if mmsize == 32 | |||
| vzeroupper | |||
| RET | |||
| %else | |||
| REP_RET | |||
| %endif | |||
| %endmacro | |||
| INIT_XMM sse2 | |||
| @@ -51,12 +51,7 @@ cglobal mix_2_to_1_fltp_flt, 3,4,6, src, matrix, len, src1 | |||
| add srcq, mmsize*2 | |||
| sub lend, mmsize*2/4 | |||
| jg .loop | |||
| %if mmsize == 32 | |||
| vzeroupper | |||
| RET | |||
| %else | |||
| REP_RET | |||
| %endif | |||
| %endmacro | |||
| INIT_XMM sse | |||
| @@ -175,12 +170,7 @@ cglobal mix_1_to_2_fltp_flt, 3,5,4, src0, matrix0, len, src1, matrix1 | |||
| add src0q, mmsize | |||
| sub lend, mmsize/4 | |||
| jg .loop | |||
| %if mmsize == 32 | |||
| vzeroupper | |||
| RET | |||
| %else | |||
| REP_RET | |||
| %endif | |||
| %endmacro | |||
| INIT_XMM sse | |||
| @@ -236,3 +226,296 @@ MIX_1_TO_2_S16P_FLT | |||
| INIT_XMM avx | |||
| MIX_1_TO_2_S16P_FLT | |||
| %endif | |||
| ;----------------------------------------------------------------------------- | |||
| ; void ff_mix_3_8_to_1_2_fltp/s16p_flt(float/int16_t **src, float **matrix, | |||
| ; int len, int out_ch, int in_ch); | |||
| ;----------------------------------------------------------------------------- | |||
| %macro MIX_3_8_TO_1_2_FLT 3 ; %1 = in channels, %2 = out channels, %3 = s16p or fltp | |||
| ; Emits mix_%1_to_%2_%3_flt. Every matrix coefficient is broadcast once before | |||
| ; the loop, into a spare mm register when one is left and into aligned stack | |||
| ; space otherwise. The loop then consumes mmsize bytes per input channel per | |||
| ; iteration and stores the mixed result in place over src0 (and over src1 when | |||
| ; there are two output channels). | |||
| ; define some names to make the code clearer | |||
| %assign in_channels %1 | |||
| %assign out_channels %2 | |||
| %assign stereo out_channels - 1 | |||
| %ifidn %3, s16p | |||
| %assign is_s16 1 | |||
| %else | |||
| %assign is_s16 0 | |||
| %endif | |||
| ; determine how many matrix elements must go on the stack vs. mmregs | |||
| ; (needed_mmregs = registers the loop body itself uses for samples and temporaries) | |||
| %assign matrix_elements in_channels * out_channels | |||
| %if is_s16 | |||
| %if stereo | |||
| %assign needed_mmregs 7 | |||
| %else | |||
| %assign needed_mmregs 5 | |||
| %endif | |||
| %else | |||
| %if stereo | |||
| %assign needed_mmregs 4 | |||
| %else | |||
| %assign needed_mmregs 3 | |||
| %endif | |||
| %endif | |||
| %assign matrix_elements_mm num_mmregs - needed_mmregs | |||
| %if matrix_elements < matrix_elements_mm | |||
| %assign matrix_elements_mm matrix_elements | |||
| %endif | |||
| %if matrix_elements_mm < matrix_elements | |||
| %assign matrix_elements_stack matrix_elements - matrix_elements_mm | |||
| %else | |||
| %assign matrix_elements_stack 0 | |||
| %endif | |||
| cglobal mix_%1_to_%2_%3_flt, 3,in_channels+2,needed_mmregs+matrix_elements_mm, src0, src1, len, src2, src3, src4, src5, src6, src7 | |||
| ; get aligned stack space if needed | |||
| %if matrix_elements_stack > 0 | |||
| %if mmsize == 32 | |||
| ; ymm spills need 32-byte alignment: keep the old rsp in the last gp register | |||
| %assign bkpreg %1 + 1 | |||
| %define bkpq r %+ bkpreg %+ q | |||
| mov bkpq, rsp | |||
| and rsp, ~(mmsize-1) | |||
| sub rsp, matrix_elements_stack * mmsize | |||
| %else | |||
| %assign pad matrix_elements_stack * mmsize + (mmsize - gprsize) - (stack_offset & (mmsize - gprsize)) | |||
| SUB rsp, pad | |||
| %endif | |||
| %endif | |||
| ; load matrix pointers | |||
| %define matrix0q r1q | |||
| %define matrix1q r3q | |||
| %if stereo | |||
| mov matrix1q, [matrix0q+gprsize] | |||
| %endif | |||
| mov matrix0q, [matrix0q] | |||
| ; define matrix coeff names | |||
| ; mx_<out>_<in> is either a register or a stack slot; mx_stack_<out>_<in> says which | |||
| %assign %%i 0 | |||
| %assign %%j needed_mmregs | |||
| %rep in_channels | |||
| %if %%i >= matrix_elements_mm | |||
| CAT_XDEFINE mx_stack_0_, %%i, 1 | |||
| CAT_XDEFINE mx_0_, %%i, [rsp+(%%i-matrix_elements_mm)*mmsize] | |||
| %else | |||
| CAT_XDEFINE mx_stack_0_, %%i, 0 | |||
| CAT_XDEFINE mx_0_, %%i, m %+ %%j | |||
| %assign %%j %%j+1 | |||
| %endif | |||
| %assign %%i %%i+1 | |||
| %endrep | |||
| %if stereo | |||
| %assign %%i 0 | |||
| %rep in_channels | |||
| %if in_channels + %%i >= matrix_elements_mm | |||
| CAT_XDEFINE mx_stack_1_, %%i, 1 | |||
| CAT_XDEFINE mx_1_, %%i, [rsp+(in_channels+%%i-matrix_elements_mm)*mmsize] | |||
| %else | |||
| CAT_XDEFINE mx_stack_1_, %%i, 0 | |||
| CAT_XDEFINE mx_1_, %%i, m %+ %%j | |||
| %assign %%j %%j+1 | |||
| %endif | |||
| %assign %%i %%i+1 | |||
| %endrep | |||
| %endif | |||
| ; load/splat matrix coeffs | |||
| %assign %%i 0 | |||
| %rep in_channels | |||
| %if mx_stack_0_ %+ %%i | |||
| VBROADCASTSS m0, [matrix0q+4*%%i] | |||
| mova mx_0_ %+ %%i, m0 | |||
| %else | |||
| VBROADCASTSS mx_0_ %+ %%i, [matrix0q+4*%%i] | |||
| %endif | |||
| %if stereo | |||
| %if mx_stack_1_ %+ %%i | |||
| VBROADCASTSS m0, [matrix1q+4*%%i] | |||
| mova mx_1_ %+ %%i, m0 | |||
| %else | |||
| VBROADCASTSS mx_1_ %+ %%i, [matrix1q+4*%%i] | |||
| %endif | |||
| %endif | |||
| %assign %%i %%i+1 | |||
| %endrep | |||
| ; load channel pointers to registers as offsets from the first channel pointer | |||
| %if ARCH_X86_64 | |||
| movsxd lenq, r2d | |||
| %endif | |||
| ; convert the sample count to a byte count (4 bytes per float, 2 per int16) | |||
| shl lenq, 2-is_s16 | |||
| %assign %%i 1 | |||
| %rep (in_channels - 1) | |||
| %if ARCH_X86_32 && in_channels >= 7 && %%i >= 5 | |||
| mov src5q, [src0q+%%i*gprsize] | |||
| add src5q, lenq | |||
| mov src %+ %%i %+ m, src5q | |||
| %else | |||
| mov src %+ %%i %+ q, [src0q+%%i*gprsize] | |||
| add src %+ %%i %+ q, lenq | |||
| %endif | |||
| %assign %%i %%i+1 | |||
| %endrep | |||
| mov src0q, [src0q] | |||
| add src0q, lenq | |||
| ; pointers now address the end of each channel; lenq counts up from -bytes to 0 | |||
| neg lenq | |||
| .loop: | |||
| ; for x86-32 with 7-8 channels we do not have enough gp registers for all src | |||
| ; pointers, so we have to load some of them from the stack each time | |||
| %define copy_src_from_stack ARCH_X86_32 && in_channels >= 7 && %%i >= 5 | |||
| %if is_s16 | |||
| ; mix with s16p input | |||
| mova m0, [src0q+lenq] | |||
| S16_TO_S32_SX 0, 1 | |||
| cvtdq2ps m0, m0 | |||
| cvtdq2ps m1, m1 | |||
| %if stereo | |||
| mulps m2, m0, mx_1_0 | |||
| mulps m3, m1, mx_1_0 | |||
| %endif | |||
| mulps m0, m0, mx_0_0 | |||
| mulps m1, m1, mx_0_0 | |||
| %assign %%i 1 | |||
| %rep (in_channels - 1) | |||
| %if copy_src_from_stack | |||
| %define src_ptr src5q | |||
| %else | |||
| %define src_ptr src %+ %%i %+ q | |||
| %endif | |||
| %if stereo | |||
| %if copy_src_from_stack | |||
| mov src_ptr, src %+ %%i %+ m | |||
| %endif | |||
| mova m4, [src_ptr+lenq] | |||
| S16_TO_S32_SX 4, 5 | |||
| cvtdq2ps m4, m4 | |||
| cvtdq2ps m5, m5 | |||
| fmaddps m2, m4, mx_1_ %+ %%i, m2, m6 | |||
| fmaddps m3, m5, mx_1_ %+ %%i, m3, m6 | |||
| fmaddps m0, m4, mx_0_ %+ %%i, m0, m4 | |||
| fmaddps m1, m5, mx_0_ %+ %%i, m1, m5 | |||
| %else | |||
| %if copy_src_from_stack | |||
| mov src_ptr, src %+ %%i %+ m | |||
| %endif | |||
| mova m2, [src_ptr+lenq] | |||
| S16_TO_S32_SX 2, 3 | |||
| cvtdq2ps m2, m2 | |||
| cvtdq2ps m3, m3 | |||
| fmaddps m0, m2, mx_0_ %+ %%i, m0, m4 | |||
| fmaddps m1, m3, mx_0_ %+ %%i, m1, m4 | |||
| %endif | |||
| %assign %%i %%i+1 | |||
| %endrep | |||
| %if stereo | |||
| cvtps2dq m2, m2 | |||
| cvtps2dq m3, m3 | |||
| packssdw m2, m3 | |||
| mova [src1q+lenq], m2 | |||
| %endif | |||
| cvtps2dq m0, m0 | |||
| cvtps2dq m1, m1 | |||
| packssdw m0, m1 | |||
| mova [src0q+lenq], m0 | |||
| %else | |||
| ; mix with fltp input | |||
| %if stereo || mx_stack_0_0 | |||
| mova m0, [src0q+lenq] | |||
| %endif | |||
| %if stereo | |||
| mulps m1, m0, mx_1_0 | |||
| %endif | |||
| %if stereo || mx_stack_0_0 | |||
| mulps m0, m0, mx_0_0 | |||
| %else | |||
| mulps m0, [src0q+lenq], mx_0_0 | |||
| %endif | |||
| %assign %%i 1 | |||
| %rep (in_channels - 1) | |||
| %if copy_src_from_stack | |||
| %define src_ptr src5q | |||
| mov src_ptr, src %+ %%i %+ m | |||
| %else | |||
| %define src_ptr src %+ %%i %+ q | |||
| %endif | |||
| ; avoid extra load for mono if matrix is in a mm register | |||
| %if stereo || mx_stack_0_ %+ %%i | |||
| mova m2, [src_ptr+lenq] | |||
| %endif | |||
| %if stereo | |||
| fmaddps m1, m2, mx_1_ %+ %%i, m1, m3 | |||
| %endif | |||
| %if stereo || mx_stack_0_ %+ %%i | |||
| fmaddps m0, m2, mx_0_ %+ %%i, m0, m2 | |||
| %else | |||
| fmaddps m0, mx_0_ %+ %%i, [src_ptr+lenq], m0, m1 | |||
| %endif | |||
| %assign %%i %%i+1 | |||
| %endrep | |||
| mova [src0q+lenq], m0 | |||
| %if stereo | |||
| mova [src1q+lenq], m1 | |||
| %endif | |||
| %endif | |||
| add lenq, mmsize | |||
| jl .loop | |||
| ; restore stack pointer | |||
| %if matrix_elements_stack > 0 | |||
| %if mmsize == 32 | |||
| mov rsp, bkpq | |||
| %else | |||
| ADD rsp, pad | |||
| %endif | |||
| %endif | |||
| ; no explicit vzeroupper here: RET already emits one when mmsize == 32 | |||
| RET | |||
| %endmacro | |||
| %macro MIX_3_8_TO_1_2_FLT_FUNCS 0 | |||
| %assign %%i 3 | |||
| %rep 6 | |||
| INIT_XMM sse | |||
| MIX_3_8_TO_1_2_FLT %%i, 1, fltp | |||
| MIX_3_8_TO_1_2_FLT %%i, 2, fltp | |||
| INIT_XMM sse2 | |||
| MIX_3_8_TO_1_2_FLT %%i, 1, s16p | |||
| MIX_3_8_TO_1_2_FLT %%i, 2, s16p | |||
| INIT_XMM sse4 | |||
| MIX_3_8_TO_1_2_FLT %%i, 1, s16p | |||
| MIX_3_8_TO_1_2_FLT %%i, 2, s16p | |||
| ; do not use ymm AVX or FMA4 in x86-32 for 6 or more channels due to stack alignment issues: the fltp versions are built with ymm on x86-64 or when the input channel count %%i is below 6, and with xmm otherwise; the s16p versions always use xmm | |||
| %if HAVE_AVX | |||
| %if ARCH_X86_64 || %%i < 6 | |||
| INIT_YMM avx | |||
| %else | |||
| INIT_XMM avx | |||
| %endif | |||
| MIX_3_8_TO_1_2_FLT %%i, 1, fltp | |||
| MIX_3_8_TO_1_2_FLT %%i, 2, fltp | |||
| INIT_XMM avx | |||
| MIX_3_8_TO_1_2_FLT %%i, 1, s16p | |||
| MIX_3_8_TO_1_2_FLT %%i, 2, s16p | |||
| %endif | |||
| %if HAVE_FMA4 | |||
| %if ARCH_X86_64 || %%i < 6 | |||
| INIT_YMM fma4 | |||
| %else | |||
| INIT_XMM fma4 | |||
| %endif | |||
| MIX_3_8_TO_1_2_FLT %%i, 1, fltp | |||
| MIX_3_8_TO_1_2_FLT %%i, 2, fltp | |||
| INIT_XMM fma4 | |||
| MIX_3_8_TO_1_2_FLT %%i, 1, s16p | |||
| MIX_3_8_TO_1_2_FLT %%i, 2, s16p | |||
| %endif | |||
| %assign %%i %%i+1 | |||
| %endrep | |||
| %endmacro | |||
| MIX_3_8_TO_1_2_FLT_FUNCS | |||
| @@ -47,6 +47,129 @@ extern void ff_mix_1_to_2_s16p_flt_sse4(int16_t **src, float **matrix, int len, | |||
| extern void ff_mix_1_to_2_s16p_flt_avx (int16_t **src, float **matrix, int len, | |||
| int out_ch, int in_ch); | |||
| #define DEFINE_MIX_3_8_TO_1_2(chan) /* declares the extern mixing functions taking chan input channels: 1- and 2-output variants, fltp (float **src) and s16p (int16_t **src), for each instruction-set suffix listed below */ \ | |||
| extern void ff_mix_ ## chan ## _to_1_fltp_flt_sse(float **src, \ | |||
| float **matrix, int len, \ | |||
| int out_ch, int in_ch); \ | |||
| extern void ff_mix_ ## chan ## _to_2_fltp_flt_sse(float **src, \ | |||
| float **matrix, int len, \ | |||
| int out_ch, int in_ch); \ | |||
| \ | |||
| extern void ff_mix_ ## chan ## _to_1_s16p_flt_sse2(int16_t **src, \ | |||
| float **matrix, int len, \ | |||
| int out_ch, int in_ch); \ | |||
| extern void ff_mix_ ## chan ## _to_2_s16p_flt_sse2(int16_t **src, \ | |||
| float **matrix, int len, \ | |||
| int out_ch, int in_ch); \ | |||
| \ | |||
| extern void ff_mix_ ## chan ## _to_1_s16p_flt_sse4(int16_t **src, \ | |||
| float **matrix, int len, \ | |||
| int out_ch, int in_ch); \ | |||
| extern void ff_mix_ ## chan ## _to_2_s16p_flt_sse4(int16_t **src, \ | |||
| float **matrix, int len, \ | |||
| int out_ch, int in_ch); \ | |||
| \ | |||
| extern void ff_mix_ ## chan ## _to_1_fltp_flt_avx(float **src, \ | |||
| float **matrix, int len, \ | |||
| int out_ch, int in_ch); \ | |||
| extern void ff_mix_ ## chan ## _to_2_fltp_flt_avx(float **src, \ | |||
| float **matrix, int len, \ | |||
| int out_ch, int in_ch); \ | |||
| \ | |||
| extern void ff_mix_ ## chan ## _to_1_s16p_flt_avx(int16_t **src, \ | |||
| float **matrix, int len, \ | |||
| int out_ch, int in_ch); \ | |||
| extern void ff_mix_ ## chan ## _to_2_s16p_flt_avx(int16_t **src, \ | |||
| float **matrix, int len, \ | |||
| int out_ch, int in_ch); \ | |||
| \ | |||
| extern void ff_mix_ ## chan ## _to_1_fltp_flt_fma4(float **src, \ | |||
| float **matrix, int len, \ | |||
| int out_ch, int in_ch); \ | |||
| extern void ff_mix_ ## chan ## _to_2_fltp_flt_fma4(float **src, \ | |||
| float **matrix, int len, \ | |||
| int out_ch, int in_ch); \ | |||
| \ | |||
| extern void ff_mix_ ## chan ## _to_1_s16p_flt_fma4(int16_t **src, \ | |||
| float **matrix, int len, \ | |||
| int out_ch, int in_ch); \ | |||
| extern void ff_mix_ ## chan ## _to_2_s16p_flt_fma4(int16_t **src, \ | |||
| float **matrix, int len, \ | |||
| int out_ch, int in_ch); | |||
| DEFINE_MIX_3_8_TO_1_2(3) | |||
| DEFINE_MIX_3_8_TO_1_2(4) | |||
| DEFINE_MIX_3_8_TO_1_2(5) | |||
| DEFINE_MIX_3_8_TO_1_2(6) | |||
| DEFINE_MIX_3_8_TO_1_2(7) | |||
| DEFINE_MIX_3_8_TO_1_2(8) | |||
| #define SET_MIX_3_8_TO_1_2(chan) \ | |||
| if (mm_flags & AV_CPU_FLAG_SSE && HAVE_SSE) { \ | |||
| ff_audio_mix_set_func(am, AV_SAMPLE_FMT_FLTP, AV_MIX_COEFF_TYPE_FLT,\ | |||
| chan, 1, 16, 4, "SSE", \ | |||
| ff_mix_ ## chan ## _to_1_fltp_flt_sse); \ | |||
| ff_audio_mix_set_func(am, AV_SAMPLE_FMT_FLTP, AV_MIX_COEFF_TYPE_FLT,\ | |||
| chan, 2, 16, 4, "SSE", \ | |||
| ff_mix_## chan ##_to_2_fltp_flt_sse); \ | |||
| } \ | |||
| if (mm_flags & AV_CPU_FLAG_SSE2 && HAVE_SSE) { \ | |||
| ff_audio_mix_set_func(am, AV_SAMPLE_FMT_S16P, AV_MIX_COEFF_TYPE_FLT,\ | |||
| chan, 1, 16, 8, "SSE2", \ | |||
| ff_mix_ ## chan ## _to_1_s16p_flt_sse2); \ | |||
| ff_audio_mix_set_func(am, AV_SAMPLE_FMT_S16P, AV_MIX_COEFF_TYPE_FLT,\ | |||
| chan, 2, 16, 8, "SSE2", \ | |||
| ff_mix_ ## chan ## _to_2_s16p_flt_sse2); \ | |||
| } \ | |||
| if (mm_flags & AV_CPU_FLAG_SSE4 && HAVE_SSE) { \ | |||
| ff_audio_mix_set_func(am, AV_SAMPLE_FMT_S16P, AV_MIX_COEFF_TYPE_FLT,\ | |||
| chan, 1, 16, 8, "SSE4", \ | |||
| ff_mix_ ## chan ## _to_1_s16p_flt_sse4); \ | |||
| ff_audio_mix_set_func(am, AV_SAMPLE_FMT_S16P, AV_MIX_COEFF_TYPE_FLT,\ | |||
| chan, 2, 16, 8, "SSE4", \ | |||
| ff_mix_ ## chan ## _to_2_s16p_flt_sse4); \ | |||
| } \ | |||
| if (mm_flags & AV_CPU_FLAG_AVX && HAVE_AVX) { \ | |||
| int ptr_align = 32; \ | |||
| int smp_align = 8; \ | |||
| if (ARCH_X86_32 || chan >= 6) { \ | |||
| ptr_align = 16; \ | |||
| smp_align = 4; \ | |||
| } \ | |||
| ff_audio_mix_set_func(am, AV_SAMPLE_FMT_FLTP, AV_MIX_COEFF_TYPE_FLT,\ | |||
| chan, 1, ptr_align, smp_align, "AVX", \ | |||
| ff_mix_ ## chan ## _to_1_fltp_flt_avx); \ | |||
| ff_audio_mix_set_func(am, AV_SAMPLE_FMT_FLTP, AV_MIX_COEFF_TYPE_FLT,\ | |||
| chan, 2, ptr_align, smp_align, "AVX", \ | |||
| ff_mix_ ## chan ## _to_2_fltp_flt_avx); \ | |||
| ff_audio_mix_set_func(am, AV_SAMPLE_FMT_S16P, AV_MIX_COEFF_TYPE_FLT,\ | |||
| chan, 1, 16, 8, "AVX", \ | |||
| ff_mix_ ## chan ## _to_1_s16p_flt_avx); \ | |||
| ff_audio_mix_set_func(am, AV_SAMPLE_FMT_S16P, AV_MIX_COEFF_TYPE_FLT,\ | |||
| chan, 2, 16, 8, "AVX", \ | |||
| ff_mix_ ## chan ## _to_2_s16p_flt_avx); \ | |||
| } \ | |||
| if (mm_flags & AV_CPU_FLAG_FMA4 && HAVE_FMA4) { \ | |||
| int ptr_align = 32; \ | |||
| int smp_align = 8; \ | |||
| if (ARCH_X86_32 || chan >= 6) { \ | |||
| ptr_align = 16; \ | |||
| smp_align = 4; \ | |||
| } \ | |||
| ff_audio_mix_set_func(am, AV_SAMPLE_FMT_FLTP, AV_MIX_COEFF_TYPE_FLT,\ | |||
| chan, 1, ptr_align, smp_align, "FMA4", \ | |||
| ff_mix_ ## chan ## _to_1_fltp_flt_fma4); \ | |||
| ff_audio_mix_set_func(am, AV_SAMPLE_FMT_FLTP, AV_MIX_COEFF_TYPE_FLT,\ | |||
| chan, 2, ptr_align, smp_align, "FMA4", \ | |||
| ff_mix_ ## chan ## _to_2_fltp_flt_fma4); \ | |||
| ff_audio_mix_set_func(am, AV_SAMPLE_FMT_S16P, AV_MIX_COEFF_TYPE_FLT,\ | |||
| chan, 1, 16, 8, "FMA4", \ | |||
| ff_mix_ ## chan ## _to_1_s16p_flt_fma4); \ | |||
| ff_audio_mix_set_func(am, AV_SAMPLE_FMT_S16P, AV_MIX_COEFF_TYPE_FLT,\ | |||
| chan, 2, 16, 8, "FMA4", \ | |||
| ff_mix_ ## chan ## _to_2_s16p_flt_fma4); \ | |||
| } | |||
| av_cold void ff_audio_mix_init_x86(AudioMix *am) | |||
| { | |||
| #if HAVE_YASM | |||
| @@ -80,5 +203,12 @@ av_cold void ff_audio_mix_init_x86(AudioMix *am) | |||
| ff_audio_mix_set_func(am, AV_SAMPLE_FMT_S16P, AV_MIX_COEFF_TYPE_FLT, | |||
| 1, 2, 16, 8, "AVX", ff_mix_1_to_2_s16p_flt_avx); | |||
| } | |||
| SET_MIX_3_8_TO_1_2(3) | |||
| SET_MIX_3_8_TO_1_2(4) | |||
| SET_MIX_3_8_TO_1_2(5) | |||
| SET_MIX_3_8_TO_1_2(6) | |||
| SET_MIX_3_8_TO_1_2(7) | |||
| SET_MIX_3_8_TO_1_2(8) | |||
| #endif | |||
| } | |||
| @@ -26,7 +26,8 @@ | |||
| pmovsxwd m%1, m%1 | |||
| SWAP %1, %2 | |||
| %else | |||
| punpckhwd m%2, m%1 | |||
| mova m%2, m%1 | |||
| punpckhwd m%2, m%2 | |||
| punpcklwd m%1, m%1 | |||
| psrad m%2, 16 | |||
| psrad m%1, 16 | |||
| @@ -797,11 +797,10 @@ int main(int argc, char **argv) | |||
| av_expr_parse_and_eval(&d, *expr, | |||
| const_names, const_values, | |||
| NULL, NULL, NULL, NULL, NULL, 0, NULL); | |||
| if(isnan(d)){ | |||
| if (isnan(d)) | |||
| printf("'%s' -> nan\n\n", *expr); | |||
| }else{ | |||
| else | |||
| printf("'%s' -> %f\n\n", *expr, d); | |||
| } | |||
| } | |||
| av_expr_parse_and_eval(&d, "1+(5-2)^(3-1)+1/2+sin(PI)-max(-2.2,-3.1)", | |||
| @@ -42,12 +42,7 @@ ALIGN 16 | |||
| sub lenq, 2*mmsize | |||
| jge .loop | |||
| %if mmsize == 32 | |||
| vzeroupper | |||
| RET | |||
| %else | |||
| REP_RET | |||
| %endif | |||
| %endmacro | |||
| INIT_XMM sse | |||
| @@ -88,12 +83,7 @@ cglobal vector_fmac_scalar, 4,4,3, dst, src, mul, len | |||
| mova [dstq+lenq+mmsize], m2 | |||
| sub lenq, 2*mmsize | |||
| jge .loop | |||
| %if mmsize == 32 | |||
| vzeroupper | |||
| RET | |||
| %else | |||
| REP_RET | |||
| %endif | |||
| %endmacro | |||
| INIT_XMM sse | |||
| @@ -392,11 +392,14 @@ DECLARE_REG 14, R15, R15D, R15W, R15B, 120 | |||
| %macro RET 0 | |||
| WIN64_RESTORE_XMM_INTERNAL rsp | |||
| POP_IF_USED 14, 13, 12, 11, 10, 9, 8, 7 | |||
| %if mmsize == 32 | |||
| vzeroupper | |||
| %endif | |||
| ret | |||
| %endmacro | |||
| %macro REP_RET 0 | |||
| %if regs_used > 7 || xmm_regs_used > 6 | |||
| %if regs_used > 7 || xmm_regs_used > 6 || mmsize == 32 | |||
| RET | |||
| %else | |||
| rep ret | |||
| @@ -433,11 +436,14 @@ DECLARE_REG 14, R15, R15D, R15W, R15B, 72 | |||
| %macro RET 0 | |||
| POP_IF_USED 14, 13, 12, 11, 10, 9 | |||
| %if mmsize == 32 | |||
| vzeroupper | |||
| %endif | |||
| ret | |||
| %endmacro | |||
| %macro REP_RET 0 | |||
| %if regs_used > 9 | |||
| %if regs_used > 9 || mmsize == 32 | |||
| RET | |||
| %else | |||
| rep ret | |||
| @@ -479,11 +485,14 @@ DECLARE_ARG 7, 8, 9, 10, 11, 12, 13, 14 | |||
| %macro RET 0 | |||
| POP_IF_USED 6, 5, 4, 3 | |||
| %if mmsize == 32 | |||
| vzeroupper | |||
| %endif | |||
| ret | |||
| %endmacro | |||
| %macro REP_RET 0 | |||
| %if regs_used > 3 | |||
| %if regs_used > 3 || mmsize == 32 | |||
| RET | |||
| %else | |||
| rep ret | |||
| @@ -1126,16 +1135,22 @@ AVX_INSTR pfmul, 1, 0, 1 | |||
| %undef j | |||
| %macro FMA_INSTR 3 | |||
| %macro %1 4-7 %1, %2, %3 | |||
| %if cpuflag(xop) | |||
| v%5 %1, %2, %3, %4 | |||
| %macro %1 5-8 %1, %2, %3 | |||
| %if cpuflag(xop) || cpuflag(fma4) | |||
| v%6 %1, %2, %3, %4 | |||
| %else | |||
| %6 %1, %2, %3 | |||
| %7 %1, %4 | |||
| %ifidn %1, %4 | |||
| %7 %5, %2, %3 | |||
| %8 %1, %4, %5 | |||
| %else | |||
| %7 %1, %2, %3 | |||
| %8 %1, %4 | |||
| %endif | |||
| %endif | |||
| %endmacro | |||
| %endmacro | |||
| FMA_INSTR fmaddps, mulps, addps | |||
| FMA_INSTR pmacsdd, pmulld, paddd | |||
| FMA_INSTR pmacsww, pmullw, paddw | |||
| FMA_INSTR pmadcswd, pmaddwd, paddd | |||
| @@ -15,9 +15,6 @@ ffservertest: ffserver$(EXESUF) tests/vsynth1/00.pgm tests/data/asynth1.sw | |||
| OBJDIRS += tests/data tests/vsynth1 | |||
| # Required due to missing automatic dependency tracking for HOSTOBJS. | |||
| tests/rotozoom.o tests/videogen.o: tests/utils.c | |||
| tests/vsynth1/00.pgm: tests/videogen$(HOSTEXESUF) | tests/vsynth1 | |||
| $(M)./$< 'tests/vsynth1/' | |||