Conveniently (together with Justin's earlier patches), this makes our vorbis decoder entirely independent of dsputil. [tags/n1.2]
@@ -388,7 +388,7 @@ OBJS-$(CONFIG_VCR1_DECODER) += vcr1.o | |||||
OBJS-$(CONFIG_VMDAUDIO_DECODER) += vmdav.o | OBJS-$(CONFIG_VMDAUDIO_DECODER) += vmdav.o | ||||
OBJS-$(CONFIG_VMDVIDEO_DECODER) += vmdav.o | OBJS-$(CONFIG_VMDVIDEO_DECODER) += vmdav.o | ||||
OBJS-$(CONFIG_VMNC_DECODER) += vmnc.o | OBJS-$(CONFIG_VMNC_DECODER) += vmnc.o | ||||
OBJS-$(CONFIG_VORBIS_DECODER) += vorbisdec.o vorbis.o \ | |||||
OBJS-$(CONFIG_VORBIS_DECODER) += vorbisdec.o vorbisdsp.o vorbis.o \ | |||||
vorbis_data.o xiph.o | vorbis_data.o xiph.o | ||||
OBJS-$(CONFIG_VORBIS_ENCODER) += vorbisenc.o vorbis.o \ | OBJS-$(CONFIG_VORBIS_ENCODER) += vorbisenc.o vorbis.o \ | ||||
vorbis_data.o | vorbis_data.o | ||||
@@ -15,6 +15,7 @@ OBJS-$(CONFIG_MPEGAUDIODSP) += arm/mpegaudiodsp_init_arm.o | |||||
ARMV6-OBJS-$(CONFIG_MPEGAUDIODSP) += arm/mpegaudiodsp_fixed_armv6.o | ARMV6-OBJS-$(CONFIG_MPEGAUDIODSP) += arm/mpegaudiodsp_fixed_armv6.o | ||||
OBJS-$(CONFIG_MPEGVIDEO) += arm/mpegvideo_arm.o | OBJS-$(CONFIG_MPEGVIDEO) += arm/mpegvideo_arm.o | ||||
OBJS-$(CONFIG_VORBIS_DECODER) += arm/vorbisdsp_init_arm.o | |||||
OBJS-$(CONFIG_VP3DSP) += arm/vp3dsp_init_arm.o | OBJS-$(CONFIG_VP3DSP) += arm/vp3dsp_init_arm.o | ||||
OBJS-$(CONFIG_VP5_DECODER) += arm/vp56dsp_init_arm.o | OBJS-$(CONFIG_VP5_DECODER) += arm/vp56dsp_init_arm.o | ||||
OBJS-$(CONFIG_VP6_DECODER) += arm/vp56dsp_init_arm.o | OBJS-$(CONFIG_VP6_DECODER) += arm/vp56dsp_init_arm.o | ||||
@@ -86,6 +87,8 @@ NEON-OBJS-$(CONFIG_RV40_DECODER) += arm/rv34dsp_neon.o \ | |||||
arm/rv40dsp_neon.o \ | arm/rv40dsp_neon.o \ | ||||
arm/h264cmc_neon.o \ | arm/h264cmc_neon.o \ | ||||
NEON-OBJS-$(CONFIG_VORBIS_DECODER) += arm/vorbisdsp_neon.o | |||||
NEON-OBJS-$(CONFIG_VP3DSP) += arm/vp3dsp_neon.o | NEON-OBJS-$(CONFIG_VP3DSP) += arm/vp3dsp_neon.o | ||||
NEON-OBJS-$(CONFIG_VP5_DECODER) += arm/vp56dsp_neon.o \ | NEON-OBJS-$(CONFIG_VP5_DECODER) += arm/vp56dsp_neon.o \ | ||||
@@ -154,8 +154,6 @@ void ff_vector_clipf_neon(float *dst, const float *src, float min, float max, | |||||
void ff_vector_clip_int32_neon(int32_t *dst, const int32_t *src, int32_t min, | void ff_vector_clip_int32_neon(int32_t *dst, const int32_t *src, int32_t min, | ||||
int32_t max, unsigned int len); | int32_t max, unsigned int len); | ||||
void ff_vorbis_inverse_coupling_neon(float *mag, float *ang, int blocksize); | |||||
int32_t ff_scalarproduct_int16_neon(const int16_t *v1, const int16_t *v2, int len); | int32_t ff_scalarproduct_int16_neon(const int16_t *v1, const int16_t *v2, int len); | ||||
int32_t ff_scalarproduct_and_madd_int16_neon(int16_t *v1, const int16_t *v2, | int32_t ff_scalarproduct_and_madd_int16_neon(int16_t *v1, const int16_t *v2, | ||||
const int16_t *v3, int len, int mul); | const int16_t *v3, int len, int mul); | ||||
@@ -307,9 +305,6 @@ void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx) | |||||
c->vector_clipf = ff_vector_clipf_neon; | c->vector_clipf = ff_vector_clipf_neon; | ||||
c->vector_clip_int32 = ff_vector_clip_int32_neon; | c->vector_clip_int32 = ff_vector_clip_int32_neon; | ||||
if (CONFIG_VORBIS_DECODER) | |||||
c->vorbis_inverse_coupling = ff_vorbis_inverse_coupling_neon; | |||||
c->scalarproduct_int16 = ff_scalarproduct_int16_neon; | c->scalarproduct_int16 = ff_scalarproduct_int16_neon; | ||||
c->scalarproduct_and_madd_int16 = ff_scalarproduct_and_madd_int16_neon; | c->scalarproduct_and_madd_int16 = ff_scalarproduct_and_madd_int16_neon; | ||||
@@ -19,7 +19,6 @@ | |||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | ||||
*/ | */ | ||||
#include "config.h" | |||||
#include "libavutil/arm/asm.S" | #include "libavutil/arm/asm.S" | ||||
function ff_clear_block_neon, export=1 | function ff_clear_block_neon, export=1 | ||||
@@ -532,69 +531,6 @@ function ff_add_pixels_clamped_neon, export=1 | |||||
bx lr | bx lr | ||||
endfunc | endfunc | ||||
#if CONFIG_VORBIS_DECODER | |||||
function ff_vorbis_inverse_coupling_neon, export=1 | |||||
vmov.i32 q10, #1<<31 | |||||
subs r2, r2, #4 | |||||
mov r3, r0 | |||||
mov r12, r1 | |||||
beq 3f | |||||
vld1.32 {d24-d25},[r1,:128]! | |||||
vld1.32 {d22-d23},[r0,:128]! | |||||
vcle.s32 q8, q12, #0 | |||||
vand q9, q11, q10 | |||||
veor q12, q12, q9 | |||||
vand q2, q12, q8 | |||||
vbic q3, q12, q8 | |||||
vadd.f32 q12, q11, q2 | |||||
vsub.f32 q11, q11, q3 | |||||
1: vld1.32 {d2-d3}, [r1,:128]! | |||||
vld1.32 {d0-d1}, [r0,:128]! | |||||
vcle.s32 q8, q1, #0 | |||||
vand q9, q0, q10 | |||||
veor q1, q1, q9 | |||||
vst1.32 {d24-d25},[r3, :128]! | |||||
vst1.32 {d22-d23},[r12,:128]! | |||||
vand q2, q1, q8 | |||||
vbic q3, q1, q8 | |||||
vadd.f32 q1, q0, q2 | |||||
vsub.f32 q0, q0, q3 | |||||
subs r2, r2, #8 | |||||
ble 2f | |||||
vld1.32 {d24-d25},[r1,:128]! | |||||
vld1.32 {d22-d23},[r0,:128]! | |||||
vcle.s32 q8, q12, #0 | |||||
vand q9, q11, q10 | |||||
veor q12, q12, q9 | |||||
vst1.32 {d2-d3}, [r3, :128]! | |||||
vst1.32 {d0-d1}, [r12,:128]! | |||||
vand q2, q12, q8 | |||||
vbic q3, q12, q8 | |||||
vadd.f32 q12, q11, q2 | |||||
vsub.f32 q11, q11, q3 | |||||
b 1b | |||||
2: vst1.32 {d2-d3}, [r3, :128]! | |||||
vst1.32 {d0-d1}, [r12,:128]! | |||||
it lt | |||||
bxlt lr | |||||
3: vld1.32 {d2-d3}, [r1,:128] | |||||
vld1.32 {d0-d1}, [r0,:128] | |||||
vcle.s32 q8, q1, #0 | |||||
vand q9, q0, q10 | |||||
veor q1, q1, q9 | |||||
vand q2, q1, q8 | |||||
vbic q3, q1, q8 | |||||
vadd.f32 q1, q0, q2 | |||||
vsub.f32 q0, q0, q3 | |||||
vst1.32 {d2-d3}, [r0,:128]! | |||||
vst1.32 {d0-d1}, [r1,:128]! | |||||
bx lr | |||||
endfunc | |||||
#endif | |||||
function ff_butterflies_float_neon, export=1 | function ff_butterflies_float_neon, export=1 | ||||
1: vld1.32 {q0},[r0,:128] | 1: vld1.32 {q0},[r0,:128] | ||||
vld1.32 {q1},[r1,:128] | vld1.32 {q1},[r1,:128] | ||||
@@ -0,0 +1,36 @@ | |||||
/* | |||||
* ARM NEON optimised DSP functions | |||||
* Copyright (c) 2008 Mans Rullgard <mans@mansr.com> | |||||
* | |||||
* This file is part of Libav. | |||||
* | |||||
* Libav is free software; you can redistribute it and/or | |||||
* modify it under the terms of the GNU Lesser General Public | |||||
* License as published by the Free Software Foundation; either | |||||
* version 2.1 of the License, or (at your option) any later version. | |||||
* | |||||
* Libav is distributed in the hope that it will be useful, | |||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of | |||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |||||
* Lesser General Public License for more details. | |||||
* | |||||
* You should have received a copy of the GNU Lesser General Public | |||||
* License along with Libav; if not, write to the Free Software | |||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |||||
*/ | |||||
#include "libavutil/attributes.h" | |||||
#include "libavutil/cpu.h" | |||||
#include "libavutil/arm/cpu.h" | |||||
#include "libavcodec/vorbisdsp.h" | |||||
void ff_vorbis_inverse_coupling_neon(float *mag, float *ang, int blocksize); | |||||
/**
 * Install ARM-specific implementations into a VorbisDSPContext.
 *
 * The CPU flags are queried once at run time; the NEON routine replaces
 * whatever c->vorbis_inverse_coupling currently points to only when
 * have_neon() accepts those flags. Otherwise the context is left untouched.
 *
 * @param c context to update in place
 */
void ff_vorbisdsp_init_arm(VorbisDSPContext *c)
{
    const int flags = av_get_cpu_flags();

    /* nothing to install without NEON */
    if (!have_neon(flags))
        return;

    c->vorbis_inverse_coupling = ff_vorbis_inverse_coupling_neon;
}
@@ -0,0 +1,83 @@ | |||||
/* | |||||
* ARM NEON optimised DSP functions | |||||
* Copyright (c) 2008 Mans Rullgard <mans@mansr.com> | |||||
* | |||||
* This file is part of Libav. | |||||
* | |||||
* Libav is free software; you can redistribute it and/or | |||||
* modify it under the terms of the GNU Lesser General Public | |||||
* License as published by the Free Software Foundation; either | |||||
* version 2.1 of the License, or (at your option) any later version. | |||||
* | |||||
* Libav is distributed in the hope that it will be useful, | |||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of | |||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |||||
* Lesser General Public License for more details. | |||||
* | |||||
* You should have received a copy of the GNU Lesser General Public | |||||
* License along with Libav; if not, write to the Free Software | |||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |||||
*/ | |||||
#include "libavutil/arm/asm.S" | |||||
/*
 * void ff_vorbis_inverse_coupling_neon(float *mag, float *ang, int blocksize)
 *
 * In-place inverse coupling, four floats (one q register) per step.
 * Register roles, assuming the usual ARM argument order for the prototype
 * above (r0 = mag, r1 = ang, r2 = blocksize):
 *   r0 / r1   load pointers for mag / ang
 *   r3 / r12  store pointers for mag / ang (copies of r0 / r1)
 *   r2        remaining element count, biased by -4 on entry
 *   q10       0x80000000 in every lane (float sign-bit mask)
 *
 * Per lane, with a' = ang ^ (mag & signbit) and
 * mask = (ang <= 0, compared as signed 32-bit integers):
 *   new mag = mag + (a' &  mask)
 *   new ang = mag - (a' & ~mask)
 *
 * Every load/store carries a :128 alignment qualifier, so both arrays are
 * expected to be 16-byte aligned. The code always handles at least one
 * vector; blocksize is presumably a non-zero multiple of 4 (not checked here).
 */
function ff_vorbis_inverse_coupling_neon, export=1 | |||||
vmov.i32 q10, #1<<31 | |||||
subs r2, r2, #4 | |||||
mov r3, r0 | |||||
mov r12, r1 | |||||
/* exactly one vector in total: go straight to the single-vector tail */
beq 3f | |||||
/* prologue: load and compute the first vector into q12 (mag) / q11 (ang) */
vld1.32 {d24-d25},[r1,:128]! | |||||
vld1.32 {d22-d23},[r0,:128]! | |||||
vcle.s32 q8, q12, #0 | |||||
vand q9, q11, q10 | |||||
veor q12, q12, q9 | |||||
vand q2, q12, q8 | |||||
vbic q3, q12, q8 | |||||
vadd.f32 q12, q11, q2 | |||||
vsub.f32 q11, q11, q3 | |||||
/* loop, first half: compute the next vector in q1/q0 while storing the
 * previous result held in q12/q11 */
1: vld1.32 {d2-d3}, [r1,:128]! | |||||
vld1.32 {d0-d1}, [r0,:128]! | |||||
vcle.s32 q8, q1, #0 | |||||
vand q9, q0, q10 | |||||
veor q1, q1, q9 | |||||
vst1.32 {d24-d25},[r3, :128]! | |||||
vst1.32 {d22-d23},[r12,:128]! | |||||
vand q2, q1, q8 | |||||
vbic q3, q1, q8 | |||||
vadd.f32 q1, q0, q2 | |||||
vsub.f32 q0, q0, q3 | |||||
/* two vectors are consumed per loop round; leave when at most one remains */
subs r2, r2, #8 | |||||
ble 2f | |||||
/* loop, second half: compute into q12/q11 while storing q1/q0 */
vld1.32 {d24-d25},[r1,:128]! | |||||
vld1.32 {d22-d23},[r0,:128]! | |||||
vcle.s32 q8, q12, #0 | |||||
vand q9, q11, q10 | |||||
veor q12, q12, q9 | |||||
vst1.32 {d2-d3}, [r3, :128]! | |||||
vst1.32 {d0-d1}, [r12,:128]! | |||||
vand q2, q12, q8 | |||||
vbic q3, q12, q8 | |||||
vadd.f32 q12, q11, q2 | |||||
vsub.f32 q11, q11, q3 | |||||
b 1b | |||||
/* drain: store the pending q1/q0 result; the flags still come from the
 * subs above, so "lt" means nothing is left and "eq" means one more vector */
2: vst1.32 {d2-d3}, [r3, :128]! | |||||
vst1.32 {d0-d1}, [r12,:128]! | |||||
it lt | |||||
bxlt lr | |||||
/* tail: one final vector, loaded without writeback and stored back through
 * the same r0/r1 addresses */
3: vld1.32 {d2-d3}, [r1,:128] | |||||
vld1.32 {d0-d1}, [r0,:128] | |||||
vcle.s32 q8, q1, #0 | |||||
vand q9, q0, q10 | |||||
veor q1, q1, q9 | |||||
vand q2, q1, q8 | |||||
vbic q3, q1, q8 | |||||
vadd.f32 q1, q0, q2 | |||||
vsub.f32 q0, q0, q3 | |||||
vst1.32 {d2-d3}, [r0,:128]! | |||||
vst1.32 {d0-d1}, [r1,:128]! | |||||
bx lr | |||||
endfunc | |||||
@@ -36,7 +36,6 @@ | |||||
#include "mathops.h" | #include "mathops.h" | ||||
#include "mpegvideo.h" | #include "mpegvideo.h" | ||||
#include "config.h" | #include "config.h" | ||||
#include "vorbis.h" | |||||
uint8_t ff_cropTbl[256 + 2 * MAX_NEG_CROP] = {0, }; | uint8_t ff_cropTbl[256 + 2 * MAX_NEG_CROP] = {0, }; | ||||
uint32_t ff_squareTbl[512] = {0, }; | uint32_t ff_squareTbl[512] = {0, }; | ||||
@@ -2817,9 +2816,6 @@ av_cold void ff_dsputil_init(DSPContext* c, AVCodecContext *avctx) | |||||
c->try_8x8basis= try_8x8basis_c; | c->try_8x8basis= try_8x8basis_c; | ||||
c->add_8x8basis= add_8x8basis_c; | c->add_8x8basis= add_8x8basis_c; | ||||
#if CONFIG_VORBIS_DECODER | |||||
c->vorbis_inverse_coupling = ff_vorbis_inverse_coupling; | |||||
#endif | |||||
c->vector_fmul_reverse = vector_fmul_reverse_c; | c->vector_fmul_reverse = vector_fmul_reverse_c; | ||||
c->vector_fmul_add = vector_fmul_add_c; | c->vector_fmul_add = vector_fmul_add_c; | ||||
c->vector_clipf = vector_clipf_c; | c->vector_clipf = vector_clipf_c; | ||||
@@ -346,8 +346,6 @@ typedef struct DSPContext { | |||||
void (*h261_loop_filter)(uint8_t *src, int stride); | void (*h261_loop_filter)(uint8_t *src, int stride); | ||||
/* assume len is a multiple of 4, and arrays are 16-byte aligned */ | |||||
void (*vorbis_inverse_coupling)(float *mag, float *ang, int blocksize); | |||||
/* assume len is a multiple of 16, and arrays are 32-byte aligned */ | /* assume len is a multiple of 16, and arrays are 32-byte aligned */ | ||||
void (*vector_fmul_reverse)(float *dst, const float *src0, const float *src1, int len); | void (*vector_fmul_reverse)(float *dst, const float *src0, const float *src1, int len); | ||||
/* assume len is a multiple of 8, and src arrays are 16-byte aligned */ | /* assume len is a multiple of 8, and src arrays are 16-byte aligned */ | ||||
@@ -1,6 +1,7 @@ | |||||
OBJS += ppc/dsputil_ppc.o \ | OBJS += ppc/dsputil_ppc.o \ | ||||
ppc/videodsp_ppc.o \ | ppc/videodsp_ppc.o \ | ||||
OBJS-$(CONFIG_VORBIS_DECODER) += ppc/vorbisdsp_altivec.o | |||||
OBJS-$(CONFIG_VP3DSP) += ppc/vp3dsp_altivec.o | OBJS-$(CONFIG_VP3DSP) += ppc/vp3dsp_altivec.o | ||||
FFT-OBJS-$(HAVE_GNU_AS) += ppc/fft_altivec_s.o | FFT-OBJS-$(HAVE_GNU_AS) += ppc/fft_altivec_s.o | ||||
@@ -1283,29 +1283,6 @@ static int hadamard8_diff16_altivec(/*MpegEncContext*/ void *s, uint8_t *dst, ui | |||||
return score; | return score; | ||||
} | } | ||||
static void vorbis_inverse_coupling_altivec(float *mag, float *ang, | |||||
int blocksize) | |||||
{ | |||||
int i; | |||||
vector float m, a; | |||||
vector bool int t0, t1; | |||||
const vector unsigned int v_31 = //XXX | |||||
vec_add(vec_add(vec_splat_u32(15),vec_splat_u32(15)),vec_splat_u32(1)); | |||||
for (i = 0; i < blocksize; i += 4) { | |||||
m = vec_ld(0, mag+i); | |||||
a = vec_ld(0, ang+i); | |||||
t0 = vec_cmple(m, (vector float)vec_splat_u32(0)); | |||||
t1 = vec_cmple(a, (vector float)vec_splat_u32(0)); | |||||
a = vec_xor(a, (vector float) vec_sl((vector unsigned int)t0, v_31)); | |||||
t0 = (vector bool int)vec_and(a, t1); | |||||
t1 = (vector bool int)vec_andc(a, t1); | |||||
a = vec_sub(m, (vector float)t1); | |||||
m = vec_add(m, (vector float)t0); | |||||
vec_stl(a, 0, ang+i); | |||||
vec_stl(m, 0, mag+i); | |||||
} | |||||
} | |||||
/* next one assumes that ((line_size % 8) == 0) */ | /* next one assumes that ((line_size % 8) == 0) */ | ||||
static void avg_pixels8_xy2_altivec(uint8_t *block, const uint8_t *pixels, int line_size, int h) | static void avg_pixels8_xy2_altivec(uint8_t *block, const uint8_t *pixels, int line_size, int h) | ||||
{ | { | ||||
@@ -1403,6 +1380,4 @@ void ff_dsputil_init_altivec(DSPContext* c, AVCodecContext *avctx) | |||||
c->hadamard8_diff[0] = hadamard8_diff16_altivec; | c->hadamard8_diff[0] = hadamard8_diff16_altivec; | ||||
c->hadamard8_diff[1] = hadamard8_diff8x8_altivec; | c->hadamard8_diff[1] = hadamard8_diff8x8_altivec; | ||||
if (CONFIG_VORBIS_DECODER) | |||||
c->vorbis_inverse_coupling = vorbis_inverse_coupling_altivec; | |||||
} | } |
@@ -0,0 +1,62 @@ | |||||
/* | |||||
* Copyright (c) 2006 Luca Barbato <lu_zero@gentoo.org> | |||||
* | |||||
* This file is part of Libav. | |||||
* | |||||
* Libav is free software; you can redistribute it and/or | |||||
* modify it under the terms of the GNU Lesser General Public | |||||
* License as published by the Free Software Foundation; either | |||||
* version 2.1 of the License, or (at your option) any later version. | |||||
* | |||||
* Libav is distributed in the hope that it will be useful, | |||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of | |||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |||||
* Lesser General Public License for more details. | |||||
* | |||||
* You should have received a copy of the GNU Lesser General Public | |||||
* License along with Libav; if not, write to the Free Software | |||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |||||
*/ | |||||
#include "config.h" | |||||
#if HAVE_ALTIVEC_H | |||||
#include <altivec.h> | |||||
#endif | |||||
#include "libavutil/cpu.h" | |||||
#include "libavutil/ppc/types_altivec.h" | |||||
#include "libavutil/ppc/util_altivec.h" | |||||
#include "libavcodec/vorbisdsp.h" | |||||
#if HAVE_ALTIVEC | |||||
/**
 * AltiVec inverse coupling, performed in place on mag[] and ang[].
 *
 * Handles four floats per iteration (i advances by 4, one vector float each
 * for mag and ang). Per lane, with a' = a with its sign bit flipped where
 * m <= 0:
 *   new ang = m - (a' where a >  0, else 0)
 *   new mag = m + (a' where a <= 0, else 0)
 *
 * @param mag       magnitude array, read and rewritten
 * @param ang       angle array, read and rewritten
 * @param blocksize number of floats; presumably a multiple of 4 with both
 *                  arrays 16-byte aligned, as vector loads/stores are used
 *                  without any scalar tail -- confirm against callers
 */
static void vorbis_inverse_coupling_altivec(float *mag, float *ang, | |||||
int blocksize) | |||||
{ | |||||
int i; | |||||
vector float m, a; | |||||
vector bool int t0, t1; | |||||
const vector unsigned int v_31 = // shift count 31 in every lane, built as 15 + 15 + 1 from splat immediates | |||||
vec_add(vec_add(vec_splat_u32(15),vec_splat_u32(15)),vec_splat_u32(1)); | |||||
for (i = 0; i < blocksize; i += 4) { | |||||
m = vec_ld(0, mag+i); | |||||
a = vec_ld(0, ang+i); | |||||
/* t0 = (m <= 0), t1 = (a <= 0), as per-lane all-ones/all-zeros masks */
t0 = vec_cmple(m, (vector float)vec_splat_u32(0)); | |||||
t1 = vec_cmple(a, (vector float)vec_splat_u32(0)); | |||||
/* shifting the m mask left by 31 leaves only the sign bit: flip the sign
 * of a wherever m <= 0 */
a = vec_xor(a, (vector float) vec_sl((vector unsigned int)t0, v_31)); | |||||
/* split the adjusted a by the sign test of the original a:
 * t0 keeps lanes where a <= 0, t1 keeps lanes where a > 0 */
t0 = (vector bool int)vec_and(a, t1); | |||||
t1 = (vector bool int)vec_andc(a, t1); | |||||
a = vec_sub(m, (vector float)t1); | |||||
m = vec_add(m, (vector float)t0); | |||||
vec_stl(a, 0, ang+i); | |||||
vec_stl(m, 0, mag+i); | |||||
} | |||||
} | |||||
#endif /* HAVE_ALTIVEC */ | |||||
/**
 * Install PowerPC-specific implementations into a VorbisDSPContext.
 *
 * Compiles to an empty function when HAVE_ALTIVEC is 0. Otherwise the
 * AltiVec routine is selected only if the run-time CPU flags report
 * AV_CPU_FLAG_ALTIVEC.
 *
 * @param c context to update in place
 */
void ff_vorbisdsp_init_ppc(VorbisDSPContext *c)
{
#if HAVE_ALTIVEC
    const int cpu_flags = av_get_cpu_flags();

    /* leave the current implementation alone on CPUs without AltiVec */
    if (!(cpu_flags & AV_CPU_FLAG_ALTIVEC))
        return;

    c->vorbis_inverse_coupling = vorbis_inverse_coupling_altivec;
#endif /* HAVE_ALTIVEC */
}
@@ -29,12 +29,12 @@ | |||||
#include "libavutil/float_dsp.h" | #include "libavutil/float_dsp.h" | ||||
#include "avcodec.h" | #include "avcodec.h" | ||||
#include "get_bits.h" | #include "get_bits.h" | ||||
#include "dsputil.h" | |||||
#include "fft.h" | #include "fft.h" | ||||
#include "fmtconvert.h" | #include "fmtconvert.h" | ||||
#include "internal.h" | #include "internal.h" | ||||
#include "vorbis.h" | #include "vorbis.h" | ||||
#include "vorbisdsp.h" | |||||
#include "xiph.h" | #include "xiph.h" | ||||
#define V_NB_BITS 8 | #define V_NB_BITS 8 | ||||
@@ -125,7 +125,7 @@ typedef struct vorbis_context_s { | |||||
AVCodecContext *avccontext; | AVCodecContext *avccontext; | ||||
AVFrame frame; | AVFrame frame; | ||||
GetBitContext gb; | GetBitContext gb; | ||||
DSPContext dsp; | |||||
VorbisDSPContext dsp; | |||||
AVFloatDSPContext fdsp; | AVFloatDSPContext fdsp; | ||||
FmtConvertContext fmt_conv; | FmtConvertContext fmt_conv; | ||||
@@ -981,7 +981,7 @@ static av_cold int vorbis_decode_init(AVCodecContext *avccontext) | |||||
int hdr_type, ret; | int hdr_type, ret; | ||||
vc->avccontext = avccontext; | vc->avccontext = avccontext; | ||||
ff_dsputil_init(&vc->dsp, avccontext); | |||||
ff_vorbisdsp_init(&vc->dsp); | |||||
avpriv_float_dsp_init(&vc->fdsp, avccontext->flags & CODEC_FLAG_BITEXACT); | avpriv_float_dsp_init(&vc->fdsp, avccontext->flags & CODEC_FLAG_BITEXACT); | ||||
ff_fmt_convert_init(&vc->fmt_conv, avccontext); | ff_fmt_convert_init(&vc->fmt_conv, avccontext); | ||||
@@ -0,0 +1,33 @@ | |||||
/* | |||||
* This file is part of Libav. | |||||
* | |||||
* Libav is free software; you can redistribute it and/or | |||||
* modify it under the terms of the GNU Lesser General Public | |||||
* License as published by the Free Software Foundation; either | |||||
* version 2.1 of the License, or (at your option) any later version. | |||||
* | |||||
* Libav is distributed in the hope that it will be useful, | |||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of | |||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |||||
* Lesser General Public License for more details. | |||||
* | |||||
* You should have received a copy of the GNU Lesser General Public | |||||
* License along with Libav; if not, write to the Free Software | |||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |||||
*/ | |||||
#include "config.h" | |||||
#include "vorbisdsp.h" | |||||
#include "vorbis.h" | |||||
/**
 * Fill a VorbisDSPContext with the best available implementations.
 *
 * The portable C routine is installed first; the architecture-specific
 * initialisers then get a chance to override it. ARCH_* are compile-time
 * constants, so the calls for other architectures are never executed.
 *
 * @param dsp context to initialise
 */
void ff_vorbisdsp_init(VorbisDSPContext *dsp)
{
    /* generic fallback, always valid */
    dsp->vorbis_inverse_coupling = ff_vorbis_inverse_coupling;

    /* per-architecture overrides */
    if (ARCH_X86) {
        ff_vorbisdsp_init_x86(dsp);
    }
    if (ARCH_PPC) {
        ff_vorbisdsp_init_ppc(dsp);
    }
    if (ARCH_ARM) {
        ff_vorbisdsp_init_arm(dsp);
    }
}
@@ -0,0 +1,34 @@ | |||||
/* | |||||
* This file is part of Libav. | |||||
* | |||||
* Libav is free software; you can redistribute it and/or | |||||
* modify it under the terms of the GNU Lesser General Public | |||||
* License as published by the Free Software Foundation; either | |||||
* version 2.1 of the License, or (at your option) any later version. | |||||
* | |||||
* Libav is distributed in the hope that it will be useful, | |||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of | |||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |||||
* Lesser General Public License for more details. | |||||
* | |||||
* You should have received a copy of the GNU Lesser General Public | |||||
* License along with Libav; if not, write to the Free Software | |||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |||||
*/ | |||||
#ifndef AVCODEC_VORBISDSP_H | |||||
#define AVCODEC_VORBISDSP_H | |||||
/* Function table for Vorbis-specific DSP routines; filled by ff_vorbisdsp_init(). */
typedef struct VorbisDSPContext { | |||||
/* Inverse channel coupling, performed in place: both mag[] and ang[] are
 * read and rewritten. Implementations may assume blocksize is a multiple
 * of 4 and that both arrays are 16-byte aligned. */ | |||||
void (*vorbis_inverse_coupling)(float *mag, float *ang, int blocksize); | |||||
} VorbisDSPContext; | |||||
void ff_vorbisdsp_init(VorbisDSPContext *dsp); | |||||
/* for internal use only */ | |||||
void ff_vorbisdsp_init_x86(VorbisDSPContext *dsp); | |||||
void ff_vorbisdsp_init_arm(VorbisDSPContext *dsp); | |||||
void ff_vorbisdsp_init_ppc(VorbisDSPContext *dsp); | |||||
#endif /* AVCODEC_VORBISDSP_H */ |
@@ -20,6 +20,7 @@ OBJS-$(CONFIG_RV40_DECODER) += x86/rv34dsp_init.o \ | |||||
OBJS-$(CONFIG_TRUEHD_DECODER) += x86/mlpdsp.o | OBJS-$(CONFIG_TRUEHD_DECODER) += x86/mlpdsp.o | ||||
OBJS-$(CONFIG_VC1_DECODER) += x86/vc1dsp_init.o | OBJS-$(CONFIG_VC1_DECODER) += x86/vc1dsp_init.o | ||||
OBJS-$(CONFIG_VIDEODSP) += x86/videodsp_init.o | OBJS-$(CONFIG_VIDEODSP) += x86/videodsp_init.o | ||||
OBJS-$(CONFIG_VORBIS_DECODER) += x86/vorbisdsp_init.o | |||||
OBJS-$(CONFIG_VP3DSP) += x86/vp3dsp_init.o | OBJS-$(CONFIG_VP3DSP) += x86/vp3dsp_init.o | ||||
OBJS-$(CONFIG_VP5_DECODER) += x86/vp56dsp_init.o | OBJS-$(CONFIG_VP5_DECODER) += x86/vp56dsp_init.o | ||||
OBJS-$(CONFIG_VP6_DECODER) += x86/vp56dsp_init.o | OBJS-$(CONFIG_VP6_DECODER) += x86/vp56dsp_init.o | ||||
@@ -1829,65 +1829,6 @@ void ff_avg_vc1_mspel_mc00_mmxext(uint8_t *dst, const uint8_t *src, | |||||
avg_pixels8_mmxext(dst, src, stride, 8); | avg_pixels8_mmxext(dst, src, stride, 8); | ||||
} | } | ||||
static void vorbis_inverse_coupling_3dnow(float *mag, float *ang, int blocksize) | |||||
{ | |||||
int i; | |||||
__asm__ volatile ("pxor %%mm7, %%mm7":); | |||||
for (i = 0; i < blocksize; i += 2) { | |||||
__asm__ volatile ( | |||||
"movq %0, %%mm0 \n\t" | |||||
"movq %1, %%mm1 \n\t" | |||||
"movq %%mm0, %%mm2 \n\t" | |||||
"movq %%mm1, %%mm3 \n\t" | |||||
"pfcmpge %%mm7, %%mm2 \n\t" // m <= 0.0 | |||||
"pfcmpge %%mm7, %%mm3 \n\t" // a <= 0.0 | |||||
"pslld $31, %%mm2 \n\t" // keep only the sign bit | |||||
"pxor %%mm2, %%mm1 \n\t" | |||||
"movq %%mm3, %%mm4 \n\t" | |||||
"pand %%mm1, %%mm3 \n\t" | |||||
"pandn %%mm1, %%mm4 \n\t" | |||||
"pfadd %%mm0, %%mm3 \n\t" // a = m + ((a < 0) & (a ^ sign(m))) | |||||
"pfsub %%mm4, %%mm0 \n\t" // m = m + ((a > 0) & (a ^ sign(m))) | |||||
"movq %%mm3, %1 \n\t" | |||||
"movq %%mm0, %0 \n\t" | |||||
: "+m"(mag[i]), "+m"(ang[i]) | |||||
:: "memory" | |||||
); | |||||
} | |||||
__asm__ volatile ("femms"); | |||||
} | |||||
static void vorbis_inverse_coupling_sse(float *mag, float *ang, int blocksize) | |||||
{ | |||||
int i; | |||||
__asm__ volatile ( | |||||
"movaps %0, %%xmm5 \n\t" | |||||
:: "m"(ff_pdw_80000000[0]) | |||||
); | |||||
for (i = 0; i < blocksize; i += 4) { | |||||
__asm__ volatile ( | |||||
"movaps %0, %%xmm0 \n\t" | |||||
"movaps %1, %%xmm1 \n\t" | |||||
"xorps %%xmm2, %%xmm2 \n\t" | |||||
"xorps %%xmm3, %%xmm3 \n\t" | |||||
"cmpleps %%xmm0, %%xmm2 \n\t" // m <= 0.0 | |||||
"cmpleps %%xmm1, %%xmm3 \n\t" // a <= 0.0 | |||||
"andps %%xmm5, %%xmm2 \n\t" // keep only the sign bit | |||||
"xorps %%xmm2, %%xmm1 \n\t" | |||||
"movaps %%xmm3, %%xmm4 \n\t" | |||||
"andps %%xmm1, %%xmm3 \n\t" | |||||
"andnps %%xmm1, %%xmm4 \n\t" | |||||
"addps %%xmm0, %%xmm3 \n\t" // a = m + ((a < 0) & (a ^ sign(m))) | |||||
"subps %%xmm4, %%xmm0 \n\t" // m = m + ((a > 0) & (a ^ sign(m))) | |||||
"movaps %%xmm3, %1 \n\t" | |||||
"movaps %%xmm0, %0 \n\t" | |||||
: "+m"(mag[i]), "+m"(ang[i]) | |||||
:: "memory" | |||||
); | |||||
} | |||||
} | |||||
static void vector_clipf_sse(float *dst, const float *src, | static void vector_clipf_sse(float *dst, const float *src, | ||||
float min, float max, int len) | float min, float max, int len) | ||||
{ | { | ||||
@@ -2238,8 +2179,6 @@ static void dsputil_init_3dnow(DSPContext *c, AVCodecContext *avctx, | |||||
c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2_exact_3dnow; | c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2_exact_3dnow; | ||||
c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_exact_3dnow; | c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_exact_3dnow; | ||||
} | } | ||||
c->vorbis_inverse_coupling = vorbis_inverse_coupling_3dnow; | |||||
#endif /* HAVE_INLINE_ASM */ | #endif /* HAVE_INLINE_ASM */ | ||||
#if HAVE_YASM | #if HAVE_YASM | ||||
@@ -2263,8 +2202,6 @@ static void dsputil_init_sse(DSPContext *c, AVCodecContext *avctx, int mm_flags) | |||||
} | } | ||||
} | } | ||||
c->vorbis_inverse_coupling = vorbis_inverse_coupling_sse; | |||||
c->vector_clipf = vector_clipf_sse; | c->vector_clipf = vector_clipf_sse; | ||||
#endif /* HAVE_INLINE_ASM */ | #endif /* HAVE_INLINE_ASM */ | ||||
@@ -0,0 +1,101 @@ | |||||
/* | |||||
* Copyright (C) 2006 Loren Merritt <lorenm@u.washington.edu> | |||||
* | |||||
* This file is part of Libav. | |||||
* | |||||
* Libav is free software; you can redistribute it and/or | |||||
* modify it under the terms of the GNU Lesser General Public | |||||
* License as published by the Free Software Foundation; either | |||||
* version 2.1 of the License, or (at your option) any later version. | |||||
* | |||||
* Libav is distributed in the hope that it will be useful, | |||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of | |||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |||||
* Lesser General Public License for more details. | |||||
* | |||||
* You should have received a copy of the GNU Lesser General Public | |||||
* License along with Libav; if not, write to the Free Software | |||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |||||
*/ | |||||
#include "config.h" | |||||
#include "libavutil/cpu.h" | |||||
#include "libavcodec/vorbisdsp.h" | |||||
#include "dsputil_mmx.h" // for ff_pdw_80000000 | |||||
#if HAVE_INLINE_ASM | |||||
#if ARCH_X86_32 | |||||
/**
 * 3DNow! inverse coupling, performed in place on mag[] and ang[].
 *
 * Handles two floats per iteration (64-bit movq, i advances by 2).
 * Per lane, with a' = a with its sign bit flipped where m >= 0:
 *   new ang = m + (a' where a >= 0, else 0)
 *   new mag = m - (a' where a <  0, else 0)
 *
 * NOTE(review): mm7 is zeroed in a separate asm statement and no MMX
 * register is listed as clobbered, so this relies on the compiler not
 * touching MMX state between the statements. The "+m" operands name only
 * mag[i] / ang[i]; the second float of each pair is covered by the
 * "memory" clobber.
 *
 * @param mag       magnitude array, read and rewritten
 * @param ang       angle array, read and rewritten
 * @param blocksize number of floats; processed in pairs with no scalar tail
 */
static void vorbis_inverse_coupling_3dnow(float *mag, float *ang, int blocksize) | |||||
{ | |||||
int i; | |||||
/* mm7 = 0.0 in both lanes, the comparison reference used below */
__asm__ volatile ("pxor %%mm7, %%mm7":); | |||||
for (i = 0; i < blocksize; i += 2) { | |||||
__asm__ volatile ( | |||||
"movq %0, %%mm0 \n\t" | |||||
"movq %1, %%mm1 \n\t" | |||||
"movq %%mm0, %%mm2 \n\t" | |||||
"movq %%mm1, %%mm3 \n\t" | |||||
"pfcmpge %%mm7, %%mm2 \n\t" // mm2 = (m >= 0.0) ? all ones : 0 | |||||
"pfcmpge %%mm7, %%mm3 \n\t" // mm3 = (a >= 0.0) ? all ones : 0 | |||||
"pslld $31, %%mm2 \n\t" // reduce the m mask to the sign bit only | |||||
"pxor %%mm2, %%mm1 \n\t" | |||||
"movq %%mm3, %%mm4 \n\t" | |||||
"pand %%mm1, %%mm3 \n\t" | |||||
"pandn %%mm1, %%mm4 \n\t" | |||||
"pfadd %%mm0, %%mm3 \n\t" // new ang = m + ((a >= 0) & a'), a' = a ^ mm2 | |||||
"pfsub %%mm4, %%mm0 \n\t" // new mag = m - ((a < 0) & a') | |||||
"movq %%mm3, %1 \n\t" | |||||
"movq %%mm0, %0 \n\t" | |||||
: "+m"(mag[i]), "+m"(ang[i]) | |||||
:: "memory" | |||||
); | |||||
} | |||||
/* leave MMX/3DNow! state before returning to code that may use the FPU */
__asm__ volatile ("femms"); | |||||
} | |||||
#endif | |||||
/**
 * SSE inverse coupling, performed in place on mag[] and ang[].
 *
 * Handles four floats per iteration using aligned moves (movaps, i advances
 * by 4). Per lane, with a' = a with its sign bit flipped where 0 <= m:
 *   new ang = m + (a' where 0 <= a, else 0)
 *   new mag = m - (a' otherwise,   else 0)
 *
 * NOTE(review): xmm5 is loaded in a separate asm statement and no XMM
 * register is listed as clobbered, so this relies on the compiler not
 * using xmm5 between the statements. The "+m" operands name only
 * mag[i] / ang[i]; the remaining three floats of each vector are covered
 * by the "memory" clobber.
 *
 * @param mag       magnitude array, read and rewritten; must satisfy movaps
 *                  alignment
 * @param ang       angle array, read and rewritten; must satisfy movaps
 *                  alignment
 * @param blocksize number of floats; processed in groups of 4 with no
 *                  scalar tail
 */
static void vorbis_inverse_coupling_sse(float *mag, float *ang, int blocksize) | |||||
{ | |||||
int i; | |||||
/* xmm5 = sign-bit mask (ff_pdw_80000000, presumably 0x80000000 per dword) */
__asm__ volatile ( | |||||
"movaps %0, %%xmm5 \n\t" | |||||
:: "m"(ff_pdw_80000000[0]) | |||||
); | |||||
for (i = 0; i < blocksize; i += 4) { | |||||
__asm__ volatile ( | |||||
"movaps %0, %%xmm0 \n\t" | |||||
"movaps %1, %%xmm1 \n\t" | |||||
"xorps %%xmm2, %%xmm2 \n\t" | |||||
"xorps %%xmm3, %%xmm3 \n\t" | |||||
"cmpleps %%xmm0, %%xmm2 \n\t" // xmm2 = (0.0 <= m) ? all ones : 0 | |||||
"cmpleps %%xmm1, %%xmm3 \n\t" // xmm3 = (0.0 <= a) ? all ones : 0 | |||||
"andps %%xmm5, %%xmm2 \n\t" // reduce the m mask to the sign bit only | |||||
"xorps %%xmm2, %%xmm1 \n\t" | |||||
"movaps %%xmm3, %%xmm4 \n\t" | |||||
"andps %%xmm1, %%xmm3 \n\t" | |||||
"andnps %%xmm1, %%xmm4 \n\t" | |||||
"addps %%xmm0, %%xmm3 \n\t" // new ang = m + ((0 <= a) & a'), a' = a ^ xmm2 | |||||
"subps %%xmm4, %%xmm0 \n\t" // new mag = m - (!(0 <= a) & a') | |||||
"movaps %%xmm3, %1 \n\t" | |||||
"movaps %%xmm0, %0 \n\t" | |||||
: "+m"(mag[i]), "+m"(ang[i]) | |||||
:: "memory" | |||||
); | |||||
} | |||||
} | |||||
#endif | |||||
/**
 * Install x86 inline-asm implementations into a VorbisDSPContext.
 *
 * Compiles to an empty function when HAVE_INLINE_ASM is 0. The 3DNow!
 * routine is only considered on 32-bit builds. SSE is tested last, so it
 * wins when the CPU reports both flags.
 *
 * @param dsp context to update in place
 */
void ff_vorbisdsp_init_x86(VorbisDSPContext *dsp)
{
#if HAVE_INLINE_ASM
    const int cpu_flags = av_get_cpu_flags();

#if ARCH_X86_32
    if (cpu_flags & AV_CPU_FLAG_3DNOW) {
        dsp->vorbis_inverse_coupling = vorbis_inverse_coupling_3dnow;
    }
#endif /* ARCH_X86_32 */

    /* checked after 3DNow! on purpose: SSE overrides it */
    if (cpu_flags & AV_CPU_FLAG_SSE) {
        dsp->vorbis_inverse_coupling = vorbis_inverse_coupling_sse;
    }
#endif /* HAVE_INLINE_ASM */
}