* commit '5959bfaca396ecaf63a8123055f499688b79cae3': floatdsp: move butterflies_float from dsputil to avfloatdsp. Conflicts: libavcodec/dsputil.c, libavcodec/dsputil.h, libavcodec/imc.c, libavcodec/mpegaudiodec.c. Merged-by: Michael Niedermayer <michaelni@gmx.at> tags/n1.2
@@ -1729,9 +1729,9 @@ static void apply_mid_side_stereo(AACContext *ac, ChannelElement *cpe) | |||||
if (cpe->ms_mask[idx] && | if (cpe->ms_mask[idx] && | ||||
cpe->ch[0].band_type[idx] < NOISE_BT && cpe->ch[1].band_type[idx] < NOISE_BT) { | cpe->ch[0].band_type[idx] < NOISE_BT && cpe->ch[1].band_type[idx] < NOISE_BT) { | ||||
for (group = 0; group < ics->group_len[g]; group++) { | for (group = 0; group < ics->group_len[g]; group++) { | ||||
ac->dsp.butterflies_float(ch0 + group * 128 + offsets[i], | |||||
ch1 + group * 128 + offsets[i], | |||||
offsets[i+1] - offsets[i]); | |||||
ac->fdsp.butterflies_float(ch0 + group * 128 + offsets[i], | |||||
ch1 + group * 128 + offsets[i], | |||||
offsets[i+1] - offsets[i]); | |||||
} | } | ||||
} | } | ||||
} | } | ||||
@@ -142,7 +142,6 @@ void ff_avg_h264_chroma_mc8_neon(uint8_t *, uint8_t *, int, int, int, int); | |||||
void ff_avg_h264_chroma_mc4_neon(uint8_t *, uint8_t *, int, int, int, int); | void ff_avg_h264_chroma_mc4_neon(uint8_t *, uint8_t *, int, int, int, int); | ||||
void ff_avg_h264_chroma_mc2_neon(uint8_t *, uint8_t *, int, int, int, int); | void ff_avg_h264_chroma_mc2_neon(uint8_t *, uint8_t *, int, int, int, int); | ||||
void ff_butterflies_float_neon(float *v1, float *v2, int len); | |||||
float ff_scalarproduct_float_neon(const float *v1, const float *v2, int len); | float ff_scalarproduct_float_neon(const float *v1, const float *v2, int len); | ||||
void ff_vector_clipf_neon(float *dst, const float *src, float min, float max, | void ff_vector_clipf_neon(float *dst, const float *src, float min, float max, | ||||
@@ -294,7 +293,6 @@ void ff_dsputil_init_neon(DSPContext *c, AVCodecContext *avctx) | |||||
c->avg_h264_qpel_pixels_tab[1][15] = ff_avg_h264_qpel8_mc33_neon; | c->avg_h264_qpel_pixels_tab[1][15] = ff_avg_h264_qpel8_mc33_neon; | ||||
} | } | ||||
c->butterflies_float = ff_butterflies_float_neon; | |||||
c->scalarproduct_float = ff_scalarproduct_float_neon; | c->scalarproduct_float = ff_scalarproduct_float_neon; | ||||
c->vector_clipf = ff_vector_clipf_neon; | c->vector_clipf = ff_vector_clipf_neon; | ||||
c->vector_clip_int32 = ff_vector_clip_int32_neon; | c->vector_clip_int32 = ff_vector_clip_int32_neon; | ||||
@@ -531,18 +531,6 @@ function ff_add_pixels_clamped_neon, export=1 | |||||
bx lr | bx lr | ||||
endfunc | endfunc | ||||
@ Sum/difference butterfly over two float vectors, four elements per pass.
@ Going by the C prototype (float *v1, float *v2, int len) and the usual ARM
@ calling convention, r0 = v1, r1 = v2 and r2 = len -- TODO confirm.
@ On return the first vector holds v1 + v2 and the second holds v1 - v2.
@ The loop test sits at the bottom, so one pass always runs: len has to be
@ positive, and a len that is not a multiple of 4 makes the loop run past len.
function ff_butterflies_float_neon, export=1 | |||||
@ load four floats from each vector; no writeback here so the stores below
@ can reuse the same addresses (:128 marks them as 128-bit aligned)
1: vld1.32 {q0},[r0,:128] | |||||
vld1.32 {q1},[r1,:128] | |||||
@ q2 = v1 - v2, q1 = v1 + v2
vsub.f32 q2, q0, q1 | |||||
vadd.f32 q1, q0, q1 | |||||
@ store the difference over v2 and the sum over v1, post-incrementing both
vst1.32 {q2},[r1,:128]! | |||||
vst1.32 {q1},[r0,:128]! | |||||
@ four elements consumed; repeat while any remain
subs r2, r2, #4 | |||||
bgt 1b | |||||
bx lr | |||||
endfunc | |||||
function ff_scalarproduct_float_neon, export=1 | function ff_scalarproduct_float_neon, export=1 | ||||
vmov.f32 q2, #0.0 | vmov.f32 q2, #0.0 | ||||
1: vld1.32 {q0},[r0,:128]! | 1: vld1.32 {q0},[r0,:128]! | ||||
@@ -2483,17 +2483,6 @@ WRAPPER8_16_SQ(quant_psnr8x8_c, quant_psnr16_c) | |||||
WRAPPER8_16_SQ(rd8x8_c, rd16_c) | WRAPPER8_16_SQ(rd8x8_c, rd16_c) | ||||
WRAPPER8_16_SQ(bit8x8_c, bit16_c) | WRAPPER8_16_SQ(bit8x8_c, bit16_c) | ||||
/**
 * In-place sum/difference of two float vectors.
 *
 * For every index below len, v1 receives v1 + v2 and v2 receives v1 - v2,
 * both computed from the values the vectors held on entry.
 *
 * @param v1  first input vector, overwritten with the sums
 * @param v2  second input vector, overwritten with the differences
 * @param len number of elements to process; nothing is touched when <= 0
 */
static void butterflies_float_c(float *av_restrict v1, float *av_restrict v2,
                                int len)
{
    int k = 0;

    while (k < len) {
        /* read both operands before either slot is overwritten */
        const float sum  = v1[k] + v2[k];
        const float diff = v1[k] - v2[k];

        v1[k] = sum;
        v2[k] = diff;
        k++;
    }
}
float ff_scalarproduct_float_c(const float *v1, const float *v2, int len) | float ff_scalarproduct_float_c(const float *v1, const float *v2, int len) | ||||
{ | { | ||||
float p = 0.0; | float p = 0.0; | ||||
@@ -2887,7 +2876,6 @@ av_cold void ff_dsputil_init(DSPContext* c, AVCodecContext *avctx) | |||||
c->apply_window_int16 = apply_window_int16_c; | c->apply_window_int16 = apply_window_int16_c; | ||||
c->vector_clip_int32 = vector_clip_int32_c; | c->vector_clip_int32 = vector_clip_int32_c; | ||||
c->scalarproduct_float = ff_scalarproduct_float_c; | c->scalarproduct_float = ff_scalarproduct_float_c; | ||||
c->butterflies_float = butterflies_float_c; | |||||
c->shrink[0]= av_image_copy_plane; | c->shrink[0]= av_image_copy_plane; | ||||
c->shrink[1]= ff_shrink22; | c->shrink[1]= ff_shrink22; | ||||
@@ -367,13 +367,6 @@ typedef struct DSPContext { | |||||
* @param len length of vectors, multiple of 4 | * @param len length of vectors, multiple of 4 | ||||
*/ | */ | ||||
float (*scalarproduct_float)(const float *v1, const float *v2, int len); | float (*scalarproduct_float)(const float *v1, const float *v2, int len); | ||||
/** | |||||
* Calculate the sum and difference of two vectors of floats. | |||||
* @param v1 first input vector, sum output, 16-byte aligned | |||||
* @param v2 second input vector, difference output, 16-byte aligned | |||||
* @param len length of vectors, multiple of 4 | |||||
*/ | |||||
void (*butterflies_float)(float *av_restrict v1, float *av_restrict v2, int len); | |||||
/* (I)DCT */ | /* (I)DCT */ | ||||
void (*fdct)(DCTELEM *block/* align 16*/); | void (*fdct)(DCTELEM *block/* align 16*/); | ||||
@@ -36,6 +36,7 @@ | |||||
#include <stdio.h> | #include <stdio.h> | ||||
#include "libavutil/channel_layout.h" | #include "libavutil/channel_layout.h" | ||||
#include "libavutil/float_dsp.h" | |||||
#include "libavutil/libm.h" | #include "libavutil/libm.h" | ||||
#include "avcodec.h" | #include "avcodec.h" | ||||
#include "get_bits.h" | #include "get_bits.h" | ||||
@@ -96,6 +97,7 @@ typedef struct { | |||||
GetBitContext gb; | GetBitContext gb; | ||||
DSPContext dsp; | DSPContext dsp; | ||||
AVFloatDSPContext fdsp; | |||||
FFTContext fft; | FFTContext fft; | ||||
DECLARE_ALIGNED(32, FFTComplex, samples)[COEFFS / 2]; | DECLARE_ALIGNED(32, FFTComplex, samples)[COEFFS / 2]; | ||||
float *out_samples; | float *out_samples; | ||||
@@ -245,6 +247,7 @@ static av_cold int imc_decode_init(AVCodecContext *avctx) | |||||
return ret; | return ret; | ||||
} | } | ||||
ff_dsputil_init(&q->dsp, avctx); | ff_dsputil_init(&q->dsp, avctx); | ||||
avpriv_float_dsp_init(&q->fdsp, avctx->flags & CODEC_FLAG_BITEXACT); | |||||
avctx->sample_fmt = AV_SAMPLE_FMT_FLTP; | avctx->sample_fmt = AV_SAMPLE_FMT_FLTP; | ||||
avctx->channel_layout = avctx->channels == 1 ? AV_CH_LAYOUT_MONO | avctx->channel_layout = avctx->channels == 1 ? AV_CH_LAYOUT_MONO | ||||
: AV_CH_LAYOUT_STEREO; | : AV_CH_LAYOUT_STEREO; | ||||
@@ -967,8 +970,8 @@ static int imc_decode_frame(AVCodecContext *avctx, void *data, | |||||
} | } | ||||
if (avctx->channels == 2) { | if (avctx->channels == 2) { | ||||
q->dsp.butterflies_float((float *)q->frame.extended_data[0], | |||||
(float *)q->frame.extended_data[1], COEFFS); | |||||
q->fdsp.butterflies_float((float *)q->frame.extended_data[0], | |||||
(float *)q->frame.extended_data[1], COEFFS); | |||||
} | } | ||||
*got_frame_ptr = 1; | *got_frame_ptr = 1; | ||||
@@ -26,6 +26,7 @@ | |||||
#include "libavutil/avassert.h" | #include "libavutil/avassert.h" | ||||
#include "libavutil/channel_layout.h" | #include "libavutil/channel_layout.h" | ||||
#include "libavutil/float_dsp.h" | |||||
#include "libavutil/libm.h" | #include "libavutil/libm.h" | ||||
#include "avcodec.h" | #include "avcodec.h" | ||||
#include "get_bits.h" | #include "get_bits.h" | ||||
@@ -84,7 +85,7 @@ typedef struct MPADecodeContext { | |||||
int err_recognition; | int err_recognition; | ||||
AVCodecContext* avctx; | AVCodecContext* avctx; | ||||
MPADSPContext mpadsp; | MPADSPContext mpadsp; | ||||
DSPContext dsp; | |||||
AVFloatDSPContext fdsp; | |||||
AVFrame frame; | AVFrame frame; | ||||
} MPADecodeContext; | } MPADecodeContext; | ||||
@@ -441,8 +442,8 @@ static av_cold int decode_init(AVCodecContext * avctx) | |||||
s->avctx = avctx; | s->avctx = avctx; | ||||
avpriv_float_dsp_init(&s->fdsp, avctx->flags & CODEC_FLAG_BITEXACT); | |||||
ff_mpadsp_init(&s->mpadsp); | ff_mpadsp_init(&s->mpadsp); | ||||
ff_dsputil_init(&s->dsp, avctx); | |||||
if (avctx->request_sample_fmt == OUT_FMT && | if (avctx->request_sample_fmt == OUT_FMT && | ||||
avctx->codec_id != AV_CODEC_ID_MP3ON4) | avctx->codec_id != AV_CODEC_ID_MP3ON4) | ||||
@@ -1164,7 +1165,7 @@ found2: | |||||
/* NOTE: the 1/sqrt(2) normalization factor is included in the | /* NOTE: the 1/sqrt(2) normalization factor is included in the | ||||
global gain */ | global gain */ | ||||
#if CONFIG_FLOAT | #if CONFIG_FLOAT | ||||
s-> dsp.butterflies_float(g0->sb_hybrid, g1->sb_hybrid, 576); | |||||
s->fdsp.butterflies_float(g0->sb_hybrid, g1->sb_hybrid, 576); | |||||
#else | #else | ||||
tab0 = g0->sb_hybrid; | tab0 = g0->sb_hybrid; | ||||
tab1 = g1->sb_hybrid; | tab1 = g1->sb_hybrid; | ||||
@@ -178,7 +178,6 @@ static const ModeTab mode_44_48 = { | |||||
typedef struct TwinContext { | typedef struct TwinContext { | ||||
AVCodecContext *avctx; | AVCodecContext *avctx; | ||||
AVFrame frame; | AVFrame frame; | ||||
DSPContext dsp; | |||||
AVFloatDSPContext fdsp; | AVFloatDSPContext fdsp; | ||||
FFTContext mdct_ctx[3]; | FFTContext mdct_ctx[3]; | ||||
@@ -693,7 +692,7 @@ static void imdct_output(TwinContext *tctx, enum FrameType ftype, int wtype, | |||||
if (tctx->avctx->channels == 2) { | if (tctx->avctx->channels == 2) { | ||||
memcpy(&out[1][0], &prev_buf[2*mtab->size], size1 * sizeof(out[1][0])); | memcpy(&out[1][0], &prev_buf[2*mtab->size], size1 * sizeof(out[1][0])); | ||||
memcpy(&out[1][size1], &tctx->curr_frame[2*mtab->size], size2 * sizeof(out[1][0])); | memcpy(&out[1][size1], &tctx->curr_frame[2*mtab->size], size2 * sizeof(out[1][0])); | ||||
tctx->dsp.butterflies_float(out[0], out[1], mtab->size); | |||||
tctx->fdsp.butterflies_float(out[0], out[1], mtab->size); | |||||
} | } | ||||
} | } | ||||
@@ -1162,7 +1161,6 @@ static av_cold int twin_decode_init(AVCodecContext *avctx) | |||||
return -1; | return -1; | ||||
} | } | ||||
ff_dsputil_init(&tctx->dsp, avctx); | |||||
avpriv_float_dsp_init(&tctx->fdsp, avctx->flags & CODEC_FLAG_BITEXACT); | avpriv_float_dsp_init(&tctx->fdsp, avctx->flags & CODEC_FLAG_BITEXACT); | ||||
if ((ret = init_mdct_win(tctx))) { | if ((ret = init_mdct_win(tctx))) { | ||||
av_log(avctx, AV_LOG_ERROR, "Error initializing MDCT\n"); | av_log(avctx, AV_LOG_ERROR, "Error initializing MDCT\n"); | ||||
@@ -82,7 +82,6 @@ int ff_wma_init(AVCodecContext *avctx, int flags2) | |||||
|| avctx->bit_rate <= 0) | || avctx->bit_rate <= 0) | ||||
return -1; | return -1; | ||||
ff_dsputil_init(&s->dsp, avctx); | |||||
ff_fmt_convert_init(&s->fmt_conv, avctx); | ff_fmt_convert_init(&s->fmt_conv, avctx); | ||||
avpriv_float_dsp_init(&s->fdsp, avctx->flags & CODEC_FLAG_BITEXACT); | avpriv_float_dsp_init(&s->fdsp, avctx->flags & CODEC_FLAG_BITEXACT); | ||||
@@ -132,7 +132,6 @@ typedef struct WMACodecContext { | |||||
float lsp_pow_e_table[256]; | float lsp_pow_e_table[256]; | ||||
float lsp_pow_m_table1[(1 << LSP_POW_BITS)]; | float lsp_pow_m_table1[(1 << LSP_POW_BITS)]; | ||||
float lsp_pow_m_table2[(1 << LSP_POW_BITS)]; | float lsp_pow_m_table2[(1 << LSP_POW_BITS)]; | ||||
DSPContext dsp; | |||||
FmtConvertContext fmt_conv; | FmtConvertContext fmt_conv; | ||||
AVFloatDSPContext fdsp; | AVFloatDSPContext fdsp; | ||||
@@ -731,7 +731,7 @@ static int wma_decode_block(WMACodecContext *s) | |||||
s->channel_coded[0] = 1; | s->channel_coded[0] = 1; | ||||
} | } | ||||
s->dsp.butterflies_float(s->coefs[0], s->coefs[1], s->block_len); | |||||
s->fdsp.butterflies_float(s->coefs[0], s->coefs[1], s->block_len); | |||||
} | } | ||||
next: | next: | ||||
@@ -41,6 +41,8 @@ void ff_vector_fmul_add_neon(float *dst, const float *src0, const float *src1, | |||||
void ff_vector_fmul_reverse_neon(float *dst, const float *src0, | void ff_vector_fmul_reverse_neon(float *dst, const float *src0, | ||||
const float *src1, int len); | const float *src1, int len); | ||||
void ff_butterflies_float_neon(float *v1, float *v2, int len); | |||||
void ff_float_dsp_init_neon(AVFloatDSPContext *fdsp) | void ff_float_dsp_init_neon(AVFloatDSPContext *fdsp) | ||||
{ | { | ||||
fdsp->vector_fmul = ff_vector_fmul_neon; | fdsp->vector_fmul = ff_vector_fmul_neon; | ||||
@@ -49,4 +51,5 @@ void ff_float_dsp_init_neon(AVFloatDSPContext *fdsp) | |||||
fdsp->vector_fmul_window = ff_vector_fmul_window_neon; | fdsp->vector_fmul_window = ff_vector_fmul_window_neon; | ||||
fdsp->vector_fmul_add = ff_vector_fmul_add_neon; | fdsp->vector_fmul_add = ff_vector_fmul_add_neon; | ||||
fdsp->vector_fmul_reverse = ff_vector_fmul_reverse_neon; | fdsp->vector_fmul_reverse = ff_vector_fmul_reverse_neon; | ||||
fdsp->butterflies_float = ff_butterflies_float_neon; | |||||
} | } |
@@ -244,3 +244,15 @@ function ff_vector_fmul_reverse_neon, export=1 | |||||
2: vst1.32 {q8-q9}, [r0,:128]! | 2: vst1.32 {q8-q9}, [r0,:128]! | ||||
bx lr | bx lr | ||||
endfunc | endfunc | ||||
@ Sum/difference butterfly over two float vectors, four elements per pass.
@ Going by the C prototype (float *v1, float *v2, int len) and the usual ARM
@ calling convention, r0 = v1, r1 = v2 and r2 = len -- TODO confirm.
@ On return the first vector holds v1 + v2 and the second holds v1 - v2.
@ The loop test sits at the bottom, so one pass always runs: len has to be
@ positive, and a len that is not a multiple of 4 makes the loop run past len.
function ff_butterflies_float_neon, export=1 | |||||
@ load four floats from each vector; no writeback here so the stores below
@ can reuse the same addresses (:128 marks them as 128-bit aligned)
1: vld1.32 {q0},[r0,:128] | |||||
vld1.32 {q1},[r1,:128] | |||||
@ q2 = v1 - v2, q1 = v1 + v2
vsub.f32 q2, q0, q1 | |||||
vadd.f32 q1, q0, q1 | |||||
@ store the difference over v2 and the sum over v1, post-incrementing both
vst1.32 {q2},[r1,:128]! | |||||
vst1.32 {q1},[r0,:128]! | |||||
@ four elements consumed; repeat while any remain
subs r2, r2, #4 | |||||
bgt 1b | |||||
bx lr | |||||
endfunc | |||||
@@ -92,6 +92,18 @@ static void vector_fmul_reverse_c(float *dst, const float *src0, | |||||
dst[i] = src0[i] * src1[-i]; | dst[i] = src0[i] * src1[-i]; | ||||
} | } | ||||
/**
 * In-place sum/difference of two float vectors.
 *
 * For every index below len, v1 receives v1 + v2 and v2 receives v1 - v2,
 * both computed from the values the vectors held on entry.
 *
 * NOTE(review): the dsputil copy of this routine qualifies its pointers with
 * av_restrict rather than plain restrict -- confirm which spelling this file
 * is expected to use.
 *
 * @param v1  first input vector, overwritten with the sums
 * @param v2  second input vector, overwritten with the differences
 * @param len number of elements to process; nothing is touched when <= 0
 */
static void butterflies_float_c(float *restrict v1, float *restrict v2,
                                int len)
{
    int n;

    for (n = 0; n < len; n++) {
        /* snapshot both operands before either slot is overwritten */
        const float a = v1[n];
        const float b = v2[n];

        v1[n] = a + b;
        v2[n] = a - b;
    }
}
void avpriv_float_dsp_init(AVFloatDSPContext *fdsp, int bit_exact) | void avpriv_float_dsp_init(AVFloatDSPContext *fdsp, int bit_exact) | ||||
{ | { | ||||
fdsp->vector_fmul = vector_fmul_c; | fdsp->vector_fmul = vector_fmul_c; | ||||
@@ -101,6 +113,7 @@ void avpriv_float_dsp_init(AVFloatDSPContext *fdsp, int bit_exact) | |||||
fdsp->vector_fmul_window = vector_fmul_window_c; | fdsp->vector_fmul_window = vector_fmul_window_c; | ||||
fdsp->vector_fmul_add = vector_fmul_add_c; | fdsp->vector_fmul_add = vector_fmul_add_c; | ||||
fdsp->vector_fmul_reverse = vector_fmul_reverse_c; | fdsp->vector_fmul_reverse = vector_fmul_reverse_c; | ||||
fdsp->butterflies_float = butterflies_float_c; | |||||
#if ARCH_ARM | #if ARCH_ARM | ||||
ff_float_dsp_init_arm(fdsp); | ff_float_dsp_init_arm(fdsp); | ||||
@@ -137,6 +137,15 @@ typedef struct AVFloatDSPContext { | |||||
*/ | */ | ||||
void (*vector_fmul_reverse)(float *dst, const float *src0, | void (*vector_fmul_reverse)(float *dst, const float *src0, | ||||
const float *src1, int len); | const float *src1, int len); | ||||
/** | |||||
* Calculate the sum and difference of two vectors of floats. | |||||
* | |||||
* @param v1 first input vector, sum output, 16-byte aligned | |||||
* @param v2 second input vector, difference output, 16-byte aligned | |||||
* @param len length of vectors, multiple of 4 | |||||
*/ | |||||
void (*butterflies_float)(float *restrict v1, float *restrict v2, int len); | |||||
} AVFloatDSPContext; | } AVFloatDSPContext; | ||||
/** | /** | ||||