Handles strides (needed for Opus transients), and does pre-reindexing and folding without needing a copy.
Signed-off-by: Rostislav Pehlivanov <atomnuker@gmail.com>
tags/n3.3
@@ -2107,7 +2107,7 @@ CONFIG_EXTRA=" | |||
huffyuvencdsp | |||
idctdsp | |||
iirfilter | |||
imdct15 | |||
mdct15 | |||
intrax8 | |||
iso_media | |||
ividsp | |||
@@ -2349,7 +2349,7 @@ vc1dsp_select="h264chroma qpeldsp startcode" | |||
rdft_select="fft" | |||
# decoders / encoders | |||
aac_decoder_select="imdct15 mdct sinewin" | |||
aac_decoder_select="mdct15 mdct sinewin" | |||
aac_fixed_decoder_select="mdct sinewin" | |||
aac_encoder_select="audio_frame_queue iirfilter lpc mdct sinewin" | |||
aac_latm_decoder_select="aac_decoder aac_latm_parser" | |||
@@ -2491,7 +2491,7 @@ nellymoser_encoder_select="audio_frame_queue mdct sinewin" | |||
nuv_decoder_select="idctdsp lzo" | |||
on2avc_decoder_select="mdct" | |||
opus_decoder_deps="swresample" | |||
opus_decoder_select="imdct15" | |||
opus_decoder_select="mdct15" | |||
png_decoder_select="zlib" | |||
png_encoder_select="llvidencdsp zlib" | |||
prores_decoder_select="blockdsp idctdsp" | |||
@@ -84,7 +84,7 @@ OBJS-$(CONFIG_HUFFYUVDSP) += huffyuvdsp.o | |||
OBJS-$(CONFIG_HUFFYUVENCDSP) += huffyuvencdsp.o | |||
OBJS-$(CONFIG_IDCTDSP) += idctdsp.o simple_idct.o jrevdct.o | |||
OBJS-$(CONFIG_IIRFILTER) += iirfilter.o | |||
OBJS-$(CONFIG_IMDCT15) += imdct15.o | |||
OBJS-$(CONFIG_MDCT15) += mdct15.o | |||
OBJS-$(CONFIG_INTRAX8) += intrax8.o intrax8dsp.o | |||
OBJS-$(CONFIG_IVIDSP) += ivi_dsp.o | |||
OBJS-$(CONFIG_JNI) += ffjni.o jni.o | |||
@@ -36,7 +36,7 @@ | |||
#include "libavutil/fixed_dsp.h" | |||
#include "avcodec.h" | |||
#if !USE_FIXED | |||
#include "imdct15.h" | |||
#include "mdct15.h" | |||
#endif | |||
#include "fft.h" | |||
#include "mpeg4audio.h" | |||
@@ -327,7 +327,7 @@ struct AACContext { | |||
#if USE_FIXED | |||
AVFixedDSPContext *fdsp; | |||
#else | |||
IMDCT15Context *mdct480; | |||
MDCT15Context *mdct480; | |||
AVFloatDSPContext *fdsp; | |||
#endif /* USE_FIXED */ | |||
int random_state; | |||
@@ -42,7 +42,7 @@ | |||
#include "internal.h" | |||
#include "get_bits.h" | |||
#include "fft.h" | |||
#include "imdct15.h" | |||
#include "mdct15.h" | |||
#include "lpc.h" | |||
#include "kbdwin.h" | |||
#include "sinewin.h" | |||
@@ -1207,7 +1207,7 @@ static av_cold int aac_decode_init(AVCodecContext *avctx) | |||
AAC_RENAME_32(ff_mdct_init)(&ac->mdct_small, 8, 1, 1.0 / RANGE15(128.0)); | |||
AAC_RENAME_32(ff_mdct_init)(&ac->mdct_ltp, 11, 0, RANGE15(-2.0)); | |||
#if !USE_FIXED | |||
ret = ff_imdct15_init(&ac->mdct480, 5); | |||
ret = ff_mdct15_init(&ac->mdct480, 1, 5, -1.0f); | |||
if (ret < 0) | |||
return ret; | |||
#endif | |||
@@ -3217,7 +3217,7 @@ static av_cold int aac_decode_close(AVCodecContext *avctx) | |||
ff_mdct_end(&ac->mdct_ld); | |||
ff_mdct_end(&ac->mdct_ltp); | |||
#if !USE_FIXED | |||
ff_imdct15_uninit(&ac->mdct480); | |||
ff_mdct15_uninit(&ac->mdct480); | |||
#endif | |||
av_freep(&ac->fdsp); | |||
return 0; | |||
@@ -33,7 +33,8 @@ | |||
#include "libavutil/attributes.h" | |||
#include "libavutil/common.h" | |||
#include "imdct15.h" | |||
#include "avfft.h" | |||
#include "mdct15.h" | |||
// complex c = a * b | |||
#define CMUL3(cre, cim, are, aim, bre, bim) \ | |||
@@ -44,9 +45,9 @@ do { \ | |||
#define CMUL(c, a, b) CMUL3((c).re, (c).im, (a).re, (a).im, (b).re, (b).im) | |||
av_cold void ff_imdct15_uninit(IMDCT15Context **ps) | |||
av_cold void ff_mdct15_uninit(MDCT15Context **ps) | |||
{ | |||
IMDCT15Context *s = *ps; | |||
MDCT15Context *s = *ps; | |||
if (!s) | |||
return; | |||
@@ -61,10 +62,12 @@ av_cold void ff_imdct15_uninit(IMDCT15Context **ps) | |||
av_freep(ps); | |||
} | |||
static void imdct15_half(IMDCT15Context *s, float *dst, const float *src, | |||
static void mdct15(MDCT15Context *s, float *dst, const float *src, ptrdiff_t stride); | |||
static void imdct15_half(MDCT15Context *s, float *dst, const float *src, | |||
ptrdiff_t stride, float scale); | |||
static inline int init_pfa_reindex_tabs(IMDCT15Context *s) | |||
static inline int init_pfa_reindex_tabs(MDCT15Context *s) | |||
{ | |||
int i, j; | |||
const int b_ptwo = s->ptwo_fft.nbits; /* Bits for the power of two FFTs */ | |||
@@ -85,7 +88,7 @@ static inline int init_pfa_reindex_tabs(IMDCT15Context *s) | |||
for (j = 0; j < 15; j++) { | |||
const int q_pre = ((l_ptwo * j)/15 + i) >> b_ptwo; | |||
const int q_post = (((j*inv_1)/15) + (i*inv_2)) >> b_ptwo; | |||
const int k_pre = 15*i + (j - q_pre*15)*l_ptwo; | |||
const int k_pre = 15*i + ((j - q_pre*15) << b_ptwo); | |||
const int k_post = i*inv_2*15 + j*inv_1 - 15*q_post*l_ptwo; | |||
s->pfa_prereindex[i*15 + j] = k_pre; | |||
s->pfa_postreindex[k_post] = l_ptwo*j + i; | |||
@@ -95,9 +98,10 @@ static inline int init_pfa_reindex_tabs(IMDCT15Context *s) | |||
return 0; | |||
} | |||
av_cold int ff_imdct15_init(IMDCT15Context **ps, int N) | |||
av_cold int ff_mdct15_init(MDCT15Context **ps, int inverse, int N, double scale) | |||
{ | |||
IMDCT15Context *s; | |||
MDCT15Context *s; | |||
double alpha, theta; | |||
int len2 = 15 * (1 << N); | |||
int len = 2 * len2; | |||
int i; | |||
@@ -113,9 +117,11 @@ av_cold int ff_imdct15_init(IMDCT15Context **ps, int N) | |||
s->fft_n = N - 1; | |||
s->len4 = len2 / 2; | |||
s->len2 = len2; | |||
s->inverse = inverse; | |||
s->mdct = mdct15; | |||
s->imdct_half = imdct15_half; | |||
if (ff_fft_init(&s->ptwo_fft, N - 1, 1) < 0) | |||
if (ff_fft_init(&s->ptwo_fft, N - 1, s->inverse) < 0) | |||
goto fail; | |||
if (init_pfa_reindex_tabs(s)) | |||
@@ -129,15 +135,20 @@ av_cold int ff_imdct15_init(IMDCT15Context **ps, int N) | |||
if (!s->twiddle_exptab) | |||
goto fail; | |||
theta = 0.125f + (scale < 0 ? s->len4 : 0); | |||
scale = sqrt(fabs(scale)); | |||
for (i = 0; i < s->len4; i++) { | |||
s->twiddle_exptab[i].re = cos(2 * M_PI * (i + 0.125f + s->len4) / len); | |||
s->twiddle_exptab[i].im = sin(2 * M_PI * (i + 0.125f + s->len4) / len); | |||
alpha = 2 * M_PI * (i + theta) / len; | |||
s->twiddle_exptab[i].re = cos(alpha) * scale; | |||
s->twiddle_exptab[i].im = sin(alpha) * scale; | |||
} | |||
/* 15-point FFT exptab */ | |||
for (i = 0; i < 19; i++) { | |||
if (i < 15) { | |||
double theta = (2.0f * M_PI * i) / 15.0f; | |||
if (!s->inverse) | |||
theta *= -1; | |||
s->exptab[i].re = cos(theta); | |||
s->exptab[i].im = sin(theta); | |||
} else { /* Wrap around to simplify fft15 */ | |||
@@ -152,15 +163,17 @@ av_cold int ff_imdct15_init(IMDCT15Context **ps, int N) | |||
s->exptab[20].im = sin(1.0f * M_PI / 5.0f); | |||
/* Invert the phase for an inverse transform, do nothing for a forward transform */ | |||
s->exptab[19].im *= -1; | |||
s->exptab[20].im *= -1; | |||
if (s->inverse) { | |||
s->exptab[19].im *= -1; | |||
s->exptab[20].im *= -1; | |||
} | |||
*ps = s; | |||
return 0; | |||
fail: | |||
ff_imdct15_uninit(&s); | |||
ff_mdct15_uninit(&s); | |||
return AVERROR(ENOMEM); | |||
} | |||
@@ -211,8 +224,7 @@ static inline void fft5(const FFTComplex exptab[2], FFTComplex *out, | |||
out[4].im = in[0].im + z0[3].im; | |||
} | |||
static inline void fft15(const FFTComplex exptab[22], FFTComplex *out, | |||
const FFTComplex *in, size_t stride) | |||
static void fft15(const FFTComplex exptab[22], FFTComplex *out, const FFTComplex *in, size_t stride) | |||
{ | |||
int k; | |||
FFTComplex tmp1[5], tmp2[5], tmp3[5]; | |||
@@ -241,7 +253,51 @@ static inline void fft15(const FFTComplex exptab[22], FFTComplex *out, | |||
} | |||
} | |||
static void imdct15_half(IMDCT15Context *s, float *dst, const float *src, | |||
/**
 * Forward MDCT built from 15-point FFTs combined with power-of-two FFTs.
 *
 * Works in three passes: (1) fold the input and pre-twiddle it while
 * gathering samples through the pre-reindex table, feeding one 15-point FFT
 * per outer iteration; (2) run the power-of-two FFT on each of the 15 rows
 * of s->tmp; (3) gather through the post-reindex table, apply the twiddles
 * again and write the result to dst.
 *
 * @param s      transform context; reads len4, the reindex tables, the
 *               twiddle/exp tables and ptwo_fft, and uses s->tmp as scratch
 * @param dst    output; written at indices 2*i*stride and 2*i*stride + stride
 *               for i in [0, 2*len8), i.e. every write is a multiple of stride
 * @param src    input, read with unit stride; assuming every pre-reindex
 *               entry lies in [0, len4), the indices used stay within
 *               [0, 4*len4) -- TODO confirm against init_pfa_reindex_tabs
 * @param stride distance, in floats, between consecutive output coefficients
 */
static void mdct15(MDCT15Context *s, float *dst, const float *src, ptrdiff_t stride) | |||
{ | |||
int i, j; | |||
const int len4 = s->len4, len3 = len4 * 3, len8 = len4 >> 1; | |||
const int l_ptwo = 1 << s->ptwo_fft.nbits; | |||
FFTComplex fft15in[15]; | |||
/* Folding and pre-reindexing: one 15-point FFT input per outer iteration */ | |||
for (i = 0; i < l_ptwo; i++) { | |||
for (j = 0; j < 15; j++) { | |||
float re, im; | |||
const int k = s->pfa_prereindex[i*15 + j]; | |||
/* The fold uses different sample pairs and signs below and above len8 */
if (k < len8) { | |||
re = -src[2*k+len3] - src[len3-1-2*k]; | |||
im = -src[len4+2*k] + src[len4-1-2*k]; | |||
} else { | |||
re = src[2*k-len4] - src[1*len3-1-2*k]; | |||
im = -src[2*k+len4] - src[5*len4-1-2*k]; | |||
} | |||
/* Pre-twiddle: the imaginary part of the table entry is negated, i.e. multiply by its conjugate */
CMUL3(fft15in[j].re, fft15in[j].im, re, im, s->twiddle_exptab[k].re, -s->twiddle_exptab[k].im); | |||
} | |||
/* Output lands in s->tmp at the offset given by ptwo_fft.revtab[i]; l_ptwo is passed as fft15's stride argument */
fft15(s->exptab, s->tmp + s->ptwo_fft.revtab[i], fft15in, l_ptwo); | |||
} | |||
/* Then a 15xN FFT (where N is a power of two): fft_calc runs on each of the 15 rows of length l_ptwo */ | |||
for (i = 0; i < 15; i++) | |||
s->ptwo_fft.fft_calc(&s->ptwo_fft, s->tmp + l_ptwo*i); | |||
/* Reindex again, apply twiddles and output; i0 walks up from len8 while i1 walks down from len8 - 1 */ | |||
for (i = 0; i < len8; i++) { | |||
float re0, im0, re1, im1; | |||
const int i0 = len8 + i, i1 = len8 - i - 1; | |||
const int s0 = s->pfa_postreindex[i0], s1 = s->pfa_postreindex[i1]; | |||
/* The twiddle is passed with .im and .re swapped, and each product is split across the two mirrored output slots (re0/im1, re1/im0) */
CMUL3(im1, re0, s->tmp[s1].re, s->tmp[s1].im, s->twiddle_exptab[i1].im, s->twiddle_exptab[i1].re); | |||
CMUL3(im0, re1, s->tmp[s0].re, s->tmp[s0].im, s->twiddle_exptab[i0].im, s->twiddle_exptab[i0].re); | |||
dst[2*i1*stride ] = re0; | |||
dst[2*i1*stride + stride] = im0; | |||
dst[2*i0*stride ] = re1; | |||
dst[2*i0*stride + stride] = im1; | |||
} | |||
} | |||
static void imdct15_half(MDCT15Context *s, float *dst, const float *src, | |||
ptrdiff_t stride, float scale) | |||
{ | |||
FFTComplex fft15in[15]; |
@@ -1,4 +1,6 @@ | |||
/* | |||
* Copyright (c) 2017 Rostislav Pehlivanov <atomnuker@gmail.com> | |||
* | |||
* This file is part of FFmpeg. | |||
* | |||
* FFmpeg is free software; you can redistribute it and/or | |||
@@ -16,17 +18,18 @@ | |||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |||
*/ | |||
#ifndef AVCODEC_IMDCT15_H | |||
#define AVCODEC_IMDCT15_H | |||
#ifndef AVCODEC_MDCT15_H | |||
#define AVCODEC_MDCT15_H | |||
#include <stddef.h> | |||
#include "fft.h" | |||
typedef struct IMDCT15Context { | |||
typedef struct MDCT15Context { | |||
int fft_n; | |||
int len2; | |||
int len4; | |||
int inverse; | |||
int *pfa_prereindex; | |||
int *pfa_postreindex; | |||
@@ -39,21 +42,26 @@ typedef struct IMDCT15Context { | |||
/* 0 - 18: fft15 twiddles, 19 - 20: fft5 twiddles */ | |||
FFTComplex exptab[21]; | |||
/** | |||
* Calculate a full 2N -> N MDCT | |||
*/ | |||
void (*mdct)(struct MDCT15Context *s, float *dst, const float *src, ptrdiff_t stride); | |||
/** | |||
* Calculate the middle half of the iMDCT | |||
*/ | |||
void (*imdct_half)(struct IMDCT15Context *s, float *dst, const float *src, | |||
void (*imdct_half)(struct MDCT15Context *s, float *dst, const float *src, | |||
ptrdiff_t src_stride, float scale); | |||
} IMDCT15Context; | |||
} MDCT15Context; | |||
/** | |||
* Init an iMDCT of the length 2 * 15 * (2^N) | |||
* Init an (i)MDCT of the length 2 * 15 * (2^N) | |||
*/ | |||
int ff_imdct15_init(IMDCT15Context **s, int N); | |||
int ff_mdct15_init(MDCT15Context **ps, int inverse, int N, double scale); | |||
/** | |||
* Free an iMDCT. | |||
* Frees a context | |||
*/ | |||
void ff_imdct15_uninit(IMDCT15Context **s); | |||
void ff_mdct15_uninit(MDCT15Context **ps); | |||
#endif /* AVCODEC_IMDCT15_H */ | |||
#endif /* AVCODEC_MDCT15_H */ |
@@ -29,7 +29,7 @@ | |||
#include "libavutil/float_dsp.h" | |||
#include "libavutil/libm.h" | |||
#include "imdct15.h" | |||
#include "mdct15.h" | |||
#include "opus.h" | |||
#include "opustab.h" | |||
@@ -63,7 +63,7 @@ typedef struct CeltFrame { | |||
struct CeltContext { | |||
// constant values that do not change during context lifetime | |||
AVCodecContext *avctx; | |||
IMDCT15Context *imdct[4]; | |||
MDCT15Context *imdct[4]; | |||
AVFloatDSPContext *dsp; | |||
int output_channels; | |||
@@ -1596,7 +1596,7 @@ int ff_celt_decode_frame(CeltContext *s, OpusRangeCoder *rc, | |||
int silence = 0; | |||
int transient = 0; | |||
int anticollapse = 0; | |||
IMDCT15Context *imdct; | |||
MDCT15Context *imdct; | |||
float imdct_scale = 1.0; | |||
if (coded_channels != 1 && coded_channels != 2) { | |||
@@ -1792,7 +1792,7 @@ void ff_celt_free(CeltContext **ps) | |||
return; | |||
for (i = 0; i < FF_ARRAY_ELEMS(s->imdct); i++) | |||
ff_imdct15_uninit(&s->imdct[i]); | |||
ff_mdct15_uninit(&s->imdct[i]); | |||
av_freep(&s->dsp); | |||
av_freep(ps); | |||
@@ -1817,7 +1817,7 @@ int ff_celt_init(AVCodecContext *avctx, CeltContext **ps, int output_channels) | |||
s->output_channels = output_channels; | |||
for (i = 0; i < FF_ARRAY_ELEMS(s->imdct); i++) { | |||
ret = ff_imdct15_init(&s->imdct[i], i + 3); | |||
ret = ff_mdct15_init(&s->imdct[i], 1, i + 3, -1.0f); | |||
if (ret < 0) | |||
goto fail; | |||
} | |||