Handles strides (needed for Opus transients) and does pre-reindexing and folding without needing a copy. Signed-off-by: Rostislav Pehlivanov <atomnuker@gmail.com> tags/n3.3
@@ -2107,7 +2107,7 @@ CONFIG_EXTRA=" | |||||
huffyuvencdsp | huffyuvencdsp | ||||
idctdsp | idctdsp | ||||
iirfilter | iirfilter | ||||
imdct15 | |||||
mdct15 | |||||
intrax8 | intrax8 | ||||
iso_media | iso_media | ||||
ividsp | ividsp | ||||
@@ -2349,7 +2349,7 @@ vc1dsp_select="h264chroma qpeldsp startcode" | |||||
rdft_select="fft" | rdft_select="fft" | ||||
# decoders / encoders | # decoders / encoders | ||||
aac_decoder_select="imdct15 mdct sinewin" | |||||
aac_decoder_select="mdct15 mdct sinewin" | |||||
aac_fixed_decoder_select="mdct sinewin" | aac_fixed_decoder_select="mdct sinewin" | ||||
aac_encoder_select="audio_frame_queue iirfilter lpc mdct sinewin" | aac_encoder_select="audio_frame_queue iirfilter lpc mdct sinewin" | ||||
aac_latm_decoder_select="aac_decoder aac_latm_parser" | aac_latm_decoder_select="aac_decoder aac_latm_parser" | ||||
@@ -2491,7 +2491,7 @@ nellymoser_encoder_select="audio_frame_queue mdct sinewin" | |||||
nuv_decoder_select="idctdsp lzo" | nuv_decoder_select="idctdsp lzo" | ||||
on2avc_decoder_select="mdct" | on2avc_decoder_select="mdct" | ||||
opus_decoder_deps="swresample" | opus_decoder_deps="swresample" | ||||
opus_decoder_select="imdct15" | |||||
opus_decoder_select="mdct15" | |||||
png_decoder_select="zlib" | png_decoder_select="zlib" | ||||
png_encoder_select="llvidencdsp zlib" | png_encoder_select="llvidencdsp zlib" | ||||
prores_decoder_select="blockdsp idctdsp" | prores_decoder_select="blockdsp idctdsp" | ||||
@@ -84,7 +84,7 @@ OBJS-$(CONFIG_HUFFYUVDSP) += huffyuvdsp.o | |||||
OBJS-$(CONFIG_HUFFYUVENCDSP) += huffyuvencdsp.o | OBJS-$(CONFIG_HUFFYUVENCDSP) += huffyuvencdsp.o | ||||
OBJS-$(CONFIG_IDCTDSP) += idctdsp.o simple_idct.o jrevdct.o | OBJS-$(CONFIG_IDCTDSP) += idctdsp.o simple_idct.o jrevdct.o | ||||
OBJS-$(CONFIG_IIRFILTER) += iirfilter.o | OBJS-$(CONFIG_IIRFILTER) += iirfilter.o | ||||
OBJS-$(CONFIG_IMDCT15) += imdct15.o | |||||
OBJS-$(CONFIG_MDCT15) += mdct15.o | |||||
OBJS-$(CONFIG_INTRAX8) += intrax8.o intrax8dsp.o | OBJS-$(CONFIG_INTRAX8) += intrax8.o intrax8dsp.o | ||||
OBJS-$(CONFIG_IVIDSP) += ivi_dsp.o | OBJS-$(CONFIG_IVIDSP) += ivi_dsp.o | ||||
OBJS-$(CONFIG_JNI) += ffjni.o jni.o | OBJS-$(CONFIG_JNI) += ffjni.o jni.o | ||||
@@ -36,7 +36,7 @@ | |||||
#include "libavutil/fixed_dsp.h" | #include "libavutil/fixed_dsp.h" | ||||
#include "avcodec.h" | #include "avcodec.h" | ||||
#if !USE_FIXED | #if !USE_FIXED | ||||
#include "imdct15.h" | |||||
#include "mdct15.h" | |||||
#endif | #endif | ||||
#include "fft.h" | #include "fft.h" | ||||
#include "mpeg4audio.h" | #include "mpeg4audio.h" | ||||
@@ -327,7 +327,7 @@ struct AACContext { | |||||
#if USE_FIXED | #if USE_FIXED | ||||
AVFixedDSPContext *fdsp; | AVFixedDSPContext *fdsp; | ||||
#else | #else | ||||
IMDCT15Context *mdct480; | |||||
MDCT15Context *mdct480; | |||||
AVFloatDSPContext *fdsp; | AVFloatDSPContext *fdsp; | ||||
#endif /* USE_FIXED */ | #endif /* USE_FIXED */ | ||||
int random_state; | int random_state; | ||||
@@ -42,7 +42,7 @@ | |||||
#include "internal.h" | #include "internal.h" | ||||
#include "get_bits.h" | #include "get_bits.h" | ||||
#include "fft.h" | #include "fft.h" | ||||
#include "imdct15.h" | |||||
#include "mdct15.h" | |||||
#include "lpc.h" | #include "lpc.h" | ||||
#include "kbdwin.h" | #include "kbdwin.h" | ||||
#include "sinewin.h" | #include "sinewin.h" | ||||
@@ -1207,7 +1207,7 @@ static av_cold int aac_decode_init(AVCodecContext *avctx) | |||||
AAC_RENAME_32(ff_mdct_init)(&ac->mdct_small, 8, 1, 1.0 / RANGE15(128.0)); | AAC_RENAME_32(ff_mdct_init)(&ac->mdct_small, 8, 1, 1.0 / RANGE15(128.0)); | ||||
AAC_RENAME_32(ff_mdct_init)(&ac->mdct_ltp, 11, 0, RANGE15(-2.0)); | AAC_RENAME_32(ff_mdct_init)(&ac->mdct_ltp, 11, 0, RANGE15(-2.0)); | ||||
#if !USE_FIXED | #if !USE_FIXED | ||||
ret = ff_imdct15_init(&ac->mdct480, 5); | |||||
ret = ff_mdct15_init(&ac->mdct480, 1, 5, -1.0f); | |||||
if (ret < 0) | if (ret < 0) | ||||
return ret; | return ret; | ||||
#endif | #endif | ||||
@@ -3217,7 +3217,7 @@ static av_cold int aac_decode_close(AVCodecContext *avctx) | |||||
ff_mdct_end(&ac->mdct_ld); | ff_mdct_end(&ac->mdct_ld); | ||||
ff_mdct_end(&ac->mdct_ltp); | ff_mdct_end(&ac->mdct_ltp); | ||||
#if !USE_FIXED | #if !USE_FIXED | ||||
ff_imdct15_uninit(&ac->mdct480); | |||||
ff_mdct15_uninit(&ac->mdct480); | |||||
#endif | #endif | ||||
av_freep(&ac->fdsp); | av_freep(&ac->fdsp); | ||||
return 0; | return 0; | ||||
@@ -33,7 +33,8 @@ | |||||
#include "libavutil/attributes.h" | #include "libavutil/attributes.h" | ||||
#include "libavutil/common.h" | #include "libavutil/common.h" | ||||
#include "imdct15.h" | |||||
#include "avfft.h" | |||||
#include "mdct15.h" | |||||
// complex c = a * b | // complex c = a * b | ||||
#define CMUL3(cre, cim, are, aim, bre, bim) \ | #define CMUL3(cre, cim, are, aim, bre, bim) \ | ||||
@@ -44,9 +45,9 @@ do { \ | |||||
#define CMUL(c, a, b) CMUL3((c).re, (c).im, (a).re, (a).im, (b).re, (b).im) | #define CMUL(c, a, b) CMUL3((c).re, (c).im, (a).re, (a).im, (b).re, (b).im) | ||||
av_cold void ff_imdct15_uninit(IMDCT15Context **ps) | |||||
av_cold void ff_mdct15_uninit(MDCT15Context **ps) | |||||
{ | { | ||||
IMDCT15Context *s = *ps; | |||||
MDCT15Context *s = *ps; | |||||
if (!s) | if (!s) | ||||
return; | return; | ||||
@@ -61,10 +62,12 @@ av_cold void ff_imdct15_uninit(IMDCT15Context **ps) | |||||
av_freep(ps); | av_freep(ps); | ||||
} | } | ||||
static void imdct15_half(IMDCT15Context *s, float *dst, const float *src, | |||||
static void mdct15(MDCT15Context *s, float *dst, const float *src, ptrdiff_t stride); | |||||
static void imdct15_half(MDCT15Context *s, float *dst, const float *src, | |||||
ptrdiff_t stride, float scale); | ptrdiff_t stride, float scale); | ||||
static inline int init_pfa_reindex_tabs(IMDCT15Context *s) | |||||
static inline int init_pfa_reindex_tabs(MDCT15Context *s) | |||||
{ | { | ||||
int i, j; | int i, j; | ||||
const int b_ptwo = s->ptwo_fft.nbits; /* Bits for the power of two FFTs */ | const int b_ptwo = s->ptwo_fft.nbits; /* Bits for the power of two FFTs */ | ||||
@@ -85,7 +88,7 @@ static inline int init_pfa_reindex_tabs(IMDCT15Context *s) | |||||
for (j = 0; j < 15; j++) { | for (j = 0; j < 15; j++) { | ||||
const int q_pre = ((l_ptwo * j)/15 + i) >> b_ptwo; | const int q_pre = ((l_ptwo * j)/15 + i) >> b_ptwo; | ||||
const int q_post = (((j*inv_1)/15) + (i*inv_2)) >> b_ptwo; | const int q_post = (((j*inv_1)/15) + (i*inv_2)) >> b_ptwo; | ||||
const int k_pre = 15*i + (j - q_pre*15)*l_ptwo; | |||||
const int k_pre = 15*i + ((j - q_pre*15) << b_ptwo); | |||||
const int k_post = i*inv_2*15 + j*inv_1 - 15*q_post*l_ptwo; | const int k_post = i*inv_2*15 + j*inv_1 - 15*q_post*l_ptwo; | ||||
s->pfa_prereindex[i*15 + j] = k_pre; | s->pfa_prereindex[i*15 + j] = k_pre; | ||||
s->pfa_postreindex[k_post] = l_ptwo*j + i; | s->pfa_postreindex[k_post] = l_ptwo*j + i; | ||||
@@ -95,9 +98,10 @@ static inline int init_pfa_reindex_tabs(IMDCT15Context *s) | |||||
return 0; | return 0; | ||||
} | } | ||||
av_cold int ff_imdct15_init(IMDCT15Context **ps, int N) | |||||
av_cold int ff_mdct15_init(MDCT15Context **ps, int inverse, int N, double scale) | |||||
{ | { | ||||
IMDCT15Context *s; | |||||
MDCT15Context *s; | |||||
double alpha, theta; | |||||
int len2 = 15 * (1 << N); | int len2 = 15 * (1 << N); | ||||
int len = 2 * len2; | int len = 2 * len2; | ||||
int i; | int i; | ||||
@@ -113,9 +117,11 @@ av_cold int ff_imdct15_init(IMDCT15Context **ps, int N) | |||||
s->fft_n = N - 1; | s->fft_n = N - 1; | ||||
s->len4 = len2 / 2; | s->len4 = len2 / 2; | ||||
s->len2 = len2; | s->len2 = len2; | ||||
s->inverse = inverse; | |||||
s->mdct = mdct15; | |||||
s->imdct_half = imdct15_half; | s->imdct_half = imdct15_half; | ||||
if (ff_fft_init(&s->ptwo_fft, N - 1, 1) < 0) | |||||
if (ff_fft_init(&s->ptwo_fft, N - 1, s->inverse) < 0) | |||||
goto fail; | goto fail; | ||||
if (init_pfa_reindex_tabs(s)) | if (init_pfa_reindex_tabs(s)) | ||||
@@ -129,15 +135,20 @@ av_cold int ff_imdct15_init(IMDCT15Context **ps, int N) | |||||
if (!s->twiddle_exptab) | if (!s->twiddle_exptab) | ||||
goto fail; | goto fail; | ||||
theta = 0.125f + (scale < 0 ? s->len4 : 0); | |||||
scale = sqrt(fabs(scale)); | |||||
for (i = 0; i < s->len4; i++) { | for (i = 0; i < s->len4; i++) { | ||||
s->twiddle_exptab[i].re = cos(2 * M_PI * (i + 0.125f + s->len4) / len); | |||||
s->twiddle_exptab[i].im = sin(2 * M_PI * (i + 0.125f + s->len4) / len); | |||||
alpha = 2 * M_PI * (i + theta) / len; | |||||
s->twiddle_exptab[i].re = cos(alpha) * scale; | |||||
s->twiddle_exptab[i].im = sin(alpha) * scale; | |||||
} | } | ||||
/* 15-point FFT exptab */ | /* 15-point FFT exptab */ | ||||
for (i = 0; i < 19; i++) { | for (i = 0; i < 19; i++) { | ||||
if (i < 15) { | if (i < 15) { | ||||
double theta = (2.0f * M_PI * i) / 15.0f; | double theta = (2.0f * M_PI * i) / 15.0f; | ||||
if (!s->inverse) | |||||
theta *= -1; | |||||
s->exptab[i].re = cos(theta); | s->exptab[i].re = cos(theta); | ||||
s->exptab[i].im = sin(theta); | s->exptab[i].im = sin(theta); | ||||
} else { /* Wrap around to simplify fft15 */ | } else { /* Wrap around to simplify fft15 */ | ||||
@@ -152,15 +163,17 @@ av_cold int ff_imdct15_init(IMDCT15Context **ps, int N) | |||||
s->exptab[20].im = sin(1.0f * M_PI / 5.0f); | s->exptab[20].im = sin(1.0f * M_PI / 5.0f); | ||||
/* Invert the phase for an inverse transform, do nothing for a forward transform */ | /* Invert the phase for an inverse transform, do nothing for a forward transform */ | ||||
s->exptab[19].im *= -1; | |||||
s->exptab[20].im *= -1; | |||||
if (s->inverse) { | |||||
s->exptab[19].im *= -1; | |||||
s->exptab[20].im *= -1; | |||||
} | |||||
*ps = s; | *ps = s; | ||||
return 0; | return 0; | ||||
fail: | fail: | ||||
ff_imdct15_uninit(&s); | |||||
ff_mdct15_uninit(&s); | |||||
return AVERROR(ENOMEM); | return AVERROR(ENOMEM); | ||||
} | } | ||||
@@ -211,8 +224,7 @@ static inline void fft5(const FFTComplex exptab[2], FFTComplex *out, | |||||
out[4].im = in[0].im + z0[3].im; | out[4].im = in[0].im + z0[3].im; | ||||
} | } | ||||
static inline void fft15(const FFTComplex exptab[22], FFTComplex *out, | |||||
const FFTComplex *in, size_t stride) | |||||
static void fft15(const FFTComplex exptab[22], FFTComplex *out, const FFTComplex *in, size_t stride) | |||||
{ | { | ||||
int k; | int k; | ||||
FFTComplex tmp1[5], tmp2[5], tmp3[5]; | FFTComplex tmp1[5], tmp2[5], tmp3[5]; | ||||
@@ -241,7 +253,51 @@ static inline void fft15(const FFTComplex exptab[22], FFTComplex *out, | |||||
} | } | ||||
} | } | ||||
static void imdct15_half(IMDCT15Context *s, float *dst, const float *src, | |||||
/*
 * Forward transform that ff_mdct15_init() installs as the context's `mdct`
 * callback; the header documents that callback as a full 2N -> N MDCT.
 *
 * s      - context providing the reindex tables, twiddle factors, the
 *          scratch buffer s->tmp and the power-of-two FFT
 * dst    - output buffer; every write index is a multiple of `stride`
 * src    - input samples; only read, folded on the fly
 * stride - step, in floats, between consecutive output values in dst
 *
 * NOTE(review): k indexes twiddle_exptab, which init fills with len4 entries,
 * so k is presumably < len4 and the highest src index read would then be
 * 4*len4 - 1 - confirm against init_pfa_reindex_tabs().
 */
static void mdct15(MDCT15Context *s, float *dst, const float *src, ptrdiff_t stride) | |||||
{ | |||||
int i, j; | |||||
const int len4 = s->len4, len3 = len4 * 3, len8 = len4 >> 1; | |||||
/* Number of points in each power-of-two FFT */
const int l_ptwo = 1 << s->ptwo_fft.nbits; | |||||
FFTComplex fft15in[15]; | |||||
/* Folding and pre-reindexing */ | |||||
for (i = 0; i < l_ptwo; i++) { | |||||
for (j = 0; j < 15; j++) { | |||||
float re, im; | |||||
/* Source position for slot (i, j), looked up in the pre-reindex table */
const int k = s->pfa_prereindex[i*15 + j]; | |||||
/*
 * Each of re and im combines one sample taken at an index that grows
 * with k and one taken at an index that shrinks with k; which pair of
 * src regions (and which signs) are used depends on k relative to len8.
 */
if (k < len8) { | |||||
re = -src[2*k+len3] - src[len3-1-2*k]; | |||||
im = -src[len4+2*k] + src[len4-1-2*k]; | |||||
} else { | |||||
re = src[2*k-len4] - src[1*len3-1-2*k]; | |||||
im = -src[2*k+len4] - src[5*len4-1-2*k]; | |||||
} | |||||
/* Multiply by the conjugate of twiddle k (imaginary part is negated) */
CMUL3(fft15in[j].re, fft15in[j].im, re, im, s->twiddle_exptab[k].re, -s->twiddle_exptab[k].im); | |||||
} | |||||
/*
 * 15-point FFT of the gathered inputs. The output pointer starts at
 * tmp[revtab[i]] and l_ptwo is passed as fft15's stride argument.
 */
fft15(s->exptab, s->tmp + s->ptwo_fft.revtab[i], fft15in, l_ptwo); | |||||
} | |||||
/* Then a 15xN FFT (where N is a power of two) */ | |||||
for (i = 0; i < 15; i++) | |||||
s->ptwo_fft.fft_calc(&s->ptwo_fft, s->tmp + l_ptwo*i); | |||||
/* Reindex again, apply twiddles and output */ | |||||
for (i = 0; i < len8; i++) { | |||||
float re0, im0, re1, im1; | |||||
/* i0 ascends from len8 while i1 descends from len8 - 1 */
const int i0 = len8 + i, i1 = len8 - i - 1; | |||||
const int s0 = s->pfa_postreindex[i0], s1 = s->pfa_postreindex[i1]; | |||||
/*
 * The twiddle is passed as (im, re), and each product's two results are
 * split across the pairs: the i1 product yields im1 and re0, the i0
 * product yields im0 and re1.
 */
CMUL3(im1, re0, s->tmp[s1].re, s->tmp[s1].im, s->twiddle_exptab[i1].im, s->twiddle_exptab[i1].re); | |||||
CMUL3(im0, re1, s->tmp[s0].re, s->tmp[s0].im, s->twiddle_exptab[i0].im, s->twiddle_exptab[i0].re); | |||||
/* Two strided outputs per index: at 2*idx*stride and one stride further */
dst[2*i1*stride ] = re0; | |||||
dst[2*i1*stride + stride] = im0; | |||||
dst[2*i0*stride ] = re1; | |||||
dst[2*i0*stride + stride] = im1; | |||||
} | |||||
} | |||||
static void imdct15_half(MDCT15Context *s, float *dst, const float *src, | |||||
ptrdiff_t stride, float scale) | ptrdiff_t stride, float scale) | ||||
{ | { | ||||
FFTComplex fft15in[15]; | FFTComplex fft15in[15]; |
@@ -1,4 +1,6 @@ | |||||
/* | /* | ||||
* Copyright (c) 2017 Rostislav Pehlivanov <atomnuker@gmail.com> | |||||
* | |||||
* This file is part of FFmpeg. | * This file is part of FFmpeg. | ||||
* | * | ||||
* FFmpeg is free software; you can redistribute it and/or | * FFmpeg is free software; you can redistribute it and/or | ||||
@@ -16,17 +18,18 @@ | |||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | ||||
*/ | */ | ||||
#ifndef AVCODEC_IMDCT15_H | |||||
#define AVCODEC_IMDCT15_H | |||||
#ifndef AVCODEC_MDCT15_H | |||||
#define AVCODEC_MDCT15_H | |||||
#include <stddef.h> | #include <stddef.h> | ||||
#include "fft.h" | #include "fft.h" | ||||
typedef struct IMDCT15Context { | |||||
typedef struct MDCT15Context { | |||||
int fft_n; | int fft_n; | ||||
int len2; | int len2; | ||||
int len4; | int len4; | ||||
int inverse; | |||||
int *pfa_prereindex; | int *pfa_prereindex; | ||||
int *pfa_postreindex; | int *pfa_postreindex; | ||||
@@ -39,21 +42,26 @@ typedef struct IMDCT15Context { | |||||
/* 0 - 18: fft15 twiddles, 19 - 20: fft5 twiddles */ | /* 0 - 18: fft15 twiddles, 19 - 20: fft5 twiddles */ | ||||
FFTComplex exptab[21]; | FFTComplex exptab[21]; | ||||
/** | |||||
* Calculate a full 2N -> N MDCT | |||||
*/ | |||||
void (*mdct)(struct MDCT15Context *s, float *dst, const float *src, ptrdiff_t stride); | |||||
/** | /** | ||||
* Calculate the middle half of the iMDCT | * Calculate the middle half of the iMDCT | ||||
*/ | */ | ||||
void (*imdct_half)(struct IMDCT15Context *s, float *dst, const float *src, | |||||
void (*imdct_half)(struct MDCT15Context *s, float *dst, const float *src, | |||||
ptrdiff_t src_stride, float scale); | ptrdiff_t src_stride, float scale); | ||||
} IMDCT15Context; | |||||
} MDCT15Context; | |||||
/** | /** | ||||
* Init an iMDCT of the length 2 * 15 * (2^N) | |||||
* Init an (i)MDCT of the length 2 * 15 * (2^N) | |||||
*/ | */ | ||||
int ff_imdct15_init(IMDCT15Context **s, int N); | |||||
int ff_mdct15_init(MDCT15Context **ps, int inverse, int N, double scale); | |||||
/** | /** | ||||
* Free an iMDCT. | |||||
* Frees a context | |||||
*/ | */ | ||||
void ff_imdct15_uninit(IMDCT15Context **s); | |||||
void ff_mdct15_uninit(MDCT15Context **ps); | |||||
#endif /* AVCODEC_IMDCT15_H */ | |||||
#endif /* AVCODEC_MDCT15_H */ |
@@ -29,7 +29,7 @@ | |||||
#include "libavutil/float_dsp.h" | #include "libavutil/float_dsp.h" | ||||
#include "libavutil/libm.h" | #include "libavutil/libm.h" | ||||
#include "imdct15.h" | |||||
#include "mdct15.h" | |||||
#include "opus.h" | #include "opus.h" | ||||
#include "opustab.h" | #include "opustab.h" | ||||
@@ -63,7 +63,7 @@ typedef struct CeltFrame { | |||||
struct CeltContext { | struct CeltContext { | ||||
// constant values that do not change during context lifetime | // constant values that do not change during context lifetime | ||||
AVCodecContext *avctx; | AVCodecContext *avctx; | ||||
IMDCT15Context *imdct[4]; | |||||
MDCT15Context *imdct[4]; | |||||
AVFloatDSPContext *dsp; | AVFloatDSPContext *dsp; | ||||
int output_channels; | int output_channels; | ||||
@@ -1596,7 +1596,7 @@ int ff_celt_decode_frame(CeltContext *s, OpusRangeCoder *rc, | |||||
int silence = 0; | int silence = 0; | ||||
int transient = 0; | int transient = 0; | ||||
int anticollapse = 0; | int anticollapse = 0; | ||||
IMDCT15Context *imdct; | |||||
MDCT15Context *imdct; | |||||
float imdct_scale = 1.0; | float imdct_scale = 1.0; | ||||
if (coded_channels != 1 && coded_channels != 2) { | if (coded_channels != 1 && coded_channels != 2) { | ||||
@@ -1792,7 +1792,7 @@ void ff_celt_free(CeltContext **ps) | |||||
return; | return; | ||||
for (i = 0; i < FF_ARRAY_ELEMS(s->imdct); i++) | for (i = 0; i < FF_ARRAY_ELEMS(s->imdct); i++) | ||||
ff_imdct15_uninit(&s->imdct[i]); | |||||
ff_mdct15_uninit(&s->imdct[i]); | |||||
av_freep(&s->dsp); | av_freep(&s->dsp); | ||||
av_freep(ps); | av_freep(ps); | ||||
@@ -1817,7 +1817,7 @@ int ff_celt_init(AVCodecContext *avctx, CeltContext **ps, int output_channels) | |||||
s->output_channels = output_channels; | s->output_channels = output_channels; | ||||
for (i = 0; i < FF_ARRAY_ELEMS(s->imdct); i++) { | for (i = 0; i < FF_ARRAY_ELEMS(s->imdct); i++) { | ||||
ret = ff_imdct15_init(&s->imdct[i], i + 3); | |||||
ret = ff_mdct15_init(&s->imdct[i], 1, i + 3, -1.0f); | |||||
if (ret < 0) | if (ret < 0) | ||||
goto fail; | goto fail; | ||||
} | } | ||||