You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

492 lines
15KB

  1. /*
  2. * Copyright (C) 2016 foo86
  3. *
  4. * This file is part of FFmpeg.
  5. *
  6. * FFmpeg is free software; you can redistribute it and/or
  7. * modify it under the terms of the GNU Lesser General Public
  8. * License as published by the Free Software Foundation; either
  9. * version 2.1 of the License, or (at your option) any later version.
  10. *
  11. * FFmpeg is distributed in the hope that it will be useful,
  12. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  14. * Lesser General Public License for more details.
  15. *
  16. * You should have received a copy of the GNU Lesser General Public
  17. * License along with FFmpeg; if not, write to the Free Software
  18. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  19. */
  20. #include "libavutil/mem.h"
  21. #include "libavutil/mem_internal.h"
  22. #include "dcadsp.h"
  23. #include "dcamath.h"
  /**
   * Fill subbands [sb_start, sb_end) from a table lookup.
   *
   * For every subband sb in the range, the row hf_vq[vq_index[sb]] is
   * multiplied element by element with scale_factors[sb][0], rounded
   * (+8, then >> 4), passed through clip23() and stored to
   * dst[sb][ofs] .. dst[sb][ofs + len - 1].
   *
   * @param dst           per-subband output buffers
   * @param vq_index      per-subband row index into hf_vq
   * @param hf_vq         lookup table; the first len entries of a row are read
   * @param scale_factors per-subband scales; only element [0] is used here
   * @param sb_start      first subband to process
   * @param sb_end        one past the last subband to process
   * @param ofs           index of the first written sample in each dst row
   * @param len           number of samples written per subband
   */
  static void decode_hf_c(int32_t **dst,
                          const int32_t *vq_index,
                          const int8_t hf_vq[1024][32],
                          int32_t scale_factors[32][2],
                          ptrdiff_t sb_start, ptrdiff_t sb_end,
                          ptrdiff_t ofs, ptrdiff_t len)
  {
      for (int band = sb_start; band < sb_end; band++) {
          const int8_t *row  = hf_vq[vq_index[band]];
          const int32_t gain = scale_factors[band][0];

          for (int n = 0; n < len; n++) {
              // Round to nearest before dropping the 4 fractional bits
              const int32_t rounded = (row[n] * gain + (1 << 3)) >> 4;

              dst[band][n + ofs] = clip23(rounded);
          }
      }
  }
  /**
   * Derive subbands [sb_start, sb_end) of dst from the same subbands of src.
   *
   * Each sample src[sb][ofs + n], n in [0, len), is combined with
   * scale_factors[sb] through mul17(), passed through clip23() and stored
   * at the same position in dst.
   *
   * @param dst           per-subband output buffers
   * @param src           per-subband input buffers
   * @param scale_factors one scale per subband
   * @param sb_start      first subband to process
   * @param sb_end        one past the last subband to process
   * @param ofs           index of the first processed sample in each row
   * @param len           number of samples processed per subband
   */
  static void decode_joint_c(int32_t **dst, int32_t **src,
                             const int32_t *scale_factors,
                             ptrdiff_t sb_start, ptrdiff_t sb_end,
                             ptrdiff_t ofs, ptrdiff_t len)
  {
      for (int band = sb_start; band < sb_end; band++) {
          const int32_t gain = scale_factors[band];

          for (int n = 0; n < len; n++) {
              const int32_t in = src[band][n + ofs];

              dst[band][n + ofs] = clip23(mul17(in, gain));
          }
      }
  }
  /**
   * Interpolating FIR filter for LFE samples, floating point version.
   *
   * dec_select chooses the configuration: each input sample yields
   * (64 << dec_select) output samples using (8 >> dec_select) filter taps,
   * and (npcmblocks >> (dec_select + 1)) input samples are consumed.
   *
   * @param pcm_samples  output buffer
   * @param lfe_samples  input samples; the taps read backwards from the
   *                     current sample, so history must precede the pointer
   * @param filter_coeff filter table, indexed from 0 up and from 255 down
   * @param npcmblocks   block count that determines the number of inputs
   * @param dec_select   0 or 1 in this file (see the two wrappers)
   */
  static void lfe_fir_float_c(float *pcm_samples, int32_t *lfe_samples,
                              const float *filter_coeff, ptrdiff_t npcmblocks,
                              int dec_select)
  {
      const int out_per_in = 64 << dec_select;
      const int taps       = 8 >> dec_select;
      const int n_inputs   = npcmblocks >> (dec_select + 1);
      const int half       = out_per_in / 2;

      for (int n = 0; n < n_inputs; n++) {
          // One decimated sample generates 64 or 128 interpolated ones
          for (int phase = 0; phase < half; phase++) {
              const int base = phase * taps;
              float lo = 0;
              float hi = 0;

              for (int t = 0; t < taps; t++) {
                  // The upper half walks the same table from its end
                  lo += filter_coeff[      base + t] * lfe_samples[-t];
                  hi += filter_coeff[255 - base - t] * lfe_samples[-t];
              }

              pcm_samples[       phase] = lo;
              pcm_samples[half + phase] = hi;
          }

          lfe_samples += 1;
          pcm_samples += out_per_in;
      }
  }
  /**
   * lfe_fir_float_c() with dec_select fixed to 0
   * (64 output samples per input sample, 8 taps).
   */
  static void lfe_fir0_float_c(float *pcm_samples, int32_t *lfe_samples,
                               const float *filter_coeff, ptrdiff_t npcmblocks)
  {
      const int dec_select = 0;

      lfe_fir_float_c(pcm_samples, lfe_samples, filter_coeff, npcmblocks,
                      dec_select);
  }
  /**
   * lfe_fir_float_c() with dec_select fixed to 1
   * (128 output samples per input sample, 4 taps).
   */
  static void lfe_fir1_float_c(float *pcm_samples, int32_t *lfe_samples,
                               const float *filter_coeff, ptrdiff_t npcmblocks)
  {
      const int dec_select = 1;

      lfe_fir_float_c(pcm_samples, lfe_samples, filter_coeff, npcmblocks,
                      dec_select);
  }
  /**
   * Double the sample count by linear interpolation, floating point version.
   *
   * Every input sample produces two outputs: a 1/4 : 3/4 and a 3/4 : 1/4
   * blend of the current and the previous input sample.
   *
   * @param dst  output buffer, receives 2 * len samples
   * @param src  input buffer of len samples
   * @param hist in: sample preceding src[0]; out: last sample consumed
   * @param len  number of input samples
   */
  static void lfe_x96_float_c(float *dst, const float *src,
                              float *hist, ptrdiff_t len)
  {
      float last = *hist;

      for (int n = 0; n < len; n++) {
          const float first  = 0.25f * src[n] + 0.75f * last;
          const float second = 0.75f * src[n] + 0.25f * last;

          last   = src[n];
          dst[0] = first;
          dst[1] = second;
          dst   += 2;
      }

      *hist = last;
  }
  100. static void sub_qmf32_float_c(SynthFilterContext *synth,
  101. FFTContext *imdct,
  102. float *pcm_samples,
  103. int32_t **subband_samples_lo,
  104. int32_t **subband_samples_hi,
  105. float *hist1, int *offset, float *hist2,
  106. const float *filter_coeff, ptrdiff_t npcmblocks,
  107. float scale)
  108. {
  109. LOCAL_ALIGNED_32(float, input, [32]);
  110. int i, j;
  111. for (j = 0; j < npcmblocks; j++) {
  112. // Load in one sample from each subband
  113. for (i = 0; i < 32; i++) {
  114. if ((i - 1) & 2)
  115. input[i] = -subband_samples_lo[i][j];
  116. else
  117. input[i] = subband_samples_lo[i][j];
  118. }
  119. // One subband sample generates 32 interpolated ones
  120. synth->synth_filter_float(imdct, hist1, offset,
  121. hist2, filter_coeff,
  122. pcm_samples, input, scale);
  123. pcm_samples += 32;
  124. }
  125. }
  126. static void sub_qmf64_float_c(SynthFilterContext *synth,
  127. FFTContext *imdct,
  128. float *pcm_samples,
  129. int32_t **subband_samples_lo,
  130. int32_t **subband_samples_hi,
  131. float *hist1, int *offset, float *hist2,
  132. const float *filter_coeff, ptrdiff_t npcmblocks,
  133. float scale)
  134. {
  135. LOCAL_ALIGNED_32(float, input, [64]);
  136. int i, j;
  137. if (!subband_samples_hi)
  138. memset(&input[32], 0, sizeof(input[0]) * 32);
  139. for (j = 0; j < npcmblocks; j++) {
  140. // Load in one sample from each subband
  141. if (subband_samples_hi) {
  142. // Full 64 subbands, first 32 are residual coded
  143. for (i = 0; i < 32; i++) {
  144. if ((i - 1) & 2)
  145. input[i] = -subband_samples_lo[i][j] - subband_samples_hi[i][j];
  146. else
  147. input[i] = subband_samples_lo[i][j] + subband_samples_hi[i][j];
  148. }
  149. for (i = 32; i < 64; i++) {
  150. if ((i - 1) & 2)
  151. input[i] = -subband_samples_hi[i][j];
  152. else
  153. input[i] = subband_samples_hi[i][j];
  154. }
  155. } else {
  156. // Only first 32 subbands
  157. for (i = 0; i < 32; i++) {
  158. if ((i - 1) & 2)
  159. input[i] = -subband_samples_lo[i][j];
  160. else
  161. input[i] = subband_samples_lo[i][j];
  162. }
  163. }
  164. // One subband sample generates 64 interpolated ones
  165. synth->synth_filter_float_64(imdct, hist1, offset,
  166. hist2, filter_coeff,
  167. pcm_samples, input, scale);
  168. pcm_samples += 64;
  169. }
  170. }
  /**
   * Interpolating FIR filter for LFE samples, fixed point version.
   *
   * Consumes npcmblocks >> 1 input samples; each yields 64 output samples
   * computed with 8 taps in 64-bit accumulators, then reduced with norm23()
   * and clip23().
   *
   * @param pcm_samples  output buffer
   * @param lfe_samples  input samples; the taps read backwards from the
   *                     current sample, so history must precede the pointer
   * @param filter_coeff filter table, indexed from 0 up and from 255 down
   * @param npcmblocks   block count that determines the number of inputs
   */
  static void lfe_fir_fixed_c(int32_t *pcm_samples, int32_t *lfe_samples,
                              const int32_t *filter_coeff, ptrdiff_t npcmblocks)
  {
      const int n_inputs = npcmblocks >> 1;

      for (int n = 0; n < n_inputs; n++) {
          // One decimated sample generates 64 interpolated ones
          for (int phase = 0; phase < 32; phase++) {
              const int base = phase * 8;
              int64_t lo = 0;
              int64_t hi = 0;

              for (int t = 0; t < 8; t++) {
                  // The upper half walks the same table from its end
                  lo += (int64_t)filter_coeff[      base + t] * lfe_samples[-t];
                  hi += (int64_t)filter_coeff[255 - base - t] * lfe_samples[-t];
              }

              pcm_samples[     phase] = clip23(norm23(lo));
              pcm_samples[32 + phase] = clip23(norm23(hi));
          }

          lfe_samples += 1;
          pcm_samples += 64;
      }
  }
  /**
   * Double the sample count by interpolation, fixed point version.
   *
   * Every input sample produces two outputs, each a weighted sum of the
   * current and the previous input sample computed in 64 bits and reduced
   * with norm23() and clip23(). The two weights swap roles between the
   * first and second output.
   * NOTE(review): the weights look like ~0.25 and ~0.75 scaled by 2^23,
   * mirroring the float variant - confirm against the specification.
   *
   * @param dst  output buffer, receives 2 * len samples
   * @param src  input buffer of len samples
   * @param hist in: sample preceding src[0]; out: last sample consumed
   * @param len  number of input samples
   */
  static void lfe_x96_fixed_c(int32_t *dst, const int32_t *src,
                              int32_t *hist, ptrdiff_t len)
  {
      int32_t last = *hist;

      for (int n = 0; n < len; n++) {
          const int64_t first  = INT64_C(2097471) * src[n] + INT64_C(6291137) * last;
          const int64_t second = INT64_C(6291137) * src[n] + INT64_C(2097471) * last;

          last   = src[n];
          dst[0] = clip23(norm23(first));
          dst[1] = clip23(norm23(second));
          dst   += 2;
      }

      *hist = last;
  }
  207. static void sub_qmf32_fixed_c(SynthFilterContext *synth,
  208. DCADCTContext *imdct,
  209. int32_t *pcm_samples,
  210. int32_t **subband_samples_lo,
  211. int32_t **subband_samples_hi,
  212. int32_t *hist1, int *offset, int32_t *hist2,
  213. const int32_t *filter_coeff, ptrdiff_t npcmblocks)
  214. {
  215. LOCAL_ALIGNED_32(int32_t, input, [32]);
  216. int i, j;
  217. for (j = 0; j < npcmblocks; j++) {
  218. // Load in one sample from each subband
  219. for (i = 0; i < 32; i++)
  220. input[i] = subband_samples_lo[i][j];
  221. // One subband sample generates 32 interpolated ones
  222. synth->synth_filter_fixed(imdct, hist1, offset,
  223. hist2, filter_coeff,
  224. pcm_samples, input);
  225. pcm_samples += 32;
  226. }
  227. }
  228. static void sub_qmf64_fixed_c(SynthFilterContext *synth,
  229. DCADCTContext *imdct,
  230. int32_t *pcm_samples,
  231. int32_t **subband_samples_lo,
  232. int32_t **subband_samples_hi,
  233. int32_t *hist1, int *offset, int32_t *hist2,
  234. const int32_t *filter_coeff, ptrdiff_t npcmblocks)
  235. {
  236. LOCAL_ALIGNED_32(int32_t, input, [64]);
  237. int i, j;
  238. if (!subband_samples_hi)
  239. memset(&input[32], 0, sizeof(input[0]) * 32);
  240. for (j = 0; j < npcmblocks; j++) {
  241. // Load in one sample from each subband
  242. if (subband_samples_hi) {
  243. // Full 64 subbands, first 32 are residual coded
  244. for (i = 0; i < 32; i++)
  245. input[i] = subband_samples_lo[i][j] + subband_samples_hi[i][j];
  246. for (i = 32; i < 64; i++)
  247. input[i] = subband_samples_hi[i][j];
  248. } else {
  249. // Only first 32 subbands
  250. for (i = 0; i < 32; i++)
  251. input[i] = subband_samples_lo[i][j];
  252. }
  253. // One subband sample generates 64 interpolated ones
  254. synth->synth_filter_fixed_64(imdct, hist1, offset,
  255. hist2, filter_coeff,
  256. pcm_samples, input);
  257. pcm_samples += 64;
  258. }
  259. }
  260. static void decor_c(int32_t *dst, const int32_t *src, int coeff, ptrdiff_t len)
  261. {
  262. int i;
  263. for (i = 0; i < len; i++)
  264. dst[i] += (SUINT)((int)(src[i] * (SUINT)coeff + (1 << 2)) >> 3);
  265. }
  /**
   * Subtract the same scaled source from two destinations, in place.
   *
   * Each src sample is combined with the constant 5931520 through mul23()
   * and the result is subtracted from both dst1 and dst2.
   *
   * @param dst1 first buffer updated in place
   * @param dst2 second buffer updated in place
   * @param src  input samples
   * @param len  number of samples
   */
  static void dmix_sub_xch_c(int32_t *dst1, int32_t *dst2,
                             const int32_t *src, ptrdiff_t len)
  {
      const int32_t sqrt1_2 = 5931520; /* M_SQRT1_2 * (1 << 23) */

      for (int n = 0; n < len; n++) {
          const int32_t cs = mul23(src[n], sqrt1_2);

          dst1[n] -= cs;
          dst2[n] -= cs;
      }
  }
  /**
   * Subtract a scaled source from the destination, in place:
   * dst[n] -= mul15(src[n], coeff).
   *
   * The product is converted to unsigned before the subtraction, so the
   * subtraction itself is performed in unsigned arithmetic.
   *
   * @param dst   buffer updated in place
   * @param src   input samples
   * @param coeff multiplier passed to mul15()
   * @param len   number of samples
   */
  static void dmix_sub_c(int32_t *dst, const int32_t *src, int coeff, ptrdiff_t len)
  {
      for (int n = 0; n < len; n++) {
          const unsigned scaled = (unsigned)mul15(src[n], coeff);

          dst[n] -= scaled;
      }
  }
  /**
   * Add a scaled source to the destination, in place:
   * dst[i] += mul15(src[i], coeff).
   *
   * The product is converted to unsigned before the addition, exactly as
   * dmix_sub_c() does for the subtraction. The sum is therefore computed in
   * unsigned arithmetic and wraps instead of triggering signed integer
   * overflow (undefined behaviour in C) when it leaves the int32_t range.
   * Results for sums that stay within range are unchanged.
   *
   * @param dst   buffer updated in place
   * @param src   input samples
   * @param coeff multiplier passed to mul15()
   * @param len   number of samples
   */
  static void dmix_add_c(int32_t *dst, const int32_t *src, int coeff, ptrdiff_t len)
  {
      int i;

      for (i = 0; i < len; i++)
          dst[i] += (unsigned)mul15(src[i], coeff);
  }
  /**
   * Scale a buffer in place: dst[n] = mul15(dst[n], scale).
   *
   * @param dst   buffer updated in place
   * @param scale multiplier passed to mul15()
   * @param len   number of samples
   */
  static void dmix_scale_c(int32_t *dst, int scale, ptrdiff_t len)
  {
      for (int n = 0; n < len; n++) {
          const int32_t in = dst[n];

          dst[n] = mul15(in, scale);
      }
  }
  /**
   * Scale a buffer in place: dst[n] = mul16(dst[n], scale_inv).
   *
   * @param dst       buffer updated in place
   * @param scale_inv multiplier passed to mul16()
   * @param len       number of samples
   */
  static void dmix_scale_inv_c(int32_t *dst, int scale_inv, ptrdiff_t len)
  {
      for (int n = 0; n < len; n++) {
          const int32_t in = dst[n];

          dst[n] = mul16(in, scale_inv);
      }
  }
  300. static void filter0(SUINT32 *dst, const int32_t *src, int32_t coeff, ptrdiff_t len)
  301. {
  302. int i;
  303. for (i = 0; i < len; i++)
  304. dst[i] -= mul22(src[i], coeff);
  305. }
  306. static void filter1(SUINT32 *dst, const int32_t *src, int32_t coeff, ptrdiff_t len)
  307. {
  308. int i;
  309. for (i = 0; i < len; i++)
  310. dst[i] -= mul23(src[i], coeff);
  311. }
  /**
   * Combine two bands into one interleaved output of 2 * len samples.
   *
   * src0 and src1 are modified in place: first by four alternating
   * filter0() passes using coeff[0..3], then by eight rounds of three
   * filter1() passes using coeff[4..19]. Round k operates on src0 moved
   * back by k samples, so at least 7 valid samples must precede src0.
   * Finally the output alternates src1[n] with src0[n - 7].
   *
   * @param dst   output buffer, receives 2 * len samples
   * @param src0  first band, modified in place, read with negative offsets
   * @param src1  second band, modified in place
   * @param coeff 20 filter coefficients
   * @param len   number of samples per band
   */
  static void assemble_freq_bands_c(int32_t *dst, int32_t *src0, int32_t *src1,
                                    const int32_t *coeff, ptrdiff_t len)
  {
      for (int k = 0; k < 4; k += 2) {
          filter0(src0, src1, coeff[k],     len);
          filter0(src1, src0, coeff[k + 1], len);
      }

      for (int k = 0; k < 8; k++) {
          int32_t *shifted = src0 - k;

          filter1(shifted, src1, coeff[k +  4], len);
          filter1(src1, shifted, coeff[k + 12], len);
          filter1(shifted, src1, coeff[k +  4], len);
      }

      // Odd output samples start 7 samples before the original src0
      src0 -= 7;
      for (int n = 0; n < len; n++) {
          *dst++ = src1[n];
          *dst++ = src0[n];
      }
  }
  /**
   * Transform len input rows into 4 output values each, then post-process
   * neighbouring rows from row 12 upwards.
   *
   * For row i the eight samples input[i][ofs - 4] .. input[i][ofs + 3] are
   * windowed with coeff[0..3] and combined with coeff[4..7] into
   * output[i][0..3]. Afterwards, for i in [12, len - 1), elements 3 and 2
   * of row i are mixed with elements 0 and 1 of row i + 1 using coeff[8]
   * and coeff[9].
   *
   * @param output result rows; rows [0, len) are written
   * @param input  per-row sample buffers
   * @param coeff  10 coefficients: 4 window, 4 transform, 2 cross-row
   * @param ofs    centre position inside each input row
   * @param len    number of rows to process
   */
  static void lbr_bank_c(float output[32][4], float **input,
                         const float *coeff, ptrdiff_t ofs, ptrdiff_t len)
  {
      const float sw0 = coeff[0], sw1 = coeff[1];
      const float sw2 = coeff[2], sw3 = coeff[3];
      const float c1  = coeff[4], c2  = coeff[5];
      const float c3  = coeff[6], c4  = coeff[7];
      const float al1 = coeff[8], al2 = coeff[9];

      // Short window and 8 point forward MDCT
      for (int row = 0; row < len; row++) {
          const float *in = input[row] + ofs;
          float *out      = output[row];

          const float a = in[-4] * sw0 - in[-1] * sw3;
          const float b = in[-3] * sw1 - in[-2] * sw2;
          const float c = in[ 2] * sw1 + in[ 1] * sw2;
          const float d = in[ 3] * sw0 + in[ 0] * sw3;

          out[0] = c1 * b - c2 * c + c4 * a - c3 * d;
          out[1] = c1 * d - c2 * a - c4 * b - c3 * c;
          out[2] = c3 * b + c2 * d - c4 * c + c1 * a;
          out[3] = c3 * a - c2 * b + c4 * d - c1 * c;
      }

      // Aliasing cancellation for high frequencies
      for (int row = 12; row < len - 1; row++) {
          float *cur  = output[row];
          float *next = output[row + 1];
          float a, b;

          a = cur[3]  * al1;
          b = next[0] * al1;
          cur[3]  += b - a;
          next[0] -= b + a;

          a = cur[2]  * al2;
          b = next[1] * al2;
          cur[2]  += b - a;
          next[1] -= b + a;
      }
  }
  /**
   * Upsample 64 input samples by an integer factor through a cascade of
   * five second-order recursive sections.
   *
   * Each input sample is followed by factor - 1 zeros; every value of that
   * stream is run through the five sections in order and the result of the
   * last section is written to output, giving 64 * factor output samples.
   *
   * @param output result buffer, receives 64 * factor samples
   * @param input  64 input samples
   * @param iir    four coefficients per section
   * @param hist   two state values per section, updated in place
   * @param factor number of output samples per input sample
   */
  static void lfe_iir_c(float *output, const float *input,
                        const float iir[5][4], float hist[5][2],
                        ptrdiff_t factor)
  {
      for (int n = 0; n < 64; n++) {
          float acc = input[n];

          for (int rep = 0; rep < factor; rep++) {
              for (int sec = 0; sec < 5; sec++) {
                  const float *cf = iir[sec];
                  float *st       = hist[sec];
                  const float tmp = st[0] * cf[0] + st[1] * cf[1] + acc;

                  acc   = st[0] * cf[2] + st[1] * cf[3] + tmp;
                  st[0] = st[1];
                  st[1] = tmp;
              }

              *output++ = acc;
              // Zero stuffing for the remaining output samples
              acc = 0;
          }
      }
  }
  388. av_cold void ff_dcadsp_init(DCADSPContext *s)
  389. {
  390. s->decode_hf = decode_hf_c;
  391. s->decode_joint = decode_joint_c;
  392. s->lfe_fir_float[0] = lfe_fir0_float_c;
  393. s->lfe_fir_float[1] = lfe_fir1_float_c;
  394. s->lfe_x96_float = lfe_x96_float_c;
  395. s->sub_qmf_float[0] = sub_qmf32_float_c;
  396. s->sub_qmf_float[1] = sub_qmf64_float_c;
  397. s->lfe_fir_fixed = lfe_fir_fixed_c;
  398. s->lfe_x96_fixed = lfe_x96_fixed_c;
  399. s->sub_qmf_fixed[0] = sub_qmf32_fixed_c;
  400. s->sub_qmf_fixed[1] = sub_qmf64_fixed_c;
  401. s->decor = decor_c;
  402. s->dmix_sub_xch = dmix_sub_xch_c;
  403. s->dmix_sub = dmix_sub_c;
  404. s->dmix_add = dmix_add_c;
  405. s->dmix_scale = dmix_scale_c;
  406. s->dmix_scale_inv = dmix_scale_inv_c;
  407. s->assemble_freq_bands = assemble_freq_bands_c;
  408. s->lbr_bank = lbr_bank_c;
  409. s->lfe_iir = lfe_iir_c;
  410. if (ARCH_X86)
  411. ff_dcadsp_init_x86(s);
  412. }