You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

491 lines
15KB

  1. /*
  2. * Copyright (C) 2016 foo86
  3. *
  4. * This file is part of FFmpeg.
  5. *
  6. * FFmpeg is free software; you can redistribute it and/or
  7. * modify it under the terms of the GNU Lesser General Public
  8. * License as published by the Free Software Foundation; either
  9. * version 2.1 of the License, or (at your option) any later version.
  10. *
  11. * FFmpeg is distributed in the hope that it will be useful,
  12. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  14. * Lesser General Public License for more details.
  15. *
  16. * You should have received a copy of the GNU Lesser General Public
  17. * License along with FFmpeg; if not, write to the Free Software
  18. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  19. */
  20. #include "libavutil/mem.h"
  21. #include "dcadsp.h"
  22. #include "dcamath.h"
  /**
   * Scale rows of the hf_vq table into dst.
   *
   * For every subband sb in [sb_start, sb_end), the row hf_vq[vq_index[sb]]
   * is multiplied entry by entry with scale_factors[sb][0], rounded
   * (add 8, shift right by 4), passed through clip23() and written to
   * dst[sb][ofs] .. dst[sb][ofs + len - 1].
   */
  static void decode_hf_c(int32_t **dst,
                          const int32_t *vq_index,
                          const int8_t hf_vq[1024][32],
                          int32_t scale_factors[32][2],
                          ptrdiff_t sb_start, ptrdiff_t sb_end,
                          ptrdiff_t ofs, ptrdiff_t len)
  {
      int sb, n;

      for (sb = sb_start; sb < sb_end; sb++) {
          const int8_t *row  = hf_vq[vq_index[sb]];
          const int32_t gain = scale_factors[sb][0];

          for (n = 0; n < len; n++) {
              // The rounding term is added before the shift
              dst[sb][ofs + n] = clip23((row[n] * gain + (1 << 3)) >> 4);
          }
      }
  }
  /**
   * Scale src into dst, one scale factor per subband.
   *
   * For every subband sb in [sb_start, sb_end), each of the len samples
   * starting at offset ofs is computed as
   * clip23(mul17(src[sb][k], scale_factors[sb])).
   */
  static void decode_joint_c(int32_t **dst, int32_t **src,
                             const int32_t *scale_factors,
                             ptrdiff_t sb_start, ptrdiff_t sb_end,
                             ptrdiff_t ofs, ptrdiff_t len)
  {
      int sb, n;

      for (sb = sb_start; sb < sb_end; sb++) {
          const int32_t gain = scale_factors[sb];

          for (n = 0; n < len; n++) {
              dst[sb][ofs + n] = clip23(mul17(src[sb][ofs + n], gain));
          }
      }
  }
  /**
   * Interpolating FIR filter producing float output from integer input.
   *
   * Each of the (npcmblocks >> (dec_select + 1)) input samples yields
   * (64 << dec_select) output samples, using (8 >> dec_select) taps per
   * output. The first half of every output group reads filter_coeff
   * forwards from index 0, the second half reads it backwards from index
   * 255. Taps are applied to lfe_samples[0], lfe_samples[-1], ..., so the
   * caller must provide that much history before the pointer.
   */
  static void lfe_fir_float_c(float *pcm_samples, int32_t *lfe_samples,
                              const float *filter_coeff, ptrdiff_t npcmblocks,
                              int dec_select)
  {
      const int out_per_in = 64 << dec_select;
      const int half       = out_per_in / 2;
      const int taps       = 8 >> dec_select;
      const int n_in       = npcmblocks >> (dec_select + 1);
      int s, phase, t;

      for (s = 0; s < n_in; s++, lfe_samples++, pcm_samples += out_per_in) {
          for (phase = 0; phase < half; phase++) {
              const float *fwd = filter_coeff + phase * taps;
              const float *rev = filter_coeff + 255 - phase * taps;
              float acc_fwd = 0;
              float acc_rev = 0;

              for (t = 0; t < taps; t++) {
                  acc_fwd += fwd[t]  * lfe_samples[-t];
                  acc_rev += rev[-t] * lfe_samples[-t];
              }

              pcm_samples[phase]        = acc_fwd;
              pcm_samples[half + phase] = acc_rev;
          }
      }
  }
  /**
   * Wrapper around lfe_fir_float_c() with dec_select fixed to 0.
   */
  static void lfe_fir0_float_c(float *pcm_samples, int32_t *lfe_samples,
                               const float *filter_coeff, ptrdiff_t npcmblocks)
  {
      const int dec_select = 0;

      lfe_fir_float_c(pcm_samples, lfe_samples, filter_coeff,
                      npcmblocks, dec_select);
  }
  /**
   * Wrapper around lfe_fir_float_c() with dec_select fixed to 1.
   */
  static void lfe_fir1_float_c(float *pcm_samples, int32_t *lfe_samples,
                               const float *filter_coeff, ptrdiff_t npcmblocks)
  {
      const int dec_select = 1;

      lfe_fir_float_c(pcm_samples, lfe_samples, filter_coeff,
                      npcmblocks, dec_select);
  }
  /**
   * Double the sample count of src by linear interpolation.
   *
   * Every input sample produces two outputs: a 1:3 and a 3:1 blend of the
   * current and the previous input sample. The previous sample is loaded
   * from *hist on entry and the last input sample is stored back to *hist
   * on exit. dst receives 2 * len samples.
   */
  static void lfe_x96_float_c(float *dst, const float *src,
                              float *hist, ptrdiff_t len)
  {
      float last = *hist;
      float *out = dst;
      int n;

      for (n = 0; n < len; n++, out += 2) {
          const float cur = src[n];

          out[0] = 0.25f * cur + 0.75f * last;
          out[1] = 0.75f * cur + 0.25f * last;
          last   = cur;
      }

      *hist = last;
  }
  99. static void sub_qmf32_float_c(SynthFilterContext *synth,
  100. FFTContext *imdct,
  101. float *pcm_samples,
  102. int32_t **subband_samples_lo,
  103. int32_t **subband_samples_hi,
  104. float *hist1, int *offset, float *hist2,
  105. const float *filter_coeff, ptrdiff_t npcmblocks,
  106. float scale)
  107. {
  108. LOCAL_ALIGNED_32(float, input, [32]);
  109. int i, j;
  110. for (j = 0; j < npcmblocks; j++) {
  111. // Load in one sample from each subband
  112. for (i = 0; i < 32; i++) {
  113. if ((i - 1) & 2)
  114. input[i] = -subband_samples_lo[i][j];
  115. else
  116. input[i] = subband_samples_lo[i][j];
  117. }
  118. // One subband sample generates 32 interpolated ones
  119. synth->synth_filter_float(imdct, hist1, offset,
  120. hist2, filter_coeff,
  121. pcm_samples, input, scale);
  122. pcm_samples += 32;
  123. }
  124. }
  125. static void sub_qmf64_float_c(SynthFilterContext *synth,
  126. FFTContext *imdct,
  127. float *pcm_samples,
  128. int32_t **subband_samples_lo,
  129. int32_t **subband_samples_hi,
  130. float *hist1, int *offset, float *hist2,
  131. const float *filter_coeff, ptrdiff_t npcmblocks,
  132. float scale)
  133. {
  134. LOCAL_ALIGNED_32(float, input, [64]);
  135. int i, j;
  136. if (!subband_samples_hi)
  137. memset(&input[32], 0, sizeof(input[0]) * 32);
  138. for (j = 0; j < npcmblocks; j++) {
  139. // Load in one sample from each subband
  140. if (subband_samples_hi) {
  141. // Full 64 subbands, first 32 are residual coded
  142. for (i = 0; i < 32; i++) {
  143. if ((i - 1) & 2)
  144. input[i] = -subband_samples_lo[i][j] - subband_samples_hi[i][j];
  145. else
  146. input[i] = subband_samples_lo[i][j] + subband_samples_hi[i][j];
  147. }
  148. for (i = 32; i < 64; i++) {
  149. if ((i - 1) & 2)
  150. input[i] = -subband_samples_hi[i][j];
  151. else
  152. input[i] = subband_samples_hi[i][j];
  153. }
  154. } else {
  155. // Only first 32 subbands
  156. for (i = 0; i < 32; i++) {
  157. if ((i - 1) & 2)
  158. input[i] = -subband_samples_lo[i][j];
  159. else
  160. input[i] = subband_samples_lo[i][j];
  161. }
  162. }
  163. // One subband sample generates 64 interpolated ones
  164. synth->synth_filter_float_64(imdct, hist1, offset,
  165. hist2, filter_coeff,
  166. pcm_samples, input, scale);
  167. pcm_samples += 64;
  168. }
  169. }
  /**
   * Fixed-point interpolating FIR filter.
   *
   * Each of the (npcmblocks >> 1) input samples yields 64 output samples
   * using 8 taps per output. Outputs 0..31 of a group read filter_coeff
   * forwards from index 0, outputs 32..63 read it backwards from index 255.
   * Products are accumulated in 64 bits and then passed through norm23()
   * and clip23(). Taps are applied to lfe_samples[0] .. lfe_samples[-7].
   */
  static void lfe_fir_fixed_c(int32_t *pcm_samples, int32_t *lfe_samples,
                              const int32_t *filter_coeff, ptrdiff_t npcmblocks)
  {
      const int n_in = npcmblocks >> 1;
      int s, phase, t;

      for (s = 0; s < n_in; s++, lfe_samples++, pcm_samples += 64) {
          for (phase = 0; phase < 32; phase++) {
              const int32_t *fwd = filter_coeff + phase * 8;
              const int32_t *rev = filter_coeff + 255 - phase * 8;
              int64_t acc_fwd = 0;
              int64_t acc_rev = 0;

              for (t = 0; t < 8; t++) {
                  acc_fwd += (int64_t)fwd[t]  * lfe_samples[-t];
                  acc_rev += (int64_t)rev[-t] * lfe_samples[-t];
              }

              pcm_samples[phase]      = clip23(norm23(acc_fwd));
              pcm_samples[32 + phase] = clip23(norm23(acc_rev));
          }
      }
  }
  /**
   * Fixed-point counterpart of the x96 interpolation: every input sample
   * produces two outputs, each a weighted sum of the current and the
   * previous input sample (weights 2097471 and 6291137, swapped for the
   * second output), reduced with norm23() and clip23().
   *
   * The previous sample is loaded from *hist on entry and the last input
   * sample is stored back on exit. dst receives 2 * len samples.
   */
  static void lfe_x96_fixed_c(int32_t *dst, const int32_t *src,
                              int32_t *hist, ptrdiff_t len)
  {
      int32_t last = *hist;
      int32_t *out = dst;
      int n;

      for (n = 0; n < len; n++, out += 2) {
          const int32_t cur    = src[n];
          const int64_t first  = INT64_C(2097471) * cur + INT64_C(6291137) * last;
          const int64_t second = INT64_C(6291137) * cur + INT64_C(2097471) * last;

          out[0] = clip23(norm23(first));
          out[1] = clip23(norm23(second));
          last   = cur;
      }

      *hist = last;
  }
  206. static void sub_qmf32_fixed_c(SynthFilterContext *synth,
  207. DCADCTContext *imdct,
  208. int32_t *pcm_samples,
  209. int32_t **subband_samples_lo,
  210. int32_t **subband_samples_hi,
  211. int32_t *hist1, int *offset, int32_t *hist2,
  212. const int32_t *filter_coeff, ptrdiff_t npcmblocks)
  213. {
  214. LOCAL_ALIGNED_32(int32_t, input, [32]);
  215. int i, j;
  216. for (j = 0; j < npcmblocks; j++) {
  217. // Load in one sample from each subband
  218. for (i = 0; i < 32; i++)
  219. input[i] = subband_samples_lo[i][j];
  220. // One subband sample generates 32 interpolated ones
  221. synth->synth_filter_fixed(imdct, hist1, offset,
  222. hist2, filter_coeff,
  223. pcm_samples, input);
  224. pcm_samples += 32;
  225. }
  226. }
  227. static void sub_qmf64_fixed_c(SynthFilterContext *synth,
  228. DCADCTContext *imdct,
  229. int32_t *pcm_samples,
  230. int32_t **subband_samples_lo,
  231. int32_t **subband_samples_hi,
  232. int32_t *hist1, int *offset, int32_t *hist2,
  233. const int32_t *filter_coeff, ptrdiff_t npcmblocks)
  234. {
  235. LOCAL_ALIGNED_32(int32_t, input, [64]);
  236. int i, j;
  237. if (!subband_samples_hi)
  238. memset(&input[32], 0, sizeof(input[0]) * 32);
  239. for (j = 0; j < npcmblocks; j++) {
  240. // Load in one sample from each subband
  241. if (subband_samples_hi) {
  242. // Full 64 subbands, first 32 are residual coded
  243. for (i = 0; i < 32; i++)
  244. input[i] = subband_samples_lo[i][j] + subband_samples_hi[i][j];
  245. for (i = 32; i < 64; i++)
  246. input[i] = subband_samples_hi[i][j];
  247. } else {
  248. // Only first 32 subbands
  249. for (i = 0; i < 32; i++)
  250. input[i] = subband_samples_lo[i][j];
  251. }
  252. // One subband sample generates 64 interpolated ones
  253. synth->synth_filter_fixed_64(imdct, hist1, offset,
  254. hist2, filter_coeff,
  255. pcm_samples, input);
  256. pcm_samples += 64;
  257. }
  258. }
  259. static void decor_c(int32_t *dst, const int32_t *src, int coeff, ptrdiff_t len)
  260. {
  261. int i;
  262. for (i = 0; i < len; i++)
  263. dst[i] += (SUINT)((int)(src[i] * (SUINT)coeff + (1 << 2)) >> 3);
  264. }
  /**
   * Subtract a scaled copy of src from both dst1 and dst2, in place.
   *
   * Each src sample is multiplied by 5931520 (M_SQRT1_2 * (1 << 23)) via
   * mul23() and the result is subtracted from the matching sample of both
   * destinations.
   *
   * The subtrahend is cast to unsigned so the subtraction wraps around
   * instead of being a signed overflow (undefined behaviour); this is the
   * same approach dmix_sub_c() uses.
   */
  static void dmix_sub_xch_c(int32_t *dst1, int32_t *dst2,
                             const int32_t *src, ptrdiff_t len)
  {
      int i;

      for (i = 0; i < len; i++) {
          int32_t cs = mul23(src[i], 5931520 /* M_SQRT1_2 * (1 << 23) */);

          dst1[i] -= (unsigned)cs;
          dst2[i] -= (unsigned)cs;
      }
  }
  /**
   * Subtract mul15(src[k], coeff) from dst[k] for each of the len samples.
   *
   * The subtrahend is cast to unsigned before the subtraction.
   */
  static void dmix_sub_c(int32_t *dst, const int32_t *src, int coeff, ptrdiff_t len)
  {
      int n;

      for (n = 0; n < len; n++) {
          const unsigned scaled = (unsigned)mul15(src[n], coeff);

          dst[n] -= scaled;
      }
  }
  /**
   * Add mul15(src[k], coeff) to dst[k] for each of the len samples.
   *
   * The addend is cast to unsigned so the addition wraps around instead of
   * being a signed overflow (undefined behaviour); this is the same
   * approach dmix_sub_c() uses.
   */
  static void dmix_add_c(int32_t *dst, const int32_t *src, int coeff, ptrdiff_t len)
  {
      int i;

      for (i = 0; i < len; i++)
          dst[i] += (unsigned)mul15(src[i], coeff);
  }
  /**
   * Replace each of the len samples of dst with mul15(sample, scale).
   */
  static void dmix_scale_c(int32_t *dst, int scale, ptrdiff_t len)
  {
      int n;

      for (n = 0; n < len; n++) {
          const int32_t sample = dst[n];

          dst[n] = mul15(sample, scale);
      }
  }
  /**
   * Replace each of the len samples of dst with mul16(sample, scale_inv).
   */
  static void dmix_scale_inv_c(int32_t *dst, int scale_inv, ptrdiff_t len)
  {
      int n;

      for (n = 0; n < len; n++) {
          const int32_t sample = dst[n];

          dst[n] = mul16(sample, scale_inv);
      }
  }
  299. static void filter0(SUINT32 *dst, const int32_t *src, int32_t coeff, ptrdiff_t len)
  300. {
  301. int i;
  302. for (i = 0; i < len; i++)
  303. dst[i] -= mul22(src[i], coeff);
  304. }
  305. static void filter1(SUINT32 *dst, const int32_t *src, int32_t coeff, ptrdiff_t len)
  306. {
  307. int i;
  308. for (i = 0; i < len; i++)
  309. dst[i] -= mul23(src[i], coeff);
  310. }
  /**
   * Combine src0 and src1 into the interleaved output dst.
   *
   * Both inputs are modified in place. Four filter0() passes alternate
   * between updating src0 from src1 and src1 from src0 (coeff[0..3]).
   * Eight stages of three filter1() passes follow, where stage k uses
   * coeff[k + 4] and coeff[k + 12] and addresses src0 shifted back by k
   * samples; the caller must therefore provide at least 7 valid samples
   * before src0. Finally dst receives 2 * len samples: src1[k] at even
   * positions and src0[k - 7] at odd positions.
   */
  static void assemble_freq_bands_c(int32_t *dst, int32_t *src0, int32_t *src1,
                                    const int32_t *coeff, ptrdiff_t len)
  {
      int32_t *out = dst;
      int stage, n;

      filter0(src0, src1, coeff[0], len);
      filter0(src1, src0, coeff[1], len);
      filter0(src0, src1, coeff[2], len);
      filter0(src1, src0, coeff[3], len);

      for (stage = 0; stage < 8; stage++) {
          // Each stage works on src0 moved one more sample backwards
          int32_t *shifted = src0 - stage;

          filter1(shifted, src1, coeff[stage + 4], len);
          filter1(src1, shifted, coeff[stage + 12], len);
          filter1(shifted, src1, coeff[stage + 4], len);
      }

      // Interleave: even outputs from src1, odd outputs from src0 delayed by 7
      for (n = 0; n < len; n++, out += 2) {
          out[0] = src1[n];
          out[1] = src0[n - 7];
      }
  }
  /**
   * Transform 8 input samples per band into 4 output values per band.
   *
   * For each of the len bands, samples input[band][ofs - 4 .. ofs + 3] are
   * combined pairwise using the four window weights coeff[0..3], and the
   * four intermediate values are mixed with coeff[4..7] into
   * output[band][0..3]. Afterwards, for bands 12 .. len - 2, the last two
   * values of a band and the first two values of the next band are
   * cross-adjusted using coeff[8] and coeff[9].
   */
  static void lbr_bank_c(float output[32][4], float **input,
                         const float *coeff, ptrdiff_t ofs, ptrdiff_t len)
  {
      const float win0 = coeff[0];
      const float win1 = coeff[1];
      const float win2 = coeff[2];
      const float win3 = coeff[3];
      const float c1   = coeff[4];
      const float c2   = coeff[5];
      const float c3   = coeff[6];
      const float c4   = coeff[7];
      const float al1  = coeff[8];
      const float al2  = coeff[9];
      int band;

      // Windowing followed by the 8-in / 4-out transform
      for (band = 0; band < len; band++) {
          const float *in  = input[band] + ofs;
          float       *out = output[band];

          const float a = in[-4] * win0 - in[-1] * win3;
          const float b = in[-3] * win1 - in[-2] * win2;
          const float c = in[ 2] * win1 + in[ 1] * win2;
          const float d = in[ 3] * win0 + in[ 0] * win3;

          out[0] = c1 * b - c2 * c + c4 * a - c3 * d;
          out[1] = c1 * d - c2 * a - c4 * b - c3 * c;
          out[2] = c3 * b + c2 * d - c4 * c + c1 * a;
          out[3] = c3 * a - c2 * b + c4 * d - c1 * c;
      }

      // Cross-band adjustment, starting at band 12
      for (band = 12; band < len - 1; band++) {
          float *cur  = output[band];
          float *next = output[band + 1];
          float a, b;

          a = cur[3]  * al1;
          b = next[0] * al1;
          cur[3]  += b - a;
          next[0] -= b + a;

          a = cur[2]  * al2;
          b = next[1] * al2;
          cur[2]  += b - a;
          next[1] -= b + a;
      }
  }
  /**
   * Run 64 input samples through a cascade of five second-order sections,
   * producing factor output samples per input sample.
   *
   * For every input sample the cascade is evaluated factor times: the
   * first pass is fed the input sample, the remaining passes are fed zero.
   * Section k uses the four coefficients iir[k][0..3] and keeps its two
   * state values in hist[k][0..1], which are updated in place.
   * output receives 64 * factor samples.
   */
  static void lfe_iir_c(float *output, const float *input,
                        const float iir[5][4], float hist[5][2],
                        ptrdiff_t factor)
  {
      int n, rep, sec;

      for (n = 0; n < 64; n++) {
          float x = input[n];

          for (rep = 0; rep < factor; rep++) {
              for (sec = 0; sec < 5; sec++) {
                  const float *c = iir[sec];
                  float       *s = hist[sec];
                  const float mid = s[0] * c[0] + s[1] * c[1] + x;

                  x    = s[0] * c[2] + s[1] * c[3] + mid;
                  s[0] = s[1];
                  s[1] = mid;
              }

              *output++ = x;
              // Every pass after the first is fed zero
              x = 0;
          }
      }
  }
  387. av_cold void ff_dcadsp_init(DCADSPContext *s)
  388. {
  389. s->decode_hf = decode_hf_c;
  390. s->decode_joint = decode_joint_c;
  391. s->lfe_fir_float[0] = lfe_fir0_float_c;
  392. s->lfe_fir_float[1] = lfe_fir1_float_c;
  393. s->lfe_x96_float = lfe_x96_float_c;
  394. s->sub_qmf_float[0] = sub_qmf32_float_c;
  395. s->sub_qmf_float[1] = sub_qmf64_float_c;
  396. s->lfe_fir_fixed = lfe_fir_fixed_c;
  397. s->lfe_x96_fixed = lfe_x96_fixed_c;
  398. s->sub_qmf_fixed[0] = sub_qmf32_fixed_c;
  399. s->sub_qmf_fixed[1] = sub_qmf64_fixed_c;
  400. s->decor = decor_c;
  401. s->dmix_sub_xch = dmix_sub_xch_c;
  402. s->dmix_sub = dmix_sub_c;
  403. s->dmix_add = dmix_add_c;
  404. s->dmix_scale = dmix_scale_c;
  405. s->dmix_scale_inv = dmix_scale_inv_c;
  406. s->assemble_freq_bands = assemble_freq_bands_c;
  407. s->lbr_bank = lbr_bank_c;
  408. s->lfe_iir = lfe_iir_c;
  409. if (ARCH_X86)
  410. ff_dcadsp_init_x86(s);
  411. }