You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

390 lines
12KB

  1. /*
  2. * AC-3 DSP functions
  3. * Copyright (c) 2011 Justin Ruggles
  4. *
  5. * This file is part of FFmpeg.
  6. *
  7. * FFmpeg is free software; you can redistribute it and/or
  8. * modify it under the terms of the GNU Lesser General Public
  9. * License as published by the Free Software Foundation; either
  10. * version 2.1 of the License, or (at your option) any later version.
  11. *
  12. * FFmpeg is distributed in the hope that it will be useful,
  13. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  15. * Lesser General Public License for more details.
  16. *
  17. * You should have received a copy of the GNU Lesser General Public
  18. * License along with FFmpeg; if not, write to the Free Software
  19. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  20. */
  21. #include "libavutil/mem_internal.h"
  22. #include "avcodec.h"
  23. #include "ac3.h"
  24. #include "ac3dsp.h"
  25. #include "mathops.h"
  /**
   * Replace each exponent of the first block with the minimum over that
   * block and the following num_reuse_blocks blocks.
   *
   * Consecutive blocks are laid out 256 bytes apart in exp, so coefficient
   * i of block b is read from exp[i + b * 256].
   *
   * @param exp              exponent buffer; only the first nb_coefs entries
   *                         (block 0) are written
   * @param num_reuse_blocks number of blocks after the first to scan;
   *                         when 0 the buffer is left untouched
   * @param nb_coefs         number of coefficients to process per block
   */
  static void ac3_exponent_min_c(uint8_t *exp, int num_reuse_blocks, int nb_coefs)
  {
      int coef, blk;

      if (num_reuse_blocks == 0)
          return;

      for (coef = 0; coef < nb_coefs; coef++) {
          uint8_t lowest = exp[coef];

          /* Compare against the same coefficient in each reused block. */
          for (blk = 1; blk <= num_reuse_blocks; blk++) {
              const uint8_t candidate = exp[coef + blk * 256];

              if (candidate < lowest)
                  lowest = candidate;
          }
          exp[coef] = lowest;
      }
  }
  43. static void float_to_fixed24_c(int32_t *dst, const float *src, unsigned int len)
  44. {
  45. const float scale = 1 << 24;
  46. do {
  47. *dst++ = lrintf(*src++ * scale);
  48. *dst++ = lrintf(*src++ * scale);
  49. *dst++ = lrintf(*src++ * scale);
  50. *dst++ = lrintf(*src++ * scale);
  51. *dst++ = lrintf(*src++ * scale);
  52. *dst++ = lrintf(*src++ * scale);
  53. *dst++ = lrintf(*src++ * scale);
  54. *dst++ = lrintf(*src++ * scale);
  55. len -= 8;
  56. } while (len > 0);
  57. }
  58. static void ac3_bit_alloc_calc_bap_c(int16_t *mask, int16_t *psd,
  59. int start, int end,
  60. int snr_offset, int floor,
  61. const uint8_t *bap_tab, uint8_t *bap)
  62. {
  63. int bin, band, band_end;
  64. /* special case, if snr offset is -960, set all bap's to zero */
  65. if (snr_offset == -960) {
  66. memset(bap, 0, AC3_MAX_COEFS);
  67. return;
  68. }
  69. bin = start;
  70. band = ff_ac3_bin_to_band_tab[start];
  71. do {
  72. int m = (FFMAX(mask[band] - snr_offset - floor, 0) & 0x1FE0) + floor;
  73. band_end = ff_ac3_band_start_tab[++band];
  74. band_end = FFMIN(band_end, end);
  75. for (; bin < band_end; bin++) {
  76. int address = av_clip_uintp2((psd[bin] - m) >> 5, 6);
  77. bap[bin] = bap_tab[address];
  78. }
  79. } while (end > band_end);
  80. }
  /**
   * Add a histogram of bap values to mant_cnt.
   *
   * For every entry of bap[0..len-1], the counter mant_cnt[bap[i]] is
   * incremented. Existing counts are kept, not reset. A len of zero or
   * less leaves mant_cnt unchanged.
   *
   * @param mant_cnt counters indexed by bap value
   * @param bap      array of bap values; each must be a valid index into mant_cnt
   * @param len      number of entries in bap
   */
  static void ac3_update_bap_counts_c(uint16_t mant_cnt[16], uint8_t *bap,
                                      int len)
  {
      int idx;

      /* Walk from the last entry down to the first. */
      for (idx = len - 1; idx >= 0; idx--)
          mant_cnt[bap[idx]]++;
  }
  87. DECLARE_ALIGNED(16, const uint16_t, ff_ac3_bap_bits)[16] = {
  88. 0, 0, 0, 3, 0, 4, 5, 6, 7, 8, 9, 10, 11, 12, 14, 16
  89. };
  90. static int ac3_compute_mantissa_size_c(uint16_t mant_cnt[6][16])
  91. {
  92. int blk, bap;
  93. int bits = 0;
  94. for (blk = 0; blk < AC3_MAX_BLOCKS; blk++) {
  95. // bap=1 : 3 mantissas in 5 bits
  96. bits += (mant_cnt[blk][1] / 3) * 5;
  97. // bap=2 : 3 mantissas in 7 bits
  98. // bap=4 : 2 mantissas in 7 bits
  99. bits += ((mant_cnt[blk][2] / 3) + (mant_cnt[blk][4] >> 1)) * 7;
  100. // bap=3 : 1 mantissa in 3 bits
  101. bits += mant_cnt[blk][3] * 3;
  102. // bap=5 to 15 : get bits per mantissa from table
  103. for (bap = 5; bap < 16; bap++)
  104. bits += mant_cnt[blk][bap] * ff_ac3_bap_bits[bap];
  105. }
  106. return bits;
  107. }
  /**
   * Derive one exponent per coefficient.
   *
   * A zero coefficient gets exponent 24; otherwise the exponent is
   * 23 - av_log2(|coef|).
   *
   * @param exp      output exponents, nb_coefs entries
   * @param coef     input coefficients, nb_coefs entries
   * @param nb_coefs number of coefficients to process
   */
  static void ac3_extract_exponents_c(uint8_t *exp, int32_t *coef, int nb_coefs)
  {
      int n;

      for (n = 0; n < nb_coefs; n++) {
          const int magnitude = abs(coef[n]);

          if (magnitude)
              exp[n] = 23 - av_log2(magnitude);
          else
              exp[n] = 24;
      }
  }
  /**
   * Accumulate sums of squares for two integer channels and their sum and
   * difference.
   *
   * On return: sum[0] = sum of coef0^2, sum[1] = sum of coef1^2,
   * sum[2] = sum of (coef0 + coef1)^2, sum[3] = sum of (coef0 - coef1)^2.
   * The accumulators are reset to zero first. Products are accumulated via
   * MAC64 into the 64-bit sums; the sum and difference of each pair are
   * formed in plain int.
   *
   * @param sum   four output accumulators
   * @param coef0 first channel
   * @param coef1 second channel
   * @param len   number of coefficients in each channel
   */
  static void ac3_sum_square_butterfly_int32_c(int64_t sum[4],
                                               const int32_t *coef0,
                                               const int32_t *coef1,
                                               int len)
  {
      int n = 0;

      sum[3] = 0;
      sum[2] = 0;
      sum[1] = 0;
      sum[0] = 0;

      while (n < len) {
          const int first  = coef0[n];
          const int second = coef1[n];
          const int mid    = first + second;
          const int side   = first - second;

          MAC64(sum[0], first,  first);
          MAC64(sum[1], second, second);
          MAC64(sum[2], mid,    mid);
          MAC64(sum[3], side,   side);
          n++;
      }
  }
  /**
   * Accumulate sums of squares for two float channels and their sum and
   * difference.
   *
   * On return: sum[0] = sum of coef0^2, sum[1] = sum of coef1^2,
   * sum[2] = sum of (coef0 + coef1)^2, sum[3] = sum of (coef0 - coef1)^2.
   * The accumulators are reset to zero first.
   *
   * @param sum   four output accumulators
   * @param coef0 first channel
   * @param coef1 second channel
   * @param len   number of coefficients in each channel
   */
  static void ac3_sum_square_butterfly_float_c(float sum[4],
                                               const float *coef0,
                                               const float *coef1,
                                               int len)
  {
      int n = 0;
      int k;

      for (k = 3; k >= 0; k--)
          sum[k] = 0;

      while (n < len) {
          const float first  = coef0[n];
          const float second = coef1[n];
          const float mid    = first + second;
          const float side   = first - second;

          sum[0] += first  * first;
          sum[1] += second * second;
          sum[2] += mid    * mid;
          sum[3] += side   * side;
          n++;
      }
  }
  /**
   * In-place 5-to-2 channel float downmix for a symmetric matrix.
   *
   * Only three gains are read, all from matrix[0]: [0] (front), [1]
   * (center) and [3] (surround). For every sample index:
   *   out0 = ch0 * front + ch1 * center + ch3 * surround
   *   out1 = ch1 * center + ch2 * front + ch4 * surround
   * and the results overwrite samples[0] and samples[1].
   *
   * @param samples five input channels; channels 0 and 1 receive the output
   * @param matrix  downmix matrix; only row 0 is read
   * @param len     number of samples per channel
   */
  static void ac3_downmix_5_to_2_symmetric_c(float **samples, float **matrix,
                                             int len)
  {
      const float gain_front    = matrix[0][0];
      const float gain_center   = matrix[0][1];
      const float gain_surround = matrix[0][3];
      int n = 0;

      while (n < len) {
          /* Both outputs are computed before either input is overwritten. */
          const float out0 = samples[0][n] * gain_front +
                             samples[1][n] * gain_center +
                             samples[3][n] * gain_surround;
          const float out1 = samples[1][n] * gain_center +
                             samples[2][n] * gain_front +
                             samples[4][n] * gain_surround;

          samples[0][n] = out0;
          samples[1][n] = out1;
          n++;
      }
  }
  /**
   * In-place 5-to-1 channel float downmix for a symmetric matrix.
   *
   * Only three gains are read, all from matrix[0]: [0] (front), [1]
   * (center) and [3] (surround). For every sample index:
   *   out = ch0 * front + ch1 * center + ch2 * front
   *       + ch3 * surround + ch4 * surround
   * and the result overwrites samples[0].
   *
   * @param samples five input channels; channel 0 receives the output
   * @param matrix  downmix matrix; only row 0 is read
   * @param len     number of samples per channel
   */
  static void ac3_downmix_5_to_1_symmetric_c(float **samples, float **matrix,
                                             int len)
  {
      const float gain_front    = matrix[0][0];
      const float gain_center   = matrix[0][1];
      const float gain_surround = matrix[0][3];
      int n = 0;

      while (n < len) {
          const float mono = samples[0][n] * gain_front +
                             samples[1][n] * gain_center +
                             samples[2][n] * gain_front +
                             samples[3][n] * gain_surround +
                             samples[4][n] * gain_surround;

          samples[0][n] = mono;
          n++;
      }
  }
  /**
   * Generic in-place float downmix to one or two output channels.
   *
   * For out_ch == 2, each output sample is the dot product of the in_ch
   * input samples with matrix[0] (output 0) and matrix[1] (output 1).
   * For out_ch == 1, only matrix[0] is used. Results overwrite samples[0]
   * (and samples[1] for stereo). Any other out_ch value does nothing.
   *
   * @param samples input channels; the first out_ch channels receive the output
   * @param matrix  downmix gains, indexed [output channel][input channel]
   * @param out_ch  number of output channels (1 or 2)
   * @param in_ch   number of input channels
   * @param len     number of samples per channel
   */
  static void ac3_downmix_c(float **samples, float **matrix,
                            int out_ch, int in_ch, int len)
  {
      int n, ch;
      float acc0, acc1;

      switch (out_ch) {
      case 2:
          for (n = 0; n < len; n++) {
              acc0 = 0.0f;
              acc1 = 0.0f;
              for (ch = 0; ch < in_ch; ch++) {
                  acc0 += samples[ch][n] * matrix[0][ch];
                  acc1 += samples[ch][n] * matrix[1][ch];
              }
              samples[0][n] = acc0;
              samples[1][n] = acc1;
          }
          break;
      case 1:
          for (n = 0; n < len; n++) {
              acc0 = 0.0f;
              for (ch = 0; ch < in_ch; ch++)
                  acc0 += samples[ch][n] * matrix[0][ch];
              samples[0][n] = acc0;
          }
          break;
      default:
          break;
      }
  }
  /**
   * In-place 5-to-2 channel fixed-point downmix for a symmetric matrix.
   *
   * Only three gains are read, all from matrix[0]: [0] (front), [1]
   * (center) and [3] (surround). For every sample index:
   *   out0 = ch0 * front + ch1 * center + ch3 * surround
   *   out1 = ch1 * center + ch2 * front + ch4 * surround
   * Sums are formed in 64 bits, then rounded by adding 2048 and shifting
   * right by 12 before being stored to samples[0] and samples[1].
   *
   * @param samples five input channels; channels 0 and 1 receive the output
   * @param matrix  downmix matrix; only row 0 is read
   * @param len     number of samples per channel
   */
  static void ac3_downmix_5_to_2_symmetric_c_fixed(int32_t **samples, int16_t **matrix,
                                                   int len)
  {
      const int16_t gain_front    = matrix[0][0];
      const int16_t gain_center   = matrix[0][1];
      const int16_t gain_surround = matrix[0][3];
      int n = 0;

      while (n < len) {
          /* Both outputs are computed before either input is overwritten. */
          const int64_t out0 = (int64_t)samples[0][n] * gain_front +
                               (int64_t)samples[1][n] * gain_center +
                               (int64_t)samples[3][n] * gain_surround;
          const int64_t out1 = (int64_t)samples[1][n] * gain_center +
                               (int64_t)samples[2][n] * gain_front +
                               (int64_t)samples[4][n] * gain_surround;

          samples[0][n] = (out0 + 2048) >> 12;
          samples[1][n] = (out1 + 2048) >> 12;
          n++;
      }
  }
  /**
   * In-place 5-to-1 channel fixed-point downmix for a symmetric matrix.
   *
   * Only three gains are read, all from matrix[0]: [0] (front), [1]
   * (center) and [3] (surround). For every sample index:
   *   out = ch0 * front + ch1 * center + ch2 * front
   *       + ch3 * surround + ch4 * surround
   * The sum is formed in 64 bits, then rounded by adding 2048 and shifting
   * right by 12 before being stored to samples[0].
   *
   * @param samples five input channels; channel 0 receives the output
   * @param matrix  downmix matrix; only row 0 is read
   * @param len     number of samples per channel
   */
  static void ac3_downmix_5_to_1_symmetric_c_fixed(int32_t **samples, int16_t **matrix,
                                                   int len)
  {
      const int16_t gain_front    = matrix[0][0];
      const int16_t gain_center   = matrix[0][1];
      const int16_t gain_surround = matrix[0][3];
      int n = 0;

      while (n < len) {
          const int64_t mono = (int64_t)samples[0][n] * gain_front +
                               (int64_t)samples[1][n] * gain_center +
                               (int64_t)samples[2][n] * gain_front +
                               (int64_t)samples[3][n] * gain_surround +
                               (int64_t)samples[4][n] * gain_surround;

          samples[0][n] = (mono + 2048) >> 12;
          n++;
      }
  }
  /**
   * Generic in-place fixed-point downmix to one or two output channels.
   *
   * For out_ch == 2, each output sample is the dot product of the in_ch
   * input samples with matrix[0] (output 0) and matrix[1] (output 1).
   * For out_ch == 1, only matrix[0] is used. Sums are formed in 64 bits,
   * then rounded by adding 2048 and shifting right by 12. Results
   * overwrite samples[0] (and samples[1] for stereo). Any other out_ch
   * value does nothing.
   *
   * @param samples input channels; the first out_ch channels receive the output
   * @param matrix  downmix gains, indexed [output channel][input channel]
   * @param out_ch  number of output channels (1 or 2)
   * @param in_ch   number of input channels
   * @param len     number of samples per channel
   */
  static void ac3_downmix_c_fixed(int32_t **samples, int16_t **matrix,
                                  int out_ch, int in_ch, int len)
  {
      int n, ch;
      int64_t acc0, acc1;

      switch (out_ch) {
      case 2:
          for (n = 0; n < len; n++) {
              acc0 = 0;
              acc1 = 0;
              for (ch = 0; ch < in_ch; ch++) {
                  acc0 += (int64_t)samples[ch][n] * matrix[0][ch];
                  acc1 += (int64_t)samples[ch][n] * matrix[1][ch];
              }
              samples[0][n] = (acc0 + 2048) >> 12;
              samples[1][n] = (acc1 + 2048) >> 12;
          }
          break;
      case 1:
          for (n = 0; n < len; n++) {
              acc0 = 0;
              for (ch = 0; ch < in_ch; ch++)
                  acc0 += (int64_t)samples[ch][n] * matrix[0][ch];
              samples[0][n] = (acc0 + 2048) >> 12;
          }
          break;
      default:
          break;
      }
  }
  270. void ff_ac3dsp_downmix_fixed(AC3DSPContext *c, int32_t **samples, int16_t **matrix,
  271. int out_ch, int in_ch, int len)
  272. {
  273. if (c->in_channels != in_ch || c->out_channels != out_ch) {
  274. c->in_channels = in_ch;
  275. c->out_channels = out_ch;
  276. c->downmix_fixed = NULL;
  277. if (in_ch == 5 && out_ch == 2 &&
  278. !(matrix[1][0] | matrix[0][2] |
  279. matrix[1][3] | matrix[0][4] |
  280. (matrix[0][1] ^ matrix[1][1]) |
  281. (matrix[0][0] ^ matrix[1][2]))) {
  282. c->downmix_fixed = ac3_downmix_5_to_2_symmetric_c_fixed;
  283. } else if (in_ch == 5 && out_ch == 1 &&
  284. matrix[0][0] == matrix[0][2] &&
  285. matrix[0][3] == matrix[0][4]) {
  286. c->downmix_fixed = ac3_downmix_5_to_1_symmetric_c_fixed;
  287. }
  288. }
  289. if (c->downmix_fixed)
  290. c->downmix_fixed(samples, matrix, len);
  291. else
  292. ac3_downmix_c_fixed(samples, matrix, out_ch, in_ch, len);
  293. }
  294. void ff_ac3dsp_downmix(AC3DSPContext *c, float **samples, float **matrix,
  295. int out_ch, int in_ch, int len)
  296. {
  297. if (c->in_channels != in_ch || c->out_channels != out_ch) {
  298. int **matrix_cmp = (int **)matrix;
  299. c->in_channels = in_ch;
  300. c->out_channels = out_ch;
  301. c->downmix = NULL;
  302. if (in_ch == 5 && out_ch == 2 &&
  303. !(matrix_cmp[1][0] | matrix_cmp[0][2] |
  304. matrix_cmp[1][3] | matrix_cmp[0][4] |
  305. (matrix_cmp[0][1] ^ matrix_cmp[1][1]) |
  306. (matrix_cmp[0][0] ^ matrix_cmp[1][2]))) {
  307. c->downmix = ac3_downmix_5_to_2_symmetric_c;
  308. } else if (in_ch == 5 && out_ch == 1 &&
  309. matrix_cmp[0][0] == matrix_cmp[0][2] &&
  310. matrix_cmp[0][3] == matrix_cmp[0][4]) {
  311. c->downmix = ac3_downmix_5_to_1_symmetric_c;
  312. }
  313. if (ARCH_X86)
  314. ff_ac3dsp_set_downmix_x86(c);
  315. }
  316. if (c->downmix)
  317. c->downmix(samples, matrix, len);
  318. else
  319. ac3_downmix_c(samples, matrix, out_ch, in_ch, len);
  320. }
  321. av_cold void ff_ac3dsp_init(AC3DSPContext *c, int bit_exact)
  322. {
  323. c->ac3_exponent_min = ac3_exponent_min_c;
  324. c->float_to_fixed24 = float_to_fixed24_c;
  325. c->bit_alloc_calc_bap = ac3_bit_alloc_calc_bap_c;
  326. c->update_bap_counts = ac3_update_bap_counts_c;
  327. c->compute_mantissa_size = ac3_compute_mantissa_size_c;
  328. c->extract_exponents = ac3_extract_exponents_c;
  329. c->sum_square_butterfly_int32 = ac3_sum_square_butterfly_int32_c;
  330. c->sum_square_butterfly_float = ac3_sum_square_butterfly_float_c;
  331. c->in_channels = 0;
  332. c->out_channels = 0;
  333. c->downmix = NULL;
  334. c->downmix_fixed = NULL;
  335. if (ARCH_ARM)
  336. ff_ac3dsp_init_arm(c, bit_exact);
  337. if (ARCH_X86)
  338. ff_ac3dsp_init_x86(c, bit_exact);
  339. if (ARCH_MIPS)
  340. ff_ac3dsp_init_mips(c, bit_exact);
  341. }