You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

185 lines
7.2KB

  1. /*
  2. * copyright (c) 2008 Michael Niedermayer <michaelni@gmx.at>
  3. * Copyright (C) 2016 foo86
  4. *
  5. * This file is part of FFmpeg.
  6. *
  7. * FFmpeg is free software; you can redistribute it and/or
  8. * modify it under the terms of the GNU Lesser General Public
  9. * License as published by the Free Software Foundation; either
  10. * version 2.1 of the License, or (at your option) any later version.
  11. *
  12. * FFmpeg is distributed in the hope that it will be useful,
  13. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  15. * Lesser General Public License for more details.
  16. *
  17. * You should have received a copy of the GNU Lesser General Public
  18. * License along with FFmpeg; if not, write to the Free Software
  19. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  20. */
  21. #include "fft.h"
  22. #include "dcadct.h"
  23. #include "dcamath.h"
  24. #include "synth_filter.h"
  25. static void synth_filter_float(FFTContext *imdct,
  26. float *synth_buf_ptr, int *synth_buf_offset,
  27. float synth_buf2[32], const float window[512],
  28. float out[32], const float in[32], float scale)
  29. {
  30. float *synth_buf = synth_buf_ptr + *synth_buf_offset;
  31. int i, j;
  32. imdct->imdct_half(imdct, synth_buf, in);
  33. for (i = 0; i < 16; i++) {
  34. float a = synth_buf2[i ];
  35. float b = synth_buf2[i + 16];
  36. float c = 0;
  37. float d = 0;
  38. for (j = 0; j < 512 - *synth_buf_offset; j += 64) {
  39. a += window[i + j ] * (-synth_buf[15 - i + j ]);
  40. b += window[i + j + 16] * ( synth_buf[ i + j ]);
  41. c += window[i + j + 32] * ( synth_buf[16 + i + j ]);
  42. d += window[i + j + 48] * ( synth_buf[31 - i + j ]);
  43. }
  44. for ( ; j < 512; j += 64) {
  45. a += window[i + j ] * (-synth_buf[15 - i + j - 512]);
  46. b += window[i + j + 16] * ( synth_buf[ i + j - 512]);
  47. c += window[i + j + 32] * ( synth_buf[16 + i + j - 512]);
  48. d += window[i + j + 48] * ( synth_buf[31 - i + j - 512]);
  49. }
  50. out[i ] = a * scale;
  51. out[i + 16] = b * scale;
  52. synth_buf2[i ] = c;
  53. synth_buf2[i + 16] = d;
  54. }
  55. *synth_buf_offset = (*synth_buf_offset - 32) & 511;
  56. }
  57. static void synth_filter_float_64(FFTContext *imdct,
  58. float *synth_buf_ptr, int *synth_buf_offset,
  59. float synth_buf2[64], const float window[1024],
  60. float out[64], const float in[64], float scale)
  61. {
  62. float *synth_buf = synth_buf_ptr + *synth_buf_offset;
  63. int i, j;
  64. imdct->imdct_half(imdct, synth_buf, in);
  65. for (i = 0; i < 32; i++) {
  66. float a = synth_buf2[i ];
  67. float b = synth_buf2[i + 32];
  68. float c = 0;
  69. float d = 0;
  70. for (j = 0; j < 1024 - *synth_buf_offset; j += 128) {
  71. a += window[i + j ] * (-synth_buf[31 - i + j ]);
  72. b += window[i + j + 32] * ( synth_buf[ i + j ]);
  73. c += window[i + j + 64] * ( synth_buf[32 + i + j ]);
  74. d += window[i + j + 96] * ( synth_buf[63 - i + j ]);
  75. }
  76. for ( ; j < 1024; j += 128) {
  77. a += window[i + j ] * (-synth_buf[31 - i + j - 1024]);
  78. b += window[i + j + 32] * ( synth_buf[ i + j - 1024]);
  79. c += window[i + j + 64] * ( synth_buf[32 + i + j - 1024]);
  80. d += window[i + j + 96] * ( synth_buf[63 - i + j - 1024]);
  81. }
  82. out[i ] = a * scale;
  83. out[i + 32] = b * scale;
  84. synth_buf2[i ] = c;
  85. synth_buf2[i + 32] = d;
  86. }
  87. *synth_buf_offset = (*synth_buf_offset - 64) & 1023;
  88. }
  89. static void synth_filter_fixed(DCADCTContext *imdct,
  90. int32_t *synth_buf_ptr, int *synth_buf_offset,
  91. int32_t synth_buf2[32], const int32_t window[512],
  92. int32_t out[32], const int32_t in[32])
  93. {
  94. int32_t *synth_buf = synth_buf_ptr + *synth_buf_offset;
  95. int i, j;
  96. imdct->imdct_half[0](synth_buf, in);
  97. for (i = 0; i < 16; i++) {
  98. int64_t a = synth_buf2[i ] * (INT64_C(1) << 21);
  99. int64_t b = synth_buf2[i + 16] * (INT64_C(1) << 21);
  100. int64_t c = 0;
  101. int64_t d = 0;
  102. for (j = 0; j < 512 - *synth_buf_offset; j += 64) {
  103. a += (int64_t)window[i + j ] * synth_buf[ i + j ];
  104. b += (int64_t)window[i + j + 16] * synth_buf[15 - i + j ];
  105. c += (int64_t)window[i + j + 32] * synth_buf[16 + i + j ];
  106. d += (int64_t)window[i + j + 48] * synth_buf[31 - i + j ];
  107. }
  108. for ( ; j < 512; j += 64) {
  109. a += (int64_t)window[i + j ] * synth_buf[ i + j - 512];
  110. b += (int64_t)window[i + j + 16] * synth_buf[15 - i + j - 512];
  111. c += (int64_t)window[i + j + 32] * synth_buf[16 + i + j - 512];
  112. d += (int64_t)window[i + j + 48] * synth_buf[31 - i + j - 512];
  113. }
  114. out[i ] = clip23(norm21(a));
  115. out[i + 16] = clip23(norm21(b));
  116. synth_buf2[i ] = norm21(c);
  117. synth_buf2[i + 16] = norm21(d);
  118. }
  119. *synth_buf_offset = (*synth_buf_offset - 32) & 511;
  120. }
  121. static void synth_filter_fixed_64(DCADCTContext *imdct,
  122. int32_t *synth_buf_ptr, int *synth_buf_offset,
  123. int32_t synth_buf2[64], const int32_t window[1024],
  124. int32_t out[64], const int32_t in[64])
  125. {
  126. int32_t *synth_buf = synth_buf_ptr + *synth_buf_offset;
  127. int i, j;
  128. imdct->imdct_half[1](synth_buf, in);
  129. for (i = 0; i < 32; i++) {
  130. int64_t a = synth_buf2[i ] * (INT64_C(1) << 20);
  131. int64_t b = synth_buf2[i + 32] * (INT64_C(1) << 20);
  132. int64_t c = 0;
  133. int64_t d = 0;
  134. for (j = 0; j < 1024 - *synth_buf_offset; j += 128) {
  135. a += (int64_t)window[i + j ] * synth_buf[ i + j ];
  136. b += (int64_t)window[i + j + 32] * synth_buf[31 - i + j ];
  137. c += (int64_t)window[i + j + 64] * synth_buf[32 + i + j ];
  138. d += (int64_t)window[i + j + 96] * synth_buf[63 - i + j ];
  139. }
  140. for ( ; j < 1024; j += 128) {
  141. a += (int64_t)window[i + j ] * synth_buf[ i + j - 1024];
  142. b += (int64_t)window[i + j + 32] * synth_buf[31 - i + j - 1024];
  143. c += (int64_t)window[i + j + 64] * synth_buf[32 + i + j - 1024];
  144. d += (int64_t)window[i + j + 96] * synth_buf[63 - i + j - 1024];
  145. }
  146. out[i ] = clip23(norm20(a));
  147. out[i + 32] = clip23(norm20(b));
  148. synth_buf2[i ] = norm20(c);
  149. synth_buf2[i + 32] = norm20(d);
  150. }
  151. *synth_buf_offset = (*synth_buf_offset - 64) & 1023;
  152. }
  153. av_cold void ff_synth_filter_init(SynthFilterContext *c)
  154. {
  155. c->synth_filter_float = synth_filter_float;
  156. c->synth_filter_float_64 = synth_filter_float_64;
  157. c->synth_filter_fixed = synth_filter_fixed;
  158. c->synth_filter_fixed_64 = synth_filter_fixed_64;
  159. if (ARCH_AARCH64)
  160. ff_synth_filter_init_aarch64(c);
  161. if (ARCH_ARM)
  162. ff_synth_filter_init_arm(c);
  163. if (ARCH_X86)
  164. ff_synth_filter_init_x86(c);
  165. }