You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

363 lines
11KB

  1. /*
  2. * Copyright (C) 2016 foo86
  3. *
  4. * This file is part of FFmpeg.
  5. *
  6. * FFmpeg is free software; you can redistribute it and/or
  7. * modify it under the terms of the GNU Lesser General Public
  8. * License as published by the Free Software Foundation; either
  9. * version 2.1 of the License, or (at your option) any later version.
  10. *
  11. * FFmpeg is distributed in the hope that it will be useful,
  12. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  14. * Lesser General Public License for more details.
  15. *
  16. * You should have received a copy of the GNU Lesser General Public
  17. * License along with FFmpeg; if not, write to the Free Software
  18. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  19. */
  20. #include "libavutil/common.h"
  21. #include "dcadct.h"
  22. #include "dcamath.h"
  23. static void sum_a(const int *input, int *output, int len)
  24. {
  25. int i;
  26. for (i = 0; i < len; i++)
  27. output[i] = input[2 * i] + input[2 * i + 1];
  28. }
  29. static void sum_b(const int *input, int *output, int len)
  30. {
  31. int i;
  32. output[0] = input[0];
  33. for (i = 1; i < len; i++)
  34. output[i] = input[2 * i] + input[2 * i - 1];
  35. }
  36. static void sum_c(const int *input, int *output, int len)
  37. {
  38. int i;
  39. for (i = 0; i < len; i++)
  40. output[i] = input[2 * i];
  41. }
  42. static void sum_d(const int *input, int *output, int len)
  43. {
  44. int i;
  45. output[0] = input[1];
  46. for (i = 1; i < len; i++)
  47. output[i] = input[2 * i - 1] + input[2 * i + 1];
  48. }
  49. static void dct_a(const int *input, int *output)
  50. {
  51. static const int cos_mod[8][8] = {
  52. { 8348215, 8027397, 7398092, 6484482, 5321677, 3954362, 2435084, 822227 },
  53. { 8027397, 5321677, 822227, -3954362, -7398092, -8348215, -6484482, -2435084 },
  54. { 7398092, 822227, -6484482, -8027397, -2435084, 5321677, 8348215, 3954362 },
  55. { 6484482, -3954362, -8027397, 822227, 8348215, 2435084, -7398092, -5321677 },
  56. { 5321677, -7398092, -2435084, 8348215, -822227, -8027397, 3954362, 6484482 },
  57. { 3954362, -8348215, 5321677, 2435084, -8027397, 6484482, 822227, -7398092 },
  58. { 2435084, -6484482, 8348215, -7398092, 3954362, 822227, -5321677, 8027397 },
  59. { 822227, -2435084, 3954362, -5321677, 6484482, -7398092, 8027397, -8348215 }
  60. };
  61. int i, j;
  62. for (i = 0; i < 8; i++) {
  63. int64_t res = 0;
  64. for (j = 0; j < 8; j++)
  65. res += (int64_t)cos_mod[i][j] * input[j];
  66. output[i] = norm23(res);
  67. }
  68. }
  69. static void dct_b(const int *input, int *output)
  70. {
  71. static const int cos_mod[8][7] = {
  72. { 8227423, 7750063, 6974873, 5931642, 4660461, 3210181, 1636536 },
  73. { 6974873, 3210181, -1636536, -5931642, -8227423, -7750063, -4660461 },
  74. { 4660461, -3210181, -8227423, -5931642, 1636536, 7750063, 6974873 },
  75. { 1636536, -7750063, -4660461, 5931642, 6974873, -3210181, -8227423 },
  76. { -1636536, -7750063, 4660461, 5931642, -6974873, -3210181, 8227423 },
  77. { -4660461, -3210181, 8227423, -5931642, -1636536, 7750063, -6974873 },
  78. { -6974873, 3210181, 1636536, -5931642, 8227423, -7750063, 4660461 },
  79. { -8227423, 7750063, -6974873, 5931642, -4660461, 3210181, -1636536 }
  80. };
  81. int i, j;
  82. for (i = 0; i < 8; i++) {
  83. int64_t res = input[0] * (INT64_C(1) << 23);
  84. for (j = 0; j < 7; j++)
  85. res += (int64_t)cos_mod[i][j] * input[1 + j];
  86. output[i] = norm23(res);
  87. }
  88. }
  89. static void mod_a(const int *input, int *output)
  90. {
  91. static const int cos_mod[16] = {
  92. 4199362, 4240198, 4323885, 4454708,
  93. 4639772, 4890013, 5221943, 5660703,
  94. -6245623, -7040975, -8158494, -9809974,
  95. -12450076, -17261920, -28585092, -85479984
  96. };
  97. int i, k;
  98. for (i = 0; i < 8; i++)
  99. output[i] = mul23(cos_mod[i], input[i] + input[8 + i]);
  100. for (i = 8, k = 7; i < 16; i++, k--)
  101. output[i] = mul23(cos_mod[i], input[k] - input[8 + k]);
  102. }
  103. static void mod_b(int *input, int *output)
  104. {
  105. static const int cos_mod[8] = {
  106. 4214598, 4383036, 4755871, 5425934,
  107. 6611520, 8897610, 14448934, 42791536
  108. };
  109. int i, k;
  110. for (i = 0; i < 8; i++)
  111. input[8 + i] = mul23(cos_mod[i], input[8 + i]);
  112. for (i = 0; i < 8; i++)
  113. output[i] = input[i] + input[8 + i];
  114. for (i = 8, k = 7; i < 16; i++, k--)
  115. output[i] = input[k] - input[8 + k];
  116. }
  117. static void mod_c(const int *input, int *output)
  118. {
  119. static const int cos_mod[32] = {
  120. 1048892, 1051425, 1056522, 1064244,
  121. 1074689, 1087987, 1104313, 1123884,
  122. 1146975, 1173922, 1205139, 1241133,
  123. 1282529, 1330095, 1384791, 1447815,
  124. -1520688, -1605358, -1704360, -1821051,
  125. -1959964, -2127368, -2332183, -2587535,
  126. -2913561, -3342802, -3931480, -4785806,
  127. -6133390, -8566050, -14253820, -42727120
  128. };
  129. int i, k;
  130. for (i = 0; i < 16; i++)
  131. output[i] = mul23(cos_mod[i], input[i] + input[16 + i]);
  132. for (i = 16, k = 15; i < 32; i++, k--)
  133. output[i] = mul23(cos_mod[i], input[k] - input[16 + k]);
  134. }
  135. static void clp_v(int *input, int len)
  136. {
  137. int i;
  138. for (i = 0; i < len; i++)
  139. input[i] = clip23(input[i]);
  140. }
  141. static void imdct_half_32(int32_t *output, const int32_t *input)
  142. {
  143. int buf_a[32], buf_b[32];
  144. int i, k, mag, shift, round;
  145. mag = 0;
  146. for (i = 0; i < 32; i++)
  147. mag += abs(input[i]);
  148. shift = mag > 0x400000 ? 2 : 0;
  149. round = shift > 0 ? 1 << (shift - 1) : 0;
  150. for (i = 0; i < 32; i++)
  151. buf_a[i] = (input[i] + round) >> shift;
  152. sum_a(buf_a, buf_b + 0, 16);
  153. sum_b(buf_a, buf_b + 16, 16);
  154. clp_v(buf_b, 32);
  155. sum_a(buf_b + 0, buf_a + 0, 8);
  156. sum_b(buf_b + 0, buf_a + 8, 8);
  157. sum_c(buf_b + 16, buf_a + 16, 8);
  158. sum_d(buf_b + 16, buf_a + 24, 8);
  159. clp_v(buf_a, 32);
  160. dct_a(buf_a + 0, buf_b + 0);
  161. dct_b(buf_a + 8, buf_b + 8);
  162. dct_b(buf_a + 16, buf_b + 16);
  163. dct_b(buf_a + 24, buf_b + 24);
  164. clp_v(buf_b, 32);
  165. mod_a(buf_b + 0, buf_a + 0);
  166. mod_b(buf_b + 16, buf_a + 16);
  167. clp_v(buf_a, 32);
  168. mod_c(buf_a, buf_b);
  169. for (i = 0; i < 32; i++)
  170. buf_b[i] = clip23(buf_b[i] * (1 << shift));
  171. for (i = 0, k = 31; i < 16; i++, k--) {
  172. output[ i] = clip23(buf_b[i] - buf_b[k]);
  173. output[16 + i] = clip23(buf_b[i] + buf_b[k]);
  174. }
  175. }
  176. static void mod64_a(const int *input, int *output)
  177. {
  178. static const int cos_mod[32] = {
  179. 4195568, 4205700, 4226086, 4256977,
  180. 4298755, 4351949, 4417251, 4495537,
  181. 4587901, 4695690, 4820557, 4964534,
  182. 5130115, 5320382, 5539164, 5791261,
  183. -6082752, -6421430, -6817439, -7284203,
  184. -7839855, -8509474, -9328732, -10350140,
  185. -11654242, -13371208, -15725922, -19143224,
  186. -24533560, -34264200, -57015280, -170908480
  187. };
  188. int i, k;
  189. for (i = 0; i < 16; i++)
  190. output[i] = mul23(cos_mod[i], input[i] + input[16 + i]);
  191. for (i = 16, k = 15; i < 32; i++, k--)
  192. output[i] = mul23(cos_mod[i], input[k] - input[16 + k]);
  193. }
  194. static void mod64_b(int *input, int *output)
  195. {
  196. static const int cos_mod[16] = {
  197. 4199362, 4240198, 4323885, 4454708,
  198. 4639772, 4890013, 5221943, 5660703,
  199. 6245623, 7040975, 8158494, 9809974,
  200. 12450076, 17261920, 28585092, 85479984
  201. };
  202. int i, k;
  203. for (i = 0; i < 16; i++)
  204. input[16 + i] = mul23(cos_mod[i], input[16 + i]);
  205. for (i = 0; i < 16; i++)
  206. output[i] = input[i] + input[16 + i];
  207. for (i = 16, k = 15; i < 32; i++, k--)
  208. output[i] = input[k] - input[16 + k];
  209. }
  210. static void mod64_c(const int *input, int *output)
  211. {
  212. static const int cos_mod[64] = {
  213. 741511, 741958, 742853, 744199,
  214. 746001, 748262, 750992, 754197,
  215. 757888, 762077, 766777, 772003,
  216. 777772, 784105, 791021, 798546,
  217. 806707, 815532, 825054, 835311,
  218. 846342, 858193, 870912, 884554,
  219. 899181, 914860, 931667, 949686,
  220. 969011, 989747, 1012012, 1035941,
  221. -1061684, -1089412, -1119320, -1151629,
  222. -1186595, -1224511, -1265719, -1310613,
  223. -1359657, -1413400, -1472490, -1537703,
  224. -1609974, -1690442, -1780506, -1881904,
  225. -1996824, -2128058, -2279225, -2455101,
  226. -2662128, -2909200, -3208956, -3579983,
  227. -4050785, -4667404, -5509372, -6726913,
  228. -8641940, -12091426, -20144284, -60420720
  229. };
  230. int i, k;
  231. for (i = 0; i < 32; i++)
  232. output[i] = mul23(cos_mod[i], input[i] + input[32 + i]);
  233. for (i = 32, k = 31; i < 64; i++, k--)
  234. output[i] = mul23(cos_mod[i], input[k] - input[32 + k]);
  235. }
  236. static void imdct_half_64(int32_t *output, const int32_t *input)
  237. {
  238. int buf_a[64], buf_b[64];
  239. int i, k, mag, shift, round;
  240. mag = 0;
  241. for (i = 0; i < 64; i++)
  242. mag += abs(input[i]);
  243. shift = mag > 0x400000 ? 2 : 0;
  244. round = shift > 0 ? 1 << (shift - 1) : 0;
  245. for (i = 0; i < 64; i++)
  246. buf_a[i] = (input[i] + round) >> shift;
  247. sum_a(buf_a, buf_b + 0, 32);
  248. sum_b(buf_a, buf_b + 32, 32);
  249. clp_v(buf_b, 64);
  250. sum_a(buf_b + 0, buf_a + 0, 16);
  251. sum_b(buf_b + 0, buf_a + 16, 16);
  252. sum_c(buf_b + 32, buf_a + 32, 16);
  253. sum_d(buf_b + 32, buf_a + 48, 16);
  254. clp_v(buf_a, 64);
  255. sum_a(buf_a + 0, buf_b + 0, 8);
  256. sum_b(buf_a + 0, buf_b + 8, 8);
  257. sum_c(buf_a + 16, buf_b + 16, 8);
  258. sum_d(buf_a + 16, buf_b + 24, 8);
  259. sum_c(buf_a + 32, buf_b + 32, 8);
  260. sum_d(buf_a + 32, buf_b + 40, 8);
  261. sum_c(buf_a + 48, buf_b + 48, 8);
  262. sum_d(buf_a + 48, buf_b + 56, 8);
  263. clp_v(buf_b, 64);
  264. dct_a(buf_b + 0, buf_a + 0);
  265. dct_b(buf_b + 8, buf_a + 8);
  266. dct_b(buf_b + 16, buf_a + 16);
  267. dct_b(buf_b + 24, buf_a + 24);
  268. dct_b(buf_b + 32, buf_a + 32);
  269. dct_b(buf_b + 40, buf_a + 40);
  270. dct_b(buf_b + 48, buf_a + 48);
  271. dct_b(buf_b + 56, buf_a + 56);
  272. clp_v(buf_a, 64);
  273. mod_a(buf_a + 0, buf_b + 0);
  274. mod_b(buf_a + 16, buf_b + 16);
  275. mod_b(buf_a + 32, buf_b + 32);
  276. mod_b(buf_a + 48, buf_b + 48);
  277. clp_v(buf_b, 64);
  278. mod64_a(buf_b + 0, buf_a + 0);
  279. mod64_b(buf_b + 32, buf_a + 32);
  280. clp_v(buf_a, 64);
  281. mod64_c(buf_a, buf_b);
  282. for (i = 0; i < 64; i++)
  283. buf_b[i] = clip23(buf_b[i] * (1 << shift));
  284. for (i = 0, k = 63; i < 32; i++, k--) {
  285. output[ i] = clip23(buf_b[i] - buf_b[k]);
  286. output[32 + i] = clip23(buf_b[i] + buf_b[k]);
  287. }
  288. }
  289. av_cold void ff_dcadct_init(DCADCTContext *c)
  290. {
  291. c->imdct_half[0] = imdct_half_32;
  292. c->imdct_half[1] = imdct_half_64;
  293. }