/*
 * DSP utils
 * Copyright (c) 2000, 2001 Fabrice Bellard
 * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
 *
 * This file is part of Libav.
 *
 * Libav is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * Libav is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with Libav; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

/**
 * @file
 * DSP utils
 */

#include "libavutil/attributes.h"
#include "libavutil/imgutils.h"
#include "avcodec.h"
#include "copy_block.h"
#include "dct.h"
#include "dsputil.h"
#include "simple_idct.h"
#include "faandct.h"
#include "faanidct.h"
#include "imgconvert.h"
#include "mathops.h"
#include "mpegvideo.h"
#include "config.h"

uint32_t ff_square_tab[512] = { 0, };

#define BIT_DEPTH 16
#include "dsputilenc_template.c"
#undef BIT_DEPTH

#define BIT_DEPTH 8
#include "dsputilenc_template.c"

av_cold void ff_init_scantable(uint8_t *permutation, ScanTable *st,
                               const uint8_t *src_scantable)
{
    int i, end;

    st->scantable = src_scantable;

    for (i = 0; i < 64; i++) {
        int j = src_scantable[i];
        st->permutated[i] = permutation[j];
    }

    end = -1;
    for (i = 0; i < 64; i++) {
        int j = st->permutated[i];
        if (j > end)
            end = j;
        st->raster_end[i] = end;
    }
}

av_cold void ff_init_scantable_permutation(uint8_t *idct_permutation,
                                           int idct_permutation_type)
{
    int i;

    if (ARCH_X86)
        if (ff_init_scantable_permutation_x86(idct_permutation,
                                              idct_permutation_type))
            return;

    switch (idct_permutation_type) {
    case FF_NO_IDCT_PERM:
        for (i = 0; i < 64; i++)
            idct_permutation[i] = i;
        break;
    case FF_LIBMPEG2_IDCT_PERM:
        for (i = 0; i < 64; i++)
            idct_permutation[i] = (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2);
        break;
    case FF_TRANSPOSE_IDCT_PERM:
        for (i = 0; i < 64; i++)
            idct_permutation[i] = ((i & 7) << 3) | (i >> 3);
        break;
    case FF_PARTTRANS_IDCT_PERM:
        for (i = 0; i < 64; i++)
            idct_permutation[i] = (i & 0x24) | ((i & 3) << 3) | ((i >> 3) & 3);
        break;
    default:
        av_log(NULL, AV_LOG_ERROR,
               "Internal error, IDCT permutation not set\n");
    }
}

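/* Sum of all pixel values of a 16x16 block. */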
static int pix_sum_c(uint8_t *pix, int line_size)
{
    int s = 0, i, j;

    for (i = 0; i < 16; i++) {
        for (j = 0; j < 16; j += 8) {
            s += pix[0];
            s += pix[1];
            s += pix[2];
            s += pix[3];
            s += pix[4];
            s += pix[5];
            s += pix[6];
            s += pix[7];
            pix += 8;
        }
        pix += line_size - 16;
    }
    return s;
}

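/* Sum of squared pixel values of a 16x16 block, using the ff_square_tab lookup. */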
static int pix_norm1_c(uint8_t *pix, int line_size)
{
    int s = 0, i, j;
    uint32_t *sq = ff_square_tab + 256;

    for (i = 0; i < 16; i++) {
        for (j = 0; j < 16; j += 8) {
#if 0
            s += sq[pix[0]];
            s += sq[pix[1]];
            s += sq[pix[2]];
            s += sq[pix[3]];
            s += sq[pix[4]];
            s += sq[pix[5]];
            s += sq[pix[6]];
            s += sq[pix[7]];
#else
#if HAVE_FAST_64BIT
            register uint64_t x = *(uint64_t *) pix;
            s += sq[x & 0xff];
            s += sq[(x >> 8) & 0xff];
            s += sq[(x >> 16) & 0xff];
            s += sq[(x >> 24) & 0xff];
            s += sq[(x >> 32) & 0xff];
            s += sq[(x >> 40) & 0xff];
            s += sq[(x >> 48) & 0xff];
            s += sq[(x >> 56) & 0xff];
#else
            register uint32_t x = *(uint32_t *) pix;
            s += sq[x & 0xff];
            s += sq[(x >> 8) & 0xff];
            s += sq[(x >> 16) & 0xff];
            s += sq[(x >> 24) & 0xff];
            x  = *(uint32_t *) (pix + 4);
            s += sq[x & 0xff];
            s += sq[(x >> 8) & 0xff];
            s += sq[(x >> 16) & 0xff];
            s += sq[(x >> 24) & 0xff];
#endif
#endif
            pix += 8;
        }
        pix += line_size - 16;
    }
    return s;
}

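/* Sum of squared errors (SSE) between two blocks of width 4, 8 and 16. */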
static int sse4_c(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
                  int line_size, int h)
{
    int s = 0, i;
    uint32_t *sq = ff_square_tab + 256;

    for (i = 0; i < h; i++) {
        s += sq[pix1[0] - pix2[0]];
        s += sq[pix1[1] - pix2[1]];
        s += sq[pix1[2] - pix2[2]];
        s += sq[pix1[3] - pix2[3]];
        pix1 += line_size;
        pix2 += line_size;
    }
    return s;
}

static int sse8_c(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
                  int line_size, int h)
{
    int s = 0, i;
    uint32_t *sq = ff_square_tab + 256;

    for (i = 0; i < h; i++) {
        s += sq[pix1[0] - pix2[0]];
        s += sq[pix1[1] - pix2[1]];
        s += sq[pix1[2] - pix2[2]];
        s += sq[pix1[3] - pix2[3]];
        s += sq[pix1[4] - pix2[4]];
        s += sq[pix1[5] - pix2[5]];
        s += sq[pix1[6] - pix2[6]];
        s += sq[pix1[7] - pix2[7]];
        pix1 += line_size;
        pix2 += line_size;
    }
    return s;
}

static int sse16_c(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
                   int line_size, int h)
{
    int s = 0, i;
    uint32_t *sq = ff_square_tab + 256;

    for (i = 0; i < h; i++) {
        s += sq[pix1[0]  - pix2[0]];
        s += sq[pix1[1]  - pix2[1]];
        s += sq[pix1[2]  - pix2[2]];
        s += sq[pix1[3]  - pix2[3]];
        s += sq[pix1[4]  - pix2[4]];
        s += sq[pix1[5]  - pix2[5]];
        s += sq[pix1[6]  - pix2[6]];
        s += sq[pix1[7]  - pix2[7]];
        s += sq[pix1[8]  - pix2[8]];
        s += sq[pix1[9]  - pix2[9]];
        s += sq[pix1[10] - pix2[10]];
        s += sq[pix1[11] - pix2[11]];
        s += sq[pix1[12] - pix2[12]];
        s += sq[pix1[13] - pix2[13]];
        s += sq[pix1[14] - pix2[14]];
        s += sq[pix1[15] - pix2[15]];
        pix1 += line_size;
        pix2 += line_size;
    }
    return s;
}

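/* Store the per-pixel difference of two 8x8 blocks into a coefficient block. */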
static void diff_pixels_c(int16_t *restrict block, const uint8_t *s1,
                          const uint8_t *s2, int stride)
{
    int i;

    /* read the pixels */
    for (i = 0; i < 8; i++) {
        block[0] = s1[0] - s2[0];
        block[1] = s1[1] - s2[1];
        block[2] = s1[2] - s2[2];
        block[3] = s1[3] - s2[3];
        block[4] = s1[4] - s2[4];
        block[5] = s1[5] - s2[5];
        block[6] = s1[6] - s2[6];
        block[7] = s1[7] - s2[7];
        s1    += stride;
        s2    += stride;
        block += 8;
    }
}

static void put_pixels_clamped_c(const int16_t *block, uint8_t *restrict pixels,
                                 int line_size)
{
    int i;

    /* read the pixels */
    for (i = 0; i < 8; i++) {
        pixels[0] = av_clip_uint8(block[0]);
        pixels[1] = av_clip_uint8(block[1]);
        pixels[2] = av_clip_uint8(block[2]);
        pixels[3] = av_clip_uint8(block[3]);
        pixels[4] = av_clip_uint8(block[4]);
        pixels[5] = av_clip_uint8(block[5]);
        pixels[6] = av_clip_uint8(block[6]);
        pixels[7] = av_clip_uint8(block[7]);
        pixels += line_size;
        block  += 8;
    }
}

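/* Store an 8x8 block of signed coefficients as unsigned pixels, offset by 128. */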
static void put_signed_pixels_clamped_c(const int16_t *block,
                                        uint8_t *restrict pixels,
                                        int line_size)
{
    int i, j;

    for (i = 0; i < 8; i++) {
        for (j = 0; j < 8; j++) {
            if (*block < -128)
                *pixels = 0;
            else if (*block > 127)
                *pixels = 255;
            else
                *pixels = (uint8_t) (*block + 128);
            block++;
            pixels++;
        }
        pixels += (line_size - 8);
    }
}

static void add_pixels_clamped_c(const int16_t *block, uint8_t *restrict pixels,
                                 int line_size)
{
    int i;

    /* read the pixels */
    for (i = 0; i < 8; i++) {
        pixels[0] = av_clip_uint8(pixels[0] + block[0]);
        pixels[1] = av_clip_uint8(pixels[1] + block[1]);
        pixels[2] = av_clip_uint8(pixels[2] + block[2]);
        pixels[3] = av_clip_uint8(pixels[3] + block[3]);
        pixels[4] = av_clip_uint8(pixels[4] + block[4]);
        pixels[5] = av_clip_uint8(pixels[5] + block[5]);
        pixels[6] = av_clip_uint8(pixels[6] + block[6]);
        pixels[7] = av_clip_uint8(pixels[7] + block[7]);
        pixels += line_size;
        block  += 8;
    }
}

static int sum_abs_dctelem_c(int16_t *block)
{
    int sum = 0, i;

    for (i = 0; i < 64; i++)
        sum += FFABS(block[i]);
    return sum;
}

#define avg2(a, b) ((a + b + 1) >> 1)
#define avg4(a, b, c, d) ((a + b + c + d + 2) >> 2)

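/* Sum of absolute differences (SAD) for 16x16 and 8x8 blocks; the _x2, _y2
 * and _xy2 variants compare against half-pel interpolated reference pixels. */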
static inline int pix_abs16_c(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
                              int line_size, int h)
{
    int s = 0, i;

    for (i = 0; i < h; i++) {
        s += abs(pix1[0]  - pix2[0]);
        s += abs(pix1[1]  - pix2[1]);
        s += abs(pix1[2]  - pix2[2]);
        s += abs(pix1[3]  - pix2[3]);
        s += abs(pix1[4]  - pix2[4]);
        s += abs(pix1[5]  - pix2[5]);
        s += abs(pix1[6]  - pix2[6]);
        s += abs(pix1[7]  - pix2[7]);
        s += abs(pix1[8]  - pix2[8]);
        s += abs(pix1[9]  - pix2[9]);
        s += abs(pix1[10] - pix2[10]);
        s += abs(pix1[11] - pix2[11]);
        s += abs(pix1[12] - pix2[12]);
        s += abs(pix1[13] - pix2[13]);
        s += abs(pix1[14] - pix2[14]);
        s += abs(pix1[15] - pix2[15]);
        pix1 += line_size;
        pix2 += line_size;
    }
    return s;
}

static int pix_abs16_x2_c(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
                          int line_size, int h)
{
    int s = 0, i;

    for (i = 0; i < h; i++) {
        s += abs(pix1[0]  - avg2(pix2[0],  pix2[1]));
        s += abs(pix1[1]  - avg2(pix2[1],  pix2[2]));
        s += abs(pix1[2]  - avg2(pix2[2],  pix2[3]));
        s += abs(pix1[3]  - avg2(pix2[3],  pix2[4]));
        s += abs(pix1[4]  - avg2(pix2[4],  pix2[5]));
        s += abs(pix1[5]  - avg2(pix2[5],  pix2[6]));
        s += abs(pix1[6]  - avg2(pix2[6],  pix2[7]));
        s += abs(pix1[7]  - avg2(pix2[7],  pix2[8]));
        s += abs(pix1[8]  - avg2(pix2[8],  pix2[9]));
        s += abs(pix1[9]  - avg2(pix2[9],  pix2[10]));
        s += abs(pix1[10] - avg2(pix2[10], pix2[11]));
        s += abs(pix1[11] - avg2(pix2[11], pix2[12]));
        s += abs(pix1[12] - avg2(pix2[12], pix2[13]));
        s += abs(pix1[13] - avg2(pix2[13], pix2[14]));
        s += abs(pix1[14] - avg2(pix2[14], pix2[15]));
        s += abs(pix1[15] - avg2(pix2[15], pix2[16]));
        pix1 += line_size;
        pix2 += line_size;
    }
    return s;
}

static int pix_abs16_y2_c(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
                          int line_size, int h)
{
    int s = 0, i;
    uint8_t *pix3 = pix2 + line_size;

    for (i = 0; i < h; i++) {
        s += abs(pix1[0]  - avg2(pix2[0],  pix3[0]));
        s += abs(pix1[1]  - avg2(pix2[1],  pix3[1]));
        s += abs(pix1[2]  - avg2(pix2[2],  pix3[2]));
        s += abs(pix1[3]  - avg2(pix2[3],  pix3[3]));
        s += abs(pix1[4]  - avg2(pix2[4],  pix3[4]));
        s += abs(pix1[5]  - avg2(pix2[5],  pix3[5]));
        s += abs(pix1[6]  - avg2(pix2[6],  pix3[6]));
        s += abs(pix1[7]  - avg2(pix2[7],  pix3[7]));
        s += abs(pix1[8]  - avg2(pix2[8],  pix3[8]));
        s += abs(pix1[9]  - avg2(pix2[9],  pix3[9]));
        s += abs(pix1[10] - avg2(pix2[10], pix3[10]));
        s += abs(pix1[11] - avg2(pix2[11], pix3[11]));
        s += abs(pix1[12] - avg2(pix2[12], pix3[12]));
        s += abs(pix1[13] - avg2(pix2[13], pix3[13]));
        s += abs(pix1[14] - avg2(pix2[14], pix3[14]));
        s += abs(pix1[15] - avg2(pix2[15], pix3[15]));
        pix1 += line_size;
        pix2 += line_size;
        pix3 += line_size;
    }
    return s;
}

static int pix_abs16_xy2_c(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
                           int line_size, int h)
{
    int s = 0, i;
    uint8_t *pix3 = pix2 + line_size;

    for (i = 0; i < h; i++) {
        s += abs(pix1[0]  - avg4(pix2[0],  pix2[1],  pix3[0],  pix3[1]));
        s += abs(pix1[1]  - avg4(pix2[1],  pix2[2],  pix3[1],  pix3[2]));
        s += abs(pix1[2]  - avg4(pix2[2],  pix2[3],  pix3[2],  pix3[3]));
        s += abs(pix1[3]  - avg4(pix2[3],  pix2[4],  pix3[3],  pix3[4]));
        s += abs(pix1[4]  - avg4(pix2[4],  pix2[5],  pix3[4],  pix3[5]));
        s += abs(pix1[5]  - avg4(pix2[5],  pix2[6],  pix3[5],  pix3[6]));
        s += abs(pix1[6]  - avg4(pix2[6],  pix2[7],  pix3[6],  pix3[7]));
        s += abs(pix1[7]  - avg4(pix2[7],  pix2[8],  pix3[7],  pix3[8]));
        s += abs(pix1[8]  - avg4(pix2[8],  pix2[9],  pix3[8],  pix3[9]));
        s += abs(pix1[9]  - avg4(pix2[9],  pix2[10], pix3[9],  pix3[10]));
        s += abs(pix1[10] - avg4(pix2[10], pix2[11], pix3[10], pix3[11]));
        s += abs(pix1[11] - avg4(pix2[11], pix2[12], pix3[11], pix3[12]));
        s += abs(pix1[12] - avg4(pix2[12], pix2[13], pix3[12], pix3[13]));
        s += abs(pix1[13] - avg4(pix2[13], pix2[14], pix3[13], pix3[14]));
        s += abs(pix1[14] - avg4(pix2[14], pix2[15], pix3[14], pix3[15]));
        s += abs(pix1[15] - avg4(pix2[15], pix2[16], pix3[15], pix3[16]));
        pix1 += line_size;
        pix2 += line_size;
        pix3 += line_size;
    }
    return s;
}

static inline int pix_abs8_c(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
                             int line_size, int h)
{
    int s = 0, i;

    for (i = 0; i < h; i++) {
        s += abs(pix1[0] - pix2[0]);
        s += abs(pix1[1] - pix2[1]);
        s += abs(pix1[2] - pix2[2]);
        s += abs(pix1[3] - pix2[3]);
        s += abs(pix1[4] - pix2[4]);
        s += abs(pix1[5] - pix2[5]);
        s += abs(pix1[6] - pix2[6]);
        s += abs(pix1[7] - pix2[7]);
        pix1 += line_size;
        pix2 += line_size;
    }
    return s;
}

static int pix_abs8_x2_c(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
                         int line_size, int h)
{
    int s = 0, i;

    for (i = 0; i < h; i++) {
        s += abs(pix1[0] - avg2(pix2[0], pix2[1]));
        s += abs(pix1[1] - avg2(pix2[1], pix2[2]));
        s += abs(pix1[2] - avg2(pix2[2], pix2[3]));
        s += abs(pix1[3] - avg2(pix2[3], pix2[4]));
        s += abs(pix1[4] - avg2(pix2[4], pix2[5]));
        s += abs(pix1[5] - avg2(pix2[5], pix2[6]));
        s += abs(pix1[6] - avg2(pix2[6], pix2[7]));
        s += abs(pix1[7] - avg2(pix2[7], pix2[8]));
        pix1 += line_size;
        pix2 += line_size;
    }
    return s;
}

static int pix_abs8_y2_c(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
                         int line_size, int h)
{
    int s = 0, i;
    uint8_t *pix3 = pix2 + line_size;

    for (i = 0; i < h; i++) {
        s += abs(pix1[0] - avg2(pix2[0], pix3[0]));
        s += abs(pix1[1] - avg2(pix2[1], pix3[1]));
        s += abs(pix1[2] - avg2(pix2[2], pix3[2]));
        s += abs(pix1[3] - avg2(pix2[3], pix3[3]));
        s += abs(pix1[4] - avg2(pix2[4], pix3[4]));
        s += abs(pix1[5] - avg2(pix2[5], pix3[5]));
        s += abs(pix1[6] - avg2(pix2[6], pix3[6]));
        s += abs(pix1[7] - avg2(pix2[7], pix3[7]));
        pix1 += line_size;
        pix2 += line_size;
        pix3 += line_size;
    }
    return s;
}

static int pix_abs8_xy2_c(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2,
                          int line_size, int h)
{
    int s = 0, i;
    uint8_t *pix3 = pix2 + line_size;

    for (i = 0; i < h; i++) {
        s += abs(pix1[0] - avg4(pix2[0], pix2[1], pix3[0], pix3[1]));
        s += abs(pix1[1] - avg4(pix2[1], pix2[2], pix3[1], pix3[2]));
        s += abs(pix1[2] - avg4(pix2[2], pix2[3], pix3[2], pix3[3]));
        s += abs(pix1[3] - avg4(pix2[3], pix2[4], pix3[3], pix3[4]));
        s += abs(pix1[4] - avg4(pix2[4], pix2[5], pix3[4], pix3[5]));
        s += abs(pix1[5] - avg4(pix2[5], pix2[6], pix3[5], pix3[6]));
        s += abs(pix1[6] - avg4(pix2[6], pix2[7], pix3[6], pix3[7]));
        s += abs(pix1[7] - avg4(pix2[7], pix2[8], pix3[7], pix3[8]));
        pix1 += line_size;
        pix2 += line_size;
        pix3 += line_size;
    }
    return s;
}

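/* Noise-preserving SSE: SSE plus a weighted penalty for differences in the
 * local gradient structure of the two blocks. */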
static int nsse16_c(MpegEncContext *c, uint8_t *s1, uint8_t *s2,
                    int stride, int h)
{
    int score1 = 0, score2 = 0, x, y;

    for (y = 0; y < h; y++) {
        for (x = 0; x < 16; x++)
            score1 += (s1[x] - s2[x]) * (s1[x] - s2[x]);
        if (y + 1 < h) {
            for (x = 0; x < 15; x++)
                score2 += FFABS(s1[x] - s1[x + stride] -
                                s1[x + 1] + s1[x + stride + 1]) -
                          FFABS(s2[x] - s2[x + stride] -
                                s2[x + 1] + s2[x + stride + 1]);
        }
        s1 += stride;
        s2 += stride;
    }

    if (c)
        return score1 + FFABS(score2) * c->avctx->nsse_weight;
    else
        return score1 + FFABS(score2) * 8;
}

static int nsse8_c(MpegEncContext *c, uint8_t *s1, uint8_t *s2,
                   int stride, int h)
{
    int score1 = 0, score2 = 0, x, y;

    for (y = 0; y < h; y++) {
        for (x = 0; x < 8; x++)
            score1 += (s1[x] - s2[x]) * (s1[x] - s2[x]);
        if (y + 1 < h) {
            for (x = 0; x < 7; x++)
                score2 += FFABS(s1[x] - s1[x + stride] -
                                s1[x + 1] + s1[x + stride + 1]) -
                          FFABS(s2[x] - s2[x + stride] -
                                s2[x + 1] + s2[x + stride + 1]);
        }
        s1 += stride;
        s2 += stride;
    }

    if (c)
        return score1 + FFABS(score2) * c->avctx->nsse_weight;
    else
        return score1 + FFABS(score2) * 8;
}

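/* Estimate the weighted squared cost of adding a scaled basis function to the
 * residual, and apply such an update; used by the encoder's coefficient
 * refinement. */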
static int try_8x8basis_c(int16_t rem[64], int16_t weight[64],
                          int16_t basis[64], int scale)
{
    int i;
    unsigned int sum = 0;

    for (i = 0; i < 8 * 8; i++) {
        int b = rem[i] + ((basis[i] * scale +
                           (1 << (BASIS_SHIFT - RECON_SHIFT - 1))) >>
                          (BASIS_SHIFT - RECON_SHIFT));
        int w = weight[i];
        b >>= RECON_SHIFT;
        assert(-512 < b && b < 512);

        sum += (w * b) * (w * b) >> 4;
    }
    return sum >> 2;
}

static void add_8x8basis_c(int16_t rem[64], int16_t basis[64], int scale)
{
    int i;

    for (i = 0; i < 8 * 8; i++)
        rem[i] += (basis[i] * scale +
                   (1 << (BASIS_SHIFT - RECON_SHIFT - 1))) >>
                  (BASIS_SHIFT - RECON_SHIFT);
}

static int zero_cmp(MpegEncContext *s, uint8_t *a, uint8_t *b,
                    int stride, int h)
{
    return 0;
}

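/* Select the comparison functions for motion estimation and mode decision
 * according to the requested FF_CMP_* type. */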
void ff_set_cmp(DSPContext *c, me_cmp_func *cmp, int type)
{
    int i;

    memset(cmp, 0, sizeof(void *) * 6);

    for (i = 0; i < 6; i++) {
        switch (type & 0xFF) {
        case FF_CMP_SAD:
            cmp[i] = c->sad[i];
            break;
        case FF_CMP_SATD:
            cmp[i] = c->hadamard8_diff[i];
            break;
        case FF_CMP_SSE:
            cmp[i] = c->sse[i];
            break;
        case FF_CMP_DCT:
            cmp[i] = c->dct_sad[i];
            break;
        case FF_CMP_DCT264:
            cmp[i] = c->dct264_sad[i];
            break;
        case FF_CMP_DCTMAX:
            cmp[i] = c->dct_max[i];
            break;
        case FF_CMP_PSNR:
            cmp[i] = c->quant_psnr[i];
            break;
        case FF_CMP_BIT:
            cmp[i] = c->bit[i];
            break;
        case FF_CMP_RD:
            cmp[i] = c->rd[i];
            break;
        case FF_CMP_VSAD:
            cmp[i] = c->vsad[i];
            break;
        case FF_CMP_VSSE:
            cmp[i] = c->vsse[i];
            break;
        case FF_CMP_ZERO:
            cmp[i] = zero_cmp;
            break;
        case FF_CMP_NSSE:
            cmp[i] = c->nsse[i];
            break;
        default:
            av_log(NULL, AV_LOG_ERROR,
                   "internal error in cmp function selection\n");
        }
    }
}

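/* 8x8 Hadamard-transform difference (SATD): butterflies over rows and columns
 * of the difference (or source) block, then sum of absolute transformed values. */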
#define BUTTERFLY2(o1, o2, i1, i2)  \
    o1 = (i1) + (i2);               \
    o2 = (i1) - (i2);

#define BUTTERFLY1(x, y)            \
    {                               \
        int a, b;                   \
        a = x;                      \
        b = y;                      \
        x = a + b;                  \
        y = a - b;                  \
    }

#define BUTTERFLYA(x, y) (FFABS((x) + (y)) + FFABS((x) - (y)))

static int hadamard8_diff8x8_c(MpegEncContext *s, uint8_t *dst,
                               uint8_t *src, int stride, int h)
{
    int i, temp[64], sum = 0;

    assert(h == 8);

    for (i = 0; i < 8; i++) {
        // FIXME: try pointer walks
        BUTTERFLY2(temp[8 * i + 0], temp[8 * i + 1],
                   src[stride * i + 0] - dst[stride * i + 0],
                   src[stride * i + 1] - dst[stride * i + 1]);
        BUTTERFLY2(temp[8 * i + 2], temp[8 * i + 3],
                   src[stride * i + 2] - dst[stride * i + 2],
                   src[stride * i + 3] - dst[stride * i + 3]);
        BUTTERFLY2(temp[8 * i + 4], temp[8 * i + 5],
                   src[stride * i + 4] - dst[stride * i + 4],
                   src[stride * i + 5] - dst[stride * i + 5]);
        BUTTERFLY2(temp[8 * i + 6], temp[8 * i + 7],
                   src[stride * i + 6] - dst[stride * i + 6],
                   src[stride * i + 7] - dst[stride * i + 7]);

        BUTTERFLY1(temp[8 * i + 0], temp[8 * i + 2]);
        BUTTERFLY1(temp[8 * i + 1], temp[8 * i + 3]);
        BUTTERFLY1(temp[8 * i + 4], temp[8 * i + 6]);
        BUTTERFLY1(temp[8 * i + 5], temp[8 * i + 7]);

        BUTTERFLY1(temp[8 * i + 0], temp[8 * i + 4]);
        BUTTERFLY1(temp[8 * i + 1], temp[8 * i + 5]);
        BUTTERFLY1(temp[8 * i + 2], temp[8 * i + 6]);
        BUTTERFLY1(temp[8 * i + 3], temp[8 * i + 7]);
    }

    for (i = 0; i < 8; i++) {
        BUTTERFLY1(temp[8 * 0 + i], temp[8 * 1 + i]);
        BUTTERFLY1(temp[8 * 2 + i], temp[8 * 3 + i]);
        BUTTERFLY1(temp[8 * 4 + i], temp[8 * 5 + i]);
        BUTTERFLY1(temp[8 * 6 + i], temp[8 * 7 + i]);

        BUTTERFLY1(temp[8 * 0 + i], temp[8 * 2 + i]);
        BUTTERFLY1(temp[8 * 1 + i], temp[8 * 3 + i]);
        BUTTERFLY1(temp[8 * 4 + i], temp[8 * 6 + i]);
        BUTTERFLY1(temp[8 * 5 + i], temp[8 * 7 + i]);

        sum += BUTTERFLYA(temp[8 * 0 + i], temp[8 * 4 + i]) +
               BUTTERFLYA(temp[8 * 1 + i], temp[8 * 5 + i]) +
               BUTTERFLYA(temp[8 * 2 + i], temp[8 * 6 + i]) +
               BUTTERFLYA(temp[8 * 3 + i], temp[8 * 7 + i]);
    }
    return sum;
}

static int hadamard8_intra8x8_c(MpegEncContext *s, uint8_t *src,
                                uint8_t *dummy, int stride, int h)
{
    int i, temp[64], sum = 0;

    assert(h == 8);

    for (i = 0; i < 8; i++) {
        // FIXME: try pointer walks
        BUTTERFLY2(temp[8 * i + 0], temp[8 * i + 1],
                   src[stride * i + 0], src[stride * i + 1]);
        BUTTERFLY2(temp[8 * i + 2], temp[8 * i + 3],
                   src[stride * i + 2], src[stride * i + 3]);
        BUTTERFLY2(temp[8 * i + 4], temp[8 * i + 5],
                   src[stride * i + 4], src[stride * i + 5]);
        BUTTERFLY2(temp[8 * i + 6], temp[8 * i + 7],
                   src[stride * i + 6], src[stride * i + 7]);

        BUTTERFLY1(temp[8 * i + 0], temp[8 * i + 2]);
        BUTTERFLY1(temp[8 * i + 1], temp[8 * i + 3]);
        BUTTERFLY1(temp[8 * i + 4], temp[8 * i + 6]);
        BUTTERFLY1(temp[8 * i + 5], temp[8 * i + 7]);

        BUTTERFLY1(temp[8 * i + 0], temp[8 * i + 4]);
        BUTTERFLY1(temp[8 * i + 1], temp[8 * i + 5]);
        BUTTERFLY1(temp[8 * i + 2], temp[8 * i + 6]);
        BUTTERFLY1(temp[8 * i + 3], temp[8 * i + 7]);
    }

    for (i = 0; i < 8; i++) {
        BUTTERFLY1(temp[8 * 0 + i], temp[8 * 1 + i]);
        BUTTERFLY1(temp[8 * 2 + i], temp[8 * 3 + i]);
        BUTTERFLY1(temp[8 * 4 + i], temp[8 * 5 + i]);
        BUTTERFLY1(temp[8 * 6 + i], temp[8 * 7 + i]);

        BUTTERFLY1(temp[8 * 0 + i], temp[8 * 2 + i]);
        BUTTERFLY1(temp[8 * 1 + i], temp[8 * 3 + i]);
        BUTTERFLY1(temp[8 * 4 + i], temp[8 * 6 + i]);
        BUTTERFLY1(temp[8 * 5 + i], temp[8 * 7 + i]);

        sum +=
            BUTTERFLYA(temp[8 * 0 + i], temp[8 * 4 + i])
            + BUTTERFLYA(temp[8 * 1 + i], temp[8 * 5 + i])
            + BUTTERFLYA(temp[8 * 2 + i], temp[8 * 6 + i])
            + BUTTERFLYA(temp[8 * 3 + i], temp[8 * 7 + i]);
    }

    sum -= FFABS(temp[8 * 0] + temp[8 * 4]); // -mean

    return sum;
}

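/* Forward DCT of the block difference, then sum of absolute coefficients. */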
static int dct_sad8x8_c(MpegEncContext *s, uint8_t *src1,
                        uint8_t *src2, int stride, int h)
{
    LOCAL_ALIGNED_16(int16_t, temp, [64]);

    assert(h == 8);

    s->dsp.diff_pixels(temp, src1, src2, stride);
    s->dsp.fdct(temp);
    return s->dsp.sum_abs_dctelem(temp);
}

#if CONFIG_GPL
#define DCT8_1D                                         \
    {                                                   \
        const int s07 = SRC(0) + SRC(7);                \
        const int s16 = SRC(1) + SRC(6);                \
        const int s25 = SRC(2) + SRC(5);                \
        const int s34 = SRC(3) + SRC(4);                \
        const int a0  = s07 + s34;                      \
        const int a1  = s16 + s25;                      \
        const int a2  = s07 - s34;                      \
        const int a3  = s16 - s25;                      \
        const int d07 = SRC(0) - SRC(7);                \
        const int d16 = SRC(1) - SRC(6);                \
        const int d25 = SRC(2) - SRC(5);                \
        const int d34 = SRC(3) - SRC(4);                \
        const int a4  = d16 + d25 + (d07 + (d07 >> 1)); \
        const int a5  = d07 - d34 - (d25 + (d25 >> 1)); \
        const int a6  = d07 + d34 - (d16 + (d16 >> 1)); \
        const int a7  = d16 - d25 + (d34 + (d34 >> 1)); \
        DST(0, a0 + a1);                                \
        DST(1, a4 + (a7 >> 2));                         \
        DST(2, a2 + (a3 >> 1));                         \
        DST(3, a5 + (a6 >> 2));                         \
        DST(4, a0 - a1);                                \
        DST(5, a6 - (a5 >> 2));                         \
        DST(6, (a2 >> 1) - a3);                         \
        DST(7, (a4 >> 2) - a7);                         \
    }

static int dct264_sad8x8_c(MpegEncContext *s, uint8_t *src1,
                           uint8_t *src2, int stride, int h)
{
    int16_t dct[8][8];
    int i, sum = 0;

    s->dsp.diff_pixels(dct[0], src1, src2, stride);

#define SRC(x) dct[i][x]
#define DST(x, v) dct[i][x] = v
    for (i = 0; i < 8; i++)
        DCT8_1D
#undef SRC
#undef DST

#define SRC(x) dct[x][i]
#define DST(x, v) sum += FFABS(v)
    for (i = 0; i < 8; i++)
        DCT8_1D
#undef SRC
#undef DST
    return sum;
}
#endif

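/* dct_max: largest absolute DCT coefficient of the block difference.
 * quant_psnr: squared error introduced by quantizing and dequantizing the
 * difference block. */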
static int dct_max8x8_c(MpegEncContext *s, uint8_t *src1,
                        uint8_t *src2, int stride, int h)
{
    LOCAL_ALIGNED_16(int16_t, temp, [64]);
    int sum = 0, i;

    assert(h == 8);

    s->dsp.diff_pixels(temp, src1, src2, stride);
    s->dsp.fdct(temp);

    for (i = 0; i < 64; i++)
        sum = FFMAX(sum, FFABS(temp[i]));

    return sum;
}

static int quant_psnr8x8_c(MpegEncContext *s, uint8_t *src1,
                           uint8_t *src2, int stride, int h)
{
    LOCAL_ALIGNED_16(int16_t, temp, [64 * 2]);
    int16_t *const bak = temp + 64;
    int sum = 0, i;

    assert(h == 8);
    s->mb_intra = 0;

    s->dsp.diff_pixels(temp, src1, src2, stride);

    memcpy(bak, temp, 64 * sizeof(int16_t));

    s->block_last_index[0 /* FIXME */] =
        s->fast_dct_quantize(s, temp, 0 /* FIXME */, s->qscale, &i);
    s->dct_unquantize_inter(s, temp, 0, s->qscale);
    ff_simple_idct_8(temp); // FIXME

    for (i = 0; i < 64; i++)
        sum += (temp[i] - bak[i]) * (temp[i] - bak[i]);

    return sum;
}

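/* Approximate rate-distortion cost of coding the 8x8 difference: VLC bit count
 * for the quantized coefficients plus the SSE after reconstruction. */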
static int rd8x8_c(MpegEncContext *s, uint8_t *src1, uint8_t *src2,
                   int stride, int h)
{
    const uint8_t *scantable = s->intra_scantable.permutated;
    LOCAL_ALIGNED_16(int16_t, temp, [64]);
    LOCAL_ALIGNED_16(uint8_t, lsrc1, [64]);
    LOCAL_ALIGNED_16(uint8_t, lsrc2, [64]);
    int i, last, run, bits, level, distortion, start_i;
    const int esc_length = s->ac_esc_length;
    uint8_t *length, *last_length;

    assert(h == 8);

    copy_block8(lsrc1, src1, 8, stride, 8);
    copy_block8(lsrc2, src2, 8, stride, 8);

    s->dsp.diff_pixels(temp, lsrc1, lsrc2, 8);

    s->block_last_index[0 /* FIXME */] =
        last =
            s->fast_dct_quantize(s, temp, 0 /* FIXME */, s->qscale, &i);

    bits = 0;

    if (s->mb_intra) {
        start_i     = 1;
        length      = s->intra_ac_vlc_length;
        last_length = s->intra_ac_vlc_last_length;
        bits       += s->luma_dc_vlc_length[temp[0] + 256]; // FIXME: chroma
    } else {
        start_i     = 0;
        length      = s->inter_ac_vlc_length;
        last_length = s->inter_ac_vlc_last_length;
    }

    if (last >= start_i) {
        run = 0;
        for (i = start_i; i < last; i++) {
            int j = scantable[i];
            level = temp[j];

            if (level) {
                level += 64;
                if ((level & (~127)) == 0)
                    bits += length[UNI_AC_ENC_INDEX(run, level)];
                else
                    bits += esc_length;
                run = 0;
            } else
                run++;
        }
        i = scantable[last];

        level = temp[i] + 64;

        assert(level - 64);

        if ((level & (~127)) == 0) {
            bits += last_length[UNI_AC_ENC_INDEX(run, level)];
        } else
            bits += esc_length;
    }

    if (last >= 0) {
        if (s->mb_intra)
            s->dct_unquantize_intra(s, temp, 0, s->qscale);
        else
            s->dct_unquantize_inter(s, temp, 0, s->qscale);
    }

    s->dsp.idct_add(lsrc2, 8, temp);

    distortion = s->dsp.sse[1](NULL, lsrc2, lsrc1, 8, 8);

    return distortion + ((bits * s->qscale * s->qscale * 109 + 64) >> 7);
}

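/* Bit-count-only variant: estimate the VLC bits needed to code the quantized
 * 8x8 difference block. */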
static int bit8x8_c(MpegEncContext *s, uint8_t *src1, uint8_t *src2,
                    int stride, int h)
{
    const uint8_t *scantable = s->intra_scantable.permutated;
    LOCAL_ALIGNED_16(int16_t, temp, [64]);
    int i, last, run, bits, level, start_i;
    const int esc_length = s->ac_esc_length;
    uint8_t *length, *last_length;

    assert(h == 8);

    s->dsp.diff_pixels(temp, src1, src2, stride);

    s->block_last_index[0 /* FIXME */] =
        last =
            s->fast_dct_quantize(s, temp, 0 /* FIXME */, s->qscale, &i);

    bits = 0;

    if (s->mb_intra) {
        start_i     = 1;
        length      = s->intra_ac_vlc_length;
        last_length = s->intra_ac_vlc_last_length;
        bits       += s->luma_dc_vlc_length[temp[0] + 256]; // FIXME: chroma
    } else {
        start_i     = 0;
        length      = s->inter_ac_vlc_length;
        last_length = s->inter_ac_vlc_last_length;
    }

    if (last >= start_i) {
        run = 0;
        for (i = start_i; i < last; i++) {
            int j = scantable[i];
            level = temp[j];

            if (level) {
                level += 64;
                if ((level & (~127)) == 0)
                    bits += length[UNI_AC_ENC_INDEX(run, level)];
                else
                    bits += esc_length;
                run = 0;
            } else
                run++;
        }
        i = scantable[last];

        level = temp[i] + 64;

        assert(level - 64);

        if ((level & (~127)) == 0)
            bits += last_length[UNI_AC_ENC_INDEX(run, level)];
        else
            bits += esc_length;
    }

    return bits;
}

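/* Vertical SAD/SSE metrics: absolute or squared differences between vertically
 * adjacent lines, with (inter) and without (intra) a reference block. */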
#define VSAD_INTRA(size)                                        \
static int vsad_intra ## size ## _c(MpegEncContext *c,          \
                                    uint8_t *s, uint8_t *dummy, \
                                    int stride, int h)          \
{                                                               \
    int score = 0, x, y;                                        \
                                                                \
    for (y = 1; y < h; y++) {                                   \
        for (x = 0; x < size; x += 4) {                         \
            score += FFABS(s[x]     - s[x + stride])     +      \
                     FFABS(s[x + 1] - s[x + stride + 1]) +      \
                     FFABS(s[x + 2] - s[x + 2 + stride]) +      \
                     FFABS(s[x + 3] - s[x + 3 + stride]);       \
        }                                                       \
        s += stride;                                            \
    }                                                           \
                                                                \
    return score;                                               \
}
VSAD_INTRA(8)
VSAD_INTRA(16)

static int vsad16_c(MpegEncContext *c, uint8_t *s1, uint8_t *s2,
                    int stride, int h)
{
    int score = 0, x, y;

    for (y = 1; y < h; y++) {
        for (x = 0; x < 16; x++)
            score += FFABS(s1[x] - s2[x] - s1[x + stride] + s2[x + stride]);
        s1 += stride;
        s2 += stride;
    }

    return score;
}

#define SQ(a) ((a) * (a))
#define VSSE_INTRA(size)                                        \
static int vsse_intra ## size ## _c(MpegEncContext *c,          \
                                    uint8_t *s, uint8_t *dummy, \
                                    int stride, int h)          \
{                                                               \
    int score = 0, x, y;                                        \
                                                                \
    for (y = 1; y < h; y++) {                                   \
        for (x = 0; x < size; x += 4) {                         \
            score += SQ(s[x]     - s[x + stride])     +         \
                     SQ(s[x + 1] - s[x + stride + 1]) +         \
                     SQ(s[x + 2] - s[x + stride + 2]) +         \
                     SQ(s[x + 3] - s[x + stride + 3]);          \
        }                                                       \
        s += stride;                                            \
    }                                                           \
                                                                \
    return score;                                               \
}
VSSE_INTRA(8)
VSSE_INTRA(16)

static int vsse16_c(MpegEncContext *c, uint8_t *s1, uint8_t *s2,
                    int stride, int h)
{
    int score = 0, x, y;

    for (y = 1; y < h; y++) {
        for (x = 0; x < 16; x++)
            score += SQ(s1[x] - s2[x] - s1[x + stride] + s2[x + stride]);
        s1 += stride;
        s2 += stride;
    }

    return score;
}

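/* Build a 16-pixel-wide comparison function from the corresponding 8x8 one by
 * tiling it over the block. */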
#define WRAPPER8_16_SQ(name8, name16)                              \
static int name16(MpegEncContext *s, uint8_t *dst, uint8_t *src,   \
                  int stride, int h)                               \
{                                                                  \
    int score = 0;                                                 \
                                                                   \
    score += name8(s, dst, src, stride, 8);                        \
    score += name8(s, dst + 8, src + 8, stride, 8);                \
    if (h == 16) {                                                 \
        dst   += 8 * stride;                                       \
        src   += 8 * stride;                                       \
        score += name8(s, dst, src, stride, 8);                    \
        score += name8(s, dst + 8, src + 8, stride, 8);            \
    }                                                              \
    return score;                                                  \
}

WRAPPER8_16_SQ(hadamard8_diff8x8_c, hadamard8_diff16_c)
WRAPPER8_16_SQ(hadamard8_intra8x8_c, hadamard8_intra16_c)
WRAPPER8_16_SQ(dct_sad8x8_c, dct_sad16_c)
#if CONFIG_GPL
WRAPPER8_16_SQ(dct264_sad8x8_c, dct264_sad16_c)
#endif
WRAPPER8_16_SQ(dct_max8x8_c, dct_max16_c)
WRAPPER8_16_SQ(quant_psnr8x8_c, quant_psnr16_c)
WRAPPER8_16_SQ(rd8x8_c, rd16_c)
WRAPPER8_16_SQ(bit8x8_c, bit16_c)

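/* Wrappers around the integer reference IDCT (ff_j_rev_dct). */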
static void jref_idct_put(uint8_t *dest, int line_size, int16_t *block)
{
    ff_j_rev_dct(block);
    put_pixels_clamped_c(block, dest, line_size);
}

static void jref_idct_add(uint8_t *dest, int line_size, int16_t *block)
{
    ff_j_rev_dct(block);
    add_pixels_clamped_c(block, dest, line_size);
}

/* draw the edges of width 'w' of an image of size width, height */
// FIXME: Check that this is OK for MPEG-4 interlaced.
static void draw_edges_8_c(uint8_t *buf, int wrap, int width, int height,
                           int w, int h, int sides)
{
    uint8_t *ptr = buf, *last_line;
    int i;

    /* left and right */
    for (i = 0; i < height; i++) {
        memset(ptr - w, ptr[0], w);
        memset(ptr + width, ptr[width - 1], w);
        ptr += wrap;
    }

    /* top and bottom + corners */
    buf -= w;
    last_line = buf + (height - 1) * wrap;
    if (sides & EDGE_TOP)
        for (i = 0; i < h; i++)
            // top
            memcpy(buf - (i + 1) * wrap, buf, width + w + w);
    if (sides & EDGE_BOTTOM)
        for (i = 0; i < h; i++)
            // bottom
            memcpy(last_line + (i + 1) * wrap, last_line, width + w + w);
}

/* init static data */
av_cold void ff_dsputil_static_init(void)
{
    int i;

    for (i = 0; i < 512; i++)
        ff_square_tab[i] = (i - 256) * (i - 256);
}

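/* Fill a DSPContext with the C implementations, then let the per-architecture
 * init functions override them with optimized versions. */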
av_cold void ff_dsputil_init(DSPContext *c, AVCodecContext *avctx)
{
    const unsigned high_bit_depth = avctx->bits_per_raw_sample > 8;

#if CONFIG_ENCODERS
    if (avctx->bits_per_raw_sample == 10) {
        c->fdct    = ff_jpeg_fdct_islow_10;
        c->fdct248 = ff_fdct248_islow_10;
    } else {
        if (avctx->dct_algo == FF_DCT_FASTINT) {
            c->fdct    = ff_fdct_ifast;
            c->fdct248 = ff_fdct_ifast248;
        } else if (avctx->dct_algo == FF_DCT_FAAN) {
            c->fdct    = ff_faandct;
            c->fdct248 = ff_faandct248;
        } else {
            c->fdct    = ff_jpeg_fdct_islow_8; // slow/accurate/default
            c->fdct248 = ff_fdct248_islow_8;
        }
    }
#endif /* CONFIG_ENCODERS */

    if (avctx->bits_per_raw_sample == 10) {
        c->idct_put              = ff_simple_idct_put_10;
        c->idct_add              = ff_simple_idct_add_10;
        c->idct                  = ff_simple_idct_10;
        c->idct_permutation_type = FF_NO_IDCT_PERM;
    } else {
        if (avctx->idct_algo == FF_IDCT_INT) {
            c->idct_put              = jref_idct_put;
            c->idct_add              = jref_idct_add;
            c->idct                  = ff_j_rev_dct;
            c->idct_permutation_type = FF_LIBMPEG2_IDCT_PERM;
        } else if (avctx->idct_algo == FF_IDCT_FAAN) {
            c->idct_put              = ff_faanidct_put;
            c->idct_add              = ff_faanidct_add;
            c->idct                  = ff_faanidct;
            c->idct_permutation_type = FF_NO_IDCT_PERM;
        } else { // accurate/default
            c->idct_put              = ff_simple_idct_put_8;
            c->idct_add              = ff_simple_idct_add_8;
            c->idct                  = ff_simple_idct_8;
            c->idct_permutation_type = FF_NO_IDCT_PERM;
        }
    }

    c->diff_pixels = diff_pixels_c;

    c->put_pixels_clamped        = put_pixels_clamped_c;
    c->put_signed_pixels_clamped = put_signed_pixels_clamped_c;
    c->add_pixels_clamped        = add_pixels_clamped_c;

    c->sum_abs_dctelem = sum_abs_dctelem_c;

    c->pix_sum   = pix_sum_c;
    c->pix_norm1 = pix_norm1_c;

    /* TODO [0] 16 [1] 8 */
    c->pix_abs[0][0] = pix_abs16_c;
    c->pix_abs[0][1] = pix_abs16_x2_c;
    c->pix_abs[0][2] = pix_abs16_y2_c;
    c->pix_abs[0][3] = pix_abs16_xy2_c;
    c->pix_abs[1][0] = pix_abs8_c;
    c->pix_abs[1][1] = pix_abs8_x2_c;
    c->pix_abs[1][2] = pix_abs8_y2_c;
    c->pix_abs[1][3] = pix_abs8_xy2_c;

#define SET_CMP_FUNC(name)          \
    c->name[0] = name ## 16_c;      \
    c->name[1] = name ## 8x8_c;

    SET_CMP_FUNC(hadamard8_diff)
    c->hadamard8_diff[4] = hadamard8_intra16_c;
    c->hadamard8_diff[5] = hadamard8_intra8x8_c;
    SET_CMP_FUNC(dct_sad)
    SET_CMP_FUNC(dct_max)
#if CONFIG_GPL
    SET_CMP_FUNC(dct264_sad)
#endif
    c->sad[0] = pix_abs16_c;
    c->sad[1] = pix_abs8_c;
    c->sse[0] = sse16_c;
    c->sse[1] = sse8_c;
    c->sse[2] = sse4_c;
    SET_CMP_FUNC(quant_psnr)
    SET_CMP_FUNC(rd)
    SET_CMP_FUNC(bit)
    c->vsad[0] = vsad16_c;
    c->vsad[4] = vsad_intra16_c;
    c->vsad[5] = vsad_intra8_c;
    c->vsse[0] = vsse16_c;
    c->vsse[4] = vsse_intra16_c;
    c->vsse[5] = vsse_intra8_c;
    c->nsse[0] = nsse16_c;
    c->nsse[1] = nsse8_c;

    c->try_8x8basis = try_8x8basis_c;
    c->add_8x8basis = add_8x8basis_c;

    c->shrink[0] = av_image_copy_plane;
    c->shrink[1] = ff_shrink22;
    c->shrink[2] = ff_shrink44;
    c->shrink[3] = ff_shrink88;

    c->draw_edges = draw_edges_8_c;

    switch (avctx->bits_per_raw_sample) {
    case 9:
    case 10:
        c->get_pixels = get_pixels_16_c;
        break;
    default:
        c->get_pixels = get_pixels_8_c;
        break;
    }

    if (ARCH_ARM)
        ff_dsputil_init_arm(c, avctx, high_bit_depth);
    if (ARCH_PPC)
        ff_dsputil_init_ppc(c, avctx, high_bit_depth);
    if (ARCH_X86)
        ff_dsputil_init_x86(c, avctx, high_bit_depth);

    ff_init_scantable_permutation(c->idct_permutation,
                                  c->idct_permutation_type);
}