You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

1720 lines
72KB

  1. /*
  2. * DSP utils
  3. * Copyright (c) 2000, 2001 Fabrice Bellard.
  4. *
  5. * This library is free software; you can redistribute it and/or
  6. * modify it under the terms of the GNU Lesser General Public
  7. * License as published by the Free Software Foundation; either
  8. * version 2 of the License, or (at your option) any later version.
  9. *
  10. * This library is distributed in the hope that it will be useful,
  11. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  12. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  13. * Lesser General Public License for more details.
  14. *
  15. * You should have received a copy of the GNU Lesser General Public
  16. * License along with this library; if not, write to the Free Software
  17. * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
  18. *
  19. * gmc & q-pel & 32/64 bit based MC by Michael Niedermayer <michaelni@gmx.at>
  20. */
  21. #include "avcodec.h"
  22. #include "dsputil.h"
  23. /*
  24. void (*get_pixels)(DCTELEM *block, const UINT8 *pixels, int line_size);
  25. void (*diff_pixels)(DCTELEM *block, const UINT8 *s1, const UINT8 *s2, int stride);
  26. void (*put_pixels_clamped)(const DCTELEM *block, UINT8 *pixels, int line_size);
  27. void (*add_pixels_clamped)(const DCTELEM *block, UINT8 *pixels, int line_size);
  28. void (*ff_gmc1)(UINT8 *dst, UINT8 *src, int srcStride, int h, int x16, int y16, int rounder);
  29. void (*ff_gmc )(UINT8 *dst, UINT8 *src, int stride, int h, int ox, int oy,
  30. int dxx, int dxy, int dyx, int dyy, int shift, int r, int width, int height);
  31. void (*clear_blocks)(DCTELEM *blocks);
  32. int (*pix_sum)(UINT8 * pix, int line_size);
  33. int (*pix_norm1)(UINT8 * pix, int line_size);
  34. op_pixels_abs_func pix_abs16x16;
  35. op_pixels_abs_func pix_abs16x16_x2;
  36. op_pixels_abs_func pix_abs16x16_y2;
  37. op_pixels_abs_func pix_abs16x16_xy2;
  38. op_pixels_abs_func pix_abs8x8;
  39. op_pixels_abs_func pix_abs8x8_x2;
  40. op_pixels_abs_func pix_abs8x8_y2;
  41. op_pixels_abs_func pix_abs8x8_xy2;
  42. */
/* When non-zero, bit-exact (plain C reference) code paths are selected
 * instead of optimized approximations. */
int ff_bit_exact=0;

/* Clamping table: cropTbl[MAX_NEG_CROP + x] clips x into 0..255.
 * NOTE(review): filled by init code not visible in this chunk — confirm. */
UINT8 cropTbl[256 + 2 * MAX_NEG_CROP];
/* squareTbl[256 + x] holds x*x for signed x; used by pix_norm1_c below.
 * NOTE(review): presumably initialized elsewhere — verify against init code. */
UINT32 squareTbl[512];

/* Classic 8x8 zig-zag scan order (index -> coefficient position). */
const UINT8 ff_zigzag_direct[64] = {
    0,   1,  8, 16,  9,  2,  3, 10,
    17, 24, 32, 25, 18, 11,  4,  5,
    12, 19, 26, 33, 40, 48, 41, 34,
    27, 20, 13,  6,  7, 14, 21, 28,
    35, 42, 49, 56, 57, 50, 43, 36,
    29, 22, 15, 23, 30, 37, 44, 51,
    58, 59, 52, 45, 38, 31, 39, 46,
    53, 60, 61, 54, 47, 55, 62, 63
};

/* not permutated inverse zigzag_direct + 1 for MMX quantizer */
UINT16 __align8 inv_zigzag_direct16[64];

/* Alternate (horizontal) scan order used for interlaced/alt-scan coding. */
const UINT8 ff_alternate_horizontal_scan[64] = {
    0,   1,  2,  3,  8,  9, 16, 17,
    10, 11,  4,  5,  6,  7, 15, 14,
    13, 12, 19, 18, 24, 25, 32, 33,
    26, 27, 20, 21, 22, 23, 28, 29,
    30, 31, 34, 35, 40, 41, 48, 49,
    42, 43, 36, 37, 38, 39, 44, 45,
    46, 47, 50, 51, 56, 57, 58, 59,
    52, 53, 54, 55, 60, 61, 62, 63,
};

/* Alternate (vertical) scan order. */
const UINT8 ff_alternate_vertical_scan[64] = {
    0,   8, 16, 24,  1,  9,  2, 10,
    17, 25, 32, 40, 48, 56, 57, 49,
    41, 33, 26, 18,  3, 11,  4, 12,
    19, 27, 34, 42, 50, 58, 35, 43,
    51, 59, 20, 28,  5, 13,  6, 14,
    21, 29, 36, 44, 52, 60, 37, 45,
    53, 61, 22, 30,  7, 15, 23, 31,
    38, 46, 54, 62, 39, 47, 55, 63,
};

/* a*inverse[b]>>32 == a/b for all 0<=a<=65536 && 2<=b<=255
 * (fixed-point reciprocal table used to replace division by multiplication). */
const UINT32 inverse[256]={
         0, 4294967295U,2147483648U,1431655766, 1073741824,  858993460,  715827883,  613566757,
 536870912,  477218589,  429496730,  390451573,  357913942,  330382100,  306783379,  286331154,
 268435456,  252645136,  238609295,  226050911,  214748365,  204522253,  195225787,  186737709,
 178956971,  171798692,  165191050,  159072863,  153391690,  148102321,  143165577,  138547333,
 134217728,  130150525,  126322568,  122713352,  119304648,  116080198,  113025456,  110127367,
 107374183,  104755300,  102261127,   99882961,   97612894,   95443718,   93368855,   91382283,
  89478486,   87652394,   85899346,   84215046,   82595525,   81037119,   79536432,   78090315,
  76695845,   75350304,   74051161,   72796056,   71582789,   70409300,   69273667,   68174085,
  67108864,   66076420,   65075263,   64103990,   63161284,   62245903,   61356676,   60492498,
  59652324,   58835169,   58040099,   57266231,   56512728,   55778797,   55063684,   54366675,
  53687092,   53024288,   52377650,   51746594,   51130564,   50529028,   49941481,   49367441,
  48806447,   48258060,   47721859,   47197443,   46684428,   46182445,   45691142,   45210183,
  44739243,   44278014,   43826197,   43383509,   42949673,   42524429,   42107523,   41698712,
  41297763,   40904451,   40518560,   40139882,   39768216,   39403370,   39045158,   38693400,
  38347923,   38008561,   37675152,   37347542,   37025581,   36709123,   36398028,   36092163,
  35791395,   35495598,   35204650,   34918434,   34636834,   34359739,   34087043,   33818641,
  33554432,   33294321,   33038210,   32786010,   32537632,   32292988,   32051995,   31814573,
  31580642,   31350127,   31122952,   30899046,   30678338,   30460761,   30246249,   30034737,
  29826162,   29620465,   29417585,   29217465,   29020050,   28825284,   28633116,   28443493,
  28256364,   28071682,   27889399,   27709467,   27531842,   27356480,   27183338,   27012373,
  26843546,   26676816,   26512144,   26349493,   26188825,   26030105,   25873297,   25718368,
  25565282,   25414008,   25264514,   25116768,   24970741,   24826401,   24683721,   24542671,
  24403224,   24265352,   24129030,   23994231,   23860930,   23729102,   23598722,   23469767,
  23342214,   23216040,   23091223,   22967740,   22845571,   22724695,   22605092,   22486740,
  22369622,   22253717,   22139007,   22025474,   21913099,   21801865,   21691755,   21582751,
  21474837,   21367997,   21262215,   21157475,   21053762,   20951060,   20849356,   20748635,
  20648882,   20550083,   20452226,   20355296,   20259280,   20164166,   20069941,   19976593,
  19884108,   19792477,   19701685,   19611723,   19522579,   19434242,   19346700,   19259944,
  19173962,   19088744,   19004281,   18920561,   18837576,   18755316,   18673771,   18592933,
  18512791,   18433337,   18354562,   18276457,   18199014,   18122225,   18046082,   17970575,
  17895698,   17821442,   17747799,   17674763,   17602325,   17530479,   17459217,   17388532,
  17318417,   17248865,   17179870,   17111424,   17043522,   16976156,   16909321,   16843010,
};
  113. static int pix_sum_c(UINT8 * pix, int line_size)
  114. {
  115. int s, i, j;
  116. s = 0;
  117. for (i = 0; i < 16; i++) {
  118. for (j = 0; j < 16; j += 8) {
  119. s += pix[0];
  120. s += pix[1];
  121. s += pix[2];
  122. s += pix[3];
  123. s += pix[4];
  124. s += pix[5];
  125. s += pix[6];
  126. s += pix[7];
  127. pix += 8;
  128. }
  129. pix += line_size - 16;
  130. }
  131. return s;
  132. }
  133. static int pix_norm1_c(UINT8 * pix, int line_size)
  134. {
  135. int s, i, j;
  136. UINT32 *sq = squareTbl + 256;
  137. s = 0;
  138. for (i = 0; i < 16; i++) {
  139. for (j = 0; j < 16; j += 8) {
  140. s += sq[pix[0]];
  141. s += sq[pix[1]];
  142. s += sq[pix[2]];
  143. s += sq[pix[3]];
  144. s += sq[pix[4]];
  145. s += sq[pix[5]];
  146. s += sq[pix[6]];
  147. s += sq[pix[7]];
  148. pix += 8;
  149. }
  150. pix += line_size - 16;
  151. }
  152. return s;
  153. }
  154. static void get_pixels_c(DCTELEM *restrict block, const UINT8 *pixels, int line_size)
  155. {
  156. int i;
  157. /* read the pixels */
  158. for(i=0;i<8;i++) {
  159. block[0] = pixels[0];
  160. block[1] = pixels[1];
  161. block[2] = pixels[2];
  162. block[3] = pixels[3];
  163. block[4] = pixels[4];
  164. block[5] = pixels[5];
  165. block[6] = pixels[6];
  166. block[7] = pixels[7];
  167. pixels += line_size;
  168. block += 8;
  169. }
  170. }
  171. static void diff_pixels_c(DCTELEM *restrict block, const UINT8 *s1,
  172. const UINT8 *s2, int stride){
  173. int i;
  174. /* read the pixels */
  175. for(i=0;i<8;i++) {
  176. block[0] = s1[0] - s2[0];
  177. block[1] = s1[1] - s2[1];
  178. block[2] = s1[2] - s2[2];
  179. block[3] = s1[3] - s2[3];
  180. block[4] = s1[4] - s2[4];
  181. block[5] = s1[5] - s2[5];
  182. block[6] = s1[6] - s2[6];
  183. block[7] = s1[7] - s2[7];
  184. s1 += stride;
  185. s2 += stride;
  186. block += 8;
  187. }
  188. }
  189. static void put_pixels_clamped_c(const DCTELEM *block, UINT8 *restrict pixels,
  190. int line_size)
  191. {
  192. int i;
  193. UINT8 *cm = cropTbl + MAX_NEG_CROP;
  194. /* read the pixels */
  195. for(i=0;i<8;i++) {
  196. pixels[0] = cm[block[0]];
  197. pixels[1] = cm[block[1]];
  198. pixels[2] = cm[block[2]];
  199. pixels[3] = cm[block[3]];
  200. pixels[4] = cm[block[4]];
  201. pixels[5] = cm[block[5]];
  202. pixels[6] = cm[block[6]];
  203. pixels[7] = cm[block[7]];
  204. pixels += line_size;
  205. block += 8;
  206. }
  207. }
  208. static void add_pixels_clamped_c(const DCTELEM *block, UINT8 *restrict pixels,
  209. int line_size)
  210. {
  211. int i;
  212. UINT8 *cm = cropTbl + MAX_NEG_CROP;
  213. /* read the pixels */
  214. for(i=0;i<8;i++) {
  215. pixels[0] = cm[pixels[0] + block[0]];
  216. pixels[1] = cm[pixels[1] + block[1]];
  217. pixels[2] = cm[pixels[2] + block[2]];
  218. pixels[3] = cm[pixels[3] + block[3]];
  219. pixels[4] = cm[pixels[4] + block[4]];
  220. pixels[5] = cm[pixels[5] + block[5]];
  221. pixels[6] = cm[pixels[6] + block[6]];
  222. pixels[7] = cm[pixels[7] + block[7]];
  223. pixels += line_size;
  224. block += 8;
  225. }
  226. }
/*
 * PIXOP2 generates the whole family of pixel copy/average functions
 * ("put"/"avg", plain / x-half-pel / y-half-pel / xy-half-pel, rounding
 * and no-rounding variants) via token pasting.  Two implementations are
 * provided: a 64-bit-word variant (disabled below with #if 0) and the
 * active 32-bit-word variant in the #else branch.  The bit tricks rely
 * on fixed constants (0xFEFEFEFE masks for pairwise averaging,
 * 0x03/0xFC masks plus 0x02/0x01 rounders for 4-way averaging), so the
 * exact expression order must not be disturbed.
 */
#if 0
#define PIXOP2(OPNAME, OP) \
static void OPNAME ## _pixels(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
    int i;\
    for(i=0; i<h; i++){\
        OP(*((uint64_t*)block), LD64(pixels));\
        pixels+=line_size;\
        block +=line_size;\
    }\
}\
\
static void OPNAME ## _no_rnd_pixels_x2(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
    int i;\
    for(i=0; i<h; i++){\
        const uint64_t a= LD64(pixels  );\
        const uint64_t b= LD64(pixels+1);\
        OP(*((uint64_t*)block), (a&b) + (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\
        pixels+=line_size;\
        block +=line_size;\
    }\
}\
\
static void OPNAME ## _pixels_x2(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
    int i;\
    for(i=0; i<h; i++){\
        const uint64_t a= LD64(pixels  );\
        const uint64_t b= LD64(pixels+1);\
        OP(*((uint64_t*)block), (a|b) - (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\
        pixels+=line_size;\
        block +=line_size;\
    }\
}\
\
static void OPNAME ## _no_rnd_pixels_y2(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
    int i;\
    for(i=0; i<h; i++){\
        const uint64_t a= LD64(pixels          );\
        const uint64_t b= LD64(pixels+line_size);\
        OP(*((uint64_t*)block), (a&b) + (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\
        pixels+=line_size;\
        block +=line_size;\
    }\
}\
\
static void OPNAME ## _pixels_y2(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
    int i;\
    for(i=0; i<h; i++){\
        const uint64_t a= LD64(pixels          );\
        const uint64_t b= LD64(pixels+line_size);\
        OP(*((uint64_t*)block), (a|b) - (((a^b)&0xFEFEFEFEFEFEFEFEULL)>>1));\
        pixels+=line_size;\
        block +=line_size;\
    }\
}\
\
static void OPNAME ## _pixels_xy2(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
    int i;\
    const uint64_t a= LD64(pixels  );\
    const uint64_t b= LD64(pixels+1);\
    uint64_t l0=  (a&0x0303030303030303ULL)\
                + (b&0x0303030303030303ULL)\
                + 0x0202020202020202ULL;\
    uint64_t h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
               + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
    uint64_t l1,h1;\
\
    pixels+=line_size;\
    for(i=0; i<h; i+=2){\
        uint64_t a= LD64(pixels  );\
        uint64_t b= LD64(pixels+1);\
        l1=  (a&0x0303030303030303ULL)\
           + (b&0x0303030303030303ULL);\
        h1= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
          + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
        OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\
        pixels+=line_size;\
        block +=line_size;\
        a= LD64(pixels  );\
        b= LD64(pixels+1);\
        l0=  (a&0x0303030303030303ULL)\
           + (b&0x0303030303030303ULL)\
           + 0x0202020202020202ULL;\
        h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
          + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
        OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\
        pixels+=line_size;\
        block +=line_size;\
    }\
}\
\
static void OPNAME ## _no_rnd_pixels_xy2(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
    int i;\
    const uint64_t a= LD64(pixels  );\
    const uint64_t b= LD64(pixels+1);\
    uint64_t l0=  (a&0x0303030303030303ULL)\
                + (b&0x0303030303030303ULL)\
                + 0x0101010101010101ULL;\
    uint64_t h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
               + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
    uint64_t l1,h1;\
\
    pixels+=line_size;\
    for(i=0; i<h; i+=2){\
        uint64_t a= LD64(pixels  );\
        uint64_t b= LD64(pixels+1);\
        l1=  (a&0x0303030303030303ULL)\
           + (b&0x0303030303030303ULL);\
        h1= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
          + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
        OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\
        pixels+=line_size;\
        block +=line_size;\
        a= LD64(pixels  );\
        b= LD64(pixels+1);\
        l0=  (a&0x0303030303030303ULL)\
           + (b&0x0303030303030303ULL)\
           + 0x0101010101010101ULL;\
        h0= ((a&0xFCFCFCFCFCFCFCFCULL)>>2)\
          + ((b&0xFCFCFCFCFCFCFCFCULL)>>2);\
        OP(*((uint64_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0F0F0F0F0FULL));\
        pixels+=line_size;\
        block +=line_size;\
    }\
}\
\
CALL_2X_PIXELS(OPNAME ## _pixels16    , OPNAME ## _pixels    , 8)\
CALL_2X_PIXELS(OPNAME ## _pixels16_x2 , OPNAME ## _pixels_x2 , 8)\
CALL_2X_PIXELS(OPNAME ## _pixels16_y2 , OPNAME ## _pixels_y2 , 8)\
CALL_2X_PIXELS(OPNAME ## _pixels16_xy2, OPNAME ## _pixels_xy2, 8)\
CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_x2 , OPNAME ## _no_rnd_pixels_x2 , 8)\
CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_y2 , OPNAME ## _no_rnd_pixels_y2 , 8)\
CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_xy2, OPNAME ## _no_rnd_pixels_xy2, 8)\
\
void (*OPNAME ## _pixels_tab[2][4])(uint8_t *block, const uint8_t *pixels, int line_size, int h) = {\
    {\
        OPNAME ## _pixels,\
        OPNAME ## _pixels_x2,\
        OPNAME ## _pixels_y2,\
        OPNAME ## _pixels_xy2},\
    {\
        OPNAME ## _pixels16,\
        OPNAME ## _pixels16_x2,\
        OPNAME ## _pixels16_y2,\
        OPNAME ## _pixels16_xy2}\
};\
\
void (*OPNAME ## _no_rnd_pixels_tab[2][4])(uint8_t *block, const uint8_t *pixels, int line_size, int h) = {\
    {\
        OPNAME ## _pixels,\
        OPNAME ## _no_rnd_pixels_x2,\
        OPNAME ## _no_rnd_pixels_y2,\
        OPNAME ## _no_rnd_pixels_xy2},\
    {\
        OPNAME ## _pixels16,\
        OPNAME ## _no_rnd_pixels16_x2,\
        OPNAME ## _no_rnd_pixels16_y2,\
        OPNAME ## _no_rnd_pixels16_xy2}\
};

/* 64-bit pairwise average with rounding toward +inf. */
#define op_avg(a, b) a = ( ((a)|(b)) - ((((a)^(b))&0xFEFEFEFEFEFEFEFEULL)>>1) )
#else // 64 bit variant

/* Active 32-bit variant: processes 8 pixels as two 32-bit words. */
#define PIXOP2(OPNAME, OP) \
static void OPNAME ## _pixels8(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    int i;\
    for(i=0; i<h; i++){\
        OP(*((uint32_t*)(block  )), LD32(pixels  ));\
        OP(*((uint32_t*)(block+4)), LD32(pixels+4));\
        pixels+=line_size;\
        block +=line_size;\
    }\
}\
static inline void OPNAME ## _no_rnd_pixels8(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    OPNAME ## _pixels8(block, pixels, line_size, h);\
}\
\
static inline void OPNAME ## _no_rnd_pixels8_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
                                                int src_stride1, int src_stride2, int h){\
    int i;\
    for(i=0; i<h; i++){\
        uint32_t a,b;\
        a= LD32(&src1[i*src_stride1  ]);\
        b= LD32(&src2[i*src_stride2  ]);\
        OP(*((uint32_t*)&dst[i*dst_stride  ]), (a&b) + (((a^b)&0xFEFEFEFEUL)>>1));\
        a= LD32(&src1[i*src_stride1+4]);\
        b= LD32(&src2[i*src_stride2+4]);\
        OP(*((uint32_t*)&dst[i*dst_stride+4]), (a&b) + (((a^b)&0xFEFEFEFEUL)>>1));\
    }\
}\
\
static inline void OPNAME ## _pixels8_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
                                         int src_stride1, int src_stride2, int h){\
    int i;\
    for(i=0; i<h; i++){\
        uint32_t a,b;\
        a= LD32(&src1[i*src_stride1  ]);\
        b= LD32(&src2[i*src_stride2  ]);\
        OP(*((uint32_t*)&dst[i*dst_stride  ]), (a|b) - (((a^b)&0xFEFEFEFEUL)>>1));\
        a= LD32(&src1[i*src_stride1+4]);\
        b= LD32(&src2[i*src_stride2+4]);\
        OP(*((uint32_t*)&dst[i*dst_stride+4]), (a|b) - (((a^b)&0xFEFEFEFEUL)>>1));\
    }\
}\
\
static inline void OPNAME ## _pixels16_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
                                          int src_stride1, int src_stride2, int h){\
    OPNAME ## _pixels8_l2(dst  , src1  , src2  , dst_stride, src_stride1, src_stride2, h);\
    OPNAME ## _pixels8_l2(dst+8, src1+8, src2+8, dst_stride, src_stride1, src_stride2, h);\
}\
\
static inline void OPNAME ## _no_rnd_pixels16_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, \
                                                 int src_stride1, int src_stride2, int h){\
    OPNAME ## _no_rnd_pixels8_l2(dst  , src1  , src2  , dst_stride, src_stride1, src_stride2, h);\
    OPNAME ## _no_rnd_pixels8_l2(dst+8, src1+8, src2+8, dst_stride, src_stride1, src_stride2, h);\
}\
\
static inline void OPNAME ## _no_rnd_pixels8_x2(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    OPNAME ## _no_rnd_pixels8_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\
}\
\
static inline void OPNAME ## _pixels8_x2(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    OPNAME ## _pixels8_l2(block, pixels, pixels+1, line_size, line_size, line_size, h);\
}\
\
static inline void OPNAME ## _no_rnd_pixels8_y2(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    OPNAME ## _no_rnd_pixels8_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
}\
\
static inline void OPNAME ## _pixels8_y2(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
    OPNAME ## _pixels8_l2(block, pixels, pixels+line_size, line_size, line_size, line_size, h);\
}\
\
static inline void OPNAME ## _pixels8_l4(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,\
                 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
    int i;\
    for(i=0; i<h; i++){\
        uint32_t a, b, c, d, l0, l1, h0, h1;\
        a= LD32(&src1[i*src_stride1]);\
        b= LD32(&src2[i*src_stride2]);\
        c= LD32(&src3[i*src_stride3]);\
        d= LD32(&src4[i*src_stride4]);\
        l0=  (a&0x03030303UL)\
           + (b&0x03030303UL)\
           + 0x02020202UL;\
        h0= ((a&0xFCFCFCFCUL)>>2)\
          + ((b&0xFCFCFCFCUL)>>2);\
        l1=  (c&0x03030303UL)\
           + (d&0x03030303UL);\
        h1= ((c&0xFCFCFCFCUL)>>2)\
          + ((d&0xFCFCFCFCUL)>>2);\
        OP(*((uint32_t*)&dst[i*dst_stride]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
        a= LD32(&src1[i*src_stride1+4]);\
        b= LD32(&src2[i*src_stride2+4]);\
        c= LD32(&src3[i*src_stride3+4]);\
        d= LD32(&src4[i*src_stride4+4]);\
        l0=  (a&0x03030303UL)\
           + (b&0x03030303UL)\
           + 0x02020202UL;\
        h0= ((a&0xFCFCFCFCUL)>>2)\
          + ((b&0xFCFCFCFCUL)>>2);\
        l1=  (c&0x03030303UL)\
           + (d&0x03030303UL);\
        h1= ((c&0xFCFCFCFCUL)>>2)\
          + ((d&0xFCFCFCFCUL)>>2);\
        OP(*((uint32_t*)&dst[i*dst_stride+4]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
    }\
}\
static inline void OPNAME ## _no_rnd_pixels8_l4(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,\
                 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
    int i;\
    for(i=0; i<h; i++){\
        uint32_t a, b, c, d, l0, l1, h0, h1;\
        a= LD32(&src1[i*src_stride1]);\
        b= LD32(&src2[i*src_stride2]);\
        c= LD32(&src3[i*src_stride3]);\
        d= LD32(&src4[i*src_stride4]);\
        l0=  (a&0x03030303UL)\
           + (b&0x03030303UL)\
           + 0x01010101UL;\
        h0= ((a&0xFCFCFCFCUL)>>2)\
          + ((b&0xFCFCFCFCUL)>>2);\
        l1=  (c&0x03030303UL)\
           + (d&0x03030303UL);\
        h1= ((c&0xFCFCFCFCUL)>>2)\
          + ((d&0xFCFCFCFCUL)>>2);\
        OP(*((uint32_t*)&dst[i*dst_stride]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
        a= LD32(&src1[i*src_stride1+4]);\
        b= LD32(&src2[i*src_stride2+4]);\
        c= LD32(&src3[i*src_stride3+4]);\
        d= LD32(&src4[i*src_stride4+4]);\
        l0=  (a&0x03030303UL)\
           + (b&0x03030303UL)\
           + 0x01010101UL;\
        h0= ((a&0xFCFCFCFCUL)>>2)\
          + ((b&0xFCFCFCFCUL)>>2);\
        l1=  (c&0x03030303UL)\
           + (d&0x03030303UL);\
        h1= ((c&0xFCFCFCFCUL)>>2)\
          + ((d&0xFCFCFCFCUL)>>2);\
        OP(*((uint32_t*)&dst[i*dst_stride+4]), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
    }\
}\
static inline void OPNAME ## _pixels16_l4(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,\
                 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
    OPNAME ## _pixels8_l4(dst  , src1  , src2  , src3  , src4  , dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
    OPNAME ## _pixels8_l4(dst+8, src1+8, src2+8, src3+8, src4+8, dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
}\
static inline void OPNAME ## _no_rnd_pixels16_l4(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,\
                 int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
    OPNAME ## _no_rnd_pixels8_l4(dst  , src1  , src2  , src3  , src4  , dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
    OPNAME ## _no_rnd_pixels8_l4(dst+8, src1+8, src2+8, src3+8, src4+8, dst_stride, src_stride1, src_stride2, src_stride3, src_stride4, h);\
}\
\
static inline void OPNAME ## _pixels8_xy2(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
    int j;\
    for(j=0; j<2; j++){\
        int i;\
        const uint32_t a= LD32(pixels  );\
        const uint32_t b= LD32(pixels+1);\
        uint32_t l0=  (a&0x03030303UL)\
                    + (b&0x03030303UL)\
                    + 0x02020202UL;\
        uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\
                   + ((b&0xFCFCFCFCUL)>>2);\
        uint32_t l1,h1;\
\
        pixels+=line_size;\
        for(i=0; i<h; i+=2){\
            uint32_t a= LD32(pixels  );\
            uint32_t b= LD32(pixels+1);\
            l1=  (a&0x03030303UL)\
               + (b&0x03030303UL);\
            h1= ((a&0xFCFCFCFCUL)>>2)\
              + ((b&0xFCFCFCFCUL)>>2);\
            OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
            pixels+=line_size;\
            block +=line_size;\
            a= LD32(pixels  );\
            b= LD32(pixels+1);\
            l0=  (a&0x03030303UL)\
               + (b&0x03030303UL)\
               + 0x02020202UL;\
            h0= ((a&0xFCFCFCFCUL)>>2)\
              + ((b&0xFCFCFCFCUL)>>2);\
            OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
            pixels+=line_size;\
            block +=line_size;\
        }\
        pixels+=4-line_size*(h+1);\
        block +=4-line_size*h;\
    }\
}\
\
static inline void OPNAME ## _no_rnd_pixels8_xy2(uint8_t *block, const uint8_t *pixels, int line_size, int h)\
{\
    int j;\
    for(j=0; j<2; j++){\
        int i;\
        const uint32_t a= LD32(pixels  );\
        const uint32_t b= LD32(pixels+1);\
        uint32_t l0=  (a&0x03030303UL)\
                    + (b&0x03030303UL)\
                    + 0x01010101UL;\
        uint32_t h0= ((a&0xFCFCFCFCUL)>>2)\
                   + ((b&0xFCFCFCFCUL)>>2);\
        uint32_t l1,h1;\
\
        pixels+=line_size;\
        for(i=0; i<h; i+=2){\
            uint32_t a= LD32(pixels  );\
            uint32_t b= LD32(pixels+1);\
            l1=  (a&0x03030303UL)\
               + (b&0x03030303UL);\
            h1= ((a&0xFCFCFCFCUL)>>2)\
              + ((b&0xFCFCFCFCUL)>>2);\
            OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
            pixels+=line_size;\
            block +=line_size;\
            a= LD32(pixels  );\
            b= LD32(pixels+1);\
            l0=  (a&0x03030303UL)\
               + (b&0x03030303UL)\
               + 0x01010101UL;\
            h0= ((a&0xFCFCFCFCUL)>>2)\
              + ((b&0xFCFCFCFCUL)>>2);\
            OP(*((uint32_t*)block), h0+h1+(((l0+l1)>>2)&0x0F0F0F0FUL));\
            pixels+=line_size;\
            block +=line_size;\
        }\
        pixels+=4-line_size*(h+1);\
        block +=4-line_size*h;\
    }\
}\
\
CALL_2X_PIXELS(OPNAME ## _pixels16    , OPNAME ## _pixels8    , 8)\
CALL_2X_PIXELS(OPNAME ## _pixels16_x2 , OPNAME ## _pixels8_x2 , 8)\
CALL_2X_PIXELS(OPNAME ## _pixels16_y2 , OPNAME ## _pixels8_y2 , 8)\
CALL_2X_PIXELS(OPNAME ## _pixels16_xy2, OPNAME ## _pixels8_xy2, 8)\
CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16    , OPNAME ## _pixels8    , 8)\
CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_x2 , OPNAME ## _no_rnd_pixels8_x2 , 8)\
CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_y2 , OPNAME ## _no_rnd_pixels8_y2 , 8)\
CALL_2X_PIXELS(OPNAME ## _no_rnd_pixels16_xy2, OPNAME ## _no_rnd_pixels8_xy2, 8)\
\
void (*OPNAME ## _pixels_tab[2][4])(uint8_t *block, const uint8_t *pixels, int line_size, int h) = {\
    {\
        OPNAME ## _pixels16,\
        OPNAME ## _pixels16_x2,\
        OPNAME ## _pixels16_y2,\
        OPNAME ## _pixels16_xy2},\
    {\
        OPNAME ## _pixels8,\
        OPNAME ## _pixels8_x2,\
        OPNAME ## _pixels8_y2,\
        OPNAME ## _pixels8_xy2},\
};\
\
void (*OPNAME ## _no_rnd_pixels_tab[2][4])(uint8_t *block, const uint8_t *pixels, int line_size, int h) = {\
    {\
        OPNAME ## _pixels16,\
        OPNAME ## _no_rnd_pixels16_x2,\
        OPNAME ## _no_rnd_pixels16_y2,\
        OPNAME ## _no_rnd_pixels16_xy2},\
    {\
        OPNAME ## _pixels8,\
        OPNAME ## _no_rnd_pixels8_x2,\
        OPNAME ## _no_rnd_pixels8_y2,\
        OPNAME ## _no_rnd_pixels8_xy2},\
};

/* 32-bit pairwise average with rounding toward +inf. */
#define op_avg(a, b) a = ( ((a)|(b)) - ((((a)^(b))&0xFEFEFEFEUL)>>1) )
#endif
/* plain store */
#define op_put(a, b) a = b

/* Instantiate the averaging ("avg") and storing ("put") families. */
PIXOP2(avg, op_avg)
PIXOP2(put, op_put)

#undef op_avg
#undef op_put
#if 0
/* FIXME: this stuff could be removed as it's not really used anymore.
 * Legacy scalar (byte-at-a-time) generator for the same put/avg/sub pixel
 * operation families; superseded by the word-wise PIXOP2 above and kept
 * disabled. */
#define PIXOP(BTYPE, OPNAME, OP, INCR) \
\
static void OPNAME ## _pixels(BTYPE *block, const UINT8 *pixels, int line_size, int h)    \
{ \
    BTYPE *p; \
    const UINT8 *pix; \
    \
    p = block; \
    pix = pixels; \
    do { \
        OP(p[0], pix[0]); \
        OP(p[1], pix[1]); \
        OP(p[2], pix[2]); \
        OP(p[3], pix[3]); \
        OP(p[4], pix[4]); \
        OP(p[5], pix[5]); \
        OP(p[6], pix[6]); \
        OP(p[7], pix[7]); \
        pix += line_size; \
        p += INCR; \
    } while (--h);; \
} \
\
static void OPNAME ## _pixels_x2(BTYPE *block, const UINT8 *pixels, int line_size, int h)   \
{ \
    BTYPE *p; \
    const UINT8 *pix; \
    \
    p = block; \
    pix = pixels; \
    do { \
        OP(p[0], avg2(pix[0], pix[1])); \
        OP(p[1], avg2(pix[1], pix[2])); \
        OP(p[2], avg2(pix[2], pix[3])); \
        OP(p[3], avg2(pix[3], pix[4])); \
        OP(p[4], avg2(pix[4], pix[5])); \
        OP(p[5], avg2(pix[5], pix[6])); \
        OP(p[6], avg2(pix[6], pix[7])); \
        OP(p[7], avg2(pix[7], pix[8])); \
        pix += line_size; \
        p += INCR; \
    } while (--h); \
} \
\
static void OPNAME ## _pixels_y2(BTYPE *block, const UINT8 *pixels, int line_size, int h)     \
{ \
    BTYPE *p; \
    const UINT8 *pix; \
    const UINT8 *pix1; \
    \
    p = block; \
    pix = pixels; \
    pix1 = pixels + line_size; \
    do { \
        OP(p[0], avg2(pix[0], pix1[0])); \
        OP(p[1], avg2(pix[1], pix1[1])); \
        OP(p[2], avg2(pix[2], pix1[2])); \
        OP(p[3], avg2(pix[3], pix1[3])); \
        OP(p[4], avg2(pix[4], pix1[4])); \
        OP(p[5], avg2(pix[5], pix1[5])); \
        OP(p[6], avg2(pix[6], pix1[6])); \
        OP(p[7], avg2(pix[7], pix1[7])); \
        pix += line_size; \
        pix1 += line_size; \
        p += INCR; \
    } while(--h); \
} \
\
static void OPNAME ## _pixels_xy2(BTYPE *block, const UINT8 *pixels, int line_size, int h)  \
{ \
    BTYPE *p; \
    const UINT8 *pix; \
    const UINT8 *pix1; \
    \
    p = block; \
    pix = pixels; \
    pix1 = pixels + line_size; \
    do { \
        OP(p[0], avg4(pix[0], pix[1], pix1[0], pix1[1])); \
        OP(p[1], avg4(pix[1], pix[2], pix1[1], pix1[2])); \
        OP(p[2], avg4(pix[2], pix[3], pix1[2], pix1[3])); \
        OP(p[3], avg4(pix[3], pix[4], pix1[3], pix1[4])); \
        OP(p[4], avg4(pix[4], pix[5], pix1[4], pix1[5])); \
        OP(p[5], avg4(pix[5], pix[6], pix1[5], pix1[6])); \
        OP(p[6], avg4(pix[6], pix[7], pix1[6], pix1[7])); \
        OP(p[7], avg4(pix[7], pix[8], pix1[7], pix1[8])); \
        pix += line_size; \
        pix1 += line_size; \
        p += INCR; \
    } while(--h); \
} \
\
void (*OPNAME ## _pixels_tab[4])(BTYPE *block, const UINT8 *pixels, int line_size, int h) = { \
    OPNAME ## _pixels, \
    OPNAME ## _pixels_x2, \
    OPNAME ## _pixels_y2, \
    OPNAME ## _pixels_xy2, \
};

/* rounding primitives */
#define avg2(a,b) ((a+b+1)>>1)
#define avg4(a,b,c,d) ((a+b+c+d+2)>>2)
#define op_avg(a, b) a = avg2(a, b)
#define op_sub(a, b) a -= b
#define op_put(a, b) a = b

PIXOP(DCTELEM, sub, op_sub, 8)
PIXOP(uint8_t, avg, op_avg, line_size)
PIXOP(uint8_t, put, op_put, line_size)

/* not rounding primitives */
#undef avg2
#undef avg4
#define avg2(a,b) ((a+b)>>1)
#define avg4(a,b,c,d) ((a+b+c+d+1)>>2)

PIXOP(uint8_t, avg_no_rnd, op_avg, line_size)
PIXOP(uint8_t, put_no_rnd, op_put, line_size)

/* motion estimation */
#undef avg2
#undef avg4
#endif
/* Rounding 2-tap and 4-tap pixel averages used by the motion
 * compensation helpers below. */
#define avg2(a,b) ((a+b+1)>>1)
#define avg4(a,b,c,d) ((a+b+c+d+2)>>2)
  790. static void gmc1_c(UINT8 *dst, UINT8 *src, int stride, int h, int x16, int y16, int rounder)
  791. {
  792. const int A=(16-x16)*(16-y16);
  793. const int B=( x16)*(16-y16);
  794. const int C=(16-x16)*( y16);
  795. const int D=( x16)*( y16);
  796. int i;
  797. for(i=0; i<h; i++)
  798. {
  799. dst[0]= (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1] + rounder)>>8;
  800. dst[1]= (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2] + rounder)>>8;
  801. dst[2]= (A*src[2] + B*src[3] + C*src[stride+2] + D*src[stride+3] + rounder)>>8;
  802. dst[3]= (A*src[3] + B*src[4] + C*src[stride+3] + D*src[stride+4] + rounder)>>8;
  803. dst[4]= (A*src[4] + B*src[5] + C*src[stride+4] + D*src[stride+5] + rounder)>>8;
  804. dst[5]= (A*src[5] + B*src[6] + C*src[stride+5] + D*src[stride+6] + rounder)>>8;
  805. dst[6]= (A*src[6] + B*src[7] + C*src[stride+6] + D*src[stride+7] + rounder)>>8;
  806. dst[7]= (A*src[7] + B*src[8] + C*src[stride+7] + D*src[stride+8] + rounder)>>8;
  807. dst+= stride;
  808. src+= stride;
  809. }
  810. }
/*
 * General (affine) global motion compensation for one 8-pixel-wide block.
 * (ox, oy) is the 16.16 fixed-point source position of the first pixel;
 * dxx/dxy/dyx/dyy are the per-column/per-row position increments, shift is
 * the sub-pel precision, r the rounding bias, and width/height the source
 * dimensions used for edge clamping.
 */
static void gmc_c(UINT8 *dst, UINT8 *src, int stride, int h, int ox, int oy,
                  int dxx, int dxy, int dyx, int dyy, int shift, int r, int width, int height)
{
    int y, vx, vy;
    const int s= 1<<shift;  /* sub-pel interpolation scale */

    /* from here on width/height are the last valid column/row index */
    width--;
    height--;

    for(y=0; y<h; y++){
        int x;

        vx= ox;
        vy= oy;
        for(x=0; x<8; x++){ //XXX FIXME optimize
            int src_x, src_y, frac_x, frac_y, index;

            /* split the 16.16 position into integer and fractional parts;
             * the fraction is taken before the final >>shift */
            src_x= vx>>16;
            src_y= vy>>16;
            frac_x= src_x&(s-1);
            frac_y= src_y&(s-1);
            src_x>>=shift;
            src_y>>=shift;

            if((unsigned)src_x < width){
                if((unsigned)src_y < height){
                    /* fully inside the source: bilinear blend of the
                     * four neighbouring pixels */
                    index= src_x + src_y*stride;
                    dst[y*stride + x]= ( ( src[index ]*(s-frac_x)
                                         + src[index +1]* frac_x )*(s-frac_y)
                                       + ( src[index+stride ]*(s-frac_x)
                                         + src[index+stride+1]* frac_x )* frac_y
                                       + r)>>(shift*2);
                }else{
                    /* vertically outside: clamp y, interpolate in x only */
                    index= src_x + clip(src_y, 0, height)*stride;
                    dst[y*stride + x]= ( ( src[index ]*(s-frac_x)
                                         + src[index +1]* frac_x )*s
                                       + r)>>(shift*2);
                }
            }else{
                if((unsigned)src_y < height){
                    /* horizontally outside: clamp x, interpolate in y only */
                    index= clip(src_x, 0, width) + src_y*stride;
                    dst[y*stride + x]= ( ( src[index ]*(s-frac_y)
                                         + src[index+stride ]* frac_y )*s
                                       + r)>>(shift*2);
                }else{
                    /* both outside: nearest clamped pixel, no interpolation */
                    index= clip(src_x, 0, width) + clip(src_y, 0, height)*stride;
                    dst[y*stride + x]= src[index ];
                }
            }

            /* step one column along the affine transform */
            vx+= dxx;
            vy+= dyx;
        }
        /* step one row */
        ox += dxy;
        oy += dyy;
    }
}
  862. static inline void copy_block17(UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int h)
  863. {
  864. int i;
  865. for(i=0; i<h; i++)
  866. {
  867. ST32(dst , LD32(src ));
  868. ST32(dst+4 , LD32(src+4 ));
  869. ST32(dst+8 , LD32(src+8 ));
  870. ST32(dst+12, LD32(src+12));
  871. dst[16]= src[16];
  872. dst+=dstStride;
  873. src+=srcStride;
  874. }
  875. }
  876. static inline void copy_block9(UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int h)
  877. {
  878. int i;
  879. for(i=0; i<h; i++)
  880. {
  881. ST32(dst , LD32(src ));
  882. ST32(dst+4 , LD32(src+4 ));
  883. dst[8]= src[8];
  884. dst+=dstStride;
  885. src+=srcStride;
  886. }
  887. }
/*
 * QPEL_MC(r, OPNAME, RND, OP) instantiates the complete set of MPEG-4
 * quarter-pel motion-compensation functions for one output operation:
 *   - 8-tap horizontal/vertical lowpass filters for 8x8 and 16x16 blocks
 *   - the 16 OPNAME##qpelN_mcXY_c functions (X,Y in 0..3 = quarter-pel
 *     phase), built from the lowpass filters plus the pixelsN averaging
 *     helpers; half-pel intermediates go into on-stack scratch buffers
 *   - the OPNAME##qpel_pixels_tab dispatch table ([0] = 16x16, [1] = 8x8)
 * OP(dst, val) stores (or averages in) the filter result after clipping
 * via the cm table; RND selects rounding vs non-rounding intermediates.
 * NOTE(review): near the right/bottom block edge the outer filter taps
 * are mirrored back into the block (src[8] etc. reused) rather than
 * reading past it — this matches the edge handling of the MPEG-4 qpel
 * filter; confirm against ISO/IEC 14496-2 before changing.
 */
#define QPEL_MC(r, OPNAME, RND, OP) \
/* horizontal 8-tap lowpass over an 8-wide block, h rows */ \
static void OPNAME ## mpeg4_qpel8_h_lowpass(UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int h){\
    UINT8 *cm = cropTbl + MAX_NEG_CROP;\
    int i;\
    for(i=0; i<h; i++)\
    {\
        OP(dst[0], (src[0]+src[1])*20 - (src[0]+src[2])*6 + (src[1]+src[3])*3 - (src[2]+src[4]));\
        OP(dst[1], (src[1]+src[2])*20 - (src[0]+src[3])*6 + (src[0]+src[4])*3 - (src[1]+src[5]));\
        OP(dst[2], (src[2]+src[3])*20 - (src[1]+src[4])*6 + (src[0]+src[5])*3 - (src[0]+src[6]));\
        OP(dst[3], (src[3]+src[4])*20 - (src[2]+src[5])*6 + (src[1]+src[6])*3 - (src[0]+src[7]));\
        OP(dst[4], (src[4]+src[5])*20 - (src[3]+src[6])*6 + (src[2]+src[7])*3 - (src[1]+src[8]));\
        OP(dst[5], (src[5]+src[6])*20 - (src[4]+src[7])*6 + (src[3]+src[8])*3 - (src[2]+src[8]));\
        OP(dst[6], (src[6]+src[7])*20 - (src[5]+src[8])*6 + (src[4]+src[8])*3 - (src[3]+src[7]));\
        OP(dst[7], (src[7]+src[8])*20 - (src[6]+src[8])*6 + (src[5]+src[7])*3 - (src[4]+src[6]));\
        dst+=dstStride;\
        src+=srcStride;\
    }\
}\
\
/* vertical 8-tap lowpass over an 8-tall block, w columns */ \
static void OPNAME ## mpeg4_qpel8_v_lowpass(UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int w){\
    UINT8 *cm = cropTbl + MAX_NEG_CROP;\
    int i;\
    for(i=0; i<w; i++)\
    {\
        const int src0= src[0*srcStride];\
        const int src1= src[1*srcStride];\
        const int src2= src[2*srcStride];\
        const int src3= src[3*srcStride];\
        const int src4= src[4*srcStride];\
        const int src5= src[5*srcStride];\
        const int src6= src[6*srcStride];\
        const int src7= src[7*srcStride];\
        const int src8= src[8*srcStride];\
        OP(dst[0*dstStride], (src0+src1)*20 - (src0+src2)*6 + (src1+src3)*3 - (src2+src4));\
        OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*6 + (src0+src4)*3 - (src1+src5));\
        OP(dst[2*dstStride], (src2+src3)*20 - (src1+src4)*6 + (src0+src5)*3 - (src0+src6));\
        OP(dst[3*dstStride], (src3+src4)*20 - (src2+src5)*6 + (src1+src6)*3 - (src0+src7));\
        OP(dst[4*dstStride], (src4+src5)*20 - (src3+src6)*6 + (src2+src7)*3 - (src1+src8));\
        OP(dst[5*dstStride], (src5+src6)*20 - (src4+src7)*6 + (src3+src8)*3 - (src2+src8));\
        OP(dst[6*dstStride], (src6+src7)*20 - (src5+src8)*6 + (src4+src8)*3 - (src3+src7));\
        OP(dst[7*dstStride], (src7+src8)*20 - (src6+src8)*6 + (src5+src7)*3 - (src4+src6));\
        dst++;\
        src++;\
    }\
}\
\
/* horizontal 8-tap lowpass over a 16-wide block, h rows */ \
static void OPNAME ## mpeg4_qpel16_h_lowpass(UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int h){\
    UINT8 *cm = cropTbl + MAX_NEG_CROP;\
    int i;\
    for(i=0; i<h; i++)\
    {\
        OP(dst[ 0], (src[ 0]+src[ 1])*20 - (src[ 0]+src[ 2])*6 + (src[ 1]+src[ 3])*3 - (src[ 2]+src[ 4]));\
        OP(dst[ 1], (src[ 1]+src[ 2])*20 - (src[ 0]+src[ 3])*6 + (src[ 0]+src[ 4])*3 - (src[ 1]+src[ 5]));\
        OP(dst[ 2], (src[ 2]+src[ 3])*20 - (src[ 1]+src[ 4])*6 + (src[ 0]+src[ 5])*3 - (src[ 0]+src[ 6]));\
        OP(dst[ 3], (src[ 3]+src[ 4])*20 - (src[ 2]+src[ 5])*6 + (src[ 1]+src[ 6])*3 - (src[ 0]+src[ 7]));\
        OP(dst[ 4], (src[ 4]+src[ 5])*20 - (src[ 3]+src[ 6])*6 + (src[ 2]+src[ 7])*3 - (src[ 1]+src[ 8]));\
        OP(dst[ 5], (src[ 5]+src[ 6])*20 - (src[ 4]+src[ 7])*6 + (src[ 3]+src[ 8])*3 - (src[ 2]+src[ 9]));\
        OP(dst[ 6], (src[ 6]+src[ 7])*20 - (src[ 5]+src[ 8])*6 + (src[ 4]+src[ 9])*3 - (src[ 3]+src[10]));\
        OP(dst[ 7], (src[ 7]+src[ 8])*20 - (src[ 6]+src[ 9])*6 + (src[ 5]+src[10])*3 - (src[ 4]+src[11]));\
        OP(dst[ 8], (src[ 8]+src[ 9])*20 - (src[ 7]+src[10])*6 + (src[ 6]+src[11])*3 - (src[ 5]+src[12]));\
        OP(dst[ 9], (src[ 9]+src[10])*20 - (src[ 8]+src[11])*6 + (src[ 7]+src[12])*3 - (src[ 6]+src[13]));\
        OP(dst[10], (src[10]+src[11])*20 - (src[ 9]+src[12])*6 + (src[ 8]+src[13])*3 - (src[ 7]+src[14]));\
        OP(dst[11], (src[11]+src[12])*20 - (src[10]+src[13])*6 + (src[ 9]+src[14])*3 - (src[ 8]+src[15]));\
        OP(dst[12], (src[12]+src[13])*20 - (src[11]+src[14])*6 + (src[10]+src[15])*3 - (src[ 9]+src[16]));\
        OP(dst[13], (src[13]+src[14])*20 - (src[12]+src[15])*6 + (src[11]+src[16])*3 - (src[10]+src[16]));\
        OP(dst[14], (src[14]+src[15])*20 - (src[13]+src[16])*6 + (src[12]+src[16])*3 - (src[11]+src[15]));\
        OP(dst[15], (src[15]+src[16])*20 - (src[14]+src[16])*6 + (src[13]+src[15])*3 - (src[12]+src[14]));\
        dst+=dstStride;\
        src+=srcStride;\
    }\
}\
\
/* vertical 8-tap lowpass over a 16-tall block, w columns */ \
static void OPNAME ## mpeg4_qpel16_v_lowpass(UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int w){\
    UINT8 *cm = cropTbl + MAX_NEG_CROP;\
    int i;\
    for(i=0; i<w; i++)\
    {\
        const int src0= src[0*srcStride];\
        const int src1= src[1*srcStride];\
        const int src2= src[2*srcStride];\
        const int src3= src[3*srcStride];\
        const int src4= src[4*srcStride];\
        const int src5= src[5*srcStride];\
        const int src6= src[6*srcStride];\
        const int src7= src[7*srcStride];\
        const int src8= src[8*srcStride];\
        const int src9= src[9*srcStride];\
        const int src10= src[10*srcStride];\
        const int src11= src[11*srcStride];\
        const int src12= src[12*srcStride];\
        const int src13= src[13*srcStride];\
        const int src14= src[14*srcStride];\
        const int src15= src[15*srcStride];\
        const int src16= src[16*srcStride];\
        OP(dst[ 0*dstStride], (src0 +src1 )*20 - (src0 +src2 )*6 + (src1 +src3 )*3 - (src2 +src4 ));\
        OP(dst[ 1*dstStride], (src1 +src2 )*20 - (src0 +src3 )*6 + (src0 +src4 )*3 - (src1 +src5 ));\
        OP(dst[ 2*dstStride], (src2 +src3 )*20 - (src1 +src4 )*6 + (src0 +src5 )*3 - (src0 +src6 ));\
        OP(dst[ 3*dstStride], (src3 +src4 )*20 - (src2 +src5 )*6 + (src1 +src6 )*3 - (src0 +src7 ));\
        OP(dst[ 4*dstStride], (src4 +src5 )*20 - (src3 +src6 )*6 + (src2 +src7 )*3 - (src1 +src8 ));\
        OP(dst[ 5*dstStride], (src5 +src6 )*20 - (src4 +src7 )*6 + (src3 +src8 )*3 - (src2 +src9 ));\
        OP(dst[ 6*dstStride], (src6 +src7 )*20 - (src5 +src8 )*6 + (src4 +src9 )*3 - (src3 +src10));\
        OP(dst[ 7*dstStride], (src7 +src8 )*20 - (src6 +src9 )*6 + (src5 +src10)*3 - (src4 +src11));\
        OP(dst[ 8*dstStride], (src8 +src9 )*20 - (src7 +src10)*6 + (src6 +src11)*3 - (src5 +src12));\
        OP(dst[ 9*dstStride], (src9 +src10)*20 - (src8 +src11)*6 + (src7 +src12)*3 - (src6 +src13));\
        OP(dst[10*dstStride], (src10+src11)*20 - (src9 +src12)*6 + (src8 +src13)*3 - (src7 +src14));\
        OP(dst[11*dstStride], (src11+src12)*20 - (src10+src13)*6 + (src9 +src14)*3 - (src8 +src15));\
        OP(dst[12*dstStride], (src12+src13)*20 - (src11+src14)*6 + (src10+src15)*3 - (src9 +src16));\
        OP(dst[13*dstStride], (src13+src14)*20 - (src12+src15)*6 + (src11+src16)*3 - (src10+src16));\
        OP(dst[14*dstStride], (src14+src15)*20 - (src13+src16)*6 + (src12+src16)*3 - (src11+src15));\
        OP(dst[15*dstStride], (src15+src16)*20 - (src14+src16)*6 + (src13+src15)*3 - (src12+src14));\
        dst++;\
        src++;\
    }\
}\
\
/* 8x8 quarter-pel MC: mcXY = (X,Y) quarter-pel phase, 0..3 each */ \
static void OPNAME ## qpel8_mc00_c (UINT8 *dst, UINT8 *src, int stride){\
    OPNAME ## pixels8(dst, src, stride, 8);\
}\
\
static void OPNAME ## qpel8_mc10_c(UINT8 *dst, UINT8 *src, int stride){\
    UINT8 half[64];\
    put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);\
    OPNAME ## pixels8_l2(dst, src, half, stride, stride, 8, 8);\
}\
\
static void OPNAME ## qpel8_mc20_c(UINT8 *dst, UINT8 *src, int stride){\
    OPNAME ## mpeg4_qpel8_h_lowpass(dst, src, stride, stride, 8);\
}\
\
static void OPNAME ## qpel8_mc30_c(UINT8 *dst, UINT8 *src, int stride){\
    UINT8 half[64];\
    put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);\
    OPNAME ## pixels8_l2(dst, src+1, half, stride, stride, 8, 8);\
}\
\
static void OPNAME ## qpel8_mc01_c(UINT8 *dst, UINT8 *src, int stride){\
    UINT8 full[16*9];\
    UINT8 half[64];\
    copy_block9(full, src, 16, stride, 9);\
    put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16, 8);\
    OPNAME ## pixels8_l2(dst, full, half, stride, 16, 8, 8);\
}\
\
static void OPNAME ## qpel8_mc02_c(UINT8 *dst, UINT8 *src, int stride){\
    UINT8 full[16*9];\
    copy_block9(full, src, 16, stride, 9);\
    OPNAME ## mpeg4_qpel8_v_lowpass(dst, full, stride, 16, 8);\
}\
\
static void OPNAME ## qpel8_mc03_c(UINT8 *dst, UINT8 *src, int stride){\
    UINT8 full[16*9];\
    UINT8 half[64];\
    copy_block9(full, src, 16, stride, 9);\
    put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16, 8);\
    OPNAME ## pixels8_l2(dst, full+16, half, stride, 16, 8, 8);\
}\
static void OPNAME ## qpel8_mc11_c(UINT8 *dst, UINT8 *src, int stride){\
    UINT8 full[16*9];\
    UINT8 halfH[72];\
    UINT8 halfV[64];\
    UINT8 halfHV[64];\
    copy_block9(full, src, 16, stride, 9);\
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16, 8);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8, 8);\
    OPNAME ## pixels8_l4(dst, full, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
}\
static void OPNAME ## qpel8_mc31_c(UINT8 *dst, UINT8 *src, int stride){\
    UINT8 full[16*9];\
    UINT8 halfH[72];\
    UINT8 halfV[64];\
    UINT8 halfHV[64];\
    copy_block9(full, src, 16, stride, 9);\
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16, 8);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8, 8);\
    OPNAME ## pixels8_l4(dst, full+1, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
}\
static void OPNAME ## qpel8_mc13_c(UINT8 *dst, UINT8 *src, int stride){\
    UINT8 full[16*9];\
    UINT8 halfH[72];\
    UINT8 halfV[64];\
    UINT8 halfHV[64];\
    copy_block9(full, src, 16, stride, 9);\
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16, 8);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8, 8);\
    OPNAME ## pixels8_l4(dst, full+16, halfH+8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
}\
static void OPNAME ## qpel8_mc33_c(UINT8 *dst, UINT8 *src, int stride){\
    UINT8 full[16*9];\
    UINT8 halfH[72];\
    UINT8 halfV[64];\
    UINT8 halfHV[64];\
    copy_block9(full, src, 16, stride, 9);\
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full , 8, 16, 9);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16, 8);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8, 8);\
    OPNAME ## pixels8_l4(dst, full+17, halfH+8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
}\
static void OPNAME ## qpel8_mc21_c(UINT8 *dst, UINT8 *src, int stride){\
    UINT8 halfH[72];\
    UINT8 halfHV[64];\
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8, 8);\
    OPNAME ## pixels8_l2(dst, halfH, halfHV, stride, 8, 8, 8);\
}\
static void OPNAME ## qpel8_mc23_c(UINT8 *dst, UINT8 *src, int stride){\
    UINT8 halfH[72];\
    UINT8 halfHV[64];\
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8, 8);\
    OPNAME ## pixels8_l2(dst, halfH+8, halfHV, stride, 8, 8, 8);\
}\
static void OPNAME ## qpel8_mc12_c(UINT8 *dst, UINT8 *src, int stride){\
    UINT8 full[16*9];\
    UINT8 halfH[72];\
    UINT8 halfV[64];\
    UINT8 halfHV[64];\
    copy_block9(full, src, 16, stride, 9);\
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16, 8);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8, 8);\
    OPNAME ## pixels8_l2(dst, halfV, halfHV, stride, 8, 8, 8);\
}\
static void OPNAME ## qpel8_mc32_c(UINT8 *dst, UINT8 *src, int stride){\
    UINT8 full[16*9];\
    UINT8 halfH[72];\
    UINT8 halfV[64];\
    UINT8 halfHV[64];\
    copy_block9(full, src, 16, stride, 9);\
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16, 8);\
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8, 8);\
    OPNAME ## pixels8_l2(dst, halfV, halfHV, stride, 8, 8, 8);\
}\
static void OPNAME ## qpel8_mc22_c(UINT8 *dst, UINT8 *src, int stride){\
    UINT8 halfH[72];\
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
    OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8, 8);\
}\
/* 16x16 quarter-pel MC: same structure with 16-wide helpers */ \
static void OPNAME ## qpel16_mc00_c (UINT8 *dst, UINT8 *src, int stride){\
    OPNAME ## pixels16(dst, src, stride, 16);\
}\
\
static void OPNAME ## qpel16_mc10_c(UINT8 *dst, UINT8 *src, int stride){\
    UINT8 half[256];\
    put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16);\
    OPNAME ## pixels16_l2(dst, src, half, stride, stride, 16, 16);\
}\
\
static void OPNAME ## qpel16_mc20_c(UINT8 *dst, UINT8 *src, int stride){\
    OPNAME ## mpeg4_qpel16_h_lowpass(dst, src, stride, stride, 16);\
}\
\
static void OPNAME ## qpel16_mc30_c(UINT8 *dst, UINT8 *src, int stride){\
    UINT8 half[256];\
    put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16);\
    OPNAME ## pixels16_l2(dst, src+1, half, stride, stride, 16, 16);\
}\
\
static void OPNAME ## qpel16_mc01_c(UINT8 *dst, UINT8 *src, int stride){\
    UINT8 full[24*17];\
    UINT8 half[256];\
    copy_block17(full, src, 24, stride, 17);\
    put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24, 16);\
    OPNAME ## pixels16_l2(dst, full, half, stride, 24, 16, 16);\
}\
\
static void OPNAME ## qpel16_mc02_c(UINT8 *dst, UINT8 *src, int stride){\
    UINT8 full[24*17];\
    copy_block17(full, src, 24, stride, 17);\
    OPNAME ## mpeg4_qpel16_v_lowpass(dst, full, stride, 24, 16);\
}\
\
static void OPNAME ## qpel16_mc03_c(UINT8 *dst, UINT8 *src, int stride){\
    UINT8 full[24*17];\
    UINT8 half[256];\
    copy_block17(full, src, 24, stride, 17);\
    put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24, 16);\
    OPNAME ## pixels16_l2(dst, full+24, half, stride, 24, 16, 16);\
}\
static void OPNAME ## qpel16_mc11_c(UINT8 *dst, UINT8 *src, int stride){\
    UINT8 full[24*17];\
    UINT8 halfH[272];\
    UINT8 halfV[256];\
    UINT8 halfHV[256];\
    copy_block17(full, src, 24, stride, 17);\
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
    put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24, 16);\
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16, 16);\
    OPNAME ## pixels16_l4(dst, full, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
}\
static void OPNAME ## qpel16_mc31_c(UINT8 *dst, UINT8 *src, int stride){\
    UINT8 full[24*17];\
    UINT8 halfH[272];\
    UINT8 halfV[256];\
    UINT8 halfHV[256];\
    copy_block17(full, src, 24, stride, 17);\
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
    put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24, 16);\
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16, 16);\
    OPNAME ## pixels16_l4(dst, full+1, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
}\
static void OPNAME ## qpel16_mc13_c(UINT8 *dst, UINT8 *src, int stride){\
    UINT8 full[24*17];\
    UINT8 halfH[272];\
    UINT8 halfV[256];\
    UINT8 halfHV[256];\
    copy_block17(full, src, 24, stride, 17);\
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
    put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24, 16);\
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16, 16);\
    OPNAME ## pixels16_l4(dst, full+24, halfH+16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
}\
static void OPNAME ## qpel16_mc33_c(UINT8 *dst, UINT8 *src, int stride){\
    UINT8 full[24*17];\
    UINT8 halfH[272];\
    UINT8 halfV[256];\
    UINT8 halfHV[256];\
    copy_block17(full, src, 24, stride, 17);\
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full , 16, 24, 17);\
    put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24, 16);\
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16, 16);\
    OPNAME ## pixels16_l4(dst, full+25, halfH+16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
}\
static void OPNAME ## qpel16_mc21_c(UINT8 *dst, UINT8 *src, int stride){\
    UINT8 halfH[272];\
    UINT8 halfHV[256];\
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16, 16);\
    OPNAME ## pixels16_l2(dst, halfH, halfHV, stride, 16, 16, 16);\
}\
static void OPNAME ## qpel16_mc23_c(UINT8 *dst, UINT8 *src, int stride){\
    UINT8 halfH[272];\
    UINT8 halfHV[256];\
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16, 16);\
    OPNAME ## pixels16_l2(dst, halfH+16, halfHV, stride, 16, 16, 16);\
}\
static void OPNAME ## qpel16_mc12_c(UINT8 *dst, UINT8 *src, int stride){\
    UINT8 full[24*17];\
    UINT8 halfH[272];\
    UINT8 halfV[256];\
    UINT8 halfHV[256];\
    copy_block17(full, src, 24, stride, 17);\
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
    put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24, 16);\
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16, 16);\
    OPNAME ## pixels16_l2(dst, halfV, halfHV, stride, 16, 16, 16);\
}\
static void OPNAME ## qpel16_mc32_c(UINT8 *dst, UINT8 *src, int stride){\
    UINT8 full[24*17];\
    UINT8 halfH[272];\
    UINT8 halfV[256];\
    UINT8 halfHV[256];\
    copy_block17(full, src, 24, stride, 17);\
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
    put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24, 16);\
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16, 16);\
    OPNAME ## pixels16_l2(dst, halfV, halfHV, stride, 16, 16, 16);\
}\
static void OPNAME ## qpel16_mc22_c(UINT8 *dst, UINT8 *src, int stride){\
    UINT8 halfH[272];\
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
    OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16, 16);\
}\
/* dispatch table indexed by [block size][x + 4*y quarter-pel phase] */ \
qpel_mc_func OPNAME ## qpel_pixels_tab[2][16]={ \
    {\
        OPNAME ## qpel16_mc00_c, \
        OPNAME ## qpel16_mc10_c, \
        OPNAME ## qpel16_mc20_c, \
        OPNAME ## qpel16_mc30_c, \
        OPNAME ## qpel16_mc01_c, \
        OPNAME ## qpel16_mc11_c, \
        OPNAME ## qpel16_mc21_c, \
        OPNAME ## qpel16_mc31_c, \
        OPNAME ## qpel16_mc02_c, \
        OPNAME ## qpel16_mc12_c, \
        OPNAME ## qpel16_mc22_c, \
        OPNAME ## qpel16_mc32_c, \
        OPNAME ## qpel16_mc03_c, \
        OPNAME ## qpel16_mc13_c, \
        OPNAME ## qpel16_mc23_c, \
        OPNAME ## qpel16_mc33_c, \
    },{\
        OPNAME ## qpel8_mc00_c, \
        OPNAME ## qpel8_mc10_c, \
        OPNAME ## qpel8_mc20_c, \
        OPNAME ## qpel8_mc30_c, \
        OPNAME ## qpel8_mc01_c, \
        OPNAME ## qpel8_mc11_c, \
        OPNAME ## qpel8_mc21_c, \
        OPNAME ## qpel8_mc31_c, \
        OPNAME ## qpel8_mc02_c, \
        OPNAME ## qpel8_mc12_c, \
        OPNAME ## qpel8_mc22_c, \
        OPNAME ## qpel8_mc32_c, \
        OPNAME ## qpel8_mc03_c, \
        OPNAME ## qpel8_mc13_c, \
        OPNAME ## qpel8_mc23_c, \
        OPNAME ## qpel8_mc33_c, \
    }\
};
/* output operations for QPEL_MC: the filter result b is scaled by 512,
 * so (b + 16)>>5 (rounding) or (b + 15)>>5 (non-rounding) normalizes it
 * before clipping through the cm (cropTbl) lookup */
#define op_avg(a, b) a = (((a)+cm[((b) + 16)>>5]+1)>>1)
#define op_avg_no_rnd(a, b) a = (((a)+cm[((b) + 15)>>5])>>1)
#define op_put(a, b) a = cm[((b) + 16)>>5]
#define op_put_no_rnd(a, b) a = cm[((b) + 15)>>5]
/* instantiate put, no-rounding put, and avg qpel function families */
QPEL_MC(0, put_ , _ , op_put)
QPEL_MC(1, put_no_rnd_, _no_rnd_, op_put_no_rnd)
QPEL_MC(0, avg_ , _ , op_avg)
//QPEL_MC(1, avg_no_rnd , _ , op_avg)
#undef op_avg
#undef op_avg_no_rnd
#undef op_put
#undef op_put_no_rnd
  1304. static int pix_abs16x16_c(UINT8 *pix1, UINT8 *pix2, int line_size)
  1305. {
  1306. int s, i;
  1307. s = 0;
  1308. for(i=0;i<16;i++) {
  1309. s += abs(pix1[0] - pix2[0]);
  1310. s += abs(pix1[1] - pix2[1]);
  1311. s += abs(pix1[2] - pix2[2]);
  1312. s += abs(pix1[3] - pix2[3]);
  1313. s += abs(pix1[4] - pix2[4]);
  1314. s += abs(pix1[5] - pix2[5]);
  1315. s += abs(pix1[6] - pix2[6]);
  1316. s += abs(pix1[7] - pix2[7]);
  1317. s += abs(pix1[8] - pix2[8]);
  1318. s += abs(pix1[9] - pix2[9]);
  1319. s += abs(pix1[10] - pix2[10]);
  1320. s += abs(pix1[11] - pix2[11]);
  1321. s += abs(pix1[12] - pix2[12]);
  1322. s += abs(pix1[13] - pix2[13]);
  1323. s += abs(pix1[14] - pix2[14]);
  1324. s += abs(pix1[15] - pix2[15]);
  1325. pix1 += line_size;
  1326. pix2 += line_size;
  1327. }
  1328. return s;
  1329. }
  1330. static int pix_abs16x16_x2_c(UINT8 *pix1, UINT8 *pix2, int line_size)
  1331. {
  1332. int s, i;
  1333. s = 0;
  1334. for(i=0;i<16;i++) {
  1335. s += abs(pix1[0] - avg2(pix2[0], pix2[1]));
  1336. s += abs(pix1[1] - avg2(pix2[1], pix2[2]));
  1337. s += abs(pix1[2] - avg2(pix2[2], pix2[3]));
  1338. s += abs(pix1[3] - avg2(pix2[3], pix2[4]));
  1339. s += abs(pix1[4] - avg2(pix2[4], pix2[5]));
  1340. s += abs(pix1[5] - avg2(pix2[5], pix2[6]));
  1341. s += abs(pix1[6] - avg2(pix2[6], pix2[7]));
  1342. s += abs(pix1[7] - avg2(pix2[7], pix2[8]));
  1343. s += abs(pix1[8] - avg2(pix2[8], pix2[9]));
  1344. s += abs(pix1[9] - avg2(pix2[9], pix2[10]));
  1345. s += abs(pix1[10] - avg2(pix2[10], pix2[11]));
  1346. s += abs(pix1[11] - avg2(pix2[11], pix2[12]));
  1347. s += abs(pix1[12] - avg2(pix2[12], pix2[13]));
  1348. s += abs(pix1[13] - avg2(pix2[13], pix2[14]));
  1349. s += abs(pix1[14] - avg2(pix2[14], pix2[15]));
  1350. s += abs(pix1[15] - avg2(pix2[15], pix2[16]));
  1351. pix1 += line_size;
  1352. pix2 += line_size;
  1353. }
  1354. return s;
  1355. }
  1356. static int pix_abs16x16_y2_c(UINT8 *pix1, UINT8 *pix2, int line_size)
  1357. {
  1358. int s, i;
  1359. UINT8 *pix3 = pix2 + line_size;
  1360. s = 0;
  1361. for(i=0;i<16;i++) {
  1362. s += abs(pix1[0] - avg2(pix2[0], pix3[0]));
  1363. s += abs(pix1[1] - avg2(pix2[1], pix3[1]));
  1364. s += abs(pix1[2] - avg2(pix2[2], pix3[2]));
  1365. s += abs(pix1[3] - avg2(pix2[3], pix3[3]));
  1366. s += abs(pix1[4] - avg2(pix2[4], pix3[4]));
  1367. s += abs(pix1[5] - avg2(pix2[5], pix3[5]));
  1368. s += abs(pix1[6] - avg2(pix2[6], pix3[6]));
  1369. s += abs(pix1[7] - avg2(pix2[7], pix3[7]));
  1370. s += abs(pix1[8] - avg2(pix2[8], pix3[8]));
  1371. s += abs(pix1[9] - avg2(pix2[9], pix3[9]));
  1372. s += abs(pix1[10] - avg2(pix2[10], pix3[10]));
  1373. s += abs(pix1[11] - avg2(pix2[11], pix3[11]));
  1374. s += abs(pix1[12] - avg2(pix2[12], pix3[12]));
  1375. s += abs(pix1[13] - avg2(pix2[13], pix3[13]));
  1376. s += abs(pix1[14] - avg2(pix2[14], pix3[14]));
  1377. s += abs(pix1[15] - avg2(pix2[15], pix3[15]));
  1378. pix1 += line_size;
  1379. pix2 += line_size;
  1380. pix3 += line_size;
  1381. }
  1382. return s;
  1383. }
  1384. static int pix_abs16x16_xy2_c(UINT8 *pix1, UINT8 *pix2, int line_size)
  1385. {
  1386. int s, i;
  1387. UINT8 *pix3 = pix2 + line_size;
  1388. s = 0;
  1389. for(i=0;i<16;i++) {
  1390. s += abs(pix1[0] - avg4(pix2[0], pix2[1], pix3[0], pix3[1]));
  1391. s += abs(pix1[1] - avg4(pix2[1], pix2[2], pix3[1], pix3[2]));
  1392. s += abs(pix1[2] - avg4(pix2[2], pix2[3], pix3[2], pix3[3]));
  1393. s += abs(pix1[3] - avg4(pix2[3], pix2[4], pix3[3], pix3[4]));
  1394. s += abs(pix1[4] - avg4(pix2[4], pix2[5], pix3[4], pix3[5]));
  1395. s += abs(pix1[5] - avg4(pix2[5], pix2[6], pix3[5], pix3[6]));
  1396. s += abs(pix1[6] - avg4(pix2[6], pix2[7], pix3[6], pix3[7]));
  1397. s += abs(pix1[7] - avg4(pix2[7], pix2[8], pix3[7], pix3[8]));
  1398. s += abs(pix1[8] - avg4(pix2[8], pix2[9], pix3[8], pix3[9]));
  1399. s += abs(pix1[9] - avg4(pix2[9], pix2[10], pix3[9], pix3[10]));
  1400. s += abs(pix1[10] - avg4(pix2[10], pix2[11], pix3[10], pix3[11]));
  1401. s += abs(pix1[11] - avg4(pix2[11], pix2[12], pix3[11], pix3[12]));
  1402. s += abs(pix1[12] - avg4(pix2[12], pix2[13], pix3[12], pix3[13]));
  1403. s += abs(pix1[13] - avg4(pix2[13], pix2[14], pix3[13], pix3[14]));
  1404. s += abs(pix1[14] - avg4(pix2[14], pix2[15], pix3[14], pix3[15]));
  1405. s += abs(pix1[15] - avg4(pix2[15], pix2[16], pix3[15], pix3[16]));
  1406. pix1 += line_size;
  1407. pix2 += line_size;
  1408. pix3 += line_size;
  1409. }
  1410. return s;
  1411. }
  1412. static int pix_abs8x8_c(UINT8 *pix1, UINT8 *pix2, int line_size)
  1413. {
  1414. int s, i;
  1415. s = 0;
  1416. for(i=0;i<8;i++) {
  1417. s += abs(pix1[0] - pix2[0]);
  1418. s += abs(pix1[1] - pix2[1]);
  1419. s += abs(pix1[2] - pix2[2]);
  1420. s += abs(pix1[3] - pix2[3]);
  1421. s += abs(pix1[4] - pix2[4]);
  1422. s += abs(pix1[5] - pix2[5]);
  1423. s += abs(pix1[6] - pix2[6]);
  1424. s += abs(pix1[7] - pix2[7]);
  1425. pix1 += line_size;
  1426. pix2 += line_size;
  1427. }
  1428. return s;
  1429. }
  1430. static int pix_abs8x8_x2_c(UINT8 *pix1, UINT8 *pix2, int line_size)
  1431. {
  1432. int s, i;
  1433. s = 0;
  1434. for(i=0;i<8;i++) {
  1435. s += abs(pix1[0] - avg2(pix2[0], pix2[1]));
  1436. s += abs(pix1[1] - avg2(pix2[1], pix2[2]));
  1437. s += abs(pix1[2] - avg2(pix2[2], pix2[3]));
  1438. s += abs(pix1[3] - avg2(pix2[3], pix2[4]));
  1439. s += abs(pix1[4] - avg2(pix2[4], pix2[5]));
  1440. s += abs(pix1[5] - avg2(pix2[5], pix2[6]));
  1441. s += abs(pix1[6] - avg2(pix2[6], pix2[7]));
  1442. s += abs(pix1[7] - avg2(pix2[7], pix2[8]));
  1443. pix1 += line_size;
  1444. pix2 += line_size;
  1445. }
  1446. return s;
  1447. }
  1448. static int pix_abs8x8_y2_c(UINT8 *pix1, UINT8 *pix2, int line_size)
  1449. {
  1450. int s, i;
  1451. UINT8 *pix3 = pix2 + line_size;
  1452. s = 0;
  1453. for(i=0;i<8;i++) {
  1454. s += abs(pix1[0] - avg2(pix2[0], pix3[0]));
  1455. s += abs(pix1[1] - avg2(pix2[1], pix3[1]));
  1456. s += abs(pix1[2] - avg2(pix2[2], pix3[2]));
  1457. s += abs(pix1[3] - avg2(pix2[3], pix3[3]));
  1458. s += abs(pix1[4] - avg2(pix2[4], pix3[4]));
  1459. s += abs(pix1[5] - avg2(pix2[5], pix3[5]));
  1460. s += abs(pix1[6] - avg2(pix2[6], pix3[6]));
  1461. s += abs(pix1[7] - avg2(pix2[7], pix3[7]));
  1462. pix1 += line_size;
  1463. pix2 += line_size;
  1464. pix3 += line_size;
  1465. }
  1466. return s;
  1467. }
  1468. static int pix_abs8x8_xy2_c(UINT8 *pix1, UINT8 *pix2, int line_size)
  1469. {
  1470. int s, i;
  1471. UINT8 *pix3 = pix2 + line_size;
  1472. s = 0;
  1473. for(i=0;i<8;i++) {
  1474. s += abs(pix1[0] - avg4(pix2[0], pix2[1], pix3[0], pix3[1]));
  1475. s += abs(pix1[1] - avg4(pix2[1], pix2[2], pix3[1], pix3[2]));
  1476. s += abs(pix1[2] - avg4(pix2[2], pix2[3], pix3[2], pix3[3]));
  1477. s += abs(pix1[3] - avg4(pix2[3], pix2[4], pix3[3], pix3[4]));
  1478. s += abs(pix1[4] - avg4(pix2[4], pix2[5], pix3[4], pix3[5]));
  1479. s += abs(pix1[5] - avg4(pix2[5], pix2[6], pix3[5], pix3[6]));
  1480. s += abs(pix1[6] - avg4(pix2[6], pix2[7], pix3[6], pix3[7]));
  1481. s += abs(pix1[7] - avg4(pix2[7], pix2[8], pix3[7], pix3[8]));
  1482. pix1 += line_size;
  1483. pix2 += line_size;
  1484. pix3 += line_size;
  1485. }
  1486. return s;
  1487. }
  1488. void ff_block_permute(INT16 *block, UINT8 *permutation, const UINT8 *scantable, int last)
  1489. {
  1490. int i;
  1491. INT16 temp[64];
  1492. if(last<=0) return;
  1493. if(permutation[1]==1) return; //FIXME its ok but not clean and might fail for some perms
  1494. for(i=0; i<=last; i++){
  1495. const int j= scantable[i];
  1496. temp[j]= block[j];
  1497. block[j]=0;
  1498. }
  1499. for(i=0; i<=last; i++){
  1500. const int j= scantable[i];
  1501. const int perm_j= permutation[j];
  1502. block[perm_j]= temp[j];
  1503. }
  1504. }
  1505. static void clear_blocks_c(DCTELEM *blocks)
  1506. {
  1507. memset(blocks, 0, sizeof(DCTELEM)*6*64);
  1508. }
/**
 * Initialize a DSPContext: build the shared lookup tables, install the
 * portable C implementations of every operation, then let each enabled
 * architecture-specific initializer override entries with optimized
 * versions (those must run AFTER the C defaults are in place).
 *
 * @param c    context whose function pointers are filled in
 * @param mask passed through to the per-arch initializers
 *             (presumably selects which optimizations to enable —
 *              semantics not visible here, defined by each arch init)
 */
void dsputil_init(DSPContext* c, unsigned mask)
{
    int i;

    /* clipping table: maps [-MAX_NEG_CROP, 255+MAX_NEG_CROP) to [0,255] */
    for(i=0;i<256;i++) cropTbl[i + MAX_NEG_CROP] = i;
    for(i=0;i<MAX_NEG_CROP;i++) {
        cropTbl[i] = 0;                       /* saturate negative values to 0 */
        cropTbl[i + MAX_NEG_CROP + 256] = 255;/* saturate overflows to 255 */
    }

    /* squareTbl[i] = (i-256)^2, indexed via a +256-biased pointer so that
       signed differences in [-256,255] can be squared by table lookup */
    for(i=0;i<512;i++) {
        squareTbl[i] = (i - 256) * (i - 256);
    }

    /* ---- portable C implementations (defaults) ---- */
    c->get_pixels = get_pixels_c;
    c->diff_pixels = diff_pixels_c;
    c->put_pixels_clamped = put_pixels_clamped_c;
    c->add_pixels_clamped = add_pixels_clamped_c;
    c->gmc1 = gmc1_c;
    c->gmc = gmc_c;
    c->clear_blocks = clear_blocks_c;
    c->pix_sum = pix_sum_c;
    c->pix_norm1 = pix_norm1_c;

    /* SAD (sum of absolute differences) for full/half-pel positions */
    c->pix_abs16x16 = pix_abs16x16_c;
    c->pix_abs16x16_x2 = pix_abs16x16_x2_c;
    c->pix_abs16x16_y2 = pix_abs16x16_y2_c;
    c->pix_abs16x16_xy2 = pix_abs16x16_xy2_c;
    c->pix_abs8x8 = pix_abs8x8_c;
    c->pix_abs8x8_x2 = pix_abs8x8_x2_c;
    c->pix_abs8x8_y2 = pix_abs8x8_y2_c;
    c->pix_abs8x8_xy2 = pix_abs8x8_xy2_c;

    /* motion-compensation tables: [0]=16x16, [1]=8x8;
       second index: 0=full-pel, 1=half-pel x, 2=half-pel y, 3=half-pel xy */
    c->put_pixels_tab[0][0] = put_pixels16;
    c->put_pixels_tab[0][1] = put_pixels16_x2;
    c->put_pixels_tab[0][2] = put_pixels16_y2;
    c->put_pixels_tab[0][3] = put_pixels16_xy2;

    /* no-rounding variants; full-pel needs no rounding, so [0] is shared */
    c->put_no_rnd_pixels_tab[0][0] = put_pixels16;
    c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2;
    c->put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2;
    c->put_no_rnd_pixels_tab[0][3] = put_no_rnd_pixels16_xy2;

    c->avg_pixels_tab[0][0] = avg_pixels16;
    c->avg_pixels_tab[0][1] = avg_pixels16_x2;
    c->avg_pixels_tab[0][2] = avg_pixels16_y2;
    c->avg_pixels_tab[0][3] = avg_pixels16_xy2;

    c->avg_no_rnd_pixels_tab[0][0] = avg_no_rnd_pixels16;
    c->avg_no_rnd_pixels_tab[0][1] = avg_no_rnd_pixels16_x2;
    c->avg_no_rnd_pixels_tab[0][2] = avg_no_rnd_pixels16_y2;
    c->avg_no_rnd_pixels_tab[0][3] = avg_no_rnd_pixels16_xy2;

    c->put_pixels_tab[1][0] = put_pixels8;
    c->put_pixels_tab[1][1] = put_pixels8_x2;
    c->put_pixels_tab[1][2] = put_pixels8_y2;
    c->put_pixels_tab[1][3] = put_pixels8_xy2;

    c->put_no_rnd_pixels_tab[1][0] = put_pixels8;
    c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2;
    c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2;
    c->put_no_rnd_pixels_tab[1][3] = put_no_rnd_pixels8_xy2;

    c->avg_pixels_tab[1][0] = avg_pixels8;
    c->avg_pixels_tab[1][1] = avg_pixels8_x2;
    c->avg_pixels_tab[1][2] = avg_pixels8_y2;
    c->avg_pixels_tab[1][3] = avg_pixels8_xy2;

    c->avg_no_rnd_pixels_tab[1][0] = avg_no_rnd_pixels8;
    c->avg_no_rnd_pixels_tab[1][1] = avg_no_rnd_pixels8_x2;
    c->avg_no_rnd_pixels_tab[1][2] = avg_no_rnd_pixels8_y2;
    c->avg_no_rnd_pixels_tab[1][3] = avg_no_rnd_pixels8_xy2;

    /* ---- architecture-specific overrides (must come last) ---- */
#ifdef HAVE_MMX
    dsputil_init_mmx(c, mask);
#endif
#ifdef ARCH_ARMV4L
    dsputil_init_armv4l(c, mask);
#endif
#ifdef HAVE_MLIB
    dsputil_init_mlib(c, mask);
#endif
#ifdef ARCH_ALPHA
    dsputil_init_alpha(c, mask);
#endif
#ifdef ARCH_POWERPC
    dsputil_init_ppc(c, mask);
#endif
#ifdef HAVE_MMI
    dsputil_init_mmi(c, mask);
#endif

    /* inverse zigzag, stored 1-based (0 means "not present") */
    for(i=0; i<64; i++) inv_zigzag_direct16[ff_zigzag_direct[i]]= i+1;
}
/* remove any non bit exact operation (testing purpose) */
/* Force bit-exact mode globally: sets the ff_bit_exact flag so code paths
   that would use approximate (faster) operations pick exact ones instead.
   The MMX-specific switch is not yet implemented (see #warning below). */
void avcodec_set_bit_exact(void)
{
    ff_bit_exact=1;
#ifdef HAVE_MMX
#warning FIXME - set_bit_exact
//    dsputil_set_bit_exact_mmx();
#endif
}
  1598. void get_psnr(UINT8 *orig_image[3], UINT8 *coded_image[3],
  1599. int orig_linesize[3], int coded_linesize,
  1600. AVCodecContext *avctx)
  1601. {
  1602. int quad, diff, x, y;
  1603. UINT8 *orig, *coded;
  1604. UINT32 *sq = squareTbl + 256;
  1605. quad = 0;
  1606. diff = 0;
  1607. /* Luminance */
  1608. orig = orig_image[0];
  1609. coded = coded_image[0];
  1610. for (y=0;y<avctx->height;y++) {
  1611. for (x=0;x<avctx->width;x++) {
  1612. diff = *(orig + x) - *(coded + x);
  1613. quad += sq[diff];
  1614. }
  1615. orig += orig_linesize[0];
  1616. coded += coded_linesize;
  1617. }
  1618. avctx->psnr_y = (float) quad / (float) (avctx->width * avctx->height);
  1619. if (avctx->psnr_y) {
  1620. avctx->psnr_y = (float) (255 * 255) / avctx->psnr_y;
  1621. avctx->psnr_y = 10 * (float) log10 (avctx->psnr_y);
  1622. } else
  1623. avctx->psnr_y = 99.99;
  1624. }