You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

1340 lines
44KB

  1. /*
  2. * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder
  3. * Copyright (c) 2003-2011 Michael Niedermayer <michaelni@gmx.at>
  4. *
  5. * This file is part of FFmpeg.
  6. *
  7. * FFmpeg is free software; you can redistribute it and/or
  8. * modify it under the terms of the GNU Lesser General Public
  9. * License as published by the Free Software Foundation; either
  10. * version 2.1 of the License, or (at your option) any later version.
  11. *
  12. * FFmpeg is distributed in the hope that it will be useful,
  13. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  15. * Lesser General Public License for more details.
  16. *
  17. * You should have received a copy of the GNU Lesser General Public
  18. * License along with FFmpeg; if not, write to the Free Software
  19. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  20. */
  21. /**
  22. * @file
  23. * H.264 / AVC / MPEG4 part10 prediction functions.
  24. * @author Michael Niedermayer <michaelni@gmx.at>
  25. */
  26. #include "mathops.h"
  27. #include "h264_high_depth.h"
  28. static void FUNCC(pred4x4_vertical)(uint8_t *p_src, const uint8_t *topright, int p_stride){
  29. pixel *src = (pixel*)p_src;
  30. int stride = p_stride>>(sizeof(pixel)-1);
  31. const pixel4 a= ((pixel4*)(src-stride))[0];
  32. ((pixel4*)(src+0*stride))[0]= a;
  33. ((pixel4*)(src+1*stride))[0]= a;
  34. ((pixel4*)(src+2*stride))[0]= a;
  35. ((pixel4*)(src+3*stride))[0]= a;
  36. }
  37. static void FUNCC(pred4x4_horizontal)(uint8_t *p_src, const uint8_t *topright, int p_stride){
  38. pixel *src = (pixel*)p_src;
  39. int stride = p_stride>>(sizeof(pixel)-1);
  40. ((pixel4*)(src+0*stride))[0]= PIXEL_SPLAT_X4(src[-1+0*stride]);
  41. ((pixel4*)(src+1*stride))[0]= PIXEL_SPLAT_X4(src[-1+1*stride]);
  42. ((pixel4*)(src+2*stride))[0]= PIXEL_SPLAT_X4(src[-1+2*stride]);
  43. ((pixel4*)(src+3*stride))[0]= PIXEL_SPLAT_X4(src[-1+3*stride]);
  44. }
  45. static void FUNCC(pred4x4_dc)(uint8_t *p_src, const uint8_t *topright, int p_stride){
  46. pixel *src = (pixel*)p_src;
  47. int stride = p_stride>>(sizeof(pixel)-1);
  48. const int dc= ( src[-stride] + src[1-stride] + src[2-stride] + src[3-stride]
  49. + src[-1+0*stride] + src[-1+1*stride] + src[-1+2*stride] + src[-1+3*stride] + 4) >>3;
  50. ((pixel4*)(src+0*stride))[0]=
  51. ((pixel4*)(src+1*stride))[0]=
  52. ((pixel4*)(src+2*stride))[0]=
  53. ((pixel4*)(src+3*stride))[0]= PIXEL_SPLAT_X4(dc);
  54. }
  55. static void FUNCC(pred4x4_left_dc)(uint8_t *p_src, const uint8_t *topright, int p_stride){
  56. pixel *src = (pixel*)p_src;
  57. int stride = p_stride>>(sizeof(pixel)-1);
  58. const int dc= ( src[-1+0*stride] + src[-1+1*stride] + src[-1+2*stride] + src[-1+3*stride] + 2) >>2;
  59. ((pixel4*)(src+0*stride))[0]=
  60. ((pixel4*)(src+1*stride))[0]=
  61. ((pixel4*)(src+2*stride))[0]=
  62. ((pixel4*)(src+3*stride))[0]= PIXEL_SPLAT_X4(dc);
  63. }
  64. static void FUNCC(pred4x4_top_dc)(uint8_t *p_src, const uint8_t *topright, int p_stride){
  65. pixel *src = (pixel*)p_src;
  66. int stride = p_stride>>(sizeof(pixel)-1);
  67. const int dc= ( src[-stride] + src[1-stride] + src[2-stride] + src[3-stride] + 2) >>2;
  68. ((pixel4*)(src+0*stride))[0]=
  69. ((pixel4*)(src+1*stride))[0]=
  70. ((pixel4*)(src+2*stride))[0]=
  71. ((pixel4*)(src+3*stride))[0]= PIXEL_SPLAT_X4(dc);
  72. }
  73. static void FUNCC(pred4x4_128_dc)(uint8_t *p_src, const uint8_t *topright, int p_stride){
  74. pixel *src = (pixel*)p_src;
  75. int stride = p_stride>>(sizeof(pixel)-1);
  76. ((pixel4*)(src+0*stride))[0]=
  77. ((pixel4*)(src+1*stride))[0]=
  78. ((pixel4*)(src+2*stride))[0]=
  79. ((pixel4*)(src+3*stride))[0]= PIXEL_SPLAT_X4(1<<(BIT_DEPTH-1));
  80. }
  81. static void FUNCC(pred4x4_127_dc)(uint8_t *p_src, const uint8_t *topright, int p_stride){
  82. pixel *src = (pixel*)p_src;
  83. int stride = p_stride>>(sizeof(pixel)-1);
  84. ((pixel4*)(src+0*stride))[0]=
  85. ((pixel4*)(src+1*stride))[0]=
  86. ((pixel4*)(src+2*stride))[0]=
  87. ((pixel4*)(src+3*stride))[0]= PIXEL_SPLAT_X4((1<<(BIT_DEPTH-1))-1);
  88. }
  89. static void FUNCC(pred4x4_129_dc)(uint8_t *p_src, const uint8_t *topright, int p_stride){
  90. pixel *src = (pixel*)p_src;
  91. int stride = p_stride>>(sizeof(pixel)-1);
  92. ((pixel4*)(src+0*stride))[0]=
  93. ((pixel4*)(src+1*stride))[0]=
  94. ((pixel4*)(src+2*stride))[0]=
  95. ((pixel4*)(src+3*stride))[0]= PIXEL_SPLAT_X4((1<<(BIT_DEPTH-1))+1);
  96. }
  97. #define LOAD_TOP_RIGHT_EDGE\
  98. const int av_unused t4= topright[0];\
  99. const int av_unused t5= topright[1];\
  100. const int av_unused t6= topright[2];\
  101. const int av_unused t7= topright[3];\
  102. #define LOAD_DOWN_LEFT_EDGE\
  103. const int av_unused l4= src[-1+4*stride];\
  104. const int av_unused l5= src[-1+5*stride];\
  105. const int av_unused l6= src[-1+6*stride];\
  106. const int av_unused l7= src[-1+7*stride];\
  107. #define LOAD_LEFT_EDGE\
  108. const int av_unused l0= src[-1+0*stride];\
  109. const int av_unused l1= src[-1+1*stride];\
  110. const int av_unused l2= src[-1+2*stride];\
  111. const int av_unused l3= src[-1+3*stride];\
  112. #define LOAD_TOP_EDGE\
  113. const int av_unused t0= src[ 0-1*stride];\
  114. const int av_unused t1= src[ 1-1*stride];\
  115. const int av_unused t2= src[ 2-1*stride];\
  116. const int av_unused t3= src[ 3-1*stride];\
  117. static void FUNCC(pred4x4_vertical_vp8)(uint8_t *p_src, const uint8_t *p_topright, int p_stride){
  118. pixel *src = (pixel*)p_src;
  119. const pixel *topright = (const pixel*)p_topright;
  120. int stride = p_stride>>(sizeof(pixel)-1);
  121. const int lt= src[-1-1*stride];
  122. LOAD_TOP_EDGE
  123. LOAD_TOP_RIGHT_EDGE
  124. pixel4 v = PACK_4U8((lt + 2*t0 + t1 + 2) >> 2,
  125. (t0 + 2*t1 + t2 + 2) >> 2,
  126. (t1 + 2*t2 + t3 + 2) >> 2,
  127. (t2 + 2*t3 + t4 + 2) >> 2);
  128. AV_WN4PA(src+0*stride, v);
  129. AV_WN4PA(src+1*stride, v);
  130. AV_WN4PA(src+2*stride, v);
  131. AV_WN4PA(src+3*stride, v);
  132. }
  133. static void FUNCC(pred4x4_horizontal_vp8)(uint8_t *p_src, const uint8_t *topright, int p_stride){
  134. pixel *src = (pixel*)p_src;
  135. int stride = p_stride>>(sizeof(pixel)-1);
  136. const int lt= src[-1-1*stride];
  137. LOAD_LEFT_EDGE
  138. AV_WN4PA(src+0*stride, PIXEL_SPLAT_X4((lt + 2*l0 + l1 + 2) >> 2));
  139. AV_WN4PA(src+1*stride, PIXEL_SPLAT_X4((l0 + 2*l1 + l2 + 2) >> 2));
  140. AV_WN4PA(src+2*stride, PIXEL_SPLAT_X4((l1 + 2*l2 + l3 + 2) >> 2));
  141. AV_WN4PA(src+3*stride, PIXEL_SPLAT_X4((l2 + 2*l3 + l3 + 2) >> 2));
  142. }
  143. static void FUNCC(pred4x4_down_right)(uint8_t *p_src, const uint8_t *topright, int p_stride){
  144. pixel *src = (pixel*)p_src;
  145. int stride = p_stride>>(sizeof(pixel)-1);
  146. const int lt= src[-1-1*stride];
  147. LOAD_TOP_EDGE
  148. LOAD_LEFT_EDGE
  149. src[0+3*stride]=(l3 + 2*l2 + l1 + 2)>>2;
  150. src[0+2*stride]=
  151. src[1+3*stride]=(l2 + 2*l1 + l0 + 2)>>2;
  152. src[0+1*stride]=
  153. src[1+2*stride]=
  154. src[2+3*stride]=(l1 + 2*l0 + lt + 2)>>2;
  155. src[0+0*stride]=
  156. src[1+1*stride]=
  157. src[2+2*stride]=
  158. src[3+3*stride]=(l0 + 2*lt + t0 + 2)>>2;
  159. src[1+0*stride]=
  160. src[2+1*stride]=
  161. src[3+2*stride]=(lt + 2*t0 + t1 + 2)>>2;
  162. src[2+0*stride]=
  163. src[3+1*stride]=(t0 + 2*t1 + t2 + 2)>>2;
  164. src[3+0*stride]=(t1 + 2*t2 + t3 + 2)>>2;
  165. }
  166. static void FUNCC(pred4x4_down_left)(uint8_t *p_src, const uint8_t *p_topright, int p_stride){
  167. pixel *src = (pixel*)p_src;
  168. const pixel *topright = (const pixel*)p_topright;
  169. int stride = p_stride>>(sizeof(pixel)-1);
  170. LOAD_TOP_EDGE
  171. LOAD_TOP_RIGHT_EDGE
  172. // LOAD_LEFT_EDGE
  173. src[0+0*stride]=(t0 + t2 + 2*t1 + 2)>>2;
  174. src[1+0*stride]=
  175. src[0+1*stride]=(t1 + t3 + 2*t2 + 2)>>2;
  176. src[2+0*stride]=
  177. src[1+1*stride]=
  178. src[0+2*stride]=(t2 + t4 + 2*t3 + 2)>>2;
  179. src[3+0*stride]=
  180. src[2+1*stride]=
  181. src[1+2*stride]=
  182. src[0+3*stride]=(t3 + t5 + 2*t4 + 2)>>2;
  183. src[3+1*stride]=
  184. src[2+2*stride]=
  185. src[1+3*stride]=(t4 + t6 + 2*t5 + 2)>>2;
  186. src[3+2*stride]=
  187. src[2+3*stride]=(t5 + t7 + 2*t6 + 2)>>2;
  188. src[3+3*stride]=(t6 + 3*t7 + 2)>>2;
  189. }
  190. static void FUNCC(pred4x4_down_left_svq3)(uint8_t *p_src, const uint8_t *topright, int p_stride){
  191. pixel *src = (pixel*)p_src;
  192. int stride = p_stride>>(sizeof(pixel)-1);
  193. LOAD_TOP_EDGE
  194. LOAD_LEFT_EDGE
  195. const av_unused int unu0= t0;
  196. const av_unused int unu1= l0;
  197. src[0+0*stride]=(l1 + t1)>>1;
  198. src[1+0*stride]=
  199. src[0+1*stride]=(l2 + t2)>>1;
  200. src[2+0*stride]=
  201. src[1+1*stride]=
  202. src[0+2*stride]=
  203. src[3+0*stride]=
  204. src[2+1*stride]=
  205. src[1+2*stride]=
  206. src[0+3*stride]=
  207. src[3+1*stride]=
  208. src[2+2*stride]=
  209. src[1+3*stride]=
  210. src[3+2*stride]=
  211. src[2+3*stride]=
  212. src[3+3*stride]=(l3 + t3)>>1;
  213. }
  214. static void FUNCC(pred4x4_down_left_rv40)(uint8_t *p_src, const uint8_t *p_topright, int p_stride){
  215. pixel *src = (pixel*)p_src;
  216. const pixel *topright = (const pixel*)p_topright;
  217. int stride = p_stride>>(sizeof(pixel)-1);
  218. LOAD_TOP_EDGE
  219. LOAD_TOP_RIGHT_EDGE
  220. LOAD_LEFT_EDGE
  221. LOAD_DOWN_LEFT_EDGE
  222. src[0+0*stride]=(t0 + t2 + 2*t1 + 2 + l0 + l2 + 2*l1 + 2)>>3;
  223. src[1+0*stride]=
  224. src[0+1*stride]=(t1 + t3 + 2*t2 + 2 + l1 + l3 + 2*l2 + 2)>>3;
  225. src[2+0*stride]=
  226. src[1+1*stride]=
  227. src[0+2*stride]=(t2 + t4 + 2*t3 + 2 + l2 + l4 + 2*l3 + 2)>>3;
  228. src[3+0*stride]=
  229. src[2+1*stride]=
  230. src[1+2*stride]=
  231. src[0+3*stride]=(t3 + t5 + 2*t4 + 2 + l3 + l5 + 2*l4 + 2)>>3;
  232. src[3+1*stride]=
  233. src[2+2*stride]=
  234. src[1+3*stride]=(t4 + t6 + 2*t5 + 2 + l4 + l6 + 2*l5 + 2)>>3;
  235. src[3+2*stride]=
  236. src[2+3*stride]=(t5 + t7 + 2*t6 + 2 + l5 + l7 + 2*l6 + 2)>>3;
  237. src[3+3*stride]=(t6 + t7 + 1 + l6 + l7 + 1)>>2;
  238. }
  239. static void FUNCC(pred4x4_down_left_rv40_nodown)(uint8_t *p_src, const uint8_t *p_topright, int p_stride){
  240. pixel *src = (pixel*)p_src;
  241. const pixel *topright = (const pixel*)p_topright;
  242. int stride = p_stride>>(sizeof(pixel)-1);
  243. LOAD_TOP_EDGE
  244. LOAD_TOP_RIGHT_EDGE
  245. LOAD_LEFT_EDGE
  246. src[0+0*stride]=(t0 + t2 + 2*t1 + 2 + l0 + l2 + 2*l1 + 2)>>3;
  247. src[1+0*stride]=
  248. src[0+1*stride]=(t1 + t3 + 2*t2 + 2 + l1 + l3 + 2*l2 + 2)>>3;
  249. src[2+0*stride]=
  250. src[1+1*stride]=
  251. src[0+2*stride]=(t2 + t4 + 2*t3 + 2 + l2 + 3*l3 + 2)>>3;
  252. src[3+0*stride]=
  253. src[2+1*stride]=
  254. src[1+2*stride]=
  255. src[0+3*stride]=(t3 + t5 + 2*t4 + 2 + l3*4 + 2)>>3;
  256. src[3+1*stride]=
  257. src[2+2*stride]=
  258. src[1+3*stride]=(t4 + t6 + 2*t5 + 2 + l3*4 + 2)>>3;
  259. src[3+2*stride]=
  260. src[2+3*stride]=(t5 + t7 + 2*t6 + 2 + l3*4 + 2)>>3;
  261. src[3+3*stride]=(t6 + t7 + 1 + 2*l3 + 1)>>2;
  262. }
  263. static void FUNCC(pred4x4_vertical_right)(uint8_t *p_src, const uint8_t *topright, int p_stride){
  264. pixel *src = (pixel*)p_src;
  265. int stride = p_stride>>(sizeof(pixel)-1);
  266. const int lt= src[-1-1*stride];
  267. LOAD_TOP_EDGE
  268. LOAD_LEFT_EDGE
  269. src[0+0*stride]=
  270. src[1+2*stride]=(lt + t0 + 1)>>1;
  271. src[1+0*stride]=
  272. src[2+2*stride]=(t0 + t1 + 1)>>1;
  273. src[2+0*stride]=
  274. src[3+2*stride]=(t1 + t2 + 1)>>1;
  275. src[3+0*stride]=(t2 + t3 + 1)>>1;
  276. src[0+1*stride]=
  277. src[1+3*stride]=(l0 + 2*lt + t0 + 2)>>2;
  278. src[1+1*stride]=
  279. src[2+3*stride]=(lt + 2*t0 + t1 + 2)>>2;
  280. src[2+1*stride]=
  281. src[3+3*stride]=(t0 + 2*t1 + t2 + 2)>>2;
  282. src[3+1*stride]=(t1 + 2*t2 + t3 + 2)>>2;
  283. src[0+2*stride]=(lt + 2*l0 + l1 + 2)>>2;
  284. src[0+3*stride]=(l0 + 2*l1 + l2 + 2)>>2;
  285. }
  286. static void FUNCC(pred4x4_vertical_left)(uint8_t *p_src, const uint8_t *p_topright, int p_stride){
  287. pixel *src = (pixel*)p_src;
  288. const pixel *topright = (const pixel*)p_topright;
  289. int stride = p_stride>>(sizeof(pixel)-1);
  290. LOAD_TOP_EDGE
  291. LOAD_TOP_RIGHT_EDGE
  292. src[0+0*stride]=(t0 + t1 + 1)>>1;
  293. src[1+0*stride]=
  294. src[0+2*stride]=(t1 + t2 + 1)>>1;
  295. src[2+0*stride]=
  296. src[1+2*stride]=(t2 + t3 + 1)>>1;
  297. src[3+0*stride]=
  298. src[2+2*stride]=(t3 + t4+ 1)>>1;
  299. src[3+2*stride]=(t4 + t5+ 1)>>1;
  300. src[0+1*stride]=(t0 + 2*t1 + t2 + 2)>>2;
  301. src[1+1*stride]=
  302. src[0+3*stride]=(t1 + 2*t2 + t3 + 2)>>2;
  303. src[2+1*stride]=
  304. src[1+3*stride]=(t2 + 2*t3 + t4 + 2)>>2;
  305. src[3+1*stride]=
  306. src[2+3*stride]=(t3 + 2*t4 + t5 + 2)>>2;
  307. src[3+3*stride]=(t4 + 2*t5 + t6 + 2)>>2;
  308. }
  309. static void FUNCC(pred4x4_vertical_left_rv40_internal)(uint8_t *p_src, const uint8_t *p_topright, int p_stride,
  310. const int l0, const int l1, const int l2, const int l3, const int l4){
  311. pixel *src = (pixel*)p_src;
  312. const pixel *topright = (const pixel*)p_topright;
  313. int stride = p_stride>>(sizeof(pixel)-1);
  314. LOAD_TOP_EDGE
  315. LOAD_TOP_RIGHT_EDGE
  316. src[0+0*stride]=(2*t0 + 2*t1 + l1 + 2*l2 + l3 + 4)>>3;
  317. src[1+0*stride]=
  318. src[0+2*stride]=(t1 + t2 + 1)>>1;
  319. src[2+0*stride]=
  320. src[1+2*stride]=(t2 + t3 + 1)>>1;
  321. src[3+0*stride]=
  322. src[2+2*stride]=(t3 + t4+ 1)>>1;
  323. src[3+2*stride]=(t4 + t5+ 1)>>1;
  324. src[0+1*stride]=(t0 + 2*t1 + t2 + l2 + 2*l3 + l4 + 4)>>3;
  325. src[1+1*stride]=
  326. src[0+3*stride]=(t1 + 2*t2 + t3 + 2)>>2;
  327. src[2+1*stride]=
  328. src[1+3*stride]=(t2 + 2*t3 + t4 + 2)>>2;
  329. src[3+1*stride]=
  330. src[2+3*stride]=(t3 + 2*t4 + t5 + 2)>>2;
  331. src[3+3*stride]=(t4 + 2*t5 + t6 + 2)>>2;
  332. }
  333. static void FUNCC(pred4x4_vertical_left_rv40)(uint8_t *p_src, const uint8_t *topright, int p_stride){
  334. pixel *src = (pixel*)p_src;
  335. int stride = p_stride>>(sizeof(pixel)-1);
  336. LOAD_LEFT_EDGE
  337. LOAD_DOWN_LEFT_EDGE
  338. FUNCC(pred4x4_vertical_left_rv40_internal)(p_src, topright, p_stride, l0, l1, l2, l3, l4);
  339. }
  340. static void FUNCC(pred4x4_vertical_left_rv40_nodown)(uint8_t *p_src, const uint8_t *topright, int p_stride){
  341. pixel *src = (pixel*)p_src;
  342. int stride = p_stride>>(sizeof(pixel)-1);
  343. LOAD_LEFT_EDGE
  344. FUNCC(pred4x4_vertical_left_rv40_internal)(p_src, topright, p_stride, l0, l1, l2, l3, l3);
  345. }
  346. static void FUNCC(pred4x4_vertical_left_vp8)(uint8_t *p_src, const uint8_t *p_topright, int p_stride){
  347. pixel *src = (pixel*)p_src;
  348. const pixel *topright = (const pixel*)p_topright;
  349. int stride = p_stride>>(sizeof(pixel)-1);
  350. LOAD_TOP_EDGE
  351. LOAD_TOP_RIGHT_EDGE
  352. src[0+0*stride]=(t0 + t1 + 1)>>1;
  353. src[1+0*stride]=
  354. src[0+2*stride]=(t1 + t2 + 1)>>1;
  355. src[2+0*stride]=
  356. src[1+2*stride]=(t2 + t3 + 1)>>1;
  357. src[3+0*stride]=
  358. src[2+2*stride]=(t3 + t4 + 1)>>1;
  359. src[0+1*stride]=(t0 + 2*t1 + t2 + 2)>>2;
  360. src[1+1*stride]=
  361. src[0+3*stride]=(t1 + 2*t2 + t3 + 2)>>2;
  362. src[2+1*stride]=
  363. src[1+3*stride]=(t2 + 2*t3 + t4 + 2)>>2;
  364. src[3+1*stride]=
  365. src[2+3*stride]=(t3 + 2*t4 + t5 + 2)>>2;
  366. src[3+2*stride]=(t4 + 2*t5 + t6 + 2)>>2;
  367. src[3+3*stride]=(t5 + 2*t6 + t7 + 2)>>2;
  368. }
  369. static void FUNCC(pred4x4_horizontal_up)(uint8_t *p_src, const uint8_t *topright, int p_stride){
  370. pixel *src = (pixel*)p_src;
  371. int stride = p_stride>>(sizeof(pixel)-1);
  372. LOAD_LEFT_EDGE
  373. src[0+0*stride]=(l0 + l1 + 1)>>1;
  374. src[1+0*stride]=(l0 + 2*l1 + l2 + 2)>>2;
  375. src[2+0*stride]=
  376. src[0+1*stride]=(l1 + l2 + 1)>>1;
  377. src[3+0*stride]=
  378. src[1+1*stride]=(l1 + 2*l2 + l3 + 2)>>2;
  379. src[2+1*stride]=
  380. src[0+2*stride]=(l2 + l3 + 1)>>1;
  381. src[3+1*stride]=
  382. src[1+2*stride]=(l2 + 2*l3 + l3 + 2)>>2;
  383. src[3+2*stride]=
  384. src[1+3*stride]=
  385. src[0+3*stride]=
  386. src[2+2*stride]=
  387. src[2+3*stride]=
  388. src[3+3*stride]=l3;
  389. }
  390. static void FUNCC(pred4x4_horizontal_up_rv40)(uint8_t *p_src, const uint8_t *p_topright, int p_stride){
  391. pixel *src = (pixel*)p_src;
  392. const pixel *topright = (const pixel*)p_topright;
  393. int stride = p_stride>>(sizeof(pixel)-1);
  394. LOAD_LEFT_EDGE
  395. LOAD_DOWN_LEFT_EDGE
  396. LOAD_TOP_EDGE
  397. LOAD_TOP_RIGHT_EDGE
  398. src[0+0*stride]=(t1 + 2*t2 + t3 + 2*l0 + 2*l1 + 4)>>3;
  399. src[1+0*stride]=(t2 + 2*t3 + t4 + l0 + 2*l1 + l2 + 4)>>3;
  400. src[2+0*stride]=
  401. src[0+1*stride]=(t3 + 2*t4 + t5 + 2*l1 + 2*l2 + 4)>>3;
  402. src[3+0*stride]=
  403. src[1+1*stride]=(t4 + 2*t5 + t6 + l1 + 2*l2 + l3 + 4)>>3;
  404. src[2+1*stride]=
  405. src[0+2*stride]=(t5 + 2*t6 + t7 + 2*l2 + 2*l3 + 4)>>3;
  406. src[3+1*stride]=
  407. src[1+2*stride]=(t6 + 3*t7 + l2 + 3*l3 + 4)>>3;
  408. src[3+2*stride]=
  409. src[1+3*stride]=(l3 + 2*l4 + l5 + 2)>>2;
  410. src[0+3*stride]=
  411. src[2+2*stride]=(t6 + t7 + l3 + l4 + 2)>>2;
  412. src[2+3*stride]=(l4 + l5 + 1)>>1;
  413. src[3+3*stride]=(l4 + 2*l5 + l6 + 2)>>2;
  414. }
  415. static void FUNCC(pred4x4_horizontal_up_rv40_nodown)(uint8_t *p_src, const uint8_t *p_topright, int p_stride){
  416. pixel *src = (pixel*)p_src;
  417. const pixel *topright = (const pixel*)p_topright;
  418. int stride = p_stride>>(sizeof(pixel)-1);
  419. LOAD_LEFT_EDGE
  420. LOAD_TOP_EDGE
  421. LOAD_TOP_RIGHT_EDGE
  422. src[0+0*stride]=(t1 + 2*t2 + t3 + 2*l0 + 2*l1 + 4)>>3;
  423. src[1+0*stride]=(t2 + 2*t3 + t4 + l0 + 2*l1 + l2 + 4)>>3;
  424. src[2+0*stride]=
  425. src[0+1*stride]=(t3 + 2*t4 + t5 + 2*l1 + 2*l2 + 4)>>3;
  426. src[3+0*stride]=
  427. src[1+1*stride]=(t4 + 2*t5 + t6 + l1 + 2*l2 + l3 + 4)>>3;
  428. src[2+1*stride]=
  429. src[0+2*stride]=(t5 + 2*t6 + t7 + 2*l2 + 2*l3 + 4)>>3;
  430. src[3+1*stride]=
  431. src[1+2*stride]=(t6 + 3*t7 + l2 + 3*l3 + 4)>>3;
  432. src[3+2*stride]=
  433. src[1+3*stride]=l3;
  434. src[0+3*stride]=
  435. src[2+2*stride]=(t6 + t7 + 2*l3 + 2)>>2;
  436. src[2+3*stride]=
  437. src[3+3*stride]=l3;
  438. }
  439. static void FUNCC(pred4x4_horizontal_down)(uint8_t *p_src, const uint8_t *topright, int p_stride){
  440. pixel *src = (pixel*)p_src;
  441. int stride = p_stride>>(sizeof(pixel)-1);
  442. const int lt= src[-1-1*stride];
  443. LOAD_TOP_EDGE
  444. LOAD_LEFT_EDGE
  445. src[0+0*stride]=
  446. src[2+1*stride]=(lt + l0 + 1)>>1;
  447. src[1+0*stride]=
  448. src[3+1*stride]=(l0 + 2*lt + t0 + 2)>>2;
  449. src[2+0*stride]=(lt + 2*t0 + t1 + 2)>>2;
  450. src[3+0*stride]=(t0 + 2*t1 + t2 + 2)>>2;
  451. src[0+1*stride]=
  452. src[2+2*stride]=(l0 + l1 + 1)>>1;
  453. src[1+1*stride]=
  454. src[3+2*stride]=(lt + 2*l0 + l1 + 2)>>2;
  455. src[0+2*stride]=
  456. src[2+3*stride]=(l1 + l2+ 1)>>1;
  457. src[1+2*stride]=
  458. src[3+3*stride]=(l0 + 2*l1 + l2 + 2)>>2;
  459. src[0+3*stride]=(l2 + l3 + 1)>>1;
  460. src[1+3*stride]=(l1 + 2*l2 + l3 + 2)>>2;
  461. }
  462. static void FUNCC(pred4x4_tm_vp8)(uint8_t *p_src, const uint8_t *topright, int p_stride){
  463. pixel *src = (pixel*)p_src;
  464. int stride = p_stride>>(sizeof(pixel)-1);
  465. uint8_t *cm = ff_cropTbl + MAX_NEG_CROP - src[-1-stride];
  466. pixel *top = src-stride;
  467. int y;
  468. for (y = 0; y < 4; y++) {
  469. uint8_t *cm_in = cm + src[-1];
  470. src[0] = cm_in[top[0]];
  471. src[1] = cm_in[top[1]];
  472. src[2] = cm_in[top[2]];
  473. src[3] = cm_in[top[3]];
  474. src += stride;
  475. }
  476. }
  477. static void FUNCC(pred16x16_vertical)(uint8_t *p_src, int p_stride){
  478. int i;
  479. pixel *src = (pixel*)p_src;
  480. int stride = p_stride>>(sizeof(pixel)-1);
  481. const pixel4 a = ((pixel4*)(src-stride))[0];
  482. const pixel4 b = ((pixel4*)(src-stride))[1];
  483. const pixel4 c = ((pixel4*)(src-stride))[2];
  484. const pixel4 d = ((pixel4*)(src-stride))[3];
  485. for(i=0; i<16; i++){
  486. ((pixel4*)(src+i*stride))[0] = a;
  487. ((pixel4*)(src+i*stride))[1] = b;
  488. ((pixel4*)(src+i*stride))[2] = c;
  489. ((pixel4*)(src+i*stride))[3] = d;
  490. }
  491. }
  492. static void FUNCC(pred16x16_horizontal)(uint8_t *p_src, int stride){
  493. int i;
  494. pixel *src = (pixel*)p_src;
  495. stride >>= sizeof(pixel)-1;
  496. for(i=0; i<16; i++){
  497. ((pixel4*)(src+i*stride))[0] =
  498. ((pixel4*)(src+i*stride))[1] =
  499. ((pixel4*)(src+i*stride))[2] =
  500. ((pixel4*)(src+i*stride))[3] = PIXEL_SPLAT_X4(src[-1+i*stride]);
  501. }
  502. }
  503. #define PREDICT_16x16_DC(v)\
  504. for(i=0; i<16; i++){\
  505. AV_WN4P(src+ 0, v);\
  506. AV_WN4P(src+ 4, v);\
  507. AV_WN4P(src+ 8, v);\
  508. AV_WN4P(src+12, v);\
  509. src += stride;\
  510. }
  511. static void FUNCC(pred16x16_dc)(uint8_t *p_src, int stride){
  512. int i, dc=0;
  513. pixel *src = (pixel*)p_src;
  514. pixel4 dcsplat;
  515. stride >>= sizeof(pixel)-1;
  516. for(i=0;i<16; i++){
  517. dc+= src[-1+i*stride];
  518. }
  519. for(i=0;i<16; i++){
  520. dc+= src[i-stride];
  521. }
  522. dcsplat = PIXEL_SPLAT_X4((dc+16)>>5);
  523. PREDICT_16x16_DC(dcsplat);
  524. }
  525. static void FUNCC(pred16x16_left_dc)(uint8_t *p_src, int stride){
  526. int i, dc=0;
  527. pixel *src = (pixel*)p_src;
  528. pixel4 dcsplat;
  529. stride >>= sizeof(pixel)-1;
  530. for(i=0;i<16; i++){
  531. dc+= src[-1+i*stride];
  532. }
  533. dcsplat = PIXEL_SPLAT_X4((dc+8)>>4);
  534. PREDICT_16x16_DC(dcsplat);
  535. }
  536. static void FUNCC(pred16x16_top_dc)(uint8_t *p_src, int stride){
  537. int i, dc=0;
  538. pixel *src = (pixel*)p_src;
  539. pixel4 dcsplat;
  540. stride >>= sizeof(pixel)-1;
  541. for(i=0;i<16; i++){
  542. dc+= src[i-stride];
  543. }
  544. dcsplat = PIXEL_SPLAT_X4((dc+8)>>4);
  545. PREDICT_16x16_DC(dcsplat);
  546. }
  547. #define PRED16x16_X(n, v) \
  548. static void FUNCC(pred16x16_##n##_dc)(uint8_t *p_src, int stride){\
  549. int i;\
  550. pixel *src = (pixel*)p_src;\
  551. stride >>= sizeof(pixel)-1;\
  552. PREDICT_16x16_DC(PIXEL_SPLAT_X4(v));\
  553. }
  554. PRED16x16_X(127, (1<<(BIT_DEPTH-1))-1);
  555. PRED16x16_X(128, (1<<(BIT_DEPTH-1))+0);
  556. PRED16x16_X(129, (1<<(BIT_DEPTH-1))+1);
  557. static inline void FUNCC(pred16x16_plane_compat)(uint8_t *p_src, int p_stride, const int svq3, const int rv40){
  558. int i, j, k;
  559. int a;
  560. INIT_CLIP
  561. pixel *src = (pixel*)p_src;
  562. int stride = p_stride>>(sizeof(pixel)-1);
  563. const pixel * const src0 = src +7-stride;
  564. const pixel * src1 = src +8*stride-1;
  565. const pixel * src2 = src1-2*stride; // == src+6*stride-1;
  566. int H = src0[1] - src0[-1];
  567. int V = src1[0] - src2[ 0];
  568. for(k=2; k<=8; ++k) {
  569. src1 += stride; src2 -= stride;
  570. H += k*(src0[k] - src0[-k]);
  571. V += k*(src1[0] - src2[ 0]);
  572. }
  573. if(svq3){
  574. H = ( 5*(H/4) ) / 16;
  575. V = ( 5*(V/4) ) / 16;
  576. /* required for 100% accuracy */
  577. i = H; H = V; V = i;
  578. }else if(rv40){
  579. H = ( H + (H>>2) ) >> 4;
  580. V = ( V + (V>>2) ) >> 4;
  581. }else{
  582. H = ( 5*H+32 ) >> 6;
  583. V = ( 5*V+32 ) >> 6;
  584. }
  585. a = 16*(src1[0] + src2[16] + 1) - 7*(V+H);
  586. for(j=16; j>0; --j) {
  587. int b = a;
  588. a += V;
  589. for(i=-16; i<0; i+=4) {
  590. src[16+i] = CLIP((b ) >> 5);
  591. src[17+i] = CLIP((b+ H) >> 5);
  592. src[18+i] = CLIP((b+2*H) >> 5);
  593. src[19+i] = CLIP((b+3*H) >> 5);
  594. b += 4*H;
  595. }
  596. src += stride;
  597. }
  598. }
  599. static void FUNCC(pred16x16_plane)(uint8_t *src, int stride){
  600. FUNCC(pred16x16_plane_compat)(src, stride, 0, 0);
  601. }
  602. static void FUNCC(pred16x16_plane_svq3)(uint8_t *src, int stride){
  603. FUNCC(pred16x16_plane_compat)(src, stride, 1, 0);
  604. }
  605. static void FUNCC(pred16x16_plane_rv40)(uint8_t *src, int stride){
  606. FUNCC(pred16x16_plane_compat)(src, stride, 0, 1);
  607. }
  608. static void FUNCC(pred16x16_tm_vp8)(uint8_t *src, int stride){
  609. uint8_t *cm = ff_cropTbl + MAX_NEG_CROP - src[-1-stride];
  610. uint8_t *top = src-stride;
  611. int y;
  612. for (y = 0; y < 16; y++) {
  613. uint8_t *cm_in = cm + src[-1];
  614. src[0] = cm_in[top[0]];
  615. src[1] = cm_in[top[1]];
  616. src[2] = cm_in[top[2]];
  617. src[3] = cm_in[top[3]];
  618. src[4] = cm_in[top[4]];
  619. src[5] = cm_in[top[5]];
  620. src[6] = cm_in[top[6]];
  621. src[7] = cm_in[top[7]];
  622. src[8] = cm_in[top[8]];
  623. src[9] = cm_in[top[9]];
  624. src[10] = cm_in[top[10]];
  625. src[11] = cm_in[top[11]];
  626. src[12] = cm_in[top[12]];
  627. src[13] = cm_in[top[13]];
  628. src[14] = cm_in[top[14]];
  629. src[15] = cm_in[top[15]];
  630. src += stride;
  631. }
  632. }
  633. static void FUNCC(pred8x8_vertical)(uint8_t *p_src, int p_stride){
  634. int i;
  635. pixel *src = (pixel*)p_src;
  636. int stride = p_stride>>(sizeof(pixel)-1);
  637. const pixel4 a= ((pixel4*)(src-stride))[0];
  638. const pixel4 b= ((pixel4*)(src-stride))[1];
  639. for(i=0; i<8; i++){
  640. ((pixel4*)(src+i*stride))[0]= a;
  641. ((pixel4*)(src+i*stride))[1]= b;
  642. }
  643. }
  644. static void FUNCC(pred8x8_horizontal)(uint8_t *p_src, int stride){
  645. int i;
  646. pixel *src = (pixel*)p_src;
  647. stride >>= sizeof(pixel)-1;
  648. for(i=0; i<8; i++){
  649. ((pixel4*)(src+i*stride))[0]=
  650. ((pixel4*)(src+i*stride))[1]= PIXEL_SPLAT_X4(src[-1+i*stride]);
  651. }
  652. }
  653. #define PRED8x8_X(n, v)\
  654. static void FUNCC(pred8x8_##n##_dc)(uint8_t *p_src, int stride){\
  655. int i;\
  656. pixel *src = (pixel*)p_src;\
  657. stride >>= sizeof(pixel)-1;\
  658. for(i=0; i<8; i++){\
  659. ((pixel4*)(src+i*stride))[0]=\
  660. ((pixel4*)(src+i*stride))[1]= PIXEL_SPLAT_X4(v);\
  661. }\
  662. }
  663. PRED8x8_X(127, (1<<(BIT_DEPTH-1))-1);
  664. PRED8x8_X(128, (1<<(BIT_DEPTH-1))+0);
  665. PRED8x8_X(129, (1<<(BIT_DEPTH-1))+1);
  666. static void FUNCC(pred8x8_left_dc)(uint8_t *p_src, int stride){
  667. int i;
  668. int dc0, dc2;
  669. pixel4 dc0splat, dc2splat;
  670. pixel *src = (pixel*)p_src;
  671. stride >>= sizeof(pixel)-1;
  672. dc0=dc2=0;
  673. for(i=0;i<4; i++){
  674. dc0+= src[-1+i*stride];
  675. dc2+= src[-1+(i+4)*stride];
  676. }
  677. dc0splat = PIXEL_SPLAT_X4((dc0 + 2)>>2);
  678. dc2splat = PIXEL_SPLAT_X4((dc2 + 2)>>2);
  679. for(i=0; i<4; i++){
  680. ((pixel4*)(src+i*stride))[0]=
  681. ((pixel4*)(src+i*stride))[1]= dc0splat;
  682. }
  683. for(i=4; i<8; i++){
  684. ((pixel4*)(src+i*stride))[0]=
  685. ((pixel4*)(src+i*stride))[1]= dc2splat;
  686. }
  687. }
  688. static void FUNCC(pred8x8_left_dc_rv40)(uint8_t *p_src, int stride){
  689. int i;
  690. int dc0;
  691. pixel4 dc0splat;
  692. pixel *src = (pixel*)p_src;
  693. stride >>= sizeof(pixel)-1;
  694. dc0=0;
  695. for(i=0;i<8; i++)
  696. dc0+= src[-1+i*stride];
  697. dc0splat = PIXEL_SPLAT_X4((dc0 + 4)>>3);
  698. for(i=0; i<8; i++){
  699. ((pixel4*)(src+i*stride))[0]=
  700. ((pixel4*)(src+i*stride))[1]= dc0splat;
  701. }
  702. }
  703. static void FUNCC(pred8x8_top_dc)(uint8_t *p_src, int stride){
  704. int i;
  705. int dc0, dc1;
  706. pixel4 dc0splat, dc1splat;
  707. pixel *src = (pixel*)p_src;
  708. stride >>= sizeof(pixel)-1;
  709. dc0=dc1=0;
  710. for(i=0;i<4; i++){
  711. dc0+= src[i-stride];
  712. dc1+= src[4+i-stride];
  713. }
  714. dc0splat = PIXEL_SPLAT_X4((dc0 + 2)>>2);
  715. dc1splat = PIXEL_SPLAT_X4((dc1 + 2)>>2);
  716. for(i=0; i<4; i++){
  717. ((pixel4*)(src+i*stride))[0]= dc0splat;
  718. ((pixel4*)(src+i*stride))[1]= dc1splat;
  719. }
  720. for(i=4; i<8; i++){
  721. ((pixel4*)(src+i*stride))[0]= dc0splat;
  722. ((pixel4*)(src+i*stride))[1]= dc1splat;
  723. }
  724. }
  725. static void FUNCC(pred8x8_top_dc_rv40)(uint8_t *p_src, int stride){
  726. int i;
  727. int dc0;
  728. pixel4 dc0splat;
  729. pixel *src = (pixel*)p_src;
  730. stride >>= sizeof(pixel)-1;
  731. dc0=0;
  732. for(i=0;i<8; i++)
  733. dc0+= src[i-stride];
  734. dc0splat = PIXEL_SPLAT_X4((dc0 + 4)>>3);
  735. for(i=0; i<8; i++){
  736. ((pixel4*)(src+i*stride))[0]=
  737. ((pixel4*)(src+i*stride))[1]= dc0splat;
  738. }
  739. }
  740. static void FUNCC(pred8x8_dc)(uint8_t *p_src, int stride){
  741. int i;
  742. int dc0, dc1, dc2;
  743. pixel4 dc0splat, dc1splat, dc2splat, dc3splat;
  744. pixel *src = (pixel*)p_src;
  745. stride >>= sizeof(pixel)-1;
  746. dc0=dc1=dc2=0;
  747. for(i=0;i<4; i++){
  748. dc0+= src[-1+i*stride] + src[i-stride];
  749. dc1+= src[4+i-stride];
  750. dc2+= src[-1+(i+4)*stride];
  751. }
  752. dc0splat = PIXEL_SPLAT_X4((dc0 + 4)>>3);
  753. dc1splat = PIXEL_SPLAT_X4((dc1 + 2)>>2);
  754. dc2splat = PIXEL_SPLAT_X4((dc2 + 2)>>2);
  755. dc3splat = PIXEL_SPLAT_X4((dc1 + dc2 + 4)>>3);
  756. for(i=0; i<4; i++){
  757. ((pixel4*)(src+i*stride))[0]= dc0splat;
  758. ((pixel4*)(src+i*stride))[1]= dc1splat;
  759. }
  760. for(i=4; i<8; i++){
  761. ((pixel4*)(src+i*stride))[0]= dc2splat;
  762. ((pixel4*)(src+i*stride))[1]= dc3splat;
  763. }
  764. }
//the following 4 functions should not be optimized!
  766. static void FUNC(pred8x8_mad_cow_dc_l0t)(uint8_t *src, int stride){
  767. FUNCC(pred8x8_top_dc)(src, stride);
  768. FUNCC(pred4x4_dc)(src, NULL, stride);
  769. }
  770. static void FUNC(pred8x8_mad_cow_dc_0lt)(uint8_t *src, int stride){
  771. FUNCC(pred8x8_dc)(src, stride);
  772. FUNCC(pred4x4_top_dc)(src, NULL, stride);
  773. }
  774. static void FUNC(pred8x8_mad_cow_dc_l00)(uint8_t *src, int stride){
  775. FUNCC(pred8x8_left_dc)(src, stride);
  776. FUNCC(pred4x4_128_dc)(src + 4*stride , NULL, stride);
  777. FUNCC(pred4x4_128_dc)(src + 4*stride + 4*sizeof(pixel), NULL, stride);
  778. }
  779. static void FUNC(pred8x8_mad_cow_dc_0l0)(uint8_t *src, int stride){
  780. FUNCC(pred8x8_left_dc)(src, stride);
  781. FUNCC(pred4x4_128_dc)(src , NULL, stride);
  782. FUNCC(pred4x4_128_dc)(src + 4*sizeof(pixel), NULL, stride);
  783. }
  784. static void FUNCC(pred8x8_dc_rv40)(uint8_t *p_src, int stride){
  785. int i;
  786. int dc0=0;
  787. pixel4 dc0splat;
  788. pixel *src = (pixel*)p_src;
  789. stride >>= sizeof(pixel)-1;
  790. for(i=0;i<4; i++){
  791. dc0+= src[-1+i*stride] + src[i-stride];
  792. dc0+= src[4+i-stride];
  793. dc0+= src[-1+(i+4)*stride];
  794. }
  795. dc0splat = PIXEL_SPLAT_X4((dc0 + 8)>>4);
  796. for(i=0; i<4; i++){
  797. ((pixel4*)(src+i*stride))[0]= dc0splat;
  798. ((pixel4*)(src+i*stride))[1]= dc0splat;
  799. }
  800. for(i=4; i<8; i++){
  801. ((pixel4*)(src+i*stride))[0]= dc0splat;
  802. ((pixel4*)(src+i*stride))[1]= dc0splat;
  803. }
  804. }
  805. static void FUNCC(pred8x8_plane)(uint8_t *p_src, int p_stride){
  806. int j, k;
  807. int a;
  808. INIT_CLIP
  809. pixel *src = (pixel*)p_src;
  810. int stride = p_stride>>(sizeof(pixel)-1);
  811. const pixel * const src0 = src +3-stride;
  812. const pixel * src1 = src +4*stride-1;
  813. const pixel * src2 = src1-2*stride; // == src+2*stride-1;
  814. int H = src0[1] - src0[-1];
  815. int V = src1[0] - src2[ 0];
  816. for(k=2; k<=4; ++k) {
  817. src1 += stride; src2 -= stride;
  818. H += k*(src0[k] - src0[-k]);
  819. V += k*(src1[0] - src2[ 0]);
  820. }
  821. H = ( 17*H+16 ) >> 5;
  822. V = ( 17*V+16 ) >> 5;
  823. a = 16*(src1[0] + src2[8]+1) - 3*(V+H);
  824. for(j=8; j>0; --j) {
  825. int b = a;
  826. a += V;
  827. src[0] = CLIP((b ) >> 5);
  828. src[1] = CLIP((b+ H) >> 5);
  829. src[2] = CLIP((b+2*H) >> 5);
  830. src[3] = CLIP((b+3*H) >> 5);
  831. src[4] = CLIP((b+4*H) >> 5);
  832. src[5] = CLIP((b+5*H) >> 5);
  833. src[6] = CLIP((b+6*H) >> 5);
  834. src[7] = CLIP((b+7*H) >> 5);
  835. src += stride;
  836. }
  837. }
  838. static void FUNCC(pred8x8_tm_vp8)(uint8_t *p_src, int p_stride){
  839. pixel *src = (pixel*)p_src;
  840. int stride = p_stride>>(sizeof(pixel)-1);
  841. uint8_t *cm = ff_cropTbl + MAX_NEG_CROP - src[-1-stride];
  842. pixel *top = src-stride;
  843. int y;
  844. for (y = 0; y < 8; y++) {
  845. uint8_t *cm_in = cm + src[-1];
  846. src[0] = cm_in[top[0]];
  847. src[1] = cm_in[top[1]];
  848. src[2] = cm_in[top[2]];
  849. src[3] = cm_in[top[3]];
  850. src[4] = cm_in[top[4]];
  851. src[5] = cm_in[top[5]];
  852. src[6] = cm_in[top[6]];
  853. src[7] = cm_in[top[7]];
  854. src += stride;
  855. }
  856. }
  /*
   * Helper macros for the pred8x8l_* functions below. They all expand inside
   * a function body and expect `src` (pixel pointer) and `stride` (in pixels)
   * to be in scope; the LOAD_* macros additionally read `has_topleft` and/or
   * `has_topright`.
   */

  /* Pixel at column x, row y relative to the block origin. */
  #define SRC(x,y) src[(x)+(y)*stride]
  /* Declares l<y>: (1,2,1)-filtered left neighbour for row y (rows y-1, y, y+1). */
  #define PL(y) \
      const int l##y = (SRC(-1,y-1) + 2*SRC(-1,y) + SRC(-1,y+1) + 2) >> 2;
  /*
   * Declares l0..l7, the filtered left neighbour column.
   * l0 uses the top-left pixel when has_topleft is set and repeats SRC(-1,0)
   * otherwise; l7 repeats SRC(-1,7) in place of a row-8 sample.
   * Expands without a trailing semicolon; the caller supplies it.
   */
  #define PREDICT_8x8_LOAD_LEFT \
      const int l0 = ((has_topleft ? SRC(-1,-1) : SRC(-1,0)) \
      + 2*SRC(-1,0) + SRC(-1,1) + 2) >> 2; \
      PL(1) PL(2) PL(3) PL(4) PL(5) PL(6) \
      const int l7 av_unused = (SRC(-1,6) + 3*SRC(-1,7) + 2) >> 2
  /* Declares t<x>: (1,2,1)-filtered top neighbour for column x. */
  #define PT(x) \
      const int t##x = (SRC(x-1,-1) + 2*SRC(x,-1) + SRC(x+1,-1) + 2) >> 2;
  /*
   * Declares t0..t7, the filtered top neighbour row.
   * t0 uses the top-left pixel when has_topleft is set and repeats SRC(0,-1)
   * otherwise; t7 uses SRC(8,-1) when has_topright is set and repeats
   * SRC(7,-1) otherwise.
   * Expands without a trailing semicolon; the caller supplies it.
   */
  #define PREDICT_8x8_LOAD_TOP \
      const int t0 = ((has_topleft ? SRC(-1,-1) : SRC(0,-1)) \
      + 2*SRC(0,-1) + SRC(1,-1) + 2) >> 2; \
      PT(1) PT(2) PT(3) PT(4) PT(5) PT(6) \
      const int t7 av_unused = ((has_topright ? SRC(8,-1) : SRC(7,-1)) \
      + 2*SRC(7,-1) + SRC(6,-1) + 2) >> 2
  /* Assigns (does not declare) t<x> with the same filter as PT. */
  #define PTR(x) \
      t##x = (SRC(x-1,-1) + 2*SRC(x,-1) + SRC(x+1,-1) + 2) >> 2;
  /*
   * Declares t8..t15, the top-right neighbour row.
   * With has_topright they are filtered from columns 7..15 of the row above
   * (t15 repeats SRC(15,-1)); without it they are all set to the unfiltered
   * SRC(7,-1).
   * This macro contains an if/else statement and already ends in a
   * semicolon.
   */
  #define PREDICT_8x8_LOAD_TOPRIGHT \
      int t8, t9, t10, t11, t12, t13, t14, t15; \
      if(has_topright) { \
      PTR(8) PTR(9) PTR(10) PTR(11) PTR(12) PTR(13) PTR(14) \
      t15 = (SRC(14,-1) + 3*SRC(15,-1) + 2) >> 2; \
      } else t8=t9=t10=t11=t12=t13=t14=t15= SRC(7,-1);
  /* Declares lt: top-left corner filtered with its lower and right neighbours. */
  #define PREDICT_8x8_LOAD_TOPLEFT \
      const int lt = (SRC(-1,0) + 2*SRC(-1,-1) + SRC(0,-1) + 2) >> 2
  /*
   * Fills 8 rows of 8 pixels with the pixel4 value v.
   * Declares a local `y` and advances `src` by 8 rows as a side effect.
   */
  #define PREDICT_8x8_DC(v) \
      int y; \
      for( y = 0; y < 8; y++ ) { \
      ((pixel4*)src)[0] = \
      ((pixel4*)src)[1] = v; \
      src += stride; \
      }
  890. static void FUNCC(pred8x8l_128_dc)(uint8_t *p_src, int has_topleft, int has_topright, int p_stride)
  891. {
  892. pixel *src = (pixel*)p_src;
  893. int stride = p_stride>>(sizeof(pixel)-1);
  894. PREDICT_8x8_DC(PIXEL_SPLAT_X4(1<<(BIT_DEPTH-1)));
  895. }
  896. static void FUNCC(pred8x8l_left_dc)(uint8_t *p_src, int has_topleft, int has_topright, int p_stride)
  897. {
  898. pixel *src = (pixel*)p_src;
  899. int stride = p_stride>>(sizeof(pixel)-1);
  900. PREDICT_8x8_LOAD_LEFT;
  901. const pixel4 dc = PIXEL_SPLAT_X4((l0+l1+l2+l3+l4+l5+l6+l7+4) >> 3);
  902. PREDICT_8x8_DC(dc);
  903. }
  904. static void FUNCC(pred8x8l_top_dc)(uint8_t *p_src, int has_topleft, int has_topright, int p_stride)
  905. {
  906. pixel *src = (pixel*)p_src;
  907. int stride = p_stride>>(sizeof(pixel)-1);
  908. PREDICT_8x8_LOAD_TOP;
  909. const pixel4 dc = PIXEL_SPLAT_X4((t0+t1+t2+t3+t4+t5+t6+t7+4) >> 3);
  910. PREDICT_8x8_DC(dc);
  911. }
  912. static void FUNCC(pred8x8l_dc)(uint8_t *p_src, int has_topleft, int has_topright, int p_stride)
  913. {
  914. pixel *src = (pixel*)p_src;
  915. int stride = p_stride>>(sizeof(pixel)-1);
  916. PREDICT_8x8_LOAD_LEFT;
  917. PREDICT_8x8_LOAD_TOP;
  918. const pixel4 dc = PIXEL_SPLAT_X4((l0+l1+l2+l3+l4+l5+l6+l7
  919. +t0+t1+t2+t3+t4+t5+t6+t7+8) >> 4);
  920. PREDICT_8x8_DC(dc);
  921. }
  922. static void FUNCC(pred8x8l_horizontal)(uint8_t *p_src, int has_topleft, int has_topright, int p_stride)
  923. {
  924. pixel *src = (pixel*)p_src;
  925. int stride = p_stride>>(sizeof(pixel)-1);
  926. PREDICT_8x8_LOAD_LEFT;
  927. #define ROW(y) ((pixel4*)(src+y*stride))[0] =\
  928. ((pixel4*)(src+y*stride))[1] = PIXEL_SPLAT_X4(l##y)
  929. ROW(0); ROW(1); ROW(2); ROW(3); ROW(4); ROW(5); ROW(6); ROW(7);
  930. #undef ROW
  931. }
  932. static void FUNCC(pred8x8l_vertical)(uint8_t *p_src, int has_topleft, int has_topright, int p_stride)
  933. {
  934. int y;
  935. pixel *src = (pixel*)p_src;
  936. int stride = p_stride>>(sizeof(pixel)-1);
  937. PREDICT_8x8_LOAD_TOP;
  938. src[0] = t0;
  939. src[1] = t1;
  940. src[2] = t2;
  941. src[3] = t3;
  942. src[4] = t4;
  943. src[5] = t5;
  944. src[6] = t6;
  945. src[7] = t7;
  946. for( y = 1; y < 8; y++ ) {
  947. ((pixel4*)(src+y*stride))[0] = ((pixel4*)src)[0];
  948. ((pixel4*)(src+y*stride))[1] = ((pixel4*)src)[1];
  949. }
  950. }
  951. static void FUNCC(pred8x8l_down_left)(uint8_t *p_src, int has_topleft, int has_topright, int p_stride)
  952. {
  953. pixel *src = (pixel*)p_src;
  954. int stride = p_stride>>(sizeof(pixel)-1);
  955. PREDICT_8x8_LOAD_TOP;
  956. PREDICT_8x8_LOAD_TOPRIGHT;
  957. SRC(0,0)= (t0 + 2*t1 + t2 + 2) >> 2;
  958. SRC(0,1)=SRC(1,0)= (t1 + 2*t2 + t3 + 2) >> 2;
  959. SRC(0,2)=SRC(1,1)=SRC(2,0)= (t2 + 2*t3 + t4 + 2) >> 2;
  960. SRC(0,3)=SRC(1,2)=SRC(2,1)=SRC(3,0)= (t3 + 2*t4 + t5 + 2) >> 2;
  961. SRC(0,4)=SRC(1,3)=SRC(2,2)=SRC(3,1)=SRC(4,0)= (t4 + 2*t5 + t6 + 2) >> 2;
  962. SRC(0,5)=SRC(1,4)=SRC(2,3)=SRC(3,2)=SRC(4,1)=SRC(5,0)= (t5 + 2*t6 + t7 + 2) >> 2;
  963. SRC(0,6)=SRC(1,5)=SRC(2,4)=SRC(3,3)=SRC(4,2)=SRC(5,1)=SRC(6,0)= (t6 + 2*t7 + t8 + 2) >> 2;
  964. SRC(0,7)=SRC(1,6)=SRC(2,5)=SRC(3,4)=SRC(4,3)=SRC(5,2)=SRC(6,1)=SRC(7,0)= (t7 + 2*t8 + t9 + 2) >> 2;
  965. SRC(1,7)=SRC(2,6)=SRC(3,5)=SRC(4,4)=SRC(5,3)=SRC(6,2)=SRC(7,1)= (t8 + 2*t9 + t10 + 2) >> 2;
  966. SRC(2,7)=SRC(3,6)=SRC(4,5)=SRC(5,4)=SRC(6,3)=SRC(7,2)= (t9 + 2*t10 + t11 + 2) >> 2;
  967. SRC(3,7)=SRC(4,6)=SRC(5,5)=SRC(6,4)=SRC(7,3)= (t10 + 2*t11 + t12 + 2) >> 2;
  968. SRC(4,7)=SRC(5,6)=SRC(6,5)=SRC(7,4)= (t11 + 2*t12 + t13 + 2) >> 2;
  969. SRC(5,7)=SRC(6,6)=SRC(7,5)= (t12 + 2*t13 + t14 + 2) >> 2;
  970. SRC(6,7)=SRC(7,6)= (t13 + 2*t14 + t15 + 2) >> 2;
  971. SRC(7,7)= (t14 + 3*t15 + 2) >> 2;
  972. }
  973. static void FUNCC(pred8x8l_down_right)(uint8_t *p_src, int has_topleft, int has_topright, int p_stride)
  974. {
  975. pixel *src = (pixel*)p_src;
  976. int stride = p_stride>>(sizeof(pixel)-1);
  977. PREDICT_8x8_LOAD_TOP;
  978. PREDICT_8x8_LOAD_LEFT;
  979. PREDICT_8x8_LOAD_TOPLEFT;
  980. SRC(0,7)= (l7 + 2*l6 + l5 + 2) >> 2;
  981. SRC(0,6)=SRC(1,7)= (l6 + 2*l5 + l4 + 2) >> 2;
  982. SRC(0,5)=SRC(1,6)=SRC(2,7)= (l5 + 2*l4 + l3 + 2) >> 2;
  983. SRC(0,4)=SRC(1,5)=SRC(2,6)=SRC(3,7)= (l4 + 2*l3 + l2 + 2) >> 2;
  984. SRC(0,3)=SRC(1,4)=SRC(2,5)=SRC(3,6)=SRC(4,7)= (l3 + 2*l2 + l1 + 2) >> 2;
  985. SRC(0,2)=SRC(1,3)=SRC(2,4)=SRC(3,5)=SRC(4,6)=SRC(5,7)= (l2 + 2*l1 + l0 + 2) >> 2;
  986. SRC(0,1)=SRC(1,2)=SRC(2,3)=SRC(3,4)=SRC(4,5)=SRC(5,6)=SRC(6,7)= (l1 + 2*l0 + lt + 2) >> 2;
  987. SRC(0,0)=SRC(1,1)=SRC(2,2)=SRC(3,3)=SRC(4,4)=SRC(5,5)=SRC(6,6)=SRC(7,7)= (l0 + 2*lt + t0 + 2) >> 2;
  988. SRC(1,0)=SRC(2,1)=SRC(3,2)=SRC(4,3)=SRC(5,4)=SRC(6,5)=SRC(7,6)= (lt + 2*t0 + t1 + 2) >> 2;
  989. SRC(2,0)=SRC(3,1)=SRC(4,2)=SRC(5,3)=SRC(6,4)=SRC(7,5)= (t0 + 2*t1 + t2 + 2) >> 2;
  990. SRC(3,0)=SRC(4,1)=SRC(5,2)=SRC(6,3)=SRC(7,4)= (t1 + 2*t2 + t3 + 2) >> 2;
  991. SRC(4,0)=SRC(5,1)=SRC(6,2)=SRC(7,3)= (t2 + 2*t3 + t4 + 2) >> 2;
  992. SRC(5,0)=SRC(6,1)=SRC(7,2)= (t3 + 2*t4 + t5 + 2) >> 2;
  993. SRC(6,0)=SRC(7,1)= (t4 + 2*t5 + t6 + 2) >> 2;
  994. SRC(7,0)= (t5 + 2*t6 + t7 + 2) >> 2;
  995. }
  996. static void FUNCC(pred8x8l_vertical_right)(uint8_t *p_src, int has_topleft, int has_topright, int p_stride)
  997. {
  998. pixel *src = (pixel*)p_src;
  999. int stride = p_stride>>(sizeof(pixel)-1);
  1000. PREDICT_8x8_LOAD_TOP;
  1001. PREDICT_8x8_LOAD_LEFT;
  1002. PREDICT_8x8_LOAD_TOPLEFT;
  1003. SRC(0,6)= (l5 + 2*l4 + l3 + 2) >> 2;
  1004. SRC(0,7)= (l6 + 2*l5 + l4 + 2) >> 2;
  1005. SRC(0,4)=SRC(1,6)= (l3 + 2*l2 + l1 + 2) >> 2;
  1006. SRC(0,5)=SRC(1,7)= (l4 + 2*l3 + l2 + 2) >> 2;
  1007. SRC(0,2)=SRC(1,4)=SRC(2,6)= (l1 + 2*l0 + lt + 2) >> 2;
  1008. SRC(0,3)=SRC(1,5)=SRC(2,7)= (l2 + 2*l1 + l0 + 2) >> 2;
  1009. SRC(0,1)=SRC(1,3)=SRC(2,5)=SRC(3,7)= (l0 + 2*lt + t0 + 2) >> 2;
  1010. SRC(0,0)=SRC(1,2)=SRC(2,4)=SRC(3,6)= (lt + t0 + 1) >> 1;
  1011. SRC(1,1)=SRC(2,3)=SRC(3,5)=SRC(4,7)= (lt + 2*t0 + t1 + 2) >> 2;
  1012. SRC(1,0)=SRC(2,2)=SRC(3,4)=SRC(4,6)= (t0 + t1 + 1) >> 1;
  1013. SRC(2,1)=SRC(3,3)=SRC(4,5)=SRC(5,7)= (t0 + 2*t1 + t2 + 2) >> 2;
  1014. SRC(2,0)=SRC(3,2)=SRC(4,4)=SRC(5,6)= (t1 + t2 + 1) >> 1;
  1015. SRC(3,1)=SRC(4,3)=SRC(5,5)=SRC(6,7)= (t1 + 2*t2 + t3 + 2) >> 2;
  1016. SRC(3,0)=SRC(4,2)=SRC(5,4)=SRC(6,6)= (t2 + t3 + 1) >> 1;
  1017. SRC(4,1)=SRC(5,3)=SRC(6,5)=SRC(7,7)= (t2 + 2*t3 + t4 + 2) >> 2;
  1018. SRC(4,0)=SRC(5,2)=SRC(6,4)=SRC(7,6)= (t3 + t4 + 1) >> 1;
  1019. SRC(5,1)=SRC(6,3)=SRC(7,5)= (t3 + 2*t4 + t5 + 2) >> 2;
  1020. SRC(5,0)=SRC(6,2)=SRC(7,4)= (t4 + t5 + 1) >> 1;
  1021. SRC(6,1)=SRC(7,3)= (t4 + 2*t5 + t6 + 2) >> 2;
  1022. SRC(6,0)=SRC(7,2)= (t5 + t6 + 1) >> 1;
  1023. SRC(7,1)= (t5 + 2*t6 + t7 + 2) >> 2;
  1024. SRC(7,0)= (t6 + t7 + 1) >> 1;
  1025. }
  1026. static void FUNCC(pred8x8l_horizontal_down)(uint8_t *p_src, int has_topleft, int has_topright, int p_stride)
  1027. {
  1028. pixel *src = (pixel*)p_src;
  1029. int stride = p_stride>>(sizeof(pixel)-1);
  1030. PREDICT_8x8_LOAD_TOP;
  1031. PREDICT_8x8_LOAD_LEFT;
  1032. PREDICT_8x8_LOAD_TOPLEFT;
  1033. SRC(0,7)= (l6 + l7 + 1) >> 1;
  1034. SRC(1,7)= (l5 + 2*l6 + l7 + 2) >> 2;
  1035. SRC(0,6)=SRC(2,7)= (l5 + l6 + 1) >> 1;
  1036. SRC(1,6)=SRC(3,7)= (l4 + 2*l5 + l6 + 2) >> 2;
  1037. SRC(0,5)=SRC(2,6)=SRC(4,7)= (l4 + l5 + 1) >> 1;
  1038. SRC(1,5)=SRC(3,6)=SRC(5,7)= (l3 + 2*l4 + l5 + 2) >> 2;
  1039. SRC(0,4)=SRC(2,5)=SRC(4,6)=SRC(6,7)= (l3 + l4 + 1) >> 1;
  1040. SRC(1,4)=SRC(3,5)=SRC(5,6)=SRC(7,7)= (l2 + 2*l3 + l4 + 2) >> 2;
  1041. SRC(0,3)=SRC(2,4)=SRC(4,5)=SRC(6,6)= (l2 + l3 + 1) >> 1;
  1042. SRC(1,3)=SRC(3,4)=SRC(5,5)=SRC(7,6)= (l1 + 2*l2 + l3 + 2) >> 2;
  1043. SRC(0,2)=SRC(2,3)=SRC(4,4)=SRC(6,5)= (l1 + l2 + 1) >> 1;
  1044. SRC(1,2)=SRC(3,3)=SRC(5,4)=SRC(7,5)= (l0 + 2*l1 + l2 + 2) >> 2;
  1045. SRC(0,1)=SRC(2,2)=SRC(4,3)=SRC(6,4)= (l0 + l1 + 1) >> 1;
  1046. SRC(1,1)=SRC(3,2)=SRC(5,3)=SRC(7,4)= (lt + 2*l0 + l1 + 2) >> 2;
  1047. SRC(0,0)=SRC(2,1)=SRC(4,2)=SRC(6,3)= (lt + l0 + 1) >> 1;
  1048. SRC(1,0)=SRC(3,1)=SRC(5,2)=SRC(7,3)= (l0 + 2*lt + t0 + 2) >> 2;
  1049. SRC(2,0)=SRC(4,1)=SRC(6,2)= (t1 + 2*t0 + lt + 2) >> 2;
  1050. SRC(3,0)=SRC(5,1)=SRC(7,2)= (t2 + 2*t1 + t0 + 2) >> 2;
  1051. SRC(4,0)=SRC(6,1)= (t3 + 2*t2 + t1 + 2) >> 2;
  1052. SRC(5,0)=SRC(7,1)= (t4 + 2*t3 + t2 + 2) >> 2;
  1053. SRC(6,0)= (t5 + 2*t4 + t3 + 2) >> 2;
  1054. SRC(7,0)= (t6 + 2*t5 + t4 + 2) >> 2;
  1055. }
  1056. static void FUNCC(pred8x8l_vertical_left)(uint8_t *p_src, int has_topleft, int has_topright, int p_stride)
  1057. {
  1058. pixel *src = (pixel*)p_src;
  1059. int stride = p_stride>>(sizeof(pixel)-1);
  1060. PREDICT_8x8_LOAD_TOP;
  1061. PREDICT_8x8_LOAD_TOPRIGHT;
  1062. SRC(0,0)= (t0 + t1 + 1) >> 1;
  1063. SRC(0,1)= (t0 + 2*t1 + t2 + 2) >> 2;
  1064. SRC(0,2)=SRC(1,0)= (t1 + t2 + 1) >> 1;
  1065. SRC(0,3)=SRC(1,1)= (t1 + 2*t2 + t3 + 2) >> 2;
  1066. SRC(0,4)=SRC(1,2)=SRC(2,0)= (t2 + t3 + 1) >> 1;
  1067. SRC(0,5)=SRC(1,3)=SRC(2,1)= (t2 + 2*t3 + t4 + 2) >> 2;
  1068. SRC(0,6)=SRC(1,4)=SRC(2,2)=SRC(3,0)= (t3 + t4 + 1) >> 1;
  1069. SRC(0,7)=SRC(1,5)=SRC(2,3)=SRC(3,1)= (t3 + 2*t4 + t5 + 2) >> 2;
  1070. SRC(1,6)=SRC(2,4)=SRC(3,2)=SRC(4,0)= (t4 + t5 + 1) >> 1;
  1071. SRC(1,7)=SRC(2,5)=SRC(3,3)=SRC(4,1)= (t4 + 2*t5 + t6 + 2) >> 2;
  1072. SRC(2,6)=SRC(3,4)=SRC(4,2)=SRC(5,0)= (t5 + t6 + 1) >> 1;
  1073. SRC(2,7)=SRC(3,5)=SRC(4,3)=SRC(5,1)= (t5 + 2*t6 + t7 + 2) >> 2;
  1074. SRC(3,6)=SRC(4,4)=SRC(5,2)=SRC(6,0)= (t6 + t7 + 1) >> 1;
  1075. SRC(3,7)=SRC(4,5)=SRC(5,3)=SRC(6,1)= (t6 + 2*t7 + t8 + 2) >> 2;
  1076. SRC(4,6)=SRC(5,4)=SRC(6,2)=SRC(7,0)= (t7 + t8 + 1) >> 1;
  1077. SRC(4,7)=SRC(5,5)=SRC(6,3)=SRC(7,1)= (t7 + 2*t8 + t9 + 2) >> 2;
  1078. SRC(5,6)=SRC(6,4)=SRC(7,2)= (t8 + t9 + 1) >> 1;
  1079. SRC(5,7)=SRC(6,5)=SRC(7,3)= (t8 + 2*t9 + t10 + 2) >> 2;
  1080. SRC(6,6)=SRC(7,4)= (t9 + t10 + 1) >> 1;
  1081. SRC(6,7)=SRC(7,5)= (t9 + 2*t10 + t11 + 2) >> 2;
  1082. SRC(7,6)= (t10 + t11 + 1) >> 1;
  1083. SRC(7,7)= (t10 + 2*t11 + t12 + 2) >> 2;
  1084. }
  1085. static void FUNCC(pred8x8l_horizontal_up)(uint8_t *p_src, int has_topleft, int has_topright, int p_stride)
  1086. {
  1087. pixel *src = (pixel*)p_src;
  1088. int stride = p_stride>>(sizeof(pixel)-1);
  1089. PREDICT_8x8_LOAD_LEFT;
  1090. SRC(0,0)= (l0 + l1 + 1) >> 1;
  1091. SRC(1,0)= (l0 + 2*l1 + l2 + 2) >> 2;
  1092. SRC(0,1)=SRC(2,0)= (l1 + l2 + 1) >> 1;
  1093. SRC(1,1)=SRC(3,0)= (l1 + 2*l2 + l3 + 2) >> 2;
  1094. SRC(0,2)=SRC(2,1)=SRC(4,0)= (l2 + l3 + 1) >> 1;
  1095. SRC(1,2)=SRC(3,1)=SRC(5,0)= (l2 + 2*l3 + l4 + 2) >> 2;
  1096. SRC(0,3)=SRC(2,2)=SRC(4,1)=SRC(6,0)= (l3 + l4 + 1) >> 1;
  1097. SRC(1,3)=SRC(3,2)=SRC(5,1)=SRC(7,0)= (l3 + 2*l4 + l5 + 2) >> 2;
  1098. SRC(0,4)=SRC(2,3)=SRC(4,2)=SRC(6,1)= (l4 + l5 + 1) >> 1;
  1099. SRC(1,4)=SRC(3,3)=SRC(5,2)=SRC(7,1)= (l4 + 2*l5 + l6 + 2) >> 2;
  1100. SRC(0,5)=SRC(2,4)=SRC(4,3)=SRC(6,2)= (l5 + l6 + 1) >> 1;
  1101. SRC(1,5)=SRC(3,4)=SRC(5,3)=SRC(7,2)= (l5 + 2*l6 + l7 + 2) >> 2;
  1102. SRC(0,6)=SRC(2,5)=SRC(4,4)=SRC(6,3)= (l6 + l7 + 1) >> 1;
  1103. SRC(1,6)=SRC(3,5)=SRC(5,4)=SRC(7,3)= (l6 + 3*l7 + 2) >> 2;
  1104. SRC(0,7)=SRC(1,7)=SRC(2,6)=SRC(2,7)=SRC(3,6)=
  1105. SRC(3,7)=SRC(4,5)=SRC(4,6)=SRC(4,7)=SRC(5,5)=
  1106. SRC(5,6)=SRC(5,7)=SRC(6,4)=SRC(6,5)=SRC(6,6)=
  1107. SRC(6,7)=SRC(7,4)=SRC(7,5)=SRC(7,6)=SRC(7,7)= l7;
  1108. }
  /* The pred8x8l_* helper macros are not used past this point; drop them so
   * they do not leak into the code that follows. */
  #undef PREDICT_8x8_LOAD_LEFT
  #undef PREDICT_8x8_LOAD_TOP
  #undef PREDICT_8x8_LOAD_TOPLEFT
  #undef PREDICT_8x8_LOAD_TOPRIGHT
  #undef PREDICT_8x8_DC
  #undef PTR
  #undef PT
  #undef PL
  #undef SRC
  1118. static void FUNCC(pred4x4_vertical_add)(uint8_t *p_pix, const DCTELEM *p_block, int stride){
  1119. int i;
  1120. pixel *pix = (pixel*)p_pix;
  1121. const dctcoef *block = (const dctcoef*)p_block;
  1122. stride >>= sizeof(pixel)-1;
  1123. pix -= stride;
  1124. for(i=0; i<4; i++){
  1125. pixel v = pix[0];
  1126. pix[1*stride]= v += block[0];
  1127. pix[2*stride]= v += block[4];
  1128. pix[3*stride]= v += block[8];
  1129. pix[4*stride]= v + block[12];
  1130. pix++;
  1131. block++;
  1132. }
  1133. }
  1134. static void FUNCC(pred4x4_horizontal_add)(uint8_t *p_pix, const DCTELEM *p_block, int stride){
  1135. int i;
  1136. pixel *pix = (pixel*)p_pix;
  1137. const dctcoef *block = (const dctcoef*)p_block;
  1138. stride >>= sizeof(pixel)-1;
  1139. for(i=0; i<4; i++){
  1140. pixel v = pix[-1];
  1141. pix[0]= v += block[0];
  1142. pix[1]= v += block[1];
  1143. pix[2]= v += block[2];
  1144. pix[3]= v + block[3];
  1145. pix+= stride;
  1146. block+= 4;
  1147. }
  1148. }
  1149. static void FUNCC(pred8x8l_vertical_add)(uint8_t *p_pix, const DCTELEM *p_block, int stride){
  1150. int i;
  1151. pixel *pix = (pixel*)p_pix;
  1152. const dctcoef *block = (const dctcoef*)p_block;
  1153. stride >>= sizeof(pixel)-1;
  1154. pix -= stride;
  1155. for(i=0; i<8; i++){
  1156. pixel v = pix[0];
  1157. pix[1*stride]= v += block[0];
  1158. pix[2*stride]= v += block[8];
  1159. pix[3*stride]= v += block[16];
  1160. pix[4*stride]= v += block[24];
  1161. pix[5*stride]= v += block[32];
  1162. pix[6*stride]= v += block[40];
  1163. pix[7*stride]= v += block[48];
  1164. pix[8*stride]= v + block[56];
  1165. pix++;
  1166. block++;
  1167. }
  1168. }
  1169. static void FUNCC(pred8x8l_horizontal_add)(uint8_t *p_pix, const DCTELEM *p_block, int stride){
  1170. int i;
  1171. pixel *pix = (pixel*)p_pix;
  1172. const dctcoef *block = (const dctcoef*)p_block;
  1173. stride >>= sizeof(pixel)-1;
  1174. for(i=0; i<8; i++){
  1175. pixel v = pix[-1];
  1176. pix[0]= v += block[0];
  1177. pix[1]= v += block[1];
  1178. pix[2]= v += block[2];
  1179. pix[3]= v += block[3];
  1180. pix[4]= v += block[4];
  1181. pix[5]= v += block[5];
  1182. pix[6]= v += block[6];
  1183. pix[7]= v + block[7];
  1184. pix+= stride;
  1185. block+= 8;
  1186. }
  1187. }
  1188. static void FUNCC(pred16x16_vertical_add)(uint8_t *pix, const int *block_offset, const DCTELEM *block, int stride){
  1189. int i;
  1190. for(i=0; i<16; i++)
  1191. FUNCC(pred4x4_vertical_add)(pix + block_offset[i], block + i*16*sizeof(pixel), stride);
  1192. }
  1193. static void FUNCC(pred16x16_horizontal_add)(uint8_t *pix, const int *block_offset, const DCTELEM *block, int stride){
  1194. int i;
  1195. for(i=0; i<16; i++)
  1196. FUNCC(pred4x4_horizontal_add)(pix + block_offset[i], block + i*16*sizeof(pixel), stride);
  1197. }
  1198. static void FUNCC(pred8x8_vertical_add)(uint8_t *pix, const int *block_offset, const DCTELEM *block, int stride){
  1199. int i;
  1200. for(i=0; i<4; i++)
  1201. FUNCC(pred4x4_vertical_add)(pix + block_offset[i], block + i*16*sizeof(pixel), stride);
  1202. }
  1203. static void FUNCC(pred8x8_horizontal_add)(uint8_t *pix, const int *block_offset, const DCTELEM *block, int stride){
  1204. int i;
  1205. for(i=0; i<4; i++)
  1206. FUNCC(pred4x4_horizontal_add)(pix + block_offset[i], block + i*16*sizeof(pixel), stride);
  1207. }