You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

381 lines
13KB

  1. /*
  2. * Blackfin Pixel Operations
  3. * Copyright (C) 2007 Marc Hoffman <marc.hoffman@analog.com>
  4. *
  5. * This file is part of Libav.
  6. *
  7. * Libav is free software; you can redistribute it and/or
  8. * modify it under the terms of the GNU Lesser General Public
  9. * License as published by the Free Software Foundation; either
  10. * version 2.1 of the License, or (at your option) any later version.
  11. *
  12. * Libav is distributed in the hope that it will be useful,
  13. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  15. * Lesser General Public License for more details.
  16. *
  17. * You should have received a copy of the GNU Lesser General Public
  18. * License along with Libav; if not, write to the Free Software
  19. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  20. */
  21. #include "config_bfin.h"
  22. /*
  23. motion compensation
  24. primitives
  25. * Halfpel motion compensation with rounding (a+b+1)>>1.
  26. * This is an array[4][4] of motion compensation functions for 4
  27. * horizontal blocksizes (8,16) and the 4 halfpel positions
  28. * *pixels_tab[ 0->16xH 1->8xH ][ xhalfpel + 2*yhalfpel ]
  29. * @param block destination where the result is stored
  30. * @param pixels source
  31. * @param line_size number of bytes in a horizontal line of block
  32. * @param h height
  33. */
  /*
   * put_pixels8uc: write an 8-byte-wide block of h rows to `block`. Each
   * output row is BYTEOP1P(row of s0, row of s1), i.e. the Blackfin quad
   * 8-bit average of the two sources, used here without the T option
   * (NOTE(review): that is the rounding form per the Blackfin ISA reference
   * -- confirm).
   *
   * Arguments as the code reads them:
   *   R0       block      destination pointer            -> I3
   *   R1       s0         first source pointer           -> I0
   *   R2       s1         second source pointer          -> I1
   *   [SP+12]  dest_size  byte step between destination rows
   *   [SP+16]  line_size  byte step between source rows (used for both sources)
   *   [SP+20]  h          loop count (number of rows)
   * The stack arguments are fetched before R7:6 is pushed, so the offsets
   * are relative to SP on entry.
   *
   * Saves and restores R7:6. Overwrites R0-R3, P0, I0, I1, I3, M0, M3 and the
   * hardware loop 0 state. The loop is software-pipelined: three words are
   * loaded per source row and the first word of the following row is loaded
   * before the current row is stored, so both sources are read a little past
   * the data that is actually averaged.
   */
  34. DEFUN(put_pixels8uc,mL1,
  35. (uint8_t *block, const uint8_t *s0, const uint8_t *s1,
  36. int dest_size, int line_size, int h)):
  37. i3=r0; // I3 = block (destination pointer)
  38. i0=r1; // I0 = s0
  39. i1=r2; // I1 = s1
  40. r0=[sp+12]; // dest_size
  41. r2=[sp+16]; // line_size
  42. p0=[sp+20]; // h
  43. [--sp] = (r7:6); // save R7:6, used below to hold the averaged words
  44. r0+=-4;
  45. m3=r0; // M3 = dest_size - 4: the second store of a row steps I3 to the next destination row
  46. r2+=-8;
  47. m0=r2; // M0 = line_size - 8: the third load of a row steps I0/I1 to the next source row
  48. LSETUP(pp8$0,pp8$1) LC0=P0; // hardware loop pp8$0..pp8$1, count = h
  49. DISALGNEXCPT || R0 = [I0++] || R2 =[I1++]; // preload word 0 of the first row of each source
  50. pp8$0: DISALGNEXCPT || R1 = [I0++] || R3 =[I1++]; // word 1 of the current row of each source
  51. R6 = BYTEOP1P(R1:0,R3:2) || R0 = [I0++M0]|| R2 =[I1++M0]; // R6 = output bytes 0-3; load word 2, move both sources to the next row
  52. R7 = BYTEOP1P(R1:0,R3:2)(R) || R0 = [I0++] || [I3++] = R6 ; // R7 = output bytes 4-7 (pair order reversed); preload next row of s0; store bytes 0-3
  53. pp8$1: DISALGNEXCPT || R2 = [I1++] || [I3++M3] = R7; // preload next row of s1; store bytes 4-7 and step to the next destination row
  54. (r7:6) = [sp++]; // restore R7:6
  55. RTS;
  56. DEFUN_END(put_pixels8uc)
  /*
   * put_pixels16uc: write a 16-byte-wide block of h rows to `block`. Each
   * output row is BYTEOP1P(row of s0, row of s1), i.e. the Blackfin quad
   * 8-bit average of the two sources, used here without the T option
   * (NOTE(review): that is the rounding form per the Blackfin ISA reference
   * -- confirm).
   *
   * Arguments as the code reads them (after `link 0`, via FP):
   *   R0       block      destination pointer            -> I3
   *   R1       s0         first source pointer           -> I0
   *   R2       s1         second source pointer          -> I1
   *   [FP+20]  dest_size  byte step between destination rows
   *   [FP+24]  line_size  byte step between source rows (used for both sources)
   *   [FP+28]  h          loop count (number of rows)
   *
   * Saves and restores R7:6. Overwrites R0-R3, P0, I0, I1, I3, M0, M3 and the
   * hardware loop 0 state. The loop is software-pipelined: five words are
   * loaded per source row and the first word of the following row is loaded
   * before the current row is fully stored, so both sources are read a
   * little past the data that is actually averaged.
   */
  57. DEFUN(put_pixels16uc,mL1,
  58. (uint8_t *block, const uint8_t *s0, const uint8_t *s1,
  59. int dest_size, int line_size, int h)):
  60. link 0; // frame with no locals; stack arguments are addressed through FP
  61. [--sp] = (r7:6); // save R7:6, used below to hold the averaged words
  62. i3=r0; // I3 = block (destination pointer)
  63. i0=r1; // I0 = s0
  64. i1=r2; // I1 = s1
  65. r0=[fp+20]; // dest_size
  66. r2=[fp+24]; // line_size
  67. p0=[fp+28]; // h
  68. r0+=-12;
  69. m3=r0; // M3 = dest_size - 12: the fourth store of a row steps I3 to the next destination row
  70. r2+=-16;
  71. m0=r2; // M0 = line_size - 16: the fifth load of a row steps I0/I1 to the next source row
  72. LSETUP(pp16$0,pp16$1) LC0=P0; // hardware loop pp16$0..pp16$1, count = h
  73. DISALGNEXCPT || R0 = [I0++] || R2 =[I1++]; // preload word 0 of the first row of each source
  74. pp16$0: DISALGNEXCPT || R1 = [I0++] || R3 =[I1++]; // word 1 of the current row
  75. R6 = BYTEOP1P(R1:0,R3:2) || R0 = [I0++] || R2 =[I1++]; // R6 = output bytes 0-3; load word 2
  76. R7 = BYTEOP1P(R1:0,R3:2)(R) || R1 = [I0++] || R3 =[I1++]; // R7 = output bytes 4-7 (pair order reversed); load word 3
  77. [I3++] = R6; // store bytes 0-3
  78. R6 = BYTEOP1P(R1:0,R3:2) || R0 = [I0++M0] || R2 =[I1++M0]; // R6 = output bytes 8-11; load word 4, move both sources to the next row
  79. R7 = BYTEOP1P(R1:0,R3:2)(R) || R0 = [I0++] || [I3++] = R7 ; // store bytes 4-7 (old R7) while computing bytes 12-15; preload next row of s0
  80. [I3++] = R6; // store bytes 8-11
  81. pp16$1: DISALGNEXCPT || R2 = [I1++] || [I3++M3] = R7; // preload next row of s1; store bytes 12-15 and step to the next destination row
  82. (r7:6) = [sp++]; // restore R7:6
  83. unlink;
  84. RTS;
  85. DEFUN_END(put_pixels16uc)
  /*
   * put_pixels8uc_nornd: write an 8-byte-wide block of h rows to `block`.
   * Each output row is BYTEOP1P(row of s0, row of s1) with the T option
   * (NOTE(review): T selects truncation instead of rounding per the Blackfin
   * ISA reference -- confirm).
   *
   * Arguments as the code reads them:
   *   R0       block      destination pointer            -> I3
   *   R1       s0         first source pointer           -> I0
   *   R2       s1         second source pointer          -> I1
   *   [SP+12]  line_size  byte step between rows; used for the destination
   *                       and for both sources
   *   [SP+16]  h          loop count (number of rows)
   * The stack arguments are fetched before R7:6 is pushed, so the offsets
   * are relative to SP on entry.
   *
   * Saves and restores R7:6. Overwrites R0-R3, P0, I0, I1, I3, M0, M3 and the
   * hardware loop 0 state. The loop is software-pipelined: three words are
   * loaded per source row and the first word of the following row is loaded
   * before the current row is stored, so both sources are read a little past
   * the data that is actually averaged.
   */
  86. DEFUN(put_pixels8uc_nornd,mL1,
  87. (uint8_t *block, const uint8_t *s0, const uint8_t *s1,
  88. int line_size, int h)):
  89. i3=r0; // I3 = block (destination pointer)
  90. i0=r1; // I0 = s0
  91. i1=r2; // I1 = s1
  92. r2=[sp+12]; // line_size
  93. p0=[sp+16]; // h
  94. [--sp] = (r7:6); // save R7:6, used below to hold the averaged words
  95. r2+=-4;
  96. m3=r2; // M3 = line_size - 4: the second store of a row steps I3 to the next destination row
  97. r2+=-4;
  98. m0=r2; // M0 = line_size - 8: the third load of a row steps I0/I1 to the next source row
  99. LSETUP(pp8$2,pp8$3) LC0=P0; // hardware loop pp8$2..pp8$3, count = h
  100. DISALGNEXCPT || R0 = [I0++] || R2 =[I1++]; // preload word 0 of the first row of each source
  101. pp8$2: DISALGNEXCPT || R1 = [I0++] || R3 =[I1++]; // word 1 of the current row of each source
  102. R6 = BYTEOP1P(R1:0,R3:2)(T) || R0 = [I0++M0]|| R2 =[I1++M0]; // R6 = output bytes 0-3; load word 2, move both sources to the next row
  103. R7 = BYTEOP1P(R1:0,R3:2)(T,R) || R0 = [I0++] || [I3++] = R6 ; // R7 = output bytes 4-7 (pair order reversed); preload next row of s0; store bytes 0-3
  104. pp8$3: DISALGNEXCPT || R2 = [I1++] || [I3++M3] = R7; // preload next row of s1; store bytes 4-7 and step to the next destination row
  105. (r7:6) = [sp++]; // restore R7:6
  106. RTS;
  107. DEFUN_END(put_pixels8uc_nornd)
  /*
   * put_pixels16uc_nornd: write a 16-byte-wide block of h rows to `block`.
   * Each output row is BYTEOP1P(row of s0, row of s1) with the T option
   * (NOTE(review): T selects truncation instead of rounding per the Blackfin
   * ISA reference -- confirm).
   *
   * Arguments as the code reads them:
   *   R0       block      destination pointer            -> I3
   *   R1       s0         first source pointer           -> I0
   *   R2       s1         second source pointer          -> I1
   *   [SP+12]  line_size  byte step between rows; used for the destination
   *                       and for both sources
   *   [SP+16]  h          loop count (number of rows)
   * The stack arguments are fetched before R7:6 is pushed, so the offsets
   * are relative to SP on entry.
   *
   * Saves and restores R7:6. Overwrites R0-R3, P0, I0, I1, I3, M0, M3 and the
   * hardware loop 0 state. The loop is software-pipelined: five words are
   * loaded per source row and the first word of the following row is loaded
   * before the current row is fully stored, so both sources are read a
   * little past the data that is actually averaged.
   */
  108. DEFUN(put_pixels16uc_nornd,mL1,
  109. (uint8_t *block, const uint8_t *s0, const uint8_t *s1,
  110. int line_size, int h)):
  111. i3=r0; // I3 = block (destination pointer)
  112. i0=r1; // I0 = s0
  113. i1=r2; // I1 = s1
  114. r2=[sp+12]; // line_size
  115. p0=[sp+16]; // h
  116. [--sp] = (r7:6); // save R7:6, used below to hold the averaged words
  117. r2+=-12;
  118. m3=r2; // M3 = line_size - 12: the fourth store of a row steps I3 to the next destination row
  119. r2+=-4;
  120. m0=r2; // M0 = line_size - 16: the fifth load of a row steps I0/I1 to the next source row
  121. LSETUP(pp16$2,pp16$3) LC0=P0; // hardware loop pp16$2..pp16$3, count = h
  122. DISALGNEXCPT || R0 = [I0++] || R2 =[I1++]; // preload word 0 of the first row of each source
  123. pp16$2:
  124. DISALGNEXCPT || R1 = [I0++] || R3 =[I1++]; // word 1 of the current row
  125. R6 = BYTEOP1P(R1:0,R3:2)(T) || R0 = [I0++] || R2 =[I1++]; // R6 = output bytes 0-3; load word 2
  126. R7 = BYTEOP1P(R1:0,R3:2)(T,R) || R1 = [I0++] || R3 =[I1++]; // R7 = output bytes 4-7 (pair order reversed); load word 3
  127. [I3++] = R6; // store bytes 0-3
  128. R6 = BYTEOP1P(R1:0,R3:2)(T) || R0 = [I0++M0] || R2 =[I1++M0]; // R6 = output bytes 8-11; load word 4, move both sources to the next row
  129. R7 = BYTEOP1P(R1:0,R3:2)(T,R) || R0 = [I0++] || [I3++] = R7 ; // store bytes 4-7 (old R7) while computing bytes 12-15; preload next row of s0
  130. [I3++] = R6; // store bytes 8-11
  131. pp16$3: DISALGNEXCPT || R2 = [I1++] || [I3++M3] = R7; // preload next row of s1; store bytes 12-15 and step to the next destination row
  132. (r7:6) = [sp++]; // restore R7:6
  133. RTS;
  134. DEFUN_END(put_pixels16uc_nornd)
  /*
   * z_put_pixels16_xy2: fill a 16-byte-wide destination block of h rows from
   * BYTEOP2P results computed on two consecutive source rows (s0 and
   * s0 + line_size). The block is processed in two full passes:
   *   pass 1 (LS$16E..LE$16E): BYTEOP2P with RNDL; results are stored to block.
   *   pass 2 (LS$16O..LE$16O): the pointers are rewound, both source pointers
   *          are moved forward by one byte, BYTEOP2P is run with RNDH, and
   *          each result is added (+|+) to the word pass 1 left in block
   *          before being written back.
   * NOTE(review): per the Blackfin ISA reference BYTEOP2P averages a 2x2 group
   * of bytes and the L/H option selects which byte of each 16-bit half
   * receives the result; that would make pass 1 the even and pass 2 the odd
   * output bytes -- confirm against the reference.
   *
   * Arguments as the code reads them (after `link 0`, via FP):
   *   R0       block      destination pointer            -> I3
   *   R1       s0         source pointer                 -> I0 and I1
   *   R2       dest_size  byte step between destination rows
   *   [FP+20]  line_size  byte step between source rows
   *   [FP+24]  h          loop count for each pass (number of rows)
   *
   * Saves and restores R7:4. Overwrites R0-R3, P0, I0, I1, I3, M0-M3, B0, B1,
   * B3 and the hardware loop 0 state. Both passes are software-pipelined and
   * load source words beyond the ones that contribute to the stored results.
   */
  135. DEFUN(z_put_pixels16_xy2,mL1,
  136. (uint8_t *block, const uint8_t *s0,
  137. int dest_size, int line_size, int h)):
  138. link 0; // frame with no locals; stack arguments are addressed through FP
  139. [--sp] = (r7:4); // save R7:4, used below for results and read-back words
  140. i3=r0; // I3 = block (destination pointer)
  141. i0=r1; // I0 = s0 (upper source row)
  142. i1=r1; // I1 = s0 for now; line_size is added below
  143. r2+=-12;
  144. m2=r2; // M2 = dest_size - 12: the fourth store of a row steps I3 to the next destination row
  145. r2=[fp+20]; // line_size
  146. m3=r2; // M3 = line_size
  147. p0=[fp+24]; // h
  148. r2+=-16;
  149. i1+=m3; /* I1 = s0 + line_size (lower source row) */
  150. m0=r2; /* M0 = line_size - 16: the fifth load of a row steps I0/I1 to the next source row */
  151. B0 = I0; // remember the starting pointers for pass 2
  152. B1 = I1;
  153. B3 = I3;
  154. DISALGNEXCPT || R0 = [I0++] || R2 =[I1++]; // pass 1: preload word 0 of both rows
  155. LSETUP(LS$16E,LE$16E) LC0=P0; // hardware loop LS$16E..LE$16E, count = h
  156. LS$16E: DISALGNEXCPT || R1 = [I0++] || R3 =[I1++]; // word 1 of both rows
  157. R4 = BYTEOP2P (R3:2,R1:0) (RNDL) || R0 = [I0++] || R2 =[I1++]; // result for output word 0; load word 2
  158. R5 = BYTEOP2P (R3:2,R1:0) (RNDL,R) || R1 = [I0++] || [I3++] = R4 ; // result for output word 1 (pair order reversed); load word 3 of I0; store word 0
  159. DISALGNEXCPT || R3 = [I1++] || [I3++] = R5; // load word 3 of I1; store word 1
  160. R4 = BYTEOP2P (R3:2,R1:0) (RNDL) || R0 = [I0++M0]|| R2 = [I1++M0]; // result for output word 2; load word 4, move both rows down one line
  161. R5 = BYTEOP2P (R3:2,R1:0) (RNDL,R) || R0 = [I0++] || [I3++] = R4 ; // result for output word 3; preload next line of I0; store word 2
  162. LE$16E: DISALGNEXCPT || R2 = [I1++] || [I3++M2] = R5; // preload next line of I1; store word 3 and step to the next destination row
  163. M1 = 1; // one-byte offset applied to both source pointers for pass 2
  164. I3 = B3; // rewind destination and sources to their starting values
  165. I1 = B1;
  166. I0 = B0;
  167. I0 += M1; // pass 2 reads the sources one byte further along
  168. I1 += M1;
  169. DISALGNEXCPT || R0 = [I0++] || R2 =[I1++]; // pass 2: preload word 0 of both rows
  170. LSETUP(LS$16O,LE$16O) LC0=P0; // hardware loop LS$16O..LE$16O, count = h
  171. LS$16O: DISALGNEXCPT || R1 = [I0++] || R3 =[I1++]; // word 1 of both rows
  172. R4 = BYTEOP2P (R3:2,R1:0) (RNDH) || R0 = [I0++] || R2 =[I1++]; // result for output word 0; load word 2
  173. R5 = BYTEOP2P (R3:2,R1:0) (RNDH,R) || R1 = [I0++] || R6 =[I3++]; // result for output word 1; load word 3 of I0; read back pass-1 word 0
  174. R4 = R4 +|+ R6 || R7 = [I3--]; // merge word 0; read back pass-1 word 1 and step I3 back to word 0
  175. R5 = R5 +|+ R7 || [I3++] = R4; // merge word 1; store merged word 0
  176. DISALGNEXCPT || R3 =[I1++] || [I3++] = R5; // load word 3 of I1; store merged word 1
  177. R4 = BYTEOP2P (R3:2,R1:0) (RNDH) || R0 = [I0++M0]|| R2 = [I1++M0]; // result for output word 2; load word 4, move both rows down one line
  178. R5 = BYTEOP2P (R3:2,R1:0) (RNDH,R) || R0 = [I0++] || R6 = [I3++]; // result for output word 3; preload next line of I0; read back pass-1 word 2
  179. R4 = R4 +|+ R6 || R7 = [I3--]; // merge word 2; read back pass-1 word 3 and step I3 back to word 2
  180. R5 = R5 +|+ R7 || [I3++] = R4; // merge word 3; store merged word 2
  181. LE$16O: DISALGNEXCPT || R2 = [I1++] || [I3++M2] = R5; // preload next line of I1; store merged word 3 and step to the next destination row
  182. (r7:4) = [sp++]; // restore R7:4
  183. unlink;
  184. rts;
  185. DEFUN_END(z_put_pixels16_xy2)
  /*
   * put_pixels16_xy2_nornd: fill a 16-byte-wide destination block of h rows
   * from BYTEOP2P results computed on two consecutive source rows (s0 and
   * s0 + line_size). The block is processed in two full passes:
   *   pass 1 (LS$16ET..LE$16ET): BYTEOP2P with TL; results are stored to block.
   *   pass 2 (LS$16OT..LE$16OT): the pointers are rewound, both source
   *          pointers are moved forward by one byte, BYTEOP2P is run with
   *          TH, and each result is added (+|+) to the word pass 1 left in
   *          block before being written back.
   * NOTE(review): per the Blackfin ISA reference BYTEOP2P averages a 2x2 group
   * of bytes, the T options truncate instead of rounding, and L/H selects
   * which byte of each 16-bit half receives the result; that would make
   * pass 1 the even and pass 2 the odd output bytes -- confirm.
   *
   * Arguments as the code reads them (after `link 0`, via FP):
   *   R0       block      destination pointer            -> I3
   *   R1       s0         source pointer                 -> I0 and I1
   *   R2       line_size  byte step between rows; used for the destination
   *                       and for the source
   *   [FP+20]  h          loop count for each pass (number of rows)
   *
   * Saves and restores R7:4. Overwrites R0-R3, P0, I0, I1, I3, M0-M3, B0, B1,
   * B3 and the hardware loop 0 state. Both passes are software-pipelined and
   * load source words beyond the ones that contribute to the stored results.
   */
  186. DEFUN(put_pixels16_xy2_nornd,mL1,
  187. (uint8_t *block, const uint8_t *s0,
  188. int line_size, int h)):
  189. link 0; // frame with no locals; the stack argument is addressed through FP
  190. [--sp] = (r7:4); // save R7:4, used below for results and read-back words
  191. i3=r0; // I3 = block (destination pointer)
  192. i0=r1; // I0 = s0 (upper source row)
  193. i1=r1; // I1 = s0 for now; line_size is added below
  194. m3=r2; // M3 = line_size
  195. r2+=-12;
  196. m2=r2; // M2 = line_size - 12: the fourth store of a row steps I3 to the next destination row
  197. r2+=-4;
  198. i1+=m3; /* I1 = s0 + line_size (lower source row) */
  199. m0=r2; /* M0 = line_size - 16: the fifth load of a row steps I0/I1 to the next source row */
  200. p0=[fp+20]; // h
  201. B0=I0; // remember the starting pointers for pass 2
  202. B1=I1;
  203. B3=I3;
  204. DISALGNEXCPT || R0 = [I0++] || R2 =[I1++]; // pass 1: preload word 0 of both rows
  205. LSETUP(LS$16ET,LE$16ET) LC0=P0; // hardware loop LS$16ET..LE$16ET, count = h
  206. LS$16ET:DISALGNEXCPT || R1 = [I0++] || R3 =[I1++]; // word 1 of both rows
  207. R4 = BYTEOP2P (R3:2,R1:0) (TL) || R0 = [I0++] || R2 =[I1++]; // result for output word 0; load word 2
  208. R5 = BYTEOP2P (R3:2,R1:0) (TL,R) || R1 = [I0++] || [I3++] = R4 ; // result for output word 1 (pair order reversed); load word 3 of I0; store word 0
  209. DISALGNEXCPT || R3 = [I1++] || [I3++] = R5; // load word 3 of I1; store word 1
  210. R4 = BYTEOP2P (R3:2,R1:0) (TL) || R0 = [I0++M0]|| R2 = [I1++M0]; // result for output word 2; load word 4, move both rows down one line
  211. R5 = BYTEOP2P (R3:2,R1:0) (TL,R) || R0 = [I0++] || [I3++] = R4 ; // result for output word 3; preload next line of I0; store word 2
  212. LE$16ET:DISALGNEXCPT || R2 = [I1++] || [I3++M2] = R5; // preload next line of I1; store word 3 and step to the next destination row
  213. M1 = 1; // one-byte offset applied to both source pointers for pass 2
  214. I3=B3; // rewind destination and sources to their starting values
  215. I1=B1;
  216. I0=B0;
  217. I0 += M1; // pass 2 reads the sources one byte further along
  218. I1 += M1;
  219. DISALGNEXCPT || R0 = [I0++] || R2 =[I1++]; // pass 2: preload word 0 of both rows
  220. LSETUP(LS$16OT,LE$16OT) LC0=P0; // hardware loop LS$16OT..LE$16OT, count = h
  221. LS$16OT:DISALGNEXCPT || R1 = [I0++] || R3 =[I1++]; // word 1 of both rows
  222. R4 = BYTEOP2P (R3:2,R1:0) (TH) || R0 = [I0++] || R2 =[I1++]; // result for output word 0; load word 2
  223. R5 = BYTEOP2P (R3:2,R1:0) (TH,R) || R1 = [I0++] || R6 =[I3++]; // result for output word 1; load word 3 of I0; read back pass-1 word 0
  224. R4 = R4 +|+ R6 || R7 = [I3--]; // merge word 0; read back pass-1 word 1 and step I3 back to word 0
  225. R5 = R5 +|+ R7 || [I3++] = R4; // merge word 1; store merged word 0
  226. DISALGNEXCPT || R3 =[I1++] || [I3++] = R5; // load word 3 of I1; store merged word 1
  227. R4 = BYTEOP2P (R3:2,R1:0) (TH) || R0 = [I0++M0]|| R2 = [I1++M0]; // result for output word 2; load word 4, move both rows down one line
  228. R5 = BYTEOP2P (R3:2,R1:0) (TH,R) || R0 = [I0++] || R6 = [I3++]; // result for output word 3; preload next line of I0; read back pass-1 word 2
  229. R4 = R4 +|+ R6 || R7 = [I3--]; // merge word 2; read back pass-1 word 3 and step I3 back to word 2
  230. R5 = R5 +|+ R7 || [I3++] = R4; // merge word 3; store merged word 2
  231. LE$16OT:DISALGNEXCPT || R2 = [I1++] || [I3++M2] = R5; // preload next line of I1; store merged word 3 and step to the next destination row
  232. (r7:4) = [sp++]; // restore R7:4
  233. unlink;
  234. rts;
  235. DEFUN_END(put_pixels16_xy2_nornd)
  /*
   * z_put_pixels8_xy2: fill an 8-byte-wide destination block of h rows from
   * BYTEOP2P results computed on two consecutive source rows (s0 and
   * s0 + line_size). The block is processed in two full passes:
   *   pass 1 (LS$8E..LE$8E): BYTEOP2P with RNDL; results are stored to block.
   *   pass 2 (LS$8O..LE$8O): the pointers are rewound, both source pointers
   *          are moved forward by one byte, BYTEOP2P is run with RNDH, and
   *          each result is added (+|+) to the word pass 1 left in block
   *          before being written back.
   * NOTE(review): per the Blackfin ISA reference BYTEOP2P averages a 2x2 group
   * of bytes and the L/H option selects which byte of each 16-bit half
   * receives the result; that would make pass 1 the even and pass 2 the odd
   * output bytes -- confirm against the reference.
   *
   * Arguments as the code reads them (after `link 0`, via FP):
   *   R0       block      destination pointer            -> I3
   *   R1       s0         source pointer                 -> I0 and I1
   *   R2       dest_size  byte step between destination rows
   *   [FP+20]  line_size  byte step between source rows
   *   [FP+24]  h          loop count for each pass (number of rows)
   *
   * Saves and restores R7:4. Overwrites R0-R3, P0, I0, I1, I3, M0-M3, B0, B1,
   * B3 and the hardware loop 0 state. Both passes are software-pipelined and
   * load source words beyond the ones that contribute to the stored results.
   */
  236. DEFUN(z_put_pixels8_xy2,mL1,
  237. (uint8_t *block, const uint8_t *s0,
  238. int dest_size, int line_size, int h)):
  239. link 0; // frame with no locals; stack arguments are addressed through FP
  240. [--sp] = (r7:4); // save R7:4, used below for results and read-back words
  241. i3=r0; // I3 = block (destination pointer)
  242. i0=r1; // I0 = s0 (upper source row)
  243. i1=r1; // I1 = s0 for now; line_size is added below
  244. r2+=-4;
  245. m2=r2; // M2 = dest_size - 4: the second store of a row steps I3 to the next destination row
  246. r2=[fp+20]; // line_size
  247. m3=r2; // M3 = line_size
  248. p0=[fp+24]; // h
  249. r2+=-8;
  250. i1+=m3; /* I1 = s0 + line_size (lower source row) */
  251. m0=r2; /* M0 = line_size - 8: the third load of a row steps I0/I1 to the next source row */
  252. b0 = I0; // remember the starting pointers for pass 2
  253. b1 = I1;
  254. b3 = I3;
  255. LSETUP(LS$8E,LE$8E) LC0=P0; // hardware loop LS$8E..LE$8E, count = h
  256. DISALGNEXCPT || R0 = [I0++] || R2 =[I1++]; // pass 1: preload word 0 of both rows
  257. LS$8E: DISALGNEXCPT || R1 = [I0++] || R3 =[I1++]; // word 1 of both rows
  258. R4 = BYTEOP2P (R3:2,R1:0) (RNDL) || R0 = [I0++M0] || R2 =[I1++M0]; // result for output word 0; load word 2, move both rows down one line
  259. R5 = BYTEOP2P (R3:2,R1:0) (RNDL,R) || R0 = [I0++] || [I3++] = R4 ; // result for output word 1 (pair order reversed); preload next line of I0; store word 0
  260. LE$8E: DISALGNEXCPT || R2 = [I1++] || [I3++M2] = R5; // preload next line of I1; store word 1 and step to the next destination row
  261. M1 = 1; // one-byte offset applied to both source pointers for pass 2
  262. I3 = b3; // rewind destination and sources to their starting values
  263. I1 = b1;
  264. I0 = b0;
  265. I0 += M1; // pass 2 reads the sources one byte further along
  266. I1 += M1;
  267. LSETUP(LS$8O,LE$8O) LC0=P0; // hardware loop LS$8O..LE$8O, count = h
  268. DISALGNEXCPT || R0 = [I0++] || R2 =[I1++]; // pass 2: preload word 0 of both rows
  269. LS$8O: DISALGNEXCPT || R1 = [I0++] || R3 =[I1++]; // word 1 of both rows
  270. R4 = BYTEOP2P (R3:2,R1:0) (RNDH) || R0 = [I0++M0] || R2 =[I1++M0]; // result for output word 0; load word 2, move both rows down one line
  271. R5 = BYTEOP2P (R3:2,R1:0) (RNDH,R) || R0 = [I0++] || R6 =[I3++]; // result for output word 1; preload next line of I0; read back pass-1 word 0
  272. R4 = R4 +|+ R6 || R7 = [I3--]; // merge word 0; read back pass-1 word 1 and step I3 back to word 0
  273. R5 = R5 +|+ R7 || [I3++] = R4; // merge word 1; store merged word 0
  274. LE$8O: DISALGNEXCPT || R2 =[I1++] || [I3++M2] = R5; // preload next line of I1; store merged word 1 and step to the next destination row
  275. (r7:4) = [sp++]; // restore R7:4
  276. unlink;
  277. rts;
  278. DEFUN_END(z_put_pixels8_xy2)
  /*
   * put_pixels8_xy2_nornd: fill an 8-byte-wide destination block of h rows
   * from BYTEOP2P results computed on two consecutive source rows (s0 and
   * s0 + line_size). The block is processed in two full passes:
   *   pass 1 (LS$8ET..LE$8ET): BYTEOP2P with TL; results are stored to block.
   *   pass 2 (LS$8OT..LE$8OT): the pointers are rewound, both source pointers
   *          are moved forward by one byte, BYTEOP2P is run with TH, and
   *          each result is added (+|+) to the word pass 1 left in block
   *          before being written back.
   * NOTE(review): per the Blackfin ISA reference BYTEOP2P averages a 2x2 group
   * of bytes, the T options truncate instead of rounding, and L/H selects
   * which byte of each 16-bit half receives the result; that would make
   * pass 1 the even and pass 2 the odd output bytes -- confirm.
   *
   * Arguments as the code reads them (after `link 0`, via FP):
   *   R0       block      destination pointer            -> I3
   *   R1       s0         source pointer                 -> I0 and I1
   *   R2       line_size  byte step between rows; used for the destination
   *                       and for the source
   *   [FP+20]  h          loop count for each pass (number of rows)
   *
   * Saves and restores R7:4. Overwrites R0-R3, P0, I0, I1, I3, M0-M3, B0, B1,
   * B3 and the hardware loop 0 state. Both passes are software-pipelined and
   * load source words beyond the ones that contribute to the stored results.
   */
  279. DEFUN(put_pixels8_xy2_nornd,mL1,
  280. (uint8_t *block, const uint8_t *s0, int line_size, int h)):
  281. link 0; // frame with no locals; the stack argument is addressed through FP
  282. [--sp] = (r7:4); // save R7:4, used below for results and read-back words
  283. i3=r0; // I3 = block (destination pointer)
  284. i0=r1; // I0 = s0 (upper source row)
  285. i1=r1; // I1 = s0 for now; line_size is added below
  286. m3=r2; // M3 = line_size
  287. r2+=-4;
  288. m2=r2; // M2 = line_size - 4: the second store of a row steps I3 to the next destination row
  289. r2+=-4;
  290. i1+=m3; /* I1 = s0 + line_size (lower source row) */
  291. m0=r2; /* M0 = line_size - 8: the third load of a row steps I0/I1 to the next source row */
  292. p0=[fp+20]; // h
  293. b0 = I0; // remember the starting pointers for pass 2
  294. b1 = I1;
  295. b3 = I3;
  296. LSETUP(LS$8ET,LE$8ET) LC0=P0; // hardware loop LS$8ET..LE$8ET, count = h
  297. DISALGNEXCPT || R0 = [I0++] || R2 =[I1++]; // pass 1: preload word 0 of both rows
  298. LS$8ET: DISALGNEXCPT || R1 = [I0++] || R3 = [I1++]; // word 1 of both rows
  299. R4 = BYTEOP2P (R3:2,R1:0) (TL) || R0 = [I0++M0] || R2 = [I1++M0]; // result for output word 0; load word 2, move both rows down one line
  300. R5 = BYTEOP2P (R3:2,R1:0) (TL,R) || R0 = [I0++] || [I3++] = R4 ; // result for output word 1 (pair order reversed); preload next line of I0; store word 0
  301. LE$8ET: DISALGNEXCPT || R2 = [I1++] || [I3++M2] = R5; // preload next line of I1; store word 1 and step to the next destination row
  302. M1 = 1; // one-byte offset applied to both source pointers for pass 2
  303. I3 = b3; // rewind destination and sources to their starting values
  304. I1 = b1;
  305. I0 = b0;
  306. I0 += M1; // pass 2 reads the sources one byte further along
  307. I1 += M1;
  308. LSETUP(LS$8OT,LE$8OT) LC0=P0; // hardware loop LS$8OT..LE$8OT, count = h
  309. DISALGNEXCPT || R0 = [I0++] || R2 = [I1++]; // pass 2: preload word 0 of both rows
  310. LS$8OT: DISALGNEXCPT || R1 = [I0++] || R3 = [I1++]; // word 1 of both rows
  311. R4 = BYTEOP2P (R3:2,R1:0) (TH) || R0 = [I0++M0] || R2 = [I1++M0]; // result for output word 0; load word 2, move both rows down one line
  312. R5 = BYTEOP2P (R3:2,R1:0) (TH,R) || R0 = [I0++] || R6 = [I3++]; // result for output word 1; preload next line of I0; read back pass-1 word 0
  313. R4 = R4 +|+ R6 || R7 = [I3--]; // merge word 0; read back pass-1 word 1 and step I3 back to word 0
  314. R5 = R5 +|+ R7 || [I3++] = R4; // merge word 1; store merged word 0
  315. LE$8OT: DISALGNEXCPT || R2 =[I1++] || [I3++M2] = R5; // preload next line of I1; store merged word 1 and step to the next destination row
  316. (r7:4) = [sp++]; // restore R7:4
  317. unlink;
  318. rts;
  319. DEFUN_END(put_pixels8_xy2_nornd)