@
@ ARMv4 optimized DSP utils
@ Copyright (c) 2004 AGAWA Koji <i (AT) atty (DOT) jp>
@
@ This file is part of Libav.
@
@ Libav is free software; you can redistribute it and/or
@ modify it under the terms of the GNU Lesser General Public
@ License as published by the Free Software Foundation; either
@ version 2.1 of the License, or (at your option) any later version.
@
@ Libav is distributed in the hope that it will be useful,
@ but WITHOUT ANY WARRANTY; without even the implied warranty of
@ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
@ Lesser General Public License for more details.
@
@ You should have received a copy of the GNU Lesser General Public
@ License along with Libav; if not, write to the Free Software
@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
@

#include "config.h"
#include "libavutil/arm/asm.S"

#if HAVE_ARMV5TE
function ff_prefetch_arm, export=1
        subs r2, r2, #1
        pld [r0]
        add r0, r0, r1
        bne ff_prefetch_arm
        bx lr
endfunc
#else
#define pld @
#endif
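
@ The ALIGN_* macros below implement unaligned reads using only word loads:
@ from N+1 words fetched at the word-aligned address just below the source,
@ they rebuild N destination words shifted right by "shift" bytes.  A rough C
@ sketch of one output word (little-endian, with w[] the loaded words):
@     d[i] = (w[i] >> (shift * 8)) | (w[i + 1] << (32 - shift * 8));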
.macro ALIGN_QWORD_D shift, Rd0, Rd1, Rd2, Rd3, Rn0, Rn1, Rn2, Rn3, Rn4
        mov \Rd0, \Rn0, lsr #(\shift * 8)
        mov \Rd1, \Rn1, lsr #(\shift * 8)
        mov \Rd2, \Rn2, lsr #(\shift * 8)
        mov \Rd3, \Rn3, lsr #(\shift * 8)
        orr \Rd0, \Rd0, \Rn1, lsl #(32 - \shift * 8)
        orr \Rd1, \Rd1, \Rn2, lsl #(32 - \shift * 8)
        orr \Rd2, \Rd2, \Rn3, lsl #(32 - \shift * 8)
        orr \Rd3, \Rd3, \Rn4, lsl #(32 - \shift * 8)
.endm

.macro ALIGN_DWORD shift, R0, R1, R2
        mov \R0, \R0, lsr #(\shift * 8)
        orr \R0, \R0, \R1, lsl #(32 - \shift * 8)
        mov \R1, \R1, lsr #(\shift * 8)
        orr \R1, \R1, \R2, lsl #(32 - \shift * 8)
.endm

.macro ALIGN_DWORD_D shift, Rdst0, Rdst1, Rsrc0, Rsrc1, Rsrc2
        mov \Rdst0, \Rsrc0, lsr #(\shift * 8)
        mov \Rdst1, \Rsrc1, lsr #(\shift * 8)
        orr \Rdst0, \Rdst0, \Rsrc1, lsl #(32 - (\shift * 8))
        orr \Rdst1, \Rdst1, \Rsrc2, lsl #(32 - (\shift * 8))
.endm
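
@ The two averaging macros below compute a bytewise average of two packed
@ words without unpacking them.  A rough C sketch of the idea:
@     avg_rnd(a, b)    = (a | b) - (((a ^ b) & 0xFEFEFEFE) >> 1);  /* round up   */
@     avg_no_rnd(a, b) = (a & b) + (((a ^ b) & 0xFEFEFEFE) >> 1);  /* round down */
@ Masking the XOR with 0xFEFEFEFE before the shift keeps the bit shifted out
@ of one byte from leaking into the byte below it.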
.macro RND_AVG32 Rd0, Rd1, Rn0, Rn1, Rm0, Rm1, Rmask
        @ Rd = (Rn | Rm) - (((Rn ^ Rm) & ~0x01010101) >> 1)
        @ Rmask = 0xFEFEFEFE
        @ Rn is clobbered
        eor \Rd0, \Rn0, \Rm0
        eor \Rd1, \Rn1, \Rm1
        orr \Rn0, \Rn0, \Rm0
        orr \Rn1, \Rn1, \Rm1
        and \Rd0, \Rd0, \Rmask
        and \Rd1, \Rd1, \Rmask
        sub \Rd0, \Rn0, \Rd0, lsr #1
        sub \Rd1, \Rn1, \Rd1, lsr #1
.endm

.macro NO_RND_AVG32 Rd0, Rd1, Rn0, Rn1, Rm0, Rm1, Rmask
        @ Rd = (Rn & Rm) + (((Rn ^ Rm) & ~0x01010101) >> 1)
        @ Rmask = 0xFEFEFEFE
        @ Rn is clobbered
        eor \Rd0, \Rn0, \Rm0
        eor \Rd1, \Rn1, \Rm1
        and \Rn0, \Rn0, \Rm0
        and \Rn1, \Rn1, \Rm1
        and \Rd0, \Rd0, \Rmask
        and \Rd1, \Rd1, \Rmask
        add \Rd0, \Rn0, \Rd0, lsr #1
        add \Rd1, \Rn1, \Rd1, lsr #1
.endm
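
@ JMP_ALIGN below masks the low two bits off the source pointer and branches
@ on them: local label 1 handles offset 0, label 2 offset 1, label 3 offset 2
@ and label 4 offset 3.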
.macro JMP_ALIGN tmp, reg
        ands \tmp, \reg, #3
        bic \reg, \reg, #3
        beq 1f
        subs \tmp, \tmp, #1
        beq 2f
        subs \tmp, \tmp, #1
        beq 3f
        b 4f
.endm

@ ----------------------------------------------------------------
.align 5
function ff_put_pixels16_arm, export=1
        @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
        @ block = word aligned, pixels = unaligned
        pld [r1]
        push {r4-r11, lr}
        JMP_ALIGN r5, r1
1:
        ldm r1, {r4-r7}
        add r1, r1, r2
        stm r0, {r4-r7}
        pld [r1]
        subs r3, r3, #1
        add r0, r0, r2
        bne 1b
        pop {r4-r11, pc}
.align 5
2:
        ldm r1, {r4-r8}
        add r1, r1, r2
        ALIGN_QWORD_D 1, r9, r10, r11, r12, r4, r5, r6, r7, r8
        pld [r1]
        subs r3, r3, #1
        stm r0, {r9-r12}
        add r0, r0, r2
        bne 2b
        pop {r4-r11, pc}
.align 5
3:
        ldm r1, {r4-r8}
        add r1, r1, r2
        ALIGN_QWORD_D 2, r9, r10, r11, r12, r4, r5, r6, r7, r8
        pld [r1]
        subs r3, r3, #1
        stm r0, {r9-r12}
        add r0, r0, r2
        bne 3b
        pop {r4-r11, pc}
.align 5
4:
        ldm r1, {r4-r8}
        add r1, r1, r2
        ALIGN_QWORD_D 3, r9, r10, r11, r12, r4, r5, r6, r7, r8
        pld [r1]
        subs r3, r3, #1
        stm r0, {r9-r12}
        add r0, r0, r2
        bne 4b
        pop {r4-r11, pc}
endfunc

@ ----------------------------------------------------------------
.align 5
function ff_put_pixels8_arm, export=1
        @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
        @ block = word aligned, pixels = unaligned
        pld [r1]
        push {r4-r5, lr}
        JMP_ALIGN r5, r1
1:
        ldm r1, {r4-r5}
        add r1, r1, r2
        subs r3, r3, #1
        pld [r1]
        stm r0, {r4-r5}
        add r0, r0, r2
        bne 1b
        pop {r4-r5, pc}
.align 5
2:
        ldm r1, {r4-r5, r12}
        add r1, r1, r2
        ALIGN_DWORD 1, r4, r5, r12
        pld [r1]
        subs r3, r3, #1
        stm r0, {r4-r5}
        add r0, r0, r2
        bne 2b
        pop {r4-r5, pc}
.align 5
3:
        ldm r1, {r4-r5, r12}
        add r1, r1, r2
        ALIGN_DWORD 2, r4, r5, r12
        pld [r1]
        subs r3, r3, #1
        stm r0, {r4-r5}
        add r0, r0, r2
        bne 3b
        pop {r4-r5, pc}
.align 5
4:
        ldm r1, {r4-r5, r12}
        add r1, r1, r2
        ALIGN_DWORD 3, r4, r5, r12
        pld [r1]
        subs r3, r3, #1
        stm r0, {r4-r5}
        add r0, r0, r2
        bne 4b
        pop {r4-r5, pc}
endfunc

@ ----------------------------------------------------------------
.align 5
function ff_put_pixels8_x2_arm, export=1
        @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
        @ block = word aligned, pixels = unaligned
        pld [r1]
        push {r4-r10, lr}
        ldr r12, =0xfefefefe
        JMP_ALIGN r5, r1
1:
        ldm r1, {r4-r5, r10}
        add r1, r1, r2
        ALIGN_DWORD_D 1, r6, r7, r4, r5, r10
        pld [r1]
        RND_AVG32 r8, r9, r4, r5, r6, r7, r12
        subs r3, r3, #1
        stm r0, {r8-r9}
        add r0, r0, r2
        bne 1b
        pop {r4-r10, pc}
.align 5
2:
        ldm r1, {r4-r5, r10}
        add r1, r1, r2
        ALIGN_DWORD_D 1, r6, r7, r4, r5, r10
        ALIGN_DWORD_D 2, r8, r9, r4, r5, r10
        pld [r1]
        RND_AVG32 r4, r5, r6, r7, r8, r9, r12
        subs r3, r3, #1
        stm r0, {r4-r5}
        add r0, r0, r2
        bne 2b
        pop {r4-r10, pc}
.align 5
3:
        ldm r1, {r4-r5, r10}
        add r1, r1, r2
        ALIGN_DWORD_D 2, r6, r7, r4, r5, r10
        ALIGN_DWORD_D 3, r8, r9, r4, r5, r10
        pld [r1]
        RND_AVG32 r4, r5, r6, r7, r8, r9, r12
        subs r3, r3, #1
        stm r0, {r4-r5}
        add r0, r0, r2
        bne 3b
        pop {r4-r10, pc}
.align 5
4:
        ldm r1, {r4-r5, r10}
        add r1, r1, r2
        ALIGN_DWORD_D 3, r6, r7, r4, r5, r10
        pld [r1]
        RND_AVG32 r8, r9, r6, r7, r5, r10, r12
        subs r3, r3, #1
        stm r0, {r8-r9}
        add r0, r0, r2
        bne 4b
        pop {r4-r10, pc}
endfunc

.align 5
function ff_put_no_rnd_pixels8_x2_arm, export=1
        @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
        @ block = word aligned, pixels = unaligned
        pld [r1]
        push {r4-r10, lr}
        ldr r12, =0xfefefefe
        JMP_ALIGN r5, r1
1:
        ldm r1, {r4-r5, r10}
        add r1, r1, r2
        ALIGN_DWORD_D 1, r6, r7, r4, r5, r10
        pld [r1]
        NO_RND_AVG32 r8, r9, r4, r5, r6, r7, r12
        subs r3, r3, #1
        stm r0, {r8-r9}
        add r0, r0, r2
        bne 1b
        pop {r4-r10, pc}
.align 5
2:
        ldm r1, {r4-r5, r10}
        add r1, r1, r2
        ALIGN_DWORD_D 1, r6, r7, r4, r5, r10
        ALIGN_DWORD_D 2, r8, r9, r4, r5, r10
        pld [r1]
        NO_RND_AVG32 r4, r5, r6, r7, r8, r9, r12
        subs r3, r3, #1
        stm r0, {r4-r5}
        add r0, r0, r2
        bne 2b
        pop {r4-r10, pc}
.align 5
3:
        ldm r1, {r4-r5, r10}
        add r1, r1, r2
        ALIGN_DWORD_D 2, r6, r7, r4, r5, r10
        ALIGN_DWORD_D 3, r8, r9, r4, r5, r10
        pld [r1]
        NO_RND_AVG32 r4, r5, r6, r7, r8, r9, r12
        subs r3, r3, #1
        stm r0, {r4-r5}
        add r0, r0, r2
        bne 3b
        pop {r4-r10, pc}
.align 5
4:
        ldm r1, {r4-r5, r10}
        add r1, r1, r2
        ALIGN_DWORD_D 3, r6, r7, r4, r5, r10
        pld [r1]
        NO_RND_AVG32 r8, r9, r6, r7, r5, r10, r12
        subs r3, r3, #1
        stm r0, {r8-r9}
        add r0, r0, r2
        bne 4b
        pop {r4-r10, pc}
endfunc

@ ----------------------------------------------------------------
.align 5
function ff_put_pixels8_y2_arm, export=1
        @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
        @ block = word aligned, pixels = unaligned
        pld [r1]
        push {r4-r11, lr}
        mov r3, r3, lsr #1
        ldr r12, =0xfefefefe
        JMP_ALIGN r5, r1
1:
        ldm r1, {r4-r5}
        add r1, r1, r2
6:      ldm r1, {r6-r7}
        add r1, r1, r2
        pld [r1]
        RND_AVG32 r8, r9, r4, r5, r6, r7, r12
        ldm r1, {r4-r5}
        add r1, r1, r2
        stm r0, {r8-r9}
        add r0, r0, r2
        pld [r1]
        RND_AVG32 r8, r9, r6, r7, r4, r5, r12
        subs r3, r3, #1
        stm r0, {r8-r9}
        add r0, r0, r2
        bne 6b
        pop {r4-r11, pc}
.align 5
2:
        ldm r1, {r4-r6}
        add r1, r1, r2
        pld [r1]
        ALIGN_DWORD 1, r4, r5, r6
6:      ldm r1, {r7-r9}
        add r1, r1, r2
        pld [r1]
        ALIGN_DWORD 1, r7, r8, r9
        RND_AVG32 r10, r11, r4, r5, r7, r8, r12
        stm r0, {r10-r11}
        add r0, r0, r2
        ldm r1, {r4-r6}
        add r1, r1, r2
        pld [r1]
        ALIGN_DWORD 1, r4, r5, r6
        subs r3, r3, #1
        RND_AVG32 r10, r11, r7, r8, r4, r5, r12
        stm r0, {r10-r11}
        add r0, r0, r2
        bne 6b
        pop {r4-r11, pc}
.align 5
3:
        ldm r1, {r4-r6}
        add r1, r1, r2
        pld [r1]
        ALIGN_DWORD 2, r4, r5, r6
6:      ldm r1, {r7-r9}
        add r1, r1, r2
        pld [r1]
        ALIGN_DWORD 2, r7, r8, r9
        RND_AVG32 r10, r11, r4, r5, r7, r8, r12
        stm r0, {r10-r11}
        add r0, r0, r2
        ldm r1, {r4-r6}
        add r1, r1, r2
        pld [r1]
        ALIGN_DWORD 2, r4, r5, r6
        subs r3, r3, #1
        RND_AVG32 r10, r11, r7, r8, r4, r5, r12
        stm r0, {r10-r11}
        add r0, r0, r2
        bne 6b
        pop {r4-r11, pc}
.align 5
4:
        ldm r1, {r4-r6}
        add r1, r1, r2
        pld [r1]
        ALIGN_DWORD 3, r4, r5, r6
6:      ldm r1, {r7-r9}
        add r1, r1, r2
        pld [r1]
        ALIGN_DWORD 3, r7, r8, r9
        RND_AVG32 r10, r11, r4, r5, r7, r8, r12
        stm r0, {r10-r11}
        add r0, r0, r2
        ldm r1, {r4-r6}
        add r1, r1, r2
        pld [r1]
        ALIGN_DWORD 3, r4, r5, r6
        subs r3, r3, #1
        RND_AVG32 r10, r11, r7, r8, r4, r5, r12
        stm r0, {r10-r11}
        add r0, r0, r2
        bne 6b
        pop {r4-r11, pc}
endfunc

.align 5
function ff_put_no_rnd_pixels8_y2_arm, export=1
        @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
        @ block = word aligned, pixels = unaligned
        pld [r1]
        push {r4-r11, lr}
        mov r3, r3, lsr #1
        ldr r12, =0xfefefefe
        JMP_ALIGN r5, r1
1:
        ldm r1, {r4-r5}
        add r1, r1, r2
6:      ldm r1, {r6-r7}
        add r1, r1, r2
        pld [r1]
        NO_RND_AVG32 r8, r9, r4, r5, r6, r7, r12
        ldm r1, {r4-r5}
        add r1, r1, r2
        stm r0, {r8-r9}
        add r0, r0, r2
        pld [r1]
        NO_RND_AVG32 r8, r9, r6, r7, r4, r5, r12
        subs r3, r3, #1
        stm r0, {r8-r9}
        add r0, r0, r2
        bne 6b
        pop {r4-r11, pc}
.align 5
2:
        ldm r1, {r4-r6}
        add r1, r1, r2
        pld [r1]
        ALIGN_DWORD 1, r4, r5, r6
6:      ldm r1, {r7-r9}
        add r1, r1, r2
        pld [r1]
        ALIGN_DWORD 1, r7, r8, r9
        NO_RND_AVG32 r10, r11, r4, r5, r7, r8, r12
        stm r0, {r10-r11}
        add r0, r0, r2
        ldm r1, {r4-r6}
        add r1, r1, r2
        pld [r1]
        ALIGN_DWORD 1, r4, r5, r6
        subs r3, r3, #1
        NO_RND_AVG32 r10, r11, r7, r8, r4, r5, r12
        stm r0, {r10-r11}
        add r0, r0, r2
        bne 6b
        pop {r4-r11, pc}
.align 5
3:
        ldm r1, {r4-r6}
        add r1, r1, r2
        pld [r1]
        ALIGN_DWORD 2, r4, r5, r6
6:      ldm r1, {r7-r9}
        add r1, r1, r2
        pld [r1]
        ALIGN_DWORD 2, r7, r8, r9
        NO_RND_AVG32 r10, r11, r4, r5, r7, r8, r12
        stm r0, {r10-r11}
        add r0, r0, r2
        ldm r1, {r4-r6}
        add r1, r1, r2
        pld [r1]
        ALIGN_DWORD 2, r4, r5, r6
        subs r3, r3, #1
        NO_RND_AVG32 r10, r11, r7, r8, r4, r5, r12
        stm r0, {r10-r11}
        add r0, r0, r2
        bne 6b
        pop {r4-r11, pc}
.align 5
4:
        ldm r1, {r4-r6}
        add r1, r1, r2
        pld [r1]
        ALIGN_DWORD 3, r4, r5, r6
6:      ldm r1, {r7-r9}
        add r1, r1, r2
        pld [r1]
        ALIGN_DWORD 3, r7, r8, r9
        NO_RND_AVG32 r10, r11, r4, r5, r7, r8, r12
        stm r0, {r10-r11}
        add r0, r0, r2
        ldm r1, {r4-r6}
        add r1, r1, r2
        pld [r1]
        ALIGN_DWORD 3, r4, r5, r6
        subs r3, r3, #1
        NO_RND_AVG32 r10, r11, r7, r8, r4, r5, r12
        stm r0, {r10-r11}
        add r0, r0, r2
        bne 6b
        pop {r4-r11, pc}
endfunc
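
@ Flush the literal pool here so the ldr rX, =constant loads above stay
@ within pc-relative addressing range.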
.ltorg

@ ----------------------------------------------------------------
.macro RND_XY2_IT align, rnd
        @ l1 = (a & 0x03030303) + (b & 0x03030303) (optionally + 0x02020202)
        @ h1 = ((a & 0xFCFCFCFCUL) >> 2) + ((b & 0xFCFCFCFCUL) >> 2)
.if \align == 0
        ldm r1, {r6-r8}
.elseif \align == 3
        ldm r1, {r5-r7}
.else
        ldm r1, {r8-r10}
.endif
        add r1, r1, r2
        pld [r1]
.if \align == 0
        ALIGN_DWORD_D 1, r4, r5, r6, r7, r8
.elseif \align == 1
        ALIGN_DWORD_D 1, r4, r5, r8, r9, r10
        ALIGN_DWORD_D 2, r6, r7, r8, r9, r10
.elseif \align == 2
        ALIGN_DWORD_D 2, r4, r5, r8, r9, r10
        ALIGN_DWORD_D 3, r6, r7, r8, r9, r10
.elseif \align == 3
        ALIGN_DWORD_D 3, r4, r5, r5, r6, r7
.endif
        ldr r14, =0x03030303
        tst r3, #1
        and r8, r4, r14
        and r9, r5, r14
        and r10, r6, r14
        and r11, r7, r14
        it eq
        andeq r14, r14, r14, \rnd #1
        add r8, r8, r10
        add r9, r9, r11
        ldr r12, =0xfcfcfcfc >> 2
        itt eq
        addeq r8, r8, r14
        addeq r9, r9, r14
        and r4, r12, r4, lsr #2
        and r5, r12, r5, lsr #2
        and r6, r12, r6, lsr #2
        and r7, r12, r7, lsr #2
        add r10, r4, r6
        add r11, r5, r7
        subs r3, r3, #1
.endm

.macro RND_XY2_EXPAND align, rnd
        RND_XY2_IT \align, \rnd
6:      push {r8-r11}
        RND_XY2_IT \align, \rnd
        pop {r4-r7}
        add r4, r4, r8
        add r5, r5, r9
        ldr r14, =0x0f0f0f0f
        add r6, r6, r10
        add r7, r7, r11
        and r4, r14, r4, lsr #2
        and r5, r14, r5, lsr #2
        add r4, r4, r6
        add r5, r5, r7
        stm r0, {r4-r5}
        add r0, r0, r2
        bge 6b
        pop {r4-r11, pc}
.endm
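
@ The *_xy2 functions below average each output byte over a 2x2 neighbourhood.
@ RND_XY2_IT splits one line's horizontal pair sum into a low part l (the low
@ two bits of every byte, plus optional rounding) and a pre-shifted high part
@ h; RND_XY2_EXPAND then combines two consecutive line sums, roughly:
@     dst = h_a + h_b + (((l_a + l_b) >> 2) & 0x0F0F0F0F);
@ so the per-byte sums never overflow into the neighbouring byte.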

.align 5
function ff_put_pixels8_xy2_arm, export=1
        @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
        @ block = word aligned, pixels = unaligned
        pld [r1]
        push {r4-r11, lr} @ R14 is also called LR
        JMP_ALIGN r5, r1
1:      RND_XY2_EXPAND 0, lsl
.align 5
2:      RND_XY2_EXPAND 1, lsl
.align 5
3:      RND_XY2_EXPAND 2, lsl
.align 5
4:      RND_XY2_EXPAND 3, lsl
endfunc

.align 5
function ff_put_no_rnd_pixels8_xy2_arm, export=1
        @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
        @ block = word aligned, pixels = unaligned
        pld [r1]
        push {r4-r11, lr}
        JMP_ALIGN r5, r1
1:      RND_XY2_EXPAND 0, lsr
.align 5
2:      RND_XY2_EXPAND 1, lsr
.align 5
3:      RND_XY2_EXPAND 2, lsr
.align 5
4:      RND_XY2_EXPAND 3, lsr
endfunc

.align 5
@ void ff_add_pixels_clamped_arm(int16_t *block, uint8_t *dest, int stride)
function ff_add_pixels_clamped_arm, export=1
        push {r4-r10}
        mov r10, #8
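        @ Saturation trick used below: after adding a dest byte to a
        @ coefficient, bit 0x100 of the sum flags an out-of-range result (for
        @ the value range an IDCT produces).  In that case the byte is
        @ replaced by (~coefficient) >> 24, i.e. 0xFF when the coefficient was
        @ non-negative (overflow above 255) and 0x00 when it was negative.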
1:
        ldr r4, [r1]            /* load dest */
        /* block[0] and block[1] */
        ldrsh r5, [r0]
        ldrsh r7, [r0, #2]
        and r6, r4, #0xFF
        and r8, r4, #0xFF00
        add r6, r6, r5
        add r8, r7, r8, lsr #8
        mvn r5, r5
        mvn r7, r7
        tst r6, #0x100
        it ne
        movne r6, r5, lsr #24
        tst r8, #0x100
        it ne
        movne r8, r7, lsr #24
        mov r9, r6
        ldrsh r5, [r0, #4]      /* moved from [A] */
        orr r9, r9, r8, lsl #8
        /* block[2] and block[3] */
        /* [A] */
        ldrsh r7, [r0, #6]
        and r6, r4, #0xFF0000
        and r8, r4, #0xFF000000
        add r6, r5, r6, lsr #16
        add r8, r7, r8, lsr #24
        mvn r5, r5
        mvn r7, r7
        tst r6, #0x100
        it ne
        movne r6, r5, lsr #24
        tst r8, #0x100
        it ne
        movne r8, r7, lsr #24
        orr r9, r9, r6, lsl #16
        ldr r4, [r1, #4]        /* moved from [B] */
        orr r9, r9, r8, lsl #24
        /* store dest */
        ldrsh r5, [r0, #8]      /* moved from [C] */
        str r9, [r1]
        /* load dest */
        /* [B] */
        /* block[4] and block[5] */
        /* [C] */
        ldrsh r7, [r0, #10]
        and r6, r4, #0xFF
        and r8, r4, #0xFF00
        add r6, r6, r5
        add r8, r7, r8, lsr #8
        mvn r5, r5
        mvn r7, r7
        tst r6, #0x100
        it ne
        movne r6, r5, lsr #24
        tst r8, #0x100
        it ne
        movne r8, r7, lsr #24
        mov r9, r6
        ldrsh r5, [r0, #12]     /* moved from [D] */
        orr r9, r9, r8, lsl #8
        /* block[6] and block[7] */
        /* [D] */
        ldrsh r7, [r0, #14]
        and r6, r4, #0xFF0000
        and r8, r4, #0xFF000000
        add r6, r5, r6, lsr #16
        add r8, r7, r8, lsr #24
        mvn r5, r5
        mvn r7, r7
        tst r6, #0x100
        it ne
        movne r6, r5, lsr #24
        tst r8, #0x100
        it ne
        movne r8, r7, lsr #24
        orr r9, r9, r6, lsl #16
        add r0, r0, #16         /* moved from [E] */
        orr r9, r9, r8, lsl #24
        subs r10, r10, #1       /* moved from [F] */
        /* store dest */
        str r9, [r1, #4]
        /* [E] */
        /* [F] */
        add r1, r1, r2
        bne 1b
        pop {r4-r10}
        bx lr
endfunc