You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

720 lines
25KB

  1. @
  2. @ ARMv4 optimized DSP utils
  3. @ Copyright (c) 2004 AGAWA Koji <i (AT) atty (DOT) jp>
  4. @
  5. @ This file is part of Libav.
  6. @
  7. @ Libav is free software; you can redistribute it and/or
  8. @ modify it under the terms of the GNU Lesser General Public
  9. @ License as published by the Free Software Foundation; either
  10. @ version 2.1 of the License, or (at your option) any later version.
  11. @
  12. @ Libav is distributed in the hope that it will be useful,
  13. @ but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. @ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  15. @ Lesser General Public License for more details.
  16. @
  17. @ You should have received a copy of the GNU Lesser General Public
  18. @ License along with Libav; if not, write to the Free Software
  19. @ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  20. @
  21. #include "config.h"
  22. #include "libavutil/arm/asm.S"
  23. preserve8
  #if HAVE_ARMV5TE
  @ Issue one cache preload hint per step.
  @   r0 = first address to hint
  @   r1 = byte step added to r0 after each hint
  @   r2 = number of hints; it is decremented before it is tested, so a
  @        count of 0 wraps around instead of returning immediately
  @ Only r0, r2 and the flags are modified; nothing is stored to memory.
  function ff_prefetch_arm, export=1
  1:      @ Loop on a local label so that the branch does not depend on how
          @ the function macro spells or exports the global symbol.
          subs    r2, r2, #1
          pld     [r0]
          add     r0, r0, r1
          bne     1b
          bx      lr
  endfunc
  #else
  @ No preload instruction on this target: turn the mnemonic into the
  @ assembler comment character so every preload line below is ignored.
  #define pld @
  #endif
  @ ALIGN_QWORD_D shift, Rd0..Rd3, Rn0..Rn4
  @ Treat Rn4:Rn3:Rn2:Rn1:Rn0 as one long value with Rn0 least
  @ significant, drop the lowest shift bytes and place the next four
  @ words in Rd0..Rd3.  The Rn registers are only read.
  @ The invocations in this file pass a shift of 1, 2 or 3.
  .macro ALIGN_QWORD_D shift, Rd0, Rd1, Rd2, Rd3, Rn0, Rn1, Rn2, Rn3, Rn4
          @ low part of each result word
          mov     \Rd0, \Rn0, lsr #(8 * \shift)
          mov     \Rd1, \Rn1, lsr #(8 * \shift)
          mov     \Rd2, \Rn2, lsr #(8 * \shift)
          mov     \Rd3, \Rn3, lsr #(8 * \shift)
          @ merge in the bytes carried over from the following word
          orr     \Rd0, \Rd0, \Rn1, lsl #(32 - 8 * \shift)
          orr     \Rd1, \Rd1, \Rn2, lsl #(32 - 8 * \shift)
          orr     \Rd2, \Rd2, \Rn3, lsl #(32 - 8 * \shift)
          orr     \Rd3, \Rd3, \Rn4, lsl #(32 - 8 * \shift)
  .endm
  @ ALIGN_DWORD shift, R0, R1, R2
  @ In-place variant: treat R2:R1:R0 as one long value with R0 least
  @ significant, drop the lowest shift bytes and leave the next two
  @ words in R0 and R1.  R2 is only read.
  .macro ALIGN_DWORD shift, R0, R1, R2
          mov     \R0, \R0, lsr #(8 * \shift)
          orr     \R0, \R0, \R1, lsl #(32 - 8 * \shift)
          @ R1 may be overwritten only now, after R0 has taken its bytes
          mov     \R1, \R1, lsr #(8 * \shift)
          orr     \R1, \R1, \R2, lsl #(32 - 8 * \shift)
  .endm
  @ ALIGN_DWORD_D shift, Rdst0, Rdst1, Rsrc0, Rsrc1, Rsrc2
  @ Treat Rsrc2:Rsrc1:Rsrc0 as one long value with Rsrc0 least
  @ significant, drop the lowest shift bytes and write the next two
  @ words to Rdst0 and Rdst1.
  @ Both mov instructions run before either orr; the orr steps read
  @ Rsrc1 and Rsrc2 again, so neither of those may be a destination.
  .macro ALIGN_DWORD_D shift, Rdst0, Rdst1, Rsrc0, Rsrc1, Rsrc2
          mov     \Rdst0, \Rsrc0, lsr #(8 * \shift)
          mov     \Rdst1, \Rsrc1, lsr #(8 * \shift)
          orr     \Rdst0, \Rdst0, \Rsrc1, lsl #(32 - (8 * \shift))
          orr     \Rdst1, \Rdst1, \Rsrc2, lsl #(32 - (8 * \shift))
  .endm
  @ RND_AVG32 Rd0, Rd1, Rn0, Rn1, Rm0, Rm1, Rmask
  @ Average two pairs of words byte by byte, rounding halves up:
  @     Rd = (Rn | Rm) - (((Rn ^ Rm) & ~0x01010101) >> 1)
  @ Rmask must hold 0xFEFEFEFE; it stops the low bit of each byte from
  @ being shifted into the byte below.
  @ Rn0 and Rn1 are overwritten (they end up holding Rn | Rm).
  .macro RND_AVG32 Rd0, Rd1, Rn0, Rn1, Rm0, Rm1, Rmask
          eor     \Rd0, \Rn0, \Rm0        @ bits that differ
          eor     \Rd1, \Rn1, \Rm1
          orr     \Rn0, \Rn0, \Rm0        @ bits set in either input
          orr     \Rn1, \Rn1, \Rm1
          and     \Rd0, \Rd0, \Rmask
          and     \Rd1, \Rd1, \Rmask
          sub     \Rd0, \Rn0, \Rd0, lsr #1
          sub     \Rd1, \Rn1, \Rd1, lsr #1
  .endm
  @ NO_RND_AVG32 Rd0, Rd1, Rn0, Rn1, Rm0, Rm1, Rmask
  @ Average two pairs of words byte by byte, rounding halves down:
  @     Rd = (Rn & Rm) + (((Rn ^ Rm) & ~0x01010101) >> 1)
  @ (the half difference is ADDED to the common bits; this is the
  @ counterpart of RND_AVG32, which subtracts it from Rn | Rm).
  @ Rmask must hold 0xFEFEFEFE; it stops the low bit of each byte from
  @ being shifted into the byte below.
  @ Rn0 and Rn1 are overwritten (they end up holding Rn & Rm).
  .macro NO_RND_AVG32 Rd0, Rd1, Rn0, Rn1, Rm0, Rm1, Rmask
          eor     \Rd0, \Rn0, \Rm0        @ bits that differ
          eor     \Rd1, \Rn1, \Rm1
          and     \Rn0, \Rn0, \Rm0        @ bits set in both inputs
          and     \Rn1, \Rn1, \Rm1
          and     \Rd0, \Rd0, \Rmask
          and     \Rd1, \Rd1, \Rmask
          add     \Rd0, \Rn0, \Rd0, lsr #1
          add     \Rd1, \Rn1, \Rd1, lsr #1
  .endm
  @ JMP_ALIGN tmp, reg
  @ Dispatch on the low two bits of the address in reg:
  @     offset 0 -> label 1, offset 1 -> label 2,
  @     offset 2 -> label 3, offset 3 -> label 4
  @ (all forward references, so the caller supplies labels 1: to 4:
  @ after the macro).  reg is rounded down to a word boundary before
  @ any branch is taken; tmp and the flags are clobbered.
  .macro JMP_ALIGN tmp, reg
          ands    \tmp, \reg, #3          @ tmp = byte offset in the word
          bic     \reg, \reg, #3          @ does not touch the flags
          beq     1f
          subs    \tmp, \tmp, #1
          beq     2f
          subs    \tmp, \tmp, #1
          beq     3f
          b       4f
  .endm
  93. @ ----------------------------------------------------------------
  .align 5
  function ff_put_pixels16_arm, export=1
          @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
          @ block = word aligned, pixels = unaligned
          @ r0 = block, r1 = pixels, r2 = line_size, r3 = h
          @ Copies 16 bytes per row.  Each loop stores a row before the
          @ row count is tested.
          pld     [r1]
          push    {r4-r11, lr}
          JMP_ALIGN r5, r1                @ r1 is now word aligned

  1:      @ source offset 0: straight four-word copy
          ldm     r1, {r4-r7}
          add     r1, r1, r2
          stm     r0, {r4-r7}
          pld     [r1]
          subs    r3, r3, #1
          add     r0, r0, r2
          bne     1b
          pop     {r4-r11, pc}

          .align 5
  2:      @ source offset 1: load five words, drop one byte
          ldm     r1, {r4-r8}
          add     r1, r1, r2
          ALIGN_QWORD_D 1, r9, r10, r11, r12, r4, r5, r6, r7, r8
          pld     [r1]
          subs    r3, r3, #1
          stm     r0, {r9-r12}
          add     r0, r0, r2
          bne     2b
          pop     {r4-r11, pc}

          .align 5
  3:      @ source offset 2: load five words, drop two bytes
          ldm     r1, {r4-r8}
          add     r1, r1, r2
          ALIGN_QWORD_D 2, r9, r10, r11, r12, r4, r5, r6, r7, r8
          pld     [r1]
          subs    r3, r3, #1
          stm     r0, {r9-r12}
          add     r0, r0, r2
          bne     3b
          pop     {r4-r11, pc}

          .align 5
  4:      @ source offset 3: load five words, drop three bytes
          ldm     r1, {r4-r8}
          add     r1, r1, r2
          ALIGN_QWORD_D 3, r9, r10, r11, r12, r4, r5, r6, r7, r8
          pld     [r1]
          subs    r3, r3, #1
          stm     r0, {r9-r12}
          add     r0, r0, r2
          bne     4b
          pop     {r4-r11,pc}
  endfunc
  144. @ ----------------------------------------------------------------
  .align 5
  function ff_put_pixels8_arm, export=1
          @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
          @ block = word aligned, pixels = unaligned
          @ r0 = block, r1 = pixels, r2 = line_size, r3 = h
          @ Copies 8 bytes per row; r12 is used as scratch for the third
          @ source word in the unaligned cases.
          pld     [r1]
          push    {r4-r5,lr}
          JMP_ALIGN r5, r1                @ r1 is now word aligned

  1:      @ source offset 0: straight two-word copy
          ldm     r1, {r4-r5}
          add     r1, r1, r2
          subs    r3, r3, #1
          pld     [r1]
          stm     r0, {r4-r5}
          add     r0, r0, r2
          bne     1b
          pop     {r4-r5,pc}

          .align 5
  2:      @ source offset 1
          ldm     r1, {r4-r5, r12}
          add     r1, r1, r2
          ALIGN_DWORD 1, r4, r5, r12
          pld     [r1]
          subs    r3, r3, #1
          stm     r0, {r4-r5}
          add     r0, r0, r2
          bne     2b
          pop     {r4-r5,pc}

          .align 5
  3:      @ source offset 2
          ldm     r1, {r4-r5, r12}
          add     r1, r1, r2
          ALIGN_DWORD 2, r4, r5, r12
          pld     [r1]
          subs    r3, r3, #1
          stm     r0, {r4-r5}
          add     r0, r0, r2
          bne     3b
          pop     {r4-r5,pc}

          .align 5
  4:      @ source offset 3
          ldm     r1, {r4-r5, r12}
          add     r1, r1, r2
          ALIGN_DWORD 3, r4, r5, r12
          pld     [r1]
          subs    r3, r3, #1
          stm     r0, {r4-r5}
          add     r0, r0, r2
          bne     4b
          pop     {r4-r5,pc}
  endfunc
  195. @ ----------------------------------------------------------------
  .align 5
  function ff_put_pixels8_x2_arm, export=1
          @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
          @ block = word aligned, pixels = unaligned
          @ r0 = block, r1 = pixels, r2 = line_size, r3 = h
          @ Each output byte is the rounded-up average of a source byte
          @ and the byte one position to its right.
          pld     [r1]
          push    {r4-r10,lr}
          ldr     r12, =0xfefefefe        @ Rmask for RND_AVG32
          JMP_ALIGN r5, r1                @ r1 is now word aligned

  1:      @ source offset 0: average offsets 0 and 1
          ldm     r1, {r4-r5, r10}
          add     r1, r1, r2
          ALIGN_DWORD_D 1, r6, r7, r4, r5, r10
          pld     [r1]
          RND_AVG32 r8, r9, r4, r5, r6, r7, r12
          subs    r3, r3, #1
          stm     r0, {r8-r9}
          add     r0, r0, r2
          bne     1b
          pop     {r4-r10,pc}

          .align 5
  2:      @ source offset 1: average offsets 1 and 2
          ldm     r1, {r4-r5, r10}
          add     r1, r1, r2
          ALIGN_DWORD_D 1, r6, r7, r4, r5, r10
          ALIGN_DWORD_D 2, r8, r9, r4, r5, r10
          pld     [r1]
          RND_AVG32 r4, r5, r6, r7, r8, r9, r12
          subs    r3, r3, #1
          stm     r0, {r4-r5}
          add     r0, r0, r2
          bne     2b
          pop     {r4-r10,pc}

          .align 5
  3:      @ source offset 2: average offsets 2 and 3
          ldm     r1, {r4-r5, r10}
          add     r1, r1, r2
          ALIGN_DWORD_D 2, r6, r7, r4, r5, r10
          ALIGN_DWORD_D 3, r8, r9, r4, r5, r10
          pld     [r1]
          RND_AVG32 r4, r5, r6, r7, r8, r9, r12
          subs    r3, r3, #1
          stm     r0, {r4-r5}
          add     r0, r0, r2
          bne     3b
          pop     {r4-r10,pc}

          .align 5
  4:      @ source offset 3: offset 4 is simply the pair r5, r10
          ldm     r1, {r4-r5, r10}
          add     r1, r1, r2
          ALIGN_DWORD_D 3, r6, r7, r4, r5, r10
          pld     [r1]
          RND_AVG32 r8, r9, r6, r7, r5, r10, r12
          subs    r3, r3, #1
          stm     r0, {r8-r9}
          add     r0, r0, r2
          bne     4b
          pop     {r4-r10,pc}
  endfunc
  .align 5
  function ff_put_no_rnd_pixels8_x2_arm, export=1
          @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
          @ block = word aligned, pixels = unaligned
          @ r0 = block, r1 = pixels, r2 = line_size, r3 = h
          @ Each output byte is the rounded-down average of a source byte
          @ and the byte one position to its right.
          pld     [r1]
          push    {r4-r10,lr}
          ldr     r12, =0xfefefefe        @ Rmask for NO_RND_AVG32
          JMP_ALIGN r5, r1                @ r1 is now word aligned

  1:      @ source offset 0: average offsets 0 and 1
          ldm     r1, {r4-r5, r10}
          add     r1, r1, r2
          ALIGN_DWORD_D 1, r6, r7, r4, r5, r10
          pld     [r1]
          NO_RND_AVG32 r8, r9, r4, r5, r6, r7, r12
          subs    r3, r3, #1
          stm     r0, {r8-r9}
          add     r0, r0, r2
          bne     1b
          pop     {r4-r10,pc}

          .align 5
  2:      @ source offset 1: average offsets 1 and 2
          ldm     r1, {r4-r5, r10}
          add     r1, r1, r2
          ALIGN_DWORD_D 1, r6, r7, r4, r5, r10
          ALIGN_DWORD_D 2, r8, r9, r4, r5, r10
          pld     [r1]
          NO_RND_AVG32 r4, r5, r6, r7, r8, r9, r12
          subs    r3, r3, #1
          stm     r0, {r4-r5}
          add     r0, r0, r2
          bne     2b
          pop     {r4-r10,pc}

          .align 5
  3:      @ source offset 2: average offsets 2 and 3
          ldm     r1, {r4-r5, r10}
          add     r1, r1, r2
          ALIGN_DWORD_D 2, r6, r7, r4, r5, r10
          ALIGN_DWORD_D 3, r8, r9, r4, r5, r10
          pld     [r1]
          NO_RND_AVG32 r4, r5, r6, r7, r8, r9, r12
          subs    r3, r3, #1
          stm     r0, {r4-r5}
          add     r0, r0, r2
          bne     3b
          pop     {r4-r10,pc}

          .align 5
  4:      @ source offset 3: offset 4 is simply the pair r5, r10
          ldm     r1, {r4-r5, r10}
          add     r1, r1, r2
          ALIGN_DWORD_D 3, r6, r7, r4, r5, r10
          pld     [r1]
          NO_RND_AVG32 r8, r9, r6, r7, r5, r10, r12
          subs    r3, r3, #1
          stm     r0, {r8-r9}
          add     r0, r0, r2
          bne     4b
          pop     {r4-r10,pc}
  endfunc
  312. @ ----------------------------------------------------------------
  .align 5
  function ff_put_pixels8_y2_arm, export=1
          @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
          @ block = word aligned, pixels = unaligned
          @ r0 = block, r1 = pixels, r2 = line_size, r3 = h
          @ Each output row is the rounded-up average of a source row and
          @ the row below it.  Every loop iteration produces two rows, so
          @ the counter is h / 2 (an odd h is rounded down).
          pld     [r1]
          push    {r4-r11,lr}
          mov     r3, r3, lsr #1
          ldr     r12, =0xfefefefe        @ Rmask for RND_AVG32
          JMP_ALIGN r5, r1                @ r1 is now word aligned

  1:      @ source offset 0
          ldm     r1, {r4-r5}             @ first row
          add     r1, r1, r2
  6:
          ldm     r1, {r6-r7}             @ next row
          add     r1, r1, r2
          pld     [r1]
          RND_AVG32 r8, r9, r4, r5, r6, r7, r12
          ldm     r1, {r4-r5}             @ row after that
          add     r1, r1, r2
          stm     r0, {r8-r9}
          add     r0, r0, r2
          pld     [r1]
          RND_AVG32 r8, r9, r6, r7, r4, r5, r12
          subs    r3, r3, #1
          stm     r0, {r8-r9}
          add     r0, r0, r2
          bne     6b
          pop     {r4-r11,pc}

          .align 5
  2:      @ source offset 1
          ldm     r1, {r4-r6}
          add     r1, r1, r2
          pld     [r1]
          ALIGN_DWORD 1, r4, r5, r6
  6:
          ldm     r1, {r7-r9}
          add     r1, r1, r2
          pld     [r1]
          ALIGN_DWORD 1, r7, r8, r9
          RND_AVG32 r10, r11, r4, r5, r7, r8, r12
          stm     r0, {r10-r11}
          add     r0, r0, r2
          ldm     r1, {r4-r6}
          add     r1, r1, r2
          pld     [r1]
          ALIGN_DWORD 1, r4, r5, r6
          subs    r3, r3, #1
          RND_AVG32 r10, r11, r7, r8, r4, r5, r12
          stm     r0, {r10-r11}
          add     r0, r0, r2
          bne     6b
          pop     {r4-r11,pc}

          .align 5
  3:      @ source offset 2
          ldm     r1, {r4-r6}
          add     r1, r1, r2
          pld     [r1]
          ALIGN_DWORD 2, r4, r5, r6
  6:
          ldm     r1, {r7-r9}
          add     r1, r1, r2
          pld     [r1]
          ALIGN_DWORD 2, r7, r8, r9
          RND_AVG32 r10, r11, r4, r5, r7, r8, r12
          stm     r0, {r10-r11}
          add     r0, r0, r2
          ldm     r1, {r4-r6}
          add     r1, r1, r2
          pld     [r1]
          ALIGN_DWORD 2, r4, r5, r6
          subs    r3, r3, #1
          RND_AVG32 r10, r11, r7, r8, r4, r5, r12
          stm     r0, {r10-r11}
          add     r0, r0, r2
          bne     6b
          pop     {r4-r11,pc}

          .align 5
  4:      @ source offset 3
          ldm     r1, {r4-r6}
          add     r1, r1, r2
          pld     [r1]
          ALIGN_DWORD 3, r4, r5, r6
  6:
          ldm     r1, {r7-r9}
          add     r1, r1, r2
          pld     [r1]
          ALIGN_DWORD 3, r7, r8, r9
          RND_AVG32 r10, r11, r4, r5, r7, r8, r12
          stm     r0, {r10-r11}
          add     r0, r0, r2
          ldm     r1, {r4-r6}
          add     r1, r1, r2
          pld     [r1]
          ALIGN_DWORD 3, r4, r5, r6
          subs    r3, r3, #1
          RND_AVG32 r10, r11, r7, r8, r4, r5, r12
          stm     r0, {r10-r11}
          add     r0, r0, r2
          bne     6b
          pop     {r4-r11,pc}
  endfunc
  .align 5
  function ff_put_no_rnd_pixels8_y2_arm, export=1
          @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
          @ block = word aligned, pixels = unaligned
          @ r0 = block, r1 = pixels, r2 = line_size, r3 = h
          @ Each output row is the rounded-down average of a source row
          @ and the row below it.  Every loop iteration produces two rows,
          @ so the counter is h / 2 (an odd h is rounded down).
          pld     [r1]
          push    {r4-r11,lr}
          mov     r3, r3, lsr #1
          ldr     r12, =0xfefefefe        @ Rmask for NO_RND_AVG32
          JMP_ALIGN r5, r1                @ r1 is now word aligned

  1:      @ source offset 0
          ldm     r1, {r4-r5}             @ first row
          add     r1, r1, r2
  6:
          ldm     r1, {r6-r7}             @ next row
          add     r1, r1, r2
          pld     [r1]
          NO_RND_AVG32 r8, r9, r4, r5, r6, r7, r12
          ldm     r1, {r4-r5}             @ row after that
          add     r1, r1, r2
          stm     r0, {r8-r9}
          add     r0, r0, r2
          pld     [r1]
          NO_RND_AVG32 r8, r9, r6, r7, r4, r5, r12
          subs    r3, r3, #1
          stm     r0, {r8-r9}
          add     r0, r0, r2
          bne     6b
          pop     {r4-r11,pc}

          .align 5
  2:      @ source offset 1
          ldm     r1, {r4-r6}
          add     r1, r1, r2
          pld     [r1]
          ALIGN_DWORD 1, r4, r5, r6
  6:
          ldm     r1, {r7-r9}
          add     r1, r1, r2
          pld     [r1]
          ALIGN_DWORD 1, r7, r8, r9
          NO_RND_AVG32 r10, r11, r4, r5, r7, r8, r12
          stm     r0, {r10-r11}
          add     r0, r0, r2
          ldm     r1, {r4-r6}
          add     r1, r1, r2
          pld     [r1]
          ALIGN_DWORD 1, r4, r5, r6
          subs    r3, r3, #1
          NO_RND_AVG32 r10, r11, r7, r8, r4, r5, r12
          stm     r0, {r10-r11}
          add     r0, r0, r2
          bne     6b
          pop     {r4-r11,pc}

          .align 5
  3:      @ source offset 2
          ldm     r1, {r4-r6}
          add     r1, r1, r2
          pld     [r1]
          ALIGN_DWORD 2, r4, r5, r6
  6:
          ldm     r1, {r7-r9}
          add     r1, r1, r2
          pld     [r1]
          ALIGN_DWORD 2, r7, r8, r9
          NO_RND_AVG32 r10, r11, r4, r5, r7, r8, r12
          stm     r0, {r10-r11}
          add     r0, r0, r2
          ldm     r1, {r4-r6}
          add     r1, r1, r2
          pld     [r1]
          ALIGN_DWORD 2, r4, r5, r6
          subs    r3, r3, #1
          NO_RND_AVG32 r10, r11, r7, r8, r4, r5, r12
          stm     r0, {r10-r11}
          add     r0, r0, r2
          bne     6b
          pop     {r4-r11,pc}

          .align 5
  4:      @ source offset 3
          ldm     r1, {r4-r6}
          add     r1, r1, r2
          pld     [r1]
          ALIGN_DWORD 3, r4, r5, r6
  6:
          ldm     r1, {r7-r9}
          add     r1, r1, r2
          pld     [r1]
          ALIGN_DWORD 3, r7, r8, r9
          NO_RND_AVG32 r10, r11, r4, r5, r7, r8, r12
          stm     r0, {r10-r11}
          add     r0, r0, r2
          ldm     r1, {r4-r6}
          add     r1, r1, r2
          pld     [r1]
          ALIGN_DWORD 3, r4, r5, r6
          subs    r3, r3, #1
          NO_RND_AVG32 r10, r11, r7, r8, r4, r5, r12
          stm     r0, {r10-r11}
          add     r0, r0, r2
          bne     6b
          pop     {r4-r11,pc}
  endfunc

          @ place the pending literal pool (constants loaded with ldr =) here
          .ltorg
  508. @ ----------------------------------------------------------------
  @ RND_XY2_IT align, rnd
  @ One row step of the 2x2 average.  Works on fixed registers:
  @   in:   r1 = word-aligned source row, r2 = row stride, r3 = row counter
  @         align = byte offset of the pixels inside the first word (0..3)
  @         rnd   = lsl for the rounding variant, lsr for the no-round one
  @   out:  r8,  r9  = sum of the low 2 bits of each byte of the row and of
  @                    the row shifted by one pixel, plus the rounding
  @                    constant when r3 was even on entry
  @         r10, r11 = sum of the high 6 bits (already shifted down by 2)
  @         r1 advanced by one row, r3 decremented with the flags set
  @   clobbers r4-r7, r12, r14
  @ Formulas per word, a = row, b = row shifted by one pixel:
  @   l1 =  (a & 0x03030303) + (b & 0x03030303) ?(+ 0x02020202)
  @   h1 = ((a & 0xFCFCFCFCUL) >> 2) + ((b & 0xFCFCFCFCUL) >> 2)
  .macro RND_XY2_IT align, rnd
          @ load the three words that cover both pixel groups
  .if \align == 0
          ldm     r1, {r6-r8}
  .elseif \align == 3
          ldm     r1, {r5-r7}
  .else
          ldm     r1, {r8-r10}
  .endif
          add     r1, r1, r2
          pld     [r1]
          @ r4, r5 = one pixel group and r6, r7 = the other
  .if \align == 0
          ALIGN_DWORD_D 1, r4, r5, r6, r7, r8
  .elseif \align == 1
          ALIGN_DWORD_D 1, r4, r5, r8, r9, r10
          ALIGN_DWORD_D 2, r6, r7, r8, r9, r10
  .elseif \align == 2
          ALIGN_DWORD_D 2, r4, r5, r8, r9, r10
          ALIGN_DWORD_D 3, r6, r7, r8, r9, r10
  .elseif \align == 3
          ALIGN_DWORD_D 3, r4, r5, r5, r6, r7
  .endif
          ldr     r14, =0x03030303
          tst     r3, #1                  @ eq when the row counter is even
          and     r8, r4, r14
          and     r9, r5, r14
          and     r10, r6, r14
          and     r11, r7, r14
          @ rounding constant: 0x02020202 with lsl, 0x01010101 with lsr
          it      eq
          andeq   r14, r14, r14, \rnd #1
          add     r8, r8, r10
          add     r9, r9, r11
          ldr     r12, =0xfcfcfcfc >> 2
          @ flags still come from the tst above
          itt     eq
          addeq   r8, r8, r14
          addeq   r9, r9, r14
          and     r4, r12, r4, lsr #2
          and     r5, r12, r5, lsr #2
          and     r6, r12, r6, lsr #2
          and     r7, r12, r7, lsr #2
          add     r10, r4, r6
          add     r11, r5, r7
          subs    r3, r3, #1              @ flags are consumed by the caller
  .endm
  @ RND_XY2_EXPAND align, rnd
  @ Complete loop plus function return for one source alignment.
  @ The first RND_XY2_IT primes the sums for the top row.  Each pass of
  @ the loop then saves the previous row sums on the stack, computes the
  @ sums of the next row, and combines both:
  @     out = (((low_prev + low_cur) >> 2) & 0x0f0f0f0f) + high_prev + high_cur
  @ The loop repeats while the subs inside RND_XY2_IT leaves r3 >= 0.
  @ Ends with pop {r4-r11,pc}, so the caller must have pushed {r4-r11,lr}.
  .macro RND_XY2_EXPAND align, rnd
          RND_XY2_IT \align, \rnd
  6:
          push    {r8-r11}                @ keep the previous row sums
          RND_XY2_IT \align, \rnd
          pop     {r4-r7}                 @ r4, r5 = low sums; r6, r7 = high sums
          add     r4, r4, r8
          add     r5, r5, r9
          ldr     r14, =0x0f0f0f0f
          add     r6, r6, r10
          add     r7, r7, r11
          and     r4, r14, r4, lsr #2
          and     r5, r14, r5, lsr #2
          add     r4, r4, r6
          add     r5, r5, r7
          stm     r0, {r4-r5}
          add     r0, r0, r2
          bge     6b                      @ flags from the subs in RND_XY2_IT
          pop     {r4-r11,pc}
  .endm
  .align 5
  function ff_put_pixels8_xy2_arm, export=1
          @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
          @ block = word aligned, pixels = unaligned
          @ 2x2 average with rounding (lsl selects the 0x02020202 constant
          @ inside RND_XY2_IT).  Each expansion below contains its own
          @ loop and return.
          pld     [r1]
          push    {r4-r11,lr}             @ R14 is also called LR
          JMP_ALIGN r5, r1
  1:
          RND_XY2_EXPAND 0, lsl
          .align 5
  2:
          RND_XY2_EXPAND 1, lsl
          .align 5
  3:
          RND_XY2_EXPAND 2, lsl
          .align 5
  4:
          RND_XY2_EXPAND 3, lsl
  endfunc
  .align 5
  function ff_put_no_rnd_pixels8_xy2_arm, export=1
          @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
          @ block = word aligned, pixels = unaligned
          @ 2x2 average using the smaller rounding constant (lsr selects
          @ 0x01010101 inside RND_XY2_IT).  Each expansion below contains
          @ its own loop and return.
          pld     [r1]
          push    {r4-r11,lr}
          JMP_ALIGN r5, r1
  1:
          RND_XY2_EXPAND 0, lsr
          .align 5
  2:
          RND_XY2_EXPAND 1, lsr
          .align 5
  3:
          RND_XY2_EXPAND 2, lsr
          .align 5
  4:
          RND_XY2_EXPAND 3, lsr
  endfunc
  .align 5
  @ void ff_add_pixels_clamped_arm(int16_t *block, uint8_t *dest, int stride)
  @ r0 = block, r1 = dest, r2 = stride
  @ Adds 8 rows of 8 signed 16-bit values to 8 rows of 8 bytes at dest and
  @ stores the sums clamped to 0..255.  dest is accessed with word loads
  @ and stores.
  @
  @ Clamping: a sum is out of range when any bit outside bits 0..7 is set,
  @ which bics detects for every possible sum (testing bit 8 alone misses
  @ sums beyond -256..511 and would let their high bits spill into the
  @ neighbouring pixels when the bytes are packed).  The replacement value
  @ is the top byte of the inverted coefficient: 0xFF for a positive
  @ coefficient (overflow), 0x00 for a negative one (underflow).
  @ r12 is used as scratch for the range test.
  function ff_add_pixels_clamped_arm, export=1
          push    {r4-r10}
          mov     r10, #8                 @ row counter
  1:
          ldr     r4, [r1]                @ load dest pixels 0..3
          @ block[0] and block[1]
          ldrsh   r5, [r0]
          ldrsh   r7, [r0, #2]
          and     r6, r4, #0xFF
          and     r8, r4, #0xFF00
          add     r6, r5, r6
          add     r8, r7, r8, lsr #8
          mvn     r5, r5
          mvn     r7, r7
          bics    r12, r6, #0xFF          @ ne when the sum is outside 0..255
          it      ne
          movne   r6, r5, lsr #24
          bics    r12, r8, #0xFF
          it      ne
          movne   r8, r7, lsr #24
          mov     r9, r6
          ldrsh   r5, [r0, #4]            @ moved from [A]
          orr     r9, r9, r8, lsl #8
          @ block[2] and block[3]
          @ [A]
          ldrsh   r7, [r0, #6]
          and     r6, r4, #0xFF0000
          and     r8, r4, #0xFF000000
          add     r6, r5, r6, lsr #16
          add     r8, r7, r8, lsr #24
          mvn     r5, r5
          mvn     r7, r7
          bics    r12, r6, #0xFF
          it      ne
          movne   r6, r5, lsr #24
          bics    r12, r8, #0xFF
          it      ne
          movne   r8, r7, lsr #24
          orr     r9, r9, r6, lsl #16
          ldr     r4, [r1, #4]            @ moved from [B]
          orr     r9, r9, r8, lsl #24
          @ store dest
          ldrsh   r5, [r0, #8]            @ moved from [C]
          str     r9, [r1]
          @ load dest
          @ [B]
          @ block[4] and block[5]
          @ [C]
          ldrsh   r7, [r0, #10]
          and     r6, r4, #0xFF
          and     r8, r4, #0xFF00
          add     r6, r5, r6
          add     r8, r7, r8, lsr #8
          mvn     r5, r5
          mvn     r7, r7
          bics    r12, r6, #0xFF
          it      ne
          movne   r6, r5, lsr #24
          bics    r12, r8, #0xFF
          it      ne
          movne   r8, r7, lsr #24
          mov     r9, r6
          ldrsh   r5, [r0, #12]           @ moved from [D]
          orr     r9, r9, r8, lsl #8
          @ block[6] and block[7]
          @ [D]
          ldrsh   r7, [r0, #14]
          and     r6, r4, #0xFF0000
          and     r8, r4, #0xFF000000
          add     r6, r5, r6, lsr #16
          add     r8, r7, r8, lsr #24
          mvn     r5, r5
          mvn     r7, r7
          bics    r12, r6, #0xFF
          it      ne
          movne   r6, r5, lsr #24
          bics    r12, r8, #0xFF
          it      ne
          movne   r8, r7, lsr #24
          orr     r9, r9, r6, lsl #16
          add     r0, r0, #16             @ moved from [E]
          orr     r9, r9, r8, lsl #24
          subs    r10, r10, #1            @ moved from [F]; nothing below changes the flags
          @ store dest
          str     r9, [r1, #4]
          @ [E]
          @ [F]
          add     r1, r1, r2
          bne     1b
          pop     {r4-r10}
          bx      lr
  endfunc