You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

697 lines
18KB

  1. @
  2. @ ARMv4L optimized DSP utils
  3. @ Copyright (c) 2004 AGAWA Koji <i (AT) atty (DOT) jp>
  4. @
  5. @ This file is part of FFmpeg.
  6. @
  7. @ FFmpeg is free software; you can redistribute it and/or
  8. @ modify it under the terms of the GNU Lesser General Public
  9. @ License as published by the Free Software Foundation; either
  10. @ version 2.1 of the License, or (at your option) any later version.
  11. @
  12. @ FFmpeg is distributed in the hope that it will be useful,
  13. @ but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. @ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  15. @ Lesser General Public License for more details.
  16. @
  17. @ You should have received a copy of the GNU Lesser General Public
  18. @ License along with FFmpeg; if not, write to the Free Software
  19. @ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  20. @
  @ ADJ_ALIGN_QUADWORD_D shift, Rd0..Rd3, Rn0..Rn4
  @
  @ Funnel-shifts five consecutive source words right by \shift bytes and
  @ writes the four resulting words to separate destination registers:
  @     Rd[i] = (Rn[i] >> 8*shift) | (Rn[i+1] << (32 - 8*shift))
  @ The direction of the shifts matches little-endian byte order.
  @ All four right shifts are issued before any OR; the Rn registers are
  @ only read, so they survive as long as no Rd aliases an Rn.
  @ \shift is an assemble-time constant; the call sites visible in this
  @ file pass 1, 2 or 3.
  .macro ADJ_ALIGN_QUADWORD_D shift, Rd0, Rd1, Rd2, Rd3, Rn0, Rn1, Rn2, Rn3, Rn4
          @ low part of every output word
          mov     \Rd0, \Rn0, lsr #(\shift * 8)
          mov     \Rd1, \Rn1, lsr #(\shift * 8)
          mov     \Rd2, \Rn2, lsr #(\shift * 8)
          mov     \Rd3, \Rn3, lsr #(\shift * 8)
          @ merge in the bytes that spill over from the following word
          orr     \Rd0, \Rd0, \Rn1, lsl #(32 - \shift * 8)
          orr     \Rd1, \Rd1, \Rn2, lsl #(32 - \shift * 8)
          orr     \Rd2, \Rd2, \Rn3, lsl #(32 - \shift * 8)
          orr     \Rd3, \Rd3, \Rn4, lsl #(32 - \shift * 8)
  .endm
  @ ADJ_ALIGN_DOUBLEWORD shift, R0, R1, R2
  @
  @ In-place variant: shifts the three-word window R0:R1:R2 right by
  @ \shift bytes, leaving the realigned eight bytes in R0 and R1.
  @     R0 = (R0 >> 8*shift) | (R1 << (32 - 8*shift))
  @     R1 = (R1 >> 8*shift) | (R2 << (32 - 8*shift))
  @ R0 is completed before R1 is modified because the first OR still
  @ needs the original R1.  R2 is only read.
  .macro ADJ_ALIGN_DOUBLEWORD shift, R0, R1, R2
          mov     \R0, \R0, lsr #(\shift * 8)
          orr     \R0, \R0, \R1, lsl #(32 - \shift * 8)
          mov     \R1, \R1, lsr #(\shift * 8)
          orr     \R1, \R1, \R2, lsl #(32 - \shift * 8)
  .endm
  @ ADJ_ALIGN_DOUBLEWORD_D shift, Rdst0, Rdst1, Rsrc0, Rsrc1, Rsrc2
  @
  @ Like ADJ_ALIGN_DOUBLEWORD, but the realigned eight bytes go to separate
  @ destination registers:
  @     Rdst0 = (Rsrc0 >> 8*shift) | (Rsrc1 << (32 - 8*shift))
  @     Rdst1 = (Rsrc1 >> 8*shift) | (Rsrc2 << (32 - 8*shift))
  @ Both right shifts come first.  RND_XY2_IT depends on this ordering: it
  @ passes the same register as Rdst1 and Rsrc0, which is only safe because
  @ Rsrc0 has already been consumed when Rdst1 is written.
  .macro ADJ_ALIGN_DOUBLEWORD_D shift, Rdst0, Rdst1, Rsrc0, Rsrc1, Rsrc2
          mov     \Rdst0, \Rsrc0, lsr #(\shift * 8)
          mov     \Rdst1, \Rsrc1, lsr #(\shift * 8)
          orr     \Rdst0, \Rdst0, \Rsrc1, lsl #(32 - \shift * 8)
          orr     \Rdst1, \Rdst1, \Rsrc2, lsl #(32 - \shift * 8)
  .endm
  @ RND_AVG32 Rd0, Rd1, Rn0, Rn1, Rm0, Rm1, Rmask
  @
  @ Byte-wise average of two register pairs, rounding halves up:
  @     Rd = (Rn | Rm) - (((Rn ^ Rm) & ~0x01010101) >> 1)
  @ Rmask must hold 0xFEFEFEFE; masking before the shift stops bit 0 of
  @ each byte from leaking into the byte below it.
  @ Rn0/Rn1 are overwritten with (Rn | Rm); Rm0/Rm1 and Rmask are only read.
  @ No instruction here sets the condition flags.
  .macro RND_AVG32 Rd0, Rd1, Rn0, Rn1, Rm0, Rm1, Rmask
          eor     \Rd0, \Rn0, \Rm0                @ bits that differ
          eor     \Rd1, \Rn1, \Rm1
          orr     \Rn0, \Rn0, \Rm0                @ Rn is destroyed here
          orr     \Rn1, \Rn1, \Rm1
          and     \Rd0, \Rd0, \Rmask
          and     \Rd1, \Rd1, \Rmask
          sub     \Rd0, \Rn0, \Rd0, lsr #1
          sub     \Rd1, \Rn1, \Rd1, lsr #1
  .endm
  @ NO_RND_AVG32 Rd0, Rd1, Rn0, Rn1, Rm0, Rm1, Rmask
  @
  @ Byte-wise average of two register pairs, rounding halves down:
  @     Rd = (Rn & Rm) + (((Rn ^ Rm) & ~0x01010101) >> 1)
  @ (The formula uses '+', matching the add instructions below; the common
  @ bits are counted once and half of the differing bits are added on top.)
  @ Rmask must hold 0xFEFEFEFE; masking before the shift stops bit 0 of
  @ each byte from leaking into the byte below it.
  @ Rn0/Rn1 are overwritten with (Rn & Rm); Rm0/Rm1 and Rmask are only read.
  @ No instruction here sets the condition flags.
  .macro NO_RND_AVG32 Rd0, Rd1, Rn0, Rn1, Rm0, Rm1, Rmask
          eor     \Rd0, \Rn0, \Rm0                @ bits that differ
          eor     \Rd1, \Rn1, \Rm1
          and     \Rn0, \Rn0, \Rm0                @ Rn is destroyed here
          and     \Rn1, \Rn1, \Rm1
          and     \Rd0, \Rd0, \Rmask
          and     \Rd1, \Rd1, \Rmask
          add     \Rd0, \Rn0, \Rd0, lsr #1
          add     \Rd1, \Rn1, \Rd1, lsr #1
  .endm
  69. @ ----------------------------------------------------------------
  @ void put_pixels16_arm(uint8_t *block, const uint8_t *pixels,
  @                       int line_size, int h)
  @
  @ Copies h rows of 16 bytes from pixels to block.
  @   r0 = block      destination, written with stmia (word aligned)
  @   r1 = pixels     source, any byte alignment
  @   r2 = line_size  added to both pointers after every row
  @   r3 = h          row count; tested only after a row has been copied,
  @                   so it must be non-zero on entry
  @ The source pointer is rounded down to a word boundary and one of four
  @ loops is selected by its low two bits; the misaligned loops load five
  @ words and realign them with ADJ_ALIGN_QUADWORD_D.
  @ r4-r11 and lr are saved on the stack; r12 is used as scratch.
  .p2align 8                                      @ 2^8 = 256-byte boundary
  .global put_pixels16_arm
  put_pixels16_arm:
          pld     [r1]
          stmdb   sp!, {r4-r11, lr}               @ R14 is also called LR
          adr     r5, .Lpp16_table
          ands    r4, r1, #3                      @ r4 = misalignment, Z set if none
          bic     r1, r1, #3                      @ word-align the source pointer
          add     r5, r5, r4, lsl #2              @ r5 = &table[misalignment]
          ldrne   pc, [r5]                        @ misaligned: dispatch; else fall through

  .Lpp16_aligned:                                 @ source already word aligned
          ldmia   r1, {r4-r7}
          add     r1, r1, r2
          stmia   r0, {r4-r7}
          pld     [r1]
          subs    r3, r3, #1
          add     r0, r0, r2
          bne     .Lpp16_aligned
          ldmia   sp!, {r4-r11, pc}

  .p2align 8
  .Lpp16_off1:                                    @ source is 1 byte past a word
          ldmia   r1, {r4-r8}
          add     r1, r1, r2
          ADJ_ALIGN_QUADWORD_D 1, r9, r10, r11, r12, r4, r5, r6, r7, r8
          pld     [r1]
          subs    r3, r3, #1
          stmia   r0, {r9-r12}
          add     r0, r0, r2
          bne     .Lpp16_off1
          ldmia   sp!, {r4-r11, pc}

  .p2align 8
  .Lpp16_off2:                                    @ source is 2 bytes past a word
          ldmia   r1, {r4-r8}
          add     r1, r1, r2
          ADJ_ALIGN_QUADWORD_D 2, r9, r10, r11, r12, r4, r5, r6, r7, r8
          pld     [r1]
          subs    r3, r3, #1
          stmia   r0, {r9-r12}
          add     r0, r0, r2
          bne     .Lpp16_off2
          ldmia   sp!, {r4-r11, pc}

  .p2align 8
  .Lpp16_off3:                                    @ source is 3 bytes past a word
          ldmia   r1, {r4-r8}
          add     r1, r1, r2
          ADJ_ALIGN_QUADWORD_D 3, r9, r10, r11, r12, r4, r5, r6, r7, r8
          pld     [r1]
          subs    r3, r3, #1
          stmia   r0, {r9-r12}
          add     r0, r0, r2
          bne     .Lpp16_off3
          ldmia   sp!, {r4-r11, pc}

  .p2align 8
  .Lpp16_table:                                   @ indexed by (pixels & 3)
          .word   .Lpp16_aligned                  @ never loaded: aligned case falls through
          .word   .Lpp16_off1
          .word   .Lpp16_off2
          .word   .Lpp16_off3
  130. @ ----------------------------------------------------------------
  @ void put_pixels8_arm(uint8_t *block, const uint8_t *pixels,
  @                      int line_size, int h)
  @
  @ Copies h rows of 8 bytes from pixels to block.
  @   r0 = block      destination, written with stmia (word aligned)
  @   r1 = pixels     source, any byte alignment
  @   r2 = line_size  added to both pointers after every row
  @   r3 = h          row count; tested only after a row has been copied,
  @                   so it must be non-zero on entry
  @ The source pointer is rounded down to a word boundary and one of four
  @ loops is selected by its low two bits; the misaligned loops load three
  @ words and realign them in place with ADJ_ALIGN_DOUBLEWORD.
  @ r4, r5 and lr are saved on the stack; r12 is used as scratch.
  .p2align 8                                      @ 2^8 = 256-byte boundary
  .global put_pixels8_arm
  put_pixels8_arm:
          pld     [r1]
          stmdb   sp!, {r4-r5, lr}                @ R14 is also called LR
          adr     r5, .Lpp8_table
          ands    r4, r1, #3                      @ r4 = misalignment, Z set if none
          bic     r1, r1, #3                      @ word-align the source pointer
          add     r5, r5, r4, lsl #2              @ r5 = &table[misalignment]
          ldrne   pc, [r5]                        @ misaligned: dispatch; else fall through

  .Lpp8_aligned:                                  @ source already word aligned
          ldmia   r1, {r4-r5}
          add     r1, r1, r2
          subs    r3, r3, #1
          pld     [r1]
          stmia   r0, {r4-r5}
          add     r0, r0, r2
          bne     .Lpp8_aligned
          ldmia   sp!, {r4-r5, pc}

  .p2align 8
  .Lpp8_off1:                                     @ source is 1 byte past a word
          ldmia   r1, {r4-r5, r12}
          add     r1, r1, r2
          ADJ_ALIGN_DOUBLEWORD 1, r4, r5, r12
          pld     [r1]
          subs    r3, r3, #1
          stmia   r0, {r4-r5}
          add     r0, r0, r2
          bne     .Lpp8_off1
          ldmia   sp!, {r4-r5, pc}

  .p2align 8
  .Lpp8_off2:                                     @ source is 2 bytes past a word
          ldmia   r1, {r4-r5, r12}
          add     r1, r1, r2
          ADJ_ALIGN_DOUBLEWORD 2, r4, r5, r12
          pld     [r1]
          subs    r3, r3, #1
          stmia   r0, {r4-r5}
          add     r0, r0, r2
          bne     .Lpp8_off2
          ldmia   sp!, {r4-r5, pc}

  .p2align 8
  .Lpp8_off3:                                     @ source is 3 bytes past a word
          ldmia   r1, {r4-r5, r12}
          add     r1, r1, r2
          ADJ_ALIGN_DOUBLEWORD 3, r4, r5, r12
          pld     [r1]
          subs    r3, r3, #1
          stmia   r0, {r4-r5}
          add     r0, r0, r2
          bne     .Lpp8_off3
          ldmia   sp!, {r4-r5, pc}

  .p2align 8
  .Lpp8_table:                                    @ indexed by (pixels & 3)
          .word   .Lpp8_aligned                   @ never loaded: aligned case falls through
          .word   .Lpp8_off1
          .word   .Lpp8_off2
          .word   .Lpp8_off3
  191. @ ----------------------------------------------------------------
  @ void put_pixels8_x2_arm(uint8_t *block, const uint8_t *pixels,
  @                         int line_size, int h)
  @
  @ For each of h rows, stores the rounded-up byte average (RND_AVG32) of
  @ the eight source bytes at pixels[0..7] and pixels[1..8].
  @   r0 = block      destination, written with stmia (word aligned)
  @   r1 = pixels     source, any byte alignment
  @   r2 = line_size  added to both pointers after every row
  @   r3 = h          row count; tested only after a row has been stored,
  @                   so it must be non-zero on entry
  @   r12             0xFEFEFEFE mask, loaded from slot 0 of the table
  @ Every loop loads three words from the word-aligned source and builds
  @ the two overlapping 8-byte windows with ADJ_ALIGN_DOUBLEWORD_D.
  @ r4-r10 and lr are saved on the stack.
  .p2align 8                                      @ 2^8 = 256-byte boundary
  .global put_pixels8_x2_arm
  put_pixels8_x2_arm:
          pld     [r1]
          stmdb   sp!, {r4-r10, lr}               @ R14 is also called LR
          adr     r5, .Lpp8x2_table
          ands    r4, r1, #3                      @ r4 = misalignment, Z set if none
          ldr     r12, [r5]                       @ r12 = 0xFEFEFEFE
          add     r5, r5, r4, lsl #2              @ r5 = &table[misalignment]
          bic     r1, r1, #3                      @ word-align the source pointer
          ldrne   pc, [r5]                        @ misaligned: dispatch; else fall through

  .Lpp8x2_aligned:                                @ windows: loaded words / shift 1
          ldmia   r1, {r4-r5, r10}
          add     r1, r1, r2
          ADJ_ALIGN_DOUBLEWORD_D 1, r6, r7, r4, r5, r10
          pld     [r1]
          RND_AVG32 r8, r9, r4, r5, r6, r7, r12
          subs    r3, r3, #1
          stmia   r0, {r8-r9}
          add     r0, r0, r2
          bne     .Lpp8x2_aligned
          ldmia   sp!, {r4-r10, pc}

  .p2align 8
  .Lpp8x2_off1:                                   @ windows: shift 1 / shift 2
          ldmia   r1, {r4-r5, r10}
          add     r1, r1, r2
          ADJ_ALIGN_DOUBLEWORD_D 1, r6, r7, r4, r5, r10
          ADJ_ALIGN_DOUBLEWORD_D 2, r8, r9, r4, r5, r10
          pld     [r1]
          RND_AVG32 r4, r5, r6, r7, r8, r9, r12
          subs    r3, r3, #1
          stmia   r0, {r4-r5}
          add     r0, r0, r2
          bne     .Lpp8x2_off1
          ldmia   sp!, {r4-r10, pc}

  .p2align 8
  .Lpp8x2_off2:                                   @ windows: shift 2 / shift 3
          ldmia   r1, {r4-r5, r10}
          add     r1, r1, r2
          ADJ_ALIGN_DOUBLEWORD_D 2, r6, r7, r4, r5, r10
          ADJ_ALIGN_DOUBLEWORD_D 3, r8, r9, r4, r5, r10
          pld     [r1]
          RND_AVG32 r4, r5, r6, r7, r8, r9, r12
          subs    r3, r3, #1
          stmia   r0, {r4-r5}
          add     r0, r0, r2
          bne     .Lpp8x2_off2
          ldmia   sp!, {r4-r10, pc}

  .p2align 8
  .Lpp8x2_off3:                                   @ windows: shift 3 / words 1-2 as loaded
          ldmia   r1, {r4-r5, r10}
          add     r1, r1, r2
          ADJ_ALIGN_DOUBLEWORD_D 3, r6, r7, r4, r5, r10
          pld     [r1]
          RND_AVG32 r8, r9, r6, r7, r5, r10, r12
          subs    r3, r3, #1
          stmia   r0, {r8-r9}
          add     r0, r0, r2
          bne     .Lpp8x2_off3
          ldmia   sp!, {r4-r10, pc}               @ pops the saved LR straight into PC

  .p2align 8
  .Lpp8x2_table:
          .word   0xFEFEFEFE                      @ slot 0 doubles as the averaging mask
          .word   .Lpp8x2_off1
          .word   .Lpp8x2_off2
          .word   .Lpp8x2_off3
  @ void put_no_rnd_pixels8_x2_arm(uint8_t *block, const uint8_t *pixels,
  @                                int line_size, int h)
  @
  @ For each of h rows, stores the rounded-down byte average (NO_RND_AVG32)
  @ of the eight source bytes at pixels[0..7] and pixels[1..8].
  @   r0 = block      destination, written with stmia (word aligned)
  @   r1 = pixels     source, any byte alignment
  @   r2 = line_size  added to both pointers after every row
  @   r3 = h          row count; tested only after a row has been stored,
  @                   so it must be non-zero on entry
  @   r12             0xFEFEFEFE mask, loaded from slot 0 of the table
  @ Every loop loads three words from the word-aligned source and builds
  @ the two overlapping 8-byte windows with ADJ_ALIGN_DOUBLEWORD_D.
  @ r4-r10 and lr are saved on the stack.
  .p2align 8                                      @ 2^8 = 256-byte boundary
  .global put_no_rnd_pixels8_x2_arm
  put_no_rnd_pixels8_x2_arm:
          pld     [r1]
          stmdb   sp!, {r4-r10, lr}               @ R14 is also called LR
          adr     r5, .Lpnr8x2_table
          ands    r4, r1, #3                      @ r4 = misalignment, Z set if none
          ldr     r12, [r5]                       @ r12 = 0xFEFEFEFE
          add     r5, r5, r4, lsl #2              @ r5 = &table[misalignment]
          bic     r1, r1, #3                      @ word-align the source pointer
          ldrne   pc, [r5]                        @ misaligned: dispatch; else fall through

  .Lpnr8x2_aligned:                               @ windows: loaded words / shift 1
          ldmia   r1, {r4-r5, r10}
          add     r1, r1, r2
          ADJ_ALIGN_DOUBLEWORD_D 1, r6, r7, r4, r5, r10
          pld     [r1]
          NO_RND_AVG32 r8, r9, r4, r5, r6, r7, r12
          subs    r3, r3, #1
          stmia   r0, {r8-r9}
          add     r0, r0, r2
          bne     .Lpnr8x2_aligned
          ldmia   sp!, {r4-r10, pc}

  .p2align 8
  .Lpnr8x2_off1:                                  @ windows: shift 1 / shift 2
          ldmia   r1, {r4-r5, r10}
          add     r1, r1, r2
          ADJ_ALIGN_DOUBLEWORD_D 1, r6, r7, r4, r5, r10
          ADJ_ALIGN_DOUBLEWORD_D 2, r8, r9, r4, r5, r10
          pld     [r1]
          NO_RND_AVG32 r4, r5, r6, r7, r8, r9, r12
          subs    r3, r3, #1
          stmia   r0, {r4-r5}
          add     r0, r0, r2
          bne     .Lpnr8x2_off1
          ldmia   sp!, {r4-r10, pc}

  .p2align 8
  .Lpnr8x2_off2:                                  @ windows: shift 2 / shift 3
          ldmia   r1, {r4-r5, r10}
          add     r1, r1, r2
          ADJ_ALIGN_DOUBLEWORD_D 2, r6, r7, r4, r5, r10
          ADJ_ALIGN_DOUBLEWORD_D 3, r8, r9, r4, r5, r10
          pld     [r1]
          NO_RND_AVG32 r4, r5, r6, r7, r8, r9, r12
          subs    r3, r3, #1
          stmia   r0, {r4-r5}
          add     r0, r0, r2
          bne     .Lpnr8x2_off2
          ldmia   sp!, {r4-r10, pc}

  .p2align 8
  .Lpnr8x2_off3:                                  @ windows: shift 3 / words 1-2 as loaded
          ldmia   r1, {r4-r5, r10}
          add     r1, r1, r2
          ADJ_ALIGN_DOUBLEWORD_D 3, r6, r7, r4, r5, r10
          pld     [r1]
          NO_RND_AVG32 r8, r9, r6, r7, r5, r10, r12
          subs    r3, r3, #1
          stmia   r0, {r8-r9}
          add     r0, r0, r2
          bne     .Lpnr8x2_off3
          ldmia   sp!, {r4-r10, pc}               @ pops the saved LR straight into PC

  .p2align 8
  .Lpnr8x2_table:
          .word   0xFEFEFEFE                      @ slot 0 doubles as the averaging mask
          .word   .Lpnr8x2_off1
          .word   .Lpnr8x2_off2
          .word   .Lpnr8x2_off3
  328. @ ----------------------------------------------------------------
  @ void put_pixels8_y2_arm(uint8_t *block, const uint8_t *pixels,
  @                         int line_size, int h)
  @
  @ Stores, for every output row, the rounded-up byte average (RND_AVG32)
  @ of one 8-byte source row and the row below it.
  @   r0 = block      destination, written with stmia (word aligned)
  @   r1 = pixels     source, any byte alignment
  @   r2 = line_size  added to both pointers after every row
  @   r3 = h          halved on entry: each loop pass emits two rows, and
  @                   the counter is tested only after a pass, so h >> 1
  @                   must be non-zero
  @   r12             0xFEFEFEFE mask, loaded from slot 0 of the table
  @ One source row is loaded ahead of the loop; inside the loop the two
  @ row buffers swap roles so every source row is loaded only once.
  @ r4-r11 and lr are saved on the stack.
  .p2align 8                                      @ 2^8 = 256-byte boundary
  .global put_pixels8_y2_arm
  put_pixels8_y2_arm:
          pld     [r1]
          stmdb   sp!, {r4-r11, lr}               @ R14 is also called LR
          adr     r5, .Lpp8y2_table
          ands    r4, r1, #3                      @ r4 = misalignment, Z set if none
          mov     r3, r3, lsr #1                  @ loop count = h / 2 (flags untouched)
          ldr     r12, [r5]                       @ r12 = 0xFEFEFEFE
          add     r5, r5, r4, lsl #2              @ r5 = &table[misalignment]
          bic     r1, r1, #3                      @ word-align the source pointer
          ldrne   pc, [r5]                        @ misaligned: dispatch; else fall through

  .Lpp8y2_aligned:                                @ source already word aligned
          ldmia   r1, {r4-r5}                     @ prime with the first row
          add     r1, r1, r2
  .Lpp8y2_aligned_loop:
          ldmia   r1, {r6-r7}
          add     r1, r1, r2
          pld     [r1]
          RND_AVG32 r8, r9, r4, r5, r6, r7, r12
          ldmia   r1, {r4-r5}
          add     r1, r1, r2
          stmia   r0, {r8-r9}
          add     r0, r0, r2
          pld     [r1]
          RND_AVG32 r8, r9, r6, r7, r4, r5, r12
          subs    r3, r3, #1
          stmia   r0, {r8-r9}
          add     r0, r0, r2
          bne     .Lpp8y2_aligned_loop
          ldmia   sp!, {r4-r11, pc}

  .p2align 8
  .Lpp8y2_off1:                                   @ source is 1 byte past a word
          ldmia   r1, {r4-r6}                     @ prime with the first row
          add     r1, r1, r2
          pld     [r1]
          ADJ_ALIGN_DOUBLEWORD 1, r4, r5, r6
  .Lpp8y2_off1_loop:
          ldmia   r1, {r7-r9}
          add     r1, r1, r2
          pld     [r1]
          ADJ_ALIGN_DOUBLEWORD 1, r7, r8, r9
          RND_AVG32 r10, r11, r4, r5, r7, r8, r12
          stmia   r0, {r10-r11}
          add     r0, r0, r2
          ldmia   r1, {r4-r6}
          add     r1, r1, r2
          pld     [r1]
          ADJ_ALIGN_DOUBLEWORD 1, r4, r5, r6
          subs    r3, r3, #1                      @ flags survive the flag-neutral macro below
          RND_AVG32 r10, r11, r7, r8, r4, r5, r12
          stmia   r0, {r10-r11}
          add     r0, r0, r2
          bne     .Lpp8y2_off1_loop
          ldmia   sp!, {r4-r11, pc}

  .p2align 8
  .Lpp8y2_off2:                                   @ source is 2 bytes past a word
          ldmia   r1, {r4-r6}                     @ prime with the first row
          add     r1, r1, r2
          pld     [r1]
          ADJ_ALIGN_DOUBLEWORD 2, r4, r5, r6
  .Lpp8y2_off2_loop:
          ldmia   r1, {r7-r9}
          add     r1, r1, r2
          pld     [r1]
          ADJ_ALIGN_DOUBLEWORD 2, r7, r8, r9
          RND_AVG32 r10, r11, r4, r5, r7, r8, r12
          stmia   r0, {r10-r11}
          add     r0, r0, r2
          ldmia   r1, {r4-r6}
          add     r1, r1, r2
          pld     [r1]
          ADJ_ALIGN_DOUBLEWORD 2, r4, r5, r6
          subs    r3, r3, #1                      @ flags survive the flag-neutral macro below
          RND_AVG32 r10, r11, r7, r8, r4, r5, r12
          stmia   r0, {r10-r11}
          add     r0, r0, r2
          bne     .Lpp8y2_off2_loop
          ldmia   sp!, {r4-r11, pc}

  .p2align 8
  .Lpp8y2_off3:                                   @ source is 3 bytes past a word
          ldmia   r1, {r4-r6}                     @ prime with the first row
          add     r1, r1, r2
          pld     [r1]
          ADJ_ALIGN_DOUBLEWORD 3, r4, r5, r6
  .Lpp8y2_off3_loop:
          ldmia   r1, {r7-r9}
          add     r1, r1, r2
          pld     [r1]
          ADJ_ALIGN_DOUBLEWORD 3, r7, r8, r9
          RND_AVG32 r10, r11, r4, r5, r7, r8, r12
          stmia   r0, {r10-r11}
          add     r0, r0, r2
          ldmia   r1, {r4-r6}
          add     r1, r1, r2
          pld     [r1]
          ADJ_ALIGN_DOUBLEWORD 3, r4, r5, r6
          subs    r3, r3, #1                      @ flags survive the flag-neutral macro below
          RND_AVG32 r10, r11, r7, r8, r4, r5, r12
          stmia   r0, {r10-r11}
          add     r0, r0, r2
          bne     .Lpp8y2_off3_loop
          ldmia   sp!, {r4-r11, pc}

  .p2align 8
  .Lpp8y2_table:
          .word   0xFEFEFEFE                      @ slot 0 doubles as the averaging mask
          .word   .Lpp8y2_off1
          .word   .Lpp8y2_off2
          .word   .Lpp8y2_off3
  @ void put_no_rnd_pixels8_y2_arm(uint8_t *block, const uint8_t *pixels,
  @                                int line_size, int h)
  @
  @ Stores, for every output row, the rounded-down byte average
  @ (NO_RND_AVG32) of one 8-byte source row and the row below it.
  @   r0 = block      destination, written with stmia (word aligned)
  @   r1 = pixels     source, any byte alignment
  @   r2 = line_size  added to both pointers after every row
  @   r3 = h          halved on entry: each loop pass emits two rows, and
  @                   the counter is tested only after a pass, so h >> 1
  @                   must be non-zero
  @   r12             0xFEFEFEFE mask, loaded from slot 0 of the table
  @ One source row is loaded ahead of the loop; inside the loop the two
  @ row buffers swap roles so every source row is loaded only once.
  @ r4-r11 and lr are saved on the stack.
  .p2align 8                                      @ 2^8 = 256-byte boundary
  .global put_no_rnd_pixels8_y2_arm
  put_no_rnd_pixels8_y2_arm:
          pld     [r1]
          stmdb   sp!, {r4-r11, lr}               @ R14 is also called LR
          adr     r5, .Lpnr8y2_table
          ands    r4, r1, #3                      @ r4 = misalignment, Z set if none
          mov     r3, r3, lsr #1                  @ loop count = h / 2 (flags untouched)
          ldr     r12, [r5]                       @ r12 = 0xFEFEFEFE
          add     r5, r5, r4, lsl #2              @ r5 = &table[misalignment]
          bic     r1, r1, #3                      @ word-align the source pointer
          ldrne   pc, [r5]                        @ misaligned: dispatch; else fall through

  .Lpnr8y2_aligned:                               @ source already word aligned
          ldmia   r1, {r4-r5}                     @ prime with the first row
          add     r1, r1, r2
  .Lpnr8y2_aligned_loop:
          ldmia   r1, {r6-r7}
          add     r1, r1, r2
          pld     [r1]
          NO_RND_AVG32 r8, r9, r4, r5, r6, r7, r12
          ldmia   r1, {r4-r5}
          add     r1, r1, r2
          stmia   r0, {r8-r9}
          add     r0, r0, r2
          pld     [r1]
          NO_RND_AVG32 r8, r9, r6, r7, r4, r5, r12
          subs    r3, r3, #1
          stmia   r0, {r8-r9}
          add     r0, r0, r2
          bne     .Lpnr8y2_aligned_loop
          ldmia   sp!, {r4-r11, pc}

  .p2align 8
  .Lpnr8y2_off1:                                  @ source is 1 byte past a word
          ldmia   r1, {r4-r6}                     @ prime with the first row
          add     r1, r1, r2
          pld     [r1]
          ADJ_ALIGN_DOUBLEWORD 1, r4, r5, r6
  .Lpnr8y2_off1_loop:
          ldmia   r1, {r7-r9}
          add     r1, r1, r2
          pld     [r1]
          ADJ_ALIGN_DOUBLEWORD 1, r7, r8, r9
          NO_RND_AVG32 r10, r11, r4, r5, r7, r8, r12
          stmia   r0, {r10-r11}
          add     r0, r0, r2
          ldmia   r1, {r4-r6}
          add     r1, r1, r2
          pld     [r1]
          ADJ_ALIGN_DOUBLEWORD 1, r4, r5, r6
          subs    r3, r3, #1                      @ flags survive the flag-neutral macro below
          NO_RND_AVG32 r10, r11, r7, r8, r4, r5, r12
          stmia   r0, {r10-r11}
          add     r0, r0, r2
          bne     .Lpnr8y2_off1_loop
          ldmia   sp!, {r4-r11, pc}

  .p2align 8
  .Lpnr8y2_off2:                                  @ source is 2 bytes past a word
          ldmia   r1, {r4-r6}                     @ prime with the first row
          add     r1, r1, r2
          pld     [r1]
          ADJ_ALIGN_DOUBLEWORD 2, r4, r5, r6
  .Lpnr8y2_off2_loop:
          ldmia   r1, {r7-r9}
          add     r1, r1, r2
          pld     [r1]
          ADJ_ALIGN_DOUBLEWORD 2, r7, r8, r9
          NO_RND_AVG32 r10, r11, r4, r5, r7, r8, r12
          stmia   r0, {r10-r11}
          add     r0, r0, r2
          ldmia   r1, {r4-r6}
          add     r1, r1, r2
          pld     [r1]
          ADJ_ALIGN_DOUBLEWORD 2, r4, r5, r6
          subs    r3, r3, #1                      @ flags survive the flag-neutral macro below
          NO_RND_AVG32 r10, r11, r7, r8, r4, r5, r12
          stmia   r0, {r10-r11}
          add     r0, r0, r2
          bne     .Lpnr8y2_off2_loop
          ldmia   sp!, {r4-r11, pc}

  .p2align 8
  .Lpnr8y2_off3:                                  @ source is 3 bytes past a word
          ldmia   r1, {r4-r6}                     @ prime with the first row
          add     r1, r1, r2
          pld     [r1]
          ADJ_ALIGN_DOUBLEWORD 3, r4, r5, r6
  .Lpnr8y2_off3_loop:
          ldmia   r1, {r7-r9}
          add     r1, r1, r2
          pld     [r1]
          ADJ_ALIGN_DOUBLEWORD 3, r7, r8, r9
          NO_RND_AVG32 r10, r11, r4, r5, r7, r8, r12
          stmia   r0, {r10-r11}
          add     r0, r0, r2
          ldmia   r1, {r4-r6}
          add     r1, r1, r2
          pld     [r1]
          ADJ_ALIGN_DOUBLEWORD 3, r4, r5, r6
          subs    r3, r3, #1                      @ flags survive the flag-neutral macro below
          NO_RND_AVG32 r10, r11, r7, r8, r4, r5, r12
          stmia   r0, {r10-r11}
          add     r0, r0, r2
          bne     .Lpnr8y2_off3_loop
          ldmia   sp!, {r4-r11, pc}

  .p2align 8
  .Lpnr8y2_table:
          .word   0xFEFEFEFE                      @ slot 0 doubles as the averaging mask
          .word   .Lpnr8y2_off1
          .word   .Lpnr8y2_off2
          .word   .Lpnr8y2_off3
  543. @ ----------------------------------------------------------------
  @ RND_XY2_IT align, rnd
  @
  @ Processes one source row for the xy2 (2x2 average) routines.  With a
  @ and b being the 8-byte windows at pixels[0..7] and pixels[1..8]:
  @     l = (a & 0x03030303) + (b & 0x03030303)  (+ rounding, see below)
  @     h = ((a & 0xFCFCFCFC) >> 2) + ((b & 0xFCFCFCFC) >> 2)
  @ The rounding constant is added only when r3 is even (tst r3, #1):
  @ 0x02020202 for \rnd == 1, 0x01010101 otherwise.
  @
  @ In:   r1  = word-aligned source row pointer, advanced by r2 here
  @       r3  = counter whose parity selects the rounding
  @       r12 = constant table: +0 0x03030303, +16 0x02020202,
  @             +20 0xFCFCFCFC >> 2, +24 0x0F0F0F0F, +28 0x01010101
  @       \align = original misalignment of the source pointer (0-3)
  @ Out:  r8, r9   = l for the low / high word
  @       r10, r11 = h for the low / high word
  @ Clobbers r4-r7, r14 and the condition flags.
  .macro RND_XY2_IT align, rnd
          @ three words cover the nine source bytes for any misalignment
  .if \align == 0
          ldmia   r1, {r6-r8}
  .elseif \align == 3
          ldmia   r1, {r5-r7}
  .else
          ldmia   r1, {r8-r10}
  .endif
          add     r1, r1, r2
          pld     [r1]
          @ after this step one window is in r4:r5 and the other in r6:r7
  .if \align == 0
          ADJ_ALIGN_DOUBLEWORD_D 1, r4, r5, r6, r7, r8
  .elseif \align == 1
          ADJ_ALIGN_DOUBLEWORD_D 1, r4, r5, r8, r9, r10
          ADJ_ALIGN_DOUBLEWORD_D 2, r6, r7, r8, r9, r10
  .elseif \align == 2
          ADJ_ALIGN_DOUBLEWORD_D 2, r4, r5, r8, r9, r10
          ADJ_ALIGN_DOUBLEWORD_D 3, r6, r7, r8, r9, r10
  .elseif \align == 3
          ADJ_ALIGN_DOUBLEWORD_D 3, r4, r5, r5, r6, r7
  .endif
          ldr     r14, [r12, #0]                  @ 0x03030303
          tst     r3, #1                          @ Z set when r3 is even
          and     r8, r4, r14                     @ low two bits of every byte
          and     r9, r5, r14
          and     r10, r6, r14
          and     r11, r7, r14
  .if \rnd == 1
          ldreq   r14, [r12, #16]                 @ 0x02020202
  .else
          ldreq   r14, [r12, #28]                 @ 0x01010101
  .endif
          add     r8, r8, r10
          add     r9, r9, r11
          addeq   r8, r8, r14                     @ rounding only on even r3
          addeq   r9, r9, r14
          ldr     r14, [r12, #20]                 @ 0xFCFCFCFC >> 2
          and     r4, r14, r4, lsr #2             @ high six bits of every byte
          and     r5, r14, r5, lsr #2
          and     r6, r14, r6, lsr #2
          and     r7, r14, r7, lsr #2
          add     r10, r4, r6
          add     r11, r5, r7
  .endm
  @ RND_XY2_EXPAND align, rnd
  @
  @ Complete row loop and function epilogue for the xy2 routines.  Each
  @ output row combines the RND_XY2_IT results of two vertically adjacent
  @ source rows:
  @     out = h_prev + h_cur + (((l_prev + l_cur) >> 2) & 0x0F0F0F0F)
  @ The results of the previous row are carried across RND_XY2_IT on the
  @ stack, because that macro overwrites r4-r11.
  @
  @ RND_XY2_IT adds the rounding constant only when r3 is even, and every
  @ output row must receive it exactly once.  Inside the loop consecutive
  @ passes see r3 values of opposite parity, but the priming pass and the
  @ first loop pass would both see the initial r3.  The priming pass is
  @ therefore run with r3 temporarily decremented, so that the first
  @ output row is rounded once like all the others.
  @
  @ In:   r0 = destination, r1 = word-aligned source, r2 = line size,
  @       r3 = row count (non-zero), r12 = constant table (see RND_XY2_IT)
  @ Exit: pops r4-r11 and the return address, i.e. the enclosing function
  @       must have pushed {r4-r11, lr}.
  .macro RND_XY2_EXPAND align, rnd
          sub     r3, r3, #1                      @ flip parity for the priming row
          RND_XY2_IT \align, \rnd
          add     r3, r3, #1                      @ restore the real row count
  6:      stmfd   sp!, {r8-r11}                   @ keep l/h of the previous row
          RND_XY2_IT \align, \rnd
          ldmfd   sp!, {r4-r7}                    @ r4,r5 = l_prev   r6,r7 = h_prev
          add     r4, r4, r8                      @ l_prev + l_cur
          add     r5, r5, r9
          add     r6, r6, r10                     @ h_prev + h_cur
          add     r7, r7, r11
          ldr     r14, [r12, #24]                 @ 0x0F0F0F0F
          and     r4, r14, r4, lsr #2
          and     r5, r14, r5, lsr #2
          add     r4, r4, r6
          add     r5, r5, r7
          subs    r3, r3, #1
          stmia   r0, {r4-r5}
          add     r0, r0, r2
          bne     6b
          ldmfd   sp!, {r4-r11,pc}
  .endm
  @ void put_pixels8_xy2_arm(uint8_t *block, const uint8_t *pixels,
  @                          int line_size, int h)
  @
  @ 8-byte-wide 2x2 average with rounding; the per-row arithmetic, the row
  @ loop and the function return all live in RND_XY2_EXPAND (rnd = 1).
  @   r0 = block      destination, written with stmia (word aligned)
  @   r1 = pixels     source, any byte alignment
  @   r2 = line_size  added to both pointers after every row
  @   r3 = h          row count, must be non-zero
  @   r12             base of the constant table at the end of the function
  @ The source pointer is rounded down to a word boundary and one of four
  @ expansions of the loop is selected by its low two bits.
  @ r4-r11 and lr are saved on the stack; lr (r14) is then used as scratch.
  .p2align 8                                      @ 2^8 = 256-byte boundary
  .global put_pixels8_xy2_arm
  put_pixels8_xy2_arm:
          pld     [r1]
          stmdb   sp!, {r4-r11, lr}               @ R14 is also called LR
          adrl    r12, .Lpp8xy2_table
          ands    r4, r1, #3                      @ r4 = misalignment, Z set if none
          add     r5, r12, r4, lsl #2             @ r5 = &table[misalignment]
          bic     r1, r1, #3                      @ word-align the source pointer
          ldrne   pc, [r5]                        @ misaligned: dispatch; else fall through

  .Lpp8xy2_aligned:
          RND_XY2_EXPAND 0, 1

  .p2align 8
  .Lpp8xy2_off1:
          RND_XY2_EXPAND 1, 1

  .p2align 8
  .Lpp8xy2_off2:
          RND_XY2_EXPAND 2, 1

  .p2align 8
  .Lpp8xy2_off3:
          RND_XY2_EXPAND 3, 1

  .Lpp8xy2_table:                                 @ byte offsets are used by RND_XY2_*
          .word   0x03030303                      @ +0   low-two-bits mask
          .word   .Lpp8xy2_off1                   @ +4   jump target, misalignment 1
          .word   .Lpp8xy2_off2                   @ +8   jump target, misalignment 2
          .word   .Lpp8xy2_off3                   @ +12  jump target, misalignment 3
          .word   0x02020202                      @ +16  rounding constant (rnd == 1)
          .word   0xFCFCFCFC >> 2                 @ +20  high-six-bits mask, pre-shifted
          .word   0x0F0F0F0F                      @ +24  mask for the final low-bit sum
          .word   0x01010101                      @ +28  rounding constant (rnd != 1)
  @ void put_no_rnd_pixels8_xy2_arm(uint8_t *block, const uint8_t *pixels,
  @                                 int line_size, int h)
  @
  @ 8-byte-wide 2x2 average using the smaller rounding constant; the
  @ per-row arithmetic, the row loop and the function return all live in
  @ RND_XY2_EXPAND (rnd = 0).
  @   r0 = block      destination, written with stmia (word aligned)
  @   r1 = pixels     source, any byte alignment
  @   r2 = line_size  added to both pointers after every row
  @   r3 = h          row count, must be non-zero
  @   r12             base of the constant table at the end of the function
  @ The source pointer is rounded down to a word boundary and one of four
  @ expansions of the loop is selected by its low two bits.
  @ r4-r11 and lr are saved on the stack; lr (r14) is then used as scratch.
  .p2align 8                                      @ 2^8 = 256-byte boundary
  .global put_no_rnd_pixels8_xy2_arm
  put_no_rnd_pixels8_xy2_arm:
          pld     [r1]
          stmdb   sp!, {r4-r11, lr}               @ R14 is also called LR
          adrl    r12, .Lpnr8xy2_table
          ands    r4, r1, #3                      @ r4 = misalignment, Z set if none
          add     r5, r12, r4, lsl #2             @ r5 = &table[misalignment]
          bic     r1, r1, #3                      @ word-align the source pointer
          ldrne   pc, [r5]                        @ misaligned: dispatch; else fall through

  .Lpnr8xy2_aligned:
          RND_XY2_EXPAND 0, 0

  .p2align 8
  .Lpnr8xy2_off1:
          RND_XY2_EXPAND 1, 0

  .p2align 8
  .Lpnr8xy2_off2:
          RND_XY2_EXPAND 2, 0

  .p2align 8
  .Lpnr8xy2_off3:
          RND_XY2_EXPAND 3, 0

  .Lpnr8xy2_table:                                @ byte offsets are used by RND_XY2_*
          .word   0x03030303                      @ +0   low-two-bits mask
          .word   .Lpnr8xy2_off1                  @ +4   jump target, misalignment 1
          .word   .Lpnr8xy2_off2                  @ +8   jump target, misalignment 2
          .word   .Lpnr8xy2_off3                  @ +12  jump target, misalignment 3
          .word   0x02020202                      @ +16  rounding constant (rnd == 1)
          .word   0xFCFCFCFC >> 2                 @ +20  high-six-bits mask, pre-shifted
          .word   0x0F0F0F0F                      @ +24  mask for the final low-bit sum
          .word   0x01010101                      @ +28  rounding constant (rnd != 1)