You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

698 lines
18KB

  1. @
  2. @ ARMv4L optimized DSP utils
  3. @ Copyright (c) 2004 AGAWA Koji <i (AT) atty (DOT) jp>
  4. @
  5. @ This file is part of FFmpeg.
  6. @
  7. @ FFmpeg is free software; you can redistribute it and/or
  8. @ modify it under the terms of the GNU Lesser General Public
  9. @ License as published by the Free Software Foundation; either
  10. @ version 2.1 of the License, or (at your option) any later version.
  11. @
  12. @ FFmpeg is distributed in the hope that it will be useful,
  13. @ but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. @ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  15. @ Lesser General Public License for more details.
  16. @
  17. @ You should have received a copy of the GNU Lesser General Public
  18. @ License along with FFmpeg; if not, write to the Free Software
  19. @ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  20. @
  21. #include "config.h"
  22. #ifndef HAVE_PLD
  23. .macro pld reg
  24. .endm @ fallback: when HAVE_PLD is not defined, pld is an empty macro, so every pld line below emits no code
  25. #endif
  26. .macro ADJ_ALIGN_QUADWORD_D shift, Rd0, Rd1, Rd2, Rd3, Rn0, Rn1, Rn2, Rn3, Rn4
  27. mov \Rd0, \Rn0, lsr #(\shift * 8) @ step 1: Rd[i] = Rn[i] >> (8 * shift) for i = 0..3 (shift is a byte count)
  28. mov \Rd1, \Rn1, lsr #(\shift * 8)
  29. mov \Rd2, \Rn2, lsr #(\shift * 8)
  30. mov \Rd3, \Rn3, lsr #(\shift * 8)
  31. orr \Rd0, \Rd0, \Rn1, lsl #(32 - \shift * 8) @ step 2: Rd[i] |= Rn[i+1] << (32 - 8 * shift), filling the bits vacated by step 1
  32. orr \Rd1, \Rd1, \Rn2, lsl #(32 - \shift * 8)
  33. orr \Rd2, \Rd2, \Rn3, lsl #(32 - \shift * 8)
  34. orr \Rd3, \Rd3, \Rn4, lsl #(32 - \shift * 8)
  35. .endm @ net effect: four words Rd0-Rd3 built from five words Rn0-Rn4; only the Rd registers are written; this file uses shift = 1, 2, 3
  36. .macro ADJ_ALIGN_DOUBLEWORD shift, R0, R1, R2
  37. mov \R0, \R0, lsr #(\shift * 8) @ in-place variant: R0 = (R0 >> 8*shift) | (R1 << (32 - 8*shift))
  38. orr \R0, \R0, \R1, lsl #(32 - \shift * 8) @ must read R1 here, before the next line overwrites it
  39. mov \R1, \R1, lsr #(\shift * 8) @ R1 = (R1 >> 8*shift) | (R2 << (32 - 8*shift))
  40. orr \R1, \R1, \R2, lsl #(32 - \shift * 8) @ R2 is only read, never written
  41. .endm
  42. .macro ADJ_ALIGN_DOUBLEWORD_D shift, Rdst0, Rdst1, Rsrc0, Rsrc1, Rsrc2
  43. mov \Rdst0, \Rsrc0, lsr #(\shift * 8) @ separate-destination variant: Rdst0 = (Rsrc0 >> 8*shift) | (Rsrc1 << (32 - 8*shift))
  44. mov \Rdst1, \Rsrc1, lsr #(\shift * 8) @ Rdst1 = (Rsrc1 >> 8*shift) | (Rsrc2 << (32 - 8*shift))
  45. orr \Rdst0, \Rdst0, \Rsrc1, lsl #(32 - (\shift * 8)) @ Rsrc1 is read after Rdst1 was written, so Rdst1 must not be the same register as Rsrc1
  46. orr \Rdst1, \Rdst1, \Rsrc2, lsl #(32 - (\shift * 8)) @ Rdst1 may alias Rsrc0 (done in this file), since Rsrc0 is only read on the first line
  47. .endm
  48. .macro RND_AVG32 Rd0, Rd1, Rn0, Rn1, Rm0, Rm1, Rmask
  49. @ Rd = (Rn | Rm) - (((Rn ^ Rm) & ~0x01010101) >> 1), i.e. the per-byte average of four packed bytes, rounded up; done for two word pairs at once
  50. @ Rmask = 0xFEFEFEFE (must be supplied by the caller; it stops the shifted-out low bit of each byte from entering the byte below)
  51. @ Rn0 and Rn1 are destroyed (overwritten with Rn | Rm); Rm0, Rm1 and Rmask are only read
  52. eor \Rd0, \Rn0, \Rm0
  53. eor \Rd1, \Rn1, \Rm1
  54. orr \Rn0, \Rn0, \Rm0
  55. orr \Rn1, \Rn1, \Rm1
  56. and \Rd0, \Rd0, \Rmask
  57. and \Rd1, \Rd1, \Rmask
  58. sub \Rd0, \Rn0, \Rd0, lsr #1 @ no instruction in this macro sets the condition flags, so callers may place it between subs and a branch
  59. sub \Rd1, \Rn1, \Rd1, lsr #1
  60. .endm
  61. .macro NO_RND_AVG32 Rd0, Rd1, Rn0, Rn1, Rm0, Rm1, Rmask
  62. @ Rd = (Rn & Rm) + (((Rn ^ Rm) & ~0x01010101) >> 1), i.e. the per-byte average of four packed bytes, rounded down; done for two word pairs at once
  63. @ Rmask = 0xFEFEFEFE (must be supplied by the caller; it stops the shifted-out low bit of each byte from entering the byte below)
  64. @ Rn0 and Rn1 are destroyed (overwritten with Rn & Rm); Rm0, Rm1 and Rmask are only read
  65. eor \Rd0, \Rn0, \Rm0
  66. eor \Rd1, \Rn1, \Rm1
  67. and \Rn0, \Rn0, \Rm0
  68. and \Rn1, \Rn1, \Rm1
  69. and \Rd0, \Rd0, \Rmask
  70. and \Rd1, \Rd1, \Rmask
  71. add \Rd0, \Rn0, \Rd0, lsr #1 @ no instruction in this macro sets the condition flags, so callers may place it between subs and a branch
  72. add \Rd1, \Rn1, \Rd1, lsr #1
  73. .endm
  74. @ ----------------------------------------------------------------
  75. .align 8 @ NOTE(review): GNU as on ARM treats the .align operand as a power of two, which would make this 256-byte alignment - confirm that is intended
  76. .global put_pixels16_arm
  77. put_pixels16_arm:
  78. @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
  79. @ block = word aligned, pixels = unaligned; copies h rows of 16 bytes from pixels to block, stepping both pointers by line_size per row (r0-r3 presumably hold the four arguments in prototype order)
  80. pld [r1]
  81. stmfd sp!, {r4-r11, lr} @ R14 is also called LR; r12 is used below as scratch without being saved
  82. adr r5, 5f @ r5 = address of the jump table at label 5
  83. ands r4, r1, #3 @ r4 = byte misalignment of pixels (0..3); Z is set when pixels is word aligned
  84. bic r1, r1, #3 @ round pixels down to a word boundary (does not change the flags)
  85. add r5, r5, r4, lsl #2 @ r5 = address of table entry number r4
  86. ldrne pc, [r5] @ misaligned: jump to handler 2, 3 or 4; aligned: fall through to 1
  87. 1: @ source is word aligned: plain four-word copy per row
  88. ldmia r1, {r4-r7}
  89. add r1, r1, r2
  90. stmia r0, {r4-r7}
  91. pld [r1]
  92. subs r3, r3, #1 @ bottom-tested loop, so h must be at least 1
  93. add r0, r0, r2
  94. bne 1b
  95. ldmfd sp!, {r4-r11, pc} @ restore registers and return by loading the saved lr into pc
  96. .align 8
  97. 2: @ source misaligned by 1 byte
  98. ldmia r1, {r4-r8} @ five aligned words cover the 16 wanted bytes
  99. add r1, r1, r2
  100. ADJ_ALIGN_QUADWORD_D 1, r9, r10, r11, r12, r4, r5, r6, r7, r8 @ r9-r12 = r4-r8 funnel-shifted right by 1 byte
  101. pld [r1]
  102. subs r3, r3, #1
  103. stmia r0, {r9-r12} @ stmia and add leave the flags from subs intact for the bne
  104. add r0, r0, r2
  105. bne 2b
  106. ldmfd sp!, {r4-r11, pc}
  107. .align 8
  108. 3: @ source misaligned by 2 bytes
  109. ldmia r1, {r4-r8}
  110. add r1, r1, r2
  111. ADJ_ALIGN_QUADWORD_D 2, r9, r10, r11, r12, r4, r5, r6, r7, r8 @ r9-r12 = r4-r8 funnel-shifted right by 2 bytes
  112. pld [r1]
  113. subs r3, r3, #1
  114. stmia r0, {r9-r12}
  115. add r0, r0, r2
  116. bne 3b
  117. ldmfd sp!, {r4-r11, pc}
  118. .align 8
  119. 4: @ source misaligned by 3 bytes
  120. ldmia r1, {r4-r8}
  121. add r1, r1, r2
  122. ADJ_ALIGN_QUADWORD_D 3, r9, r10, r11, r12, r4, r5, r6, r7, r8 @ r9-r12 = r4-r8 funnel-shifted right by 3 bytes
  123. pld [r1]
  124. subs r3, r3, #1
  125. stmia r0, {r9-r12}
  126. add r0, r0, r2
  127. bne 4b
  128. ldmfd sp!, {r4-r11,pc}
  129. .align 8
  130. 5: @ jump table indexed by (pixels & 3)
  131. .word 1b @ entry 0 is never used for a jump: the aligned case falls through because ldrne is skipped
  132. .word 2b
  133. .word 3b
  134. .word 4b
  135. @ ----------------------------------------------------------------
  136. .align 8
  137. .global put_pixels8_arm
  138. put_pixels8_arm:
  139. @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
  140. @ block = word aligned, pixels = unaligned; copies h rows of 8 bytes from pixels to block, stepping both pointers by line_size per row (r0-r3 presumably hold the four arguments in prototype order)
  141. pld [r1]
  142. stmfd sp!, {r4-r5,lr} @ R14 is also called LR; r12 is used below as scratch without being saved
  143. adr r5, 5f @ r5 = address of the jump table at label 5
  144. ands r4, r1, #3 @ r4 = byte misalignment of pixels (0..3); Z is set when pixels is word aligned
  145. bic r1, r1, #3 @ round pixels down to a word boundary (does not change the flags)
  146. add r5, r5, r4, lsl #2 @ r5 = address of table entry number r4
  147. ldrne pc, [r5] @ misaligned: jump to handler 2, 3 or 4; aligned: fall through to 1
  148. 1: @ source is word aligned: plain two-word copy per row
  149. ldmia r1, {r4-r5}
  150. add r1, r1, r2
  151. subs r3, r3, #1 @ bottom-tested loop, so h must be at least 1
  152. pld [r1]
  153. stmia r0, {r4-r5}
  154. add r0, r0, r2
  155. bne 1b
  156. ldmfd sp!, {r4-r5,pc} @ restore registers and return by loading the saved lr into pc
  157. .align 8
  158. 2: @ source misaligned by 1 byte
  159. ldmia r1, {r4-r5, r12} @ three aligned words cover the 8 wanted bytes
  160. add r1, r1, r2
  161. ADJ_ALIGN_DOUBLEWORD 1, r4, r5, r12 @ r4,r5 = r4,r5,r12 funnel-shifted right by 1 byte, in place
  162. pld [r1]
  163. subs r3, r3, #1
  164. stmia r0, {r4-r5}
  165. add r0, r0, r2
  166. bne 2b
  167. ldmfd sp!, {r4-r5,pc}
  168. .align 8
  169. 3: @ source misaligned by 2 bytes
  170. ldmia r1, {r4-r5, r12}
  171. add r1, r1, r2
  172. ADJ_ALIGN_DOUBLEWORD 2, r4, r5, r12 @ r4,r5 = r4,r5,r12 funnel-shifted right by 2 bytes, in place
  173. pld [r1]
  174. subs r3, r3, #1
  175. stmia r0, {r4-r5}
  176. add r0, r0, r2
  177. bne 3b
  178. ldmfd sp!, {r4-r5,pc}
  179. .align 8
  180. 4: @ source misaligned by 3 bytes
  181. ldmia r1, {r4-r5, r12}
  182. add r1, r1, r2
  183. ADJ_ALIGN_DOUBLEWORD 3, r4, r5, r12 @ r4,r5 = r4,r5,r12 funnel-shifted right by 3 bytes, in place
  184. pld [r1]
  185. subs r3, r3, #1
  186. stmia r0, {r4-r5}
  187. add r0, r0, r2
  188. bne 4b
  189. ldmfd sp!, {r4-r5,pc}
  190. .align 8
  191. 5: @ jump table indexed by (pixels & 3)
  192. .word 1b @ entry 0 is never used for a jump: the aligned case falls through because ldrne is skipped
  193. .word 2b
  194. .word 3b
  195. .word 4b
  196. @ ----------------------------------------------------------------
  197. .align 8
  198. .global put_pixels8_x2_arm
  199. put_pixels8_x2_arm:
  200. @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
  201. @ block = word aligned, pixels = unaligned; for h rows, writes 8 bytes that are the RND_AVG32 (rounded-up) average of the 8 source bytes and the same 8 bytes taken one byte further on
  202. pld [r1]
  203. stmfd sp!, {r4-r10,lr} @ R14 is also called LR; r12 is used below without being saved
  204. adr r5, 5f @ r5 = address of the table at label 5
  205. ands r4, r1, #3 @ r4 = byte misalignment of pixels (0..3); Z is set when pixels is word aligned
  206. ldr r12, [r5] @ r12 = 0xFEFEFEFE, stored in slot 0 of the table; used as Rmask by RND_AVG32
  207. add r5, r5, r4, lsl #2 @ r5 = address of table entry number r4
  208. bic r1, r1, #3 @ round pixels down to a word boundary (ldr, add and bic keep the flags from ands)
  209. ldrne pc, [r5] @ misaligned: jump to handler 2, 3 or 4; aligned: fall through to 1
  210. 1: @ source is word aligned
  211. ldmia r1, {r4-r5, r10}
  212. add r1, r1, r2
  213. ADJ_ALIGN_DOUBLEWORD_D 1, r6, r7, r4, r5, r10 @ r6,r7 = the row shifted by 1 byte; r4,r5 is the unshifted row
  214. pld [r1]
  215. RND_AVG32 r8, r9, r4, r5, r6, r7, r12
  216. subs r3, r3, #1 @ bottom-tested loop, so h must be at least 1
  217. stmia r0, {r8-r9}
  218. add r0, r0, r2
  219. bne 1b
  220. ldmfd sp!, {r4-r10,pc}
  221. .align 8
  222. 2: @ source misaligned by 1 byte: average the 1-byte and 2-byte shifted views
  223. ldmia r1, {r4-r5, r10}
  224. add r1, r1, r2
  225. ADJ_ALIGN_DOUBLEWORD_D 1, r6, r7, r4, r5, r10
  226. ADJ_ALIGN_DOUBLEWORD_D 2, r8, r9, r4, r5, r10
  227. pld [r1]
  228. RND_AVG32 r4, r5, r6, r7, r8, r9, r12 @ the raw words in r4,r5 are no longer needed, so they receive the result
  229. subs r3, r3, #1
  230. stmia r0, {r4-r5}
  231. add r0, r0, r2
  232. bne 2b
  233. ldmfd sp!, {r4-r10,pc}
  234. .align 8
  235. 3: @ source misaligned by 2 bytes: average the 2-byte and 3-byte shifted views
  236. ldmia r1, {r4-r5, r10}
  237. add r1, r1, r2
  238. ADJ_ALIGN_DOUBLEWORD_D 2, r6, r7, r4, r5, r10
  239. ADJ_ALIGN_DOUBLEWORD_D 3, r8, r9, r4, r5, r10
  240. pld [r1]
  241. RND_AVG32 r4, r5, r6, r7, r8, r9, r12
  242. subs r3, r3, #1
  243. stmia r0, {r4-r5}
  244. add r0, r0, r2
  245. bne 3b
  246. ldmfd sp!, {r4-r10,pc}
  247. .align 8
  248. 4: @ source misaligned by 3 bytes: average the 3-byte shifted view with the next two whole words
  249. ldmia r1, {r4-r5, r10}
  250. add r1, r1, r2
  251. ADJ_ALIGN_DOUBLEWORD_D 3, r6, r7, r4, r5, r10
  252. pld [r1]
  253. RND_AVG32 r8, r9, r6, r7, r5, r10, r12 @ r5,r10 (second and third loaded words) serve unshifted as the one-byte-further view
  254. subs r3, r3, #1
  255. stmia r0, {r8-r9}
  256. add r0, r0, r2
  257. bne 4b
  258. ldmfd sp!, {r4-r10,pc} @@ update PC with LR content.
  259. .align 8
  260. 5: @ table indexed by (pixels & 3)
  261. .word 0xFEFEFEFE @ slot 0 holds the averaging mask instead of an address; the aligned case never jumps through it
  262. .word 2b
  263. .word 3b
  264. .word 4b
  265. .align 8
  266. .global put_no_rnd_pixels8_x2_arm
  267. put_no_rnd_pixels8_x2_arm:
  268. @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
  269. @ block = word aligned, pixels = unaligned; for h rows, writes 8 bytes that are the NO_RND_AVG32 (rounded-down) average of the 8 source bytes and the same 8 bytes taken one byte further on
  270. pld [r1]
  271. stmfd sp!, {r4-r10,lr} @ R14 is also called LR; r12 is used below without being saved
  272. adr r5, 5f @ r5 = address of the table at label 5
  273. ands r4, r1, #3 @ r4 = byte misalignment of pixels (0..3); Z is set when pixels is word aligned
  274. ldr r12, [r5] @ r12 = 0xFEFEFEFE, stored in slot 0 of the table; used as Rmask by NO_RND_AVG32
  275. add r5, r5, r4, lsl #2 @ r5 = address of table entry number r4
  276. bic r1, r1, #3 @ round pixels down to a word boundary (ldr, add and bic keep the flags from ands)
  277. ldrne pc, [r5] @ misaligned: jump to handler 2, 3 or 4; aligned: fall through to 1
  278. 1: @ source is word aligned
  279. ldmia r1, {r4-r5, r10}
  280. add r1, r1, r2
  281. ADJ_ALIGN_DOUBLEWORD_D 1, r6, r7, r4, r5, r10 @ r6,r7 = the row shifted by 1 byte; r4,r5 is the unshifted row
  282. pld [r1]
  283. NO_RND_AVG32 r8, r9, r4, r5, r6, r7, r12
  284. subs r3, r3, #1 @ bottom-tested loop, so h must be at least 1
  285. stmia r0, {r8-r9}
  286. add r0, r0, r2
  287. bne 1b
  288. ldmfd sp!, {r4-r10,pc}
  289. .align 8
  290. 2: @ source misaligned by 1 byte: average the 1-byte and 2-byte shifted views
  291. ldmia r1, {r4-r5, r10}
  292. add r1, r1, r2
  293. ADJ_ALIGN_DOUBLEWORD_D 1, r6, r7, r4, r5, r10
  294. ADJ_ALIGN_DOUBLEWORD_D 2, r8, r9, r4, r5, r10
  295. pld [r1]
  296. NO_RND_AVG32 r4, r5, r6, r7, r8, r9, r12 @ the raw words in r4,r5 are no longer needed, so they receive the result
  297. subs r3, r3, #1
  298. stmia r0, {r4-r5}
  299. add r0, r0, r2
  300. bne 2b
  301. ldmfd sp!, {r4-r10,pc}
  302. .align 8
  303. 3: @ source misaligned by 2 bytes: average the 2-byte and 3-byte shifted views
  304. ldmia r1, {r4-r5, r10}
  305. add r1, r1, r2
  306. ADJ_ALIGN_DOUBLEWORD_D 2, r6, r7, r4, r5, r10
  307. ADJ_ALIGN_DOUBLEWORD_D 3, r8, r9, r4, r5, r10
  308. pld [r1]
  309. NO_RND_AVG32 r4, r5, r6, r7, r8, r9, r12
  310. subs r3, r3, #1
  311. stmia r0, {r4-r5}
  312. add r0, r0, r2
  313. bne 3b
  314. ldmfd sp!, {r4-r10,pc}
  315. .align 8
  316. 4: @ source misaligned by 3 bytes: average the 3-byte shifted view with the next two whole words
  317. ldmia r1, {r4-r5, r10}
  318. add r1, r1, r2
  319. ADJ_ALIGN_DOUBLEWORD_D 3, r6, r7, r4, r5, r10
  320. pld [r1]
  321. NO_RND_AVG32 r8, r9, r6, r7, r5, r10, r12 @ r5,r10 (second and third loaded words) serve unshifted as the one-byte-further view
  322. subs r3, r3, #1
  323. stmia r0, {r8-r9}
  324. add r0, r0, r2
  325. bne 4b
  326. ldmfd sp!, {r4-r10,pc} @@ update PC with LR content.
  327. .align 8
  328. 5: @ table indexed by (pixels & 3)
  329. .word 0xFEFEFEFE @ slot 0 holds the averaging mask instead of an address; the aligned case never jumps through it
  330. .word 2b
  331. .word 3b
  332. .word 4b
  333. @ ----------------------------------------------------------------
  334. .align 8
  335. .global put_pixels8_y2_arm
  336. put_pixels8_y2_arm:
  337. @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
  338. @ block = word aligned, pixels = unaligned; each output row of 8 bytes is the RND_AVG32 (rounded-up) average of a source row and the source row line_size bytes further on
  339. pld [r1]
  340. stmfd sp!, {r4-r11,lr} @ R14 is also called LR; r12 is used below without being saved
  341. adr r5, 5f @ r5 = address of the table at label 5
  342. ands r4, r1, #3 @ r4 = byte misalignment of pixels (0..3); Z is set when pixels is word aligned
  343. mov r3, r3, lsr #1 @ loop count = h / 2: every loop pass below emits two rows, so h is presumably even and at least 2
  344. ldr r12, [r5] @ r12 = 0xFEFEFEFE, stored in slot 0 of the table; used as Rmask by RND_AVG32
  345. add r5, r5, r4, lsl #2 @ r5 = address of table entry number r4
  346. bic r1, r1, #3 @ round pixels down to a word boundary (mov, ldr, add and bic keep the flags from ands)
  347. ldrne pc, [r5] @ misaligned: jump to handler 2, 3 or 4; aligned: fall through to 1
  348. 1: @ source is word aligned
  349. ldmia r1, {r4-r5} @ prime the loop with the first source row in r4,r5
  350. add r1, r1, r2
  351. 6: ldmia r1, {r6-r7} @ loop entry: r4,r5 hold the previous row, load the next row into r6,r7
  352. add r1, r1, r2
  353. pld [r1]
  354. RND_AVG32 r8, r9, r4, r5, r6, r7, r12 @ destroys r4,r5 and keeps r6,r7 for the second half
  355. ldmia r1, {r4-r5}
  356. add r1, r1, r2
  357. stmia r0, {r8-r9}
  358. add r0, r0, r2
  359. pld [r1]
  360. RND_AVG32 r8, r9, r6, r7, r4, r5, r12 @ destroys r6,r7 and keeps r4,r5 as the previous row for the next pass
  361. subs r3, r3, #1
  362. stmia r0, {r8-r9}
  363. add r0, r0, r2
  364. bne 6b
  365. ldmfd sp!, {r4-r11,pc}
  366. .align 8
  367. 2: @ source misaligned by 1 byte
  368. ldmia r1, {r4-r6} @ prime the loop: first source row, realigned into r4,r5
  369. add r1, r1, r2
  370. pld [r1]
  371. ADJ_ALIGN_DOUBLEWORD 1, r4, r5, r6
  372. 6: ldmia r1, {r7-r9} @ loop entry: r4,r5 hold the previous row, next row goes to r7,r8
  373. add r1, r1, r2
  374. pld [r1]
  375. ADJ_ALIGN_DOUBLEWORD 1, r7, r8, r9
  376. RND_AVG32 r10, r11, r4, r5, r7, r8, r12 @ destroys r4,r5 and keeps r7,r8
  377. stmia r0, {r10-r11}
  378. add r0, r0, r2
  379. ldmia r1, {r4-r6}
  380. add r1, r1, r2
  381. pld [r1]
  382. ADJ_ALIGN_DOUBLEWORD 1, r4, r5, r6
  383. subs r3, r3, #1 @ the flags set here survive RND_AVG32, stmia and add until the bne
  384. RND_AVG32 r10, r11, r7, r8, r4, r5, r12 @ destroys r7,r8 and keeps r4,r5 for the next pass
  385. stmia r0, {r10-r11}
  386. add r0, r0, r2
  387. bne 6b
  388. ldmfd sp!, {r4-r11,pc}
  389. .align 8
  390. 3: @ source misaligned by 2 bytes; same structure as handler 2
  391. ldmia r1, {r4-r6}
  392. add r1, r1, r2
  393. pld [r1]
  394. ADJ_ALIGN_DOUBLEWORD 2, r4, r5, r6
  395. 6: ldmia r1, {r7-r9}
  396. add r1, r1, r2
  397. pld [r1]
  398. ADJ_ALIGN_DOUBLEWORD 2, r7, r8, r9
  399. RND_AVG32 r10, r11, r4, r5, r7, r8, r12
  400. stmia r0, {r10-r11}
  401. add r0, r0, r2
  402. ldmia r1, {r4-r6}
  403. add r1, r1, r2
  404. pld [r1]
  405. ADJ_ALIGN_DOUBLEWORD 2, r4, r5, r6
  406. subs r3, r3, #1
  407. RND_AVG32 r10, r11, r7, r8, r4, r5, r12
  408. stmia r0, {r10-r11}
  409. add r0, r0, r2
  410. bne 6b
  411. ldmfd sp!, {r4-r11,pc}
  412. .align 8
  413. 4: @ source misaligned by 3 bytes; same structure as handler 2
  414. ldmia r1, {r4-r6}
  415. add r1, r1, r2
  416. pld [r1]
  417. ADJ_ALIGN_DOUBLEWORD 3, r4, r5, r6
  418. 6: ldmia r1, {r7-r9}
  419. add r1, r1, r2
  420. pld [r1]
  421. ADJ_ALIGN_DOUBLEWORD 3, r7, r8, r9
  422. RND_AVG32 r10, r11, r4, r5, r7, r8, r12
  423. stmia r0, {r10-r11}
  424. add r0, r0, r2
  425. ldmia r1, {r4-r6}
  426. add r1, r1, r2
  427. pld [r1]
  428. ADJ_ALIGN_DOUBLEWORD 3, r4, r5, r6
  429. subs r3, r3, #1
  430. RND_AVG32 r10, r11, r7, r8, r4, r5, r12
  431. stmia r0, {r10-r11}
  432. add r0, r0, r2
  433. bne 6b
  434. ldmfd sp!, {r4-r11,pc}
  435. .align 8
  436. 5: @ table indexed by (pixels & 3)
  437. .word 0xFEFEFEFE @ slot 0 holds the averaging mask instead of an address; the aligned case never jumps through it
  438. .word 2b
  439. .word 3b
  440. .word 4b
  441. .align 8
  442. .global put_no_rnd_pixels8_y2_arm
  443. put_no_rnd_pixels8_y2_arm:
  444. @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
  445. @ block = word aligned, pixels = unaligned; each output row of 8 bytes is the NO_RND_AVG32 (rounded-down) average of a source row and the source row line_size bytes further on
  446. pld [r1]
  447. stmfd sp!, {r4-r11,lr} @ R14 is also called LR; r12 is used below without being saved
  448. adr r5, 5f @ r5 = address of the table at label 5
  449. ands r4, r1, #3 @ r4 = byte misalignment of pixels (0..3); Z is set when pixels is word aligned
  450. mov r3, r3, lsr #1 @ loop count = h / 2: every loop pass below emits two rows, so h is presumably even and at least 2
  451. ldr r12, [r5] @ r12 = 0xFEFEFEFE, stored in slot 0 of the table; used as Rmask by NO_RND_AVG32
  452. add r5, r5, r4, lsl #2 @ r5 = address of table entry number r4
  453. bic r1, r1, #3 @ round pixels down to a word boundary (mov, ldr, add and bic keep the flags from ands)
  454. ldrne pc, [r5] @ misaligned: jump to handler 2, 3 or 4; aligned: fall through to 1
  455. 1: @ source is word aligned
  456. ldmia r1, {r4-r5} @ prime the loop with the first source row in r4,r5
  457. add r1, r1, r2
  458. 6: ldmia r1, {r6-r7} @ loop entry: r4,r5 hold the previous row, load the next row into r6,r7
  459. add r1, r1, r2
  460. pld [r1]
  461. NO_RND_AVG32 r8, r9, r4, r5, r6, r7, r12 @ destroys r4,r5 and keeps r6,r7 for the second half
  462. ldmia r1, {r4-r5}
  463. add r1, r1, r2
  464. stmia r0, {r8-r9}
  465. add r0, r0, r2
  466. pld [r1]
  467. NO_RND_AVG32 r8, r9, r6, r7, r4, r5, r12 @ destroys r6,r7 and keeps r4,r5 as the previous row for the next pass
  468. subs r3, r3, #1
  469. stmia r0, {r8-r9}
  470. add r0, r0, r2
  471. bne 6b
  472. ldmfd sp!, {r4-r11,pc}
  473. .align 8
  474. 2: @ source misaligned by 1 byte
  475. ldmia r1, {r4-r6} @ prime the loop: first source row, realigned into r4,r5
  476. add r1, r1, r2
  477. pld [r1]
  478. ADJ_ALIGN_DOUBLEWORD 1, r4, r5, r6
  479. 6: ldmia r1, {r7-r9} @ loop entry: r4,r5 hold the previous row, next row goes to r7,r8
  480. add r1, r1, r2
  481. pld [r1]
  482. ADJ_ALIGN_DOUBLEWORD 1, r7, r8, r9
  483. NO_RND_AVG32 r10, r11, r4, r5, r7, r8, r12 @ destroys r4,r5 and keeps r7,r8
  484. stmia r0, {r10-r11}
  485. add r0, r0, r2
  486. ldmia r1, {r4-r6}
  487. add r1, r1, r2
  488. pld [r1]
  489. ADJ_ALIGN_DOUBLEWORD 1, r4, r5, r6
  490. subs r3, r3, #1 @ the flags set here survive NO_RND_AVG32, stmia and add until the bne
  491. NO_RND_AVG32 r10, r11, r7, r8, r4, r5, r12 @ destroys r7,r8 and keeps r4,r5 for the next pass
  492. stmia r0, {r10-r11}
  493. add r0, r0, r2
  494. bne 6b
  495. ldmfd sp!, {r4-r11,pc}
  496. .align 8
  497. 3: @ source misaligned by 2 bytes; same structure as handler 2
  498. ldmia r1, {r4-r6}
  499. add r1, r1, r2
  500. pld [r1]
  501. ADJ_ALIGN_DOUBLEWORD 2, r4, r5, r6
  502. 6: ldmia r1, {r7-r9}
  503. add r1, r1, r2
  504. pld [r1]
  505. ADJ_ALIGN_DOUBLEWORD 2, r7, r8, r9
  506. NO_RND_AVG32 r10, r11, r4, r5, r7, r8, r12
  507. stmia r0, {r10-r11}
  508. add r0, r0, r2
  509. ldmia r1, {r4-r6}
  510. add r1, r1, r2
  511. pld [r1]
  512. ADJ_ALIGN_DOUBLEWORD 2, r4, r5, r6
  513. subs r3, r3, #1
  514. NO_RND_AVG32 r10, r11, r7, r8, r4, r5, r12
  515. stmia r0, {r10-r11}
  516. add r0, r0, r2
  517. bne 6b
  518. ldmfd sp!, {r4-r11,pc}
  519. .align 8
  520. 4: @ source misaligned by 3 bytes; same structure as handler 2
  521. ldmia r1, {r4-r6}
  522. add r1, r1, r2
  523. pld [r1]
  524. ADJ_ALIGN_DOUBLEWORD 3, r4, r5, r6
  525. 6: ldmia r1, {r7-r9}
  526. add r1, r1, r2
  527. pld [r1]
  528. ADJ_ALIGN_DOUBLEWORD 3, r7, r8, r9
  529. NO_RND_AVG32 r10, r11, r4, r5, r7, r8, r12
  530. stmia r0, {r10-r11}
  531. add r0, r0, r2
  532. ldmia r1, {r4-r6}
  533. add r1, r1, r2
  534. pld [r1]
  535. ADJ_ALIGN_DOUBLEWORD 3, r4, r5, r6
  536. subs r3, r3, #1
  537. NO_RND_AVG32 r10, r11, r7, r8, r4, r5, r12
  538. stmia r0, {r10-r11}
  539. add r0, r0, r2
  540. bne 6b
  541. ldmfd sp!, {r4-r11,pc}
  542. .align 8
  543. 5: @ table indexed by (pixels & 3)
  544. .word 0xFEFEFEFE @ slot 0 holds the averaging mask instead of an address; the aligned case never jumps through it
  545. .word 2b
  546. .word 3b
  547. .word 4b
  548. @ ----------------------------------------------------------------
  549. .macro RND_XY2_IT align
  550. @ l1= (a & 0x03030303) + (b & 0x03030303) ?(+ 0x02020202)   -> left in r8,r9; the bracketed constant is added only when bit 0 of r3 is clear
  551. @ h1= ((a & 0xFCFCFCFCUL) >> 2) + ((b & 0xFCFCFCFCUL) >> 2)   -> left in r10,r11; a and b are the current row and the same row one byte further on
  552. .if \align == 0
  553. ldmia r1, {r6-r8} @ one source row = three aligned words; which registers receive them depends on align
  554. .elseif \align == 3
  555. ldmia r1, {r5-r7}
  556. .else
  557. ldmia r1, {r8-r10}
  558. .endif
  559. add r1, r1, r2 @ advance the source pointer to the next row
  560. pld [r1]
  561. .if \align == 0
  562. ADJ_ALIGN_DOUBLEWORD_D 1, r4, r5, r6, r7, r8 @ r4,r5 = 1-byte shifted view, r6,r7 = unshifted view (the sums below are symmetric in the two views)
  563. .elseif \align == 1
  564. ADJ_ALIGN_DOUBLEWORD_D 1, r4, r5, r8, r9, r10
  565. ADJ_ALIGN_DOUBLEWORD_D 2, r6, r7, r8, r9, r10
  566. .elseif \align == 2
  567. ADJ_ALIGN_DOUBLEWORD_D 2, r4, r5, r8, r9, r10
  568. ADJ_ALIGN_DOUBLEWORD_D 3, r6, r7, r8, r9, r10
  569. .elseif \align == 3
  570. ADJ_ALIGN_DOUBLEWORD_D 3, r4, r5, r5, r6, r7 @ r4,r5 = 3-byte shifted view; r6,r7 stay as loaded and act as the one-byte-further view
  571. .endif
  572. ldr r14, [r12, #0] @ 0x03030303 (r12 must point at the constant table of the enclosing function; r14 is scratch here)
  573. tst r3, #1 @ Z set when r3 is even; the and/add/ldr lines below do not change the flags
  574. and r8, r4, r14
  575. and r9, r5, r14
  576. and r10, r6, r14
  577. and r11, r7, r14
  578. ldreq r14, [r12, #16] @ 0x02020202/0x01010101 (rounding constant, value depends on which function owns the table)
  579. add r8, r8, r10
  580. add r9, r9, r11
  581. addeq r8, r8, r14 @ r3 drops by one per expansion, so the constant lands on every other row
  582. addeq r9, r9, r14
  583. ldr r14, [r12, #20] @ 0xFCFCFCFC >> 2
  584. and r4, r14, r4, lsr #2
  585. and r5, r14, r5, lsr #2
  586. and r6, r14, r6, lsr #2
  587. and r7, r14, r7, lsr #2
  588. add r10, r4, r6
  589. add r11, r5, r7
  590. subs r3, r3, #1 @ row counter; the flags set here are consumed by the bge in RND_XY2_EXPAND
  591. .endm
  592. .macro RND_XY2_EXPAND align
  593. RND_XY2_IT \align @ prime: partial sums of the first source row in r8-r11
  594. 6: stmfd sp!, {r8-r11} @ loop entry: park the previous row sums on the stack, since RND_XY2_IT overwrites r4-r11
  595. RND_XY2_IT \align @ partial sums of the next source row in r8-r11
  596. ldmfd sp!, {r4-r7} @ previous row sums come back as r4,r5 (low parts) and r6,r7 (high parts)
  597. add r4, r4, r8
  598. add r5, r5, r9
  599. add r6, r6, r10
  600. add r7, r7, r11
  601. ldr r14, [r12, #24] @ 0x0F0F0F0F
  602. and r4, r14, r4, lsr #2 @ divide the combined low-part sums by 4 and mask each byte
  603. and r5, r14, r5, lsr #2
  604. add r4, r4, r6 @ add the combined high-part sums to form the output bytes
  605. add r5, r5, r7
  606. stmia r0, {r4-r5}
  607. add r0, r0, r2
  608. bge 6b @ tests the flags of the subs ending RND_XY2_IT (nothing in between sets flags); h rows are written from h + 1 source rows
  609. ldmfd sp!, {r4-r11,pc} @ function epilogue: matches the stmfd of {r4-r11,lr} in the functions that expand this macro
  610. .endm
  611. .align 8
  612. .global put_pixels8_xy2_arm
  613. put_pixels8_xy2_arm:
  614. @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
  615. @ block = word aligned, pixels = unaligned; each output byte combines four source bytes (x and x+1 on two consecutive rows) via RND_XY2_EXPAND, with rounding constant 0x02020202
  616. pld [r1]
  617. stmfd sp!, {r4-r11,lr} @ R14 is also called LR; it is saved because the macros use r14 as scratch
  618. adrl r12, 5f @ r12 = address of the table at label 5; it stays live as the constant base for RND_XY2_IT
  619. ands r4, r1, #3 @ r4 = byte misalignment of pixels (0..3); Z is set when pixels is word aligned
  620. add r5, r12, r4, lsl #2 @ r5 = address of table entry number r4
  621. bic r1, r1, #3 @ round pixels down to a word boundary (add and bic keep the flags from ands)
  622. ldrne pc, [r5] @ misaligned: jump to handler 2, 3 or 4; aligned: fall through to 1
  623. 1:
  624. RND_XY2_EXPAND 0 @ every expansion ends by returning, so control never falls into the next handler
  625. .align 8
  626. 2:
  627. RND_XY2_EXPAND 1
  628. .align 8
  629. 3:
  630. RND_XY2_EXPAND 2
  631. .align 8
  632. 4:
  633. RND_XY2_EXPAND 3
  634. 5: @ combined jump table and constant pool, addressed through r12
  635. .word 0x03030303 @ offset 0: low-two-bits mask (slot 0 is never used as a jump target)
  636. .word 2b
  637. .word 3b
  638. .word 4b
  639. .word 0x02020202 @ offset 16: rounding constant read by RND_XY2_IT
  640. .word 0xFCFCFCFC >> 2 @ offset 20: mask applied after shifting right by 2
  641. .word 0x0F0F0F0F @ offset 24: mask read by RND_XY2_EXPAND
  642. .align 8
  643. .global put_no_rnd_pixels8_xy2_arm
  644. put_no_rnd_pixels8_xy2_arm:
  645. @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
  646. @ block = word aligned, pixels = unaligned; each output byte combines four source bytes (x and x+1 on two consecutive rows) via RND_XY2_EXPAND, with rounding constant 0x01010101
  647. pld [r1]
  648. stmfd sp!, {r4-r11,lr} @ R14 is also called LR; it is saved because the macros use r14 as scratch
  649. adrl r12, 5f @ r12 = address of the table at label 5; it stays live as the constant base for RND_XY2_IT
  650. ands r4, r1, #3 @ r4 = byte misalignment of pixels (0..3); Z is set when pixels is word aligned
  651. add r5, r12, r4, lsl #2 @ r5 = address of table entry number r4
  652. bic r1, r1, #3 @ round pixels down to a word boundary (add and bic keep the flags from ands)
  653. ldrne pc, [r5] @ misaligned: jump to handler 2, 3 or 4; aligned: fall through to 1
  654. 1:
  655. RND_XY2_EXPAND 0 @ every expansion ends by returning, so control never falls into the next handler
  656. .align 8
  657. 2:
  658. RND_XY2_EXPAND 1
  659. .align 8
  660. 3:
  661. RND_XY2_EXPAND 2
  662. .align 8
  663. 4:
  664. RND_XY2_EXPAND 3
  665. 5: @ combined jump table and constant pool, addressed through r12
  666. .word 0x03030303 @ offset 0: low-two-bits mask (slot 0 is never used as a jump target)
  667. .word 2b
  668. .word 3b
  669. .word 4b
  670. .word 0x01010101 @ offset 16: rounding constant read by RND_XY2_IT (the only difference from put_pixels8_xy2_arm)
  671. .word 0xFCFCFCFC >> 2 @ offset 20: mask applied after shifting right by 2
  672. .word 0x0F0F0F0F @ offset 24: mask read by RND_XY2_EXPAND
  668. .word 3b
  669. .word 4b
  670. .word 0x01010101
  671. .word 0xFCFCFCFC >> 2
  672. .word 0x0F0F0F0F