You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

604 lines
21KB

  1. @
  2. @ ARMv4-optimized halfpel functions
  3. @ Copyright (c) 2004 AGAWA Koji <i (AT) atty (DOT) jp>
  4. @
  5. @ This file is part of Libav.
  6. @
  7. @ Libav is free software; you can redistribute it and/or
  8. @ modify it under the terms of the GNU Lesser General Public
  9. @ License as published by the Free Software Foundation; either
  10. @ version 2.1 of the License, or (at your option) any later version.
  11. @
  12. @ Libav is distributed in the hope that it will be useful,
  13. @ but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. @ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  15. @ Lesser General Public License for more details.
  16. @
  17. @ You should have received a copy of the GNU Lesser General Public
  18. @ License along with Libav; if not, write to the Free Software
  19. @ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  20. @
  21. #include "config.h"
  22. #include "libavutil/arm/asm.S"
  23. #if !HAVE_ARMV5TE_EXTERNAL
  24. #define pld @
  25. #endif
  26. .macro ALIGN_QWORD_D shift, Rd0, Rd1, Rd2, Rd3, Rn0, Rn1, Rn2, Rn3, Rn4
  27. mov \Rd0, \Rn0, lsr #(\shift * 8)
  28. mov \Rd1, \Rn1, lsr #(\shift * 8)
  29. mov \Rd2, \Rn2, lsr #(\shift * 8)
  30. mov \Rd3, \Rn3, lsr #(\shift * 8)
  31. orr \Rd0, \Rd0, \Rn1, lsl #(32 - \shift * 8)
  32. orr \Rd1, \Rd1, \Rn2, lsl #(32 - \shift * 8)
  33. orr \Rd2, \Rd2, \Rn3, lsl #(32 - \shift * 8)
  34. orr \Rd3, \Rd3, \Rn4, lsl #(32 - \shift * 8)
  35. .endm
  36. .macro ALIGN_DWORD shift, R0, R1, R2
  37. mov \R0, \R0, lsr #(\shift * 8)
  38. orr \R0, \R0, \R1, lsl #(32 - \shift * 8)
  39. mov \R1, \R1, lsr #(\shift * 8)
  40. orr \R1, \R1, \R2, lsl #(32 - \shift * 8)
  41. .endm
  42. .macro ALIGN_DWORD_D shift, Rdst0, Rdst1, Rsrc0, Rsrc1, Rsrc2
  43. mov \Rdst0, \Rsrc0, lsr #(\shift * 8)
  44. mov \Rdst1, \Rsrc1, lsr #(\shift * 8)
  45. orr \Rdst0, \Rdst0, \Rsrc1, lsl #(32 - (\shift * 8))
  46. orr \Rdst1, \Rdst1, \Rsrc2, lsl #(32 - (\shift * 8))
  47. .endm
  48. .macro RND_AVG32 Rd0, Rd1, Rn0, Rn1, Rm0, Rm1, Rmask
  49. @ Rd = (Rn | Rm) - (((Rn ^ Rm) & ~0x01010101) >> 1)
  50. @ Rmask = 0xFEFEFEFE
  51. @ Rn = destroy
  52. eor \Rd0, \Rn0, \Rm0
  53. eor \Rd1, \Rn1, \Rm1
  54. orr \Rn0, \Rn0, \Rm0
  55. orr \Rn1, \Rn1, \Rm1
  56. and \Rd0, \Rd0, \Rmask
  57. and \Rd1, \Rd1, \Rmask
  58. sub \Rd0, \Rn0, \Rd0, lsr #1
  59. sub \Rd1, \Rn1, \Rd1, lsr #1
  60. .endm
  61. .macro NO_RND_AVG32 Rd0, Rd1, Rn0, Rn1, Rm0, Rm1, Rmask
  62. @ Rd = (Rn & Rm) - (((Rn ^ Rm) & ~0x01010101) >> 1)
  63. @ Rmask = 0xFEFEFEFE
  64. @ Rn = destroy
  65. eor \Rd0, \Rn0, \Rm0
  66. eor \Rd1, \Rn1, \Rm1
  67. and \Rn0, \Rn0, \Rm0
  68. and \Rn1, \Rn1, \Rm1
  69. and \Rd0, \Rd0, \Rmask
  70. and \Rd1, \Rd1, \Rmask
  71. add \Rd0, \Rn0, \Rd0, lsr #1
  72. add \Rd1, \Rn1, \Rd1, lsr #1
  73. .endm
  74. .macro JMP_ALIGN tmp, reg
  75. ands \tmp, \reg, #3
  76. bic \reg, \reg, #3
  77. beq 1f
  78. subs \tmp, \tmp, #1
  79. beq 2f
  80. subs \tmp, \tmp, #1
  81. beq 3f
  82. b 4f
  83. .endm
  84. @ ----------------------------------------------------------------
  85. function ff_put_pixels16_arm, export=1, align=5
  86. @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
  87. @ block = word aligned, pixles = unaligned
  88. pld [r1]
  89. push {r4-r11, lr}
  90. JMP_ALIGN r5, r1
  91. 1:
  92. ldm r1, {r4-r7}
  93. add r1, r1, r2
  94. stm r0, {r4-r7}
  95. pld [r1]
  96. subs r3, r3, #1
  97. add r0, r0, r2
  98. bne 1b
  99. pop {r4-r11, pc}
  100. .align 5
  101. 2:
  102. ldm r1, {r4-r8}
  103. add r1, r1, r2
  104. ALIGN_QWORD_D 1, r9, r10, r11, r12, r4, r5, r6, r7, r8
  105. pld [r1]
  106. subs r3, r3, #1
  107. stm r0, {r9-r12}
  108. add r0, r0, r2
  109. bne 2b
  110. pop {r4-r11, pc}
  111. .align 5
  112. 3:
  113. ldm r1, {r4-r8}
  114. add r1, r1, r2
  115. ALIGN_QWORD_D 2, r9, r10, r11, r12, r4, r5, r6, r7, r8
  116. pld [r1]
  117. subs r3, r3, #1
  118. stm r0, {r9-r12}
  119. add r0, r0, r2
  120. bne 3b
  121. pop {r4-r11, pc}
  122. .align 5
  123. 4:
  124. ldm r1, {r4-r8}
  125. add r1, r1, r2
  126. ALIGN_QWORD_D 3, r9, r10, r11, r12, r4, r5, r6, r7, r8
  127. pld [r1]
  128. subs r3, r3, #1
  129. stm r0, {r9-r12}
  130. add r0, r0, r2
  131. bne 4b
  132. pop {r4-r11,pc}
  133. endfunc
  134. @ ----------------------------------------------------------------
  135. function ff_put_pixels8_arm, export=1, align=5
  136. @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
  137. @ block = word aligned, pixles = unaligned
  138. pld [r1]
  139. push {r4-r5,lr}
  140. JMP_ALIGN r5, r1
  141. 1:
  142. ldm r1, {r4-r5}
  143. add r1, r1, r2
  144. subs r3, r3, #1
  145. pld [r1]
  146. stm r0, {r4-r5}
  147. add r0, r0, r2
  148. bne 1b
  149. pop {r4-r5,pc}
  150. .align 5
  151. 2:
  152. ldm r1, {r4-r5, r12}
  153. add r1, r1, r2
  154. ALIGN_DWORD 1, r4, r5, r12
  155. pld [r1]
  156. subs r3, r3, #1
  157. stm r0, {r4-r5}
  158. add r0, r0, r2
  159. bne 2b
  160. pop {r4-r5,pc}
  161. .align 5
  162. 3:
  163. ldm r1, {r4-r5, r12}
  164. add r1, r1, r2
  165. ALIGN_DWORD 2, r4, r5, r12
  166. pld [r1]
  167. subs r3, r3, #1
  168. stm r0, {r4-r5}
  169. add r0, r0, r2
  170. bne 3b
  171. pop {r4-r5,pc}
  172. .align 5
  173. 4:
  174. ldm r1, {r4-r5, r12}
  175. add r1, r1, r2
  176. ALIGN_DWORD 3, r4, r5, r12
  177. pld [r1]
  178. subs r3, r3, #1
  179. stm r0, {r4-r5}
  180. add r0, r0, r2
  181. bne 4b
  182. pop {r4-r5,pc}
  183. endfunc
  184. @ ----------------------------------------------------------------
  185. function ff_put_pixels8_x2_arm, export=1, align=5
  186. @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
  187. @ block = word aligned, pixles = unaligned
  188. pld [r1]
  189. push {r4-r10,lr}
  190. ldr r12, =0xfefefefe
  191. JMP_ALIGN r5, r1
  192. 1:
  193. ldm r1, {r4-r5, r10}
  194. add r1, r1, r2
  195. ALIGN_DWORD_D 1, r6, r7, r4, r5, r10
  196. pld [r1]
  197. RND_AVG32 r8, r9, r4, r5, r6, r7, r12
  198. subs r3, r3, #1
  199. stm r0, {r8-r9}
  200. add r0, r0, r2
  201. bne 1b
  202. pop {r4-r10,pc}
  203. .align 5
  204. 2:
  205. ldm r1, {r4-r5, r10}
  206. add r1, r1, r2
  207. ALIGN_DWORD_D 1, r6, r7, r4, r5, r10
  208. ALIGN_DWORD_D 2, r8, r9, r4, r5, r10
  209. pld [r1]
  210. RND_AVG32 r4, r5, r6, r7, r8, r9, r12
  211. subs r3, r3, #1
  212. stm r0, {r4-r5}
  213. add r0, r0, r2
  214. bne 2b
  215. pop {r4-r10,pc}
  216. .align 5
  217. 3:
  218. ldm r1, {r4-r5, r10}
  219. add r1, r1, r2
  220. ALIGN_DWORD_D 2, r6, r7, r4, r5, r10
  221. ALIGN_DWORD_D 3, r8, r9, r4, r5, r10
  222. pld [r1]
  223. RND_AVG32 r4, r5, r6, r7, r8, r9, r12
  224. subs r3, r3, #1
  225. stm r0, {r4-r5}
  226. add r0, r0, r2
  227. bne 3b
  228. pop {r4-r10,pc}
  229. .align 5
  230. 4:
  231. ldm r1, {r4-r5, r10}
  232. add r1, r1, r2
  233. ALIGN_DWORD_D 3, r6, r7, r4, r5, r10
  234. pld [r1]
  235. RND_AVG32 r8, r9, r6, r7, r5, r10, r12
  236. subs r3, r3, #1
  237. stm r0, {r8-r9}
  238. add r0, r0, r2
  239. bne 4b
  240. pop {r4-r10,pc}
  241. endfunc
  242. function ff_put_no_rnd_pixels8_x2_arm, export=1, align=5
  243. @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
  244. @ block = word aligned, pixles = unaligned
  245. pld [r1]
  246. push {r4-r10,lr}
  247. ldr r12, =0xfefefefe
  248. JMP_ALIGN r5, r1
  249. 1:
  250. ldm r1, {r4-r5, r10}
  251. add r1, r1, r2
  252. ALIGN_DWORD_D 1, r6, r7, r4, r5, r10
  253. pld [r1]
  254. NO_RND_AVG32 r8, r9, r4, r5, r6, r7, r12
  255. subs r3, r3, #1
  256. stm r0, {r8-r9}
  257. add r0, r0, r2
  258. bne 1b
  259. pop {r4-r10,pc}
  260. .align 5
  261. 2:
  262. ldm r1, {r4-r5, r10}
  263. add r1, r1, r2
  264. ALIGN_DWORD_D 1, r6, r7, r4, r5, r10
  265. ALIGN_DWORD_D 2, r8, r9, r4, r5, r10
  266. pld [r1]
  267. NO_RND_AVG32 r4, r5, r6, r7, r8, r9, r12
  268. subs r3, r3, #1
  269. stm r0, {r4-r5}
  270. add r0, r0, r2
  271. bne 2b
  272. pop {r4-r10,pc}
  273. .align 5
  274. 3:
  275. ldm r1, {r4-r5, r10}
  276. add r1, r1, r2
  277. ALIGN_DWORD_D 2, r6, r7, r4, r5, r10
  278. ALIGN_DWORD_D 3, r8, r9, r4, r5, r10
  279. pld [r1]
  280. NO_RND_AVG32 r4, r5, r6, r7, r8, r9, r12
  281. subs r3, r3, #1
  282. stm r0, {r4-r5}
  283. add r0, r0, r2
  284. bne 3b
  285. pop {r4-r10,pc}
  286. .align 5
  287. 4:
  288. ldm r1, {r4-r5, r10}
  289. add r1, r1, r2
  290. ALIGN_DWORD_D 3, r6, r7, r4, r5, r10
  291. pld [r1]
  292. NO_RND_AVG32 r8, r9, r6, r7, r5, r10, r12
  293. subs r3, r3, #1
  294. stm r0, {r8-r9}
  295. add r0, r0, r2
  296. bne 4b
  297. pop {r4-r10,pc}
  298. endfunc
  299. @ ----------------------------------------------------------------
  300. function ff_put_pixels8_y2_arm, export=1, align=5
  301. @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
  302. @ block = word aligned, pixles = unaligned
  303. pld [r1]
  304. push {r4-r11,lr}
  305. mov r3, r3, lsr #1
  306. ldr r12, =0xfefefefe
  307. JMP_ALIGN r5, r1
  308. 1:
  309. ldm r1, {r4-r5}
  310. add r1, r1, r2
  311. 6: ldm r1, {r6-r7}
  312. add r1, r1, r2
  313. pld [r1]
  314. RND_AVG32 r8, r9, r4, r5, r6, r7, r12
  315. ldm r1, {r4-r5}
  316. add r1, r1, r2
  317. stm r0, {r8-r9}
  318. add r0, r0, r2
  319. pld [r1]
  320. RND_AVG32 r8, r9, r6, r7, r4, r5, r12
  321. subs r3, r3, #1
  322. stm r0, {r8-r9}
  323. add r0, r0, r2
  324. bne 6b
  325. pop {r4-r11,pc}
  326. .align 5
  327. 2:
  328. ldm r1, {r4-r6}
  329. add r1, r1, r2
  330. pld [r1]
  331. ALIGN_DWORD 1, r4, r5, r6
  332. 6: ldm r1, {r7-r9}
  333. add r1, r1, r2
  334. pld [r1]
  335. ALIGN_DWORD 1, r7, r8, r9
  336. RND_AVG32 r10, r11, r4, r5, r7, r8, r12
  337. stm r0, {r10-r11}
  338. add r0, r0, r2
  339. ldm r1, {r4-r6}
  340. add r1, r1, r2
  341. pld [r1]
  342. ALIGN_DWORD 1, r4, r5, r6
  343. subs r3, r3, #1
  344. RND_AVG32 r10, r11, r7, r8, r4, r5, r12
  345. stm r0, {r10-r11}
  346. add r0, r0, r2
  347. bne 6b
  348. pop {r4-r11,pc}
  349. .align 5
  350. 3:
  351. ldm r1, {r4-r6}
  352. add r1, r1, r2
  353. pld [r1]
  354. ALIGN_DWORD 2, r4, r5, r6
  355. 6: ldm r1, {r7-r9}
  356. add r1, r1, r2
  357. pld [r1]
  358. ALIGN_DWORD 2, r7, r8, r9
  359. RND_AVG32 r10, r11, r4, r5, r7, r8, r12
  360. stm r0, {r10-r11}
  361. add r0, r0, r2
  362. ldm r1, {r4-r6}
  363. add r1, r1, r2
  364. pld [r1]
  365. ALIGN_DWORD 2, r4, r5, r6
  366. subs r3, r3, #1
  367. RND_AVG32 r10, r11, r7, r8, r4, r5, r12
  368. stm r0, {r10-r11}
  369. add r0, r0, r2
  370. bne 6b
  371. pop {r4-r11,pc}
  372. .align 5
  373. 4:
  374. ldm r1, {r4-r6}
  375. add r1, r1, r2
  376. pld [r1]
  377. ALIGN_DWORD 3, r4, r5, r6
  378. 6: ldm r1, {r7-r9}
  379. add r1, r1, r2
  380. pld [r1]
  381. ALIGN_DWORD 3, r7, r8, r9
  382. RND_AVG32 r10, r11, r4, r5, r7, r8, r12
  383. stm r0, {r10-r11}
  384. add r0, r0, r2
  385. ldm r1, {r4-r6}
  386. add r1, r1, r2
  387. pld [r1]
  388. ALIGN_DWORD 3, r4, r5, r6
  389. subs r3, r3, #1
  390. RND_AVG32 r10, r11, r7, r8, r4, r5, r12
  391. stm r0, {r10-r11}
  392. add r0, r0, r2
  393. bne 6b
  394. pop {r4-r11,pc}
  395. endfunc
  396. function ff_put_no_rnd_pixels8_y2_arm, export=1, align=5
  397. @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
  398. @ block = word aligned, pixles = unaligned
  399. pld [r1]
  400. push {r4-r11,lr}
  401. mov r3, r3, lsr #1
  402. ldr r12, =0xfefefefe
  403. JMP_ALIGN r5, r1
  404. 1:
  405. ldm r1, {r4-r5}
  406. add r1, r1, r2
  407. 6: ldm r1, {r6-r7}
  408. add r1, r1, r2
  409. pld [r1]
  410. NO_RND_AVG32 r8, r9, r4, r5, r6, r7, r12
  411. ldm r1, {r4-r5}
  412. add r1, r1, r2
  413. stm r0, {r8-r9}
  414. add r0, r0, r2
  415. pld [r1]
  416. NO_RND_AVG32 r8, r9, r6, r7, r4, r5, r12
  417. subs r3, r3, #1
  418. stm r0, {r8-r9}
  419. add r0, r0, r2
  420. bne 6b
  421. pop {r4-r11,pc}
  422. .align 5
  423. 2:
  424. ldm r1, {r4-r6}
  425. add r1, r1, r2
  426. pld [r1]
  427. ALIGN_DWORD 1, r4, r5, r6
  428. 6: ldm r1, {r7-r9}
  429. add r1, r1, r2
  430. pld [r1]
  431. ALIGN_DWORD 1, r7, r8, r9
  432. NO_RND_AVG32 r10, r11, r4, r5, r7, r8, r12
  433. stm r0, {r10-r11}
  434. add r0, r0, r2
  435. ldm r1, {r4-r6}
  436. add r1, r1, r2
  437. pld [r1]
  438. ALIGN_DWORD 1, r4, r5, r6
  439. subs r3, r3, #1
  440. NO_RND_AVG32 r10, r11, r7, r8, r4, r5, r12
  441. stm r0, {r10-r11}
  442. add r0, r0, r2
  443. bne 6b
  444. pop {r4-r11,pc}
  445. .align 5
  446. 3:
  447. ldm r1, {r4-r6}
  448. add r1, r1, r2
  449. pld [r1]
  450. ALIGN_DWORD 2, r4, r5, r6
  451. 6: ldm r1, {r7-r9}
  452. add r1, r1, r2
  453. pld [r1]
  454. ALIGN_DWORD 2, r7, r8, r9
  455. NO_RND_AVG32 r10, r11, r4, r5, r7, r8, r12
  456. stm r0, {r10-r11}
  457. add r0, r0, r2
  458. ldm r1, {r4-r6}
  459. add r1, r1, r2
  460. pld [r1]
  461. ALIGN_DWORD 2, r4, r5, r6
  462. subs r3, r3, #1
  463. NO_RND_AVG32 r10, r11, r7, r8, r4, r5, r12
  464. stm r0, {r10-r11}
  465. add r0, r0, r2
  466. bne 6b
  467. pop {r4-r11,pc}
  468. .align 5
  469. 4:
  470. ldm r1, {r4-r6}
  471. add r1, r1, r2
  472. pld [r1]
  473. ALIGN_DWORD 3, r4, r5, r6
  474. 6: ldm r1, {r7-r9}
  475. add r1, r1, r2
  476. pld [r1]
  477. ALIGN_DWORD 3, r7, r8, r9
  478. NO_RND_AVG32 r10, r11, r4, r5, r7, r8, r12
  479. stm r0, {r10-r11}
  480. add r0, r0, r2
  481. ldm r1, {r4-r6}
  482. add r1, r1, r2
  483. pld [r1]
  484. ALIGN_DWORD 3, r4, r5, r6
  485. subs r3, r3, #1
  486. NO_RND_AVG32 r10, r11, r7, r8, r4, r5, r12
  487. stm r0, {r10-r11}
  488. add r0, r0, r2
  489. bne 6b
  490. pop {r4-r11,pc}
  491. endfunc
  492. .ltorg
  493. @ ----------------------------------------------------------------
  494. .macro RND_XY2_IT align, rnd
  495. @ l1= (a & 0x03030303) + (b & 0x03030303) ?(+ 0x02020202)
  496. @ h1= ((a & 0xFCFCFCFCUL) >> 2) + ((b & 0xFCFCFCFCUL) >> 2)
  497. .if \align == 0
  498. ldm r1, {r6-r8}
  499. .elseif \align == 3
  500. ldm r1, {r5-r7}
  501. .else
  502. ldm r1, {r8-r10}
  503. .endif
  504. add r1, r1, r2
  505. pld [r1]
  506. .if \align == 0
  507. ALIGN_DWORD_D 1, r4, r5, r6, r7, r8
  508. .elseif \align == 1
  509. ALIGN_DWORD_D 1, r4, r5, r8, r9, r10
  510. ALIGN_DWORD_D 2, r6, r7, r8, r9, r10
  511. .elseif \align == 2
  512. ALIGN_DWORD_D 2, r4, r5, r8, r9, r10
  513. ALIGN_DWORD_D 3, r6, r7, r8, r9, r10
  514. .elseif \align == 3
  515. ALIGN_DWORD_D 3, r4, r5, r5, r6, r7
  516. .endif
  517. ldr r14, =0x03030303
  518. tst r3, #1
  519. and r8, r4, r14
  520. and r9, r5, r14
  521. and r10, r6, r14
  522. and r11, r7, r14
  523. it eq
  524. andeq r14, r14, r14, \rnd #1
  525. add r8, r8, r10
  526. add r9, r9, r11
  527. ldr r12, =0xfcfcfcfc >> 2
  528. itt eq
  529. addeq r8, r8, r14
  530. addeq r9, r9, r14
  531. and r4, r12, r4, lsr #2
  532. and r5, r12, r5, lsr #2
  533. and r6, r12, r6, lsr #2
  534. and r7, r12, r7, lsr #2
  535. add r10, r4, r6
  536. add r11, r5, r7
  537. subs r3, r3, #1
  538. .endm
  539. .macro RND_XY2_EXPAND align, rnd
  540. RND_XY2_IT \align, \rnd
  541. 6: push {r8-r11}
  542. RND_XY2_IT \align, \rnd
  543. pop {r4-r7}
  544. add r4, r4, r8
  545. add r5, r5, r9
  546. ldr r14, =0x0f0f0f0f
  547. add r6, r6, r10
  548. add r7, r7, r11
  549. and r4, r14, r4, lsr #2
  550. and r5, r14, r5, lsr #2
  551. add r4, r4, r6
  552. add r5, r5, r7
  553. stm r0, {r4-r5}
  554. add r0, r0, r2
  555. bge 6b
  556. pop {r4-r11,pc}
  557. .endm
  558. function ff_put_pixels8_xy2_arm, export=1, align=5
  559. @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
  560. @ block = word aligned, pixles = unaligned
  561. pld [r1]
  562. push {r4-r11,lr} @ R14 is also called LR
  563. JMP_ALIGN r5, r1
  564. 1: RND_XY2_EXPAND 0, lsl
  565. .align 5
  566. 2: RND_XY2_EXPAND 1, lsl
  567. .align 5
  568. 3: RND_XY2_EXPAND 2, lsl
  569. .align 5
  570. 4: RND_XY2_EXPAND 3, lsl
  571. endfunc
  572. function ff_put_no_rnd_pixels8_xy2_arm, export=1, align=5
  573. @ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
  574. @ block = word aligned, pixles = unaligned
  575. pld [r1]
  576. push {r4-r11,lr}
  577. JMP_ALIGN r5, r1
  578. 1: RND_XY2_EXPAND 0, lsr
  579. .align 5
  580. 2: RND_XY2_EXPAND 1, lsr
  581. .align 5
  582. 3: RND_XY2_EXPAND 2, lsr
  583. .align 5
  584. 4: RND_XY2_EXPAND 3, lsr
  585. endfunc