/*
 * ARM NEON optimised DSP functions
 * Copyright (c) 2008 Mans Rullgard <mans@mansr.com>
 *
 * This file is part of Libav.
 *
 * Libav is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * Libav is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with Libav; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include "libavutil/arm/asm.S"
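
@ ff_clear_block_neon / ff_clear_blocks_neon
@ Zero one (or six) 8x8 blocks of 16-bit coefficients, 128 bytes each,
@ starting at the 16-byte-aligned pointer in r0.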
function ff_clear_block_neon, export=1
        vmov.i16        q0,  #0
.rept   8
        vst1.16         {q0}, [r0,:128]!
.endr
        bx              lr
endfunc

function ff_clear_blocks_neon, export=1
        vmov.i16        q0,  #0
.rept   8*6
        vst1.16         {q0}, [r0,:128]!
.endr
        bx              lr
endfunc
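
@ Half-pel pixel copy/average macros, instantiated below via pixfunc/pixfunc2.
@ Register roles (inferred from the code): r0 = dst, r1 = src, r2 = line size,
@ r3 = height.  With avg=1 the result is averaged with the data already in dst
@ (vrhadd.u8); with rnd=0 the no-rounding helper macros are used instead.  The
@ _x2/_y2/_xy2 variants interpolate halfway to the right, downwards, and
@ diagonally, respectively.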
.macro  pixels16        rnd=1, avg=0
  .if \avg
        mov             r12, r0
  .endif
1:      vld1.8          {q0},     [r1], r2
        vld1.8          {q1},     [r1], r2
        vld1.8          {q2},     [r1], r2
        pld             [r1, r2, lsl #2]
        vld1.8          {q3},     [r1], r2
        pld             [r1]
        pld             [r1, r2]
        pld             [r1, r2, lsl #1]
  .if \avg
        vld1.8          {q8},     [r12,:128], r2
        vrhadd.u8       q0,  q0,  q8
        vld1.8          {q9},     [r12,:128], r2
        vrhadd.u8       q1,  q1,  q9
        vld1.8          {q10},    [r12,:128], r2
        vrhadd.u8       q2,  q2,  q10
        vld1.8          {q11},    [r12,:128], r2
        vrhadd.u8       q3,  q3,  q11
  .endif
        subs            r3,  r3,  #4
        vst1.64         {q0},     [r0,:128], r2
        vst1.64         {q1},     [r0,:128], r2
        vst1.64         {q2},     [r0,:128], r2
        vst1.64         {q3},     [r0,:128], r2
        bne             1b
        bx              lr
.endm
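
@ pixels16_x2: 16-wide copy averaged with the pixel one to the right
@ (horizontal half-pel).  avg expands to vrhadd.u8 (rounding) or vhadd.u8
@ (no rounding); see pixfunc below.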
.macro  pixels16_x2     rnd=1, avg=0
1:      vld1.8          {d0-d2},  [r1], r2
        vld1.8          {d4-d6},  [r1], r2
        pld             [r1]
        pld             [r1, r2]
        subs            r3,  r3,  #2
        vext.8          q1,  q0,  q1,  #1
        avg             q0,  q0,  q1
        vext.8          q3,  q2,  q3,  #1
        avg             q2,  q2,  q3
  .if \avg
        vld1.8          {q1},     [r0,:128], r2
        vld1.8          {q3},     [r0,:128]
        vrhadd.u8       q0,  q0,  q1
        vrhadd.u8       q2,  q2,  q3
        sub             r0,  r0,  r2
  .endif
        vst1.8          {q0},     [r0,:128], r2
        vst1.8          {q2},     [r0,:128], r2
        bne             1b
        bx              lr
.endm
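
@ pixels16_y2: 16-wide copy averaged with the row below (vertical half-pel).
@ The final two output rows are produced outside the loop, so only one source
@ row beyond the block is loaded.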
.macro  pixels16_y2     rnd=1, avg=0
        sub             r3,  r3,  #2
        vld1.8          {q0},     [r1], r2
        vld1.8          {q1},     [r1], r2
1:      subs            r3,  r3,  #2
        avg             q2,  q0,  q1
        vld1.8          {q0},     [r1], r2
        avg             q3,  q0,  q1
        vld1.8          {q1},     [r1], r2
        pld             [r1]
        pld             [r1, r2]
  .if \avg
        vld1.8          {q8},     [r0,:128], r2
        vld1.8          {q9},     [r0,:128]
        vrhadd.u8       q2,  q2,  q8
        vrhadd.u8       q3,  q3,  q9
        sub             r0,  r0,  r2
  .endif
        vst1.8          {q2},     [r0,:128], r2
        vst1.8          {q3},     [r0,:128], r2
        bne             1b
        avg             q2,  q0,  q1
        vld1.8          {q0},     [r1], r2
        avg             q3,  q0,  q1
  .if \avg
        vld1.8          {q8},     [r0,:128], r2
        vld1.8          {q9},     [r0,:128]
        vrhadd.u8       q2,  q2,  q8
        vrhadd.u8       q3,  q3,  q9
        sub             r0,  r0,  r2
  .endif
        vst1.8          {q2},     [r0,:128], r2
        vst1.8          {q3},     [r0,:128], r2
        bx              lr
.endm
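
@ pixels16_xy2: 16-wide diagonal half-pel, averaging each pixel with its
@ right, lower and lower-right neighbours.  Partial sums are kept as 16-bit
@ values in q8-q11 and narrowed with shrn; the NRND-prefixed instructions
@ (which add the +1 bias held in q13) are only assembled for the no-rounding
@ variant, where shrn is a truncating vshrn.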
.macro  pixels16_xy2    rnd=1, avg=0
        sub             r3,  r3,  #2
        vld1.8          {d0-d2},  [r1], r2
        vld1.8          {d4-d6},  [r1], r2
NRND    vmov.i16        q13, #1
        pld             [r1]
        pld             [r1, r2]
        vext.8          q1,  q0,  q1,  #1
        vext.8          q3,  q2,  q3,  #1
        vaddl.u8        q8,  d0,  d2
        vaddl.u8        q10, d1,  d3
        vaddl.u8        q9,  d4,  d6
        vaddl.u8        q11, d5,  d7
1:      subs            r3,  r3,  #2
        vld1.8          {d0-d2},  [r1], r2
        vadd.u16        q12, q8,  q9
        pld             [r1]
NRND    vadd.u16        q12, q12, q13
        vext.8          q15, q0,  q1,  #1
        vadd.u16        q1,  q10, q11
        shrn            d28, q12, #2
NRND    vadd.u16        q1,  q1,  q13
        shrn            d29, q1,  #2
  .if \avg
        vld1.8          {q8},     [r0,:128]
        vrhadd.u8       q14, q14, q8
  .endif
        vaddl.u8        q8,  d0,  d30
        vld1.8          {d2-d4},  [r1], r2
        vaddl.u8        q10, d1,  d31
        vst1.8          {q14},    [r0,:128], r2
        vadd.u16        q12, q8,  q9
        pld             [r1, r2]
NRND    vadd.u16        q12, q12, q13
        vext.8          q2,  q1,  q2,  #1
        vadd.u16        q0,  q10, q11
        shrn            d30, q12, #2
NRND    vadd.u16        q0,  q0,  q13
        shrn            d31, q0,  #2
  .if \avg
        vld1.8          {q9},     [r0,:128]
        vrhadd.u8       q15, q15, q9
  .endif
        vaddl.u8        q9,  d2,  d4
        vaddl.u8        q11, d3,  d5
        vst1.8          {q15},    [r0,:128], r2
        bgt             1b

        vld1.8          {d0-d2},  [r1], r2
        vadd.u16        q12, q8,  q9
NRND    vadd.u16        q12, q12, q13
        vext.8          q15, q0,  q1,  #1
        vadd.u16        q1,  q10, q11
        shrn            d28, q12, #2
NRND    vadd.u16        q1,  q1,  q13
        shrn            d29, q1,  #2
  .if \avg
        vld1.8          {q8},     [r0,:128]
        vrhadd.u8       q14, q14, q8
  .endif
        vaddl.u8        q8,  d0,  d30
        vaddl.u8        q10, d1,  d31
        vst1.8          {q14},    [r0,:128], r2
        vadd.u16        q12, q8,  q9
NRND    vadd.u16        q12, q12, q13
        vadd.u16        q0,  q10, q11
        shrn            d30, q12, #2
NRND    vadd.u16        q0,  q0,  q13
        shrn            d31, q0,  #2
  .if \avg
        vld1.8          {q9},     [r0,:128]
        vrhadd.u8       q15, q15, q9
  .endif
        vst1.8          {q15},    [r0,:128], r2
        bx              lr
.endm
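
@ 8-pixel-wide versions of the same four operations, using d registers
@ instead of q registers for the pixel data.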
.macro  pixels8         rnd=1, avg=0
1:      vld1.8          {d0},     [r1], r2
        vld1.8          {d1},     [r1], r2
        vld1.8          {d2},     [r1], r2
        pld             [r1, r2, lsl #2]
        vld1.8          {d3},     [r1], r2
        pld             [r1]
        pld             [r1, r2]
        pld             [r1, r2, lsl #1]
  .if \avg
        vld1.8          {d4},     [r0,:64], r2
        vrhadd.u8       d0,  d0,  d4
        vld1.8          {d5},     [r0,:64], r2
        vrhadd.u8       d1,  d1,  d5
        vld1.8          {d6},     [r0,:64], r2
        vrhadd.u8       d2,  d2,  d6
        vld1.8          {d7},     [r0,:64], r2
        vrhadd.u8       d3,  d3,  d7
        sub             r0,  r0,  r2,  lsl #2
  .endif
        subs            r3,  r3,  #4
        vst1.8          {d0},     [r0,:64], r2
        vst1.8          {d1},     [r0,:64], r2
        vst1.8          {d2},     [r0,:64], r2
        vst1.8          {d3},     [r0,:64], r2
        bne             1b
        bx              lr
.endm

.macro  pixels8_x2      rnd=1, avg=0
1:      vld1.8          {q0},     [r1], r2
        vext.8          d1,  d0,  d1,  #1
        vld1.8          {q1},     [r1], r2
        vext.8          d3,  d2,  d3,  #1
        pld             [r1]
        pld             [r1, r2]
        subs            r3,  r3,  #2
        vswp            d1,  d2
        avg             q0,  q0,  q1
  .if \avg
        vld1.8          {d4},     [r0,:64], r2
        vld1.8          {d5},     [r0,:64]
        vrhadd.u8       q0,  q0,  q2
        sub             r0,  r0,  r2
  .endif
        vst1.8          {d0},     [r0,:64], r2
        vst1.8          {d1},     [r0,:64], r2
        bne             1b
        bx              lr
.endm

.macro  pixels8_y2      rnd=1, avg=0
        sub             r3,  r3,  #2
        vld1.8          {d0},     [r1], r2
        vld1.8          {d1},     [r1], r2
1:      subs            r3,  r3,  #2
        avg             d4,  d0,  d1
        vld1.8          {d0},     [r1], r2
        avg             d5,  d0,  d1
        vld1.8          {d1},     [r1], r2
        pld             [r1]
        pld             [r1, r2]
  .if \avg
        vld1.8          {d2},     [r0,:64], r2
        vld1.8          {d3},     [r0,:64]
        vrhadd.u8       q2,  q2,  q1
        sub             r0,  r0,  r2
  .endif
        vst1.8          {d4},     [r0,:64], r2
        vst1.8          {d5},     [r0,:64], r2
        bne             1b
        avg             d4,  d0,  d1
        vld1.8          {d0},     [r1], r2
        avg             d5,  d0,  d1
  .if \avg
        vld1.8          {d2},     [r0,:64], r2
        vld1.8          {d3},     [r0,:64]
        vrhadd.u8       q2,  q2,  q1
        sub             r0,  r0,  r2
  .endif
        vst1.8          {d4},     [r0,:64], r2
        vst1.8          {d5},     [r0,:64], r2
        bx              lr
.endm

.macro  pixels8_xy2     rnd=1, avg=0
        sub             r3,  r3,  #2
        vld1.8          {q0},     [r1], r2
        vld1.8          {q1},     [r1], r2
NRND    vmov.i16        q11, #1
        pld             [r1]
        pld             [r1, r2]
        vext.8          d4,  d0,  d1,  #1
        vext.8          d6,  d2,  d3,  #1
        vaddl.u8        q8,  d0,  d4
        vaddl.u8        q9,  d2,  d6
1:      subs            r3,  r3,  #2
        vld1.8          {q0},     [r1], r2
        pld             [r1]
        vadd.u16        q10, q8,  q9
        vext.8          d4,  d0,  d1,  #1
NRND    vadd.u16        q10, q10, q11
        vaddl.u8        q8,  d0,  d4
        shrn            d5,  q10, #2
        vld1.8          {q1},     [r1], r2
        vadd.u16        q10, q8,  q9
        pld             [r1, r2]
  .if \avg
        vld1.8          {d7},     [r0,:64]
        vrhadd.u8       d5,  d5,  d7
  .endif
NRND    vadd.u16        q10, q10, q11
        vst1.8          {d5},     [r0,:64], r2
        shrn            d7,  q10, #2
  .if \avg
        vld1.8          {d5},     [r0,:64]
        vrhadd.u8       d7,  d7,  d5
  .endif
        vext.8          d6,  d2,  d3,  #1
        vaddl.u8        q9,  d2,  d6
        vst1.8          {d7},     [r0,:64], r2
        bgt             1b

        vld1.8          {q0},     [r1], r2
        vadd.u16        q10, q8,  q9
        vext.8          d4,  d0,  d1,  #1
NRND    vadd.u16        q10, q10, q11
        vaddl.u8        q8,  d0,  d4
        shrn            d5,  q10, #2
        vadd.u16        q10, q8,  q9
  .if \avg
        vld1.8          {d7},     [r0,:64]
        vrhadd.u8       d5,  d5,  d7
  .endif
NRND    vadd.u16        q10, q10, q11
        vst1.8          {d5},     [r0,:64], r2
        shrn            d7,  q10, #2
  .if \avg
        vld1.8          {d5},     [r0,:64]
        vrhadd.u8       d7,  d7,  d5
  .endif
        vst1.8          {d7},     [r0,:64], r2
        bx              lr
.endm
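
@ pixfunc emits one exported function, ff_<pfx><name><suf>_neon, whose body is
@ the macro named above.  It first defines the avg/shrn/NRND helpers according
@ to \rnd: with rounding, avg/shrn expand to vrhadd.u8/vrshrn.u16 and NRND
@ lines are dropped; without rounding, they expand to vhadd.u8/vshrn.u16 and
@ NRND lines are kept.  pixfunc2 emits both the rounding and _no_rnd variants.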
.macro  pixfunc         pfx, name, suf, rnd=1, avg=0
  .if \rnd
    .macro avg  rd, rn, rm
        vrhadd.u8       \rd, \rn, \rm
    .endm
    .macro shrn rd, rn, rm
        vrshrn.u16      \rd, \rn, \rm
    .endm
    .macro NRND insn:vararg
    .endm
  .else
    .macro avg  rd, rn, rm
        vhadd.u8        \rd, \rn, \rm
    .endm
    .macro shrn rd, rn, rm
        vshrn.u16       \rd, \rn, \rm
    .endm
    .macro NRND insn:vararg
        \insn
    .endm
  .endif
function ff_\pfx\name\suf\()_neon, export=1
        \name           \rnd, \avg
endfunc
  .purgem avg
  .purgem shrn
  .purgem NRND
.endm

.macro  pixfunc2        pfx, name, avg=0
        pixfunc         \pfx, \name,          rnd=1, avg=\avg
        pixfunc         \pfx, \name, _no_rnd, rnd=0, avg=\avg
.endm
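
@ The h264 qpel16/qpel8 mc00 entry points only set the height in r3 and then
@ fall straight through into the pixels16/pixels8 function emitted immediately
@ after them (endfunc emits no return), so the plain copy code is shared.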
function ff_put_h264_qpel16_mc00_neon, export=1
        mov             r3,  #16
endfunc

        pixfunc         put_, pixels16,     avg=0
        pixfunc2        put_, pixels16_x2,  avg=0
        pixfunc2        put_, pixels16_y2,  avg=0
        pixfunc2        put_, pixels16_xy2, avg=0

function ff_avg_h264_qpel16_mc00_neon, export=1
        mov             r3,  #16
endfunc

        pixfunc         avg_, pixels16,     avg=1
        pixfunc2        avg_, pixels16_x2,  avg=1
        pixfunc2        avg_, pixels16_y2,  avg=1
        pixfunc2        avg_, pixels16_xy2, avg=1

function ff_put_h264_qpel8_mc00_neon, export=1
        mov             r3,  #8
endfunc

        pixfunc         put_, pixels8,     avg=0
        pixfunc2        put_, pixels8_x2,  avg=0
        pixfunc2        put_, pixels8_y2,  avg=0
        pixfunc2        put_, pixels8_xy2, avg=0

function ff_avg_h264_qpel8_mc00_neon, export=1
        mov             r3,  #8
endfunc

        pixfunc         avg_, pixels8,     avg=1
        pixfunc         avg_, pixels8_x2,  avg=1
        pixfunc         avg_, pixels8_y2,  avg=1
        pixfunc         avg_, pixels8_xy2, avg=1
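
@ ff_put_pixels_clamped_neon: convert an 8x8 block of 16-bit coefficients
@ (r0) to unsigned bytes with saturation (vqmovun) and store 8-byte rows to
@ r1 with stride r2.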
function ff_put_pixels_clamped_neon, export=1
        vld1.16         {d16-d19}, [r0,:128]!
        vqmovun.s16     d0,  q8
        vld1.16         {d20-d23}, [r0,:128]!
        vqmovun.s16     d1,  q9
        vld1.16         {d24-d27}, [r0,:128]!
        vqmovun.s16     d2,  q10
        vld1.16         {d28-d31}, [r0,:128]!
        vqmovun.s16     d3,  q11
        vst1.8          {d0},      [r1,:64], r2
        vqmovun.s16     d4,  q12
        vst1.8          {d1},      [r1,:64], r2
        vqmovun.s16     d5,  q13
        vst1.8          {d2},      [r1,:64], r2
        vqmovun.s16     d6,  q14
        vst1.8          {d3},      [r1,:64], r2
        vqmovun.s16     d7,  q15
        vst1.8          {d4},      [r1,:64], r2
        vst1.8          {d5},      [r1,:64], r2
        vst1.8          {d6},      [r1,:64], r2
        vst1.8          {d7},      [r1,:64], r2
        bx              lr
endfunc
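
@ ff_put_signed_pixels_clamped_neon: same store pattern, but the coefficients
@ are narrowed with signed saturation (vqmovn) and biased by +128 (d31) to map
@ the signed range onto 0-255.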
function ff_put_signed_pixels_clamped_neon, export=1
        vmov.u8         d31, #128
        vld1.16         {d16-d17}, [r0,:128]!
        vqmovn.s16      d0,  q8
        vld1.16         {d18-d19}, [r0,:128]!
        vqmovn.s16      d1,  q9
        vld1.16         {d16-d17}, [r0,:128]!
        vqmovn.s16      d2,  q8
        vld1.16         {d18-d19}, [r0,:128]!
        vadd.u8         d0,  d0,  d31
        vld1.16         {d20-d21}, [r0,:128]!
        vadd.u8         d1,  d1,  d31
        vld1.16         {d22-d23}, [r0,:128]!
        vadd.u8         d2,  d2,  d31
        vst1.8          {d0},      [r1,:64], r2
        vqmovn.s16      d3,  q9
        vst1.8          {d1},      [r1,:64], r2
        vqmovn.s16      d4,  q10
        vst1.8          {d2},      [r1,:64], r2
        vqmovn.s16      d5,  q11
        vld1.16         {d24-d25}, [r0,:128]!
        vadd.u8         d3,  d3,  d31
        vld1.16         {d26-d27}, [r0,:128]!
        vadd.u8         d4,  d4,  d31
        vadd.u8         d5,  d5,  d31
        vst1.8          {d3},      [r1,:64], r2
        vqmovn.s16      d6,  q12
        vst1.8          {d4},      [r1,:64], r2
        vqmovn.s16      d7,  q13
        vst1.8          {d5},      [r1,:64], r2
        vadd.u8         d6,  d6,  d31
        vadd.u8         d7,  d7,  d31
        vst1.8          {d6},      [r1,:64], r2
        vst1.8          {d7},      [r1,:64], r2
        bx              lr
endfunc
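
@ ff_add_pixels_clamped_neon: add an 8x8 block of 16-bit coefficients (r0) to
@ the existing pixels at r1 (stride r2), saturate to unsigned bytes and write
@ the result back through r3, a copy of the pixel pointer.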
function ff_add_pixels_clamped_neon, export=1
        mov             r3,  r1
        vld1.8          {d16},   [r1,:64], r2
        vld1.16         {d0-d1}, [r0,:128]!
        vaddw.u8        q0,  q0,  d16
        vld1.8          {d17},   [r1,:64], r2
        vld1.16         {d2-d3}, [r0,:128]!
        vqmovun.s16     d0,  q0
        vld1.8          {d18},   [r1,:64], r2
        vaddw.u8        q1,  q1,  d17
        vld1.16         {d4-d5}, [r0,:128]!
        vaddw.u8        q2,  q2,  d18
        vst1.8          {d0},    [r3,:64], r2
        vqmovun.s16     d2,  q1
        vld1.8          {d19},   [r1,:64], r2
        vld1.16         {d6-d7}, [r0,:128]!
        vaddw.u8        q3,  q3,  d19
        vqmovun.s16     d4,  q2
        vst1.8          {d2},    [r3,:64], r2
        vld1.8          {d16},   [r1,:64], r2
        vqmovun.s16     d6,  q3
        vld1.16         {d0-d1}, [r0,:128]!
        vaddw.u8        q0,  q0,  d16
        vst1.8          {d4},    [r3,:64], r2
        vld1.8          {d17},   [r1,:64], r2
        vld1.16         {d2-d3}, [r0,:128]!
        vaddw.u8        q1,  q1,  d17
        vst1.8          {d6},    [r3,:64], r2
        vqmovun.s16     d0,  q0
        vld1.8          {d18},   [r1,:64], r2
        vld1.16         {d4-d5}, [r0,:128]!
        vaddw.u8        q2,  q2,  d18
        vst1.8          {d0},    [r3,:64], r2
        vqmovun.s16     d2,  q1
        vld1.8          {d19},   [r1,:64], r2
        vqmovun.s16     d4,  q2
        vld1.16         {d6-d7}, [r0,:128]!
        vaddw.u8        q3,  q3,  d19
        vst1.8          {d2},    [r3,:64], r2
        vqmovun.s16     d6,  q3
        vst1.8          {d4},    [r3,:64], r2
        vst1.8          {d6},    [r3,:64], r2
        bx              lr
endfunc
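
@ ff_vector_clipf_neon: clamp a vector of floats to [min, max], eight elements
@ per iteration.  The VFP/NOVFP prefixed lines cover both calling conventions:
@ with hardware floating-point argument passing the bounds arrive in d0,
@ otherwise they arrive in r2/r3 with the element count on the stack.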
function ff_vector_clipf_neon, export=1
VFP     vdup.32         q1,  d0[1]
VFP     vdup.32         q0,  d0[0]
NOVFP   vdup.32         q0,  r2
NOVFP   vdup.32         q1,  r3
NOVFP   ldr             r2,  [sp]
        vld1.f32        {q2}, [r1,:128]!
        vmin.f32        q10, q2,  q1
        vld1.f32        {q3}, [r1,:128]!
        vmin.f32        q11, q3,  q1
1:      vmax.f32        q8,  q10, q0
        vmax.f32        q9,  q11, q0
        subs            r2,  r2,  #8
        beq             2f
        vld1.f32        {q2}, [r1,:128]!
        vmin.f32        q10, q2,  q1
        vld1.f32        {q3}, [r1,:128]!
        vmin.f32        q11, q3,  q1
        vst1.f32        {q8}, [r0,:128]!
        vst1.f32        {q9}, [r0,:128]!
        b               1b
2:      vst1.f32        {q8}, [r0,:128]!
        vst1.f32        {q9}, [r0,:128]!
        bx              lr
endfunc
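
@ ff_apply_window_int16_neon: multiply 16-bit samples (r1) by a 16-bit window
@ (r2) that is assumed symmetric, using Q15 rounding multiplies (vqrdmulh) and
@ writing r3 output samples to r0.  Each iteration handles 8 samples from the
@ front and 8 from the back, reusing the reversed window values for the
@ mirrored half.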
function ff_apply_window_int16_neon, export=1
        push            {r4,lr}
        add             r4,  r1,  r3,  lsl #1
        add             lr,  r0,  r3,  lsl #1
        sub             r4,  r4,  #16
        sub             lr,  lr,  #16
        mov             r12, #-16
1:
        vld1.16         {q0},     [r1,:128]!
        vld1.16         {q2},     [r2,:128]!
        vld1.16         {q1},     [r4,:128], r12
        vrev64.16       q3,  q2
        vqrdmulh.s16    q0,  q0,  q2
        vqrdmulh.s16    d2,  d2,  d7
        vqrdmulh.s16    d3,  d3,  d6
        vst1.16         {q0},     [r0,:128]!
        vst1.16         {q1},     [lr,:128], r12
        subs            r3,  r3,  #16
        bgt             1b
        pop             {r4,pc}
endfunc
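
@ ff_vector_clip_int32_neon: clamp 32-bit integers from r1 to [r2, r3] and
@ store them to r0, eight elements per iteration; the element count is taken
@ from the stack.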
function ff_vector_clip_int32_neon, export=1
        vdup.32         q0,  r2
        vdup.32         q1,  r3
        ldr             r2,  [sp]
1:
        vld1.32         {q2-q3},  [r1,:128]!
        vmin.s32        q2,  q2,  q1
        vmin.s32        q3,  q3,  q1
        vmax.s32        q2,  q2,  q0
        vmax.s32        q3,  q3,  q0
        vst1.32         {q2-q3},  [r0,:128]!
        subs            r2,  r2,  #8
        bgt             1b
        bx              lr
endfunc