You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

534 lines
15KB

  1. /*
  2. * Copyright (c) 2014 RISC OS Open Ltd
  3. * Author: Ben Avison <bavison@riscosopen.org>
  4. *
  5. * This file is part of Libav.
  6. *
  7. * Libav is free software; you can redistribute it and/or
  8. * modify it under the terms of the GNU Lesser General Public
  9. * License as published by the Free Software Foundation; either
  10. * version 2.1 of the License, or (at your option) any later version.
  11. *
  12. * Libav is distributed in the hope that it will be useful,
  13. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  15. * Lesser General Public License for more details.
  16. *
  17. * You should have received a copy of the GNU Lesser General Public
  18. * License along with Libav; if not, write to the Free Software
  19. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  20. */
  21. #include "libavutil/arm/asm.S"
  // loadregoffsh2 group, index, base, offgroup, offindex
  //
  // Front end for loadregoffsh2_. It switches the assembler into
  // .altmacro mode so that the index and offindex arguments, which may
  // be arithmetic expressions, are reduced to plain numbers by the
  // percent operator before being passed on, then switches back to
  // .noaltmacro mode.
  //   group, index       - name prefix and numeric expression that
  //                        together select the destination register
  //   base               - base address register
  //   offgroup, offindex - name prefix and numeric expression that
  //                        together select the offset register
  .macro loadregoffsh2 group, index, base, offgroup, offindex
        .altmacro
        loadregoffsh2_ \group, %(\index), \base, \offgroup, %(\offindex)
        .noaltmacro
  .endm
  // loadregoffsh2_ group, index, base, offgroup, offindex
  //
  // Emits one LDR. The destination register name is formed by pasting
  // group and index together, the offset register name by pasting
  // offgroup and offindex together. The offset register is scaled by
  // four (lsl #2), so it acts as a word index relative to base.
  // index and offindex must already be plain numbers at this point.
  .macro loadregoffsh2_ group, index, base, offgroup, offindex
        ldr     \group\index, [\base, \offgroup\offindex, lsl #2]
  .endm
  // eorlslreg check, data, group, index
  //
  // Front end for eorlslreg_. It enables .altmacro mode so that the
  // index argument, which may be an arithmetic expression, is reduced
  // to a plain number by the percent operator, forwards everything to
  // eorlslreg_, then restores .noaltmacro mode.
  //   check        - accumulator register (read and written)
  //   data         - register whose value is shifted and combined
  //   group, index - name prefix and numeric expression that together
  //                  select the register holding the shift amount
  .macro eorlslreg check, data, group, index
        .altmacro
        eorlslreg_ \check, \data, \group, %(\index)
        .noaltmacro
  .endm
  // eorlslreg_ check, data, group, index
  //
  // Emits one EOR: check = check EOR (data shifted left by the value
  // held in the register whose name is group pasted with index).
  // index must already be a plain number at this point.
  .macro eorlslreg_ check, data, group, index
        eor     \check, \check, \data, lsl \group\index
  .endm
  // decr_modulo var, by, modulus
  //
  // Assembly-time countdown helper; emits no instructions.
  // Subtracts by from the assembler symbol var and, when the result is
  // exactly zero, reloads var with modulus. Only an exact zero wraps,
  // so callers must step var in amounts that land on zero.
  .macro decr_modulo var, by, modulus
   .set \var, \var - \by
   .if \var == 0
    .set \var, \modulus
   .endif
  .endm
  // load_group1 size, channels, r0, r1, r2, r3, pointer_dead=0
  //
  // Loads the first group of words for a 4-word output batch from the
  // address in IN and advances the assembly-time counter IDX1.
  // The invoker must have defined the register alias IN and the
  // symbol IDX1 before using this macro.
  //   size         - 2 or 4: number of words to load
  //   channels     - channel count, used for pointer stepping and as
  //                  the wrap value of IDX1
  //   r0-r3        - destination registers (only r0, r1 when size is 2)
  //   pointer_dead - nonzero when IN is not needed afterwards, which
  //                  lets the separate pointer adjustment be omitted
  //
  // size == 2: LDRD with a post-increment of (size + 8 - channels) words.
  // size == 4: LDM with writeback (a 4 word step) when IDX1 > 4 or
  //            channels is 8; otherwise LDM without writeback followed
  //            by an ADD of (4 + 8 - channels) words.
  // NOTE(review): the constant 8 looks like the per-sample stride of the
  // input buffer in words - confirm against the caller.
  .macro load_group1 size, channels, r0, r1, r2, r3, pointer_dead=0
   .if \size == 2
        ldrd    \r0, \r1, [IN], #(\size + 8 - \channels) * 4
   .else // size == 4
    .if IDX1 > 4 || \channels==8
        ldm     IN!, {\r0, \r1, \r2, \r3}
    .else
        ldm     IN, {\r0, \r1, \r2, \r3}
     .if !\pointer_dead
        add     IN, IN, #(4 + 8 - \channels) * 4
     .endif
    .endif
   .endif
        decr_modulo IDX1, \size, \channels
  .endm
  // load_group2 size, channels, r0, r1, r2, r3, pointer_dead=0
  //
  // Loads the second group of words for a 4-word output batch from the
  // address in IN and advances the assembly-time counter IDX1.
  // The invoker must have defined the register alias IN and the
  // symbol IDX1 before using this macro.
  //   size         - number of words in this group; instructions are
  //                  emitted only when it is 2
  //   channels     - channel count, used for pointer stepping and as
  //                  the wrap value of IDX1
  //   r2, r3       - destination registers
  //   r0, r1, pointer_dead - accepted so the signature mirrors
  //                  load_group1; not referenced by the active code
  //
  // When IDX1 > 2 the pair is fetched with LDM and writeback; otherwise
  // LDRD is used with a post-increment of (size + 8 - channels) words.
  // The lines marked //A are a disabled alternative carried over from
  // the original source and are kept for reference only.
  .macro load_group2 size, channels, r0, r1, r2, r3, pointer_dead=0
   .if \size == 2
    .if IDX1 > 2
        ldm     IN!, {\r2, \r3}
    .else
//A   .ifc \r2, ip
//A    .if \pointer_dead
//A       ldm     IN, {\r2, \r3}
//A    .else
//A       ldr     \r2, [IN], #4
//A       ldr     \r3, [IN], #(\size - 1 + 8 - \channels) * 4
//A    .endif
//A   .else
        ldrd    \r2, \r3, [IN], #(\size + 8 - \channels) * 4
//A   .endif
    .endif
   .endif
        decr_modulo IDX1, \size, \channels
  .endm
  // implement_pack inorder, channels, shift
  //
  // Instantiates one ARMv6 variant of the MLP output packing routine.
  //   inorder  - nonzero: words are read from the input in storage order
  //              zero:    words are read through the ch_assign table
  //   channels - channel count; the branches below distinguish 2, 6, 8
  //   shift    - a literal shift amount applied to every word, or the
  //              word mixed to take per-channel shifts from output_shift
  //
  // Each expansion defines its register aliases and a private
  // output4words macro, emits one exported function, and finally
  // removes the aliases and the private macro again so that the next
  // expansion can define its own. The combination of out-of-order
  // channels with mixed shifts emits nothing.
  //
  // Every emitted function tail-branches to the C routine
  // ff_mlp_pack_output when COUNT is not a multiple of
  // SAMPLES_PER_LOOP; nothing has been pushed at that point, so the
  // incoming arguments are passed on untouched.
  .macro implement_pack inorder, channels, shift
  .if \inorder
  .ifc \shift, mixed

  // ------------------------------------------------------------------
  // In-order channels, per-channel shifts
  // ------------------------------------------------------------------
  CHECK   .req    a1      // checksum accumulator, still in a1 on return
  COUNT   .req    a2      // samples left to process
  IN      .req    a3      // read pointer
  OUT     .req    a4      // write pointer
  DAT0    .req    v1      // the four words of the current batch
  DAT1    .req    v2
  DAT2    .req    v3
  DAT3    .req    v4
  SHIFT0  .req    v5      // shift amounts, see the function prologue
  SHIFT1  .req    v6
  SHIFT2  .req    sl
  SHIFT3  .req    fp
  SHIFT4  .req    ip
  SHIFT5  .req    lr

   // Load four words, shift each by the amount belonging to its
   // channel, fold them into CHECK and store them.
   // IDX1 tracks the channel position of the loads, IDX2 the channel
   // position of the shift and checksum steps; both count down from
   // the channel count and wrap.
   .macro output4words
    .set SIZE_GROUP1, IDX1
    .if SIZE_GROUP1 > 4
     .set SIZE_GROUP1, 4
    .endif
    .set SIZE_GROUP2, 4 - SIZE_GROUP1
        load_group1 SIZE_GROUP1, \channels, DAT0, DAT1, DAT2, DAT3
        load_group2 SIZE_GROUP2, \channels, DAT0, DAT1, DAT2, DAT3
    .if \channels == 2
        lsl     DAT0, SHIFT0
        lsl     DAT1, SHIFT1
        lsl     DAT2, SHIFT0
        lsl     DAT3, SHIFT1
    .elseif \channels == 6
     // Three batches of four cover two samples of six channels, so
     // the shift registers rotate through three alignments.
     .if IDX2 == 6
        lsl     DAT0, SHIFT0
        lsl     DAT1, SHIFT1
        lsl     DAT2, SHIFT2
        lsl     DAT3, SHIFT3
     .elseif IDX2 == 2
        lsl     DAT0, SHIFT4
        lsl     DAT1, SHIFT5
        lsl     DAT2, SHIFT0
        lsl     DAT3, SHIFT1
     .else // IDX2 == 4
        lsl     DAT0, SHIFT2
        lsl     DAT1, SHIFT3
        lsl     DAT2, SHIFT4
        lsl     DAT3, SHIFT5
     .endif
    .elseif \channels == 8
     // The eight shifts stay packed as bytes in SHIFT4 and SHIFT5;
     // unpack the four needed for this batch into SHIFT0-SHIFT3.
     .if IDX2 == 8
        uxtb    SHIFT0, SHIFT4, ror #0
        uxtb    SHIFT1, SHIFT4, ror #8
        uxtb    SHIFT2, SHIFT4, ror #16
        uxtb    SHIFT3, SHIFT4, ror #24
     .else
        uxtb    SHIFT0, SHIFT5, ror #0
        uxtb    SHIFT1, SHIFT5, ror #8
        uxtb    SHIFT2, SHIFT5, ror #16
        uxtb    SHIFT3, SHIFT5, ror #24
     .endif
        lsl     DAT0, SHIFT0
        lsl     DAT1, SHIFT1
        lsl     DAT2, SHIFT2
        lsl     DAT3, SHIFT3
    .endif
        // Checksum: the right shift is 8 minus the channel position
        // (channel count minus IDX2 for the first word of a pair,
        // one more for the second word, hence the 7).
        eor     CHECK, CHECK, DAT0, lsr #8 - (\channels - IDX2)
        eor     CHECK, CHECK, DAT1, lsr #7 - (\channels - IDX2)
        decr_modulo IDX2, 2, \channels
        eor     CHECK, CHECK, DAT2, lsr #8 - (\channels - IDX2)
        eor     CHECK, CHECK, DAT3, lsr #7 - (\channels - IDX2)
        decr_modulo IDX2, 2, \channels
        stm     OUT!, {DAT0 - DAT3}
   .endm

   // Unrolling factor: the loop body must cover a whole number of
   // samples and a whole number of 4-word batches.
   .set WORDS_PER_LOOP, \channels // calculate LCM (channels, 4)
   .if (WORDS_PER_LOOP % 2) == 0
    .set WORDS_PER_LOOP, WORDS_PER_LOOP / 2
   .endif
   .if (WORDS_PER_LOOP % 2) == 0
    .set WORDS_PER_LOOP, WORDS_PER_LOOP / 2
   .endif
   .set WORDS_PER_LOOP, WORDS_PER_LOOP * 4
   .set SAMPLES_PER_LOOP, WORDS_PER_LOOP / \channels

  function ff_mlp_pack_output_inorder_\channels\()ch_mixedshift_armv6, export=1
   .if SAMPLES_PER_LOOP > 1
        tst     COUNT, #SAMPLES_PER_LOOP - 1  // COUNT is a multiple of SAMPLES_PER_LOOP in practice
        it      ne
        bne     X(ff_mlp_pack_output)         // but just in case, branch to C implementation if not
   .endif
        teq     COUNT, #0                     // nothing to do: return with CHECK unchanged
        it      eq
        bxeq    lr
        push    {v1-v6,sl,fp,lr}
        // Nine words were just pushed, so the stack arguments now start
        // at word 9; output_shift is the second of them.
        ldr     SHIFT0, [sp, #(9+1)*4]  // get output_shift from stack
        ldr     SHIFT1, =0x08080808
        ldr     SHIFT4, [SHIFT0]        // shifts for channels 0-3, one byte each
   .if \channels == 2
        uadd8   SHIFT4, SHIFT4, SHIFT1  // increase all shifts by 8
        uxtb    SHIFT0, SHIFT4, ror #0
        uxtb    SHIFT1, SHIFT4, ror #8
   .else
        ldr     SHIFT5, [SHIFT0, #4]    // shifts for channels 4-7, one byte each
        uadd8   SHIFT4, SHIFT4, SHIFT1  // increase all shifts by 8
        uadd8   SHIFT5, SHIFT5, SHIFT1
    .if \channels == 6
        // Six shifts fit in six registers, so unpack them once here.
        uxtb    SHIFT0, SHIFT4, ror #0
        uxtb    SHIFT1, SHIFT4, ror #8
        uxtb    SHIFT2, SHIFT4, ror #16
        uxtb    SHIFT3, SHIFT4, ror #24
        uxtb    SHIFT4, SHIFT5, ror #0
        uxtb    SHIFT5, SHIFT5, ror #8
    .endif
   .endif
   .set IDX1, \channels
   .set IDX2, \channels
  0:
   .rept WORDS_PER_LOOP / 4
        output4words
   .endr
        subs    COUNT, COUNT, #SAMPLES_PER_LOOP
        bne     0b
        pop     {v1-v6,sl,fp,pc}
        .ltorg                          // literal pool for the ldr = above
  endfunc
   .purgem output4words

        .unreq  CHECK
        .unreq  COUNT
        .unreq  IN
        .unreq  OUT
        .unreq  DAT0
        .unreq  DAT1
        .unreq  DAT2
        .unreq  DAT3
        .unreq  SHIFT0
        .unreq  SHIFT1
        .unreq  SHIFT2
        .unreq  SHIFT3
        .unreq  SHIFT4
        .unreq  SHIFT5

  .else // not mixed

  // ------------------------------------------------------------------
  // In-order channels, one constant shift
  // ------------------------------------------------------------------
  CHECK   .req    a1      // checksum accumulator, still in a1 on return
  COUNT   .req    a2      // samples left to process
  IN      .req    a3      // read pointer
  OUT     .req    a4      // write pointer
  DAT0    .req    v1      // DAT0-3 and DAT4-7 alternate as the batch being
  DAT1    .req    v2      // loaded and the batch being written out
  DAT2    .req    v3
  DAT3    .req    v4
  DAT4    .req    v5
  DAT5    .req    v6
  DAT6    .req    sl      // use these rather than the otherwise unused
  DAT7    .req    fp      // ip and lr so that we can load them using LDRD

   // Software-pipelined batch step.
   //   head - nonzero: load the next four words into r0-r3 and shift them
   //   tail - nonzero: checksum and store the previous four words r4-r7
   // The loads of the head are interleaved with the checksum steps of
   // the tail. pointer_dead is forwarded to the load macros.
   .macro output4words tail, head, r0, r1, r2, r3, r4, r5, r6, r7, pointer_dead=0
    .if \head
     .set SIZE_GROUP1, IDX1
     .if SIZE_GROUP1 > 4
      .set SIZE_GROUP1, 4
     .endif
     .set SIZE_GROUP2, 4 - SIZE_GROUP1
        load_group1 SIZE_GROUP1, \channels, \r0, \r1, \r2, \r3, \pointer_dead
    .endif
    .if \tail
        eor     CHECK, CHECK, \r4, lsr #8 - (\channels - IDX2)
        eor     CHECK, CHECK, \r5, lsr #7 - (\channels - IDX2)
        decr_modulo IDX2, 2, \channels
    .endif
    .if \head
        load_group2 SIZE_GROUP2, \channels, \r0, \r1, \r2, \r3, \pointer_dead
    .endif
    .if \tail
        eor     CHECK, CHECK, \r6, lsr #8 - (\channels - IDX2)
        eor     CHECK, CHECK, \r7, lsr #7 - (\channels - IDX2)
        decr_modulo IDX2, 2, \channels
        stm     OUT!, {\r4, \r5, \r6, \r7}
    .endif
    .if \head
        lsl     \r0, #8 + \shift
        lsl     \r1, #8 + \shift
        lsl     \r2, #8 + \shift
        lsl     \r3, #8 + \shift
    .endif
   .endm

   // Unrolling factor: a whole number of samples and a whole number of
   // 8-word double batches, so the two register sets end up where
   // they started.
   .set WORDS_PER_LOOP, \channels // calculate LCM (channels, 8)
   .if (WORDS_PER_LOOP % 2) == 0
    .set WORDS_PER_LOOP, WORDS_PER_LOOP / 2
   .endif
   .if (WORDS_PER_LOOP % 2) == 0
    .set WORDS_PER_LOOP, WORDS_PER_LOOP / 2
   .endif
   .if (WORDS_PER_LOOP % 2) == 0
    .set WORDS_PER_LOOP, WORDS_PER_LOOP / 2
   .endif
   .set WORDS_PER_LOOP, WORDS_PER_LOOP * 8
   .set SAMPLES_PER_LOOP, WORDS_PER_LOOP / \channels

  function ff_mlp_pack_output_inorder_\channels\()ch_\shift\()shift_armv6, export=1
   .if SAMPLES_PER_LOOP > 1
        tst     COUNT, #SAMPLES_PER_LOOP - 1  // COUNT is a multiple of SAMPLES_PER_LOOP in practice
        it      ne
        bne     X(ff_mlp_pack_output)         // but just in case, branch to C implementation if not
   .endif
        // COUNT is pre-decremented by one loop's worth: the final
        // iteration is handled by the drain code after label 1.
        subs    COUNT, COUNT, #SAMPLES_PER_LOOP
        it      lo
        bxlo    lr                            // fewer samples than one iteration: return
        push    {v1-v6,sl,fp,lr}
   .set IDX1, \channels
   .set IDX2, \channels
        // Prime the pipeline: load and shift the first batch only.
        output4words 0, 1, DAT0, DAT1, DAT2, DAT3, DAT4, DAT5, DAT6, DAT7
        // The flags tested here come from the preceding SUBS; none of
        // the instructions emitted by output4words is a flag-setting form.
  0:    beq     1f
   .rept WORDS_PER_LOOP / 8
        output4words 1, 1, DAT4, DAT5, DAT6, DAT7, DAT0, DAT1, DAT2, DAT3
        output4words 1, 1, DAT0, DAT1, DAT2, DAT3, DAT4, DAT5, DAT6, DAT7
   .endr
        subs    COUNT, COUNT, #SAMPLES_PER_LOOP
        bne     0b
  1:
        // Drain: the same sequence as one loop iteration, except that
        // the last step loads nothing and the last load may leave IN
        // unadjusted.
   .rept WORDS_PER_LOOP / 8 - 1
        output4words 1, 1, DAT4, DAT5, DAT6, DAT7, DAT0, DAT1, DAT2, DAT3
        output4words 1, 1, DAT0, DAT1, DAT2, DAT3, DAT4, DAT5, DAT6, DAT7
   .endr
        output4words 1, 1, DAT4, DAT5, DAT6, DAT7, DAT0, DAT1, DAT2, DAT3, pointer_dead=1
        output4words 1, 0, DAT0, DAT1, DAT2, DAT3, DAT4, DAT5, DAT6, DAT7
        pop     {v1-v6,sl,fp,pc}
  endfunc
   .purgem output4words

        .unreq  CHECK
        .unreq  COUNT
        .unreq  IN
        .unreq  OUT
        .unreq  DAT0
        .unreq  DAT1
        .unreq  DAT2
        .unreq  DAT3
        .unreq  DAT4
        .unreq  DAT5
        .unreq  DAT6
        .unreq  DAT7

  .endif // mixed
  .else // not inorder
  .ifc \shift, mixed

  // This case not currently handled

  .else // not mixed

  // ------------------------------------------------------------------
  // Out-of-order channels, one constant shift
  // Built only when CONFIG_THUMB is off.
  // ------------------------------------------------------------------
  #if !CONFIG_THUMB

  CHECK   .req    a1      // checksum accumulator, still in a1 on return
  COUNT   .req    a2      // samples left to process
  IN      .req    a3      // read pointer
  OUT     .req    a4      // write pointer
  DAT0    .req    v1      // the four words of the current batch
  DAT1    .req    v2
  DAT2    .req    v3
  DAT3    .req    v4
  CHAN0   .req    v5      // channel numbers from ch_assign, see the
  CHAN1   .req    v6      // function prologue
  CHAN2   .req    sl
  CHAN3   .req    fp
  CHAN4   .req    ip
  CHAN5   .req    lr

   // Load four words through the channel table, shift them, fold them
   // into CHECK and store them.
   // IDX1 tracks the channel position of the loads, IDX2 the channel
   // position of the checksum steps.
   .macro output4words
    .if \channels == 8
     // The eight channel numbers stay packed as bytes in CHAN4 and
     // CHAN5; unpack the four needed for this batch.
     .if IDX1 == 8
        uxtb    CHAN0, CHAN4, ror #0
        uxtb    CHAN1, CHAN4, ror #8
        uxtb    CHAN2, CHAN4, ror #16
        uxtb    CHAN3, CHAN4, ror #24
     .else
        uxtb    CHAN0, CHAN5, ror #0
        uxtb    CHAN1, CHAN5, ror #8
        uxtb    CHAN2, CHAN5, ror #16
        uxtb    CHAN3, CHAN5, ror #24
     .endif
        ldr     DAT0, [IN, CHAN0, lsl #2]
        ldr     DAT1, [IN, CHAN1, lsl #2]
        ldr     DAT2, [IN, CHAN2, lsl #2]
        ldr     DAT3, [IN, CHAN3, lsl #2]
     .if IDX1 == 4
        add     IN, IN, #8*4            // second half done: step to the next sample
     .endif
        decr_modulo IDX1, 4, \channels
    .else
     .set SIZE_GROUP1, IDX1
     .if SIZE_GROUP1 > 4
      .set SIZE_GROUP1, 4
     .endif
     .set SIZE_GROUP2, 4 - SIZE_GROUP1
     .if SIZE_GROUP1 == 2
        loadregoffsh2  DAT, 0, IN, CHAN, 0 + (\channels - IDX1)
        loadregoffsh2  DAT, 1, IN, CHAN, 1 + (\channels - IDX1)
        add     IN, IN, #8*4            // sample finished: step to the next one
     .else // SIZE_GROUP1 == 4
        loadregoffsh2  DAT, 0, IN, CHAN, 0 + (\channels - IDX1)
        loadregoffsh2  DAT, 1, IN, CHAN, 1 + (\channels - IDX1)
        loadregoffsh2  DAT, 2, IN, CHAN, 2 + (\channels - IDX1)
        loadregoffsh2  DAT, 3, IN, CHAN, 3 + (\channels - IDX1)
      .if IDX1 == 4
        add     IN, IN, #8*4
      .endif
     .endif
        decr_modulo IDX1, SIZE_GROUP1, \channels
     .if SIZE_GROUP2 == 2
        loadregoffsh2  DAT, 2, IN, CHAN, 0 + (\channels - IDX1)
        loadregoffsh2  DAT, 3, IN, CHAN, 1 + (\channels - IDX1)
      .if IDX1 == 2
        add     IN, IN, #8*4
      .endif
     .endif
        decr_modulo IDX1, SIZE_GROUP2, \channels
    .endif
    .if \channels == 8 // in this case we can corrupt CHAN0-3
        // CHAN0-3 are re-extracted for every batch, so they can be
        // turned into the right-shift amounts (8 minus channel number)
        // used by the checksum.
        rsb     CHAN0, CHAN0, #8
        rsb     CHAN1, CHAN1, #8
        rsb     CHAN2, CHAN2, #8
        rsb     CHAN3, CHAN3, #8
        lsl     DAT0, #8 + \shift
        lsl     DAT1, #8 + \shift
        lsl     DAT2, #8 + \shift
        lsl     DAT3, #8 + \shift
        eor     CHECK, CHECK, DAT0, lsr CHAN0
        eor     CHECK, CHECK, DAT1, lsr CHAN1
        eor     CHECK, CHECK, DAT2, lsr CHAN2
        eor     CHECK, CHECK, DAT3, lsr CHAN3
    .else
        // The channel registers must stay intact here, so the
        // checksum is taken before the final shift by 8: apply the
        // output shift, clear the top byte, fold in the word shifted
        // left by its channel number, then move it into place.
     .if \shift != 0
        lsl     DAT0, #\shift
        lsl     DAT1, #\shift
        lsl     DAT2, #\shift
        lsl     DAT3, #\shift
     .endif
        bic     DAT0, DAT0, #0xff000000
        bic     DAT1, DAT1, #0xff000000
        bic     DAT2, DAT2, #0xff000000
        bic     DAT3, DAT3, #0xff000000
        eorlslreg CHECK, DAT0, CHAN, 0 + (\channels - IDX2)
        eorlslreg CHECK, DAT1, CHAN, 1 + (\channels - IDX2)
        decr_modulo IDX2, 2, \channels
        eorlslreg CHECK, DAT2, CHAN, 0 + (\channels - IDX2)
        eorlslreg CHECK, DAT3, CHAN, 1 + (\channels - IDX2)
        decr_modulo IDX2, 2, \channels
        lsl     DAT0, #8
        lsl     DAT1, #8
        lsl     DAT2, #8
        lsl     DAT3, #8
    .endif
        stm     OUT!, {DAT0 - DAT3}
   .endm

   // Unrolling factor: the loop body must cover a whole number of
   // samples and a whole number of 4-word batches.
   .set WORDS_PER_LOOP, \channels // calculate LCM (channels, 4)
   .if (WORDS_PER_LOOP % 2) == 0
    .set WORDS_PER_LOOP, WORDS_PER_LOOP / 2
   .endif
   .if (WORDS_PER_LOOP % 2) == 0
    .set WORDS_PER_LOOP, WORDS_PER_LOOP / 2
   .endif
   .set WORDS_PER_LOOP, WORDS_PER_LOOP * 4
   .set SAMPLES_PER_LOOP, WORDS_PER_LOOP / \channels

  function ff_mlp_pack_output_outoforder_\channels\()ch_\shift\()shift_armv6, export=1
   .if SAMPLES_PER_LOOP > 1
        tst     COUNT, #SAMPLES_PER_LOOP - 1  // COUNT is a multiple of SAMPLES_PER_LOOP in practice
        it      ne
        bne     X(ff_mlp_pack_output)         // but just in case, branch to C implementation if not
   .endif
        teq     COUNT, #0                     // nothing to do: return with CHECK unchanged
        it      eq
        bxeq    lr
        push    {v1-v6,sl,fp,lr}
        // Nine words were just pushed, so the stack arguments now start
        // at word 9; ch_assign is the first of them.
        ldr     CHAN0, [sp, #(9+0)*4]   // get ch_assign from stack
        ldr     CHAN4, [CHAN0]          // channel numbers 0-3, one byte each
   .if \channels == 2
        uxtb    CHAN0, CHAN4, ror #0
        uxtb    CHAN1, CHAN4, ror #8
   .else
        ldr     CHAN5, [CHAN0, #4]      // channel numbers 4-7, one byte each
    .if \channels == 6
        // Six channel numbers fit in six registers, so unpack them once here.
        uxtb    CHAN0, CHAN4, ror #0
        uxtb    CHAN1, CHAN4, ror #8
        uxtb    CHAN2, CHAN4, ror #16
        uxtb    CHAN3, CHAN4, ror #24
        uxtb    CHAN4, CHAN5, ror #0
        uxtb    CHAN5, CHAN5, ror #8
    .endif
   .endif
   .set IDX1, \channels
   .set IDX2, \channels
  0:
   .rept WORDS_PER_LOOP / 4
        output4words
   .endr
        subs    COUNT, COUNT, #SAMPLES_PER_LOOP
        bne     0b
        pop     {v1-v6,sl,fp,pc}
        .ltorg
  endfunc
   .purgem output4words

        .unreq  CHECK
        .unreq  COUNT
        .unreq  IN
        .unreq  OUT
        .unreq  DAT0
        .unreq  DAT1
        .unreq  DAT2
        .unreq  DAT3
        .unreq  CHAN0
        .unreq  CHAN1
        .unreq  CHAN2
        .unreq  CHAN3
        .unreq  CHAN4
        .unreq  CHAN5

  #endif // !CONFIG_THUMB

  .endif // mixed
  .endif // inorder
  .endm // implement_pack
  // pack_channels inorder, channels
  //
  // Expands implement_pack once for each constant shift from 0 to 5
  // and once more for the per-channel (mixed) shift variant, all with
  // the given channel ordering mode and channel count.
  .macro pack_channels inorder, channels
        implement_pack \inorder, \channels, 0
        implement_pack \inorder, \channels, 1
        implement_pack \inorder, \channels, 2
        implement_pack \inorder, \channels, 3
        implement_pack \inorder, \channels, 4
        implement_pack \inorder, \channels, 5
        implement_pack \inorder, \channels, mixed
  .endm
  // pack_order inorder
  //
  // Expands pack_channels for each supported channel count (2, 6 and
  // 8) with the given channel ordering mode.
  .macro pack_order inorder
        pack_channels \inorder, 2
        pack_channels \inorder, 6
        pack_channels \inorder, 8
  .endm
  // Instantiate every variant: first the out-of-order family
  // (inorder = 0), then the in-order family (inorder = 1).
        pack_order 0
        pack_order 1