You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

663 lines
19KB

  1. /*
  2. * Copyright (c) 2014 RISC OS Open Ltd
  3. * Author: Ben Avison <bavison@riscosopen.org>
  4. *
  5. * This file is part of Libav.
  6. *
  7. * Libav is free software; you can redistribute it and/or
  8. * modify it under the terms of the GNU Lesser General Public
  9. * License as published by the Free Software Foundation; either
  10. * version 2.1 of the License, or (at your option) any later version.
  11. *
  12. * Libav is distributed in the hope that it will be useful,
  13. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  15. * Lesser General Public License for more details.
  16. *
  17. * You should have received a copy of the GNU Lesser General Public
  18. * License along with Libav; if not, write to the Free Software
  19. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  20. */
  21. #include "libavutil/arm/asm.S"
  // Layout constants used for address arithmetic in this file.
  // NOTE(review): presumably these must match the C decoder's definitions - confirm.
  // MAX_CHANNELS: the sample and bypassed-LSB pointers advance by this many
  // elements per loop iteration.
  22. #define MAX_CHANNELS 8
  // MAX_FIR_ORDER: IIR coefficients are read starting at word MAX_FIR_ORDER of
  // the coefficient array.
  23. #define MAX_FIR_ORDER 8
  // MAX_IIR_ORDER: not referenced in the visible part of this file.
  24. #define MAX_IIR_ORDER 4
  25. #define MAX_RATEFACTOR 4
  // MAX_BLOCKSIZE: the IIR state is addressed MAX_BLOCKSIZE + MAX_FIR_ORDER
  // words past the FIR state.
  26. #define MAX_BLOCKSIZE (40 * MAX_RATEFACTOR)
  // Register aliases for ff_mlp_filter_channel_arm and the macros it expands.
  // a3 and a4 hold firorder and iirorder on entry; they are consumed by the
  // switch_on_*_taps dispatch and then reused as the accumulator pair.
  27. PST .req a1 // state pointer (first argument)
  28. PCO .req a2 // coefficient pointer (second argument)
  29. AC0 .req a3 // accumulator, low word (only word used by mul/mla)
  30. AC1 .req a4 // accumulator, high word (smull/smlal)
  31. CO0 .req v1 // CO0-CO3: coefficient registers
  32. CO1 .req v2
  33. CO2 .req v3
  34. CO3 .req v4
  35. ST0 .req v5 // ST0-ST3: state registers; ST0/ST1 receive filter_shift/mask on entry
  36. ST1 .req v6
  37. ST2 .req sl
  38. ST3 .req fp
  39. I .req ip // loop counter, loaded from the blocksize argument
  40. PSAMP .req lr // sample_buffer pointer
  // Emit jump-table entries, one per argument, for the pc-relative dispatch
  // sequences used by the switch macros in this file. Each argument is the
  // offset from the start of the table (local label 0) to a target label.
  // NOTE(review): the A / T line prefixes come from libavutil/arm/asm.S (not
  // shown); they appear to keep a line only in ARM or only in Thumb builds.
  41. .macro branch_pic_label first, remainder:vararg
  // ARM form: 32-bit entry consumed by "add pc, pc, entry". That add sits one
  // instruction before the table and reads pc as its own address + 8, i.e.
  // table start + 4, hence the -4 bias.
  42. A .word \first - 4
  // Thumb form: 16-bit entry consumed by tbh, which doubles the loaded value.
  43. T .hword (\first) / 2
  // Recurse over any remaining arguments.
  44. .ifnb \remainder
  45. branch_pic_label \remainder
  46. .endif
  47. .endm
  48. // Some macros that do loads/multiplies where the register number is determined
  49. // from an assembly-time expression. Boy is GNU assembler's syntax ugly...
  //
  // load: emit "ldr <group><index>, [base, #offset]" where index is an
  // assembly-time expression. .altmacro mode is switched on only around the
  // inner call so that %(expr) is replaced by its numeric value before load_
  // pastes it onto the group prefix (e.g. CO and 2 give CO2).
  50. .macro load group, index, base, offset
  51. .altmacro
  52. load_ \group, %(\index), \base, \offset
  53. .noaltmacro
  54. .endm
  // load_: helper for load. Receives the already-evaluated register number and
  // emits the single-word load into register <group><index>.
  55. .macro load_ group, index, base, offset
  56. ldr \group\index, [\base, #\offset]
  57. .endm
  // loadd: load two consecutive words from [base, #offset] into registers
  // <group><index> and <group><index+1>. Both register numbers are evaluated
  // at assembly time via .altmacro %(expr) and handed to loadd_.
  58. .macro loadd group, index, base, offset
  59. .altmacro
  60. loadd_ \group, %(\index), %(\index+1), \base, \offset
  61. .noaltmacro
  62. .endm
  // loadd_: helper for loadd. Normally emits one ldrd. On the A-prefixed
  // (ARM-only) path, offsets of 256 or more are split into two ldr
  // instructions instead.
  // NOTE(review): presumably because the ARM-mode ldrd immediate offset is
  // limited to 8 bits - confirm against the architecture manual.
  // When the A-prefixed lines are dropped, only the ldrd remains.
  63. .macro loadd_ group, index0, index1, base, offset
  64. A .if \offset >= 256
  65. A ldr \group\index0, [\base, #\offset]
  66. A ldr \group\index1, [\base, #(\offset) + 4]
  67. A .else
  68. ldrd \group\index0, \group\index1, [\base, #\offset]
  69. A .endif
  70. .endm
  // multiply: multiply coefficient register CO<index> by state register
  // ST<index>, where index is an assembly-time expression (evaluated through
  // .altmacro and passed on to multiply_).
  //   accumulate - non-zero: add the product to the accumulator; zero: overwrite it
  //   long       - non-zero: 64-bit result in AC1:AC0; zero: 32-bit result in AC0
  71. .macro multiply index, accumulate, long
  72. .altmacro
  73. multiply_ %(\index), \accumulate, \long
  74. .noaltmacro
  75. .endm
  // multiply_: helper for multiply; index is already a literal register
  // number. Selects one of four instructions from the two flags.
  76. .macro multiply_ index, accumulate, long
  77. .if \long
  78. .if \accumulate
  // 64-bit multiply-accumulate: AC1:AC0 += CO * ST
  79. smlal AC0, AC1, CO\index, ST\index
  80. .else
  // 64-bit multiply: AC1:AC0 = CO * ST
  81. smull AC0, AC1, CO\index, ST\index
  82. .endif
  83. .else
  84. .if \accumulate
  // 32-bit multiply-accumulate: AC0 += CO * ST (AC1 untouched)
  85. mla AC0, CO\index, ST\index, AC0
  86. .else
  // 32-bit multiply: AC0 = CO * ST (AC1 untouched)
  87. mul AC0, CO\index, ST\index
  88. .endif
  89. .endif
  90. .endm
  91. // A macro to update the load register number and load offsets
  // Advances the assembly-time bookkeeping symbols by \howmany taps:
  //   LOAD_REG   - next register number to load into, wrapping modulo 4
  //   OFFSET_CO  - byte offset of the next coefficient
  //   OFFSET_ST  - byte offset of the next state word
  //   FIR_REMAIN / IIR_REMAIN - taps still to be loaded
  // FIR taps are consumed first. When the last FIR tap has been counted, the
  // offsets are redirected to the IIR coefficients and the IIR state.
  92. .macro inc howmany
  93. .set LOAD_REG, (LOAD_REG + \howmany) & 3
  94. .set OFFSET_CO, OFFSET_CO + 4 * \howmany
  95. .set OFFSET_ST, OFFSET_ST + 4 * \howmany
  96. .if FIR_REMAIN > 0
  97. .set FIR_REMAIN, FIR_REMAIN - \howmany
  98. .if FIR_REMAIN == 0
  // FIR part finished: IIR coefficients start at word MAX_FIR_ORDER and the
  // IIR state at word MAX_BLOCKSIZE + MAX_FIR_ORDER.
  99. .set OFFSET_CO, 4 * MAX_FIR_ORDER
  100. .set OFFSET_ST, 4 * (MAX_BLOCKSIZE + MAX_FIR_ORDER)
  101. .endif
  102. .elseif IIR_REMAIN > 0
  103. .set IIR_REMAIN, IIR_REMAIN - \howmany
  104. .endif
  105. .endm
  106. // Macro to implement the inner loop for one specific combination of parameters
  // Parameters (all assembly-time constants):
  //   mask_minus1 - non-zero when mask == -1, so the AND with MASK is omitted
  //   shift_0     - non-zero when filter_shift == 0 (32-bit multiplies, no shift)
  //   shift_8     - non-zero when filter_shift == 8 (constant shift by 8)
  //   iir_taps    - number of IIR taps
  //   fir_taps    - number of FIR taps
  // On entry PST, PCO, I and PSAMP are set up and filter_shift / mask are in
  // ST0 / ST1. Every expansion ends with a branch to label 99.
  107. .macro implement_filter mask_minus1, shift_0, shift_8, iir_taps, fir_taps
  108. .set TOTAL_TAPS, \iir_taps + \fir_taps
  109. // Deal with register allocation...
  // DEFINED_*: the SHIFT / MASK alias was created and must be .unreq'd at the end
  // SHUFFLE_*: the value must be copied from ST0 / ST1 into its alias before the loop
  // SPILL_*:   no free register; the value is reloaded from the stack on every iteration
  110. .set DEFINED_SHIFT, 0
  111. .set DEFINED_MASK, 0
  112. .set SHUFFLE_SHIFT, 0
  113. .set SHUFFLE_MASK, 0
  114. .set SPILL_SHIFT, 0
  115. .set SPILL_MASK, 0
  116. .if TOTAL_TAPS == 0
  117. // Little register pressure in this case - just keep MASK where it was
  118. .if !\mask_minus1
  119. MASK .req ST1
  120. .set DEFINED_MASK, 1
  121. .endif
  122. .else
  123. .if \shift_0
  124. .if !\mask_minus1
  125. // AC1 is unused with shift 0
  126. MASK .req AC1
  127. .set DEFINED_MASK, 1
  128. .set SHUFFLE_MASK, 1
  129. .endif
  130. .elseif \shift_8
  131. .if !\mask_minus1
  132. .if TOTAL_TAPS <= 4
  133. // All coefficients are preloaded (so pointer not needed)
  134. MASK .req PCO
  135. .set DEFINED_MASK, 1
  136. .set SHUFFLE_MASK, 1
  137. .else
  138. .set SPILL_MASK, 1
  139. .endif
  140. .endif
  141. .else // shift not 0 or 8
  142. .if TOTAL_TAPS <= 3
  143. // All coefficients are preloaded, and at least one CO register is unused
  // With an odd FIR count loading starts at CO1, so CO0 is the spare one;
  // otherwise loading starts at CO0 and CO3 is the spare one.
  144. .if \fir_taps & 1
  145. SHIFT .req CO0
  146. .set DEFINED_SHIFT, 1
  147. .set SHUFFLE_SHIFT, 1
  148. .else
  149. SHIFT .req CO3
  150. .set DEFINED_SHIFT, 1
  151. .set SHUFFLE_SHIFT, 1
  152. .endif
  153. .if !\mask_minus1
  154. MASK .req PCO
  155. .set DEFINED_MASK, 1
  156. .set SHUFFLE_MASK, 1
  157. .endif
  158. .elseif TOTAL_TAPS == 4
  159. // All coefficients are preloaded
  160. SHIFT .req PCO
  161. .set DEFINED_SHIFT, 1
  162. .set SHUFFLE_SHIFT, 1
  163. .if !\mask_minus1
  164. .set SPILL_MASK, 1
  165. .endif
  166. .else
  167. .set SPILL_SHIFT, 1
  168. .if !\mask_minus1
  169. .set SPILL_MASK, 1
  170. .endif
  171. .endif
  172. .endif
  173. .endif
  // Spilled values are reloaded into ST0 / ST1 at the end of each iteration,
  // after the multiplies have finished with those registers.
  174. .if SPILL_SHIFT
  175. SHIFT .req ST0
  176. .set DEFINED_SHIFT, 1
  177. .endif
  178. .if SPILL_MASK
  179. MASK .req ST1
  180. .set DEFINED_MASK, 1
  181. .endif
  182. // Preload coefficients if possible
  // With at most four taps every coefficient fits in CO0-CO3 for the whole
  // loop. FIR coefficients come from offset 0, IIR coefficients from word
  // MAX_FIR_ORDER; register numbers wrap modulo 4.
  183. .if TOTAL_TAPS <= 4
  184. .set OFFSET_CO, 0
  185. .if \fir_taps & 1
  186. .set LOAD_REG, 1
  187. .else
  188. .set LOAD_REG, 0
  189. .endif
  190. .rept \fir_taps
  191. load CO, LOAD_REG, PCO, OFFSET_CO
  192. .set LOAD_REG, (LOAD_REG + 1) & 3
  193. .set OFFSET_CO, OFFSET_CO + 4
  194. .endr
  195. .set OFFSET_CO, 4 * MAX_FIR_ORDER
  196. .rept \iir_taps
  197. load CO, LOAD_REG, PCO, OFFSET_CO
  198. .set LOAD_REG, (LOAD_REG + 1) & 3
  199. .set OFFSET_CO, OFFSET_CO + 4
  200. .endr
  201. .endif
  202. // Move mask/shift to final positions if necessary
  203. // Need to do this after preloading, because in some cases we
  204. // reuse the coefficient pointer register
  205. .if SHUFFLE_SHIFT
  206. mov SHIFT, ST0
  207. .endif
  208. .if SHUFFLE_MASK
  209. mov MASK, ST1
  210. .endif
  211. // Begin loop
  212. 01:
  213. .if TOTAL_TAPS == 0
  214. // Things simplify a lot in this case
  215. // In fact this could be pipelined further if it's worth it...
  // No taps: the (optionally masked) input sample is written unchanged to the
  // FIR state, the IIR state and back to the sample buffer.
  216. ldr ST0, [PSAMP]
  217. subs I, I, #1
  218. .if !\mask_minus1
  219. and ST0, ST0, MASK
  220. .endif
  221. str ST0, [PST, #-4]!
  222. str ST0, [PST, #4 * (MAX_BLOCKSIZE + MAX_FIR_ORDER)]
  223. str ST0, [PSAMP], #4 * MAX_CHANNELS
  224. bne 01b
  225. .else
  // Initialise the assembly-time bookkeeping that drives the unrolled
  // load / multiply schedule below (see the inc macro).
  226. .if \fir_taps & 1
  227. .set LOAD_REG, 1
  228. .else
  229. .set LOAD_REG, 0
  230. .endif
  231. .set LOAD_BANK, 0
  232. .set FIR_REMAIN, \fir_taps
  233. .set IIR_REMAIN, \iir_taps
  234. .if FIR_REMAIN == 0 // only IIR terms
  235. .set OFFSET_CO, 4 * MAX_FIR_ORDER
  236. .set OFFSET_ST, 4 * (MAX_BLOCKSIZE + MAX_FIR_ORDER)
  237. .else
  238. .set OFFSET_CO, 0
  239. .set OFFSET_ST, 0
  240. .endif
  241. .set MUL_REG, LOAD_REG
  242. .set COUNTER, 0
  // Two more steps than taps: the multiplies only start at step 2, so they
  // trail the loads that feed them.
  243. .rept TOTAL_TAPS + 2
  244. // Do load(s)
  // Step 0 (and step 1 when the FIR count is even) loads a single tap.
  // Later steps alternate (LOAD_BANK) between a coefficient load and a state
  // load; these are pairs unless only one IIR tap is left.
  // Coefficients are only loaded here when there are more than four taps;
  // otherwise they were preloaded before the loop.
  245. .if FIR_REMAIN != 0 || IIR_REMAIN != 0
  246. .if COUNTER == 0
  247. .if TOTAL_TAPS > 4
  248. load CO, LOAD_REG, PCO, OFFSET_CO
  249. .endif
  250. load ST, LOAD_REG, PST, OFFSET_ST
  251. inc 1
  252. .elseif COUNTER == 1 && (\fir_taps & 1) == 0
  253. .if TOTAL_TAPS > 4
  254. load CO, LOAD_REG, PCO, OFFSET_CO
  255. .endif
  256. load ST, LOAD_REG, PST, OFFSET_ST
  257. inc 1
  258. .elseif LOAD_BANK == 0
  259. .if TOTAL_TAPS > 4
  260. .if FIR_REMAIN == 0 && IIR_REMAIN == 1
  261. load CO, LOAD_REG, PCO, OFFSET_CO
  262. .else
  263. loadd CO, LOAD_REG, PCO, OFFSET_CO
  264. .endif
  265. .endif
  266. .set LOAD_BANK, 1
  267. .else
  268. .if FIR_REMAIN == 0 && IIR_REMAIN == 1
  269. load ST, LOAD_REG, PST, OFFSET_ST
  270. inc 1
  271. .else
  272. loadd ST, LOAD_REG, PST, OFFSET_ST
  273. inc 2
  274. .endif
  275. .set LOAD_BANK, 0
  276. .endif
  277. .endif
  278. // Do interleaved multiplies, slightly delayed
  // The first multiply (COUNTER == 2) initialises the accumulator and all
  // later ones accumulate. 64-bit arithmetic is used unless the shift is 0.
  279. .if COUNTER >= 2
  280. multiply MUL_REG, COUNTER > 2, !\shift_0
  281. .set MUL_REG, (MUL_REG + 1) & 3
  282. .endif
  283. .set COUNTER, COUNTER + 1
  284. .endr
  285. // Post-process the result of the multiplies
  // Reload spilled values: filter_shift and mask are the first two stack
  // arguments, located just above the 9 registers pushed on function entry.
  286. .if SPILL_SHIFT
  287. ldr SHIFT, [sp, #9*4 + 0*4]
  288. .endif
  289. .if SPILL_MASK
  290. ldr MASK, [sp, #9*4 + 1*4]
  291. .endif
  // Fetch the input sample and count down the loop; no instruction between
  // this subs and the final bne writes the flags.
  292. ldr ST2, [PSAMP]
  293. subs I, I, #1
  // AC0 = low 32 bits of (AC1:AC0 >> shift)
  294. .if \shift_8
  295. mov AC0, AC0, lsr #8
  296. orr AC0, AC0, AC1, lsl #24
  297. .elseif !\shift_0
  298. rsb ST3, SHIFT, #32
  299. mov AC0, AC0, lsr SHIFT
  // The ARM form folds the register-controlled shift into the orr; the Thumb
  // form needs a separate shift instruction first.
  300. A orr AC0, AC0, AC1, lsl ST3
  301. T mov AC1, AC1, lsl ST3
  302. T orr AC0, AC0, AC1
  303. .endif
  // ST3 = output sample = (input + accumulator), masked if required
  // ST2 = output - accumulator (simply the input when no mask is applied)
  304. .if \mask_minus1
  305. add ST3, ST2, AC0
  306. .else
  307. add ST2, ST2, AC0
  308. and ST3, ST2, MASK
  309. sub ST2, ST3, AC0
  310. .endif
  // Pre-decrement PST and store the output as the newest FIR state, store ST2
  // as the newest IIR state, then write the output back to the sample buffer
  // and step to the next sample (stride MAX_CHANNELS words).
  311. str ST3, [PST, #-4]!
  312. str ST2, [PST, #4 * (MAX_BLOCKSIZE + MAX_FIR_ORDER)]
  313. str ST3, [PSAMP], #4 * MAX_CHANNELS
  314. bne 01b
  315. .endif
  // Done: jump to the function epilogue so execution does not fall into the
  // next expanded variant.
  316. b 99f
  // Release the aliases so the next expansion can redefine them.
  317. .if DEFINED_SHIFT
  318. .unreq SHIFT
  319. .endif
  320. .if DEFINED_MASK
  321. .unreq MASK
  322. .endif
  323. .endm
  // Dispatch on firorder (still in a3) to one implement_filter expansion per
  // FIR order, for a fixed iir_taps. Only combinations with
  // fir_taps + iir_taps <= 8 get a table entry and a body.
  // ARM builds load a table entry (using CO0 as scratch) and add it to pc;
  // Thumb builds use tbh. The table immediately follows at local label 0.
  324. .macro switch_on_fir_taps mask_minus1, shift_0, shift_8, iir_taps
  325. A ldr CO0, [pc, a3, lsl #2] // firorder is in range 0-(8-iir_taps)
  326. A add pc, pc, CO0
  327. T tbh [pc, a3, lsl #1]
  328. 0:
  329. branch_pic_label (70f - 0b), (71f - 0b), (72f - 0b), (73f - 0b)
  330. branch_pic_label (74f - 0b)
  331. .if \iir_taps <= 3
  332. branch_pic_label (75f - 0b)
  333. .if \iir_taps <= 2
  334. branch_pic_label (76f - 0b)
  335. .if \iir_taps <= 1
  336. branch_pic_label (77f - 0b)
  337. .if \iir_taps == 0
  338. branch_pic_label (78f - 0b)
  339. .endif
  340. .endif
  341. .endif
  342. .endif
  // One specialised loop per FIR order; each ends with a branch to label 99,
  // so there is no fall-through between them.
  343. 70: implement_filter \mask_minus1, \shift_0, \shift_8, \iir_taps, 0
  344. 71: implement_filter \mask_minus1, \shift_0, \shift_8, \iir_taps, 1
  345. 72: implement_filter \mask_minus1, \shift_0, \shift_8, \iir_taps, 2
  346. 73: implement_filter \mask_minus1, \shift_0, \shift_8, \iir_taps, 3
  347. 74: implement_filter \mask_minus1, \shift_0, \shift_8, \iir_taps, 4
  348. .if \iir_taps <= 3
  349. 75: implement_filter \mask_minus1, \shift_0, \shift_8, \iir_taps, 5
  350. .if \iir_taps <= 2
  351. 76: implement_filter \mask_minus1, \shift_0, \shift_8, \iir_taps, 6
  352. .if \iir_taps <= 1
  353. 77: implement_filter \mask_minus1, \shift_0, \shift_8, \iir_taps, 7
  354. .if \iir_taps == 0
  355. 78: implement_filter \mask_minus1, \shift_0, \shift_8, \iir_taps, 8
  356. .endif
  357. .endif
  358. .endif
  359. .endif
  360. .endm
  // Dispatch on iirorder (still in a4) to one switch_on_fir_taps expansion per
  // IIR order (0-4), for a fixed mask / shift specialisation. Uses the same
  // pc-relative jump-table scheme as switch_on_fir_taps, with CO0 as scratch
  // in ARM builds.
  361. .macro switch_on_iir_taps mask_minus1, shift_0, shift_8
  362. A ldr CO0, [pc, a4, lsl #2] // iirorder is in range 0-4
  363. A add pc, pc, CO0
  364. T tbh [pc, a4, lsl #1]
  365. 0:
  366. branch_pic_label (60f - 0b), (61f - 0b), (62f - 0b), (63f - 0b)
  367. branch_pic_label (64f - 0b)
  368. 60: switch_on_fir_taps \mask_minus1, \shift_0, \shift_8, 0
  369. 61: switch_on_fir_taps \mask_minus1, \shift_0, \shift_8, 1
  370. 62: switch_on_fir_taps \mask_minus1, \shift_0, \shift_8, 2
  371. 63: switch_on_fir_taps \mask_minus1, \shift_0, \shift_8, 3
  372. 64: switch_on_fir_taps \mask_minus1, \shift_0, \shift_8, 4
  373. .endm
  374. /* void ff_mlp_filter_channel_arm(int32_t *state, const int32_t *coeff,
  375. * int firorder, int iirorder,
  376. * unsigned int filter_shift, int32_t mask,
  377. * int blocksize, int32_t *sample_buffer);
  378. */
  // Entry point: selects, from mask, filter_shift, iirorder and firorder, one
  // of the specialised loops generated by the macros above and runs it.
  // state, coeff, firorder and iirorder arrive in a1-a4; the remaining four
  // arguments are read from the stack.
  // NOTE(review): function / endfunc are macros from libavutil/arm/asm.S (not shown).
  379. function ff_mlp_filter_channel_arm, export=1
  // Save v1-fp and lr (9 words); the stack arguments then start at sp + 9*4.
  380. push {v1-fp,lr}
  381. add v1, sp, #9*4 // point at arguments on stack
  // ST0 = filter_shift, ST1 = mask, I = blocksize, PSAMP = sample_buffer
  382. ldm v1, {ST0,ST1,I,PSAMP}
  // First level of specialisation: mask == -1 means no masking is needed.
  383. cmp ST1, #-1
  384. bne 30f
  // lsl #29 leaves the low 3 bits of the shift in the result (Z set when they
  // are all zero, i.e. shift is 0 or 8) and bit 3 of the shift in the carry.
  385. movs ST2, ST0, lsl #29 // shift is in range 0-15; we want to special-case 0 and 8
  386. bne 20f
  387. bcs 10f
  // Arguments are mask_minus1, shift_0, shift_8.
  388. switch_on_iir_taps 1, 1, 0
  389. 10: switch_on_iir_taps 1, 0, 1
  390. 20: switch_on_iir_taps 1, 0, 0
  // Same three-way split on the shift, for the variants that apply the mask.
  391. 30: movs ST2, ST0, lsl #29 // shift is in range 0-15; we want to special-case 0 and 8
  392. bne 50f
  393. bcs 40f
  394. switch_on_iir_taps 0, 1, 0
  395. 40: switch_on_iir_taps 0, 0, 1
  396. 50: switch_on_iir_taps 0, 0, 0
  // Common exit: every specialised loop branches here when it has finished.
  397. 99: pop {v1-fp,pc}
  398. endfunc
  // Release the register aliases used by the filter code so that the rematrix
  // code below can define its own set (several names are reused).
  399. .unreq PST
  400. .unreq PCO
  401. .unreq AC0
  402. .unreq AC1
  403. .unreq CO0
  404. .unreq CO1
  405. .unreq CO2
  406. .unreq CO3
  407. .unreq ST0
  408. .unreq ST1
  409. .unreq ST2
  410. .unreq ST3
  411. .unreq I
  412. .unreq PSAMP
  413. /********************************************************************/
  // Register aliases for ff_mlp_rematrix_channel_arm and the macros it expands.
  414. PSA .req a1 // samples
  415. PCO .req a2 // coeffs
  416. PBL .req a3 // bypassed_lsbs
  // a4 holds noise_buffer on entry; it is saved to the stack before INDEX is built.
  417. INDEX .req a4
  418. CO0 .req v1 // CO0-CO3: coefficient registers
  419. CO1 .req v2
  420. CO2 .req v3
  421. CO3 .req v4
  422. SA0 .req v5 // SA0-SA3: sample registers
  423. SA1 .req v6
  424. SA2 .req sl
  425. SA3 .req fp
  426. AC0 .req ip // 64-bit accumulator, low word
  427. AC1 .req lr // 64-bit accumulator, high word
  // The following names share registers with SA0-SA3; they are only loaded
  // once the multiplies that read those sample registers have been issued.
  428. NOISE .req SA0
  429. LSB .req SA1
  430. DCH .req SA2 // dest_ch
  431. MASK .req SA3
  432. // INDEX is used as follows:
  433. // bits 0..6 index2 (values up to 17, but wider so that we can
  434. // add to index field without needing to mask)
  435. // bits 7..14 i (values up to 160)
  436. // bit 15 underflow detect for i
  437. // bits 25..31 (if access_unit_size_pow2 == 128) \ index
  438. // bits 26..31 (if access_unit_size_pow2 == 64) /
  // Expand the rematrix loop for one combination of parameters:
  //   shift       - matrix_noise_shift (0 means no noise term is added)
  //   index_mask  - 63 or 127, selecting the width of the index field in INDEX
  //                 (not evaluated when shift == 0)
  //   mask_minus1 - non-zero when mask == -1, so the AND with MASK is omitted
  //   maxchan     - 1, 5 or 7; maxchan + 1 coefficient/sample products are summed
  // Each iteration handles one sample row. The loop ends when the i field of
  // INDEX underflows, then control branches to label 98.
  439. .macro implement_rematrix shift, index_mask, mask_minus1, maxchan
  440. .if \maxchan == 1
  441. // We can just leave the coefficients in registers in this case
  442. ldrd CO0, CO1, [PCO]
  443. .endif
  444. 1:
  // Sum of products into AC1:AC0. In every variant the final product
  // (CO1 * SA1) is deferred until after the .endif below.
  445. .if \maxchan == 1
  446. ldrd SA0, SA1, [PSA]
  447. smull AC0, AC1, CO0, SA0
  448. .elseif \maxchan == 5
  449. ldr CO0, [PCO, #0]
  450. ldr SA0, [PSA, #0]
  451. ldr CO1, [PCO, #4]
  452. ldr SA1, [PSA, #4]
  453. ldrd CO2, CO3, [PCO, #8]
  454. smull AC0, AC1, CO0, SA0
  455. ldrd SA2, SA3, [PSA, #8]
  456. smlal AC0, AC1, CO1, SA1
  457. ldrd CO0, CO1, [PCO, #16]
  458. smlal AC0, AC1, CO2, SA2
  459. ldrd SA0, SA1, [PSA, #16]
  460. smlal AC0, AC1, CO3, SA3
  461. smlal AC0, AC1, CO0, SA0
  462. .else // \maxchan == 7
  463. ldr CO2, [PCO, #0]
  464. ldr SA2, [PSA, #0]
  465. ldr CO3, [PCO, #4]
  466. ldr SA3, [PSA, #4]
  467. ldrd CO0, CO1, [PCO, #8]
  468. smull AC0, AC1, CO2, SA2
  469. ldrd SA0, SA1, [PSA, #8]
  470. smlal AC0, AC1, CO3, SA3
  471. ldrd CO2, CO3, [PCO, #16]
  472. smlal AC0, AC1, CO0, SA0
  473. ldrd SA2, SA3, [PSA, #16]
  474. smlal AC0, AC1, CO1, SA1
  475. ldrd CO0, CO1, [PCO, #24]
  476. smlal AC0, AC1, CO2, SA2
  477. ldrd SA0, SA1, [PSA, #24]
  478. smlal AC0, AC1, CO3, SA3
  479. smlal AC0, AC1, CO0, SA0
  480. .endif
  // Reload the three words the caller pushed (noise_buffer, adjusted dest_ch,
  // mask). This overwrites SA0, SA2 and SA3, which are no longer needed; SA1
  // is untouched, so the last product can still be issued afterwards.
  481. ldm sp, {NOISE, DCH, MASK}
  482. smlal AC0, AC1, CO1, SA1
  483. .if \shift != 0
  // Noise path: the top bits of INDEX select a byte in the noise buffer.
  // Adding INDEX shifted to the top advances that field by index2; being at
  // the top of the register, it wraps without an explicit mask.
  484. .if \index_mask == 63
  485. add NOISE, NOISE, INDEX, lsr #32-6
  486. ldrb LSB, [PBL], #MAX_CHANNELS
  487. ldrsb NOISE, [NOISE]
  488. add INDEX, INDEX, INDEX, lsl #32-6
  489. .else // \index_mask == 127
  490. add NOISE, NOISE, INDEX, lsr #32-7
  491. ldrb LSB, [PBL], #MAX_CHANNELS
  492. ldrsb NOISE, [NOISE]
  493. add INDEX, INDEX, INDEX, lsl #32-7
  494. .endif
  // Decrement i (bits 7 and up of INDEX).
  495. sub INDEX, INDEX, #1<<7
  // 64-bit add of the sign-extended noise byte shifted left by shift + 7.
  496. adds AC0, AC0, NOISE, lsl #\shift + 7
  497. adc AC1, AC1, NOISE, asr #31
  498. .else
  // No noise term: just fetch the bypassed LSB byte and decrement i.
  499. ldrb LSB, [PBL], #MAX_CHANNELS
  500. sub INDEX, INDEX, #1<<7
  501. .endif
  // Step to the next sample row early; DCH already compensates for this.
  502. add PSA, PSA, #MAX_CHANNELS*4
  // AC0 = low 32 bits of (AC1:AC0 >> 14)
  503. mov AC0, AC0, lsr #14
  504. orr AC0, AC0, AC1, lsl #18
  505. .if !\mask_minus1
  506. and AC0, AC0, MASK
  507. .endif
  508. add AC0, AC0, LSB
  // Bit 15 of INDEX becomes set once i has been decremented below zero.
  509. tst INDEX, #1<<15
  510. str AC0, [PSA, DCH, lsl #2] // DCH is precompensated for the early increment of PSA
  511. beq 1b
  // Finished: jump to the common epilogue at label 98.
  512. b 98f
  513. .endm
  // Select the rematrix loop for maxchan, which the caller has loaded into v4
  // and already restricted to 1, 5 or 7: below 5 takes the maxchan == 1 loop,
  // equal to 5 the maxchan == 5 loop, and anything else falls through to the
  // maxchan == 7 loop. Each expansion ends with a branch to label 98.
  514. .macro switch_on_maxchan shift, index_mask, mask_minus1
  515. cmp v4, #5
  516. blo 51f
  517. beq 50f
  518. implement_rematrix \shift, \index_mask, \mask_minus1, 7
  519. 50: implement_rematrix \shift, \index_mask, \mask_minus1, 5
  520. 51: implement_rematrix \shift, \index_mask, \mask_minus1, 1
  521. .endm
  // Select between the variants that skip masking (mask == -1) and those that
  // apply it. The caller has loaded mask into sl.
  522. .macro switch_on_mask shift, index_mask
  523. cmp sl, #-1
  524. bne 40f
  525. switch_on_maxchan \shift, \index_mask, 1
  526. 40: switch_on_maxchan \shift, \index_mask, 0
  527. .endm
  // Select on access_unit_size_pow2 (in v6; the caller has already restricted
  // it to 64 or 128) and place index (in v1) in the top 6 or 7 bits of INDEX.
  // With shift == 0 the expanded loops do not read the index field, so no
  // selection is made and a placeholder index_mask is passed.
  528. .macro switch_on_au_size shift
  529. .if \shift == 0
  530. switch_on_mask \shift, undefined
  531. .else
  532. teq v6, #64
  533. bne 30f
  534. orr INDEX, INDEX, v1, lsl #32-6
  535. switch_on_mask \shift, 63
  536. 30: orr INDEX, INDEX, v1, lsl #32-7
  537. switch_on_mask \shift, 127
  538. .endif
  539. .endm
  540. /* void ff_mlp_rematrix_channel_arm(int32_t *samples,
  541. * const int32_t *coeffs,
  542. * const uint8_t *bypassed_lsbs,
  543. * const int8_t *noise_buffer,
  544. * int index,
  545. * unsigned int dest_ch,
  546. * uint16_t blockpos,
  547. * unsigned int maxchan,
  548. * int matrix_noise_shift,
  549. * int access_unit_size_pow2,
  550. * int32_t mask);
  551. */
  // Entry point: validates the parameters, packs the loop state into INDEX and
  // dispatches to one of the specialised loops generated by the macros above.
  // Parameter combinations without a specialised loop are passed on to
  // ff_mlp_rematrix_channel (label 99).
  // NOTE(review): function / endfunc / X() are macros from libavutil/arm/asm.S (not shown).
  552. function ff_mlp_rematrix_channel_arm, export=1
  553. push {v1-fp,lr}
  554. add v1, sp, #9*4 // point at arguments on stack
  // v1 = index, v2 = dest_ch, v3 = blockpos, v4 = maxchan,
  // v5 = matrix_noise_shift, v6 = access_unit_size_pow2, sl = mask
  555. ldm v1, {v1-sl}
  // Only maxchan values 1, 5 and 7 have specialised loops.
  556. teq v4, #1
  557. itt ne
  558. teqne v4, #5
  559. teqne v4, #7
  560. bne 99f
  // Only access unit sizes 64 and 128 are handled.
  561. teq v6, #64
  562. it ne
  563. teqne v6, #128
  564. bne 99f
  // The loops advance PSA by MAX_CHANNELS words before storing to
  // [PSA, DCH, lsl #2], so bias dest_ch by the same amount.
  565. sub v2, v2, #MAX_CHANNELS
  566. push {a4,v2,sl} // initialise NOISE,DCH,MASK; make sp dword-aligned
  // Put blockpos into the i field of INDEX (bits 7 and up).
  567. movs INDEX, v3, lsl #7
  568. beq 98f // just in case, do nothing if blockpos = 0
  569. subs INDEX, INDEX, #1<<7 // offset by 1 so we borrow at the right time
  // With the carry set, v1 + v1 + C = 2 * index + 1.
  570. adc lr, v1, v1 // calculate index2 (C was set by preceding subs)
  571. orr INDEX, INDEX, lr
  572. // Switch on matrix_noise_shift: values 0 and 1 are
  573. // disproportionately common so do those in a form the branch
  574. // predictor can accelerate. Values can only go up to 15.
  575. cmp v5, #1
  576. beq 11f
  577. blo 10f
  // Shifts 2-15 go through a jump table; entries 0 and 1 are never used
  // because those values were handled by the branches above.
  578. A ldr v5, [pc, v5, lsl #2]
  579. A add pc, pc, v5
  580. T tbh [pc, v5, lsl #1]
  581. 0:
  582. branch_pic_label 0, 0, (12f - 0b), (13f - 0b)
  583. branch_pic_label (14f - 0b), (15f - 0b), (16f - 0b), (17f - 0b)
  584. branch_pic_label (18f - 0b), (19f - 0b), (20f - 0b), (21f - 0b)
  585. branch_pic_label (22f - 0b), (23f - 0b), (24f - 0b), (25f - 0b)
  // One expansion per matrix_noise_shift value; each loop inside ends with a
  // branch to label 98.
  586. 10: switch_on_au_size 0
  587. 11: switch_on_au_size 1
  588. 12: switch_on_au_size 2
  589. 13: switch_on_au_size 3
  590. 14: switch_on_au_size 4
  591. 15: switch_on_au_size 5
  592. 16: switch_on_au_size 6
  593. 17: switch_on_au_size 7
  594. 18: switch_on_au_size 8
  595. 19: switch_on_au_size 9
  596. 20: switch_on_au_size 10
  597. 21: switch_on_au_size 11
  598. 22: switch_on_au_size 12
  599. 23: switch_on_au_size 13
  600. 24: switch_on_au_size 14
  601. 25: switch_on_au_size 15
  // Normal exit: drop the three words pushed for NOISE/DCH/MASK, then return.
  602. 98: add sp, sp, #3*4
  603. pop {v1-fp,pc}
  // Fallback: reached only before the extra push. Restore the saved registers
  // (a1-a4 and the stack arguments are still intact) and branch to
  // ff_mlp_rematrix_channel without linking.
  604. 99: // Can't handle these parameters, drop back to C
  605. pop {v1-fp,lr}
  606. b X(ff_mlp_rematrix_channel)
  607. endfunc
  // Release the register aliases used by the rematrix code.
  608. .unreq PSA
  609. .unreq PCO
  610. .unreq PBL
  611. .unreq INDEX
  612. .unreq CO0
  613. .unreq CO1
  614. .unreq CO2
  615. .unreq CO3
  616. .unreq SA0
  617. .unreq SA1
  618. .unreq SA2
  619. .unreq SA3
  620. .unreq AC0
  621. .unreq AC1
  622. .unreq NOISE
  623. .unreq LSB
  624. .unreq DCH
  625. .unreq MASK