You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

249 lines
8.4KB

  1. /*
  2. * Copyright (C) 2010 Mans Rullgard
  3. *
  4. * This file is part of FFmpeg.
  5. *
  6. * FFmpeg is free software; you can redistribute it and/or
  7. * modify it under the terms of the GNU Lesser General Public
  8. * License as published by the Free Software Foundation; either
  9. * version 2.1 of the License, or (at your option) any later version.
  10. *
  11. * FFmpeg is distributed in the hope that it will be useful,
  12. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  14. * Lesser General Public License for more details.
  15. *
  16. * You should have received a copy of the GNU Lesser General Public
  17. * License along with FFmpeg; if not, write to the Free Software
  18. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  19. */
  20. #include "asm.S"
  /*
   * rac_get_prob: one range-coder decision with probability pr.
   *
   * Operands (all registers):
   *   h    in: current range ("high"); out: range after the decision
   *   bs   bit counter; t0 is added to it, 16 is subtracted on a refill
   *   buf  input pointer, post-incremented by 2 on a refill
   *   cw   code word; shifted left by t0, reduced when the bit is 1
   *   pr   probability (smlabb uses its low 16 bits only)
   *   t0   in: shift count (the callers in this file load it from the
   *        norm-shift table indexed by h); out: h << shift
   *   t1   scratch, receives the refill halfword
   *
   * Result: the flags of the final cmp are left for the caller.
   * GE = the upper part of the range was taken (callers treat this as
   * bit 1), LT = the lower part (bit 0).
   *
   * NOTE(review): the T and A line prefixes are defined in asm.S, which is
   * not visible here; presumably they keep a line only in Thumb-2 or only
   * in ARM builds. Confirm against asm.S.
   */
  21. .macro rac_get_prob h, bs, buf, cw, pr, t0, t1
  22. adds \bs, \bs, \t0 /* bs += shift; carry set means a refill follows */
  23. lsl \cw, \cw, \t0 /* cw <<= shift */
  24. lsl \t0, \h, \t0 /* t0 = h << shift */
  25. rsb \h, \pr, #256 /* h = 256 - pr */
  26. it cs
  27. ldrhcs \t1, [\buf], #2 /* refill: load a halfword, buf += 2 */
  28. smlabb \h, \t0, \pr, \h /* h = t0 * pr + (256 - pr), 16x16 multiply-accumulate */
  29. T itttt cs
  30. rev16cs \t1, \t1 /* swap the two bytes of the loaded halfword */
  31. A orrcs \cw, \cw, \t1, lsl \bs /* cw |= t1 << bs */
  32. T lslcs \t1, \t1, \bs /* T variant: same OR, shift done first */
  33. T orrcs \cw, \cw, \t1
  34. subcs \bs, \bs, #16 /* bs -= 16 after a refill */
  35. lsr \h, \h, #8 /* h = split = (t0 * pr + 256 - pr) >> 8 */
  36. cmp \cw, \h, lsl #16 /* compare cw with split << 16; these flags are the result */
  37. itt ge
  38. subge \cw, \cw, \h, lsl #16 /* GE: cw -= split << 16 */
  39. subge \h, \t0, \h /* GE: h = t0 - split (LT keeps h = split) */
  40. .endm
  /*
   * rac_get_128: range-coder decision with the fixed probability 128.
   *
   * Same operands as rac_get_prob minus pr:
   *   h    in: current range; out: range after the decision
   *   bs   bit counter; t0 is added to it, 16 is subtracted on a refill
   *   buf  input pointer, post-incremented by 2 on a refill
   *   cw   code word
   *   t0   in: shift count; out: h << shift
   *   t1   scratch, receives the refill halfword
   *
   * Result: flags of the final cmp; GE = upper part of the range taken,
   * LT = lower part.
   */
  41. .macro rac_get_128 h, bs, buf, cw, t0, t1
  42. adds \bs, \bs, \t0 /* bs += shift; carry set means a refill follows */
  43. lsl \cw, \cw, \t0 /* cw <<= shift */
  44. lsl \t0, \h, \t0 /* t0 = h << shift */
  45. it cs
  46. ldrhcs \t1, [\buf], #2 /* refill: load a halfword, buf += 2 */
  47. mov \h, #128
  48. it cs
  49. rev16cs \t1, \t1 /* swap the two bytes of the loaded halfword */
  50. add \h, \h, \t0, lsl #7 /* h = 128 + t0 * 128 */
  51. A orrcs \cw, \cw, \t1, lsl \bs /* cw |= t1 << bs */
  52. T ittt cs
  53. T lslcs \t1, \t1, \bs /* T variant: same OR, shift done first */
  54. T orrcs \cw, \cw, \t1
  55. subcs \bs, \bs, #16 /* bs -= 16 after a refill */
  56. lsr \h, \h, #8 /* h = split = (128 + t0 * 128) >> 8 */
  57. cmp \cw, \h, lsl #16 /* compare cw with split << 16; these flags are the result */
  58. itt ge
  59. subge \cw, \cw, \h, lsl #16 /* GE: cw -= split << 16 */
  60. subge \h, \t0, \h /* GE: h = t0 - split (LT keeps h = split) */
  61. .endm
  /*
   * ff_decode_block_coeffs_armv6
   *
   * Decodes coefficient tokens with the range coder and stores the
   * resulting 16-bit values (magnitude times a factor taken from qmul,
   * sign applied) into the block at r1.
   *
   * Arguments as used by the code:
   *   r0         pointer to the range-coder state: high at +0, bits at +4,
   *              buf at +8, an upper limit for buf at +12, code_word at +16
   *   r1         destination block; halfwords are stored at
   *              r1 + zigzag_scan[i - 1]
   *   r2         base of a probability table, addressed as
   *              r2 + 33 * i, plus 0, 11 or 22
   *              (NOTE(review): looks like probs[i][ctx][11] - confirm
   *              against the C prototype)
   *   r3         starting coefficient index i
   *   stack[0]   token_prob, pointer to the probabilities for the first token
   *   stack[1]   qmul, pointer to one 32-bit word holding two 16-bit factors
   *
   * Returns in r0 the coefficient index reached when decoding stops.
   *
   * Register roles:
   *   r3   coefficient index i, incremented before each token
   *   r4   current probability pointer
   *   r5   high, r6 bits, r7 buf, r8 code_word
   *   r9   norm_shift[high] for the next range-coder step; also scratch
   *   r10  scratch
   *   r11  packed factors from qmul; the low half is the one applied
   *   r12  coefficient value
   *   lr   address of ff_vp56_norm_shift
   *
   * The numeric labels 1, 2 and 3 are each defined twice; forward
   * references (1f, 2f, 3f) bind to the next definition below the branch.
   */
  62. function ff_decode_block_coeffs_armv6, export=1
  63. push {r0,r1,r4-r11,lr} /* 11 registers = 44 bytes; r0 and r1 end up at [sp] and [sp, #4] */
  64. movrel lr, X(ff_vp56_norm_shift) /* lr = table base for the ldrb [lr, r5] lookups */
  65. ldrd r4, r5, [sp, #44] @ token_prob, qmul
  66. cmp r3, #0 /* flags consumed by pkhtbne below */
  67. ldr r11, [r5] /* r11 = 32-bit word at qmul */
  68. ldm r0, {r5-r7} @ high, bits, buf
  69. it ne
  70. pkhtbne r11, r11, r11, asr #16 /* i != 0: copy the high half of r11 into its low half */
  71. ldr r8, [r0, #16] @ code_word
  72. 0: /* token loop entry: starts with the decision on [r4, #1] */
  73. ldrb r9, [lr, r5] /* r9 = norm_shift[high] */
  74. add r3, r3, #1 /* i++ (this is why the table base used below is zigzag_scan-1) */
  75. ldrb r0, [r4, #1]
  76. rac_get_prob r5, r6, r7, r8, r0, r9, r10
  77. blt 2f /* bit 0: nothing is stored for this index */
  78. ldrb r9, [lr, r5]
  79. ldrb r0, [r4, #2]
  80. rac_get_prob r5, r6, r7, r8, r0, r9, r10
  81. ldrb r9, [lr, r5] /* shift for the next step; ldrb leaves the flags alone */
  82. bge 3f /* bit 1: magnitude is at least 2 */
  83. add r4, r3, r3, lsl #5 /* r4 = 33 * i */
  84. sxth r12, r11 /* magnitude 1: value = low factor of r11 */
  85. add r4, r2, r4
  86. adds r6, r6, r9 /* from here: rac_get_128 written out inline (sign bit), interleaved with other work */
  87. add r4, r4, #11 /* r4 = r2 + 33 * i + 11 */
  88. lsl r8, r8, r9
  89. it cs
  90. ldrhcs r10, [r7], #2
  91. lsl r9, r5, r9
  92. mov r5, #128
  93. it cs
  94. rev16cs r10, r10
  95. add r5, r5, r9, lsl #7
  96. T ittt cs
  97. T lslcs r10, r10, r6
  98. T orrcs r8, r8, r10
  99. A orrcs r8, r8, r10, lsl r6
  100. subcs r6, r6, #16
  101. lsr r5, r5, #8
  102. cmp r8, r5, lsl #16 /* sign decision; GE = negative */
  103. movrel r10, zigzag_scan-1 /* base minus 1 because i was already incremented */
  104. itt ge
  105. subge r8, r8, r5, lsl #16
  106. subge r5, r9, r5
  107. ldrb r10, [r10, r3] /* r10 = zigzag_scan[i - 1], a byte offset */
  108. it ge
  109. rsbge r12, r12, #0 /* sign bit set: negate */
  110. cmp r3, #16
  111. strh r12, [r1, r10] /* store the coefficient */
  112. bge 6f /* i >= 16: finished */
  113. 5: /* after a stored coefficient: decision on the probability at [r4] */
  114. ldrb r9, [lr, r5]
  115. ldrb r0, [r4]
  116. rac_get_prob r5, r6, r7, r8, r0, r9, r10
  117. pkhtb r11, r11, r11, asr #16 /* low half of r11 = its high half from now on */
  118. bge 0b /* bit 1: more tokens follow; bit 0 falls through to the exit */
  119. 6: /* write the range-coder state back and return */
  120. ldr r0, [sp] /* r0 = state pointer saved by the push */
  121. ldr r9, [r0, #12]
  122. cmp r7, r9
  123. it hi
  124. movhi r7, r9 /* clamp buf to the limit at +12 (unsigned compare) */
  125. stm r0, {r5-r7} @ high, bits, buf
  126. str r8, [r0, #16] @ code_word
  127. add sp, sp, #8 /* drop the saved r0 and r1 */
  128. mov r0, r3 /* return value = i */
  129. pop {r4-r11,pc}
  130. 2: /* first decision was 0: no store for this index */
  131. add r4, r3, r3, lsl #5
  132. cmp r3, #16
  133. add r4, r2, r4 /* r4 = r2 + 33 * i */
  134. pkhtb r11, r11, r11, asr #16 /* low half of r11 = its high half from now on */
  135. bne 0b /* i != 16: next token, re-entering at 0 (the [r4] test at 5 is skipped) */
  136. b 6b
  137. 3: /* magnitude >= 2 */
  138. ldrb r0, [r4, #3]
  139. rac_get_prob r5, r6, r7, r8, r0, r9, r10
  140. ldrb r9, [lr, r5]
  141. bge 1f /* bit 1: magnitude >= 5 */
  142. mov r12, #2
  143. ldrb r0, [r4, #4]
  144. rac_get_prob r5, r6, r7, r8, r0, r9, r10
  145. it ge
  146. addge r12, #1
  147. ldrb r9, [lr, r5]
  148. blt 4f /* bit 0: magnitude 2 */
  149. ldrb r0, [r4, #5]
  150. rac_get_prob r5, r6, r7, r8, r0, r9, r10
  151. it ge
  152. addge r12, #1 /* magnitude 3 or 4 */
  153. ldrb r9, [lr, r5]
  154. b 4f
  155. 1: /* magnitude >= 5 */
  156. ldrb r0, [r4, #6]
  157. rac_get_prob r5, r6, r7, r8, r0, r9, r10
  158. ldrb r9, [lr, r5]
  159. bge 3f /* bit 1: table-driven extra bits, see the second label 3 */
  160. ldrb r0, [r4, #7]
  161. rac_get_prob r5, r6, r7, r8, r0, r9, r10
  162. ldrb r9, [lr, r5]
  163. bge 2f /* bit 1: magnitude 7..10 */
  164. mov r12, #5
  165. mov r0, #159 /* fixed probability for the single extra bit */
  166. rac_get_prob r5, r6, r7, r8, r0, r9, r10
  167. it ge
  168. addge r12, r12, #1 /* magnitude 5 or 6 */
  169. ldrb r9, [lr, r5]
  170. b 4f
  171. 2: /* magnitude 7..10: two extra bits with fixed probabilities */
  172. mov r12, #7
  173. mov r0, #165
  174. rac_get_prob r5, r6, r7, r8, r0, r9, r10
  175. it ge
  176. addge r12, r12, #2
  177. ldrb r9, [lr, r5]
  178. mov r0, #145
  179. rac_get_prob r5, r6, r7, r8, r0, r9, r10
  180. it ge
  181. addge r12, r12, #1
  182. ldrb r9, [lr, r5]
  183. b 4f
  184. 3: /* two decisions select a table index 0..3, then extra bits are read */
  185. ldrb r0, [r4, #8]
  186. rac_get_prob r5, r6, r7, r8, r0, r9, r10
  187. it ge
  188. addge r4, r4, #1 /* bit 1: the next probability is read one byte further on */
  189. ldrb r9, [lr, r5]
  190. ite ge
  191. movge r12, #2
  192. movlt r12, #0
  193. ldrb r0, [r4, #9]
  194. rac_get_prob r5, r6, r7, r8, r0, r9, r10
  195. mov r9, #8
  196. it ge
  197. addge r12, r12, #1 /* r12 = table index 0..3 */
  198. movrel r4, X(ff_vp8_dct_cat_prob)
  199. lsl r9, r9, r12 /* r9 = 8 << index */
  200. ldr r4, [r4, r12, lsl #2] /* r4 = pointer stored in entry [index] of the table */
  201. add r12, r9, #3 /* base magnitude = 3 + (8 << index) */
  202. mov r1, #0 /* r1 reused as bit accumulator; reloaded from the stack below */
  203. ldrb r0, [r4], #1 /* first probability of the list */
  204. 1: /* one extra bit per probability; a zero byte ends the list */
  205. ldrb r9, [lr, r5]
  206. lsl r1, r1, #1
  207. rac_get_prob r5, r6, r7, r8, r0, r9, r10
  208. ldrb r0, [r4], #1 /* next probability; flags are untouched */
  209. it ge
  210. addge r1, r1, #1 /* append the decoded bit */
  211. cmp r0, #0
  212. bne 1b
  213. ldrb r9, [lr, r5]
  214. add r12, r12, r1 /* magnitude = base + extra bits */
  215. ldr r1, [sp, #4] /* restore the block pointer saved by the push */
  216. 4: /* r12 = magnitude (>= 2), r9 = norm_shift[high] */
  217. add r4, r3, r3, lsl #5
  218. add r4, r2, r4
  219. add r4, r4, #22 /* r4 = r2 + 33 * i + 22 */
  220. rac_get_128 r5, r6, r7, r8, r9, r10 /* sign bit */
  221. it ge
  222. rsbge r12, r12, #0 /* sign bit set: negate */
  223. smulbb r12, r12, r11 /* multiply by the low 16-bit factor of r11 */
  224. movrel r9, zigzag_scan-1
  225. ldrb r9, [r9, r3] /* r9 = zigzag_scan[i - 1], a byte offset */
  226. cmp r3, #16
  227. strh r12, [r1, r9] /* store the coefficient */
  228. bge 6b /* i >= 16: finished */
  229. b 5b
  230. endfunc
  /*
   * zigzag_scan: 16 byte offsets, one per coefficient index, used by
   * ff_decode_block_coeffs_armv6 as the register offset of its strh stores
   * into the block. The function addresses the table as zigzag_scan-1
   * because its index has already been incremented when the lookup happens.
   * All entries are even, as required for halfword stores.
   */
  231. const zigzag_scan
  232. .byte 0, 2, 8, 16
  233. .byte 10, 4, 6, 12
  234. .byte 18, 24, 26, 20
  235. .byte 14, 22, 28, 30
  236. endconst