You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

254 lines
7.4KB

  1. /*
  2. * Copyright (c) 2013 RISC OS Open Ltd
  3. * Author: Ben Avison <bavison@riscosopen.org>
  4. *
  5. * This file is part of Libav.
  6. *
  7. * Libav is free software; you can redistribute it and/or
  8. * modify it under the terms of the GNU Lesser General Public
  9. * License as published by the Free Software Foundation; either
  10. * version 2.1 of the License, or (at your option) any later version.
  11. *
  12. * Libav is distributed in the hope that it will be useful,
  13. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  15. * Lesser General Public License for more details.
  16. *
  17. * You should have received a copy of the GNU Lesser General Public
  18. * License along with Libav; if not, write to the Free Software
  19. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  20. */
  21. #include "libavutil/arm/asm.S"
  /*
   * Symbolic register names used throughout this file.
   * RESULT and BUF deliberately name the same register (a1): the buffer
   * pointer arrives in a1 and the return value leaves in a1.
   */

  @ Arguments / return value
  RESULT  .req    a1
  BUF     .req    a1
  SIZE    .req    a2

  @ Scan state
  PATTERN .req    a3              @ bit mask applied by the inner-loop macros
  PTR     .req    a4              @ running read pointer

  @ Data words loaded from the buffer
  DAT0    .req    v1
  DAT1    .req    v2
  DAT2    .req    v3
  DAT3    .req    v4

  @ Per-word scratch results (TMPn pairs with DATn)
  TMP0    .req    v5
  TMP1    .req    v6
  TMP2    .req    ip
  TMP3    .req    lr

  /* Preload look-ahead; multiplied by 32 (bytes) wherever it is used. */
  #define PRELOAD_DISTANCE 4
  /*
   * innerloop4 - test one 32-bit word of the buffer.
   *
   * Loads the word at PTR into DAT0 (PTR is post-incremented by 4) and
   * takes 4 off SIZE. It then evaluates
   *
   *     TMP0 = (DAT0 - (PATTERN >> 14)) & ~DAT0 & PATTERN
   *
   * With PATTERN = 0x80008000 (as loaded by the function in this file)
   * the result is non-zero exactly when at least one 16-bit half of the
   * loaded word is 0x0000 or 0x0001.
   *
   * On exit: Z is clear if the word contains a candidate; C still holds
   * the carry from the SIZE subtraction. TMP0 and the flags are clobbered.
   */
  .macro  innerloop4
          ldr     DAT0, [PTR], #4
          subs    SIZE, SIZE, #4          @ C flag survives rest of macro
          sub     TMP0, DAT0, PATTERN, lsr #14
          bic     TMP0, TMP0, DAT0
          ands    TMP0, TMP0, PATTERN     @ Z clear => candidate in this word
  .endm
  /*
   * innerloop16 [decrement], [do_preload] - test four 32-bit words at once.
   *
   *   decrement   if non-empty, this many bytes are subtracted from SIZE
   *               (flag-setting, so C reports whether SIZE went below zero)
   *   do_preload  if non-empty, a pld is issued PRELOAD_DISTANCE*32 bytes
   *               beyond the already-advanced PTR
   *
   * Loads DAT0-DAT3 from PTR (PTR advances by 16) and applies to each word
   * the same test as innerloop4:
   *
   *     TMPn = (DATn - (PATTERN >> 14)) & ~DATn & PATTERN
   *
   * The final AND for words 1-3 is only executed while every earlier word
   * tested clean, so on exit Z is clear as soon as any word holds a
   * candidate, and the first non-zero TMPn identifies that word. TMPn for
   * words after the first hit are left without the PATTERN mask applied.
   *
   * Clobbers TMP0-TMP3 and the flags; C from the optional subs is preserved.
   */
  .macro  innerloop16  decrement, do_preload
          ldmia   PTR!, {DAT0,DAT1,DAT2,DAT3}
   .ifnc "\do_preload",""
          pld     [PTR, #PRELOAD_DISTANCE*32]
   .endif
   .ifnc "\decrement",""
          subs    SIZE, SIZE, #\decrement @ C flag survives rest of macro
   .endif
          @ Subtract / bit-clear steps for all four words are interleaved;
          @ none of them touch the flags.
          sub     TMP0, DAT0, PATTERN, lsr #14
          sub     TMP1, DAT1, PATTERN, lsr #14
          bic     TMP0, TMP0, DAT0
          bic     TMP1, TMP1, DAT1
          sub     TMP2, DAT2, PATTERN, lsr #14
          sub     TMP3, DAT3, PATTERN, lsr #14
          ands    TMP0, TMP0, PATTERN     @ word 0
          bic     TMP2, TMP2, DAT2
          it      eq
          andseq  TMP1, TMP1, PATTERN     @ word 1, only if word 0 was clean
          bic     TMP3, TMP3, DAT3
          itt     eq
          andseq  TMP2, TMP2, PATTERN     @ word 2, only if words 0-1 were clean
          andseq  TMP3, TMP3, PATTERN     @ word 3, only if words 0-2 were clean
  .endm
  /*
   * int ff_startcode_find_candidate_armv6(const uint8_t *buf, int size)
   *
   * Scans buf for the first position that could begin a start code and
   * returns its byte offset from buf.
   *
   * In:    a1 = buf, a2 = size
   * Out:   a1 = offset of the candidate, or PTR - BUF after the whole
   *             buffer has been consumed when nothing was found
   *             (0 for an empty buffer)
   * Saves: v1-v6 and lr are pushed on entry and restored on exit
   *        (lr doubles as the scratch register TMP3)
   *
   * Strategy:
   *   - Buffers shorter than (PRELOAD_DISTANCE+3)*32 - 1 bytes are scanned
   *     one byte at a time for a zero byte (labels 60/61).
   *   - Larger buffers are brought to word alignment bytewise, then scanned
   *     a word or four words at a time with data endianness switched to
   *     big-endian (setend be), so that the 16-bit halves of each loaded
   *     word are byte pairs in memory order. The inner-loop macros flag a
   *     pair whose first byte is 0 and whose second byte is 0 or 1.
   *   - Labels 91/93-97 turn a flagged word into an exact offset: they find
   *     which pair hit, then step back one byte if the byte immediately
   *     before that pair is also zero.
   *
   * Every exit taken while in big-endian mode goes through label 98, which
   * restores little-endian data accesses before returning.
   */
  function ff_startcode_find_candidate_armv6, export=1
          push    {v1-v6,lr}
          mov     PTR, BUF
          @ Ensure there are at least (PRELOAD_DISTANCE+2) complete cachelines to go
          @ before using code that does preloads
          cmp     SIZE, #(PRELOAD_DISTANCE+3)*32 - 1
          blo     60f

          @ Get to word-alignment, 1 byte at a time
          tst     PTR, #3
          beq     2f
  1:      ldrb    DAT0, [PTR], #1
          sub     SIZE, SIZE, #1
          teq     DAT0, #0
          beq     90f
          tst     PTR, #3
          bne     1b
  2:      @ Get to 4-word alignment, 1 word at a time
          ldr     PATTERN, =0x80008000
          setend  be                      @ word loads now see bytes in memory order
          tst     PTR, #12
          beq     4f
  3:      innerloop4
          bne     91f
          tst     PTR, #12
          bne     3b
  4:      @ Get to cacheline (8-word) alignment
          tst     PTR, #16
          beq     5f
          innerloop16  16
          bne     93f
  5:      @ Check complete cachelines, with preloading
          @ We need to stop when there are still (PRELOAD_DISTANCE+1)
          @ complete cachelines to go
          sub     SIZE, SIZE, #(PRELOAD_DISTANCE+2)*32
  6:      innerloop16  , do_preload
          bne     93f
          innerloop16  32
          bne     93f
          bcs     6b
          @ Preload trailing part-cacheline, if any
          tst     SIZE, #31
          beq     7f
          pld     [PTR, #(PRELOAD_DISTANCE+1)*32]
          @ Check remaining data without doing any more preloads. First
          @ do in chunks of 4 words:
  7:      adds    SIZE, SIZE, #(PRELOAD_DISTANCE+2)*32 - 16
          bmi     9f
  8:      innerloop16  16
          bne     93f
          bcs     8b
          @ Then in words:
  9:      adds    SIZE, SIZE, #16 - 4
          bmi     11f
  10:     innerloop4
          bne     91f
          bcs     10b
  11:     setend  le
          @ Check second byte of final halfword
          @ (the word tests only flag a pair whose first byte is zero)
          ldrb    DAT0, [PTR, #-1]
          teq     DAT0, #0
          beq     90f
          @ Check any remaining bytes
          tst     SIZE, #3
          beq     13f
  12:     ldrb    DAT0, [PTR], #1
          sub     SIZE, SIZE, #1
          teq     DAT0, #0
          beq     90f
          tst     SIZE, #3
          bne     12b
          @ No candidate found
  13:     sub     RESULT, PTR, BUF
          b       99f

  60:     @ Small buffer - simply check by looping over bytes
          subs    SIZE, SIZE, #1
          @ Empty buffer: leave through the common "no candidate" exit so that
          @ RESULT becomes PTR - BUF = 0. Branching straight to the epilogue
          @ would return the buffer address, as RESULT and BUF share a1.
          bcc     62f
  61:     ldrb    DAT0, [PTR], #1
          subs    SIZE, SIZE, #1          @ C clear once the last byte has been read
          teq     DAT0, #0                @ leaves C untouched
          beq     90f
          bcs     61b
  62:     @ No candidate found
          sub     RESULT, PTR, BUF
          b       99f

  90:     @ Found a candidate at the preceding byte
          sub     RESULT, PTR, BUF
          sub     RESULT, RESULT, #1
          b       99f

  91:     @ Found a candidate somewhere in the preceding 4 bytes
          sub     RESULT, PTR, BUF
          sub     RESULT, RESULT, #4
          sub     TMP0, DAT0, #0x20000    @ re-test the first byte pair only
          bics    TMP0, TMP0, DAT0        @ N set => hit is in the first pair
          itt     pl
          ldrbpl  DAT0, [PTR, #-3]        @ second pair: fetch the byte before it
          addpl   RESULT, RESULT, #2
          bpl     92f
          teq     RESULT, #0
          beq     98f @ don't look back a byte if found at first byte in buffer
          ldrb    DAT0, [PTR, #-5]        @ first pair: byte before this word
  92:     teq     DAT0, #0
          it      eq
          subeq   RESULT, RESULT, #1      @ preceding byte is zero too: start there
          b       98f

  93:     @ Found a candidate somewhere in the preceding 16 bytes
          sub     RESULT, PTR, BUF
          sub     RESULT, RESULT, #16
          teq     TMP0, #0
          beq     95f @ not in first 4 bytes
          sub     TMP0, DAT0, #0x20000
          bics    TMP0, TMP0, DAT0
          itt     pl
          ldrbpl  DAT0, [PTR, #-15]
          addpl   RESULT, RESULT, #2
          bpl     94f
          teq     RESULT, #0
          beq     98f @ don't look back a byte if found at first byte in buffer
          ldrb    DAT0, [PTR, #-17]
  94:     teq     DAT0, #0
          it      eq
          subeq   RESULT, RESULT, #1
          b       98f

  95:     add     RESULT, RESULT, #4
          teq     TMP1, #0
          beq     96f @ not in next 4 bytes
          sub     TMP1, DAT1, #0x20000
          bics    TMP1, TMP1, DAT1
          itee    mi
          ldrbmi  DAT0, [PTR, #-13]
          ldrbpl  DAT0, [PTR, #-11]
          addpl   RESULT, RESULT, #2
          teq     DAT0, #0
          it      eq
          subeq   RESULT, RESULT, #1
          b       98f

  96:     add     RESULT, RESULT, #4
          teq     TMP2, #0
          beq     97f @ not in next 4 bytes
          sub     TMP2, DAT2, #0x20000
          bics    TMP2, TMP2, DAT2
          itee    mi
          ldrbmi  DAT0, [PTR, #-9]
          ldrbpl  DAT0, [PTR, #-7]
          addpl   RESULT, RESULT, #2
          teq     DAT0, #0
          it      eq
          subeq   RESULT, RESULT, #1
          b       98f

  97:     add     RESULT, RESULT, #4      @ only the last word is left
          sub     TMP3, DAT3, #0x20000
          bics    TMP3, TMP3, DAT3
          itee    mi
          ldrbmi  DAT0, [PTR, #-5]
          ldrbpl  DAT0, [PTR, #-3]
          addpl   RESULT, RESULT, #2
          teq     DAT0, #0
          it      eq
          subeq   RESULT, RESULT, #1
          @ drop through to 98f
  98:     setend  le                      @ undo the big-endian switch made at label 2
  99:     pop     {v1-v6,pc}
  endfunc
  @ Release every register alias defined at the top of this file.

  @ Arguments / return value
          .unreq  RESULT
          .unreq  BUF
          .unreq  SIZE

  @ Scan state
          .unreq  PATTERN
          .unreq  PTR

  @ Data words
          .unreq  DAT0
          .unreq  DAT1
          .unreq  DAT2
          .unreq  DAT3

  @ Scratch
          .unreq  TMP0
          .unreq  TMP1
          .unreq  TMP2
          .unreq  TMP3