|
- /*
- * Copyright (c) 2013 RISC OS Open Ltd
- * Author: Ben Avison <bavison@riscosopen.org>
- *
- * This file is part of Libav.
- *
- * Libav is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * Libav is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with Libav; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
- #include "libavutil/arm/asm.S"
-
- RESULT .req a1 @ return value; shares a1 with BUF, so writing RESULT overwrites BUF
- BUF .req a1 @ first argument: start of the buffer to scan
- SIZE .req a2 @ second argument: byte count, decremented as data is consumed
- PATTERN .req a3 @ mask constant, loaded with 0x80008000 before the word loops
- PTR .req a4 @ read pointer, initialised from BUF and advanced by every load
- DAT0 .req v1 @ DAT0-DAT3: data loaded from the buffer (v1-v4 are saved by the entry push)
- DAT1 .req v2
- DAT2 .req v3
- DAT3 .req v4
- TMP0 .req v5 @ TMP0-TMP3: test results computed from DAT0-DAT3 respectively
- TMP1 .req v6
- TMP2 .req ip
- TMP3 .req lr @ lr is free as scratch because the function pushes it on entry and pops it into pc
-
- #define PRELOAD_DISTANCE 4 /* preload look-ahead, in units of 32 bytes (multiplied by 32 in the pld offsets) */
-
- .macro innerloop4 /* Test one word: loads 4 bytes from PTR (post-incremented), subtracts 4 from SIZE, leaves the masked test result in TMP0 with Z clear (NE) on a hit */
- ldr DAT0, [PTR], #4
- subs SIZE, SIZE, #4 @ C flag survives rest of macro
- sub TMP0, DAT0, PATTERN, lsr #14 @ subtract PATTERN >> 14 (0x00020002 when PATTERN is 0x80008000)
- bic TMP0, TMP0, DAT0 @ keep only bits that were clear in the loaded word
- ands TMP0, TMP0, PATTERN @ mask with PATTERN; Z is what the following bne in the caller tests
- .endm
-
- .macro innerloop16 decrement, do_preload /* Test four words (16 bytes) loaded from PTR; exits with Z clear (NE) if any word hit. decrement: optional amount subtracted from SIZE. do_preload: pass any non-empty text to emit a pld. */
- ldmia PTR!, {DAT0,DAT1,DAT2,DAT3}
- .ifnc "\do_preload","" /* assembled only when do_preload is non-empty */
- pld [PTR, #PRELOAD_DISTANCE*32]
- .endif
- .ifnc "\decrement","" /* assembled only when decrement is non-empty */
- subs SIZE, SIZE, #\decrement @ C flag survives rest of macro
- .endif
- sub TMP0, DAT0, PATTERN, lsr #14 @ same subtract / bic / and test as innerloop4, applied to each of the four words
- sub TMP1, DAT1, PATTERN, lsr #14
- bic TMP0, TMP0, DAT0
- bic TMP1, TMP1, DAT1
- sub TMP2, DAT2, PATTERN, lsr #14
- sub TMP3, DAT3, PATTERN, lsr #14
- ands TMP0, TMP0, PATTERN @ first word is always masked and sets Z
- bic TMP2, TMP2, DAT2
- it eq
- andseq TMP1, TMP1, PATTERN @ each later word is masked only while every earlier word tested zero
- bic TMP3, TMP3, DAT3
- itt eq
- andseq TMP2, TMP2, PATTERN
- andseq TMP3, TMP3, PATTERN @ once one word hits, Z stays clear and the remaining TMPn are left unmasked
- .endm
-
- /*
- * int ff_startcode_find_candidate_armv6(const uint8_t *buf, int size)
- *
- * Scans buf for the first start code candidate and returns its byte offset
- * from buf. The byte-wise loops treat a zero byte as a candidate. The
- * word-wise loops flag words with the PATTERN test in innerloop4 and
- * innerloop16; the code at labels 91 and 93 then narrows the hit down to a
- * byte offset, stepping back one byte when the preceding byte is also zero.
- *
- * In:  a1 = buf, a2 = size
- * Out: a1 = offset of the candidate, or PTR - BUF (the number of bytes
- *      scanned) when no candidate is found; 0 when size is 0.
- *
- * v1-v6 and lr are saved on entry and restored on exit; ip is used as
- * scratch. Data endianness is switched with setend be for the word loops
- * and switched back with setend le on every exit path that went through
- * them.
- */
- function ff_startcode_find_candidate_armv6, export=1
- push {v1-v6,lr}
- mov PTR, BUF
- @ Ensure there are at least (PRELOAD_DISTANCE+2) complete cachelines to go
- @ before using code that does preloads
- cmp SIZE, #(PRELOAD_DISTANCE+3)*32 - 1
- blo 60f @ unsigned compare: short buffers take the plain byte loop
-
- @ Get to word-alignment, 1 byte at a time
- tst PTR, #3
- beq 2f
- 1: ldrb DAT0, [PTR], #1
- sub SIZE, SIZE, #1
- teq DAT0, #0
- beq 90f
- tst PTR, #3
- bne 1b
- 2: @ Get to 4-word alignment, 1 word at a time
- ldr PATTERN, =0x80008000
- setend be @ word loads are big-endian from here until the matching setend le
- tst PTR, #12
- beq 4f
- 3: innerloop4
- bne 91f
- tst PTR, #12
- bne 3b
- 4: @ Get to cacheline (8-word) alignment
- tst PTR, #16
- beq 5f
- innerloop16 16
- bne 93f
- 5: @ Check complete cachelines, with preloading
- @ We need to stop when there are still (PRELOAD_DISTANCE+1)
- @ complete cachelines to go
- sub SIZE, SIZE, #(PRELOAD_DISTANCE+2)*32
- 6: innerloop16 , do_preload
- bne 93f
- innerloop16 32
- bne 93f
- bcs 6b @ C comes from the subs inside the second innerloop16
- @ Preload trailing part-cacheline, if any
- tst SIZE, #31
- beq 7f
- pld [PTR, #(PRELOAD_DISTANCE+1)*32]
- @ Check remaining data without doing any more preloads. First
- @ do in chunks of 4 words:
- 7: adds SIZE, SIZE, #(PRELOAD_DISTANCE+2)*32 - 16
- bmi 9f
- 8: innerloop16 16
- bne 93f
- bcs 8b
- @ Then in words:
- 9: adds SIZE, SIZE, #16 - 4
- bmi 11f
- 10: innerloop4
- bne 91f
- bcs 10b
- 11: setend le
- @ Check second byte of final halfword
- ldrb DAT0, [PTR, #-1]
- teq DAT0, #0
- beq 90f
- @ Check any remaining bytes
- tst SIZE, #3 @ the low two bits of SIZE hold the count of trailing bytes
- beq 13f
- 12: ldrb DAT0, [PTR], #1
- sub SIZE, SIZE, #1
- teq DAT0, #0
- beq 90f
- tst SIZE, #3
- bne 12b
- @ No candidate found
- 13: sub RESULT, PTR, BUF
- b 99f
-
- 60: @ Small buffer - simply check by looping over bytes
- subs SIZE, SIZE, #1
- @ size == 0: nothing to scan. Go through the "no candidate" exit so that
- @ RESULT becomes PTR - BUF = 0; branching straight to 99 would return
- @ with a1 still holding BUF, because RESULT and BUF share a1.
- bcc 62f
- 61: ldrb DAT0, [PTR], #1
- subs SIZE, SIZE, #1 @ sets C for the bcs below; the teq in between leaves C alone
- teq DAT0, #0
- beq 90f
- bcs 61b
- @ No candidate found
- 62: sub RESULT, PTR, BUF
- b 99f
-
- 90: @ Found a candidate at the preceding byte
- sub RESULT, PTR, BUF
- sub RESULT, RESULT, #1
- b 99f
-
- 91: @ Found a candidate somewhere in the preceding 4 bytes
- sub RESULT, PTR, BUF
- sub RESULT, RESULT, #4 @ RESULT = offset of the word just tested
- sub TMP0, DAT0, #0x20000
- bics TMP0, TMP0, DAT0 @ N set (mi): hit is in the upper halfword; N clear (pl): lower halfword
- itt pl
- ldrbpl DAT0, [PTR, #-3] @ byte preceding the lower halfword
- addpl RESULT, RESULT, #2
- bpl 92f
- teq RESULT, #0
- beq 98f @ do not look back a byte if found at first byte in buffer
- ldrb DAT0, [PTR, #-5] @ byte preceding the word
- 92: teq DAT0, #0
- it eq
- subeq RESULT, RESULT, #1 @ preceding byte is zero too: report one byte earlier
- b 98f
-
- 93: @ Found a candidate somewhere in the preceding 16 bytes
- sub RESULT, PTR, BUF
- sub RESULT, RESULT, #16 @ RESULT = offset of the first of the four words
- teq TMP0, #0
- beq 95f @ not in first 4 bytes
- sub TMP0, DAT0, #0x20000
- bics TMP0, TMP0, DAT0
- itt pl
- ldrbpl DAT0, [PTR, #-15]
- addpl RESULT, RESULT, #2
- bpl 94f
- teq RESULT, #0
- beq 98f @ do not look back a byte if found at first byte in buffer
- ldrb DAT0, [PTR, #-17]
- 94: teq DAT0, #0
- it eq
- subeq RESULT, RESULT, #1
- b 98f
- 95: add RESULT, RESULT, #4
- teq TMP1, #0
- beq 96f @ not in next 4 bytes
- sub TMP1, DAT1, #0x20000
- bics TMP1, TMP1, DAT1
- itee mi
- ldrbmi DAT0, [PTR, #-13] @ upper halfword hit: byte preceding this word
- ldrbpl DAT0, [PTR, #-11] @ lower halfword hit: byte preceding that halfword
- addpl RESULT, RESULT, #2
- teq DAT0, #0
- it eq
- subeq RESULT, RESULT, #1
- b 98f
- 96: add RESULT, RESULT, #4
- teq TMP2, #0
- beq 97f @ not in next 4 bytes
- sub TMP2, DAT2, #0x20000
- bics TMP2, TMP2, DAT2
- itee mi
- ldrbmi DAT0, [PTR, #-9]
- ldrbpl DAT0, [PTR, #-7]
- addpl RESULT, RESULT, #2
- teq DAT0, #0
- it eq
- subeq RESULT, RESULT, #1
- b 98f
- 97: add RESULT, RESULT, #4 @ only the last word is left, so no TMP3 test is needed
- sub TMP3, DAT3, #0x20000
- bics TMP3, TMP3, DAT3
- itee mi
- ldrbmi DAT0, [PTR, #-5]
- ldrbpl DAT0, [PTR, #-3]
- addpl RESULT, RESULT, #2
- teq DAT0, #0
- it eq
- subeq RESULT, RESULT, #1
- @ drop through to 98f
- 98: setend le @ exits from the word loops restore little-endian data access
- 99: pop {v1-v6,pc}
- endfunc
-
- .unreq RESULT @ release every register alias created with .req at the top of this file
- .unreq BUF
- .unreq SIZE
- .unreq PATTERN
- .unreq PTR
- .unreq DAT0
- .unreq DAT1
- .unreq DAT2
- .unreq DAT3
- .unreq TMP0
- .unreq TMP1
- .unreq TMP2
- .unreq TMP3
|