2 * Copyright (c) 2013 RISC OS Open Ltd
3 * Author: Ben Avison <bavison@riscosopen.org>
5 * This file is part of FFmpeg.
7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
12 * FFmpeg is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with FFmpeg; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22 #include "libavutil/arm/asm.S"
/* Preload look-ahead, counted in 32-byte units (one 8-word cacheline, per the
 * alignment comments in the function below). It scales the pld offsets
 * (#PRELOAD_DISTANCE*32) and the SIZE thresholds that gate the preloading loop. */
38 #define PRELOAD_DISTANCE 4
@ NOTE(review): the directive that opens this fragment is not visible in this
@ excerpt; it reads like the one-word counterpart of innerloop16 below - confirm.
@ Account for one 4-byte word in SIZE; this is the instruction that sets C.
42 subs SIZE, SIZE, #4 @ C flag survives rest of macro
@ TMP0 = DAT0 - (PATTERN >> 14); a plain sub, so the flags are left alone
43 sub TMP0, DAT0, PATTERN, lsr #14
@ Keep only the PATTERN bits of TMP0; Z is set when none of them are set
45 ands TMP0, TMP0, PATTERN
@ innerloop16: load and process one 16-byte (4-word) group starting at PTR.
@   decrement  - immediate subtracted from SIZE (the subs also updates flags)
@   do_preload - any non-empty argument makes the macro also issue a pld
@ As far as the lines shown here go, Z is set on exit only when every AND
@ below produced zero; the first non-zero result clears Z and the remaining
@ conditional ANDs are skipped.
@ NOTE(review): the .endif/.endm closers and possibly other instructions are
@ not visible in this excerpt - confirm against the full file.
48 .macro innerloop16 decrement, do_preload
@ Load four consecutive words and advance PTR past them (writeback)
49 ldmia PTR!, {DAT0,DAT1,DAT2,DAT3}
@ Assembled only when do_preload was given a non-empty value
50 .ifnc "\do_preload",""
@ Prefetch hint for the address PRELOAD_DISTANCE*32 bytes beyond the updated PTR
51 pld [PTR, #PRELOAD_DISTANCE*32]
54 subs SIZE, SIZE, #\decrement @ C flag survives rest of macro
@ TMPn = DATn - (PATTERN >> 14); plain sub, so the flags from subs are untouched
56 sub TMP0, DAT0, PATTERN, lsr #14
57 sub TMP1, DAT1, PATTERN, lsr #14
60 sub TMP2, DAT2, PATTERN, lsr #14
61 sub TMP3, DAT3, PATTERN, lsr #14
@ Mask each difference with PATTERN. Each andseq runs only while Z is still
@ set, i.e. while every earlier masked result was zero.
62 ands TMP0, TMP0, PATTERN
65 andseq TMP1, TMP1, PATTERN
68 andseq TMP2, TMP2, PATTERN
69 andseq TMP3, TMP3, PATTERN
72 /* int ff_startcode_find_candidate_armv6(const uint8_t *buf, int size) */
73 function ff_startcode_find_candidate_armv6, export=1
76 @ Ensure there are at least (PRELOAD_DISTANCE+2) complete cachelines to go
77 @ before using code that does preloads
78 cmp SIZE, #(PRELOAD_DISTANCE+3)*32 - 1
81 @ Get to word-alignment, 1 byte at a time
84 1: ldrb DAT0, [PTR], #1
90 2: @ Get to 4-word alignment, 1 word at a time
91 ldr PATTERN, =0x80008000
99 4: @ Get to cacheline (8-word) alignment
104 5: @ Check complete cachelines, with preloading
105 @ We need to stop when there are still (PRELOAD_DISTANCE+1)
106 @ complete cachelines to go
107 sub SIZE, SIZE, #(PRELOAD_DISTANCE+2)*32
108 6: innerloop16 , do_preload
113 @ Preload trailing part-cacheline, if any
116 pld [PTR, #(PRELOAD_DISTANCE+1)*32]
117 @ Check remaining data without doing any more preloads. First
118 @ do in chunks of 4 words:
119 7: adds SIZE, SIZE, #(PRELOAD_DISTANCE+2)*32 - 16
125 9: adds SIZE, SIZE, #16 - 4
131 @ Check second byte of final halfword
132 ldrb DAT0, [PTR, #-1]
135 @ Check any remaining bytes
138 12: ldrb DAT0, [PTR], #1
145 13: sub RESULT, PTR, BUF
148 60: @ Small buffer - simply check by looping over bytes
151 61: ldrb DAT0, [PTR], #1
160 90: @ Found a candidate at the preceding byte
162 sub RESULT, RESULT, #1
165 91: @ Found a candidate somewhere in the preceding 4 bytes
167 sub RESULT, RESULT, #4
168 sub TMP0, DAT0, #0x20000
169 bics TMP0, TMP0, DAT0
171 ldrbpl DAT0, [PTR, #-3]
172 addpl RESULT, RESULT, #2
175 beq 98f @ don't look back a byte if found at first byte in buffer
176 ldrb DAT0, [PTR, #-5]
179 subeq RESULT, RESULT, #1
182 93: @ Found a candidate somewhere in the preceding 16 bytes
184 sub RESULT, RESULT, #16
186 beq 95f @ not in first 4 bytes
187 sub TMP0, DAT0, #0x20000
188 bics TMP0, TMP0, DAT0
190 ldrbpl DAT0, [PTR, #-15]
191 addpl RESULT, RESULT, #2
194 beq 98f @ don't look back a byte if found at first byte in buffer
195 ldrb DAT0, [PTR, #-17]
198 subeq RESULT, RESULT, #1
200 95: add RESULT, RESULT, #4
202 beq 96f @ not in next 4 bytes
203 sub TMP1, DAT1, #0x20000
204 bics TMP1, TMP1, DAT1
206 ldrbmi DAT0, [PTR, #-13]
207 ldrbpl DAT0, [PTR, #-11]
208 addpl RESULT, RESULT, #2
211 subeq RESULT, RESULT, #1
213 96: add RESULT, RESULT, #4
215 beq 97f @ not in next 4 bytes
216 sub TMP2, DAT2, #0x20000
217 bics TMP2, TMP2, DAT2
219 ldrbmi DAT0, [PTR, #-9]
220 ldrbpl DAT0, [PTR, #-7]
221 addpl RESULT, RESULT, #2
224 subeq RESULT, RESULT, #1
226 97: add RESULT, RESULT, #4
227 sub TMP3, DAT3, #0x20000
228 bics TMP3, TMP3, DAT3
230 ldrbmi DAT0, [PTR, #-5]
231 ldrbpl DAT0, [PTR, #-3]
232 addpl RESULT, RESULT, #2
235 subeq RESULT, RESULT, #1
236 @ drop through to 98f