2 * Copyright (c) 2014 RISC OS Open Ltd
3 * Author: Ben Avison <bavison@riscosopen.org>
5 * This file is part of FFmpeg.
7 * FFmpeg is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
12 * FFmpeg is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with FFmpeg; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22 #include "libavutil/arm/asm.S"
// loadregoffsh2: front end for loadregoffsh2_.
// Forwards all five arguments, wrapping index and offindex in %( ) so that
// they are passed on as evaluated expressions rather than as raw text. The
// callee pastes them onto the group / offgroup prefixes to form register names.
// NOTE(review): %( ) evaluation is a GAS alternate-macro-mode feature; the
// directive enabling that mode is not visible in this excerpt - confirm.
24 .macro loadregoffsh2 group, index, base, offgroup, offindex
26 loadregoffsh2_ \group, %(\index), \base, \offgroup, %(\offindex)
// loadregoffsh2_: emits one LDR.
// Destination register name is group and index pasted together; the address is
// base plus the register named offgroup+offindex shifted left by 2, i.e. the
// offset register is scaled by 4 bytes.
30 .macro loadregoffsh2_ group, index, base, offgroup, offindex
31 ldr \group\index, [\base, \offgroup\offindex, lsl #2]
// eorlslreg: front end for eorlslreg_.
// Passes index through %( ) so the callee receives it as an evaluated
// expression that can be pasted onto the group prefix to name a register.
// NOTE(review): relies on GAS alternate macro mode; the enabling directive is
// not visible in this excerpt - confirm.
34 .macro eorlslreg check, data, group, index
36 eorlslreg_ \check, \data, \group, %(\index)
// eorlslreg_: emits one EOR that folds data into check.
// The data operand is shifted left by the amount held in the register whose
// name is group and index pasted together (a register-specified shift).
40 .macro eorlslreg_ check, data, group, index
41 eor \check, \check, \data, lsl \group\index
// decr_modulo: assembly-time helper. The call sites in this file apply it to
// the symbols IDX1 and IDX2, with a constant step as the second argument and
// the channel count as the third.
// NOTE(review): the macro body is not visible in this excerpt; judging by the
// name it presumably subtracts by from var and wraps the result back to
// modulus - confirm against the full file.
44 .macro decr_modulo var, by, modulus
// load_group1: loads the first group of input words for a batch into the
// registers r0..r3 from the address in IN, then steps the assembly-time symbol
// IDX1 by size through decr_modulo.
// Parameters: size (words in this group), channels, r0-r3 (destination
// registers), pointer_dead (default 0).
// NOTE(review): the conditionals that choose between the load forms below, and
// any use of pointer_dead, are not fully visible in this excerpt.
51 .macro load_group1 size, channels, r0, r1, r2, r3, pointer_dead=0
// Two-register form: LDRD into r0/r1, post-incrementing IN by
// (size + 8 - channels) words.
// NOTE(review): the "8 - channels" term presumably skips unused slots of an
// 8-word input row - confirm.
53 ldrd \r0, \r1, [IN], #(\size + 8 - \channels) * 4
// Four-register form. With IDX1 > 4 or 8 channels, LDM with writeback advances
// IN by exactly the four words read.
55 .if IDX1 > 4 || \channels==8
56 ldm IN!, {\r0, \r1, \r2, \r3}
// Otherwise load without writeback and advance IN explicitly by
// (4 + 8 - channels) words.
58 ldm IN, {\r0, \r1, \r2, \r3}
60 add IN, IN, #(4 + 8 - \channels) * 4
// Record that size words of the current sample have been consumed.
64 decr_modulo IDX1, \size, \channels
// load_group2: counterpart of load_group1 for the second group of a batch.
// The visible code only ever writes r2 and r3, then steps IDX1 by size through
// decr_modulo.
// Parameters mirror load_group1: size, channels, r0-r3, pointer_dead (default 0).
// NOTE(review): the conditionals selecting on size / IDX1 are not visible in
// this excerpt.
67 .macro load_group2 size, channels, r0, r1, r2, r3, pointer_dead=0
// The //A lines are a commented-out alternative load sequence left in the source.
74 //A ldm IN, {\r2, \r3}
77 //A ldr \r3, [IN], #(\size - 1 + 8 - \channels) * 4
// Active form: LDRD into r2/r3, post-incrementing IN by (size + 8 - channels) words.
80 ldrd \r2, \r3, [IN], #(\size + 8 - \channels) * 4
84 decr_modulo IDX1, \size, \channels
// implement_pack: expands to one exported packing function for a given
// combination of inorder, channels and shift. The function names visible in
// this file are ff_mlp_pack_output_inorder_<channels>ch_mixedshift_armv6,
// ff_mlp_pack_output_inorder_<channels>ch_<shift>shift_armv6 and
// ff_mlp_pack_output_outoforder_<channels>ch_<shift>shift_armv6.
// NOTE(review): the register aliases (CHECK, COUNT, IN, OUT, DATn, SHIFTn) and
// the conditionals that select between the variants are not visible in this
// excerpt.
87 .macro implement_pack inorder, channels, shift
// Mixed-shift, in-order variant: handle four words per expansion.
// The four words are split into a first group sized from IDX1 and a second
// group holding the remainder.
// NOTE(review): any clamping of SIZE_GROUP1 is not visible here.
107 .set SIZE_GROUP1, IDX1
111 .set SIZE_GROUP2, 4 - SIZE_GROUP1
112 load_group1 SIZE_GROUP1, \channels, DAT0, DAT1, DAT2, DAT3
113 load_group2 SIZE_GROUP2, \channels, DAT0, DAT1, DAT2, DAT3
119 .elseif \channels == 6
136 .elseif \channels == 8
// 8-channel case: SHIFT0-3 are refilled from the four bytes packed in SHIFT4
// (first set below) or in SHIFT5 (second set).
// NOTE(review): the condition choosing between the two sets is not visible.
138 uxtb SHIFT0, SHIFT4, ror #0
139 uxtb SHIFT1, SHIFT4, ror #8
140 uxtb SHIFT2, SHIFT4, ror #16
141 uxtb SHIFT3, SHIFT4, ror #24
143 uxtb SHIFT0, SHIFT5, ror #0
144 uxtb SHIFT1, SHIFT5, ror #8
145 uxtb SHIFT2, SHIFT5, ror #16
146 uxtb SHIFT3, SHIFT5, ror #24
// Fold each word into CHECK. The right-shift amount is an assembly-time
// constant: 8 or 7, minus (channels - IDX2). IDX2 is stepped by 2 after each
// pair of words.
// NOTE(review): presumably this undoes part of an earlier left shift by 8 so
// that each word ends up shifted by its channel position - confirm.
153 eor CHECK, CHECK, DAT0, lsr #8 - (\channels - IDX2)
154 eor CHECK, CHECK, DAT1, lsr #7 - (\channels - IDX2)
155 decr_modulo IDX2, 2, \channels
156 eor CHECK, CHECK, DAT2, lsr #8 - (\channels - IDX2)
157 eor CHECK, CHECK, DAT3, lsr #7 - (\channels - IDX2)
158 decr_modulo IDX2, 2, \channels
// Write the four processed words to OUT and advance it.
159 stm OUT!, {DAT0 - DAT3}
// Compute how many words one loop iteration must cover so that it ends on both
// a sample boundary and a 4-word boundary: strip up to two factors of 2 from
// channels, then multiply by 4.
162 .set WORDS_PER_LOOP, \channels // calculate LCM (channels, 4)
163 .if (WORDS_PER_LOOP % 2) == 0
164 .set WORDS_PER_LOOP, WORDS_PER_LOOP / 2
166 .if (WORDS_PER_LOOP % 2) == 0
167 .set WORDS_PER_LOOP, WORDS_PER_LOOP / 2
169 .set WORDS_PER_LOOP, WORDS_PER_LOOP * 4
// Number of whole samples consumed per loop iteration.
170 .set SAMPLES_PER_LOOP, WORDS_PER_LOOP / \channels
// Exported entry point for the in-order variant with per-channel ("mixed")
// shifts, one instance per channel count.
172 function ff_mlp_pack_output_inorder_\channels\()ch_mixedshift_armv6, export=1
// If COUNT is not a multiple of SAMPLES_PER_LOOP, hand over to the generic
// ff_mlp_pack_output routine. SAMPLES_PER_LOOP is LCM(channels, 4) / channels,
// which is 1, 2 or 4, so SAMPLES_PER_LOOP - 1 works as a low-bit mask.
173 .if SAMPLES_PER_LOOP > 1
174 tst COUNT, #SAMPLES_PER_LOOP - 1 // always seems to be in practice
176 bne X(ff_mlp_pack_output) // but just in case, branch to C implementation if not
// Nine registers are saved, so stack-passed arguments now start at sp + 9*4.
181 push {v1-v6,sl,fp,lr}
// (9+1)*4 addresses the second stacked argument word.
182 ldr SHIFT0, [sp, #(9+1)*4] // get output_shift from stack
// Constant with 8 in every byte lane, used below as the UADD8 addend.
183 ldr SHIFT1, =0x08080808
// NOTE(review): the instruction that loads SHIFT4 through the pointer in SHIFT0
// is not visible in this excerpt.
// First visible form: add 8 to each packed byte, then unpack two shift values.
186 uadd8 SHIFT4, SHIFT4, SHIFT1 // increase all shifts by 8
187 uxtb SHIFT0, SHIFT4, ror #0
188 uxtb SHIFT1, SHIFT4, ror #8
// Second visible form: also fetch the next word of shifts into SHIFT5 and
// adjust both words.
190 ldr SHIFT5, [SHIFT0, #4]
191 uadd8 SHIFT4, SHIFT4, SHIFT1 // increase all shifts by 8
192 uadd8 SHIFT5, SHIFT5, SHIFT1
// Unpack six byte-sized shift values into SHIFT0-SHIFT5. SHIFT4 and SHIFT5 are
// overwritten last because they are also the packed sources.
// NOTE(review): the conditionals choosing between these forms per channel
// count are not visible here.
194 uxtb SHIFT0, SHIFT4, ror #0
195 uxtb SHIFT1, SHIFT4, ror #8
196 uxtb SHIFT2, SHIFT4, ror #16
197 uxtb SHIFT3, SHIFT4, ror #24
198 uxtb SHIFT4, SHIFT5, ror #0
199 uxtb SHIFT5, SHIFT5, ror #8
// Loop body is repeated once per four words of a loop iteration (the repeated
// statement and loop label are not visible in this excerpt).
205 .rept WORDS_PER_LOOP / 4
// Count down the samples handled by this iteration; flags are set for the loop
// branch.
208 subs COUNT, COUNT, #SAMPLES_PER_LOOP
// Fixed-shift, in-order variant: eight data registers are used so that one set
// of four can be loaded while the other set of four is being finished.
242 DAT6 .req sl // use these rather than the otherwise unused
243 DAT7 .req fp // ip and lr so that we can load them using LDRD
// output4words: one pipeline step working on two register sets.
//   r0-r3        registers passed to load_group1 / load_group2 (being loaded)
//   r4-r7        registers folded into CHECK and stored to OUT
//   tail, head   flags; the call sites pass tail=0 on the first step and head=0
//                on the last
//   pointer_dead forwarded to the load macros (default 0)
// NOTE(review): the conditionals that test tail / head, and the shift
// instructions applied to the data, are not visible in this excerpt.
// Presumably head gates the loads and tail gates the checksum/store - confirm.
245 .macro output4words tail, head, r0, r1, r2, r3, r4, r5, r6, r7, pointer_dead=0
// Split the four words to load into a first group sized from IDX1 and a second
// group holding the remainder.
247 .set SIZE_GROUP1, IDX1
251 .set SIZE_GROUP2, 4 - SIZE_GROUP1
252 load_group1 SIZE_GROUP1, \channels, \r0, \r1, \r2, \r3, \pointer_dead
// Fold the first two finished words into CHECK. The right-shift amount is an
// assembly-time constant derived from IDX2.
255 eor CHECK, CHECK, \r4, lsr #8 - (\channels - IDX2)
256 eor CHECK, CHECK, \r5, lsr #7 - (\channels - IDX2)
257 decr_modulo IDX2, 2, \channels
// The second load is interleaved between the two checksum pairs.
260 load_group2 SIZE_GROUP2, \channels, \r0, \r1, \r2, \r3, \pointer_dead
263 eor CHECK, CHECK, \r6, lsr #8 - (\channels - IDX2)
264 eor CHECK, CHECK, \r7, lsr #7 - (\channels - IDX2)
265 decr_modulo IDX2, 2, \channels
// Store the four finished words and advance OUT.
266 stm OUT!, {\r4, \r5, \r6, \r7}
// Words per loop iteration: strip up to three factors of 2 from channels, then
// multiply by 8. One iteration therefore ends on both a sample boundary and an
// 8-word boundary.
276 .set WORDS_PER_LOOP, \channels // calculate LCM (channels, 8)
277 .if (WORDS_PER_LOOP % 2) == 0
278 .set WORDS_PER_LOOP, WORDS_PER_LOOP / 2
280 .if (WORDS_PER_LOOP % 2) == 0
281 .set WORDS_PER_LOOP, WORDS_PER_LOOP / 2
283 .if (WORDS_PER_LOOP % 2) == 0
284 .set WORDS_PER_LOOP, WORDS_PER_LOOP / 2
286 .set WORDS_PER_LOOP, WORDS_PER_LOOP * 8
// Number of whole samples consumed per loop iteration.
287 .set SAMPLES_PER_LOOP, WORDS_PER_LOOP / \channels
// Exported entry point for the in-order variant with a single fixed shift, one
// instance per (channels, shift) pair.
289 function ff_mlp_pack_output_inorder_\channels\()ch_\shift\()shift_armv6, export=1
// If COUNT is not a multiple of SAMPLES_PER_LOOP, hand over to the generic
// ff_mlp_pack_output routine. SAMPLES_PER_LOOP is LCM(channels, 8) / channels,
// a power of two, so SAMPLES_PER_LOOP - 1 works as a low-bit mask.
290 .if SAMPLES_PER_LOOP > 1
291 tst COUNT, #SAMPLES_PER_LOOP - 1 // always seems to be in practice
293 bne X(ff_mlp_pack_output) // but just in case, branch to C implementation if not
// Pre-decrement COUNT for the first iteration.
// NOTE(review): the instruction consuming the resulting flags is not visible
// in this excerpt.
295 subs COUNT, COUNT, #SAMPLES_PER_LOOP
// Save the nine registers used as working storage.
298 push {v1-v6,sl,fp,lr}
// Pipeline prologue: tail=0, head=1, with DAT0-3 in the load position.
301 output4words 0, 1, DAT0, DAT1, DAT2, DAT3, DAT4, DAT5, DAT6, DAT7
// Steady state: the two register sets swap roles on every step, two steps
// (eight words) per repetition.
303 .rept WORDS_PER_LOOP / 8
304 output4words 1, 1, DAT4, DAT5, DAT6, DAT7, DAT0, DAT1, DAT2, DAT3
305 output4words 1, 1, DAT0, DAT1, DAT2, DAT3, DAT4, DAT5, DAT6, DAT7
307 subs COUNT, COUNT, #SAMPLES_PER_LOOP
// Final iteration: one fewer full repetition, then a closing pair.
310 .rept WORDS_PER_LOOP / 8 - 1
311 output4words 1, 1, DAT4, DAT5, DAT6, DAT7, DAT0, DAT1, DAT2, DAT3
312 output4words 1, 1, DAT0, DAT1, DAT2, DAT3, DAT4, DAT5, DAT6, DAT7
// Last step that passes head=1: pointer_dead=1 is forwarded to the load macros.
314 output4words 1, 1, DAT4, DAT5, DAT6, DAT7, DAT0, DAT1, DAT2, DAT3, pointer_dead=1
// Pipeline epilogue: tail=1, head=0.
315 output4words 1, 0, DAT0, DAT1, DAT2, DAT3, DAT4, DAT5, DAT6, DAT7
// NOTE(review): the condition this remark applies to is not visible in this
// excerpt.
337 // This case not currently handled
// Out-of-order variant: each word is fetched from IN at an index held in a
// CHANn register. Here CHAN0-3 are refilled from the four bytes packed in CHAN4
// (first set) or CHAN5 (second set).
// NOTE(review): the conditionals selecting between the sets are not visible.
361 uxtb CHAN0, CHAN4, ror #0
362 uxtb CHAN1, CHAN4, ror #8
363 uxtb CHAN2, CHAN4, ror #16
364 uxtb CHAN3, CHAN4, ror #24
366 uxtb CHAN0, CHAN5, ror #0
367 uxtb CHAN1, CHAN5, ror #8
368 uxtb CHAN2, CHAN5, ror #16
369 uxtb CHAN3, CHAN5, ror #24
// Gather four words: DATn = word at IN + CHANn * 4.
371 ldr DAT0, [IN, CHAN0, lsl #2]
372 ldr DAT1, [IN, CHAN1, lsl #2]
373 ldr DAT2, [IN, CHAN2, lsl #2]
374 ldr DAT3, [IN, CHAN3, lsl #2]
378 decr_modulo IDX1, 4, \channels
// Alternative path: split the four words into two groups around the sample
// boundary. The CHAN register for each load is chosen at assembly time from
// (channels - IDX1).
380 .set SIZE_GROUP1, IDX1
384 .set SIZE_GROUP2, 4 - SIZE_GROUP1
// First group of two words.
386 loadregoffsh2 DAT, 0, IN, CHAN, 0 + (\channels - IDX1)
387 loadregoffsh2 DAT, 1, IN, CHAN, 1 + (\channels - IDX1)
389 .else // SIZE_GROUP1 == 4
// First group of four words.
390 loadregoffsh2 DAT, 0, IN, CHAN, 0 + (\channels - IDX1)
391 loadregoffsh2 DAT, 1, IN, CHAN, 1 + (\channels - IDX1)
392 loadregoffsh2 DAT, 2, IN, CHAN, 2 + (\channels - IDX1)
393 loadregoffsh2 DAT, 3, IN, CHAN, 3 + (\channels - IDX1)
398 decr_modulo IDX1, SIZE_GROUP1, \channels
// Second group: the remaining two words go into DAT2 / DAT3.
400 loadregoffsh2 DAT, 2, IN, CHAN, 0 + (\channels - IDX1)
401 loadregoffsh2 DAT, 3, IN, CHAN, 1 + (\channels - IDX1)
406 decr_modulo IDX1, SIZE_GROUP2, \channels
408 .if \channels == 8 // in this case we can corrupt CHAN0-3
// Shift each word left by 8 + shift, then fold it into CHECK shifted right by
// the amount held in the matching CHANn register.
// NOTE(review): whatever instructions rewrite CHAN0-3 before this point are
// not visible in this excerpt.
413 lsl DAT0, #8 + \shift
414 lsl DAT1, #8 + \shift
415 lsl DAT2, #8 + \shift
416 lsl DAT3, #8 + \shift
417 eor CHECK, CHECK, DAT0, lsr CHAN0
418 eor CHECK, CHECK, DAT1, lsr CHAN1
419 eor CHECK, CHECK, DAT2, lsr CHAN2
420 eor CHECK, CHECK, DAT3, lsr CHAN3
// Other path: clear bits 31-24 of each word, then fold it into CHECK shifted
// left by the CHAN register selected from (channels - IDX2).
428 bic DAT0, DAT0, #0xff000000
429 bic DAT1, DAT1, #0xff000000
430 bic DAT2, DAT2, #0xff000000
431 bic DAT3, DAT3, #0xff000000
432 eorlslreg CHECK, DAT0, CHAN, 0 + (\channels - IDX2)
433 eorlslreg CHECK, DAT1, CHAN, 1 + (\channels - IDX2)
434 decr_modulo IDX2, 2, \channels
435 eorlslreg CHECK, DAT2, CHAN, 0 + (\channels - IDX2)
436 eorlslreg CHECK, DAT3, CHAN, 1 + (\channels - IDX2)
437 decr_modulo IDX2, 2, \channels
// Write the four processed words to OUT and advance it.
443 stm OUT!, {DAT0 - DAT3}
// Words per loop iteration: strip up to two factors of 2 from channels, then
// multiply by 4.
446 .set WORDS_PER_LOOP, \channels // calculate LCM (channels, 4)
447 .if (WORDS_PER_LOOP % 2) == 0
448 .set WORDS_PER_LOOP, WORDS_PER_LOOP / 2
450 .if (WORDS_PER_LOOP % 2) == 0
451 .set WORDS_PER_LOOP, WORDS_PER_LOOP / 2
453 .set WORDS_PER_LOOP, WORDS_PER_LOOP * 4
// Number of whole samples consumed per loop iteration.
454 .set SAMPLES_PER_LOOP, WORDS_PER_LOOP / \channels
// Exported entry point for the out-of-order variant, one instance per
// (channels, shift) pair.
456 function ff_mlp_pack_output_outoforder_\channels\()ch_\shift\()shift_armv6, export=1
// If COUNT is not a multiple of SAMPLES_PER_LOOP, hand over to the generic
// ff_mlp_pack_output routine. SAMPLES_PER_LOOP is LCM(channels, 4) / channels,
// which is 1, 2 or 4, so SAMPLES_PER_LOOP - 1 works as a low-bit mask.
457 .if SAMPLES_PER_LOOP > 1
458 tst COUNT, #SAMPLES_PER_LOOP - 1 // always seems to be in practice
460 bne X(ff_mlp_pack_output) // but just in case, branch to C implementation if not
// Nine registers are saved, so the first stacked argument is now at sp + 9*4.
465 push {v1-v6,sl,fp,lr}
466 ldr CHAN0, [sp, #(9+0)*4] // get ch_assign from stack
// NOTE(review): the instruction that loads CHAN4 through the pointer in CHAN0
// is not visible in this excerpt.
// First visible form: unpack two byte-sized channel indices from CHAN4.
469 uxtb CHAN0, CHAN4, ror #0
470 uxtb CHAN1, CHAN4, ror #8
// Second visible form: also fetch the next word of channel indices.
472 ldr CHAN5, [CHAN0, #4]
// Unpack six channel indices into CHAN0-CHAN5. CHAN4 and CHAN5 are overwritten
// last because they are also the packed sources.
// NOTE(review): the conditionals choosing between these forms per channel
// count are not visible here.
474 uxtb CHAN0, CHAN4, ror #0
475 uxtb CHAN1, CHAN4, ror #8
476 uxtb CHAN2, CHAN4, ror #16
477 uxtb CHAN3, CHAN4, ror #24
478 uxtb CHAN4, CHAN5, ror #0
479 uxtb CHAN5, CHAN5, ror #8
// Loop body is repeated once per four words of a loop iteration (the repeated
// statement and loop label are not visible in this excerpt).
485 .rept WORDS_PER_LOOP / 4
// Count down the samples handled by this iteration; flags are set for the loop
// branch.
488 subs COUNT, COUNT, #SAMPLES_PER_LOOP
// Closes a preprocessor conditional on CONFIG_THUMB whose opening line is not
// visible in this excerpt.
510 #endif // !CONFIG_THUMB
514 .endm // implement_pack
// pack_channels: for one (inorder, channels) pair, instantiates implement_pack
// once for each fixed shift value 0 through 5 and once more for the "mixed"
// per-channel shift variant.
516 .macro pack_channels inorder, channels
517 implement_pack \inorder, \channels, 0
518 implement_pack \inorder, \channels, 1
519 implement_pack \inorder, \channels, 2
520 implement_pack \inorder, \channels, 3
521 implement_pack \inorder, \channels, 4
522 implement_pack \inorder, \channels, 5
523 implement_pack \inorder, \channels, mixed
// pack_order: for one value of inorder, instantiates pack_channels for the
// three channel counts covered by this file: 2, 6 and 8.
// NOTE(review): the invocations of pack_order are not visible in this excerpt.
526 .macro pack_order inorder
527 pack_channels \inorder, 2
528 pack_channels \inorder, 6
529 pack_channels \inorder, 8