2 * Copyright (c) 2009 Mans Rullgard <mans@mansr.com>
4 * This file is part of Libav.
6 * Libav is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * Libav is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with Libav; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21 #include "libavutil/arm/asm.S"
/*
 * call_2x_pixels type, subp
 *
 * Emits the exported function ff_<type>_pixels16<subp>_armv6, which is
 * implemented by transferring control twice to the 8-pixel routine
 * ff_<type>_pixels8<subp>_armv6.
 *
 * NOTE(review): this listing omits lines of the macro body between the
 * ones shown (and its terminating directives), so only the two visible
 * control transfers are described here.
 */
23 .macro call_2x_pixels type, subp
24 function ff_\type\()_pixels16\subp\()_armv6, export=1
/* First pass: branch-with-link, so execution resumes here afterwards. */
26 bl ff_\type\()_pixels8\subp\()_armv6
/*
 * Second pass: plain branch (lr is not written by this instruction) --
 * presumably a tail call that returns straight to our caller; confirm
 * against the omitted lines that restore lr.
 */
30 b ff_\type\()_pixels8\subp\()_armv6
/*
 * Instantiate call_2x_pixels for the four "put" variants, producing
 * ff_put_pixels16_x2_armv6, ff_put_pixels16_y2_armv6,
 * ff_put_pixels16_x2_no_rnd_armv6 and ff_put_pixels16_y2_no_rnd_armv6.
 */
35 call_2x_pixels put, _x2
36 call_2x_pixels put, _y2
37 call_2x_pixels put, _x2_no_rnd
38 call_2x_pixels put, _y2_no_rnd
/*
 * ff_put_pixels16_armv6
 *
 * NOTE(review): only the store instructions of this routine are visible
 * in this listing; the loads, loop control and return are omitted.
 * The visible stores write register pairs through r0, using r2 as the
 * second address operand.  strd_post is a macro not defined in this
 * listing (presumably from the included asm.S); from its operands it
 * appears to store the pair at [r0] and then advance r0 by r2 -- confirm.
 */
40 function ff_put_pixels16_armv6, export=1
49 strd_post r4, r5, r0, r2
/* Store r10:r11 eight bytes past the current r0 (r0 itself unchanged). */
53 strd r10, r11, [r0, #8]
55 strd_post r8, r9, r0, r2
/*
 * ff_put_pixels8_armv6
 *
 * NOTE(review): only two stores are visible in this listing; loads, loop
 * control and return are omitted.  Two register pairs (r4:r5, r6:r7) are
 * written through r0 with r2 as the second address operand.  strd_post is
 * a macro defined outside this listing; presumably a doubleword store
 * followed by r0 += r2 -- confirm.
 */
62 function ff_put_pixels8_armv6, export=1
68 strd_post r4, r5, r0, r2
71 strd_post r6, r7, r0, r2
/*
 * ff_put_pixels8_x2_armv6
 *
 * NOTE(review): this listing shows only a subset of the routine's
 * instructions; the loads, the combining arithmetic and the loop control
 * are omitted, so the comments below cover the visible lines only.
 */
78 function ff_put_pixels8_x2_armv6, export=1
/*
 * OR r12 with itself shifted by 8 and then by 16 bits: if r12 starts out
 * holding a single byte value (its initialisation is not visible here),
 * that byte ends up replicated in all four byte lanes.
 */
81 orr r12, r12, r12, lsl #8
82 orr r12, r12, r12, lsl #16
/*
 * Insert the low byte of r5 into the top byte of r6 (and likewise r9 into
 * r10) -- presumably forming the source word displaced by one pixel; the
 * matching right shift of r6/r10 is not visible.
 */
90 orr r6, r6, r5, lsl #24
95 orr r10, r10, r9, lsl #24
/* Write two result pairs through r0 (strd_post is defined elsewhere). */
111 strd_post r4, r5, r0, r2
113 strd_post r8, r9, r0, r2
/*
 * ff_put_pixels8_y2_armv6
 *
 * NOTE(review): only the constant set-up and the stores are visible in
 * this listing; loads, arithmetic and loop control are omitted.
 */
119 function ff_put_pixels8_y2_armv6, export=1
/*
 * Replicate r12 across byte lanes (r12 |= r12 << 8, then r12 |= r12 << 16);
 * yields four copies of a byte if r12 held a byte-sized value beforehand
 * (initialisation not visible).
 */
122 orr r12, r12, r12, lsl #8
123 orr r12, r12, r12, lsl #16
/* Write two result pairs through r0 (strd_post is defined elsewhere). */
149 strd_post r8, r9, r0, r2
151 strd_post r10, r11, r0, r2
/*
 * ff_put_pixels8_x2_no_rnd_armv6
 *
 * NOTE(review): only two instructions of the body are visible in this
 * listing.  Each inserts the low byte of one register into the top byte
 * of another (r5 -> r6, r9 -> r12) -- presumably assembling source words
 * displaced by one pixel; the accompanying shifts are not shown.
 */
158 function ff_put_pixels8_x2_no_rnd_armv6, export=1
170 orr r6, r6, r5, lsl #24
172 orr r12, r12, r9, lsl #24
/*
 * ff_put_pixels8_y2_no_rnd_armv6
 *
 * NOTE(review): only the entry declaration is visible in this listing;
 * the body is omitted, so nothing further can be documented from here.
 */
186 function ff_put_pixels8_y2_no_rnd_armv6, export=1
/*
 * ff_avg_pixels8_armv6
 *
 * NOTE(review): this listing shows only part of the routine; the source
 * loads, the averaging arithmetic and the loop control are omitted.
 * ldrd_reg and strd_post are macros defined outside this listing; from
 * their operands they appear to load a pair from [r0, r2] and to store a
 * pair at [r0] followed by r0 += r2 respectively -- confirm.
 */
211 function ff_avg_pixels8_armv6, export=1
/*
 * Replicate lr across byte lanes (lr |= lr << 8, then lr |= lr << 16);
 * gives four copies of a byte if lr held a byte-sized value beforehand
 * (initialisation not visible).
 */
215 orr lr, lr, lr, lsl #8
216 orr lr, lr, lr, lsl #16
/* Read a pair addressed by r0 and r2 (the buffer that is also stored to). */
226 ldrd_reg r6, r7, r0, r2
238 strd_post r4, r5, r0, r2
242 ldrd_reg r4, r5, r0, r2
249 strd_post r6, r7, r0, r2
256 strd_post r6, r7, r0, r2
/*
 * ff_add_pixels_clamped_armv6
 *
 * NOTE(review): this listing omits several instructions (the pixel load
 * into r6/r7, any saturation step, loop control and return); comments
 * describe the visible lines only.
 */
261 function ff_add_pixels_clamped_armv6, export=1
/* Load four words from [r0] and advance r0 by 16 bytes. */
265 ldm r0!, {r4,r5,r12,lr}
/*
 * Regroup the halfwords of the loaded words:
 *   r8 = r5.low  : r4.low      r5 = r5.high : r4.high
 *   r4 = lr.low  : r12.low     lr = lr.high : r12.high
 * (top : bottom halfword).
 */
267 pkhbt r8, r4, r5, lsl #16
268 pkhtb r5, r5, r4, asr #16
269 pkhbt r4, r12, lr, lsl #16
270 pkhtb lr, lr, r12, asr #16
/* Add zero-extended bytes 1 and 3 of r6 / r7 to the halfwords of r5 / lr. */
273 uxtab16 r5, r5, r6, ror #8
275 uxtab16 lr, lr, r7, ror #8
/* Merge the two halfword groups back into byte order: odd lanes << 8. */
280 orr r6, r8, r5, lsl #8
281 orr r7, r4, lr, lsl #8
/* Write the result pair through r1 (strd_post is defined elsewhere). */
283 strd_post r6, r7, r1, r2
/*
 * ff_get_pixels_armv6
 *
 * NOTE(review): this listing omits some instructions (those that set up
 * r6 and r12, loop control and return).  From the visible lines: a pair
 * of words is read through r1, bytes are widened to halfwords, and four
 * words are written through r0.
 */
288 function ff_get_pixels_armv6, export=1
/* ldrd_post is defined elsewhere; presumably loads [r1] then r1 += r2. */
293 ldrd_post r4, r5, r1, r2
/* Zero-extend bytes 1 and 3 of the word into two halfwords. */
296 uxtb16 r4, r4, ror #8
298 uxtb16 r8, r5, ror #8
/*
 * Interleave halfwords (top : bottom):
 *   r5  = r4.low  : r6.low      r6  = r4.high : r6.high
 *   r7  = r8.low  : r12.low     r12 = r8.high : r12.high
 * r6 and r12 presumably hold the even-numbered bytes (bytes 0 and 2),
 * widened by instructions not visible here.
 */
300 pkhbt r5, r6, r4, lsl #16
301 pkhtb r6, r4, r6, asr #16
302 pkhbt r7, r12, r8, lsl #16
303 pkhtb r12, r8, r12, asr #16
/* Store the four words at [r0] and advance r0 by 16 bytes. */
304 stm r0!, {r5,r6,r7,r12}
/*
 * ff_diff_pixels_armv6
 *
 * NOTE(review): this listing omits the instructions between the ones
 * shown (including whatever combines the two inputs, the set-up of r9,
 * loop control and return).  Visible behaviour: word pairs are read
 * through r1 and r2 (both with r3 as the second address operand), bytes
 * are widened to halfwords, and four words are written through r0.
 */
310 function ff_diff_pixels_armv6, export=1
/* ldrd_post is defined elsewhere; presumably load then pointer += r3. */
316 ldrd_post r4, r5, r1, r3
317 ldrd_post r6, r7, r2, r3
/* Zero-extend bytes 1 and 3 of each word into two halfwords. */
319 uxtb16 r4, r4, ror #8
321 uxtb16 r6, r6, ror #8
326 uxtb16 r5, r5, ror #8
/* Interleave halfwords: r4 = r6.low : r9.low, r6 = r6.high : r9.high. */
328 pkhbt r4, r9, r6, lsl #16
329 pkhtb r6, r6, r9, asr #16
331 uxtb16 r7, r7, ror #8
/* Interleave halfwords: r8 = r5.low : r9.low, r9 = r5.high : r9.high. */
335 pkhbt r8, r9, r5, lsl #16
336 pkhtb r9, r5, r9, asr #16
/* Store the four words at [r0] and advance r0 by 16 bytes. */
337 stm r0!, {r4,r6,r8,r9}
/*
 * ff_pix_abs16_armv6
 *
 * NOTE(review): only the accumulation instructions are visible in this
 * listing; loads, loop control and the final combination of the
 * accumulators are omitted.
 *
 * Each usada8 adds the sum of absolute differences of the four byte
 * lanes of its two source registers to an accumulator.  Two accumulators
 * are used in alternation: r12 and lr.
 */
343 function ff_pix_abs16_armv6, export=1
353 usada8 r12, r4, r8, r12
356 usada8 lr, r5, r9, lr
358 usada8 r12, r6, r8, r12
360 usada8 lr, r7, r9, lr
/*
 * ff_pix_abs16_x2_armv6
 *
 * NOTE(review): this listing shows only a subset of the routine; loads,
 * the arithmetic that combines neighbouring pixels, loop control and
 * return are omitted.  Comments cover the visible lines only.
 */
372 function ff_pix_abs16_x2_armv6, export=1
/*
 * Replicate lr across byte lanes (lr |= lr << 8, then lr |= lr << 16);
 * gives four copies of a byte if lr held a byte-sized value beforehand
 * (initialisation not visible).
 */
377 orr lr, lr, lr, lsl #8
378 orr lr, lr, lr, lsl #16
/*
 * Insert the low byte of one register into the top byte of another --
 * presumably building source words displaced by one pixel; the matching
 * right shifts are not visible.
 */
385 orr r10, r10, r9, lsl #24
389 orr r6, r6, r5, lsl #24
/* Accumulate the per-byte sum of absolute differences into r0. */
393 usada8 r0, r4, r7, r0
401 orr r10, r10, r6, lsl #24
402 usada8 r0, r8, r4, r0
412 usada8 r0, r4, r8, r0
413 orr r10, r10, r7, lsl #24
421 usada8 r0, r5, r9, r0
/*
 * usad_y2 p0, p1, p2, p3, n0, n1, n2, n3
 *
 * Takes two groups of four register names and accumulates per-byte sums
 * of absolute differences into r0 via usada8.
 *
 * NOTE(review): most of the macro body (and its .endm) is omitted from
 * this listing.  The operand registers of the visible usada8 lines are
 * evidently rewritten by the omitted instructions, so their contents at
 * each point cannot be stated from here.
 */
427 .macro usad_y2 p0, p1, p2, p3, n0, n1, n2, n3
435 usada8 r0, \p0, \n2, r0
443 usada8 r0, \p1, \p0, r0
451 usada8 r0, \p2, \p1, r0
458 usada8 r0, \p3, \p0, r0
/*
 * ff_pix_abs16_y2_armv6
 *
 * NOTE(review): only the constant set-up and the two macro invocations
 * are visible in this listing; loads, loop control and return are
 * omitted.
 */
462 function ff_pix_abs16_y2_armv6, export=1
/*
 * Replicate lr across byte lanes (lr |= lr << 8, then lr |= lr << 16);
 * gives four copies of a byte if lr held a byte-sized value beforehand
 * (initialisation not visible).
 */
469 orr lr, lr, lr, lsl #8
470 orr lr, lr, lr, lsl #16
/*
 * Two usad_y2 expansions with the register groups r4-r7 and r8-r11
 * swapped between the "p" and "n" parameter positions.
 */
477 usad_y2 r4, r5, r6, r7, r8, r9, r10, r11
479 usad_y2 r8, r9, r10, r11, r4, r5, r6, r7
/*
 * ff_pix_abs8_armv6
 *
 * NOTE(review): this listing omits the loads into r6/r7, accumulator
 * initialisation, loop control and the final combination of the
 * accumulators.
 *
 * Visible behaviour: word pairs are read through r1 (ldrd_post is
 * defined elsewhere; presumably load then r1 += r3), alternating between
 * r4:r5 and r8:r9, and usada8 adds the per-byte sum of absolute
 * differences against r6 / r7 into two accumulators, r0 and lr.
 */
485 function ff_pix_abs8_armv6, export=1
491 ldrd_post r4, r5, r1, r3
496 ldrd_post r8, r9, r1, r3
497 usada8 r0, r4, r6, r0
499 usada8 lr, r5, r7, lr
503 ldrd_post r4, r5, r1, r3
504 usada8 r0, r8, r6, r0
506 usada8 lr, r9, r7, lr
509 usada8 r0, r8, r6, r0
510 usada8 lr, r9, r7, lr
/*
 * ff_sse16_armv6
 *
 * NOTE(review): this listing shows only the byte-widening steps and one
 * load; the remaining loads, the arithmetic on the widened values, loop
 * control and return are omitted.
 *
 * Every visible uxtb16 uses "ror #8", i.e. it zero-extends bytes 1 and 3
 * of the source word into the two halfwords of the destination.
 */
515 function ff_sse16_armv6, export=1
523 uxtb16 r4, r4, ror #8
525 uxtb16 r8, r8, ror #8
531 uxtb16 lr, r5, ror #8
533 uxtb16 r9, r7, ror #8
/* Load the word pair located 8 bytes past r1 (r1 itself unchanged). */
535 ldrd r4, r5, [r1, #8]
541 uxtb16 r4, r4, ror #8
543 uxtb16 r7, r7, ror #8
550 uxtb16 r5, r5, ror #8
552 uxtb16 r8, r8, ror #8
/*
 * ff_pix_norm1_armv6
 *
 * NOTE(review): only the byte-widening steps are visible in this
 * listing; loads, the arithmetic on the widened values, loop control and
 * return are omitted.
 *
 * Each visible uxtb16 zero-extends bytes 1 and 3 of its register into
 * two halfwords, in place, for r2 through r5.
 */
566 function ff_pix_norm1_armv6, export=1
573 uxtb16 r2, r2, ror #8
577 uxtb16 r3, r3, ror #8
581 uxtb16 r4, r4, ror #8
585 uxtb16 r5, r5, ror #8
/*
 * ff_pix_sum_armv6
 *
 * NOTE(review): only the accumulation instructions are visible in this
 * listing; loads, initialisation of lr and the accumulators, loop
 * control and the final combination are omitted.
 *
 * Each usada8 adds the per-byte sum of absolute differences between a
 * data register and lr to an accumulator (r2 or r3).  If lr holds zero
 * -- presumably the case, but its set-up is not visible -- this is simply
 * the sum of the four bytes of the data register.
 */
596 function ff_pix_sum_armv6, export=1
606 usada8 r2, r4, lr, r2
608 usada8 r3, r5, lr, r3
610 usada8 r2, r6, lr, r2
613 usada8 r3, r7, lr, r3
616 usada8 r3, r7, lr, r3