/*
* Copyright (c) 2009 Mans Rullgard <mans@mansr.com>
*
- * This file is part of FFmpeg.
+ * This file is part of Libav.
*
- * FFmpeg is free software; you can redistribute it and/or
+ * Libav is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
- * FFmpeg is distributed in the hope that it will be useful,
+ * Libav is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
+ * License along with Libav; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
-#include "asm.S"
-
- .text
+#include "libavutil/arm/asm.S"
/*
 * call_2x_pixels type, subp
 *
 * Defines the exported symbol ff_<type>_pixels16<subp>_armv6.
 * The instructions shown here add 8 to both r0 and r1 and then branch with
 * a plain b (lr is not rewritten) to ff_<type>_pixels8<subp>_armv6, so that
 * routine returns directly to whoever called the 16-wide entry point.
 *
 * NOTE(review): r0/r1 are presumably the destination/source pointers and
 * the +8 presumably selects the right-hand 8 columns of a 16-wide block;
 * confirm against the pixels8 routines and the callers.
 */
.macro call_2x_pixels type, subp
function ff_\type\()_pixels16\subp\()_armv6, export=1
add r0, r0, #8
add r1, r1, #8
b ff_\type\()_pixels8\subp\()_armv6
-.endfunc
+endfunc
.endm
call_2x_pixels avg
ldr r5, [r1, #4]
ldr r6, [r1, #8]
ldr r7, [r1, #12]
- ldr r4, [r1], r2
+ ldr_post r4, r1, r2
strd r6, r7, [r0, #8]
ldr r9, [r1, #4]
- strd r4, r5, [r0], r2
+ strd_post r4, r5, r0, r2
ldr r10, [r1, #8]
ldr r11, [r1, #12]
- ldr r8, [r1], r2
+ ldr_post r8, r1, r2
strd r10, r11, [r0, #8]
subs r3, r3, #2
- strd r8, r9, [r0], r2
+ strd_post r8, r9, r0, r2
bne 1b
pop {r4-r11}
bx lr
-.endfunc
+endfunc
/*
 * ff_put_pixels8_armv6
 *
 * Copies rows of 8 bytes from the address in r1 to the address in r0.
 *
 *   r0  destination pointer; advanced by r2 after each row is stored
 *   r1  source pointer; advanced by r2 after each row is loaded
 *   r2  per-row pointer increment in bytes
 *   r3  row counter; reduced by 2 on every pass
 *
 * Two rows are copied per pass and bne is the only exit test, so r3 is
 * expected to be a positive even number (an odd or zero count never lands
 * exactly on zero at the expected pass).
 * r4-r7 are saved on entry and restored before returning with bx lr.
 */
function ff_put_pixels8_armv6, export=1
push {r4-r7}
1:
/* First row: bytes 4..7 are fetched before bytes 0..3, because the load
 * of bytes 0..3 is the one that post-increments r1 to the next row. */
ldr r5, [r1, #4]
- ldr r4, [r1], r2
+ ldr_post r4, r1, r2
/* Second row: same pattern, interleaved with the store of the first row. */
ldr r7, [r1, #4]
- strd r4, r5, [r0], r2
- ldr r6, [r1], r2
+ strd_post r4, r5, r0, r2
+ ldr_post r6, r1, r2
/* The flags set here survive the following store and drive the bne. */
subs r3, r3, #2
- strd r6, r7, [r0], r2
+ strd_post r6, r7, r0, r2
bne 1b
pop {r4-r7}
bx lr
-.endfunc
+endfunc
function ff_put_pixels8_x2_armv6, export=1
push {r4-r11, lr}
ldr r5, [r1, #4]
ldr r7, [r1, #5]
lsr r6, r4, #8
- ldr r8, [r1, r2]!
+ ldr_pre r8, r1, r2
orr r6, r6, r5, lsl #24
ldr r9, [r1, #4]
ldr r11, [r1, #5]
uhadd8 r9, r9, r11
and r6, r6, r12
uadd8 r8, r8, r14
- strd r4, r5, [r0], r2
+ strd_post r4, r5, r0, r2
uadd8 r9, r9, r6
- strd r8, r9, [r0], r2
+ strd_post r8, r9, r0, r2
bne 1b
pop {r4-r11, pc}
-.endfunc
+endfunc
function ff_put_pixels8_y2_armv6, export=1
push {r4-r11}
orr r12, r12, r12, lsl #16
ldr r4, [r1]
ldr r5, [r1, #4]
- ldr r6, [r1, r2]!
+ ldr_pre r6, r1, r2
ldr r7, [r1, #4]
1:
subs r3, r3, #2
uhadd8 r9, r5, r7
eor r11, r5, r7
and r10, r10, r12
- ldr r4, [r1, r2]!
+ ldr_pre r4, r1, r2
uadd8 r8, r8, r10
and r11, r11, r12
uadd8 r9, r9, r11
eor r7, r5, r7
uadd8 r10, r10, r6
and r7, r7, r12
- ldr r6, [r1, r2]!
+ ldr_pre r6, r1, r2
uadd8 r11, r11, r7
- strd r8, r9, [r0], r2
+ strd_post r8, r9, r0, r2
ldr r7, [r1, #4]
- strd r10, r11, [r0], r2
+ strd_post r10, r11, r0, r2
bne 1b
pop {r4-r11}
bx lr
-.endfunc
+endfunc
function ff_put_pixels8_x2_no_rnd_armv6, export=1
push {r4-r9, lr}
ldr r4, [r1]
ldr r5, [r1, #4]
ldr r7, [r1, #5]
- ldr r8, [r1, r2]!
+ ldr_pre r8, r1, r2
ldr r9, [r1, #4]
ldr r14, [r1, #5]
add r1, r1, r2
bne 1b
pop {r4-r9, pc}
-.endfunc
+endfunc
function ff_put_pixels8_y2_no_rnd_armv6, export=1
push {r4-r9, lr}
ldr r4, [r1]
ldr r5, [r1, #4]
- ldr r6, [r1, r2]!
+ ldr_pre r6, r1, r2
ldr r7, [r1, #4]
1:
subs r3, r3, #2
uhadd8 r8, r4, r6
- ldr r4, [r1, r2]!
+ ldr_pre r4, r1, r2
uhadd8 r9, r5, r7
ldr r5, [r1, #4]
uhadd8 r12, r4, r6
- ldr r6, [r1, r2]!
+ ldr_pre r6, r1, r2
uhadd8 r14, r5, r7
ldr r7, [r1, #4]
stm r0, {r8,r9}
bne 1b
pop {r4-r9, pc}
-.endfunc
+endfunc
function ff_avg_pixels8_armv6, export=1
pld [r1, r2]
orr lr, lr, lr, lsl #16
ldrd r4, r5, [r0]
ldr r10, [r1, #4]
- ldr r9, [r1], r2
+ ldr_post r9, r1, r2
subs r3, r3, #2
1:
pld [r1, r2]
eor r8, r4, r9
uhadd8 r4, r4, r9
eor r12, r5, r10
- ldrd r6, r7, [r0, r2]
+ ldrd_reg r6, r7, r0, r2
uhadd8 r5, r5, r10
and r8, r8, lr
ldr r10, [r1, #4]
and r12, r12, lr
uadd8 r4, r4, r8
- ldr r9, [r1], r2
+ ldr_post r9, r1, r2
eor r8, r6, r9
uadd8 r5, r5, r12
pld [r1, r2, lsl #1]
eor r12, r7, r10
uhadd8 r6, r6, r9
- strd r4, r5, [r0], r2
+ strd_post r4, r5, r0, r2
uhadd8 r7, r7, r10
beq 2f
and r8, r8, lr
- ldrd r4, r5, [r0, r2]
+ ldrd_reg r4, r5, r0, r2
uadd8 r6, r6, r8
ldr r10, [r1, #4]
and r12, r12, lr
subs r3, r3, #2
uadd8 r7, r7, r12
- ldr r9, [r1], r2
- strd r6, r7, [r0], r2
+ ldr_post r9, r1, r2
+ strd_post r6, r7, r0, r2
b 1b
2:
and r8, r8, lr
and r12, r12, lr
uadd8 r6, r6, r8
uadd8 r7, r7, r12
- strd r6, r7, [r0], r2
+ strd_post r6, r7, r0, r2
pop {r4-r10, pc}
-.endfunc
+endfunc
function ff_add_pixels_clamped_armv6, export=1
push {r4-r8,lr}
orr r6, r8, r5, lsl #8
orr r7, r4, lr, lsl #8
subs r3, r3, #1
- strd r6, r7, [r1], r2
+ strd_post r6, r7, r1, r2
bgt 1b
pop {r4-r8,pc}
-.endfunc
+endfunc
function ff_get_pixels_armv6, export=1
pld [r1, r2]
push {r4-r8, lr}
mov lr, #8
1:
- ldrd r4, r5, [r1], r2
+ ldrd_post r4, r5, r1, r2
subs lr, lr, #1
uxtb16 r6, r4
uxtb16 r4, r4, ror #8
bgt 1b
pop {r4-r8, pc}
-.endfunc
+endfunc
function ff_diff_pixels_armv6, export=1
pld [r1, r3]
push {r4-r9, lr}
mov lr, #8
1:
- ldrd r4, r5, [r1], r3
- ldrd r6, r7, [r2], r3
+ ldrd_post r4, r5, r1, r3
+ ldrd_post r6, r7, r2, r3
uxtb16 r8, r4
uxtb16 r4, r4, ror #8
uxtb16 r9, r6
bgt 1b
pop {r4-r9, pc}
-.endfunc
+endfunc
function ff_pix_abs16_armv6, export=1
ldr r0, [sp]
2:
add r0, r12, lr
pop {r4-r9, pc}
-.endfunc
+endfunc
function ff_pix_abs16_x2_armv6, export=1
ldr r12, [sp]
bgt 1b
pop {r4-r11, pc}
-.endfunc
+endfunc
.macro usad_y2 p0, p1, p2, p3, n0, n1, n2, n3
ldr \n0, [r2]
bgt 1b
pop {r4-r11, pc}
-.endfunc
+endfunc
function ff_pix_abs8_armv6, export=1
pld [r2, r3]
push {r4-r9, lr}
mov r0, #0
mov lr, #0
- ldrd r4, r5, [r1], r3
+ ldrd_post r4, r5, r1, r3
1:
subs r12, r12, #2
ldr r7, [r2, #4]
- ldr r6, [r2], r3
- ldrd r8, r9, [r1], r3
+ ldr_post r6, r2, r3
+ ldrd_post r8, r9, r1, r3
usada8 r0, r4, r6, r0
pld [r2, r3]
usada8 lr, r5, r7, lr
ldr r7, [r2, #4]
- ldr r6, [r2], r3
+ ldr_post r6, r2, r3
beq 2f
- ldrd r4, r5, [r1], r3
+ ldrd_post r4, r5, r1, r3
usada8 r0, r8, r6, r0
pld [r2, r3]
usada8 lr, r9, r7, lr
usada8 lr, r9, r7, lr
add r0, r0, lr
pop {r4-r9, pc}
-.endfunc
+endfunc
+
/*
 * ff_sse16_armv6
 *
 * Sum of squared byte differences between two 16-byte-wide pixel blocks.
 *
 *   r0    incoming value is ignored (overwritten with 0); used as the
 *         running sum and holds the result on return
 *   r1    first block pointer, read 16 bytes per row via two ldrd
 *   r2    second block pointer, read 16 bytes per row via four ldr
 *   r3    byte increment added to both r1 and r2 after every row
 *   [sp]  first stack word at entry, loaded into r12 and used as the row
 *         counter (presumably the block height; confirm against caller)
 *
 * The counter is tested at the bottom of the loop, so one row is always
 * processed even if the count is not positive.
 * r4-r9 and lr are saved on entry; the function returns by popping pc.
 *
 * Technique: uxtb16 widens the even bytes of a word to two 16-bit lanes
 * and "uxtb16 ..., ror #8" does the same for the odd bytes.  usub16 then
 * subtracts lane-wise (each difference lies in -255..255, so it fits a
 * signed halfword), and "smlad r0, d, d, r0" adds both squared lanes to
 * the running sum.
 */
+function ff_sse16_armv6, export=1
/* Fetch the stack argument before the push moves sp. */
+ ldr r12, [sp]
+ push {r4-r9, lr}
+ mov r0, #0
+1:
/* Bytes 0..3: r1 word in r4 (r5 holds bytes 4..7), r2 word in r8. */
+ ldrd r4, r5, [r1]
+ ldr r8, [r2]
+ uxtb16 lr, r4
+ uxtb16 r4, r4, ror #8
+ uxtb16 r9, r8
+ uxtb16 r8, r8, ror #8
/* The load of the r2 bytes 4..7 is issued early, ahead of its use. */
+ ldr r7, [r2, #4]
+ usub16 lr, lr, r9
+ usub16 r4, r4, r8
+ smlad r0, lr, lr, r0
/* Bytes 4..7: even lanes in r6/r8, odd lanes in lr/r9. */
+ uxtb16 r6, r5
+ uxtb16 lr, r5, ror #8
+ uxtb16 r8, r7
+ uxtb16 r9, r7, ror #8
+ smlad r0, r4, r4, r0
/* r4/r5 are free again: reload them with r1 bytes 8..15. */
+ ldrd r4, r5, [r1, #8]
+ usub16 r6, r6, r8
+ usub16 r8, lr, r9
+ ldr r7, [r2, #8]
+ smlad r0, r6, r6, r0
/* Bytes 8..11. */
+ uxtb16 lr, r4
+ uxtb16 r4, r4, ror #8
+ uxtb16 r9, r7
+ uxtb16 r7, r7, ror #8
+ smlad r0, r8, r8, r0
+ ldr r8, [r2, #12]
+ usub16 lr, lr, r9
+ usub16 r4, r4, r7
+ smlad r0, lr, lr, r0
/* Bytes 12..15. */
+ uxtb16 r6, r5
+ uxtb16 r5, r5, ror #8
+ uxtb16 r9, r8
+ uxtb16 r8, r8, ror #8
+ smlad r0, r4, r4, r0
+ usub16 r6, r6, r9
+ usub16 r5, r5, r8
+ smlad r0, r6, r6, r0
/* Step both pointers to the next row and count the row down; the final
 * smlad does not touch the condition flags that bgt tests. */
+ add r1, r1, r3
+ add r2, r2, r3
+ subs r12, r12, #1
+ smlad r0, r5, r5, r0
+ bgt 1b
+
+ pop {r4-r9, pc}
+endfunc
+
/*
 * ff_pix_norm1_armv6
 *
 * Sum of the squares of every byte in a block of 16 rows by 16 bytes.
 *
 *   r0  block pointer on entry (read four words per row with ldm);
 *       holds the result on return
 *   r1  byte increment added to the pointer after each row
 *
 * r12 counts the 16 rows and lr accumulates the sum.  r2-r5 are clobbered
 * by the ldm; r4-r6 and lr are saved on entry and the function returns by
 * popping pc.
 *
 * Technique: uxtb16 widens the even bytes of a word to two 16-bit lanes
 * and "uxtb16 ..., ror #8" does the same for the odd bytes; each
 * "smlad lr, x, x, lr" then adds both squared lanes to the sum.
 */
+function ff_pix_norm1_armv6, export=1
+ push {r4-r6, lr}
+ mov r12, #16
+ mov lr, #0
+1:
/* One row: 16 bytes in r2..r5; the pointer is not written back here. */
+ ldm r0, {r2-r5}
/* r6 is the scratch register for the even lanes, while the odd lanes
 * overwrite the source word in place. */
+ uxtb16 r6, r2
+ uxtb16 r2, r2, ror #8
+ smlad lr, r6, r6, lr
+ uxtb16 r6, r3
+ smlad lr, r2, r2, lr
+ uxtb16 r3, r3, ror #8
+ smlad lr, r6, r6, lr
+ uxtb16 r6, r4
+ smlad lr, r3, r3, lr
+ uxtb16 r4, r4, ror #8
+ smlad lr, r6, r6, lr
+ uxtb16 r6, r5
+ smlad lr, r4, r4, lr
+ uxtb16 r5, r5, ror #8
+ smlad lr, r6, r6, lr
/* Count the row down and advance to the next one; the add and smlad that
 * follow leave the flags from subs intact for bgt. */
+ subs r12, r12, #1
+ add r0, r0, r1
+ smlad lr, r5, r5, lr
+ bgt 1b
+
+ mov r0, lr
+ pop {r4-r6, pc}
+endfunc
+
/*
 * ff_pix_sum_armv6
 *
 * Sum of every byte in a block of 16 rows by 16 bytes.
 *
 *   r0  block pointer on entry; holds the result on return
 *   r1  byte increment applied to the pointer between rows
 *
 * r12 counts the 16 rows.  lr is held at zero so that
 * "usada8 acc, w, lr, acc" adds the four bytes of w to acc.  Two
 * accumulators (r2 and r3) are used alternately and combined at the end.
 * r4-r7 and lr are saved on entry; the function returns by popping pc.
 */
+function ff_pix_sum_armv6, export=1
+ push {r4-r7, lr}
+ mov r12, #16
+ mov r2, #0
+ mov r3, #0
+ mov lr, #0
/* The first word of the first row is loaded ahead of the loop. */
+ ldr r4, [r0]
+1:
/* The flags set here are still valid at the beq below: the intervening
 * ldr and usada8 instructions do not modify them. */
+ subs r12, r12, #1
+ ldr r5, [r0, #4]
+ usada8 r2, r4, lr, r2
+ ldr r6, [r0, #8]
+ usada8 r3, r5, lr, r3
+ ldr r7, [r0, #12]
+ usada8 r2, r6, lr, r2
/* After the last row, leave before loading the first word of a next row
 * that does not exist; the pending r7 word is summed at label 2. */
+ beq 2f
/* Step to the next row and load its first word in one instruction. */
+ ldr_pre r4, r0, r1
+ usada8 r3, r7, lr, r3
/* Only reached with a non-zero count, so this branch is taken whenever the
 * remaining count is positive. */
+ bgt 1b
+2:
+ usada8 r3, r7, lr, r3
+ add r0, r2, r3
+ pop {r4-r7, pc}
+endfunc