@ ARMv4 optimized DSP utils
@ Copyright (c) 2004 AGAWA Koji <i (AT) atty (DOT) jp>
@
-@ This file is part of FFmpeg.
+@ This file is part of Libav.
@
-@ FFmpeg is free software; you can redistribute it and/or
+@ Libav is free software; you can redistribute it and/or
@ modify it under the terms of the GNU Lesser General Public
@ License as published by the Free Software Foundation; either
@ version 2.1 of the License, or (at your option) any later version.
@
-@ FFmpeg is distributed in the hope that it will be useful,
+@ Libav is distributed in the hope that it will be useful,
@ but WITHOUT ANY WARRANTY; without even the implied warranty of
@ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
@ Lesser General Public License for more details.
@
@ You should have received a copy of the GNU Lesser General Public
-@ License along with FFmpeg; if not, write to the Free Software
+@ License along with Libav; if not, write to the Free Software
@ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
@
preserve8
-#if !HAVE_PLD
-.macro pld reg
-.endm
-#endif
-
@ ff_prefetch_arm is only assembled when HAVE_ARMV5TE is set.
#if HAVE_ARMV5TE
function ff_prefetch_arm, export=1
@ Loops by branching back to its own entry until r2 has counted down to zero.
@ NOTE(review): no pld instruction is visible in this excerpt of the body;
@ confirm against the full file.
subs r2, r2, #1 @ r2 -= 1 and set the flags tested by bne below
add r0, r0, r1 @ r0 += r1; plain add leaves the flags from subs intact
bne ff_prefetch_arm @ repeat while r2 != 0
bx lr
- .endfunc
+endfunc
+#else
@ Without ARMv5TE the pld mnemonic is redefined to the comment character,
@ so every pld line in this file is turned into a comment.
+#define pld @
#endif
.macro ALIGN_QWORD_D shift, Rd0, Rd1, Rd2, Rd3, Rn0, Rn1, Rn2, Rn3, Rn4
@ ----------------------------------------------------------------
.align 5
-function put_pixels16_arm, export=1
+function ff_put_pixels16_arm, export=1
@ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
@ block = word aligned, pixels = unaligned
@ This change renames the exported symbol by adding the ff_ prefix.
@ NOTE(review): only part of the body is visible in this excerpt; the push
@ that pairs with the pop below and the local label 4 are not shown.
pld [r1] @ prefetch hint for the address held in r1
add r0, r0, r2 @ r0 += r2 (presumably block += line_size - confirm)
bne 4b @ loop: back to the nearest preceding label 4 while Z is clear
pop {r4-r11,pc} @ restore r4-r11 and return by loading pc
- .endfunc
+endfunc
@ ----------------------------------------------------------------
.align 5
-function put_pixels8_arm, export=1
+function ff_put_pixels8_arm, export=1
@ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
@ block = word aligned, pixels = unaligned
@ The exported name gains the ff_ prefix in this change.
@ NOTE(review): the body is abridged in this excerpt; neither the matching
@ push nor the local label 4 is visible.
pld [r1] @ prefetch hint for the address in r1
add r0, r0, r2 @ r0 += r2 (presumably advances block by line_size - confirm)
bne 4b @ branch back to the previous label 4 while Z is clear
pop {r4-r5,pc} @ restore r4-r5 and return by loading pc
- .endfunc
+endfunc
@ ----------------------------------------------------------------
.align 5
-function put_pixels8_x2_arm, export=1
+function ff_put_pixels8_x2_arm, export=1
@ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
@ block = word aligned, pixels = unaligned
@ Symbol renamed with the ff_ prefix by this change.
@ NOTE(review): the pixel arithmetic, the matching push and the local
@ label 4 are not visible in this excerpt.
pld [r1] @ prefetch hint for the address in r1
add r0, r0, r2 @ r0 += r2 (presumably block += line_size - confirm)
bne 4b @ loop back to the previous label 4 while Z is clear
pop {r4-r10,pc} @ restore r4-r10 and return by loading pc
- .endfunc
+endfunc
.align 5
-function put_no_rnd_pixels8_x2_arm, export=1
+function ff_put_no_rnd_pixels8_x2_arm, export=1
@ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
@ block = word aligned, pixels = unaligned
@ Symbol renamed with the ff_ prefix by this change.
@ NOTE(review): the name suggests a variant without rounding, but the
@ arithmetic that would show this is not visible in this excerpt; the
@ matching push and the local label 4 are not shown either.
pld [r1] @ prefetch hint for the address in r1
add r0, r0, r2 @ r0 += r2 (presumably block += line_size - confirm)
bne 4b @ loop back to the previous label 4 while Z is clear
pop {r4-r10,pc} @ restore r4-r10 and return by loading pc
- .endfunc
+endfunc
@ ----------------------------------------------------------------
.align 5
-function put_pixels8_y2_arm, export=1
+function ff_put_pixels8_y2_arm, export=1
@ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
@ block = word aligned, pixels = unaligned
@ Symbol renamed with the ff_ prefix by this change.
@ NOTE(review): the body is abridged in this excerpt; the matching push and
@ the local label 6 used by the loop branch are not shown.
pld [r1] @ prefetch hint for the address in r1
add r0, r0, r2 @ r0 += r2 (presumably block += line_size - confirm)
bne 6b @ loop back to the previous label 6 while Z is clear
pop {r4-r11,pc} @ restore r4-r11 and return by loading pc
- .endfunc
+endfunc
.align 5
-function put_no_rnd_pixels8_y2_arm, export=1
+function ff_put_no_rnd_pixels8_y2_arm, export=1
@ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
@ block = word aligned, pixels = unaligned
@ Symbol renamed with the ff_ prefix by this change.
@ NOTE(review): the name suggests a variant without rounding, but the
@ arithmetic is not visible in this excerpt; the matching push and the
@ local label 6 are not shown either.
pld [r1] @ prefetch hint for the address in r1
add r0, r0, r2 @ r0 += r2 (presumably block += line_size - confirm)
bne 6b @ loop back to the previous label 6 while Z is clear
pop {r4-r11,pc} @ restore r4-r11 and return by loading pc
- .endfunc
+endfunc
.ltorg
and r9, r5, r14
and r10, r6, r14
and r11, r7, r14
+ it eq
andeq r14, r14, r14, \rnd #1
add r8, r8, r10
add r9, r9, r11
ldr r12, =0xfcfcfcfc >> 2
+ itt eq
addeq r8, r8, r14
addeq r9, r9, r14
and r4, r12, r4, lsr #2
.endm
.align 5
-function put_pixels8_xy2_arm, export=1
+function ff_put_pixels8_xy2_arm, export=1
@ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
@ block = word aligned, pixels = unaligned
@ The body is built from macros whose definitions are outside this excerpt:
@ JMP_ALIGN, then one RND_XY2_EXPAND expansion at each of the local labels
@ 1-4, with first argument 0-3 and lsl as the second argument.
@ NOTE(review): presumably JMP_ALIGN selects one of labels 1-4 from the low
@ bits of r1 - confirm against the macro definition.
@ NOTE(review): no pop or return is visible in this function text;
@ presumably each expansion ends with its own return - confirm.
pld [r1]
push {r4-r11,lr} @ R14 is also called LR
JMP_ALIGN r5, r1
@ The changes below fold each label onto the same line as its macro call.
-1:
- RND_XY2_EXPAND 0, lsl
-
+1: RND_XY2_EXPAND 0, lsl
.align 5
-2:
- RND_XY2_EXPAND 1, lsl
-
+2: RND_XY2_EXPAND 1, lsl
.align 5
-3:
- RND_XY2_EXPAND 2, lsl
-
+3: RND_XY2_EXPAND 2, lsl
.align 5
-4:
- RND_XY2_EXPAND 3, lsl
- .endfunc
+4: RND_XY2_EXPAND 3, lsl
+endfunc
.align 5
-function put_no_rnd_pixels8_xy2_arm, export=1
+function ff_put_no_rnd_pixels8_xy2_arm, export=1
@ void func(uint8_t *block, const uint8_t *pixels, int line_size, int h)
@ block = word aligned, pixels = unaligned
@ Same layout as ff_put_pixels8_xy2_arm, except that RND_XY2_EXPAND is
@ given lsr instead of lsl as its second argument.
@ JMP_ALIGN and RND_XY2_EXPAND are macros defined outside this excerpt.
@ NOTE(review): presumably JMP_ALIGN selects one of labels 1-4 from the low
@ bits of r1, and each expansion ends with its own return - confirm.
pld [r1]
push {r4-r11,lr} @ save r4-r11 and the return address
JMP_ALIGN r5, r1
@ The changes below fold each label onto the same line as its macro call.
-1:
- RND_XY2_EXPAND 0, lsr
-
+1: RND_XY2_EXPAND 0, lsr
.align 5
-2:
- RND_XY2_EXPAND 1, lsr
-
+2: RND_XY2_EXPAND 1, lsr
.align 5
-3:
- RND_XY2_EXPAND 2, lsr
-
+3: RND_XY2_EXPAND 2, lsr
.align 5
-4:
- RND_XY2_EXPAND 3, lsr
- .endfunc
+4: RND_XY2_EXPAND 3, lsr
+endfunc
.align 5
-@ void ff_add_pixels_clamped_ARM(int16_t *block, uint8_t *dest, int stride)
-function ff_add_pixels_clamped_ARM, export=1
+@ void ff_add_pixels_clamped_arm(int16_t *block, uint8_t *dest, int stride)
+function ff_add_pixels_clamped_arm, export=1
/* This change lowercases the symbol suffix (ARM to arm) and adds an IT */
/* instruction in front of every conditional movne. */
/* NOTE(review): the body is abridged in this excerpt; the adds that produce */
/* r6 and r8, the stores, and the loop decrement and branch are not visible. */
/* The same four-step pattern repeats below: invert r5 and r7, test bit 8 */
/* of r6 and r8, and where it is set replace the register with the top byte */
/* of the inverted value. For a value loaded with ldrsh that top byte is */
/* 0xFF when the value was non-negative and 0x00 when it was negative, */
/* which presumably implements saturation to 0..255 - confirm. */
push {r4-r10} /* save r4-r10; lr is not pushed, return is via bx lr */
mov r10, #8 /* r10 = 8, presumably the row counter - confirm */
1:
mvn r5, r5 /* r5 = ~r5 */
mvn r7, r7 /* r7 = ~r7 */
tst r6, #0x100 /* is bit 8 of r6 set? */
+ it ne
movne r6, r5, lsr #24 /* if so, r6 = r5 >> 24 (logical shift) */
tst r8, #0x100 /* same test for r8 */
+ it ne
movne r8, r7, lsr #24 /* if so, r8 = r7 >> 24 */
mov r9, r6 /* start a packed word: r9 = r6 */
ldrsh r5, [r0, #4] /* moved from [A] */
mvn r5, r5
mvn r7, r7
tst r6, #0x100
+ it ne
movne r6, r5, lsr #24
tst r8, #0x100
+ it ne
movne r8, r7, lsr #24
orr r9, r9, r6, lsl #16 /* r9 |= r6 << 16 */
ldr r4, [r1, #4] /* moved from [B] */
mvn r5, r5
mvn r7, r7
tst r6, #0x100
+ it ne
movne r6, r5, lsr #24
tst r8, #0x100
+ it ne
movne r8, r7, lsr #24
mov r9, r6 /* start the next packed word: r9 = r6 */
ldrsh r5, [r0, #12] /* moved from [D] */
mvn r5, r5
mvn r7, r7
tst r6, #0x100
+ it ne
movne r6, r5, lsr #24
tst r8, #0x100
+ it ne
movne r8, r7, lsr #24
orr r9, r9, r6, lsl #16 /* r9 |= r6 << 16 */
add r0, r0, #16 /* moved from [E] */
pop {r4-r10} /* restore the registers saved on entry */
bx lr
- .endfunc
+endfunc