]> git.sesse.net Git - vlc/blob - modules/arm_neon/amplify.S
enable the macosx GUI to handle negative stop-time
[vlc] / modules / arm_neon / amplify.S
1  @*****************************************************************************
2  @ amplify.S : ARM NEON software amplification
3  @*****************************************************************************
4  @ Copyright (C) 2012 Rémi Denis-Courmont
5  @
6  @ This program is free software; you can redistribute it and/or modify
7  @ it under the terms of the GNU Lesser General Public License as published by
8  @ the Free Software Foundation; either version 2.1 of the License, or
9  @ (at your option) any later version.
10  @
11  @ This program is distributed in the hope that it will be useful,
12  @ but WITHOUT ANY WARRANTY; without even the implied warranty of
13  @ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  @ GNU Lesser General Public License for more details.
15  @
16  @ You should have received a copy of the GNU Lesser General Public License
17  @ along with this program; if not, write to the Free Software Foundation,
18  @ Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
19  @****************************************************************************/
20
21         .syntax unified
22         .arm
23         .fpu    neon
24         .text
25
26 #define DST     r0
27 #define SRC     r1
28 #define SIZE    r2
29         .align 2
30         .global amplify_float_arm_neon
31         .type   amplify_float_arm_neon, %function
32 amplify_float_arm_neon:
33         cmp             SIZE,   #0
34         bxeq            lr
35 #ifdef __ARM_PCS
36         vmov            s0,     r3      @ softfp
37 #endif
38         pld             [SRC,   #64]
39         vld1.f32        {d16-d17},      [SRC,:128]!
40         subs            SIZE,   SIZE,   #16
41         vmul.f32        d16,    d16,    d0[0]
42         vmul.f32        d17,    d17,    d0[0]
43         blo             5f
44         pld             [SRC,   #64]
45         vld1.f32        {d18-d19},      [SRC,:128]!
46         subs            SIZE,   SIZE,   #16
47         vmul.f32        d18,    d18,    d0[0]
48         vmul.f32        d19,    d19,    d0[0]
49         blo             2f
50 1:      @ main loop starts
51         pld             [SRC,   #64]
52         vld1.f32        {d20-d21},      [SRC,:128]!
53         subs            SIZE,   SIZE,   #16
54         vmul.f32        d20,    d20,    d0[0]
55         vmul.f32        d21,    d21,    d0[0]
56         vst1.f32        {d16-d17},      [DST,:128]!
57         blo             3f
58         pld             [SRC,   #64]
59         vld1.f32        {d16-d17},      [SRC,:128]!
60         subs            SIZE,   SIZE,   #16
61         vmul.f32        d16,    d16,    d0[0]
62         vmul.f32        d17,    d17,    d0[0]
63         vst1.f32        {d18-d19},      [DST,:128]!
64         blo             4f
65         pld             [SRC,   #64]
66         vld1.f32        {d18-d19},      [SRC,:128]!
67         subs            SIZE,   SIZE,   #16
68         vmul.f32        d18,    d18,    d0[0]
69         vmul.f32        d19,    d19,    d0[0]
70         vst1.f32        {d20-d21},      [DST,:128]!
71         bhi             1b
72         @ main loop ends
73 2:      vst1.f32        {d16-d17},      [DST,:128]!
74         vst1.f32        {d18-d19},      [DST,:128]!
75         bx              lr
76 3:      vst1.f32        {d18-d19},      [DST,:128]!
77         vst1.f32        {d20-d21},      [DST,:128]!
78         bx              lr
79 4:      vst1.f32        {d20-d21},      [DST,:128]!
80 5:      vst1.f32        {d16-d17},      [DST,:128]!
81         bx              lr