]> git.sesse.net Git - ffmpeg/blob - libavutil/arm/float_dsp_neon.S
mathematics.h: remove a couple of math defines
[ffmpeg] / libavutil / arm / float_dsp_neon.S
1 /*
2  * ARM NEON optimised Float DSP functions
3  * Copyright (c) 2008 Mans Rullgard <mans@mansr.com>
4  *
5  * This file is part of Libav.
6  *
7  * Libav is free software; you can redistribute it and/or
8  * modify it under the terms of the GNU Lesser General Public
9  * License as published by the Free Software Foundation; either
10  * version 2.1 of the License, or (at your option) any later version.
11  *
12  * Libav is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15  * Lesser General Public License for more details.
16  *
17  * You should have received a copy of the GNU Lesser General Public
18  * License along with Libav; if not, write to the Free Software
19  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20  */
21
22 #include "config.h"
23 #include "asm.S"
24
25         preserve8
26
/*
 * ff_vector_fmul_neon: element-wise product of two float vectors.
 *
 * Presumably called as (dst, src0, src1, len) -- TODO confirm against the C
 * prototype.  What the code itself shows:
 *   r0  destination pointer (only stored to, post-incremented)
 *   r1  first source pointer (only loaded from, post-incremented)
 *   r2  second source pointer (only loaded from, post-incremented)
 *   r3  number of 32-bit elements to process
 *
 * Every load and store carries a :128 alignment qualifier, so all three
 * pointers are expected to be 16-byte aligned.
 *
 * The routine is software pipelined: products are held in q8/q9 (plus q10/q11
 * inside the main loop) and written out one stage after they are computed.
 * The first 8 elements are loaded and multiplied before the count is tested,
 * the main loop consumes 16 elements per iteration, and whatever is left
 * after that (if non-zero) is handled as exactly 8 elements.
 *
 * Modifies: r0-r3, ip, q0-q3, q8-q11 and the condition flags.
 */
27 function ff_vector_fmul_neon, export=1
28         subs            r3,  r3,  #8                 /* account for the 8 elements loaded below; flags used by beq 3f */
29         vld1.32         {d0-d3},  [r1,:128]!         /* q0,q1 = 8 floats from r1 */
30         vld1.32         {d4-d7},  [r2,:128]!         /* q2,q3 = 8 floats from r2 */
31         vmul.f32        q8,  q0,  q2                 /* first 8 products stay pending in q8/q9 */
32         vmul.f32        q9,  q1,  q3
33         beq             3f                           /* count was exactly 8: just flush q8/q9 */
34         bics            ip,  r3,  #15                /* ip = remaining count rounded down to a multiple of 16 */
35         beq             2f                           /* fewer than 16 left: skip the main loop */
/* Main loop: 16 elements per iteration; the flags set by subs are tested by bne at the bottom. */
36 1:      subs            ip,  ip,  #16
37         vld1.32         {d0-d1},  [r1,:128]!
38         vld1.32         {d4-d5},  [r2,:128]!
39         vmul.f32        q10, q0,  q2
40         vld1.32         {d2-d3},  [r1,:128]!
41         vld1.32         {d6-d7},  [r2,:128]!
42         vmul.f32        q11, q1,  q3
43         vst1.32         {d16-d19},[r0,:128]!         /* store q8/q9 computed in the previous stage */
44         vld1.32         {d0-d1},  [r1,:128]!
45         vld1.32         {d4-d5},  [r2,:128]!
46         vmul.f32        q8,  q0,  q2                 /* refill q8/q9 for the next stage */
47         vld1.32         {d2-d3},  [r1,:128]!
48         vld1.32         {d6-d7},  [r2,:128]!
49         vmul.f32        q9,  q1,  q3
50         vst1.32         {d20-d23},[r0,:128]!         /* store q10/q11 computed above */
51         bne             1b
52         ands            r3,  r3,  #15                /* elements left after the 16-wide loop */
53         beq             3f                           /* none: only the pending q8/q9 remain */
/* Remainder: store the pending q8/q9 while computing 8 more products into them. */
54 2:      vld1.32         {d0-d1},  [r1,:128]!
55         vld1.32         {d4-d5},  [r2,:128]!
56         vst1.32         {d16-d17},[r0,:128]!         /* flush pending q8 before overwriting it */
57         vmul.f32        q8,  q0,  q2
58         vld1.32         {d2-d3},  [r1,:128]!
59         vld1.32         {d6-d7},  [r2,:128]!
60         vst1.32         {d18-d19},[r0,:128]!         /* flush pending q9 before overwriting it */
61         vmul.f32        q9,  q1,  q3
62 3:      vst1.32         {d16-d19},[r0,:128]!         /* write the last 8 pending products */
63         bx              lr
64 endfunc
65
/*
 * ff_vector_fmac_scalar_neon: multiply-accumulate a float vector by a scalar
 * into the destination, i.e. dst[i] += src[i] * mul.
 *
 * Register use visible in the code:
 *   r0   destination pointer used for stores
 *   acc  copy of r0 used for loads of the current destination values
 *   r1   source pointer (only loaded from)
 *   len  number of 32-bit elements
 *   q15  the scalar replicated to all four lanes
 *
 * VFP/NOVFP are line-prefix macros defined outside this block (presumably in
 * asm.S, selecting between hard-float and soft-float argument passing --
 * confirm).  On VFP lines the scalar comes from d0[0] and len is r2; on NOVFP
 * lines the scalar comes from r2 and len is r3.
 *
 * Every load and store carries a :128 alignment qualifier, so both pointers
 * are expected to be 16-byte aligned.
 *
 * Modifies: r0, r1, r2, r3, r12, q0-q3, q8-q11, q15 and the condition flags.
 */
66 function ff_vector_fmac_scalar_neon, export=1
67 VFP     len .req r2
68 VFP     acc .req r3
69 NOVFP   len .req r3
70 NOVFP   acc .req r2
/* Broadcast the scalar to q15.  In the NOVFP case acc aliases r2, so this
   must read r2 before "mov acc, r0" below overwrites it. */
71 VFP     vdup.32         q15, d0[0]
72 NOVFP   vdup.32         q15, r2
73         bics            r12, len, #15                /* r12 = len rounded down to a multiple of 16 */
74         mov             acc, r0                      /* separate read pointer over the destination */
75         beq             3f                           /* tests the bics result: fewer than 16 elements */
/* Prime the pipeline with the first 8 source and destination elements. */
76         vld1.32         {q0},     [r1,:128]!
77         vld1.32         {q8},     [acc,:128]!
78         vld1.32         {q1},     [r1,:128]!
79         vld1.32         {q9},     [acc,:128]!
/* Main loop: 16 elements per iteration; loads through acc run ahead of the
   stores through r0. */
80 1:      vmla.f32        q8,  q0,  q15
81         vld1.32         {q2},     [r1,:128]!
82         vld1.32         {q10},    [acc,:128]!
83         vmla.f32        q9,  q1,  q15
84         vld1.32         {q3},     [r1,:128]!
85         vld1.32         {q11},    [acc,:128]!
86         vmla.f32        q10, q2,  q15
87         vst1.32         {q8},     [r0,:128]!
88         vmla.f32        q11, q3,  q15
89         vst1.32         {q9},     [r0,:128]!
90         subs            r12, r12, #16
91         beq             2f                           /* last block of 16: do not preload any further */
92         vld1.32         {q0},     [r1,:128]!         /* preload the next iteration while storing q10/q11 */
93         vld1.32         {q8},     [acc,:128]!
94         vst1.32         {q10},    [r0,:128]!
95         vld1.32         {q1},     [r1,:128]!
96         vld1.32         {q9},     [acc,:128]!
97         vst1.32         {q11},    [r0,:128]!
98         b               1b
99 2:      vst1.32         {q10},    [r0,:128]!         /* flush the results still pending from the loop */
100         vst1.32         {q11},    [r0,:128]!
101         ands            len, len, #15                /* elements left after the 16-wide loop */
102         it              eq                           /* IT prefix so the conditional bx is also valid as Thumb-2 */
103         bxeq            lr
/* Tail loop: 4 elements per iteration.  The body runs before the count is
   tested, so the remainder is effectively rounded up to a multiple of 4.
   NOTE(review): a len of 0 reaches this label through "beq 3f" above and
   still processes 4 elements -- confirm callers never pass 0. */
104 3:      vld1.32         {q0},     [r1,:128]!
105         vld1.32         {q8},     [acc,:128]!
106         vmla.f32        q8,  q0,  q15
107         vst1.32         {q8},     [r0,:128]!
108         subs            len, len, #4
109         bgt             3b
110         bx              lr
/* NOTE(review): only len is released here; the acc alias stays defined after
   this function. */
111         .unreq          len
112 endfunc