2 * ARM NEON optimised Float DSP functions
3 * Copyright (c) 2008 Mans Rullgard <mans@mansr.com>
5 * This file is part of Libav.
7 * Libav is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU Lesser General Public
9 * License as published by the Free Software Foundation; either
10 * version 2.1 of the License, or (at your option) any later version.
12 * Libav is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * Lesser General Public License for more details.
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with Libav; if not, write to the Free Software
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
/*
 * ff_vector_fmul_neon
 *
 * NEON routine that streams 32-bit elements in from the buffers addressed
 * by r1 and r2 and streams results out to the buffer addressed by r0.
 *
 * Register use visible in this excerpt:
 *   r0               destination pointer, advanced by every vst1 (writeback "!")
 *   r1               first source pointer, advanced by every vld1
 *   r2               second source pointer, advanced by every vld1
 *   d0-d3  (q0-q1)   data loaded from [r1]
 *   d4-d7  (q2-q3)   data loaded from [r2]
 *   d16-d23 (q8-q11) data stored to [r0]
 *
 * Every access uses the :128 alignment qualifier, so all three pointers
 * must be 16-byte aligned.
 *
 * NOTE(review): the embedded line numbers skip, so the instructions that
 * compute q8-q11, maintain the element counter, branch to the numeric
 * labels and return are not visible here. Presumably q8-q11 hold the
 * element-wise products of the two inputs (going by the function name) --
 * confirm against the complete file.
 */
27 function ff_vector_fmul_neon, export=1
/* Initial fetch: 32 bytes (eight 32-bit elements) from each source. */
29 vld1.32 {d0-d3}, [r1,:128]!
30 vld1.32 {d4-d7}, [r2,:128]!
/* Refill q0/q2 and then q1/q3, 16 bytes per load, from the two sources. */
37 vld1.32 {d0-d1}, [r1,:128]!
38 vld1.32 {d4-d5}, [r2,:128]!
40 vld1.32 {d2-d3}, [r1,:128]!
41 vld1.32 {d6-d7}, [r2,:128]!
/* Write out q8-q9 (32 bytes); the loads around it overlap with this store. */
43 vst1.32 {d16-d19},[r0,:128]!
/* Second refill of q0-q3 within the same visible sequence. */
44 vld1.32 {d0-d1}, [r1,:128]!
45 vld1.32 {d4-d5}, [r2,:128]!
47 vld1.32 {d2-d3}, [r1,:128]!
48 vld1.32 {d6-d7}, [r2,:128]!
/* Write out q10-q11 (32 bytes). */
50 vst1.32 {d20-d23},[r0,:128]!
/*
 * Label 2: loads and stores are interleaved 16 bytes at a time: q8 is
 * stored after q0/q2 are reloaded, q9 after q1/q3 are reloaded.
 * NOTE(review): the branch that targets this label is not visible in this
 * excerpt; presumably a shorter remainder path -- confirm.
 */
54 2: vld1.32 {d0-d1}, [r1,:128]!
55 vld1.32 {d4-d5}, [r2,:128]!
56 vst1.32 {d16-d17},[r0,:128]!
58 vld1.32 {d2-d3}, [r1,:128]!
59 vld1.32 {d6-d7}, [r2,:128]!
60 vst1.32 {d18-d19},[r0,:128]!
/* Label 3: final store of q8-q9 (32 bytes); no further loads are visible. */
62 3: vst1.32 {d16-d19},[r0,:128]!
/*
 * ff_vector_fmac_scalar_neon
 *
 * NEON routine that reads 32-bit elements from [r1] and from [acc] and
 * writes results to [r0], using a scalar that is broadcast into q15.
 *
 * Register use visible in this excerpt:
 *   r0        destination pointer, advanced by every vst1 (writeback "!")
 *   r1        source pointer, advanced by every vld1
 *   acc       second read pointer, advanced by every vld1; a register alias
 *             whose definition is not visible in this excerpt
 *   q0-q3     data loaded from [r1]
 *   q8-q11    data loaded from [acc] and later stored to [r0]
 *   q15       scalar replicated across all four lanes
 *
 * Every access uses the :128 alignment qualifier, so the pointers must be
 * 16-byte aligned.
 *
 * NOTE(review): the embedded line numbers skip, so the counter updates,
 * branches, return and most of the arithmetic are not visible here. Only
 * the multiply-accumulate into q8 at label 1 is shown; presumably q9-q11
 * are updated the same way on the hidden lines -- confirm against the
 * complete file.
 */
66 function ff_vector_fmac_scalar_neon, export=1
/*
 * Broadcast lane 0 of d0 into every lane of q15.
 * NOTE(review): "VFP" is a prefix defined outside this excerpt; presumably
 * it selects this line only when the scalar arrives in a VFP register --
 * confirm.
 */
71 VFP vdup.32 q15, d0[0]
/* Prime the pipeline: 16 bytes each into q0/q1 from [r1], q8/q9 from [acc]. */
76 vld1.32 {q0}, [r1,:128]!
77 vld1.32 {q8}, [acc,:128]!
78 vld1.32 {q1}, [r1,:128]!
79 vld1.32 {q9}, [acc,:128]!
/* Label 1: q8 += q0 * q15, lane by lane. */
80 1: vmla.f32 q8, q0, q15
/* Fetch the next two vectors from each stream into q2/q3 and q10/q11. */
81 vld1.32 {q2}, [r1,:128]!
82 vld1.32 {q10}, [acc,:128]!
84 vld1.32 {q3}, [r1,:128]!
85 vld1.32 {q11}, [acc,:128]!
/* Write out q8 and q9. */
87 vst1.32 {q8}, [r0,:128]!
89 vst1.32 {q9}, [r0,:128]!
/* Reload q0/q8 and q1/q9 while q10 and q11 are written out. */
92 vld1.32 {q0}, [r1,:128]!
93 vld1.32 {q8}, [acc,:128]!
94 vst1.32 {q10}, [r0,:128]!
95 vld1.32 {q1}, [r1,:128]!
96 vld1.32 {q9}, [acc,:128]!
97 vst1.32 {q11}, [r0,:128]!
/* Label 2: store q10 and q11 without starting any new loads. */
99 2: vst1.32 {q10}, [r0,:128]!
100 vst1.32 {q11}, [r0,:128]!
/*
 * Label 3: handles a single 16-byte vector: load from [r1] and [acc], then
 * store q8.
 * NOTE(review): the arithmetic between the loads and the store, and any
 * loop back to this label, are not visible in this excerpt.
 */
104 3: vld1.32 {q0}, [r1,:128]!
105 vld1.32 {q8}, [acc,:128]!
107 vst1.32 {q8}, [r0,:128]!