1 /*****************************************************************************
2 * arm_neon.c: NEON assembly optimized audio conversions
3 *****************************************************************************
4 * Copyright (C) 2009 Rémi Denis-Courmont
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
19 *****************************************************************************/
25 #include <vlc_common.h>
26 #include <vlc_plugin.h>
30 static int Open (vlc_object_t *);
33 set_description (N_("ARM NEON audio format conversions") )
34 set_capability ("audio filter", 20)
35 set_callbacks (Open, NULL)
36 add_requirement (NEON)
39 static void Do_F32_S32 (aout_instance_t *, aout_filter_t *,
40 aout_buffer_t *, aout_buffer_t *);
41 static void Do_S32_S16 (aout_instance_t *, aout_filter_t *,
42 aout_buffer_t *, aout_buffer_t *);
44 static int Open (vlc_object_t *obj)
46 aout_filter_t *filter = (aout_filter_t *)obj;
48 if (!AOUT_FMTS_SIMILAR (&filter->input, &filter->output))
51 switch (filter->input.i_format)
54 switch (filter->output.i_format)
57 filter->pf_do_work = Do_F32_S32;
65 switch (filter->output.i_format)
68 filter->pf_do_work = Do_S32_S16;
78 filter->b_in_place = true;
83 * Single-precision floating point to signed fixed point conversion.
85 static void Do_F32_S32 (aout_instance_t *aout, aout_filter_t *filter,
86 aout_buffer_t *inbuf, aout_buffer_t *outbuf)
88 unsigned nb_samples = inbuf->i_nb_samples
89 * aout_FormatNbChannels (&filter->input);
90 const float *inp = (float *)inbuf->p_buffer;
91 const float *endp = inp + nb_samples;
92 int32_t *outp = (int32_t *)outbuf->p_buffer;
97 "vldr.32 s0, [%[inp]]\n"
98 "vcvt.s32.f32 d0, d0, #28\n"
99 "vstr.32 s0, [%[outp]]\n"
101 : [outp] "r" (outp), [inp] "r" (inp)
109 "vld1.f32 {d0}, [%[inp]]!\n"
110 "vcvt.s32.f32 d0, d0, #28\n"
111 "vst1.s32 {d0}, [%[outp]]!\n"
112 : [outp] "+r" (outp), [inp] "+r" (inp)
118 "vld1.f32 {q0}, [%[inp]]!\n"
119 "vcvt.s32.f32 q0, q0, #28\n"
120 "vst1.s32 {q0}, [%[outp]]!\n"
121 : [outp] "+r" (outp), [inp] "+r" (inp)
127 "vld1.f32 {q0-q1}, [%[inp]]!\n"
128 "vcvt.s32.f32 q0, q0, #28\n"
129 "vcvt.s32.f32 q1, q1, #28\n"
130 "vst1.s32 {q0-q1}, [%[outp]]!\n"
131 : [outp] "+r" (outp), [inp] "+r" (inp)
133 : "q0", "q1", "memory");
135 outbuf->i_nb_samples = inbuf->i_nb_samples;
136 outbuf->i_nb_bytes = inbuf->i_nb_bytes;
141 * Signed 32-bit fixed point to signed 16-bit integer conversion.
143 static void Do_S32_S16 (aout_instance_t *aout, aout_filter_t *filter,
144 aout_buffer_t *inbuf, aout_buffer_t *outbuf)
146 unsigned nb_samples = inbuf->i_nb_samples
147 * aout_FormatNbChannels (&filter->input);
148 int32_t *inp = (int32_t *)inbuf->p_buffer;
149 const int32_t *endp = inp + nb_samples;
150 int16_t *outp = (int16_t *)outbuf->p_buffer;
152 while (nb_samples & 3)
154 const int16_t roundup = 1 << 12;
156 "qadd r0, %[inv], %[roundup]\n"
157 "ssat %[outv], #16, r0, asr #13\n"
158 : [outv] "=r" (*outp)
159 : [inv] "r" (*inp), [roundup] "r" (roundup)
168 "vld1.s32 {q0}, [%[inp]]!\n"
169 "vrshrn.i32 d0, q0, #13\n"
170 "vst1.s16 {d0}, [%[outp]]!\n"
171 : [outp] "+r" (outp), [inp] "+r" (inp)
177 "vld1.s32 {q0-q1}, [%[inp]]!\n"
178 "vrshrn.i32 d0, q0, #13\n"
179 "vrshrn.i32 d1, q1, #13\n"
180 "vst1.s16 {q0}, [%[outp]]!\n"
181 : [outp] "+r" (outp), [inp] "+r" (inp)
183 : "q0", "q1", "memory");
187 "vld1.s32 {q0-q1}, [%[inp]]!\n"
188 "vld1.s32 {q2-q3}, [%[inp]]!\n"
189 "vrshrn.s32 d0, q0, #13\n"
190 "vrshrn.s32 d1, q1, #13\n"
191 "vrshrn.s32 d2, q2, #13\n"
192 "vrshrn.s32 d3, q3, #13\n"
193 "vst1.s16 {q0-q1}, [%[outp]]!\n"
194 : [outp] "+r" (outp), [inp] "+r" (inp)
196 : "q0", "q1", "q2", "q3", "memory");
198 outbuf->i_nb_samples = inbuf->i_nb_samples;
199 outbuf->i_nb_bytes = inbuf->i_nb_bytes / 2;