1 /*****************************************************************************
2 * arm_neon.c: NEON assembly optimized audio conversions
3 *****************************************************************************
4 * Copyright (C) 2009 Rémi Denis-Courmont
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
19 *****************************************************************************/
25 #include <vlc_common.h>
26 #include <vlc_plugin.h>
/* Forward declaration of the filter setup routine; it is registered as the
 * first callback in set_callbacks() below. */
29 static int Open (vlc_object_t *);
/* Module descriptor entries: a translatable description string, the
 * "audio filter" capability with value 20 (presumably the module's priority
 * score -- confirm against the plugin API), the callbacks (Open, and NULL for
 * the second callback), and a NEON requirement (presumably restricts the
 * module to CPUs with NEON support -- confirm). */
32 set_description (N_("ARM NEON audio format conversions") )
33 set_capability ("audio filter", 20)
34 set_callbacks (Open, NULL)
35 add_requirement (NEON)
/* Forward declarations of the two conversion routines that Open() may
 * install as the filter's work function (pf_do_work). */
38 static void Do_F32_S32 (aout_instance_t *, aout_filter_t *,
39 aout_buffer_t *, aout_buffer_t *);
40 static void Do_S32_S16 (aout_instance_t *, aout_filter_t *,
41 aout_buffer_t *, aout_buffer_t *);
/*
 * Open: filter setup routine.
 *
 * Treats obj as an aout_filter_t, inspects the filter's input and output
 * sample formats, and installs the matching conversion routine in
 * filter->pf_do_work.
 *
 * NOTE(review): the return statements, case labels and braces of this
 * function are not visible in this excerpt, so the exact format codes matched
 * and the values returned cannot be documented from here.
 */
43 static int Open (vlc_object_t *obj)
45 aout_filter_t *filter = (aout_filter_t *)obj;
/* Guard: the input and output formats must be "similar" according to
 * AOUT_FMTS_SIMILAR. The action taken on mismatch is not visible here;
 * presumably the filter is rejected -- confirm. */
47 if (!AOUT_FMTS_SIMILAR (&filter->input, &filter->output))
/* Outer dispatch on the input sample format ... */
50 switch (filter->input.i_format)
/* ... nested dispatch on the output sample format for the first input case. */
53 switch (filter->output.i_format)
/* Float -> 32-bit fixed-point conversion routine. */
56 filter->pf_do_work = Do_F32_S32;
/* Nested dispatch on the output sample format for the second input case. */
64 switch (filter->output.i_format)
/* 32-bit fixed-point -> 16-bit integer conversion routine. */
67 filter->pf_do_work = Do_S32_S16;
/* Flag the filter as operating in place. */
77 filter->b_in_place = true;
82 * Single-precision floating point to signed fixed point conversion.
/*
 * Do_F32_S32: converts a buffer of 32-bit float samples to 32-bit signed
 * fixed-point samples using NEON inline assembly.
 *
 * aout   - not referenced in the visible lines.
 * filter - supplies the input format, used to obtain the channel count.
 * inbuf  - source buffer; p_buffer is read as an array of float.
 * outbuf - destination buffer; p_buffer is written as an array of int32_t.
 *
 * Every conversion uses "vcvt.s32.f32 ..., #28", i.e. a float to signed
 * fixed-point conversion with 28 fractional bits.
 *
 * NOTE(review): the "asm volatile (" openers, the conditions/loops guarding
 * each assembly fragment and some clobber lists are not visible in this
 * excerpt.
 */
84 static void Do_F32_S32 (aout_instance_t *aout, aout_filter_t *filter,
85 aout_buffer_t *inbuf, aout_buffer_t *outbuf)
/* Total number of individual samples: samples per channel multiplied by the
 * value returned by aout_FormatNbChannels() for the input format. */
87 unsigned nb_samples = inbuf->i_nb_samples
88 * aout_FormatNbChannels (&filter->input);
89 const float *inp = (float *)inbuf->p_buffer;
/* One past the last input sample; its use is not visible in this excerpt
 * (presumably the loop bound -- confirm). */
90 const float *endp = inp + nb_samples;
91 int32_t *outp = (int32_t *)outbuf->p_buffer;
/* Single-sample fragment: load one 32-bit value into s0, convert d0, and
 * store s0 back. The pointers are plain "r" operands with no writeback, so
 * the assembly itself does not advance them. */
96 "vldr.32 s0, [%[inp]]\n"
97 "vcvt.s32.f32 d0, d0, #28\n"
98 "vstr.32 s0, [%[outp]]\n"
100 : [outp] "r" (outp), [inp] "r" (inp)
/* Two-sample fragment: one 64-bit D register; "!" post-increments both
 * pointers, hence the "+r" read-write operands. */
108 "vld1.f32 {d0}, [%[inp]]!\n"
109 "vcvt.s32.f32 d0, d0, #28\n"
110 "vst1.s32 {d0}, [%[outp]]!\n"
111 : [outp] "+r" (outp), [inp] "+r" (inp)
/* Four-sample fragment: one 128-bit Q register, with pointer writeback. */
117 "vld1.f32 {q0}, [%[inp]]!\n"
118 "vcvt.s32.f32 q0, q0, #28\n"
119 "vst1.s32 {q0}, [%[outp]]!\n"
120 : [outp] "+r" (outp), [inp] "+r" (inp)
/* Eight-sample fragment: two Q registers per pass, with pointer writeback. */
126 "vld1.f32 {q0-q1}, [%[inp]]!\n"
127 "vcvt.s32.f32 q0, q0, #28\n"
128 "vcvt.s32.f32 q1, q1, #28\n"
129 "vst1.s32 {q0-q1}, [%[outp]]!\n"
130 : [outp] "+r" (outp), [inp] "+r" (inp)
/* q0/q1 are overwritten and memory is read and written by the fragment. */
132 : "q0", "q1", "memory");
/* Input and output samples are both 32 bits wide, so the sample count and
 * the byte count carry over unchanged. */
134 outbuf->i_nb_samples = inbuf->i_nb_samples;
135 outbuf->i_nb_bytes = inbuf->i_nb_bytes;
140 * Signed 32-bit fixed point to signed 16-bit integer conversion.
/*
 * Do_S32_S16: converts a buffer of 32-bit signed fixed-point samples to
 * 16-bit signed integer samples by a rounding right shift of 13 bits.
 *
 * aout   - not referenced in the visible lines.
 * filter - supplies the input format, used to obtain the channel count.
 * inbuf  - source buffer; p_buffer is read as an array of int32_t.
 * outbuf - destination buffer; p_buffer is written as an array of int16_t.
 *
 * NOTE(review): the "asm volatile (" openers, the pointer/counter updates of
 * the scalar loop, the conditions/loops guarding the NEON fragments and some
 * clobber lists are not visible in this excerpt.
 */
142 static void Do_S32_S16 (aout_instance_t *aout, aout_filter_t *filter,
143 aout_buffer_t *inbuf, aout_buffer_t *outbuf)
/* Total number of individual samples: samples per channel multiplied by the
 * value returned by aout_FormatNbChannels() for the input format. */
145 unsigned nb_samples = inbuf->i_nb_samples
146 * aout_FormatNbChannels (&filter->input);
147 int32_t *inp = (int32_t *)inbuf->p_buffer;
/* One past the last input sample; its use is not visible in this excerpt
 * (presumably the loop bound -- confirm). */
148 const int32_t *endp = inp + nb_samples;
149 int16_t *outp = (int16_t *)outbuf->p_buffer;
/* Scalar path: runs while the remaining count is not a multiple of 4
 * (presumably one sample per iteration -- the decrement is not visible). */
151 while (nb_samples & 3)
/* Half of the 1 << 13 step removed by the shift below, added first so the
 * shift rounds to nearest. */
153 const int16_t roundup = 1 << 12;
/* qadd: saturating 32-bit add of the rounding constant into r0.
 * ssat: arithmetic shift right by 13, then saturate to signed 16 bits.
 * NOTE(review): r0 is used as a scratch register; the clobber list is not
 * visible here -- confirm that "r0" is declared as clobbered. */
155 "qadd r0, %[inv], %[roundup]\n"
156 "ssat %[outv], #16, r0, asr #13\n"
157 : [outv] "=r" (*outp)
158 : [inv] "r" (*inp), [roundup] "r" (roundup)
/* Four-sample NEON fragment: vrshrn performs a rounding shift right by 13
 * and narrows each 32-bit lane to 16 bits; "!" post-increments the pointers.
 * NOTE(review): vrshrn does not saturate, whereas the scalar path above does
 * (qadd/ssat); inputs near the 32-bit limits may therefore convert
 * differently on the two paths -- check whether vqrshrn was intended. */
167 "vld1.s32 {q0}, [%[inp]]!\n"
168 "vrshrn.i32 d0, q0, #13\n"
169 "vst1.s16 {d0}, [%[outp]]!\n"
170 : [outp] "+r" (outp), [inp] "+r" (inp)
/* Eight-sample fragment: q0 and q1 are narrowed into d0 and d1 (the two
 * halves of q0), then q0 is stored as eight 16-bit samples. */
176 "vld1.s32 {q0-q1}, [%[inp]]!\n"
177 "vrshrn.i32 d0, q0, #13\n"
178 "vrshrn.i32 d1, q1, #13\n"
179 "vst1.s16 {q0}, [%[outp]]!\n"
180 : [outp] "+r" (outp), [inp] "+r" (inp)
182 : "q0", "q1", "memory");
/* Sixteen-sample fragment: q0..q3 are narrowed into d0..d3 (that is, q0 and
 * q1); each source register is fully read by its own vrshrn before the
 * destination half overlapping it is written.
 * NOTE(review): this fragment spells the suffix ".s32" while the fragments
 * above use ".i32" -- confirm the assembler accepts it and consider making
 * the spelling consistent. */
186 "vld1.s32 {q0-q1}, [%[inp]]!\n"
187 "vld1.s32 {q2-q3}, [%[inp]]!\n"
188 "vrshrn.s32 d0, q0, #13\n"
189 "vrshrn.s32 d1, q1, #13\n"
190 "vrshrn.s32 d2, q2, #13\n"
191 "vrshrn.s32 d3, q3, #13\n"
192 "vst1.s16 {q0-q1}, [%[outp]]!\n"
193 : [outp] "+r" (outp), [inp] "+r" (inp)
195 : "q0", "q1", "q2", "q3", "memory");
/* Same number of samples, but each output sample is half the size of an
 * input sample, so the byte count is halved. */
197 outbuf->i_nb_samples = inbuf->i_nb_samples;
198 outbuf->i_nb_bytes = inbuf->i_nb_bytes / 2;