1 @*****************************************************************************
2 @ neon_s32_s16.S : ARM NEONv1 fi32 to s16n audio sample conversion
3 @*****************************************************************************
4 @ Copyright (C) 2009 Rémi Denis-Courmont
6 @ This program is free software; you can redistribute it and/or modify
7 @ it under the terms of the GNU Lesser General Public License as published by
8 @ the Free Software Foundation; either version 2.1 of the License, or
9 @ (at your option) any later version.
11 @ This program is distributed in the hope that it will be useful,
12 @ but WITHOUT ANY WARRANTY; without even the implied warranty of
13 @ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 @ GNU Lesser General Public License for more details.
16 @ You should have received a copy of the GNU Lesser General Public License
17 @ along with this program; if not, write to the Free Software Foundation,
18 @ Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
19 @****************************************************************************/
33 .type s32_s16_neon, %function
34 @ Converts fixed-point 32-bit samples to signed 16-bit samples.
@ Each 32-bit lane is shifted right by 13 with rounding, saturated to the
@ signed 16-bit range and narrowed (vqrshrn.s32 ..., #13).
35 @ Input and output must be on a 128-bit (16-byte) boundary: the vld1/vst1
@ accesses below carry :128 alignment qualifiers (:64 for the final store).
@ Every vld1/vst1 uses "!" writeback, so IN and OUT advance past the data
@ just transferred.
@ NOTE(review): IN and OUT are presumably register aliases defined outside
@ this view, and the entry label, loop labels, and the compare/decrement
@ instructions that set the flags for the branches are not visible here --
@ confirm against the full file before relying on the control-flow notes.
@ NOTE(review): the shift of 13 presumably reflects the fixed-point input
@ format having 13 more fractional bits than the 16-bit output -- confirm.
40 blt s32_s16_neon_unaligned @ signed less-than (compare not shown): use the scalar routine
41 vld1.s32 {q8-q9}, [IN,:128]! @ load 8 x 32-bit samples (32 bytes)
46 vqrshrn.s32 d16, q8, #13 @ narrow q8 into d16 (d16 is the low half of q8)
47 vqrshrn.s32 d17, q9, #13 @ narrow q9 into d17; d16-d17 now hold 8 x 16-bit results
50 vld1.s32 {q10-q11}, [IN,:128]! @ load the next 8 samples
52 vqrshrn.s32 d18, q10, #13 @ results go to d18-d19 (q9, already consumed above)
53 vqrshrn.s32 d19, q11, #13
56 vld1.s32 {q12-q13}, [IN,:128]! @ load the next 8 samples
58 vqrshrn.s32 d20, q12, #13 @ results go to d20-d21 (q10, already consumed above)
59 vqrshrn.s32 d21, q13, #13
60 vst1.s16 {d16-d19}, [OUT,:128]! @ store 16 converted samples (32 bytes)
63 vld1.s32 {q8-q9}, [IN,:128]! @ load 8 more samples into q8-q9 before the pending store
64 vst1.s16 {d20-d21}, [OUT,:128]! @ store the remaining 8 converted samples (16 bytes)
@ NOTE(review): the next three stores look like separate exit paths for
@ 8, 16 and 8 pending samples; their labels and branches are not visible
@ in this view -- confirm.
67 vst1.s16 {d16-d17}, [OUT,:128]! @ store 8 samples from d16-d17
70 vst1.s16 {d16-d19}, [OUT,:128]! @ store 16 samples from d16-d19
73 vst1.s16 {d20-d21}, [OUT,:128]! @ store 8 samples from d20-d21
76 blt s32_s16_neon_unaligned @ signed less-than (compare not shown): use the scalar routine
77 vld1.s32 {q8}, [IN,:128]! @ load 4 x 32-bit samples (16 bytes)
79 vqrshrn.s32 d16, q8, #13 @ narrow to 4 x 16-bit results in d16
80 vst1.s16 {d16}, [OUT,:64]! @ store 4 samples (8 bytes), hence only a :64 qualifier
82 @ Fall through into s32_s16_neon_unaligned for the last 0-3 samples
84 .global s32_s16_neon_unaligned
85 .type s32_s16_neon_unaligned, %function
86 @ Converts fixed-point 32-bits to signed 16-bits
87 @ Input must be on 32-bits boundary, output on 16-bits
88 s32_s16_neon_unaligned:
99 ssat BUF, #16, BUF, asr #13