@*****************************************************************************
@ neon_s32_s16.S : ARM NEONv1 fi32 to s16n audio sample conversion
@*****************************************************************************
@ Copyright (C) 2009 Rémi Denis-Courmont
@
@ This program is free software; you can redistribute it and/or modify
@ it under the terms of the GNU General Public License as published by
@ the Free Software Foundation; either version 2 of the License, or
@ (at your option) any later version.
@
@ This program is distributed in the hope that it will be useful,
@ but WITHOUT ANY WARRANTY; without even the implied warranty of
@ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
@ GNU General Public License for more details.
@
@ You should have received a copy of the GNU General Public License
@ along with this program; if not, write to the Free Software Foundation,
@ Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
@****************************************************************************/

	.fpu neon
	.text

/*
 * Register roles, identical for both entry points:
 *   DST   (r0)  write cursor, 16-bit samples
 *   SRC   (r1)  read cursor, 32-bit samples
 *   LEFT  (r2)  samples still to convert (compared as a signed value)
 *   TMP   (r3)  scalar path: the sample being converted
 *   BIAS  (ip)  scalar path: rounding bias added before the shift
 *
 * Every sample x becomes saturate_s16((x + ROUND_BIAS) >> NARROW_SHIFT),
 * with an arithmetic shift.
 */
#define DST		r0
#define SRC		r1
#define LEFT		r2
#define TMP		r3
#define BIAS		ip

#define NARROW_SHIFT	13
#define ROUND_BIAS	(1 << (NARROW_SHIFT - 1))

	.align
	.global	s32_s16_neon
	.type	s32_s16_neon, %function
	@ Vector conversion of 32-bit fixed-point samples to signed 16-bit.
	@ In:      r0 = output, r1 = input, r2 = sample count
	@ Needs:   input and output both on a 128-bit boundary
	@ Clobber: r0-r3, ip, q8-q13, flags
	@ Presumably called from C as
	@   void s32_s16_neon(int16_t *, const int32_t *, unsigned)
	@ (TODO: confirm against the C prototype).
s32_s16_neon:
	pld		[SRC]
	cmp		LEFT, #8
	blt		s32_s16_neon_unaligned	@ fewer than 8: scalar code only
	vld1.s32	{q8-q9}, [SRC,:128]!	@ first 8 inputs

	@ The loop body handles up to three groups of 8 samples per pass.
	@ q8-q9 always hold the next 8 unprocessed inputs on entry.
.Lgroup_first:
	pld		[SRC, #64]
	sub		LEFT, LEFT, #8
	vqrshrn.s32	d16, q8, #NARROW_SHIFT
	vqrshrn.s32	d17, q9, #NARROW_SHIFT	@ d16-d17 = outputs of group 1
	cmp		LEFT, #8
	blt		.Lflush_one

	vld1.s32	{q10-q11}, [SRC,:128]!
	sub		LEFT, LEFT, #8
	vqrshrn.s32	d18, q10, #NARROW_SHIFT
	vqrshrn.s32	d19, q11, #NARROW_SHIFT	@ d18-d19 = outputs of group 2
	cmp		LEFT, #8
	blt		.Lflush_two

	vld1.s32	{q12-q13}, [SRC,:128]!
	sub		LEFT, LEFT, #8
	vqrshrn.s32	d20, q12, #NARROW_SHIFT
	vqrshrn.s32	d21, q13, #NARROW_SHIFT	@ d20-d21 = outputs of group 3
	vst1.s16	{d16-d19}, [DST,:128]!	@ groups 1 and 2 written out
	cmp		LEFT, #8
	blt		.Lflush_third

	@ q8-q9 are free again (their results were just stored): refill
	@ them before writing group 3, then start the next pass.
	vld1.s32	{q8-q9}, [SRC,:128]!
	vst1.s16	{d20-d21}, [DST,:128]!
	b		.Lgroup_first

.Lflush_one:				@ only group 1 is pending
	vst1.s16	{d16-d17}, [DST,:128]!
	b		.Lquad
.Lflush_two:				@ groups 1 and 2 are pending
	vst1.s16	{d16-d19}, [DST,:128]!
	b		.Lquad
.Lflush_third:				@ only group 3 is pending
	vst1.s16	{d20-d21}, [DST,:128]!

.Lquad:					@ 0-7 samples left here
	cmp		LEFT, #4
	blt		s32_s16_neon_unaligned
	vld1.s32	{q8}, [SRC,:128]!
	sub		LEFT, LEFT, #4
	vqrshrn.s32	d16, q8, #NARROW_SHIFT
	vst1.s16	{d16}, [DST,:64]!

	@ No branch: execution continues into the scalar routine below,
	@ which converts the final 0-3 samples and returns.

	.global	s32_s16_neon_unaligned
	.type	s32_s16_neon_unaligned, %function
	@ Scalar conversion of 32-bit fixed-point samples to signed 16-bit.
	@ In:      r0 = output, r1 = input, r2 = sample count
	@ Needs:   input on a 32-bit boundary, output on a 16-bit boundary
	@ Clobber: r0-r3, ip, flags (qadd may also set the Q flag)
	@ Returns through lr once the count reaches zero.
s32_s16_neon_unaligned:
	mov		BIAS, #ROUND_BIAS
.Lscalar_next:
	cmp		LEFT, #0
	bxeq		lr			@ nothing left: return
	ldr		TMP, [SRC]
	add		SRC, SRC, #4
	add		DST, DST, #2
	qadd		TMP, BIAS, TMP		@ saturating, so the bias cannot wrap
	sub		LEFT, LEFT, #1
	ssat		TMP, #16, TMP, asr #NARROW_SHIFT
	strh		TMP, [DST, #-2]		@ DST was already advanced
	b		.Lscalar_next