1 ;******************************************************************************
2 ;* SIMD optimized SBC encoder DSP functions
4 ;* Copyright (C) 2017 Aurelien Jacobs <aurel@gnuage.org>
5 ;* Copyright (C) 2008-2010 Nokia Corporation
6 ;* Copyright (C) 2004-2010 Marcel Holtmann <marcel@holtmann.org>
7 ;* Copyright (C) 2004-2005 Henryk Ploetz <henryk@ploetzli.ch>
8 ;* Copyright (C) 2005-2006 Brad Midgley <bmidgley@xmission.com>
10 ;* This file is part of FFmpeg.
12 ;* FFmpeg is free software; you can redistribute it and/or
13 ;* modify it under the terms of the GNU Lesser General Public
14 ;* License as published by the Free Software Foundation; either
15 ;* version 2.1 of the License, or (at your option) any later version.
17 ;* FFmpeg is distributed in the hope that it will be useful,
18 ;* but WITHOUT ANY WARRANTY; without even the implied warranty of
19 ;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 ;* Lesser General Public License for more details.
22 ;* You should have received a copy of the GNU Lesser General Public
23 ;* License along with FFmpeg; if not, write to the Free Software
24 ;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
25 ;******************************************************************************
27 %include "libavutil/x86/x86util.asm"
; scale_mask: two consecutive dwords, each 0x8000.
; Used in this file as the initial add1/add2 operands of the first
; ANALYZE_MAC_IN step(s) in sbc_analyze_4 / sbc_analyze_8, and loaded into m3
; at the start of sbc_calc_scalefactors.
; NOTE(review): being half of 1 << SBC_PROTO_FIXED_SCALE, this is presumably a
; round-to-nearest bias for the later "psrad ..., 16" - confirm.
31 scale_mask: times 2 dd 0x8000 ; 1 << (SBC_PROTO_FIXED_SCALE - 1)
; ANALYZE_MAC: one multiply-accumulate step against the coefficient table
; addressed through constsq.
;   %1, %2  out1, out2  - accumulator outputs
;   %3, %4  in1,  in2   - input operands
;   %5, %6  tmp1, tmp2  - temporaries that receive the pmaddwd products
;   %7, %8  add1, add2  - values to be added into the outputs
;   %9      offset      - byte offset into the coefficient table
; Visible in this excerpt: tmp1 is multiplied by the coefficients at
; [constsq+offset] and tmp2 by those at [constsq+offset+8]. pmaddwd multiplies
; packed signed 16-bit words and sums each adjacent pair of products into a
; 32-bit result.
; NOTE(review): the instructions that load in1/in2 into tmp1/tmp2 and that
; combine add1/add2 into out1/out2 are not visible here - confirm against the
; full file before relying on this description.
41 %macro ANALYZE_MAC 9 ; out1, out2, in1, in2, tmp1, tmp2, add1, add2, offset
44 pmaddwd %5, [constsq+%9]
45 pmaddwd %6, [constsq+%9+8]
; ANALYZE_MAC_IN: ANALYZE_MAC with its inputs taken from the sample buffer.
; Forwards to ANALYZE_MAC with in1 = [inq+offset] and in2 = [inq+offset+8];
; the same offset (%7) is also passed on as the coefficient-table offset.
;   %1, %2  out1, out2
;   %3, %4  tmp1, tmp2
;   %5, %6  add1, add2
;   %7      offset      - byte offset used for both inq and constsq
50 %macro ANALYZE_MAC_IN 7 ; out1, out2, tmp1, tmp2, add1, add2, offset
51 ANALYZE_MAC %1, %2, [inq+%7], [inq+%7+8], %3, %4, %5, %6, %7
; ANALYZE_MAC_REG: ANALYZE_MAC with its input taken from a register.
;   %1, %2  out1, out2
;   %3      in          - register holding the input; used as both in1 and in2
;   %4, %5  tmp1, tmp2  - also passed to ANALYZE_MAC as add1/add2
;   %6      offset      - byte offset into the coefficient table
;   %7      pack        - callers in this file pass either "pack" or "no"
; Visible in this excerpt: "in" is arithmetically shifted right by 16 bits and
; then ANALYZE_MAC is invoked.
; NOTE(review): no visible line references %7. Presumably it selects whether
; the shift (and a pack step) is assembled at all - confirm against the lines
; missing from this excerpt.
54 %macro ANALYZE_MAC_REG 7 ; out1, out2, in, tmp1, tmp2, offset, pack
56 psrad %3, 16 ; SBC_PROTO_FIXED_SCALE
59 ANALYZE_MAC %1, %2, %3, %3, %4, %5, %4, %5, %6
62 ;*******************************************************************
63 ;void ff_sbc_analyze_4(const int16_t *in, int32_t *out, const int16_t *consts);
64 ;*******************************************************************
; cglobal arguments: 3 function arguments, 3 general-purpose registers,
; 4 vector registers; the arguments are named in, out and consts (accessed
; as inq, outq, constsq).
; First stage: five ANALYZE_MAC_IN steps over byte offsets 0..64 (stride 16)
; of both in and consts, all targeting m0/m1. The first step passes
; [scale_mask] as add1/add2 and uses m0/m1 as its own temporaries; the
; remaining steps use m2/m3 as temporaries and addends.
; Second stage: two ANALYZE_MAC_REG steps (pack variant) that consume m0 and
; then m1 as inputs, with coefficient offsets 80 and 96 and outputs m0/m2.
; NOTE(review): the stores to out and the return are not visible in this
; excerpt.
66 cglobal sbc_analyze_4, 3, 3, 4, in, out, consts
67 ANALYZE_MAC_IN m0, m1, m0, m1, [scale_mask], [scale_mask], 0
68 ANALYZE_MAC_IN m0, m1, m2, m3, m2, m3, 16
69 ANALYZE_MAC_IN m0, m1, m2, m3, m2, m3, 32
70 ANALYZE_MAC_IN m0, m1, m2, m3, m2, m3, 48
71 ANALYZE_MAC_IN m0, m1, m2, m3, m2, m3, 64
; Second stage: inputs are the registers targeted by the first stage.
73 ANALYZE_MAC_REG m0, m2, m0, m0, m2, 80, pack
74 ANALYZE_MAC_REG m0, m2, m1, m1, m3, 96, pack
82 ;*******************************************************************
83 ;void ff_sbc_analyze_8(const int16_t *in, int32_t *out, const int16_t *consts);
84 ;*******************************************************************
; cglobal arguments: 3 function arguments, 3 general-purpose registers,
; 4 vector registers; the arguments are named in, out and consts.
; NOTE(review): 4 vector registers are declared, yet the body references
; m0-m7. Presumably harmless if an MMX INIT_* is in effect (not visible
; here) - confirm.
; First stage: ten ANALYZE_MAC_IN steps over byte offsets 0..144 (stride 16).
; Steps alternate between two register pairs: offsets 0, 32, 64, 96, 128
; target m0/m1 and offsets 16, 48, 80, 112, 144 target m2/m3. The first step
; of each pair passes [scale_mask] as add1/add2.
86 cglobal sbc_analyze_8, 3, 3, 4, in, out, consts
87 ANALYZE_MAC_IN m0, m1, m0, m1, [scale_mask], [scale_mask], 0
88 ANALYZE_MAC_IN m2, m3, m2, m3, [scale_mask], [scale_mask], 16
89 ANALYZE_MAC_IN m0, m1, m4, m5, m4, m5, 32
90 ANALYZE_MAC_IN m2, m3, m6, m7, m6, m7, 48
91 ANALYZE_MAC_IN m0, m1, m4, m5, m4, m5, 64
92 ANALYZE_MAC_IN m2, m3, m6, m7, m6, m7, 80
93 ANALYZE_MAC_IN m0, m1, m4, m5, m4, m5, 96
94 ANALYZE_MAC_IN m2, m3, m6, m7, m6, m7, 112
95 ANALYZE_MAC_IN m0, m1, m4, m5, m4, m5, 128
96 ANALYZE_MAC_IN m2, m3, m6, m7, m6, m7, 144
; Second stage, first group (pack variant): m0..m3 are consumed in turn with
; coefficient offsets 160, 192, 224, 256; outputs are m4/m5.
98 ANALYZE_MAC_REG m4, m5, m0, m4, m5, 160, pack
99 ANALYZE_MAC_REG m4, m5, m1, m6, m7, 192, pack
100 ANALYZE_MAC_REG m4, m5, m2, m6, m7, 224, pack
101 ANALYZE_MAC_REG m4, m5, m3, m6, m7, 256, pack
; Second stage, second group ("no" variant): the same four inputs with the
; interleaved coefficient offsets 176, 208, 240, 272; outputs are m0/m5.
; NOTE(review): whatever happens between the two groups (e.g. storing m4/m5
; or resetting registers) is not visible in this excerpt.
106 ANALYZE_MAC_REG m0, m5, m0, m0, m5, 176, no
107 ANALYZE_MAC_REG m0, m5, m1, m1, m7, 208, no
108 ANALYZE_MAC_REG m0, m5, m2, m2, m7, 240, no
109 ANALYZE_MAC_REG m0, m5, m3, m3, m7, 272, no
117 ;*******************************************************************
118 ;void ff_sbc_calc_scalefactors(int32_t sb_sample_f[16][2][8],
119 ; uint32_t scale_factor[2][8],
120 ; int blocks, int channels, int subbands)
121 ;*******************************************************************
; cglobal arguments: 5 function arguments, 7 general-purpose registers,
; 4 vector registers. The five arguments are named sb_sample_f, scale_factor,
; blocks, channels and subbands; ptr and blk name the two extra scratch
; registers.
; NOTE(review): only part of the body is visible in this excerpt. The loop
; labels, the branches and the instructions that turn the accumulated value
; into a scale factor are missing, so the comments below describe just the
; visible instructions.
123 cglobal sbc_calc_scalefactors, 5, 7, 4, sb_sample_f, scale_factor, blocks, channels, subbands, ptr, blk
124 ; subbands = 4 * subbands * channels
; m3 = the scale_mask constant.
125 movq m3, [scale_mask]
; ptr = sb_sample_f + subbands; subbands is used as a byte offset here.
133 lea ptrq, [sb_sample_fq + subbandsq]
135 ; blk = (blocks - 1) * 64;
; Only the "blocks - 1" part is visible; the multiply by 64 is presumably in
; a line missing from this excerpt.
136 lea blkq, [blocksq - 1]
; Packed 32-bit add of the samples at ptr + blk into m1.
144 paddd m1, [ptrq+blkq]
; Subtract SCALE_OUT_BITS (15) from blk and store it as a 32-bit value at
; scale_factor + subbands.
156 sub blkd, 15 ; SCALE_OUT_BITS
157 mov [scale_factorq + subbandsq], blkd
; Same adjustment and store for the adjacent 32-bit entry at +4.
161 sub blkd, 15 ; SCALE_OUT_BITS
162 mov [scale_factorq + subbandsq + 4], blkd