2 * Copyright (C) 2013 Xiaolei Yu <dreifachstein@gmail.com>
4 * This file is part of FFmpeg.
6 * FFmpeg is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * FFmpeg is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with FFmpeg; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21 #include "libavutil/arm/asm.S"
/*
 * alias name, tgt, set=1
 * Takes a symbolic name, a target and a set flag that defaults to 1.
 * NOTE(review): the macro body is not visible in this excerpt; judging from
 * its call sites it presumably binds name to tgt when set is non-zero and
 * drops the binding otherwise -- confirm against the full file.
 */
23 .macro alias name, tgt, set=1
/*
 * alias_dw_all qw, dw_l, dw_h
 * Calls alias for q<qw>_l -> d<dw_l> and q<qw>_h -> d<dw_h>, then invokes
 * itself again with qw + 1, dw_l + 2 and dw_h + 2.
 * NOTE(review): the condition that ends the recursion is not visible in
 * this excerpt.
 */
33 .macro alias_dw_all qw, dw_l, dw_h
34 alias q\qw\()_l, d\dw_l
35 alias q\qw\()_h, d\dw_h
/* %(...) presumably relies on .altmacro expression evaluation so the next
 * level receives plain numbers -- confirm the mode is enabled in the file */
37 alias_dw_all %(\qw + 1), %(\dw_l + 2), %(\dw_h + 2)
/*
 * alias_qw name, qw, set=1
 * Issues three alias calls: name -> qw, name_l -> qw_l and name_h -> qw_h.
 * The set flag is forwarded unchanged to each of them.
 */
45 .macro alias_qw name, qw, set=1
46 alias \name\(), \qw, \set
47 alias \name\()_l, \qw\()_l, \set
48 alias \name\()_h, \qw\()_h, \set
/*
 * load_arg reg, ix
 * Loads into reg the word at sp + (10 * 4 + 4 * 16) + (ix - 4) * 4.
 * ix = 4 therefore addresses the first word past the fixed 104-byte offset.
 * NOTE(review): 10 * 4 + 4 * 16 presumably skips ten saved core registers
 * and four saved q registers pushed by the function prologue, and ix is
 * presumably the zero-based C argument index (arguments 0-3 arriving in
 * registers) -- the prologue is not visible here; confirm.
 */
61 .macro load_arg reg, ix
62 ldr \reg, [sp, #((10 * 4 + 4 * 16) + (\ix - 4) * 4)]
66 /* ()_to_()_neon(const uint8_t *src, uint8_t *y, uint8_t *chroma,
67 * int width, int height,
68 * int y_stride, int c_stride, int src_stride,
69 * int32_t coeff_table[9]);
/*
 * alias_loop_420sp set=1
 * Issues the alias calls that name the core registers used by the 420sp
 * row loop, forwarding the set flag to each call.
 * Two names are given another name as target rather than a register:
 * header -> width and c_padding -> c_stride.
 * NOTE(review): further alias lines of this macro (e.g. for src, y0, y1,
 * width) are not visible in this excerpt.
 */
71 .macro alias_loop_420sp set=1
76 alias chroma, r2, \set
/* header is mapped onto width */
78 alias header, width, \set
80 alias height, r4, \set
81 alias y_stride, r5, \set
82 alias c_stride, r6, \set
/* c_padding is mapped onto c_stride */
83 alias c_padding, c_stride, \set
84 alias src_stride, r7, \set
86 alias y0_end, r8, \set
88 alias src_padding,r9, \set
89 alias y_padding, r10, \set
94 alias coeff_table,r12, \set
/*
 * loop_420sp s_fmt, d_fmt, init, kernel, precision
 * Emits the exported function <s_fmt>_to_<d_fmt>_neon_<precision>.
 * The visible lines fetch two stack arguments, derive the padding values,
 * invoke the kernel macro on a pair of source rows (src0, src1) producing
 * two luma rows (y0, y1) and one chroma row, then step all pointers to the
 * next pair of rows and subtract 2 from height.
 * NOTE(review): labels, branches, the prologue/epilogue and the use of the
 * init parameter are not visible in this excerpt.
 */
98 .macro loop_420sp s_fmt, d_fmt, init, kernel, precision
100 function \s_fmt\()_to_\d_fmt\()_neon_\precision, export=1
/* arguments with index 7 and 8 are read from the stack */
108 load_arg src_stride, 7
109 load_arg coeff_table, 8
/* padding = stride - width; the source side subtracts width << 2 */
113 sub y_padding, y_stride, width
114 sub c_padding, c_stride, width
115 sub src_padding, src_stride, width, LSL #2
/* y0_end = y0 + width; header = width & 15 */
117 add y0_end, y0, width
118 and header, width, #15
/* src1 = src0 + src_stride */
121 add src1, src0, src_stride
/* kernel invocation with header passed as the count argument */
127 \kernel \s_fmt, \d_fmt, src0, src1, y0, y1, chroma, header
/* kernel invocation without a count argument */
130 \kernel \s_fmt, \d_fmt, src0, src1, y0, y1, chroma
/* advance to the next pair of rows: the new row 0 pointers are derived from
 * the row 1 pointers plus padding, chroma only skips its padding */
135 add y0, y1, y_padding
136 add y0_end, y1, y_stride
137 add chroma, chroma, c_padding
138 add src0, src1, src_padding
141 add src1, src0, src_stride
/* height -= 2, updating the condition flags */
143 subs height, height, #2
/* NOTE(review): the .macro header these lines belong to is not visible in
 * this excerpt.
 * vpaddl.u8 adds adjacent pairs of unsigned 8-bit lanes and writes the
 * widened 16-bit sums: r/g/b 8x16 -> r/g/b 16x8. */
155 vpaddl.u8 r16x8, r8x16
156 vpaddl.u8 g16x8, g8x16
157 vpaddl.u8 b16x8, b8x16
161 /* accumulate and rounding right shift by 2 */
/*
 * downsample_ars2
 * Adds the pairwise sums of the r/g/b 8x16 lanes onto the existing 16-bit
 * lanes of r/g/b 16x8, then divides each 16-bit lane by 4 with rounding.
 * Operates on the register aliases directly; takes no arguments.
 */
162 .macro downsample_ars2
/* vpadal.u8: pairwise add adjacent u8 lanes and accumulate into u16 lanes */
163 vpadal.u8 r16x8, r8x16
164 vpadal.u8 g16x8, g8x16
165 vpadal.u8 b16x8, b8x16
/* vrshr.u16 #2: rounding shift right by 2 on every u16 lane */
167 vrshr.u16 r16x8, r16x8, #2
168 vrshr.u16 g16x8, g16x8, #2
169 vrshr.u16 b16x8, b16x8, #2
/*
 * store_y8_16x1 dst, count
 * Writes the y8x16 register to memory at dst.
 * The visible lines show a store with write-back of dst, and a separate
 * add that advances dst by count.
 * NOTE(review): the directives selecting between these lines are not
 * visible in this excerpt; presumably the choice depends on whether count
 * is supplied -- confirm.
 */
172 .macro store_y8_16x1 dst, count
/* store y8x16 and write the incremented address back to dst */
174 vstmia \dst!, {y8x16}
/* dst += count */
177 add \dst, \dst, \count
/*
 * store_chroma_nv12_8x1 dst, count
 * Stores u8x8 and v8x8 interleaved (u first, then v) at dst with vst2.
 * Two variants are visible: write-back by the transfer size ("!") and
 * post-increment of dst by the count register.
 * NOTE(review): the directives selecting between the variants are not
 * visible in this excerpt.
 */
181 .macro store_chroma_nv12_8x1 dst, count
183 vst2.i8 {u8x8, v8x8}, [\dst]!
185 vst2.i8 {u8x8, v8x8}, [\dst], \count
/*
 * store_chroma_nv21_8x1 dst, count
 * Stores v8x8 and u8x8 interleaved (v first, then u) at dst with vst2.
 * Two variants are visible: write-back by the transfer size ("!") and
 * post-increment of dst by the count register.
 * NOTE(review): the directives selecting between the variants are not
 * visible in this excerpt.
 */
189 .macro store_chroma_nv21_8x1 dst, count
191 vst2.i8 {v8x8, u8x8}, [\dst]!
193 vst2.i8 {v8x8, u8x8}, [\dst], \count
/*
 * load_8888_16x1 a, b, c, d, src, count
 * De-interleaves 4-byte structures from src with vld4.8: the first load
 * fills the _l halves of <a>8x16 .. <d>8x16, the second fills the _h halves.
 * a, b, c, d are the channel name prefixes in memory order.
 * Two variants are visible: both loads with write-back, or the second load
 * without write-back followed by src += count << 2.
 * NOTE(review): the directives selecting between the variants are not
 * visible in this excerpt.
 */
197 .macro load_8888_16x1 a, b, c, d, src, count
199 vld4.8 {\a\()8x16_l, \b\()8x16_l, \c\()8x16_l, \d\()8x16_l}, [\src]!
200 vld4.8 {\a\()8x16_h, \b\()8x16_h, \c\()8x16_h, \d\()8x16_h}, [\src]!
202 vld4.8 {\a\()8x16_l, \b\()8x16_l, \c\()8x16_l, \d\()8x16_l}, [\src]!
/* no write-back on this load; src is advanced by the add below instead */
203 vld4.8 {\a\()8x16_h, \b\()8x16_h, \c\()8x16_h, \d\()8x16_h}, [\src]
205 add \src, \src, \count, LSL #2
/*
 * load_rgbx_16x1 src, count
 * Forwards to load_8888_16x1 with the channel order r, g, b, x.
 */
209 .macro load_rgbx_16x1 src, count
210 load_8888_16x1 r, g, b, x, \src, \count
/*
 * load_bgrx_16x1 src, count
 * Forwards to load_8888_16x1 with the channel order b, g, r, x.
 */
213 .macro load_bgrx_16x1 src, count
214 load_8888_16x1 b, g, r, x, \src, \count
/*
 * alias_src_rgbx set=1
 * Forwards to alias_src_8888 with the channel order r, g, b, x and the
 * given set flag.
 */
217 .macro alias_src_rgbx set=1
218 alias_src_8888 r, g, b, x, \set
/*
 * alias_src_bgrx set=1
 * Forwards to alias_src_8888 with the channel order b, g, r, x and the
 * given set flag.
 */
221 .macro alias_src_bgrx set=1
222 alias_src_8888 b, g, r, x, \set
/*
 * alias_dst_nv12 set=1
 * Maps u8x8 onto c8x8x2_l and v8x8 onto c8x8x2_h, forwarding the set flag.
 */
225 .macro alias_dst_nv12 set=1
226 alias u8x8, c8x8x2_l, \set
227 alias v8x8, c8x8x2_h, \set
/*
 * alias_dst_nv21 set=1
 * Maps v8x8 onto c8x8x2_l and u8x8 onto c8x8x2_h, forwarding the set flag
 * (the reverse of the nv12 assignment).
 */
230 .macro alias_dst_nv21 set=1
231 alias v8x8, c8x8x2_l, \set
232 alias u8x8, c8x8x2_h, \set
259 /* q3-q6 R8G8B8X8 x16 */
/*
 * alias_src_8888 a, b, c, d, set
 * Maps the four channel registers <a>8x16 .. <d>8x16 onto q3, q4, q5 and q6
 * in that order via alias_qw. a, b, c, d are channel name prefixes.
 * Unlike the wrapper macros, set has no default here.
 */
261 .macro alias_src_8888 a, b, c, d, set
262 alias_qw \a\()8x16, q3, \set
263 alias_qw \b\()8x16, q4, \set
264 alias_qw \c\()8x16, q5, \set
265 alias_qw \d\()8x16, q6, \set
/*
 * kernel_420_16x2 rgb_fmt, yuv_fmt, rgb0, rgb1, y0, y1, chroma, count
 * Processes one block spanning two source rows: loads from rgb0 and stores
 * luma to y0, loads from rgb1 and stores luma to y1, then computes both
 * chroma components and stores them to chroma.
 * rgb_fmt and yuv_fmt select the load_<rgb_fmt>_16x1 and
 * store_chroma_<yuv_fmt>_8x1 macros by name; count is passed through to the
 * load and store macros.
 * NOTE(review): the steps between each load and store (luma computation,
 * downsampling) are not visible in this excerpt.
 */
268 .macro kernel_420_16x2 rgb_fmt, yuv_fmt, rgb0, rgb1, y0, y1, chroma, count
/* first row: load pixels, store luma */
272 load_\rgb_fmt\()_16x1 \rgb0, \count
276 store_y8_16x1 \y0, \count
/* second row: load pixels, store luma */
279 load_\rgb_fmt\()_16x1 \rgb1, \count
282 store_y8_16x1 \y1, \count
/* chroma for both components, then one interleaved store */
284 compute_chroma_8x1 u, U
285 compute_chroma_8x1 v, V
287 store_chroma_\yuv_fmt\()_8x1 \chroma, \count