1 ;******************************************************************************
2 ;* SIMD optimized SAO functions for HEVC 8bit decoding
4 ;* Copyright (c) 2013 Pierre-Edouard LEPERE
5 ;* Copyright (c) 2014 James Almer
7 ;* This file is part of FFmpeg.
9 ;* FFmpeg is free software; you can redistribute it and/or
10 ;* modify it under the terms of the GNU Lesser General Public
11 ;* License as published by the Free Software Foundation; either
12 ;* version 2.1 of the License, or (at your option) any later version.
14 ;* FFmpeg is distributed in the hope that it will be useful,
15 ;* but WITHOUT ANY WARRANTY; without even the implied warranty of
16 ;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 ;* Lesser General Public License for more details.
19 ;* You should have received a copy of the GNU Lesser General Public
20 ;* License along with FFmpeg; if not, write to the Free Software
21 ;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22 ;******************************************************************************
24 %include "libavutil/x86/x86util.asm"
; Constant byte tables used by the SAO edge filter.
; pb_edge_shuffle: one 16-byte row (five small indices followed by eleven -1
; bytes) emitted twice by "times 2", i.e. 32 bytes in total. It is loaded
; whole into m1 by HEVC_SAO_EDGE_FILTER.
28 pb_edge_shuffle: times 2 db 1, 2, 0, 3, 4, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
; pb_eo: 16 signed bytes. NOTE(review): presumably the table that
; HEVC_SAO_EDGE_FILTER_INIT reads as 4-byte rows indexed by eo
; ([base + eo*4 + 0..3]) -- confirm against the instruction that sets the base.
29 pb_eo: db -1, 0, 1, 0, 0, -1, 0, 1, -1, -1, 1, 1, 1, -1, -1, 1
35 ;******************************************************************************
37 ;******************************************************************************
; Set-up shared by all band-filter variants; takes no macro parameters.
; Reads the offset table through offsetq and prepares per-band constants in
; m0-m7 and, on the spilling path, in seven stack slots.
39 %macro HEVC_SAO_BAND_FILTER_INIT 0
; Offsets at byte displacements 2, 4, 6 and 8 from offsetq, i.e. int16_t
; entries 1..4 given the int16_t *sao_offset_val prototype comment below.
; NOTE(review): SPLATW comes from x86util.asm; presumably it replicates one
; 16-bit word across the whole register -- confirm.
57 SPLATW m4, [offsetq + 2]
58 SPLATW m5, [offsetq + 4]
59 SPLATW m6, [offsetq + 6]
60 SPLATW m7, [offsetq + 8]
; Alternative load: a single 64-bit read covering the same four 16-bit entries.
62 movq m7, [offsetq + 2]
; Spill m0-m6 to seven consecutive mmsize-sized stack slots. m7 is not spilled.
; The slot count matches the 7*mmsize*ARCH_X86_32 stack reservation made by
; HEVC_SAO_BAND_FILTER's cglobal, so this is presumably the x86-32 path.
73 mova [rsp+mmsize*0], m0
74 mova [rsp+mmsize*1], m1
75 mova [rsp+mmsize*2], m2
76 mova [rsp+mmsize*3], m3
77 mova [rsp+mmsize*4], m4
78 mova [rsp+mmsize*5], m5
79 mova [rsp+mmsize*6], m6
; Re-declare the argument names: the sixth one, called "left" in the cglobal
; line, is referred to as "height" from here on.
87 DEFINE_ARGS dst, src, dststride, srcstride, offset, height
; Per-vector band selection. Takes two macro parameters; %1 is the register
; compared against the spilled constants and is overwritten by the last compare.
91 %macro HEVC_SAO_BAND_FILTER_COMPUTE 2
; NOTE(review): the constant is spelled MMSIZE here but mmsize in
; HEVC_SAO_BAND_FILTER_INIT. Names created with %define are case-sensitive in
; NASM, so confirm MMSIZE is defined and equal to mmsize; otherwise these
; operands do not address the slots written by the init macro.
; Word-wise equality of %1 against stack slots 0-3: masks go to m4, m5, m6 and,
; for slot 3, back into %1 itself.
107 pcmpeqw m4, %1, [rsp+MMSIZE*0]
108 pcmpeqw m5, %1, [rsp+MMSIZE*1]
109 pcmpeqw m6, %1, [rsp+MMSIZE*2]
110 pcmpeqw %1, [rsp+MMSIZE*3]
; Keep the contents of stack slots 4-6 only in the lanes where the matching
; compare above produced all-ones.
111 pand m4, [rsp+MMSIZE*4]
112 pand m5, [rsp+MMSIZE*5]
113 pand m6, [rsp+MMSIZE*6]
122 ;void ff_hevc_sao_band_filter_<width>_8_<opt>(uint8_t *_dst, uint8_t *_src, ptrdiff_t _stride_dst, ptrdiff_t _stride_src,
123 ; int16_t *sao_offset_val, int sao_left_class, int width, int height);
; Emits one band-filter function. Takes two macro parameters; %1 is the width
; that becomes part of the function name.
124 %macro HEVC_SAO_BAND_FILTER 2
; x86inc cglobal: 6 arguments loaded, 6 general-purpose registers, 15 vector
; registers, and a stack area of 7*mmsize bytes that collapses to 0 when
; ARCH_X86_32 is 0.
125 cglobal hevc_sao_band_filter_%1_8, 6, 6, 15, 7*mmsize*ARCH_X86_32, dst, src, dststride, srcstride, offset, left
126 HEVC_SAO_BAND_FILTER_INIT
; The compute macro is applied with m9 as its first operand and either m8 or
; m13 as its second.
133 HEVC_SAO_BAND_FILTER_COMPUTE m9, m8
; m8 = low-half bytes of m13 interleaved with those of m14.
; NOTE(review): this widens bytes to words only if m14 holds zero -- confirm.
141 punpcklbw m8, m13, m14
142 HEVC_SAO_BAND_FILTER_COMPUTE m9, m8
144 HEVC_SAO_BAND_FILTER_COMPUTE m9, m13
154 punpcklbw m8, m13, m14
155 HEVC_SAO_BAND_FILTER_COMPUTE m9, m8
157 HEVC_SAO_BAND_FILTER_COMPUTE m9, m13
; Row epilogue: advance both pointers by their own stride and repeat until the
; row counter reaches zero.
165 add dstq, dststrideq ; dst += dststride
166 add srcq, srcstrideq ; src += srcstride
167 dec heightd ; height-- (sets ZF when it reaches 0)
168 jnz .loop ; height loop
; Instantiates the band filter for widths 8, 16, 32, 48 and 64.
; NOTE(review): the second argument (0, 1, 2, 2, 4) is presumably the number
; of full-vector iterations per row -- confirm in HEVC_SAO_BAND_FILTER.
173 %macro HEVC_SAO_BAND_FILTER_FUNCS 0
174 HEVC_SAO_BAND_FILTER 8, 0
175 HEVC_SAO_BAND_FILTER 16, 1
176 HEVC_SAO_BAND_FILTER 32, 2
177 HEVC_SAO_BAND_FILTER 48, 2
178 HEVC_SAO_BAND_FILTER 64, 4
; The set is expanded twice, presumably once per selected instruction set
; (the selecting lines should precede each expansion -- confirm).
182 HEVC_SAO_BAND_FILTER_FUNCS
184 HEVC_SAO_BAND_FILTER_FUNCS
; AVX2 variants, built only when HAVE_AVX2_EXTERNAL is non-zero. Widths 32, 48
; and 64 pass a smaller second argument (1, 1, 2) than the set above (2, 2, 4).
186 %if HAVE_AVX2_EXTERNAL
188 HEVC_SAO_BAND_FILTER 8, 0
189 HEVC_SAO_BAND_FILTER 16, 1
191 HEVC_SAO_BAND_FILTER 32, 1
192 HEVC_SAO_BAND_FILTER 48, 1
193 HEVC_SAO_BAND_FILTER 64, 2
196 ;******************************************************************************
198 ;******************************************************************************
; Fixed source-row stride for the edge filter: 2 * 64 + 32 = 160. It is used as
; an immediate when scaling a_stride/b_stride and when advancing srcq per row.
200 %define MAX_PB_SIZE 64
201 %define PADDING_SIZE 32 ; AV_INPUT_BUFFER_PADDING_SIZE
; NOTE(review): the expansion is not parenthesised. That is harmless where the
; macro is the entire operand, but "x * EDGE_SRCSTRIDE" would bind as
; (x * 2 * MAX_PB_SIZE) + PADDING_SIZE. Consider wrapping it in ( ).
202 %define EDGE_SRCSTRIDE 2 * MAX_PB_SIZE + PADDING_SIZE
; Derives the two neighbour displacements (a_stride, b_stride) from the eo
; argument. Takes no macro parameters; expects tmp2q to hold the base of a
; table of 4-byte rows (tmp2q is aliased by the caller before expansion).
204 %macro HEVC_SAO_EDGE_FILTER_INIT 0
; Sign-extend the 32-bit eo argument to pointer width so it can be used as an
; index register.
206 movsxd eoq, dword eom
; Signed bytes 1 and 3 of row eo become a_stride and b_stride, then are scaled
; by the fixed source-row stride.
213 movsx a_strideq, byte [tmp2q+eoq*4+1]
214 movsx b_strideq, byte [tmp2q+eoq*4+3]
215 imul a_strideq, EDGE_SRCSTRIDE
216 imul b_strideq, EDGE_SRCSTRIDE
; Signed bytes 0 and 2 of the same row are fetched into tmpq.
; NOTE(review): presumably each is added to a_stride / b_stride respectively
; as the horizontal component -- confirm.
217 movsx tmpq, byte [tmp2q+eoq*4]
219 movsx tmpq, byte [tmp2q+eoq*4+2]
; Takes one macro parameter. Every call site in HEVC_SAO_EDGE_FILTER passes the
; block width and expands this macro right after loading m2 from
; [srcq + a_strideq] and m3 from [srcq + b_strideq].
223 %macro HEVC_SAO_EDGE_FILTER_COMPUTE 1
252 ;void ff_hevc_sao_edge_filter_<width>_8_<opt>(uint8_t *_dst, uint8_t *_src, ptrdiff_t stride_dst, int16_t *sao_offset_val,
253 ; int eo, int width, int height);
; Emits one edge-filter function. Accepts two or three macro parameters; %1 is
; the width that becomes part of the function name and is forwarded to
; HEVC_SAO_EDGE_FILTER_COMPUTE.
254 %macro HEVC_SAO_EDGE_FILTER 2-3
; Declaration with 4 arguments loaded, 9 general-purpose registers and 8 vector
; registers. heightq doubles as the table base (tmp2q) during init.
; NOTE(review): presumably the x86-64 variant -- confirm the guarding %if.
256 cglobal hevc_sao_edge_filter_%1_8, 4, 9, 8, dst, src, dststride, offset, eo, a_stride, b_stride, height, tmp
257 %define tmp2q heightq
258 HEVC_SAO_EDGE_FILTER_INIT
; Declaration with only 1 argument loaded and 6 general-purpose registers.
; dststrideq is borrowed as the table base (tmp2q) and heightq as offsetq.
; NOTE(review): presumably the register-starved x86-32 variant -- confirm.
262 cglobal hevc_sao_edge_filter_%1_8, 1, 6, 8, dst, src, dststride, a_stride, b_stride, height
265 %define tmp2q dststrideq
266 %define offsetq heightq
267 HEVC_SAO_EDGE_FILTER_INIT
; dststrideq was used as scratch during init, so reload the real value from
; its argument slot.
270 mov dststrideq, dststridem
; Replicate the first 16 bytes at offsetq into both 128-bit halves of m0.
274 vbroadcasti128 m0, [offsetq]
278 mova m1, [pb_edge_shuffle]
; Neighbour loads feeding the compute macro: 8 bytes with movq here...
292 movq m2, [srcq + a_strideq]
293 movq m3, [srcq + b_strideq]
294 HEVC_SAO_EDGE_FILTER_COMPUTE %1
; ...and a full unaligned vector at byte column i in the two variants below.
; NOTE(review): i is presumably an assembly-time counter stepped per vector --
; confirm.
301 movu m2, [srcq + a_strideq + i]
302 movu m3, [srcq + b_strideq + i]
303 HEVC_SAO_EDGE_FILTER_COMPUTE %1
312 movu m2, [srcq + a_strideq + i]
313 movu m3, [srcq + b_strideq + i]
314 HEVC_SAO_EDGE_FILTER_COMPUTE %1
; The source advances by the fixed EDGE_SRCSTRIDE rather than by a stride
; argument.
322 add srcq, EDGE_SRCSTRIDE
; Edge-filter instantiations for widths 8-64. The optional third argument is
; omitted for width 8.
; NOTE(review): the third argument (a / u) presumably selects an aligned or
; unaligned move suffix inside HEVC_SAO_EDGE_FILTER -- confirm.
329 HEVC_SAO_EDGE_FILTER 8, 0
330 HEVC_SAO_EDGE_FILTER 16, 1, a
331 HEVC_SAO_EDGE_FILTER 32, 2, a
332 HEVC_SAO_EDGE_FILTER 48, 2, a
333 HEVC_SAO_EDGE_FILTER 64, 4, a
; AVX2 variants, built only when HAVE_AVX2_EXTERNAL is non-zero. Width 48 is
; the only instantiation that passes u instead of a.
335 %if HAVE_AVX2_EXTERNAL
337 HEVC_SAO_EDGE_FILTER 32, 1, a
338 HEVC_SAO_EDGE_FILTER 48, 1, u
339 HEVC_SAO_EDGE_FILTER 64, 2, a