1 ;*****************************************************************************
2 ;* quant-a.asm: h264 encoder library
3 ;*****************************************************************************
4 ;* Copyright (C) 2005 x264 project
6 ;* Authors: Loren Merritt <lorenm@u.washington.edu>
8 ;* This program is free software; you can redistribute it and/or modify
9 ;* it under the terms of the GNU General Public License as published by
10 ;* the Free Software Foundation; either version 2 of the License, or
11 ;* (at your option) any later version.
13 ;* This program is distributed in the hope that it will be useful,
14 ;* but WITHOUT ANY WARRANTY; without even the implied warranty of
15 ;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 ;* GNU General Public License for more details.
18 ;* You should have received a copy of the GNU General Public License
19 ;* along with this program; if not, write to the Free Software
20 ;* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA.
21 ;*****************************************************************************
25 %include "amd64inc.asm"
; MMX_QUANT_DC_START takes no macro arguments.
; NOTE(review): only part of the macro body is visible in this excerpt; the
; visible instruction prepares mm7, which x264_quant_2x2_dc_mmxext passes to
; MMX_QUANT_1x4 as its bias operand.
32 %macro MMX_QUANT_DC_START 0
34 movd mm7, parm3d ; bias: low dword of mm7 = parm3d, upper dword zeroed by movd
; SSE2_QUANT_DC_START takes no macro arguments. It moves the scalar mf and
; bias arguments into xmm6 / xmm7, the registers the *_dc_ quantiser passes
; to QUANT_1x8.
; NOTE(review): any further steps (e.g. replicating the value to every word
; lane) are not visible in this excerpt.
39 %macro SSE2_QUANT_DC_START 0
40 movd xmm6, parm2d ; mf: low dword of xmm6 = parm2d, remaining bits zeroed
41 movd xmm7, parm3d ; bias: low dword of xmm7 = parm3d, remaining bits zeroed
; Body of the five-argument quantiser helper, invoked further down as
; "QUANT_ONE <mov suffix>, <register prefix>, dct, mf, bias".
; NOTE(review): the %macro line and some instructions are not visible in this
; excerpt; comments below describe only the visible instructions.
49 ;;; %1 mov suffix (q or dqa at the call sites); %2 register prefix (m or xm), giving work registers %2m0 / %2m1
50 ;;; %3 (memory) dct[y][x], read and then overwritten with the quantised result
51 ;;; %4 mf[y][x] or mf[0][0] (as uint16_t); %5 bias[y][x] or bias[0][0] (as uint16_t)
53 mov%1 %2m0, %3 ; load dct coeffs
55 pcmpgtw %2m1, %2m0 ; sign(coeff): all-ones in each word where %2m1 > %2m0 (signed); presumably %2m1 was zeroed just before
57 psubw %2m0, %2m1 ; abs(coeff): subtracting the all-ones mask adds 1 in the negative lanes
58 paddusw %2m0, %5 ; round: add bias with unsigned saturation
59 pmulhuw %2m0, %4 ; divide: keep the high 16 bits of the unsigned 16x16 product with mf
60 pxor %2m0, %2m1 ; restore sign: flip the bits of lanes whose mask is all-ones
62 mov%1 %3, %2m0 ; store the result back over the input coefficients
; MMX wrapper around QUANT_ONE: processes one 64-bit group (four 16-bit words).
; %1 = dct operand, %2 = mf operand, %3 = bias operand, forwarded unchanged.
64 %macro MMX_QUANT_1x4 3
65 QUANT_ONE q, m, %1, %2, %3 ; "q" selects movq, "m" selects mm0/mm1
; SSE2 wrapper around QUANT_ONE: processes one 128-bit group (eight 16-bit words).
; %1 = dct operand, %2 = mf operand, %3 = bias operand, forwarded unchanged.
; The dct operand is accessed with movdqa, so it must be 16-byte aligned.
67 %macro SSE2_QUANT_1x8 3
68 QUANT_ONE dqa, xm, %1, %2, %3 ; "dqa" selects movdqa, "xm" selects xmm0/xmm1
; SSSE3 quantiser for one 128-bit group (eight 16-bit words).
; %1 = dct operand (loaded, then overwritten), %2 = mf operand, %3 = bias operand.
; Uses xmm0 and xmm1 as work registers; %1 is accessed with movdqa and must
; therefore be 16-byte aligned.
; NOTE(review): the instruction that fills xmm0 is not visible in this
; excerpt; presumably it holds the absolute values of xmm1.
71 %macro SSSE3_QUANT_1x8 3
72 movdqa xmm1, %1 ; load dct coeffs (kept in xmm1 as the sign reference)
74 paddusw xmm0, %3 ; round: add bias with unsigned saturation
75 pmulhuw xmm0, %2 ; divide: keep the high 16 bits of the unsigned product with mf
76 psignw xmm0, xmm1 ; restore sign: negate words where xmm1 is negative, zero them where xmm1 is zero
77 movdqa %1, xmm0 ; store the result back over the input coefficients
80 ;-----------------------------------------------------------------------------
81 ; void x264_quant_2x2_dc_mmxext( int16_t dct[4], int mf, int bias )
; Quantises the four coefficients at dct, writing the result back to dct.
; NOTE(review): mm6 / mm7 are presumably prepared from mf / bias by
; MMX_QUANT_DC_START; that invocation and the return are not visible here.
82 ;-----------------------------------------------------------------------------
83 cglobal x264_quant_2x2_dc_mmxext
85 MMX_QUANT_1x4 [parm1q], mm6, mm7 ; dct = memory at parm1q, mf = mm6, bias = mm7
89 ;-----------------------------------------------------------------------------
90 ; void x264_quant_4x4_dc_sse2( int16_t dct[16], int mf, int bias )
; The exported symbol is x264_quant_4x4_dc_<%1>: the suffix is a macro
; argument, so "_sse2" above is one possible instantiation rather than the
; only name this body can take.
91 ;-----------------------------------------------------------------------------
92 cglobal x264_quant_4x4_dc_%1
96 QUANT_1x8 [parm1q+x], xmm6, xmm7 ; eight coeffs at byte offset x; mf = xmm6, bias = xmm7 (x is presumably an assembly-time offset - confirm)
101 ;-----------------------------------------------------------------------------
102 ; void x264_quant_4x4_sse2( int16_t dct[16], uint16_t mf[16], uint16_t bias[16] )
; The exported symbol is x264_quant_4x4_<%1>: the suffix is a macro argument,
; so "_sse2" above is one possible instantiation.
103 ;-----------------------------------------------------------------------------
104 cglobal x264_quant_4x4_%1
107 QUANT_1x8 [parm1q+x], [parm2q+x], [parm3q+x] ; dct, mf and bias all read at the same byte offset x (per-coefficient tables)
112 ;-----------------------------------------------------------------------------
113 ; void x264_quant_8x8_sse2( int16_t dct[64], uint16_t mf[64], uint16_t bias[64] )
; The exported symbol is x264_quant_8x8_<%1>: the suffix is a macro argument,
; so "_sse2" above is one possible instantiation.
114 ;-----------------------------------------------------------------------------
115 cglobal x264_quant_8x8_%1
118 QUANT_1x8 [parm1q+x], [parm2q+x], [parm3q+x] ; dct, mf and bias all read at the same byte offset x (per-coefficient tables)
; QUANT_1x8 is the name the x264_quant_*_%1 bodies use for the 8-coefficient
; quantiser. It is bound to the SSE2 macro first and later rebound to the
; SSSE3 macro; each binding applies to code expanded after it.
; NOTE(review): the lines between the two bindings are not visible here;
; presumably they instantiate the function bodies for each instruction set.
124 %define QUANT_1x8 SSE2_QUANT_1x8
127 %define QUANT_1x8 SSSE3_QUANT_1x8
133 ;=============================================================================
135 ;=============================================================================
; Three-argument dequant helper; at its visible call site it is used on the
; path taken when the qbits test does not branch to .rshift32.
; NOTE(review): the macro body is not visible in this excerpt. The call site
; passes [rdi] as %1 and [rsi], [rsi+8] as %2, %3.
137 %macro DEQUANT16_L_1x4 3
139 ;;; %2,%3 dequant_mf[i_mf][y][x]
; Three-argument dequant helper; at its visible call site it follows the load
; of pd_1 into mm6, after the point where the .rshift32 branch is taken.
; NOTE(review): the macro body is not visible in this excerpt. The call site
; passes [rdi] as %1 and [rsi], [rsi+8] as %2, %3.
151 %macro DEQUANT32_R_1x4 3
153 ;;; %2,%3 dequant_mf[i_mf][y][x]
183 ;-----------------------------------------------------------------------------
184 ; void x264_dequant_4x4_mmx( int16_t dct[4][4], int dequant_mf[6][4][4], int i_qp )
; Registers as used below: rdi = dct, rsi = dequant_mf, edx = i_qp.
; NOTE(review): several instructions, the labels and the return are not
; visible in this excerpt; comments describe only the visible lines.
185 ;-----------------------------------------------------------------------------
189 ; mov rsi, rsi ; dequant_mf (left commented out: presumably already in rsi on entry)
190 ; mov edx, edx ; i_qp (left commented out: presumably already in edx on entry)
193 shr eax, 8 ; i_qbits = i_qp / 6 (final shift; the preceding step of the divide is not visible here)
196 sub edx, ecx ; i_mf = i_qp % 6 (ecx presumably holds 6 * i_qbits - confirm)
199 add rsi, rdx ; dequant_mf[i_mf]: advance the table pointer (rdx presumably already scaled to a byte offset)
202 jl .rshift32 ; negative qbits => rightshift
208 DEQUANT16_L_1x4 [rdi], [rsi], [rsi+8] ; left-shift path: dct at rdi, dequant_mf at rsi and rsi+8
218 movq mm6, [pd_1 GLOBAL] ; mm6 = constant pd_1 (defined elsewhere); presumably the rounding term for the right shift
224 DEQUANT32_R_1x4 [rdi], [rsi], [rsi+8] ; right-shift path: same operands as the left-shift path
; Instantiate DEQUANT_WxH twice; the first argument is the routine's symbol name.
; NOTE(review): the macro definition is not visible in this excerpt, so the
; meaning of the two numeric arguments should be confirmed against it.
232 DEQUANT_WxH x264_dequant_4x4_mmx, 4, 4
233 DEQUANT_WxH x264_dequant_8x8_mmx, 16, 6