1 ;*****************************************************************************
2 ;* cabac-a.asm: h264 encoder library
3 ;*****************************************************************************
4 ;* Copyright (C) 2008 x264 project
6 ;* Author: Loren Merritt <lorenm@u.washington.edu>
7 ;* Fiona Glaser <fiona@x264.com>
9 ;* This program is free software; you can redistribute it and/or modify
10 ;* it under the terms of the GNU General Public License as published by
11 ;* the Free Software Foundation; either version 2 of the License, or
12 ;* (at your option) any later version.
14 ;* This program is distributed in the hope that it will be useful,
15 ;* but WITHOUT ANY WARRANTY; without even the implied warranty of
16 ;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 ;* GNU General Public License for more details.
19 ;* You should have received a copy of the GNU General Public License
20 ;* along with this program; if not, write to the Free Software
21 ;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
22 ;*****************************************************************************
; External lookup tables: declared here, defined outside this file.
; They are only read, through the LOAD_GLOBAL invocations further down.
30 cextern x264_cabac_range_lps
31 cextern x264_cabac_transition
32 cextern x264_cabac_renorm_shift
34 ; t3 must be ecx, since it's used as a variable shift count (x86 takes that count in cl).
; Each DECLARE_REG_TMP line lists, in order, the numbered register behind t0..t7;
; t3 (the fourth entry) is r0, r3 and r1 respectively.
; NOTE(review): these look like per-target alternatives (presumably Win64,
; Unix x86-64 and x86-32, where x86inc numbers (r/e)cx as r0, r3 and r1) --
; confirm against x86inc.asm and the conditional lines that select between them.
36 DECLARE_REG_TMP 3,1,2,0,4,5,6,10
39 DECLARE_REG_TMP 0,1,2,3,4,5,6,10
42 DECLARE_REG_TMP 0,3,2,1,4,5,6,3
; One dword of storage; accessed further down as dword [t0+cb.bytes_outstanding].
50 .bytes_outstanding: resd 1
61 ; This would be faster if the arrays were declared in asm, so that the lea did not have to be duplicated.
; NOTE(review): the two movzx forms below are presumably the alternative bodies
; of the four-argument LOAD_GLOBAL macro used later in the file -- confirm.
; Form 1: zero-extended byte load through r11 plus %4 (presumably r11 holds the
; table address produced by the lea mentioned above; that lea is not visible here).
66 movzx %1, byte [r11+%4]
; Form 2: zero-extended byte load with %2, %3 and %4 summed in one address expression.
68 movzx %1, byte [%2+%3+%4]
; x264_cabac_encode_decision_asm
; The instructions visible here operate on the structure addressed by t0
; (fields cb.range, cb.state, cb.queue, cb.bytes_outstanding), use t1 as a
; byte index into cb.state, and consult the three external tables
; x264_cabac_range_lps, x264_cabac_transition and x264_cabac_renorm_shift.
; NOTE(review): "0,7" is presumably x86inc's "auto-load 0 arguments, use 7
; registers" -- confirm against x86inc.asm.
72 cglobal x264_cabac_encode_decision_asm, 0,7
75 mov t5d, [t0+cb.range] ; t5d = cb.range
76 movzx t3d, byte [t0+cb.state+t1] ; t3d = zero-extended byte cb.state[t1]
; Table fetch addressed by x264_cabac_range_lps - 4 + t5 + t3*4.
; NOTE(review): the -4 presumably cancels a minimum value of 4 in t5 -- confirm.
79 LOAD_GLOBAL t5d, x264_cabac_range_lps-4, t5, t3*4
; Table fetch addressed by x264_cabac_transition + t2 + t3*2.
89 LOAD_GLOBAL t3d, x264_cabac_transition, t2, t3*2
91 mov [t0+cb.state+t1], t3b ; cb.state[t1] = low byte of t3
; Table fetch addressed by x264_cabac_renorm_shift + t3.
95 LOAD_GLOBAL t3d, x264_cabac_renorm_shift, 0, t3
98 add t3d, [t0+cb.queue] ; t3d += cb.queue
99 mov [t0+cb.range], t4d ; cb.range = t4d
101 mov [t0+cb.queue], t3d ; cb.queue = t3d
106 ; registers live at this point: t0=cb t3=queue t6=low
115 cmp t2b, 0xff ; FIXME: would a 32-bit compare be faster than this 8-bit one?
116 mov [t0+cb.queue], t3d ; cb.queue = t3d; mov leaves the flags from the cmp intact
121 mov t5d, [t0+cb.bytes_outstanding] ; t5d = cb.bytes_outstanding
135 mov [t0+cb.bytes_outstanding], t5d ; value is zero here, but storing a reg has a smaller encoding than storing an immediate
139 inc dword [t0+cb.bytes_outstanding] ; cb.bytes_outstanding += 1