1 ;*****************************************************************************
2 ;* cabac-a.asm: h264 encoder library
3 ;*****************************************************************************
4 ;* Copyright (C) 2008 x264 project
6 ;* Author: Loren Merritt <lorenm@u.washington.edu>
8 ;* This program is free software; you can redistribute it and/or modify
9 ;* it under the terms of the GNU General Public License as published by
10 ;* the Free Software Foundation; either version 2 of the License, or
11 ;* (at your option) any later version.
13 ;* This program is distributed in the hope that it will be useful,
14 ;* but WITHOUT ANY WARRANTY; without even the implied warranty of
15 ;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 ;* GNU General Public License for more details.
18 ;* You should have received a copy of the GNU General Public License
19 ;* along with this program; if not, write to the Free Software
20 ;* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA.
21 ;*****************************************************************************
29 cextern x264_cabac_range_lps
30 cextern x264_cabac_transition
31 cextern x264_cabac_renorm_shift
; Set up the t<N> temporary-register names (t1, t3d, t5d, ...) used by the
; code further down.
; NOTE(review): the DEF_TMP macro definition and the conditionals that choose
; between the two invocations below are not visible in this view. Presumably
; the second group of eight numbers names the r<N> register backing each
; t<N>, one invocation per target ABI -- confirm against the macro.
42 ; t3 must be ecx, since it's used for shift.
44 DEF_TMP 0,1,2,3,4,5,6,7, 0,1,2,3,4,5,6,10
47 DEF_TMP 0,1,2,3,4,5,6,7, 0,3,2,1,4,5,6,3
; One dword of storage. The code below loads, stores and increments it as a
; dword through [r0+cb.bytes_outstanding].
; NOTE(review): the enclosing structure declaration is not visible here;
; presumably this is a member of the `cb` layout -- confirm.
55 .bytes_outstanding: resd 1
; Table-load macro body (fragment). Each visible line zero-extends one byte
; from memory into %1; the lines differ only in how the address is formed.
; NOTE(review): the %macro header and the conditionals selecting between these
; variants are not visible in this view. The name LOAD_GLOBAL and the meaning
; of the arguments (%1 = destination, %2 = table symbol, %3/%4 = offsets) are
; inferred from the call sites below -- confirm.
66 ; this would be faster if the arrays were declared in asm, so that I didn't have to duplicate the lea
; Variant addressing through r11; r11 is set up by lines not shown here.
71 movzx %1, byte [r11+%4]
; Variant where %1 serves as both the offset being read and the destination.
75 movzx %1, byte [%2+%1 GLOBAL]
; Variant adding both offsets %3 and %4 to %2, with the GLOBAL suffix.
77 movzx %1, byte [%2+%3+%4 GLOBAL]
; Same address form without the GLOBAL suffix.
80 movzx %1, byte [%2+%3+%4]
; x264_cabac_encode_decision (partial view).
; The visible instructions read cb.range and the state byte at cb.state+t1,
; do byte-table lookups in x264_cabac_range_lps, x264_cabac_transition and
; x264_cabac_renorm_shift, then write back the state byte, cb.range and
; cb.queue.
; NOTE(review): the embedded original line numbers jump, so instructions
; between the visible ones are missing from this view. In particular the
; values held in t1, t2 and t4d are produced by code not shown here.
84 cglobal x264_cabac_encode_decision, 0,7
88 mov t5d, [r0+cb.range]                                  ; t5d = cb.range
89 movzx t3d, byte [r0+cb.state+t1]                        ; t3d = state byte at index t1, zero-extended
93 LOAD_GLOBAL t5d, x264_cabac_range_lps, t5, t3*4         ; t5d = table byte at offset t5 + t3*4
; The next two lookups differ only in the offset register (t1 vs t2).
; NOTE(review): presumably they sit in alternative conditional branches that
; are not visible here -- confirm.
109 LOAD_GLOBAL t3d, x264_cabac_transition, t1, t3*2       ; t3d = table byte at offset t1 + t3*2
111 LOAD_GLOBAL t3d, x264_cabac_transition, t2, t3*2       ; t3d = table byte at offset t2 + t3*2
114 mov [r0+cb.state+t1], t3b                              ; write the looked-up byte back to the state slot
118 LOAD_GLOBAL t3d, x264_cabac_renorm_shift, 0, t3        ; t3d = table byte at offset t3 (no extra offset)
121 add t3d, [r0+cb.queue]                                 ; t3d += cb.queue
122 mov [r0+cb.range], t4d                                 ; cb.range = t4d
124 mov [r0+cb.queue], t3d                                 ; cb.queue = t3d (the sum computed above)
; Fragment that stores cb.queue and maintains cb.bytes_outstanding.
; NOTE(review): the label, the branches and most instructions of this path
; are not visible in this view, so the control flow between the lines below
; cannot be determined from here.
130 ; alive: t0=cb t3=queue t6=low
; Compare the low byte t2b with 0xff. The store on the next line is a plain
; mov and does not alter the flags; the instruction that consumes them is not
; shown here.
139 cmp t2b, 0xff ; FIXME is a 32bit op faster?
140 mov [r0+cb.queue], t3d
; t5d = current cb.bytes_outstanding
145 mov t5d, [r0+cb.bytes_outstanding]
162 mov [r0+cb.bytes_outstanding], t5d ; is zero, but a reg has smaller opcode than an immediate
; Increment the counter in memory by one.
166 inc dword [r0+cb.bytes_outstanding]