1 ;*****************************************************************************
2 ;* cabac-a.asm: h264 encoder library
3 ;*****************************************************************************
4 ;* Copyright (C) 2008 x264 project
6 ;* Author: Loren Merritt <lorenm@u.washington.edu>
7 ;* Fiona Glaser <fiona@x264.com>
9 ;* This program is free software; you can redistribute it and/or modify
10 ;* it under the terms of the GNU General Public License as published by
11 ;* the Free Software Foundation; either version 2 of the License, or
12 ;* (at your option) any later version.
14 ;* This program is distributed in the hope that it will be useful,
15 ;* but WITHOUT ANY WARRANTY; without even the implied warranty of
16 ;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 ;* GNU General Public License for more details.
19 ;* You should have received a copy of the GNU General Public License
20 ;* along with this program; if not, write to the Free Software
21 ;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
22 ;*****************************************************************************
30 cextern x264_cabac_range_lps
31 cextern x264_cabac_transition
32 cextern x264_cabac_renorm_shift
; Two invocations of DEF_TMP with sixteen arguments each.
; NOTE(review): DEF_TMP's definition and whatever conditional selects between
; these two lines are not visible in this excerpt. Presumably the first eight
; arguments name the temporaries t0..t7 and the last eight name the registers
; backing them, one list per build target -- TODO confirm against the macro.
; The two lists agree on entries 0, 2, 4, 5 and 6 of the second group and
; differ on entries 1, 3 and 7 (1/3/10 versus 3/1/3).
; NOTE(review): the second list contains 3 twice (entries 1 and 7); if these
; are register indices, two temporaries share a register there -- confirm
; that the code using them accounts for it.
43 ; t3 must be ecx, since it's used for shift.
45 DEF_TMP 0,1,2,3,4,5,6,7, 0,1,2,3,4,5,6,10
48 DEF_TMP 0,1,2,3,4,5,6,7, 0,3,2,1,4,5,6,3
; One dword-sized field. The encoder below addresses it as
; cb.bytes_outstanding and accesses it with 32-bit operations
; (mov to/from a 32-bit register, inc dword).
; NOTE(review): the enclosing struc header and the sibling fields
; (cb.range, cb.queue, cb.state are referenced below) are not visible here.
56 .bytes_outstanding: resd 1
; Fragments of a macro body that uses parameters %1..%4. Every visible
; instruction is a zero-extending byte load (movzx) into %1.
; NOTE(review): the %macro header and the conditionals that choose between
; these variants are not visible in this excerpt; presumably this is the
; LOAD_GLOBAL macro invoked with four operands further down -- TODO confirm.
67 ; this would be faster if the arrays were declared in asm, so that I didn't have to duplicate the lea
; Variant addressing through r11: loads the byte at [r11+%4]. r11 must be set
; up by instructions not shown here (presumably the lea mentioned above).
72 movzx %1, byte [r11+%4]
; Variant that indexes %2 by %1 itself, so %1 has to hold an offset on entry to
; this instruction; the code producing that offset is not shown.
76 movzx %1, byte [%2+%1 GLOBAL]
; Variant with the full %2+%3+%4 address plus the GLOBAL suffix (GLOBAL is
; defined elsewhere).
78 movzx %1, byte [%2+%3+%4 GLOBAL]
; Variant with the plain %2+%3+%4 address and no GLOBAL suffix.
81 movzx %1, byte [%2+%3+%4]
; x264_cabac_encode_decision_asm (excerpt).
; NOTE(review): this listing is non-contiguous -- the embedded line numbers
; jump (e.g. 94 -> 110, 145 -> 159), so labels, branches, the arithmetic between
; the visible lines and the function's return are not shown. The comments below
; describe only what each visible instruction does.
; r0 is the base register for every cb.* field access in this excerpt
; (cb.range, cb.state, cb.queue, cb.bytes_outstanding).
; The meaning of the "0,7" arguments is defined by the cglobal macro, which is
; not part of this excerpt.
85 cglobal x264_cabac_encode_decision_asm, 0,7
; t5d = 32-bit value of cb.range.
89 mov t5d, [r0+cb.range]
; t3d = zero-extended byte at cb.state + t1.
90 movzx t3d, byte [r0+cb.state+t1]
; Table access into x264_cabac_range_lps using t5 and t3*4 as the offset
; operands, with t5d as the destination operand (presumably a byte load via
; LOAD_GLOBAL -- its definition is only partially visible).
94 LOAD_GLOBAL t5d, x264_cabac_range_lps, t5, t3*4
; Two accesses into x264_cabac_transition that differ only in the third operand
; (t1 versus t2). NOTE(review): presumably alternatives under a conditional
; that is not visible here -- confirm.
110 LOAD_GLOBAL t3d, x264_cabac_transition, t1, t3*2
112 LOAD_GLOBAL t3d, x264_cabac_transition, t2, t3*2
; Store the low byte of t3 to cb.state + t1, the same address expression the
; movzx above read from.
115 mov [r0+cb.state+t1], t3b
; Table access into x264_cabac_renorm_shift with offset operands 0 and t3;
; destination operand is t3d.
119 LOAD_GLOBAL t3d, x264_cabac_renorm_shift, 0, t3
; t3d += cb.queue.
122 add t3d, [r0+cb.queue]
; Write t4d back to cb.range and t3d back to cb.queue.
123 mov [r0+cb.range], t4d
125 mov [r0+cb.queue], t3d
130 ; alive: t0=cb t3=queue t6=low
; Compare the low byte of t2 with 0xff. The branch that consumes the flags is
; not shown; the mov that follows does not modify flags, so the result of this
; compare is still available after it.
139 cmp t2b, 0xff ; FIXME is a 32bit op faster?
; Write t3d back to cb.queue.
140 mov [r0+cb.queue], t3d
; t5d = cb.bytes_outstanding.
145 mov t5d, [r0+cb.bytes_outstanding]
; Store t5d to cb.bytes_outstanding; per the original note t5d is zero at this
; point (the code that brings it to zero is not visible in this excerpt).
159 mov [r0+cb.bytes_outstanding], t5d ; is zero, but a reg has smaller opcode than an immediate
; Increment cb.bytes_outstanding in memory by one (32-bit operand).
163 inc dword [r0+cb.bytes_outstanding]