1 ;*****************************************************************************
2 ;* bitstream-a.asm: x86 bitstream functions
3 ;*****************************************************************************
4 ;* Copyright (C) 2010-2015 x264 project
6 ;* Authors: Fiona Glaser <fiona@x264.com>
7 ;* Henrik Gramner <henrik@gramner.com>
9 ;* This program is free software; you can redistribute it and/or modify
10 ;* it under the terms of the GNU General Public License as published by
11 ;* the Free Software Foundation; either version 2 of the License, or
12 ;* (at your option) any later version.
14 ;* This program is distributed in the hope that it will be useful,
15 ;* but WITHOUT ANY WARRANTY; without even the implied warranty of
16 ;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 ;* GNU General Public License for more details.
19 ;* You should have received a copy of the GNU General Public License
20 ;* along with this program; if not, write to the Free Software
21 ;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
23 ;* This program is also available under a commercial proprietary license.
24 ;* For more information, contact us at licensing@x264.com.
25 ;*****************************************************************************
28 %include "x86util.asm"
32 ;-----------------------------------------------------------------------------
33 ; uint8_t *x264_nal_escape( uint8_t *dst, uint8_t *src, uint8_t *end )
34 ;-----------------------------------------------------------------------------
37 ; Detect false positive to avoid unnecessary escape loop
46 mova [r0+r1+mmsize], m1
51 %2 m1, [r1+r2+3*mmsize]
53 %2 m2, [r1+r2+2*mmsize]
73 cglobal nal_escape, 3,5
75 sub r1, r2 ; r1 = offset of current src pointer from end of src
78 sub r0, r1 ; r0 = projected end of dst, assuming no more escapes
79 or r3d, 0xffffff00 ; ignore data before src
81 ; Start off by jumping into the escape loop in case there's an escape at the start.
82 ; And do a few more in scalar until dst is aligned.
86 NAL_LOOP .loop_aligned, mova
89 NAL_LOOP .loop_unaligned, movu
95 ; Skip bytes that are known to be valid
98 xor r3d, r3d ; the last two bytes are known to be zero
103 movzx r4d, byte [r1+r2]
106 test r3d, 0xfffffc ; if the last two bytes are 0 and the current byte is <=3
111 test r4d, mmsize-1 ; Do SIMD when dst is aligned
113 movu m1, [r1+r2+mmsize]