2 * scale_line_22_yuv_mmx.S -- scale line in YUY2 format
3 * Copyright (C) 2003-2004 Ushodaya Enterprises Limited
4 * Author: Dan Dennedy <dan@dennedy.org>
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20 .file "scale_line_22_yuv_mmx.S"
22 #if !defined(__MINGW32__) && !defined(__CYGWIN__)
/* ELF-only: mark the stack non-executable so the linker does not force
 * an executable stack for this hand-written object. */
23 .section .note.GNU-stack,"",%progbits
/* printf-style format string; not referenced in the visible part of
 * this chunk (presumably kept for debug tracing). */
30 MSG: .ascii "scale_line_22_yuv_mmx: %d %d\n"
35 #if !defined(__MINGW32__) && !defined(__CYGWIN__)
/* ELF entry point: plain C name, with explicit function type so the
 * symbol is usable from shared objects. */
37 .globl pixops_scale_line_22_yuv_mmx
38 .type pixops_scale_line_22_yuv_mmx,@function
39 pixops_scale_line_22_yuv_mmx:
/* PE/COFF (MinGW/Cygwin) spelling of the same entry point: C symbols
 * carry a leading underscore on those targets. */
43 .globl _pixops_scale_line_22_yuv_mmx
44 _pixops_scale_line_22_yuv_mmx:
51 * p (dest): 12(%ebp) %esi
72 * int x_scaled -24(%ebp)
79 /* Initialize variables: pull the loop-carried state out of the cdecl
 * stack frame (32-bit ABI: arguments at positive offsets from %ebp)
 * into registers, then bail out early on an empty destination span. */
80 movl 36(%ebp),%eax # %eax = dest_x (output pixel index)
82 movl 32(%ebp),%ebx # %ebx = x (fixed-point source position)
83 movl 12(%ebp),%esi # %esi = dest (output write pointer)
85 cmpl 28(%ebp),%esi # nothing to do? compare dest with dest_end
88 /* For the body of this loop, %mm0, %mm1, %mm2, %mm3 hold the 4 adjoining
89 * points we are interpolating between, as:
96 * Load next component values into mm1 (src0) and mm3 (src1)
98 movl %ebx, %eax # derive x_scaled from fixed-point x
100 andl $0xfffffffe, %eax # round down to an even byte offset (a Y position in YUY2)
101 movl %eax, %edx # x_aligned: offset of the enclosing group
102 andl $0xfffffffc, %edx # round down to the 4-byte Y0-U-Y1-V boundary
/* src0 row: take luma at the exact pixel offset, chroma from the
 * 4-byte-aligned group, and merge into one dword. */
104 movl 16(%ebp), %edi # %edi = src0 row pointer
105 movl (%edi,%eax), %ecx # dword at x_scaled (Y lives in even bytes)
106 andl $0x00ff00ff, %ecx # keep only the two Y bytes
107 movl (%edi,%edx), %eax # dword at x_aligned (NB: clobbers x_scaled in %eax)
108 andl $0xff00ff00, %eax # keep only the U/V bytes
109 orl %eax, %ecx # composite y, uv
110 movd %ecx, %mm1 # %mm1 = src0 sample
/* src1 row: same composition.
 * NOTE(review): the Y load below indexes with x_aligned (%edx), while
 * the src0 case above used x_scaled -- %eax cannot be reused because it
 * was clobbered by the uv load.  Confirm whether fetching src1 luma at
 * the aligned offset is intentional or an off-by-a-pixel bug. */
113 movl 20(%ebp), %edi # %edi = src1 row pointer
114 movl (%edi,%edx), %ecx # dword at x_aligned (see NOTE above)
115 andl $0x00ff00ff, %ecx # keep only the two Y bytes
116 movl (%edi,%edx), %eax # dword at x_aligned again, for chroma
117 andl $0xff00ff00, %eax # keep only the U/V bytes
118 orl %eax, %ecx # composite y, uv
119 movd %ecx, %mm3 # %mm3 = src1 sample
127 /* short *pixel_weights = weights + ((x >> (SCALE_SHIFT - SUBSAMPLE_BITS)) & SUBSAMPLE_MASK) * n_x * n_y
130 movl 8(%ebp), %edi # %edi = base of the filter-weights table
/* The index computation for this pixel's weight group is not visible in
 * this chunk; at the movq loads below %eax is assumed to hold the byte
 * offset of the 4 quadword weights -- TODO confirm against full file. */
135 /* At this point, %edi holds weights. Load the 4 weights into
136 * %mm4,%mm5,%mm6,%mm7, multiply and accumulate.
138 movq (%edi,%eax), %mm4 # w0: 4 x 16-bit weights
140 movq 8(%edi,%eax), %mm5 # w1
142 movq 16(%edi,%eax), %mm6 # w2
144 movq 24(%edi,%eax), %mm7 # w3
151 /* %mm7 holds the accumulated sum. Compute (C + 0x80) / 256
154 movl $0x80808080, %eax # rounding bias: +0x80 in every byte lane
160 /* Pack into %eax and store result
165 movb %al, (%esi) # *dest = y (low byte of packed result)
/* Choose which chroma byte of the aligned Y0-U-Y1-V group belongs at
 * this output position: even dest_x -> byte 1, odd dest_x -> byte 3
 * (presumably U then V); the shift count extracts it from %eax. */
167 movl 36(%ebp), %ecx # get dest_x
168 andl $1, %ecx # low bit selects first/second chroma
169 sall $1, %ecx # *2: 0 or 2 ...
170 addl $1, %ecx # ... +1: byte index 1 or 3 in the group
171 sall $3, %ecx # byte index -> bit shift count (x8)
175 movb %al, 1(%esi) # *dest = uv (second byte of output pixel)
177 addl $2, %esi # dest += 2 (one Y+chroma output pair)
181 addl $1, 36(%ebp) # dest_x++ (in-memory loop counter)
185 addl 24(%ebp), %ebx # x += x_step (advance fixed-point src pos)
178 cmpl %esi,28(%ebp) # loop while dest != dest_end
187 * Load current component values into mm0 (src0) and mm2 (src1)
193 * Load next component values into mm1 (src0) and mm3 (src1)
/* Second copy of the component-load sequence (loop tail); identical in
 * structure to the pre-loop load above. */
195 movl %ebx, %eax # derive x_scaled from fixed-point x
197 andl $0xfffffffe, %eax # round down to an even byte offset (a Y position)
198 movl %eax, %edx # x_aligned: offset of the enclosing group
199 andl $0xfffffffc, %edx # round down to the 4-byte Y0-U-Y1-V boundary
201 movl 16(%ebp), %edi # %edi = src0 row pointer
202 movl (%edi,%eax), %ecx # dword at x_scaled (Y in even bytes)
203 andl $0x00ff00ff, %ecx # keep only the two Y bytes
204 movl (%edi,%edx), %eax # dword at x_aligned (clobbers x_scaled in %eax)
205 andl $0xff00ff00, %eax # keep only the U/V bytes
206 orl %eax, %ecx # composite y, uv
207 movd %ecx, %mm1 # %mm1 = src0 sample
/* NOTE(review): as in the first copy of this sequence, src1's Y is
 * fetched with the x_aligned index %edx rather than x_scaled (which was
 * clobbered above) -- confirm intentional vs. off-by-a-pixel bug. */
210 movl 20(%ebp), %edi # %edi = src1 row pointer
211 movl (%edi,%edx), %ecx # dword at x_aligned (see NOTE)
212 andl $0x00ff00ff, %ecx # keep only the two Y bytes
213 movl (%edi,%edx), %eax # dword at x_aligned again, for chroma
214 andl $0xff00ff00, %eax # keep only the U/V bytes
215 orl %eax, %ecx # composite y, uv
216 movd %ecx, %mm3 # %mm3 = src1 sample