2 ; TinyPTC x11 v0.7.3 MMX-Optimized YV12 converter
3 ; Copyright (C) 2002 Fred Howell <foohoo@shaw.ca>
5 ; http://www.sourceforge.net/projects/tinyptc/
7 ; This library is free software; you can redistribute it and/or
8 ; modify it under the terms of the GNU Lesser General Public
9 ; License as published by the Free Software Foundation; either
10 ; version 2 of the License, or (at your option) any later version.
12 ; This library is distributed in the hope that it will be useful,
13 ; but WITHOUT ANY WARRANTY; without even the implied warranty of
14 ; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 ; Lesser General Public License for more details.
17 ; You should have received a copy of the GNU Lesser General Public
18 ; License along with this library; if not, write to the Free Software
19 ; Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
24 global convert_yv12_mmx
30 ; static short ygr0bcoff[] = {33058/2,16390/2,0,6405/2};
31 ; static short vgr0bcoff[] = {-24110/2,28781/2,0,-4671/2};
32 ; static short ugr0bcoff[] = {-19068/2,-9713/2,0,28781/2};
33 ; static short yb0grcoff[] = {6405/2,0,33058/2,16390/2};
34 ; static short vb0grcoff[] = {-4671/2,0,-24110/2,28781/2};
34 ; static short ub0grcoff[] = {28781/2,0,-19068/2,-9713/2};
37 ; static unsigned short add2w[] = {1,1,1,1};
38 ; static short aoff[] = {16,128,16,128};
39 ; static unsigned char bmask[] = {0xff, 0, 0, 0, 0, 0, 0, 0};
40 ; static unsigned short grmask[] = {0,0xffff,0,0};
; Constant tables. Each *coff table holds four signed 16-bit words (8 bytes,
; low word first) and is used as the memory operand of pmaddwd in the
; conversion code. The hex values are the 16-bit two's-complement encodings
; of the halved constants noted on each line (e.g. 0x4091 = 33058/2,
; 0xd0e9 = -24110/2).
; NOTE(review): the gr0b / b0gr suffixes appear to spell the word order
; (g,r,0,b and b,0,g,r); the magnitudes look like BT.601 RGB->YUV weights
; in 1.15 fixed point -- confirm against the pixel unpacking code.
43 ygr0bcoff dw 0x4091,0x2003,0x0000,0x0c82 ; 33058/2, 16390/2, 0, 6405/2
44 ugr0bcoff dw 0xdac2,0xed08,0x0000,0x3836 ; -19068/2, -9713/2, 0, 28781/2
45 vgr0bcoff dw 0xd0e9,0x3836,0x0000,0xf6e1 ; -24110/2, 28781/2, 0, -4671/2
46 yb0grcoff dw 0x0c82,0x0000,0x4091,0x2003 ; 6405/2, 0, 33058/2, 16390/2
47 ub0grcoff dw 0x3836,0x0000,0xdac2,0xed08 ; 28781/2, 0, -19068/2, -9713/2
48 vb0grcoff dw 0xf6e1,0x0000,0xd0e9,0x3836 ; -4671/2, 0, -24110/2, 28781/2
; Four words of 16 and four words of 128. Not referenced in the lines visible
; here; presumably the luma (+16) and chroma (+128) biases -- TODO confirm.
49 yoff dw 0x0010,0x0010,0x0010,0x0010
50 uvoff dw 0x0080,0x0080,0x0080,0x0080
; 64-bit masks: grmask keeps only word 1 (bits 16..31), bmask keeps only
; byte 0. Their use is not visible in this excerpt.
52 grmask dw 0,0xffff,0,0
53 bmask db 0xff,0,0,0, 0,0,0,0
70 ;// initialize mm7 to zero
86 ; 1st quad, 1st line
95 punpcklbw mm3, [thezero];
96 punpcklbw mm4, [thezero];
101 pmaddwd mm5, [ygr0bcoff];
105 pmaddwd mm1, [yb0grcoff];
114 pmaddwd mm6, [ugr0bcoff];
118 pmaddwd mm1, [ub0grcoff];
126 pmaddwd mm3, [vgr0bcoff];
129 pmaddwd mm4, [vb0grcoff];
136 ; 1st quad, 2nd line
145 punpcklbw mm3, [thezero];
146 punpcklbw mm4, [thezero];
152 pmaddwd mm2, [ygr0bcoff];
156 pmaddwd mm1, [yb0grcoff];
164 pmaddwd mm0, [ugr0bcoff];
168 pmaddwd mm1, [ub0grcoff];
176 packssdw mm6,[thezero];
179 pmaddwd mm3, [vgr0bcoff];
182 pmaddwd mm4, [vb0grcoff];
190 packssdw mm7,[thezero];
192 ; 2nd quad, 1st line
201 punpcklbw mm3, [thezero];
202 punpcklbw mm4, [thezero];
207 pmaddwd mm0, [ygr0bcoff];
211 pmaddwd mm1, [yb0grcoff];
225 pmaddwd mm0, [ugr0bcoff];
229 pmaddwd mm1, [ub0grcoff];
242 pmaddwd mm3, [vgr0bcoff];
245 pmaddwd mm4, [vb0grcoff];
255 ; 2nd quad, 2nd line
256 movq mm3, [edx+edi+8];
264 punpcklbw mm3, [thezero];
265 punpcklbw mm4, [thezero];
271 pmaddwd mm0, [ygr0bcoff];
275 pmaddwd mm1, [yb0grcoff];
289 pmaddwd mm0, [ugr0bcoff];
293 pmaddwd mm1, [ub0grcoff];
303 packssdw mm6,[thezero]
306 pmaddwd mm3, [vgr0bcoff];
309 pmaddwd mm4, [vb0grcoff];
319 packssdw mm7,[thezero]
321 ; 3rd quad, 1st line
330 punpcklbw mm3, [thezero];
331 punpcklbw mm4, [thezero];
336 pmaddwd mm5, [ygr0bcoff];
340 pmaddwd mm1, [yb0grcoff];
349 pmaddwd mm0, [ugr0bcoff];
353 pmaddwd mm1, [ub0grcoff];
365 pmaddwd mm3, [vgr0bcoff];
368 pmaddwd mm4, [vb0grcoff];
378 ; 3rd quad, 2nd line
379 movq mm3, [edx+edi+16];
387 punpcklbw mm3, [thezero];
388 punpcklbw mm4, [thezero];
394 pmaddwd mm2, [ygr0bcoff];
398 pmaddwd mm1, [yb0grcoff];
406 pmaddwd mm0, [ugr0bcoff];
410 pmaddwd mm1, [ub0grcoff];
421 pmaddwd mm3, [vgr0bcoff];
424 pmaddwd mm4, [vb0grcoff];
434 ; 4th quad, 1st line
443 punpcklbw mm3, [thezero];
444 punpcklbw mm4, [thezero];
449 pmaddwd mm0, [ygr0bcoff];
453 pmaddwd mm1, [yb0grcoff];
467 pmaddwd mm0, [ugr0bcoff];
471 pmaddwd mm1, [ub0grcoff];
483 pmaddwd mm3, [vgr0bcoff];
486 pmaddwd mm4, [vb0grcoff];
496 ; 4th quad, 2nd line
497 movq mm3, [edx+edi+24];
505 punpcklbw mm3, [thezero];
506 punpcklbw mm4, [thezero];
512 pmaddwd mm0, [ygr0bcoff];
516 pmaddwd mm1, [yb0grcoff];
525 movd [ebx+esi+4], mm2
530 pmaddwd mm0, [ugr0bcoff];
534 pmaddwd mm1, [ub0grcoff];
550 pmaddwd mm3, [vgr0bcoff];
553 pmaddwd mm4, [vb0grcoff];
575 ; preparation for the next 4 quads