;
; TinyPTC x11 v0.7.3 MMX-Optimized YV12 converter
; Copyright (C) 2002 Fred Howell
;
; http://www.sourceforge.net/projects/tinyptc/
;
; This library is free software; you can redistribute it and/or
; modify it under the terms of the GNU Lesser General Public
; License as published by the Free Software Foundation; either
; version 2 of the License, or (at your option) any later version.
;
; This library is distributed in the hope that it will be useful,
; but WITHOUT ANY WARRANTY; without even the implied warranty of
; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
; Lesser General Public License for more details.
;
; You should have received a copy of the GNU Lesser General Public
; License along with this library; if not, write to the Free Software
; Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
;
; Provenance: this text was recovered from a gitweb "blobdiff_plain" of
; bigscreen/tinyptc/yv12.s (the diff header and the leading '-' markers
; have been stripped so that the file assembles again).
;
; Syntax: NASM (Intel operand order), 32-bit x86, MMX.
;

bits 32

global convert_yv12_mmx

section .data

align 16

; Fixed-point colour-conversion coefficients, stored as signed 16-bit
; words scaled by 2^15 (every dot product below is followed by
; "psrad 15").  Two lane layouts exist because a pixel pair is unpacked
; into two registers:
;
;   "gr0b" tables multiply the words  G0, R0, X0, B1
;   "b0gr" tables multiply the words  B0,  0, G1, R1
;
; where 0/1 are the first/second pixel of the pair and X is the fourth
; byte of the pixel (always weighted by zero, i.e. ignored).
;
; Equivalent C initialisers:
;   short ygr0bcoff[] = { 33058/2,  16390/2,        0,   6405/2};
;   short ugr0bcoff[] = {-19068/2,  -9713/2,        0,  28781/2};
;   short vgr0bcoff[] = {-24110/2,  28781/2,        0,  -4671/2};
;   short yb0grcoff[] = {  6405/2,        0,  33058/2,  16390/2};
;   short ub0grcoff[] = { 28781/2,        0, -19068/2,  -9713/2};
;   short vb0grcoff[] = { -4671/2,        0, -24110/2,  28781/2};

thezero         dw 0,0,0,0                              ; all-zero qword for unpack/pack
ygr0bcoff       dw 0x4091,0x2003,0x0000,0x0c82
ugr0bcoff       dw 0xdac2,0xed08,0x0000,0x3836
vgr0bcoff       dw 0xd0e9,0x3836,0x0000,0xf6e1
yb0grcoff       dw 0x0c82,0x0000,0x4091,0x2003
ub0grcoff       dw 0x3836,0x0000,0xdac2,0xed08
vb0grcoff       dw 0xf6e1,0x0000,0xd0e9,0x3836
yoff            dw 0x0010,0x0010,0x0010,0x0010          ; +16 bias added to every Y
uvoff           dw 0x0080,0x0080,0x0080,0x0080          ; +128 bias added to every U/V
add2w           dw 1,1,1,1                              ; pmaddwd by 1 = add adjacent words
grmask          dw 0,0xffff,0,0                         ; keeps bytes 2-3 (G1,R1 after >>24)
bmask           db 0xff,0,0,0, 0,0,0,0                  ; keeps byte 0 (B0)


;-----------------------------------------------------------------------
; Helper macros.  They expand to exactly the instruction sequences that
; were previously written out by hand for each of the eight pixel pairs.
;-----------------------------------------------------------------------

; LOAD_PAIR mem
;   Loads two 32-bit pixels (8 bytes) from mem and spreads their colour
;   bytes into 16-bit words:
;       mm3 = G0, R0, X0, B1        mm4 = B0, 0, G1, R1
;   Clobbers mm1.
%macro LOAD_PAIR 1
        movq        mm3, %1
        movq        mm1, mm3
        psrlq       mm3, 8                  ; bytes: G0 R0 X0 B1 ...
        movq        mm4, mm1
        psrlq       mm1, 24                 ; bytes: X0 B1 G1 R1 ...
        pand        mm4, [bmask]            ; B0 only
        pand        mm1, [grmask]           ; G1 R1 only (bytes 2-3)
        por         mm4, mm1                ; bytes: B0 0 G1 R1
        punpcklbw   mm3, [thezero]          ; zero-extend low 4 bytes to words
        punpcklbw   mm4, [thezero]
%endmacro

; PAIR_DOT dst, gr0b_table, b0gr_table
;   dst = two dwords, one per pixel of the pair:
;         (pixel . coefficients) >> 15
;   Leaves mm3/mm4 intact so another component can be derived from the
;   same pair.  Clobbers mm1.
%macro PAIR_DOT 3
        movq        %1, mm3
        pmaddwd     %1, [%2]
        movq        mm1, mm4
        pmaddwd     mm1, [%3]
        paddd       %1, mm1
        psrad       %1, 15
%endmacro

; PAIR_DOT_V
;   Same as PAIR_DOT with the V tables, but computed in place: the
;   result is left in mm3 and the unpacked pixels in mm3/mm4 are
;   destroyed.  Must therefore be the last component taken from a pair.
%macro PAIR_DOT_V 0
        pmaddwd     mm3, [vgr0bcoff]
        pmaddwd     mm4, [vb0grcoff]
        paddd       mm3, mm4
        psrad       mm3, 15
%endmacro

; EMIT_Y4 acc, mem
;   acc holds the two Y dwords of the previous pair of this row.
;   Computes Y for the current pair, appends it, adds the +16 bias,
;   saturates to unsigned bytes and stores the four Y bytes to mem.
;   Clobbers mm0, mm1.
%macro EMIT_Y4 2
        PAIR_DOT    mm0, ygr0bcoff, yb0grcoff
        packssdw    %1, mm0                 ; four Y words
        paddw       %1, [yoff]
        packuswb    %1, %1
        movd        %2, %1
%endmacro

; CHROMA_FOLD acc
;   Adds adjacent words of acc and repacks: words (a,b,c,d) become
;   (a+b, c+d, 0, 0).
%macro CHROMA_FOLD 1
        pmaddwd     %1, [add2w]
        packssdw    %1, [thezero]
%endmacro

; CHROMA_ADD_PAIR acc, src
;   src holds two chroma dwords (c0, c1).  Adds them, unsummed, into
;   words 2 and 3 of acc.  Destroys src.
%macro CHROMA_ADD_PAIR 2
        packssdw    %2, %2                  ; words: c0 c1 c0 c1
        psllq       %2, 32                  ; words:  0  0 c0 c1
        paddw       %1, %2
%endmacro

; CHROMA_ADD_SUM acc, src, lane
;   src holds two chroma dwords (c0, c1).  Adds the 16-bit sum c0+c1
;   into word <lane> (2 or 3) of acc and nothing anywhere else.
;   Destroys src.
;
;   The sum is first moved to word 3 with "psllq 48", which discards
;   the upper half of the 32-bit pmaddwd result, and is then shifted
;   down to the requested lane.  A plain "psllq 32" for lane 2 would
;   also deposit that upper half (0xFFFF whenever the sum is negative)
;   in word 3 and bias the neighbouring chroma sample.
%macro CHROMA_ADD_SUM 3
        packssdw    %2, %2
        pmaddwd     %2, [add2w]             ; dword0 = c0 + c1
        psllq       %2, 48                  ; word 3 = low 16 bits of the sum
%if %3 < 3
        psrlq       %2, (3 - %3) * 16       ; move down to the target word
%endif
        paddw       %1, %2
%endmacro

; CHROMA_STORE acc, mem
;   acc holds four words, each the sum of a 2x2 pixel block.  Divides
;   by four, adds the +128 bias, saturates to unsigned bytes and stores
;   the four chroma bytes to mem.
%macro CHROMA_STORE 2
        psraw       %1, 2
        paddw       %1, [uvoff]
        packuswb    %1, %1
        movd        %2, %1
%endmacro


; NOTE: the section name must be ".text" (with the dot).  A section
; called plain "text" is a custom section which, for ELF output, is not
; marked executable and does not get the 16-byte default alignment.
section .text

align 16

;-----------------------------------------------------------------------
; convert_yv12_mmx
;
; Converts two consecutive rows of 32-bit pixels into two rows of Y
; samples and one row each of 2x2-averaged U and V samples.
;
; Arguments are read from the stack (32-bit, caller cleans up); the
; names below are descriptive only:
;   [esp+ 4]  src    two packed pixel rows; row 1 starts at src + width*4.
;                    Byte 0/1/2 of each pixel are weighted as B/G/R by
;                    the tables above, byte 3 is ignored.
;   [esp+ 8]  y_dst  Y output; second row is written at y_dst + width
;   [esp+12]  v_dst  V output, width/2 bytes
;   [esp+16]  u_dst  U output, width/2 bytes
;   [esp+20]  width  pixels per row
;
; Pixels are processed in groups of 8 per row (width >> 3 iterations);
; any remainder of width modulo 8 is not converted, and nothing at all
; is read or written when width < 8.
;
; Preserves every general-purpose register.  Clobbers mm0-mm7 and the
; flags; executes EMMS before returning.
;
; Register roles inside the loop:
;   edx = src cursor        edi = source row pitch in bytes (width*4)
;   ebx = Y cursor          esi = Y row pitch in bytes (width)
;   eax = U cursor          ebp = V cursor
;   ecx = 8-pixel groups remaining
;   mm5 / mm2 = pending Y pair of the top / bottom row
;   mm6 / mm7 = U / V accumulators (four 16-bit sums)
;-----------------------------------------------------------------------
convert_yv12_mmx:

        push        ebp
        push        eax
        push        ebx
        push        ecx
        push        edx
        push        edi
        push        esi

%assign _P 7*4                              ; bytes pushed above the return address
        mov         edx, [esp+_P+ 4]        ; src
        mov         ebx, [esp+_P+ 8]        ; y_dst
        mov         ebp, [esp+_P+12]        ; v_dst
        mov         eax, [esp+_P+16]        ; u_dst
        mov         ecx, [esp+_P+20]        ; width
        mov         edi, ecx
        mov         esi, ecx                ; Y pitch = width
        shr         ecx, 3                  ; number of 8-pixel groups
        shl         edi, 2                  ; source pitch = width * 4

        test        ecx, ecx                ; fewer than 8 pixels: nothing to do
        jz          near .fin_while

.while:
; ---- pixels 0-1, top row ------------------------------------------
        LOAD_PAIR   [edx]
        PAIR_DOT    mm5, ygr0bcoff, yb0grcoff   ; Y kept until pixels 2-3 arrive
        PAIR_DOT    mm6, ugr0bcoff, ub0grcoff   ; starts the U accumulator
        PAIR_DOT_V
        movq        mm7, mm3                    ; starts the V accumulator

; ---- pixels 0-1, bottom row ---------------------------------------
        LOAD_PAIR   [edx+edi]
        PAIR_DOT    mm2, ygr0bcoff, yb0grcoff
        PAIR_DOT    mm0, ugr0bcoff, ub0grcoff
        packssdw    mm6, mm0                    ; top pair | bottom pair
        CHROMA_FOLD mm6                         ; -> per-row sums in words 0,1
        PAIR_DOT_V
        packssdw    mm7, mm3
        CHROMA_FOLD mm7

; ---- pixels 2-3, top row ------------------------------------------
        LOAD_PAIR   [edx+8]
        EMIT_Y4     mm5, [ebx]                  ; Y for pixels 0-3, top row
        PAIR_DOT    mm0, ugr0bcoff, ub0grcoff
        CHROMA_ADD_PAIR mm6, mm0
        PAIR_DOT_V
        CHROMA_ADD_PAIR mm7, mm3

; ---- pixels 2-3, bottom row ---------------------------------------
        LOAD_PAIR   [edx+edi+8]
        EMIT_Y4     mm2, [ebx+esi]              ; Y for pixels 0-3, bottom row
        PAIR_DOT    mm0, ugr0bcoff, ub0grcoff
        CHROMA_ADD_PAIR mm6, mm0
        CHROMA_FOLD mm6                         ; words 0,1 = 2x2 sums of blocks 0,1
        PAIR_DOT_V
        CHROMA_ADD_PAIR mm7, mm3
        CHROMA_FOLD mm7

; ---- pixels 4-5, top row ------------------------------------------
        LOAD_PAIR   [edx+16]
        PAIR_DOT    mm5, ygr0bcoff, yb0grcoff
        PAIR_DOT    mm0, ugr0bcoff, ub0grcoff
        CHROMA_ADD_SUM mm6, mm0, 2
        PAIR_DOT_V
        CHROMA_ADD_SUM mm7, mm3, 2

; ---- pixels 4-5, bottom row ---------------------------------------
        LOAD_PAIR   [edx+edi+16]
        PAIR_DOT    mm2, ygr0bcoff, yb0grcoff
        PAIR_DOT    mm0, ugr0bcoff, ub0grcoff
        CHROMA_ADD_SUM mm6, mm0, 2
        PAIR_DOT_V
        CHROMA_ADD_SUM mm7, mm3, 2

; ---- pixels 6-7, top row ------------------------------------------
        LOAD_PAIR   [edx+24]
        EMIT_Y4     mm5, [ebx+4]                ; Y for pixels 4-7, top row
        PAIR_DOT    mm0, ugr0bcoff, ub0grcoff
        CHROMA_ADD_SUM mm6, mm0, 3
        PAIR_DOT_V
        CHROMA_ADD_SUM mm7, mm3, 3

; ---- pixels 6-7, bottom row ---------------------------------------
        LOAD_PAIR   [edx+edi+24]
        EMIT_Y4     mm2, [ebx+esi+4]            ; Y for pixels 4-7, bottom row
        PAIR_DOT    mm0, ugr0bcoff, ub0grcoff
        CHROMA_ADD_SUM mm6, mm0, 3
        CHROMA_STORE mm6, [eax]                 ; four U bytes
        PAIR_DOT_V
        CHROMA_ADD_SUM mm7, mm3, 3
        CHROMA_STORE mm7, [ebp]                 ; four V bytes

; ---- advance to the next 8 pixels ---------------------------------
        add         edx, 32                     ; 8 pixels * 4 bytes
        add         ebx, 8                      ; 8 Y bytes per row
        add         eax, 4                      ; 4 U bytes
        add         ebp, 4                      ; 4 V bytes

        dec         ecx
        jnz         near .while                 ; body is far beyond short-jump range

.fin_while:
        emms                                    ; leave the FPU usable for the caller

        pop         esi
        pop         edi
        pop         edx
        pop         ecx
        pop         ebx
        pop         eax
        pop         ebp

        ret