2 ; TinyPTC x11 v0.7.3 MMX-Optimized pixelformat converters
3 ; Copyright (C) 2000-2002 Alessandro Gatti <a.gatti@tiscali.it>
4 ; Copyright (C) 2000-2001 Glenn Fiedler <gaffer@gaffer.org>
6 ; http://www.sourceforge.net/projects/tinyptc/
8 ; This library is free software; you can redistribute it and/or
9 ; modify it under the terms of the GNU Lesser General Public
10 ; License as published by the Free Software Foundation; either
11 ; version 2 of the License, or (at your option) any later version.
13 ; This library is distributed in the hope that it will be useful,
14 ; but WITHOUT ANY WARRANTY; without even the implied warranty of
15 ; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 ; Lesser General Public License for more details.
18 ; You should have received a copy of the GNU Lesser General Public
19 ; License along with this library; if not, write to the Free Software
20 ; Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
; Export list: each converter symbol is declared global only when its
; matching __PTC_MMX_CONVERT_* macro is defined at assembly time.
31 %ifdef __PTC_MMX_CONVERT_32_TO_32_BGR888
32 global mmx_convert_32_to_32_bgr888
35 %ifdef __PTC_MMX_CONVERT_32_TO_24_RGB888
36 global mmx_convert_32_to_24_rgb888
39 %ifdef __PTC_MMX_CONVERT_32_TO_24_BGR888
40 global mmx_convert_32_to_24_bgr888
43 %ifdef __PTC_MMX_CONVERT_32_TO_16_RGB565
44 global mmx_convert_32_to_16_rgb565
47 %ifdef __PTC_MMX_CONVERT_32_TO_16_BGR565
48 global mmx_convert_32_to_16_bgr565
51 %ifdef __PTC_MMX_CONVERT_32_TO_16_RGB555
52 global mmx_convert_32_to_16_rgb555
55 %ifdef __PTC_MMX_CONVERT_32_TO_16_BGR555
56 global mmx_convert_32_to_16_bgr555
; Constant operands for the converters. Every entry is the same dword
; stored twice, so one 64-bit (qword) load applies it to two 32-bit
; pixels at once.
; mmx_rgb888_mask: keeps the low 24 bits of each dword (clears the top byte).
63 mmx_rgb888_mask dd 00ffffffh,00ffffffh
; 565 field masks: top 5 bits of byte 0, top 6 bits of byte 1, top 5 bits
; of byte 2 of each dword.
65 mmx_rgb565_b dd 000000f8h, 000000f8h
66 mmx_rgb565_g dd 0000fc00h, 0000fc00h
67 mmx_rgb565_r dd 00f80000h, 00f80000h
; 555 field masks: top 5 bits of bytes 0 and 2 together, and top 5 bits of
; byte 1 of each dword.
69 mmx_rgb555_rb dd 00f800f8h,00f800f8h
70 mmx_rgb555_g dd 0000f800h,0000f800h
; Multipliers loaded into mm7 by the rgb555 and bgr555 entry points
; respectively; they are the only difference between the two converters.
71 mmx_rgb555_mul dd 20000008h,20000008h
72 mmx_bgr555_mul dd 00082000h,00082000h
; Fetch the three stack arguments addressed through ebp.
; NOTE(review): assumes ebp already holds the frame pointer at this
; point - confirm against the prologue.
87 mov edi,[ebp+8] ; destination
88 mov esi,[ebp+12] ; source
89 mov ecx,[ebp+16] ; bytes
; Entry label of the 32-bit -> 32-bit BGR888 converter; assembled only
; when __PTC_MMX_CONVERT_32_TO_32_BGR888 is defined.
134 %ifdef __PTC_MMX_CONVERT_32_TO_32_BGR888
138 mmx_convert_32_to_32_bgr888:
; 32-bit -> 24-bit RGB888 converter; assembled only when
; __PTC_MMX_CONVERT_32_TO_24_RGB888 is defined.
;
; Register roles in the code below:
;   edi = destination, esi = source, ecx = byte count (edx keeps a copy)
;   mm6 = mmx_rgb888_mask, mm7 = zero (per the set-up comment)
;
; In the lane diagrams each letter is one byte, most significant first.
; Upper case marks the pixel held in the high dword of a register, lower
; case the pixel held in the low dword.
144 %ifdef __PTC_MMX_CONVERT_32_TO_24_RGB888
149 mmx_convert_32_to_24_rgb888:
; load the three arguments from the ebp-based frame
156 mov edi,[ebp+8] ; destination
157 mov esi,[ebp+12] ; source
158 mov ecx,[ebp+16] ; bytes
160 ; set up mm6 as the mask, mm7 as zero
161 movq mm6, qword [mmx_rgb888_mask]
; keep the original count in edx, then round ecx down to a multiple of 4
164 mov edx, ecx ; save ecx
165 and ecx, 0fffffffch ; clear lower two bits
; load two quadwords (two 32-bit pixels each) and clear the top byte of
; every pixel
171 movq mm0, [esi] ; A R G B a r g b
172 pand mm0, mm6 ; 0 R G B 0 r g b
173 movq mm1, [esi+8] ; A R G B a r g b
174 pand mm1, mm6 ; 0 R G B 0 r g b
; squeeze the first pixel pair into the low six bytes of mm0
176 movq mm2, mm0 ; 0 R G B 0 r g b
177 punpckhdq mm2, mm7 ; 0 0 0 0 0 R G B
178 punpckldq mm0, mm7 ; 0 0 0 0 0 r g b
179 psllq mm2, 24 ; 0 0 R G B 0 0 0
180 por mm0, mm2 ; 0 0 R G B r g b
; the top two bytes of mm0 take the g and b bytes of the third pixel
182 movq mm3, mm1 ; 0 R G B 0 r g b
183 psllq mm3, 48 ; g b 0 0 0 0 0 0
184 por mm0, mm3 ; g b R G B r g b
; mm1 collects what is left: r of the third pixel below R G B of the fourth
186 movq mm4, mm1 ; 0 R G B 0 r g b
187 punpckhdq mm4, mm7 ; 0 0 0 0 0 R G B
188 punpckldq mm1, mm7 ; 0 0 0 0 0 r g b
189 psrlq mm1, 16 ; 0 0 0 0 0 0 0 r
190 psllq mm4, 8 ; 0 0 0 0 R G B 0
191 por mm1, mm4 ; 0 0 0 0 R G B r
; Entry label of the 32-bit -> 24-bit BGR888 converter; assembled only
; when __PTC_MMX_CONVERT_32_TO_24_BGR888 is defined.
226 %ifdef __PTC_MMX_CONVERT_32_TO_24_BGR888
230 mmx_convert_32_to_24_bgr888:
; 32-bit -> 16-bit RGB565 converter; assembled only when
; __PTC_MMX_CONVERT_32_TO_16_RGB565 is defined.
237 %ifdef __PTC_MMX_CONVERT_32_TO_16_RGB565
241 mmx_convert_32_to_16_rgb565:
; load the three arguments from the ebp-based frame
248 mov edi,[ebp+8] ; destination
249 mov esi,[ebp+12] ; source
250 mov ecx,[ebp+16] ; bytes
; mm5 / mm6 / mm7 = blue, green and red field masks
253 movq mm5, [mmx_rgb565_b]
254 movq mm6, [mmx_rgb565_g]
255 movq mm7, [mmx_rgb565_r]
260 jmp .L2 ; not necessary at the moment, but doesn't hurt (much)
; first pixel pair: merge blue under green, then shift both fields down
; to the low 11 bits of each dword
263 movq mm0, [esi] ; argb
269 pslld mm1, 2 ; 0 0 000000bb bbb00000
270 por mm0, mm1 ; 0 0 ggggggbb bbb00000
271 psrld mm0, 5 ; 0 0 00000ggg gggbbbbb
; second pixel pair: same steps using mm4 and mm2
273 movq mm4, [esi+8] ; argb
279 pslld mm2, 2 ; 0 0 000000bb bbb00000
280 por mm4, mm2 ; 0 0 ggggggbb bbb00000
281 psrld mm4, 5 ; 0 0 00000ggg gggbbbbb
; narrow both pairs so that four results fit in one register
283 packuswb mm3, mm1 ; R 0 r 0
284 packssdw mm0, mm4 ; as above.. ish
; 0F81Fh keeps bits 15-11 and 4-0 of a 16-bit value, 07E0h keeps bits 10-5
302 and eax, 0F81Fh ; BYTE?
304 and ebx, 07E0h ; BYTE?
; 32-bit -> 16-bit BGR565 converter; assembled only when
; __PTC_MMX_CONVERT_32_TO_16_BGR565 is defined.
; Uses the same three 565 masks as the RGB565 converter, but with the red
; and blue masks swapped between mm5 and mm7.
324 %ifdef __PTC_MMX_CONVERT_32_TO_16_BGR565
328 mmx_convert_32_to_16_bgr565:
; load the three arguments from the ebp-based frame
335 mov edi,[ebp+8] ; destination
336 mov esi,[ebp+12] ; source
337 mov ecx,[ebp+16] ; bytes
; mm5 / mm6 / mm7 = red, green and blue field masks
339 movq mm5, [mmx_rgb565_r]
340 movq mm6, [mmx_rgb565_g]
341 movq mm7, [mmx_rgb565_b]
; first pixel pair: split into green (mm0), red (mm1) and blue (mm3)
349 movq mm0, [esi] ; a r g b
350 movq mm1, mm0 ; a r g b
351 pand mm0, mm6 ; 0 0 g 0
352 movq mm3, mm1 ; a r g b
353 pand mm1, mm5 ; 0 r 0 0
354 pand mm3, mm7 ; 0 0 0 b
; move blue up one word, drop red under green, then shift green+red down
; to the low 11 bits of each dword
356 psllq mm3, 16 ; 0 b 0 0
357 psrld mm1, 14 ; 0 0 000000rr rrr00000
358 por mm0, mm1 ; 0 0 ggggggrr rrr00000
359 psrld mm0, 5 ; 0 0 00000ggg gggrrrrr
; second pixel pair: green in mm4, red in mm2, blue in mm1
361 movq mm4, [esi+8] ; a r g b
362 movq mm2, mm4 ; a r g b
363 pand mm4, mm6 ; 0 0 g 0
364 movq mm1, mm2 ; a r g b
365 pand mm2, mm5 ; 0 r 0 0
366 pand mm1, mm7 ; 0 0 0 b
368 psllq mm1, 16 ; 0 b 0 0
369 psrld mm2, 14 ; 0 0 000000rr rrr00000
370 por mm4, mm2 ; 0 0 ggggggrr rrr00000
371 psrld mm4, 5 ; 0 0 00000ggg gggrrrrr
; narrow both pairs to 16 bits per pixel and merge blue with green+red
373 packuswb mm3, mm1 ; BBBBB000 00000000 bbbbb000 00000000
374 packssdw mm0, mm4 ; 00000GGG GGGRRRRR 00000GGG GGGRRRRR
375 por mm0, mm3 ; BBBBBGGG GGGRRRRR bbbbbggg gggrrrrr
; 0F81Fh keeps bits 15-11 and 4-0 of a 16-bit value, 07E0h keeps bits 10-5
391 and eax, 0F81Fh ; BYTE ?
393 and ebx, 07E0h ; BYTE ?
; 32-bit -> 16-bit BGR555 and RGB555 converters.
; Each entry point only loads its own multiplier into mm7; the code that
; follows _convert_bgr555_cheat serves both.
413 %ifdef __PTC_MMX_CONVERT_32_TO_16_BGR555
417 mmx_convert_32_to_16_bgr555:
419 ; the 16BGR555 converter is identical to the RGB555 one,
420 ; except it uses a different multiplier for the pmaddwd
421 ; instruction. cool huh.
423 movq mm7, qword [mmx_bgr555_mul]
; when both converters are assembled, the bgr555 entry jumps over the
; rgb555 entry to reach the common code
427 %ifdef __PTC_MMX_CONVERT_32_TO_16_RGB555
428 %ifdef __PTC_MMX_CONVERT_32_TO_16_BGR555
429 jmp _convert_bgr555_cheat
432 ; This is the same as the Intel version.. they obviously went to
433 ; much more trouble to expand/coil the loop than I did, so theirs
434 ; would almost certainly be faster, even if only a little.
435 ; I did rename 'mmx_rgb555_add' to 'mmx_rgb555_mul', which is
436 ; (I think) a more accurate name..
440 mmx_convert_32_to_16_rgb555:
442 movq mm7,qword [mmx_rgb555_mul]
; the jump-target label is emitted only when both converters are assembled
446 %ifdef __PTC_MMX_CONVERT_32_TO_16_RGB555
447 %ifdef __PTC_MMX_CONVERT_32_TO_16_BGR555
449 _convert_bgr555_cheat:
; mm6 = green field mask
451 movq mm6,qword [mmx_rgb555_g]
; load the three arguments from the ebp-based frame
457 mov edi,[ebp+8] ; destination
458 mov esi,[ebp+12] ; source
459 mov ecx,[ebp+16] ; bytes
461 mov edx,ecx ; Save ecx
; BYTE requests the sign-extended 8-bit immediate encoding; the value is -8
463 and ecx,BYTE 0fffffff8h ; clear lower three bits
; each pand with mmx_rgb555_rb keeps the top five bits of bytes 0 and 2
; of every dword
474 pand mm3,qword [mmx_rgb555_rb]
477 pand mm1,qword [mmx_rgb555_rb]
496 pand mm0,qword [mmx_rgb555_rb]
502 pand mm3,qword [mmx_rgb555_rb]
523 pand mm3,qword [mmx_rgb555_rb]
526 pand mm1,qword [mmx_rgb555_rb]
; three 5-bit field masks: bits 4-0, bits 9-5 and bits 14-10
555 and eax,BYTE 0000000000011111b
556 and edx, 0000001111100000b
562 and ebx, 0111110000000000b