-/*******************************************************************************
+/*****************************************************************************
* video_yuv_mmx.S: YUV transformation, optimized for MMX processors
- * (c)1999 VideoLAN
- *******************************************************************************
- * Following functions are defined:
+ *****************************************************************************
+ * Copyright (C) 1999, 2000 VideoLAN
+ *
+ * Authors:
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA.
+ *****************************************************************************/
+
+/*****************************************************************************
+ * Following functions are defined:
* vout_YUV420_16_MMX
* This function performs YUV12-to-RGB16 color conversion for H26x.
* It handles any format in which there are three fields, the low
* Height may be any amount, but must be a multiple of two. The U
* and V planes may have a different pitch than the Y plane, subject
* to the same limitations.
- *******************************************************************************/
+ *****************************************************************************/
//.include iammx.inc
//.include locals.inc
.data
.align 16
/* Jump table of format-specific setup labels. ConvertYUV420RGB16MMX dispatches
 * through it with "jmp *RGB_formats(,%eax,4)", so entry N is the code run for
 * table index N. The visible entries follow the CCType numbering
 * (RGB565 = 0, RGB555 = 1, RGB664 = 2). */
-RGB_formats:
+RGB_formats:
.long RGB565
.long RGB555
.long RGB664
/* Size in bytes of the local stack frame; the "finish" label releases it with
 * "addl $LocalFrameSize,%esp". */
#define LocalFrameSize 156
/* Added to every argument offset. NOTE(review): presumably the room taken by
 * the registers pushed on entry (4 x 4 bytes) -- confirm against the full
 * prologue. */
#define RegisterStorageSize 16
+//#define DOUBLE /* doubles the number of columns */
+
/* Arguments: */
/* Each name below is used as a displacement from %esp (e.g. VPlane(%esp)), so
 * it first skips LocalFrameSize + RegisterStorageSize bytes; successive
 * arguments are 4 bytes apart. NOTE(review): the +4 on the first argument
 * presumably steps over the return address -- confirm. */
#define YPlane LocalFrameSize + RegisterStorageSize + 4
#define UPlane LocalFrameSize + RegisterStorageSize + 8
#define VPlane LocalFrameSize + RegisterStorageSize + 12
-#define FrameWidth LocalFrameSize + RegisterStorageSize + 16
+#define FrameWidth LocalFrameSize + RegisterStorageSize + 16
#define FrameHeight LocalFrameSize + RegisterStorageSize + 20
#define YPitch LocalFrameSize + RegisterStorageSize + 24
#define ChromaPitch LocalFrameSize + RegisterStorageSize + 28
*
* CCOffsetToLine0 is relative to ColorConvertedFrame.
* CCType used by RGB color convertors to determine the exact conversion type.
- * RGB565 = 0
+ * RGB565 = 0
* RGB555 = 1
* RGB664 = 2
* RGB655 = 3
*/
/* Exported entry point of the YUV420 -> RGB16 converter. After saving
 * registers it dispatches on the output pixel format through the RGB_formats
 * jump table. NOTE(review): %eax presumably holds CCType at the jump -- confirm
 * where it is loaded. */
.globl ConvertYUV420RGB16MMX
-ConvertYUV420RGB16MMX:
+ConvertYUV420RGB16MMX:
pushl %esi
pushl %edi
jmp *RGB_formats(,%eax,4) /* indirect jump: table entry number %eax, 4 bytes per entry */
/* Format-specific setup targets reached through the RGB_formats jump table.
 * Each one stores its red shift count in the RLeftShift stack slot and then
 * joins the common code at RGBEND. */
-RGB555:
+RGB555:
xorl %eax,%eax
movl $2,%ebx /* 10-8 for byte shift */
movl %ebx,RLeftShift(%esp) /* RLeftShift = 2 */
movq %mm0,BUpperLimit(%esp)
jmp RGBEND
-RGB664:
+RGB664:
xorl %eax,%eax
movl $2,%ebx /* 8-6 */
movl %ebx,RLeftShift(%esp) /* RLeftShift = 2 */
movq %mm0,BUpperLimit(%esp)
jmp RGBEND
-RGB655:
+RGB655:
xorl %eax,%eax
movl $2,%ebx /* 8-6 */
movl %ebx,RLeftShift(%esp) /* RLeftShift = 2 */
movq %mm0,BUpperLimit(%esp)
jmp RGBEND
-RGB565:
+RGB565:
xorl %eax,%eax
movl $3,%ebx /* 8-5 */
movl %ebx,RLeftShift(%esp) /* RLeftShift = 3 */
movq %mm0,GUpperLimit(%esp)
// jmp RGBEND
/* No jump is needed for RGB565: execution falls through into RGBEND. */
-RGBEND:
+RGBEND:
movl VPlane(%esp),%ebx /* %ebx = VPlane argument */
movl UPlane(%esp),%ecx /* %ecx = UPlane argument */
subl %ebx,%ecx /* %ecx = UPlane - VPlane */
* Register Usage:
*/
-PrepareChromaLine:
+PrepareChromaLine:
movl AspectCount(%esp),%ebp /* %ebp = current AspectCount */
movl FrameWidth(%esp),%ebx /* %ebx = FrameWidth argument */
subl $2,%ebp /* count down by 2 */
xorl %eax,%eax /* %eax = 0, stored into tmpCCOPitch below */
addl AspectAdjustmentCount(%esp),%ebp /* add AspectAdjustmentCount to the counter */
movl %eax,tmpCCOPitch(%esp) /* tmpCCOPitch = 0 */
-continue:
+continue:
movl %ebp,AspectCount(%esp) /* write the updated counter back */
/* Loop head: the "jl do_next_8x2_block" at the bottom of the loop branches back
 * here after "addl $4,%ebx". */
-do_next_8x2_block:
+do_next_8x2_block:
movl tmpYCursorEven(%esp),%ebp
/* here is even line */
movd (%edx,%ebx,),%mm1 /* 4 u values */
psllw GLeftShift(%esp),%mm3 /* shift high G 5 positions */
por %mm3,%mm7 /* mm7: high RGB16 */
- movl tmpYCursorOdd(%esp),%ebp /* moved to here to save cycles
+ movl tmpYCursorOdd(%esp),%ebp /* moved to here to save cycles
before odd line */
movq %mm1,(%edi) /* !! aligned */
* mm3- temporary results
*/
- psllq RLeftShift(%esp),%mm0 /* position R in the most significant
+ psllq RLeftShift(%esp),%mm0 /* position R in the most significant
part of the byte */
movq %mm2,%mm7 /* mm7: Save B */
por %mm1,%mm2 /* mm2: low RGB16 */
psllw GLeftShift(%esp),%mm3 /* shift high G 5 positions */
por %mm3,%mm7 /* mm7: high RGB16 */
+#ifdef DOUBLE /* column doubling: every 16-bit pixel is written twice */
+ movq %mm2,%mm1 /* working copies of the low quad (mm2) ... */
+ movq %mm7,%mm5 /* ... and of the high quad (mm7) */
+ movq %mm2,%mm0
+ movq %mm7,%mm3
+ punpckhwd %mm2,%mm1 /* mm1 = upper two words of mm2, each duplicated */
+ punpckhwd %mm7,%mm5 /* mm5 = upper two words of mm7, each duplicated */
+ punpcklwd %mm2,%mm0 /* mm0 = lower two words of mm2, each duplicated */
+ punpcklwd %mm7,%mm3 /* mm3 = lower two words of mm7, each duplicated */
+ movq %mm0,(%edi,%eax,) /* stores are in ascending pixel order */
+ movq %mm1,8(%edi,%eax,)
+ movq %mm3,16(%edi,%eax,)
+ movq %mm5,24(%edi,%eax,)
+ addl $32,%edi /* 32 bytes written per pass (16 pixels x 16 bit) */
+ addl $4,%ebx /* must stay last: jl below tests these flags */
+#endif
+#ifndef DOUBLE /* NOTE(review): complement of the #ifdef above; a single #else would express the same thing */
movq %mm2,(%edi,%eax,)
movq %mm7,8(%edi,%eax,) /* aligned */
addl $16,%edi /* 16 bytes written per pass (8 pixels x 16 bit) */
addl $4,%ebx /* ? to take 4 pixels together
instead of 2 */
+#endif
jl do_next_8x2_block /* tests the flags left by addl $4,%ebx */
addl CCOSkipDistance(%esp),%edi /* go to begin of next line */
/******************************************************************************/
-finish:
+finish:
emms /* empty the MMX state so that x87 floating-point code can run afterwards */
addl $LocalFrameSize,%esp /* release the LocalFrameSize-byte local frame */