/*****************************************************************************
- * ppccommon.h: h264 encoder
+ * ppccommon.h: ppc utility macros
*****************************************************************************
- * Copyright (C) 2003 Eric Petit <eric.petit@lapsus.org>
+ * Copyright (C) 2003-2013 x264 project
+ *
+ * Authors: Eric Petit <eric.petit@lapsus.org>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
+ *
+ * This program is also available under a commercial proprietary license.
+ * For more information, contact us at licensing@x264.com.
*****************************************************************************/
-#ifdef SYS_LINUX
+#if HAVE_ALTIVEC_H
#include <altivec.h>
#endif
#define vec_s32_t vector signed int
+/* Unions pairing a vector type with an array of its scalar elements:
+ * `v` is the vector view and `s` the per-element view of the same storage.
+ * vec_u32_u: 4 x uint32_t, vec_u16_u: 8 x uint16_t,
+ * vec_s16_u: 8 x int16_t, vec_u8_u: 16 x uint8_t. */
typedef union {
- unsigned int s[4];
- vector unsigned int v;
-} vect_int_u;
+ uint32_t s[4];
+ vec_u32_t v;
+} vec_u32_u;
typedef union {
- unsigned short s[8];
- vector unsigned short v;
-} vect_ushort_u;
+ uint16_t s[8];
+ vec_u16_t v;
+} vec_u16_u;
typedef union {
- signed short s[8];
- vector signed short v;
-} vect_sshort_u;
+ int16_t s[8];
+ vec_s16_t v;
+} vec_s16_u;
+
+typedef union {
+ uint8_t s[16];
+ vec_u8_t v;
+} vec_u8_u;
/***********************************************************************
* Null vector
#define vec_u16_to_u8(v) vec_pack( v, zero_u16v )
#define vec_s16_to_u8(v) vec_packsu( v, zero_s16v )
+
+/***********************************************************************
+ * 16 <-> 32 bits conversions
+ ***********************************************************************
+ * vec_u16_to_{u32,s32}_h / _l: merge zero_u16v with v using vec_mergeh /
+ * vec_mergel (zero_u16v is the first operand, v the second) and cast the
+ * result to vec_u32_t / vec_s32_t.
+ * vec_u16_to_u32 / vec_u16_to_s32: shorthand for the _h variants.
+ * vec_u32_to_u16: vec_pack of v with zero_u32v.
+ * vec_s32_to_u16: vec_packsu of v with zero_s32v.
+ *
+ * The macros reference zero_u16v / zero_u32v / zero_s32v, which they do
+ * not define themselves; those names must be in scope where they are used.
+ * NOTE(review): v is not parenthesised in the expansions, so pass a simple
+ * vector expression (a name or a call) rather than an operator expression.
+ **********************************************************************/
+#define vec_u16_to_u32_h(v) (vec_u32_t) vec_mergeh( zero_u16v, (vec_u16_t) v )
+#define vec_u16_to_u32_l(v) (vec_u32_t) vec_mergel( zero_u16v, (vec_u16_t) v )
+#define vec_u16_to_s32_h(v) (vec_s32_t) vec_mergeh( zero_u16v, (vec_u16_t) v )
+#define vec_u16_to_s32_l(v) (vec_s32_t) vec_mergel( zero_u16v, (vec_u16_t) v )
+
+#define vec_u16_to_u32(v) vec_u16_to_u32_h(v)
+#define vec_u16_to_s32(v) vec_u16_to_s32_h(v)
+
+#define vec_u32_to_u16(v) vec_pack( v, zero_u32v )
+#define vec_s32_to_u16(v) vec_packsu( v, zero_s32v )
+
+
/***********************************************************************
* PREP_LOAD: declares two vectors required to perform unaligned loads
* VEC_LOAD: loads n bytes from u8 * p into vector v of type t where o is from original src offset
vec_u8_t _hv, _lv
#define PREP_LOAD_SRC( src ) \
- vec_u8_t _##src##_ = vec_lvsl(0, src)
+ vec_u8_t _##src##_ = vec_lvsl(0, src)
#define VEC_LOAD_G( p, v, n, t ) \
_hv = vec_ld( 0, p ); \
v = (t) vec_lvsl( 0, p ); \
_lv = vec_ld( n - 1, p ); \
- v = (t) vec_perm( _hv, _lv, (vec_u8_t) v )
+ v = (t) vec_perm( _hv, _lv, (vec_u8_t) v )
#define VEC_LOAD( p, v, n, t, g ) \
_hv = vec_ld( 0, p ); \
#define VEC_LOAD_PARTIAL( p, v, n, t, g) \
_hv = vec_ld( 0, p); \
v = (t) vec_perm( _hv, _hv, (vec_u8_t) _##g##_ )
-
+
/***********************************************************************
* PREP_STORE##n: declares required vectors to store n bytes to a
_lv = vec_perm( (vec_u8_t) v, _tmp1v, _##o##r_ ); \
vec_st( _lv, 15, (uint8_t *) p ); \
_hv = vec_perm( _tmp1v, (vec_u8_t) v, _##o##r_ ); \
- vec_st( _hv, 0, (uint8_t *) p )
+ vec_st( _hv, 0, (uint8_t *) p )
#define PREP_STORE8 \
p1 += i1; \
p2 += i2
+/***********************************************************************
+ * VEC_DIFF_H_OFFSET
+ ***********************************************************************
+ * p1, p2: pointers the two pixel rows are loaded from
+ * i1, i2: amounts added to p1 and p2 after the loads
+ * n: byte count forwarded to VEC_LOAD for the p2 load
+ * d: destination, receives vec_sub( pix1v, pix2v )
+ * g1: name whose _g1_ vector is the permute control for the p1 load
+ * g2: name forwarded to VEC_LOAD for the p2 load
+ *
+ * p1 is read with a single vec_ld and permuted against zero_u8v; p2 is
+ * read through VEC_LOAD. Both results go through vec_u8_to_s16 before
+ * the subtraction.
+ *
+ * Expands to several statements (no do/while wrapper) and uses pix1v,
+ * pix2v, zero_u8v and the VEC_LOAD temporaries from the enclosing scope,
+ * so it must not be used as the unbraced body of an if/for/while.
+ **********************************************************************/
+#define VEC_DIFF_H_OFFSET(p1,i1,p2,i2,n,d,g1,g2) \
+ pix1v = (vec_s16_t)vec_perm( vec_ld( 0, p1 ), zero_u8v, _##g1##_ );\
+ pix1v = vec_u8_to_s16( pix1v ); \
+ VEC_LOAD( p2, pix2v, n, vec_s16_t, g2); \
+ pix2v = vec_u8_to_s16( pix2v ); \
+ d = vec_sub( pix1v, pix2v ); \
+ p1 += i1; \
+ p2 += i2
+
+
/***********************************************************************
* VEC_DIFF_HL
***********************************************************************