* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
-#define LD(adr) *(uint32_t*)(adr)
-
#define PIXOP2(OPNAME, OP) \
/*static inline void OPNAME ## _no_rnd_pixels8_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \
{\
do {\
- OP(LP(dst ),no_rnd_avg32(LD32(src1 ),LD32(src2 )) ); \
- OP(LP(dst+4),no_rnd_avg32(LD32(src1+4),LD32(src2+4)) ); \
+ OP(LP(dst ),no_rnd_avg32(AV_RN32(src1 ),AV_RN32(src2 )) ); \
+ OP(LP(dst+4),no_rnd_avg32(AV_RN32(src1+4),AV_RN32(src2+4)) ); \
src1+=src_stride1; \
src2+=src_stride2; \
dst+=dst_stride; \
static inline void OPNAME ## _pixels8_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \
{\
do {\
- OP(LP(dst ),rnd_avg32(LD32(src1 ),LD32(src2 )) ); \
- OP(LP(dst+4),rnd_avg32(LD32(src1+4),LD32(src2+4)) ); \
+ OP(LP(dst ),rnd_avg32(AV_RN32(src1 ),AV_RN32(src2 )) ); \
+ OP(LP(dst+4),rnd_avg32(AV_RN32(src1+4),AV_RN32(src2+4)) ); \
src1+=src_stride1; \
src2+=src_stride2; \
dst+=dst_stride; \
static inline void OPNAME ## _pixels4_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \
{\
do {\
- OP(LP(dst ),rnd_avg32(LD32(src1 ),LD32(src2 )) ); \
+ OP(LP(dst ),rnd_avg32(AV_RN32(src1 ),AV_RN32(src2 )) ); \
src1+=src_stride1; \
src2+=src_stride2; \
dst+=dst_stride; \
static inline void OPNAME ## _no_rnd_pixels16_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \
{\
do {\
- OP(LP(dst ),no_rnd_avg32(LD32(src1 ),LD32(src2 )) ); \
- OP(LP(dst+4),no_rnd_avg32(LD32(src1+4),LD32(src2+4)) ); \
- OP(LP(dst+8),no_rnd_avg32(LD32(src1+8),LD32(src2+8)) ); \
- OP(LP(dst+12),no_rnd_avg32(LD32(src1+12),LD32(src2+12)) ); \
+ OP(LP(dst ),no_rnd_avg32(AV_RN32(src1 ),AV_RN32(src2 )) ); \
+ OP(LP(dst+4),no_rnd_avg32(AV_RN32(src1+4),AV_RN32(src2+4)) ); \
+ OP(LP(dst+8),no_rnd_avg32(AV_RN32(src1+8),AV_RN32(src2+8)) ); \
+ OP(LP(dst+12),no_rnd_avg32(AV_RN32(src1+12),AV_RN32(src2+12)) ); \
src1+=src_stride1; \
src2+=src_stride2; \
dst+=dst_stride; \
static inline void OPNAME ## _pixels16_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \
{\
do {\
- OP(LP(dst ),rnd_avg32(LD32(src1 ),LD32(src2 )) ); \
- OP(LP(dst+4),rnd_avg32(LD32(src1+4),LD32(src2+4)) ); \
- OP(LP(dst+8),rnd_avg32(LD32(src1+8),LD32(src2+8)) ); \
- OP(LP(dst+12),rnd_avg32(LD32(src1+12),LD32(src2+12)) ); \
+ OP(LP(dst ),rnd_avg32(AV_RN32(src1 ),AV_RN32(src2 )) ); \
+ OP(LP(dst+4),rnd_avg32(AV_RN32(src1+4),AV_RN32(src2+4)) ); \
+ OP(LP(dst+8),rnd_avg32(AV_RN32(src1+8),AV_RN32(src2+8)) ); \
+ OP(LP(dst+12),rnd_avg32(AV_RN32(src1+12),AV_RN32(src2+12)) ); \
src1+=src_stride1; \
src2+=src_stride2; \
dst+=dst_stride; \
static inline void OPNAME ## _pixels4_l2_aligned2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \
{\
do {\
- OP(LP(dst ),rnd_avg32(LD32(src1 ),LP(src2 )) ); \
+ OP(LP(dst ),rnd_avg32(AV_RN32(src1 ),LP(src2 )) ); \
src1+=src_stride1; \
src2+=src_stride2; \
dst+=dst_stride; \
static inline void OPNAME ## _no_rnd_pixels16_l2_aligned2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \
{\
do {\
- OP(LP(dst ),no_rnd_avg32(LD32(src1 ),LP(src2 )) ); \
- OP(LP(dst+4),no_rnd_avg32(LD32(src1+4),LP(src2+4)) ); \
- OP(LP(dst+8),no_rnd_avg32(LD32(src1+8),LP(src2+8)) ); \
- OP(LP(dst+12),no_rnd_avg32(LD32(src1+12),LP(src2+12)) ); \
+ OP(LP(dst ),no_rnd_avg32(AV_RN32(src1 ),LP(src2 )) ); \
+ OP(LP(dst+4),no_rnd_avg32(AV_RN32(src1+4),LP(src2+4)) ); \
+ OP(LP(dst+8),no_rnd_avg32(AV_RN32(src1+8),LP(src2+8)) ); \
+ OP(LP(dst+12),no_rnd_avg32(AV_RN32(src1+12),LP(src2+12)) ); \
src1+=src_stride1; \
src2+=src_stride2; \
dst+=dst_stride; \
static inline void OPNAME ## _pixels16_l2_aligned2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \
{\
do {\
- OP(LP(dst ),rnd_avg32(LD32(src1 ),LP(src2 )) ); \
- OP(LP(dst+4),rnd_avg32(LD32(src1+4),LP(src2+4)) ); \
- OP(LP(dst+8),rnd_avg32(LD32(src1+8),LP(src2+8)) ); \
- OP(LP(dst+12),rnd_avg32(LD32(src1+12),LP(src2+12)) ); \
+ OP(LP(dst ),rnd_avg32(AV_RN32(src1 ),LP(src2 )) ); \
+ OP(LP(dst+4),rnd_avg32(AV_RN32(src1+4),LP(src2+4)) ); \
+ OP(LP(dst+8),rnd_avg32(AV_RN32(src1+8),LP(src2+8)) ); \
+ OP(LP(dst+12),rnd_avg32(AV_RN32(src1+12),LP(src2+12)) ); \
src1+=src_stride1; \
src2+=src_stride2; \
dst+=dst_stride; \
static inline void OPNAME ## _no_rnd_pixels8_l2_aligned2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \
{\
do { /* only src2 aligned */\
- OP(LP(dst ),no_rnd_avg32(LD32(src1 ),LP(src2 )) ); \
- OP(LP(dst+4),no_rnd_avg32(LD32(src1+4),LP(src2+4)) ); \
+ OP(LP(dst ),no_rnd_avg32(AV_RN32(src1 ),LP(src2 )) ); \
+ OP(LP(dst+4),no_rnd_avg32(AV_RN32(src1+4),LP(src2+4)) ); \
src1+=src_stride1; \
src2+=src_stride2; \
dst+=dst_stride; \
static inline void OPNAME ## _pixels8_l2_aligned2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \
{\
do {\
- OP(LP(dst ),rnd_avg32(LD32(src1 ),LP(src2 )) ); \
- OP(LP(dst+4),rnd_avg32(LD32(src1+4),LP(src2+4)) ); \
+ OP(LP(dst ),rnd_avg32(AV_RN32(src1 ),LP(src2 )) ); \
+ OP(LP(dst+4),rnd_avg32(AV_RN32(src1+4),LP(src2+4)) ); \
src1+=src_stride1; \
src2+=src_stride2; \
dst+=dst_stride; \
static inline void OPNAME ## _pixels8_l4_aligned0(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
do { \
uint32_t a0,a1,a2,a3; /* src1 only not aligned */\
- UNPACK(a0,a1,LD32(src1),LP(src2)); \
+ UNPACK(a0,a1,AV_RN32(src1),LP(src2)); \
UNPACK(a2,a3,LP(src3),LP(src4)); \
OP(LP(dst),rnd_PACK(a0,a1,a2,a3)); \
- UNPACK(a0,a1,LD32(src1+4),LP(src2+4)); \
+ UNPACK(a0,a1,AV_RN32(src1+4),LP(src2+4)); \
UNPACK(a2,a3,LP(src3+4),LP(src4+4)); \
OP(LP(dst+4),rnd_PACK(a0,a1,a2,a3)); \
src1+=src_stride1;\
static inline void OPNAME ## _no_rnd_pixels8_l4_aligned0(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
do { \
uint32_t a0,a1,a2,a3; \
- UNPACK(a0,a1,LD32(src1),LP(src2)); \
+ UNPACK(a0,a1,AV_RN32(src1),LP(src2)); \
UNPACK(a2,a3,LP(src3),LP(src4)); \
OP(LP(dst),no_rnd_PACK(a0,a1,a2,a3)); \
- UNPACK(a0,a1,LD32(src1+4),LP(src2+4)); \
+ UNPACK(a0,a1,AV_RN32(src1+4),LP(src2+4)); \
UNPACK(a2,a3,LP(src3+4),LP(src4+4)); \
OP(LP(dst+4),no_rnd_PACK(a0,a1,a2,a3)); \
src1+=src_stride1;\
static inline void OPNAME ## _pixels16_l4_aligned0(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
do { /* src1 is unaligned */\
uint32_t a0,a1,a2,a3; \
- UNPACK(a0,a1,LD32(src1),LP(src2)); \
+ UNPACK(a0,a1,AV_RN32(src1),LP(src2)); \
UNPACK(a2,a3,LP(src3),LP(src4)); \
OP(LP(dst),rnd_PACK(a0,a1,a2,a3)); \
- UNPACK(a0,a1,LD32(src1+4),LP(src2+4)); \
+ UNPACK(a0,a1,AV_RN32(src1+4),LP(src2+4)); \
UNPACK(a2,a3,LP(src3+4),LP(src4+4)); \
OP(LP(dst+8),rnd_PACK(a0,a1,a2,a3)); \
- UNPACK(a0,a1,LD32(src1+8),LP(src2+8)); \
+ UNPACK(a0,a1,AV_RN32(src1+8),LP(src2+8)); \
UNPACK(a2,a3,LP(src3+8),LP(src4+8)); \
OP(LP(dst+8),rnd_PACK(a0,a1,a2,a3)); \
- UNPACK(a0,a1,LD32(src1+12),LP(src2+12)); \
+ UNPACK(a0,a1,AV_RN32(src1+12),LP(src2+12)); \
UNPACK(a2,a3,LP(src3+12),LP(src4+12)); \
OP(LP(dst+12),rnd_PACK(a0,a1,a2,a3)); \
src1+=src_stride1;\
static inline void OPNAME ## _no_rnd_pixels16_l4_aligned0(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
do { \
uint32_t a0,a1,a2,a3; \
- UNPACK(a0,a1,LD32(src1),LP(src2)); \
+ UNPACK(a0,a1,AV_RN32(src1),LP(src2)); \
UNPACK(a2,a3,LP(src3),LP(src4)); \
OP(LP(dst),no_rnd_PACK(a0,a1,a2,a3)); \
- UNPACK(a0,a1,LD32(src1+4),LP(src2+4)); \
+ UNPACK(a0,a1,AV_RN32(src1+4),LP(src2+4)); \
UNPACK(a2,a3,LP(src3+4),LP(src4+4)); \
OP(LP(dst+4),no_rnd_PACK(a0,a1,a2,a3)); \
- UNPACK(a0,a1,LD32(src1+8),LP(src2+8)); \
+ UNPACK(a0,a1,AV_RN32(src1+8),LP(src2+8)); \
UNPACK(a2,a3,LP(src3+8),LP(src4+8)); \
OP(LP(dst+8),no_rnd_PACK(a0,a1,a2,a3)); \
- UNPACK(a0,a1,LD32(src1+12),LP(src2+12)); \
+ UNPACK(a0,a1,AV_RN32(src1+12),LP(src2+12)); \
UNPACK(a2,a3,LP(src3+12),LP(src4+12)); \
OP(LP(dst+12),no_rnd_PACK(a0,a1,a2,a3)); \
src1+=src_stride1;\
+ src[index+stride+1]* frac_x )* frac_y
+ r)>>(shift*2);
}else{
- index= src_x + clip(src_y, 0, height)*stride;
+ index= src_x + av_clip(src_y, 0, height)*stride;
dst[y*stride + x]= ( ( src[index ]*(s-frac_x)
+ src[index +1]* frac_x )*s
+ r)>>(shift*2);
}
}else{
if((unsigned)src_y < height){
- index= clip(src_x, 0, width) + src_y*stride;
+ index= av_clip(src_x, 0, width) + src_y*stride;
dst[y*stride + x]= ( ( src[index ]*(s-frac_y)
+ src[index+stride ]* frac_y )*s
+ r)>>(shift*2);
}else{
- index= clip(src_x, 0, width) + clip(src_y, 0, height)*stride;
+ index= av_clip(src_x, 0, width) + av_clip(src_y, 0, height)*stride;
dst[y*stride + x]= src[index ];
}
}
#undef op_avg
#undef op_put
-/* not yet optimized */
-static inline void copy_block4(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h)
-{
- int i;
- for(i=0; i<h; i++)
- {
- ST32(dst , LD32(src ));
- dst+=dstStride;
- src+=srcStride;
- }
-}
-
-static inline void copy_block8(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h)
-{
- int i;
- for(i=0; i<h; i++)
- {
- ST32(dst , LD32(src ));
- ST32(dst+4 , LD32(src+4 ));
- dst+=dstStride;
- src+=srcStride;
- }
-}
-
-static inline void copy_block16(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h)
-{
- int i;
- for(i=0; i<h; i++)
- {
- ST32(dst , LD32(src ));
- ST32(dst+4 , LD32(src+4 ));
- ST32(dst+8 , LD32(src+8 ));
- ST32(dst+12, LD32(src+12));
- dst+=dstStride;
- src+=srcStride;
- }
-}
-
-static inline void copy_block17(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h)
-{
- int i;
- for(i=0; i<h; i++)
- {
- ST32(dst , LD32(src ));
- ST32(dst+4 , LD32(src+4 ));
- ST32(dst+8 , LD32(src+8 ));
- ST32(dst+12, LD32(src+12));
- dst[16]= src[16];
- dst+=dstStride;
- src+=srcStride;
- }
-}
-
-static inline void copy_block9(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h)
-{
- int i;
- for(i=0; i<h; i++)
- {
- ST32(dst , LD32(src ));
- ST32(dst+4 , LD32(src+4 ));
- dst[8]= src[8];
- dst+=dstStride;
- src+=srcStride;
- }
-}
-/* end not optimized */
-
#define QPEL_MC(r, OPNAME, RND, OP) \
static void OPNAME ## mpeg4_qpel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
- uint8_t *cm = cropTbl + MAX_NEG_CROP;\
+ uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
do {\
uint8_t *s = src; \
int src0,src1,src2,src3,src4,src5,src6,src7,src8;\
}\
\
static void OPNAME ## mpeg4_qpel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
- uint8_t *cm = cropTbl + MAX_NEG_CROP;\
+ uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
int w=8;\
do{\
uint8_t *s = src, *d=dst;\
}\
\
static void OPNAME ## mpeg4_qpel16_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
- uint8_t *cm = cropTbl + MAX_NEG_CROP;\
+ uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
do {\
uint8_t *s = src;\
int src0,src1,src2,src3,src4,src5,src6,src7,src8;\
}\
\
static void OPNAME ## mpeg4_qpel16_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
- uint8_t *cm = cropTbl + MAX_NEG_CROP;\
+ uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
int w=16;\
do {\
uint8_t *s = src, *d=dst;\
#if 1
#define H264_LOWPASS(OPNAME, OP, OP2) \
static inline void OPNAME ## h264_qpel_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride,int w,int h){\
- uint8_t *cm = cropTbl + MAX_NEG_CROP;\
+ uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
do {\
int srcB,srcA,src0,src1,src2,src3,src4,src5,src6;\
uint8_t *s = src-2;\
}\
\
static inline void OPNAME ## h264_qpel_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride,int w,int h){\
- uint8_t *cm = cropTbl + MAX_NEG_CROP;\
+ uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
do{\
int srcB,srcA,src0,src1,src2,src3,src4,src5,src6;\
uint8_t *s = src-2*srcStride,*d=dst;\
}\
\
static inline void OPNAME ## h264_qpel_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride,int w,int h){\
- uint8_t *cm = cropTbl + MAX_NEG_CROP;\
+ uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
int i;\
src -= 2*srcStride;\
i= h+5; \
#endif
static void wmv2_mspel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){
- uint8_t *cm = cropTbl + MAX_NEG_CROP;
+ uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
do{
int src_1,src0,src1,src2,src3,src4,src5,src6,src7,src8,src9;
}
static void wmv2_mspel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int w){
- uint8_t *cm = cropTbl + MAX_NEG_CROP;
+ uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
do{
int src_1,src0,src1,src2,src3,src4,src5,src6,src7,src8,src9;