* This is optimized for SH, which has post-increment addressing (*p++).
* On some CPUs, indexed access (p[n]) may be faster than post-increment (*p++).
5 * copyright (c) 2001-2003 BERO <bero@geocities.co.jp>
7 * This file is part of FFmpeg.
9 * FFmpeg is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
14 * FFmpeg is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with FFmpeg; if not, write to the Free Software
21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
24 #define PIXOP2(OPNAME, OP) \
25 /*static inline void OPNAME ## _no_rnd_pixels8_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \
28 OP(LP(dst ),no_rnd_avg32(AV_RN32(src1 ),AV_RN32(src2 )) ); \
29 OP(LP(dst+4),no_rnd_avg32(AV_RN32(src1+4),AV_RN32(src2+4)) ); \
36 static inline void OPNAME ## _pixels8_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \
39 OP(LP(dst ),rnd_avg32(AV_RN32(src1 ),AV_RN32(src2 )) ); \
40 OP(LP(dst+4),rnd_avg32(AV_RN32(src1+4),AV_RN32(src2+4)) ); \
47 static inline void OPNAME ## _pixels4_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \
50 OP(LP(dst ),rnd_avg32(AV_RN32(src1 ),AV_RN32(src2 )) ); \
57 static inline void OPNAME ## _no_rnd_pixels16_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \
60 OP(LP(dst ),no_rnd_avg32(AV_RN32(src1 ),AV_RN32(src2 )) ); \
61 OP(LP(dst+4),no_rnd_avg32(AV_RN32(src1+4),AV_RN32(src2+4)) ); \
62 OP(LP(dst+8),no_rnd_avg32(AV_RN32(src1+8),AV_RN32(src2+8)) ); \
63 OP(LP(dst+12),no_rnd_avg32(AV_RN32(src1+12),AV_RN32(src2+12)) ); \
70 static inline void OPNAME ## _pixels16_l2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \
73 OP(LP(dst ),rnd_avg32(AV_RN32(src1 ),AV_RN32(src2 )) ); \
74 OP(LP(dst+4),rnd_avg32(AV_RN32(src1+4),AV_RN32(src2+4)) ); \
75 OP(LP(dst+8),rnd_avg32(AV_RN32(src1+8),AV_RN32(src2+8)) ); \
76 OP(LP(dst+12),rnd_avg32(AV_RN32(src1+12),AV_RN32(src2+12)) ); \
83 static inline void OPNAME ## _pixels4_l2_aligned(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \
86 OP(LP(dst ),rnd_avg32(LP(src1 ),LP(src2 )) ); \
93 static inline void OPNAME ## _pixels4_l2_aligned2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \
96 OP(LP(dst ),rnd_avg32(AV_RN32(src1 ),LP(src2 )) ); \
103 static inline void OPNAME ## _no_rnd_pixels16_l2_aligned2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \
106 OP(LP(dst ),no_rnd_avg32(AV_RN32(src1 ),LP(src2 )) ); \
107 OP(LP(dst+4),no_rnd_avg32(AV_RN32(src1+4),LP(src2+4)) ); \
108 OP(LP(dst+8),no_rnd_avg32(AV_RN32(src1+8),LP(src2+8)) ); \
109 OP(LP(dst+12),no_rnd_avg32(AV_RN32(src1+12),LP(src2+12)) ); \
116 static inline void OPNAME ## _pixels16_l2_aligned2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \
119 OP(LP(dst ),rnd_avg32(AV_RN32(src1 ),LP(src2 )) ); \
120 OP(LP(dst+4),rnd_avg32(AV_RN32(src1+4),LP(src2+4)) ); \
121 OP(LP(dst+8),rnd_avg32(AV_RN32(src1+8),LP(src2+8)) ); \
122 OP(LP(dst+12),rnd_avg32(AV_RN32(src1+12),LP(src2+12)) ); \
129 static inline void OPNAME ## _no_rnd_pixels8_l2_aligned2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \
131 do { /* onlye src2 aligned */\
132 OP(LP(dst ),no_rnd_avg32(AV_RN32(src1 ),LP(src2 )) ); \
133 OP(LP(dst+4),no_rnd_avg32(AV_RN32(src1+4),LP(src2+4)) ); \
140 static inline void OPNAME ## _pixels8_l2_aligned2(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \
143 OP(LP(dst ),rnd_avg32(AV_RN32(src1 ),LP(src2 )) ); \
144 OP(LP(dst+4),rnd_avg32(AV_RN32(src1+4),LP(src2+4)) ); \
151 static inline void OPNAME ## _no_rnd_pixels8_l2_aligned(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \
154 OP(LP(dst ),no_rnd_avg32(LP(src1 ),LP(src2 )) ); \
155 OP(LP(dst+4),no_rnd_avg32(LP(src1+4),LP(src2+4)) ); \
162 static inline void OPNAME ## _pixels8_l2_aligned(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \
165 OP(LP(dst ),rnd_avg32(LP(src1 ),LP(src2 )) ); \
166 OP(LP(dst+4),rnd_avg32(LP(src1+4),LP(src2+4)) ); \
173 static inline void OPNAME ## _no_rnd_pixels16_l2_aligned(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \
176 OP(LP(dst ),no_rnd_avg32(LP(src1 ),LP(src2 )) ); \
177 OP(LP(dst+4),no_rnd_avg32(LP(src1+4),LP(src2+4)) ); \
178 OP(LP(dst+8),no_rnd_avg32(LP(src1+8),LP(src2+8)) ); \
179 OP(LP(dst+12),no_rnd_avg32(LP(src1+12),LP(src2+12)) ); \
186 static inline void OPNAME ## _pixels16_l2_aligned(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \
189 OP(LP(dst ),rnd_avg32(LP(src1 ),LP(src2 )) ); \
190 OP(LP(dst+4),rnd_avg32(LP(src1+4),LP(src2+4)) ); \
191 OP(LP(dst+8),rnd_avg32(LP(src1+8),LP(src2+8)) ); \
192 OP(LP(dst+12),rnd_avg32(LP(src1+12),LP(src2+12)) ); \
199 static inline void OPNAME ## _no_rnd_pixels16_l2_aligned1(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \
200 { OPNAME ## _no_rnd_pixels16_l2_aligned2(dst,src2,src1,dst_stride,src_stride2,src_stride1,h); } \
202 static inline void OPNAME ## _pixels16_l2_aligned1(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \
203 { OPNAME ## _pixels16_l2_aligned2(dst,src2,src1,dst_stride,src_stride2,src_stride1,h); } \
205 static inline void OPNAME ## _no_rnd_pixels8_l2_aligned1(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \
206 { OPNAME ## _no_rnd_pixels8_l2_aligned2(dst,src2,src1,dst_stride,src_stride2,src_stride1,h); } \
208 static inline void OPNAME ## _pixels8_l2_aligned1(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int dst_stride, int src_stride1, int src_stride2, int h) \
209 { OPNAME ## _pixels8_l2_aligned2(dst,src2,src1,dst_stride,src_stride2,src_stride1,h); } \
211 static inline void OPNAME ## _pixels8_l4_aligned(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
213 uint32_t a0,a1,a2,a3; \
214 UNPACK(a0,a1,LP(src1),LP(src2)); \
215 UNPACK(a2,a3,LP(src3),LP(src4)); \
216 OP(LP(dst),rnd_PACK(a0,a1,a2,a3)); \
217 UNPACK(a0,a1,LP(src1+4),LP(src2+4)); \
218 UNPACK(a2,a3,LP(src3+4),LP(src4+4)); \
219 OP(LP(dst+4),rnd_PACK(a0,a1,a2,a3)); \
228 static inline void OPNAME ## _no_rnd_pixels8_l4_aligned(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
230 uint32_t a0,a1,a2,a3; \
231 UNPACK(a0,a1,LP(src1),LP(src2)); \
232 UNPACK(a2,a3,LP(src3),LP(src4)); \
233 OP(LP(dst),no_rnd_PACK(a0,a1,a2,a3)); \
234 UNPACK(a0,a1,LP(src1+4),LP(src2+4)); \
235 UNPACK(a2,a3,LP(src3+4),LP(src4+4)); \
236 OP(LP(dst+4),no_rnd_PACK(a0,a1,a2,a3)); \
245 static inline void OPNAME ## _pixels8_l4_aligned0(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
247 uint32_t a0,a1,a2,a3; /* src1 only not aligned */\
248 UNPACK(a0,a1,AV_RN32(src1),LP(src2)); \
249 UNPACK(a2,a3,LP(src3),LP(src4)); \
250 OP(LP(dst),rnd_PACK(a0,a1,a2,a3)); \
251 UNPACK(a0,a1,AV_RN32(src1+4),LP(src2+4)); \
252 UNPACK(a2,a3,LP(src3+4),LP(src4+4)); \
253 OP(LP(dst+4),rnd_PACK(a0,a1,a2,a3)); \
262 static inline void OPNAME ## _no_rnd_pixels8_l4_aligned0(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
264 uint32_t a0,a1,a2,a3; \
265 UNPACK(a0,a1,AV_RN32(src1),LP(src2)); \
266 UNPACK(a2,a3,LP(src3),LP(src4)); \
267 OP(LP(dst),no_rnd_PACK(a0,a1,a2,a3)); \
268 UNPACK(a0,a1,AV_RN32(src1+4),LP(src2+4)); \
269 UNPACK(a2,a3,LP(src3+4),LP(src4+4)); \
270 OP(LP(dst+4),no_rnd_PACK(a0,a1,a2,a3)); \
279 static inline void OPNAME ## _pixels16_l4_aligned(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
281 uint32_t a0,a1,a2,a3; \
282 UNPACK(a0,a1,LP(src1),LP(src2)); \
283 UNPACK(a2,a3,LP(src3),LP(src4)); \
284 OP(LP(dst),rnd_PACK(a0,a1,a2,a3)); \
285 UNPACK(a0,a1,LP(src1+4),LP(src2+4)); \
286 UNPACK(a2,a3,LP(src3+4),LP(src4+4)); \
287 OP(LP(dst+8),rnd_PACK(a0,a1,a2,a3)); \
288 UNPACK(a0,a1,LP(src1+8),LP(src2+8)); \
289 UNPACK(a2,a3,LP(src3+8),LP(src4+8)); \
290 OP(LP(dst+8),rnd_PACK(a0,a1,a2,a3)); \
291 UNPACK(a0,a1,LP(src1+12),LP(src2+12)); \
292 UNPACK(a2,a3,LP(src3+12),LP(src4+12)); \
293 OP(LP(dst+12),rnd_PACK(a0,a1,a2,a3)); \
/* 16-wide, 4-source truncating (no_rnd) average, all sources aligned. */\
static inline void OPNAME ## _no_rnd_pixels16_l4_aligned(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
uint32_t a0,a1,a2,a3; \
UNPACK(a0,a1,LP(src1),LP(src2)); \
UNPACK(a2,a3,LP(src3),LP(src4)); \
OP(LP(dst),no_rnd_PACK(a0,a1,a2,a3)); \
UNPACK(a0,a1,LP(src1+4),LP(src2+4)); \
UNPACK(a2,a3,LP(src3+4),LP(src4+4)); \
OP(LP(dst+4),no_rnd_PACK(a0,a1,a2,a3)); \
UNPACK(a0,a1,LP(src1+8),LP(src2+8)); \
UNPACK(a2,a3,LP(src3+8),LP(src4+8)); \
OP(LP(dst+8),no_rnd_PACK(a0,a1,a2,a3)); \
UNPACK(a0,a1,LP(src1+12),LP(src2+12)); \
UNPACK(a2,a3,LP(src3+12),LP(src4+12)); \
OP(LP(dst+12),no_rnd_PACK(a0,a1,a2,a3)); \
325 static inline void OPNAME ## _pixels16_l4_aligned0(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
326 do { /* src1 is unaligned */\
327 uint32_t a0,a1,a2,a3; \
328 UNPACK(a0,a1,AV_RN32(src1),LP(src2)); \
329 UNPACK(a2,a3,LP(src3),LP(src4)); \
330 OP(LP(dst),rnd_PACK(a0,a1,a2,a3)); \
331 UNPACK(a0,a1,AV_RN32(src1+4),LP(src2+4)); \
332 UNPACK(a2,a3,LP(src3+4),LP(src4+4)); \
333 OP(LP(dst+8),rnd_PACK(a0,a1,a2,a3)); \
334 UNPACK(a0,a1,AV_RN32(src1+8),LP(src2+8)); \
335 UNPACK(a2,a3,LP(src3+8),LP(src4+8)); \
336 OP(LP(dst+8),rnd_PACK(a0,a1,a2,a3)); \
337 UNPACK(a0,a1,AV_RN32(src1+12),LP(src2+12)); \
338 UNPACK(a2,a3,LP(src3+12),LP(src4+12)); \
339 OP(LP(dst+12),rnd_PACK(a0,a1,a2,a3)); \
/* 16-wide, 4-source truncating average; only src1 is unaligned. */\
static inline void OPNAME ## _no_rnd_pixels16_l4_aligned0(uint8_t *dst, const uint8_t *src1, uint8_t *src2, uint8_t *src3, uint8_t *src4,int dst_stride, int src_stride1, int src_stride2,int src_stride3,int src_stride4, int h){\
uint32_t a0,a1,a2,a3; \
UNPACK(a0,a1,AV_RN32(src1),LP(src2)); \
UNPACK(a2,a3,LP(src3),LP(src4)); \
OP(LP(dst),no_rnd_PACK(a0,a1,a2,a3)); \
UNPACK(a0,a1,AV_RN32(src1+4),LP(src2+4)); \
UNPACK(a2,a3,LP(src3+4),LP(src4+4)); \
OP(LP(dst+4),no_rnd_PACK(a0,a1,a2,a3)); \
UNPACK(a0,a1,AV_RN32(src1+8),LP(src2+8)); \
UNPACK(a2,a3,LP(src3+8),LP(src4+8)); \
OP(LP(dst+8),no_rnd_PACK(a0,a1,a2,a3)); \
UNPACK(a0,a1,AV_RN32(src1+12),LP(src2+12)); \
UNPACK(a2,a3,LP(src3+12),LP(src4+12)); \
OP(LP(dst+12),no_rnd_PACK(a0,a1,a2,a3)); \
#define op_avg(a, b) a = rnd_avg32(a,b) /* averaging store (for avg_* ops) */
#define op_put(a, b) a = b /* plain store (for put_* ops) */
#define avg2(a,b) ((a+b+1)>>1) /* rounded 2-sample average */
#define avg4(a,b,c,d) ((a+b+c+d+2)>>2) /* rounded 4-sample average */
/*
 * gmc1_c(): one-vector global motion compensation.  Interpolates an
 * 8-pixel-wide row at 1/16-pel precision: A..D are the four bilinear
 * weights built from the fractional position (x16, y16); each output
 * is (weighted sum + rounder) >> 8.  t0..t3 ping-pong so every source
 * byte is loaded only once (s0 = current row, s1 = row below).
 */
static void gmc1_c(uint8_t *dst, uint8_t *src, int stride, int h, int x16, int y16, int rounder)
const int A=(16-x16)*(16-y16);
const int B=( x16)*(16-y16);
const int C=(16-x16)*( y16);
const int D=( x16)*( y16);
uint8_t *s1 = src+stride;
t0 = *s0++; t2 = *s1++; /* prime the two-row byte cache */
t1 = *s0++; t3 = *s1++;
dst[0]= (A*t0 + B*t1 + C*t2 + D*t3 + rounder)>>8;
t0 = *s0++; t2 = *s1++;
dst[1]= (A*t1 + B*t0 + C*t3 + D*t2 + rounder)>>8;
t1 = *s0++; t3 = *s1++;
dst[2]= (A*t0 + B*t1 + C*t2 + D*t3 + rounder)>>8;
t0 = *s0++; t2 = *s1++;
dst[3]= (A*t1 + B*t0 + C*t3 + D*t2 + rounder)>>8;
t1 = *s0++; t3 = *s1++;
dst[4]= (A*t0 + B*t1 + C*t2 + D*t3 + rounder)>>8;
t0 = *s0++; t2 = *s1++;
dst[5]= (A*t1 + B*t0 + C*t3 + D*t2 + rounder)>>8;
t1 = *s0++; t3 = *s1++;
dst[6]= (A*t0 + B*t1 + C*t2 + D*t3 + rounder)>>8;
t0 = *s0++; t2 = *s1++;
dst[7]= (A*t1 + B*t0 + C*t3 + D*t2 + rounder)>>8;
/*
 * gmc_c(): affine global motion compensation, 8 pixels per row.
 * For each destination pixel, bilinearly interpolates the source at a
 * sub-pel position (frac_x, frac_y in units of 1/s, s = 1<<shift).
 * Samples whose integer position falls outside width x height are
 * clamped to the nearest edge pixel via av_clip.
 */
static void gmc_c(uint8_t *dst, uint8_t *src, int stride, int h, int ox, int oy,
int dxx, int dxy, int dyx, int dyy, int shift, int r, int width, int height)
const int s= 1<<shift;
for(x=0; x<8; x++){ //XXX FIXME optimize
int src_x, src_y, frac_x, frac_y, index;
if((unsigned)src_x < width){ /* fully inside horizontally */
if((unsigned)src_y < height){ /* fully inside: 4-tap bilinear */
index= src_x + src_y*stride;
dst[y*stride + x]= ( ( src[index ]*(s-frac_x)
+ src[index +1]* frac_x )*(s-frac_y)
+ ( src[index+stride ]*(s-frac_x)
+ src[index+stride+1]* frac_x )* frac_y
index= src_x + av_clip(src_y, 0, height)*stride; /* clamp vertically, interpolate horizontally only */
dst[y*stride + x]= ( ( src[index ]*(s-frac_x)
+ src[index +1]* frac_x )*s
if((unsigned)src_y < height){
index= av_clip(src_x, 0, width) + src_y*stride; /* clamp horizontally, interpolate vertically only */
dst[y*stride + x]= ( ( src[index ]*(s-frac_y)
+ src[index+stride ]* frac_y )*s
index= av_clip(src_x, 0, width) + av_clip(src_y, 0, height)*stride; /* both clamped: nearest edge pixel */
dst[y*stride + x]= src[index ];
/*
 * H264_CHROMA_MC(OPNAME, OP): H.264 chroma motion compensation for
 * block widths 2/4/8.  A..D are the 1/8-pel bilinear weights derived
 * from (x, y); OP performs the final scaling and store (see the
 * op_put/op_avg definitions that follow the macro).  t0..t3 ping-pong
 * so each source byte is read once (s0 = current row, s1 = next row).
 */
#define H264_CHROMA_MC(OPNAME, OP)\
static void OPNAME ## h264_chroma_mc2_sh4(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){\
const int A=(8-x)*(8-y);\
const int B=( x)*(8-y);\
const int C=(8-x)*( y);\
const int D=( x)*( y);\
assert(x<8 && y<8 && x>=0 && y>=0);\
uint8_t *s1 = src+stride; \
t0 = *s0++; t2 = *s1++; \
t1 = *s0++; t3 = *s1++; \
OP(dst[0], (A*t0 + B*t1 + C*t2 + D*t3));\
t0 = *s0++; t2 = *s1++; \
OP(dst[1], (A*t1 + B*t0 + C*t3 + D*t2));\
static void OPNAME ## h264_chroma_mc4_sh4(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){\
const int A=(8-x)*(8-y);\
const int B=( x)*(8-y);\
const int C=(8-x)*( y);\
const int D=( x)*( y);\
assert(x<8 && y<8 && x>=0 && y>=0);\
uint8_t *s1 = src+stride; \
t0 = *s0++; t2 = *s1++; \
t1 = *s0++; t3 = *s1++; \
OP(dst[0], (A*t0 + B*t1 + C*t2 + D*t3));\
t0 = *s0++; t2 = *s1++; \
OP(dst[1], (A*t1 + B*t0 + C*t3 + D*t2));\
t1 = *s0++; t3 = *s1++; \
OP(dst[2], (A*t0 + B*t1 + C*t2 + D*t3));\
t0 = *s0++; t2 = *s1++; \
OP(dst[3], (A*t1 + B*t0 + C*t3 + D*t2));\
static void OPNAME ## h264_chroma_mc8_sh4(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){\
const int A=(8-x)*(8-y);\
const int B=( x)*(8-y);\
const int C=(8-x)*( y);\
const int D=( x)*( y);\
assert(x<8 && y<8 && x>=0 && y>=0);\
uint8_t *s1 = src+stride; \
t0 = *s0++; t2 = *s1++; \
t1 = *s0++; t3 = *s1++; \
OP(dst[0], (A*t0 + B*t1 + C*t2 + D*t3));\
t0 = *s0++; t2 = *s1++; \
OP(dst[1], (A*t1 + B*t0 + C*t3 + D*t2));\
t1 = *s0++; t3 = *s1++; \
OP(dst[2], (A*t0 + B*t1 + C*t2 + D*t3));\
t0 = *s0++; t2 = *s1++; \
OP(dst[3], (A*t1 + B*t0 + C*t3 + D*t2));\
t1 = *s0++; t3 = *s1++; \
OP(dst[4], (A*t0 + B*t1 + C*t2 + D*t3));\
t0 = *s0++; t2 = *s1++; \
OP(dst[5], (A*t1 + B*t0 + C*t3 + D*t2));\
t1 = *s0++; t3 = *s1++; \
OP(dst[6], (A*t0 + B*t1 + C*t2 + D*t3));\
t0 = *s0++; t2 = *s1++; \
OP(dst[7], (A*t1 + B*t0 + C*t3 + D*t2));\
#define op_avg(a, b) a = (((a)+(((b) + 32)>>6)+1)>>1) /* scale by >>6 with rounding, then average with dst */
#define op_put(a, b) a = (((b) + 32)>>6) /* scale by >>6 with rounding, store */
H264_CHROMA_MC(put_ , op_put)
H264_CHROMA_MC(avg_ , op_avg)
/*
 * QPEL_MC(r, OPNAME, RND, OP): MPEG-4 quarter-pel motion compensation.
 * The h/v lowpass filters apply the half-pel FIR kernel with
 * coefficients (20, -6, 3, -1); near the block edges the outermost
 * taps reuse edge samples (src8/src16) instead of reading past the end.
 * The qpelN_mcXY functions combine the lowpass filters with the
 * pixels*_l2_* averaging primitives for each quarter-pel position XY
 * (X = horizontal, Y = vertical quarter offset).
 */
#define QPEL_MC(r, OPNAME, RND, OP) \
static void OPNAME ## mpeg4_qpel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
int src0,src1,src2,src3,src4,src5,src6,src7,src8;\
OP(dst[0], (src0+src1)*20 - (src0+src2)*6 + (src1+src3)*3 - (src2+src4));\
OP(dst[1], (src1+src2)*20 - (src0+src3)*6 + (src0+src4)*3 - (src1+src5));\
OP(dst[2], (src2+src3)*20 - (src1+src4)*6 + (src0+src5)*3 - (src0+src6));\
OP(dst[3], (src3+src4)*20 - (src2+src5)*6 + (src1+src6)*3 - (src0+src7));\
OP(dst[4], (src4+src5)*20 - (src3+src6)*6 + (src2+src7)*3 - (src1+src8));\
OP(dst[5], (src5+src6)*20 - (src4+src7)*6 + (src3+src8)*3 - (src2+src8));\
OP(dst[6], (src6+src7)*20 - (src5+src8)*6 + (src4+src8)*3 - (src3+src7));\
OP(dst[7], (src7+src8)*20 - (src6+src8)*6 + (src5+src7)*3 - (src4+src6));\
static void OPNAME ## mpeg4_qpel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
uint8_t *s = src, *d=dst;\
int src0,src1,src2,src3,src4,src5,src6,src7,src8;\
src0 = *s; s+=srcStride; \
src1 = *s; s+=srcStride; \
src2 = *s; s+=srcStride; \
src3 = *s; s+=srcStride; \
src4 = *s; s+=srcStride; \
OP(*d, (src0+src1)*20 - (src0+src2)*6 + (src1+src3)*3 - (src2+src4));d+=dstStride;\
src5 = *s; s+=srcStride; \
OP(*d, (src1+src2)*20 - (src0+src3)*6 + (src0+src4)*3 - (src1+src5));d+=dstStride;\
src6 = *s; s+=srcStride; \
OP(*d, (src2+src3)*20 - (src1+src4)*6 + (src0+src5)*3 - (src0+src6));d+=dstStride;\
src7 = *s; s+=srcStride; \
OP(*d, (src3+src4)*20 - (src2+src5)*6 + (src1+src6)*3 - (src0+src7));d+=dstStride;\
OP(*d, (src4+src5)*20 - (src3+src6)*6 + (src2+src7)*3 - (src1+src8));d+=dstStride;\
OP(*d, (src5+src6)*20 - (src4+src7)*6 + (src3+src8)*3 - (src2+src8));d+=dstStride;\
OP(*d, (src6+src7)*20 - (src5+src8)*6 + (src4+src8)*3 - (src3+src7));d+=dstStride;\
OP(*d, (src7+src8)*20 - (src6+src8)*6 + (src5+src7)*3 - (src4+src6));\
static void OPNAME ## mpeg4_qpel16_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
int src0,src1,src2,src3,src4,src5,src6,src7,src8;\
int src9,src10,src11,src12,src13,src14,src15,src16;\
OP(dst[ 0], (src0 +src1 )*20 - (src0 +src2 )*6 + (src1 +src3 )*3 - (src2 +src4 ));\
OP(dst[ 1], (src1 +src2 )*20 - (src0 +src3 )*6 + (src0 +src4 )*3 - (src1 +src5 ));\
OP(dst[ 2], (src2 +src3 )*20 - (src1 +src4 )*6 + (src0 +src5 )*3 - (src0 +src6 ));\
OP(dst[ 3], (src3 +src4 )*20 - (src2 +src5 )*6 + (src1 +src6 )*3 - (src0 +src7 ));\
OP(dst[ 4], (src4 +src5 )*20 - (src3 +src6 )*6 + (src2 +src7 )*3 - (src1 +src8 ));\
OP(dst[ 5], (src5 +src6 )*20 - (src4 +src7 )*6 + (src3 +src8 )*3 - (src2 +src9 ));\
OP(dst[ 6], (src6 +src7 )*20 - (src5 +src8 )*6 + (src4 +src9 )*3 - (src3 +src10));\
OP(dst[ 7], (src7 +src8 )*20 - (src6 +src9 )*6 + (src5 +src10)*3 - (src4 +src11));\
OP(dst[ 8], (src8 +src9 )*20 - (src7 +src10)*6 + (src6 +src11)*3 - (src5 +src12));\
OP(dst[ 9], (src9 +src10)*20 - (src8 +src11)*6 + (src7 +src12)*3 - (src6 +src13));\
OP(dst[10], (src10+src11)*20 - (src9 +src12)*6 + (src8 +src13)*3 - (src7 +src14));\
OP(dst[11], (src11+src12)*20 - (src10+src13)*6 + (src9 +src14)*3 - (src8 +src15));\
OP(dst[12], (src12+src13)*20 - (src11+src14)*6 + (src10+src15)*3 - (src9 +src16));\
OP(dst[13], (src13+src14)*20 - (src12+src15)*6 + (src11+src16)*3 - (src10+src16));\
OP(dst[14], (src14+src15)*20 - (src13+src16)*6 + (src12+src16)*3 - (src11+src15));\
OP(dst[15], (src15+src16)*20 - (src14+src16)*6 + (src13+src15)*3 - (src12+src14));\
static void OPNAME ## mpeg4_qpel16_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
uint8_t *s = src, *d=dst;\
int src0,src1,src2,src3,src4,src5,src6,src7,src8;\
int src9,src10,src11,src12,src13,src14,src15,src16;\
src0 = *s; s+=srcStride; \
src1 = *s; s+=srcStride; \
src2 = *s; s+=srcStride; \
src3 = *s; s+=srcStride; \
src4 = *s; s+=srcStride; \
OP(*d, (src0 +src1 )*20 - (src0 +src2 )*6 + (src1 +src3 )*3 - (src2 +src4 ));d+=dstStride;\
src5 = *s; s+=srcStride; \
OP(*d, (src1 +src2 )*20 - (src0 +src3 )*6 + (src0 +src4 )*3 - (src1 +src5 ));d+=dstStride;\
src6 = *s; s+=srcStride; \
OP(*d, (src2 +src3 )*20 - (src1 +src4 )*6 + (src0 +src5 )*3 - (src0 +src6 ));d+=dstStride;\
src7 = *s; s+=srcStride; \
OP(*d, (src3 +src4 )*20 - (src2 +src5 )*6 + (src1 +src6 )*3 - (src0 +src7 ));d+=dstStride;\
src8 = *s; s+=srcStride; \
OP(*d, (src4 +src5 )*20 - (src3 +src6 )*6 + (src2 +src7 )*3 - (src1 +src8 ));d+=dstStride;\
src9 = *s; s+=srcStride; \
OP(*d, (src5 +src6 )*20 - (src4 +src7 )*6 + (src3 +src8 )*3 - (src2 +src9 ));d+=dstStride;\
src10 = *s; s+=srcStride; \
OP(*d, (src6 +src7 )*20 - (src5 +src8 )*6 + (src4 +src9 )*3 - (src3 +src10));d+=dstStride;\
src11 = *s; s+=srcStride; \
OP(*d, (src7 +src8 )*20 - (src6 +src9 )*6 + (src5 +src10)*3 - (src4 +src11));d+=dstStride;\
src12 = *s; s+=srcStride; \
OP(*d, (src8 +src9 )*20 - (src7 +src10)*6 + (src6 +src11)*3 - (src5 +src12));d+=dstStride;\
src13 = *s; s+=srcStride; \
OP(*d, (src9 +src10)*20 - (src8 +src11)*6 + (src7 +src12)*3 - (src6 +src13));d+=dstStride;\
src14 = *s; s+=srcStride; \
OP(*d, (src10+src11)*20 - (src9 +src12)*6 + (src8 +src13)*3 - (src7 +src14));d+=dstStride;\
src15 = *s; s+=srcStride; \
OP(*d, (src11+src12)*20 - (src10+src13)*6 + (src9 +src14)*3 - (src8 +src15));d+=dstStride;\
OP(*d, (src12+src13)*20 - (src11+src14)*6 + (src10+src15)*3 - (src9 +src16));d+=dstStride;\
OP(*d, (src13+src14)*20 - (src12+src15)*6 + (src11+src16)*3 - (src10+src16));d+=dstStride;\
OP(*d, (src14+src15)*20 - (src13+src16)*6 + (src12+src16)*3 - (src11+src15));d+=dstStride;\
OP(*d, (src15+src16)*20 - (src14+src16)*6 + (src13+src15)*3 - (src12+src14));\
static void OPNAME ## qpel8_mc00_sh4 (uint8_t *dst, uint8_t *src, int stride){ /* full-pel: plain copy/avg */\
OPNAME ## pixels8_c(dst, src, stride, 8);\
static void OPNAME ## qpel8_mc10_sh4(uint8_t *dst, uint8_t *src, int stride){ /* 1/4-pel left: avg(src, h-filtered) */\
put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);\
OPNAME ## pixels8_l2_aligned2(dst, src, half, stride, stride, 8, 8);\
static void OPNAME ## qpel8_mc20_sh4(uint8_t *dst, uint8_t *src, int stride){ /* half-pel horizontal */\
OPNAME ## mpeg4_qpel8_h_lowpass(dst, src, stride, stride, 8);\
static void OPNAME ## qpel8_mc30_sh4(uint8_t *dst, uint8_t *src, int stride){ /* 1/4-pel right: avg(src+1, h-filtered) */\
put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);\
OPNAME ## pixels8_l2_aligned2(dst, src+1, half, stride, stride, 8, 8);\
static void OPNAME ## qpel8_mc01_sh4(uint8_t *dst, uint8_t *src, int stride){\
copy_block9(full, src, 16, stride, 9);\
put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16);\
OPNAME ## pixels8_l2_aligned(dst, full, half, stride, 16, 8, 8);\
static void OPNAME ## qpel8_mc02_sh4(uint8_t *dst, uint8_t *src, int stride){\
copy_block9(full, src, 16, stride, 9);\
OPNAME ## mpeg4_qpel8_v_lowpass(dst, full, stride, 16);\
static void OPNAME ## qpel8_mc03_sh4(uint8_t *dst, uint8_t *src, int stride){\
copy_block9(full, src, 16, stride, 9);\
put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16);\
OPNAME ## pixels8_l2_aligned(dst, full+16, half, stride, 16, 8, 8);\
static void OPNAME ## qpel8_mc11_sh4(uint8_t *dst, uint8_t *src, int stride){\
copy_block9(full, src, 16, stride, 9);\
put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
put ## RND ## pixels8_l2_aligned(halfH, halfH, full, 8, 8, 16, 9);\
put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
OPNAME ## pixels8_l2_aligned(dst, halfH, halfHV, stride, 8, 8, 8);\
static void OPNAME ## qpel8_mc31_sh4(uint8_t *dst, uint8_t *src, int stride){\
copy_block9(full, src, 16, stride, 9);\
put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
put ## RND ## pixels8_l2_aligned1(halfH, halfH, full+1, 8, 8, 16, 9);\
put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
OPNAME ## pixels8_l2_aligned(dst, halfH, halfHV, stride, 8, 8, 8);\
static void OPNAME ## qpel8_mc13_sh4(uint8_t *dst, uint8_t *src, int stride){\
copy_block9(full, src, 16, stride, 9);\
put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
put ## RND ## pixels8_l2_aligned(halfH, halfH, full, 8, 8, 16, 9);\
put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
OPNAME ## pixels8_l2_aligned(dst, halfH+8, halfHV, stride, 8, 8, 8);\
static void OPNAME ## qpel8_mc33_sh4(uint8_t *dst, uint8_t *src, int stride){\
copy_block9(full, src, 16, stride, 9);\
put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
put ## RND ## pixels8_l2_aligned1(halfH, halfH, full+1, 8, 8, 16, 9);\
put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
OPNAME ## pixels8_l2_aligned(dst, halfH+8, halfHV, stride, 8, 8, 8);\
static void OPNAME ## qpel8_mc21_sh4(uint8_t *dst, uint8_t *src, int stride){\
put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
OPNAME ## pixels8_l2_aligned(dst, halfH, halfHV, stride, 8, 8, 8);\
static void OPNAME ## qpel8_mc23_sh4(uint8_t *dst, uint8_t *src, int stride){\
put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
OPNAME ## pixels8_l2_aligned(dst, halfH+8, halfHV, stride, 8, 8, 8);\
static void OPNAME ## qpel8_mc12_sh4(uint8_t *dst, uint8_t *src, int stride){\
copy_block9(full, src, 16, stride, 9);\
put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
put ## RND ## pixels8_l2_aligned(halfH, halfH, full, 8, 8, 16, 9);\
OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
static void OPNAME ## qpel8_mc32_sh4(uint8_t *dst, uint8_t *src, int stride){\
copy_block9(full, src, 16, stride, 9);\
put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
put ## RND ## pixels8_l2_aligned1(halfH, halfH, full+1, 8, 8, 16, 9);\
OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
static void OPNAME ## qpel8_mc22_sh4(uint8_t *dst, uint8_t *src, int stride){ /* half-pel both directions */\
put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
static void OPNAME ## qpel16_mc00_sh4 (uint8_t *dst, uint8_t *src, int stride){\
OPNAME ## pixels16_c(dst, src, stride, 16);\
static void OPNAME ## qpel16_mc10_sh4(uint8_t *dst, uint8_t *src, int stride){\
put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16);\
OPNAME ## pixels16_l2_aligned2(dst, src, half, stride, stride, 16, 16);\
static void OPNAME ## qpel16_mc20_sh4(uint8_t *dst, uint8_t *src, int stride){\
OPNAME ## mpeg4_qpel16_h_lowpass(dst, src, stride, stride, 16);\
static void OPNAME ## qpel16_mc30_sh4(uint8_t *dst, uint8_t *src, int stride){\
put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16);\
OPNAME ## pixels16_l2_aligned2(dst, src+1, half, stride, stride, 16, 16);\
static void OPNAME ## qpel16_mc01_sh4(uint8_t *dst, uint8_t *src, int stride){\
uint8_t full[24*17];\
copy_block17(full, src, 24, stride, 17);\
put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24);\
OPNAME ## pixels16_l2_aligned(dst, full, half, stride, 24, 16, 16);\
static void OPNAME ## qpel16_mc02_sh4(uint8_t *dst, uint8_t *src, int stride){\
uint8_t full[24*17];\
copy_block17(full, src, 24, stride, 17);\
OPNAME ## mpeg4_qpel16_v_lowpass(dst, full, stride, 24);\
static void OPNAME ## qpel16_mc03_sh4(uint8_t *dst, uint8_t *src, int stride){\
uint8_t full[24*17];\
copy_block17(full, src, 24, stride, 17);\
put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24);\
OPNAME ## pixels16_l2_aligned(dst, full+24, half, stride, 24, 16, 16);\
static void OPNAME ## qpel16_mc11_sh4(uint8_t *dst, uint8_t *src, int stride){\
uint8_t full[24*17];\
uint8_t halfHV[256];\
copy_block17(full, src, 24, stride, 17);\
put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
put ## RND ## pixels16_l2_aligned(halfH, halfH, full, 16, 16, 24, 17);\
put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
OPNAME ## pixels16_l2_aligned(dst, halfH, halfHV, stride, 16, 16, 16);\
static void OPNAME ## qpel16_mc31_sh4(uint8_t *dst, uint8_t *src, int stride){\
uint8_t full[24*17];\
uint8_t halfHV[256];\
copy_block17(full, src, 24, stride, 17);\
put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
put ## RND ## pixels16_l2_aligned1(halfH, halfH, full+1, 16, 16, 24, 17);\
put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
OPNAME ## pixels16_l2_aligned(dst, halfH, halfHV, stride, 16, 16, 16);\
static void OPNAME ## qpel16_mc13_sh4(uint8_t *dst, uint8_t *src, int stride){\
uint8_t full[24*17];\
uint8_t halfHV[256];\
copy_block17(full, src, 24, stride, 17);\
put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
put ## RND ## pixels16_l2_aligned(halfH, halfH, full, 16, 16, 24, 17);\
put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
OPNAME ## pixels16_l2_aligned(dst, halfH+16, halfHV, stride, 16, 16, 16);\
static void OPNAME ## qpel16_mc33_sh4(uint8_t *dst, uint8_t *src, int stride){\
uint8_t full[24*17];\
uint8_t halfHV[256];\
copy_block17(full, src, 24, stride, 17);\
put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
put ## RND ## pixels16_l2_aligned1(halfH, halfH, full+1, 16, 16, 24, 17);\
put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
OPNAME ## pixels16_l2_aligned(dst, halfH+16, halfHV, stride, 16, 16, 16);\
static void OPNAME ## qpel16_mc21_sh4(uint8_t *dst, uint8_t *src, int stride){\
uint8_t halfHV[256];\
put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
OPNAME ## pixels16_l2_aligned(dst, halfH, halfHV, stride, 16, 16, 16);\
static void OPNAME ## qpel16_mc23_sh4(uint8_t *dst, uint8_t *src, int stride){\
uint8_t halfHV[256];\
put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
OPNAME ## pixels16_l2_aligned(dst, halfH+16, halfHV, stride, 16, 16, 16);\
static void OPNAME ## qpel16_mc12_sh4(uint8_t *dst, uint8_t *src, int stride){\
uint8_t full[24*17];\
copy_block17(full, src, 24, stride, 17);\
put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
put ## RND ## pixels16_l2_aligned(halfH, halfH, full, 16, 16, 24, 17);\
OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
static void OPNAME ## qpel16_mc32_sh4(uint8_t *dst, uint8_t *src, int stride){\
uint8_t full[24*17];\
copy_block17(full, src, 24, stride, 17);\
put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
put ## RND ## pixels16_l2_aligned1(halfH, halfH, full+1, 16, 16, 24, 17);\
OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
static void OPNAME ## qpel16_mc22_sh4(uint8_t *dst, uint8_t *src, int stride){\
put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
/* Store/average ops plugged into the QPEL_MC macro above.  The 6-tap filter
 * sum b is rescaled to pixel range with (b+16)>>5 (rounding) or (b+15)>>5
 * (no-rounding) and clipped through cm (ff_cropTbl + MAX_NEG_CROP, declared
 * in each generated function); the avg variants then average the clipped
 * value with the existing destination pixel. */
943 #define op_avg(a, b) a = (((a)+cm[((b) + 16)>>5]+1)>>1)
944 #define op_avg_no_rnd(a, b) a = (((a)+cm[((b) + 15)>>5])>>1)
945 #define op_put(a, b) a = cm[((b) + 16)>>5]
946 #define op_put_no_rnd(a, b) a = cm[((b) + 15)>>5]
/* Instantiate the MPEG-4 quarter-pel MC function families: put, put_no_rnd
 * and avg.  The avg_no_rnd variant is deliberately left commented out. */
948 QPEL_MC(0, put_ , _ , op_put)
949 QPEL_MC(1, put_no_rnd_, _no_rnd_, op_put_no_rnd)
950 QPEL_MC(0, avg_ , _ , op_avg)
951 //QPEL_MC(1, avg_no_rnd , _ , op_avg)
/* NOTE(review): this chunk is a line-numbered partial paste -- the integer
 * starting each line and the gaps in that numbering come from extraction,
 * and several original lines (pixel loads, loop headers, closing braces)
 * are missing.  Comments describe only what is visible; restore the missing
 * lines from the original file before building. */
/* H.264 quarter-pel 6-tap (1,-5,20,20,-5,1) lowpass filters, hand-unrolled
 * in line with the file header's SH4 post-increment note.  OPNAME prefixes
 * the generated names; OP writes/averages 8-bit-domain sums, OP2 the wider
 * two-pass sums. */
958 #define H264_LOWPASS(OPNAME, OP, OP2) \
959 static inline void OPNAME ## h264_qpel_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride,int w,int h){ /* horizontal pass */ \
960 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; /* clip table used by OP() */ \
962 int srcB,srcA,src0,src1,src2,src3,src4,src5,src6;\
970 OP(dst[0], (src0+src1)*20 - (srcA+src2)*5 + (srcB+src3));\
972 OP(dst[1], (src1+src2)*20 - (src0+src3)*5 + (srcA+src4));\
974 OP(dst[2], (src2+src3)*20 - (src1+src4)*5 + (src0+src5));\
976 OP(dst[3], (src3+src4)*20 - (src2+src5)*5 + (src1+src6));\
977 if (w>4) { /* it optimized */ \
978 int src7,src8,src9,src10; \
980 OP(dst[4], (src4+src5)*20 - (src3+src6)*5 + (src2+src7));\
982 OP(dst[5], (src5+src6)*20 - (src4+src7)*5 + (src3+src8));\
984 OP(dst[6], (src6+src7)*20 - (src5+src8)*5 + (src4+src9));\
986 OP(dst[7], (src7+src8)*20 - (src6+src9)*5 + (src5+src10));\
988 int src11,src12,src13,src14,src15,src16,src17,src18; \
990 OP(dst[8] , (src8 +src9 )*20 - (src7 +src10)*5 + (src6 +src11));\
992 OP(dst[9] , (src9 +src10)*20 - (src8 +src11)*5 + (src7 +src12));\
994 OP(dst[10], (src10+src11)*20 - (src9 +src12)*5 + (src8 +src13));\
996 OP(dst[11], (src11+src12)*20 - (src10+src13)*5 + (src9 +src14));\
998 OP(dst[12], (src12+src13)*20 - (src11+src14)*5 + (src10+src15));\
1000 OP(dst[13], (src13+src14)*20 - (src12+src15)*5 + (src11+src16));\
1002 OP(dst[14], (src14+src15)*20 - (src13+src16)*5 + (src12+src17));\
1004 OP(dst[15], (src15+src16)*20 - (src14+src17)*5 + (src13+src18));\
1012 static inline void OPNAME ## h264_qpel_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride,int w,int h){ /* vertical pass: same 6-tap filter applied down each column */ \
1013 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
1015 int srcB,srcA,src0,src1,src2,src3,src4,src5,src6;\
1016 uint8_t *s = src-2*srcStride,*d=dst; /* filter needs two rows of context above */ \
1017 srcB = *s; s+=srcStride;\
1018 srcA = *s; s+=srcStride;\
1019 src0 = *s; s+=srcStride;\
1020 src1 = *s; s+=srcStride;\
1021 src2 = *s; s+=srcStride;\
1022 src3 = *s; s+=srcStride;\
1023 OP(*d, (src0+src1)*20 - (srcA+src2)*5 + (srcB+src3));d+=dstStride;\
1024 src4 = *s; s+=srcStride;\
1025 OP(*d, (src1+src2)*20 - (src0+src3)*5 + (srcA+src4));d+=dstStride;\
1026 src5 = *s; s+=srcStride;\
1027 OP(*d, (src2+src3)*20 - (src1+src4)*5 + (src0+src5));d+=dstStride;\
1028 src6 = *s; s+=srcStride;\
1029 OP(*d, (src3+src4)*20 - (src2+src5)*5 + (src1+src6));d+=dstStride;\
1031 int src7,src8,src9,src10; \
1032 src7 = *s; s+=srcStride;\
1033 OP(*d, (src4+src5)*20 - (src3+src6)*5 + (src2+src7));d+=dstStride;\
1034 src8 = *s; s+=srcStride;\
1035 OP(*d, (src5+src6)*20 - (src4+src7)*5 + (src3+src8));d+=dstStride;\
1036 src9 = *s; s+=srcStride;\
1037 OP(*d, (src6+src7)*20 - (src5+src8)*5 + (src4+src9));d+=dstStride;\
1038 src10 = *s; s+=srcStride;\
1039 OP(*d, (src7+src8)*20 - (src6+src9)*5 + (src5+src10));d+=dstStride;\
1041 int src11,src12,src13,src14,src15,src16,src17,src18; \
1042 src11 = *s; s+=srcStride;\
1043 OP(*d , (src8 +src9 )*20 - (src7 +src10)*5 + (src6 +src11));d+=dstStride;\
1044 src12 = *s; s+=srcStride;\
1045 OP(*d , (src9 +src10)*20 - (src8 +src11)*5 + (src7 +src12));d+=dstStride;\
1046 src13 = *s; s+=srcStride;\
1047 OP(*d, (src10+src11)*20 - (src9 +src12)*5 + (src8 +src13));d+=dstStride;\
1048 src14 = *s; s+=srcStride;\
1049 OP(*d, (src11+src12)*20 - (src10+src13)*5 + (src9 +src14));d+=dstStride;\
1050 src15 = *s; s+=srcStride;\
1051 OP(*d, (src12+src13)*20 - (src11+src14)*5 + (src10+src15));d+=dstStride;\
1052 src16 = *s; s+=srcStride;\
1053 OP(*d, (src13+src14)*20 - (src12+src15)*5 + (src11+src16));d+=dstStride;\
1054 src17 = *s; s+=srcStride;\
1055 OP(*d, (src14+src15)*20 - (src13+src16)*5 + (src12+src17));d+=dstStride;\
1056 src18 = *s; s+=srcStride;\
1057 OP(*d, (src15+src16)*20 - (src14+src17)*5 + (src13+src18));d+=dstStride;\
1065 static inline void OPNAME ## h264_qpel_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride,int w,int h){ /* horizontal 6-tap into int16 tmp, then vertical 6-tap written via OP2 */ \
1066 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
1068 src -= 2*srcStride; /* include two rows of context above */ \
1071 int srcB,srcA,src0,src1,src2,src3,src4,src5,src6;\
1072 uint8_t *s = src-2;\
1079 tmp[0] = ((src0+src1)*20 - (srcA+src2)*5 + (srcB+src3));\
1081 tmp[1] = ((src1+src2)*20 - (src0+src3)*5 + (srcA+src4));\
1083 tmp[2] = ((src2+src3)*20 - (src1+src4)*5 + (src0+src5));\
1085 tmp[3] = ((src3+src4)*20 - (src2+src5)*5 + (src1+src6));\
1086 if (w>4) { /* it optimized */ \
1087 int src7,src8,src9,src10; \
1089 tmp[4] = ((src4+src5)*20 - (src3+src6)*5 + (src2+src7));\
1091 tmp[5] = ((src5+src6)*20 - (src4+src7)*5 + (src3+src8));\
1093 tmp[6] = ((src6+src7)*20 - (src5+src8)*5 + (src4+src9));\
1095 tmp[7] = ((src7+src8)*20 - (src6+src9)*5 + (src5+src10));\
1097 int src11,src12,src13,src14,src15,src16,src17,src18; \
1099 tmp[8] = ((src8 +src9 )*20 - (src7 +src10)*5 + (src6 +src11));\
1101 tmp[9] = ((src9 +src10)*20 - (src8 +src11)*5 + (src7 +src12));\
1103 tmp[10] = ((src10+src11)*20 - (src9 +src12)*5 + (src8 +src13));\
1105 tmp[11] = ((src11+src12)*20 - (src10+src13)*5 + (src9 +src14));\
1107 tmp[12] = ((src12+src13)*20 - (src11+src14)*5 + (src10+src15));\
1109 tmp[13] = ((src13+src14)*20 - (src12+src15)*5 + (src11+src16));\
1111 tmp[14] = ((src14+src15)*20 - (src13+src16)*5 + (src12+src17));\
1113 tmp[15] = ((src15+src16)*20 - (src14+src17)*5 + (src13+src18));\
1119 tmp -= tmpStride*(h+5-2); /* rewind tmp for the vertical pass -- TODO confirm against original */ \
1122 int tmpB,tmpA,tmp0,tmp1,tmp2,tmp3,tmp4,tmp5,tmp6;\
1123 int16_t *s = tmp-2*tmpStride; \
1125 tmpB = *s; s+=tmpStride;\
1126 tmpA = *s; s+=tmpStride;\
1127 tmp0 = *s; s+=tmpStride;\
1128 tmp1 = *s; s+=tmpStride;\
1129 tmp2 = *s; s+=tmpStride;\
1130 tmp3 = *s; s+=tmpStride;\
1131 OP2(*d, (tmp0+tmp1)*20 - (tmpA+tmp2)*5 + (tmpB+tmp3));d+=dstStride;\
1132 tmp4 = *s; s+=tmpStride;\
1133 OP2(*d, (tmp1+tmp2)*20 - (tmp0+tmp3)*5 + (tmpA+tmp4));d+=dstStride;\
1134 tmp5 = *s; s+=tmpStride;\
1135 OP2(*d, (tmp2+tmp3)*20 - (tmp1+tmp4)*5 + (tmp0+tmp5));d+=dstStride;\
1136 tmp6 = *s; s+=tmpStride;\
1137 OP2(*d, (tmp3+tmp4)*20 - (tmp2+tmp5)*5 + (tmp1+tmp6));d+=dstStride;\
1139 int tmp7,tmp8,tmp9,tmp10; \
1140 tmp7 = *s; s+=tmpStride;\
1141 OP2(*d, (tmp4+tmp5)*20 - (tmp3+tmp6)*5 + (tmp2+tmp7));d+=dstStride;\
1142 tmp8 = *s; s+=tmpStride;\
1143 OP2(*d, (tmp5+tmp6)*20 - (tmp4+tmp7)*5 + (tmp3+tmp8));d+=dstStride;\
1144 tmp9 = *s; s+=tmpStride;\
1145 OP2(*d, (tmp6+tmp7)*20 - (tmp5+tmp8)*5 + (tmp4+tmp9));d+=dstStride;\
1146 tmp10 = *s; s+=tmpStride;\
1147 OP2(*d, (tmp7+tmp8)*20 - (tmp6+tmp9)*5 + (tmp5+tmp10));d+=dstStride;\
1149 int tmp11,tmp12,tmp13,tmp14,tmp15,tmp16,tmp17,tmp18; \
1150 tmp11 = *s; s+=tmpStride;\
1151 OP2(*d , (tmp8 +tmp9 )*20 - (tmp7 +tmp10)*5 + (tmp6 +tmp11));d+=dstStride;\
1152 tmp12 = *s; s+=tmpStride;\
1153 OP2(*d , (tmp9 +tmp10)*20 - (tmp8 +tmp11)*5 + (tmp7 +tmp12));d+=dstStride;\
1154 tmp13 = *s; s+=tmpStride;\
1155 OP2(*d, (tmp10+tmp11)*20 - (tmp9 +tmp12)*5 + (tmp8 +tmp13));d+=dstStride;\
1156 tmp14 = *s; s+=tmpStride;\
1157 OP2(*d, (tmp11+tmp12)*20 - (tmp10+tmp13)*5 + (tmp9 +tmp14));d+=dstStride;\
1158 tmp15 = *s; s+=tmpStride;\
1159 OP2(*d, (tmp12+tmp13)*20 - (tmp11+tmp14)*5 + (tmp10+tmp15));d+=dstStride;\
1160 tmp16 = *s; s+=tmpStride;\
1161 OP2(*d, (tmp13+tmp14)*20 - (tmp12+tmp15)*5 + (tmp11+tmp16));d+=dstStride;\
1162 tmp17 = *s; s+=tmpStride;\
1163 OP2(*d, (tmp14+tmp15)*20 - (tmp13+tmp16)*5 + (tmp12+tmp17));d+=dstStride;\
1164 tmp18 = *s; s+=tmpStride;\
1165 OP2(*d, (tmp15+tmp16)*20 - (tmp14+tmp17)*5 + (tmp13+tmp18));d+=dstStride;\
1173 static void OPNAME ## h264_qpel4_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){ /* fixed-size (4/8/16) wrappers over the generic w,h kernels above */ \
1174 OPNAME ## h264_qpel_h_lowpass(dst,src,dstStride,srcStride,4,4); \
1176 static void OPNAME ## h264_qpel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
1177 OPNAME ## h264_qpel_h_lowpass(dst,src,dstStride,srcStride,8,8); \
1179 static void OPNAME ## h264_qpel16_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
1180 OPNAME ## h264_qpel_h_lowpass(dst,src,dstStride,srcStride,16,16); \
1183 static void OPNAME ## h264_qpel4_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
1184 OPNAME ## h264_qpel_v_lowpass(dst,src,dstStride,srcStride,4,4); \
1186 static void OPNAME ## h264_qpel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
1187 OPNAME ## h264_qpel_v_lowpass(dst,src,dstStride,srcStride,8,8); \
1189 static void OPNAME ## h264_qpel16_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
1190 OPNAME ## h264_qpel_v_lowpass(dst,src,dstStride,srcStride,16,16); \
1192 static void OPNAME ## h264_qpel4_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
1193 OPNAME ## h264_qpel_hv_lowpass(dst,tmp,src,dstStride,tmpStride,srcStride,4,4); \
1195 static void OPNAME ## h264_qpel8_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
1196 OPNAME ## h264_qpel_hv_lowpass(dst,tmp,src,dstStride,tmpStride,srcStride,8,8); \
1198 static void OPNAME ## h264_qpel16_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){\
1199 OPNAME ## h264_qpel_hv_lowpass(dst,tmp,src,dstStride,tmpStride,srcStride,16,16); \
1202 #define H264_MC(OPNAME, SIZE) /* generate the 16 quarter-pel MC position functions (mcXY = x,y quarter offsets) for one block SIZE */ \
1203 static void OPNAME ## h264_qpel ## SIZE ## _mc00_sh4 (uint8_t *dst, uint8_t *src, int stride){ /* (0,0): plain copy */ \
1204 OPNAME ## pixels ## SIZE ## _c(dst, src, stride, SIZE);\
1207 static void OPNAME ## h264_qpel ## SIZE ## _mc10_sh4(uint8_t *dst, uint8_t *src, int stride){ /* average of src and h-filtered half */ \
1208 uint8_t half[SIZE*SIZE];\
1209 put_h264_qpel ## SIZE ## _h_lowpass(half, src, SIZE, stride);\
1210 OPNAME ## pixels ## SIZE ## _l2_aligned2(dst, src, half, stride, stride, SIZE, SIZE);\
1213 static void OPNAME ## h264_qpel ## SIZE ## _mc20_sh4(uint8_t *dst, uint8_t *src, int stride){ /* h-filter straight into dst */ \
1214 OPNAME ## h264_qpel ## SIZE ## _h_lowpass(dst, src, stride, stride);\
1217 static void OPNAME ## h264_qpel ## SIZE ## _mc30_sh4(uint8_t *dst, uint8_t *src, int stride){ /* average of src+1 and h-filtered half */ \
1218 uint8_t half[SIZE*SIZE];\
1219 put_h264_qpel ## SIZE ## _h_lowpass(half, src, SIZE, stride);\
1220 OPNAME ## pixels ## SIZE ## _l2_aligned2(dst, src+1, half, stride, stride, SIZE, SIZE);\
1223 static void OPNAME ## h264_qpel ## SIZE ## _mc01_sh4(uint8_t *dst, uint8_t *src, int stride){ /* average of full-pel block and v-filtered half */ \
1224 uint8_t full[SIZE*(SIZE+5)];\
1225 uint8_t * const full_mid= full + SIZE*2;\
1226 uint8_t half[SIZE*SIZE];\
1227 copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\
1228 put_h264_qpel ## SIZE ## _v_lowpass(half, full_mid, SIZE, SIZE);\
1229 OPNAME ## pixels ## SIZE ## _l2_aligned(dst, full_mid, half, stride, SIZE, SIZE, SIZE);\
1232 static void OPNAME ## h264_qpel ## SIZE ## _mc02_sh4(uint8_t *dst, uint8_t *src, int stride){ /* v-filter straight into dst */ \
1233 uint8_t full[SIZE*(SIZE+5)];\
1234 uint8_t * const full_mid= full + SIZE*2;\
1235 copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\
1236 OPNAME ## h264_qpel ## SIZE ## _v_lowpass(dst, full_mid, stride, SIZE);\
1239 static void OPNAME ## h264_qpel ## SIZE ## _mc03_sh4(uint8_t *dst, uint8_t *src, int stride){ /* average of row-shifted full-pel block and v-filtered half */ \
1240 uint8_t full[SIZE*(SIZE+5)];\
1241 uint8_t * const full_mid= full + SIZE*2;\
1242 uint8_t half[SIZE*SIZE];\
1243 copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\
1244 put_h264_qpel ## SIZE ## _v_lowpass(half, full_mid, SIZE, SIZE);\
1245 OPNAME ## pixels ## SIZE ## _l2_aligned(dst, full_mid+SIZE, half, stride, SIZE, SIZE, SIZE);\
1248 static void OPNAME ## h264_qpel ## SIZE ## _mc11_sh4(uint8_t *dst, uint8_t *src, int stride){ /* diagonal positions: average of h-filtered and v-filtered halves */ \
1249 uint8_t full[SIZE*(SIZE+5)];\
1250 uint8_t * const full_mid= full + SIZE*2;\
1251 uint8_t halfH[SIZE*SIZE];\
1252 uint8_t halfV[SIZE*SIZE];\
1253 put_h264_qpel ## SIZE ## _h_lowpass(halfH, src, SIZE, stride);\
1254 copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\
1255 put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
1256 OPNAME ## pixels ## SIZE ## _l2_aligned(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\
1259 static void OPNAME ## h264_qpel ## SIZE ## _mc31_sh4(uint8_t *dst, uint8_t *src, int stride){ /* as mc11, v-filter taken one column right */ \
1260 uint8_t full[SIZE*(SIZE+5)];\
1261 uint8_t * const full_mid= full + SIZE*2;\
1262 uint8_t halfH[SIZE*SIZE];\
1263 uint8_t halfV[SIZE*SIZE];\
1264 put_h264_qpel ## SIZE ## _h_lowpass(halfH, src, SIZE, stride);\
1265 copy_block ## SIZE (full, src - stride*2 + 1, SIZE, stride, SIZE + 5);\
1266 put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
1267 OPNAME ## pixels ## SIZE ## _l2_aligned(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\
1270 static void OPNAME ## h264_qpel ## SIZE ## _mc13_sh4(uint8_t *dst, uint8_t *src, int stride){ /* as mc11, h-filter taken one row down */ \
1271 uint8_t full[SIZE*(SIZE+5)];\
1272 uint8_t * const full_mid= full + SIZE*2;\
1273 uint8_t halfH[SIZE*SIZE];\
1274 uint8_t halfV[SIZE*SIZE];\
1275 put_h264_qpel ## SIZE ## _h_lowpass(halfH, src + stride, SIZE, stride);\
1276 copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\
1277 put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
1278 OPNAME ## pixels ## SIZE ## _l2_aligned(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\
1281 static void OPNAME ## h264_qpel ## SIZE ## _mc33_sh4(uint8_t *dst, uint8_t *src, int stride){ /* h-filter one row down, v-filter one column right */ \
1282 uint8_t full[SIZE*(SIZE+5)];\
1283 uint8_t * const full_mid= full + SIZE*2;\
1284 uint8_t halfH[SIZE*SIZE];\
1285 uint8_t halfV[SIZE*SIZE];\
1286 put_h264_qpel ## SIZE ## _h_lowpass(halfH, src + stride, SIZE, stride);\
1287 copy_block ## SIZE (full, src - stride*2 + 1, SIZE, stride, SIZE + 5);\
1288 put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
1289 OPNAME ## pixels ## SIZE ## _l2_aligned(dst, halfH, halfV, stride, SIZE, SIZE, SIZE);\
1292 static void OPNAME ## h264_qpel ## SIZE ## _mc22_sh4(uint8_t *dst, uint8_t *src, int stride){ /* (2,2): hv-filter straight into dst */ \
1293 int16_t tmp[SIZE*(SIZE+5)];\
1294 OPNAME ## h264_qpel ## SIZE ## _hv_lowpass(dst, tmp, src, stride, SIZE, stride);\
1297 static void OPNAME ## h264_qpel ## SIZE ## _mc21_sh4(uint8_t *dst, uint8_t *src, int stride){ /* average of h-filtered and hv-filtered halves */ \
1298 int16_t tmp[SIZE*(SIZE+5)];\
1299 uint8_t halfH[SIZE*SIZE];\
1300 uint8_t halfHV[SIZE*SIZE];\
1301 put_h264_qpel ## SIZE ## _h_lowpass(halfH, src, SIZE, stride);\
1302 put_h264_qpel ## SIZE ## _hv_lowpass(halfHV, tmp, src, SIZE, SIZE, stride);\
1303 OPNAME ## pixels ## SIZE ## _l2_aligned(dst, halfH, halfHV, stride, SIZE, SIZE, SIZE);\
1306 static void OPNAME ## h264_qpel ## SIZE ## _mc23_sh4(uint8_t *dst, uint8_t *src, int stride){ /* as mc21, h-filter taken one row down */ \
1307 int16_t tmp[SIZE*(SIZE+5)];\
1308 uint8_t halfH[SIZE*SIZE];\
1309 uint8_t halfHV[SIZE*SIZE];\
1310 put_h264_qpel ## SIZE ## _h_lowpass(halfH, src + stride, SIZE, stride);\
1311 put_h264_qpel ## SIZE ## _hv_lowpass(halfHV, tmp, src, SIZE, SIZE, stride);\
1312 OPNAME ## pixels ## SIZE ## _l2_aligned(dst, halfH, halfHV, stride, SIZE, SIZE, SIZE);\
1315 static void OPNAME ## h264_qpel ## SIZE ## _mc12_sh4(uint8_t *dst, uint8_t *src, int stride){ /* average of v-filtered and hv-filtered halves */ \
1316 uint8_t full[SIZE*(SIZE+5)];\
1317 uint8_t * const full_mid= full + SIZE*2;\
1318 int16_t tmp[SIZE*(SIZE+5)];\
1319 uint8_t halfV[SIZE*SIZE];\
1320 uint8_t halfHV[SIZE*SIZE];\
1321 copy_block ## SIZE (full, src - stride*2, SIZE, stride, SIZE + 5);\
1322 put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
1323 put_h264_qpel ## SIZE ## _hv_lowpass(halfHV, tmp, src, SIZE, SIZE, stride);\
1324 OPNAME ## pixels ## SIZE ## _l2_aligned(dst, halfV, halfHV, stride, SIZE, SIZE, SIZE);\
1327 static void OPNAME ## h264_qpel ## SIZE ## _mc32_sh4(uint8_t *dst, uint8_t *src, int stride){ /* as mc12, v-filter taken one column right */ \
1328 uint8_t full[SIZE*(SIZE+5)];\
1329 uint8_t * const full_mid= full + SIZE*2;\
1330 int16_t tmp[SIZE*(SIZE+5)];\
1331 uint8_t halfV[SIZE*SIZE];\
1332 uint8_t halfHV[SIZE*SIZE];\
1333 copy_block ## SIZE (full, src - stride*2 + 1, SIZE, stride, SIZE + 5);\
1334 put_h264_qpel ## SIZE ## _v_lowpass(halfV, full_mid, SIZE, SIZE);\
1335 put_h264_qpel ## SIZE ## _hv_lowpass(halfHV, tmp, src, SIZE, SIZE, stride);\
1336 OPNAME ## pixels ## SIZE ## _l2_aligned(dst, halfV, halfHV, stride, SIZE, SIZE, SIZE);\
1339 #define op_avg(a, b) a = (((a)+cm[((b) + 16)>>5]+1)>>1) /* H264_LOWPASS ops; NOTE(review): op_avg/op_put repeat the earlier definitions and the preceding #undef lines appear to be missing from this paste -- confirm against the original file */
1340 //#define op_avg2(a, b) a = (((a)*w1+cm[((b) + 16)>>5]*w2 + o + 64)>>7)
1341 #define op_put(a, b) a = cm[((b) + 16)>>5] /* single-pass sum: rescale by (b+16)>>5, clip via cm */
1342 #define op2_avg(a, b) a = (((a)+cm[((b) + 512)>>10]+1)>>1)
1343 #define op2_put(a, b) a = cm[((b) + 512)>>10] /* two-pass (hv) sum: rescale by (b+512)>>10, clip via cm */
/* Emit the put_* and avg_* H.264 qpel kernel families.
 * NOTE(review): the H264_MC(put_/avg_, 4/8/16) instantiations that should
 * follow are missing from this paste. */
1345 H264_LOWPASS(put_ , op_put, op2_put)
1346 H264_LOWPASS(avg_ , op_avg, op2_avg)
/* WMV2 horizontal halfpel filter: 4-tap (-1,9,9,-1) with +8 rounding, >>4,
 * clipped through cm, for an 8-pixel row; h rows in total.
 * NOTE(review): the row loop, the srcN loads and the pointer advances are
 * missing from this paste -- only the eight filtered stores are visible. */
1360 static void wmv2_mspel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){
1361 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
1364 int src_1,src0,src1,src2,src3,src4,src5,src6,src7,src8,src9;
1370 dst[0]= cm[(9*(src0 + src1) - (src_1 + src2) + 8)>>4];
1372 dst[1]= cm[(9*(src1 + src2) - (src0 + src3) + 8)>>4];
1374 dst[2]= cm[(9*(src2 + src3) - (src1 + src4) + 8)>>4];
1376 dst[3]= cm[(9*(src3 + src4) - (src2 + src5) + 8)>>4];
1378 dst[4]= cm[(9*(src4 + src5) - (src3 + src6) + 8)>>4];
1380 dst[5]= cm[(9*(src5 + src6) - (src4 + src7) + 8)>>4];
1382 dst[6]= cm[(9*(src6 + src7) - (src5 + src8) + 8)>>4];
1384 dst[7]= cm[(9*(src7 + src8) - (src6 + src9) + 8)>>4];
/* WMV2 vertical halfpel filter: same 4-tap (-1,9,9,-1)/16 kernel applied
 * down each column; s walks the source column, d the destination column.
 * NOTE(review): the load of src9, the column loop and the closing braces
 * are missing from this paste. */
1390 static void wmv2_mspel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int w){
1391 uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
1394 int src_1,src0,src1,src2,src3,src4,src5,src6,src7,src8,src9;
1395 uint8_t *s = src,*d = dst;
1396 src_1 = *(s-srcStride);
1397 src0 = *s; s+=srcStride;
1398 src1 = *s; s+=srcStride;
1399 src2 = *s; s+=srcStride;
1400 *d= cm[(9*(src0 + src1) - (src_1 + src2) + 8)>>4]; d+=dstStride;
1401 src3 = *s; s+=srcStride;
1402 *d= cm[(9*(src1 + src2) - (src0 + src3) + 8)>>4]; d+=dstStride;
1403 src4 = *s; s+=srcStride;
1404 *d= cm[(9*(src2 + src3) - (src1 + src4) + 8)>>4]; d+=dstStride;
1405 src5 = *s; s+=srcStride;
1406 *d= cm[(9*(src3 + src4) - (src2 + src5) + 8)>>4]; d+=dstStride;
1407 src6 = *s; s+=srcStride;
1408 *d= cm[(9*(src4 + src5) - (src3 + src6) + 8)>>4]; d+=dstStride;
1409 src7 = *s; s+=srcStride;
1410 *d= cm[(9*(src5 + src6) - (src4 + src7) + 8)>>4]; d+=dstStride;
1411 src8 = *s; s+=srcStride;
1412 *d= cm[(9*(src6 + src7) - (src5 + src8) + 8)>>4]; d+=dstStride;
1414 *d= cm[(9*(src7 + src8) - (src6 + src9) + 8)>>4]; d+=dstStride;
/* (0,0) position: plain 8x8 block copy.
 * NOTE(review): the closing brace is missing from this paste. */
1420 static void put_mspel8_mc00_sh4 (uint8_t *dst, uint8_t *src, int stride){
1421 put_pixels8_c(dst, src, stride, 8);
/* Quarter-pel left of halfpel: average src with the h-filtered block.
 * NOTE(review): the local `half` buffer declaration and the closing brace
 * are missing from this paste. */
1424 static void put_mspel8_mc10_sh4(uint8_t *dst, uint8_t *src, int stride){
1426 wmv2_mspel8_h_lowpass(half, src, 8, stride, 8);
1427 put_pixels8_l2_aligned2(dst, src, half, stride, stride, 8, 8);
/* Horizontal halfpel: filter straight into dst.
 * NOTE(review): the closing brace is missing from this paste. */
1430 static void put_mspel8_mc20_sh4(uint8_t *dst, uint8_t *src, int stride){
1431 wmv2_mspel8_h_lowpass(dst, src, stride, stride, 8);
/* Quarter-pel right of halfpel: average src+1 with the h-filtered block.
 * NOTE(review): the local `half` buffer declaration and the closing brace
 * are missing from this paste. */
1434 static void put_mspel8_mc30_sh4(uint8_t *dst, uint8_t *src, int stride){
1436 wmv2_mspel8_h_lowpass(half, src, 8, stride, 8);
1437 put_pixels8_l2_aligned2(dst, src+1, half, stride, stride, 8, 8);
/* Vertical halfpel: filter straight into dst.
 * NOTE(review): the closing brace is missing from this paste. */
1440 static void put_mspel8_mc02_sh4(uint8_t *dst, uint8_t *src, int stride){
1441 wmv2_mspel8_v_lowpass(dst, src, stride, stride, 8);
/* Mixed position: average the v-filtered block with the hv-filtered one
 * (h-filter 11 rows starting one above, then v-filter that result).
 * NOTE(review): the halfH/halfV/halfHV buffer declarations and the closing
 * brace are missing from this paste. */
1444 static void put_mspel8_mc12_sh4(uint8_t *dst, uint8_t *src, int stride){
1448 wmv2_mspel8_h_lowpass(halfH, src-stride, 8, stride, 11);
1449 wmv2_mspel8_v_lowpass(halfV, src, 8, stride, 8);
1450 wmv2_mspel8_v_lowpass(halfHV, halfH+8, 8, 8, 8);
1451 put_pixels8_l2_aligned(dst, halfV, halfHV, stride, 8, 8, 8);
/* As mc12, but the vertical half is taken one column to the right (src+1).
 * NOTE(review): the halfH/halfV/halfHV buffer declarations and the closing
 * brace are missing from this paste. */
1453 static void put_mspel8_mc32_sh4(uint8_t *dst, uint8_t *src, int stride){
1457 wmv2_mspel8_h_lowpass(halfH, src-stride, 8, stride, 11);
1458 wmv2_mspel8_v_lowpass(halfV, src+1, 8, stride, 8);
1459 wmv2_mspel8_v_lowpass(halfHV, halfH+8, 8, 8, 8);
1460 put_pixels8_l2_aligned(dst, halfV, halfHV, stride, 8, 8, 8);
1462 static void put_mspel8_mc22_sh4(uint8_t *dst, uint8_t *src, int stride){
1464 wmv2_mspel8_h_lowpass(halfH, src-stride, 8, stride, 11);
1465 wmv2_mspel8_v_lowpass(dst, halfH+8, stride, 8, 8);