/*
 * Copyright (c) 2000, 2001 Fabrice Bellard
 * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
 *
 * gmc & q-pel & 32/64 bit based MC by Michael Niedermayer <michaelni@gmx.at>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
/*
 * PIXOP2(OPNAME, OP) generates the 8-bit half-pel "l2"/"l4" averaging
 * helpers for one store operation OP(dst_word, value):
 *
 *   OPNAME ## _no_rnd_pixels{8,16}_l2_8 : average of two source rows
 *       using the truncating (no-rounding) byte-wise average
 *       no_rnd_avg32(), processed 8 bytes per row as two 32-bit lanes.
 *   OPNAME ## _pixels{8,16}_l4_8        : average of four source rows
 *       via SWAR arithmetic on packed bytes; the low two bits of each
 *       byte are summed separately (l0/l1) from the high six bits
 *       (h0/h1) so carries cannot cross byte lanes.  The rounding
 *       variant biases each byte's low-bit sum with 0x02 (round to
 *       nearest), the _no_rnd_ variant with 0x01.
 *
 * OP is e.g. op_put (plain store) or op_avg (blend with existing dst).
 * All loads go through AV_RN32(), so the sources need not be aligned.
 * NOTE(review): 8-bit depth only — see the FIXME markers below.
 */
#define PIXOP2(OPNAME, OP)                                              \
static inline void OPNAME ## _no_rnd_pixels8_l2_8(uint8_t *dst,         \
                                                  const uint8_t *src1,  \
                                                  const uint8_t *src2,  \
                                                  int dst_stride,       \
                                                  int src_stride1,      \
                                                  int src_stride2,      \
                                                  int h)                \
{                                                                       \
    int i;                                                              \
                                                                        \
    for (i = 0; i < h; i++) {                                           \
        uint32_t a, b;                                                  \
        a = AV_RN32(&src1[i * src_stride1]);                            \
        b = AV_RN32(&src2[i * src_stride2]);                            \
        OP(*((uint32_t *) &dst[i * dst_stride]),                        \
           no_rnd_avg32(a, b));                                         \
        a = AV_RN32(&src1[i * src_stride1 + 4]);                        \
        b = AV_RN32(&src2[i * src_stride2 + 4]);                        \
        OP(*((uint32_t *) &dst[i * dst_stride + 4]),                    \
           no_rnd_avg32(a, b));                                         \
    }                                                                   \
}                                                                       \
                                                                        \
static inline void OPNAME ## _no_rnd_pixels16_l2_8(uint8_t *dst,        \
                                                   const uint8_t *src1, \
                                                   const uint8_t *src2, \
                                                   int dst_stride,      \
                                                   int src_stride1,     \
                                                   int src_stride2,     \
                                                   int h)               \
{                                                                       \
    /* 16-wide = two independent 8-wide halves */                       \
    OPNAME ## _no_rnd_pixels8_l2_8(dst, src1, src2, dst_stride,         \
                                   src_stride1, src_stride2, h);        \
    OPNAME ## _no_rnd_pixels8_l2_8(dst + 8,                             \
                                   src1 + 8, src2 + 8,                  \
                                   dst_stride, src_stride1,             \
                                   src_stride2, h);                     \
}                                                                       \
                                                                        \
static inline void OPNAME ## _pixels8_l4_8(uint8_t *dst,                \
                                           const uint8_t *src1,         \
                                           const uint8_t *src2,         \
                                           const uint8_t *src3,         \
                                           const uint8_t *src4,         \
                                           int dst_stride,              \
                                           int src_stride1,             \
                                           int src_stride2,             \
                                           int src_stride3,             \
                                           int src_stride4,             \
                                           int h)                       \
{                                                                       \
    /* FIXME HIGH BIT DEPTH */                                          \
    int i;                                                              \
                                                                        \
    for (i = 0; i < h; i++) {                                           \
        uint32_t a, b, c, d, l0, l1, h0, h1;                            \
        a = AV_RN32(&src1[i * src_stride1]);                            \
        b = AV_RN32(&src2[i * src_stride2]);                            \
        c = AV_RN32(&src3[i * src_stride3]);                            \
        d = AV_RN32(&src4[i * src_stride4]);                            \
        l0 = (a & 0x03030303UL) +                                       \
             (b & 0x03030303UL) +                                       \
              0x02020202UL;     /* +2 per byte: round to nearest */     \
        h0 = ((a & 0xFCFCFCFCUL) >> 2) +                                \
             ((b & 0xFCFCFCFCUL) >> 2);                                 \
        l1 = (c & 0x03030303UL) +                                       \
             (d & 0x03030303UL);                                        \
        h1 = ((c & 0xFCFCFCFCUL) >> 2) +                                \
             ((d & 0xFCFCFCFCUL) >> 2);                                 \
        OP(*((uint32_t *) &dst[i * dst_stride]),                        \
           h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0FUL));                \
        a = AV_RN32(&src1[i * src_stride1 + 4]);                        \
        b = AV_RN32(&src2[i * src_stride2 + 4]);                        \
        c = AV_RN32(&src3[i * src_stride3 + 4]);                        \
        d = AV_RN32(&src4[i * src_stride4 + 4]);                        \
        l0 = (a & 0x03030303UL) +                                       \
             (b & 0x03030303UL) +                                       \
              0x02020202UL;                                             \
        h0 = ((a & 0xFCFCFCFCUL) >> 2) +                                \
             ((b & 0xFCFCFCFCUL) >> 2);                                 \
        l1 = (c & 0x03030303UL) +                                       \
             (d & 0x03030303UL);                                        \
        h1 = ((c & 0xFCFCFCFCUL) >> 2) +                                \
             ((d & 0xFCFCFCFCUL) >> 2);                                 \
        OP(*((uint32_t *) &dst[i * dst_stride + 4]),                    \
           h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0FUL));                \
    }                                                                   \
}                                                                       \
                                                                        \
static inline void OPNAME ## _no_rnd_pixels8_l4_8(uint8_t *dst,         \
                                                  const uint8_t *src1,  \
                                                  const uint8_t *src2,  \
                                                  const uint8_t *src3,  \
                                                  const uint8_t *src4,  \
                                                  int dst_stride,       \
                                                  int src_stride1,      \
                                                  int src_stride2,      \
                                                  int src_stride3,      \
                                                  int src_stride4,      \
                                                  int h)                \
{                                                                       \
    /* FIXME HIGH BIT DEPTH */                                          \
    int i;                                                              \
                                                                        \
    for (i = 0; i < h; i++) {                                           \
        uint32_t a, b, c, d, l0, l1, h0, h1;                            \
        a = AV_RN32(&src1[i * src_stride1]);                            \
        b = AV_RN32(&src2[i * src_stride2]);                            \
        c = AV_RN32(&src3[i * src_stride3]);                            \
        d = AV_RN32(&src4[i * src_stride4]);                            \
        l0 = (a & 0x03030303UL) +                                       \
             (b & 0x03030303UL) +                                       \
              0x01010101UL;     /* +1 per byte: truncating variant */   \
        h0 = ((a & 0xFCFCFCFCUL) >> 2) +                                \
             ((b & 0xFCFCFCFCUL) >> 2);                                 \
        l1 = (c & 0x03030303UL) +                                       \
             (d & 0x03030303UL);                                        \
        h1 = ((c & 0xFCFCFCFCUL) >> 2) +                                \
             ((d & 0xFCFCFCFCUL) >> 2);                                 \
        OP(*((uint32_t *) &dst[i * dst_stride]),                        \
           h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0FUL));                \
        a = AV_RN32(&src1[i * src_stride1 + 4]);                        \
        b = AV_RN32(&src2[i * src_stride2 + 4]);                        \
        c = AV_RN32(&src3[i * src_stride3 + 4]);                        \
        d = AV_RN32(&src4[i * src_stride4 + 4]);                        \
        l0 = (a & 0x03030303UL) +                                       \
             (b & 0x03030303UL) +                                       \
              0x01010101UL;                                             \
        h0 = ((a & 0xFCFCFCFCUL) >> 2) +                                \
             ((b & 0xFCFCFCFCUL) >> 2);                                 \
        l1 = (c & 0x03030303UL) +                                       \
             (d & 0x03030303UL);                                        \
        h1 = ((c & 0xFCFCFCFCUL) >> 2) +                                \
             ((d & 0xFCFCFCFCUL) >> 2);                                 \
        OP(*((uint32_t *) &dst[i * dst_stride + 4]),                    \
           h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0FUL));                \
    }                                                                   \
}                                                                       \
                                                                        \
static inline void OPNAME ## _pixels16_l4_8(uint8_t *dst,               \
                                            const uint8_t *src1,        \
                                            const uint8_t *src2,        \
                                            const uint8_t *src3,        \
                                            const uint8_t *src4,        \
                                            int dst_stride,             \
                                            int src_stride1,            \
                                            int src_stride2,            \
                                            int src_stride3,            \
                                            int src_stride4,            \
                                            int h)                      \
{                                                                       \
    OPNAME ## _pixels8_l4_8(dst, src1, src2, src3, src4, dst_stride,    \
                            src_stride1, src_stride2, src_stride3,      \
                            src_stride4, h);                            \
    OPNAME ## _pixels8_l4_8(dst + 8,                                    \
                            src1 + 8, src2 + 8,                         \
                            src3 + 8, src4 + 8,                         \
                            dst_stride, src_stride1, src_stride2,       \
                            src_stride3, src_stride4, h);               \
}                                                                       \
                                                                        \
static inline void OPNAME ## _no_rnd_pixels16_l4_8(uint8_t *dst,        \
                                                   const uint8_t *src1, \
                                                   const uint8_t *src2, \
                                                   const uint8_t *src3, \
                                                   const uint8_t *src4, \
                                                   int dst_stride,      \
                                                   int src_stride1,     \
                                                   int src_stride2,     \
                                                   int src_stride3,     \
                                                   int src_stride4,     \
                                                   int h)               \
{                                                                       \
    OPNAME ## _no_rnd_pixels8_l4_8(dst, src1, src2, src3, src4,         \
                                   dst_stride, src_stride1,             \
                                   src_stride2, src_stride3,            \
                                   src_stride4, h);                     \
    OPNAME ## _no_rnd_pixels8_l4_8(dst + 8,                             \
                                   src1 + 8, src2 + 8,                  \
                                   src3 + 8, src4 + 8,                  \
                                   dst_stride, src_stride1,             \
                                   src_stride2, src_stride3,            \
                                   src_stride4, h);                     \
}
/* Store operators passed as the OP argument of PIXOP2:
 * op_avg blends the computed value into dst with a rounding byte-wise
 * average (rnd_avg32 is provided elsewhere); op_put stores it plainly.
 * NOTE: a is evaluated as an lvalue, b once — keep arguments simple. */
#define op_avg(a, b) a = rnd_avg32(a, b)
#define op_put(a, b) a = b
/* A no-rounding put of a single source is identical to a plain put
 * (there is no average whose rounding could differ), so alias it
 * rather than generating duplicate code. */
#define put_no_rnd_pixels8_8_c put_pixels8_8_c