/*
 * Copyright (c) 2000, 2001 Fabrice Bellard
 * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
 *
 * gmc & q-pel & 32/64 bit based MC by Michael Niedermayer <michaelni@gmx.at>
 *
 * This file is part of Libav.
 *
 * Libav is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * Libav is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with Libav; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
/* draw the edges of width 'w' of an image of size width, height */
// FIXME: Check that this is OK for MPEG-4 interlaced.
34 static void draw_edges_8_c(uint8_t *buf, int wrap, int width, int height,
35 int w, int h, int sides)
37 uint8_t *ptr = buf, *last_line;
41 for (i = 0; i < height; i++) {
42 memset(ptr - w, ptr[0], w);
43 memset(ptr + width, ptr[width - 1], w);
47 /* top and bottom + corners */
49 last_line = buf + (height - 1) * wrap;
51 for (i = 0; i < h; i++)
53 memcpy(buf - (i + 1) * wrap, buf, width + w + w);
54 if (sides & EDGE_BOTTOM)
55 for (i = 0; i < h; i++)
57 memcpy(last_line + (i + 1) * wrap, last_line, width + w + w);
/* Zero one 8x8 block (64 entries) of 16-bit coefficients. */
static void clear_block_8_c(int16_t *block)
{
    memset(block, 0, 64 * sizeof(*block));
}
/* Zero six consecutive 64-entry blocks of 16-bit coefficients. */
static void clear_blocks_8_c(int16_t *blocks)
{
    memset(blocks, 0, 6 * 64 * sizeof(*blocks));
}
/*
 * PIXOP2(OPNAME, OP) instantiates the two- and four-input half-pel
 * averaging primitives for 8-bit pixels.  OP(dst, val) supplies the
 * store policy (plain store for "put", rounded average with the
 * existing destination for "avg"); OPNAME prefixes the generated
 * function names.
 *
 * All routines process four pixels at a time in one 32-bit word.  To
 * average without inter-byte carries, each word is split into the low
 * two bits of every byte (masked with 0x03030303) and the remaining
 * high bits pre-shifted right by two (masked with 0xFCFCFCFC); the
 * partial sums are recombined at the end.  The four-input average
 * folds its rounding bias into the low-bit sum: +2 per byte
 * (0x02020202) for the rounding variants, +1 per byte (0x01010101)
 * for the "no_rnd" variants.
 */
#define PIXOP2(OPNAME, OP)                                              \
static inline void OPNAME ## _no_rnd_pixels8_l2_8(uint8_t *dst,         \
                                                  const uint8_t *src1,  \
                                                  const uint8_t *src2,  \
                                                  int dst_stride,       \
                                                  int src_stride1,      \
                                                  int src_stride2,      \
                                                  int h)                \
{                                                                       \
    int i;                                                              \
                                                                        \
    for (i = 0; i < h; i++) {                                           \
        uint32_t a, b;                                                  \
        a = AV_RN32(&src1[i * src_stride1]);                            \
        b = AV_RN32(&src2[i * src_stride2]);                            \
        OP(*((uint32_t *) &dst[i * dst_stride]),                        \
           no_rnd_avg32(a, b));                                         \
        a = AV_RN32(&src1[i * src_stride1 + 4]);                        \
        b = AV_RN32(&src2[i * src_stride2 + 4]);                        \
        OP(*((uint32_t *) &dst[i * dst_stride + 4]),                    \
           no_rnd_avg32(a, b));                                         \
    }                                                                   \
}                                                                       \
                                                                        \
static inline void OPNAME ## _no_rnd_pixels16_l2_8(uint8_t *dst,        \
                                                   const uint8_t *src1, \
                                                   const uint8_t *src2, \
                                                   int dst_stride,      \
                                                   int src_stride1,     \
                                                   int src_stride2,     \
                                                   int h)               \
{                                                                       \
    /* 16 pixels wide = two independent 8-pixel halves */               \
    OPNAME ## _no_rnd_pixels8_l2_8(dst, src1, src2, dst_stride,         \
                                   src_stride1, src_stride2, h);        \
    OPNAME ## _no_rnd_pixels8_l2_8(dst + 8,                             \
                                   src1 + 8,                            \
                                   src2 + 8,                            \
                                   dst_stride, src_stride1,             \
                                   src_stride2, h);                     \
}                                                                       \
                                                                        \
static inline void OPNAME ## _pixels8_l4_8(uint8_t *dst,                \
                                           const uint8_t *src1,         \
                                           const uint8_t *src2,         \
                                           const uint8_t *src3,         \
                                           const uint8_t *src4,         \
                                           int dst_stride,              \
                                           int src_stride1,             \
                                           int src_stride2,             \
                                           int src_stride3,             \
                                           int src_stride4,             \
                                           int h)                       \
{                                                                       \
    /* FIXME HIGH BIT DEPTH */                                          \
    int i;                                                              \
                                                                        \
    for (i = 0; i < h; i++) {                                           \
        uint32_t a, b, c, d, l0, l1, h0, h1;                            \
        a = AV_RN32(&src1[i * src_stride1]);                            \
        b = AV_RN32(&src2[i * src_stride2]);                            \
        c = AV_RN32(&src3[i * src_stride3]);                            \
        d = AV_RN32(&src4[i * src_stride4]);                            \
        l0 = (a & 0x03030303UL) +                                       \
             (b & 0x03030303UL) +                                       \
                  0x02020202UL;                                         \
        h0 = ((a & 0xFCFCFCFCUL) >> 2) +                                \
             ((b & 0xFCFCFCFCUL) >> 2);                                 \
        l1 = (c & 0x03030303UL) +                                       \
             (d & 0x03030303UL);                                        \
        h1 = ((c & 0xFCFCFCFCUL) >> 2) +                                \
             ((d & 0xFCFCFCFCUL) >> 2);                                 \
        OP(*((uint32_t *) &dst[i * dst_stride]),                        \
           h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0FUL));                \
        a = AV_RN32(&src1[i * src_stride1 + 4]);                        \
        b = AV_RN32(&src2[i * src_stride2 + 4]);                        \
        c = AV_RN32(&src3[i * src_stride3 + 4]);                        \
        d = AV_RN32(&src4[i * src_stride4 + 4]);                        \
        l0 = (a & 0x03030303UL) +                                       \
             (b & 0x03030303UL) +                                       \
                  0x02020202UL;                                         \
        h0 = ((a & 0xFCFCFCFCUL) >> 2) +                                \
             ((b & 0xFCFCFCFCUL) >> 2);                                 \
        l1 = (c & 0x03030303UL) +                                       \
             (d & 0x03030303UL);                                        \
        h1 = ((c & 0xFCFCFCFCUL) >> 2) +                                \
             ((d & 0xFCFCFCFCUL) >> 2);                                 \
        OP(*((uint32_t *) &dst[i * dst_stride + 4]),                    \
           h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0FUL));                \
    }                                                                   \
}                                                                       \
                                                                        \
static inline void OPNAME ## _no_rnd_pixels8_l4_8(uint8_t *dst,         \
                                                  const uint8_t *src1,  \
                                                  const uint8_t *src2,  \
                                                  const uint8_t *src3,  \
                                                  const uint8_t *src4,  \
                                                  int dst_stride,       \
                                                  int src_stride1,      \
                                                  int src_stride2,      \
                                                  int src_stride3,      \
                                                  int src_stride4,      \
                                                  int h)                \
{                                                                       \
    /* FIXME HIGH BIT DEPTH */                                          \
    int i;                                                              \
                                                                        \
    for (i = 0; i < h; i++) {                                           \
        uint32_t a, b, c, d, l0, l1, h0, h1;                            \
        a = AV_RN32(&src1[i * src_stride1]);                            \
        b = AV_RN32(&src2[i * src_stride2]);                            \
        c = AV_RN32(&src3[i * src_stride3]);                            \
        d = AV_RN32(&src4[i * src_stride4]);                            \
        l0 = (a & 0x03030303UL) +                                       \
             (b & 0x03030303UL) +                                       \
                  0x01010101UL;                                         \
        h0 = ((a & 0xFCFCFCFCUL) >> 2) +                                \
             ((b & 0xFCFCFCFCUL) >> 2);                                 \
        l1 = (c & 0x03030303UL) +                                       \
             (d & 0x03030303UL);                                        \
        h1 = ((c & 0xFCFCFCFCUL) >> 2) +                                \
             ((d & 0xFCFCFCFCUL) >> 2);                                 \
        OP(*((uint32_t *) &dst[i * dst_stride]),                        \
           h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0FUL));                \
        a = AV_RN32(&src1[i * src_stride1 + 4]);                        \
        b = AV_RN32(&src2[i * src_stride2 + 4]);                        \
        c = AV_RN32(&src3[i * src_stride3 + 4]);                        \
        d = AV_RN32(&src4[i * src_stride4 + 4]);                        \
        l0 = (a & 0x03030303UL) +                                       \
             (b & 0x03030303UL) +                                       \
                  0x01010101UL;                                         \
        h0 = ((a & 0xFCFCFCFCUL) >> 2) +                                \
             ((b & 0xFCFCFCFCUL) >> 2);                                 \
        l1 = (c & 0x03030303UL) +                                       \
             (d & 0x03030303UL);                                        \
        h1 = ((c & 0xFCFCFCFCUL) >> 2) +                                \
             ((d & 0xFCFCFCFCUL) >> 2);                                 \
        OP(*((uint32_t *) &dst[i * dst_stride + 4]),                    \
           h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0FUL));                \
    }                                                                   \
}                                                                       \
                                                                        \
static inline void OPNAME ## _pixels16_l4_8(uint8_t *dst,               \
                                            const uint8_t *src1,        \
                                            const uint8_t *src2,        \
                                            const uint8_t *src3,        \
                                            const uint8_t *src4,        \
                                            int dst_stride,             \
                                            int src_stride1,            \
                                            int src_stride2,            \
                                            int src_stride3,            \
                                            int src_stride4,            \
                                            int h)                      \
{                                                                       \
    OPNAME ## _pixels8_l4_8(dst, src1, src2, src3, src4, dst_stride,    \
                            src_stride1, src_stride2, src_stride3,      \
                            src_stride4, h);                            \
    OPNAME ## _pixels8_l4_8(dst + 8,                                    \
                            src1 + 8, src2 + 8,                         \
                            src3 + 8, src4 + 8,                         \
                            dst_stride, src_stride1, src_stride2,       \
                            src_stride3, src_stride4, h);               \
}                                                                       \
                                                                        \
static inline void OPNAME ## _no_rnd_pixels16_l4_8(uint8_t *dst,        \
                                                   const uint8_t *src1, \
                                                   const uint8_t *src2, \
                                                   const uint8_t *src3, \
                                                   const uint8_t *src4, \
                                                   int dst_stride,      \
                                                   int src_stride1,     \
                                                   int src_stride2,     \
                                                   int src_stride3,     \
                                                   int src_stride4,     \
                                                   int h)               \
{                                                                       \
    OPNAME ## _no_rnd_pixels8_l4_8(dst, src1, src2, src3, src4,         \
                                   dst_stride, src_stride1,             \
                                   src_stride2, src_stride3,            \
                                   src_stride4, h);                     \
    OPNAME ## _no_rnd_pixels8_l4_8(dst + 8,                             \
                                   src1 + 8, src2 + 8,                  \
                                   src3 + 8, src4 + 8,                  \
                                   dst_stride, src_stride1,             \
                                   src_stride2, src_stride3,            \
                                   src_stride4, h);                     \
}                                                                       \
                                                                        \
static inline void OPNAME ## _pixels8_xy2_8_c(uint8_t *block,           \
                                              const uint8_t *pixels,    \
                                              ptrdiff_t line_size,      \
                                              int h)                    \
{                                                                       \
    /* FIXME HIGH BIT DEPTH */                                          \
    int j;                                                              \
                                                                        \
    /* j selects the left / right 4-pixel half of the 8-wide block */   \
    for (j = 0; j < 2; j++) {                                           \
        int i;                                                          \
        const uint32_t a = AV_RN32(pixels);                             \
        const uint32_t b = AV_RN32(pixels + 1);                         \
        uint32_t l0 = (a & 0x03030303UL) +                              \
                      (b & 0x03030303UL) +                              \
                           0x02020202UL;                                \
        uint32_t h0 = ((a & 0xFCFCFCFCUL) >> 2) +                       \
                      ((b & 0xFCFCFCFCUL) >> 2);                        \
        uint32_t l1, h1;                                                \
                                                                        \
        pixels += line_size;                                            \
        /* two rows per iteration, reusing the shared middle row */     \
        for (i = 0; i < h; i += 2) {                                    \
            uint32_t a = AV_RN32(pixels);                               \
            uint32_t b = AV_RN32(pixels + 1);                           \
            l1 = (a & 0x03030303UL) +                                   \
                 (b & 0x03030303UL);                                    \
            h1 = ((a & 0xFCFCFCFCUL) >> 2) +                            \
                 ((b & 0xFCFCFCFCUL) >> 2);                             \
            OP(*((uint32_t *) block),                                   \
               h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0FUL));            \
            pixels += line_size;                                        \
            block  += line_size;                                        \
            a = AV_RN32(pixels);                                        \
            b = AV_RN32(pixels + 1);                                    \
            l0 = (a & 0x03030303UL) +                                   \
                 (b & 0x03030303UL) +                                   \
                      0x02020202UL;                                     \
            h0 = ((a & 0xFCFCFCFCUL) >> 2) +                            \
                 ((b & 0xFCFCFCFCUL) >> 2);                             \
            OP(*((uint32_t *) block),                                   \
               h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0FUL));            \
            pixels += line_size;                                        \
            block  += line_size;                                        \
        }                                                               \
        /* rewind to the top and step 4 bytes right for the 2nd half */ \
        pixels += 4 - line_size * (h + 1);                              \
        block  += 4 - line_size * h;                                    \
    }                                                                   \
}                                                                       \
                                                                        \
CALL_2X_PIXELS(OPNAME ## _pixels16_xy2_8_c,                             \
               OPNAME ## _pixels8_xy2_8_c,                              \
               8)
/* Store policy for PIXOP2 "avg": combine the new value with the existing
 * destination word using the rounded byte-wise average. */
#define op_avg(a, b) a = rnd_avg32(a, b)
/* Store policy for PIXOP2 "put": plain overwrite of the destination. */
#define op_put(a, b) a = b
/* An 8-bit "put" involves no rounding, so the no_rnd variant is simply
 * an alias for the normal one. */
#define put_no_rnd_pixels8_8_c put_pixels8_8_c