2 ===================================================================
3 --- include/mpeg2.h (révision 1193)
4 +++ include/mpeg2.h (copie de travail)
6 #define MPEG2_ACCEL_SPARC_VIS 1
7 #define MPEG2_ACCEL_SPARC_VIS2 2
8 #define MPEG2_ACCEL_ARM 1
9 +#define MPEG2_ACCEL_ARM_NEON 2
10 #define MPEG2_ACCEL_DETECT 0x80000000
12 uint32_t mpeg2_accel (uint32_t accel);
13 Index: libmpeg2/motion_comp_neon.c
14 ===================================================================
15 --- libmpeg2/motion_comp_neon.c (révision 0)
16 +++ libmpeg2/motion_comp_neon.c (révision 0)
19 + * motion_comp_neon.c
20 + * Copyright (C) 2009 Rémi Denis-Courmont
22 + * This file is part of mpeg2dec, a free MPEG-2 video stream decoder.
23 + * See http://libmpeg2.sourceforge.net/ for updates.
25 + * mpeg2dec is free software; you can redistribute it and/or modify
26 + * it under the terms of the GNU General Public License as published by
27 + * the Free Software Foundation; either version 2 of the License, or
28 + * (at your option) any later version.
30 + * mpeg2dec is distributed in the hope that it will be useful,
31 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
32 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
33 + * GNU General Public License for more details.
35 + * You should have received a copy of the GNU General Public License along
36 + * with mpeg2dec; if not, write to the Free Software Foundation, Inc.,
37 + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
42 +#if defined(ARCH_ARM_NEON)
48 +#include "attributes.h"
49 +#include "mpeg2_internal.h"
52 +static void MC_put_o_16_neon (uint8_t * dest, const uint8_t * ref,
53 + const int stride, int height)
56 + memcpy (dest, ref, 16);
62 +static void MC_put_o_8_neon (uint8_t * dest, const uint8_t * ref,
63 + const int stride, int height)
66 + memcpy (dest, ref, 8);
72 +/* dest = (src1 + src2 + 1) / 2 */
73 +static void MC_avg_1_16_neon (uint8_t * dest, const uint8_t * src1,
74 + const uint8_t * src2,
75 + const int stride, unsigned height)
79 + "vld1.u8 {q0}, [%[src1]]\n"
80 + "vld1.u8 {q1}, [%[src2]]\n"
81 + "vrhadd.u8 q0, q0, q1\n"
82 + /* XXX: three-cycle stall */
83 + "vst1.u8 {q0}, [%[dest]]\n"
85 + : [dest]"r"(dest), [src1]"r"(src1), [src2]"r"(src2)
86 + : "memory", "q0", "q1");
93 +static void MC_avg_1_8_neon (uint8_t * dest, const uint8_t * src1,
94 + const uint8_t * src2,
95 + const int stride, unsigned height)
99 + "vld1.u8 {d0}, [%[src1]]\n"
100 + "vld1.u8 {d1}, [%[src2]]\n"
101 + "vrhadd.u8 d0, d0, d1\n"
102 + "vst1.u8 {d0}, [%[dest]]\n"
104 + : [dest]"r"(dest), [src1]"r"(src1), [src2]"r"(src2)
110 + } while (--height);
113 +/* dest = (dest + ((src1 + src2 + 1) / 2) + 1) / 2 */
114 +static void MC_avg_2_16_neon (uint8_t * dest, const uint8_t * src1,
115 + const uint8_t * src2,
116 + const int stride, unsigned height)
120 + "vld1.u8 {q0}, [%[src1]]\n"
121 + "vld1.u8 {q1}, [%[src2]]\n"
122 + "vrhadd.u8 q0, q0, q1\n"
123 + "vld1.u8 {q2}, [%[dest]]\n"
124 + /* XXX: one-cycle stall */
125 + "vrhadd.u8 q0, q0, q2\n"
126 + /* XXX: three-cycle stall */
127 + "vst1.u8 {q0}, [%[dest]]\n"
129 + : [dest]"r"(dest), [src1]"r"(src1), [src2]"r"(src2)
130 + : "memory", "q0", "q1", "q2");
134 + } while (--height);
137 +static void MC_avg_2_8_neon (uint8_t * dest, const uint8_t * src1,
138 + const uint8_t * src2,
139 + const int stride, unsigned height)
143 + "vld1.u8 {d0}, [%[src1]]\n"
144 + "vld1.u8 {d1}, [%[src2]]\n"
145 + "vrhadd.u8 d0, d0, d1\n"
146 + "vld1.u8 {d2}, [%[dest]]\n"
147 + "vrhadd.u8 d0, d0, d2\n"
148 + "vst1.u8 {d0}, [%[dest]]\n"
150 + : [dest]"r"(dest), [src1]"r"(src1), [src2]"r"(src2)
151 + : "memory", "q0", "d2");
155 + } while (--height);
158 +static void MC_avg_o_16_neon (uint8_t * dest, const uint8_t * ref,
159 + const int stride, int height)
161 + MC_avg_1_16_neon (dest, dest, ref, stride, height);
164 +static void MC_avg_o_8_neon (uint8_t * dest, const uint8_t * ref,
165 + const int stride, int height)
167 + MC_avg_1_8_neon (dest, dest, ref, stride, height);
170 +static void MC_put_x_16_neon (uint8_t * dest, const uint8_t * ref,
171 + const int stride, int height)
173 + MC_avg_1_16_neon (dest, ref, ref + 1, stride, height);
176 +static void MC_put_x_8_neon (uint8_t * dest, const uint8_t * ref,
177 + const int stride, int height)
179 + MC_avg_1_8_neon (dest, ref, ref + 1, stride, height);
182 +static void MC_avg_x_16_neon (uint8_t * dest, const uint8_t * ref,
183 + const int stride, int height)
185 + MC_avg_2_16_neon (dest, ref, ref + 1, stride, height);
188 +static void MC_avg_x_8_neon (uint8_t * dest, const uint8_t * ref,
189 + const int stride, int height)
191 + MC_avg_2_8_neon (dest, ref, ref + 1, stride, height);
194 +static void MC_put_y_16_neon (uint8_t * dest, const uint8_t * ref,
195 + const int stride, int height)
197 + MC_avg_1_16_neon (dest, ref, ref + stride, stride, height);
199 +static void MC_put_y_8_neon (uint8_t * dest, const uint8_t * ref,
200 + const int stride, int height)
202 + MC_avg_1_8_neon (dest, ref, ref + stride, stride, height);
205 +static void MC_avg_y_16_neon (uint8_t * dest, const uint8_t * ref,
206 + const int stride, int height)
208 + MC_avg_2_16_neon (dest, ref, ref + stride, stride, height);
211 +static void MC_avg_y_8_neon (uint8_t * dest, const uint8_t * ref,
212 + const int stride, int height)
214 + MC_avg_2_8_neon (dest, ref, ref + stride, stride, height);
217 +static void MC_put_xy_16_neon (uint8_t * dest, const uint8_t * ref,
218 + const int stride, int height)
222 + "vld1.u8 {q0}, [%[ref]]\n"
223 + "vld1.u8 {q1}, [%[refx]]\n"
224 + "vrhadd.u8 q0, q0, q1\n"
225 + "vld1.u8 {q2}, [%[refy]]\n"
226 + "vld1.u8 {q3}, [%[refxy]]\n"
227 + "vrhadd.u8 q2, q2, q3\n"
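228 + /* XXX: three-cycle stall. NOTE(review): averaging two rounded averages can differ by one from (a+b+c+d+2)>>2 (e.g. 1,0,0,0 gives 1, not 0) -- confirm this matches the reference xy interpolation */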
229 + "vrhadd.u8 q0, q0, q2\n"
230 + /* XXX: three-cycle stall */
231 + "vst1.u8 {q0}, [%[dest]]\n"
233 + : [dest]"r"(dest), [ref]"r"(ref), [refx]"r"(ref + 1),
234 + [refy]"r"(ref + stride), [refxy]"r"(ref + stride + 1)
235 + : "memory", "q0", "q1", "q2", "q3");
238 + } while (--height);
241 +static void MC_put_xy_8_neon (uint8_t * dest, const uint8_t * ref,
242 + const int stride, int height)
246 + "vld1.u8 {d0}, [%[ref]]\n"
247 + "vld1.u8 {d1}, [%[refx]]\n"
248 + "vrhadd.u8 d0, d0, d1\n"
249 + "vld1.u8 {d2}, [%[refy]]\n"
250 + "vld1.u8 {d3}, [%[refxy]]\n"
251 + "vrhadd.u8 d2, d2, d3\n"
252 + /* XXX: three-cycle stall */
253 + "vrhadd.u8 d0, d0, d2\n"
254 + /* XXX: three-cycle stall */
255 + "vst1.u8 {d0}, [%[dest]]\n"
257 + : [dest]"r"(dest), [ref]"r"(ref), [refx]"r"(ref + 1),
258 + [refy]"r"(ref + stride), [refxy]"r"(ref + stride + 1)
259 + : "memory", "q0", "q1");
262 + } while (--height);
265 +static void MC_avg_xy_16_neon (uint8_t * dest, const uint8_t * ref,
266 + const int stride, int height)
270 + "vld1.u8 {q0}, [%[ref]]\n"
271 + "vld1.u8 {q1}, [%[refx]]\n"
272 + "vrhadd.u8 q0, q0, q1\n"
273 + "vld1.u8 {q2}, [%[refy]]\n"
274 + "vld1.u8 {q3}, [%[refxy]]\n"
275 + "vrhadd.u8 q2, q2, q3\n"
276 + "vld1.u8 {q4}, [%[dest]]\n"
277 + /* XXX: one-cycle stall */
278 + "vrhadd.u8 q0, q0, q2\n"
279 + /* XXX: three-cycle stall */
280 + "vrhadd.u8 q0, q4, q0\n"
281 + "vst1.u8 {q0}, [%[dest]]\n"
283 + : [dest]"r"(dest), [ref]"r"(ref), [refx]"r"(ref + 1),
284 + [refy]"r"(ref + stride), [refxy]"r"(ref + stride + 1)
285 + : "memory", "q0", "q1", "q2", "q3", "q4");
288 + } while (--height);
291 +static void MC_avg_xy_8_neon (uint8_t * dest, const uint8_t * ref,
292 + const int stride, int height)
296 + "vld1.u8 {d0}, [%[ref]]\n"
297 + "vld1.u8 {d1}, [%[refx]]\n"
298 + "vrhadd.u8 d0, d0, d1\n"
299 + "vld1.u8 {d2}, [%[refy]]\n"
300 + "vld1.u8 {d3}, [%[refxy]]\n"
301 + "vrhadd.u8 d2, d2, d3\n"
302 + "vld1.u8 {d4}, [%[dest]]\n"
303 + /* XXX: one-cycle stall */
304 + "vrhadd.u8 d0, d0, d2\n"
305 + /* XXX: three-cycle stall */
306 + "vrhadd.u8 d0, d4, d0\n"
307 + "vst1.u8 {d0}, [%[dest]]\n"
309 + : [dest]"r"(dest), [ref]"r"(ref), [refx]"r"(ref + 1),
310 + [refy]"r"(ref + stride), [refxy]"r"(ref + stride + 1)
311 + : "memory", "q0", "q1", "d4");
314 + } while (--height);
317 +MPEG2_MC_EXTERN (neon)
319 +#endif /* ARCH_ARM_NEON */
321 Modification de propriétés sur libmpeg2/motion_comp_neon.c
322 ___________________________________________________________________
323 Ajouté : svn:eol-style
326 Index: libmpeg2/mpeg2_internal.h
327 ===================================================================
328 --- libmpeg2/mpeg2_internal.h (révision 1193)
329 +++ libmpeg2/mpeg2_internal.h (copie de travail)
331 extern mpeg2_mc_t mpeg2_mc_alpha;
332 extern mpeg2_mc_t mpeg2_mc_vis;
333 extern mpeg2_mc_t mpeg2_mc_arm;
334 +extern mpeg2_mc_t mpeg2_mc_neon;
336 #endif /* LIBMPEG2_MPEG2_INTERNAL_H */
337 Index: libmpeg2/motion_comp.c
338 ===================================================================
339 --- libmpeg2/motion_comp.c (révision 1193)
340 +++ libmpeg2/motion_comp.c (copie de travail)
345 +#ifdef ARCH_ARM_NEON
346 + if (accel & MPEG2_ACCEL_ARM_NEON)
347 + mpeg2_mc = mpeg2_mc_neon;
350 if (accel & MPEG2_ACCEL_ARM) {
351 mpeg2_mc = mpeg2_mc_arm;
353 Index: libmpeg2/Makefile.am
354 ===================================================================
355 --- libmpeg2/Makefile.am (révision 1193)
356 +++ libmpeg2/Makefile.am (copie de travail)
358 motion_comp_vis.c motion_comp_arm.c \
359 cpu_accel.c cpu_state.c
361 -libmpeg2arch_la_SOURCES += motion_comp_arm_s.S
362 +libmpeg2arch_la_SOURCES += motion_comp_arm_s.S motion_comp_neon.c
364 libmpeg2arch_la_CFLAGS = $(OPT_CFLAGS) $(ARCH_OPT_CFLAGS) $(LIBMPEG2_CFLAGS)
367 ===================================================================
368 --- configure.ac (révision 1193)
369 +++ configure.ac (copie de travail)
371 AC_DEFINE([ARCH_ALPHA],,[alpha architecture]);;
374 - AC_DEFINE([ARCH_ARM],,[ARM architecture]);;
375 + AC_DEFINE([ARCH_ARM],,[ARM architecture])
376 + AC_MSG_CHECKING([if inline ARM Advanced SIMD assembly is supported])
378 + [asm ("vqmovun.s64 d0, q1":::"d0");],
379 + [AC_DEFINE([ARCH_ARM_NEON],, [ARM Advanced SIMD assembly])
380 + AC_MSG_RESULT(yes)],
381 + [AC_MSG_RESULT(no)])
384 elif test x"$CC" = x"tendracc"; then
385 dnl TenDRA portability checking compiler