2 * Copyright (c) 2009 Mans Rullgard <mans@mansr.com>
4 * This file is part of FFmpeg.
6 * FFmpeg is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * FFmpeg is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with FFmpeg; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
/*
 * call_2x_pixels type, subp
 *
 * Emits a function named ff_<type>_pixels16<subp>_armv6 (declared with
 * export=1) that transfers control to ff_<type>_pixels8<subp>_armv6 twice.
 * NOTE(review): the embedded original line numbers skip here, so whatever
 * saves/adjusts the arguments between the two transfers, and the closing
 * endfunc/.endm, are not visible in this view - confirm against the full
 * file before relying on this description.
 */
25 .macro call_2x_pixels type, subp
26 function ff_\type\()_pixels16\subp\()_armv6, export=1
/* First transfer uses branch-with-link, so the 8-pixel routine returns here. */
28 bl ff_\type\()_pixels8\subp\()_armv6
/* Second transfer is a plain branch (no link): the 8-pixel routine returns
 * to whatever address lr holds at that point. */
32 b ff_\type\()_pixels8\subp\()_armv6
/*
 * Instantiate the 16-pixel wrappers for the four "put" variants. Each
 * expansion refers to the matching ff_put_pixels8<subp>_armv6 function
 * declared further down in this file (_x2, _y2, _x2_no_rnd, _y2_no_rnd).
 */
37 call_2x_pixels put, _x2
38 call_2x_pixels put, _y2
39 call_2x_pixels put, _x2_no_rnd
40 call_2x_pixels put, _y2_no_rnd
/*
 * ff_put_pixels16_armv6
 *
 * Only the store instructions of this function are visible in this view;
 * the loads that fill r4-r11, the loop control and the return are on
 * lines not shown. All visible stores use r0 as the base address.
 * NOTE(review): strd_post is a macro defined outside this view -
 * presumably "store register pair, then advance the base (r0) by the last
 * operand (r2)"; confirm against its definition.
 */
42 function ff_put_pixels16_armv6, export=1
/* Store the r4:r5 pair through r0, passing r2 as the macro's step operand. */
51 strd_post r4, r5, r0, r2
/* Store the r10:r11 pair 8 bytes past r0 (no base update). */
55 strd r10, r11, [r0, #8]
/* Store the r8:r9 pair through r0, again with r2 as the step operand. */
57 strd_post r8, r9, r0, r2
/*
 * ff_put_pixels8_armv6
 *
 * Only two stores are visible in this view: register pairs r4:r5 and
 * r6:r7 are written through r0 with r2 as the strd_post step operand.
 * The loads, loop control and return are on lines not shown.
 * NOTE(review): strd_post is defined outside this view - presumably a
 * post-indexed pair store; confirm.
 */
64 function ff_put_pixels8_armv6, export=1
70 strd_post r4, r5, r0, r2
73 strd_post r6, r7, r0, r2
/*
 * ff_put_pixels8_x2_armv6
 *
 * Partial view: only the constant smear, two byte-merge steps and the
 * stores are visible. The loads, the averaging arithmetic, loop control
 * and return are on lines not shown.
 */
80 function ff_put_pixels8_x2_armv6, export=1
/* OR r12 with itself shifted by 8, then by 16 bits. If r12 held a value
 * confined to its low byte, this replicates that byte into all four byte
 * lanes. NOTE(review): the initial value of r12 is not visible - confirm. */
83 orr r12, r12, r12, lsl #8
84 orr r12, r12, r12, lsl #16
/* OR the low byte of r5 into bits 31:24 of r6.
 * NOTE(review): presumably r6 was previously shifted right by 8 so this
 * forms the source word offset by one byte - the shift is not shown. */
92 orr r6, r6, r5, lsl #24
/* Same merge for the r10/r9 pair. */
97 orr r10, r10, r9, lsl #24
/* Write the r4:r5 and r8:r9 pairs through r0 (r2 is the strd_post step
 * operand; strd_post is defined outside this view). */
113 strd_post r4, r5, r0, r2
115 strd_post r8, r9, r0, r2
/*
 * ff_put_pixels8_y2_armv6
 *
 * Partial view: only the r12 constant smear and the two stores are
 * visible; loads, arithmetic, loop control and return are not shown.
 */
121 function ff_put_pixels8_y2_armv6, export=1
/* OR r12 with itself shifted by 8, then 16 bits; replicates the low byte
 * across all four lanes if r12 started as a single-byte value.
 * NOTE(review): r12's initial value is not visible - confirm. */
124 orr r12, r12, r12, lsl #8
125 orr r12, r12, r12, lsl #16
/* Write the r8:r9 and r10:r11 pairs through r0 with r2 as the step operand
 * (strd_post is defined outside this view). */
151 strd_post r8, r9, r0, r2
153 strd_post r10, r11, r0, r2
/*
 * ff_put_pixels8_x2_no_rnd_armv6
 *
 * Partial view: only two byte-merge steps are visible. Loads, averaging,
 * stores, loop control and return are on lines not shown.
 */
160 function ff_put_pixels8_x2_no_rnd_armv6, export=1
/* OR the low byte of r5 into bits 31:24 of r6. */
172 orr r6, r6, r5, lsl #24
/* OR the low byte of r9 into bits 31:24 of r12.
 * NOTE(review): presumably both targets were shifted right by 8 beforehand
 * to build byte-offset source words - those shifts are not shown. */
174 orr r12, r12, r9, lsl #24
/*
 * ff_put_pixels8_y2_no_rnd_armv6
 *
 * Only the declaration is visible in this view; the whole body is on
 * lines not shown. This is the target used by the
 * "call_2x_pixels put, _y2_no_rnd" expansion earlier in the file.
 */
188 function ff_put_pixels8_y2_no_rnd_armv6, export=1
/*
 * ff_avg_pixels8_armv6
 *
 * Partial view: the visible lines show a constant smear in lr, pair loads
 * relative to r0 and pair stores through r0, i.e. r0 is both read and
 * written. The source loads, the averaging arithmetic, loop control and
 * return are on lines not shown.
 * NOTE(review): ldrd_reg and strd_post are macros defined outside this
 * view - presumably "load pair from [base, offset]" and "store pair with
 * post-increment"; confirm.
 */
213 function ff_avg_pixels8_armv6, export=1
/* OR lr with itself shifted by 8, then 16 bits; replicates the low byte
 * across all four lanes if lr started as a single-byte value (initial
 * value not visible). */
217 orr lr, lr, lr, lsl #8
218 orr lr, lr, lr, lsl #16
/* Fetch a register pair into r6:r7 using r0 and r2 as address operands. */
228 ldrd_reg r6, r7, r0, r2
/* Write r4:r5 through r0 with r2 as the step operand. */
240 strd_post r4, r5, r0, r2
/* Fetch a register pair into r4:r5 using r0 and r2 as address operands. */
244 ldrd_reg r4, r5, r0, r2
/* Two stores of r6:r7 at different original line numbers (251 and 258).
 * NOTE(review): presumably one is in the loop body and one on the exit
 * path - the branch structure is not visible. */
251 strd_post r6, r7, r0, r2
258 strd_post r6, r7, r0, r2
/*
 * ff_add_pixels_clamped_armv6
 *
 * Partial view: the visible lines read four words through r0, regroup
 * their halfwords, add bytes taken from r6/r7 to them and write a
 * register pair through r1. The load of r6/r7, any saturation step, the
 * even-byte additions, loop control and return are on lines not shown.
 */
263 function ff_add_pixels_clamped_armv6, export=1
/* Load four words from [r0] and advance r0 by 16 bytes. */
267 ldm r0!, {r4,r5,r12,lr}
/* r8 = low halfword of r4 (bottom) | low halfword of r5 (top). */
269 pkhbt r8, r4, r5, lsl #16
/* r5 = high halfword of r4 (bottom) | high halfword of r5 (top).
 * Together these two lines split r4/r5 into even-indexed halfwords (r8)
 * and odd-indexed halfwords (r5). */
270 pkhtb r5, r5, r4, asr #16
/* Same even/odd split for r12/lr: even halfwords to r4, odd to lr. */
271 pkhbt r4, r12, lr, lsl #16
272 pkhtb lr, lr, r12, asr #16
/* Add bytes 1 and 3 of r6 (zero-extended) to the two halfwords of r5. */
275 uxtab16 r5, r5, r6, ror #8
/* Add bytes 1 and 3 of r7 (zero-extended) to the two halfwords of lr. */
277 uxtab16 lr, lr, r7, ror #8
/* Interleave: r6 = r8 | (r5 << 8), r7 = r4 | (lr << 8).
 * NOTE(review): this only yields packed bytes if every halfword has been
 * limited to 0..255 first - that step is not visible here; confirm. */
282 orr r6, r8, r5, lsl #8
283 orr r7, r4, lr, lsl #8
/* Write r6:r7 through r1 with r2 as the step operand (strd_post is
 * defined outside this view). */
285 strd_post r6, r7, r1, r2
/*
 * ff_get_pixels_armv6
 *
 * Partial view: the visible lines read a register pair through r1, widen
 * bytes to halfwords, regroup them and store four words through r0.
 * The extraction of the even bytes (into r6 and r12), loop control and
 * return are on lines not shown.
 * NOTE(review): ldrd_post is a macro defined outside this view -
 * presumably a pair load with post-increment of r1 by r2; confirm.
 */
290 function ff_get_pixels_armv6, export=1
295 ldrd_post r4, r5, r1, r2
/* r4 = bytes 1 and 3 of r4, each zero-extended to a halfword. */
298 uxtb16 r4, r4, ror #8
/* r8 = bytes 1 and 3 of r5, each zero-extended to a halfword. */
300 uxtb16 r8, r5, ror #8
/* r5 = low halfword of r6 (bottom) | low halfword of r4 (top);
 * r6 = high halfword of r6 (bottom) | high halfword of r4 (top).
 * NOTE(review): presumably r6 holds bytes 0 and 2 of the first word, so
 * this restores byte order as 16-bit values - r6's setup is not shown. */
302 pkhbt r5, r6, r4, lsl #16
303 pkhtb r6, r4, r6, asr #16
/* Same regrouping for r12/r8 into r7 and r12. */
304 pkhbt r7, r12, r8, lsl #16
305 pkhtb r12, r8, r12, asr #16
/* Store the four result words at [r0] and advance r0 by 16 bytes. */
306 stm r0!, {r5,r6,r7,r12}
/*
 * ff_diff_pixels_armv6
 *
 * Partial view: the visible lines read register pairs through r1 and r2
 * (both with r3 as the step operand), widen the odd bytes to halfwords,
 * regroup halfwords and store four words through r0. The even-byte
 * extraction, the subtraction itself, loop control and return are on
 * lines not shown.
 * NOTE(review): ldrd_post is defined outside this view - presumably a
 * pair load with post-increment; confirm.
 */
312 function ff_diff_pixels_armv6, export=1
318 ldrd_post r4, r5, r1, r3
319 ldrd_post r6, r7, r2, r3
/* Keep bytes 1 and 3 of r4 and of r6, zero-extended to halfwords. */
321 uxtb16 r4, r4, ror #8
323 uxtb16 r6, r6, ror #8
/* Keep bytes 1 and 3 of r5, zero-extended to halfwords. */
328 uxtb16 r5, r5, ror #8
/* r4 = low halfword of r9 (bottom) | low halfword of r6 (top);
 * r6 = high halfword of r9 (bottom) | high halfword of r6 (top).
 * NOTE(review): r9 and r6 presumably hold even/odd differences at this
 * point - the instructions producing them are not visible. */
330 pkhbt r4, r9, r6, lsl #16
331 pkhtb r6, r6, r9, asr #16
/* Keep bytes 1 and 3 of r7, zero-extended to halfwords. */
333 uxtb16 r7, r7, ror #8
/* Same halfword regrouping for r9/r5 into r8 and r9. */
337 pkhbt r8, r9, r5, lsl #16
338 pkhtb r9, r5, r9, asr #16
/* Store the four result words at [r0] and advance r0 by 16 bytes. */
339 stm r0!, {r4,r6,r8,r9}
/*
 * ff_pix_abs16_armv6
 *
 * Partial view: only the accumulation steps are visible. Each usada8
 * adds the sum of absolute differences of the four byte lanes of its two
 * source registers to an accumulator. Two accumulators are used here,
 * r12 and lr. The loads, loop control, the final combination of the
 * accumulators and the return are on lines not shown.
 */
345 function ff_pix_abs16_armv6, export=1
/* r12 += SAD(r4, r8) */
355 usada8 r12, r4, r8, r12
/* lr += SAD(r5, r9) */
358 usada8 lr, r5, r9, lr
/* r12 += SAD(r6, r8); r8 is presumably reloaded in between (not shown). */
360 usada8 r12, r6, r8, r12
/* lr += SAD(r7, r9); r9 is presumably reloaded in between (not shown). */
362 usada8 lr, r7, r9, lr
/*
 * ff_pix_abs16_x2_armv6
 *
 * Partial view: the visible lines show a constant smear in lr, several
 * byte-merge steps and SAD accumulation into r0. The loads, the shifts
 * and averaging that feed these steps, loop control and return are on
 * lines not shown.
 */
374 function ff_pix_abs16_x2_armv6, export=1
/* OR lr with itself shifted by 8, then 16 bits; replicates the low byte
 * across all four lanes if lr started as a single-byte value (initial
 * value not visible). */
379 orr lr, lr, lr, lsl #8
380 orr lr, lr, lr, lsl #16
/* OR the low byte of r9 into bits 31:24 of r10, and of r5 into r6.
 * NOTE(review): presumably the targets were shifted right by 8 first to
 * form words offset by one byte - those shifts are not shown. */
387 orr r10, r10, r9, lsl #24
391 orr r6, r6, r5, lsl #24
/* r0 += sum of absolute byte differences of r4 and r7. */
395 usada8 r0, r4, r7, r0
/* OR the low byte of r6 into bits 31:24 of r10. */
403 orr r10, r10, r6, lsl #24
/* r0 += SAD(r8, r4) */
404 usada8 r0, r8, r4, r0
/* r0 += SAD(r4, r8) */
414 usada8 r0, r4, r8, r0
/* OR the low byte of r7 into bits 31:24 of r10. */
415 orr r10, r10, r7, lsl #24
/* r0 += SAD(r5, r9) */
423 usada8 r0, r5, r9, r0
/*
 * usad_y2 p0, p1, p2, p3, n0, n1, n2, n3
 *
 * Takes eight register names. The visible body lines each add the sum of
 * absolute byte differences of two of those registers to r0.
 * NOTE(review): most of the macro body (between original lines 429 and
 * 460) and its .endm are not visible, so the loads and averaging that
 * give the operands their meaning cannot be described from this view;
 * n0, n1 and n3 are not referenced on any visible line.
 */
429 .macro usad_y2 p0, p1, p2, p3, n0, n1, n2, n3
/* r0 += SAD(p0, n2) */
437 usada8 r0, \p0, \n2, r0
/* r0 += SAD(p1, p0) */
445 usada8 r0, \p1, \p0, r0
/* r0 += SAD(p2, p1) */
453 usada8 r0, \p2, \p1, r0
/* r0 += SAD(p3, p0) */
460 usada8 r0, \p3, \p0, r0
/*
 * ff_pix_abs16_y2_armv6
 *
 * Partial view: the visible lines show a constant smear in lr and two
 * expansions of the usad_y2 macro, which accumulates into r0. The loads,
 * loop control and return are on lines not shown.
 */
464 function ff_pix_abs16_y2_armv6, export=1
/* OR lr with itself shifted by 8, then 16 bits; replicates the low byte
 * across all four lanes if lr started as a single-byte value (initial
 * value not visible). */
471 orr lr, lr, lr, lsl #8
472 orr lr, lr, lr, lsl #16
/* The second expansion swaps the two register groups (r4-r7 and r8-r11)
 * relative to the first.
 * NOTE(review): presumably so the row loaded as "next" in one step serves
 * as "previous" in the following one without copying - confirm against
 * the full macro body. */
479 usad_y2 r4, r5, r6, r7, r8, r9, r10, r11
481 usad_y2 r8, r9, r10, r11, r4, r5, r6, r7
/*
 * ff_pix_abs8_armv6
 *
 * Partial view: the visible lines read register pairs through r1 (with r3
 * as the step operand) and accumulate sums of absolute byte differences
 * into two accumulators, r0 and lr. The loads of r6/r7, loop control, the
 * final combination of the accumulators and the return are not shown.
 * NOTE(review): ldrd_post is defined outside this view - presumably a
 * pair load with post-increment; confirm.
 */
487 function ff_pix_abs8_armv6, export=1
493 ldrd_post r4, r5, r1, r3
/* Fetch the next pair into r8:r9 before consuming r4:r5. */
498 ldrd_post r8, r9, r1, r3
/* r0 += SAD(r4, r6); lr += SAD(r5, r7) */
499 usada8 r0, r4, r6, r0
501 usada8 lr, r5, r7, lr
/* Fetch the following pair into r4:r5 before consuming r8:r9. */
505 ldrd_post r4, r5, r1, r3
/* r0 += SAD(r8, r6); lr += SAD(r9, r7) */
506 usada8 r0, r8, r6, r0
508 usada8 lr, r9, r7, lr
/* The same pair of accumulations appears again at original lines 511-512.
 * NOTE(review): presumably the tail after the loop - the branch structure
 * is not visible. */
511 usada8 r0, r8, r6, r0
512 usada8 lr, r9, r7, lr
/*
 * ff_sse16_armv6
 *
 * Partial view: only byte-widening steps and one pair load are visible.
 * Each "uxtb16 Rd, Rm, ror #8" keeps bytes 1 and 3 of Rm, zero-extended
 * to two halfwords. The even-byte extraction, the subtract/square/
 * accumulate arithmetic, the other loads, loop control and return are on
 * lines not shown.
 */
517 function ff_sse16_armv6, export=1
525 uxtb16 r4, r4, ror #8
527 uxtb16 r8, r8, ror #8
/* Odd bytes of r5 go to lr and odd bytes of r7 to r9, leaving r5 and r7
 * themselves unmodified by these two lines. */
533 uxtb16 lr, r5, ror #8
535 uxtb16 r9, r7, ror #8
/* Load the register pair located 8 bytes past r1 into r4:r5. */
537 ldrd r4, r5, [r1, #8]
543 uxtb16 r4, r4, ror #8
545 uxtb16 r7, r7, ror #8
552 uxtb16 r5, r5, ror #8
554 uxtb16 r8, r8, ror #8
/*
 * ff_pix_norm1_armv6
 *
 * Partial view: only four byte-widening steps are visible. Each replaces
 * a register (r2, r3, r4, r5) with its bytes 1 and 3 zero-extended to two
 * halfwords. The loads, the even-byte extraction, the accumulation, loop
 * control and return are on lines not shown.
 */
568 function ff_pix_norm1_armv6, export=1
575 uxtb16 r2, r2, ror #8
579 uxtb16 r3, r3, ror #8
583 uxtb16 r4, r4, ror #8
587 uxtb16 r5, r5, ror #8
/*
 * ff_pix_sum_armv6
 *
 * Partial view: only the accumulation steps are visible. Each usada8 adds
 * the sum of absolute byte differences between a data register and lr to
 * one of two accumulators, r2 or r3.
 * NOTE(review): presumably lr holds 0 so each step adds the plain sum of
 * the four bytes - lr's initialisation is not visible; confirm. The
 * loads, loop control, the final combination of r2/r3 and the return are
 * on lines not shown.
 */
598 function ff_pix_sum_armv6, export=1
608 usada8 r2, r4, lr, r2
610 usada8 r3, r5, lr, r3
612 usada8 r2, r6, lr, r2
/* The r7 accumulation appears at two original line numbers (615, 618).
 * NOTE(review): presumably one inside the loop and one on the exit path -
 * the branch structure is not visible. */
615 usada8 r3, r7, lr, r3
618 usada8 r3, r7, lr, r3