2 * Alpha optimized DSP utils
3 * Copyright (c) 2002 Falk Hueffner <falk@debian.org>
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
21 * These functions are scheduled for pca56. They should work
22 * reasonably on ev6, though.
26 #ifdef HAVE_AV_CONFIG_H
30 /* Some nicer register names. */
35 /* Danger: these overlap with the argument list and the return value */
46 /************************************************************************
47 * void put_pixels_axp_asm(uint8_t *block, const uint8_t *pixels,
48 * int line_size, int h)
/* Symbol bookkeeping for put_pixels_axp_asm: .globl exports the entry point
   to the linker, and the .ent/.end pair brackets the procedure for the
   assembler. */
51 .globl put_pixels_axp_asm
52 .ent put_pixels_axp_asm
141 .end put_pixels_axp_asm
143 /************************************************************************
144 * void put_pixels_clamped_mvi_asm(const DCTELEM *block, uint8_t *pixels,
/* Entry point of put_pixels_clamped_mvi_asm. .globl exports the symbol;
   .ent/.end bracket the procedure for the assembler. */
148 .globl put_pixels_clamped_mvi_asm
149 .ent put_pixels_clamped_mvi_asm
150 put_pixels_clamped_mvi_asm:
/* Jump to the address held in AT, with _mcount given as the branch hint;
   the return address is written back into AT. _mcount is the conventional
   profiling entry point. */
156 jsr AT, (AT), _mcount
/* lda with no base register just materialises the constant 8 in t9. */
160 lda t9, 8 # loop counter
/* zap with byte mask 0xaa clears bytes 1, 3, 5 and 7 of t8, so only the low
   byte of each 16-bit word survives. The result noted on the line implies
   t8 held all ones beforehand -- TODO confirm where t8 is initialised.
   NOTE(review): presumably this is the per-word upper bound (255) used for
   clamping; verify against the loop body. */
161 zap t8, 0xaa, t8 # 0x00ff00ff00ff00ff
196 .end put_pixels_clamped_mvi_asm
198 /************************************************************************
199 * void add_pixels_clamped_mvi_asm(const DCTELEM *block, uint8_t *pixels,
/*
 * add_pixels_clamped_mvi_asm
 *
 * Adds 16-bit values read from block to bytes read from pixels and clamps
 * each sum to the range 0..255 using the MVI packed min/max instructions.
 *
 * Register roles visible in this routine:
 *   a0  block pointer; read as four quadwords (16 shorts) per pass and then
 *       advanced by 32 bytes
 *   a1  pixels pointer for the first row of the pass
 *   a2  line_size, the byte distance between consecutive rows
 *   te  a1 + a2, i.e. the second row of the pass
 *   tf  0x00ff in every 16-bit word: upper clamp bound
 *   tg  0x8000 in every 16-bit word (per the inline comment): sign bits
 *
 * Each pass handles four "quarters" (four 16-bit words each). The trailing
 * "q n" comments give quarter number / operation number; the four
 * instruction streams are interleaved for scheduling.
 */
203 .globl add_pixels_clamped_mvi_asm
204 .ent add_pixels_clamped_mvi_asm
205 add_pixels_clamped_mvi_asm:
/* Jump to the address held in AT, with _mcount given as the branch hint;
   the return address is written back into AT. _mcount is the conventional
   profiling entry point. */
211 jsr AT, (AT), _mcount
/* Build the two per-word constants used below. The values noted on the
   lines depend on the prior contents of tg, t0 and t1 -- TODO confirm
   against the setup code preceding these instructions. */
220 xor tg, t0, tg # 0x8000800080008000
/* zap mask 0xaa clears bytes 1, 3, 5 and 7, keeping the low byte of each
   16-bit word. */
221 zap t1, 0xaa, tf # 0x00ff00ff00ff00ff
/* Numeric local label 1: start of one pass (presumably the loop head; the
   branch back should be confirmed). Pixel loads are issued early and
   interleaved with the coefficient loads. */
224 1: ldl t1, 0(a1) # pix0 (try to hit cache line soon)
226 addq a1, a2, te # pixels += line_size
227 ldq t0, 0(a0) # shorts0
229 ldl t7, 0(te) # pix2 (try to hit cache line soon)
231 ldq t3, 8(a0) # shorts1
232 ldq t6, 16(a0) # shorts2
234 ldq t9, 24(a0) # shorts3
/* unpkbw widens the four low bytes of t1 into four zero-extended 16-bit
   words so they line up with the packed shorts. */
235 unpkbw t1, t1 # 0 0 (quarter/op no.)
/* Op 3: add pixels to coefficients. addq is a plain 64-bit add over four
   packed words. NOTE(review): presumably the sign bits are handled
   separately with tg so that carries cannot spill into the neighbouring
   word -- verify against ops 1, 2 and 4 of each quarter. */
242 addq t0, t1, t0 # 0 3
/* Op 5: per-word signed max with zero clamps negative sums to 0.
   Op 6: per-word signed min with tf clamps sums above 255 to 255. */
247 maxsw4 t0, zero, t0 # 0 5
251 minsw4 t0, tf, t0 # 0 6
252 addq t3, t4, t3 # 1 3
256 maxsw4 t3, zero, t3 # 1 5
257 addq t6, t7, t6 # 2 3
261 minsw4 t3, tf, t3 # 1 6
264 maxsw4 t6, zero, t6 # 2 5
265 addq t9, ta, t9 # 3 3
267 minsw4 t6, tf, t6 # 2 6
270 maxsw4 t9, zero, t9 # 3 5
/* 32 bytes = the 16 two-byte values consumed by the four ldq loads above. */
271 lda a0, 32(a0) # block += 16;
274 minsw4 t9, tf, t9 # 3 6
/* te already equals a1 + a2, so a1 ends up two rows further on. */
280 addq te, a2, a1 # pixels += line_size
286 .end add_pixels_clamped_mvi_asm