2 * DSP utils : average functions are compiled twice for 3dnow/mmx2
3 * Copyright (c) 2000, 2001 Gerard Lantau.
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
19 * MMX optimization by Nick Kurshev <nickols_k@mail.ru>
/*
 * put_pixels_x2: fills rows of the destination from rows of the source
 * using the PAVGB averaging step.
 *
 * PAVGB is a string macro defined outside this listing; given the file
 * header ("compiled twice for 3dnow/mmx2") it presumably expands to the
 * packed unsigned-byte average instruction of the selected instruction
 * set -- confirm at the include site.
 *
 * NOTE(review): `p` and `pix` are presumably local cursors over `block`
 * and `pixels`; their declarations, the register loads/stores and the
 * loop headers driven by `h` are not visible here. The "_x2" suffix
 * suggests each register pair holds a row and the same row shifted by one
 * byte -- verify against the loads.
 */
22 static void DEF(put_pixels_x2)(UINT8 *block, const UINT8 *pixels, int line_size, int h)
/* Unrolled section: four independent register pairs are averaged, one pair per row. */
41 PAVGB" %%mm1, %%mm0\n\t"
42 PAVGB" %%mm3, %%mm2\n\t"
43 PAVGB" %%mm5, %%mm4\n\t"
44 PAVGB" %%mm7, %%mm6\n\t"
/* Outputs: four consecutive destination rows; inputs: the four matching source rows. */
49 :"=m"(*p), "=m"(*(p+line_size)), "=m"(*(p+line_size*2)), "=m"(*(p+line_size*3))
50 :"m"(*pix), "m"(*(pix+line_size)), "m"(*(pix+line_size*2)), "m"(*(pix+line_size*3))
/* Both cursors move down four rows after the unrolled step. */
52 pix += line_size*4; p += line_size*4;
/* Single-row section: one averaging step, then advance one row (presumably the remainder loop). */
58 PAVGB" %%mm1, %%mm0\n\t"
63 pix += line_size; p += line_size;
/*
 * put_pixels_y2: fills rows of the destination from vertically adjacent
 * rows of the source using the PAVGB averaging step.
 *
 * PAVGB is a string macro defined outside this listing; it presumably
 * expands to the packed unsigned-byte average instruction of the selected
 * instruction set -- confirm at the include site.
 *
 * NOTE(review): `p` and `pix` are presumably local cursors over `block`
 * and `pixels`; the register loads/stores, pointer advances and loop
 * headers driven by `h` are not visible here.
 */
68 static void DEF(put_pixels_y2)(UINT8 *block, const UINT8 *pixels, int line_size, int h)
/*
 * mm1 is consumed by the first average and then updated by the second, so
 * the order matters. Presumably mm0/mm1/mm2 hold source rows 0/1/2, giving
 * avg(row0,row1) and avg(row1,row2) -- verify against the loads.
 */
83 PAVGB" %%mm1, %%mm0\n\t"
84 PAVGB" %%mm2, %%mm1\n\t"
/* Outputs: two destination rows; inputs: three consecutive source rows. */
87 :"=m"(*p), "=m"(*(p+line_size))
88 :"m"(*pix), "m"(*(pix+line_size)),
89 "m"(*(pix+line_size*2))
/* Single-row section: one averaging step involving the next source row. */
98 PAVGB" %%mm1, %%mm0\n\t"
102 "m"(*(pix+line_size))
/*
 * avg_pixels: combines rows of the source with rows of the destination
 * using the PAVGB averaging step.
 *
 * PAVGB is a string macro defined outside this listing; it presumably
 * expands to the packed unsigned-byte average instruction of the selected
 * instruction set -- confirm at the include site.
 *
 * NOTE(review): `p` and `pix` are presumably local cursors over `block`
 * and `pixels`. The name suggests each register pair holds the existing
 * destination row and the matching source row, but the loads/stores and
 * the loop headers driven by `h` are not visible here -- verify.
 */
108 static void DEF(avg_pixels)(UINT8 *block, const UINT8 *pixels, int line_size, int h)
/* Unrolled section: four independent register pairs are averaged, one pair per row. */
127 PAVGB" %%mm1, %%mm0\n\t"
128 PAVGB" %%mm3, %%mm2\n\t"
129 PAVGB" %%mm5, %%mm4\n\t"
130 PAVGB" %%mm7, %%mm6\n\t"
/* Outputs: four consecutive destination rows; inputs: the four matching source rows. */
135 :"=m"(*p), "=m"(*(p+line_size)), "=m"(*(p+line_size*2)), "=m"(*(p+line_size*3))
136 :"m"(*pix), "m"(*(pix+line_size)), "m"(*(pix+line_size*2)), "m"(*(pix+line_size*3))
/* Both cursors move down four rows after the unrolled step. */
138 pix += line_size*4; p += line_size*4;
/* Single-row section: one averaging step, then advance one row (presumably the remainder loop). */
144 PAVGB" %%mm1, %%mm0\n\t"
149 pix += line_size; p += line_size;
/*
 * avg_pixels_x2: two-stage averaging. A source row is first averaged with
 * the same row read one byte further on, and that result is then averaged
 * into a second register (presumably holding the current destination row).
 *
 * PAVGB is a string macro defined outside this listing; it presumably
 * expands to the packed unsigned-byte average instruction of the selected
 * instruction set -- confirm at the include site.
 *
 * NOTE(review): `p` and `pix` are presumably local cursors over `block`
 * and `pixels`; the remaining loads, the stores, pointer advances and the
 * loop headers driven by `h` are not visible here.
 */
154 static void DEF(avg_pixels_x2)( UINT8 *block, const UINT8 *pixels, int line_size, int h)
/*
 * "1%N" prefixes memory operand N with a displacement of 1, i.e. it reads
 * 8 bytes starting one byte past that row (%2 = *pix, %3 = *(pix+line_size)).
 */
166 "movq 1%2, %%mm3\n\t"
168 "movq 1%3, %%mm5\n\t"
/* Row 0: mm2 = avg(mm2, mm3), then mm0 = avg(mm0, mm2). */
171 PAVGB" %%mm3, %%mm2\n\t"
172 PAVGB" %%mm2, %%mm0\n\t"
/* Row 1: mm4 = avg(mm4, mm5), then mm1 = avg(mm1, mm4). */
173 PAVGB" %%mm5, %%mm4\n\t"
174 PAVGB" %%mm4, %%mm1\n\t"
/* Outputs: two destination rows; inputs: the two matching source rows. */
177 :"=m"(*p), "=m"(*(p+line_size))
178 :"m"(*pix), "m"(*(pix+line_size))
/* Single-row section: same two-stage average for one row; here the source is operand %1. */
186 "movq 1%1, %%mm2\n\t"
188 PAVGB" %%mm2, %%mm1\n\t"
189 PAVGB" %%mm1, %%mm0\n\t"
/*
 * avg_pixels_y2: two-stage averaging over vertically adjacent source rows.
 * Two registers are averaged together first, and that result is then
 * averaged into a further register (presumably holding the current
 * destination row).
 *
 * PAVGB is a string macro defined outside this listing; it presumably
 * expands to the packed unsigned-byte average instruction of the selected
 * instruction set -- confirm at the include site.
 *
 * NOTE(review): `p` and `pix` are presumably local cursors over `block`
 * and `pixels`; the register loads/stores, pointer advances and the loop
 * headers driven by `h` are not visible here.
 */
198 static void DEF(avg_pixels_y2)( UINT8 *block, const UINT8 *pixels, int line_size, int h)
/* Row 0: mm2 = avg(mm2, mm3), then mm0 = avg(mm0, mm2). */
215 PAVGB" %%mm3, %%mm2\n\t"
216 PAVGB" %%mm2, %%mm0\n\t"
/* Row 1: mm4 = avg(mm4, mm5), then mm1 = avg(mm1, mm4). */
217 PAVGB" %%mm5, %%mm4\n\t"
218 PAVGB" %%mm4, %%mm1\n\t"
/* Outputs: two destination rows; inputs: three consecutive source rows. */
221 :"=m"(*p), "=m"(*(p+line_size))
222 :"m"(*pix), "m"(*(pix+line_size)), "m"(*(pix+line_size*2))
/* Single-row section: same two-stage average using two consecutive source rows. */
232 PAVGB" %%mm2, %%mm1\n\t"
233 PAVGB" %%mm1, %%mm0\n\t"
236 :"m"(*pix), "m"(*(pix+line_size))
/*
 * avg_pixels_xy2: forms a four-sample sum per byte position in 16-bit
 * lanes, adds the word constant held in mm6, shifts right by 2, packs the
 * result back to bytes and finally averages it with memory operand %0.
 *
 * PAVGB is a string macro defined outside this listing; it presumably
 * expands to the packed unsigned-byte average instruction of the selected
 * instruction set -- confirm at the include site.
 *
 * NOTE(review): the loads of mm0/mm1/mm6, the final store, the output
 * operand list, pointer advances and the loop header driven by `h` are not
 * visible here. mm6 is presumably loaded from mm_wtwo (a rounding constant
 * of 2 per word, judging by the name) -- verify.
 */
242 static void DEF(avg_pixels_xy2)( UINT8 *block, const UINT8 *pixels, int line_size, int h)
/* Setup: mm7 = 0, used below as the zero source for byte-to-word unpacking. */
249 "pxor %%mm7, %%mm7\n\t"
251 ::"m"(mm_wtwo[0]):"memory");
/* Read operands %1 and %2 again, one byte further on ("1%N" = displacement 1). */
256 "movq 1%1, %%mm4\n\t"
257 "movq 1%2, %%mm5\n\t"
/* Duplicate mm0/mm1 so low and high halves can be widened separately. */
258 "movq %%mm0, %%mm2\n\t"
259 "movq %%mm1, %%mm3\n\t"
/* Interleave with zero: low four bytes -> words in mm0/mm1, high four -> mm2/mm3. */
260 "punpcklbw %%mm7, %%mm0\n\t"
261 "punpcklbw %%mm7, %%mm1\n\t"
262 "punpckhbw %%mm7, %%mm2\n\t"
263 "punpckhbw %%mm7, %%mm3\n\t"
/* First partial sum: mm0 = low words, mm2 = high words. */
264 "paddusw %%mm1, %%mm0\n\t"
265 "paddusw %%mm3, %%mm2\n\t"
/* Same widening for the one-byte-offset data: low words in mm4/mm5, high in mm1/mm3. */
266 "movq %%mm4, %%mm1\n\t"
267 "movq %%mm5, %%mm3\n\t"
268 "punpcklbw %%mm7, %%mm4\n\t"
269 "punpcklbw %%mm7, %%mm5\n\t"
270 "punpckhbw %%mm7, %%mm1\n\t"
271 "punpckhbw %%mm7, %%mm3\n\t"
/* Second partial sum: mm4 = low words, mm1 = high words. */
272 "paddusw %%mm5, %%mm4\n\t"
273 "paddusw %%mm3, %%mm1\n\t"
/* Add the constant in mm6 to both halves before the shift. */
274 "paddusw %%mm6, %%mm4\n\t"
275 "paddusw %%mm6, %%mm1\n\t"
/* Combine both partial sums: mm0 = low total, mm2 = high total. */
276 "paddusw %%mm4, %%mm0\n\t"
277 "paddusw %%mm1, %%mm2\n\t"
/* Divide each word by 4. */
278 "psrlw $2, %%mm0\n\t"
279 "psrlw $2, %%mm2\n\t"
/* Pack the eight words back into eight unsigned bytes in mm0. */
280 "packuswb %%mm2, %%mm0\n\t"
/* Average the result with memory operand %0 (presumably the destination row). */
281 PAVGB" %0, %%mm0\n\t"
285 "m"(*(pix+line_size))
/*
 * sub_pixels_x2: averages the bytes at operand %1 with the bytes one
 * position further on, widens the result to 16-bit words and subtracts it,
 * with signed saturation, from the values held in mm0/mm1.
 *
 * PAVGB is a string macro defined outside this listing; it presumably
 * expands to the packed unsigned-byte average instruction of the selected
 * instruction set -- confirm at the include site.
 *
 * NOTE(review): the asm operand lists are not visible; %0 is presumably a
 * row of `block` (DCTELEM values) and %1 a row of `pixels`. The load of
 * mm0, its store back to %0, pointer advances and the loop header driven
 * by `h` are also not visible here.
 */
293 static void DEF(sub_pixels_x2)( DCTELEM *block, const UINT8 *pixels, int line_size, int h)
/* Setup: mm7 = 0, used below as the zero source for byte-to-word unpacking. */
300 "pxor %%mm7, %%mm7":::"memory");
/* mm2 = the eight bytes starting one byte past %1 ("1%1" = displacement 1) ... */
303 "movq 1%1, %%mm2\n\t"
/* ... averaged with the eight bytes at %1 itself. */
305 PAVGB" %1, %%mm2\n\t"
/* mm1 = the eight bytes at offset 8 of %0. */
306 "movq 8%0, %%mm1\n\t"
/* Widen the averaged bytes: low four -> words in mm2, high four -> words in mm3. */
307 "movq %%mm2, %%mm3\n\t"
308 "punpcklbw %%mm7, %%mm2\n\t"
309 "punpckhbw %%mm7, %%mm3\n\t"
/* Signed-saturating word subtraction of the widened average from mm0/mm1. */
310 "psubsw %%mm2, %%mm0\n\t"
311 "psubsw %%mm3, %%mm1\n\t"
/* Write the upper result back to offset 8 of %0. */
313 "movq %%mm1, 8%0\n\t"
323 static void DEF(sub_pixels_y2)( DCTELEM *block, const UINT8 *pixels, int line_size, int h)
330 "pxor %%mm7, %%mm7":::"memory");
335 PAVGB" %1, %%mm2\n\t"
336 "movq 8%0, %%mm1\n\t"
337 "movq %%mm2, %%mm3\n\t"
338 "punpcklbw %%mm7, %%mm2\n\t"
339 "punpckhbw %%mm7, %%mm3\n\t"
340 "psubsw %%mm2, %%mm0\n\t"
341 "psubsw %%mm3, %%mm1\n\t"
343 "movq %%mm1, 8%0\n\t"
345 :"m"(*pix), "m"(*(pix+line_size))