2 * Copyright (c) 2002 Brian Foley
3 * Copyright (c) 2002 Dieter Shirley
4 * Copyright (c) 2003-2004 Romain Dolbeau <romain@dolbeau.org>
6 * This file is part of FFmpeg.
8 * FFmpeg is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2.1 of the License, or (at your option) any later version.
13 * FFmpeg is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details.
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with FFmpeg; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
28 #include "libavutil/attributes.h"
29 #include "libavutil/cpu.h"
30 #include "libavutil/ppc/cpu.h"
31 #include "libavutil/ppc/types_altivec.h"
32 #include "libavutil/ppc/util_altivec.h"
33 #include "libavcodec/avcodec.h"
34 #include "libavcodec/pixblockdsp.h"
/*
 * get_pixels_altivec (variant built on vec_vsx_ld / vec_vsx_st).
 *
 * In each of 8 loop iterations the visible code loads 16 bytes from
 * `pixels`, interleaves the first eight of them with zero bytes, and stores
 * the resulting 16-byte vector at byte offset i * 16 inside `block`.
 *
 * NOTE(review): the numbers fused to the start of each line jump
 * (39 -> 43, 47 -> 49, 60 -> 66), so the rest of the parameter list, the
 * declaration of `i`, the braces and any per-row advance of `pixels` are not
 * visible in this excerpt. Confirm against the complete file.
 */
39 static void get_pixels_altivec(int16_t *restrict block, const uint8_t *pixels,
/* vec_perm selector: 0x00..0x07 pick bytes 0..7 of the first operand and
 * 0x10..0x17 pick bytes of the second operand, alternating pairwise. */
43 vector unsigned char perm =
44 (vector unsigned char) {0x00,0x10, 0x01,0x11,0x02,0x12,0x03,0x13,\
45 0x04,0x14,0x05,0x15,0x06,0x16,0x07,0x17};
/* All-zero vector; supplies the second byte of every output pair. */
46 const vector unsigned char zero =
47 (const vector unsigned char) vec_splat_u8(0);
49 for (i = 0; i < 8; i++) {
50 /* Read potentially unaligned pixels.
51 * We're reading 16 pixels, and actually only want 8,
52 * but we simply ignore the extras. */
53 vector unsigned char bytes = vec_vsx_ld(0, pixels);
55 // Convert the bytes into shorts.
56 //vector signed short shorts = (vector signed short) vec_perm(zero, bytes, perm);
/* Operand order (bytes, zero) places each pixel byte before its zero byte;
 * presumably this targets a little-endian layout -- verify. */
57 vector signed short shorts = (vector signed short) vec_perm(bytes, zero, perm);
59 // Save the data to the block, we assume the block is 16-byte aligned.
60 vec_vsx_st(shorts, i * 16, (vector signed short *) block);
/*
 * get_pixels_altivec (variant built on vec_ld / vec_lvsl / vec_st).
 *
 * In each of 8 loop iterations the visible code assembles 16 bytes starting
 * at `pixels` from two aligned loads, zero-extends the first eight bytes to
 * 16-bit values with vec_mergeh, and stores them at byte offset i * 16
 * inside `block`.
 *
 * NOTE(review): the numbers fused to the start of each line jump
 * (66 -> 70, 85 -> 94), so the rest of the parameter list, the declaration
 * of `i`, the braces and any per-row advance of `pixels` are not visible in
 * this excerpt. Confirm against the complete file.
 */
66 static void get_pixels_altivec(int16_t *restrict block, const uint8_t *pixels,
/* Alignment permute vector derived from the initial value of `pixels`.
 * NOTE(review): it is computed once, before the loop, so it is only valid
 * for later rows if `pixels` keeps the same offset within a 16-byte block
 * on every iteration -- confirm against the caller's stride. */
70 vec_u8 perm = vec_lvsl(0, pixels);
71 const vec_u8 zero = (const vec_u8)vec_splat_u8(0);
73 for (i = 0; i < 8; i++) {
74 /* Read potentially unaligned pixels.
75 * We're reading 16 pixels, and actually only want 8,
76 * but we simply ignore the extras. */
/* Two aligned loads: the block holding pixels[0] and the block holding
 * pixels[7] (the last byte that is actually needed); vec_perm then
 * extracts the bytes starting at `pixels`. */
77 vec_u8 pixl = vec_ld(0, pixels);
78 vec_u8 pixr = vec_ld(7, pixels);
79 vec_u8 bytes = vec_perm(pixl, pixr, perm);
81 // Convert the bytes into shorts.
/* Interleaves a zero byte ahead of each of the first eight pixel bytes. */
82 vec_s16 shorts = (vec_s16)vec_mergeh(zero, bytes);
84 // Save the data to the block, we assume the block is 16-byte aligned.
85 vec_st(shorts, i * 16, (vec_s16 *)block);
/*
 * diff_pixels_altivec (variant built on vec_vsx_ld / vec_vsx_st).
 *
 * The loop runs 4 times and its body contains two hand-unrolled copies of
 * the same sequence: load 16 bytes from `s1` and from `s2`, widen the first
 * eight bytes of each to 16-bit values, subtract (s1 - s2) and store the
 * eight differences at offset 0 of `block`.
 *
 * NOTE(review): both stores use offset 0, so `block`, `s1` and `s2` must be
 * advanced between them. The fused line numbers jump at exactly those
 * places (121 -> 127, 148 -> 156) and the advancing statements, the
 * declaration of `i` and the braces are not visible in this excerpt.
 * Confirm against the complete file.
 */
94 static void diff_pixels_altivec(int16_t *restrict block, const uint8_t *s1,
95 const uint8_t *s2, int stride)
98 const vector unsigned char zero =
99 (const vector unsigned char) vec_splat_u8(0);
100 vector signed short shorts1, shorts2;
102 for (i = 0; i < 4; i++) {
103 /* Read potentially unaligned pixels.
104 * We're reading 16 pixels, and actually only want 8,
105 * but we simply ignore the extras. */
106 vector unsigned char bytes = vec_vsx_ld(0, s1);
108 // Convert the bytes into shorts.
/* vec_mergeh(bytes, zero): each pixel byte is followed by a zero byte. */
109 shorts1 = (vector signed short) vec_mergeh(bytes, zero);
111 // Do the same for the second block of pixels.
112 bytes =vec_vsx_ld(0, s2);
114 // Convert the bytes into shorts.
115 shorts2 = (vector signed short) vec_mergeh(bytes, zero);
117 // Do the subtraction.
118 shorts1 = vec_sub(shorts1, shorts2);
120 // Save the data to the block, we assume the block is 16-byte aligned.
121 vec_vsx_st(shorts1, 0, (vector signed short *) block);
127 /* The code below is a copy of the code above...
128 * This is a manual unroll. */
130 /* Read potentially unaligned pixels.
131 * We're reading 16 pixels, and actually only want 8,
132 * but we simply ignore the extras. */
/* Reuses the `bytes` variable declared in the first copy. */
133 bytes = vec_vsx_ld(0, s1);
135 // Convert the bytes into shorts.
136 shorts1 = (vector signed short) vec_mergeh(bytes, zero);
138 // Do the same for the second block of pixels.
139 bytes = vec_vsx_ld(0, s2);
141 // Convert the bytes into shorts.
142 shorts2 = (vector signed short) vec_mergeh(bytes, zero);
144 // Do the subtraction.
145 shorts1 = vec_sub(shorts1, shorts2);
147 // Save the data to the block, we assume the block is 16-byte aligned.
148 vec_vsx_st(shorts1, 0, (vector signed short *) block);
/*
 * diff_pixels_altivec (variant built on vec_ld / vec_lvsl / vec_st).
 *
 * The loop runs 4 times and its body contains two hand-unrolled copies of
 * the same sequence: assemble 16 bytes starting at `s1` and at `s2` from
 * pairs of aligned loads, zero-extend the first eight bytes of each to
 * 16-bit values, subtract (s1 - s2) and store the eight differences at
 * offset 0 of `block`.
 *
 * NOTE(review): both stores use offset 0, so `block`, `s1` and `s2` must be
 * advanced between them. The fused line numbers jump at exactly those
 * places (188 -> 194, 219 -> 227) and the advancing statements, the
 * declaration of `i` and the braces are not visible in this excerpt.
 * Confirm against the complete file.
 */
156 static void diff_pixels_altivec(int16_t *restrict block, const uint8_t *s1,
157 const uint8_t *s2, int stride)
/* Alignment permute vectors derived from the initial `s1` / `s2`.
 * NOTE(review): they are computed once, before the loop, so they are only
 * valid for later rows if each pointer keeps the same offset within a
 * 16-byte block on every iteration -- confirm against the caller's stride. */
160 vec_u8 perm1 = vec_lvsl(0, s1);
161 vec_u8 perm2 = vec_lvsl(0, s2);
162 const vec_u8 zero = (const vec_u8)vec_splat_u8(0);
163 vec_s16 shorts1, shorts2;
165 for (i = 0; i < 4; i++) {
166 /* Read potentially unaligned pixels.
167 * We're reading 16 pixels, and actually only want 8,
168 * but we simply ignore the extras. */
/* NOTE(review): the second load uses offset 15, whereas the vec_ld-based
 * get_pixels_altivec in this file uses offset 7 for the same 8-byte read.
 * With 15, the following 16-byte block is read whenever s1 is not 16-byte
 * aligned, even if the eight needed bytes do not reach it -- check whether
 * that over-read is intentional. */
169 vec_u8 pixl = vec_ld(0, s1);
170 vec_u8 pixr = vec_ld(15, s1);
171 vec_u8 bytes = vec_perm(pixl, pixr, perm1);
173 // Convert the bytes into shorts.
/* Interleaves a zero byte ahead of each of the first eight pixel bytes. */
174 shorts1 = (vec_s16)vec_mergeh(zero, bytes);
176 // Do the same for the second block of pixels.
177 pixl = vec_ld(0, s2);
178 pixr = vec_ld(15, s2);
179 bytes = vec_perm(pixl, pixr, perm2);
181 // Convert the bytes into shorts.
182 shorts2 = (vec_s16)vec_mergeh(zero, bytes);
184 // Do the subtraction.
185 shorts1 = vec_sub(shorts1, shorts2);
187 // Save the data to the block, we assume the block is 16-byte aligned.
188 vec_st(shorts1, 0, (vec_s16 *)block);
194 /* The code below is a copy of the code above...
195 * This is a manual unroll. */
197 /* Read potentially unaligned pixels.
198 * We're reading 16 pixels, and actually only want 8,
199 * but we simply ignore the extras. */
/* Reuses pixl, pixr and bytes declared in the first copy. */
200 pixl = vec_ld(0, s1);
201 pixr = vec_ld(15, s1);
202 bytes = vec_perm(pixl, pixr, perm1);
204 // Convert the bytes into shorts.
205 shorts1 = (vec_s16)vec_mergeh(zero, bytes);
207 // Do the same for the second block of pixels.
208 pixl = vec_ld(0, s2);
209 pixr = vec_ld(15, s2);
210 bytes = vec_perm(pixl, pixr, perm2);
212 // Convert the bytes into shorts.
213 shorts2 = (vec_s16)vec_mergeh(zero, bytes);
215 // Do the subtraction.
216 shorts1 = vec_sub(shorts1, shorts2);
218 // Save the data to the block, we assume the block is 16-byte aligned.
219 vec_st(shorts1, 0, (vec_s16 *)block);
227 #endif /* HAVE_VSX */
229 #endif /* HAVE_ALTIVEC */
/*
 * get_pixels_vsx: in each of 8 loop iterations, obtains a vector of 16-bit
 * values from `pixels` through vsx_ld_u8_s16 and stores it at byte offset
 * i * 16 inside `block`.
 *
 * NOTE(review): vsx_ld_u8_s16 is defined outside this excerpt; judging by
 * its name it presumably loads eight uint8 values and widens them to int16
 * -- verify. The fused line numbers jump (232 -> 236, 239 -> 245), so the
 * rest of the parameter list, the declaration of `i`, the braces and any
 * per-row advance of `pixels` are not visible here.
 */
232 static void get_pixels_vsx(int16_t *restrict block, const uint8_t *pixels,
236 for (i = 0; i < 8; i++) {
237 vec_s16 shorts = vsx_ld_u8_s16(0, pixels);
/* Row i occupies bytes [i * 16, i * 16 + 16) of block. */
239 vec_vsx_st(shorts, i * 16, block);
/*
 * diff_pixels_vsx: in each of 8 loop iterations, obtains one vector of
 * 16-bit values from `s1` and one from `s2` through vsx_ld_u8_s16,
 * subtracts them (s1 - s2) and stores the result at offset 0 of `block`.
 *
 * NOTE(review): vsx_ld_u8_s16 is defined outside this excerpt; judging by
 * its name it presumably loads eight uint8 values and widens them to int16
 * -- verify. The store always uses offset 0, so `block`, `s1` and `s2`
 * must be advanced inside the loop; the fused line numbers jump
 * (256 -> 263) where those statements would be, and the declaration of `i`
 * and the braces are likewise not visible here.
 */
245 static void diff_pixels_vsx(int16_t *restrict block, const uint8_t *s1,
246 const uint8_t *s2, int stride)
249 vec_s16 shorts1, shorts2;
250 for (i = 0; i < 8; i++) {
251 shorts1 = vsx_ld_u8_s16(0, s1);
252 shorts2 = vsx_ld_u8_s16(0, s2);
254 shorts1 = vec_sub(shorts1, shorts2);
256 vec_vsx_st(shorts1, 0, block);
263 #endif /* HAVE_VSX */
/*
 * ff_pixblockdsp_init_ppc: installs the PowerPC routines from this file
 * into the function pointers of the PixblockDSPContext `c`.
 *
 * Visible assignments:
 *   - c->diff_pixels = diff_pixels_altivec
 *   - c->get_pixels  = get_pixels_altivec, inside an `if (!high_bit_depth)`
 *   - c->diff_pixels = diff_pixels_vsx
 *   - c->get_pixels  = get_pixels_vsx
 * `avctx` is not referenced by any visible line.
 *
 * NOTE(review): the fused line numbers jump right after each CPU-flag test
 * (270 -> 273, 281 -> 284) and before the last assignment (284 -> 287), so
 * the statements those `if`s actually control (presumably early returns),
 * the guard on the final get_pixels assignment, the opening `#if` lines and
 * the braces are not visible in this excerpt. Do not read the lines below
 * as if each `if` governed the assignment printed after it; confirm against
 * the complete file.
 */
265 av_cold void ff_pixblockdsp_init_ppc(PixblockDSPContext *c,
266 AVCodecContext *avctx,
267 unsigned high_bit_depth)
/* Runtime check of the CPU flags for AltiVec support. */
270 if (!PPC_ALTIVEC(av_get_cpu_flags()))
273 c->diff_pixels = diff_pixels_altivec;
/* get_pixels is replaced only when high_bit_depth is zero. */
275 if (!high_bit_depth) {
276 c->get_pixels = get_pixels_altivec;
278 #endif /* HAVE_ALTIVEC */
/* Runtime check of the CPU flags for VSX support; the VSX routines below
 * overwrite the pointers set above when they are assigned. */
281 if (!PPC_VSX(av_get_cpu_flags()))
284 c->diff_pixels = diff_pixels_vsx;
287 c->get_pixels = get_pixels_vsx;
288 #endif /* HAVE_VSX */