2 * Copyright (c) 2002 Brian Foley
3 * Copyright (c) 2002 Dieter Shirley
4 * Copyright (c) 2003-2004 Romain Dolbeau <romain@dolbeau.org>
6 * This file is part of FFmpeg.
8 * FFmpeg is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Lesser General Public
10 * License as published by the Free Software Foundation; either
11 * version 2.1 of the License, or (at your option) any later version.
13 * FFmpeg is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Lesser General Public License for more details.
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with FFmpeg; if not, write to the Free Software
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
28 #include "libavutil/attributes.h"
29 #include "libavutil/cpu.h"
30 #include "libavutil/ppc/cpu.h"
31 #include "libavutil/ppc/types_altivec.h"
32 #include "libavutil/ppc/util_altivec.h"
33 #include "libavcodec/avcodec.h"
34 #include "libavcodec/pixblockdsp.h"
/* Copy an 8x8 block of unsigned 8-bit pixels into a contiguous block of
 * 64 signed 16-bit coefficients (one 16-byte vector store per row) — the
 * standard DCT input layout. VSX build variant.
 * NOTE(review): this extracted fragment carries embedded original line
 * numbers and elides several lines (the `ptrdiff_t stride` parameter
 * continuation, opening brace, `int i;`, the per-row `pixels += stride;`
 * advance, and closing braces). Code below is byte-identical to the
 * fragment; restore the elided lines before compiling. */
39 static void get_pixels_altivec(int16_t *restrict block, const uint8_t *pixels,
/* Permute pattern pairing each of the first 8 pixel bytes (indices
 * 0x00-0x07, first operand) with a byte from the second operand
 * (indices 0x10-0x17, the zero vector): widens 8 u8 pixels to 8 16-bit
 * lanes. Lane byte order depends on target endianness — presumably the
 * little-endian POWER layout this VSX path targets; verify on BE. */
43 vector unsigned char perm =
44 (vector unsigned char) {0x00,0x10, 0x01,0x11,0x02,0x12,0x03,0x13,\
45 0x04,0x14,0x05,0x15,0x06,0x16,0x07,0x17};
46 const vector unsigned char zero =
47 (const vector unsigned char) vec_splat_u8(0);
/* One 8-pixel row per iteration; 8 rows total. */
49 for (i = 0; i < 8; i++) {
50 /* Read potentially unaligned pixels.
51 * We're reading 16 pixels, and actually only want 8,
52 * but we simply ignore the extras. */
53 vector unsigned char bytes = vec_vsx_ld(0, pixels);
55 // Convert the bytes into shorts.
56 //vector signed short shorts = (vector signed short) vec_perm(zero, bytes, perm);
57 vector signed short shorts = (vector signed short) vec_perm(bytes, zero, perm);
59 // Save the data to the block, we assume the block is 16-byte aligned.
60 vec_vsx_st(shorts, i * 16, (vector signed short *) block);
/* Copy an 8x8 block of unsigned 8-bit pixels into 64 signed 16-bit
 * coefficients. Plain (pre-VSX, big-endian) AltiVec variant of the
 * function above.
 * NOTE(review): fragment elides the `ptrdiff_t stride` parameter
 * continuation, opening brace, `int i;`, `pixels += stride;` and closing
 * braces; code below is byte-identical to the extracted lines. */
66 static void get_pixels_altivec(int16_t *restrict block, const uint8_t *pixels,
70 const vec_u8 zero = (const vec_u8)vec_splat_u8(0);
72 for (i = 0; i < 8; i++) {
/* Classic AltiVec unaligned-load idiom: vec_lvsl yields the shuffle
 * mask for the pointer's misalignment, then two aligned loads plus
 * vec_perm reassemble the unaligned data. */
73 vec_u8 perm = vec_lvsl(0, pixels);
74 /* Read potentially unaligned pixels.
75 * We're reading 16 pixels, and actually only want 8,
76 * but we simply ignore the extras. */
/* Offset 7 (not 15) suffices for the second load: only bytes
 * pixels[0..7] are consumed, so the aligned block containing
 * pixels + 7 covers everything needed. */
77 vec_u8 pixl = vec_ld(0, pixels);
78 vec_u8 pixr = vec_ld(7, pixels);
79 vec_u8 bytes = vec_perm(pixl, pixr, perm);
81 // Convert the bytes into shorts.
/* Big-endian merge-high with zero as the first operand puts a zero
 * byte above each pixel byte: a zero-extension of u8 to s16. */
82 vec_s16 shorts = (vec_s16)vec_mergeh(zero, bytes);
84 // Save the data to the block, we assume the block is 16-byte aligned.
85 vec_st(shorts, i * 16, (vec_s16 *)block);
/* Compute the element-wise difference s1 - s2 of two 8x8 blocks of 8-bit
 * pixels into 64 signed 16-bit coefficients. VSX build variant. The loop
 * runs 4 times with the body manually unrolled 2x, covering 8 rows.
 * NOTE(review): fragment elides the opening brace, `int i;`, the
 * per-row `s1 += stride; s2 += stride; block += 8;` pointer advances
 * between the two unrolled halves and at the loop bottom, and the
 * closing braces. Code below is byte-identical to the extracted lines. */
94 static void diff_pixels_altivec(int16_t *restrict block, const uint8_t *s1,
95 const uint8_t *s2, ptrdiff_t stride)
98 const vector unsigned char zero =
99 (const vector unsigned char) vec_splat_u8(0);
100 vector signed short shorts1, shorts2;
102 for (i = 0; i < 4; i++) {
103 /* Read potentially unaligned pixels.
104 * We're reading 16 pixels, and actually only want 8,
105 * but we simply ignore the extras. */
106 vector unsigned char bytes = vec_vsx_ld(0, s1);
108 // Convert the bytes into shorts.
/* NOTE(review): operand order (bytes, zero) is the mirror of the
 * big-endian variant's (zero, bytes) — presumably correct for the
 * little-endian lane order this VSX path targets; verify. */
109 shorts1 = (vector signed short) vec_mergeh(bytes, zero);
111 // Do the same for the second block of pixels.
112 bytes =vec_vsx_ld(0, s2);
114 // Convert the bytes into shorts.
115 shorts2 = (vector signed short) vec_mergeh(bytes, zero);
117 // Do the subtraction.
118 shorts1 = vec_sub(shorts1, shorts2);
120 // Save the data to the block, we assume the block is 16-byte aligned.
121 vec_vsx_st(shorts1, 0, (vector signed short *) block);
127 /* The code below is a copy of the code above...
128 * This is a manual unroll. */
130 /* Read potentially unaligned pixels.
131 * We're reading 16 pixels, and actually only want 8,
132 * but we simply ignore the extras. */
133 bytes = vec_vsx_ld(0, s1);
135 // Convert the bytes into shorts.
136 shorts1 = (vector signed short) vec_mergeh(bytes, zero);
138 // Do the same for the second block of pixels.
139 bytes = vec_vsx_ld(0, s2);
141 // Convert the bytes into shorts.
142 shorts2 = (vector signed short) vec_mergeh(bytes, zero);
144 // Do the subtraction.
145 shorts1 = vec_sub(shorts1, shorts2);
147 // Save the data to the block, we assume the block is 16-byte aligned.
148 vec_vsx_st(shorts1, 0, (vector signed short *) block);
/* Compute s1 - s2 for two 8x8 blocks of 8-bit pixels into 64 signed
 * 16-bit coefficients. Plain (pre-VSX, big-endian) AltiVec variant; the
 * loop runs 4 times with the body manually unrolled 2x for 8 rows.
 * NOTE(review): fragment elides the opening brace, `int i;`, the
 * declaration of `perm` (first used at the vec_lvsl below), the
 * `s1 += stride; s2 += stride; block += 8;` pointer advances, and the
 * closing braces. Code below is byte-identical to the extracted lines. */
156 static void diff_pixels_altivec(int16_t *restrict block, const uint8_t *s1,
157 const uint8_t *s2, ptrdiff_t stride)
161 const vec_u8 zero = (const vec_u8)vec_splat_u8(0);
162 vec_s16 shorts1, shorts2;
164 for (i = 0; i < 4; i++) {
165 /* Read potentially unaligned pixels.
166 * We're reading 16 pixels, and actually only want 8,
167 * but we simply ignore the extras. */
/* lvsl/ld/ld/perm unaligned-load idiom; offset 15 makes the second
 * aligned load cover the full 16-byte window starting at s1. */
168 perm = vec_lvsl(0, s1);
169 vec_u8 pixl = vec_ld(0, s1);
170 vec_u8 pixr = vec_ld(15, s1);
171 vec_u8 bytes = vec_perm(pixl, pixr, perm);
173 // Convert the bytes into shorts.
/* Big-endian zero-extension: merge-high interleaves a zero byte
 * above each pixel byte, yielding 8 s16 lanes. */
174 shorts1 = (vec_s16)vec_mergeh(zero, bytes);
176 // Do the same for the second block of pixels.
177 perm = vec_lvsl(0, s2);
178 pixl = vec_ld(0, s2);
179 pixr = vec_ld(15, s2);
180 bytes = vec_perm(pixl, pixr, perm);
182 // Convert the bytes into shorts.
183 shorts2 = (vec_s16)vec_mergeh(zero, bytes);
185 // Do the subtraction.
186 shorts1 = vec_sub(shorts1, shorts2);
188 // Save the data to the block, we assume the block is 16-byte aligned.
189 vec_st(shorts1, 0, (vec_s16 *)block);
195 /* The code below is a copy of the code above...
196 * This is a manual unroll. */
198 /* Read potentially unaligned pixels.
199 * We're reading 16 pixels, and actually only want 8,
200 * but we simply ignore the extras. */
201 perm = vec_lvsl(0, s1);
202 pixl = vec_ld(0, s1);
203 pixr = vec_ld(15, s1);
204 bytes = vec_perm(pixl, pixr, perm);
206 // Convert the bytes into shorts.
207 shorts1 = (vec_s16)vec_mergeh(zero, bytes);
209 // Do the same for the second block of pixels.
210 perm = vec_lvsl(0, s2);
211 pixl = vec_ld(0, s2);
212 pixr = vec_ld(15, s2);
213 bytes = vec_perm(pixl, pixr, perm);
215 // Convert the bytes into shorts.
216 shorts2 = (vec_s16)vec_mergeh(zero, bytes);
218 // Do the subtraction.
219 shorts1 = vec_sub(shorts1, shorts2);
221 // Save the data to the block, we assume the block is 16-byte aligned.
222 vec_st(shorts1, 0, (vec_s16 *)block);
230 #endif /* HAVE_VSX */
232 #endif /* HAVE_ALTIVEC */
/* VSX-dispatched get_pixels: copy an 8x8 block of u8 pixels into 64 s16
 * coefficients, one widened row (16 bytes) stored per iteration via the
 * vsx_ld_u8_s16 helper (declared elsewhere — presumably loads 8 u8 and
 * zero-extends to 8 s16; confirm in util_altivec.h).
 * NOTE(review): fragment elides the `ptrdiff_t stride` continuation,
 * opening brace, `int i;`, `pixels += stride;` and closing braces;
 * code below is byte-identical to the extracted lines. */
235 static void get_pixels_vsx(int16_t *restrict block, const uint8_t *pixels,
239 for (i = 0; i < 8; i++) {
240 vec_s16 shorts = vsx_ld_u8_s16(0, pixels);
242 vec_vsx_st(shorts, i * 16, block);
/* VSX-dispatched diff_pixels: s1 - s2 over an 8x8 block of u8 pixels
 * into 64 s16 coefficients. Unlike the AltiVec variants, this loops a
 * full 8 times with no manual unroll, leaning on the vsx_ld_u8_s16
 * widening-load helper.
 * NOTE(review): fragment elides the opening brace, `int i;`, the
 * `s1 += stride; s2 += stride; block += 8;` advances and the closing
 * braces; code below is byte-identical to the extracted lines. */
248 static void diff_pixels_vsx(int16_t *restrict block, const uint8_t *s1,
249 const uint8_t *s2, ptrdiff_t stride)
252 vec_s16 shorts1, shorts2;
253 for (i = 0; i < 8; i++) {
254 shorts1 = vsx_ld_u8_s16(0, s1);
255 shorts2 = vsx_ld_u8_s16(0, s2);
257 shorts1 = vec_sub(shorts1, shorts2);
259 vec_vsx_st(shorts1, 0, block);
266 #endif /* HAVE_VSX */
/* Runtime-dispatch entry point: install the PPC SIMD implementations
 * into the PixblockDSPContext when the CPU reports the matching feature
 * flags. get_pixels is only replaced for 8-bit content (!high_bit_depth);
 * diff_pixels is replaced unconditionally. When both sections are
 * compiled in, the VSX assignments run after (and override) the AltiVec
 * ones. `avctx` is unused in the visible code.
 * NOTE(review): fragment elides the opening brace, the `#if HAVE_ALTIVEC`
 * / `#if HAVE_VSX` openers paired with the visible `#endif`s, the
 * `return;` after each failed CPU-flag check, the brace closing the
 * `if (!high_bit_depth)` body, and the function's closing brace.
 * Code below is byte-identical to the extracted lines. */
268 av_cold void ff_pixblockdsp_init_ppc(PixblockDSPContext *c,
269 AVCodecContext *avctx,
270 unsigned high_bit_depth)
/* Bail out (elided `return;`) unless the CPU actually has AltiVec. */
273 if (!PPC_ALTIVEC(av_get_cpu_flags()))
276 c->diff_pixels = diff_pixels_altivec;
278 if (!high_bit_depth) {
279 c->get_pixels = get_pixels_altivec;
281 #endif /* HAVE_ALTIVEC */
/* Same pattern for VSX; these override the AltiVec pointers. */
284 if (!PPC_VSX(av_get_cpu_flags()))
287 c->diff_pixels = diff_pixels_vsx;
290 c->get_pixels = get_pixels_vsx;
291 #endif /* HAVE_VSX */