 * Copyright (c) 2002 Brian Foley
 * Copyright (c) 2002 Dieter Shirley
 * Copyright (c) 2003-2004 Romain Dolbeau <romain@dolbeau.org>
 *
 * This file is part of Libav.
 *
 * Libav is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * Libav is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with Libav; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include "libavutil/attributes.h"
#include "libavutil/cpu.h"
#include "libavutil/ppc/cpu.h"
#include "libavutil/ppc/util_altivec.h"

#include "libavcodec/avcodec.h"
#include "libavcodec/pixblockdsp.h"
33 #if HAVE_ALTIVEC && HAVE_BIGENDIAN
35 static void get_pixels_altivec(int16_t *restrict block, const uint8_t *pixels,
39 vec_u8 perm = vec_lvsl(0, pixels);
40 const vec_u8 zero = (const vec_u8)vec_splat_u8(0);
42 for (i = 0; i < 8; i++) {
43 /* Read potentially unaligned pixels.
44 * We're reading 16 pixels, and actually only want 8,
45 * but we simply ignore the extras. */
46 vec_u8 pixl = vec_ld(0, pixels);
47 vec_u8 pixr = vec_ld(7, pixels);
48 vec_u8 bytes = vec_perm(pixl, pixr, perm);
50 // Convert the bytes into shorts.
51 vec_s16 shorts = (vec_s16)vec_mergeh(zero, bytes);
53 // Save the data to the block, we assume the block is 16-byte aligned.
54 vec_st(shorts, i * 16, (vec_s16 *)block);
60 static void diff_pixels_altivec(int16_t *restrict block, const uint8_t *s1,
61 const uint8_t *s2, ptrdiff_t stride)
64 vec_u8 perm1 = vec_lvsl(0, s1);
65 vec_u8 perm2 = vec_lvsl(0, s2);
66 const vec_u8 zero = (const vec_u8)vec_splat_u8(0);
67 vec_s16 shorts1, shorts2;
69 for (i = 0; i < 4; i++) {
70 /* Read potentially unaligned pixels.
71 * We're reading 16 pixels, and actually only want 8,
72 * but we simply ignore the extras. */
73 vec_u8 pixl = vec_ld(0, s1);
74 vec_u8 pixr = vec_ld(15, s1);
75 vec_u8 bytes = vec_perm(pixl, pixr, perm1);
77 // Convert the bytes into shorts.
78 shorts1 = (vec_s16)vec_mergeh(zero, bytes);
80 // Do the same for the second block of pixels.
82 pixr = vec_ld(15, s2);
83 bytes = vec_perm(pixl, pixr, perm2);
85 // Convert the bytes into shorts.
86 shorts2 = (vec_s16)vec_mergeh(zero, bytes);
88 // Do the subtraction.
89 shorts1 = vec_sub(shorts1, shorts2);
91 // Save the data to the block, we assume the block is 16-byte aligned.
92 vec_st(shorts1, 0, (vec_s16 *)block);
98 /* The code below is a copy of the code above...
99 * This is a manual unroll. */
101 /* Read potentially unaligned pixels.
102 * We're reading 16 pixels, and actually only want 8,
103 * but we simply ignore the extras. */
104 pixl = vec_ld(0, s1);
105 pixr = vec_ld(15, s1);
106 bytes = vec_perm(pixl, pixr, perm1);
108 // Convert the bytes into shorts.
109 shorts1 = (vec_s16)vec_mergeh(zero, bytes);
111 // Do the same for the second block of pixels.
112 pixl = vec_ld(0, s2);
113 pixr = vec_ld(15, s2);
114 bytes = vec_perm(pixl, pixr, perm2);
116 // Convert the bytes into shorts.
117 shorts2 = (vec_s16)vec_mergeh(zero, bytes);
119 // Do the subtraction.
120 shorts1 = vec_sub(shorts1, shorts2);
122 // Save the data to the block, we assume the block is 16-byte aligned.
123 vec_st(shorts1, 0, (vec_s16 *)block);
131 #endif /* HAVE_ALTIVEC && HAVE_BIGENDIAN */
134 static void get_pixels_vsx(int16_t *restrict block, const uint8_t *pixels,
138 for (i = 0; i < 8; i++) {
139 vec_s16 shorts = vsx_ld_u8_s16(0, pixels);
141 vec_vsx_st(shorts, i * 16, block);
147 static void diff_pixels_vsx(int16_t *restrict block, const uint8_t *s1,
148 const uint8_t *s2, ptrdiff_t stride)
151 vec_s16 shorts1, shorts2;
152 for (i = 0; i < 8; i++) {
153 shorts1 = vsx_ld_u8_s16(0, s1);
154 shorts2 = vsx_ld_u8_s16(0, s2);
156 shorts1 = vec_sub(shorts1, shorts2);
158 vec_vsx_st(shorts1, 0, block);
165 #endif /* HAVE_VSX */
167 av_cold void ff_pixblockdsp_init_ppc(PixblockDSPContext *c,
168 AVCodecContext *avctx,
169 unsigned high_bit_depth)
171 #if HAVE_ALTIVEC && HAVE_BIGENDIAN
172 if (!PPC_ALTIVEC(av_get_cpu_flags()))
175 c->diff_pixels = diff_pixels_altivec;
177 if (!high_bit_depth) {
178 c->get_pixels = get_pixels_altivec;
180 #endif /* HAVE_ALTIVEC && HAVE_BIGENDIAN */
183 if (!PPC_VSX(av_get_cpu_flags()))
186 c->diff_pixels = diff_pixels_vsx;
189 c->get_pixels = get_pixels_vsx;
190 #endif /* HAVE_VSX */