git.sesse.net Git - ffmpeg/blob - libavcodec/ppc/pixblockdsp.c

   1 /*
   2  * Copyright (c) 2002 Brian Foley
   3  * Copyright (c) 2002 Dieter Shirley
   4  * Copyright (c) 2003-2004 Romain Dolbeau <romain@dolbeau.org>
   5  *
   6  * This file is part of FFmpeg.
   7  *
   8  * FFmpeg is free software; you can redistribute it and/or
   9  * modify it under the terms of the GNU Lesser General Public
  10  * License as published by the Free Software Foundation; either
  11  * version 2.1 of the License, or (at your option) any later version.
  12  *
  13  * FFmpeg is distributed in the hope that it will be useful,
  14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  16  * Lesser General Public License for more details.
  17  *
  18  * You should have received a copy of the GNU Lesser General Public
  19  * License along with FFmpeg; if not, write to the Free Software
  20  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  21  */
  22
  23 #include "config.h"
  24
  25 #include "libavutil/attributes.h"
  26 #include "libavutil/cpu.h"
  27 #include "libavutil/ppc/cpu.h"
  28 #include "libavutil/ppc/util_altivec.h"
  29
  30 #include "libavcodec/avcodec.h"
  31 #include "libavcodec/pixblockdsp.h"
  32
  33 #if HAVE_ALTIVEC
  34
  35 #if HAVE_VSX
  36 static void get_pixels_altivec(int16_t *restrict block, const uint8_t *pixels,
  37                                ptrdiff_t stride)
  38 {
  39     int i;
  40     vector unsigned char perm =
  41         (vector unsigned char) {0x00,0x10, 0x01,0x11,0x02,0x12,0x03,0x13,\
  42             0x04,0x14,0x05,0x15,0x06,0x16,0x07,0x17};
  43     const vector unsigned char zero =
  44         (const vector unsigned char) vec_splat_u8(0);
  45
  46     for (i = 0; i < 8; i++) {
  47         /* Read potentially unaligned pixels.
  48          * We're reading 16 pixels, and actually only want 8,
  49          * but we simply ignore the extras. */
  50         vector unsigned char bytes = vec_vsx_ld(0, pixels);
  51
  52         // Convert the bytes into shorts.
  53         //vector signed short shorts = (vector signed short) vec_perm(zero, bytes, perm);
  54         vector signed short shorts = (vector signed short) vec_perm(bytes, zero, perm);
  55
  56         // Save the data to the block, we assume the block is 16-byte aligned.
  57         vec_vsx_st(shorts, i * 16, (vector signed short *) block);
  58
  59         pixels += stride;
  60     }
  61 }
  62 #else
  63 static void get_pixels_altivec(int16_t *restrict block, const uint8_t *pixels,
  64                                ptrdiff_t stride)
  65 {
  66     int i;
  67     const vec_u8 zero = (const vec_u8)vec_splat_u8(0);
  68
  69     for (i = 0; i < 8; i++) {
  70         vec_u8 perm = vec_lvsl(0, pixels);
  71         /* Read potentially unaligned pixels.
  72          * We're reading 16 pixels, and actually only want 8,
  73          * but we simply ignore the extras. */
  74         vec_u8 pixl = vec_ld(0, pixels);
  75         vec_u8 pixr = vec_ld(7, pixels);
  76         vec_u8 bytes = vec_perm(pixl, pixr, perm);
  77
  78         // Convert the bytes into shorts.
  79         vec_s16 shorts = (vec_s16)vec_mergeh(zero, bytes);
  80
  81         // Save the data to the block, we assume the block is 16-byte aligned.
  82         vec_st(shorts, i * 16, (vec_s16 *)block);
  83
  84         pixels += stride;
  85     }
  86 }
  87
  88 #endif /* HAVE_VSX */
  89
  90 #if HAVE_VSX
  91 static void diff_pixels_altivec(int16_t *restrict block, const uint8_t *s1,
  92                                 const uint8_t *s2, ptrdiff_t stride)
  93 {
  94   int i;
  95   const vector unsigned char zero =
  96     (const vector unsigned char) vec_splat_u8(0);
  97   vector signed short shorts1, shorts2;
  98
  99   for (i = 0; i < 4; i++) {
 100     /* Read potentially unaligned pixels.
 101      * We're reading 16 pixels, and actually only want 8,
 102      * but we simply ignore the extras. */
 103     vector unsigned char bytes = vec_vsx_ld(0,  s1);
 104
 105     // Convert the bytes into shorts.
 106     shorts1 = (vector signed short) vec_mergeh(bytes, zero);
 107
 108     // Do the same for the second block of pixels.
 109     bytes =vec_vsx_ld(0,  s2);
 110
 111     // Convert the bytes into shorts.
 112     shorts2 = (vector signed short) vec_mergeh(bytes, zero);
 113
 114     // Do the subtraction.
 115     shorts1 = vec_sub(shorts1, shorts2);
 116
 117     // Save the data to the block, we assume the block is 16-byte aligned.
 118     vec_vsx_st(shorts1, 0, (vector signed short *) block);
 119
 120     s1    += stride;
 121     s2    += stride;
 122     block += 8;
 123
 124     /* The code below is a copy of the code above...
 125      * This is a manual unroll. */
 126
 127     /* Read potentially unaligned pixels.
 128      * We're reading 16 pixels, and actually only want 8,
 129      * but we simply ignore the extras. */
 130     bytes = vec_vsx_ld(0,  s1);
 131
 132     // Convert the bytes into shorts.
 133     shorts1 = (vector signed short) vec_mergeh(bytes, zero);
 134
 135     // Do the same for the second block of pixels.
 136     bytes = vec_vsx_ld(0,  s2);
 137
 138     // Convert the bytes into shorts.
 139     shorts2 = (vector signed short) vec_mergeh(bytes, zero);
 140
 141     // Do the subtraction.
 142     shorts1 = vec_sub(shorts1, shorts2);
 143
 144     // Save the data to the block, we assume the block is 16-byte aligned.
 145     vec_vsx_st(shorts1, 0, (vector signed short *) block);
 146
 147     s1    += stride;
 148     s2    += stride;
 149     block += 8;
 150   }
 151 }
 152 #else
 153 static void diff_pixels_altivec(int16_t *restrict block, const uint8_t *s1,
 154                                 const uint8_t *s2, ptrdiff_t stride)
 155 {
 156     int i;
 157     vec_u8 perm;
 158     const vec_u8 zero = (const vec_u8)vec_splat_u8(0);
 159     vec_s16 shorts1, shorts2;
 160
 161     for (i = 0; i < 4; i++) {
 162         /* Read potentially unaligned pixels.
 163          * We're reading 16 pixels, and actually only want 8,
 164          * but we simply ignore the extras. */
 165         perm = vec_lvsl(0, s1);
 166         vec_u8 pixl  = vec_ld(0,  s1);
 167         vec_u8 pixr  = vec_ld(15, s1);
 168         vec_u8 bytes = vec_perm(pixl, pixr, perm);
 169
 170         // Convert the bytes into shorts.
 171         shorts1 = (vec_s16)vec_mergeh(zero, bytes);
 172
 173         // Do the same for the second block of pixels.
 174         perm = vec_lvsl(0, s2);
 175         pixl  = vec_ld(0,  s2);
 176         pixr  = vec_ld(15, s2);
 177         bytes = vec_perm(pixl, pixr, perm);
 178
 179         // Convert the bytes into shorts.
 180         shorts2 = (vec_s16)vec_mergeh(zero, bytes);
 181
 182         // Do the subtraction.
 183         shorts1 = vec_sub(shorts1, shorts2);
 184
 185         // Save the data to the block, we assume the block is 16-byte aligned.
 186         vec_st(shorts1, 0, (vec_s16 *)block);
 187
 188         s1    += stride;
 189         s2    += stride;
 190         block += 8;
 191
 192         /* The code below is a copy of the code above...
 193          * This is a manual unroll. */
 194
 195         /* Read potentially unaligned pixels.
 196          * We're reading 16 pixels, and actually only want 8,
 197          * but we simply ignore the extras. */
 198         perm = vec_lvsl(0, s1);
 199         pixl  = vec_ld(0,  s1);
 200         pixr  = vec_ld(15, s1);
 201         bytes = vec_perm(pixl, pixr, perm);
 202
 203         // Convert the bytes into shorts.
 204         shorts1 = (vec_s16)vec_mergeh(zero, bytes);
 205
 206         // Do the same for the second block of pixels.
 207         perm = vec_lvsl(0, s2);
 208         pixl  = vec_ld(0,  s2);
 209         pixr  = vec_ld(15, s2);
 210         bytes = vec_perm(pixl, pixr, perm);
 211
 212         // Convert the bytes into shorts.
 213         shorts2 = (vec_s16)vec_mergeh(zero, bytes);
 214
 215         // Do the subtraction.
 216         shorts1 = vec_sub(shorts1, shorts2);
 217
 218         // Save the data to the block, we assume the block is 16-byte aligned.
 219         vec_st(shorts1, 0, (vec_s16 *)block);
 220
 221         s1    += stride;
 222         s2    += stride;
 223         block += 8;
 224     }
 225 }
 226
 227 #endif /* HAVE_VSX */
 228
 229 #endif /* HAVE_ALTIVEC */
 230
 231 #if HAVE_VSX
 232 static void get_pixels_vsx(int16_t *restrict block, const uint8_t *pixels,
 233                            ptrdiff_t stride)
 234 {
 235     int i;
 236     for (i = 0; i < 8; i++) {
 237         vec_s16 shorts = vsx_ld_u8_s16(0, pixels);
 238
 239         vec_vsx_st(shorts, i * 16, block);
 240
 241         pixels += stride;
 242     }
 243 }
 244
 245 static void diff_pixels_vsx(int16_t *restrict block, const uint8_t *s1,
 246                             const uint8_t *s2, ptrdiff_t stride)
 247 {
 248     int i;
 249     vec_s16 shorts1, shorts2;
 250     for (i = 0; i < 8; i++) {
 251         shorts1 = vsx_ld_u8_s16(0, s1);
 252         shorts2 = vsx_ld_u8_s16(0, s2);
 253
 254         shorts1 = vec_sub(shorts1, shorts2);
 255
 256         vec_vsx_st(shorts1, 0, block);
 257
 258         s1    += stride;
 259         s2    += stride;
 260         block += 8;
 261     }
 262 }
 263 #endif /* HAVE_VSX */
 264
 265 av_cold void ff_pixblockdsp_init_ppc(PixblockDSPContext *c,
 266                                      AVCodecContext *avctx,
 267                                      unsigned high_bit_depth)
 268 {
 269 #if HAVE_ALTIVEC
 270     if (!PPC_ALTIVEC(av_get_cpu_flags()))
 271         return;
 272
 273     c->diff_pixels = diff_pixels_altivec;
 274
 275     if (!high_bit_depth) {
 276         c->get_pixels = get_pixels_altivec;
 277     }
 278 #endif /* HAVE_ALTIVEC */
 279
 280 #if HAVE_VSX
 281     if (!PPC_VSX(av_get_cpu_flags()))
 282         return;
 283
 284     c->diff_pixels = diff_pixels_vsx;
 285
 286     if (!high_bit_depth)
 287         c->get_pixels = get_pixels_vsx;
 288 #endif /* HAVE_VSX */
 289 }