git.sesse.net Git - ffmpeg/blob - libavcodec/ppc/pixblockdsp.c

   1 /*
   2  * Copyright (c) 2002 Brian Foley
   3  * Copyright (c) 2002 Dieter Shirley
   4  * Copyright (c) 2003-2004 Romain Dolbeau <romain@dolbeau.org>
   5  *
   6  * This file is part of FFmpeg.
   7  *
   8  * FFmpeg is free software; you can redistribute it and/or
   9  * modify it under the terms of the GNU Lesser General Public
  10  * License as published by the Free Software Foundation; either
  11  * version 2.1 of the License, or (at your option) any later version.
  12  *
  13  * FFmpeg is distributed in the hope that it will be useful,
  14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  16  * Lesser General Public License for more details.
  17  *
  18  * You should have received a copy of the GNU Lesser General Public
  19  * License along with FFmpeg; if not, write to the Free Software
  20  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  21  */
  22
  23 #include "config.h"
  24 #if HAVE_ALTIVEC_H
  25 #include <altivec.h>
  26 #endif
  27
  28 #include "libavutil/attributes.h"
  29 #include "libavutil/cpu.h"
  30 #include "libavutil/ppc/cpu.h"
  31 #include "libavutil/ppc/types_altivec.h"
  32 #include "libavutil/ppc/util_altivec.h"
  33 #include "libavcodec/avcodec.h"
  34 #include "libavcodec/pixblockdsp.h"
  35
  36 #if HAVE_ALTIVEC
  37
  38 #if HAVE_VSX
  39 static void get_pixels_altivec(int16_t *restrict block, const uint8_t *pixels,
  40                                ptrdiff_t line_size)
  41 {
  42     int i;
  43     vector unsigned char perm =
  44         (vector unsigned char) {0x00,0x10, 0x01,0x11,0x02,0x12,0x03,0x13,\
  45             0x04,0x14,0x05,0x15,0x06,0x16,0x07,0x17};
  46     const vector unsigned char zero =
  47         (const vector unsigned char) vec_splat_u8(0);
  48
  49     for (i = 0; i < 8; i++) {
  50         /* Read potentially unaligned pixels.
  51          * We're reading 16 pixels, and actually only want 8,
  52          * but we simply ignore the extras. */
  53         vector unsigned char bytes = vec_vsx_ld(0, pixels);
  54
  55         // Convert the bytes into shorts.
  56         //vector signed short shorts = (vector signed short) vec_perm(zero, bytes, perm);
  57         vector signed short shorts = (vector signed short) vec_perm(bytes, zero, perm);
  58
  59         // Save the data to the block, we assume the block is 16-byte aligned.
  60         vec_vsx_st(shorts, i * 16, (vector signed short *) block);
  61
  62         pixels += line_size;
  63     }
  64 }
  65 #else
  66 static void get_pixels_altivec(int16_t *restrict block, const uint8_t *pixels,
  67                                ptrdiff_t line_size)
  68 {
  69     int i;
  70     vec_u8 perm = vec_lvsl(0, pixels);
  71     const vec_u8 zero = (const vec_u8)vec_splat_u8(0);
  72
  73     for (i = 0; i < 8; i++) {
  74         /* Read potentially unaligned pixels.
  75          * We're reading 16 pixels, and actually only want 8,
  76          * but we simply ignore the extras. */
  77         vec_u8 pixl = vec_ld(0, pixels);
  78         vec_u8 pixr = vec_ld(7, pixels);
  79         vec_u8 bytes = vec_perm(pixl, pixr, perm);
  80
  81         // Convert the bytes into shorts.
  82         vec_s16 shorts = (vec_s16)vec_mergeh(zero, bytes);
  83
  84         // Save the data to the block, we assume the block is 16-byte aligned.
  85         vec_st(shorts, i * 16, (vec_s16 *)block);
  86
  87         pixels += line_size;
  88     }
  89 }
  90
  91 #endif /* HAVE_VSX */
  92
  93 #if HAVE_VSX
  94 static void diff_pixels_altivec(int16_t *restrict block, const uint8_t *s1,
  95                                 const uint8_t *s2, int stride)
  96 {
  97   int i;
  98   const vector unsigned char zero =
  99     (const vector unsigned char) vec_splat_u8(0);
 100   vector signed short shorts1, shorts2;
 101
 102   for (i = 0; i < 4; i++) {
 103     /* Read potentially unaligned pixels.
 104      * We're reading 16 pixels, and actually only want 8,
 105      * but we simply ignore the extras. */
 106     vector unsigned char bytes = vec_vsx_ld(0,  s1);
 107
 108     // Convert the bytes into shorts.
 109     shorts1 = (vector signed short) vec_mergeh(bytes, zero);
 110
 111     // Do the same for the second block of pixels.
 112     bytes =vec_vsx_ld(0,  s2);
 113
 114     // Convert the bytes into shorts.
 115     shorts2 = (vector signed short) vec_mergeh(bytes, zero);
 116
 117     // Do the subtraction.
 118     shorts1 = vec_sub(shorts1, shorts2);
 119
 120     // Save the data to the block, we assume the block is 16-byte aligned.
 121     vec_vsx_st(shorts1, 0, (vector signed short *) block);
 122
 123     s1    += stride;
 124     s2    += stride;
 125     block += 8;
 126
 127     /* The code below is a copy of the code above...
 128      * This is a manual unroll. */
 129
 130     /* Read potentially unaligned pixels.
 131      * We're reading 16 pixels, and actually only want 8,
 132      * but we simply ignore the extras. */
 133     bytes = vec_vsx_ld(0,  s1);
 134
 135     // Convert the bytes into shorts.
 136     shorts1 = (vector signed short) vec_mergeh(bytes, zero);
 137
 138     // Do the same for the second block of pixels.
 139     bytes = vec_vsx_ld(0,  s2);
 140
 141     // Convert the bytes into shorts.
 142     shorts2 = (vector signed short) vec_mergeh(bytes, zero);
 143
 144     // Do the subtraction.
 145     shorts1 = vec_sub(shorts1, shorts2);
 146
 147     // Save the data to the block, we assume the block is 16-byte aligned.
 148     vec_vsx_st(shorts1, 0, (vector signed short *) block);
 149
 150     s1    += stride;
 151     s2    += stride;
 152     block += 8;
 153   }
 154 }
 155 #else
 156 static void diff_pixels_altivec(int16_t *restrict block, const uint8_t *s1,
 157                                 const uint8_t *s2, int stride)
 158 {
 159     int i;
 160     vec_u8 perm1 = vec_lvsl(0, s1);
 161     vec_u8 perm2 = vec_lvsl(0, s2);
 162     const vec_u8 zero = (const vec_u8)vec_splat_u8(0);
 163     vec_s16 shorts1, shorts2;
 164
 165     for (i = 0; i < 4; i++) {
 166         /* Read potentially unaligned pixels.
 167          * We're reading 16 pixels, and actually only want 8,
 168          * but we simply ignore the extras. */
 169         vec_u8 pixl  = vec_ld(0,  s1);
 170         vec_u8 pixr  = vec_ld(15, s1);
 171         vec_u8 bytes = vec_perm(pixl, pixr, perm1);
 172
 173         // Convert the bytes into shorts.
 174         shorts1 = (vec_s16)vec_mergeh(zero, bytes);
 175
 176         // Do the same for the second block of pixels.
 177         pixl  = vec_ld(0,  s2);
 178         pixr  = vec_ld(15, s2);
 179         bytes = vec_perm(pixl, pixr, perm2);
 180
 181         // Convert the bytes into shorts.
 182         shorts2 = (vec_s16)vec_mergeh(zero, bytes);
 183
 184         // Do the subtraction.
 185         shorts1 = vec_sub(shorts1, shorts2);
 186
 187         // Save the data to the block, we assume the block is 16-byte aligned.
 188         vec_st(shorts1, 0, (vec_s16 *)block);
 189
 190         s1    += stride;
 191         s2    += stride;
 192         block += 8;
 193
 194         /* The code below is a copy of the code above...
 195          * This is a manual unroll. */
 196
 197         /* Read potentially unaligned pixels.
 198          * We're reading 16 pixels, and actually only want 8,
 199          * but we simply ignore the extras. */
 200         pixl  = vec_ld(0,  s1);
 201         pixr  = vec_ld(15, s1);
 202         bytes = vec_perm(pixl, pixr, perm1);
 203
 204         // Convert the bytes into shorts.
 205         shorts1 = (vec_s16)vec_mergeh(zero, bytes);
 206
 207         // Do the same for the second block of pixels.
 208         pixl  = vec_ld(0,  s2);
 209         pixr  = vec_ld(15, s2);
 210         bytes = vec_perm(pixl, pixr, perm2);
 211
 212         // Convert the bytes into shorts.
 213         shorts2 = (vec_s16)vec_mergeh(zero, bytes);
 214
 215         // Do the subtraction.
 216         shorts1 = vec_sub(shorts1, shorts2);
 217
 218         // Save the data to the block, we assume the block is 16-byte aligned.
 219         vec_st(shorts1, 0, (vec_s16 *)block);
 220
 221         s1    += stride;
 222         s2    += stride;
 223         block += 8;
 224     }
 225 }
 226
 227 #endif /* HAVE_VSX */
 228
 229 #endif /* HAVE_ALTIVEC */
 230
 231 #if HAVE_VSX
 232 static void get_pixels_vsx(int16_t *restrict block, const uint8_t *pixels,
 233                            ptrdiff_t line_size)
 234 {
 235     int i;
 236     for (i = 0; i < 8; i++) {
 237         vec_s16 shorts = vsx_ld_u8_s16(0, pixels);
 238
 239         vec_vsx_st(shorts, i * 16, block);
 240
 241         pixels += line_size;
 242     }
 243 }
 244
 245 static void diff_pixels_vsx(int16_t *restrict block, const uint8_t *s1,
 246                             const uint8_t *s2, int stride)
 247 {
 248     int i;
 249     vec_s16 shorts1, shorts2;
 250     for (i = 0; i < 8; i++) {
 251         shorts1 = vsx_ld_u8_s16(0, s1);
 252         shorts2 = vsx_ld_u8_s16(0, s2);
 253
 254         shorts1 = vec_sub(shorts1, shorts2);
 255
 256         vec_vsx_st(shorts1, 0, block);
 257
 258         s1    += stride;
 259         s2    += stride;
 260         block += 8;
 261     }
 262 }
 263 #endif /* HAVE_VSX */
 264
 265 av_cold void ff_pixblockdsp_init_ppc(PixblockDSPContext *c,
 266                                      AVCodecContext *avctx,
 267                                      unsigned high_bit_depth)
 268 {
 269 #if HAVE_ALTIVEC
 270     if (!PPC_ALTIVEC(av_get_cpu_flags()))
 271         return;
 272
 273     c->diff_pixels = diff_pixels_altivec;
 274
 275     if (!high_bit_depth) {
 276         c->get_pixels = get_pixels_altivec;
 277     }
 278 #endif /* HAVE_ALTIVEC */
 279
 280 #if HAVE_VSX
 281     if (!PPC_VSX(av_get_cpu_flags()))
 282         return;
 283
 284     c->diff_pixels = diff_pixels_vsx;
 285
 286     if (!high_bit_depth)
 287         c->get_pixels = get_pixels_vsx;
 288 #endif /* HAVE_VSX */
 289 }