 * Copyright (c) 2002 Brian Foley
 * Copyright (c) 2002 Dieter Shirley
 * Copyright (c) 2003-2004 Romain Dolbeau <romain@dolbeau.org>
 *
 * This file is part of Libav.
 *
 * Libav is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * Libav is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with Libav; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
#include "libavutil/attributes.h"
#include "libavutil/cpu.h"
#include "libavutil/ppc/cpu.h"
#include "libavutil/ppc/util_altivec.h"

#include "libavcodec/avcodec.h"
#include "libavcodec/pixblockdsp.h"
33 #if HAVE_ALTIVEC && HAVE_BIGENDIAN
35 static void get_pixels_altivec(int16_t *restrict block, const uint8_t *pixels,
39 vec_u8 perm = vec_lvsl(0, pixels);
40 const vec_u8 zero = (const vec_u8)vec_splat_u8(0);
42 for (i = 0; i < 8; i++) {
43 /* Read potentially unaligned pixels.
44 * We're reading 16 pixels, and actually only want 8,
45 * but we simply ignore the extras. */
46 vec_u8 pixl = vec_ld(0, pixels);
47 vec_u8 pixr = vec_ld(7, pixels);
48 vec_u8 bytes = vec_perm(pixl, pixr, perm);
50 // Convert the bytes into shorts.
51 vec_s16 shorts = (vec_s16)vec_mergeh(zero, bytes);
53 // Save the data to the block, we assume the block is 16-byte aligned.
54 vec_st(shorts, i * 16, (vec_s16 *)block);
60 static void diff_pixels_altivec(int16_t *restrict block, const uint8_t *s1,
61 const uint8_t *s2, ptrdiff_t stride)
64 vec_u8 perm1 = vec_lvsl(0, s1);
65 vec_u8 perm2 = vec_lvsl(0, s2);
66 const vec_u8 zero = (const vec_u8)vec_splat_u8(0);
67 vec_s16 shorts1, shorts2;
69 for (i = 0; i < 4; i++) {
70 /* Read potentially unaligned pixels.
71 * We're reading 16 pixels, and actually only want 8,
72 * but we simply ignore the extras. */
73 vec_u8 pixl = vec_ld(0, s1);
74 vec_u8 pixr = vec_ld(15, s1);
75 vec_u8 bytes = vec_perm(pixl, pixr, perm1);
77 // Convert the bytes into shorts.
78 shorts1 = (vec_s16)vec_mergeh(zero, bytes);
80 // Do the same for the second block of pixels.
82 pixr = vec_ld(15, s2);
83 bytes = vec_perm(pixl, pixr, perm2);
85 // Convert the bytes into shorts.
86 shorts2 = (vec_s16)vec_mergeh(zero, bytes);
88 // Do the subtraction.
89 shorts1 = vec_sub(shorts1, shorts2);
91 // Save the data to the block, we assume the block is 16-byte aligned.
92 vec_st(shorts1, 0, (vec_s16 *)block);
98 /* The code below is a copy of the code above...
99 * This is a manual unroll. */
101 /* Read potentially unaligned pixels.
102 * We're reading 16 pixels, and actually only want 8,
103 * but we simply ignore the extras. */
104 pixl = vec_ld(0, s1);
105 pixr = vec_ld(15, s1);
106 bytes = vec_perm(pixl, pixr, perm1);
108 // Convert the bytes into shorts.
109 shorts1 = (vec_s16)vec_mergeh(zero, bytes);
111 // Do the same for the second block of pixels.
112 pixl = vec_ld(0, s2);
113 pixr = vec_ld(15, s2);
114 bytes = vec_perm(pixl, pixr, perm2);
116 // Convert the bytes into shorts.
117 shorts2 = (vec_s16)vec_mergeh(zero, bytes);
119 // Do the subtraction.
120 shorts1 = vec_sub(shorts1, shorts2);
122 // Save the data to the block, we assume the block is 16-byte aligned.
123 vec_st(shorts1, 0, (vec_s16 *)block);
131 #endif /* HAVE_ALTIVEC && HAVE_BIGENDIAN */
134 static void get_pixels_vsx(int16_t *restrict block, const uint8_t *pixels,
138 for (i = 0; i < 8; i++) {
139 vec_s16 shorts = vsx_ld_u8_s16(0, pixels);
141 vec_vsx_st(shorts, i * 16, block);
147 static void diff_pixels_vsx(int16_t *restrict block, const uint8_t *s1,
148 const uint8_t *s2, ptrdiff_t stride)
151 vec_s16 shorts1, shorts2;
152 for (i = 0; i < 8; i++) {
153 shorts1 = vsx_ld_u8_s16(0, s1);
154 shorts2 = vsx_ld_u8_s16(0, s2);
156 shorts1 = vec_sub(shorts1, shorts2);
158 vec_vsx_st(shorts1, 0, block);
165 #endif /* HAVE_VSX */
167 av_cold void ff_pixblockdsp_init_ppc(PixblockDSPContext *c,
168 AVCodecContext *avctx,
169 unsigned high_bit_depth)
171 #if HAVE_ALTIVEC && HAVE_BIGENDIAN
172 if (!PPC_ALTIVEC(av_get_cpu_flags()))
175 c->diff_pixels = diff_pixels_altivec;
177 if (!high_bit_depth) {
178 c->get_pixels = get_pixels_altivec;
180 #endif /* HAVE_ALTIVEC && HAVE_BIGENDIAN */
183 if (!PPC_VSX(av_get_cpu_flags()))
186 c->diff_pixels = diff_pixels_vsx;
189 c->get_pixels = get_pixels_vsx;
190 #endif /* HAVE_VSX */