git.sesse.net Git - ffmpeg/blob - libswscale/ppc/yuv2yuv_altivec.c

   1 /*
   2  * AltiVec-enhanced yuv-to-yuv conversion routines.
   3  *
   4  * Copyright (C) 2004 Romain Dolbeau <romain@dolbeau.org>
   5  * based on the equivalent C code in swscale.c
   6  *
   7  * This file is part of Libav.
   8  *
   9  * Libav is free software; you can redistribute it and/or
  10  * modify it under the terms of the GNU Lesser General Public
  11  * License as published by the Free Software Foundation; either
  12  * version 2.1 of the License, or (at your option) any later version.
  13  *
  14  * Libav is distributed in the hope that it will be useful,
  15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  17  * Lesser General Public License for more details.
  18  *
  19  * You should have received a copy of the GNU Lesser General Public
  20  * License along with Libav; if not, write to the Free Software
  21  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  22  */
  23
  24 #include <inttypes.h>
  25 #include "config.h"
  26 #include "libswscale/swscale.h"
  27 #include "libswscale/swscale_internal.h"
  28 #include "libavutil/cpu.h"
  29
  30 static int yv12toyuy2_unscaled_altivec(SwsContext *c, const uint8_t* src[],
  31                                        int srcStride[], int srcSliceY,
  32                                        int srcSliceH, uint8_t* dstParam[],
  33                                        int dstStride_a[])
  34 {
  35     uint8_t *dst=dstParam[0] + dstStride_a[0]*srcSliceY;
  36     // yv12toyuy2(src[0], src[1], src[2], dst, c->srcW, srcSliceH, srcStride[0], srcStride[1], dstStride[0]);
  37     const uint8_t *ysrc = src[0];
  38     const uint8_t *usrc = src[1];
  39     const uint8_t *vsrc = src[2];
  40     const int width = c->srcW;
  41     const int height = srcSliceH;
  42     const int lumStride = srcStride[0];
  43     const int chromStride = srcStride[1];
  44     const int dstStride = dstStride_a[0];
  45     const vector unsigned char yperm = vec_lvsl(0, ysrc);
  46     const int vertLumPerChroma = 2;
  47     register unsigned int y;
  48
  49     /* This code assumes:
  50
  51     1) dst is 16 bytes-aligned
  52     2) dstStride is a multiple of 16
  53     3) width is a multiple of 16
  54     4) lum & chrom stride are multiples of 8
  55     */
  56
  57     for (y=0; y<height; y++) {
  58         int i;
  59         for (i = 0; i < width - 31; i+= 32) {
  60             const unsigned int j = i >> 1;
  61             vector unsigned char v_yA = vec_ld(i, ysrc);
  62             vector unsigned char v_yB = vec_ld(i + 16, ysrc);
  63             vector unsigned char v_yC = vec_ld(i + 32, ysrc);
  64             vector unsigned char v_y1 = vec_perm(v_yA, v_yB, yperm);
  65             vector unsigned char v_y2 = vec_perm(v_yB, v_yC, yperm);
  66             vector unsigned char v_uA = vec_ld(j, usrc);
  67             vector unsigned char v_uB = vec_ld(j + 16, usrc);
  68             vector unsigned char v_u = vec_perm(v_uA, v_uB, vec_lvsl(j, usrc));
  69             vector unsigned char v_vA = vec_ld(j, vsrc);
  70             vector unsigned char v_vB = vec_ld(j + 16, vsrc);
  71             vector unsigned char v_v = vec_perm(v_vA, v_vB, vec_lvsl(j, vsrc));
  72             vector unsigned char v_uv_a = vec_mergeh(v_u, v_v);
  73             vector unsigned char v_uv_b = vec_mergel(v_u, v_v);
  74             vector unsigned char v_yuy2_0 = vec_mergeh(v_y1, v_uv_a);
  75             vector unsigned char v_yuy2_1 = vec_mergel(v_y1, v_uv_a);
  76             vector unsigned char v_yuy2_2 = vec_mergeh(v_y2, v_uv_b);
  77             vector unsigned char v_yuy2_3 = vec_mergel(v_y2, v_uv_b);
  78             vec_st(v_yuy2_0, (i << 1), dst);
  79             vec_st(v_yuy2_1, (i << 1) + 16, dst);
  80             vec_st(v_yuy2_2, (i << 1) + 32, dst);
  81             vec_st(v_yuy2_3, (i << 1) + 48, dst);
  82         }
  83         if (i < width) {
  84             const unsigned int j = i >> 1;
  85             vector unsigned char v_y1 = vec_ld(i, ysrc);
  86             vector unsigned char v_u = vec_ld(j, usrc);
  87             vector unsigned char v_v = vec_ld(j, vsrc);
  88             vector unsigned char v_uv_a = vec_mergeh(v_u, v_v);
  89             vector unsigned char v_yuy2_0 = vec_mergeh(v_y1, v_uv_a);
  90             vector unsigned char v_yuy2_1 = vec_mergel(v_y1, v_uv_a);
  91             vec_st(v_yuy2_0, (i << 1), dst);
  92             vec_st(v_yuy2_1, (i << 1) + 16, dst);
  93         }
  94         if ((y&(vertLumPerChroma-1)) == vertLumPerChroma-1) {
  95             usrc += chromStride;
  96             vsrc += chromStride;
  97         }
  98         ysrc += lumStride;
  99         dst += dstStride;
 100     }
 101
 102     return srcSliceH;
 103 }
 104
 105 static int yv12touyvy_unscaled_altivec(SwsContext *c, const uint8_t* src[],
 106                                        int srcStride[], int srcSliceY,
 107                                        int srcSliceH, uint8_t* dstParam[],
 108                                        int dstStride_a[])
 109 {
 110     uint8_t *dst=dstParam[0] + dstStride_a[0]*srcSliceY;
 111     // yv12toyuy2(src[0], src[1], src[2], dst, c->srcW, srcSliceH, srcStride[0], srcStride[1], dstStride[0]);
 112     const uint8_t *ysrc = src[0];
 113     const uint8_t *usrc = src[1];
 114     const uint8_t *vsrc = src[2];
 115     const int width = c->srcW;
 116     const int height = srcSliceH;
 117     const int lumStride = srcStride[0];
 118     const int chromStride = srcStride[1];
 119     const int dstStride = dstStride_a[0];
 120     const int vertLumPerChroma = 2;
 121     const vector unsigned char yperm = vec_lvsl(0, ysrc);
 122     register unsigned int y;
 123
 124     /* This code assumes:
 125
 126     1) dst is 16 bytes-aligned
 127     2) dstStride is a multiple of 16
 128     3) width is a multiple of 16
 129     4) lum & chrom stride are multiples of 8
 130     */
 131
 132     for (y=0; y<height; y++) {
 133         int i;
 134         for (i = 0; i < width - 31; i+= 32) {
 135             const unsigned int j = i >> 1;
 136             vector unsigned char v_yA = vec_ld(i, ysrc);
 137             vector unsigned char v_yB = vec_ld(i + 16, ysrc);
 138             vector unsigned char v_yC = vec_ld(i + 32, ysrc);
 139             vector unsigned char v_y1 = vec_perm(v_yA, v_yB, yperm);
 140             vector unsigned char v_y2 = vec_perm(v_yB, v_yC, yperm);
 141             vector unsigned char v_uA = vec_ld(j, usrc);
 142             vector unsigned char v_uB = vec_ld(j + 16, usrc);
 143             vector unsigned char v_u = vec_perm(v_uA, v_uB, vec_lvsl(j, usrc));
 144             vector unsigned char v_vA = vec_ld(j, vsrc);
 145             vector unsigned char v_vB = vec_ld(j + 16, vsrc);
 146             vector unsigned char v_v = vec_perm(v_vA, v_vB, vec_lvsl(j, vsrc));
 147             vector unsigned char v_uv_a = vec_mergeh(v_u, v_v);
 148             vector unsigned char v_uv_b = vec_mergel(v_u, v_v);
 149             vector unsigned char v_uyvy_0 = vec_mergeh(v_uv_a, v_y1);
 150             vector unsigned char v_uyvy_1 = vec_mergel(v_uv_a, v_y1);
 151             vector unsigned char v_uyvy_2 = vec_mergeh(v_uv_b, v_y2);
 152             vector unsigned char v_uyvy_3 = vec_mergel(v_uv_b, v_y2);
 153             vec_st(v_uyvy_0, (i << 1), dst);
 154             vec_st(v_uyvy_1, (i << 1) + 16, dst);
 155             vec_st(v_uyvy_2, (i << 1) + 32, dst);
 156             vec_st(v_uyvy_3, (i << 1) + 48, dst);
 157         }
 158         if (i < width) {
 159             const unsigned int j = i >> 1;
 160             vector unsigned char v_y1 = vec_ld(i, ysrc);
 161             vector unsigned char v_u = vec_ld(j, usrc);
 162             vector unsigned char v_v = vec_ld(j, vsrc);
 163             vector unsigned char v_uv_a = vec_mergeh(v_u, v_v);
 164             vector unsigned char v_uyvy_0 = vec_mergeh(v_uv_a, v_y1);
 165             vector unsigned char v_uyvy_1 = vec_mergel(v_uv_a, v_y1);
 166             vec_st(v_uyvy_0, (i << 1), dst);
 167             vec_st(v_uyvy_1, (i << 1) + 16, dst);
 168         }
 169         if ((y&(vertLumPerChroma-1)) == vertLumPerChroma-1) {
 170             usrc += chromStride;
 171             vsrc += chromStride;
 172         }
 173         ysrc += lumStride;
 174         dst += dstStride;
 175     }
 176     return srcSliceH;
 177 }
 178
 179 void ff_swscale_get_unscaled_altivec(SwsContext *c)
 180 {
 181     if ((av_get_cpu_flags() & AV_CPU_FLAG_ALTIVEC) && !(c->srcW & 15) &&
 182         !(c->flags & SWS_BITEXACT) && c->srcFormat == PIX_FMT_YUV420P) {
 183         enum PixelFormat dstFormat = c->dstFormat;
 184
 185         // unscaled YV12 -> packed YUV, we want speed
 186         if (dstFormat == PIX_FMT_YUYV422)
 187             c->swScale= yv12toyuy2_unscaled_altivec;
 188         else if (dstFormat == PIX_FMT_UYVY422)
 189             c->swScale= yv12touyvy_unscaled_altivec;
 190     }
 191 }