git.sesse.net Git - ffmpeg/blob - libavcodec/dsputil_template.c

   1 /*
   2  * DSP utils
   3  * Copyright (c) 2000, 2001 Fabrice Bellard
   4  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
   5  *
   6  * gmc & q-pel & 32/64 bit based MC by Michael Niedermayer <michaelni@gmx.at>
   7  *
   8  * This file is part of Libav.
   9  *
  10  * Libav is free software; you can redistribute it and/or
  11  * modify it under the terms of the GNU Lesser General Public
  12  * License as published by the Free Software Foundation; either
  13  * version 2.1 of the License, or (at your option) any later version.
  14  *
  15  * Libav is distributed in the hope that it will be useful,
  16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  18  * Lesser General Public License for more details.
  19  *
  20  * You should have received a copy of the GNU Lesser General Public
  21  * License along with Libav; if not, write to the Free Software
  22  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  23  */
  24
  25 /**
  26  * @file
  27  * DSP utils
  28  */
  29
  30 #include "pixels.h"
  31
  32 /* draw the edges of width 'w' of an image of size width, height */
  33 // FIXME: Check that this is OK for MPEG-4 interlaced.
  34 static void draw_edges_8_c(uint8_t *buf, int wrap, int width, int height,
  35                            int w, int h, int sides)
  36 {
  37     uint8_t *ptr = buf, *last_line;
  38     int i;
  39
  40     /* left and right */
  41     for (i = 0; i < height; i++) {
  42         memset(ptr - w, ptr[0], w);
  43         memset(ptr + width, ptr[width - 1], w);
  44         ptr += wrap;
  45     }
  46
  47     /* top and bottom + corners */
  48     buf -= w;
  49     last_line = buf + (height - 1) * wrap;
  50     if (sides & EDGE_TOP)
  51         for (i = 0; i < h; i++)
  52             // top
  53             memcpy(buf - (i + 1) * wrap, buf, width + w + w);
  54     if (sides & EDGE_BOTTOM)
  55         for (i = 0; i < h; i++)
  56             // bottom
  57             memcpy(last_line + (i + 1) * wrap, last_line, width + w + w);
  58 }
  59
  60 static void clear_block_8_c(int16_t *block)
  61 {
  62     memset(block, 0, sizeof(int16_t) * 64);
  63 }
  64
  65 static void clear_blocks_8_c(int16_t *blocks)
  66 {
  67     memset(blocks, 0, sizeof(int16_t) * 6 * 64);
  68 }
  69
  70 #define PIXOP2(OPNAME, OP)                                              \
  71 static inline void OPNAME ## _no_rnd_pixels8_l2_8(uint8_t *dst,         \
  72                                                   const uint8_t *src1,  \
  73                                                   const uint8_t *src2,  \
  74                                                   int dst_stride,       \
  75                                                   int src_stride1,      \
  76                                                   int src_stride2,      \
  77                                                   int h)                \
  78 {                                                                       \
  79     int i;                                                              \
  80                                                                         \
  81     for (i = 0; i < h; i++) {                                           \
  82         uint32_t a, b;                                                  \
  83         a = AV_RN32(&src1[i * src_stride1]);                            \
  84         b = AV_RN32(&src2[i * src_stride2]);                            \
  85         OP(*((uint32_t *) &dst[i * dst_stride]),                        \
  86            no_rnd_avg32(a, b));                                         \
  87         a = AV_RN32(&src1[i * src_stride1 + 4]);                        \
  88         b = AV_RN32(&src2[i * src_stride2 + 4]);                        \
  89         OP(*((uint32_t *) &dst[i * dst_stride + 4]),                    \
  90            no_rnd_avg32(a, b));                                         \
  91     }                                                                   \
  92 }                                                                       \
  93                                                                         \
  94 static inline void OPNAME ## _no_rnd_pixels16_l2_8(uint8_t *dst,        \
  95                                                    const uint8_t *src1, \
  96                                                    const uint8_t *src2, \
  97                                                    int dst_stride,      \
  98                                                    int src_stride1,     \
  99                                                    int src_stride2,     \
 100                                                    int h)               \
 101 {                                                                       \
 102     OPNAME ## _no_rnd_pixels8_l2_8(dst, src1, src2, dst_stride,         \
 103                                    src_stride1, src_stride2, h);        \
 104     OPNAME ## _no_rnd_pixels8_l2_8(dst  + 8,                            \
 105                                    src1 + 8,                            \
 106                                    src2 + 8,                            \
 107                                    dst_stride, src_stride1,             \
 108                                    src_stride2, h);                     \
 109 }                                                                       \
 110                                                                         \
 111 static inline void OPNAME ## _pixels8_l4_8(uint8_t *dst,                \
 112                                            const uint8_t *src1,         \
 113                                            const uint8_t *src2,         \
 114                                            const uint8_t *src3,         \
 115                                            const uint8_t *src4,         \
 116                                            int dst_stride,              \
 117                                            int src_stride1,             \
 118                                            int src_stride2,             \
 119                                            int src_stride3,             \
 120                                            int src_stride4,             \
 121                                            int h)                       \
 122 {                                                                       \
 123     /* FIXME HIGH BIT DEPTH */                                          \
 124     int i;                                                              \
 125                                                                         \
 126     for (i = 0; i < h; i++) {                                           \
 127         uint32_t a, b, c, d, l0, l1, h0, h1;                            \
 128         a  = AV_RN32(&src1[i * src_stride1]);                           \
 129         b  = AV_RN32(&src2[i * src_stride2]);                           \
 130         c  = AV_RN32(&src3[i * src_stride3]);                           \
 131         d  = AV_RN32(&src4[i * src_stride4]);                           \
 132         l0 = (a & 0x03030303UL) +                                       \
 133              (b & 0x03030303UL) +                                       \
 134                   0x02020202UL;                                         \
 135         h0 = ((a & 0xFCFCFCFCUL) >> 2) +                                \
 136              ((b & 0xFCFCFCFCUL) >> 2);                                 \
 137         l1 = (c & 0x03030303UL) +                                       \
 138              (d & 0x03030303UL);                                        \
 139         h1 = ((c & 0xFCFCFCFCUL) >> 2) +                                \
 140              ((d & 0xFCFCFCFCUL) >> 2);                                 \
 141         OP(*((uint32_t *) &dst[i * dst_stride]),                        \
 142            h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0FUL));                \
 143         a  = AV_RN32(&src1[i * src_stride1 + 4]);                       \
 144         b  = AV_RN32(&src2[i * src_stride2 + 4]);                       \
 145         c  = AV_RN32(&src3[i * src_stride3 + 4]);                       \
 146         d  = AV_RN32(&src4[i * src_stride4 + 4]);                       \
 147         l0 = (a & 0x03030303UL) +                                       \
 148              (b & 0x03030303UL) +                                       \
 149                   0x02020202UL;                                         \
 150         h0 = ((a & 0xFCFCFCFCUL) >> 2) +                                \
 151              ((b & 0xFCFCFCFCUL) >> 2);                                 \
 152         l1 = (c & 0x03030303UL) +                                       \
 153              (d & 0x03030303UL);                                        \
 154         h1 = ((c & 0xFCFCFCFCUL) >> 2) +                                \
 155              ((d & 0xFCFCFCFCUL) >> 2);                                 \
 156         OP(*((uint32_t *) &dst[i * dst_stride + 4]),                    \
 157            h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0FUL));                \
 158     }                                                                   \
 159 }                                                                       \
 160                                                                         \
 161 static inline void OPNAME ## _no_rnd_pixels8_l4_8(uint8_t *dst,         \
 162                                                   const uint8_t *src1,  \
 163                                                   const uint8_t *src2,  \
 164                                                   const uint8_t *src3,  \
 165                                                   const uint8_t *src4,  \
 166                                                   int dst_stride,       \
 167                                                   int src_stride1,      \
 168                                                   int src_stride2,      \
 169                                                   int src_stride3,      \
 170                                                   int src_stride4,      \
 171                                                   int h)                \
 172 {                                                                       \
 173     /* FIXME HIGH BIT DEPTH */                                          \
 174     int i;                                                              \
 175                                                                         \
 176     for (i = 0; i < h; i++) {                                           \
 177         uint32_t a, b, c, d, l0, l1, h0, h1;                            \
 178         a  = AV_RN32(&src1[i * src_stride1]);                           \
 179         b  = AV_RN32(&src2[i * src_stride2]);                           \
 180         c  = AV_RN32(&src3[i * src_stride3]);                           \
 181         d  = AV_RN32(&src4[i * src_stride4]);                           \
 182         l0 = (a & 0x03030303UL) +                                       \
 183              (b & 0x03030303UL) +                                       \
 184                   0x01010101UL;                                         \
 185         h0 = ((a & 0xFCFCFCFCUL) >> 2) +                                \
 186              ((b & 0xFCFCFCFCUL) >> 2);                                 \
 187         l1 = (c & 0x03030303UL) +                                       \
 188              (d & 0x03030303UL);                                        \
 189         h1 = ((c & 0xFCFCFCFCUL) >> 2) +                                \
 190              ((d & 0xFCFCFCFCUL) >> 2);                                 \
 191         OP(*((uint32_t *) &dst[i * dst_stride]),                        \
 192            h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0FUL));                \
 193         a  = AV_RN32(&src1[i * src_stride1 + 4]);                       \
 194         b  = AV_RN32(&src2[i * src_stride2 + 4]);                       \
 195         c  = AV_RN32(&src3[i * src_stride3 + 4]);                       \
 196         d  = AV_RN32(&src4[i * src_stride4 + 4]);                       \
 197         l0 = (a & 0x03030303UL) +                                       \
 198              (b & 0x03030303UL) +                                       \
 199                   0x01010101UL;                                         \
 200         h0 = ((a & 0xFCFCFCFCUL) >> 2) +                                \
 201              ((b & 0xFCFCFCFCUL) >> 2);                                 \
 202         l1 = (c & 0x03030303UL) +                                       \
 203              (d & 0x03030303UL);                                        \
 204         h1 = ((c & 0xFCFCFCFCUL) >> 2) +                                \
 205              ((d & 0xFCFCFCFCUL) >> 2);                                 \
 206         OP(*((uint32_t *) &dst[i * dst_stride + 4]),                    \
 207            h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0FUL));                \
 208     }                                                                   \
 209 }                                                                       \
 210                                                                         \
 211 static inline void OPNAME ## _pixels16_l4_8(uint8_t *dst,               \
 212                                             const uint8_t *src1,        \
 213                                             const uint8_t *src2,        \
 214                                             const uint8_t *src3,        \
 215                                             const uint8_t *src4,        \
 216                                             int dst_stride,             \
 217                                             int src_stride1,            \
 218                                             int src_stride2,            \
 219                                             int src_stride3,            \
 220                                             int src_stride4,            \
 221                                             int h)                      \
 222 {                                                                       \
 223     OPNAME ## _pixels8_l4_8(dst, src1, src2, src3, src4, dst_stride,    \
 224                             src_stride1, src_stride2, src_stride3,      \
 225                             src_stride4, h);                            \
 226     OPNAME ## _pixels8_l4_8(dst  + 8,                                   \
 227                             src1 + 8, src2 + 8,                         \
 228                             src3 + 8, src4 + 8,                         \
 229                             dst_stride, src_stride1, src_stride2,       \
 230                             src_stride3, src_stride4, h);               \
 231 }                                                                       \
 232                                                                         \
 233 static inline void OPNAME ## _no_rnd_pixels16_l4_8(uint8_t *dst,        \
 234                                                    const uint8_t *src1, \
 235                                                    const uint8_t *src2, \
 236                                                    const uint8_t *src3, \
 237                                                    const uint8_t *src4, \
 238                                                    int dst_stride,      \
 239                                                    int src_stride1,     \
 240                                                    int src_stride2,     \
 241                                                    int src_stride3,     \
 242                                                    int src_stride4,     \
 243                                                    int h)               \
 244 {                                                                       \
 245     OPNAME ## _no_rnd_pixels8_l4_8(dst, src1, src2, src3, src4,         \
 246                                    dst_stride, src_stride1,             \
 247                                    src_stride2, src_stride3,            \
 248                                    src_stride4, h);                     \
 249     OPNAME ## _no_rnd_pixels8_l4_8(dst  + 8,                            \
 250                                    src1 + 8, src2 + 8,                  \
 251                                    src3 + 8, src4 + 8,                  \
 252                                    dst_stride, src_stride1,             \
 253                                    src_stride2, src_stride3,            \
 254                                    src_stride4, h);                     \
 255 }                                                                       \
 256                                                                         \
 257 static inline void OPNAME ## _pixels8_xy2_8_c(uint8_t *block,           \
 258                                               const uint8_t *pixels,    \
 259                                               ptrdiff_t line_size,      \
 260                                               int h)                    \
 261 {                                                                       \
 262     /* FIXME HIGH BIT DEPTH */                                          \
 263     int j;                                                              \
 264                                                                         \
 265     for (j = 0; j < 2; j++) {                                           \
 266         int i;                                                          \
 267         const uint32_t a = AV_RN32(pixels);                             \
 268         const uint32_t b = AV_RN32(pixels + 1);                         \
 269         uint32_t l0 = (a & 0x03030303UL) +                              \
 270                       (b & 0x03030303UL) +                              \
 271                            0x02020202UL;                                \
 272         uint32_t h0 = ((a & 0xFCFCFCFCUL) >> 2) +                       \
 273                       ((b & 0xFCFCFCFCUL) >> 2);                        \
 274         uint32_t l1, h1;                                                \
 275                                                                         \
 276         pixels += line_size;                                            \
 277         for (i = 0; i < h; i += 2) {                                    \
 278             uint32_t a = AV_RN32(pixels);                               \
 279             uint32_t b = AV_RN32(pixels + 1);                           \
 280             l1 = (a & 0x03030303UL) +                                   \
 281                  (b & 0x03030303UL);                                    \
 282             h1 = ((a & 0xFCFCFCFCUL) >> 2) +                            \
 283                  ((b & 0xFCFCFCFCUL) >> 2);                             \
 284             OP(*((uint32_t *) block),                                   \
 285                h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0FUL));            \
 286             pixels += line_size;                                        \
 287             block  += line_size;                                        \
 288             a = AV_RN32(pixels);                                        \
 289             b = AV_RN32(pixels + 1);                                    \
 290             l0 = (a & 0x03030303UL) +                                   \
 291                  (b & 0x03030303UL) +                                   \
 292                       0x02020202UL;                                     \
 293             h0 = ((a & 0xFCFCFCFCUL) >> 2) +                            \
 294                  ((b & 0xFCFCFCFCUL) >> 2);                             \
 295             OP(*((uint32_t *) block),                                   \
 296                h0 + h1 + (((l0 + l1) >> 2) & 0x0F0F0F0FUL));            \
 297             pixels += line_size;                                        \
 298             block  += line_size;                                        \
 299         }                                                               \
 300         pixels += 4 - line_size * (h + 1);                              \
 301         block  += 4 - line_size * h;                                    \
 302     }                                                                   \
 303 }                                                                       \
 304                                                                         \
 305 CALL_2X_PIXELS(OPNAME ## _pixels16_xy2_8_c,                             \
 306                OPNAME ## _pixels8_xy2_8_c,                              \
 307                8)                                                       \
 308
 309 #define op_avg(a, b) a = rnd_avg32(a, b)
 310 #define op_put(a, b) a = b
 311 #define put_no_rnd_pixels8_8_c put_pixels8_8_c
 312 PIXOP2(avg, op_avg)
 313 PIXOP2(put, op_put)
 314 #undef op_avg
 315 #undef op_put