Merge commit 'de452e503734ebb0fdbce86e9d16693b3530fad3'

author Clément Bœsch <u@pkh.me>

Mon, 20 Mar 2017 12:47:29 +0000 (13:47 +0100)

committer Clément Bœsch <u@pkh.me>

Mon, 20 Mar 2017 14:58:32 +0000 (15:58 +0100)
author Clément Bœsch <u@pkh.me>
Mon, 20 Mar 2017 12:47:29 +0000 (13:47 +0100)
committer Clément Bœsch <u@pkh.me>
Mon, 20 Mar 2017 14:58:32 +0000 (15:58 +0100)
diff --cc libavcodec/alpha/pixblockdsp_alpha.c

index 866b762b162754c7b0358e70228db2b25e50b344,0000000000000000000000000000000000000000..c2f1a1d79c539f19dfe28e5e7bdf8bc87c243a9f

mode 100644,000000..100644
--- 1/libavcodec/alpha/pixblockdsp_alpha.c
--- /dev/null
+++ b/libavcodec/alpha/pixblockdsp_alpha.c
@@@ -1,78 -1,0 +1,79 @@@
-                            const uint8_t *restrict pixels, ptrdiff_t line_size)
+ +/*
+ + * SIMD-optimized pixel operations
+ + *
+ + * This file is part of FFmpeg.
+ + *
+ + * FFmpeg is free software; you can redistribute it and/or
+ + * modify it under the terms of the GNU Lesser General Public
+ + * License as published by the Free Software Foundation; either
+ + * version 2.1 of the License, or (at your option) any later version.
+ + *
+ + * FFmpeg is distributed in the hope that it will be useful,
+ + * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ + * Lesser General Public License for more details.
+ + *
+ + * You should have received a copy of the GNU Lesser General Public
+ + * License along with FFmpeg; if not, write to the Free Software
+ + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ + */
+ +
+ +#include "libavutil/attributes.h"
+ +#include "libavcodec/pixblockdsp.h"
+ +#include "asm.h"
+ +
+ +static void get_pixels_mvi(int16_t *restrict block,
-         pixels += line_size;
++                           const uint8_t *restrict pixels, ptrdiff_t stride)
+ +{
+ +    int h = 8;
+ +
+ +    do {
+ +        uint64_t p;
+ +
+ +        p = ldq(pixels);
+ +        stq(unpkbw(p),       block);
+ +        stq(unpkbw(p >> 32), block + 4);
+ +
-                             int stride) {
++        pixels += stride;
+ +        block += 8;
+ +    } while (--h);
+ +}
+ +
+ +static void diff_pixels_mvi(int16_t *block, const uint8_t *s1, const uint8_t *s2,
++                            ptrdiff_t stride)
++{
+ +    int h = 8;
+ +    uint64_t mask = 0x4040;
+ +
+ +    mask |= mask << 16;
+ +    mask |= mask << 32;
+ +    do {
+ +        uint64_t x, y, c, d, a;
+ +        uint64_t signs;
+ +
+ +        x = ldq(s1);
+ +        y = ldq(s2);
+ +        c = cmpbge(x, y);
+ +        d = x - y;
+ +        a = zap(mask, c);       /* We use 0x4040404040404040 here...  */
+ +        d += 4 * a;             /* ...so we can use s4addq here.      */
+ +        signs = zap(-1, c);
+ +
+ +        stq(unpkbw(d)       | (unpkbw(signs)       << 8), block);
+ +        stq(unpkbw(d >> 32) | (unpkbw(signs >> 32) << 8), block + 4);
+ +
+ +        s1 += stride;
+ +        s2 += stride;
+ +        block += 8;
+ +    } while (--h);
+ +}
+ +
+ +av_cold void ff_pixblockdsp_init_alpha(PixblockDSPContext *c, AVCodecContext *avctx,
+ +                                       unsigned high_bit_depth)
+ +{
+ +    if (amask(AMASK_MVI) == 0) {
+ +        if (!high_bit_depth)
+ +            c->get_pixels = get_pixels_mvi;
+ +        c->diff_pixels = diff_pixels_mvi;
+ +    }
+ +}
diff --cc libavcodec/arm/pixblockdsp_init_arm.c
Simple merge
diff --cc libavcodec/dv.h
Simple merge
diff --cc libavcodec/dvenc.c
Simple merge
diff --cc libavcodec/mips/pixblockdsp_mips.h

index 7f8cc96683da6ee1ef146fcc7c1dd93d72cbaa7c,0000000000000000000000000000000000000000..a12b1a6949b01db4aa7d0d3885ea1010be572050

mode 100644,000000..100644
--- 1/libavcodec/mips/pixblockdsp_mips.h
--- /dev/null
+++ b/libavcodec/mips/pixblockdsp_mips.h
@@@ -1,39 -1,0 +1,39 @@@
-                         const uint8_t *src2, int stride);
+ +/*
+ + * Copyright (c) 2015 Shivraj Patil (Shivraj.Patil@imgtec.com)
+ + *                    Zhou Xiaoyong <zhouxiaoyong@loongson.cn>
+ + *
+ + * This file is part of FFmpeg.
+ + *
+ + * FFmpeg is free software; you can redistribute it and/or
+ + * modify it under the terms of the GNU Lesser General Public
+ + * License as published by the Free Software Foundation; either
+ + * version 2.1 of the License, or (at your option) any later version.
+ + *
+ + * FFmpeg is distributed in the hope that it will be useful,
+ + * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ + * Lesser General Public License for more details.
+ + *
+ + * You should have received a copy of the GNU Lesser General Public
+ + * License along with FFmpeg; if not, write to the Free Software
+ + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ + */
+ +
+ +#ifndef AVCODEC_MIPS_PIXBLOCKDSP_MIPS_H
+ +#define AVCODEC_MIPS_PIXBLOCKDSP_MIPS_H
+ +
+ +#include "../mpegvideo.h"
+ +
+ +void ff_diff_pixels_msa(int16_t *av_restrict block, const uint8_t *src1,
-         ptrdiff_t line_size);
++                        const uint8_t *src2, ptrdiff_t stride);
+ +void ff_get_pixels_16_msa(int16_t *restrict dst, const uint8_t *src,
+ +                          ptrdiff_t stride);
+ +void ff_get_pixels_8_msa(int16_t *restrict dst, const uint8_t *src,
+ +                         ptrdiff_t stride);
+ +
+ +void ff_get_pixels_8_mmi(int16_t *av_restrict block, const uint8_t *pixels,
-         const uint8_t *src2, int stride);
++                         ptrdiff_t stride);
+ +void ff_diff_pixels_mmi(int16_t *av_restrict block, const uint8_t *src1,
++                        const uint8_t *src2, ptrdiff_t stride);
+ +
+ +#endif  // #ifndef AVCODEC_MIPS_PIXBLOCKDSP_MIPS_H
diff --cc libavcodec/mips/pixblockdsp_mmi.c

index 9f2eac36ec6aca0657c6218f45a8663b58e622fb,0000000000000000000000000000000000000000..a915a3c28bc9f78d6e208f68e401a06e61812b5b

mode 100644,000000..100644
--- 1/libavcodec/mips/pixblockdsp_mmi.c
--- /dev/null
+++ b/libavcodec/mips/pixblockdsp_mmi.c
@@@ -1,135 -1,0 +1,135 @@@
-         ptrdiff_t line_size)
+ +/*
+ + * Loongson SIMD optimized pixblockdsp
+ + *
+ + * Copyright (c) 2015 Loongson Technology Corporation Limited
+ + * Copyright (c) 2015 Zhou Xiaoyong <zhouxiaoyong@loongson.cn>
+ + *
+ + * This file is part of FFmpeg.
+ + *
+ + * FFmpeg is free software; you can redistribute it and/or
+ + * modify it under the terms of the GNU Lesser General Public
+ + * License as published by the Free Software Foundation; either
+ + * version 2.1 of the License, or (at your option) any later version.
+ + *
+ + * FFmpeg is distributed in the hope that it will be useful,
+ + * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ + * Lesser General Public License for more details.
+ + *
+ + * You should have received a copy of the GNU Lesser General Public
+ + * License along with FFmpeg; if not, write to the Free Software
+ + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ + */
+ +
+ +#include "pixblockdsp_mips.h"
+ +#include "libavutil/mips/asmdefs.h"
+ +#include "libavutil/mips/mmiutils.h"
+ +
+ +void ff_get_pixels_8_mmi(int16_t *av_restrict block, const uint8_t *pixels,
-         MMI_LDXC1(%[ftmp2], %[pixels], %[line_size], 0x00)
++                         ptrdiff_t stride)
+ +{
+ +    double ftmp[7];
+ +    DECLARE_VAR_ALL64;
+ +    DECLARE_VAR_ADDRT;
+ +
+ +    __asm__ volatile (
+ +        "xor        %[ftmp0],   %[ftmp0],       %[ftmp0]                \n\t"
+ +
+ +        MMI_LDC1(%[ftmp1], %[pixels], 0x00)
-         PTR_ADDU   "%[pixels],  %[pixels],      %[line_size_x2]         \n\t"
++        MMI_LDXC1(%[ftmp2], %[pixels], %[stride], 0x00)
+ +        "punpcklbh  %[ftmp3],   %[ftmp1],       %[ftmp0]                \n\t"
+ +        "punpckhbh  %[ftmp4],   %[ftmp1],       %[ftmp0]                \n\t"
+ +        "punpcklbh  %[ftmp5],   %[ftmp2],       %[ftmp0]                \n\t"
+ +        "punpckhbh  %[ftmp6],   %[ftmp2],       %[ftmp0]                \n\t"
+ +        MMI_SDC1(%[ftmp3], %[block], 0x00)
+ +        MMI_SDC1(%[ftmp4], %[block], 0x08)
+ +        MMI_SDC1(%[ftmp5], %[block], 0x10)
+ +        MMI_SDC1(%[ftmp6], %[block], 0x18)
-         MMI_LDXC1(%[ftmp2], %[pixels], %[line_size], 0x00)
++        PTR_ADDU   "%[pixels],  %[pixels],      %[stride_x2]            \n\t"
+ +
+ +        MMI_LDC1(%[ftmp1], %[pixels], 0x00)
-         PTR_ADDU   "%[pixels],  %[pixels],      %[line_size_x2]         \n\t"
++        MMI_LDXC1(%[ftmp2], %[pixels], %[stride], 0x00)
+ +        "punpcklbh  %[ftmp3],   %[ftmp1],       %[ftmp0]                \n\t"
+ +        "punpckhbh  %[ftmp4],   %[ftmp1],       %[ftmp0]                \n\t"
+ +        "punpcklbh  %[ftmp5],   %[ftmp2],       %[ftmp0]                \n\t"
+ +        "punpckhbh  %[ftmp6],   %[ftmp2],       %[ftmp0]                \n\t"
+ +        MMI_SDC1(%[ftmp3], %[block], 0x20)
+ +        MMI_SDC1(%[ftmp4], %[block], 0x28)
+ +        MMI_SDC1(%[ftmp5], %[block], 0x30)
+ +        MMI_SDC1(%[ftmp6], %[block], 0x38)
-         MMI_LDXC1(%[ftmp2], %[pixels], %[line_size], 0x00)
++        PTR_ADDU   "%[pixels],  %[pixels],      %[stride_x2]            \n\t"
+ +
+ +        MMI_LDC1(%[ftmp1], %[pixels], 0x00)
-         PTR_ADDU   "%[pixels],  %[pixels],      %[line_size_x2]         \n\t"
++        MMI_LDXC1(%[ftmp2], %[pixels], %[stride], 0x00)
+ +        "punpcklbh  %[ftmp3],   %[ftmp1],       %[ftmp0]                \n\t"
+ +        "punpckhbh  %[ftmp4],   %[ftmp1],       %[ftmp0]                \n\t"
+ +        "punpcklbh  %[ftmp5],   %[ftmp2],       %[ftmp0]                \n\t"
+ +        "punpckhbh  %[ftmp6],   %[ftmp2],       %[ftmp0]                \n\t"
+ +        MMI_SDC1(%[ftmp3], %[block], 0x40)
+ +        MMI_SDC1(%[ftmp4], %[block], 0x48)
+ +        MMI_SDC1(%[ftmp5], %[block], 0x50)
+ +        MMI_SDC1(%[ftmp6], %[block], 0x58)
-         MMI_LDXC1(%[ftmp2], %[pixels], %[line_size], 0x00)
++        PTR_ADDU   "%[pixels],  %[pixels],      %[stride_x2]            \n\t"
+ +
+ +        MMI_LDC1(%[ftmp1], %[pixels], 0x00)
-         : [block]"r"((mips_reg)block),      [line_size]"r"((mips_reg)line_size),
-           [line_size_x2]"r"((mips_reg)(line_size<<1))
++        MMI_LDXC1(%[ftmp2], %[pixels], %[stride], 0x00)
+ +        "punpcklbh  %[ftmp3],   %[ftmp1],       %[ftmp0]                \n\t"
+ +        "punpckhbh  %[ftmp4],   %[ftmp1],       %[ftmp0]                \n\t"
+ +        "punpcklbh  %[ftmp5],   %[ftmp2],       %[ftmp0]                \n\t"
+ +        "punpckhbh  %[ftmp6],   %[ftmp2],       %[ftmp0]                \n\t"
+ +        MMI_SDC1(%[ftmp3], %[block], 0x60)
+ +        MMI_SDC1(%[ftmp4], %[block], 0x68)
+ +        MMI_SDC1(%[ftmp5], %[block], 0x70)
+ +        MMI_SDC1(%[ftmp6], %[block], 0x78)
+ +        : [ftmp0]"=&f"(ftmp[0]),            [ftmp1]"=&f"(ftmp[1]),
+ +          [ftmp2]"=&f"(ftmp[2]),            [ftmp3]"=&f"(ftmp[3]),
+ +          [ftmp4]"=&f"(ftmp[4]),            [ftmp5]"=&f"(ftmp[5]),
+ +          [ftmp6]"=&f"(ftmp[6]),
+ +          RESTRICT_ASM_ALL64
+ +          RESTRICT_ASM_ADDRT
+ +          [pixels]"+&r"(pixels)
-         const uint8_t *src2, int stride)
++        : [block]"r"((mips_reg)block),      [stride]"r"((mips_reg)stride),
++          [stride_x2]"r"((mips_reg)(stride<<1))
+ +        : "memory"
+ +    );
+ +}
+ +
+ +void ff_diff_pixels_mmi(int16_t *av_restrict block, const uint8_t *src1,
++        const uint8_t *src2, ptrdiff_t stride)
+ +{
+ +    double ftmp[5];
+ +    mips_reg tmp[1];
+ +    DECLARE_VAR_ALL64;
+ +
+ +    __asm__ volatile (
+ +        "li         %[tmp0],    0x08                                    \n\t"
+ +        "xor        %[ftmp4],   %[ftmp4],       %[ftmp4]                \n\t"
+ +        "1:                                                             \n\t"
+ +        MMI_LDC1(%[ftmp0], %[src1], 0x00)
+ +        "or         %[ftmp1],   %[ftmp0],       %[ftmp0]                \n\t"
+ +        MMI_LDC1(%[ftmp2], %[src2], 0x00)
+ +        "or         %[ftmp3],   %[ftmp2],       %[ftmp2]                \n\t"
+ +        "punpcklbh  %[ftmp0],   %[ftmp0],       %[ftmp4]                \n\t"
+ +        "punpckhbh  %[ftmp1],   %[ftmp1],       %[ftmp4]                \n\t"
+ +        "punpcklbh  %[ftmp2],   %[ftmp2],       %[ftmp4]                \n\t"
+ +        "punpckhbh  %[ftmp3],   %[ftmp3],       %[ftmp4]                \n\t"
+ +        "psubh      %[ftmp0],   %[ftmp0],       %[ftmp2]                \n\t"
+ +        "psubh      %[ftmp1],   %[ftmp1],       %[ftmp3]                \n\t"
+ +        MMI_SDC1(%[ftmp0], %[block], 0x00)
+ +        MMI_SDC1(%[ftmp1], %[block], 0x08)
+ +        PTR_ADDI   "%[tmp0],    %[tmp0], -0x01                          \n\t"
+ +        PTR_ADDIU  "%[block],   %[block], 0x10                          \n\t"
+ +        PTR_ADDU   "%[src1],    %[src1],        %[stride]               \n\t"
+ +        PTR_ADDU   "%[src2],    %[src2],        %[stride]               \n\t"
+ +        "bgtz       %[tmp0],    1b                                      \n\t"
+ +        : [ftmp0]"=&f"(ftmp[0]),            [ftmp1]"=&f"(ftmp[1]),
+ +          [ftmp2]"=&f"(ftmp[2]),            [ftmp3]"=&f"(ftmp[3]),
+ +          [ftmp4]"=&f"(ftmp[4]),
+ +          [tmp0]"=&r"(tmp[0]),
+ +          RESTRICT_ASM_ALL64
+ +          [block]"+&r"(block),              [src1]"+&r"(src1),
+ +          [src2]"+&r"(src2)
+ +        : [stride]"r"((mips_reg)stride)
+ +        : "memory"
+ +    );
+ +}
diff --cc libavcodec/mips/pixblockdsp_msa.c

index 966e11a7f587efaef997568b09948ac3af650f53,0000000000000000000000000000000000000000..86a4576c1ddb19e57b6bee90f05d7482a738c628

mode 100644,000000..100644
--- 1/libavcodec/mips/pixblockdsp_msa.c
--- /dev/null
+++ b/libavcodec/mips/pixblockdsp_msa.c
@@@ -1,143 -1,0 +1,143 @@@
-                         const uint8_t *src2, int stride)
+ +/*
+ + * Copyright (c) 2015 Shivraj Patil (Shivraj.Patil@imgtec.com)
+ + *
+ + * This file is part of FFmpeg.
+ + *
+ + * FFmpeg is free software; you can redistribute it and/or
+ + * modify it under the terms of the GNU Lesser General Public
+ + * License as published by the Free Software Foundation; either
+ + * version 2.1 of the License, or (at your option) any later version.
+ + *
+ + * FFmpeg is distributed in the hope that it will be useful,
+ + * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ + * Lesser General Public License for more details.
+ + *
+ + * You should have received a copy of the GNU Lesser General Public
+ + * License along with FFmpeg; if not, write to the Free Software
+ + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ + */
+ +
+ +#include "libavutil/mips/generic_macros_msa.h"
+ +#include "pixblockdsp_mips.h"
+ +
+ +static void diff_pixels_msa(int16_t *block, const uint8_t *src1,
+ +                            const uint8_t *src2, int32_t stride)
+ +{
+ +    v16u8 in10, in11, in12, in13, in14, in15, in16, in17;
+ +    v16u8 in20, in21, in22, in23, in24, in25, in26, in27;
+ +    v8i16 out0, out1, out2, out3, out4, out5, out6, out7;
+ +
+ +    LD_UB8(src1, stride, in10, in11, in12, in13, in14, in15, in16, in17);
+ +    LD_UB8(src2, stride, in20, in21, in22, in23, in24, in25, in26, in27);
+ +    ILVR_B4_SH(in10, in20, in11, in21, in12, in22, in13, in23,
+ +               out0, out1, out2, out3);
+ +    ILVR_B4_SH(in14, in24, in15, in25, in16, in26, in17, in27,
+ +               out4, out5, out6, out7);
+ +    HSUB_UB4_SH(out0, out1, out2, out3, out0, out1, out2, out3);
+ +    HSUB_UB4_SH(out4, out5, out6, out7, out4, out5, out6, out7);
+ +    ST_SH8(out0, out1, out2, out3, out4, out5, out6, out7, block, 8);
+ +}
+ +
+ +static void copy_8bit_to_16bit_width8_msa(const uint8_t *src, int32_t src_stride,
+ +                                          int16_t *dst, int32_t dst_stride,
+ +                                          int32_t height)
+ +{
+ +    uint8_t *dst_ptr;
+ +    int32_t cnt;
+ +    v16u8 src0, src1, src2, src3;
+ +    v16i8 zero = { 0 };
+ +
+ +    dst_ptr = (uint8_t *) dst;
+ +
+ +    for (cnt = (height >> 2); cnt--;) {
+ +        LD_UB4(src, src_stride, src0, src1, src2, src3);
+ +        src += (4 * src_stride);
+ +
+ +        ILVR_B4_UB(zero, src0, zero, src1, zero, src2, zero, src3,
+ +                   src0, src1, src2, src3);
+ +
+ +        ST_UB4(src0, src1, src2, src3, dst_ptr, (dst_stride * 2));
+ +        dst_ptr += (4 * 2 * dst_stride);
+ +    }
+ +}
+ +
+ +static void copy_16multx8mult_msa(const uint8_t *src, int32_t src_stride,
+ +                                  uint8_t *dst, int32_t dst_stride,
+ +                                  int32_t height, int32_t width)
+ +{
+ +    int32_t cnt, loop_cnt;
+ +    const uint8_t *src_tmp;
+ +    uint8_t *dst_tmp;
+ +    v16u8 src0, src1, src2, src3, src4, src5, src6, src7;
+ +
+ +    for (cnt = (width >> 4); cnt--;) {
+ +        src_tmp = src;
+ +        dst_tmp = dst;
+ +
+ +        for (loop_cnt = (height >> 3); loop_cnt--;) {
+ +            LD_UB8(src_tmp, src_stride,
+ +                   src0, src1, src2, src3, src4, src5, src6, src7);
+ +            src_tmp += (8 * src_stride);
+ +
+ +            ST_UB8(src0, src1, src2, src3, src4, src5, src6, src7,
+ +                   dst_tmp, dst_stride);
+ +            dst_tmp += (8 * dst_stride);
+ +        }
+ +
+ +        src += 16;
+ +        dst += 16;
+ +    }
+ +}
+ +
+ +static void copy_width16_msa(const uint8_t *src, int32_t src_stride,
+ +                             uint8_t *dst, int32_t dst_stride,
+ +                             int32_t height)
+ +{
+ +    int32_t cnt;
+ +    v16u8 src0, src1, src2, src3, src4, src5, src6, src7;
+ +
+ +    if (0 == height % 12) {
+ +        for (cnt = (height / 12); cnt--;) {
+ +            LD_UB8(src, src_stride,
+ +                   src0, src1, src2, src3, src4, src5, src6, src7);
+ +            src += (8 * src_stride);
+ +            ST_UB8(src0, src1, src2, src3, src4, src5, src6, src7,
+ +                   dst, dst_stride);
+ +            dst += (8 * dst_stride);
+ +
+ +            LD_UB4(src, src_stride, src0, src1, src2, src3);
+ +            src += (4 * src_stride);
+ +            ST_UB4(src0, src1, src2, src3, dst, dst_stride);
+ +            dst += (4 * dst_stride);
+ +        }
+ +    } else if (0 == height % 8) {
+ +        copy_16multx8mult_msa(src, src_stride, dst, dst_stride, height, 16);
+ +    } else if (0 == height % 4) {
+ +        for (cnt = (height >> 2); cnt--;) {
+ +            LD_UB4(src, src_stride, src0, src1, src2, src3);
+ +            src += (4 * src_stride);
+ +
+ +            ST_UB4(src0, src1, src2, src3, dst, dst_stride);
+ +            dst += (4 * dst_stride);
+ +        }
+ +    }
+ +}
+ +
+ +void ff_get_pixels_16_msa(int16_t *av_restrict dest, const uint8_t *src,
+ +                          ptrdiff_t stride)
+ +{
+ +    copy_width16_msa(src, stride, (uint8_t *) dest, 16, 8);
+ +}
+ +
+ +void ff_get_pixels_8_msa(int16_t *av_restrict dest, const uint8_t *src,
+ +                         ptrdiff_t stride)
+ +{
+ +    copy_8bit_to_16bit_width8_msa(src, stride, dest, 8, 8);
+ +}
+ +
+ +void ff_diff_pixels_msa(int16_t *av_restrict block, const uint8_t *src1,
++                        const uint8_t *src2, ptrdiff_t stride)
+ +{
+ +    diff_pixels_msa(block, src1, src2, stride);
+ +}
diff --cc libavcodec/pixblockdsp.c

index f0883d3d08484ff0f96cdebfb47891022eb5f513,9d68d26245acd3866cd15388fe521ff2d274303c..417c944e00bb0904bd1fd9669c64afac47c558c2
--- 1/libavcodec/pixblockdsp.c
--- 2/libavcodec/pixblockdsp.c
+++ b/libavcodec/pixblockdsp.c
@@@ -24,41 -23,15 +24,41 @@@
   #include "avcodec.h"
   #include "pixblockdsp.h"
   
- -#define BIT_DEPTH 16
- -#include "pixblockdsp_template.c"
- -#undef BIT_DEPTH
+ +static void get_pixels_16_c(int16_t *av_restrict block, const uint8_t *pixels,
-                             ptrdiff_t line_size)
++                            ptrdiff_t stride)
+ +{
-     AV_COPY128U(block + 0 * 8, pixels + 0 * line_size);
-     AV_COPY128U(block + 1 * 8, pixels + 1 * line_size);
-     AV_COPY128U(block + 2 * 8, pixels + 2 * line_size);
-     AV_COPY128U(block + 3 * 8, pixels + 3 * line_size);
-     AV_COPY128U(block + 4 * 8, pixels + 4 * line_size);
-     AV_COPY128U(block + 5 * 8, pixels + 5 * line_size);
-     AV_COPY128U(block + 6 * 8, pixels + 6 * line_size);
-     AV_COPY128U(block + 7 * 8, pixels + 7 * line_size);
++    AV_COPY128U(block + 0 * 8, pixels + 0 * stride);
++    AV_COPY128U(block + 1 * 8, pixels + 1 * stride);
++    AV_COPY128U(block + 2 * 8, pixels + 2 * stride);
++    AV_COPY128U(block + 3 * 8, pixels + 3 * stride);
++    AV_COPY128U(block + 4 * 8, pixels + 4 * stride);
++    AV_COPY128U(block + 5 * 8, pixels + 5 * stride);
++    AV_COPY128U(block + 6 * 8, pixels + 6 * stride);
++    AV_COPY128U(block + 7 * 8, pixels + 7 * stride);
+ +}
+ +
+ +static void get_pixels_8_c(int16_t *av_restrict block, const uint8_t *pixels,
-                            ptrdiff_t line_size)
++                           ptrdiff_t stride)
+ +{
+ +    int i;
   
- -#define BIT_DEPTH 8
- -#include "pixblockdsp_template.c"
+ +    /* read the pixels */
+ +    for (i = 0; i < 8; i++) {
+ +        block[0] = pixels[0];
+ +        block[1] = pixels[1];
+ +        block[2] = pixels[2];
+ +        block[3] = pixels[3];
+ +        block[4] = pixels[4];
+ +        block[5] = pixels[5];
+ +        block[6] = pixels[6];
+ +        block[7] = pixels[7];
-         pixels  += line_size;
++        pixels  += stride;
+ +        block   += 8;
+ +    }
+ +}
   
- -static void diff_pixels_c(int16_t *restrict block, const uint8_t *s1,
+ +static void diff_pixels_c(int16_t *av_restrict block, const uint8_t *s1,
-                           const uint8_t *s2, int stride)
+                           const uint8_t *s2, ptrdiff_t stride)
   {
       int i;
   
diff --cc libavcodec/pixblockdsp.h
Simple merge
diff --cc libavcodec/ppc/pixblockdsp.c

index f3a5050469baddc4cc10092ffdae481ce82506ef,96e702452f15e7a7402bc1e680e53dc7d3374456..f5ac8509f0c821f56b488e7ce4b77a5f9a5736c4
--- 1/libavcodec/ppc/pixblockdsp.c
--- 2/libavcodec/ppc/pixblockdsp.c
+++ b/libavcodec/ppc/pixblockdsp.c
@@@ -33,40 -33,13 +33,40 @@@
   #include "libavcodec/avcodec.h"
   #include "libavcodec/pixblockdsp.h"
   
- -#if HAVE_ALTIVEC && HAVE_BIGENDIAN
+ +#if HAVE_ALTIVEC
   
-                                ptrdiff_t line_size)
+ +#if HAVE_VSX
+ +static void get_pixels_altivec(int16_t *restrict block, const uint8_t *pixels,
-         pixels += line_size;
++                               ptrdiff_t stride)
+ +{
+ +    int i;
+ +    vector unsigned char perm =
+ +        (vector unsigned char) {0x00,0x10, 0x01,0x11,0x02,0x12,0x03,0x13,\
+ +            0x04,0x14,0x05,0x15,0x06,0x16,0x07,0x17};
+ +    const vector unsigned char zero =
+ +        (const vector unsigned char) vec_splat_u8(0);
+ +
+ +    for (i = 0; i < 8; i++) {
+ +        /* Read potentially unaligned pixels.
+ +         * We're reading 16 pixels, and actually only want 8,
+ +         * but we simply ignore the extras. */
+ +        vector unsigned char bytes = vec_vsx_ld(0, pixels);
+ +
+ +        // Convert the bytes into shorts.
+ +        //vector signed short shorts = (vector signed short) vec_perm(zero, bytes, perm);
+ +        vector signed short shorts = (vector signed short) vec_perm(bytes, zero, perm);
+ +
+ +        // Save the data to the block, we assume the block is 16-byte aligned.
+ +        vec_vsx_st(shorts, i * 16, (vector signed short *) block);
+ +
++        pixels += stride;
+ +    }
+ +}
+ +#else
   static void get_pixels_altivec(int16_t *restrict block, const uint8_t *pixels,
-                                ptrdiff_t line_size)
+                                ptrdiff_t stride)
   {
       int i;
- -    vec_u8 perm = vec_lvsl(0, pixels);
       const vec_u8 zero = (const vec_u8)vec_splat_u8(0);
   
       for (i = 0; i < 8; i++) {
@@@ -88,76 -60,12 +88,76 @@@
       }
   }
   
-                                 const uint8_t *s2, int stride)
+ +#endif /* HAVE_VSX */
+ +
+ +#if HAVE_VSX
+ +static void diff_pixels_altivec(int16_t *restrict block, const uint8_t *s1,
++                                const uint8_t *s2, ptrdiff_t stride)
+ +{
+ +  int i;
+ +  const vector unsigned char zero =
+ +    (const vector unsigned char) vec_splat_u8(0);
+ +  vector signed short shorts1, shorts2;
+ +
+ +  for (i = 0; i < 4; i++) {
+ +    /* Read potentially unaligned pixels.
+ +     * We're reading 16 pixels, and actually only want 8,
+ +     * but we simply ignore the extras. */
+ +    vector unsigned char bytes = vec_vsx_ld(0,  s1);
+ +
+ +    // Convert the bytes into shorts.
+ +    shorts1 = (vector signed short) vec_mergeh(bytes, zero);
+ +
+ +    // Do the same for the second block of pixels.
+ +    bytes =vec_vsx_ld(0,  s2);
+ +
+ +    // Convert the bytes into shorts.
+ +    shorts2 = (vector signed short) vec_mergeh(bytes, zero);
+ +
+ +    // Do the subtraction.
+ +    shorts1 = vec_sub(shorts1, shorts2);
+ +
+ +    // Save the data to the block, we assume the block is 16-byte aligned.
+ +    vec_vsx_st(shorts1, 0, (vector signed short *) block);
+ +
+ +    s1    += stride;
+ +    s2    += stride;
+ +    block += 8;
+ +
+ +    /* The code below is a copy of the code above...
+ +     * This is a manual unroll. */
+ +
+ +    /* Read potentially unaligned pixels.
+ +     * We're reading 16 pixels, and actually only want 8,
+ +     * but we simply ignore the extras. */
+ +    bytes = vec_vsx_ld(0,  s1);
+ +
+ +    // Convert the bytes into shorts.
+ +    shorts1 = (vector signed short) vec_mergeh(bytes, zero);
+ +
+ +    // Do the same for the second block of pixels.
+ +    bytes = vec_vsx_ld(0,  s2);
+ +
+ +    // Convert the bytes into shorts.
+ +    shorts2 = (vector signed short) vec_mergeh(bytes, zero);
+ +
+ +    // Do the subtraction.
+ +    shorts1 = vec_sub(shorts1, shorts2);
+ +
+ +    // Save the data to the block, we assume the block is 16-byte aligned.
+ +    vec_vsx_st(shorts1, 0, (vector signed short *) block);
+ +
+ +    s1    += stride;
+ +    s2    += stride;
+ +    block += 8;
+ +  }
+ +}
+ +#else
   static void diff_pixels_altivec(int16_t *restrict block, const uint8_t *s1,
-                                 const uint8_t *s2, int stride)
+                                 const uint8_t *s2, ptrdiff_t stride)
   {
       int i;
- -    vec_u8 perm1 = vec_lvsl(0, s1);
- -    vec_u8 perm2 = vec_lvsl(0, s2);
+ +    vec_u8 perm;
       const vec_u8 zero = (const vec_u8)vec_splat_u8(0);
       vec_s16 shorts1, shorts2;
   
diff --cc libavcodec/x86/pixblockdsp.asm

index 2864d0c977469efd91b2a62dbb90db59bf32be62,871244297c7240ada6365ad05ab5ae63fc0285ac..440fe29bccdd98205ba3195c7d70fe28f9887e46
--- 1/libavcodec/x86/pixblockdsp.asm
--- 2/libavcodec/x86/pixblockdsp.asm
+++ b/libavcodec/x86/pixblockdsp.asm
@@@ -80,12 -80,11 +80,11 @@@ cglobal get_pixels, 3, 4, 
       mova  [r0+0x70], m3
       RET
   
- -INIT_MMX mmx
   ; void ff_diff_pixels_mmx(int16_t *block, const uint8_t *s1, const uint8_t *s2,
- ;                         int stride);
+ ;                         ptrdiff_t stride);
- -cglobal diff_pixels, 4,5
- -    pxor         m7, m7
+ +%macro DIFF_PIXELS 0
+ +cglobal diff_pixels, 4,5,5
-     movsxdifnidn r3, r3d
+ +    pxor         m4, m4
       add          r0,  128
       mov          r4, -128
   .loop:
diff --cc libavcodec/x86/pixblockdsp_init.c

index 4d06a44c6d442560fe17550145514db151366766,faa5141327e8f4318945bd08a03f39499799f405..fa9578a2d373a0bb11d19f004ebd065cf6625cc6
--- 1/libavcodec/x86/pixblockdsp_init.c
--- 2/libavcodec/x86/pixblockdsp_init.c
+++ b/libavcodec/x86/pixblockdsp_init.c
@@@ -23,12 -23,10 +23,12 @@@
   #include "libavutil/x86/cpu.h"
   #include "libavcodec/pixblockdsp.h"
   
- void ff_get_pixels_mmx(int16_t *block, const uint8_t *pixels, ptrdiff_t line_size);
- void ff_get_pixels_sse2(int16_t *block, const uint8_t *pixels, ptrdiff_t line_size);
+ void ff_get_pixels_mmx(int16_t *block, const uint8_t *pixels, ptrdiff_t stride);
+ void ff_get_pixels_sse2(int16_t *block, const uint8_t *pixels, ptrdiff_t stride);
   void ff_diff_pixels_mmx(int16_t *block, const uint8_t *s1, const uint8_t *s2,
-                         int stride);
+                         ptrdiff_t stride);
+ +void ff_diff_pixels_sse2(int16_t *block, const uint8_t *s1, const uint8_t *s2,
-                          int stride);
++                         ptrdiff_t stride);
   
   av_cold void ff_pixblockdsp_init_x86(PixblockDSPContext *c,
                                        AVCodecContext *avctx,
diff --cc tests/checkasm/pixblockdsp.c

index 2b88e7d1acaada1f5118433bbfe775bdfa08930c,0000000000000000000000000000000000000000..e14b0a90ded4ba34809e558253d436b359f97de8

mode 100644,000000..100644
--- 1/tests/checkasm/pixblockdsp.c
--- /dev/null
+++ b/tests/checkasm/pixblockdsp.c
@@@ -1,107 -1,0 +1,107 @@@
-         declare_func_emms(AV_CPU_FLAG_MMX, void, int16_t *av_restrict block, const uint8_t *s1, const uint8_t *s2, int stride); \
+ +/*
+ + * Copyright (c) 2015 Tiancheng "Timothy" Gu
+ + *
+ + * This file is part of FFmpeg.
+ + *
+ + * FFmpeg is free software; you can redistribute it and/or modify
+ + * it under the terms of the GNU General Public License as published by
+ + * the Free Software Foundation; either version 2 of the License, or
+ + * (at your option) any later version.
+ + *
+ + * FFmpeg is distributed in the hope that it will be useful,
+ + * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ + * GNU General Public License for more details.
+ + *
+ + * You should have received a copy of the GNU General Public License along
+ + * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
+ + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ + */
+ +
+ +#include <string.h>
+ +#include "checkasm.h"
+ +#include "libavcodec/pixblockdsp.h"
+ +#include "libavutil/common.h"
+ +#include "libavutil/internal.h"
+ +#include "libavutil/intreadwrite.h"
+ +
+ +#define BUF_UNITS 8
+ +#define BUF_SIZE (BUF_UNITS * 128 + 8 * BUF_UNITS)
+ +
+ +#define randomize_buffers()                 \
+ +    do {                                    \
+ +        int i;                              \
+ +        for (i = 0; i < BUF_SIZE; i += 4) { \
+ +            uint32_t r = rnd();             \
+ +            AV_WN32A(src10 + i, r);         \
+ +            AV_WN32A(src11 + i, r);         \
+ +            r = rnd();                      \
+ +            AV_WN32A(src20 + i, r);         \
+ +            AV_WN32A(src21 + i, r);         \
+ +            r = rnd();                      \
+ +            AV_WN32A(dst0_ + i, r);         \
+ +            AV_WN32A(dst1_ + i, r);         \
+ +        }                                   \
+ +    } while (0)
+ +
+ +#define check_get_pixels(type)                                                             \
+ +    do {                                                                                   \
+ +        int i;                                                                             \
+ +        declare_func_emms(AV_CPU_FLAG_MMX, void, int16_t *block, const uint8_t *pixels, ptrdiff_t line_size);    \
+ +                                                                                           \
+ +        for (i = 0; i < BUF_UNITS; i++) {                                              \
+ +            int src_offset = i * 64 * sizeof(type) + 8 * i; /* Test various alignments */      \
+ +            int dst_offset = i * 64; /* dst must be aligned */                             \
+ +            randomize_buffers();                                                           \
+ +            call_ref(dst0 + dst_offset, src10 + src_offset, 8);                            \
+ +            call_new(dst1 + dst_offset, src11 + src_offset, 8);                            \
+ +            if (memcmp(src10, src11, BUF_SIZE)|| memcmp(dst0, dst1, BUF_SIZE)) \
+ +                fail();                                                                    \
+ +            bench_new(dst1 + dst_offset, src11 + src_offset, 8);                           \
+ +        }                                                                                  \
+ +    } while (0)
+ +
+ +#define check_diff_pixels(type)                                                            \
+ +    do {                                                                                   \
+ +        int i;                                                                             \
++        declare_func_emms(AV_CPU_FLAG_MMX, void, int16_t *av_restrict block, const uint8_t *s1, const uint8_t *s2, ptrdiff_t stride); \
+ +                                                                                           \
+ +        for (i = 0; i < BUF_UNITS; i++) {                                              \
+ +            int src_offset = i * 64 * sizeof(type) + 8 * i; /* Test various alignments */      \
+ +            int dst_offset = i * 64; /* dst must be aligned */                             \
+ +            randomize_buffers();                                                           \
+ +            call_ref(dst0 + dst_offset, src10 + src_offset, src20 + src_offset, 8);        \
+ +            call_new(dst1 + dst_offset, src11 + src_offset, src21 + src_offset, 8);        \
+ +            if (memcmp(src10, src11, BUF_SIZE) || memcmp(src20, src21, BUF_SIZE) || memcmp(dst0, dst1, BUF_SIZE)) \
+ +                fail();                                                                    \
+ +            bench_new(dst1 + dst_offset, src11 + src_offset, src21 + src_offset, 8);       \
+ +        }                                                                                  \
+ +    } while (0)
+ +
+ +void checkasm_check_pixblockdsp(void)
+ +{
+ +    LOCAL_ALIGNED_16(uint8_t, src10, [BUF_SIZE]);
+ +    LOCAL_ALIGNED_16(uint8_t, src11, [BUF_SIZE]);
+ +    LOCAL_ALIGNED_16(uint8_t, src20, [BUF_SIZE]);
+ +    LOCAL_ALIGNED_16(uint8_t, src21, [BUF_SIZE]);
+ +    LOCAL_ALIGNED_16(uint8_t, dst0_, [BUF_SIZE]);
+ +    LOCAL_ALIGNED_16(uint8_t, dst1_, [BUF_SIZE]);
+ +    uint16_t *dst0 = (uint16_t *)dst0_;
+ +    uint16_t *dst1 = (uint16_t *)dst1_;
+ +    PixblockDSPContext h;
+ +    AVCodecContext avctx = {
+ +        .bits_per_raw_sample = 8,
+ +    };
+ +
+ +    ff_pixblockdsp_init(&h, &avctx);
+ +
+ +    if (check_func(h.get_pixels, "get_pixels"))
+ +        check_get_pixels(uint8_t);
+ +
+ +    report("get_pixels");
+ +
+ +    if (check_func(h.diff_pixels, "diff_pixels"))
+ +        check_diff_pixels(uint8_t);
+ +
+ +    report("diff_pixels");
+ +}
author	Clément Bœsch <u@pkh.me>
	Mon, 20 Mar 2017 12:47:29 +0000 (13:47 +0100)
committer	Clément Bœsch <u@pkh.me>
	Mon, 20 Mar 2017 14:58:32 +0000 (15:58 +0100)
		1	2
libavcodec/alpha/pixblockdsp_alpha.c	patch \|	diff1 \|	\|	blob \| history
libavcodec/arm/pixblockdsp_init_arm.c	patch \|	diff1 \|	diff2 \|	blob \| history
libavcodec/dv.h	patch \|	diff1 \|	diff2 \|	blob \| history
libavcodec/dvenc.c	patch \|	diff1 \|	diff2 \|	blob \| history
libavcodec/mips/pixblockdsp_mips.h	patch \|	diff1 \|	\|	blob \| history
libavcodec/mips/pixblockdsp_mmi.c	patch \|	diff1 \|	\|	blob \| history
libavcodec/mips/pixblockdsp_msa.c	patch \|	diff1 \|	\|	blob \| history
libavcodec/pixblockdsp.c	patch \|	diff1 \|	diff2 \|	blob \| history
libavcodec/pixblockdsp.h	patch \|	diff1 \|	diff2 \|	blob \| history
libavcodec/ppc/pixblockdsp.c	patch \|	diff1 \|	diff2 \|	blob \| history
libavcodec/x86/pixblockdsp.asm	patch \|	diff1 \|	diff2 \|	blob \| history
libavcodec/x86/pixblockdsp_init.c	patch \|	diff1 \|	diff2 \|	blob \| history
tests/checkasm/pixblockdsp.c	patch \|	diff1 \|	\|	blob \| history