X-Git-Url: https://git.sesse.net/?a=blobdiff_plain;f=libavcodec%2Fppc%2Fmpegvideo_altivec.c;h=a2ba5e12514007c04aa337aa37a2440913dd8541;hb=c367d0c65336b4238eb7188921996fd1387261d4;hp=4477d3ffa899bc4cbcd46e367679652ad2f469e8;hpb=115329f16062074e11ccf3b89ead6176606c9696;p=ffmpeg diff --git a/libavcodec/ppc/mpegvideo_altivec.c b/libavcodec/ppc/mpegvideo_altivec.c index 4477d3ffa89..a2ba5e12514 100644 --- a/libavcodec/ppc/mpegvideo_altivec.c +++ b/libavcodec/ppc/mpegvideo_altivec.c @@ -4,30 +4,32 @@ * dct_unquantize_h263_altivec: * Copyright (c) 2003 Romain Dolbeau * - * This library is free software; you can redistribute it and/or + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. + * version 2.1 of the License, or (at your option) any later version. * - * This library is distributed in the hope that it will be useful, + * FFmpeg is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ #include #include -#include "../dsputil.h" -#include "../mpegvideo.h" +#include "dsputil.h" +#include "mpegvideo.h" #include "gcc_fixes.h" -#include "dsputil_altivec.h" - +#include "dsputil_ppc.h" +#include "util_altivec.h" // Swaps two variables (used for altivec registers) #define SWAP(a,b) \ do { \ @@ -50,39 +52,6 @@ do { \ d = vec_mergel(_trans_acl, _trans_bdl); \ } while (0) -#define TRANSPOSE8(a,b,c,d,e,f,g,h) \ -do { \ - __typeof__(a) _A1, _B1, _C1, _D1, _E1, _F1, _G1, _H1; \ - __typeof__(a) _A2, _B2, _C2, _D2, _E2, _F2, _G2, _H2; \ - \ - _A1 = vec_mergeh (a, e); \ - _B1 = vec_mergel (a, e); \ - _C1 = vec_mergeh (b, f); \ - _D1 = vec_mergel (b, f); \ - _E1 = vec_mergeh (c, g); \ - _F1 = vec_mergel (c, g); \ - _G1 = vec_mergeh (d, h); \ - _H1 = vec_mergel (d, h); \ - \ - _A2 = vec_mergeh (_A1, _E1); \ - _B2 = vec_mergel (_A1, _E1); \ - _C2 = vec_mergeh (_B1, _F1); \ - _D2 = vec_mergel (_B1, _F1); \ - _E2 = vec_mergeh (_C1, _G1); \ - _F2 = vec_mergel (_C1, _G1); \ - _G2 = vec_mergeh (_D1, _H1); \ - _H2 = vec_mergel (_D1, _H1); \ - \ - a = vec_mergeh (_A2, _E2); \ - b = vec_mergel (_A2, _E2); \ - c = vec_mergeh (_B2, _F2); \ - d = vec_mergel (_B2, _F2); \ - e = vec_mergeh (_C2, _G2); \ - f = vec_mergel (_C2, _G2); \ - g = vec_mergeh (_D2, _H2); \ - h = vec_mergel (_D2, _H2); \ -} while (0) - // Loads a four-byte value (int or float) from the target address // into every element in the target vector. Only works if the @@ -97,12 +66,8 @@ do { \ } -#ifdef CONFIG_DARWIN -#define FOUROF(a) (a) -#else -// slower, for dumb non-apple GCC -#define FOUROF(a) {a,a,a,a} -#endif +#define FOUROF(a) AVV(a,a,a,a) + int dct_quantize_altivec(MpegEncContext* s, DCTELEM* data, int n, int qscale, int* overflow) @@ -110,8 +75,8 @@ int dct_quantize_altivec(MpegEncContext* s, int lastNonZero; vector float row0, row1, row2, row3, row4, row5, row6, row7; vector float alt0, alt1, alt2, alt3, alt4, alt5, alt6, alt7; - const_vector float zero = (const_vector float)FOUROF(0.); - // used after quantise step + const vector float zero = (const vector float)FOUROF(0.); + // used after quantize step int oldBaseValue = 0; // Load the data into the row/alt vectors @@ -152,9 +117,9 @@ int dct_quantize_altivec(MpegEncContext* s, } // The following block could exist as a separate an altivec dct - // function. However, if we put it inline, the DCT data can remain - // in the vector local variables, as floats, which we'll use during the - // quantize step... + // function. However, if we put it inline, the DCT data can remain + // in the vector local variables, as floats, which we'll use during the + // quantize step... { const vector float vec_0_298631336 = (vector float)FOUROF(0.298631336f); const vector float vec_0_390180644 = (vector float)FOUROF(-0.390180644f); @@ -206,11 +171,11 @@ int dct_quantize_altivec(MpegEncContext* s, z1 = vec_madd(vec_add(tmp12, tmp13), vec_0_541196100, (vector float)zero); // dataptr[2] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp13, FIX_0_765366865), - // CONST_BITS-PASS1_BITS); + // CONST_BITS-PASS1_BITS); row2 = vec_madd(tmp13, vec_0_765366865, z1); // dataptr[6] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp12, - FIX_1_847759065), - // CONST_BITS-PASS1_BITS); + // CONST_BITS-PASS1_BITS); row6 = vec_madd(tmp12, vec_1_847759065, z1); z1 = vec_add(tmp4, tmp7); // z1 = tmp4 + tmp7; @@ -289,7 +254,7 @@ int dct_quantize_altivec(MpegEncContext* s, } } - // perform the quantise step, using the floating point data + // perform the quantize step, using the floating point data // still in the row/alt registers { const int* biasAddr; @@ -315,7 +280,7 @@ int dct_quantize_altivec(MpegEncContext* s, } // Load the bias vector (We add 0.5 to the bias so that we're - // rounding when we convert to int, instead of flooring.) + // rounding when we convert to int, instead of flooring.) { vector signed int biasInt; const vector float negOneFloat = (vector float)FOUROF(-1.0f); @@ -505,7 +470,7 @@ int dct_quantize_altivec(MpegEncContext* s, data[0] = (oldBaseValue + 4) >> 3; } - // We handled the tranpose permutation above and we don't + // We handled the transpose permutation above and we don't // need to permute the "no" permutation case. if ((lastNonZero > 0) && (s->dsp.idct_permutation_type != FF_TRANSPOSE_IDCT_PERM) && @@ -517,7 +482,6 @@ int dct_quantize_altivec(MpegEncContext* s, return lastNonZero; } -#undef FOUROF /* AltiVec version of dct_unquantize_h263 @@ -546,38 +510,25 @@ POWERPC_PERF_START_COUNT(altivec_dct_unquantize_h263_num, 1); }else qadd = 0; i = 1; - nCoeffs= 63; //does not allways use zigzag table + nCoeffs= 63; //does not always use zigzag table } else { i = 0; nCoeffs= s->intra_scantable.raster_end[ s->block_last_index[n] ]; } -#ifdef ALTIVEC_USE_REFERENCE_C_CODE - for(;i<=nCoeffs;i++) { - level = block[i]; - if (level) { - if (level < 0) { - level = level * qmul - qadd; - } else { - level = level * qmul + qadd; - } - block[i] = level; - } - } -#else /* ALTIVEC_USE_REFERENCE_C_CODE */ { - register const_vector signed short vczero = (const_vector signed short)vec_splat_s16(0); - short __attribute__ ((aligned(16))) qmul8[] = + register const vector signed short vczero = (const vector signed short)vec_splat_s16(0); + DECLARE_ALIGNED_16(short, qmul8[]) = { qmul, qmul, qmul, qmul, qmul, qmul, qmul, qmul }; - short __attribute__ ((aligned(16))) qadd8[] = + DECLARE_ALIGNED_16(short, qadd8[]) = { qadd, qadd, qadd, qadd, qadd, qadd, qadd, qadd }; - short __attribute__ ((aligned(16))) nqadd8[] = + DECLARE_ALIGNED_16(short, nqadd8[]) = { -qadd, -qadd, -qadd, -qadd, -qadd, -qadd, -qadd, -qadd @@ -643,7 +594,52 @@ POWERPC_PERF_START_COUNT(altivec_dct_unquantize_h263_num, 1); block[0] = backup_0; } } -#endif /* ALTIVEC_USE_REFERENCE_C_CODE */ - POWERPC_PERF_STOP_COUNT(altivec_dct_unquantize_h263_num, nCoeffs == 63); } + + +extern void idct_put_altivec(uint8_t *dest, int line_size, int16_t *block); +extern void idct_add_altivec(uint8_t *dest, int line_size, int16_t *block); + +void MPV_common_init_altivec(MpegEncContext *s) +{ + if ((mm_flags & MM_ALTIVEC) == 0) return; + + if (s->avctx->lowres==0) + { + if ((s->avctx->idct_algo == FF_IDCT_AUTO) || + (s->avctx->idct_algo == FF_IDCT_ALTIVEC)) + { + s->dsp.idct_put = idct_put_altivec; + s->dsp.idct_add = idct_add_altivec; + s->dsp.idct_permutation_type = FF_TRANSPOSE_IDCT_PERM; + } + } + + // Test to make sure that the dct required alignments are met. + if ((((long)(s->q_intra_matrix) & 0x0f) != 0) || + (((long)(s->q_inter_matrix) & 0x0f) != 0)) + { + av_log(s->avctx, AV_LOG_INFO, "Internal Error: q-matrix blocks must be 16-byte aligned " + "to use AltiVec DCT. Reverting to non-AltiVec version.\n"); + return; + } + + if (((long)(s->intra_scantable.inverse) & 0x0f) != 0) + { + av_log(s->avctx, AV_LOG_INFO, "Internal Error: scan table blocks must be 16-byte aligned " + "to use AltiVec DCT. Reverting to non-AltiVec version.\n"); + return; + } + + + if ((s->avctx->dct_algo == FF_DCT_AUTO) || + (s->avctx->dct_algo == FF_DCT_ALTIVEC)) + { +#if 0 /* seems to cause trouble under some circumstances */ + s->dct_quantize = dct_quantize_altivec; +#endif + s->dct_unquantize_h263_intra = dct_unquantize_h263_altivec; + s->dct_unquantize_h263_inter = dct_unquantize_h263_altivec; + } +}