X-Git-Url: https://git.sesse.net/?a=blobdiff_plain;f=libavcodec%2Fjfdctfst.c;h=bbcf5984901c89798e213eba631f16c11ed6d677;hb=af964baf090616fbd858e8937d95bf1f1d8be277;hp=4e3b55bb51e6e33342e72f9a768e7de58a38106d;hpb=03c94ede9365c22855c4f63941a13d0a4574f7a4;p=ffmpeg diff --git a/libavcodec/jfdctfst.c b/libavcodec/jfdctfst.c index 4e3b55bb51e..bbcf5984901 100644 --- a/libavcodec/jfdctfst.c +++ b/libavcodec/jfdctfst.c @@ -1,9 +1,40 @@ /* - * jfdctfst.c - * - * Copyright (C) 1994-1996, Thomas G. Lane. * This file is part of the Independent JPEG Group's software. - * For conditions of distribution and use, see the accompanying README file. + * + * The authors make NO WARRANTY or representation, either express or implied, + * with respect to this software, its quality, accuracy, merchantability, or + * fitness for a particular purpose. This software is provided "AS IS", and + * you, its user, assume the entire risk as to its quality and accuracy. + * + * This software is copyright (C) 1994-1996, Thomas G. Lane. + * All Rights Reserved except as specified below. + * + * Permission is hereby granted to use, copy, modify, and distribute this + * software (or portions thereof) for any purpose, without fee, subject to + * these conditions: + * (1) If any part of the source code for this software is distributed, then + * this README file must be included, with this copyright and no-warranty + * notice unaltered; and any additions, deletions, or changes to the original + * files must be clearly indicated in accompanying documentation. + * (2) If only executable code is distributed, then the accompanying + * documentation must state that "this software is based in part on the work + * of the Independent JPEG Group". + * (3) Permission for use of this software is granted only if the user accepts + * full responsibility for any undesirable consequences; the authors accept + * NO LIABILITY for damages of any kind. + * + * These conditions apply to any software derived from or based on the IJG + * code, not just to the unmodified library. If you use our work, you ought + * to acknowledge us. + * + * Permission is NOT granted for the use of any IJG author's name or company + * name in advertising or publicity relating to this software or products + * derived from it. This software may be referred to only as "the Independent + * JPEG Group's software". + * + * We specifically permit and encourage the use of this software as the basis + * of commercial products, provided that all warranty or liability claims are + * assumed by the product vendor. * * This file contains a fast, not so accurate integer implementation of the * forward DCT (Discrete Cosine Transform). @@ -30,15 +61,19 @@ * quality-setting files than with low-quality ones. */ +/** + * @file + * Independent JPEG Group's fast AAN dct. + */ + #include #include -#include "common.h" -#include "dsputil.h" +#include "libavutil/common.h" +#include "dct.h" #define DCTSIZE 8 #define GLOBAL(x) x #define RIGHT_SHIFT(x, n) ((x) >> (n)) -#define SHIFT_TEMPS /* * This module is specialized to the case DCTSIZE = 8. @@ -78,10 +113,10 @@ */ #if CONST_BITS == 8 -#define FIX_0_382683433 ((INT32) 98) /* FIX(0.382683433) */ -#define FIX_0_541196100 ((INT32) 139) /* FIX(0.541196100) */ -#define FIX_0_707106781 ((INT32) 181) /* FIX(0.707106781) */ -#define FIX_1_306562965 ((INT32) 334) /* FIX(1.306562965) */ +#define FIX_0_382683433 ((int32_t) 98) /* FIX(0.382683433) */ +#define FIX_0_541196100 ((int32_t) 139) /* FIX(0.541196100) */ +#define FIX_0_707106781 ((int32_t) 181) /* FIX(0.707106781) */ +#define FIX_1_306562965 ((int32_t) 334) /* FIX(1.306562965) */ #else #define FIX_0_382683433 FIX(0.382683433) #define FIX_0_541196100 FIX(0.541196100) @@ -101,26 +136,18 @@ #endif -/* Multiply a DCTELEM variable by an INT32 constant, and immediately - * descale to yield a DCTELEM result. +/* Multiply a int16_t variable by an int32_t constant, and immediately + * descale to yield a int16_t result. */ -#define MULTIPLY(var,const) ((DCTELEM) DESCALE((var) * (const), CONST_BITS)) +#define MULTIPLY(var,const) ((int16_t) DESCALE((var) * (const), CONST_BITS)) - -/* - * Perform the forward DCT on one block of samples. - */ - -GLOBAL(void) -fdct_ifast (DCTELEM * data) -{ - DCTELEM tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; - DCTELEM tmp10, tmp11, tmp12, tmp13; - DCTELEM z1, z2, z3, z4, z5, z11, z13; - DCTELEM *dataptr; +static av_always_inline void row_fdct(int16_t * data){ + int tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; + int tmp10, tmp11, tmp12, tmp13; + int z1, z2, z3, z4, z5, z11, z13; + int16_t *dataptr; int ctr; - SHIFT_TEMPS /* Pass 1: process rows. */ @@ -134,43 +161,59 @@ fdct_ifast (DCTELEM * data) tmp5 = dataptr[2] - dataptr[5]; tmp3 = dataptr[3] + dataptr[4]; tmp4 = dataptr[3] - dataptr[4]; - + /* Even part */ - - tmp10 = tmp0 + tmp3; /* phase 2 */ + + tmp10 = tmp0 + tmp3; /* phase 2 */ tmp13 = tmp0 - tmp3; tmp11 = tmp1 + tmp2; tmp12 = tmp1 - tmp2; - + dataptr[0] = tmp10 + tmp11; /* phase 3 */ dataptr[4] = tmp10 - tmp11; - + z1 = MULTIPLY(tmp12 + tmp13, FIX_0_707106781); /* c4 */ - dataptr[2] = tmp13 + z1; /* phase 5 */ + dataptr[2] = tmp13 + z1; /* phase 5 */ dataptr[6] = tmp13 - z1; - + /* Odd part */ - tmp10 = tmp4 + tmp5; /* phase 2 */ + tmp10 = tmp4 + tmp5; /* phase 2 */ tmp11 = tmp5 + tmp6; tmp12 = tmp6 + tmp7; /* The rotator is modified from fig 4-8 to avoid extra negations. */ z5 = MULTIPLY(tmp10 - tmp12, FIX_0_382683433); /* c6 */ - z2 = MULTIPLY(tmp10, FIX_0_541196100) + z5; /* c2-c6 */ - z4 = MULTIPLY(tmp12, FIX_1_306562965) + z5; /* c2+c6 */ - z3 = MULTIPLY(tmp11, FIX_0_707106781); /* c4 */ + z2 = MULTIPLY(tmp10, FIX_0_541196100) + z5; /* c2-c6 */ + z4 = MULTIPLY(tmp12, FIX_1_306562965) + z5; /* c2+c6 */ + z3 = MULTIPLY(tmp11, FIX_0_707106781); /* c4 */ - z11 = tmp7 + z3; /* phase 5 */ + z11 = tmp7 + z3; /* phase 5 */ z13 = tmp7 - z3; - dataptr[5] = z13 + z2; /* phase 6 */ + dataptr[5] = z13 + z2; /* phase 6 */ dataptr[3] = z13 - z2; dataptr[1] = z11 + z4; dataptr[7] = z11 - z4; - dataptr += DCTSIZE; /* advance pointer to next row */ + dataptr += DCTSIZE; /* advance pointer to next row */ } +} + +/* + * Perform the forward DCT on one block of samples. + */ + +GLOBAL(void) +ff_fdct_ifast (int16_t * data) +{ + int tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; + int tmp10, tmp11, tmp12, tmp13; + int z1, z2, z3, z4, z5, z11, z13; + int16_t *dataptr; + int ctr; + + row_fdct(data); /* Pass 2: process columns. */ @@ -184,24 +227,24 @@ fdct_ifast (DCTELEM * data) tmp5 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*5]; tmp3 = dataptr[DCTSIZE*3] + dataptr[DCTSIZE*4]; tmp4 = dataptr[DCTSIZE*3] - dataptr[DCTSIZE*4]; - + /* Even part */ - - tmp10 = tmp0 + tmp3; /* phase 2 */ + + tmp10 = tmp0 + tmp3; /* phase 2 */ tmp13 = tmp0 - tmp3; tmp11 = tmp1 + tmp2; tmp12 = tmp1 - tmp2; - + dataptr[DCTSIZE*0] = tmp10 + tmp11; /* phase 3 */ dataptr[DCTSIZE*4] = tmp10 - tmp11; - + z1 = MULTIPLY(tmp12 + tmp13, FIX_0_707106781); /* c4 */ dataptr[DCTSIZE*2] = tmp13 + z1; /* phase 5 */ dataptr[DCTSIZE*6] = tmp13 - z1; - + /* Odd part */ - tmp10 = tmp4 + tmp5; /* phase 2 */ + tmp10 = tmp4 + tmp5; /* phase 2 */ tmp11 = tmp5 + tmp6; tmp12 = tmp6 + tmp7; @@ -211,7 +254,7 @@ fdct_ifast (DCTELEM * data) z4 = MULTIPLY(tmp12, FIX_1_306562965) + z5; /* c2+c6 */ z3 = MULTIPLY(tmp11, FIX_0_707106781); /* c4 */ - z11 = tmp7 + z3; /* phase 5 */ + z11 = tmp7 + z3; /* phase 5 */ z13 = tmp7 - z3; dataptr[DCTSIZE*5] = z13 + z2; /* phase 6 */ @@ -219,7 +262,65 @@ fdct_ifast (DCTELEM * data) dataptr[DCTSIZE*1] = z11 + z4; dataptr[DCTSIZE*7] = z11 - z4; - dataptr++; /* advance pointer to next column */ + dataptr++; /* advance pointer to next column */ + } +} + +/* + * Perform the forward 2-4-8 DCT on one block of samples. + */ + +GLOBAL(void) +ff_fdct_ifast248 (int16_t * data) +{ + int tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; + int tmp10, tmp11, tmp12, tmp13; + int z1; + int16_t *dataptr; + int ctr; + + row_fdct(data); + + /* Pass 2: process columns. */ + + dataptr = data; + for (ctr = DCTSIZE-1; ctr >= 0; ctr--) { + tmp0 = dataptr[DCTSIZE*0] + dataptr[DCTSIZE*1]; + tmp1 = dataptr[DCTSIZE*2] + dataptr[DCTSIZE*3]; + tmp2 = dataptr[DCTSIZE*4] + dataptr[DCTSIZE*5]; + tmp3 = dataptr[DCTSIZE*6] + dataptr[DCTSIZE*7]; + tmp4 = dataptr[DCTSIZE*0] - dataptr[DCTSIZE*1]; + tmp5 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*3]; + tmp6 = dataptr[DCTSIZE*4] - dataptr[DCTSIZE*5]; + tmp7 = dataptr[DCTSIZE*6] - dataptr[DCTSIZE*7]; + + /* Even part */ + + tmp10 = tmp0 + tmp3; + tmp11 = tmp1 + tmp2; + tmp12 = tmp1 - tmp2; + tmp13 = tmp0 - tmp3; + + dataptr[DCTSIZE*0] = tmp10 + tmp11; + dataptr[DCTSIZE*4] = tmp10 - tmp11; + + z1 = MULTIPLY(tmp12 + tmp13, FIX_0_707106781); + dataptr[DCTSIZE*2] = tmp13 + z1; + dataptr[DCTSIZE*6] = tmp13 - z1; + + tmp10 = tmp4 + tmp7; + tmp11 = tmp5 + tmp6; + tmp12 = tmp5 - tmp6; + tmp13 = tmp4 - tmp7; + + dataptr[DCTSIZE*1] = tmp10 + tmp11; + dataptr[DCTSIZE*5] = tmp10 - tmp11; + + z1 = MULTIPLY(tmp12 + tmp13, FIX_0_707106781); + dataptr[DCTSIZE*3] = tmp13 + z1; + dataptr[DCTSIZE*7] = tmp13 - z1; + + dataptr++; /* advance pointer to next column */ } }