Faster 2x2 chroma DC dequant

author Henrik Gramner <hengar-6@student.ltu.se>

Mon, 8 Feb 2010 23:53:52 +0000 (15:53 -0800)

committer Fiona Glaser <fiona@x264.com>

Mon, 15 Feb 2010 08:28:01 +0000 (00:28 -0800)
author Henrik Gramner <hengar-6@student.ltu.se>
Mon, 8 Feb 2010 23:53:52 +0000 (15:53 -0800)
committer Fiona Glaser <fiona@x264.com>
Mon, 15 Feb 2010 08:28:01 +0000 (00:28 -0800)
diff --git a/doc/standards.txt b/doc/standards.txt

index db9a69178056ddff139f547b8241bb51db75ff06..7474d8fb06fceced47dd8eebcf4674cf0b4b3d7c 100644 (file)
--- a/doc/standards.txt
+++ b/doc/standards.txt
@@ -4,6 +4,7 @@ checkasm is written in gcc, with no attempt at compatibility with anything else.
  We make the following additional assumptions which are true of real systems but not guaranteed by C99:
  * Two's complement.
  * Signed right-shifts are sign-extended.
+* int is 32-bit or larger.
  
  x86-specific assumptions:
  * The stack is 16-byte aligned. We align it on entry to libx264 and on entry to any thread, but the compiler must preserve alignment after that.
diff --git a/encoder/macroblock.c b/encoder/macroblock.c

index 8000d6daf4510671eafdafcc2bb7d56632ff04b0..974aed39911354517c6f3c6f646f87f85d84fbb4 100644 (file)
--- a/encoder/macroblock.c
+++ b/encoder/macroblock.c
@@ -42,30 +42,24 @@ static inline void zigzag_scan_2x2_dc( int16_t level[4], int16_t dct[4] )
      int d1 = dct[2] + dct[3]; \
      int d2 = dct[0] - dct[1]; \
      int d3 = dct[2] - dct[3]; \
-    int dmf = dequant_mf[i_qp%6][0]; \
-    int qbits = i_qp/6 - 5; \
-    if( qbits > 0 ) \
-    { \
-        dmf <<= qbits; \
-        qbits = 0; \
-    }
+    int dmf = dequant_mf[i_qp%6][0] << i_qp/6;
  
  static inline void idct_dequant_2x2_dc( int16_t dct[4], int16_t dct4x4[4][16], int dequant_mf[6][16], int i_qp )
  {
      IDCT_DEQUANT_START
-    dct4x4[0][0] = (d0 + d1) * dmf >> -qbits;
-    dct4x4[1][0] = (d0 - d1) * dmf >> -qbits;
-    dct4x4[2][0] = (d2 + d3) * dmf >> -qbits;
-    dct4x4[3][0] = (d2 - d3) * dmf >> -qbits;
+    dct4x4[0][0] = (d0 + d1) * dmf >> 5;
+    dct4x4[1][0] = (d0 - d1) * dmf >> 5;
+    dct4x4[2][0] = (d2 + d3) * dmf >> 5;
+    dct4x4[3][0] = (d2 - d3) * dmf >> 5;
  }
  
  static inline void idct_dequant_2x2_dconly( int16_t out[4], int16_t dct[4], int dequant_mf[6][16], int i_qp )
  {
      IDCT_DEQUANT_START
-    out[0] = (d0 + d1) * dmf >> -qbits;
-    out[1] = (d0 - d1) * dmf >> -qbits;
-    out[2] = (d2 + d3) * dmf >> -qbits;
-    out[3] = (d2 - d3) * dmf >> -qbits;
+    out[0] = (d0 + d1) * dmf >> 5;
+    out[1] = (d0 - d1) * dmf >> 5;
+    out[2] = (d2 + d3) * dmf >> 5;
+    out[3] = (d2 - d3) * dmf >> 5;
  }
  
  static inline void dct2x2dc( int16_t d[4], int16_t dct4x4[4][16] )
author	Henrik Gramner <hengar-6@student.ltu.se>
	Mon, 8 Feb 2010 23:53:52 +0000 (15:53 -0800)
committer	Fiona Glaser <fiona@x264.com>
	Mon, 15 Feb 2010 08:28:01 +0000 (00:28 -0800)
doc/standards.txt		patch | blob | history
encoder/macroblock.c		patch | blob | history