SRCS = common/mc.c common/predict.c common/pixel.c common/macroblock.c \
common/frame.c common/dct.c common/cpu.c common/cabac.c \
- common/common.c common/mdate.c common/csp.c common/set.c\
+ common/common.c common/mdate.c common/csp.c common/set.c \
+ common/quant.c \
encoder/analyse.c encoder/me.c encoder/ratecontrol.c \
encoder/set.c encoder/macroblock.c encoder/cabac.c \
encoder/cavlc.c encoder/encoder.c encoder/eval.c
ASMSRC = common/i386/dct-a.asm common/i386/cpu-a.asm \
common/i386/pixel-a.asm common/i386/mc-a.asm \
common/i386/mc-a2.asm common/i386/predict-a.asm \
- common/i386/pixel-sse2.asm
+ common/i386/pixel-sse2.asm common/i386/quant-a.asm
OBJASM = $(ASMSRC:%.asm=%.o)
endif
ASMSRC = common/amd64/dct-a.asm common/amd64/cpu-a.asm \
common/amd64/pixel-a.asm common/amd64/mc-a.asm \
common/amd64/mc-a2.asm common/amd64/predict-a.asm \
- common/amd64/pixel-sse2.asm
+ common/amd64/pixel-sse2.asm common/amd64/quant-a.asm
OBJASM = $(ASMSRC:%.asm=%.o)
endif
--- /dev/null
+;*****************************************************************************
+;* quant-a.asm: h264 encoder library
+;*****************************************************************************
+;* Copyright (C) 2005 x264 project
+;*
+;* Authors: Alex Izvorski <aizvorski@gmail.com>
+;* Christian Heine <sennindemokrit@gmx.net>
+;*
+;* This program is free software; you can redistribute it and/or modify
+;* it under the terms of the GNU General Public License as published by
+;* the Free Software Foundation; either version 2 of the License, or
+;* (at your option) any later version.
+;*
+;* This program is distributed in the hope that it will be useful,
+;* but WITHOUT ANY WARRANTY; without even the implied warranty of
+;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;* GNU General Public License for more details.
+;*
+;* You should have received a copy of the GNU General Public License
+;* along with this program; if not, write to the Free Software
+;* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA.
+;*****************************************************************************
+
+BITS 64
+
+%macro cglobal 1
+ %ifdef PREFIX
+ global _%1 ; PREFIX defined: export the symbol with a leading underscore; the %define on the next line makes the plain name refer to it
+ %define %1 _%1
+ %else
+ global %1 ; PREFIX not defined: export the name unchanged
+ %endif
+%endmacro
+
+ALIGN 16
+
+SECTION .text
+
+cglobal x264_quant_8x8_core16_mmx
+cglobal x264_quant_4x4_core16_mmx
+cglobal x264_quant_8x8_core32_mmx
+cglobal x264_quant_4x4_core32_mmx
+cglobal x264_quant_4x4_dc_core32_mmx
+cglobal x264_quant_2x2_dc_core32_mmx
+
+%macro QUANT_AC_START 0
+; Argument setup for the AC quant routines. dct stays in rdi and
+; quant_mf stays in rsi; the routine bodies address them directly, so no moves are needed.
+ movd mm6, edx ; mm6 = i_qbits, later used as the psrad shift count
+ movd mm7, ecx ; low dword of mm7 = f
+ punpckldq mm7, mm7 ; replicate f into both dwords of mm7
+%endmacro
+
+%macro QUANT_DC_START 0
+; Argument setup for the DC quant routines. dct stays in rdi and is addressed directly by the routine bodies.
+ movd mm5, esi ; low dword of mm5 = i_quant_mf; it is a 32-bit int, so read esi only (the upper half of rsi is not part of the argument)
+ movd mm6, edx ; mm6 = i_qbits, later used as the psrad shift count
+ movd mm7, ecx ; low dword of mm7 = f
+ punpckldq mm5, mm5 ; replicate i_quant_mf into both dwords of mm5
+ punpckldq mm7, mm7 ; replicate f into both dwords of mm7
+%endmacro
+
+%macro QUANT16_1x4 5
+;;; Quantize 4 int16 coefficients in place: c = c > 0 ? ( f + c*mf ) >> i_qbits : -( ( f - c*mf ) >> i_qbits )
+;;; %1 dct[y][x] (read, then written back); %2,%3 quant_mf[i_mf][y][x] as dwords (%2 = entries 0,1; %3 = entries 2,3), entries must fit in int16
+;;; %4 i_qbits
+;;; %5 f as doublewords
+;;; trashes mm0-mm5
+ movq mm0, %1 ; mm0 = 4 coefficients (words)
+ movq mm1, %2 ; mm1 = multipliers 0,1 (dwords)
+ movq mm2, %3 ; mm2 = multipliers 2,3 (dwords)
+ packssdw mm1, mm2 ; mm1 = the 4 multipliers narrowed to words (signed saturation)
+; mm4 = per-word mask, all ones where the coefficient is > 0
+ movq mm4, mm0
+ pxor mm5, mm5
+ pcmpgtw mm4, mm5
+; signed 16x16 -> 32 bit multiply, computed as separate low and high halves
+ movq mm2, mm0
+ pmullw mm0, mm1 ; low 16 bits of each product
+ pmulhw mm2, mm1 ; high 16 bits of each signed product
+; interleave the halves into full 32-bit products
+ movq mm1, mm0
+ punpcklwd mm0, mm2 ; mm0 = products 0,1
+ punpckhwd mm1, mm2 ; mm1 = products 2,3
+; evaluate both branches of the formula: f - product and f + product
+ movq mm2, %5
+ movq mm3, %5
+ psubd mm2, mm0 ; mm2 = f - products 0,1
+ psubd mm3, mm1 ; mm3 = f - products 2,3
+ paddd mm0, %5 ; mm0 = f + products 0,1
+ paddd mm1, %5 ; mm1 = f + products 2,3
+; arithmetic shift right by i_qbits on all four dword pairs
+ psrad mm0, %4
+ psrad mm1, %4
+ psrad mm2, %4
+ psrad mm3, %4
+; narrow back to words; mm5 becomes -( ( f - product ) >> i_qbits ), the result for coefficients <= 0
+ packssdw mm0, mm1
+ packssdw mm2, mm3
+ pxor mm5, mm5
+ psubw mm5, mm2
+; per-word select: keep mm0 where the coefficient was > 0, mm5 elsewhere
+ pand mm0, mm4
+ pandn mm4, mm5
+; merge the two selections and store the result over the input
+ por mm0, mm4
+ movq %1, mm0
+%endmacro
+
+%macro QUANT32_1x4 5
+;;; Quantize 4 int16 coefficients in place with 32-bit multipliers: c = sign(c) * ( ( f + abs(c)*mf ) >> i_qbits )
+;;; %1 dct[y][x] (read, then written back); %2,%3 quant_mf[i_mf][y][x] as dwords (%2 multiplies coefficients 0,1; %3 coefficients 2,3)
+;;; %4 i_qbits
+;;; %5 f as doublewords
+;;; trashes mm0-mm4
+ movq mm0, %1 ; mm0 = 4 coefficients (words)
+ pxor mm4, mm4
+ pcmpgtw mm4, mm0 ; mm4 = all ones in every word whose coefficient is negative
+ pxor mm0, mm4
+ psubw mm0, mm4 ; mm0 = abs(mm0)
+ movq mm1, mm0
+ punpcklwd mm0, mm0 ; duplicate the words for the upcoming
+ punpckhwd mm1, mm1 ; 32 bit multiplication
+; each dword now holds (a,a); word-wise products with the multiplier's (low,high) words give the partial products
+ movq mm2, mm0 ; like in school ...
+ movq mm3, mm1
+ pmulhuw mm0, %2 ; ... multiply the parts ... (unsigned high halves)
+ pmulhuw mm1, %3
+ pmullw mm2, %2 ; (low halves)
+ pmullw mm3, %3
+ pslld mm0, 16 ; ... shift ... (leaves high16(a*mf_low) in the upper word)
+ pslld mm1, 16
+ paddd mm0, mm2 ; ... and add them: low 32 bits of a*mf
+ paddd mm1, mm3
+
+ paddd mm0, %5 ; round with f
+ paddd mm1, %5
+ psrad mm0, %4 ; >> i_qbits
+ psrad mm1, %4
+ packssdw mm0, mm1 ; pack & store
+ pxor mm0, mm4
+ psubw mm0, mm4 ; restore sign
+ movq %1, mm0
+%endmacro
+
+
+ALIGN 16
+;;; void x264_quant_8x8_core16_mmx( int16_t dct[8][8], int quant_mf[8][8], int i_qbits, int f )
+x264_quant_8x8_core16_mmx:
+ QUANT_AC_START
+; 16 unrolled steps of 4 coefficients each cover all 64 entries of the 8x8 block
+%rep 16
+ QUANT16_1x4 [rdi], [rsi], [rsi+8], mm6, mm7
+ add rdi, 8 ; advance dct by 4 int16 coefficients
+ add rsi, 16 ; advance quant_mf by 4 int multipliers
+%endrep
+; NOTE(review): no emms before returning; presumably callers reset the MMX state -- confirm
+ ret
+
+ALIGN 16
+;;; void x264_quant_4x4_core16_mmx( int16_t dct[4][4], int quant_mf[4][4], int i_qbits, int f )
+x264_quant_4x4_core16_mmx:
+ QUANT_AC_START
+; 4 unrolled steps of 4 coefficients each cover all 16 entries of the 4x4 block
+%rep 4
+ QUANT16_1x4 [rdi], [rsi], [rsi+8], mm6, mm7
+ add rdi, 8 ; advance dct by 4 int16 coefficients
+ add rsi, 16 ; advance quant_mf by 4 int multipliers
+%endrep
+
+ ret
+
+ALIGN 16
+;;; void x264_quant_8x8_core32_mmx( int16_t dct[8][8], int quant_mf[8][8], int i_qbits, int f )
+x264_quant_8x8_core32_mmx:
+ QUANT_AC_START
+; 16 unrolled steps of 4 coefficients each; QUANT32_1x4 uses the multipliers as full dwords
+%rep 16
+ QUANT32_1x4 [rdi], [rsi], [rsi+8], mm6, mm7
+ add rdi, 8 ; advance dct by 4 int16 coefficients
+ add rsi, 16 ; advance quant_mf by 4 int multipliers
+%endrep
+
+ ret
+
+ALIGN 16
+;;; void x264_quant_4x4_core32_mmx( int16_t dct[4][4], int quant_mf[4][4], int i_qbits, int f )
+x264_quant_4x4_core32_mmx:
+ QUANT_AC_START
+; 4 unrolled steps of 4 coefficients each; QUANT32_1x4 uses the multipliers as full dwords
+%rep 4
+ QUANT32_1x4 [rdi], [rsi], [rsi+8], mm6, mm7
+ add rdi, 8 ; advance dct by 4 int16 coefficients
+ add rsi, 16 ; advance quant_mf by 4 int multipliers
+%endrep
+
+ ret
+
+ALIGN 16
+;;; void x264_quant_4x4_dc_core32_mmx( int16_t dct[4][4], int i_quant_mf, int i_qbits, int f )
+x264_quant_4x4_dc_core32_mmx:
+ QUANT_DC_START
+; 4 unrolled steps of 4 coefficients each; mm5 supplies the same multiplier i_quant_mf for every coefficient
+%rep 4
+ QUANT32_1x4 [rdi], mm5, mm5, mm6, mm7
+ add rdi, 8 ; advance dct by 4 int16 coefficients
+%endrep
+
+ ret
+
+ALIGN 16
+;;; void x264_quant_2x2_dc_core32_mmx( int16_t dct[2][2], int i_quant_mf, int i_qbits, int f )
+x264_quant_2x2_dc_core32_mmx:
+ QUANT_DC_START
+; the 2x2 block is exactly 4 coefficients, so a single step with the shared multiplier in mm5 handles it
+ QUANT32_1x4 [rdi], mm5, mm5, mm6, mm7
+
+ ret
+
#include "dct.h"
#include "cabac.h"
#include "csp.h"
+#include "quant.h"
/****************************************************************************
* Macros
x264_mc_functions_t mc;
x264_dct_function_t dctf;
x264_csp_function_t csp;
+ x264_quant_function_t quantf;
/* vlc table for decoding purpose only */
x264_vlc_table_t *x264_coeff_token_lookup[5];
--- /dev/null
+;*****************************************************************************
+;* quant-a.asm: h264 encoder library
+;*****************************************************************************
+;* Copyright (C) 2005 x264 project
+;*
+;* Authors: Alex Izvorski <aizvorski@gmail.com>
+;* Christian Heine <sennindemokrit@gmx.net>
+;*
+;* This program is free software; you can redistribute it and/or modify
+;* it under the terms of the GNU General Public License as published by
+;* the Free Software Foundation; either version 2 of the License, or
+;* (at your option) any later version.
+;*
+;* This program is distributed in the hope that it will be useful,
+;* but WITHOUT ANY WARRANTY; without even the implied warranty of
+;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;* GNU General Public License for more details.
+;*
+;* You should have received a copy of the GNU General Public License
+;* along with this program; if not, write to the Free Software
+;* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA.
+;*****************************************************************************
+
+BITS 32
+
+%macro cglobal 1
+ %ifdef PREFIX
+ global _%1 ; PREFIX defined: export the symbol with a leading underscore; the %define on the next line makes the plain name refer to it
+ %define %1 _%1
+ %else
+ global %1 ; PREFIX not defined: export the name unchanged
+ %endif
+%endmacro
+
+ALIGN 16
+
+SECTION .text
+
+cglobal x264_quant_8x8_core16_mmx
+cglobal x264_quant_4x4_core16_mmx
+cglobal x264_quant_8x8_core32_mmx
+cglobal x264_quant_4x4_core32_mmx
+cglobal x264_quant_4x4_dc_core32_mmx
+cglobal x264_quant_2x2_dc_core32_mmx
+
+%macro QUANT_AC_START 0
+ mov eax, [esp+ 4] ; eax = dct (1st stack argument; the macro is used at function entry, before esp moves)
+ mov ecx, [esp+ 8] ; ecx = quant_mf
+ movd mm6, [esp+12] ; mm6 = i_qbits, later used as the psrad shift count
+ movd mm7, [esp+16] ; low dword of mm7 = f
+ punpckldq mm7, mm7 ; replicate f into both dwords of mm7
+%endmacro
+
+%macro QUANT_DC_START 0
+ mov eax, [esp+ 4] ; eax = dct (1st stack argument; the macro is used at function entry, before esp moves)
+ movd mm5, [esp+ 8] ; low dword of mm5 = i_quant_mf
+ movd mm6, [esp+12] ; mm6 = i_qbits, later used as the psrad shift count
+ movd mm7, [esp+16] ; low dword of mm7 = f
+ punpckldq mm5, mm5 ; replicate i_quant_mf into both dwords of mm5
+ punpckldq mm7, mm7 ; replicate f into both dwords of mm7
+%endmacro
+
+%macro QUANT16_1x4 5
+;;; Quantize 4 int16 coefficients in place: c = c > 0 ? ( f + c*mf ) >> i_qbits : -( ( f - c*mf ) >> i_qbits )
+;;; %1 dct[y][x] (read, then written back); %2,%3 quant_mf[i_mf][y][x] as dwords (%2 = entries 0,1; %3 = entries 2,3), entries must fit in int16
+;;; %4 i_qbits
+;;; %5 f as doublewords
+;;; trashes mm0-mm5
+ movq mm0, %1 ; mm0 = 4 coefficients (words)
+ movq mm1, %2 ; mm1 = multipliers 0,1 (dwords)
+ movq mm2, %3 ; mm2 = multipliers 2,3 (dwords)
+ packssdw mm1, mm2 ; mm1 = the 4 multipliers narrowed to words (signed saturation)
+; mm4 = per-word mask, all ones where the coefficient is > 0
+ movq mm4, mm0
+ pxor mm5, mm5
+ pcmpgtw mm4, mm5
+; signed 16x16 -> 32 bit multiply, computed as separate low and high halves
+ movq mm2, mm0
+ pmullw mm0, mm1 ; low 16 bits of each product
+ pmulhw mm2, mm1 ; high 16 bits of each signed product
+; interleave the halves into full 32-bit products
+ movq mm1, mm0
+ punpcklwd mm0, mm2 ; mm0 = products 0,1
+ punpckhwd mm1, mm2 ; mm1 = products 2,3
+; evaluate both branches of the formula: f - product and f + product
+ movq mm2, %5
+ movq mm3, %5
+ psubd mm2, mm0 ; mm2 = f - products 0,1
+ psubd mm3, mm1 ; mm3 = f - products 2,3
+ paddd mm0, %5 ; mm0 = f + products 0,1
+ paddd mm1, %5 ; mm1 = f + products 2,3
+; arithmetic shift right by i_qbits on all four dword pairs
+ psrad mm0, %4
+ psrad mm1, %4
+ psrad mm2, %4
+ psrad mm3, %4
+; narrow back to words; mm5 becomes -( ( f - product ) >> i_qbits ), the result for coefficients <= 0
+ packssdw mm0, mm1
+ packssdw mm2, mm3
+ pxor mm5, mm5
+ psubw mm5, mm2
+; per-word select: keep mm0 where the coefficient was > 0, mm5 elsewhere
+ pand mm0, mm4
+ pandn mm4, mm5
+; merge the two selections and store the result over the input
+ por mm0, mm4
+ movq %1, mm0
+%endmacro
+
+%macro QUANT32_1x4 5
+;;; Quantize 4 int16 coefficients in place with 32-bit multipliers: c = sign(c) * ( ( f + abs(c)*mf ) >> i_qbits )
+;;; %1 dct[y][x] (read, then written back); %2,%3 quant_mf[i_mf][y][x] as dwords (%2 multiplies coefficients 0,1; %3 coefficients 2,3)
+;;; %4 i_qbits
+;;; %5 f as doublewords
+;;; trashes mm0-mm4
+ movq mm0, %1 ; mm0 = 4 coefficients (words)
+ pxor mm4, mm4
+ pcmpgtw mm4, mm0 ; mm4 = all ones in every word whose coefficient is negative
+ pxor mm0, mm4
+ psubw mm0, mm4 ; mm0 = abs(mm0)
+ movq mm1, mm0
+ punpcklwd mm0, mm0 ; duplicate the words for the upcoming
+ punpckhwd mm1, mm1 ; 32 bit multiplication
+; each dword now holds (a,a); word-wise products with the multiplier's (low,high) words give the partial products
+ movq mm2, mm0 ; like in school ...
+ movq mm3, mm1
+ pmulhuw mm0, %2 ; ... multiply the parts ... (unsigned high halves)
+ pmulhuw mm1, %3
+ pmullw mm2, %2 ; (low halves)
+ pmullw mm3, %3
+ pslld mm0, 16 ; ... shift ... (leaves high16(a*mf_low) in the upper word)
+ pslld mm1, 16
+ paddd mm0, mm2 ; ... and add them: low 32 bits of a*mf
+ paddd mm1, mm3
+
+ paddd mm0, %5 ; round with f
+ paddd mm1, %5
+ psrad mm0, %4 ; >> i_qbits
+ psrad mm1, %4
+ packssdw mm0, mm1 ; pack & store
+ pxor mm0, mm4
+ psubw mm0, mm4 ; restore sign
+ movq %1, mm0
+%endmacro
+
+
+ALIGN 16
+;;; void x264_quant_8x8_core16_mmx( int16_t dct[8][8], int quant_mf[8][8], int i_qbits, int f )
+x264_quant_8x8_core16_mmx:
+ QUANT_AC_START
+; 16 unrolled steps of 4 coefficients each cover all 64 entries of the 8x8 block
+%rep 16
+ QUANT16_1x4 [eax], [ecx], [ecx+8], mm6, mm7
+ add eax, 8 ; advance dct by 4 int16 coefficients
+ add ecx, 16 ; advance quant_mf by 4 int multipliers
+%endrep
+; NOTE(review): no emms before returning; presumably callers reset the MMX state -- confirm
+ ret
+
+ALIGN 16
+;;; void x264_quant_4x4_core16_mmx( int16_t dct[4][4], int quant_mf[4][4], int i_qbits, int f )
+x264_quant_4x4_core16_mmx:
+ QUANT_AC_START
+; 4 unrolled steps of 4 coefficients each cover all 16 entries of the 4x4 block
+%rep 4
+ QUANT16_1x4 [eax], [ecx], [ecx+8], mm6, mm7
+ add eax, 8 ; advance dct by 4 int16 coefficients
+ add ecx, 16 ; advance quant_mf by 4 int multipliers
+%endrep
+
+ ret
+
+ALIGN 16
+;;; void x264_quant_8x8_core32_mmx( int16_t dct[8][8], int quant_mf[8][8], int i_qbits, int f )
+x264_quant_8x8_core32_mmx:
+ QUANT_AC_START
+; 16 unrolled steps of 4 coefficients each; QUANT32_1x4 uses the multipliers as full dwords
+%rep 16
+ QUANT32_1x4 [eax], [ecx], [ecx+8], mm6, mm7
+ add eax, 8 ; advance dct by 4 int16 coefficients
+ add ecx, 16 ; advance quant_mf by 4 int multipliers
+%endrep
+
+ ret
+
+ALIGN 16
+;;; void x264_quant_4x4_core32_mmx( int16_t dct[4][4], int quant_mf[4][4], int i_qbits, int f )
+x264_quant_4x4_core32_mmx:
+ QUANT_AC_START
+; 4 unrolled steps of 4 coefficients each; QUANT32_1x4 uses the multipliers as full dwords
+%rep 4
+ QUANT32_1x4 [eax], [ecx], [ecx+8], mm6, mm7
+ add eax, 8 ; advance dct by 4 int16 coefficients
+ add ecx, 16 ; advance quant_mf by 4 int multipliers
+%endrep
+
+ ret
+
+ALIGN 16
+;;; void x264_quant_4x4_dc_core32_mmx( int16_t dct[4][4], int i_quant_mf, int i_qbits, int f )
+x264_quant_4x4_dc_core32_mmx:
+ QUANT_DC_START
+; 4 unrolled steps of 4 coefficients each; mm5 supplies the same multiplier i_quant_mf for every coefficient
+%rep 4
+ QUANT32_1x4 [eax], mm5, mm5, mm6, mm7
+ add eax, 8 ; advance dct by 4 int16 coefficients
+%endrep
+
+ ret
+
+ALIGN 16
+;;; void x264_quant_2x2_dc_core32_mmx( int16_t dct[2][2], int i_quant_mf, int i_qbits, int f )
+x264_quant_2x2_dc_core32_mmx:
+ QUANT_DC_START
+; the 2x2 block is exactly 4 coefficients, so a single step with the shared multiplier in mm5 handles it
+ QUANT32_1x4 [eax], mm5, mm5, mm6, mm7
+
+ ret
+
--- /dev/null
+/*****************************************************************************
+ * quant.c: h264 encoder library
+ *****************************************************************************
+ * Copyright (C) 2005 x264 project
+ *
+ * Authors: Christian Heine <sennindemokrit@gmx.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA.
+ *****************************************************************************/
+
+#include "common.h"
+
+void x264_quant_8x8_core16_mmx( int16_t dct[8][8], int quant_mf[8][8], int i_qbits, int f ); /* asm, 8x8; multipliers must fit in int16 */
+void x264_quant_4x4_core16_mmx( int16_t dct[4][4], int quant_mf[4][4], int i_qbits, int f ); /* asm, 4x4; multipliers must fit in int16 */
+void x264_quant_8x8_core32_mmx( int16_t dct[8][8], int quant_mf[8][8], int i_qbits, int f ); /* asm, 8x8; multipliers used as 32-bit values */
+void x264_quant_4x4_core32_mmx( int16_t dct[4][4], int quant_mf[4][4], int i_qbits, int f ); /* asm, 4x4; multipliers used as 32-bit values */
+void x264_quant_4x4_dc_core32_mmx( int16_t dct[4][4], int i_quant_mf, int i_qbits, int f ); /* asm, 4x4 with one multiplier for all coefficients */
+void x264_quant_2x2_dc_core32_mmx( int16_t dct[2][2], int i_quant_mf, int i_qbits, int f ); /* asm, 2x2 with one multiplier for all coefficients */
+
+
+#define QUANT_ONE( coef, mf ) \
+{ \
+ if( (coef) > 0 ) \
+ (coef) = ( f + (coef) * (mf) ) >> i_qbits; \
+ else \
+ (coef) = - ( ( f - (coef) * (mf) ) >> i_qbits ); \
+}
+
+/* C reference: quantize every coefficient of an 8x8 block in place, using the
+ * multiplier at the same position in quant_mf, shift i_qbits and rounding
+ * offset f (see QUANT_ONE).  Indexing is [y][x] so each access stays inside
+ * its row. */
+static void quant_8x8_core( int16_t dct[8][8], int quant_mf[8][8], int i_qbits, int f )
+{
+    int x, y;
+    for( y = 0; y < 8; y++ )
+        for( x = 0; x < 8; x++ )
+            QUANT_ONE( dct[y][x], quant_mf[y][x] );
+}
+
+/* C reference: quantize every coefficient of a 4x4 block in place, using the
+ * multiplier at the same position in quant_mf, shift i_qbits and rounding
+ * offset f (see QUANT_ONE).  Indexing is [y][x] so each access stays inside
+ * its row. */
+static void quant_4x4_core( int16_t dct[4][4], int quant_mf[4][4], int i_qbits, int f )
+{
+    int x, y;
+    for( y = 0; y < 4; y++ )
+        for( x = 0; x < 4; x++ )
+            QUANT_ONE( dct[y][x], quant_mf[y][x] );
+}
+
+/* C reference: quantize every coefficient of a 4x4 DC block in place with the
+ * single multiplier i_quant_mf, shift i_qbits and rounding offset f (see
+ * QUANT_ONE).  Indexing is [y][x] so each access stays inside its row. */
+static void quant_4x4_dc_core( int16_t dct[4][4], int i_quant_mf, int i_qbits, int f )
+{
+    int x, y;
+    for( y = 0; y < 4; y++ )
+        for( x = 0; x < 4; x++ )
+            QUANT_ONE( dct[y][x], i_quant_mf );
+}
+
+/* C reference: quantize the four coefficients of a 2x2 DC block in place with
+ * the single multiplier i_quant_mf, shift i_qbits and rounding offset f (see
+ * QUANT_ONE).  Each element is addressed by its own [y][x] index. */
+static void quant_2x2_dc_core( int16_t dct[2][2], int i_quant_mf, int i_qbits, int f )
+{
+    QUANT_ONE( dct[0][0], i_quant_mf );
+    QUANT_ONE( dct[0][1], i_quant_mf );
+    QUANT_ONE( dct[1][0], i_quant_mf );
+    QUANT_ONE( dct[1][1], i_quant_mf );
+}
+
+
+/* Fill *pf with the quantization cores to use for the given cpu flags.
+ *
+ * The C versions are always installed first.  When built with HAVE_MMXEXT and
+ * the cpu reports MMX, the 8x8 and 4x4 cores switch to the 16-bit-multiplier
+ * MMX routines as long as every entry of h->quant8_mf / h->quant4_mf is
+ * below 0x8000.  The 32-bit-multiplier routines (which include both DC cores)
+ * execute pmulhuw, which is not a baseline MMX instruction, so they are only
+ * selected when the cpu also reports MMXEXT; otherwise the C version stays in
+ * place.  The chosen variants are logged at debug level.
+ *
+ * NOTE(review): quant_4x4 callers also pass def_quant4_mf, which is not
+ * scanned here; presumably all of its entries are below 0x8000 -- confirm. */
+void x264_quant_init( x264_t *h, int cpu, x264_quant_function_t *pf )
+{
+    const char *name[4] = { "C", "C", "C", "C" };
+
+    pf->quant_8x8_core = quant_8x8_core;
+    pf->quant_4x4_core = quant_4x4_core;
+    pf->quant_4x4_dc_core = quant_4x4_dc_core;
+    pf->quant_2x2_dc_core = quant_2x2_dc_core;
+
+#ifdef HAVE_MMXEXT
+    if( cpu&X264_CPU_MMX )
+    {
+        /* the core32 routines need pmulhuw */
+        const int b_mmxext = cpu&X264_CPU_MMXEXT;
+        int b_mf16_8x8 = 1;
+        int b_mf16_4x4 = 1;
+        int i;
+
+        /* do all 8x8 multipliers fit the 16-bit routine?  stop at the first that does not */
+        for( i = 0; i < 2*6*8*8; i++ )
+            if( (***h->quant8_mf)[i] >= 0x8000 )
+            {
+                b_mf16_8x8 = 0;
+                break;
+            }
+
+        /* same check for the 4x4 multipliers */
+        for( i = 0; i < 4*6*4*4; i++ )
+            if( (***h->quant4_mf)[i] >= 0x8000 )
+            {
+                b_mf16_4x4 = 0;
+                break;
+            }
+
+        if( b_mf16_8x8 )
+        {
+            pf->quant_8x8_core = x264_quant_8x8_core16_mmx;
+            name[0] = "16MMX";
+        }
+        else if( b_mmxext )
+        {
+            pf->quant_8x8_core = x264_quant_8x8_core32_mmx;
+            name[0] = "32MMX";
+        }
+
+        if( b_mf16_4x4 )
+        {
+            pf->quant_4x4_core = x264_quant_4x4_core16_mmx;
+            name[1] = "16MMX";
+        }
+        else if( b_mmxext )
+        {
+            pf->quant_4x4_core = x264_quant_4x4_core32_mmx;
+            name[1] = "32MMX";
+        }
+
+        /* the DC cores exist only as 32-bit-multiplier routines */
+        if( b_mmxext )
+        {
+            pf->quant_4x4_dc_core = x264_quant_4x4_dc_core32_mmx;
+            pf->quant_2x2_dc_core = x264_quant_2x2_dc_core32_mmx;
+            name[2] = name[3] = "32MMX";
+        }
+    }
+#endif
+
+    x264_log( h, X264_LOG_DEBUG, "using quant functions 8x8=%s 4x4=%s dc4x4=%s dc2x2=%s\n",
+              name[0], name[1], name[2], name[3] );
+}
--- /dev/null
+/*****************************************************************************
+ * quant.h: h264 encoder library
+ *****************************************************************************
+ * Copyright (C) 2005 x264 project
+ *
+ * Authors: Christian Heine <sennindemokrit@gmx.net>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA.
+ *****************************************************************************/
+
+#ifndef _QUANT_H
+#define _QUANT_H 1
+
+typedef struct
+{
+ void (*quant_8x8_core)( int16_t dct[8][8], int quant_mf[8][8], int i_qbits, int f ); /* quantize an 8x8 block in place, one multiplier per coefficient */
+ void (*quant_4x4_core)( int16_t dct[4][4], int quant_mf[4][4], int i_qbits, int f ); /* quantize a 4x4 block in place, one multiplier per coefficient */
+ void (*quant_4x4_dc_core)( int16_t dct[4][4], int i_quant_mf, int i_qbits, int f ); /* quantize a 4x4 block in place with a single multiplier */
+ void (*quant_2x2_dc_core)( int16_t dct[2][2], int i_quant_mf, int i_qbits, int f ); /* quantize a 2x2 block in place with a single multiplier */
+} x264_quant_function_t; /* table of quantization cores, filled in by x264_quant_init */
+
+void x264_quant_init( x264_t *h, int cpu, x264_quant_function_t *pf ); /* fills *pf according to the cpu flags; h is used for its quant tables and for logging */
+
+#endif
x264_dct_init( h->param.cpu, &h->dctf );
x264_mc_init( h->param.cpu, &h->mc );
x264_csp_init( h->param.cpu, h->param.i_csp, &h->csp );
+ x264_quant_init( h, h->param.cpu, &h->quantf );
memcpy( h->pixf.mbcmp,
( h->mb.b_lossless || h->param.analyse.i_subpel_refine <= 1 ) ? h->pixf.sad : h->pixf.satd,
}
#undef ZIG
-static void quant_8x8( int16_t dct[8][8], int quant_mf[6][8][8], int i_qscale, int b_intra )
+static void quant_8x8( x264_t *h, int16_t dct[8][8], int quant_mf[6][8][8], int i_qscale, int b_intra )
{
const int i_qbits = 16 + i_qscale / 6;
const int i_mf = i_qscale % 6;
const int f = ( 1 << i_qbits ) / ( b_intra ? 3 : 6 );
-
- int x,y;
- for( y = 0; y < 8; y++ )
- {
- for( x = 0; x < 8; x++ )
- {
- if( dct[y][x] > 0 )
- dct[y][x] = ( f + dct[y][x] * quant_mf[i_mf][y][x] ) >> i_qbits;
- else
- dct[y][x] = - ( ( f - dct[y][x] * quant_mf[i_mf][y][x] ) >> i_qbits );
- }
- }
+ h->quantf.quant_8x8_core( dct, quant_mf[i_mf], i_qbits, f ); /* per-coefficient loop now lives in the core installed by x264_quant_init (C or MMX) */
}
-static void quant_4x4( int16_t dct[4][4], int quant_mf[6][4][4], int i_qscale, int b_intra )
+static void quant_4x4( x264_t *h, int16_t dct[4][4], int quant_mf[6][4][4], int i_qscale, int b_intra )
{
const int i_qbits = 15 + i_qscale / 6;
const int i_mf = i_qscale % 6;
const int f = ( 1 << i_qbits ) / ( b_intra ? 3 : 6 );
-
- int x,y;
- for( y = 0; y < 4; y++ )
- {
- for( x = 0; x < 4; x++ )
- {
- if( dct[y][x] > 0 )
- dct[y][x] = ( f + dct[y][x] * quant_mf[i_mf][y][x] ) >> i_qbits;
- else
- dct[y][x] = - ( ( f - dct[y][x] * quant_mf[i_mf][y][x] ) >> i_qbits );
- }
- }
+ h->quantf.quant_4x4_core( dct, quant_mf[i_mf], i_qbits, f ); /* per-coefficient loop now lives in the core installed by x264_quant_init (C or MMX) */
}
-static void quant_4x4_dc( int16_t dct[4][4], int quant_mf[6][4][4], int i_qscale )
+static void quant_4x4_dc( x264_t *h, int16_t dct[4][4], int quant_mf[6][4][4], int i_qscale )
{
- const int i_qbits = 15 + i_qscale / 6;
- const int f2 = ( 2 << i_qbits ) / 3;
- const int i_qmf = quant_mf[i_qscale%6][0][0];
- int x,y;
-
- for( y = 0; y < 4; y++ )
- {
- for( x = 0; x < 4; x++ )
- {
- if( dct[y][x] > 0 )
- dct[y][x] =( f2 + dct[y][x] * i_qmf) >> ( 1 + i_qbits );
- else
- dct[y][x] = - ( ( f2 - dct[y][x] * i_qmf ) >> (1 + i_qbits ) );
- }
- }
+ const int i_qbits = 16 + i_qscale / 6; /* the removed code's extra shift of 1 is folded in here: 16 + ... equals 1 + ( 15 + ... ) */
+ const int i_mf = i_qscale % 6;
+ const int f = ( 1 << i_qbits ) / 3; /* same value as the removed f2 = ( 2 << ( 15 + i_qscale / 6 ) ) / 3 */
+ h->quantf.quant_4x4_dc_core( dct, quant_mf[i_mf][0][0], i_qbits, f ); /* all 16 coefficients share the [0][0] multiplier */
}
-static void quant_2x2_dc( int16_t dct[2][2], int quant_mf[6][4][4], int i_qscale, int b_intra )
+static void quant_2x2_dc( x264_t *h, int16_t dct[2][2], int quant_mf[6][4][4], int i_qscale, int b_intra )
{
- int const i_qbits = 15 + i_qscale / 6;
- const int f2 = ( 2 << i_qbits ) / ( b_intra ? 3 : 6 );
- const int i_qmf = quant_mf[i_qscale%6][0][0];
-
- int x,y;
- for( y = 0; y < 2; y++ )
- {
- for( x = 0; x < 2; x++ )
- {
- if( dct[y][x] > 0 )
- dct[y][x] =( f2 + dct[y][x] * i_qmf) >> ( 1 + i_qbits );
- else
- dct[y][x] = - ( ( f2 - dct[y][x] * i_qmf ) >> (1 + i_qbits ) );
- }
- }
+ const int i_qbits = 16 + i_qscale / 6; /* the removed code's extra shift of 1 is folded in here: 16 + ... equals 1 + ( 15 + ... ) */
+ const int i_mf = i_qscale % 6;
+ const int f = ( 1 << i_qbits ) / ( b_intra ? 3 : 6 ); /* same value as the removed f2 = ( 2 << ( 15 + i_qscale / 6 ) ) / ( b_intra ? 3 : 6 ) */
+ h->quantf.quant_2x2_dc_core( dct, quant_mf[i_mf][0][0], i_qbits, f ); /* all 4 coefficients share the [0][0] multiplier */
}
+
#if 0
/* From a JVT doc */
static const int f_deadzone_intra[4][4][2] = /* [num][den] */
}
h->dctf.sub4x4_dct( dct4x4, p_src, i_stride, p_dst, i_stride );
- quant_4x4( dct4x4, h->quant4_mf[CQM_4IY], i_qscale, 1 );
+ quant_4x4( h, dct4x4, h->quant4_mf[CQM_4IY], i_qscale, 1 );
scan_zigzag_4x4full( h->dct.block[idx].luma4x4, dct4x4 );
x264_mb_dequant_4x4( dct4x4, h->dequant4_mf[CQM_4IY], i_qscale );
int16_t dct8x8[8][8];
h->dctf.sub8x8_dct8( dct8x8, p_src, i_stride, p_dst, i_stride );
- quant_8x8( dct8x8, h->quant8_mf[CQM_8IY], i_qscale, 1 );
+ quant_8x8( h, dct8x8, h->quant8_mf[CQM_8IY], i_qscale, 1 );
scan_zigzag_8x8full( h->dct.luma8x8[idx], dct8x8 );
x264_mb_dequant_8x8( dct8x8, h->dequant8_mf[CQM_8IY], i_qscale );
h->dctf.add8x8_idct8( p_dst, i_stride, dct8x8 );
dct4x4[0][block_idx_y[i]][block_idx_x[i]] = dct4x4[1+i][0][0];
/* quant/scan/dequant */
- quant_4x4( dct4x4[1+i], h->quant4_mf[CQM_4IY], i_qscale, 1 );
+ quant_4x4( h, dct4x4[1+i], h->quant4_mf[CQM_4IY], i_qscale, 1 );
scan_zigzag_4x4( h->dct.block[i].residual_ac, dct4x4[1+i] );
x264_mb_dequant_4x4( dct4x4[1+i], h->dequant4_mf[CQM_4IY], i_qscale );
}
h->dctf.dct4x4dc( dct4x4[0] );
- quant_4x4_dc( dct4x4[0], h->quant4_mf[CQM_4IY], i_qscale );
+ quant_4x4_dc( h, dct4x4[0], h->quant4_mf[CQM_4IY], i_qscale );
scan_zigzag_4x4full( h->dct.luma16x16_dc, dct4x4[0] );
/* output samples to fdec */
/* copy dc coeff */
dct2x2[block_idx_y[i]][block_idx_x[i]] = dct4x4[i][0][0];
- quant_4x4( dct4x4[i], h->quant4_mf[CQM_4IC + b_inter], i_qscale, b_inter ? 0 : 1 );
+ quant_4x4( h, dct4x4[i], h->quant4_mf[CQM_4IC + b_inter], i_qscale, !b_inter );
scan_zigzag_4x4( h->dct.block[16+i+ch*4].residual_ac, dct4x4[i] );
x264_mb_dequant_4x4( dct4x4[i], h->dequant4_mf[CQM_4IC + b_inter], i_qscale );
}
h->dctf.dct2x2dc( dct2x2 );
- quant_2x2_dc( dct2x2, h->quant4_mf[CQM_4IC + b_inter], i_qscale, b_inter ? 0 : 1 );
+ quant_2x2_dc( h, dct2x2, h->quant4_mf[CQM_4IC + b_inter], i_qscale, !b_inter );
scan_zigzag_2x2_dc( h->dct.chroma_dc[ch], dct2x2 );
/* output samples to fdec */
{
int i_decimate_8x8;
- quant_8x8( dct8x8[idx], h->quant8_mf[CQM_8PY], i_qp, 0 );
+ quant_8x8( h, dct8x8[idx], h->quant8_mf[CQM_8PY], i_qp, 0 );
scan_zigzag_8x8full( h->dct.luma8x8[idx], dct8x8[idx] );
x264_mb_dequant_8x8( dct8x8[idx], h->dequant8_mf[CQM_8PY], i_qp );
{
idx = i8x8 * 4 + i4x4;
- quant_4x4( dct4x4[idx], h->quant4_mf[CQM_4PY], i_qp, 0 );
+ quant_4x4( h, dct4x4[idx], h->quant4_mf[CQM_4PY], i_qp, 0 );
scan_zigzag_4x4full( h->dct.block[idx].luma4x4, dct4x4[idx] );
x264_mb_dequant_4x4( dct4x4[idx], h->dequant4_mf[CQM_4PY], i_qp );
{
const int idx = i8x8 * 4 + i4x4;
- quant_4x4( dct4x4[idx], (int(*)[4][4])def_quant4_mf, i_qp, 0 );
+ quant_4x4( h, dct4x4[idx], (int(*)[4][4])def_quant4_mf, i_qp, 0 );
scan_zigzag_4x4full( dctscan, dct4x4[idx] );
i_decimate_mb += x264_mb_decimate_score( dctscan, 16 );
dct2x2[1][0] = dct4x4[2][0][0];
dct2x2[1][1] = dct4x4[3][0][0];
h->dctf.dct2x2dc( dct2x2 );
- quant_2x2_dc( dct2x2, (int(*)[4][4])def_quant4_mf, i_qp, 0 );
+ quant_2x2_dc( h, dct2x2, (int(*)[4][4])def_quant4_mf, i_qp, 0 );
if( dct2x2[0][0] || dct2x2[0][1] || dct2x2[1][0] || dct2x2[1][1] )
{
/* can't be */
/* calculate dct coeffs */
for( i4x4 = 0, i_decimate_mb = 0; i4x4 < 4; i4x4++ )
{
- quant_4x4( dct4x4[i4x4], (int(*)[4][4])def_quant4_mf, i_qp, 0 );
+ quant_4x4( h, dct4x4[i4x4], (int(*)[4][4])def_quant4_mf, i_qp, 0 );
scan_zigzag_4x4( dctscan, dct4x4[i4x4] );
i_decimate_mb += x264_mb_decimate_score( dctscan, 15 );