From: Christophe Massiot <massiot@videolan.org>
Date: Wed, 21 Apr 2004 15:54:09 +0000 (+0000)
Subject: * modules/stream_out/transrate: Rewrote the transrater to use a SAD error
X-Git-Tag: 0.7.2~335
X-Git-Url: https://git.sesse.net/?a=commitdiff_plain;h=9c11fac39a69409759548b7b37ffa213956e39b1;p=vlc

* modules/stream_out/transrate: Rewrote the transrater to use a SAD error
  estimation ; if the error is too high, lower the quantizer scale.
---

diff --git a/modules/stream_out/transrate/Modules.am b/modules/stream_out/transrate/Modules.am
index 3c7ea7c027..725d373ac4 100644
--- a/modules/stream_out/transrate/Modules.am
+++ b/modules/stream_out/transrate/Modules.am
@@ -1 +1 @@
-SOURCES_stream_out_transrate = transrate.c frame.c getvlc.h putvlc.h
+SOURCES_stream_out_transrate = transrate.c frame.c block.c getvlc.h putvlc.h
diff --git a/modules/stream_out/transrate/block.c b/modules/stream_out/transrate/block.c
new file mode 100644
index 0000000000..07c3b433cf
--- /dev/null
+++ b/modules/stream_out/transrate/block.c
@@ -0,0 +1,806 @@
+/*****************************************************************************
+ * block.c: MPEG2 video transrating module
+ *****************************************************************************
+ * Copyright (C) 2003-2004 VideoLAN
+ * Copyright (C) 2003 Antoine Missout
+ * Copyright (C) 2000-2003 Michel Lespinasse <walken@zoy.org>
+ * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca>
+ * $Id: frame.c,v 1.2 2004/03/03 11:39:06 massiot Exp $
+ *
+ * Authors: Christophe Massiot <massiot@via.ecp.fr>
+ *          Laurent Aimar <fenrir@via.ecp.fr>
+ *          Antoine Missout
+ *          Michel Lespinasse <walken@zoy.org>
+ *          Aaron Holtzman <aholtzma@ess.engr.uvic.ca>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111, USA.
+ *****************************************************************************/
+
+/*****************************************************************************
+ * Preamble
+ *****************************************************************************/
+#include <stdio.h>
+#include <stdlib.h>
+#define NDEBUG 1
+#include <assert.h>
+#include <math.h>
+
+#include <vlc/vlc.h>
+#include <vlc/sout.h>
+#include <vlc/input.h>
+
+#include "transrate.h"
+
+/****************************************************************************
+ * transrater, code from M2VRequantizer http://www.metakine.com/
+ ****************************************************************************/
+
+/////---- begin ext mpeg code
+
+#include "getvlc.h"
+#include "putvlc.h"
+
+static inline int saturate( int i_value )
+{
+    if ( i_value > 2047 )
+        return 2047;
+    if ( i_value < -2048 )
+        return -2048;
+    return i_value;
+}
+
+static int64_t get_score( const RunLevel *blk, RunLevel *new_blk, int i_qscale, int i_qscale_new )
+{
+    int64_t score = 0;
+    int i1 = -1, i2 = -1;
+
+    while ( new_blk->level )
+    {
+        int new_level = new_blk->level;
+        int level = blk->level;
+	if ( i1 > 64 || i2 > 64 || !blk->run || !new_blk->run ) return score;
+        if ( i1 + blk->run == i2 + new_blk->run )
+        {
+            int64_t tmp = saturate(level * i_qscale) 
+                            - saturate(new_level * i_qscale_new);
+            i1 += blk->run;
+            i2 += new_blk->run;
+            score += tmp * tmp;
+            blk++;
+            new_blk++;
+        }
+        else
+        {
+            int64_t tmp = saturate(level * i_qscale);
+            i1 += blk->run;
+            score += tmp * tmp;
+            blk++;
+        }
+    }
+
+    while ( blk->level )
+    {
+        int level = blk->level;
+        int64_t tmp = saturate(level * i_qscale);
+        i1 += blk->run;
+        score += tmp * tmp;
+        blk++;
+    }
+
+    return score;
+}
+
+static void change_qscale( const RunLevel *blk, RunLevel *new_blk, int i_qscale, int i_qscale_new, int intra )
+{
+    int i = 0, li = 0;
+    int rounding;
+    if ( intra )
+        rounding = i_qscale_new / 3;
+    else
+        rounding = i_qscale_new / 6;
+
+    while ( blk->level )
+    {
+        int level = blk->level > 0 ? blk->level : -blk->level;
+        int new_level = saturate(level * i_qscale) / i_qscale_new;
+        i += blk->run;
+
+        if ( new_level )
+        {
+            new_blk->run = i - li;
+            new_blk->level = blk->level > 0 ? new_level : -new_level;
+            new_blk++;
+            li = i;
+        }
+        blk++;
+    }
+    new_blk->level = 0;
+}
+
+static const uint8_t non_linear_mquant_table[32] =
+{
+    0, 1, 2, 3, 4, 5, 6, 7,
+    8,10,12,14,16,18,20,22,
+    24,28,32,36,40,44,48,52,
+    56,64,72,80,88,96,104,112
+};
+static const uint8_t map_non_linear_mquant[113] =
+{
+    0,1,2,3,4,5,6,7,8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,16,16,
+    16,17,17,17,18,18,18,18,19,19,19,19,20,20,20,20,21,21,21,21,22,22,
+    22,22,23,23,23,23,24,24,24,24,24,24,24,25,25,25,25,25,25,25,26,26,
+    26,26,26,26,26,26,27,27,27,27,27,27,27,27,28,28,28,28,28,28,28,29,
+    29,29,29,29,29,29,29,29,29,30,30,30,30,30,30,30,31,31,31,31,31
+};
+
+int scale_quant( transrate_t *tr, double qrate )
+{
+    int i_quant = (int)floor( tr->quantizer_scale * qrate + 0.5 );
+
+    if ( tr->q_scale_type )
+    {
+        if ( i_quant < 1 )
+            i_quant = 1;
+        if ( i_quant > 112 )
+            i_quant = 112;
+        i_quant = non_linear_mquant_table[map_non_linear_mquant[i_quant]];
+    }
+    else
+    {
+        if ( i_quant < 2 )
+            i_quant = 2;
+        if ( i_quant > 62 )
+            i_quant = 62;
+        i_quant = (i_quant / 2) * 2; // Must be *even*
+    }
+
+    return i_quant;
+}
+
+int increment_quant( transrate_t *tr, int i_quant )
+{
+    if ( tr->q_scale_type )
+    {
+        assert( i_quant >= 1 && i_quant <= 112 );
+        i_quant = map_non_linear_mquant[i_quant] + 1;
+        if ( i_quant > 31 )
+            i_quant = 31;
+        i_quant = non_linear_mquant_table[i_quant];
+    }
+    else
+    {
+        assert(!(i_quant & 1));
+        i_quant += 2;
+        if ( i_quant > 62 )
+            i_quant = 62;
+    }
+    return i_quant;
+}
+
+
+static int decrement_quant( transrate_t *tr, int i_quant )
+{
+    if ( tr->q_scale_type )
+    {
+        assert( i_quant >= 1 && i_quant <= 112 );
+        i_quant = map_non_linear_mquant[i_quant] - 1;
+        if ( i_quant < 1 )
+            i_quant = 1;
+        i_quant = non_linear_mquant_table[i_quant];
+    }
+    else
+    {
+        assert(!(i_quant & 1));
+        i_quant -= 2;
+        if ( i_quant < 2 )
+            i_quant = 2;
+    }
+    return i_quant;
+}
+
+static void quantize_block( transrate_t *tr, RunLevel *new_blk, int intra )
+{
+    RunLevel old_blk[65];
+    RunLevel *blk = old_blk;
+    const uint8_t *old_matrix, *new_matrix;
+    int i = 0, li = 0;
+
+    memcpy( blk, new_blk, 65 * sizeof(RunLevel) );
+    if ( intra )
+    {
+        old_matrix = tr->intra_quantizer_matrix;
+        new_matrix = mpeg4_default_intra_matrix;
+    }
+    else
+    {
+        old_matrix = tr->non_intra_quantizer_matrix;
+        new_matrix = mpeg4_default_non_intra_matrix;
+    }
+
+    while ( blk->level )
+    {
+        int level = blk->level > 0 ? blk->level : -blk->level;
+        int new_level = (level * old_matrix[i] + new_matrix[i]/2)
+                            / new_matrix[i];
+        i += blk->run;
+
+        if (new_level)
+        {
+            new_blk->run = i - li;
+            new_blk->level = blk->level > 0 ? new_level : -new_level;
+            new_blk++;
+            li = i;
+        }
+        blk++;
+    }
+    new_blk->level = 0;
+}
+
+int transrate_mb( transrate_t *tr, RunLevel blk[6][65], RunLevel new_blk[6][65],
+                  int i_cbp, int intra )
+{
+    int i_qscale = tr->quantizer_scale;
+    int i_guessed_qscale = tr->new_quantizer_scale;
+    int64_t i_last_error = 0;
+    int i_last_qscale;
+    int i_last_qscale_same_error = 0;
+    int i_direction = 0;
+    int i_new_cbp;
+    int i_nb_blocks = 0;
+    int i_nb_coeffs = 0;
+    int i;
+
+    for ( i = 0; i < 6; i++ )
+    {
+        if ( i_cbp & (1 << (5 - i)) )
+        {
+            RunLevel *cur_blk = blk[i];
+            i_nb_blocks++;
+            while ( cur_blk->level )
+            {
+                cur_blk++;
+                i_nb_coeffs++;
+            }
+        }
+    }
+
+    /* See if we can change quantizer scale */
+    for ( ; ; )
+    {
+        int64_t i_error = 0;
+        i_new_cbp = 0;
+
+        for ( i = 0; i < 6; i++ )
+        {
+            if ( i_cbp & (1 << (5 - i)) )
+            {
+                int64_t i_block_error;
+                change_qscale( blk[i], new_blk[i], i_qscale, i_guessed_qscale,
+                               intra );
+                i_block_error = get_score( blk[i], new_blk[i],
+                                           i_qscale, i_guessed_qscale );
+                if ( i > 3 ) i_block_error *= 4;
+                if ( i_block_error > i_error )
+                    i_error = i_block_error;
+                if ( new_blk[i]->level )
+                    i_new_cbp |= (1 << (5 - i));
+            }
+        }
+
+        if ( i_error >= (int64_t)tr->i_minimum_error
+                && i_error <= (int64_t)tr->i_admissible_error )
+        {
+            break;
+        }
+        if ( i_nb_coeffs <= 15 && i_error <= (int64_t)tr->i_admissible_error )
+        {
+            /* There is no interest in changing the qscale (takes up 5 bits
+             * we won't regain) */
+            break;
+        }
+
+        if ( !i_direction )
+        {
+            if ( i_error > (int64_t)tr->i_admissible_error )
+            {
+                i_direction = -1;
+                i_last_qscale = i_guessed_qscale;
+                i_guessed_qscale = decrement_quant( tr, i_guessed_qscale );
+            }
+            else
+            {
+                i_direction = +1;
+                i_last_qscale = i_guessed_qscale;
+                i_guessed_qscale = increment_quant( tr, i_guessed_qscale );
+                i_last_error = i_error;
+                i_last_qscale_same_error = i_last_qscale;
+            }
+            if ( i_guessed_qscale == i_last_qscale )
+                break;
+        }
+        else if ( i_direction < 0 )
+        {
+            if ( i_error > (int64_t)tr->i_admissible_error )
+            {
+                i_last_qscale = i_guessed_qscale;
+                i_guessed_qscale = decrement_quant( tr, i_guessed_qscale );
+                if ( i_guessed_qscale == i_last_qscale )
+                    break;
+            }
+            else
+            {
+                break;
+            }
+        }
+        else
+        {
+            if ( i_error < (int64_t)tr->i_minimum_error )
+            {
+                i_last_qscale = i_guessed_qscale;
+                i_guessed_qscale = increment_quant( tr, i_guessed_qscale );
+                if ( i_error > i_last_error )
+                {
+                    i_last_error = i_error;
+                    i_last_qscale_same_error = i_last_qscale;
+                }
+                if ( i_guessed_qscale == i_last_qscale )
+                {
+                    if ( i_last_error == i_error )
+                    {
+                        i_guessed_qscale = i_last_qscale_same_error;
+                        if ( i_guessed_qscale == i_qscale )
+                        {
+                            memcpy( new_blk, blk, sizeof(RunLevel)*65*6 );
+                            i_new_cbp = i_cbp;
+                        }
+                        else
+                        {
+                            i_new_cbp = 0;
+                            for ( i = 0; i < 6; i++ )
+                            {
+                                if ( i_cbp & (1 << (5 - i)) )
+                                {
+                                    change_qscale( blk[i], new_blk[i],
+                                                   i_qscale, i_guessed_qscale,
+                                                   intra );
+                                    if ( new_blk[i]->level )
+                                        i_new_cbp |= (1 << (5 - i));
+                                }
+                            }
+                        }
+                    }
+                    break;
+                }
+            }
+            else
+            {
+                if ( i_error > (int64_t)tr->i_admissible_error
+                        || i_last_error == i_error )
+                {
+                    i_guessed_qscale = i_last_qscale_same_error;
+                    if ( i_guessed_qscale == i_qscale )
+                    {
+                        memcpy( new_blk, blk, sizeof(RunLevel)*65*6 );
+                        i_new_cbp = i_cbp;
+                    }
+                    else
+                    {
+                        i_new_cbp = 0;
+                        for ( i = 0; i < 6; i++ )
+                        {
+                            if ( i_cbp & (1 << (5 - i)) )
+                            {
+                                change_qscale( blk[i], new_blk[i],
+                                               i_qscale, i_guessed_qscale,
+                                               intra );
+                                if ( new_blk[i]->level )
+                                    i_new_cbp |= (1 << (5 - i));
+                            }
+                        }
+                    }
+                }
+                break;
+            }
+        }
+    }
+
+    tr->new_quantizer_scale = i_guessed_qscale;
+
+#if 0
+    /* Now see if we can drop coeffs */
+    for ( i = 0; i < 6; i++ )
+    {
+        if ( i_new_cbp & (1 << (5 - i)) )
+        {
+            for ( ; ; )
+            {
+                RunLevel *last_blk = new_blk[i];
+                uint8_t old_level;
+
+                while ( last_blk[1].level )
+                    last_blk++;
+                if ( last_blk == new_blk[i] )
+                    break;
+                old_level = last_blk->level;
+                last_blk->level = 0;
+                i_error = get_score( blk[i], new_blk[i],
+                                     i_qscale, i_guessed_qscale );
+                if ( i_error > tr->i_admissible_error )
+                {
+                    last_blk->level = old_level;
+                    break;
+                }
+            }
+        }
+    }
+#endif
+
+    return i_new_cbp;
+}
+
+void get_intra_block_B14( transrate_t *tr, RunLevel *blk )
+{
+    bs_transrate_t *bs = &tr->bs;
+    int i, li;
+    int val;
+    const DCTtab * tab;
+
+    li = i = 0;
+
+    for( ;; )
+    {
+        if (bs->i_bit_in_cache >= 0x28000000)
+        {
+            tab = DCT_B14AC_5 + (UBITS (bs->i_bit_in_cache, 5) - 5);
+
+            i += tab->run;
+            if (i >= 64) break; /* end of block */
+
+    normal_code:
+            bs_flush( bs, tab->len );
+            val = tab->level;
+            val = (val ^ SBITS (bs->i_bit_in_cache, 1)) - SBITS (bs->i_bit_in_cache, 1);
+            blk->level = val;
+            blk->run = i - li - 1;
+            li = i;
+            blk++;
+
+            bs_flush( bs, 1 );
+            continue;
+        }
+        else if (bs->i_bit_in_cache >= 0x04000000)
+        {
+            tab = DCT_B14_8 + (UBITS (bs->i_bit_in_cache, 8) - 4);
+
+            i += tab->run;
+            if (i < 64) goto normal_code;
+
+            /* escape code */
+            i += (UBITS (bs->i_bit_in_cache, 12) & 0x3F) - 64;
+            if (i >= 64) break; /* illegal, check needed to avoid buffer overflow */
+
+            bs_flush( bs, 12 );
+            val = SBITS (bs->i_bit_in_cache, 12);
+            blk->level = val;
+            blk->run = i - li - 1;
+            li = i;
+            blk++;
+
+            bs_flush( bs, 12 );
+
+            continue;
+        }
+        else if (bs->i_bit_in_cache >= 0x02000000)
+        {
+            tab = DCT_B14_10 + (UBITS (bs->i_bit_in_cache, 10) - 8);
+            i += tab->run;
+            if (i < 64 ) goto normal_code;
+        }
+        else if (bs->i_bit_in_cache >= 0x00800000)
+        {
+            tab = DCT_13 + (UBITS (bs->i_bit_in_cache, 13) - 16);
+            i += tab->run;
+            if (i < 64 ) goto normal_code;
+        }
+        else if (bs->i_bit_in_cache >= 0x00200000)
+        {
+            tab = DCT_15 + (UBITS (bs->i_bit_in_cache, 15) - 16);
+            i += tab->run;
+            if (i < 64 ) goto normal_code;
+        }
+        else
+        {
+            tab = DCT_16 + UBITS (bs->i_bit_in_cache, 16);
+            bs_flush( bs, 16 );
+            i += tab->run;
+            if (i < 64 ) goto normal_code;
+        }
+        fprintf(stderr, "Err in B14\n");
+	tr->b_error = 1;
+        break;  /* illegal, check needed to avoid buffer overflow */
+    }
+    bs_flush( bs, 2 );    /* dump end of block code */
+    blk->level = 0;
+
+    if ( tr->mpeg4_matrix )
+        quantize_block( tr, blk, 1 );
+}
+
+void get_intra_block_B15( transrate_t *tr, RunLevel *blk )
+{
+    bs_transrate_t *bs = &tr->bs;
+    int i, li;
+    int val;
+    const DCTtab * tab;
+
+    li = i = 0;
+
+    for( ;; )
+    {
+        if (bs->i_bit_in_cache >= 0x04000000)
+        {
+            tab = DCT_B15_8 + (UBITS (bs->i_bit_in_cache, 8) - 4);
+
+            i += tab->run;
+            if (i < 64)
+            {
+    normal_code:
+                bs_flush( bs, tab->len );
+
+                val = tab->level;
+                val = (val ^ SBITS (bs->i_bit_in_cache, 1)) - SBITS (bs->i_bit_in_cache, 1);
+                blk->level = val;
+                blk->run = i - li - 1;
+                li = i;
+                blk++;
+
+                bs_flush( bs, 1 );
+                continue;
+            }
+            else
+            {
+                i += (UBITS (bs->i_bit_in_cache, 12) & 0x3F) - 64;
+
+                if (i >= 64) break; /* illegal, check against buffer overflow */
+
+                bs_flush( bs, 12 );
+                val = SBITS (bs->i_bit_in_cache, 12);
+                blk->level = val;
+                blk->run = i - li - 1;
+                li = i;
+                blk++;
+
+                bs_flush( bs, 12 );
+                continue;
+            }
+        }
+        else if (bs->i_bit_in_cache >= 0x02000000)
+        {
+            tab = DCT_B15_10 + (UBITS (bs->i_bit_in_cache, 10) - 8);
+            i += tab->run;
+            if (i < 64) goto normal_code;
+        }
+        else if (bs->i_bit_in_cache >= 0x00800000)
+        {
+            tab = DCT_13 + (UBITS (bs->i_bit_in_cache, 13) - 16);
+            i += tab->run;
+            if (i < 64) goto normal_code;
+        }
+        else if (bs->i_bit_in_cache >= 0x00200000)
+        {
+            tab = DCT_15 + (UBITS (bs->i_bit_in_cache, 15) - 16);
+            i += tab->run;
+            if (i < 64) goto normal_code;
+        }
+        else
+        {
+            tab = DCT_16 + UBITS (bs->i_bit_in_cache, 16);
+            bs_flush( bs, 16 );
+            i += tab->run;
+            if (i < 64) goto normal_code;
+        }
+        fprintf(stderr, "Err in B15\n");
+	tr->b_error = 1;
+        break;  /* illegal, check needed to avoid buffer overflow */
+    }
+    bs_flush( bs, 4 );    /* dump end of block code */
+    blk->level = 0;
+
+    if ( tr->mpeg4_matrix )
+        quantize_block( tr, blk, 1 );
+}
+
+
+int get_non_intra_block( transrate_t *tr, RunLevel *blk )
+{
+    bs_transrate_t *bs = &tr->bs;
+    int i, li;
+    int val;
+    const DCTtab * tab;
+
+    li = i = -1;
+
+    if (bs->i_bit_in_cache >= 0x28000000)
+    {
+        tab = DCT_B14DC_5 + (UBITS (bs->i_bit_in_cache, 5) - 5);
+        goto entry_1;
+    }
+    else goto entry_2;
+
+    for( ;; )
+    {
+        if (bs->i_bit_in_cache >= 0x28000000)
+        {
+            tab = DCT_B14AC_5 + (UBITS (bs->i_bit_in_cache, 5) - 5);
+
+    entry_1:
+            i += tab->run;
+            if (i >= 64)
+            break;  /* end of block */
+
+    normal_code:
+
+            bs_flush( bs, tab->len );
+            val = tab->level;
+            val = (val ^ SBITS (bs->i_bit_in_cache, 1)) - SBITS (bs->i_bit_in_cache, 1);
+            blk->level = val;
+            blk->run = i - li - 1;
+            li = i;
+            blk++;
+
+            //if ( ((val) && (tab->level < tst)) || ((!val) && (tab->level >= tst)) )
+            //  LOGF("level: %i val: %i tst : %i q: %i nq : %i\n", tab->level, val, tst, q, nq);
+
+            bs_flush( bs, 1 );
+            continue;
+        }
+
+    entry_2:
+        if (bs->i_bit_in_cache >= 0x04000000)
+        {
+            tab = DCT_B14_8 + (UBITS (bs->i_bit_in_cache, 8) - 4);
+
+            i += tab->run;
+            if (i < 64) goto normal_code;
+
+            /* escape code */
+
+            i += (UBITS (bs->i_bit_in_cache, 12) & 0x3F) - 64;
+
+            if (i >= 64) break; /* illegal, check needed to avoid buffer overflow */
+
+            bs_flush( bs, 12 );
+            val = SBITS (bs->i_bit_in_cache, 12);
+            blk->level = val;
+            blk->run = i - li - 1;
+            li = i;
+            blk++;
+
+            bs_flush( bs, 12 );
+            continue;
+        }
+        else if (bs->i_bit_in_cache >= 0x02000000)
+        {
+            tab = DCT_B14_10 + (UBITS (bs->i_bit_in_cache, 10) - 8);
+            i += tab->run;
+            if (i < 64) goto normal_code;
+        }
+        else if (bs->i_bit_in_cache >= 0x00800000)
+        {
+            tab = DCT_13 + (UBITS (bs->i_bit_in_cache, 13) - 16);
+            i += tab->run;
+            if (i < 64) goto normal_code;
+        }
+        else if (bs->i_bit_in_cache >= 0x00200000)
+        {
+            tab = DCT_15 + (UBITS (bs->i_bit_in_cache, 15) - 16);
+            i += tab->run;
+            if (i < 64) goto normal_code;
+        }
+        else
+        {
+            tab = DCT_16 + UBITS (bs->i_bit_in_cache, 16);
+            bs_flush( bs, 16 );
+
+            i += tab->run;
+            if (i < 64) goto normal_code;
+        }
+        fprintf(stderr, "Err in non-intra\n");
+	tr->b_error = 1;
+        break;  /* illegal, check needed to avoid buffer overflow */
+    }
+    bs_flush( bs, 2 );    /* dump end of block code */
+    blk->level = 0;
+
+    if ( tr->mpeg4_matrix )
+        quantize_block( tr, blk, 0 );
+
+    return i;
+}
+
+static inline void putAC( bs_transrate_t *bs, int run, int signed_level, int vlcformat)
+{
+    int level, len;
+    const VLCtable *ptab = NULL;
+
+    level = (signed_level<0) ? -signed_level : signed_level; /* abs(signed_level) */
+
+    assert(!(run<0 || run>63 || level==0 || level>2047));
+
+    len = 0;
+
+    if (run<2 && level<41)
+    {
+        if (vlcformat)  ptab = &dct_code_tab1a[run][level-1];
+        else ptab = &dct_code_tab1[run][level-1];
+        len = ptab->len;
+    }
+    else if (run<32 && level<6)
+    {
+        if (vlcformat) ptab = &dct_code_tab2a[run-2][level-1];
+        else ptab = &dct_code_tab2[run-2][level-1];
+        len = ptab->len;
+    }
+
+    if (len) /* a VLC code exists */
+    {
+        bs_write( bs, ptab->code, len);
+        bs_write( bs, signed_level<0, 1); /* sign */
+    }
+    else
+    {
+        bs_write( bs, 1l, 6); /* Escape */
+        bs_write( bs, run, 6); /* 6 bit code for run */
+        bs_write( bs, ((unsigned int)signed_level) & 0xFFF, 12);
+    }
+}
+
+
+static inline void putACfirst( bs_transrate_t *bs, int run, int val)
+{
+    if (run==0 && (val==1 || val==-1)) bs_write( bs, 2|(val<0), 2 );
+    else putAC( bs, run, val, 0);
+}
+
+void putnonintrablk( bs_transrate_t *bs, RunLevel *blk)
+{
+    assert(blk->level);
+
+    putACfirst( bs, blk->run, blk->level );
+    blk++;
+
+    while (blk->level)
+    {
+        putAC( bs, blk->run, blk->level, 0 );
+        blk++;
+    }
+
+    bs_write( bs, 2, 2 );
+}
+
+void putintrablk( bs_transrate_t *bs, RunLevel *blk, int vlcformat)
+{
+    while (blk->level)
+    {
+        putAC( bs, blk->run, blk->level, vlcformat );
+        blk++;
+    }
+
+    if (vlcformat)
+        bs_write( bs, 6, 4 );
+    else
+        bs_write( bs, 2, 2 );
+}
+
diff --git a/modules/stream_out/transrate/frame.c b/modules/stream_out/transrate/frame.c
index a96a3096ed..16186ea458 100644
--- a/modules/stream_out/transrate/frame.c
+++ b/modules/stream_out/transrate/frame.c
@@ -1,7 +1,7 @@
 /*****************************************************************************
  * frame.c: MPEG2 video transrating module
  *****************************************************************************
- * Copyright (C) 2003 VideoLAN
+ * Copyright (C) 2003-2004 VideoLAN
  * Copyright (C) 2003 Antoine Missout
  * Copyright (C) 2000-2003 Michel Lespinasse <walken@zoy.org>
  * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca>
@@ -47,37 +47,6 @@
  * transrater, code from M2VRequantizer http://www.metakine.com/
  ****************************************************************************/
 
-/* This is awful magic --Meuuh */
-//#define REACT_DELAY (1024.0*128.0)
-#define REACT_DELAY (256.0)
-
-#define QUANT_I (1.7)
-
-#define QUANT_P (1.4)
-
-#define QUANT_P_INC (0.1)
-
-#define B_HANDICAP 5
-
-// notes:
-//
-// - intra block:
-//      - the quantiser is increment by one step
-//
-// - non intra block:
-//      - in P_FRAME we keep the original quantiser but drop the last coefficient
-//        if there is more than one
-//      - in B_FRAME we multiply the quantiser by a factor
-//
-// - I_FRAME is recoded when we're 5.0 * REACT_DELAY late
-// - P_FRAME is recoded when we're 2.5 * REACT_DELAY late
-// - B_FRAME are always recoded
-
-// if we're getting *very* late (60 * REACT_DELAY)
-//
-// - intra blocks quantiser is incremented two step
-// - drop a few coefficients but always keep the first one
-
 // useful constants
 enum
 {
@@ -87,274 +56,10 @@ enum
 };
 
 
-// gcc
-#ifdef HAVE_BUILTIN_EXPECT
-#define likely(x) __builtin_expect ((x) != 0, 1)
-#define unlikely(x) __builtin_expect ((x) != 0, 0)
-#else
-#define likely(x) (x)
-#define unlikely(x) (x)
-#endif
-
-#define BITS_IN_BUF (8)
-
-#define LOG(msg) fprintf (stderr, msg)
-#define LOGF(format, args...) fprintf (stderr, format, args)
-
-static inline void bs_write( bs_transrate_t *s, unsigned int val, int n)
-{
-    assert(n < 32);
-    assert(!(val & (0xffffffffU << n)));
-
-    while (unlikely(n >= s->i_bit_out))
-    {
-        s->p_w[0] = (s->i_bit_out_cache << s->i_bit_out ) | (val >> (n - s->i_bit_out));
-        s->p_w++;
-        n -= s->i_bit_out;
-        s->i_bit_out_cache = 0;
-        val &= ~(0xffffffffU << n);
-        s->i_bit_out = BITS_IN_BUF;
-    }
-
-    if (likely(n))
-    {
-        s->i_bit_out_cache = (s->i_bit_out_cache << n) | val;
-        s->i_bit_out -= n;
-    }
-
-    assert(s->i_bit_out > 0);
-    assert(s->i_bit_out <= BITS_IN_BUF);
-}
-
-static inline void bs_refill( bs_transrate_t *s )
-{
-    assert((s->p_r - s->p_c) >= 1);
-    s->i_bit_in_cache |= s->p_c[0] << (24 - s->i_bit_in);
-    s->i_bit_in += 8;
-    s->p_c++;
-}
-
-static inline void bs_flush( bs_transrate_t *s, unsigned int n )
-{
-    assert(s->i_bit_in >= n);
-
-    s->i_bit_in_cache <<= n;
-    s->i_bit_in -= n;
-
-    assert( (!n) || ((n>0) && !(s->i_bit_in_cache & 0x1)) );
-
-    while (unlikely(s->i_bit_in < 24)) bs_refill( s );
-}
-
-static inline unsigned int bs_read( bs_transrate_t *s, unsigned int n)
-{
-    unsigned int Val = ((unsigned int)s->i_bit_in_cache) >> (32 - n);
-    bs_flush( s, n );
-    return Val;
-}
-
-static inline unsigned int bs_copy( bs_transrate_t *s, unsigned int n)
-{
-    unsigned int Val = bs_read( s, n);
-    bs_write(s, Val, n);
-    return Val;
-}
-
-static inline void bs_flush_read( bs_transrate_t *s )
-{
-    int i = s->i_bit_in & 0x7;
-    if( i )
-    {
-        assert(((unsigned int)bs->i_bit_in_cache) >> (32 - i) == 0);
-        s->i_bit_in_cache <<= i;
-        s->i_bit_in -= i;
-    }
-    s->p_c += -1 * (s->i_bit_in >> 3);
-    s->i_bit_in = 0;
-}
-static inline void bs_flush_write( bs_transrate_t *s )
-{
-    if( s->i_bit_out != 8 ) bs_write(s, 0, s->i_bit_out);
-}
-
 /////---- begin ext mpeg code
 
-const uint8_t non_linear_mquant_table[32] =
-{
-    0, 1, 2, 3, 4, 5, 6, 7,
-    8,10,12,14,16,18,20,22,
-    24,28,32,36,40,44,48,52,
-    56,64,72,80,88,96,104,112
-};
-const uint8_t map_non_linear_mquant[113] =
-{
-    0,1,2,3,4,5,6,7,8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,16,16,
-    16,17,17,17,18,18,18,18,19,19,19,19,20,20,20,20,21,21,21,21,22,22,
-    22,22,23,23,23,23,24,24,24,24,24,24,24,25,25,25,25,25,25,25,26,26,
-    26,26,26,26,26,26,27,27,27,27,27,27,27,27,28,28,28,28,28,28,28,29,
-    29,29,29,29,29,29,29,29,29,30,30,30,30,30,30,30,31,31,31,31,31
-};
-
-static int scale_quant( unsigned int q_scale_type, double quant )
-{
-    int iquant;
-    if (q_scale_type)
-    {
-        iquant = (int) floor(quant+0.5);
-
-        /* clip mquant to legal (linear) range */
-        if (iquant<1) iquant = 1;
-        if (iquant>112) iquant = 112;
-
-        iquant = non_linear_mquant_table[map_non_linear_mquant[iquant]];
-    }
-    else
-    {
-        /* clip mquant to legal (linear) range */
-        iquant = (int)floor(quant+0.5);
-        if (iquant<2) iquant = 2;
-        if (iquant>62) iquant = 62;
-        iquant = (iquant/2)*2; // Must be *even*
-    }
-    return iquant;
-}
-
-static int increment_quant( transrate_t *tr, int quant )
-{
-    if( tr->q_scale_type )
-    {
-        assert(quant >= 1 && quant <= 112 );
-        quant = map_non_linear_mquant[quant] + 1;
-        if( tr->picture_coding_type == P_TYPE )
-            quant += tr->level_p;
-        if( quant > 31) quant = 31;
-        quant = non_linear_mquant_table[quant];
-    }
-    else
-    {
-        assert(!(quant & 1));
-        quant += 2;
-        if( tr->picture_coding_type == P_TYPE )
-            quant += 2 * tr->level_p;
-        if (quant > 62) quant = 62;
-    }
-    return quant;
-}
-
-static inline int intmax( register int x, register int y )
-{
-    return x < y ? y : x;
-}
-static inline int intmin( register int x, register int y )
-{
-    return x < y ? x : y;
-}
-
-static int getNewQuant( transrate_t *tr, int curQuant)
-{
-    bs_transrate_t *bs = &tr->bs;
-
-    double calc_quant, quant_to_use;
-    int mquant = 0;
-
-    switch ( tr->picture_coding_type )
-    {
-        case I_TYPE:
-        case P_TYPE:
-            mquant = increment_quant( tr, curQuant );
-            break;
-
-        case B_TYPE:
-            tr->quant_corr = (((bs->i_byte_in - (bs->p_r - 4 - bs->p_c)) / tr->fact_x) - (bs->i_byte_out + (bs->p_w - bs->p_ow))) / REACT_DELAY + B_HANDICAP;
-            calc_quant = curQuant * tr->current_fact_x;
-            quant_to_use = calc_quant - tr->quant_corr;
-
-            mquant = intmax(scale_quant( tr->q_scale_type, quant_to_use), increment_quant( tr, curQuant) );
-            break;
-
-        default:
-            assert(0);
-            break;
-    }
-
-    /*
-        LOGF("type: %s orig_quant: %3i calc_quant: %7.1f quant_corr: %7.1f using_quant: %3i\n",
-        (picture_coding_type == I_TYPE ? "I_TYPE" : (picture_coding_type == P_TYPE ? "P_TYPE" : "B_TYPE")),
-        (int)curQuant, (float)calc_quant, (float)quant_corr, (int)mquant);
-    */
-
-    assert(mquant >= curQuant);
-
-    return mquant;
-}
-
-static inline int isNotEmpty(RunLevel *blk)
-{
-    return (blk->level);
-}
-
 #include "putvlc.h"
 
-static void putAC( bs_transrate_t *bs, int run, int signed_level, int vlcformat)
-{
-    int level, len;
-    const VLCtable *ptab = NULL;
-
-    level = (signed_level<0) ? -signed_level : signed_level; /* abs(signed_level) */
-
-    assert(!(run<0 || run>63 || level==0 || level>2047));
-
-    len = 0;
-
-    if (run<2 && level<41)
-    {
-        if (vlcformat)  ptab = &dct_code_tab1a[run][level-1];
-        else ptab = &dct_code_tab1[run][level-1];
-        len = ptab->len;
-    }
-    else if (run<32 && level<6)
-    {
-        if (vlcformat) ptab = &dct_code_tab2a[run-2][level-1];
-        else ptab = &dct_code_tab2[run-2][level-1];
-        len = ptab->len;
-    }
-
-    if (len) /* a VLC code exists */
-    {
-        bs_write( bs, ptab->code, len);
-        bs_write( bs, signed_level<0, 1); /* sign */
-    }
-    else
-    {
-        bs_write( bs, 1l, 6); /* Escape */
-        bs_write( bs, run, 6); /* 6 bit code for run */
-        bs_write( bs, ((unsigned int)signed_level) & 0xFFF, 12);
-    }
-}
-
-
-static inline void putACfirst( bs_transrate_t *bs, int run, int val)
-{
-    if (run==0 && (val==1 || val==-1)) bs_write( bs, 2|(val<0),2);
-    else putAC( bs, run,val,0);
-}
-
-static void putnonintrablk( bs_transrate_t *bs, RunLevel *blk)
-{
-    assert(blk->level);
-
-    putACfirst( bs, blk->run, blk->level);
-    blk++;
-
-    while(blk->level)
-    {
-        putAC( bs, blk->run, blk->level, 0);
-        blk++;
-    }
-
-    bs_write( bs, 2,2);
-}
-
 #include "getvlc.h"
 
 static const int non_linear_quantizer_scale [] =
@@ -372,7 +77,7 @@ static inline int get_macroblock_modes( transrate_t *tr )
     int macroblock_modes;
     const MBtab * tab;
 
-    switch( tr->picture_coding_type)
+    switch( tr->picture_coding_type )
     {
         case I_TYPE:
 
@@ -380,7 +85,7 @@ static inline int get_macroblock_modes( transrate_t *tr )
             bs_flush( bs, tab->len );
             macroblock_modes = tab->modes;
 
-            if ((! ( tr->frame_pred_frame_dct)) && ( tr->picture_structure == FRAME_PICTURE))
+            if ((!(tr->frame_pred_frame_dct)) && (tr->picture_structure == FRAME_PICTURE))
             {
                 macroblock_modes |= UBITS (bs->i_bit_in_cache, 1) * DCT_TYPE_INTERLACED;
                 bs_flush( bs, 1 );
@@ -501,8 +206,11 @@ static inline int get_motion_delta( bs_transrate_t *bs, const int f_code )
         sign = SBITS (bs->i_bit_in_cache, 1);
         bs_copy( bs, 1 );
 
-        if (f_code) delta += UBITS (bs->i_bit_in_cache, f_code);
-        bs_copy( bs, f_code);
+        if (f_code)
+        {
+            delta += UBITS (bs->i_bit_in_cache, f_code);
+            bs_copy( bs, f_code);
+        }
 
         return (delta ^ sign) - sign;
     }
@@ -554,7 +262,7 @@ static inline int get_coded_block_pattern( bs_transrate_t *bs )
     }
 }
 
-static inline int get_luma_dc_dct_diff( bs_transrate_t *bs )
+static inline int get_luma_dc_dct_diff( bs_transrate_t *bs, uint32_t *bits, uint8_t *len )
 {
     const DCtab * tab;
     int size;
@@ -566,16 +274,20 @@ static inline int get_luma_dc_dct_diff( bs_transrate_t *bs )
         size = tab->size;
         if (size)
         {
-            bs_copy( bs, tab->len);
+            *bits = bs_read( bs, tab->len );
+            *len = tab->len;
             //dc_diff = UBITS (bs->i_bit_in_cache, size) - UBITS (SBITS (~bs->i_bit_in_cache, 1), size);
             dc_diff = UBITS (bs->i_bit_in_cache, size);
             if (!(dc_diff >> (size - 1))) dc_diff = (dc_diff + 1) - (1 << size);
-            bs_copy( bs, size);
+            *bits <<= size;
+            *bits |= bs_read( bs, size );
+            *len += size;
             return dc_diff;
         }
         else
         {
-            bs_copy( bs, 3);
+            *bits = bs_read( bs, 3 );
+            *len = 3;
             return 0;
         }
     }
@@ -583,16 +295,19 @@ static inline int get_luma_dc_dct_diff( bs_transrate_t *bs )
     {
         tab = DC_long + (UBITS (bs->i_bit_in_cache, 9) - 0x1e0);
         size = tab->size;
-        bs_copy( bs, tab->len);
+        *bits = bs_read( bs, tab->len );
+        *len = tab->len;
         //dc_diff = UBITS (bs->i_bit_in_cache, size) - UBITS (SBITS (~bs->i_bit_in_cache, 1), size);
         dc_diff = UBITS (bs->i_bit_in_cache, size);
         if (!(dc_diff >> (size - 1))) dc_diff = (dc_diff + 1) - (1 << size);
-        bs_copy( bs, size);
+        *bits <<= size;
+        *bits |= bs_read( bs, size );
+        *len += size;
         return dc_diff;
     }
 }
 
-static inline int get_chroma_dc_dct_diff( bs_transrate_t *bs )
+static inline int get_chroma_dc_dct_diff( bs_transrate_t *bs, uint32_t *bits, uint8_t *len )
 {
     const DCtab * tab;
     int size;
@@ -604,454 +319,37 @@ static inline int get_chroma_dc_dct_diff( bs_transrate_t *bs )
         size = tab->size;
         if (size)
         {
-            bs_copy( bs, tab->len);
+            *bits = bs_read( bs, tab->len );
+            *len = tab->len;
             //dc_diff = UBITS (bs->i_bit_in_cache, size) - UBITS (SBITS (~bs->i_bit_in_cache, 1), size);
             dc_diff = UBITS (bs->i_bit_in_cache, size);
             if (!(dc_diff >> (size - 1))) dc_diff = (dc_diff + 1) - (1 << size);
-            bs_copy( bs, size);
+            *bits <<= size;
+            *bits |= bs_read( bs, size );
+            *len += size;
             return dc_diff;
-        } else
-        {
-            bs_copy( bs, 2);
-            return 0;
-        }
-    }
-    else
-    {
-        tab = DC_long + (UBITS (bs->i_bit_in_cache, 10) - 0x3e0);
-        size = tab->size;
-        bs_copy( bs, tab->len + 1);
-        //dc_diff = UBITS (bs->i_bit_in_cache, size) - UBITS (SBITS (~bs->i_bit_in_cache, 1), size);
-        dc_diff = UBITS (bs->i_bit_in_cache, size);
-        if (!(dc_diff >> (size - 1))) dc_diff = (dc_diff + 1) - (1 << size);
-        bs_copy( bs, size);
-        return dc_diff;
-    }
-}
-
-static void get_intra_block_B14( bs_transrate_t *bs, const int i_qscale, const int i_qscale_new )
-{
-    int tst;
-    int i, li;
-    int val;
-    const DCTtab * tab;
-
-    /* Basic sanity check --Meuuh */
-    if( i_qscale == 0 )
-    {
-        return;
-    }
-
-    tst = i_qscale_new/i_qscale + ((i_qscale_new%i_qscale) ? 1 : 0);
-
-    li = i = 0;
-
-    for( ;; )
-    {
-        if (bs->i_bit_in_cache >= 0x28000000)
-        {
-            tab = DCT_B14AC_5 + (UBITS (bs->i_bit_in_cache, 5) - 5);
-
-            i += tab->run;
-            if (i >= 64) break; /* end of block */
-
-    normal_code:
-            bs_flush( bs, tab->len );
-            val = tab->level;
-            if (val >= tst)
-            {
-                val = (val ^ SBITS (bs->i_bit_in_cache, 1)) - SBITS (bs->i_bit_in_cache, 1);
-                putAC( bs, i - li - 1, (val * i_qscale) / i_qscale_new, 0);
-                li = i;
-            }
-
-            bs_flush( bs, 1 );
-            continue;
-        }
-        else if (bs->i_bit_in_cache >= 0x04000000)
-        {
-            tab = DCT_B14_8 + (UBITS (bs->i_bit_in_cache, 8) - 4);
-
-            i += tab->run;
-            if (i < 64) goto normal_code;
-
-            /* escape code */
-            i += (UBITS (bs->i_bit_in_cache, 12) & 0x3F) - 64;
-            if (i >= 64) break; /* illegal, check needed to avoid buffer overflow */
-
-            bs_flush( bs, 12 );
-            val = SBITS (bs->i_bit_in_cache, 12);
-            if (abs(val) >= tst)
-            {
-                putAC( bs, i - li - 1, (val * i_qscale) / i_qscale_new, 0);
-                li = i;
-            }
-
-            bs_flush( bs, 12 );
-
-            continue;
-        }
-        else if (bs->i_bit_in_cache >= 0x02000000)
-        {
-            tab = DCT_B14_10 + (UBITS (bs->i_bit_in_cache, 10) - 8);
-            i += tab->run;
-            if (i < 64 ) goto normal_code;
-        }
-        else if (bs->i_bit_in_cache >= 0x00800000)
-        {
-            tab = DCT_13 + (UBITS (bs->i_bit_in_cache, 13) - 16);
-            i += tab->run;
-            if (i < 64 ) goto normal_code;
-        }
-        else if (bs->i_bit_in_cache >= 0x00200000)
-        {
-            tab = DCT_15 + (UBITS (bs->i_bit_in_cache, 15) - 16);
-            i += tab->run;
-            if (i < 64 ) goto normal_code;
-        }
-        else
-        {
-            tab = DCT_16 + UBITS (bs->i_bit_in_cache, 16);
-            bs_flush( bs, 16 );
-            i += tab->run;
-            if (i < 64 ) goto normal_code;
-        }
-        break;  /* illegal, check needed to avoid buffer overflow */
-    }
-
-    bs_copy( bs, 2);    /* end of block code */
-}
-
-static void get_intra_block_B15( bs_transrate_t *bs,  const int i_qscale, int const i_qscale_new )
-{
-    int tst;
-    int i, li;
-    int val;
-    const DCTtab * tab;
-
-    /* Basic sanity check --Meuuh */
-    if( i_qscale == 0 )
-    {
-        return;
-    }
-
-    tst = i_qscale_new/i_qscale + ((i_qscale_new%i_qscale) ? 1 : 0);
-
-    li = i = 0;
-
-    for( ;; )
-    {
-        if (bs->i_bit_in_cache >= 0x04000000)
-        {
-            tab = DCT_B15_8 + (UBITS (bs->i_bit_in_cache, 8) - 4);
-
-            i += tab->run;
-            if (i < 64)
-            {
-    normal_code:
-                bs_flush( bs, tab->len );
-
-                val = tab->level;
-                if (val >= tst)
-                {
-                    val = (val ^ SBITS (bs->i_bit_in_cache, 1)) - SBITS (bs->i_bit_in_cache, 1);
-                    putAC( bs, i - li - 1, (val * i_qscale) / i_qscale_new, 1);
-                    li = i;
-                }
-
-                bs_flush( bs, 1 );
-                continue;
-            }
-            else
-            {
-                i += (UBITS (bs->i_bit_in_cache, 12) & 0x3F) - 64;
-
-                if (i >= 64) break; /* illegal, check against buffer overflow */
-
-                bs_flush( bs, 12 );
-                val = SBITS (bs->i_bit_in_cache, 12);
-                if (abs(val) >= tst)
-                {
-                    putAC( bs, i - li - 1, (val * i_qscale) / i_qscale_new, 1);
-                    li = i;
-                }
-
-                bs_flush( bs, 12 );
-                continue;
-            }
-        }
-        else if (bs->i_bit_in_cache >= 0x02000000)
-        {
-            tab = DCT_B15_10 + (UBITS (bs->i_bit_in_cache, 10) - 8);
-            i += tab->run;
-            if (i < 64) goto normal_code;
-        }
-        else if (bs->i_bit_in_cache >= 0x00800000)
-        {
-            tab = DCT_13 + (UBITS (bs->i_bit_in_cache, 13) - 16);
-            i += tab->run;
-            if (i < 64) goto normal_code;
-        }
-        else if (bs->i_bit_in_cache >= 0x00200000)
-        {
-            tab = DCT_15 + (UBITS (bs->i_bit_in_cache, 15) - 16);
-            i += tab->run;
-            if (i < 64) goto normal_code;
-        }
-        else
-        {
-            tab = DCT_16 + UBITS (bs->i_bit_in_cache, 16);
-            bs_flush( bs, 16 );
-            i += tab->run;
-            if (i < 64) goto normal_code;
-        }
-        break;  /* illegal, check needed to avoid buffer overflow */
-    }
-
-    bs_copy( bs, 4);    /* end of block code */
-}
-
-
-static int get_non_intra_block_drop( transrate_t *tr, RunLevel *blk)
-{
-    bs_transrate_t *bs = &tr->bs;
-
-    int i, li;
-    int val;
-    const DCTtab * tab;
-    RunLevel *sblk = blk + 1;
-
-    li = i = -1;
-
-    if (bs->i_bit_in_cache >= 0x28000000)
-    {
-        tab = DCT_B14DC_5 + (UBITS (bs->i_bit_in_cache, 5) - 5);
-        goto entry_1;
-    }
-    else goto entry_2;
-
-    for( ;; )
-    {
-        if (bs->i_bit_in_cache >= 0x28000000)
-        {
-            tab = DCT_B14AC_5 + (UBITS (bs->i_bit_in_cache, 5) - 5);
-
-    entry_1:
-            i += tab->run;
-            if (i >= 64) break; /* end of block */
-
-    normal_code:
-
-            bs_flush( bs, tab->len );
-            val = tab->level;
-            val = (val ^ SBITS (bs->i_bit_in_cache, 1)) - SBITS (bs->i_bit_in_cache, 1); /* if (bitstream_get (1)) val = -val; */
-
-            blk->level = val;
-            blk->run = i - li - 1;
-            li = i;
-            blk++;
-
-            bs_flush( bs, 1 );
-            continue;
-        }
-
-    entry_2:
-
-        if (bs->i_bit_in_cache >= 0x04000000)
-        {
-            tab = DCT_B14_8 + (UBITS (bs->i_bit_in_cache, 8) - 4);
-
-            i += tab->run;
-            if (i < 64) goto normal_code;
-
-            /* escape code */
-
-            i += (UBITS (bs->i_bit_in_cache, 12) & 0x3F) - 64;
-
-            if (i >= 64) break; /* illegal, check needed to avoid buffer overflow */
-
-            bs_flush( bs, 12 );
-            val = SBITS (bs->i_bit_in_cache, 12);
-
-            blk->level = val;
-            blk->run = i - li - 1;
-            li = i;
-            blk++;
-
-            bs_flush( bs, 12 );
-            continue;
-        }
-        else if (bs->i_bit_in_cache >= 0x02000000)
-        {
-            tab = DCT_B14_10 + (UBITS (bs->i_bit_in_cache, 10) - 8);
-            i += tab->run;
-            if (i < 64) goto normal_code;
-        }
-        else if (bs->i_bit_in_cache >= 0x00800000)
-        {
-            tab = DCT_13 + (UBITS (bs->i_bit_in_cache, 13) - 16);
-            i += tab->run;
-            if (i < 64) goto normal_code;
-        }
-        else if (bs->i_bit_in_cache >= 0x00200000)
-        {
-            tab = DCT_15 + (UBITS (bs->i_bit_in_cache, 15) - 16);
-            i += tab->run;
-            if (i < 64) goto normal_code;
-        }
-        else
-        {
-            tab = DCT_16 + UBITS (bs->i_bit_in_cache, 16);
-            bs_flush( bs, 16 );
-            i += tab->run;
-            if (i < 64) goto normal_code;
-        }
-        break;  /* illegal, check needed to avoid buffer overflow */
-    }
-    bs_flush( bs, 2 ); /* dump end of block code */
-
-    // remove last coeff
-    if (blk != sblk)
-    {
-        blk--;
-    }
-
-    // remove more coeffs if very late
-    if (tr->level_p >= 4 && (blk != sblk))
-    {
-        blk--;
-        if (tr->level_p >= 5 && (blk != sblk))
-        {
-            blk--;
-            if (tr->level_p >= 6 && (blk != sblk))
-            {
-                blk--;
-                if (tr->level_p >= 7 && (blk != sblk))
-                    blk--;
-            }
-        }
-    }
-
-    blk->level = 0;
-
-    return i;
-}
-
-static int get_non_intra_block_rq( bs_transrate_t *bs, RunLevel *blk,  const int i_qscale, const int i_qscale_new )
-{
-    int tst;
-    int i, li;
-    int val;
-    const DCTtab * tab;
-
-    /* Basic sanity check --Meuuh */
-    if( i_qscale == 0 )
-    {
-        return 0;
-    }
-
-    tst = i_qscale_new/i_qscale + ((i_qscale_new%i_qscale) ? 1 : 0);
-
-    li = i = -1;
-
-    if (bs->i_bit_in_cache >= 0x28000000)
-    {
-        tab = DCT_B14DC_5 + (UBITS (bs->i_bit_in_cache, 5) - 5);
-        goto entry_1;
-    }
-    else goto entry_2;
-
-    for( ;; )
-    {
-        if (bs->i_bit_in_cache >= 0x28000000)
-        {
-            tab = DCT_B14AC_5 + (UBITS (bs->i_bit_in_cache, 5) - 5);
-
-    entry_1:
-            i += tab->run;
-            if (i >= 64)
-            break;  /* end of block */
-
-    normal_code:
-
-            bs_flush( bs, tab->len );
-            val = tab->level;
-            if (val >= tst)
-            {
-                val = (val ^ SBITS (bs->i_bit_in_cache, 1)) - SBITS (bs->i_bit_in_cache, 1);
-                blk->level = (val * i_qscale) / i_qscale_new;
-                blk->run = i - li - 1;
-                li = i;
-                blk++;
-            }
-
-            //if ( ((val) && (tab->level < tst)) || ((!val) && (tab->level >= tst)) )
-            //  LOGF("level: %i val: %i tst : %i q: %i nq : %i\n", tab->level, val, tst, q, nq);
-
-            bs_flush( bs, 1 );
-            continue;
-        }
-
-    entry_2:
-        if (bs->i_bit_in_cache >= 0x04000000)
-        {
-            tab = DCT_B14_8 + (UBITS (bs->i_bit_in_cache, 8) - 4);
-
-            i += tab->run;
-            if (i < 64) goto normal_code;
-
-            /* escape code */
-
-            i += (UBITS (bs->i_bit_in_cache, 12) & 0x3F) - 64;
-
-            if (i >= 64) break; /* illegal, check needed to avoid buffer overflow */
-
-            bs_flush( bs, 12 );
-            val = SBITS (bs->i_bit_in_cache, 12);
-            if (abs(val) >= tst)
-            {
-                blk->level = (val * i_qscale) / i_qscale_new;
-                blk->run = i - li - 1;
-                li = i;
-                blk++;
-            }
-
-            bs_flush( bs, 12 );
-            continue;
-        }
-        else if (bs->i_bit_in_cache >= 0x02000000)
-        {
-            tab = DCT_B14_10 + (UBITS (bs->i_bit_in_cache, 10) - 8);
-            i += tab->run;
-            if (i < 64) goto normal_code;
-        }
-        else if (bs->i_bit_in_cache >= 0x00800000)
-        {
-            tab = DCT_13 + (UBITS (bs->i_bit_in_cache, 13) - 16);
-            i += tab->run;
-            if (i < 64) goto normal_code;
-        }
-        else if (bs->i_bit_in_cache >= 0x00200000)
-        {
-            tab = DCT_15 + (UBITS (bs->i_bit_in_cache, 15) - 16);
-            i += tab->run;
-            if (i < 64) goto normal_code;
         }
         else
         {
-            tab = DCT_16 + UBITS (bs->i_bit_in_cache, 16);
-            bs_flush( bs, 16 );
-
-            i += tab->run;
-            if (i < 64) goto normal_code;
+            *bits = bs_read( bs, 2 );
+            *len = 2;
+            return 0;
         }
-        break;  /* illegal, check needed to avoid buffer overflow */
     }
-    bs_flush( bs, 2 );    /* dump end of block code */
-
-    blk->level = 0;
-
-    return i;
+    else
+    {
+        tab = DC_long + (UBITS (bs->i_bit_in_cache, 10) - 0x3e0);
+        size = tab->size;
+        *bits = bs_read( bs, tab->len + 1 );
+        *len = tab->len + 1;
+        //dc_diff = UBITS (bs->i_bit_in_cache, size) - UBITS (SBITS (~bs->i_bit_in_cache, 1), size);
+        dc_diff = UBITS (bs->i_bit_in_cache, size);
+        if (!(dc_diff >> (size - 1))) dc_diff = (dc_diff + 1) - (1 << size);
+        *bits <<= size;
+        *bits |= bs_read( bs, size );
+        *len += size;
+        return dc_diff;
+    }
 }
 
 static void motion_fr_frame( bs_transrate_t *bs, unsigned int f_code[2] )
@@ -1140,7 +438,7 @@ static void putmbdata( transrate_t *tr, int macroblock_modes )
               mbtypetab[tr->picture_coding_type-1][macroblock_modes&0x1F].code,
               mbtypetab[tr->picture_coding_type-1][macroblock_modes&0x1F].len);
 
-    switch ( tr->picture_coding_type)
+    switch ( tr->picture_coding_type )
     {
         case I_TYPE:
             if ((! (tr->frame_pred_frame_dct)) && (tr->picture_structure == FRAME_PICTURE))
@@ -1186,15 +484,37 @@ static void putmbdata( transrate_t *tr, int macroblock_modes )
     }
 }
 
+static const uint8_t map_non_linear_mquant[113] =
+{
+    0,1,2,3,4,5,6,7,8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,16,16,
+    16,17,17,17,18,18,18,18,19,19,19,19,20,20,20,20,21,21,21,21,22,22,
+    22,22,23,23,23,23,24,24,24,24,24,24,24,25,25,25,25,25,25,25,26,26,
+    26,26,26,26,26,26,27,27,27,27,27,27,27,27,28,28,28,28,28,28,28,29,
+    29,29,29,29,29,29,29,29,29,30,30,30,30,30,30,30,31,31,31,31,31
+};
 static inline void put_quantiser( transrate_t *tr )
 {
     bs_transrate_t *bs = &tr->bs;
 
-    bs_write( bs, tr->q_scale_type ? map_non_linear_mquant[tr->new_quantizer_scale] : tr->new_quantizer_scale >> 1, 5);
+    bs_write( bs, tr->q_scale_type ? map_non_linear_mquant[tr->new_quantizer_scale] : tr->new_quantizer_scale >> 1, 5 );
     tr->last_coded_scale = tr->new_quantizer_scale;
 }
 
-static int slice_init( transrate_t *tr,  int code)
+/* generate variable length code for macroblock_address_increment (6.3.16) */
+static inline void putaddrinc( transrate_t *tr, int addrinc )
+{
+    bs_transrate_t *bs = &tr->bs;
+
+    while ( addrinc >= 33 )
+    {
+        bs_write( bs, 0x08, 11 ); /* macroblock_escape */
+        addrinc -= 33;
+    }
+
+    bs_write( bs, addrinctab[addrinc].code, addrinctab[addrinc].len );
+}
+
+static int slice_init( transrate_t *tr,  int code )
 {
     bs_transrate_t *bs = &tr->bs;
     int offset;
@@ -1203,15 +523,8 @@ static int slice_init( transrate_t *tr,  int code)
     tr->v_offset = (code - 1) * 16;
 
     tr->quantizer_scale = get_quantizer_scale( tr );
-    if ( tr->picture_coding_type == P_TYPE)
-    {
-        tr->new_quantizer_scale = tr->quantizer_scale;
-    }
-    else
-    {
-        tr->new_quantizer_scale = getNewQuant(tr, tr->quantizer_scale);
-    }
-    put_quantiser( tr );
+    if ( tr->new_quantizer_scale < tr->quantizer_scale )
+        tr->new_quantizer_scale = scale_quant( tr, tr->qrate );
 
     /*LOGF("************************\nstart of slice %i in %s picture. ori quant: %i new quant: %i\n", code,
         (picture_coding_type == I_TYPE ? "I_TYPE" : (picture_coding_type == P_TYPE ? "P_TYPE" : "B_TYPE")),
@@ -1241,7 +554,7 @@ static int slice_init( transrate_t *tr,  int code)
         {
             /* macroblock_escape */
             offset += 33;
-            bs_copy( bs, 11);
+            bs_flush(bs, 11);
         }
         else
         {
@@ -1249,7 +562,7 @@ static int slice_init( transrate_t *tr,  int code)
         }
     }
 
-    bs_copy( bs, mba->len + 1);
+    bs_flush(bs, mba->len + 1);
     tr->h_offset = (offset + mba->mba) << 4;
 
     while( tr->h_offset - (int)tr->horizontal_size_value >= 0)
@@ -1262,81 +575,157 @@ static int slice_init( transrate_t *tr,  int code)
     {
         return -1;
     }
-    return 0;
+    return (offset + mba->mba);
 }
 
 static void mpeg2_slice( transrate_t *tr, const int code )
 {
     bs_transrate_t *bs = &tr->bs;
+    int mba_inc;
+    int first_in_slice = 1;
 
-    if( slice_init( tr, code ) )
+    if( (mba_inc = slice_init( tr, code )) < 0 )
     {
         return;
     }
 
     for( ;; )
     {
-        int macroblock_modes;
-        int mba_inc;
         const MBAtab * mba;
+        int macroblock_modes;
+        int mba_local;
+        int i;
+
+        while (unlikely(bs->i_bit_in < 24)) bs_refill( bs );
 
         macroblock_modes = get_macroblock_modes( tr );
-        if (macroblock_modes & MACROBLOCK_QUANT) tr->quantizer_scale = get_quantizer_scale( tr );
+        if (macroblock_modes & MACROBLOCK_QUANT)
+            tr->quantizer_scale = get_quantizer_scale( tr );
+        if (tr->new_quantizer_scale < tr->quantizer_scale)
+            tr->new_quantizer_scale = scale_quant( tr, tr->qrate );
 
         //LOGF("blk %i : ", h_offset >> 4);
 
         if (macroblock_modes & MACROBLOCK_INTRA)
         {
-            //LOG("intra "); if (macroblock_modes & MACROBLOCK_QUANT) LOGF("got new quant: %i ", quantizer_scale);
+            RunLevel block[6][65]; // terminated by level = 0, so we need 64+1
+            RunLevel new_block[6][65]; // terminated by level = 0, so we need 64+1
+            uint32_t dc[6];
+            uint8_t  dc_len[6];
+
+            // begin saving data
+            int batb;
+            uint8_t   p_n_ow[32], *p_n_w,
+                    *p_o_ow = bs->p_ow, *p_o_w = bs->p_w;
+            uint32_t  i_n_bit_out, i_n_bit_out_cache,
+                    i_o_bit_out  = bs->i_bit_out, i_o_bit_out_cache = bs->i_bit_out_cache;
 
-            tr->new_quantizer_scale = increment_quant( tr, tr->quantizer_scale);
-            if (tr->last_coded_scale == tr->new_quantizer_scale) macroblock_modes &= 0xFFFFFFEF; // remove MACROBLOCK_QUANT
-            else macroblock_modes |= MACROBLOCK_QUANT; //add MACROBLOCK_QUANT
-            putmbdata( tr, macroblock_modes);
-            if (macroblock_modes & MACROBLOCK_QUANT) put_quantiser( tr );
+            bs->i_bit_out_cache = 0; bs->i_bit_out = BITS_IN_BUF;
+            bs->p_ow = bs->p_w = p_n_ow;
 
-            //if (macroblock_modes & MACROBLOCK_QUANT) LOGF("put new quant: %i ", new_quantizer_scale);
+            //LOG("intra "); if (macroblock_modes & MACROBLOCK_QUANT) LOGF("got new quant: %i ", quantizer_scale);
 
             if (tr->concealment_motion_vectors)
             {
                 if (tr->picture_structure != FRAME_PICTURE)
                 {
-                    bs_copy( bs, 1); /* remove field_select */
+                    bs_copy(bs, 1); /* remove field_select */
                 }
                 /* like motion_frame, but parsing without actual motion compensation */
-                get_motion_delta( bs, tr->f_code[0][0]);
-                get_motion_delta( bs, tr->f_code[0][1]);
+                get_motion_delta(bs, tr->f_code[0][0]);
+                get_motion_delta(bs, tr->f_code[0][1]);
 
-                bs_copy( bs, 1); /* remove marker_bit */
+                bs_copy(bs, 1); /* remove marker_bit */
             }
 
+            assert(bs->p_w - bs->p_ow < 32);
+
+            p_n_w = bs->p_w;
+            i_n_bit_out = bs->i_bit_out;
+            i_n_bit_out_cache = bs->i_bit_out_cache;
+            assert(bs->p_ow == p_n_ow);
+
+            bs->i_bit_out = i_o_bit_out ;
+            bs->i_bit_out_cache = i_o_bit_out_cache;
+            bs->p_ow = p_o_ow;
+            bs->p_w = p_o_w;
+            // end saving data
+
             if( tr->intra_vlc_format )
             {
                 /* Luma */
-                get_luma_dc_dct_diff( bs );     get_intra_block_B15( bs, tr->quantizer_scale, tr->new_quantizer_scale );
-                get_luma_dc_dct_diff( bs );     get_intra_block_B15( bs, tr->quantizer_scale, tr->new_quantizer_scale );
-                get_luma_dc_dct_diff( bs );     get_intra_block_B15( bs, tr->quantizer_scale, tr->new_quantizer_scale );
-                get_luma_dc_dct_diff( bs );     get_intra_block_B15( bs, tr->quantizer_scale, tr->new_quantizer_scale );
+                for ( i = 0; i < 4; i++ )
+                {
+                    get_luma_dc_dct_diff( bs, dc + i, dc_len + i );
+                    get_intra_block_B15( tr, block[i] );
+                    if (tr->b_error) return;
+                }
                 /* Chroma */
-                get_chroma_dc_dct_diff( bs );   get_intra_block_B15( bs, tr->quantizer_scale, tr->new_quantizer_scale );
-                get_chroma_dc_dct_diff( bs );   get_intra_block_B15( bs, tr->quantizer_scale, tr->new_quantizer_scale );
+                for ( ; i < 6; i++ )
+                {
+                    get_chroma_dc_dct_diff( bs, dc + i, dc_len + i );
+                    get_intra_block_B15( tr, block[i] );
+                    if (tr->b_error) return;
+                }
             }
             else
             {
                 /* Luma */
-                get_luma_dc_dct_diff( bs );     get_intra_block_B14( bs, tr->quantizer_scale, tr->new_quantizer_scale );
-                get_luma_dc_dct_diff( bs );     get_intra_block_B14( bs, tr->quantizer_scale, tr->new_quantizer_scale );
-                get_luma_dc_dct_diff( bs );     get_intra_block_B14( bs, tr->quantizer_scale, tr->new_quantizer_scale );
-                get_luma_dc_dct_diff( bs );     get_intra_block_B14( bs, tr->quantizer_scale, tr->new_quantizer_scale );
+                for ( i = 0; i < 4; i++ )
+                {
+                    get_luma_dc_dct_diff( bs, dc + i, dc_len + i );
+                    get_intra_block_B14( tr, block[i] );
+                    if (tr->b_error) return;
+                }
                 /* Chroma */
-                get_chroma_dc_dct_diff( bs );   get_intra_block_B14( bs, tr->quantizer_scale, tr->new_quantizer_scale );
-                get_chroma_dc_dct_diff( bs );   get_intra_block_B14( bs, tr->quantizer_scale, tr->new_quantizer_scale );
+                for ( ; i < 6; i++ )
+                {
+                    get_chroma_dc_dct_diff( bs, dc + i, dc_len + i );
+                    get_intra_block_B14( tr, block[i] );
+                    if (tr->b_error) return;
+                }
+            }
+
+            transrate_mb( tr, block, new_block, 0x3f, 1 );
+
+            if (tr->last_coded_scale == tr->new_quantizer_scale)
+                macroblock_modes &= ~MACROBLOCK_QUANT;
+
+            if ( first_in_slice )
+            {
+                put_quantiser( tr );
+                bs_write( bs, 0, 1 );
+                macroblock_modes &= ~MACROBLOCK_QUANT;
+            }
+            putaddrinc( tr, mba_inc );
+            mba_inc = 0;
+            putmbdata( tr, macroblock_modes );
+            if( macroblock_modes & MACROBLOCK_QUANT )
+            {
+                put_quantiser( tr );
             }
+
+            // put saved motion data...
+            for (batb = 0; batb < (p_n_w - p_n_ow); batb++)
+            {
+                bs_write( bs, p_n_ow[batb], 8 );
+            }
+            bs_write( bs, i_n_bit_out_cache, BITS_IN_BUF - i_n_bit_out );
+            // end saved motion data...
+
+            for ( i = 0; i < 6; i++ )
+            {
+                bs_write( bs, *(dc + i), *(dc_len + i) );
+                putintrablk( bs, new_block[i], tr->intra_vlc_format );
+            }
+    
         }
         else
         {
             RunLevel block[6][65]; // terminated by level = 0, so we need 64+1
+            RunLevel new_block[6][65]; // terminated by level = 0, so we need 64+1
             int new_coded_block_pattern = 0;
+            int cbp = 0;
 
             // begin saving data
             int batb;
@@ -1363,129 +752,160 @@ static void mpeg2_slice( transrate_t *tr, const int code )
                     case MC_DMV: MOTION_CALL (motion_fi_dmv, MACROBLOCK_MOTION_FORWARD); break;
                 }
 
-            assert(bs->p_w - bs->p_ow < 32);
-
-            p_n_w = bs->p_w;
-            i_n_bit_out = bs->i_bit_out;
-            i_n_bit_out_cache = bs->i_bit_out_cache;
-            assert(bs->p_ow == p_n_ow);
-
-            bs->i_bit_out = i_o_bit_out ;
-            bs->i_bit_out_cache = i_o_bit_out_cache;
-            bs->p_ow = p_o_ow;
-            bs->p_w = p_o_w;
-            // end saving data
-
-            if ( tr->picture_coding_type == P_TYPE) tr->new_quantizer_scale = tr->quantizer_scale;
-            else tr->new_quantizer_scale = getNewQuant( tr, tr->quantizer_scale);
-
             //LOG("non intra "); if (macroblock_modes & MACROBLOCK_QUANT) LOGF("got new quant: %i ", quantizer_scale);
 
             if (macroblock_modes & MACROBLOCK_PATTERN)
             {
-                const int cbp = get_coded_block_pattern( bs );
+                int last_in_slice;
 
-                if( tr->picture_coding_type == P_TYPE )
-                {
-                    if( cbp&0x20 ) get_non_intra_block_drop( tr, block[0] );
-                    if( cbp&0x10 ) get_non_intra_block_drop( tr, block[1] );
-                    if( cbp&0x08 ) get_non_intra_block_drop( tr, block[2] );
-                    if( cbp&0x04 ) get_non_intra_block_drop( tr, block[3] );
-                    if( cbp&0x02 ) get_non_intra_block_drop( tr, block[4] );
-                    if( cbp&0x01 ) get_non_intra_block_drop( tr, block[5] );
-
-                    new_coded_block_pattern = cbp;
-                }
-                else
+                cbp = get_coded_block_pattern( bs );
+
+                for ( i = 0; i < 6; i++ )
                 {
-                    if( cbp&0x20 )
+                    if ( cbp & (1 << (5 - i)) )
                     {
-                        get_non_intra_block_rq( bs, block[0], tr->quantizer_scale, tr->new_quantizer_scale );
-                        if( isNotEmpty( block[0] ) ) new_coded_block_pattern |= 0x20;
+                        get_non_intra_block( tr, block[i] );
+                        if (tr->b_error) return;
                     }
-                    if( cbp&0x10 )
-                    {
-                        get_non_intra_block_rq( bs, block[1], tr->quantizer_scale, tr->new_quantizer_scale );
-                        if( isNotEmpty( block[1] ) ) new_coded_block_pattern |= 0x10;
-                    }
-                    if( cbp&0x08 )
+                }
+                last_in_slice = !UBITS( bs->i_bit_in_cache, 11 );
+
+                new_coded_block_pattern = transrate_mb( tr, block, new_block,
+                                                        cbp, 0 );
+
+                if ( !new_coded_block_pattern &&
+                        !(macroblock_modes
+                            & (MACROBLOCK_MOTION_FORWARD
+                                | MACROBLOCK_MOTION_BACKWARD))
+                        && (first_in_slice || last_in_slice) )
+                {
+                    /* First mb in slice, just code a 0-mv mb.
+                     * This is wrong for last in slice, but it only shows
+                     * a few artefacts. */
+                    macroblock_modes |= MACROBLOCK_MOTION_FORWARD;
+                    if (tr->picture_structure == FRAME_PICTURE)
                     {
-                        get_non_intra_block_rq( bs, block[2], tr->quantizer_scale, tr->new_quantizer_scale );
-                        if( isNotEmpty( block[2] ) ) new_coded_block_pattern |= 0x08;
+                        macroblock_modes |= MC_FRAME;
+                        bs_write( bs, 0x3, 2 ); /* motion vectors */
                     }
-                    if( cbp&0x04 )
+                    else
                     {
-                        get_non_intra_block_rq( bs, block[3], tr->quantizer_scale, tr->new_quantizer_scale );
-                        if( isNotEmpty( block[3] ) ) new_coded_block_pattern |= 0x04;
+                        macroblock_modes |= MC_FIELD;
+                        bs_write( bs,
+                             (tr->picture_structure == BOTTOM_FIELD ? 1 : 0),
+                             1); /* motion field select */
+                        bs_write( bs, 0x3, 2 ); /* motion vectors */
                     }
-                    if( cbp&0x02 )
+                }
+
+                if ( !new_coded_block_pattern )
+                {
+                    macroblock_modes &= ~MACROBLOCK_PATTERN;
+                    macroblock_modes &= ~MACROBLOCK_QUANT;
+                }
+                else
+                {
+                    if ( tr->last_coded_scale == tr->new_quantizer_scale )
                     {
-                        get_non_intra_block_rq( bs, block[4], tr->quantizer_scale, tr->new_quantizer_scale );
-                        if( isNotEmpty( block[4] ) ) new_coded_block_pattern |= 0x02;
+                        macroblock_modes &= ~MACROBLOCK_QUANT;
                     }
-                    if( cbp&0x01 )
+                    else
                     {
-                        get_non_intra_block_rq( bs, block[5], tr->quantizer_scale, tr->new_quantizer_scale );
-                        if( isNotEmpty( block[5] ) ) new_coded_block_pattern |= 0x01;
+                        macroblock_modes |= MACROBLOCK_QUANT;
                     }
-                    if( !new_coded_block_pattern) macroblock_modes &= 0xFFFFFFED; // remove MACROBLOCK_PATTERN and MACROBLOCK_QUANT flag
                 }
             }
 
-            if (tr->last_coded_scale == tr->new_quantizer_scale) macroblock_modes &= 0xFFFFFFEF; // remove MACROBLOCK_QUANT
-            else if (macroblock_modes & MACROBLOCK_PATTERN) macroblock_modes |= MACROBLOCK_QUANT; //add MACROBLOCK_QUANT
-            assert( (macroblock_modes & MACROBLOCK_PATTERN) || !(macroblock_modes & MACROBLOCK_QUANT) );
+            assert(bs->p_w - bs->p_ow < 32);
 
-            putmbdata( tr, macroblock_modes);
-            if( macroblock_modes & MACROBLOCK_QUANT )
-            {
-                put_quantiser( tr );
-            }
+            p_n_w = bs->p_w;
+            i_n_bit_out = bs->i_bit_out;
+            i_n_bit_out_cache = bs->i_bit_out_cache;
+            assert(bs->p_ow == p_n_ow);
 
-            // put saved motion data...
-            for (batb = 0; batb < (p_n_w - p_n_ow); batb++)
+            bs->i_bit_out = i_o_bit_out ;
+            bs->i_bit_out_cache = i_o_bit_out_cache;
+            bs->p_ow = p_o_ow;
+            bs->p_w = p_o_w;
+            // end saving data
+
+            if ( macroblock_modes &
+                    (MACROBLOCK_MOTION_FORWARD | MACROBLOCK_MOTION_BACKWARD
+                      | MACROBLOCK_PATTERN) )
             {
-                bs_write( bs, p_n_ow[batb], 8 );
-            }
-            bs_write( bs, i_n_bit_out_cache, BITS_IN_BUF - i_n_bit_out);
-            // end saved motion data...
+                if ( first_in_slice )
+                {
+                    put_quantiser( tr );
+                    bs_write( bs, 0, 1 );
+                    macroblock_modes &= ~MACROBLOCK_QUANT;
+                }
+                putaddrinc( tr, mba_inc );
+                mba_inc = 0;
+                putmbdata( tr, macroblock_modes );
+                if ( macroblock_modes & MACROBLOCK_QUANT )
+                {
+                    put_quantiser( tr );
+                }
 
-            if (macroblock_modes & MACROBLOCK_PATTERN)
+                // put saved motion data...
+                for (batb = 0; batb < (p_n_w - p_n_ow); batb++)
+                {
+                    bs_write( bs, p_n_ow[batb], 8 );
+                }
+                bs_write( bs, i_n_bit_out_cache, BITS_IN_BUF - i_n_bit_out);
+                // end saved motion data...
+
+                if (macroblock_modes & MACROBLOCK_PATTERN)
+                {
+                    /* Write CBP */
+                    bs_write( bs, cbptable[new_coded_block_pattern].code,
+                              cbptable[new_coded_block_pattern].len );
+
+                    for ( i = 0; i < 6; i++ )
+                    {
+                        if ( new_coded_block_pattern & (1 << (5 - i)) )
+                        {
+                            putnonintrablk( bs, new_block[i] );
+                        }
+                    }
+                }
+            }
+            else
             {
-                /* Write CBP */
-                bs_write( bs, cbptable[new_coded_block_pattern].code,cbptable[new_coded_block_pattern].len);
-
-                if (new_coded_block_pattern & 0x20) putnonintrablk( bs, block[0]);
-                if (new_coded_block_pattern & 0x10) putnonintrablk( bs, block[1]);
-                if (new_coded_block_pattern & 0x08) putnonintrablk( bs, block[2]);
-                if (new_coded_block_pattern & 0x04) putnonintrablk( bs, block[3]);
-                if (new_coded_block_pattern & 0x02) putnonintrablk( bs, block[4]);
-                if (new_coded_block_pattern & 0x01) putnonintrablk( bs, block[5]);
+                /* skipped macroblock */
+                mba_inc++;
             }
         }
 
+        if (bs->p_c > bs->p_r || bs->p_w > bs->p_rw)
+        {
+            tr->b_error = 1;
+            return;
+        }
         //LOGF("\n\to: %i c: %i n: %i\n", quantizer_scale, last_coded_scale, new_quantizer_scale);
 
         NEXT_MACROBLOCK;
 
-        mba_inc = 0;
-        for( ;; )
+        first_in_slice = 0;
+        mba_local = 0;
+        for ( ; ; )
         {
-            if (bs->i_bit_in_cache >= 0x10000000)
+            if ( bs->i_bit_in_cache >= 0x10000000 )
             {
                 mba = MBA_5 + (UBITS (bs->i_bit_in_cache, 5) - 2);
                 break;
             }
-            else if (bs->i_bit_in_cache >= 0x03000000)
+            else if ( bs->i_bit_in_cache >= 0x03000000 )
             {
                 mba = MBA_11 + (UBITS (bs->i_bit_in_cache, 11) - 24);
                 break;
             }
-            else if( UBITS (bs->i_bit_in_cache, 11 ) == 8 )
+            else if ( UBITS( bs->i_bit_in_cache, 11 ) == 8 )
             {
                 /* macroblock_escape */
                 mba_inc += 33;
-                bs_copy( bs, 11);
+                mba_local += 33;
+                bs_flush(bs, 11);
             }
             else
             {
@@ -1493,16 +913,123 @@ static void mpeg2_slice( transrate_t *tr, const int code )
                 return;
             }
         }
-        bs_copy( bs, mba->len);
+        bs_flush(bs, mba->len);
         mba_inc += mba->mba;
+        mba_local += mba->mba;
 
-        while( mba_inc-- )
+        while( mba_local-- )
         {
             NEXT_MACROBLOCK;
         }
     }
 }
 
+static const uint8_t mpeg2_scan_norm[64] ATTR_ALIGN(16) = {
+    /* Zig-Zag scan pattern */
+     0,  1,  8, 16,  9,  2,  3, 10, 17, 24, 32, 25, 18, 11,  4,  5,
+    12, 19, 26, 33, 40, 48, 41, 34, 27, 20, 13,  6,  7, 14, 21, 28,
+    35, 42, 49, 56, 57, 50, 43, 36, 29, 22, 15, 23, 30, 37, 44, 51,
+    58, 59, 52, 45, 38, 31, 39, 46, 53, 60, 61, 54, 47, 55, 62, 63
+};
+
+static const uint8_t mpeg2_scan_alt[64] ATTR_ALIGN(16) = {
+    /* Alternate scan pattern */
+     0, 8,  16, 24,  1,  9,  2, 10, 17, 25, 32, 40, 48, 56, 57, 49,
+    41, 33, 26, 18,  3, 11,  4, 12, 19, 27, 34, 42, 50, 58, 35, 43,
+    51, 59, 20, 28,  5, 13,  6, 14, 21, 29, 36, 44, 52, 60, 37, 45,
+    53, 61, 22, 30,  7, 15, 23, 31, 38, 46, 54, 62, 39, 47, 55, 63
+};
+
+static const int16_t default_intra_matrix[64] = {
+        8, 16, 19, 22, 26, 27, 29, 34,
+        16, 16, 22, 24, 27, 29, 34, 37,
+        19, 22, 26, 27, 29, 34, 34, 38,
+        22, 22, 26, 27, 29, 34, 37, 40,
+        22, 26, 27, 29, 32, 35, 40, 48,
+        26, 27, 29, 32, 35, 40, 48, 58,
+        26, 27, 29, 34, 38, 46, 56, 69,
+        27, 29, 35, 38, 46, 56, 69, 83
+};
+
+static int mpeg2_header_sequence( transrate_t * tr )
+{
+    bs_transrate_t *bs = &tr->bs;
+    int has_intra = 0, has_non_intra = 0;
+    int i;
+
+    i = (bs->p_c[0] << 16) | (bs->p_c[1] << 8) | bs->p_c[2];
+    tr->horizontal_size_value = i >> 12;
+    tr->vertical_size_value = i & 0xfff;
+    tr->horizontal_size_value = (tr->horizontal_size_value + 15) & ~15;
+    tr->vertical_size_value = (tr->vertical_size_value + 15) & ~15;
+    if ( !tr->horizontal_size_value || !tr->vertical_size_value )
+    {
+        return -1;
+    }
+
+    if ( tr->mpeg4_matrix )
+    {
+        if (bs->p_c[7] & 2)
+        {
+            has_intra = 1;
+            for (i = 0; i < 64; i++)
+                tr->intra_quantizer_matrix[mpeg2_scan_norm[i]] =
+                (bs->p_c[i+7] << 7) | (bs->p_c[i+8] >> 1);
+        }
+        else
+        {
+            for (i = 0; i < 64; i++)
+                tr->intra_quantizer_matrix[mpeg2_scan_norm[i]] =
+                default_intra_matrix[i];
+        }
+
+        if (bs->p_c[7+64] & 1)
+        {
+            has_non_intra = 1;
+            for (i = 0; i < 64; i++)
+                tr->non_intra_quantizer_matrix[mpeg2_scan_norm[i]] =
+                bs->p_c[i+8+64];
+        }
+        else
+        {
+            for (i = 0; i < 64; i++)
+                tr->non_intra_quantizer_matrix[i] = 16;
+        }
+    }
+
+    /* Write quantization matrices */
+    memcpy( bs->p_w, bs->p_c, 8 );
+    bs->p_c += 8;
+
+    if ( tr->mpeg4_matrix )
+    {
+        memset( &bs->p_w[8], 0, 128 );
+        bs->p_w[7] |= 2;
+        bs->p_w[7] &= ~1;
+        for (i = 0; i < 64; i++)
+        {
+            bs->p_w[i+7] |= mpeg4_default_intra_matrix[mpeg2_scan_norm[i]] >> 7;
+            bs->p_w[i+8] |= mpeg4_default_intra_matrix[mpeg2_scan_norm[i]] << 1;
+        }
+
+        bs->p_w[7+64] |= 1;
+        for (i = 0; i < 64; i++)
+        {
+            bs->p_w[i+8+64] |= mpeg4_default_intra_matrix[mpeg2_scan_norm[i]];
+        }
+        bs->p_w += 8 + 128;
+        bs->p_c += (has_intra + has_non_intra) * 64;
+    }
+    else
+    {
+        bs->p_w += 8;
+    }
+
+    tr->scan = mpeg2_scan_norm;
+
+    return 0;
+}
+
 /////---- end ext mpeg code
 
 static int do_next_start_code( transrate_t *tr )
@@ -1527,16 +1054,7 @@ static int do_next_start_code( transrate_t *tr )
     }
     else if (ID == 0xB3) // seq header
     {
-        tr->horizontal_size_value = (bs->p_c[0] << 4) | (bs->p_c[1] >> 4);
-        tr->vertical_size_value = ((bs->p_c[1] & 0xF) << 8) | bs->p_c[2];
-        if(!tr->horizontal_size_value || !tr->vertical_size_value )
-        {
-            return -1;
-        }
-
-        memcpy(bs->p_w, bs->p_c, 8 );
-        bs->p_c += 8;
-        bs->p_w += 8;
+        mpeg2_header_sequence(tr);
     }
     else if (ID == 0xB5) // extension
     {
@@ -1553,8 +1071,8 @@ static int do_next_start_code( transrate_t *tr )
             tr->concealment_motion_vectors = (bs->p_c[3] >> 5) & 0x1;
             tr->q_scale_type = (bs->p_c[3] >> 4) & 0x1;
             tr->intra_vlc_format = (bs->p_c[3] >> 3) & 0x1;
-            /* tr->alternate_scan = (bs->p_c[3] >> 2) & 0x1; */
-
+            if ( (bs->p_c[3] >> 2) & 0x1 )
+                tr->scan = mpeg2_scan_alt;
 
             memcpy(bs->p_w, bs->p_c, 5);
             bs->p_c += 5;
@@ -1575,15 +1093,7 @@ static int do_next_start_code( transrate_t *tr )
     {
         uint8_t *outTemp = bs->p_w, *inTemp = bs->p_c;
 
-#if 0
-        if( ( tr->picture_coding_type == B_TYPE && tr->quant_corr <  2.5f ) || // don't recompress if we're in advance!
-            ( tr->picture_coding_type == P_TYPE && tr->quant_corr < -2.5f ) ||
-            ( tr->picture_coding_type == I_TYPE && tr->quant_corr < -5.0f ) )
-#else
-        if( ( tr->picture_coding_type == B_TYPE ) ||
-            ( tr->picture_coding_type == P_TYPE && tr->level_p ) ||
-            ( tr->picture_coding_type == I_TYPE && tr->level_i ) )
-#endif
+        if( tr->qrate != 1.0 )
         {
             if( !tr->horizontal_size_value || !tr->vertical_size_value )
             {
@@ -1602,6 +1112,7 @@ static int do_next_start_code( transrate_t *tr )
 
             // begin bit level recoding
             mpeg2_slice(tr, ID);
+            if (tr->b_error) return -1;
 
             bs_flush_read( bs );
             bs_flush_write( bs );
@@ -1619,95 +1130,120 @@ static int do_next_start_code( transrate_t *tr )
 
             if (bs->p_w - outTemp > bs->p_c - inTemp) // yes that might happen, rarely
             {
+
                 /*LOGF("*** slice bigger than before !! (type: %s code: %i in : %i out : %i diff : %i)\n",
                 (picture_coding_type == I_TYPE ? "I_TYPE" : (picture_coding_type == P_TYPE ? "P_TYPE" : "B_TYPE")),
                 ID, bs->p_c - inTemp, bs->p_w - outTemp, (bs->p_w - outTemp) - (bs->p_c - inTemp));*/
 
-                // in this case, we'll just use the original slice !
-                memcpy(outTemp, inTemp, bs->p_c - inTemp);
-                bs->p_w = outTemp + (bs->p_c - inTemp);
+                if ( !tr->mpeg4_matrix )
+                {
+                    // in this case, we'll just use the original slice !
+                    memcpy(outTemp, inTemp, bs->p_c - inTemp);
+                    bs->p_w = outTemp + (bs->p_c - inTemp);
 
-                // adjust bs->i_byte_out
-                bs->i_byte_out -= (bs->p_w - outTemp) - (bs->p_c - inTemp);
+                    // adjust bs->i_byte_out
+                    bs->i_byte_out -= (bs->p_w - outTemp) - (bs->p_c - inTemp);
+                }
+                else
+                {
+                    fprintf(stderr, "bad choice for mpeg4-matrix...\n");
+                }
             }
         }
     }
     return 0;
 }
 
-void E_(process_frame)( sout_stream_t *p_stream,
-                    sout_stream_id_t *id, block_t *in, block_t **out )
+int process_frame( sout_stream_t *p_stream, sout_stream_id_t *id,
+                   block_t *in, block_t **out, int i_handicap )
 {
     transrate_t *tr = &id->tr;
     bs_transrate_t *bs = &tr->bs;
 
     block_t       *p_out;
 
-    double              next_fact_x = 1.0;
+    double        f_drift, f_fact;
+    int           i_drift;
 
-    /* The output buffer can't be bigger than the input buffer. */
-    p_out = block_New( p_stream, in->i_buffer );
+    p_out = block_New( p_stream, in->i_buffer * 3 );
 
     p_out->i_length = in->i_length;
     p_out->i_dts    = in->i_dts;
     p_out->i_pts    = in->i_pts;
     p_out->i_flags  = in->i_flags;
 
-    block_ChainAppend( out, p_out );
-
     bs->p_rw = bs->p_ow = bs->p_w = p_out->p_buffer;
     bs->p_c = bs->p_r = in->p_buffer;
     bs->p_r += in->i_buffer + 4;
-    bs->p_rw += in->i_buffer;
+    bs->p_rw += in->i_buffer * 2;
     *(in->p_buffer + in->i_buffer) = 0;
     *(in->p_buffer + in->i_buffer + 1) = 0;
     *(in->p_buffer + in->i_buffer + 2) = 1;
     *(in->p_buffer + in->i_buffer + 3) = 0;
 
     /* Calculate how late we are */
-    tr->quant_corr = 0.0 + B_HANDICAP;
-    tr->level_i = 0;
-    tr->level_p = 0;
     bs->i_byte_in = in->i_buffer;
     bs->i_byte_out  = 0;
 
-    if (tr->i_current_gop_size - in->i_buffer > 100)
+    i_drift = tr->i_current_output + tr->i_remaining_input
+                - tr->i_wanted_output;
+    f_drift = (double)i_drift / tr->i_wanted_output;
+    f_fact = (double)(tr->i_wanted_output - tr->i_current_output)
+                    / tr->i_remaining_input;
+
+    if ( in->i_flags & BLOCK_FLAG_TYPE_I )
     {
-        if (tr->i_wanted_gop_size == in->i_buffer)
-        {
-            next_fact_x = 1.0;
-        }
-        else if ( tr->i_wanted_gop_size < in->i_buffer )
+        /* This is the last picture of the GOP ; only transrate if we're
+         * very late. */
+        if ( 0 && f_drift > 0.085 )
         {
-            /* We're really late */
-            next_fact_x = 10.0;
+            tr->i_minimum_error = (f_drift - 0.085) * 50.0 * 50.0;
+            tr->i_admissible_error = (f_drift - 0.085) * 50.0 * 75.0;
+            tr->qrate = 1.0 + (f_drift - 0.085) * 50.0;
+            msg_Warn( p_stream, "transrating I %d/%d",
+                      tr->i_minimum_error, tr->i_admissible_error );
         }
         else
         {
-            next_fact_x = ((double)(tr->i_current_gop_size - in->i_buffer)) /
-                          (tr->i_wanted_gop_size - in->i_buffer);
+            tr->i_minimum_error = 0;
+            tr->i_admissible_error = 0;
+            tr->qrate = 1.0;
         }
-
-        if (next_fact_x > QUANT_I)
+    }
+    else if ( in->i_flags & BLOCK_FLAG_TYPE_P )
+    {
+        if ( f_fact < 0.8 )
         {
-            tr->level_i = 1;
+            tr->i_minimum_error = (0.8 - f_fact) * 3000.0 + i_handicap;
+            tr->i_admissible_error = (0.8 - f_fact) * 3500.0 + i_handicap;
+            tr->qrate = 1.0 + (0.8 - f_fact) * 70.0;
         }
-        if (next_fact_x > QUANT_P)
+        else
         {
-            tr->level_p = 1 + (next_fact_x - QUANT_P) / (QUANT_P_INC);
+            tr->i_minimum_error = 0;
+            tr->i_admissible_error = 0;
+            tr->qrate = 1.0;
         }
     }
-    if ( tr->i_wanted_gop_size < 0 )
-    {
-        /* We're really late */
-        tr->current_fact_x = 3.0;
-    }
     else
     {
-        tr->current_fact_x = ((double)(tr->i_current_gop_size) /
-                              (tr->i_wanted_gop_size));
+        if ( f_fact < 1.2 )
+        {
+            tr->i_minimum_error = (1.2 - f_fact) * 1750.0 + i_handicap;
+            tr->i_admissible_error = (1.2 - f_fact) * 2250.0 + i_handicap;
+            tr->qrate = 1.0 + (1.2 - f_fact) * 45.0;
+        }
+        else
+        {
+            tr->i_minimum_error = 0;
+            tr->i_admissible_error = 0;
+            tr->qrate = 1.0;
+        }
     }
 
+    tr->new_quantizer_scale = 0;
+    tr->b_error = 0;
+
     for ( ; ; )
     {
         uint8_t *p_end = &in->p_buffer[in->i_buffer];
@@ -1733,7 +1269,7 @@ void E_(process_frame)( sout_stream_t *p_stream,
                 *bs->p_w++ = *bs->p_c++;
             }
 
-            if( bs->p_c >= p_end)
+            if( bs->p_c >= p_end )
             {
                 break;
             }
@@ -1745,31 +1281,72 @@ void E_(process_frame)( sout_stream_t *p_stream,
         }
 
         /* Copy the start code */
-        memcpy(bs->p_w, bs->p_c, 3 );
+        memcpy( bs->p_w, bs->p_c, 3 );
         bs->p_c += 3;
         bs->p_w += 3;
 
-        if (do_next_start_code( tr ) )
+        if ( do_next_start_code( tr ) )
         {
             /* Error */
-            break;
+            msg_Err( p_stream, "error in do_next_start_code()" );
+            block_Release( p_out );
+            tr->i_remaining_input -= in->i_buffer;
+            tr->i_current_output += in->i_buffer;
+            return -1;
         }
-
-        tr->quant_corr = (((bs->i_byte_in - (bs->p_r - 4 - bs->p_c)) / tr->fact_x) - (bs->i_byte_out + (bs->p_w - bs->p_ow))) / REACT_DELAY + B_HANDICAP;
     }
 
     bs->i_byte_out += bs->p_w - bs->p_ow;
     p_out->i_buffer = bs->p_w - bs->p_ow;
-    tr->i_current_gop_size -= in->i_buffer;
-    tr->i_wanted_gop_size -= p_out->i_buffer;
-    tr->i_new_gop_size += bs->i_byte_out;
 
 #if 0
-    msg_Dbg( p_stream, "%d: %d -> %d (r: %f, n:%f, corr:%f)",
-             tr->picture_coding_type, in->i_buffer, p_out->i_size,
-             (float)in->i_buffer / p_out->i_size,
-             next_fact_x, tr->quant_corr);
+    if ( in->i_flags & BLOCK_FLAG_TYPE_P && f_fact < 0.8 )
+    {
+        double f_ratio = (in->i_buffer - p_out->i_buffer) / in->i_buffer;
+        if ( f_ratio < (0.8 - f_fact) * 0.1 && i_handicap < 200 )
+        {
+            block_Release( p_out );
+            return process_frame( p_stream, id, in, out, i_handicap + 50 );
+        }
+    }
+
+    if ( in->i_flags & BLOCK_FLAG_TYPE_B && f_fact < 1.1 )
+    {
+        double f_ratio = (double)(in->i_buffer - p_out->i_buffer)
+                            / in->i_buffer;
+        if ( f_ratio < (1.1 - f_fact) * 0.1 && i_handicap < 400 )
+        {
+#ifdef DEBUG_TRANSRATER
+            msg_Dbg( p_stream, "%d: %d -> %d big (f: %f d: %f)",
+                     tr->picture_coding_type, in->i_buffer, p_out->i_buffer,
+                     f_fact, f_drift);
+#endif
+            block_Release( p_out );
+            return process_frame( p_stream, id, in, out, i_handicap + 100 );
+        }
+    }
+#endif
+
+#if 0
+    {
+        int toto;
+        for ( toto = 0; toto < p_out->i_buffer; toto++ )
+            if (in->p_buffer[toto] != p_out->p_buffer[toto])
+                msg_Dbg(p_stream, "toto %d %x %x", toto, in->p_buffer[toto], p_out->p_buffer[toto]);
+    }
+#endif
+
+    block_ChainAppend( out, p_out );
+    tr->i_remaining_input -= in->i_buffer;
+    tr->i_current_output += p_out->i_buffer;
+
+#ifdef DEBUG_TRANSRATER
+    msg_Dbg( p_stream, "%d: %d -> %d (%d/%d)",
+             tr->picture_coding_type, in->i_buffer, p_out->i_buffer,
+             tr->i_minimum_error, tr->i_admissible_error );
 #endif
+
+    return 0;
 }
 
 
diff --git a/modules/stream_out/transrate/getvlc.h b/modules/stream_out/transrate/getvlc.h
index d1e8946ccb..3ac8416a1f 100644
--- a/modules/stream_out/transrate/getvlc.h
+++ b/modules/stream_out/transrate/getvlc.h
@@ -395,3 +395,4 @@ static const MBAtab MBA_11 [] = {
     { 7,  7}, { 7,  7}, { 7,  7}, { 7,  7},
     { 7,  7}, { 7,  7}, { 7,  7}, { 7,  7}
 };
+
diff --git a/modules/stream_out/transrate/putvlc.h b/modules/stream_out/transrate/putvlc.h
index a2e9d885a4..f19610c1e1 100644
--- a/modules/stream_out/transrate/putvlc.h
+++ b/modules/stream_out/transrate/putvlc.h
@@ -1,4 +1,31 @@
-// put blk
+/* vlc.h, variable length code tables (used by routines in putvlc.c)        */
+
+/* Copyright (C) 1996, MPEG Software Simulation Group. All Rights Reserved. */
+
+/*
+ * Disclaimer of Warranty
+ *
+ * These software programs are available to the user without any license fee or
+ * royalty on an "as is" basis.  The MPEG Software Simulation Group disclaims
+ * any and all warranties, whether express, implied, or statuary, including any
+ * implied warranties or merchantability or of fitness for a particular
+ * purpose.  In no event shall the copyright-holder be liable for any
+ * incidental, punitive, or consequential damages of any kind whatsoever
+ * arising from the use of these programs.
+ *
+ * This disclaimer of warranty extends to the user of these programs and user's
+ * customers, employees, agents, transferees, successors, and assigns.
+ *
+ * The MPEG Software Simulation Group does not represent or warrant that the
+ * programs furnished hereunder are free of infringement of any third-party
+ * patents.
+ *
+ * Commercial implementations of MPEG-1 and MPEG-2 video, including shareware,
+ * are subject to royalty fees to patent holders.  Many of these patents are
+ * general enough such that they are unavoidable regardless of implementation
+ * design.
+ *
+ */
 
 /* type definitions for variable length code table entries */
 
@@ -16,6 +43,28 @@ typedef struct
 } sVLCtable;
 
 
+/* data from ISO/IEC 13818-2 DIS, Annex B, variable length code tables */
+
+/* Table B-1, variable length codes for macroblock_address_increment
+ *
+ * indexed by [macroblock_address_increment-1]
+ * 'macroblock_escape' is treated elsewhere
+ */
+
+const static VLCtable addrinctab[33]=
+{
+  {0x01,1},  {0x03,3},  {0x02,3},  {0x03,4},
+  {0x02,4},  {0x03,5},  {0x02,5},  {0x07,7},
+  {0x06,7},  {0x0b,8},  {0x0a,8},  {0x09,8},
+  {0x08,8},  {0x07,8},  {0x06,8},  {0x17,10},
+  {0x16,10}, {0x15,10}, {0x14,10}, {0x13,10},
+  {0x12,10}, {0x23,11}, {0x22,11}, {0x21,11},
+  {0x20,11}, {0x1f,11}, {0x1e,11}, {0x1d,11},
+  {0x1c,11}, {0x1b,11}, {0x1a,11}, {0x19,11},
+  {0x18,11}
+};
+
+
 /* Table B-2, B-3, B-4 variable length codes for macroblock_type
  *
  * indexed by [macroblock_type]
@@ -79,7 +128,6 @@ const static VLCtable cbptable[64]=
 };
 
 
-
 /* Table B-14, DCT coefficients table zero
  *
  * indexed by [run][level-1]
@@ -226,3 +274,27 @@ const static VLCtable dct_code_tab2a[30][5]=
   {{0x1c,16}, {0x00, 0}, {0x00, 0}, {0x00, 0}, {0x00, 0}},
   {{0x1b,16}, {0x00, 0}, {0x00, 0}, {0x00, 0}, {0x00, 0}}
 };
+
+/* MPEG-4 matrices */
+static const uint8_t mpeg4_default_intra_matrix[64] = {
+  8, 17, 18, 19, 21, 23, 25, 27,
+ 17, 18, 19, 21, 23, 25, 27, 28,
+ 20, 21, 22, 23, 24, 26, 28, 30,
+ 21, 22, 23, 24, 26, 28, 30, 32,
+ 22, 23, 24, 26, 28, 30, 32, 35,
+ 23, 24, 26, 28, 30, 32, 35, 38,
+ 25, 26, 28, 30, 32, 35, 38, 41,
+ 27, 28, 30, 32, 35, 38, 41, 45, 
+};
+
+static const uint8_t mpeg4_default_non_intra_matrix[64] = {
+ 16, 17, 18, 19, 20, 21, 22, 23,
+ 17, 18, 19, 20, 21, 22, 23, 24,
+ 18, 19, 20, 21, 22, 23, 24, 25,
+ 19, 20, 21, 22, 23, 24, 26, 27,
+ 20, 21, 22, 23, 25, 26, 27, 28,
+ 21, 22, 23, 24, 26, 27, 28, 30,
+ 22, 23, 24, 26, 27, 28, 30, 31,
+ 23, 24, 25, 27, 28, 30, 31, 33,
+};
+
diff --git a/modules/stream_out/transrate/transrate.c b/modules/stream_out/transrate/transrate.c
index c54cc9121d..c86698a260 100644
--- a/modules/stream_out/transrate/transrate.c
+++ b/modules/stream_out/transrate/transrate.c
@@ -49,9 +49,6 @@ static int               Send( sout_stream_t *, sout_stream_id_t *, block_t * );
 
 static int  transrate_video_process( sout_stream_t *, sout_stream_id_t *, block_t *, block_t ** );
 
-void E_(process_frame)( sout_stream_t *p_stream,
-               sout_stream_id_t *id, block_t *in, block_t **out );
-
 /*****************************************************************************
  * Module descriptor
  *****************************************************************************/
@@ -68,8 +65,8 @@ struct sout_stream_sys_t
 
     int             i_vbitrate;
     mtime_t         i_shaping_delay;
+    int             b_mpeg4_matrix;
 
-    mtime_t         i_first_frame;
     mtime_t         i_dts, i_pts;
 };
 
@@ -113,7 +110,11 @@ static int Open( vlc_object_t *p_this )
         }
     }
 
-    p_sys->i_first_frame = 0;
+    p_sys->b_mpeg4_matrix = 0;
+    if( sout_cfg_find( p_stream->p_cfg, "mpeg4-matrix" ) )
+    {
+        p_sys->b_mpeg4_matrix = 1;
+    }
 
     msg_Dbg( p_stream, "codec video %dkb/s max gop="I64Fd"us",
              p_sys->i_vbitrate / 1024, p_sys->i_shaping_delay );
@@ -167,7 +168,7 @@ static sout_stream_id_t * Add( sout_stream_t *p_stream, es_format_t *p_fmt )
         id->i_next_gop_size = 0;
         memset( &id->tr, 0, sizeof( transrate_t ) );
         id->tr.bs.i_byte_in = id->tr.bs.i_byte_out = 0;
-        id->tr.fact_x = 1.0;
+        id->tr.mpeg4_matrix = p_sys->b_mpeg4_matrix;
 
         /* open output stream */
         id->id = p_sys->p_out->pf_add( p_sys->p_out, p_fmt );
@@ -261,25 +262,19 @@ static int transrate_video_process( sout_stream_t *p_stream,
                                     / (id->i_next_gop_duration / 1000);
             mtime_t i_new_bitrate;
 
-            if ( i_bitrate > p_stream->p_sys->i_vbitrate )
-            {
-                tr->fact_x = (double)i_bitrate / p_stream->p_sys->i_vbitrate;
-            }
-            else
-            {
-                tr->fact_x = 1.0;
-            }
-            id->tr.i_current_gop_size = id->i_next_gop_size;
-            id->tr.i_wanted_gop_size = (p_stream->p_sys->i_vbitrate)
+            id->tr.i_total_input = id->i_next_gop_size;
+            id->tr.i_remaining_input = id->i_next_gop_size;
+            id->tr.i_wanted_output = (p_stream->p_sys->i_vbitrate)
                                     * (id->i_next_gop_duration / 1000) / 8000;
-            id->tr.i_new_gop_size = 0;
+            id->tr.i_current_output = 0;
 
             id->p_current_buffer = id->p_next_gop;
 
             while ( id->p_current_buffer != NULL )
             {
                 block_t * p_next = id->p_current_buffer->p_next;
-                if ( tr->fact_x == 1.0 )
+                if ( !p_stream->p_sys->b_mpeg4_matrix
+                       && id->tr.i_wanted_output >= id->tr.i_total_input )
                 {
                     bs->i_byte_out += id->p_current_buffer->i_buffer;
                     id->p_current_buffer->p_next = NULL;
@@ -287,25 +282,33 @@ static int transrate_video_process( sout_stream_t *p_stream,
                 }
                 else
                 {
-                    E_(process_frame)( p_stream, id, id->p_current_buffer, out );
-                    block_Release( id->p_current_buffer);
+                    if ( process_frame( p_stream, id, id->p_current_buffer,
+                                        out, 0 ) < 0 )
+                    {
+                        id->p_current_buffer->p_next = NULL;
+                        block_ChainAppend( out, id->p_current_buffer );
+                        if ( p_stream->p_sys->b_mpeg4_matrix )
+                            id->tr.i_wanted_output = id->tr.i_total_input;
+                    }
+                    else
+                    {
+                        block_Release( id->p_current_buffer );
+                    }
                 }
                 id->p_current_buffer = p_next;
             }
 
-            if ( tr->fact_x != 1.0 )
+            if ( id->tr.i_wanted_output < id->tr.i_total_input )
             {
-                i_new_bitrate = (mtime_t)tr->i_new_gop_size * 8000
-                                    / (id->i_next_gop_duration / 1001);
+                i_new_bitrate = (mtime_t)tr->i_current_output * 8000
+                                    / (id->i_next_gop_duration / 1000);
                 if (i_new_bitrate > p_stream->p_sys->i_vbitrate + 300000)
-                    msg_Err(p_stream, "%lld -> %lld (%f, r:%f) d=%lld",
-                            i_bitrate, i_new_bitrate, tr->fact_x,
-                            (float)i_bitrate / i_new_bitrate,
+                    msg_Err(p_stream, "%lld -> %lld d=%lld",
+                            i_bitrate, i_new_bitrate,
                             id->i_next_gop_duration);
-                    else
-                        msg_Dbg(p_stream, "%lld -> %lld (%f, r:%f) d=%lld",
-                            i_bitrate, i_new_bitrate, tr->fact_x,
-                            (float)i_bitrate / i_new_bitrate,
+                else
+                    msg_Dbg(p_stream, "%lld -> %lld d=%lld",
+                            i_bitrate, i_new_bitrate,
                             id->i_next_gop_duration);
             }
 
diff --git a/modules/stream_out/transrate/transrate.h b/modules/stream_out/transrate/transrate.h
index f14a773313..b826359741 100644
--- a/modules/stream_out/transrate/transrate.h
+++ b/modules/stream_out/transrate/transrate.h
@@ -64,6 +64,9 @@ typedef struct
     // seq header
     unsigned int horizontal_size_value;
     unsigned int vertical_size_value;
+    uint8_t intra_quantizer_matrix [64];
+    uint8_t non_intra_quantizer_matrix [64];
+    int mpeg4_matrix;
 
     // pic header
     unsigned int picture_coding_type;
@@ -76,7 +79,7 @@ typedef struct
     unsigned int concealment_motion_vectors;
     unsigned int q_scale_type;
     unsigned int intra_vlc_format;
-    /* unsigned int alternate_scan; */
+    const uint8_t * scan;
 
     // slice or mb
     // quantizer_scale_code
@@ -84,12 +87,16 @@ typedef struct
     unsigned int new_quantizer_scale;
     unsigned int last_coded_scale;
     int   h_offset, v_offset;
+    vlc_bool_t b_error;
 
     // mb
-    double quant_corr, fact_x, current_fact_x;
-    int level_i, level_p;
+    double qrate;
+    int i_admissible_error, i_minimum_error;
 
-    ssize_t i_current_gop_size, i_wanted_gop_size, i_new_gop_size;
+    /* input buffers */
+    ssize_t i_total_input, i_remaining_input;
+    /* output buffers */
+    ssize_t i_current_output, i_wanted_output;
 } transrate_t;
 
 
@@ -98,8 +105,8 @@ struct sout_stream_id_t
     void            *id;
     vlc_bool_t      b_transrate;
 
-    block_t   *p_current_buffer;
-    block_t   *p_next_gop;
+    block_t         *p_current_buffer;
+    block_t   	    *p_next_gop;
     mtime_t         i_next_gop_duration;
     size_t          i_next_gop_size;
 
@@ -107,3 +114,102 @@ struct sout_stream_id_t
 };
 
 
+#ifdef HAVE_BUILTIN_EXPECT
+#define likely(x) __builtin_expect ((x) != 0, 1)
+#define unlikely(x) __builtin_expect ((x) != 0, 0)
+#else
+#define likely(x) (x)
+#define unlikely(x) (x)
+#endif
+
+#define BITS_IN_BUF (8)
+
+#define LOG(msg) fprintf (stderr, msg)
+#define LOGF(format, args...) fprintf (stderr, format, args)
+
+static inline void bs_write( bs_transrate_t *s, unsigned int val, int n )
+{
+    assert(n < 32);
+    assert(!(val & (0xffffffffU << n)));
+
+    while (unlikely(n >= s->i_bit_out))
+    {
+        s->p_w[0] = (s->i_bit_out_cache << s->i_bit_out ) | (val >> (n - s->i_bit_out));
+        s->p_w++;
+        n -= s->i_bit_out;
+        s->i_bit_out_cache = 0;
+        val &= ~(0xffffffffU << n);
+        s->i_bit_out = BITS_IN_BUF;
+    }
+
+    if (likely(n))
+    {
+        s->i_bit_out_cache = (s->i_bit_out_cache << n) | val;
+        s->i_bit_out -= n;
+    }
+
+    assert(s->i_bit_out > 0);
+    assert(s->i_bit_out <= BITS_IN_BUF);
+}
+
+static inline void bs_refill( bs_transrate_t *s )
+{
+    assert((s->p_r - s->p_c) >= 1);
+    s->i_bit_in_cache |= s->p_c[0] << (24 - s->i_bit_in);
+    s->i_bit_in += 8;
+    s->p_c++;
+}
+
+static inline void bs_flush( bs_transrate_t *s, unsigned int n )
+{
+    assert(s->i_bit_in >= n);
+
+    s->i_bit_in_cache <<= n;
+    s->i_bit_in -= n;
+
+    assert( (!n) || ((n>0) && !(s->i_bit_in_cache & 0x1)) );
+
+    while (unlikely(s->i_bit_in < 24)) bs_refill( s );
+}
+
+static inline unsigned int bs_read( bs_transrate_t *s, unsigned int n )
+{
+    unsigned int Val = ((unsigned int)s->i_bit_in_cache) >> (32 - n);
+    bs_flush( s, n );
+    return Val;
+}
+
+static inline unsigned int bs_copy( bs_transrate_t *s, unsigned int n )
+{
+    unsigned int Val = bs_read( s, n);
+    bs_write(s, Val, n);
+    return Val;
+}
+
+static inline void bs_flush_read( bs_transrate_t *s )
+{
+    int i = s->i_bit_in & 0x7;
+    if( i )
+    {
+        assert(((unsigned int)s->i_bit_in_cache) >> (32 - i) == 0);
+        s->i_bit_in_cache <<= i;
+        s->i_bit_in -= i;
+    }
+    s->p_c += -1 * (s->i_bit_in >> 3);
+    s->i_bit_in = 0;
+}
+static inline void bs_flush_write( bs_transrate_t *s )
+{
+    if( s->i_bit_out != 8 ) bs_write(s, 0, s->i_bit_out);
+}
+
+int scale_quant( transrate_t *tr, double qrate );
+int transrate_mb( transrate_t *tr, RunLevel blk[6][65], RunLevel new_blk[6][65], int i_cbp, int intra );
+void get_intra_block_B14( transrate_t *tr, RunLevel *blk );
+void get_intra_block_B15( transrate_t *tr, RunLevel *blk );
+int get_non_intra_block( transrate_t *tr, RunLevel *blk );
+void putnonintrablk( bs_transrate_t *bs, RunLevel *blk);
+void putintrablk( bs_transrate_t *bs, RunLevel *blk, int vlcformat);
+
+int process_frame( sout_stream_t *p_stream, sout_stream_id_t *id,
+                   block_t *in, block_t **out, int i_handicap );