git.sesse.net Git - ffmpeg/blobdiff - libavcodec/vp3.c
Do not redundantly check for both CONFIG_THEORA_DECODER and CONFIG_VP3_DECODER.
[ffmpeg] / libavcodec / vp3.c
index 60ba027d102eb283318f66fcdc1d86bffeb4a4ae..47e83f2154acf38a763ae4c97714d9359b828fc2 100644 (file)
@@ -19,7 +19,7 @@
  */
 
 /**
- * @file vp3.c
+ * @file libavcodec/vp3.c
  * On2 VP3 Video Decoder
  *
  * VP3 Video Decoder by Mike Melanson (mike at multimedia.cx)
@@ -36,7 +36,7 @@
 
 #include "avcodec.h"
 #include "dsputil.h"
-#include "bitstream.h"
+#include "get_bits.h"
 
 #include "vp3data.h"
 #include "xiph.h"
@@ -60,6 +60,7 @@ typedef struct Vp3Fragment {
     uint8_t coding_method;
     int8_t motion_x;
     int8_t motion_y;
+    uint8_t qpi;
 } Vp3Fragment;
 
 #define SB_NOT_CODED        0
@@ -134,14 +135,11 @@ typedef struct Vp3DecodeContext {
     DSPContext dsp;
     int flipped_image;
 
-    int qis[3];
-    int nqis;
-    int quality_index;
-    int last_quality_index;
+    int qps[3];
+    int nqps;
+    int last_qps[3];
 
     int superblock_count;
-    int superblock_width;
-    int superblock_height;
     int y_superblock_width;
     int y_superblock_height;
     int c_superblock_width;
@@ -193,7 +191,7 @@ typedef struct Vp3DecodeContext {
 
     /* these arrays need to be on 16-byte boundaries since SSE2 operations
      * index into them */
-    DECLARE_ALIGNED_16(int16_t, qmat[2][4][64]);        //<qmat[is_inter][plane]
+    DECLARE_ALIGNED_16(int16_t, qmat[3][2][3][64]);     //<qmat[qpi][is_inter][plane]
 
     /* This table contains superblock_count * 16 entries. Each set of 16
      * numbers corresponds to the fragment indexes 0..15 of the superblock.
@@ -230,8 +228,8 @@ typedef struct Vp3DecodeContext {
     int huff_code_size;
     uint16_t huffman_table[80][32][2];
 
-    uint32_t filter_limit_values[64];
-    int bounding_values_array[256];
+    uint8_t filter_limit_values[64];
+    DECLARE_ALIGNED_8(int, bounding_values_array[256+2]);
 } Vp3DecodeContext;
 
 /************************************************************************
@@ -256,7 +254,6 @@ static int init_block_mapping(Vp3DecodeContext *s)
     int right_edge = 0;
     int bottom_edge = 0;
     int superblock_row_inc = 0;
-    int *hilbert = NULL;
     int mapping_index = 0;
 
     int current_macroblock;
@@ -368,7 +365,6 @@ static int init_block_mapping(Vp3DecodeContext *s)
     current_height = 0;
     superblock_row_inc = s->macroblock_width -
         (s->y_superblock_width * 2 - s->macroblock_width);
-    hilbert = hilbert_walk_mb;
     mapping_index = 0;
     current_macroblock = -1;
     for (i = 0; i < s->u_superblock_start; i++) {
@@ -471,6 +467,7 @@ static void init_frame(Vp3DecodeContext *s, GetBitContext *gb)
         s->all_fragments[i].motion_x = 127;
         s->all_fragments[i].motion_y = 127;
         s->all_fragments[i].next_coeff= NULL;
+        s->all_fragments[i].qpi = 0;
         s->coeffs[i].index=
         s->coeffs[i].coeff=0;
         s->coeffs[i].next= NULL;
@@ -481,10 +478,10 @@ static void init_frame(Vp3DecodeContext *s, GetBitContext *gb)
  * This function sets up the dequantization tables used for a particular
  * frame.
  */
-static void init_dequantizer(Vp3DecodeContext *s)
+static void init_dequantizer(Vp3DecodeContext *s, int qpi)
 {
-    int ac_scale_factor = s->coded_ac_scale_factor[s->quality_index];
-    int dc_scale_factor = s->coded_dc_scale_factor[s->quality_index];
+    int ac_scale_factor = s->coded_ac_scale_factor[s->qps[qpi]];
+    int dc_scale_factor = s->coded_dc_scale_factor[s->qps[qpi]];
     int i, plane, inter, qri, bmi, bmj, qistart;
 
     for(inter=0; inter<2; inter++){
@@ -492,49 +489,59 @@ static void init_dequantizer(Vp3DecodeContext *s)
             int sum=0;
             for(qri=0; qri<s->qr_count[inter][plane]; qri++){
                 sum+= s->qr_size[inter][plane][qri];
-                if(s->quality_index <= sum)
+                if(s->qps[qpi] <= sum)
                     break;
             }
             qistart= sum - s->qr_size[inter][plane][qri];
             bmi= s->qr_base[inter][plane][qri  ];
             bmj= s->qr_base[inter][plane][qri+1];
             for(i=0; i<64; i++){
-                int coeff= (  2*(sum    -s->quality_index)*s->base_matrix[bmi][i]
-                            - 2*(qistart-s->quality_index)*s->base_matrix[bmj][i]
+                int coeff= (  2*(sum    -s->qps[qpi])*s->base_matrix[bmi][i]
+                            - 2*(qistart-s->qps[qpi])*s->base_matrix[bmj][i]
                             + s->qr_size[inter][plane][qri])
                            / (2*s->qr_size[inter][plane][qri]);
 
                 int qmin= 8<<(inter + !i);
                 int qscale= i ? ac_scale_factor : dc_scale_factor;
 
-                s->qmat[inter][plane][s->dsp.idct_permutation[i]]= av_clip((qscale * coeff)/100 * 4, qmin, 4096);
+                s->qmat[qpi][inter][plane][s->dsp.idct_permutation[i]]= av_clip((qscale * coeff)/100 * 4, qmin, 4096);
             }
+            // all DC coefficients use the same quant so as not to interfere with DC prediction
+            s->qmat[qpi][inter][plane][0] = s->qmat[0][inter][plane][0];
         }
     }
 
-    memset(s->qscale_table, (FFMAX(s->qmat[0][0][1], s->qmat[0][1][1])+8)/16, 512); //FIXME finetune
+    memset(s->qscale_table, (FFMAX(s->qmat[0][0][0][1], s->qmat[0][0][1][1])+8)/16, 512); //FIXME finetune
 }
 
 /*
  * This function initializes the loop filter boundary limits if the frame's
  * quality index is different from the previous frame's.
+ *
+ * The filter_limit_values may not be larger than 127.
  */
 static void init_loop_filter(Vp3DecodeContext *s)
 {
     int *bounding_values= s->bounding_values_array+127;
     int filter_limit;
     int x;
+    int value;
 
-    filter_limit = s->filter_limit_values[s->quality_index];
+    filter_limit = s->filter_limit_values[s->qps[0]];
 
     /* set up the bounding values */
     memset(s->bounding_values_array, 0, 256 * sizeof(int));
     for (x = 0; x < filter_limit; x++) {
-        bounding_values[-x - filter_limit] = -filter_limit + x;
         bounding_values[-x] = -x;
         bounding_values[x] = x;
-        bounding_values[x + filter_limit] = filter_limit - x;
     }
+    for (x = value = filter_limit; x < 128 && value; x++, value--) {
+        bounding_values[ x] =  value;
+        bounding_values[-x] = -value;
+    }
+    if (value)
+        bounding_values[128] = value;
+    bounding_values[129] = bounding_values[130] = filter_limit * 0x02020202;
 }
 
 /*
@@ -742,6 +749,8 @@ static int unpack_modes(Vp3DecodeContext *s, GetBitContext *gb)
 
         /* is it a custom coding scheme? */
         if (scheme == 0) {
+            for (i = 0; i < 8; i++)
+                custom_mode_alphabet[i] = MODE_INTER_NO_MV;
             for (i = 0; i < 8; i++)
                 custom_mode_alphabet[get_bits(gb, 3)] = i;
         }
@@ -811,152 +820,152 @@ static int unpack_vectors(Vp3DecodeContext *s, GetBitContext *gb)
     int current_macroblock;
     int current_fragment;
 
-    if (s->keyframe) {
-    } else {
-        memset(motion_x, 0, 6 * sizeof(int));
-        memset(motion_y, 0, 6 * sizeof(int));
+    if (s->keyframe)
+        return 0;
 
-        /* coding mode 0 is the VLC scheme; 1 is the fixed code scheme */
-        coding_mode = get_bits1(gb);
+    memset(motion_x, 0, 6 * sizeof(int));
+    memset(motion_y, 0, 6 * sizeof(int));
 
-        /* iterate through all of the macroblocks that contain 1 or more
-         * coded fragments */
-        for (i = 0; i < s->u_superblock_start; i++) {
+    /* coding mode 0 is the VLC scheme; 1 is the fixed code scheme */
+    coding_mode = get_bits1(gb);
 
-            for (j = 0; j < 4; j++) {
-                current_macroblock = s->superblock_macroblocks[i * 4 + j];
-                if ((current_macroblock == -1) ||
-                    (s->macroblock_coding[current_macroblock] == MODE_COPY))
-                    continue;
-                if (current_macroblock >= s->macroblock_count) {
-                    av_log(s->avctx, AV_LOG_ERROR, "  vp3:unpack_vectors(): bad macroblock number (%d >= %d)\n",
-                        current_macroblock, s->macroblock_count);
-                    return 1;
-                }
+    /* iterate through all of the macroblocks that contain 1 or more
+     * coded fragments */
+    for (i = 0; i < s->u_superblock_start; i++) {
 
-                current_fragment = s->macroblock_fragments[current_macroblock * 6];
-                if (current_fragment >= s->fragment_count) {
-                    av_log(s->avctx, AV_LOG_ERROR, "  vp3:unpack_vectors(): bad fragment number (%d >= %d\n",
-                        current_fragment, s->fragment_count);
-                    return 1;
-                }
-                switch (s->macroblock_coding[current_macroblock]) {
-
-                case MODE_INTER_PLUS_MV:
-                case MODE_GOLDEN_MV:
-                    /* all 6 fragments use the same motion vector */
-                    if (coding_mode == 0) {
-                        motion_x[0] = motion_vector_table[get_vlc2(gb, s->motion_vector_vlc.table, 6, 2)];
-                        motion_y[0] = motion_vector_table[get_vlc2(gb, s->motion_vector_vlc.table, 6, 2)];
-                    } else {
-                        motion_x[0] = fixed_motion_vector_table[get_bits(gb, 6)];
-                        motion_y[0] = fixed_motion_vector_table[get_bits(gb, 6)];
-                    }
+        for (j = 0; j < 4; j++) {
+            current_macroblock = s->superblock_macroblocks[i * 4 + j];
+            if ((current_macroblock == -1) ||
+                (s->macroblock_coding[current_macroblock] == MODE_COPY))
+                continue;
+            if (current_macroblock >= s->macroblock_count) {
+                av_log(s->avctx, AV_LOG_ERROR, "  vp3:unpack_vectors(): bad macroblock number (%d >= %d)\n",
+                    current_macroblock, s->macroblock_count);
+                return 1;
+            }
 
-                    for (k = 1; k < 6; k++) {
-                        motion_x[k] = motion_x[0];
-                        motion_y[k] = motion_y[0];
-                    }
+            current_fragment = s->macroblock_fragments[current_macroblock * 6];
+            if (current_fragment >= s->fragment_count) {
+                av_log(s->avctx, AV_LOG_ERROR, "  vp3:unpack_vectors(): bad fragment number (%d >= %d\n",
+                    current_fragment, s->fragment_count);
+                return 1;
+            }
+            switch (s->macroblock_coding[current_macroblock]) {
+
+            case MODE_INTER_PLUS_MV:
+            case MODE_GOLDEN_MV:
+                /* all 6 fragments use the same motion vector */
+                if (coding_mode == 0) {
+                    motion_x[0] = motion_vector_table[get_vlc2(gb, s->motion_vector_vlc.table, 6, 2)];
+                    motion_y[0] = motion_vector_table[get_vlc2(gb, s->motion_vector_vlc.table, 6, 2)];
+                } else {
+                    motion_x[0] = fixed_motion_vector_table[get_bits(gb, 6)];
+                    motion_y[0] = fixed_motion_vector_table[get_bits(gb, 6)];
+                }
 
-                    /* vector maintenance, only on MODE_INTER_PLUS_MV */
-                    if (s->macroblock_coding[current_macroblock] ==
-                        MODE_INTER_PLUS_MV) {
-                        prior_last_motion_x = last_motion_x;
-                        prior_last_motion_y = last_motion_y;
-                        last_motion_x = motion_x[0];
-                        last_motion_y = motion_y[0];
-                    }
-                    break;
+                for (k = 1; k < 6; k++) {
+                    motion_x[k] = motion_x[0];
+                    motion_y[k] = motion_y[0];
+                }
 
-                case MODE_INTER_FOURMV:
-                    /* vector maintenance */
+                /* vector maintenance, only on MODE_INTER_PLUS_MV */
+                if (s->macroblock_coding[current_macroblock] ==
+                    MODE_INTER_PLUS_MV) {
                     prior_last_motion_x = last_motion_x;
                     prior_last_motion_y = last_motion_y;
+                    last_motion_x = motion_x[0];
+                    last_motion_y = motion_y[0];
+                }
+                break;
 
-                    /* fetch 4 vectors from the bitstream, one for each
-                     * Y fragment, then average for the C fragment vectors */
-                    motion_x[4] = motion_y[4] = 0;
-                    for (k = 0; k < 4; k++) {
-                        for (l = 0; l < s->coded_fragment_list_index; l++)
-                            if (s->coded_fragment_list[l] == s->macroblock_fragments[6*current_macroblock + k])
-                                break;
-                        if (l < s->coded_fragment_list_index) {
-                            if (coding_mode == 0) {
-                                motion_x[k] = motion_vector_table[get_vlc2(gb, s->motion_vector_vlc.table, 6, 2)];
-                                motion_y[k] = motion_vector_table[get_vlc2(gb, s->motion_vector_vlc.table, 6, 2)];
-                            } else {
-                                motion_x[k] = fixed_motion_vector_table[get_bits(gb, 6)];
-                                motion_y[k] = fixed_motion_vector_table[get_bits(gb, 6)];
-                            }
-                            last_motion_x = motion_x[k];
-                            last_motion_y = motion_y[k];
+            case MODE_INTER_FOURMV:
+                /* vector maintenance */
+                prior_last_motion_x = last_motion_x;
+                prior_last_motion_y = last_motion_y;
+
+                /* fetch 4 vectors from the bitstream, one for each
+                 * Y fragment, then average for the C fragment vectors */
+                motion_x[4] = motion_y[4] = 0;
+                for (k = 0; k < 4; k++) {
+                    for (l = 0; l < s->coded_fragment_list_index; l++)
+                        if (s->coded_fragment_list[l] == s->macroblock_fragments[6*current_macroblock + k])
+                            break;
+                    if (l < s->coded_fragment_list_index) {
+                        if (coding_mode == 0) {
+                            motion_x[k] = motion_vector_table[get_vlc2(gb, s->motion_vector_vlc.table, 6, 2)];
+                            motion_y[k] = motion_vector_table[get_vlc2(gb, s->motion_vector_vlc.table, 6, 2)];
                         } else {
-                            motion_x[k] = 0;
-                            motion_y[k] = 0;
+                            motion_x[k] = fixed_motion_vector_table[get_bits(gb, 6)];
+                            motion_y[k] = fixed_motion_vector_table[get_bits(gb, 6)];
                         }
-                        motion_x[4] += motion_x[k];
-                        motion_y[4] += motion_y[k];
+                        last_motion_x = motion_x[k];
+                        last_motion_y = motion_y[k];
+                    } else {
+                        motion_x[k] = 0;
+                        motion_y[k] = 0;
                     }
+                    motion_x[4] += motion_x[k];
+                    motion_y[4] += motion_y[k];
+                }
 
-                    motion_x[5]=
-                    motion_x[4]= RSHIFT(motion_x[4], 2);
-                    motion_y[5]=
-                    motion_y[4]= RSHIFT(motion_y[4], 2);
-                    break;
+                motion_x[5]=
+                motion_x[4]= RSHIFT(motion_x[4], 2);
+                motion_y[5]=
+                motion_y[4]= RSHIFT(motion_y[4], 2);
+                break;
 
-                case MODE_INTER_LAST_MV:
-                    /* all 6 fragments use the last motion vector */
-                    motion_x[0] = last_motion_x;
-                    motion_y[0] = last_motion_y;
-                    for (k = 1; k < 6; k++) {
-                        motion_x[k] = motion_x[0];
-                        motion_y[k] = motion_y[0];
-                    }
+            case MODE_INTER_LAST_MV:
+                /* all 6 fragments use the last motion vector */
+                motion_x[0] = last_motion_x;
+                motion_y[0] = last_motion_y;
+                for (k = 1; k < 6; k++) {
+                    motion_x[k] = motion_x[0];
+                    motion_y[k] = motion_y[0];
+                }
 
-                    /* no vector maintenance (last vector remains the
-                     * last vector) */
-                    break;
+                /* no vector maintenance (last vector remains the
+                 * last vector) */
+                break;
 
-                case MODE_INTER_PRIOR_LAST:
-                    /* all 6 fragments use the motion vector prior to the
-                     * last motion vector */
-                    motion_x[0] = prior_last_motion_x;
-                    motion_y[0] = prior_last_motion_y;
-                    for (k = 1; k < 6; k++) {
-                        motion_x[k] = motion_x[0];
-                        motion_y[k] = motion_y[0];
-                    }
+            case MODE_INTER_PRIOR_LAST:
+                /* all 6 fragments use the motion vector prior to the
+                 * last motion vector */
+                motion_x[0] = prior_last_motion_x;
+                motion_y[0] = prior_last_motion_y;
+                for (k = 1; k < 6; k++) {
+                    motion_x[k] = motion_x[0];
+                    motion_y[k] = motion_y[0];
+                }
 
-                    /* vector maintenance */
-                    prior_last_motion_x = last_motion_x;
-                    prior_last_motion_y = last_motion_y;
-                    last_motion_x = motion_x[0];
-                    last_motion_y = motion_y[0];
-                    break;
+                /* vector maintenance */
+                prior_last_motion_x = last_motion_x;
+                prior_last_motion_y = last_motion_y;
+                last_motion_x = motion_x[0];
+                last_motion_y = motion_y[0];
+                break;
 
-                default:
-                    /* covers intra, inter without MV, golden without MV */
-                    memset(motion_x, 0, 6 * sizeof(int));
-                    memset(motion_y, 0, 6 * sizeof(int));
+            default:
+                /* covers intra, inter without MV, golden without MV */
+                memset(motion_x, 0, 6 * sizeof(int));
+                memset(motion_y, 0, 6 * sizeof(int));
 
-                    /* no vector maintenance */
-                    break;
-                }
+                /* no vector maintenance */
+                break;
+            }
 
-                /* assign the motion vectors to the correct fragments */
-                for (k = 0; k < 6; k++) {
-                    current_fragment =
-                        s->macroblock_fragments[current_macroblock * 6 + k];
-                    if (current_fragment == -1)
-                        continue;
-                    if (current_fragment >= s->fragment_count) {
-                        av_log(s->avctx, AV_LOG_ERROR, "  vp3:unpack_vectors(): bad fragment number (%d >= %d)\n",
-                            current_fragment, s->fragment_count);
-                        return 1;
-                    }
-                    s->all_fragments[current_fragment].motion_x = motion_x[k];
-                    s->all_fragments[current_fragment].motion_y = motion_y[k];
+            /* assign the motion vectors to the correct fragments */
+            for (k = 0; k < 6; k++) {
+                current_fragment =
+                    s->macroblock_fragments[current_macroblock * 6 + k];
+                if (current_fragment == -1)
+                    continue;
+                if (current_fragment >= s->fragment_count) {
+                    av_log(s->avctx, AV_LOG_ERROR, "  vp3:unpack_vectors(): bad fragment number (%d >= %d)\n",
+                        current_fragment, s->fragment_count);
+                    return 1;
                 }
+                s->all_fragments[current_fragment].motion_x = motion_x[k];
+                s->all_fragments[current_fragment].motion_y = motion_y[k];
             }
         }
     }
@@ -964,6 +973,47 @@ static int unpack_vectors(Vp3DecodeContext *s, GetBitContext *gb)
     return 0;
 }
 
+static int unpack_block_qpis(Vp3DecodeContext *s, GetBitContext *gb)
+{
+    int qpi, i, j, bit, run_length, blocks_decoded, num_blocks_at_qpi;
+    int num_blocks = s->coded_fragment_list_index;
+
+    for (qpi = 0; qpi < s->nqps-1 && num_blocks > 0; qpi++) {
+        i = blocks_decoded = num_blocks_at_qpi = 0;
+
+        bit = get_bits1(gb);
+
+        do {
+            run_length = get_vlc2(gb, s->superblock_run_length_vlc.table, 6, 2) + 1;
+            if (run_length == 34)
+                run_length += get_bits(gb, 12);
+            blocks_decoded += run_length;
+
+            if (!bit)
+                num_blocks_at_qpi += run_length;
+
+            for (j = 0; j < run_length; i++) {
+                if (i > s->coded_fragment_list_index)
+                    return -1;
+
+                if (s->all_fragments[s->coded_fragment_list[i]].qpi == qpi) {
+                    s->all_fragments[s->coded_fragment_list[i]].qpi += bit;
+                    j++;
+                }
+            }
+
+            if (run_length == 4129)
+                bit = get_bits1(gb);
+            else
+                bit ^= 1;
+        } while (blocks_decoded < num_blocks);
+
+        num_blocks -= num_blocks_at_qpi;
+    }
+
+    return 0;
+}
+
 /*
  * This function is called by unpack_dct_coeffs() to extract the VLCs from
  * the bitstream. The VLCs encode tokens which are used to unpack DCT
@@ -1283,12 +1333,6 @@ static void reverse_dc_prediction(Vp3DecodeContext *s,
     }
 }
 
-
-static void horizontal_filter(unsigned char *first_pixel, int stride,
-    int *bounding_values);
-static void vertical_filter(unsigned char *first_pixel, int stride,
-    int *bounding_values);
-
 /*
  * Perform the final rendering for a particular slice of data.
  * The slice number ranges from 0..(macroblock_height - 1).
@@ -1401,22 +1445,22 @@ static void render_slice(Vp3DecodeContext *s, int slice)
                                 motion_source + stride + 1 + d,
                                 stride, 8);
                         }
-                        dequantizer = s->qmat[1][plane];
+                        dequantizer = s->qmat[s->all_fragments[i].qpi][1][plane];
                     }else{
-                        dequantizer = s->qmat[0][plane];
+                        dequantizer = s->qmat[s->all_fragments[i].qpi][0][plane];
                     }
 
                     /* dequantize the DCT coefficients */
                     if(s->avctx->idct_algo==FF_IDCT_VP3){
                         Coeff *coeff= s->coeffs + i;
-                        memset(block, 0, sizeof(block));
+                        s->dsp.clear_block(block);
                         while(coeff->next){
                             block[coeff->index]= coeff->coeff * dequantizer[coeff->index];
                             coeff= coeff->next;
                         }
                     }else{
                         Coeff *coeff= s->coeffs + i;
-                        memset(block, 0, sizeof(block));
+                        s->dsp.clear_block(block);
                         while(coeff->next){
                             block[coeff->index]= (coeff->coeff * dequantizer[coeff->index] + 2)>>2;
                             coeff= coeff->next;
@@ -1495,39 +1539,6 @@ static void render_slice(Vp3DecodeContext *s, int slice)
     emms_c();
 }
 
-static void horizontal_filter(unsigned char *first_pixel, int stride,
-    int *bounding_values)
-{
-    unsigned char *end;
-    int filter_value;
-
-    for (end= first_pixel + 8*stride; first_pixel != end; first_pixel += stride) {
-        filter_value =
-            (first_pixel[-2] - first_pixel[ 1])
-         +3*(first_pixel[ 0] - first_pixel[-1]);
-        filter_value = bounding_values[(filter_value + 4) >> 3];
-        first_pixel[-1] = av_clip_uint8(first_pixel[-1] + filter_value);
-        first_pixel[ 0] = av_clip_uint8(first_pixel[ 0] - filter_value);
-    }
-}
-
-static void vertical_filter(unsigned char *first_pixel, int stride,
-    int *bounding_values)
-{
-    unsigned char *end;
-    int filter_value;
-    const int nstride= -stride;
-
-    for (end= first_pixel + 8; first_pixel < end; first_pixel++) {
-        filter_value =
-            (first_pixel[2 * nstride] - first_pixel[ stride])
-         +3*(first_pixel[0          ] - first_pixel[nstride]);
-        filter_value = bounding_values[(filter_value + 4) >> 3];
-        first_pixel[nstride] = av_clip_uint8(first_pixel[nstride] + filter_value);
-        first_pixel[0] = av_clip_uint8(first_pixel[0] - filter_value);
-    }
-}
-
 static void apply_loop_filter(Vp3DecodeContext *s)
 {
     int plane;
@@ -1569,7 +1580,7 @@ static void apply_loop_filter(Vp3DecodeContext *s)
                 /* do not perform left edge filter for left columns frags */
                 if ((x > 0) &&
                     (s->all_fragments[fragment].coding_method != MODE_COPY)) {
-                    horizontal_filter(
+                    s->dsp.vp3_h_loop_filter(
                         plane_data + s->all_fragments[fragment].first_pixel,
                         stride, bounding_values);
                 }
@@ -1577,7 +1588,7 @@ static void apply_loop_filter(Vp3DecodeContext *s)
                 /* do not perform top edge filter for top row fragments */
                 if ((y > 0) &&
                     (s->all_fragments[fragment].coding_method != MODE_COPY)) {
-                    vertical_filter(
+                    s->dsp.vp3_v_loop_filter(
                         plane_data + s->all_fragments[fragment].first_pixel,
                         stride, bounding_values);
                 }
@@ -1588,7 +1599,7 @@ static void apply_loop_filter(Vp3DecodeContext *s)
                 if ((x < width - 1) &&
                     (s->all_fragments[fragment].coding_method != MODE_COPY) &&
                     (s->all_fragments[fragment + 1].coding_method == MODE_COPY)) {
-                    horizontal_filter(
+                    s->dsp.vp3_h_loop_filter(
                         plane_data + s->all_fragments[fragment + 1].first_pixel,
                         stride, bounding_values);
                 }
@@ -1599,7 +1610,7 @@ static void apply_loop_filter(Vp3DecodeContext *s)
                 if ((y < height - 1) &&
                     (s->all_fragments[fragment].coding_method != MODE_COPY) &&
                     (s->all_fragments[fragment + width].coding_method == MODE_COPY)) {
-                    vertical_filter(
+                    s->dsp.vp3_v_loop_filter(
                         plane_data + s->all_fragments[fragment + width].first_pixel,
                         stride, bounding_values);
                 }
@@ -1676,9 +1687,10 @@ static av_cold int vp3_decode_init(AVCodecContext *avctx)
         s->version = 1;
 
     s->avctx = avctx;
-    s->width = (avctx->width + 15) & 0xFFFFFFF0;
-    s->height = (avctx->height + 15) & 0xFFFFFFF0;
+    s->width = FFALIGN(avctx->width, 16);
+    s->height = FFALIGN(avctx->height, 16);
     avctx->pix_fmt = PIX_FMT_YUV420P;
+    avctx->chroma_sample_location = AVCHROMA_LOC_CENTER;
     if(avctx->idct_algo==FF_IDCT_AUTO)
         avctx->idct_algo=FF_IDCT_VP3;
     dsputil_init(&s->dsp, avctx);
@@ -1687,7 +1699,8 @@ static av_cold int vp3_decode_init(AVCodecContext *avctx)
 
     /* initialize to an impossible value which will force a recalculation
      * in the first frame decode */
-    s->quality_index = -1;
+    for (i = 0; i < 3; i++)
+        s->qps[i] = -1;
 
     s->y_superblock_width = (s->width + 31) / 32;
     s->y_superblock_height = (s->height + 31) / 32;
@@ -1775,29 +1788,34 @@ static av_cold int vp3_decode_init(AVCodecContext *avctx)
         for (i = 0; i < 16; i++) {
 
             /* DC histograms */
-            init_vlc(&s->dc_vlc[i], 5, 32,
+            if (init_vlc(&s->dc_vlc[i], 5, 32,
                 &s->huffman_table[i][0][1], 4, 2,
-                &s->huffman_table[i][0][0], 4, 2, 0);
+                &s->huffman_table[i][0][0], 4, 2, 0) < 0)
+                goto vlc_fail;
 
             /* group 1 AC histograms */
-            init_vlc(&s->ac_vlc_1[i], 5, 32,
+            if (init_vlc(&s->ac_vlc_1[i], 5, 32,
                 &s->huffman_table[i+16][0][1], 4, 2,
-                &s->huffman_table[i+16][0][0], 4, 2, 0);
+                &s->huffman_table[i+16][0][0], 4, 2, 0) < 0)
+                goto vlc_fail;
 
             /* group 2 AC histograms */
-            init_vlc(&s->ac_vlc_2[i], 5, 32,
+            if (init_vlc(&s->ac_vlc_2[i], 5, 32,
                 &s->huffman_table[i+16*2][0][1], 4, 2,
-                &s->huffman_table[i+16*2][0][0], 4, 2, 0);
+                &s->huffman_table[i+16*2][0][0], 4, 2, 0) < 0)
+                goto vlc_fail;
 
             /* group 3 AC histograms */
-            init_vlc(&s->ac_vlc_3[i], 5, 32,
+            if (init_vlc(&s->ac_vlc_3[i], 5, 32,
                 &s->huffman_table[i+16*3][0][1], 4, 2,
-                &s->huffman_table[i+16*3][0][0], 4, 2, 0);
+                &s->huffman_table[i+16*3][0][0], 4, 2, 0) < 0)
+                goto vlc_fail;
 
             /* group 4 AC histograms */
-            init_vlc(&s->ac_vlc_4[i], 5, 32,
+            if (init_vlc(&s->ac_vlc_4[i], 5, 32,
                 &s->huffman_table[i+16*4][0][1], 4, 2,
-                &s->huffman_table[i+16*4][0][0], 4, 2, 0);
+                &s->huffman_table[i+16*4][0][0], 4, 2, 0) < 0)
+                goto vlc_fail;
         }
     }
 
@@ -1831,6 +1849,10 @@ static av_cold int vp3_decode_init(AVCodecContext *avctx)
     }
 
     return 0;
+
+vlc_fail:
+    av_log(avctx, AV_LOG_FATAL, "Invalid huffman table\n");
+    return -1;
 }
 
 /*
@@ -1838,8 +1860,10 @@ static av_cold int vp3_decode_init(AVCodecContext *avctx)
  */
 static int vp3_decode_frame(AVCodecContext *avctx,
                             void *data, int *data_size,
-                            const uint8_t *buf, int buf_size)
+                            AVPacket *avpkt)
 {
+    const uint8_t *buf = avpkt->data;
+    int buf_size = avpkt->size;
     Vp3DecodeContext *s = avctx->priv_data;
     GetBitContext gb;
     static int counter = 0;
@@ -1856,24 +1880,29 @@ static int vp3_decode_frame(AVCodecContext *avctx,
     s->keyframe = !get_bits1(&gb);
     if (!s->theora)
         skip_bits(&gb, 1);
-    s->last_quality_index = s->quality_index;
+    for (i = 0; i < 3; i++)
+        s->last_qps[i] = s->qps[i];
 
-    s->nqis=0;
+    s->nqps=0;
     do{
-        s->qis[s->nqis++]= get_bits(&gb, 6);
-    } while(s->theora >= 0x030200 && s->nqis<3 && get_bits1(&gb));
-
-    s->quality_index= s->qis[0];
+        s->qps[s->nqps++]= get_bits(&gb, 6);
+    } while(s->theora >= 0x030200 && s->nqps<3 && get_bits1(&gb));
+    for (i = s->nqps; i < 3; i++)
+        s->qps[i] = -1;
 
     if (s->avctx->debug & FF_DEBUG_PICT_INFO)
         av_log(s->avctx, AV_LOG_INFO, " VP3 %sframe #%d: Q index = %d\n",
-            s->keyframe?"key":"", counter, s->quality_index);
+            s->keyframe?"key":"", counter, s->qps[0]);
     counter++;
 
-    if (s->quality_index != s->last_quality_index) {
-        init_dequantizer(s);
+    if (s->qps[0] != s->last_qps[0])
         init_loop_filter(s);
-    }
+
+    for (i = 0; i < s->nqps; i++)
+        // reinit all dequantizers if the first one changed, because
+        // the DC of the first quantizer must be used for all matrices
+        if (s->qps[i] != s->last_qps[i] || s->qps[0] != s->last_qps[0])
+            init_dequantizer(s, i);
 
     if (avctx->skip_frame >= AVDISCARD_NONKEY && !s->keyframe)
         return buf_size;
@@ -1953,6 +1982,10 @@ static int vp3_decode_frame(AVCodecContext *avctx,
         av_log(s->avctx, AV_LOG_ERROR, "error in unpack_vectors\n");
         return -1;
     }
+    if (unpack_block_qpis(s, &gb)){
+        av_log(s->avctx, AV_LOG_ERROR, "error in unpack_block_qpis\n");
+        return -1;
+    }
     if (unpack_dct_coeffs(s, &gb)){
         av_log(s->avctx, AV_LOG_ERROR, "error in unpack_dct_coeffs\n");
         return -1;
@@ -2052,16 +2085,18 @@ static int read_huffman_tree(AVCodecContext *avctx, GetBitContext *gb)
         }
         s->huff_code_size++;
         s->hbits <<= 1;
-        read_huffman_tree(avctx, gb);
+        if (read_huffman_tree(avctx, gb))
+            return -1;
         s->hbits |= 1;
-        read_huffman_tree(avctx, gb);
+        if (read_huffman_tree(avctx, gb))
+            return -1;
         s->hbits >>= 1;
         s->huff_code_size--;
     }
     return 0;
 }
 
-#ifdef CONFIG_THEORA_DECODER
+#if CONFIG_THEORA_DECODER
 static int theora_decode_header(AVCodecContext *avctx, GetBitContext *gb)
 {
     Vp3DecodeContext *s = avctx->priv_data;
@@ -2078,8 +2113,8 @@ static int theora_decode_header(AVCodecContext *avctx, GetBitContext *gb)
         av_log(avctx, AV_LOG_DEBUG, "Old (<alpha3) Theora bitstream, flipped image\n");
     }
 
-    s->width = get_bits(gb, 16) << 4;
-    s->height = get_bits(gb, 16) << 4;
+    visible_width  = s->width  = get_bits(gb, 16) << 4;
+    visible_height = s->height = get_bits(gb, 16) << 4;
 
     if(avcodec_check_dimensions(avctx, s->width, s->height)){
         av_log(avctx, AV_LOG_ERROR, "Invalid dimensions (%dx%d)\n", s->width, s->height);
@@ -2096,10 +2131,10 @@ static int theora_decode_header(AVCodecContext *avctx, GetBitContext *gb)
         skip_bits(gb, 32); /* total number of macroblocks in a frame */
     }
 
-    visible_width  = get_bits_long(gb, 24);
-    visible_height = get_bits_long(gb, 24);
-
     if (s->theora >= 0x030200) {
+        visible_width  = get_bits_long(gb, 24);
+        visible_height = get_bits_long(gb, 24);
+
         skip_bits(gb, 8); /* offset x */
         skip_bits(gb, 8); /* offset y */
     }
@@ -2145,8 +2180,13 @@ static int theora_decode_tables(AVCodecContext *avctx, GetBitContext *gb)
     if (s->theora >= 0x030200) {
         n = get_bits(gb, 3);
         /* loop filter limit values table */
-        for (i = 0; i < 64; i++)
+        for (i = 0; i < 64; i++) {
             s->filter_limit_values[i] = get_bits(gb, n);
+            if (s->filter_limit_values[i] > 127) {
+                av_log(avctx, AV_LOG_ERROR, "filter limit value too large (%i > 127), clamping\n", s->filter_limit_values[i]);
+                s->filter_limit_values[i] = 127;
+            }
+        }
     }
 
     if (s->theora >= 0x030200)
@@ -2230,9 +2270,11 @@ static int theora_decode_tables(AVCodecContext *avctx, GetBitContext *gb)
         s->huff_code_size = 1;
         if (!get_bits1(gb)) {
             s->hbits = 0;
-            read_huffman_tree(avctx, gb);
+            if(read_huffman_tree(avctx, gb))
+                return -1;
             s->hbits = 1;
-            read_huffman_tree(avctx, gb);
+            if(read_huffman_tree(avctx, gb))
+                return -1;
         }
     }
 
@@ -2241,7 +2283,7 @@ static int theora_decode_tables(AVCodecContext *avctx, GetBitContext *gb)
     return 0;
 }
 
-static int theora_decode_init(AVCodecContext *avctx)
+static av_cold int theora_decode_init(AVCodecContext *avctx)
 {
     Vp3DecodeContext *s = avctx->priv_data;
     GetBitContext gb;
@@ -2276,7 +2318,7 @@ static int theora_decode_init(AVCodecContext *avctx)
      }
 
     // FIXME: Check for this as well.
-    skip_bits(&gb, 6*8); /* "theora" */
+    skip_bits_long(&gb, 6*8); /* "theora" */
 
     switch(ptype)
     {
@@ -2288,7 +2330,8 @@ static int theora_decode_init(AVCodecContext *avctx)
 //            theora_decode_comments(avctx, gb);
             break;
         case 0x82:
-            theora_decode_tables(avctx, &gb);
+            if (theora_decode_tables(avctx, &gb))
+                return -1;
             break;
         default:
             av_log(avctx, AV_LOG_ERROR, "Unknown Theora config packet: %d\n", ptype&~0x80);
@@ -2300,8 +2343,7 @@ static int theora_decode_init(AVCodecContext *avctx)
         break;
   }
 
-    vp3_decode_init(avctx);
-    return 0;
+    return vp3_decode_init(avctx);
 }
 
 AVCodec theora_decoder = {
@@ -2313,7 +2355,7 @@ AVCodec theora_decoder = {
     NULL,
     vp3_decode_end,
     vp3_decode_frame,
-    0,
+    CODEC_CAP_DR1,
     NULL,
     .long_name = NULL_IF_CONFIG_SMALL("Theora"),
 };
@@ -2328,7 +2370,7 @@ AVCodec vp3_decoder = {
     NULL,
     vp3_decode_end,
     vp3_decode_frame,
-    0,
+    CODEC_CAP_DR1,
     NULL,
     .long_name = NULL_IF_CONFIG_SMALL("On2 VP3"),
 };