X-Git-Url: https://git.sesse.net/?a=blobdiff_plain;f=libavcodec%2Fvp3.c;h=b4699b7aea2621d1d267ed0472878198e9a0f5e0;hb=7c2e31d1f0198fad23cddbd8f206a04173fe6d0d;hp=23d97bf9cc62008ef8b635fb45e30bb9b8a9bded;hpb=f66e4f5f9eac8eb022fad4f85d8d2e99b26c254f;p=ffmpeg diff --git a/libavcodec/vp3.c b/libavcodec/vp3.c index 23d97bf9cc6..b4699b7aea2 100644 --- a/libavcodec/vp3.c +++ b/libavcodec/vp3.c @@ -16,16 +16,15 @@ * You should have received a copy of the GNU Lesser General Public * License along with FFmpeg; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - * */ /** - * @file vp3.c + * @file libavcodec/vp3.c * On2 VP3 Video Decoder * * VP3 Video Decoder by Mike Melanson (mike at multimedia.cx) * For more information about the VP3 coding process, visit: - * http://multimedia.cx/ + * http://wiki.multimedia.cx/index.php?title=On2_VP3 * * Theora decoder by Alex Beregszaszi */ @@ -33,108 +32,17 @@ #include #include #include -#include -#include "common.h" #include "avcodec.h" #include "dsputil.h" -#include "mpegvideo.h" +#include "get_bits.h" #include "vp3data.h" +#include "xiph.h" #define FRAGMENT_PIXELS 8 -/* - * Debugging Variables - * - * Define one or more of the following compile-time variables to 1 to obtain - * elaborate information about certain aspects of the decoding process. - * - * KEYFRAMES_ONLY: set this to 1 to only see keyframes (VP3 slideshow mode) - * DEBUG_VP3: high-level decoding flow - * DEBUG_INIT: initialization parameters - * DEBUG_DEQUANTIZERS: display how the dequanization tables are built - * DEBUG_BLOCK_CODING: unpacking the superblock/macroblock/fragment coding - * DEBUG_MODES: unpacking the coding modes for individual fragments - * DEBUG_VECTORS: display the motion vectors - * DEBUG_TOKEN: display exhaustive information about each DCT token - * DEBUG_VLC: display the VLCs as they are extracted from the stream - * DEBUG_DC_PRED: display the process of reversing DC prediction - * DEBUG_IDCT: show every detail of the IDCT process - */ - -#define KEYFRAMES_ONLY 0 - -#define DEBUG_VP3 0 -#define DEBUG_INIT 0 -#define DEBUG_DEQUANTIZERS 0 -#define DEBUG_BLOCK_CODING 0 -#define DEBUG_MODES 0 -#define DEBUG_VECTORS 0 -#define DEBUG_TOKEN 0 -#define DEBUG_VLC 0 -#define DEBUG_DC_PRED 0 -#define DEBUG_IDCT 0 - -#if DEBUG_VP3 -#define debug_vp3(args...) av_log(NULL, AV_LOG_DEBUG, ## args) -#else -static inline void debug_vp3(const char *format, ...) { } -#endif - -#if DEBUG_INIT -#define debug_init(args...) av_log(NULL, AV_LOG_DEBUG, ## args) -#else -static inline void debug_init(const char *format, ...) { } -#endif - -#if DEBUG_DEQUANTIZERS -#define debug_dequantizers(args...) av_log(NULL, AV_LOG_DEBUG, ## args) -#else -static inline void debug_dequantizers(const char *format, ...) { } -#endif - -#if DEBUG_BLOCK_CODING -#define debug_block_coding(args...) av_log(NULL, AV_LOG_DEBUG, ## args) -#else -static inline void debug_block_coding(const char *format, ...) { } -#endif - -#if DEBUG_MODES -#define debug_modes(args...) av_log(NULL, AV_LOG_DEBUG, ## args) -#else -static inline void debug_modes(const char *format, ...) { } -#endif - -#if DEBUG_VECTORS -#define debug_vectors(args...) av_log(NULL, AV_LOG_DEBUG, ## args) -#else -static inline void debug_vectors(const char *format, ...) { } -#endif - -#if DEBUG_TOKEN -#define debug_token(args...) av_log(NULL, AV_LOG_DEBUG, ## args) -#else -static inline void debug_token(const char *format, ...) { } -#endif - -#if DEBUG_VLC -#define debug_vlc(args...) 
av_log(NULL, AV_LOG_DEBUG, ## args) -#else -static inline void debug_vlc(const char *format, ...) { } -#endif - -#if DEBUG_DC_PRED -#define debug_dc_pred(args...) av_log(NULL, AV_LOG_DEBUG, ## args) -#else -static inline void debug_dc_pred(const char *format, ...) { } -#endif - -#if DEBUG_IDCT -#define debug_idct(args...) av_log(NULL, AV_LOG_DEBUG, ## args) -#else -static inline void debug_idct(const char *format, ...) { } -#endif +static av_cold int vp3_decode_end(AVCodecContext *avctx); typedef struct Coeff { struct Coeff *next; @@ -145,15 +53,10 @@ typedef struct Coeff { //FIXME split things out into their own arrays typedef struct Vp3Fragment { Coeff *next_coeff; - /* address of first pixel taking into account which plane the fragment - * lives on as well as the plane stride */ - int first_pixel; - /* this is the macroblock that the fragment belongs to */ - uint16_t macroblock; uint8_t coding_method; - uint8_t coeff_count; int8_t motion_x; int8_t motion_y; + uint8_t qpi; } Vp3Fragment; #define SB_NOT_CODED 0 @@ -174,11 +77,8 @@ typedef struct Vp3Fragment { #define MODE_COPY 8 /* There are 6 preset schemes, plus a free-form scheme */ -static int ModeAlphabet[7][CODING_MODE_COUNT] = +static const int ModeAlphabet[6][CODING_MODE_COUNT] = { - /* this is the custom scheme */ - { 0, 0, 0, 0, 0, 0, 0, 0 }, - /* scheme 1: Last motion vector dominates */ { MODE_INTER_LAST_MV, MODE_INTER_PRIOR_LAST, MODE_INTER_PLUS_MV, MODE_INTER_NO_MV, @@ -230,15 +130,13 @@ typedef struct Vp3DecodeContext { int keyframe; DSPContext dsp; int flipped_image; + int last_slice_end; - int qis[3]; - int nqis; - int quality_index; - int last_quality_index; + int qps[3]; + int nqps; + int last_qps[3]; int superblock_count; - int superblock_width; - int superblock_height; int y_superblock_width; int y_superblock_height; int c_superblock_width; @@ -256,9 +154,11 @@ typedef struct Vp3DecodeContext { int fragment_height; Vp3Fragment *all_fragments; + uint8_t *coeff_counts; Coeff *coeffs; Coeff *next_coeff; int fragment_start[3]; + int data_offset[3]; ScanTable scantable; @@ -270,11 +170,18 @@ typedef struct Vp3DecodeContext { uint8_t qr_size [2][3][64]; uint16_t qr_base[2][3][64]; - /* this is a list of indices into the all_fragments array indicating + /* this is a list of indexes into the all_fragments array indicating * which of the fragments are coded */ int *coded_fragment_list; int coded_fragment_list_index; - int pixel_addresses_inited; + + /* track which fragments have already been decoded; called 'fast' + * because this data structure avoids having to iterate through every + * fragment in coded_fragment_list; once a fragment has been fully + * decoded, it is removed from this list */ + int *fast_fragment_list; + int fragment_list_y_head; + int fragment_list_c_head; VLC dc_vlc[16]; VLC ac_vlc_1[16]; @@ -289,24 +196,14 @@ typedef struct Vp3DecodeContext { /* these arrays need to be on 16-byte boundaries since SSE2 operations * index into them */ - DECLARE_ALIGNED_16(int16_t, qmat[2][4][64]); //macroblock_width; hilbert_walk_mb[2] = 1; @@ -391,9 +275,6 @@ static int init_block_mapping(Vp3DecodeContext *s) /* iterate through each superblock (all planes) and map the fragments */ for (i = 0; i < s->superblock_count; i++) { - debug_init(" superblock %d (u starts @ %d, v starts @ %d)\n", - i, s->u_superblock_start, s->v_superblock_start); - /* time to re-assign the limits? 
*/ if (i == 0) { @@ -455,129 +336,14 @@ static int init_block_mapping(Vp3DecodeContext *s) if ((current_width < right_edge) && (current_height < bottom_edge)) { s->superblock_fragments[mapping_index] = current_fragment; - debug_init(" mapping fragment %d to superblock %d, position %d (%d/%d x %d/%d)\n", - s->superblock_fragments[mapping_index], i, j, - current_width, right_edge, current_height, bottom_edge); } else { s->superblock_fragments[mapping_index] = -1; - debug_init(" superblock %d, position %d has no fragment (%d/%d x %d/%d)\n", - i, j, - current_width, right_edge, current_height, bottom_edge); - } - - mapping_index++; - } - } - - /* initialize the superblock <-> macroblock mapping; iterate through - * all of the Y plane superblocks to build this mapping */ - right_edge = s->macroblock_width; - bottom_edge = s->macroblock_height; - current_width = -1; - current_height = 0; - superblock_row_inc = s->macroblock_width - - (s->y_superblock_width * 2 - s->macroblock_width);; - hilbert = hilbert_walk_mb; - mapping_index = 0; - current_macroblock = -1; - for (i = 0; i < s->u_superblock_start; i++) { - - if (current_width >= right_edge - 1) { - /* reset width and move to next superblock row */ - current_width = -1; - current_height += 2; - - /* macroblock is now at the start of a new superblock row */ - current_macroblock += superblock_row_inc; - } - - /* iterate through each potential macroblock in the superblock */ - for (j = 0; j < 4; j++) { - current_macroblock += hilbert_walk_mb[j]; - current_width += travel_width_mb[j]; - current_height += travel_height_mb[j]; - - /* check if the macroblock is in bounds */ - if ((current_width < right_edge) && - (current_height < bottom_edge)) { - s->superblock_macroblocks[mapping_index] = current_macroblock; - debug_init(" mapping macroblock %d to superblock %d, position %d (%d/%d x %d/%d)\n", - s->superblock_macroblocks[mapping_index], i, j, - current_width, right_edge, current_height, bottom_edge); - } else { - s->superblock_macroblocks[mapping_index] = -1; - debug_init(" superblock %d, position %d has no macroblock (%d/%d x %d/%d)\n", - i, j, - current_width, right_edge, current_height, bottom_edge); } mapping_index++; } } - /* initialize the macroblock <-> fragment mapping */ - current_fragment = 0; - current_macroblock = 0; - mapping_index = 0; - for (i = 0; i < s->fragment_height; i += 2) { - - for (j = 0; j < s->fragment_width; j += 2) { - - debug_init(" macroblock %d contains fragments: ", current_macroblock); - s->all_fragments[current_fragment].macroblock = current_macroblock; - s->macroblock_fragments[mapping_index++] = current_fragment; - debug_init("%d ", current_fragment); - - if (j + 1 < s->fragment_width) { - s->all_fragments[current_fragment + 1].macroblock = current_macroblock; - s->macroblock_fragments[mapping_index++] = current_fragment + 1; - debug_init("%d ", current_fragment + 1); - } else - s->macroblock_fragments[mapping_index++] = -1; - - if (i + 1 < s->fragment_height) { - s->all_fragments[current_fragment + s->fragment_width].macroblock = - current_macroblock; - s->macroblock_fragments[mapping_index++] = - current_fragment + s->fragment_width; - debug_init("%d ", current_fragment + s->fragment_width); - } else - s->macroblock_fragments[mapping_index++] = -1; - - if ((j + 1 < s->fragment_width) && (i + 1 < s->fragment_height)) { - s->all_fragments[current_fragment + s->fragment_width + 1].macroblock = - current_macroblock; - s->macroblock_fragments[mapping_index++] = - current_fragment + s->fragment_width + 1; - 
debug_init("%d ", current_fragment + s->fragment_width + 1); - } else - s->macroblock_fragments[mapping_index++] = -1; - - /* C planes */ - c_fragment = s->fragment_start[1] + - (i * s->fragment_width / 4) + (j / 2); - s->all_fragments[c_fragment].macroblock = s->macroblock_count; - s->macroblock_fragments[mapping_index++] = c_fragment; - debug_init("%d ", c_fragment); - - c_fragment = s->fragment_start[2] + - (i * s->fragment_width / 4) + (j / 2); - s->all_fragments[c_fragment].macroblock = s->macroblock_count; - s->macroblock_fragments[mapping_index++] = c_fragment; - debug_init("%d ", c_fragment); - - debug_init("\n"); - - if (j + 2 <= s->fragment_width) - current_fragment += 2; - else - current_fragment++; - current_macroblock++; - } - - current_fragment += s->fragment_width; - } - return 0; /* successful path out */ } @@ -591,10 +357,11 @@ static void init_frame(Vp3DecodeContext *s, GetBitContext *gb) /* zero out all of the fragment information */ s->coded_fragment_list_index = 0; for (i = 0; i < s->fragment_count; i++) { - s->all_fragments[i].coeff_count = 0; + s->coeff_counts[i] = 0; s->all_fragments[i].motion_x = 127; s->all_fragments[i].motion_y = 127; s->all_fragments[i].next_coeff= NULL; + s->all_fragments[i].qpi = 0; s->coeffs[i].index= s->coeffs[i].coeff=0; s->coeffs[i].next= NULL; @@ -605,62 +372,70 @@ static void init_frame(Vp3DecodeContext *s, GetBitContext *gb) * This function sets up the dequantization tables used for a particular * frame. */ -static void init_dequantizer(Vp3DecodeContext *s) +static void init_dequantizer(Vp3DecodeContext *s, int qpi) { - int ac_scale_factor = s->coded_ac_scale_factor[s->quality_index]; - int dc_scale_factor = s->coded_dc_scale_factor[s->quality_index]; + int ac_scale_factor = s->coded_ac_scale_factor[s->qps[qpi]]; + int dc_scale_factor = s->coded_dc_scale_factor[s->qps[qpi]]; int i, plane, inter, qri, bmi, bmj, qistart; - debug_vp3(" vp3: initializing dequantization tables\n"); - for(inter=0; inter<2; inter++){ for(plane=0; plane<3; plane++){ int sum=0; for(qri=0; qriqr_count[inter][plane]; qri++){ sum+= s->qr_size[inter][plane][qri]; - if(s->quality_index <= sum) + if(s->qps[qpi] <= sum) break; } qistart= sum - s->qr_size[inter][plane][qri]; bmi= s->qr_base[inter][plane][qri ]; bmj= s->qr_base[inter][plane][qri+1]; for(i=0; i<64; i++){ - int coeff= ( 2*(sum -s->quality_index)*s->base_matrix[bmi][i] - - 2*(qistart-s->quality_index)*s->base_matrix[bmj][i] + int coeff= ( 2*(sum -s->qps[qpi])*s->base_matrix[bmi][i] + - 2*(qistart-s->qps[qpi])*s->base_matrix[bmj][i] + s->qr_size[inter][plane][qri]) / (2*s->qr_size[inter][plane][qri]); int qmin= 8<<(inter + !i); int qscale= i ? ac_scale_factor : dc_scale_factor; - s->qmat[inter][plane][i]= av_clip((qscale * coeff)/100 * 4, qmin, 4096); + s->qmat[qpi][inter][plane][s->dsp.idct_permutation[i]]= av_clip((qscale * coeff)/100 * 4, qmin, 4096); } + // all DC coefficients use the same quant so as not to interfere with DC prediction + s->qmat[qpi][inter][plane][0] = s->qmat[0][inter][plane][0]; } } - memset(s->qscale_table, (FFMAX(s->qmat[0][0][1], s->qmat[0][1][1])+8)/16, 512); //FIXME finetune + memset(s->qscale_table, (FFMAX(s->qmat[0][0][0][1], s->qmat[0][0][1][1])+8)/16, 512); //FIXME finetune } /* * This function initializes the loop filter boundary limits if the frame's * quality index is different from the previous frame's. + * + * The filter_limit_values may not be larger than 127. 
*/ static void init_loop_filter(Vp3DecodeContext *s) { int *bounding_values= s->bounding_values_array+127; int filter_limit; int x; + int value; - filter_limit = s->filter_limit_values[s->quality_index]; + filter_limit = s->filter_limit_values[s->qps[0]]; /* set up the bounding values */ memset(s->bounding_values_array, 0, 256 * sizeof(int)); for (x = 0; x < filter_limit; x++) { - bounding_values[-x - filter_limit] = -filter_limit + x; bounding_values[-x] = -x; bounding_values[x] = x; - bounding_values[x + filter_limit] = filter_limit - x; } + for (x = value = filter_limit; x < 128 && value; x++, value--) { + bounding_values[ x] = value; + bounding_values[-x] = -value; + } + if (value) + bounding_values[128] = value; + bounding_values[129] = bounding_values[130] = filter_limit * 0x02020202; } /* @@ -679,17 +454,13 @@ static int unpack_superblocks(Vp3DecodeContext *s, GetBitContext *gb) int i, j; int current_fragment; - debug_vp3(" vp3: unpacking superblock coding\n"); - if (s->keyframe) { - - debug_vp3(" keyframe-- all superblocks are fully coded\n"); memset(s->superblock_coding, SB_FULLY_CODED, s->superblock_count); } else { /* unpack the list of partially-coded superblocks */ - bit = get_bits(gb, 1); + bit = get_bits1(gb); /* toggle the bit because as soon as the first run length is * fetched the bit will be toggled again */ bit ^= 1; @@ -700,10 +471,6 @@ static int unpack_superblocks(Vp3DecodeContext *s, GetBitContext *gb) s->superblock_run_length_vlc.table, 6, 2); if (current_run == 33) current_run += get_bits(gb, 12); - debug_block_coding(" setting superblocks %d..%d to %s\n", - current_superblock, - current_superblock + current_run - 1, - (bit) ? "partially coded" : "not coded"); /* if any of the superblocks are not partially coded, flag * a boolean to decode the list of fully-coded superblocks */ @@ -725,7 +492,7 @@ static int unpack_superblocks(Vp3DecodeContext *s, GetBitContext *gb) current_superblock = 0; current_run = 0; - bit = get_bits(gb, 1); + bit = get_bits1(gb); /* toggle the bit because as soon as the first run length is * fetched the bit will be toggled again */ bit ^= 1; @@ -741,10 +508,6 @@ static int unpack_superblocks(Vp3DecodeContext *s, GetBitContext *gb) if (current_run == 33) current_run += get_bits(gb, 12); } - - debug_block_coding(" setting superblock %d to %s\n", - current_superblock, - (bit) ? 
"fully coded" : "not coded"); s->superblock_coding[current_superblock] = 2*bit; } current_superblock++; @@ -756,7 +519,7 @@ static int unpack_superblocks(Vp3DecodeContext *s, GetBitContext *gb) if (decode_partial_blocks) { current_run = 0; - bit = get_bits(gb, 1); + bit = get_bits1(gb); /* toggle the bit because as soon as the first run length is * fetched the bit will be toggled again */ bit ^= 1; @@ -816,15 +579,10 @@ static int unpack_superblocks(Vp3DecodeContext *s, GetBitContext *gb) first_c_fragment_seen = 1; } s->coded_fragment_list_index++; - s->macroblock_coding[s->all_fragments[current_fragment].macroblock] = MODE_INTER_NO_MV; - debug_block_coding(" superblock %d is partially coded, fragment %d is coded\n", - i, current_fragment); } else { /* not coded; copy this fragment from the prior frame */ s->all_fragments[current_fragment].coding_method = MODE_COPY; - debug_block_coding(" superblock %d is partially coded, fragment %d is not coded\n", - i, current_fragment); } } else { @@ -844,9 +602,6 @@ static int unpack_superblocks(Vp3DecodeContext *s, GetBitContext *gb) first_c_fragment_seen = 1; } s->coded_fragment_list_index++; - s->macroblock_coding[s->all_fragments[current_fragment].macroblock] = MODE_INTER_NO_MV; - debug_block_coding(" superblock %d is fully coded, fragment %d is coded\n", - i, current_fragment); } } } @@ -859,12 +614,24 @@ static int unpack_superblocks(Vp3DecodeContext *s, GetBitContext *gb) /* end the list of coded C fragments */ s->last_coded_c_fragment = s->coded_fragment_list_index - 1; - debug_block_coding(" %d total coded fragments, y: %d -> %d, c: %d -> %d\n", - s->coded_fragment_list_index, - s->first_coded_y_fragment, - s->last_coded_y_fragment, - s->first_coded_c_fragment, - s->last_coded_c_fragment); + for (i = 0; i < s->fragment_count - 1; i++) { + s->fast_fragment_list[i] = i + 1; + } + s->fast_fragment_list[s->fragment_count - 1] = -1; + + if (s->last_coded_y_fragment == -1) + s->fragment_list_y_head = -1; + else { + s->fragment_list_y_head = s->first_coded_y_fragment; + s->fast_fragment_list[s->last_coded_y_fragment] = -1; + } + + if (s->last_coded_c_fragment == -1) + s->fragment_list_c_head = -1; + else { + s->fragment_list_c_head = s->first_coded_c_fragment; + s->fast_fragment_list[s->last_coded_c_fragment] = -1; + } return 0; } @@ -875,17 +642,15 @@ static int unpack_superblocks(Vp3DecodeContext *s, GetBitContext *gb) */ static int unpack_modes(Vp3DecodeContext *s, GetBitContext *gb) { - int i, j, k; + int i, j, k, sb_x, sb_y; int scheme; int current_macroblock; int current_fragment; int coding_mode; - - debug_vp3(" vp3: unpacking encoding modes\n"); + int custom_mode_alphabet[CODING_MODE_COUNT]; + const int *alphabet; if (s->keyframe) { - debug_vp3(" keyframe-- all blocks are coded as INTRA\n"); - for (i = 0; i < s->fragment_count; i++) s->all_fragments[i].coding_method = MODE_INTRA; @@ -893,60 +658,70 @@ static int unpack_modes(Vp3DecodeContext *s, GetBitContext *gb) /* fetch the mode coding scheme for this frame */ scheme = get_bits(gb, 3); - debug_modes(" using mode alphabet %d\n", scheme); /* is it a custom coding scheme? 
*/ if (scheme == 0) { - debug_modes(" custom mode alphabet ahead:\n"); for (i = 0; i < 8; i++) - ModeAlphabet[scheme][get_bits(gb, 3)] = i; - } - - for (i = 0; i < 8; i++) - debug_modes(" mode[%d][%d] = %d\n", scheme, i, - ModeAlphabet[scheme][i]); + custom_mode_alphabet[i] = MODE_INTER_NO_MV; + for (i = 0; i < 8; i++) + custom_mode_alphabet[get_bits(gb, 3)] = i; + alphabet = custom_mode_alphabet; + } else + alphabet = ModeAlphabet[scheme-1]; /* iterate through all of the macroblocks that contain 1 or more * coded fragments */ - for (i = 0; i < s->u_superblock_start; i++) { + for (sb_y = 0; sb_y < s->y_superblock_height; sb_y++) { + for (sb_x = 0; sb_x < s->y_superblock_width; sb_x++) { for (j = 0; j < 4; j++) { - current_macroblock = s->superblock_macroblocks[i * 4 + j]; - if ((current_macroblock == -1) || - (s->macroblock_coding[current_macroblock] == MODE_COPY)) + int mb_x = 2*sb_x + (j>>1); + int mb_y = 2*sb_y + (((j>>1)+j)&1); + int frags_coded = 0; + current_macroblock = mb_y * s->macroblock_width + mb_x; + + if (mb_x >= s->macroblock_width || mb_y >= s->macroblock_height) + continue; + +#define BLOCK_X (2*mb_x + (k&1)) +#define BLOCK_Y (2*mb_y + (k>>1)) + /* coding modes are only stored if the macroblock has at least one + * luma block coded, otherwise it must be INTER_NO_MV */ + for (k = 0; k < 4; k++) { + current_fragment = BLOCK_Y*s->fragment_width + BLOCK_X; + if (s->all_fragments[current_fragment].coding_method != MODE_COPY) + break; + } + if (k == 4) { + s->macroblock_coding[current_macroblock] = MODE_INTER_NO_MV; continue; - if (current_macroblock >= s->macroblock_count) { - av_log(s->avctx, AV_LOG_ERROR, " vp3:unpack_modes(): bad macroblock number (%d >= %d)\n", - current_macroblock, s->macroblock_count); - return 1; } /* mode 7 means get 3 bits for each coding mode */ if (scheme == 7) coding_mode = get_bits(gb, 3); else - coding_mode = ModeAlphabet[scheme] + coding_mode = alphabet [get_vlc2(gb, s->mode_code_vlc.table, 3, 3)]; s->macroblock_coding[current_macroblock] = coding_mode; - for (k = 0; k < 6; k++) { + for (k = 0; k < 4; k++) { current_fragment = - s->macroblock_fragments[current_macroblock * 6 + k]; - if (current_fragment == -1) - continue; - if (current_fragment >= s->fragment_count) { - av_log(s->avctx, AV_LOG_ERROR, " vp3:unpack_modes(): bad fragment number (%d >= %d)\n", - current_fragment, s->fragment_count); - return 1; - } + BLOCK_Y*s->fragment_width + BLOCK_X; + if (s->all_fragments[current_fragment].coding_method != + MODE_COPY) + s->all_fragments[current_fragment].coding_method = + coding_mode; + } + for (k = 0; k < 2; k++) { + current_fragment = s->fragment_start[k+1] + + mb_y*(s->fragment_width>>1) + mb_x; if (s->all_fragments[current_fragment].coding_method != MODE_COPY) s->all_fragments[current_fragment].coding_method = coding_mode; } - - debug_modes(" coding method for macroblock starting @ fragment %d = %d\n", - s->macroblock_fragments[current_macroblock * 6], coding_mode); + } } } } @@ -960,7 +735,7 @@ static int unpack_modes(Vp3DecodeContext *s, GetBitContext *gb) */ static int unpack_vectors(Vp3DecodeContext *s, GetBitContext *gb) { - int i, j, k; + int j, k, sb_x, sb_y; int coding_mode; int motion_x[6]; int motion_y[6]; @@ -971,75 +746,63 @@ static int unpack_vectors(Vp3DecodeContext *s, GetBitContext *gb) int current_macroblock; int current_fragment; - debug_vp3(" vp3: unpacking motion vectors\n"); - if (s->keyframe) { - - debug_vp3(" keyframe-- there are no motion vectors\n"); - - } else { + if (s->keyframe) + return 0; - memset(motion_x, 0, 6 
* sizeof(int)); - memset(motion_y, 0, 6 * sizeof(int)); + memset(motion_x, 0, 6 * sizeof(int)); + memset(motion_y, 0, 6 * sizeof(int)); - /* coding mode 0 is the VLC scheme; 1 is the fixed code scheme */ - coding_mode = get_bits(gb, 1); - debug_vectors(" using %s scheme for unpacking motion vectors\n", - (coding_mode == 0) ? "VLC" : "fixed-length"); + /* coding mode 0 is the VLC scheme; 1 is the fixed code scheme */ + coding_mode = get_bits1(gb); - /* iterate through all of the macroblocks that contain 1 or more - * coded fragments */ - for (i = 0; i < s->u_superblock_start; i++) { + /* iterate through all of the macroblocks that contain 1 or more + * coded fragments */ + for (sb_y = 0; sb_y < s->y_superblock_height; sb_y++) { + for (sb_x = 0; sb_x < s->y_superblock_width; sb_x++) { - for (j = 0; j < 4; j++) { - current_macroblock = s->superblock_macroblocks[i * 4 + j]; - if ((current_macroblock == -1) || - (s->macroblock_coding[current_macroblock] == MODE_COPY)) - continue; - if (current_macroblock >= s->macroblock_count) { - av_log(s->avctx, AV_LOG_ERROR, " vp3:unpack_vectors(): bad macroblock number (%d >= %d)\n", - current_macroblock, s->macroblock_count); - return 1; + for (j = 0; j < 4; j++) { + int mb_x = 2*sb_x + (j>>1); + int mb_y = 2*sb_y + (((j>>1)+j)&1); + current_macroblock = mb_y * s->macroblock_width + mb_x; + + if (mb_x >= s->macroblock_width || mb_y >= s->macroblock_height || + (s->macroblock_coding[current_macroblock] == MODE_COPY)) + continue; + + switch (s->macroblock_coding[current_macroblock]) { + + case MODE_INTER_PLUS_MV: + case MODE_GOLDEN_MV: + /* all 6 fragments use the same motion vector */ + if (coding_mode == 0) { + motion_x[0] = motion_vector_table[get_vlc2(gb, s->motion_vector_vlc.table, 6, 2)]; + motion_y[0] = motion_vector_table[get_vlc2(gb, s->motion_vector_vlc.table, 6, 2)]; + } else { + motion_x[0] = fixed_motion_vector_table[get_bits(gb, 6)]; + motion_y[0] = fixed_motion_vector_table[get_bits(gb, 6)]; } - current_fragment = s->macroblock_fragments[current_macroblock * 6]; - if (current_fragment >= s->fragment_count) { - av_log(s->avctx, AV_LOG_ERROR, " vp3:unpack_vectors(): bad fragment number (%d >= %d\n", - current_fragment, s->fragment_count); - return 1; + /* vector maintenance, only on MODE_INTER_PLUS_MV */ + if (s->macroblock_coding[current_macroblock] == + MODE_INTER_PLUS_MV) { + prior_last_motion_x = last_motion_x; + prior_last_motion_y = last_motion_y; + last_motion_x = motion_x[0]; + last_motion_y = motion_y[0]; } - switch (s->macroblock_coding[current_macroblock]) { - - case MODE_INTER_PLUS_MV: - case MODE_GOLDEN_MV: - /* all 6 fragments use the same motion vector */ - if (coding_mode == 0) { - motion_x[0] = motion_vector_table[get_vlc2(gb, s->motion_vector_vlc.table, 6, 2)]; - motion_y[0] = motion_vector_table[get_vlc2(gb, s->motion_vector_vlc.table, 6, 2)]; - } else { - motion_x[0] = fixed_motion_vector_table[get_bits(gb, 6)]; - motion_y[0] = fixed_motion_vector_table[get_bits(gb, 6)]; - } - - for (k = 1; k < 6; k++) { - motion_x[k] = motion_x[0]; - motion_y[k] = motion_y[0]; - } - - /* vector maintenance, only on MODE_INTER_PLUS_MV */ - if (s->macroblock_coding[current_macroblock] == - MODE_INTER_PLUS_MV) { - prior_last_motion_x = last_motion_x; - prior_last_motion_y = last_motion_y; - last_motion_x = motion_x[0]; - last_motion_y = motion_y[0]; - } - break; + break; - case MODE_INTER_FOURMV: - /* fetch 4 vectors from the bitstream, one for each - * Y fragment, then average for the C fragment vectors */ - motion_x[4] = motion_y[4] = 0; - 
for (k = 0; k < 4; k++) { + case MODE_INTER_FOURMV: + /* vector maintenance */ + prior_last_motion_x = last_motion_x; + prior_last_motion_y = last_motion_y; + + /* fetch 4 vectors from the bitstream, one for each + * Y fragment, then average for the C fragment vectors */ + motion_x[4] = motion_y[4] = 0; + for (k = 0; k < 4; k++) { + current_fragment = BLOCK_Y*s->fragment_width + BLOCK_X; + if (s->all_fragments[current_fragment].coding_method != MODE_COPY) { if (coding_mode == 0) { motion_x[k] = motion_vector_table[get_vlc2(gb, s->motion_vector_vlc.table, 6, 2)]; motion_y[k] = motion_vector_table[get_vlc2(gb, s->motion_vector_vlc.table, 6, 2)]; @@ -1047,83 +810,119 @@ static int unpack_vectors(Vp3DecodeContext *s, GetBitContext *gb) motion_x[k] = fixed_motion_vector_table[get_bits(gb, 6)]; motion_y[k] = fixed_motion_vector_table[get_bits(gb, 6)]; } - motion_x[4] += motion_x[k]; - motion_y[4] += motion_y[k]; + last_motion_x = motion_x[k]; + last_motion_y = motion_y[k]; + } else { + motion_x[k] = 0; + motion_y[k] = 0; } + motion_x[4] += motion_x[k]; + motion_y[4] += motion_y[k]; + } - motion_x[5]= - motion_x[4]= RSHIFT(motion_x[4], 2); - motion_y[5]= - motion_y[4]= RSHIFT(motion_y[4], 2); - - /* vector maintenance; vector[3] is treated as the - * last vector in this case */ - prior_last_motion_x = last_motion_x; - prior_last_motion_y = last_motion_y; - last_motion_x = motion_x[3]; - last_motion_y = motion_y[3]; - break; - - case MODE_INTER_LAST_MV: - /* all 6 fragments use the last motion vector */ - motion_x[0] = last_motion_x; - motion_y[0] = last_motion_y; - for (k = 1; k < 6; k++) { - motion_x[k] = motion_x[0]; - motion_y[k] = motion_y[0]; - } + motion_x[5]= + motion_x[4]= RSHIFT(motion_x[4], 2); + motion_y[5]= + motion_y[4]= RSHIFT(motion_y[4], 2); + break; - /* no vector maintenance (last vector remains the - * last vector) */ - break; + case MODE_INTER_LAST_MV: + /* all 6 fragments use the last motion vector */ + motion_x[0] = last_motion_x; + motion_y[0] = last_motion_y; - case MODE_INTER_PRIOR_LAST: - /* all 6 fragments use the motion vector prior to the - * last motion vector */ - motion_x[0] = prior_last_motion_x; - motion_y[0] = prior_last_motion_y; - for (k = 1; k < 6; k++) { - motion_x[k] = motion_x[0]; - motion_y[k] = motion_y[0]; - } + /* no vector maintenance (last vector remains the + * last vector) */ + break; - /* vector maintenance */ - prior_last_motion_x = last_motion_x; - prior_last_motion_y = last_motion_y; - last_motion_x = motion_x[0]; - last_motion_y = motion_y[0]; - break; + case MODE_INTER_PRIOR_LAST: + /* all 6 fragments use the motion vector prior to the + * last motion vector */ + motion_x[0] = prior_last_motion_x; + motion_y[0] = prior_last_motion_y; + + /* vector maintenance */ + prior_last_motion_x = last_motion_x; + prior_last_motion_y = last_motion_y; + last_motion_x = motion_x[0]; + last_motion_y = motion_y[0]; + break; - default: - /* covers intra, inter without MV, golden without MV */ - memset(motion_x, 0, 6 * sizeof(int)); - memset(motion_y, 0, 6 * sizeof(int)); + default: + /* covers intra, inter without MV, golden without MV */ + motion_x[0] = 0; + motion_y[0] = 0; - /* no vector maintenance */ - break; - } + /* no vector maintenance */ + break; + } - /* assign the motion vectors to the correct fragments */ - debug_vectors(" vectors for macroblock starting @ fragment %d (coding method %d):\n", - current_fragment, - s->macroblock_coding[current_macroblock]); - for (k = 0; k < 6; k++) { - current_fragment = - 
s->macroblock_fragments[current_macroblock * 6 + k]; - if (current_fragment == -1) - continue; - if (current_fragment >= s->fragment_count) { - av_log(s->avctx, AV_LOG_ERROR, " vp3:unpack_vectors(): bad fragment number (%d >= %d)\n", - current_fragment, s->fragment_count); - return 1; - } + /* assign the motion vectors to the correct fragments */ + for (k = 0; k < 4; k++) { + current_fragment = + BLOCK_Y*s->fragment_width + BLOCK_X; + if (s->macroblock_coding[current_macroblock] == MODE_INTER_FOURMV) { s->all_fragments[current_fragment].motion_x = motion_x[k]; s->all_fragments[current_fragment].motion_y = motion_y[k]; - debug_vectors(" vector %d: fragment %d = (%d, %d)\n", - k, current_fragment, motion_x[k], motion_y[k]); + } else { + s->all_fragments[current_fragment].motion_x = motion_x[0]; + s->all_fragments[current_fragment].motion_y = motion_y[0]; + } + } + for (k = 0; k < 2; k++) { + current_fragment = s->fragment_start[k+1] + + mb_y*(s->fragment_width>>1) + mb_x; + if (s->macroblock_coding[current_macroblock] == MODE_INTER_FOURMV) { + s->all_fragments[current_fragment].motion_x = motion_x[k+4]; + s->all_fragments[current_fragment].motion_y = motion_y[k+4]; + } else { + s->all_fragments[current_fragment].motion_x = motion_x[0]; + s->all_fragments[current_fragment].motion_y = motion_y[0]; } } } + } + } + + return 0; +} + +static int unpack_block_qpis(Vp3DecodeContext *s, GetBitContext *gb) +{ + int qpi, i, j, bit, run_length, blocks_decoded, num_blocks_at_qpi; + int num_blocks = s->coded_fragment_list_index; + + for (qpi = 0; qpi < s->nqps-1 && num_blocks > 0; qpi++) { + i = blocks_decoded = num_blocks_at_qpi = 0; + + bit = get_bits1(gb); + + do { + run_length = get_vlc2(gb, s->superblock_run_length_vlc.table, 6, 2) + 1; + if (run_length == 34) + run_length += get_bits(gb, 12); + blocks_decoded += run_length; + + if (!bit) + num_blocks_at_qpi += run_length; + + for (j = 0; j < run_length; i++) { + if (i >= s->coded_fragment_list_index) + return -1; + + if (s->all_fragments[s->coded_fragment_list[i]].qpi == qpi) { + s->all_fragments[s->coded_fragment_list[i]].qpi += bit; + j++; + } + } + + if (run_length == 4129) + bit = get_bits1(gb); + else + bit ^= 1; + } while (blocks_decoded < num_blocks); + + num_blocks -= num_blocks_at_qpi; } return 0; @@ -1143,7 +942,7 @@ static int unpack_vectors(Vp3DecodeContext *s, GetBitContext *gb) */ static int unpack_vlcs(Vp3DecodeContext *s, GetBitContext *gb, VLC *table, int coeff_index, - int first_fragment, int last_fragment, + int y_plane, int eob_run) { int i; @@ -1151,27 +950,43 @@ static int unpack_vlcs(Vp3DecodeContext *s, GetBitContext *gb, int zero_run = 0; DCTELEM coeff = 0; Vp3Fragment *fragment; - uint8_t *perm= s->scantable.permutated; int bits_to_get; + int next_fragment; + int previous_fragment; + int fragment_num; + int *list_head; - if ((first_fragment >= s->fragment_count) || - (last_fragment >= s->fragment_count)) { - - av_log(s->avctx, AV_LOG_ERROR, " vp3:unpack_vlcs(): bad fragment number (%d -> %d ?)\n", - first_fragment, last_fragment); - return 0; + /* local references to structure members to avoid repeated deferences */ + uint8_t *perm= s->scantable.permutated; + int *coded_fragment_list = s->coded_fragment_list; + Vp3Fragment *all_fragments = s->all_fragments; + uint8_t *coeff_counts = s->coeff_counts; + VLC_TYPE (*vlc_table)[2] = table->table; + int *fast_fragment_list = s->fast_fragment_list; + + if (y_plane) { + next_fragment = s->fragment_list_y_head; + list_head = &s->fragment_list_y_head; + } else { + next_fragment = 
s->fragment_list_c_head; + list_head = &s->fragment_list_c_head; } - for (i = first_fragment; i <= last_fragment; i++) { + i = next_fragment; + previous_fragment = -1; /* this indicates that the previous fragment is actually the list head */ + while (i != -1) { + fragment_num = coded_fragment_list[i]; - fragment = &s->all_fragments[s->coded_fragment_list[i]]; - if (fragment->coeff_count > coeff_index) + if (coeff_counts[fragment_num] > coeff_index) { + previous_fragment = i; + i = fast_fragment_list[i]; continue; + } + fragment = &all_fragments[fragment_num]; if (!eob_run) { /* decode a VLC into a token */ - token = get_vlc2(gb, table->table, 5, 3); - debug_vlc(" token = %2d, ", token); + token = get_vlc2(gb, vlc_table, 5, 3); /* use the token to get a zero run, a coefficient, and an eob run */ if (token <= 6) { eob_run = eob_run_base[token]; @@ -1180,10 +995,9 @@ static int unpack_vlcs(Vp3DecodeContext *s, GetBitContext *gb, coeff = zero_run = 0; } else { bits_to_get = coeff_get_bits[token]; - if (!bits_to_get) - coeff = coeff_tables[token][0]; - else - coeff = coeff_tables[token][get_bits(gb, bits_to_get)]; + if (bits_to_get) + bits_to_get = get_bits(gb, bits_to_get); + coeff = coeff_tables[token][bits_to_get]; zero_run = zero_run_base[token]; if (zero_run_get_bits[token]) @@ -1192,27 +1006,37 @@ static int unpack_vlcs(Vp3DecodeContext *s, GetBitContext *gb, } if (!eob_run) { - fragment->coeff_count += zero_run; - if (fragment->coeff_count < 64){ + coeff_counts[fragment_num] += zero_run; + if (coeff_counts[fragment_num] < 64){ fragment->next_coeff->coeff= coeff; - fragment->next_coeff->index= perm[fragment->coeff_count++]; //FIXME perm here already? + fragment->next_coeff->index= perm[coeff_counts[fragment_num]++]; //FIXME perm here already? fragment->next_coeff->next= s->next_coeff; s->next_coeff->next=NULL; fragment->next_coeff= s->next_coeff++; } - debug_vlc(" fragment %d coeff = %d\n", - s->coded_fragment_list[i], fragment->next_coeff[coeff_index]); + /* previous fragment is now this fragment */ + previous_fragment = i; } else { - fragment->coeff_count |= 128; - debug_vlc(" fragment %d eob with %d coefficients\n", - s->coded_fragment_list[i], fragment->coeff_count&127); + coeff_counts[fragment_num] |= 128; eob_run--; + /* remove this fragment from the list */ + if (previous_fragment != -1) + fast_fragment_list[previous_fragment] = fast_fragment_list[i]; + else + *list_head = fast_fragment_list[i]; + /* previous fragment remains unchanged */ } + + i = fast_fragment_list[i]; } return eob_run; } +static void reverse_dc_prediction(Vp3DecodeContext *s, + int first_fragment, + int fragment_width, + int fragment_height); /* * This function unpacks all of the DCT coefficient data from the * bitstream. 
@@ -1225,81 +1049,64 @@ static int unpack_dct_coeffs(Vp3DecodeContext *s, GetBitContext *gb) int ac_y_table; int ac_c_table; int residual_eob_run = 0; + VLC *y_tables[64]; + VLC *c_tables[64]; - /* fetch the DC table indices */ + /* fetch the DC table indexes */ dc_y_table = get_bits(gb, 4); dc_c_table = get_bits(gb, 4); /* unpack the Y plane DC coefficients */ - debug_vp3(" vp3: unpacking Y plane DC coefficients using table %d\n", - dc_y_table); residual_eob_run = unpack_vlcs(s, gb, &s->dc_vlc[dc_y_table], 0, - s->first_coded_y_fragment, s->last_coded_y_fragment, residual_eob_run); + 1, residual_eob_run); + + /* reverse prediction of the Y-plane DC coefficients */ + reverse_dc_prediction(s, 0, s->fragment_width, s->fragment_height); /* unpack the C plane DC coefficients */ - debug_vp3(" vp3: unpacking C plane DC coefficients using table %d\n", - dc_c_table); residual_eob_run = unpack_vlcs(s, gb, &s->dc_vlc[dc_c_table], 0, - s->first_coded_c_fragment, s->last_coded_c_fragment, residual_eob_run); + 0, residual_eob_run); - /* fetch the AC table indices */ + /* reverse prediction of the C-plane DC coefficients */ + if (!(s->avctx->flags & CODEC_FLAG_GRAY)) + { + reverse_dc_prediction(s, s->fragment_start[1], + s->fragment_width / 2, s->fragment_height / 2); + reverse_dc_prediction(s, s->fragment_start[2], + s->fragment_width / 2, s->fragment_height / 2); + } + + /* fetch the AC table indexes */ ac_y_table = get_bits(gb, 4); ac_c_table = get_bits(gb, 4); - /* unpack the group 1 AC coefficients (coeffs 1-5) */ + /* build tables of AC VLC tables */ for (i = 1; i <= 5; i++) { - - debug_vp3(" vp3: unpacking level %d Y plane AC coefficients using table %d\n", - i, ac_y_table); - residual_eob_run = unpack_vlcs(s, gb, &s->ac_vlc_1[ac_y_table], i, - s->first_coded_y_fragment, s->last_coded_y_fragment, residual_eob_run); - - debug_vp3(" vp3: unpacking level %d C plane AC coefficients using table %d\n", - i, ac_c_table); - residual_eob_run = unpack_vlcs(s, gb, &s->ac_vlc_1[ac_c_table], i, - s->first_coded_c_fragment, s->last_coded_c_fragment, residual_eob_run); + y_tables[i] = &s->ac_vlc_1[ac_y_table]; + c_tables[i] = &s->ac_vlc_1[ac_c_table]; } - - /* unpack the group 2 AC coefficients (coeffs 6-14) */ for (i = 6; i <= 14; i++) { - - debug_vp3(" vp3: unpacking level %d Y plane AC coefficients using table %d\n", - i, ac_y_table); - residual_eob_run = unpack_vlcs(s, gb, &s->ac_vlc_2[ac_y_table], i, - s->first_coded_y_fragment, s->last_coded_y_fragment, residual_eob_run); - - debug_vp3(" vp3: unpacking level %d C plane AC coefficients using table %d\n", - i, ac_c_table); - residual_eob_run = unpack_vlcs(s, gb, &s->ac_vlc_2[ac_c_table], i, - s->first_coded_c_fragment, s->last_coded_c_fragment, residual_eob_run); + y_tables[i] = &s->ac_vlc_2[ac_y_table]; + c_tables[i] = &s->ac_vlc_2[ac_c_table]; } - - /* unpack the group 3 AC coefficients (coeffs 15-27) */ for (i = 15; i <= 27; i++) { - - debug_vp3(" vp3: unpacking level %d Y plane AC coefficients using table %d\n", - i, ac_y_table); - residual_eob_run = unpack_vlcs(s, gb, &s->ac_vlc_3[ac_y_table], i, - s->first_coded_y_fragment, s->last_coded_y_fragment, residual_eob_run); - - debug_vp3(" vp3: unpacking level %d C plane AC coefficients using table %d\n", - i, ac_c_table); - residual_eob_run = unpack_vlcs(s, gb, &s->ac_vlc_3[ac_c_table], i, - s->first_coded_c_fragment, s->last_coded_c_fragment, residual_eob_run); + y_tables[i] = &s->ac_vlc_3[ac_y_table]; + c_tables[i] = &s->ac_vlc_3[ac_c_table]; } - - /* unpack the group 4 AC coefficients (coeffs 28-63) */ 
for (i = 28; i <= 63; i++) { + y_tables[i] = &s->ac_vlc_4[ac_y_table]; + c_tables[i] = &s->ac_vlc_4[ac_c_table]; + } - debug_vp3(" vp3: unpacking level %d Y plane AC coefficients using table %d\n", - i, ac_y_table); - residual_eob_run = unpack_vlcs(s, gb, &s->ac_vlc_4[ac_y_table], i, - s->first_coded_y_fragment, s->last_coded_y_fragment, residual_eob_run); + /* decode all AC coefficents */ + for (i = 1; i <= 63; i++) { + if (s->fragment_list_y_head != -1) + residual_eob_run = unpack_vlcs(s, gb, y_tables[i], i, + 1, residual_eob_run); - debug_vp3(" vp3: unpacking level %d C plane AC coefficients using table %d\n", - i, ac_c_table); - residual_eob_run = unpack_vlcs(s, gb, &s->ac_vlc_4[ac_c_table], i, - s->first_coded_c_fragment, s->last_coded_c_fragment, residual_eob_run); + if (s->fragment_list_c_head != -1) + residual_eob_run = unpack_vlcs(s, gb, c_tables[i], i, + 0, residual_eob_run); } return 0; @@ -1312,7 +1119,6 @@ static int unpack_dct_coeffs(Vp3DecodeContext *s, GetBitContext *gb) */ #define COMPATIBLE_FRAME(x) \ (compatible_frame[s->all_fragments[x].coding_method] == current_frame_type) -#define FRAME_CODED(x) (s->all_fragments[x].coding_method != MODE_COPY) #define DC_COEFF(u) (s->coeffs[u].index ? 0 : s->coeffs[u].coeff) //FIXME do somethin to simplify this static void reverse_dc_prediction(Vp3DecodeContext *s, @@ -1334,7 +1140,7 @@ static void reverse_dc_prediction(Vp3DecodeContext *s, /* DC values for the left, up-left, up, and up-right fragments */ int vl, vul, vu, vur; - /* indices for the left, up-left, up, and up-right fragments */ + /* indexes for the left, up-left, up, and up-right fragments */ int l, ul, u, ur; /* @@ -1344,7 +1150,7 @@ static void reverse_dc_prediction(Vp3DecodeContext *s, * 2: up-right multiplier * 3: left multiplier */ - int predictor_transform[16][4] = { + static const int predictor_transform[16][4] = { { 0, 0, 0, 0}, { 0, 0, 0,128}, // PL { 0, 0,128, 0}, // PUR @@ -1369,7 +1175,7 @@ static void reverse_dc_prediction(Vp3DecodeContext *s, * from other INTRA blocks. There are 2 golden frame coding types; * blocks encoding in these modes can only predict from other blocks * that were encoded with these 1 of these 2 modes. 
*/ - unsigned char compatible_frame[8] = { + static const unsigned char compatible_frame[9] = { 1, /* MODE_INTER_NO_MV */ 0, /* MODE_INTRA */ 1, /* MODE_INTER_PLUS_MV */ @@ -1377,7 +1183,8 @@ static void reverse_dc_prediction(Vp3DecodeContext *s, 1, /* MODE_INTER_PRIOR_MV */ 2, /* MODE_USING_GOLDEN */ 2, /* MODE_GOLDEN_MV */ - 1 /* MODE_INTER_FOUR_MV */ + 1, /* MODE_INTER_FOUR_MV */ + 3 /* MODE_COPY */ }; int current_frame_type; @@ -1386,8 +1193,6 @@ static void reverse_dc_prediction(Vp3DecodeContext *s, int transform = 0; - debug_vp3(" vp3: reversing DC prediction\n"); - vul = vu = vur = vl = 0; last_dc[0] = last_dc[1] = last_dc[2] = 0; @@ -1402,45 +1207,38 @@ static void reverse_dc_prediction(Vp3DecodeContext *s, current_frame_type = compatible_frame[s->all_fragments[i].coding_method]; - debug_dc_pred(" frag %d: orig DC = %d, ", - i, DC_COEFF(i)); transform= 0; if(x){ l= i-1; vl = DC_COEFF(l); - if(FRAME_CODED(l) && COMPATIBLE_FRAME(l)) + if(COMPATIBLE_FRAME(l)) transform |= PL; } if(y){ u= i-fragment_width; vu = DC_COEFF(u); - if(FRAME_CODED(u) && COMPATIBLE_FRAME(u)) + if(COMPATIBLE_FRAME(u)) transform |= PU; if(x){ ul= i-fragment_width-1; vul = DC_COEFF(ul); - if(FRAME_CODED(ul) && COMPATIBLE_FRAME(ul)) + if(COMPATIBLE_FRAME(ul)) transform |= PUL; } if(x + 1 < fragment_width){ ur= i-fragment_width+1; vur = DC_COEFF(ur); - if(FRAME_CODED(ur) && COMPATIBLE_FRAME(ur)) + if(COMPATIBLE_FRAME(ur)) transform |= PUR; } } - debug_dc_pred("transform = %d, ", transform); - if (transform == 0) { /* if there were no fragments to predict from, use last * DC saved */ predicted_dc = last_dc[current_frame_type]; - debug_dc_pred("from last DC (%d) = %d\n", - current_frame_type, DC_COEFF(i)); - } else { /* apply the appropriate predictor transform */ @@ -1454,7 +1252,7 @@ static void reverse_dc_prediction(Vp3DecodeContext *s, /* check for outranging on the [ul u l] and * [ul u ur l] predictors */ - if ((transform == 13) || (transform == 15)) { + if ((transform == 15) || (transform == 13)) { if (FFABS(predicted_dc - vu) > 128) predicted_dc = vu; else if (FFABS(predicted_dc - vl) > 128) @@ -1462,9 +1260,6 @@ static void reverse_dc_prediction(Vp3DecodeContext *s, else if (FFABS(predicted_dc - vul) > 128) predicted_dc = vul; } - - debug_dc_pred("from pred DC = %d\n", - DC_COEFF(i)); } /* at long last, apply the predictor */ @@ -1477,8 +1272,8 @@ static void reverse_dc_prediction(Vp3DecodeContext *s, s->coeffs[i].coeff += predicted_dc; /* save the DC */ last_dc[current_frame_type] = DC_COEFF(i); - if(DC_COEFF(i) && !(s->all_fragments[i].coeff_count&127)){ - s->all_fragments[i].coeff_count= 129; + if(DC_COEFF(i) && !(s->coeff_counts[i]&127)){ + s->coeff_counts[i]= 129; // s->all_fragments[i].next_coeff= s->next_coeff; s->coeffs[i].next= s->next_coeff; (s->next_coeff++)->next=NULL; @@ -1488,11 +1283,99 @@ static void reverse_dc_prediction(Vp3DecodeContext *s, } } +static void apply_loop_filter(Vp3DecodeContext *s, int plane, int ystart, int yend) +{ + int x, y; + int *bounding_values= s->bounding_values_array+127; + + int width = s->fragment_width >> !!plane; + int height = s->fragment_height >> !!plane; + int fragment = s->fragment_start [plane] + ystart * width; + int stride = s->current_frame.linesize[plane]; + uint8_t *plane_data = s->current_frame.data [plane]; + if (!s->flipped_image) stride = -stride; + plane_data += s->data_offset[plane] + 8*ystart*stride; + + for (y = ystart; y < yend; y++) { + + for (x = 0; x < width; x++) { + /* This code basically just deblocks on the edges of coded blocks. 
+ * However, it has to be much more complicated because of the + * braindamaged deblock ordering used in VP3/Theora. Order matters + * because some pixels get filtered twice. */ + if( s->all_fragments[fragment].coding_method != MODE_COPY ) + { + /* do not perform left edge filter for left columns frags */ + if (x > 0) { + s->dsp.vp3_h_loop_filter( + plane_data + 8*x, + stride, bounding_values); + } + + /* do not perform top edge filter for top row fragments */ + if (y > 0) { + s->dsp.vp3_v_loop_filter( + plane_data + 8*x, + stride, bounding_values); + } + + /* do not perform right edge filter for right column + * fragments or if right fragment neighbor is also coded + * in this frame (it will be filtered in next iteration) */ + if ((x < width - 1) && + (s->all_fragments[fragment + 1].coding_method == MODE_COPY)) { + s->dsp.vp3_h_loop_filter( + plane_data + 8*x + 8, + stride, bounding_values); + } + + /* do not perform bottom edge filter for bottom row + * fragments or if bottom fragment neighbor is also coded + * in this frame (it will be filtered in the next row) */ + if ((y < height - 1) && + (s->all_fragments[fragment + width].coding_method == MODE_COPY)) { + s->dsp.vp3_v_loop_filter( + plane_data + 8*x + 8*stride, + stride, bounding_values); + } + } + + fragment++; + } + plane_data += 8*stride; + } +} + +/** + * called when all pixels up to row y are complete + */ +static void vp3_draw_horiz_band(Vp3DecodeContext *s, int y) +{ + int h, cy; + int offset[4]; + + if(s->avctx->draw_horiz_band==NULL) + return; -static void horizontal_filter(unsigned char *first_pixel, int stride, - int *bounding_values); -static void vertical_filter(unsigned char *first_pixel, int stride, - int *bounding_values); + h= y - s->last_slice_end; + y -= h; + + if (!s->flipped_image) { + if (y == 0) + h -= s->height - s->avctx->height; // account for non-mod16 + y = s->height - y - h; + } + + cy = y >> 1; + offset[0] = s->current_frame.linesize[0]*y; + offset[1] = s->current_frame.linesize[1]*cy; + offset[2] = s->current_frame.linesize[2]*cy; + offset[3] = 0; + + emms_c(); + s->avctx->draw_horiz_band(s->avctx, &s->current_frame, offset, y, 3, h); + s->last_slice_end= y + h; +} /* * Perform the final rendering for a particular slice of data. 
@@ -1501,28 +1384,26 @@ static void vertical_filter(unsigned char *first_pixel, int stride, static void render_slice(Vp3DecodeContext *s, int slice) { int x; - int m, n; int16_t *dequantizer; - DECLARE_ALIGNED_16(DCTELEM, block[64]); + DECLARE_ALIGNED_16(DCTELEM, block)[64]; int motion_x = 0xdeadbeef, motion_y = 0xdeadbeef; int motion_halfpel_index; uint8_t *motion_source; int plane; - int current_macroblock_entry = slice * s->macroblock_width * 6; if (slice >= s->macroblock_height) return; for (plane = 0; plane < 3; plane++) { - uint8_t *output_plane = s->current_frame.data [plane]; - uint8_t * last_plane = s-> last_frame.data [plane]; - uint8_t *golden_plane = s-> golden_frame.data [plane]; + uint8_t *output_plane = s->current_frame.data [plane] + s->data_offset[plane]; + uint8_t * last_plane = s-> last_frame.data [plane] + s->data_offset[plane]; + uint8_t *golden_plane = s-> golden_frame.data [plane] + s->data_offset[plane]; int stride = s->current_frame.linesize[plane]; int plane_width = s->width >> !!plane; int plane_height = s->height >> !!plane; int y = slice * FRAGMENT_PIXELS << !plane ; int slice_height = y + (FRAGMENT_PIXELS << !plane); - int i = s->macroblock_fragments[current_macroblock_entry + plane + 3*!!plane]; + int i = s->fragment_start[plane] + (y>>3)*(s->fragment_width>>!!plane); if (!s->flipped_image) stride = -stride; @@ -1535,6 +1416,7 @@ static void render_slice(Vp3DecodeContext *s, int slice) /* for each fragment in a row... */ for (x = 0; x < plane_width; x += 8, i++) { + int first_pixel = y*stride + x; if ((i < 0) || (i >= s->fragment_count)) { av_log(s->avctx, AV_LOG_ERROR, " vp3:render_slice(): bad fragment number (%d)\n", i); @@ -1551,7 +1433,7 @@ static void render_slice(Vp3DecodeContext *s, int slice) else motion_source= last_plane; - motion_source += s->all_fragments[i].first_pixel; + motion_source += first_pixel; motion_halfpel_index = 0; /* sort out the motion vector if this fragment is coded @@ -1597,36 +1479,32 @@ static void render_slice(Vp3DecodeContext *s, int slice) put_no_rnd_pixels_tab is better optimzed */ if(motion_halfpel_index != 3){ s->dsp.put_no_rnd_pixels_tab[1][motion_halfpel_index]( - output_plane + s->all_fragments[i].first_pixel, + output_plane + first_pixel, motion_source, stride, 8); }else{ int d= (motion_x ^ motion_y)>>31; // d is 0 if motion_x and _y have the same sign, else -1 s->dsp.put_no_rnd_pixels_l2[1]( - output_plane + s->all_fragments[i].first_pixel, + output_plane + first_pixel, motion_source - d, motion_source + stride + 1 + d, stride, 8); } - dequantizer = s->qmat[1][plane]; + dequantizer = s->qmat[s->all_fragments[i].qpi][1][plane]; }else{ - dequantizer = s->qmat[0][plane]; + dequantizer = s->qmat[s->all_fragments[i].qpi][0][plane]; } /* dequantize the DCT coefficients */ - debug_idct("fragment %d, coding mode %d, DC = %d, dequant = %d:\n", - i, s->all_fragments[i].coding_method, - DC_COEFF(i), dequantizer[0]); - if(s->avctx->idct_algo==FF_IDCT_VP3){ Coeff *coeff= s->coeffs + i; - memset(block, 0, sizeof(block)); + s->dsp.clear_block(block); while(coeff->next){ block[coeff->index]= coeff->coeff * dequantizer[coeff->index]; coeff= coeff->next; } }else{ Coeff *coeff= s->coeffs + i; - memset(block, 0, sizeof(block)); + s->dsp.clear_block(block); while(coeff->next){ block[coeff->index]= (coeff->coeff * dequantizer[coeff->index] + 2)>>2; coeff= coeff->next; @@ -1639,71 +1517,29 @@ static void render_slice(Vp3DecodeContext *s, int slice) if(s->avctx->idct_algo!=FF_IDCT_VP3) block[0] += 128<<3; s->dsp.idct_put( - output_plane + 
s->all_fragments[i].first_pixel, + output_plane + first_pixel, stride, block); } else { s->dsp.idct_add( - output_plane + s->all_fragments[i].first_pixel, + output_plane + first_pixel, stride, block); } - - debug_idct("block after idct_%s():\n", - (s->all_fragments[i].coding_method == MODE_INTRA)? - "put" : "add"); - for (m = 0; m < 8; m++) { - for (n = 0; n < 8; n++) { - debug_idct(" %3d", *(output_plane + - s->all_fragments[i].first_pixel + (m * stride + n))); - } - debug_idct("\n"); - } - debug_idct("\n"); - } else { /* copy directly from the previous frame */ s->dsp.put_pixels_tab[1][0]( - output_plane + s->all_fragments[i].first_pixel, - last_plane + s->all_fragments[i].first_pixel, + output_plane + first_pixel, + last_plane + first_pixel, stride, 8); } -#if 0 - /* perform the left edge filter if: - * - the fragment is not on the left column - * - the fragment is coded in this frame - * - the fragment is not coded in this frame but the left - * fragment is coded in this frame (this is done instead - * of a right edge filter when rendering the left fragment - * since this fragment is not available yet) */ - if ((x > 0) && - ((s->all_fragments[i].coding_method != MODE_COPY) || - ((s->all_fragments[i].coding_method == MODE_COPY) && - (s->all_fragments[i - 1].coding_method != MODE_COPY)) )) { - horizontal_filter( - output_plane + s->all_fragments[i].first_pixel + 7*stride, - -stride, s->bounding_values_array + 127); - } - - /* perform the top edge filter if: - * - the fragment is not on the top row - * - the fragment is coded in this frame - * - the fragment is not coded in this frame but the above - * fragment is coded in this frame (this is done instead - * of a bottom edge filter when rendering the above - * fragment since this fragment is not available yet) */ - if ((y > 0) && - ((s->all_fragments[i].coding_method != MODE_COPY) || - ((s->all_fragments[i].coding_method == MODE_COPY) && - (s->all_fragments[i - fragment_width].coding_method != MODE_COPY)) )) { - vertical_filter( - output_plane + s->all_fragments[i].first_pixel - stride, - -stride, s->bounding_values_array + 127); - } -#endif } + // Filter the previous block row. 
We can't filter the current row yet + // since it needs pixels from the next row + if (y > 0) + apply_loop_filter(s, plane, (y>>3)-1, (y>>3)); } } @@ -1715,228 +1551,15 @@ static void render_slice(Vp3DecodeContext *s, int slice) * dispatch (slice - 1); */ - emms_c(); -} - -static void horizontal_filter(unsigned char *first_pixel, int stride, - int *bounding_values) -{ - unsigned char *end; - int filter_value; - - for (end= first_pixel + 8*stride; first_pixel != end; first_pixel += stride) { - filter_value = - (first_pixel[-2] - first_pixel[ 1]) - +3*(first_pixel[ 0] - first_pixel[-1]); - filter_value = bounding_values[(filter_value + 4) >> 3]; - first_pixel[-1] = av_clip_uint8(first_pixel[-1] + filter_value); - first_pixel[ 0] = av_clip_uint8(first_pixel[ 0] - filter_value); - } -} - -static void vertical_filter(unsigned char *first_pixel, int stride, - int *bounding_values) -{ - unsigned char *end; - int filter_value; - const int nstride= -stride; - - for (end= first_pixel + 8; first_pixel < end; first_pixel++) { - filter_value = - (first_pixel[2 * nstride] - first_pixel[ stride]) - +3*(first_pixel[0 ] - first_pixel[nstride]); - filter_value = bounding_values[(filter_value + 4) >> 3]; - first_pixel[nstride] = av_clip_uint8(first_pixel[nstride] + filter_value); - first_pixel[0] = av_clip_uint8(first_pixel[0] - filter_value); - } -} - -static void apply_loop_filter(Vp3DecodeContext *s) -{ - int plane; - int x, y; - int *bounding_values= s->bounding_values_array+127; - -#if 0 - int bounding_values_array[256]; - int filter_limit; - - /* find the right loop limit value */ - for (x = 63; x >= 0; x--) { - if (vp31_ac_scale_factor[x] >= s->quality_index) - break; - } - filter_limit = vp31_filter_limit_values[s->quality_index]; - - /* set up the bounding values */ - memset(bounding_values_array, 0, 256 * sizeof(int)); - for (x = 0; x < filter_limit; x++) { - bounding_values[-x - filter_limit] = -filter_limit + x; - bounding_values[-x] = -x; - bounding_values[x] = x; - bounding_values[x + filter_limit] = filter_limit - x; - } -#endif - - for (plane = 0; plane < 3; plane++) { - int width = s->fragment_width >> !!plane; - int height = s->fragment_height >> !!plane; - int fragment = s->fragment_start [plane]; - int stride = s->current_frame.linesize[plane]; - uint8_t *plane_data = s->current_frame.data [plane]; - if (!s->flipped_image) stride = -stride; - - for (y = 0; y < height; y++) { - - for (x = 0; x < width; x++) { -START_TIMER - /* do not perform left edge filter for left columns frags */ - if ((x > 0) && - (s->all_fragments[fragment].coding_method != MODE_COPY)) { - horizontal_filter( - plane_data + s->all_fragments[fragment].first_pixel, - stride, bounding_values); - } - - /* do not perform top edge filter for top row fragments */ - if ((y > 0) && - (s->all_fragments[fragment].coding_method != MODE_COPY)) { - vertical_filter( - plane_data + s->all_fragments[fragment].first_pixel, - stride, bounding_values); - } - - /* do not perform right edge filter for right column - * fragments or if right fragment neighbor is also coded - * in this frame (it will be filtered in next iteration) */ - if ((x < width - 1) && - (s->all_fragments[fragment].coding_method != MODE_COPY) && - (s->all_fragments[fragment + 1].coding_method == MODE_COPY)) { - horizontal_filter( - plane_data + s->all_fragments[fragment + 1].first_pixel, - stride, bounding_values); - } - - /* do not perform bottom edge filter for bottom row - * fragments or if bottom fragment neighbor is also coded - * in this frame (it will be filtered 
-            if ((y < height - 1) &&
-                (s->all_fragments[fragment].coding_method != MODE_COPY) &&
-                (s->all_fragments[fragment + width].coding_method == MODE_COPY)) {
-                vertical_filter(
-                    plane_data + s->all_fragments[fragment + width].first_pixel,
-                    stride, bounding_values);
-            }
-
-            fragment++;
-STOP_TIMER("loop filter")
-            }
-        }
-    }
-}
-
-/*
- * This function computes the first pixel addresses for each fragment.
- * This function needs to be invoked after the first frame is allocated
- * so that it has access to the plane strides.
- */
-static void vp3_calculate_pixel_addresses(Vp3DecodeContext *s)
-{
-
-    int i, x, y;
-
-    /* figure out the first pixel addresses for each of the fragments */
-    /* Y plane */
-    i = 0;
-    for (y = s->fragment_height; y > 0; y--) {
-        for (x = 0; x < s->fragment_width; x++) {
-            s->all_fragments[i++].first_pixel =
-                s->golden_frame.linesize[0] * y * FRAGMENT_PIXELS -
-                    s->golden_frame.linesize[0] +
-                    x * FRAGMENT_PIXELS;
-            debug_init("  fragment %d, first pixel @ %d\n",
-                i-1, s->all_fragments[i-1].first_pixel);
-        }
-    }
-
-    /* U plane */
-    i = s->fragment_start[1];
-    for (y = s->fragment_height / 2; y > 0; y--) {
-        for (x = 0; x < s->fragment_width / 2; x++) {
-            s->all_fragments[i++].first_pixel =
-                s->golden_frame.linesize[1] * y * FRAGMENT_PIXELS -
-                    s->golden_frame.linesize[1] +
-                    x * FRAGMENT_PIXELS;
-            debug_init("  fragment %d, first pixel @ %d\n",
-                i-1, s->all_fragments[i-1].first_pixel);
-        }
-    }
-
-    /* V plane */
-    i = s->fragment_start[2];
-    for (y = s->fragment_height / 2; y > 0; y--) {
-        for (x = 0; x < s->fragment_width / 2; x++) {
-            s->all_fragments[i++].first_pixel =
-                s->golden_frame.linesize[2] * y * FRAGMENT_PIXELS -
-                    s->golden_frame.linesize[2] +
-                    x * FRAGMENT_PIXELS;
-            debug_init("  fragment %d, first pixel @ %d\n",
-                i-1, s->all_fragments[i-1].first_pixel);
-        }
-    }
-}
-
-/* FIXME: this should be merged with the above! */
-static void theora_calculate_pixel_addresses(Vp3DecodeContext *s)
-{
-
-    int i, x, y;
-
-    /* figure out the first pixel addresses for each of the fragments */
-    /* Y plane */
-    i = 0;
-    for (y = 1; y <= s->fragment_height; y++) {
-        for (x = 0; x < s->fragment_width; x++) {
-            s->all_fragments[i++].first_pixel =
-                s->golden_frame.linesize[0] * y * FRAGMENT_PIXELS -
-                    s->golden_frame.linesize[0] +
-                    x * FRAGMENT_PIXELS;
-            debug_init("  fragment %d, first pixel @ %d\n",
-                i-1, s->all_fragments[i-1].first_pixel);
-        }
-    }
-
-    /* U plane */
-    i = s->fragment_start[1];
-    for (y = 1; y <= s->fragment_height / 2; y++) {
-        for (x = 0; x < s->fragment_width / 2; x++) {
-            s->all_fragments[i++].first_pixel =
-                s->golden_frame.linesize[1] * y * FRAGMENT_PIXELS -
-                    s->golden_frame.linesize[1] +
-                    x * FRAGMENT_PIXELS;
-            debug_init("  fragment %d, first pixel @ %d\n",
-                i-1, s->all_fragments[i-1].first_pixel);
-        }
-    }
-
-    /* V plane */
-    i = s->fragment_start[2];
-    for (y = 1; y <= s->fragment_height / 2; y++) {
-        for (x = 0; x < s->fragment_width / 2; x++) {
-            s->all_fragments[i++].first_pixel =
-                s->golden_frame.linesize[2] * y * FRAGMENT_PIXELS -
-                    s->golden_frame.linesize[2] +
-                    x * FRAGMENT_PIXELS;
-            debug_init("  fragment %d, first pixel @ %d\n",
-                i-1, s->all_fragments[i-1].first_pixel);
-        }
-    }
+    // now that we've filtered the last rows, they're safe to display
+    if (slice)
+        vp3_draw_horiz_band(s, 16*slice);
 }
 
 /*
  * This is the ffmpeg/libavcodec API init function.
  */
-static int vp3_decode_init(AVCodecContext *avctx)
+static av_cold int vp3_decode_init(AVCodecContext *avctx)
 {
     Vp3DecodeContext *s = avctx->priv_data;
     int i, inter, plane;
@@ -1951,10 +1574,10 @@ static int vp3_decode_init(AVCodecContext *avctx)
     s->version = 1;
 
     s->avctx = avctx;
-    s->width = (avctx->width + 15) & 0xFFFFFFF0;
-    s->height = (avctx->height + 15) & 0xFFFFFFF0;
+    s->width = FFALIGN(avctx->width, 16);
+    s->height = FFALIGN(avctx->height, 16);
     avctx->pix_fmt = PIX_FMT_YUV420P;
-    avctx->has_b_frames = 0;
+    avctx->chroma_sample_location = AVCHROMA_LOC_CENTER;
     if(avctx->idct_algo==FF_IDCT_AUTO)
         avctx->idct_algo=FF_IDCT_VP3;
     dsputil_init(&s->dsp, avctx);
@@ -1963,7 +1586,8 @@ static int vp3_decode_init(AVCodecContext *avctx)
 
     /* initialize to an impossible value which will force a recalculation
      * in the first frame decode */
-    s->quality_index = -1;
+    for (i = 0; i < 3; i++)
+        s->qps[i] = -1;
 
     s->y_superblock_width = (s->width + 31) / 32;
     s->y_superblock_height = (s->height + 31) / 32;
@@ -1993,27 +1617,16 @@ static int vp3_decode_init(AVCodecContext *avctx)
     s->fragment_start[1] = s->fragment_width * s->fragment_height;
     s->fragment_start[2] = s->fragment_width * s->fragment_height * 5 / 4;
 
-    debug_init("  Y plane: %d x %d\n", s->width, s->height);
-    debug_init("  C plane: %d x %d\n", c_width, c_height);
-    debug_init("  Y superblocks: %d x %d, %d total\n",
-        s->y_superblock_width, s->y_superblock_height, y_superblock_count);
-    debug_init("  C superblocks: %d x %d, %d total\n",
-        s->c_superblock_width, s->c_superblock_height, c_superblock_count);
-    debug_init("  total superblocks = %d, U starts @ %d, V starts @ %d\n",
-        s->superblock_count, s->u_superblock_start, s->v_superblock_start);
-    debug_init("  macroblocks: %d x %d, %d total\n",
-        s->macroblock_width, s->macroblock_height, s->macroblock_count);
-    debug_init("  %d fragments, %d x %d, u starts @ %d, v starts @ %d\n",
-        s->fragment_count,
-        s->fragment_width,
-        s->fragment_height,
-        s->fragment_start[1],
-        s->fragment_start[2]);
-
     s->all_fragments = av_malloc(s->fragment_count * sizeof(Vp3Fragment));
+    s->coeff_counts = av_malloc(s->fragment_count * sizeof(*s->coeff_counts));
     s->coeffs = av_malloc(s->fragment_count * sizeof(Coeff) * 65);
     s->coded_fragment_list = av_malloc(s->fragment_count * sizeof(int));
-    s->pixel_addresses_inited = 0;
+    s->fast_fragment_list = av_malloc(s->fragment_count * sizeof(int));
+    if (!s->superblock_coding || !s->all_fragments || !s->coeff_counts ||
+        !s->coeffs || !s->coded_fragment_list || !s->fast_fragment_list) {
+        vp3_decode_end(avctx);
+        return -1;
+    }
 
     if (!s->theora_tables)
     {
@@ -2067,29 +1680,34 @@ static int vp3_decode_init(AVCodecContext *avctx)
 
     for (i = 0; i < 16; i++) {
 
         /* DC histograms */
-        init_vlc(&s->dc_vlc[i], 5, 32,
+        if (init_vlc(&s->dc_vlc[i], 5, 32,
             &s->huffman_table[i][0][1], 4, 2,
-            &s->huffman_table[i][0][0], 4, 2, 0);
+            &s->huffman_table[i][0][0], 4, 2, 0) < 0)
+            goto vlc_fail;
 
         /* group 1 AC histograms */
-        init_vlc(&s->ac_vlc_1[i], 5, 32,
+        if (init_vlc(&s->ac_vlc_1[i], 5, 32,
            &s->huffman_table[i+16][0][1], 4, 2,
-            &s->huffman_table[i+16][0][0], 4, 2, 0);
+            &s->huffman_table[i+16][0][0], 4, 2, 0) < 0)
+            goto vlc_fail;
 
         /* group 2 AC histograms */
-        init_vlc(&s->ac_vlc_2[i], 5, 32,
+        if (init_vlc(&s->ac_vlc_2[i], 5, 32,
            &s->huffman_table[i+16*2][0][1], 4, 2,
-            &s->huffman_table[i+16*2][0][0], 4, 2, 0);
+            &s->huffman_table[i+16*2][0][0], 4, 2, 0) < 0)
+            goto vlc_fail;
 
         /* group 3 AC histograms */
-        init_vlc(&s->ac_vlc_3[i], 5, 32,
+        if (init_vlc(&s->ac_vlc_3[i], 5, 32,
            &s->huffman_table[i+16*3][0][1], 4, 2,
-            &s->huffman_table[i+16*3][0][0], 4, 2, 0);
+            &s->huffman_table[i+16*3][0][0], 4, 2, 0) < 0)
+            goto vlc_fail;
 
         /* group 4 AC histograms */
-        init_vlc(&s->ac_vlc_4[i], 5, 32,
+        if (init_vlc(&s->ac_vlc_4[i], 5, 32,
            &s->huffman_table[i+16*4][0][1], 4, 2,
-            &s->huffman_table[i+16*4][0][0], 4, 2, 0);
+            &s->huffman_table[i+16*4][0][0], 4, 2, 0) < 0)
+            goto vlc_fail;
     }
 }
 
@@ -2111,9 +1729,11 @@ static int vp3_decode_init(AVCodecContext *avctx)
 
     /* work out the block mapping tables */
     s->superblock_fragments = av_malloc(s->superblock_count * 16 * sizeof(int));
-    s->superblock_macroblocks = av_malloc(s->superblock_count * 4 * sizeof(int));
-    s->macroblock_fragments = av_malloc(s->macroblock_count * 6 * sizeof(int));
     s->macroblock_coding = av_malloc(s->macroblock_count + 1);
+    if (!s->superblock_fragments || !s->macroblock_coding) {
+        vp3_decode_end(avctx);
+        return -1;
+    }
     init_block_mapping(s);
 
     for (i = 0; i < 3; i++) {
@@ -2123,6 +1743,10 @@ static int vp3_decode_init(AVCodecContext *avctx)
     }
 
     return 0;
+
+vlc_fail:
+    av_log(avctx, AV_LOG_FATAL, "Invalid huffman table\n");
+    return -1;
 }
 
 /*
@@ -2130,8 +1754,10 @@ static int vp3_decode_init(AVCodecContext *avctx)
  */
 static int vp3_decode_frame(AVCodecContext *avctx,
                             void *data, int *data_size,
-                            uint8_t *buf, int buf_size)
+                            AVPacket *avpkt)
 {
+    const uint8_t *buf = avpkt->data;
+    int buf_size = avpkt->size;
     Vp3DecodeContext *s = avctx->priv_data;
     GetBitContext gb;
     static int counter = 0;
@@ -2141,51 +1767,39 @@ static int vp3_decode_frame(AVCodecContext *avctx,
 
     if (s->theora && get_bits1(&gb))
     {
-#if 1
         av_log(avctx, AV_LOG_ERROR, "Header packet passed to frame decoder, skipping\n");
         return -1;
-#else
-        int ptype = get_bits(&gb, 7);
-
-        skip_bits(&gb, 6*8); /* "theora" */
-
-        switch(ptype)
-        {
-            case 1:
-                theora_decode_comments(avctx, &gb);
-                break;
-            case 2:
-                theora_decode_tables(avctx, &gb);
-                init_dequantizer(s);
-                break;
-            default:
-                av_log(avctx, AV_LOG_ERROR, "Unknown Theora config packet: %d\n", ptype);
-        }
-        return buf_size;
-#endif
     }
 
     s->keyframe = !get_bits1(&gb);
     if (!s->theora)
         skip_bits(&gb, 1);
-    s->last_quality_index = s->quality_index;
+    for (i = 0; i < 3; i++)
+        s->last_qps[i] = s->qps[i];
 
-    s->nqis=0;
+    s->nqps=0;
     do{
-        s->qis[s->nqis++]= get_bits(&gb, 6);
-    } while(s->theora >= 0x030200 && s->nqis<3 && get_bits1(&gb));
-
-    s->quality_index= s->qis[0];
+        s->qps[s->nqps++]= get_bits(&gb, 6);
+    } while(s->theora >= 0x030200 && s->nqps<3 && get_bits1(&gb));
+    for (i = s->nqps; i < 3; i++)
+        s->qps[i] = -1;
 
     if (s->avctx->debug & FF_DEBUG_PICT_INFO)
         av_log(s->avctx, AV_LOG_INFO, " VP3 %sframe #%d: Q index = %d\n",
-            s->keyframe?"key":"", counter, s->quality_index);
+            s->keyframe?"key":"", counter, s->qps[0]);
     counter++;
 
-    if (s->quality_index != s->last_quality_index) {
-        init_dequantizer(s);
+    if (s->qps[0] != s->last_qps[0])
         init_loop_filter(s);
-    }
+
+    for (i = 0; i < s->nqps; i++)
+        // reinit all dequantizers if the first one changed, because
+        // the DC of the first quantizer must be used for all matrices
+        if (s->qps[i] != s->last_qps[i] || s->qps[0] != s->last_qps[0])
+            init_dequantizer(s, i);
+
+    if (avctx->skip_frame >= AVDISCARD_NONKEY && !s->keyframe)
+        return buf_size;
 
     if (s->keyframe) {
         if (!s->theora)
@@ -2225,20 +1839,10 @@ static int vp3_decode_frame(AVCodecContext *avctx,
 
         /* golden frame is also the current frame */
         s->current_frame= s->golden_frame;
-
-        /* time to figure out pixel addresses? */
-        if (!s->pixel_addresses_inited)
-        {
-            if (!s->flipped_image)
-                vp3_calculate_pixel_addresses(s);
-            else
-                theora_calculate_pixel_addresses(s);
-            s->pixel_addresses_inited = 1;
-        }
     } else {
         /* allocate a new current frame */
         s->current_frame.reference = 3;
-        if (!s->pixel_addresses_inited) {
+        if (!s->golden_frame.data[0]) {
            av_log(s->avctx, AV_LOG_ERROR, "vp3: first frame not a keyframe\n");
            return -1;
        }
@@ -2251,69 +1855,46 @@ static int vp3_decode_frame(AVCodecContext *avctx,
     s->current_frame.qscale_table= s->qscale_table; //FIXME allocate individual tables per AVFrame
     s->current_frame.qstride= 0;
 
-    {START_TIMER
     init_frame(s, &gb);
-    STOP_TIMER("init_frame")}
-
-#if KEYFRAMES_ONLY
-if (!s->keyframe) {
-
-    memcpy(s->current_frame.data[0], s->golden_frame.data[0],
-        s->current_frame.linesize[0] * s->height);
-    memcpy(s->current_frame.data[1], s->golden_frame.data[1],
-        s->current_frame.linesize[1] * s->height / 2);
-    memcpy(s->current_frame.data[2], s->golden_frame.data[2],
-        s->current_frame.linesize[2] * s->height / 2);
-
-} else {
-#endif
-
-    {START_TIMER
     if (unpack_superblocks(s, &gb)){
         av_log(s->avctx, AV_LOG_ERROR, "error in unpack_superblocks\n");
         return -1;
     }
-    STOP_TIMER("unpack_superblocks")}
-    {START_TIMER
     if (unpack_modes(s, &gb)){
         av_log(s->avctx, AV_LOG_ERROR, "error in unpack_modes\n");
         return -1;
     }
-    STOP_TIMER("unpack_modes")}
-    {START_TIMER
     if (unpack_vectors(s, &gb)){
         av_log(s->avctx, AV_LOG_ERROR, "error in unpack_vectors\n");
         return -1;
     }
-    STOP_TIMER("unpack_vectors")}
-    {START_TIMER
+    if (unpack_block_qpis(s, &gb)){
+        av_log(s->avctx, AV_LOG_ERROR, "error in unpack_block_qpis\n");
+        return -1;
+    }
     if (unpack_dct_coeffs(s, &gb)){
         av_log(s->avctx, AV_LOG_ERROR, "error in unpack_dct_coeffs\n");
         return -1;
     }
-    STOP_TIMER("unpack_dct_coeffs")}
-    {START_TIMER
-    reverse_dc_prediction(s, 0, s->fragment_width, s->fragment_height);
-    if ((avctx->flags & CODEC_FLAG_GRAY) == 0) {
-        reverse_dc_prediction(s, s->fragment_start[1],
-            s->fragment_width / 2, s->fragment_height / 2);
-        reverse_dc_prediction(s, s->fragment_start[2],
-            s->fragment_width / 2, s->fragment_height / 2);
+    for (i = 0; i < 3; i++) {
+        if (s->flipped_image)
+            s->data_offset[i] = 0;
+        else
+            s->data_offset[i] = ((s->height>>!!i)-1) * s->current_frame.linesize[i];
     }
-    STOP_TIMER("reverse_dc_prediction")}
-    {START_TIMER
 
+    s->last_slice_end = 0;
     for (i = 0; i < s->macroblock_height; i++)
         render_slice(s, i);
-    STOP_TIMER("render_fragments")}
 
-    {START_TIMER
-    apply_loop_filter(s);
-    STOP_TIMER("apply_loop_filter")}
-#if KEYFRAMES_ONLY
-}
-#endif
+    // filter the last row
+    for (i = 0; i < 3; i++) {
+        int row = (s->height >> (3+!!i)) - 1;
+        apply_loop_filter(s, i, row, row+1);
+    }
+    vp3_draw_horiz_band(s, s->height);
 
     *data_size=sizeof(AVFrame);
     *(AVFrame*)data= s->current_frame;
@@ -2334,18 +1915,33 @@ if (!s->keyframe) {
 
 /*
  * This is the ffmpeg/libavcodec API module cleanup function.
  */
-static int vp3_decode_end(AVCodecContext *avctx)
+static av_cold int vp3_decode_end(AVCodecContext *avctx)
 {
     Vp3DecodeContext *s = avctx->priv_data;
+    int i;
 
+    av_free(s->superblock_coding);
     av_free(s->all_fragments);
+    av_free(s->coeff_counts);
     av_free(s->coeffs);
     av_free(s->coded_fragment_list);
+    av_free(s->fast_fragment_list);
     av_free(s->superblock_fragments);
-    av_free(s->superblock_macroblocks);
-    av_free(s->macroblock_fragments);
     av_free(s->macroblock_coding);
 
+    for (i = 0; i < 16; i++) {
+        free_vlc(&s->dc_vlc[i]);
+        free_vlc(&s->ac_vlc_1[i]);
+        free_vlc(&s->ac_vlc_2[i]);
+        free_vlc(&s->ac_vlc_3[i]);
+        free_vlc(&s->ac_vlc_4[i]);
+    }
+
+    free_vlc(&s->superblock_run_length_vlc);
+    free_vlc(&s->fragment_run_length_vlc);
+    free_vlc(&s->mode_code_vlc);
+    free_vlc(&s->motion_vector_vlc);
+
     /* release all frames */
     if (s->golden_frame.data[0] && s->golden_frame.data[0] != s->last_frame.data[0])
         avctx->release_buffer(avctx, &s->golden_frame);
@@ -2361,7 +1957,7 @@ static int read_huffman_tree(AVCodecContext *avctx, GetBitContext *gb)
 {
     Vp3DecodeContext *s = avctx->priv_data;
 
-    if (get_bits(gb, 1)) {
+    if (get_bits1(gb)) {
         int token;
         if (s->entries >= 32) { /* overflow */
             av_log(avctx, AV_LOG_ERROR, "huffman tree overflow\n");
@@ -2380,21 +1976,25 @@ static int read_huffman_tree(AVCodecContext *avctx, GetBitContext *gb)
         }
         s->huff_code_size++;
         s->hbits <<= 1;
-        read_huffman_tree(avctx, gb);
+        if (read_huffman_tree(avctx, gb))
+            return -1;
         s->hbits |= 1;
-        read_huffman_tree(avctx, gb);
+        if (read_huffman_tree(avctx, gb))
+            return -1;
        s->hbits >>= 1;
        s->huff_code_size--;
     }
     return 0;
 }
 
+#if CONFIG_THEORA_DECODER
 static int theora_decode_header(AVCodecContext *avctx, GetBitContext *gb)
 {
     Vp3DecodeContext *s = avctx->priv_data;
+    int visible_width, visible_height, colorspace;
 
     s->theora = get_bits_long(gb, 24);
-    av_log(avctx, AV_LOG_INFO, "Theora bitstream version %X\n", s->theora);
+    av_log(avctx, AV_LOG_DEBUG, "Theora bitstream version %X\n", s->theora);
 
     /* 3.2.0 aka alpha3 has the same frame orientation as original vp3 */
     /* but previous versions have the image flipped relative to vp3 */
@@ -2404,8 +2004,8 @@ static int theora_decode_header(AVCodecContext *avctx, GetBitContext *gb)
         av_log(avctx, AV_LOG_DEBUG, "Old (<alpha3) Theora bitstream, flipped image\n");
     }
 
-    s->width = get_bits(gb, 16) << 4;
-    s->height = get_bits(gb, 16) << 4;
+    visible_width = s->width = get_bits(gb, 16) << 4;
+    visible_height = s->height = get_bits(gb, 16) << 4;
 
     if(avcodec_check_dimensions(avctx, s->width, s->height)){
         av_log(avctx, AV_LOG_ERROR, "Invalid dimensions (%dx%d)\n", s->width, s->height);
@@ -2413,27 +2013,13 @@ static int theora_decode_header(AVCodecContext *avctx, GetBitContext *gb)
        return -1;
    }
 
-    if (s->theora >= 0x030400)
-    {
-        skip_bits(gb, 32); /* total number of superblocks in a frame */
-        // fixme, the next field is 36bits long
-        skip_bits(gb, 32); /* total number of blocks in a frame */
-        skip_bits(gb, 4); /* total number of blocks in a frame */
-        skip_bits(gb, 32); /* total number of macroblocks in a frame */
-
-        skip_bits(gb, 24); /* frame width */
-        skip_bits(gb, 24); /* frame height */
-    }
-    else
-    {
-        skip_bits(gb, 24); /* frame width */
-        skip_bits(gb, 24); /* frame height */
-    }
+    if (s->theora >= 0x030200) {
+        visible_width  = get_bits_long(gb, 24);
+        visible_height = get_bits_long(gb, 24);
 
-    if (s->theora >= 0x030200) {
-        skip_bits(gb, 8); /* offset x */
-        skip_bits(gb, 8); /* offset y */
-    }
+        skip_bits(gb, 8); /* offset x */
+        skip_bits(gb, 8); /* offset y */
+    }
 
     skip_bits(gb, 32); /* fps numerator */
     skip_bits(gb, 32); /* fps denumerator */
@@ -2442,9 +2028,7 @@ static int theora_decode_header(AVCodecContext *avctx, GetBitContext *gb)
     if (s->theora < 0x030200)
         skip_bits(gb, 5); /* keyframe frequency force */
 
-    skip_bits(gb, 8); /* colorspace */
-    if (s->theora >= 0x030400)
-        skip_bits(gb, 2); /* pixel format: 420,res,422,444 */
+    colorspace = get_bits(gb, 8);
     skip_bits(gb, 24); /* bitrate */
 
     skip_bits(gb, 6); /* quality hint */
@@ -2452,15 +2036,27 @@ static int theora_decode_header(AVCodecContext *avctx, GetBitContext *gb)
 
     if (s->theora >= 0x030200)
     {
         skip_bits(gb, 5); /* keyframe frequency force */
-
-        if (s->theora < 0x030400)
-            skip_bits(gb, 5); /* spare bits */
+        skip_bits(gb, 2); /* pixel format: 420,res,422,444 */
+        skip_bits(gb, 3); /* reserved */
     }
 
//    align_get_bits(gb);
 
-    avctx->width = s->width;
-    avctx->height = s->height;
+    if (   visible_width  <= s->width && visible_width  > s->width-16
+        && visible_height <= s->height && visible_height > s->height-16)
+        avcodec_set_dimensions(avctx, visible_width, visible_height);
+    else
+        avcodec_set_dimensions(avctx, s->width, s->height);
+
+    if (colorspace == 1) {
+        avctx->color_primaries = AVCOL_PRI_BT470M;
+    } else if (colorspace == 2) {
+        avctx->color_primaries = AVCOL_PRI_BT470BG;
+    }
+
+    if (colorspace == 1 || colorspace == 2) {
+        avctx->colorspace = AVCOL_SPC_BT470BG;
+        avctx->color_trc = AVCOL_TRC_BT709;
+    }
 
     return 0;
 }
@@ -2473,8 +2069,13 @@ static int theora_decode_tables(AVCodecContext *avctx, GetBitContext *gb)
 
     if (s->theora >= 0x030200) {
         n = get_bits(gb, 3); /* loop filter limit values table */
-        for (i = 0; i < 64; i++)
+        for (i = 0; i < 64; i++) {
             s->filter_limit_values[i] = get_bits(gb, n);
+            if (s->filter_limit_values[i] > 127) {
+                av_log(avctx, AV_LOG_ERROR, "filter limit value too large (%i > 127), clamping\n", s->filter_limit_values[i]);
+                s->filter_limit_values[i] = 127;
+            }
+        }
     }
 
     if (s->theora >= 0x030200)
@@ -2512,10 +2113,10 @@ static int theora_decode_tables(AVCodecContext *avctx, GetBitContext *gb)
         for (plane = 0; plane <= 2; plane++) {
             int newqr= 1;
             if (inter || plane > 0)
-                newqr = get_bits(gb, 1);
+                newqr = get_bits1(gb);
             if (!newqr) {
                 int qtj, plj;
-                if(inter && get_bits(gb, 1)){
+                if(inter && get_bits1(gb)){
                     qtj = 0;
                     plj = plane;
                 }else{
@@ -2556,11 +2157,13 @@ static int theora_decode_tables(AVCodecContext *avctx, GetBitContext *gb)
 
     for (s->hti = 0; s->hti < 80; s->hti++) {
         s->entries = 0;
         s->huff_code_size = 1;
-        if (!get_bits(gb, 1)) {
+        if (!get_bits1(gb)) {
             s->hbits = 0;
-            read_huffman_tree(avctx, gb);
+            if(read_huffman_tree(avctx, gb))
+                return -1;
             s->hbits = 1;
-            read_huffman_tree(avctx, gb);
+            if(read_huffman_tree(avctx, gb))
+                return -1;
         }
     }
 
@@ -2569,13 +2172,14 @@ static int theora_decode_tables(AVCodecContext *avctx, GetBitContext *gb)
     return 0;
 }
 
-static int theora_decode_init(AVCodecContext *avctx)
+static av_cold int theora_decode_init(AVCodecContext *avctx)
 {
     Vp3DecodeContext *s = avctx->priv_data;
     GetBitContext gb;
     int ptype;
-    uint8_t *p= avctx->extradata;
-    int op_bytes, i;
+    uint8_t *header_start[3];
+    int header_len[3];
+    int i;
 
     s->theora = 1;
 
@@ -2585,15 +2189,16 @@ static int theora_decode_init(AVCodecContext *avctx)
        return -1;
    }
 
-    for(i=0;i<3;i++) {
-        op_bytes = *(p++)<<8;
-        op_bytes += *(p++);
+    if (ff_split_xiph_headers(avctx->extradata, avctx->extradata_size,
+                              42, header_start, header_len) < 0) {
+        av_log(avctx, AV_LOG_ERROR, "Corrupt extradata\n");
+        return -1;
+    }
 
-        init_get_bits(&gb, p, op_bytes);
-        p += op_bytes;
+    for(i=0;i<3;i++) {
+        init_get_bits(&gb, header_start[i], header_len[i] * 8);
 
        ptype = get_bits(&gb, 8);
 
-        debug_vp3("Theora headerpacket type: %x\n", ptype);
-
        if (!(ptype & 0x80))
        {
@@ -2602,7 +2207,7 @@ static int theora_decode_init(AVCodecContext *avctx)
        }
 
        // FIXME: Check for this as well.
-        skip_bits(&gb, 6*8); /* "theora" */
+        skip_bits_long(&gb, 6*8); /* "theora" */
 
        switch(ptype)
        {
@@ -2614,44 +2219,47 @@ static int theora_decode_init(AVCodecContext *avctx)
//                theora_decode_comments(avctx, gb);
                break;
            case 0x82:
-                theora_decode_tables(avctx, &gb);
+                if (theora_decode_tables(avctx, &gb))
+                    return -1;
                break;
            default:
                av_log(avctx, AV_LOG_ERROR, "Unknown Theora config packet: %d\n", ptype&~0x80);
                break;
        }
-        if(8*op_bytes != get_bits_count(&gb))
-            av_log(avctx, AV_LOG_ERROR, "%d bits left in packet %X\n", 8*op_bytes - get_bits_count(&gb), ptype);
+        if(ptype != 0x81 && 8*header_len[i] != get_bits_count(&gb))
+            av_log(avctx, AV_LOG_WARNING, "%d bits left in packet %X\n", 8*header_len[i] - get_bits_count(&gb), ptype);
        if (s->theora < 0x030200)
            break;
    }
 
-    vp3_decode_init(avctx);
-    return 0;
+    return vp3_decode_init(avctx);
 }
 
-AVCodec vp3_decoder = {
-    "vp3",
+AVCodec theora_decoder = {
+    "theora",
    CODEC_TYPE_VIDEO,
-    CODEC_ID_VP3,
+    CODEC_ID_THEORA,
    sizeof(Vp3DecodeContext),
-    vp3_decode_init,
+    theora_decode_init,
    NULL,
    vp3_decode_end,
    vp3_decode_frame,
-    0,
-    NULL
+    CODEC_CAP_DR1 | CODEC_CAP_DRAW_HORIZ_BAND,
+    NULL,
+    .long_name = NULL_IF_CONFIG_SMALL("Theora"),
};
+#endif
 
-AVCodec theora_decoder = {
-    "theora",
+AVCodec vp3_decoder = {
+    "vp3",
    CODEC_TYPE_VIDEO,
-    CODEC_ID_THEORA,
+    CODEC_ID_VP3,
    sizeof(Vp3DecodeContext),
-    theora_decode_init,
+    vp3_decode_init,
    NULL,
    vp3_decode_end,
    vp3_decode_frame,
-    0,
-    NULL
+    CODEC_CAP_DR1 | CODEC_CAP_DRAW_HORIZ_BAND,
+    NULL,
+    .long_name = NULL_IF_CONFIG_SMALL("On2 VP3"),
};
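
The hunks above remove the whole-frame deblocking pass (horizontal_filter(), vertical_filter(), apply_loop_filter()) in favour of per-row filtering inside render_slice(), but the arithmetic is unchanged: a 4-tap difference across each 8-pixel block edge is mapped through a ramp of "bounding values" built from the frame's filter limit, exactly as the disabled #if 0 block sketches. The following stand-alone C sketch restates that math outside the decoder; the helper names (build_bounding_values, filter_horizontal_edge, clip_uint8) are illustrative and not the decoder's own API, which prepares the ramp once per frame in init_loop_filter() whenever qps[0] changes.

#include <stdint.h>
#include <string.h>

/* Build the bounding-values ramp used by the deblocker.  Assumes
 * filter_limit <= 64 so the 256-entry table cannot overflow (the
 * built-in VP3.1 limit table stays well below that). */
static void build_bounding_values(int bounding_values_array[256], int filter_limit)
{
    int *bounding_values = bounding_values_array + 127; /* centre the ramp */
    int x;

    memset(bounding_values_array, 0, 256 * sizeof(int));
    for (x = 0; x < filter_limit; x++) {
        bounding_values[-x - filter_limit] = -filter_limit + x;
        bounding_values[-x]                = -x;
        bounding_values[x]                 =  x;
        bounding_values[x + filter_limit]  =  filter_limit - x;
    }
}

static uint8_t clip_uint8(int v)
{
    return v < 0 ? 0 : v > 255 ? 255 : (uint8_t)v;
}

/* Filter one horizontal block edge (the job of the removed vertical_filter()):
 * walk 8 pixels along the edge, with "stride" stepping across it.  Only the
 * two pixels straddling the edge are adjusted, by a value from the ramp. */
static void filter_horizontal_edge(uint8_t *pix, int stride,
                                   const int bounding_values_array[256])
{
    const int *bounding_values = bounding_values_array + 127;
    uint8_t *end;

    for (end = pix + 8; pix < end; pix++) {
        int filter_value = (pix[-2 * stride] - pix[stride])
                       + 3 * (pix[0]         - pix[-stride]);
        filter_value = bounding_values[(filter_value + 4) >> 3];
        pix[-stride] = clip_uint8(pix[-stride] + filter_value);
        pix[0]       = clip_uint8(pix[0]       - filter_value);
    }
}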
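
The reworked render_slice()/vp3_decode_frame() flow defers deblocking by one block row: a row can only be filtered once the row below it has been reconstructed, and only filtered rows are announced through vp3_draw_horiz_band() (hence the new CODEC_CAP_DRAW_HORIZ_BAND flag in the codec entries). A minimal sketch of that lag-one-row ordering, using hypothetical render_row/filter_row/output_rows callbacks rather than the decoder's real per-plane functions:

/* Sketch only: the decoder works per plane with 8-pixel fragment rows and
 * 16-pixel slices; this boils the ordering down to one callback per step. */
typedef struct {
    void *ctx;
    void (*render_row)(void *ctx, int row);    /* reconstruct blocks of one row   */
    void (*filter_row)(void *ctx, int row);    /* deblock a fully-surrounded row  */
    void (*output_rows)(void *ctx, int y_end); /* rows [0, y_end) are displayable */
} RowPipeline;

static void decode_all_rows(const RowPipeline *p, int num_block_rows)
{
    int row;

    for (row = 0; row < num_block_rows; row++) {
        p->render_row(p->ctx, row);
        if (row > 0) {
            /* row-1 now has its bottom neighbour, so it can be deblocked ... */
            p->filter_row(p->ctx, row - 1);
            /* ... and everything above the current row is final */
            p->output_rows(p->ctx, 8 * row);
        }
    }
    if (num_block_rows > 0) {
        /* the last row has no neighbour below; filter and flush it directly */
        p->filter_row(p->ctx, num_block_rows - 1);
        p->output_rows(p->ctx, 8 * num_block_rows);
    }
}

This mirrors the ordering of "if (y > 0) apply_loop_filter(s, plane, (y>>3)-1, (y>>3))" inside the slice loop, the trailing "filter the last row" loop, and the final vp3_draw_horiz_band(s, s->height) call in vp3_decode_frame().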
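
theora_decode_init() now hands extradata parsing to ff_split_xiph_headers() from libavcodec/xiph.c instead of the removed hand-rolled loop. The removed lines document the legacy layout that loop expected: three Theora header packets, each preceded by a 16-bit big-endian length. The splitter below is a hedged sketch of that legacy layout only; it is not FFmpeg API, and the real helper is more general (it also accepts the Xiph lacing-style layout and, as the hunk above shows, is told to expect a 42-byte identification header).

#include <stdint.h>

/* Illustrative splitter for the legacy extradata layout parsed by the removed
 * code: three packets, each prefixed by a 16-bit big-endian byte count. */
static int split_legacy_theora_headers(const uint8_t *extradata, int extradata_size,
                                       const uint8_t *header_start[3], int header_len[3])
{
    const uint8_t *p   = extradata;
    const uint8_t *end = extradata + extradata_size;
    int i;

    for (i = 0; i < 3; i++) {
        int op_bytes;

        if (end - p < 2)
            return -1;                 /* truncated length field */
        op_bytes = (p[0] << 8) | p[1]; /* same as *(p++)<<8; op_bytes += *(p++) above */
        p += 2;

        if (end - p < op_bytes)
            return -1;                 /* truncated packet */
        header_start[i] = p;
        header_len[i]   = op_bytes;
        p += op_bytes;
    }
    return 0;
}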