git.sesse.net Git - ffmpeg/blob - libavcodec/svq3.c

   1 /*
   2  * Copyright (c) 2003 The FFmpeg Project
   3  *
   4  * This file is part of FFmpeg.
   5  *
   6  * FFmpeg is free software; you can redistribute it and/or
   7  * modify it under the terms of the GNU Lesser General Public
   8  * License as published by the Free Software Foundation; either
   9  * version 2.1 of the License, or (at your option) any later version.
  10  *
  11  * FFmpeg is distributed in the hope that it will be useful,
  12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14  * Lesser General Public License for more details.
  15  *
  16  * You should have received a copy of the GNU Lesser General Public
  17  * License along with FFmpeg; if not, write to the Free Software
  18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  19  */
  20
  21 /*
  22  * How to use this decoder:
  23  * SVQ3 data is transported within Apple QuickTime files. QuickTime files
  24  * have stsd atoms to describe media trak properties. An stsd atom for a
  25  * video trak contains 1 or more ImageDescription atoms. These atoms begin
  26  * with the 4-byte length of the atom followed by the codec fourcc. Some
  27  * decoders need information in this atom to operate correctly. Such
  28  * is the case with SVQ3. In order to get the best use out of this decoder,
  29  * the calling app must make the SVQ3 ImageDescription atom available
  30  * via the AVCodecContext's extradata[_size] field:
  31  *
  32  * AVCodecContext.extradata = pointer to ImageDescription, first characters
  33  * are expected to be 'S', 'V', 'Q', and '3', NOT the 4-byte atom length
  34  * AVCodecContext.extradata_size = size of ImageDescription atom memory
  35  * buffer (which will be the same as the ImageDescription atom size field
  36  * from the QT file, minus 4 bytes since the length is missing)
  37  *
  38  * You will know you have these parameters passed correctly when the decoder
  39  * correctly decodes this file:
  40  *  http://samples.mplayerhq.hu/V-codecs/SVQ3/Vertical400kbit.sorenson3.mov
  41  */
  42
  43 #include <inttypes.h>
  44
  45 #include "libavutil/attributes.h"
  46 #include "internal.h"
  47 #include "avcodec.h"
  48 #include "mpegutils.h"
  49 #include "h264.h"
  50
  51 #include "h264data.h" // FIXME FIXME FIXME
  52
  53 #include "h264_mvpred.h"
  54 #include "golomb.h"
  55 #include "hpeldsp.h"
  56 #include "rectangle.h"
  57 #include "tpeldsp.h"
  58 #include "vdpau_internal.h"
  59
  60 #if CONFIG_ZLIB
  61 #include <zlib.h>
  62 #endif
  63
  64 #include "svq1.h"
  65 #include "svq3.h"
  66
  67 /**
  68  * @file
  69  * svq3 decoder.
  70  */
  71
  /**
   * Private decoder state of the SVQ3 decoder.
   *
   * Fields without a comment are not used by the code visible in this part
   * of the file, so their role is not described here.
   */
  72 typedef struct {
  73     H264Context h;             ///< embedded H.264 context; bit reader, picture data and caches are accessed through it
  74     HpelDSPContext hdsp;       ///< put/avg pixel function tables used for non-thirdpel motion compensation
  75     TpelDSPContext tdsp;       ///< put/avg pixel function tables used for thirdpel motion compensation
  76     H264Picture *cur_pic;      ///< NOTE(review): the functions visible here write through h->cur_pic, not this pointer - confirm role
  77     H264Picture *next_pic;     ///< reference used when dir != 0; also supplies co-located mb_type/motion_val for B-frame skip handling
  78     H264Picture *last_pic;     ///< reference used when dir == 0
  79     int halfpel_flag;          ///< allows HALFPEL_MODE to be selected for inter macroblocks
  80     int thirdpel_flag;         ///< allows THIRDPEL_MODE to be selected for inter macroblocks
  81     int unknown_flag;
  82     int next_slice_index;
  83     uint32_t watermark_key;
  84     uint8_t *buf;
  85     int buf_size;
  86     int adaptive_quant;        ///< if set, a qscale delta is read for coded macroblocks of non-I pictures
  87     int next_p_frame_damaged;
  88     int h_edge_pos;            ///< horizontal limit used for motion vector clipping and edge emulation
  89     int v_edge_pos;            ///< vertical limit used for motion vector clipping and edge emulation
  90     int last_frame_output;
  91 } SVQ3Context;
  92
  93 #define FULLPEL_MODE  1 /* svq3_mc_dir(): vector rounded to whole pixels before motion compensation */
  94 #define HALFPEL_MODE  2 /* svq3_mc_dir(): vector rounded to half pixels, low bits select the sub-pel function */
  95 #define THIRDPEL_MODE 3 /* svq3_mc_dir(): vector kept at third-pel precision, tpel functions are used */
  96 #define PREDICT_MODE  4 /* svq3_mc_dir(): no differential is read; vectors are scaled from s->next_pic */
  97
  98 /* dual scan (from some older h264 draft)
  99  * o-->o-->o   o
 100  *         |  /|
 101  * o   o   o / o
 102  * | / |   |/  |
 103  * o   o   o   o
 104  *   /
 105  * o-->o-->o-->o
 106  */
 107 static const uint8_t svq3_scan[16] = {
 108     0 + 0 * 4, 1 + 0 * 4, 2 + 0 * 4, 2 + 1 * 4,
 109     2 + 2 * 4, 3 + 0 * 4, 3 + 1 * 4, 3 + 2 * 4,
 110     0 + 1 * 4, 0 + 2 * 4, 1 + 1 * 4, 1 + 2 * 4,
 111     0 + 3 * 4, 1 + 3 * 4, 2 + 3 * 4, 3 + 3 * 4,
 112 };
 113
 114 static const uint8_t luma_dc_zigzag_scan[16] = {
 115     0 * 16 + 0 * 64, 1 * 16 + 0 * 64, 2 * 16 + 0 * 64, 0 * 16 + 2 * 64,
 116     3 * 16 + 0 * 64, 0 * 16 + 1 * 64, 1 * 16 + 1 * 64, 2 * 16 + 1 * 64,
 117     1 * 16 + 2 * 64, 2 * 16 + 2 * 64, 3 * 16 + 2 * 64, 0 * 16 + 3 * 64,
 118     3 * 16 + 1 * 64, 1 * 16 + 3 * 64, 2 * 16 + 3 * 64, 3 * 16 + 3 * 64,
 119 };
 120
 121 static const uint8_t svq3_pred_0[25][2] = {
 122     { 0, 0 },
 123     { 1, 0 }, { 0, 1 },
 124     { 0, 2 }, { 1, 1 }, { 2, 0 },
 125     { 3, 0 }, { 2, 1 }, { 1, 2 }, { 0, 3 },
 126     { 0, 4 }, { 1, 3 }, { 2, 2 }, { 3, 1 }, { 4, 0 },
 127     { 4, 1 }, { 3, 2 }, { 2, 3 }, { 1, 4 },
 128     { 2, 4 }, { 3, 3 }, { 4, 2 },
 129     { 4, 3 }, { 3, 4 },
 130     { 4, 4 }
 131 };
 132
 133 static const int8_t svq3_pred_1[6][6][5] = {
 134     { { 2, -1, -1, -1, -1 }, { 2, 1, -1, -1, -1 }, { 1, 2, -1, -1, -1 },
 135       { 2,  1, -1, -1, -1 }, { 1, 2, -1, -1, -1 }, { 1, 2, -1, -1, -1 } },
 136     { { 0,  2, -1, -1, -1 }, { 0, 2,  1,  4,  3 }, { 0, 1,  2,  4,  3 },
 137       { 0,  2,  1,  4,  3 }, { 2, 0,  1,  3,  4 }, { 0, 4,  2,  1,  3 } },
 138     { { 2,  0, -1, -1, -1 }, { 2, 1,  0,  4,  3 }, { 1, 2,  4,  0,  3 },
 139       { 2,  1,  0,  4,  3 }, { 2, 1,  4,  3,  0 }, { 1, 2,  4,  0,  3 } },
 140     { { 2,  0, -1, -1, -1 }, { 2, 0,  1,  4,  3 }, { 1, 2,  0,  4,  3 },
 141       { 2,  1,  0,  4,  3 }, { 2, 1,  3,  4,  0 }, { 2, 4,  1,  0,  3 } },
 142     { { 0,  2, -1, -1, -1 }, { 0, 2,  1,  3,  4 }, { 1, 2,  3,  0,  4 },
 143       { 2,  0,  1,  3,  4 }, { 2, 1,  3,  0,  4 }, { 2, 0,  4,  3,  1 } },
 144     { { 0,  2, -1, -1, -1 }, { 0, 2,  4,  1,  3 }, { 1, 4,  2,  0,  3 },
 145       { 4,  2,  0,  1,  3 }, { 2, 0,  1,  4,  3 }, { 4, 2,  1,  0,  3 } },
 146 };
 147
 148 static const struct {
 149     uint8_t run;
 150     uint8_t level;
 151 } svq3_dct_tables[2][16] = {
 152     { { 0, 0 }, { 0, 1 }, { 1, 1 }, { 2, 1 }, { 0, 2 }, { 3, 1 }, { 4, 1 }, { 5, 1 },
 153       { 0, 3 }, { 1, 2 }, { 2, 2 }, { 6, 1 }, { 7, 1 }, { 8, 1 }, { 9, 1 }, { 0, 4 } },
 154     { { 0, 0 }, { 0, 1 }, { 1, 1 }, { 0, 2 }, { 2, 1 }, { 0, 3 }, { 0, 4 }, { 0, 5 },
 155       { 3, 1 }, { 4, 1 }, { 1, 2 }, { 1, 3 }, { 0, 6 }, { 0, 7 }, { 0, 8 }, { 0, 9 } }
 156 };
 157
 158 static const uint32_t svq3_dequant_coeff[32] = {
 159      3881,  4351,  4890,  5481,   6154,   6914,   7761,   8718,
 160      9781, 10987, 12339, 13828,  15523,  17435,  19561,  21873,
 161     24552, 27656, 30847, 34870,  38807,  43747,  49103,  54683,
 162     61694, 68745, 77615, 89113, 100253, 109366, 126635, 141533
 163 };
 164
 165 static int svq3_decode_end(AVCodecContext *avctx);
 166
 167 void ff_svq3_luma_dc_dequant_idct_c(int16_t *output, int16_t *input, int qp)
 168 {
 169     const int qmul = svq3_dequant_coeff[qp];
 170 #define stride 16
 171     int i;
 172     int temp[16];
 173     static const uint8_t x_offset[4] = { 0, 1 * stride, 4 * stride, 5 * stride };
 174
 175     for (i = 0; i < 4; i++) {
 176         const int z0 = 13 * (input[4 * i + 0] +      input[4 * i + 2]);
 177         const int z1 = 13 * (input[4 * i + 0] -      input[4 * i + 2]);
 178         const int z2 =  7 *  input[4 * i + 1] - 17 * input[4 * i + 3];
 179         const int z3 = 17 *  input[4 * i + 1] +  7 * input[4 * i + 3];
 180
 181         temp[4 * i + 0] = z0 + z3;
 182         temp[4 * i + 1] = z1 + z2;
 183         temp[4 * i + 2] = z1 - z2;
 184         temp[4 * i + 3] = z0 - z3;
 185     }
 186
 187     for (i = 0; i < 4; i++) {
 188         const int offset = x_offset[i];
 189         const int z0     = 13 * (temp[4 * 0 + i] +      temp[4 * 2 + i]);
 190         const int z1     = 13 * (temp[4 * 0 + i] -      temp[4 * 2 + i]);
 191         const int z2     =  7 *  temp[4 * 1 + i] - 17 * temp[4 * 3 + i];
 192         const int z3     = 17 *  temp[4 * 1 + i] +  7 * temp[4 * 3 + i];
 193
 194         output[stride *  0 + offset] = (z0 + z3) * qmul + 0x80000 >> 20;
 195         output[stride *  2 + offset] = (z1 + z2) * qmul + 0x80000 >> 20;
 196         output[stride *  8 + offset] = (z1 - z2) * qmul + 0x80000 >> 20;
 197         output[stride * 10 + offset] = (z0 - z3) * qmul + 0x80000 >> 20;
 198     }
 199 }
 200 #undef stride
 201
 202 void ff_svq3_add_idct_c(uint8_t *dst, int16_t *block,
 203                         int stride, int qp, int dc)
 204 {
 205     const int qmul = svq3_dequant_coeff[qp];
 206     int i;
 207
 208     if (dc) {
 209         dc       = 13 * 13 * (dc == 1 ? 1538 * block[0]
 210                                       : qmul * (block[0] >> 3) / 2);
 211         block[0] = 0;
 212     }
 213
 214     for (i = 0; i < 4; i++) {
 215         const int z0 = 13 * (block[0 + 4 * i] +      block[2 + 4 * i]);
 216         const int z1 = 13 * (block[0 + 4 * i] -      block[2 + 4 * i]);
 217         const int z2 =  7 *  block[1 + 4 * i] - 17 * block[3 + 4 * i];
 218         const int z3 = 17 *  block[1 + 4 * i] +  7 * block[3 + 4 * i];
 219
 220         block[0 + 4 * i] = z0 + z3;
 221         block[1 + 4 * i] = z1 + z2;
 222         block[2 + 4 * i] = z1 - z2;
 223         block[3 + 4 * i] = z0 - z3;
 224     }
 225
 226     for (i = 0; i < 4; i++) {
 227         const int z0 = 13 * (block[i + 4 * 0] +      block[i + 4 * 2]);
 228         const int z1 = 13 * (block[i + 4 * 0] -      block[i + 4 * 2]);
 229         const int z2 =  7 *  block[i + 4 * 1] - 17 * block[i + 4 * 3];
 230         const int z3 = 17 *  block[i + 4 * 1] +  7 * block[i + 4 * 3];
 231         const int rr = (dc + 0x80000);
 232
 233         dst[i + stride * 0] = av_clip_uint8(dst[i + stride * 0] + ((z0 + z3) * qmul + rr >> 20));
 234         dst[i + stride * 1] = av_clip_uint8(dst[i + stride * 1] + ((z1 + z2) * qmul + rr >> 20));
 235         dst[i + stride * 2] = av_clip_uint8(dst[i + stride * 2] + ((z1 - z2) * qmul + rr >> 20));
 236         dst[i + stride * 3] = av_clip_uint8(dst[i + stride * 3] + ((z0 - z3) * qmul + rr >> 20));
 237     }
 238
 239     memset(block, 0, 16 * sizeof(int16_t));
 240 }
 241
 242 static inline int svq3_decode_block(GetBitContext *gb, int16_t *block,
 243                                     int index, const int type)
 244 {
 245     static const uint8_t *const scan_patterns[4] =
 246     { luma_dc_zigzag_scan, zigzag_scan, svq3_scan, chroma_dc_scan };
 247
 248     int run, level, sign, limit;
 249     unsigned vlc;
 250     const int intra           = 3 * type >> 2;
 251     const uint8_t *const scan = scan_patterns[type];
 252
 253     for (limit = (16 >> intra); index < 16; index = limit, limit += 8) {
 254         for (; (vlc = svq3_get_ue_golomb(gb)) != 0; index++) {
 255             if ((int32_t)vlc < 0)
 256                 return -1;
 257
 258             sign     = (vlc & 1) ? 0 : -1;
 259             vlc      = vlc + 1 >> 1;
 260
 261             if (type == 3) {
 262                 if (vlc < 3) {
 263                     run   = 0;
 264                     level = vlc;
 265                 } else if (vlc < 4) {
 266                     run   = 1;
 267                     level = 1;
 268                 } else {
 269                     run   = vlc & 0x3;
 270                     level = (vlc + 9 >> 2) - run;
 271                 }
 272             } else {
 273                 if (vlc < 16U) {
 274                     run   = svq3_dct_tables[intra][vlc].run;
 275                     level = svq3_dct_tables[intra][vlc].level;
 276                 } else if (intra) {
 277                     run   = vlc & 0x7;
 278                     level = (vlc >> 3) + ((run == 0) ? 8 : ((run < 2) ? 2 : ((run < 5) ? 0 : -1)));
 279                 } else {
 280                     run   = vlc & 0xF;
 281                     level = (vlc >> 4) + ((run == 0) ? 4 : ((run < 3) ? 2 : ((run < 10) ? 1 : 0)));
 282                 }
 283             }
 284
 285
 286             if ((index += run) >= limit)
 287                 return -1;
 288
 289             block[scan[index]] = (level ^ sign) - sign;
 290         }
 291
 292         if (type != 2) {
 293             break;
 294         }
 295     }
 296
 297     return 0;
 298 }
 299
 300 static inline void svq3_mc_dir_part(SVQ3Context *s,
 301                                     int x, int y, int width, int height,
 302                                     int mx, int my, int dxy,
 303                                     int thirdpel, int dir, int avg)
 304 {
 305     H264Context *h = &s->h;
 306     const H264Picture *pic = (dir == 0) ? s->last_pic : s->next_pic;
 307     uint8_t *src, *dest;
 308     int i, emu = 0;
 309     int blocksize = 2 - (width >> 3); // 16->0, 8->1, 4->2
 310
 311     mx += x;
 312     my += y;
 313
 314     if (mx < 0 || mx >= s->h_edge_pos - width  - 1 ||
 315         my < 0 || my >= s->v_edge_pos - height - 1) {
 316         emu = 1;
 317         mx = av_clip(mx, -16, s->h_edge_pos - width  + 15);
 318         my = av_clip(my, -16, s->v_edge_pos - height + 15);
 319     }
 320
 321     /* form component predictions */
 322     dest = h->cur_pic.f.data[0] + x + y * h->linesize;
 323     src  = pic->f.data[0] + mx + my * h->linesize;
 324
 325     if (emu) {
 326         h->vdsp.emulated_edge_mc(h->edge_emu_buffer, src,
 327                                  h->linesize, h->linesize,
 328                                  width + 1, height + 1,
 329                                  mx, my, s->h_edge_pos, s->v_edge_pos);
 330         src = h->edge_emu_buffer;
 331     }
 332     if (thirdpel)
 333         (avg ? s->tdsp.avg_tpel_pixels_tab
 334              : s->tdsp.put_tpel_pixels_tab)[dxy](dest, src, h->linesize,
 335                                                  width, height);
 336     else
 337         (avg ? s->hdsp.avg_pixels_tab
 338              : s->hdsp.put_pixels_tab)[blocksize][dxy](dest, src, h->linesize,
 339                                                        height);
 340
 341     if (!(h->flags & CODEC_FLAG_GRAY)) {
 342         mx     = mx + (mx < (int) x) >> 1;
 343         my     = my + (my < (int) y) >> 1;
 344         width  = width  >> 1;
 345         height = height >> 1;
 346         blocksize++;
 347
 348         for (i = 1; i < 3; i++) {
 349             dest = h->cur_pic.f.data[i] + (x >> 1) + (y >> 1) * h->uvlinesize;
 350             src  = pic->f.data[i] + mx + my * h->uvlinesize;
 351
 352             if (emu) {
 353                 h->vdsp.emulated_edge_mc(h->edge_emu_buffer, src,
 354                                          h->uvlinesize, h->uvlinesize,
 355                                          width + 1, height + 1,
 356                                          mx, my, (s->h_edge_pos >> 1),
 357                                          s->v_edge_pos >> 1);
 358                 src = h->edge_emu_buffer;
 359             }
 360             if (thirdpel)
 361                 (avg ? s->tdsp.avg_tpel_pixels_tab
 362                      : s->tdsp.put_tpel_pixels_tab)[dxy](dest, src,
 363                                                          h->uvlinesize,
 364                                                          width, height);
 365             else
 366                 (avg ? s->hdsp.avg_pixels_tab
 367                      : s->hdsp.put_pixels_tab)[blocksize][dxy](dest, src,
 368                                                                h->uvlinesize,
 369                                                                height);
 370         }
 371     }
 372 }
 373
 374 static inline int svq3_mc_dir(SVQ3Context *s, int size, int mode,
 375                               int dir, int avg)
 376 {
 377     int i, j, k, mx, my, dx, dy, x, y;
 378     H264Context *h          = &s->h;
 379     const int part_width    = ((size & 5) == 4) ? 4 : 16 >> (size & 1);
 380     const int part_height   = 16 >> ((unsigned)(size + 1) / 3);
 381     const int extra_width   = (mode == PREDICT_MODE) ? -16 * 6 : 0;
 382     const int h_edge_pos    = 6 * (s->h_edge_pos - part_width)  - extra_width;
 383     const int v_edge_pos    = 6 * (s->v_edge_pos - part_height) - extra_width;
 384
 385     for (i = 0; i < 16; i += part_height)
 386         for (j = 0; j < 16; j += part_width) {
 387             const int b_xy = (4 * h->mb_x + (j >> 2)) +
 388                              (4 * h->mb_y + (i >> 2)) * h->b_stride;
 389             int dxy;
 390             x = 16 * h->mb_x + j;
 391             y = 16 * h->mb_y + i;
 392             k = (j >> 2 & 1) + (i >> 1 & 2) +
 393                 (j >> 1 & 4) + (i      & 8);
 394
 395             if (mode != PREDICT_MODE) {
 396                 pred_motion(h, k, part_width >> 2, dir, 1, &mx, &my);
 397             } else {
 398                 mx = s->next_pic->motion_val[0][b_xy][0] << 1;
 399                 my = s->next_pic->motion_val[0][b_xy][1] << 1;
 400
 401                 if (dir == 0) {
 402                     mx = mx * h->frame_num_offset /
 403                          h->prev_frame_num_offset + 1 >> 1;
 404                     my = my * h->frame_num_offset /
 405                          h->prev_frame_num_offset + 1 >> 1;
 406                 } else {
 407                     mx = mx * (h->frame_num_offset - h->prev_frame_num_offset) /
 408                          h->prev_frame_num_offset + 1 >> 1;
 409                     my = my * (h->frame_num_offset - h->prev_frame_num_offset) /
 410                          h->prev_frame_num_offset + 1 >> 1;
 411                 }
 412             }
 413
 414             /* clip motion vector prediction to frame border */
 415             mx = av_clip(mx, extra_width - 6 * x, h_edge_pos - 6 * x);
 416             my = av_clip(my, extra_width - 6 * y, v_edge_pos - 6 * y);
 417
 418             /* get (optional) motion vector differential */
 419             if (mode == PREDICT_MODE) {
 420                 dx = dy = 0;
 421             } else {
 422                 dy = svq3_get_se_golomb(&h->gb);
 423                 dx = svq3_get_se_golomb(&h->gb);
 424
 425                 if (dx == INVALID_VLC || dy == INVALID_VLC) {
 426                     av_log(h->avctx, AV_LOG_ERROR, "invalid MV vlc\n");
 427                     return -1;
 428                 }
 429             }
 430
 431             /* compute motion vector */
 432             if (mode == THIRDPEL_MODE) {
 433                 int fx, fy;
 434                 mx  = (mx + 1 >> 1) + dx;
 435                 my  = (my + 1 >> 1) + dy;
 436                 fx  = (unsigned)(mx + 0x3000) / 3 - 0x1000;
 437                 fy  = (unsigned)(my + 0x3000) / 3 - 0x1000;
 438                 dxy = (mx - 3 * fx) + 4 * (my - 3 * fy);
 439
 440                 svq3_mc_dir_part(s, x, y, part_width, part_height,
 441                                  fx, fy, dxy, 1, dir, avg);
 442                 mx += mx;
 443                 my += my;
 444             } else if (mode == HALFPEL_MODE || mode == PREDICT_MODE) {
 445                 mx  = (unsigned)(mx + 1 + 0x3000) / 3 + dx - 0x1000;
 446                 my  = (unsigned)(my + 1 + 0x3000) / 3 + dy - 0x1000;
 447                 dxy = (mx & 1) + 2 * (my & 1);
 448
 449                 svq3_mc_dir_part(s, x, y, part_width, part_height,
 450                                  mx >> 1, my >> 1, dxy, 0, dir, avg);
 451                 mx *= 3;
 452                 my *= 3;
 453             } else {
 454                 mx = (unsigned)(mx + 3 + 0x6000) / 6 + dx - 0x1000;
 455                 my = (unsigned)(my + 3 + 0x6000) / 6 + dy - 0x1000;
 456
 457                 svq3_mc_dir_part(s, x, y, part_width, part_height,
 458                                  mx, my, 0, 0, dir, avg);
 459                 mx *= 6;
 460                 my *= 6;
 461             }
 462
 463             /* update mv_cache */
 464             if (mode != PREDICT_MODE) {
 465                 int32_t mv = pack16to32(mx, my);
 466
 467                 if (part_height == 8 && i < 8) {
 468                     AV_WN32A(h->mv_cache[dir][scan8[k] + 1 * 8], mv);
 469
 470                     if (part_width == 8 && j < 8)
 471                         AV_WN32A(h->mv_cache[dir][scan8[k] + 1 + 1 * 8], mv);
 472                 }
 473                 if (part_width == 8 && j < 8)
 474                     AV_WN32A(h->mv_cache[dir][scan8[k] + 1], mv);
 475                 if (part_width == 4 || part_height == 4)
 476                     AV_WN32A(h->mv_cache[dir][scan8[k]], mv);
 477             }
 478
 479             /* write back motion vectors */
 480             fill_rectangle(h->cur_pic.motion_val[dir][b_xy],
 481                            part_width >> 2, part_height >> 2, h->b_stride,
 482                            pack16to32(mx, my), 4);
 483         }
 484
 485     return 0;
 486 }
 487
 /**
  * Decode one macroblock of the current picture.
  *
  * Depending on mb_type this performs motion compensation (skip and inter
  * types) or sets up the intra prediction modes, then reads the coded
  * block pattern, an optional qscale delta and the residual coefficient
  * blocks from h->gb. The resulting cbp and macroblock type are stored in
  * h->cbp and h->cur_pic.mb_type[].
  *
  * @param s       decoder context
  * @param mb_type macroblock type code: 0 = skip, 1..7 = inter,
  *                8 or 33 = intra 4x4, any other value = intra 16x16
  *                (mb_type - 8 then indexes i_mb_type_info; the range is
  *                not checked here)
  * @return 0 on success, a negative value on invalid bitstream data
  */
 488 static int svq3_decode_mb(SVQ3Context *s, unsigned int mb_type)
 489 {
 490     H264Context *h = &s->h;
 491     int i, j, k, m, dir, mode;
 492     int cbp = 0;
 493     uint32_t vlc;
 494     int8_t *top, *left;
 495     const int mb_xy         = h->mb_xy;
 496     const int b_xy          = 4 * h->mb_x + 4 * h->mb_y * h->b_stride;
 497
 498     h->top_samples_available      = (h->mb_y == 0) ? 0x33FF : 0xFFFF;
 499     h->left_samples_available     = (h->mb_x == 0) ? 0x5F5F : 0xFFFF;
 500     h->topright_samples_available = 0xFFFF;
 501
 502     if (mb_type == 0) {           /* SKIP */
             /* P picture, or co-located macroblock of next_pic marked -1:
              * copy with zero motion (B pictures also average in next_pic) */
 503         if (h->pict_type == AV_PICTURE_TYPE_P ||
 504             s->next_pic->mb_type[mb_xy] == -1) {
 505             svq3_mc_dir_part(s, 16 * h->mb_x, 16 * h->mb_y, 16, 16,
 506                              0, 0, 0, 0, 0, 0);
 507
 508             if (h->pict_type == AV_PICTURE_TYPE_B)
 509                 svq3_mc_dir_part(s, 16 * h->mb_x, 16 * h->mb_y, 16, 16,
 510                                  0, 0, 0, 0, 1, 1);
 511
 512             mb_type = MB_TYPE_SKIP;
 513         } else {
                 /* reuse the co-located type (clamped to 6) as partition
                  * layout and derive both vectors in PREDICT_MODE */
 514             mb_type = FFMIN(s->next_pic->mb_type[mb_xy], 6);
 515             if (svq3_mc_dir(s, mb_type, PREDICT_MODE, 0, 0) < 0)
 516                 return -1;
 517             if (svq3_mc_dir(s, mb_type, PREDICT_MODE, 1, 1) < 0)
 518                 return -1;
 519
 520             mb_type = MB_TYPE_16x16;
 521         }
 522     } else if (mb_type < 8) {     /* INTER */
             /* get_bits1() is only evaluated when the flag test in front of
              * it passes, so zero, one or two bits are consumed here */
 523         if (s->thirdpel_flag && s->halfpel_flag == !get_bits1(&h->gb))
 524             mode = THIRDPEL_MODE;
 525         else if (s->halfpel_flag &&
 526                  s->thirdpel_flag == !get_bits1(&h->gb))
 527             mode = HALFPEL_MODE;
 528         else
 529             mode = FULLPEL_MODE;
 530
 531         /* fill caches */
 532         /* note ref_cache should contain here:
 533          *  ????????
 534          *  ???11111
 535          *  N??11111
 536          *  N??11111
 537          *  N??11111
 538          */
 539
             /* m = 1 is only processed for B pictures (see the break below) */
 540         for (m = 0; m < 2; m++) {
 541             if (h->mb_x > 0 && h->intra4x4_pred_mode[h->mb2br_xy[mb_xy - 1] + 6] != -1) {
 542                 for (i = 0; i < 4; i++)
 543                     AV_COPY32(h->mv_cache[m][scan8[0] - 1 + i * 8],
 544                               h->cur_pic.motion_val[m][b_xy - 1 + i * h->b_stride]);
 545             } else {
 546                 for (i = 0; i < 4; i++)
 547                     AV_ZERO32(h->mv_cache[m][scan8[0] - 1 + i * 8]);
 548             }
 549             if (h->mb_y > 0) {
 550                 memcpy(h->mv_cache[m][scan8[0] - 1 * 8],
 551                        h->cur_pic.motion_val[m][b_xy - h->b_stride],
 552                        4 * 2 * sizeof(int16_t));
 553                 memset(&h->ref_cache[m][scan8[0] - 1 * 8],
 554                        (h->intra4x4_pred_mode[h->mb2br_xy[mb_xy - h->mb_stride]] == -1) ? PART_NOT_AVAILABLE : 1, 4);
 555
 556                 if (h->mb_x < h->mb_width - 1) {
 557                     AV_COPY32(h->mv_cache[m][scan8[0] + 4 - 1 * 8],
 558                               h->cur_pic.motion_val[m][b_xy - h->b_stride + 4]);
 559                     h->ref_cache[m][scan8[0] + 4 - 1 * 8] =
 560                         (h->intra4x4_pred_mode[h->mb2br_xy[mb_xy - h->mb_stride + 1] + 6] == -1 ||
 561                          h->intra4x4_pred_mode[h->mb2br_xy[mb_xy - h->mb_stride]] == -1) ? PART_NOT_AVAILABLE : 1;
 562                 } else
 563                     h->ref_cache[m][scan8[0] + 4 - 1 * 8] = PART_NOT_AVAILABLE;
 564                 if (h->mb_x > 0) {
 565                     AV_COPY32(h->mv_cache[m][scan8[0] - 1 - 1 * 8],
 566                               h->cur_pic.motion_val[m][b_xy - h->b_stride - 1]);
 567                     h->ref_cache[m][scan8[0] - 1 - 1 * 8] =
 568                         (h->intra4x4_pred_mode[h->mb2br_xy[mb_xy - h->mb_stride - 1] + 3] == -1) ? PART_NOT_AVAILABLE : 1;
 569                 } else
 570                     h->ref_cache[m][scan8[0] - 1 - 1 * 8] = PART_NOT_AVAILABLE;
 571             } else
 572                 memset(&h->ref_cache[m][scan8[0] - 1 * 8 - 1],
 573                        PART_NOT_AVAILABLE, 8);
 574
 575             if (h->pict_type != AV_PICTURE_TYPE_B)
 576                 break;
 577         }
 578
 579         /* decode motion vector(s) and form prediction(s) */
 580         if (h->pict_type == AV_PICTURE_TYPE_P) {
 581             if (svq3_mc_dir(s, mb_type - 1, mode, 0, 0) < 0)
 582                 return -1;
 583         } else {        /* AV_PICTURE_TYPE_B */
                 /* mb_type 2 skips direction 0, mb_type 1 skips direction 1;
                  * the skipped direction gets zeroed motion vectors */
 584             if (mb_type != 2) {
 585                 if (svq3_mc_dir(s, 0, mode, 0, 0) < 0)
 586                     return -1;
 587             } else {
 588                 for (i = 0; i < 4; i++)
 589                     memset(h->cur_pic.motion_val[0][b_xy + i * h->b_stride],
 590                            0, 4 * 2 * sizeof(int16_t));
 591             }
 592             if (mb_type != 1) {
                     /* average with the direction 0 prediction only for mb_type 3 */
 593                 if (svq3_mc_dir(s, 0, mode, 1, mb_type == 3) < 0)
 594                     return -1;
 595             } else {
 596                 for (i = 0; i < 4; i++)
 597                     memset(h->cur_pic.motion_val[1][b_xy + i * h->b_stride],
 598                            0, 4 * 2 * sizeof(int16_t));
 599             }
 600         }
 601
 602         mb_type = MB_TYPE_16x16;
 603     } else if (mb_type == 8 || mb_type == 33) {   /* INTRA4x4 */
 604         memset(h->intra4x4_pred_mode_cache, -1, 8 * 5 * sizeof(int8_t));
 605
 606         if (mb_type == 8) {
                 /* load the prediction modes of the left and top neighbours */
 607             if (h->mb_x > 0) {
 608                 for (i = 0; i < 4; i++)
 609                     h->intra4x4_pred_mode_cache[scan8[0] - 1 + i * 8] = h->intra4x4_pred_mode[h->mb2br_xy[mb_xy - 1] + 6 - i];
 610                 if (h->intra4x4_pred_mode_cache[scan8[0] - 1] == -1)
 611                     h->left_samples_available = 0x5F5F;
 612             }
 613             if (h->mb_y > 0) {
 614                 h->intra4x4_pred_mode_cache[4 + 8 * 0] = h->intra4x4_pred_mode[h->mb2br_xy[mb_xy - h->mb_stride] + 0];
 615                 h->intra4x4_pred_mode_cache[5 + 8 * 0] = h->intra4x4_pred_mode[h->mb2br_xy[mb_xy - h->mb_stride] + 1];
 616                 h->intra4x4_pred_mode_cache[6 + 8 * 0] = h->intra4x4_pred_mode[h->mb2br_xy[mb_xy - h->mb_stride] + 2];
 617                 h->intra4x4_pred_mode_cache[7 + 8 * 0] = h->intra4x4_pred_mode[h->mb2br_xy[mb_xy - h->mb_stride] + 3];
 618
 619                 if (h->intra4x4_pred_mode_cache[4 + 8 * 0] == -1)
 620                     h->top_samples_available = 0x33FF;
 621             }
 622
 623             /* decode prediction codes for luma blocks */
                 /* each code yields the modes of two blocks (i and i + 1) */
 624             for (i = 0; i < 16; i += 2) {
 625                 vlc = svq3_get_ue_golomb(&h->gb);
 626
 627                 if (vlc >= 25U) {
 628                     av_log(h->avctx, AV_LOG_ERROR,
 629                            "luma prediction:%"PRIu32"\n", vlc);
 630                     return -1;
 631                 }
 632
 633                 left = &h->intra4x4_pred_mode_cache[scan8[i] - 1];
 634                 top  = &h->intra4x4_pred_mode_cache[scan8[i] - 8];
 635
 636                 left[1] = svq3_pred_1[top[0] + 1][left[0] + 1][svq3_pred_0[vlc][0]];
 637                 left[2] = svq3_pred_1[top[1] + 1][left[1] + 1][svq3_pred_0[vlc][1]];
 638
 639                 if (left[1] == -1 || left[2] == -1) {
 640                     av_log(h->avctx, AV_LOG_ERROR, "weird prediction\n");
 641                     return -1;
 642                 }
 643             }
 644         } else {    /* mb_type == 33, DC_128_PRED block type */
 645             for (i = 0; i < 4; i++)
 646                 memset(&h->intra4x4_pred_mode_cache[scan8[0] + 8 * i], DC_PRED, 4);
 647         }
 648
 649         write_back_intra_pred_mode(h);
 650
 651         if (mb_type == 8) {
 652             ff_h264_check_intra4x4_pred_mode(h);
 653
 654             h->top_samples_available  = (h->mb_y == 0) ? 0x33FF : 0xFFFF;
 655             h->left_samples_available = (h->mb_x == 0) ? 0x5F5F : 0xFFFF;
 656         } else {
                 /* type 33: DC_PRED was written back above; the cache itself
                  * is switched to DC_128_PRED with the edge-case masks set */
 657             for (i = 0; i < 4; i++)
 658                 memset(&h->intra4x4_pred_mode_cache[scan8[0] + 8 * i], DC_128_PRED, 4);
 659
 660             h->top_samples_available  = 0x33FF;
 661             h->left_samples_available = 0x5F5F;
 662         }
 663
 664         mb_type = MB_TYPE_INTRA4x4;
 665     } else {                      /* INTRA16x16 */
 666         dir = i_mb_type_info[mb_type - 8].pred_mode;
 667         dir = (dir >> 1) ^ 3 * (dir & 1) ^ 1;
 668
 669         if ((h->intra16x16_pred_mode = ff_h264_check_intra_pred_mode(h, dir, 0)) < 0) {
 670             av_log(h->avctx, AV_LOG_ERROR, "ff_h264_check_intra_pred_mode < 0\n");
 671             return h->intra16x16_pred_mode;
 672         }
 673
 674         cbp     = i_mb_type_info[mb_type - 8].cbp;
 675         mb_type = MB_TYPE_INTRA16x16;
 676     }
 677
         /* from here on mb_type holds MB_TYPE_* flags, not the bitstream code */
 678     if (!IS_INTER(mb_type) && h->pict_type != AV_PICTURE_TYPE_I) {
 679         for (i = 0; i < 4; i++)
 680             memset(h->cur_pic.motion_val[0][b_xy + i * h->b_stride],
 681                    0, 4 * 2 * sizeof(int16_t));
 682         if (h->pict_type == AV_PICTURE_TYPE_B) {
 683             for (i = 0; i < 4; i++)
 684                 memset(h->cur_pic.motion_val[1][b_xy + i * h->b_stride],
 685                        0, 4 * 2 * sizeof(int16_t));
 686         }
 687     }
 688     if (!IS_INTRA4x4(mb_type)) {
 689         memset(h->intra4x4_pred_mode + h->mb2br_xy[mb_xy], DC_PRED, 8);
 690     }
 691     if (!IS_SKIP(mb_type) || h->pict_type == AV_PICTURE_TYPE_B) {
 692         memset(h->non_zero_count_cache + 8, 0, 14 * 8 * sizeof(uint8_t));
 693     }
 694
         /* intra 16x16 took its cbp from i_mb_type_info above; every other
          * coded macroblock reads it from the bitstream */
 695     if (!IS_INTRA16x16(mb_type) &&
 696         (!IS_SKIP(mb_type) || h->pict_type == AV_PICTURE_TYPE_B)) {
 697         if ((vlc = svq3_get_ue_golomb(&h->gb)) >= 48U){
 698             av_log(h->avctx, AV_LOG_ERROR, "cbp_vlc=%"PRIu32"\n", vlc);
 699             return -1;
 700         }
 701
 702         cbp = IS_INTRA(mb_type) ? golomb_to_intra4x4_cbp[vlc]
 703                                 : golomb_to_inter_cbp[vlc];
 704     }
 705     if (IS_INTRA16x16(mb_type) ||
 706         (h->pict_type != AV_PICTURE_TYPE_I && s->adaptive_quant && cbp)) {
 707         h->qscale += svq3_get_se_golomb(&h->gb);
 708
             /* comparison against an unsigned constant: a negative qscale
              * is converted to a large value and rejected as well */
 709         if (h->qscale > 31u) {
 710             av_log(h->avctx, AV_LOG_ERROR, "qscale:%d\n", h->qscale);
 711             return -1;
 712         }
 713     }
 714     if (IS_INTRA16x16(mb_type)) {
 715         AV_ZERO128(h->mb_luma_dc[0] + 0);
 716         AV_ZERO128(h->mb_luma_dc[0] + 8);
 717         if (svq3_decode_block(&h->gb, h->mb_luma_dc[0], 0, 1)) {
 718             av_log(h->avctx, AV_LOG_ERROR,
 719                    "error while decoding intra luma dc\n");
 720             return -1;
 721         }
 722     }
 723
 724     if (cbp) {
             /* intra 16x16 blocks start at coefficient 1 (DC decoded above);
              * intra 4x4 blocks with qscale < 24 use scan type 2 */
 725         const int index = IS_INTRA16x16(mb_type) ? 1 : 0;
 726         const int type  = ((h->qscale < 24 && IS_INTRA4x4(mb_type)) ? 2 : 1);
 727
             /* bits 0..3 of cbp each enable a group of four luma blocks */
 728         for (i = 0; i < 4; i++)
 729             if ((cbp & (1 << i))) {
 730                 for (j = 0; j < 4; j++) {
 731                     k = index ? (1 * (j & 1) + 2 * (i & 1) +
 732                                  2 * (j & 2) + 4 * (i & 2))
 733                               : (4 * i + j);
 734                     h->non_zero_count_cache[scan8[k]] = 1;
 735
 736                     if (svq3_decode_block(&h->gb, &h->mb[16 * k], index, type)) {
 737                         av_log(h->avctx, AV_LOG_ERROR,
 738                                "error while decoding block\n");
 739                         return -1;
 740                     }
 741                 }
 742             }
 743
             /* bits 4 and 5: chroma DC is read if either is set, chroma AC
              * only if bit 5 is set */
 744         if ((cbp & 0x30)) {
 745             for (i = 1; i < 3; ++i)
 746                 if (svq3_decode_block(&h->gb, &h->mb[16 * 16 * i], 0, 3)) {
 747                     av_log(h->avctx, AV_LOG_ERROR,
 748                            "error while decoding chroma dc block\n");
 749                     return -1;
 750                 }
 751
 752             if ((cbp & 0x20)) {
 753                 for (i = 1; i < 3; i++) {
 754                     for (j = 0; j < 4; j++) {
 755                         k                                 = 16 * i + j;
 756                         h->non_zero_count_cache[scan8[k]] = 1;
 757
 758                         if (svq3_decode_block(&h->gb, &h->mb[16 * k], 1, 1)) {
 759                             av_log(h->avctx, AV_LOG_ERROR,
 760                                    "error while decoding chroma ac block\n");
 761                             return -1;
 762                         }
 763                     }
 764                 }
 765             }
 766         }
 767     }
 768
 769     h->cbp                              = cbp;
 770     h->cur_pic.mb_type[mb_xy] = mb_type;
 771
 772     if (IS_INTRA(mb_type))
 773         h->chroma_pred_mode = ff_h264_check_intra_pred_mode(h, DC_PRED8x8, 1);
 774
 775     return 0;
 776 }
 777
 778 static int svq3_decode_slice_header(AVCodecContext *avctx)
 779 {
 780     SVQ3Context *s = avctx->priv_data;
 781     H264Context *h    = &s->h;
 782     const int mb_xy   = h->mb_xy;
 783     int i, header;
 784     unsigned slice_id;
 785
 786     header = get_bits(&h->gb, 8);
 787
 788     if (((header & 0x9F) != 1 && (header & 0x9F) != 2) || (header & 0x60) == 0) {
 789         /* TODO: what? */
 790         av_log(avctx, AV_LOG_ERROR, "unsupported slice header (%02X)\n", header);
 791         return -1;
 792     } else {
 793         int length = header >> 5 & 3;
 794
 795         s->next_slice_index = get_bits_count(&h->gb) +
 796                               8 * show_bits(&h->gb, 8 * length) +
 797                               8 * length;
 798
 799         if (s->next_slice_index > h->gb.size_in_bits) {
 800             av_log(avctx, AV_LOG_ERROR, "slice after bitstream end\n");
 801             return -1;
 802         }
 803
 804         h->gb.size_in_bits = s->next_slice_index - 8 * (length - 1);
 805         skip_bits(&h->gb, 8);
 806
 807         if (s->watermark_key) {
 808             uint32_t header = AV_RL32(&h->gb.buffer[(get_bits_count(&h->gb) >> 3) + 1]);
 809             AV_WL32(&h->gb.buffer[(get_bits_count(&h->gb) >> 3) + 1],
 810                     header ^ s->watermark_key);
 811         }
 812         if (length > 0) {
 813             memmove((uint8_t *) &h->gb.buffer[get_bits_count(&h->gb) >> 3],
 814                     &h->gb.buffer[h->gb.size_in_bits >> 3], length - 1);
 815         }
 816         skip_bits_long(&h->gb, 0);
 817     }
 818
 819     if ((slice_id = svq3_get_ue_golomb(&h->gb)) >= 3) {
 820         av_log(h->avctx, AV_LOG_ERROR, "illegal slice type %u \n", slice_id);
 821         return -1;
 822     }
 823
 824     h->slice_type = golomb_to_pict_type[slice_id];
 825
 826     if ((header & 0x9F) == 2) {
 827         i              = (h->mb_num < 64) ? 6 : (1 + av_log2(h->mb_num - 1));
 828         h->mb_skip_run = get_bits(&h->gb, i) -
 829                          (h->mb_y * h->mb_width + h->mb_x);
 830     } else {
 831         skip_bits1(&h->gb);
 832         h->mb_skip_run = 0;
 833     }
 834
 835     h->slice_num      = get_bits(&h->gb, 8);
 836     h->qscale         = get_bits(&h->gb, 5);
 837     s->adaptive_quant = get_bits1(&h->gb);
 838
 839     /* unknown fields */
 840     skip_bits1(&h->gb);
 841
 842     if (s->unknown_flag)
 843         skip_bits1(&h->gb);
 844
 845     skip_bits1(&h->gb);
 846     skip_bits(&h->gb, 2);
 847
 848     if (skip_1stop_8data_bits(&h->gb) < 0)
 849         return AVERROR_INVALIDDATA;
 850
 851     /* reset intra predictors and invalidate motion vector references */
 852     if (h->mb_x > 0) {
 853         memset(h->intra4x4_pred_mode + h->mb2br_xy[mb_xy - 1] + 3,
 854                -1, 4 * sizeof(int8_t));
 855         memset(h->intra4x4_pred_mode + h->mb2br_xy[mb_xy - h->mb_x],
 856                -1, 8 * sizeof(int8_t) * h->mb_x);
 857     }
 858     if (h->mb_y > 0) {
 859         memset(h->intra4x4_pred_mode + h->mb2br_xy[mb_xy - h->mb_stride],
 860                -1, 8 * sizeof(int8_t) * (h->mb_width - h->mb_x));
 861
 862         if (h->mb_x > 0)
 863             h->intra4x4_pred_mode[h->mb2br_xy[mb_xy - h->mb_stride - 1] + 3] = -1;
 864     }
 865
 866     return 0;
 867 }
 868
 869 static av_cold int svq3_decode_init(AVCodecContext *avctx)
 870 {
 871     SVQ3Context *s = avctx->priv_data;
 872     H264Context *h = &s->h;
 873     int m;
 874     unsigned char *extradata;
 875     unsigned char *extradata_end;
 876     unsigned int size;
 877     int marker_found = 0;
 878     int ret;
 879
 880     s->cur_pic  = av_mallocz(sizeof(*s->cur_pic));
 881     s->last_pic = av_mallocz(sizeof(*s->last_pic));
 882     s->next_pic = av_mallocz(sizeof(*s->next_pic));
 883     if (!s->next_pic || !s->last_pic || !s->cur_pic) {
 884         ret = AVERROR(ENOMEM);
 885         goto fail;
 886     }
 887
 888     if ((ret = ff_h264_decode_init(avctx)) < 0)
 889         goto fail;
 890
 891     ff_hpeldsp_init(&s->hdsp, avctx->flags);
 892     ff_tpeldsp_init(&s->tdsp);
 893
 894     h->flags           = avctx->flags;
 895     h->is_complex      = 1;
 896     h->sps.chroma_format_idc = 1;
 897     h->picture_structure = PICT_FRAME;
 898     avctx->pix_fmt     = AV_PIX_FMT_YUVJ420P;
 899     avctx->color_range = AVCOL_RANGE_JPEG;
 900
 901     h->chroma_qp[0] = h->chroma_qp[1] = 4;
 902     h->chroma_x_shift = h->chroma_y_shift = 1;
 903
 904     s->halfpel_flag  = 1;
 905     s->thirdpel_flag = 1;
 906     s->unknown_flag  = 0;
 907
 908     /* prowl for the "SEQH" marker in the extradata */
 909     extradata     = (unsigned char *)avctx->extradata;
 910     extradata_end = avctx->extradata + avctx->extradata_size;
 911     if (extradata) {
 912         for (m = 0; m + 8 < avctx->extradata_size; m++) {
 913             if (!memcmp(extradata, "SEQH", 4)) {
 914                 marker_found = 1;
 915                 break;
 916             }
 917             extradata++;
 918         }
 919     }
 920
 921     /* if a match was found, parse the extra data */
 922     if (marker_found) {
 923         GetBitContext gb;
 924         int frame_size_code;
 925
 926         size = AV_RB32(&extradata[4]);
 927         if (size > extradata_end - extradata - 8) {
 928             ret = AVERROR_INVALIDDATA;
 929             goto fail;
 930         }
 931         init_get_bits(&gb, extradata + 8, size * 8);
 932
 933         /* 'frame size code' and optional 'width, height' */
 934         frame_size_code = get_bits(&gb, 3);
 935         switch (frame_size_code) {
 936         case 0:
 937             avctx->width  = 160;
 938             avctx->height = 120;
 939             break;
 940         case 1:
 941             avctx->width  = 128;
 942             avctx->height =  96;
 943             break;
 944         case 2:
 945             avctx->width  = 176;
 946             avctx->height = 144;
 947             break;
 948         case 3:
 949             avctx->width  = 352;
 950             avctx->height = 288;
 951             break;
 952         case 4:
 953             avctx->width  = 704;
 954             avctx->height = 576;
 955             break;
 956         case 5:
 957             avctx->width  = 240;
 958             avctx->height = 180;
 959             break;
 960         case 6:
 961             avctx->width  = 320;
 962             avctx->height = 240;
 963             break;
 964         case 7:
 965             avctx->width  = get_bits(&gb, 12);
 966             avctx->height = get_bits(&gb, 12);
 967             break;
 968         }
 969
 970         s->halfpel_flag  = get_bits1(&gb);
 971         s->thirdpel_flag = get_bits1(&gb);
 972
 973         /* unknown fields */
 974         skip_bits1(&gb);
 975         skip_bits1(&gb);
 976         skip_bits1(&gb);
 977         skip_bits1(&gb);
 978
 979         h->low_delay = get_bits1(&gb);
 980
 981         /* unknown field */
 982         skip_bits1(&gb);
 983
 984         if (skip_1stop_8data_bits(&gb) < 0) {
 985             ret = AVERROR_INVALIDDATA;
 986             goto fail;
 987         }
 988
 989         s->unknown_flag  = get_bits1(&gb);
 990         avctx->has_b_frames = !h->low_delay;
 991         if (s->unknown_flag) {
 992 #if CONFIG_ZLIB
 993             unsigned watermark_width  = svq3_get_ue_golomb(&gb);
 994             unsigned watermark_height = svq3_get_ue_golomb(&gb);
 995             int u1                    = svq3_get_ue_golomb(&gb);
 996             int u2                    = get_bits(&gb, 8);
 997             int u3                    = get_bits(&gb, 2);
 998             int u4                    = svq3_get_ue_golomb(&gb);
 999             unsigned long buf_len     = watermark_width *
1000                                         watermark_height * 4;
1001             int offset                = get_bits_count(&gb) + 7 >> 3;
1002             uint8_t *buf;
1003
1004             if (watermark_height <= 0 ||
1005                 (uint64_t)watermark_width * 4 > UINT_MAX / watermark_height) {
1006                 ret = -1;
1007                 goto fail;
1008             }
1009
1010             buf = av_malloc(buf_len);
1011             av_log(avctx, AV_LOG_DEBUG, "watermark size: %ux%u\n",
1012                    watermark_width, watermark_height);
1013             av_log(avctx, AV_LOG_DEBUG,
1014                    "u1: %x u2: %x u3: %x compressed data size: %d offset: %d\n",
1015                    u1, u2, u3, u4, offset);
1016             if (uncompress(buf, &buf_len, extradata + 8 + offset,
1017                            size - offset) != Z_OK) {
1018                 av_log(avctx, AV_LOG_ERROR,
1019                        "could not uncompress watermark logo\n");
1020                 av_free(buf);
1021                 ret = -1;
1022                 goto fail;
1023             }
1024             s->watermark_key = ff_svq1_packet_checksum(buf, buf_len, 0);
1025             s->watermark_key = s->watermark_key << 16 | s->watermark_key;
1026             av_log(avctx, AV_LOG_DEBUG,
1027                    "watermark key %#"PRIx32"\n", s->watermark_key);
1028             av_free(buf);
1029 #else
1030             av_log(avctx, AV_LOG_ERROR,
1031                    "this svq3 file contains watermark which need zlib support compiled in\n");
1032             ret = -1;
1033             goto fail;
1034 #endif
1035         }
1036     }
1037
1038     h->width  = avctx->width;
1039     h->height = avctx->height;
1040     h->mb_width  = (h->width + 15) / 16;
1041     h->mb_height = (h->height + 15) / 16;
1042     h->mb_stride = h->mb_width + 1;
1043     h->mb_num    = h->mb_width * h->mb_height;
1044     h->b_stride = 4 * h->mb_width;
1045     s->h_edge_pos = h->mb_width * 16;
1046     s->v_edge_pos = h->mb_height * 16;
1047
1048     if ((ret = ff_h264_alloc_tables(h)) < 0) {
1049         av_log(avctx, AV_LOG_ERROR, "svq3 memory allocation failed\n");
1050         goto fail;
1051     }
1052
1053     return 0;
1054 fail:
1055     svq3_decode_end(avctx);
1056     return ret;
1057 }
1058
1059 static void free_picture(AVCodecContext *avctx, H264Picture *pic)
1060 {
1061     int i;
1062     for (i = 0; i < 2; i++) {
1063         av_buffer_unref(&pic->motion_val_buf[i]);
1064         av_buffer_unref(&pic->ref_index_buf[i]);
1065     }
1066     av_buffer_unref(&pic->mb_type_buf);
1067
1068     av_frame_unref(&pic->f);
1069 }
1070
1071 static int get_buffer(AVCodecContext *avctx, H264Picture *pic)
1072 {
1073     SVQ3Context *s = avctx->priv_data;
1074     H264Context *h = &s->h;
1075     const int big_mb_num    = h->mb_stride * (h->mb_height + 1) + 1;
1076     const int mb_array_size = h->mb_stride * h->mb_height;
1077     const int b4_stride     = h->mb_width * 4 + 1;
1078     const int b4_array_size = b4_stride * h->mb_height * 4;
1079     int ret;
1080
1081     if (!pic->motion_val_buf[0]) {
1082         int i;
1083
1084         pic->mb_type_buf = av_buffer_allocz((big_mb_num + h->mb_stride) * sizeof(uint32_t));
1085         if (!pic->mb_type_buf)
1086             return AVERROR(ENOMEM);
1087         pic->mb_type = (uint32_t*)pic->mb_type_buf->data + 2 * h->mb_stride + 1;
1088
1089         for (i = 0; i < 2; i++) {
1090             pic->motion_val_buf[i] = av_buffer_allocz(2 * (b4_array_size + 4) * sizeof(int16_t));
1091             pic->ref_index_buf[i]  = av_buffer_allocz(4 * mb_array_size);
1092             if (!pic->motion_val_buf[i] || !pic->ref_index_buf[i]) {
1093                 ret = AVERROR(ENOMEM);
1094                 goto fail;
1095             }
1096
1097             pic->motion_val[i] = (int16_t (*)[2])pic->motion_val_buf[i]->data + 4;
1098             pic->ref_index[i]  = pic->ref_index_buf[i]->data;
1099         }
1100     }
1101     pic->reference = !(h->pict_type == AV_PICTURE_TYPE_B);
1102
1103     ret = ff_get_buffer(avctx, &pic->f,
1104                         pic->reference ? AV_GET_BUFFER_FLAG_REF : 0);
1105     if (ret < 0)
1106         goto fail;
1107
1108     if (!h->edge_emu_buffer) {
1109         h->edge_emu_buffer = av_mallocz(pic->f.linesize[0] * 17);
1110         if (!h->edge_emu_buffer)
1111             return AVERROR(ENOMEM);
1112     }
1113
1114     h->linesize   = pic->f.linesize[0];
1115     h->uvlinesize = pic->f.linesize[1];
1116
1117     return 0;
1118 fail:
1119     free_picture(avctx, pic);
1120     return ret;
1121 }
1122
1123 static int svq3_decode_frame(AVCodecContext *avctx, void *data,
1124                              int *got_frame, AVPacket *avpkt)
1125 {
1126     SVQ3Context *s     = avctx->priv_data;
1127     H264Context *h     = &s->h;
1128     int buf_size       = avpkt->size;
1129     int left;
1130     uint8_t *buf;
1131     int ret, m, i;
1132
1133     /* special case for last picture */
1134     if (buf_size == 0) {
1135         if (s->next_pic->f.data[0] && !h->low_delay && !s->last_frame_output) {
1136             ret = av_frame_ref(data, &s->next_pic->f);
1137             if (ret < 0)
1138                 return ret;
1139             s->last_frame_output = 1;
1140             *got_frame          = 1;
1141         }
1142         return 0;
1143     }
1144
1145     h->mb_x = h->mb_y = h->mb_xy = 0;
1146
1147     if (s->watermark_key) {
1148         av_fast_padded_malloc(&s->buf, &s->buf_size, buf_size);
1149         if (!s->buf)
1150             return AVERROR(ENOMEM);
1151         memcpy(s->buf, avpkt->data, buf_size);
1152         buf = s->buf;
1153     } else {
1154         buf = avpkt->data;
1155     }
1156
1157     init_get_bits(&h->gb, buf, 8 * buf_size);
1158
1159     if (svq3_decode_slice_header(avctx))
1160         return -1;
1161
1162     h->pict_type = h->slice_type;
1163
1164     if (h->pict_type != AV_PICTURE_TYPE_B)
1165         FFSWAP(H264Picture*, s->next_pic, s->last_pic);
1166
1167     av_frame_unref(&s->cur_pic->f);
1168
1169     /* for skipping the frame */
1170     s->cur_pic->f.pict_type = h->pict_type;
1171     s->cur_pic->f.key_frame = (h->pict_type == AV_PICTURE_TYPE_I);
1172
1173     ret = get_buffer(avctx, s->cur_pic);
1174     if (ret < 0)
1175         return ret;
1176
1177     h->cur_pic_ptr = s->cur_pic;
1178     av_frame_unref(&h->cur_pic.f);
1179     memcpy(&h->cur_pic.tf, &s->cur_pic->tf, sizeof(h->cur_pic) - offsetof(H264Picture, tf));
1180     ret = av_frame_ref(&h->cur_pic.f, &s->cur_pic->f);
1181     if (ret < 0)
1182         return ret;
1183
1184     for (i = 0; i < 16; i++) {
1185         h->block_offset[i]           = (4 * ((scan8[i] - scan8[0]) & 7)) + 4 * h->linesize * ((scan8[i] - scan8[0]) >> 3);
1186         h->block_offset[48 + i]      = (4 * ((scan8[i] - scan8[0]) & 7)) + 8 * h->linesize * ((scan8[i] - scan8[0]) >> 3);
1187     }
1188     for (i = 0; i < 16; i++) {
1189         h->block_offset[16 + i]      =
1190         h->block_offset[32 + i]      = (4 * ((scan8[i] - scan8[0]) & 7)) + 4 * h->uvlinesize * ((scan8[i] - scan8[0]) >> 3);
1191         h->block_offset[48 + 16 + i] =
1192         h->block_offset[48 + 32 + i] = (4 * ((scan8[i] - scan8[0]) & 7)) + 8 * h->uvlinesize * ((scan8[i] - scan8[0]) >> 3);
1193     }
1194
1195     if (h->pict_type != AV_PICTURE_TYPE_I) {
1196         if (!s->last_pic->f.data[0]) {
1197             av_log(avctx, AV_LOG_ERROR, "Missing reference frame.\n");
1198             av_frame_unref(&s->last_pic->f);
1199             ret = get_buffer(avctx, s->last_pic);
1200             if (ret < 0)
1201                 return ret;
1202             memset(s->last_pic->f.data[0], 0, avctx->height * s->last_pic->f.linesize[0]);
1203             memset(s->last_pic->f.data[1], 0x80, (avctx->height / 2) *
1204                    s->last_pic->f.linesize[1]);
1205             memset(s->last_pic->f.data[2], 0x80, (avctx->height / 2) *
1206                    s->last_pic->f.linesize[2]);
1207         }
1208
1209         if (h->pict_type == AV_PICTURE_TYPE_B && !s->next_pic->f.data[0]) {
1210             av_log(avctx, AV_LOG_ERROR, "Missing reference frame.\n");
1211             av_frame_unref(&s->next_pic->f);
1212             ret = get_buffer(avctx, s->next_pic);
1213             if (ret < 0)
1214                 return ret;
1215             memset(s->next_pic->f.data[0], 0, avctx->height * s->next_pic->f.linesize[0]);
1216             memset(s->next_pic->f.data[1], 0x80, (avctx->height / 2) *
1217                    s->next_pic->f.linesize[1]);
1218             memset(s->next_pic->f.data[2], 0x80, (avctx->height / 2) *
1219                    s->next_pic->f.linesize[2]);
1220         }
1221     }
1222
1223     if (avctx->debug & FF_DEBUG_PICT_INFO)
1224         av_log(h->avctx, AV_LOG_DEBUG,
1225                "%c hpel:%d, tpel:%d aqp:%d qp:%d, slice_num:%02X\n",
1226                av_get_picture_type_char(h->pict_type),
1227                s->halfpel_flag, s->thirdpel_flag,
1228                s->adaptive_quant, h->qscale, h->slice_num);
1229
1230     if (avctx->skip_frame >= AVDISCARD_NONREF && h->pict_type == AV_PICTURE_TYPE_B ||
1231         avctx->skip_frame >= AVDISCARD_NONKEY && h->pict_type != AV_PICTURE_TYPE_I ||
1232         avctx->skip_frame >= AVDISCARD_ALL)
1233         return 0;
1234
1235     if (s->next_p_frame_damaged) {
1236         if (h->pict_type == AV_PICTURE_TYPE_B)
1237             return 0;
1238         else
1239             s->next_p_frame_damaged = 0;
1240     }
1241
1242     if (h->pict_type == AV_PICTURE_TYPE_B) {
1243         h->frame_num_offset = h->slice_num - h->prev_frame_num;
1244
1245         if (h->frame_num_offset < 0)
1246             h->frame_num_offset += 256;
1247         if (h->frame_num_offset == 0 ||
1248             h->frame_num_offset >= h->prev_frame_num_offset) {
1249             av_log(h->avctx, AV_LOG_ERROR, "error in B-frame picture id\n");
1250             return -1;
1251         }
1252     } else {
1253         h->prev_frame_num        = h->frame_num;
1254         h->frame_num             = h->slice_num;
1255         h->prev_frame_num_offset = h->frame_num - h->prev_frame_num;
1256
1257         if (h->prev_frame_num_offset < 0)
1258             h->prev_frame_num_offset += 256;
1259     }
1260
1261     for (m = 0; m < 2; m++) {
1262         int i;
1263         for (i = 0; i < 4; i++) {
1264             int j;
1265             for (j = -1; j < 4; j++)
1266                 h->ref_cache[m][scan8[0] + 8 * i + j] = 1;
1267             if (i < 3)
1268                 h->ref_cache[m][scan8[0] + 8 * i + j] = PART_NOT_AVAILABLE;
1269         }
1270     }
1271
1272     for (h->mb_y = 0; h->mb_y < h->mb_height; h->mb_y++) {
1273         for (h->mb_x = 0; h->mb_x < h->mb_width; h->mb_x++) {
1274             unsigned mb_type;
1275             h->mb_xy = h->mb_x + h->mb_y * h->mb_stride;
1276
1277             if ((get_bits_count(&h->gb) + 7) >= h->gb.size_in_bits &&
1278                 ((get_bits_count(&h->gb) & 7) == 0 ||
1279                  show_bits(&h->gb, -get_bits_count(&h->gb) & 7) == 0)) {
1280                 skip_bits(&h->gb, s->next_slice_index - get_bits_count(&h->gb));
1281                 h->gb.size_in_bits = 8 * buf_size;
1282
1283                 if (svq3_decode_slice_header(avctx))
1284                     return -1;
1285
1286                 /* TODO: support s->mb_skip_run */
1287             }
1288
1289             mb_type = svq3_get_ue_golomb(&h->gb);
1290
1291             if (h->pict_type == AV_PICTURE_TYPE_I)
1292                 mb_type += 8;
1293             else if (h->pict_type == AV_PICTURE_TYPE_B && mb_type >= 4)
1294                 mb_type += 4;
1295             if (mb_type > 33 || svq3_decode_mb(s, mb_type)) {
1296                 av_log(h->avctx, AV_LOG_ERROR,
1297                        "error while decoding MB %d %d\n", h->mb_x, h->mb_y);
1298                 return -1;
1299             }
1300
1301             if (mb_type != 0 || h->cbp)
1302                 ff_h264_hl_decode_mb(h);
1303
1304             if (h->pict_type != AV_PICTURE_TYPE_B && !h->low_delay)
1305                 h->cur_pic.mb_type[h->mb_x + h->mb_y * h->mb_stride] =
1306                     (h->pict_type == AV_PICTURE_TYPE_P && mb_type < 8) ? (mb_type - 1) : -1;
1307         }
1308
1309         ff_draw_horiz_band(avctx, &s->cur_pic->f,
1310                            s->last_pic->f.data[0] ? &s->last_pic->f : NULL,
1311                            16 * h->mb_y, 16, h->picture_structure, 0,
1312                            h->low_delay);
1313     }
1314
1315     left = buf_size*8 - get_bits_count(&h->gb);
1316
1317     if (h->mb_y != h->mb_height || h->mb_x != h->mb_width) {
1318         av_log(avctx, AV_LOG_INFO, "frame num %d incomplete pic x %d y %d left %d\n", avctx->frame_number, h->mb_y, h->mb_x, left);
1319         //av_hex_dump(stderr, buf+buf_size-8, 8);
1320     }
1321
1322     if (left < 0) {
1323         av_log(avctx, AV_LOG_ERROR, "frame num %d left %d\n", avctx->frame_number, left);
1324         return -1;
1325     }
1326
1327     if (h->pict_type == AV_PICTURE_TYPE_B || h->low_delay)
1328         ret = av_frame_ref(data, &s->cur_pic->f);
1329     else if (s->last_pic->f.data[0])
1330         ret = av_frame_ref(data, &s->last_pic->f);
1331     if (ret < 0)
1332         return ret;
1333
1334     /* Do not output the last pic after seeking. */
1335     if (s->last_pic->f.data[0] || h->low_delay)
1336         *got_frame = 1;
1337
1338     if (h->pict_type != AV_PICTURE_TYPE_B) {
1339         FFSWAP(H264Picture*, s->cur_pic, s->next_pic);
1340     } else {
1341         av_frame_unref(&s->cur_pic->f);
1342     }
1343
1344     return buf_size;
1345 }
1346
1347 static av_cold int svq3_decode_end(AVCodecContext *avctx)
1348 {
1349     SVQ3Context *s = avctx->priv_data;
1350     H264Context *h = &s->h;
1351
1352     free_picture(avctx, s->cur_pic);
1353     free_picture(avctx, s->next_pic);
1354     free_picture(avctx, s->last_pic);
1355     av_freep(&s->cur_pic);
1356     av_freep(&s->next_pic);
1357     av_freep(&s->last_pic);
1358
1359     av_frame_unref(&h->cur_pic.f);
1360
1361     ff_h264_free_context(h);
1362
1363     av_freep(&s->buf);
1364     s->buf_size = 0;
1365     av_freep(&h->edge_emu_buffer);
1366
1367     return 0;
1368 }
1369
1370 AVCodec ff_svq3_decoder = {
1371     .name           = "svq3",
1372     .long_name      = NULL_IF_CONFIG_SMALL("Sorenson Vector Quantizer 3 / Sorenson Video 3 / SVQ3"),
1373     .type           = AVMEDIA_TYPE_VIDEO,
1374     .id             = AV_CODEC_ID_SVQ3,
1375     .priv_data_size = sizeof(SVQ3Context),
1376     .init           = svq3_decode_init,
1377     .close          = svq3_decode_end,
1378     .decode         = svq3_decode_frame,
1379     .capabilities   = CODEC_CAP_DRAW_HORIZ_BAND |
1380                       CODEC_CAP_DR1             |
1381                       CODEC_CAP_DELAY,
1382     .pix_fmts       = (const enum AVPixelFormat[]) { AV_PIX_FMT_YUVJ420P,
1383                                                      AV_PIX_FMT_NONE},
1384 };