git.sesse.net Git - ffmpeg/blob - libavcodec/svq3.c

   1 /*
   2  * Copyright (c) 2003 The Libav Project
   3  *
   4  * This file is part of Libav.
   5  *
   6  * Libav is free software; you can redistribute it and/or
   7  * modify it under the terms of the GNU Lesser General Public
   8  * License as published by the Free Software Foundation; either
   9  * version 2.1 of the License, or (at your option) any later version.
  10  *
  11  * Libav is distributed in the hope that it will be useful,
  12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14  * Lesser General Public License for more details.
  15  *
  16  * You should have received a copy of the GNU Lesser General Public
  17  * License along with Libav; if not, write to the Free Software
  18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  19  */
  20
  21 /*
  22  * How to use this decoder:
  23  * SVQ3 data is transported within Apple Quicktime files. Quicktime files
  24  * have stsd atoms to describe media trak properties. A stsd atom for a
  25  * video trak contains 1 or more ImageDescription atoms. These atoms begin
  26  * with the 4-byte length of the atom followed by the codec fourcc. Some
  27  * decoders need information in this atom to operate correctly. Such
  28  * is the case with SVQ3. In order to get the best use out of this decoder,
  29  * the calling app must make the SVQ3 ImageDescription atom available
  30  * via the AVCodecContext's extradata[_size] field:
  31  *
  32  * AVCodecContext.extradata = pointer to ImageDescription, first characters
  33  * are expected to be 'S', 'V', 'Q', and '3', NOT the 4-byte atom length
  34  * AVCodecContext.extradata_size = size of ImageDescription atom memory
  35  * buffer (which will be the same as the ImageDescription atom size field
  36  * from the QT file, minus 4 bytes since the length is missing)
  37  *
  38  * You will know you have these parameters passed correctly when the decoder
  39  * correctly decodes this file:
  40  *  http://samples.libav.org/V-codecs/SVQ3/Vertical400kbit.sorenson3.mov
  41  */
  42
  43 #include <inttypes.h>
  44
  45 #include "libavutil/attributes.h"
  46 #include "internal.h"
  47 #include "avcodec.h"
  48 #include "mpegutils.h"
  49 #include "h264.h"
  50 #include "h264_mvpred.h"
  51 #include "h264data.h"
  52 #include "golomb.h"
  53 #include "hpeldsp.h"
  54 #include "mathops.h"
  55 #include "rectangle.h"
  56 #include "tpeldsp.h"
  57
  58 #if CONFIG_ZLIB
  59 #include <zlib.h>
  60 #endif
  61
  62 #include "svq1.h"
  63 #include "svq3.h"
  64
  65 /**
  66  * @file
  67  * svq3 decoder.
  68  */
  69
  /**
   * Decoder state: the shared H.264 context followed by SVQ3-specific
   * DSP tables, picture pointers and stream flags.
   */
  70 typedef struct SVQ3Context {
         /* embedded H.264 state; code in this file reaches it as &s->h */
  71     H264Context h;
         /* half-pel put/avg routines, used by svq3_mc_dir_part() when thirdpel is 0 */
  72     HpelDSPContext hdsp;
         /* third-pel put/avg routines, used by svq3_mc_dir_part() when thirdpel is set */
  73     TpelDSPContext tdsp;
  74     H264Picture *cur_pic;
         /* picture read by svq3_mc_dir_part() for dir != 0; its motion vectors
          * and mb_type are also read for PREDICT_MODE and skipped macroblocks */
  75     H264Picture *next_pic;
         /* picture read by svq3_mc_dir_part() for dir == 0 */
  76     H264Picture *last_pic;
  77     GetBitContext gb;
  78     uint8_t *slice_buf;
  79     int slice_size;
         /* both flags are tested by svq3_decode_mb() when it chooses the
          * motion-compensation mode of an inter macroblock */
  80     int halfpel_flag;
  81     int thirdpel_flag;
  82     int unknown_flag;
  83     uint32_t watermark_key;
  84     int adaptive_quant;
  85     int next_p_frame_damaged;
         /* picture extent used to clip motion vectors and as the bounds
          * passed to edge emulation */
  86     int h_edge_pos;
  87     int v_edge_pos;
  88     int last_frame_output;
  89 } SVQ3Context;
  90
  /* Values of the `mode` argument of svq3_mc_dir(): the precision of the
   * decoded motion vectors (full, half or third pel), or PREDICT_MODE, where
   * the vectors are derived from next_pic's motion vectors and no
   * differential is read from the bitstream. */
  91 #define FULLPEL_MODE  1
  92 #define HALFPEL_MODE  2
  93 #define THIRDPEL_MODE 3
  94 #define PREDICT_MODE  4
  95
  96 /* dual scan (from some older h264 draft)
  97  * o-->o-->o   o
  98  *         |  /|
  99  * o   o   o / o
 100  * | / |   |/  |
 101  * o   o   o   o
 102  *   /
 103  * o-->o-->o-->o
 104  */
     /* Each entry is written as x + y * 4, an index into a 16-coefficient
      * block. svq3_decode_block() uses this table as scan pattern 2, where the
      * 16 positions are decoded as two segments of 8. */
 105 static const uint8_t svq3_scan[16] = {
 106     0 + 0 * 4, 1 + 0 * 4, 2 + 0 * 4, 2 + 1 * 4,
 107     2 + 2 * 4, 3 + 0 * 4, 3 + 1 * 4, 3 + 2 * 4,
 108     0 + 1 * 4, 0 + 2 * 4, 1 + 1 * 4, 1 + 2 * 4,
 109     0 + 3 * 4, 1 + 3 * 4, 2 + 3 * 4, 3 + 3 * 4,
 110 };
 111
 /* Scan order whose entries have the form x * 16 + y * 64, so every offset
  * is a multiple of 16. Listed as scan pattern 0 in svq3_decode_block(). */
 112 static const uint8_t luma_dc_zigzag_scan[16] = {
 113     0 * 16 + 0 * 64, 1 * 16 + 0 * 64, 2 * 16 + 0 * 64, 0 * 16 + 2 * 64,
 114     3 * 16 + 0 * 64, 0 * 16 + 1 * 64, 1 * 16 + 1 * 64, 2 * 16 + 1 * 64,
 115     1 * 16 + 2 * 64, 2 * 16 + 2 * 64, 3 * 16 + 2 * 64, 0 * 16 + 3 * 64,
 116     3 * 16 + 1 * 64, 1 * 16 + 3 * 64, 2 * 16 + 3 * 64, 3 * 16 + 3 * 64,
 117 };
 118
 /* Maps a luma prediction code (0..24, larger codes are rejected by
  * svq3_decode_mb()) to a pair of indices into the last dimension of
  * svq3_pred_1: element [0] is used for the first and element [1] for the
  * second of the two 4x4 blocks covered by one code. */
 119 static const uint8_t svq3_pred_0[25][2] = {
 120     { 0, 0 },
 121     { 1, 0 }, { 0, 1 },
 122     { 0, 2 }, { 1, 1 }, { 2, 0 },
 123     { 3, 0 }, { 2, 1 }, { 1, 2 }, { 0, 3 },
 124     { 0, 4 }, { 1, 3 }, { 2, 2 }, { 3, 1 }, { 4, 0 },
 125     { 4, 1 }, { 3, 2 }, { 2, 3 }, { 1, 4 },
 126     { 2, 4 }, { 3, 3 }, { 4, 2 },
 127     { 4, 3 }, { 3, 4 },
 128     { 4, 4 }
 129 };
 130
 /* Intra 4x4 prediction mode lookup, read by svq3_decode_mb() as
  * svq3_pred_1[top + 1][left + 1][n]: top and left are the cached modes of
  * the neighbouring blocks (the cache is pre-filled with -1) and n comes from
  * svq3_pred_0. A resulting -1 is reported there as "weird prediction". */
 131 static const int8_t svq3_pred_1[6][6][5] = {
 132     { { 2, -1, -1, -1, -1 }, { 2, 1, -1, -1, -1 }, { 1, 2, -1, -1, -1 },
 133       { 2,  1, -1, -1, -1 }, { 1, 2, -1, -1, -1 }, { 1, 2, -1, -1, -1 } },
 134     { { 0,  2, -1, -1, -1 }, { 0, 2,  1,  4,  3 }, { 0, 1,  2,  4,  3 },
 135       { 0,  2,  1,  4,  3 }, { 2, 0,  1,  3,  4 }, { 0, 4,  2,  1,  3 } },
 136     { { 2,  0, -1, -1, -1 }, { 2, 1,  0,  4,  3 }, { 1, 2,  4,  0,  3 },
 137       { 2,  1,  0,  4,  3 }, { 2, 1,  4,  3,  0 }, { 1, 2,  4,  0,  3 } },
 138     { { 2,  0, -1, -1, -1 }, { 2, 0,  1,  4,  3 }, { 1, 2,  0,  4,  3 },
 139       { 2,  1,  0,  4,  3 }, { 2, 1,  3,  4,  0 }, { 2, 4,  1,  0,  3 } },
 140     { { 0,  2, -1, -1, -1 }, { 0, 2,  1,  3,  4 }, { 1, 2,  3,  0,  4 },
 141       { 2,  0,  1,  3,  4 }, { 2, 1,  3,  0,  4 }, { 2, 0,  4,  3,  1 } },
 142     { { 0,  2, -1, -1, -1 }, { 0, 2,  4,  1,  3 }, { 1, 4,  2,  0,  3 },
 143       { 4,  2,  0,  1,  3 }, { 2, 0,  1,  4,  3 }, { 4, 2,  1,  0,  3 } },
 144 };
 145
 /* (run, level) pairs for coefficient codes below 16, read by
  * svq3_decode_block() as svq3_dct_tables[intra][code]. Codes of 16 and above
  * are converted arithmetically in that function instead. */
 146 static const struct {
 147     uint8_t run;
 148     uint8_t level;
 149 } svq3_dct_tables[2][16] = {
 150     { { 0, 0 }, { 0, 1 }, { 1, 1 }, { 2, 1 }, { 0, 2 }, { 3, 1 }, { 4, 1 }, { 5, 1 },
 151       { 0, 3 }, { 1, 2 }, { 2, 2 }, { 6, 1 }, { 7, 1 }, { 8, 1 }, { 9, 1 }, { 0, 4 } },
 152     { { 0, 0 }, { 0, 1 }, { 1, 1 }, { 0, 2 }, { 2, 1 }, { 0, 3 }, { 0, 4 }, { 0, 5 },
 153       { 3, 1 }, { 4, 1 }, { 1, 2 }, { 1, 3 }, { 0, 6 }, { 0, 7 }, { 0, 8 }, { 0, 9 } }
 154 };
 155
 /* Dequantisation multipliers, indexed by the quantiser (qp); 32 entries. */
 static const uint32_t svq3_dequant_coeff[32] = {
      3881,  4351,  4890,  5481,   6154,   6914,   7761,   8718,
      9781, 10987, 12339, 13828,  15523,  17435,  19561,  21873,
     24552, 27656, 30847, 34870,  38807,  43747,  49103,  54683,
     61694, 68745, 77615, 89113, 100253, 109366, 126635, 141533
 };

 /**
  * Inverse-transform and dequantise the 4x4 array of luma DC coefficients.
  *
  * @param output destination; the 16 results are written to output[16 * n]
  *               for n = 0..15, no other element is touched
  * @param input  16 DC coefficients in row-major order
  * @param qp     index into svq3_dequant_coeff; not range-checked here, the
  *               caller must pass a value in 0..31
  */
 void ff_svq3_luma_dc_dequant_idct_c(int16_t *output, int16_t *input, int qp)
 {
     /* spacing of the written slots: callers treat output as consecutive
      * 16-coefficient blocks, so this addresses coefficient 0 of each one */
     enum { STRIDE = 16 };
     static const uint8_t x_offset[4] = { 0, 1 * STRIDE, 4 * STRIDE, 5 * STRIDE };
     /* unsigned on purpose: the products below can exceed INT_MAX, and signed
      * overflow is undefined while unsigned arithmetic wraps */
     const unsigned qmul = svq3_dequant_coeff[qp];
     int temp[16];
     int i;

     /* horizontal pass over each row of the input */
     for (i = 0; i < 4; i++) {
         const int z0 = 13 * (input[4 * i + 0] +      input[4 * i + 2]);
         const int z1 = 13 * (input[4 * i + 0] -      input[4 * i + 2]);
         const int z2 =  7 *  input[4 * i + 1] - 17 * input[4 * i + 3];
         const int z3 = 17 *  input[4 * i + 1] +  7 * input[4 * i + 3];

         temp[4 * i + 0] = z0 + z3;
         temp[4 * i + 1] = z1 + z2;
         temp[4 * i + 2] = z1 - z2;
         temp[4 * i + 3] = z0 - z3;
     }

     /* vertical pass, dequantisation and rounding down to 16 bits */
     for (i = 0; i < 4; i++) {
         const int offset = x_offset[i];
         const int z0     = 13 * (temp[4 * 0 + i] +      temp[4 * 2 + i]);
         const int z1     = 13 * (temp[4 * 0 + i] -      temp[4 * 2 + i]);
         const int z2     =  7 *  temp[4 * 1 + i] - 17 * temp[4 * 3 + i];
         const int z3     = 17 *  temp[4 * 1 + i] +  7 * temp[4 * 3 + i];

         /* multiply and round in unsigned, then go back to int so that the
          * shift is the same arithmetic shift as before */
         output[STRIDE *  0 + offset] = (int)((unsigned)(z0 + z3) * qmul + 0x80000) >> 20;
         output[STRIDE *  2 + offset] = (int)((unsigned)(z1 + z2) * qmul + 0x80000) >> 20;
         output[STRIDE *  8 + offset] = (int)((unsigned)(z1 - z2) * qmul + 0x80000) >> 20;
         output[STRIDE * 10 + offset] = (int)((unsigned)(z0 - z3) * qmul + 0x80000) >> 20;
     }
 }
 197
 198 void ff_svq3_add_idct_c(uint8_t *dst, int16_t *block,
 199                         int stride, int qp, int dc)
 200 {
 201     const int qmul = svq3_dequant_coeff[qp];
 202     int i;
 203
 204     if (dc) {
 205         dc       = 13 * 13 * (dc == 1 ? 1538 * block[0]
 206                                       : qmul * (block[0] >> 3) / 2);
 207         block[0] = 0;
 208     }
 209
 210     for (i = 0; i < 4; i++) {
 211         const int z0 = 13 * (block[0 + 4 * i] +      block[2 + 4 * i]);
 212         const int z1 = 13 * (block[0 + 4 * i] -      block[2 + 4 * i]);
 213         const int z2 =  7 *  block[1 + 4 * i] - 17 * block[3 + 4 * i];
 214         const int z3 = 17 *  block[1 + 4 * i] +  7 * block[3 + 4 * i];
 215
 216         block[0 + 4 * i] = z0 + z3;
 217         block[1 + 4 * i] = z1 + z2;
 218         block[2 + 4 * i] = z1 - z2;
 219         block[3 + 4 * i] = z0 - z3;
 220     }
 221
 222     for (i = 0; i < 4; i++) {
 223         const int z0 = 13 * (block[i + 4 * 0] +      block[i + 4 * 2]);
 224         const int z1 = 13 * (block[i + 4 * 0] -      block[i + 4 * 2]);
 225         const int z2 =  7 *  block[i + 4 * 1] - 17 * block[i + 4 * 3];
 226         const int z3 = 17 *  block[i + 4 * 1] +  7 * block[i + 4 * 3];
 227         const int rr = (dc + 0x80000);
 228
 229         dst[i + stride * 0] = av_clip_uint8(dst[i + stride * 0] + ((z0 + z3) * qmul + rr >> 20));
 230         dst[i + stride * 1] = av_clip_uint8(dst[i + stride * 1] + ((z1 + z2) * qmul + rr >> 20));
 231         dst[i + stride * 2] = av_clip_uint8(dst[i + stride * 2] + ((z1 - z2) * qmul + rr >> 20));
 232         dst[i + stride * 3] = av_clip_uint8(dst[i + stride * 3] + ((z0 - z3) * qmul + rr >> 20));
 233     }
 234
 235     memset(block, 0, 16 * sizeof(int16_t));
 236 }
 237
 238 static inline int svq3_decode_block(GetBitContext *gb, int16_t *block,
 239                                     int index, const int type)
 240 {
 241     static const uint8_t *const scan_patterns[4] = {
 242         luma_dc_zigzag_scan, ff_zigzag_scan, svq3_scan, ff_h264_chroma_dc_scan
 243     };
 244
 245     int run, level, limit;
 246     unsigned vlc;
 247     const int intra           = 3 * type >> 2;
 248     const uint8_t *const scan = scan_patterns[type];
 249
 250     for (limit = (16 >> intra); index < 16; index = limit, limit += 8) {
 251         for (; (vlc = svq3_get_ue_golomb(gb)) != 0; index++) {
 252             int sign = (vlc & 1) ? 0 : -1;
 253             vlc      = vlc + 1 >> 1;
 254
 255             if (type == 3) {
 256                 if (vlc < 3) {
 257                     run   = 0;
 258                     level = vlc;
 259                 } else if (vlc < 4) {
 260                     run   = 1;
 261                     level = 1;
 262                 } else {
 263                     run   = vlc & 0x3;
 264                     level = (vlc + 9 >> 2) - run;
 265                 }
 266             } else {
 267                 if (vlc < 16) {
 268                     run   = svq3_dct_tables[intra][vlc].run;
 269                     level = svq3_dct_tables[intra][vlc].level;
 270                 } else if (intra) {
 271                     run   = vlc & 0x7;
 272                     level = (vlc >> 3) +
 273                             ((run == 0) ? 8 : ((run < 2) ? 2 : ((run < 5) ? 0 : -1)));
 274                 } else {
 275                     run   = vlc & 0xF;
 276                     level = (vlc >> 4) +
 277                             ((run == 0) ? 4 : ((run < 3) ? 2 : ((run < 10) ? 1 : 0)));
 278                 }
 279             }
 280
 281             if ((index += run) >= limit)
 282                 return -1;
 283
 284             block[scan[index]] = (level ^ sign) - sign;
 285         }
 286
 287         if (type != 2) {
 288             break;
 289         }
 290     }
 291
 292     return 0;
 293 }
 294
 295 static inline void svq3_mc_dir_part(SVQ3Context *s,
 296                                     int x, int y, int width, int height,
 297                                     int mx, int my, int dxy,
 298                                     int thirdpel, int dir, int avg)
 299 {
 300     H264Context *h = &s->h;
 301     H264SliceContext *sl = &h->slice_ctx[0];
 302     const H264Picture *pic = (dir == 0) ? s->last_pic : s->next_pic;
 303     uint8_t *src, *dest;
 304     int i, emu = 0;
 305     int blocksize = 2 - (width >> 3); // 16->0, 8->1, 4->2
 306
 307     mx += x;
 308     my += y;
 309
 310     if (mx < 0 || mx >= s->h_edge_pos - width  - 1 ||
 311         my < 0 || my >= s->v_edge_pos - height - 1) {
 312         emu = 1;
 313         mx = av_clip(mx, -16, s->h_edge_pos - width  + 15);
 314         my = av_clip(my, -16, s->v_edge_pos - height + 15);
 315     }
 316
 317     /* form component predictions */
 318     dest = h->cur_pic.f->data[0] + x + y * sl->linesize;
 319     src  = pic->f->data[0] + mx + my * sl->linesize;
 320
 321     if (emu) {
 322         h->vdsp.emulated_edge_mc(sl->edge_emu_buffer, src,
 323                                  sl->linesize, sl->linesize,
 324                                  width + 1, height + 1,
 325                                  mx, my, s->h_edge_pos, s->v_edge_pos);
 326         src = sl->edge_emu_buffer;
 327     }
 328     if (thirdpel)
 329         (avg ? s->tdsp.avg_tpel_pixels_tab
 330              : s->tdsp.put_tpel_pixels_tab)[dxy](dest, src, sl->linesize,
 331                                                  width, height);
 332     else
 333         (avg ? s->hdsp.avg_pixels_tab
 334              : s->hdsp.put_pixels_tab)[blocksize][dxy](dest, src, sl->linesize,
 335                                                        height);
 336
 337     if (!(h->flags & AV_CODEC_FLAG_GRAY)) {
 338         mx     = mx + (mx < (int) x) >> 1;
 339         my     = my + (my < (int) y) >> 1;
 340         width  = width  >> 1;
 341         height = height >> 1;
 342         blocksize++;
 343
 344         for (i = 1; i < 3; i++) {
 345             dest = h->cur_pic.f->data[i] + (x >> 1) + (y >> 1) * sl->uvlinesize;
 346             src  = pic->f->data[i] + mx + my * sl->uvlinesize;
 347
 348             if (emu) {
 349                 h->vdsp.emulated_edge_mc(sl->edge_emu_buffer, src,
 350                                          sl->uvlinesize, sl->uvlinesize,
 351                                          width + 1, height + 1,
 352                                          mx, my, (s->h_edge_pos >> 1),
 353                                          s->v_edge_pos >> 1);
 354                 src = sl->edge_emu_buffer;
 355             }
 356             if (thirdpel)
 357                 (avg ? s->tdsp.avg_tpel_pixels_tab
 358                      : s->tdsp.put_tpel_pixels_tab)[dxy](dest, src,
 359                                                          sl->uvlinesize,
 360                                                          width, height);
 361             else
 362                 (avg ? s->hdsp.avg_pixels_tab
 363                      : s->hdsp.put_pixels_tab)[blocksize][dxy](dest, src,
 364                                                                sl->uvlinesize,
 365                                                                height);
 366         }
 367     }
 368 }
 369
 370 static inline int svq3_mc_dir(SVQ3Context *s, int size, int mode,
 371                               int dir, int avg)
 372 {
 373     int i, j, k, mx, my, dx, dy, x, y;
 374     H264Context *h          = &s->h;
 375     H264SliceContext *sl    = &h->slice_ctx[0];
 376     const int part_width    = ((size & 5) == 4) ? 4 : 16 >> (size & 1);
 377     const int part_height   = 16 >> ((unsigned)(size + 1) / 3);
 378     const int extra_width   = (mode == PREDICT_MODE) ? -16 * 6 : 0;
 379     const int h_edge_pos    = 6 * (s->h_edge_pos - part_width)  - extra_width;
 380     const int v_edge_pos    = 6 * (s->v_edge_pos - part_height) - extra_width;
 381
 382     for (i = 0; i < 16; i += part_height)
 383         for (j = 0; j < 16; j += part_width) {
 384             const int b_xy = (4 * sl->mb_x + (j >> 2)) +
 385                              (4 * sl->mb_y + (i >> 2)) * h->b_stride;
 386             int dxy;
 387             x = 16 * sl->mb_x + j;
 388             y = 16 * sl->mb_y + i;
 389             k = (j >> 2 & 1) + (i >> 1 & 2) +
 390                 (j >> 1 & 4) + (i      & 8);
 391
 392             if (mode != PREDICT_MODE) {
 393                 pred_motion(h, sl, k, part_width >> 2, dir, 1, &mx, &my);
 394             } else {
 395                 mx = s->next_pic->motion_val[0][b_xy][0] << 1;
 396                 my = s->next_pic->motion_val[0][b_xy][1] << 1;
 397
 398                 if (dir == 0) {
 399                     mx = mx * h->frame_num_offset /
 400                          h->prev_frame_num_offset + 1 >> 1;
 401                     my = my * h->frame_num_offset /
 402                          h->prev_frame_num_offset + 1 >> 1;
 403                 } else {
 404                     mx = mx * (h->frame_num_offset - h->prev_frame_num_offset) /
 405                          h->prev_frame_num_offset + 1 >> 1;
 406                     my = my * (h->frame_num_offset - h->prev_frame_num_offset) /
 407                          h->prev_frame_num_offset + 1 >> 1;
 408                 }
 409             }
 410
 411             /* clip motion vector prediction to frame border */
 412             mx = av_clip(mx, extra_width - 6 * x, h_edge_pos - 6 * x);
 413             my = av_clip(my, extra_width - 6 * y, v_edge_pos - 6 * y);
 414
 415             /* get (optional) motion vector differential */
 416             if (mode == PREDICT_MODE) {
 417                 dx = dy = 0;
 418             } else {
 419                 dy = svq3_get_se_golomb(&h->gb);
 420                 dx = svq3_get_se_golomb(&h->gb);
 421
 422                 if (dx == INVALID_VLC || dy == INVALID_VLC) {
 423                     av_log(h->avctx, AV_LOG_ERROR, "invalid MV vlc\n");
 424                     return -1;
 425                 }
 426             }
 427
 428             /* compute motion vector */
 429             if (mode == THIRDPEL_MODE) {
 430                 int fx, fy;
 431                 mx  = (mx + 1 >> 1) + dx;
 432                 my  = (my + 1 >> 1) + dy;
 433                 fx  = (unsigned)(mx + 0x3000) / 3 - 0x1000;
 434                 fy  = (unsigned)(my + 0x3000) / 3 - 0x1000;
 435                 dxy = (mx - 3 * fx) + 4 * (my - 3 * fy);
 436
 437                 svq3_mc_dir_part(s, x, y, part_width, part_height,
 438                                  fx, fy, dxy, 1, dir, avg);
 439                 mx += mx;
 440                 my += my;
 441             } else if (mode == HALFPEL_MODE || mode == PREDICT_MODE) {
 442                 mx  = (unsigned)(mx + 1 + 0x3000) / 3 + dx - 0x1000;
 443                 my  = (unsigned)(my + 1 + 0x3000) / 3 + dy - 0x1000;
 444                 dxy = (mx & 1) + 2 * (my & 1);
 445
 446                 svq3_mc_dir_part(s, x, y, part_width, part_height,
 447                                  mx >> 1, my >> 1, dxy, 0, dir, avg);
 448                 mx *= 3;
 449                 my *= 3;
 450             } else {
 451                 mx = (unsigned)(mx + 3 + 0x6000) / 6 + dx - 0x1000;
 452                 my = (unsigned)(my + 3 + 0x6000) / 6 + dy - 0x1000;
 453
 454                 svq3_mc_dir_part(s, x, y, part_width, part_height,
 455                                  mx, my, 0, 0, dir, avg);
 456                 mx *= 6;
 457                 my *= 6;
 458             }
 459
 460             /* update mv_cache */
 461             if (mode != PREDICT_MODE) {
 462                 int32_t mv = pack16to32(mx, my);
 463
 464                 if (part_height == 8 && i < 8) {
 465                     AV_WN32A(sl->mv_cache[dir][scan8[k] + 1 * 8], mv);
 466
 467                     if (part_width == 8 && j < 8)
 468                         AV_WN32A(sl->mv_cache[dir][scan8[k] + 1 + 1 * 8], mv);
 469                 }
 470                 if (part_width == 8 && j < 8)
 471                     AV_WN32A(sl->mv_cache[dir][scan8[k] + 1], mv);
 472                 if (part_width == 4 || part_height == 4)
 473                     AV_WN32A(sl->mv_cache[dir][scan8[k]], mv);
 474             }
 475
 476             /* write back motion vectors */
 477             fill_rectangle(h->cur_pic.motion_val[dir][b_xy],
 478                            part_width >> 2, part_height >> 2, h->b_stride,
 479                            pack16to32(mx, my), 4);
 480         }
 481
 482     return 0;
 483 }
 484
 485 static av_always_inline void hl_decode_mb_idct_luma(const H264Context *h, H264SliceContext *sl,
 486                                                     int mb_type, const int *block_offset,
 487                                                     int linesize, uint8_t *dest_y)
 488 {
 489     int i;
 490     if (!IS_INTRA4x4(mb_type)) {
 491         for (i = 0; i < 16; i++)
 492             if (sl->non_zero_count_cache[scan8[i]] || sl->mb[i * 16]) {
 493                 uint8_t *const ptr = dest_y + block_offset[i];
 494                 ff_svq3_add_idct_c(ptr, sl->mb + i * 16, linesize,
 495                                    sl->qscale, IS_INTRA(mb_type) ? 1 : 0);
 496             }
 497     }
 498 }
 499
 500 static av_always_inline int dctcoef_get(int16_t *mb, int index)
 501 {
 502     return AV_RN16A(mb + index);
 503 }
 504
 505 static av_always_inline void hl_decode_mb_predict_luma(const H264Context *h,
 506                                                        H264SliceContext *sl,
 507                                                        int mb_type,
 508                                                        const int *block_offset,
 509                                                        int linesize,
 510                                                        uint8_t *dest_y)
 511 {
 512     int i;
 513     int qscale = sl->qscale;
 514
 515     if (IS_INTRA4x4(mb_type)) {
 516         for (i = 0; i < 16; i++) {
 517             uint8_t *const ptr = dest_y + block_offset[i];
 518             const int dir      = sl->intra4x4_pred_mode_cache[scan8[i]];
 519
 520             uint8_t *topright;
 521             int nnz, tr;
 522             if (dir == DIAG_DOWN_LEFT_PRED || dir == VERT_LEFT_PRED) {
 523                 const int topright_avail = (sl->topright_samples_available << i) & 0x8000;
 524                 assert(sl->mb_y || linesize <= block_offset[i]);
 525                 if (!topright_avail) {
 526                     tr       = ptr[3 - linesize] * 0x01010101u;
 527                     topright = (uint8_t *)&tr;
 528                 } else
 529                     topright = ptr + 4 - linesize;
 530             } else
 531                 topright = NULL;
 532
 533             h->hpc.pred4x4[dir](ptr, topright, linesize);
 534             nnz = sl->non_zero_count_cache[scan8[i]];
 535             if (nnz) {
 536                 ff_svq3_add_idct_c(ptr, sl->mb + i * 16, linesize, qscale, 0);
 537             }
 538         }
 539     } else {
 540         h->hpc.pred16x16[sl->intra16x16_pred_mode](dest_y, linesize);
 541         ff_svq3_luma_dc_dequant_idct_c(sl->mb,
 542                                        sl->mb_luma_dc[0], qscale);
 543     }
 544 }
 545
 546 static void hl_decode_mb(const H264Context *h, H264SliceContext *sl)
 547 {
 548     const int mb_x    = sl->mb_x;
 549     const int mb_y    = sl->mb_y;
 550     const int mb_xy   = sl->mb_xy;
 551     const int mb_type = h->cur_pic.mb_type[mb_xy];
 552     uint8_t *dest_y, *dest_cb, *dest_cr;
 553     int linesize, uvlinesize;
 554     int i, j;
 555     const int *block_offset = &h->block_offset[0];
 556     const int block_h   = 16 >> h->chroma_y_shift;
 557
 558     dest_y  = h->cur_pic.f->data[0] + (mb_x     + mb_y * sl->linesize)  * 16;
 559     dest_cb = h->cur_pic.f->data[1] +  mb_x * 8 + mb_y * sl->uvlinesize * block_h;
 560     dest_cr = h->cur_pic.f->data[2] +  mb_x * 8 + mb_y * sl->uvlinesize * block_h;
 561
 562     h->vdsp.prefetch(dest_y  + (sl->mb_x & 3) * 4 * sl->linesize   + 64, sl->linesize,      4);
 563     h->vdsp.prefetch(dest_cb + (sl->mb_x & 7)     * sl->uvlinesize + 64, dest_cr - dest_cb, 2);
 564
 565     h->list_counts[mb_xy] = sl->list_count;
 566
 567     linesize   = sl->mb_linesize   = sl->linesize;
 568     uvlinesize = sl->mb_uvlinesize = sl->uvlinesize;
 569
 570     if (IS_INTRA(mb_type)) {
 571         h->hpc.pred8x8[sl->chroma_pred_mode](dest_cb, uvlinesize);
 572         h->hpc.pred8x8[sl->chroma_pred_mode](dest_cr, uvlinesize);
 573
 574         hl_decode_mb_predict_luma(h, sl, mb_type, block_offset, linesize, dest_y);
 575     }
 576
 577     hl_decode_mb_idct_luma(h, sl, mb_type, block_offset, linesize, dest_y);
 578
 579     if (sl->cbp & 0x30) {
 580         uint8_t *dest[2] = { dest_cb, dest_cr };
 581         h->h264dsp.h264_chroma_dc_dequant_idct(sl->mb + 16 * 16 * 1,
 582                                                h->dequant4_coeff[IS_INTRA(mb_type) ? 1 : 4][sl->chroma_qp[0]][0]);
 583         h->h264dsp.h264_chroma_dc_dequant_idct(sl->mb + 16 * 16 * 2,
 584                                                h->dequant4_coeff[IS_INTRA(mb_type) ? 2 : 5][sl->chroma_qp[1]][0]);
 585         for (j = 1; j < 3; j++) {
 586             for (i = j * 16; i < j * 16 + 4; i++)
 587                 if (sl->non_zero_count_cache[scan8[i]] || sl->mb[i * 16]) {
 588                     uint8_t *const ptr = dest[j - 1] + block_offset[i];
 589                     ff_svq3_add_idct_c(ptr, sl->mb + i * 16,
 590                                        uvlinesize,
 591                                        ff_h264_chroma_qp[0][sl->qscale + 12] - 12, 2);
 592                 }
 593         }
 594     }
 595 }
 596
 597 static int svq3_decode_mb(SVQ3Context *s, unsigned int mb_type)
 598 {
 599     H264Context *h = &s->h;
 600     H264SliceContext *sl = &h->slice_ctx[0];
 601     int i, j, k, m, dir, mode;
 602     int cbp = 0;
 603     uint32_t vlc;
 604     int8_t *top, *left;
 605     const int mb_xy         = sl->mb_xy;
 606     const int b_xy          = 4 * sl->mb_x + 4 * sl->mb_y * h->b_stride;
 607
 608     sl->top_samples_available      = (sl->mb_y == 0) ? 0x33FF : 0xFFFF;
 609     sl->left_samples_available     = (sl->mb_x == 0) ? 0x5F5F : 0xFFFF;
 610     sl->topright_samples_available = 0xFFFF;
 611
 612     if (mb_type == 0) {           /* SKIP */
 613         if (h->pict_type == AV_PICTURE_TYPE_P ||
 614             s->next_pic->mb_type[mb_xy] == -1) {
 615             svq3_mc_dir_part(s, 16 * sl->mb_x, 16 * sl->mb_y, 16, 16,
 616                              0, 0, 0, 0, 0, 0);
 617
 618             if (h->pict_type == AV_PICTURE_TYPE_B)
 619                 svq3_mc_dir_part(s, 16 * sl->mb_x, 16 * sl->mb_y, 16, 16,
 620                                  0, 0, 0, 0, 1, 1);
 621
 622             mb_type = MB_TYPE_SKIP;
 623         } else {
 624             mb_type = FFMIN(s->next_pic->mb_type[mb_xy], 6);
 625             if (svq3_mc_dir(s, mb_type, PREDICT_MODE, 0, 0) < 0)
 626                 return -1;
 627             if (svq3_mc_dir(s, mb_type, PREDICT_MODE, 1, 1) < 0)
 628                 return -1;
 629
 630             mb_type = MB_TYPE_16x16;
 631         }
 632     } else if (mb_type < 8) {     /* INTER */
 633         if (s->thirdpel_flag && s->halfpel_flag == !get_bits1(&h->gb))
 634             mode = THIRDPEL_MODE;
 635         else if (s->halfpel_flag &&
 636                  s->thirdpel_flag == !get_bits1(&h->gb))
 637             mode = HALFPEL_MODE;
 638         else
 639             mode = FULLPEL_MODE;
 640
 641         /* fill caches */
 642         /* note ref_cache should contain here:
 643          *  ????????
 644          *  ???11111
 645          *  N??11111
 646          *  N??11111
 647          *  N??11111
 648          */
 649
 650         for (m = 0; m < 2; m++) {
 651             if (sl->mb_x > 0 && sl->intra4x4_pred_mode[h->mb2br_xy[mb_xy - 1] + 6] != -1) {
 652                 for (i = 0; i < 4; i++)
 653                     AV_COPY32(sl->mv_cache[m][scan8[0] - 1 + i * 8],
 654                               h->cur_pic.motion_val[m][b_xy - 1 + i * h->b_stride]);
 655             } else {
 656                 for (i = 0; i < 4; i++)
 657                     AV_ZERO32(sl->mv_cache[m][scan8[0] - 1 + i * 8]);
 658             }
 659             if (sl->mb_y > 0) {
 660                 memcpy(sl->mv_cache[m][scan8[0] - 1 * 8],
 661                        h->cur_pic.motion_val[m][b_xy - h->b_stride],
 662                        4 * 2 * sizeof(int16_t));
 663                 memset(&sl->ref_cache[m][scan8[0] - 1 * 8],
 664                        (sl->intra4x4_pred_mode[h->mb2br_xy[mb_xy - h->mb_stride]] == -1) ? PART_NOT_AVAILABLE : 1, 4);
 665
 666                 if (sl->mb_x < h->mb_width - 1) {
 667                     AV_COPY32(sl->mv_cache[m][scan8[0] + 4 - 1 * 8],
 668                               h->cur_pic.motion_val[m][b_xy - h->b_stride + 4]);
 669                     sl->ref_cache[m][scan8[0] + 4 - 1 * 8] =
 670                         (sl->intra4x4_pred_mode[h->mb2br_xy[mb_xy - h->mb_stride + 1] + 6] == -1 ||
 671                          sl->intra4x4_pred_mode[h->mb2br_xy[mb_xy - h->mb_stride]] == -1) ? PART_NOT_AVAILABLE : 1;
 672                 } else
 673                     sl->ref_cache[m][scan8[0] + 4 - 1 * 8] = PART_NOT_AVAILABLE;
 674                 if (sl->mb_x > 0) {
 675                     AV_COPY32(sl->mv_cache[m][scan8[0] - 1 - 1 * 8],
 676                               h->cur_pic.motion_val[m][b_xy - h->b_stride - 1]);
 677                     sl->ref_cache[m][scan8[0] - 1 - 1 * 8] =
 678                         (sl->intra4x4_pred_mode[h->mb2br_xy[mb_xy - h->mb_stride - 1] + 3] == -1) ? PART_NOT_AVAILABLE : 1;
 679                 } else
 680                     sl->ref_cache[m][scan8[0] - 1 - 1 * 8] = PART_NOT_AVAILABLE;
 681             } else
 682                 memset(&sl->ref_cache[m][scan8[0] - 1 * 8 - 1],
 683                        PART_NOT_AVAILABLE, 8);
 684
 685             if (h->pict_type != AV_PICTURE_TYPE_B)
 686                 break;
 687         }
 688
 689         /* decode motion vector(s) and form prediction(s) */
 690         if (h->pict_type == AV_PICTURE_TYPE_P) {
 691             if (svq3_mc_dir(s, mb_type - 1, mode, 0, 0) < 0)
 692                 return -1;
 693         } else {        /* AV_PICTURE_TYPE_B */
 694             if (mb_type != 2) {
 695                 if (svq3_mc_dir(s, 0, mode, 0, 0) < 0)
 696                     return -1;
 697             } else {
 698                 for (i = 0; i < 4; i++)
 699                     memset(h->cur_pic.motion_val[0][b_xy + i * h->b_stride],
 700                            0, 4 * 2 * sizeof(int16_t));
 701             }
 702             if (mb_type != 1) {
 703                 if (svq3_mc_dir(s, 0, mode, 1, mb_type == 3) < 0)
 704                     return -1;
 705             } else {
 706                 for (i = 0; i < 4; i++)
 707                     memset(h->cur_pic.motion_val[1][b_xy + i * h->b_stride],
 708                            0, 4 * 2 * sizeof(int16_t));
 709             }
 710         }
 711
 712         mb_type = MB_TYPE_16x16;
 713     } else if (mb_type == 8 || mb_type == 33) {   /* INTRA4x4 */
 714         memset(sl->intra4x4_pred_mode_cache, -1, 8 * 5 * sizeof(int8_t));
 715
 716         if (mb_type == 8) {
 717             if (sl->mb_x > 0) {
 718                 for (i = 0; i < 4; i++)
 719                     sl->intra4x4_pred_mode_cache[scan8[0] - 1 + i * 8] = sl->intra4x4_pred_mode[h->mb2br_xy[mb_xy - 1] + 6 - i];
 720                 if (sl->intra4x4_pred_mode_cache[scan8[0] - 1] == -1)
 721                     sl->left_samples_available = 0x5F5F;
 722             }
 723             if (sl->mb_y > 0) {
 724                 sl->intra4x4_pred_mode_cache[4 + 8 * 0] = sl->intra4x4_pred_mode[h->mb2br_xy[mb_xy - h->mb_stride] + 0];
 725                 sl->intra4x4_pred_mode_cache[5 + 8 * 0] = sl->intra4x4_pred_mode[h->mb2br_xy[mb_xy - h->mb_stride] + 1];
 726                 sl->intra4x4_pred_mode_cache[6 + 8 * 0] = sl->intra4x4_pred_mode[h->mb2br_xy[mb_xy - h->mb_stride] + 2];
 727                 sl->intra4x4_pred_mode_cache[7 + 8 * 0] = sl->intra4x4_pred_mode[h->mb2br_xy[mb_xy - h->mb_stride] + 3];
 728
 729                 if (sl->intra4x4_pred_mode_cache[4 + 8 * 0] == -1)
 730                     sl->top_samples_available = 0x33FF;
 731             }
 732
 733             /* decode prediction codes for luma blocks */
 734             for (i = 0; i < 16; i += 2) {
 735                 vlc = svq3_get_ue_golomb(&h->gb);
 736
 737                 if (vlc >= 25) {
 738                     av_log(h->avctx, AV_LOG_ERROR,
 739                            "luma prediction:%"PRIu32"\n", vlc);
 740                     return -1;
 741                 }
 742
 743                 left = &sl->intra4x4_pred_mode_cache[scan8[i] - 1];
 744                 top  = &sl->intra4x4_pred_mode_cache[scan8[i] - 8];
 745
 746                 left[1] = svq3_pred_1[top[0] + 1][left[0] + 1][svq3_pred_0[vlc][0]];
 747                 left[2] = svq3_pred_1[top[1] + 1][left[1] + 1][svq3_pred_0[vlc][1]];
 748
 749                 if (left[1] == -1 || left[2] == -1) {
 750                     av_log(h->avctx, AV_LOG_ERROR, "weird prediction\n");
 751                     return -1;
 752                 }
 753             }
 754         } else {    /* mb_type == 33, DC_128_PRED block type */
 755             for (i = 0; i < 4; i++)
 756                 memset(&sl->intra4x4_pred_mode_cache[scan8[0] + 8 * i], DC_PRED, 4);
 757         }
 758
 759         write_back_intra_pred_mode(h, sl);
 760
 761         if (mb_type == 8) {
 762             ff_h264_check_intra4x4_pred_mode(h, sl);
 763
 764             sl->top_samples_available  = (sl->mb_y == 0) ? 0x33FF : 0xFFFF;
 765             sl->left_samples_available = (sl->mb_x == 0) ? 0x5F5F : 0xFFFF;
 766         } else {
 767             for (i = 0; i < 4; i++)
 768                 memset(&sl->intra4x4_pred_mode_cache[scan8[0] + 8 * i], DC_128_PRED, 4);
 769
 770             sl->top_samples_available  = 0x33FF;
 771             sl->left_samples_available = 0x5F5F;
 772         }
 773
 774         mb_type = MB_TYPE_INTRA4x4;
 775     } else {                      /* INTRA16x16 */
 776         dir = ff_h264_i_mb_type_info[mb_type - 8].pred_mode;
 777         dir = (dir >> 1) ^ 3 * (dir & 1) ^ 1;
 778
 779         if ((sl->intra16x16_pred_mode = ff_h264_check_intra_pred_mode(h, sl, dir, 0)) < 0) {
 780             av_log(h->avctx, AV_LOG_ERROR, "ff_h264_check_intra_pred_mode < 0\n");
 781             return sl->intra16x16_pred_mode;
 782         }
 783
 784         cbp     = ff_h264_i_mb_type_info[mb_type - 8].cbp;
 785         mb_type = MB_TYPE_INTRA16x16;
 786     }
 787
 788     if (!IS_INTER(mb_type) && h->pict_type != AV_PICTURE_TYPE_I) {
 789         for (i = 0; i < 4; i++)
 790             memset(h->cur_pic.motion_val[0][b_xy + i * h->b_stride],
 791                    0, 4 * 2 * sizeof(int16_t));
 792         if (h->pict_type == AV_PICTURE_TYPE_B) {
 793             for (i = 0; i < 4; i++)
 794                 memset(h->cur_pic.motion_val[1][b_xy + i * h->b_stride],
 795                        0, 4 * 2 * sizeof(int16_t));
 796         }
 797     }
 798     if (!IS_INTRA4x4(mb_type)) {
 799         memset(sl->intra4x4_pred_mode + h->mb2br_xy[mb_xy], DC_PRED, 8);
 800     }
 801     if (!IS_SKIP(mb_type) || h->pict_type == AV_PICTURE_TYPE_B) {
 802         memset(sl->non_zero_count_cache + 8, 0, 14 * 8 * sizeof(uint8_t));
 803     }
 804
 805     if (!IS_INTRA16x16(mb_type) &&
 806         (!IS_SKIP(mb_type) || h->pict_type == AV_PICTURE_TYPE_B)) {
 807         if ((vlc = svq3_get_ue_golomb(&h->gb)) >= 48) {
 808             av_log(h->avctx, AV_LOG_ERROR, "cbp_vlc=%"PRIu32"\n", vlc);
 809             return -1;
 810         }
 811
 812         cbp = IS_INTRA(mb_type) ? ff_h264_golomb_to_intra4x4_cbp[vlc]
 813                                 : ff_h264_golomb_to_inter_cbp[vlc];
 814     }
 815     if (IS_INTRA16x16(mb_type) ||
 816         (h->pict_type != AV_PICTURE_TYPE_I && s->adaptive_quant && cbp)) {
 817         sl->qscale += svq3_get_se_golomb(&h->gb);
 818
 819         if (sl->qscale > 31u) {
 820             av_log(h->avctx, AV_LOG_ERROR, "qscale:%d\n", sl->qscale);
 821             return -1;
 822         }
 823     }
 824     if (IS_INTRA16x16(mb_type)) {
 825         AV_ZERO128(sl->mb_luma_dc[0] + 0);
 826         AV_ZERO128(sl->mb_luma_dc[0] + 8);
 827         if (svq3_decode_block(&h->gb, sl->mb_luma_dc[0], 0, 1)) {
 828             av_log(h->avctx, AV_LOG_ERROR,
 829                    "error while decoding intra luma dc\n");
 830             return -1;
 831         }
 832     }
 833
 834     if (cbp) {
 835         const int index = IS_INTRA16x16(mb_type) ? 1 : 0;
 836         const int type  = ((sl->qscale < 24 && IS_INTRA4x4(mb_type)) ? 2 : 1);
 837
 838         for (i = 0; i < 4; i++)
 839             if ((cbp & (1 << i))) {
 840                 for (j = 0; j < 4; j++) {
 841                     k = index ? (1 * (j & 1) + 2 * (i & 1) +
 842                                  2 * (j & 2) + 4 * (i & 2))
 843                               : (4 * i + j);
 844                     sl->non_zero_count_cache[scan8[k]] = 1;
 845
 846                     if (svq3_decode_block(&h->gb, &sl->mb[16 * k], index, type)) {
 847                         av_log(h->avctx, AV_LOG_ERROR,
 848                                "error while decoding block\n");
 849                         return -1;
 850                     }
 851                 }
 852             }
 853
 854         if ((cbp & 0x30)) {
 855             for (i = 1; i < 3; ++i)
 856                 if (svq3_decode_block(&h->gb, &sl->mb[16 * 16 * i], 0, 3)) {
 857                     av_log(h->avctx, AV_LOG_ERROR,
 858                            "error while decoding chroma dc block\n");
 859                     return -1;
 860                 }
 861
 862             if ((cbp & 0x20)) {
 863                 for (i = 1; i < 3; i++) {
 864                     for (j = 0; j < 4; j++) {
 865                         k                                 = 16 * i + j;
 866                         sl->non_zero_count_cache[scan8[k]] = 1;
 867
 868                         if (svq3_decode_block(&h->gb, &sl->mb[16 * k], 1, 1)) {
 869                             av_log(h->avctx, AV_LOG_ERROR,
 870                                    "error while decoding chroma ac block\n");
 871                             return -1;
 872                         }
 873                     }
 874                 }
 875             }
 876         }
 877     }
 878
 879     sl->cbp                   = cbp;
 880     h->cur_pic.mb_type[mb_xy] = mb_type;
 881
 882     if (IS_INTRA(mb_type))
 883         sl->chroma_pred_mode = ff_h264_check_intra_pred_mode(h, sl, DC_PRED8x8, 1);
 884
 885     return 0;
 886 }
 887
 888 static int svq3_decode_slice_header(AVCodecContext *avctx)
 889 {
 890     SVQ3Context *s = avctx->priv_data;
 891     H264Context *h    = &s->h;
 892     H264SliceContext *sl = &h->slice_ctx[0];
 893     const int mb_xy   = sl->mb_xy;
 894     int i, header;
 895     unsigned slice_id;
 896
 897     header = get_bits(&s->gb, 8);
 898
 899     if (((header & 0x9F) != 1 && (header & 0x9F) != 2) || (header & 0x60) == 0) {
 900         /* TODO: what? */
 901         av_log(avctx, AV_LOG_ERROR, "unsupported slice header (%02X)\n", header);
 902         return -1;
 903     } else {
 904         int slice_bits, slice_bytes, slice_length;
 905         int length = header >> 5 & 3;
 906
 907         slice_length = show_bits(&s->gb, 8 * length);
 908         slice_bits   = slice_length * 8;
 909         slice_bytes  = slice_length + length - 1;
 910
 911         if (slice_bytes > get_bits_left(&s->gb)) {
 912             av_log(avctx, AV_LOG_ERROR, "slice after bitstream end\n");
 913             return -1;
 914         }
 915
 916         skip_bits(&s->gb, 8);
 917
 918         av_fast_malloc(&s->slice_buf, &s->slice_size, slice_bytes + AV_INPUT_BUFFER_PADDING_SIZE);
 919         if (!s->slice_buf)
 920             return AVERROR(ENOMEM);
 921
 922         memcpy(s->slice_buf, s->gb.buffer + s->gb.index / 8, slice_bytes);
 923
 924         init_get_bits(&h->gb, s->slice_buf, slice_bits);
 925
 926         if (s->watermark_key) {
 927             uint32_t header = AV_RL32(&h->gb.buffer[1]);
 928             AV_WL32(&h->gb.buffer[1], header ^ s->watermark_key);
 929         }
 930         if (length > 0) {
 931             memcpy(s->slice_buf, &s->slice_buf[slice_length], length - 1);
 932         }
 933         skip_bits_long(&s->gb, slice_bytes * 8);
 934     }
 935
 936     if ((slice_id = svq3_get_ue_golomb(&h->gb)) >= 3) {
 937         av_log(h->avctx, AV_LOG_ERROR, "illegal slice type %u \n", slice_id);
 938         return -1;
 939     }
 940
 941     sl->slice_type = ff_h264_golomb_to_pict_type[slice_id];
 942
 943     if ((header & 0x9F) == 2) {
 944         i              = (h->mb_num < 64) ? 6 : (1 + av_log2(h->mb_num - 1));
 945         sl->mb_skip_run = get_bits(&h->gb, i) -
 946                          (sl->mb_y * h->mb_width + sl->mb_x);
 947     } else {
 948         skip_bits1(&h->gb);
 949         sl->mb_skip_run = 0;
 950     }
 951
 952     sl->slice_num     = get_bits(&h->gb, 8);
 953     sl->qscale        = get_bits(&h->gb, 5);
 954     s->adaptive_quant = get_bits1(&h->gb);
 955
 956     /* unknown fields */
 957     skip_bits1(&h->gb);
 958
 959     if (s->unknown_flag)
 960         skip_bits1(&h->gb);
 961
 962     skip_bits1(&h->gb);
 963     skip_bits(&h->gb, 2);
 964
 965     while (get_bits1(&h->gb))
 966         skip_bits(&h->gb, 8);
 967
 968     /* reset intra predictors and invalidate motion vector references */
 969     if (sl->mb_x > 0) {
 970         memset(sl->intra4x4_pred_mode + h->mb2br_xy[mb_xy - 1] + 3,
 971                -1, 4 * sizeof(int8_t));
 972         memset(sl->intra4x4_pred_mode + h->mb2br_xy[mb_xy - sl->mb_x],
 973                -1, 8 * sizeof(int8_t) * sl->mb_x);
 974     }
 975     if (sl->mb_y > 0) {
 976         memset(sl->intra4x4_pred_mode + h->mb2br_xy[mb_xy - h->mb_stride],
 977                -1, 8 * sizeof(int8_t) * (h->mb_width - sl->mb_x));
 978
 979         if (sl->mb_x > 0)
 980             sl->intra4x4_pred_mode[h->mb2br_xy[mb_xy - h->mb_stride - 1] + 3] = -1;
 981     }
 982
 983     return 0;
 984 }
 985
 986 static av_cold int svq3_decode_init(AVCodecContext *avctx)
 987 {
 988     SVQ3Context *s = avctx->priv_data;
 989     H264Context *h = &s->h;
 990     H264SliceContext *sl;
 991     int m;
 992     unsigned char *extradata;
 993     unsigned char *extradata_end;
 994     unsigned int size;
 995     int marker_found = 0;
 996
 997     s->cur_pic  = av_mallocz(sizeof(*s->cur_pic));
 998     s->last_pic = av_mallocz(sizeof(*s->last_pic));
 999     s->next_pic = av_mallocz(sizeof(*s->next_pic));
1000     if (!s->next_pic || !s->last_pic || !s->cur_pic) {
1001         av_freep(&s->cur_pic);
1002         av_freep(&s->last_pic);
1003         av_freep(&s->next_pic);
1004         return AVERROR(ENOMEM);
1005     }
1006
1007     s->cur_pic->f  = av_frame_alloc();
1008     s->last_pic->f = av_frame_alloc();
1009     s->next_pic->f = av_frame_alloc();
1010     if (!s->cur_pic->f || !s->last_pic->f || !s->next_pic->f)
1011         return AVERROR(ENOMEM);
1012
1013     if (ff_h264_decode_init(avctx) < 0)
1014         return -1;
1015
1016     // we will overwrite it later during decoding
1017     av_frame_free(&h->cur_pic.f);
1018
1019     ff_h264dsp_init(&h->h264dsp, 8, 1);
1020     ff_h264chroma_init(&h->h264chroma, 8);
1021     ff_h264qpel_init(&h->h264qpel, 8);
1022     ff_h264_pred_init(&h->hpc, AV_CODEC_ID_SVQ3, 8, 1);
1023     ff_videodsp_init(&h->vdsp, 8);
1024
1025     memset(h->pps.scaling_matrix4, 16, 6 * 16 * sizeof(uint8_t));
1026     memset(h->pps.scaling_matrix8, 16, 2 * 64 * sizeof(uint8_t));
1027
1028     h->sps.bit_depth_luma = 8;
1029     h->chroma_format_idc = 1;
1030
1031     ff_hpeldsp_init(&s->hdsp, avctx->flags);
1032     ff_tpeldsp_init(&s->tdsp);
1033
1034     sl = h->slice_ctx;
1035
1036     h->flags           = avctx->flags;
1037     sl->is_complex     = 1;
1038     h->picture_structure = PICT_FRAME;
1039     avctx->pix_fmt     = AV_PIX_FMT_YUVJ420P;
1040     avctx->color_range = AVCOL_RANGE_JPEG;
1041
1042     h->slice_ctx[0].chroma_qp[0] = h->slice_ctx[0].chroma_qp[1] = 4;
1043     h->chroma_x_shift = h->chroma_y_shift = 1;
1044
1045     s->halfpel_flag  = 1;
1046     s->thirdpel_flag = 1;
1047     s->unknown_flag  = 0;
1048
1049     /* prowl for the "SEQH" marker in the extradata */
1050     extradata     = (unsigned char *)avctx->extradata;
1051     extradata_end = avctx->extradata + avctx->extradata_size;
1052     if (extradata) {
1053         for (m = 0; m + 8 < avctx->extradata_size; m++) {
1054             if (!memcmp(extradata, "SEQH", 4)) {
1055                 marker_found = 1;
1056                 break;
1057             }
1058             extradata++;
1059         }
1060     }
1061
1062     /* if a match was found, parse the extra data */
1063     if (marker_found) {
1064         GetBitContext gb;
1065         int frame_size_code;
1066
1067         size = AV_RB32(&extradata[4]);
1068         if (size > extradata_end - extradata - 8)
1069             return AVERROR_INVALIDDATA;
1070         init_get_bits(&gb, extradata + 8, size * 8);
1071
1072         /* 'frame size code' and optional 'width, height' */
1073         frame_size_code = get_bits(&gb, 3);
1074         switch (frame_size_code) {
1075         case 0:
1076             avctx->width  = 160;
1077             avctx->height = 120;
1078             break;
1079         case 1:
1080             avctx->width  = 128;
1081             avctx->height =  96;
1082             break;
1083         case 2:
1084             avctx->width  = 176;
1085             avctx->height = 144;
1086             break;
1087         case 3:
1088             avctx->width  = 352;
1089             avctx->height = 288;
1090             break;
1091         case 4:
1092             avctx->width  = 704;
1093             avctx->height = 576;
1094             break;
1095         case 5:
1096             avctx->width  = 240;
1097             avctx->height = 180;
1098             break;
1099         case 6:
1100             avctx->width  = 320;
1101             avctx->height = 240;
1102             break;
1103         case 7:
1104             avctx->width  = get_bits(&gb, 12);
1105             avctx->height = get_bits(&gb, 12);
1106             break;
1107         }
1108
1109         s->halfpel_flag  = get_bits1(&gb);
1110         s->thirdpel_flag = get_bits1(&gb);
1111
1112         /* unknown fields */
1113         skip_bits1(&gb);
1114         skip_bits1(&gb);
1115         skip_bits1(&gb);
1116         skip_bits1(&gb);
1117
1118         h->low_delay = get_bits1(&gb);
1119
1120         /* unknown field */
1121         skip_bits1(&gb);
1122
1123         while (get_bits1(&gb))
1124             skip_bits(&gb, 8);
1125
1126         s->unknown_flag  = get_bits1(&gb);
1127         avctx->has_b_frames = !h->low_delay;
1128         if (s->unknown_flag) {
1129 #if CONFIG_ZLIB
1130             unsigned watermark_width  = svq3_get_ue_golomb(&gb);
1131             unsigned watermark_height = svq3_get_ue_golomb(&gb);
1132             int u1                    = svq3_get_ue_golomb(&gb);
1133             int u2                    = get_bits(&gb, 8);
1134             int u3                    = get_bits(&gb, 2);
1135             int u4                    = svq3_get_ue_golomb(&gb);
1136             unsigned long buf_len     = watermark_width *
1137                                         watermark_height * 4;
1138             int offset                = get_bits_count(&gb) + 7 >> 3;
1139             uint8_t *buf;
1140
1141             if (watermark_height > 0 &&
1142                 (uint64_t)watermark_width * 4 > UINT_MAX / watermark_height)
1143                 return -1;
1144
1145             buf = av_malloc(buf_len);
1146             av_log(avctx, AV_LOG_DEBUG, "watermark size: %ux%u\n",
1147                    watermark_width, watermark_height);
1148             av_log(avctx, AV_LOG_DEBUG,
1149                    "u1: %x u2: %x u3: %x compressed data size: %d offset: %d\n",
1150                    u1, u2, u3, u4, offset);
1151             if (uncompress(buf, &buf_len, extradata + 8 + offset,
1152                            size - offset) != Z_OK) {
1153                 av_log(avctx, AV_LOG_ERROR,
1154                        "could not uncompress watermark logo\n");
1155                 av_free(buf);
1156                 return -1;
1157             }
1158             s->watermark_key = ff_svq1_packet_checksum(buf, buf_len, 0);
1159             s->watermark_key = s->watermark_key << 16 | s->watermark_key;
1160             av_log(avctx, AV_LOG_DEBUG,
1161                    "watermark key %#"PRIx32"\n", s->watermark_key);
1162             av_free(buf);
1163 #else
1164             av_log(avctx, AV_LOG_ERROR,
1165                    "this svq3 file contains watermark which need zlib support compiled in\n");
1166             return -1;
1167 #endif
1168         }
1169     }
1170
1171     h->width  = avctx->width;
1172     h->height = avctx->height;
1173     h->mb_width  = (h->width + 15) / 16;
1174     h->mb_height = (h->height + 15) / 16;
1175     h->mb_stride = h->mb_width + 1;
1176     h->mb_num    = h->mb_width * h->mb_height;
1177     h->b_stride = 4 * h->mb_width;
1178     s->h_edge_pos = h->mb_width * 16;
1179     s->v_edge_pos = h->mb_height * 16;
1180
1181     if (ff_h264_alloc_tables(h) < 0) {
1182         av_log(avctx, AV_LOG_ERROR, "svq3 memory allocation failed\n");
1183         return AVERROR(ENOMEM);
1184     }
1185
1186     return 0;
1187 }
1188
1189 static void free_picture(AVCodecContext *avctx, H264Picture *pic)
1190 {
1191     int i;
1192     for (i = 0; i < 2; i++) {
1193         av_buffer_unref(&pic->motion_val_buf[i]);
1194         av_buffer_unref(&pic->ref_index_buf[i]);
1195     }
1196     av_buffer_unref(&pic->mb_type_buf);
1197
1198     av_frame_unref(pic->f);
1199 }
1200
1201 static int get_buffer(AVCodecContext *avctx, H264Picture *pic)
1202 {
1203     SVQ3Context *s = avctx->priv_data;
1204     H264Context *h = &s->h;
1205     H264SliceContext *sl = &h->slice_ctx[0];
1206     const int big_mb_num    = h->mb_stride * (h->mb_height + 1) + 1;
1207     const int mb_array_size = h->mb_stride * h->mb_height;
1208     const int b4_stride     = h->mb_width * 4 + 1;
1209     const int b4_array_size = b4_stride * h->mb_height * 4;
1210     int ret;
1211
1212     if (!pic->motion_val_buf[0]) {
1213         int i;
1214
1215         pic->mb_type_buf = av_buffer_allocz((big_mb_num + h->mb_stride) * sizeof(uint32_t));
1216         if (!pic->mb_type_buf)
1217             return AVERROR(ENOMEM);
1218         pic->mb_type = (uint32_t*)pic->mb_type_buf->data + 2 * h->mb_stride + 1;
1219
1220         for (i = 0; i < 2; i++) {
1221             pic->motion_val_buf[i] = av_buffer_allocz(2 * (b4_array_size + 4) * sizeof(int16_t));
1222             pic->ref_index_buf[i]  = av_buffer_allocz(4 * mb_array_size);
1223             if (!pic->motion_val_buf[i] || !pic->ref_index_buf[i]) {
1224                 ret = AVERROR(ENOMEM);
1225                 goto fail;
1226             }
1227
1228             pic->motion_val[i] = (int16_t (*)[2])pic->motion_val_buf[i]->data + 4;
1229             pic->ref_index[i]  = pic->ref_index_buf[i]->data;
1230         }
1231     }
1232     pic->reference = !(h->pict_type == AV_PICTURE_TYPE_B);
1233
1234     ret = ff_get_buffer(avctx, pic->f,
1235                         pic->reference ? AV_GET_BUFFER_FLAG_REF : 0);
1236     if (ret < 0)
1237         goto fail;
1238
1239     if (!sl->edge_emu_buffer) {
1240         sl->edge_emu_buffer = av_mallocz(pic->f->linesize[0] * 17);
1241         if (!sl->edge_emu_buffer)
1242             return AVERROR(ENOMEM);
1243     }
1244
1245     sl->linesize   = pic->f->linesize[0];
1246     sl->uvlinesize = pic->f->linesize[1];
1247
1248     return 0;
1249 fail:
1250     free_picture(avctx, pic);
1251     return ret;
1252 }
1253
/*
 * Decode one packet.
 *
 * An empty packet flushes the delayed picture (once, and only when the
 * stream is not low-delay). Otherwise the first slice header is parsed,
 * the current picture is allocated, all macroblocks are decoded row by row
 * and a reference to the output picture is stored in data.
 *
 * Returns buf_size after a full decode, 0 when the packet was empty or the
 * picture was skipped, and a negative value on error. *got_frame is set to
 * 1 only when data received a picture.
 */
1254 static int svq3_decode_frame(AVCodecContext *avctx, void *data,
1255                              int *got_frame, AVPacket *avpkt)
1256 {
1257     const uint8_t *buf = avpkt->data;
1258     SVQ3Context *s     = avctx->priv_data;
1259     H264Context *h     = &s->h;
1260     H264SliceContext *sl = &h->slice_ctx[0];
1261     int buf_size       = avpkt->size;
1262     int ret, m, i;
1263
1264     /* special case for last picture */
1265     if (buf_size == 0) {
1266         if (s->next_pic->f->data[0] && !h->low_delay && !s->last_frame_output) {
1267             ret = av_frame_ref(data, s->next_pic->f);
1268             if (ret < 0)
1269                 return ret;
1270             s->last_frame_output = 1;
1271             *got_frame          = 1;
1272         }
1273         return 0;
1274     }
1275
1276     ret = init_get_bits(&s->gb, buf, 8 * buf_size);
1277     if (ret < 0)
1278         return ret;
1279
         /* the first slice header is parsed at macroblock position (0, 0) */
1280     sl->mb_x = sl->mb_y = sl->mb_xy = 0;
1281
1282     if (svq3_decode_slice_header(avctx))
1283         return -1;
1284
         /* the picture type is taken from the first slice */
1285     h->pict_type = sl->slice_type;
1286
         /* for a non-B picture the old next_pic becomes last_pic; the picture
          * decoded below is swapped into next_pic at the end of this function */
1287     if (h->pict_type != AV_PICTURE_TYPE_B)
1288         FFSWAP(H264Picture*, s->next_pic, s->last_pic);
1289
1290     av_frame_unref(s->cur_pic->f);
1291
1292     /* for skipping the frame */
1293     s->cur_pic->f->pict_type = h->pict_type;
1294     s->cur_pic->f->key_frame = (h->pict_type == AV_PICTURE_TYPE_I);
1295
1296     ret = get_buffer(avctx, s->cur_pic);
1297     if (ret < 0)
1298         return ret;
1299
         /* h->cur_pic is a by-value copy of the current picture struct */
1300     h->cur_pic_ptr = s->cur_pic;
1301     h->cur_pic     = *s->cur_pic;
1302
         /* per-block offsets derived from the scan8 layout and the line sizes
          * set by get_buffer(): entries 0..15 use the luma line size, 16..47
          * the chroma line size; the entries at +48 use a doubled row step */
1303     for (i = 0; i < 16; i++) {
1304         h->block_offset[i]           = (4 * ((scan8[i] - scan8[0]) & 7)) + 4 * sl->linesize * ((scan8[i] - scan8[0]) >> 3);
1305         h->block_offset[48 + i]      = (4 * ((scan8[i] - scan8[0]) & 7)) + 8 * sl->linesize * ((scan8[i] - scan8[0]) >> 3);
1306     }
1307     for (i = 0; i < 16; i++) {
1308         h->block_offset[16 + i]      =
1309         h->block_offset[32 + i]      = (4 * ((scan8[i] - scan8[0]) & 7)) + 4 * sl->uvlinesize * ((scan8[i] - scan8[0]) >> 3);
1310         h->block_offset[48 + 16 + i] =
1311         h->block_offset[48 + 32 + i] = (4 * ((scan8[i] - scan8[0]) & 7)) + 8 * sl->uvlinesize * ((scan8[i] - scan8[0]) >> 3);
1312     }
1313
         /* inter pictures need references; a missing one is replaced by a
          * freshly allocated picture with luma set to 0 and chroma to 0x80 */
1314     if (h->pict_type != AV_PICTURE_TYPE_I) {
1315         if (!s->last_pic->f->data[0]) {
1316             av_log(avctx, AV_LOG_ERROR, "Missing reference frame.\n");
1317             ret = get_buffer(avctx, s->last_pic);
1318             if (ret < 0)
1319                 return ret;
1320             memset(s->last_pic->f->data[0], 0, avctx->height * s->last_pic->f->linesize[0]);
1321             memset(s->last_pic->f->data[1], 0x80, (avctx->height / 2) *
1322                    s->last_pic->f->linesize[1]);
1323             memset(s->last_pic->f->data[2], 0x80, (avctx->height / 2) *
1324                    s->last_pic->f->linesize[2]);
1325         }
1326
1327         if (h->pict_type == AV_PICTURE_TYPE_B && !s->next_pic->f->data[0]) {
1328             av_log(avctx, AV_LOG_ERROR, "Missing reference frame.\n");
1329             ret = get_buffer(avctx, s->next_pic);
1330             if (ret < 0)
1331                 return ret;
1332             memset(s->next_pic->f->data[0], 0, avctx->height * s->next_pic->f->linesize[0]);
1333             memset(s->next_pic->f->data[1], 0x80, (avctx->height / 2) *
1334                    s->next_pic->f->linesize[1]);
1335             memset(s->next_pic->f->data[2], 0x80, (avctx->height / 2) *
1336                    s->next_pic->f->linesize[2]);
1337         }
1338     }
1339
1340     if (avctx->debug & FF_DEBUG_PICT_INFO)
1341         av_log(h->avctx, AV_LOG_DEBUG,
1342                "%c hpel:%d, tpel:%d aqp:%d qp:%d, slice_num:%02X\n",
1343                av_get_picture_type_char(h->pict_type),
1344                s->halfpel_flag, s->thirdpel_flag,
1345                s->adaptive_quant, h->slice_ctx[0].qscale, sl->slice_num);
1346
         /* honour the caller's skip_frame setting; note that the reference
          * swap above has already happened at this point */
1347     if (avctx->skip_frame >= AVDISCARD_NONREF && h->pict_type == AV_PICTURE_TYPE_B ||
1348         avctx->skip_frame >= AVDISCARD_NONKEY && h->pict_type != AV_PICTURE_TYPE_I ||
1349         avctx->skip_frame >= AVDISCARD_ALL)
1350         return 0;
1351
         /* while the damaged flag is set, B pictures are dropped; the first
          * non-B picture clears it */
1352     if (s->next_p_frame_damaged) {
1353         if (h->pict_type == AV_PICTURE_TYPE_B)
1354             return 0;
1355         else
1356             s->next_p_frame_damaged = 0;
1357     }
1358
         /* slice_num is read as an 8-bit field, hence the wrap-around by 256.
          * A B picture must lie strictly between its two references. */
1359     if (h->pict_type == AV_PICTURE_TYPE_B) {
1360         h->frame_num_offset = sl->slice_num - h->prev_frame_num;
1361
1362         if (h->frame_num_offset < 0)
1363             h->frame_num_offset += 256;
1364         if (h->frame_num_offset == 0 ||
1365             h->frame_num_offset >= h->prev_frame_num_offset) {
1366             av_log(h->avctx, AV_LOG_ERROR, "error in B-frame picture id\n");
1367             return -1;
1368         }
1369     } else {
1370         h->prev_frame_num        = h->frame_num;
1371         h->frame_num             = sl->slice_num;
1372         h->prev_frame_num_offset = h->frame_num - h->prev_frame_num;
1373
1374         if (h->prev_frame_num_offset < 0)
1375             h->prev_frame_num_offset += 256;
1376     }
1377
         /* preset the reference cache for both lists: the four rows of the
          * current macroblock plus their left neighbour column get reference
          * 1; the entry right of rows 0..2 is marked unavailable (j is 4 when
          * the inner loop ends) */
1378     for (m = 0; m < 2; m++) {
1379         int i;
1380         for (i = 0; i < 4; i++) {
1381             int j;
1382             for (j = -1; j < 4; j++)
1383                 sl->ref_cache[m][scan8[0] + 8 * i + j] = 1;
1384             if (i < 3)
1385                 sl->ref_cache[m][scan8[0] + 8 * i + j] = PART_NOT_AVAILABLE;
1386         }
1387     }
1388
         /* decode all macroblocks in raster order */
1389     for (sl->mb_y = 0; sl->mb_y < h->mb_height; sl->mb_y++) {
1390         for (sl->mb_x = 0; sl->mb_x < h->mb_width; sl->mb_x++) {
1391             unsigned mb_type;
1392             sl->mb_xy = sl->mb_x + sl->mb_y * h->mb_stride;
1393
                 /* fewer than 8 bits left in the current slice: if the reader
                  * is byte aligned or the leftover bits are all zero, the next
                  * slice header is parsed from the packet */
1394             if ((get_bits_left(&h->gb)) <= 7) {
1395                 if (((get_bits_count(&h->gb) & 7) == 0 ||
1396                     show_bits(&h->gb, get_bits_left(&h->gb) & 7) == 0)) {
1397
1398                     if (svq3_decode_slice_header(avctx))
1399                         return -1;
1400                 }
1401                 /* TODO: support s->mb_skip_run */
1402             }
1403
1404             mb_type = svq3_get_ue_golomb(&h->gb);
1405
                 /* map the coded value to the common macroblock type numbering:
                  * I pictures are shifted by 8, B picture values >= 4 by 4 */
1406             if (h->pict_type == AV_PICTURE_TYPE_I)
1407                 mb_type += 8;
1408             else if (h->pict_type == AV_PICTURE_TYPE_B && mb_type >= 4)
1409                 mb_type += 4;
1410             if (mb_type > 33 || svq3_decode_mb(s, mb_type)) {
1411                 av_log(h->avctx, AV_LOG_ERROR,
1412                        "error while decoding MB %d %d\n", sl->mb_x, sl->mb_y);
1413                 return -1;
1414             }
1415
                 /* type 0 macroblocks are not passed to hl_decode_mb() */
1416             if (mb_type != 0)
1417                 hl_decode_mb(h, &h->slice_ctx[0]);
1418
                 /* for non-B pictures in non-low-delay streams, overwrite the
                  * stored type with mb_type - 1 (P picture, mb_type < 8) or -1 */
1419             if (h->pict_type != AV_PICTURE_TYPE_B && !h->low_delay)
1420                 h->cur_pic.mb_type[sl->mb_x + sl->mb_y * h->mb_stride] =
1421                     (h->pict_type == AV_PICTURE_TYPE_P && mb_type < 8) ? (mb_type - 1) : -1;
1422         }
1423
             /* report the finished macroblock row to the caller */
1424         ff_draw_horiz_band(avctx, s->cur_pic->f,
1425                            s->last_pic->f->data[0] ? s->last_pic->f : NULL,
1426                            16 * sl->mb_y, 16, h->picture_structure, 0,
1427                            h->low_delay);
1428     }
1429
         /* B pictures and low-delay streams output the picture just decoded;
          * otherwise the previous reference (if any) is output. When neither
          * branch runs, ret still holds the non-negative result from above. */
1430     if (h->pict_type == AV_PICTURE_TYPE_B || h->low_delay)
1431         ret = av_frame_ref(data, s->cur_pic->f);
1432     else if (s->last_pic->f->data[0])
1433         ret = av_frame_ref(data, s->last_pic->f);
1434     if (ret < 0)
1435         return ret;
1436
1437     /* Do not output the last pic after seeking. */
1438     if (s->last_pic->f->data[0] || h->low_delay)
1439         *got_frame = 1;
1440
         /* a non-B picture becomes the new next_pic; a B picture is dropped */
1441     if (h->pict_type != AV_PICTURE_TYPE_B) {
1442         FFSWAP(H264Picture*, s->cur_pic, s->next_pic);
1443     } else {
1444         av_frame_unref(s->cur_pic->f);
1445     }
1446
1447     return buf_size;
1448 }
1449
1450 static av_cold int svq3_decode_end(AVCodecContext *avctx)
1451 {
1452     SVQ3Context *s = avctx->priv_data;
1453     H264Context *h = &s->h;
1454
1455     free_picture(avctx, s->cur_pic);
1456     free_picture(avctx, s->next_pic);
1457     free_picture(avctx, s->last_pic);
1458     av_frame_free(&s->cur_pic->f);
1459     av_frame_free(&s->next_pic->f);
1460     av_frame_free(&s->last_pic->f);
1461     av_freep(&s->cur_pic);
1462     av_freep(&s->next_pic);
1463     av_freep(&s->last_pic);
1464     av_freep(&s->slice_buf);
1465
1466     memset(&h->cur_pic, 0, sizeof(h->cur_pic));
1467
1468     ff_h264_free_context(h);
1469
1470     return 0;
1471 }
1472
1473 AVCodec ff_svq3_decoder = {
1474     .name           = "svq3",
1475     .long_name      = NULL_IF_CONFIG_SMALL("Sorenson Vector Quantizer 3 / Sorenson Video 3 / SVQ3"),
1476     .type           = AVMEDIA_TYPE_VIDEO,
1477     .id             = AV_CODEC_ID_SVQ3,
1478     .priv_data_size = sizeof(SVQ3Context),
1479     .init           = svq3_decode_init,
1480     .close          = svq3_decode_end,
1481     .decode         = svq3_decode_frame,
1482     .capabilities   = AV_CODEC_CAP_DRAW_HORIZ_BAND |
1483                       AV_CODEC_CAP_DR1             |
1484                       AV_CODEC_CAP_DELAY,
1485     .pix_fmts       = (const enum AVPixelFormat[]) { AV_PIX_FMT_YUVJ420P,
1486                                                      AV_PIX_FMT_NONE},
1487 };