git.sesse.net Git - ffmpeg/blob - libavcodec/svq1.c

   1 /*
   2  *
   3  * Copyright (C) 2002 the xine project
   4  * Copyright (C) 2002 the ffmpeg project
   5  *
   6  * This file is part of FFmpeg.
   7  *
   8  * FFmpeg is free software; you can redistribute it and/or
   9  * modify it under the terms of the GNU Lesser General Public
  10  * License as published by the Free Software Foundation; either
  11  * version 2.1 of the License, or (at your option) any later version.
  12  *
  13  * FFmpeg is distributed in the hope that it will be useful,
  14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  16  * Lesser General Public License for more details.
  17  *
  18  * You should have received a copy of the GNU Lesser General Public
  19  * License along with FFmpeg; if not, write to the Free Software
  20  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  21  *
  22  * (SVQ1 Decoder)
  23  * Ported to mplayer by Arpi <arpi@thot.banki.hu>
  24  * Ported to libavcodec by Nick Kurshev <nickols_k@mail.ru>
  25  *
  26  * SVQ1 Encoder (c) 2004 Mike Melanson <melanson@pcisys.net>
  27  */
  28
  29 /**
  30  * @file svq1.c
  31  * Sorenson Vector Quantizer #1 (SVQ1) video codec.
  32  * For more information on the SVQ1 algorithm, visit:
  33  *   http://www.pcisys.net/~melanson/codecs/
  34  */
  35
  36
  37 //#define DEBUG_SVQ1
  38 #include <stdio.h>
  39 #include <stdlib.h>
  40 #include <string.h>
  41 #include <unistd.h>
  42 #include <limits.h>
  43
  44 #include "common.h"
  45 #include "avcodec.h"
  46 #include "dsputil.h"
  47 #include "mpegvideo.h"
  48 #include "bswap.h"
  49
  50 #undef NDEBUG
  51 #include <assert.h>
  52
  /* Motion vector code table; only declared here (extern). It is used by
   * svq1_decode_init() to build svq1_motion_component. */
  53 extern const uint8_t mvtab[33][2];
  54
  /* VLC decoding tables, built by svq1_decode_init(). The two multistage
   * arrays hold one table per vector level (index 0..5). */
  55 static VLC svq1_block_type;
  56 static VLC svq1_motion_component;
  57 static VLC svq1_intra_multistage[6];
  58 static VLC svq1_inter_multistage[6];
  59 static VLC svq1_intra_mean;
  60 static VLC svq1_inter_mean;
  61
  /* Block types of a delta frame, as decoded through the svq1_block_type VLC
   * and dispatched on in svq1_decode_delta_block(). */
  62 #define SVQ1_BLOCK_SKIP         0
  63 #define SVQ1_BLOCK_INTER        1
  64 #define SVQ1_BLOCK_INTER_4V     2
  65 #define SVQ1_BLOCK_INTRA        3
  66
  /* State used by the encoder-side functions (svq1_write_header(),
   * encode_block()). The decoder functions in this file take a
   * MpegEncContext instead. */
  67 typedef struct SVQ1Context {
  68     MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to make the motion estimation eventually independent of MpegEncContext, so this will be removed then (FIXME/XXX)
  69     AVCodecContext *avctx;
  70     DSPContext dsp;
  71     AVFrame picture;
  72     AVFrame current_picture;
  73     AVFrame last_picture;
  74     PutBitContext pb;
  75     GetBitContext gb;
  76
  77     PutBitContext reorder_pb[6]; //why ooh why this sick breadth first order, everything is slower and more complex
  78
  /* Written into the keyframe header by svq1_write_header(). */
  79     int frame_width;
  80     int frame_height;
  81
  82     /* Y plane block dimensions */
  83     int y_block_width;
  84     int y_block_height;
  85
  86     /* U & V plane (C planes) block dimensions */
  87     int c_block_width;
  88     int c_block_height;
  89
  90     uint16_t *mb_type;
  91     uint32_t *dummy;
  92     int16_t (*motion_val8[3])[2];
  93     int16_t (*motion_val16[3])[2];
  94
  95     int64_t rd_total;
  96 } SVQ1Context;
  97
  98 /* motion vector (prediction) */
  /* One motion vector. The decoder keeps an array of these as predictors;
   * svq1_motion_inter_block() shifts the components right by one for the
   * pixel position and uses the low bit to select the put_pixels variant. */
  99 typedef struct svq1_pmv_s {
 100   int           x;
 101   int           y;
 102 } svq1_pmv_t;
 103
 104 #include "svq1_cb.h"
 105 #include "svq1_vlc.h"
 106
 /* Byte-indexed lookup table for the 16-bit checksum computed by
  * svq1_packet_checksum().
  * NOTE(review): the entries look like the common CRC-16/CCITT table
  * (polynomial 0x1021) - confirm before relying on that. */
 107 static const uint16_t checksum_table[256] = {
 108   0x0000, 0x1021, 0x2042, 0x3063, 0x4084, 0x50A5, 0x60C6, 0x70E7,
 109   0x8108, 0x9129, 0xA14A, 0xB16B, 0xC18C, 0xD1AD, 0xE1CE, 0xF1EF,
 110   0x1231, 0x0210, 0x3273, 0x2252, 0x52B5, 0x4294, 0x72F7, 0x62D6,
 111   0x9339, 0x8318, 0xB37B, 0xA35A, 0xD3BD, 0xC39C, 0xF3FF, 0xE3DE,
 112   0x2462, 0x3443, 0x0420, 0x1401, 0x64E6, 0x74C7, 0x44A4, 0x5485,
 113   0xA56A, 0xB54B, 0x8528, 0x9509, 0xE5EE, 0xF5CF, 0xC5AC, 0xD58D,
 114   0x3653, 0x2672, 0x1611, 0x0630, 0x76D7, 0x66F6, 0x5695, 0x46B4,
 115   0xB75B, 0xA77A, 0x9719, 0x8738, 0xF7DF, 0xE7FE, 0xD79D, 0xC7BC,
 116   0x48C4, 0x58E5, 0x6886, 0x78A7, 0x0840, 0x1861, 0x2802, 0x3823,
 117   0xC9CC, 0xD9ED, 0xE98E, 0xF9AF, 0x8948, 0x9969, 0xA90A, 0xB92B,
 118   0x5AF5, 0x4AD4, 0x7AB7, 0x6A96, 0x1A71, 0x0A50, 0x3A33, 0x2A12,
 119   0xDBFD, 0xCBDC, 0xFBBF, 0xEB9E, 0x9B79, 0x8B58, 0xBB3B, 0xAB1A,
 120   0x6CA6, 0x7C87, 0x4CE4, 0x5CC5, 0x2C22, 0x3C03, 0x0C60, 0x1C41,
 121   0xEDAE, 0xFD8F, 0xCDEC, 0xDDCD, 0xAD2A, 0xBD0B, 0x8D68, 0x9D49,
 122   0x7E97, 0x6EB6, 0x5ED5, 0x4EF4, 0x3E13, 0x2E32, 0x1E51, 0x0E70,
 123   0xFF9F, 0xEFBE, 0xDFDD, 0xCFFC, 0xBF1B, 0xAF3A, 0x9F59, 0x8F78,
 124   0x9188, 0x81A9, 0xB1CA, 0xA1EB, 0xD10C, 0xC12D, 0xF14E, 0xE16F,
 125   0x1080, 0x00A1, 0x30C2, 0x20E3, 0x5004, 0x4025, 0x7046, 0x6067,
 126   0x83B9, 0x9398, 0xA3FB, 0xB3DA, 0xC33D, 0xD31C, 0xE37F, 0xF35E,
 127   0x02B1, 0x1290, 0x22F3, 0x32D2, 0x4235, 0x5214, 0x6277, 0x7256,
 128   0xB5EA, 0xA5CB, 0x95A8, 0x8589, 0xF56E, 0xE54F, 0xD52C, 0xC50D,
 129   0x34E2, 0x24C3, 0x14A0, 0x0481, 0x7466, 0x6447, 0x5424, 0x4405,
 130   0xA7DB, 0xB7FA, 0x8799, 0x97B8, 0xE75F, 0xF77E, 0xC71D, 0xD73C,
 131   0x26D3, 0x36F2, 0x0691, 0x16B0, 0x6657, 0x7676, 0x4615, 0x5634,
 132   0xD94C, 0xC96D, 0xF90E, 0xE92F, 0x99C8, 0x89E9, 0xB98A, 0xA9AB,
 133   0x5844, 0x4865, 0x7806, 0x6827, 0x18C0, 0x08E1, 0x3882, 0x28A3,
 134   0xCB7D, 0xDB5C, 0xEB3F, 0xFB1E, 0x8BF9, 0x9BD8, 0xABBB, 0xBB9A,
 135   0x4A75, 0x5A54, 0x6A37, 0x7A16, 0x0AF1, 0x1AD0, 0x2AB3, 0x3A92,
 136   0xFD2E, 0xED0F, 0xDD6C, 0xCD4D, 0xBDAA, 0xAD8B, 0x9DE8, 0x8DC9,
 137   0x7C26, 0x6C07, 0x5C64, 0x4C45, 0x3CA2, 0x2C83, 0x1CE0, 0x0CC1,
 138   0xEF1F, 0xFF3E, 0xCF5D, 0xDF7C, 0xAF9B, 0xBFBA, 0x8FD9, 0x9FF8,
 139   0x6E17, 0x7E36, 0x4E55, 0x5E74, 0x2E93, 0x3EB2, 0x0ED1, 0x1EF0
 140 };
 141
 /* Byte-indexed table used by svq1_parse_string() to derive the next XOR
  * seed while unscrambling the message embedded in a keyframe header. */
 142 static const uint8_t string_table[256] = {
 143   0x00, 0xD5, 0x7F, 0xAA, 0xFE, 0x2B, 0x81, 0x54,
 144   0x29, 0xFC, 0x56, 0x83, 0xD7, 0x02, 0xA8, 0x7D,
 145   0x52, 0x87, 0x2D, 0xF8, 0xAC, 0x79, 0xD3, 0x06,
 146   0x7B, 0xAE, 0x04, 0xD1, 0x85, 0x50, 0xFA, 0x2F,
 147   0xA4, 0x71, 0xDB, 0x0E, 0x5A, 0x8F, 0x25, 0xF0,
 148   0x8D, 0x58, 0xF2, 0x27, 0x73, 0xA6, 0x0C, 0xD9,
 149   0xF6, 0x23, 0x89, 0x5C, 0x08, 0xDD, 0x77, 0xA2,
 150   0xDF, 0x0A, 0xA0, 0x75, 0x21, 0xF4, 0x5E, 0x8B,
 151   0x9D, 0x48, 0xE2, 0x37, 0x63, 0xB6, 0x1C, 0xC9,
 152   0xB4, 0x61, 0xCB, 0x1E, 0x4A, 0x9F, 0x35, 0xE0,
 153   0xCF, 0x1A, 0xB0, 0x65, 0x31, 0xE4, 0x4E, 0x9B,
 154   0xE6, 0x33, 0x99, 0x4C, 0x18, 0xCD, 0x67, 0xB2,
 155   0x39, 0xEC, 0x46, 0x93, 0xC7, 0x12, 0xB8, 0x6D,
 156   0x10, 0xC5, 0x6F, 0xBA, 0xEE, 0x3B, 0x91, 0x44,
 157   0x6B, 0xBE, 0x14, 0xC1, 0x95, 0x40, 0xEA, 0x3F,
 158   0x42, 0x97, 0x3D, 0xE8, 0xBC, 0x69, 0xC3, 0x16,
 159   0xEF, 0x3A, 0x90, 0x45, 0x11, 0xC4, 0x6E, 0xBB,
 160   0xC6, 0x13, 0xB9, 0x6C, 0x38, 0xED, 0x47, 0x92,
 161   0xBD, 0x68, 0xC2, 0x17, 0x43, 0x96, 0x3C, 0xE9,
 162   0x94, 0x41, 0xEB, 0x3E, 0x6A, 0xBF, 0x15, 0xC0,
 163   0x4B, 0x9E, 0x34, 0xE1, 0xB5, 0x60, 0xCA, 0x1F,
 164   0x62, 0xB7, 0x1D, 0xC8, 0x9C, 0x49, 0xE3, 0x36,
 165   0x19, 0xCC, 0x66, 0xB3, 0xE7, 0x32, 0x98, 0x4D,
 166   0x30, 0xE5, 0x4F, 0x9A, 0xCE, 0x1B, 0xB1, 0x64,
 167   0x72, 0xA7, 0x0D, 0xD8, 0x8C, 0x59, 0xF3, 0x26,
 168   0x5B, 0x8E, 0x24, 0xF1, 0xA5, 0x70, 0xDA, 0x0F,
 169   0x20, 0xF5, 0x5F, 0x8A, 0xDE, 0x0B, 0xA1, 0x74,
 170   0x09, 0xDC, 0x76, 0xA3, 0xF7, 0x22, 0x88, 0x5D,
 171   0xD6, 0x03, 0xA9, 0x7C, 0x28, 0xFD, 0x57, 0x82,
 172   0xFF, 0x2A, 0x80, 0x55, 0x01, 0xD4, 0x7E, 0xAB,
 173   0x84, 0x51, 0xFB, 0x2E, 0x7A, 0xAF, 0x05, 0xD0,
 174   0xAD, 0x78, 0xD2, 0x07, 0x53, 0x86, 0x2C, 0xF9
 175 };
 176
 /*
  * Subdivision step of the breadth-first vector walk. Operates on the
  * caller's locals: bitbuf, list[], pitch, level, i, m, n.
  *
  * While level > 0, one bit is read for node i:
  *   - 0: node i is not split; the loop stops and the caller decodes it.
  *   - 1: two child pointers are appended to list[]. The first is the node
  *        itself, the second is offset by (pitch or 1) << (level/2 + 1);
  *        pitch is chosen when level is odd, 1 when it is even.
  * When i reaches m, all nodes of the current depth have been visited:
  * m is advanced to n and level is decremented; the loop stops once level
  * reaches 0.
  */
 177 #define SVQ1_PROCESS_VECTOR()\
 178     for (; level > 0; i++) {\
 179       /* process next depth */\
 180       if (i == m) {\
 181         m = n;\
 182         if (--level == 0)\
 183           break;\
 184       }\
 185       /* divide block if next bit set */\
 186       if (get_bits (bitbuf, 1) == 0)\
 187         break;\
 188       /* add child nodes */\
 189       list[n++] = list[i];\
 190       list[n++] = list[i] + (((level & 1) ? pitch : 1) << ((level / 2) + 1));\
 191     }
 192
 /*
  * Adds `stages` codebook words to the accumulators n1/n2 and clips the
  * result. Operates on the caller's locals: codebook, entries[], stages,
  * j, n1, n2, n3.
  *
  * Each 32-bit codebook word is XORed with 0x80808080 and split into two
  * pairs of byte lanes: bytes 1 and 3 are added to n1, bytes 0 and 2 to n2,
  * so each accumulator carries two 16-bit lanes. A lane with any of its
  * upper 8 bits set is then clipped to [0..255] by the mask arithmetic
  * below.
  */
 193 #define SVQ1_ADD_CODEBOOK()\
 194           /* add codebook entries to vector */\
 195           for (j=0; j < stages; j++) {\
 196             n3  = codebook[entries[j]] ^ 0x80808080;\
 197             n1 += ((n3 & 0xFF00FF00) >> 8);\
 198             n2 +=  (n3 & 0x00FF00FF);\
 199           }\
 200 \
 201           /* clip to [0..255] */\
 202           if (n1 & 0xFF00FF00) {\
 203             n3  = ((( n1 >> 15) & 0x00010001) | 0x01000100) - 0x00010001;\
 204             n1 += 0x7F007F00;\
 205             n1 |= (((~n1 >> 15) & 0x00010001) | 0x01000100) - 0x00010001;\
 206             n1 &= (n3 & 0x00FF00FF);\
 207           }\
 208 \
 209           if (n2 & 0xFF00FF00) {\
 210             n3  = ((( n2 >> 15) & 0x00010001) | 0x01000100) - 0x00010001;\
 211             n2 += 0x7F007F00;\
 212             n2 |= (((~n2 >> 15) & 0x00010001) | 0x01000100) - 0x00010001;\
 213             n2 &= (n3 & 0x00FF00FF);\
 214           }
 215
 /*
  * Reconstructs an intra vector of width x height pixels at dst, four
  * pixels (one 32-bit word) at a time. Both accumulators start from n4
  * (the packed mean set up by SVQ1_CALC_CODEBOOK_ENTRIES), the codebook
  * words are added by SVQ1_ADD_CODEBOOK, and the two lane pairs are merged
  * back into one word. `codebook` advances by one word per output word;
  * dst advances by pitch/4 words per row.
  */
 216 #define SVQ1_DO_CODEBOOK_INTRA()\
 217       for (y=0; y < height; y++) {\
 218         for (x=0; x < (width / 4); x++, codebook++) {\
 219         n1 = n4;\
 220         n2 = n4;\
 221         SVQ1_ADD_CODEBOOK()\
 222         /* store result */\
 223         dst[x] = (n1 << 8) | n2;\
 224         }\
 225         dst += (pitch / 4);\
 226       }
 227
 /*
  * Same walk as SVQ1_DO_CODEBOOK_INTRA, but the accumulators start from the
  * pixels already stored at dst (split into the two lane pairs) plus n4, so
  * the decoded vector is added on top of the existing block contents.
  */
 228 #define SVQ1_DO_CODEBOOK_NONINTRA()\
 229       for (y=0; y < height; y++) {\
 230         for (x=0; x < (width / 4); x++, codebook++) {\
 231         n3 = dst[x];\
 232         /* add mean value to vector */\
 233         n1 = ((n3 & 0xFF00FF00) >> 8) + n4;\
 234         n2 =  (n3 & 0x00FF00FF)          + n4;\
 235         SVQ1_ADD_CODEBOOK()\
 236         /* store result */\
 237         dst[x] = (n1 << 8) | n2;\
 238         }\
 239         dst += (pitch / 4);\
 240       }
 241
 /*
  * Prepares the codebook lookup for one vector. Operates on the caller's
  * locals: bitbuf, level, stages, j, entries[], codebook, bit_cache, mean, n4.
  *
  *  - selects cbook[level] as the codebook,
  *  - reads 4 bits per stage in one get_bits() call and turns each nibble
  *    into a word offset: (nibble + 16*stage) << (level + 1),
  *  - subtracts 128 per stage from mean, and
  *  - packs mean into both 16-bit halves of n4; the upper half is adjusted
  *    by (mean >> 31) when mean is negative.
  */
 242 #define SVQ1_CALC_CODEBOOK_ENTRIES(cbook)\
 243       codebook = (const uint32_t *) cbook[level];\
 244       bit_cache = get_bits (bitbuf, 4*stages);\
 245       /* calculate codebook entries for this vector */\
 246       for (j=0; j < stages; j++) {\
 247         entries[j] = (((bit_cache >> (4*(stages - j - 1))) & 0xF) + 16*j) << (level + 1);\
 248       }\
 249       mean -= (stages * 128);\
 250       n4    = ((mean + (mean >> 31)) << 16) | (mean & 0xFFFF);
 251
 252 static int svq1_decode_block_intra (GetBitContext *bitbuf, uint8_t *pixels, int pitch ) {
 253   uint32_t    bit_cache;
 254   uint8_t    *list[63];
 255   uint32_t   *dst;
 256   const uint32_t *codebook;
 257   int         entries[6];
 258   int         i, j, m, n;
 259   int         mean, stages;
 260   unsigned    x, y, width, height, level;
 261   uint32_t    n1, n2, n3, n4;
 262
 263   /* initialize list for breadth first processing of vectors */
 264   list[0] = pixels;
 265
 266   /* recursively process vector */
 267   for (i=0, m=1, n=1, level=5; i < n; i++) {
 268     SVQ1_PROCESS_VECTOR();
 269
 270     /* destination address and vector size */
 271     dst = (uint32_t *) list[i];
 272     width = 1 << ((4 + level) /2);
 273     height = 1 << ((3 + level) /2);
 274
 275     /* get number of stages (-1 skips vector, 0 for mean only) */
 276     stages = get_vlc2(bitbuf, svq1_intra_multistage[level].table, 3, 3) - 1;
 277
 278     if (stages == -1) {
 279         for (y=0; y < height; y++) {
 280           memset (&dst[y*(pitch / 4)], 0, width);
 281         }
 282       continue;                 /* skip vector */
 283     }
 284
 285     if ((stages > 0) && (level >= 4)) {
 286 #ifdef DEBUG_SVQ1
 287     av_log(s->avctx, AV_LOG_INFO, "Error (svq1_decode_block_intra): invalid vector: stages=%i level=%i\n",stages,level);
 288 #endif
 289       return -1;        /* invalid vector */
 290     }
 291
 292     mean = get_vlc2(bitbuf, svq1_intra_mean.table, 8, 3);
 293
 294     if (stages == 0) {
 295       for (y=0; y < height; y++) {
 296         memset (&dst[y*(pitch / 4)], mean, width);
 297       }
 298     } else {
 299       SVQ1_CALC_CODEBOOK_ENTRIES(svq1_intra_codebooks);
 300       SVQ1_DO_CODEBOOK_INTRA()
 301     }
 302   }
 303
 304   return 0;
 305 }
 306
 307 static int svq1_decode_block_non_intra (GetBitContext *bitbuf, uint8_t *pixels, int pitch ) {
 308   uint32_t    bit_cache;
 309   uint8_t    *list[63];
 310   uint32_t   *dst;
 311   const uint32_t *codebook;
 312   int         entries[6];
 313   int         i, j, m, n;
 314   int         mean, stages;
 315   int         x, y, width, height, level;
 316   uint32_t    n1, n2, n3, n4;
 317
 318   /* initialize list for breadth first processing of vectors */
 319   list[0] = pixels;
 320
 321   /* recursively process vector */
 322   for (i=0, m=1, n=1, level=5; i < n; i++) {
 323     SVQ1_PROCESS_VECTOR();
 324
 325     /* destination address and vector size */
 326     dst = (uint32_t *) list[i];
 327     width = 1 << ((4 + level) /2);
 328     height = 1 << ((3 + level) /2);
 329
 330     /* get number of stages (-1 skips vector, 0 for mean only) */
 331     stages = get_vlc2(bitbuf, svq1_inter_multistage[level].table, 3, 2) - 1;
 332
 333     if (stages == -1) continue; /* skip vector */
 334
 335     if ((stages > 0) && (level >= 4)) {
 336 #ifdef DEBUG_SVQ1
 337     av_log(s->avctx, AV_LOG_INFO, "Error (svq1_decode_block_non_intra): invalid vector: stages=%i level=%i\n",stages,level);
 338 #endif
 339       return -1;        /* invalid vector */
 340     }
 341
 342     mean = get_vlc2(bitbuf, svq1_inter_mean.table, 9, 3) - 256;
 343
 344     SVQ1_CALC_CODEBOOK_ENTRIES(svq1_inter_codebooks);
 345     SVQ1_DO_CODEBOOK_NONINTRA()
 346   }
 347   return 0;
 348 }
 349
 350 static int svq1_decode_motion_vector (GetBitContext *bitbuf, svq1_pmv_t *mv, svq1_pmv_t **pmv) {
 351   int        diff;
 352   int        i;
 353
 354   for (i=0; i < 2; i++) {
 355
 356     /* get motion code */
 357     diff = get_vlc2(bitbuf, svq1_motion_component.table, 7, 2);
 358     if(diff<0)
 359         return -1;
 360     else if(diff){
 361         if(get_bits1(bitbuf)) diff= -diff;
 362     }
 363
 364     /* add median of motion vector predictors and clip result */
 365     if (i == 1)
 366       mv->y = ((diff + mid_pred(pmv[0]->y, pmv[1]->y, pmv[2]->y)) << 26) >> 26;
 367     else
 368       mv->x = ((diff + mid_pred(pmv[0]->x, pmv[1]->x, pmv[2]->x)) << 26) >> 26;
 369   }
 370
 371   return 0;
 372 }
 373
 /**
  * Copy a 16x16 block unchanged from the reference picture.
  *
  * @param current  destination: top-left pixel of the block being written
  * @param previous origin of the reference plane
  * @param pitch    distance in bytes between rows (both planes)
  * @param x        horizontal position of the block inside previous
  * @param y        vertical position of the block inside previous
  */
 static void svq1_skip_block (uint8_t *current, uint8_t *previous, int pitch, int x, int y) {
   const uint8_t *from = previous + (x + y*pitch);
   uint8_t       *to   = current;
   int            rows_left;

   /* one 16-byte row per iteration */
   for (rows_left = 16; rows_left > 0; rows_left--) {
     memcpy (to, from, 16);
     from += pitch;
     to   += pitch;
   }
 }
 388
 /*
  * Decode the motion vector of a 16x16 inter block and copy the
  * motion-compensated block from previous to current.
  *
  * motion[] is the predictor array of the current plane: motion[0],
  * motion[x/8 + 2] and motion[x/8 + 4] are the three predictors (only
  * motion[0] in the top row). After decoding, the new vector is stored in
  * motion[0], motion[x/8 + 2] and motion[x/8 + 3].
  *
  * Returns 0 on success or the error from svq1_decode_motion_vector().
  *
  * NOTE(review): only positions left of / above the plane are guarded
  * against below; no check against the right or bottom edge is visible in
  * this function (the disabled #if 0 block only logs such cases).
  */
 389 static int svq1_motion_inter_block (MpegEncContext *s, GetBitContext *bitbuf,
 390                                uint8_t *current, uint8_t *previous, int pitch,
 391                                svq1_pmv_t *motion, int x, int y) {
 392   uint8_t    *src;
 393   uint8_t    *dst;
 394   svq1_pmv_t  mv;
 395   svq1_pmv_t *pmv[3];
 396   int         result;
 397
 398   /* predict and decode motion vector */
 399   pmv[0] = &motion[0];
 400   if (y == 0) {
 401     pmv[1] =
 402     pmv[2] = pmv[0];
 403   }
 404   else {
 405     pmv[1] = &motion[(x / 8) + 2];
 406     pmv[2] = &motion[(x / 8) + 4];
 407   }
 408
 409   result = svq1_decode_motion_vector (bitbuf, &mv, pmv);
 410
 411   if (result != 0)
 412     return result;
 413
 /* store the decoded vector as predictor for the following blocks */
 414   motion[0].x                =
 415   motion[(x / 8) + 2].x      =
 416   motion[(x / 8) + 3].x      = mv.x;
 417   motion[0].y                =
 418   motion[(x / 8) + 2].y      =
 419   motion[(x / 8) + 3].y      = mv.y;
 420
 /* a vector pointing above or left of the plane is replaced by zero; this
  * happens after the predictors were stored, so they keep the decoded value */
 421   if(y + (mv.y >> 1)<0)
 422      mv.y= 0;
 423   if(x + (mv.x >> 1)<0)
 424      mv.x= 0;
 425
 426 #if 0
 427   int w= (s->width+15)&~15;
 428   int h= (s->height+15)&~15;
 429   if(x + (mv.x >> 1)<0 || y + (mv.y >> 1)<0 || x + (mv.x >> 1) + 16 > w || y + (mv.y >> 1) + 16> h)
 430       av_log(s->avctx, AV_LOG_INFO, "%d %d %d %d\n", x, y, x + (mv.x >> 1), y + (mv.y >> 1));
 431 #endif
 432
 /* mv >> 1 gives the pixel offset; the low bits of mv.y / mv.x select one
  * of the four put_pixels_tab[0] variants (presumably half-pel
  * interpolation - confirm against dsputil) */
 433   src = &previous[(x + (mv.x >> 1)) + (y + (mv.y >> 1))*pitch];
 434   dst = current;
 435
 436   s->dsp.put_pixels_tab[0][((mv.y & 1) << 1) | (mv.x & 1)](dst,src,pitch,16);
 437
 438   return 0;
 439 }
 440
 /*
  * Decode four motion vectors for a 16x16 block (one per 8x8 quadrant) and
  * copy the four motion-compensated 8x8 blocks from previous to current.
  *
  * The vectors are decoded in order 0..3; each one is written to a slot
  * (the local mv, motion[0], motion[x/8 + 2], motion[x/8 + 3]) that later
  * vectors then use as a predictor, so the statement order matters.
  *
  * Returns 0 on success or the error from svq1_decode_motion_vector().
  *
  * NOTE(review): as in svq1_motion_inter_block(), only negative source
  * positions are guarded against; no right/bottom check is visible here.
  */
 441 static int svq1_motion_inter_4v_block (MpegEncContext *s, GetBitContext *bitbuf,
 442                                   uint8_t *current, uint8_t *previous, int pitch,
 443                                   svq1_pmv_t *motion,int x, int y) {
 444   uint8_t    *src;
 445   uint8_t    *dst;
 446   svq1_pmv_t  mv;
 447   svq1_pmv_t *pmv[4];
 448   int         i, result;
 449
 450   /* predict and decode motion vector (0) */
 451   pmv[0] = &motion[0];
 452   if (y == 0) {
 453     pmv[1] =
 454     pmv[2] = pmv[0];
 455   }
 456   else {
 457     pmv[1] = &motion[(x / 8) + 2];
 458     pmv[2] = &motion[(x / 8) + 4];
 459   }
 460
 461   result = svq1_decode_motion_vector (bitbuf, &mv, pmv);
 462
 463   if (result != 0)
 464     return result;
 465
 466   /* predict and decode motion vector (1) */
 /* vector (0) lives in the local mv and is the first predictor from here on;
  * when y != 0, pmv[2] keeps the value assigned for vector (0) */
 467   pmv[0] = &mv;
 468   if (y == 0) {
 469     pmv[1] =
 470     pmv[2] = pmv[0];
 471   }
 472   else {
 473     pmv[1] = &motion[(x / 8) + 3];
 474   }
 475   result = svq1_decode_motion_vector (bitbuf, &motion[0], pmv);
 476
 477   if (result != 0)
 478     return result;
 479
 480   /* predict and decode motion vector (2) */
 481   pmv[1] = &motion[0];
 482   pmv[2] = &motion[(x / 8) + 1];
 483
 484   result = svq1_decode_motion_vector (bitbuf, &motion[(x / 8) + 2], pmv);
 485
 486   if (result != 0)
 487     return result;
 488
 489   /* predict and decode motion vector (3) */
 490   pmv[2] = &motion[(x / 8) + 2];
 491   pmv[3] = &motion[(x / 8) + 3];
 492
 493   result = svq1_decode_motion_vector (bitbuf, pmv[3], pmv);
 494
 495   if (result != 0)
 496     return result;
 497
 498   /* form predictions */
 /* at this point pmv[0..3] point at vectors 0..3 in quadrant order; the
  * quadrant offset is added in the same units as the vector (16 == 8 pixels
  * after the >> 1 below) */
 499   for (i=0; i < 4; i++) {
 500     int mvx= pmv[i]->x + (i&1)*16;
 501     int mvy= pmv[i]->y + (i>>1)*16;
 502
 503     ///XXX /FIXME cliping or padding?
 504     if(y + (mvy >> 1)<0)
 505        mvy= 0;
 506     if(x + (mvx >> 1)<0)
 507        mvx= 0;
 508
 509 #if 0
 510   int w= (s->width+15)&~15;
 511   int h= (s->height+15)&~15;
 512   if(x + (mvx >> 1)<0 || y + (mvy >> 1)<0 || x + (mvx >> 1) + 8 > w || y + (mvy >> 1) + 8> h)
 513       av_log(s->avctx, AV_LOG_INFO, "%d %d %d %d\n", x, y, x + (mvx >> 1), y + (mvy >> 1));
 514 #endif
 515     src = &previous[(x + (mvx >> 1)) + (y + (mvy >> 1))*pitch];
 516     dst = current;
 517
 518     s->dsp.put_pixels_tab[1][((mvy & 1) << 1) | (mvx & 1)](dst,src,pitch,8);
 519
 520     /* select next block */
 /* after a right-hand quadrant: down 8 rows and back 8 columns;
  * after a left-hand quadrant: 8 columns to the right */
 521     if (i & 1) {
 522       current  += 8*(pitch - 1);
 523     } else {
 524       current  += 8;
 525     }
 526   }
 527
 528   return 0;
 529 }
 530
 531 static int svq1_decode_delta_block (MpegEncContext *s, GetBitContext *bitbuf,
 532                         uint8_t *current, uint8_t *previous, int pitch,
 533                         svq1_pmv_t *motion, int x, int y) {
 534   uint32_t block_type;
 535   int      result = 0;
 536
 537   /* get block type */
 538   block_type = get_vlc2(bitbuf, svq1_block_type.table, 2, 2);
 539
 540   /* reset motion vectors */
 541   if (block_type == SVQ1_BLOCK_SKIP || block_type == SVQ1_BLOCK_INTRA) {
 542     motion[0].x                 =
 543     motion[0].y                 =
 544     motion[(x / 8) + 2].x =
 545     motion[(x / 8) + 2].y =
 546     motion[(x / 8) + 3].x =
 547     motion[(x / 8) + 3].y = 0;
 548   }
 549
 550   switch (block_type) {
 551   case SVQ1_BLOCK_SKIP:
 552     svq1_skip_block (current, previous, pitch, x, y);
 553     break;
 554
 555   case SVQ1_BLOCK_INTER:
 556     result = svq1_motion_inter_block (s, bitbuf, current, previous, pitch, motion, x, y);
 557
 558     if (result != 0)
 559     {
 560 #ifdef DEBUG_SVQ1
 561     av_log(s->avctx, AV_LOG_INFO, "Error in svq1_motion_inter_block %i\n",result);
 562 #endif
 563       break;
 564     }
 565     result = svq1_decode_block_non_intra (bitbuf, current, pitch);
 566     break;
 567
 568   case SVQ1_BLOCK_INTER_4V:
 569     result = svq1_motion_inter_4v_block (s, bitbuf, current, previous, pitch, motion, x, y);
 570
 571     if (result != 0)
 572     {
 573 #ifdef DEBUG_SVQ1
 574     av_log(s->avctx, AV_LOG_INFO, "Error in svq1_motion_inter_4v_block %i\n",result);
 575 #endif
 576       break;
 577     }
 578     result = svq1_decode_block_non_intra (bitbuf, current, pitch);
 579     break;
 580
 581   case SVQ1_BLOCK_INTRA:
 582     result = svq1_decode_block_intra (bitbuf, current, pitch);
 583     break;
 584   }
 585
 586   return result;
 587 }
 588
 589 /* standard video sizes */
 /* Indexed by the 3-bit frame size code of a keyframe header. Code 7 has no
  * table size: the header decoder reads explicit 12-bit width and height
  * instead, and the header writer only searches entries 0..6. */
 590 static struct { int width; int height; } svq1_frame_size_table[8] = {
 591   { 160, 120 }, { 128,  96 }, { 176, 144 }, { 352, 288 },
 592   { 704, 576 }, { 240, 180 }, { 320, 240 }, {  -1,  -1 }
 593 };
 594
 595 static uint16_t svq1_packet_checksum (uint8_t *data, int length, int value) {
 596   int i;
 597
 598   for (i=0; i < length; i++) {
 599     value = checksum_table[data[i] ^ (value >> 8)] ^ ((value & 0xFF) << 8);
 600   }
 601
 602   return value;
 603 }
 604
 605 #if 0 /* unused, remove? */
 /* Disabled: would run the same table-driven checksum over a width x height
  * region of 16-bit samples, advancing by pitch elements per row.
  * NOTE(review): pixels[] is uint16_t, so the index into the 256-entry
  * checksum_table can exceed 255 - fix before re-enabling. */
 606 static uint16_t svq1_component_checksum (uint16_t *pixels, int pitch,
 607                                          int width, int height, int value) {
 608   int x, y;
 609
 610   for (y=0; y < height; y++) {
 611     for (x=0; x < width; x++) {
 612       value = checksum_table[pixels[x] ^ (value >> 8)] ^ ((value & 0xFF) << 8);
 613     }
 614
 615     pixels += pitch;
 616   }
 617
 618   return value;
 619 }
 620 #endif
 621
 622 #ifdef CONFIG_DECODERS
 623 static void svq1_parse_string (GetBitContext *bitbuf, uint8_t *out) {
 624   uint8_t seed;
 625   int     i;
 626
 627   out[0] = get_bits (bitbuf, 8);
 628
 629   seed = string_table[out[0]];
 630
 631   for (i=1; i <= out[0]; i++) {
 632     out[i] = get_bits (bitbuf, 8) ^ seed;
 633     seed   = string_table[out[i] ^ seed];
 634   }
 635 }
 636
 637 static int svq1_decode_frame_header (GetBitContext *bitbuf,MpegEncContext *s) {
 638   int frame_size_code;
 639   int temporal_reference;
 640
 641   temporal_reference = get_bits (bitbuf, 8);
 642
 643   /* frame type */
 644   s->pict_type= get_bits (bitbuf, 2)+1;
 645   if(s->pict_type==4)
 646       return -1;
 647
 648   if (s->pict_type == I_TYPE) {
 649
 650     /* unknown fields */
 651     if (s->f_code == 0x50 || s->f_code == 0x60) {
 652       int csum = get_bits (bitbuf, 16);
 653
 654       csum = svq1_packet_checksum ((uint8_t *)bitbuf->buffer, bitbuf->size_in_bits>>3, csum);
 655
 656 //      av_log(s->avctx, AV_LOG_INFO, "%s checksum (%02x) for packet data\n",
 657 //              (csum == 0) ? "correct" : "incorrect", csum);
 658     }
 659
 660     if ((s->f_code ^ 0x10) >= 0x50) {
 661       uint8_t msg[256];
 662
 663       svq1_parse_string (bitbuf, msg);
 664
 665       av_log(s->avctx, AV_LOG_INFO, "embedded message: \"%s\"\n", (char *) msg);
 666     }
 667
 668     skip_bits (bitbuf, 2);
 669     skip_bits (bitbuf, 2);
 670     skip_bits1 (bitbuf);
 671
 672     /* load frame size */
 673     frame_size_code = get_bits (bitbuf, 3);
 674
 675     if (frame_size_code == 7) {
 676       /* load width, height (12 bits each) */
 677       s->width = get_bits (bitbuf, 12);
 678       s->height = get_bits (bitbuf, 12);
 679
 680       if (!s->width || !s->height)
 681         return -1;
 682     } else {
 683       /* get width, height from table */
 684       s->width = svq1_frame_size_table[frame_size_code].width;
 685       s->height = svq1_frame_size_table[frame_size_code].height;
 686     }
 687   }
 688
 689   /* unknown fields */
 690   if (get_bits (bitbuf, 1) == 1) {
 691     skip_bits1 (bitbuf);       /* use packet checksum if (1) */
 692     skip_bits1 (bitbuf);       /* component checksums after image data if (1) */
 693
 694     if (get_bits (bitbuf, 2) != 0)
 695       return -1;
 696   }
 697
 698   if (get_bits (bitbuf, 1) == 1) {
 699     skip_bits1 (bitbuf);
 700     skip_bits (bitbuf, 4);
 701     skip_bits1 (bitbuf);
 702     skip_bits (bitbuf, 2);
 703
 704     while (get_bits (bitbuf, 1) == 1) {
 705       skip_bits (bitbuf, 8);
 706     }
 707   }
 708
 709   return 0;
 710 }
 711
 712 static int svq1_decode_frame(AVCodecContext *avctx,
 713                              void *data, int *data_size,
 714                              uint8_t *buf, int buf_size)
 715 {
 716   MpegEncContext *s=avctx->priv_data;
 717   uint8_t        *current, *previous;
 718   int             result, i, x, y, width, height;
 719   AVFrame *pict = data;
 720
 721   /* initialize bit buffer */
 722   init_get_bits(&s->gb,buf,buf_size*8);
 723
 724   /* decode frame header */
 725   s->f_code = get_bits (&s->gb, 22);
 726
 727   if ((s->f_code & ~0x70) || !(s->f_code & 0x60))
 728     return -1;
 729
 730   /* swap some header bytes (why?) */
 731   if (s->f_code != 0x20) {
 732     uint32_t *src = (uint32_t *) (buf + 4);
 733
 734     for (i=0; i < 4; i++) {
 735       src[i] = ((src[i] << 16) | (src[i] >> 16)) ^ src[7 - i];
 736     }
 737   }
 738
 739   result = svq1_decode_frame_header (&s->gb, s);
 740
 741   if (result != 0)
 742   {
 743 #ifdef DEBUG_SVQ1
 744     av_log(s->avctx, AV_LOG_INFO, "Error in svq1_decode_frame_header %i\n",result);
 745 #endif
 746     return result;
 747   }
 748
 749   //FIXME this avoids some confusion for "B frames" without 2 references
 750   //this should be removed after libavcodec can handle more flexible picture types & ordering
 751   if(s->pict_type==B_TYPE && s->last_picture_ptr==NULL) return buf_size;
 752
 753   if(avctx->hurry_up && s->pict_type==B_TYPE) return buf_size;
 754   if(  (avctx->skip_frame >= AVDISCARD_NONREF && s->pict_type==B_TYPE)
 755      ||(avctx->skip_frame >= AVDISCARD_NONKEY && s->pict_type!=I_TYPE)
 756      || avctx->skip_frame >= AVDISCARD_ALL)
 757       return buf_size;
 758
 759   if(MPV_frame_start(s, avctx) < 0)
 760       return -1;
 761
 762   /* decode y, u and v components */
 763   for (i=0; i < 3; i++) {
 764     int linesize;
 765     if (i == 0) {
 766       width  = (s->width+15)&~15;
 767       height = (s->height+15)&~15;
 768       linesize= s->linesize;
 769     } else {
 770       if(s->flags&CODEC_FLAG_GRAY) break;
 771       width  = (s->width/4+15)&~15;
 772       height = (s->height/4+15)&~15;
 773       linesize= s->uvlinesize;
 774     }
 775
 776     current  = s->current_picture.data[i];
 777
 778     if(s->pict_type==B_TYPE){
 779         previous = s->next_picture.data[i];
 780     }else{
 781         previous = s->last_picture.data[i];
 782     }
 783
 784     if (s->pict_type == I_TYPE) {
 785       /* keyframe */
 786       for (y=0; y < height; y+=16) {
 787         for (x=0; x < width; x+=16) {
 788           result = svq1_decode_block_intra (&s->gb, &current[x], linesize);
 789           if (result != 0)
 790           {
 791 //#ifdef DEBUG_SVQ1
 792             av_log(s->avctx, AV_LOG_INFO, "Error in svq1_decode_block %i (keyframe)\n",result);
 793 //#endif
 794             return result;
 795           }
 796         }
 797         current += 16*linesize;
 798       }
 799     } else {
 800       svq1_pmv_t pmv[width/8+3];
 801       /* delta frame */
 802       memset (pmv, 0, ((width / 8) + 3) * sizeof(svq1_pmv_t));
 803
 804       for (y=0; y < height; y+=16) {
 805         for (x=0; x < width; x+=16) {
 806           result = svq1_decode_delta_block (s, &s->gb, &current[x], previous,
 807                                             linesize, pmv, x, y);
 808           if (result != 0)
 809           {
 810 #ifdef DEBUG_SVQ1
 811     av_log(s->avctx, AV_LOG_INFO, "Error in svq1_decode_delta_block %i\n",result);
 812 #endif
 813             return result;
 814           }
 815         }
 816
 817         pmv[0].x =
 818         pmv[0].y = 0;
 819
 820         current += 16*linesize;
 821       }
 822     }
 823   }
 824
 825   *pict = *(AVFrame*)&s->current_picture;
 826
 827
 828   MPV_frame_end(s);
 829
 830   *data_size=sizeof(AVFrame);
 831   return buf_size;
 832 }
 833
 834 static int svq1_decode_init(AVCodecContext *avctx)
 835 {
 836     MpegEncContext *s = avctx->priv_data;
 837     int i;
 838
 839     MPV_decode_defaults(s);
 840
 841     s->avctx = avctx;
 842     s->width = (avctx->width+3)&~3;
 843     s->height = (avctx->height+3)&~3;
 844     s->codec_id= avctx->codec->id;
 845     avctx->pix_fmt = PIX_FMT_YUV410P;
 846     avctx->has_b_frames= 1; // not true, but DP frames and these behave like unidirectional b frames
 847     s->flags= avctx->flags;
 848     if (MPV_common_init(s) < 0) return -1;
 849
 850     init_vlc(&svq1_block_type, 2, 4,
 851         &svq1_block_type_vlc[0][1], 2, 1,
 852         &svq1_block_type_vlc[0][0], 2, 1, 1);
 853
 854     init_vlc(&svq1_motion_component, 7, 33,
 855         &mvtab[0][1], 2, 1,
 856         &mvtab[0][0], 2, 1, 1);
 857
 858     for (i = 0; i < 6; i++) {
 859         init_vlc(&svq1_intra_multistage[i], 3, 8,
 860             &svq1_intra_multistage_vlc[i][0][1], 2, 1,
 861             &svq1_intra_multistage_vlc[i][0][0], 2, 1, 1);
 862         init_vlc(&svq1_inter_multistage[i], 3, 8,
 863             &svq1_inter_multistage_vlc[i][0][1], 2, 1,
 864             &svq1_inter_multistage_vlc[i][0][0], 2, 1, 1);
 865     }
 866
 867     init_vlc(&svq1_intra_mean, 8, 256,
 868         &svq1_intra_mean_vlc[0][1], 4, 2,
 869         &svq1_intra_mean_vlc[0][0], 4, 2, 1);
 870
 871     init_vlc(&svq1_inter_mean, 9, 512,
 872         &svq1_inter_mean_vlc[0][1], 4, 2,
 873         &svq1_inter_mean_vlc[0][0], 4, 2, 1);
 874
 875     return 0;
 876 }
 877
 878 static int svq1_decode_end(AVCodecContext *avctx)
 879 {
 880     MpegEncContext *s = avctx->priv_data;
 881
 882     MPV_common_end(s);
 883     return 0;
 884 }
 885 #endif /* CONFIG_DECODERS */
 886
 887 #ifdef CONFIG_ENCODERS
 888 static void svq1_write_header(SVQ1Context *s, int frame_type)
 889 {
 890     int i;
 891
 892     /* frame code */
 893     put_bits(&s->pb, 22, 0x20);
 894
 895     /* temporal reference (sure hope this is a "don't care") */
 896     put_bits(&s->pb, 8, 0x00);
 897
 898     /* frame type */
 899     put_bits(&s->pb, 2, frame_type - 1);
 900
 901     if (frame_type == I_TYPE) {
 902
 903         /* no checksum since frame code is 0x20 */
 904
 905         /* no embedded string either */
 906
 907         /* output 5 unknown bits (2 + 2 + 1) */
 908         put_bits(&s->pb, 5, 0);
 909
 910         for (i = 0; i < 7; i++)
 911         {
 912             if ((svq1_frame_size_table[i].width == s->frame_width) &&
 913                 (svq1_frame_size_table[i].height == s->frame_height))
 914             {
 915                 put_bits(&s->pb, 3, i);
 916                 break;
 917             }
 918         }
 919
 920         if (i == 7)
 921         {
 922             put_bits(&s->pb, 3, 7);
 923                 put_bits(&s->pb, 12, s->frame_width);
 924                 put_bits(&s->pb, 12, s->frame_height);
 925         }
 926     }
 927
 928     /* no checksum or extra data (next 2 bits get 0) */
 929     put_bits(&s->pb, 2, 0);
 930 }
 931
 932
 /* Encoder tuning constants; their uses are outside the part of the file
  * visible here. */
 933 #define QUALITY_THRESHOLD 100
 934 #define THRESHOLD_MULTIPLIER 0.6
 935
 /* NOTE(review): presumably AltiVec builds define `vector` as a macro, which
  * would clash with the local variable named `vector` in encode_block() -
  * confirm. */
 936 #if defined(HAVE_ALTIVEC)
 937 #undef vector
 938 #endif
 939
 940 static int encode_block(SVQ1Context *s, uint8_t *src, uint8_t *ref, uint8_t *decoded, int stride, int level, int threshold, int lambda, int intra){
 941     int count, y, x, i, j, split, best_mean, best_score, best_count;
 942     int best_vector[6];
 943     int block_sum[7]= {0, 0, 0, 0, 0, 0};
 944     int w= 2<<((level+2)>>1);
 945     int h= 2<<((level+1)>>1);
 946     int size=w*h;
 947     int16_t block[7][256];
 948     const int8_t *codebook_sum, *codebook;
 949     const uint16_t (*mean_vlc)[2];
 950     const uint8_t (*multistage_vlc)[2];
 951
 952     best_score=0;
 953     //FIXME optimize, this doenst need to be done multiple times
 954     if(intra){
 955         codebook_sum= svq1_intra_codebook_sum[level];
 956         codebook= svq1_intra_codebooks[level];
 957         mean_vlc= svq1_intra_mean_vlc;
 958         multistage_vlc= svq1_intra_multistage_vlc[level];
 959         for(y=0; y<h; y++){
 960             for(x=0; x<w; x++){
 961                 int v= src[x + y*stride];
 962                 block[0][x + w*y]= v;
 963                 best_score += v*v;
 964                 block_sum[0] += v;
 965             }
 966         }
 967     }else{
 968         codebook_sum= svq1_inter_codebook_sum[level];
 969         codebook= svq1_inter_codebooks[level];
 970         mean_vlc= svq1_inter_mean_vlc + 256;
 971         multistage_vlc= svq1_inter_multistage_vlc[level];
 972         for(y=0; y<h; y++){
 973             for(x=0; x<w; x++){
 974                 int v= src[x + y*stride] - ref[x + y*stride];
 975                 block[0][x + w*y]= v;
 976                 best_score += v*v;
 977                 block_sum[0] += v;
 978             }
 979         }
 980     }
 981
 982     best_count=0;
 983     best_score -= ((block_sum[0]*block_sum[0])>>(level+3));
 984     best_mean= (block_sum[0] + (size>>1)) >> (level+3);
 985
 986     if(level<4){
 987         for(count=1; count<7; count++){
 988             int best_vector_score= INT_MAX;
 989             int best_vector_sum=-999, best_vector_mean=-999;
 990             const int stage= count-1;
 991             const int8_t *vector;
 992
 993             for(i=0; i<16; i++){
 994                 int sum= codebook_sum[stage*16 + i];
 995                 int sqr=0;
 996                 int diff, mean, score;
 997
 998                 vector = codebook + stage*size*16 + i*size;
 999
1000                 for(j=0; j<size; j++){
1001                     int v= vector[j];
1002                     sqr += (v - block[stage][j])*(v - block[stage][j]);
1003                 }
1004                 diff= block_sum[stage] - sum;
1005                 mean= (diff + (size>>1)) >> (level+3);
1006                 assert(mean >-300 && mean<300);
1007                 if(intra) mean= clip(mean, 0, 255);
1008                 else      mean= clip(mean, -256, 255);
1009                 score= sqr - ((diff*(int64_t)diff)>>(level+3)); //FIXME 64bit slooow
1010                 if(score < best_vector_score){
1011                     best_vector_score= score;
1012                     best_vector[stage]= i;
1013                     best_vector_sum= sum;
1014                     best_vector_mean= mean;
1015                 }
1016             }
1017             assert(best_vector_mean != -999);
1018             vector= codebook + stage*size*16 + best_vector[stage]*size;
1019             for(j=0; j<size; j++){
1020                 block[stage+1][j] = block[stage][j] - vector[j];
1021             }
1022             block_sum[stage+1]= block_sum[stage] - best_vector_sum;
1023             best_vector_score +=
1024                 lambda*(+ 1 + 4*count
1025                         + multistage_vlc[1+count][1]
1026                         + mean_vlc[best_vector_mean][1]);
1027
1028             if(best_vector_score < best_score){
1029                 best_score= best_vector_score;
1030                 best_count= count;
1031                 best_mean= best_vector_mean;
1032             }
1033         }
1034     }
1035
1036     split=0;
1037     if(best_score > threshold && level){
1038         int score=0;
1039         int offset= (level&1) ? stride*h/2 : w/2;
1040         PutBitContext backup[6];
1041
1042         for(i=level-1; i>=0; i--){
1043             backup[i]= s->reorder_pb[i];
1044         }
1045         score += encode_block(s, src         , ref         , decoded         , stride, level-1, threshold>>1, lambda, intra);
1046         score += encode_block(s, src + offset, ref + offset, decoded + offset, stride, level-1, threshold>>1, lambda, intra);
1047         score += lambda;
1048
1049         if(score < best_score){
1050             best_score= score;
1051             split=1;
1052         }else{
1053             for(i=level-1; i>=0; i--){
1054                 s->reorder_pb[i]= backup[i];
1055             }
1056         }
1057     }
1058     if (level > 0)
1059         put_bits(&s->reorder_pb[level], 1, split);
1060
1061     if(!split){
1062         assert((best_mean >= 0 && best_mean<256) || !intra);
1063         assert(best_mean >= -256 && best_mean<256);
1064         assert(best_count >=0 && best_count<7);
1065         assert(level<4 || best_count==0);
1066
1067         /* output the encoding */
1068         put_bits(&s->reorder_pb[level],
1069             multistage_vlc[1 + best_count][1],
1070             multistage_vlc[1 + best_count][0]);
1071         put_bits(&s->reorder_pb[level], mean_vlc[best_mean][1],
1072             mean_vlc[best_mean][0]);
1073
1074         for (i = 0; i < best_count; i++){
1075             assert(best_vector[i]>=0 && best_vector[i]<16);
1076             put_bits(&s->reorder_pb[level], 4, best_vector[i]);
1077         }
1078
1079         for(y=0; y<h; y++){
1080             for(x=0; x<w; x++){
1081                 decoded[x + y*stride]= src[x + y*stride] - block[best_count][x + w*y] + best_mean;
1082             }
1083         }
1084     }
1085
1086     return best_score;
1087 }
1088
1089
1090 static int svq1_encode_plane(SVQ1Context *s, int plane, unsigned char *src_plane, unsigned char *ref_plane, unsigned char *decoded_plane,
1091     int width, int height, int src_stride, int stride)
1092 {
1093     int x, y;
1094     int i;
1095     int block_width, block_height;
1096     int level;
1097     int threshold[6];
1098     const int lambda= (s->picture.quality*s->picture.quality) >> (2*FF_LAMBDA_SHIFT);
1099
1100     /* figure out the acceptable level thresholds in advance */
1101     threshold[5] = QUALITY_THRESHOLD;
1102     for (level = 4; level >= 0; level--)
1103         threshold[level] = threshold[level + 1] * THRESHOLD_MULTIPLIER;
1104
1105     block_width = (width + 15) / 16;
1106     block_height = (height + 15) / 16;
1107
1108     if(s->picture.pict_type == P_TYPE){
1109         s->m.avctx= s->avctx;
1110         s->m.current_picture_ptr= &s->m.current_picture;
1111         s->m.last_picture_ptr   = &s->m.last_picture;
1112         s->m.last_picture.data[0]= ref_plane;
1113         s->m.linesize=
1114         s->m.last_picture.linesize[0]=
1115         s->m.new_picture.linesize[0]=
1116         s->m.current_picture.linesize[0]= stride;
1117         s->m.width= width;
1118         s->m.height= height;
1119         s->m.mb_width= block_width;
1120         s->m.mb_height= block_height;
1121         s->m.mb_stride= s->m.mb_width+1;
1122         s->m.b8_stride= 2*s->m.mb_width+1;
1123         s->m.f_code=1;
1124         s->m.pict_type= s->picture.pict_type;
1125         s->m.me_method= s->avctx->me_method;
1126         s->m.me.scene_change_score=0;
1127         s->m.flags= s->avctx->flags;
1128 //        s->m.out_format = FMT_H263;
1129 //        s->m.unrestricted_mv= 1;
1130
1131         s->m.lambda= s->picture.quality;
1132         s->m.qscale= (s->m.lambda*139 + FF_LAMBDA_SCALE*64) >> (FF_LAMBDA_SHIFT + 7);
1133         s->m.lambda2= (s->m.lambda*s->m.lambda + FF_LAMBDA_SCALE/2) >> FF_LAMBDA_SHIFT;
1134
1135         if(!s->motion_val8[plane]){
1136             s->motion_val8 [plane]= av_mallocz((s->m.b8_stride*block_height*2 + 2)*2*sizeof(int16_t));
1137             s->motion_val16[plane]= av_mallocz((s->m.mb_stride*(block_height + 2) + 1)*2*sizeof(int16_t));
1138         }
1139
1140         s->m.mb_type= s->mb_type;
1141
1142         //dummies, to avoid segfaults
1143         s->m.current_picture.mb_mean=   (uint8_t *)s->dummy;
1144         s->m.current_picture.mb_var=    (uint16_t*)s->dummy;
1145         s->m.current_picture.mc_mb_var= (uint16_t*)s->dummy;
1146         s->m.current_picture.mb_type= s->dummy;
1147
1148         s->m.current_picture.motion_val[0]= s->motion_val8[plane] + 2;
1149         s->m.p_mv_table= s->motion_val16[plane] + s->m.mb_stride + 1;
1150         s->m.dsp= s->dsp; //move
1151         ff_init_me(&s->m);
1152
1153         s->m.me.dia_size= s->avctx->dia_size;
1154         s->m.first_slice_line=1;
1155         for (y = 0; y < block_height; y++) {
1156             uint8_t src[stride*16];
1157
1158             s->m.new_picture.data[0]= src - y*16*stride; //ugly
1159             s->m.mb_y= y;
1160
1161             for(i=0; i<16 && i + 16*y<height; i++){
1162                 memcpy(&src[i*stride], &src_plane[(i+16*y)*src_stride], width);
1163                 for(x=width; x<16*block_width; x++)
1164                     src[i*stride+x]= src[i*stride+x-1];
1165             }
1166             for(; i<16 && i + 16*y<16*block_height; i++)
1167                 memcpy(&src[i*stride], &src[(i-1)*stride], 16*block_width);
1168
1169             for (x = 0; x < block_width; x++) {
1170                 s->m.mb_x= x;
1171                 ff_init_block_index(&s->m);
1172                 ff_update_block_index(&s->m);
1173
1174                 ff_estimate_p_frame_motion(&s->m, x, y);
1175             }
1176             s->m.first_slice_line=0;
1177         }
1178
1179         ff_fix_long_p_mvs(&s->m);
1180         ff_fix_long_mvs(&s->m, NULL, 0, s->m.p_mv_table, s->m.f_code, CANDIDATE_MB_TYPE_INTER, 0);
1181     }
1182
1183     s->m.first_slice_line=1;
1184     for (y = 0; y < block_height; y++) {
1185         uint8_t src[stride*16];
1186
1187         for(i=0; i<16 && i + 16*y<height; i++){
1188             memcpy(&src[i*stride], &src_plane[(i+16*y)*src_stride], width);
1189             for(x=width; x<16*block_width; x++)
1190                 src[i*stride+x]= src[i*stride+x-1];
1191         }
1192         for(; i<16 && i + 16*y<16*block_height; i++)
1193             memcpy(&src[i*stride], &src[(i-1)*stride], 16*block_width);
1194
1195         s->m.mb_y= y;
1196         for (x = 0; x < block_width; x++) {
1197             uint8_t reorder_buffer[3][6][7*32];
1198             int count[3][6];
1199             int offset = y * 16 * stride + x * 16;
1200             uint8_t *decoded= decoded_plane + offset;
1201             uint8_t *ref= ref_plane + offset;
1202             int score[4]={0,0,0,0}, best;
1203             uint8_t temp[16*stride];
1204
1205             if(s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < 3000){ //FIXME check size
1206                 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
1207                 return -1;
1208             }
1209
1210             s->m.mb_x= x;
1211             ff_init_block_index(&s->m);
1212             ff_update_block_index(&s->m);
1213
1214             if(s->picture.pict_type == I_TYPE || (s->m.mb_type[x + y*s->m.mb_stride]&CANDIDATE_MB_TYPE_INTRA)){
1215                 for(i=0; i<6; i++){
1216                     init_put_bits(&s->reorder_pb[i], reorder_buffer[0][i], 7*32);
1217                 }
1218                 if(s->picture.pict_type == P_TYPE){
1219                     const uint8_t *vlc= svq1_block_type_vlc[SVQ1_BLOCK_INTRA];
1220                     put_bits(&s->reorder_pb[5], vlc[1], vlc[0]);
1221                     score[0]= vlc[1]*lambda;
1222                 }
1223                 score[0]+= encode_block(s, src+16*x, NULL, temp, stride, 5, 64, lambda, 1);
1224                 for(i=0; i<6; i++){
1225                     count[0][i]= put_bits_count(&s->reorder_pb[i]);
1226                     flush_put_bits(&s->reorder_pb[i]);
1227                 }
1228             }else
1229                 score[0]= INT_MAX;
1230
1231             best=0;
1232
1233             if(s->picture.pict_type == P_TYPE){
1234                 const uint8_t *vlc= svq1_block_type_vlc[SVQ1_BLOCK_INTER];
1235                 int mx, my, pred_x, pred_y, dxy;
1236                 int16_t *motion_ptr;
1237
1238                 motion_ptr= h263_pred_motion(&s->m, 0, 0, &pred_x, &pred_y);
1239                 if(s->m.mb_type[x + y*s->m.mb_stride]&CANDIDATE_MB_TYPE_INTER){
1240                     for(i=0; i<6; i++)
1241                         init_put_bits(&s->reorder_pb[i], reorder_buffer[1][i], 7*32);
1242
1243                     put_bits(&s->reorder_pb[5], vlc[1], vlc[0]);
1244
1245                     s->m.pb= s->reorder_pb[5];
1246                     mx= motion_ptr[0];
1247                     my= motion_ptr[1];
1248                     assert(mx>=-32 && mx<=31);
1249                     assert(my>=-32 && my<=31);
1250                     assert(pred_x>=-32 && pred_x<=31);
1251                     assert(pred_y>=-32 && pred_y<=31);
1252                     ff_h263_encode_motion(&s->m, mx - pred_x, 1);
1253                     ff_h263_encode_motion(&s->m, my - pred_y, 1);
1254                     s->reorder_pb[5]= s->m.pb;
1255                     score[1] += lambda*put_bits_count(&s->reorder_pb[5]);
1256
1257                     dxy= (mx&1) + 2*(my&1);
1258
1259                     s->dsp.put_pixels_tab[0][dxy](temp+16, ref + (mx>>1) + stride*(my>>1), stride, 16);
1260
1261                     score[1]+= encode_block(s, src+16*x, temp+16, decoded, stride, 5, 64, lambda, 0);
1262                     best= score[1] <= score[0];
1263
1264                     vlc= svq1_block_type_vlc[SVQ1_BLOCK_SKIP];
1265                     score[2]= s->dsp.sse[0](NULL, src+16*x, ref, stride, 16);
1266                     score[2]+= vlc[1]*lambda;
1267                     if(score[2] < score[best] && mx==0 && my==0){
1268                         best=2;
1269                         s->dsp.put_pixels_tab[0][0](decoded, ref, stride, 16);
1270                         for(i=0; i<6; i++){
1271                             count[2][i]=0;
1272                         }
1273                         put_bits(&s->pb, vlc[1], vlc[0]);
1274                     }
1275                 }
1276
1277                 if(best==1){
1278                     for(i=0; i<6; i++){
1279                         count[1][i]= put_bits_count(&s->reorder_pb[i]);
1280                         flush_put_bits(&s->reorder_pb[i]);
1281                     }
1282                 }else{
1283                     motion_ptr[0                 ] = motion_ptr[1                 ]=
1284                     motion_ptr[2                 ] = motion_ptr[3                 ]=
1285                     motion_ptr[0+2*s->m.b8_stride] = motion_ptr[1+2*s->m.b8_stride]=
1286                     motion_ptr[2+2*s->m.b8_stride] = motion_ptr[3+2*s->m.b8_stride]=0;
1287                 }
1288             }
1289
1290             s->rd_total += score[best];
1291
1292             for(i=5; i>=0; i--){
1293                 ff_copy_bits(&s->pb, reorder_buffer[best][i], count[best][i]);
1294             }
1295             if(best==0){
1296                 s->dsp.put_pixels_tab[0][0](decoded, temp, stride, 16);
1297             }
1298         }
1299         s->m.first_slice_line=0;
1300     }
1301     return 0;
1302 }
1303
1304 static int svq1_encode_init(AVCodecContext *avctx)
1305 {
1306     SVQ1Context * const s = avctx->priv_data;
1307
1308     dsputil_init(&s->dsp, avctx);
1309     avctx->coded_frame= (AVFrame*)&s->picture;
1310
1311     s->frame_width = avctx->width;
1312     s->frame_height = avctx->height;
1313
1314     s->y_block_width = (s->frame_width + 15) / 16;
1315     s->y_block_height = (s->frame_height + 15) / 16;
1316
1317     s->c_block_width = (s->frame_width / 4 + 15) / 16;
1318     s->c_block_height = (s->frame_height / 4 + 15) / 16;
1319
1320     s->avctx= avctx;
1321     s->m.avctx= avctx;
1322     s->m.me.scratchpad= av_mallocz((avctx->width+64)*2*16*2*sizeof(uint8_t));
1323     s->m.me.map       = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t));
1324     s->m.me.score_map = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t));
1325     s->mb_type        = av_mallocz((s->y_block_width+1)*s->y_block_height*sizeof(int16_t));
1326     s->dummy          = av_mallocz((s->y_block_width+1)*s->y_block_height*sizeof(int32_t));
1327     h263_encode_init(&s->m); //mv_penalty
1328
1329     return 0;
1330 }
1331
1332 static int svq1_encode_frame(AVCodecContext *avctx, unsigned char *buf,
1333     int buf_size, void *data)
1334 {
1335     SVQ1Context * const s = avctx->priv_data;
1336     AVFrame *pict = data;
1337     AVFrame * const p= (AVFrame*)&s->picture;
1338     AVFrame temp;
1339     int i;
1340
1341     if(avctx->pix_fmt != PIX_FMT_YUV410P){
1342         av_log(avctx, AV_LOG_ERROR, "unsupported pixel format\n");
1343         return -1;
1344     }
1345
1346     if(!s->current_picture.data[0]){
1347         avctx->get_buffer(avctx, &s->current_picture);
1348         avctx->get_buffer(avctx, &s->last_picture);
1349     }
1350
1351     temp= s->current_picture;
1352     s->current_picture= s->last_picture;
1353     s->last_picture= temp;
1354
1355     init_put_bits(&s->pb, buf, buf_size);
1356
1357     *p = *pict;
1358     p->pict_type = avctx->gop_size && avctx->frame_number % avctx->gop_size ? P_TYPE : I_TYPE;
1359     p->key_frame = p->pict_type == I_TYPE;
1360
1361     svq1_write_header(s, p->pict_type);
1362     for(i=0; i<3; i++){
1363         if(svq1_encode_plane(s, i,
1364             s->picture.data[i], s->last_picture.data[i], s->current_picture.data[i],
1365             s->frame_width / (i?4:1), s->frame_height / (i?4:1),
1366             s->picture.linesize[i], s->current_picture.linesize[i]) < 0)
1367                 return -1;
1368     }
1369
1370 //    align_put_bits(&s->pb);
1371     while(put_bits_count(&s->pb) & 31)
1372         put_bits(&s->pb, 1, 0);
1373
1374     flush_put_bits(&s->pb);
1375
1376     return (put_bits_count(&s->pb) / 8);
1377 }
1378
1379 static int svq1_encode_end(AVCodecContext *avctx)
1380 {
1381     SVQ1Context * const s = avctx->priv_data;
1382     int i;
1383
1384     av_log(avctx, AV_LOG_DEBUG, "RD: %f\n", s->rd_total/(double)(avctx->width*avctx->height*avctx->frame_number));
1385
1386     av_freep(&s->m.me.scratchpad);
1387     av_freep(&s->m.me.map);
1388     av_freep(&s->m.me.score_map);
1389     av_freep(&s->mb_type);
1390     av_freep(&s->dummy);
1391
1392     for(i=0; i<3; i++){
1393         av_freep(&s->motion_val8[i]);
1394         av_freep(&s->motion_val16[i]);
1395     }
1396
1397     return 0;
1398 }
1399
1400 #endif //CONFIG_ENCODERS
1401
1402 #ifdef CONFIG_DECODERS
1403 AVCodec svq1_decoder = {
1404     "svq1",
1405     CODEC_TYPE_VIDEO,
1406     CODEC_ID_SVQ1,
1407     sizeof(MpegEncContext),
1408     svq1_decode_init,
1409     NULL,
1410     svq1_decode_end,
1411     svq1_decode_frame,
1412     CODEC_CAP_DR1,
1413     .flush= ff_mpeg_flush,
1414     .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV410P, -1},
1415 };
1416 #endif
1417
1418 #ifdef CONFIG_ENCODERS
1419
1420 AVCodec svq1_encoder = {
1421     "svq1",
1422     CODEC_TYPE_VIDEO,
1423     CODEC_ID_SVQ1,
1424     sizeof(SVQ1Context),
1425     svq1_encode_init,
1426     svq1_encode_frame,
1427     svq1_encode_end,
1428     .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV410P, -1},
1429 };
1430
1431 #endif //CONFIG_ENCODERS