git.sesse.net Git - ffmpeg/blob - libavcodec/svq1.c

   1 /*
   2  *
   3  * Copyright (C) 2002 the xine project
   4  * Copyright (C) 2002 the ffmpeg project
   5  *
   6  * This file is part of FFmpeg.
   7  *
   8  * FFmpeg is free software; you can redistribute it and/or
   9  * modify it under the terms of the GNU Lesser General Public
  10  * License as published by the Free Software Foundation; either
  11  * version 2.1 of the License, or (at your option) any later version.
  12  *
  13  * FFmpeg is distributed in the hope that it will be useful,
  14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  16  * Lesser General Public License for more details.
  17  *
  18  * You should have received a copy of the GNU Lesser General Public
  19  * License along with FFmpeg; if not, write to the Free Software
  20  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  21  *
  22  * (SVQ1 Decoder)
  23  * Ported to mplayer by Arpi <arpi@thot.banki.hu>
  24  * Ported to libavcodec by Nick Kurshev <nickols_k@mail.ru>
  25  *
  26  * SVQ1 Encoder (c) 2004 Mike Melanson <melanson@pcisys.net>
  27  */
  28
  29 /**
  30  * @file svq1.c
  31  * Sorenson Vector Quantizer #1 (SVQ1) video codec.
   32  * For more information on the SVQ1 algorithm, visit:
  33  *   http://www.pcisys.net/~melanson/codecs/
  34  */
  35
  36
  37 //#define DEBUG_SVQ1
  38 #include <stdio.h>
  39 #include <stdlib.h>
  40 #include <string.h>
  41 #include <unistd.h>
  42 #include <limits.h>
  43
  44 #include "avcodec.h"
  45 #include "dsputil.h"
  46 #include "mpegvideo.h"
  47 #include "bswap.h"
  48
  49 #undef NDEBUG
  50 #include <assert.h>
  51
      /* Motion vector code table defined in another file; svq1_decode_init()
       * passes it to init_vlc() to build svq1_motion_component. */
   52 extern const uint8_t mvtab[33][2];
   53
      /* VLC decoding tables, built by svq1_decode_init(). The two multistage
       * arrays are indexed by the vector level (0..5) in the block decoders. */
   54 static VLC svq1_block_type;
   55 static VLC svq1_motion_component;
   56 static VLC svq1_intra_multistage[6];
   57 static VLC svq1_inter_multistage[6];
   58 static VLC svq1_intra_mean;
   59 static VLC svq1_inter_mean;
   60
      /* Block types of a delta frame, as decoded through svq1_block_type in
       * svq1_decode_delta_block(). */
   61 #define SVQ1_BLOCK_SKIP         0
   62 #define SVQ1_BLOCK_INTER        1
   63 #define SVQ1_BLOCK_INTER_4V     2
   64 #define SVQ1_BLOCK_INTRA        3
  65
      /**
       * Codec state taken by the encoder-side functions of this file
       * (svq1_write_header() and encode_block()). The decoder entry points
       * visible here use MpegEncContext as their private context instead.
       */
   66 typedef struct SVQ1Context {
   67     MpegEncContext m; // FIXME/XXX: only needed for motion estimation and should not be used for anything else; to be removed once motion estimation no longer depends on MpegEncContext
   68     AVCodecContext *avctx;
   69     DSPContext dsp; // encode_block() calls dsp.ssd_int8_vs_int16 through this
   70     AVFrame picture;
   71     AVFrame current_picture;
   72     AVFrame last_picture;
   73     PutBitContext pb; // bit writer that svq1_write_header() emits the frame header into
   74     GetBitContext gb;
   75
   76     PutBitContext reorder_pb[6]; // presumably per-level bit writers required by the breadth-first vector order of the bitstream (slower and more complex) - TODO confirm
   77
      /* written to the frame header when they match no standard size */
   78     int frame_width;
   79     int frame_height;
   80
   81     /* Y plane block dimensions */
   82     int y_block_width;
   83     int y_block_height;
   84
   85     /* U & V plane (C planes) block dimensions */
   86     int c_block_width;
   87     int c_block_height;
   88
   89     uint16_t *mb_type;
   90     uint32_t *dummy;
   91     int16_t (*motion_val8[3])[2];
   92     int16_t (*motion_val16[3])[2];
   93
   94     int64_t rd_total;
   95 } SVQ1Context;
  96
  97 /* motion vector (prediction) */
      /* A decoded or predicted motion vector. The decoder uses (value >> 1) as
       * the whole-pixel offset and bit 0 to pick the put_pixels variant, so the
       * components are effectively stored in half-pixel units. */
   98 typedef struct svq1_pmv_s {
   99   int           x;
  100   int           y;
  101 } svq1_pmv_t;
 102
 103 #include "svq1_cb.h"
 104 #include "svq1_vlc.h"
 105
      /*
       * 256-entry lookup table for svq1_packet_checksum(), indexed by the input
       * byte XORed with the high byte of the running 16-bit value.
       * NOTE(review): the entries look like the usual CRC-16 table for
       * polynomial 0x1021 (entry 1 is 0x1021) - confirm before relying on it.
       */
  106 static const uint16_t checksum_table[256] = {
  107   0x0000, 0x1021, 0x2042, 0x3063, 0x4084, 0x50A5, 0x60C6, 0x70E7,
  108   0x8108, 0x9129, 0xA14A, 0xB16B, 0xC18C, 0xD1AD, 0xE1CE, 0xF1EF,
  109   0x1231, 0x0210, 0x3273, 0x2252, 0x52B5, 0x4294, 0x72F7, 0x62D6,
  110   0x9339, 0x8318, 0xB37B, 0xA35A, 0xD3BD, 0xC39C, 0xF3FF, 0xE3DE,
  111   0x2462, 0x3443, 0x0420, 0x1401, 0x64E6, 0x74C7, 0x44A4, 0x5485,
  112   0xA56A, 0xB54B, 0x8528, 0x9509, 0xE5EE, 0xF5CF, 0xC5AC, 0xD58D,
  113   0x3653, 0x2672, 0x1611, 0x0630, 0x76D7, 0x66F6, 0x5695, 0x46B4,
  114   0xB75B, 0xA77A, 0x9719, 0x8738, 0xF7DF, 0xE7FE, 0xD79D, 0xC7BC,
  115   0x48C4, 0x58E5, 0x6886, 0x78A7, 0x0840, 0x1861, 0x2802, 0x3823,
  116   0xC9CC, 0xD9ED, 0xE98E, 0xF9AF, 0x8948, 0x9969, 0xA90A, 0xB92B,
  117   0x5AF5, 0x4AD4, 0x7AB7, 0x6A96, 0x1A71, 0x0A50, 0x3A33, 0x2A12,
  118   0xDBFD, 0xCBDC, 0xFBBF, 0xEB9E, 0x9B79, 0x8B58, 0xBB3B, 0xAB1A,
  119   0x6CA6, 0x7C87, 0x4CE4, 0x5CC5, 0x2C22, 0x3C03, 0x0C60, 0x1C41,
  120   0xEDAE, 0xFD8F, 0xCDEC, 0xDDCD, 0xAD2A, 0xBD0B, 0x8D68, 0x9D49,
  121   0x7E97, 0x6EB6, 0x5ED5, 0x4EF4, 0x3E13, 0x2E32, 0x1E51, 0x0E70,
  122   0xFF9F, 0xEFBE, 0xDFDD, 0xCFFC, 0xBF1B, 0xAF3A, 0x9F59, 0x8F78,
  123   0x9188, 0x81A9, 0xB1CA, 0xA1EB, 0xD10C, 0xC12D, 0xF14E, 0xE16F,
  124   0x1080, 0x00A1, 0x30C2, 0x20E3, 0x5004, 0x4025, 0x7046, 0x6067,
  125   0x83B9, 0x9398, 0xA3FB, 0xB3DA, 0xC33D, 0xD31C, 0xE37F, 0xF35E,
  126   0x02B1, 0x1290, 0x22F3, 0x32D2, 0x4235, 0x5214, 0x6277, 0x7256,
  127   0xB5EA, 0xA5CB, 0x95A8, 0x8589, 0xF56E, 0xE54F, 0xD52C, 0xC50D,
  128   0x34E2, 0x24C3, 0x14A0, 0x0481, 0x7466, 0x6447, 0x5424, 0x4405,
  129   0xA7DB, 0xB7FA, 0x8799, 0x97B8, 0xE75F, 0xF77E, 0xC71D, 0xD73C,
  130   0x26D3, 0x36F2, 0x0691, 0x16B0, 0x6657, 0x7676, 0x4615, 0x5634,
  131   0xD94C, 0xC96D, 0xF90E, 0xE92F, 0x99C8, 0x89E9, 0xB98A, 0xA9AB,
  132   0x5844, 0x4865, 0x7806, 0x6827, 0x18C0, 0x08E1, 0x3882, 0x28A3,
  133   0xCB7D, 0xDB5C, 0xEB3F, 0xFB1E, 0x8BF9, 0x9BD8, 0xABBB, 0xBB9A,
  134   0x4A75, 0x5A54, 0x6A37, 0x7A16, 0x0AF1, 0x1AD0, 0x2AB3, 0x3A92,
  135   0xFD2E, 0xED0F, 0xDD6C, 0xCD4D, 0xBDAA, 0xAD8B, 0x9DE8, 0x8DC9,
  136   0x7C26, 0x6C07, 0x5C64, 0x4C45, 0x3CA2, 0x2C83, 0x1CE0, 0x0CC1,
  137   0xEF1F, 0xFF3E, 0xCF5D, 0xDF7C, 0xAF9B, 0xBFBA, 0x8FD9, 0x9FF8,
  138   0x6E17, 0x7E36, 0x4E55, 0x5E74, 0x2E93, 0x3EB2, 0x0ED1, 0x1EF0
  139 };
 140
      /*
       * 256-entry byte substitution table used by svq1_parse_string(): it maps
       * the length byte, and then each (decoded byte XOR seed), to the next seed
       * that is XORed onto the following byte of the embedded message.
       */
  141 static const uint8_t string_table[256] = {
  142   0x00, 0xD5, 0x7F, 0xAA, 0xFE, 0x2B, 0x81, 0x54,
  143   0x29, 0xFC, 0x56, 0x83, 0xD7, 0x02, 0xA8, 0x7D,
  144   0x52, 0x87, 0x2D, 0xF8, 0xAC, 0x79, 0xD3, 0x06,
  145   0x7B, 0xAE, 0x04, 0xD1, 0x85, 0x50, 0xFA, 0x2F,
  146   0xA4, 0x71, 0xDB, 0x0E, 0x5A, 0x8F, 0x25, 0xF0,
  147   0x8D, 0x58, 0xF2, 0x27, 0x73, 0xA6, 0x0C, 0xD9,
  148   0xF6, 0x23, 0x89, 0x5C, 0x08, 0xDD, 0x77, 0xA2,
  149   0xDF, 0x0A, 0xA0, 0x75, 0x21, 0xF4, 0x5E, 0x8B,
  150   0x9D, 0x48, 0xE2, 0x37, 0x63, 0xB6, 0x1C, 0xC9,
  151   0xB4, 0x61, 0xCB, 0x1E, 0x4A, 0x9F, 0x35, 0xE0,
  152   0xCF, 0x1A, 0xB0, 0x65, 0x31, 0xE4, 0x4E, 0x9B,
  153   0xE6, 0x33, 0x99, 0x4C, 0x18, 0xCD, 0x67, 0xB2,
  154   0x39, 0xEC, 0x46, 0x93, 0xC7, 0x12, 0xB8, 0x6D,
  155   0x10, 0xC5, 0x6F, 0xBA, 0xEE, 0x3B, 0x91, 0x44,
  156   0x6B, 0xBE, 0x14, 0xC1, 0x95, 0x40, 0xEA, 0x3F,
  157   0x42, 0x97, 0x3D, 0xE8, 0xBC, 0x69, 0xC3, 0x16,
  158   0xEF, 0x3A, 0x90, 0x45, 0x11, 0xC4, 0x6E, 0xBB,
  159   0xC6, 0x13, 0xB9, 0x6C, 0x38, 0xED, 0x47, 0x92,
  160   0xBD, 0x68, 0xC2, 0x17, 0x43, 0x96, 0x3C, 0xE9,
  161   0x94, 0x41, 0xEB, 0x3E, 0x6A, 0xBF, 0x15, 0xC0,
  162   0x4B, 0x9E, 0x34, 0xE1, 0xB5, 0x60, 0xCA, 0x1F,
  163   0x62, 0xB7, 0x1D, 0xC8, 0x9C, 0x49, 0xE3, 0x36,
  164   0x19, 0xCC, 0x66, 0xB3, 0xE7, 0x32, 0x98, 0x4D,
  165   0x30, 0xE5, 0x4F, 0x9A, 0xCE, 0x1B, 0xB1, 0x64,
  166   0x72, 0xA7, 0x0D, 0xD8, 0x8C, 0x59, 0xF3, 0x26,
  167   0x5B, 0x8E, 0x24, 0xF1, 0xA5, 0x70, 0xDA, 0x0F,
  168   0x20, 0xF5, 0x5F, 0x8A, 0xDE, 0x0B, 0xA1, 0x74,
  169   0x09, 0xDC, 0x76, 0xA3, 0xF7, 0x22, 0x88, 0x5D,
  170   0xD6, 0x03, 0xA9, 0x7C, 0x28, 0xFD, 0x57, 0x82,
  171   0xFF, 0x2A, 0x80, 0x55, 0x01, 0xD4, 0x7E, 0xAB,
  172   0x84, 0x51, 0xFB, 0x2E, 0x7A, 0xAF, 0x05, 0xD0,
  173   0xAD, 0x78, 0xD2, 0x07, 0x53, 0x86, 0x2C, 0xF9
  174 };
 175
      /*
       * Advance the breadth-first split list of the block being decoded.
       * Operates on the caller's locals: bitbuf, list, i, m, n, level, pitch.
       *
       * While level > 0: once i reaches m (the end of the current depth), m is
       * moved to n and level is decremented, and the loop stops if level hits 0.
       * Then one bit is read: 0 stops the loop with list[i] as the vector to
       * decode; 1 appends two children (list[i] itself, and list[i] offset by
       * pitch or 1 - selected by the low bit of level - shifted left by
       * level / 2 + 1) and moves on to the next list entry.
       */
  176 #define SVQ1_PROCESS_VECTOR()\
  177     for (; level > 0; i++) {\
  178       /* process next depth */\
  179       if (i == m) {\
  180         m = n;\
  181         if (--level == 0)\
  182           break;\
  183       }\
  184       /* divide block if next bit set */\
  185       if (get_bits (bitbuf, 1) == 0)\
  186         break;\
  187       /* add child nodes */\
  188       list[n++] = list[i];\
  189       list[n++] = list[i] + (((level & 1) ? pitch : 1) << ((level / 2) + 1));\
  190     }
 191
      /*
       * Add `stages` codebook words to the packed accumulators n1/n2, then clip.
       * Operates on the caller's locals: codebook, entries, stages, j, n1, n2, n3.
       *
       * Each 32-bit codebook word is XORed with 0x80808080 and split so that
       * bytes 1 and 3 are added to n1 and bytes 0 and 2 to n2, every byte in its
       * own 16-bit lane. Afterwards an accumulator with any bit set above the
       * low byte of a lane goes through the "clip to [0..255]" fix-up.
       */
  192 #define SVQ1_ADD_CODEBOOK()\
  193           /* add codebook entries to vector */\
  194           for (j=0; j < stages; j++) {\
  195             n3  = codebook[entries[j]] ^ 0x80808080;\
  196             n1 += ((n3 & 0xFF00FF00) >> 8);\
  197             n2 +=  (n3 & 0x00FF00FF);\
  198           }\
  199 \
  200           /* clip to [0..255] */\
  201           if (n1 & 0xFF00FF00) {\
  202             n3  = ((( n1 >> 15) & 0x00010001) | 0x01000100) - 0x00010001;\
  203             n1 += 0x7F007F00;\
  204             n1 |= (((~n1 >> 15) & 0x00010001) | 0x01000100) - 0x00010001;\
  205             n1 &= (n3 & 0x00FF00FF);\
  206           }\
  207 \
  208           if (n2 & 0xFF00FF00) {\
  209             n3  = ((( n2 >> 15) & 0x00010001) | 0x01000100) - 0x00010001;\
  210             n2 += 0x7F007F00;\
  211             n2 |= (((~n2 >> 15) & 0x00010001) | 0x01000100) - 0x00010001;\
  212             n2 &= (n3 & 0x00FF00FF);\
  213           }
 214
      /*
       * Reconstruct an intra vector. Operates on the caller's locals: dst,
       * codebook, width, height, pitch, x, y, n1, n2, n4 (plus those used by
       * SVQ1_ADD_CODEBOOK).
       *
       * For every row and every group of four pixels (width / 4 words per row)
       * both accumulators start at n4, the codebook stages are added, and the
       * result is stored as (n1 << 8) | n2. codebook advances by one word per
       * group and dst by pitch / 4 words per row.
       */
  215 #define SVQ1_DO_CODEBOOK_INTRA()\
  216       for (y=0; y < height; y++) {\
  217         for (x=0; x < (width / 4); x++, codebook++) {\
  218         n1 = n4;\
  219         n2 = n4;\
  220         SVQ1_ADD_CODEBOOK()\
  221         /* store result */\
  222         dst[x] = (n1 << 8) | n2;\
  223         }\
  224         dst += (pitch / 4);\
  225       }
 226
      /*
       * Reconstruct a non-intra vector. Same loop shape and caller locals as
       * SVQ1_DO_CODEBOOK_INTRA, but the accumulators start from the four pixels
       * already stored in dst[x] (split into the two lane sets) plus n4, so the
       * codebook stages are added on top of the existing picture content.
       */
  227 #define SVQ1_DO_CODEBOOK_NONINTRA()\
  228       for (y=0; y < height; y++) {\
  229         for (x=0; x < (width / 4); x++, codebook++) {\
  230         n3 = dst[x];\
  231         /* add mean value to vector */\
  232         n1 = ((n3 & 0xFF00FF00) >> 8) + n4;\
  233         n2 =  (n3 & 0x00FF00FF)          + n4;\
  234         SVQ1_ADD_CODEBOOK()\
  235         /* store result */\
  236         dst[x] = (n1 << 8) | n2;\
  237         }\
  238         dst += (pitch / 4);\
  239       }
 240
      /*
       * Read the codebook indices of the current vector and prepare its mean.
       * Operates on the caller's locals: bitbuf, codebook, bit_cache, entries,
       * stages, level, j, mean, n4.
       *
       * Selects cbook[level], reads 4 * stages bits and splits them into 4-bit
       * indices (first stage in the highest bits); stage j maps to word offset
       * (index + 16 * j) << (level + 1). mean is then reduced by 128 per stage
       * and packed into both 16-bit lanes of n4; the (mean >> 31) term
       * presumably compensates the upper lane when mean is negative - TODO
       * confirm.
       */
  241 #define SVQ1_CALC_CODEBOOK_ENTRIES(cbook)\
  242       codebook = (const uint32_t *) cbook[level];\
  243       bit_cache = get_bits (bitbuf, 4*stages);\
  244       /* calculate codebook entries for this vector */\
  245       for (j=0; j < stages; j++) {\
  246         entries[j] = (((bit_cache >> (4*(stages - j - 1))) & 0xF) + 16*j) << (level + 1);\
  247       }\
  248       mean -= (stages * 128);\
  249       n4    = ((mean + (mean >> 31)) << 16) | (mean & 0xFFFF);
 250
 251 static int svq1_decode_block_intra (GetBitContext *bitbuf, uint8_t *pixels, int pitch ) {
 252   uint32_t    bit_cache;
 253   uint8_t    *list[63];
 254   uint32_t   *dst;
 255   const uint32_t *codebook;
 256   int         entries[6];
 257   int         i, j, m, n;
 258   int         mean, stages;
 259   unsigned    x, y, width, height, level;
 260   uint32_t    n1, n2, n3, n4;
 261
 262   /* initialize list for breadth first processing of vectors */
 263   list[0] = pixels;
 264
 265   /* recursively process vector */
 266   for (i=0, m=1, n=1, level=5; i < n; i++) {
 267     SVQ1_PROCESS_VECTOR();
 268
 269     /* destination address and vector size */
 270     dst = (uint32_t *) list[i];
 271     width = 1 << ((4 + level) /2);
 272     height = 1 << ((3 + level) /2);
 273
 274     /* get number of stages (-1 skips vector, 0 for mean only) */
 275     stages = get_vlc2(bitbuf, svq1_intra_multistage[level].table, 3, 3) - 1;
 276
 277     if (stages == -1) {
 278         for (y=0; y < height; y++) {
 279           memset (&dst[y*(pitch / 4)], 0, width);
 280         }
 281       continue;                 /* skip vector */
 282     }
 283
 284     if ((stages > 0) && (level >= 4)) {
 285 #ifdef DEBUG_SVQ1
 286     av_log(s->avctx, AV_LOG_INFO, "Error (svq1_decode_block_intra): invalid vector: stages=%i level=%i\n",stages,level);
 287 #endif
 288       return -1;        /* invalid vector */
 289     }
 290
 291     mean = get_vlc2(bitbuf, svq1_intra_mean.table, 8, 3);
 292
 293     if (stages == 0) {
 294       for (y=0; y < height; y++) {
 295         memset (&dst[y*(pitch / 4)], mean, width);
 296       }
 297     } else {
 298       SVQ1_CALC_CODEBOOK_ENTRIES(svq1_intra_codebooks);
 299       SVQ1_DO_CODEBOOK_INTRA()
 300     }
 301   }
 302
 303   return 0;
 304 }
 305
 306 static int svq1_decode_block_non_intra (GetBitContext *bitbuf, uint8_t *pixels, int pitch ) {
 307   uint32_t    bit_cache;
 308   uint8_t    *list[63];
 309   uint32_t   *dst;
 310   const uint32_t *codebook;
 311   int         entries[6];
 312   int         i, j, m, n;
 313   int         mean, stages;
 314   int         x, y, width, height, level;
 315   uint32_t    n1, n2, n3, n4;
 316
 317   /* initialize list for breadth first processing of vectors */
 318   list[0] = pixels;
 319
 320   /* recursively process vector */
 321   for (i=0, m=1, n=1, level=5; i < n; i++) {
 322     SVQ1_PROCESS_VECTOR();
 323
 324     /* destination address and vector size */
 325     dst = (uint32_t *) list[i];
 326     width = 1 << ((4 + level) /2);
 327     height = 1 << ((3 + level) /2);
 328
 329     /* get number of stages (-1 skips vector, 0 for mean only) */
 330     stages = get_vlc2(bitbuf, svq1_inter_multistage[level].table, 3, 2) - 1;
 331
 332     if (stages == -1) continue; /* skip vector */
 333
 334     if ((stages > 0) && (level >= 4)) {
 335 #ifdef DEBUG_SVQ1
 336     av_log(s->avctx, AV_LOG_INFO, "Error (svq1_decode_block_non_intra): invalid vector: stages=%i level=%i\n",stages,level);
 337 #endif
 338       return -1;        /* invalid vector */
 339     }
 340
 341     mean = get_vlc2(bitbuf, svq1_inter_mean.table, 9, 3) - 256;
 342
 343     SVQ1_CALC_CODEBOOK_ENTRIES(svq1_inter_codebooks);
 344     SVQ1_DO_CODEBOOK_NONINTRA()
 345   }
 346   return 0;
 347 }
 348
 349 static int svq1_decode_motion_vector (GetBitContext *bitbuf, svq1_pmv_t *mv, svq1_pmv_t **pmv) {
 350   int        diff;
 351   int        i;
 352
 353   for (i=0; i < 2; i++) {
 354
 355     /* get motion code */
 356     diff = get_vlc2(bitbuf, svq1_motion_component.table, 7, 2);
 357     if(diff<0)
 358         return -1;
 359     else if(diff){
 360         if(get_bits1(bitbuf)) diff= -diff;
 361     }
 362
 363     /* add median of motion vector predictors and clip result */
 364     if (i == 1)
 365       mv->y = ((diff + mid_pred(pmv[0]->y, pmv[1]->y, pmv[2]->y)) << 26) >> 26;
 366     else
 367       mv->x = ((diff + mid_pred(pmv[0]->x, pmv[1]->x, pmv[2]->x)) << 26) >> 26;
 368   }
 369
 370   return 0;
 371 }
 372
 373 static void svq1_skip_block (uint8_t *current, uint8_t *previous, int pitch, int x, int y) {
 374   uint8_t *src;
 375   uint8_t *dst;
 376   int      i;
 377
 378   src = &previous[x + y*pitch];
 379   dst = current;
 380
 381   for (i=0; i < 16; i++) {
 382     memcpy (dst, src, 16);
 383     src += pitch;
 384     dst += pitch;
 385   }
 386 }
 387
 388 static int svq1_motion_inter_block (MpegEncContext *s, GetBitContext *bitbuf,
 389                                uint8_t *current, uint8_t *previous, int pitch,
 390                                svq1_pmv_t *motion, int x, int y) {
 391   uint8_t    *src;
 392   uint8_t    *dst;
 393   svq1_pmv_t  mv;
 394   svq1_pmv_t *pmv[3];
 395   int         result;
 396
 397   /* predict and decode motion vector */
 398   pmv[0] = &motion[0];
 399   if (y == 0) {
 400     pmv[1] =
 401     pmv[2] = pmv[0];
 402   }
 403   else {
 404     pmv[1] = &motion[(x / 8) + 2];
 405     pmv[2] = &motion[(x / 8) + 4];
 406   }
 407
 408   result = svq1_decode_motion_vector (bitbuf, &mv, pmv);
 409
 410   if (result != 0)
 411     return result;
 412
 413   motion[0].x                =
 414   motion[(x / 8) + 2].x      =
 415   motion[(x / 8) + 3].x      = mv.x;
 416   motion[0].y                =
 417   motion[(x / 8) + 2].y      =
 418   motion[(x / 8) + 3].y      = mv.y;
 419
 420   if(y + (mv.y >> 1)<0)
 421      mv.y= 0;
 422   if(x + (mv.x >> 1)<0)
 423      mv.x= 0;
 424
 425 #if 0
 426   int w= (s->width+15)&~15;
 427   int h= (s->height+15)&~15;
 428   if(x + (mv.x >> 1)<0 || y + (mv.y >> 1)<0 || x + (mv.x >> 1) + 16 > w || y + (mv.y >> 1) + 16> h)
 429       av_log(s->avctx, AV_LOG_INFO, "%d %d %d %d\n", x, y, x + (mv.x >> 1), y + (mv.y >> 1));
 430 #endif
 431
 432   src = &previous[(x + (mv.x >> 1)) + (y + (mv.y >> 1))*pitch];
 433   dst = current;
 434
 435   s->dsp.put_pixels_tab[0][((mv.y & 1) << 1) | (mv.x & 1)](dst,src,pitch,16);
 436
 437   return 0;
 438 }
 439
 440 static int svq1_motion_inter_4v_block (MpegEncContext *s, GetBitContext *bitbuf,
 441                                   uint8_t *current, uint8_t *previous, int pitch,
 442                                   svq1_pmv_t *motion,int x, int y) {
 443   uint8_t    *src;
 444   uint8_t    *dst;
 445   svq1_pmv_t  mv;
 446   svq1_pmv_t *pmv[4];
 447   int         i, result;
 448
 449   /* predict and decode motion vector (0) */
 450   pmv[0] = &motion[0];
 451   if (y == 0) {
 452     pmv[1] =
 453     pmv[2] = pmv[0];
 454   }
 455   else {
 456     pmv[1] = &motion[(x / 8) + 2];
 457     pmv[2] = &motion[(x / 8) + 4];
 458   }
 459
 460   result = svq1_decode_motion_vector (bitbuf, &mv, pmv);
 461
 462   if (result != 0)
 463     return result;
 464
 465   /* predict and decode motion vector (1) */
 466   pmv[0] = &mv;
 467   if (y == 0) {
 468     pmv[1] =
 469     pmv[2] = pmv[0];
 470   }
 471   else {
 472     pmv[1] = &motion[(x / 8) + 3];
 473   }
 474   result = svq1_decode_motion_vector (bitbuf, &motion[0], pmv);
 475
 476   if (result != 0)
 477     return result;
 478
 479   /* predict and decode motion vector (2) */
 480   pmv[1] = &motion[0];
 481   pmv[2] = &motion[(x / 8) + 1];
 482
 483   result = svq1_decode_motion_vector (bitbuf, &motion[(x / 8) + 2], pmv);
 484
 485   if (result != 0)
 486     return result;
 487
 488   /* predict and decode motion vector (3) */
 489   pmv[2] = &motion[(x / 8) + 2];
 490   pmv[3] = &motion[(x / 8) + 3];
 491
 492   result = svq1_decode_motion_vector (bitbuf, pmv[3], pmv);
 493
 494   if (result != 0)
 495     return result;
 496
 497   /* form predictions */
 498   for (i=0; i < 4; i++) {
 499     int mvx= pmv[i]->x + (i&1)*16;
 500     int mvy= pmv[i]->y + (i>>1)*16;
 501
 502     ///XXX /FIXME cliping or padding?
 503     if(y + (mvy >> 1)<0)
 504        mvy= 0;
 505     if(x + (mvx >> 1)<0)
 506        mvx= 0;
 507
 508 #if 0
 509   int w= (s->width+15)&~15;
 510   int h= (s->height+15)&~15;
 511   if(x + (mvx >> 1)<0 || y + (mvy >> 1)<0 || x + (mvx >> 1) + 8 > w || y + (mvy >> 1) + 8> h)
 512       av_log(s->avctx, AV_LOG_INFO, "%d %d %d %d\n", x, y, x + (mvx >> 1), y + (mvy >> 1));
 513 #endif
 514     src = &previous[(x + (mvx >> 1)) + (y + (mvy >> 1))*pitch];
 515     dst = current;
 516
 517     s->dsp.put_pixels_tab[1][((mvy & 1) << 1) | (mvx & 1)](dst,src,pitch,8);
 518
 519     /* select next block */
 520     if (i & 1) {
 521       current  += 8*(pitch - 1);
 522     } else {
 523       current  += 8;
 524     }
 525   }
 526
 527   return 0;
 528 }
 529
 530 static int svq1_decode_delta_block (MpegEncContext *s, GetBitContext *bitbuf,
 531                         uint8_t *current, uint8_t *previous, int pitch,
 532                         svq1_pmv_t *motion, int x, int y) {
 533   uint32_t block_type;
 534   int      result = 0;
 535
 536   /* get block type */
 537   block_type = get_vlc2(bitbuf, svq1_block_type.table, 2, 2);
 538
 539   /* reset motion vectors */
 540   if (block_type == SVQ1_BLOCK_SKIP || block_type == SVQ1_BLOCK_INTRA) {
 541     motion[0].x                 =
 542     motion[0].y                 =
 543     motion[(x / 8) + 2].x =
 544     motion[(x / 8) + 2].y =
 545     motion[(x / 8) + 3].x =
 546     motion[(x / 8) + 3].y = 0;
 547   }
 548
 549   switch (block_type) {
 550   case SVQ1_BLOCK_SKIP:
 551     svq1_skip_block (current, previous, pitch, x, y);
 552     break;
 553
 554   case SVQ1_BLOCK_INTER:
 555     result = svq1_motion_inter_block (s, bitbuf, current, previous, pitch, motion, x, y);
 556
 557     if (result != 0)
 558     {
 559 #ifdef DEBUG_SVQ1
 560     av_log(s->avctx, AV_LOG_INFO, "Error in svq1_motion_inter_block %i\n",result);
 561 #endif
 562       break;
 563     }
 564     result = svq1_decode_block_non_intra (bitbuf, current, pitch);
 565     break;
 566
 567   case SVQ1_BLOCK_INTER_4V:
 568     result = svq1_motion_inter_4v_block (s, bitbuf, current, previous, pitch, motion, x, y);
 569
 570     if (result != 0)
 571     {
 572 #ifdef DEBUG_SVQ1
 573     av_log(s->avctx, AV_LOG_INFO, "Error in svq1_motion_inter_4v_block %i\n",result);
 574 #endif
 575       break;
 576     }
 577     result = svq1_decode_block_non_intra (bitbuf, current, pitch);
 578     break;
 579
 580   case SVQ1_BLOCK_INTRA:
 581     result = svq1_decode_block_intra (bitbuf, current, pitch);
 582     break;
 583   }
 584
 585   return result;
 586 }
 587
 588 /* standard video sizes */
 589 static struct { int width; int height; } svq1_frame_size_table[8] = {
 590   { 160, 120 }, { 128,  96 }, { 176, 144 }, { 352, 288 },
 591   { 704, 576 }, { 240, 180 }, { 320, 240 }, {  -1,  -1 }
 592 };
 593
 594 static uint16_t svq1_packet_checksum (uint8_t *data, int length, int value) {
 595   int i;
 596
 597   for (i=0; i < length; i++) {
 598     value = checksum_table[data[i] ^ (value >> 8)] ^ ((value & 0xFF) << 8);
 599   }
 600
 601   return value;
 602 }
 603
      /*
       * Disabled: checksum over a width x height area of 16-bit samples, using
       * the same update step as svq1_packet_checksum().
       * NOTE(review): pixels[x] is 16 bits wide, so the index into the 256-entry
       * checksum_table can run past the table if this is ever re-enabled.
       */
  604 #if 0 /* unused, remove? */
  605 static uint16_t svq1_component_checksum (uint16_t *pixels, int pitch,
  606                                          int width, int height, int value) {
  607   int x, y;
  608
  609   for (y=0; y < height; y++) {
  610     for (x=0; x < width; x++) {
  611       value = checksum_table[pixels[x] ^ (value >> 8)] ^ ((value & 0xFF) << 8);
  612     }
  613
  614     pixels += pitch;
  615   }
  616
  617   return value;
  618 }
  619 #endif
 620
 621 #ifdef CONFIG_DECODERS
 622 static void svq1_parse_string (GetBitContext *bitbuf, uint8_t *out) {
 623   uint8_t seed;
 624   int     i;
 625
 626   out[0] = get_bits (bitbuf, 8);
 627
 628   seed = string_table[out[0]];
 629
 630   for (i=1; i <= out[0]; i++) {
 631     out[i] = get_bits (bitbuf, 8) ^ seed;
 632     seed   = string_table[out[i] ^ seed];
 633   }
 634 }
 635
 636 static int svq1_decode_frame_header (GetBitContext *bitbuf,MpegEncContext *s) {
 637   int frame_size_code;
 638   int temporal_reference;
 639
 640   temporal_reference = get_bits (bitbuf, 8);
 641
 642   /* frame type */
 643   s->pict_type= get_bits (bitbuf, 2)+1;
 644   if(s->pict_type==4)
 645       return -1;
 646
 647   if (s->pict_type == I_TYPE) {
 648
 649     /* unknown fields */
 650     if (s->f_code == 0x50 || s->f_code == 0x60) {
 651       int csum = get_bits (bitbuf, 16);
 652
 653       csum = svq1_packet_checksum ((uint8_t *)bitbuf->buffer, bitbuf->size_in_bits>>3, csum);
 654
 655 //      av_log(s->avctx, AV_LOG_INFO, "%s checksum (%02x) for packet data\n",
 656 //              (csum == 0) ? "correct" : "incorrect", csum);
 657     }
 658
 659     if ((s->f_code ^ 0x10) >= 0x50) {
 660       uint8_t msg[256];
 661
 662       svq1_parse_string (bitbuf, msg);
 663
 664       av_log(s->avctx, AV_LOG_INFO, "embedded message: \"%s\"\n", (char *) msg);
 665     }
 666
 667     skip_bits (bitbuf, 2);
 668     skip_bits (bitbuf, 2);
 669     skip_bits1 (bitbuf);
 670
 671     /* load frame size */
 672     frame_size_code = get_bits (bitbuf, 3);
 673
 674     if (frame_size_code == 7) {
 675       /* load width, height (12 bits each) */
 676       s->width = get_bits (bitbuf, 12);
 677       s->height = get_bits (bitbuf, 12);
 678
 679       if (!s->width || !s->height)
 680         return -1;
 681     } else {
 682       /* get width, height from table */
 683       s->width = svq1_frame_size_table[frame_size_code].width;
 684       s->height = svq1_frame_size_table[frame_size_code].height;
 685     }
 686   }
 687
 688   /* unknown fields */
 689   if (get_bits (bitbuf, 1) == 1) {
 690     skip_bits1 (bitbuf);       /* use packet checksum if (1) */
 691     skip_bits1 (bitbuf);       /* component checksums after image data if (1) */
 692
 693     if (get_bits (bitbuf, 2) != 0)
 694       return -1;
 695   }
 696
 697   if (get_bits (bitbuf, 1) == 1) {
 698     skip_bits1 (bitbuf);
 699     skip_bits (bitbuf, 4);
 700     skip_bits1 (bitbuf);
 701     skip_bits (bitbuf, 2);
 702
 703     while (get_bits (bitbuf, 1) == 1) {
 704       skip_bits (bitbuf, 8);
 705     }
 706   }
 707
 708   return 0;
 709 }
 710
 711 static int svq1_decode_frame(AVCodecContext *avctx,
 712                              void *data, int *data_size,
 713                              uint8_t *buf, int buf_size)
 714 {
 715   MpegEncContext *s=avctx->priv_data;
 716   uint8_t        *current, *previous;
 717   int             result, i, x, y, width, height;
 718   AVFrame *pict = data;
 719
 720   /* initialize bit buffer */
 721   init_get_bits(&s->gb,buf,buf_size*8);
 722
 723   /* decode frame header */
 724   s->f_code = get_bits (&s->gb, 22);
 725
 726   if ((s->f_code & ~0x70) || !(s->f_code & 0x60))
 727     return -1;
 728
 729   /* swap some header bytes (why?) */
 730   if (s->f_code != 0x20) {
 731     uint32_t *src = (uint32_t *) (buf + 4);
 732
 733     for (i=0; i < 4; i++) {
 734       src[i] = ((src[i] << 16) | (src[i] >> 16)) ^ src[7 - i];
 735     }
 736   }
 737
 738   result = svq1_decode_frame_header (&s->gb, s);
 739
 740   if (result != 0)
 741   {
 742 #ifdef DEBUG_SVQ1
 743     av_log(s->avctx, AV_LOG_INFO, "Error in svq1_decode_frame_header %i\n",result);
 744 #endif
 745     return result;
 746   }
 747
 748   //FIXME this avoids some confusion for "B frames" without 2 references
 749   //this should be removed after libavcodec can handle more flexible picture types & ordering
 750   if(s->pict_type==B_TYPE && s->last_picture_ptr==NULL) return buf_size;
 751
 752   if(avctx->hurry_up && s->pict_type==B_TYPE) return buf_size;
 753   if(  (avctx->skip_frame >= AVDISCARD_NONREF && s->pict_type==B_TYPE)
 754      ||(avctx->skip_frame >= AVDISCARD_NONKEY && s->pict_type!=I_TYPE)
 755      || avctx->skip_frame >= AVDISCARD_ALL)
 756       return buf_size;
 757
 758   if(MPV_frame_start(s, avctx) < 0)
 759       return -1;
 760
 761   /* decode y, u and v components */
 762   for (i=0; i < 3; i++) {
 763     int linesize;
 764     if (i == 0) {
 765       width  = (s->width+15)&~15;
 766       height = (s->height+15)&~15;
 767       linesize= s->linesize;
 768     } else {
 769       if(s->flags&CODEC_FLAG_GRAY) break;
 770       width  = (s->width/4+15)&~15;
 771       height = (s->height/4+15)&~15;
 772       linesize= s->uvlinesize;
 773     }
 774
 775     current  = s->current_picture.data[i];
 776
 777     if(s->pict_type==B_TYPE){
 778         previous = s->next_picture.data[i];
 779     }else{
 780         previous = s->last_picture.data[i];
 781     }
 782
 783     if (s->pict_type == I_TYPE) {
 784       /* keyframe */
 785       for (y=0; y < height; y+=16) {
 786         for (x=0; x < width; x+=16) {
 787           result = svq1_decode_block_intra (&s->gb, &current[x], linesize);
 788           if (result != 0)
 789           {
 790 //#ifdef DEBUG_SVQ1
 791             av_log(s->avctx, AV_LOG_INFO, "Error in svq1_decode_block %i (keyframe)\n",result);
 792 //#endif
 793             return result;
 794           }
 795         }
 796         current += 16*linesize;
 797       }
 798     } else {
 799       svq1_pmv_t pmv[width/8+3];
 800       /* delta frame */
 801       memset (pmv, 0, ((width / 8) + 3) * sizeof(svq1_pmv_t));
 802
 803       for (y=0; y < height; y+=16) {
 804         for (x=0; x < width; x+=16) {
 805           result = svq1_decode_delta_block (s, &s->gb, &current[x], previous,
 806                                             linesize, pmv, x, y);
 807           if (result != 0)
 808           {
 809 #ifdef DEBUG_SVQ1
 810     av_log(s->avctx, AV_LOG_INFO, "Error in svq1_decode_delta_block %i\n",result);
 811 #endif
 812             return result;
 813           }
 814         }
 815
 816         pmv[0].x =
 817         pmv[0].y = 0;
 818
 819         current += 16*linesize;
 820       }
 821     }
 822   }
 823
 824   *pict = *(AVFrame*)&s->current_picture;
 825
 826
 827   MPV_frame_end(s);
 828
 829   *data_size=sizeof(AVFrame);
 830   return buf_size;
 831 }
 832
 833 static int svq1_decode_init(AVCodecContext *avctx)
 834 {
 835     MpegEncContext *s = avctx->priv_data;
 836     int i;
 837
 838     MPV_decode_defaults(s);
 839
 840     s->avctx = avctx;
 841     s->width = (avctx->width+3)&~3;
 842     s->height = (avctx->height+3)&~3;
 843     s->codec_id= avctx->codec->id;
 844     avctx->pix_fmt = PIX_FMT_YUV410P;
 845     avctx->has_b_frames= 1; // not true, but DP frames and these behave like unidirectional b frames
 846     s->flags= avctx->flags;
 847     if (MPV_common_init(s) < 0) return -1;
 848
 849     init_vlc(&svq1_block_type, 2, 4,
 850         &svq1_block_type_vlc[0][1], 2, 1,
 851         &svq1_block_type_vlc[0][0], 2, 1, 1);
 852
 853     init_vlc(&svq1_motion_component, 7, 33,
 854         &mvtab[0][1], 2, 1,
 855         &mvtab[0][0], 2, 1, 1);
 856
 857     for (i = 0; i < 6; i++) {
 858         init_vlc(&svq1_intra_multistage[i], 3, 8,
 859             &svq1_intra_multistage_vlc[i][0][1], 2, 1,
 860             &svq1_intra_multistage_vlc[i][0][0], 2, 1, 1);
 861         init_vlc(&svq1_inter_multistage[i], 3, 8,
 862             &svq1_inter_multistage_vlc[i][0][1], 2, 1,
 863             &svq1_inter_multistage_vlc[i][0][0], 2, 1, 1);
 864     }
 865
 866     init_vlc(&svq1_intra_mean, 8, 256,
 867         &svq1_intra_mean_vlc[0][1], 4, 2,
 868         &svq1_intra_mean_vlc[0][0], 4, 2, 1);
 869
 870     init_vlc(&svq1_inter_mean, 9, 512,
 871         &svq1_inter_mean_vlc[0][1], 4, 2,
 872         &svq1_inter_mean_vlc[0][0], 4, 2, 1);
 873
 874     return 0;
 875 }
 876
 877 static int svq1_decode_end(AVCodecContext *avctx)
 878 {
 879     MpegEncContext *s = avctx->priv_data;
 880
 881     MPV_common_end(s);
 882     return 0;
 883 }
 884 #endif /* CONFIG_DECODERS */
 885
 886 #ifdef CONFIG_ENCODERS
 887 static void svq1_write_header(SVQ1Context *s, int frame_type)
 888 {
 889     int i;
 890
 891     /* frame code */
 892     put_bits(&s->pb, 22, 0x20);
 893
 894     /* temporal reference (sure hope this is a "don't care") */
 895     put_bits(&s->pb, 8, 0x00);
 896
 897     /* frame type */
 898     put_bits(&s->pb, 2, frame_type - 1);
 899
 900     if (frame_type == I_TYPE) {
 901
 902         /* no checksum since frame code is 0x20 */
 903
 904         /* no embedded string either */
 905
 906         /* output 5 unknown bits (2 + 2 + 1) */
 907         put_bits(&s->pb, 5, 2); /* 2 needed by quicktime decoder */
 908
 909         for (i = 0; i < 7; i++)
 910         {
 911             if ((svq1_frame_size_table[i].width == s->frame_width) &&
 912                 (svq1_frame_size_table[i].height == s->frame_height))
 913             {
 914                 put_bits(&s->pb, 3, i);
 915                 break;
 916             }
 917         }
 918
 919         if (i == 7)
 920         {
 921             put_bits(&s->pb, 3, 7);
 922                 put_bits(&s->pb, 12, s->frame_width);
 923                 put_bits(&s->pb, 12, s->frame_height);
 924         }
 925     }
 926
 927     /* no checksum or extra data (next 2 bits get 0) */
 928     put_bits(&s->pb, 2, 0);
 929 }
 930
 931
      /* Encoder tuning constants; their users are not visible in this part of
       * the file. */
  932 #define QUALITY_THRESHOLD 100
  933 #define THRESHOLD_MULTIPLIER 0.6
  934
      /* encode_block() below declares a local named `vector`; presumably the
       * AltiVec headers define `vector` as a macro, hence the #undef - TODO
       * confirm. */
  935 #if defined(HAVE_ALTIVEC)
  936 #undef vector
  937 #endif
 938
 939 static int encode_block(SVQ1Context *s, uint8_t *src, uint8_t *ref, uint8_t *decoded, int stride, int level, int threshold, int lambda, int intra){
 940     int count, y, x, i, j, split, best_mean, best_score, best_count;
 941     int best_vector[6];
 942     int block_sum[7]= {0, 0, 0, 0, 0, 0};
 943     int w= 2<<((level+2)>>1);
 944     int h= 2<<((level+1)>>1);
 945     int size=w*h;
 946     int16_t block[7][256];
 947     const int8_t *codebook_sum, *codebook;
 948     const uint16_t (*mean_vlc)[2];
 949     const uint8_t (*multistage_vlc)[2];
 950
 951     best_score=0;
 952     //FIXME optimize, this doenst need to be done multiple times
 953     if(intra){
 954         codebook_sum= svq1_intra_codebook_sum[level];
 955         codebook= svq1_intra_codebooks[level];
 956         mean_vlc= svq1_intra_mean_vlc;
 957         multistage_vlc= svq1_intra_multistage_vlc[level];
 958         for(y=0; y<h; y++){
 959             for(x=0; x<w; x++){
 960                 int v= src[x + y*stride];
 961                 block[0][x + w*y]= v;
 962                 best_score += v*v;
 963                 block_sum[0] += v;
 964             }
 965         }
 966     }else{
 967         codebook_sum= svq1_inter_codebook_sum[level];
 968         codebook= svq1_inter_codebooks[level];
 969         mean_vlc= svq1_inter_mean_vlc + 256;
 970         multistage_vlc= svq1_inter_multistage_vlc[level];
 971         for(y=0; y<h; y++){
 972             for(x=0; x<w; x++){
 973                 int v= src[x + y*stride] - ref[x + y*stride];
 974                 block[0][x + w*y]= v;
 975                 best_score += v*v;
 976                 block_sum[0] += v;
 977             }
 978         }
 979     }
 980
 981     best_count=0;
 982     best_score -= ((block_sum[0]*block_sum[0])>>(level+3));
 983     best_mean= (block_sum[0] + (size>>1)) >> (level+3);
 984
 985     if(level<4){
 986         for(count=1; count<7; count++){
 987             int best_vector_score= INT_MAX;
 988             int best_vector_sum=-999, best_vector_mean=-999;
 989             const int stage= count-1;
 990             const int8_t *vector;
 991
 992             for(i=0; i<16; i++){
 993                 int sum= codebook_sum[stage*16 + i];
 994                 int sqr, diff, score;
 995
 996                 vector = codebook + stage*size*16 + i*size;
 997                 sqr = s->dsp.ssd_int8_vs_int16(vector, block[stage], size);
 998                 diff= block_sum[stage] - sum;
 999                 score= sqr - ((diff*(int64_t)diff)>>(level+3)); //FIXME 64bit slooow
1000                 if(score < best_vector_score){
1001                     int mean= (diff + (size>>1)) >> (level+3);
1002                     assert(mean >-300 && mean<300);
1003                     mean= av_clip(mean, intra?0:-256, 255);
1004                     best_vector_score= score;
1005                     best_vector[stage]= i;
1006                     best_vector_sum= sum;
1007                     best_vector_mean= mean;
1008                 }
1009             }
1010             assert(best_vector_mean != -999);
1011             vector= codebook + stage*size*16 + best_vector[stage]*size;
1012             for(j=0; j<size; j++){
1013                 block[stage+1][j] = block[stage][j] - vector[j];
1014             }
1015             block_sum[stage+1]= block_sum[stage] - best_vector_sum;
1016             best_vector_score +=
1017                 lambda*(+ 1 + 4*count
1018                         + multistage_vlc[1+count][1]
1019                         + mean_vlc[best_vector_mean][1]);
1020
1021             if(best_vector_score < best_score){
1022                 best_score= best_vector_score;
1023                 best_count= count;
1024                 best_mean= best_vector_mean;
1025             }
1026         }
1027     }
1028
1029     split=0;
1030     if(best_score > threshold && level){
1031         int score=0;
1032         int offset= (level&1) ? stride*h/2 : w/2;
1033         PutBitContext backup[6];
1034
1035         for(i=level-1; i>=0; i--){
1036             backup[i]= s->reorder_pb[i];
1037         }
1038         score += encode_block(s, src         , ref         , decoded         , stride, level-1, threshold>>1, lambda, intra);
1039         score += encode_block(s, src + offset, ref + offset, decoded + offset, stride, level-1, threshold>>1, lambda, intra);
1040         score += lambda;
1041
1042         if(score < best_score){
1043             best_score= score;
1044             split=1;
1045         }else{
1046             for(i=level-1; i>=0; i--){
1047                 s->reorder_pb[i]= backup[i];
1048             }
1049         }
1050     }
1051     if (level > 0)
1052         put_bits(&s->reorder_pb[level], 1, split);
1053
1054     if(!split){
1055         assert((best_mean >= 0 && best_mean<256) || !intra);
1056         assert(best_mean >= -256 && best_mean<256);
1057         assert(best_count >=0 && best_count<7);
1058         assert(level<4 || best_count==0);
1059
1060         /* output the encoding */
1061         put_bits(&s->reorder_pb[level],
1062             multistage_vlc[1 + best_count][1],
1063             multistage_vlc[1 + best_count][0]);
1064         put_bits(&s->reorder_pb[level], mean_vlc[best_mean][1],
1065             mean_vlc[best_mean][0]);
1066
1067         for (i = 0; i < best_count; i++){
1068             assert(best_vector[i]>=0 && best_vector[i]<16);
1069             put_bits(&s->reorder_pb[level], 4, best_vector[i]);
1070         }
1071
1072         for(y=0; y<h; y++){
1073             for(x=0; x<w; x++){
1074                 decoded[x + y*stride]= src[x + y*stride] - block[best_count][x + w*y] + best_mean;
1075             }
1076         }
1077     }
1078
1079     return best_score;
1080 }
1081
1082
1083 static int svq1_encode_plane(SVQ1Context *s, int plane, unsigned char *src_plane, unsigned char *ref_plane, unsigned char *decoded_plane,
1084     int width, int height, int src_stride, int stride)
1085 {
1086     int x, y;
1087     int i;
1088     int block_width, block_height;
1089     int level;
1090     int threshold[6];
1091     const int lambda= (s->picture.quality*s->picture.quality) >> (2*FF_LAMBDA_SHIFT);
1092
1093     /* figure out the acceptable level thresholds in advance */
1094     threshold[5] = QUALITY_THRESHOLD;
1095     for (level = 4; level >= 0; level--)
1096         threshold[level] = threshold[level + 1] * THRESHOLD_MULTIPLIER;
1097
1098     block_width = (width + 15) / 16;
1099     block_height = (height + 15) / 16;
1100
1101     if(s->picture.pict_type == P_TYPE){
1102         s->m.avctx= s->avctx;
1103         s->m.current_picture_ptr= &s->m.current_picture;
1104         s->m.last_picture_ptr   = &s->m.last_picture;
1105         s->m.last_picture.data[0]= ref_plane;
1106         s->m.linesize=
1107         s->m.last_picture.linesize[0]=
1108         s->m.new_picture.linesize[0]=
1109         s->m.current_picture.linesize[0]= stride;
1110         s->m.width= width;
1111         s->m.height= height;
1112         s->m.mb_width= block_width;
1113         s->m.mb_height= block_height;
1114         s->m.mb_stride= s->m.mb_width+1;
1115         s->m.b8_stride= 2*s->m.mb_width+1;
1116         s->m.f_code=1;
1117         s->m.pict_type= s->picture.pict_type;
1118         s->m.me_method= s->avctx->me_method;
1119         s->m.me.scene_change_score=0;
1120         s->m.flags= s->avctx->flags;
1121 //        s->m.out_format = FMT_H263;
1122 //        s->m.unrestricted_mv= 1;
1123
1124         s->m.lambda= s->picture.quality;
1125         s->m.qscale= (s->m.lambda*139 + FF_LAMBDA_SCALE*64) >> (FF_LAMBDA_SHIFT + 7);
1126         s->m.lambda2= (s->m.lambda*s->m.lambda + FF_LAMBDA_SCALE/2) >> FF_LAMBDA_SHIFT;
1127
1128         if(!s->motion_val8[plane]){
1129             s->motion_val8 [plane]= av_mallocz((s->m.b8_stride*block_height*2 + 2)*2*sizeof(int16_t));
1130             s->motion_val16[plane]= av_mallocz((s->m.mb_stride*(block_height + 2) + 1)*2*sizeof(int16_t));
1131         }
1132
1133         s->m.mb_type= s->mb_type;
1134
1135         //dummies, to avoid segfaults
1136         s->m.current_picture.mb_mean=   (uint8_t *)s->dummy;
1137         s->m.current_picture.mb_var=    (uint16_t*)s->dummy;
1138         s->m.current_picture.mc_mb_var= (uint16_t*)s->dummy;
1139         s->m.current_picture.mb_type= s->dummy;
1140
1141         s->m.current_picture.motion_val[0]= s->motion_val8[plane] + 2;
1142         s->m.p_mv_table= s->motion_val16[plane] + s->m.mb_stride + 1;
1143         s->m.dsp= s->dsp; //move
1144         ff_init_me(&s->m);
1145
1146         s->m.me.dia_size= s->avctx->dia_size;
1147         s->m.first_slice_line=1;
1148         for (y = 0; y < block_height; y++) {
1149             uint8_t src[stride*16];
1150
1151             s->m.new_picture.data[0]= src - y*16*stride; //ugly
1152             s->m.mb_y= y;
1153
1154             for(i=0; i<16 && i + 16*y<height; i++){
1155                 memcpy(&src[i*stride], &src_plane[(i+16*y)*src_stride], width);
1156                 for(x=width; x<16*block_width; x++)
1157                     src[i*stride+x]= src[i*stride+x-1];
1158             }
1159             for(; i<16 && i + 16*y<16*block_height; i++)
1160                 memcpy(&src[i*stride], &src[(i-1)*stride], 16*block_width);
1161
1162             for (x = 0; x < block_width; x++) {
1163                 s->m.mb_x= x;
1164                 ff_init_block_index(&s->m);
1165                 ff_update_block_index(&s->m);
1166
1167                 ff_estimate_p_frame_motion(&s->m, x, y);
1168             }
1169             s->m.first_slice_line=0;
1170         }
1171
1172         ff_fix_long_p_mvs(&s->m);
1173         ff_fix_long_mvs(&s->m, NULL, 0, s->m.p_mv_table, s->m.f_code, CANDIDATE_MB_TYPE_INTER, 0);
1174     }
1175
1176     s->m.first_slice_line=1;
1177     for (y = 0; y < block_height; y++) {
1178         uint8_t src[stride*16];
1179
1180         for(i=0; i<16 && i + 16*y<height; i++){
1181             memcpy(&src[i*stride], &src_plane[(i+16*y)*src_stride], width);
1182             for(x=width; x<16*block_width; x++)
1183                 src[i*stride+x]= src[i*stride+x-1];
1184         }
1185         for(; i<16 && i + 16*y<16*block_height; i++)
1186             memcpy(&src[i*stride], &src[(i-1)*stride], 16*block_width);
1187
1188         s->m.mb_y= y;
1189         for (x = 0; x < block_width; x++) {
1190             uint8_t reorder_buffer[3][6][7*32];
1191             int count[3][6];
1192             int offset = y * 16 * stride + x * 16;
1193             uint8_t *decoded= decoded_plane + offset;
1194             uint8_t *ref= ref_plane + offset;
1195             int score[4]={0,0,0,0}, best;
1196             uint8_t temp[16*stride];
1197
1198             if(s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < 3000){ //FIXME check size
1199                 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
1200                 return -1;
1201             }
1202
1203             s->m.mb_x= x;
1204             ff_init_block_index(&s->m);
1205             ff_update_block_index(&s->m);
1206
1207             if(s->picture.pict_type == I_TYPE || (s->m.mb_type[x + y*s->m.mb_stride]&CANDIDATE_MB_TYPE_INTRA)){
1208                 for(i=0; i<6; i++){
1209                     init_put_bits(&s->reorder_pb[i], reorder_buffer[0][i], 7*32);
1210                 }
1211                 if(s->picture.pict_type == P_TYPE){
1212                     const uint8_t *vlc= svq1_block_type_vlc[SVQ1_BLOCK_INTRA];
1213                     put_bits(&s->reorder_pb[5], vlc[1], vlc[0]);
1214                     score[0]= vlc[1]*lambda;
1215                 }
1216                 score[0]+= encode_block(s, src+16*x, NULL, temp, stride, 5, 64, lambda, 1);
1217                 for(i=0; i<6; i++){
1218                     count[0][i]= put_bits_count(&s->reorder_pb[i]);
1219                     flush_put_bits(&s->reorder_pb[i]);
1220                 }
1221             }else
1222                 score[0]= INT_MAX;
1223
1224             best=0;
1225
1226             if(s->picture.pict_type == P_TYPE){
1227                 const uint8_t *vlc= svq1_block_type_vlc[SVQ1_BLOCK_INTER];
1228                 int mx, my, pred_x, pred_y, dxy;
1229                 int16_t *motion_ptr;
1230
1231                 motion_ptr= h263_pred_motion(&s->m, 0, 0, &pred_x, &pred_y);
1232                 if(s->m.mb_type[x + y*s->m.mb_stride]&CANDIDATE_MB_TYPE_INTER){
1233                     for(i=0; i<6; i++)
1234                         init_put_bits(&s->reorder_pb[i], reorder_buffer[1][i], 7*32);
1235
1236                     put_bits(&s->reorder_pb[5], vlc[1], vlc[0]);
1237
1238                     s->m.pb= s->reorder_pb[5];
1239                     mx= motion_ptr[0];
1240                     my= motion_ptr[1];
1241                     assert(mx>=-32 && mx<=31);
1242                     assert(my>=-32 && my<=31);
1243                     assert(pred_x>=-32 && pred_x<=31);
1244                     assert(pred_y>=-32 && pred_y<=31);
1245                     ff_h263_encode_motion(&s->m, mx - pred_x, 1);
1246                     ff_h263_encode_motion(&s->m, my - pred_y, 1);
1247                     s->reorder_pb[5]= s->m.pb;
1248                     score[1] += lambda*put_bits_count(&s->reorder_pb[5]);
1249
1250                     dxy= (mx&1) + 2*(my&1);
1251
1252                     s->dsp.put_pixels_tab[0][dxy](temp+16, ref + (mx>>1) + stride*(my>>1), stride, 16);
1253
1254                     score[1]+= encode_block(s, src+16*x, temp+16, decoded, stride, 5, 64, lambda, 0);
1255                     best= score[1] <= score[0];
1256
1257                     vlc= svq1_block_type_vlc[SVQ1_BLOCK_SKIP];
1258                     score[2]= s->dsp.sse[0](NULL, src+16*x, ref, stride, 16);
1259                     score[2]+= vlc[1]*lambda;
1260                     if(score[2] < score[best] && mx==0 && my==0){
1261                         best=2;
1262                         s->dsp.put_pixels_tab[0][0](decoded, ref, stride, 16);
1263                         for(i=0; i<6; i++){
1264                             count[2][i]=0;
1265                         }
1266                         put_bits(&s->pb, vlc[1], vlc[0]);
1267                     }
1268                 }
1269
1270                 if(best==1){
1271                     for(i=0; i<6; i++){
1272                         count[1][i]= put_bits_count(&s->reorder_pb[i]);
1273                         flush_put_bits(&s->reorder_pb[i]);
1274                     }
1275                 }else{
1276                     motion_ptr[0                 ] = motion_ptr[1                 ]=
1277                     motion_ptr[2                 ] = motion_ptr[3                 ]=
1278                     motion_ptr[0+2*s->m.b8_stride] = motion_ptr[1+2*s->m.b8_stride]=
1279                     motion_ptr[2+2*s->m.b8_stride] = motion_ptr[3+2*s->m.b8_stride]=0;
1280                 }
1281             }
1282
1283             s->rd_total += score[best];
1284
1285             for(i=5; i>=0; i--){
1286                 ff_copy_bits(&s->pb, reorder_buffer[best][i], count[best][i]);
1287             }
1288             if(best==0){
1289                 s->dsp.put_pixels_tab[0][0](decoded, temp, stride, 16);
1290             }
1291         }
1292         s->m.first_slice_line=0;
1293     }
1294     return 0;
1295 }
1296
1297 static int svq1_encode_init(AVCodecContext *avctx)
1298 {
1299     SVQ1Context * const s = avctx->priv_data;
1300
1301     dsputil_init(&s->dsp, avctx);
1302     avctx->coded_frame= (AVFrame*)&s->picture;
1303
1304     s->frame_width = avctx->width;
1305     s->frame_height = avctx->height;
1306
1307     s->y_block_width = (s->frame_width + 15) / 16;
1308     s->y_block_height = (s->frame_height + 15) / 16;
1309
1310     s->c_block_width = (s->frame_width / 4 + 15) / 16;
1311     s->c_block_height = (s->frame_height / 4 + 15) / 16;
1312
1313     s->avctx= avctx;
1314     s->m.avctx= avctx;
1315     s->m.me.scratchpad= av_mallocz((avctx->width+64)*2*16*2*sizeof(uint8_t));
1316     s->m.me.map       = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t));
1317     s->m.me.score_map = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t));
1318     s->mb_type        = av_mallocz((s->y_block_width+1)*s->y_block_height*sizeof(int16_t));
1319     s->dummy          = av_mallocz((s->y_block_width+1)*s->y_block_height*sizeof(int32_t));
1320     h263_encode_init(&s->m); //mv_penalty
1321
1322     return 0;
1323 }
1324
1325 static int svq1_encode_frame(AVCodecContext *avctx, unsigned char *buf,
1326     int buf_size, void *data)
1327 {
1328     SVQ1Context * const s = avctx->priv_data;
1329     AVFrame *pict = data;
1330     AVFrame * const p= (AVFrame*)&s->picture;
1331     AVFrame temp;
1332     int i;
1333
1334     if(avctx->pix_fmt != PIX_FMT_YUV410P){
1335         av_log(avctx, AV_LOG_ERROR, "unsupported pixel format\n");
1336         return -1;
1337     }
1338
1339     if(!s->current_picture.data[0]){
1340         avctx->get_buffer(avctx, &s->current_picture);
1341         avctx->get_buffer(avctx, &s->last_picture);
1342     }
1343
1344     temp= s->current_picture;
1345     s->current_picture= s->last_picture;
1346     s->last_picture= temp;
1347
1348     init_put_bits(&s->pb, buf, buf_size);
1349
1350     *p = *pict;
1351     p->pict_type = avctx->gop_size && avctx->frame_number % avctx->gop_size ? P_TYPE : I_TYPE;
1352     p->key_frame = p->pict_type == I_TYPE;
1353
1354     svq1_write_header(s, p->pict_type);
1355     for(i=0; i<3; i++){
1356         if(svq1_encode_plane(s, i,
1357             s->picture.data[i], s->last_picture.data[i], s->current_picture.data[i],
1358             s->frame_width / (i?4:1), s->frame_height / (i?4:1),
1359             s->picture.linesize[i], s->current_picture.linesize[i]) < 0)
1360                 return -1;
1361     }
1362
1363 //    align_put_bits(&s->pb);
1364     while(put_bits_count(&s->pb) & 31)
1365         put_bits(&s->pb, 1, 0);
1366
1367     flush_put_bits(&s->pb);
1368
1369     return (put_bits_count(&s->pb) / 8);
1370 }
1371
1372 static int svq1_encode_end(AVCodecContext *avctx)
1373 {
1374     SVQ1Context * const s = avctx->priv_data;
1375     int i;
1376
1377     av_log(avctx, AV_LOG_DEBUG, "RD: %f\n", s->rd_total/(double)(avctx->width*avctx->height*avctx->frame_number));
1378
1379     av_freep(&s->m.me.scratchpad);
1380     av_freep(&s->m.me.map);
1381     av_freep(&s->m.me.score_map);
1382     av_freep(&s->mb_type);
1383     av_freep(&s->dummy);
1384
1385     for(i=0; i<3; i++){
1386         av_freep(&s->motion_val8[i]);
1387         av_freep(&s->motion_val16[i]);
1388     }
1389
1390     return 0;
1391 }
1392
1393 #endif //CONFIG_ENCODERS
1394
1395 #ifdef CONFIG_DECODERS
1396 AVCodec svq1_decoder = {
1397     "svq1",
1398     CODEC_TYPE_VIDEO,
1399     CODEC_ID_SVQ1,
1400     sizeof(MpegEncContext),
1401     svq1_decode_init,
1402     NULL,
1403     svq1_decode_end,
1404     svq1_decode_frame,
1405     CODEC_CAP_DR1,
1406     .flush= ff_mpeg_flush,
1407     .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV410P, -1},
1408 };
1409 #endif
1410
1411 #ifdef CONFIG_ENCODERS
1412
1413 AVCodec svq1_encoder = {
1414     "svq1",
1415     CODEC_TYPE_VIDEO,
1416     CODEC_ID_SVQ1,
1417     sizeof(SVQ1Context),
1418     svq1_encode_init,
1419     svq1_encode_frame,
1420     svq1_encode_end,
1421     .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV410P, -1},
1422 };
1423
1424 #endif //CONFIG_ENCODERS