git.sesse.net Git - ffmpeg/blob - libavcodec/svq1.c

   1 /*
   2  *
   3  * Copyright (C) 2002 the xine project
   4  * Copyright (C) 2002 the ffmpeg project
   5  *
   6  * This file is part of FFmpeg.
   7  *
   8  * FFmpeg is free software; you can redistribute it and/or
   9  * modify it under the terms of the GNU Lesser General Public
  10  * License as published by the Free Software Foundation; either
  11  * version 2.1 of the License, or (at your option) any later version.
  12  *
  13  * FFmpeg is distributed in the hope that it will be useful,
  14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  16  * Lesser General Public License for more details.
  17  *
  18  * You should have received a copy of the GNU Lesser General Public
  19  * License along with FFmpeg; if not, write to the Free Software
  20  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  21  *
  22  * (SVQ1 Decoder)
  23  * Ported to mplayer by Arpi <arpi@thot.banki.hu>
  24  * Ported to libavcodec by Nick Kurshev <nickols_k@mail.ru>
  25  *
  26  * SVQ1 Encoder (c) 2004 Mike Melanson <melanson@pcisys.net>
  27  */
  28
  29 /**
  30  * @file svq1.c
  31  * Sorenson Vector Quantizer #1 (SVQ1) video codec.
  32  * For more information about the SVQ1 algorithm, visit:
  33  *   http://www.pcisys.net/~melanson/codecs/
  34  */
  35
  36
  37 //#define DEBUG_SVQ1
  38 #include <stdio.h>
  39 #include <stdlib.h>
  40 #include <string.h>
  41 #include <unistd.h>
  42 #include <limits.h>
  43
  44 #include "common.h"
  45 #include "avcodec.h"
  46 #include "dsputil.h"
  47 #include "mpegvideo.h"
  48 #include "bswap.h"
  49
  50 #undef NDEBUG
  51 #include <assert.h>
  52
  /* Table of 33 two-byte entries defined outside this file;
   * svq1_decode_init() builds svq1_motion_component from it. */
  53 extern const uint8_t mvtab[33][2];
  54
  /* VLC tables read by the decoder through get_vlc2() and built in
   * svq1_decode_init().  The two multistage arrays are indexed by the
   * vector level (0..5). */
  55 static VLC svq1_block_type;
  56 static VLC svq1_motion_component;
  57 static VLC svq1_intra_multistage[6];
  58 static VLC svq1_inter_multistage[6];
  59 static VLC svq1_intra_mean;
  60 static VLC svq1_inter_mean;
  61
  /* Block types decoded from the svq1_block_type VLC and dispatched on in
   * svq1_decode_delta_block(). */
  62 #define SVQ1_BLOCK_SKIP         0
  63 #define SVQ1_BLOCK_INTER        1
  64 #define SVQ1_BLOCK_INTER_4V     2
  65 #define SVQ1_BLOCK_INTRA        3
  66
  /**
   * State passed to the encoder-side functions in this file
   * (svq1_write_header(), encode_block()).  The decoder functions visible
   * here take an MpegEncContext instead.
   */
  67 typedef struct SVQ1Context {
  68     MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to make the motion estimation eventually independent of MpegEncContext, so this will be removed then (FIXME/XXX)
  69     AVCodecContext *avctx;
  70     DSPContext dsp;
  71     AVFrame picture;
  72     AVFrame current_picture;
  73     AVFrame last_picture;
  74     PutBitContext pb;           // bit writer used by svq1_write_header()
  75     GetBitContext gb;
  76
  77     PutBitContext reorder_pb[6]; //why ooh why this sick breadth first order, everything is slower and more complex
  78
         /* frame dimensions written into the header by svq1_write_header() */
  79     int frame_width;
  80     int frame_height;
  81
  82     /* Y plane block dimensions */
  83     int y_block_width;
  84     int y_block_height;
  85
  86     /* U & V plane (C planes) block dimensions */
  87     int c_block_width;
  88     int c_block_height;
  89
         /* NOTE(review): the fields below are presumably motion-estimation
          * scratch storage; their use is outside the visible code -- confirm. */
  90     uint16_t *mb_type;
  91     uint32_t *dummy;
  92     int16_t (*motion_val8[3])[2];
  93     int16_t (*motion_val16[3])[2];
  94
  95     int64_t rd_total;
  96 } SVQ1Context;
  97
  98 /* motion vector (prediction) */
  /* The decoder stores each component wrapped to a signed 6-bit value
   * (see svq1_decode_motion_vector()); bit 0 selects the half-pel variant
   * of put_pixels and (component >> 1) is the whole-pixel offset. */
  99 typedef struct svq1_pmv_s {
 100   int           x;
 101   int           y;
 102 } svq1_pmv_t;
 103
 104 #include "svq1_cb.h"
 105 #include "svq1_vlc.h"
 106
 /* Byte-indexed lookup table used by svq1_packet_checksum() to update a
  * 16-bit checksum one input byte at a time.
  * NOTE(review): entry 1 is 0x1021, so this looks like the standard
  * CRC-16/CCITT (polynomial 0x1021) table -- confirm. */
 107 static const uint16_t checksum_table[256] = {
 108   0x0000, 0x1021, 0x2042, 0x3063, 0x4084, 0x50A5, 0x60C6, 0x70E7,
 109   0x8108, 0x9129, 0xA14A, 0xB16B, 0xC18C, 0xD1AD, 0xE1CE, 0xF1EF,
 110   0x1231, 0x0210, 0x3273, 0x2252, 0x52B5, 0x4294, 0x72F7, 0x62D6,
 111   0x9339, 0x8318, 0xB37B, 0xA35A, 0xD3BD, 0xC39C, 0xF3FF, 0xE3DE,
 112   0x2462, 0x3443, 0x0420, 0x1401, 0x64E6, 0x74C7, 0x44A4, 0x5485,
 113   0xA56A, 0xB54B, 0x8528, 0x9509, 0xE5EE, 0xF5CF, 0xC5AC, 0xD58D,
 114   0x3653, 0x2672, 0x1611, 0x0630, 0x76D7, 0x66F6, 0x5695, 0x46B4,
 115   0xB75B, 0xA77A, 0x9719, 0x8738, 0xF7DF, 0xE7FE, 0xD79D, 0xC7BC,
 116   0x48C4, 0x58E5, 0x6886, 0x78A7, 0x0840, 0x1861, 0x2802, 0x3823,
 117   0xC9CC, 0xD9ED, 0xE98E, 0xF9AF, 0x8948, 0x9969, 0xA90A, 0xB92B,
 118   0x5AF5, 0x4AD4, 0x7AB7, 0x6A96, 0x1A71, 0x0A50, 0x3A33, 0x2A12,
 119   0xDBFD, 0xCBDC, 0xFBBF, 0xEB9E, 0x9B79, 0x8B58, 0xBB3B, 0xAB1A,
 120   0x6CA6, 0x7C87, 0x4CE4, 0x5CC5, 0x2C22, 0x3C03, 0x0C60, 0x1C41,
 121   0xEDAE, 0xFD8F, 0xCDEC, 0xDDCD, 0xAD2A, 0xBD0B, 0x8D68, 0x9D49,
 122   0x7E97, 0x6EB6, 0x5ED5, 0x4EF4, 0x3E13, 0x2E32, 0x1E51, 0x0E70,
 123   0xFF9F, 0xEFBE, 0xDFDD, 0xCFFC, 0xBF1B, 0xAF3A, 0x9F59, 0x8F78,
 124   0x9188, 0x81A9, 0xB1CA, 0xA1EB, 0xD10C, 0xC12D, 0xF14E, 0xE16F,
 125   0x1080, 0x00A1, 0x30C2, 0x20E3, 0x5004, 0x4025, 0x7046, 0x6067,
 126   0x83B9, 0x9398, 0xA3FB, 0xB3DA, 0xC33D, 0xD31C, 0xE37F, 0xF35E,
 127   0x02B1, 0x1290, 0x22F3, 0x32D2, 0x4235, 0x5214, 0x6277, 0x7256,
 128   0xB5EA, 0xA5CB, 0x95A8, 0x8589, 0xF56E, 0xE54F, 0xD52C, 0xC50D,
 129   0x34E2, 0x24C3, 0x14A0, 0x0481, 0x7466, 0x6447, 0x5424, 0x4405,
 130   0xA7DB, 0xB7FA, 0x8799, 0x97B8, 0xE75F, 0xF77E, 0xC71D, 0xD73C,
 131   0x26D3, 0x36F2, 0x0691, 0x16B0, 0x6657, 0x7676, 0x4615, 0x5634,
 132   0xD94C, 0xC96D, 0xF90E, 0xE92F, 0x99C8, 0x89E9, 0xB98A, 0xA9AB,
 133   0x5844, 0x4865, 0x7806, 0x6827, 0x18C0, 0x08E1, 0x3882, 0x28A3,
 134   0xCB7D, 0xDB5C, 0xEB3F, 0xFB1E, 0x8BF9, 0x9BD8, 0xABBB, 0xBB9A,
 135   0x4A75, 0x5A54, 0x6A37, 0x7A16, 0x0AF1, 0x1AD0, 0x2AB3, 0x3A92,
 136   0xFD2E, 0xED0F, 0xDD6C, 0xCD4D, 0xBDAA, 0xAD8B, 0x9DE8, 0x8DC9,
 137   0x7C26, 0x6C07, 0x5C64, 0x4C45, 0x3CA2, 0x2C83, 0x1CE0, 0x0CC1,
 138   0xEF1F, 0xFF3E, 0xCF5D, 0xDF7C, 0xAF9B, 0xBFBA, 0x8FD9, 0x9FF8,
 139   0x6E17, 0x7E36, 0x4E55, 0x5E74, 0x2E93, 0x3EB2, 0x0ED1, 0x1EF0
 140 };
 141
 /* Byte-indexed lookup table used by svq1_parse_string() to derive the
  * next XOR seed while unscrambling the embedded message string. */
 142 static const uint8_t string_table[256] = {
 143   0x00, 0xD5, 0x7F, 0xAA, 0xFE, 0x2B, 0x81, 0x54,
 144   0x29, 0xFC, 0x56, 0x83, 0xD7, 0x02, 0xA8, 0x7D,
 145   0x52, 0x87, 0x2D, 0xF8, 0xAC, 0x79, 0xD3, 0x06,
 146   0x7B, 0xAE, 0x04, 0xD1, 0x85, 0x50, 0xFA, 0x2F,
 147   0xA4, 0x71, 0xDB, 0x0E, 0x5A, 0x8F, 0x25, 0xF0,
 148   0x8D, 0x58, 0xF2, 0x27, 0x73, 0xA6, 0x0C, 0xD9,
 149   0xF6, 0x23, 0x89, 0x5C, 0x08, 0xDD, 0x77, 0xA2,
 150   0xDF, 0x0A, 0xA0, 0x75, 0x21, 0xF4, 0x5E, 0x8B,
 151   0x9D, 0x48, 0xE2, 0x37, 0x63, 0xB6, 0x1C, 0xC9,
 152   0xB4, 0x61, 0xCB, 0x1E, 0x4A, 0x9F, 0x35, 0xE0,
 153   0xCF, 0x1A, 0xB0, 0x65, 0x31, 0xE4, 0x4E, 0x9B,
 154   0xE6, 0x33, 0x99, 0x4C, 0x18, 0xCD, 0x67, 0xB2,
 155   0x39, 0xEC, 0x46, 0x93, 0xC7, 0x12, 0xB8, 0x6D,
 156   0x10, 0xC5, 0x6F, 0xBA, 0xEE, 0x3B, 0x91, 0x44,
 157   0x6B, 0xBE, 0x14, 0xC1, 0x95, 0x40, 0xEA, 0x3F,
 158   0x42, 0x97, 0x3D, 0xE8, 0xBC, 0x69, 0xC3, 0x16,
 159   0xEF, 0x3A, 0x90, 0x45, 0x11, 0xC4, 0x6E, 0xBB,
 160   0xC6, 0x13, 0xB9, 0x6C, 0x38, 0xED, 0x47, 0x92,
 161   0xBD, 0x68, 0xC2, 0x17, 0x43, 0x96, 0x3C, 0xE9,
 162   0x94, 0x41, 0xEB, 0x3E, 0x6A, 0xBF, 0x15, 0xC0,
 163   0x4B, 0x9E, 0x34, 0xE1, 0xB5, 0x60, 0xCA, 0x1F,
 164   0x62, 0xB7, 0x1D, 0xC8, 0x9C, 0x49, 0xE3, 0x36,
 165   0x19, 0xCC, 0x66, 0xB3, 0xE7, 0x32, 0x98, 0x4D,
 166   0x30, 0xE5, 0x4F, 0x9A, 0xCE, 0x1B, 0xB1, 0x64,
 167   0x72, 0xA7, 0x0D, 0xD8, 0x8C, 0x59, 0xF3, 0x26,
 168   0x5B, 0x8E, 0x24, 0xF1, 0xA5, 0x70, 0xDA, 0x0F,
 169   0x20, 0xF5, 0x5F, 0x8A, 0xDE, 0x0B, 0xA1, 0x74,
 170   0x09, 0xDC, 0x76, 0xA3, 0xF7, 0x22, 0x88, 0x5D,
 171   0xD6, 0x03, 0xA9, 0x7C, 0x28, 0xFD, 0x57, 0x82,
 172   0xFF, 0x2A, 0x80, 0x55, 0x01, 0xD4, 0x7E, 0xAB,
 173   0x84, 0x51, 0xFB, 0x2E, 0x7A, 0xAF, 0x05, 0xD0,
 174   0xAD, 0x78, 0xD2, 0x07, 0x53, 0x86, 0x2C, 0xF9
 175 };
 176
 /* Advance the breadth-first vector list until a vector that is not split
  * further is reached.  Uses the caller's locals bitbuf, list, i, m, n,
  * level and pitch.
  *  - When i reaches m, all nodes of the current depth have been visited:
  *    m becomes n and level is decremented; the loop stops at level 0.
  *  - One bit is read per node: 0 stops the loop (list[i] is decoded as is),
  *    1 appends two children to the list: list[i] itself and list[i] offset
  *    by (pitch or 1, chosen by the low bit of level) << (level / 2 + 1). */
 177 #define SVQ1_PROCESS_VECTOR()\
 178     for (; level > 0; i++) {\
 179       /* process next depth */\
 180       if (i == m) {\
 181         m = n;\
 182         if (--level == 0)\
 183           break;\
 184       }\
 185       /* divide block if next bit set */\
 186       if (get_bits (bitbuf, 1) == 0)\
 187         break;\
 188       /* add child nodes */\
 189       list[n++] = list[i];\
 190       list[n++] = list[i] + (((level & 1) ? pitch : 1) << ((level / 2) + 1));\
 191     }
 192
 /* Accumulate `stages` codebook words into the two packed accumulators and
  * clamp the result.  Uses the caller's locals j, stages, codebook, entries,
  * n1, n2 and n3.
  * Each 32-bit codebook word holds four bytes; after XOR with 0x80808080 the
  * bytes at bits 8-15/24-31 are added to n1 and the bytes at bits 0-7/16-23
  * to n2, so each accumulator carries two 16-bit lanes.
  * NOTE(review): the XOR presumably converts signed codebook bytes to a
  * +128 bias that SVQ1_CALC_CODEBOOK_ENTRIES removes from the mean -- confirm.
  * The two `if` blocks saturate each 16-bit lane to [0..255]. */
 193 #define SVQ1_ADD_CODEBOOK()\
 194           /* add codebook entries to vector */\
 195           for (j=0; j < stages; j++) {\
 196             n3  = codebook[entries[j]] ^ 0x80808080;\
 197             n1 += ((n3 & 0xFF00FF00) >> 8);\
 198             n2 +=  (n3 & 0x00FF00FF);\
 199           }\
 200 \
 201           /* clip to [0..255] */\
 202           if (n1 & 0xFF00FF00) {\
 203             n3  = ((( n1 >> 15) & 0x00010001) | 0x01000100) - 0x00010001;\
 204             n1 += 0x7F007F00;\
 205             n1 |= (((~n1 >> 15) & 0x00010001) | 0x01000100) - 0x00010001;\
 206             n1 &= (n3 & 0x00FF00FF);\
 207           }\
 208 \
 209           if (n2 & 0xFF00FF00) {\
 210             n3  = ((( n2 >> 15) & 0x00010001) | 0x01000100) - 0x00010001;\
 211             n2 += 0x7F007F00;\
 212             n2 |= (((~n2 >> 15) & 0x00010001) | 0x01000100) - 0x00010001;\
 213             n2 &= (n3 & 0x00FF00FF);\
 214           }
 215
 /* Intra reconstruction of one vector: for every group of four pixels the
  * accumulators start from the packed mean n4, the selected codebook words
  * are added by SVQ1_ADD_CODEBOOK, and the four clamped bytes are stored to
  * dst[x].  Uses the caller's locals x, y, width, height, dst, pitch,
  * codebook and n1..n4; codebook advances by one word per stored group and
  * dst advances by pitch/4 words per row. */
 216 #define SVQ1_DO_CODEBOOK_INTRA()\
 217       for (y=0; y < height; y++) {\
 218         for (x=0; x < (width / 4); x++, codebook++) {\
 219         n1 = n4;\
 220         n2 = n4;\
 221         SVQ1_ADD_CODEBOOK()\
 222         /* store result */\
 223         dst[x] = (n1 << 8) | n2;\
 224         }\
 225         dst += (pitch / 4);\
 226       }
 227
 /* Inter (residual) reconstruction of one vector: same as
  * SVQ1_DO_CODEBOOK_INTRA except that the accumulators start from the four
  * pixels already present in dst[x] (the motion-compensated prediction
  * written by the caller) plus the packed mean n4. */
 228 #define SVQ1_DO_CODEBOOK_NONINTRA()\
 229       for (y=0; y < height; y++) {\
 230         for (x=0; x < (width / 4); x++, codebook++) {\
 231         n3 = dst[x];\
 232         /* add mean value to vector */\
 233         n1 = ((n3 & 0xFF00FF00) >> 8) + n4;\
 234         n2 =  (n3 & 0x00FF00FF)          + n4;\
 235         SVQ1_ADD_CODEBOOK()\
 236         /* store result */\
 237         dst[x] = (n1 << 8) | n2;\
 238         }\
 239         dst += (pitch / 4);\
 240       }
 241
 /* Read the codebook selectors for one vector and prepare the packed mean.
  * Uses the caller's locals codebook, level, bit_cache, bitbuf, stages, j,
  * entries, mean and n4.
  *  - codebook is set to cbook[level], viewed as 32-bit words.
  *  - 4*stages bits are read at once; stage j uses the j-th 4-bit group
  *    (most significant first), offset by 16*j and scaled by
  *    1 << (level + 1) to form a word index into the codebook.
  *  - mean is reduced by 128 per stage and duplicated into both 16-bit
  *    halves of n4.  NOTE(review): the (mean >> 31) term presumably cancels
  *    the borrow a negative low half would otherwise cause when n4 is added
  *    to the packed accumulators -- confirm. */
 242 #define SVQ1_CALC_CODEBOOK_ENTRIES(cbook)\
 243       codebook = (const uint32_t *) cbook[level];\
 244       bit_cache = get_bits (bitbuf, 4*stages);\
 245       /* calculate codebook entries for this vector */\
 246       for (j=0; j < stages; j++) {\
 247         entries[j] = (((bit_cache >> (4*(stages - j - 1))) & 0xF) + 16*j) << (level + 1);\
 248       }\
 249       mean -= (stages * 128);\
 250       n4    = ((mean + (mean >> 31)) << 16) | (mean & 0xFFFF);
 251
 252 static int svq1_decode_block_intra (GetBitContext *bitbuf, uint8_t *pixels, int pitch ) {
 253   uint32_t    bit_cache;
 254   uint8_t    *list[63];
 255   uint32_t   *dst;
 256   const uint32_t *codebook;
 257   int         entries[6];
 258   int         i, j, m, n;
 259   int         mean, stages;
 260   unsigned    x, y, width, height, level;
 261   uint32_t    n1, n2, n3, n4;
 262
 263   /* initialize list for breadth first processing of vectors */
 264   list[0] = pixels;
 265
 266   /* recursively process vector */
 267   for (i=0, m=1, n=1, level=5; i < n; i++) {
 268     SVQ1_PROCESS_VECTOR();
 269
 270     /* destination address and vector size */
 271     dst = (uint32_t *) list[i];
 272     width = 1 << ((4 + level) /2);
 273     height = 1 << ((3 + level) /2);
 274
 275     /* get number of stages (-1 skips vector, 0 for mean only) */
 276     stages = get_vlc2(bitbuf, svq1_intra_multistage[level].table, 3, 3) - 1;
 277
 278     if (stages == -1) {
 279         for (y=0; y < height; y++) {
 280           memset (&dst[y*(pitch / 4)], 0, width);
 281         }
 282       continue;                 /* skip vector */
 283     }
 284
 285     if ((stages > 0) && (level >= 4)) {
 286 #ifdef DEBUG_SVQ1
 287     av_log(s->avctx, AV_LOG_INFO, "Error (svq1_decode_block_intra): invalid vector: stages=%i level=%i\n",stages,level);
 288 #endif
 289       return -1;        /* invalid vector */
 290     }
 291
 292     mean = get_vlc2(bitbuf, svq1_intra_mean.table, 8, 3);
 293
 294     if (stages == 0) {
 295       for (y=0; y < height; y++) {
 296         memset (&dst[y*(pitch / 4)], mean, width);
 297       }
 298     } else {
 299       SVQ1_CALC_CODEBOOK_ENTRIES(svq1_intra_codebooks);
 300       SVQ1_DO_CODEBOOK_INTRA()
 301     }
 302   }
 303
 304   return 0;
 305 }
 306
 307 static int svq1_decode_block_non_intra (GetBitContext *bitbuf, uint8_t *pixels, int pitch ) {
 308   uint32_t    bit_cache;
 309   uint8_t    *list[63];
 310   uint32_t   *dst;
 311   const uint32_t *codebook;
 312   int         entries[6];
 313   int         i, j, m, n;
 314   int         mean, stages;
 315   int         x, y, width, height, level;
 316   uint32_t    n1, n2, n3, n4;
 317
 318   /* initialize list for breadth first processing of vectors */
 319   list[0] = pixels;
 320
 321   /* recursively process vector */
 322   for (i=0, m=1, n=1, level=5; i < n; i++) {
 323     SVQ1_PROCESS_VECTOR();
 324
 325     /* destination address and vector size */
 326     dst = (uint32_t *) list[i];
 327     width = 1 << ((4 + level) /2);
 328     height = 1 << ((3 + level) /2);
 329
 330     /* get number of stages (-1 skips vector, 0 for mean only) */
 331     stages = get_vlc2(bitbuf, svq1_inter_multistage[level].table, 3, 2) - 1;
 332
 333     if (stages == -1) continue; /* skip vector */
 334
 335     if ((stages > 0) && (level >= 4)) {
 336 #ifdef DEBUG_SVQ1
 337     av_log(s->avctx, AV_LOG_INFO, "Error (svq1_decode_block_non_intra): invalid vector: stages=%i level=%i\n",stages,level);
 338 #endif
 339       return -1;        /* invalid vector */
 340     }
 341
 342     mean = get_vlc2(bitbuf, svq1_inter_mean.table, 9, 3) - 256;
 343
 344     SVQ1_CALC_CODEBOOK_ENTRIES(svq1_inter_codebooks);
 345     SVQ1_DO_CODEBOOK_NONINTRA()
 346   }
 347   return 0;
 348 }
 349
 350 static int svq1_decode_motion_vector (GetBitContext *bitbuf, svq1_pmv_t *mv, svq1_pmv_t **pmv) {
 351   int        diff;
 352   int        i;
 353
 354   for (i=0; i < 2; i++) {
 355
 356     /* get motion code */
 357     diff = get_vlc2(bitbuf, svq1_motion_component.table, 7, 2);
 358     if(diff<0)
 359         return -1;
 360     else if(diff){
 361         if(get_bits1(bitbuf)) diff= -diff;
 362     }
 363
 364     /* add median of motion vector predictors and clip result */
 365     if (i == 1)
 366       mv->y = ((diff + mid_pred(pmv[0]->y, pmv[1]->y, pmv[2]->y)) << 26) >> 26;
 367     else
 368       mv->x = ((diff + mid_pred(pmv[0]->x, pmv[1]->x, pmv[2]->x)) << 26) >> 26;
 369   }
 370
 371   return 0;
 372 }
 373
 /**
  * Copy a 16x16 block unchanged from the reference picture.
  *
  * @param current  destination, already pointing at the block's top-left pixel
  * @param previous origin of the reference plane
  * @param pitch    distance in bytes between rows (same for both planes)
  * @param x        horizontal block position inside the reference plane
  * @param y        vertical block position inside the reference plane
  */
 static void svq1_skip_block (uint8_t *current, uint8_t *previous, int pitch, int x, int y) {
   const uint8_t *from = previous + (x + y*pitch);
   uint8_t       *to   = current;
   int            rows_left = 16;

   while (rows_left-- > 0) {
     memcpy (to, from, 16);
     from += pitch;
     to   += pitch;
   }
 }
 388
 389 static int svq1_motion_inter_block (MpegEncContext *s, GetBitContext *bitbuf,
 390                                uint8_t *current, uint8_t *previous, int pitch,
 391                                svq1_pmv_t *motion, int x, int y) {
 392   uint8_t    *src;
 393   uint8_t    *dst;
 394   svq1_pmv_t  mv;
 395   svq1_pmv_t *pmv[3];
 396   int         result;
 397
 398   /* predict and decode motion vector */
 399   pmv[0] = &motion[0];
 400   if (y == 0) {
 401     pmv[1] =
 402     pmv[2] = pmv[0];
 403   }
 404   else {
 405     pmv[1] = &motion[(x / 8) + 2];
 406     pmv[2] = &motion[(x / 8) + 4];
 407   }
 408
 409   result = svq1_decode_motion_vector (bitbuf, &mv, pmv);
 410
 411   if (result != 0)
 412     return result;
 413
 414   motion[0].x                =
 415   motion[(x / 8) + 2].x      =
 416   motion[(x / 8) + 3].x      = mv.x;
 417   motion[0].y                =
 418   motion[(x / 8) + 2].y      =
 419   motion[(x / 8) + 3].y      = mv.y;
 420
 421   if(y + (mv.y >> 1)<0)
 422      mv.y= 0;
 423   if(x + (mv.x >> 1)<0)
 424      mv.x= 0;
 425
 426 #if 0
 427   int w= (s->width+15)&~15;
 428   int h= (s->height+15)&~15;
 429   if(x + (mv.x >> 1)<0 || y + (mv.y >> 1)<0 || x + (mv.x >> 1) + 16 > w || y + (mv.y >> 1) + 16> h)
 430       av_log(s->avctx, AV_LOG_INFO, "%d %d %d %d\n", x, y, x + (mv.x >> 1), y + (mv.y >> 1));
 431 #endif
 432
 433   src = &previous[(x + (mv.x >> 1)) + (y + (mv.y >> 1))*pitch];
 434   dst = current;
 435
 436   s->dsp.put_pixels_tab[0][((mv.y & 1) << 1) | (mv.x & 1)](dst,src,pitch,16);
 437
 438   return 0;
 439 }
 440
 /**
  * Decode the four motion vectors of a 4V inter block and write the four
  * motion-compensated 8x8 predictions into the current picture.
  *
  * The vectors are decoded in the order top-left, top-right, bottom-left,
  * bottom-right; each later vector uses earlier ones as predictors, so the
  * statement order below matters.
  *
  * Parameters: s provides the put_pixels function table; bitbuf is the bit
  * reader; current points at the block's top-left destination pixel;
  * previous is the origin of the reference plane; pitch is the row stride in
  * bytes; motion is the predictor array (entries 0, x/8+2 and x/8+3 are
  * overwritten); x and y are the block position in pixels.
  *
  * Returns 0 on success, otherwise the error from
  * svq1_decode_motion_vector().
  */
 441 static int svq1_motion_inter_4v_block (MpegEncContext *s, GetBitContext *bitbuf,
 442                                   uint8_t *current, uint8_t *previous, int pitch,
 443                                   svq1_pmv_t *motion,int x, int y) {
 444   uint8_t    *src;
 445   uint8_t    *dst;
 446   svq1_pmv_t  mv;
 447   svq1_pmv_t *pmv[4];
 448   int         i, result;
 449
 450   /* predict and decode motion vector (0) */
 451   pmv[0] = &motion[0];
 452   if (y == 0) {
 453     pmv[1] =
 454     pmv[2] = pmv[0];
 455   }
 456   else {
 457     pmv[1] = &motion[(x / 8) + 2];
 458     pmv[2] = &motion[(x / 8) + 4];
 459   }
 460
 461   result = svq1_decode_motion_vector (bitbuf, &mv, pmv);
 462
 463   if (result != 0)
 464     return result;
 465
 466   /* predict and decode motion vector (1) */
       /* vector (0) now lives in the local mv and becomes the first predictor;
          for y != 0, pmv[2] still points at motion[(x / 8) + 4] from above */
 467   pmv[0] = &mv;
 468   if (y == 0) {
 469     pmv[1] =
 470     pmv[2] = pmv[0];
 471   }
 472   else {
 473     pmv[1] = &motion[(x / 8) + 3];
 474   }
       /* vector (1) is stored in motion[0] */
 475   result = svq1_decode_motion_vector (bitbuf, &motion[0], pmv);
 476
 477   if (result != 0)
 478     return result;
 479
 480   /* predict and decode motion vector (2) */
       /* predictors: vector (0), vector (1) and motion[(x / 8) + 1] */
 481   pmv[1] = &motion[0];
 482   pmv[2] = &motion[(x / 8) + 1];
 483
 484   result = svq1_decode_motion_vector (bitbuf, &motion[(x / 8) + 2], pmv);
 485
 486   if (result != 0)
 487     return result;
 488
 489   /* predict and decode motion vector (3) */
       /* predictors: vectors (0), (1) and (2); the result goes to pmv[3] */
 490   pmv[2] = &motion[(x / 8) + 2];
 491   pmv[3] = &motion[(x / 8) + 3];
 492
 493   result = svq1_decode_motion_vector (bitbuf, pmv[3], pmv);
 494
 495   if (result != 0)
 496     return result;
 497
 498   /* form predictions */
       /* pmv[0..3] now point at the four decoded vectors in decode order */
 499   for (i=0; i < 4; i++) {
         /* vectors are in half-pel units, so +16 addresses the 8x8 sub-block
            8 pixels to the right (odd i) or 8 pixels down (i >= 2) */
 500     int mvx= pmv[i]->x + (i&1)*16;
 501     int mvy= pmv[i]->y + (i>>1)*16;
 502
 503     ///XXX /FIXME clipping or padding?
 504     if(y + (mvy >> 1)<0)
 505        mvy= 0;
 506     if(x + (mvx >> 1)<0)
 507        mvx= 0;
 508
 509 #if 0
 510   int w= (s->width+15)&~15;
 511   int h= (s->height+15)&~15;
 512   if(x + (mvx >> 1)<0 || y + (mvy >> 1)<0 || x + (mvx >> 1) + 8 > w || y + (mvy >> 1) + 8> h)
 513       av_log(s->avctx, AV_LOG_INFO, "%d %d %d %d\n", x, y, x + (mvx >> 1), y + (mvy >> 1));
 514 #endif
 515     src = &previous[(x + (mvx >> 1)) + (y + (mvy >> 1))*pitch];
 516     dst = current;
 517
 518     s->dsp.put_pixels_tab[1][((mvy & 1) << 1) | (mvx & 1)](dst,src,pitch,8);
 519
 520     /* select next block */
         /* after a right-hand block: 8 rows down and 8 columns back
            (8*pitch - 8); after a left-hand block: 8 columns right */
 521     if (i & 1) {
 522       current  += 8*(pitch - 1);
 523     } else {
 524       current  += 8;
 525     }
 526   }
 527
 528   return 0;
 529 }
 530
 531 static int svq1_decode_delta_block (MpegEncContext *s, GetBitContext *bitbuf,
 532                         uint8_t *current, uint8_t *previous, int pitch,
 533                         svq1_pmv_t *motion, int x, int y) {
 534   uint32_t block_type;
 535   int      result = 0;
 536
 537   /* get block type */
 538   block_type = get_vlc2(bitbuf, svq1_block_type.table, 2, 2);
 539
 540   /* reset motion vectors */
 541   if (block_type == SVQ1_BLOCK_SKIP || block_type == SVQ1_BLOCK_INTRA) {
 542     motion[0].x                 =
 543     motion[0].y                 =
 544     motion[(x / 8) + 2].x =
 545     motion[(x / 8) + 2].y =
 546     motion[(x / 8) + 3].x =
 547     motion[(x / 8) + 3].y = 0;
 548   }
 549
 550   switch (block_type) {
 551   case SVQ1_BLOCK_SKIP:
 552     svq1_skip_block (current, previous, pitch, x, y);
 553     break;
 554
 555   case SVQ1_BLOCK_INTER:
 556     result = svq1_motion_inter_block (s, bitbuf, current, previous, pitch, motion, x, y);
 557
 558     if (result != 0)
 559     {
 560 #ifdef DEBUG_SVQ1
 561     av_log(s->avctx, AV_LOG_INFO, "Error in svq1_motion_inter_block %i\n",result);
 562 #endif
 563       break;
 564     }
 565     result = svq1_decode_block_non_intra (bitbuf, current, pitch);
 566     break;
 567
 568   case SVQ1_BLOCK_INTER_4V:
 569     result = svq1_motion_inter_4v_block (s, bitbuf, current, previous, pitch, motion, x, y);
 570
 571     if (result != 0)
 572     {
 573 #ifdef DEBUG_SVQ1
 574     av_log(s->avctx, AV_LOG_INFO, "Error in svq1_motion_inter_4v_block %i\n",result);
 575 #endif
 576       break;
 577     }
 578     result = svq1_decode_block_non_intra (bitbuf, current, pitch);
 579     break;
 580
 581   case SVQ1_BLOCK_INTRA:
 582     result = svq1_decode_block_intra (bitbuf, current, pitch);
 583     break;
 584   }
 585
 586   return result;
 587 }
 588
 589 /* standard video sizes */
 /* Indexed by the 3-bit frame size code of the frame header.  Entries 0..6
  * are looked up by svq1_decode_frame_header() and searched by
  * svq1_write_header(); code 7 means explicit 12-bit width and height
  * follow in the bitstream, so entry 7 is only a placeholder. */
 590 static struct { int width; int height; } svq1_frame_size_table[8] = {
 591   { 160, 120 }, { 128,  96 }, { 176, 144 }, { 352, 288 },
 592   { 704, 576 }, { 240, 180 }, { 320, 240 }, {  -1,  -1 }
 593 };
 594
 595 static uint16_t svq1_packet_checksum (uint8_t *data, int length, int value) {
 596   int i;
 597
 598   for (i=0; i < length; i++) {
 599     value = checksum_table[data[i] ^ (value >> 8)] ^ ((value & 0xFF) << 8);
 600   }
 601
 602   return value;
 603 }
 604
 /* Disabled code: same checksum update as svq1_packet_checksum(), applied
  * row by row to a width x height region of 16-bit samples.
  * NOTE(review): pixels[x] is 16 bits wide, so pixels[x] ^ (value >> 8) can
  * exceed 255 and index past checksum_table[256]; fix before re-enabling. */
 605 #if 0 /* unused, remove? */
 606 static uint16_t svq1_component_checksum (uint16_t *pixels, int pitch,
 607                                          int width, int height, int value) {
 608   int x, y;
 609
 610   for (y=0; y < height; y++) {
 611     for (x=0; x < width; x++) {
 612       value = checksum_table[pixels[x] ^ (value >> 8)] ^ ((value & 0xFF) << 8);
 613     }
 614
 615     pixels += pitch;
 616   }
 617
 618   return value;
 619 }
 620 #endif
 621
 622 #ifdef CONFIG_DECODERS
 623 static void svq1_parse_string (GetBitContext *bitbuf, uint8_t *out) {
 624   uint8_t seed;
 625   int     i;
 626
 627   out[0] = get_bits (bitbuf, 8);
 628
 629   seed = string_table[out[0]];
 630
 631   for (i=1; i <= out[0]; i++) {
 632     out[i] = get_bits (bitbuf, 8) ^ seed;
 633     seed   = string_table[out[i] ^ seed];
 634   }
 635 }
 636
 637 static int svq1_decode_frame_header (GetBitContext *bitbuf,MpegEncContext *s) {
 638   int frame_size_code;
 639   int temporal_reference;
 640
 641   temporal_reference = get_bits (bitbuf, 8);
 642
 643   /* frame type */
 644   s->pict_type= get_bits (bitbuf, 2)+1;
 645   if(s->pict_type==4)
 646       return -1;
 647
 648   if (s->pict_type == I_TYPE) {
 649
 650     /* unknown fields */
 651     if (s->f_code == 0x50 || s->f_code == 0x60) {
 652       int csum = get_bits (bitbuf, 16);
 653
 654       csum = svq1_packet_checksum ((uint8_t *)bitbuf->buffer, bitbuf->size_in_bits>>3, csum);
 655
 656 //      av_log(s->avctx, AV_LOG_INFO, "%s checksum (%02x) for packet data\n",
 657 //              (csum == 0) ? "correct" : "incorrect", csum);
 658     }
 659
 660     if ((s->f_code ^ 0x10) >= 0x50) {
 661       uint8_t msg[256];
 662
 663       svq1_parse_string (bitbuf, msg);
 664
 665       av_log(s->avctx, AV_LOG_INFO, "embedded message: \"%s\"\n", (char *) msg);
 666     }
 667
 668     skip_bits (bitbuf, 2);
 669     skip_bits (bitbuf, 2);
 670     skip_bits1 (bitbuf);
 671
 672     /* load frame size */
 673     frame_size_code = get_bits (bitbuf, 3);
 674
 675     if (frame_size_code == 7) {
 676       /* load width, height (12 bits each) */
 677       s->width = get_bits (bitbuf, 12);
 678       s->height = get_bits (bitbuf, 12);
 679
 680       if (!s->width || !s->height)
 681         return -1;
 682     } else {
 683       /* get width, height from table */
 684       s->width = svq1_frame_size_table[frame_size_code].width;
 685       s->height = svq1_frame_size_table[frame_size_code].height;
 686     }
 687   }
 688
 689   /* unknown fields */
 690   if (get_bits (bitbuf, 1) == 1) {
 691     skip_bits1 (bitbuf);       /* use packet checksum if (1) */
 692     skip_bits1 (bitbuf);       /* component checksums after image data if (1) */
 693
 694     if (get_bits (bitbuf, 2) != 0)
 695       return -1;
 696   }
 697
 698   if (get_bits (bitbuf, 1) == 1) {
 699     skip_bits1 (bitbuf);
 700     skip_bits (bitbuf, 4);
 701     skip_bits1 (bitbuf);
 702     skip_bits (bitbuf, 2);
 703
 704     while (get_bits (bitbuf, 1) == 1) {
 705       skip_bits (bitbuf, 8);
 706     }
 707   }
 708
 709   return 0;
 710 }
 711
 712 static int svq1_decode_frame(AVCodecContext *avctx,
 713                              void *data, int *data_size,
 714                              uint8_t *buf, int buf_size)
 715 {
 716   MpegEncContext *s=avctx->priv_data;
 717   uint8_t        *current, *previous;
 718   int             result, i, x, y, width, height;
 719   AVFrame *pict = data;
 720
 721   /* initialize bit buffer */
 722   init_get_bits(&s->gb,buf,buf_size*8);
 723
 724   /* decode frame header */
 725   s->f_code = get_bits (&s->gb, 22);
 726
 727   if ((s->f_code & ~0x70) || !(s->f_code & 0x60))
 728     return -1;
 729
 730   /* swap some header bytes (why?) */
 731   if (s->f_code != 0x20) {
 732     uint32_t *src = (uint32_t *) (buf + 4);
 733
 734     for (i=0; i < 4; i++) {
 735       src[i] = ((src[i] << 16) | (src[i] >> 16)) ^ src[7 - i];
 736     }
 737   }
 738
 739   result = svq1_decode_frame_header (&s->gb, s);
 740
 741   if (result != 0)
 742   {
 743 #ifdef DEBUG_SVQ1
 744     av_log(s->avctx, AV_LOG_INFO, "Error in svq1_decode_frame_header %i\n",result);
 745 #endif
 746     return result;
 747   }
 748
 749   //FIXME this avoids some confusion for "B frames" without 2 references
 750   //this should be removed after libavcodec can handle more flexible picture types & ordering
 751   if(s->pict_type==B_TYPE && s->last_picture_ptr==NULL) return buf_size;
 752
 753   if(avctx->hurry_up && s->pict_type==B_TYPE) return buf_size;
 754   if(  (avctx->skip_frame >= AVDISCARD_NONREF && s->pict_type==B_TYPE)
 755      ||(avctx->skip_frame >= AVDISCARD_NONKEY && s->pict_type!=I_TYPE)
 756      || avctx->skip_frame >= AVDISCARD_ALL)
 757       return buf_size;
 758
 759   if(MPV_frame_start(s, avctx) < 0)
 760       return -1;
 761
 762   /* decode y, u and v components */
 763   for (i=0; i < 3; i++) {
 764     int linesize;
 765     if (i == 0) {
 766       width  = (s->width+15)&~15;
 767       height = (s->height+15)&~15;
 768       linesize= s->linesize;
 769     } else {
 770       if(s->flags&CODEC_FLAG_GRAY) break;
 771       width  = (s->width/4+15)&~15;
 772       height = (s->height/4+15)&~15;
 773       linesize= s->uvlinesize;
 774     }
 775
 776     current  = s->current_picture.data[i];
 777
 778     if(s->pict_type==B_TYPE){
 779         previous = s->next_picture.data[i];
 780     }else{
 781         previous = s->last_picture.data[i];
 782     }
 783
 784     if (s->pict_type == I_TYPE) {
 785       /* keyframe */
 786       for (y=0; y < height; y+=16) {
 787         for (x=0; x < width; x+=16) {
 788           result = svq1_decode_block_intra (&s->gb, &current[x], linesize);
 789           if (result != 0)
 790           {
 791 //#ifdef DEBUG_SVQ1
 792             av_log(s->avctx, AV_LOG_INFO, "Error in svq1_decode_block %i (keyframe)\n",result);
 793 //#endif
 794             return result;
 795           }
 796         }
 797         current += 16*linesize;
 798       }
 799     } else {
 800       svq1_pmv_t pmv[width/8+3];
 801       /* delta frame */
 802       memset (pmv, 0, ((width / 8) + 3) * sizeof(svq1_pmv_t));
 803
 804       for (y=0; y < height; y+=16) {
 805         for (x=0; x < width; x+=16) {
 806           result = svq1_decode_delta_block (s, &s->gb, &current[x], previous,
 807                                             linesize, pmv, x, y);
 808           if (result != 0)
 809           {
 810 #ifdef DEBUG_SVQ1
 811     av_log(s->avctx, AV_LOG_INFO, "Error in svq1_decode_delta_block %i\n",result);
 812 #endif
 813             return result;
 814           }
 815         }
 816
 817         pmv[0].x =
 818         pmv[0].y = 0;
 819
 820         current += 16*linesize;
 821       }
 822     }
 823   }
 824
 825   *pict = *(AVFrame*)&s->current_picture;
 826
 827
 828   MPV_frame_end(s);
 829
 830   *data_size=sizeof(AVFrame);
 831   return buf_size;
 832 }
 833
 834 static int svq1_decode_init(AVCodecContext *avctx)
 835 {
 836     MpegEncContext *s = avctx->priv_data;
 837     int i;
 838
 839     MPV_decode_defaults(s);
 840
 841     s->avctx = avctx;
 842     s->width = (avctx->width+3)&~3;
 843     s->height = (avctx->height+3)&~3;
 844     s->codec_id= avctx->codec->id;
 845     avctx->pix_fmt = PIX_FMT_YUV410P;
 846     avctx->has_b_frames= 1; // not true, but DP frames and these behave like unidirectional b frames
 847     s->flags= avctx->flags;
 848     if (MPV_common_init(s) < 0) return -1;
 849
 850     init_vlc(&svq1_block_type, 2, 4,
 851         &svq1_block_type_vlc[0][1], 2, 1,
 852         &svq1_block_type_vlc[0][0], 2, 1, 1);
 853
 854     init_vlc(&svq1_motion_component, 7, 33,
 855         &mvtab[0][1], 2, 1,
 856         &mvtab[0][0], 2, 1, 1);
 857
 858     for (i = 0; i < 6; i++) {
 859         init_vlc(&svq1_intra_multistage[i], 3, 8,
 860             &svq1_intra_multistage_vlc[i][0][1], 2, 1,
 861             &svq1_intra_multistage_vlc[i][0][0], 2, 1, 1);
 862         init_vlc(&svq1_inter_multistage[i], 3, 8,
 863             &svq1_inter_multistage_vlc[i][0][1], 2, 1,
 864             &svq1_inter_multistage_vlc[i][0][0], 2, 1, 1);
 865     }
 866
 867     init_vlc(&svq1_intra_mean, 8, 256,
 868         &svq1_intra_mean_vlc[0][1], 4, 2,
 869         &svq1_intra_mean_vlc[0][0], 4, 2, 1);
 870
 871     init_vlc(&svq1_inter_mean, 9, 512,
 872         &svq1_inter_mean_vlc[0][1], 4, 2,
 873         &svq1_inter_mean_vlc[0][0], 4, 2, 1);
 874
 875     return 0;
 876 }
 877
 878 static int svq1_decode_end(AVCodecContext *avctx)
 879 {
 880     MpegEncContext *s = avctx->priv_data;
 881
 882     MPV_common_end(s);
 883     return 0;
 884 }
 885 #endif /* CONFIG_DECODERS */
 886
 887 #ifdef CONFIG_ENCODERS
 888 static void svq1_write_header(SVQ1Context *s, int frame_type)
 889 {
 890     int i;
 891
 892     /* frame code */
 893     put_bits(&s->pb, 22, 0x20);
 894
 895     /* temporal reference (sure hope this is a "don't care") */
 896     put_bits(&s->pb, 8, 0x00);
 897
 898     /* frame type */
 899     put_bits(&s->pb, 2, frame_type - 1);
 900
 901     if (frame_type == I_TYPE) {
 902
 903         /* no checksum since frame code is 0x20 */
 904
 905         /* no embedded string either */
 906
 907         /* output 5 unknown bits (2 + 2 + 1) */
 908         put_bits(&s->pb, 5, 2); /* 2 needed by quicktime decoder */
 909
 910         for (i = 0; i < 7; i++)
 911         {
 912             if ((svq1_frame_size_table[i].width == s->frame_width) &&
 913                 (svq1_frame_size_table[i].height == s->frame_height))
 914             {
 915                 put_bits(&s->pb, 3, i);
 916                 break;
 917             }
 918         }
 919
 920         if (i == 7)
 921         {
 922             put_bits(&s->pb, 3, 7);
 923                 put_bits(&s->pb, 12, s->frame_width);
 924                 put_bits(&s->pb, 12, s->frame_height);
 925         }
 926     }
 927
 928     /* no checksum or extra data (next 2 bits get 0) */
 929     put_bits(&s->pb, 2, 0);
 930 }
 931
 932
 /* Encoder tuning constants.  NOTE(review): their use is outside the
  * visible part of the file; presumably they control when encode_block()
  * splits a vector -- confirm. */
 933 #define QUALITY_THRESHOLD 100
 934 #define THRESHOLD_MULTIPLIER 0.6
 935
 /* NOTE(review): presumably AltiVec builds define `vector` as a macro, which
  * would clash with the local variable named `vector` in encode_block() --
  * confirm. */
 936 #if defined(HAVE_ALTIVEC)
 937 #undef vector
 938 #endif
 939
 940 static int encode_block(SVQ1Context *s, uint8_t *src, uint8_t *ref, uint8_t *decoded, int stride, int level, int threshold, int lambda, int intra){
 941     int count, y, x, i, j, split, best_mean, best_score, best_count;
 942     int best_vector[6];
 943     int block_sum[7]= {0, 0, 0, 0, 0, 0};
 944     int w= 2<<((level+2)>>1);
 945     int h= 2<<((level+1)>>1);
 946     int size=w*h;
 947     int16_t block[7][256];
 948     const int8_t *codebook_sum, *codebook;
 949     const uint16_t (*mean_vlc)[2];
 950     const uint8_t (*multistage_vlc)[2];
 951
 952     best_score=0;
 953     //FIXME optimize, this doenst need to be done multiple times
 954     if(intra){
 955         codebook_sum= svq1_intra_codebook_sum[level];
 956         codebook= svq1_intra_codebooks[level];
 957         mean_vlc= svq1_intra_mean_vlc;
 958         multistage_vlc= svq1_intra_multistage_vlc[level];
 959         for(y=0; y<h; y++){
 960             for(x=0; x<w; x++){
 961                 int v= src[x + y*stride];
 962                 block[0][x + w*y]= v;
 963                 best_score += v*v;
 964                 block_sum[0] += v;
 965             }
 966         }
 967     }else{
 968         codebook_sum= svq1_inter_codebook_sum[level];
 969         codebook= svq1_inter_codebooks[level];
 970         mean_vlc= svq1_inter_mean_vlc + 256;
 971         multistage_vlc= svq1_inter_multistage_vlc[level];
 972         for(y=0; y<h; y++){
 973             for(x=0; x<w; x++){
 974                 int v= src[x + y*stride] - ref[x + y*stride];
 975                 block[0][x + w*y]= v;
 976                 best_score += v*v;
 977                 block_sum[0] += v;
 978             }
 979         }
 980     }
 981
 982     best_count=0;
 983     best_score -= ((block_sum[0]*block_sum[0])>>(level+3));
 984     best_mean= (block_sum[0] + (size>>1)) >> (level+3);
 985
 986     if(level<4){
 987         for(count=1; count<7; count++){
 988             int best_vector_score= INT_MAX;
 989             int best_vector_sum=-999, best_vector_mean=-999;
 990             const int stage= count-1;
 991             const int8_t *vector;
 992
 993             for(i=0; i<16; i++){
 994                 int sum= codebook_sum[stage*16 + i];
 995                 int sqr, diff, score;
 996
 997                 vector = codebook + stage*size*16 + i*size;
 998                 sqr = s->dsp.ssd_int8_vs_int16(vector, block[stage], size);
 999                 diff= block_sum[stage] - sum;
1000                 score= sqr - ((diff*(int64_t)diff)>>(level+3)); //FIXME 64bit slooow
1001                 if(score < best_vector_score){
1002                     int mean= (diff + (size>>1)) >> (level+3);
1003                     assert(mean >-300 && mean<300);
1004                     mean= av_clip(mean, intra?0:-256, 255);
1005                     best_vector_score= score;
1006                     best_vector[stage]= i;
1007                     best_vector_sum= sum;
1008                     best_vector_mean= mean;
1009                 }
1010             }
1011             assert(best_vector_mean != -999);
1012             vector= codebook + stage*size*16 + best_vector[stage]*size;
1013             for(j=0; j<size; j++){
1014                 block[stage+1][j] = block[stage][j] - vector[j];
1015             }
1016             block_sum[stage+1]= block_sum[stage] - best_vector_sum;
1017             best_vector_score +=
1018                 lambda*(+ 1 + 4*count
1019                         + multistage_vlc[1+count][1]
1020                         + mean_vlc[best_vector_mean][1]);
1021
1022             if(best_vector_score < best_score){
1023                 best_score= best_vector_score;
1024                 best_count= count;
1025                 best_mean= best_vector_mean;
1026             }
1027         }
1028     }
1029
1030     split=0;
1031     if(best_score > threshold && level){
1032         int score=0;
1033         int offset= (level&1) ? stride*h/2 : w/2;
1034         PutBitContext backup[6];
1035
1036         for(i=level-1; i>=0; i--){
1037             backup[i]= s->reorder_pb[i];
1038         }
1039         score += encode_block(s, src         , ref         , decoded         , stride, level-1, threshold>>1, lambda, intra);
1040         score += encode_block(s, src + offset, ref + offset, decoded + offset, stride, level-1, threshold>>1, lambda, intra);
1041         score += lambda;
1042
1043         if(score < best_score){
1044             best_score= score;
1045             split=1;
1046         }else{
1047             for(i=level-1; i>=0; i--){
1048                 s->reorder_pb[i]= backup[i];
1049             }
1050         }
1051     }
1052     if (level > 0)
1053         put_bits(&s->reorder_pb[level], 1, split);
1054
1055     if(!split){
1056         assert((best_mean >= 0 && best_mean<256) || !intra);
1057         assert(best_mean >= -256 && best_mean<256);
1058         assert(best_count >=0 && best_count<7);
1059         assert(level<4 || best_count==0);
1060
1061         /* output the encoding */
1062         put_bits(&s->reorder_pb[level],
1063             multistage_vlc[1 + best_count][1],
1064             multistage_vlc[1 + best_count][0]);
1065         put_bits(&s->reorder_pb[level], mean_vlc[best_mean][1],
1066             mean_vlc[best_mean][0]);
1067
1068         for (i = 0; i < best_count; i++){
1069             assert(best_vector[i]>=0 && best_vector[i]<16);
1070             put_bits(&s->reorder_pb[level], 4, best_vector[i]);
1071         }
1072
1073         for(y=0; y<h; y++){
1074             for(x=0; x<w; x++){
1075                 decoded[x + y*stride]= src[x + y*stride] - block[best_count][x + w*y] + best_mean;
1076             }
1077         }
1078     }
1079
1080     return best_score;
1081 }
1082
1083
1084 static int svq1_encode_plane(SVQ1Context *s, int plane, unsigned char *src_plane, unsigned char *ref_plane, unsigned char *decoded_plane,
1085     int width, int height, int src_stride, int stride)
1086 {
1087     int x, y;
1088     int i;
1089     int block_width, block_height;
1090     int level;
1091     int threshold[6];
1092     const int lambda= (s->picture.quality*s->picture.quality) >> (2*FF_LAMBDA_SHIFT);
1093
1094     /* figure out the acceptable level thresholds in advance */
1095     threshold[5] = QUALITY_THRESHOLD;
1096     for (level = 4; level >= 0; level--)
1097         threshold[level] = threshold[level + 1] * THRESHOLD_MULTIPLIER;
1098
1099     block_width = (width + 15) / 16;
1100     block_height = (height + 15) / 16;
1101
1102     if(s->picture.pict_type == P_TYPE){
1103         s->m.avctx= s->avctx;
1104         s->m.current_picture_ptr= &s->m.current_picture;
1105         s->m.last_picture_ptr   = &s->m.last_picture;
1106         s->m.last_picture.data[0]= ref_plane;
1107         s->m.linesize=
1108         s->m.last_picture.linesize[0]=
1109         s->m.new_picture.linesize[0]=
1110         s->m.current_picture.linesize[0]= stride;
1111         s->m.width= width;
1112         s->m.height= height;
1113         s->m.mb_width= block_width;
1114         s->m.mb_height= block_height;
1115         s->m.mb_stride= s->m.mb_width+1;
1116         s->m.b8_stride= 2*s->m.mb_width+1;
1117         s->m.f_code=1;
1118         s->m.pict_type= s->picture.pict_type;
1119         s->m.me_method= s->avctx->me_method;
1120         s->m.me.scene_change_score=0;
1121         s->m.flags= s->avctx->flags;
1122 //        s->m.out_format = FMT_H263;
1123 //        s->m.unrestricted_mv= 1;
1124
1125         s->m.lambda= s->picture.quality;
1126         s->m.qscale= (s->m.lambda*139 + FF_LAMBDA_SCALE*64) >> (FF_LAMBDA_SHIFT + 7);
1127         s->m.lambda2= (s->m.lambda*s->m.lambda + FF_LAMBDA_SCALE/2) >> FF_LAMBDA_SHIFT;
1128
1129         if(!s->motion_val8[plane]){
1130             s->motion_val8 [plane]= av_mallocz((s->m.b8_stride*block_height*2 + 2)*2*sizeof(int16_t));
1131             s->motion_val16[plane]= av_mallocz((s->m.mb_stride*(block_height + 2) + 1)*2*sizeof(int16_t));
1132         }
1133
1134         s->m.mb_type= s->mb_type;
1135
1136         //dummies, to avoid segfaults
1137         s->m.current_picture.mb_mean=   (uint8_t *)s->dummy;
1138         s->m.current_picture.mb_var=    (uint16_t*)s->dummy;
1139         s->m.current_picture.mc_mb_var= (uint16_t*)s->dummy;
1140         s->m.current_picture.mb_type= s->dummy;
1141
1142         s->m.current_picture.motion_val[0]= s->motion_val8[plane] + 2;
1143         s->m.p_mv_table= s->motion_val16[plane] + s->m.mb_stride + 1;
1144         s->m.dsp= s->dsp; //move
1145         ff_init_me(&s->m);
1146
1147         s->m.me.dia_size= s->avctx->dia_size;
1148         s->m.first_slice_line=1;
1149         for (y = 0; y < block_height; y++) {
1150             uint8_t src[stride*16];
1151
1152             s->m.new_picture.data[0]= src - y*16*stride; //ugly
1153             s->m.mb_y= y;
1154
1155             for(i=0; i<16 && i + 16*y<height; i++){
1156                 memcpy(&src[i*stride], &src_plane[(i+16*y)*src_stride], width);
1157                 for(x=width; x<16*block_width; x++)
1158                     src[i*stride+x]= src[i*stride+x-1];
1159             }
1160             for(; i<16 && i + 16*y<16*block_height; i++)
1161                 memcpy(&src[i*stride], &src[(i-1)*stride], 16*block_width);
1162
1163             for (x = 0; x < block_width; x++) {
1164                 s->m.mb_x= x;
1165                 ff_init_block_index(&s->m);
1166                 ff_update_block_index(&s->m);
1167
1168                 ff_estimate_p_frame_motion(&s->m, x, y);
1169             }
1170             s->m.first_slice_line=0;
1171         }
1172
1173         ff_fix_long_p_mvs(&s->m);
1174         ff_fix_long_mvs(&s->m, NULL, 0, s->m.p_mv_table, s->m.f_code, CANDIDATE_MB_TYPE_INTER, 0);
1175     }
1176
1177     s->m.first_slice_line=1;
1178     for (y = 0; y < block_height; y++) {
1179         uint8_t src[stride*16];
1180
1181         for(i=0; i<16 && i + 16*y<height; i++){
1182             memcpy(&src[i*stride], &src_plane[(i+16*y)*src_stride], width);
1183             for(x=width; x<16*block_width; x++)
1184                 src[i*stride+x]= src[i*stride+x-1];
1185         }
1186         for(; i<16 && i + 16*y<16*block_height; i++)
1187             memcpy(&src[i*stride], &src[(i-1)*stride], 16*block_width);
1188
1189         s->m.mb_y= y;
1190         for (x = 0; x < block_width; x++) {
1191             uint8_t reorder_buffer[3][6][7*32];
1192             int count[3][6];
1193             int offset = y * 16 * stride + x * 16;
1194             uint8_t *decoded= decoded_plane + offset;
1195             uint8_t *ref= ref_plane + offset;
1196             int score[4]={0,0,0,0}, best;
1197             uint8_t temp[16*stride];
1198
1199             if(s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < 3000){ //FIXME check size
1200                 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
1201                 return -1;
1202             }
1203
1204             s->m.mb_x= x;
1205             ff_init_block_index(&s->m);
1206             ff_update_block_index(&s->m);
1207
1208             if(s->picture.pict_type == I_TYPE || (s->m.mb_type[x + y*s->m.mb_stride]&CANDIDATE_MB_TYPE_INTRA)){
1209                 for(i=0; i<6; i++){
1210                     init_put_bits(&s->reorder_pb[i], reorder_buffer[0][i], 7*32);
1211                 }
1212                 if(s->picture.pict_type == P_TYPE){
1213                     const uint8_t *vlc= svq1_block_type_vlc[SVQ1_BLOCK_INTRA];
1214                     put_bits(&s->reorder_pb[5], vlc[1], vlc[0]);
1215                     score[0]= vlc[1]*lambda;
1216                 }
1217                 score[0]+= encode_block(s, src+16*x, NULL, temp, stride, 5, 64, lambda, 1);
1218                 for(i=0; i<6; i++){
1219                     count[0][i]= put_bits_count(&s->reorder_pb[i]);
1220                     flush_put_bits(&s->reorder_pb[i]);
1221                 }
1222             }else
1223                 score[0]= INT_MAX;
1224
1225             best=0;
1226
1227             if(s->picture.pict_type == P_TYPE){
1228                 const uint8_t *vlc= svq1_block_type_vlc[SVQ1_BLOCK_INTER];
1229                 int mx, my, pred_x, pred_y, dxy;
1230                 int16_t *motion_ptr;
1231
1232                 motion_ptr= h263_pred_motion(&s->m, 0, 0, &pred_x, &pred_y);
1233                 if(s->m.mb_type[x + y*s->m.mb_stride]&CANDIDATE_MB_TYPE_INTER){
1234                     for(i=0; i<6; i++)
1235                         init_put_bits(&s->reorder_pb[i], reorder_buffer[1][i], 7*32);
1236
1237                     put_bits(&s->reorder_pb[5], vlc[1], vlc[0]);
1238
1239                     s->m.pb= s->reorder_pb[5];
1240                     mx= motion_ptr[0];
1241                     my= motion_ptr[1];
1242                     assert(mx>=-32 && mx<=31);
1243                     assert(my>=-32 && my<=31);
1244                     assert(pred_x>=-32 && pred_x<=31);
1245                     assert(pred_y>=-32 && pred_y<=31);
1246                     ff_h263_encode_motion(&s->m, mx - pred_x, 1);
1247                     ff_h263_encode_motion(&s->m, my - pred_y, 1);
1248                     s->reorder_pb[5]= s->m.pb;
1249                     score[1] += lambda*put_bits_count(&s->reorder_pb[5]);
1250
1251                     dxy= (mx&1) + 2*(my&1);
1252
1253                     s->dsp.put_pixels_tab[0][dxy](temp+16, ref + (mx>>1) + stride*(my>>1), stride, 16);
1254
1255                     score[1]+= encode_block(s, src+16*x, temp+16, decoded, stride, 5, 64, lambda, 0);
1256                     best= score[1] <= score[0];
1257
1258                     vlc= svq1_block_type_vlc[SVQ1_BLOCK_SKIP];
1259                     score[2]= s->dsp.sse[0](NULL, src+16*x, ref, stride, 16);
1260                     score[2]+= vlc[1]*lambda;
1261                     if(score[2] < score[best] && mx==0 && my==0){
1262                         best=2;
1263                         s->dsp.put_pixels_tab[0][0](decoded, ref, stride, 16);
1264                         for(i=0; i<6; i++){
1265                             count[2][i]=0;
1266                         }
1267                         put_bits(&s->pb, vlc[1], vlc[0]);
1268                     }
1269                 }
1270
1271                 if(best==1){
1272                     for(i=0; i<6; i++){
1273                         count[1][i]= put_bits_count(&s->reorder_pb[i]);
1274                         flush_put_bits(&s->reorder_pb[i]);
1275                     }
1276                 }else{
1277                     motion_ptr[0                 ] = motion_ptr[1                 ]=
1278                     motion_ptr[2                 ] = motion_ptr[3                 ]=
1279                     motion_ptr[0+2*s->m.b8_stride] = motion_ptr[1+2*s->m.b8_stride]=
1280                     motion_ptr[2+2*s->m.b8_stride] = motion_ptr[3+2*s->m.b8_stride]=0;
1281                 }
1282             }
1283
1284             s->rd_total += score[best];
1285
1286             for(i=5; i>=0; i--){
1287                 ff_copy_bits(&s->pb, reorder_buffer[best][i], count[best][i]);
1288             }
1289             if(best==0){
1290                 s->dsp.put_pixels_tab[0][0](decoded, temp, stride, 16);
1291             }
1292         }
1293         s->m.first_slice_line=0;
1294     }
1295     return 0;
1296 }
1297
1298 static int svq1_encode_init(AVCodecContext *avctx)
1299 {
1300     SVQ1Context * const s = avctx->priv_data;
1301
1302     dsputil_init(&s->dsp, avctx);
1303     avctx->coded_frame= (AVFrame*)&s->picture;
1304
1305     s->frame_width = avctx->width;
1306     s->frame_height = avctx->height;
1307
1308     s->y_block_width = (s->frame_width + 15) / 16;
1309     s->y_block_height = (s->frame_height + 15) / 16;
1310
1311     s->c_block_width = (s->frame_width / 4 + 15) / 16;
1312     s->c_block_height = (s->frame_height / 4 + 15) / 16;
1313
1314     s->avctx= avctx;
1315     s->m.avctx= avctx;
1316     s->m.me.scratchpad= av_mallocz((avctx->width+64)*2*16*2*sizeof(uint8_t));
1317     s->m.me.map       = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t));
1318     s->m.me.score_map = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t));
1319     s->mb_type        = av_mallocz((s->y_block_width+1)*s->y_block_height*sizeof(int16_t));
1320     s->dummy          = av_mallocz((s->y_block_width+1)*s->y_block_height*sizeof(int32_t));
1321     h263_encode_init(&s->m); //mv_penalty
1322
1323     return 0;
1324 }
1325
1326 static int svq1_encode_frame(AVCodecContext *avctx, unsigned char *buf,
1327     int buf_size, void *data)
1328 {
1329     SVQ1Context * const s = avctx->priv_data;
1330     AVFrame *pict = data;
1331     AVFrame * const p= (AVFrame*)&s->picture;
1332     AVFrame temp;
1333     int i;
1334
1335     if(avctx->pix_fmt != PIX_FMT_YUV410P){
1336         av_log(avctx, AV_LOG_ERROR, "unsupported pixel format\n");
1337         return -1;
1338     }
1339
1340     if(!s->current_picture.data[0]){
1341         avctx->get_buffer(avctx, &s->current_picture);
1342         avctx->get_buffer(avctx, &s->last_picture);
1343     }
1344
1345     temp= s->current_picture;
1346     s->current_picture= s->last_picture;
1347     s->last_picture= temp;
1348
1349     init_put_bits(&s->pb, buf, buf_size);
1350
1351     *p = *pict;
1352     p->pict_type = avctx->gop_size && avctx->frame_number % avctx->gop_size ? P_TYPE : I_TYPE;
1353     p->key_frame = p->pict_type == I_TYPE;
1354
1355     svq1_write_header(s, p->pict_type);
1356     for(i=0; i<3; i++){
1357         if(svq1_encode_plane(s, i,
1358             s->picture.data[i], s->last_picture.data[i], s->current_picture.data[i],
1359             s->frame_width / (i?4:1), s->frame_height / (i?4:1),
1360             s->picture.linesize[i], s->current_picture.linesize[i]) < 0)
1361                 return -1;
1362     }
1363
1364 //    align_put_bits(&s->pb);
1365     while(put_bits_count(&s->pb) & 31)
1366         put_bits(&s->pb, 1, 0);
1367
1368     flush_put_bits(&s->pb);
1369
1370     return (put_bits_count(&s->pb) / 8);
1371 }
1372
1373 static int svq1_encode_end(AVCodecContext *avctx)
1374 {
1375     SVQ1Context * const s = avctx->priv_data;
1376     int i;
1377
1378     av_log(avctx, AV_LOG_DEBUG, "RD: %f\n", s->rd_total/(double)(avctx->width*avctx->height*avctx->frame_number));
1379
1380     av_freep(&s->m.me.scratchpad);
1381     av_freep(&s->m.me.map);
1382     av_freep(&s->m.me.score_map);
1383     av_freep(&s->mb_type);
1384     av_freep(&s->dummy);
1385
1386     for(i=0; i<3; i++){
1387         av_freep(&s->motion_val8[i]);
1388         av_freep(&s->motion_val16[i]);
1389     }
1390
1391     return 0;
1392 }
1393
1394 #endif //CONFIG_ENCODERS
1395
1396 #ifdef CONFIG_DECODERS
1397 AVCodec svq1_decoder = {
1398     "svq1",
1399     CODEC_TYPE_VIDEO,
1400     CODEC_ID_SVQ1,
1401     sizeof(MpegEncContext),
1402     svq1_decode_init,
1403     NULL,
1404     svq1_decode_end,
1405     svq1_decode_frame,
1406     CODEC_CAP_DR1,
1407     .flush= ff_mpeg_flush,
1408     .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV410P, -1},
1409 };
1410 #endif
1411
1412 #ifdef CONFIG_ENCODERS
1413
1414 AVCodec svq1_encoder = {
1415     "svq1",
1416     CODEC_TYPE_VIDEO,
1417     CODEC_ID_SVQ1,
1418     sizeof(SVQ1Context),
1419     svq1_encode_init,
1420     svq1_encode_frame,
1421     svq1_encode_end,
1422     .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV410P, -1},
1423 };
1424
1425 #endif //CONFIG_ENCODERS