git.sesse.net Git - ffmpeg/blob - libavcodec/cinepakenc.c

   1 /*
   2  * Cinepak encoder (c) 2011 Tomas Härdin
   3  * http://titan.codemill.se/~tomhar/cinepakenc.patch
   4  *
   5  * Fixes and improvements, vintage decoders compatibility
   6  *  (c) 2013, 2014 Rl, Aetey Global Technologies AB
   7
   8 Permission is hereby granted, free of charge, to any person obtaining a
   9 copy of this software and associated documentation files (the "Software"),
  10 to deal in the Software without restriction, including without limitation
  11 the rights to use, copy, modify, merge, publish, distribute, sublicense,
  12 and/or sell copies of the Software, and to permit persons to whom the
  13 Software is furnished to do so, subject to the following conditions:
  14
  15 The above copyright notice and this permission notice shall be included
  16 in all copies or substantial portions of the Software.
  17
  18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  19 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  20 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
  21 THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
  22 OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
  23 ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  24 OTHER DEALINGS IN THE SOFTWARE.
  25
  26  * TODO:
  27  * - optimize: color space conversion, ...
  28  * - implement options to set the min/max number of strips?
  29  * MAYBE:
  30  * - "optimally" split the frame into several non-regular areas
  31  *   using a separate codebook pair for each area and approximating
  32  *   the area by several rectangular strips (generally not full width ones)
  33  *   (use quadtree splitting? a simple fixed-granularity grid?)
  34  *
  35  *
  36  * version 2014-01-23 Rl
  37  * - added option handling for flexibility
  38  *
  39  * version 2014-01-21 Rl
  40  * - believe it or not, now we get even smaller files, with better quality
  41  *   (which means I missed an optimization earlier :)
  42  *
  43  * version 2014-01-20 Rl
  44  * - made the encoder compatible with vintage decoders
  45  *   and added some yet unused code for possible future
  46  *   incremental codebook updates
  47  * - fixed a small memory leak
  48  *
  49  * version 2013-04-28 Rl
  50  * - bugfixed codebook optimization logic
  51  *
  52  * version 2013-02-14 Rl
  53  * "Valentine's Day" version:
  54  * - made strip division more robust
  55  * - minimized bruteforcing the number of strips,
  56  *   (costs some R/D but speeds up compression a lot), the heuristic
  57  *   assumption is that score as a function of the number of strips has
  58  *   one wide minimum which moves slowly, of course not fully true
  59  * - simplified codebook generation,
  60  *   the old code was meant for other optimizations than we actually do
  61  * - optimized the codebook generation / error estimation for MODE_MC
  62  *
  63  * version 2013-02-12 Rl
  64  * - separated codebook training sets, avoided the transfer of wasted bytes,
  65  *   which yields both better quality and smaller files
  66  * - now using the correct colorspace (TODO: move conversion to libswscale)
  67  *
  68  * version 2013-02-08 Rl
  69  * - fixes/optimization in multistrip encoding and codebook size choice,
  70  *   quality/bitrate is now better than that of the binary proprietary encoder
  71  */
  72
  73 #include "libavutil/intreadwrite.h"
  74 #include "avcodec.h"
  75 #include "libavutil/lfg.h"
  76 #include "elbg.h"
  77 #include "internal.h"
  78
  79 #include "libavutil/avassert.h"
  80 #include "libavutil/opt.h"
  81
  82 #define CVID_HEADER_SIZE 10
  83 #define STRIP_HEADER_SIZE 12
  84 #define CHUNK_HEADER_SIZE 4
  85
  86 #define MB_SIZE 4           //4x4 MBs
  87 #define MB_AREA (MB_SIZE*MB_SIZE)
  88
  89 #define VECTOR_MAX 6        //six or four entries per vector depending on format
  90 #define CODEBOOK_MAX 256    //size of a codebook
  91
  92 #define MAX_STRIPS  32      //Note: having fewer choices regarding the number of strips speeds up encoding (obviously)
  93 #define MIN_STRIPS  1       //Note: having more strips speeds up encoding the frame (this is less obvious)
  94 // MAX_STRIPS limits the maximum quality you can reach
  95 //            when you want high quality on high resolutions,
  96 // MIN_STRIPS limits the minimum efficiently encodable bit rate
  97 //            on low resolutions
  98 // the numbers are only used for brute force optimization for the first frame,
  99 // for the following frames they are adaptively readjusted
 100 // NOTE the decoder in ffmpeg has its own arbitrary limitation on the number
 101 // of strips, currently 32
 102
 103 typedef enum {
 104     MODE_V1_ONLY = 0,
 105     MODE_V1_V4,
 106     MODE_MC,
 107
 108     MODE_COUNT,
 109 } CinepakMode;
 110
 111 typedef enum {
 112     ENC_V1,
 113     ENC_V4,
 114     ENC_SKIP,
 115
 116     ENC_UNCERTAIN
 117 } mb_encoding;
 118
 119 typedef struct {
 120     int v1_vector;                  //index into v1 codebook
 121     int v1_error;                   //error when using V1 encoding
 122     int v4_vector[4];               //indices into v4 codebooks
 123     int v4_error;                   //error when using V4 encoding
 124     int skip_error;                 //error when block is skipped (aka copied from last frame)
 125     mb_encoding best_encoding;      //last result from calculate_mode_score()
 126 } mb_info;
 127
 128 typedef struct {
 129     int v1_codebook[CODEBOOK_MAX*VECTOR_MAX];
 130     int v4_codebook[CODEBOOK_MAX*VECTOR_MAX];
 131     int v1_size;
 132     int v4_size;
 133     CinepakMode mode;
 134 } strip_info;
 135
 136 typedef struct {
 137     const AVClass *class;
 138     AVCodecContext *avctx;
 139     unsigned char *pict_bufs[4], *strip_buf, *frame_buf;
 140     AVFrame *last_frame;
 141     AVFrame *best_frame;
 142     AVFrame *scratch_frame;
 143     AVFrame *input_frame;
 144     enum AVPixelFormat pix_fmt;
 145     int w, h;
 146     int frame_buf_size;
 147     int curframe, keyint;
 148     AVLFG randctx;
 149     uint64_t lambda;
 150     int *codebook_input;
 151     int *codebook_closest;
 152     mb_info *mb;                                //MB RD state
 153     int min_strips;          //the current limit
 154     int max_strips;          //the current limit
 155 #ifdef CINEPAKENC_DEBUG
 156     mb_info *best_mb;                           //TODO: remove. only used for printing stats
 157     int num_v1_mode, num_v4_mode, num_mc_mode;
 158     int num_v1_encs, num_v4_encs, num_skips;
 159 #endif
 160 // options
 161     int max_extra_cb_iterations;
 162     int skip_empty_cb;
 163     int min_min_strips;
 164     int max_max_strips;
 165     int strip_number_delta_range;
 166 } CinepakEncContext;
 167
 168 #define OFFSET(x) offsetof(CinepakEncContext, x)
 169 #define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
 170 static const AVOption options[] = {
 171     { "max_extra_cb_iterations", "Max extra codebook recalculation passes, more is better and slower", OFFSET(max_extra_cb_iterations), AV_OPT_TYPE_INT, { .i64 = 2 }, 0, INT_MAX, VE },
 172     { "skip_empty_cb", "Avoid wasting bytes, ignore vintage MacOS decoder", OFFSET(skip_empty_cb), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, VE },
 173     { "max_strips", "Limit strips/frame, vintage compatible is 1..3, otherwise the more the better", OFFSET(max_max_strips), AV_OPT_TYPE_INT, { .i64 = 3 }, MIN_STRIPS, MAX_STRIPS, VE },
 174     { "min_strips", "Enforce min strips/frame, more is worse and faster, must be <= max_strips", OFFSET(min_min_strips), AV_OPT_TYPE_INT, { .i64 = MIN_STRIPS }, MIN_STRIPS, MAX_STRIPS, VE },
 175     { "strip_number_adaptivity", "How fast the strip number adapts, more is slightly better, much slower", OFFSET(strip_number_delta_range), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, MAX_STRIPS-MIN_STRIPS, VE },
 176     { NULL },
 177 };
 178
 179 static const AVClass cinepak_class = {
 180     .class_name = "cinepak",
 181     .item_name  = av_default_item_name,
 182     .option     = options,
 183     .version    = LIBAVUTIL_VERSION_INT,
 184 };
 185
 186 static av_cold int cinepak_encode_init(AVCodecContext *avctx)
 187 {
 188     CinepakEncContext *s = avctx->priv_data;
 189     int x, mb_count, strip_buf_size, frame_buf_size;
 190
 191     if (avctx->width & 3 || avctx->height & 3) {
 192         av_log(avctx, AV_LOG_ERROR, "width and height must be multiples of four (got %ix%i)\n",
 193                 avctx->width, avctx->height);
 194         return AVERROR(EINVAL);
 195     }
 196
 197     if (s->min_min_strips > s->max_max_strips) {
 198         av_log(avctx, AV_LOG_ERROR, "minimal number of strips can not exceed maximal (got %i and %i)\n",
 199                 s->min_min_strips, s->max_max_strips);
 200         return AVERROR(EINVAL);
 201     }
 202
 203     if (!(s->last_frame = av_frame_alloc()))
 204         return AVERROR(ENOMEM);
 205     if (!(s->best_frame = av_frame_alloc()))
 206         goto enomem;
 207     if (!(s->scratch_frame = av_frame_alloc()))
 208         goto enomem;
 209     if (avctx->pix_fmt == AV_PIX_FMT_RGB24)
 210         if (!(s->input_frame = av_frame_alloc()))
 211             goto enomem;
 212
 213     if (!(s->codebook_input = av_malloc(sizeof(int) * (avctx->pix_fmt == AV_PIX_FMT_RGB24 ? 6 : 4) * (avctx->width * avctx->height) >> 2)))
 214         goto enomem;
 215
 216     if (!(s->codebook_closest = av_malloc(sizeof(int) * (avctx->width * avctx->height) >> 2)))
 217         goto enomem;
 218
 219     for(x = 0; x < (avctx->pix_fmt == AV_PIX_FMT_RGB24 ? 4 : 3); x++)
 220         if(!(s->pict_bufs[x] = av_malloc((avctx->pix_fmt == AV_PIX_FMT_RGB24 ? 6 : 4) * (avctx->width * avctx->height) >> 2)))
 221             goto enomem;
 222
 223     mb_count = avctx->width * avctx->height / MB_AREA;
 224
 225     //the largest possible chunk is 0x31 with all MBs encoded in V4 mode
 226     //and full codebooks being replaced in INTER mode,
 227     // which is 34 bits per MB
 228     //and 2*256 extra flag bits per strip
 229     strip_buf_size = STRIP_HEADER_SIZE + 3 * CHUNK_HEADER_SIZE + 2 * VECTOR_MAX * CODEBOOK_MAX + 4 * (mb_count + (mb_count + 15) / 16) + (2 * CODEBOOK_MAX)/8;
 230
 231     frame_buf_size = CVID_HEADER_SIZE + s->max_max_strips * strip_buf_size;
 232
 233     if (!(s->strip_buf = av_malloc(strip_buf_size)))
 234         goto enomem;
 235
 236     if (!(s->frame_buf = av_malloc(frame_buf_size)))
 237         goto enomem;
 238
 239     if (!(s->mb = av_malloc_array(mb_count, sizeof(mb_info))))
 240         goto enomem;
 241
 242 #ifdef CINEPAKENC_DEBUG
 243     if (!(s->best_mb = av_malloc_array(mb_count, sizeof(mb_info))))
 244         goto enomem;
 245 #endif
 246
 247     av_lfg_init(&s->randctx, 1);
 248     s->avctx = avctx;
 249     s->w = avctx->width;
 250     s->h = avctx->height;
 251     s->frame_buf_size = frame_buf_size;
 252     s->curframe = 0;
 253     s->keyint = avctx->keyint_min;
 254     s->pix_fmt = avctx->pix_fmt;
 255
 256     //set up AVFrames
 257     s->last_frame->data[0]        = s->pict_bufs[0];
 258     s->last_frame->linesize[0]    = s->w;
 259     s->best_frame->data[0]        = s->pict_bufs[1];
 260     s->best_frame->linesize[0]    = s->w;
 261     s->scratch_frame->data[0]     = s->pict_bufs[2];
 262     s->scratch_frame->linesize[0] = s->w;
 263
 264     if (s->pix_fmt == AV_PIX_FMT_RGB24) {
 265         s->last_frame->data[1]        = s->last_frame->data[0] + s->w * s->h;
 266         s->last_frame->data[2]        = s->last_frame->data[1] + ((s->w * s->h) >> 2);
 267         s->last_frame->linesize[1]    = s->last_frame->linesize[2] = s->w >> 1;
 268
 269         s->best_frame->data[1]        = s->best_frame->data[0] + s->w * s->h;
 270         s->best_frame->data[2]        = s->best_frame->data[1] + ((s->w * s->h) >> 2);
 271         s->best_frame->linesize[1]    = s->best_frame->linesize[2] = s->w >> 1;
 272
 273         s->scratch_frame->data[1]     = s->scratch_frame->data[0] + s->w * s->h;
 274         s->scratch_frame->data[2]     = s->scratch_frame->data[1] + ((s->w * s->h) >> 2);
 275         s->scratch_frame->linesize[1] = s->scratch_frame->linesize[2] = s->w >> 1;
 276
 277         s->input_frame->data[0]       = s->pict_bufs[3];
 278         s->input_frame->linesize[0]   = s->w;
 279         s->input_frame->data[1]       = s->input_frame->data[0] + s->w * s->h;
 280         s->input_frame->data[2]       = s->input_frame->data[1] + ((s->w * s->h) >> 2);
 281         s->input_frame->linesize[1]   = s->input_frame->linesize[2] = s->w >> 1;
 282     }
 283
 284     s->min_strips = s->min_min_strips;
 285     s->max_strips = s->max_max_strips;
 286
 287 #ifdef CINEPAKENC_DEBUG
 288     s->num_v1_mode = s->num_v4_mode = s->num_mc_mode = s->num_v1_encs = s->num_v4_encs = s->num_skips = 0;
 289 #endif
 290
 291     return 0;
 292
 293 enomem:
 294     av_frame_free(&s->last_frame);
 295     av_frame_free(&s->best_frame);
 296     av_frame_free(&s->scratch_frame);
 297     if (avctx->pix_fmt == AV_PIX_FMT_RGB24)
 298         av_frame_free(&s->input_frame);
 299     av_freep(&s->codebook_input);
 300     av_freep(&s->codebook_closest);
 301     av_freep(&s->strip_buf);
 302     av_freep(&s->frame_buf);
 303     av_freep(&s->mb);
 304 #ifdef CINEPAKENC_DEBUG
 305     av_freep(&s->best_mb);
 306 #endif
 307
 308     for(x = 0; x < (avctx->pix_fmt == AV_PIX_FMT_RGB24 ? 4 : 3); x++)
 309         av_freep(&s->pict_bufs[x]);
 310
 311     return AVERROR(ENOMEM);
 312 }
 313
 314 static int64_t calculate_mode_score(CinepakEncContext *s, int h, strip_info *info, int report, int *training_set_v1_shrunk, int *training_set_v4_shrunk
 315 #ifdef CINEPAK_REPORT_SERR
 316 , int64_t *serr
 317 #endif
 318 )
 319 {
 320     //score = FF_LAMBDA_SCALE * error + lambda * bits
 321     int x;
 322     int entry_size = s->pix_fmt == AV_PIX_FMT_RGB24 ? 6 : 4;
 323     int mb_count = s->w * h / MB_AREA;
 324     mb_info *mb;
 325     int64_t score1, score2, score3;
 326     int64_t ret = s->lambda * ((info->v1_size ? CHUNK_HEADER_SIZE + info->v1_size * entry_size : 0) +
 327                    (info->v4_size ? CHUNK_HEADER_SIZE + info->v4_size * entry_size : 0) +
 328                    CHUNK_HEADER_SIZE) << 3;
 329
 330     //av_log(s->avctx, AV_LOG_INFO, "sizes %3i %3i -> %9"PRId64" score mb_count %i", info->v1_size, info->v4_size, ret, mb_count);
 331
 332 #ifdef CINEPAK_REPORT_SERR
 333     *serr = 0;
 334 #endif
 335
 336     switch(info->mode) {
 337     case MODE_V1_ONLY:
 338         //one byte per MB
 339         ret += s->lambda * 8 * mb_count;
 340
 341 // while calculating we assume all blocks are ENC_V1
 342         for(x = 0; x < mb_count; x++) {
 343             mb = &s->mb[x];
 344             ret += FF_LAMBDA_SCALE * mb->v1_error;
 345 #ifdef CINEPAK_REPORT_SERR
 346             *serr += mb->v1_error;
 347 #endif
 348 // this function is never called for report in MODE_V1_ONLY
 349 //            if(!report)
 350             mb->best_encoding = ENC_V1;
 351         }
 352
 353         break;
 354     case MODE_V1_V4:
 355         //9 or 33 bits per MB
 356         if(report) {
 357 // no moves between the corresponding training sets are allowed
 358             *training_set_v1_shrunk = *training_set_v4_shrunk = 0;
 359             for(x = 0; x < mb_count; x++) {
 360                 int mberr;
 361                 mb = &s->mb[x];
 362                 if(mb->best_encoding == ENC_V1)
 363                     score1 = s->lambda * 9  + FF_LAMBDA_SCALE * (mberr=mb->v1_error);
 364                 else
 365                     score1 = s->lambda * 33 + FF_LAMBDA_SCALE * (mberr=mb->v4_error);
 366                 ret += score1;
 367 #ifdef CINEPAK_REPORT_SERR
 368                 *serr += mberr;
 369 #endif
 370             }
 371         } else { // find best mode per block
 372             for(x = 0; x < mb_count; x++) {
 373                 mb = &s->mb[x];
 374                 score1 = s->lambda * 9  + FF_LAMBDA_SCALE * mb->v1_error;
 375                 score2 = s->lambda * 33 + FF_LAMBDA_SCALE * mb->v4_error;
 376
 377                 if(score1 <= score2) {
 378                     ret += score1;
 379 #ifdef CINEPAK_REPORT_SERR
 380                     *serr += mb->v1_error;
 381 #endif
 382                     mb->best_encoding = ENC_V1;
 383                 } else {
 384                     ret += score2;
 385 #ifdef CINEPAK_REPORT_SERR
 386                     *serr += mb->v4_error;
 387 #endif
 388                     mb->best_encoding = ENC_V4;
 389                 }
 390             }
 391         }
 392
 393         break;
 394     case MODE_MC:
 395         //1, 10 or 34 bits per MB
 396         if(report) {
 397             int v1_shrunk = 0, v4_shrunk = 0;
 398             for(x = 0; x < mb_count; x++) {
 399                 mb = &s->mb[x];
 400 // it is OK to move blocks to ENC_SKIP here
 401 // but not to any codebook encoding!
 402                 score1 = s->lambda * 1  + FF_LAMBDA_SCALE * mb->skip_error;
 403                 if(mb->best_encoding == ENC_SKIP) {
 404                     ret += score1;
 405 #ifdef CINEPAK_REPORT_SERR
 406                     *serr += mb->skip_error;
 407 #endif
 408                 } else if(mb->best_encoding == ENC_V1) {
 409                     if((score2=s->lambda * 10 + FF_LAMBDA_SCALE * mb->v1_error) >= score1) {
 410                         mb->best_encoding = ENC_SKIP;
 411                         ++v1_shrunk;
 412                         ret += score1;
 413 #ifdef CINEPAK_REPORT_SERR
 414                         *serr += mb->skip_error;
 415 #endif
 416                     } else {
 417                         ret += score2;
 418 #ifdef CINEPAK_REPORT_SERR
 419                         *serr += mb->v1_error;
 420 #endif
 421                     }
 422                 } else {
 423                     if((score3=s->lambda * 34 + FF_LAMBDA_SCALE * mb->v4_error) >= score1) {
 424                         mb->best_encoding = ENC_SKIP;
 425                         ++v4_shrunk;
 426                         ret += score1;
 427 #ifdef CINEPAK_REPORT_SERR
 428                         *serr += mb->skip_error;
 429 #endif
 430                     } else {
 431                         ret += score3;
 432 #ifdef CINEPAK_REPORT_SERR
 433                         *serr += mb->v4_error;
 434 #endif
 435                     }
 436                 }
 437             }
 438             *training_set_v1_shrunk = v1_shrunk;
 439             *training_set_v4_shrunk = v4_shrunk;
 440         } else { // find best mode per block
 441             for(x = 0; x < mb_count; x++) {
 442                 mb = &s->mb[x];
 443                 score1 = s->lambda * 1  + FF_LAMBDA_SCALE * mb->skip_error;
 444                 score2 = s->lambda * 10 + FF_LAMBDA_SCALE * mb->v1_error;
 445                 score3 = s->lambda * 34 + FF_LAMBDA_SCALE * mb->v4_error;
 446
 447                 if(score1 <= score2 && score1 <= score3) {
 448                     ret += score1;
 449 #ifdef CINEPAK_REPORT_SERR
 450                     *serr += mb->skip_error;
 451 #endif
 452                     mb->best_encoding = ENC_SKIP;
 453                 } else if(score2 <= score3) {
 454                     ret += score2;
 455 #ifdef CINEPAK_REPORT_SERR
 456                     *serr += mb->v1_error;
 457 #endif
 458                     mb->best_encoding = ENC_V1;
 459                 } else {
 460                     ret += score3;
 461 #ifdef CINEPAK_REPORT_SERR
 462                     *serr += mb->v4_error;
 463 #endif
 464                     mb->best_encoding = ENC_V4;
 465                 }
 466             }
 467         }
 468
 469         break;
 470     }
 471
 472     return ret;
 473 }
 474
 475 static int write_chunk_header(unsigned char *buf, int chunk_type, int chunk_size)
 476 {
 477     buf[0] = chunk_type;
 478     AV_WB24(&buf[1], chunk_size + CHUNK_HEADER_SIZE);
 479     return CHUNK_HEADER_SIZE;
 480 }
 481
 482 static int encode_codebook(CinepakEncContext *s, int *codebook, int size, int chunk_type_yuv, int chunk_type_gray, unsigned char *buf)
 483 {
 484     int x, y, ret, entry_size = s->pix_fmt == AV_PIX_FMT_RGB24 ? 6 : 4;
 485     int incremental_codebook_replacement_mode = 0; // hardcoded here,
 486                 // the compiler should notice that this is a constant -- rl
 487
 488     ret = write_chunk_header(buf,
 489           s->pix_fmt == AV_PIX_FMT_RGB24 ?
 490            chunk_type_yuv+(incremental_codebook_replacement_mode?1:0) :
 491            chunk_type_gray+(incremental_codebook_replacement_mode?1:0),
 492           entry_size * size
 493            + (incremental_codebook_replacement_mode?(size+31)/32*4:0) );
 494
 495 // we do codebook encoding according to the "intra" mode
 496 // but we keep the "dead" code for reference in case we will want
 497 // to use incremental codebook updates (which actually would give us
 498 // "kind of" motion compensation, especially in 1 strip/frame case) -- rl
 499 // (of course, the code will be not useful as-is)
 500     if(incremental_codebook_replacement_mode) {
 501         int flags = 0;
 502         int flagsind;
 503         for(x = 0; x < size; x++) {
 504             if(flags == 0) {
 505                 flagsind = ret;
 506                 ret += 4;
 507                 flags = 0x80000000;
 508             } else
 509                 flags = ((flags>>1) | 0x80000000);
 510             for(y = 0; y < entry_size; y++)
 511                 buf[ret++] = codebook[y + x*entry_size] ^ (y >= 4 ? 0x80 : 0);
 512             if((flags&0xffffffff) == 0xffffffff) {
 513                 AV_WB32(&buf[flagsind], flags);
 514                 flags = 0;
 515             }
 516         }
 517         if(flags)
 518             AV_WB32(&buf[flagsind], flags);
 519     } else
 520         for(x = 0; x < size; x++)
 521             for(y = 0; y < entry_size; y++)
 522                 buf[ret++] = codebook[y + x*entry_size] ^ (y >= 4 ? 0x80 : 0);
 523
 524     return ret;
 525 }
 526
 527 //sets out to the sub picture starting at (x,y) in in
 528 static void get_sub_picture(CinepakEncContext *s, int x, int y,
 529                             uint8_t * in_data[4], int  in_linesize[4],
 530                             uint8_t *out_data[4], int out_linesize[4])
 531 {
 532     out_data[0] = in_data[0] + x + y * in_linesize[0];
 533     out_linesize[0] = in_linesize[0];
 534
 535     if(s->pix_fmt == AV_PIX_FMT_RGB24) {
 536         out_data[1] = in_data[1] + (x >> 1) + (y >> 1) * in_linesize[1];
 537         out_linesize[1] = in_linesize[1];
 538
 539         out_data[2] = in_data[2] + (x >> 1) + (y >> 1) * in_linesize[2];
 540         out_linesize[2] = in_linesize[2];
 541     }
 542 }
 543
 544 //decodes the V1 vector in mb into the 4x4 MB pointed to by data
 545 static void decode_v1_vector(CinepakEncContext *s, uint8_t *data[4],
 546                              int linesize[4], int v1_vector, strip_info *info)
 547 {
 548     int entry_size = s->pix_fmt == AV_PIX_FMT_RGB24 ? 6 : 4;
 549
 550     data[0][0] =
 551             data[0][1] =
 552             data[0][    linesize[0]] =
 553             data[0][1+  linesize[0]] = info->v1_codebook[v1_vector*entry_size];
 554
 555     data[0][2] =
 556             data[0][3] =
 557             data[0][2+  linesize[0]] =
 558             data[0][3+  linesize[0]] = info->v1_codebook[v1_vector*entry_size+1];
 559
 560     data[0][2*linesize[0]] =
 561             data[0][1+2*linesize[0]] =
 562             data[0][  3*linesize[0]] =
 563             data[0][1+3*linesize[0]] = info->v1_codebook[v1_vector*entry_size+2];
 564
 565     data[0][2+2*linesize[0]] =
 566             data[0][3+2*linesize[0]] =
 567             data[0][2+3*linesize[0]] =
 568             data[0][3+3*linesize[0]] = info->v1_codebook[v1_vector*entry_size+3];
 569
 570     if(s->pix_fmt == AV_PIX_FMT_RGB24) {
 571         data[1][0] =
 572             data[1][1] =
 573             data[1][    linesize[1]] =
 574             data[1][1+  linesize[1]] = info->v1_codebook[v1_vector*entry_size+4];
 575
 576         data[2][0] =
 577             data[2][1] =
 578             data[2][    linesize[2]] =
 579             data[2][1+  linesize[2]] = info->v1_codebook[v1_vector*entry_size+5];
 580     }
 581 }
 582
 583 //decodes the V4 vectors in mb into the 4x4 MB pointed to by data
 584 static void decode_v4_vector(CinepakEncContext *s, uint8_t *data[4],
 585                              int linesize[4], int *v4_vector, strip_info *info)
 586 {
 587     int i, x, y, entry_size = s->pix_fmt == AV_PIX_FMT_RGB24 ? 6 : 4;
 588
 589     for(i = y = 0; y < 4; y += 2) {
 590         for(x = 0; x < 4; x += 2, i++) {
 591             data[0][x   +     y*linesize[0]] = info->v4_codebook[v4_vector[i]*entry_size];
 592             data[0][x+1 +     y*linesize[0]] = info->v4_codebook[v4_vector[i]*entry_size+1];
 593             data[0][x   + (y+1)*linesize[0]] = info->v4_codebook[v4_vector[i]*entry_size+2];
 594             data[0][x+1 + (y+1)*linesize[0]] = info->v4_codebook[v4_vector[i]*entry_size+3];
 595
 596             if(s->pix_fmt == AV_PIX_FMT_RGB24) {
 597                 data[1][(x>>1) + (y>>1)*linesize[1]] = info->v4_codebook[v4_vector[i]*entry_size+4];
 598                 data[2][(x>>1) + (y>>1)*linesize[2]] = info->v4_codebook[v4_vector[i]*entry_size+5];
 599             }
 600         }
 601     }
 602 }
 603
 604 static void copy_mb(CinepakEncContext *s,
 605                     uint8_t *a_data[4], int a_linesize[4],
 606                     uint8_t *b_data[4], int b_linesize[4])
 607 {
 608     int y, p;
 609
 610     for(y = 0; y < MB_SIZE; y++) {
 611         memcpy(a_data[0]+y*a_linesize[0], b_data[0]+y*b_linesize[0],
 612                MB_SIZE);
 613     }
 614
 615     if(s->pix_fmt == AV_PIX_FMT_RGB24) {
 616         for(p = 1; p <= 2; p++) {
 617             for(y = 0; y < MB_SIZE/2; y++) {
 618                 memcpy(a_data[p] + y*a_linesize[p],
 619                        b_data[p] + y*b_linesize[p],
 620                        MB_SIZE/2);
 621             }
 622         }
 623     }
 624 }
 625
 626 static int encode_mode(CinepakEncContext *s, int h,
 627                        uint8_t *scratch_data[4], int scratch_linesize[4],
 628                        uint8_t *last_data[4], int last_linesize[4],
 629                        strip_info *info, unsigned char *buf)
 630 {
 631     int x, y, z, flags, bits, temp_size, header_ofs, ret = 0, mb_count = s->w * h / MB_AREA;
 632     int needs_extra_bit, should_write_temp;
 633     unsigned char temp[64]; //32/2 = 16 V4 blocks at 4 B each -> 64 B
 634     mb_info *mb;
 635     uint8_t *sub_scratch_data[4] = {0}, *sub_last_data[4] = {0};
 636     int sub_scratch_linesize[4] = {0}, sub_last_linesize[4] = {0};
 637
 638     //encode codebooks
 639 ////// MacOS vintage decoder compatibility dictates the presence of
 640 ////// the codebook chunk even when the codebook is empty - pretty dumb...
 641 ////// and also the certain order of the codebook chunks -- rl
 642     if(info->v4_size || !s->skip_empty_cb)
 643         ret += encode_codebook(s, info->v4_codebook, info->v4_size, 0x20, 0x24, buf + ret);
 644
 645     if(info->v1_size || !s->skip_empty_cb)
 646         ret += encode_codebook(s, info->v1_codebook, info->v1_size, 0x22, 0x26, buf + ret);
 647
 648     //update scratch picture
 649     for(z = y = 0; y < h; y += MB_SIZE) {
 650         for(x = 0; x < s->w; x += MB_SIZE, z++) {
 651             mb = &s->mb[z];
 652
 653             get_sub_picture(s, x, y, scratch_data, scratch_linesize,
 654                             sub_scratch_data, sub_scratch_linesize);
 655
 656             if(info->mode == MODE_MC && mb->best_encoding == ENC_SKIP) {
 657                 get_sub_picture(s, x, y,
 658                                 last_data, last_linesize,
 659                                 sub_last_data, sub_last_linesize);
 660                 copy_mb(s, sub_scratch_data, sub_scratch_linesize,
 661                         sub_last_data, sub_last_linesize);
 662             } else if(info->mode == MODE_V1_ONLY || mb->best_encoding == ENC_V1)
 663                 decode_v1_vector(s, sub_scratch_data, sub_scratch_linesize,
 664                                  mb->v1_vector, info);
 665             else
 666                 decode_v4_vector(s, sub_scratch_data, sub_scratch_linesize,
 667                                  mb->v4_vector, info);
 668         }
 669     }
 670
 671     switch(info->mode) {
 672     case MODE_V1_ONLY:
 673         //av_log(s->avctx, AV_LOG_INFO, "mb_count = %i\n", mb_count);
 674         ret += write_chunk_header(buf + ret, 0x32, mb_count);
 675
 676         for(x = 0; x < mb_count; x++)
 677             buf[ret++] = s->mb[x].v1_vector;
 678
 679         break;
 680     case MODE_V1_V4:
 681         //remember header position
 682         header_ofs = ret;
 683         ret += CHUNK_HEADER_SIZE;
 684
 685         for(x = 0; x < mb_count; x += 32) {
 686             flags = 0;
 687             for(y = x; y < FFMIN(x+32, mb_count); y++)
 688                 if(s->mb[y].best_encoding == ENC_V4)
 689                     flags |= 1 << (31 - y + x);
 690
 691             AV_WB32(&buf[ret], flags);
 692             ret += 4;
 693
 694             for(y = x; y < FFMIN(x+32, mb_count); y++) {
 695                 mb = &s->mb[y];
 696
 697                 if(mb->best_encoding == ENC_V1)
 698                     buf[ret++] = mb->v1_vector;
 699                 else
 700                     for(z = 0; z < 4; z++)
 701                         buf[ret++] = mb->v4_vector[z];
 702             }
 703         }
 704
 705         write_chunk_header(buf + header_ofs, 0x30, ret - header_ofs - CHUNK_HEADER_SIZE);
 706
 707         break;
 708     case MODE_MC:
 709         //remember header position
 710         header_ofs = ret;
 711         ret += CHUNK_HEADER_SIZE;
 712         flags = bits = temp_size = 0;
 713
 714         for(x = 0; x < mb_count; x++) {
 715             mb = &s->mb[x];
 716             flags |= (mb->best_encoding != ENC_SKIP) << (31 - bits++);
 717             needs_extra_bit = 0;
 718             should_write_temp = 0;
 719
 720             if(mb->best_encoding != ENC_SKIP) {
 721                 if(bits < 32)
 722                     flags |= (mb->best_encoding == ENC_V4) << (31 - bits++);
 723                 else
 724                     needs_extra_bit = 1;
 725             }
 726
 727             if(bits == 32) {
 728                 AV_WB32(&buf[ret], flags);
 729                 ret += 4;
 730                 flags = bits = 0;
 731
 732                 if(mb->best_encoding == ENC_SKIP || needs_extra_bit) {
 733                     memcpy(&buf[ret], temp, temp_size);
 734                     ret += temp_size;
 735                     temp_size = 0;
 736                 } else
 737                     should_write_temp = 1;
 738             }
 739
 740             if(needs_extra_bit) {
 741                 flags = (mb->best_encoding == ENC_V4) << 31;
 742                 bits = 1;
 743             }
 744
 745             if(mb->best_encoding == ENC_V1)
 746                 temp[temp_size++] = mb->v1_vector;
 747             else if(mb->best_encoding == ENC_V4)
 748                 for(z = 0; z < 4; z++)
 749                     temp[temp_size++] = mb->v4_vector[z];
 750
 751             if(should_write_temp) {
 752                 memcpy(&buf[ret], temp, temp_size);
 753                 ret += temp_size;
 754                 temp_size = 0;
 755             }
 756         }
 757
 758         if(bits > 0) {
 759             AV_WB32(&buf[ret], flags);
 760             ret += 4;
 761             memcpy(&buf[ret], temp, temp_size);
 762             ret += temp_size;
 763         }
 764
 765         write_chunk_header(buf + header_ofs, 0x31, ret - header_ofs - CHUNK_HEADER_SIZE);
 766
 767         break;
 768     }
 769
 770     return ret;
 771 }
 772
 773 //computes distortion of 4x4 MB in b compared to a
 774 static int compute_mb_distortion(CinepakEncContext *s,
 775                                  uint8_t *a_data[4], int a_linesize[4],
 776                                  uint8_t *b_data[4], int b_linesize[4])
 777 {
 778     int x, y, p, d, ret = 0;
 779
 780     for(y = 0; y < MB_SIZE; y++) {
 781         for(x = 0; x < MB_SIZE; x++) {
 782             d = a_data[0][x + y*a_linesize[0]] - b_data[0][x + y*b_linesize[0]];
 783             ret += d*d;
 784         }
 785     }
 786
 787     if(s->pix_fmt == AV_PIX_FMT_RGB24) {
 788         for(p = 1; p <= 2; p++) {
 789             for(y = 0; y < MB_SIZE/2; y++) {
 790                 for(x = 0; x < MB_SIZE/2; x++) {
 791                     d = a_data[p][x + y*a_linesize[p]] - b_data[p][x + y*b_linesize[p]];
 792                     ret += d*d;
 793                 }
 794             }
 795         }
 796     }
 797
 798     return ret;
 799 }
 800
/**
 * Train one codebook (V1 or V4) for a strip and assign codebook vectors to
 * its macroblocks.
 *
 * The function works in three phases:
 *  1. build the training set in s->codebook_input from the strip's
 *     macroblocks (all of them when encoding is ENC_UNCERTAIN, otherwise only
 *     those whose best_encoding equals the given encoding);
 *  2. run ELBG (avpriv_init_elbg() / avpriv_do_elbg()) to fill the codebook
 *     and s->codebook_closest;
 *  3. store the chosen vector index/indices in each participating macroblock
 *     and record its resulting distortion (v1_error or v4_error).
 *
 * @param s        encoder context
 * @param h        strip height in pixels
 * @param data     plane pointers of the strip
 * @param linesize per-plane strides of the strip
 * @param v1mode   non-zero to train the V1 codebook (one vector per
 *                 macroblock), zero to train the V4 codebook (four vectors
 *                 per macroblock)
 * @param info     strip state; the requested codebook size is read from
 *                 v1_size/v4_size and the codebook is written to
 *                 v1_codebook/v4_codebook
 * @param encoding ENC_UNCERTAIN to use every macroblock, otherwise the
 *                 encoding a macroblock must currently have to take part
 * @return the possibly adjusted size of the codebook; 0 if the training set
 *         is empty
 */
#define CERTAIN(x) ((x)!=ENC_UNCERTAIN)
static int quantize(CinepakEncContext *s, int h,
                    uint8_t *data[4], int linesize[4],
                    int v1mode, strip_info *info,
                    mb_encoding encoding)
{
    int x, y, i, j, k, x2, y2, x3, y3, plane, shift, mbn;
    // a vector holds 4 luma values, plus 2 chroma values for RGB24 input
    int entry_size = s->pix_fmt == AV_PIX_FMT_RGB24 ? 6 : 4;
    int *codebook = v1mode ? info->v1_codebook : info->v4_codebook;
    int size = v1mode ? info->v1_size : info->v4_size;
    // accumulated below, but only referenced by the commented-out log call
    int64_t total_error = 0;
    // backing store for one decoded macroblock: luma plus two quarter-size planes
    uint8_t vq_pict_buf[(MB_AREA*3)/2];
    uint8_t *sub_data    [4], *vq_data    [4];
    int      sub_linesize[4],  vq_linesize[4];

    // phase 1: collect training vectors; i counts the vectors written so far,
    // mbn is the index of the current macroblock within the strip
    for(mbn = i = y = 0; y < h; y += MB_SIZE) {
        for(x = 0; x < s->w; x += MB_SIZE, ++mbn) {
            int *base;

            if(CERTAIN(encoding)) {
// use for the training only the blocks known to be encoded with this encoding
               if(s->mb[mbn].best_encoding != encoding) continue;
            }

            base = s->codebook_input + i*entry_size;
            if(v1mode) {
                //subsample: every vector component is the average of a 2x2 block
                // y2 = 0, 2 walks the two luma rows of 2x2 blocks; y2 = 4 (only
                // reached when entry_size is 6) selects the chroma planes
                for(j = y2 = 0; y2 < entry_size; y2 += 2) {
                    for(x2 = 0; x2 < 4; x2 += 2, j++) {
                        // for chroma, x2 = 0 picks plane 1 and x2 = 2 picks plane 2
                        plane = y2 < 4 ? 0 : 1 + (x2 >> 1);
                        shift = y2 < 4 ? 0 : 1;
                        x3 = shift ? 0 : x2;
                        y3 = shift ? 0 : y2;
                        base[j] = (data[plane][((x+x3) >> shift) +      ((y+y3) >> shift)      * linesize[plane]] +
                                   data[plane][((x+x3) >> shift) + 1 +  ((y+y3) >> shift)      * linesize[plane]] +
                                   data[plane][((x+x3) >> shift) +     (((y+y3) >> shift) + 1) * linesize[plane]] +
                                   data[plane][((x+x3) >> shift) + 1 + (((y+y3) >> shift) + 1) * linesize[plane]]) >> 2;
                    }
                }
            } else {
                //copy: one vector per 2x2 quadrant of the macroblock
                for(j = y2 = 0; y2 < MB_SIZE; y2 += 2) {
                    for(x2 = 0; x2 < MB_SIZE; x2 += 2) {
                        for(k = 0; k < entry_size; k++, j++) {
                            // k = 0..3 are the luma samples, k = 4, 5 map to planes 1, 2
                            plane = k >= 4 ? k - 3 : 0;

                            if(k >= 4) {
                                // chroma is addressed at half resolution
                                x3 = (x+x2) >> 1;
                                y3 = (y+y2) >> 1;
                            } else {
                                // luma sample k of the quadrant, in raster order
                                x3 = x + x2 + (k & 1);
                                y3 = y + y2 + (k >> 1);
                            }

                            base[j] = data[plane][x3 + y3*linesize[plane]];
                        }
                    }
                }
            }
            i += v1mode ? 1 : 4;
        }
    }
//    if(i < mbn*(v1mode ? 1 : 4)) {
//        av_log(s->avctx, AV_LOG_INFO, "reducing training set for %s from %i to %i (encoding %i)\n", v1mode?"v1":"v4", mbn*(v1mode ? 1 : 4), i, encoding);
//    }

    if(i == 0) // empty training set, nothing to do
        return 0;
    if(i < size) {
        // fewer training vectors than requested entries: shrink the codebook
        //av_log(s->avctx, (CERTAIN(encoding) ? AV_LOG_ERROR : AV_LOG_INFO), "WOULD WASTE: %s cbsize %i bigger than training set size %i (encoding %i)\n", v1mode?"v1":"v4", size, i, encoding);
        size = i;
    }

    // phase 2: run ELBG over the i training vectors
    avpriv_init_elbg(s->codebook_input, entry_size, i, codebook, size, 1, s->codebook_closest, &s->randctx);
    avpriv_do_elbg(s->codebook_input, entry_size, i, codebook, size, 1, s->codebook_closest, &s->randctx);

    //setup vq_data, which contains a single MB
    vq_data[0] = vq_pict_buf;
    vq_linesize[0] = MB_SIZE;
    vq_data[1] = &vq_pict_buf[MB_AREA];
    vq_data[2] = vq_data[1] + (MB_AREA >> 2);
    vq_linesize[1] = vq_linesize[2] = MB_SIZE >> 1;

    //copy indices
    // phase 3: walk the macroblocks in the same order as phase 1, so that i
    // indexes the same training vectors in s->codebook_closest; j is the
    // macroblock index
    for(i = j = y = 0; y < h; y += MB_SIZE) {
        for(x = 0; x < s->w; x += MB_SIZE, j++) {
            mb_info *mb = &s->mb[j];
// skip uninteresting blocks if we know their preferred encoding
            if(CERTAIN(encoding) && mb->best_encoding != encoding)
                continue;

            //point sub_data to current MB
            get_sub_picture(s, x, y, data, linesize, sub_data, sub_linesize);

            if(v1mode) {
                mb->v1_vector = s->codebook_closest[i];

                //fill in vq_data with V1 data
                decode_v1_vector(s, vq_data, vq_linesize, mb->v1_vector, info);

                mb->v1_error = compute_mb_distortion(s, sub_data, sub_linesize,
                                                     vq_data, vq_linesize);
                total_error += mb->v1_error;
            } else {
                for(k = 0; k < 4; k++)
                    mb->v4_vector[k] = s->codebook_closest[i+k];

                //fill in vq_data with V4 data
                decode_v4_vector(s, vq_data, vq_linesize, mb->v4_vector, info);

                mb->v4_error = compute_mb_distortion(s, sub_data, sub_linesize,
                                                     vq_data, vq_linesize);
                total_error += mb->v4_error;
            }
            i += v1mode ? 1 : 4;
        }
    }
// check that we did it right in the beginning of the function
    av_assert0(i >= size); // training set is no smaller than the codebook

    //av_log(s->avctx, AV_LOG_INFO, "isv1 %i size= %i i= %i error %"PRId64"\n", v1mode, size, i, total_error);

    return size;
}
 926
 927 static void calculate_skip_errors(CinepakEncContext *s, int h,
 928                                   uint8_t *last_data[4], int last_linesize[4],
 929                                   uint8_t *data[4], int linesize[4],
 930                                   strip_info *info)
 931 {
 932     int x, y, i;
 933     uint8_t *sub_last_data    [4], *sub_pict_data    [4];
 934     int      sub_last_linesize[4],  sub_pict_linesize[4];
 935
 936     for(i = y = 0; y < h; y += MB_SIZE) {
 937         for(x = 0; x < s->w; x += MB_SIZE, i++) {
 938             get_sub_picture(s, x, y, last_data,     last_linesize,
 939                                  sub_last_data, sub_last_linesize);
 940             get_sub_picture(s, x, y,      data,          linesize,
 941                                  sub_pict_data, sub_pict_linesize);
 942
 943             s->mb[i].skip_error = compute_mb_distortion(s,
 944                                             sub_last_data, sub_last_linesize,
 945                                             sub_pict_data, sub_pict_linesize);
 946         }
 947     }
 948 }
 949
 950 static void write_strip_header(CinepakEncContext *s, int y, int h, int keyframe, unsigned char *buf, int strip_size)
 951 {
 952 // actually we are exclusively using intra strip coding (how much can we win
 953 // otherwise? how to choose which part of a codebook to update?),
 954 // keyframes are different only because we disallow ENC_SKIP on them -- rl
 955 // (besides, the logic here used to be inverted: )
 956 //    buf[0] = keyframe ? 0x11: 0x10;
 957     buf[0] = keyframe ? 0x10: 0x11;
 958     AV_WB24(&buf[1], strip_size + STRIP_HEADER_SIZE);
 959 //    AV_WB16(&buf[4], y); /* using absolute y values works -- rl */
 960     AV_WB16(&buf[4], 0); /* using relative values works as well -- rl */
 961     AV_WB16(&buf[6], 0);
 962 //    AV_WB16(&buf[8], y+h); /* using absolute y values works -- rl */
 963     AV_WB16(&buf[8], h); /* using relative values works as well -- rl */
 964     AV_WB16(&buf[10], s->w);
 965     //av_log(s->avctx, AV_LOG_INFO, "write_strip_header() %x keyframe=%d\n", buf[0], keyframe);
 966 }
 967
/**
 * Encode one strip, searching for the codebook sizes and coding mode that
 * give the lowest score.
 *
 * For every tried (v1_size, v4_size, mode) combination the codebooks are
 * trained with quantize() and scored with calculate_mode_score(). Whenever a
 * combination beats the best score so far, the strip is encoded into
 * s->strip_buf with encode_mode() and its header is (re)written. Finally the
 * best encoding is copied to buf.
 *
 * @param s                encoder context
 * @param y                top row of the strip; only forwarded to
 *                         write_strip_header()
 * @param h                strip height in pixels
 * @param keyframe         non-zero for keyframes: skip errors are not
 *                         computed and MODE_MC is not tried
 * @param last_data        plane pointers of the strip in the previous frame
 * @param last_linesize    per-plane strides of the previous frame
 * @param data             plane pointers of the strip to encode
 * @param linesize         per-plane strides of the input
 * @param scratch_data     plane pointers passed to encode_mode()
 * @param scratch_linesize per-plane strides of the scratch picture
 * @param buf              output buffer receiving header plus strip data
 * @param best_score       output: score of the chosen encoding; the incoming
 *                         value is not read before the first assignment
 * @return number of bytes written to buf, including STRIP_HEADER_SIZE
 */
static int rd_strip(CinepakEncContext *s, int y, int h, int keyframe,
                    uint8_t *last_data[4], int last_linesize[4],
                    uint8_t *data[4], int linesize[4],
                    uint8_t *scratch_data[4], int scratch_linesize[4],
                    unsigned char *buf, int64_t *best_score
#ifdef CINEPAK_REPORT_SERR
, int64_t *best_serr
#endif
)
{
    int64_t score = 0;
#ifdef CINEPAK_REPORT_SERR
    int64_t serr;
#endif
    // 0 doubles as the "nothing encoded yet" marker for the search below
    int best_size = 0;
    strip_info info;
// for codebook optimization:
    int v1enough, v1_size, v4enough, v4_size;
    int new_v1_size, new_v4_size;
    int v1shrunk, v4shrunk;

    // info is still uninitialized here; calculate_skip_errors() does not read it
    if(!keyframe)
        calculate_skip_errors(s, h, last_data, last_linesize, data, linesize,
                              &info);

    //try some powers of 4 for the size of the codebooks
    //constraint the v4 codebook to be no bigger than v1 one,
    //(and no less than v1_size/4)
    //thus making v1 preferable and possibly losing small details? should be ok
#define SMALLEST_CODEBOOK 1
    for(v1enough = 0, v1_size = SMALLEST_CODEBOOK; v1_size <= CODEBOOK_MAX && !v1enough; v1_size <<= 2) {
        // v4_size takes the values 0, then v1_size/4 (or SMALLEST_CODEBOOK when
        // v1_size is below 4), then successive multiples of 4 up to v1_size
        for(v4enough = 0, v4_size = 0; v4_size <= v1_size && !v4enough; v4_size = v4_size ? v4_size << 2 : v1_size >= SMALLEST_CODEBOOK << 2 ? v1_size >> 2 : SMALLEST_CODEBOOK) {
            //try all modes
            for(CinepakMode mode = 0; mode < MODE_COUNT; mode++) {
                //don't allow MODE_MC in intra frames
                if(keyframe && mode == MODE_MC)
                    continue;

                if(mode == MODE_V1_ONLY) {
                    info.v1_size = v1_size;
// the size may shrink even before optimizations if the input is short:
                    info.v1_size = quantize(s, h, data, linesize, 1,
                                            &info, ENC_UNCERTAIN);
                    if(info.v1_size < v1_size)
// too few eligible blocks, no sense in trying bigger sizes
                        v1enough = 1;

                    info.v4_size = 0;
                } else { // mode != MODE_V1_ONLY
                    // if v4 codebook is empty then only allow V1-only mode
                    if(!v4_size)
                        continue;

                    if(mode == MODE_V1_V4) {
                        info.v4_size = v4_size;
                        info.v4_size = quantize(s, h, data, linesize, 0,
                                                &info, ENC_UNCERTAIN);
                        if(info.v4_size < v4_size)
// too few eligible blocks, no sense in trying bigger sizes
                            v4enough = 1;
                    }
                }

                info.mode = mode;
// choose the best encoding per block, based on current experience
                score = calculate_mode_score(s, h, &info, 0,
                                             &v1shrunk, &v4shrunk
#ifdef CINEPAK_REPORT_SERR
, &serr
#endif
);

                if(mode != MODE_V1_ONLY){
                    // upper bound on the number of retraining rounds in the loop below
                    int extra_iterations_limit = s->max_extra_cb_iterations;
// recompute the codebooks, omitting the extra blocks
// we assume we _may_ come here with more blocks to encode than before
                    info.v1_size = v1_size;
                    new_v1_size = quantize(s, h, data, linesize, 1, &info, ENC_V1);
                    if(new_v1_size < info.v1_size){
                        //av_log(s->avctx, AV_LOG_INFO, "mode %i, %3i, %3i: cut v1 codebook to %i entries\n", mode, v1_size, v4_size, new_v1_size);
                        info.v1_size = new_v1_size;
                    }
// we assume we _may_ come here with more blocks to encode than before
                    info.v4_size = v4_size;
                    new_v4_size = quantize(s, h, data, linesize, 0, &info, ENC_V4);
                    if(new_v4_size < info.v4_size) {
                        //av_log(s->avctx, AV_LOG_INFO, "mode %i, %3i, %3i: cut v4 codebook to %i entries at first iteration\n", mode, v1_size, v4_size, new_v4_size);
                        info.v4_size = new_v4_size;
                    }
// calculate the resulting score
// (do not move blocks to codebook encodings now, as some blocks may have
// got bigger errors despite a smaller training set - but we do not
// ever grow the training sets back)
                    for(;;) {
                        score = calculate_mode_score(s, h, &info, 1,
                                                     &v1shrunk, &v4shrunk
#ifdef CINEPAK_REPORT_SERR
, &serr
#endif
);
// do we have a reason to reiterate? if so, have we reached the limit?
                        if((!v1shrunk && !v4shrunk) || !extra_iterations_limit--) break;
// recompute the codebooks, omitting the extra blocks
                        if(v1shrunk) {
                            info.v1_size = v1_size;
                            new_v1_size = quantize(s, h, data, linesize, 1, &info, ENC_V1);
                            if(new_v1_size < info.v1_size){
                                //av_log(s->avctx, AV_LOG_INFO, "mode %i, %3i, %3i: cut v1 codebook to %i entries\n", mode, v1_size, v4_size, new_v1_size);
                                info.v1_size = new_v1_size;
                            }
                        }
                        if(v4shrunk) {
                            info.v4_size = v4_size;
                            new_v4_size = quantize(s, h, data, linesize, 0, &info, ENC_V4);
                            if(new_v4_size < info.v4_size) {
                                //av_log(s->avctx, AV_LOG_INFO, "mode %i, %3i, %3i: cut v4 codebook to %i entries\n", mode, v1_size, v4_size, new_v4_size);
                                info.v4_size = new_v4_size;
                            }
                        }
                    }
                }

                //av_log(s->avctx, AV_LOG_INFO, "%3i %3i score = %"PRId64"\n", v1_size, v4_size, score);

                // best_size == 0 means no candidate has been accepted yet, so
                // *best_score is only compared once it has been assigned
                if(best_size == 0 || score < *best_score) {

                    *best_score = score;
#ifdef CINEPAK_REPORT_SERR
                    *best_serr = serr;
#endif
                    // strip data goes after the room reserved for the header
                    best_size = encode_mode(s, h,
                                            scratch_data, scratch_linesize,
                                            last_data, last_linesize, &info,
                                            s->strip_buf + STRIP_HEADER_SIZE);

                    //av_log(s->avctx, AV_LOG_INFO, "mode %i, %3i, %3i: %18"PRId64" %i B", mode, info.v1_size, info.v4_size, score, best_size);
                    //av_log(s->avctx, AV_LOG_INFO, "\n");
#ifdef CINEPAK_REPORT_SERR
                    av_log(s->avctx, AV_LOG_INFO, "mode %i, %3i, %3i: %18"PRId64" %i B\n", mode, v1_size, v4_size, serr, best_size);
#endif

#ifdef CINEPAKENC_DEBUG
                    //save MB encoding choices
                    // NOTE(review): mb_count is not declared in this function -- confirm
                    // that this block still builds with CINEPAKENC_DEBUG defined
                    memcpy(s->best_mb, s->mb, mb_count*sizeof(mb_info));
#endif

                    //memcpy(strip_temp + STRIP_HEADER_SIZE, strip_temp, best_size);
                    write_strip_header(s, y, h, keyframe, s->strip_buf, best_size);

                }
            }
        }
    }

#ifdef CINEPAKENC_DEBUG
    //gather stats. this will only work properly if MAX_STRIPS == 1
    // NOTE(review): best_info is not declared in this function -- confirm
    // where it is expected to come from
    if(best_info.mode == MODE_V1_ONLY) {
        s->num_v1_mode++;
        s->num_v1_encs += s->w*h/MB_AREA;
    } else {
        if(best_info.mode == MODE_V1_V4)
            s->num_v4_mode++;
        else
            s->num_mc_mode++;

        int x;
        for(x = 0; x < s->w*h/MB_AREA; x++)
            if(s->best_mb[x].best_encoding == ENC_V1)
                s->num_v1_encs++;
            else if(s->best_mb[x].best_encoding == ENC_V4)
                s->num_v4_encs++;
            else
                s->num_skips++;
    }
#endif

    // account for the header and hand the finished strip to the caller
    best_size += STRIP_HEADER_SIZE;
    memcpy(buf, s->strip_buf, best_size);

    return best_size;
}
1149
1150 static int write_cvid_header(CinepakEncContext *s, unsigned char *buf, int num_strips, int data_size, int isakeyframe)
1151 {
1152     buf[0] = isakeyframe ? 0 : 1;
1153     AV_WB24(&buf[1], data_size + CVID_HEADER_SIZE);
1154     AV_WB16(&buf[4], s->w);
1155     AV_WB16(&buf[6], s->h);
1156     AV_WB16(&buf[8], num_strips);
1157
1158     return CVID_HEADER_SIZE;
1159 }
1160
/**
 * Encode one frame, trying several strip counts and keeping the best one.
 *
 * For RGB24 input the frame is first converted into s->input_frame (one luma
 * value per pixel and one pair of chroma values per 2x2 pixel block). Then,
 * for each strip count from s->min_strips to s->max_strips, every strip is
 * encoded with rd_strip() into s->frame_buf and the strip scores are summed.
 * The attempt with the lowest total score is copied to buf. Before returning,
 * s->min_strips and s->max_strips are updated around the winning strip count
 * for use by the next call.
 *
 * @param s           encoder context
 * @param frame       input picture
 * @param isakeyframe non-zero to encode a keyframe
 * @param buf         output buffer for the encoded frame
 * @param buf_size    not read by this function
 * @return size of the encoded frame in bytes, or a negative value returned
 *         by rd_strip()
 */
static int rd_frame(CinepakEncContext *s, const AVFrame *frame,
                    int isakeyframe, unsigned char *buf, int buf_size)
{
    int num_strips, strip, i, y, nexty, size, temp_size;
    uint8_t *last_data    [4], *data    [4], *scratch_data    [4];
    int      last_linesize[4],  linesize[4],  scratch_linesize[4];
    int64_t best_score = 0, score, score_temp;
#ifdef CINEPAK_REPORT_SERR
    int64_t best_serr = 0, serr, serr_temp;
#endif

    int best_nstrips = -1, best_size = -1; // mark as uninitialized

    if(s->pix_fmt == AV_PIX_FMT_RGB24) {
        int x;
// build a copy of the given frame in the correct colorspace
        // the picture is processed in 2x2 pixel blocks
        for(y = 0; y < s->h; y += 2) {
            for(x = 0; x < s->w; x += 2) {
                uint8_t *ir[2]; int32_t r, g, b, rr, gg, bb;
                // ir[0] and ir[1] address the two RGB input rows of the block
                ir[0] = frame->data[0] + x*3 + y*frame->linesize[0];
                ir[1] = ir[0] + frame->linesize[0];
                // scratch_data is reused here to address the block in s->input_frame
                get_sub_picture(s, x, y,
                                s->input_frame->data, s->input_frame->linesize,
                                scratch_data, scratch_linesize);
                r = g = b = 0;
                for(i=0; i<4; ++i) {
                    int i1, i2;
                    // i1 is the column and i2 the row of the pixel within the block
                    i1 = (i&1); i2 = (i>=2);
                    rr = ir[i2][i1*3+0];
                    gg = ir[i2][i1*3+1];
                    bb = ir[i2][i1*3+2];
                    // accumulate the block sums used for the chroma values below
                    r += rr; g += gg; b += bb;
// using fixed point arithmetic for portable repeatability, scaling by 2^23
// "Y"
//                    rr = 0.2857*rr + 0.5714*gg + 0.1429*bb;
                    rr = (2396625*rr + 4793251*gg + 1198732*bb) >> 23;
                    if(      rr <   0) rr =   0;
                    else if (rr > 255) rr = 255;
                    scratch_data[0][i1 + i2*scratch_linesize[0]] = rr;
                }
// let us scale down as late as possible
//                r /= 4; g /= 4; b /= 4;
// "U"
//                rr = -0.1429*r - 0.2857*g + 0.4286*b;
                rr = (-299683*r - 599156*g + 898839*b) >> 23;
                if(      rr < -128) rr = -128;
                else if (rr >  127) rr =  127;
                scratch_data[1][0] = rr + 128; // quantize needs unsigned
// "V"
//                rr = 0.3571*r - 0.2857*g - 0.0714*b;
                rr = (748893*r - 599156*g - 149737*b) >> 23;
                if(      rr < -128) rr = -128;
                else if (rr >  127) rr =  127;
                scratch_data[2][0] = rr + 128; // quantize needs unsigned
            }
        }
    }

    //would be nice but quite certainly incompatible with vintage players:
    // support encoding zero strips (meaning skip the whole frame)
    // never try more strips than there are macroblock rows
    for(num_strips = s->min_strips; num_strips <= s->max_strips && num_strips <= s->h / MB_SIZE; num_strips++) {
        score = 0;
        size = 0;
#ifdef CINEPAK_REPORT_SERR
        serr = 0;
#endif

        // strip is 1-based; y and nexty are the top and bottom rows of the strip
        for(y = 0, strip = 1; y < s->h; strip++, y = nexty) {
            int strip_height;

            nexty = strip * s->h / num_strips; // <= s->h
            //make nexty the next multiple of 4 if not already there
            if(nexty & 3)
                nexty += 4 - (nexty & 3);

            strip_height = nexty - y;
            if(strip_height <= 0) { // can this ever happen?
                av_log(s->avctx, AV_LOG_INFO, "skipping zero height strip %i of %i\n", strip, num_strips);
                continue;
            }

            // the converted copy is the encoder input for RGB24, otherwise the
            // caller's frame is used directly
            if(s->pix_fmt == AV_PIX_FMT_RGB24)
                get_sub_picture(s, 0, y,
                                s->input_frame->data, s->input_frame->linesize,
                                data, linesize);
            else
                get_sub_picture(s, 0, y,
                                (uint8_t **)frame->data, (int*)frame->linesize,
                                data, linesize);
            get_sub_picture(s, 0, y,
                            s->last_frame->data, s->last_frame->linesize,
                            last_data, last_linesize);
            get_sub_picture(s, 0, y,
                            s->scratch_frame->data, s->scratch_frame->linesize,
                            scratch_data, scratch_linesize);

            // strips are appended after the room reserved for the frame header
            if((temp_size = rd_strip(s, y, strip_height, isakeyframe,
                                     last_data, last_linesize, data, linesize,
                                     scratch_data, scratch_linesize,
                                     s->frame_buf + size + CVID_HEADER_SIZE, &score_temp
#ifdef CINEPAK_REPORT_SERR
, &serr_temp
#endif
)) < 0)
                return temp_size;

            score += score_temp;
#ifdef CINEPAK_REPORT_SERR
            serr += serr_temp;
#endif
            size += temp_size;
            //av_log(s->avctx, AV_LOG_INFO, "strip %d, isakeyframe=%d", strip, isakeyframe);
            //av_log(s->avctx, AV_LOG_INFO, "\n");
        }

        // best_score == 0 also covers the first attempt, since it starts out as 0
        if(best_score == 0 || score < best_score) {
            best_score = score;
#ifdef CINEPAK_REPORT_SERR
            best_serr = serr;
#endif
            best_size = size + write_cvid_header(s, s->frame_buf, num_strips, size, isakeyframe);
            //av_log(s->avctx, AV_LOG_INFO, "best number of strips so far: %2i, %12"PRId64", %i B\n", num_strips, score, best_size);
#ifdef CINEPAK_REPORT_SERR
            av_log(s->avctx, AV_LOG_INFO, "best number of strips so far: %2i, %12"PRId64", %i B\n", num_strips, serr, best_size);
#endif

            // keep the picture produced by this attempt in best_frame and reuse
            // the previous best_frame as scratch for the next attempt
            FFSWAP(AVFrame *, s->best_frame, s->scratch_frame);
            memcpy(buf, s->frame_buf, best_size);
            best_nstrips = num_strips;
        }
// avoid trying too many strip numbers without a real reason
// (this makes the processing of the very first frame faster)
        if(num_strips - best_nstrips > 4)
            break;
    }

    av_assert0(best_nstrips >= 0 && best_size >= 0);

// let the number of strips slowly adapt to the changes in the contents,
// compared to full bruteforcing every time this will occasionally lead
// to some r/d performance loss but makes encoding up to several times faster
    if(!s->strip_number_delta_range) {
        if(best_nstrips == s->max_strips) { // let us try to step up
            s->max_strips = best_nstrips + 1;
            if(s->max_strips >= s->max_max_strips)
                s->max_strips = s->max_max_strips;
        } else { // try to step down
            s->max_strips = best_nstrips;
        }
        s->min_strips = s->max_strips - 1;
        if(s->min_strips < s->min_min_strips)
            s->min_strips = s->min_min_strips;
    } else {
        // search a window of +/- strip_number_delta_range around the winner,
        // clamped to [min_min_strips, max_max_strips]
        s->max_strips = best_nstrips + s->strip_number_delta_range;
        if(s->max_strips >= s->max_max_strips)
            s->max_strips = s->max_max_strips;
        s->min_strips = best_nstrips - s->strip_number_delta_range;
        if(s->min_strips < s->min_min_strips)
            s->min_strips = s->min_min_strips;
    }

    return best_size;
}
1324
1325 static int cinepak_encode_frame(AVCodecContext *avctx, AVPacket *pkt,
1326                                 const AVFrame *frame, int *got_packet)
1327 {
1328     CinepakEncContext *s = avctx->priv_data;
1329     int ret;
1330
1331     s->lambda = frame->quality ? frame->quality - 1 : 2 * FF_LAMBDA_SCALE;
1332
1333     if ((ret = ff_alloc_packet2(avctx, pkt, s->frame_buf_size, 0)) < 0)
1334         return ret;
1335     ret = rd_frame(s, frame, (s->curframe == 0), pkt->data, s->frame_buf_size);
1336     pkt->size = ret;
1337     if (s->curframe == 0)
1338         pkt->flags |= AV_PKT_FLAG_KEY;
1339     *got_packet = 1;
1340
1341     FFSWAP(AVFrame *, s->last_frame, s->best_frame);
1342
1343     if (++s->curframe >= s->keyint)
1344         s->curframe = 0;
1345
1346     return 0;
1347 }
1348
1349 static av_cold int cinepak_encode_end(AVCodecContext *avctx)
1350 {
1351     CinepakEncContext *s = avctx->priv_data;
1352     int x;
1353
1354     av_frame_free(&s->last_frame);
1355     av_frame_free(&s->best_frame);
1356     av_frame_free(&s->scratch_frame);
1357     if (avctx->pix_fmt == AV_PIX_FMT_RGB24)
1358         av_frame_free(&s->input_frame);
1359     av_freep(&s->codebook_input);
1360     av_freep(&s->codebook_closest);
1361     av_freep(&s->strip_buf);
1362     av_freep(&s->frame_buf);
1363     av_freep(&s->mb);
1364 #ifdef CINEPAKENC_DEBUG
1365     av_freep(&s->best_mb);
1366 #endif
1367
1368     for(x = 0; x < (avctx->pix_fmt == AV_PIX_FMT_RGB24 ? 4 : 3); x++)
1369         av_freep(&s->pict_bufs[x]);
1370
1371 #ifdef CINEPAKENC_DEBUG
1372     av_log(avctx, AV_LOG_INFO, "strip coding stats: %i V1 mode, %i V4 mode, %i MC mode (%i V1 encs, %i V4 encs, %i skips)\n",
1373         s->num_v1_mode, s->num_v4_mode, s->num_mc_mode, s->num_v1_encs, s->num_v4_encs, s->num_skips);
1374 #endif
1375
1376     return 0;
1377 }
1378
1379 AVCodec ff_cinepak_encoder = {
1380     .name           = "cinepak",
1381     .type           = AVMEDIA_TYPE_VIDEO,
1382     .id             = AV_CODEC_ID_CINEPAK,
1383     .priv_data_size = sizeof(CinepakEncContext),
1384     .init           = cinepak_encode_init,
1385     .encode2        = cinepak_encode_frame,
1386     .close          = cinepak_encode_end,
1387     .pix_fmts       = (const enum AVPixelFormat[]){AV_PIX_FMT_RGB24, AV_PIX_FMT_GRAY8, AV_PIX_FMT_NONE},
1388     .long_name      = NULL_IF_CONFIG_SMALL("Cinepak / CVID"),
1389     .priv_class     = &cinepak_class,
1390 };