git.sesse.net Git - ffmpeg/blob - libavcodec/cinepakenc.c

   1 /*
   2  * Cinepak encoder (c) 2011 Tomas Härdin
   3  * http://titan.codemill.se/~tomhar/cinepakenc.patch
   4  *
   5  * Fixes and improvements, vintage decoders compatibility
   6  *  (c) 2013, 2014 Rl, Aetey Global Technologies AB
   7
   8 Permission is hereby granted, free of charge, to any person obtaining a
   9 copy of this software and associated documentation files (the "Software"),
  10 to deal in the Software without restriction, including without limitation
  11 the rights to use, copy, modify, merge, publish, distribute, sublicense,
  12 and/or sell copies of the Software, and to permit persons to whom the
  13 Software is furnished to do so, subject to the following conditions:
  14
  15 The above copyright notice and this permission notice shall be included
  16 in all copies or substantial portions of the Software.
  17
  18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  19 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  20 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
  21 THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
  22 OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
  23 ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  24 OTHER DEALINGS IN THE SOFTWARE.
  25
  26  * MAYBE:
  27  * - "optimally" split the frame into several non-regular areas
  28  *   using a separate codebook pair for each area and approximating
  29  *   the area by several rectangular strips (generally not full width ones)
  30  *   (use quadtree splitting? a simple fixed-granularity grid?)
  31  *
  32  *
  33  * version 2014-01-23 Rl
  34  * - added option handling for flexibility
  35  *
  36  * version 2014-01-21 Rl
  37  * - believe it or not, now we get even smaller files, with better quality
  38  *   (which means I missed an optimization earlier :)
  39  *
  40  * version 2014-01-20 Rl
  41  * - made the encoder compatible with vintage decoders
  42  *   and added some yet unused code for possible future
  43  *   incremental codebook updates
  44  * - fixed a small memory leak
  45  *
  46  * version 2013-04-28 Rl
  47  * - bugfixed codebook optimization logic
  48  *
  49  * version 2013-02-14 Rl
  50  * "Valentine's Day" version:
  51  * - made strip division more robust
  52  * - minimized bruteforcing the number of strips,
  53  *   (costs some R/D but speeds up compression a lot), the heuristic
  54  *   assumption is that score as a function of the number of strips has
  55  *   one wide minimum which moves slowly, of course not fully true
  56  * - simplified codebook generation,
  57  *   the old code was meant for other optimizations than we actually do
  58  * - optimized the codebook generation / error estimation for MODE_MC
  59  *
  60  * version 2013-02-12 Rl
  61  * - separated codebook training sets, avoided the transfer of wasted bytes,
  62  *   which yields both better quality and smaller files
  63  * - now using the correct colorspace (TODO: move conversion to libswscale)
  64  *
  65  * version 2013-02-08 Rl
  66  * - fixes/optimization in multistrip encoding and codebook size choice,
  67  *   quality/bitrate is now better than that of the binary proprietary encoder
  68  */
  69
  70 #include "libavutil/intreadwrite.h"
  71 #include "avcodec.h"
  72 #include "libavutil/lfg.h"
  73 #include "elbg.h"
  74 #include "internal.h"
  75
  76 #include "libavutil/avassert.h"
  77 #include "libavutil/opt.h"
  78
  /* Header sizes, in bytes, used when sizing and writing the output buffers. */
  #define CVID_HEADER_SIZE  10
  #define STRIP_HEADER_SIZE 12
  #define CHUNK_HEADER_SIZE 4

  /* Macroblocks are MB_SIZE x MB_SIZE (4x4) pixels. */
  #define MB_SIZE 4
  #define MB_AREA (MB_SIZE * MB_SIZE)

  #define VECTOR_MAX   6    /* six or four entries per vector depending on format */
  #define CODEBOOK_MAX 256  /* size of a codebook */

  /*
   * Bounds on the number of strips per frame.
   *
   * MAX_STRIPS: having fewer choices regarding the number of strips speeds
   *             up encoding (obviously). It limits the maximum quality you
   *             can reach when you want high quality on high resolutions.
   * MIN_STRIPS: having more strips speeds up encoding the frame (this is
   *             less obvious). It limits the minimum efficiently encodable
   *             bit rate on low resolutions.
   *
   * The numbers are only used for brute force optimization for the first
   * frame; for the following frames they are adaptively readjusted.
   *
   * NOTE: the decoder in ffmpeg has its own arbitrary limitation on the
   *       number of strips, currently 32.
   */
  #define MAX_STRIPS 32
  #define MIN_STRIPS 1
  99
 /* Per-strip coding mode; MODE_COUNT is the number of real modes. */
 typedef enum {
     MODE_V1_ONLY = 0,   /* every MB is coded with one V1 vector */
     MODE_V1_V4   = 1,   /* each MB is coded as V1 or V4 */
     MODE_MC      = 2,   /* each MB is skipped, or coded as V1 or V4 */

     MODE_COUNT   = 3,
 } CinepakMode;
 107
 /* How a single macroblock is (or will be) encoded. */
 typedef enum {
     ENC_V1        = 0,  /* one index into the V1 codebook */
     ENC_V4        = 1,  /* four indices into the V4 codebook */
     ENC_SKIP      = 2,  /* block copied from the last frame */

     ENC_UNCERTAIN = 3   /* no decision; see CERTAIN() */
 } mb_encoding;
 115
 116 typedef struct {
 117     int v1_vector;                  //index into v1 codebook
 118     int v1_error;                   //error when using V1 encoding
 119     int v4_vector[4];               //indices into v4 codebook
 120     int v4_error;                   //error when using V4 encoding
 121     int skip_error;                 //error when block is skipped (aka copied from last frame)
 122     mb_encoding best_encoding;      //last result from calculate_mode_score()
 123 } mb_info;
 124
 125 typedef struct {
 126     int v1_codebook[CODEBOOK_MAX*VECTOR_MAX];
 127     int v4_codebook[CODEBOOK_MAX*VECTOR_MAX];
 128     int v1_size;
 129     int v4_size;
 130     CinepakMode mode;
 131 } strip_info;
 132
 133 typedef struct {
 134     const AVClass *class;
 135     AVCodecContext *avctx;
 136     unsigned char *pict_bufs[4], *strip_buf, *frame_buf;
 137     AVFrame *last_frame;
 138     AVFrame *best_frame;
 139     AVFrame *scratch_frame;
 140     AVFrame *input_frame;
 141     enum AVPixelFormat pix_fmt;
 142     int w, h;
 143     int frame_buf_size;
 144     int curframe, keyint;
 145     AVLFG randctx;
 146     uint64_t lambda;
 147     int *codebook_input;
 148     int *codebook_closest;
 149     mb_info *mb;                                //MB RD state
 150     int min_strips;          //the current limit
 151     int max_strips;          //the current limit
 152 #ifdef CINEPAKENC_DEBUG
 153     mb_info *best_mb;                           //TODO: remove. only used for printing stats
 154     int num_v1_mode, num_v4_mode, num_mc_mode;
 155     int num_v1_encs, num_v4_encs, num_skips;
 156 #endif
 157 // options
 158     int max_extra_cb_iterations;
 159     int skip_empty_cb;
 160     int min_min_strips;
 161     int max_max_strips;
 162     int strip_number_delta_range;
 163 } CinepakEncContext;
 164
 165 #define OFFSET(x) offsetof(CinepakEncContext, x)
 166 #define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
 167 static const AVOption options[] = {
 168     { "max_extra_cb_iterations", "Max extra codebook recalculation passes, more is better and slower", OFFSET(max_extra_cb_iterations), AV_OPT_TYPE_INT, { .i64 = 2 }, 0, INT_MAX, VE },
 169     { "skip_empty_cb", "Avoid wasting bytes, ignore vintage MacOS decoder", OFFSET(skip_empty_cb), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, VE },
 170     { "max_strips", "Limit strips/frame, vintage compatible is 1..3, otherwise the more the better", OFFSET(max_max_strips), AV_OPT_TYPE_INT, { .i64 = 3 }, MIN_STRIPS, MAX_STRIPS, VE },
 171     { "min_strips", "Enforce min strips/frame, more is worse and faster, must be <= max_strips", OFFSET(min_min_strips), AV_OPT_TYPE_INT, { .i64 = MIN_STRIPS }, MIN_STRIPS, MAX_STRIPS, VE },
 172     { "strip_number_adaptivity", "How fast the strip number adapts, more is slightly better, much slower", OFFSET(strip_number_delta_range), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, MAX_STRIPS-MIN_STRIPS, VE },
 173     { NULL },
 174 };
 175
 176 static const AVClass cinepak_class = {
 177     .class_name = "cinepak",
 178     .item_name  = av_default_item_name,
 179     .option     = options,
 180     .version    = LIBAVUTIL_VERSION_INT,
 181 };
 182
 183 static av_cold int cinepak_encode_init(AVCodecContext *avctx)
 184 {
 185     CinepakEncContext *s = avctx->priv_data;
 186     int x, mb_count, strip_buf_size, frame_buf_size;
 187
 188     if (avctx->width & 3 || avctx->height & 3) {
 189         av_log(avctx, AV_LOG_ERROR, "width and height must be multiples of four (got %ix%i)\n",
 190                 avctx->width, avctx->height);
 191         return AVERROR(EINVAL);
 192     }
 193
 194     if (s->min_min_strips > s->max_max_strips) {
 195         av_log(avctx, AV_LOG_ERROR, "minimal number of strips can not exceed maximal (got %i and %i)\n",
 196                 s->min_min_strips, s->max_max_strips);
 197         return AVERROR(EINVAL);
 198     }
 199
 200     if (!(s->last_frame = av_frame_alloc()))
 201         return AVERROR(ENOMEM);
 202     if (!(s->best_frame = av_frame_alloc()))
 203         goto enomem;
 204     if (!(s->scratch_frame = av_frame_alloc()))
 205         goto enomem;
 206     if (avctx->pix_fmt == AV_PIX_FMT_RGB24)
 207         if (!(s->input_frame = av_frame_alloc()))
 208             goto enomem;
 209
 210     if (!(s->codebook_input = av_malloc(sizeof(int) * (avctx->pix_fmt == AV_PIX_FMT_RGB24 ? 6 : 4) * (avctx->width * avctx->height) >> 2)))
 211         goto enomem;
 212
 213     if (!(s->codebook_closest = av_malloc(sizeof(int) * (avctx->width * avctx->height) >> 2)))
 214         goto enomem;
 215
 216     for(x = 0; x < (avctx->pix_fmt == AV_PIX_FMT_RGB24 ? 4 : 3); x++)
 217         if(!(s->pict_bufs[x] = av_malloc((avctx->pix_fmt == AV_PIX_FMT_RGB24 ? 6 : 4) * (avctx->width * avctx->height) >> 2)))
 218             goto enomem;
 219
 220     mb_count = avctx->width * avctx->height / MB_AREA;
 221
 222     //the largest possible chunk is 0x31 with all MBs encoded in V4 mode
 223     //and full codebooks being replaced in INTER mode,
 224     // which is 34 bits per MB
 225     //and 2*256 extra flag bits per strip
 226     strip_buf_size = STRIP_HEADER_SIZE + 3 * CHUNK_HEADER_SIZE + 2 * VECTOR_MAX * CODEBOOK_MAX + 4 * (mb_count + (mb_count + 15) / 16) + (2 * CODEBOOK_MAX)/8;
 227
 228     frame_buf_size = CVID_HEADER_SIZE + s->max_max_strips * strip_buf_size;
 229
 230     if (!(s->strip_buf = av_malloc(strip_buf_size)))
 231         goto enomem;
 232
 233     if (!(s->frame_buf = av_malloc(frame_buf_size)))
 234         goto enomem;
 235
 236     if (!(s->mb = av_malloc_array(mb_count, sizeof(mb_info))))
 237         goto enomem;
 238
 239 #ifdef CINEPAKENC_DEBUG
 240     if (!(s->best_mb = av_malloc_array(mb_count, sizeof(mb_info))))
 241         goto enomem;
 242 #endif
 243
 244     av_lfg_init(&s->randctx, 1);
 245     s->avctx = avctx;
 246     s->w = avctx->width;
 247     s->h = avctx->height;
 248     s->frame_buf_size = frame_buf_size;
 249     s->curframe = 0;
 250     s->keyint = avctx->keyint_min;
 251     s->pix_fmt = avctx->pix_fmt;
 252
 253     //set up AVFrames
 254     s->last_frame->data[0]        = s->pict_bufs[0];
 255     s->last_frame->linesize[0]    = s->w;
 256     s->best_frame->data[0]        = s->pict_bufs[1];
 257     s->best_frame->linesize[0]    = s->w;
 258     s->scratch_frame->data[0]     = s->pict_bufs[2];
 259     s->scratch_frame->linesize[0] = s->w;
 260
 261     if (s->pix_fmt == AV_PIX_FMT_RGB24) {
 262         s->last_frame->data[1]        = s->last_frame->data[0] + s->w * s->h;
 263         s->last_frame->data[2]        = s->last_frame->data[1] + ((s->w * s->h) >> 2);
 264         s->last_frame->linesize[1]    = s->last_frame->linesize[2] = s->w >> 1;
 265
 266         s->best_frame->data[1]        = s->best_frame->data[0] + s->w * s->h;
 267         s->best_frame->data[2]        = s->best_frame->data[1] + ((s->w * s->h) >> 2);
 268         s->best_frame->linesize[1]    = s->best_frame->linesize[2] = s->w >> 1;
 269
 270         s->scratch_frame->data[1]     = s->scratch_frame->data[0] + s->w * s->h;
 271         s->scratch_frame->data[2]     = s->scratch_frame->data[1] + ((s->w * s->h) >> 2);
 272         s->scratch_frame->linesize[1] = s->scratch_frame->linesize[2] = s->w >> 1;
 273
 274         s->input_frame->data[0]       = s->pict_bufs[3];
 275         s->input_frame->linesize[0]   = s->w;
 276         s->input_frame->data[1]       = s->input_frame->data[0] + s->w * s->h;
 277         s->input_frame->data[2]       = s->input_frame->data[1] + ((s->w * s->h) >> 2);
 278         s->input_frame->linesize[1]   = s->input_frame->linesize[2] = s->w >> 1;
 279     }
 280
 281     s->min_strips = s->min_min_strips;
 282     s->max_strips = s->max_max_strips;
 283
 284 #ifdef CINEPAKENC_DEBUG
 285     s->num_v1_mode = s->num_v4_mode = s->num_mc_mode = s->num_v1_encs = s->num_v4_encs = s->num_skips = 0;
 286 #endif
 287
 288     return 0;
 289
 290 enomem:
 291     av_frame_free(&s->last_frame);
 292     av_frame_free(&s->best_frame);
 293     av_frame_free(&s->scratch_frame);
 294     if (avctx->pix_fmt == AV_PIX_FMT_RGB24)
 295         av_frame_free(&s->input_frame);
 296     av_freep(&s->codebook_input);
 297     av_freep(&s->codebook_closest);
 298     av_freep(&s->strip_buf);
 299     av_freep(&s->frame_buf);
 300     av_freep(&s->mb);
 301 #ifdef CINEPAKENC_DEBUG
 302     av_freep(&s->best_mb);
 303 #endif
 304
 305     for(x = 0; x < (avctx->pix_fmt == AV_PIX_FMT_RGB24 ? 4 : 3); x++)
 306         av_freep(&s->pict_bufs[x]);
 307
 308     return AVERROR(ENOMEM);
 309 }
 310
 311 static int64_t calculate_mode_score(CinepakEncContext *s, int h, strip_info *info, int report, int *training_set_v1_shrunk, int *training_set_v4_shrunk
 312 #ifdef CINEPAK_REPORT_SERR
 313 , int64_t *serr
 314 #endif
 315 )
 316 {
 317     //score = FF_LAMBDA_SCALE * error + lambda * bits
 318     int x;
 319     int entry_size = s->pix_fmt == AV_PIX_FMT_RGB24 ? 6 : 4;
 320     int mb_count = s->w * h / MB_AREA;
 321     mb_info *mb;
 322     int64_t score1, score2, score3;
 323     int64_t ret = s->lambda * ((info->v1_size ? CHUNK_HEADER_SIZE + info->v1_size * entry_size : 0) +
 324                    (info->v4_size ? CHUNK_HEADER_SIZE + info->v4_size * entry_size : 0) +
 325                    CHUNK_HEADER_SIZE) << 3;
 326
 327     //av_log(s->avctx, AV_LOG_INFO, "sizes %3i %3i -> %9"PRId64" score mb_count %i", info->v1_size, info->v4_size, ret, mb_count);
 328
 329 #ifdef CINEPAK_REPORT_SERR
 330     *serr = 0;
 331 #endif
 332
 333     switch(info->mode) {
 334     case MODE_V1_ONLY:
 335         //one byte per MB
 336         ret += s->lambda * 8 * mb_count;
 337
 338 // while calculating we assume all blocks are ENC_V1
 339         for(x = 0; x < mb_count; x++) {
 340             mb = &s->mb[x];
 341             ret += FF_LAMBDA_SCALE * mb->v1_error;
 342 #ifdef CINEPAK_REPORT_SERR
 343             *serr += mb->v1_error;
 344 #endif
 345 // this function is never called for report in MODE_V1_ONLY
 346 //            if(!report)
 347             mb->best_encoding = ENC_V1;
 348         }
 349
 350         break;
 351     case MODE_V1_V4:
 352         //9 or 33 bits per MB
 353         if(report) {
 354 // no moves between the corresponding training sets are allowed
 355             *training_set_v1_shrunk = *training_set_v4_shrunk = 0;
 356             for(x = 0; x < mb_count; x++) {
 357                 int mberr;
 358                 mb = &s->mb[x];
 359                 if(mb->best_encoding == ENC_V1)
 360                     score1 = s->lambda * 9  + FF_LAMBDA_SCALE * (mberr=mb->v1_error);
 361                 else
 362                     score1 = s->lambda * 33 + FF_LAMBDA_SCALE * (mberr=mb->v4_error);
 363                 ret += score1;
 364 #ifdef CINEPAK_REPORT_SERR
 365                 *serr += mberr;
 366 #endif
 367             }
 368         } else { // find best mode per block
 369             for(x = 0; x < mb_count; x++) {
 370                 mb = &s->mb[x];
 371                 score1 = s->lambda * 9  + FF_LAMBDA_SCALE * mb->v1_error;
 372                 score2 = s->lambda * 33 + FF_LAMBDA_SCALE * mb->v4_error;
 373
 374                 if(score1 <= score2) {
 375                     ret += score1;
 376 #ifdef CINEPAK_REPORT_SERR
 377                     *serr += mb->v1_error;
 378 #endif
 379                     mb->best_encoding = ENC_V1;
 380                 } else {
 381                     ret += score2;
 382 #ifdef CINEPAK_REPORT_SERR
 383                     *serr += mb->v4_error;
 384 #endif
 385                     mb->best_encoding = ENC_V4;
 386                 }
 387             }
 388         }
 389
 390         break;
 391     case MODE_MC:
 392         //1, 10 or 34 bits per MB
 393         if(report) {
 394             int v1_shrunk = 0, v4_shrunk = 0;
 395             for(x = 0; x < mb_count; x++) {
 396                 mb = &s->mb[x];
 397 // it is OK to move blocks to ENC_SKIP here
 398 // but not to any codebook encoding!
 399                 score1 = s->lambda * 1  + FF_LAMBDA_SCALE * mb->skip_error;
 400                 if(mb->best_encoding == ENC_SKIP) {
 401                     ret += score1;
 402 #ifdef CINEPAK_REPORT_SERR
 403                     *serr += mb->skip_error;
 404 #endif
 405                 } else if(mb->best_encoding == ENC_V1) {
 406                     if((score2=s->lambda * 10 + FF_LAMBDA_SCALE * mb->v1_error) >= score1) {
 407                         mb->best_encoding = ENC_SKIP;
 408                         ++v1_shrunk;
 409                         ret += score1;
 410 #ifdef CINEPAK_REPORT_SERR
 411                         *serr += mb->skip_error;
 412 #endif
 413                     } else {
 414                         ret += score2;
 415 #ifdef CINEPAK_REPORT_SERR
 416                         *serr += mb->v1_error;
 417 #endif
 418                     }
 419                 } else {
 420                     if((score3=s->lambda * 34 + FF_LAMBDA_SCALE * mb->v4_error) >= score1) {
 421                         mb->best_encoding = ENC_SKIP;
 422                         ++v4_shrunk;
 423                         ret += score1;
 424 #ifdef CINEPAK_REPORT_SERR
 425                         *serr += mb->skip_error;
 426 #endif
 427                     } else {
 428                         ret += score3;
 429 #ifdef CINEPAK_REPORT_SERR
 430                         *serr += mb->v4_error;
 431 #endif
 432                     }
 433                 }
 434             }
 435             *training_set_v1_shrunk = v1_shrunk;
 436             *training_set_v4_shrunk = v4_shrunk;
 437         } else { // find best mode per block
 438             for(x = 0; x < mb_count; x++) {
 439                 mb = &s->mb[x];
 440                 score1 = s->lambda * 1  + FF_LAMBDA_SCALE * mb->skip_error;
 441                 score2 = s->lambda * 10 + FF_LAMBDA_SCALE * mb->v1_error;
 442                 score3 = s->lambda * 34 + FF_LAMBDA_SCALE * mb->v4_error;
 443
 444                 if(score1 <= score2 && score1 <= score3) {
 445                     ret += score1;
 446 #ifdef CINEPAK_REPORT_SERR
 447                     *serr += mb->skip_error;
 448 #endif
 449                     mb->best_encoding = ENC_SKIP;
 450                 } else if(score2 <= score3) {
 451                     ret += score2;
 452 #ifdef CINEPAK_REPORT_SERR
 453                     *serr += mb->v1_error;
 454 #endif
 455                     mb->best_encoding = ENC_V1;
 456                 } else {
 457                     ret += score3;
 458 #ifdef CINEPAK_REPORT_SERR
 459                     *serr += mb->v4_error;
 460 #endif
 461                     mb->best_encoding = ENC_V4;
 462                 }
 463             }
 464         }
 465
 466         break;
 467     }
 468
 469     return ret;
 470 }
 471
 472 static int write_chunk_header(unsigned char *buf, int chunk_type, int chunk_size)
 473 {
 474     buf[0] = chunk_type;
 475     AV_WB24(&buf[1], chunk_size + CHUNK_HEADER_SIZE);
 476     return CHUNK_HEADER_SIZE;
 477 }
 478
 479 static int encode_codebook(CinepakEncContext *s, int *codebook, int size, int chunk_type_yuv, int chunk_type_gray, unsigned char *buf)
 480 {
 481     int x, y, ret, entry_size = s->pix_fmt == AV_PIX_FMT_RGB24 ? 6 : 4;
 482     int incremental_codebook_replacement_mode = 0; // hardcoded here,
 483                 // the compiler should notice that this is a constant -- rl
 484
 485     ret = write_chunk_header(buf,
 486           s->pix_fmt == AV_PIX_FMT_RGB24 ?
 487            chunk_type_yuv+(incremental_codebook_replacement_mode?1:0) :
 488            chunk_type_gray+(incremental_codebook_replacement_mode?1:0),
 489           entry_size * size
 490            + (incremental_codebook_replacement_mode?(size+31)/32*4:0) );
 491
 492 // we do codebook encoding according to the "intra" mode
 493 // but we keep the "dead" code for reference in case we will want
 494 // to use incremental codebook updates (which actually would give us
 495 // "kind of" motion compensation, especially in 1 strip/frame case) -- rl
 496 // (of course, the code will be not useful as-is)
 497     if(incremental_codebook_replacement_mode) {
 498         int flags = 0;
 499         int flagsind;
 500         for(x = 0; x < size; x++) {
 501             if(flags == 0) {
 502                 flagsind = ret;
 503                 ret += 4;
 504                 flags = 0x80000000;
 505             } else
 506                 flags = ((flags>>1) | 0x80000000);
 507             for(y = 0; y < entry_size; y++)
 508                 buf[ret++] = codebook[y + x*entry_size] ^ (y >= 4 ? 0x80 : 0);
 509             if((flags&0xffffffff) == 0xffffffff) {
 510                 AV_WB32(&buf[flagsind], flags);
 511                 flags = 0;
 512             }
 513         }
 514         if(flags)
 515             AV_WB32(&buf[flagsind], flags);
 516     } else
 517         for(x = 0; x < size; x++)
 518             for(y = 0; y < entry_size; y++)
 519                 buf[ret++] = codebook[y + x*entry_size] ^ (y >= 4 ? 0x80 : 0);
 520
 521     return ret;
 522 }
 523
 524 //sets out to the sub picture starting at (x,y) in in
 525 static void get_sub_picture(CinepakEncContext *s, int x, int y,
 526                             uint8_t * in_data[4], int  in_linesize[4],
 527                             uint8_t *out_data[4], int out_linesize[4])
 528 {
 529     out_data[0] = in_data[0] + x + y * in_linesize[0];
 530     out_linesize[0] = in_linesize[0];
 531
 532     if(s->pix_fmt == AV_PIX_FMT_RGB24) {
 533         out_data[1] = in_data[1] + (x >> 1) + (y >> 1) * in_linesize[1];
 534         out_linesize[1] = in_linesize[1];
 535
 536         out_data[2] = in_data[2] + (x >> 1) + (y >> 1) * in_linesize[2];
 537         out_linesize[2] = in_linesize[2];
 538     }
 539 }
 540
 541 //decodes the V1 vector in mb into the 4x4 MB pointed to by data
 542 static void decode_v1_vector(CinepakEncContext *s, uint8_t *data[4],
 543                              int linesize[4], int v1_vector, strip_info *info)
 544 {
 545     int entry_size = s->pix_fmt == AV_PIX_FMT_RGB24 ? 6 : 4;
 546
 547     data[0][0] =
 548             data[0][1] =
 549             data[0][    linesize[0]] =
 550             data[0][1+  linesize[0]] = info->v1_codebook[v1_vector*entry_size];
 551
 552     data[0][2] =
 553             data[0][3] =
 554             data[0][2+  linesize[0]] =
 555             data[0][3+  linesize[0]] = info->v1_codebook[v1_vector*entry_size+1];
 556
 557     data[0][2*linesize[0]] =
 558             data[0][1+2*linesize[0]] =
 559             data[0][  3*linesize[0]] =
 560             data[0][1+3*linesize[0]] = info->v1_codebook[v1_vector*entry_size+2];
 561
 562     data[0][2+2*linesize[0]] =
 563             data[0][3+2*linesize[0]] =
 564             data[0][2+3*linesize[0]] =
 565             data[0][3+3*linesize[0]] = info->v1_codebook[v1_vector*entry_size+3];
 566
 567     if(s->pix_fmt == AV_PIX_FMT_RGB24) {
 568         data[1][0] =
 569             data[1][1] =
 570             data[1][    linesize[1]] =
 571             data[1][1+  linesize[1]] = info->v1_codebook[v1_vector*entry_size+4];
 572
 573         data[2][0] =
 574             data[2][1] =
 575             data[2][    linesize[2]] =
 576             data[2][1+  linesize[2]] = info->v1_codebook[v1_vector*entry_size+5];
 577     }
 578 }
 579
 580 //decodes the V4 vectors in mb into the 4x4 MB pointed to by data
 581 static void decode_v4_vector(CinepakEncContext *s, uint8_t *data[4],
 582                              int linesize[4], int *v4_vector, strip_info *info)
 583 {
 584     int i, x, y, entry_size = s->pix_fmt == AV_PIX_FMT_RGB24 ? 6 : 4;
 585
 586     for(i = y = 0; y < 4; y += 2) {
 587         for(x = 0; x < 4; x += 2, i++) {
 588             data[0][x   +     y*linesize[0]] = info->v4_codebook[v4_vector[i]*entry_size];
 589             data[0][x+1 +     y*linesize[0]] = info->v4_codebook[v4_vector[i]*entry_size+1];
 590             data[0][x   + (y+1)*linesize[0]] = info->v4_codebook[v4_vector[i]*entry_size+2];
 591             data[0][x+1 + (y+1)*linesize[0]] = info->v4_codebook[v4_vector[i]*entry_size+3];
 592
 593             if(s->pix_fmt == AV_PIX_FMT_RGB24) {
 594                 data[1][(x>>1) + (y>>1)*linesize[1]] = info->v4_codebook[v4_vector[i]*entry_size+4];
 595                 data[2][(x>>1) + (y>>1)*linesize[2]] = info->v4_codebook[v4_vector[i]*entry_size+5];
 596             }
 597         }
 598     }
 599 }
 600
 601 static void copy_mb(CinepakEncContext *s,
 602                     uint8_t *a_data[4], int a_linesize[4],
 603                     uint8_t *b_data[4], int b_linesize[4])
 604 {
 605     int y, p;
 606
 607     for(y = 0; y < MB_SIZE; y++) {
 608         memcpy(a_data[0]+y*a_linesize[0], b_data[0]+y*b_linesize[0],
 609                MB_SIZE);
 610     }
 611
 612     if(s->pix_fmt == AV_PIX_FMT_RGB24) {
 613         for(p = 1; p <= 2; p++) {
 614             for(y = 0; y < MB_SIZE/2; y++) {
 615                 memcpy(a_data[p] + y*a_linesize[p],
 616                        b_data[p] + y*b_linesize[p],
 617                        MB_SIZE/2);
 618             }
 619         }
 620     }
 621 }
 622
 623 static int encode_mode(CinepakEncContext *s, int h,
 624                        uint8_t *scratch_data[4], int scratch_linesize[4],
 625                        uint8_t *last_data[4], int last_linesize[4],
 626                        strip_info *info, unsigned char *buf)
 627 {
 628     int x, y, z, flags, bits, temp_size, header_ofs, ret = 0, mb_count = s->w * h / MB_AREA;
 629     int needs_extra_bit, should_write_temp;
 630     unsigned char temp[64]; //32/2 = 16 V4 blocks at 4 B each -> 64 B
 631     mb_info *mb;
 632     uint8_t *sub_scratch_data[4] = {0}, *sub_last_data[4] = {0};
 633     int sub_scratch_linesize[4] = {0}, sub_last_linesize[4] = {0};
 634
 635     //encode codebooks
 636 ////// MacOS vintage decoder compatibility dictates the presence of
 637 ////// the codebook chunk even when the codebook is empty - pretty dumb...
 638 ////// and also the certain order of the codebook chunks -- rl
 639     if(info->v4_size || !s->skip_empty_cb)
 640         ret += encode_codebook(s, info->v4_codebook, info->v4_size, 0x20, 0x24, buf + ret);
 641
 642     if(info->v1_size || !s->skip_empty_cb)
 643         ret += encode_codebook(s, info->v1_codebook, info->v1_size, 0x22, 0x26, buf + ret);
 644
 645     //update scratch picture
 646     for(z = y = 0; y < h; y += MB_SIZE) {
 647         for(x = 0; x < s->w; x += MB_SIZE, z++) {
 648             mb = &s->mb[z];
 649
 650             get_sub_picture(s, x, y, scratch_data, scratch_linesize,
 651                             sub_scratch_data, sub_scratch_linesize);
 652
 653             if(info->mode == MODE_MC && mb->best_encoding == ENC_SKIP) {
 654                 get_sub_picture(s, x, y,
 655                                 last_data, last_linesize,
 656                                 sub_last_data, sub_last_linesize);
 657                 copy_mb(s, sub_scratch_data, sub_scratch_linesize,
 658                         sub_last_data, sub_last_linesize);
 659             } else if(info->mode == MODE_V1_ONLY || mb->best_encoding == ENC_V1)
 660                 decode_v1_vector(s, sub_scratch_data, sub_scratch_linesize,
 661                                  mb->v1_vector, info);
 662             else
 663                 decode_v4_vector(s, sub_scratch_data, sub_scratch_linesize,
 664                                  mb->v4_vector, info);
 665         }
 666     }
 667
 668     switch(info->mode) {
 669     case MODE_V1_ONLY:
 670         //av_log(s->avctx, AV_LOG_INFO, "mb_count = %i\n", mb_count);
 671         ret += write_chunk_header(buf + ret, 0x32, mb_count);
 672
 673         for(x = 0; x < mb_count; x++)
 674             buf[ret++] = s->mb[x].v1_vector;
 675
 676         break;
 677     case MODE_V1_V4:
 678         //remember header position
 679         header_ofs = ret;
 680         ret += CHUNK_HEADER_SIZE;
 681
 682         for(x = 0; x < mb_count; x += 32) {
 683             flags = 0;
 684             for(y = x; y < FFMIN(x+32, mb_count); y++)
 685                 if(s->mb[y].best_encoding == ENC_V4)
 686                     flags |= 1 << (31 - y + x);
 687
 688             AV_WB32(&buf[ret], flags);
 689             ret += 4;
 690
 691             for(y = x; y < FFMIN(x+32, mb_count); y++) {
 692                 mb = &s->mb[y];
 693
 694                 if(mb->best_encoding == ENC_V1)
 695                     buf[ret++] = mb->v1_vector;
 696                 else
 697                     for(z = 0; z < 4; z++)
 698                         buf[ret++] = mb->v4_vector[z];
 699             }
 700         }
 701
 702         write_chunk_header(buf + header_ofs, 0x30, ret - header_ofs - CHUNK_HEADER_SIZE);
 703
 704         break;
 705     case MODE_MC:
 706         //remember header position
 707         header_ofs = ret;
 708         ret += CHUNK_HEADER_SIZE;
 709         flags = bits = temp_size = 0;
 710
 711         for(x = 0; x < mb_count; x++) {
 712             mb = &s->mb[x];
 713             flags |= (mb->best_encoding != ENC_SKIP) << (31 - bits++);
 714             needs_extra_bit = 0;
 715             should_write_temp = 0;
 716
 717             if(mb->best_encoding != ENC_SKIP) {
 718                 if(bits < 32)
 719                     flags |= (mb->best_encoding == ENC_V4) << (31 - bits++);
 720                 else
 721                     needs_extra_bit = 1;
 722             }
 723
 724             if(bits == 32) {
 725                 AV_WB32(&buf[ret], flags);
 726                 ret += 4;
 727                 flags = bits = 0;
 728
 729                 if(mb->best_encoding == ENC_SKIP || needs_extra_bit) {
 730                     memcpy(&buf[ret], temp, temp_size);
 731                     ret += temp_size;
 732                     temp_size = 0;
 733                 } else
 734                     should_write_temp = 1;
 735             }
 736
 737             if(needs_extra_bit) {
 738                 flags = (mb->best_encoding == ENC_V4) << 31;
 739                 bits = 1;
 740             }
 741
 742             if(mb->best_encoding == ENC_V1)
 743                 temp[temp_size++] = mb->v1_vector;
 744             else if(mb->best_encoding == ENC_V4)
 745                 for(z = 0; z < 4; z++)
 746                     temp[temp_size++] = mb->v4_vector[z];
 747
 748             if(should_write_temp) {
 749                 memcpy(&buf[ret], temp, temp_size);
 750                 ret += temp_size;
 751                 temp_size = 0;
 752             }
 753         }
 754
 755         if(bits > 0) {
 756             AV_WB32(&buf[ret], flags);
 757             ret += 4;
 758             memcpy(&buf[ret], temp, temp_size);
 759             ret += temp_size;
 760         }
 761
 762         write_chunk_header(buf + header_ofs, 0x31, ret - header_ofs - CHUNK_HEADER_SIZE);
 763
 764         break;
 765     }
 766
 767     return ret;
 768 }
 769
 770 //computes distortion of 4x4 MB in b compared to a
 771 static int compute_mb_distortion(CinepakEncContext *s,
 772                                  uint8_t *a_data[4], int a_linesize[4],
 773                                  uint8_t *b_data[4], int b_linesize[4])
 774 {
 775     int x, y, p, d, ret = 0;
 776
 777     for(y = 0; y < MB_SIZE; y++) {
 778         for(x = 0; x < MB_SIZE; x++) {
 779             d = a_data[0][x + y*a_linesize[0]] - b_data[0][x + y*b_linesize[0]];
 780             ret += d*d;
 781         }
 782     }
 783
 784     if(s->pix_fmt == AV_PIX_FMT_RGB24) {
 785         for(p = 1; p <= 2; p++) {
 786             for(y = 0; y < MB_SIZE/2; y++) {
 787                 for(x = 0; x < MB_SIZE/2; x++) {
 788                     d = a_data[p][x + y*a_linesize[p]] - b_data[p][x + y*b_linesize[p]];
 789                     ret += d*d;
 790                 }
 791             }
 792         }
 793     }
 794
 795     return ret;
 796 }
 797
 798 // return the possibly adjusted size of the codebook
     //
     // quantize() builds a training set from the macroblocks of the given
     // picture area, runs the ELBG routines on it and then stores, for every
     // macroblock that took part, the chosen codebook index/indices and the
     // resulting distortion.
     //
     //   s         encoder context; s->codebook_input and s->codebook_closest
     //             are used as work buffers, s->mb[] is read and updated
     //   h         height in pixels of the area to process (the width is s->w)
     //   data,
     //   linesize  planes of the picture area to quantize
     //   v1mode    nonzero: one subsampled vector per MB, codebook buffer is
     //             info->v1_codebook; zero: four vectors per MB, codebook
     //             buffer is info->v4_codebook
     //   info      strip state; info->v1_size / info->v4_size give the requested
     //             codebook size on entry (not modified by this function)
     //   encoding  ENC_UNCERTAIN to use every MB, otherwise only MBs whose
     //             best_encoding equals this value are used
     //
     // Returns the codebook size actually used: the requested size, the number
     // of training vectors if that is smaller, or 0 when the training set is
     // empty (in which case no MB is touched).
 799 #define CERTAIN(x) ((x)!=ENC_UNCERTAIN)
 800 static int quantize(CinepakEncContext *s, int h,
 801                     uint8_t *data[4], int linesize[4],
 802                     int v1mode, strip_info *info,
 803                     mb_encoding encoding)
 804 {
 805     int x, y, i, j, k, x2, y2, x3, y3, plane, shift, mbn;
         // 6 entries per vector for RGB24 input (4 from plane 0 plus one each
         // from planes 1 and 2), otherwise 4 entries from plane 0 only
 806     int entry_size = s->pix_fmt == AV_PIX_FMT_RGB24 ? 6 : 4;
 807     int *codebook = v1mode ? info->v1_codebook : info->v4_codebook;
 808     int size = v1mode ? info->v1_size : info->v4_size;
         // NOTE: total_error is accumulated below but only referenced by a
         // commented-out log statement
 809     int64_t total_error = 0;
 810     uint8_t vq_pict_buf[(MB_AREA*3)/2];
 811     uint8_t *sub_data    [4], *vq_data    [4];
 812     int      sub_linesize[4],  vq_linesize[4];
 813
         // pass 1: collect the training vectors into s->codebook_input;
         // i counts vectors written so far, mbn counts macroblocks visited
 814     for(mbn = i = y = 0; y < h; y += MB_SIZE) {
 815         for(x = 0; x < s->w; x += MB_SIZE, ++mbn) {
 816             int *base;
 817
 818             if(CERTAIN(encoding)) {
 819 // use for the training only the blocks known to be to be encoded [sic:-]
 820                if(s->mb[mbn].best_encoding != encoding) continue;
 821             }
 822
 823             base = s->codebook_input + i*entry_size;
 824             if(v1mode) {
 825                 //subsample
                     // every entry is the average of a 2x2 block of samples:
                     // y2 = 0 and 2 cover the four 2x2 quadrants of plane 0,
                     // y2 = 4 (6-entry vectors only) takes one 2x2 average from
                     // plane 1 (x2 = 0) and one from plane 2 (x2 = 2)
 826                 for(j = y2 = 0; y2 < entry_size; y2 += 2) {
 827                     for(x2 = 0; x2 < 4; x2 += 2, j++) {
 828                         plane = y2 < 4 ? 0 : 1 + (x2 >> 1);
 829                         shift = y2 < 4 ? 0 : 1;
 830                         x3 = shift ? 0 : x2;
 831                         y3 = shift ? 0 : y2;
 832                         base[j] = (data[plane][((x+x3) >> shift) +      ((y+y3) >> shift)      * linesize[plane]] +
 833                                    data[plane][((x+x3) >> shift) + 1 +  ((y+y3) >> shift)      * linesize[plane]] +
 834                                    data[plane][((x+x3) >> shift) +     (((y+y3) >> shift) + 1) * linesize[plane]] +
 835                                    data[plane][((x+x3) >> shift) + 1 + (((y+y3) >> shift) + 1) * linesize[plane]]) >> 2;
 836                     }
 837                 }
 838             } else {
 839                 //copy
                     // each 2x2 quadrant of the MB becomes its own vector:
                     // entries 0..3 are the four plane-0 samples of the quadrant,
                     // entries 4 and 5 (6-entry vectors only) are the samples of
                     // planes 1 and 2 at half the plane-0 coordinates
 840                 for(j = y2 = 0; y2 < MB_SIZE; y2 += 2) {
 841                     for(x2 = 0; x2 < MB_SIZE; x2 += 2) {
 842                         for(k = 0; k < entry_size; k++, j++) {
 843                             plane = k >= 4 ? k - 3 : 0;
 844
 845                             if(k >= 4) {
 846                                 x3 = (x+x2) >> 1;
 847                                 y3 = (y+y2) >> 1;
 848                             } else {
 849                                 x3 = x + x2 + (k & 1);
 850                                 y3 = y + y2 + (k >> 1);
 851                             }
 852
 853                             base[j] = data[plane][x3 + y3*linesize[plane]];
 854                         }
 855                     }
 856                 }
 857             }
 858             i += v1mode ? 1 : 4;
 859         }
 860     }
 861 //    if(i < mbn*(v1mode ? 1 : 4)) {
 862 //        av_log(s->avctx, AV_LOG_INFO, "reducing training set for %s from %i to %i (encoding %i)\n", v1mode?"v1":"v4", mbn*(v1mode ? 1 : 4), i, encoding);
 863 //    }
 864
 865     if(i == 0) // empty training set, nothing to do
 866         return 0;
         // never ask for more codebook entries than there are training vectors
 867     if(i < size) {
 868         //av_log(s->avctx, (CERTAIN(encoding) ? AV_LOG_ERROR : AV_LOG_INFO), "WOULD WASTE: %s cbsize %i bigger than training set size %i (encoding %i)\n", v1mode?"v1":"v4", size, i, encoding);
 869         size = i;
 870     }
 871
         // s->codebook_closest is read back below as one codebook index per
         // training vector -- presumably filled in by the ELBG calls; confirm
         // against elbg.h
 872     avpriv_init_elbg(s->codebook_input, entry_size, i, codebook, size, 1, s->codebook_closest, &s->randctx);
 873     avpriv_do_elbg(s->codebook_input, entry_size, i, codebook, size, 1, s->codebook_closest, &s->randctx);
 874
 875     //setup vq_data, which contains a single MB
         // layout of vq_pict_buf: MB_AREA bytes for plane 0 followed by two
         // planes of MB_AREA/4 bytes each
 876     vq_data[0] = vq_pict_buf;
 877     vq_linesize[0] = MB_SIZE;
 878     vq_data[1] = &vq_pict_buf[MB_AREA];
 879     vq_data[2] = vq_data[1] + (MB_AREA >> 2);
 880     vq_linesize[1] = vq_linesize[2] = MB_SIZE >> 1;
 881
 882     //copy indices
         // pass 2: visits the macroblocks in the same order and with the same
         // filter as pass 1, so i addresses the matching entries of
         // s->codebook_closest; j is the macroblock index
 883     for(i = j = y = 0; y < h; y += MB_SIZE) {
 884         for(x = 0; x < s->w; x += MB_SIZE, j++) {
 885             mb_info *mb = &s->mb[j];
 886 // skip uninteresting blocks if we know their preferred encoding
 887             if(CERTAIN(encoding) && mb->best_encoding != encoding)
 888                 continue;
 889
 890             //point sub_data to current MB
 891             get_sub_picture(s, x, y, data, linesize, sub_data, sub_linesize);
 892
 893             if(v1mode) {
 894                 mb->v1_vector = s->codebook_closest[i];
 895
 896                 //fill in vq_data with V1 data
 897                 decode_v1_vector(s, vq_data, vq_linesize, mb->v1_vector, info);
 898
 899                 mb->v1_error = compute_mb_distortion(s, sub_data, sub_linesize,
 900                                                      vq_data, vq_linesize);
 901                 total_error += mb->v1_error;
 902             } else {
 903                 for(k = 0; k < 4; k++)
 904                     mb->v4_vector[k] = s->codebook_closest[i+k];
 905
 906                 //fill in vq_data with V4 data
 907                 decode_v4_vector(s, vq_data, vq_linesize, mb->v4_vector, info);
 908
 909                 mb->v4_error = compute_mb_distortion(s, sub_data, sub_linesize,
 910                                                      vq_data, vq_linesize);
 911                 total_error += mb->v4_error;
 912             }
 913             i += v1mode ? 1 : 4;
 914         }
 915     }
 916 // check that we did it right in the beginning of the function
 917     av_assert0(i >= size); // training set is no smaller than the codebook
 918
 919     //av_log(s->avctx, AV_LOG_INFO, "isv1 %i size= %i i= %i error %"PRId64"\n", v1mode, size, i, total_error);
 920
 921     return size;
 922 }
 923
 924 static void calculate_skip_errors(CinepakEncContext *s, int h,
 925                                   uint8_t *last_data[4], int last_linesize[4],
 926                                   uint8_t *data[4], int linesize[4],
 927                                   strip_info *info)
 928 {
 929     int x, y, i;
 930     uint8_t *sub_last_data    [4], *sub_pict_data    [4];
 931     int      sub_last_linesize[4],  sub_pict_linesize[4];
 932
 933     for(i = y = 0; y < h; y += MB_SIZE) {
 934         for(x = 0; x < s->w; x += MB_SIZE, i++) {
 935             get_sub_picture(s, x, y, last_data,     last_linesize,
 936                                  sub_last_data, sub_last_linesize);
 937             get_sub_picture(s, x, y,      data,          linesize,
 938                                  sub_pict_data, sub_pict_linesize);
 939
 940             s->mb[i].skip_error = compute_mb_distortion(s,
 941                                             sub_last_data, sub_last_linesize,
 942                                             sub_pict_data, sub_pict_linesize);
 943         }
 944     }
 945 }
 946
 947 static void write_strip_header(CinepakEncContext *s, int y, int h, int keyframe, unsigned char *buf, int strip_size)
 948 {
 949 // actually we are exclusively using intra strip coding (how much can we win
 950 // otherwise? how to choose which part of a codebook to update?),
 951 // keyframes are different only because we disallow ENC_SKIP on them -- rl
 952 // (besides, the logic here used to be inverted: )
 953 //    buf[0] = keyframe ? 0x11: 0x10;
 954     buf[0] = keyframe ? 0x10: 0x11;
 955     AV_WB24(&buf[1], strip_size + STRIP_HEADER_SIZE);
 956 //    AV_WB16(&buf[4], y); /* using absolute y values works -- rl */
 957     AV_WB16(&buf[4], 0); /* using relative values works as well -- rl */
 958     AV_WB16(&buf[6], 0);
 959 //    AV_WB16(&buf[8], y+h); /* using absolute y values works -- rl */
 960     AV_WB16(&buf[8], h); /* using relative values works as well -- rl */
 961     AV_WB16(&buf[10], s->w);
 962     //av_log(s->avctx, AV_LOG_INFO, "write_strip_header() %x keyframe=%d\n", buf[0], keyframe);
 963 }
 964
     // Rate/distortion search for one strip: tries the codebook size and
     // coding mode combinations enumerated below, keeps the candidate with
     // the lowest score and copies it, strip header included, to buf.
     //
     //   s                 encoder context (s->strip_buf holds the best
     //                     candidate while searching)
     //   y, h              top row and height of the strip; y is only passed
     //                     on to write_strip_header()
     //   keyframe          nonzero: MODE_MC is not tried and no skip errors
     //                     are computed
     //   last_data, ...    planes of the previous frame for this strip
     //   data, ...         planes of the picture to encode for this strip
     //   scratch_data, ... handed to encode_mode() -- presumably receives the
     //                     reconstructed strip; confirm in encode_mode()
     //   buf               output buffer
     //   best_score        out: score of the chosen candidate
     //   best_serr         (CINEPAK_REPORT_SERR builds only) out: the serr
     //                     value reported for the chosen candidate
     //
     // Returns the number of bytes written to buf, STRIP_HEADER_SIZE included.
 965 static int rd_strip(CinepakEncContext *s, int y, int h, int keyframe,
 966                     uint8_t *last_data[4], int last_linesize[4],
 967                     uint8_t *data[4], int linesize[4],
 968                     uint8_t *scratch_data[4], int scratch_linesize[4],
 969                     unsigned char *buf, int64_t *best_score
 970 #ifdef CINEPAK_REPORT_SERR
 971 , int64_t *best_serr
 972 #endif
 973 )
 974 {
 975     int64_t score = 0;
 976 #ifdef CINEPAK_REPORT_SERR
 977     int64_t serr;
 978 #endif
         // best_size == 0 doubles as the "no candidate stored yet" marker
 979     int best_size = 0;
 980     strip_info info;
 981 // for codebook optimization:
 982     int v1enough, v1_size, v4enough, v4_size;
 983     int new_v1_size, new_v4_size;
 984     int v1shrunk, v4shrunk;
 985
         // info is still uninitialized at this point; calculate_skip_errors()
         // receives the pointer but does not read through it
 986     if(!keyframe)
 987         calculate_skip_errors(s, h, last_data, last_linesize, data, linesize,
 988                               &info);
 989
 990     //try some powers of 4 for the size of the codebooks
 991     //constraint the v4 codebook to be no bigger than v1 one,
 992     //(and no less than v1_size/4)
 993     //thus making v1 preferable and possibly losing small details? should be ok
 994 #define SMALLEST_CODEBOOK 1
         // v1_size: SMALLEST_CODEBOOK, then x4 per step up to CODEBOOK_MAX;
         // v4_size: 0 first (V1-only candidates), then v1_size/4 (or
         // SMALLEST_CODEBOOK when v1_size is below 4), then x4 up to v1_size
 995     for(v1enough = 0, v1_size = SMALLEST_CODEBOOK; v1_size <= CODEBOOK_MAX && !v1enough; v1_size <<= 2) {
 996         for(v4enough = 0, v4_size = 0; v4_size <= v1_size && !v4enough; v4_size = v4_size ? v4_size << 2 : v1_size >= SMALLEST_CODEBOOK << 2 ? v1_size >> 2 : SMALLEST_CODEBOOK) {
 997             CinepakMode mode;
 998             //try all modes
 999             for(mode = 0; mode < MODE_COUNT; mode++) {
1000                 //don't allow MODE_MC in intra frames
1001                 if(keyframe && mode == MODE_MC)
1002                     continue;
1003
1004                 if(mode == MODE_V1_ONLY) {
1005                     info.v1_size = v1_size;
1006 // the size may shrink even before optimizations if the input is short:
1007                     info.v1_size = quantize(s, h, data, linesize, 1,
1008                                             &info, ENC_UNCERTAIN);
1009                     if(info.v1_size < v1_size)
1010 // too few eligible blocks, no sense in trying bigger sizes
1011                         v1enough = 1;
1012
1013                     info.v4_size = 0;
1014                 } else { // mode != MODE_V1_ONLY
1015                     // if v4 codebook is empty then only allow V1-only mode
1016                     if(!v4_size)
1017                         continue;
1018
1019                     if(mode == MODE_V1_V4) {
1020                         info.v4_size = v4_size;
1021                         info.v4_size = quantize(s, h, data, linesize, 0,
1022                                                 &info, ENC_UNCERTAIN);
1023                         if(info.v4_size < v4_size)
1024 // too few eligible blocks, no sense in trying bigger sizes
1025                             v4enough = 1;
1026                     }
1027                 }
1028
1029                 info.mode = mode;
1030 // choose the best encoding per block, based on current experience
1031                 score = calculate_mode_score(s, h, &info, 0,
1032                                              &v1shrunk, &v4shrunk
1033 #ifdef CINEPAK_REPORT_SERR
1034 , &serr
1035 #endif
1036 );
1037
                     // for the mixed modes, retrain each codebook on only the
                     // blocks currently assigned to that encoding and rescore
1038                 if(mode != MODE_V1_ONLY){
1039                     int extra_iterations_limit = s->max_extra_cb_iterations;
1040 // recompute the codebooks, omitting the extra blocks
1041 // we assume we _may_ come here with more blocks to encode than before
1042                     info.v1_size = v1_size;
1043                     new_v1_size = quantize(s, h, data, linesize, 1, &info, ENC_V1);
1044                     if(new_v1_size < info.v1_size){
1045                         //av_log(s->avctx, AV_LOG_INFO, "mode %i, %3i, %3i: cut v1 codebook to %i entries\n", mode, v1_size, v4_size, new_v1_size);
1046                         info.v1_size = new_v1_size;
1047                     }
1048 // we assume we _may_ come here with more blocks to encode than before
1049                     info.v4_size = v4_size;
1050                     new_v4_size = quantize(s, h, data, linesize, 0, &info, ENC_V4);
1051                     if(new_v4_size < info.v4_size) {
1052                         //av_log(s->avctx, AV_LOG_INFO, "mode %i, %3i, %3i: cut v4 codebook to %i entries at first iteration\n", mode, v1_size, v4_size, new_v4_size);
1053                         info.v4_size = new_v4_size;
1054                     }
1055 // calculate the resulting score
1056 // (do not move blocks to codebook encodings now, as some blocks may have
1057 // got bigger errors despite a smaller training set - but we do not
1058 // ever grow the training sets back)
1059                     for(;;) {
1060                         score = calculate_mode_score(s, h, &info, 1,
1061                                                      &v1shrunk, &v4shrunk
1062 #ifdef CINEPAK_REPORT_SERR
1063 , &serr
1064 #endif
1065 );
1066 // do we have a reason to reiterate? if so, have we reached the limit?
1067                         if((!v1shrunk && !v4shrunk) || !extra_iterations_limit--) break;
1068 // recompute the codebooks, omitting the extra blocks
1069                         if(v1shrunk) {
1070                             info.v1_size = v1_size;
1071                             new_v1_size = quantize(s, h, data, linesize, 1, &info, ENC_V1);
1072                             if(new_v1_size < info.v1_size){
1073                                 //av_log(s->avctx, AV_LOG_INFO, "mode %i, %3i, %3i: cut v1 codebook to %i entries\n", mode, v1_size, v4_size, new_v1_size);
1074                                 info.v1_size = new_v1_size;
1075                             }
1076                         }
1077                         if(v4shrunk) {
1078                             info.v4_size = v4_size;
1079                             new_v4_size = quantize(s, h, data, linesize, 0, &info, ENC_V4);
1080                             if(new_v4_size < info.v4_size) {
1081                                 //av_log(s->avctx, AV_LOG_INFO, "mode %i, %3i, %3i: cut v4 codebook to %i entries\n", mode, v1_size, v4_size, new_v4_size);
1082                                 info.v4_size = new_v4_size;
1083                             }
1084                         }
1085                     }
1086                 }
1087
1088                 //av_log(s->avctx, AV_LOG_INFO, "%3i %3i score = %"PRId64"\n", v1_size, v4_size, score);
1089
                     // first candidate, or strictly better than the best so far:
                     // encode it into s->strip_buf behind the header area
1090                 if(best_size == 0 || score < *best_score) {
1091
1092                     *best_score = score;
1093 #ifdef CINEPAK_REPORT_SERR
1094                     *best_serr = serr;
1095 #endif
1096                     best_size = encode_mode(s, h,
1097                                             scratch_data, scratch_linesize,
1098                                             last_data, last_linesize, &info,
1099                                             s->strip_buf + STRIP_HEADER_SIZE);
1100
1101                     //av_log(s->avctx, AV_LOG_INFO, "mode %i, %3i, %3i: %18"PRId64" %i B", mode, info.v1_size, info.v4_size, score, best_size);
1102                     //av_log(s->avctx, AV_LOG_INFO, "\n");
1103 #ifdef CINEPAK_REPORT_SERR
1104                     av_log(s->avctx, AV_LOG_INFO, "mode %i, %3i, %3i: %18"PRId64" %i B\n", mode, v1_size, v4_size, serr, best_size);
1105 #endif
1106
1107 #ifdef CINEPAKENC_DEBUG
1108                     //save MB encoding choices
                         // NOTE(review): mb_count is not declared in this function --
                         // confirm it exists at file scope, otherwise this does not
                         // compile when CINEPAKENC_DEBUG is defined
1109                     memcpy(s->best_mb, s->mb, mb_count*sizeof(mb_info));
1110 #endif
1111
1112                     //memcpy(strip_temp + STRIP_HEADER_SIZE, strip_temp, best_size);
1113                     write_strip_header(s, y, h, keyframe, s->strip_buf, best_size);
1114
1115                 }
1116             }
1117         }
1118     }
1119
1120 #ifdef CINEPAKENC_DEBUG
1121     //gather stats. this will only work properly of MAX_STRIPS == 1
         // NOTE(review): best_info is not declared in this function (only
         // info is) -- confirm where it is supposed to come from
1122     if(best_info.mode == MODE_V1_ONLY) {
1123         s->num_v1_mode++;
1124         s->num_v1_encs += s->w*h/MB_AREA;
1125     } else {
1126         if(best_info.mode == MODE_V1_V4)
1127             s->num_v4_mode++;
1128         else
1129             s->num_mc_mode++;
1130
1131         int x;
1132         for(x = 0; x < s->w*h/MB_AREA; x++)
1133             if(s->best_mb[x].best_encoding == ENC_V1)
1134                 s->num_v1_encs++;
1135             else if(s->best_mb[x].best_encoding == ENC_V4)
1136                 s->num_v4_encs++;
1137             else
1138                 s->num_skips++;
1139     }
1140 #endif
1141
         // hand the winning strip (header + payload) to the caller
1142     best_size += STRIP_HEADER_SIZE;
1143     memcpy(buf, s->strip_buf, best_size);
1144
1145     return best_size;
1146 }
1147
1148 static int write_cvid_header(CinepakEncContext *s, unsigned char *buf, int num_strips, int data_size, int isakeyframe)
1149 {
1150     buf[0] = isakeyframe ? 0 : 1;
1151     AV_WB24(&buf[1], data_size + CVID_HEADER_SIZE);
1152     AV_WB16(&buf[4], s->w);
1153     AV_WB16(&buf[6], s->h);
1154     AV_WB16(&buf[8], num_strips);
1155
1156     return CVID_HEADER_SIZE;
1157 }
1158
1159 static int rd_frame(CinepakEncContext *s, const AVFrame *frame,
1160                     int isakeyframe, unsigned char *buf, int buf_size)
1161 {
1162     int num_strips, strip, i, y, nexty, size, temp_size;
1163     uint8_t *last_data    [4], *data    [4], *scratch_data    [4];
1164     int      last_linesize[4],  linesize[4],  scratch_linesize[4];
1165     int64_t best_score = 0, score, score_temp;
1166 #ifdef CINEPAK_REPORT_SERR
1167     int64_t best_serr = 0, serr, serr_temp;
1168 #endif
1169
1170     int best_nstrips = -1, best_size = -1; // mark as uninitialzed
1171
1172     if(s->pix_fmt == AV_PIX_FMT_RGB24) {
1173         int x;
1174 // build a copy of the given frame in the correct colorspace
1175         for(y = 0; y < s->h; y += 2) {
1176             for(x = 0; x < s->w; x += 2) {
1177                 uint8_t *ir[2]; int32_t r, g, b, rr, gg, bb;
1178                 ir[0] = frame->data[0] + x*3 + y*frame->linesize[0];
1179                 ir[1] = ir[0] + frame->linesize[0];
1180                 get_sub_picture(s, x, y,
1181                                 s->input_frame->data, s->input_frame->linesize,
1182                                 scratch_data, scratch_linesize);
1183                 r = g = b = 0;
1184                 for(i=0; i<4; ++i) {
1185                     int i1, i2;
1186                     i1 = (i&1); i2 = (i>=2);
1187                     rr = ir[i2][i1*3+0];
1188                     gg = ir[i2][i1*3+1];
1189                     bb = ir[i2][i1*3+2];
1190                     r += rr; g += gg; b += bb;
1191 // using fixed point arithmetic for portable repeatability, scaling by 2^23
1192 // "Y"
1193 //                    rr = 0.2857*rr + 0.5714*gg + 0.1429*bb;
1194                     rr = (2396625*rr + 4793251*gg + 1198732*bb) >> 23;
1195                     if(      rr <   0) rr =   0;
1196                     else if (rr > 255) rr = 255;
1197                     scratch_data[0][i1 + i2*scratch_linesize[0]] = rr;
1198                 }
1199 // let us scale down as late as possible
1200 //                r /= 4; g /= 4; b /= 4;
1201 // "U"
1202 //                rr = -0.1429*r - 0.2857*g + 0.4286*b;
1203                 rr = (-299683*r - 599156*g + 898839*b) >> 23;
1204                 if(      rr < -128) rr = -128;
1205                 else if (rr >  127) rr =  127;
1206                 scratch_data[1][0] = rr + 128; // quantize needs unsigned
1207 // "V"
1208 //                rr = 0.3571*r - 0.2857*g - 0.0714*b;
1209                 rr = (748893*r - 599156*g - 149737*b) >> 23;
1210                 if(      rr < -128) rr = -128;
1211                 else if (rr >  127) rr =  127;
1212                 scratch_data[2][0] = rr + 128; // quantize needs unsigned
1213             }
1214         }
1215     }
1216
1217     //would be nice but quite certainly incompatible with vintage players:
1218     // support encoding zero strips (meaning skip the whole frame)
1219     for(num_strips = s->min_strips; num_strips <= s->max_strips && num_strips <= s->h / MB_SIZE; num_strips++) {
1220         score = 0;
1221         size = 0;
1222 #ifdef CINEPAK_REPORT_SERR
1223         serr = 0;
1224 #endif
1225
1226         for(y = 0, strip = 1; y < s->h; strip++, y = nexty) {
1227             int strip_height;
1228
1229             nexty = strip * s->h / num_strips; // <= s->h
1230             //make nexty the next multiple of 4 if not already there
1231             if(nexty & 3)
1232                 nexty += 4 - (nexty & 3);
1233
1234             strip_height = nexty - y;
1235             if(strip_height <= 0) { // can this ever happen?
1236                 av_log(s->avctx, AV_LOG_INFO, "skipping zero height strip %i of %i\n", strip, num_strips);
1237                 continue;
1238             }
1239
1240             if(s->pix_fmt == AV_PIX_FMT_RGB24)
1241                 get_sub_picture(s, 0, y,
1242                                 s->input_frame->data, s->input_frame->linesize,
1243                                 data, linesize);
1244             else
1245                 get_sub_picture(s, 0, y,
1246                                 (uint8_t **)frame->data, (int*)frame->linesize,
1247                                 data, linesize);
1248             get_sub_picture(s, 0, y,
1249                             s->last_frame->data, s->last_frame->linesize,
1250                             last_data, last_linesize);
1251             get_sub_picture(s, 0, y,
1252                             s->scratch_frame->data, s->scratch_frame->linesize,
1253                             scratch_data, scratch_linesize);
1254
1255             if((temp_size = rd_strip(s, y, strip_height, isakeyframe,
1256                                      last_data, last_linesize, data, linesize,
1257                                      scratch_data, scratch_linesize,
1258                                      s->frame_buf + size + CVID_HEADER_SIZE, &score_temp
1259 #ifdef CINEPAK_REPORT_SERR
1260 , &serr_temp
1261 #endif
1262 )) < 0)
1263                 return temp_size;
1264
1265             score += score_temp;
1266 #ifdef CINEPAK_REPORT_SERR
1267             serr += serr_temp;
1268 #endif
1269             size += temp_size;
1270             //av_log(s->avctx, AV_LOG_INFO, "strip %d, isakeyframe=%d", strip, isakeyframe);
1271             //av_log(s->avctx, AV_LOG_INFO, "\n");
1272         }
1273
1274         if(best_score == 0 || score < best_score) {
1275             best_score = score;
1276 #ifdef CINEPAK_REPORT_SERR
1277             best_serr = serr;
1278 #endif
1279             best_size = size + write_cvid_header(s, s->frame_buf, num_strips, size, isakeyframe);
1280             //av_log(s->avctx, AV_LOG_INFO, "best number of strips so far: %2i, %12"PRId64", %i B\n", num_strips, score, best_size);
1281 #ifdef CINEPAK_REPORT_SERR
1282             av_log(s->avctx, AV_LOG_INFO, "best number of strips so far: %2i, %12"PRId64", %i B\n", num_strips, serr, best_size);
1283 #endif
1284
1285             FFSWAP(AVFrame *, s->best_frame, s->scratch_frame);
1286             memcpy(buf, s->frame_buf, best_size);
1287             best_nstrips = num_strips;
1288         }
1289 // avoid trying too many strip numbers without a real reason
1290 // (this makes the processing of the very first frame faster)
1291         if(num_strips - best_nstrips > 4)
1292             break;
1293     }
1294
1295     av_assert0(best_nstrips >= 0 && best_size >= 0);
1296
1297 // let the number of strips slowly adapt to the changes in the contents,
1298 // compared to full bruteforcing every time this will occasionally lead
1299 // to some r/d performance loss but makes encoding up to several times faster
1300     if(!s->strip_number_delta_range) {
1301         if(best_nstrips == s->max_strips) { // let us try to step up
1302             s->max_strips = best_nstrips + 1;
1303             if(s->max_strips >= s->max_max_strips)
1304                 s->max_strips = s->max_max_strips;
1305         } else { // try to step down
1306             s->max_strips = best_nstrips;
1307         }
1308         s->min_strips = s->max_strips - 1;
1309         if(s->min_strips < s->min_min_strips)
1310             s->min_strips = s->min_min_strips;
1311     } else {
1312         s->max_strips = best_nstrips + s->strip_number_delta_range;
1313         if(s->max_strips >= s->max_max_strips)
1314             s->max_strips = s->max_max_strips;
1315         s->min_strips = best_nstrips - s->strip_number_delta_range;
1316         if(s->min_strips < s->min_min_strips)
1317             s->min_strips = s->min_min_strips;
1318     }
1319
1320     return best_size;
1321 }
1322
1323 static int cinepak_encode_frame(AVCodecContext *avctx, AVPacket *pkt,
1324                                 const AVFrame *frame, int *got_packet)
1325 {
1326     CinepakEncContext *s = avctx->priv_data;
1327     int ret;
1328
1329     s->lambda = frame->quality ? frame->quality - 1 : 2 * FF_LAMBDA_SCALE;
1330
1331     if ((ret = ff_alloc_packet2(avctx, pkt, s->frame_buf_size, 0)) < 0)
1332         return ret;
1333     ret = rd_frame(s, frame, (s->curframe == 0), pkt->data, s->frame_buf_size);
1334     pkt->size = ret;
1335     if (s->curframe == 0)
1336         pkt->flags |= AV_PKT_FLAG_KEY;
1337     *got_packet = 1;
1338
1339     FFSWAP(AVFrame *, s->last_frame, s->best_frame);
1340
1341     if (++s->curframe >= s->keyint)
1342         s->curframe = 0;
1343
1344     return 0;
1345 }
1346
1347 static av_cold int cinepak_encode_end(AVCodecContext *avctx)
1348 {
1349     CinepakEncContext *s = avctx->priv_data;
1350     int x;
1351
1352     av_frame_free(&s->last_frame);
1353     av_frame_free(&s->best_frame);
1354     av_frame_free(&s->scratch_frame);
1355     if (avctx->pix_fmt == AV_PIX_FMT_RGB24)
1356         av_frame_free(&s->input_frame);
1357     av_freep(&s->codebook_input);
1358     av_freep(&s->codebook_closest);
1359     av_freep(&s->strip_buf);
1360     av_freep(&s->frame_buf);
1361     av_freep(&s->mb);
1362 #ifdef CINEPAKENC_DEBUG
1363     av_freep(&s->best_mb);
1364 #endif
1365
1366     for(x = 0; x < (avctx->pix_fmt == AV_PIX_FMT_RGB24 ? 4 : 3); x++)
1367         av_freep(&s->pict_bufs[x]);
1368
1369 #ifdef CINEPAKENC_DEBUG
1370     av_log(avctx, AV_LOG_INFO, "strip coding stats: %i V1 mode, %i V4 mode, %i MC mode (%i V1 encs, %i V4 encs, %i skips)\n",
1371         s->num_v1_mode, s->num_v4_mode, s->num_mc_mode, s->num_v1_encs, s->num_v4_encs, s->num_skips);
1372 #endif
1373
1374     return 0;
1375 }
1376
1377 AVCodec ff_cinepak_encoder = {
1378     .name           = "cinepak",
1379     .type           = AVMEDIA_TYPE_VIDEO,
1380     .id             = AV_CODEC_ID_CINEPAK,
1381     .priv_data_size = sizeof(CinepakEncContext),
1382     .init           = cinepak_encode_init,
1383     .encode2        = cinepak_encode_frame,
1384     .close          = cinepak_encode_end,
1385     .pix_fmts       = (const enum AVPixelFormat[]){AV_PIX_FMT_RGB24, AV_PIX_FMT_GRAY8, AV_PIX_FMT_NONE},
1386     .long_name      = NULL_IF_CONFIG_SMALL("Cinepak"),
1387     .priv_class     = &cinepak_class,
1388 };