git.sesse.net Git - ffmpeg/blob - libavcodec/snow.c

   1 /*
   2  * Copyright (C) 2004 Michael Niedermayer <michaelni@gmx.at>
   3  *
   4  * This file is part of FFmpeg.
   5  *
   6  * FFmpeg is free software; you can redistribute it and/or
   7  * modify it under the terms of the GNU Lesser General Public
   8  * License as published by the Free Software Foundation; either
   9  * version 2.1 of the License, or (at your option) any later version.
  10  *
  11  * FFmpeg is distributed in the hope that it will be useful,
  12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14  * Lesser General Public License for more details.
  15  *
  16  * You should have received a copy of the GNU Lesser General Public
  17  * License along with FFmpeg; if not, write to the Free Software
  18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  19  */
  20
  21 #include "avcodec.h"
  22 #include "dsputil.h"
  23 #include "snow.h"
  24
  25 #include "rangecoder.h"
  26 #include "mathops.h"
  27
  28 #include "mpegvideo.h"
  29
  30 #undef NDEBUG
  31 #include <assert.h>
  32
  33 static const int8_t quant3[256]={
  34  0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  35  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  36  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  37  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  38  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  39  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  40  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  41  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  42 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
  43 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
  44 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
  45 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
  46 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
  47 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
  48 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
  49 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 0,
  50 };
  51 static const int8_t quant3b[256]={
  52  0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  53  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  54  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  55  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  56  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  57  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  58  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  59  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  60 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
  61 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
  62 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
  63 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
  64 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
  65 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
  66 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
  67 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
  68 };
  69 static const int8_t quant3bA[256]={
  70  0, 0, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
  71  1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
  72  1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
  73  1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
  74  1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
  75  1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
  76  1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
  77  1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
  78  1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
  79  1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
  80  1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
  81  1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
  82  1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
  83  1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
  84  1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
  85  1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
  86 };
  87 static const int8_t quant5[256]={
  88  0, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  89  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  90  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  91  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  92  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  93  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  94  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  95  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  96 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
  97 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
  98 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
  99 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
 100 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
 101 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
 102 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
 103 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,-1,
 104 };
 105 static const int8_t quant7[256]={
 106  0, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
 107  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
 108  2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3,
 109  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
 110  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
 111  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
 112  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
 113  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
 114 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
 115 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
 116 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
 117 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
 118 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
 119 -3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-2,-2,-2,
 120 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
 121 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,
 122 };
 123 static const int8_t quant9[256]={
 124  0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3,
 125  3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
 126  4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
 127  4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
 128  4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
 129  4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
 130  4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
 131  4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
 132 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
 133 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
 134 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
 135 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
 136 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
 137 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
 138 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,
 139 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-1,-1,
 140 };
 141 static const int8_t quant11[256]={
 142  0, 1, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4,
 143  4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
 144  4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
 145  5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
 146  5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
 147  5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
 148  5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
 149  5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
 150 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
 151 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
 152 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
 153 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
 154 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
 155 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-4,-4,
 156 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
 157 -4,-4,-4,-4,-4,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-1,
 158 };
 159 static const int8_t quant13[256]={
 160  0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
 161  4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
 162  5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
 163  5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
 164  6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
 165  6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
 166  6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
 167  6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
 168 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
 169 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
 170 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
 171 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
 172 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-5,
 173 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
 174 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
 175 -4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,-2,-2,-1,
 176 };
 177
 178 #if 0 //64*cubic
 179 static const uint8_t obmc32[1024]={
 180   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
 181   0,  0,  0,  0,  0,  4,  4,  4,  4,  4,  4,  4,  4,  8,  8,  8,  8,  8,  8,  4,  4,  4,  4,  4,  4,  4,  4,  0,  0,  0,  0,  0,
 182   0,  0,  0,  4,  4,  4,  4,  8,  8, 12, 12, 12, 16, 16, 16, 16, 16, 16, 16, 16, 12, 12, 12,  8,  8,  4,  4,  4,  4,  0,  0,  0,
 183   0,  0,  4,  4,  8,  8, 12, 16, 16, 20, 24, 24, 28, 28, 32, 32, 32, 32, 28, 28, 24, 24, 20, 16, 16, 12,  8,  8,  4,  4,  0,  0,
 184   0,  0,  4,  8,  8, 12, 16, 24, 28, 32, 36, 40, 44, 48, 48, 48, 48, 48, 48, 44, 40, 36, 32, 28, 24, 16, 12,  8,  8,  4,  0,  0,
 185   0,  4,  4,  8, 12, 20, 24, 32, 40, 44, 52, 56, 60, 64, 68, 72, 72, 68, 64, 60, 56, 52, 44, 40, 32, 24, 20, 12,  8,  4,  4,  0,
 186   0,  4,  4, 12, 16, 24, 32, 40, 52, 60, 68, 76, 80, 88, 88, 92, 92, 88, 88, 80, 76, 68, 60, 52, 40, 32, 24, 16, 12,  4,  4,  0,
 187   0,  4,  8, 16, 24, 32, 40, 52, 64, 76, 84, 92,100,108,112,116,116,112,108,100, 92, 84, 76, 64, 52, 40, 32, 24, 16,  8,  4,  0,
 188   0,  4,  8, 16, 28, 40, 52, 64, 76, 88,100,112,124,132,136,140,140,136,132,124,112,100, 88, 76, 64, 52, 40, 28, 16,  8,  4,  0,
 189   0,  4, 12, 20, 32, 44, 60, 76, 88,104,120,132,144,152,160,164,164,160,152,144,132,120,104, 88, 76, 60, 44, 32, 20, 12,  4,  0,
 190   0,  4, 12, 24, 36, 48, 68, 84,100,120,136,152,164,176,180,184,184,180,176,164,152,136,120,100, 84, 68, 48, 36, 24, 12,  4,  0,
 191   0,  4, 12, 24, 40, 56, 76, 92,112,132,152,168,180,192,204,208,208,204,192,180,168,152,132,112, 92, 76, 56, 40, 24, 12,  4,  0,
 192   0,  4, 16, 28, 44, 60, 80,100,124,144,164,180,196,208,220,224,224,220,208,196,180,164,144,124,100, 80, 60, 44, 28, 16,  4,  0,
 193   0,  8, 16, 28, 48, 64, 88,108,132,152,176,192,208,224,232,240,240,232,224,208,192,176,152,132,108, 88, 64, 48, 28, 16,  8,  0,
 194   0,  4, 16, 32, 48, 68, 88,112,136,160,180,204,220,232,244,248,248,244,232,220,204,180,160,136,112, 88, 68, 48, 32, 16,  4,  0,
 195   1,  8, 16, 32, 48, 72, 92,116,140,164,184,208,224,240,248,255,255,248,240,224,208,184,164,140,116, 92, 72, 48, 32, 16,  8,  1,
 196   1,  8, 16, 32, 48, 72, 92,116,140,164,184,208,224,240,248,255,255,248,240,224,208,184,164,140,116, 92, 72, 48, 32, 16,  8,  1,
 197   0,  4, 16, 32, 48, 68, 88,112,136,160,180,204,220,232,244,248,248,244,232,220,204,180,160,136,112, 88, 68, 48, 32, 16,  4,  0,
 198   0,  8, 16, 28, 48, 64, 88,108,132,152,176,192,208,224,232,240,240,232,224,208,192,176,152,132,108, 88, 64, 48, 28, 16,  8,  0,
 199   0,  4, 16, 28, 44, 60, 80,100,124,144,164,180,196,208,220,224,224,220,208,196,180,164,144,124,100, 80, 60, 44, 28, 16,  4,  0,
 200   0,  4, 12, 24, 40, 56, 76, 92,112,132,152,168,180,192,204,208,208,204,192,180,168,152,132,112, 92, 76, 56, 40, 24, 12,  4,  0,
 201   0,  4, 12, 24, 36, 48, 68, 84,100,120,136,152,164,176,180,184,184,180,176,164,152,136,120,100, 84, 68, 48, 36, 24, 12,  4,  0,
 202   0,  4, 12, 20, 32, 44, 60, 76, 88,104,120,132,144,152,160,164,164,160,152,144,132,120,104, 88, 76, 60, 44, 32, 20, 12,  4,  0,
 203   0,  4,  8, 16, 28, 40, 52, 64, 76, 88,100,112,124,132,136,140,140,136,132,124,112,100, 88, 76, 64, 52, 40, 28, 16,  8,  4,  0,
 204   0,  4,  8, 16, 24, 32, 40, 52, 64, 76, 84, 92,100,108,112,116,116,112,108,100, 92, 84, 76, 64, 52, 40, 32, 24, 16,  8,  4,  0,
 205   0,  4,  4, 12, 16, 24, 32, 40, 52, 60, 68, 76, 80, 88, 88, 92, 92, 88, 88, 80, 76, 68, 60, 52, 40, 32, 24, 16, 12,  4,  4,  0,
 206   0,  4,  4,  8, 12, 20, 24, 32, 40, 44, 52, 56, 60, 64, 68, 72, 72, 68, 64, 60, 56, 52, 44, 40, 32, 24, 20, 12,  8,  4,  4,  0,
 207   0,  0,  4,  8,  8, 12, 16, 24, 28, 32, 36, 40, 44, 48, 48, 48, 48, 48, 48, 44, 40, 36, 32, 28, 24, 16, 12,  8,  8,  4,  0,  0,
 208   0,  0,  4,  4,  8,  8, 12, 16, 16, 20, 24, 24, 28, 28, 32, 32, 32, 32, 28, 28, 24, 24, 20, 16, 16, 12,  8,  8,  4,  4,  0,  0,
 209   0,  0,  0,  4,  4,  4,  4,  8,  8, 12, 12, 12, 16, 16, 16, 16, 16, 16, 16, 16, 12, 12, 12,  8,  8,  4,  4,  4,  4,  0,  0,  0,
 210   0,  0,  0,  0,  0,  4,  4,  4,  4,  4,  4,  4,  4,  8,  8,  8,  8,  8,  8,  4,  4,  4,  4,  4,  4,  4,  4,  0,  0,  0,  0,  0,
 211   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
 212 //error:0.000022
 213 };
 214 static const uint8_t obmc16[256]={
 215   0,  0,  0,  0,  0,  0,  4,  4,  4,  4,  0,  0,  0,  0,  0,  0,
 216   0,  4,  4,  8, 16, 20, 20, 24, 24, 20, 20, 16,  8,  4,  4,  0,
 217   0,  4, 16, 24, 36, 44, 52, 60, 60, 52, 44, 36, 24, 16,  4,  0,
 218   0,  8, 24, 44, 60, 80, 96,104,104, 96, 80, 60, 44, 24,  8,  0,
 219   0, 16, 36, 60, 92,116,136,152,152,136,116, 92, 60, 36, 16,  0,
 220   0, 20, 44, 80,116,152,180,196,196,180,152,116, 80, 44, 20,  0,
 221   4, 20, 52, 96,136,180,212,228,228,212,180,136, 96, 52, 20,  4,
 222   4, 24, 60,104,152,196,228,248,248,228,196,152,104, 60, 24,  4,
 223   4, 24, 60,104,152,196,228,248,248,228,196,152,104, 60, 24,  4,
 224   4, 20, 52, 96,136,180,212,228,228,212,180,136, 96, 52, 20,  4,
 225   0, 20, 44, 80,116,152,180,196,196,180,152,116, 80, 44, 20,  0,
 226   0, 16, 36, 60, 92,116,136,152,152,136,116, 92, 60, 36, 16,  0,
 227   0,  8, 24, 44, 60, 80, 96,104,104, 96, 80, 60, 44, 24,  8,  0,
 228   0,  4, 16, 24, 36, 44, 52, 60, 60, 52, 44, 36, 24, 16,  4,  0,
 229   0,  4,  4,  8, 16, 20, 20, 24, 24, 20, 20, 16,  8,  4,  4,  0,
 230   0,  0,  0,  0,  0,  0,  4,  4,  4,  4,  0,  0,  0,  0,  0,  0,
 231 //error:0.000033
 232 };
 233 #elif 1 // 64*linear
 234 static const uint8_t obmc32[1024]={
 235   0,  0,  0,  0,  4,  4,  4,  4,  4,  4,  4,  4,  8,  8,  8,  8,  8,  8,  8,  8,  4,  4,  4,  4,  4,  4,  4,  4,  0,  0,  0,  0,
 236   0,  4,  4,  4,  8,  8,  8, 12, 12, 16, 16, 16, 20, 20, 20, 24, 24, 20, 20, 20, 16, 16, 16, 12, 12,  8,  8,  8,  4,  4,  4,  0,
 237   0,  4,  8,  8, 12, 12, 16, 20, 20, 24, 28, 28, 32, 32, 36, 40, 40, 36, 32, 32, 28, 28, 24, 20, 20, 16, 12, 12,  8,  8,  4,  0,
 238   0,  4,  8, 12, 16, 20, 24, 28, 28, 32, 36, 40, 44, 48, 52, 56, 56, 52, 48, 44, 40, 36, 32, 28, 28, 24, 20, 16, 12,  8,  4,  0,
 239   4,  8, 12, 16, 20, 24, 28, 32, 40, 44, 48, 52, 56, 60, 64, 68, 68, 64, 60, 56, 52, 48, 44, 40, 32, 28, 24, 20, 16, 12,  8,  4,
 240   4,  8, 12, 20, 24, 32, 36, 40, 48, 52, 56, 64, 68, 76, 80, 84, 84, 80, 76, 68, 64, 56, 52, 48, 40, 36, 32, 24, 20, 12,  8,  4,
 241   4,  8, 16, 24, 28, 36, 44, 48, 56, 60, 68, 76, 80, 88, 96,100,100, 96, 88, 80, 76, 68, 60, 56, 48, 44, 36, 28, 24, 16,  8,  4,
 242   4, 12, 20, 28, 32, 40, 48, 56, 64, 72, 80, 88, 92,100,108,116,116,108,100, 92, 88, 80, 72, 64, 56, 48, 40, 32, 28, 20, 12,  4,
 243   4, 12, 20, 28, 40, 48, 56, 64, 72, 80, 88, 96,108,116,124,132,132,124,116,108, 96, 88, 80, 72, 64, 56, 48, 40, 28, 20, 12,  4,
 244   4, 16, 24, 32, 44, 52, 60, 72, 80, 92,100,108,120,128,136,148,148,136,128,120,108,100, 92, 80, 72, 60, 52, 44, 32, 24, 16,  4,
 245   4, 16, 28, 36, 48, 56, 68, 80, 88,100,112,120,132,140,152,164,164,152,140,132,120,112,100, 88, 80, 68, 56, 48, 36, 28, 16,  4,
 246   4, 16, 28, 40, 52, 64, 76, 88, 96,108,120,132,144,156,168,180,180,168,156,144,132,120,108, 96, 88, 76, 64, 52, 40, 28, 16,  4,
 247   8, 20, 32, 44, 56, 68, 80, 92,108,120,132,144,156,168,180,192,192,180,168,156,144,132,120,108, 92, 80, 68, 56, 44, 32, 20,  8,
 248   8, 20, 32, 48, 60, 76, 88,100,116,128,140,156,168,184,196,208,208,196,184,168,156,140,128,116,100, 88, 76, 60, 48, 32, 20,  8,
 249   8, 20, 36, 52, 64, 80, 96,108,124,136,152,168,180,196,212,224,224,212,196,180,168,152,136,124,108, 96, 80, 64, 52, 36, 20,  8,
 250   8, 24, 40, 56, 68, 84,100,116,132,148,164,180,192,208,224,240,240,224,208,192,180,164,148,132,116,100, 84, 68, 56, 40, 24,  8,
 251   8, 24, 40, 56, 68, 84,100,116,132,148,164,180,192,208,224,240,240,224,208,192,180,164,148,132,116,100, 84, 68, 56, 40, 24,  8,
 252   8, 20, 36, 52, 64, 80, 96,108,124,136,152,168,180,196,212,224,224,212,196,180,168,152,136,124,108, 96, 80, 64, 52, 36, 20,  8,
 253   8, 20, 32, 48, 60, 76, 88,100,116,128,140,156,168,184,196,208,208,196,184,168,156,140,128,116,100, 88, 76, 60, 48, 32, 20,  8,
 254   8, 20, 32, 44, 56, 68, 80, 92,108,120,132,144,156,168,180,192,192,180,168,156,144,132,120,108, 92, 80, 68, 56, 44, 32, 20,  8,
 255   4, 16, 28, 40, 52, 64, 76, 88, 96,108,120,132,144,156,168,180,180,168,156,144,132,120,108, 96, 88, 76, 64, 52, 40, 28, 16,  4,
 256   4, 16, 28, 36, 48, 56, 68, 80, 88,100,112,120,132,140,152,164,164,152,140,132,120,112,100, 88, 80, 68, 56, 48, 36, 28, 16,  4,
 257   4, 16, 24, 32, 44, 52, 60, 72, 80, 92,100,108,120,128,136,148,148,136,128,120,108,100, 92, 80, 72, 60, 52, 44, 32, 24, 16,  4,
 258   4, 12, 20, 28, 40, 48, 56, 64, 72, 80, 88, 96,108,116,124,132,132,124,116,108, 96, 88, 80, 72, 64, 56, 48, 40, 28, 20, 12,  4,
 259   4, 12, 20, 28, 32, 40, 48, 56, 64, 72, 80, 88, 92,100,108,116,116,108,100, 92, 88, 80, 72, 64, 56, 48, 40, 32, 28, 20, 12,  4,
 260   4,  8, 16, 24, 28, 36, 44, 48, 56, 60, 68, 76, 80, 88, 96,100,100, 96, 88, 80, 76, 68, 60, 56, 48, 44, 36, 28, 24, 16,  8,  4,
 261   4,  8, 12, 20, 24, 32, 36, 40, 48, 52, 56, 64, 68, 76, 80, 84, 84, 80, 76, 68, 64, 56, 52, 48, 40, 36, 32, 24, 20, 12,  8,  4,
 262   4,  8, 12, 16, 20, 24, 28, 32, 40, 44, 48, 52, 56, 60, 64, 68, 68, 64, 60, 56, 52, 48, 44, 40, 32, 28, 24, 20, 16, 12,  8,  4,
 263   0,  4,  8, 12, 16, 20, 24, 28, 28, 32, 36, 40, 44, 48, 52, 56, 56, 52, 48, 44, 40, 36, 32, 28, 28, 24, 20, 16, 12,  8,  4,  0,
 264   0,  4,  8,  8, 12, 12, 16, 20, 20, 24, 28, 28, 32, 32, 36, 40, 40, 36, 32, 32, 28, 28, 24, 20, 20, 16, 12, 12,  8,  8,  4,  0,
 265   0,  4,  4,  4,  8,  8,  8, 12, 12, 16, 16, 16, 20, 20, 20, 24, 24, 20, 20, 20, 16, 16, 16, 12, 12,  8,  8,  8,  4,  4,  4,  0,
 266   0,  0,  0,  0,  4,  4,  4,  4,  4,  4,  4,  4,  8,  8,  8,  8,  8,  8,  8,  8,  4,  4,  4,  4,  4,  4,  4,  4,  0,  0,  0,  0,
 267  //error:0.000020
 268 };
 269 static const uint8_t obmc16[256]={
 270   0,  4,  4,  8,  8, 12, 12, 16, 16, 12, 12,  8,  8,  4,  4,  0,
 271   4,  8, 16, 20, 28, 32, 40, 44, 44, 40, 32, 28, 20, 16,  8,  4,
 272   4, 16, 24, 36, 44, 56, 64, 76, 76, 64, 56, 44, 36, 24, 16,  4,
 273   8, 20, 36, 48, 64, 76, 92,104,104, 92, 76, 64, 48, 36, 20,  8,
 274   8, 28, 44, 64, 80,100,116,136,136,116,100, 80, 64, 44, 28,  8,
 275  12, 32, 56, 76,100,120,144,164,164,144,120,100, 76, 56, 32, 12,
 276  12, 40, 64, 92,116,144,168,196,196,168,144,116, 92, 64, 40, 12,
 277  16, 44, 76,104,136,164,196,224,224,196,164,136,104, 76, 44, 16,
 278  16, 44, 76,104,136,164,196,224,224,196,164,136,104, 76, 44, 16,
 279  12, 40, 64, 92,116,144,168,196,196,168,144,116, 92, 64, 40, 12,
 280  12, 32, 56, 76,100,120,144,164,164,144,120,100, 76, 56, 32, 12,
 281   8, 28, 44, 64, 80,100,116,136,136,116,100, 80, 64, 44, 28,  8,
 282   8, 20, 36, 48, 64, 76, 92,104,104, 92, 76, 64, 48, 36, 20,  8,
 283   4, 16, 24, 36, 44, 56, 64, 76, 76, 64, 56, 44, 36, 24, 16,  4,
 284   4,  8, 16, 20, 28, 32, 40, 44, 44, 40, 32, 28, 20, 16,  8,  4,
 285   0,  4,  4,  8,  8, 12, 12, 16, 16, 12, 12,  8,  8,  4,  4,  0,
 286 //error:0.000015
 287 };
 288 #else //64*cos
 289 static const uint8_t obmc32[1024]={
 290   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
 291   0,  0,  0,  0,  0,  0,  4,  4,  4,  4,  4,  4,  4,  4,  8,  4,  4,  8,  4,  4,  4,  4,  4,  4,  4,  4,  0,  0,  0,  0,  0,  0,
 292   0,  0,  0,  4,  4,  4,  4,  8,  8, 12, 12, 12, 12, 16, 16, 16, 16, 16, 16, 12, 12, 12, 12,  8,  8,  4,  4,  4,  4,  0,  0,  0,
 293   0,  0,  4,  4,  4,  8,  8, 12, 16, 20, 20, 24, 28, 28, 28, 28, 28, 28, 28, 28, 24, 20, 20, 16, 12,  8,  8,  4,  4,  4,  0,  0,
 294   0,  0,  4,  4,  8, 12, 16, 20, 24, 28, 36, 40, 44, 44, 48, 48, 48, 48, 44, 44, 40, 36, 28, 24, 20, 16, 12,  8,  4,  4,  0,  0,
 295   0,  0,  4,  8, 12, 20, 24, 32, 36, 44, 48, 56, 60, 64, 68, 68, 68, 68, 64, 60, 56, 48, 44, 36, 32, 24, 20, 12,  8,  4,  0,  0,
 296   0,  4,  4,  8, 16, 24, 32, 40, 48, 60, 68, 76, 80, 84, 88, 92, 92, 88, 84, 80, 76, 68, 60, 48, 40, 32, 24, 16,  8,  4,  4,  0,
 297   0,  4,  8, 12, 20, 32, 40, 52, 64, 76, 84, 96,104,108,112,116,116,112,108,104, 96, 84, 76, 64, 52, 40, 32, 20, 12,  8,  4,  0,
 298   0,  4,  8, 16, 24, 36, 48, 64, 76, 92,104,116,124,132,136,140,140,136,132,124,116,104, 92, 76, 64, 48, 36, 24, 16,  8,  4,  0,
 299   0,  4, 12, 20, 28, 44, 60, 76, 92,104,120,136,148,156,160,164,164,160,156,148,136,120,104, 92, 76, 60, 44, 28, 20, 12,  4,  0,
 300   0,  4, 12, 20, 36, 48, 68, 84,104,120,140,152,168,176,184,188,188,184,176,168,152,140,120,104, 84, 68, 48, 36, 20, 12,  4,  0,
 301   0,  4, 12, 24, 36, 56, 76, 96,116,136,152,172,184,196,204,208,208,204,196,184,172,152,136,116, 96, 76, 56, 36, 24, 12,  4,  0,
 302   0,  4, 12, 24, 44, 60, 80,104,124,148,168,184,200,212,224,228,228,224,212,200,184,168,148,124,104, 80, 60, 44, 24, 12,  4,  0,
 303   0,  4, 12, 28, 44, 64, 84,108,132,156,176,196,212,228,236,240,240,236,228,212,196,176,156,132,108, 84, 64, 44, 28, 12,  4,  0,
 304   0,  4, 16, 28, 48, 68, 88,112,136,160,184,204,224,236,244,252,252,244,236,224,204,184,160,136,112, 88, 68, 48, 28, 16,  4,  0,
 305   1,  4, 16, 28, 48, 68, 92,116,140,164,188,208,228,240,252,255,255,252,240,228,208,188,164,140,116, 92, 68, 48, 28, 16,  4,  1,
 306   1,  4, 16, 28, 48, 68, 92,116,140,164,188,208,228,240,252,255,255,252,240,228,208,188,164,140,116, 92, 68, 48, 28, 16,  4,  1,
 307   0,  4, 16, 28, 48, 68, 88,112,136,160,184,204,224,236,244,252,252,244,236,224,204,184,160,136,112, 88, 68, 48, 28, 16,  4,  0,
 308   0,  4, 12, 28, 44, 64, 84,108,132,156,176,196,212,228,236,240,240,236,228,212,196,176,156,132,108, 84, 64, 44, 28, 12,  4,  0,
 309   0,  4, 12, 24, 44, 60, 80,104,124,148,168,184,200,212,224,228,228,224,212,200,184,168,148,124,104, 80, 60, 44, 24, 12,  4,  0,
 310   0,  4, 12, 24, 36, 56, 76, 96,116,136,152,172,184,196,204,208,208,204,196,184,172,152,136,116, 96, 76, 56, 36, 24, 12,  4,  0,
 311   0,  4, 12, 20, 36, 48, 68, 84,104,120,140,152,168,176,184,188,188,184,176,168,152,140,120,104, 84, 68, 48, 36, 20, 12,  4,  0,
 312   0,  4, 12, 20, 28, 44, 60, 76, 92,104,120,136,148,156,160,164,164,160,156,148,136,120,104, 92, 76, 60, 44, 28, 20, 12,  4,  0,
 313   0,  4,  8, 16, 24, 36, 48, 64, 76, 92,104,116,124,132,136,140,140,136,132,124,116,104, 92, 76, 64, 48, 36, 24, 16,  8,  4,  0,
 314   0,  4,  8, 12, 20, 32, 40, 52, 64, 76, 84, 96,104,108,112,116,116,112,108,104, 96, 84, 76, 64, 52, 40, 32, 20, 12,  8,  4,  0,
 315   0,  4,  4,  8, 16, 24, 32, 40, 48, 60, 68, 76, 80, 84, 88, 92, 92, 88, 84, 80, 76, 68, 60, 48, 40, 32, 24, 16,  8,  4,  4,  0,
 316   0,  0,  4,  8, 12, 20, 24, 32, 36, 44, 48, 56, 60, 64, 68, 68, 68, 68, 64, 60, 56, 48, 44, 36, 32, 24, 20, 12,  8,  4,  0,  0,
 317   0,  0,  4,  4,  8, 12, 16, 20, 24, 28, 36, 40, 44, 44, 48, 48, 48, 48, 44, 44, 40, 36, 28, 24, 20, 16, 12,  8,  4,  4,  0,  0,
 318   0,  0,  4,  4,  4,  8,  8, 12, 16, 20, 20, 24, 28, 28, 28, 28, 28, 28, 28, 28, 24, 20, 20, 16, 12,  8,  8,  4,  4,  4,  0,  0,
 319   0,  0,  0,  4,  4,  4,  4,  8,  8, 12, 12, 12, 12, 16, 16, 16, 16, 16, 16, 12, 12, 12, 12,  8,  8,  4,  4,  4,  4,  0,  0,  0,
 320   0,  0,  0,  0,  0,  0,  4,  4,  4,  4,  4,  4,  4,  4,  8,  4,  4,  8,  4,  4,  4,  4,  4,  4,  4,  4,  0,  0,  0,  0,  0,  0,
 321   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
 322 //error:0.000022
 323 };
 324 static const uint8_t obmc16[256]={
 325   0,  0,  0,  0,  0,  4,  4,  4,  4,  4,  4,  0,  0,  0,  0,  0,
 326   0,  0,  4,  8, 12, 16, 20, 20, 20, 20, 16, 12,  8,  4,  0,  0,
 327   0,  4, 12, 24, 32, 44, 52, 56, 56, 52, 44, 32, 24, 12,  4,  0,
 328   0,  8, 24, 40, 60, 80, 96,104,104, 96, 80, 60, 40, 24,  8,  0,
 329   0, 12, 32, 64, 92,120,140,152,152,140,120, 92, 64, 32, 12,  0,
 330   4, 16, 44, 80,120,156,184,196,196,184,156,120, 80, 44, 16,  4,
 331   4, 20, 52, 96,140,184,216,232,232,216,184,140, 96, 52, 20,  4,
 332   0, 20, 56,104,152,196,232,252,252,232,196,152,104, 56, 20,  0,
 333   0, 20, 56,104,152,196,232,252,252,232,196,152,104, 56, 20,  0,
 334   4, 20, 52, 96,140,184,216,232,232,216,184,140, 96, 52, 20,  4,
 335   4, 16, 44, 80,120,156,184,196,196,184,156,120, 80, 44, 16,  4,
 336   0, 12, 32, 64, 92,120,140,152,152,140,120, 92, 64, 32, 12,  0,
 337   0,  8, 24, 40, 60, 80, 96,104,104, 96, 80, 60, 40, 24,  8,  0,
 338   0,  4, 12, 24, 32, 44, 52, 56, 56, 52, 44, 32, 24, 12,  4,  0,
 339   0,  0,  4,  8, 12, 16, 20, 20, 20, 20, 16, 12,  8,  4,  0,  0,
 340   0,  0,  0,  0,  0,  4,  4,  4,  4,  4,  4,  0,  0,  0,  0,  0,
 341 //error:0.000022
 342 };
 343 #endif /* 0 */
 344
 345 //linear *64
 346 static const uint8_t obmc8[64]={
 347   4, 12, 20, 28, 28, 20, 12,  4,
 348  12, 36, 60, 84, 84, 60, 36, 12,
 349  20, 60,100,140,140,100, 60, 20,
 350  28, 84,140,196,196,140, 84, 28,
 351  28, 84,140,196,196,140, 84, 28,
 352  20, 60,100,140,140,100, 60, 20,
 353  12, 36, 60, 84, 84, 60, 36, 12,
 354   4, 12, 20, 28, 28, 20, 12,  4,
 355 //error:0.000000
 356 };
 357
 358 //linear *64
 359 static const uint8_t obmc4[16]={
 360  16, 48, 48, 16,
 361  48,144,144, 48,
 362  48,144,144, 48,
 363  16, 48, 48, 16,
 364 //error:0.000000
 365 };
 366
 367 static const uint8_t * const obmc_tab[4]={
 368     obmc32, obmc16, obmc8, obmc4
 369 };
 370
 371 static int scale_mv_ref[MAX_REF_FRAMES][MAX_REF_FRAMES];
 372
 373 typedef struct BlockNode{
 374     int16_t mx;
 375     int16_t my;
 376     uint8_t ref;
 377     uint8_t color[3];
 378     uint8_t type;
 379 //#define TYPE_SPLIT    1
 380 #define BLOCK_INTRA   1
 381 #define BLOCK_OPT     2
 382 //#define TYPE_NOCOLOR  4
 383     uint8_t level; //FIXME merge into type?
 384 }BlockNode;
 385
 386 static const BlockNode null_block= { //FIXME add border maybe
 387     .color= {128,128,128},
 388     .mx= 0,
 389     .my= 0,
 390     .ref= 0,
 391     .type= 0,
 392     .level= 0,
 393 };
 394
 395 #define LOG2_MB_SIZE 4
 396 #define MB_SIZE (1<<LOG2_MB_SIZE)
 397 #define ENCODER_EXTRA_BITS 4
 398 #define HTAPS_MAX 8
 399
 400 typedef struct x_and_coeff{
 401     int16_t x;
 402     uint16_t coeff;
 403 } x_and_coeff;
 404
 405 typedef struct SubBand{
 406     int level;
 407     int stride;
 408     int width;
 409     int height;
 410     int qlog;        ///< log(qscale)/log[2^(1/6)]
 411     DWTELEM *buf;
 412     IDWTELEM *ibuf;
 413     int buf_x_offset;
 414     int buf_y_offset;
 415     int stride_line; ///< Stride measured in lines, not pixels.
 416     x_and_coeff * x_coeff;
 417     struct SubBand *parent;
 418     uint8_t state[/*7*2*/ 7 + 512][32];
 419 }SubBand;
 420
 421 typedef struct Plane{
 422     int width;
 423     int height;
 424     SubBand band[MAX_DECOMPOSITIONS][4];
 425
 426     int htaps;
 427     int8_t hcoeff[HTAPS_MAX/2];
 428     int diag_mc;
 429     int fast_mc;
 430
 431     int last_htaps;
 432     int8_t last_hcoeff[HTAPS_MAX/2];
 433     int last_diag_mc;
 434 }Plane;
 435
 436 typedef struct SnowContext{
 437 //    MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to eventually make the motion estimation independent of MpegEncContext, so this will be removed then (FIXME/XXX)
 438
 439     AVCodecContext *avctx;
 440     RangeCoder c;
 441     DSPContext dsp;
 442     AVFrame new_picture;
 443     AVFrame input_picture;              ///< new_picture with the internal linesizes
 444     AVFrame current_picture;
 445     AVFrame last_picture[MAX_REF_FRAMES];
 446     uint8_t *halfpel_plane[MAX_REF_FRAMES][4][4];
 447     AVFrame mconly_picture;
 448 //     uint8_t q_context[16];
 449     uint8_t header_state[32];
 450     uint8_t block_state[128 + 32*128];
 451     int keyframe;
 452     int always_reset;
 453     int version;
 454     int spatial_decomposition_type;
 455     int last_spatial_decomposition_type;
 456     int temporal_decomposition_type;
 457     int spatial_decomposition_count;
 458     int last_spatial_decomposition_count;
 459     int temporal_decomposition_count;
 460     int max_ref_frames;
 461     int ref_frames;
 462     int16_t (*ref_mvs[MAX_REF_FRAMES])[2];
 463     uint32_t *ref_scores[MAX_REF_FRAMES];
 464     DWTELEM *spatial_dwt_buffer;
 465     IDWTELEM *spatial_idwt_buffer;
 466     int colorspace_type;
 467     int chroma_h_shift;
 468     int chroma_v_shift;
 469     int spatial_scalability;
 470     int qlog;
 471     int last_qlog;
 472     int lambda;
 473     int lambda2;
 474     int pass1_rc;
 475     int mv_scale;
 476     int last_mv_scale;
 477     int qbias;
 478     int last_qbias;
 479 #define QBIAS_SHIFT 3
 480     int b_width;
 481     int b_height;
 482     int block_max_depth;
 483     int last_block_max_depth;
 484     Plane plane[MAX_PLANES];
 485     BlockNode *block;
 486 #define ME_CACHE_SIZE 1024
 487     int me_cache[ME_CACHE_SIZE];
 488     int me_cache_generation;
 489     slice_buffer sb;
 490
 491     MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to eventually make the motion estimation independent of MpegEncContext, so this will be removed then (FIXME/XXX)
 492
 493     uint8_t *scratchbuf;
 494 }SnowContext;
 495
 496 typedef struct {
 497     IDWTELEM *b0;
 498     IDWTELEM *b1;
 499     IDWTELEM *b2;
 500     IDWTELEM *b3;
 501     int y;
 502 } DWTCompose;
 503
 504 #define slice_buffer_get_line(slice_buf, line_num) ((slice_buf)->line[line_num] ? (slice_buf)->line[line_num] : slice_buffer_load_line((slice_buf), (line_num)))
 505 //#define slice_buffer_get_line(slice_buf, line_num) (slice_buffer_load_line((slice_buf), (line_num)))
 506
 507 static void slice_buffer_init(slice_buffer * buf, int line_count, int max_allocated_lines, int line_width, IDWTELEM * base_buffer)
 508 {
 509     int i;
 510
 511     buf->base_buffer = base_buffer;
 512     buf->line_count = line_count;
 513     buf->line_width = line_width;
 514     buf->data_count = max_allocated_lines;
 515     buf->line = av_mallocz (sizeof(IDWTELEM *) * line_count);
 516     buf->data_stack = av_malloc (sizeof(IDWTELEM *) * max_allocated_lines);
 517
 518     for(i = 0; i < max_allocated_lines; i++){
 519         buf->data_stack[i] = av_malloc (sizeof(IDWTELEM) * line_width);
 520     }
 521
 522     buf->data_stack_top = max_allocated_lines - 1;
 523 }
 524
 525 static IDWTELEM * slice_buffer_load_line(slice_buffer * buf, int line)
 526 {
 527     IDWTELEM * buffer;
 528
 529     assert(buf->data_stack_top >= 0);
 530 //  assert(!buf->line[line]);
 531     if (buf->line[line])
 532         return buf->line[line];
 533
 534     buffer = buf->data_stack[buf->data_stack_top];
 535     buf->data_stack_top--;
 536     buf->line[line] = buffer;
 537
 538     return buffer;
 539 }
 540
 541 static void slice_buffer_release(slice_buffer * buf, int line)
 542 {
 543     IDWTELEM * buffer;
 544
 545     assert(line >= 0 && line < buf->line_count);
 546     assert(buf->line[line]);
 547
 548     buffer = buf->line[line];
 549     buf->data_stack_top++;
 550     buf->data_stack[buf->data_stack_top] = buffer;
 551     buf->line[line] = NULL;
 552 }
 553
 554 static void slice_buffer_flush(slice_buffer * buf)
 555 {
 556     int i;
 557     for(i = 0; i < buf->line_count; i++){
 558         if (buf->line[i])
 559             slice_buffer_release(buf, i);
 560     }
 561 }
 562
 563 static void slice_buffer_destroy(slice_buffer * buf)
 564 {
 565     int i;
 566     slice_buffer_flush(buf);
 567
 568     for(i = buf->data_count - 1; i >= 0; i--){
 569         av_freep(&buf->data_stack[i]);
 570     }
 571     av_freep(&buf->data_stack);
 572     av_freep(&buf->line);
 573 }
 574
 575 #ifdef __sgi
 576 // Avoid a name clash on SGI IRIX
 577 #undef qexp
 578 #endif
 579 #define QEXPSHIFT (7-FRAC_BITS+8) //FIXME try to change this to 0
 580 static uint8_t qexp[QROOT];
 581
 582 static inline int mirror(int v, int m){
 583     while((unsigned)v > (unsigned)m){
 584         v=-v;
 585         if(v<0) v+= 2*m;
 586     }
 587     return v;
 588 }
 589
 590 static inline void put_symbol(RangeCoder *c, uint8_t *state, int v, int is_signed){
 591     int i;
 592
 593     if(v){
 594         const int a= FFABS(v);
 595         const int e= av_log2(a);
 596 #if 1
 597         const int el= FFMIN(e, 10);
 598         put_rac(c, state+0, 0);
 599
 600         for(i=0; i<el; i++){
 601             put_rac(c, state+1+i, 1);  //1..10
 602         }
 603         for(; i<e; i++){
 604             put_rac(c, state+1+9, 1);  //1..10
 605         }
 606         put_rac(c, state+1+FFMIN(i,9), 0);
 607
 608         for(i=e-1; i>=el; i--){
 609             put_rac(c, state+22+9, (a>>i)&1); //22..31
 610         }
 611         for(; i>=0; i--){
 612             put_rac(c, state+22+i, (a>>i)&1); //22..31
 613         }
 614
 615         if(is_signed)
 616             put_rac(c, state+11 + el, v < 0); //11..21
 617 #else
 618
 619         put_rac(c, state+0, 0);
 620         if(e<=9){
 621             for(i=0; i<e; i++){
 622                 put_rac(c, state+1+i, 1);  //1..10
 623             }
 624             put_rac(c, state+1+i, 0);
 625
 626             for(i=e-1; i>=0; i--){
 627                 put_rac(c, state+22+i, (a>>i)&1); //22..31
 628             }
 629
 630             if(is_signed)
 631                 put_rac(c, state+11 + e, v < 0); //11..21
 632         }else{
 633             for(i=0; i<e; i++){
 634                 put_rac(c, state+1+FFMIN(i,9), 1);  //1..10
 635             }
 636             put_rac(c, state+1+9, 0);
 637
 638             for(i=e-1; i>=0; i--){
 639                 put_rac(c, state+22+FFMIN(i,9), (a>>i)&1); //22..31
 640             }
 641
 642             if(is_signed)
 643                 put_rac(c, state+11 + 10, v < 0); //11..21
 644         }
 645 #endif /* 1 */
 646     }else{
 647         put_rac(c, state+0, 1);
 648     }
 649 }
 650
 651 static inline int get_symbol(RangeCoder *c, uint8_t *state, int is_signed){
 652     if(get_rac(c, state+0))
 653         return 0;
 654     else{
 655         int i, e, a;
 656         e= 0;
 657         while(get_rac(c, state+1 + FFMIN(e,9))){ //1..10
 658             e++;
 659         }
 660
 661         a= 1;
 662         for(i=e-1; i>=0; i--){
 663             a += a + get_rac(c, state+22 + FFMIN(i,9)); //22..31
 664         }
 665
 666         e= -(is_signed && get_rac(c, state+11 + FFMIN(e,10))); //11..21
 667         return (a^e)-e;
 668     }
 669 }
 670
 671 static inline void put_symbol2(RangeCoder *c, uint8_t *state, int v, int log2){
 672     int i;
 673     int r= log2>=0 ? 1<<log2 : 1;
 674
 675     assert(v>=0);
 676     assert(log2>=-4);
 677
 678     while(v >= r){
 679         put_rac(c, state+4+log2, 1);
 680         v -= r;
 681         log2++;
 682         if(log2>0) r+=r;
 683     }
 684     put_rac(c, state+4+log2, 0);
 685
 686     for(i=log2-1; i>=0; i--){
 687         put_rac(c, state+31-i, (v>>i)&1);
 688     }
 689 }
 690
 691 static inline int get_symbol2(RangeCoder *c, uint8_t *state, int log2){
 692     int i;
 693     int r= log2>=0 ? 1<<log2 : 1;
 694     int v=0;
 695
 696     assert(log2>=-4);
 697
 698     while(get_rac(c, state+4+log2)){
 699         v+= r;
 700         log2++;
 701         if(log2>0) r+=r;
 702     }
 703
 704     for(i=log2-1; i>=0; i--){
 705         v+= get_rac(c, state+31-i)<<i;
 706     }
 707
 708     return v;
 709 }
 710
 711 static av_always_inline void
 712 lift(DWTELEM *dst, DWTELEM *src, DWTELEM *ref,
 713      int dst_step, int src_step, int ref_step,
 714      int width, int mul, int add, int shift,
 715      int highpass, int inverse){
 716     const int mirror_left= !highpass;
 717     const int mirror_right= (width&1) ^ highpass;
 718     const int w= (width>>1) - 1 + (highpass & width);
 719     int i;
 720
 721 #define LIFT(src, ref, inv) ((src) + ((inv) ? - (ref) : + (ref)))
 722     if(mirror_left){
 723         dst[0] = LIFT(src[0], ((mul*2*ref[0]+add)>>shift), inverse);
 724         dst += dst_step;
 725         src += src_step;
 726     }
 727
 728     for(i=0; i<w; i++){
 729         dst[i*dst_step] =
 730             LIFT(src[i*src_step],
 731                  ((mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add)>>shift),
 732                  inverse);
 733     }
 734
 735     if(mirror_right){
 736         dst[w*dst_step] =
 737             LIFT(src[w*src_step],
 738                  ((mul*2*ref[w*ref_step]+add)>>shift),
 739                  inverse);
 740     }
 741 }
 742
 743 static av_always_inline void
 744 inv_lift(IDWTELEM *dst, IDWTELEM *src, IDWTELEM *ref,
 745          int dst_step, int src_step, int ref_step,
 746          int width, int mul, int add, int shift,
 747          int highpass, int inverse){
 748     const int mirror_left= !highpass;
 749     const int mirror_right= (width&1) ^ highpass;
 750     const int w= (width>>1) - 1 + (highpass & width);
 751     int i;
 752
 753 #define LIFT(src, ref, inv) ((src) + ((inv) ? - (ref) : + (ref)))
 754     if(mirror_left){
 755         dst[0] = LIFT(src[0], ((mul*2*ref[0]+add)>>shift), inverse);
 756         dst += dst_step;
 757         src += src_step;
 758     }
 759
 760     for(i=0; i<w; i++){
 761         dst[i*dst_step] =
 762             LIFT(src[i*src_step],
 763                  ((mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add)>>shift),
 764                  inverse);
 765     }
 766
 767     if(mirror_right){
 768         dst[w*dst_step] =
 769             LIFT(src[w*src_step],
 770                  ((mul*2*ref[w*ref_step]+add)>>shift),
 771                  inverse);
 772     }
 773 }
 774
 775 #ifndef liftS
 776 static av_always_inline void
 777 liftS(DWTELEM *dst, DWTELEM *src, DWTELEM *ref,
 778       int dst_step, int src_step, int ref_step,
 779       int width, int mul, int add, int shift,
 780       int highpass, int inverse){
 781     const int mirror_left= !highpass;
 782     const int mirror_right= (width&1) ^ highpass;
 783     const int w= (width>>1) - 1 + (highpass & width);
 784     int i;
 785
 786     assert(shift == 4);
 787 #define LIFTS(src, ref, inv) \
 788         ((inv) ? \
 789             (src) + (((ref) + 4*(src))>>shift): \
 790             -((-16*(src) + (ref) + add/4 + 1 + (5<<25))/(5*4) - (1<<23)))
 791     if(mirror_left){
 792         dst[0] = LIFTS(src[0], mul*2*ref[0]+add, inverse);
 793         dst += dst_step;
 794         src += src_step;
 795     }
 796
 797     for(i=0; i<w; i++){
 798         dst[i*dst_step] =
 799             LIFTS(src[i*src_step],
 800                   mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add,
 801                   inverse);
 802     }
 803
 804     if(mirror_right){
 805         dst[w*dst_step] =
 806             LIFTS(src[w*src_step], mul*2*ref[w*ref_step]+add, inverse);
 807     }
 808 }
 809 static av_always_inline void
 810 inv_liftS(IDWTELEM *dst, IDWTELEM *src, IDWTELEM *ref,
 811           int dst_step, int src_step, int ref_step,
 812           int width, int mul, int add, int shift,
 813           int highpass, int inverse){
 814     const int mirror_left= !highpass;
 815     const int mirror_right= (width&1) ^ highpass;
 816     const int w= (width>>1) - 1 + (highpass & width);
 817     int i;
 818
 819     assert(shift == 4);
 820 #define LIFTS(src, ref, inv) \
 821     ((inv) ? \
 822         (src) + (((ref) + 4*(src))>>shift): \
 823         -((-16*(src) + (ref) + add/4 + 1 + (5<<25))/(5*4) - (1<<23)))
 824     if(mirror_left){
 825         dst[0] = LIFTS(src[0], mul*2*ref[0]+add, inverse);
 826         dst += dst_step;
 827         src += src_step;
 828     }
 829
 830     for(i=0; i<w; i++){
 831         dst[i*dst_step] =
 832             LIFTS(src[i*src_step],
 833                   mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add,
 834                   inverse);
 835     }
 836
 837     if(mirror_right){
 838         dst[w*dst_step] =
 839             LIFTS(src[w*src_step], mul*2*ref[w*ref_step]+add, inverse);
 840     }
 841 }
 842 #endif /* ! liftS */
 843
 844 static void horizontal_decompose53i(DWTELEM *b, int width){
 845     DWTELEM temp[width];
 846     const int width2= width>>1;
 847     int x;
 848     const int w2= (width+1)>>1;
 849
 850     for(x=0; x<width2; x++){
 851         temp[x   ]= b[2*x    ];
 852         temp[x+w2]= b[2*x + 1];
 853     }
 854     if(width&1)
 855         temp[x   ]= b[2*x    ];
 856 #if 0
 857     {
 858     int A1,A2,A3,A4;
 859     A2= temp[1       ];
 860     A4= temp[0       ];
 861     A1= temp[0+width2];
 862     A1 -= (A2 + A4)>>1;
 863     A4 += (A1 + 1)>>1;
 864     b[0+width2] = A1;
 865     b[0       ] = A4;
 866     for(x=1; x+1<width2; x+=2){
 867         A3= temp[x+width2];
 868         A4= temp[x+1     ];
 869         A3 -= (A2 + A4)>>1;
 870         A2 += (A1 + A3 + 2)>>2;
 871         b[x+width2] = A3;
 872         b[x       ] = A2;
 873
 874         A1= temp[x+1+width2];
 875         A2= temp[x+2       ];
 876         A1 -= (A2 + A4)>>1;
 877         A4 += (A1 + A3 + 2)>>2;
 878         b[x+1+width2] = A1;
 879         b[x+1       ] = A4;
 880     }
 881     A3= temp[width-1];
 882     A3 -= A2;
 883     A2 += (A1 + A3 + 2)>>2;
 884     b[width -1] = A3;
 885     b[width2-1] = A2;
 886     }
 887 #else
 888     lift(b+w2, temp+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 0);
 889     lift(b   , temp   , b+w2, 1, 1, 1, width,  1, 2, 2, 0, 0);
 890 #endif /* 0 */
 891 }
 892
 893 static void vertical_decompose53iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
 894     int i;
 895
 896     for(i=0; i<width; i++){
 897         b1[i] -= (b0[i] + b2[i])>>1;
 898     }
 899 }
 900
 901 static void vertical_decompose53iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
 902     int i;
 903
 904     for(i=0; i<width; i++){
 905         b1[i] += (b0[i] + b2[i] + 2)>>2;
 906     }
 907 }
 908
 909 static void spatial_decompose53i(DWTELEM *buffer, int width, int height, int stride){
 910     int y;
 911     DWTELEM *b0= buffer + mirror(-2-1, height-1)*stride;
 912     DWTELEM *b1= buffer + mirror(-2  , height-1)*stride;
 913
 914     for(y=-2; y<height; y+=2){
 915         DWTELEM *b2= buffer + mirror(y+1, height-1)*stride;
 916         DWTELEM *b3= buffer + mirror(y+2, height-1)*stride;
 917
 918         if(y+1<(unsigned)height) horizontal_decompose53i(b2, width);
 919         if(y+2<(unsigned)height) horizontal_decompose53i(b3, width);
 920
 921         if(y+1<(unsigned)height) vertical_decompose53iH0(b1, b2, b3, width);
 922         if(y+0<(unsigned)height) vertical_decompose53iL0(b0, b1, b2, width);
 923
 924         b0=b2;
 925         b1=b3;
 926     }
 927 }
 928
 929 static void horizontal_decompose97i(DWTELEM *b, int width){
 930     DWTELEM temp[width];
 931     const int w2= (width+1)>>1;
 932
 933     lift (temp+w2, b    +1, b      , 1, 2, 2, width,  W_AM, W_AO, W_AS, 1, 1);
 934     liftS(temp   , b      , temp+w2, 1, 2, 1, width,  W_BM, W_BO, W_BS, 0, 0);
 935     lift (b   +w2, temp+w2, temp   , 1, 1, 1, width,  W_CM, W_CO, W_CS, 1, 0);
 936     lift (b      , temp   , b   +w2, 1, 1, 1, width,  W_DM, W_DO, W_DS, 0, 0);
 937 }
 938
 939
 940 static void vertical_decompose97iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
 941     int i;
 942
 943     for(i=0; i<width; i++){
 944         b1[i] -= (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
 945     }
 946 }
 947
 948 static void vertical_decompose97iH1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
 949     int i;
 950
 951     for(i=0; i<width; i++){
 952         b1[i] += (W_CM*(b0[i] + b2[i])+W_CO)>>W_CS;
 953     }
 954 }
 955
 956 static void vertical_decompose97iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
 957     int i;
 958
 959     for(i=0; i<width; i++){
 960 #ifdef liftS
 961         b1[i] -= (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS;
 962 #else
 963         b1[i] = (16*4*b1[i] - 4*(b0[i] + b2[i]) + W_BO*5 + (5<<27)) / (5*16) - (1<<23);
 964 #endif
 965     }
 966 }
 967
 968 static void vertical_decompose97iL1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
 969     int i;
 970
 971     for(i=0; i<width; i++){
 972         b1[i] += (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS;
 973     }
 974 }
 975
 976 static void spatial_decompose97i(DWTELEM *buffer, int width, int height, int stride){
 977     int y;
 978     DWTELEM *b0= buffer + mirror(-4-1, height-1)*stride;
 979     DWTELEM *b1= buffer + mirror(-4  , height-1)*stride;
 980     DWTELEM *b2= buffer + mirror(-4+1, height-1)*stride;
 981     DWTELEM *b3= buffer + mirror(-4+2, height-1)*stride;
 982
 983     for(y=-4; y<height; y+=2){
 984         DWTELEM *b4= buffer + mirror(y+3, height-1)*stride;
 985         DWTELEM *b5= buffer + mirror(y+4, height-1)*stride;
 986
 987         if(y+3<(unsigned)height) horizontal_decompose97i(b4, width);
 988         if(y+4<(unsigned)height) horizontal_decompose97i(b5, width);
 989
 990         if(y+3<(unsigned)height) vertical_decompose97iH0(b3, b4, b5, width);
 991         if(y+2<(unsigned)height) vertical_decompose97iL0(b2, b3, b4, width);
 992         if(y+1<(unsigned)height) vertical_decompose97iH1(b1, b2, b3, width);
 993         if(y+0<(unsigned)height) vertical_decompose97iL1(b0, b1, b2, width);
 994
 995         b0=b2;
 996         b1=b3;
 997         b2=b4;
 998         b3=b5;
 999     }
1000 }
1001
1002 void ff_spatial_dwt(DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
1003     int level;
1004
1005     for(level=0; level<decomposition_count; level++){
1006         switch(type){
1007         case DWT_97: spatial_decompose97i(buffer, width>>level, height>>level, stride<<level); break;
1008         case DWT_53: spatial_decompose53i(buffer, width>>level, height>>level, stride<<level); break;
1009         }
1010     }
1011 }
1012
1013 static void horizontal_compose53i(IDWTELEM *b, int width){
1014     IDWTELEM temp[width];
1015     const int width2= width>>1;
1016     const int w2= (width+1)>>1;
1017     int x;
1018
1019 #if 0
1020     int A1,A2,A3,A4;
1021     A2= temp[1       ];
1022     A4= temp[0       ];
1023     A1= temp[0+width2];
1024     A1 -= (A2 + A4)>>1;
1025     A4 += (A1 + 1)>>1;
1026     b[0+width2] = A1;
1027     b[0       ] = A4;
1028     for(x=1; x+1<width2; x+=2){
1029         A3= temp[x+width2];
1030         A4= temp[x+1     ];
1031         A3 -= (A2 + A4)>>1;
1032         A2 += (A1 + A3 + 2)>>2;
1033         b[x+width2] = A3;
1034         b[x       ] = A2;
1035
1036         A1= temp[x+1+width2];
1037         A2= temp[x+2       ];
1038         A1 -= (A2 + A4)>>1;
1039         A4 += (A1 + A3 + 2)>>2;
1040         b[x+1+width2] = A1;
1041         b[x+1       ] = A4;
1042     }
1043     A3= temp[width-1];
1044     A3 -= A2;
1045     A2 += (A1 + A3 + 2)>>2;
1046     b[width -1] = A3;
1047     b[width2-1] = A2;
1048 #else
1049     inv_lift(temp   , b   , b+w2, 1, 1, 1, width,  1, 2, 2, 0, 1);
1050     inv_lift(temp+w2, b+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 1);
1051 #endif /* 0 */
1052     for(x=0; x<width2; x++){
1053         b[2*x    ]= temp[x   ];
1054         b[2*x + 1]= temp[x+w2];
1055     }
1056     if(width&1)
1057         b[2*x    ]= temp[x   ];
1058 }
1059
1060 static void vertical_compose53iH0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
1061     int i;
1062
1063     for(i=0; i<width; i++){
1064         b1[i] += (b0[i] + b2[i])>>1;
1065     }
1066 }
1067
1068 static void vertical_compose53iL0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
1069     int i;
1070
1071     for(i=0; i<width; i++){
1072         b1[i] -= (b0[i] + b2[i] + 2)>>2;
1073     }
1074 }
1075
1076 static void spatial_compose53i_buffered_init(DWTCompose *cs, slice_buffer * sb, int height, int stride_line){
1077     cs->b0 = slice_buffer_get_line(sb, mirror(-1-1, height-1) * stride_line);
1078     cs->b1 = slice_buffer_get_line(sb, mirror(-1  , height-1) * stride_line);
1079     cs->y = -1;
1080 }
1081
1082 static void spatial_compose53i_init(DWTCompose *cs, IDWTELEM *buffer, int height, int stride){
1083     cs->b0 = buffer + mirror(-1-1, height-1)*stride;
1084     cs->b1 = buffer + mirror(-1  , height-1)*stride;
1085     cs->y = -1;
1086 }
1087
1088 static void spatial_compose53i_dy_buffered(DWTCompose *cs, slice_buffer * sb, int width, int height, int stride_line){
1089     int y= cs->y;
1090
1091     IDWTELEM *b0= cs->b0;
1092     IDWTELEM *b1= cs->b1;
1093     IDWTELEM *b2= slice_buffer_get_line(sb, mirror(y+1, height-1) * stride_line);
1094     IDWTELEM *b3= slice_buffer_get_line(sb, mirror(y+2, height-1) * stride_line);
1095
1096         if(y+1<(unsigned)height) vertical_compose53iL0(b1, b2, b3, width);
1097         if(y+0<(unsigned)height) vertical_compose53iH0(b0, b1, b2, width);
1098
1099         if(y-1<(unsigned)height) horizontal_compose53i(b0, width);
1100         if(y+0<(unsigned)height) horizontal_compose53i(b1, width);
1101
1102     cs->b0 = b2;
1103     cs->b1 = b3;
1104     cs->y += 2;
1105 }
1106
1107 static void spatial_compose53i_dy(DWTCompose *cs, IDWTELEM *buffer, int width, int height, int stride){
1108     int y= cs->y;
1109     IDWTELEM *b0= cs->b0;
1110     IDWTELEM *b1= cs->b1;
1111     IDWTELEM *b2= buffer + mirror(y+1, height-1)*stride;
1112     IDWTELEM *b3= buffer + mirror(y+2, height-1)*stride;
1113
1114         if(y+1<(unsigned)height) vertical_compose53iL0(b1, b2, b3, width);
1115         if(y+0<(unsigned)height) vertical_compose53iH0(b0, b1, b2, width);
1116
1117         if(y-1<(unsigned)height) horizontal_compose53i(b0, width);
1118         if(y+0<(unsigned)height) horizontal_compose53i(b1, width);
1119
1120     cs->b0 = b2;
1121     cs->b1 = b3;
1122     cs->y += 2;
1123 }
1124
1125 static void av_unused spatial_compose53i(IDWTELEM *buffer, int width, int height, int stride){
1126     DWTCompose cs;
1127     spatial_compose53i_init(&cs, buffer, height, stride);
1128     while(cs.y <= height)
1129         spatial_compose53i_dy(&cs, buffer, width, height, stride);
1130 }
1131
1132
1133 void ff_snow_horizontal_compose97i(IDWTELEM *b, int width){
1134     IDWTELEM temp[width];
1135     const int w2= (width+1)>>1;
1136
1137     inv_lift (temp   , b      , b   +w2, 1, 1, 1, width,  W_DM, W_DO, W_DS, 0, 1);
1138     inv_lift (temp+w2, b   +w2, temp   , 1, 1, 1, width,  W_CM, W_CO, W_CS, 1, 1);
1139     inv_liftS(b      , temp   , temp+w2, 2, 1, 1, width,  W_BM, W_BO, W_BS, 0, 1);
1140     inv_lift (b+1    , temp+w2, b      , 2, 1, 2, width,  W_AM, W_AO, W_AS, 1, 0);
1141 }
1142
1143 static void vertical_compose97iH0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
1144     int i;
1145
1146     for(i=0; i<width; i++){
1147         b1[i] += (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
1148     }
1149 }
1150
1151 static void vertical_compose97iH1(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
1152     int i;
1153
1154     for(i=0; i<width; i++){
1155         b1[i] -= (W_CM*(b0[i] + b2[i])+W_CO)>>W_CS;
1156     }
1157 }
1158
1159 static void vertical_compose97iL0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
1160     int i;
1161
1162     for(i=0; i<width; i++){
1163 #ifdef liftS
1164         b1[i] += (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS;
1165 #else
1166         b1[i] += (W_BM*(b0[i] + b2[i])+4*b1[i]+W_BO)>>W_BS;
1167 #endif
1168     }
1169 }
1170
1171 static void vertical_compose97iL1(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
1172     int i;
1173
1174     for(i=0; i<width; i++){
1175         b1[i] -= (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS;
1176     }
1177 }
1178
1179 void ff_snow_vertical_compose97i(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, IDWTELEM *b3, IDWTELEM *b4, IDWTELEM *b5, int width){
1180     int i;
1181
1182     for(i=0; i<width; i++){
1183         b4[i] -= (W_DM*(b3[i] + b5[i])+W_DO)>>W_DS;
1184         b3[i] -= (W_CM*(b2[i] + b4[i])+W_CO)>>W_CS;
1185 #ifdef liftS
1186         b2[i] += (W_BM*(b1[i] + b3[i])+W_BO)>>W_BS;
1187 #else
1188         b2[i] += (W_BM*(b1[i] + b3[i])+4*b2[i]+W_BO)>>W_BS;
1189 #endif
1190         b1[i] += (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
1191     }
1192 }
1193
1194 static void spatial_compose97i_buffered_init(DWTCompose *cs, slice_buffer * sb, int height, int stride_line){
1195     cs->b0 = slice_buffer_get_line(sb, mirror(-3-1, height-1) * stride_line);
1196     cs->b1 = slice_buffer_get_line(sb, mirror(-3  , height-1) * stride_line);
1197     cs->b2 = slice_buffer_get_line(sb, mirror(-3+1, height-1) * stride_line);
1198     cs->b3 = slice_buffer_get_line(sb, mirror(-3+2, height-1) * stride_line);
1199     cs->y = -3;
1200 }
1201
1202 static void spatial_compose97i_init(DWTCompose *cs, IDWTELEM *buffer, int height, int stride){
1203     cs->b0 = buffer + mirror(-3-1, height-1)*stride;
1204     cs->b1 = buffer + mirror(-3  , height-1)*stride;
1205     cs->b2 = buffer + mirror(-3+1, height-1)*stride;
1206     cs->b3 = buffer + mirror(-3+2, height-1)*stride;
1207     cs->y = -3;
1208 }
1209
1210 static void spatial_compose97i_dy_buffered(DSPContext *dsp, DWTCompose *cs, slice_buffer * sb, int width, int height, int stride_line){
1211     int y = cs->y;
1212
1213     IDWTELEM *b0= cs->b0;
1214     IDWTELEM *b1= cs->b1;
1215     IDWTELEM *b2= cs->b2;
1216     IDWTELEM *b3= cs->b3;
1217     IDWTELEM *b4= slice_buffer_get_line(sb, mirror(y + 3, height - 1) * stride_line);
1218     IDWTELEM *b5= slice_buffer_get_line(sb, mirror(y + 4, height - 1) * stride_line);
1219
1220     if(y>0 && y+4<height){
1221         dsp->vertical_compose97i(b0, b1, b2, b3, b4, b5, width);
1222     }else{
1223         if(y+3<(unsigned)height) vertical_compose97iL1(b3, b4, b5, width);
1224         if(y+2<(unsigned)height) vertical_compose97iH1(b2, b3, b4, width);
1225         if(y+1<(unsigned)height) vertical_compose97iL0(b1, b2, b3, width);
1226         if(y+0<(unsigned)height) vertical_compose97iH0(b0, b1, b2, width);
1227     }
1228
1229     if(y-1<(unsigned)height) dsp->horizontal_compose97i(b0, width);
1230     if(y+0<(unsigned)height) dsp->horizontal_compose97i(b1, width);
1231
1232     cs->b0=b2;
1233     cs->b1=b3;
1234     cs->b2=b4;
1235     cs->b3=b5;
1236     cs->y += 2;
1237 }
1238
1239 static void spatial_compose97i_dy(DWTCompose *cs, IDWTELEM *buffer, int width, int height, int stride){
1240     int y = cs->y;
1241     IDWTELEM *b0= cs->b0;
1242     IDWTELEM *b1= cs->b1;
1243     IDWTELEM *b2= cs->b2;
1244     IDWTELEM *b3= cs->b3;
1245     IDWTELEM *b4= buffer + mirror(y+3, height-1)*stride;
1246     IDWTELEM *b5= buffer + mirror(y+4, height-1)*stride;
1247
1248     if(y+3<(unsigned)height) vertical_compose97iL1(b3, b4, b5, width);
1249     if(y+2<(unsigned)height) vertical_compose97iH1(b2, b3, b4, width);
1250     if(y+1<(unsigned)height) vertical_compose97iL0(b1, b2, b3, width);
1251     if(y+0<(unsigned)height) vertical_compose97iH0(b0, b1, b2, width);
1252
1253     if(y-1<(unsigned)height) ff_snow_horizontal_compose97i(b0, width);
1254     if(y+0<(unsigned)height) ff_snow_horizontal_compose97i(b1, width);
1255
1256     cs->b0=b2;
1257     cs->b1=b3;
1258     cs->b2=b4;
1259     cs->b3=b5;
1260     cs->y += 2;
1261 }
1262
1263 static void av_unused spatial_compose97i(IDWTELEM *buffer, int width, int height, int stride){
1264     DWTCompose cs;
1265     spatial_compose97i_init(&cs, buffer, height, stride);
1266     while(cs.y <= height)
1267         spatial_compose97i_dy(&cs, buffer, width, height, stride);
1268 }
1269
1270 static void ff_spatial_idwt_buffered_init(DWTCompose *cs, slice_buffer * sb, int width, int height, int stride_line, int type, int decomposition_count){
1271     int level;
1272     for(level=decomposition_count-1; level>=0; level--){
1273         switch(type){
1274         case DWT_97: spatial_compose97i_buffered_init(cs+level, sb, height>>level, stride_line<<level); break;
1275         case DWT_53: spatial_compose53i_buffered_init(cs+level, sb, height>>level, stride_line<<level); break;
1276         }
1277     }
1278 }
1279
1280 static void ff_spatial_idwt_buffered_slice(DSPContext *dsp, DWTCompose *cs, slice_buffer * slice_buf, int width, int height, int stride_line, int type, int decomposition_count, int y){
1281     const int support = type==1 ? 3 : 5;
1282     int level;
1283     if(type==2) return;
1284
1285     for(level=decomposition_count-1; level>=0; level--){
1286         while(cs[level].y <= FFMIN((y>>level)+support, height>>level)){
1287             switch(type){
1288             case DWT_97: spatial_compose97i_dy_buffered(dsp, cs+level, slice_buf, width>>level, height>>level, stride_line<<level);
1289                 break;
1290             case DWT_53: spatial_compose53i_dy_buffered(cs+level, slice_buf, width>>level, height>>level, stride_line<<level);
1291                 break;
1292             }
1293         }
1294     }
1295 }
1296
1297 static inline void unpack_coeffs(SnowContext *s, SubBand *b, SubBand * parent, int orientation){
1298     const int w= b->width;
1299     const int h= b->height;
1300     int x,y;
1301
1302     if(1){
1303         int run, runs;
1304         x_and_coeff *xc= b->x_coeff;
1305         x_and_coeff *prev_xc= NULL;
1306         x_and_coeff *prev2_xc= xc;
1307         x_and_coeff *parent_xc= parent ? parent->x_coeff : NULL;
1308         x_and_coeff *prev_parent_xc= parent_xc;
1309
1310         runs= get_symbol2(&s->c, b->state[30], 0);
1311         if(runs-- > 0) run= get_symbol2(&s->c, b->state[1], 3);
1312         else           run= INT_MAX;
1313
1314         for(y=0; y<h; y++){
1315             int v=0;
1316             int lt=0, t=0, rt=0;
1317
1318             if(y && prev_xc->x == 0){
1319                 rt= prev_xc->coeff;
1320             }
1321             for(x=0; x<w; x++){
1322                 int p=0;
1323                 const int l= v;
1324
1325                 lt= t; t= rt;
1326
1327                 if(y){
1328                     if(prev_xc->x <= x)
1329                         prev_xc++;
1330                     if(prev_xc->x == x + 1)
1331                         rt= prev_xc->coeff;
1332                     else
1333                         rt=0;
1334                 }
1335                 if(parent_xc){
1336                     if(x>>1 > parent_xc->x){
1337                         parent_xc++;
1338                     }
1339                     if(x>>1 == parent_xc->x){
1340                         p= parent_xc->coeff;
1341                     }
1342                 }
1343                 if(/*ll|*/l|lt|t|rt|p){
1344                     int context= av_log2(/*FFABS(ll) + */3*(l>>1) + (lt>>1) + (t&~1) + (rt>>1) + (p>>1));
1345
1346                     v=get_rac(&s->c, &b->state[0][context]);
1347                     if(v){
1348                         v= 2*(get_symbol2(&s->c, b->state[context + 2], context-4) + 1);
1349                         v+=get_rac(&s->c, &b->state[0][16 + 1 + 3 + quant3bA[l&0xFF] + 3*quant3bA[t&0xFF]]);
1350
1351                         xc->x=x;
1352                         (xc++)->coeff= v;
1353                     }
1354                 }else{
1355                     if(!run){
1356                         if(runs-- > 0) run= get_symbol2(&s->c, b->state[1], 3);
1357                         else           run= INT_MAX;
1358                         v= 2*(get_symbol2(&s->c, b->state[0 + 2], 0-4) + 1);
1359                         v+=get_rac(&s->c, &b->state[0][16 + 1 + 3]);
1360
1361                         xc->x=x;
1362                         (xc++)->coeff= v;
1363                     }else{
1364                         int max_run;
1365                         run--;
1366                         v=0;
1367
1368                         if(y) max_run= FFMIN(run, prev_xc->x - x - 2);
1369                         else  max_run= FFMIN(run, w-x-1);
1370                         if(parent_xc)
1371                             max_run= FFMIN(max_run, 2*parent_xc->x - x - 1);
1372                         x+= max_run;
1373                         run-= max_run;
1374                     }
1375                 }
1376             }
1377             (xc++)->x= w+1; //end marker
1378             prev_xc= prev2_xc;
1379             prev2_xc= xc;
1380
1381             if(parent_xc){
1382                 if(y&1){
1383                     while(parent_xc->x != parent->width+1)
1384                         parent_xc++;
1385                     parent_xc++;
1386                     prev_parent_xc= parent_xc;
1387                 }else{
1388                     parent_xc= prev_parent_xc;
1389                 }
1390             }
1391         }
1392
1393         (xc++)->x= w+1; //end marker
1394     }
1395 }
1396
1397 static inline void decode_subband_slice_buffered(SnowContext *s, SubBand *b, slice_buffer * sb, int start_y, int h, int save_state[1]){
1398     const int w= b->width;
1399     int y;
1400     const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
1401     int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
1402     int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
1403     int new_index = 0;
1404
1405     if(b->ibuf == s->spatial_idwt_buffer || s->qlog == LOSSLESS_QLOG){
1406         qadd= 0;
1407         qmul= 1<<QEXPSHIFT;
1408     }
1409
1410     /* If we are on the second or later slice, restore our index. */
1411     if (start_y != 0)
1412         new_index = save_state[0];
1413
1414
1415     for(y=start_y; y<h; y++){
1416         int x = 0;
1417         int v;
1418         IDWTELEM * line = slice_buffer_get_line(sb, y * b->stride_line + b->buf_y_offset) + b->buf_x_offset;
1419         memset(line, 0, b->width*sizeof(IDWTELEM));
1420         v = b->x_coeff[new_index].coeff;
1421         x = b->x_coeff[new_index++].x;
1422         while(x < w){
1423             register int t= ( (v>>1)*qmul + qadd)>>QEXPSHIFT;
1424             register int u= -(v&1);
1425             line[x] = (t^u) - u;
1426
1427             v = b->x_coeff[new_index].coeff;
1428             x = b->x_coeff[new_index++].x;
1429         }
1430     }
1431
1432     /* Save our variables for the next slice. */
1433     save_state[0] = new_index;
1434
1435     return;
1436 }
1437
1438 static void reset_contexts(SnowContext *s){ //FIXME better initial contexts
1439     int plane_index, level, orientation;
1440
1441     for(plane_index=0; plane_index<3; plane_index++){
1442         for(level=0; level<MAX_DECOMPOSITIONS; level++){
1443             for(orientation=level ? 1:0; orientation<4; orientation++){
1444                 memset(s->plane[plane_index].band[level][orientation].state, MID_STATE, sizeof(s->plane[plane_index].band[level][orientation].state));
1445             }
1446         }
1447     }
1448     memset(s->header_state, MID_STATE, sizeof(s->header_state));
1449     memset(s->block_state, MID_STATE, sizeof(s->block_state));
1450 }
1451
1452 static int alloc_blocks(SnowContext *s){
1453     int w= -((-s->avctx->width )>>LOG2_MB_SIZE);
1454     int h= -((-s->avctx->height)>>LOG2_MB_SIZE);
1455
1456     s->b_width = w;
1457     s->b_height= h;
1458
1459     av_free(s->block);
1460     s->block= av_mallocz(w * h * sizeof(BlockNode) << (s->block_max_depth*2));
1461     return 0;
1462 }
1463
1464 static inline void copy_rac_state(RangeCoder *d, RangeCoder *s){
1465     uint8_t *bytestream= d->bytestream;
1466     uint8_t *bytestream_start= d->bytestream_start;
1467     *d= *s;
1468     d->bytestream= bytestream;
1469     d->bytestream_start= bytestream_start;
1470 }
1471
1472 static inline void set_blocks(SnowContext *s, int level, int x, int y, int l, int cb, int cr, int mx, int my, int ref, int type){
1473     const int w= s->b_width << s->block_max_depth;
1474     const int rem_depth= s->block_max_depth - level;
1475     const int index= (x + y*w) << rem_depth;
1476     const int block_w= 1<<rem_depth;
1477     BlockNode block;
1478     int i,j;
1479
1480     block.color[0]= l;
1481     block.color[1]= cb;
1482     block.color[2]= cr;
1483     block.mx= mx;
1484     block.my= my;
1485     block.ref= ref;
1486     block.type= type;
1487     block.level= level;
1488
1489     for(j=0; j<block_w; j++){
1490         for(i=0; i<block_w; i++){
1491             s->block[index + i + j*w]= block;
1492         }
1493     }
1494 }
1495
1496 static inline void init_ref(MotionEstContext *c, uint8_t *src[3], uint8_t *ref[3], uint8_t *ref2[3], int x, int y, int ref_index){
1497     const int offset[3]= {
1498           y*c->  stride + x,
1499         ((y*c->uvstride + x)>>1),
1500         ((y*c->uvstride + x)>>1),
1501     };
1502     int i;
1503     for(i=0; i<3; i++){
1504         c->src[0][i]= src [i];
1505         c->ref[0][i]= ref [i] + offset[i];
1506     }
1507     assert(!ref_index);
1508 }
1509
1510 static inline void pred_mv(SnowContext *s, int *mx, int *my, int ref,
1511                            const BlockNode *left, const BlockNode *top, const BlockNode *tr){
1512     if(s->ref_frames == 1){
1513         *mx = mid_pred(left->mx, top->mx, tr->mx);
1514         *my = mid_pred(left->my, top->my, tr->my);
1515     }else{
1516         const int *scale = scale_mv_ref[ref];
1517         *mx = mid_pred((left->mx * scale[left->ref] + 128) >>8,
1518                        (top ->mx * scale[top ->ref] + 128) >>8,
1519                        (tr  ->mx * scale[tr  ->ref] + 128) >>8);
1520         *my = mid_pred((left->my * scale[left->ref] + 128) >>8,
1521                        (top ->my * scale[top ->ref] + 128) >>8,
1522                        (tr  ->my * scale[tr  ->ref] + 128) >>8);
1523     }
1524 }
1525
1526 static av_always_inline int same_block(BlockNode *a, BlockNode *b){
1527     if((a->type&BLOCK_INTRA) && (b->type&BLOCK_INTRA)){
1528         return !((a->color[0] - b->color[0]) | (a->color[1] - b->color[1]) | (a->color[2] - b->color[2]));
1529     }else{
1530         return !((a->mx - b->mx) | (a->my - b->my) | (a->ref - b->ref) | ((a->type ^ b->type)&BLOCK_INTRA));
1531     }
1532 }
1533
1534 static void decode_q_branch(SnowContext *s, int level, int x, int y){
1535     const int w= s->b_width << s->block_max_depth;
1536     const int rem_depth= s->block_max_depth - level;
1537     const int index= (x + y*w) << rem_depth;
1538     int trx= (x+1)<<rem_depth;
1539     const BlockNode *left  = x ? &s->block[index-1] : &null_block;
1540     const BlockNode *top   = y ? &s->block[index-w] : &null_block;
1541     const BlockNode *tl    = y && x ? &s->block[index-w-1] : left;
1542     const BlockNode *tr    = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
1543     int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
1544
1545     if(s->keyframe){
1546         set_blocks(s, level, x, y, null_block.color[0], null_block.color[1], null_block.color[2], null_block.mx, null_block.my, null_block.ref, BLOCK_INTRA);
1547         return;
1548     }
1549
1550     if(level==s->block_max_depth || get_rac(&s->c, &s->block_state[4 + s_context])){
1551         int type, mx, my;
1552         int l = left->color[0];
1553         int cb= left->color[1];
1554         int cr= left->color[2];
1555         int ref = 0;
1556         int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref);
1557         int mx_context= av_log2(2*FFABS(left->mx - top->mx)) + 0*av_log2(2*FFABS(tr->mx - top->mx));
1558         int my_context= av_log2(2*FFABS(left->my - top->my)) + 0*av_log2(2*FFABS(tr->my - top->my));
1559
1560         type= get_rac(&s->c, &s->block_state[1 + left->type + top->type]) ? BLOCK_INTRA : 0;
1561
1562         if(type){
1563             pred_mv(s, &mx, &my, 0, left, top, tr);
1564             l += get_symbol(&s->c, &s->block_state[32], 1);
1565             cb+= get_symbol(&s->c, &s->block_state[64], 1);
1566             cr+= get_symbol(&s->c, &s->block_state[96], 1);
1567         }else{
1568             if(s->ref_frames > 1)
1569                 ref= get_symbol(&s->c, &s->block_state[128 + 1024 + 32*ref_context], 0);
1570             pred_mv(s, &mx, &my, ref, left, top, tr);
1571             mx+= get_symbol(&s->c, &s->block_state[128 + 32*(mx_context + 16*!!ref)], 1);
1572             my+= get_symbol(&s->c, &s->block_state[128 + 32*(my_context + 16*!!ref)], 1);
1573         }
1574         set_blocks(s, level, x, y, l, cb, cr, mx, my, ref, type);
1575     }else{
1576         decode_q_branch(s, level+1, 2*x+0, 2*y+0);
1577         decode_q_branch(s, level+1, 2*x+1, 2*y+0);
1578         decode_q_branch(s, level+1, 2*x+0, 2*y+1);
1579         decode_q_branch(s, level+1, 2*x+1, 2*y+1);
1580     }
1581 }
1582
1583 static void decode_blocks(SnowContext *s){
1584     int x, y;
1585     int w= s->b_width;
1586     int h= s->b_height;
1587
1588     for(y=0; y<h; y++){
1589         for(x=0; x<w; x++){
1590             decode_q_branch(s, 0, x, y);
1591         }
1592     }
1593 }
1594
1595 static void mc_block(Plane *p, uint8_t *dst, const uint8_t *src, uint8_t *tmp, int stride, int b_w, int b_h, int dx, int dy){
1596     static const uint8_t weight[64]={
1597     8,7,6,5,4,3,2,1,
1598     7,7,0,0,0,0,0,1,
1599     6,0,6,0,0,0,2,0,
1600     5,0,0,5,0,3,0,0,
1601     4,0,0,0,4,0,0,0,
1602     3,0,0,5,0,3,0,0,
1603     2,0,6,0,0,0,2,0,
1604     1,7,0,0,0,0,0,1,
1605     };
1606
1607     static const uint8_t brane[256]={
1608     0x00,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x11,0x12,0x12,0x12,0x12,0x12,0x12,0x12,
1609     0x04,0x05,0xcc,0xcc,0xcc,0xcc,0xcc,0x41,0x15,0x16,0xcc,0xcc,0xcc,0xcc,0xcc,0x52,
1610     0x04,0xcc,0x05,0xcc,0xcc,0xcc,0x41,0xcc,0x15,0xcc,0x16,0xcc,0xcc,0xcc,0x52,0xcc,
1611     0x04,0xcc,0xcc,0x05,0xcc,0x41,0xcc,0xcc,0x15,0xcc,0xcc,0x16,0xcc,0x52,0xcc,0xcc,
1612     0x04,0xcc,0xcc,0xcc,0x41,0xcc,0xcc,0xcc,0x15,0xcc,0xcc,0xcc,0x16,0xcc,0xcc,0xcc,
1613     0x04,0xcc,0xcc,0x41,0xcc,0x05,0xcc,0xcc,0x15,0xcc,0xcc,0x52,0xcc,0x16,0xcc,0xcc,
1614     0x04,0xcc,0x41,0xcc,0xcc,0xcc,0x05,0xcc,0x15,0xcc,0x52,0xcc,0xcc,0xcc,0x16,0xcc,
1615     0x04,0x41,0xcc,0xcc,0xcc,0xcc,0xcc,0x05,0x15,0x52,0xcc,0xcc,0xcc,0xcc,0xcc,0x16,
1616     0x44,0x45,0x45,0x45,0x45,0x45,0x45,0x45,0x55,0x56,0x56,0x56,0x56,0x56,0x56,0x56,
1617     0x48,0x49,0xcc,0xcc,0xcc,0xcc,0xcc,0x85,0x59,0x5A,0xcc,0xcc,0xcc,0xcc,0xcc,0x96,
1618     0x48,0xcc,0x49,0xcc,0xcc,0xcc,0x85,0xcc,0x59,0xcc,0x5A,0xcc,0xcc,0xcc,0x96,0xcc,
1619     0x48,0xcc,0xcc,0x49,0xcc,0x85,0xcc,0xcc,0x59,0xcc,0xcc,0x5A,0xcc,0x96,0xcc,0xcc,
1620     0x48,0xcc,0xcc,0xcc,0x49,0xcc,0xcc,0xcc,0x59,0xcc,0xcc,0xcc,0x96,0xcc,0xcc,0xcc,
1621     0x48,0xcc,0xcc,0x85,0xcc,0x49,0xcc,0xcc,0x59,0xcc,0xcc,0x96,0xcc,0x5A,0xcc,0xcc,
1622     0x48,0xcc,0x85,0xcc,0xcc,0xcc,0x49,0xcc,0x59,0xcc,0x96,0xcc,0xcc,0xcc,0x5A,0xcc,
1623     0x48,0x85,0xcc,0xcc,0xcc,0xcc,0xcc,0x49,0x59,0x96,0xcc,0xcc,0xcc,0xcc,0xcc,0x5A,
1624     };
1625
1626     static const uint8_t needs[16]={
1627     0,1,0,0,
1628     2,4,2,0,
1629     0,1,0,0,
1630     15
1631     };
1632
1633     int x, y, b, r, l;
1634     int16_t tmpIt   [64*(32+HTAPS_MAX)];
1635     uint8_t tmp2t[3][stride*(32+HTAPS_MAX)];
1636     int16_t *tmpI= tmpIt;
1637     uint8_t *tmp2= tmp2t[0];
1638     const uint8_t *hpel[11];
1639     assert(dx<16 && dy<16);
1640     r= brane[dx + 16*dy]&15;
1641     l= brane[dx + 16*dy]>>4;
1642
1643     b= needs[l] | needs[r];
1644     if(p && !p->diag_mc)
1645         b= 15;
1646
1647     if(b&5){
1648         for(y=0; y < b_h+HTAPS_MAX-1; y++){
1649             for(x=0; x < b_w; x++){
1650                 int a_1=src[x + HTAPS_MAX/2-4];
1651                 int a0= src[x + HTAPS_MAX/2-3];
1652                 int a1= src[x + HTAPS_MAX/2-2];
1653                 int a2= src[x + HTAPS_MAX/2-1];
1654                 int a3= src[x + HTAPS_MAX/2+0];
1655                 int a4= src[x + HTAPS_MAX/2+1];
1656                 int a5= src[x + HTAPS_MAX/2+2];
1657                 int a6= src[x + HTAPS_MAX/2+3];
1658                 int am=0;
1659                 if(!p || p->fast_mc){
1660                     am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5);
1661                     tmpI[x]= am;
1662                     am= (am+16)>>5;
1663                 }else{
1664                     am= p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6);
1665                     tmpI[x]= am;
1666                     am= (am+32)>>6;
1667                 }
1668
1669                 if(am&(~255)) am= ~(am>>31);
1670                 tmp2[x]= am;
1671             }
1672             tmpI+= 64;
1673             tmp2+= stride;
1674             src += stride;
1675         }
1676         src -= stride*y;
1677     }
1678     src += HTAPS_MAX/2 - 1;
1679     tmp2= tmp2t[1];
1680
1681     if(b&2){
1682         for(y=0; y < b_h; y++){
1683             for(x=0; x < b_w+1; x++){
1684                 int a_1=src[x + (HTAPS_MAX/2-4)*stride];
1685                 int a0= src[x + (HTAPS_MAX/2-3)*stride];
1686                 int a1= src[x + (HTAPS_MAX/2-2)*stride];
1687                 int a2= src[x + (HTAPS_MAX/2-1)*stride];
1688                 int a3= src[x + (HTAPS_MAX/2+0)*stride];
1689                 int a4= src[x + (HTAPS_MAX/2+1)*stride];
1690                 int a5= src[x + (HTAPS_MAX/2+2)*stride];
1691                 int a6= src[x + (HTAPS_MAX/2+3)*stride];
1692                 int am=0;
1693                 if(!p || p->fast_mc)
1694                     am= (20*(a2+a3) - 5*(a1+a4) + (a0+a5) + 16)>>5;
1695                 else
1696                     am= (p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6) + 32)>>6;
1697
1698                 if(am&(~255)) am= ~(am>>31);
1699                 tmp2[x]= am;
1700             }
1701             src += stride;
1702             tmp2+= stride;
1703         }
1704         src -= stride*y;
1705     }
1706     src += stride*(HTAPS_MAX/2 - 1);
1707     tmp2= tmp2t[2];
1708     tmpI= tmpIt;
1709     if(b&4){
1710         for(y=0; y < b_h; y++){
1711             for(x=0; x < b_w; x++){
1712                 int a_1=tmpI[x + (HTAPS_MAX/2-4)*64];
1713                 int a0= tmpI[x + (HTAPS_MAX/2-3)*64];
1714                 int a1= tmpI[x + (HTAPS_MAX/2-2)*64];
1715                 int a2= tmpI[x + (HTAPS_MAX/2-1)*64];
1716                 int a3= tmpI[x + (HTAPS_MAX/2+0)*64];
1717                 int a4= tmpI[x + (HTAPS_MAX/2+1)*64];
1718                 int a5= tmpI[x + (HTAPS_MAX/2+2)*64];
1719                 int a6= tmpI[x + (HTAPS_MAX/2+3)*64];
1720                 int am=0;
1721                 if(!p || p->fast_mc)
1722                     am= (20*(a2+a3) - 5*(a1+a4) + (a0+a5) + 512)>>10;
1723                 else
1724                     am= (p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6) + 2048)>>12;
1725                 if(am&(~255)) am= ~(am>>31);
1726                 tmp2[x]= am;
1727             }
1728             tmpI+= 64;
1729             tmp2+= stride;
1730         }
1731     }
1732
1733     hpel[ 0]= src;
1734     hpel[ 1]= tmp2t[0] + stride*(HTAPS_MAX/2-1);
1735     hpel[ 2]= src + 1;
1736
1737     hpel[ 4]= tmp2t[1];
1738     hpel[ 5]= tmp2t[2];
1739     hpel[ 6]= tmp2t[1] + 1;
1740
1741     hpel[ 8]= src + stride;
1742     hpel[ 9]= hpel[1] + stride;
1743     hpel[10]= hpel[8] + 1;
1744
1745     if(b==15){
1746         const uint8_t *src1= hpel[dx/8 + dy/8*4  ];
1747         const uint8_t *src2= hpel[dx/8 + dy/8*4+1];
1748         const uint8_t *src3= hpel[dx/8 + dy/8*4+4];
1749         const uint8_t *src4= hpel[dx/8 + dy/8*4+5];
1750         dx&=7;
1751         dy&=7;
1752         for(y=0; y < b_h; y++){
1753             for(x=0; x < b_w; x++){
1754                 dst[x]= ((8-dx)*(8-dy)*src1[x] + dx*(8-dy)*src2[x]+
1755                          (8-dx)*   dy *src3[x] + dx*   dy *src4[x]+32)>>6;
1756             }
1757             src1+=stride;
1758             src2+=stride;
1759             src3+=stride;
1760             src4+=stride;
1761             dst +=stride;
1762         }
1763     }else{
1764         const uint8_t *src1= hpel[l];
1765         const uint8_t *src2= hpel[r];
1766         int a= weight[((dx&7) + (8*(dy&7)))];
1767         int b= 8-a;
1768         for(y=0; y < b_h; y++){
1769             for(x=0; x < b_w; x++){
1770                 dst[x]= (a*src1[x] + b*src2[x] + 4)>>3;
1771             }
1772             src1+=stride;
1773             src2+=stride;
1774             dst +=stride;
1775         }
1776     }
1777 }
1778
1779 #define mca(dx,dy,b_w)\
1780 static void mc_block_hpel ## dx ## dy ## b_w(uint8_t *dst, const uint8_t *src, int stride, int h){\
1781     uint8_t tmp[stride*(b_w+HTAPS_MAX-1)];\
1782     assert(h==b_w);\
1783     mc_block(NULL, dst, src-(HTAPS_MAX/2-1)-(HTAPS_MAX/2-1)*stride, tmp, stride, b_w, b_w, dx, dy);\
1784 }
1785
1786 mca( 0, 0,16)
1787 mca( 8, 0,16)
1788 mca( 0, 8,16)
1789 mca( 8, 8,16)
1790 mca( 0, 0,8)
1791 mca( 8, 0,8)
1792 mca( 0, 8,8)
1793 mca( 8, 8,8)
1794
1795 static void pred_block(SnowContext *s, uint8_t *dst, uint8_t *tmp, int stride, int sx, int sy, int b_w, int b_h, BlockNode *block, int plane_index, int w, int h){
1796     if(block->type & BLOCK_INTRA){
1797         int x, y;
1798         const int color = block->color[plane_index];
1799         const int color4= color*0x01010101;
1800         if(b_w==32){
1801             for(y=0; y < b_h; y++){
1802                 *(uint32_t*)&dst[0 + y*stride]= color4;
1803                 *(uint32_t*)&dst[4 + y*stride]= color4;
1804                 *(uint32_t*)&dst[8 + y*stride]= color4;
1805                 *(uint32_t*)&dst[12+ y*stride]= color4;
1806                 *(uint32_t*)&dst[16+ y*stride]= color4;
1807                 *(uint32_t*)&dst[20+ y*stride]= color4;
1808                 *(uint32_t*)&dst[24+ y*stride]= color4;
1809                 *(uint32_t*)&dst[28+ y*stride]= color4;
1810             }
1811         }else if(b_w==16){
1812             for(y=0; y < b_h; y++){
1813                 *(uint32_t*)&dst[0 + y*stride]= color4;
1814                 *(uint32_t*)&dst[4 + y*stride]= color4;
1815                 *(uint32_t*)&dst[8 + y*stride]= color4;
1816                 *(uint32_t*)&dst[12+ y*stride]= color4;
1817             }
1818         }else if(b_w==8){
1819             for(y=0; y < b_h; y++){
1820                 *(uint32_t*)&dst[0 + y*stride]= color4;
1821                 *(uint32_t*)&dst[4 + y*stride]= color4;
1822             }
1823         }else if(b_w==4){
1824             for(y=0; y < b_h; y++){
1825                 *(uint32_t*)&dst[0 + y*stride]= color4;
1826             }
1827         }else{
1828             for(y=0; y < b_h; y++){
1829                 for(x=0; x < b_w; x++){
1830                     dst[x + y*stride]= color;
1831                 }
1832             }
1833         }
1834     }else{
1835         uint8_t *src= s->last_picture[block->ref].data[plane_index];
1836         const int scale= plane_index ?  s->mv_scale : 2*s->mv_scale;
1837         int mx= block->mx*scale;
1838         int my= block->my*scale;
1839         const int dx= mx&15;
1840         const int dy= my&15;
1841         const int tab_index= 3 - (b_w>>2) + (b_w>>4);
1842         sx += (mx>>4) - (HTAPS_MAX/2-1);
1843         sy += (my>>4) - (HTAPS_MAX/2-1);
1844         src += sx + sy*stride;
1845         if(   (unsigned)sx >= w - b_w - (HTAPS_MAX-2)
1846            || (unsigned)sy >= h - b_h - (HTAPS_MAX-2)){
1847             ff_emulated_edge_mc(tmp + MB_SIZE, src, stride, b_w+HTAPS_MAX-1, b_h+HTAPS_MAX-1, sx, sy, w, h);
1848             src= tmp + MB_SIZE;
1849         }
1850 //        assert(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h);
1851 //        assert(!(b_w&(b_w-1)));
1852         assert(b_w>1 && b_h>1);
1853         assert((tab_index>=0 && tab_index<4) || b_w==32);
1854         if((dx&3) || (dy&3) || !(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h) || (b_w&(b_w-1)) || !s->plane[plane_index].fast_mc )
1855             mc_block(&s->plane[plane_index], dst, src, tmp, stride, b_w, b_h, dx, dy);
1856         else if(b_w==32){
1857             int y;
1858             for(y=0; y<b_h; y+=16){
1859                 s->dsp.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + y*stride, src + 3 + (y+3)*stride,stride);
1860                 s->dsp.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + 16 + y*stride, src + 19 + (y+3)*stride,stride);
1861             }
1862         }else if(b_w==b_h)
1863             s->dsp.put_h264_qpel_pixels_tab[tab_index  ][dy+(dx>>2)](dst,src + 3 + 3*stride,stride);
1864         else if(b_w==2*b_h){
1865             s->dsp.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst    ,src + 3       + 3*stride,stride);
1866             s->dsp.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst+b_h,src + 3 + b_h + 3*stride,stride);
1867         }else{
1868             assert(2*b_w==b_h);
1869             s->dsp.put_h264_qpel_pixels_tab[tab_index  ][dy+(dx>>2)](dst           ,src + 3 + 3*stride           ,stride);
1870             s->dsp.put_h264_qpel_pixels_tab[tab_index  ][dy+(dx>>2)](dst+b_w*stride,src + 3 + 3*stride+b_w*stride,stride);
1871         }
1872     }
1873 }
1874
1875 void ff_snow_inner_add_yblock(const uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h,
1876                               int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8){
1877     int y, x;
1878     IDWTELEM * dst;
1879     for(y=0; y<b_h; y++){
1880         //FIXME ugly misuse of obmc_stride
1881         const uint8_t *obmc1= obmc + y*obmc_stride;
1882         const uint8_t *obmc2= obmc1+ (obmc_stride>>1);
1883         const uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1);
1884         const uint8_t *obmc4= obmc3+ (obmc_stride>>1);
1885         dst = slice_buffer_get_line(sb, src_y + y);
1886         for(x=0; x<b_w; x++){
1887             int v=   obmc1[x] * block[3][x + y*src_stride]
1888                     +obmc2[x] * block[2][x + y*src_stride]
1889                     +obmc3[x] * block[1][x + y*src_stride]
1890                     +obmc4[x] * block[0][x + y*src_stride];
1891
1892             v <<= 8 - LOG2_OBMC_MAX;
1893             if(FRAC_BITS != 8){
1894                 v >>= 8 - FRAC_BITS;
1895             }
1896             if(add){
1897                 v += dst[x + src_x];
1898                 v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS;
1899                 if(v&(~255)) v= ~(v>>31);
1900                 dst8[x + y*src_stride] = v;
1901             }else{
1902                 dst[x + src_x] -= v;
1903             }
1904         }
1905     }
1906 }
1907
1908 //FIXME name cleanup (b_w, block_w, b_width stuff)
1909 static av_always_inline void add_yblock(SnowContext *s, int sliced, slice_buffer *sb, IDWTELEM *dst, uint8_t *dst8, const uint8_t *obmc, int src_x, int src_y, int b_w, int b_h, int w, int h, int dst_stride, int src_stride, int obmc_stride, int b_x, int b_y, int add, int offset_dst, int plane_index){
1910     const int b_width = s->b_width  << s->block_max_depth;
1911     const int b_height= s->b_height << s->block_max_depth;
1912     const int b_stride= b_width;
1913     BlockNode *lt= &s->block[b_x + b_y*b_stride];
1914     BlockNode *rt= lt+1;
1915     BlockNode *lb= lt+b_stride;
1916     BlockNode *rb= lb+1;
1917     uint8_t *block[4];
1918     int tmp_step= src_stride >= 7*MB_SIZE ? MB_SIZE : MB_SIZE*src_stride;
1919     uint8_t *tmp = s->scratchbuf;
1920     uint8_t *ptmp;
1921     int x,y;
1922
1923     if(b_x<0){
1924         lt= rt;
1925         lb= rb;
1926     }else if(b_x + 1 >= b_width){
1927         rt= lt;
1928         rb= lb;
1929     }
1930     if(b_y<0){
1931         lt= lb;
1932         rt= rb;
1933     }else if(b_y + 1 >= b_height){
1934         lb= lt;
1935         rb= rt;
1936     }
1937
1938     if(src_x<0){ //FIXME merge with prev & always round internal width up to *16
1939         obmc -= src_x;
1940         b_w += src_x;
1941         if(!sliced && !offset_dst)
1942             dst -= src_x;
1943         src_x=0;
1944     }else if(src_x + b_w > w){
1945         b_w = w - src_x;
1946     }
1947     if(src_y<0){
1948         obmc -= src_y*obmc_stride;
1949         b_h += src_y;
1950         if(!sliced && !offset_dst)
1951             dst -= src_y*dst_stride;
1952         src_y=0;
1953     }else if(src_y + b_h> h){
1954         b_h = h - src_y;
1955     }
1956
1957     if(b_w<=0 || b_h<=0) return;
1958
1959     assert(src_stride > 2*MB_SIZE + 5);
1960
1961     if(!sliced && offset_dst)
1962         dst += src_x + src_y*dst_stride;
1963     dst8+= src_x + src_y*src_stride;
1964 //    src += src_x + src_y*src_stride;
1965
1966     ptmp= tmp + 3*tmp_step;
1967     block[0]= ptmp;
1968     ptmp+=tmp_step;
1969     pred_block(s, block[0], tmp, src_stride, src_x, src_y, b_w, b_h, lt, plane_index, w, h);
1970
1971     if(same_block(lt, rt)){
1972         block[1]= block[0];
1973     }else{
1974         block[1]= ptmp;
1975         ptmp+=tmp_step;
1976         pred_block(s, block[1], tmp, src_stride, src_x, src_y, b_w, b_h, rt, plane_index, w, h);
1977     }
1978
1979     if(same_block(lt, lb)){
1980         block[2]= block[0];
1981     }else if(same_block(rt, lb)){
1982         block[2]= block[1];
1983     }else{
1984         block[2]= ptmp;
1985         ptmp+=tmp_step;
1986         pred_block(s, block[2], tmp, src_stride, src_x, src_y, b_w, b_h, lb, plane_index, w, h);
1987     }
1988
1989     if(same_block(lt, rb) ){
1990         block[3]= block[0];
1991     }else if(same_block(rt, rb)){
1992         block[3]= block[1];
1993     }else if(same_block(lb, rb)){
1994         block[3]= block[2];
1995     }else{
1996         block[3]= ptmp;
1997         pred_block(s, block[3], tmp, src_stride, src_x, src_y, b_w, b_h, rb, plane_index, w, h);
1998     }
1999 #if 0
2000     for(y=0; y<b_h; y++){
2001         for(x=0; x<b_w; x++){
2002             int v=   obmc [x + y*obmc_stride] * block[3][x + y*src_stride] * (256/OBMC_MAX);
2003             if(add) dst[x + y*dst_stride] += v;
2004             else    dst[x + y*dst_stride] -= v;
2005         }
2006     }
2007     for(y=0; y<b_h; y++){
2008         uint8_t *obmc2= obmc + (obmc_stride>>1);
2009         for(x=0; x<b_w; x++){
2010             int v=   obmc2[x + y*obmc_stride] * block[2][x + y*src_stride] * (256/OBMC_MAX);
2011             if(add) dst[x + y*dst_stride] += v;
2012             else    dst[x + y*dst_stride] -= v;
2013         }
2014     }
2015     for(y=0; y<b_h; y++){
2016         uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
2017         for(x=0; x<b_w; x++){
2018             int v=   obmc3[x + y*obmc_stride] * block[1][x + y*src_stride] * (256/OBMC_MAX);
2019             if(add) dst[x + y*dst_stride] += v;
2020             else    dst[x + y*dst_stride] -= v;
2021         }
2022     }
2023     for(y=0; y<b_h; y++){
2024         uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
2025         uint8_t *obmc4= obmc3+ (obmc_stride>>1);
2026         for(x=0; x<b_w; x++){
2027             int v=   obmc4[x + y*obmc_stride] * block[0][x + y*src_stride] * (256/OBMC_MAX);
2028             if(add) dst[x + y*dst_stride] += v;
2029             else    dst[x + y*dst_stride] -= v;
2030         }
2031     }
2032 #else
2033     if(sliced){
2034         s->dsp.inner_add_yblock(obmc, obmc_stride, block, b_w, b_h, src_x,src_y, src_stride, sb, add, dst8);
2035     }else{
2036         for(y=0; y<b_h; y++){
2037             //FIXME ugly misuse of obmc_stride
2038             const uint8_t *obmc1= obmc + y*obmc_stride;
2039             const uint8_t *obmc2= obmc1+ (obmc_stride>>1);
2040             const uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1);
2041             const uint8_t *obmc4= obmc3+ (obmc_stride>>1);
2042             for(x=0; x<b_w; x++){
2043                 int v=   obmc1[x] * block[3][x + y*src_stride]
2044                         +obmc2[x] * block[2][x + y*src_stride]
2045                         +obmc3[x] * block[1][x + y*src_stride]
2046                         +obmc4[x] * block[0][x + y*src_stride];
2047
2048                 v <<= 8 - LOG2_OBMC_MAX;
2049                 if(FRAC_BITS != 8){
2050                     v >>= 8 - FRAC_BITS;
2051                 }
2052                 if(add){
2053                     v += dst[x + y*dst_stride];
2054                     v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS;
2055                     if(v&(~255)) v= ~(v>>31);
2056                     dst8[x + y*src_stride] = v;
2057                 }else{
2058                     dst[x + y*dst_stride] -= v;
2059                 }
2060             }
2061         }
2062     }
2063 #endif /* 0 */
2064 }
2065
2066 static av_always_inline void predict_slice_buffered(SnowContext *s, slice_buffer * sb, IDWTELEM * old_buffer, int plane_index, int add, int mb_y){
2067     Plane *p= &s->plane[plane_index];
2068     const int mb_w= s->b_width  << s->block_max_depth;
2069     const int mb_h= s->b_height << s->block_max_depth;
2070     int x, y, mb_x;
2071     int block_size = MB_SIZE >> s->block_max_depth;
2072     int block_w    = plane_index ? block_size/2 : block_size;
2073     const uint8_t *obmc  = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
2074     int obmc_stride= plane_index ? block_size : 2*block_size;
2075     int ref_stride= s->current_picture.linesize[plane_index];
2076     uint8_t *dst8= s->current_picture.data[plane_index];
2077     int w= p->width;
2078     int h= p->height;
2079
2080     if(s->keyframe || (s->avctx->debug&512)){
2081         if(mb_y==mb_h)
2082             return;
2083
2084         if(add){
2085             for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
2086 //                DWTELEM * line = slice_buffer_get_line(sb, y);
2087                 IDWTELEM * line = sb->line[y];
2088                 for(x=0; x<w; x++){
2089 //                    int v= buf[x + y*w] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
2090                     int v= line[x] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
2091                     v >>= FRAC_BITS;
2092                     if(v&(~255)) v= ~(v>>31);
2093                     dst8[x + y*ref_stride]= v;
2094                 }
2095             }
2096         }else{
2097             for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
2098 //                DWTELEM * line = slice_buffer_get_line(sb, y);
2099                 IDWTELEM * line = sb->line[y];
2100                 for(x=0; x<w; x++){
2101                     line[x] -= 128 << FRAC_BITS;
2102 //                    buf[x + y*w]-= 128<<FRAC_BITS;
2103                 }
2104             }
2105         }
2106
2107         return;
2108     }
2109
2110     for(mb_x=0; mb_x<=mb_w; mb_x++){
2111         add_yblock(s, 1, sb, old_buffer, dst8, obmc,
2112                    block_w*mb_x - block_w/2,
2113                    block_w*mb_y - block_w/2,
2114                    block_w, block_w,
2115                    w, h,
2116                    w, ref_stride, obmc_stride,
2117                    mb_x - 1, mb_y - 1,
2118                    add, 0, plane_index);
2119     }
2120 }
2121
2122 static av_always_inline void predict_slice(SnowContext *s, IDWTELEM *buf, int plane_index, int add, int mb_y){
2123     Plane *p= &s->plane[plane_index];
2124     const int mb_w= s->b_width  << s->block_max_depth;
2125     const int mb_h= s->b_height << s->block_max_depth;
2126     int x, y, mb_x;
2127     int block_size = MB_SIZE >> s->block_max_depth;
2128     int block_w    = plane_index ? block_size/2 : block_size;
2129     const uint8_t *obmc  = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
2130     const int obmc_stride= plane_index ? block_size : 2*block_size;
2131     int ref_stride= s->current_picture.linesize[plane_index];
2132     uint8_t *dst8= s->current_picture.data[plane_index];
2133     int w= p->width;
2134     int h= p->height;
2135
2136     if(s->keyframe || (s->avctx->debug&512)){
2137         if(mb_y==mb_h)
2138             return;
2139
2140         if(add){
2141             for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
2142                 for(x=0; x<w; x++){
2143                     int v= buf[x + y*w] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
2144                     v >>= FRAC_BITS;
2145                     if(v&(~255)) v= ~(v>>31);
2146                     dst8[x + y*ref_stride]= v;
2147                 }
2148             }
2149         }else{
2150             for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
2151                 for(x=0; x<w; x++){
2152                     buf[x + y*w]-= 128<<FRAC_BITS;
2153                 }
2154             }
2155         }
2156
2157         return;
2158     }
2159
2160     for(mb_x=0; mb_x<=mb_w; mb_x++){
2161         add_yblock(s, 0, NULL, buf, dst8, obmc,
2162                    block_w*mb_x - block_w/2,
2163                    block_w*mb_y - block_w/2,
2164                    block_w, block_w,
2165                    w, h,
2166                    w, ref_stride, obmc_stride,
2167                    mb_x - 1, mb_y - 1,
2168                    add, 1, plane_index);
2169     }
2170 }
2171
2172 static av_always_inline void predict_plane(SnowContext *s, IDWTELEM *buf, int plane_index, int add){
2173     const int mb_h= s->b_height << s->block_max_depth;
2174     int mb_y;
2175     for(mb_y=0; mb_y<=mb_h; mb_y++)
2176         predict_slice(s, buf, plane_index, add, mb_y);
2177 }
2178
2179 static void dequantize_slice_buffered(SnowContext *s, slice_buffer * sb, SubBand *b, IDWTELEM *src, int stride, int start_y, int end_y){
2180     const int w= b->width;
2181     const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
2182     const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
2183     const int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
2184     int x,y;
2185
2186     if(s->qlog == LOSSLESS_QLOG) return;
2187
2188     for(y=start_y; y<end_y; y++){
2189 //        DWTELEM * line = slice_buffer_get_line_from_address(sb, src + (y * stride));
2190         IDWTELEM * line = slice_buffer_get_line(sb, (y * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
2191         for(x=0; x<w; x++){
2192             int i= line[x];
2193             if(i<0){
2194                 line[x]= -((-i*qmul + qadd)>>(QEXPSHIFT)); //FIXME try different bias
2195             }else if(i>0){
2196                 line[x]=  (( i*qmul + qadd)>>(QEXPSHIFT));
2197             }
2198         }
2199     }
2200 }
2201
2202 static void correlate_slice_buffered(SnowContext *s, slice_buffer * sb, SubBand *b, IDWTELEM *src, int stride, int inverse, int use_median, int start_y, int end_y){
2203     const int w= b->width;
2204     int x,y;
2205
2206     IDWTELEM * line=0; // silence silly "could be used without having been initialized" warning
2207     IDWTELEM * prev;
2208
2209     if (start_y != 0)
2210         line = slice_buffer_get_line(sb, ((start_y - 1) * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
2211
2212     for(y=start_y; y<end_y; y++){
2213         prev = line;
2214 //        line = slice_buffer_get_line_from_address(sb, src + (y * stride));
2215         line = slice_buffer_get_line(sb, (y * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
2216         for(x=0; x<w; x++){
2217             if(x){
2218                 if(use_median){
2219                     if(y && x+1<w) line[x] += mid_pred(line[x - 1], prev[x], prev[x + 1]);
2220                     else  line[x] += line[x - 1];
2221                 }else{
2222                     if(y) line[x] += mid_pred(line[x - 1], prev[x], line[x - 1] + prev[x] - prev[x - 1]);
2223                     else  line[x] += line[x - 1];
2224                 }
2225             }else{
2226                 if(y) line[x] += prev[x];
2227             }
2228         }
2229     }
2230 }
2231
2232 static void decode_qlogs(SnowContext *s){
2233     int plane_index, level, orientation;
2234
2235     for(plane_index=0; plane_index<3; plane_index++){
2236         for(level=0; level<s->spatial_decomposition_count; level++){
2237             for(orientation=level ? 1:0; orientation<4; orientation++){
2238                 int q;
2239                 if     (plane_index==2) q= s->plane[1].band[level][orientation].qlog;
2240                 else if(orientation==2) q= s->plane[plane_index].band[level][1].qlog;
2241                 else                    q= get_symbol(&s->c, s->header_state, 1);
2242                 s->plane[plane_index].band[level][orientation].qlog= q;
2243             }
2244         }
2245     }
2246 }
2247
2248 #define GET_S(dst, check) \
2249     tmp= get_symbol(&s->c, s->header_state, 0);\
2250     if(!(check)){\
2251         av_log(s->avctx, AV_LOG_ERROR, "Error " #dst " is %d\n", tmp);\
2252         return -1;\
2253     }\
2254     dst= tmp;
2255
2256 static int decode_header(SnowContext *s){
2257     int plane_index, tmp;
2258     uint8_t kstate[32];
2259
2260     memset(kstate, MID_STATE, sizeof(kstate));
2261
2262     s->keyframe= get_rac(&s->c, kstate);
2263     if(s->keyframe || s->always_reset){
2264         reset_contexts(s);
2265         s->spatial_decomposition_type=
2266         s->qlog=
2267         s->qbias=
2268         s->mv_scale=
2269         s->block_max_depth= 0;
2270     }
2271     if(s->keyframe){
2272         GET_S(s->version, tmp <= 0U)
2273         s->always_reset= get_rac(&s->c, s->header_state);
2274         s->temporal_decomposition_type= get_symbol(&s->c, s->header_state, 0);
2275         s->temporal_decomposition_count= get_symbol(&s->c, s->header_state, 0);
2276         GET_S(s->spatial_decomposition_count, 0 < tmp && tmp <= MAX_DECOMPOSITIONS)
2277         s->colorspace_type= get_symbol(&s->c, s->header_state, 0);
2278         s->chroma_h_shift= get_symbol(&s->c, s->header_state, 0);
2279         s->chroma_v_shift= get_symbol(&s->c, s->header_state, 0);
2280         s->spatial_scalability= get_rac(&s->c, s->header_state);
2281 //        s->rate_scalability= get_rac(&s->c, s->header_state);
2282         GET_S(s->max_ref_frames, tmp < (unsigned)MAX_REF_FRAMES)
2283         s->max_ref_frames++;
2284
2285         decode_qlogs(s);
2286     }
2287
2288     if(!s->keyframe){
2289         if(get_rac(&s->c, s->header_state)){
2290             for(plane_index=0; plane_index<2; plane_index++){
2291                 int htaps, i, sum=0;
2292                 Plane *p= &s->plane[plane_index];
2293                 p->diag_mc= get_rac(&s->c, s->header_state);
2294                 htaps= get_symbol(&s->c, s->header_state, 0)*2 + 2;
2295                 if((unsigned)htaps > HTAPS_MAX || htaps==0)
2296                     return -1;
2297                 p->htaps= htaps;
2298                 for(i= htaps/2; i; i--){
2299                     p->hcoeff[i]= get_symbol(&s->c, s->header_state, 0) * (1-2*(i&1));
2300                     sum += p->hcoeff[i];
2301                 }
2302                 p->hcoeff[0]= 32-sum;
2303             }
2304             s->plane[2].diag_mc= s->plane[1].diag_mc;
2305             s->plane[2].htaps  = s->plane[1].htaps;
2306             memcpy(s->plane[2].hcoeff, s->plane[1].hcoeff, sizeof(s->plane[1].hcoeff));
2307         }
2308         if(get_rac(&s->c, s->header_state)){
2309             GET_S(s->spatial_decomposition_count, 0 < tmp && tmp <= MAX_DECOMPOSITIONS)
2310             decode_qlogs(s);
2311         }
2312     }
2313
2314     s->spatial_decomposition_type+= get_symbol(&s->c, s->header_state, 1);
2315     if(s->spatial_decomposition_type > 1U){
2316         av_log(s->avctx, AV_LOG_ERROR, "spatial_decomposition_type %d not supported", s->spatial_decomposition_type);
2317         return -1;
2318     }
2319     if(FFMIN(s->avctx-> width>>s->chroma_h_shift,
2320              s->avctx->height>>s->chroma_v_shift) >> (s->spatial_decomposition_count-1) <= 0){
2321         av_log(s->avctx, AV_LOG_ERROR, "spatial_decomposition_count %d too large for size", s->spatial_decomposition_count);
2322         return -1;
2323     }
2324
2325     s->qlog           += get_symbol(&s->c, s->header_state, 1);
2326     s->mv_scale       += get_symbol(&s->c, s->header_state, 1);
2327     s->qbias          += get_symbol(&s->c, s->header_state, 1);
2328     s->block_max_depth+= get_symbol(&s->c, s->header_state, 1);
2329     if(s->block_max_depth > 1 || s->block_max_depth < 0){
2330         av_log(s->avctx, AV_LOG_ERROR, "block_max_depth= %d is too large", s->block_max_depth);
2331         s->block_max_depth= 0;
2332         return -1;
2333     }
2334
2335     return 0;
2336 }
2337
2338 static void init_qexp(void){
2339     int i;
2340     double v=128;
2341
2342     for(i=0; i<QROOT; i++){
2343         qexp[i]= lrintf(v);
2344         v *= pow(2, 1.0 / QROOT);
2345     }
2346 }
2347
2348 static av_cold int common_init(AVCodecContext *avctx){
2349     SnowContext *s = avctx->priv_data;
2350     int width, height;
2351     int i, j;
2352
2353     s->avctx= avctx;
2354     s->max_ref_frames=1; //just make sure its not an invalid value in case of no initial keyframe
2355
2356     dsputil_init(&s->dsp, avctx);
2357
2358 #define mcf(dx,dy)\
2359     s->dsp.put_qpel_pixels_tab       [0][dy+dx/4]=\
2360     s->dsp.put_no_rnd_qpel_pixels_tab[0][dy+dx/4]=\
2361         s->dsp.put_h264_qpel_pixels_tab[0][dy+dx/4];\
2362     s->dsp.put_qpel_pixels_tab       [1][dy+dx/4]=\
2363     s->dsp.put_no_rnd_qpel_pixels_tab[1][dy+dx/4]=\
2364         s->dsp.put_h264_qpel_pixels_tab[1][dy+dx/4];
2365
2366     mcf( 0, 0)
2367     mcf( 4, 0)
2368     mcf( 8, 0)
2369     mcf(12, 0)
2370     mcf( 0, 4)
2371     mcf( 4, 4)
2372     mcf( 8, 4)
2373     mcf(12, 4)
2374     mcf( 0, 8)
2375     mcf( 4, 8)
2376     mcf( 8, 8)
2377     mcf(12, 8)
2378     mcf( 0,12)
2379     mcf( 4,12)
2380     mcf( 8,12)
2381     mcf(12,12)
2382
2383 #define mcfh(dx,dy)\
2384     s->dsp.put_pixels_tab       [0][dy/4+dx/8]=\
2385     s->dsp.put_no_rnd_pixels_tab[0][dy/4+dx/8]=\
2386         mc_block_hpel ## dx ## dy ## 16;\
2387     s->dsp.put_pixels_tab       [1][dy/4+dx/8]=\
2388     s->dsp.put_no_rnd_pixels_tab[1][dy/4+dx/8]=\
2389         mc_block_hpel ## dx ## dy ## 8;
2390
2391     mcfh(0, 0)
2392     mcfh(8, 0)
2393     mcfh(0, 8)
2394     mcfh(8, 8)
2395
2396     if(!qexp[0])
2397         init_qexp();
2398
2399 //    dec += FFMAX(s->chroma_h_shift, s->chroma_v_shift);
2400
2401     width= s->avctx->width;
2402     height= s->avctx->height;
2403
2404     s->spatial_idwt_buffer= av_mallocz(width*height*sizeof(IDWTELEM));
2405     s->spatial_dwt_buffer= av_mallocz(width*height*sizeof(DWTELEM)); //FIXME this does not belong here
2406
2407     for(i=0; i<MAX_REF_FRAMES; i++)
2408         for(j=0; j<MAX_REF_FRAMES; j++)
2409             scale_mv_ref[i][j] = 256*(i+1)/(j+1);
2410
2411     s->avctx->get_buffer(s->avctx, &s->mconly_picture);
2412     s->scratchbuf = av_malloc(s->mconly_picture.linesize[0]*7*MB_SIZE);
2413
2414     return 0;
2415 }
2416
2417 static int common_init_after_header(AVCodecContext *avctx){
2418     SnowContext *s = avctx->priv_data;
2419     int plane_index, level, orientation;
2420
2421     for(plane_index=0; plane_index<3; plane_index++){
2422         int w= s->avctx->width;
2423         int h= s->avctx->height;
2424
2425         if(plane_index){
2426             w>>= s->chroma_h_shift;
2427             h>>= s->chroma_v_shift;
2428         }
2429         s->plane[plane_index].width = w;
2430         s->plane[plane_index].height= h;
2431
2432         for(level=s->spatial_decomposition_count-1; level>=0; level--){
2433             for(orientation=level ? 1 : 0; orientation<4; orientation++){
2434                 SubBand *b= &s->plane[plane_index].band[level][orientation];
2435
2436                 b->buf= s->spatial_dwt_buffer;
2437                 b->level= level;
2438                 b->stride= s->plane[plane_index].width << (s->spatial_decomposition_count - level);
2439                 b->width = (w + !(orientation&1))>>1;
2440                 b->height= (h + !(orientation>1))>>1;
2441
2442                 b->stride_line = 1 << (s->spatial_decomposition_count - level);
2443                 b->buf_x_offset = 0;
2444                 b->buf_y_offset = 0;
2445
2446                 if(orientation&1){
2447                     b->buf += (w+1)>>1;
2448                     b->buf_x_offset = (w+1)>>1;
2449                 }
2450                 if(orientation>1){
2451                     b->buf += b->stride>>1;
2452                     b->buf_y_offset = b->stride_line >> 1;
2453                 }
2454                 b->ibuf= s->spatial_idwt_buffer + (b->buf - s->spatial_dwt_buffer);
2455
2456                 if(level)
2457                     b->parent= &s->plane[plane_index].band[level-1][orientation];
2458                 //FIXME avoid this realloc
2459                 av_freep(&b->x_coeff);
2460                 b->x_coeff=av_mallocz(((b->width+1) * b->height+1)*sizeof(x_and_coeff));
2461             }
2462             w= (w+1)>>1;
2463             h= (h+1)>>1;
2464         }
2465     }
2466
2467     return 0;
2468 }
2469
2470 #define QUANTIZE2 0
2471
2472 #if QUANTIZE2==1
2473 #define Q2_STEP 8
2474
2475 static void find_sse(SnowContext *s, Plane *p, int *score, int score_stride, IDWTELEM *r0, IDWTELEM *r1, int level, int orientation){
2476     SubBand *b= &p->band[level][orientation];
2477     int x, y;
2478     int xo=0;
2479     int yo=0;
2480     int step= 1 << (s->spatial_decomposition_count - level);
2481
2482     if(orientation&1)
2483         xo= step>>1;
2484     if(orientation&2)
2485         yo= step>>1;
2486
2487     //FIXME bias for nonzero ?
2488     //FIXME optimize
2489     memset(score, 0, sizeof(*score)*score_stride*((p->height + Q2_STEP-1)/Q2_STEP));
2490     for(y=0; y<p->height; y++){
2491         for(x=0; x<p->width; x++){
2492             int sx= (x-xo + step/2) / step / Q2_STEP;
2493             int sy= (y-yo + step/2) / step / Q2_STEP;
2494             int v= r0[x + y*p->width] - r1[x + y*p->width];
2495             assert(sx>=0 && sy>=0 && sx < score_stride);
2496             v= ((v+8)>>4)<<4;
2497             score[sx + sy*score_stride] += v*v;
2498             assert(score[sx + sy*score_stride] >= 0);
2499         }
2500     }
2501 }
2502
2503 static void dequantize_all(SnowContext *s, Plane *p, IDWTELEM *buffer, int width, int height){
2504     int level, orientation;
2505
2506     for(level=0; level<s->spatial_decomposition_count; level++){
2507         for(orientation=level ? 1 : 0; orientation<4; orientation++){
2508             SubBand *b= &p->band[level][orientation];
2509             IDWTELEM *dst= buffer + (b->ibuf - s->spatial_idwt_buffer);
2510
2511             dequantize(s, b, dst, b->stride);
2512         }
2513     }
2514 }
2515
2516 static void dwt_quantize(SnowContext *s, Plane *p, DWTELEM *buffer, int width, int height, int stride, int type){
2517     int level, orientation, ys, xs, x, y, pass;
2518     IDWTELEM best_dequant[height * stride];
2519     IDWTELEM idwt2_buffer[height * stride];
2520     const int score_stride= (width + 10)/Q2_STEP;
2521     int best_score[(width + 10)/Q2_STEP * (height + 10)/Q2_STEP]; //FIXME size
2522     int score[(width + 10)/Q2_STEP * (height + 10)/Q2_STEP]; //FIXME size
2523     int threshold= (s->m.lambda * s->m.lambda) >> 6;
2524
2525     //FIXME pass the copy cleanly ?
2526
2527 //    memcpy(dwt_buffer, buffer, height * stride * sizeof(DWTELEM));
2528     ff_spatial_dwt(buffer, width, height, stride, type, s->spatial_decomposition_count);
2529
2530     for(level=0; level<s->spatial_decomposition_count; level++){
2531         for(orientation=level ? 1 : 0; orientation<4; orientation++){
2532             SubBand *b= &p->band[level][orientation];
2533             IDWTELEM *dst= best_dequant + (b->ibuf - s->spatial_idwt_buffer);
2534              DWTELEM *src=       buffer + (b-> buf - s->spatial_dwt_buffer);
2535             assert(src == b->buf); // code does not depend on this but it is true currently
2536
2537             quantize(s, b, dst, src, b->stride, s->qbias);
2538         }
2539     }
2540     for(pass=0; pass<1; pass++){
2541         if(s->qbias == 0) //keyframe
2542             continue;
2543         for(level=0; level<s->spatial_decomposition_count; level++){
2544             for(orientation=level ? 1 : 0; orientation<4; orientation++){
2545                 SubBand *b= &p->band[level][orientation];
2546                 IDWTELEM *dst= idwt2_buffer + (b->ibuf - s->spatial_idwt_buffer);
2547                 IDWTELEM *best_dst= best_dequant + (b->ibuf - s->spatial_idwt_buffer);
2548
2549                 for(ys= 0; ys<Q2_STEP; ys++){
2550                     for(xs= 0; xs<Q2_STEP; xs++){
2551                         memcpy(idwt2_buffer, best_dequant, height * stride * sizeof(IDWTELEM));
2552                         dequantize_all(s, p, idwt2_buffer, width, height);
2553                         ff_spatial_idwt(idwt2_buffer, width, height, stride, type, s->spatial_decomposition_count);
2554                         find_sse(s, p, best_score, score_stride, idwt2_buffer, s->spatial_idwt_buffer, level, orientation);
2555                         memcpy(idwt2_buffer, best_dequant, height * stride * sizeof(IDWTELEM));
2556                         for(y=ys; y<b->height; y+= Q2_STEP){
2557                             for(x=xs; x<b->width; x+= Q2_STEP){
2558                                 if(dst[x + y*b->stride]<0) dst[x + y*b->stride]++;
2559                                 if(dst[x + y*b->stride]>0) dst[x + y*b->stride]--;
2560                                 //FIXME try more than just --
2561                             }
2562                         }
2563                         dequantize_all(s, p, idwt2_buffer, width, height);
2564                         ff_spatial_idwt(idwt2_buffer, width, height, stride, type, s->spatial_decomposition_count);
2565                         find_sse(s, p, score, score_stride, idwt2_buffer, s->spatial_idwt_buffer, level, orientation);
2566                         for(y=ys; y<b->height; y+= Q2_STEP){
2567                             for(x=xs; x<b->width; x+= Q2_STEP){
2568                                 int score_idx= x/Q2_STEP + (y/Q2_STEP)*score_stride;
2569                                 if(score[score_idx] <= best_score[score_idx] + threshold){
2570                                     best_score[score_idx]= score[score_idx];
2571                                     if(best_dst[x + y*b->stride]<0) best_dst[x + y*b->stride]++;
2572                                     if(best_dst[x + y*b->stride]>0) best_dst[x + y*b->stride]--;
2573                                     //FIXME copy instead
2574                                 }
2575                             }
2576                         }
2577                     }
2578                 }
2579             }
2580         }
2581     }
2582     memcpy(s->spatial_idwt_buffer, best_dequant, height * stride * sizeof(IDWTELEM)); //FIXME work with that directly instead of copy at the end
2583 }
2584
2585 #endif /* QUANTIZE2==1 */
2586
2587 #define USE_HALFPEL_PLANE 0
2588
2589 static void halfpel_interpol(SnowContext *s, uint8_t *halfpel[4][4], AVFrame *frame){
2590     int p,x,y;
2591
2592     assert(!(s->avctx->flags & CODEC_FLAG_EMU_EDGE));
2593
2594     for(p=0; p<3; p++){
2595         int is_chroma= !!p;
2596         int w= s->avctx->width  >>is_chroma;
2597         int h= s->avctx->height >>is_chroma;
2598         int ls= frame->linesize[p];
2599         uint8_t *src= frame->data[p];
2600
2601         halfpel[1][p]= (uint8_t*)av_malloc(ls * (h+2*EDGE_WIDTH)) + EDGE_WIDTH*(1+ls);
2602         halfpel[2][p]= (uint8_t*)av_malloc(ls * (h+2*EDGE_WIDTH)) + EDGE_WIDTH*(1+ls);
2603         halfpel[3][p]= (uint8_t*)av_malloc(ls * (h+2*EDGE_WIDTH)) + EDGE_WIDTH*(1+ls);
2604
2605         halfpel[0][p]= src;
2606         for(y=0; y<h; y++){
2607             for(x=0; x<w; x++){
2608                 int i= y*ls + x;
2609
2610                 halfpel[1][p][i]= (20*(src[i] + src[i+1]) - 5*(src[i-1] + src[i+2]) + (src[i-2] + src[i+3]) + 16 )>>5;
2611             }
2612         }
2613         for(y=0; y<h; y++){
2614             for(x=0; x<w; x++){
2615                 int i= y*ls + x;
2616
2617                 halfpel[2][p][i]= (20*(src[i] + src[i+ls]) - 5*(src[i-ls] + src[i+2*ls]) + (src[i-2*ls] + src[i+3*ls]) + 16 )>>5;
2618             }
2619         }
2620         src= halfpel[1][p];
2621         for(y=0; y<h; y++){
2622             for(x=0; x<w; x++){
2623                 int i= y*ls + x;
2624
2625                 halfpel[3][p][i]= (20*(src[i] + src[i+ls]) - 5*(src[i-ls] + src[i+2*ls]) + (src[i-2*ls] + src[i+3*ls]) + 16 )>>5;
2626             }
2627         }
2628
2629 //FIXME border!
2630     }
2631 }
2632
2633 static void release_buffer(AVCodecContext *avctx){
2634     SnowContext *s = avctx->priv_data;
2635     int i;
2636
2637     if(s->last_picture[s->max_ref_frames-1].data[0]){
2638         avctx->release_buffer(avctx, &s->last_picture[s->max_ref_frames-1]);
2639         for(i=0; i<9; i++)
2640             if(s->halfpel_plane[s->max_ref_frames-1][1+i/3][i%3])
2641                 av_free(s->halfpel_plane[s->max_ref_frames-1][1+i/3][i%3] - EDGE_WIDTH*(1+s->current_picture.linesize[i%3]));
2642     }
2643 }
2644
2645 static int frame_start(SnowContext *s){
2646    AVFrame tmp;
2647    int w= s->avctx->width; //FIXME round up to x16 ?
2648    int h= s->avctx->height;
2649
2650     if(s->current_picture.data[0]){
2651         s->dsp.draw_edges(s->current_picture.data[0], s->current_picture.linesize[0], w   , h   , EDGE_WIDTH  );
2652         s->dsp.draw_edges(s->current_picture.data[1], s->current_picture.linesize[1], w>>1, h>>1, EDGE_WIDTH/2);
2653         s->dsp.draw_edges(s->current_picture.data[2], s->current_picture.linesize[2], w>>1, h>>1, EDGE_WIDTH/2);
2654     }
2655
2656     release_buffer(s->avctx);
2657
2658     tmp= s->last_picture[s->max_ref_frames-1];
2659     memmove(s->last_picture+1, s->last_picture, (s->max_ref_frames-1)*sizeof(AVFrame));
2660     memmove(s->halfpel_plane+1, s->halfpel_plane, (s->max_ref_frames-1)*sizeof(void*)*4*4);
2661     if(USE_HALFPEL_PLANE && s->current_picture.data[0])
2662         halfpel_interpol(s, s->halfpel_plane[0], &s->current_picture);
2663     s->last_picture[0]= s->current_picture;
2664     s->current_picture= tmp;
2665
2666     if(s->keyframe){
2667         s->ref_frames= 0;
2668     }else{
2669         int i;
2670         for(i=0; i<s->max_ref_frames && s->last_picture[i].data[0]; i++)
2671             if(i && s->last_picture[i-1].key_frame)
2672                 break;
2673         s->ref_frames= i;
2674         if(s->ref_frames==0){
2675             av_log(s->avctx,AV_LOG_ERROR, "No reference frames\n");
2676             return -1;
2677         }
2678     }
2679
2680     s->current_picture.reference= 1;
2681     if(s->avctx->get_buffer(s->avctx, &s->current_picture) < 0){
2682         av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed\n");
2683         return -1;
2684     }
2685
2686     s->current_picture.key_frame= s->keyframe;
2687
2688     return 0;
2689 }
2690
2691 static av_cold void common_end(SnowContext *s){
2692     int plane_index, level, orientation, i;
2693
2694     av_freep(&s->spatial_dwt_buffer);
2695     av_freep(&s->spatial_idwt_buffer);
2696
2697     s->m.me.temp= NULL;
2698     av_freep(&s->m.me.scratchpad);
2699     av_freep(&s->m.me.map);
2700     av_freep(&s->m.me.score_map);
2701     av_freep(&s->m.obmc_scratchpad);
2702
2703     av_freep(&s->block);
2704     av_freep(&s->scratchbuf);
2705
2706     for(i=0; i<MAX_REF_FRAMES; i++){
2707         av_freep(&s->ref_mvs[i]);
2708         av_freep(&s->ref_scores[i]);
2709         if(s->last_picture[i].data[0])
2710             s->avctx->release_buffer(s->avctx, &s->last_picture[i]);
2711     }
2712
2713     for(plane_index=0; plane_index<3; plane_index++){
2714         for(level=s->spatial_decomposition_count-1; level>=0; level--){
2715             for(orientation=level ? 1 : 0; orientation<4; orientation++){
2716                 SubBand *b= &s->plane[plane_index].band[level][orientation];
2717
2718                 av_freep(&b->x_coeff);
2719             }
2720         }
2721     }
2722 }
2723
2724 static av_cold int decode_init(AVCodecContext *avctx)
2725 {
2726     avctx->pix_fmt= PIX_FMT_YUV420P;
2727
2728     common_init(avctx);
2729
2730     return 0;
2731 }
2732
2733 static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, AVPacket *avpkt){
2734     const uint8_t *buf = avpkt->data;
2735     int buf_size = avpkt->size;
2736     SnowContext *s = avctx->priv_data;
2737     RangeCoder * const c= &s->c;
2738     int bytes_read;
2739     AVFrame *picture = data;
2740     int level, orientation, plane_index;
2741
2742     ff_init_range_decoder(c, buf, buf_size);
2743     ff_build_rac_states(c, 0.05*(1LL<<32), 256-8);
2744
2745     s->current_picture.pict_type= FF_I_TYPE; //FIXME I vs. P
2746     if(decode_header(s)<0)
2747         return -1;
2748     common_init_after_header(avctx);
2749
2750     // realloc slice buffer for the case that spatial_decomposition_count changed
2751     slice_buffer_destroy(&s->sb);
2752     slice_buffer_init(&s->sb, s->plane[0].height, (MB_SIZE >> s->block_max_depth) + s->spatial_decomposition_count * 8 + 1, s->plane[0].width, s->spatial_idwt_buffer);
2753
2754     for(plane_index=0; plane_index<3; plane_index++){
2755         Plane *p= &s->plane[plane_index];
2756         p->fast_mc= p->diag_mc && p->htaps==6 && p->hcoeff[0]==40
2757                                               && p->hcoeff[1]==-10
2758                                               && p->hcoeff[2]==2;
2759     }
2760
2761     alloc_blocks(s);
2762
2763     if(frame_start(s) < 0)
2764         return -1;
2765     //keyframe flag duplication mess FIXME
2766     if(avctx->debug&FF_DEBUG_PICT_INFO)
2767         av_log(avctx, AV_LOG_ERROR, "keyframe:%d qlog:%d\n", s->keyframe, s->qlog);
2768
2769     decode_blocks(s);
2770
2771     for(plane_index=0; plane_index<3; plane_index++){
2772         Plane *p= &s->plane[plane_index];
2773         int w= p->width;
2774         int h= p->height;
2775         int x, y;
2776         int decode_state[MAX_DECOMPOSITIONS][4][1]; /* Stored state info for unpack_coeffs. 1 variable per instance. */
2777
2778         if(s->avctx->debug&2048){
2779             memset(s->spatial_dwt_buffer, 0, sizeof(DWTELEM)*w*h);
2780             predict_plane(s, s->spatial_idwt_buffer, plane_index, 1);
2781
2782             for(y=0; y<h; y++){
2783                 for(x=0; x<w; x++){
2784                     int v= s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x];
2785                     s->mconly_picture.data[plane_index][y*s->mconly_picture.linesize[plane_index] + x]= v;
2786                 }
2787             }
2788         }
2789
2790         {
2791         for(level=0; level<s->spatial_decomposition_count; level++){
2792             for(orientation=level ? 1 : 0; orientation<4; orientation++){
2793                 SubBand *b= &p->band[level][orientation];
2794                 unpack_coeffs(s, b, b->parent, orientation);
2795             }
2796         }
2797         }
2798
2799         {
2800         const int mb_h= s->b_height << s->block_max_depth;
2801         const int block_size = MB_SIZE >> s->block_max_depth;
2802         const int block_w    = plane_index ? block_size/2 : block_size;
2803         int mb_y;
2804         DWTCompose cs[MAX_DECOMPOSITIONS];
2805         int yd=0, yq=0;
2806         int y;
2807         int end_y;
2808
2809         ff_spatial_idwt_buffered_init(cs, &s->sb, w, h, 1, s->spatial_decomposition_type, s->spatial_decomposition_count);
2810         for(mb_y=0; mb_y<=mb_h; mb_y++){
2811
2812             int slice_starty = block_w*mb_y;
2813             int slice_h = block_w*(mb_y+1);
2814             if (!(s->keyframe || s->avctx->debug&512)){
2815                 slice_starty = FFMAX(0, slice_starty - (block_w >> 1));
2816                 slice_h -= (block_w >> 1);
2817             }
2818
2819             for(level=0; level<s->spatial_decomposition_count; level++){
2820                 for(orientation=level ? 1 : 0; orientation<4; orientation++){
2821                     SubBand *b= &p->band[level][orientation];
2822                     int start_y;
2823                     int end_y;
2824                     int our_mb_start = mb_y;
2825                     int our_mb_end = (mb_y + 1);
2826                     const int extra= 3;
2827                     start_y = (mb_y ? ((block_w * our_mb_start) >> (s->spatial_decomposition_count - level)) + s->spatial_decomposition_count - level + extra: 0);
2828                     end_y = (((block_w * our_mb_end) >> (s->spatial_decomposition_count - level)) + s->spatial_decomposition_count - level + extra);
2829                     if (!(s->keyframe || s->avctx->debug&512)){
2830                         start_y = FFMAX(0, start_y - (block_w >> (1+s->spatial_decomposition_count - level)));
2831                         end_y = FFMAX(0, end_y - (block_w >> (1+s->spatial_decomposition_count - level)));
2832                     }
2833                     start_y = FFMIN(b->height, start_y);
2834                     end_y = FFMIN(b->height, end_y);
2835
2836                     if (start_y != end_y){
2837                         if (orientation == 0){
2838                             SubBand * correlate_band = &p->band[0][0];
2839                             int correlate_end_y = FFMIN(b->height, end_y + 1);
2840                             int correlate_start_y = FFMIN(b->height, (start_y ? start_y + 1 : 0));
2841                             decode_subband_slice_buffered(s, correlate_band, &s->sb, correlate_start_y, correlate_end_y, decode_state[0][0]);
2842                             correlate_slice_buffered(s, &s->sb, correlate_band, correlate_band->ibuf, correlate_band->stride, 1, 0, correlate_start_y, correlate_end_y);
2843                             dequantize_slice_buffered(s, &s->sb, correlate_band, correlate_band->ibuf, correlate_band->stride, start_y, end_y);
2844                         }
2845                         else
2846                             decode_subband_slice_buffered(s, b, &s->sb, start_y, end_y, decode_state[level][orientation]);
2847                     }
2848                 }
2849             }
2850
2851             for(; yd<slice_h; yd+=4){
2852                 ff_spatial_idwt_buffered_slice(&s->dsp, cs, &s->sb, w, h, 1, s->spatial_decomposition_type, s->spatial_decomposition_count, yd);
2853             }
2854
2855             if(s->qlog == LOSSLESS_QLOG){
2856                 for(; yq<slice_h && yq<h; yq++){
2857                     IDWTELEM * line = slice_buffer_get_line(&s->sb, yq);
2858                     for(x=0; x<w; x++){
2859                         line[x] <<= FRAC_BITS;
2860                     }
2861                 }
2862             }
2863
2864             predict_slice_buffered(s, &s->sb, s->spatial_idwt_buffer, plane_index, 1, mb_y);
2865
2866             y = FFMIN(p->height, slice_starty);
2867             end_y = FFMIN(p->height, slice_h);
2868             while(y < end_y)
2869                 slice_buffer_release(&s->sb, y++);
2870         }
2871
2872         slice_buffer_flush(&s->sb);
2873         }
2874
2875     }
2876
2877     emms_c();
2878
2879     release_buffer(avctx);
2880
2881     if(!(s->avctx->debug&2048))
2882         *picture= s->current_picture;
2883     else
2884         *picture= s->mconly_picture;
2885
2886     *data_size = sizeof(AVFrame);
2887
2888     bytes_read= c->bytestream - c->bytestream_start;
2889     if(bytes_read ==0) av_log(s->avctx, AV_LOG_ERROR, "error at end of frame\n"); //FIXME
2890
2891     return bytes_read;
2892 }
2893
2894 static av_cold int decode_end(AVCodecContext *avctx)
2895 {
2896     SnowContext *s = avctx->priv_data;
2897
2898     slice_buffer_destroy(&s->sb);
2899
2900     common_end(s);
2901
2902     return 0;
2903 }
2904
2905 AVCodec snow_decoder = {
2906     "snow",
2907     CODEC_TYPE_VIDEO,
2908     CODEC_ID_SNOW,
2909     sizeof(SnowContext),
2910     decode_init,
2911     NULL,
2912     decode_end,
2913     decode_frame,
2914     CODEC_CAP_DR1 /*| CODEC_CAP_DRAW_HORIZ_BAND*/,
2915     NULL,
2916     .long_name = NULL_IF_CONFIG_SMALL("Snow"),
2917 };
2918
2919 #if CONFIG_SNOW_ENCODER
2920 static av_cold int encode_init(AVCodecContext *avctx)
2921 {
2922     SnowContext *s = avctx->priv_data;
2923     int plane_index;
2924
2925     if(avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL){
2926         av_log(avctx, AV_LOG_ERROR, "This codec is under development, files encoded with it may not be decodable with future versions!!!\n"
2927                "Use vstrict=-2 / -strict -2 to use it anyway.\n");
2928         return -1;
2929     }
2930
2931     if(avctx->prediction_method == DWT_97
2932        && (avctx->flags & CODEC_FLAG_QSCALE)
2933        && avctx->global_quality == 0){
2934         av_log(avctx, AV_LOG_ERROR, "The 9/7 wavelet is incompatible with lossless mode.\n");
2935         return -1;
2936     }
2937
2938     s->spatial_decomposition_type= avctx->prediction_method; //FIXME add decorrelator type r transform_type
2939
2940     s->mv_scale       = (avctx->flags & CODEC_FLAG_QPEL) ? 2 : 4;
2941     s->block_max_depth= (avctx->flags & CODEC_FLAG_4MV ) ? 1 : 0;
2942
2943     for(plane_index=0; plane_index<3; plane_index++){
2944         s->plane[plane_index].diag_mc= 1;
2945         s->plane[plane_index].htaps= 6;
2946         s->plane[plane_index].hcoeff[0]=  40;
2947         s->plane[plane_index].hcoeff[1]= -10;
2948         s->plane[plane_index].hcoeff[2]=   2;
2949         s->plane[plane_index].fast_mc= 1;
2950     }
2951
2952     common_init(avctx);
2953     alloc_blocks(s);
2954
2955     s->version=0;
2956
2957     s->m.avctx   = avctx;
2958     s->m.flags   = avctx->flags;
2959     s->m.bit_rate= avctx->bit_rate;
2960
2961     s->m.me.temp      =
2962     s->m.me.scratchpad= av_mallocz((avctx->width+64)*2*16*2*sizeof(uint8_t));
2963     s->m.me.map       = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t));
2964     s->m.me.score_map = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t));
2965     s->m.obmc_scratchpad= av_mallocz(MB_SIZE*MB_SIZE*12*sizeof(uint32_t));
2966     h263_encode_init(&s->m); //mv_penalty
2967
2968     s->max_ref_frames = FFMAX(FFMIN(avctx->refs, MAX_REF_FRAMES), 1);
2969
2970     if(avctx->flags&CODEC_FLAG_PASS1){
2971         if(!avctx->stats_out)
2972             avctx->stats_out = av_mallocz(256);
2973     }
2974     if((avctx->flags&CODEC_FLAG_PASS2) || !(avctx->flags&CODEC_FLAG_QSCALE)){
2975         if(ff_rate_control_init(&s->m) < 0)
2976             return -1;
2977     }
2978     s->pass1_rc= !(avctx->flags & (CODEC_FLAG_QSCALE|CODEC_FLAG_PASS2));
2979
2980     avctx->coded_frame= &s->current_picture;
2981     switch(avctx->pix_fmt){
2982 //    case PIX_FMT_YUV444P:
2983 //    case PIX_FMT_YUV422P:
2984     case PIX_FMT_YUV420P:
2985     case PIX_FMT_GRAY8:
2986 //    case PIX_FMT_YUV411P:
2987 //    case PIX_FMT_YUV410P:
2988         s->colorspace_type= 0;
2989         break;
2990 /*    case PIX_FMT_RGB32:
2991         s->colorspace= 1;
2992         break;*/
2993     default:
2994         av_log(avctx, AV_LOG_ERROR, "pixel format not supported\n");
2995         return -1;
2996     }
2997 //    avcodec_get_chroma_sub_sample(avctx->pix_fmt, &s->chroma_h_shift, &s->chroma_v_shift);
2998     s->chroma_h_shift= 1;
2999     s->chroma_v_shift= 1;
3000
3001     ff_set_cmp(&s->dsp, s->dsp.me_cmp, s->avctx->me_cmp);
3002     ff_set_cmp(&s->dsp, s->dsp.me_sub_cmp, s->avctx->me_sub_cmp);
3003
3004     s->avctx->get_buffer(s->avctx, &s->input_picture);
3005
3006     if(s->avctx->me_method == ME_ITER){
3007         int i;
3008         int size= s->b_width * s->b_height << 2*s->block_max_depth;
3009         for(i=0; i<s->max_ref_frames; i++){
3010             s->ref_mvs[i]= av_mallocz(size*sizeof(int16_t[2]));
3011             s->ref_scores[i]= av_mallocz(size*sizeof(uint32_t));
3012         }
3013     }
3014
3015     return 0;
3016 }
3017
3018 //near copy & paste from dsputil, FIXME
3019 static int pix_sum(uint8_t * pix, int line_size, int w)
3020 {
3021     int s, i, j;
3022
3023     s = 0;
3024     for (i = 0; i < w; i++) {
3025         for (j = 0; j < w; j++) {
3026             s += pix[0];
3027             pix ++;
3028         }
3029         pix += line_size - w;
3030     }
3031     return s;
3032 }
3033
3034 //near copy & paste from dsputil, FIXME
3035 static int pix_norm1(uint8_t * pix, int line_size, int w)
3036 {
3037     int s, i, j;
3038     uint32_t *sq = ff_squareTbl + 256;
3039
3040     s = 0;
3041     for (i = 0; i < w; i++) {
3042         for (j = 0; j < w; j ++) {
3043             s += sq[pix[0]];
3044             pix ++;
3045         }
3046         pix += line_size - w;
3047     }
3048     return s;
3049 }
3050
3051 //FIXME copy&paste
3052 #define P_LEFT P[1]
3053 #define P_TOP P[2]
3054 #define P_TOPRIGHT P[3]
3055 #define P_MEDIAN P[4]
3056 #define P_MV1 P[9]
3057 #define FLAG_QPEL   1 //must be 1
3058
3059 static int encode_q_branch(SnowContext *s, int level, int x, int y){
3060     uint8_t p_buffer[1024];
3061     uint8_t i_buffer[1024];
3062     uint8_t p_state[sizeof(s->block_state)];
3063     uint8_t i_state[sizeof(s->block_state)];
3064     RangeCoder pc, ic;
3065     uint8_t *pbbak= s->c.bytestream;
3066     uint8_t *pbbak_start= s->c.bytestream_start;
3067     int score, score2, iscore, i_len, p_len, block_s, sum, base_bits;
3068     const int w= s->b_width  << s->block_max_depth;
3069     const int h= s->b_height << s->block_max_depth;
3070     const int rem_depth= s->block_max_depth - level;
3071     const int index= (x + y*w) << rem_depth;
3072     const int block_w= 1<<(LOG2_MB_SIZE - level);
3073     int trx= (x+1)<<rem_depth;
3074     int try= (y+1)<<rem_depth;
3075     const BlockNode *left  = x ? &s->block[index-1] : &null_block;
3076     const BlockNode *top   = y ? &s->block[index-w] : &null_block;
3077     const BlockNode *right = trx<w ? &s->block[index+1] : &null_block;
3078     const BlockNode *bottom= try<h ? &s->block[index+w] : &null_block;
3079     const BlockNode *tl    = y && x ? &s->block[index-w-1] : left;
3080     const BlockNode *tr    = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
3081     int pl = left->color[0];
3082     int pcb= left->color[1];
3083     int pcr= left->color[2];
3084     int pmx, pmy;
3085     int mx=0, my=0;
3086     int l,cr,cb;
3087     const int stride= s->current_picture.linesize[0];
3088     const int uvstride= s->current_picture.linesize[1];
3089     uint8_t *current_data[3]= { s->input_picture.data[0] + (x + y*  stride)*block_w,
3090                                 s->input_picture.data[1] + (x + y*uvstride)*block_w/2,
3091                                 s->input_picture.data[2] + (x + y*uvstride)*block_w/2};
3092     int P[10][2];
3093     int16_t last_mv[3][2];
3094     int qpel= !!(s->avctx->flags & CODEC_FLAG_QPEL); //unused
3095     const int shift= 1+qpel;
3096     MotionEstContext *c= &s->m.me;
3097     int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref);
3098     int mx_context= av_log2(2*FFABS(left->mx - top->mx));
3099     int my_context= av_log2(2*FFABS(left->my - top->my));
3100     int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
3101     int ref, best_ref, ref_score, ref_mx, ref_my;
3102
3103     assert(sizeof(s->block_state) >= 256);
3104     if(s->keyframe){
3105         set_blocks(s, level, x, y, pl, pcb, pcr, 0, 0, 0, BLOCK_INTRA);
3106         return 0;
3107     }
3108
3109 //    clip predictors / edge ?
3110
3111     P_LEFT[0]= left->mx;
3112     P_LEFT[1]= left->my;
3113     P_TOP [0]= top->mx;
3114     P_TOP [1]= top->my;
3115     P_TOPRIGHT[0]= tr->mx;
3116     P_TOPRIGHT[1]= tr->my;
3117
3118     last_mv[0][0]= s->block[index].mx;
3119     last_mv[0][1]= s->block[index].my;
3120     last_mv[1][0]= right->mx;
3121     last_mv[1][1]= right->my;
3122     last_mv[2][0]= bottom->mx;
3123     last_mv[2][1]= bottom->my;
3124
3125     s->m.mb_stride=2;
3126     s->m.mb_x=
3127     s->m.mb_y= 0;
3128     c->skip= 0;
3129
3130     assert(c->  stride ==   stride);
3131     assert(c->uvstride == uvstride);
3132
3133     c->penalty_factor    = get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_cmp);
3134     c->sub_penalty_factor= get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_sub_cmp);
3135     c->mb_penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->mb_cmp);
3136     c->current_mv_penalty= c->mv_penalty[s->m.f_code=1] + MAX_MV;
3137
3138     c->xmin = - x*block_w - 16+3;
3139     c->ymin = - y*block_w - 16+3;
3140     c->xmax = - (x+1)*block_w + (w<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-3;
3141     c->ymax = - (y+1)*block_w + (h<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-3;
3142
3143     if(P_LEFT[0]     > (c->xmax<<shift)) P_LEFT[0]    = (c->xmax<<shift);
3144     if(P_LEFT[1]     > (c->ymax<<shift)) P_LEFT[1]    = (c->ymax<<shift);
3145     if(P_TOP[0]      > (c->xmax<<shift)) P_TOP[0]     = (c->xmax<<shift);
3146     if(P_TOP[1]      > (c->ymax<<shift)) P_TOP[1]     = (c->ymax<<shift);
3147     if(P_TOPRIGHT[0] < (c->xmin<<shift)) P_TOPRIGHT[0]= (c->xmin<<shift);
3148     if(P_TOPRIGHT[0] > (c->xmax<<shift)) P_TOPRIGHT[0]= (c->xmax<<shift); //due to pmx no clip
3149     if(P_TOPRIGHT[1] > (c->ymax<<shift)) P_TOPRIGHT[1]= (c->ymax<<shift);
3150
3151     P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]);
3152     P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);
3153
3154     if (!y) {
3155         c->pred_x= P_LEFT[0];
3156         c->pred_y= P_LEFT[1];
3157     } else {
3158         c->pred_x = P_MEDIAN[0];
3159         c->pred_y = P_MEDIAN[1];
3160     }
3161
3162     score= INT_MAX;
3163     best_ref= 0;
3164     for(ref=0; ref<s->ref_frames; ref++){
3165         init_ref(c, current_data, s->last_picture[ref].data, NULL, block_w*x, block_w*y, 0);
3166
3167         ref_score= ff_epzs_motion_search(&s->m, &ref_mx, &ref_my, P, 0, /*ref_index*/ 0, last_mv,
3168                                          (1<<16)>>shift, level-LOG2_MB_SIZE+4, block_w);
3169
3170         assert(ref_mx >= c->xmin);
3171         assert(ref_mx <= c->xmax);
3172         assert(ref_my >= c->ymin);
3173         assert(ref_my <= c->ymax);
3174
3175         ref_score= c->sub_motion_search(&s->m, &ref_mx, &ref_my, ref_score, 0, 0, level-LOG2_MB_SIZE+4, block_w);
3176         ref_score= ff_get_mb_score(&s->m, ref_mx, ref_my, 0, 0, level-LOG2_MB_SIZE+4, block_w, 0);
3177         ref_score+= 2*av_log2(2*ref)*c->penalty_factor;
3178         if(s->ref_mvs[ref]){
3179             s->ref_mvs[ref][index][0]= ref_mx;
3180             s->ref_mvs[ref][index][1]= ref_my;
3181             s->ref_scores[ref][index]= ref_score;
3182         }
3183         if(score > ref_score){
3184             score= ref_score;
3185             best_ref= ref;
3186             mx= ref_mx;
3187             my= ref_my;
3188         }
3189     }
3190     //FIXME if mb_cmp != SSE then intra cannot be compared currently and mb_penalty vs. lambda2
3191
3192   //  subpel search
3193     base_bits= get_rac_count(&s->c) - 8*(s->c.bytestream - s->c.bytestream_start);
3194     pc= s->c;
3195     pc.bytestream_start=
3196     pc.bytestream= p_buffer; //FIXME end/start? and at the other stoo
3197     memcpy(p_state, s->block_state, sizeof(s->block_state));
3198
3199     if(level!=s->block_max_depth)
3200         put_rac(&pc, &p_state[4 + s_context], 1);
3201     put_rac(&pc, &p_state[1 + left->type + top->type], 0);
3202     if(s->ref_frames > 1)
3203         put_symbol(&pc, &p_state[128 + 1024 + 32*ref_context], best_ref, 0);
3204     pred_mv(s, &pmx, &pmy, best_ref, left, top, tr);
3205     put_symbol(&pc, &p_state[128 + 32*(mx_context + 16*!!best_ref)], mx - pmx, 1);
3206     put_symbol(&pc, &p_state[128 + 32*(my_context + 16*!!best_ref)], my - pmy, 1);
3207     p_len= pc.bytestream - pc.bytestream_start;
3208     score += (s->lambda2*(get_rac_count(&pc)-base_bits))>>FF_LAMBDA_SHIFT;
3209
3210     block_s= block_w*block_w;
3211     sum = pix_sum(current_data[0], stride, block_w);
3212     l= (sum + block_s/2)/block_s;
3213     iscore = pix_norm1(current_data[0], stride, block_w) - 2*l*sum + l*l*block_s;
3214
3215     block_s= block_w*block_w>>2;
3216     sum = pix_sum(current_data[1], uvstride, block_w>>1);
3217     cb= (sum + block_s/2)/block_s;
3218 //    iscore += pix_norm1(&current_mb[1][0], uvstride, block_w>>1) - 2*cb*sum + cb*cb*block_s;
3219     sum = pix_sum(current_data[2], uvstride, block_w>>1);
3220     cr= (sum + block_s/2)/block_s;
3221 //    iscore += pix_norm1(&current_mb[2][0], uvstride, block_w>>1) - 2*cr*sum + cr*cr*block_s;
3222
3223     ic= s->c;
3224     ic.bytestream_start=
3225     ic.bytestream= i_buffer; //FIXME end/start? and at the other stoo
3226     memcpy(i_state, s->block_state, sizeof(s->block_state));
3227     if(level!=s->block_max_depth)
3228         put_rac(&ic, &i_state[4 + s_context], 1);
3229     put_rac(&ic, &i_state[1 + left->type + top->type], 1);
3230     put_symbol(&ic, &i_state[32],  l-pl , 1);
3231     put_symbol(&ic, &i_state[64], cb-pcb, 1);
3232     put_symbol(&ic, &i_state[96], cr-pcr, 1);
3233     i_len= ic.bytestream - ic.bytestream_start;
3234     iscore += (s->lambda2*(get_rac_count(&ic)-base_bits))>>FF_LAMBDA_SHIFT;
3235
3236 //    assert(score==256*256*256*64-1);
3237     assert(iscore < 255*255*256 + s->lambda2*10);
3238     assert(iscore >= 0);
3239     assert(l>=0 && l<=255);
3240     assert(pl>=0 && pl<=255);
3241
3242     if(level==0){
3243         int varc= iscore >> 8;
3244         int vard= score >> 8;
3245         if (vard <= 64 || vard < varc)
3246             c->scene_change_score+= ff_sqrt(vard) - ff_sqrt(varc);
3247         else
3248             c->scene_change_score+= s->m.qscale;
3249     }
3250
3251     if(level!=s->block_max_depth){
3252         put_rac(&s->c, &s->block_state[4 + s_context], 0);
3253         score2 = encode_q_branch(s, level+1, 2*x+0, 2*y+0);
3254         score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+0);
3255         score2+= encode_q_branch(s, level+1, 2*x+0, 2*y+1);
3256         score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+1);
3257         score2+= s->lambda2>>FF_LAMBDA_SHIFT; //FIXME exact split overhead
3258
3259         if(score2 < score && score2 < iscore)
3260             return score2;
3261     }
3262
3263     if(iscore < score){
3264         pred_mv(s, &pmx, &pmy, 0, left, top, tr);
3265         memcpy(pbbak, i_buffer, i_len);
3266         s->c= ic;
3267         s->c.bytestream_start= pbbak_start;
3268         s->c.bytestream= pbbak + i_len;
3269         set_blocks(s, level, x, y, l, cb, cr, pmx, pmy, 0, BLOCK_INTRA);
3270         memcpy(s->block_state, i_state, sizeof(s->block_state));
3271         return iscore;
3272     }else{
3273         memcpy(pbbak, p_buffer, p_len);
3274         s->c= pc;
3275         s->c.bytestream_start= pbbak_start;
3276         s->c.bytestream= pbbak + p_len;
3277         set_blocks(s, level, x, y, pl, pcb, pcr, mx, my, best_ref, 0);
3278         memcpy(s->block_state, p_state, sizeof(s->block_state));
3279         return score;
3280     }
3281 }
3282
3283 static void encode_q_branch2(SnowContext *s, int level, int x, int y){
3284     const int w= s->b_width  << s->block_max_depth;
3285     const int rem_depth= s->block_max_depth - level;
3286     const int index= (x + y*w) << rem_depth;
3287     int trx= (x+1)<<rem_depth;
3288     BlockNode *b= &s->block[index];
3289     const BlockNode *left  = x ? &s->block[index-1] : &null_block;
3290     const BlockNode *top   = y ? &s->block[index-w] : &null_block;
3291     const BlockNode *tl    = y && x ? &s->block[index-w-1] : left;
3292     const BlockNode *tr    = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
3293     int pl = left->color[0];
3294     int pcb= left->color[1];
3295     int pcr= left->color[2];
3296     int pmx, pmy;
3297     int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref);
3298     int mx_context= av_log2(2*FFABS(left->mx - top->mx)) + 16*!!b->ref;
3299     int my_context= av_log2(2*FFABS(left->my - top->my)) + 16*!!b->ref;
3300     int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
3301
3302     if(s->keyframe){
3303         set_blocks(s, level, x, y, pl, pcb, pcr, 0, 0, 0, BLOCK_INTRA);
3304         return;
3305     }
3306
3307     if(level!=s->block_max_depth){
3308         if(same_block(b,b+1) && same_block(b,b+w) && same_block(b,b+w+1)){
3309             put_rac(&s->c, &s->block_state[4 + s_context], 1);
3310         }else{
3311             put_rac(&s->c, &s->block_state[4 + s_context], 0);
3312             encode_q_branch2(s, level+1, 2*x+0, 2*y+0);
3313             encode_q_branch2(s, level+1, 2*x+1, 2*y+0);
3314             encode_q_branch2(s, level+1, 2*x+0, 2*y+1);
3315             encode_q_branch2(s, level+1, 2*x+1, 2*y+1);
3316             return;
3317         }
3318     }
3319     if(b->type & BLOCK_INTRA){
3320         pred_mv(s, &pmx, &pmy, 0, left, top, tr);
3321         put_rac(&s->c, &s->block_state[1 + (left->type&1) + (top->type&1)], 1);
3322         put_symbol(&s->c, &s->block_state[32], b->color[0]-pl , 1);
3323         put_symbol(&s->c, &s->block_state[64], b->color[1]-pcb, 1);
3324         put_symbol(&s->c, &s->block_state[96], b->color[2]-pcr, 1);
3325         set_blocks(s, level, x, y, b->color[0], b->color[1], b->color[2], pmx, pmy, 0, BLOCK_INTRA);
3326     }else{
3327         pred_mv(s, &pmx, &pmy, b->ref, left, top, tr);
3328         put_rac(&s->c, &s->block_state[1 + (left->type&1) + (top->type&1)], 0);
3329         if(s->ref_frames > 1)
3330             put_symbol(&s->c, &s->block_state[128 + 1024 + 32*ref_context], b->ref, 0);
3331         put_symbol(&s->c, &s->block_state[128 + 32*mx_context], b->mx - pmx, 1);
3332         put_symbol(&s->c, &s->block_state[128 + 32*my_context], b->my - pmy, 1);
3333         set_blocks(s, level, x, y, pl, pcb, pcr, b->mx, b->my, b->ref, 0);
3334     }
3335 }
3336
3337 static int get_dc(SnowContext *s, int mb_x, int mb_y, int plane_index){
3338     int i, x2, y2;
3339     Plane *p= &s->plane[plane_index];
3340     const int block_size = MB_SIZE >> s->block_max_depth;
3341     const int block_w    = plane_index ? block_size/2 : block_size;
3342     const uint8_t *obmc  = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
3343     const int obmc_stride= plane_index ? block_size : 2*block_size;
3344     const int ref_stride= s->current_picture.linesize[plane_index];
3345     uint8_t *src= s-> input_picture.data[plane_index];
3346     IDWTELEM *dst= (IDWTELEM*)s->m.obmc_scratchpad + plane_index*block_size*block_size*4; //FIXME change to unsigned
3347     const int b_stride = s->b_width << s->block_max_depth;
3348     const int w= p->width;
3349     const int h= p->height;
3350     int index= mb_x + mb_y*b_stride;
3351     BlockNode *b= &s->block[index];
3352     BlockNode backup= *b;
3353     int ab=0;
3354     int aa=0;
3355
3356     b->type|= BLOCK_INTRA;
3357     b->color[plane_index]= 0;
3358     memset(dst, 0, obmc_stride*obmc_stride*sizeof(IDWTELEM));
3359
3360     for(i=0; i<4; i++){
3361         int mb_x2= mb_x + (i &1) - 1;
3362         int mb_y2= mb_y + (i>>1) - 1;
3363         int x= block_w*mb_x2 + block_w/2;
3364         int y= block_w*mb_y2 + block_w/2;
3365
3366         add_yblock(s, 0, NULL, dst + ((i&1)+(i>>1)*obmc_stride)*block_w, NULL, obmc,
3367                     x, y, block_w, block_w, w, h, obmc_stride, ref_stride, obmc_stride, mb_x2, mb_y2, 0, 0, plane_index);
3368
3369         for(y2= FFMAX(y, 0); y2<FFMIN(h, y+block_w); y2++){
3370             for(x2= FFMAX(x, 0); x2<FFMIN(w, x+block_w); x2++){
3371                 int index= x2-(block_w*mb_x - block_w/2) + (y2-(block_w*mb_y - block_w/2))*obmc_stride;
3372                 int obmc_v= obmc[index];
3373                 int d;
3374                 if(y<0) obmc_v += obmc[index + block_w*obmc_stride];
3375                 if(x<0) obmc_v += obmc[index + block_w];
3376                 if(y+block_w>h) obmc_v += obmc[index - block_w*obmc_stride];
3377                 if(x+block_w>w) obmc_v += obmc[index - block_w];
3378                 //FIXME precalculate this or simplify it somehow else
3379
3380                 d = -dst[index] + (1<<(FRAC_BITS-1));
3381                 dst[index] = d;
3382                 ab += (src[x2 + y2*ref_stride] - (d>>FRAC_BITS)) * obmc_v;
3383                 aa += obmc_v * obmc_v; //FIXME precalculate this
3384             }
3385         }
3386     }
3387     *b= backup;
3388
3389     return av_clip(((ab<<LOG2_OBMC_MAX) + aa/2)/aa, 0, 255); //FIXME we should not need clipping
3390 }
3391
3392 static inline int get_block_bits(SnowContext *s, int x, int y, int w){
3393     const int b_stride = s->b_width << s->block_max_depth;
3394     const int b_height = s->b_height<< s->block_max_depth;
3395     int index= x + y*b_stride;
3396     const BlockNode *b     = &s->block[index];
3397     const BlockNode *left  = x ? &s->block[index-1] : &null_block;
3398     const BlockNode *top   = y ? &s->block[index-b_stride] : &null_block;
3399     const BlockNode *tl    = y && x ? &s->block[index-b_stride-1] : left;
3400     const BlockNode *tr    = y && x+w<b_stride ? &s->block[index-b_stride+w] : tl;
3401     int dmx, dmy;
3402 //  int mx_context= av_log2(2*FFABS(left->mx - top->mx));
3403 //  int my_context= av_log2(2*FFABS(left->my - top->my));
3404
3405     if(x<0 || x>=b_stride || y>=b_height)
3406         return 0;
3407 /*
3408 1            0      0
3409 01X          1-2    1
3410 001XX        3-6    2-3
3411 0001XXX      7-14   4-7
3412 00001XXXX   15-30   8-15
3413 */
3414 //FIXME try accurate rate
3415 //FIXME intra and inter predictors if surrounding blocks are not the same type
3416     if(b->type & BLOCK_INTRA){
3417         return 3+2*( av_log2(2*FFABS(left->color[0] - b->color[0]))
3418                    + av_log2(2*FFABS(left->color[1] - b->color[1]))
3419                    + av_log2(2*FFABS(left->color[2] - b->color[2])));
3420     }else{
3421         pred_mv(s, &dmx, &dmy, b->ref, left, top, tr);
3422         dmx-= b->mx;
3423         dmy-= b->my;
3424         return 2*(1 + av_log2(2*FFABS(dmx)) //FIXME kill the 2* can be merged in lambda
3425                     + av_log2(2*FFABS(dmy))
3426                     + av_log2(2*b->ref));
3427     }
3428 }
3429
3430 static int get_block_rd(SnowContext *s, int mb_x, int mb_y, int plane_index, const uint8_t *obmc_edged){
3431     Plane *p= &s->plane[plane_index];
3432     const int block_size = MB_SIZE >> s->block_max_depth;
3433     const int block_w    = plane_index ? block_size/2 : block_size;
3434     const int obmc_stride= plane_index ? block_size : 2*block_size;
3435     const int ref_stride= s->current_picture.linesize[plane_index];
3436     uint8_t *dst= s->current_picture.data[plane_index];
3437     uint8_t *src= s->  input_picture.data[plane_index];
3438     IDWTELEM *pred= (IDWTELEM*)s->m.obmc_scratchpad + plane_index*block_size*block_size*4;
3439     uint8_t *cur = s->scratchbuf;
3440     uint8_t tmp[ref_stride*(2*MB_SIZE+HTAPS_MAX-1)];
3441     const int b_stride = s->b_width << s->block_max_depth;
3442     const int b_height = s->b_height<< s->block_max_depth;
3443     const int w= p->width;
3444     const int h= p->height;
3445     int distortion;
3446     int rate= 0;
3447     const int penalty_factor= get_penalty_factor(s->lambda, s->lambda2, s->avctx->me_cmp);
3448     int sx= block_w*mb_x - block_w/2;
3449     int sy= block_w*mb_y - block_w/2;
3450     int x0= FFMAX(0,-sx);
3451     int y0= FFMAX(0,-sy);
3452     int x1= FFMIN(block_w*2, w-sx);
3453     int y1= FFMIN(block_w*2, h-sy);
3454     int i,x,y;
3455
3456     pred_block(s, cur, tmp, ref_stride, sx, sy, block_w*2, block_w*2, &s->block[mb_x + mb_y*b_stride], plane_index, w, h);
3457
3458     for(y=y0; y<y1; y++){
3459         const uint8_t *obmc1= obmc_edged + y*obmc_stride;
3460         const IDWTELEM *pred1 = pred + y*obmc_stride;
3461         uint8_t *cur1 = cur + y*ref_stride;
3462         uint8_t *dst1 = dst + sx + (sy+y)*ref_stride;
3463         for(x=x0; x<x1; x++){
3464 #if FRAC_BITS >= LOG2_OBMC_MAX
3465             int v = (cur1[x] * obmc1[x]) << (FRAC_BITS - LOG2_OBMC_MAX);
3466 #else
3467             int v = (cur1[x] * obmc1[x] + (1<<(LOG2_OBMC_MAX - FRAC_BITS-1))) >> (LOG2_OBMC_MAX - FRAC_BITS);
3468 #endif
3469             v = (v + pred1[x]) >> FRAC_BITS;
3470             if(v&(~255)) v= ~(v>>31);
3471             dst1[x] = v;
3472         }
3473     }
3474
3475     /* copy the regions where obmc[] = (uint8_t)256 */
3476     if(LOG2_OBMC_MAX == 8
3477         && (mb_x == 0 || mb_x == b_stride-1)
3478         && (mb_y == 0 || mb_y == b_height-1)){
3479         if(mb_x == 0)
3480             x1 = block_w;
3481         else
3482             x0 = block_w;
3483         if(mb_y == 0)
3484             y1 = block_w;
3485         else
3486             y0 = block_w;
3487         for(y=y0; y<y1; y++)
3488             memcpy(dst + sx+x0 + (sy+y)*ref_stride, cur + x0 + y*ref_stride, x1-x0);
3489     }
3490
3491     if(block_w==16){
3492         /* FIXME rearrange dsputil to fit 32x32 cmp functions */
3493         /* FIXME check alignment of the cmp wavelet vs the encoding wavelet */
3494         /* FIXME cmps overlap but do not cover the wavelet's whole support.
3495          * So improving the score of one block is not strictly guaranteed
3496          * to improve the score of the whole frame, thus iterative motion
3497          * estimation does not always converge. */
3498         if(s->avctx->me_cmp == FF_CMP_W97)
3499             distortion = w97_32_c(&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, 32);
3500         else if(s->avctx->me_cmp == FF_CMP_W53)
3501             distortion = w53_32_c(&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, 32);
3502         else{
3503             distortion = 0;
3504             for(i=0; i<4; i++){
3505                 int off = sx+16*(i&1) + (sy+16*(i>>1))*ref_stride;
3506                 distortion += s->dsp.me_cmp[0](&s->m, src + off, dst + off, ref_stride, 16);
3507             }
3508         }
3509     }else{
3510         assert(block_w==8);
3511         distortion = s->dsp.me_cmp[0](&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, block_w*2);
3512     }
3513
3514     if(plane_index==0){
3515         for(i=0; i<4; i++){
3516 /* ..RRr
3517  * .RXx.
3518  * rxx..
3519  */
3520             rate += get_block_bits(s, mb_x + (i&1) - (i>>1), mb_y + (i>>1), 1);
3521         }
3522         if(mb_x == b_stride-2)
3523             rate += get_block_bits(s, mb_x + 1, mb_y + 1, 1);
3524     }
3525     return distortion + rate*penalty_factor;
3526 }
3527
3528 static int get_4block_rd(SnowContext *s, int mb_x, int mb_y, int plane_index){
3529     int i, y2;
3530     Plane *p= &s->plane[plane_index];
3531     const int block_size = MB_SIZE >> s->block_max_depth;
3532     const int block_w    = plane_index ? block_size/2 : block_size;
3533     const uint8_t *obmc  = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
3534     const int obmc_stride= plane_index ? block_size : 2*block_size;
3535     const int ref_stride= s->current_picture.linesize[plane_index];
3536     uint8_t *dst= s->current_picture.data[plane_index];
3537     uint8_t *src= s-> input_picture.data[plane_index];
3538     //FIXME zero_dst is const but add_yblock changes dst if add is 0 (this is never the case for dst=zero_dst
3539     // const has only been removed from zero_dst to suppress a warning
3540     static IDWTELEM zero_dst[4096]; //FIXME
3541     const int b_stride = s->b_width << s->block_max_depth;
3542     const int w= p->width;
3543     const int h= p->height;
3544     int distortion= 0;
3545     int rate= 0;
3546     const int penalty_factor= get_penalty_factor(s->lambda, s->lambda2, s->avctx->me_cmp);
3547
3548     for(i=0; i<9; i++){
3549         int mb_x2= mb_x + (i%3) - 1;
3550         int mb_y2= mb_y + (i/3) - 1;
3551         int x= block_w*mb_x2 + block_w/2;
3552         int y= block_w*mb_y2 + block_w/2;
3553
3554         add_yblock(s, 0, NULL, zero_dst, dst, obmc,
3555                    x, y, block_w, block_w, w, h, /*dst_stride*/0, ref_stride, obmc_stride, mb_x2, mb_y2, 1, 1, plane_index);
3556
3557         //FIXME find a cleaner/simpler way to skip the outside stuff
3558         for(y2= y; y2<0; y2++)
3559             memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, block_w);
3560         for(y2= h; y2<y+block_w; y2++)
3561             memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, block_w);
3562         if(x<0){
3563             for(y2= y; y2<y+block_w; y2++)
3564                 memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, -x);
3565         }
3566         if(x+block_w > w){
3567             for(y2= y; y2<y+block_w; y2++)
3568                 memcpy(dst + w + y2*ref_stride, src + w + y2*ref_stride, x+block_w - w);
3569         }
3570
3571         assert(block_w== 8 || block_w==16);
3572         distortion += s->dsp.me_cmp[block_w==8](&s->m, src + x + y*ref_stride, dst + x + y*ref_stride, ref_stride, block_w);
3573     }
3574
3575     if(plane_index==0){
3576         BlockNode *b= &s->block[mb_x+mb_y*b_stride];
3577         int merged= same_block(b,b+1) && same_block(b,b+b_stride) && same_block(b,b+b_stride+1);
3578
3579 /* ..RRRr
3580  * .RXXx.
3581  * .RXXx.
3582  * rxxx.
3583  */
3584         if(merged)
3585             rate = get_block_bits(s, mb_x, mb_y, 2);
3586         for(i=merged?4:0; i<9; i++){
3587             static const int dxy[9][2] = {{0,0},{1,0},{0,1},{1,1},{2,0},{2,1},{-1,2},{0,2},{1,2}};
3588             rate += get_block_bits(s, mb_x + dxy[i][0], mb_y + dxy[i][1], 1);
3589         }
3590     }
3591     return distortion + rate*penalty_factor;
3592 }
3593
3594 static void ff_spatial_idwt_init(DWTCompose *cs, IDWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
3595     int level;
3596     for(level=decomposition_count-1; level>=0; level--){
3597         switch(type){
3598         case DWT_97: spatial_compose97i_init(cs+level, buffer, height>>level, stride<<level); break;
3599         case DWT_53: spatial_compose53i_init(cs+level, buffer, height>>level, stride<<level); break;
3600         }
3601     }
3602 }
3603
3604 static void ff_spatial_idwt_slice(DWTCompose *cs, IDWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count, int y){
3605     const int support = type==1 ? 3 : 5;
3606     int level;
3607     if(type==2) return;
3608
3609     for(level=decomposition_count-1; level>=0; level--){
3610         while(cs[level].y <= FFMIN((y>>level)+support, height>>level)){
3611             switch(type){
3612             case DWT_97: spatial_compose97i_dy(cs+level, buffer, width>>level, height>>level, stride<<level);
3613                 break;
3614             case DWT_53: spatial_compose53i_dy(cs+level, buffer, width>>level, height>>level, stride<<level);
3615                 break;
3616             }
3617         }
3618     }
3619 }
3620
3621 static void ff_spatial_idwt(IDWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
3622         DWTCompose cs[MAX_DECOMPOSITIONS];
3623         int y;
3624         ff_spatial_idwt_init(cs, buffer, width, height, stride, type, decomposition_count);
3625         for(y=0; y<height; y+=4)
3626             ff_spatial_idwt_slice(cs, buffer, width, height, stride, type, decomposition_count, y);
3627 }
3628
3629 static int encode_subband_c0run(SnowContext *s, SubBand *b, IDWTELEM *src, IDWTELEM *parent, int stride, int orientation){
3630     const int w= b->width;
3631     const int h= b->height;
3632     int x, y;
3633
3634     if(1){
3635         int run=0;
3636         int runs[w*h];
3637         int run_index=0;
3638         int max_index;
3639
3640         for(y=0; y<h; y++){
3641             for(x=0; x<w; x++){
3642                 int v, p=0;
3643                 int /*ll=0, */l=0, lt=0, t=0, rt=0;
3644                 v= src[x + y*stride];
3645
3646                 if(y){
3647                     t= src[x + (y-1)*stride];
3648                     if(x){
3649                         lt= src[x - 1 + (y-1)*stride];
3650                     }
3651                     if(x + 1 < w){
3652                         rt= src[x + 1 + (y-1)*stride];
3653                     }
3654                 }
3655                 if(x){
3656                     l= src[x - 1 + y*stride];
3657                     /*if(x > 1){
3658                         if(orientation==1) ll= src[y + (x-2)*stride];
3659                         else               ll= src[x - 2 + y*stride];
3660                     }*/
3661                 }
3662                 if(parent){
3663                     int px= x>>1;
3664                     int py= y>>1;
3665                     if(px<b->parent->width && py<b->parent->height)
3666                         p= parent[px + py*2*stride];
3667                 }
3668                 if(!(/*ll|*/l|lt|t|rt|p)){
3669                     if(v){
3670                         runs[run_index++]= run;
3671                         run=0;
3672                     }else{
3673                         run++;
3674                     }
3675                 }
3676             }
3677         }
3678         max_index= run_index;
3679         runs[run_index++]= run;
3680         run_index=0;
3681         run= runs[run_index++];
3682
3683         put_symbol2(&s->c, b->state[30], max_index, 0);
3684         if(run_index <= max_index)
3685             put_symbol2(&s->c, b->state[1], run, 3);
3686
3687         for(y=0; y<h; y++){
3688             if(s->c.bytestream_end - s->c.bytestream < w*40){
3689                 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
3690                 return -1;
3691             }
3692             for(x=0; x<w; x++){
3693                 int v, p=0;
3694                 int /*ll=0, */l=0, lt=0, t=0, rt=0;
3695                 v= src[x + y*stride];
3696
3697                 if(y){
3698                     t= src[x + (y-1)*stride];
3699                     if(x){
3700                         lt= src[x - 1 + (y-1)*stride];
3701                     }
3702                     if(x + 1 < w){
3703                         rt= src[x + 1 + (y-1)*stride];
3704                     }
3705                 }
3706                 if(x){
3707                     l= src[x - 1 + y*stride];
3708                     /*if(x > 1){
3709                         if(orientation==1) ll= src[y + (x-2)*stride];
3710                         else               ll= src[x - 2 + y*stride];
3711                     }*/
3712                 }
3713                 if(parent){
3714                     int px= x>>1;
3715                     int py= y>>1;
3716                     if(px<b->parent->width && py<b->parent->height)
3717                         p= parent[px + py*2*stride];
3718                 }
3719                 if(/*ll|*/l|lt|t|rt|p){
3720                     int context= av_log2(/*FFABS(ll) + */3*FFABS(l) + FFABS(lt) + 2*FFABS(t) + FFABS(rt) + FFABS(p));
3721
3722                     put_rac(&s->c, &b->state[0][context], !!v);
3723                 }else{
3724                     if(!run){
3725                         run= runs[run_index++];
3726
3727                         if(run_index <= max_index)
3728                             put_symbol2(&s->c, b->state[1], run, 3);
3729                         assert(v);
3730                     }else{
3731                         run--;
3732                         assert(!v);
3733                     }
3734                 }
3735                 if(v){
3736                     int context= av_log2(/*FFABS(ll) + */3*FFABS(l) + FFABS(lt) + 2*FFABS(t) + FFABS(rt) + FFABS(p));
3737                     int l2= 2*FFABS(l) + (l<0);
3738                     int t2= 2*FFABS(t) + (t<0);
3739
3740                     put_symbol2(&s->c, b->state[context + 2], FFABS(v)-1, context-4);
3741                     put_rac(&s->c, &b->state[0][16 + 1 + 3 + quant3bA[l2&0xFF] + 3*quant3bA[t2&0xFF]], v<0);
3742                 }
3743             }
3744         }
3745     }
3746     return 0;
3747 }
3748
3749 static int encode_subband(SnowContext *s, SubBand *b, IDWTELEM *src, IDWTELEM *parent, int stride, int orientation){
3750 //    encode_subband_qtree(s, b, src, parent, stride, orientation);
3751 //    encode_subband_z0run(s, b, src, parent, stride, orientation);
3752     return encode_subband_c0run(s, b, src, parent, stride, orientation);
3753 //    encode_subband_dzr(s, b, src, parent, stride, orientation);
3754 }
3755
3756 static av_always_inline int check_block(SnowContext *s, int mb_x, int mb_y, int p[3], int intra, const uint8_t *obmc_edged, int *best_rd){
3757     const int b_stride= s->b_width << s->block_max_depth;
3758     BlockNode *block= &s->block[mb_x + mb_y * b_stride];
3759     BlockNode backup= *block;
3760     int rd, index, value;
3761
3762     assert(mb_x>=0 && mb_y>=0);
3763     assert(mb_x<b_stride);
3764
3765     if(intra){
3766         block->color[0] = p[0];
3767         block->color[1] = p[1];
3768         block->color[2] = p[2];
3769         block->type |= BLOCK_INTRA;
3770     }else{
3771         index= (p[0] + 31*p[1]) & (ME_CACHE_SIZE-1);
3772         value= s->me_cache_generation + (p[0]>>10) + (p[1]<<6) + (block->ref<<12);
3773         if(s->me_cache[index] == value)
3774             return 0;
3775         s->me_cache[index]= value;
3776
3777         block->mx= p[0];
3778         block->my= p[1];
3779         block->type &= ~BLOCK_INTRA;
3780     }
3781
3782     rd= get_block_rd(s, mb_x, mb_y, 0, obmc_edged);
3783
3784 //FIXME chroma
3785     if(rd < *best_rd){
3786         *best_rd= rd;
3787         return 1;
3788     }else{
3789         *block= backup;
3790         return 0;
3791     }
3792 }
3793
3794 /* special case for int[2] args we discard afterwards,
3795  * fixes compilation problem with gcc 2.95 */
3796 static av_always_inline int check_block_inter(SnowContext *s, int mb_x, int mb_y, int p0, int p1, const uint8_t *obmc_edged, int *best_rd){
3797     int p[2] = {p0, p1};
3798     return check_block(s, mb_x, mb_y, p, 0, obmc_edged, best_rd);
3799 }
3800
3801 static av_always_inline int check_4block_inter(SnowContext *s, int mb_x, int mb_y, int p0, int p1, int ref, int *best_rd){
3802     const int b_stride= s->b_width << s->block_max_depth;
3803     BlockNode *block= &s->block[mb_x + mb_y * b_stride];
3804     BlockNode backup[4]= {block[0], block[1], block[b_stride], block[b_stride+1]};
3805     int rd, index, value;
3806
3807     assert(mb_x>=0 && mb_y>=0);
3808     assert(mb_x<b_stride);
3809     assert(((mb_x|mb_y)&1) == 0);
3810
3811     index= (p0 + 31*p1) & (ME_CACHE_SIZE-1);
3812     value= s->me_cache_generation + (p0>>10) + (p1<<6) + (block->ref<<12);
3813     if(s->me_cache[index] == value)
3814         return 0;
3815     s->me_cache[index]= value;
3816
3817     block->mx= p0;
3818     block->my= p1;
3819     block->ref= ref;
3820     block->type &= ~BLOCK_INTRA;
3821     block[1]= block[b_stride]= block[b_stride+1]= *block;
3822
3823     rd= get_4block_rd(s, mb_x, mb_y, 0);
3824
3825 //FIXME chroma
3826     if(rd < *best_rd){
3827         *best_rd= rd;
3828         return 1;
3829     }else{
3830         block[0]= backup[0];
3831         block[1]= backup[1];
3832         block[b_stride]= backup[2];
3833         block[b_stride+1]= backup[3];
3834         return 0;
3835     }
3836 }
3837
3838 static void iterative_me(SnowContext *s){
3839     int pass, mb_x, mb_y;
3840     const int b_width = s->b_width  << s->block_max_depth;
3841     const int b_height= s->b_height << s->block_max_depth;
3842     const int b_stride= b_width;
3843     int color[3];
3844
3845     {
3846         RangeCoder r = s->c;
3847         uint8_t state[sizeof(s->block_state)];
3848         memcpy(state, s->block_state, sizeof(s->block_state));
3849         for(mb_y= 0; mb_y<s->b_height; mb_y++)
3850             for(mb_x= 0; mb_x<s->b_width; mb_x++)
3851                 encode_q_branch(s, 0, mb_x, mb_y);
3852         s->c = r;
3853         memcpy(s->block_state, state, sizeof(s->block_state));
3854     }
3855
3856     for(pass=0; pass<25; pass++){
3857         int change= 0;
3858
3859         for(mb_y= 0; mb_y<b_height; mb_y++){
3860             for(mb_x= 0; mb_x<b_width; mb_x++){
3861                 int dia_change, i, j, ref;
3862                 int best_rd= INT_MAX, ref_rd;
3863                 BlockNode backup, ref_b;
3864                 const int index= mb_x + mb_y * b_stride;
3865                 BlockNode *block= &s->block[index];
3866                 BlockNode *tb =                   mb_y            ? &s->block[index-b_stride  ] : NULL;
3867                 BlockNode *lb = mb_x                              ? &s->block[index         -1] : NULL;
3868                 BlockNode *rb = mb_x+1<b_width                    ? &s->block[index         +1] : NULL;
3869                 BlockNode *bb =                   mb_y+1<b_height ? &s->block[index+b_stride  ] : NULL;
3870                 BlockNode *tlb= mb_x           && mb_y            ? &s->block[index-b_stride-1] : NULL;
3871                 BlockNode *trb= mb_x+1<b_width && mb_y            ? &s->block[index-b_stride+1] : NULL;
3872                 BlockNode *blb= mb_x           && mb_y+1<b_height ? &s->block[index+b_stride-1] : NULL;
3873                 BlockNode *brb= mb_x+1<b_width && mb_y+1<b_height ? &s->block[index+b_stride+1] : NULL;
3874                 const int b_w= (MB_SIZE >> s->block_max_depth);
3875                 uint8_t obmc_edged[b_w*2][b_w*2];
3876
3877                 if(pass && (block->type & BLOCK_OPT))
3878                     continue;
3879                 block->type |= BLOCK_OPT;
3880
3881                 backup= *block;
3882
3883                 if(!s->me_cache_generation)
3884                     memset(s->me_cache, 0, sizeof(s->me_cache));
3885                 s->me_cache_generation += 1<<22;
3886
3887                 //FIXME precalculate
3888                 {
3889                     int x, y;
3890                     memcpy(obmc_edged, obmc_tab[s->block_max_depth], b_w*b_w*4);
3891                     if(mb_x==0)
3892                         for(y=0; y<b_w*2; y++)
3893                             memset(obmc_edged[y], obmc_edged[y][0] + obmc_edged[y][b_w-1], b_w);
3894                     if(mb_x==b_stride-1)
3895                         for(y=0; y<b_w*2; y++)
3896                             memset(obmc_edged[y]+b_w, obmc_edged[y][b_w] + obmc_edged[y][b_w*2-1], b_w);
3897                     if(mb_y==0){
3898                         for(x=0; x<b_w*2; x++)
3899                             obmc_edged[0][x] += obmc_edged[b_w-1][x];
3900                         for(y=1; y<b_w; y++)
3901                             memcpy(obmc_edged[y], obmc_edged[0], b_w*2);
3902                     }
3903                     if(mb_y==b_height-1){
3904                         for(x=0; x<b_w*2; x++)
3905                             obmc_edged[b_w*2-1][x] += obmc_edged[b_w][x];
3906                         for(y=b_w; y<b_w*2-1; y++)
3907                             memcpy(obmc_edged[y], obmc_edged[b_w*2-1], b_w*2);
3908                     }
3909                 }
3910
3911                 //skip stuff outside the picture
3912                 if(mb_x==0 || mb_y==0 || mb_x==b_width-1 || mb_y==b_height-1){
3913                     uint8_t *src= s->  input_picture.data[0];
3914                     uint8_t *dst= s->current_picture.data[0];
3915                     const int stride= s->current_picture.linesize[0];
3916                     const int block_w= MB_SIZE >> s->block_max_depth;
3917                     const int sx= block_w*mb_x - block_w/2;
3918                     const int sy= block_w*mb_y - block_w/2;
3919                     const int w= s->plane[0].width;
3920                     const int h= s->plane[0].height;
3921                     int y;
3922
3923                     for(y=sy; y<0; y++)
3924                         memcpy(dst + sx + y*stride, src + sx + y*stride, block_w*2);
3925                     for(y=h; y<sy+block_w*2; y++)
3926                         memcpy(dst + sx + y*stride, src + sx + y*stride, block_w*2);
3927                     if(sx<0){
3928                         for(y=sy; y<sy+block_w*2; y++)
3929                             memcpy(dst + sx + y*stride, src + sx + y*stride, -sx);
3930                     }
3931                     if(sx+block_w*2 > w){
3932                         for(y=sy; y<sy+block_w*2; y++)
3933                             memcpy(dst + w + y*stride, src + w + y*stride, sx+block_w*2 - w);
3934                     }
3935                 }
3936
3937                 // intra(black) = neighbors' contribution to the current block
3938                 for(i=0; i<3; i++)
3939                     color[i]= get_dc(s, mb_x, mb_y, i);
3940
3941                 // get previous score (cannot be cached due to OBMC)
3942                 if(pass > 0 && (block->type&BLOCK_INTRA)){
3943                     int color0[3]= {block->color[0], block->color[1], block->color[2]};
3944                     check_block(s, mb_x, mb_y, color0, 1, *obmc_edged, &best_rd);
3945                 }else
3946                     check_block_inter(s, mb_x, mb_y, block->mx, block->my, *obmc_edged, &best_rd);
3947
3948                 ref_b= *block;
3949                 ref_rd= best_rd;
3950                 for(ref=0; ref < s->ref_frames; ref++){
3951                     int16_t (*mvr)[2]= &s->ref_mvs[ref][index];
3952                     if(s->ref_scores[ref][index] > s->ref_scores[ref_b.ref][index]*3/2) //FIXME tune threshold
3953                         continue;
3954                     block->ref= ref;
3955                     best_rd= INT_MAX;
3956
3957                     check_block_inter(s, mb_x, mb_y, mvr[0][0], mvr[0][1], *obmc_edged, &best_rd);
3958                     check_block_inter(s, mb_x, mb_y, 0, 0, *obmc_edged, &best_rd);
3959                     if(tb)
3960                         check_block_inter(s, mb_x, mb_y, mvr[-b_stride][0], mvr[-b_stride][1], *obmc_edged, &best_rd);
3961                     if(lb)
3962                         check_block_inter(s, mb_x, mb_y, mvr[-1][0], mvr[-1][1], *obmc_edged, &best_rd);
3963                     if(rb)
3964                         check_block_inter(s, mb_x, mb_y, mvr[1][0], mvr[1][1], *obmc_edged, &best_rd);
3965                     if(bb)
3966                         check_block_inter(s, mb_x, mb_y, mvr[b_stride][0], mvr[b_stride][1], *obmc_edged, &best_rd);
3967
3968                     /* fullpel ME */
3969                     //FIXME avoid subpel interpolation / round to nearest integer
3970                     do{
3971                         dia_change=0;
3972                         for(i=0; i<FFMAX(s->avctx->dia_size, 1); i++){
3973                             for(j=0; j<i; j++){
3974                                 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+4*(i-j), block->my+(4*j), *obmc_edged, &best_rd);
3975                                 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx-4*(i-j), block->my-(4*j), *obmc_edged, &best_rd);
3976                                 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+4*(i-j), block->my-(4*j), *obmc_edged, &best_rd);
3977                                 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx-4*(i-j), block->my+(4*j), *obmc_edged, &best_rd);
3978                             }
3979                         }
3980                     }while(dia_change);
3981                     /* subpel ME */
3982                     do{
3983                         static const int square[8][2]= {{+1, 0},{-1, 0},{ 0,+1},{ 0,-1},{+1,+1},{-1,-1},{+1,-1},{-1,+1},};
3984                         dia_change=0;
3985                         for(i=0; i<8; i++)
3986                             dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+square[i][0], block->my+square[i][1], *obmc_edged, &best_rd);
3987                     }while(dia_change);
3988                     //FIXME or try the standard 2 pass qpel or similar
3989
3990                     mvr[0][0]= block->mx;
3991                     mvr[0][1]= block->my;
3992                     if(ref_rd > best_rd){
3993                         ref_rd= best_rd;
3994                         ref_b= *block;
3995                     }
3996                 }
3997                 best_rd= ref_rd;
3998                 *block= ref_b;
3999 #if 1
4000                 check_block(s, mb_x, mb_y, color, 1, *obmc_edged, &best_rd);
4001                 //FIXME RD style color selection
4002 #endif
4003                 if(!same_block(block, &backup)){
4004                     if(tb ) tb ->type &= ~BLOCK_OPT;
4005                     if(lb ) lb ->type &= ~BLOCK_OPT;
4006                     if(rb ) rb ->type &= ~BLOCK_OPT;
4007                     if(bb ) bb ->type &= ~BLOCK_OPT;
4008                     if(tlb) tlb->type &= ~BLOCK_OPT;
4009                     if(trb) trb->type &= ~BLOCK_OPT;
4010                     if(blb) blb->type &= ~BLOCK_OPT;
4011                     if(brb) brb->type &= ~BLOCK_OPT;
4012                     change ++;
4013                 }
4014             }
4015         }
4016         av_log(NULL, AV_LOG_ERROR, "pass:%d changed:%d\n", pass, change);
4017         if(!change)
4018             break;
4019     }
4020
4021     if(s->block_max_depth == 1){
4022         int change= 0;
4023         for(mb_y= 0; mb_y<b_height; mb_y+=2){
4024             for(mb_x= 0; mb_x<b_width; mb_x+=2){
4025                 int i;
4026                 int best_rd, init_rd;
4027                 const int index= mb_x + mb_y * b_stride;
4028                 BlockNode *b[4];
4029
4030                 b[0]= &s->block[index];
4031                 b[1]= b[0]+1;
4032                 b[2]= b[0]+b_stride;
4033                 b[3]= b[2]+1;
4034                 if(same_block(b[0], b[1]) &&
4035                    same_block(b[0], b[2]) &&
4036                    same_block(b[0], b[3]))
4037                     continue;
4038
4039                 if(!s->me_cache_generation)
4040                     memset(s->me_cache, 0, sizeof(s->me_cache));
4041                 s->me_cache_generation += 1<<22;
4042
4043                 init_rd= best_rd= get_4block_rd(s, mb_x, mb_y, 0);
4044
4045                 //FIXME more multiref search?
4046                 check_4block_inter(s, mb_x, mb_y,
4047                                    (b[0]->mx + b[1]->mx + b[2]->mx + b[3]->mx + 2) >> 2,
4048                                    (b[0]->my + b[1]->my + b[2]->my + b[3]->my + 2) >> 2, 0, &best_rd);
4049
4050                 for(i=0; i<4; i++)
4051                     if(!(b[i]->type&BLOCK_INTRA))
4052                         check_4block_inter(s, mb_x, mb_y, b[i]->mx, b[i]->my, b[i]->ref, &best_rd);
4053
4054                 if(init_rd != best_rd)
4055                     change++;
4056             }
4057         }
4058         av_log(NULL, AV_LOG_ERROR, "pass:4mv changed:%d\n", change*4);
4059     }
4060 }
4061
4062 static void encode_blocks(SnowContext *s, int search){
4063     int x, y;
4064     int w= s->b_width;
4065     int h= s->b_height;
4066
4067     if(s->avctx->me_method == ME_ITER && !s->keyframe && search)
4068         iterative_me(s);
4069
4070     for(y=0; y<h; y++){
4071         if(s->c.bytestream_end - s->c.bytestream < w*MB_SIZE*MB_SIZE*3){ //FIXME nicer limit
4072             av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
4073             return;
4074         }
4075         for(x=0; x<w; x++){
4076             if(s->avctx->me_method == ME_ITER || !search)
4077                 encode_q_branch2(s, 0, x, y);
4078             else
4079                 encode_q_branch (s, 0, x, y);
4080         }
4081     }
4082 }
4083
4084 static void quantize(SnowContext *s, SubBand *b, IDWTELEM *dst, DWTELEM *src, int stride, int bias){
4085     const int w= b->width;
4086     const int h= b->height;
4087     const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
4088     const int qmul= qexp[qlog&(QROOT-1)]<<((qlog>>QSHIFT) + ENCODER_EXTRA_BITS);
4089     int x,y, thres1, thres2;
4090
4091     if(s->qlog == LOSSLESS_QLOG){
4092         for(y=0; y<h; y++)
4093             for(x=0; x<w; x++)
4094                 dst[x + y*stride]= src[x + y*stride];
4095         return;
4096     }
4097
4098     bias= bias ? 0 : (3*qmul)>>3;
4099     thres1= ((qmul - bias)>>QEXPSHIFT) - 1;
4100     thres2= 2*thres1;
4101
4102     if(!bias){
4103         for(y=0; y<h; y++){
4104             for(x=0; x<w; x++){
4105                 int i= src[x + y*stride];
4106
4107                 if((unsigned)(i+thres1) > thres2){
4108                     if(i>=0){
4109                         i<<= QEXPSHIFT;
4110                         i/= qmul; //FIXME optimize
4111                         dst[x + y*stride]=  i;
4112                     }else{
4113                         i= -i;
4114                         i<<= QEXPSHIFT;
4115                         i/= qmul; //FIXME optimize
4116                         dst[x + y*stride]= -i;
4117                     }
4118                 }else
4119                     dst[x + y*stride]= 0;
4120             }
4121         }
4122     }else{
4123         for(y=0; y<h; y++){
4124             for(x=0; x<w; x++){
4125                 int i= src[x + y*stride];
4126
4127                 if((unsigned)(i+thres1) > thres2){
4128                     if(i>=0){
4129                         i<<= QEXPSHIFT;
4130                         i= (i + bias) / qmul; //FIXME optimize
4131                         dst[x + y*stride]=  i;
4132                     }else{
4133                         i= -i;
4134                         i<<= QEXPSHIFT;
4135                         i= (i + bias) / qmul; //FIXME optimize
4136                         dst[x + y*stride]= -i;
4137                     }
4138                 }else
4139                     dst[x + y*stride]= 0;
4140             }
4141         }
4142     }
4143 }
4144
4145 static void dequantize(SnowContext *s, SubBand *b, IDWTELEM *src, int stride){
4146     const int w= b->width;
4147     const int h= b->height;
4148     const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
4149     const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
4150     const int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
4151     int x,y;
4152
4153     if(s->qlog == LOSSLESS_QLOG) return;
4154
4155     for(y=0; y<h; y++){
4156         for(x=0; x<w; x++){
4157             int i= src[x + y*stride];
4158             if(i<0){
4159                 src[x + y*stride]= -((-i*qmul + qadd)>>(QEXPSHIFT)); //FIXME try different bias
4160             }else if(i>0){
4161                 src[x + y*stride]=  (( i*qmul + qadd)>>(QEXPSHIFT));
4162             }
4163         }
4164     }
4165 }
4166
4167 static void decorrelate(SnowContext *s, SubBand *b, IDWTELEM *src, int stride, int inverse, int use_median){
4168     const int w= b->width;
4169     const int h= b->height;
4170     int x,y;
4171
4172     for(y=h-1; y>=0; y--){
4173         for(x=w-1; x>=0; x--){
4174             int i= x + y*stride;
4175
4176             if(x){
4177                 if(use_median){
4178                     if(y && x+1<w) src[i] -= mid_pred(src[i - 1], src[i - stride], src[i - stride + 1]);
4179                     else  src[i] -= src[i - 1];
4180                 }else{
4181                     if(y) src[i] -= mid_pred(src[i - 1], src[i - stride], src[i - 1] + src[i - stride] - src[i - 1 - stride]);
4182                     else  src[i] -= src[i - 1];
4183                 }
4184             }else{
4185                 if(y) src[i] -= src[i - stride];
4186             }
4187         }
4188     }
4189 }
4190
4191 static void correlate(SnowContext *s, SubBand *b, IDWTELEM *src, int stride, int inverse, int use_median){
4192     const int w= b->width;
4193     const int h= b->height;
4194     int x,y;
4195
4196     for(y=0; y<h; y++){
4197         for(x=0; x<w; x++){
4198             int i= x + y*stride;
4199
4200             if(x){
4201                 if(use_median){
4202                     if(y && x+1<w) src[i] += mid_pred(src[i - 1], src[i - stride], src[i - stride + 1]);
4203                     else  src[i] += src[i - 1];
4204                 }else{
4205                     if(y) src[i] += mid_pred(src[i - 1], src[i - stride], src[i - 1] + src[i - stride] - src[i - 1 - stride]);
4206                     else  src[i] += src[i - 1];
4207                 }
4208             }else{
4209                 if(y) src[i] += src[i - stride];
4210             }
4211         }
4212     }
4213 }
4214
4215 static void encode_qlogs(SnowContext *s){
4216     int plane_index, level, orientation;
4217
4218     for(plane_index=0; plane_index<2; plane_index++){
4219         for(level=0; level<s->spatial_decomposition_count; level++){
4220             for(orientation=level ? 1:0; orientation<4; orientation++){
4221                 if(orientation==2) continue;
4222                 put_symbol(&s->c, s->header_state, s->plane[plane_index].band[level][orientation].qlog, 1);
4223             }
4224         }
4225     }
4226 }
4227
4228 static void encode_header(SnowContext *s){
4229     int plane_index, i;
4230     uint8_t kstate[32];
4231
4232     memset(kstate, MID_STATE, sizeof(kstate));
4233
4234     put_rac(&s->c, kstate, s->keyframe);
4235     if(s->keyframe || s->always_reset){
4236         reset_contexts(s);
4237         s->last_spatial_decomposition_type=
4238         s->last_qlog=
4239         s->last_qbias=
4240         s->last_mv_scale=
4241         s->last_block_max_depth= 0;
4242         for(plane_index=0; plane_index<2; plane_index++){
4243             Plane *p= &s->plane[plane_index];
4244             p->last_htaps=0;
4245             p->last_diag_mc=0;
4246             memset(p->last_hcoeff, 0, sizeof(p->last_hcoeff));
4247         }
4248     }
4249     if(s->keyframe){
4250         put_symbol(&s->c, s->header_state, s->version, 0);
4251         put_rac(&s->c, s->header_state, s->always_reset);
4252         put_symbol(&s->c, s->header_state, s->temporal_decomposition_type, 0);
4253         put_symbol(&s->c, s->header_state, s->temporal_decomposition_count, 0);
4254         put_symbol(&s->c, s->header_state, s->spatial_decomposition_count, 0);
4255         put_symbol(&s->c, s->header_state, s->colorspace_type, 0);
4256         put_symbol(&s->c, s->header_state, s->chroma_h_shift, 0);
4257         put_symbol(&s->c, s->header_state, s->chroma_v_shift, 0);
4258         put_rac(&s->c, s->header_state, s->spatial_scalability);
4259 //        put_rac(&s->c, s->header_state, s->rate_scalability);
4260         put_symbol(&s->c, s->header_state, s->max_ref_frames-1, 0);
4261
4262         encode_qlogs(s);
4263     }
4264
4265     if(!s->keyframe){
4266         int update_mc=0;
4267         for(plane_index=0; plane_index<2; plane_index++){
4268             Plane *p= &s->plane[plane_index];
4269             update_mc |= p->last_htaps   != p->htaps;
4270             update_mc |= p->last_diag_mc != p->diag_mc;
4271             update_mc |= !!memcmp(p->last_hcoeff, p->hcoeff, sizeof(p->hcoeff));
4272         }
4273         put_rac(&s->c, s->header_state, update_mc);
4274         if(update_mc){
4275             for(plane_index=0; plane_index<2; plane_index++){
4276                 Plane *p= &s->plane[plane_index];
4277                 put_rac(&s->c, s->header_state, p->diag_mc);
4278                 put_symbol(&s->c, s->header_state, p->htaps/2-1, 0);
4279                 for(i= p->htaps/2; i; i--)
4280                     put_symbol(&s->c, s->header_state, FFABS(p->hcoeff[i]), 0);
4281             }
4282         }
4283         if(s->last_spatial_decomposition_count != s->spatial_decomposition_count){
4284             put_rac(&s->c, s->header_state, 1);
4285             put_symbol(&s->c, s->header_state, s->spatial_decomposition_count, 0);
4286             encode_qlogs(s);
4287         }else
4288             put_rac(&s->c, s->header_state, 0);
4289     }
4290
4291     put_symbol(&s->c, s->header_state, s->spatial_decomposition_type - s->last_spatial_decomposition_type, 1);
4292     put_symbol(&s->c, s->header_state, s->qlog            - s->last_qlog    , 1);
4293     put_symbol(&s->c, s->header_state, s->mv_scale        - s->last_mv_scale, 1);
4294     put_symbol(&s->c, s->header_state, s->qbias           - s->last_qbias   , 1);
4295     put_symbol(&s->c, s->header_state, s->block_max_depth - s->last_block_max_depth, 1);
4296
4297 }
4298
4299 static void update_last_header_values(SnowContext *s){
4300     int plane_index;
4301
4302     if(!s->keyframe){
4303         for(plane_index=0; plane_index<2; plane_index++){
4304             Plane *p= &s->plane[plane_index];
4305             p->last_diag_mc= p->diag_mc;
4306             p->last_htaps  = p->htaps;
4307             memcpy(p->last_hcoeff, p->hcoeff, sizeof(p->hcoeff));
4308         }
4309     }
4310
4311     s->last_spatial_decomposition_type  = s->spatial_decomposition_type;
4312     s->last_qlog                        = s->qlog;
4313     s->last_qbias                       = s->qbias;
4314     s->last_mv_scale                    = s->mv_scale;
4315     s->last_block_max_depth             = s->block_max_depth;
4316     s->last_spatial_decomposition_count = s->spatial_decomposition_count;
4317 }
4318
4319 static int qscale2qlog(int qscale){
4320     return rint(QROOT*log(qscale / (float)FF_QP2LAMBDA)/log(2))
4321            + 61*QROOT/8; //<64 >60
4322 }
4323
4324 static int ratecontrol_1pass(SnowContext *s, AVFrame *pict)
4325 {
4326     /* Estimate the frame's complexity as a sum of weighted dwt coefficients.
4327      * FIXME we know exact mv bits at this point,
4328      * but ratecontrol isn't set up to include them. */
4329     uint32_t coef_sum= 0;
4330     int level, orientation, delta_qlog;
4331
4332     for(level=0; level<s->spatial_decomposition_count; level++){
4333         for(orientation=level ? 1 : 0; orientation<4; orientation++){
4334             SubBand *b= &s->plane[0].band[level][orientation];
4335             IDWTELEM *buf= b->ibuf;
4336             const int w= b->width;
4337             const int h= b->height;
4338             const int stride= b->stride;
4339             const int qlog= av_clip(2*QROOT + b->qlog, 0, QROOT*16);
4340             const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
4341             const int qdiv= (1<<16)/qmul;
4342             int x, y;
4343             //FIXME this is ugly
4344             for(y=0; y<h; y++)
4345                 for(x=0; x<w; x++)
4346                     buf[x+y*stride]= b->buf[x+y*stride];
4347             if(orientation==0)
4348                 decorrelate(s, b, buf, stride, 1, 0);
4349             for(y=0; y<h; y++)
4350                 for(x=0; x<w; x++)
4351                     coef_sum+= abs(buf[x+y*stride]) * qdiv >> 16;
4352         }
4353     }
4354
4355     /* ugly, ratecontrol just takes a sqrt again */
4356     coef_sum = (uint64_t)coef_sum * coef_sum >> 16;
4357     assert(coef_sum < INT_MAX);
4358
4359     if(pict->pict_type == FF_I_TYPE){
4360         s->m.current_picture.mb_var_sum= coef_sum;
4361         s->m.current_picture.mc_mb_var_sum= 0;
4362     }else{
4363         s->m.current_picture.mc_mb_var_sum= coef_sum;
4364         s->m.current_picture.mb_var_sum= 0;
4365     }
4366
4367     pict->quality= ff_rate_estimate_qscale(&s->m, 1);
4368     if (pict->quality < 0)
4369         return INT_MIN;
4370     s->lambda= pict->quality * 3/2;
4371     delta_qlog= qscale2qlog(pict->quality) - s->qlog;
4372     s->qlog+= delta_qlog;
4373     return delta_qlog;
4374 }
4375
4376 static void calculate_visual_weight(SnowContext *s, Plane *p){
4377     int width = p->width;
4378     int height= p->height;
4379     int level, orientation, x, y;
4380
4381     for(level=0; level<s->spatial_decomposition_count; level++){
4382         for(orientation=level ? 1 : 0; orientation<4; orientation++){
4383             SubBand *b= &p->band[level][orientation];
4384             IDWTELEM *ibuf= b->ibuf;
4385             int64_t error=0;
4386
4387             memset(s->spatial_idwt_buffer, 0, sizeof(*s->spatial_idwt_buffer)*width*height);
4388             ibuf[b->width/2 + b->height/2*b->stride]= 256*16;
4389             ff_spatial_idwt(s->spatial_idwt_buffer, width, height, width, s->spatial_decomposition_type, s->spatial_decomposition_count);
4390             for(y=0; y<height; y++){
4391                 for(x=0; x<width; x++){
4392                     int64_t d= s->spatial_idwt_buffer[x + y*width]*16;
4393                     error += d*d;
4394                 }
4395             }
4396
4397             b->qlog= (int)(log(352256.0/sqrt(error)) / log(pow(2.0, 1.0/QROOT))+0.5);
4398         }
4399     }
4400 }
4401
4402 static int encode_frame(AVCodecContext *avctx, unsigned char *buf, int buf_size, void *data){
4403     SnowContext *s = avctx->priv_data;
4404     RangeCoder * const c= &s->c;
4405     AVFrame *pict = data;
4406     const int width= s->avctx->width;
4407     const int height= s->avctx->height;
4408     int level, orientation, plane_index, i, y;
4409     uint8_t rc_header_bak[sizeof(s->header_state)];
4410     uint8_t rc_block_bak[sizeof(s->block_state)];
4411
4412     ff_init_range_encoder(c, buf, buf_size);
4413     ff_build_rac_states(c, 0.05*(1LL<<32), 256-8);
4414
4415     for(i=0; i<3; i++){
4416         int shift= !!i;
4417         for(y=0; y<(height>>shift); y++)
4418             memcpy(&s->input_picture.data[i][y * s->input_picture.linesize[i]],
4419                    &pict->data[i][y * pict->linesize[i]],
4420                    width>>shift);
4421     }
4422     s->new_picture = *pict;
4423
4424     s->m.picture_number= avctx->frame_number;
4425     if(avctx->flags&CODEC_FLAG_PASS2){
4426         s->m.pict_type =
4427         pict->pict_type= s->m.rc_context.entry[avctx->frame_number].new_pict_type;
4428         s->keyframe= pict->pict_type==FF_I_TYPE;
4429         if(!(avctx->flags&CODEC_FLAG_QSCALE)) {
4430             pict->quality= ff_rate_estimate_qscale(&s->m, 0);
4431             if (pict->quality < 0)
4432                 return -1;
4433         }
4434     }else{
4435         s->keyframe= avctx->gop_size==0 || avctx->frame_number % avctx->gop_size == 0;
4436         s->m.pict_type=
4437         pict->pict_type= s->keyframe ? FF_I_TYPE : FF_P_TYPE;
4438     }
4439
4440     if(s->pass1_rc && avctx->frame_number == 0)
4441         pict->quality= 2*FF_QP2LAMBDA;
4442     if(pict->quality){
4443         s->qlog= qscale2qlog(pict->quality);
4444         s->lambda = pict->quality * 3/2;
4445     }
4446     if(s->qlog < 0 || (!pict->quality && (avctx->flags & CODEC_FLAG_QSCALE))){
4447         s->qlog= LOSSLESS_QLOG;
4448         s->lambda = 0;
4449     }//else keep previous frame's qlog until after motion estimation
4450
4451     frame_start(s);
4452
4453     s->m.current_picture_ptr= &s->m.current_picture;
4454     if(pict->pict_type == FF_P_TYPE){
4455         int block_width = (width +15)>>4;
4456         int block_height= (height+15)>>4;
4457         int stride= s->current_picture.linesize[0];
4458
4459         assert(s->current_picture.data[0]);
4460         assert(s->last_picture[0].data[0]);
4461
4462         s->m.avctx= s->avctx;
4463         s->m.current_picture.data[0]= s->current_picture.data[0];
4464         s->m.   last_picture.data[0]= s->last_picture[0].data[0];
4465         s->m.    new_picture.data[0]= s->  input_picture.data[0];
4466         s->m.   last_picture_ptr= &s->m.   last_picture;
4467         s->m.linesize=
4468         s->m.   last_picture.linesize[0]=
4469         s->m.    new_picture.linesize[0]=
4470         s->m.current_picture.linesize[0]= stride;
4471         s->m.uvlinesize= s->current_picture.linesize[1];
4472         s->m.width = width;
4473         s->m.height= height;
4474         s->m.mb_width = block_width;
4475         s->m.mb_height= block_height;
4476         s->m.mb_stride=   s->m.mb_width+1;
4477         s->m.b8_stride= 2*s->m.mb_width+1;
4478         s->m.f_code=1;
4479         s->m.pict_type= pict->pict_type;
4480         s->m.me_method= s->avctx->me_method;
4481         s->m.me.scene_change_score=0;
4482         s->m.flags= s->avctx->flags;
4483         s->m.quarter_sample= (s->avctx->flags & CODEC_FLAG_QPEL)!=0;
4484         s->m.out_format= FMT_H263;
4485         s->m.unrestricted_mv= 1;
4486
4487         s->m.lambda = s->lambda;
4488         s->m.qscale= (s->m.lambda*139 + FF_LAMBDA_SCALE*64) >> (FF_LAMBDA_SHIFT + 7);
4489         s->lambda2= s->m.lambda2= (s->m.lambda*s->m.lambda + FF_LAMBDA_SCALE/2) >> FF_LAMBDA_SHIFT;
4490
4491         s->m.dsp= s->dsp; //move
4492         ff_init_me(&s->m);
4493         s->dsp= s->m.dsp;
4494     }
4495
4496     if(s->pass1_rc){
4497         memcpy(rc_header_bak, s->header_state, sizeof(s->header_state));
4498         memcpy(rc_block_bak, s->block_state, sizeof(s->block_state));
4499     }
4500
4501 redo_frame:
4502
4503     if(pict->pict_type == FF_I_TYPE)
4504         s->spatial_decomposition_count= 5;
4505     else
4506         s->spatial_decomposition_count= 5;
4507
4508     s->m.pict_type = pict->pict_type;
4509     s->qbias= pict->pict_type == FF_P_TYPE ? 2 : 0;
4510
4511     common_init_after_header(avctx);
4512
4513     if(s->last_spatial_decomposition_count != s->spatial_decomposition_count){
4514         for(plane_index=0; plane_index<3; plane_index++){
4515             calculate_visual_weight(s, &s->plane[plane_index]);
4516         }
4517     }
4518
4519     encode_header(s);
4520     s->m.misc_bits = 8*(s->c.bytestream - s->c.bytestream_start);
4521     encode_blocks(s, 1);
4522     s->m.mv_bits = 8*(s->c.bytestream - s->c.bytestream_start) - s->m.misc_bits;
4523
4524     for(plane_index=0; plane_index<3; plane_index++){
4525         Plane *p= &s->plane[plane_index];
4526         int w= p->width;
4527         int h= p->height;
4528         int x, y;
4529 //        int bits= put_bits_count(&s->c.pb);
4530
4531         if(!(avctx->flags2 & CODEC_FLAG2_MEMC_ONLY)){
4532             //FIXME optimize
4533             if(pict->data[plane_index]) //FIXME gray hack
4534                 for(y=0; y<h; y++){
4535                     for(x=0; x<w; x++){
4536                         s->spatial_idwt_buffer[y*w + x]= pict->data[plane_index][y*pict->linesize[plane_index] + x]<<FRAC_BITS;
4537                     }
4538                 }
4539             predict_plane(s, s->spatial_idwt_buffer, plane_index, 0);
4540
4541             if(   plane_index==0
4542                && pict->pict_type == FF_P_TYPE
4543                && !(avctx->flags&CODEC_FLAG_PASS2)
4544                && s->m.me.scene_change_score > s->avctx->scenechange_threshold){
4545                 ff_init_range_encoder(c, buf, buf_size);
4546                 ff_build_rac_states(c, 0.05*(1LL<<32), 256-8);
4547                 pict->pict_type= FF_I_TYPE;
4548                 s->keyframe=1;
4549                 s->current_picture.key_frame=1;
4550                 goto redo_frame;
4551             }
4552
4553             if(s->qlog == LOSSLESS_QLOG){
4554                 for(y=0; y<h; y++){
4555                     for(x=0; x<w; x++){
4556                         s->spatial_dwt_buffer[y*w + x]= (s->spatial_idwt_buffer[y*w + x] + (1<<(FRAC_BITS-1))-1)>>FRAC_BITS;
4557                     }
4558                 }
4559             }else{
4560                 for(y=0; y<h; y++){
4561                     for(x=0; x<w; x++){
4562                         s->spatial_dwt_buffer[y*w + x]=s->spatial_idwt_buffer[y*w + x]<<ENCODER_EXTRA_BITS;
4563                     }
4564                 }
4565             }
4566
4567             /*  if(QUANTIZE2)
4568                 dwt_quantize(s, p, s->spatial_dwt_buffer, w, h, w, s->spatial_decomposition_type);
4569             else*/
4570                 ff_spatial_dwt(s->spatial_dwt_buffer, w, h, w, s->spatial_decomposition_type, s->spatial_decomposition_count);
4571
4572             if(s->pass1_rc && plane_index==0){
4573                 int delta_qlog = ratecontrol_1pass(s, pict);
4574                 if (delta_qlog <= INT_MIN)
4575                     return -1;
4576                 if(delta_qlog){
4577                     //reordering qlog in the bitstream would eliminate this reset
4578                     ff_init_range_encoder(c, buf, buf_size);
4579                     memcpy(s->header_state, rc_header_bak, sizeof(s->header_state));
4580                     memcpy(s->block_state, rc_block_bak, sizeof(s->block_state));
4581                     encode_header(s);
4582                     encode_blocks(s, 0);
4583                 }
4584             }
4585
4586             for(level=0; level<s->spatial_decomposition_count; level++){
4587                 for(orientation=level ? 1 : 0; orientation<4; orientation++){
4588                     SubBand *b= &p->band[level][orientation];
4589
4590                     if(!QUANTIZE2)
4591                         quantize(s, b, b->ibuf, b->buf, b->stride, s->qbias);
4592                     if(orientation==0)
4593                         decorrelate(s, b, b->ibuf, b->stride, pict->pict_type == FF_P_TYPE, 0);
4594                     encode_subband(s, b, b->ibuf, b->parent ? b->parent->ibuf : NULL, b->stride, orientation);
4595                     assert(b->parent==NULL || b->parent->stride == b->stride*2);
4596                     if(orientation==0)
4597                         correlate(s, b, b->ibuf, b->stride, 1, 0);
4598                 }
4599             }
4600
4601             for(level=0; level<s->spatial_decomposition_count; level++){
4602                 for(orientation=level ? 1 : 0; orientation<4; orientation++){
4603                     SubBand *b= &p->band[level][orientation];
4604
4605                     dequantize(s, b, b->ibuf, b->stride);
4606                 }
4607             }
4608
4609             ff_spatial_idwt(s->spatial_idwt_buffer, w, h, w, s->spatial_decomposition_type, s->spatial_decomposition_count);
4610             if(s->qlog == LOSSLESS_QLOG){
4611                 for(y=0; y<h; y++){
4612                     for(x=0; x<w; x++){
4613                         s->spatial_idwt_buffer[y*w + x]<<=FRAC_BITS;
4614                     }
4615                 }
4616             }
4617             predict_plane(s, s->spatial_idwt_buffer, plane_index, 1);
4618         }else{
4619             //ME/MC only
4620             if(pict->pict_type == FF_I_TYPE){
4621                 for(y=0; y<h; y++){
4622                     for(x=0; x<w; x++){
4623                         s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x]=
4624                             pict->data[plane_index][y*pict->linesize[plane_index] + x];
4625                     }
4626                 }
4627             }else{
4628                 memset(s->spatial_idwt_buffer, 0, sizeof(IDWTELEM)*w*h);
4629                 predict_plane(s, s->spatial_idwt_buffer, plane_index, 1);
4630             }
4631         }
4632         if(s->avctx->flags&CODEC_FLAG_PSNR){
4633             int64_t error= 0;
4634
4635             if(pict->data[plane_index]) //FIXME gray hack
4636                 for(y=0; y<h; y++){
4637                     for(x=0; x<w; x++){
4638                         int d= s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x] - pict->data[plane_index][y*pict->linesize[plane_index] + x];
4639                         error += d*d;
4640                     }
4641                 }
4642             s->avctx->error[plane_index] += error;
4643             s->current_picture.error[plane_index] = error;
4644         }
4645
4646     }
4647
4648     update_last_header_values(s);
4649
4650     release_buffer(avctx);
4651
4652     s->current_picture.coded_picture_number = avctx->frame_number;
4653     s->current_picture.pict_type = pict->pict_type;
4654     s->current_picture.quality = pict->quality;
4655     s->m.frame_bits = 8*(s->c.bytestream - s->c.bytestream_start);
4656     s->m.p_tex_bits = s->m.frame_bits - s->m.misc_bits - s->m.mv_bits;
4657     s->m.current_picture.display_picture_number =
4658     s->m.current_picture.coded_picture_number = avctx->frame_number;
4659     s->m.current_picture.quality = pict->quality;
4660     s->m.total_bits += 8*(s->c.bytestream - s->c.bytestream_start);
4661     if(s->pass1_rc)
4662         if (ff_rate_estimate_qscale(&s->m, 0) < 0)
4663             return -1;
4664     if(avctx->flags&CODEC_FLAG_PASS1)
4665         ff_write_pass1_stats(&s->m);
4666     s->m.last_pict_type = s->m.pict_type;
4667     avctx->frame_bits = s->m.frame_bits;
4668     avctx->mv_bits = s->m.mv_bits;
4669     avctx->misc_bits = s->m.misc_bits;
4670     avctx->p_tex_bits = s->m.p_tex_bits;
4671
4672     emms_c();
4673
4674     return ff_rac_terminate(c);
4675 }
4676
4677 static av_cold int encode_end(AVCodecContext *avctx)
4678 {
4679     SnowContext *s = avctx->priv_data;
4680
4681     common_end(s);
4682     av_free(avctx->stats_out);
4683
4684     return 0;
4685 }
4686
4687 AVCodec snow_encoder = {
4688     "snow",
4689     CODEC_TYPE_VIDEO,
4690     CODEC_ID_SNOW,
4691     sizeof(SnowContext),
4692     encode_init,
4693     encode_frame,
4694     encode_end,
4695     .long_name = NULL_IF_CONFIG_SMALL("Snow"),
4696 };
4697 #endif
4698
4699
4700 #ifdef TEST
4701 #undef malloc
4702 #undef free
4703 #undef printf
4704
4705 #include "libavutil/lfg.h"
4706
4707 int main(void){
4708     int width=256;
4709     int height=256;
4710     int buffer[2][width*height];
4711     SnowContext s;
4712     int i;
4713     AVLFG prng;
4714     s.spatial_decomposition_count=6;
4715     s.spatial_decomposition_type=1;
4716
4717     av_lfg_init(&prng, 1);
4718
4719     printf("testing 5/3 DWT\n");
4720     for(i=0; i<width*height; i++)
4721         buffer[0][i] = buffer[1][i] = av_lfg_get(&prng) % 54321 - 12345;
4722
4723     ff_spatial_dwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4724     ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4725
4726     for(i=0; i<width*height; i++)
4727         if(buffer[0][i]!= buffer[1][i]) printf("fsck: %6d %12d %7d\n",i, buffer[0][i], buffer[1][i]);
4728
4729     printf("testing 9/7 DWT\n");
4730     s.spatial_decomposition_type=0;
4731     for(i=0; i<width*height; i++)
4732         buffer[0][i] = buffer[1][i] = av_lfg_get(&prng) % 54321 - 12345;
4733
4734     ff_spatial_dwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4735     ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4736
4737     for(i=0; i<width*height; i++)
4738         if(FFABS(buffer[0][i] - buffer[1][i])>20) printf("fsck: %6d %12d %7d\n",i, buffer[0][i], buffer[1][i]);
4739
4740 #if 0
4741     printf("testing AC coder\n");
4742     memset(s.header_state, 0, sizeof(s.header_state));
4743     ff_init_range_encoder(&s.c, buffer[0], 256*256);
4744     ff_init_cabac_states(&s.c, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64);
4745
4746     for(i=-256; i<256; i++){
4747         put_symbol(&s.c, s.header_state, i*i*i/3*FFABS(i), 1);
4748     }
4749     ff_rac_terminate(&s.c);
4750
4751     memset(s.header_state, 0, sizeof(s.header_state));
4752     ff_init_range_decoder(&s.c, buffer[0], 256*256);
4753     ff_init_cabac_states(&s.c, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64);
4754
4755     for(i=-256; i<256; i++){
4756         int j;
4757         j= get_symbol(&s.c, s.header_state, 1);
4758         if(j!=i*i*i/3*FFABS(i)) printf("fsck: %d != %d\n", i, j);
4759     }
4760 #endif
4761     {
4762     int level, orientation, x, y;
4763     int64_t errors[8][4];
4764     int64_t g=0;
4765
4766         memset(errors, 0, sizeof(errors));
4767         s.spatial_decomposition_count=3;
4768         s.spatial_decomposition_type=0;
4769         for(level=0; level<s.spatial_decomposition_count; level++){
4770             for(orientation=level ? 1 : 0; orientation<4; orientation++){
4771                 int w= width  >> (s.spatial_decomposition_count-level);
4772                 int h= height >> (s.spatial_decomposition_count-level);
4773                 int stride= width  << (s.spatial_decomposition_count-level);
4774                 DWTELEM *buf= buffer[0];
4775                 int64_t error=0;
4776
4777                 if(orientation&1) buf+=w;
4778                 if(orientation>1) buf+=stride>>1;
4779
4780                 memset(buffer[0], 0, sizeof(int)*width*height);
4781                 buf[w/2 + h/2*stride]= 256*256;
4782                 ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4783                 for(y=0; y<height; y++){
4784                     for(x=0; x<width; x++){
4785                         int64_t d= buffer[0][x + y*width];
4786                         error += d*d;
4787                         if(FFABS(width/2-x)<9 && FFABS(height/2-y)<9 && level==2) printf("%8"PRId64" ", d);
4788                     }
4789                     if(FFABS(height/2-y)<9 && level==2) printf("\n");
4790                 }
4791                 error= (int)(sqrt(error)+0.5);
4792                 errors[level][orientation]= error;
4793                 if(g) g=av_gcd(g, error);
4794                 else g= error;
4795             }
4796         }
4797         printf("static int const visual_weight[][4]={\n");
4798         for(level=0; level<s.spatial_decomposition_count; level++){
4799             printf("  {");
4800             for(orientation=0; orientation<4; orientation++){
4801                 printf("%8"PRId64",", errors[level][orientation]/g);
4802             }
4803             printf("},\n");
4804         }
4805         printf("};\n");
4806         {
4807             int level=2;
4808             int w= width  >> (s.spatial_decomposition_count-level);
4809             //int h= height >> (s.spatial_decomposition_count-level);
4810             int stride= width  << (s.spatial_decomposition_count-level);
4811             DWTELEM *buf= buffer[0];
4812             int64_t error=0;
4813
4814             buf+=w;
4815             buf+=stride>>1;
4816
4817             memset(buffer[0], 0, sizeof(int)*width*height);
4818 #if 1
4819             for(y=0; y<height; y++){
4820                 for(x=0; x<width; x++){
4821                     int tab[4]={0,2,3,1};
4822                     buffer[0][x+width*y]= 256*256*tab[(x&1) + 2*(y&1)];
4823                 }
4824             }
4825             ff_spatial_dwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4826 #else
4827             for(y=0; y<h; y++){
4828                 for(x=0; x<w; x++){
4829                     buf[x + y*stride  ]=169;
4830                     buf[x + y*stride-w]=64;
4831                 }
4832             }
4833             ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4834 #endif
4835             for(y=0; y<height; y++){
4836                 for(x=0; x<width; x++){
4837                     int64_t d= buffer[0][x + y*width];
4838                     error += d*d;
4839                     if(FFABS(width/2-x)<9 && FFABS(height/2-y)<9) printf("%8"PRId64" ", d);
4840                 }
4841                 if(FFABS(height/2-y)<9) printf("\n");
4842             }
4843         }
4844
4845     }
4846     return 0;
4847 }
4848 #endif /* TEST */