git.sesse.net Git - ffmpeg/blob - doc/snow.txt

   1 =============================================
   2 SNOW Video Codec Specification Draft 20070103
   3 =============================================
   4
   5
   6 Definitions:
   7 ============
   8
   9 MUST    the specific part must be done to conform to this standard
  10 SHOULD  it is recommended to be done that way, but not strictly required
  11
  12 ilog2(x) is the base-2 logarithm of x, rounded down
  13 ilog2(0) = 0
  14
  15 Type definitions:
  16 =================
  17
  18 b   1-bit range coded
  19 u   unsigned scalar value range coded
  20 s   signed scalar value range coded
  21
  22
  23 Bitstream syntax:
  24 =================
  25
  26 frame:
  27     header
  28     prediction
  29     residual
  30
  31 header:
  32     keyframe                            b   MID_STATE
  33     if(keyframe || always_reset)
  34         reset_contexts
  35     if(keyframe){
  36         version                         u   header_state
  37         always_reset                    b   header_state
  38         temporal_decomposition_type     u   header_state
  39         temporal_decomposition_count    u   header_state
  40         spatial_decomposition_count     u   header_state
  41         colorspace_type                 u   header_state
  42         chroma_h_shift                  u   header_state
  43         chroma_v_shift                  u   header_state
  44         spatial_scalability             b   header_state
  45         max_ref_frames-1                u   header_state
  46         qlogs
  47     }
  48
  49     spatial_decomposition_type          s   header_state
  50     qlog                                s   header_state
  51     mv_scale                            s   header_state
  52     qbias                               s   header_state
  53     block_max_depth                     s   header_state
  54
  55 qlogs:
  56     for(plane=0; plane<2; plane++){
  57         quant_table[plane][0][0]        s   header_state
  58         for(level=0; level < spatial_decomposition_count; level++){
  59             quant_table[plane][level][1] s  header_state
  60             quant_table[plane][level][3] s  header_state
  61         }
  62     }
  63
  64 reset_contexts:
  65     *_state[*]= MID_STATE
  66
  67 prediction:
  68     for(y=0; y<block_count_vertical; y++)
  69         for(x=0; x<block_count_horizontal; x++)
  70             block(0)
  71
  72 block(level):
  73     if(keyframe){
  74         intra=1
  75         y_diff=cb_diff=cr_diff=0
  76     }else{
  77         if(level!=block_max_depth){
  78             s_context= 2*left->level + 2*top->level + topleft->level + topright->level
  79             leaf                        b   block_state[4 + s_context]
  80         }
  81         if(level==block_max_depth || leaf){
  82             intra                       b   block_state[1 + left->intra + top->intra]
  83             if(intra){
  84                 y_diff                  s   block_state[32]
  85                 cb_diff                 s   block_state[64]
  86                 cr_diff                 s   block_state[96]
  87             }else{
  88                 ref_context= ilog2(2*left->ref) + ilog2(2*top->ref)
  89                 if(ref_frames > 1)
  90                     ref                 u   block_state[128 + 1024 + 32*ref_context]
  91                 mx_context= ilog2(2*abs(left->mx - top->mx))
  92                 my_context= ilog2(2*abs(left->my - top->my))
  93                 mvx_diff                s   block_state[128 + 32*(mx_context + 16*!!ref)]
  94                 mvy_diff                s   block_state[128 + 32*(my_context + 16*!!ref)]
  95             }
  96         }else{
  97             block(level+1)
  98             block(level+1)
  99             block(level+1)
 100             block(level+1)
 101         }
 102     }
 103
 104
 105 residual:
 106     FIXME
 107
 108
 109
 110 Tag description:
 111 ----------------
 112
 113 version
 114     0
 115     this MUST NOT change within a bitstream
 116
 117 always_reset
 118     if 1 then the range coder contexts will be reset after each frame
 119
 120 temporal_decomposition_type
 121     0
 122
 123 temporal_decomposition_count
 124     0
 125
 126 spatial_decomposition_count
 127     FIXME
 128
 129 colorspace_type
 130     0
 131     this MUST NOT change within a bitstream
 132
 133 chroma_h_shift
 134     log2(luma.width / chroma.width)
 135     this MUST NOT change within a bitstream
 136
 137 chroma_v_shift
 138     log2(luma.height / chroma.height)
 139     this MUST NOT change within a bitstream
 140
 141 spatial_scalability
 142     0
 143
 144 max_ref_frames
 145     maximum number of reference frames
 146     this MUST NOT change within a bitstream
 147
 148 ref_frames
 149     minimum of the number of available reference frames and max_ref_frames
 150     for example the first frame after a key frame always has ref_frames=1
 151
 152 spatial_decomposition_type
 153     wavelet type
 154     0 is a 9/7 symmetric compact integer wavelet
 155     1 is a 5/3 symmetric compact integer wavelet
 156     others are reserved
 157     stored as delta from last, last is reset to 0 if always_reset || keyframe
 158
 159 qlog
 160     quality (logarithmic quantizer scale)
 161     stored as delta from last, last is reset to 0 if always_reset || keyframe
 162
 163 mv_scale
 164     stored as delta from last, last is reset to 0 if always_reset || keyframe
 165     FIXME check that everything works fine if this changes between frames
 166
 167 qbias
 168     dequantization bias
 169     stored as delta from last, last is reset to 0 if always_reset || keyframe
 170
 171 block_max_depth
 172     maximum depth of the block tree
 173     stored as delta from last, last is reset to 0 if always_reset || keyframe
 174
 175 quant_table
 176     quantization table
 177
 178 Range Coder:
 179 ============
 180 FIXME
 181
 182 Neighboring Blocks:
 183 ===================
 184 left and top are set to the respective blocks unless they are outside of
 185 the image in which case they are set to the Null block
 186
 187 top-left is set to the top left block unless it is outside of the image in
 188 which case it is set to the left block
 189
 190 if (this block has no larger parent block or it is at the left side of its
 191 parent block) and the top right block is not outside of the image, then the
 192 top right block is used for top-right; otherwise the top-left block is used
 193
 194 Null block
 195 y,cb,cr are 128
 196 level, ref, mx and my are 0
 197
 198
 199 Motion Vector Prediction:
 200 =========================
 201 1. the motion vectors of all the neighboring blocks are scaled to
 202 compensate for the difference of reference frames
 203
 204 scaled_mv= (mv * (256 * (current_reference+1) / (mv.reference+1)) + 128)>>8
 205
 206 2. the median of the scaled left, top and top-right vectors is used as
 207 motion vector prediction
 208
 209 3. the used motion vector is the sum of the predictor and
 210    (mvx_diff, mvy_diff)*mv_scale
 211
 212
 213 Intra DC Prediction:
 214 ====================
 215 the luma and chroma values of the left block are used as predictors
 216
 217 the luma and chroma values actually used are the sum of the respective predictor and y_diff, cb_diff, cr_diff
 218
 219
 220 Motion Compensation:
 221 ====================
 222 FIXME
 223
 224 LL band prediction:
 225 ===================
 226 FIXME
 227
 228 Dequantization:
 229 ===============
 230 FIXME
 231
 232 Wavelet Transform:
 233 ==================
 234 FIXME
 235
 236 TODO:
 237 =====
 238 Important:
 239 finetune initial contexts
 240 spatial_decomposition_count per frame?
 241 flip wavelet?
 242 try to use the wavelet transformed predicted image (motion compensated image) as context for coding the residual coefficients
 243 try the MV length as context for coding the residual coefficients
 244 use extradata for stuff which is in the keyframes now?
 245 the MV median predictor is patented IIRC
 246
 247 Not Important:
 248 spatial_scalability b vs u (!= 0 breaks syntax anyway so we can add a u later)
 249
 250
 251 Credits:
 252 ========
 253 Michael Niedermayer
 254 Loren Merritt
 255
 256
 257 Copyright:
 258 ==========
 259 GPL + GFDL + whatever is needed to make this a RFC