2 * Copyright (C) 2004 Michael Niedermayer <michaelni@gmx.at>
4 * This file is part of FFmpeg.
6 * FFmpeg is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * FFmpeg is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with FFmpeg; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21 #include "libavutil/intmath.h"
27 #include "rangecoder.h"
30 #include "mpegvideo.h"
/**
 * 256-entry signed lookup table with values in {-1, 0, 1}.
 * Entries 0, 1 and 255 are 0; entries 2..127 are 1; entries 128..254 are -1.
 * NOTE(review): presumably indexed by a signed difference reduced to 8 bits
 * (x & 0xFF); no use of this table is visible in this part of the file - confirm.
 */
36 static const int8_t quant3[256]={
37 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
38 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
39 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
40 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
41 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
42 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
43 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
44 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
45 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
46 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
47 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
48 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
49 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
50 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
51 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
52 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 0,
/**
 * 256-entry signed lookup table with values in {-1, 0, 1}.
 * Entry 0 is 0; entries 1..127 are 1; entries 128..255 are -1.
 * Differs from quant3 only at indices 1 and 255, which are non-zero here.
 */
54 static const int8_t quant3b[256]={
55 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
56 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
57 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
58 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
59 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
60 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
61 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
62 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
63 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
64 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
65 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
66 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
67 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
68 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
69 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
70 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
/**
 * 256-entry signed lookup table with values in {-1, 0, 1}.
 * Entries 0 and 1 are 0; from index 2 on, even indices map to 1 and odd
 * indices map to -1.
 * unpack_coeffs() indexes it with (l & 0xFF) and (t & 0xFF) to build the
 * context offset quant3bA[l] + 3*quant3bA[t] for one range-coded bit.
 */
72 static const int8_t quant3bA[256]={
73 0, 0, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
74 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
75 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
76 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
77 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
78 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
79 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
80 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
81 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
82 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
83 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
84 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
85 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
86 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
87 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
88 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
/**
 * 256-entry signed lookup table with values in [-2, 2] (5 distinct levels).
 * Index 0 maps to 0, indices 1..3 to 1, 4..127 to 2,
 * 128..252 to -2 and 253..255 to -1.
 * NOTE(review): presumably indexed by a signed difference reduced to 8 bits;
 * no use is visible in this part of the file - confirm.
 */
90 static const int8_t quant5[256]={
91 0, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
92 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
93 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
94 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
95 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
96 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
97 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
98 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
99 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
100 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
101 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
102 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
103 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
104 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
105 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
106 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,-1,
/**
 * 256-entry signed lookup table with values in [-3, 3] (7 distinct levels).
 * Index 0 maps to 0; indices 1..127 map to positive levels and 128..255 to
 * negative levels, with the magnitude shrinking towards index 0 and index 255.
 */
108 static const int8_t quant7[256]={
109 0, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
110 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
111 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3,
112 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
113 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
114 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
115 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
116 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
117 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
118 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
119 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
120 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
121 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
122 -3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-2,-2,-2,
123 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
124 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,
/**
 * 256-entry signed lookup table with values in [-4, 4] (9 distinct levels).
 * Index 0 maps to 0; indices 1..127 map to positive levels and 128..255 to
 * negative levels, with the magnitude shrinking towards index 0 and index 255.
 */
126 static const int8_t quant9[256]={
127 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3,
128 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
129 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
130 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
131 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
132 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
133 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
134 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
135 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
136 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
137 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
138 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
139 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
140 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
141 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,
142 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-1,-1,
/**
 * 256-entry signed lookup table with values in [-5, 5] (11 distinct levels).
 * Index 0 maps to 0; indices 1..127 map to positive levels and 128..255 to
 * negative levels, with the magnitude shrinking towards index 0 and index 255.
 */
144 static const int8_t quant11[256]={
145 0, 1, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4,
146 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
147 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
148 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
149 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
150 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
151 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
152 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
153 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
154 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
155 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
156 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
157 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
158 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-4,-4,
159 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
160 -4,-4,-4,-4,-4,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-1,
/**
 * 256-entry signed lookup table with values in [-6, 6] (13 distinct levels).
 * Index 0 maps to 0; indices 1..127 map to positive levels and 128..255 to
 * negative levels, with the magnitude shrinking towards index 0 and index 255.
 */
162 static const int8_t quant13[256]={
163 0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
164 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
165 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
166 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
167 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
168 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
169 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
170 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
171 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
172 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
173 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
174 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
175 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-5,
176 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
177 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
178 -4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,-2,-2,-1,
/**
 * 32x32 weight window, 32 values per row (1024 entries).
 * The first and last rows are all zero and the four centre entries hold the
 * peak value 255; rows mirror top-to-bottom and left-to-right.
 * NOTE(review): presumably overlapped-block motion compensation weights.
 * obmc32 is defined more than once in this file, so the alternatives are
 * presumably selected by preprocessor conditionals not visible here - confirm.
 */
182 static const uint8_t obmc32[1024]={
183 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
184 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0,
185 0, 0, 0, 4, 4, 4, 4, 8, 8, 12, 12, 12, 16, 16, 16, 16, 16, 16, 16, 16, 12, 12, 12, 8, 8, 4, 4, 4, 4, 0, 0, 0,
186 0, 0, 4, 4, 8, 8, 12, 16, 16, 20, 24, 24, 28, 28, 32, 32, 32, 32, 28, 28, 24, 24, 20, 16, 16, 12, 8, 8, 4, 4, 0, 0,
187 0, 0, 4, 8, 8, 12, 16, 24, 28, 32, 36, 40, 44, 48, 48, 48, 48, 48, 48, 44, 40, 36, 32, 28, 24, 16, 12, 8, 8, 4, 0, 0,
188 0, 4, 4, 8, 12, 20, 24, 32, 40, 44, 52, 56, 60, 64, 68, 72, 72, 68, 64, 60, 56, 52, 44, 40, 32, 24, 20, 12, 8, 4, 4, 0,
189 0, 4, 4, 12, 16, 24, 32, 40, 52, 60, 68, 76, 80, 88, 88, 92, 92, 88, 88, 80, 76, 68, 60, 52, 40, 32, 24, 16, 12, 4, 4, 0,
190 0, 4, 8, 16, 24, 32, 40, 52, 64, 76, 84, 92,100,108,112,116,116,112,108,100, 92, 84, 76, 64, 52, 40, 32, 24, 16, 8, 4, 0,
191 0, 4, 8, 16, 28, 40, 52, 64, 76, 88,100,112,124,132,136,140,140,136,132,124,112,100, 88, 76, 64, 52, 40, 28, 16, 8, 4, 0,
192 0, 4, 12, 20, 32, 44, 60, 76, 88,104,120,132,144,152,160,164,164,160,152,144,132,120,104, 88, 76, 60, 44, 32, 20, 12, 4, 0,
193 0, 4, 12, 24, 36, 48, 68, 84,100,120,136,152,164,176,180,184,184,180,176,164,152,136,120,100, 84, 68, 48, 36, 24, 12, 4, 0,
194 0, 4, 12, 24, 40, 56, 76, 92,112,132,152,168,180,192,204,208,208,204,192,180,168,152,132,112, 92, 76, 56, 40, 24, 12, 4, 0,
195 0, 4, 16, 28, 44, 60, 80,100,124,144,164,180,196,208,220,224,224,220,208,196,180,164,144,124,100, 80, 60, 44, 28, 16, 4, 0,
196 0, 8, 16, 28, 48, 64, 88,108,132,152,176,192,208,224,232,240,240,232,224,208,192,176,152,132,108, 88, 64, 48, 28, 16, 8, 0,
197 0, 4, 16, 32, 48, 68, 88,112,136,160,180,204,220,232,244,248,248,244,232,220,204,180,160,136,112, 88, 68, 48, 32, 16, 4, 0,
198 1, 8, 16, 32, 48, 72, 92,116,140,164,184,208,224,240,248,255,255,248,240,224,208,184,164,140,116, 92, 72, 48, 32, 16, 8, 1,
199 1, 8, 16, 32, 48, 72, 92,116,140,164,184,208,224,240,248,255,255,248,240,224,208,184,164,140,116, 92, 72, 48, 32, 16, 8, 1,
200 0, 4, 16, 32, 48, 68, 88,112,136,160,180,204,220,232,244,248,248,244,232,220,204,180,160,136,112, 88, 68, 48, 32, 16, 4, 0,
201 0, 8, 16, 28, 48, 64, 88,108,132,152,176,192,208,224,232,240,240,232,224,208,192,176,152,132,108, 88, 64, 48, 28, 16, 8, 0,
202 0, 4, 16, 28, 44, 60, 80,100,124,144,164,180,196,208,220,224,224,220,208,196,180,164,144,124,100, 80, 60, 44, 28, 16, 4, 0,
203 0, 4, 12, 24, 40, 56, 76, 92,112,132,152,168,180,192,204,208,208,204,192,180,168,152,132,112, 92, 76, 56, 40, 24, 12, 4, 0,
204 0, 4, 12, 24, 36, 48, 68, 84,100,120,136,152,164,176,180,184,184,180,176,164,152,136,120,100, 84, 68, 48, 36, 24, 12, 4, 0,
205 0, 4, 12, 20, 32, 44, 60, 76, 88,104,120,132,144,152,160,164,164,160,152,144,132,120,104, 88, 76, 60, 44, 32, 20, 12, 4, 0,
206 0, 4, 8, 16, 28, 40, 52, 64, 76, 88,100,112,124,132,136,140,140,136,132,124,112,100, 88, 76, 64, 52, 40, 28, 16, 8, 4, 0,
207 0, 4, 8, 16, 24, 32, 40, 52, 64, 76, 84, 92,100,108,112,116,116,112,108,100, 92, 84, 76, 64, 52, 40, 32, 24, 16, 8, 4, 0,
208 0, 4, 4, 12, 16, 24, 32, 40, 52, 60, 68, 76, 80, 88, 88, 92, 92, 88, 88, 80, 76, 68, 60, 52, 40, 32, 24, 16, 12, 4, 4, 0,
209 0, 4, 4, 8, 12, 20, 24, 32, 40, 44, 52, 56, 60, 64, 68, 72, 72, 68, 64, 60, 56, 52, 44, 40, 32, 24, 20, 12, 8, 4, 4, 0,
210 0, 0, 4, 8, 8, 12, 16, 24, 28, 32, 36, 40, 44, 48, 48, 48, 48, 48, 48, 44, 40, 36, 32, 28, 24, 16, 12, 8, 8, 4, 0, 0,
211 0, 0, 4, 4, 8, 8, 12, 16, 16, 20, 24, 24, 28, 28, 32, 32, 32, 32, 28, 28, 24, 24, 20, 16, 16, 12, 8, 8, 4, 4, 0, 0,
212 0, 0, 0, 4, 4, 4, 4, 8, 8, 12, 12, 12, 16, 16, 16, 16, 16, 16, 16, 16, 12, 12, 12, 8, 8, 4, 4, 4, 4, 0, 0, 0,
213 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0,
214 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
/**
 * 16x16 weight window, 16 values per row (256 entries); peak value 248 at the
 * four centre entries, all four corners zero.
 * NOTE(review): obmc16 is defined more than once in this file, so the
 * alternatives are presumably selected by preprocessor conditionals not
 * visible here - confirm.
 */
217 static const uint8_t obmc16[256]={
218 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0,
219 0, 4, 4, 8, 16, 20, 20, 24, 24, 20, 20, 16, 8, 4, 4, 0,
220 0, 4, 16, 24, 36, 44, 52, 60, 60, 52, 44, 36, 24, 16, 4, 0,
221 0, 8, 24, 44, 60, 80, 96,104,104, 96, 80, 60, 44, 24, 8, 0,
222 0, 16, 36, 60, 92,116,136,152,152,136,116, 92, 60, 36, 16, 0,
223 0, 20, 44, 80,116,152,180,196,196,180,152,116, 80, 44, 20, 0,
224 4, 20, 52, 96,136,180,212,228,228,212,180,136, 96, 52, 20, 4,
225 4, 24, 60,104,152,196,228,248,248,228,196,152,104, 60, 24, 4,
226 4, 24, 60,104,152,196,228,248,248,228,196,152,104, 60, 24, 4,
227 4, 20, 52, 96,136,180,212,228,228,212,180,136, 96, 52, 20, 4,
228 0, 20, 44, 80,116,152,180,196,196,180,152,116, 80, 44, 20, 0,
229 0, 16, 36, 60, 92,116,136,152,152,136,116, 92, 60, 36, 16, 0,
230 0, 8, 24, 44, 60, 80, 96,104,104, 96, 80, 60, 44, 24, 8, 0,
231 0, 4, 16, 24, 36, 44, 52, 60, 60, 52, 44, 36, 24, 16, 4, 0,
232 0, 4, 4, 8, 16, 20, 20, 24, 24, 20, 20, 16, 8, 4, 4, 0,
233 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0,
/**
 * 32x32 weight window, 32 values per row (1024 entries).
 * Corners are zero and the four centre entries hold the peak value 240;
 * rows mirror top-to-bottom and left-to-right.
 * NOTE(review): obmc32 is defined more than once in this file, so the
 * alternatives are presumably selected by preprocessor conditionals not
 * visible here - confirm.
 */
237 static const uint8_t obmc32[1024]={
238 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0,
239 0, 4, 4, 4, 8, 8, 8, 12, 12, 16, 16, 16, 20, 20, 20, 24, 24, 20, 20, 20, 16, 16, 16, 12, 12, 8, 8, 8, 4, 4, 4, 0,
240 0, 4, 8, 8, 12, 12, 16, 20, 20, 24, 28, 28, 32, 32, 36, 40, 40, 36, 32, 32, 28, 28, 24, 20, 20, 16, 12, 12, 8, 8, 4, 0,
241 0, 4, 8, 12, 16, 20, 24, 28, 28, 32, 36, 40, 44, 48, 52, 56, 56, 52, 48, 44, 40, 36, 32, 28, 28, 24, 20, 16, 12, 8, 4, 0,
242 4, 8, 12, 16, 20, 24, 28, 32, 40, 44, 48, 52, 56, 60, 64, 68, 68, 64, 60, 56, 52, 48, 44, 40, 32, 28, 24, 20, 16, 12, 8, 4,
243 4, 8, 12, 20, 24, 32, 36, 40, 48, 52, 56, 64, 68, 76, 80, 84, 84, 80, 76, 68, 64, 56, 52, 48, 40, 36, 32, 24, 20, 12, 8, 4,
244 4, 8, 16, 24, 28, 36, 44, 48, 56, 60, 68, 76, 80, 88, 96,100,100, 96, 88, 80, 76, 68, 60, 56, 48, 44, 36, 28, 24, 16, 8, 4,
245 4, 12, 20, 28, 32, 40, 48, 56, 64, 72, 80, 88, 92,100,108,116,116,108,100, 92, 88, 80, 72, 64, 56, 48, 40, 32, 28, 20, 12, 4,
246 4, 12, 20, 28, 40, 48, 56, 64, 72, 80, 88, 96,108,116,124,132,132,124,116,108, 96, 88, 80, 72, 64, 56, 48, 40, 28, 20, 12, 4,
247 4, 16, 24, 32, 44, 52, 60, 72, 80, 92,100,108,120,128,136,148,148,136,128,120,108,100, 92, 80, 72, 60, 52, 44, 32, 24, 16, 4,
248 4, 16, 28, 36, 48, 56, 68, 80, 88,100,112,120,132,140,152,164,164,152,140,132,120,112,100, 88, 80, 68, 56, 48, 36, 28, 16, 4,
249 4, 16, 28, 40, 52, 64, 76, 88, 96,108,120,132,144,156,168,180,180,168,156,144,132,120,108, 96, 88, 76, 64, 52, 40, 28, 16, 4,
250 8, 20, 32, 44, 56, 68, 80, 92,108,120,132,144,156,168,180,192,192,180,168,156,144,132,120,108, 92, 80, 68, 56, 44, 32, 20, 8,
251 8, 20, 32, 48, 60, 76, 88,100,116,128,140,156,168,184,196,208,208,196,184,168,156,140,128,116,100, 88, 76, 60, 48, 32, 20, 8,
252 8, 20, 36, 52, 64, 80, 96,108,124,136,152,168,180,196,212,224,224,212,196,180,168,152,136,124,108, 96, 80, 64, 52, 36, 20, 8,
253 8, 24, 40, 56, 68, 84,100,116,132,148,164,180,192,208,224,240,240,224,208,192,180,164,148,132,116,100, 84, 68, 56, 40, 24, 8,
254 8, 24, 40, 56, 68, 84,100,116,132,148,164,180,192,208,224,240,240,224,208,192,180,164,148,132,116,100, 84, 68, 56, 40, 24, 8,
255 8, 20, 36, 52, 64, 80, 96,108,124,136,152,168,180,196,212,224,224,212,196,180,168,152,136,124,108, 96, 80, 64, 52, 36, 20, 8,
256 8, 20, 32, 48, 60, 76, 88,100,116,128,140,156,168,184,196,208,208,196,184,168,156,140,128,116,100, 88, 76, 60, 48, 32, 20, 8,
257 8, 20, 32, 44, 56, 68, 80, 92,108,120,132,144,156,168,180,192,192,180,168,156,144,132,120,108, 92, 80, 68, 56, 44, 32, 20, 8,
258 4, 16, 28, 40, 52, 64, 76, 88, 96,108,120,132,144,156,168,180,180,168,156,144,132,120,108, 96, 88, 76, 64, 52, 40, 28, 16, 4,
259 4, 16, 28, 36, 48, 56, 68, 80, 88,100,112,120,132,140,152,164,164,152,140,132,120,112,100, 88, 80, 68, 56, 48, 36, 28, 16, 4,
260 4, 16, 24, 32, 44, 52, 60, 72, 80, 92,100,108,120,128,136,148,148,136,128,120,108,100, 92, 80, 72, 60, 52, 44, 32, 24, 16, 4,
261 4, 12, 20, 28, 40, 48, 56, 64, 72, 80, 88, 96,108,116,124,132,132,124,116,108, 96, 88, 80, 72, 64, 56, 48, 40, 28, 20, 12, 4,
262 4, 12, 20, 28, 32, 40, 48, 56, 64, 72, 80, 88, 92,100,108,116,116,108,100, 92, 88, 80, 72, 64, 56, 48, 40, 32, 28, 20, 12, 4,
263 4, 8, 16, 24, 28, 36, 44, 48, 56, 60, 68, 76, 80, 88, 96,100,100, 96, 88, 80, 76, 68, 60, 56, 48, 44, 36, 28, 24, 16, 8, 4,
264 4, 8, 12, 20, 24, 32, 36, 40, 48, 52, 56, 64, 68, 76, 80, 84, 84, 80, 76, 68, 64, 56, 52, 48, 40, 36, 32, 24, 20, 12, 8, 4,
265 4, 8, 12, 16, 20, 24, 28, 32, 40, 44, 48, 52, 56, 60, 64, 68, 68, 64, 60, 56, 52, 48, 44, 40, 32, 28, 24, 20, 16, 12, 8, 4,
266 0, 4, 8, 12, 16, 20, 24, 28, 28, 32, 36, 40, 44, 48, 52, 56, 56, 52, 48, 44, 40, 36, 32, 28, 28, 24, 20, 16, 12, 8, 4, 0,
267 0, 4, 8, 8, 12, 12, 16, 20, 20, 24, 28, 28, 32, 32, 36, 40, 40, 36, 32, 32, 28, 28, 24, 20, 20, 16, 12, 12, 8, 8, 4, 0,
268 0, 4, 4, 4, 8, 8, 8, 12, 12, 16, 16, 16, 20, 20, 20, 24, 24, 20, 20, 20, 16, 16, 16, 12, 12, 8, 8, 8, 4, 4, 4, 0,
269 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0,
/**
 * 16x16 weight window, 16 values per row (256 entries); peak value 224 at the
 * four centre entries, all four corners zero.
 * NOTE(review): obmc16 is defined more than once in this file, so the
 * alternatives are presumably selected by preprocessor conditionals not
 * visible here - confirm.
 */
272 static const uint8_t obmc16[256]={
273 0, 4, 4, 8, 8, 12, 12, 16, 16, 12, 12, 8, 8, 4, 4, 0,
274 4, 8, 16, 20, 28, 32, 40, 44, 44, 40, 32, 28, 20, 16, 8, 4,
275 4, 16, 24, 36, 44, 56, 64, 76, 76, 64, 56, 44, 36, 24, 16, 4,
276 8, 20, 36, 48, 64, 76, 92,104,104, 92, 76, 64, 48, 36, 20, 8,
277 8, 28, 44, 64, 80,100,116,136,136,116,100, 80, 64, 44, 28, 8,
278 12, 32, 56, 76,100,120,144,164,164,144,120,100, 76, 56, 32, 12,
279 12, 40, 64, 92,116,144,168,196,196,168,144,116, 92, 64, 40, 12,
280 16, 44, 76,104,136,164,196,224,224,196,164,136,104, 76, 44, 16,
281 16, 44, 76,104,136,164,196,224,224,196,164,136,104, 76, 44, 16,
282 12, 40, 64, 92,116,144,168,196,196,168,144,116, 92, 64, 40, 12,
283 12, 32, 56, 76,100,120,144,164,164,144,120,100, 76, 56, 32, 12,
284 8, 28, 44, 64, 80,100,116,136,136,116,100, 80, 64, 44, 28, 8,
285 8, 20, 36, 48, 64, 76, 92,104,104, 92, 76, 64, 48, 36, 20, 8,
286 4, 16, 24, 36, 44, 56, 64, 76, 76, 64, 56, 44, 36, 24, 16, 4,
287 4, 8, 16, 20, 28, 32, 40, 44, 44, 40, 32, 28, 20, 16, 8, 4,
288 0, 4, 4, 8, 8, 12, 12, 16, 16, 12, 12, 8, 8, 4, 4, 0,
/**
 * 32x32 weight window, 32 values per row (1024 entries).
 * The first and last rows are all zero and the four centre entries hold the
 * peak value 255; rows mirror top-to-bottom and left-to-right.
 * NOTE(review): obmc32 is defined more than once in this file, so the
 * alternatives are presumably selected by preprocessor conditionals not
 * visible here - confirm.
 */
292 static const uint8_t obmc32[1024]={
293 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
294 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 4, 4, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0,
295 0, 0, 0, 4, 4, 4, 4, 8, 8, 12, 12, 12, 12, 16, 16, 16, 16, 16, 16, 12, 12, 12, 12, 8, 8, 4, 4, 4, 4, 0, 0, 0,
296 0, 0, 4, 4, 4, 8, 8, 12, 16, 20, 20, 24, 28, 28, 28, 28, 28, 28, 28, 28, 24, 20, 20, 16, 12, 8, 8, 4, 4, 4, 0, 0,
297 0, 0, 4, 4, 8, 12, 16, 20, 24, 28, 36, 40, 44, 44, 48, 48, 48, 48, 44, 44, 40, 36, 28, 24, 20, 16, 12, 8, 4, 4, 0, 0,
298 0, 0, 4, 8, 12, 20, 24, 32, 36, 44, 48, 56, 60, 64, 68, 68, 68, 68, 64, 60, 56, 48, 44, 36, 32, 24, 20, 12, 8, 4, 0, 0,
299 0, 4, 4, 8, 16, 24, 32, 40, 48, 60, 68, 76, 80, 84, 88, 92, 92, 88, 84, 80, 76, 68, 60, 48, 40, 32, 24, 16, 8, 4, 4, 0,
300 0, 4, 8, 12, 20, 32, 40, 52, 64, 76, 84, 96,104,108,112,116,116,112,108,104, 96, 84, 76, 64, 52, 40, 32, 20, 12, 8, 4, 0,
301 0, 4, 8, 16, 24, 36, 48, 64, 76, 92,104,116,124,132,136,140,140,136,132,124,116,104, 92, 76, 64, 48, 36, 24, 16, 8, 4, 0,
302 0, 4, 12, 20, 28, 44, 60, 76, 92,104,120,136,148,156,160,164,164,160,156,148,136,120,104, 92, 76, 60, 44, 28, 20, 12, 4, 0,
303 0, 4, 12, 20, 36, 48, 68, 84,104,120,140,152,168,176,184,188,188,184,176,168,152,140,120,104, 84, 68, 48, 36, 20, 12, 4, 0,
304 0, 4, 12, 24, 36, 56, 76, 96,116,136,152,172,184,196,204,208,208,204,196,184,172,152,136,116, 96, 76, 56, 36, 24, 12, 4, 0,
305 0, 4, 12, 24, 44, 60, 80,104,124,148,168,184,200,212,224,228,228,224,212,200,184,168,148,124,104, 80, 60, 44, 24, 12, 4, 0,
306 0, 4, 12, 28, 44, 64, 84,108,132,156,176,196,212,228,236,240,240,236,228,212,196,176,156,132,108, 84, 64, 44, 28, 12, 4, 0,
307 0, 4, 16, 28, 48, 68, 88,112,136,160,184,204,224,236,244,252,252,244,236,224,204,184,160,136,112, 88, 68, 48, 28, 16, 4, 0,
308 1, 4, 16, 28, 48, 68, 92,116,140,164,188,208,228,240,252,255,255,252,240,228,208,188,164,140,116, 92, 68, 48, 28, 16, 4, 1,
309 1, 4, 16, 28, 48, 68, 92,116,140,164,188,208,228,240,252,255,255,252,240,228,208,188,164,140,116, 92, 68, 48, 28, 16, 4, 1,
310 0, 4, 16, 28, 48, 68, 88,112,136,160,184,204,224,236,244,252,252,244,236,224,204,184,160,136,112, 88, 68, 48, 28, 16, 4, 0,
311 0, 4, 12, 28, 44, 64, 84,108,132,156,176,196,212,228,236,240,240,236,228,212,196,176,156,132,108, 84, 64, 44, 28, 12, 4, 0,
312 0, 4, 12, 24, 44, 60, 80,104,124,148,168,184,200,212,224,228,228,224,212,200,184,168,148,124,104, 80, 60, 44, 24, 12, 4, 0,
313 0, 4, 12, 24, 36, 56, 76, 96,116,136,152,172,184,196,204,208,208,204,196,184,172,152,136,116, 96, 76, 56, 36, 24, 12, 4, 0,
314 0, 4, 12, 20, 36, 48, 68, 84,104,120,140,152,168,176,184,188,188,184,176,168,152,140,120,104, 84, 68, 48, 36, 20, 12, 4, 0,
315 0, 4, 12, 20, 28, 44, 60, 76, 92,104,120,136,148,156,160,164,164,160,156,148,136,120,104, 92, 76, 60, 44, 28, 20, 12, 4, 0,
316 0, 4, 8, 16, 24, 36, 48, 64, 76, 92,104,116,124,132,136,140,140,136,132,124,116,104, 92, 76, 64, 48, 36, 24, 16, 8, 4, 0,
317 0, 4, 8, 12, 20, 32, 40, 52, 64, 76, 84, 96,104,108,112,116,116,112,108,104, 96, 84, 76, 64, 52, 40, 32, 20, 12, 8, 4, 0,
318 0, 4, 4, 8, 16, 24, 32, 40, 48, 60, 68, 76, 80, 84, 88, 92, 92, 88, 84, 80, 76, 68, 60, 48, 40, 32, 24, 16, 8, 4, 4, 0,
319 0, 0, 4, 8, 12, 20, 24, 32, 36, 44, 48, 56, 60, 64, 68, 68, 68, 68, 64, 60, 56, 48, 44, 36, 32, 24, 20, 12, 8, 4, 0, 0,
320 0, 0, 4, 4, 8, 12, 16, 20, 24, 28, 36, 40, 44, 44, 48, 48, 48, 48, 44, 44, 40, 36, 28, 24, 20, 16, 12, 8, 4, 4, 0, 0,
321 0, 0, 4, 4, 4, 8, 8, 12, 16, 20, 20, 24, 28, 28, 28, 28, 28, 28, 28, 28, 24, 20, 20, 16, 12, 8, 8, 4, 4, 4, 0, 0,
322 0, 0, 0, 4, 4, 4, 4, 8, 8, 12, 12, 12, 12, 16, 16, 16, 16, 16, 16, 12, 12, 12, 12, 8, 8, 4, 4, 4, 4, 0, 0, 0,
323 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 4, 4, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0,
324 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
/**
 * 16x16 weight window, 16 values per row (256 entries); peak value 252 at the
 * four centre entries, all four corners zero.
 * NOTE(review): obmc16 is defined more than once in this file, so the
 * alternatives are presumably selected by preprocessor conditionals not
 * visible here - confirm.
 */
327 static const uint8_t obmc16[256]={
328 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0,
329 0, 0, 4, 8, 12, 16, 20, 20, 20, 20, 16, 12, 8, 4, 0, 0,
330 0, 4, 12, 24, 32, 44, 52, 56, 56, 52, 44, 32, 24, 12, 4, 0,
331 0, 8, 24, 40, 60, 80, 96,104,104, 96, 80, 60, 40, 24, 8, 0,
332 0, 12, 32, 64, 92,120,140,152,152,140,120, 92, 64, 32, 12, 0,
333 4, 16, 44, 80,120,156,184,196,196,184,156,120, 80, 44, 16, 4,
334 4, 20, 52, 96,140,184,216,232,232,216,184,140, 96, 52, 20, 4,
335 0, 20, 56,104,152,196,232,252,252,232,196,152,104, 56, 20, 0,
336 0, 20, 56,104,152,196,232,252,252,232,196,152,104, 56, 20, 0,
337 4, 20, 52, 96,140,184,216,232,232,216,184,140, 96, 52, 20, 4,
338 4, 16, 44, 80,120,156,184,196,196,184,156,120, 80, 44, 16, 4,
339 0, 12, 32, 64, 92,120,140,152,152,140,120, 92, 64, 32, 12, 0,
340 0, 8, 24, 40, 60, 80, 96,104,104, 96, 80, 60, 40, 24, 8, 0,
341 0, 4, 12, 24, 32, 44, 52, 56, 56, 52, 44, 32, 24, 12, 4, 0,
342 0, 0, 4, 8, 12, 16, 20, 20, 20, 20, 16, 12, 8, 4, 0, 0,
343 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0,
/**
 * 8x8 weight window, 8 values per row (64 entries).
 * The table is separable: entry (i,j) equals r[i]*r[j]/4 with
 * r = {4, 12, 20, 28, 28, 20, 12, 4}; the peak value is 196 at the centre.
 */
349 static const uint8_t obmc8[64]={
350 4, 12, 20, 28, 28, 20, 12, 4,
351 12, 36, 60, 84, 84, 60, 36, 12,
352 20, 60,100,140,140,100, 60, 20,
353 28, 84,140,196,196,140, 84, 28,
354 28, 84,140,196,196,140, 84, 28,
355 20, 60,100,140,140,100, 60, 20,
356 12, 36, 60, 84, 84, 60, 36, 12,
357 4, 12, 20, 28, 28, 20, 12, 4,
/* 16-entry weight window (smallest member of obmc_tab); its initializer values are not visible in this view. */
362 static const uint8_t obmc4[16]={
/* Weight windows ordered from largest to smallest: index 0 -> obmc32, 1 -> obmc16, 2 -> obmc8, 3 -> obmc4. */
370 static const uint8_t * const obmc_tab[4]={
371 obmc32, obmc16, obmc8, obmc4
/* Motion-vector scale factors between reference frames. pred_mv() reads scale_mv_ref[ref][neighbour->ref]
 * and applies it as (mv * scale + 128) >> 8, i.e. as an 8-bit fixed-point factor.
 * The code that fills this table is not visible in this view. */
374 static int scale_mv_ref[MAX_REF_FRAMES][MAX_REF_FRAMES];
/* Per-block state. Code in this file also reads the members mx, my, ref, color[] and type,
 * which are declared on lines not visible in this view. */
376 typedef struct BlockNode{
382 //#define TYPE_SPLIT 1
/* Flag bit tested against BlockNode.type (see same_block() and decode_q_branch()). */
383 #define BLOCK_INTRA 1
385 //#define TYPE_NOCOLOR 4
/* Summed over neighbouring blocks in decode_q_branch() to form a coding context. */
386 uint8_t level; //FIXME merge into type?
/* Stand-in neighbour used by decode_q_branch() when a left/top block does not exist (x == 0 or y == 0).
 * All three colour components are 128; other initializers are not visible in this view. */
389 static const BlockNode null_block= { //FIXME add border maybe
390 .color= {128,128,128},
/* Block grid granularity: alloc_blocks() shifts the frame width/height right by LOG2_MB_SIZE,
 * so one grid cell covers MB_SIZE (16) units in each direction. */
398 #define LOG2_MB_SIZE 4
399 #define MB_SIZE (1<<LOG2_MB_SIZE)
/* NOTE(review): no use of ENCODER_EXTRA_BITS is visible in this view. */
400 #define ENCODER_EXTRA_BITS 4
/* One sparse subband entry. Code in this file accesses the members x (column position; w+1 is used
 * as an end marker) and coeff; their declarations are not visible in this view. */
403 typedef struct x_and_coeff{
/* Description of one subband. Code in this file also uses the members width, height, ibuf,
 * buf_x_offset and buf_y_offset, declared on lines not visible in this view. */
408 typedef struct SubBand{
413 int qlog; ///< log(qscale)/log[2^(1/6)]
418 int stride_line; ///< Stride measured in lines, not pixels.
/* Sparse coefficient list written by unpack_coeffs() and read by decode_subband_slice_buffered(). */
419 x_and_coeff * x_coeff;
/* Passed around as the "parent" subband; may be NULL (unpack_coeffs() checks it). */
420 struct SubBand *parent;
/* Range-coder context bytes, 32 per row; reset to MID_STATE by reset_contexts(). */
421 uint8_t state[/*7*2*/ 7 + 512][32];
/* Per-plane data; only part of the member list is visible in this view. */
424 typedef struct Plane{
/* Subbands addressed as band[level][orientation]; reset_contexts() skips orientation 0 for level > 0. */
427 SubBand band[MAX_DECOMPOSITIONS][4];
/* NOTE(review): presumably half of a symmetric interpolation filter (HTAPS_MAX/2 taps) - confirm. */
430 int8_t hcoeff[HTAPS_MAX/2];
/* NOTE(review): presumably the previous frame's hcoeff, kept for comparison - confirm. */
435 int8_t last_hcoeff[HTAPS_MAX/2];
/* Codec context. Only part of the member list is visible in this view; code in this file also uses
 * members such as c (range coder), block, b_width, b_height, block_max_depth, ref_frames, qlog and qbias. */
439 typedef struct SnowContext{
441 AVCodecContext *avctx;
446 AVFrame input_picture; ///< new_picture with the internal linesizes
447 AVFrame current_picture;
448 AVFrame last_picture[MAX_REF_FRAMES];
449 uint8_t *halfpel_plane[MAX_REF_FRAMES][4][4];
450 AVFrame mconly_picture;
451 // uint8_t q_context[16];
/* Range-coder contexts; both arrays are reset to MID_STATE by reset_contexts(). */
452 uint8_t header_state[32];
453 uint8_t block_state[128 + 32*128];
457 int spatial_decomposition_type;
458 int last_spatial_decomposition_type;
459 int temporal_decomposition_type;
460 int spatial_decomposition_count;
461 int last_spatial_decomposition_count;
462 int temporal_decomposition_count;
465 int16_t (*ref_mvs[MAX_REF_FRAMES])[2];
466 uint32_t *ref_scores[MAX_REF_FRAMES];
467 DWTELEM *spatial_dwt_buffer;
/* Compared against SubBand.ibuf in decode_subband_slice_buffered(). */
468 IDWTELEM *spatial_idwt_buffer;
472 int spatial_scalability;
/* Right shift applied to qbias*qmul when computing qadd in decode_subband_slice_buffered(). */
482 #define QBIAS_SHIFT 3
486 int last_block_max_depth;
487 Plane plane[MAX_PLANES];
489 #define ME_CACHE_SIZE 1024
490 int me_cache[ME_CACHE_SIZE];
491 int me_cache_generation;
494 MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to eventually make the motion estimation independent of MpegEncContext, so this will be removed then (FIXME/XXX)
500 // Avoid a name clash on SGI IRIX
/* Right shift applied after dequantization multiply in decode_subband_slice_buffered(). */
503 #define QEXPSHIFT (7-FRAC_BITS+8) //FIXME try to change this to 0
/* Dequantization mantissa table indexed by qlog & (QROOT-1); the result is then shifted left by qlog >> QSHIFT.
 * The code that fills it is not visible in this view. */
504 static uint8_t qexp[QROOT];
/**
 * Write the integer v to the range coder c using adaptive binary contexts.
 *
 * Context layout inside state[] as used by the visible put_rac() calls:
 *   state[0]       zero flag (0 is written before a non-zero value, 1 otherwise),
 *   state[1..10]   unary-coded exponent e = av_log2(|v|),
 *   state[11..21]  sign bit (v < 0),
 *   state[22..31]  mantissa bits of |v|, most significant first.
 *
 * @param c         range coder to write to
 * @param state     context array; offsets up to 31 are used
 * @param v         value to encode
 * @param is_signed NOTE(review): presumably gates the sign bit, mirroring get_symbol();
 *                  the controlling statements are not visible in this view
 *
 * NOTE(review): three alternative code paths are visible below (clamped with el,
 * unclamped, and FFMIN(i,9)-clamped); presumably they are selected by conditionals
 * that are not visible here - confirm which one matches get_symbol().
 */
506 static inline void put_symbol(RangeCoder *c, uint8_t *state, int v, int is_signed){
510 const int a= FFABS(v);
511 const int e= av_log2(a);
/* el clamps the exponent so that context offsets stay inside their 10/11-entry ranges. */
513 const int el= FFMIN(e, 10);
514 put_rac(c, state+0, 0);
517 put_rac(c, state+1+i, 1); //1..10
520 put_rac(c, state+1+9, 1); //1..10
522 put_rac(c, state+1+FFMIN(i,9), 0);
524 for(i=e-1; i>=el; i--){
525 put_rac(c, state+22+9, (a>>i)&1); //22..31
528 put_rac(c, state+22+i, (a>>i)&1); //22..31
532 put_rac(c, state+11 + el, v < 0); //11..21
535 put_rac(c, state+0, 0);
538 put_rac(c, state+1+i, 1); //1..10
540 put_rac(c, state+1+i, 0);
542 for(i=e-1; i>=0; i--){
543 put_rac(c, state+22+i, (a>>i)&1); //22..31
547 put_rac(c, state+11 + e, v < 0); //11..21
550 put_rac(c, state+1+FFMIN(i,9), 1); //1..10
552 put_rac(c, state+1+9, 0);
554 for(i=e-1; i>=0; i--){
555 put_rac(c, state+22+FFMIN(i,9), (a>>i)&1); //22..31
559 put_rac(c, state+11 + 10, v < 0); //11..21
563 put_rac(c, state+0, 1);
/**
 * Read an integer from the range coder c; the counterpart of put_symbol().
 *
 * Steps visible here: a flag bit from state[0]; a unary exponent e read with contexts
 * state[1 + min(e,9)]; e mantissa bits read with contexts state[22 + min(i,9)],
 * each doubling the accumulator a and adding the new bit; and, only when is_signed is
 * non-zero, a sign bit read with context state[11 + min(e,10)].
 *
 * @param c         range coder to read from
 * @param state     context array; offsets up to 31 are used
 * @param is_signed non-zero to read a sign bit
 * @return the decoded value; the return statements are not visible in this view
 *         (presumably 0 when the first flag bit is set - confirm)
 */
567 static inline int get_symbol(RangeCoder *c, uint8_t *state, int is_signed){
568 if(get_rac(c, state+0))
573 while(get_rac(c, state+1 + FFMIN(e,9))){ //1..10
578 for(i=e-1; i>=0; i--){
579 a += a + get_rac(c, state+22 + FFMIN(i,9)); //22..31
/* e becomes -1 when a sign bit was read and set, 0 otherwise. */
582 e= -(is_signed && get_rac(c, state+11 + FFMIN(e,10))); //11..21
/**
 * Write the value v to the range coder c using an escape-style code that starts at
 * 2^log2 (r is 1<<log2, or 1 when log2 is negative).
 *
 * Visible steps: continuation bits (1 = continue, 0 = stop) are written with context
 * state[4 + log2]; afterwards the low log2 bits of v are written, most significant
 * first, with contexts state[31 - i].
 *
 * @param c     range coder to write to
 * @param state context array
 * @param v     value to encode (NOTE(review): presumably non-negative - confirm)
 * @param log2  starting exponent; may be negative (callers in this file pass e.g. 0-4)
 *
 * NOTE(review): the statements that update log2, r and v between the visible lines are
 * not visible in this view.
 */
587 static inline void put_symbol2(RangeCoder *c, uint8_t *state, int v, int log2){
589 int r= log2>=0 ? 1<<log2 : 1;
595 put_rac(c, state+4+log2, 1);
600 put_rac(c, state+4+log2, 0);
602 for(i=log2-1; i>=0; i--){
603 put_rac(c, state+31-i, (v>>i)&1);
/**
 * Read a value from the range coder c; the counterpart of put_symbol2().
 *
 * Visible steps: continuation bits are read with context state[4 + log2] until a 0 is
 * seen; afterwards log2 low bits are read, most significant first, with contexts
 * state[31 - i] and added into v at bit position i.
 *
 * @param c     range coder to read from
 * @param state context array
 * @param log2  starting exponent; may be negative (r is then 1)
 * @return the decoded value (the return statement is not visible in this view)
 */
607 static inline int get_symbol2(RangeCoder *c, uint8_t *state, int log2){
609 int r= log2>=0 ? 1<<log2 : 1;
614 while(get_rac(c, state+4+log2)){
620 for(i=log2-1; i>=0; i--){
621 v+= get_rac(c, state+31-i)<<i;
/**
 * Decode the coefficients of subband b from the range coder s->c into the sparse list
 * b->x_coeff. Each row of the list is terminated by an entry whose x is w+1.
 *
 * Coefficients are coded either individually, with a context derived from already
 * decoded neighbours (l, lt, t, rt) and the parent-band coefficient (p), or as part of
 * zero runs whose lengths are read with get_symbol2().
 *
 * @param s           codec context providing the range coder
 * @param b           subband to fill
 * @param parent      parent subband, or NULL if there is none
 * @param orientation NOTE(review): not used in any line visible in this view
 *
 * NOTE(review): many statements of this function are not visible here; the comments
 * below describe only what the visible lines do.
 */
627 static inline void unpack_coeffs(SnowContext *s, SubBand *b, SubBand * parent, int orientation){
628 const int w= b->width;
629 const int h= b->height;
633 x_and_coeff *xc= b->x_coeff;
634 x_and_coeff *prev_xc= NULL;
635 x_and_coeff *prev2_xc= xc;
636 x_and_coeff *parent_xc= parent ? parent->x_coeff : NULL;
637 x_and_coeff *prev_parent_xc= parent_xc;
/* Number of zero runs, then (if any) the length of the first run. */
639 runs= get_symbol2(&s->c, b->state[30], 0);
640 if(runs-- > 0) run= get_symbol2(&s->c, b->state[1], 3);
647 if(y && prev_xc->x == 0){
659 if(prev_xc->x == x + 1)
/* The parent band is indexed at half resolution (x>>1). */
665 if(x>>1 > parent_xc->x){
668 if(x>>1 == parent_xc->x){
/* At least one neighbour/parent value is non-zero: code this coefficient with a context. */
672 if(/*ll|*/l|lt|t|rt|p){
673 int context= av_log2(/*FFABS(ll) + */3*(l>>1) + (lt>>1) + (t&~1) + (rt>>1) + (p>>1));
/* Significance bit. */
675 v=get_rac(&s->c, &b->state[0][context]);
/* v is stored as 2*(magnitude code + 1) plus one extra low bit whose context depends on the left and top values. */
677 v= 2*(get_symbol2(&s->c, b->state[context + 2], context-4) + 1);
678 v+=get_rac(&s->c, &b->state[0][16 + 1 + 3 + quant3bA[l&0xFF] + 3*quant3bA[t&0xFF]]);
/* Fetch the next run length once the current run has been consumed. */
685 if(runs-- > 0) run= get_symbol2(&s->c, b->state[1], 3);
687 v= 2*(get_symbol2(&s->c, b->state[0 + 2], 0-4) + 1);
688 v+=get_rac(&s->c, &b->state[0][16 + 1 + 3]);
/* Limit how far the current run may be skipped in one step, based on the next entry of the previous row, the row end and the parent band. */
697 if(y) max_run= FFMIN(run, prev_xc->x - x - 2);
698 else max_run= FFMIN(run, w-x-1);
700 max_run= FFMIN(max_run, 2*parent_xc->x - x - 1);
706 (xc++)->x= w+1; //end marker
/* Advance to the parent's row end marker. */
712 while(parent_xc->x != parent->width+1)
715 prev_parent_xc= parent_xc;
717 parent_xc= prev_parent_xc;
722 (xc++)->x= w+1; //end marker
/**
 * Dequantize rows start_y .. h-1 of subband b from its sparse list b->x_coeff into the
 * slice buffer sb.
 *
 * @param s          codec context (supplies qlog, qbias and spatial_idwt_buffer)
 * @param b          subband whose x_coeff list is read
 * @param sb         slice buffer receiving the output lines
 * @param start_y    first row to process
 * @param h          one past the last row to process
 * @param save_state one-element array carrying the x_coeff read index between calls
 *
 * NOTE(review): several statements of this function are not visible in this view.
 */
725 static inline void decode_subband_slice_buffered(SnowContext *s, SubBand *b, slice_buffer * sb, int start_y, int h, int save_state[1]){
726 const int w= b->width;
/* Effective quantizer: global plus per-band qlog, clipped to [0, QROOT*16]. */
728 const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
729 int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
730 int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
/* NOTE(review): the body of this condition is not visible; presumably it disables scaling - confirm. */
733 if(b->ibuf == s->spatial_idwt_buffer || s->qlog == LOSSLESS_QLOG){
738 /* If we are on the second or later slice, restore our index. */
740 new_index = save_state[0];
743 for(y=start_y; y<h; y++){
/* Fetch and clear the destination line, then read this row's first list entry. */
746 IDWTELEM * line = slice_buffer_get_line(sb, y * b->stride_line + b->buf_y_offset) + b->buf_x_offset;
747 memset(line, 0, b->width*sizeof(IDWTELEM));
748 v = b->x_coeff[new_index].coeff;
749 x = b->x_coeff[new_index++].x;
/* v>>1 is the quantized magnitude; u is 0 or -1 depending on the low bit of v
 * (NOTE(review): presumably used to apply the sign - the use of u is not visible). */
751 register int t= ( (v>>1)*qmul + qadd)>>QEXPSHIFT;
752 register int u= -(v&1);
755 v = b->x_coeff[new_index].coeff;
756 x = b->x_coeff[new_index++].x;
760 /* Save our variables for the next slice. */
761 save_state[0] = new_index;
/**
 * Reset all range-coder context bytes of s to MID_STATE.
 *
 * Covers the state arrays of every subband of planes 0..2 (orientation 0 is only
 * touched at level 0, orientations 1..3 at every level), plus s->header_state and
 * s->block_state.
 *
 * @param s codec context whose contexts are reset
 */
766 static void reset_contexts(SnowContext *s){ //FIXME better initial contexts
767 int plane_index, level, orientation;
769 for(plane_index=0; plane_index<3; plane_index++){
770 for(level=0; level<MAX_DECOMPOSITIONS; level++){
771 for(orientation=level ? 1:0; orientation<4; orientation++){
772 memset(s->plane[plane_index].band[level][orientation].state, MID_STATE, sizeof(s->plane[plane_index].band[level][orientation].state));
776 memset(s->header_state, MID_STATE, sizeof(s->header_state));
777 memset(s->block_state, MID_STATE, sizeof(s->block_state));
/**
 * Allocate the zero-initialized block array s->block for the current frame size.
 *
 * @param s codec context; reads avctx->width/height and block_max_depth
 * @return NOTE(review): the return statement is not visible in this view
 *
 * NOTE(review): no NULL check on the av_mallocz() result is visible here, and the size
 * expression is computed without an overflow check - confirm both against the full file.
 */
780 static int alloc_blocks(SnowContext *s){
/* -((-n) >> k) rounds n / 2^k up, giving the number of grid cells needed to cover the frame. */
781 int w= -((-s->avctx->width )>>LOG2_MB_SIZE);
782 int h= -((-s->avctx->height)>>LOG2_MB_SIZE);
/* Each grid cell holds 4^block_max_depth nodes, hence the shift by block_max_depth*2. */
788 s->block= av_mallocz(w * h * sizeof(BlockNode) << (s->block_max_depth*2));
/**
 * Copy range-coder state from s to d while keeping d's own bytestream pointers.
 *
 * The visible lines save d->bytestream and d->bytestream_start and write them back at
 * the end. NOTE(review): the statement between them is not visible; presumably it is a
 * whole-struct copy of *s into *d - confirm.
 *
 * @param d destination range coder
 * @param s source range coder
 */
792 static inline void copy_rac_state(RangeCoder *d, RangeCoder *s){
793 uint8_t *bytestream= d->bytestream;
794 uint8_t *bytestream_start= d->bytestream_start;
796 d->bytestream= bytestream;
797 d->bytestream_start= bytestream_start;
/**
 * Fill the square region of s->block covered by block (x, y) at the given tree level
 * with one BlockNode value.
 *
 * @param s     codec context owning the block array
 * @param level tree level of the block; the region is 2^(block_max_depth - level) nodes wide
 * @param x     block column at this level
 * @param y     block row at this level
 * @param l, cb, cr  colour components
 * @param mx, my     motion vector
 * @param ref   reference frame index
 * @param type  block type flags
 *
 * NOTE(review): the construction of the local "block" from these parameters is not
 * visible in this view.
 */
800 static inline void set_blocks(SnowContext *s, int level, int x, int y, int l, int cb, int cr, int mx, int my, int ref, int type){
/* Row length of the block array at the finest level. */
801 const int w= s->b_width << s->block_max_depth;
802 const int rem_depth= s->block_max_depth - level;
803 const int index= (x + y*w) << rem_depth;
804 const int block_w= 1<<rem_depth;
817 for(j=0; j<block_w; j++){
818 for(i=0; i<block_w; i++){
819 s->block[index + i + j*w]= block;
/**
 * Set up the source and reference plane pointers of a motion estimation context.
 *
 * For each plane i the visible lines store src[i] unchanged in c->src[0][i] and
 * ref[i] advanced by a per-plane offset in c->ref[0][i]. The two chroma offsets are
 * (y*c->uvstride + x) >> 1; the luma offset is not visible in this view.
 *
 * @param c         motion estimation context to initialize
 * @param src       source plane pointers
 * @param ref       reference plane pointers
 * @param ref2      NOTE(review): not used in any line visible in this view
 * @param x, y      position used to compute the reference offsets
 * @param ref_index NOTE(review): not used in any line visible in this view
 */
824 static inline void init_ref(MotionEstContext *c, uint8_t *src[3], uint8_t *ref[3], uint8_t *ref2[3], int x, int y, int ref_index){
825 const int offset[3]= {
827 ((y*c->uvstride + x)>>1),
828 ((y*c->uvstride + x)>>1),
832 c->src[0][i]= src [i];
833 c->ref[0][i]= ref [i] + offset[i];
/**
 * Predict a motion vector from the left, top and top-right neighbour blocks.
 *
 * With a single reference frame the prediction is mid_pred() of the three neighbour
 * vectors, component by component (mid_pred is presumably the median of three - confirm).
 * With several reference frames each neighbour vector is first rescaled to the target
 * reference: mv * scale_mv_ref[ref][neighbour->ref], rounded with +128 and shifted
 * right by 8.
 *
 * @param s    codec context (reads ref_frames)
 * @param mx   receives the predicted horizontal component
 * @param my   receives the predicted vertical component
 * @param ref  reference frame index the prediction is for
 * @param left left neighbour
 * @param top  top neighbour
 * @param tr   top-right neighbour
 */
838 static inline void pred_mv(SnowContext *s, int *mx, int *my, int ref,
839 const BlockNode *left, const BlockNode *top, const BlockNode *tr){
840 if(s->ref_frames == 1){
841 *mx = mid_pred(left->mx, top->mx, tr->mx);
842 *my = mid_pred(left->my, top->my, tr->my);
844 const int *scale = scale_mv_ref[ref];
845 *mx = mid_pred((left->mx * scale[left->ref] + 128) >>8,
846 (top ->mx * scale[top ->ref] + 128) >>8,
847 (tr ->mx * scale[tr ->ref] + 128) >>8);
848 *my = mid_pred((left->my * scale[left->ref] + 128) >>8,
849 (top ->my * scale[top ->ref] + 128) >>8,
850 (tr ->my * scale[tr ->ref] + 128) >>8);
/**
 * Test whether two blocks would give the same prediction.
 * Two intra blocks are compared by their three colour values; any other pair
 * is compared by mx, my, ref and the BLOCK_INTRA bit of type.
 * @return non-zero if the compared fields are all equal, 0 otherwise
 */
854 static av_always_inline int same_block(BlockNode *a, BlockNode *b){
855 if((a->type&BLOCK_INTRA) && (b->type&BLOCK_INTRA)){
// the OR of all differences is zero only if every difference is zero
856 return !((a->color[0] - b->color[0]) | (a->color[1] - b->color[1]) | (a->color[2] - b->color[2]));
858 return !((a->mx - b->mx) | (a->my - b->my) | (a->ref - b->ref) | ((a->type ^ b->type)&BLOCK_INTRA));
/**
 * Decode one node of the block tree at position (x, y) and depth level from
 * the range coder, and recurse into its four children when it is split.
 * The node is a leaf when level has reached s->block_max_depth or when the
 * coder returns a set bit for the split context.
 */
862 static void decode_q_branch(SnowContext *s, int level, int x, int y){
863 const int w= s->b_width << s->block_max_depth;
864 const int rem_depth= s->block_max_depth - level;
865 const int index= (x + y*w) << rem_depth;
866 int trx= (x+1)<<rem_depth;
// neighbours outside the picture are replaced by null_block (left/top) or by another neighbour (tl/tr)
867 const BlockNode *left = x ? &s->block[index-1] : &null_block;
868 const BlockNode *top = y ? &s->block[index-w] : &null_block;
869 const BlockNode *tl = y && x ? &s->block[index-w-1] : left;
870 const BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
// context for the leaf/split decision, built from the levels of the neighbours
871 int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
// NOTE(review): the condition guarding this default fill is not visible here - presumably keyframes
874 set_blocks(s, level, x, y, null_block.color[0], null_block.color[1], null_block.color[2], null_block.mx, null_block.my, null_block.ref, BLOCK_INTRA);
878 if(level==s->block_max_depth || get_rac(&s->c, &s->block_state[4 + s_context])){
// colour prediction starts from the left neighbour
880 int l = left->color[0];
881 int cb= left->color[1];
882 int cr= left->color[2];
// coding contexts derived from the neighbours; the terms multiplied by 0 contribute nothing
884 int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref);
885 int mx_context= av_log2(2*FFABS(left->mx - top->mx)) + 0*av_log2(2*FFABS(tr->mx - top->mx));
886 int my_context= av_log2(2*FFABS(left->my - top->my)) + 0*av_log2(2*FFABS(tr->my - top->my));
888 type= get_rac(&s->c, &s->block_state[1 + left->type + top->type]) ? BLOCK_INTRA : 0;
// presumably the intra case: the colour residuals are added to the prediction
891 pred_mv(s, &mx, &my, 0, left, top, tr);
892 l += get_symbol(&s->c, &s->block_state[32], 1);
893 cb+= get_symbol(&s->c, &s->block_state[64], 1);
894 cr+= get_symbol(&s->c, &s->block_state[96], 1);
// presumably the inter case: a reference index is only coded when more than one reference frame is in use
896 if(s->ref_frames > 1)
897 ref= get_symbol(&s->c, &s->block_state[128 + 1024 + 32*ref_context], 0);
898 pred_mv(s, &mx, &my, ref, left, top, tr);
// vector residuals are added to the prediction; the state set depends on whether ref is non-zero
899 mx+= get_symbol(&s->c, &s->block_state[128 + 32*(mx_context + 16*!!ref)], 1);
900 my+= get_symbol(&s->c, &s->block_state[128 + 32*(my_context + 16*!!ref)], 1);
902 set_blocks(s, level, x, y, l, cb, cr, mx, my, ref, type);
// split node: decode the four children one level deeper
904 decode_q_branch(s, level+1, 2*x+0, 2*y+0);
905 decode_q_branch(s, level+1, 2*x+1, 2*y+0);
906 decode_q_branch(s, level+1, 2*x+0, 2*y+1);
907 decode_q_branch(s, level+1, 2*x+1, 2*y+1);
/**
 * Decode the block tree by calling decode_q_branch() at depth 0.
 * NOTE(review): the loops producing x and y are not visible here - presumably
 * every top-level block position is visited.
 */
911 static void decode_blocks(SnowContext *s){
918 decode_q_branch(s, 0, x, y);
/**
 * Sub-pel motion compensation of one b_w x b_h block.
 * Filtered planes are built from src (horizontal pass, vertical pass and a
 * pass over the 16 bit horizontal intermediate) and the output is blended
 * from them according to the fractional position dx, dy.
 * @param p      plane supplying the filter coefficients; when p is NULL or
 *               p->fast_mc is set, the fixed (20, -5, 1) filter is used
 * @param dx,dy  fractional position, both asserted to be below 16
 */
923 static void mc_block(Plane *p, uint8_t *dst, const uint8_t *src, uint8_t *tmp, int stride, int b_w, int b_h, int dx, int dy){
924 static const uint8_t weight[64]={
// indexed by dx + 16*dy; high nibble -> l, low nibble -> r (both used as hpel[] indices below)
935 static const uint8_t brane[256]={
936 0x00,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x11,0x12,0x12,0x12,0x12,0x12,0x12,0x12,
937 0x04,0x05,0xcc,0xcc,0xcc,0xcc,0xcc,0x41,0x15,0x16,0xcc,0xcc,0xcc,0xcc,0xcc,0x52,
938 0x04,0xcc,0x05,0xcc,0xcc,0xcc,0x41,0xcc,0x15,0xcc,0x16,0xcc,0xcc,0xcc,0x52,0xcc,
939 0x04,0xcc,0xcc,0x05,0xcc,0x41,0xcc,0xcc,0x15,0xcc,0xcc,0x16,0xcc,0x52,0xcc,0xcc,
940 0x04,0xcc,0xcc,0xcc,0x41,0xcc,0xcc,0xcc,0x15,0xcc,0xcc,0xcc,0x16,0xcc,0xcc,0xcc,
941 0x04,0xcc,0xcc,0x41,0xcc,0x05,0xcc,0xcc,0x15,0xcc,0xcc,0x52,0xcc,0x16,0xcc,0xcc,
942 0x04,0xcc,0x41,0xcc,0xcc,0xcc,0x05,0xcc,0x15,0xcc,0x52,0xcc,0xcc,0xcc,0x16,0xcc,
943 0x04,0x41,0xcc,0xcc,0xcc,0xcc,0xcc,0x05,0x15,0x52,0xcc,0xcc,0xcc,0xcc,0xcc,0x16,
944 0x44,0x45,0x45,0x45,0x45,0x45,0x45,0x45,0x55,0x56,0x56,0x56,0x56,0x56,0x56,0x56,
945 0x48,0x49,0xcc,0xcc,0xcc,0xcc,0xcc,0x85,0x59,0x5A,0xcc,0xcc,0xcc,0xcc,0xcc,0x96,
946 0x48,0xcc,0x49,0xcc,0xcc,0xcc,0x85,0xcc,0x59,0xcc,0x5A,0xcc,0xcc,0xcc,0x96,0xcc,
947 0x48,0xcc,0xcc,0x49,0xcc,0x85,0xcc,0xcc,0x59,0xcc,0xcc,0x5A,0xcc,0x96,0xcc,0xcc,
948 0x48,0xcc,0xcc,0xcc,0x49,0xcc,0xcc,0xcc,0x59,0xcc,0xcc,0xcc,0x96,0xcc,0xcc,0xcc,
949 0x48,0xcc,0xcc,0x85,0xcc,0x49,0xcc,0xcc,0x59,0xcc,0xcc,0x96,0xcc,0x5A,0xcc,0xcc,
950 0x48,0xcc,0x85,0xcc,0xcc,0xcc,0x49,0xcc,0x59,0xcc,0x96,0xcc,0xcc,0xcc,0x5A,0xcc,
951 0x48,0x85,0xcc,0xcc,0xcc,0xcc,0xcc,0x49,0x59,0x96,0xcc,0xcc,0xcc,0xcc,0xcc,0x5A,
954 static const uint8_t needs[16]={
// intermediate buffers live on the stack; tmp2t is a variable-length array sized by stride
962 int16_t tmpIt [64*(32+HTAPS_MAX)];
963 uint8_t tmp2t[3][stride*(32+HTAPS_MAX)];
964 int16_t *tmpI= tmpIt;
965 uint8_t *tmp2= tmp2t[0];
966 const uint8_t *hpel[11];
967 assert(dx<16 && dy<16);
968 r= brane[dx + 16*dy]&15;
969 l= brane[dx + 16*dy]>>4;
// union of what the two selected planes require
971 b= needs[l] | needs[r];
// horizontal pass: HTAPS_MAX-1 extra rows are filtered
976 for(y=0; y < b_h+HTAPS_MAX-1; y++){
977 for(x=0; x < b_w; x++){
978 int a_1=src[x + HTAPS_MAX/2-4];
979 int a0= src[x + HTAPS_MAX/2-3];
980 int a1= src[x + HTAPS_MAX/2-2];
981 int a2= src[x + HTAPS_MAX/2-1];
982 int a3= src[x + HTAPS_MAX/2+0];
983 int a4= src[x + HTAPS_MAX/2+1];
984 int a5= src[x + HTAPS_MAX/2+2];
985 int a6= src[x + HTAPS_MAX/2+3];
987 if(!p || p->fast_mc){
// fixed symmetric 6 tap filter
988 am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5);
// symmetric 8 tap filter with the coefficients of the plane
992 am= p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6);
// saturation: a negative value becomes 0, a value above 255 becomes -1 (255 once truncated to 8 bits)
997 if(am&(~255)) am= ~(am>>31);
// skip the horizontal filter margin
1006 src += HTAPS_MAX/2 - 1;
// vertical pass on the source, one extra column
1010 for(y=0; y < b_h; y++){
1011 for(x=0; x < b_w+1; x++){
1012 int a_1=src[x + (HTAPS_MAX/2-4)*stride];
1013 int a0= src[x + (HTAPS_MAX/2-3)*stride];
1014 int a1= src[x + (HTAPS_MAX/2-2)*stride];
1015 int a2= src[x + (HTAPS_MAX/2-1)*stride];
1016 int a3= src[x + (HTAPS_MAX/2+0)*stride];
1017 int a4= src[x + (HTAPS_MAX/2+1)*stride];
1018 int a5= src[x + (HTAPS_MAX/2+2)*stride];
1019 int a6= src[x + (HTAPS_MAX/2+3)*stride];
// rounding shift by 5 for the fixed filter, by 6 for the plane coefficients
1021 if(!p || p->fast_mc)
1022 am= (20*(a2+a3) - 5*(a1+a4) + (a0+a5) + 16)>>5;
1024 am= (p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6) + 32)>>6;
1026 if(am&(~255)) am= ~(am>>31);
// skip the vertical filter margin
1034 src += stride*(HTAPS_MAX/2 - 1);
// vertical pass on the 16 bit intermediate tmpI (row pitch 64); the shifts are twice those of a single pass
1038 for(y=0; y < b_h; y++){
1039 for(x=0; x < b_w; x++){
1040 int a_1=tmpI[x + (HTAPS_MAX/2-4)*64];
1041 int a0= tmpI[x + (HTAPS_MAX/2-3)*64];
1042 int a1= tmpI[x + (HTAPS_MAX/2-2)*64];
1043 int a2= tmpI[x + (HTAPS_MAX/2-1)*64];
1044 int a3= tmpI[x + (HTAPS_MAX/2+0)*64];
1045 int a4= tmpI[x + (HTAPS_MAX/2+1)*64];
1046 int a5= tmpI[x + (HTAPS_MAX/2+2)*64];
1047 int a6= tmpI[x + (HTAPS_MAX/2+3)*64];
1049 if(!p || p->fast_mc)
1050 am= (20*(a2+a3) - 5*(a1+a4) + (a0+a5) + 512)>>10;
1052 am= (p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6) + 2048)>>12;
1053 if(am&(~255)) am= ~(am>>31);
// hpel[] holds pointers into src and the filtered planes; entries 8..10 are other entries moved by one row or one pixel
1062 hpel[ 1]= tmp2t[0] + stride*(HTAPS_MAX/2-1);
1067 hpel[ 6]= tmp2t[1] + 1;
1069 hpel[ 8]= src + stride;
1070 hpel[ 9]= hpel[1] + stride;
1071 hpel[10]= hpel[8] + 1;
// four neighbouring planes selected by the upper part of dx and dy
1074 const uint8_t *src1= hpel[dx/8 + dy/8*4 ];
1075 const uint8_t *src2= hpel[dx/8 + dy/8*4+1];
1076 const uint8_t *src3= hpel[dx/8 + dy/8*4+4];
1077 const uint8_t *src4= hpel[dx/8 + dy/8*4+5];
// bilinear blend: the four weights always sum to 64, +32 rounds, >>6 normalises
1080 for(y=0; y < b_h; y++){
1081 for(x=0; x < b_w; x++){
1082 dst[x]= ((8-dx)*(8-dy)*src1[x] + dx*(8-dy)*src2[x]+
1083 (8-dx)* dy *src3[x] + dx* dy *src4[x]+32)>>6;
// blend of the two planes chosen through brane[]
1092 const uint8_t *src1= hpel[l];
1093 const uint8_t *src2= hpel[r];
1094 int a= weight[((dx&7) + (8*(dy&7)))];
1096 for(y=0; y < b_h; y++){
1097 for(x=0; x < b_w; x++){
// weighted sum of both planes, rounded (+4) and divided by 8
1098 dst[x]= (a*src1[x] + b*src2[x] + 4)>>3;
/**
 * Generate a wrapper named mc_block_hpel<dx><dy><b_w> around mc_block().
 * The wrapper passes NULL as plane (so mc_block() takes its "!p" filter
 * path), a square b_w x b_w block, and a source pointer moved back by
 * HTAPS_MAX/2-1 columns and rows. tmp is a variable-length stack array.
 */
1107 #define mca(dx,dy,b_w)\
1108 static void mc_block_hpel ## dx ## dy ## b_w(uint8_t *dst, const uint8_t *src, int stride, int h){\
1109 uint8_t tmp[stride*(b_w+HTAPS_MAX-1)];\
1111 mc_block(NULL, dst, src-(HTAPS_MAX/2-1)-(HTAPS_MAX/2-1)*stride, tmp, stride, b_w, b_w, dx, dy);\
/**
 * Write the prediction of one block into dst.
 * Intra blocks are filled with the colour of the block for this plane.
 * Other blocks are motion compensated from s->last_picture[block->ref],
 * through mc_block() or through the h264 qpel functions of s->dsp.
 * @param sx,sy  position of the block in the plane
 * @param w,h    plane dimensions used for the edge test
 */
1123 static void pred_block(SnowContext *s, uint8_t *dst, uint8_t *tmp, int stride, int sx, int sy, int b_w, int b_h, BlockNode *block, int plane_index, int w, int h){
1124 if(block->type & BLOCK_INTRA){
1126 const int color = block->color[plane_index];
// the colour byte replicated into all four bytes of a 32 bit word
1127 const int color4= color*0x01010101;
// unrolled fills writing 32, 16, 8 and 4 bytes per row (the selecting conditions are not visible here)
1129 for(y=0; y < b_h; y++){
1130 *(uint32_t*)&dst[0 + y*stride]= color4;
1131 *(uint32_t*)&dst[4 + y*stride]= color4;
1132 *(uint32_t*)&dst[8 + y*stride]= color4;
1133 *(uint32_t*)&dst[12+ y*stride]= color4;
1134 *(uint32_t*)&dst[16+ y*stride]= color4;
1135 *(uint32_t*)&dst[20+ y*stride]= color4;
1136 *(uint32_t*)&dst[24+ y*stride]= color4;
1137 *(uint32_t*)&dst[28+ y*stride]= color4;
1140 for(y=0; y < b_h; y++){
1141 *(uint32_t*)&dst[0 + y*stride]= color4;
1142 *(uint32_t*)&dst[4 + y*stride]= color4;
1143 *(uint32_t*)&dst[8 + y*stride]= color4;
1144 *(uint32_t*)&dst[12+ y*stride]= color4;
1147 for(y=0; y < b_h; y++){
1148 *(uint32_t*)&dst[0 + y*stride]= color4;
1149 *(uint32_t*)&dst[4 + y*stride]= color4;
1152 for(y=0; y < b_h; y++){
1153 *(uint32_t*)&dst[0 + y*stride]= color4;
// generic byte-wise fill
1156 for(y=0; y < b_h; y++){
1157 for(x=0; x < b_w; x++){
1158 dst[x + y*stride]= color;
1163 uint8_t *src= s->last_picture[block->ref].data[plane_index];
// plane 0 uses twice the vector scale of the other planes
1164 const int scale= plane_index ? s->mv_scale : 2*s->mv_scale;
1165 int mx= block->mx*scale;
1166 int my= block->my*scale;
// low 4 bits: fractional position, remaining bits: whole-pixel offset
1167 const int dx= mx&15;
1168 const int dy= my&15;
// b_w 16 -> 0, 8 -> 1, 4 -> 2, 2 -> 3; b_w 32 gives a negative value, which the assert below tolerates
1169 const int tab_index= 3 - (b_w>>2) + (b_w>>4);
// move to the referenced position, backed off by the filter margin
1170 sx += (mx>>4) - (HTAPS_MAX/2-1);
1171 sy += (my>>4) - (HTAPS_MAX/2-1);
1172 src += sx + sy*stride;
// the unsigned casts make negative sx/sy fail the test as well
1173 if( (unsigned)sx >= w - b_w - (HTAPS_MAX-2)
1174 || (unsigned)sy >= h - b_h - (HTAPS_MAX-2)){
// the block plus filter margin leaves the plane: build an edge-extended copy in tmp
1175 ff_emulated_edge_mc(tmp + MB_SIZE, src, stride, b_w+HTAPS_MAX-1, b_h+HTAPS_MAX-1, sx, sy, w, h);
1178 // assert(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h);
1179 // assert(!(b_w&(b_w-1)));
1180 assert(b_w>1 && b_h>1);
1181 assert((tab_index>=0 && tab_index<4) || b_w==32);
// generic path: position not on the quarter-pel grid, unusual block shape, non power of two width, or fast_mc off
1182 if((dx&3) || (dy&3) || !(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h) || (b_w&(b_w-1)) || !s->plane[plane_index].fast_mc )
1183 mc_block(&s->plane[plane_index], dst, src, tmp, stride, b_w, b_h, dx, dy);
// two calls of table 0 per 16-row band, at columns 0 and 16
// NOTE(review): the +3 offsets presumably undo the (HTAPS_MAX/2-1) margin - this only matches if HTAPS_MAX is 8
1186 for(y=0; y<b_h; y+=16){
1187 s->dsp.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + y*stride, src + 3 + (y+3)*stride,stride);
1188 s->dsp.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + 16 + y*stride, src + 19 + (y+3)*stride,stride);
1191 s->dsp.put_h264_qpel_pixels_tab[tab_index ][dy+(dx>>2)](dst,src + 3 + 3*stride,stride);
// block twice as wide as high: two calls side by side with the next table
1192 else if(b_w==2*b_h){
1193 s->dsp.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst ,src + 3 + 3*stride,stride);
1194 s->dsp.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst+b_h,src + 3 + b_h + 3*stride,stride);
// remaining case: two calls stacked vertically, b_w rows apart
1197 s->dsp.put_h264_qpel_pixels_tab[tab_index ][dy+(dx>>2)](dst ,src + 3 + 3*stride ,stride);
1198 s->dsp.put_h264_qpel_pixels_tab[tab_index ][dy+(dx>>2)](dst+b_w*stride,src + 3 + 3*stride+b_w*stride,stride);
/**
 * Blend the four predictions block[0..3] with the four quadrants of the
 * obmc weight window and combine the result with the lines of the slice
 * buffer sb.
 * @param add   selects between the two tails of the inner loop: writing the
 *              clipped sum to dst8, or subtracting the prediction from the
 *              slice buffer line (the selecting condition is not visible here)
 */
1203 void ff_snow_inner_add_yblock(const uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h,
1204 int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8){
1207 for(y=0; y<b_h; y++){
1208 //FIXME ugly misuse of obmc_stride
// obmc1..obmc4: the same row in the left/right halves of the upper and lower part of the window
1209 const uint8_t *obmc1= obmc + y*obmc_stride;
1210 const uint8_t *obmc2= obmc1+ (obmc_stride>>1);
1211 const uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1);
1212 const uint8_t *obmc4= obmc3+ (obmc_stride>>1);
1213 dst = slice_buffer_get_line(sb, src_y + y);
1214 for(x=0; x<b_w; x++){
// weighted sum of the four predictions
1215 int v= obmc1[x] * block[3][x + y*src_stride]
1216 +obmc2[x] * block[2][x + y*src_stride]
1217 +obmc3[x] * block[1][x + y*src_stride]
1218 +obmc4[x] * block[0][x + y*src_stride];
// rescaling of the weighted sum (the conditions around these two shifts are not visible here)
1220 v <<= 8 - LOG2_OBMC_MAX;
1222 v >>= 8 - FRAC_BITS;
// add the buffer value, round away the FRAC_BITS fraction, saturate to 0..255
1225 v += dst[x + src_x];
1226 v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS;
1227 if(v&(~255)) v= ~(v>>31);
1228 dst8[x + y*src_stride] = v;
1230 dst[x + src_x] -= v;
1236 //FIXME name cleanup (b_w, block_w, b_width stuff)
/**
 * Predict one block position with overlapped block motion compensation and
 * add it to, or subtract it from, the target.
 * The four BlockNodes around (b_x, b_y) each give a prediction (identical
 * neighbours are detected with same_block()); the predictions are weighted
 * with the obmc window.
 * @param sliced non-zero: the blending is delegated to s->dwt.inner_add_yblock
 *               on the slice buffer sb
 */
1237 static av_always_inline void add_yblock(SnowContext *s, int sliced, slice_buffer *sb, IDWTELEM *dst, uint8_t *dst8, const uint8_t *obmc, int src_x, int src_y, int b_w, int b_h, int w, int h, int dst_stride, int src_stride, int obmc_stride, int b_x, int b_y, int add, int offset_dst, int plane_index){
1238 const int b_width = s->b_width << s->block_max_depth;
1239 const int b_height= s->b_height << s->block_max_depth;
1240 const int b_stride= b_width;
// the four nodes meeting at this position: left/right, top/bottom
1241 BlockNode *lt= &s->block[b_x + b_y*b_stride];
1242 BlockNode *rt= lt+1;
1243 BlockNode *lb= lt+b_stride;
1244 BlockNode *rb= lb+1;
1246 int tmp_step= src_stride >= 7*MB_SIZE ? MB_SIZE : MB_SIZE*src_stride;
1247 uint8_t *tmp = s->scratchbuf;
// special cases at the right and bottom border of the block grid
1254 }else if(b_x + 1 >= b_width){
1261 }else if(b_y + 1 >= b_height){
// clipping of the block against the plane boundaries
1266 if(src_x<0){ //FIXME merge with prev & always round internal width up to *16
1269 if(!sliced && !offset_dst)
1272 }else if(src_x + b_w > w){
1276 obmc -= src_y*obmc_stride;
1278 if(!sliced && !offset_dst)
1279 dst -= src_y*dst_stride;
1281 }else if(src_y + b_h> h){
// nothing is left after clipping
1285 if(b_w<=0 || b_h<=0) return;
1287 assert(src_stride > 2*MB_SIZE + 5);
1289 if(!sliced && offset_dst)
1290 dst += src_x + src_y*dst_stride;
1291 dst8+= src_x + src_y*src_stride;
1292 // src += src_x + src_y*src_stride;
1294 ptmp= tmp + 3*tmp_step;
// one prediction per node; same_block() detects nodes whose prediction equals an earlier one
1297 pred_block(s, block[0], tmp, src_stride, src_x, src_y, b_w, b_h, lt, plane_index, w, h);
1299 if(same_block(lt, rt)){
1304 pred_block(s, block[1], tmp, src_stride, src_x, src_y, b_w, b_h, rt, plane_index, w, h);
1307 if(same_block(lt, lb)){
1309 }else if(same_block(rt, lb)){
1314 pred_block(s, block[2], tmp, src_stride, src_x, src_y, b_w, b_h, lb, plane_index, w, h);
1317 if(same_block(lt, rb) ){
1319 }else if(same_block(rt, rb)){
1321 }else if(same_block(lb, rb)){
1325 pred_block(s, block[3], tmp, src_stride, src_x, src_y, b_w, b_h, rb, plane_index, w, h);
// variant applying the four window quadrants one after another
// NOTE(review): whether this variant is compiled is decided by lines not visible here - confirm
1328 for(y=0; y<b_h; y++){
1329 for(x=0; x<b_w; x++){
1330 int v= obmc [x + y*obmc_stride] * block[3][x + y*src_stride] * (256/OBMC_MAX);
1331 if(add) dst[x + y*dst_stride] += v;
1332 else dst[x + y*dst_stride] -= v;
1335 for(y=0; y<b_h; y++){
1336 uint8_t *obmc2= obmc + (obmc_stride>>1);
1337 for(x=0; x<b_w; x++){
1338 int v= obmc2[x + y*obmc_stride] * block[2][x + y*src_stride] * (256/OBMC_MAX);
1339 if(add) dst[x + y*dst_stride] += v;
1340 else dst[x + y*dst_stride] -= v;
1343 for(y=0; y<b_h; y++){
1344 uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
1345 for(x=0; x<b_w; x++){
1346 int v= obmc3[x + y*obmc_stride] * block[1][x + y*src_stride] * (256/OBMC_MAX);
1347 if(add) dst[x + y*dst_stride] += v;
1348 else dst[x + y*dst_stride] -= v;
1351 for(y=0; y<b_h; y++){
1352 uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
1353 uint8_t *obmc4= obmc3+ (obmc_stride>>1);
1354 for(x=0; x<b_w; x++){
1355 int v= obmc4[x + y*obmc_stride] * block[0][x + y*src_stride] * (256/OBMC_MAX);
1356 if(add) dst[x + y*dst_stride] += v;
1357 else dst[x + y*dst_stride] -= v;
// slice buffer case: handled by the function pointer in s->dwt
1362 s->dwt.inner_add_yblock(obmc, obmc_stride, block, b_w, b_h, src_x,src_y, src_stride, sb, add, dst8);
// plain buffer case: all four quadrants are combined per pixel
1364 for(y=0; y<b_h; y++){
1365 //FIXME ugly misuse of obmc_stride
1366 const uint8_t *obmc1= obmc + y*obmc_stride;
1367 const uint8_t *obmc2= obmc1+ (obmc_stride>>1);
1368 const uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1);
1369 const uint8_t *obmc4= obmc3+ (obmc_stride>>1);
1370 for(x=0; x<b_w; x++){
1371 int v= obmc1[x] * block[3][x + y*src_stride]
1372 +obmc2[x] * block[2][x + y*src_stride]
1373 +obmc3[x] * block[1][x + y*src_stride]
1374 +obmc4[x] * block[0][x + y*src_stride];
1376 v <<= 8 - LOG2_OBMC_MAX;
1378 v >>= 8 - FRAC_BITS;
// add the buffer value, round away the FRAC_BITS fraction, saturate to 0..255
1381 v += dst[x + y*dst_stride];
1382 v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS;
1383 if(v&(~255)) v= ~(v>>31);
1384 dst8[x + y*src_stride] = v;
1386 dst[x + y*dst_stride] -= v;
/**
 * Apply the prediction for block row mb_y of one plane, working on the lines
 * of the slice buffer sb.
 * For keyframes (or when bit 512 of avctx->debug is set) no motion
 * compensation is done: the lines are only offset by 128 and, in one branch,
 * written to the current picture.
 */
1394 static av_always_inline void predict_slice_buffered(SnowContext *s, slice_buffer * sb, IDWTELEM * old_buffer, int plane_index, int add, int mb_y){
1395 Plane *p= &s->plane[plane_index];
1396 const int mb_w= s->b_width << s->block_max_depth;
1397 const int mb_h= s->b_height << s->block_max_depth;
// planes other than 0 use half the block size and the next obmc table
1399 int block_size = MB_SIZE >> s->block_max_depth;
1400 int block_w = plane_index ? block_size/2 : block_size;
1401 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
1402 int obmc_stride= plane_index ? block_size : 2*block_size;
1403 int ref_stride= s->current_picture.linesize[plane_index];
1404 uint8_t *dst8= s->current_picture.data[plane_index];
1408 if(s->keyframe || (s->avctx->debug&512)){
// rows of this block row, limited to the plane height
1413 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
1414 // DWTELEM * line = slice_buffer_get_line(sb, y);
1415 IDWTELEM * line = sb->line[y];
1417 // int v= buf[x + y*w] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
// add the 128 offset and the rounding constant in FRAC_BITS fixed point
1418 int v= line[x] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
// saturate to 0..255 before the store
1420 if(v&(~255)) v= ~(v>>31);
1421 dst8[x + y*ref_stride]= v;
1425 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
1426 // DWTELEM * line = slice_buffer_get_line(sb, y);
1427 IDWTELEM * line = sb->line[y];
// remove the 128 offset
1429 line[x] -= 128 << FRAC_BITS;
1430 // buf[x + y*w]-= 128<<FRAC_BITS;
// mb_w+1 positions: every block is shifted by half a block, so the blocks straddle the grid
1438 for(mb_x=0; mb_x<=mb_w; mb_x++){
1439 add_yblock(s, 1, sb, old_buffer, dst8, obmc,
1440 block_w*mb_x - block_w/2,
1441 block_w*mb_y - block_w/2,
1444 w, ref_stride, obmc_stride,
1446 add, 0, plane_index);
/**
 * Apply the prediction for block row mb_y of one plane, working on the plain
 * coefficient buffer buf (row pitch w).
 * For keyframes (or when bit 512 of avctx->debug is set) no motion
 * compensation is done: the samples are only offset by 128 and, in one
 * branch, written to the current picture.
 */
1450 static av_always_inline void predict_slice(SnowContext *s, IDWTELEM *buf, int plane_index, int add, int mb_y){
1451 Plane *p= &s->plane[plane_index];
1452 const int mb_w= s->b_width << s->block_max_depth;
1453 const int mb_h= s->b_height << s->block_max_depth;
// planes other than 0 use half the block size and the next obmc table
1455 int block_size = MB_SIZE >> s->block_max_depth;
1456 int block_w = plane_index ? block_size/2 : block_size;
1457 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
1458 const int obmc_stride= plane_index ? block_size : 2*block_size;
1459 int ref_stride= s->current_picture.linesize[plane_index];
1460 uint8_t *dst8= s->current_picture.data[plane_index];
1464 if(s->keyframe || (s->avctx->debug&512)){
// rows of this block row, limited to the plane height
1469 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
// add the 128 offset and the rounding constant in FRAC_BITS fixed point
1471 int v= buf[x + y*w] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
// saturate to 0..255 before the store
1473 if(v&(~255)) v= ~(v>>31);
1474 dst8[x + y*ref_stride]= v;
1478 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
// remove the 128 offset
1480 buf[x + y*w]-= 128<<FRAC_BITS;
// mb_w+1 positions: every block is shifted by half a block, so the blocks straddle the grid
1488 for(mb_x=0; mb_x<=mb_w; mb_x++){
1489 add_yblock(s, 0, NULL, buf, dst8, obmc,
1490 block_w*mb_x - block_w/2,
1491 block_w*mb_y - block_w/2,
1494 w, ref_stride, obmc_stride,
1496 add, 1, plane_index);
/**
 * Run predict_slice() for every block row of a plane.
 * The loop bound is inclusive, so mb_h+1 rows are processed.
 */
1500 static av_always_inline void predict_plane(SnowContext *s, IDWTELEM *buf, int plane_index, int add){
1501 const int mb_h= s->b_height << s->block_max_depth;
1503 for(mb_y=0; mb_y<=mb_h; mb_y++)
1504 predict_slice(s, buf, plane_index, add, mb_y);
/**
 * Dequantize rows start_y..end_y-1 of subband b in place in the slice buffer.
 * Nothing is done when s->qlog equals LOSSLESS_QLOG.
 */
1507 static void dequantize_slice_buffered(SnowContext *s, slice_buffer * sb, SubBand *b, IDWTELEM *src, int stride, int start_y, int end_y){
1508 const int w= b->width;
// combined quantiser of frame and band, clipped to the table range
1509 const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
// table value selected by the low bits of qlog, scaled by a power of two from the high bits
1510 const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
1511 const int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
1514 if(s->qlog == LOSSLESS_QLOG) return;
1516 for(y=start_y; y<end_y; y++){
1517 // DWTELEM * line = slice_buffer_get_line_from_address(sb, src + (y * stride));
// band row y lives in buffer line y*stride_line + buf_y_offset, starting at column buf_x_offset
1518 IDWTELEM * line = slice_buffer_get_line(sb, (y * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
// the two forms mirror each other, so that +i and -i map to values of equal magnitude
1522 line[x]= -((-i*qmul + qadd)>>(QEXPSHIFT)); //FIXME try different bias
1524 line[x]= (( i*qmul + qadd)>>(QEXPSHIFT));
/**
 * Add a prediction formed from already processed neighbours to every
 * coefficient of rows start_y..end_y-1 of subband b, in place in the slice
 * buffer.
 * Three predictor forms are visible below; the conditions selecting between
 * them are not visible here (presumably use_median and the column position).
 */
1530 static void correlate_slice_buffered(SnowContext *s, slice_buffer * sb, SubBand *b, IDWTELEM *src, int stride, int inverse, int use_median, int start_y, int end_y){
1531 const int w= b->width;
1534 IDWTELEM * line=0; // silence silly "could be used without having been initialized" warning
// line of the row above the first processed row
1538 line = slice_buffer_get_line(sb, ((start_y - 1) * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
1540 for(y=start_y; y<end_y; y++){
1542 // line = slice_buffer_get_line_from_address(sb, src + (y * stride));
1543 line = slice_buffer_get_line(sb, (y * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
// median of left, top and top-right; left only in the first row or last column
1547 if(y && x+1<w) line[x] += mid_pred(line[x - 1], prev[x], prev[x + 1]);
1548 else line[x] += line[x - 1];
// median of left, top and the gradient left + top - top-left; left only in the first row
1550 if(y) line[x] += mid_pred(line[x - 1], prev[x], line[x - 1] + prev[x] - prev[x - 1]);
1551 else line[x] += line[x - 1];
// top neighbour only
1554 if(y) line[x] += prev[x];
/**
 * Set the qlog of every subband of the three planes.
 * Plane 2 copies the values of plane 1, orientation 2 copies orientation 1
 * of the same plane and level; all other values are read from the range
 * coder with the header state.
 */
1560 static void decode_qlogs(SnowContext *s){
1561 int plane_index, level, orientation;
1563 for(plane_index=0; plane_index<3; plane_index++){
1564 for(level=0; level<s->spatial_decomposition_count; level++){
// orientation 0 exists only at level 0
1565 for(orientation=level ? 1:0; orientation<4; orientation++){
1567 if (plane_index==2) q= s->plane[1].band[level][orientation].qlog;
1568 else if(orientation==2) q= s->plane[plane_index].band[level][1].qlog;
1569 else q= get_symbol(&s->c, s->header_state, 1);
1570 s->plane[plane_index].band[level][orientation].qlog= q;
/**
 * Read an unsigned symbol from the header into the local variable tmp and
 * log an error naming dst together with the value read.
 * NOTE(review): presumably the error path is taken when check is false and
 * tmp is stored into dst otherwise - those lines are not visible here.
 * The macro uses the names s and tmp of the enclosing function.
 */
1576 #define GET_S(dst, check) \
1577 tmp= get_symbol(&s->c, s->header_state, 0);\
1579 av_log(s->avctx, AV_LOG_ERROR, "Error " #dst " is %d\n", tmp);\
/**
 * Parse the frame header from the range coder into the SnowContext.
 * Fields read in the keyframe / always_reset case replace the stored values;
 * the fields at the end are coded as signed deltas that are added to the
 * stored values.
 * The three error messages for an unsupported spatial_decomposition_type,
 * too large spatial_decomposition_count and invalid block_max_depth end with
 * a newline, like the message of GET_S, so later log output starts on its
 * own line.
 */
1584 static int decode_header(SnowContext *s){
1585 int plane_index, tmp;
// the keyframe flag is decoded with its own freshly reset state
1588 memset(kstate, MID_STATE, sizeof(kstate));
1590 s->keyframe= get_rac(&s->c, kstate);
1591 if(s->keyframe || s->always_reset){
1593 s->spatial_decomposition_type=
1597 s->block_max_depth= 0;
// GET_S reads an unsigned symbol into tmp and logs an error that names the field
1600 GET_S(s->version, tmp <= 0U)
1601 s->always_reset= get_rac(&s->c, s->header_state);
1602 s->temporal_decomposition_type= get_symbol(&s->c, s->header_state, 0);
1603 s->temporal_decomposition_count= get_symbol(&s->c, s->header_state, 0);
1604 GET_S(s->spatial_decomposition_count, 0 < tmp && tmp <= MAX_DECOMPOSITIONS)
1605 s->colorspace_type= get_symbol(&s->c, s->header_state, 0);
1606 s->chroma_h_shift= get_symbol(&s->c, s->header_state, 0);
1607 s->chroma_v_shift= get_symbol(&s->c, s->header_state, 0);
1608 s->spatial_scalability= get_rac(&s->c, s->header_state);
1609 // s->rate_scalability= get_rac(&s->c, s->header_state);
// the coded value is one less than the number of reference frames
1610 GET_S(s->max_ref_frames, tmp < (unsigned)MAX_REF_FRAMES)
1611 s->max_ref_frames++;
// optional update of the interpolation filters, coded for planes 0 and 1
1617 if(get_rac(&s->c, s->header_state)){
1618 for(plane_index=0; plane_index<2; plane_index++){
1619 int htaps, i, sum=0;
1620 Plane *p= &s->plane[plane_index];
1621 p->diag_mc= get_rac(&s->c, s->header_state);
1622 htaps= get_symbol(&s->c, s->header_state, 0)*2 + 2;
1623 if((unsigned)htaps > HTAPS_MAX || htaps==0)
// coefficients htaps/2 .. 1 are coded as magnitudes; odd indices are negated
1626 for(i= htaps/2; i; i--){
1627 p->hcoeff[i]= get_symbol(&s->c, s->header_state, 0) * (1-2*(i&1));
1628 sum += p->hcoeff[i];
// coefficient 0 is not coded: it is 32 minus the sum of the others
1630 p->hcoeff[0]= 32-sum;
// plane 2 shares the filter settings of plane 1
1632 s->plane[2].diag_mc= s->plane[1].diag_mc;
1633 s->plane[2].htaps = s->plane[1].htaps;
1634 memcpy(s->plane[2].hcoeff, s->plane[1].hcoeff, sizeof(s->plane[1].hcoeff));
// optional update of the decomposition count
1636 if(get_rac(&s->c, s->header_state)){
1637 GET_S(s->spatial_decomposition_count, 0 < tmp && tmp <= MAX_DECOMPOSITIONS)
1642 s->spatial_decomposition_type+= get_symbol(&s->c, s->header_state, 1);
1643 if(s->spatial_decomposition_type > 1U){
1644 av_log(s->avctx, AV_LOG_ERROR, "spatial_decomposition_type %d not supported\n", s->spatial_decomposition_type);
// the smaller chroma dimension must stay above zero after spatial_decomposition_count-1 halvings
1647 if(FFMIN(s->avctx-> width>>s->chroma_h_shift,
1648 s->avctx->height>>s->chroma_v_shift) >> (s->spatial_decomposition_count-1) <= 0){
1649 av_log(s->avctx, AV_LOG_ERROR, "spatial_decomposition_count %d too large for size\n", s->spatial_decomposition_count);
// signed deltas relative to the stored values
1653 s->qlog += get_symbol(&s->c, s->header_state, 1);
1654 s->mv_scale += get_symbol(&s->c, s->header_state, 1);
1655 s->qbias += get_symbol(&s->c, s->header_state, 1);
1656 s->block_max_depth+= get_symbol(&s->c, s->header_state, 1);
// only the depths 0 and 1 are accepted; the field is reset to 0 otherwise
1657 if(s->block_max_depth > 1 || s->block_max_depth < 0){
1658 av_log(s->avctx, AV_LOG_ERROR, "block_max_depth= %d is too large\n", s->block_max_depth);
1659 s->block_max_depth= 0;
/**
 * Iterate over QROOT entries, multiplying v by 2^(1/QROOT) in every step, so
 * that v doubles over the whole loop.
 * NOTE(review): presumably each step stores v into qexp[] - confirm.
 */
1666 static void init_qexp(void){
1670 for(i=0; i<QROOT; i++){
1672 v *= pow(2, 1.0 / QROOT);
/**
 * Initialisation of the SnowContext in avctx->priv_data: dsp and dwt
 * function tables, the spatial coefficient buffers, the motion vector
 * scaling table and the scratch buffer.
 * NOTE(review): none of the allocations nor the get_buffer() call are
 * checked in the lines visible here - confirm.
 */
1676 static av_cold int common_init(AVCodecContext *avctx){
1677 SnowContext *s = avctx->priv_data;
1682 s->max_ref_frames=1; //just make sure its not an invalid value in case of no initial keyframe
1684 dsputil_init(&s->dsp, avctx);
1685 ff_dwt_init(&s->dwt);
// the qpel tables of sizes 0 and 1 are overwritten with the h264 qpel functions
1688 s->dsp.put_qpel_pixels_tab [0][dy+dx/4]=\
1689 s->dsp.put_no_rnd_qpel_pixels_tab[0][dy+dx/4]=\
1690 s->dsp.put_h264_qpel_pixels_tab[0][dy+dx/4];\
1691 s->dsp.put_qpel_pixels_tab [1][dy+dx/4]=\
1692 s->dsp.put_no_rnd_qpel_pixels_tab[1][dy+dx/4]=\
1693 s->dsp.put_h264_qpel_pixels_tab[1][dy+dx/4];
// the half-pel put tables of sizes 0 and 1 are overwritten with the mc_block_hpel wrappers for 16 and 8
1712 #define mcfh(dx,dy)\
1713 s->dsp.put_pixels_tab [0][dy/4+dx/8]=\
1714 s->dsp.put_no_rnd_pixels_tab[0][dy/4+dx/8]=\
1715 mc_block_hpel ## dx ## dy ## 16;\
1716 s->dsp.put_pixels_tab [1][dy/4+dx/8]=\
1717 s->dsp.put_no_rnd_pixels_tab[1][dy/4+dx/8]=\
1718 mc_block_hpel ## dx ## dy ## 8;
1728 // dec += FFMAX(s->chroma_h_shift, s->chroma_v_shift);
1730 width= s->avctx->width;
1731 height= s->avctx->height;
// one coefficient per pixel of the full frame, for each of the two element types
1733 s->spatial_idwt_buffer= av_mallocz(width*height*sizeof(IDWTELEM));
1734 s->spatial_dwt_buffer= av_mallocz(width*height*sizeof(DWTELEM)); //FIXME this does not belong here
// scale_mv_ref[i][j] = 256*(i+1)/(j+1): factor with 8 fractional bits, used by pred_mv()
1736 for(i=0; i<MAX_REF_FRAMES; i++)
1737 for(j=0; j<MAX_REF_FRAMES; j++)
1738 scale_mv_ref[i][j] = 256*(i+1)/(j+1);
// the linesize of the buffer obtained here determines the scratch buffer size
1740 s->avctx->get_buffer(s->avctx, &s->mconly_picture);
1741 s->scratchbuf = av_malloc(s->mconly_picture.linesize[0]*7*MB_SIZE);
/**
 * Set up the plane dimensions and the geometry of every subband (buffer
 * pointers, strides, width/height, slice buffer offsets) from the header
 * values, and (re)allocate the x_coeff array of each subband.
 */
1746 static int common_init_after_header(AVCodecContext *avctx){
1747 SnowContext *s = avctx->priv_data;
1748 int plane_index, level, orientation;
1750 for(plane_index=0; plane_index<3; plane_index++){
1751 int w= s->avctx->width;
1752 int h= s->avctx->height;
// chroma subsampling (the guarding condition is not visible here - presumably plane_index != 0)
1755 w>>= s->chroma_h_shift;
1756 h>>= s->chroma_v_shift;
1758 s->plane[plane_index].width = w;
1759 s->plane[plane_index].height= h;
// from the highest level index down to 0
1761 for(level=s->spatial_decomposition_count-1; level>=0; level--){
// orientation 0 exists only at level 0
1762 for(orientation=level ? 1 : 0; orientation<4; orientation++){
1763 SubBand *b= &s->plane[plane_index].band[level][orientation];
1765 b->buf= s->spatial_dwt_buffer;
// stride in elements and in slice buffer lines; both double for every level further from the highest index
1767 b->stride= s->plane[plane_index].width << (s->spatial_decomposition_count - level);
// halved dimensions, rounded up when the respective orientation bit is clear
1768 b->width = (w + !(orientation&1))>>1;
1769 b->height= (h + !(orientation>1))>>1;
1771 b->stride_line = 1 << (s->spatial_decomposition_count - level);
1772 b->buf_x_offset = 0;
1773 b->buf_y_offset = 0;
// offsets for the bands stored in the right half / on the interleaved lines (guarding conditions not visible here)
1777 b->buf_x_offset = (w+1)>>1;
1780 b->buf += b->stride>>1;
1781 b->buf_y_offset = b->stride_line >> 1;
// same element offset in the idwt buffer as in the dwt buffer
1783 b->ibuf= s->spatial_idwt_buffer + (b->buf - s->spatial_dwt_buffer);
// the parent is the band of the same orientation one level lower
1786 b->parent= &s->plane[plane_index].band[level-1][orientation];
1787 //FIXME avoid this realloc
1788 av_freep(&b->x_coeff);
1789 b->x_coeff=av_mallocz(((b->width+1) * b->height+1)*sizeof(x_and_coeff));
/**
 * Accumulate the squared difference between the buffers r0 and r1 of plane p
 * into a coarse score grid.
 * Every pixel is attributed to the grid cell derived from its position, the
 * step of the level and Q2_STEP. The grid is cleared first.
 */
1804 static void find_sse(SnowContext *s, Plane *p, int *score, int score_stride, IDWTELEM *r0, IDWTELEM *r1, int level, int orientation){
1805 SubBand *b= &p->band[level][orientation];
// pixel distance between neighbouring coefficients of this level
1809 int step= 1 << (s->spatial_decomposition_count - level);
1816 //FIXME bias for nonzero ?
1818 memset(score, 0, sizeof(*score)*score_stride*((p->height + Q2_STEP-1)/Q2_STEP));
1819 for(y=0; y<p->height; y++){
1820 for(x=0; x<p->width; x++){
// nearest coefficient (rounded division by step), then the Q2_STEP cell containing it
1821 int sx= (x-xo + step/2) / step / Q2_STEP;
1822 int sy= (y-yo + step/2) / step / Q2_STEP;
1823 int v= r0[x + y*p->width] - r1[x + y*p->width];
1824 assert(sx>=0 && sy>=0 && sx < score_stride);
1826 score[sx + sy*score_stride] += v*v;
// guards against overflow of the accumulated sum
1827 assert(score[sx + sy*score_stride] >= 0);
/**
 * Dequantize every subband of plane p inside buffer.
 * Each band is located in buffer at the same element offset that its ibuf
 * has inside s->spatial_idwt_buffer.
 */
1832 static void dequantize_all(SnowContext *s, Plane *p, IDWTELEM *buffer, int width, int height){
1833 int level, orientation;
1835 for(level=0; level<s->spatial_decomposition_count; level++){
// orientation 0 exists only at level 0
1836 for(orientation=level ? 1 : 0; orientation<4; orientation++){
1837 SubBand *b= &p->band[level][orientation];
1838 IDWTELEM *dst= buffer + (b->ibuf - s->spatial_idwt_buffer);
1840 dequantize(s, b, dst, b->stride);
/**
 * Transform buffer with ff_spatial_dwt(), quantize every subband and then try
 * to reduce the magnitude of coefficients by one.
 * A reduction is kept when the squared error of its score cell does not grow
 * by more than a threshold derived from s->m.lambda. The result is copied to
 * s->spatial_idwt_buffer.
 * All work buffers are variable-length arrays on the stack, sized by
 * height * stride and by the score grid.
 */
1845 static void dwt_quantize(SnowContext *s, Plane *p, DWTELEM *buffer, int width, int height, int stride, int type){
1846 int level, orientation, ys, xs, x, y, pass;
1847 IDWTELEM best_dequant[height * stride];
1848 IDWTELEM idwt2_buffer[height * stride];
1849 const int score_stride= (width + 10)/Q2_STEP;
1850 int best_score[(width + 10)/Q2_STEP * (height + 10)/Q2_STEP]; //FIXME size
1851 int score[(width + 10)/Q2_STEP * (height + 10)/Q2_STEP]; //FIXME size
// allowed growth of the squared error per cell
1852 int threshold= (s->m.lambda * s->m.lambda) >> 6;
1854 //FIXME pass the copy cleanly ?
1856 // memcpy(dwt_buffer, buffer, height * stride * sizeof(DWTELEM));
1857 ff_spatial_dwt(buffer, width, height, stride, type, s->spatial_decomposition_count);
// initial quantisation of all bands into best_dequant
1859 for(level=0; level<s->spatial_decomposition_count; level++){
1860 for(orientation=level ? 1 : 0; orientation<4; orientation++){
1861 SubBand *b= &p->band[level][orientation];
1862 IDWTELEM *dst= best_dequant + (b->ibuf - s->spatial_idwt_buffer);
1863 DWTELEM *src= buffer + (b-> buf - s->spatial_dwt_buffer);
1864 assert(src == b->buf); // code does not depend on this but it is true currently
1866 quantize(s, b, dst, src, b->stride, s->qbias);
// a single refinement pass
1869 for(pass=0; pass<1; pass++){
1870 if(s->qbias == 0) //keyframe
1872 for(level=0; level<s->spatial_decomposition_count; level++){
1873 for(orientation=level ? 1 : 0; orientation<4; orientation++){
1874 SubBand *b= &p->band[level][orientation];
1875 IDWTELEM *dst= idwt2_buffer + (b->ibuf - s->spatial_idwt_buffer);
1876 IDWTELEM *best_dst= best_dequant + (b->ibuf - s->spatial_idwt_buffer);
// one coefficient per Q2_STEP x Q2_STEP cell is tested at a time, selected by the phase (xs, ys)
1878 for(ys= 0; ys<Q2_STEP; ys++){
1879 for(xs= 0; xs<Q2_STEP; xs++){
// reference: error per cell of the current best quantisation
1880 memcpy(idwt2_buffer, best_dequant, height * stride * sizeof(IDWTELEM));
1881 dequantize_all(s, p, idwt2_buffer, width, height);
1882 ff_spatial_idwt(idwt2_buffer, width, height, stride, type, s->spatial_decomposition_count);
1883 find_sse(s, p, best_score, score_stride, idwt2_buffer, s->spatial_idwt_buffer, level, orientation);
// candidate: the selected coefficients moved one step towards zero
1884 memcpy(idwt2_buffer, best_dequant, height * stride * sizeof(IDWTELEM));
1885 for(y=ys; y<b->height; y+= Q2_STEP){
1886 for(x=xs; x<b->width; x+= Q2_STEP){
1887 if(dst[x + y*b->stride]<0) dst[x + y*b->stride]++;
1888 if(dst[x + y*b->stride]>0) dst[x + y*b->stride]--;
1889 //FIXME try more than just --
1892 dequantize_all(s, p, idwt2_buffer, width, height);
1893 ff_spatial_idwt(idwt2_buffer, width, height, stride, type, s->spatial_decomposition_count);
1894 find_sse(s, p, score, score_stride, idwt2_buffer, s->spatial_idwt_buffer, level, orientation);
// keep the reduction where the error of the cell stays within the threshold
1895 for(y=ys; y<b->height; y+= Q2_STEP){
1896 for(x=xs; x<b->width; x+= Q2_STEP){
1897 int score_idx= x/Q2_STEP + (y/Q2_STEP)*score_stride;
1898 if(score[score_idx] <= best_score[score_idx] + threshold){
1899 best_score[score_idx]= score[score_idx];
1900 if(best_dst[x + y*b->stride]<0) best_dst[x + y*b->stride]++;
1901 if(best_dst[x + y*b->stride]>0) best_dst[x + y*b->stride]--;
1902 //FIXME copy instead
1911 memcpy(s->spatial_idwt_buffer, best_dequant, height * stride * sizeof(IDWTELEM)); //FIXME work with that directly instead of copy at the end
1914 #endif /* QUANTIZE2==1 */
1916 #define USE_HALFPEL_PLANE 0
/**
 * Allocate and fill the half-pel interpolated planes halfpel[1..3][p] of a
 * frame with the (20, -5, 1) filter.
 * The stored pointers are offset into the allocation by EDGE_WIDTH rows and
 * EDGE_WIDTH columns.
 * NOTE(review): the av_malloc() results are offset without a NULL check in
 * the lines visible here - confirm.
 */
1918 static void halfpel_interpol(SnowContext *s, uint8_t *halfpel[4][4], AVFrame *frame){
1921 assert(!(s->avctx->flags & CODEC_FLAG_EMU_EDGE));
// dimensions of plane p (is_chroma is defined in lines not visible here)
1925 int w= s->avctx->width >>is_chroma;
1926 int h= s->avctx->height >>is_chroma;
1927 int ls= frame->linesize[p];
1928 uint8_t *src= frame->data[p];
1930 halfpel[1][p]= (uint8_t*)av_malloc(ls * (h+2*EDGE_WIDTH)) + EDGE_WIDTH*(1+ls);
1931 halfpel[2][p]= (uint8_t*)av_malloc(ls * (h+2*EDGE_WIDTH)) + EDGE_WIDTH*(1+ls);
1932 halfpel[3][p]= (uint8_t*)av_malloc(ls * (h+2*EDGE_WIDTH)) + EDGE_WIDTH*(1+ls);
// horizontal filter over six neighbours, rounded shift by 5
1939 halfpel[1][p][i]= (20*(src[i] + src[i+1]) - 5*(src[i-1] + src[i+2]) + (src[i-2] + src[i+3]) + 16 )>>5;
// vertical filter over six neighbours, rounded shift by 5
1946 halfpel[2][p][i]= (20*(src[i] + src[i+ls]) - 5*(src[i-ls] + src[i+2*ls]) + (src[i-2*ls] + src[i+3*ls]) + 16 )>>5;
// NOTE(review): same vertical filter - presumably src has been redirected to halfpel[1][p] before this line; confirm
1954 halfpel[3][p][i]= (20*(src[i] + src[i+ls]) - 5*(src[i-ls] + src[i+2*ls]) + (src[i-2*ls] + src[i+3*ls]) + 16 )>>5;
/**
 * Release the oldest reference picture (slot max_ref_frames-1) if it holds
 * data, and free its half-pel planes.
 */
1962 static void release_buffer(AVCodecContext *avctx){
1963 SnowContext *s = avctx->priv_data;
1966 if(s->last_picture[s->max_ref_frames-1].data[0]){
1967 avctx->release_buffer(avctx, &s->last_picture[s->max_ref_frames-1]);
// the stored pointers are offset into their allocation (see halfpel_interpol), so the offset is removed before freeing
1969 if(s->halfpel_plane[s->max_ref_frames-1][1+i/3][i%3])
1970 av_free(s->halfpel_plane[s->max_ref_frames-1][1+i/3][i%3] - EDGE_WIDTH*(1+s->current_picture.linesize[i%3]));
/**
 * Prepare the picture buffers for a new frame: extend the edges of the
 * current picture, rotate it into the reference list, count the usable
 * references and get a buffer for the new current picture.
 * Errors are logged when no reference frame is available and when
 * get_buffer() fails.
 */
1974 static int frame_start(SnowContext *s){
1976 int w= s->avctx->width; //FIXME round up to x16 ?
1977 int h= s->avctx->height;
// planes 1 and 2 are handled at half width/height with half the edge width
1979 if(s->current_picture.data[0]){
1980 s->dsp.draw_edges(s->current_picture.data[0], s->current_picture.linesize[0], w , h , EDGE_WIDTH );
1981 s->dsp.draw_edges(s->current_picture.data[1], s->current_picture.linesize[1], w>>1, h>>1, EDGE_WIDTH/2);
1982 s->dsp.draw_edges(s->current_picture.data[2], s->current_picture.linesize[2], w>>1, h>>1, EDGE_WIDTH/2);
// drop the oldest reference, then shift the list by one slot
1985 release_buffer(s->avctx);
1987 tmp= s->last_picture[s->max_ref_frames-1];
1988 memmove(s->last_picture+1, s->last_picture, (s->max_ref_frames-1)*sizeof(AVFrame));
1989 memmove(s->halfpel_plane+1, s->halfpel_plane, (s->max_ref_frames-1)*sizeof(void*)*4*4);
1990 if(USE_HALFPEL_PLANE && s->current_picture.data[0])
1991 halfpel_interpol(s, s->halfpel_plane[0], &s->current_picture);
// the previous current picture becomes reference 0; the old last slot is reused as current picture
1992 s->last_picture[0]= s->current_picture;
1993 s->current_picture= tmp;
// walk the reference list up to the first empty slot
// NOTE(review): presumably the walk also stops behind a keyframe - the statement under the if is not visible here
1999 for(i=0; i<s->max_ref_frames && s->last_picture[i].data[0]; i++)
2000 if(i && s->last_picture[i-1].key_frame)
2003 if(s->ref_frames==0){
2004 av_log(s->avctx,AV_LOG_ERROR, "No reference frames\n");
2009 s->current_picture.reference= 1;
2010 if(s->avctx->get_buffer(s->avctx, &s->current_picture) < 0){
2011 av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed\n");
2015 s->current_picture.key_frame= s->keyframe;
/**
 * Frees the buffers owned by the context and releases every picture that
 * still holds data.
 */
2020 static av_cold void common_end(SnowContext *s){
2021 int plane_index, level, orientation, i;
// Wavelet work buffers.
2023 av_freep(&s->spatial_dwt_buffer);
2024 av_freep(&s->spatial_idwt_buffer);
// Motion-estimation scratch memory.
2027 av_freep(&s->m.me.scratchpad);
2028 av_freep(&s->m.me.map);
2029 av_freep(&s->m.me.score_map);
2030 av_freep(&s->m.obmc_scratchpad);
2032 av_freep(&s->block);
2033 av_freep(&s->scratchbuf);
// Per-reference arrays and the reference pictures themselves.
2035 for(i=0; i<MAX_REF_FRAMES; i++){
2036 av_freep(&s->ref_mvs[i]);
2037 av_freep(&s->ref_scores[i]);
2038 if(s->last_picture[i].data[0])
2039 s->avctx->release_buffer(s->avctx, &s->last_picture[i]);
// Per-subband coefficient lists; orientation 0 is visited only at level 0.
2042 for(plane_index=0; plane_index<3; plane_index++){
2043 for(level=s->spatial_decomposition_count-1; level>=0; level--){
2044 for(orientation=level ? 1 : 0; orientation<4; orientation++){
2045 SubBand *b= &s->plane[plane_index].band[level][orientation];
2047 av_freep(&b->x_coeff);
// Remaining pictures are released only when they hold data.
2051 if (s->mconly_picture.data[0])
2052 s->avctx->release_buffer(s->avctx, &s->mconly_picture);
2053 if (s->current_picture.data[0])
2054 s->avctx->release_buffer(s->avctx, &s->current_picture);
/**
 * Decoder initialisation; sets the output pixel format to PIX_FMT_YUV420P.
 */
2057 static av_cold int decode_init(AVCodecContext *avctx)
2059 avctx->pix_fmt= PIX_FMT_YUV420P;
/**
 * Decodes one packet into a picture.
 *
 * The range decoder is started on the packet payload, the header is parsed,
 * and each of the three planes is reconstructed slice by slice from its
 * subband coefficients. *picture receives current_picture, or mconly_picture
 * when debug flag 2048 is set, and *data_size is set to sizeof(AVFrame).
 *
 * @param avctx     codec context; priv_data is the SnowContext
 * @param data      output, used as an AVFrame
 * @param data_size output size indicator
 * @param avpkt     input packet (data and size are read)
 */
2066 static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, AVPacket *avpkt){
2067 const uint8_t *buf = avpkt->data;
2068 int buf_size = avpkt->size;
2069 SnowContext *s = avctx->priv_data;
2070 RangeCoder * const c= &s->c;
2072 AVFrame *picture = data;
2073 int level, orientation, plane_index;
// Start the range decoder on the packet payload.
2075 ff_init_range_decoder(c, buf, buf_size);
2076 ff_build_rac_states(c, 0.05*(1LL<<32), 256-8);
2078 s->current_picture.pict_type= FF_I_TYPE; //FIXME I vs. P
// A negative result from decode_header() is tested for here.
2079 if(decode_header(s)<0)
2081 common_init_after_header(avctx);
2083 // realloc slice buffer for the case that spatial_decomposition_count changed
2084 ff_slice_buffer_destroy(&s->sb);
2085 ff_slice_buffer_init(&s->sb, s->plane[0].height, (MB_SIZE >> s->block_max_depth) + s->spatial_decomposition_count * 8 + 1, s->plane[0].width, s->spatial_idwt_buffer);
// fast_mc requires diagonal MC with a 6-tap filter and matching coefficients.
2087 for(plane_index=0; plane_index<3; plane_index++){
2088 Plane *p= &s->plane[plane_index];
2089 p->fast_mc= p->diag_mc && p->htaps==6 && p->hcoeff[0]==40
2090 && p->hcoeff[1]==-10
2096 if(frame_start(s) < 0)
2098 //keyframe flag duplication mess FIXME
2099 if(avctx->debug&FF_DEBUG_PICT_INFO)
2100 av_log(avctx, AV_LOG_ERROR, "keyframe:%d qlog:%d\n", s->keyframe, s->qlog);
// Reconstruct each plane in turn.
2104 for(plane_index=0; plane_index<3; plane_index++){
2105 Plane *p= &s->plane[plane_index];
2109 int decode_state[MAX_DECOMPOSITIONS][4][1]; /* Stored state info for unpack_coeffs. 1 variable per instance. */
// Debug flag 2048: run the prediction alone and copy the pixels of
// current_picture into mconly_picture.
2111 if(s->avctx->debug&2048){
2112 memset(s->spatial_dwt_buffer, 0, sizeof(DWTELEM)*w*h);
2113 predict_plane(s, s->spatial_idwt_buffer, plane_index, 1);
2117 int v= s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x];
2118 s->mconly_picture.data[plane_index][y*s->mconly_picture.linesize[plane_index] + x]= v;
// Unpack the coefficients of every subband; orientation 0 is visited only at level 0.
2124 for(level=0; level<s->spatial_decomposition_count; level++){
2125 for(orientation=level ? 1 : 0; orientation<4; orientation++){
2126 SubBand *b= &p->band[level][orientation];
2127 unpack_coeffs(s, b, b->parent, orientation);
// Block geometry for this plane: chroma planes use half the block size.
2133 const int mb_h= s->b_height << s->block_max_depth;
2134 const int block_size = MB_SIZE >> s->block_max_depth;
2135 const int block_w = plane_index ? block_size/2 : block_size;
2137 DWTCompose cs[MAX_DECOMPOSITIONS];
2142 ff_spatial_idwt_buffered_init(cs, &s->sb, w, h, 1, s->spatial_decomposition_type, s->spatial_decomposition_count);
// The loop runs for mb_y up to and including mb_h.
2143 for(mb_y=0; mb_y<=mb_h; mb_y++){
2145 int slice_starty = block_w*mb_y;
2146 int slice_h = block_w*(mb_y+1);
// Unless this is a key frame or debug flag 512 is set, the slice window is
// moved up by half a block.
2147 if (!(s->keyframe || s->avctx->debug&512)){
2148 slice_starty = FFMAX(0, slice_starty - (block_w >> 1));
2149 slice_h -= (block_w >> 1);
// Decode the subband rows needed for this slice.
2152 for(level=0; level<s->spatial_decomposition_count; level++){
2153 for(orientation=level ? 1 : 0; orientation<4; orientation++){
2154 SubBand *b= &p->band[level][orientation];
2157 int our_mb_start = mb_y;
2158 int our_mb_end = (mb_y + 1);
// Translate the block row range into subband rows at this level.
2160 start_y = (mb_y ? ((block_w * our_mb_start) >> (s->spatial_decomposition_count - level)) + s->spatial_decomposition_count - level + extra: 0);
2161 end_y = (((block_w * our_mb_end) >> (s->spatial_decomposition_count - level)) + s->spatial_decomposition_count - level + extra);
2162 if (!(s->keyframe || s->avctx->debug&512)){
2163 start_y = FFMAX(0, start_y - (block_w >> (1+s->spatial_decomposition_count - level)));
2164 end_y = FFMAX(0, end_y - (block_w >> (1+s->spatial_decomposition_count - level)));
// Clamp the row range to the height of the band.
2166 start_y = FFMIN(b->height, start_y);
2167 end_y = FFMIN(b->height, end_y);
2169 if (start_y != end_y){
// Orientation 0: decode band[0][0], then correlate and dequantize it.
2170 if (orientation == 0){
2171 SubBand * correlate_band = &p->band[0][0];
2172 int correlate_end_y = FFMIN(b->height, end_y + 1);
2173 int correlate_start_y = FFMIN(b->height, (start_y ? start_y + 1 : 0));
2174 decode_subband_slice_buffered(s, correlate_band, &s->sb, correlate_start_y, correlate_end_y, decode_state[0][0]);
2175 correlate_slice_buffered(s, &s->sb, correlate_band, correlate_band->ibuf, correlate_band->stride, 1, 0, correlate_start_y, correlate_end_y);
2176 dequantize_slice_buffered(s, &s->sb, correlate_band, correlate_band->ibuf, correlate_band->stride, start_y, end_y);
2179 decode_subband_slice_buffered(s, b, &s->sb, start_y, end_y, decode_state[level][orientation]);
// Run the inverse DWT in steps of 4 lines until slice_h is reached.
2184 for(; yd<slice_h; yd+=4){
2185 ff_spatial_idwt_buffered_slice(&s->dwt, cs, &s->sb, w, h, 1, s->spatial_decomposition_type, s->spatial_decomposition_count, yd);
// Lossless mode: shift every sample of the new lines left by FRAC_BITS.
2188 if(s->qlog == LOSSLESS_QLOG){
2189 for(; yq<slice_h && yq<h; yq++){
2190 IDWTELEM * line = slice_buffer_get_line(&s->sb, yq);
2192 line[x] <<= FRAC_BITS;
2197 predict_slice_buffered(s, &s->sb, s->spatial_idwt_buffer, plane_index, 1, mb_y);
// Release slice-buffer lines, with the range clamped to the plane height.
2199 y = FFMIN(p->height, slice_starty);
2200 end_y = FFMIN(p->height, slice_h);
2202 ff_slice_buffer_release(&s->sb, y++);
2205 ff_slice_buffer_flush(&s->sb);
2212 release_buffer(avctx);
// Output selection: the normal picture unless debug flag 2048 is set.
2214 if(!(s->avctx->debug&2048))
2215 *picture= s->current_picture;
2217 *picture= s->mconly_picture;
2219 *data_size = sizeof(AVFrame);
// Number of bytes the range decoder has advanced; zero is logged as an error.
2221 bytes_read= c->bytestream - c->bytestream_start;
2222 if(bytes_read ==0) av_log(s->avctx, AV_LOG_ERROR, "error at end of frame\n"); //FIXME
/**
 * Decoder teardown; destroys the slice buffer held in the context.
 */
2227 static av_cold int decode_end(AVCodecContext *avctx)
2229 SnowContext *s = avctx->priv_data;
2231 ff_slice_buffer_destroy(&s->sb);
/** Codec description for the Snow decoder. */
2238 AVCodec snow_decoder = {
// Positional initializer; presumably the size of the private context -- TODO confirm field.
2242 sizeof(SnowContext),
// Capability flags: CODEC_CAP_DR1 only; the horizontal-band flag is commented out.
2247 CODEC_CAP_DR1 /*| CODEC_CAP_DRAW_HORIZ_BAND*/,
2249 .long_name = NULL_IF_CONFIG_SMALL("Snow"),
2252 #if CONFIG_SNOW_ENCODER
/**
 * Encoder initialisation: validates the requested options, copies settings
 * from avctx into the Snow context, allocates motion-estimation buffers and
 * sets up rate control where required.
 */
2253 static av_cold int encode_init(AVCodecContext *avctx)
2255 SnowContext *s = avctx->priv_data;
// The encoder is refused unless strict_std_compliance is at most FF_COMPLIANCE_EXPERIMENTAL.
2258 if(avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL){
2259 av_log(avctx, AV_LOG_ERROR, "This codec is under development, files encoded with it may not be decodable with future versions!!!\n"
2260 "Use vstrict=-2 / -strict -2 to use it anyway.\n");
// DWT_97 combined with fixed qscale and global_quality 0 is rejected.
2264 if(avctx->prediction_method == DWT_97
2265 && (avctx->flags & CODEC_FLAG_QSCALE)
2266 && avctx->global_quality == 0){
2267 av_log(avctx, AV_LOG_ERROR, "The 9/7 wavelet is incompatible with lossless mode.\n");
2271 s->spatial_decomposition_type= avctx->prediction_method; //FIXME add decorrelator type r transform_type
// mv_scale is 2 with CODEC_FLAG_QPEL, else 4; block_max_depth is 1 with CODEC_FLAG_4MV, else 0.
2273 s->mv_scale = (avctx->flags & CODEC_FLAG_QPEL) ? 2 : 4;
2274 s->block_max_depth= (avctx->flags & CODEC_FLAG_4MV ) ? 1 : 0;
// Motion-compensation filter used for all planes: 6 taps, coefficients 40, -10, 2.
2276 for(plane_index=0; plane_index<3; plane_index++){
2277 s->plane[plane_index].diag_mc= 1;
2278 s->plane[plane_index].htaps= 6;
2279 s->plane[plane_index].hcoeff[0]= 40;
2280 s->plane[plane_index].hcoeff[1]= -10;
2281 s->plane[plane_index].hcoeff[2]= 2;
2282 s->plane[plane_index].fast_mc= 1;
2291 s->m.flags = avctx->flags;
2292 s->m.bit_rate= avctx->bit_rate;
// Zero-initialised motion-estimation buffers.
// NOTE(review): none of these av_mallocz() results is checked before
// h263_encode_init() is called.
2295 s->m.me.scratchpad= av_mallocz((avctx->width+64)*2*16*2*sizeof(uint8_t));
2296 s->m.me.map = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t));
2297 s->m.me.score_map = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t));
2298 s->m.obmc_scratchpad= av_mallocz(MB_SIZE*MB_SIZE*12*sizeof(uint32_t));
2299 h263_encode_init(&s->m); //mv_penalty
// Clamp the requested reference count to the range 1..MAX_REF_FRAMES.
2301 s->max_ref_frames = FFMAX(FFMIN(avctx->refs, MAX_REF_FRAMES), 1);
// First pass: allocate a 256-byte stats_out buffer when none is present.
2303 if(avctx->flags&CODEC_FLAG_PASS1){
2304 if(!avctx->stats_out)
2305 avctx->stats_out = av_mallocz(256);
// Rate control is initialised for the second pass or whenever qscale is not fixed.
2307 if((avctx->flags&CODEC_FLAG_PASS2) || !(avctx->flags&CODEC_FLAG_QSCALE)){
2308 if(ff_rate_control_init(&s->m) < 0)
// pass1_rc is set when neither CODEC_FLAG_QSCALE nor CODEC_FLAG_PASS2 is given.
2311 s->pass1_rc= !(avctx->flags & (CODEC_FLAG_QSCALE|CODEC_FLAG_PASS2));
2313 avctx->coded_frame= &s->current_picture;
// Only PIX_FMT_YUV420P has an active case label; the others are commented out.
2314 switch(avctx->pix_fmt){
2315 // case PIX_FMT_YUV444P:
2316 // case PIX_FMT_YUV422P:
2317 case PIX_FMT_YUV420P:
2319 // case PIX_FMT_YUV411P:
2320 // case PIX_FMT_YUV410P:
2321 s->colorspace_type= 0;
2323 /* case PIX_FMT_RGB32:
2327 av_log(avctx, AV_LOG_ERROR, "pixel format not supported\n");
2330 // avcodec_get_chroma_sub_sample(avctx->pix_fmt, &s->chroma_h_shift, &s->chroma_v_shift);
// Chroma subsampling shifts are hard-coded to 1 in both directions.
2331 s->chroma_h_shift= 1;
2332 s->chroma_v_shift= 1;
2334 ff_set_cmp(&s->dsp, s->dsp.me_cmp, s->avctx->me_cmp);
2335 ff_set_cmp(&s->dsp, s->dsp.me_sub_cmp, s->avctx->me_sub_cmp);
// NOTE(review): the return value of get_buffer() is discarded on this line.
2337 s->avctx->get_buffer(s->avctx, &s->input_picture);
// Iterative motion estimation keeps per-reference vector and score arrays with
// one entry per block at the deepest block level.
2339 if(s->avctx->me_method == ME_ITER){
2341 int size= s->b_width * s->b_height << 2*s->block_max_depth;
2342 for(i=0; i<s->max_ref_frames; i++){
2343 s->ref_mvs[i]= av_mallocz(size*sizeof(int16_t[2]));
2344 s->ref_scores[i]= av_mallocz(size*sizeof(uint32_t));
2351 //near copy & paste from dsputil, FIXME
/**
 * Iterates over a w x w block of pix whose rows are line_size bytes apart.
 * encode_q_branch() uses the result as the sum of the samples when it
 * computes a block mean.
 */
2352 static int pix_sum(uint8_t * pix, int line_size, int w)
2357 for (i = 0; i < w; i++) {
2358 for (j = 0; j < w; j++) {
// Skip from the end of this row to the start of the next one.
2362 pix += line_size - w;
2367 //near copy & paste from dsputil, FIXME
/**
 * Iterates over a w x w block of pix whose rows are line_size bytes apart,
 * using the ff_squareTbl lookup table.
 * encode_q_branch() combines the result with the block sum and mean to form
 * its intra score; presumably this is the sum of squared samples -- TODO confirm.
 */
2368 static int pix_norm1(uint8_t * pix, int line_size, int w)
// The table pointer is biased by 256 entries.
2371 uint32_t *sq = ff_squareTbl + 256;
2374 for (i = 0; i < w; i++) {
2375 for (j = 0; j < w; j ++) {
// Skip from the end of this row to the start of the next one.
2379 pix += line_size - w;
// Named slots of the predictor array P used by encode_q_branch().
2387 #define P_TOPRIGHT P[3]
2388 #define P_MEDIAN P[4]
2390 #define FLAG_QPEL 1 //must be 1
/**
 * Chooses the coding of the block at (x, y) on quad-tree level `level` and
 * writes it to the range coder s->c.
 *
 * Three candidates are scored: an inter block (motion search over the
 * available reference frames), an intra block (one colour per plane) and,
 * below the maximum depth, a split into four sub-blocks coded recursively.
 * The inter and intra candidates are first coded into local buffers with
 * copies of the block state; the chosen one is then copied back.
 *
 * The recursive calls add the returned values together as scores.
 */
2392 static int encode_q_branch(SnowContext *s, int level, int x, int y){
// Local output buffers and state copies for the trial inter (p_) and intra (i_) codings.
2393 uint8_t p_buffer[1024];
2394 uint8_t i_buffer[1024];
2395 uint8_t p_state[sizeof(s->block_state)];
2396 uint8_t i_state[sizeof(s->block_state)];
// Remember the current output position of the real coder.
2398 uint8_t *pbbak= s->c.bytestream;
2399 uint8_t *pbbak_start= s->c.bytestream_start;
2400 int score, score2, iscore, i_len, p_len, block_s, sum, base_bits;
// Block grid dimensions at the deepest level, and this block's index in that grid.
2401 const int w= s->b_width << s->block_max_depth;
2402 const int h= s->b_height << s->block_max_depth;
2403 const int rem_depth= s->block_max_depth - level;
2404 const int index= (x + y*w) << rem_depth;
2405 const int block_w= 1<<(LOG2_MB_SIZE - level);
2406 int trx= (x+1)<<rem_depth;
2407 int try= (y+1)<<rem_depth;
// Neighbouring blocks; null_block stands in where a neighbour is unavailable.
2408 const BlockNode *left = x ? &s->block[index-1] : &null_block;
2409 const BlockNode *top = y ? &s->block[index-w] : &null_block;
2410 const BlockNode *right = trx<w ? &s->block[index+1] : &null_block;
2411 const BlockNode *bottom= try<h ? &s->block[index+w] : &null_block;
2412 const BlockNode *tl = y && x ? &s->block[index-w-1] : left;
2413 const BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
// Colour predictions are taken from the left neighbour.
2414 int pl = left->color[0];
2415 int pcb= left->color[1];
2416 int pcr= left->color[2];
2420 const int stride= s->current_picture.linesize[0];
2421 const int uvstride= s->current_picture.linesize[1];
// Pointers to this block in the three planes of the input picture.
2422 uint8_t *current_data[3]= { s->input_picture.data[0] + (x + y* stride)*block_w,
2423 s->input_picture.data[1] + (x + y*uvstride)*block_w/2,
2424 s->input_picture.data[2] + (x + y*uvstride)*block_w/2};
2426 int16_t last_mv[3][2];
2427 int qpel= !!(s->avctx->flags & CODEC_FLAG_QPEL); //unused
2428 const int shift= 1+qpel;
2429 MotionEstContext *c= &s->m.me;
// Context indices for the range coder, derived from the left and top neighbours.
2430 int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref);
2431 int mx_context= av_log2(2*FFABS(left->mx - top->mx));
2432 int my_context= av_log2(2*FFABS(left->my - top->my));
2433 int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
2434 int ref, best_ref, ref_score, ref_mx, ref_my;
2436 assert(sizeof(s->block_state) >= 256);
2438 set_blocks(s, level, x, y, pl, pcb, pcr, 0, 0, 0, BLOCK_INTRA);
2442 // clip predictors / edge ?
// Motion-vector predictors from the left and top-right neighbours.
2444 P_LEFT[0]= left->mx;
2445 P_LEFT[1]= left->my;
2448 P_TOPRIGHT[0]= tr->mx;
2449 P_TOPRIGHT[1]= tr->my;
// Vectors of this block and of its right and bottom neighbours, passed to the search as last_mv.
2451 last_mv[0][0]= s->block[index].mx;
2452 last_mv[0][1]= s->block[index].my;
2453 last_mv[1][0]= right->mx;
2454 last_mv[1][1]= right->my;
2455 last_mv[2][0]= bottom->mx;
2456 last_mv[2][1]= bottom->my;
2463 assert(c-> stride == stride);
2464 assert(c->uvstride == uvstride);
// Penalty factors for the three comparison functions, derived from lambda and lambda2.
2466 c->penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_cmp);
2467 c->sub_penalty_factor= get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_sub_cmp);
2468 c->mb_penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->mb_cmp);
2469 c->current_mv_penalty= c->mv_penalty[s->m.f_code=1] + MAX_MV;
// Search limits relative to this block's position.
2471 c->xmin = - x*block_w - 16+3;
2472 c->ymin = - y*block_w - 16+3;
2473 c->xmax = - (x+1)*block_w + (w<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-3;
2474 c->ymax = - (y+1)*block_w + (h<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-3;
// Clip the predictors to the search limits, scaled by shift.
2476 if(P_LEFT[0] > (c->xmax<<shift)) P_LEFT[0] = (c->xmax<<shift);
2477 if(P_LEFT[1] > (c->ymax<<shift)) P_LEFT[1] = (c->ymax<<shift);
2478 if(P_TOP[0] > (c->xmax<<shift)) P_TOP[0] = (c->xmax<<shift);
2479 if(P_TOP[1] > (c->ymax<<shift)) P_TOP[1] = (c->ymax<<shift);
2480 if(P_TOPRIGHT[0] < (c->xmin<<shift)) P_TOPRIGHT[0]= (c->xmin<<shift);
2481 if(P_TOPRIGHT[0] > (c->xmax<<shift)) P_TOPRIGHT[0]= (c->xmax<<shift); //due to pmx no clip
2482 if(P_TOPRIGHT[1] > (c->ymax<<shift)) P_TOPRIGHT[1]= (c->ymax<<shift);
// Component-wise median of the three predictors.
2484 P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]);
2485 P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);
// The search start is either the left predictor or the median.
2488 c->pred_x= P_LEFT[0];
2489 c->pred_y= P_LEFT[1];
2491 c->pred_x = P_MEDIAN[0];
2492 c->pred_y = P_MEDIAN[1];
// Motion search against every available reference frame.
2497 for(ref=0; ref<s->ref_frames; ref++){
2498 init_ref(c, current_data, s->last_picture[ref].data, NULL, block_w*x, block_w*y, 0);
2500 ref_score= ff_epzs_motion_search(&s->m, &ref_mx, &ref_my, P, 0, /*ref_index*/ 0, last_mv,
2501 (1<<16)>>shift, level-LOG2_MB_SIZE+4, block_w);
2503 assert(ref_mx >= c->xmin);
2504 assert(ref_mx <= c->xmax);
2505 assert(ref_my >= c->ymin);
2506 assert(ref_my <= c->ymax);
// Sub-pel refinement, then the final score; a cost that grows with the
// reference index is added.
2508 ref_score= c->sub_motion_search(&s->m, &ref_mx, &ref_my, ref_score, 0, 0, level-LOG2_MB_SIZE+4, block_w);
2509 ref_score= ff_get_mb_score(&s->m, ref_mx, ref_my, 0, 0, level-LOG2_MB_SIZE+4, block_w, 0);
2510 ref_score+= 2*av_log2(2*ref)*c->penalty_factor;
// Store the per-reference result when the arrays exist (encode_init allocates them for ME_ITER).
2511 if(s->ref_mvs[ref]){
2512 s->ref_mvs[ref][index][0]= ref_mx;
2513 s->ref_mvs[ref][index][1]= ref_my;
2514 s->ref_scores[ref][index]= ref_score;
// A lower score than the best so far is tested for here.
2516 if(score > ref_score){
2523 //FIXME if mb_cmp != SSE then intra cannot be compared currently and mb_penalty vs. lambda2
// Bit count of the real coder, used as the baseline for both trial codings.
2526 base_bits= get_rac_count(&s->c) - 8*(s->c.bytestream - s->c.bytestream_start);
// Trial inter coding into p_buffer using a copy of the block state.
2528 pc.bytestream_start=
2529 pc.bytestream= p_buffer; //FIXME end/start? and at the other stoo
2530 memcpy(p_state, s->block_state, sizeof(s->block_state));
2532 if(level!=s->block_max_depth)
2533 put_rac(&pc, &p_state[4 + s_context], 1);
2534 put_rac(&pc, &p_state[1 + left->type + top->type], 0);
2535 if(s->ref_frames > 1)
2536 put_symbol(&pc, &p_state[128 + 1024 + 32*ref_context], best_ref, 0);
2537 pred_mv(s, &pmx, &pmy, best_ref, left, top, tr);
2538 put_symbol(&pc, &p_state[128 + 32*(mx_context + 16*!!best_ref)], mx - pmx, 1);
2539 put_symbol(&pc, &p_state[128 + 32*(my_context + 16*!!best_ref)], my - pmy, 1);
2540 p_len= pc.bytestream - pc.bytestream_start;
// Add the rate term: bits spent beyond the baseline, weighted by lambda2.
2541 score += (s->lambda2*(get_rac_count(&pc)-base_bits))>>FF_LAMBDA_SHIFT;
// Intra candidate: rounded mean luma l, and a score built from pix_norm1, sum and l.
2543 block_s= block_w*block_w;
2544 sum = pix_sum(current_data[0], stride, block_w);
2545 l= (sum + block_s/2)/block_s;
2546 iscore = pix_norm1(current_data[0], stride, block_w) - 2*l*sum + l*l*block_s;
// Rounded mean of each chroma plane; the chroma score terms are commented out.
2548 block_s= block_w*block_w>>2;
2549 sum = pix_sum(current_data[1], uvstride, block_w>>1);
2550 cb= (sum + block_s/2)/block_s;
2551 // iscore += pix_norm1(&current_mb[1][0], uvstride, block_w>>1) - 2*cb*sum + cb*cb*block_s;
2552 sum = pix_sum(current_data[2], uvstride, block_w>>1);
2553 cr= (sum + block_s/2)/block_s;
2554 // iscore += pix_norm1(&current_mb[2][0], uvstride, block_w>>1) - 2*cr*sum + cr*cr*block_s;
// Trial intra coding into i_buffer using a copy of the block state.
2557 ic.bytestream_start=
2558 ic.bytestream= i_buffer; //FIXME end/start? and at the other stoo
2559 memcpy(i_state, s->block_state, sizeof(s->block_state));
2560 if(level!=s->block_max_depth)
2561 put_rac(&ic, &i_state[4 + s_context], 1);
2562 put_rac(&ic, &i_state[1 + left->type + top->type], 1);
2563 put_symbol(&ic, &i_state[32], l-pl , 1);
2564 put_symbol(&ic, &i_state[64], cb-pcb, 1);
2565 put_symbol(&ic, &i_state[96], cr-pcr, 1);
2566 i_len= ic.bytestream - ic.bytestream_start;
2567 iscore += (s->lambda2*(get_rac_count(&ic)-base_bits))>>FF_LAMBDA_SHIFT;
2569 // assert(score==256*256*256*64-1);
2570 assert(iscore < 255*255*256 + s->lambda2*10);
2571 assert(iscore >= 0);
2572 assert(l>=0 && l<=255);
2573 assert(pl>=0 && pl<=255);
// Scene-change statistic built from the intra and inter scores.
2576 int varc= iscore >> 8;
2577 int vard= score >> 8;
2578 if (vard <= 64 || vard < varc)
2579 c->scene_change_score+= ff_sqrt(vard) - ff_sqrt(varc);
2581 c->scene_change_score+= s->m.qscale;
// Below the maximum depth, code a split flag and the four children, then
// compare their total (plus split overhead) with the unsplit candidates.
2584 if(level!=s->block_max_depth){
2585 put_rac(&s->c, &s->block_state[4 + s_context], 0);
2586 score2 = encode_q_branch(s, level+1, 2*x+0, 2*y+0);
2587 score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+0);
2588 score2+= encode_q_branch(s, level+1, 2*x+0, 2*y+1);
2589 score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+1);
2590 score2+= s->lambda2>>FF_LAMBDA_SHIFT; //FIXME exact split overhead
2592 if(score2 < score && score2 < iscore)
// Commit the intra candidate: copy its bytes to the saved position and adopt its state.
2597 pred_mv(s, &pmx, &pmy, 0, left, top, tr);
2598 memcpy(pbbak, i_buffer, i_len);
2600 s->c.bytestream_start= pbbak_start;
2601 s->c.bytestream= pbbak + i_len;
2602 set_blocks(s, level, x, y, l, cb, cr, pmx, pmy, 0, BLOCK_INTRA);
2603 memcpy(s->block_state, i_state, sizeof(s->block_state));
// Commit the inter candidate in the same way.
2606 memcpy(pbbak, p_buffer, p_len);
2608 s->c.bytestream_start= pbbak_start;
2609 s->c.bytestream= pbbak + p_len;
2610 set_blocks(s, level, x, y, pl, pcb, pcr, mx, my, best_ref, 0);
2611 memcpy(s->block_state, p_state, sizeof(s->block_state));
/**
 * Writes the block information already stored in s->block for the block at
 * (x, y) on quad-tree level `level` to the range coder. No search is done:
 * the split flag, block type, colours or motion vector and reference are
 * coded from the stored BlockNode.
 */
2616 static void encode_q_branch2(SnowContext *s, int level, int x, int y){
2617 const int w= s->b_width << s->block_max_depth;
2618 const int rem_depth= s->block_max_depth - level;
2619 const int index= (x + y*w) << rem_depth;
2620 int trx= (x+1)<<rem_depth;
2621 BlockNode *b= &s->block[index];
// Neighbouring blocks; null_block stands in where a neighbour is unavailable.
2622 const BlockNode *left = x ? &s->block[index-1] : &null_block;
2623 const BlockNode *top = y ? &s->block[index-w] : &null_block;
2624 const BlockNode *tl = y && x ? &s->block[index-w-1] : left;
2625 const BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
// Colour predictions are taken from the left neighbour.
2626 int pl = left->color[0];
2627 int pcb= left->color[1];
2628 int pcr= left->color[2];
// Coder contexts; here the vector contexts already include the 16*!!ref term.
2630 int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref);
2631 int mx_context= av_log2(2*FFABS(left->mx - top->mx)) + 16*!!b->ref;
2632 int my_context= av_log2(2*FFABS(left->my - top->my)) + 16*!!b->ref;
2633 int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
2636 set_blocks(s, level, x, y, pl, pcb, pcr, 0, 0, 0, BLOCK_INTRA);
// Below the maximum depth: code flag 1 when the four sub-blocks compare equal
// with same_block(), otherwise code 0 and recurse into the four children.
2640 if(level!=s->block_max_depth){
2641 if(same_block(b,b+1) && same_block(b,b+w) && same_block(b,b+w+1)){
2642 put_rac(&s->c, &s->block_state[4 + s_context], 1);
2644 put_rac(&s->c, &s->block_state[4 + s_context], 0);
2645 encode_q_branch2(s, level+1, 2*x+0, 2*y+0);
2646 encode_q_branch2(s, level+1, 2*x+1, 2*y+0);
2647 encode_q_branch2(s, level+1, 2*x+0, 2*y+1);
2648 encode_q_branch2(s, level+1, 2*x+1, 2*y+1);
// Intra block: type flag 1, then the three colour differences against the left neighbour.
2652 if(b->type & BLOCK_INTRA){
2653 pred_mv(s, &pmx, &pmy, 0, left, top, tr);
2654 put_rac(&s->c, &s->block_state[1 + (left->type&1) + (top->type&1)], 1);
2655 put_symbol(&s->c, &s->block_state[32], b->color[0]-pl , 1);
2656 put_symbol(&s->c, &s->block_state[64], b->color[1]-pcb, 1);
2657 put_symbol(&s->c, &s->block_state[96], b->color[2]-pcr, 1);
2658 set_blocks(s, level, x, y, b->color[0], b->color[1], b->color[2], pmx, pmy, 0, BLOCK_INTRA);
// Inter block: type flag 0, optional reference index, then the vector
// differences against the prediction from pred_mv().
2660 pred_mv(s, &pmx, &pmy, b->ref, left, top, tr);
2661 put_rac(&s->c, &s->block_state[1 + (left->type&1) + (top->type&1)], 0);
2662 if(s->ref_frames > 1)
2663 put_symbol(&s->c, &s->block_state[128 + 1024 + 32*ref_context], b->ref, 0);
2664 put_symbol(&s->c, &s->block_state[128 + 32*mx_context], b->mx - pmx, 1);
2665 put_symbol(&s->c, &s->block_state[128 + 32*my_context], b->my - pmy, 1);
2666 set_blocks(s, level, x, y, pl, pcb, pcr, b->mx, b->my, b->ref, 0);
/**
 * Estimates the intra colour of block (mb_x, mb_y) for one plane.
 *
 * The block is marked intra with colour 0 for this plane, the overlapped
 * prediction of the surrounding area is accumulated into a scratch buffer,
 * and two sums are formed over the covered pixels: ab (remaining signal times
 * OBMC weight) and aa (squared weight). The result is the rounded ratio
 * (ab << LOG2_OBMC_MAX) / aa, clipped to 0..255.
 */
2670 static int get_dc(SnowContext *s, int mb_x, int mb_y, int plane_index){
2672 Plane *p= &s->plane[plane_index];
// Chroma planes use half the block size and the next entry of the OBMC table.
2673 const int block_size = MB_SIZE >> s->block_max_depth;
2674 const int block_w = plane_index ? block_size/2 : block_size;
2675 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
2676 const int obmc_stride= plane_index ? block_size : 2*block_size;
2677 const int ref_stride= s->current_picture.linesize[plane_index];
2678 uint8_t *src= s-> input_picture.data[plane_index];
// Per-plane region of the OBMC scratchpad, used as IDWTELEM samples.
2679 IDWTELEM *dst= (IDWTELEM*)s->m.obmc_scratchpad + plane_index*block_size*block_size*4; //FIXME change to unsigned
2680 const int b_stride = s->b_width << s->block_max_depth;
2681 const int w= p->width;
2682 const int h= p->height;
2683 int index= mb_x + mb_y*b_stride;
2684 BlockNode *b= &s->block[index];
// Copy of the block taken before it is modified below.
2685 BlockNode backup= *b;
// Treat the block as intra with colour 0 for this plane and clear the scratch area.
2689 b->type|= BLOCK_INTRA;
2690 b->color[plane_index]= 0;
2691 memset(dst, 0, obmc_stride*obmc_stride*sizeof(IDWTELEM));
// Quadrant i lies at block offset ((i&1)-1, (i>>1)-1) from this block,
// shifted by half a block.
2694 int mb_x2= mb_x + (i &1) - 1;
2695 int mb_y2= mb_y + (i>>1) - 1;
2696 int x= block_w*mb_x2 + block_w/2;
2697 int y= block_w*mb_y2 + block_w/2;
2699 add_yblock(s, 0, NULL, dst + ((i&1)+(i>>1)*obmc_stride)*block_w, NULL, obmc,
2700 x, y, block_w, block_w, w, h, obmc_stride, ref_stride, obmc_stride, mb_x2, mb_y2, 0, 0, plane_index);
// Visit only the part of the quadrant that lies inside the plane.
2702 for(y2= FFMAX(y, 0); y2<FFMIN(h, y+block_w); y2++){
2703 for(x2= FFMAX(x, 0); x2<FFMIN(w, x+block_w); x2++){
// Position of (x2, y2) inside this block's OBMC window.
2704 int index= x2-(block_w*mb_x - block_w/2) + (y2-(block_w*mb_y - block_w/2))*obmc_stride;
2705 int obmc_v= obmc[index];
// At picture borders the weight from one block further in that direction is added.
2707 if(y<0) obmc_v += obmc[index + block_w*obmc_stride];
2708 if(x<0) obmc_v += obmc[index + block_w];
2709 if(y+block_w>h) obmc_v += obmc[index - block_w*obmc_stride];
2710 if(x+block_w>w) obmc_v += obmc[index - block_w];
2711 //FIXME precalculate this or simplify it somehow else
// d is the negated accumulated prediction plus half a unit for rounding.
2713 d = -dst[index] + (1<<(FRAC_BITS-1));
2715 ab += (src[x2 + y2*ref_stride] - (d>>FRAC_BITS)) * obmc_v;
2716 aa += obmc_v * obmc_v; //FIXME precalculate this
2722 return av_clip(((ab<<LOG2_OBMC_MAX) + aa/2)/aa, 0, 255); //FIXME we should not need clipping
/**
 * Returns an approximate bit cost for the stored block at (x, y).
 *
 * For intra blocks the cost grows with log2 of the colour differences to the
 * left neighbour; for inter blocks with log2 of |dmx|, |dmy| and the
 * reference index.
 *
 * @param w width in blocks, used to locate the top-right neighbour
 */
2725 static inline int get_block_bits(SnowContext *s, int x, int y, int w){
2726 const int b_stride = s->b_width << s->block_max_depth;
2727 const int b_height = s->b_height<< s->block_max_depth;
2728 int index= x + y*b_stride;
2729 const BlockNode *b = &s->block[index];
// Neighbouring blocks; null_block stands in where a neighbour is unavailable.
2730 const BlockNode *left = x ? &s->block[index-1] : &null_block;
2731 const BlockNode *top = y ? &s->block[index-b_stride] : &null_block;
2732 const BlockNode *tl = y && x ? &s->block[index-b_stride-1] : left;
2733 const BlockNode *tr = y && x+w<b_stride ? &s->block[index-b_stride+w] : tl;
2735 // int mx_context= av_log2(2*FFABS(left->mx - top->mx));
2736 // int my_context= av_log2(2*FFABS(left->my - top->my));
// Positions outside the block grid are tested for here.
2738 if(x<0 || x>=b_stride || y>=b_height)
2745 00001XXXX 15-30 8-15
2747 //FIXME try accurate rate
2748 //FIXME intra and inter predictors if surrounding blocks are not the same type
// Intra: constant 3 plus twice the log2 terms of the three colour differences.
2749 if(b->type & BLOCK_INTRA){
2750 return 3+2*( av_log2(2*FFABS(left->color[0] - b->color[0]))
2751 + av_log2(2*FFABS(left->color[1] - b->color[1]))
2752 + av_log2(2*FFABS(left->color[2] - b->color[2])));
// Inter: pred_mv() fills dmx/dmy for the block's reference.
2754 pred_mv(s, &dmx, &dmy, b->ref, left, top, tr);
2757 return 2*(1 + av_log2(2*FFABS(dmx)) //FIXME kill the 2* can be merged in lambda
2758 + av_log2(2*FFABS(dmy))
2759 + av_log2(2*b->ref));
/**
 * Computes a rate-distortion value for block (mb_x, mb_y) of one plane with
 * its current parameters.
 *
 * The block's prediction over its 2*block_w window is blended into
 * current_picture using the weights in obmc_edged and the pre-accumulated
 * contribution in the OBMC scratchpad. The window is then compared against
 * input_picture, and for plane 0 an approximate bit cost is added.
 *
 * @return distortion + rate * penalty_factor
 */
2763 static int get_block_rd(SnowContext *s, int mb_x, int mb_y, int plane_index, const uint8_t *obmc_edged){
2764 Plane *p= &s->plane[plane_index];
// Chroma planes use half the block size.
2765 const int block_size = MB_SIZE >> s->block_max_depth;
2766 const int block_w = plane_index ? block_size/2 : block_size;
2767 const int obmc_stride= plane_index ? block_size : 2*block_size;
2768 const int ref_stride= s->current_picture.linesize[plane_index];
2769 uint8_t *dst= s->current_picture.data[plane_index];
2770 uint8_t *src= s-> input_picture.data[plane_index];
2771 IDWTELEM *pred= (IDWTELEM*)s->m.obmc_scratchpad + plane_index*block_size*block_size*4;
2772 uint8_t *cur = s->scratchbuf;
// Variable-length temporary on the stack, sized from the line stride.
2773 uint8_t tmp[ref_stride*(2*MB_SIZE+HTAPS_MAX-1)];
2774 const int b_stride = s->b_width << s->block_max_depth;
2775 const int b_height = s->b_height<< s->block_max_depth;
2776 const int w= p->width;
2777 const int h= p->height;
2780 const int penalty_factor= get_penalty_factor(s->lambda, s->lambda2, s->avctx->me_cmp);
// Top-left corner of the window, and the part of the window inside the plane.
2781 int sx= block_w*mb_x - block_w/2;
2782 int sy= block_w*mb_y - block_w/2;
2783 int x0= FFMAX(0,-sx);
2784 int y0= FFMAX(0,-sy);
2785 int x1= FFMIN(block_w*2, w-sx);
2786 int y1= FFMIN(block_w*2, h-sy);
// Render this block's prediction for the whole window into cur.
2789 pred_block(s, cur, tmp, ref_stride, sx, sy, block_w*2, block_w*2, &s->block[mb_x + mb_y*b_stride], plane_index, w, h);
// Blend: weight the prediction, add the pre-accumulated part, scale back.
2791 for(y=y0; y<y1; y++){
2792 const uint8_t *obmc1= obmc_edged + y*obmc_stride;
2793 const IDWTELEM *pred1 = pred + y*obmc_stride;
2794 uint8_t *cur1 = cur + y*ref_stride;
2795 uint8_t *dst1 = dst + sx + (sy+y)*ref_stride;
2796 for(x=x0; x<x1; x++){
2797 #if FRAC_BITS >= LOG2_OBMC_MAX
2798 int v = (cur1[x] * obmc1[x]) << (FRAC_BITS - LOG2_OBMC_MAX);
2800 int v = (cur1[x] * obmc1[x] + (1<<(LOG2_OBMC_MAX - FRAC_BITS-1))) >> (LOG2_OBMC_MAX - FRAC_BITS);
2802 v = (v + pred1[x]) >> FRAC_BITS;
// Values outside 0..255 become 0 when negative and all-ones otherwise.
2803 if(v&(~255)) v= ~(v>>31);
2808 /* copy the regions where obmc[] = (uint8_t)256 */
2809 if(LOG2_OBMC_MAX == 8
2810 && (mb_x == 0 || mb_x == b_stride-1)
2811 && (mb_y == 0 || mb_y == b_height-1)){
2820 for(y=y0; y<y1; y++)
2821 memcpy(dst + sx+x0 + (sy+y)*ref_stride, cur + x0 + y*ref_stride, x1-x0);
2825 /* FIXME rearrange dsputil to fit 32x32 cmp functions */
2826 /* FIXME check alignment of the cmp wavelet vs the encoding wavelet */
2827 /* FIXME cmps overlap but do not cover the wavelet's whole support.
2828 * So improving the score of one block is not strictly guaranteed
2829 * to improve the score of the whole frame, thus iterative motion
2830 * estimation does not always converge. */
// Distortion: the wavelet comparisons use the 32-wide C functions; the other
// branches call me_cmp[0] on 16-wide tiles or on the whole window.
2831 if(s->avctx->me_cmp == FF_CMP_W97)
2832 distortion = ff_w97_32_c(&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, 32);
2833 else if(s->avctx->me_cmp == FF_CMP_W53)
2834 distortion = ff_w53_32_c(&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, 32);
2838 int off = sx+16*(i&1) + (sy+16*(i>>1))*ref_stride;
2839 distortion += s->dsp.me_cmp[0](&s->m, src + off, dst + off, ref_stride, 16);
2844 distortion = s->dsp.me_cmp[0](&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, block_w*2);
2853 rate += get_block_bits(s, mb_x + (i&1) - (i>>1), mb_y + (i>>1), 1);
// One further neighbour is counted when the block is in the second-to-last column.
2855 if(mb_x == b_stride-2)
2856 rate += get_block_bits(s, mb_x + 1, mb_y + 1, 1);
2858 return distortion + rate*penalty_factor;
/**
 * Computes a rate-distortion value for the 2x2 group of blocks whose top-left
 * block is (mb_x, mb_y), for one plane.
 *
 * Nine block-sized tiles around the group are rebuilt into current_picture
 * with add_yblock() and compared against input_picture. The bit cost is
 * collected from get_block_bits() for the group and neighbouring blocks.
 *
 * @return distortion + rate * penalty_factor
 */
2861 static int get_4block_rd(SnowContext *s, int mb_x, int mb_y, int plane_index){
2863 Plane *p= &s->plane[plane_index];
// Chroma planes use half the block size and the next entry of the OBMC table.
2864 const int block_size = MB_SIZE >> s->block_max_depth;
2865 const int block_w = plane_index ? block_size/2 : block_size;
2866 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
2867 const int obmc_stride= plane_index ? block_size : 2*block_size;
2868 const int ref_stride= s->current_picture.linesize[plane_index];
2869 uint8_t *dst= s->current_picture.data[plane_index];
2870 uint8_t *src= s-> input_picture.data[plane_index];
2871 //FIXME zero_dst is const but add_yblock changes dst if add is 0 (this is never the case for dst=zero_dst
2872 // const has only been removed from zero_dst to suppress a warning
2873 static IDWTELEM zero_dst[4096]; //FIXME
2874 const int b_stride = s->b_width << s->block_max_depth;
2875 const int w= p->width;
2876 const int h= p->height;
2879 const int penalty_factor= get_penalty_factor(s->lambda, s->lambda2, s->avctx->me_cmp);
// Tile i lies at block offset ((i%3)-1, (i/3)-1), shifted by half a block.
2882 int mb_x2= mb_x + (i%3) - 1;
2883 int mb_y2= mb_y + (i/3) - 1;
2884 int x= block_w*mb_x2 + block_w/2;
2885 int y= block_w*mb_y2 + block_w/2;
2887 add_yblock(s, 0, NULL, zero_dst, dst, obmc,
2888 x, y, block_w, block_w, w, h, /*dst_stride*/0, ref_stride, obmc_stride, mb_x2, mb_y2, 1, 1, plane_index);
2890 //FIXME find a cleaner/simpler way to skip the outside stuff
// Tile parts outside the plane are copied from src to dst.
2891 for(y2= y; y2<0; y2++)
2892 memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, block_w);
2893 for(y2= h; y2<y+block_w; y2++)
2894 memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, block_w);
2896 for(y2= y; y2<y+block_w; y2++)
2897 memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, -x);
2900 for(y2= y; y2<y+block_w; y2++)
2901 memcpy(dst + w + y2*ref_stride, src + w + y2*ref_stride, x+block_w - w);
// me_cmp index 1 is used for 8-wide tiles and index 0 for 16-wide tiles.
2904 assert(block_w== 8 || block_w==16);
2905 distortion += s->dsp.me_cmp[block_w==8](&s->m, src + x + y*ref_stride, dst + x + y*ref_stride, ref_stride, block_w);
// merged is true when all four blocks of the group compare equal with same_block().
2909 BlockNode *b= &s->block[mb_x+mb_y*b_stride];
2910 int merged= same_block(b,b+1) && same_block(b,b+b_stride) && same_block(b,b+b_stride+1);
2918 rate = get_block_bits(s, mb_x, mb_y, 2);
// When merged, the first four dxy entries (the group itself) are skipped.
2919 for(i=merged?4:0; i<9; i++){
2920 static const int dxy[9][2] = {{0,0},{1,0},{0,1},{1,1},{2,0},{2,1},{-1,2},{0,2},{1,2}};
2921 rate += get_block_bits(s, mb_x + dxy[i][0], mb_y + dxy[i][1], 1);
2924 return distortion + rate*penalty_factor;
/**
 * Encodes the coefficients of one subband with the range coder, using run
 * lengths where the neighbourhood context is all zero.
 *
 * Two passes over the band are made. The first collects run lengths into
 * runs[]; the second writes the symbols. A coefficient's context is built
 * from its left, top-left, top and top-right neighbours plus the co-located
 * parent coefficient.
 *
 * @param src         coefficients of this band, rows `stride` apart
 * @param parent      coefficients of the parent band
 * @param orientation subband orientation
 */
2927 static int encode_subband_c0run(SnowContext *s, SubBand *b, IDWTELEM *src, IDWTELEM *parent, int stride, int orientation){
2928 const int w= b->width;
2929 const int h= b->height;
// Pass 1: gather the neighbourhood of each coefficient.
2941 int /*ll=0, */l=0, lt=0, t=0, rt=0;
2942 v= src[x + y*stride];
2945 t= src[x + (y-1)*stride];
2947 lt= src[x - 1 + (y-1)*stride];
2950 rt= src[x + 1 + (y-1)*stride];
2954 l= src[x - 1 + y*stride];
2956 if(orientation==1) ll= src[y + (x-2)*stride];
2957 else ll= src[x - 2 + y*stride];
// The parent sample is read only when (px, py) lies inside the parent band.
2963 if(px<b->parent->width && py<b->parent->height)
2964 p= parent[px + py*2*stride];
// An all-zero context is tested for here; run lengths are stored in runs[].
2966 if(!(/*ll|*/l|lt|t|rt|p)){
2968 runs[run_index++]= run;
2976 max_index= run_index;
2977 runs[run_index++]= run;
2979 run= runs[run_index++];
// Write the number of runs, then the first run length when one exists.
2981 put_symbol2(&s->c, b->state[30], max_index, 0);
2982 if(run_index <= max_index)
2983 put_symbol2(&s->c, b->state[1], run, 3);
// Fewer than w*40 bytes of output space left is reported as an error.
2986 if(s->c.bytestream_end - s->c.bytestream < w*40){
2987 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
// Pass 2: the same neighbourhood is gathered again for coding.
2992 int /*ll=0, */l=0, lt=0, t=0, rt=0;
2993 v= src[x + y*stride];
2996 t= src[x + (y-1)*stride];
2998 lt= src[x - 1 + (y-1)*stride];
3001 rt= src[x + 1 + (y-1)*stride];
3005 l= src[x - 1 + y*stride];
3007 if(orientation==1) ll= src[y + (x-2)*stride];
3008 else ll= src[x - 2 + y*stride];
3014 if(px<b->parent->width && py<b->parent->height)
3015 p= parent[px + py*2*stride];
// Non-zero context: code the significance of v with a context derived from
// the weighted neighbour magnitudes.
3017 if(/*ll|*/l|lt|t|rt|p){
3018 int context= av_log2(/*FFABS(ll) + */3*FFABS(l) + FFABS(lt) + 2*FFABS(t) + FFABS(rt) + FFABS(p));
3020 put_rac(&s->c, &b->state[0][context], !!v);
// Fetch the next stored run and write it while runs remain.
3023 run= runs[run_index++];
3025 if(run_index <= max_index)
3026 put_symbol2(&s->c, b->state[1], run, 3);
// Magnitude (minus one) and sign; the sign context comes from the signs of
// the left and top neighbours via quant3bA.
3034 int context= av_log2(/*FFABS(ll) + */3*FFABS(l) + FFABS(lt) + 2*FFABS(t) + FFABS(rt) + FFABS(p));
3035 int l2= 2*FFABS(l) + (l<0);
3036 int t2= 2*FFABS(t) + (t<0);
3038 put_symbol2(&s->c, b->state[context + 2], FFABS(v)-1, context-4);
3039 put_rac(&s->c, &b->state[0][16 + 1 + 3 + quant3bA[l2&0xFF] + 3*quant3bA[t2&0xFF]], v<0);
/**
 * Code one subband with the currently selected coefficient coder.
 *
 * Only the c0run coder is active; the qtree, z0run and dzr alternatives are
 * kept as commented-out calls. All arguments are forwarded unchanged and the
 * result of encode_subband_c0run() is returned.
 */
3047 static int encode_subband(SnowContext *s, SubBand *b, IDWTELEM *src, IDWTELEM *parent, int stride, int orientation){
3048 // encode_subband_qtree(s, b, src, parent, stride, orientation);
3049 // encode_subband_z0run(s, b, src, parent, stride, orientation);
3050 return encode_subband_c0run(s, b, src, parent, stride, orientation);
3051 // encode_subband_dzr(s, b, src, parent, stride, orientation);
/**
 * Try one candidate setting for block (mb_x, mb_y) and score it with
 * get_block_rd().
 *
 * @param s          encoder context
 * @param mb_x,mb_y  block position; the block is s->block[mb_x + mb_y*b_stride]
 * @param p          candidate values: the three colour components when the
 *                   intra path is taken, otherwise p[0]/p[1] (the pair that
 *                   check_block_inter() passes in, also used as me_cache key)
 * @param intra      selects the intra candidate (BLOCK_INTRA set) or the
 *                   inter candidate (BLOCK_INTRA cleared)
 * @param obmc_edged OBMC window handed on to get_block_rd()
 * @param best_rd    best score so far.
 *                   NOTE(review): presumably updated when the candidate is
 *                   better, with the block restored from backup otherwise --
 *                   confirm.
 * @return NOTE(review): callers OR the result into dia_change, so it looks
 *         like nonzero means "candidate accepted" -- confirm.
 */
3054 static av_always_inline int check_block(SnowContext *s, int mb_x, int mb_y, int p[3], int intra, const uint8_t *obmc_edged, int *best_rd){
3055 const int b_stride= s->b_width << s->block_max_depth;
3056 BlockNode *block= &s->block[mb_x + mb_y * b_stride];
// keep a copy of the block as it was before the candidate is written into it
3057 BlockNode backup= *block;
3058 int rd, index, value;
3060 assert(mb_x>=0 && mb_y>=0);
3061 assert(mb_x<b_stride);
// intra candidate: p[] holds the colour
3064 block->color[0] = p[0];
3065 block->color[1] = p[1];
3066 block->color[2] = p[2];
3067 block->type |= BLOCK_INTRA;
// inter candidate: hash p[0]/p[1] into me_cache; a slot holding the same tagged value is a repeat
3069 index= (p[0] + 31*p[1]) & (ME_CACHE_SIZE-1);
3070 value= s->me_cache_generation + (p[0]>>10) + (p[1]<<6) + (block->ref<<12);
3071 if(s->me_cache[index] == value)
3073 s->me_cache[index]= value;
3077 block->type &= ~BLOCK_INTRA;
3080 rd= get_block_rd(s, mb_x, mb_y, 0, obmc_edged);
3092 /* special case for int[2] args we discard afterwards,
3093 * fixes compilation problem with gcc 2.95 */
/**
 * Inter-only convenience wrapper around check_block(): packs the candidate
 * pair (p0, p1) into a local array and calls check_block() with intra = 0.
 * The return value of check_block() is passed through.
 */
3094 static av_always_inline int check_block_inter(SnowContext *s, int mb_x, int mb_y, int p0, int p1, const uint8_t *obmc_edged, int *best_rd){
3095 int p[2] = {p0, p1};
3096 return check_block(s, mb_x, mb_y, p, 0, obmc_edged, best_rd);
/**
 * Try giving the 2x2 block group whose top-left member is (mb_x, mb_y) one
 * common inter candidate and score it with get_4block_rd().
 *
 * @param s          encoder context
 * @param mb_x,mb_y  position of the top-left block; both must be even
 * @param p0,p1      candidate pair, also used as the me_cache key
 * @param ref        reference index of the candidate
 * @param best_rd    best score so far.
 *                   NOTE(review): presumably updated on improvement -- confirm.
 * @return NOTE(review): presumably nonzero when the candidate was kept;
 *         the four blocks are restored from backup[] on the other path --
 *         confirm.
 */
3099 static av_always_inline int check_4block_inter(SnowContext *s, int mb_x, int mb_y, int p0, int p1, int ref, int *best_rd){
3100 const int b_stride= s->b_width << s->block_max_depth;
3101 BlockNode *block= &s->block[mb_x + mb_y * b_stride];
// snapshot of the four group members: top-left, top-right, bottom-left, bottom-right
3102 BlockNode backup[4]= {block[0], block[1], block[b_stride], block[b_stride+1]};
3103 int rd, index, value;
3105 assert(mb_x>=0 && mb_y>=0);
3106 assert(mb_x<b_stride);
3107 assert(((mb_x|mb_y)&1) == 0);
// same me_cache hashing scheme as in check_block()
3109 index= (p0 + 31*p1) & (ME_CACHE_SIZE-1);
3110 value= s->me_cache_generation + (p0>>10) + (p1<<6) + (block->ref<<12);
3111 if(s->me_cache[index] == value)
3113 s->me_cache[index]= value;
3118 block->type &= ~BLOCK_INTRA;
// replicate the top-left block into the other three members
3119 block[1]= block[b_stride]= block[b_stride+1]= *block;
3121 rd= get_4block_rd(s, mb_x, mb_y, 0);
// put all four blocks back to their saved state
3128 block[0]= backup[0];
3129 block[1]= backup[1];
3130 block[b_stride]= backup[2];
3131 block[b_stride+1]= backup[3];
/**
 * Iteratively refine the per-block decisions (motion vector, reference,
 * intra colour) of the current frame.
 *
 * Stages visible here:
 *  - an initial encode_q_branch() pass over all top-level blocks, with the
 *    range coder copied into r and block_state saved and restored around it;
 *  - up to 25 passes over every block of the b_width x b_height grid, trying
 *    candidates through check_block() / check_block_inter();
 *  - when block_max_depth is 1, a stage that tries a common vector for each
 *    2x2 block group through check_4block_inter().
 *
 * Progress is reported with av_log() at AV_LOG_ERROR level.
 */
3136 static void iterative_me(SnowContext *s){
3137 int pass, mb_x, mb_y;
3138 const int b_width = s->b_width << s->block_max_depth;
3139 const int b_height= s->b_height << s->block_max_depth;
3140 const int b_stride= b_width;
// initial block coding pass; block_state is put back afterwards
// NOTE(review): r presumably restores s->c after this pass -- confirm
3144 RangeCoder r = s->c;
3145 uint8_t state[sizeof(s->block_state)];
3146 memcpy(state, s->block_state, sizeof(s->block_state));
3147 for(mb_y= 0; mb_y<s->b_height; mb_y++)
3148 for(mb_x= 0; mb_x<s->b_width; mb_x++)
3149 encode_q_branch(s, 0, mb_x, mb_y);
3151 memcpy(s->block_state, state, sizeof(s->block_state));
3154 for(pass=0; pass<25; pass++){
3157 for(mb_y= 0; mb_y<b_height; mb_y++){
3158 for(mb_x= 0; mb_x<b_width; mb_x++){
3159 int dia_change, i, j, ref;
3160 int best_rd= INT_MAX, ref_rd;
3161 BlockNode backup, ref_b;
3162 const int index= mb_x + mb_y * b_stride;
3163 BlockNode *block= &s->block[index];
// the eight neighbours; NULL where the neighbour would lie outside the block grid
3164 BlockNode *tb = mb_y ? &s->block[index-b_stride ] : NULL;
3165 BlockNode *lb = mb_x ? &s->block[index -1] : NULL;
3166 BlockNode *rb = mb_x+1<b_width ? &s->block[index +1] : NULL;
3167 BlockNode *bb = mb_y+1<b_height ? &s->block[index+b_stride ] : NULL;
3168 BlockNode *tlb= mb_x && mb_y ? &s->block[index-b_stride-1] : NULL;
3169 BlockNode *trb= mb_x+1<b_width && mb_y ? &s->block[index-b_stride+1] : NULL;
3170 BlockNode *blb= mb_x && mb_y+1<b_height ? &s->block[index+b_stride-1] : NULL;
3171 BlockNode *brb= mb_x+1<b_width && mb_y+1<b_height ? &s->block[index+b_stride+1] : NULL;
3172 const int b_w= (MB_SIZE >> s->block_max_depth);
3173 uint8_t obmc_edged[b_w*2][b_w*2];
// BLOCK_OPT marks a block as processed; it is tested from the second pass on
// and cleared again on the neighbours of any block that changes (further down)
3175 if(pass && (block->type & BLOCK_OPT))
3177 block->type |= BLOCK_OPT;
// start a new me_cache generation; the table is wiped when the counter is zero
3181 if(!s->me_cache_generation)
3182 memset(s->me_cache, 0, sizeof(s->me_cache));
3183 s->me_cache_generation += 1<<22;
3185 //FIXME precalculate
// obmc_edged: private copy of the OBMC window for this depth, modified for edge blocks
3188 memcpy(obmc_edged, obmc_tab[s->block_max_depth], b_w*b_w*4);
3190 for(y=0; y<b_w*2; y++)
3191 memset(obmc_edged[y], obmc_edged[y][0] + obmc_edged[y][b_w-1], b_w);
3192 if(mb_x==b_stride-1)
3193 for(y=0; y<b_w*2; y++)
3194 memset(obmc_edged[y]+b_w, obmc_edged[y][b_w] + obmc_edged[y][b_w*2-1], b_w);
3196 for(x=0; x<b_w*2; x++)
3197 obmc_edged[0][x] += obmc_edged[b_w-1][x];
3198 for(y=1; y<b_w; y++)
3199 memcpy(obmc_edged[y], obmc_edged[0], b_w*2);
3201 if(mb_y==b_height-1){
3202 for(x=0; x<b_w*2; x++)
3203 obmc_edged[b_w*2-1][x] += obmc_edged[b_w][x];
3204 for(y=b_w; y<b_w*2-1; y++)
3205 memcpy(obmc_edged[y], obmc_edged[b_w*2-1], b_w*2);
3209 //skip stuff outside the picture
// border blocks: input pixels are copied into current_picture for parts of the
// 2*block_w window (origin sx,sy) that fall outside the w x h plane
3210 if(mb_x==0 || mb_y==0 || mb_x==b_width-1 || mb_y==b_height-1){
3211 uint8_t *src= s-> input_picture.data[0];
3212 uint8_t *dst= s->current_picture.data[0];
3213 const int stride= s->current_picture.linesize[0];
3214 const int block_w= MB_SIZE >> s->block_max_depth;
3215 const int sx= block_w*mb_x - block_w/2;
3216 const int sy= block_w*mb_y - block_w/2;
3217 const int w= s->plane[0].width;
3218 const int h= s->plane[0].height;
3222 memcpy(dst + sx + y*stride, src + sx + y*stride, block_w*2);
3223 for(y=h; y<sy+block_w*2; y++)
3224 memcpy(dst + sx + y*stride, src + sx + y*stride, block_w*2);
3226 for(y=sy; y<sy+block_w*2; y++)
3227 memcpy(dst + sx + y*stride, src + sx + y*stride, -sx);
3229 if(sx+block_w*2 > w){
3230 for(y=sy; y<sy+block_w*2; y++)
3231 memcpy(dst + w + y*stride, src + w + y*stride, sx+block_w*2 - w);
3235 // intra(black) = neighbors' contribution to the current block
3237 color[i]= get_dc(s, mb_x, mb_y, i);
3239 // get previous score (cannot be cached due to OBMC)
3240 if(pass > 0 && (block->type&BLOCK_INTRA)){
3241 int color0[3]= {block->color[0], block->color[1], block->color[2]};
3242 check_block(s, mb_x, mb_y, color0, 1, *obmc_edged, &best_rd);
3244 check_block_inter(s, mb_x, mb_y, block->mx, block->my, *obmc_edged, &best_rd);
// per reference frame: stored vector, zero vector, the four direct neighbours'
// stored vectors, then the local searches below
3248 for(ref=0; ref < s->ref_frames; ref++){
3249 int16_t (*mvr)[2]= &s->ref_mvs[ref][index];
3250 if(s->ref_scores[ref][index] > s->ref_scores[ref_b.ref][index]*3/2) //FIXME tune threshold
3255 check_block_inter(s, mb_x, mb_y, mvr[0][0], mvr[0][1], *obmc_edged, &best_rd);
3256 check_block_inter(s, mb_x, mb_y, 0, 0, *obmc_edged, &best_rd);
3258 check_block_inter(s, mb_x, mb_y, mvr[-b_stride][0], mvr[-b_stride][1], *obmc_edged, &best_rd);
3260 check_block_inter(s, mb_x, mb_y, mvr[-1][0], mvr[-1][1], *obmc_edged, &best_rd);
3262 check_block_inter(s, mb_x, mb_y, mvr[1][0], mvr[1][1], *obmc_edged, &best_rd);
3264 check_block_inter(s, mb_x, mb_y, mvr[b_stride][0], mvr[b_stride][1], *obmc_edged, &best_rd);
3267 //FIXME avoid subpel interpolation / round to nearest integer
// diamond-shaped search around the current vector in steps of 4, size bounded by dia_size (at least 1)
3270 for(i=0; i<FFMAX(s->avctx->dia_size, 1); i++){
3272 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+4*(i-j), block->my+(4*j), *obmc_edged, &best_rd);
3273 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx-4*(i-j), block->my-(4*j), *obmc_edged, &best_rd);
3274 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+4*(i-j), block->my-(4*j), *obmc_edged, &best_rd);
3275 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx-4*(i-j), block->my+(4*j), *obmc_edged, &best_rd);
// refinement over the eight unit offsets around the current vector
3281 static const int square[8][2]= {{+1, 0},{-1, 0},{ 0,+1},{ 0,-1},{+1,+1},{-1,-1},{+1,-1},{-1,+1},};
3284 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+square[i][0], block->my+square[i][1], *obmc_edged, &best_rd);
3286 //FIXME or try the standard 2 pass qpel or similar
// remember the vector found for this reference
3288 mvr[0][0]= block->mx;
3289 mvr[0][1]= block->my;
3290 if(ref_rd > best_rd){
// also try the block as intra, using the colour obtained from get_dc()
3298 check_block(s, mb_x, mb_y, color, 1, *obmc_edged, &best_rd);
3299 //FIXME RD style color selection
// the block differs from its saved copy: clear BLOCK_OPT on all existing neighbours
3301 if(!same_block(block, &backup)){
3302 if(tb ) tb ->type &= ~BLOCK_OPT;
3303 if(lb ) lb ->type &= ~BLOCK_OPT;
3304 if(rb ) rb ->type &= ~BLOCK_OPT;
3305 if(bb ) bb ->type &= ~BLOCK_OPT;
3306 if(tlb) tlb->type &= ~BLOCK_OPT;
3307 if(trb) trb->type &= ~BLOCK_OPT;
3308 if(blb) blb->type &= ~BLOCK_OPT;
3309 if(brb) brb->type &= ~BLOCK_OPT;
3314 av_log(s->avctx, AV_LOG_ERROR, "pass:%d changed:%d\n", pass, change);
// merge stage, only with one level of block splitting: visit 2x2 groups (step 2 in x and y)
3319 if(s->block_max_depth == 1){
3321 for(mb_y= 0; mb_y<b_height; mb_y+=2){
3322 for(mb_x= 0; mb_x<b_width; mb_x+=2){
3324 int best_rd, init_rd;
3325 const int index= mb_x + mb_y * b_stride;
3328 b[0]= &s->block[index];
3330 b[2]= b[0]+b_stride;
// NOTE(review): groups whose four blocks are already identical are presumably skipped -- confirm
3332 if(same_block(b[0], b[1]) &&
3333 same_block(b[0], b[2]) &&
3334 same_block(b[0], b[3]))
3337 if(!s->me_cache_generation)
3338 memset(s->me_cache, 0, sizeof(s->me_cache));
3339 s->me_cache_generation += 1<<22;
3341 init_rd= best_rd= get_4block_rd(s, mb_x, mb_y, 0);
3343 //FIXME more multiref search?
// candidate: rounded average of the four members' vectors, reference 0
3344 check_4block_inter(s, mb_x, mb_y,
3345 (b[0]->mx + b[1]->mx + b[2]->mx + b[3]->mx + 2) >> 2,
3346 (b[0]->my + b[1]->my + b[2]->my + b[3]->my + 2) >> 2, 0, &best_rd);
// candidates: each non-intra member's own vector and reference
3349 if(!(b[i]->type&BLOCK_INTRA))
3350 check_4block_inter(s, mb_x, mb_y, b[i]->mx, b[i]->my, b[i]->ref, &best_rd);
3352 if(init_rd != best_rd)
3356 av_log(s->avctx, AV_LOG_ERROR, "pass:4mv changed:%d\n", change*4);
/**
 * Write the block data of the current frame to the range coder.
 *
 * @param s      encoder context
 * @param search nonzero on the searching pass. Together with
 *               me_method == ME_ITER and a non-keyframe it guards a step
 *               placed before block coding.
 *               NOTE(review): presumably the call to iterative_me() --
 *               confirm.
 *
 * Blocks are coded with encode_q_branch2() when me_method is ME_ITER or when
 * search is 0, and with encode_q_branch() otherwise. An error is logged when
 * the remaining output space drops below w*MB_SIZE*MB_SIZE*3 bytes.
 */
3360 static void encode_blocks(SnowContext *s, int search){
3365 if(s->avctx->me_method == ME_ITER && !s->keyframe && search)
3369 if(s->c.bytestream_end - s->c.bytestream < w*MB_SIZE*MB_SIZE*3){ //FIXME nicer limit
3370 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
3374 if(s->avctx->me_method == ME_ITER || !search)
3375 encode_q_branch2(s, 0, x, y);
3377 encode_q_branch (s, 0, x, y);
/**
 * Quantize one subband from src into dst.
 *
 * @param s      encoder context (s->qlog selects the global quantizer)
 * @param b      subband; supplies width, height and the per-band qlog offset
 * @param dst    output, written as dst[x + y*stride]
 * @param src    input coefficients, read as src[x + y*stride]
 * @param stride element stride shared by src and dst
 * @param bias   a nonzero argument turns the rounding bias off (bias becomes
 *               0), zero selects a bias of 3*qmul/8
 *
 * With s->qlog == LOSSLESS_QLOG coefficients are copied unchanged. Otherwise
 * values inside the dead zone given by thres1/thres2 become 0 and the rest
 * are divided by qmul, with the sign handled separately.
 */
3382 static void quantize(SnowContext *s, SubBand *b, IDWTELEM *dst, DWTELEM *src, int stride, int bias){
3383 const int w= b->width;
3384 const int h= b->height;
// effective quantizer: global + band qlog, clipped, turned into a multiplier through qexp[]
3385 const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
3386 const int qmul= qexp[qlog&(QROOT-1)]<<((qlog>>QSHIFT) + ENCODER_EXTRA_BITS);
3387 int x,y, thres1, thres2;
3389 if(s->qlog == LOSSLESS_QLOG){
3392 dst[x + y*stride]= src[x + y*stride];
3396 bias= bias ? 0 : (3*qmul)>>3;
3397 thres1= ((qmul - bias)>>QEXPSHIFT) - 1;
// variant without bias: plain division by qmul
3403 int i= src[x + y*stride];
// a single unsigned comparison tests whether i lies outside the dead zone
3405 if((unsigned)(i+thres1) > thres2){
3408 i/= qmul; //FIXME optimize
3409 dst[x + y*stride]= i;
3413 i/= qmul; //FIXME optimize
3414 dst[x + y*stride]= -i;
3417 dst[x + y*stride]= 0;
// variant with bias: bias is added before the division
3423 int i= src[x + y*stride];
3425 if((unsigned)(i+thres1) > thres2){
3428 i= (i + bias) / qmul; //FIXME optimize
3429 dst[x + y*stride]= i;
3433 i= (i + bias) / qmul; //FIXME optimize
3434 dst[x + y*stride]= -i;
3437 dst[x + y*stride]= 0;
/**
 * Undo quantization of one subband in place.
 *
 * Each coefficient is multiplied by qmul, offset by qadd (derived from
 * s->qbias) and shifted right by QEXPSHIFT. Two forms are used, one negating
 * before and after the scaling.
 * NOTE(review): presumably the negating form is for negative coefficients
 * and the other for positive ones -- confirm.
 * Nothing is done when s->qlog == LOSSLESS_QLOG.
 *
 * @param s      codec context (qlog, qbias)
 * @param b      subband; supplies width, height and the per-band qlog offset
 * @param src    coefficients, modified in place as src[x + y*stride]
 * @param stride element stride of src
 */
3443 static void dequantize(SnowContext *s, SubBand *b, IDWTELEM *src, int stride){
3444 const int w= b->width;
3445 const int h= b->height;
3446 const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
3447 const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
3448 const int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
3451 if(s->qlog == LOSSLESS_QLOG) return;
3455 int i= src[x + y*stride];
3457 src[x + y*stride]= -((-i*qmul + qadd)>>(QEXPSHIFT)); //FIXME try different bias
3459 src[x + y*stride]= (( i*qmul + qadd)>>(QEXPSHIFT));
/**
 * Replace every coefficient of a subband by its prediction residual, in place.
 *
 * The band is walked from the last row/column back to the first, so the
 * neighbours used for prediction still hold their original values. The
 * prediction comes from the left / top / top-right / top-left neighbours;
 * two mid_pred() forms are shown.
 * NOTE(review): which form is taken presumably depends on use_median and on
 * x being nonzero -- confirm against the branch conditions.
 *
 * @param s          codec context
 * @param b          subband; supplies width and height
 * @param src        coefficients, modified in place
 * @param stride     element stride of src
 * @param inverse    NOTE(review): role not evident from the lines shown --
 *                   confirm
 * @param use_median NOTE(review): presumably selects the first mid_pred()
 *                   form -- confirm
 */
3465 static void decorrelate(SnowContext *s, SubBand *b, IDWTELEM *src, int stride, int inverse, int use_median){
3466 const int w= b->width;
3467 const int h= b->height;
3470 for(y=h-1; y>=0; y--){
3471 for(x=w-1; x>=0; x--){
3472 int i= x + y*stride;
// median of left, top and top-right; plain left prediction on the first row or last column
3476 if(y && x+1<w) src[i] -= mid_pred(src[i - 1], src[i - stride], src[i - stride + 1]);
3477 else src[i] -= src[i - 1];
// median of left, top and the gradient left + top - top-left; plain left prediction on the first row
3479 if(y) src[i] -= mid_pred(src[i - 1], src[i - stride], src[i - 1] + src[i - stride] - src[i - 1 - stride]);
3480 else src[i] -= src[i - 1];
// prediction from the sample above
3483 if(y) src[i] -= src[i - stride];
/**
 * Counterpart of decorrelate(): adds the neighbour-based prediction back to
 * every coefficient of a subband, in place. The predictors are the same
 * expressions as in decorrelate(), applied with += instead of -=.
 *
 * @param s          codec context
 * @param b          subband; supplies width and height
 * @param src        residuals, modified in place
 * @param stride     element stride of src
 * @param inverse    NOTE(review): role not evident from the lines shown --
 *                   confirm
 * @param use_median NOTE(review): presumably selects the first mid_pred()
 *                   form -- confirm
 */
3489 static void correlate(SnowContext *s, SubBand *b, IDWTELEM *src, int stride, int inverse, int use_median){
3490 const int w= b->width;
3491 const int h= b->height;
3496 int i= x + y*stride;
// median of left, top and top-right; plain left prediction on the first row or last column
3500 if(y && x+1<w) src[i] += mid_pred(src[i - 1], src[i - stride], src[i - stride + 1]);
3501 else src[i] += src[i - 1];
// median of left, top and the gradient left + top - top-left; plain left prediction on the first row
3503 if(y) src[i] += mid_pred(src[i - 1], src[i - stride], src[i - 1] + src[i - stride] - src[i - 1 - stride]);
3504 else src[i] += src[i - 1];
// prediction from the sample above
3507 if(y) src[i] += src[i - stride];
/**
 * Write the per-band qlog values to the range coder using header_state.
 *
 * Planes 0 and 1 are covered. Within a plane, level 0 starts at orientation
 * 0 and higher levels at orientation 1; orientation 2 is never written.
 * Values are coded as signed symbols (last put_symbol() argument is 1).
 */
3513 static void encode_qlogs(SnowContext *s){
3514 int plane_index, level, orientation;
3516 for(plane_index=0; plane_index<2; plane_index++){
3517 for(level=0; level<s->spatial_decomposition_count; level++){
3518 for(orientation=level ? 1:0; orientation<4; orientation++){
3519 if(orientation==2) continue;
3520 put_symbol(&s->c, s->header_state, s->plane[plane_index].band[level][orientation].qlog, 1);
/**
 * Write the frame header to the range coder.
 *
 * Layout as far as visible here:
 *  - keyframe flag, coded with a private state initialised to MID_STATE;
 *  - on keyframes or with always_reset, the remembered "last_*" values are
 *    reset;
 *  - the stream parameters: version, always_reset, temporal/spatial
 *    decomposition settings, colorspace, chroma shifts, spatial scalability,
 *    max_ref_frames-1.
 *    NOTE(review): presumably only written on keyframes -- confirm;
 *  - an update_mc flag, followed by the per-plane motion-compensation filter
 *    description (diag_mc, htaps/2-1, coefficient magnitudes);
 *  - a flag plus value for a changed spatial_decomposition_count;
 *  - signed deltas against the last header for decomposition type, qlog,
 *    mv_scale, qbias and block_max_depth.
 */
3526 static void encode_header(SnowContext *s){
3530 memset(kstate, MID_STATE, sizeof(kstate));
3532 put_rac(&s->c, kstate, s->keyframe);
// forget the previous header so the deltas below start from a known base
3533 if(s->keyframe || s->always_reset){
3535 s->last_spatial_decomposition_type=
3539 s->last_block_max_depth= 0;
3540 for(plane_index=0; plane_index<2; plane_index++){
3541 Plane *p= &s->plane[plane_index];
3544 memset(p->last_hcoeff, 0, sizeof(p->last_hcoeff));
3548 put_symbol(&s->c, s->header_state, s->version, 0);
3549 put_rac(&s->c, s->header_state, s->always_reset);
3550 put_symbol(&s->c, s->header_state, s->temporal_decomposition_type, 0);
3551 put_symbol(&s->c, s->header_state, s->temporal_decomposition_count, 0);
3552 put_symbol(&s->c, s->header_state, s->spatial_decomposition_count, 0);
3553 put_symbol(&s->c, s->header_state, s->colorspace_type, 0);
3554 put_symbol(&s->c, s->header_state, s->chroma_h_shift, 0);
3555 put_symbol(&s->c, s->header_state, s->chroma_v_shift, 0);
3556 put_rac(&s->c, s->header_state, s->spatial_scalability);
3557 // put_rac(&s->c, s->header_state, s->rate_scalability);
3558 put_symbol(&s->c, s->header_state, s->max_ref_frames-1, 0);
// update_mc becomes set when any filter parameter of plane 0 or 1 differs from the last header
3565 for(plane_index=0; plane_index<2; plane_index++){
3566 Plane *p= &s->plane[plane_index];
3567 update_mc |= p->last_htaps != p->htaps;
3568 update_mc |= p->last_diag_mc != p->diag_mc;
3569 update_mc |= !!memcmp(p->last_hcoeff, p->hcoeff, sizeof(p->hcoeff));
3571 put_rac(&s->c, s->header_state, update_mc);
// filter description per plane; coefficients are written as magnitudes from index htaps/2 down to 1
3573 for(plane_index=0; plane_index<2; plane_index++){
3574 Plane *p= &s->plane[plane_index];
3575 put_rac(&s->c, s->header_state, p->diag_mc);
3576 put_symbol(&s->c, s->header_state, p->htaps/2-1, 0);
3577 for(i= p->htaps/2; i; i--)
3578 put_symbol(&s->c, s->header_state, FFABS(p->hcoeff[i]), 0);
// decomposition count: flag 1 followed by the new value, or flag 0
3581 if(s->last_spatial_decomposition_count != s->spatial_decomposition_count){
3582 put_rac(&s->c, s->header_state, 1);
3583 put_symbol(&s->c, s->header_state, s->spatial_decomposition_count, 0);
3586 put_rac(&s->c, s->header_state, 0);
// remaining parameters are sent as signed differences to the previous header
3589 put_symbol(&s->c, s->header_state, s->spatial_decomposition_type - s->last_spatial_decomposition_type, 1);
3590 put_symbol(&s->c, s->header_state, s->qlog - s->last_qlog , 1);
3591 put_symbol(&s->c, s->header_state, s->mv_scale - s->last_mv_scale, 1);
3592 put_symbol(&s->c, s->header_state, s->qbias - s->last_qbias , 1);
3593 put_symbol(&s->c, s->header_state, s->block_max_depth - s->last_block_max_depth, 1);
/**
 * Record the header parameters of the frame just coded in the "last_*"
 * fields, which encode_header() compares and subtracts against.
 *
 * Covers the per-plane filter settings of planes 0 and 1 (diag_mc, htaps,
 * hcoeff) and the global decomposition type/count, qlog, qbias, mv_scale and
 * block_max_depth.
 */
3597 static void update_last_header_values(SnowContext *s){
3601 for(plane_index=0; plane_index<2; plane_index++){
3602 Plane *p= &s->plane[plane_index];
3603 p->last_diag_mc= p->diag_mc;
3604 p->last_htaps = p->htaps;
3605 memcpy(p->last_hcoeff, p->hcoeff, sizeof(p->hcoeff));
3609 s->last_spatial_decomposition_type = s->spatial_decomposition_type;
3610 s->last_qlog = s->qlog;
3611 s->last_qbias = s->qbias;
3612 s->last_mv_scale = s->mv_scale;
3613 s->last_block_max_depth = s->block_max_depth;
3614 s->last_spatial_decomposition_count = s->spatial_decomposition_count;
/**
 * Convert a lambda-scaled quality value into the codec's logarithmic
 * quantizer.
 *
 * Computes QROOT * log2(qscale / FF_QP2LAMBDA), rounded with rint(), and
 * adds the constant offset 61*QROOT/8.
 *
 * @param qscale quality value in FF_QP2LAMBDA units (callers pass
 *               pict->quality)
 * @return the corresponding qlog
 */
3617 static int qscale2qlog(int qscale){
3618 return rint(QROOT*log(qscale / (float)FF_QP2LAMBDA)/log(2))
3619 + 61*QROOT/8; //<64 >60
/**
 * Single-pass rate control: estimate the complexity of the luma plane, ask
 * the rate controller for a quality, and move s->qlog accordingly.
 *
 * @param s    encoder context; plane 0 bands are copied from b->buf into
 *             b->ibuf and decorrelated as a side effect
 * @param pict frame being coded; pict->quality is overwritten
 * @return NOTE(review): presumably delta_qlog, the change applied to
 *         s->qlog. The caller treats a value <= INT_MIN as failure, which
 *         would match the pict->quality < 0 check -- confirm.
 */
3622 static int ratecontrol_1pass(SnowContext *s, AVFrame *pict)
3624 /* Estimate the frame's complexity as a sum of weighted dwt coefficients.
3625 * FIXME we know exact mv bits at this point,
3626 * but ratecontrol isn't set up to include them. */
3627 uint32_t coef_sum= 0;
3628 int level, orientation, delta_qlog;
3630 for(level=0; level<s->spatial_decomposition_count; level++){
3631 for(orientation=level ? 1 : 0; orientation<4; orientation++){
3632 SubBand *b= &s->plane[0].band[level][orientation];
3633 IDWTELEM *buf= b->ibuf;
3634 const int w= b->width;
3635 const int h= b->height;
3636 const int stride= b->stride;
// fixed reference quantizer (2*QROOT plus the band offset); qdiv is the 16.16 reciprocal of qmul
3637 const int qlog= av_clip(2*QROOT + b->qlog, 0, QROOT*16);
3638 const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
3639 const int qdiv= (1<<16)/qmul;
3641 //FIXME this is ugly
3644 buf[x+y*stride]= b->buf[x+y*stride];
3646 decorrelate(s, b, buf, stride, 1, 0);
// accumulate the coefficient magnitudes weighted by the reciprocal quantizer
3649 coef_sum+= abs(buf[x+y*stride]) * qdiv >> 16;
3653 /* ugly, ratecontrol just takes a sqrt again */
3654 coef_sum = (uint64_t)coef_sum * coef_sum >> 16;
3655 assert(coef_sum < INT_MAX);
// the estimate goes into mb_var_sum for I frames and into mc_mb_var_sum otherwise
3657 if(pict->pict_type == FF_I_TYPE){
3658 s->m.current_picture.mb_var_sum= coef_sum;
3659 s->m.current_picture.mc_mb_var_sum= 0;
3661 s->m.current_picture.mc_mb_var_sum= coef_sum;
3662 s->m.current_picture.mb_var_sum= 0;
3665 pict->quality= ff_rate_estimate_qscale(&s->m, 1);
3666 if (pict->quality < 0)
3668 s->lambda= pict->quality * 3/2;
3669 delta_qlog= qscale2qlog(pict->quality) - s->qlog;
3670 s->qlog+= delta_qlog;
/**
 * Derive the qlog offset of every subband of plane p from the response of
 * the inverse wavelet transform to a single coefficient in that band.
 *
 * Per band: the IDWT buffer is cleared, the value 256*16 is placed at the
 * centre of the band, ff_spatial_idwt() is run, and an error measure is
 * gathered over the whole plane.
 * NOTE(review): presumably a sum of squared output values -- confirm.
 * b->qlog is then set to the rounded base-2^(1/QROOT) logarithm of
 * 352256/sqrt(error).
 *
 * @param s encoder context; s->spatial_idwt_buffer is used as scratch space
 * @param p plane whose bands receive their qlog
 */
3674 static void calculate_visual_weight(SnowContext *s, Plane *p){
3675 int width = p->width;
3676 int height= p->height;
3677 int level, orientation, x, y;
3679 for(level=0; level<s->spatial_decomposition_count; level++){
3680 for(orientation=level ? 1 : 0; orientation<4; orientation++){
3681 SubBand *b= &p->band[level][orientation];
3682 IDWTELEM *ibuf= b->ibuf;
// single coefficient at the band centre, everything else zero
3685 memset(s->spatial_idwt_buffer, 0, sizeof(*s->spatial_idwt_buffer)*width*height);
3686 ibuf[b->width/2 + b->height/2*b->stride]= 256*16;
3687 ff_spatial_idwt(s->spatial_idwt_buffer, width, height, width, s->spatial_decomposition_type, s->spatial_decomposition_count);
3688 for(y=0; y<height; y++){
3689 for(x=0; x<width; x++){
3690 int64_t d= s->spatial_idwt_buffer[x + y*width]*16;
3695 b->qlog= (int)(log(352256.0/sqrt(error)) / log(pow(2.0, 1.0/QROOT))+0.5);
/**
 * Encode one picture into buf.
 *
 * Outline of the visible steps: initialise the range coder on buf; copy the
 * input into s->input_picture; decide picture type and quantizer; set up the
 * embedded MpegEncContext (s->m) for P frames; code the block data; then per
 * plane run prediction, forward DWT, optional one-pass rate control,
 * quantization and subband coding, followed by reconstruction (dequantize,
 * inverse DWT, prediction added back). Finally the rate-control and
 * statistics fields are updated.
 *
 * @param avctx    codec context; avctx->priv_data is the SnowContext
 * @param buf      output buffer for the coded frame
 * @param buf_size size of buf in bytes
 * @param data     the AVFrame to encode; pict_type and quality are written
 *                 back into it
 * @return the value of ff_rac_terminate() on the path shown at the end.
 *         NOTE(review): the quality < 0, delta_qlog <= INT_MIN and
 *         ff_rate_estimate_qscale() < 0 checks presumably return an error --
 *         confirm.
 */
3700 static int encode_frame(AVCodecContext *avctx, unsigned char *buf, int buf_size, void *data){
3701 SnowContext *s = avctx->priv_data;
3702 RangeCoder * const c= &s->c;
3703 AVFrame *pict = data;
3704 const int width= s->avctx->width;
3705 const int height= s->avctx->height;
3706 int level, orientation, plane_index, i, y;
// copies of the coder states, put back if the frame is re-coded after rate control
3707 uint8_t rc_header_bak[sizeof(s->header_state)];
3708 uint8_t rc_block_bak[sizeof(s->block_state)];
3710 ff_init_range_encoder(c, buf, buf_size);
3711 ff_build_rac_states(c, 0.05*(1LL<<32), 256-8);
// copy the caller's picture row by row into the encoder-owned input_picture
3715 for(y=0; y<(height>>shift); y++)
3716 memcpy(&s->input_picture.data[i][y * s->input_picture.linesize[i]],
3717 &pict->data[i][y * pict->linesize[i]],
3720 s->new_picture = *pict;
3722 s->m.picture_number= avctx->frame_number;
// second pass: picture type comes from the first-pass statistics
3723 if(avctx->flags&CODEC_FLAG_PASS2){
3725 pict->pict_type= s->m.rc_context.entry[avctx->frame_number].new_pict_type;
3726 s->keyframe= pict->pict_type==FF_I_TYPE;
3727 if(!(avctx->flags&CODEC_FLAG_QSCALE)) {
3728 pict->quality= ff_rate_estimate_qscale(&s->m, 0);
3729 if (pict->quality < 0)
// otherwise a keyframe is placed every gop_size frames (every frame when gop_size is 0)
3733 s->keyframe= avctx->gop_size==0 || avctx->frame_number % avctx->gop_size == 0;
3735 pict->pict_type= s->keyframe ? FF_I_TYPE : FF_P_TYPE;
3738 if(s->pass1_rc && avctx->frame_number == 0)
3739 pict->quality= 2*FF_QP2LAMBDA;
3741 s->qlog= qscale2qlog(pict->quality);
3742 s->lambda = pict->quality * 3/2;
// a negative qlog, or quality 0 with fixed qscale requested, selects lossless coding
3744 if(s->qlog < 0 || (!pict->quality && (avctx->flags & CODEC_FLAG_QSCALE))){
3745 s->qlog= LOSSLESS_QLOG;
3747 }//else keep previous frame's qlog until after motion estimation
3751 s->m.current_picture_ptr= &s->m.current_picture;
3752 s->m.last_picture.pts= s->m.current_picture.pts;
3753 s->m.current_picture.pts= pict->pts;
// P frames: fill in the fields of the embedded MpegEncContext from this encoder's pictures and settings
3754 if(pict->pict_type == FF_P_TYPE){
3755 int block_width = (width +15)>>4;
3756 int block_height= (height+15)>>4;
3757 int stride= s->current_picture.linesize[0];
3759 assert(s->current_picture.data[0]);
3760 assert(s->last_picture[0].data[0]);
3762 s->m.avctx= s->avctx;
3763 s->m.current_picture.data[0]= s->current_picture.data[0];
3764 s->m. last_picture.data[0]= s->last_picture[0].data[0];
3765 s->m. new_picture.data[0]= s-> input_picture.data[0];
3766 s->m. last_picture_ptr= &s->m. last_picture;
3768 s->m. last_picture.linesize[0]=
3769 s->m. new_picture.linesize[0]=
3770 s->m.current_picture.linesize[0]= stride;
3771 s->m.uvlinesize= s->current_picture.linesize[1];
3773 s->m.height= height;
3774 s->m.mb_width = block_width;
3775 s->m.mb_height= block_height;
3776 s->m.mb_stride= s->m.mb_width+1;
3777 s->m.b8_stride= 2*s->m.mb_width+1;
3779 s->m.pict_type= pict->pict_type;
3780 s->m.me_method= s->avctx->me_method;
3781 s->m.me.scene_change_score=0;
3782 s->m.flags= s->avctx->flags;
3783 s->m.quarter_sample= (s->avctx->flags & CODEC_FLAG_QPEL)!=0;
3784 s->m.out_format= FMT_H263;
3785 s->m.unrestricted_mv= 1;
3787 s->m.lambda = s->lambda;
3788 s->m.qscale= (s->m.lambda*139 + FF_LAMBDA_SCALE*64) >> (FF_LAMBDA_SHIFT + 7);
3789 s->lambda2= s->m.lambda2= (s->m.lambda*s->m.lambda + FF_LAMBDA_SCALE/2) >> FF_LAMBDA_SHIFT;
3791 s->m.dsp= s->dsp; //move
// remember the coder states as they are before the header and blocks are written
3797 memcpy(rc_header_bak, s->header_state, sizeof(s->header_state));
3798 memcpy(rc_block_bak, s->block_state, sizeof(s->block_state));
// both picture types currently use 5 decomposition levels
3803 if(pict->pict_type == FF_I_TYPE)
3804 s->spatial_decomposition_count= 5;
3806 s->spatial_decomposition_count= 5;
3808 s->m.pict_type = pict->pict_type;
3809 s->qbias= pict->pict_type == FF_P_TYPE ? 2 : 0;
3811 common_init_after_header(avctx);
// band weights depend on the decomposition depth, so recompute them when it changed
3813 if(s->last_spatial_decomposition_count != s->spatial_decomposition_count){
3814 for(plane_index=0; plane_index<3; plane_index++){
3815 calculate_visual_weight(s, &s->plane[plane_index]);
// bits written so far are counted as misc_bits, the block data after that as mv_bits
3820 s->m.misc_bits = 8*(s->c.bytestream - s->c.bytestream_start);
3821 encode_blocks(s, 1);
3822 s->m.mv_bits = 8*(s->c.bytestream - s->c.bytestream_start) - s->m.misc_bits;
3824 for(plane_index=0; plane_index<3; plane_index++){
3825 Plane *p= &s->plane[plane_index];
3829 // int bits= put_bits_count(&s->c.pb);
3831 if(!(avctx->flags2 & CODEC_FLAG2_MEMC_ONLY)){
// load the input plane in fixed point (FRAC_BITS fractional bits), then call predict_plane() with last argument 0
3833 if(pict->data[plane_index]) //FIXME gray hack
3836 s->spatial_idwt_buffer[y*w + x]= pict->data[plane_index][y*pict->linesize[plane_index] + x]<<FRAC_BITS;
3839 predict_plane(s, s->spatial_idwt_buffer, plane_index, 0);
3842 && pict->pict_type == FF_P_TYPE
3843 && !(avctx->flags&CODEC_FLAG_PASS2)
3844 && s->m.me.scene_change_score > s->avctx->scenechange_threshold){
// scene change on a P frame outside pass 2: restart the range coder and switch the picture to intra
3845 ff_init_range_encoder(c, buf, buf_size);
3846 ff_build_rac_states(c, 0.05*(1LL<<32), 256-8);
3847 pict->pict_type= FF_I_TYPE;
3849 s->current_picture.key_frame=1;
// convert to the DWT input format: rounded integer samples when lossless, extra precision bits otherwise
3853 if(s->qlog == LOSSLESS_QLOG){
3856 s->spatial_dwt_buffer[y*w + x]= (s->spatial_idwt_buffer[y*w + x] + (1<<(FRAC_BITS-1))-1)>>FRAC_BITS;
3862 s->spatial_dwt_buffer[y*w + x]=s->spatial_idwt_buffer[y*w + x]<<ENCODER_EXTRA_BITS;
3868 dwt_quantize(s, p, s->spatial_dwt_buffer, w, h, w, s->spatial_decomposition_type);
3870 ff_spatial_dwt(s->spatial_dwt_buffer, w, h, w, s->spatial_decomposition_type, s->spatial_decomposition_count);
// one-pass rate control runs once, on the luma plane
3872 if(s->pass1_rc && plane_index==0){
3873 int delta_qlog = ratecontrol_1pass(s, pict);
3874 if (delta_qlog <= INT_MIN)
3877 //reordering qlog in the bitstream would eliminate this reset
3878 ff_init_range_encoder(c, buf, buf_size);
3879 memcpy(s->header_state, rc_header_bak, sizeof(s->header_state));
3880 memcpy(s->block_state, rc_block_bak, sizeof(s->block_state));
3882 encode_blocks(s, 0);
// quantize, decorrelate and code every band; correlate() then runs on the same buffer
3886 for(level=0; level<s->spatial_decomposition_count; level++){
3887 for(orientation=level ? 1 : 0; orientation<4; orientation++){
3888 SubBand *b= &p->band[level][orientation];
3891 quantize(s, b, b->ibuf, b->buf, b->stride, s->qbias);
3893 decorrelate(s, b, b->ibuf, b->stride, pict->pict_type == FF_P_TYPE, 0);
3894 encode_subband(s, b, b->ibuf, b->parent ? b->parent->ibuf : NULL, b->stride, orientation);
3895 assert(b->parent==NULL || b->parent->stride == b->stride*2);
3897 correlate(s, b, b->ibuf, b->stride, 1, 0);
// reconstruction: dequantize all bands and run the inverse DWT
3901 for(level=0; level<s->spatial_decomposition_count; level++){
3902 for(orientation=level ? 1 : 0; orientation<4; orientation++){
3903 SubBand *b= &p->band[level][orientation];
3905 dequantize(s, b, b->ibuf, b->stride);
3909 ff_spatial_idwt(s->spatial_idwt_buffer, w, h, w, s->spatial_decomposition_type, s->spatial_decomposition_count);
3910 if(s->qlog == LOSSLESS_QLOG){
3913 s->spatial_idwt_buffer[y*w + x]<<=FRAC_BITS;
3917 predict_plane(s, s->spatial_idwt_buffer, plane_index, 1);
// I frames: the reconstruction is a copy of the source plane
3920 if(pict->pict_type == FF_I_TYPE){
3923 s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x]=
3924 pict->data[plane_index][y*pict->linesize[plane_index] + x];
// otherwise the reconstruction is built from prediction alone on a zeroed residual buffer
3928 memset(s->spatial_idwt_buffer, 0, sizeof(IDWTELEM)*w*h);
3929 predict_plane(s, s->spatial_idwt_buffer, plane_index, 1);
// optional error statistics between reconstruction and source
3932 if(s->avctx->flags&CODEC_FLAG_PSNR){
3935 if(pict->data[plane_index]) //FIXME gray hack
3938 int d= s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x] - pict->data[plane_index][y*pict->linesize[plane_index] + x];
3942 s->avctx->error[plane_index] += error;
3943 s->current_picture.error[plane_index] = error;
3948 update_last_header_values(s);
3950 release_buffer(avctx);
// bookkeeping for rate control and the statistics exported through avctx
3952 s->current_picture.coded_picture_number = avctx->frame_number;
3953 s->current_picture.pict_type = pict->pict_type;
3954 s->current_picture.quality = pict->quality;
3955 s->m.frame_bits = 8*(s->c.bytestream - s->c.bytestream_start);
3956 s->m.p_tex_bits = s->m.frame_bits - s->m.misc_bits - s->m.mv_bits;
3957 s->m.current_picture.display_picture_number =
3958 s->m.current_picture.coded_picture_number = avctx->frame_number;
3959 s->m.current_picture.quality = pict->quality;
3960 s->m.total_bits += 8*(s->c.bytestream - s->c.bytestream_start);
3962 if (ff_rate_estimate_qscale(&s->m, 0) < 0)
3964 if(avctx->flags&CODEC_FLAG_PASS1)
3965 ff_write_pass1_stats(&s->m);
3966 s->m.last_pict_type = s->m.pict_type;
3967 avctx->frame_bits = s->m.frame_bits;
3968 avctx->mv_bits = s->m.mv_bits;
3969 avctx->misc_bits = s->m.misc_bits;
3970 avctx->p_tex_bits = s->m.p_tex_bits;
3974 return ff_rac_terminate(c);
/**
 * Encoder teardown: hands the input picture buffer back through
 * avctx->release_buffer() when one is held and frees avctx->stats_out.
 *
 * @param avctx codec context; avctx->priv_data is the SnowContext
 * @return status code.
 *         NOTE(review): presumably always 0 -- confirm.
 */
3977 static av_cold int encode_end(AVCodecContext *avctx)
3979 SnowContext *s = avctx->priv_data;
3982 if (s->input_picture.data[0])
3983 avctx->release_buffer(avctx, &s->input_picture);
3984 av_free(avctx->stats_out);
/* Codec registration entry for the Snow encoder; the private context is a
 * SnowContext and the long name shown to users is "Snow". */
3989 AVCodec snow_encoder = {
3993 sizeof(SnowContext),
3997 .long_name = NULL_IF_CONFIG_SMALL("Snow"),
4007 #include "libavutil/lfg.h"
4012 int buffer[2][width*height];
4016 s.spatial_decomposition_count=6;
4017 s.spatial_decomposition_type=1;
4019 av_lfg_init(&prng, 1);
4021 printf("testing 5/3 DWT\n");
4022 for(i=0; i<width*height; i++)
4023 buffer[0][i] = buffer[1][i] = av_lfg_get(&prng) % 54321 - 12345;
4025 ff_spatial_dwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4026 ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4028 for(i=0; i<width*height; i++)
4029 if(buffer[0][i]!= buffer[1][i]) printf("fsck: %6d %12d %7d\n",i, buffer[0][i], buffer[1][i]);
4031 printf("testing 9/7 DWT\n");
4032 s.spatial_decomposition_type=0;
4033 for(i=0; i<width*height; i++)
4034 buffer[0][i] = buffer[1][i] = av_lfg_get(&prng) % 54321 - 12345;
4036 ff_spatial_dwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4037 ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4039 for(i=0; i<width*height; i++)
4040 if(FFABS(buffer[0][i] - buffer[1][i])>20) printf("fsck: %6d %12d %7d\n",i, buffer[0][i], buffer[1][i]);
4043 printf("testing AC coder\n");
4044 memset(s.header_state, 0, sizeof(s.header_state));
4045 ff_init_range_encoder(&s.c, buffer[0], 256*256);
4046 ff_init_cabac_states(&s.c, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64);
4048 for(i=-256; i<256; i++){
4049 put_symbol(&s.c, s.header_state, i*i*i/3*FFABS(i), 1);
4051 ff_rac_terminate(&s.c);
4053 memset(s.header_state, 0, sizeof(s.header_state));
4054 ff_init_range_decoder(&s.c, buffer[0], 256*256);
4055 ff_init_cabac_states(&s.c, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64);
4057 for(i=-256; i<256; i++){
4059 j= get_symbol(&s.c, s.header_state, 1);
4060 if(j!=i*i*i/3*FFABS(i)) printf("fsck: %d != %d\n", i, j);
4064 int level, orientation, x, y;
4065 int64_t errors[8][4];
4068 memset(errors, 0, sizeof(errors));
4069 s.spatial_decomposition_count=3;
4070 s.spatial_decomposition_type=0;
4071 for(level=0; level<s.spatial_decomposition_count; level++){
4072 for(orientation=level ? 1 : 0; orientation<4; orientation++){
4073 int w= width >> (s.spatial_decomposition_count-level);
4074 int h= height >> (s.spatial_decomposition_count-level);
4075 int stride= width << (s.spatial_decomposition_count-level);
4076 DWTELEM *buf= buffer[0];
4079 if(orientation&1) buf+=w;
4080 if(orientation>1) buf+=stride>>1;
4082 memset(buffer[0], 0, sizeof(int)*width*height);
4083 buf[w/2 + h/2*stride]= 256*256;
4084 ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4085 for(y=0; y<height; y++){
4086 for(x=0; x<width; x++){
4087 int64_t d= buffer[0][x + y*width];
4089 if(FFABS(width/2-x)<9 && FFABS(height/2-y)<9 && level==2) printf("%8"PRId64" ", d);
4091 if(FFABS(height/2-y)<9 && level==2) printf("\n");
4093 error= (int)(sqrt(error)+0.5);
4094 errors[level][orientation]= error;
4095 if(g) g=av_gcd(g, error);
4099 printf("static int const visual_weight[][4]={\n");
4100 for(level=0; level<s.spatial_decomposition_count; level++){
4102 for(orientation=0; orientation<4; orientation++){
4103 printf("%8"PRId64",", errors[level][orientation]/g);
4110 int w= width >> (s.spatial_decomposition_count-level);
4111 //int h= height >> (s.spatial_decomposition_count-level);
4112 int stride= width << (s.spatial_decomposition_count-level);
4113 DWTELEM *buf= buffer[0];
4119 memset(buffer[0], 0, sizeof(int)*width*height);
4121 for(y=0; y<height; y++){
4122 for(x=0; x<width; x++){
4123 int tab[4]={0,2,3,1};
4124 buffer[0][x+width*y]= 256*256*tab[(x&1) + 2*(y&1)];
4127 ff_spatial_dwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4131 buf[x + y*stride ]=169;
4132 buf[x + y*stride-w]=64;
4135 ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4137 for(y=0; y<height; y++){
4138 for(x=0; x<width; x++){
4139 int64_t d= buffer[0][x + y*width];
4141 if(FFABS(width/2-x)<9 && FFABS(height/2-y)<9) printf("%8"PRId64" ", d);
4143 if(FFABS(height/2-y)<9) printf("\n");