2 * Copyright (C) 2004 Michael Niedermayer <michaelni@gmx.at>
4 * This file is part of Libav.
6 * Libav is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * Libav is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with Libav; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21 #include "libavutil/intmath.h"
27 #include "rangecoder.h"
30 #include "mpegvideo.h"
36 static const int8_t quant3[256]={
37 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
38 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
39 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
40 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
41 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
42 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
43 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
44 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
45 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
46 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
47 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
48 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
49 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
50 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
51 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
52 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 0,
54 static const int8_t quant3b[256]={
55 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
56 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
57 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
58 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
59 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
60 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
61 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
62 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
63 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
64 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
65 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
66 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
67 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
68 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
69 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
70 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
72 static const int8_t quant3bA[256]={
73 0, 0, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
74 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
75 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
76 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
77 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
78 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
79 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
80 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
81 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
82 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
83 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
84 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
85 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
86 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
87 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
88 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
90 static const int8_t quant5[256]={
91 0, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
92 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
93 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
94 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
95 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
96 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
97 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
98 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
99 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
100 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
101 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
102 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
103 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
104 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
105 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
106 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,-1,
108 static const int8_t quant7[256]={
109 0, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
110 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
111 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3,
112 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
113 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
114 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
115 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
116 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
117 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
118 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
119 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
120 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
121 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
122 -3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-2,-2,-2,
123 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
124 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,
126 static const int8_t quant9[256]={
127 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3,
128 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
129 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
130 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
131 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
132 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
133 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
134 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
135 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
136 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
137 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
138 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
139 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
140 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
141 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,
142 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-1,-1,
144 static const int8_t quant11[256]={
145 0, 1, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4,
146 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
147 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
148 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
149 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
150 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
151 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
152 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
153 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
154 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
155 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
156 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
157 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
158 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-4,-4,
159 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
160 -4,-4,-4,-4,-4,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-1,
162 static const int8_t quant13[256]={
163 0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
164 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
165 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
166 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
167 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
168 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
169 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
170 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
171 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
172 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
173 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
174 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
175 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-5,
176 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
177 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
178 -4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,-2,-2,-1,
182 static const uint8_t obmc32[1024]={
183 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
184 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0,
185 0, 0, 0, 4, 4, 4, 4, 8, 8, 12, 12, 12, 16, 16, 16, 16, 16, 16, 16, 16, 12, 12, 12, 8, 8, 4, 4, 4, 4, 0, 0, 0,
186 0, 0, 4, 4, 8, 8, 12, 16, 16, 20, 24, 24, 28, 28, 32, 32, 32, 32, 28, 28, 24, 24, 20, 16, 16, 12, 8, 8, 4, 4, 0, 0,
187 0, 0, 4, 8, 8, 12, 16, 24, 28, 32, 36, 40, 44, 48, 48, 48, 48, 48, 48, 44, 40, 36, 32, 28, 24, 16, 12, 8, 8, 4, 0, 0,
188 0, 4, 4, 8, 12, 20, 24, 32, 40, 44, 52, 56, 60, 64, 68, 72, 72, 68, 64, 60, 56, 52, 44, 40, 32, 24, 20, 12, 8, 4, 4, 0,
189 0, 4, 4, 12, 16, 24, 32, 40, 52, 60, 68, 76, 80, 88, 88, 92, 92, 88, 88, 80, 76, 68, 60, 52, 40, 32, 24, 16, 12, 4, 4, 0,
190 0, 4, 8, 16, 24, 32, 40, 52, 64, 76, 84, 92,100,108,112,116,116,112,108,100, 92, 84, 76, 64, 52, 40, 32, 24, 16, 8, 4, 0,
191 0, 4, 8, 16, 28, 40, 52, 64, 76, 88,100,112,124,132,136,140,140,136,132,124,112,100, 88, 76, 64, 52, 40, 28, 16, 8, 4, 0,
192 0, 4, 12, 20, 32, 44, 60, 76, 88,104,120,132,144,152,160,164,164,160,152,144,132,120,104, 88, 76, 60, 44, 32, 20, 12, 4, 0,
193 0, 4, 12, 24, 36, 48, 68, 84,100,120,136,152,164,176,180,184,184,180,176,164,152,136,120,100, 84, 68, 48, 36, 24, 12, 4, 0,
194 0, 4, 12, 24, 40, 56, 76, 92,112,132,152,168,180,192,204,208,208,204,192,180,168,152,132,112, 92, 76, 56, 40, 24, 12, 4, 0,
195 0, 4, 16, 28, 44, 60, 80,100,124,144,164,180,196,208,220,224,224,220,208,196,180,164,144,124,100, 80, 60, 44, 28, 16, 4, 0,
196 0, 8, 16, 28, 48, 64, 88,108,132,152,176,192,208,224,232,240,240,232,224,208,192,176,152,132,108, 88, 64, 48, 28, 16, 8, 0,
197 0, 4, 16, 32, 48, 68, 88,112,136,160,180,204,220,232,244,248,248,244,232,220,204,180,160,136,112, 88, 68, 48, 32, 16, 4, 0,
198 1, 8, 16, 32, 48, 72, 92,116,140,164,184,208,224,240,248,255,255,248,240,224,208,184,164,140,116, 92, 72, 48, 32, 16, 8, 1,
199 1, 8, 16, 32, 48, 72, 92,116,140,164,184,208,224,240,248,255,255,248,240,224,208,184,164,140,116, 92, 72, 48, 32, 16, 8, 1,
200 0, 4, 16, 32, 48, 68, 88,112,136,160,180,204,220,232,244,248,248,244,232,220,204,180,160,136,112, 88, 68, 48, 32, 16, 4, 0,
201 0, 8, 16, 28, 48, 64, 88,108,132,152,176,192,208,224,232,240,240,232,224,208,192,176,152,132,108, 88, 64, 48, 28, 16, 8, 0,
202 0, 4, 16, 28, 44, 60, 80,100,124,144,164,180,196,208,220,224,224,220,208,196,180,164,144,124,100, 80, 60, 44, 28, 16, 4, 0,
203 0, 4, 12, 24, 40, 56, 76, 92,112,132,152,168,180,192,204,208,208,204,192,180,168,152,132,112, 92, 76, 56, 40, 24, 12, 4, 0,
204 0, 4, 12, 24, 36, 48, 68, 84,100,120,136,152,164,176,180,184,184,180,176,164,152,136,120,100, 84, 68, 48, 36, 24, 12, 4, 0,
205 0, 4, 12, 20, 32, 44, 60, 76, 88,104,120,132,144,152,160,164,164,160,152,144,132,120,104, 88, 76, 60, 44, 32, 20, 12, 4, 0,
206 0, 4, 8, 16, 28, 40, 52, 64, 76, 88,100,112,124,132,136,140,140,136,132,124,112,100, 88, 76, 64, 52, 40, 28, 16, 8, 4, 0,
207 0, 4, 8, 16, 24, 32, 40, 52, 64, 76, 84, 92,100,108,112,116,116,112,108,100, 92, 84, 76, 64, 52, 40, 32, 24, 16, 8, 4, 0,
208 0, 4, 4, 12, 16, 24, 32, 40, 52, 60, 68, 76, 80, 88, 88, 92, 92, 88, 88, 80, 76, 68, 60, 52, 40, 32, 24, 16, 12, 4, 4, 0,
209 0, 4, 4, 8, 12, 20, 24, 32, 40, 44, 52, 56, 60, 64, 68, 72, 72, 68, 64, 60, 56, 52, 44, 40, 32, 24, 20, 12, 8, 4, 4, 0,
210 0, 0, 4, 8, 8, 12, 16, 24, 28, 32, 36, 40, 44, 48, 48, 48, 48, 48, 48, 44, 40, 36, 32, 28, 24, 16, 12, 8, 8, 4, 0, 0,
211 0, 0, 4, 4, 8, 8, 12, 16, 16, 20, 24, 24, 28, 28, 32, 32, 32, 32, 28, 28, 24, 24, 20, 16, 16, 12, 8, 8, 4, 4, 0, 0,
212 0, 0, 0, 4, 4, 4, 4, 8, 8, 12, 12, 12, 16, 16, 16, 16, 16, 16, 16, 16, 12, 12, 12, 8, 8, 4, 4, 4, 4, 0, 0, 0,
213 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0,
214 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
217 static const uint8_t obmc16[256]={
218 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0,
219 0, 4, 4, 8, 16, 20, 20, 24, 24, 20, 20, 16, 8, 4, 4, 0,
220 0, 4, 16, 24, 36, 44, 52, 60, 60, 52, 44, 36, 24, 16, 4, 0,
221 0, 8, 24, 44, 60, 80, 96,104,104, 96, 80, 60, 44, 24, 8, 0,
222 0, 16, 36, 60, 92,116,136,152,152,136,116, 92, 60, 36, 16, 0,
223 0, 20, 44, 80,116,152,180,196,196,180,152,116, 80, 44, 20, 0,
224 4, 20, 52, 96,136,180,212,228,228,212,180,136, 96, 52, 20, 4,
225 4, 24, 60,104,152,196,228,248,248,228,196,152,104, 60, 24, 4,
226 4, 24, 60,104,152,196,228,248,248,228,196,152,104, 60, 24, 4,
227 4, 20, 52, 96,136,180,212,228,228,212,180,136, 96, 52, 20, 4,
228 0, 20, 44, 80,116,152,180,196,196,180,152,116, 80, 44, 20, 0,
229 0, 16, 36, 60, 92,116,136,152,152,136,116, 92, 60, 36, 16, 0,
230 0, 8, 24, 44, 60, 80, 96,104,104, 96, 80, 60, 44, 24, 8, 0,
231 0, 4, 16, 24, 36, 44, 52, 60, 60, 52, 44, 36, 24, 16, 4, 0,
232 0, 4, 4, 8, 16, 20, 20, 24, 24, 20, 20, 16, 8, 4, 4, 0,
233 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0,
237 static const uint8_t obmc32[1024]={
238 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0,
239 0, 4, 4, 4, 8, 8, 8, 12, 12, 16, 16, 16, 20, 20, 20, 24, 24, 20, 20, 20, 16, 16, 16, 12, 12, 8, 8, 8, 4, 4, 4, 0,
240 0, 4, 8, 8, 12, 12, 16, 20, 20, 24, 28, 28, 32, 32, 36, 40, 40, 36, 32, 32, 28, 28, 24, 20, 20, 16, 12, 12, 8, 8, 4, 0,
241 0, 4, 8, 12, 16, 20, 24, 28, 28, 32, 36, 40, 44, 48, 52, 56, 56, 52, 48, 44, 40, 36, 32, 28, 28, 24, 20, 16, 12, 8, 4, 0,
242 4, 8, 12, 16, 20, 24, 28, 32, 40, 44, 48, 52, 56, 60, 64, 68, 68, 64, 60, 56, 52, 48, 44, 40, 32, 28, 24, 20, 16, 12, 8, 4,
243 4, 8, 12, 20, 24, 32, 36, 40, 48, 52, 56, 64, 68, 76, 80, 84, 84, 80, 76, 68, 64, 56, 52, 48, 40, 36, 32, 24, 20, 12, 8, 4,
244 4, 8, 16, 24, 28, 36, 44, 48, 56, 60, 68, 76, 80, 88, 96,100,100, 96, 88, 80, 76, 68, 60, 56, 48, 44, 36, 28, 24, 16, 8, 4,
245 4, 12, 20, 28, 32, 40, 48, 56, 64, 72, 80, 88, 92,100,108,116,116,108,100, 92, 88, 80, 72, 64, 56, 48, 40, 32, 28, 20, 12, 4,
246 4, 12, 20, 28, 40, 48, 56, 64, 72, 80, 88, 96,108,116,124,132,132,124,116,108, 96, 88, 80, 72, 64, 56, 48, 40, 28, 20, 12, 4,
247 4, 16, 24, 32, 44, 52, 60, 72, 80, 92,100,108,120,128,136,148,148,136,128,120,108,100, 92, 80, 72, 60, 52, 44, 32, 24, 16, 4,
248 4, 16, 28, 36, 48, 56, 68, 80, 88,100,112,120,132,140,152,164,164,152,140,132,120,112,100, 88, 80, 68, 56, 48, 36, 28, 16, 4,
249 4, 16, 28, 40, 52, 64, 76, 88, 96,108,120,132,144,156,168,180,180,168,156,144,132,120,108, 96, 88, 76, 64, 52, 40, 28, 16, 4,
250 8, 20, 32, 44, 56, 68, 80, 92,108,120,132,144,156,168,180,192,192,180,168,156,144,132,120,108, 92, 80, 68, 56, 44, 32, 20, 8,
251 8, 20, 32, 48, 60, 76, 88,100,116,128,140,156,168,184,196,208,208,196,184,168,156,140,128,116,100, 88, 76, 60, 48, 32, 20, 8,
252 8, 20, 36, 52, 64, 80, 96,108,124,136,152,168,180,196,212,224,224,212,196,180,168,152,136,124,108, 96, 80, 64, 52, 36, 20, 8,
253 8, 24, 40, 56, 68, 84,100,116,132,148,164,180,192,208,224,240,240,224,208,192,180,164,148,132,116,100, 84, 68, 56, 40, 24, 8,
254 8, 24, 40, 56, 68, 84,100,116,132,148,164,180,192,208,224,240,240,224,208,192,180,164,148,132,116,100, 84, 68, 56, 40, 24, 8,
255 8, 20, 36, 52, 64, 80, 96,108,124,136,152,168,180,196,212,224,224,212,196,180,168,152,136,124,108, 96, 80, 64, 52, 36, 20, 8,
256 8, 20, 32, 48, 60, 76, 88,100,116,128,140,156,168,184,196,208,208,196,184,168,156,140,128,116,100, 88, 76, 60, 48, 32, 20, 8,
257 8, 20, 32, 44, 56, 68, 80, 92,108,120,132,144,156,168,180,192,192,180,168,156,144,132,120,108, 92, 80, 68, 56, 44, 32, 20, 8,
258 4, 16, 28, 40, 52, 64, 76, 88, 96,108,120,132,144,156,168,180,180,168,156,144,132,120,108, 96, 88, 76, 64, 52, 40, 28, 16, 4,
259 4, 16, 28, 36, 48, 56, 68, 80, 88,100,112,120,132,140,152,164,164,152,140,132,120,112,100, 88, 80, 68, 56, 48, 36, 28, 16, 4,
260 4, 16, 24, 32, 44, 52, 60, 72, 80, 92,100,108,120,128,136,148,148,136,128,120,108,100, 92, 80, 72, 60, 52, 44, 32, 24, 16, 4,
261 4, 12, 20, 28, 40, 48, 56, 64, 72, 80, 88, 96,108,116,124,132,132,124,116,108, 96, 88, 80, 72, 64, 56, 48, 40, 28, 20, 12, 4,
262 4, 12, 20, 28, 32, 40, 48, 56, 64, 72, 80, 88, 92,100,108,116,116,108,100, 92, 88, 80, 72, 64, 56, 48, 40, 32, 28, 20, 12, 4,
263 4, 8, 16, 24, 28, 36, 44, 48, 56, 60, 68, 76, 80, 88, 96,100,100, 96, 88, 80, 76, 68, 60, 56, 48, 44, 36, 28, 24, 16, 8, 4,
264 4, 8, 12, 20, 24, 32, 36, 40, 48, 52, 56, 64, 68, 76, 80, 84, 84, 80, 76, 68, 64, 56, 52, 48, 40, 36, 32, 24, 20, 12, 8, 4,
265 4, 8, 12, 16, 20, 24, 28, 32, 40, 44, 48, 52, 56, 60, 64, 68, 68, 64, 60, 56, 52, 48, 44, 40, 32, 28, 24, 20, 16, 12, 8, 4,
266 0, 4, 8, 12, 16, 20, 24, 28, 28, 32, 36, 40, 44, 48, 52, 56, 56, 52, 48, 44, 40, 36, 32, 28, 28, 24, 20, 16, 12, 8, 4, 0,
267 0, 4, 8, 8, 12, 12, 16, 20, 20, 24, 28, 28, 32, 32, 36, 40, 40, 36, 32, 32, 28, 28, 24, 20, 20, 16, 12, 12, 8, 8, 4, 0,
268 0, 4, 4, 4, 8, 8, 8, 12, 12, 16, 16, 16, 20, 20, 20, 24, 24, 20, 20, 20, 16, 16, 16, 12, 12, 8, 8, 8, 4, 4, 4, 0,
269 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0,
272 static const uint8_t obmc16[256]={
273 0, 4, 4, 8, 8, 12, 12, 16, 16, 12, 12, 8, 8, 4, 4, 0,
274 4, 8, 16, 20, 28, 32, 40, 44, 44, 40, 32, 28, 20, 16, 8, 4,
275 4, 16, 24, 36, 44, 56, 64, 76, 76, 64, 56, 44, 36, 24, 16, 4,
276 8, 20, 36, 48, 64, 76, 92,104,104, 92, 76, 64, 48, 36, 20, 8,
277 8, 28, 44, 64, 80,100,116,136,136,116,100, 80, 64, 44, 28, 8,
278 12, 32, 56, 76,100,120,144,164,164,144,120,100, 76, 56, 32, 12,
279 12, 40, 64, 92,116,144,168,196,196,168,144,116, 92, 64, 40, 12,
280 16, 44, 76,104,136,164,196,224,224,196,164,136,104, 76, 44, 16,
281 16, 44, 76,104,136,164,196,224,224,196,164,136,104, 76, 44, 16,
282 12, 40, 64, 92,116,144,168,196,196,168,144,116, 92, 64, 40, 12,
283 12, 32, 56, 76,100,120,144,164,164,144,120,100, 76, 56, 32, 12,
284 8, 28, 44, 64, 80,100,116,136,136,116,100, 80, 64, 44, 28, 8,
285 8, 20, 36, 48, 64, 76, 92,104,104, 92, 76, 64, 48, 36, 20, 8,
286 4, 16, 24, 36, 44, 56, 64, 76, 76, 64, 56, 44, 36, 24, 16, 4,
287 4, 8, 16, 20, 28, 32, 40, 44, 44, 40, 32, 28, 20, 16, 8, 4,
288 0, 4, 4, 8, 8, 12, 12, 16, 16, 12, 12, 8, 8, 4, 4, 0,
292 static const uint8_t obmc32[1024]={
293 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
294 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 4, 4, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0,
295 0, 0, 0, 4, 4, 4, 4, 8, 8, 12, 12, 12, 12, 16, 16, 16, 16, 16, 16, 12, 12, 12, 12, 8, 8, 4, 4, 4, 4, 0, 0, 0,
296 0, 0, 4, 4, 4, 8, 8, 12, 16, 20, 20, 24, 28, 28, 28, 28, 28, 28, 28, 28, 24, 20, 20, 16, 12, 8, 8, 4, 4, 4, 0, 0,
297 0, 0, 4, 4, 8, 12, 16, 20, 24, 28, 36, 40, 44, 44, 48, 48, 48, 48, 44, 44, 40, 36, 28, 24, 20, 16, 12, 8, 4, 4, 0, 0,
298 0, 0, 4, 8, 12, 20, 24, 32, 36, 44, 48, 56, 60, 64, 68, 68, 68, 68, 64, 60, 56, 48, 44, 36, 32, 24, 20, 12, 8, 4, 0, 0,
299 0, 4, 4, 8, 16, 24, 32, 40, 48, 60, 68, 76, 80, 84, 88, 92, 92, 88, 84, 80, 76, 68, 60, 48, 40, 32, 24, 16, 8, 4, 4, 0,
300 0, 4, 8, 12, 20, 32, 40, 52, 64, 76, 84, 96,104,108,112,116,116,112,108,104, 96, 84, 76, 64, 52, 40, 32, 20, 12, 8, 4, 0,
301 0, 4, 8, 16, 24, 36, 48, 64, 76, 92,104,116,124,132,136,140,140,136,132,124,116,104, 92, 76, 64, 48, 36, 24, 16, 8, 4, 0,
302 0, 4, 12, 20, 28, 44, 60, 76, 92,104,120,136,148,156,160,164,164,160,156,148,136,120,104, 92, 76, 60, 44, 28, 20, 12, 4, 0,
303 0, 4, 12, 20, 36, 48, 68, 84,104,120,140,152,168,176,184,188,188,184,176,168,152,140,120,104, 84, 68, 48, 36, 20, 12, 4, 0,
304 0, 4, 12, 24, 36, 56, 76, 96,116,136,152,172,184,196,204,208,208,204,196,184,172,152,136,116, 96, 76, 56, 36, 24, 12, 4, 0,
305 0, 4, 12, 24, 44, 60, 80,104,124,148,168,184,200,212,224,228,228,224,212,200,184,168,148,124,104, 80, 60, 44, 24, 12, 4, 0,
306 0, 4, 12, 28, 44, 64, 84,108,132,156,176,196,212,228,236,240,240,236,228,212,196,176,156,132,108, 84, 64, 44, 28, 12, 4, 0,
307 0, 4, 16, 28, 48, 68, 88,112,136,160,184,204,224,236,244,252,252,244,236,224,204,184,160,136,112, 88, 68, 48, 28, 16, 4, 0,
308 1, 4, 16, 28, 48, 68, 92,116,140,164,188,208,228,240,252,255,255,252,240,228,208,188,164,140,116, 92, 68, 48, 28, 16, 4, 1,
309 1, 4, 16, 28, 48, 68, 92,116,140,164,188,208,228,240,252,255,255,252,240,228,208,188,164,140,116, 92, 68, 48, 28, 16, 4, 1,
310 0, 4, 16, 28, 48, 68, 88,112,136,160,184,204,224,236,244,252,252,244,236,224,204,184,160,136,112, 88, 68, 48, 28, 16, 4, 0,
311 0, 4, 12, 28, 44, 64, 84,108,132,156,176,196,212,228,236,240,240,236,228,212,196,176,156,132,108, 84, 64, 44, 28, 12, 4, 0,
312 0, 4, 12, 24, 44, 60, 80,104,124,148,168,184,200,212,224,228,228,224,212,200,184,168,148,124,104, 80, 60, 44, 24, 12, 4, 0,
313 0, 4, 12, 24, 36, 56, 76, 96,116,136,152,172,184,196,204,208,208,204,196,184,172,152,136,116, 96, 76, 56, 36, 24, 12, 4, 0,
314 0, 4, 12, 20, 36, 48, 68, 84,104,120,140,152,168,176,184,188,188,184,176,168,152,140,120,104, 84, 68, 48, 36, 20, 12, 4, 0,
315 0, 4, 12, 20, 28, 44, 60, 76, 92,104,120,136,148,156,160,164,164,160,156,148,136,120,104, 92, 76, 60, 44, 28, 20, 12, 4, 0,
316 0, 4, 8, 16, 24, 36, 48, 64, 76, 92,104,116,124,132,136,140,140,136,132,124,116,104, 92, 76, 64, 48, 36, 24, 16, 8, 4, 0,
317 0, 4, 8, 12, 20, 32, 40, 52, 64, 76, 84, 96,104,108,112,116,116,112,108,104, 96, 84, 76, 64, 52, 40, 32, 20, 12, 8, 4, 0,
318 0, 4, 4, 8, 16, 24, 32, 40, 48, 60, 68, 76, 80, 84, 88, 92, 92, 88, 84, 80, 76, 68, 60, 48, 40, 32, 24, 16, 8, 4, 4, 0,
319 0, 0, 4, 8, 12, 20, 24, 32, 36, 44, 48, 56, 60, 64, 68, 68, 68, 68, 64, 60, 56, 48, 44, 36, 32, 24, 20, 12, 8, 4, 0, 0,
320 0, 0, 4, 4, 8, 12, 16, 20, 24, 28, 36, 40, 44, 44, 48, 48, 48, 48, 44, 44, 40, 36, 28, 24, 20, 16, 12, 8, 4, 4, 0, 0,
321 0, 0, 4, 4, 4, 8, 8, 12, 16, 20, 20, 24, 28, 28, 28, 28, 28, 28, 28, 28, 24, 20, 20, 16, 12, 8, 8, 4, 4, 4, 0, 0,
322 0, 0, 0, 4, 4, 4, 4, 8, 8, 12, 12, 12, 12, 16, 16, 16, 16, 16, 16, 12, 12, 12, 12, 8, 8, 4, 4, 4, 4, 0, 0, 0,
323 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 4, 4, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0,
324 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
327 static const uint8_t obmc16[256]={
328 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0,
329 0, 0, 4, 8, 12, 16, 20, 20, 20, 20, 16, 12, 8, 4, 0, 0,
330 0, 4, 12, 24, 32, 44, 52, 56, 56, 52, 44, 32, 24, 12, 4, 0,
331 0, 8, 24, 40, 60, 80, 96,104,104, 96, 80, 60, 40, 24, 8, 0,
332 0, 12, 32, 64, 92,120,140,152,152,140,120, 92, 64, 32, 12, 0,
333 4, 16, 44, 80,120,156,184,196,196,184,156,120, 80, 44, 16, 4,
334 4, 20, 52, 96,140,184,216,232,232,216,184,140, 96, 52, 20, 4,
335 0, 20, 56,104,152,196,232,252,252,232,196,152,104, 56, 20, 0,
336 0, 20, 56,104,152,196,232,252,252,232,196,152,104, 56, 20, 0,
337 4, 20, 52, 96,140,184,216,232,232,216,184,140, 96, 52, 20, 4,
338 4, 16, 44, 80,120,156,184,196,196,184,156,120, 80, 44, 16, 4,
339 0, 12, 32, 64, 92,120,140,152,152,140,120, 92, 64, 32, 12, 0,
340 0, 8, 24, 40, 60, 80, 96,104,104, 96, 80, 60, 40, 24, 8, 0,
341 0, 4, 12, 24, 32, 44, 52, 56, 56, 52, 44, 32, 24, 12, 4, 0,
342 0, 0, 4, 8, 12, 16, 20, 20, 20, 20, 16, 12, 8, 4, 0, 0,
343 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0,
349 static const uint8_t obmc8[64]={
350 4, 12, 20, 28, 28, 20, 12, 4,
351 12, 36, 60, 84, 84, 60, 36, 12,
352 20, 60,100,140,140,100, 60, 20,
353 28, 84,140,196,196,140, 84, 28,
354 28, 84,140,196,196,140, 84, 28,
355 20, 60,100,140,140,100, 60, 20,
356 12, 36, 60, 84, 84, 60, 36, 12,
357 4, 12, 20, 28, 28, 20, 12, 4,
362 static const uint8_t obmc4[16]={
370 static const uint8_t * const obmc_tab[4]={
371 obmc32, obmc16, obmc8, obmc4
374 static int scale_mv_ref[MAX_REF_FRAMES][MAX_REF_FRAMES];
376 typedef struct BlockNode{
382 //#define TYPE_SPLIT 1
383 #define BLOCK_INTRA 1
385 //#define TYPE_NOCOLOR 4
386 uint8_t level; //FIXME merge into type?
389 static const BlockNode null_block= { //FIXME add border maybe
390 .color= {128,128,128},
398 #define LOG2_MB_SIZE 4
399 #define MB_SIZE (1<<LOG2_MB_SIZE)
400 #define ENCODER_EXTRA_BITS 4
403 typedef struct x_and_coeff{
408 typedef struct SubBand{
413 int qlog; ///< log(qscale)/log[2^(1/6)]
418 int stride_line; ///< Stride measured in lines, not pixels.
419 x_and_coeff * x_coeff;
420 struct SubBand *parent;
421 uint8_t state[/*7*2*/ 7 + 512][32];
424 typedef struct Plane{
427 SubBand band[MAX_DECOMPOSITIONS][4];
430 int8_t hcoeff[HTAPS_MAX/2];
435 int8_t last_hcoeff[HTAPS_MAX/2];
439 typedef struct SnowContext{
441 AVCodecContext *avctx;
446 AVFrame input_picture; ///< new_picture with the internal linesizes
447 AVFrame current_picture;
448 AVFrame last_picture[MAX_REF_FRAMES];
449 uint8_t *halfpel_plane[MAX_REF_FRAMES][4][4];
450 AVFrame mconly_picture;
451 // uint8_t q_context[16];
452 uint8_t header_state[32];
453 uint8_t block_state[128 + 32*128];
457 int spatial_decomposition_type;
458 int last_spatial_decomposition_type;
459 int temporal_decomposition_type;
460 int spatial_decomposition_count;
461 int last_spatial_decomposition_count;
462 int temporal_decomposition_count;
465 int16_t (*ref_mvs[MAX_REF_FRAMES])[2];
466 uint32_t *ref_scores[MAX_REF_FRAMES];
467 DWTELEM *spatial_dwt_buffer;
468 IDWTELEM *spatial_idwt_buffer;
472 int spatial_scalability;
482 #define QBIAS_SHIFT 3
486 int last_block_max_depth;
487 Plane plane[MAX_PLANES];
489 #define ME_CACHE_SIZE 1024
490 int me_cache[ME_CACHE_SIZE];
491 int me_cache_generation;
494 MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to eventually make the motion estimation independent of MpegEncContext, so this will be removed then (FIXME/XXX)
500 // Avoid a name clash on SGI IRIX
503 #define QEXPSHIFT (7-FRAC_BITS+8) //FIXME try to change this to 0
504 static uint8_t qexp[QROOT];
506 static inline void put_symbol(RangeCoder *c, uint8_t *state, int v, int is_signed){
510 const int a= FFABS(v);
511 const int e= av_log2(a);
513 const int el= FFMIN(e, 10);
514 put_rac(c, state+0, 0);
517 put_rac(c, state+1+i, 1); //1..10
520 put_rac(c, state+1+9, 1); //1..10
522 put_rac(c, state+1+FFMIN(i,9), 0);
524 for(i=e-1; i>=el; i--){
525 put_rac(c, state+22+9, (a>>i)&1); //22..31
528 put_rac(c, state+22+i, (a>>i)&1); //22..31
532 put_rac(c, state+11 + el, v < 0); //11..21
535 put_rac(c, state+0, 0);
538 put_rac(c, state+1+i, 1); //1..10
540 put_rac(c, state+1+i, 0);
542 for(i=e-1; i>=0; i--){
543 put_rac(c, state+22+i, (a>>i)&1); //22..31
547 put_rac(c, state+11 + e, v < 0); //11..21
550 put_rac(c, state+1+FFMIN(i,9), 1); //1..10
552 put_rac(c, state+1+9, 0);
554 for(i=e-1; i>=0; i--){
555 put_rac(c, state+22+FFMIN(i,9), (a>>i)&1); //22..31
559 put_rac(c, state+11 + 10, v < 0); //11..21
563 put_rac(c, state+0, 1);
567 static inline int get_symbol(RangeCoder *c, uint8_t *state, int is_signed){
568 if(get_rac(c, state+0))
573 while(get_rac(c, state+1 + FFMIN(e,9))){ //1..10
578 for(i=e-1; i>=0; i--){
579 a += a + get_rac(c, state+22 + FFMIN(i,9)); //22..31
582 e= -(is_signed && get_rac(c, state+11 + FFMIN(e,10))); //11..21
587 static inline void put_symbol2(RangeCoder *c, uint8_t *state, int v, int log2){
589 int r= log2>=0 ? 1<<log2 : 1;
595 put_rac(c, state+4+log2, 1);
600 put_rac(c, state+4+log2, 0);
602 for(i=log2-1; i>=0; i--){
603 put_rac(c, state+31-i, (v>>i)&1);
607 static inline int get_symbol2(RangeCoder *c, uint8_t *state, int log2){
609 int r= log2>=0 ? 1<<log2 : 1;
614 while(get_rac(c, state+4+log2)){
620 for(i=log2-1; i>=0; i--){
621 v+= get_rac(c, state+31-i)<<i;
627 static inline void unpack_coeffs(SnowContext *s, SubBand *b, SubBand * parent, int orientation){
628 const int w= b->width;
629 const int h= b->height;
633 x_and_coeff *xc= b->x_coeff;
634 x_and_coeff *prev_xc= NULL;
635 x_and_coeff *prev2_xc= xc;
636 x_and_coeff *parent_xc= parent ? parent->x_coeff : NULL;
637 x_and_coeff *prev_parent_xc= parent_xc;
639 runs= get_symbol2(&s->c, b->state[30], 0);
640 if(runs-- > 0) run= get_symbol2(&s->c, b->state[1], 3);
647 if(y && prev_xc->x == 0){
659 if(prev_xc->x == x + 1)
665 if(x>>1 > parent_xc->x){
668 if(x>>1 == parent_xc->x){
672 if(/*ll|*/l|lt|t|rt|p){
673 int context= av_log2(/*FFABS(ll) + */3*(l>>1) + (lt>>1) + (t&~1) + (rt>>1) + (p>>1));
675 v=get_rac(&s->c, &b->state[0][context]);
677 v= 2*(get_symbol2(&s->c, b->state[context + 2], context-4) + 1);
678 v+=get_rac(&s->c, &b->state[0][16 + 1 + 3 + quant3bA[l&0xFF] + 3*quant3bA[t&0xFF]]);
685 if(runs-- > 0) run= get_symbol2(&s->c, b->state[1], 3);
687 v= 2*(get_symbol2(&s->c, b->state[0 + 2], 0-4) + 1);
688 v+=get_rac(&s->c, &b->state[0][16 + 1 + 3]);
697 if(y) max_run= FFMIN(run, prev_xc->x - x - 2);
698 else max_run= FFMIN(run, w-x-1);
700 max_run= FFMIN(max_run, 2*parent_xc->x - x - 1);
706 (xc++)->x= w+1; //end marker
712 while(parent_xc->x != parent->width+1)
715 prev_parent_xc= parent_xc;
717 parent_xc= prev_parent_xc;
722 (xc++)->x= w+1; //end marker
725 static inline void decode_subband_slice_buffered(SnowContext *s, SubBand *b, slice_buffer * sb, int start_y, int h, int save_state[1]){
726 const int w= b->width;
728 const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
729 int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
730 int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
733 if(b->ibuf == s->spatial_idwt_buffer || s->qlog == LOSSLESS_QLOG){
738 /* If we are on the second or later slice, restore our index. */
740 new_index = save_state[0];
743 for(y=start_y; y<h; y++){
746 IDWTELEM * line = slice_buffer_get_line(sb, y * b->stride_line + b->buf_y_offset) + b->buf_x_offset;
747 memset(line, 0, b->width*sizeof(IDWTELEM));
748 v = b->x_coeff[new_index].coeff;
749 x = b->x_coeff[new_index++].x;
751 register int t= ( (v>>1)*qmul + qadd)>>QEXPSHIFT;
752 register int u= -(v&1);
755 v = b->x_coeff[new_index].coeff;
756 x = b->x_coeff[new_index++].x;
760 /* Save our variables for the next slice. */
761 save_state[0] = new_index;
766 static void reset_contexts(SnowContext *s){ //FIXME better initial contexts
767 int plane_index, level, orientation;
769 for(plane_index=0; plane_index<3; plane_index++){
770 for(level=0; level<MAX_DECOMPOSITIONS; level++){
771 for(orientation=level ? 1:0; orientation<4; orientation++){
772 memset(s->plane[plane_index].band[level][orientation].state, MID_STATE, sizeof(s->plane[plane_index].band[level][orientation].state));
776 memset(s->header_state, MID_STATE, sizeof(s->header_state));
777 memset(s->block_state, MID_STATE, sizeof(s->block_state));
780 static int alloc_blocks(SnowContext *s){
781 int w= -((-s->avctx->width )>>LOG2_MB_SIZE);
782 int h= -((-s->avctx->height)>>LOG2_MB_SIZE);
788 s->block= av_mallocz(w * h * sizeof(BlockNode) << (s->block_max_depth*2));
792 static inline void copy_rac_state(RangeCoder *d, RangeCoder *s){
793 uint8_t *bytestream= d->bytestream;
794 uint8_t *bytestream_start= d->bytestream_start;
796 d->bytestream= bytestream;
797 d->bytestream_start= bytestream_start;
800 static inline void set_blocks(SnowContext *s, int level, int x, int y, int l, int cb, int cr, int mx, int my, int ref, int type){
801 const int w= s->b_width << s->block_max_depth;
802 const int rem_depth= s->block_max_depth - level;
803 const int index= (x + y*w) << rem_depth;
804 const int block_w= 1<<rem_depth;
817 for(j=0; j<block_w; j++){
818 for(i=0; i<block_w; i++){
819 s->block[index + i + j*w]= block;
824 static inline void init_ref(MotionEstContext *c, uint8_t *src[3], uint8_t *ref[3], uint8_t *ref2[3], int x, int y, int ref_index){
825 const int offset[3]= {
827 ((y*c->uvstride + x)>>1),
828 ((y*c->uvstride + x)>>1),
832 c->src[0][i]= src [i];
833 c->ref[0][i]= ref [i] + offset[i];
838 static inline void pred_mv(SnowContext *s, int *mx, int *my, int ref,
839 const BlockNode *left, const BlockNode *top, const BlockNode *tr){
840 if(s->ref_frames == 1){
841 *mx = mid_pred(left->mx, top->mx, tr->mx);
842 *my = mid_pred(left->my, top->my, tr->my);
844 const int *scale = scale_mv_ref[ref];
845 *mx = mid_pred((left->mx * scale[left->ref] + 128) >>8,
846 (top ->mx * scale[top ->ref] + 128) >>8,
847 (tr ->mx * scale[tr ->ref] + 128) >>8);
848 *my = mid_pred((left->my * scale[left->ref] + 128) >>8,
849 (top ->my * scale[top ->ref] + 128) >>8,
850 (tr ->my * scale[tr ->ref] + 128) >>8);
854 static av_always_inline int same_block(BlockNode *a, BlockNode *b){
855 if((a->type&BLOCK_INTRA) && (b->type&BLOCK_INTRA)){
856 return !((a->color[0] - b->color[0]) | (a->color[1] - b->color[1]) | (a->color[2] - b->color[2]));
858 return !((a->mx - b->mx) | (a->my - b->my) | (a->ref - b->ref) | ((a->type ^ b->type)&BLOCK_INTRA));
862 static void decode_q_branch(SnowContext *s, int level, int x, int y){
863 const int w= s->b_width << s->block_max_depth;
864 const int rem_depth= s->block_max_depth - level;
865 const int index= (x + y*w) << rem_depth;
866 int trx= (x+1)<<rem_depth;
867 const BlockNode *left = x ? &s->block[index-1] : &null_block;
868 const BlockNode *top = y ? &s->block[index-w] : &null_block;
869 const BlockNode *tl = y && x ? &s->block[index-w-1] : left;
870 const BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
871 int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
874 set_blocks(s, level, x, y, null_block.color[0], null_block.color[1], null_block.color[2], null_block.mx, null_block.my, null_block.ref, BLOCK_INTRA);
878 if(level==s->block_max_depth || get_rac(&s->c, &s->block_state[4 + s_context])){
880 int l = left->color[0];
881 int cb= left->color[1];
882 int cr= left->color[2];
884 int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref);
885 int mx_context= av_log2(2*FFABS(left->mx - top->mx)) + 0*av_log2(2*FFABS(tr->mx - top->mx));
886 int my_context= av_log2(2*FFABS(left->my - top->my)) + 0*av_log2(2*FFABS(tr->my - top->my));
888 type= get_rac(&s->c, &s->block_state[1 + left->type + top->type]) ? BLOCK_INTRA : 0;
891 pred_mv(s, &mx, &my, 0, left, top, tr);
892 l += get_symbol(&s->c, &s->block_state[32], 1);
893 cb+= get_symbol(&s->c, &s->block_state[64], 1);
894 cr+= get_symbol(&s->c, &s->block_state[96], 1);
896 if(s->ref_frames > 1)
897 ref= get_symbol(&s->c, &s->block_state[128 + 1024 + 32*ref_context], 0);
898 pred_mv(s, &mx, &my, ref, left, top, tr);
899 mx+= get_symbol(&s->c, &s->block_state[128 + 32*(mx_context + 16*!!ref)], 1);
900 my+= get_symbol(&s->c, &s->block_state[128 + 32*(my_context + 16*!!ref)], 1);
902 set_blocks(s, level, x, y, l, cb, cr, mx, my, ref, type);
904 decode_q_branch(s, level+1, 2*x+0, 2*y+0);
905 decode_q_branch(s, level+1, 2*x+1, 2*y+0);
906 decode_q_branch(s, level+1, 2*x+0, 2*y+1);
907 decode_q_branch(s, level+1, 2*x+1, 2*y+1);
911 static void decode_blocks(SnowContext *s){
918 decode_q_branch(s, 0, x, y);
923 static void mc_block(Plane *p, uint8_t *dst, const uint8_t *src, int stride, int b_w, int b_h, int dx, int dy){
924 static const uint8_t weight[64]={
935 static const uint8_t brane[256]={
936 0x00,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x11,0x12,0x12,0x12,0x12,0x12,0x12,0x12,
937 0x04,0x05,0xcc,0xcc,0xcc,0xcc,0xcc,0x41,0x15,0x16,0xcc,0xcc,0xcc,0xcc,0xcc,0x52,
938 0x04,0xcc,0x05,0xcc,0xcc,0xcc,0x41,0xcc,0x15,0xcc,0x16,0xcc,0xcc,0xcc,0x52,0xcc,
939 0x04,0xcc,0xcc,0x05,0xcc,0x41,0xcc,0xcc,0x15,0xcc,0xcc,0x16,0xcc,0x52,0xcc,0xcc,
940 0x04,0xcc,0xcc,0xcc,0x41,0xcc,0xcc,0xcc,0x15,0xcc,0xcc,0xcc,0x16,0xcc,0xcc,0xcc,
941 0x04,0xcc,0xcc,0x41,0xcc,0x05,0xcc,0xcc,0x15,0xcc,0xcc,0x52,0xcc,0x16,0xcc,0xcc,
942 0x04,0xcc,0x41,0xcc,0xcc,0xcc,0x05,0xcc,0x15,0xcc,0x52,0xcc,0xcc,0xcc,0x16,0xcc,
943 0x04,0x41,0xcc,0xcc,0xcc,0xcc,0xcc,0x05,0x15,0x52,0xcc,0xcc,0xcc,0xcc,0xcc,0x16,
944 0x44,0x45,0x45,0x45,0x45,0x45,0x45,0x45,0x55,0x56,0x56,0x56,0x56,0x56,0x56,0x56,
945 0x48,0x49,0xcc,0xcc,0xcc,0xcc,0xcc,0x85,0x59,0x5A,0xcc,0xcc,0xcc,0xcc,0xcc,0x96,
946 0x48,0xcc,0x49,0xcc,0xcc,0xcc,0x85,0xcc,0x59,0xcc,0x5A,0xcc,0xcc,0xcc,0x96,0xcc,
947 0x48,0xcc,0xcc,0x49,0xcc,0x85,0xcc,0xcc,0x59,0xcc,0xcc,0x5A,0xcc,0x96,0xcc,0xcc,
948 0x48,0xcc,0xcc,0xcc,0x49,0xcc,0xcc,0xcc,0x59,0xcc,0xcc,0xcc,0x96,0xcc,0xcc,0xcc,
949 0x48,0xcc,0xcc,0x85,0xcc,0x49,0xcc,0xcc,0x59,0xcc,0xcc,0x96,0xcc,0x5A,0xcc,0xcc,
950 0x48,0xcc,0x85,0xcc,0xcc,0xcc,0x49,0xcc,0x59,0xcc,0x96,0xcc,0xcc,0xcc,0x5A,0xcc,
951 0x48,0x85,0xcc,0xcc,0xcc,0xcc,0xcc,0x49,0x59,0x96,0xcc,0xcc,0xcc,0xcc,0xcc,0x5A,
954 static const uint8_t needs[16]={
962 int16_t tmpIt [64*(32+HTAPS_MAX)];
963 uint8_t tmp2t[3][stride*(32+HTAPS_MAX)];
964 int16_t *tmpI= tmpIt;
965 uint8_t *tmp2= tmp2t[0];
966 const uint8_t *hpel[11];
967 assert(dx<16 && dy<16);
968 r= brane[dx + 16*dy]&15;
969 l= brane[dx + 16*dy]>>4;
971 b= needs[l] | needs[r];
976 for(y=0; y < b_h+HTAPS_MAX-1; y++){
977 for(x=0; x < b_w; x++){
978 int a_1=src[x + HTAPS_MAX/2-4];
979 int a0= src[x + HTAPS_MAX/2-3];
980 int a1= src[x + HTAPS_MAX/2-2];
981 int a2= src[x + HTAPS_MAX/2-1];
982 int a3= src[x + HTAPS_MAX/2+0];
983 int a4= src[x + HTAPS_MAX/2+1];
984 int a5= src[x + HTAPS_MAX/2+2];
985 int a6= src[x + HTAPS_MAX/2+3];
987 if(!p || p->fast_mc){
988 am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5);
992 am= p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6);
997 if(am&(~255)) am= ~(am>>31);
1006 src += HTAPS_MAX/2 - 1;
1010 for(y=0; y < b_h; y++){
1011 for(x=0; x < b_w+1; x++){
1012 int a_1=src[x + (HTAPS_MAX/2-4)*stride];
1013 int a0= src[x + (HTAPS_MAX/2-3)*stride];
1014 int a1= src[x + (HTAPS_MAX/2-2)*stride];
1015 int a2= src[x + (HTAPS_MAX/2-1)*stride];
1016 int a3= src[x + (HTAPS_MAX/2+0)*stride];
1017 int a4= src[x + (HTAPS_MAX/2+1)*stride];
1018 int a5= src[x + (HTAPS_MAX/2+2)*stride];
1019 int a6= src[x + (HTAPS_MAX/2+3)*stride];
1021 if(!p || p->fast_mc)
1022 am= (20*(a2+a3) - 5*(a1+a4) + (a0+a5) + 16)>>5;
1024 am= (p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6) + 32)>>6;
1026 if(am&(~255)) am= ~(am>>31);
1034 src += stride*(HTAPS_MAX/2 - 1);
1038 for(y=0; y < b_h; y++){
1039 for(x=0; x < b_w; x++){
1040 int a_1=tmpI[x + (HTAPS_MAX/2-4)*64];
1041 int a0= tmpI[x + (HTAPS_MAX/2-3)*64];
1042 int a1= tmpI[x + (HTAPS_MAX/2-2)*64];
1043 int a2= tmpI[x + (HTAPS_MAX/2-1)*64];
1044 int a3= tmpI[x + (HTAPS_MAX/2+0)*64];
1045 int a4= tmpI[x + (HTAPS_MAX/2+1)*64];
1046 int a5= tmpI[x + (HTAPS_MAX/2+2)*64];
1047 int a6= tmpI[x + (HTAPS_MAX/2+3)*64];
1049 if(!p || p->fast_mc)
1050 am= (20*(a2+a3) - 5*(a1+a4) + (a0+a5) + 512)>>10;
1052 am= (p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6) + 2048)>>12;
1053 if(am&(~255)) am= ~(am>>31);
1062 hpel[ 1]= tmp2t[0] + stride*(HTAPS_MAX/2-1);
1067 hpel[ 6]= tmp2t[1] + 1;
1069 hpel[ 8]= src + stride;
1070 hpel[ 9]= hpel[1] + stride;
1071 hpel[10]= hpel[8] + 1;
1074 const uint8_t *src1= hpel[dx/8 + dy/8*4 ];
1075 const uint8_t *src2= hpel[dx/8 + dy/8*4+1];
1076 const uint8_t *src3= hpel[dx/8 + dy/8*4+4];
1077 const uint8_t *src4= hpel[dx/8 + dy/8*4+5];
1080 for(y=0; y < b_h; y++){
1081 for(x=0; x < b_w; x++){
1082 dst[x]= ((8-dx)*(8-dy)*src1[x] + dx*(8-dy)*src2[x]+
1083 (8-dx)* dy *src3[x] + dx* dy *src4[x]+32)>>6;
1092 const uint8_t *src1= hpel[l];
1093 const uint8_t *src2= hpel[r];
1094 int a= weight[((dx&7) + (8*(dy&7)))];
1096 for(y=0; y < b_h; y++){
1097 for(x=0; x < b_w; x++){
1098 dst[x]= (a*src1[x] + b*src2[x] + 4)>>3;
1107 #define mca(dx,dy,b_w)\
1108 static void mc_block_hpel ## dx ## dy ## b_w(uint8_t *dst, const uint8_t *src, int stride, int h){\
1110 mc_block(NULL, dst, src-(HTAPS_MAX/2-1)-(HTAPS_MAX/2-1)*stride, stride, b_w, b_w, dx, dy);\
1122 static void pred_block(SnowContext *s, uint8_t *dst, uint8_t *tmp, int stride, int sx, int sy, int b_w, int b_h, BlockNode *block, int plane_index, int w, int h){
1123 if(block->type & BLOCK_INTRA){
1125 const int color = block->color[plane_index];
1126 const int color4= color*0x01010101;
1128 for(y=0; y < b_h; y++){
1129 *(uint32_t*)&dst[0 + y*stride]= color4;
1130 *(uint32_t*)&dst[4 + y*stride]= color4;
1131 *(uint32_t*)&dst[8 + y*stride]= color4;
1132 *(uint32_t*)&dst[12+ y*stride]= color4;
1133 *(uint32_t*)&dst[16+ y*stride]= color4;
1134 *(uint32_t*)&dst[20+ y*stride]= color4;
1135 *(uint32_t*)&dst[24+ y*stride]= color4;
1136 *(uint32_t*)&dst[28+ y*stride]= color4;
1139 for(y=0; y < b_h; y++){
1140 *(uint32_t*)&dst[0 + y*stride]= color4;
1141 *(uint32_t*)&dst[4 + y*stride]= color4;
1142 *(uint32_t*)&dst[8 + y*stride]= color4;
1143 *(uint32_t*)&dst[12+ y*stride]= color4;
1146 for(y=0; y < b_h; y++){
1147 *(uint32_t*)&dst[0 + y*stride]= color4;
1148 *(uint32_t*)&dst[4 + y*stride]= color4;
1151 for(y=0; y < b_h; y++){
1152 *(uint32_t*)&dst[0 + y*stride]= color4;
1155 for(y=0; y < b_h; y++){
1156 for(x=0; x < b_w; x++){
1157 dst[x + y*stride]= color;
1162 uint8_t *src= s->last_picture[block->ref].data[plane_index];
1163 const int scale= plane_index ? s->mv_scale : 2*s->mv_scale;
1164 int mx= block->mx*scale;
1165 int my= block->my*scale;
1166 const int dx= mx&15;
1167 const int dy= my&15;
1168 const int tab_index= 3 - (b_w>>2) + (b_w>>4);
1169 sx += (mx>>4) - (HTAPS_MAX/2-1);
1170 sy += (my>>4) - (HTAPS_MAX/2-1);
1171 src += sx + sy*stride;
1172 if( (unsigned)sx >= w - b_w - (HTAPS_MAX-2)
1173 || (unsigned)sy >= h - b_h - (HTAPS_MAX-2)){
1174 s->dsp.emulated_edge_mc(tmp + MB_SIZE, src, stride, b_w+HTAPS_MAX-1, b_h+HTAPS_MAX-1, sx, sy, w, h);
1177 // assert(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h);
1178 // assert(!(b_w&(b_w-1)));
1179 assert(b_w>1 && b_h>1);
1180 assert((tab_index>=0 && tab_index<4) || b_w==32);
1181 if((dx&3) || (dy&3) || !(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h) || (b_w&(b_w-1)) || !s->plane[plane_index].fast_mc )
1182 mc_block(&s->plane[plane_index], dst, src, stride, b_w, b_h, dx, dy);
1185 for(y=0; y<b_h; y+=16){
1186 s->dsp.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + y*stride, src + 3 + (y+3)*stride,stride);
1187 s->dsp.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + 16 + y*stride, src + 19 + (y+3)*stride,stride);
1190 s->dsp.put_h264_qpel_pixels_tab[tab_index ][dy+(dx>>2)](dst,src + 3 + 3*stride,stride);
1191 else if(b_w==2*b_h){
1192 s->dsp.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst ,src + 3 + 3*stride,stride);
1193 s->dsp.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst+b_h,src + 3 + b_h + 3*stride,stride);
1196 s->dsp.put_h264_qpel_pixels_tab[tab_index ][dy+(dx>>2)](dst ,src + 3 + 3*stride ,stride);
1197 s->dsp.put_h264_qpel_pixels_tab[tab_index ][dy+(dx>>2)](dst+b_w*stride,src + 3 + 3*stride+b_w*stride,stride);
1202 void ff_snow_inner_add_yblock(const uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h,
1203 int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8){
1206 for(y=0; y<b_h; y++){
1207 //FIXME ugly misuse of obmc_stride
1208 const uint8_t *obmc1= obmc + y*obmc_stride;
1209 const uint8_t *obmc2= obmc1+ (obmc_stride>>1);
1210 const uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1);
1211 const uint8_t *obmc4= obmc3+ (obmc_stride>>1);
1212 dst = slice_buffer_get_line(sb, src_y + y);
1213 for(x=0; x<b_w; x++){
1214 int v= obmc1[x] * block[3][x + y*src_stride]
1215 +obmc2[x] * block[2][x + y*src_stride]
1216 +obmc3[x] * block[1][x + y*src_stride]
1217 +obmc4[x] * block[0][x + y*src_stride];
1219 v <<= 8 - LOG2_OBMC_MAX;
1221 v >>= 8 - FRAC_BITS;
1224 v += dst[x + src_x];
1225 v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS;
1226 if(v&(~255)) v= ~(v>>31);
1227 dst8[x + y*src_stride] = v;
1229 dst[x + src_x] -= v;
1235 //FIXME name cleanup (b_w, block_w, b_width stuff)
1236 static av_always_inline void add_yblock(SnowContext *s, int sliced, slice_buffer *sb, IDWTELEM *dst, uint8_t *dst8, const uint8_t *obmc, int src_x, int src_y, int b_w, int b_h, int w, int h, int dst_stride, int src_stride, int obmc_stride, int b_x, int b_y, int add, int offset_dst, int plane_index){
1237 const int b_width = s->b_width << s->block_max_depth;
1238 const int b_height= s->b_height << s->block_max_depth;
1239 const int b_stride= b_width;
1240 BlockNode *lt= &s->block[b_x + b_y*b_stride];
1241 BlockNode *rt= lt+1;
1242 BlockNode *lb= lt+b_stride;
1243 BlockNode *rb= lb+1;
1245 int tmp_step= src_stride >= 7*MB_SIZE ? MB_SIZE : MB_SIZE*src_stride;
1246 uint8_t *tmp = s->scratchbuf;
1253 }else if(b_x + 1 >= b_width){
1260 }else if(b_y + 1 >= b_height){
1265 if(src_x<0){ //FIXME merge with prev & always round internal width up to *16
1268 if(!sliced && !offset_dst)
1271 }else if(src_x + b_w > w){
1275 obmc -= src_y*obmc_stride;
1277 if(!sliced && !offset_dst)
1278 dst -= src_y*dst_stride;
1280 }else if(src_y + b_h> h){
1284 if(b_w<=0 || b_h<=0) return;
1286 assert(src_stride > 2*MB_SIZE + 5);
1288 if(!sliced && offset_dst)
1289 dst += src_x + src_y*dst_stride;
1290 dst8+= src_x + src_y*src_stride;
1291 // src += src_x + src_y*src_stride;
1293 ptmp= tmp + 3*tmp_step;
1296 pred_block(s, block[0], tmp, src_stride, src_x, src_y, b_w, b_h, lt, plane_index, w, h);
1298 if(same_block(lt, rt)){
1303 pred_block(s, block[1], tmp, src_stride, src_x, src_y, b_w, b_h, rt, plane_index, w, h);
1306 if(same_block(lt, lb)){
1308 }else if(same_block(rt, lb)){
1313 pred_block(s, block[2], tmp, src_stride, src_x, src_y, b_w, b_h, lb, plane_index, w, h);
1316 if(same_block(lt, rb) ){
1318 }else if(same_block(rt, rb)){
1320 }else if(same_block(lb, rb)){
1324 pred_block(s, block[3], tmp, src_stride, src_x, src_y, b_w, b_h, rb, plane_index, w, h);
1327 for(y=0; y<b_h; y++){
1328 for(x=0; x<b_w; x++){
1329 int v= obmc [x + y*obmc_stride] * block[3][x + y*src_stride] * (256/OBMC_MAX);
1330 if(add) dst[x + y*dst_stride] += v;
1331 else dst[x + y*dst_stride] -= v;
1334 for(y=0; y<b_h; y++){
1335 uint8_t *obmc2= obmc + (obmc_stride>>1);
1336 for(x=0; x<b_w; x++){
1337 int v= obmc2[x + y*obmc_stride] * block[2][x + y*src_stride] * (256/OBMC_MAX);
1338 if(add) dst[x + y*dst_stride] += v;
1339 else dst[x + y*dst_stride] -= v;
1342 for(y=0; y<b_h; y++){
1343 uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
1344 for(x=0; x<b_w; x++){
1345 int v= obmc3[x + y*obmc_stride] * block[1][x + y*src_stride] * (256/OBMC_MAX);
1346 if(add) dst[x + y*dst_stride] += v;
1347 else dst[x + y*dst_stride] -= v;
1350 for(y=0; y<b_h; y++){
1351 uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
1352 uint8_t *obmc4= obmc3+ (obmc_stride>>1);
1353 for(x=0; x<b_w; x++){
1354 int v= obmc4[x + y*obmc_stride] * block[0][x + y*src_stride] * (256/OBMC_MAX);
1355 if(add) dst[x + y*dst_stride] += v;
1356 else dst[x + y*dst_stride] -= v;
1361 s->dwt.inner_add_yblock(obmc, obmc_stride, block, b_w, b_h, src_x,src_y, src_stride, sb, add, dst8);
1363 for(y=0; y<b_h; y++){
1364 //FIXME ugly misuse of obmc_stride
1365 const uint8_t *obmc1= obmc + y*obmc_stride;
1366 const uint8_t *obmc2= obmc1+ (obmc_stride>>1);
1367 const uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1);
1368 const uint8_t *obmc4= obmc3+ (obmc_stride>>1);
1369 for(x=0; x<b_w; x++){
1370 int v= obmc1[x] * block[3][x + y*src_stride]
1371 +obmc2[x] * block[2][x + y*src_stride]
1372 +obmc3[x] * block[1][x + y*src_stride]
1373 +obmc4[x] * block[0][x + y*src_stride];
1375 v <<= 8 - LOG2_OBMC_MAX;
1377 v >>= 8 - FRAC_BITS;
1380 v += dst[x + y*dst_stride];
1381 v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS;
1382 if(v&(~255)) v= ~(v>>31);
1383 dst8[x + y*src_stride] = v;
1385 dst[x + y*dst_stride] -= v;
1393 static av_always_inline void predict_slice_buffered(SnowContext *s, slice_buffer * sb, IDWTELEM * old_buffer, int plane_index, int add, int mb_y){
1394 Plane *p= &s->plane[plane_index];
1395 const int mb_w= s->b_width << s->block_max_depth;
1396 const int mb_h= s->b_height << s->block_max_depth;
1398 int block_size = MB_SIZE >> s->block_max_depth;
1399 int block_w = plane_index ? block_size/2 : block_size;
1400 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
1401 int obmc_stride= plane_index ? block_size : 2*block_size;
1402 int ref_stride= s->current_picture.linesize[plane_index];
1403 uint8_t *dst8= s->current_picture.data[plane_index];
1407 if(s->keyframe || (s->avctx->debug&512)){
1412 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
1413 // DWTELEM * line = slice_buffer_get_line(sb, y);
1414 IDWTELEM * line = sb->line[y];
1416 // int v= buf[x + y*w] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
1417 int v= line[x] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
1419 if(v&(~255)) v= ~(v>>31);
1420 dst8[x + y*ref_stride]= v;
1424 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
1425 // DWTELEM * line = slice_buffer_get_line(sb, y);
1426 IDWTELEM * line = sb->line[y];
1428 line[x] -= 128 << FRAC_BITS;
1429 // buf[x + y*w]-= 128<<FRAC_BITS;
1437 for(mb_x=0; mb_x<=mb_w; mb_x++){
1438 add_yblock(s, 1, sb, old_buffer, dst8, obmc,
1439 block_w*mb_x - block_w/2,
1440 block_w*mb_y - block_w/2,
1443 w, ref_stride, obmc_stride,
1445 add, 0, plane_index);
1449 static av_always_inline void predict_slice(SnowContext *s, IDWTELEM *buf, int plane_index, int add, int mb_y){
1450 Plane *p= &s->plane[plane_index];
1451 const int mb_w= s->b_width << s->block_max_depth;
1452 const int mb_h= s->b_height << s->block_max_depth;
1454 int block_size = MB_SIZE >> s->block_max_depth;
1455 int block_w = plane_index ? block_size/2 : block_size;
1456 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
1457 const int obmc_stride= plane_index ? block_size : 2*block_size;
1458 int ref_stride= s->current_picture.linesize[plane_index];
1459 uint8_t *dst8= s->current_picture.data[plane_index];
1463 if(s->keyframe || (s->avctx->debug&512)){
1468 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
1470 int v= buf[x + y*w] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
1472 if(v&(~255)) v= ~(v>>31);
1473 dst8[x + y*ref_stride]= v;
1477 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
1479 buf[x + y*w]-= 128<<FRAC_BITS;
1487 for(mb_x=0; mb_x<=mb_w; mb_x++){
1488 add_yblock(s, 0, NULL, buf, dst8, obmc,
1489 block_w*mb_x - block_w/2,
1490 block_w*mb_y - block_w/2,
1493 w, ref_stride, obmc_stride,
1495 add, 1, plane_index);
1499 static av_always_inline void predict_plane(SnowContext *s, IDWTELEM *buf, int plane_index, int add){
1500 const int mb_h= s->b_height << s->block_max_depth;
1502 for(mb_y=0; mb_y<=mb_h; mb_y++)
1503 predict_slice(s, buf, plane_index, add, mb_y);
1506 static void dequantize_slice_buffered(SnowContext *s, slice_buffer * sb, SubBand *b, IDWTELEM *src, int stride, int start_y, int end_y){
1507 const int w= b->width;
1508 const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
1509 const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
1510 const int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
1513 if(s->qlog == LOSSLESS_QLOG) return;
1515 for(y=start_y; y<end_y; y++){
1516 // DWTELEM * line = slice_buffer_get_line_from_address(sb, src + (y * stride));
1517 IDWTELEM * line = slice_buffer_get_line(sb, (y * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
1521 line[x]= -((-i*qmul + qadd)>>(QEXPSHIFT)); //FIXME try different bias
1523 line[x]= (( i*qmul + qadd)>>(QEXPSHIFT));
1529 static void correlate_slice_buffered(SnowContext *s, slice_buffer * sb, SubBand *b, IDWTELEM *src, int stride, int inverse, int use_median, int start_y, int end_y){
1530 const int w= b->width;
1533 IDWTELEM * line=0; // silence silly "could be used without having been initialized" warning
1537 line = slice_buffer_get_line(sb, ((start_y - 1) * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
1539 for(y=start_y; y<end_y; y++){
1541 // line = slice_buffer_get_line_from_address(sb, src + (y * stride));
1542 line = slice_buffer_get_line(sb, (y * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
1546 if(y && x+1<w) line[x] += mid_pred(line[x - 1], prev[x], prev[x + 1]);
1547 else line[x] += line[x - 1];
1549 if(y) line[x] += mid_pred(line[x - 1], prev[x], line[x - 1] + prev[x] - prev[x - 1]);
1550 else line[x] += line[x - 1];
1553 if(y) line[x] += prev[x];
1559 static void decode_qlogs(SnowContext *s){
1560 int plane_index, level, orientation;
1562 for(plane_index=0; plane_index<3; plane_index++){
1563 for(level=0; level<s->spatial_decomposition_count; level++){
1564 for(orientation=level ? 1:0; orientation<4; orientation++){
1566 if (plane_index==2) q= s->plane[1].band[level][orientation].qlog;
1567 else if(orientation==2) q= s->plane[plane_index].band[level][1].qlog;
1568 else q= get_symbol(&s->c, s->header_state, 1);
1569 s->plane[plane_index].band[level][orientation].qlog= q;
1575 #define GET_S(dst, check) \
1576 tmp= get_symbol(&s->c, s->header_state, 0);\
1578 av_log(s->avctx, AV_LOG_ERROR, "Error " #dst " is %d\n", tmp);\
1583 static int decode_header(SnowContext *s){
1584 int plane_index, tmp;
1587 memset(kstate, MID_STATE, sizeof(kstate));
1589 s->keyframe= get_rac(&s->c, kstate);
1590 if(s->keyframe || s->always_reset){
1592 s->spatial_decomposition_type=
1596 s->block_max_depth= 0;
1599 GET_S(s->version, tmp <= 0U)
1600 s->always_reset= get_rac(&s->c, s->header_state);
1601 s->temporal_decomposition_type= get_symbol(&s->c, s->header_state, 0);
1602 s->temporal_decomposition_count= get_symbol(&s->c, s->header_state, 0);
1603 GET_S(s->spatial_decomposition_count, 0 < tmp && tmp <= MAX_DECOMPOSITIONS)
1604 s->colorspace_type= get_symbol(&s->c, s->header_state, 0);
1605 s->chroma_h_shift= get_symbol(&s->c, s->header_state, 0);
1606 s->chroma_v_shift= get_symbol(&s->c, s->header_state, 0);
1607 s->spatial_scalability= get_rac(&s->c, s->header_state);
1608 // s->rate_scalability= get_rac(&s->c, s->header_state);
1609 GET_S(s->max_ref_frames, tmp < (unsigned)MAX_REF_FRAMES)
1610 s->max_ref_frames++;
1616 if(get_rac(&s->c, s->header_state)){
1617 for(plane_index=0; plane_index<2; plane_index++){
1618 int htaps, i, sum=0;
1619 Plane *p= &s->plane[plane_index];
1620 p->diag_mc= get_rac(&s->c, s->header_state);
1621 htaps= get_symbol(&s->c, s->header_state, 0)*2 + 2;
1622 if((unsigned)htaps > HTAPS_MAX || htaps==0)
1625 for(i= htaps/2; i; i--){
1626 p->hcoeff[i]= get_symbol(&s->c, s->header_state, 0) * (1-2*(i&1));
1627 sum += p->hcoeff[i];
1629 p->hcoeff[0]= 32-sum;
1631 s->plane[2].diag_mc= s->plane[1].diag_mc;
1632 s->plane[2].htaps = s->plane[1].htaps;
1633 memcpy(s->plane[2].hcoeff, s->plane[1].hcoeff, sizeof(s->plane[1].hcoeff));
1635 if(get_rac(&s->c, s->header_state)){
1636 GET_S(s->spatial_decomposition_count, 0 < tmp && tmp <= MAX_DECOMPOSITIONS)
1641 s->spatial_decomposition_type+= get_symbol(&s->c, s->header_state, 1);
1642 if(s->spatial_decomposition_type > 1U){
1643 av_log(s->avctx, AV_LOG_ERROR, "spatial_decomposition_type %d not supported", s->spatial_decomposition_type);
1646 if(FFMIN(s->avctx-> width>>s->chroma_h_shift,
1647 s->avctx->height>>s->chroma_v_shift) >> (s->spatial_decomposition_count-1) <= 0){
1648 av_log(s->avctx, AV_LOG_ERROR, "spatial_decomposition_count %d too large for size", s->spatial_decomposition_count);
1652 s->qlog += get_symbol(&s->c, s->header_state, 1);
1653 s->mv_scale += get_symbol(&s->c, s->header_state, 1);
1654 s->qbias += get_symbol(&s->c, s->header_state, 1);
1655 s->block_max_depth+= get_symbol(&s->c, s->header_state, 1);
1656 if(s->block_max_depth > 1 || s->block_max_depth < 0){
1657 av_log(s->avctx, AV_LOG_ERROR, "block_max_depth= %d is too large", s->block_max_depth);
1658 s->block_max_depth= 0;
1665 static void init_qexp(void){
1669 for(i=0; i<QROOT; i++){
1671 v *= pow(2, 1.0 / QROOT);
1675 static av_cold int common_init(AVCodecContext *avctx){
1676 SnowContext *s = avctx->priv_data;
1681 s->max_ref_frames=1; //just make sure its not an invalid value in case of no initial keyframe
1683 dsputil_init(&s->dsp, avctx);
1684 ff_dwt_init(&s->dwt);
1687 s->dsp.put_qpel_pixels_tab [0][dy+dx/4]=\
1688 s->dsp.put_no_rnd_qpel_pixels_tab[0][dy+dx/4]=\
1689 s->dsp.put_h264_qpel_pixels_tab[0][dy+dx/4];\
1690 s->dsp.put_qpel_pixels_tab [1][dy+dx/4]=\
1691 s->dsp.put_no_rnd_qpel_pixels_tab[1][dy+dx/4]=\
1692 s->dsp.put_h264_qpel_pixels_tab[1][dy+dx/4];
1711 #define mcfh(dx,dy)\
1712 s->dsp.put_pixels_tab [0][dy/4+dx/8]=\
1713 s->dsp.put_no_rnd_pixels_tab[0][dy/4+dx/8]=\
1714 mc_block_hpel ## dx ## dy ## 16;\
1715 s->dsp.put_pixels_tab [1][dy/4+dx/8]=\
1716 s->dsp.put_no_rnd_pixels_tab[1][dy/4+dx/8]=\
1717 mc_block_hpel ## dx ## dy ## 8;
1727 // dec += FFMAX(s->chroma_h_shift, s->chroma_v_shift);
1729 width= s->avctx->width;
1730 height= s->avctx->height;
1732 s->spatial_idwt_buffer= av_mallocz(width*height*sizeof(IDWTELEM));
1733 s->spatial_dwt_buffer= av_mallocz(width*height*sizeof(DWTELEM)); //FIXME this does not belong here
1735 for(i=0; i<MAX_REF_FRAMES; i++)
1736 for(j=0; j<MAX_REF_FRAMES; j++)
1737 scale_mv_ref[i][j] = 256*(i+1)/(j+1);
1739 s->avctx->get_buffer(s->avctx, &s->mconly_picture);
1740 s->scratchbuf = av_malloc(s->mconly_picture.linesize[0]*7*MB_SIZE);
1745 static int common_init_after_header(AVCodecContext *avctx){
1746 SnowContext *s = avctx->priv_data;
1747 int plane_index, level, orientation;
1749 for(plane_index=0; plane_index<3; plane_index++){
1750 int w= s->avctx->width;
1751 int h= s->avctx->height;
1754 w>>= s->chroma_h_shift;
1755 h>>= s->chroma_v_shift;
1757 s->plane[plane_index].width = w;
1758 s->plane[plane_index].height= h;
1760 for(level=s->spatial_decomposition_count-1; level>=0; level--){
1761 for(orientation=level ? 1 : 0; orientation<4; orientation++){
1762 SubBand *b= &s->plane[plane_index].band[level][orientation];
1764 b->buf= s->spatial_dwt_buffer;
1766 b->stride= s->plane[plane_index].width << (s->spatial_decomposition_count - level);
1767 b->width = (w + !(orientation&1))>>1;
1768 b->height= (h + !(orientation>1))>>1;
1770 b->stride_line = 1 << (s->spatial_decomposition_count - level);
1771 b->buf_x_offset = 0;
1772 b->buf_y_offset = 0;
1776 b->buf_x_offset = (w+1)>>1;
1779 b->buf += b->stride>>1;
1780 b->buf_y_offset = b->stride_line >> 1;
1782 b->ibuf= s->spatial_idwt_buffer + (b->buf - s->spatial_dwt_buffer);
1785 b->parent= &s->plane[plane_index].band[level-1][orientation];
1786 //FIXME avoid this realloc
1787 av_freep(&b->x_coeff);
1788 b->x_coeff=av_mallocz(((b->width+1) * b->height+1)*sizeof(x_and_coeff));
1803 static void find_sse(SnowContext *s, Plane *p, int *score, int score_stride, IDWTELEM *r0, IDWTELEM *r1, int level, int orientation){
1804 SubBand *b= &p->band[level][orientation];
1808 int step= 1 << (s->spatial_decomposition_count - level);
1815 //FIXME bias for nonzero ?
1817 memset(score, 0, sizeof(*score)*score_stride*((p->height + Q2_STEP-1)/Q2_STEP));
1818 for(y=0; y<p->height; y++){
1819 for(x=0; x<p->width; x++){
1820 int sx= (x-xo + step/2) / step / Q2_STEP;
1821 int sy= (y-yo + step/2) / step / Q2_STEP;
1822 int v= r0[x + y*p->width] - r1[x + y*p->width];
1823 assert(sx>=0 && sy>=0 && sx < score_stride);
1825 score[sx + sy*score_stride] += v*v;
1826 assert(score[sx + sy*score_stride] >= 0);
1831 static void dequantize_all(SnowContext *s, Plane *p, IDWTELEM *buffer, int width, int height){
1832 int level, orientation;
1834 for(level=0; level<s->spatial_decomposition_count; level++){
1835 for(orientation=level ? 1 : 0; orientation<4; orientation++){
1836 SubBand *b= &p->band[level][orientation];
1837 IDWTELEM *dst= buffer + (b->ibuf - s->spatial_idwt_buffer);
1839 dequantize(s, b, dst, b->stride);
1844 static void dwt_quantize(SnowContext *s, Plane *p, DWTELEM *buffer, int width, int height, int stride, int type){
1845 int level, orientation, ys, xs, x, y, pass;
1846 IDWTELEM best_dequant[height * stride];
1847 IDWTELEM idwt2_buffer[height * stride];
1848 const int score_stride= (width + 10)/Q2_STEP;
1849 int best_score[(width + 10)/Q2_STEP * (height + 10)/Q2_STEP]; //FIXME size
1850 int score[(width + 10)/Q2_STEP * (height + 10)/Q2_STEP]; //FIXME size
1851 int threshold= (s->m.lambda * s->m.lambda) >> 6;
1853 //FIXME pass the copy cleanly ?
1855 // memcpy(dwt_buffer, buffer, height * stride * sizeof(DWTELEM));
1856 ff_spatial_dwt(buffer, width, height, stride, type, s->spatial_decomposition_count);
1858 for(level=0; level<s->spatial_decomposition_count; level++){
1859 for(orientation=level ? 1 : 0; orientation<4; orientation++){
1860 SubBand *b= &p->band[level][orientation];
1861 IDWTELEM *dst= best_dequant + (b->ibuf - s->spatial_idwt_buffer);
1862 DWTELEM *src= buffer + (b-> buf - s->spatial_dwt_buffer);
1863 assert(src == b->buf); // code does not depend on this but it is true currently
1865 quantize(s, b, dst, src, b->stride, s->qbias);
1868 for(pass=0; pass<1; pass++){
1869 if(s->qbias == 0) //keyframe
1871 for(level=0; level<s->spatial_decomposition_count; level++){
1872 for(orientation=level ? 1 : 0; orientation<4; orientation++){
1873 SubBand *b= &p->band[level][orientation];
1874 IDWTELEM *dst= idwt2_buffer + (b->ibuf - s->spatial_idwt_buffer);
1875 IDWTELEM *best_dst= best_dequant + (b->ibuf - s->spatial_idwt_buffer);
1877 for(ys= 0; ys<Q2_STEP; ys++){
1878 for(xs= 0; xs<Q2_STEP; xs++){
1879 memcpy(idwt2_buffer, best_dequant, height * stride * sizeof(IDWTELEM));
1880 dequantize_all(s, p, idwt2_buffer, width, height);
1881 ff_spatial_idwt(idwt2_buffer, width, height, stride, type, s->spatial_decomposition_count);
1882 find_sse(s, p, best_score, score_stride, idwt2_buffer, s->spatial_idwt_buffer, level, orientation);
1883 memcpy(idwt2_buffer, best_dequant, height * stride * sizeof(IDWTELEM));
1884 for(y=ys; y<b->height; y+= Q2_STEP){
1885 for(x=xs; x<b->width; x+= Q2_STEP){
1886 if(dst[x + y*b->stride]<0) dst[x + y*b->stride]++;
1887 if(dst[x + y*b->stride]>0) dst[x + y*b->stride]--;
1888 //FIXME try more than just --
1891 dequantize_all(s, p, idwt2_buffer, width, height);
1892 ff_spatial_idwt(idwt2_buffer, width, height, stride, type, s->spatial_decomposition_count);
1893 find_sse(s, p, score, score_stride, idwt2_buffer, s->spatial_idwt_buffer, level, orientation);
1894 for(y=ys; y<b->height; y+= Q2_STEP){
1895 for(x=xs; x<b->width; x+= Q2_STEP){
1896 int score_idx= x/Q2_STEP + (y/Q2_STEP)*score_stride;
1897 if(score[score_idx] <= best_score[score_idx] + threshold){
1898 best_score[score_idx]= score[score_idx];
1899 if(best_dst[x + y*b->stride]<0) best_dst[x + y*b->stride]++;
1900 if(best_dst[x + y*b->stride]>0) best_dst[x + y*b->stride]--;
1901 //FIXME copy instead
1910 memcpy(s->spatial_idwt_buffer, best_dequant, height * stride * sizeof(IDWTELEM)); //FIXME work with that directly instead of copy at the end
1913 #endif /* QUANTIZE2==1 */
1915 #define USE_HALFPEL_PLANE 0
1917 static void halfpel_interpol(SnowContext *s, uint8_t *halfpel[4][4], AVFrame *frame){
1920 assert(!(s->avctx->flags & CODEC_FLAG_EMU_EDGE));
1924 int w= s->avctx->width >>is_chroma;
1925 int h= s->avctx->height >>is_chroma;
1926 int ls= frame->linesize[p];
1927 uint8_t *src= frame->data[p];
1929 halfpel[1][p]= (uint8_t*)av_malloc(ls * (h+2*EDGE_WIDTH)) + EDGE_WIDTH*(1+ls);
1930 halfpel[2][p]= (uint8_t*)av_malloc(ls * (h+2*EDGE_WIDTH)) + EDGE_WIDTH*(1+ls);
1931 halfpel[3][p]= (uint8_t*)av_malloc(ls * (h+2*EDGE_WIDTH)) + EDGE_WIDTH*(1+ls);
1938 halfpel[1][p][i]= (20*(src[i] + src[i+1]) - 5*(src[i-1] + src[i+2]) + (src[i-2] + src[i+3]) + 16 )>>5;
1945 halfpel[2][p][i]= (20*(src[i] + src[i+ls]) - 5*(src[i-ls] + src[i+2*ls]) + (src[i-2*ls] + src[i+3*ls]) + 16 )>>5;
1953 halfpel[3][p][i]= (20*(src[i] + src[i+ls]) - 5*(src[i-ls] + src[i+2*ls]) + (src[i-2*ls] + src[i+3*ls]) + 16 )>>5;
1961 static void release_buffer(AVCodecContext *avctx){
1962 SnowContext *s = avctx->priv_data;
1965 if(s->last_picture[s->max_ref_frames-1].data[0]){
1966 avctx->release_buffer(avctx, &s->last_picture[s->max_ref_frames-1]);
1968 if(s->halfpel_plane[s->max_ref_frames-1][1+i/3][i%3])
1969 av_free(s->halfpel_plane[s->max_ref_frames-1][1+i/3][i%3] - EDGE_WIDTH*(1+s->current_picture.linesize[i%3]));
1973 static int frame_start(SnowContext *s){
1975 int w= s->avctx->width; //FIXME round up to x16 ?
1976 int h= s->avctx->height;
1978 if(s->current_picture.data[0]){
1979 s->dsp.draw_edges(s->current_picture.data[0],
1980 s->current_picture.linesize[0], w , h ,
1981 EDGE_WIDTH , EDGE_TOP | EDGE_BOTTOM);
1982 s->dsp.draw_edges(s->current_picture.data[1],
1983 s->current_picture.linesize[1], w>>1, h>>1,
1984 EDGE_WIDTH/2, EDGE_TOP | EDGE_BOTTOM);
1985 s->dsp.draw_edges(s->current_picture.data[2],
1986 s->current_picture.linesize[2], w>>1, h>>1,
1987 EDGE_WIDTH/2, EDGE_TOP | EDGE_BOTTOM);
1990 release_buffer(s->avctx);
1992 tmp= s->last_picture[s->max_ref_frames-1];
1993 memmove(s->last_picture+1, s->last_picture, (s->max_ref_frames-1)*sizeof(AVFrame));
1994 memmove(s->halfpel_plane+1, s->halfpel_plane, (s->max_ref_frames-1)*sizeof(void*)*4*4);
1995 if(USE_HALFPEL_PLANE && s->current_picture.data[0])
1996 halfpel_interpol(s, s->halfpel_plane[0], &s->current_picture);
1997 s->last_picture[0]= s->current_picture;
1998 s->current_picture= tmp;
2004 for(i=0; i<s->max_ref_frames && s->last_picture[i].data[0]; i++)
2005 if(i && s->last_picture[i-1].key_frame)
2008 if(s->ref_frames==0){
2009 av_log(s->avctx,AV_LOG_ERROR, "No reference frames\n");
2014 s->current_picture.reference= 1;
2015 if(s->avctx->get_buffer(s->avctx, &s->current_picture) < 0){
2016 av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed\n");
2020 s->current_picture.key_frame= s->keyframe;
2025 static av_cold void common_end(SnowContext *s){
2026 int plane_index, level, orientation, i;
2028 av_freep(&s->spatial_dwt_buffer);
2029 av_freep(&s->spatial_idwt_buffer);
2032 av_freep(&s->m.me.scratchpad);
2033 av_freep(&s->m.me.map);
2034 av_freep(&s->m.me.score_map);
2035 av_freep(&s->m.obmc_scratchpad);
2037 av_freep(&s->block);
2038 av_freep(&s->scratchbuf);
2040 for(i=0; i<MAX_REF_FRAMES; i++){
2041 av_freep(&s->ref_mvs[i]);
2042 av_freep(&s->ref_scores[i]);
2043 if(s->last_picture[i].data[0])
2044 s->avctx->release_buffer(s->avctx, &s->last_picture[i]);
2047 for(plane_index=0; plane_index<3; plane_index++){
2048 for(level=s->spatial_decomposition_count-1; level>=0; level--){
2049 for(orientation=level ? 1 : 0; orientation<4; orientation++){
2050 SubBand *b= &s->plane[plane_index].band[level][orientation];
2052 av_freep(&b->x_coeff);
2056 if (s->mconly_picture.data[0])
2057 s->avctx->release_buffer(s->avctx, &s->mconly_picture);
2058 if (s->current_picture.data[0])
2059 s->avctx->release_buffer(s->avctx, &s->current_picture);
2062 static av_cold int decode_init(AVCodecContext *avctx)
2064 avctx->pix_fmt= PIX_FMT_YUV420P;
2071 static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, AVPacket *avpkt){
2072 const uint8_t *buf = avpkt->data;
2073 int buf_size = avpkt->size;
2074 SnowContext *s = avctx->priv_data;
2075 RangeCoder * const c= &s->c;
2077 AVFrame *picture = data;
2078 int level, orientation, plane_index;
2080 ff_init_range_decoder(c, buf, buf_size);
2081 ff_build_rac_states(c, 0.05*(1LL<<32), 256-8);
2083 s->current_picture.pict_type= AV_PICTURE_TYPE_I; //FIXME I vs. P
2084 if(decode_header(s)<0)
2086 common_init_after_header(avctx);
2088 // realloc slice buffer for the case that spatial_decomposition_count changed
2089 ff_slice_buffer_destroy(&s->sb);
2090 ff_slice_buffer_init(&s->sb, s->plane[0].height, (MB_SIZE >> s->block_max_depth) + s->spatial_decomposition_count * 8 + 1, s->plane[0].width, s->spatial_idwt_buffer);
2092 for(plane_index=0; plane_index<3; plane_index++){
2093 Plane *p= &s->plane[plane_index];
2094 p->fast_mc= p->diag_mc && p->htaps==6 && p->hcoeff[0]==40
2095 && p->hcoeff[1]==-10
2101 if(frame_start(s) < 0)
2103 //keyframe flag duplication mess FIXME
2104 if(avctx->debug&FF_DEBUG_PICT_INFO)
2105 av_log(avctx, AV_LOG_ERROR, "keyframe:%d qlog:%d\n", s->keyframe, s->qlog);
2109 for(plane_index=0; plane_index<3; plane_index++){
2110 Plane *p= &s->plane[plane_index];
2114 int decode_state[MAX_DECOMPOSITIONS][4][1]; /* Stored state info for unpack_coeffs. 1 variable per instance. */
2116 if(s->avctx->debug&2048){
2117 memset(s->spatial_dwt_buffer, 0, sizeof(DWTELEM)*w*h);
2118 predict_plane(s, s->spatial_idwt_buffer, plane_index, 1);
2122 int v= s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x];
2123 s->mconly_picture.data[plane_index][y*s->mconly_picture.linesize[plane_index] + x]= v;
2129 for(level=0; level<s->spatial_decomposition_count; level++){
2130 for(orientation=level ? 1 : 0; orientation<4; orientation++){
2131 SubBand *b= &p->band[level][orientation];
2132 unpack_coeffs(s, b, b->parent, orientation);
2138 const int mb_h= s->b_height << s->block_max_depth;
2139 const int block_size = MB_SIZE >> s->block_max_depth;
2140 const int block_w = plane_index ? block_size/2 : block_size;
2142 DWTCompose cs[MAX_DECOMPOSITIONS];
2147 ff_spatial_idwt_buffered_init(cs, &s->sb, w, h, 1, s->spatial_decomposition_type, s->spatial_decomposition_count);
2148 for(mb_y=0; mb_y<=mb_h; mb_y++){
2150 int slice_starty = block_w*mb_y;
2151 int slice_h = block_w*(mb_y+1);
2152 if (!(s->keyframe || s->avctx->debug&512)){
2153 slice_starty = FFMAX(0, slice_starty - (block_w >> 1));
2154 slice_h -= (block_w >> 1);
2157 for(level=0; level<s->spatial_decomposition_count; level++){
2158 for(orientation=level ? 1 : 0; orientation<4; orientation++){
2159 SubBand *b= &p->band[level][orientation];
2162 int our_mb_start = mb_y;
2163 int our_mb_end = (mb_y + 1);
2165 start_y = (mb_y ? ((block_w * our_mb_start) >> (s->spatial_decomposition_count - level)) + s->spatial_decomposition_count - level + extra: 0);
2166 end_y = (((block_w * our_mb_end) >> (s->spatial_decomposition_count - level)) + s->spatial_decomposition_count - level + extra);
2167 if (!(s->keyframe || s->avctx->debug&512)){
2168 start_y = FFMAX(0, start_y - (block_w >> (1+s->spatial_decomposition_count - level)));
2169 end_y = FFMAX(0, end_y - (block_w >> (1+s->spatial_decomposition_count - level)));
2171 start_y = FFMIN(b->height, start_y);
2172 end_y = FFMIN(b->height, end_y);
2174 if (start_y != end_y){
2175 if (orientation == 0){
2176 SubBand * correlate_band = &p->band[0][0];
2177 int correlate_end_y = FFMIN(b->height, end_y + 1);
2178 int correlate_start_y = FFMIN(b->height, (start_y ? start_y + 1 : 0));
2179 decode_subband_slice_buffered(s, correlate_band, &s->sb, correlate_start_y, correlate_end_y, decode_state[0][0]);
2180 correlate_slice_buffered(s, &s->sb, correlate_band, correlate_band->ibuf, correlate_band->stride, 1, 0, correlate_start_y, correlate_end_y);
2181 dequantize_slice_buffered(s, &s->sb, correlate_band, correlate_band->ibuf, correlate_band->stride, start_y, end_y);
2184 decode_subband_slice_buffered(s, b, &s->sb, start_y, end_y, decode_state[level][orientation]);
2189 for(; yd<slice_h; yd+=4){
2190 ff_spatial_idwt_buffered_slice(&s->dwt, cs, &s->sb, w, h, 1, s->spatial_decomposition_type, s->spatial_decomposition_count, yd);
2193 if(s->qlog == LOSSLESS_QLOG){
2194 for(; yq<slice_h && yq<h; yq++){
2195 IDWTELEM * line = slice_buffer_get_line(&s->sb, yq);
2197 line[x] <<= FRAC_BITS;
2202 predict_slice_buffered(s, &s->sb, s->spatial_idwt_buffer, plane_index, 1, mb_y);
2204 y = FFMIN(p->height, slice_starty);
2205 end_y = FFMIN(p->height, slice_h);
2207 ff_slice_buffer_release(&s->sb, y++);
2210 ff_slice_buffer_flush(&s->sb);
2217 release_buffer(avctx);
2219 if(!(s->avctx->debug&2048))
2220 *picture= s->current_picture;
2222 *picture= s->mconly_picture;
2224 *data_size = sizeof(AVFrame);
2226 bytes_read= c->bytestream - c->bytestream_start;
2227 if(bytes_read ==0) av_log(s->avctx, AV_LOG_ERROR, "error at end of frame\n"); //FIXME
2232 static av_cold int decode_end(AVCodecContext *avctx)
2234 SnowContext *s = avctx->priv_data;
2236 ff_slice_buffer_destroy(&s->sb);
2243 AVCodec ff_snow_decoder = {
2247 sizeof(SnowContext),
2252 CODEC_CAP_DR1 /*| CODEC_CAP_DRAW_HORIZ_BAND*/,
2254 .long_name = NULL_IF_CONFIG_SMALL("Snow"),
2257 #if CONFIG_SNOW_ENCODER
2258 static av_cold int encode_init(AVCodecContext *avctx)
2260 SnowContext *s = avctx->priv_data;
2263 if(avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL){
2264 av_log(avctx, AV_LOG_ERROR, "This codec is under development, files encoded with it may not be decodable with future versions!!!\n"
2265 "Use vstrict=-2 / -strict -2 to use it anyway.\n");
2269 if(avctx->prediction_method == DWT_97
2270 && (avctx->flags & CODEC_FLAG_QSCALE)
2271 && avctx->global_quality == 0){
2272 av_log(avctx, AV_LOG_ERROR, "The 9/7 wavelet is incompatible with lossless mode.\n");
2276 s->spatial_decomposition_type= avctx->prediction_method; //FIXME add decorrelator type r transform_type
2278 s->mv_scale = (avctx->flags & CODEC_FLAG_QPEL) ? 2 : 4;
2279 s->block_max_depth= (avctx->flags & CODEC_FLAG_4MV ) ? 1 : 0;
2281 for(plane_index=0; plane_index<3; plane_index++){
2282 s->plane[plane_index].diag_mc= 1;
2283 s->plane[plane_index].htaps= 6;
2284 s->plane[plane_index].hcoeff[0]= 40;
2285 s->plane[plane_index].hcoeff[1]= -10;
2286 s->plane[plane_index].hcoeff[2]= 2;
2287 s->plane[plane_index].fast_mc= 1;
2296 s->m.flags = avctx->flags;
2297 s->m.bit_rate= avctx->bit_rate;
2300 s->m.me.scratchpad= av_mallocz((avctx->width+64)*2*16*2*sizeof(uint8_t));
2301 s->m.me.map = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t));
2302 s->m.me.score_map = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t));
2303 s->m.obmc_scratchpad= av_mallocz(MB_SIZE*MB_SIZE*12*sizeof(uint32_t));
2304 h263_encode_init(&s->m); //mv_penalty
2306 s->max_ref_frames = FFMAX(FFMIN(avctx->refs, MAX_REF_FRAMES), 1);
2308 if(avctx->flags&CODEC_FLAG_PASS1){
2309 if(!avctx->stats_out)
2310 avctx->stats_out = av_mallocz(256);
2312 if((avctx->flags&CODEC_FLAG_PASS2) || !(avctx->flags&CODEC_FLAG_QSCALE)){
2313 if(ff_rate_control_init(&s->m) < 0)
2316 s->pass1_rc= !(avctx->flags & (CODEC_FLAG_QSCALE|CODEC_FLAG_PASS2));
2318 avctx->coded_frame= &s->current_picture;
2319 switch(avctx->pix_fmt){
2320 // case PIX_FMT_YUV444P:
2321 // case PIX_FMT_YUV422P:
2322 case PIX_FMT_YUV420P:
2324 // case PIX_FMT_YUV411P:
2325 // case PIX_FMT_YUV410P:
2326 s->colorspace_type= 0;
2328 /* case PIX_FMT_RGB32:
2332 av_log(avctx, AV_LOG_ERROR, "pixel format not supported\n");
2335 // avcodec_get_chroma_sub_sample(avctx->pix_fmt, &s->chroma_h_shift, &s->chroma_v_shift);
2336 s->chroma_h_shift= 1;
2337 s->chroma_v_shift= 1;
2339 ff_set_cmp(&s->dsp, s->dsp.me_cmp, s->avctx->me_cmp);
2340 ff_set_cmp(&s->dsp, s->dsp.me_sub_cmp, s->avctx->me_sub_cmp);
2342 s->avctx->get_buffer(s->avctx, &s->input_picture);
2344 if(s->avctx->me_method == ME_ITER){
2346 int size= s->b_width * s->b_height << 2*s->block_max_depth;
2347 for(i=0; i<s->max_ref_frames; i++){
2348 s->ref_mvs[i]= av_mallocz(size*sizeof(int16_t[2]));
2349 s->ref_scores[i]= av_mallocz(size*sizeof(uint32_t));
2356 //near copy & paste from dsputil, FIXME
2357 static int pix_sum(uint8_t * pix, int line_size, int w)
2362 for (i = 0; i < w; i++) {
2363 for (j = 0; j < w; j++) {
2367 pix += line_size - w;
2372 //near copy & paste from dsputil, FIXME
2373 static int pix_norm1(uint8_t * pix, int line_size, int w)
2376 uint32_t *sq = ff_squareTbl + 256;
2379 for (i = 0; i < w; i++) {
2380 for (j = 0; j < w; j ++) {
2384 pix += line_size - w;
2392 #define P_TOPRIGHT P[3]
2393 #define P_MEDIAN P[4]
2395 #define FLAG_QPEL 1 //must be 1
2397 static int encode_q_branch(SnowContext *s, int level, int x, int y){
2398 uint8_t p_buffer[1024];
2399 uint8_t i_buffer[1024];
2400 uint8_t p_state[sizeof(s->block_state)];
2401 uint8_t i_state[sizeof(s->block_state)];
2403 uint8_t *pbbak= s->c.bytestream;
2404 uint8_t *pbbak_start= s->c.bytestream_start;
2405 int score, score2, iscore, i_len, p_len, block_s, sum, base_bits;
2406 const int w= s->b_width << s->block_max_depth;
2407 const int h= s->b_height << s->block_max_depth;
2408 const int rem_depth= s->block_max_depth - level;
2409 const int index= (x + y*w) << rem_depth;
2410 const int block_w= 1<<(LOG2_MB_SIZE - level);
2411 int trx= (x+1)<<rem_depth;
2412 int try= (y+1)<<rem_depth;
2413 const BlockNode *left = x ? &s->block[index-1] : &null_block;
2414 const BlockNode *top = y ? &s->block[index-w] : &null_block;
2415 const BlockNode *right = trx<w ? &s->block[index+1] : &null_block;
2416 const BlockNode *bottom= try<h ? &s->block[index+w] : &null_block;
2417 const BlockNode *tl = y && x ? &s->block[index-w-1] : left;
2418 const BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
2419 int pl = left->color[0];
2420 int pcb= left->color[1];
2421 int pcr= left->color[2];
2425 const int stride= s->current_picture.linesize[0];
2426 const int uvstride= s->current_picture.linesize[1];
2427 uint8_t *current_data[3]= { s->input_picture.data[0] + (x + y* stride)*block_w,
2428 s->input_picture.data[1] + (x + y*uvstride)*block_w/2,
2429 s->input_picture.data[2] + (x + y*uvstride)*block_w/2};
2431 int16_t last_mv[3][2];
2432 int qpel= !!(s->avctx->flags & CODEC_FLAG_QPEL); //unused
2433 const int shift= 1+qpel;
2434 MotionEstContext *c= &s->m.me;
2435 int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref);
2436 int mx_context= av_log2(2*FFABS(left->mx - top->mx));
2437 int my_context= av_log2(2*FFABS(left->my - top->my));
2438 int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
2439 int ref, best_ref, ref_score, ref_mx, ref_my;
2441 assert(sizeof(s->block_state) >= 256);
2443 set_blocks(s, level, x, y, pl, pcb, pcr, 0, 0, 0, BLOCK_INTRA);
2447 // clip predictors / edge ?
2449 P_LEFT[0]= left->mx;
2450 P_LEFT[1]= left->my;
2453 P_TOPRIGHT[0]= tr->mx;
2454 P_TOPRIGHT[1]= tr->my;
2456 last_mv[0][0]= s->block[index].mx;
2457 last_mv[0][1]= s->block[index].my;
2458 last_mv[1][0]= right->mx;
2459 last_mv[1][1]= right->my;
2460 last_mv[2][0]= bottom->mx;
2461 last_mv[2][1]= bottom->my;
2468 assert(c-> stride == stride);
2469 assert(c->uvstride == uvstride);
2471 c->penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_cmp);
2472 c->sub_penalty_factor= get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_sub_cmp);
2473 c->mb_penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->mb_cmp);
2474 c->current_mv_penalty= c->mv_penalty[s->m.f_code=1] + MAX_MV;
2476 c->xmin = - x*block_w - 16+3;
2477 c->ymin = - y*block_w - 16+3;
2478 c->xmax = - (x+1)*block_w + (w<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-3;
2479 c->ymax = - (y+1)*block_w + (h<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-3;
2481 if(P_LEFT[0] > (c->xmax<<shift)) P_LEFT[0] = (c->xmax<<shift);
2482 if(P_LEFT[1] > (c->ymax<<shift)) P_LEFT[1] = (c->ymax<<shift);
2483 if(P_TOP[0] > (c->xmax<<shift)) P_TOP[0] = (c->xmax<<shift);
2484 if(P_TOP[1] > (c->ymax<<shift)) P_TOP[1] = (c->ymax<<shift);
2485 if(P_TOPRIGHT[0] < (c->xmin<<shift)) P_TOPRIGHT[0]= (c->xmin<<shift);
2486 if(P_TOPRIGHT[0] > (c->xmax<<shift)) P_TOPRIGHT[0]= (c->xmax<<shift); //due to pmx no clip
2487 if(P_TOPRIGHT[1] > (c->ymax<<shift)) P_TOPRIGHT[1]= (c->ymax<<shift);
2489 P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]);
2490 P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);
2493 c->pred_x= P_LEFT[0];
2494 c->pred_y= P_LEFT[1];
2496 c->pred_x = P_MEDIAN[0];
2497 c->pred_y = P_MEDIAN[1];
2502 for(ref=0; ref<s->ref_frames; ref++){
2503 init_ref(c, current_data, s->last_picture[ref].data, NULL, block_w*x, block_w*y, 0);
2505 ref_score= ff_epzs_motion_search(&s->m, &ref_mx, &ref_my, P, 0, /*ref_index*/ 0, last_mv,
2506 (1<<16)>>shift, level-LOG2_MB_SIZE+4, block_w);
2508 assert(ref_mx >= c->xmin);
2509 assert(ref_mx <= c->xmax);
2510 assert(ref_my >= c->ymin);
2511 assert(ref_my <= c->ymax);
2513 ref_score= c->sub_motion_search(&s->m, &ref_mx, &ref_my, ref_score, 0, 0, level-LOG2_MB_SIZE+4, block_w);
2514 ref_score= ff_get_mb_score(&s->m, ref_mx, ref_my, 0, 0, level-LOG2_MB_SIZE+4, block_w, 0);
2515 ref_score+= 2*av_log2(2*ref)*c->penalty_factor;
2516 if(s->ref_mvs[ref]){
2517 s->ref_mvs[ref][index][0]= ref_mx;
2518 s->ref_mvs[ref][index][1]= ref_my;
2519 s->ref_scores[ref][index]= ref_score;
2521 if(score > ref_score){
2528 //FIXME if mb_cmp != SSE then intra cannot be compared currently and mb_penalty vs. lambda2
2531 base_bits= get_rac_count(&s->c) - 8*(s->c.bytestream - s->c.bytestream_start);
2533 pc.bytestream_start=
2534 pc.bytestream= p_buffer; //FIXME end/start? and at the other stoo
2535 memcpy(p_state, s->block_state, sizeof(s->block_state));
2537 if(level!=s->block_max_depth)
2538 put_rac(&pc, &p_state[4 + s_context], 1);
2539 put_rac(&pc, &p_state[1 + left->type + top->type], 0);
2540 if(s->ref_frames > 1)
2541 put_symbol(&pc, &p_state[128 + 1024 + 32*ref_context], best_ref, 0);
2542 pred_mv(s, &pmx, &pmy, best_ref, left, top, tr);
2543 put_symbol(&pc, &p_state[128 + 32*(mx_context + 16*!!best_ref)], mx - pmx, 1);
2544 put_symbol(&pc, &p_state[128 + 32*(my_context + 16*!!best_ref)], my - pmy, 1);
2545 p_len= pc.bytestream - pc.bytestream_start;
2546 score += (s->lambda2*(get_rac_count(&pc)-base_bits))>>FF_LAMBDA_SHIFT;
2548 block_s= block_w*block_w;
2549 sum = pix_sum(current_data[0], stride, block_w);
2550 l= (sum + block_s/2)/block_s;
2551 iscore = pix_norm1(current_data[0], stride, block_w) - 2*l*sum + l*l*block_s;
2553 block_s= block_w*block_w>>2;
2554 sum = pix_sum(current_data[1], uvstride, block_w>>1);
2555 cb= (sum + block_s/2)/block_s;
2556 // iscore += pix_norm1(¤t_mb[1][0], uvstride, block_w>>1) - 2*cb*sum + cb*cb*block_s;
2557 sum = pix_sum(current_data[2], uvstride, block_w>>1);
2558 cr= (sum + block_s/2)/block_s;
2559 // iscore += pix_norm1(¤t_mb[2][0], uvstride, block_w>>1) - 2*cr*sum + cr*cr*block_s;
2562 ic.bytestream_start=
2563 ic.bytestream= i_buffer; //FIXME end/start? and at the other stoo
2564 memcpy(i_state, s->block_state, sizeof(s->block_state));
2565 if(level!=s->block_max_depth)
2566 put_rac(&ic, &i_state[4 + s_context], 1);
2567 put_rac(&ic, &i_state[1 + left->type + top->type], 1);
2568 put_symbol(&ic, &i_state[32], l-pl , 1);
2569 put_symbol(&ic, &i_state[64], cb-pcb, 1);
2570 put_symbol(&ic, &i_state[96], cr-pcr, 1);
2571 i_len= ic.bytestream - ic.bytestream_start;
2572 iscore += (s->lambda2*(get_rac_count(&ic)-base_bits))>>FF_LAMBDA_SHIFT;
2574 // assert(score==256*256*256*64-1);
2575 assert(iscore < 255*255*256 + s->lambda2*10);
2576 assert(iscore >= 0);
2577 assert(l>=0 && l<=255);
2578 assert(pl>=0 && pl<=255);
2581 int varc= iscore >> 8;
2582 int vard= score >> 8;
2583 if (vard <= 64 || vard < varc)
2584 c->scene_change_score+= ff_sqrt(vard) - ff_sqrt(varc);
2586 c->scene_change_score+= s->m.qscale;
2589 if(level!=s->block_max_depth){
2590 put_rac(&s->c, &s->block_state[4 + s_context], 0);
2591 score2 = encode_q_branch(s, level+1, 2*x+0, 2*y+0);
2592 score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+0);
2593 score2+= encode_q_branch(s, level+1, 2*x+0, 2*y+1);
2594 score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+1);
2595 score2+= s->lambda2>>FF_LAMBDA_SHIFT; //FIXME exact split overhead
2597 if(score2 < score && score2 < iscore)
2602 pred_mv(s, &pmx, &pmy, 0, left, top, tr);
2603 memcpy(pbbak, i_buffer, i_len);
2605 s->c.bytestream_start= pbbak_start;
2606 s->c.bytestream= pbbak + i_len;
2607 set_blocks(s, level, x, y, l, cb, cr, pmx, pmy, 0, BLOCK_INTRA);
2608 memcpy(s->block_state, i_state, sizeof(s->block_state));
2611 memcpy(pbbak, p_buffer, p_len);
2613 s->c.bytestream_start= pbbak_start;
2614 s->c.bytestream= pbbak + p_len;
2615 set_blocks(s, level, x, y, pl, pcb, pcr, mx, my, best_ref, 0);
2616 memcpy(s->block_state, p_state, sizeof(s->block_state));
2621 static void encode_q_branch2(SnowContext *s, int level, int x, int y){
2622 const int w= s->b_width << s->block_max_depth;
2623 const int rem_depth= s->block_max_depth - level;
2624 const int index= (x + y*w) << rem_depth;
2625 int trx= (x+1)<<rem_depth;
2626 BlockNode *b= &s->block[index];
2627 const BlockNode *left = x ? &s->block[index-1] : &null_block;
2628 const BlockNode *top = y ? &s->block[index-w] : &null_block;
2629 const BlockNode *tl = y && x ? &s->block[index-w-1] : left;
2630 const BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
2631 int pl = left->color[0];
2632 int pcb= left->color[1];
2633 int pcr= left->color[2];
2635 int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref);
2636 int mx_context= av_log2(2*FFABS(left->mx - top->mx)) + 16*!!b->ref;
2637 int my_context= av_log2(2*FFABS(left->my - top->my)) + 16*!!b->ref;
2638 int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
2641 set_blocks(s, level, x, y, pl, pcb, pcr, 0, 0, 0, BLOCK_INTRA);
2645 if(level!=s->block_max_depth){
2646 if(same_block(b,b+1) && same_block(b,b+w) && same_block(b,b+w+1)){
2647 put_rac(&s->c, &s->block_state[4 + s_context], 1);
2649 put_rac(&s->c, &s->block_state[4 + s_context], 0);
2650 encode_q_branch2(s, level+1, 2*x+0, 2*y+0);
2651 encode_q_branch2(s, level+1, 2*x+1, 2*y+0);
2652 encode_q_branch2(s, level+1, 2*x+0, 2*y+1);
2653 encode_q_branch2(s, level+1, 2*x+1, 2*y+1);
2657 if(b->type & BLOCK_INTRA){
2658 pred_mv(s, &pmx, &pmy, 0, left, top, tr);
2659 put_rac(&s->c, &s->block_state[1 + (left->type&1) + (top->type&1)], 1);
2660 put_symbol(&s->c, &s->block_state[32], b->color[0]-pl , 1);
2661 put_symbol(&s->c, &s->block_state[64], b->color[1]-pcb, 1);
2662 put_symbol(&s->c, &s->block_state[96], b->color[2]-pcr, 1);
2663 set_blocks(s, level, x, y, b->color[0], b->color[1], b->color[2], pmx, pmy, 0, BLOCK_INTRA);
2665 pred_mv(s, &pmx, &pmy, b->ref, left, top, tr);
2666 put_rac(&s->c, &s->block_state[1 + (left->type&1) + (top->type&1)], 0);
2667 if(s->ref_frames > 1)
2668 put_symbol(&s->c, &s->block_state[128 + 1024 + 32*ref_context], b->ref, 0);
2669 put_symbol(&s->c, &s->block_state[128 + 32*mx_context], b->mx - pmx, 1);
2670 put_symbol(&s->c, &s->block_state[128 + 32*my_context], b->my - pmy, 1);
2671 set_blocks(s, level, x, y, pl, pcb, pcr, b->mx, b->my, b->ref, 0);
2675 static int get_dc(SnowContext *s, int mb_x, int mb_y, int plane_index){
2677 Plane *p= &s->plane[plane_index];
2678 const int block_size = MB_SIZE >> s->block_max_depth;
2679 const int block_w = plane_index ? block_size/2 : block_size;
2680 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
2681 const int obmc_stride= plane_index ? block_size : 2*block_size;
2682 const int ref_stride= s->current_picture.linesize[plane_index];
2683 uint8_t *src= s-> input_picture.data[plane_index];
2684 IDWTELEM *dst= (IDWTELEM*)s->m.obmc_scratchpad + plane_index*block_size*block_size*4; //FIXME change to unsigned
2685 const int b_stride = s->b_width << s->block_max_depth;
2686 const int w= p->width;
2687 const int h= p->height;
2688 int index= mb_x + mb_y*b_stride;
2689 BlockNode *b= &s->block[index];
2690 BlockNode backup= *b;
2694 b->type|= BLOCK_INTRA;
2695 b->color[plane_index]= 0;
2696 memset(dst, 0, obmc_stride*obmc_stride*sizeof(IDWTELEM));
2699 int mb_x2= mb_x + (i &1) - 1;
2700 int mb_y2= mb_y + (i>>1) - 1;
2701 int x= block_w*mb_x2 + block_w/2;
2702 int y= block_w*mb_y2 + block_w/2;
2704 add_yblock(s, 0, NULL, dst + ((i&1)+(i>>1)*obmc_stride)*block_w, NULL, obmc,
2705 x, y, block_w, block_w, w, h, obmc_stride, ref_stride, obmc_stride, mb_x2, mb_y2, 0, 0, plane_index);
2707 for(y2= FFMAX(y, 0); y2<FFMIN(h, y+block_w); y2++){
2708 for(x2= FFMAX(x, 0); x2<FFMIN(w, x+block_w); x2++){
2709 int index= x2-(block_w*mb_x - block_w/2) + (y2-(block_w*mb_y - block_w/2))*obmc_stride;
2710 int obmc_v= obmc[index];
2712 if(y<0) obmc_v += obmc[index + block_w*obmc_stride];
2713 if(x<0) obmc_v += obmc[index + block_w];
2714 if(y+block_w>h) obmc_v += obmc[index - block_w*obmc_stride];
2715 if(x+block_w>w) obmc_v += obmc[index - block_w];
2716 //FIXME precalculate this or simplify it somehow else
2718 d = -dst[index] + (1<<(FRAC_BITS-1));
2720 ab += (src[x2 + y2*ref_stride] - (d>>FRAC_BITS)) * obmc_v;
2721 aa += obmc_v * obmc_v; //FIXME precalculate this
2727 return av_clip(((ab<<LOG2_OBMC_MAX) + aa/2)/aa, 0, 255); //FIXME we should not need clipping
2730 static inline int get_block_bits(SnowContext *s, int x, int y, int w){
2731 const int b_stride = s->b_width << s->block_max_depth;
2732 const int b_height = s->b_height<< s->block_max_depth;
2733 int index= x + y*b_stride;
2734 const BlockNode *b = &s->block[index];
2735 const BlockNode *left = x ? &s->block[index-1] : &null_block;
2736 const BlockNode *top = y ? &s->block[index-b_stride] : &null_block;
2737 const BlockNode *tl = y && x ? &s->block[index-b_stride-1] : left;
2738 const BlockNode *tr = y && x+w<b_stride ? &s->block[index-b_stride+w] : tl;
2740 // int mx_context= av_log2(2*FFABS(left->mx - top->mx));
2741 // int my_context= av_log2(2*FFABS(left->my - top->my));
2743 if(x<0 || x>=b_stride || y>=b_height)
2750 00001XXXX 15-30 8-15
2752 //FIXME try accurate rate
2753 //FIXME intra and inter predictors if surrounding blocks are not the same type
2754 if(b->type & BLOCK_INTRA){
2755 return 3+2*( av_log2(2*FFABS(left->color[0] - b->color[0]))
2756 + av_log2(2*FFABS(left->color[1] - b->color[1]))
2757 + av_log2(2*FFABS(left->color[2] - b->color[2])));
2759 pred_mv(s, &dmx, &dmy, b->ref, left, top, tr);
2762 return 2*(1 + av_log2(2*FFABS(dmx)) //FIXME kill the 2* can be merged in lambda
2763 + av_log2(2*FFABS(dmy))
2764 + av_log2(2*b->ref));
2768 static int get_block_rd(SnowContext *s, int mb_x, int mb_y, int plane_index, const uint8_t *obmc_edged){
2769 Plane *p= &s->plane[plane_index];
2770 const int block_size = MB_SIZE >> s->block_max_depth;
2771 const int block_w = plane_index ? block_size/2 : block_size;
2772 const int obmc_stride= plane_index ? block_size : 2*block_size;
2773 const int ref_stride= s->current_picture.linesize[plane_index];
2774 uint8_t *dst= s->current_picture.data[plane_index];
2775 uint8_t *src= s-> input_picture.data[plane_index];
2776 IDWTELEM *pred= (IDWTELEM*)s->m.obmc_scratchpad + plane_index*block_size*block_size*4;
2777 uint8_t *cur = s->scratchbuf;
2778 uint8_t tmp[ref_stride*(2*MB_SIZE+HTAPS_MAX-1)];
2779 const int b_stride = s->b_width << s->block_max_depth;
2780 const int b_height = s->b_height<< s->block_max_depth;
2781 const int w= p->width;
2782 const int h= p->height;
2785 const int penalty_factor= get_penalty_factor(s->lambda, s->lambda2, s->avctx->me_cmp);
2786 int sx= block_w*mb_x - block_w/2;
2787 int sy= block_w*mb_y - block_w/2;
2788 int x0= FFMAX(0,-sx);
2789 int y0= FFMAX(0,-sy);
2790 int x1= FFMIN(block_w*2, w-sx);
2791 int y1= FFMIN(block_w*2, h-sy);
2794 pred_block(s, cur, tmp, ref_stride, sx, sy, block_w*2, block_w*2, &s->block[mb_x + mb_y*b_stride], plane_index, w, h);
2796 for(y=y0; y<y1; y++){
2797 const uint8_t *obmc1= obmc_edged + y*obmc_stride;
2798 const IDWTELEM *pred1 = pred + y*obmc_stride;
2799 uint8_t *cur1 = cur + y*ref_stride;
2800 uint8_t *dst1 = dst + sx + (sy+y)*ref_stride;
2801 for(x=x0; x<x1; x++){
2802 #if FRAC_BITS >= LOG2_OBMC_MAX
2803 int v = (cur1[x] * obmc1[x]) << (FRAC_BITS - LOG2_OBMC_MAX);
2805 int v = (cur1[x] * obmc1[x] + (1<<(LOG2_OBMC_MAX - FRAC_BITS-1))) >> (LOG2_OBMC_MAX - FRAC_BITS);
2807 v = (v + pred1[x]) >> FRAC_BITS;
2808 if(v&(~255)) v= ~(v>>31);
2813 /* copy the regions where obmc[] = (uint8_t)256 */
2814 if(LOG2_OBMC_MAX == 8
2815 && (mb_x == 0 || mb_x == b_stride-1)
2816 && (mb_y == 0 || mb_y == b_height-1)){
2825 for(y=y0; y<y1; y++)
2826 memcpy(dst + sx+x0 + (sy+y)*ref_stride, cur + x0 + y*ref_stride, x1-x0);
2830 /* FIXME rearrange dsputil to fit 32x32 cmp functions */
2831 /* FIXME check alignment of the cmp wavelet vs the encoding wavelet */
2832 /* FIXME cmps overlap but do not cover the wavelet's whole support.
2833 * So improving the score of one block is not strictly guaranteed
2834 * to improve the score of the whole frame, thus iterative motion
2835 * estimation does not always converge. */
2836 if(s->avctx->me_cmp == FF_CMP_W97)
2837 distortion = ff_w97_32_c(&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, 32);
2838 else if(s->avctx->me_cmp == FF_CMP_W53)
2839 distortion = ff_w53_32_c(&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, 32);
2843 int off = sx+16*(i&1) + (sy+16*(i>>1))*ref_stride;
2844 distortion += s->dsp.me_cmp[0](&s->m, src + off, dst + off, ref_stride, 16);
2849 distortion = s->dsp.me_cmp[0](&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, block_w*2);
2858 rate += get_block_bits(s, mb_x + (i&1) - (i>>1), mb_y + (i>>1), 1);
2860 if(mb_x == b_stride-2)
2861 rate += get_block_bits(s, mb_x + 1, mb_y + 1, 1);
2863 return distortion + rate*penalty_factor;
2866 static int get_4block_rd(SnowContext *s, int mb_x, int mb_y, int plane_index){
2868 Plane *p= &s->plane[plane_index];
2869 const int block_size = MB_SIZE >> s->block_max_depth;
2870 const int block_w = plane_index ? block_size/2 : block_size;
2871 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
2872 const int obmc_stride= plane_index ? block_size : 2*block_size;
2873 const int ref_stride= s->current_picture.linesize[plane_index];
2874 uint8_t *dst= s->current_picture.data[plane_index];
2875 uint8_t *src= s-> input_picture.data[plane_index];
2876 //FIXME zero_dst is const but add_yblock changes dst if add is 0 (this is never the case for dst=zero_dst
2877 // const has only been removed from zero_dst to suppress a warning
2878 static IDWTELEM zero_dst[4096]; //FIXME
2879 const int b_stride = s->b_width << s->block_max_depth;
2880 const int w= p->width;
2881 const int h= p->height;
2884 const int penalty_factor= get_penalty_factor(s->lambda, s->lambda2, s->avctx->me_cmp);
2887 int mb_x2= mb_x + (i%3) - 1;
2888 int mb_y2= mb_y + (i/3) - 1;
2889 int x= block_w*mb_x2 + block_w/2;
2890 int y= block_w*mb_y2 + block_w/2;
2892 add_yblock(s, 0, NULL, zero_dst, dst, obmc,
2893 x, y, block_w, block_w, w, h, /*dst_stride*/0, ref_stride, obmc_stride, mb_x2, mb_y2, 1, 1, plane_index);
2895 //FIXME find a cleaner/simpler way to skip the outside stuff
2896 for(y2= y; y2<0; y2++)
2897 memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, block_w);
2898 for(y2= h; y2<y+block_w; y2++)
2899 memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, block_w);
2901 for(y2= y; y2<y+block_w; y2++)
2902 memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, -x);
2905 for(y2= y; y2<y+block_w; y2++)
2906 memcpy(dst + w + y2*ref_stride, src + w + y2*ref_stride, x+block_w - w);
2909 assert(block_w== 8 || block_w==16);
2910 distortion += s->dsp.me_cmp[block_w==8](&s->m, src + x + y*ref_stride, dst + x + y*ref_stride, ref_stride, block_w);
2914 BlockNode *b= &s->block[mb_x+mb_y*b_stride];
2915 int merged= same_block(b,b+1) && same_block(b,b+b_stride) && same_block(b,b+b_stride+1);
2923 rate = get_block_bits(s, mb_x, mb_y, 2);
2924 for(i=merged?4:0; i<9; i++){
2925 static const int dxy[9][2] = {{0,0},{1,0},{0,1},{1,1},{2,0},{2,1},{-1,2},{0,2},{1,2}};
2926 rate += get_block_bits(s, mb_x + dxy[i][0], mb_y + dxy[i][1], 1);
2929 return distortion + rate*penalty_factor;
2932 static int encode_subband_c0run(SnowContext *s, SubBand *b, IDWTELEM *src, IDWTELEM *parent, int stride, int orientation){
2933 const int w= b->width;
2934 const int h= b->height;
2946 int /*ll=0, */l=0, lt=0, t=0, rt=0;
2947 v= src[x + y*stride];
2950 t= src[x + (y-1)*stride];
2952 lt= src[x - 1 + (y-1)*stride];
2955 rt= src[x + 1 + (y-1)*stride];
2959 l= src[x - 1 + y*stride];
2961 if(orientation==1) ll= src[y + (x-2)*stride];
2962 else ll= src[x - 2 + y*stride];
2968 if(px<b->parent->width && py<b->parent->height)
2969 p= parent[px + py*2*stride];
2971 if(!(/*ll|*/l|lt|t|rt|p)){
2973 runs[run_index++]= run;
2981 max_index= run_index;
2982 runs[run_index++]= run;
2984 run= runs[run_index++];
2986 put_symbol2(&s->c, b->state[30], max_index, 0);
2987 if(run_index <= max_index)
2988 put_symbol2(&s->c, b->state[1], run, 3);
2991 if(s->c.bytestream_end - s->c.bytestream < w*40){
2992 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2997 int /*ll=0, */l=0, lt=0, t=0, rt=0;
2998 v= src[x + y*stride];
3001 t= src[x + (y-1)*stride];
3003 lt= src[x - 1 + (y-1)*stride];
3006 rt= src[x + 1 + (y-1)*stride];
3010 l= src[x - 1 + y*stride];
3012 if(orientation==1) ll= src[y + (x-2)*stride];
3013 else ll= src[x - 2 + y*stride];
3019 if(px<b->parent->width && py<b->parent->height)
3020 p= parent[px + py*2*stride];
3022 if(/*ll|*/l|lt|t|rt|p){
3023 int context= av_log2(/*FFABS(ll) + */3*FFABS(l) + FFABS(lt) + 2*FFABS(t) + FFABS(rt) + FFABS(p));
3025 put_rac(&s->c, &b->state[0][context], !!v);
3028 run= runs[run_index++];
3030 if(run_index <= max_index)
3031 put_symbol2(&s->c, b->state[1], run, 3);
3039 int context= av_log2(/*FFABS(ll) + */3*FFABS(l) + FFABS(lt) + 2*FFABS(t) + FFABS(rt) + FFABS(p));
3040 int l2= 2*FFABS(l) + (l<0);
3041 int t2= 2*FFABS(t) + (t<0);
3043 put_symbol2(&s->c, b->state[context + 2], FFABS(v)-1, context-4);
3044 put_rac(&s->c, &b->state[0][16 + 1 + 3 + quant3bA[l2&0xFF] + 3*quant3bA[t2&0xFF]], v<0);
3052 static int encode_subband(SnowContext *s, SubBand *b, IDWTELEM *src, IDWTELEM *parent, int stride, int orientation){
3053 // encode_subband_qtree(s, b, src, parent, stride, orientation);
3054 // encode_subband_z0run(s, b, src, parent, stride, orientation);
3055 return encode_subband_c0run(s, b, src, parent, stride, orientation);
3056 // encode_subband_dzr(s, b, src, parent, stride, orientation);
3059 static av_always_inline int check_block(SnowContext *s, int mb_x, int mb_y, int p[3], int intra, const uint8_t *obmc_edged, int *best_rd){
3060 const int b_stride= s->b_width << s->block_max_depth;
3061 BlockNode *block= &s->block[mb_x + mb_y * b_stride];
3062 BlockNode backup= *block;
3063 int rd, index, value;
3065 assert(mb_x>=0 && mb_y>=0);
3066 assert(mb_x<b_stride);
3069 block->color[0] = p[0];
3070 block->color[1] = p[1];
3071 block->color[2] = p[2];
3072 block->type |= BLOCK_INTRA;
3074 index= (p[0] + 31*p[1]) & (ME_CACHE_SIZE-1);
3075 value= s->me_cache_generation + (p[0]>>10) + (p[1]<<6) + (block->ref<<12);
3076 if(s->me_cache[index] == value)
3078 s->me_cache[index]= value;
3082 block->type &= ~BLOCK_INTRA;
3085 rd= get_block_rd(s, mb_x, mb_y, 0, obmc_edged);
3097 /* special case for int[2] args we discard afterwards,
3098 * fixes compilation problem with gcc 2.95 */
3099 static av_always_inline int check_block_inter(SnowContext *s, int mb_x, int mb_y, int p0, int p1, const uint8_t *obmc_edged, int *best_rd){
3100 int p[2] = {p0, p1};
3101 return check_block(s, mb_x, mb_y, p, 0, obmc_edged, best_rd);
3104 static av_always_inline int check_4block_inter(SnowContext *s, int mb_x, int mb_y, int p0, int p1, int ref, int *best_rd){
3105 const int b_stride= s->b_width << s->block_max_depth;
3106 BlockNode *block= &s->block[mb_x + mb_y * b_stride];
3107 BlockNode backup[4]= {block[0], block[1], block[b_stride], block[b_stride+1]};
3108 int rd, index, value;
3110 assert(mb_x>=0 && mb_y>=0);
3111 assert(mb_x<b_stride);
3112 assert(((mb_x|mb_y)&1) == 0);
3114 index= (p0 + 31*p1) & (ME_CACHE_SIZE-1);
3115 value= s->me_cache_generation + (p0>>10) + (p1<<6) + (block->ref<<12);
3116 if(s->me_cache[index] == value)
3118 s->me_cache[index]= value;
3123 block->type &= ~BLOCK_INTRA;
3124 block[1]= block[b_stride]= block[b_stride+1]= *block;
3126 rd= get_4block_rd(s, mb_x, mb_y, 0);
3133 block[0]= backup[0];
3134 block[1]= backup[1];
3135 block[b_stride]= backup[2];
3136 block[b_stride+1]= backup[3];
3141 static void iterative_me(SnowContext *s){
3142 int pass, mb_x, mb_y;
3143 const int b_width = s->b_width << s->block_max_depth;
3144 const int b_height= s->b_height << s->block_max_depth;
3145 const int b_stride= b_width;
3149 RangeCoder r = s->c;
3150 uint8_t state[sizeof(s->block_state)];
3151 memcpy(state, s->block_state, sizeof(s->block_state));
3152 for(mb_y= 0; mb_y<s->b_height; mb_y++)
3153 for(mb_x= 0; mb_x<s->b_width; mb_x++)
3154 encode_q_branch(s, 0, mb_x, mb_y);
3156 memcpy(s->block_state, state, sizeof(s->block_state));
3159 for(pass=0; pass<25; pass++){
3162 for(mb_y= 0; mb_y<b_height; mb_y++){
3163 for(mb_x= 0; mb_x<b_width; mb_x++){
3164 int dia_change, i, j, ref;
3165 int best_rd= INT_MAX, ref_rd;
3166 BlockNode backup, ref_b;
3167 const int index= mb_x + mb_y * b_stride;
3168 BlockNode *block= &s->block[index];
3169 BlockNode *tb = mb_y ? &s->block[index-b_stride ] : NULL;
3170 BlockNode *lb = mb_x ? &s->block[index -1] : NULL;
3171 BlockNode *rb = mb_x+1<b_width ? &s->block[index +1] : NULL;
3172 BlockNode *bb = mb_y+1<b_height ? &s->block[index+b_stride ] : NULL;
3173 BlockNode *tlb= mb_x && mb_y ? &s->block[index-b_stride-1] : NULL;
3174 BlockNode *trb= mb_x+1<b_width && mb_y ? &s->block[index-b_stride+1] : NULL;
3175 BlockNode *blb= mb_x && mb_y+1<b_height ? &s->block[index+b_stride-1] : NULL;
3176 BlockNode *brb= mb_x+1<b_width && mb_y+1<b_height ? &s->block[index+b_stride+1] : NULL;
3177 const int b_w= (MB_SIZE >> s->block_max_depth);
3178 uint8_t obmc_edged[b_w*2][b_w*2];
3180 if(pass && (block->type & BLOCK_OPT))
3182 block->type |= BLOCK_OPT;
3186 if(!s->me_cache_generation)
3187 memset(s->me_cache, 0, sizeof(s->me_cache));
3188 s->me_cache_generation += 1<<22;
3190 //FIXME precalculate
3193 memcpy(obmc_edged, obmc_tab[s->block_max_depth], b_w*b_w*4);
3195 for(y=0; y<b_w*2; y++)
3196 memset(obmc_edged[y], obmc_edged[y][0] + obmc_edged[y][b_w-1], b_w);
3197 if(mb_x==b_stride-1)
3198 for(y=0; y<b_w*2; y++)
3199 memset(obmc_edged[y]+b_w, obmc_edged[y][b_w] + obmc_edged[y][b_w*2-1], b_w);
3201 for(x=0; x<b_w*2; x++)
3202 obmc_edged[0][x] += obmc_edged[b_w-1][x];
3203 for(y=1; y<b_w; y++)
3204 memcpy(obmc_edged[y], obmc_edged[0], b_w*2);
3206 if(mb_y==b_height-1){
3207 for(x=0; x<b_w*2; x++)
3208 obmc_edged[b_w*2-1][x] += obmc_edged[b_w][x];
3209 for(y=b_w; y<b_w*2-1; y++)
3210 memcpy(obmc_edged[y], obmc_edged[b_w*2-1], b_w*2);
3214 //skip stuff outside the picture
3215 if(mb_x==0 || mb_y==0 || mb_x==b_width-1 || mb_y==b_height-1){
3216 uint8_t *src= s-> input_picture.data[0];
3217 uint8_t *dst= s->current_picture.data[0];
3218 const int stride= s->current_picture.linesize[0];
3219 const int block_w= MB_SIZE >> s->block_max_depth;
3220 const int sx= block_w*mb_x - block_w/2;
3221 const int sy= block_w*mb_y - block_w/2;
3222 const int w= s->plane[0].width;
3223 const int h= s->plane[0].height;
3227 memcpy(dst + sx + y*stride, src + sx + y*stride, block_w*2);
3228 for(y=h; y<sy+block_w*2; y++)
3229 memcpy(dst + sx + y*stride, src + sx + y*stride, block_w*2);
3231 for(y=sy; y<sy+block_w*2; y++)
3232 memcpy(dst + sx + y*stride, src + sx + y*stride, -sx);
3234 if(sx+block_w*2 > w){
3235 for(y=sy; y<sy+block_w*2; y++)
3236 memcpy(dst + w + y*stride, src + w + y*stride, sx+block_w*2 - w);
3240 // intra(black) = neighbors' contribution to the current block
3242 color[i]= get_dc(s, mb_x, mb_y, i);
3244 // get previous score (cannot be cached due to OBMC)
3245 if(pass > 0 && (block->type&BLOCK_INTRA)){
3246 int color0[3]= {block->color[0], block->color[1], block->color[2]};
3247 check_block(s, mb_x, mb_y, color0, 1, *obmc_edged, &best_rd);
3249 check_block_inter(s, mb_x, mb_y, block->mx, block->my, *obmc_edged, &best_rd);
3253 for(ref=0; ref < s->ref_frames; ref++){
3254 int16_t (*mvr)[2]= &s->ref_mvs[ref][index];
3255 if(s->ref_scores[ref][index] > s->ref_scores[ref_b.ref][index]*3/2) //FIXME tune threshold
3260 check_block_inter(s, mb_x, mb_y, mvr[0][0], mvr[0][1], *obmc_edged, &best_rd);
3261 check_block_inter(s, mb_x, mb_y, 0, 0, *obmc_edged, &best_rd);
3263 check_block_inter(s, mb_x, mb_y, mvr[-b_stride][0], mvr[-b_stride][1], *obmc_edged, &best_rd);
3265 check_block_inter(s, mb_x, mb_y, mvr[-1][0], mvr[-1][1], *obmc_edged, &best_rd);
3267 check_block_inter(s, mb_x, mb_y, mvr[1][0], mvr[1][1], *obmc_edged, &best_rd);
3269 check_block_inter(s, mb_x, mb_y, mvr[b_stride][0], mvr[b_stride][1], *obmc_edged, &best_rd);
3272 //FIXME avoid subpel interpolation / round to nearest integer
3275 for(i=0; i<FFMAX(s->avctx->dia_size, 1); i++){
3277 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+4*(i-j), block->my+(4*j), *obmc_edged, &best_rd);
3278 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx-4*(i-j), block->my-(4*j), *obmc_edged, &best_rd);
3279 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+4*(i-j), block->my-(4*j), *obmc_edged, &best_rd);
3280 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx-4*(i-j), block->my+(4*j), *obmc_edged, &best_rd);
3286 static const int square[8][2]= {{+1, 0},{-1, 0},{ 0,+1},{ 0,-1},{+1,+1},{-1,-1},{+1,-1},{-1,+1},};
3289 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+square[i][0], block->my+square[i][1], *obmc_edged, &best_rd);
3291 //FIXME or try the standard 2 pass qpel or similar
3293 mvr[0][0]= block->mx;
3294 mvr[0][1]= block->my;
3295 if(ref_rd > best_rd){
3302 check_block(s, mb_x, mb_y, color, 1, *obmc_edged, &best_rd);
3303 //FIXME RD style color selection
3304 if(!same_block(block, &backup)){
3305 if(tb ) tb ->type &= ~BLOCK_OPT;
3306 if(lb ) lb ->type &= ~BLOCK_OPT;
3307 if(rb ) rb ->type &= ~BLOCK_OPT;
3308 if(bb ) bb ->type &= ~BLOCK_OPT;
3309 if(tlb) tlb->type &= ~BLOCK_OPT;
3310 if(trb) trb->type &= ~BLOCK_OPT;
3311 if(blb) blb->type &= ~BLOCK_OPT;
3312 if(brb) brb->type &= ~BLOCK_OPT;
3317 av_log(s->avctx, AV_LOG_ERROR, "pass:%d changed:%d\n", pass, change);
3322 if(s->block_max_depth == 1){
3324 for(mb_y= 0; mb_y<b_height; mb_y+=2){
3325 for(mb_x= 0; mb_x<b_width; mb_x+=2){
3327 int best_rd, init_rd;
3328 const int index= mb_x + mb_y * b_stride;
3331 b[0]= &s->block[index];
3333 b[2]= b[0]+b_stride;
3335 if(same_block(b[0], b[1]) &&
3336 same_block(b[0], b[2]) &&
3337 same_block(b[0], b[3]))
3340 if(!s->me_cache_generation)
3341 memset(s->me_cache, 0, sizeof(s->me_cache));
3342 s->me_cache_generation += 1<<22;
3344 init_rd= best_rd= get_4block_rd(s, mb_x, mb_y, 0);
3346 //FIXME more multiref search?
3347 check_4block_inter(s, mb_x, mb_y,
3348 (b[0]->mx + b[1]->mx + b[2]->mx + b[3]->mx + 2) >> 2,
3349 (b[0]->my + b[1]->my + b[2]->my + b[3]->my + 2) >> 2, 0, &best_rd);
3352 if(!(b[i]->type&BLOCK_INTRA))
3353 check_4block_inter(s, mb_x, mb_y, b[i]->mx, b[i]->my, b[i]->ref, &best_rd);
3355 if(init_rd != best_rd)
3359 av_log(s->avctx, AV_LOG_ERROR, "pass:4mv changed:%d\n", change*4);
3363 static void encode_blocks(SnowContext *s, int search){
3368 if(s->avctx->me_method == ME_ITER && !s->keyframe && search)
3372 if(s->c.bytestream_end - s->c.bytestream < w*MB_SIZE*MB_SIZE*3){ //FIXME nicer limit
3373 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
3377 if(s->avctx->me_method == ME_ITER || !search)
3378 encode_q_branch2(s, 0, x, y);
3380 encode_q_branch (s, 0, x, y);
3385 static void quantize(SnowContext *s, SubBand *b, IDWTELEM *dst, DWTELEM *src, int stride, int bias){
3386 const int w= b->width;
3387 const int h= b->height;
3388 const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
3389 const int qmul= qexp[qlog&(QROOT-1)]<<((qlog>>QSHIFT) + ENCODER_EXTRA_BITS);
3390 int x,y, thres1, thres2;
3392 if(s->qlog == LOSSLESS_QLOG){
3395 dst[x + y*stride]= src[x + y*stride];
3399 bias= bias ? 0 : (3*qmul)>>3;
3400 thres1= ((qmul - bias)>>QEXPSHIFT) - 1;
3406 int i= src[x + y*stride];
3408 if((unsigned)(i+thres1) > thres2){
3411 i/= qmul; //FIXME optimize
3412 dst[x + y*stride]= i;
3416 i/= qmul; //FIXME optimize
3417 dst[x + y*stride]= -i;
3420 dst[x + y*stride]= 0;
3426 int i= src[x + y*stride];
3428 if((unsigned)(i+thres1) > thres2){
3431 i= (i + bias) / qmul; //FIXME optimize
3432 dst[x + y*stride]= i;
3436 i= (i + bias) / qmul; //FIXME optimize
3437 dst[x + y*stride]= -i;
3440 dst[x + y*stride]= 0;
3446 static void dequantize(SnowContext *s, SubBand *b, IDWTELEM *src, int stride){
3447 const int w= b->width;
3448 const int h= b->height;
3449 const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
3450 const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
3451 const int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
3454 if(s->qlog == LOSSLESS_QLOG) return;
3458 int i= src[x + y*stride];
3460 src[x + y*stride]= -((-i*qmul + qadd)>>(QEXPSHIFT)); //FIXME try different bias
3462 src[x + y*stride]= (( i*qmul + qadd)>>(QEXPSHIFT));
3468 static void decorrelate(SnowContext *s, SubBand *b, IDWTELEM *src, int stride, int inverse, int use_median){
3469 const int w= b->width;
3470 const int h= b->height;
3473 for(y=h-1; y>=0; y--){
3474 for(x=w-1; x>=0; x--){
3475 int i= x + y*stride;
3479 if(y && x+1<w) src[i] -= mid_pred(src[i - 1], src[i - stride], src[i - stride + 1]);
3480 else src[i] -= src[i - 1];
3482 if(y) src[i] -= mid_pred(src[i - 1], src[i - stride], src[i - 1] + src[i - stride] - src[i - 1 - stride]);
3483 else src[i] -= src[i - 1];
3486 if(y) src[i] -= src[i - stride];
3492 static void correlate(SnowContext *s, SubBand *b, IDWTELEM *src, int stride, int inverse, int use_median){
3493 const int w= b->width;
3494 const int h= b->height;
3499 int i= x + y*stride;
3503 if(y && x+1<w) src[i] += mid_pred(src[i - 1], src[i - stride], src[i - stride + 1]);
3504 else src[i] += src[i - 1];
3506 if(y) src[i] += mid_pred(src[i - 1], src[i - stride], src[i - 1] + src[i - stride] - src[i - 1 - stride]);
3507 else src[i] += src[i - 1];
3510 if(y) src[i] += src[i - stride];
3516 static void encode_qlogs(SnowContext *s){
3517 int plane_index, level, orientation;
3519 for(plane_index=0; plane_index<2; plane_index++){
3520 for(level=0; level<s->spatial_decomposition_count; level++){
3521 for(orientation=level ? 1:0; orientation<4; orientation++){
3522 if(orientation==2) continue;
3523 put_symbol(&s->c, s->header_state, s->plane[plane_index].band[level][orientation].qlog, 1);
3529 static void encode_header(SnowContext *s){
3533 memset(kstate, MID_STATE, sizeof(kstate));
3535 put_rac(&s->c, kstate, s->keyframe);
3536 if(s->keyframe || s->always_reset){
3538 s->last_spatial_decomposition_type=
3542 s->last_block_max_depth= 0;
3543 for(plane_index=0; plane_index<2; plane_index++){
3544 Plane *p= &s->plane[plane_index];
3547 memset(p->last_hcoeff, 0, sizeof(p->last_hcoeff));
3551 put_symbol(&s->c, s->header_state, s->version, 0);
3552 put_rac(&s->c, s->header_state, s->always_reset);
3553 put_symbol(&s->c, s->header_state, s->temporal_decomposition_type, 0);
3554 put_symbol(&s->c, s->header_state, s->temporal_decomposition_count, 0);
3555 put_symbol(&s->c, s->header_state, s->spatial_decomposition_count, 0);
3556 put_symbol(&s->c, s->header_state, s->colorspace_type, 0);
3557 put_symbol(&s->c, s->header_state, s->chroma_h_shift, 0);
3558 put_symbol(&s->c, s->header_state, s->chroma_v_shift, 0);
3559 put_rac(&s->c, s->header_state, s->spatial_scalability);
3560 // put_rac(&s->c, s->header_state, s->rate_scalability);
3561 put_symbol(&s->c, s->header_state, s->max_ref_frames-1, 0);
3568 for(plane_index=0; plane_index<2; plane_index++){
3569 Plane *p= &s->plane[plane_index];
3570 update_mc |= p->last_htaps != p->htaps;
3571 update_mc |= p->last_diag_mc != p->diag_mc;
3572 update_mc |= !!memcmp(p->last_hcoeff, p->hcoeff, sizeof(p->hcoeff));
3574 put_rac(&s->c, s->header_state, update_mc);
3576 for(plane_index=0; plane_index<2; plane_index++){
3577 Plane *p= &s->plane[plane_index];
3578 put_rac(&s->c, s->header_state, p->diag_mc);
3579 put_symbol(&s->c, s->header_state, p->htaps/2-1, 0);
3580 for(i= p->htaps/2; i; i--)
3581 put_symbol(&s->c, s->header_state, FFABS(p->hcoeff[i]), 0);
3584 if(s->last_spatial_decomposition_count != s->spatial_decomposition_count){
3585 put_rac(&s->c, s->header_state, 1);
3586 put_symbol(&s->c, s->header_state, s->spatial_decomposition_count, 0);
3589 put_rac(&s->c, s->header_state, 0);
3592 put_symbol(&s->c, s->header_state, s->spatial_decomposition_type - s->last_spatial_decomposition_type, 1);
3593 put_symbol(&s->c, s->header_state, s->qlog - s->last_qlog , 1);
3594 put_symbol(&s->c, s->header_state, s->mv_scale - s->last_mv_scale, 1);
3595 put_symbol(&s->c, s->header_state, s->qbias - s->last_qbias , 1);
3596 put_symbol(&s->c, s->header_state, s->block_max_depth - s->last_block_max_depth, 1);
3600 static void update_last_header_values(SnowContext *s){
3604 for(plane_index=0; plane_index<2; plane_index++){
3605 Plane *p= &s->plane[plane_index];
3606 p->last_diag_mc= p->diag_mc;
3607 p->last_htaps = p->htaps;
3608 memcpy(p->last_hcoeff, p->hcoeff, sizeof(p->hcoeff));
3612 s->last_spatial_decomposition_type = s->spatial_decomposition_type;
3613 s->last_qlog = s->qlog;
3614 s->last_qbias = s->qbias;
3615 s->last_mv_scale = s->mv_scale;
3616 s->last_block_max_depth = s->block_max_depth;
3617 s->last_spatial_decomposition_count = s->spatial_decomposition_count;
3620 static int qscale2qlog(int qscale){
3621 return rint(QROOT*log(qscale / (float)FF_QP2LAMBDA)/log(2))
3622 + 61*QROOT/8; //<64 >60
3625 static int ratecontrol_1pass(SnowContext *s, AVFrame *pict)
3627 /* Estimate the frame's complexity as a sum of weighted dwt coefficients.
3628 * FIXME we know exact mv bits at this point,
3629 * but ratecontrol isn't set up to include them. */
3630 uint32_t coef_sum= 0;
3631 int level, orientation, delta_qlog;
3633 for(level=0; level<s->spatial_decomposition_count; level++){
3634 for(orientation=level ? 1 : 0; orientation<4; orientation++){
3635 SubBand *b= &s->plane[0].band[level][orientation];
3636 IDWTELEM *buf= b->ibuf;
3637 const int w= b->width;
3638 const int h= b->height;
3639 const int stride= b->stride;
3640 const int qlog= av_clip(2*QROOT + b->qlog, 0, QROOT*16);
3641 const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
3642 const int qdiv= (1<<16)/qmul;
3644 //FIXME this is ugly
3647 buf[x+y*stride]= b->buf[x+y*stride];
3649 decorrelate(s, b, buf, stride, 1, 0);
3652 coef_sum+= abs(buf[x+y*stride]) * qdiv >> 16;
3656 /* ugly, ratecontrol just takes a sqrt again */
3657 coef_sum = (uint64_t)coef_sum * coef_sum >> 16;
3658 assert(coef_sum < INT_MAX);
3660 if(pict->pict_type == AV_PICTURE_TYPE_I){
3661 s->m.current_picture.mb_var_sum= coef_sum;
3662 s->m.current_picture.mc_mb_var_sum= 0;
3664 s->m.current_picture.mc_mb_var_sum= coef_sum;
3665 s->m.current_picture.mb_var_sum= 0;
3668 pict->quality= ff_rate_estimate_qscale(&s->m, 1);
3669 if (pict->quality < 0)
3671 s->lambda= pict->quality * 3/2;
3672 delta_qlog= qscale2qlog(pict->quality) - s->qlog;
3673 s->qlog+= delta_qlog;
3677 static void calculate_visual_weight(SnowContext *s, Plane *p){
3678 int width = p->width;
3679 int height= p->height;
3680 int level, orientation, x, y;
3682 for(level=0; level<s->spatial_decomposition_count; level++){
3683 for(orientation=level ? 1 : 0; orientation<4; orientation++){
3684 SubBand *b= &p->band[level][orientation];
3685 IDWTELEM *ibuf= b->ibuf;
3688 memset(s->spatial_idwt_buffer, 0, sizeof(*s->spatial_idwt_buffer)*width*height);
3689 ibuf[b->width/2 + b->height/2*b->stride]= 256*16;
3690 ff_spatial_idwt(s->spatial_idwt_buffer, width, height, width, s->spatial_decomposition_type, s->spatial_decomposition_count);
3691 for(y=0; y<height; y++){
3692 for(x=0; x<width; x++){
3693 int64_t d= s->spatial_idwt_buffer[x + y*width]*16;
3698 b->qlog= (int)(log(352256.0/sqrt(error)) / log(pow(2.0, 1.0/QROOT))+0.5);
3703 static int encode_frame(AVCodecContext *avctx, unsigned char *buf, int buf_size, void *data){
3704 SnowContext *s = avctx->priv_data;
3705 RangeCoder * const c= &s->c;
3706 AVFrame *pict = data;
3707 const int width= s->avctx->width;
3708 const int height= s->avctx->height;
3709 int level, orientation, plane_index, i, y;
3710 uint8_t rc_header_bak[sizeof(s->header_state)];
3711 uint8_t rc_block_bak[sizeof(s->block_state)];
3713 ff_init_range_encoder(c, buf, buf_size);
3714 ff_build_rac_states(c, 0.05*(1LL<<32), 256-8);
3718 for(y=0; y<(height>>shift); y++)
3719 memcpy(&s->input_picture.data[i][y * s->input_picture.linesize[i]],
3720 &pict->data[i][y * pict->linesize[i]],
3723 s->new_picture = *pict;
3725 s->m.picture_number= avctx->frame_number;
3726 if(avctx->flags&CODEC_FLAG_PASS2){
3728 pict->pict_type= s->m.rc_context.entry[avctx->frame_number].new_pict_type;
3729 s->keyframe= pict->pict_type==AV_PICTURE_TYPE_I;
3730 if(!(avctx->flags&CODEC_FLAG_QSCALE)) {
3731 pict->quality= ff_rate_estimate_qscale(&s->m, 0);
3732 if (pict->quality < 0)
3736 s->keyframe= avctx->gop_size==0 || avctx->frame_number % avctx->gop_size == 0;
3738 pict->pict_type= s->keyframe ? AV_PICTURE_TYPE_I : AV_PICTURE_TYPE_P;
3741 if(s->pass1_rc && avctx->frame_number == 0)
3742 pict->quality= 2*FF_QP2LAMBDA;
3744 s->qlog= qscale2qlog(pict->quality);
3745 s->lambda = pict->quality * 3/2;
3747 if(s->qlog < 0 || (!pict->quality && (avctx->flags & CODEC_FLAG_QSCALE))){
3748 s->qlog= LOSSLESS_QLOG;
3750 }//else keep previous frame's qlog until after motion estimation
3754 s->m.current_picture_ptr= &s->m.current_picture;
3755 s->m.last_picture.pts= s->m.current_picture.pts;
3756 s->m.current_picture.pts= pict->pts;
3757 if(pict->pict_type == AV_PICTURE_TYPE_P){
3758 int block_width = (width +15)>>4;
3759 int block_height= (height+15)>>4;
3760 int stride= s->current_picture.linesize[0];
3762 assert(s->current_picture.data[0]);
3763 assert(s->last_picture[0].data[0]);
3765 s->m.avctx= s->avctx;
3766 s->m.current_picture.data[0]= s->current_picture.data[0];
3767 s->m. last_picture.data[0]= s->last_picture[0].data[0];
3768 s->m. new_picture.data[0]= s-> input_picture.data[0];
3769 s->m. last_picture_ptr= &s->m. last_picture;
3771 s->m. last_picture.linesize[0]=
3772 s->m. new_picture.linesize[0]=
3773 s->m.current_picture.linesize[0]= stride;
3774 s->m.uvlinesize= s->current_picture.linesize[1];
3776 s->m.height= height;
3777 s->m.mb_width = block_width;
3778 s->m.mb_height= block_height;
3779 s->m.mb_stride= s->m.mb_width+1;
3780 s->m.b8_stride= 2*s->m.mb_width+1;
3782 s->m.pict_type= pict->pict_type;
3783 s->m.me_method= s->avctx->me_method;
3784 s->m.me.scene_change_score=0;
3785 s->m.flags= s->avctx->flags;
3786 s->m.quarter_sample= (s->avctx->flags & CODEC_FLAG_QPEL)!=0;
3787 s->m.out_format= FMT_H263;
3788 s->m.unrestricted_mv= 1;
3790 s->m.lambda = s->lambda;
3791 s->m.qscale= (s->m.lambda*139 + FF_LAMBDA_SCALE*64) >> (FF_LAMBDA_SHIFT + 7);
3792 s->lambda2= s->m.lambda2= (s->m.lambda*s->m.lambda + FF_LAMBDA_SCALE/2) >> FF_LAMBDA_SHIFT;
3794 s->m.dsp= s->dsp; //move
3800 memcpy(rc_header_bak, s->header_state, sizeof(s->header_state));
3801 memcpy(rc_block_bak, s->block_state, sizeof(s->block_state));
3806 if(pict->pict_type == AV_PICTURE_TYPE_I)
3807 s->spatial_decomposition_count= 5;
3809 s->spatial_decomposition_count= 5;
3811 s->m.pict_type = pict->pict_type;
3812 s->qbias= pict->pict_type == AV_PICTURE_TYPE_P ? 2 : 0;
3814 common_init_after_header(avctx);
3816 if(s->last_spatial_decomposition_count != s->spatial_decomposition_count){
3817 for(plane_index=0; plane_index<3; plane_index++){
3818 calculate_visual_weight(s, &s->plane[plane_index]);
3823 s->m.misc_bits = 8*(s->c.bytestream - s->c.bytestream_start);
3824 encode_blocks(s, 1);
3825 s->m.mv_bits = 8*(s->c.bytestream - s->c.bytestream_start) - s->m.misc_bits;
3827 for(plane_index=0; plane_index<3; plane_index++){
3828 Plane *p= &s->plane[plane_index];
3832 // int bits= put_bits_count(&s->c.pb);
3834 if(!(avctx->flags2 & CODEC_FLAG2_MEMC_ONLY)){
3836 if(pict->data[plane_index]) //FIXME gray hack
3839 s->spatial_idwt_buffer[y*w + x]= pict->data[plane_index][y*pict->linesize[plane_index] + x]<<FRAC_BITS;
3842 predict_plane(s, s->spatial_idwt_buffer, plane_index, 0);
3845 && pict->pict_type == AV_PICTURE_TYPE_P
3846 && !(avctx->flags&CODEC_FLAG_PASS2)
3847 && s->m.me.scene_change_score > s->avctx->scenechange_threshold){
3848 ff_init_range_encoder(c, buf, buf_size);
3849 ff_build_rac_states(c, 0.05*(1LL<<32), 256-8);
3850 pict->pict_type= AV_PICTURE_TYPE_I;
3852 s->current_picture.key_frame=1;
3856 if(s->qlog == LOSSLESS_QLOG){
3859 s->spatial_dwt_buffer[y*w + x]= (s->spatial_idwt_buffer[y*w + x] + (1<<(FRAC_BITS-1))-1)>>FRAC_BITS;
3865 s->spatial_dwt_buffer[y*w + x]=s->spatial_idwt_buffer[y*w + x]<<ENCODER_EXTRA_BITS;
3871 dwt_quantize(s, p, s->spatial_dwt_buffer, w, h, w, s->spatial_decomposition_type);
3873 ff_spatial_dwt(s->spatial_dwt_buffer, w, h, w, s->spatial_decomposition_type, s->spatial_decomposition_count);
3875 if(s->pass1_rc && plane_index==0){
3876 int delta_qlog = ratecontrol_1pass(s, pict);
3877 if (delta_qlog <= INT_MIN)
3880 //reordering qlog in the bitstream would eliminate this reset
3881 ff_init_range_encoder(c, buf, buf_size);
3882 memcpy(s->header_state, rc_header_bak, sizeof(s->header_state));
3883 memcpy(s->block_state, rc_block_bak, sizeof(s->block_state));
3885 encode_blocks(s, 0);
3889 for(level=0; level<s->spatial_decomposition_count; level++){
3890 for(orientation=level ? 1 : 0; orientation<4; orientation++){
3891 SubBand *b= &p->band[level][orientation];
3894 quantize(s, b, b->ibuf, b->buf, b->stride, s->qbias);
3896 decorrelate(s, b, b->ibuf, b->stride, pict->pict_type == AV_PICTURE_TYPE_P, 0);
3897 encode_subband(s, b, b->ibuf, b->parent ? b->parent->ibuf : NULL, b->stride, orientation);
3898 assert(b->parent==NULL || b->parent->stride == b->stride*2);
3900 correlate(s, b, b->ibuf, b->stride, 1, 0);
3904 for(level=0; level<s->spatial_decomposition_count; level++){
3905 for(orientation=level ? 1 : 0; orientation<4; orientation++){
3906 SubBand *b= &p->band[level][orientation];
3908 dequantize(s, b, b->ibuf, b->stride);
3912 ff_spatial_idwt(s->spatial_idwt_buffer, w, h, w, s->spatial_decomposition_type, s->spatial_decomposition_count);
3913 if(s->qlog == LOSSLESS_QLOG){
3916 s->spatial_idwt_buffer[y*w + x]<<=FRAC_BITS;
3920 predict_plane(s, s->spatial_idwt_buffer, plane_index, 1);
3923 if(pict->pict_type == AV_PICTURE_TYPE_I){
3926 s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x]=
3927 pict->data[plane_index][y*pict->linesize[plane_index] + x];
3931 memset(s->spatial_idwt_buffer, 0, sizeof(IDWTELEM)*w*h);
3932 predict_plane(s, s->spatial_idwt_buffer, plane_index, 1);
3935 if(s->avctx->flags&CODEC_FLAG_PSNR){
3938 if(pict->data[plane_index]) //FIXME gray hack
3941 int d= s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x] - pict->data[plane_index][y*pict->linesize[plane_index] + x];
3945 s->avctx->error[plane_index] += error;
3946 s->current_picture.error[plane_index] = error;
3951 update_last_header_values(s);
3953 release_buffer(avctx);
3955 s->current_picture.coded_picture_number = avctx->frame_number;
3956 s->current_picture.pict_type = pict->pict_type;
3957 s->current_picture.quality = pict->quality;
3958 s->m.frame_bits = 8*(s->c.bytestream - s->c.bytestream_start);
3959 s->m.p_tex_bits = s->m.frame_bits - s->m.misc_bits - s->m.mv_bits;
3960 s->m.current_picture.display_picture_number =
3961 s->m.current_picture.coded_picture_number = avctx->frame_number;
3962 s->m.current_picture.quality = pict->quality;
3963 s->m.total_bits += 8*(s->c.bytestream - s->c.bytestream_start);
3965 if (ff_rate_estimate_qscale(&s->m, 0) < 0)
3967 if(avctx->flags&CODEC_FLAG_PASS1)
3968 ff_write_pass1_stats(&s->m);
3969 s->m.last_pict_type = s->m.pict_type;
3970 avctx->frame_bits = s->m.frame_bits;
3971 avctx->mv_bits = s->m.mv_bits;
3972 avctx->misc_bits = s->m.misc_bits;
3973 avctx->p_tex_bits = s->m.p_tex_bits;
3977 return ff_rac_terminate(c);
3980 static av_cold int encode_end(AVCodecContext *avctx)
3982 SnowContext *s = avctx->priv_data;
3985 if (s->input_picture.data[0])
3986 avctx->release_buffer(avctx, &s->input_picture);
3987 av_free(avctx->stats_out);
3992 AVCodec ff_snow_encoder = {
3996 sizeof(SnowContext),
4000 .long_name = NULL_IF_CONFIG_SMALL("Snow"),
4010 #include "libavutil/lfg.h"
4015 int buffer[2][width*height];
4019 s.spatial_decomposition_count=6;
4020 s.spatial_decomposition_type=1;
4022 av_lfg_init(&prng, 1);
4024 printf("testing 5/3 DWT\n");
4025 for(i=0; i<width*height; i++)
4026 buffer[0][i] = buffer[1][i] = av_lfg_get(&prng) % 54321 - 12345;
4028 ff_spatial_dwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4029 ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4031 for(i=0; i<width*height; i++)
4032 if(buffer[0][i]!= buffer[1][i]) printf("fsck: %6d %12d %7d\n",i, buffer[0][i], buffer[1][i]);
4034 printf("testing 9/7 DWT\n");
4035 s.spatial_decomposition_type=0;
4036 for(i=0; i<width*height; i++)
4037 buffer[0][i] = buffer[1][i] = av_lfg_get(&prng) % 54321 - 12345;
4039 ff_spatial_dwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4040 ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4042 for(i=0; i<width*height; i++)
4043 if(FFABS(buffer[0][i] - buffer[1][i])>20) printf("fsck: %6d %12d %7d\n",i, buffer[0][i], buffer[1][i]);
4046 printf("testing AC coder\n");
4047 memset(s.header_state, 0, sizeof(s.header_state));
4048 ff_init_range_encoder(&s.c, buffer[0], 256*256);
4049 ff_init_cabac_states(&s.c, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64);
4051 for(i=-256; i<256; i++){
4052 put_symbol(&s.c, s.header_state, i*i*i/3*FFABS(i), 1);
4054 ff_rac_terminate(&s.c);
4056 memset(s.header_state, 0, sizeof(s.header_state));
4057 ff_init_range_decoder(&s.c, buffer[0], 256*256);
4058 ff_init_cabac_states(&s.c, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64);
4060 for(i=-256; i<256; i++){
4062 j= get_symbol(&s.c, s.header_state, 1);
4063 if(j!=i*i*i/3*FFABS(i)) printf("fsck: %d != %d\n", i, j);
4067 int level, orientation, x, y;
4068 int64_t errors[8][4];
4071 memset(errors, 0, sizeof(errors));
4072 s.spatial_decomposition_count=3;
4073 s.spatial_decomposition_type=0;
4074 for(level=0; level<s.spatial_decomposition_count; level++){
4075 for(orientation=level ? 1 : 0; orientation<4; orientation++){
4076 int w= width >> (s.spatial_decomposition_count-level);
4077 int h= height >> (s.spatial_decomposition_count-level);
4078 int stride= width << (s.spatial_decomposition_count-level);
4079 DWTELEM *buf= buffer[0];
4082 if(orientation&1) buf+=w;
4083 if(orientation>1) buf+=stride>>1;
4085 memset(buffer[0], 0, sizeof(int)*width*height);
4086 buf[w/2 + h/2*stride]= 256*256;
4087 ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4088 for(y=0; y<height; y++){
4089 for(x=0; x<width; x++){
4090 int64_t d= buffer[0][x + y*width];
4092 if(FFABS(width/2-x)<9 && FFABS(height/2-y)<9 && level==2) printf("%8"PRId64" ", d);
4094 if(FFABS(height/2-y)<9 && level==2) printf("\n");
4096 error= (int)(sqrt(error)+0.5);
4097 errors[level][orientation]= error;
4098 if(g) g=av_gcd(g, error);
4102 printf("static int const visual_weight[][4]={\n");
4103 for(level=0; level<s.spatial_decomposition_count; level++){
4105 for(orientation=0; orientation<4; orientation++){
4106 printf("%8"PRId64",", errors[level][orientation]/g);
4113 int w= width >> (s.spatial_decomposition_count-level);
4114 //int h= height >> (s.spatial_decomposition_count-level);
4115 int stride= width << (s.spatial_decomposition_count-level);
4116 DWTELEM *buf= buffer[0];
4122 memset(buffer[0], 0, sizeof(int)*width*height);
4124 for(y=0; y<height; y++){
4125 for(x=0; x<width; x++){
4126 int tab[4]={0,2,3,1};
4127 buffer[0][x+width*y]= 256*256*tab[(x&1) + 2*(y&1)];
4130 ff_spatial_dwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4134 buf[x + y*stride ]=169;
4135 buf[x + y*stride-w]=64;
4138 ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4140 for(y=0; y<height; y++){
4141 for(x=0; x<width; x++){
4142 int64_t d= buffer[0][x + y*width];
4144 if(FFABS(width/2-x)<9 && FFABS(height/2-y)<9) printf("%8"PRId64" ", d);
4146 if(FFABS(height/2-y)<9) printf("\n");