2 * Copyright (C) 2004 Michael Niedermayer <michaelni@gmx.at>
4 * This file is part of FFmpeg.
6 * FFmpeg is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * FFmpeg is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with FFmpeg; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
25 #include "rangecoder.h"
27 #include "mpegvideo.h"
32 static const int8_t quant3[256]={
33 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
34 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
35 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
36 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
37 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
38 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
39 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
40 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
41 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
42 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
43 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
44 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
45 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
46 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
47 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
48 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 0,
50 static const int8_t quant3b[256]={
51 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
52 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
53 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
54 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
55 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
56 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
57 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
58 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
59 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
60 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
61 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
62 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
63 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
64 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
65 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
66 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
68 static const int8_t quant3bA[256]={
69 0, 0, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
70 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
71 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
72 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
73 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
74 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
75 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
76 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
77 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
78 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
79 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
80 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
81 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
82 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
83 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
84 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
86 static const int8_t quant5[256]={
87 0, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
88 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
89 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
90 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
91 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
92 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
93 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
94 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
95 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
96 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
97 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
98 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
99 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
100 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
101 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
102 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,-1,
104 static const int8_t quant7[256]={
105 0, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
106 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
107 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3,
108 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
109 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
110 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
111 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
112 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
113 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
114 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
115 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
116 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
117 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
118 -3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-2,-2,-2,
119 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
120 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,
122 static const int8_t quant9[256]={
123 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3,
124 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
125 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
126 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
127 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
128 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
129 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
130 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
131 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
132 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
133 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
134 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
135 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
136 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
137 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,
138 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-1,-1,
140 static const int8_t quant11[256]={
141 0, 1, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4,
142 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
143 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
144 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
145 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
146 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
147 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
148 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
149 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
150 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
151 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
152 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
153 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
154 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-4,-4,
155 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
156 -4,-4,-4,-4,-4,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-1,
158 static const int8_t quant13[256]={
159 0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
160 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
161 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
162 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
163 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
164 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
165 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
166 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
167 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
168 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
169 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
170 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
171 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-5,
172 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
173 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
174 -4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,-2,-2,-1,
178 static const uint8_t obmc32[1024]={
179 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
180 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0,
181 0, 0, 0, 4, 4, 4, 4, 8, 8, 12, 12, 12, 16, 16, 16, 16, 16, 16, 16, 16, 12, 12, 12, 8, 8, 4, 4, 4, 4, 0, 0, 0,
182 0, 0, 4, 4, 8, 8, 12, 16, 16, 20, 24, 24, 28, 28, 32, 32, 32, 32, 28, 28, 24, 24, 20, 16, 16, 12, 8, 8, 4, 4, 0, 0,
183 0, 0, 4, 8, 8, 12, 16, 24, 28, 32, 36, 40, 44, 48, 48, 48, 48, 48, 48, 44, 40, 36, 32, 28, 24, 16, 12, 8, 8, 4, 0, 0,
184 0, 4, 4, 8, 12, 20, 24, 32, 40, 44, 52, 56, 60, 64, 68, 72, 72, 68, 64, 60, 56, 52, 44, 40, 32, 24, 20, 12, 8, 4, 4, 0,
185 0, 4, 4, 12, 16, 24, 32, 40, 52, 60, 68, 76, 80, 88, 88, 92, 92, 88, 88, 80, 76, 68, 60, 52, 40, 32, 24, 16, 12, 4, 4, 0,
186 0, 4, 8, 16, 24, 32, 40, 52, 64, 76, 84, 92,100,108,112,116,116,112,108,100, 92, 84, 76, 64, 52, 40, 32, 24, 16, 8, 4, 0,
187 0, 4, 8, 16, 28, 40, 52, 64, 76, 88,100,112,124,132,136,140,140,136,132,124,112,100, 88, 76, 64, 52, 40, 28, 16, 8, 4, 0,
188 0, 4, 12, 20, 32, 44, 60, 76, 88,104,120,132,144,152,160,164,164,160,152,144,132,120,104, 88, 76, 60, 44, 32, 20, 12, 4, 0,
189 0, 4, 12, 24, 36, 48, 68, 84,100,120,136,152,164,176,180,184,184,180,176,164,152,136,120,100, 84, 68, 48, 36, 24, 12, 4, 0,
190 0, 4, 12, 24, 40, 56, 76, 92,112,132,152,168,180,192,204,208,208,204,192,180,168,152,132,112, 92, 76, 56, 40, 24, 12, 4, 0,
191 0, 4, 16, 28, 44, 60, 80,100,124,144,164,180,196,208,220,224,224,220,208,196,180,164,144,124,100, 80, 60, 44, 28, 16, 4, 0,
192 0, 8, 16, 28, 48, 64, 88,108,132,152,176,192,208,224,232,240,240,232,224,208,192,176,152,132,108, 88, 64, 48, 28, 16, 8, 0,
193 0, 4, 16, 32, 48, 68, 88,112,136,160,180,204,220,232,244,248,248,244,232,220,204,180,160,136,112, 88, 68, 48, 32, 16, 4, 0,
194 1, 8, 16, 32, 48, 72, 92,116,140,164,184,208,224,240,248,255,255,248,240,224,208,184,164,140,116, 92, 72, 48, 32, 16, 8, 1,
195 1, 8, 16, 32, 48, 72, 92,116,140,164,184,208,224,240,248,255,255,248,240,224,208,184,164,140,116, 92, 72, 48, 32, 16, 8, 1,
196 0, 4, 16, 32, 48, 68, 88,112,136,160,180,204,220,232,244,248,248,244,232,220,204,180,160,136,112, 88, 68, 48, 32, 16, 4, 0,
197 0, 8, 16, 28, 48, 64, 88,108,132,152,176,192,208,224,232,240,240,232,224,208,192,176,152,132,108, 88, 64, 48, 28, 16, 8, 0,
198 0, 4, 16, 28, 44, 60, 80,100,124,144,164,180,196,208,220,224,224,220,208,196,180,164,144,124,100, 80, 60, 44, 28, 16, 4, 0,
199 0, 4, 12, 24, 40, 56, 76, 92,112,132,152,168,180,192,204,208,208,204,192,180,168,152,132,112, 92, 76, 56, 40, 24, 12, 4, 0,
200 0, 4, 12, 24, 36, 48, 68, 84,100,120,136,152,164,176,180,184,184,180,176,164,152,136,120,100, 84, 68, 48, 36, 24, 12, 4, 0,
201 0, 4, 12, 20, 32, 44, 60, 76, 88,104,120,132,144,152,160,164,164,160,152,144,132,120,104, 88, 76, 60, 44, 32, 20, 12, 4, 0,
202 0, 4, 8, 16, 28, 40, 52, 64, 76, 88,100,112,124,132,136,140,140,136,132,124,112,100, 88, 76, 64, 52, 40, 28, 16, 8, 4, 0,
203 0, 4, 8, 16, 24, 32, 40, 52, 64, 76, 84, 92,100,108,112,116,116,112,108,100, 92, 84, 76, 64, 52, 40, 32, 24, 16, 8, 4, 0,
204 0, 4, 4, 12, 16, 24, 32, 40, 52, 60, 68, 76, 80, 88, 88, 92, 92, 88, 88, 80, 76, 68, 60, 52, 40, 32, 24, 16, 12, 4, 4, 0,
205 0, 4, 4, 8, 12, 20, 24, 32, 40, 44, 52, 56, 60, 64, 68, 72, 72, 68, 64, 60, 56, 52, 44, 40, 32, 24, 20, 12, 8, 4, 4, 0,
206 0, 0, 4, 8, 8, 12, 16, 24, 28, 32, 36, 40, 44, 48, 48, 48, 48, 48, 48, 44, 40, 36, 32, 28, 24, 16, 12, 8, 8, 4, 0, 0,
207 0, 0, 4, 4, 8, 8, 12, 16, 16, 20, 24, 24, 28, 28, 32, 32, 32, 32, 28, 28, 24, 24, 20, 16, 16, 12, 8, 8, 4, 4, 0, 0,
208 0, 0, 0, 4, 4, 4, 4, 8, 8, 12, 12, 12, 16, 16, 16, 16, 16, 16, 16, 16, 12, 12, 12, 8, 8, 4, 4, 4, 4, 0, 0, 0,
209 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0,
210 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
213 static const uint8_t obmc16[256]={
214 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0,
215 0, 4, 4, 8, 16, 20, 20, 24, 24, 20, 20, 16, 8, 4, 4, 0,
216 0, 4, 16, 24, 36, 44, 52, 60, 60, 52, 44, 36, 24, 16, 4, 0,
217 0, 8, 24, 44, 60, 80, 96,104,104, 96, 80, 60, 44, 24, 8, 0,
218 0, 16, 36, 60, 92,116,136,152,152,136,116, 92, 60, 36, 16, 0,
219 0, 20, 44, 80,116,152,180,196,196,180,152,116, 80, 44, 20, 0,
220 4, 20, 52, 96,136,180,212,228,228,212,180,136, 96, 52, 20, 4,
221 4, 24, 60,104,152,196,228,248,248,228,196,152,104, 60, 24, 4,
222 4, 24, 60,104,152,196,228,248,248,228,196,152,104, 60, 24, 4,
223 4, 20, 52, 96,136,180,212,228,228,212,180,136, 96, 52, 20, 4,
224 0, 20, 44, 80,116,152,180,196,196,180,152,116, 80, 44, 20, 0,
225 0, 16, 36, 60, 92,116,136,152,152,136,116, 92, 60, 36, 16, 0,
226 0, 8, 24, 44, 60, 80, 96,104,104, 96, 80, 60, 44, 24, 8, 0,
227 0, 4, 16, 24, 36, 44, 52, 60, 60, 52, 44, 36, 24, 16, 4, 0,
228 0, 4, 4, 8, 16, 20, 20, 24, 24, 20, 20, 16, 8, 4, 4, 0,
229 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0,
233 static const uint8_t obmc32[1024]={
234 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0,
235 0, 4, 4, 4, 8, 8, 8, 12, 12, 16, 16, 16, 20, 20, 20, 24, 24, 20, 20, 20, 16, 16, 16, 12, 12, 8, 8, 8, 4, 4, 4, 0,
236 0, 4, 8, 8, 12, 12, 16, 20, 20, 24, 28, 28, 32, 32, 36, 40, 40, 36, 32, 32, 28, 28, 24, 20, 20, 16, 12, 12, 8, 8, 4, 0,
237 0, 4, 8, 12, 16, 20, 24, 28, 28, 32, 36, 40, 44, 48, 52, 56, 56, 52, 48, 44, 40, 36, 32, 28, 28, 24, 20, 16, 12, 8, 4, 0,
238 4, 8, 12, 16, 20, 24, 28, 32, 40, 44, 48, 52, 56, 60, 64, 68, 68, 64, 60, 56, 52, 48, 44, 40, 32, 28, 24, 20, 16, 12, 8, 4,
239 4, 8, 12, 20, 24, 32, 36, 40, 48, 52, 56, 64, 68, 76, 80, 84, 84, 80, 76, 68, 64, 56, 52, 48, 40, 36, 32, 24, 20, 12, 8, 4,
240 4, 8, 16, 24, 28, 36, 44, 48, 56, 60, 68, 76, 80, 88, 96,100,100, 96, 88, 80, 76, 68, 60, 56, 48, 44, 36, 28, 24, 16, 8, 4,
241 4, 12, 20, 28, 32, 40, 48, 56, 64, 72, 80, 88, 92,100,108,116,116,108,100, 92, 88, 80, 72, 64, 56, 48, 40, 32, 28, 20, 12, 4,
242 4, 12, 20, 28, 40, 48, 56, 64, 72, 80, 88, 96,108,116,124,132,132,124,116,108, 96, 88, 80, 72, 64, 56, 48, 40, 28, 20, 12, 4,
243 4, 16, 24, 32, 44, 52, 60, 72, 80, 92,100,108,120,128,136,148,148,136,128,120,108,100, 92, 80, 72, 60, 52, 44, 32, 24, 16, 4,
244 4, 16, 28, 36, 48, 56, 68, 80, 88,100,112,120,132,140,152,164,164,152,140,132,120,112,100, 88, 80, 68, 56, 48, 36, 28, 16, 4,
245 4, 16, 28, 40, 52, 64, 76, 88, 96,108,120,132,144,156,168,180,180,168,156,144,132,120,108, 96, 88, 76, 64, 52, 40, 28, 16, 4,
246 8, 20, 32, 44, 56, 68, 80, 92,108,120,132,144,156,168,180,192,192,180,168,156,144,132,120,108, 92, 80, 68, 56, 44, 32, 20, 8,
247 8, 20, 32, 48, 60, 76, 88,100,116,128,140,156,168,184,196,208,208,196,184,168,156,140,128,116,100, 88, 76, 60, 48, 32, 20, 8,
248 8, 20, 36, 52, 64, 80, 96,108,124,136,152,168,180,196,212,224,224,212,196,180,168,152,136,124,108, 96, 80, 64, 52, 36, 20, 8,
249 8, 24, 40, 56, 68, 84,100,116,132,148,164,180,192,208,224,240,240,224,208,192,180,164,148,132,116,100, 84, 68, 56, 40, 24, 8,
250 8, 24, 40, 56, 68, 84,100,116,132,148,164,180,192,208,224,240,240,224,208,192,180,164,148,132,116,100, 84, 68, 56, 40, 24, 8,
251 8, 20, 36, 52, 64, 80, 96,108,124,136,152,168,180,196,212,224,224,212,196,180,168,152,136,124,108, 96, 80, 64, 52, 36, 20, 8,
252 8, 20, 32, 48, 60, 76, 88,100,116,128,140,156,168,184,196,208,208,196,184,168,156,140,128,116,100, 88, 76, 60, 48, 32, 20, 8,
253 8, 20, 32, 44, 56, 68, 80, 92,108,120,132,144,156,168,180,192,192,180,168,156,144,132,120,108, 92, 80, 68, 56, 44, 32, 20, 8,
254 4, 16, 28, 40, 52, 64, 76, 88, 96,108,120,132,144,156,168,180,180,168,156,144,132,120,108, 96, 88, 76, 64, 52, 40, 28, 16, 4,
255 4, 16, 28, 36, 48, 56, 68, 80, 88,100,112,120,132,140,152,164,164,152,140,132,120,112,100, 88, 80, 68, 56, 48, 36, 28, 16, 4,
256 4, 16, 24, 32, 44, 52, 60, 72, 80, 92,100,108,120,128,136,148,148,136,128,120,108,100, 92, 80, 72, 60, 52, 44, 32, 24, 16, 4,
257 4, 12, 20, 28, 40, 48, 56, 64, 72, 80, 88, 96,108,116,124,132,132,124,116,108, 96, 88, 80, 72, 64, 56, 48, 40, 28, 20, 12, 4,
258 4, 12, 20, 28, 32, 40, 48, 56, 64, 72, 80, 88, 92,100,108,116,116,108,100, 92, 88, 80, 72, 64, 56, 48, 40, 32, 28, 20, 12, 4,
259 4, 8, 16, 24, 28, 36, 44, 48, 56, 60, 68, 76, 80, 88, 96,100,100, 96, 88, 80, 76, 68, 60, 56, 48, 44, 36, 28, 24, 16, 8, 4,
260 4, 8, 12, 20, 24, 32, 36, 40, 48, 52, 56, 64, 68, 76, 80, 84, 84, 80, 76, 68, 64, 56, 52, 48, 40, 36, 32, 24, 20, 12, 8, 4,
261 4, 8, 12, 16, 20, 24, 28, 32, 40, 44, 48, 52, 56, 60, 64, 68, 68, 64, 60, 56, 52, 48, 44, 40, 32, 28, 24, 20, 16, 12, 8, 4,
262 0, 4, 8, 12, 16, 20, 24, 28, 28, 32, 36, 40, 44, 48, 52, 56, 56, 52, 48, 44, 40, 36, 32, 28, 28, 24, 20, 16, 12, 8, 4, 0,
263 0, 4, 8, 8, 12, 12, 16, 20, 20, 24, 28, 28, 32, 32, 36, 40, 40, 36, 32, 32, 28, 28, 24, 20, 20, 16, 12, 12, 8, 8, 4, 0,
264 0, 4, 4, 4, 8, 8, 8, 12, 12, 16, 16, 16, 20, 20, 20, 24, 24, 20, 20, 20, 16, 16, 16, 12, 12, 8, 8, 8, 4, 4, 4, 0,
265 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0,
268 static const uint8_t obmc16[256]={
269 0, 4, 4, 8, 8, 12, 12, 16, 16, 12, 12, 8, 8, 4, 4, 0,
270 4, 8, 16, 20, 28, 32, 40, 44, 44, 40, 32, 28, 20, 16, 8, 4,
271 4, 16, 24, 36, 44, 56, 64, 76, 76, 64, 56, 44, 36, 24, 16, 4,
272 8, 20, 36, 48, 64, 76, 92,104,104, 92, 76, 64, 48, 36, 20, 8,
273 8, 28, 44, 64, 80,100,116,136,136,116,100, 80, 64, 44, 28, 8,
274 12, 32, 56, 76,100,120,144,164,164,144,120,100, 76, 56, 32, 12,
275 12, 40, 64, 92,116,144,168,196,196,168,144,116, 92, 64, 40, 12,
276 16, 44, 76,104,136,164,196,224,224,196,164,136,104, 76, 44, 16,
277 16, 44, 76,104,136,164,196,224,224,196,164,136,104, 76, 44, 16,
278 12, 40, 64, 92,116,144,168,196,196,168,144,116, 92, 64, 40, 12,
279 12, 32, 56, 76,100,120,144,164,164,144,120,100, 76, 56, 32, 12,
280 8, 28, 44, 64, 80,100,116,136,136,116,100, 80, 64, 44, 28, 8,
281 8, 20, 36, 48, 64, 76, 92,104,104, 92, 76, 64, 48, 36, 20, 8,
282 4, 16, 24, 36, 44, 56, 64, 76, 76, 64, 56, 44, 36, 24, 16, 4,
283 4, 8, 16, 20, 28, 32, 40, 44, 44, 40, 32, 28, 20, 16, 8, 4,
284 0, 4, 4, 8, 8, 12, 12, 16, 16, 12, 12, 8, 8, 4, 4, 0,
288 static const uint8_t obmc32[1024]={
289 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
290 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 4, 4, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0,
291 0, 0, 0, 4, 4, 4, 4, 8, 8, 12, 12, 12, 12, 16, 16, 16, 16, 16, 16, 12, 12, 12, 12, 8, 8, 4, 4, 4, 4, 0, 0, 0,
292 0, 0, 4, 4, 4, 8, 8, 12, 16, 20, 20, 24, 28, 28, 28, 28, 28, 28, 28, 28, 24, 20, 20, 16, 12, 8, 8, 4, 4, 4, 0, 0,
293 0, 0, 4, 4, 8, 12, 16, 20, 24, 28, 36, 40, 44, 44, 48, 48, 48, 48, 44, 44, 40, 36, 28, 24, 20, 16, 12, 8, 4, 4, 0, 0,
294 0, 0, 4, 8, 12, 20, 24, 32, 36, 44, 48, 56, 60, 64, 68, 68, 68, 68, 64, 60, 56, 48, 44, 36, 32, 24, 20, 12, 8, 4, 0, 0,
295 0, 4, 4, 8, 16, 24, 32, 40, 48, 60, 68, 76, 80, 84, 88, 92, 92, 88, 84, 80, 76, 68, 60, 48, 40, 32, 24, 16, 8, 4, 4, 0,
296 0, 4, 8, 12, 20, 32, 40, 52, 64, 76, 84, 96,104,108,112,116,116,112,108,104, 96, 84, 76, 64, 52, 40, 32, 20, 12, 8, 4, 0,
297 0, 4, 8, 16, 24, 36, 48, 64, 76, 92,104,116,124,132,136,140,140,136,132,124,116,104, 92, 76, 64, 48, 36, 24, 16, 8, 4, 0,
298 0, 4, 12, 20, 28, 44, 60, 76, 92,104,120,136,148,156,160,164,164,160,156,148,136,120,104, 92, 76, 60, 44, 28, 20, 12, 4, 0,
299 0, 4, 12, 20, 36, 48, 68, 84,104,120,140,152,168,176,184,188,188,184,176,168,152,140,120,104, 84, 68, 48, 36, 20, 12, 4, 0,
300 0, 4, 12, 24, 36, 56, 76, 96,116,136,152,172,184,196,204,208,208,204,196,184,172,152,136,116, 96, 76, 56, 36, 24, 12, 4, 0,
301 0, 4, 12, 24, 44, 60, 80,104,124,148,168,184,200,212,224,228,228,224,212,200,184,168,148,124,104, 80, 60, 44, 24, 12, 4, 0,
302 0, 4, 12, 28, 44, 64, 84,108,132,156,176,196,212,228,236,240,240,236,228,212,196,176,156,132,108, 84, 64, 44, 28, 12, 4, 0,
303 0, 4, 16, 28, 48, 68, 88,112,136,160,184,204,224,236,244,252,252,244,236,224,204,184,160,136,112, 88, 68, 48, 28, 16, 4, 0,
304 1, 4, 16, 28, 48, 68, 92,116,140,164,188,208,228,240,252,255,255,252,240,228,208,188,164,140,116, 92, 68, 48, 28, 16, 4, 1,
305 1, 4, 16, 28, 48, 68, 92,116,140,164,188,208,228,240,252,255,255,252,240,228,208,188,164,140,116, 92, 68, 48, 28, 16, 4, 1,
306 0, 4, 16, 28, 48, 68, 88,112,136,160,184,204,224,236,244,252,252,244,236,224,204,184,160,136,112, 88, 68, 48, 28, 16, 4, 0,
307 0, 4, 12, 28, 44, 64, 84,108,132,156,176,196,212,228,236,240,240,236,228,212,196,176,156,132,108, 84, 64, 44, 28, 12, 4, 0,
308 0, 4, 12, 24, 44, 60, 80,104,124,148,168,184,200,212,224,228,228,224,212,200,184,168,148,124,104, 80, 60, 44, 24, 12, 4, 0,
309 0, 4, 12, 24, 36, 56, 76, 96,116,136,152,172,184,196,204,208,208,204,196,184,172,152,136,116, 96, 76, 56, 36, 24, 12, 4, 0,
310 0, 4, 12, 20, 36, 48, 68, 84,104,120,140,152,168,176,184,188,188,184,176,168,152,140,120,104, 84, 68, 48, 36, 20, 12, 4, 0,
311 0, 4, 12, 20, 28, 44, 60, 76, 92,104,120,136,148,156,160,164,164,160,156,148,136,120,104, 92, 76, 60, 44, 28, 20, 12, 4, 0,
312 0, 4, 8, 16, 24, 36, 48, 64, 76, 92,104,116,124,132,136,140,140,136,132,124,116,104, 92, 76, 64, 48, 36, 24, 16, 8, 4, 0,
313 0, 4, 8, 12, 20, 32, 40, 52, 64, 76, 84, 96,104,108,112,116,116,112,108,104, 96, 84, 76, 64, 52, 40, 32, 20, 12, 8, 4, 0,
314 0, 4, 4, 8, 16, 24, 32, 40, 48, 60, 68, 76, 80, 84, 88, 92, 92, 88, 84, 80, 76, 68, 60, 48, 40, 32, 24, 16, 8, 4, 4, 0,
315 0, 0, 4, 8, 12, 20, 24, 32, 36, 44, 48, 56, 60, 64, 68, 68, 68, 68, 64, 60, 56, 48, 44, 36, 32, 24, 20, 12, 8, 4, 0, 0,
316 0, 0, 4, 4, 8, 12, 16, 20, 24, 28, 36, 40, 44, 44, 48, 48, 48, 48, 44, 44, 40, 36, 28, 24, 20, 16, 12, 8, 4, 4, 0, 0,
317 0, 0, 4, 4, 4, 8, 8, 12, 16, 20, 20, 24, 28, 28, 28, 28, 28, 28, 28, 28, 24, 20, 20, 16, 12, 8, 8, 4, 4, 4, 0, 0,
318 0, 0, 0, 4, 4, 4, 4, 8, 8, 12, 12, 12, 12, 16, 16, 16, 16, 16, 16, 12, 12, 12, 12, 8, 8, 4, 4, 4, 4, 0, 0, 0,
319 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 4, 4, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0,
320 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
323 static const uint8_t obmc16[256]={
324 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0,
325 0, 0, 4, 8, 12, 16, 20, 20, 20, 20, 16, 12, 8, 4, 0, 0,
326 0, 4, 12, 24, 32, 44, 52, 56, 56, 52, 44, 32, 24, 12, 4, 0,
327 0, 8, 24, 40, 60, 80, 96,104,104, 96, 80, 60, 40, 24, 8, 0,
328 0, 12, 32, 64, 92,120,140,152,152,140,120, 92, 64, 32, 12, 0,
329 4, 16, 44, 80,120,156,184,196,196,184,156,120, 80, 44, 16, 4,
330 4, 20, 52, 96,140,184,216,232,232,216,184,140, 96, 52, 20, 4,
331 0, 20, 56,104,152,196,232,252,252,232,196,152,104, 56, 20, 0,
332 0, 20, 56,104,152,196,232,252,252,232,196,152,104, 56, 20, 0,
333 4, 20, 52, 96,140,184,216,232,232,216,184,140, 96, 52, 20, 4,
334 4, 16, 44, 80,120,156,184,196,196,184,156,120, 80, 44, 16, 4,
335 0, 12, 32, 64, 92,120,140,152,152,140,120, 92, 64, 32, 12, 0,
336 0, 8, 24, 40, 60, 80, 96,104,104, 96, 80, 60, 40, 24, 8, 0,
337 0, 4, 12, 24, 32, 44, 52, 56, 56, 52, 44, 32, 24, 12, 4, 0,
338 0, 0, 4, 8, 12, 16, 20, 20, 20, 20, 16, 12, 8, 4, 0, 0,
339 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0,
345 static const uint8_t obmc8[64]={
346 4, 12, 20, 28, 28, 20, 12, 4,
347 12, 36, 60, 84, 84, 60, 36, 12,
348 20, 60,100,140,140,100, 60, 20,
349 28, 84,140,196,196,140, 84, 28,
350 28, 84,140,196,196,140, 84, 28,
351 20, 60,100,140,140,100, 60, 20,
352 12, 36, 60, 84, 84, 60, 36, 12,
353 4, 12, 20, 28, 28, 20, 12, 4,
358 static const uint8_t obmc4[16]={
366 static const uint8_t * const obmc_tab[4]={
367 obmc32, obmc16, obmc8, obmc4
370 static int scale_mv_ref[MAX_REF_FRAMES][MAX_REF_FRAMES];
372 typedef struct BlockNode{
378 //#define TYPE_SPLIT 1
379 #define BLOCK_INTRA 1
381 //#define TYPE_NOCOLOR 4
382 uint8_t level; //FIXME merge into type?
385 static const BlockNode null_block= { //FIXME add border maybe
386 .color= {128,128,128},
394 #define LOG2_MB_SIZE 4
395 #define MB_SIZE (1<<LOG2_MB_SIZE)
396 #define ENCODER_EXTRA_BITS 4
399 typedef struct x_and_coeff{
404 typedef struct SubBand{
409 int qlog; ///< log(qscale)/log[2^(1/6)]
414 int stride_line; ///< Stride measured in lines, not pixels.
415 x_and_coeff * x_coeff;
416 struct SubBand *parent;
417 uint8_t state[/*7*2*/ 7 + 512][32];
420 typedef struct Plane{
423 SubBand band[MAX_DECOMPOSITIONS][4];
426 int8_t hcoeff[HTAPS_MAX/2];
431 int8_t last_hcoeff[HTAPS_MAX/2];
435 typedef struct SnowContext{
436 // MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to eventually make the motion estimation independent of MpegEncContext, so this will be removed then (FIXME/XXX)
438 AVCodecContext *avctx;
442 AVFrame input_picture; ///< new_picture with the internal linesizes
443 AVFrame current_picture;
444 AVFrame last_picture[MAX_REF_FRAMES];
445 uint8_t *halfpel_plane[MAX_REF_FRAMES][4][4];
446 AVFrame mconly_picture;
447 // uint8_t q_context[16];
448 uint8_t header_state[32];
449 uint8_t block_state[128 + 32*128];
453 int spatial_decomposition_type;
454 int last_spatial_decomposition_type;
455 int temporal_decomposition_type;
456 int spatial_decomposition_count;
457 int last_spatial_decomposition_count;
458 int temporal_decomposition_count;
461 int16_t (*ref_mvs[MAX_REF_FRAMES])[2];
462 uint32_t *ref_scores[MAX_REF_FRAMES];
463 DWTELEM *spatial_dwt_buffer;
464 IDWTELEM *spatial_idwt_buffer;
468 int spatial_scalability;
478 #define QBIAS_SHIFT 3
482 int last_block_max_depth;
483 Plane plane[MAX_PLANES];
485 #define ME_CACHE_SIZE 1024
486 int me_cache[ME_CACHE_SIZE];
487 int me_cache_generation;
490 MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to eventually make the motion estimation independent of MpegEncContext, so this will be removed then (FIXME/XXX)
503 #define slice_buffer_get_line(slice_buf, line_num) ((slice_buf)->line[line_num] ? (slice_buf)->line[line_num] : slice_buffer_load_line((slice_buf), (line_num)))
504 //#define slice_buffer_get_line(slice_buf, line_num) (slice_buffer_load_line((slice_buf), (line_num)))
506 static void iterative_me(SnowContext *s);
508 static void slice_buffer_init(slice_buffer * buf, int line_count, int max_allocated_lines, int line_width, IDWTELEM * base_buffer)
512 buf->base_buffer = base_buffer;
513 buf->line_count = line_count;
514 buf->line_width = line_width;
515 buf->data_count = max_allocated_lines;
516 buf->line = av_mallocz (sizeof(IDWTELEM *) * line_count);
517 buf->data_stack = av_malloc (sizeof(IDWTELEM *) * max_allocated_lines);
519 for(i = 0; i < max_allocated_lines; i++){
520 buf->data_stack[i] = av_malloc (sizeof(IDWTELEM) * line_width);
523 buf->data_stack_top = max_allocated_lines - 1;
526 static IDWTELEM * slice_buffer_load_line(slice_buffer * buf, int line)
531 assert(buf->data_stack_top >= 0);
532 // assert(!buf->line[line]);
534 return buf->line[line];
536 offset = buf->line_width * line;
537 buffer = buf->data_stack[buf->data_stack_top];
538 buf->data_stack_top--;
539 buf->line[line] = buffer;
544 static void slice_buffer_release(slice_buffer * buf, int line)
549 assert(line >= 0 && line < buf->line_count);
550 assert(buf->line[line]);
552 offset = buf->line_width * line;
553 buffer = buf->line[line];
554 buf->data_stack_top++;
555 buf->data_stack[buf->data_stack_top] = buffer;
556 buf->line[line] = NULL;
559 static void slice_buffer_flush(slice_buffer * buf)
562 for(i = 0; i < buf->line_count; i++){
564 slice_buffer_release(buf, i);
568 static void slice_buffer_destroy(slice_buffer * buf)
571 slice_buffer_flush(buf);
573 for(i = buf->data_count - 1; i >= 0; i--){
574 av_freep(&buf->data_stack[i]);
576 av_freep(&buf->data_stack);
577 av_freep(&buf->line);
581 // Avoid a name clash on SGI IRIX
584 #define QEXPSHIFT (7-FRAC_BITS+8) //FIXME try to change this to 0
585 static uint8_t qexp[QROOT];
587 static inline int mirror(int v, int m){
588 while((unsigned)v > (unsigned)m){
595 static inline void put_symbol(RangeCoder *c, uint8_t *state, int v, int is_signed){
599 const int a= FFABS(v);
600 const int e= av_log2(a);
602 const int el= FFMIN(e, 10);
603 put_rac(c, state+0, 0);
606 put_rac(c, state+1+i, 1); //1..10
609 put_rac(c, state+1+9, 1); //1..10
611 put_rac(c, state+1+FFMIN(i,9), 0);
613 for(i=e-1; i>=el; i--){
614 put_rac(c, state+22+9, (a>>i)&1); //22..31
617 put_rac(c, state+22+i, (a>>i)&1); //22..31
621 put_rac(c, state+11 + el, v < 0); //11..21
624 put_rac(c, state+0, 0);
627 put_rac(c, state+1+i, 1); //1..10
629 put_rac(c, state+1+i, 0);
631 for(i=e-1; i>=0; i--){
632 put_rac(c, state+22+i, (a>>i)&1); //22..31
636 put_rac(c, state+11 + e, v < 0); //11..21
639 put_rac(c, state+1+FFMIN(i,9), 1); //1..10
641 put_rac(c, state+1+FFMIN(i,9), 0);
643 for(i=e-1; i>=0; i--){
644 put_rac(c, state+22+FFMIN(i,9), (a>>i)&1); //22..31
648 put_rac(c, state+11 + FFMIN(e,10), v < 0); //11..21
652 put_rac(c, state+0, 1);
656 static inline int get_symbol(RangeCoder *c, uint8_t *state, int is_signed){
657 if(get_rac(c, state+0))
662 while(get_rac(c, state+1 + FFMIN(e,9))){ //1..10
667 for(i=e-1; i>=0; i--){
668 a += a + get_rac(c, state+22 + FFMIN(i,9)); //22..31
671 if(is_signed && get_rac(c, state+11 + FFMIN(e,10))) //11..21
678 static inline void put_symbol2(RangeCoder *c, uint8_t *state, int v, int log2){
680 int r= log2>=0 ? 1<<log2 : 1;
686 put_rac(c, state+4+log2, 1);
691 put_rac(c, state+4+log2, 0);
693 for(i=log2-1; i>=0; i--){
694 put_rac(c, state+31-i, (v>>i)&1);
698 static inline int get_symbol2(RangeCoder *c, uint8_t *state, int log2){
700 int r= log2>=0 ? 1<<log2 : 1;
705 while(get_rac(c, state+4+log2)){
711 for(i=log2-1; i>=0; i--){
712 v+= get_rac(c, state+31-i)<<i;
718 static av_always_inline void
719 lift(DWTELEM *dst, DWTELEM *src, DWTELEM *ref,
720 int dst_step, int src_step, int ref_step,
721 int width, int mul, int add, int shift,
722 int highpass, int inverse){
723 const int mirror_left= !highpass;
724 const int mirror_right= (width&1) ^ highpass;
725 const int w= (width>>1) - 1 + (highpass & width);
728 #define LIFT(src, ref, inv) ((src) + ((inv) ? - (ref) : + (ref)))
730 dst[0] = LIFT(src[0], ((mul*2*ref[0]+add)>>shift), inverse);
737 LIFT(src[i*src_step],
738 ((mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add)>>shift),
744 LIFT(src[w*src_step],
745 ((mul*2*ref[w*ref_step]+add)>>shift),
750 static av_always_inline void
751 inv_lift(IDWTELEM *dst, IDWTELEM *src, IDWTELEM *ref,
752 int dst_step, int src_step, int ref_step,
753 int width, int mul, int add, int shift,
754 int highpass, int inverse){
755 const int mirror_left= !highpass;
756 const int mirror_right= (width&1) ^ highpass;
757 const int w= (width>>1) - 1 + (highpass & width);
760 #define LIFT(src, ref, inv) ((src) + ((inv) ? - (ref) : + (ref)))
762 dst[0] = LIFT(src[0], ((mul*2*ref[0]+add)>>shift), inverse);
769 LIFT(src[i*src_step],
770 ((mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add)>>shift),
776 LIFT(src[w*src_step],
777 ((mul*2*ref[w*ref_step]+add)>>shift),
783 static av_always_inline void
784 liftS(DWTELEM *dst, DWTELEM *src, DWTELEM *ref,
785 int dst_step, int src_step, int ref_step,
786 int width, int mul, int add, int shift,
787 int highpass, int inverse){
788 const int mirror_left= !highpass;
789 const int mirror_right= (width&1) ^ highpass;
790 const int w= (width>>1) - 1 + (highpass & width);
794 #define LIFTS(src, ref, inv) \
796 (src) + (((ref) + 4*(src))>>shift): \
797 -((-16*(src) + (ref) + add/4 + 1 + (5<<25))/(5*4) - (1<<23)))
799 dst[0] = LIFTS(src[0], mul*2*ref[0]+add, inverse);
806 LIFTS(src[i*src_step],
807 mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add,
813 LIFTS(src[w*src_step], mul*2*ref[w*ref_step]+add, inverse);
816 static av_always_inline void
817 inv_liftS(IDWTELEM *dst, IDWTELEM *src, IDWTELEM *ref,
818 int dst_step, int src_step, int ref_step,
819 int width, int mul, int add, int shift,
820 int highpass, int inverse){
821 const int mirror_left= !highpass;
822 const int mirror_right= (width&1) ^ highpass;
823 const int w= (width>>1) - 1 + (highpass & width);
827 #define LIFTS(src, ref, inv) \
829 (src) + (((ref) + 4*(src))>>shift): \
830 -((-16*(src) + (ref) + add/4 + 1 + (5<<25))/(5*4) - (1<<23)))
832 dst[0] = LIFTS(src[0], mul*2*ref[0]+add, inverse);
839 LIFTS(src[i*src_step],
840 mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add,
846 LIFTS(src[w*src_step], mul*2*ref[w*ref_step]+add, inverse);
851 static void horizontal_decompose53i(DWTELEM *b, int width){
853 const int width2= width>>1;
855 const int w2= (width+1)>>1;
857 for(x=0; x<width2; x++){
859 temp[x+w2]= b[2*x + 1];
873 for(x=1; x+1<width2; x+=2){
877 A2 += (A1 + A3 + 2)>>2;
881 A1= temp[x+1+width2];
884 A4 += (A1 + A3 + 2)>>2;
890 A2 += (A1 + A3 + 2)>>2;
895 lift(b+w2, temp+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 0);
896 lift(b , temp , b+w2, 1, 1, 1, width, 1, 2, 2, 0, 0);
900 static void vertical_decompose53iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
903 for(i=0; i<width; i++){
904 b1[i] -= (b0[i] + b2[i])>>1;
908 static void vertical_decompose53iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
911 for(i=0; i<width; i++){
912 b1[i] += (b0[i] + b2[i] + 2)>>2;
916 static void spatial_decompose53i(DWTELEM *buffer, int width, int height, int stride){
918 DWTELEM *b0= buffer + mirror(-2-1, height-1)*stride;
919 DWTELEM *b1= buffer + mirror(-2 , height-1)*stride;
921 for(y=-2; y<height; y+=2){
922 DWTELEM *b2= buffer + mirror(y+1, height-1)*stride;
923 DWTELEM *b3= buffer + mirror(y+2, height-1)*stride;
925 if(y+1<(unsigned)height) horizontal_decompose53i(b2, width);
926 if(y+2<(unsigned)height) horizontal_decompose53i(b3, width);
928 if(y+1<(unsigned)height) vertical_decompose53iH0(b1, b2, b3, width);
929 if(y+0<(unsigned)height) vertical_decompose53iL0(b0, b1, b2, width);
936 static void horizontal_decompose97i(DWTELEM *b, int width){
938 const int w2= (width+1)>>1;
940 lift (temp+w2, b +1, b , 1, 2, 2, width, W_AM, W_AO, W_AS, 1, 1);
941 liftS(temp , b , temp+w2, 1, 2, 1, width, W_BM, W_BO, W_BS, 0, 0);
942 lift (b +w2, temp+w2, temp , 1, 1, 1, width, W_CM, W_CO, W_CS, 1, 0);
943 lift (b , temp , b +w2, 1, 1, 1, width, W_DM, W_DO, W_DS, 0, 0);
947 static void vertical_decompose97iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
950 for(i=0; i<width; i++){
951 b1[i] -= (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
955 static void vertical_decompose97iH1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
958 for(i=0; i<width; i++){
959 b1[i] += (W_CM*(b0[i] + b2[i])+W_CO)>>W_CS;
963 static void vertical_decompose97iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
966 for(i=0; i<width; i++){
968 b1[i] -= (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS;
970 b1[i] = (16*4*b1[i] - 4*(b0[i] + b2[i]) + W_BO*5 + (5<<27)) / (5*16) - (1<<23);
975 static void vertical_decompose97iL1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
978 for(i=0; i<width; i++){
979 b1[i] += (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS;
983 static void spatial_decompose97i(DWTELEM *buffer, int width, int height, int stride){
985 DWTELEM *b0= buffer + mirror(-4-1, height-1)*stride;
986 DWTELEM *b1= buffer + mirror(-4 , height-1)*stride;
987 DWTELEM *b2= buffer + mirror(-4+1, height-1)*stride;
988 DWTELEM *b3= buffer + mirror(-4+2, height-1)*stride;
990 for(y=-4; y<height; y+=2){
991 DWTELEM *b4= buffer + mirror(y+3, height-1)*stride;
992 DWTELEM *b5= buffer + mirror(y+4, height-1)*stride;
994 if(y+3<(unsigned)height) horizontal_decompose97i(b4, width);
995 if(y+4<(unsigned)height) horizontal_decompose97i(b5, width);
997 if(y+3<(unsigned)height) vertical_decompose97iH0(b3, b4, b5, width);
998 if(y+2<(unsigned)height) vertical_decompose97iL0(b2, b3, b4, width);
999 if(y+1<(unsigned)height) vertical_decompose97iH1(b1, b2, b3, width);
1000 if(y+0<(unsigned)height) vertical_decompose97iL1(b0, b1, b2, width);
1009 void ff_spatial_dwt(DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
1012 for(level=0; level<decomposition_count; level++){
1014 case DWT_97: spatial_decompose97i(buffer, width>>level, height>>level, stride<<level); break;
1015 case DWT_53: spatial_decompose53i(buffer, width>>level, height>>level, stride<<level); break;
1020 static void horizontal_compose53i(IDWTELEM *b, int width){
1021 IDWTELEM temp[width];
1022 const int width2= width>>1;
1023 const int w2= (width+1)>>1;
1035 for(x=1; x+1<width2; x+=2){
1039 A2 += (A1 + A3 + 2)>>2;
1043 A1= temp[x+1+width2];
1046 A4 += (A1 + A3 + 2)>>2;
1052 A2 += (A1 + A3 + 2)>>2;
1056 inv_lift(temp , b , b+w2, 1, 1, 1, width, 1, 2, 2, 0, 1);
1057 inv_lift(temp+w2, b+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 1);
1059 for(x=0; x<width2; x++){
1061 b[2*x + 1]= temp[x+w2];
1067 static void vertical_compose53iH0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
1070 for(i=0; i<width; i++){
1071 b1[i] += (b0[i] + b2[i])>>1;
1075 static void vertical_compose53iL0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
1078 for(i=0; i<width; i++){
1079 b1[i] -= (b0[i] + b2[i] + 2)>>2;
1083 static void spatial_compose53i_buffered_init(DWTCompose *cs, slice_buffer * sb, int height, int stride_line){
1084 cs->b0 = slice_buffer_get_line(sb, mirror(-1-1, height-1) * stride_line);
1085 cs->b1 = slice_buffer_get_line(sb, mirror(-1 , height-1) * stride_line);
1089 static void spatial_compose53i_init(DWTCompose *cs, IDWTELEM *buffer, int height, int stride){
1090 cs->b0 = buffer + mirror(-1-1, height-1)*stride;
1091 cs->b1 = buffer + mirror(-1 , height-1)*stride;
1095 static void spatial_compose53i_dy_buffered(DWTCompose *cs, slice_buffer * sb, int width, int height, int stride_line){
1098 IDWTELEM *b0= cs->b0;
1099 IDWTELEM *b1= cs->b1;
1100 IDWTELEM *b2= slice_buffer_get_line(sb, mirror(y+1, height-1) * stride_line);
1101 IDWTELEM *b3= slice_buffer_get_line(sb, mirror(y+2, height-1) * stride_line);
1103 if(y+1<(unsigned)height) vertical_compose53iL0(b1, b2, b3, width);
1104 if(y+0<(unsigned)height) vertical_compose53iH0(b0, b1, b2, width);
1106 if(y-1<(unsigned)height) horizontal_compose53i(b0, width);
1107 if(y+0<(unsigned)height) horizontal_compose53i(b1, width);
1114 static void spatial_compose53i_dy(DWTCompose *cs, IDWTELEM *buffer, int width, int height, int stride){
1116 IDWTELEM *b0= cs->b0;
1117 IDWTELEM *b1= cs->b1;
1118 IDWTELEM *b2= buffer + mirror(y+1, height-1)*stride;
1119 IDWTELEM *b3= buffer + mirror(y+2, height-1)*stride;
1121 if(y+1<(unsigned)height) vertical_compose53iL0(b1, b2, b3, width);
1122 if(y+0<(unsigned)height) vertical_compose53iH0(b0, b1, b2, width);
1124 if(y-1<(unsigned)height) horizontal_compose53i(b0, width);
1125 if(y+0<(unsigned)height) horizontal_compose53i(b1, width);
1132 static void av_unused spatial_compose53i(IDWTELEM *buffer, int width, int height, int stride){
1134 spatial_compose53i_init(&cs, buffer, height, stride);
1135 while(cs.y <= height)
1136 spatial_compose53i_dy(&cs, buffer, width, height, stride);
1140 void ff_snow_horizontal_compose97i(IDWTELEM *b, int width){
1141 IDWTELEM temp[width];
1142 const int w2= (width+1)>>1;
1144 inv_lift (temp , b , b +w2, 1, 1, 1, width, W_DM, W_DO, W_DS, 0, 1);
1145 inv_lift (temp+w2, b +w2, temp , 1, 1, 1, width, W_CM, W_CO, W_CS, 1, 1);
1146 inv_liftS(b , temp , temp+w2, 2, 1, 1, width, W_BM, W_BO, W_BS, 0, 1);
1147 inv_lift (b+1 , temp+w2, b , 2, 1, 2, width, W_AM, W_AO, W_AS, 1, 0);
1150 static void vertical_compose97iH0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
1153 for(i=0; i<width; i++){
1154 b1[i] += (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
1158 static void vertical_compose97iH1(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
1161 for(i=0; i<width; i++){
1162 b1[i] -= (W_CM*(b0[i] + b2[i])+W_CO)>>W_CS;
1166 static void vertical_compose97iL0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
1169 for(i=0; i<width; i++){
1171 b1[i] += (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS;
1173 b1[i] += (W_BM*(b0[i] + b2[i])+4*b1[i]+W_BO)>>W_BS;
1178 static void vertical_compose97iL1(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
1181 for(i=0; i<width; i++){
1182 b1[i] -= (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS;
1186 void ff_snow_vertical_compose97i(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, IDWTELEM *b3, IDWTELEM *b4, IDWTELEM *b5, int width){
1189 for(i=0; i<width; i++){
1190 b4[i] -= (W_DM*(b3[i] + b5[i])+W_DO)>>W_DS;
1191 b3[i] -= (W_CM*(b2[i] + b4[i])+W_CO)>>W_CS;
1193 b2[i] += (W_BM*(b1[i] + b3[i])+W_BO)>>W_BS;
1195 b2[i] += (W_BM*(b1[i] + b3[i])+4*b2[i]+W_BO)>>W_BS;
1197 b1[i] += (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
1201 static void spatial_compose97i_buffered_init(DWTCompose *cs, slice_buffer * sb, int height, int stride_line){
1202 cs->b0 = slice_buffer_get_line(sb, mirror(-3-1, height-1) * stride_line);
1203 cs->b1 = slice_buffer_get_line(sb, mirror(-3 , height-1) * stride_line);
1204 cs->b2 = slice_buffer_get_line(sb, mirror(-3+1, height-1) * stride_line);
1205 cs->b3 = slice_buffer_get_line(sb, mirror(-3+2, height-1) * stride_line);
1209 static void spatial_compose97i_init(DWTCompose *cs, IDWTELEM *buffer, int height, int stride){
1210 cs->b0 = buffer + mirror(-3-1, height-1)*stride;
1211 cs->b1 = buffer + mirror(-3 , height-1)*stride;
1212 cs->b2 = buffer + mirror(-3+1, height-1)*stride;
1213 cs->b3 = buffer + mirror(-3+2, height-1)*stride;
1217 static void spatial_compose97i_dy_buffered(DSPContext *dsp, DWTCompose *cs, slice_buffer * sb, int width, int height, int stride_line){
1220 IDWTELEM *b0= cs->b0;
1221 IDWTELEM *b1= cs->b1;
1222 IDWTELEM *b2= cs->b2;
1223 IDWTELEM *b3= cs->b3;
1224 IDWTELEM *b4= slice_buffer_get_line(sb, mirror(y + 3, height - 1) * stride_line);
1225 IDWTELEM *b5= slice_buffer_get_line(sb, mirror(y + 4, height - 1) * stride_line);
1227 if(y>0 && y+4<height){
1228 dsp->vertical_compose97i(b0, b1, b2, b3, b4, b5, width);
1230 if(y+3<(unsigned)height) vertical_compose97iL1(b3, b4, b5, width);
1231 if(y+2<(unsigned)height) vertical_compose97iH1(b2, b3, b4, width);
1232 if(y+1<(unsigned)height) vertical_compose97iL0(b1, b2, b3, width);
1233 if(y+0<(unsigned)height) vertical_compose97iH0(b0, b1, b2, width);
1236 if(y-1<(unsigned)height) dsp->horizontal_compose97i(b0, width);
1237 if(y+0<(unsigned)height) dsp->horizontal_compose97i(b1, width);
1246 static void spatial_compose97i_dy(DWTCompose *cs, IDWTELEM *buffer, int width, int height, int stride){
1248 IDWTELEM *b0= cs->b0;
1249 IDWTELEM *b1= cs->b1;
1250 IDWTELEM *b2= cs->b2;
1251 IDWTELEM *b3= cs->b3;
1252 IDWTELEM *b4= buffer + mirror(y+3, height-1)*stride;
1253 IDWTELEM *b5= buffer + mirror(y+4, height-1)*stride;
1255 if(y+3<(unsigned)height) vertical_compose97iL1(b3, b4, b5, width);
1256 if(y+2<(unsigned)height) vertical_compose97iH1(b2, b3, b4, width);
1257 if(y+1<(unsigned)height) vertical_compose97iL0(b1, b2, b3, width);
1258 if(y+0<(unsigned)height) vertical_compose97iH0(b0, b1, b2, width);
1260 if(y-1<(unsigned)height) ff_snow_horizontal_compose97i(b0, width);
1261 if(y+0<(unsigned)height) ff_snow_horizontal_compose97i(b1, width);
1270 static void av_unused spatial_compose97i(IDWTELEM *buffer, int width, int height, int stride){
1272 spatial_compose97i_init(&cs, buffer, height, stride);
1273 while(cs.y <= height)
1274 spatial_compose97i_dy(&cs, buffer, width, height, stride);
1277 static void ff_spatial_idwt_buffered_init(DWTCompose *cs, slice_buffer * sb, int width, int height, int stride_line, int type, int decomposition_count){
1279 for(level=decomposition_count-1; level>=0; level--){
1281 case DWT_97: spatial_compose97i_buffered_init(cs+level, sb, height>>level, stride_line<<level); break;
1282 case DWT_53: spatial_compose53i_buffered_init(cs+level, sb, height>>level, stride_line<<level); break;
1287 static void ff_spatial_idwt_init(DWTCompose *cs, IDWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
1289 for(level=decomposition_count-1; level>=0; level--){
1291 case DWT_97: spatial_compose97i_init(cs+level, buffer, height>>level, stride<<level); break;
1292 case DWT_53: spatial_compose53i_init(cs+level, buffer, height>>level, stride<<level); break;
1297 static void ff_spatial_idwt_slice(DWTCompose *cs, IDWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count, int y){
1298 const int support = type==1 ? 3 : 5;
1302 for(level=decomposition_count-1; level>=0; level--){
1303 while(cs[level].y <= FFMIN((y>>level)+support, height>>level)){
1305 case DWT_97: spatial_compose97i_dy(cs+level, buffer, width>>level, height>>level, stride<<level);
1307 case DWT_53: spatial_compose53i_dy(cs+level, buffer, width>>level, height>>level, stride<<level);
1314 static void ff_spatial_idwt_buffered_slice(DSPContext *dsp, DWTCompose *cs, slice_buffer * slice_buf, int width, int height, int stride_line, int type, int decomposition_count, int y){
1315 const int support = type==1 ? 3 : 5;
1319 for(level=decomposition_count-1; level>=0; level--){
1320 while(cs[level].y <= FFMIN((y>>level)+support, height>>level)){
1322 case DWT_97: spatial_compose97i_dy_buffered(dsp, cs+level, slice_buf, width>>level, height>>level, stride_line<<level);
1324 case DWT_53: spatial_compose53i_dy_buffered(cs+level, slice_buf, width>>level, height>>level, stride_line<<level);
1331 static void ff_spatial_idwt(IDWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
1332 DWTCompose cs[MAX_DECOMPOSITIONS];
1334 ff_spatial_idwt_init(cs, buffer, width, height, stride, type, decomposition_count);
1335 for(y=0; y<height; y+=4)
1336 ff_spatial_idwt_slice(cs, buffer, width, height, stride, type, decomposition_count, y);
1339 static int encode_subband_c0run(SnowContext *s, SubBand *b, IDWTELEM *src, IDWTELEM *parent, int stride, int orientation){
1340 const int w= b->width;
1341 const int h= b->height;
1353 int /*ll=0, */l=0, lt=0, t=0, rt=0;
1354 v= src[x + y*stride];
1357 t= src[x + (y-1)*stride];
1359 lt= src[x - 1 + (y-1)*stride];
1362 rt= src[x + 1 + (y-1)*stride];
1366 l= src[x - 1 + y*stride];
1368 if(orientation==1) ll= src[y + (x-2)*stride];
1369 else ll= src[x - 2 + y*stride];
1375 if(px<b->parent->width && py<b->parent->height)
1376 p= parent[px + py*2*stride];
1378 if(!(/*ll|*/l|lt|t|rt|p)){
1380 runs[run_index++]= run;
1388 max_index= run_index;
1389 runs[run_index++]= run;
1391 run= runs[run_index++];
1393 put_symbol2(&s->c, b->state[30], max_index, 0);
1394 if(run_index <= max_index)
1395 put_symbol2(&s->c, b->state[1], run, 3);
1398 if(s->c.bytestream_end - s->c.bytestream < w*40){
1399 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
1404 int /*ll=0, */l=0, lt=0, t=0, rt=0;
1405 v= src[x + y*stride];
1408 t= src[x + (y-1)*stride];
1410 lt= src[x - 1 + (y-1)*stride];
1413 rt= src[x + 1 + (y-1)*stride];
1417 l= src[x - 1 + y*stride];
1419 if(orientation==1) ll= src[y + (x-2)*stride];
1420 else ll= src[x - 2 + y*stride];
1426 if(px<b->parent->width && py<b->parent->height)
1427 p= parent[px + py*2*stride];
1429 if(/*ll|*/l|lt|t|rt|p){
1430 int context= av_log2(/*FFABS(ll) + */3*FFABS(l) + FFABS(lt) + 2*FFABS(t) + FFABS(rt) + FFABS(p));
1432 put_rac(&s->c, &b->state[0][context], !!v);
1435 run= runs[run_index++];
1437 if(run_index <= max_index)
1438 put_symbol2(&s->c, b->state[1], run, 3);
1446 int context= av_log2(/*FFABS(ll) + */3*FFABS(l) + FFABS(lt) + 2*FFABS(t) + FFABS(rt) + FFABS(p));
1447 int l2= 2*FFABS(l) + (l<0);
1448 int t2= 2*FFABS(t) + (t<0);
1450 put_symbol2(&s->c, b->state[context + 2], FFABS(v)-1, context-4);
1451 put_rac(&s->c, &b->state[0][16 + 1 + 3 + quant3bA[l2&0xFF] + 3*quant3bA[t2&0xFF]], v<0);
1459 static int encode_subband(SnowContext *s, SubBand *b, IDWTELEM *src, IDWTELEM *parent, int stride, int orientation){
1460 // encode_subband_qtree(s, b, src, parent, stride, orientation);
1461 // encode_subband_z0run(s, b, src, parent, stride, orientation);
1462 return encode_subband_c0run(s, b, src, parent, stride, orientation);
1463 // encode_subband_dzr(s, b, src, parent, stride, orientation);
1466 static inline void unpack_coeffs(SnowContext *s, SubBand *b, SubBand * parent, int orientation){
1467 const int w= b->width;
1468 const int h= b->height;
1473 x_and_coeff *xc= b->x_coeff;
1474 x_and_coeff *prev_xc= NULL;
1475 x_and_coeff *prev2_xc= xc;
1476 x_and_coeff *parent_xc= parent ? parent->x_coeff : NULL;
1477 x_and_coeff *prev_parent_xc= parent_xc;
1479 runs= get_symbol2(&s->c, b->state[30], 0);
1480 if(runs-- > 0) run= get_symbol2(&s->c, b->state[1], 3);
1485 int lt=0, t=0, rt=0;
1487 if(y && prev_xc->x == 0){
1499 if(prev_xc->x == x + 1)
1505 if(x>>1 > parent_xc->x){
1508 if(x>>1 == parent_xc->x){
1509 p= parent_xc->coeff;
1512 if(/*ll|*/l|lt|t|rt|p){
1513 int context= av_log2(/*FFABS(ll) + */3*(l>>1) + (lt>>1) + (t&~1) + (rt>>1) + (p>>1));
1515 v=get_rac(&s->c, &b->state[0][context]);
1517 v= 2*(get_symbol2(&s->c, b->state[context + 2], context-4) + 1);
1518 v+=get_rac(&s->c, &b->state[0][16 + 1 + 3 + quant3bA[l&0xFF] + 3*quant3bA[t&0xFF]]);
1525 if(runs-- > 0) run= get_symbol2(&s->c, b->state[1], 3);
1527 v= 2*(get_symbol2(&s->c, b->state[0 + 2], 0-4) + 1);
1528 v+=get_rac(&s->c, &b->state[0][16 + 1 + 3]);
1537 if(y) max_run= FFMIN(run, prev_xc->x - x - 2);
1538 else max_run= FFMIN(run, w-x-1);
1540 max_run= FFMIN(max_run, 2*parent_xc->x - x - 1);
1546 (xc++)->x= w+1; //end marker
1552 while(parent_xc->x != parent->width+1)
1555 prev_parent_xc= parent_xc;
1557 parent_xc= prev_parent_xc;
1562 (xc++)->x= w+1; //end marker
1566 static inline void decode_subband_slice_buffered(SnowContext *s, SubBand *b, slice_buffer * sb, int start_y, int h, int save_state[1]){
1567 const int w= b->width;
1569 const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
1570 int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
1571 int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
1574 if(b->ibuf == s->spatial_idwt_buffer || s->qlog == LOSSLESS_QLOG){
1579 /* If we are on the second or later slice, restore our index. */
1581 new_index = save_state[0];
1584 for(y=start_y; y<h; y++){
1587 IDWTELEM * line = slice_buffer_get_line(sb, y * b->stride_line + b->buf_y_offset) + b->buf_x_offset;
1588 memset(line, 0, b->width*sizeof(IDWTELEM));
1589 v = b->x_coeff[new_index].coeff;
1590 x = b->x_coeff[new_index++].x;
1592 register int t= ( (v>>1)*qmul + qadd)>>QEXPSHIFT;
1593 register int u= -(v&1);
1594 line[x] = (t^u) - u;
1596 v = b->x_coeff[new_index].coeff;
1597 x = b->x_coeff[new_index++].x;
1601 /* Save our variables for the next slice. */
1602 save_state[0] = new_index;
1607 static void reset_contexts(SnowContext *s){ //FIXME better initial contexts
1608 int plane_index, level, orientation;
1610 for(plane_index=0; plane_index<3; plane_index++){
1611 for(level=0; level<MAX_DECOMPOSITIONS; level++){
1612 for(orientation=level ? 1:0; orientation<4; orientation++){
1613 memset(s->plane[plane_index].band[level][orientation].state, MID_STATE, sizeof(s->plane[plane_index].band[level][orientation].state));
1617 memset(s->header_state, MID_STATE, sizeof(s->header_state));
1618 memset(s->block_state, MID_STATE, sizeof(s->block_state));
1621 static int alloc_blocks(SnowContext *s){
1622 int w= -((-s->avctx->width )>>LOG2_MB_SIZE);
1623 int h= -((-s->avctx->height)>>LOG2_MB_SIZE);
1628 s->block= av_mallocz(w * h * sizeof(BlockNode) << (s->block_max_depth*2));
1632 static inline void copy_rac_state(RangeCoder *d, RangeCoder *s){
1633 uint8_t *bytestream= d->bytestream;
1634 uint8_t *bytestream_start= d->bytestream_start;
1636 d->bytestream= bytestream;
1637 d->bytestream_start= bytestream_start;
1640 //near copy & paste from dsputil, FIXME
1641 static int pix_sum(uint8_t * pix, int line_size, int w)
1646 for (i = 0; i < w; i++) {
1647 for (j = 0; j < w; j++) {
1651 pix += line_size - w;
1656 //near copy & paste from dsputil, FIXME
1657 static int pix_norm1(uint8_t * pix, int line_size, int w)
1660 uint32_t *sq = ff_squareTbl + 256;
1663 for (i = 0; i < w; i++) {
1664 for (j = 0; j < w; j ++) {
1668 pix += line_size - w;
1673 static inline void set_blocks(SnowContext *s, int level, int x, int y, int l, int cb, int cr, int mx, int my, int ref, int type){
1674 const int w= s->b_width << s->block_max_depth;
1675 const int rem_depth= s->block_max_depth - level;
1676 const int index= (x + y*w) << rem_depth;
1677 const int block_w= 1<<rem_depth;
1690 for(j=0; j<block_w; j++){
1691 for(i=0; i<block_w; i++){
1692 s->block[index + i + j*w]= block;
1697 static inline void init_ref(MotionEstContext *c, uint8_t *src[3], uint8_t *ref[3], uint8_t *ref2[3], int x, int y, int ref_index){
1698 const int offset[3]= {
1700 ((y*c->uvstride + x)>>1),
1701 ((y*c->uvstride + x)>>1),
1705 c->src[0][i]= src [i];
1706 c->ref[0][i]= ref [i] + offset[i];
1711 static inline void pred_mv(SnowContext *s, int *mx, int *my, int ref,
1712 const BlockNode *left, const BlockNode *top, const BlockNode *tr){
1713 if(s->ref_frames == 1){
1714 *mx = mid_pred(left->mx, top->mx, tr->mx);
1715 *my = mid_pred(left->my, top->my, tr->my);
1717 const int *scale = scale_mv_ref[ref];
1718 *mx = mid_pred((left->mx * scale[left->ref] + 128) >>8,
1719 (top ->mx * scale[top ->ref] + 128) >>8,
1720 (tr ->mx * scale[tr ->ref] + 128) >>8);
1721 *my = mid_pred((left->my * scale[left->ref] + 128) >>8,
1722 (top ->my * scale[top ->ref] + 128) >>8,
1723 (tr ->my * scale[tr ->ref] + 128) >>8);
1730 #define P_TOPRIGHT P[3]
1731 #define P_MEDIAN P[4]
1733 #define FLAG_QPEL 1 //must be 1
1735 static int encode_q_branch(SnowContext *s, int level, int x, int y){
1736 uint8_t p_buffer[1024];
1737 uint8_t i_buffer[1024];
1738 uint8_t p_state[sizeof(s->block_state)];
1739 uint8_t i_state[sizeof(s->block_state)];
1741 uint8_t *pbbak= s->c.bytestream;
1742 uint8_t *pbbak_start= s->c.bytestream_start;
1743 int score, score2, iscore, i_len, p_len, block_s, sum, base_bits;
1744 const int w= s->b_width << s->block_max_depth;
1745 const int h= s->b_height << s->block_max_depth;
1746 const int rem_depth= s->block_max_depth - level;
1747 const int index= (x + y*w) << rem_depth;
1748 const int block_w= 1<<(LOG2_MB_SIZE - level);
1749 int trx= (x+1)<<rem_depth;
1750 int try= (y+1)<<rem_depth;
1751 const BlockNode *left = x ? &s->block[index-1] : &null_block;
1752 const BlockNode *top = y ? &s->block[index-w] : &null_block;
1753 const BlockNode *right = trx<w ? &s->block[index+1] : &null_block;
1754 const BlockNode *bottom= try<h ? &s->block[index+w] : &null_block;
1755 const BlockNode *tl = y && x ? &s->block[index-w-1] : left;
1756 const BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
1757 int pl = left->color[0];
1758 int pcb= left->color[1];
1759 int pcr= left->color[2];
1763 const int stride= s->current_picture.linesize[0];
1764 const int uvstride= s->current_picture.linesize[1];
1765 uint8_t *current_data[3]= { s->input_picture.data[0] + (x + y* stride)*block_w,
1766 s->input_picture.data[1] + (x + y*uvstride)*block_w/2,
1767 s->input_picture.data[2] + (x + y*uvstride)*block_w/2};
1769 int16_t last_mv[3][2];
1770 int qpel= !!(s->avctx->flags & CODEC_FLAG_QPEL); //unused
1771 const int shift= 1+qpel;
1772 MotionEstContext *c= &s->m.me;
1773 int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref);
1774 int mx_context= av_log2(2*FFABS(left->mx - top->mx));
1775 int my_context= av_log2(2*FFABS(left->my - top->my));
1776 int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
1777 int ref, best_ref, ref_score, ref_mx, ref_my;
1779 assert(sizeof(s->block_state) >= 256);
1781 set_blocks(s, level, x, y, pl, pcb, pcr, 0, 0, 0, BLOCK_INTRA);
1785 // clip predictors / edge ?
1787 P_LEFT[0]= left->mx;
1788 P_LEFT[1]= left->my;
1791 P_TOPRIGHT[0]= tr->mx;
1792 P_TOPRIGHT[1]= tr->my;
1794 last_mv[0][0]= s->block[index].mx;
1795 last_mv[0][1]= s->block[index].my;
1796 last_mv[1][0]= right->mx;
1797 last_mv[1][1]= right->my;
1798 last_mv[2][0]= bottom->mx;
1799 last_mv[2][1]= bottom->my;
1806 assert(c-> stride == stride);
1807 assert(c->uvstride == uvstride);
1809 c->penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_cmp);
1810 c->sub_penalty_factor= get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_sub_cmp);
1811 c->mb_penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->mb_cmp);
1812 c->current_mv_penalty= c->mv_penalty[s->m.f_code=1] + MAX_MV;
1814 c->xmin = - x*block_w - 16+2;
1815 c->ymin = - y*block_w - 16+2;
1816 c->xmax = - (x+1)*block_w + (w<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-2;
1817 c->ymax = - (y+1)*block_w + (h<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-2;
1819 if(P_LEFT[0] > (c->xmax<<shift)) P_LEFT[0] = (c->xmax<<shift);
1820 if(P_LEFT[1] > (c->ymax<<shift)) P_LEFT[1] = (c->ymax<<shift);
1821 if(P_TOP[0] > (c->xmax<<shift)) P_TOP[0] = (c->xmax<<shift);
1822 if(P_TOP[1] > (c->ymax<<shift)) P_TOP[1] = (c->ymax<<shift);
1823 if(P_TOPRIGHT[0] < (c->xmin<<shift)) P_TOPRIGHT[0]= (c->xmin<<shift);
1824 if(P_TOPRIGHT[0] > (c->xmax<<shift)) P_TOPRIGHT[0]= (c->xmax<<shift); //due to pmx no clip
1825 if(P_TOPRIGHT[1] > (c->ymax<<shift)) P_TOPRIGHT[1]= (c->ymax<<shift);
1827 P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]);
1828 P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);
1831 c->pred_x= P_LEFT[0];
1832 c->pred_y= P_LEFT[1];
1834 c->pred_x = P_MEDIAN[0];
1835 c->pred_y = P_MEDIAN[1];
1840 for(ref=0; ref<s->ref_frames; ref++){
1841 init_ref(c, current_data, s->last_picture[ref].data, NULL, block_w*x, block_w*y, 0);
1843 ref_score= ff_epzs_motion_search(&s->m, &ref_mx, &ref_my, P, 0, /*ref_index*/ 0, last_mv,
1844 (1<<16)>>shift, level-LOG2_MB_SIZE+4, block_w);
1846 assert(ref_mx >= c->xmin);
1847 assert(ref_mx <= c->xmax);
1848 assert(ref_my >= c->ymin);
1849 assert(ref_my <= c->ymax);
1851 ref_score= c->sub_motion_search(&s->m, &ref_mx, &ref_my, ref_score, 0, 0, level-LOG2_MB_SIZE+4, block_w);
1852 ref_score= ff_get_mb_score(&s->m, ref_mx, ref_my, 0, 0, level-LOG2_MB_SIZE+4, block_w, 0);
1853 ref_score+= 2*av_log2(2*ref)*c->penalty_factor;
1854 if(s->ref_mvs[ref]){
1855 s->ref_mvs[ref][index][0]= ref_mx;
1856 s->ref_mvs[ref][index][1]= ref_my;
1857 s->ref_scores[ref][index]= ref_score;
1859 if(score > ref_score){
1866 //FIXME if mb_cmp != SSE then intra cannot be compared currently and mb_penalty vs. lambda2
1869 base_bits= get_rac_count(&s->c) - 8*(s->c.bytestream - s->c.bytestream_start);
1871 pc.bytestream_start=
1872 pc.bytestream= p_buffer; //FIXME end/start? and at the other stoo
1873 memcpy(p_state, s->block_state, sizeof(s->block_state));
1875 if(level!=s->block_max_depth)
1876 put_rac(&pc, &p_state[4 + s_context], 1);
1877 put_rac(&pc, &p_state[1 + left->type + top->type], 0);
1878 if(s->ref_frames > 1)
1879 put_symbol(&pc, &p_state[128 + 1024 + 32*ref_context], best_ref, 0);
1880 pred_mv(s, &pmx, &pmy, best_ref, left, top, tr);
1881 put_symbol(&pc, &p_state[128 + 32*(mx_context + 16*!!best_ref)], mx - pmx, 1);
1882 put_symbol(&pc, &p_state[128 + 32*(my_context + 16*!!best_ref)], my - pmy, 1);
1883 p_len= pc.bytestream - pc.bytestream_start;
1884 score += (s->lambda2*(get_rac_count(&pc)-base_bits))>>FF_LAMBDA_SHIFT;
1886 block_s= block_w*block_w;
1887 sum = pix_sum(current_data[0], stride, block_w);
1888 l= (sum + block_s/2)/block_s;
1889 iscore = pix_norm1(current_data[0], stride, block_w) - 2*l*sum + l*l*block_s;
1891 block_s= block_w*block_w>>2;
1892 sum = pix_sum(current_data[1], uvstride, block_w>>1);
1893 cb= (sum + block_s/2)/block_s;
1894 // iscore += pix_norm1(¤t_mb[1][0], uvstride, block_w>>1) - 2*cb*sum + cb*cb*block_s;
1895 sum = pix_sum(current_data[2], uvstride, block_w>>1);
1896 cr= (sum + block_s/2)/block_s;
1897 // iscore += pix_norm1(¤t_mb[2][0], uvstride, block_w>>1) - 2*cr*sum + cr*cr*block_s;
1900 ic.bytestream_start=
1901 ic.bytestream= i_buffer; //FIXME end/start? and at the other stoo
1902 memcpy(i_state, s->block_state, sizeof(s->block_state));
1903 if(level!=s->block_max_depth)
1904 put_rac(&ic, &i_state[4 + s_context], 1);
1905 put_rac(&ic, &i_state[1 + left->type + top->type], 1);
1906 put_symbol(&ic, &i_state[32], l-pl , 1);
1907 put_symbol(&ic, &i_state[64], cb-pcb, 1);
1908 put_symbol(&ic, &i_state[96], cr-pcr, 1);
1909 i_len= ic.bytestream - ic.bytestream_start;
1910 iscore += (s->lambda2*(get_rac_count(&ic)-base_bits))>>FF_LAMBDA_SHIFT;
1912 // assert(score==256*256*256*64-1);
1913 assert(iscore < 255*255*256 + s->lambda2*10);
1914 assert(iscore >= 0);
1915 assert(l>=0 && l<=255);
1916 assert(pl>=0 && pl<=255);
1919 int varc= iscore >> 8;
1920 int vard= score >> 8;
1921 if (vard <= 64 || vard < varc)
1922 c->scene_change_score+= ff_sqrt(vard) - ff_sqrt(varc);
1924 c->scene_change_score+= s->m.qscale;
1927 if(level!=s->block_max_depth){
1928 put_rac(&s->c, &s->block_state[4 + s_context], 0);
1929 score2 = encode_q_branch(s, level+1, 2*x+0, 2*y+0);
1930 score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+0);
1931 score2+= encode_q_branch(s, level+1, 2*x+0, 2*y+1);
1932 score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+1);
1933 score2+= s->lambda2>>FF_LAMBDA_SHIFT; //FIXME exact split overhead
1935 if(score2 < score && score2 < iscore)
1940 pred_mv(s, &pmx, &pmy, 0, left, top, tr);
1941 memcpy(pbbak, i_buffer, i_len);
1943 s->c.bytestream_start= pbbak_start;
1944 s->c.bytestream= pbbak + i_len;
1945 set_blocks(s, level, x, y, l, cb, cr, pmx, pmy, 0, BLOCK_INTRA);
1946 memcpy(s->block_state, i_state, sizeof(s->block_state));
1949 memcpy(pbbak, p_buffer, p_len);
1951 s->c.bytestream_start= pbbak_start;
1952 s->c.bytestream= pbbak + p_len;
1953 set_blocks(s, level, x, y, pl, pcb, pcr, mx, my, best_ref, 0);
1954 memcpy(s->block_state, p_state, sizeof(s->block_state));
1959 static av_always_inline int same_block(BlockNode *a, BlockNode *b){
1960 if((a->type&BLOCK_INTRA) && (b->type&BLOCK_INTRA)){
1961 return !((a->color[0] - b->color[0]) | (a->color[1] - b->color[1]) | (a->color[2] - b->color[2]));
1963 return !((a->mx - b->mx) | (a->my - b->my) | (a->ref - b->ref) | ((a->type ^ b->type)&BLOCK_INTRA));
1967 static void encode_q_branch2(SnowContext *s, int level, int x, int y){
1968 const int w= s->b_width << s->block_max_depth;
1969 const int rem_depth= s->block_max_depth - level;
1970 const int index= (x + y*w) << rem_depth;
1971 int trx= (x+1)<<rem_depth;
1972 BlockNode *b= &s->block[index];
1973 const BlockNode *left = x ? &s->block[index-1] : &null_block;
1974 const BlockNode *top = y ? &s->block[index-w] : &null_block;
1975 const BlockNode *tl = y && x ? &s->block[index-w-1] : left;
1976 const BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
1977 int pl = left->color[0];
1978 int pcb= left->color[1];
1979 int pcr= left->color[2];
1981 int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref);
1982 int mx_context= av_log2(2*FFABS(left->mx - top->mx)) + 16*!!b->ref;
1983 int my_context= av_log2(2*FFABS(left->my - top->my)) + 16*!!b->ref;
1984 int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
1987 set_blocks(s, level, x, y, pl, pcb, pcr, 0, 0, 0, BLOCK_INTRA);
1991 if(level!=s->block_max_depth){
1992 if(same_block(b,b+1) && same_block(b,b+w) && same_block(b,b+w+1)){
1993 put_rac(&s->c, &s->block_state[4 + s_context], 1);
1995 put_rac(&s->c, &s->block_state[4 + s_context], 0);
1996 encode_q_branch2(s, level+1, 2*x+0, 2*y+0);
1997 encode_q_branch2(s, level+1, 2*x+1, 2*y+0);
1998 encode_q_branch2(s, level+1, 2*x+0, 2*y+1);
1999 encode_q_branch2(s, level+1, 2*x+1, 2*y+1);
2003 if(b->type & BLOCK_INTRA){
2004 pred_mv(s, &pmx, &pmy, 0, left, top, tr);
2005 put_rac(&s->c, &s->block_state[1 + (left->type&1) + (top->type&1)], 1);
2006 put_symbol(&s->c, &s->block_state[32], b->color[0]-pl , 1);
2007 put_symbol(&s->c, &s->block_state[64], b->color[1]-pcb, 1);
2008 put_symbol(&s->c, &s->block_state[96], b->color[2]-pcr, 1);
2009 set_blocks(s, level, x, y, b->color[0], b->color[1], b->color[2], pmx, pmy, 0, BLOCK_INTRA);
2011 pred_mv(s, &pmx, &pmy, b->ref, left, top, tr);
2012 put_rac(&s->c, &s->block_state[1 + (left->type&1) + (top->type&1)], 0);
2013 if(s->ref_frames > 1)
2014 put_symbol(&s->c, &s->block_state[128 + 1024 + 32*ref_context], b->ref, 0);
2015 put_symbol(&s->c, &s->block_state[128 + 32*mx_context], b->mx - pmx, 1);
2016 put_symbol(&s->c, &s->block_state[128 + 32*my_context], b->my - pmy, 1);
2017 set_blocks(s, level, x, y, pl, pcb, pcr, b->mx, b->my, b->ref, 0);
2021 static void decode_q_branch(SnowContext *s, int level, int x, int y){
2022 const int w= s->b_width << s->block_max_depth;
2023 const int rem_depth= s->block_max_depth - level;
2024 const int index= (x + y*w) << rem_depth;
2025 int trx= (x+1)<<rem_depth;
2026 const BlockNode *left = x ? &s->block[index-1] : &null_block;
2027 const BlockNode *top = y ? &s->block[index-w] : &null_block;
2028 const BlockNode *tl = y && x ? &s->block[index-w-1] : left;
2029 const BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
2030 int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
2033 set_blocks(s, level, x, y, null_block.color[0], null_block.color[1], null_block.color[2], null_block.mx, null_block.my, null_block.ref, BLOCK_INTRA);
2037 if(level==s->block_max_depth || get_rac(&s->c, &s->block_state[4 + s_context])){
2039 int l = left->color[0];
2040 int cb= left->color[1];
2041 int cr= left->color[2];
2043 int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref);
2044 int mx_context= av_log2(2*FFABS(left->mx - top->mx)) + 0*av_log2(2*FFABS(tr->mx - top->mx));
2045 int my_context= av_log2(2*FFABS(left->my - top->my)) + 0*av_log2(2*FFABS(tr->my - top->my));
2047 type= get_rac(&s->c, &s->block_state[1 + left->type + top->type]) ? BLOCK_INTRA : 0;
2050 pred_mv(s, &mx, &my, 0, left, top, tr);
2051 l += get_symbol(&s->c, &s->block_state[32], 1);
2052 cb+= get_symbol(&s->c, &s->block_state[64], 1);
2053 cr+= get_symbol(&s->c, &s->block_state[96], 1);
2055 if(s->ref_frames > 1)
2056 ref= get_symbol(&s->c, &s->block_state[128 + 1024 + 32*ref_context], 0);
2057 pred_mv(s, &mx, &my, ref, left, top, tr);
2058 mx+= get_symbol(&s->c, &s->block_state[128 + 32*(mx_context + 16*!!ref)], 1);
2059 my+= get_symbol(&s->c, &s->block_state[128 + 32*(my_context + 16*!!ref)], 1);
2061 set_blocks(s, level, x, y, l, cb, cr, mx, my, ref, type);
2063 decode_q_branch(s, level+1, 2*x+0, 2*y+0);
2064 decode_q_branch(s, level+1, 2*x+1, 2*y+0);
2065 decode_q_branch(s, level+1, 2*x+0, 2*y+1);
2066 decode_q_branch(s, level+1, 2*x+1, 2*y+1);
2070 static void encode_blocks(SnowContext *s, int search){
2075 if(s->avctx->me_method == ME_ITER && !s->keyframe && search)
2079 if(s->c.bytestream_end - s->c.bytestream < w*MB_SIZE*MB_SIZE*3){ //FIXME nicer limit
2080 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2084 if(s->avctx->me_method == ME_ITER || !search)
2085 encode_q_branch2(s, 0, x, y);
2087 encode_q_branch (s, 0, x, y);
2092 static void decode_blocks(SnowContext *s){
2099 decode_q_branch(s, 0, x, y);
2104 static void mc_block(Plane *p, uint8_t *dst, const uint8_t *src, uint8_t *tmp, int stride, int b_w, int b_h, int dx, int dy){
2105 static const uint8_t weight[64]={
2116 static const uint8_t brane[256]={
2117 0x00,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x11,0x12,0x12,0x12,0x12,0x12,0x12,0x12,
2118 0x04,0x05,0xcc,0xcc,0xcc,0xcc,0xcc,0x41,0x15,0x16,0xcc,0xcc,0xcc,0xcc,0xcc,0x52,
2119 0x04,0xcc,0x05,0xcc,0xcc,0xcc,0x41,0xcc,0x15,0xcc,0x16,0xcc,0xcc,0xcc,0x52,0xcc,
2120 0x04,0xcc,0xcc,0x05,0xcc,0x41,0xcc,0xcc,0x15,0xcc,0xcc,0x16,0xcc,0x52,0xcc,0xcc,
2121 0x04,0xcc,0xcc,0xcc,0x41,0xcc,0xcc,0xcc,0x15,0xcc,0xcc,0xcc,0x16,0xcc,0xcc,0xcc,
2122 0x04,0xcc,0xcc,0x41,0xcc,0x05,0xcc,0xcc,0x15,0xcc,0xcc,0x52,0xcc,0x16,0xcc,0xcc,
2123 0x04,0xcc,0x41,0xcc,0xcc,0xcc,0x05,0xcc,0x15,0xcc,0x52,0xcc,0xcc,0xcc,0x16,0xcc,
2124 0x04,0x41,0xcc,0xcc,0xcc,0xcc,0xcc,0x05,0x15,0x52,0xcc,0xcc,0xcc,0xcc,0xcc,0x16,
2125 0x44,0x45,0x45,0x45,0x45,0x45,0x45,0x45,0x55,0x56,0x56,0x56,0x56,0x56,0x56,0x56,
2126 0x48,0x49,0xcc,0xcc,0xcc,0xcc,0xcc,0x85,0x59,0x5A,0xcc,0xcc,0xcc,0xcc,0xcc,0x96,
2127 0x48,0xcc,0x49,0xcc,0xcc,0xcc,0x85,0xcc,0x59,0xcc,0x5A,0xcc,0xcc,0xcc,0x96,0xcc,
2128 0x48,0xcc,0xcc,0x49,0xcc,0x85,0xcc,0xcc,0x59,0xcc,0xcc,0x5A,0xcc,0x96,0xcc,0xcc,
2129 0x48,0xcc,0xcc,0xcc,0x49,0xcc,0xcc,0xcc,0x59,0xcc,0xcc,0xcc,0x96,0xcc,0xcc,0xcc,
2130 0x48,0xcc,0xcc,0x85,0xcc,0x49,0xcc,0xcc,0x59,0xcc,0xcc,0x96,0xcc,0x5A,0xcc,0xcc,
2131 0x48,0xcc,0x85,0xcc,0xcc,0xcc,0x49,0xcc,0x59,0xcc,0x96,0xcc,0xcc,0xcc,0x5A,0xcc,
2132 0x48,0x85,0xcc,0xcc,0xcc,0xcc,0xcc,0x49,0x59,0x96,0xcc,0xcc,0xcc,0xcc,0xcc,0x5A,
2135 static const uint8_t needs[16]={
2143 int16_t tmpIt [64*(32+HTAPS_MAX)];
2144 uint8_t tmp2t[3][stride*(32+HTAPS_MAX)];
2145 int16_t *tmpI= tmpIt;
2146 uint8_t *tmp2= tmp2t[0];
2147 const uint8_t *hpel[11];
2148 assert(dx<16 && dy<16);
2149 r= brane[dx + 16*dy]&15;
2150 l= brane[dx + 16*dy]>>4;
2152 b= needs[l] | needs[r];
2153 if(p && !p->diag_mc)
2157 for(y=0; y < b_h+HTAPS_MAX-1; y++){
2158 for(x=0; x < b_w; x++){
2159 int a_1=src[x + HTAPS_MAX/2-4];
2160 int a0= src[x + HTAPS_MAX/2-3];
2161 int a1= src[x + HTAPS_MAX/2-2];
2162 int a2= src[x + HTAPS_MAX/2-1];
2163 int a3= src[x + HTAPS_MAX/2+0];
2164 int a4= src[x + HTAPS_MAX/2+1];
2165 int a5= src[x + HTAPS_MAX/2+2];
2166 int a6= src[x + HTAPS_MAX/2+3];
2168 if(!p || p->fast_mc){
2169 am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5);
2173 am= p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6);
2178 if(am&(~255)) am= ~(am>>31);
2187 src += HTAPS_MAX/2 - 1;
2191 for(y=0; y < b_h; y++){
2192 for(x=0; x < b_w+1; x++){
2193 int a_1=src[x + (HTAPS_MAX/2-4)*stride];
2194 int a0= src[x + (HTAPS_MAX/2-3)*stride];
2195 int a1= src[x + (HTAPS_MAX/2-2)*stride];
2196 int a2= src[x + (HTAPS_MAX/2-1)*stride];
2197 int a3= src[x + (HTAPS_MAX/2+0)*stride];
2198 int a4= src[x + (HTAPS_MAX/2+1)*stride];
2199 int a5= src[x + (HTAPS_MAX/2+2)*stride];
2200 int a6= src[x + (HTAPS_MAX/2+3)*stride];
2202 if(!p || p->fast_mc)
2203 am= (20*(a2+a3) - 5*(a1+a4) + (a0+a5) + 16)>>5;
2205 am= (p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6) + 32)>>6;
2207 if(am&(~255)) am= ~(am>>31);
2215 src += stride*(HTAPS_MAX/2 - 1);
2219 for(y=0; y < b_h; y++){
2220 for(x=0; x < b_w; x++){
2221 int a_1=tmpI[x + (HTAPS_MAX/2-4)*64];
2222 int a0= tmpI[x + (HTAPS_MAX/2-3)*64];
2223 int a1= tmpI[x + (HTAPS_MAX/2-2)*64];
2224 int a2= tmpI[x + (HTAPS_MAX/2-1)*64];
2225 int a3= tmpI[x + (HTAPS_MAX/2+0)*64];
2226 int a4= tmpI[x + (HTAPS_MAX/2+1)*64];
2227 int a5= tmpI[x + (HTAPS_MAX/2+2)*64];
2228 int a6= tmpI[x + (HTAPS_MAX/2+3)*64];
2230 if(!p || p->fast_mc)
2231 am= (20*(a2+a3) - 5*(a1+a4) + (a0+a5) + 512)>>10;
2233 am= (p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6) + 2048)>>12;
2234 if(am&(~255)) am= ~(am>>31);
2243 hpel[ 1]= tmp2t[0] + stride*(HTAPS_MAX/2-1);
2248 hpel[ 6]= tmp2t[1] + 1;
2250 hpel[ 8]= src + stride;
2251 hpel[ 9]= hpel[1] + stride;
2252 hpel[10]= hpel[8] + 1;
2255 const uint8_t *src1= hpel[dx/8 + dy/8*4 ];
2256 const uint8_t *src2= hpel[dx/8 + dy/8*4+1];
2257 const uint8_t *src3= hpel[dx/8 + dy/8*4+4];
2258 const uint8_t *src4= hpel[dx/8 + dy/8*4+5];
2261 for(y=0; y < b_h; y++){
2262 for(x=0; x < b_w; x++){
2263 dst[x]= ((8-dx)*(8-dy)*src1[x] + dx*(8-dy)*src2[x]+
2264 (8-dx)* dy *src3[x] + dx* dy *src4[x]+32)>>6;
2273 const uint8_t *src1= hpel[l];
2274 const uint8_t *src2= hpel[r];
2275 int a= weight[((dx&7) + (8*(dy&7)))];
2277 for(y=0; y < b_h; y++){
2278 for(x=0; x < b_w; x++){
2279 dst[x]= (a*src1[x] + b*src2[x] + 4)>>3;
2288 #define mca(dx,dy,b_w)\
2289 static void mc_block_hpel ## dx ## dy ## b_w(uint8_t *dst, const uint8_t *src, int stride, int h){\
2290 uint8_t tmp[stride*(b_w+HTAPS_MAX-1)];\
2292 mc_block(NULL, dst, src-(HTAPS_MAX/2-1)-(HTAPS_MAX/2-1)*stride, tmp, stride, b_w, b_w, dx, dy);\
2304 static void pred_block(SnowContext *s, uint8_t *dst, uint8_t *tmp, int stride, int sx, int sy, int b_w, int b_h, BlockNode *block, int plane_index, int w, int h){
2305 if(block->type & BLOCK_INTRA){
2307 const int color = block->color[plane_index];
2308 const int color4= color*0x01010101;
2310 for(y=0; y < b_h; y++){
2311 *(uint32_t*)&dst[0 + y*stride]= color4;
2312 *(uint32_t*)&dst[4 + y*stride]= color4;
2313 *(uint32_t*)&dst[8 + y*stride]= color4;
2314 *(uint32_t*)&dst[12+ y*stride]= color4;
2315 *(uint32_t*)&dst[16+ y*stride]= color4;
2316 *(uint32_t*)&dst[20+ y*stride]= color4;
2317 *(uint32_t*)&dst[24+ y*stride]= color4;
2318 *(uint32_t*)&dst[28+ y*stride]= color4;
2321 for(y=0; y < b_h; y++){
2322 *(uint32_t*)&dst[0 + y*stride]= color4;
2323 *(uint32_t*)&dst[4 + y*stride]= color4;
2324 *(uint32_t*)&dst[8 + y*stride]= color4;
2325 *(uint32_t*)&dst[12+ y*stride]= color4;
2328 for(y=0; y < b_h; y++){
2329 *(uint32_t*)&dst[0 + y*stride]= color4;
2330 *(uint32_t*)&dst[4 + y*stride]= color4;
2333 for(y=0; y < b_h; y++){
2334 *(uint32_t*)&dst[0 + y*stride]= color4;
2337 for(y=0; y < b_h; y++){
2338 for(x=0; x < b_w; x++){
2339 dst[x + y*stride]= color;
2344 uint8_t *src= s->last_picture[block->ref].data[plane_index];
2345 const int scale= plane_index ? s->mv_scale : 2*s->mv_scale;
2346 int mx= block->mx*scale;
2347 int my= block->my*scale;
2348 const int dx= mx&15;
2349 const int dy= my&15;
2350 const int tab_index= 3 - (b_w>>2) + (b_w>>4);
2351 sx += (mx>>4) - (HTAPS_MAX/2-1);
2352 sy += (my>>4) - (HTAPS_MAX/2-1);
2353 src += sx + sy*stride;
2354 if( (unsigned)sx >= w - b_w - (HTAPS_MAX-2)
2355 || (unsigned)sy >= h - b_h - (HTAPS_MAX-2)){
2356 ff_emulated_edge_mc(tmp + MB_SIZE, src, stride, b_w+HTAPS_MAX-1, b_h+HTAPS_MAX-1, sx, sy, w, h);
2359 // assert(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h);
2360 // assert(!(b_w&(b_w-1)));
2361 assert(b_w>1 && b_h>1);
2362 assert((tab_index>=0 && tab_index<4) || b_w==32);
2363 if((dx&3) || (dy&3) || !(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h) || (b_w&(b_w-1)) || !s->plane[plane_index].fast_mc )
2364 mc_block(&s->plane[plane_index], dst, src, tmp, stride, b_w, b_h, dx, dy);
2367 for(y=0; y<b_h; y+=16){
2368 s->dsp.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + y*stride, src + 3 + (y+3)*stride,stride);
2369 s->dsp.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + 16 + y*stride, src + 19 + (y+3)*stride,stride);
2372 s->dsp.put_h264_qpel_pixels_tab[tab_index ][dy+(dx>>2)](dst,src + 3 + 3*stride,stride);
2373 else if(b_w==2*b_h){
2374 s->dsp.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst ,src + 3 + 3*stride,stride);
2375 s->dsp.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst+b_h,src + 3 + b_h + 3*stride,stride);
2378 s->dsp.put_h264_qpel_pixels_tab[tab_index ][dy+(dx>>2)](dst ,src + 3 + 3*stride ,stride);
2379 s->dsp.put_h264_qpel_pixels_tab[tab_index ][dy+(dx>>2)](dst+b_w*stride,src + 3 + 3*stride+b_w*stride,stride);
2384 void ff_snow_inner_add_yblock(const uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h,
2385 int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8){
2388 for(y=0; y<b_h; y++){
2389 //FIXME ugly misuse of obmc_stride
2390 const uint8_t *obmc1= obmc + y*obmc_stride;
2391 const uint8_t *obmc2= obmc1+ (obmc_stride>>1);
2392 const uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1);
2393 const uint8_t *obmc4= obmc3+ (obmc_stride>>1);
2394 dst = slice_buffer_get_line(sb, src_y + y);
2395 for(x=0; x<b_w; x++){
2396 int v= obmc1[x] * block[3][x + y*src_stride]
2397 +obmc2[x] * block[2][x + y*src_stride]
2398 +obmc3[x] * block[1][x + y*src_stride]
2399 +obmc4[x] * block[0][x + y*src_stride];
2401 v <<= 8 - LOG2_OBMC_MAX;
2403 v >>= 8 - FRAC_BITS;
2406 v += dst[x + src_x];
2407 v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS;
2408 if(v&(~255)) v= ~(v>>31);
2409 dst8[x + y*src_stride] = v;
2411 dst[x + src_x] -= v;
2417 //FIXME name cleanup (b_w, block_w, b_width stuff)
2418 static av_always_inline void add_yblock(SnowContext *s, int sliced, slice_buffer *sb, IDWTELEM *dst, uint8_t *dst8, const uint8_t *obmc, int src_x, int src_y, int b_w, int b_h, int w, int h, int dst_stride, int src_stride, int obmc_stride, int b_x, int b_y, int add, int offset_dst, int plane_index){
2419 const int b_width = s->b_width << s->block_max_depth;
2420 const int b_height= s->b_height << s->block_max_depth;
2421 const int b_stride= b_width;
2422 BlockNode *lt= &s->block[b_x + b_y*b_stride];
2423 BlockNode *rt= lt+1;
2424 BlockNode *lb= lt+b_stride;
2425 BlockNode *rb= lb+1;
2427 int tmp_step= src_stride >= 7*MB_SIZE ? MB_SIZE : MB_SIZE*src_stride;
2428 uint8_t *tmp = s->scratchbuf;
2435 }else if(b_x + 1 >= b_width){
2442 }else if(b_y + 1 >= b_height){
2447 if(src_x<0){ //FIXME merge with prev & always round internal width up to *16
2450 if(!sliced && !offset_dst)
2453 }else if(src_x + b_w > w){
2457 obmc -= src_y*obmc_stride;
2459 if(!sliced && !offset_dst)
2460 dst -= src_y*dst_stride;
2462 }else if(src_y + b_h> h){
2466 if(b_w<=0 || b_h<=0) return;
2468 assert(src_stride > 2*MB_SIZE + 5);
2470 if(!sliced && offset_dst)
2471 dst += src_x + src_y*dst_stride;
2472 dst8+= src_x + src_y*src_stride;
2473 // src += src_x + src_y*src_stride;
2475 ptmp= tmp + 3*tmp_step;
2478 pred_block(s, block[0], tmp, src_stride, src_x, src_y, b_w, b_h, lt, plane_index, w, h);
2480 if(same_block(lt, rt)){
2485 pred_block(s, block[1], tmp, src_stride, src_x, src_y, b_w, b_h, rt, plane_index, w, h);
2488 if(same_block(lt, lb)){
2490 }else if(same_block(rt, lb)){
2495 pred_block(s, block[2], tmp, src_stride, src_x, src_y, b_w, b_h, lb, plane_index, w, h);
2498 if(same_block(lt, rb) ){
2500 }else if(same_block(rt, rb)){
2502 }else if(same_block(lb, rb)){
2506 pred_block(s, block[3], tmp, src_stride, src_x, src_y, b_w, b_h, rb, plane_index, w, h);
2509 for(y=0; y<b_h; y++){
2510 for(x=0; x<b_w; x++){
2511 int v= obmc [x + y*obmc_stride] * block[3][x + y*src_stride] * (256/OBMC_MAX);
2512 if(add) dst[x + y*dst_stride] += v;
2513 else dst[x + y*dst_stride] -= v;
2516 for(y=0; y<b_h; y++){
2517 uint8_t *obmc2= obmc + (obmc_stride>>1);
2518 for(x=0; x<b_w; x++){
2519 int v= obmc2[x + y*obmc_stride] * block[2][x + y*src_stride] * (256/OBMC_MAX);
2520 if(add) dst[x + y*dst_stride] += v;
2521 else dst[x + y*dst_stride] -= v;
2524 for(y=0; y<b_h; y++){
2525 uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
2526 for(x=0; x<b_w; x++){
2527 int v= obmc3[x + y*obmc_stride] * block[1][x + y*src_stride] * (256/OBMC_MAX);
2528 if(add) dst[x + y*dst_stride] += v;
2529 else dst[x + y*dst_stride] -= v;
2532 for(y=0; y<b_h; y++){
2533 uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
2534 uint8_t *obmc4= obmc3+ (obmc_stride>>1);
2535 for(x=0; x<b_w; x++){
2536 int v= obmc4[x + y*obmc_stride] * block[0][x + y*src_stride] * (256/OBMC_MAX);
2537 if(add) dst[x + y*dst_stride] += v;
2538 else dst[x + y*dst_stride] -= v;
2543 s->dsp.inner_add_yblock(obmc, obmc_stride, block, b_w, b_h, src_x,src_y, src_stride, sb, add, dst8);
2545 for(y=0; y<b_h; y++){
2546 //FIXME ugly misuse of obmc_stride
2547 const uint8_t *obmc1= obmc + y*obmc_stride;
2548 const uint8_t *obmc2= obmc1+ (obmc_stride>>1);
2549 const uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1);
2550 const uint8_t *obmc4= obmc3+ (obmc_stride>>1);
2551 for(x=0; x<b_w; x++){
2552 int v= obmc1[x] * block[3][x + y*src_stride]
2553 +obmc2[x] * block[2][x + y*src_stride]
2554 +obmc3[x] * block[1][x + y*src_stride]
2555 +obmc4[x] * block[0][x + y*src_stride];
2557 v <<= 8 - LOG2_OBMC_MAX;
2559 v >>= 8 - FRAC_BITS;
2562 v += dst[x + y*dst_stride];
2563 v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS;
2564 if(v&(~255)) v= ~(v>>31);
2565 dst8[x + y*src_stride] = v;
2567 dst[x + y*dst_stride] -= v;
2575 static av_always_inline void predict_slice_buffered(SnowContext *s, slice_buffer * sb, IDWTELEM * old_buffer, int plane_index, int add, int mb_y){
2576 Plane *p= &s->plane[plane_index];
2577 const int mb_w= s->b_width << s->block_max_depth;
2578 const int mb_h= s->b_height << s->block_max_depth;
2580 int block_size = MB_SIZE >> s->block_max_depth;
2581 int block_w = plane_index ? block_size/2 : block_size;
2582 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
2583 int obmc_stride= plane_index ? block_size : 2*block_size;
2584 int ref_stride= s->current_picture.linesize[plane_index];
2585 uint8_t *dst8= s->current_picture.data[plane_index];
2589 if(s->keyframe || (s->avctx->debug&512)){
2594 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
2595 // DWTELEM * line = slice_buffer_get_line(sb, y);
2596 IDWTELEM * line = sb->line[y];
2598 // int v= buf[x + y*w] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
2599 int v= line[x] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
2601 if(v&(~255)) v= ~(v>>31);
2602 dst8[x + y*ref_stride]= v;
2606 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
2607 // DWTELEM * line = slice_buffer_get_line(sb, y);
2608 IDWTELEM * line = sb->line[y];
2610 line[x] -= 128 << FRAC_BITS;
2611 // buf[x + y*w]-= 128<<FRAC_BITS;
2619 for(mb_x=0; mb_x<=mb_w; mb_x++){
2620 add_yblock(s, 1, sb, old_buffer, dst8, obmc,
2621 block_w*mb_x - block_w/2,
2622 block_w*mb_y - block_w/2,
2625 w, ref_stride, obmc_stride,
2627 add, 0, plane_index);
2631 static av_always_inline void predict_slice(SnowContext *s, IDWTELEM *buf, int plane_index, int add, int mb_y){
2632 Plane *p= &s->plane[plane_index];
2633 const int mb_w= s->b_width << s->block_max_depth;
2634 const int mb_h= s->b_height << s->block_max_depth;
2636 int block_size = MB_SIZE >> s->block_max_depth;
2637 int block_w = plane_index ? block_size/2 : block_size;
2638 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
2639 const int obmc_stride= plane_index ? block_size : 2*block_size;
2640 int ref_stride= s->current_picture.linesize[plane_index];
2641 uint8_t *dst8= s->current_picture.data[plane_index];
2645 if(s->keyframe || (s->avctx->debug&512)){
2650 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
2652 int v= buf[x + y*w] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
2654 if(v&(~255)) v= ~(v>>31);
2655 dst8[x + y*ref_stride]= v;
2659 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
2661 buf[x + y*w]-= 128<<FRAC_BITS;
2669 for(mb_x=0; mb_x<=mb_w; mb_x++){
2670 add_yblock(s, 0, NULL, buf, dst8, obmc,
2671 block_w*mb_x - block_w/2,
2672 block_w*mb_y - block_w/2,
2675 w, ref_stride, obmc_stride,
2677 add, 1, plane_index);
2681 static av_always_inline void predict_plane(SnowContext *s, IDWTELEM *buf, int plane_index, int add){
2682 const int mb_h= s->b_height << s->block_max_depth;
2684 for(mb_y=0; mb_y<=mb_h; mb_y++)
2685 predict_slice(s, buf, plane_index, add, mb_y);
2688 static int get_dc(SnowContext *s, int mb_x, int mb_y, int plane_index){
2690 Plane *p= &s->plane[plane_index];
2691 const int block_size = MB_SIZE >> s->block_max_depth;
2692 const int block_w = plane_index ? block_size/2 : block_size;
2693 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
2694 const int obmc_stride= plane_index ? block_size : 2*block_size;
2695 const int ref_stride= s->current_picture.linesize[plane_index];
2696 uint8_t *src= s-> input_picture.data[plane_index];
2697 IDWTELEM *dst= (IDWTELEM*)s->m.obmc_scratchpad + plane_index*block_size*block_size*4; //FIXME change to unsigned
2698 const int b_stride = s->b_width << s->block_max_depth;
2699 const int w= p->width;
2700 const int h= p->height;
2701 int index= mb_x + mb_y*b_stride;
2702 BlockNode *b= &s->block[index];
2703 BlockNode backup= *b;
2707 b->type|= BLOCK_INTRA;
2708 b->color[plane_index]= 0;
2709 memset(dst, 0, obmc_stride*obmc_stride*sizeof(IDWTELEM));
2712 int mb_x2= mb_x + (i &1) - 1;
2713 int mb_y2= mb_y + (i>>1) - 1;
2714 int x= block_w*mb_x2 + block_w/2;
2715 int y= block_w*mb_y2 + block_w/2;
2717 add_yblock(s, 0, NULL, dst + ((i&1)+(i>>1)*obmc_stride)*block_w, NULL, obmc,
2718 x, y, block_w, block_w, w, h, obmc_stride, ref_stride, obmc_stride, mb_x2, mb_y2, 0, 0, plane_index);
2720 for(y2= FFMAX(y, 0); y2<FFMIN(h, y+block_w); y2++){
2721 for(x2= FFMAX(x, 0); x2<FFMIN(w, x+block_w); x2++){
2722 int index= x2-(block_w*mb_x - block_w/2) + (y2-(block_w*mb_y - block_w/2))*obmc_stride;
2723 int obmc_v= obmc[index];
2725 if(y<0) obmc_v += obmc[index + block_w*obmc_stride];
2726 if(x<0) obmc_v += obmc[index + block_w];
2727 if(y+block_w>h) obmc_v += obmc[index - block_w*obmc_stride];
2728 if(x+block_w>w) obmc_v += obmc[index - block_w];
2729 //FIXME precalculate this or simplify it somehow else
2731 d = -dst[index] + (1<<(FRAC_BITS-1));
2733 ab += (src[x2 + y2*ref_stride] - (d>>FRAC_BITS)) * obmc_v;
2734 aa += obmc_v * obmc_v; //FIXME precalculate this
2740 return av_clip(((ab<<LOG2_OBMC_MAX) + aa/2)/aa, 0, 255); //FIXME we should not need clipping
2743 static inline int get_block_bits(SnowContext *s, int x, int y, int w){
2744 const int b_stride = s->b_width << s->block_max_depth;
2745 const int b_height = s->b_height<< s->block_max_depth;
2746 int index= x + y*b_stride;
2747 const BlockNode *b = &s->block[index];
2748 const BlockNode *left = x ? &s->block[index-1] : &null_block;
2749 const BlockNode *top = y ? &s->block[index-b_stride] : &null_block;
2750 const BlockNode *tl = y && x ? &s->block[index-b_stride-1] : left;
2751 const BlockNode *tr = y && x+w<b_stride ? &s->block[index-b_stride+w] : tl;
2753 // int mx_context= av_log2(2*FFABS(left->mx - top->mx));
2754 // int my_context= av_log2(2*FFABS(left->my - top->my));
2756 if(x<0 || x>=b_stride || y>=b_height)
2763 00001XXXX 15-30 8-15
2765 //FIXME try accurate rate
2766 //FIXME intra and inter predictors if surrounding blocks are not the same type
2767 if(b->type & BLOCK_INTRA){
2768 return 3+2*( av_log2(2*FFABS(left->color[0] - b->color[0]))
2769 + av_log2(2*FFABS(left->color[1] - b->color[1]))
2770 + av_log2(2*FFABS(left->color[2] - b->color[2])));
2772 pred_mv(s, &dmx, &dmy, b->ref, left, top, tr);
2775 return 2*(1 + av_log2(2*FFABS(dmx)) //FIXME kill the 2* can be merged in lambda
2776 + av_log2(2*FFABS(dmy))
2777 + av_log2(2*b->ref));
2781 static int get_block_rd(SnowContext *s, int mb_x, int mb_y, int plane_index, const uint8_t *obmc_edged){
2782 Plane *p= &s->plane[plane_index];
2783 const int block_size = MB_SIZE >> s->block_max_depth;
2784 const int block_w = plane_index ? block_size/2 : block_size;
2785 const int obmc_stride= plane_index ? block_size : 2*block_size;
2786 const int ref_stride= s->current_picture.linesize[plane_index];
2787 uint8_t *dst= s->current_picture.data[plane_index];
2788 uint8_t *src= s-> input_picture.data[plane_index];
2789 IDWTELEM *pred= (IDWTELEM*)s->m.obmc_scratchpad + plane_index*block_size*block_size*4;
2790 uint8_t *cur = s->scratchbuf;
2791 uint8_t tmp[ref_stride*(2*MB_SIZE+HTAPS_MAX-1)];
2792 const int b_stride = s->b_width << s->block_max_depth;
2793 const int b_height = s->b_height<< s->block_max_depth;
2794 const int w= p->width;
2795 const int h= p->height;
2798 const int penalty_factor= get_penalty_factor(s->lambda, s->lambda2, s->avctx->me_cmp);
2799 int sx= block_w*mb_x - block_w/2;
2800 int sy= block_w*mb_y - block_w/2;
2801 int x0= FFMAX(0,-sx);
2802 int y0= FFMAX(0,-sy);
2803 int x1= FFMIN(block_w*2, w-sx);
2804 int y1= FFMIN(block_w*2, h-sy);
2807 pred_block(s, cur, tmp, ref_stride, sx, sy, block_w*2, block_w*2, &s->block[mb_x + mb_y*b_stride], plane_index, w, h);
2809 for(y=y0; y<y1; y++){
2810 const uint8_t *obmc1= obmc_edged + y*obmc_stride;
2811 const IDWTELEM *pred1 = pred + y*obmc_stride;
2812 uint8_t *cur1 = cur + y*ref_stride;
2813 uint8_t *dst1 = dst + sx + (sy+y)*ref_stride;
2814 for(x=x0; x<x1; x++){
2815 #if FRAC_BITS >= LOG2_OBMC_MAX
2816 int v = (cur1[x] * obmc1[x]) << (FRAC_BITS - LOG2_OBMC_MAX);
2818 int v = (cur1[x] * obmc1[x] + (1<<(LOG2_OBMC_MAX - FRAC_BITS-1))) >> (LOG2_OBMC_MAX - FRAC_BITS);
2820 v = (v + pred1[x]) >> FRAC_BITS;
2821 if(v&(~255)) v= ~(v>>31);
2826 /* copy the regions where obmc[] = (uint8_t)256 */
2827 if(LOG2_OBMC_MAX == 8
2828 && (mb_x == 0 || mb_x == b_stride-1)
2829 && (mb_y == 0 || mb_y == b_height-1)){
2838 for(y=y0; y<y1; y++)
2839 memcpy(dst + sx+x0 + (sy+y)*ref_stride, cur + x0 + y*ref_stride, x1-x0);
2843 /* FIXME rearrange dsputil to fit 32x32 cmp functions */
2844 /* FIXME check alignment of the cmp wavelet vs the encoding wavelet */
2845 /* FIXME cmps overlap but do not cover the wavelet's whole support.
2846 * So improving the score of one block is not strictly guaranteed
2847 * to improve the score of the whole frame, thus iterative motion
2848 * estimation does not always converge. */
2849 if(s->avctx->me_cmp == FF_CMP_W97)
2850 distortion = w97_32_c(&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, 32);
2851 else if(s->avctx->me_cmp == FF_CMP_W53)
2852 distortion = w53_32_c(&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, 32);
2856 int off = sx+16*(i&1) + (sy+16*(i>>1))*ref_stride;
2857 distortion += s->dsp.me_cmp[0](&s->m, src + off, dst + off, ref_stride, 16);
2862 distortion = s->dsp.me_cmp[0](&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, block_w*2);
2871 rate += get_block_bits(s, mb_x + (i&1) - (i>>1), mb_y + (i>>1), 1);
2873 if(mb_x == b_stride-2)
2874 rate += get_block_bits(s, mb_x + 1, mb_y + 1, 1);
2876 return distortion + rate*penalty_factor;
2879 static int get_4block_rd(SnowContext *s, int mb_x, int mb_y, int plane_index){
2881 Plane *p= &s->plane[plane_index];
2882 const int block_size = MB_SIZE >> s->block_max_depth;
2883 const int block_w = plane_index ? block_size/2 : block_size;
2884 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
2885 const int obmc_stride= plane_index ? block_size : 2*block_size;
2886 const int ref_stride= s->current_picture.linesize[plane_index];
2887 uint8_t *dst= s->current_picture.data[plane_index];
2888 uint8_t *src= s-> input_picture.data[plane_index];
2889 //FIXME zero_dst is const but add_yblock changes dst if add is 0 (this is never the case for dst=zero_dst
2890 // const has only been removed from zero_dst to suppress a warning
2891 static IDWTELEM zero_dst[4096]; //FIXME
2892 const int b_stride = s->b_width << s->block_max_depth;
2893 const int w= p->width;
2894 const int h= p->height;
2897 const int penalty_factor= get_penalty_factor(s->lambda, s->lambda2, s->avctx->me_cmp);
2900 int mb_x2= mb_x + (i%3) - 1;
2901 int mb_y2= mb_y + (i/3) - 1;
2902 int x= block_w*mb_x2 + block_w/2;
2903 int y= block_w*mb_y2 + block_w/2;
2905 add_yblock(s, 0, NULL, zero_dst, dst, obmc,
2906 x, y, block_w, block_w, w, h, /*dst_stride*/0, ref_stride, obmc_stride, mb_x2, mb_y2, 1, 1, plane_index);
2908 //FIXME find a cleaner/simpler way to skip the outside stuff
2909 for(y2= y; y2<0; y2++)
2910 memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, block_w);
2911 for(y2= h; y2<y+block_w; y2++)
2912 memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, block_w);
2914 for(y2= y; y2<y+block_w; y2++)
2915 memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, -x);
2918 for(y2= y; y2<y+block_w; y2++)
2919 memcpy(dst + w + y2*ref_stride, src + w + y2*ref_stride, x+block_w - w);
2922 assert(block_w== 8 || block_w==16);
2923 distortion += s->dsp.me_cmp[block_w==8](&s->m, src + x + y*ref_stride, dst + x + y*ref_stride, ref_stride, block_w);
2927 BlockNode *b= &s->block[mb_x+mb_y*b_stride];
2928 int merged= same_block(b,b+1) && same_block(b,b+b_stride) && same_block(b,b+b_stride+1);
2936 rate = get_block_bits(s, mb_x, mb_y, 2);
2937 for(i=merged?4:0; i<9; i++){
2938 static const int dxy[9][2] = {{0,0},{1,0},{0,1},{1,1},{2,0},{2,1},{-1,2},{0,2},{1,2}};
2939 rate += get_block_bits(s, mb_x + dxy[i][0], mb_y + dxy[i][1], 1);
2942 return distortion + rate*penalty_factor;
2945 static av_always_inline int check_block(SnowContext *s, int mb_x, int mb_y, int p[3], int intra, const uint8_t *obmc_edged, int *best_rd){
2946 const int b_stride= s->b_width << s->block_max_depth;
2947 BlockNode *block= &s->block[mb_x + mb_y * b_stride];
2948 BlockNode backup= *block;
2949 int rd, index, value;
2951 assert(mb_x>=0 && mb_y>=0);
2952 assert(mb_x<b_stride);
2955 block->color[0] = p[0];
2956 block->color[1] = p[1];
2957 block->color[2] = p[2];
2958 block->type |= BLOCK_INTRA;
2960 index= (p[0] + 31*p[1]) & (ME_CACHE_SIZE-1);
2961 value= s->me_cache_generation + (p[0]>>10) + (p[1]<<6) + (block->ref<<12);
2962 if(s->me_cache[index] == value)
2964 s->me_cache[index]= value;
2968 block->type &= ~BLOCK_INTRA;
2971 rd= get_block_rd(s, mb_x, mb_y, 0, obmc_edged);
2983 /* special case for int[2] args we discard afterwards,
2984 * fixes compilation problem with gcc 2.95 */
2985 static av_always_inline int check_block_inter(SnowContext *s, int mb_x, int mb_y, int p0, int p1, const uint8_t *obmc_edged, int *best_rd){
2986 int p[2] = {p0, p1};
2987 return check_block(s, mb_x, mb_y, p, 0, obmc_edged, best_rd);
2990 static av_always_inline int check_4block_inter(SnowContext *s, int mb_x, int mb_y, int p0, int p1, int ref, int *best_rd){
2991 const int b_stride= s->b_width << s->block_max_depth;
2992 BlockNode *block= &s->block[mb_x + mb_y * b_stride];
2993 BlockNode backup[4]= {block[0], block[1], block[b_stride], block[b_stride+1]};
2994 int rd, index, value;
2996 assert(mb_x>=0 && mb_y>=0);
2997 assert(mb_x<b_stride);
2998 assert(((mb_x|mb_y)&1) == 0);
3000 index= (p0 + 31*p1) & (ME_CACHE_SIZE-1);
3001 value= s->me_cache_generation + (p0>>10) + (p1<<6) + (block->ref<<12);
3002 if(s->me_cache[index] == value)
3004 s->me_cache[index]= value;
3009 block->type &= ~BLOCK_INTRA;
3010 block[1]= block[b_stride]= block[b_stride+1]= *block;
3012 rd= get_4block_rd(s, mb_x, mb_y, 0);
3019 block[0]= backup[0];
3020 block[1]= backup[1];
3021 block[b_stride]= backup[2];
3022 block[b_stride+1]= backup[3];
3027 static void iterative_me(SnowContext *s){
3028 int pass, mb_x, mb_y;
3029 const int b_width = s->b_width << s->block_max_depth;
3030 const int b_height= s->b_height << s->block_max_depth;
3031 const int b_stride= b_width;
3035 RangeCoder r = s->c;
3036 uint8_t state[sizeof(s->block_state)];
3037 memcpy(state, s->block_state, sizeof(s->block_state));
3038 for(mb_y= 0; mb_y<s->b_height; mb_y++)
3039 for(mb_x= 0; mb_x<s->b_width; mb_x++)
3040 encode_q_branch(s, 0, mb_x, mb_y);
3042 memcpy(s->block_state, state, sizeof(s->block_state));
3045 for(pass=0; pass<25; pass++){
3048 for(mb_y= 0; mb_y<b_height; mb_y++){
3049 for(mb_x= 0; mb_x<b_width; mb_x++){
3050 int dia_change, i, j, ref;
3051 int best_rd= INT_MAX, ref_rd;
3052 BlockNode backup, ref_b;
3053 const int index= mb_x + mb_y * b_stride;
3054 BlockNode *block= &s->block[index];
3055 BlockNode *tb = mb_y ? &s->block[index-b_stride ] : NULL;
3056 BlockNode *lb = mb_x ? &s->block[index -1] : NULL;
3057 BlockNode *rb = mb_x+1<b_width ? &s->block[index +1] : NULL;
3058 BlockNode *bb = mb_y+1<b_height ? &s->block[index+b_stride ] : NULL;
3059 BlockNode *tlb= mb_x && mb_y ? &s->block[index-b_stride-1] : NULL;
3060 BlockNode *trb= mb_x+1<b_width && mb_y ? &s->block[index-b_stride+1] : NULL;
3061 BlockNode *blb= mb_x && mb_y+1<b_height ? &s->block[index+b_stride-1] : NULL;
3062 BlockNode *brb= mb_x+1<b_width && mb_y+1<b_height ? &s->block[index+b_stride+1] : NULL;
3063 const int b_w= (MB_SIZE >> s->block_max_depth);
3064 uint8_t obmc_edged[b_w*2][b_w*2];
3066 if(pass && (block->type & BLOCK_OPT))
3068 block->type |= BLOCK_OPT;
3072 if(!s->me_cache_generation)
3073 memset(s->me_cache, 0, sizeof(s->me_cache));
3074 s->me_cache_generation += 1<<22;
3076 //FIXME precalculate
3079 memcpy(obmc_edged, obmc_tab[s->block_max_depth], b_w*b_w*4);
3081 for(y=0; y<b_w*2; y++)
3082 memset(obmc_edged[y], obmc_edged[y][0] + obmc_edged[y][b_w-1], b_w);
3083 if(mb_x==b_stride-1)
3084 for(y=0; y<b_w*2; y++)
3085 memset(obmc_edged[y]+b_w, obmc_edged[y][b_w] + obmc_edged[y][b_w*2-1], b_w);
3087 for(x=0; x<b_w*2; x++)
3088 obmc_edged[0][x] += obmc_edged[b_w-1][x];
3089 for(y=1; y<b_w; y++)
3090 memcpy(obmc_edged[y], obmc_edged[0], b_w*2);
3092 if(mb_y==b_height-1){
3093 for(x=0; x<b_w*2; x++)
3094 obmc_edged[b_w*2-1][x] += obmc_edged[b_w][x];
3095 for(y=b_w; y<b_w*2-1; y++)
3096 memcpy(obmc_edged[y], obmc_edged[b_w*2-1], b_w*2);
3100 //skip stuff outside the picture
3101 if(mb_x==0 || mb_y==0 || mb_x==b_width-1 || mb_y==b_height-1){
3102 uint8_t *src= s-> input_picture.data[0];
3103 uint8_t *dst= s->current_picture.data[0];
3104 const int stride= s->current_picture.linesize[0];
3105 const int block_w= MB_SIZE >> s->block_max_depth;
3106 const int sx= block_w*mb_x - block_w/2;
3107 const int sy= block_w*mb_y - block_w/2;
3108 const int w= s->plane[0].width;
3109 const int h= s->plane[0].height;
3113 memcpy(dst + sx + y*stride, src + sx + y*stride, block_w*2);
3114 for(y=h; y<sy+block_w*2; y++)
3115 memcpy(dst + sx + y*stride, src + sx + y*stride, block_w*2);
3117 for(y=sy; y<sy+block_w*2; y++)
3118 memcpy(dst + sx + y*stride, src + sx + y*stride, -sx);
3120 if(sx+block_w*2 > w){
3121 for(y=sy; y<sy+block_w*2; y++)
3122 memcpy(dst + w + y*stride, src + w + y*stride, sx+block_w*2 - w);
3126 // intra(black) = neighbors' contribution to the current block
3128 color[i]= get_dc(s, mb_x, mb_y, i);
3130 // get previous score (cannot be cached due to OBMC)
3131 if(pass > 0 && (block->type&BLOCK_INTRA)){
3132 int color0[3]= {block->color[0], block->color[1], block->color[2]};
3133 check_block(s, mb_x, mb_y, color0, 1, *obmc_edged, &best_rd);
3135 check_block_inter(s, mb_x, mb_y, block->mx, block->my, *obmc_edged, &best_rd);
3139 for(ref=0; ref < s->ref_frames; ref++){
3140 int16_t (*mvr)[2]= &s->ref_mvs[ref][index];
3141 if(s->ref_scores[ref][index] > s->ref_scores[ref_b.ref][index]*3/2) //FIXME tune threshold
3146 check_block_inter(s, mb_x, mb_y, mvr[0][0], mvr[0][1], *obmc_edged, &best_rd);
3147 check_block_inter(s, mb_x, mb_y, 0, 0, *obmc_edged, &best_rd);
3149 check_block_inter(s, mb_x, mb_y, mvr[-b_stride][0], mvr[-b_stride][1], *obmc_edged, &best_rd);
3151 check_block_inter(s, mb_x, mb_y, mvr[-1][0], mvr[-1][1], *obmc_edged, &best_rd);
3153 check_block_inter(s, mb_x, mb_y, mvr[1][0], mvr[1][1], *obmc_edged, &best_rd);
3155 check_block_inter(s, mb_x, mb_y, mvr[b_stride][0], mvr[b_stride][1], *obmc_edged, &best_rd);
3158 //FIXME avoid subpel interpolation / round to nearest integer
3161 for(i=0; i<FFMAX(s->avctx->dia_size, 1); i++){
3163 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+4*(i-j), block->my+(4*j), *obmc_edged, &best_rd);
3164 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx-4*(i-j), block->my-(4*j), *obmc_edged, &best_rd);
3165 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+4*(i-j), block->my-(4*j), *obmc_edged, &best_rd);
3166 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx-4*(i-j), block->my+(4*j), *obmc_edged, &best_rd);
3172 static const int square[8][2]= {{+1, 0},{-1, 0},{ 0,+1},{ 0,-1},{+1,+1},{-1,-1},{+1,-1},{-1,+1},};
3175 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+square[i][0], block->my+square[i][1], *obmc_edged, &best_rd);
3177 //FIXME or try the standard 2 pass qpel or similar
3179 mvr[0][0]= block->mx;
3180 mvr[0][1]= block->my;
3181 if(ref_rd > best_rd){
3189 check_block(s, mb_x, mb_y, color, 1, *obmc_edged, &best_rd);
3190 //FIXME RD style color selection
3192 if(!same_block(block, &backup)){
3193 if(tb ) tb ->type &= ~BLOCK_OPT;
3194 if(lb ) lb ->type &= ~BLOCK_OPT;
3195 if(rb ) rb ->type &= ~BLOCK_OPT;
3196 if(bb ) bb ->type &= ~BLOCK_OPT;
3197 if(tlb) tlb->type &= ~BLOCK_OPT;
3198 if(trb) trb->type &= ~BLOCK_OPT;
3199 if(blb) blb->type &= ~BLOCK_OPT;
3200 if(brb) brb->type &= ~BLOCK_OPT;
3205 av_log(NULL, AV_LOG_ERROR, "pass:%d changed:%d\n", pass, change);
3210 if(s->block_max_depth == 1){
3212 for(mb_y= 0; mb_y<b_height; mb_y+=2){
3213 for(mb_x= 0; mb_x<b_width; mb_x+=2){
3215 int best_rd, init_rd;
3216 const int index= mb_x + mb_y * b_stride;
3219 b[0]= &s->block[index];
3221 b[2]= b[0]+b_stride;
3223 if(same_block(b[0], b[1]) &&
3224 same_block(b[0], b[2]) &&
3225 same_block(b[0], b[3]))
3228 if(!s->me_cache_generation)
3229 memset(s->me_cache, 0, sizeof(s->me_cache));
3230 s->me_cache_generation += 1<<22;
3232 init_rd= best_rd= get_4block_rd(s, mb_x, mb_y, 0);
3234 //FIXME more multiref search?
3235 check_4block_inter(s, mb_x, mb_y,
3236 (b[0]->mx + b[1]->mx + b[2]->mx + b[3]->mx + 2) >> 2,
3237 (b[0]->my + b[1]->my + b[2]->my + b[3]->my + 2) >> 2, 0, &best_rd);
3240 if(!(b[i]->type&BLOCK_INTRA))
3241 check_4block_inter(s, mb_x, mb_y, b[i]->mx, b[i]->my, b[i]->ref, &best_rd);
3243 if(init_rd != best_rd)
3247 av_log(NULL, AV_LOG_ERROR, "pass:4mv changed:%d\n", change*4);
3251 static void quantize(SnowContext *s, SubBand *b, IDWTELEM *dst, DWTELEM *src, int stride, int bias){
3252 const int w= b->width;
3253 const int h= b->height;
3254 const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
3255 const int qmul= qexp[qlog&(QROOT-1)]<<((qlog>>QSHIFT) + ENCODER_EXTRA_BITS);
3256 int x,y, thres1, thres2;
3258 if(s->qlog == LOSSLESS_QLOG){
3261 dst[x + y*stride]= src[x + y*stride];
3265 bias= bias ? 0 : (3*qmul)>>3;
3266 thres1= ((qmul - bias)>>QEXPSHIFT) - 1;
3272 int i= src[x + y*stride];
3274 if((unsigned)(i+thres1) > thres2){
3277 i/= qmul; //FIXME optimize
3278 dst[x + y*stride]= i;
3282 i/= qmul; //FIXME optimize
3283 dst[x + y*stride]= -i;
3286 dst[x + y*stride]= 0;
3292 int i= src[x + y*stride];
3294 if((unsigned)(i+thres1) > thres2){
3297 i= (i + bias) / qmul; //FIXME optimize
3298 dst[x + y*stride]= i;
3302 i= (i + bias) / qmul; //FIXME optimize
3303 dst[x + y*stride]= -i;
3306 dst[x + y*stride]= 0;
3312 static void dequantize_slice_buffered(SnowContext *s, slice_buffer * sb, SubBand *b, IDWTELEM *src, int stride, int start_y, int end_y){
3313 const int w= b->width;
3314 const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
3315 const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
3316 const int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
3319 if(s->qlog == LOSSLESS_QLOG) return;
3321 for(y=start_y; y<end_y; y++){
3322 // DWTELEM * line = slice_buffer_get_line_from_address(sb, src + (y * stride));
3323 IDWTELEM * line = slice_buffer_get_line(sb, (y * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
3327 line[x]= -((-i*qmul + qadd)>>(QEXPSHIFT)); //FIXME try different bias
3329 line[x]= (( i*qmul + qadd)>>(QEXPSHIFT));
3335 static void dequantize(SnowContext *s, SubBand *b, IDWTELEM *src, int stride){
3336 const int w= b->width;
3337 const int h= b->height;
3338 const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
3339 const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
3340 const int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
3343 if(s->qlog == LOSSLESS_QLOG) return;
3347 int i= src[x + y*stride];
3349 src[x + y*stride]= -((-i*qmul + qadd)>>(QEXPSHIFT)); //FIXME try different bias
3351 src[x + y*stride]= (( i*qmul + qadd)>>(QEXPSHIFT));
3357 static void decorrelate(SnowContext *s, SubBand *b, IDWTELEM *src, int stride, int inverse, int use_median){
3358 const int w= b->width;
3359 const int h= b->height;
3362 for(y=h-1; y>=0; y--){
3363 for(x=w-1; x>=0; x--){
3364 int i= x + y*stride;
3368 if(y && x+1<w) src[i] -= mid_pred(src[i - 1], src[i - stride], src[i - stride + 1]);
3369 else src[i] -= src[i - 1];
3371 if(y) src[i] -= mid_pred(src[i - 1], src[i - stride], src[i - 1] + src[i - stride] - src[i - 1 - stride]);
3372 else src[i] -= src[i - 1];
3375 if(y) src[i] -= src[i - stride];
3381 static void correlate_slice_buffered(SnowContext *s, slice_buffer * sb, SubBand *b, IDWTELEM *src, int stride, int inverse, int use_median, int start_y, int end_y){
3382 const int w= b->width;
3385 IDWTELEM * line=0; // silence silly "could be used without having been initialized" warning
3389 line = slice_buffer_get_line(sb, ((start_y - 1) * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
3391 for(y=start_y; y<end_y; y++){
3393 // line = slice_buffer_get_line_from_address(sb, src + (y * stride));
3394 line = slice_buffer_get_line(sb, (y * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
3398 if(y && x+1<w) line[x] += mid_pred(line[x - 1], prev[x], prev[x + 1]);
3399 else line[x] += line[x - 1];
3401 if(y) line[x] += mid_pred(line[x - 1], prev[x], line[x - 1] + prev[x] - prev[x - 1]);
3402 else line[x] += line[x - 1];
3405 if(y) line[x] += prev[x];
3411 static void correlate(SnowContext *s, SubBand *b, IDWTELEM *src, int stride, int inverse, int use_median){
3412 const int w= b->width;
3413 const int h= b->height;
3418 int i= x + y*stride;
3422 if(y && x+1<w) src[i] += mid_pred(src[i - 1], src[i - stride], src[i - stride + 1]);
3423 else src[i] += src[i - 1];
3425 if(y) src[i] += mid_pred(src[i - 1], src[i - stride], src[i - 1] + src[i - stride] - src[i - 1 - stride]);
3426 else src[i] += src[i - 1];
3429 if(y) src[i] += src[i - stride];
3435 static void encode_qlogs(SnowContext *s){
3436 int plane_index, level, orientation;
3438 for(plane_index=0; plane_index<2; plane_index++){
3439 for(level=0; level<s->spatial_decomposition_count; level++){
3440 for(orientation=level ? 1:0; orientation<4; orientation++){
3441 if(orientation==2) continue;
3442 put_symbol(&s->c, s->header_state, s->plane[plane_index].band[level][orientation].qlog, 1);
3448 static void encode_header(SnowContext *s){
3452 memset(kstate, MID_STATE, sizeof(kstate));
3454 put_rac(&s->c, kstate, s->keyframe);
3455 if(s->keyframe || s->always_reset){
3457 s->last_spatial_decomposition_type=
3461 s->last_block_max_depth= 0;
3462 for(plane_index=0; plane_index<2; plane_index++){
3463 Plane *p= &s->plane[plane_index];
3466 memset(p->last_hcoeff, 0, sizeof(p->last_hcoeff));
3470 put_symbol(&s->c, s->header_state, s->version, 0);
3471 put_rac(&s->c, s->header_state, s->always_reset);
3472 put_symbol(&s->c, s->header_state, s->temporal_decomposition_type, 0);
3473 put_symbol(&s->c, s->header_state, s->temporal_decomposition_count, 0);
3474 put_symbol(&s->c, s->header_state, s->spatial_decomposition_count, 0);
3475 put_symbol(&s->c, s->header_state, s->colorspace_type, 0);
3476 put_symbol(&s->c, s->header_state, s->chroma_h_shift, 0);
3477 put_symbol(&s->c, s->header_state, s->chroma_v_shift, 0);
3478 put_rac(&s->c, s->header_state, s->spatial_scalability);
3479 // put_rac(&s->c, s->header_state, s->rate_scalability);
3480 put_symbol(&s->c, s->header_state, s->max_ref_frames-1, 0);
3487 for(plane_index=0; plane_index<2; plane_index++){
3488 Plane *p= &s->plane[plane_index];
3489 update_mc |= p->last_htaps != p->htaps;
3490 update_mc |= p->last_diag_mc != p->diag_mc;
3491 update_mc |= !!memcmp(p->last_hcoeff, p->hcoeff, sizeof(p->hcoeff));
3493 put_rac(&s->c, s->header_state, update_mc);
3495 for(plane_index=0; plane_index<2; plane_index++){
3496 Plane *p= &s->plane[plane_index];
3497 put_rac(&s->c, s->header_state, p->diag_mc);
3498 put_symbol(&s->c, s->header_state, p->htaps/2-1, 0);
3499 for(i= p->htaps/2; i; i--)
3500 put_symbol(&s->c, s->header_state, FFABS(p->hcoeff[i]), 0);
3503 if(s->last_spatial_decomposition_count != s->spatial_decomposition_count){
3504 put_rac(&s->c, s->header_state, 1);
3505 put_symbol(&s->c, s->header_state, s->spatial_decomposition_count, 0);
3508 put_rac(&s->c, s->header_state, 0);
3511 put_symbol(&s->c, s->header_state, s->spatial_decomposition_type - s->last_spatial_decomposition_type, 1);
3512 put_symbol(&s->c, s->header_state, s->qlog - s->last_qlog , 1);
3513 put_symbol(&s->c, s->header_state, s->mv_scale - s->last_mv_scale, 1);
3514 put_symbol(&s->c, s->header_state, s->qbias - s->last_qbias , 1);
3515 put_symbol(&s->c, s->header_state, s->block_max_depth - s->last_block_max_depth, 1);
3519 static void update_last_header_values(SnowContext *s){
3523 for(plane_index=0; plane_index<2; plane_index++){
3524 Plane *p= &s->plane[plane_index];
3525 p->last_diag_mc= p->diag_mc;
3526 p->last_htaps = p->htaps;
3527 memcpy(p->last_hcoeff, p->hcoeff, sizeof(p->hcoeff));
3531 s->last_spatial_decomposition_type = s->spatial_decomposition_type;
3532 s->last_qlog = s->qlog;
3533 s->last_qbias = s->qbias;
3534 s->last_mv_scale = s->mv_scale;
3535 s->last_block_max_depth = s->block_max_depth;
3536 s->last_spatial_decomposition_count = s->spatial_decomposition_count;
3539 static void decode_qlogs(SnowContext *s){
3540 int plane_index, level, orientation;
3542 for(plane_index=0; plane_index<3; plane_index++){
3543 for(level=0; level<s->spatial_decomposition_count; level++){
3544 for(orientation=level ? 1:0; orientation<4; orientation++){
3546 if (plane_index==2) q= s->plane[1].band[level][orientation].qlog;
3547 else if(orientation==2) q= s->plane[plane_index].band[level][1].qlog;
3548 else q= get_symbol(&s->c, s->header_state, 1);
3549 s->plane[plane_index].band[level][orientation].qlog= q;
3555 static int decode_header(SnowContext *s){
3559 memset(kstate, MID_STATE, sizeof(kstate));
3561 s->keyframe= get_rac(&s->c, kstate);
3562 if(s->keyframe || s->always_reset){
3564 s->spatial_decomposition_type=
3568 s->block_max_depth= 0;
3571 s->version= get_symbol(&s->c, s->header_state, 0);
3573 av_log(s->avctx, AV_LOG_ERROR, "version %d not supported", s->version);
3576 s->always_reset= get_rac(&s->c, s->header_state);
3577 s->temporal_decomposition_type= get_symbol(&s->c, s->header_state, 0);
3578 s->temporal_decomposition_count= get_symbol(&s->c, s->header_state, 0);
3579 s->spatial_decomposition_count= get_symbol(&s->c, s->header_state, 0);
3580 s->colorspace_type= get_symbol(&s->c, s->header_state, 0);
3581 s->chroma_h_shift= get_symbol(&s->c, s->header_state, 0);
3582 s->chroma_v_shift= get_symbol(&s->c, s->header_state, 0);
3583 s->spatial_scalability= get_rac(&s->c, s->header_state);
3584 // s->rate_scalability= get_rac(&s->c, s->header_state);
3585 s->max_ref_frames= get_symbol(&s->c, s->header_state, 0)+1;
3591 if(get_rac(&s->c, s->header_state)){
3592 for(plane_index=0; plane_index<2; plane_index++){
3593 int htaps, i, sum=0;
3594 Plane *p= &s->plane[plane_index];
3595 p->diag_mc= get_rac(&s->c, s->header_state);
3596 htaps= get_symbol(&s->c, s->header_state, 0)*2 + 2;
3597 if((unsigned)htaps > HTAPS_MAX || htaps==0)
3600 for(i= htaps/2; i; i--){
3601 p->hcoeff[i]= get_symbol(&s->c, s->header_state, 0) * (1-2*(i&1));
3602 sum += p->hcoeff[i];
3604 p->hcoeff[0]= 32-sum;
3606 s->plane[2].diag_mc= s->plane[1].diag_mc;
3607 s->plane[2].htaps = s->plane[1].htaps;
3608 memcpy(s->plane[2].hcoeff, s->plane[1].hcoeff, sizeof(s->plane[1].hcoeff));
3610 if(get_rac(&s->c, s->header_state)){
3611 s->spatial_decomposition_count= get_symbol(&s->c, s->header_state, 0);
3616 s->spatial_decomposition_type+= get_symbol(&s->c, s->header_state, 1);
3617 if(s->spatial_decomposition_type > 1){
3618 av_log(s->avctx, AV_LOG_ERROR, "spatial_decomposition_type %d not supported", s->spatial_decomposition_type);
3622 s->qlog += get_symbol(&s->c, s->header_state, 1);
3623 s->mv_scale += get_symbol(&s->c, s->header_state, 1);
3624 s->qbias += get_symbol(&s->c, s->header_state, 1);
3625 s->block_max_depth+= get_symbol(&s->c, s->header_state, 1);
3626 if(s->block_max_depth > 1 || s->block_max_depth < 0){
3627 av_log(s->avctx, AV_LOG_ERROR, "block_max_depth= %d is too large", s->block_max_depth);
3628 s->block_max_depth= 0;
3635 static void init_qexp(void){
3639 for(i=0; i<QROOT; i++){
3641 v *= pow(2, 1.0 / QROOT);
3645 static av_cold int common_init(AVCodecContext *avctx){
3646 SnowContext *s = avctx->priv_data;
3652 dsputil_init(&s->dsp, avctx);
3655 s->dsp.put_qpel_pixels_tab [0][dy+dx/4]=\
3656 s->dsp.put_no_rnd_qpel_pixels_tab[0][dy+dx/4]=\
3657 s->dsp.put_h264_qpel_pixels_tab[0][dy+dx/4];\
3658 s->dsp.put_qpel_pixels_tab [1][dy+dx/4]=\
3659 s->dsp.put_no_rnd_qpel_pixels_tab[1][dy+dx/4]=\
3660 s->dsp.put_h264_qpel_pixels_tab[1][dy+dx/4];
3679 #define mcfh(dx,dy)\
3680 s->dsp.put_pixels_tab [0][dy/4+dx/8]=\
3681 s->dsp.put_no_rnd_pixels_tab[0][dy/4+dx/8]=\
3682 mc_block_hpel ## dx ## dy ## 16;\
3683 s->dsp.put_pixels_tab [1][dy/4+dx/8]=\
3684 s->dsp.put_no_rnd_pixels_tab[1][dy/4+dx/8]=\
3685 mc_block_hpel ## dx ## dy ## 8;
3695 // dec += FFMAX(s->chroma_h_shift, s->chroma_v_shift);
3697 width= s->avctx->width;
3698 height= s->avctx->height;
3700 s->spatial_idwt_buffer= av_mallocz(width*height*sizeof(IDWTELEM));
3701 s->spatial_dwt_buffer= av_mallocz(width*height*sizeof(DWTELEM)); //FIXME this does not belong here
3703 for(i=0; i<MAX_REF_FRAMES; i++)
3704 for(j=0; j<MAX_REF_FRAMES; j++)
3705 scale_mv_ref[i][j] = 256*(i+1)/(j+1);
3707 s->avctx->get_buffer(s->avctx, &s->mconly_picture);
3708 s->scratchbuf = av_malloc(s->mconly_picture.linesize[0]*7*MB_SIZE);
3713 static int common_init_after_header(AVCodecContext *avctx){
3714 SnowContext *s = avctx->priv_data;
3715 int plane_index, level, orientation;
3717 for(plane_index=0; plane_index<3; plane_index++){
3718 int w= s->avctx->width;
3719 int h= s->avctx->height;
3722 w>>= s->chroma_h_shift;
3723 h>>= s->chroma_v_shift;
3725 s->plane[plane_index].width = w;
3726 s->plane[plane_index].height= h;
3728 for(level=s->spatial_decomposition_count-1; level>=0; level--){
3729 for(orientation=level ? 1 : 0; orientation<4; orientation++){
3730 SubBand *b= &s->plane[plane_index].band[level][orientation];
3732 b->buf= s->spatial_dwt_buffer;
3734 b->stride= s->plane[plane_index].width << (s->spatial_decomposition_count - level);
3735 b->width = (w + !(orientation&1))>>1;
3736 b->height= (h + !(orientation>1))>>1;
3738 b->stride_line = 1 << (s->spatial_decomposition_count - level);
3739 b->buf_x_offset = 0;
3740 b->buf_y_offset = 0;
3744 b->buf_x_offset = (w+1)>>1;
3747 b->buf += b->stride>>1;
3748 b->buf_y_offset = b->stride_line >> 1;
3750 b->ibuf= s->spatial_idwt_buffer + (b->buf - s->spatial_dwt_buffer);
3753 b->parent= &s->plane[plane_index].band[level-1][orientation];
3754 //FIXME avoid this realloc
3755 av_freep(&b->x_coeff);
3756 b->x_coeff=av_mallocz(((b->width+1) * b->height+1)*sizeof(x_and_coeff));
3766 static int qscale2qlog(int qscale){
3767 return rint(QROOT*log(qscale / (float)FF_QP2LAMBDA)/log(2))
3768 + 61*QROOT/8; //<64 >60
3771 static int ratecontrol_1pass(SnowContext *s, AVFrame *pict)
3773 /* Estimate the frame's complexity as a sum of weighted dwt coefficients.
3774 * FIXME we know exact mv bits at this point,
3775 * but ratecontrol isn't set up to include them. */
3776 uint32_t coef_sum= 0;
3777 int level, orientation, delta_qlog;
3779 for(level=0; level<s->spatial_decomposition_count; level++){
3780 for(orientation=level ? 1 : 0; orientation<4; orientation++){
3781 SubBand *b= &s->plane[0].band[level][orientation];
3782 IDWTELEM *buf= b->ibuf;
3783 const int w= b->width;
3784 const int h= b->height;
3785 const int stride= b->stride;
3786 const int qlog= av_clip(2*QROOT + b->qlog, 0, QROOT*16);
3787 const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
3788 const int qdiv= (1<<16)/qmul;
3790 //FIXME this is ugly
3793 buf[x+y*stride]= b->buf[x+y*stride];
3795 decorrelate(s, b, buf, stride, 1, 0);
3798 coef_sum+= abs(buf[x+y*stride]) * qdiv >> 16;
3802 /* ugly, ratecontrol just takes a sqrt again */
3803 coef_sum = (uint64_t)coef_sum * coef_sum >> 16;
3804 assert(coef_sum < INT_MAX);
3806 if(pict->pict_type == FF_I_TYPE){
3807 s->m.current_picture.mb_var_sum= coef_sum;
3808 s->m.current_picture.mc_mb_var_sum= 0;
3810 s->m.current_picture.mc_mb_var_sum= coef_sum;
3811 s->m.current_picture.mb_var_sum= 0;
3814 pict->quality= ff_rate_estimate_qscale(&s->m, 1);
3815 if (pict->quality < 0)
3817 s->lambda= pict->quality * 3/2;
3818 delta_qlog= qscale2qlog(pict->quality) - s->qlog;
3819 s->qlog+= delta_qlog;
3823 static void calculate_visual_weight(SnowContext *s, Plane *p){
3824 int width = p->width;
3825 int height= p->height;
3826 int level, orientation, x, y;
3828 for(level=0; level<s->spatial_decomposition_count; level++){
3829 for(orientation=level ? 1 : 0; orientation<4; orientation++){
3830 SubBand *b= &p->band[level][orientation];
3831 IDWTELEM *ibuf= b->ibuf;
3834 memset(s->spatial_idwt_buffer, 0, sizeof(*s->spatial_idwt_buffer)*width*height);
3835 ibuf[b->width/2 + b->height/2*b->stride]= 256*16;
3836 ff_spatial_idwt(s->spatial_idwt_buffer, width, height, width, s->spatial_decomposition_type, s->spatial_decomposition_count);
3837 for(y=0; y<height; y++){
3838 for(x=0; x<width; x++){
3839 int64_t d= s->spatial_idwt_buffer[x + y*width]*16;
3844 b->qlog= (int)(log(352256.0/sqrt(error)) / log(pow(2.0, 1.0/QROOT))+0.5);
3854 static void find_sse(SnowContext *s, Plane *p, int *score, int score_stride, IDWTELEM *r0, IDWTELEM *r1, int level, int orientation){
3855 SubBand *b= &p->band[level][orientation];
3859 int step= 1 << (s->spatial_decomposition_count - level);
3866 //FIXME bias for nonzero ?
3868 memset(score, 0, sizeof(*score)*score_stride*((p->height + Q2_STEP-1)/Q2_STEP));
3869 for(y=0; y<p->height; y++){
3870 for(x=0; x<p->width; x++){
3871 int sx= (x-xo + step/2) / step / Q2_STEP;
3872 int sy= (y-yo + step/2) / step / Q2_STEP;
3873 int v= r0[x + y*p->width] - r1[x + y*p->width];
3874 assert(sx>=0 && sy>=0 && sx < score_stride);
3876 score[sx + sy*score_stride] += v*v;
3877 assert(score[sx + sy*score_stride] >= 0);
3882 static void dequantize_all(SnowContext *s, Plane *p, IDWTELEM *buffer, int width, int height){
3883 int level, orientation;
3885 for(level=0; level<s->spatial_decomposition_count; level++){
3886 for(orientation=level ? 1 : 0; orientation<4; orientation++){
3887 SubBand *b= &p->band[level][orientation];
3888 IDWTELEM *dst= buffer + (b->ibuf - s->spatial_idwt_buffer);
3890 dequantize(s, b, dst, b->stride);
3895 static void dwt_quantize(SnowContext *s, Plane *p, DWTELEM *buffer, int width, int height, int stride, int type){
3896 int level, orientation, ys, xs, x, y, pass;
3897 IDWTELEM best_dequant[height * stride];
3898 IDWTELEM idwt2_buffer[height * stride];
3899 const int score_stride= (width + 10)/Q2_STEP;
3900 int best_score[(width + 10)/Q2_STEP * (height + 10)/Q2_STEP]; //FIXME size
3901 int score[(width + 10)/Q2_STEP * (height + 10)/Q2_STEP]; //FIXME size
3902 int threshold= (s->m.lambda * s->m.lambda) >> 6;
3904 //FIXME pass the copy cleanly ?
3906 // memcpy(dwt_buffer, buffer, height * stride * sizeof(DWTELEM));
3907 ff_spatial_dwt(buffer, width, height, stride, type, s->spatial_decomposition_count);
3909 for(level=0; level<s->spatial_decomposition_count; level++){
3910 for(orientation=level ? 1 : 0; orientation<4; orientation++){
3911 SubBand *b= &p->band[level][orientation];
3912 IDWTELEM *dst= best_dequant + (b->ibuf - s->spatial_idwt_buffer);
3913 DWTELEM *src= buffer + (b-> buf - s->spatial_dwt_buffer);
3914 assert(src == b->buf); // code does not depend on this but it is true currently
3916 quantize(s, b, dst, src, b->stride, s->qbias);
3919 for(pass=0; pass<1; pass++){
3920 if(s->qbias == 0) //keyframe
3922 for(level=0; level<s->spatial_decomposition_count; level++){
3923 for(orientation=level ? 1 : 0; orientation<4; orientation++){
3924 SubBand *b= &p->band[level][orientation];
3925 IDWTELEM *dst= idwt2_buffer + (b->ibuf - s->spatial_idwt_buffer);
3926 IDWTELEM *best_dst= best_dequant + (b->ibuf - s->spatial_idwt_buffer);
3928 for(ys= 0; ys<Q2_STEP; ys++){
3929 for(xs= 0; xs<Q2_STEP; xs++){
3930 memcpy(idwt2_buffer, best_dequant, height * stride * sizeof(IDWTELEM));
3931 dequantize_all(s, p, idwt2_buffer, width, height);
3932 ff_spatial_idwt(idwt2_buffer, width, height, stride, type, s->spatial_decomposition_count);
3933 find_sse(s, p, best_score, score_stride, idwt2_buffer, s->spatial_idwt_buffer, level, orientation);
3934 memcpy(idwt2_buffer, best_dequant, height * stride * sizeof(IDWTELEM));
3935 for(y=ys; y<b->height; y+= Q2_STEP){
3936 for(x=xs; x<b->width; x+= Q2_STEP){
3937 if(dst[x + y*b->stride]<0) dst[x + y*b->stride]++;
3938 if(dst[x + y*b->stride]>0) dst[x + y*b->stride]--;
3939 //FIXME try more than just --
3942 dequantize_all(s, p, idwt2_buffer, width, height);
3943 ff_spatial_idwt(idwt2_buffer, width, height, stride, type, s->spatial_decomposition_count);
3944 find_sse(s, p, score, score_stride, idwt2_buffer, s->spatial_idwt_buffer, level, orientation);
3945 for(y=ys; y<b->height; y+= Q2_STEP){
3946 for(x=xs; x<b->width; x+= Q2_STEP){
3947 int score_idx= x/Q2_STEP + (y/Q2_STEP)*score_stride;
3948 if(score[score_idx] <= best_score[score_idx] + threshold){
3949 best_score[score_idx]= score[score_idx];
3950 if(best_dst[x + y*b->stride]<0) best_dst[x + y*b->stride]++;
3951 if(best_dst[x + y*b->stride]>0) best_dst[x + y*b->stride]--;
3952 //FIXME copy instead
3961 memcpy(s->spatial_idwt_buffer, best_dequant, height * stride * sizeof(IDWTELEM)); //FIXME work with that directly instead of copy at the end
3964 #endif /* QUANTIZE2==1 */
3966 static av_cold int encode_init(AVCodecContext *avctx)
3968 SnowContext *s = avctx->priv_data;
3971 if(avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL){
3972 av_log(avctx, AV_LOG_ERROR, "This codec is under development, files encoded with it may not be decodable with future versions!!!\n"
3973 "Use vstrict=-2 / -strict -2 to use it anyway.\n");
3977 if(avctx->prediction_method == DWT_97
3978 && (avctx->flags & CODEC_FLAG_QSCALE)
3979 && avctx->global_quality == 0){
3980 av_log(avctx, AV_LOG_ERROR, "The 9/7 wavelet is incompatible with lossless mode.\n");
3984 s->spatial_decomposition_type= avctx->prediction_method; //FIXME add decorrelator type r transform_type
3986 s->chroma_h_shift= 1; //FIXME XXX
3987 s->chroma_v_shift= 1;
3989 s->mv_scale = (avctx->flags & CODEC_FLAG_QPEL) ? 2 : 4;
3990 s->block_max_depth= (avctx->flags & CODEC_FLAG_4MV ) ? 1 : 0;
3992 for(plane_index=0; plane_index<3; plane_index++){
3993 s->plane[plane_index].diag_mc= 1;
3994 s->plane[plane_index].htaps= 6;
3995 s->plane[plane_index].hcoeff[0]= 40;
3996 s->plane[plane_index].hcoeff[1]= -10;
3997 s->plane[plane_index].hcoeff[2]= 2;
3998 s->plane[plane_index].fast_mc= 1;
4007 s->m.flags = avctx->flags;
4008 s->m.bit_rate= avctx->bit_rate;
4011 s->m.me.scratchpad= av_mallocz((avctx->width+64)*2*16*2*sizeof(uint8_t));
4012 s->m.me.map = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t));
4013 s->m.me.score_map = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t));
4014 s->m.obmc_scratchpad= av_mallocz(MB_SIZE*MB_SIZE*12*sizeof(uint32_t));
4015 h263_encode_init(&s->m); //mv_penalty
4017 s->max_ref_frames = FFMAX(FFMIN(avctx->refs, MAX_REF_FRAMES), 1);
4019 if(avctx->flags&CODEC_FLAG_PASS1){
4020 if(!avctx->stats_out)
4021 avctx->stats_out = av_mallocz(256);
4023 if((avctx->flags&CODEC_FLAG_PASS2) || !(avctx->flags&CODEC_FLAG_QSCALE)){
4024 if(ff_rate_control_init(&s->m) < 0)
4027 s->pass1_rc= !(avctx->flags & (CODEC_FLAG_QSCALE|CODEC_FLAG_PASS2));
4029 avctx->coded_frame= &s->current_picture;
4030 switch(avctx->pix_fmt){
4031 // case PIX_FMT_YUV444P:
4032 // case PIX_FMT_YUV422P:
4033 case PIX_FMT_YUV420P:
4035 // case PIX_FMT_YUV411P:
4036 // case PIX_FMT_YUV410P:
4037 s->colorspace_type= 0;
4039 /* case PIX_FMT_RGB32:
4043 av_log(avctx, AV_LOG_ERROR, "pixel format not supported\n");
4046 // avcodec_get_chroma_sub_sample(avctx->pix_fmt, &s->chroma_h_shift, &s->chroma_v_shift);
4047 s->chroma_h_shift= 1;
4048 s->chroma_v_shift= 1;
4050 ff_set_cmp(&s->dsp, s->dsp.me_cmp, s->avctx->me_cmp);
4051 ff_set_cmp(&s->dsp, s->dsp.me_sub_cmp, s->avctx->me_sub_cmp);
4053 s->avctx->get_buffer(s->avctx, &s->input_picture);
4055 if(s->avctx->me_method == ME_ITER){
4057 int size= s->b_width * s->b_height << 2*s->block_max_depth;
4058 for(i=0; i<s->max_ref_frames; i++){
4059 s->ref_mvs[i]= av_mallocz(size*sizeof(int16_t[2]));
4060 s->ref_scores[i]= av_mallocz(size*sizeof(uint32_t));
4067 #define USE_HALFPEL_PLANE 0
4069 static void halfpel_interpol(SnowContext *s, uint8_t *halfpel[4][4], AVFrame *frame){
4072 assert(!(s->avctx->flags & CODEC_FLAG_EMU_EDGE));
4076 int w= s->avctx->width >>is_chroma;
4077 int h= s->avctx->height >>is_chroma;
4078 int ls= frame->linesize[p];
4079 uint8_t *src= frame->data[p];
4081 halfpel[1][p]= (uint8_t*)av_malloc(ls * (h+2*EDGE_WIDTH)) + EDGE_WIDTH*(1+ls);
4082 halfpel[2][p]= (uint8_t*)av_malloc(ls * (h+2*EDGE_WIDTH)) + EDGE_WIDTH*(1+ls);
4083 halfpel[3][p]= (uint8_t*)av_malloc(ls * (h+2*EDGE_WIDTH)) + EDGE_WIDTH*(1+ls);
4090 halfpel[1][p][i]= (20*(src[i] + src[i+1]) - 5*(src[i-1] + src[i+2]) + (src[i-2] + src[i+3]) + 16 )>>5;
4097 halfpel[2][p][i]= (20*(src[i] + src[i+ls]) - 5*(src[i-ls] + src[i+2*ls]) + (src[i-2*ls] + src[i+3*ls]) + 16 )>>5;
4105 halfpel[3][p][i]= (20*(src[i] + src[i+ls]) - 5*(src[i-ls] + src[i+2*ls]) + (src[i-2*ls] + src[i+3*ls]) + 16 )>>5;
4113 static int frame_start(SnowContext *s){
4115 int w= s->avctx->width; //FIXME round up to x16 ?
4116 int h= s->avctx->height;
4118 if(s->current_picture.data[0]){
4119 s->dsp.draw_edges(s->current_picture.data[0], s->current_picture.linesize[0], w , h , EDGE_WIDTH );
4120 s->dsp.draw_edges(s->current_picture.data[1], s->current_picture.linesize[1], w>>1, h>>1, EDGE_WIDTH/2);
4121 s->dsp.draw_edges(s->current_picture.data[2], s->current_picture.linesize[2], w>>1, h>>1, EDGE_WIDTH/2);
4124 tmp= s->last_picture[s->max_ref_frames-1];
4125 memmove(s->last_picture+1, s->last_picture, (s->max_ref_frames-1)*sizeof(AVFrame));
4126 memmove(s->halfpel_plane+1, s->halfpel_plane, (s->max_ref_frames-1)*sizeof(void*)*4*4);
4127 if(USE_HALFPEL_PLANE && s->current_picture.data[0])
4128 halfpel_interpol(s, s->halfpel_plane[0], &s->current_picture);
4129 s->last_picture[0]= s->current_picture;
4130 s->current_picture= tmp;
4136 for(i=0; i<s->max_ref_frames && s->last_picture[i].data[0]; i++)
4137 if(i && s->last_picture[i-1].key_frame)
4142 s->current_picture.reference= 1;
4143 if(s->avctx->get_buffer(s->avctx, &s->current_picture) < 0){
4144 av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed\n");
4148 s->current_picture.key_frame= s->keyframe;
4153 static int encode_frame(AVCodecContext *avctx, unsigned char *buf, int buf_size, void *data){
4154 SnowContext *s = avctx->priv_data;
4155 RangeCoder * const c= &s->c;
4156 AVFrame *pict = data;
4157 const int width= s->avctx->width;
4158 const int height= s->avctx->height;
4159 int level, orientation, plane_index, i, y;
4160 uint8_t rc_header_bak[sizeof(s->header_state)];
4161 uint8_t rc_block_bak[sizeof(s->block_state)];
4163 ff_init_range_encoder(c, buf, buf_size);
4164 ff_build_rac_states(c, 0.05*(1LL<<32), 256-8);
4168 for(y=0; y<(height>>shift); y++)
4169 memcpy(&s->input_picture.data[i][y * s->input_picture.linesize[i]],
4170 &pict->data[i][y * pict->linesize[i]],
4173 s->new_picture = *pict;
4175 s->m.picture_number= avctx->frame_number;
4176 if(avctx->flags&CODEC_FLAG_PASS2){
4178 pict->pict_type= s->m.rc_context.entry[avctx->frame_number].new_pict_type;
4179 s->keyframe= pict->pict_type==FF_I_TYPE;
4180 if(!(avctx->flags&CODEC_FLAG_QSCALE)) {
4181 pict->quality= ff_rate_estimate_qscale(&s->m, 0);
4182 if (pict->quality < 0)
4186 s->keyframe= avctx->gop_size==0 || avctx->frame_number % avctx->gop_size == 0;
4188 pict->pict_type= s->keyframe ? FF_I_TYPE : FF_P_TYPE;
4191 if(s->pass1_rc && avctx->frame_number == 0)
4192 pict->quality= 2*FF_QP2LAMBDA;
4194 s->qlog= qscale2qlog(pict->quality);
4195 s->lambda = pict->quality * 3/2;
4197 if(s->qlog < 0 || (!pict->quality && (avctx->flags & CODEC_FLAG_QSCALE))){
4198 s->qlog= LOSSLESS_QLOG;
4200 }//else keep previous frame's qlog until after motion estimation
4204 s->m.current_picture_ptr= &s->m.current_picture;
4205 if(pict->pict_type == FF_P_TYPE){
4206 int block_width = (width +15)>>4;
4207 int block_height= (height+15)>>4;
4208 int stride= s->current_picture.linesize[0];
4210 assert(s->current_picture.data[0]);
4211 assert(s->last_picture[0].data[0]);
4213 s->m.avctx= s->avctx;
4214 s->m.current_picture.data[0]= s->current_picture.data[0];
4215 s->m. last_picture.data[0]= s->last_picture[0].data[0];
4216 s->m. new_picture.data[0]= s-> input_picture.data[0];
4217 s->m. last_picture_ptr= &s->m. last_picture;
4219 s->m. last_picture.linesize[0]=
4220 s->m. new_picture.linesize[0]=
4221 s->m.current_picture.linesize[0]= stride;
4222 s->m.uvlinesize= s->current_picture.linesize[1];
4224 s->m.height= height;
4225 s->m.mb_width = block_width;
4226 s->m.mb_height= block_height;
4227 s->m.mb_stride= s->m.mb_width+1;
4228 s->m.b8_stride= 2*s->m.mb_width+1;
4230 s->m.pict_type= pict->pict_type;
4231 s->m.me_method= s->avctx->me_method;
4232 s->m.me.scene_change_score=0;
4233 s->m.flags= s->avctx->flags;
4234 s->m.quarter_sample= (s->avctx->flags & CODEC_FLAG_QPEL)!=0;
4235 s->m.out_format= FMT_H263;
4236 s->m.unrestricted_mv= 1;
4238 s->m.lambda = s->lambda;
4239 s->m.qscale= (s->m.lambda*139 + FF_LAMBDA_SCALE*64) >> (FF_LAMBDA_SHIFT + 7);
4240 s->lambda2= s->m.lambda2= (s->m.lambda*s->m.lambda + FF_LAMBDA_SCALE/2) >> FF_LAMBDA_SHIFT;
4242 s->m.dsp= s->dsp; //move
4248 memcpy(rc_header_bak, s->header_state, sizeof(s->header_state));
4249 memcpy(rc_block_bak, s->block_state, sizeof(s->block_state));
4254 if(pict->pict_type == FF_I_TYPE)
4255 s->spatial_decomposition_count= 5;
4257 s->spatial_decomposition_count= 5;
4259 s->m.pict_type = pict->pict_type;
4260 s->qbias= pict->pict_type == FF_P_TYPE ? 2 : 0;
4262 common_init_after_header(avctx);
4264 if(s->last_spatial_decomposition_count != s->spatial_decomposition_count){
4265 for(plane_index=0; plane_index<3; plane_index++){
4266 calculate_visual_weight(s, &s->plane[plane_index]);
4271 s->m.misc_bits = 8*(s->c.bytestream - s->c.bytestream_start);
4272 encode_blocks(s, 1);
4273 s->m.mv_bits = 8*(s->c.bytestream - s->c.bytestream_start) - s->m.misc_bits;
4275 for(plane_index=0; plane_index<3; plane_index++){
4276 Plane *p= &s->plane[plane_index];
4280 // int bits= put_bits_count(&s->c.pb);
4282 if(!(avctx->flags2 & CODEC_FLAG2_MEMC_ONLY)){
4284 if(pict->data[plane_index]) //FIXME gray hack
4287 s->spatial_idwt_buffer[y*w + x]= pict->data[plane_index][y*pict->linesize[plane_index] + x]<<FRAC_BITS;
4290 predict_plane(s, s->spatial_idwt_buffer, plane_index, 0);
4293 && pict->pict_type == FF_P_TYPE
4294 && !(avctx->flags&CODEC_FLAG_PASS2)
4295 && s->m.me.scene_change_score > s->avctx->scenechange_threshold){
4296 ff_init_range_encoder(c, buf, buf_size);
4297 ff_build_rac_states(c, 0.05*(1LL<<32), 256-8);
4298 pict->pict_type= FF_I_TYPE;
4300 s->current_picture.key_frame=1;
4304 if(s->qlog == LOSSLESS_QLOG){
4307 s->spatial_dwt_buffer[y*w + x]= (s->spatial_idwt_buffer[y*w + x] + (1<<(FRAC_BITS-1))-1)>>FRAC_BITS;
4313 s->spatial_dwt_buffer[y*w + x]=s->spatial_idwt_buffer[y*w + x]<<ENCODER_EXTRA_BITS;
4319 dwt_quantize(s, p, s->spatial_dwt_buffer, w, h, w, s->spatial_decomposition_type);
4321 ff_spatial_dwt(s->spatial_dwt_buffer, w, h, w, s->spatial_decomposition_type, s->spatial_decomposition_count);
4323 if(s->pass1_rc && plane_index==0){
4324 int delta_qlog = ratecontrol_1pass(s, pict);
4325 if (delta_qlog <= INT_MIN)
4328 //reordering qlog in the bitstream would eliminate this reset
4329 ff_init_range_encoder(c, buf, buf_size);
4330 memcpy(s->header_state, rc_header_bak, sizeof(s->header_state));
4331 memcpy(s->block_state, rc_block_bak, sizeof(s->block_state));
4333 encode_blocks(s, 0);
4337 for(level=0; level<s->spatial_decomposition_count; level++){
4338 for(orientation=level ? 1 : 0; orientation<4; orientation++){
4339 SubBand *b= &p->band[level][orientation];
4342 quantize(s, b, b->ibuf, b->buf, b->stride, s->qbias);
4344 decorrelate(s, b, b->ibuf, b->stride, pict->pict_type == FF_P_TYPE, 0);
4345 encode_subband(s, b, b->ibuf, b->parent ? b->parent->ibuf : NULL, b->stride, orientation);
4346 assert(b->parent==NULL || b->parent->stride == b->stride*2);
4348 correlate(s, b, b->ibuf, b->stride, 1, 0);
4352 for(level=0; level<s->spatial_decomposition_count; level++){
4353 for(orientation=level ? 1 : 0; orientation<4; orientation++){
4354 SubBand *b= &p->band[level][orientation];
4356 dequantize(s, b, b->ibuf, b->stride);
4360 ff_spatial_idwt(s->spatial_idwt_buffer, w, h, w, s->spatial_decomposition_type, s->spatial_decomposition_count);
4361 if(s->qlog == LOSSLESS_QLOG){
4364 s->spatial_idwt_buffer[y*w + x]<<=FRAC_BITS;
4368 predict_plane(s, s->spatial_idwt_buffer, plane_index, 1);
4371 if(pict->pict_type == FF_I_TYPE){
4374 s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x]=
4375 pict->data[plane_index][y*pict->linesize[plane_index] + x];
4379 memset(s->spatial_idwt_buffer, 0, sizeof(IDWTELEM)*w*h);
4380 predict_plane(s, s->spatial_idwt_buffer, plane_index, 1);
4383 if(s->avctx->flags&CODEC_FLAG_PSNR){
4386 if(pict->data[plane_index]) //FIXME gray hack
4389 int d= s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x] - pict->data[plane_index][y*pict->linesize[plane_index] + x];
4393 s->avctx->error[plane_index] += error;
4394 s->current_picture.error[plane_index] = error;
4399 update_last_header_values(s);
4401 if(s->last_picture[s->max_ref_frames-1].data[0]){
4402 avctx->release_buffer(avctx, &s->last_picture[s->max_ref_frames-1]);
4404 if(s->halfpel_plane[s->max_ref_frames-1][1+i/3][i%3])
4405 av_free(s->halfpel_plane[s->max_ref_frames-1][1+i/3][i%3] - EDGE_WIDTH*(1+s->current_picture.linesize[i%3]));
4408 s->current_picture.coded_picture_number = avctx->frame_number;
4409 s->current_picture.pict_type = pict->pict_type;
4410 s->current_picture.quality = pict->quality;
4411 s->m.frame_bits = 8*(s->c.bytestream - s->c.bytestream_start);
4412 s->m.p_tex_bits = s->m.frame_bits - s->m.misc_bits - s->m.mv_bits;
4413 s->m.current_picture.display_picture_number =
4414 s->m.current_picture.coded_picture_number = avctx->frame_number;
4415 s->m.current_picture.quality = pict->quality;
4416 s->m.total_bits += 8*(s->c.bytestream - s->c.bytestream_start);
4418 if (ff_rate_estimate_qscale(&s->m, 0) < 0)
4420 if(avctx->flags&CODEC_FLAG_PASS1)
4421 ff_write_pass1_stats(&s->m);
4422 s->m.last_pict_type = s->m.pict_type;
4423 avctx->frame_bits = s->m.frame_bits;
4424 avctx->mv_bits = s->m.mv_bits;
4425 avctx->misc_bits = s->m.misc_bits;
4426 avctx->p_tex_bits = s->m.p_tex_bits;
4430 return ff_rac_terminate(c);
4433 static av_cold void common_end(SnowContext *s){
4434 int plane_index, level, orientation, i;
4436 av_freep(&s->spatial_dwt_buffer);
4437 av_freep(&s->spatial_idwt_buffer);
4440 av_freep(&s->m.me.scratchpad);
4441 av_freep(&s->m.me.map);
4442 av_freep(&s->m.me.score_map);
4443 av_freep(&s->m.obmc_scratchpad);
4445 av_freep(&s->block);
4446 av_freep(&s->scratchbuf);
4448 for(i=0; i<MAX_REF_FRAMES; i++){
4449 av_freep(&s->ref_mvs[i]);
4450 av_freep(&s->ref_scores[i]);
4451 if(s->last_picture[i].data[0])
4452 s->avctx->release_buffer(s->avctx, &s->last_picture[i]);
4455 for(plane_index=0; plane_index<3; plane_index++){
4456 for(level=s->spatial_decomposition_count-1; level>=0; level--){
4457 for(orientation=level ? 1 : 0; orientation<4; orientation++){
4458 SubBand *b= &s->plane[plane_index].band[level][orientation];
4460 av_freep(&b->x_coeff);
4466 static av_cold int encode_end(AVCodecContext *avctx)
4468 SnowContext *s = avctx->priv_data;
4471 av_free(avctx->stats_out);
4476 static av_cold int decode_init(AVCodecContext *avctx)
4478 avctx->pix_fmt= PIX_FMT_YUV420P;
4485 static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, const uint8_t *buf, int buf_size){
4486 SnowContext *s = avctx->priv_data;
4487 RangeCoder * const c= &s->c;
4489 AVFrame *picture = data;
4490 int level, orientation, plane_index, i;
4492 ff_init_range_decoder(c, buf, buf_size);
4493 ff_build_rac_states(c, 0.05*(1LL<<32), 256-8);
4495 s->current_picture.pict_type= FF_I_TYPE; //FIXME I vs. P
4496 if(decode_header(s)<0)
4498 common_init_after_header(avctx);
4500 // realloc slice buffer for the case that spatial_decomposition_count changed
4501 slice_buffer_destroy(&s->sb);
4502 slice_buffer_init(&s->sb, s->plane[0].height, (MB_SIZE >> s->block_max_depth) + s->spatial_decomposition_count * 8 + 1, s->plane[0].width, s->spatial_idwt_buffer);
4504 for(plane_index=0; plane_index<3; plane_index++){
4505 Plane *p= &s->plane[plane_index];
4506 p->fast_mc= p->diag_mc && p->htaps==6 && p->hcoeff[0]==40
4507 && p->hcoeff[1]==-10
4511 if(!s->block) alloc_blocks(s);
4514 //keyframe flag duplication mess FIXME
4515 if(avctx->debug&FF_DEBUG_PICT_INFO)
4516 av_log(avctx, AV_LOG_ERROR, "keyframe:%d qlog:%d\n", s->keyframe, s->qlog);
4520 for(plane_index=0; plane_index<3; plane_index++){
4521 Plane *p= &s->plane[plane_index];
4525 int decode_state[MAX_DECOMPOSITIONS][4][1]; /* Stored state info for unpack_coeffs. 1 variable per instance. */
4527 if(s->avctx->debug&2048){
4528 memset(s->spatial_dwt_buffer, 0, sizeof(DWTELEM)*w*h);
4529 predict_plane(s, s->spatial_idwt_buffer, plane_index, 1);
4533 int v= s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x];
4534 s->mconly_picture.data[plane_index][y*s->mconly_picture.linesize[plane_index] + x]= v;
4540 for(level=0; level<s->spatial_decomposition_count; level++){
4541 for(orientation=level ? 1 : 0; orientation<4; orientation++){
4542 SubBand *b= &p->band[level][orientation];
4543 unpack_coeffs(s, b, b->parent, orientation);
4549 const int mb_h= s->b_height << s->block_max_depth;
4550 const int block_size = MB_SIZE >> s->block_max_depth;
4551 const int block_w = plane_index ? block_size/2 : block_size;
4553 DWTCompose cs[MAX_DECOMPOSITIONS];
4558 ff_spatial_idwt_buffered_init(cs, &s->sb, w, h, 1, s->spatial_decomposition_type, s->spatial_decomposition_count);
4559 for(mb_y=0; mb_y<=mb_h; mb_y++){
4561 int slice_starty = block_w*mb_y;
4562 int slice_h = block_w*(mb_y+1);
4563 if (!(s->keyframe || s->avctx->debug&512)){
4564 slice_starty = FFMAX(0, slice_starty - (block_w >> 1));
4565 slice_h -= (block_w >> 1);
4568 for(level=0; level<s->spatial_decomposition_count; level++){
4569 for(orientation=level ? 1 : 0; orientation<4; orientation++){
4570 SubBand *b= &p->band[level][orientation];
4573 int our_mb_start = mb_y;
4574 int our_mb_end = (mb_y + 1);
4576 start_y = (mb_y ? ((block_w * our_mb_start) >> (s->spatial_decomposition_count - level)) + s->spatial_decomposition_count - level + extra: 0);
4577 end_y = (((block_w * our_mb_end) >> (s->spatial_decomposition_count - level)) + s->spatial_decomposition_count - level + extra);
4578 if (!(s->keyframe || s->avctx->debug&512)){
4579 start_y = FFMAX(0, start_y - (block_w >> (1+s->spatial_decomposition_count - level)));
4580 end_y = FFMAX(0, end_y - (block_w >> (1+s->spatial_decomposition_count - level)));
4582 start_y = FFMIN(b->height, start_y);
4583 end_y = FFMIN(b->height, end_y);
4585 if (start_y != end_y){
4586 if (orientation == 0){
4587 SubBand * correlate_band = &p->band[0][0];
4588 int correlate_end_y = FFMIN(b->height, end_y + 1);
4589 int correlate_start_y = FFMIN(b->height, (start_y ? start_y + 1 : 0));
4590 decode_subband_slice_buffered(s, correlate_band, &s->sb, correlate_start_y, correlate_end_y, decode_state[0][0]);
4591 correlate_slice_buffered(s, &s->sb, correlate_band, correlate_band->ibuf, correlate_band->stride, 1, 0, correlate_start_y, correlate_end_y);
4592 dequantize_slice_buffered(s, &s->sb, correlate_band, correlate_band->ibuf, correlate_band->stride, start_y, end_y);
4595 decode_subband_slice_buffered(s, b, &s->sb, start_y, end_y, decode_state[level][orientation]);
4600 for(; yd<slice_h; yd+=4){
4601 ff_spatial_idwt_buffered_slice(&s->dsp, cs, &s->sb, w, h, 1, s->spatial_decomposition_type, s->spatial_decomposition_count, yd);
4604 if(s->qlog == LOSSLESS_QLOG){
4605 for(; yq<slice_h && yq<h; yq++){
4606 IDWTELEM * line = slice_buffer_get_line(&s->sb, yq);
4608 line[x] <<= FRAC_BITS;
4613 predict_slice_buffered(s, &s->sb, s->spatial_idwt_buffer, plane_index, 1, mb_y);
4615 y = FFMIN(p->height, slice_starty);
4616 end_y = FFMIN(p->height, slice_h);
4618 slice_buffer_release(&s->sb, y++);
4621 slice_buffer_flush(&s->sb);
4628 if(s->last_picture[s->max_ref_frames-1].data[0]){
4629 avctx->release_buffer(avctx, &s->last_picture[s->max_ref_frames-1]);
4631 if(s->halfpel_plane[s->max_ref_frames-1][1+i/3][i%3])
4632 av_free(s->halfpel_plane[s->max_ref_frames-1][1+i/3][i%3] - EDGE_WIDTH*(1+s->current_picture.linesize[i%3]));
4635 if(!(s->avctx->debug&2048))
4636 *picture= s->current_picture;
4638 *picture= s->mconly_picture;
4640 *data_size = sizeof(AVFrame);
4642 bytes_read= c->bytestream - c->bytestream_start;
4643 if(bytes_read ==0) av_log(s->avctx, AV_LOG_ERROR, "error at end of frame\n"); //FIXME
4648 static av_cold int decode_end(AVCodecContext *avctx)
4650 SnowContext *s = avctx->priv_data;
4652 slice_buffer_destroy(&s->sb);
4659 AVCodec snow_decoder = {
4663 sizeof(SnowContext),
4668 0 /*CODEC_CAP_DR1*/ /*| CODEC_CAP_DRAW_HORIZ_BAND*/,
4670 .long_name = NULL_IF_CONFIG_SMALL("Snow"),
4673 #ifdef CONFIG_SNOW_ENCODER
4674 AVCodec snow_encoder = {
4678 sizeof(SnowContext),
4682 .long_name = NULL_IF_CONFIG_SMALL("Snow"),
4696 int buffer[2][width*height];
4699 s.spatial_decomposition_count=6;
4700 s.spatial_decomposition_type=1;
4702 printf("testing 5/3 DWT\n");
4703 for(i=0; i<width*height; i++)
4704 buffer[0][i]= buffer[1][i]= random()%54321 - 12345;
4706 ff_spatial_dwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4707 ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4709 for(i=0; i<width*height; i++)
4710 if(buffer[0][i]!= buffer[1][i]) printf("fsck: %d %d %d\n",i, buffer[0][i], buffer[1][i]);
4712 printf("testing 9/7 DWT\n");
4713 s.spatial_decomposition_type=0;
4714 for(i=0; i<width*height; i++)
4715 buffer[0][i]= buffer[1][i]= random()%54321 - 12345;
4717 ff_spatial_dwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4718 ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4720 for(i=0; i<width*height; i++)
4721 if(FFABS(buffer[0][i] - buffer[1][i])>20) printf("fsck: %d %d %d\n",i, buffer[0][i], buffer[1][i]);
4724 printf("testing AC coder\n");
4725 memset(s.header_state, 0, sizeof(s.header_state));
4726 ff_init_range_encoder(&s.c, buffer[0], 256*256);
4727 ff_init_cabac_states(&s.c, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64);
4729 for(i=-256; i<256; i++){
4730 put_symbol(&s.c, s.header_state, i*i*i/3*FFABS(i), 1);
4732 ff_rac_terminate(&s.c);
4734 memset(s.header_state, 0, sizeof(s.header_state));
4735 ff_init_range_decoder(&s.c, buffer[0], 256*256);
4736 ff_init_cabac_states(&s.c, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64);
4738 for(i=-256; i<256; i++){
4740 j= get_symbol(&s.c, s.header_state, 1);
4741 if(j!=i*i*i/3*FFABS(i)) printf("fsck: %d != %d\n", i, j);
4745 int level, orientation, x, y;
4746 int64_t errors[8][4];
4749 memset(errors, 0, sizeof(errors));
4750 s.spatial_decomposition_count=3;
4751 s.spatial_decomposition_type=0;
4752 for(level=0; level<s.spatial_decomposition_count; level++){
4753 for(orientation=level ? 1 : 0; orientation<4; orientation++){
4754 int w= width >> (s.spatial_decomposition_count-level);
4755 int h= height >> (s.spatial_decomposition_count-level);
4756 int stride= width << (s.spatial_decomposition_count-level);
4757 DWTELEM *buf= buffer[0];
4760 if(orientation&1) buf+=w;
4761 if(orientation>1) buf+=stride>>1;
4763 memset(buffer[0], 0, sizeof(int)*width*height);
4764 buf[w/2 + h/2*stride]= 256*256;
4765 ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4766 for(y=0; y<height; y++){
4767 for(x=0; x<width; x++){
4768 int64_t d= buffer[0][x + y*width];
4770 if(FFABS(width/2-x)<9 && FFABS(height/2-y)<9 && level==2) printf("%8"PRId64" ", d);
4772 if(FFABS(height/2-y)<9 && level==2) printf("\n");
4774 error= (int)(sqrt(error)+0.5);
4775 errors[level][orientation]= error;
4776 if(g) g=ff_gcd(g, error);
4780 printf("static int const visual_weight[][4]={\n");
4781 for(level=0; level<s.spatial_decomposition_count; level++){
4783 for(orientation=0; orientation<4; orientation++){
4784 printf("%8"PRId64",", errors[level][orientation]/g);
4791 int w= width >> (s.spatial_decomposition_count-level);
4792 //int h= height >> (s.spatial_decomposition_count-level);
4793 int stride= width << (s.spatial_decomposition_count-level);
4794 DWTELEM *buf= buffer[0];
4800 memset(buffer[0], 0, sizeof(int)*width*height);
4802 for(y=0; y<height; y++){
4803 for(x=0; x<width; x++){
4804 int tab[4]={0,2,3,1};
4805 buffer[0][x+width*y]= 256*256*tab[(x&1) + 2*(y&1)];
4808 ff_spatial_dwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4812 buf[x + y*stride ]=169;
4813 buf[x + y*stride-w]=64;
4816 ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4818 for(y=0; y<height; y++){
4819 for(x=0; x<width; x++){
4820 int64_t d= buffer[0][x + y*width];
4822 if(FFABS(width/2-x)<9 && FFABS(height/2-y)<9) printf("%8"PRId64" ", d);
4824 if(FFABS(height/2-y)<9) printf("\n");