2 * Copyright (C) 2004 Michael Niedermayer <michaelni@gmx.at>
4 * This file is part of FFmpeg.
6 * FFmpeg is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * FFmpeg is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with FFmpeg; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
25 #include "rangecoder.h"
27 #include "mpegvideo.h"
/*
 * 256-entry signed lookup table with three output levels (-1, 0, +1).
 * Indices 0 and 1 map to 0, indices 2..127 map to +1, indices 128..254
 * map to -1 and index 255 maps to 0.
 * NOTE(review): presumably indexed by a sample difference reduced to 8 bits
 * (so 128..255 stand for negative differences) -- confirm at the use site.
 */
32 static const int8_t quant3[256]={
33 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
34 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
35 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
36 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
37 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
38 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
39 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
40 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
41 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
42 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
43 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
44 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
45 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
46 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
47 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
48 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 0,
/*
 * 256-entry signed lookup table with three output levels (-1, 0, +1).
 * Only index 0 maps to 0; indices 1..127 map to +1 and indices 128..255
 * map to -1, i.e. there is no dead zone around zero beyond index 0 itself.
 */
50 static const int8_t quant3b[256]={
51 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
52 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
53 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
54 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
55 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
56 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
57 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
58 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
59 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
60 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
61 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
62 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
63 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
64 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
65 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
66 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
/*
 * 256-entry signed lookup table with three output levels (-1, 0, +1).
 * Indices 0 and 1 map to 0; from index 2 onwards the value alternates:
 * even indices map to +1 and odd indices map to -1.
 * NOTE(review): the alternating layout suggests the index interleaves sign
 * and magnitude rather than being a plain two's-complement byte -- confirm
 * against the code that indexes this table.
 */
68 static const int8_t quant3bA[256]={
69 0, 0, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
70 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
71 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
72 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
73 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
74 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
75 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
76 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
77 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
78 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
79 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
80 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
81 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
82 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
83 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
84 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
/*
 * 256-entry signed lookup table with five output levels (-2..+2).
 * Index 0 maps to 0, indices 1..3 to +1, indices 4..127 to +2,
 * indices 128..252 to -2 and indices 253..255 to -1.
 */
86 static const int8_t quant5[256]={
87 0, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
88 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
89 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
90 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
91 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
92 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
93 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
94 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
95 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
96 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
97 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
98 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
99 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
100 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
101 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
102 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,-1,
/*
 * 256-entry signed lookup table with seven output levels (-3..+3).
 * The first half (indices 0..127) holds the non-negative levels in
 * non-decreasing order starting from 0 at index 0; the second half
 * (indices 128..255) holds the negative levels, ending at -1 for the
 * last two indices.
 */
104 static const int8_t quant7[256]={
105 0, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
106 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
107 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3,
108 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
109 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
110 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
111 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
112 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
113 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
114 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
115 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
116 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
117 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
118 -3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-2,-2,-2,
119 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
120 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,
/*
 * 256-entry signed lookup table with nine output levels (-4..+4).
 * Indices 0..127 hold the non-negative levels in non-decreasing order
 * (0 at index 0, saturating at +4); indices 128..255 hold the negative
 * levels, rising from -4 to -1 towards index 255.
 */
122 static const int8_t quant9[256]={
123 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3,
124 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
125 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
126 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
127 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
128 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
129 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
130 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
131 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
132 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
133 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
134 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
135 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
136 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
137 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,
138 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-1,-1,
/*
 * 256-entry signed lookup table with eleven output levels (-5..+5).
 * Indices 0..127 hold the non-negative levels in non-decreasing order
 * (0 at index 0, saturating at +5); indices 128..255 hold the negative
 * levels, rising from -5 to -1 at index 255.
 */
140 static const int8_t quant11[256]={
141 0, 1, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4,
142 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
143 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
144 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
145 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
146 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
147 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
148 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
149 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
150 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
151 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
152 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
153 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
154 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-4,-4,
155 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
156 -4,-4,-4,-4,-4,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-1,
/*
 * 256-entry signed lookup table with thirteen output levels (-6..+6).
 * Indices 0..127 hold the non-negative levels in non-decreasing order
 * (0 at index 0, saturating at +6); indices 128..255 hold the negative
 * levels, rising from -6 to -1 at index 255.
 */
158 static const int8_t quant13[256]={
159 0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
160 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
161 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
162 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
163 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
164 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
165 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
166 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
167 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
168 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
169 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
170 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
171 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-5,
172 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
173 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
174 -4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,-2,-2,-1,
/*
 * 1024-entry unsigned weight table laid out as 32 rows of 32 values.
 * Weights are 0 along the outer border and rise to 255 at the four central
 * entries; the two middle rows carry 1 in their first and last columns.
 * NOTE(review): this is the first of three definitions of obmc32 visible in
 * this listing; presumably they are alternatives selected by preprocessor
 * conditionals that are not shown here -- confirm which one is compiled.
 * NOTE(review): presumably the window for overlapped block motion
 * compensation of 32x32 areas -- inferred from the name only.
 */
178 static const uint8_t obmc32[1024]={
179 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
180 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0,
181 0, 0, 0, 4, 4, 4, 4, 8, 8, 12, 12, 12, 16, 16, 16, 16, 16, 16, 16, 16, 12, 12, 12, 8, 8, 4, 4, 4, 4, 0, 0, 0,
182 0, 0, 4, 4, 8, 8, 12, 16, 16, 20, 24, 24, 28, 28, 32, 32, 32, 32, 28, 28, 24, 24, 20, 16, 16, 12, 8, 8, 4, 4, 0, 0,
183 0, 0, 4, 8, 8, 12, 16, 24, 28, 32, 36, 40, 44, 48, 48, 48, 48, 48, 48, 44, 40, 36, 32, 28, 24, 16, 12, 8, 8, 4, 0, 0,
184 0, 4, 4, 8, 12, 20, 24, 32, 40, 44, 52, 56, 60, 64, 68, 72, 72, 68, 64, 60, 56, 52, 44, 40, 32, 24, 20, 12, 8, 4, 4, 0,
185 0, 4, 4, 12, 16, 24, 32, 40, 52, 60, 68, 76, 80, 88, 88, 92, 92, 88, 88, 80, 76, 68, 60, 52, 40, 32, 24, 16, 12, 4, 4, 0,
186 0, 4, 8, 16, 24, 32, 40, 52, 64, 76, 84, 92,100,108,112,116,116,112,108,100, 92, 84, 76, 64, 52, 40, 32, 24, 16, 8, 4, 0,
187 0, 4, 8, 16, 28, 40, 52, 64, 76, 88,100,112,124,132,136,140,140,136,132,124,112,100, 88, 76, 64, 52, 40, 28, 16, 8, 4, 0,
188 0, 4, 12, 20, 32, 44, 60, 76, 88,104,120,132,144,152,160,164,164,160,152,144,132,120,104, 88, 76, 60, 44, 32, 20, 12, 4, 0,
189 0, 4, 12, 24, 36, 48, 68, 84,100,120,136,152,164,176,180,184,184,180,176,164,152,136,120,100, 84, 68, 48, 36, 24, 12, 4, 0,
190 0, 4, 12, 24, 40, 56, 76, 92,112,132,152,168,180,192,204,208,208,204,192,180,168,152,132,112, 92, 76, 56, 40, 24, 12, 4, 0,
191 0, 4, 16, 28, 44, 60, 80,100,124,144,164,180,196,208,220,224,224,220,208,196,180,164,144,124,100, 80, 60, 44, 28, 16, 4, 0,
192 0, 8, 16, 28, 48, 64, 88,108,132,152,176,192,208,224,232,240,240,232,224,208,192,176,152,132,108, 88, 64, 48, 28, 16, 8, 0,
193 0, 4, 16, 32, 48, 68, 88,112,136,160,180,204,220,232,244,248,248,244,232,220,204,180,160,136,112, 88, 68, 48, 32, 16, 4, 0,
194 1, 8, 16, 32, 48, 72, 92,116,140,164,184,208,224,240,248,255,255,248,240,224,208,184,164,140,116, 92, 72, 48, 32, 16, 8, 1,
195 1, 8, 16, 32, 48, 72, 92,116,140,164,184,208,224,240,248,255,255,248,240,224,208,184,164,140,116, 92, 72, 48, 32, 16, 8, 1,
196 0, 4, 16, 32, 48, 68, 88,112,136,160,180,204,220,232,244,248,248,244,232,220,204,180,160,136,112, 88, 68, 48, 32, 16, 4, 0,
197 0, 8, 16, 28, 48, 64, 88,108,132,152,176,192,208,224,232,240,240,232,224,208,192,176,152,132,108, 88, 64, 48, 28, 16, 8, 0,
198 0, 4, 16, 28, 44, 60, 80,100,124,144,164,180,196,208,220,224,224,220,208,196,180,164,144,124,100, 80, 60, 44, 28, 16, 4, 0,
199 0, 4, 12, 24, 40, 56, 76, 92,112,132,152,168,180,192,204,208,208,204,192,180,168,152,132,112, 92, 76, 56, 40, 24, 12, 4, 0,
200 0, 4, 12, 24, 36, 48, 68, 84,100,120,136,152,164,176,180,184,184,180,176,164,152,136,120,100, 84, 68, 48, 36, 24, 12, 4, 0,
201 0, 4, 12, 20, 32, 44, 60, 76, 88,104,120,132,144,152,160,164,164,160,152,144,132,120,104, 88, 76, 60, 44, 32, 20, 12, 4, 0,
202 0, 4, 8, 16, 28, 40, 52, 64, 76, 88,100,112,124,132,136,140,140,136,132,124,112,100, 88, 76, 64, 52, 40, 28, 16, 8, 4, 0,
203 0, 4, 8, 16, 24, 32, 40, 52, 64, 76, 84, 92,100,108,112,116,116,112,108,100, 92, 84, 76, 64, 52, 40, 32, 24, 16, 8, 4, 0,
204 0, 4, 4, 12, 16, 24, 32, 40, 52, 60, 68, 76, 80, 88, 88, 92, 92, 88, 88, 80, 76, 68, 60, 52, 40, 32, 24, 16, 12, 4, 4, 0,
205 0, 4, 4, 8, 12, 20, 24, 32, 40, 44, 52, 56, 60, 64, 68, 72, 72, 68, 64, 60, 56, 52, 44, 40, 32, 24, 20, 12, 8, 4, 4, 0,
206 0, 0, 4, 8, 8, 12, 16, 24, 28, 32, 36, 40, 44, 48, 48, 48, 48, 48, 48, 44, 40, 36, 32, 28, 24, 16, 12, 8, 8, 4, 0, 0,
207 0, 0, 4, 4, 8, 8, 12, 16, 16, 20, 24, 24, 28, 28, 32, 32, 32, 32, 28, 28, 24, 24, 20, 16, 16, 12, 8, 8, 4, 4, 0, 0,
208 0, 0, 0, 4, 4, 4, 4, 8, 8, 12, 12, 12, 16, 16, 16, 16, 16, 16, 16, 16, 12, 12, 12, 8, 8, 4, 4, 4, 4, 0, 0, 0,
209 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0,
210 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
/*
 * 256-entry unsigned weight table laid out as 16 rows of 16 values.
 * Corner weights are 0 and the four central entries are 248.
 * NOTE(review): this is the first of three definitions of obmc16 visible in
 * this listing; presumably they are alternatives selected by preprocessor
 * conditionals that are not shown here -- confirm which one is compiled.
 */
213 static const uint8_t obmc16[256]={
214 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0,
215 0, 4, 4, 8, 16, 20, 20, 24, 24, 20, 20, 16, 8, 4, 4, 0,
216 0, 4, 16, 24, 36, 44, 52, 60, 60, 52, 44, 36, 24, 16, 4, 0,
217 0, 8, 24, 44, 60, 80, 96,104,104, 96, 80, 60, 44, 24, 8, 0,
218 0, 16, 36, 60, 92,116,136,152,152,136,116, 92, 60, 36, 16, 0,
219 0, 20, 44, 80,116,152,180,196,196,180,152,116, 80, 44, 20, 0,
220 4, 20, 52, 96,136,180,212,228,228,212,180,136, 96, 52, 20, 4,
221 4, 24, 60,104,152,196,228,248,248,228,196,152,104, 60, 24, 4,
222 4, 24, 60,104,152,196,228,248,248,228,196,152,104, 60, 24, 4,
223 4, 20, 52, 96,136,180,212,228,228,212,180,136, 96, 52, 20, 4,
224 0, 20, 44, 80,116,152,180,196,196,180,152,116, 80, 44, 20, 0,
225 0, 16, 36, 60, 92,116,136,152,152,136,116, 92, 60, 36, 16, 0,
226 0, 8, 24, 44, 60, 80, 96,104,104, 96, 80, 60, 44, 24, 8, 0,
227 0, 4, 16, 24, 36, 44, 52, 60, 60, 52, 44, 36, 24, 16, 4, 0,
228 0, 4, 4, 8, 16, 20, 20, 24, 24, 20, 20, 16, 8, 4, 4, 0,
229 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0,
/*
 * 1024-entry unsigned weight table laid out as 32 rows of 32 values.
 * In this variant the corner weights are 0, the border is not entirely zero
 * (edge values reach 8) and the four central entries are 240.
 * NOTE(review): this is the second of three definitions of obmc32 visible in
 * this listing; presumably they are alternatives selected by preprocessor
 * conditionals that are not shown here -- confirm which one is compiled.
 */
233 static const uint8_t obmc32[1024]={
234 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0,
235 0, 4, 4, 4, 8, 8, 8, 12, 12, 16, 16, 16, 20, 20, 20, 24, 24, 20, 20, 20, 16, 16, 16, 12, 12, 8, 8, 8, 4, 4, 4, 0,
236 0, 4, 8, 8, 12, 12, 16, 20, 20, 24, 28, 28, 32, 32, 36, 40, 40, 36, 32, 32, 28, 28, 24, 20, 20, 16, 12, 12, 8, 8, 4, 0,
237 0, 4, 8, 12, 16, 20, 24, 28, 28, 32, 36, 40, 44, 48, 52, 56, 56, 52, 48, 44, 40, 36, 32, 28, 28, 24, 20, 16, 12, 8, 4, 0,
238 4, 8, 12, 16, 20, 24, 28, 32, 40, 44, 48, 52, 56, 60, 64, 68, 68, 64, 60, 56, 52, 48, 44, 40, 32, 28, 24, 20, 16, 12, 8, 4,
239 4, 8, 12, 20, 24, 32, 36, 40, 48, 52, 56, 64, 68, 76, 80, 84, 84, 80, 76, 68, 64, 56, 52, 48, 40, 36, 32, 24, 20, 12, 8, 4,
240 4, 8, 16, 24, 28, 36, 44, 48, 56, 60, 68, 76, 80, 88, 96,100,100, 96, 88, 80, 76, 68, 60, 56, 48, 44, 36, 28, 24, 16, 8, 4,
241 4, 12, 20, 28, 32, 40, 48, 56, 64, 72, 80, 88, 92,100,108,116,116,108,100, 92, 88, 80, 72, 64, 56, 48, 40, 32, 28, 20, 12, 4,
242 4, 12, 20, 28, 40, 48, 56, 64, 72, 80, 88, 96,108,116,124,132,132,124,116,108, 96, 88, 80, 72, 64, 56, 48, 40, 28, 20, 12, 4,
243 4, 16, 24, 32, 44, 52, 60, 72, 80, 92,100,108,120,128,136,148,148,136,128,120,108,100, 92, 80, 72, 60, 52, 44, 32, 24, 16, 4,
244 4, 16, 28, 36, 48, 56, 68, 80, 88,100,112,120,132,140,152,164,164,152,140,132,120,112,100, 88, 80, 68, 56, 48, 36, 28, 16, 4,
245 4, 16, 28, 40, 52, 64, 76, 88, 96,108,120,132,144,156,168,180,180,168,156,144,132,120,108, 96, 88, 76, 64, 52, 40, 28, 16, 4,
246 8, 20, 32, 44, 56, 68, 80, 92,108,120,132,144,156,168,180,192,192,180,168,156,144,132,120,108, 92, 80, 68, 56, 44, 32, 20, 8,
247 8, 20, 32, 48, 60, 76, 88,100,116,128,140,156,168,184,196,208,208,196,184,168,156,140,128,116,100, 88, 76, 60, 48, 32, 20, 8,
248 8, 20, 36, 52, 64, 80, 96,108,124,136,152,168,180,196,212,224,224,212,196,180,168,152,136,124,108, 96, 80, 64, 52, 36, 20, 8,
249 8, 24, 40, 56, 68, 84,100,116,132,148,164,180,192,208,224,240,240,224,208,192,180,164,148,132,116,100, 84, 68, 56, 40, 24, 8,
250 8, 24, 40, 56, 68, 84,100,116,132,148,164,180,192,208,224,240,240,224,208,192,180,164,148,132,116,100, 84, 68, 56, 40, 24, 8,
251 8, 20, 36, 52, 64, 80, 96,108,124,136,152,168,180,196,212,224,224,212,196,180,168,152,136,124,108, 96, 80, 64, 52, 36, 20, 8,
252 8, 20, 32, 48, 60, 76, 88,100,116,128,140,156,168,184,196,208,208,196,184,168,156,140,128,116,100, 88, 76, 60, 48, 32, 20, 8,
253 8, 20, 32, 44, 56, 68, 80, 92,108,120,132,144,156,168,180,192,192,180,168,156,144,132,120,108, 92, 80, 68, 56, 44, 32, 20, 8,
254 4, 16, 28, 40, 52, 64, 76, 88, 96,108,120,132,144,156,168,180,180,168,156,144,132,120,108, 96, 88, 76, 64, 52, 40, 28, 16, 4,
255 4, 16, 28, 36, 48, 56, 68, 80, 88,100,112,120,132,140,152,164,164,152,140,132,120,112,100, 88, 80, 68, 56, 48, 36, 28, 16, 4,
256 4, 16, 24, 32, 44, 52, 60, 72, 80, 92,100,108,120,128,136,148,148,136,128,120,108,100, 92, 80, 72, 60, 52, 44, 32, 24, 16, 4,
257 4, 12, 20, 28, 40, 48, 56, 64, 72, 80, 88, 96,108,116,124,132,132,124,116,108, 96, 88, 80, 72, 64, 56, 48, 40, 28, 20, 12, 4,
258 4, 12, 20, 28, 32, 40, 48, 56, 64, 72, 80, 88, 92,100,108,116,116,108,100, 92, 88, 80, 72, 64, 56, 48, 40, 32, 28, 20, 12, 4,
259 4, 8, 16, 24, 28, 36, 44, 48, 56, 60, 68, 76, 80, 88, 96,100,100, 96, 88, 80, 76, 68, 60, 56, 48, 44, 36, 28, 24, 16, 8, 4,
260 4, 8, 12, 20, 24, 32, 36, 40, 48, 52, 56, 64, 68, 76, 80, 84, 84, 80, 76, 68, 64, 56, 52, 48, 40, 36, 32, 24, 20, 12, 8, 4,
261 4, 8, 12, 16, 20, 24, 28, 32, 40, 44, 48, 52, 56, 60, 64, 68, 68, 64, 60, 56, 52, 48, 44, 40, 32, 28, 24, 20, 16, 12, 8, 4,
262 0, 4, 8, 12, 16, 20, 24, 28, 28, 32, 36, 40, 44, 48, 52, 56, 56, 52, 48, 44, 40, 36, 32, 28, 28, 24, 20, 16, 12, 8, 4, 0,
263 0, 4, 8, 8, 12, 12, 16, 20, 20, 24, 28, 28, 32, 32, 36, 40, 40, 36, 32, 32, 28, 28, 24, 20, 20, 16, 12, 12, 8, 8, 4, 0,
264 0, 4, 4, 4, 8, 8, 8, 12, 12, 16, 16, 16, 20, 20, 20, 24, 24, 20, 20, 20, 16, 16, 16, 12, 12, 8, 8, 8, 4, 4, 4, 0,
265 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0,
/*
 * 256-entry unsigned weight table laid out as 16 rows of 16 values.
 * In this variant the corner weights are 0, edge values reach 16 and the
 * four central entries are 224.
 * NOTE(review): this is the second of three definitions of obmc16 visible in
 * this listing; presumably they are alternatives selected by preprocessor
 * conditionals that are not shown here -- confirm which one is compiled.
 */
268 static const uint8_t obmc16[256]={
269 0, 4, 4, 8, 8, 12, 12, 16, 16, 12, 12, 8, 8, 4, 4, 0,
270 4, 8, 16, 20, 28, 32, 40, 44, 44, 40, 32, 28, 20, 16, 8, 4,
271 4, 16, 24, 36, 44, 56, 64, 76, 76, 64, 56, 44, 36, 24, 16, 4,
272 8, 20, 36, 48, 64, 76, 92,104,104, 92, 76, 64, 48, 36, 20, 8,
273 8, 28, 44, 64, 80,100,116,136,136,116,100, 80, 64, 44, 28, 8,
274 12, 32, 56, 76,100,120,144,164,164,144,120,100, 76, 56, 32, 12,
275 12, 40, 64, 92,116,144,168,196,196,168,144,116, 92, 64, 40, 12,
276 16, 44, 76,104,136,164,196,224,224,196,164,136,104, 76, 44, 16,
277 16, 44, 76,104,136,164,196,224,224,196,164,136,104, 76, 44, 16,
278 12, 40, 64, 92,116,144,168,196,196,168,144,116, 92, 64, 40, 12,
279 12, 32, 56, 76,100,120,144,164,164,144,120,100, 76, 56, 32, 12,
280 8, 28, 44, 64, 80,100,116,136,136,116,100, 80, 64, 44, 28, 8,
281 8, 20, 36, 48, 64, 76, 92,104,104, 92, 76, 64, 48, 36, 20, 8,
282 4, 16, 24, 36, 44, 56, 64, 76, 76, 64, 56, 44, 36, 24, 16, 4,
283 4, 8, 16, 20, 28, 32, 40, 44, 44, 40, 32, 28, 20, 16, 8, 4,
284 0, 4, 4, 8, 8, 12, 12, 16, 16, 12, 12, 8, 8, 4, 4, 0,
/*
 * 1024-entry unsigned weight table laid out as 32 rows of 32 values.
 * In this variant the first and last rows are all zero, the two middle rows
 * carry 1 in their first and last columns, and the four central entries
 * are 255.
 * NOTE(review): this is the third of three definitions of obmc32 visible in
 * this listing; presumably they are alternatives selected by preprocessor
 * conditionals that are not shown here -- confirm which one is compiled.
 */
288 static const uint8_t obmc32[1024]={
289 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
290 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 4, 4, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0,
291 0, 0, 0, 4, 4, 4, 4, 8, 8, 12, 12, 12, 12, 16, 16, 16, 16, 16, 16, 12, 12, 12, 12, 8, 8, 4, 4, 4, 4, 0, 0, 0,
292 0, 0, 4, 4, 4, 8, 8, 12, 16, 20, 20, 24, 28, 28, 28, 28, 28, 28, 28, 28, 24, 20, 20, 16, 12, 8, 8, 4, 4, 4, 0, 0,
293 0, 0, 4, 4, 8, 12, 16, 20, 24, 28, 36, 40, 44, 44, 48, 48, 48, 48, 44, 44, 40, 36, 28, 24, 20, 16, 12, 8, 4, 4, 0, 0,
294 0, 0, 4, 8, 12, 20, 24, 32, 36, 44, 48, 56, 60, 64, 68, 68, 68, 68, 64, 60, 56, 48, 44, 36, 32, 24, 20, 12, 8, 4, 0, 0,
295 0, 4, 4, 8, 16, 24, 32, 40, 48, 60, 68, 76, 80, 84, 88, 92, 92, 88, 84, 80, 76, 68, 60, 48, 40, 32, 24, 16, 8, 4, 4, 0,
296 0, 4, 8, 12, 20, 32, 40, 52, 64, 76, 84, 96,104,108,112,116,116,112,108,104, 96, 84, 76, 64, 52, 40, 32, 20, 12, 8, 4, 0,
297 0, 4, 8, 16, 24, 36, 48, 64, 76, 92,104,116,124,132,136,140,140,136,132,124,116,104, 92, 76, 64, 48, 36, 24, 16, 8, 4, 0,
298 0, 4, 12, 20, 28, 44, 60, 76, 92,104,120,136,148,156,160,164,164,160,156,148,136,120,104, 92, 76, 60, 44, 28, 20, 12, 4, 0,
299 0, 4, 12, 20, 36, 48, 68, 84,104,120,140,152,168,176,184,188,188,184,176,168,152,140,120,104, 84, 68, 48, 36, 20, 12, 4, 0,
300 0, 4, 12, 24, 36, 56, 76, 96,116,136,152,172,184,196,204,208,208,204,196,184,172,152,136,116, 96, 76, 56, 36, 24, 12, 4, 0,
301 0, 4, 12, 24, 44, 60, 80,104,124,148,168,184,200,212,224,228,228,224,212,200,184,168,148,124,104, 80, 60, 44, 24, 12, 4, 0,
302 0, 4, 12, 28, 44, 64, 84,108,132,156,176,196,212,228,236,240,240,236,228,212,196,176,156,132,108, 84, 64, 44, 28, 12, 4, 0,
303 0, 4, 16, 28, 48, 68, 88,112,136,160,184,204,224,236,244,252,252,244,236,224,204,184,160,136,112, 88, 68, 48, 28, 16, 4, 0,
304 1, 4, 16, 28, 48, 68, 92,116,140,164,188,208,228,240,252,255,255,252,240,228,208,188,164,140,116, 92, 68, 48, 28, 16, 4, 1,
305 1, 4, 16, 28, 48, 68, 92,116,140,164,188,208,228,240,252,255,255,252,240,228,208,188,164,140,116, 92, 68, 48, 28, 16, 4, 1,
306 0, 4, 16, 28, 48, 68, 88,112,136,160,184,204,224,236,244,252,252,244,236,224,204,184,160,136,112, 88, 68, 48, 28, 16, 4, 0,
307 0, 4, 12, 28, 44, 64, 84,108,132,156,176,196,212,228,236,240,240,236,228,212,196,176,156,132,108, 84, 64, 44, 28, 12, 4, 0,
308 0, 4, 12, 24, 44, 60, 80,104,124,148,168,184,200,212,224,228,228,224,212,200,184,168,148,124,104, 80, 60, 44, 24, 12, 4, 0,
309 0, 4, 12, 24, 36, 56, 76, 96,116,136,152,172,184,196,204,208,208,204,196,184,172,152,136,116, 96, 76, 56, 36, 24, 12, 4, 0,
310 0, 4, 12, 20, 36, 48, 68, 84,104,120,140,152,168,176,184,188,188,184,176,168,152,140,120,104, 84, 68, 48, 36, 20, 12, 4, 0,
311 0, 4, 12, 20, 28, 44, 60, 76, 92,104,120,136,148,156,160,164,164,160,156,148,136,120,104, 92, 76, 60, 44, 28, 20, 12, 4, 0,
312 0, 4, 8, 16, 24, 36, 48, 64, 76, 92,104,116,124,132,136,140,140,136,132,124,116,104, 92, 76, 64, 48, 36, 24, 16, 8, 4, 0,
313 0, 4, 8, 12, 20, 32, 40, 52, 64, 76, 84, 96,104,108,112,116,116,112,108,104, 96, 84, 76, 64, 52, 40, 32, 20, 12, 8, 4, 0,
314 0, 4, 4, 8, 16, 24, 32, 40, 48, 60, 68, 76, 80, 84, 88, 92, 92, 88, 84, 80, 76, 68, 60, 48, 40, 32, 24, 16, 8, 4, 4, 0,
315 0, 0, 4, 8, 12, 20, 24, 32, 36, 44, 48, 56, 60, 64, 68, 68, 68, 68, 64, 60, 56, 48, 44, 36, 32, 24, 20, 12, 8, 4, 0, 0,
316 0, 0, 4, 4, 8, 12, 16, 20, 24, 28, 36, 40, 44, 44, 48, 48, 48, 48, 44, 44, 40, 36, 28, 24, 20, 16, 12, 8, 4, 4, 0, 0,
317 0, 0, 4, 4, 4, 8, 8, 12, 16, 20, 20, 24, 28, 28, 28, 28, 28, 28, 28, 28, 24, 20, 20, 16, 12, 8, 8, 4, 4, 4, 0, 0,
318 0, 0, 0, 4, 4, 4, 4, 8, 8, 12, 12, 12, 12, 16, 16, 16, 16, 16, 16, 12, 12, 12, 12, 8, 8, 4, 4, 4, 4, 0, 0, 0,
319 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 4, 4, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0,
320 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
/*
 * 256-entry unsigned weight table laid out as 16 rows of 16 values.
 * In this variant the corner weights are 0 and the four central entries
 * are 252.
 * NOTE(review): this is the third of three definitions of obmc16 visible in
 * this listing; presumably they are alternatives selected by preprocessor
 * conditionals that are not shown here -- confirm which one is compiled.
 */
323 static const uint8_t obmc16[256]={
324 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0,
325 0, 0, 4, 8, 12, 16, 20, 20, 20, 20, 16, 12, 8, 4, 0, 0,
326 0, 4, 12, 24, 32, 44, 52, 56, 56, 52, 44, 32, 24, 12, 4, 0,
327 0, 8, 24, 40, 60, 80, 96,104,104, 96, 80, 60, 40, 24, 8, 0,
328 0, 12, 32, 64, 92,120,140,152,152,140,120, 92, 64, 32, 12, 0,
329 4, 16, 44, 80,120,156,184,196,196,184,156,120, 80, 44, 16, 4,
330 4, 20, 52, 96,140,184,216,232,232,216,184,140, 96, 52, 20, 4,
331 0, 20, 56,104,152,196,232,252,252,232,196,152,104, 56, 20, 0,
332 0, 20, 56,104,152,196,232,252,252,232,196,152,104, 56, 20, 0,
333 4, 20, 52, 96,140,184,216,232,232,216,184,140, 96, 52, 20, 4,
334 4, 16, 44, 80,120,156,184,196,196,184,156,120, 80, 44, 16, 4,
335 0, 12, 32, 64, 92,120,140,152,152,140,120, 92, 64, 32, 12, 0,
336 0, 8, 24, 40, 60, 80, 96,104,104, 96, 80, 60, 40, 24, 8, 0,
337 0, 4, 12, 24, 32, 44, 52, 56, 56, 52, 44, 32, 24, 12, 4, 0,
338 0, 0, 4, 8, 12, 16, 20, 20, 20, 20, 16, 12, 8, 4, 0, 0,
339 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0,
/*
 * 64-entry unsigned weight table laid out as 8 rows of 8 values.
 * Weights rise from 4 in the corners to 196 at the four central entries;
 * the rows and columns shown are mirror-symmetric about the centre.
 */
345 static const uint8_t obmc8[64]={
346 4, 12, 20, 28, 28, 20, 12, 4,
347 12, 36, 60, 84, 84, 60, 36, 12,
348 20, 60,100,140,140,100, 60, 20,
349 28, 84,140,196,196,140, 84, 28,
350 28, 84,140,196,196,140, 84, 28,
351 20, 60,100,140,140,100, 60, 20,
352 12, 36, 60, 84, 84, 60, 36, 12,
353 4, 12, 20, 28, 28, 20, 12, 4,
/*
 * 16-entry unsigned weight table. Only the declaration line is visible in
 * this listing; the initialiser values are not shown.
 */
358 static const uint8_t obmc4[16]={
/*
 * Table of pointers to the four weight tables, ordered from the largest
 * (obmc32, index 0) to the smallest (obmc4, index 3).
 */
366 static const uint8_t *obmc_tab[4]={
367 obmc32, obmc16, obmc8, obmc4
/*
 * File-scope MAX_REF_FRAMES x MAX_REF_FRAMES integer table (zero-initialised
 * as a static object).
 * NOTE(review): presumably motion-vector scale factors between pairs of
 * reference frames; it is filled in elsewhere -- confirm at the writer.
 */
370 static int scale_mv_ref[MAX_REF_FRAMES][MAX_REF_FRAMES];
/*
 * Per-block node record. Only part of the definition is visible in this
 * listing: the BLOCK_INTRA flag value and the `level` member. Other members
 * (for example the `color` member initialised in null_block) are declared
 * on lines that are not shown.
 * NOTE(review): BLOCK_INTRA presumably applies to a `type` member, as the
 * FIXME on `level` suggests -- confirm against the full definition.
 */
372 typedef struct BlockNode{
378 //#define TYPE_SPLIT 1
379 #define BLOCK_INTRA 1
381 //#define TYPE_NOCOLOR 4
382 uint8_t level; //FIXME merge into type?
/*
 * Constant default BlockNode. The visible initialiser sets all three colour
 * components to 128; any further initialisers are not shown in this listing.
 */
385 static const BlockNode null_block= { //FIXME add border maybe
386 .color= {128,128,128},
/*
 * Size constants: MB_SIZE is derived from its base-2 logarithm, so with
 * LOG2_MB_SIZE == 4 it evaluates to 16. ENCODER_EXTRA_BITS is the constant 4.
 */
394 #define LOG2_MB_SIZE 4
395 #define MB_SIZE (1<<LOG2_MB_SIZE)
396 #define ENCODER_EXTRA_BITS 4
/*
 * Record type referenced by SubBand::x_coeff. Its members are declared on
 * lines that are not visible in this listing.
 */
399 typedef struct x_and_coeff{
/*
 * Description of one subband. Only some members are visible in this
 * listing: the quantiser log value, a stride expressed in lines, a pointer
 * to x_and_coeff records, a link to a parent subband, and a two-dimensional
 * array of (7 + 512) x 32 state bytes.
 * NOTE(review): the state bytes are presumably range-coder contexts --
 * confirm against the coefficient coding routines.
 */
404 typedef struct SubBand{
409 int qlog; ///< log(qscale)/log[2^(1/6)]
414 int stride_line; ///< Stride measured in lines, not pixels.
415 x_and_coeff * x_coeff;
416 struct SubBand *parent;
417 uint8_t state[/*7*2*/ 7 + 512][32];
/*
 * Per-plane state. Visible members: a grid of subbands with
 * MAX_DECOMPOSITIONS levels and 4 subbands per level, plus two arrays of
 * HTAPS_MAX/2 signed 8-bit filter coefficients (current and "last").
 * Other members are declared on lines not shown in this listing.
 */
420 typedef struct Plane{
423 SubBand band[MAX_DECOMPOSITIONS][4];
426 int8_t hcoeff[HTAPS_MAX/2];
431 int8_t last_hcoeff[HTAPS_MAX/2];
/*
 * Top-level codec context. Only part of the definition is visible in this
 * listing. The visible members hold: the owning AVCodecContext; the input,
 * current, reference (MAX_REF_FRAMES) and mconly pictures; half-pel plane
 * pointers per reference frame; header and block state bytes; current and
 * "last" spatial/temporal decomposition type and count; per-reference
 * motion-vector and score arrays; the DWT and IDWT work buffers; the plane
 * array; a motion-estimation cache of ME_CACHE_SIZE ints with a generation
 * counter; and an embedded MpegEncContext which, per its own comment, is
 * kept only for motion estimation.
 * QBIAS_SHIFT and ME_CACHE_SIZE are defined inline between the members.
 */
435 typedef struct SnowContext{
436 // MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to eventually make the motion estimation independent of MpegEncContext, so this will be removed then (FIXME/XXX)
438 AVCodecContext *avctx;
442 AVFrame input_picture; ///< new_picture with the internal linesizes
443 AVFrame current_picture;
444 AVFrame last_picture[MAX_REF_FRAMES];
445 uint8_t *halfpel_plane[MAX_REF_FRAMES][4][4];
446 AVFrame mconly_picture;
447 // uint8_t q_context[16];
448 uint8_t header_state[32];
449 uint8_t block_state[128 + 32*128];
453 int spatial_decomposition_type;
454 int last_spatial_decomposition_type;
455 int temporal_decomposition_type;
456 int spatial_decomposition_count;
457 int last_spatial_decomposition_count;
458 int temporal_decomposition_count;
461 int16_t (*ref_mvs[MAX_REF_FRAMES])[2];
462 uint32_t *ref_scores[MAX_REF_FRAMES];
463 DWTELEM *spatial_dwt_buffer;
464 IDWTELEM *spatial_idwt_buffer;
468 int spatial_scalability;
478 #define QBIAS_SHIFT 3
482 int last_block_max_depth;
483 Plane plane[MAX_PLANES];
485 #define ME_CACHE_SIZE 1024
486 int me_cache[ME_CACHE_SIZE];
487 int me_cache_generation;
490 MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to eventually make the motion estimation independent of MpegEncContext, so this will be removed then (FIXME/XXX)
/*
 * Return the buffer for line `line_num` of `slice_buf`: the cached pointer
 * when one is already attached, otherwise the result of
 * slice_buffer_load_line(). Both arguments are evaluated more than once, so
 * they must be free of side effects. The commented-out alternative below
 * always goes through slice_buffer_load_line().
 */
501 #define slice_buffer_get_line(slice_buf, line_num) ((slice_buf)->line[line_num] ? (slice_buf)->line[line_num] : slice_buffer_load_line((slice_buf), (line_num)))
502 //#define slice_buffer_get_line(slice_buf, line_num) (slice_buffer_load_line((slice_buf), (line_num)))
/* Forward declaration; the definition is not visible in this listing. */
504 static void iterative_me(SnowContext *s);
/*
 * Initialise a slice_buffer.
 *
 * Records base_buffer, line_count and line_width, stores
 * max_allocated_lines as data_count, allocates a zero-filled array of
 * line_count line pointers and a stack of max_allocated_lines line buffers,
 * each line_width IDWTELEMs long. data_stack_top is left pointing at the
 * last stack slot, i.e. the stack starts full.
 *
 * NOTE(review): no allocation result is checked in the lines visible here;
 * a failed av_malloc/av_mallocz would be dereferenced later.
 */
506 static void slice_buffer_init(slice_buffer * buf, int line_count, int max_allocated_lines, int line_width, IDWTELEM * base_buffer)
510 buf->base_buffer = base_buffer;
511 buf->line_count = line_count;
512 buf->line_width = line_width;
513 buf->data_count = max_allocated_lines;
514 buf->line = av_mallocz (sizeof(IDWTELEM *) * line_count);
515 buf->data_stack = av_malloc (sizeof(IDWTELEM *) * max_allocated_lines);
517 for(i = 0; i < max_allocated_lines; i++){
518 buf->data_stack[i] = av_malloc (sizeof(IDWTELEM) * line_width);
521 buf->data_stack_top = max_allocated_lines - 1;
/*
 * Attach a free line buffer to line `line` of `buf`.
 *
 * Asserts that the free stack is not empty, pops the buffer at
 * data_stack_top and stores it in buf->line[line]. An early
 * `return buf->line[line]` is visible; the condition guarding it is on a
 * line not shown here (presumably the line already has a buffer).
 * `offset` is computed as line_width * line; its use is not visible in this
 * listing, and neither is the final return statement.
 */
524 static IDWTELEM * slice_buffer_load_line(slice_buffer * buf, int line)
529 assert(buf->data_stack_top >= 0);
530 // assert(!buf->line[line]);
532 return buf->line[line];
534 offset = buf->line_width * line;
535 buffer = buf->data_stack[buf->data_stack_top];
536 buf->data_stack_top--;
537 buf->line[line] = buffer;
/*
 * Detach the buffer of line `line` and return it to the free stack.
 *
 * Asserts that `line` is within [0, line_count) and that the line currently
 * has a buffer, then pushes that buffer onto data_stack and clears the line
 * slot to NULL. `offset` is computed but its use is not visible in this
 * listing.
 */
542 static void slice_buffer_release(slice_buffer * buf, int line)
547 assert(line >= 0 && line < buf->line_count);
548 assert(buf->line[line]);
550 offset = buf->line_width * line;
551 buffer = buf->line[line];
552 buf->data_stack_top++;
553 buf->data_stack[buf->data_stack_top] = buffer;
554 buf->line[line] = NULL;
/*
 * Walk every line index of `buf` and release lines back to the free stack
 * via slice_buffer_release().
 * NOTE(review): a line between the loop header and the call is missing from
 * this listing; presumably it skips lines that have no buffer attached,
 * since slice_buffer_release() asserts that the line is loaded.
 */
557 static void slice_buffer_flush(slice_buffer * buf)
560 for(i = 0; i < buf->line_count; i++){
562 slice_buffer_release(buf, i);
/*
 * Free everything owned by `buf`.
 *
 * Flushes first so that the line buffers are returned to data_stack, then
 * frees each of the data_count stack entries (highest index first), and
 * finally frees the stack array and the line-pointer array. av_freep() is
 * passed the address of each pointer.
 */
566 static void slice_buffer_destroy(slice_buffer * buf)
569 slice_buffer_flush(buf);
571 for(i = buf->data_count - 1; i >= 0; i--){
572 av_freep(&buf->data_stack[i]);
574 av_freep(&buf->data_stack);
575 av_freep(&buf->line);
579 // Avoid a name clash on SGI IRIX
/*
 * QEXPSHIFT is a shift amount derived from FRAC_BITS (defined elsewhere).
 * qexp is a file-scope table of QROOT bytes; it has no initialiser here, so
 * it starts zeroed and must be filled in by code not visible in this listing.
 */
582 #define QEXPSHIFT (7-FRAC_BITS+8) //FIXME try to change this to 0
583 static uint8_t qexp[QROOT];
/*
 * Map index `v` with respect to the bound `m`.
 * The loop runs while v, compared as unsigned, exceeds m; for non-negative
 * m that is the case both when v > m and when v is negative. The loop body
 * and the return statement are not visible in this listing.
 * NOTE(review): presumably reflects v back into [0, m] -- confirm against
 * the full body.
 */
585 static inline int mirror(int v, int m){
586 while((unsigned)v > (unsigned)m){
/*
 * Write integer `v` to range coder `c` as a series of binary decisions,
 * using the bytes at `state` as adaptive contexts.
 *
 * From the visible lines: a = |v| and e = av_log2(a). state+0 carries the
 * first flag (0 is written on the paths that go on to code a, 1 on the
 * remaining path). Contexts state+1.. code the exponent as a run of 1s
 * ended by a 0, contexts state+22.. code the bits of a below the leading
 * one (most significant first), and contexts state+11.. code the sign bit
 * (v < 0). The inline range comments (1..10, 11..21, 22..31) give the
 * context ranges.
 *
 * Three variants of the coding are visible (context index clamped through
 * `el`, unclamped, and clamped with FFMIN); the lines that select between
 * them, and the conditions around the sign write, are not shown in this
 * listing.
 */
593 static inline void put_symbol(RangeCoder *c, uint8_t *state, int v, int is_signed){
597 const int a= FFABS(v);
598 const int e= av_log2(a);
600 const int el= FFMIN(e, 10);
601 put_rac(c, state+0, 0);
604 put_rac(c, state+1+i, 1); //1..10
607 put_rac(c, state+1+9, 1); //1..10
609 put_rac(c, state+1+FFMIN(i,9), 0);
611 for(i=e-1; i>=el; i--){
612 put_rac(c, state+22+9, (a>>i)&1); //22..31
615 put_rac(c, state+22+i, (a>>i)&1); //22..31
619 put_rac(c, state+11 + el, v < 0); //11..21
622 put_rac(c, state+0, 0);
625 put_rac(c, state+1+i, 1); //1..10
627 put_rac(c, state+1+i, 0);
629 for(i=e-1; i>=0; i--){
630 put_rac(c, state+22+i, (a>>i)&1); //22..31
634 put_rac(c, state+11 + e, v < 0); //11..21
637 put_rac(c, state+1+FFMIN(i,9), 1); //1..10
639 put_rac(c, state+1+FFMIN(i,9), 0);
641 for(i=e-1; i>=0; i--){
642 put_rac(c, state+22+FFMIN(i,9), (a>>i)&1); //22..31
646 put_rac(c, state+11 + FFMIN(e,10), v < 0); //11..21
650 put_rac(c, state+0, 1);
/*
 * Read an integer from range coder `c` using the contexts at `state`.
 *
 * From the visible lines: the flag at state+0 is tested first; the exponent
 * e is counted by reading decisions from state+1+min(e,9) until a 0 is
 * seen; the value is then built most-significant-bit first from contexts
 * state+22+min(i,9); and, when is_signed is set, a sign decision is read
 * from state+11+min(e,10). The statements executed on each branch and the
 * return values are not shown in this listing.
 */
654 static inline int get_symbol(RangeCoder *c, uint8_t *state, int is_signed){
655 if(get_rac(c, state+0))
660 while(get_rac(c, state+1 + FFMIN(e,9))){ //1..10
665 for(i=e-1; i>=0; i--){
666 a += a + get_rac(c, state+22 + FFMIN(i,9)); //22..31
669 if(is_signed && get_rac(c, state+11 + FFMIN(e,10))) //11..21
/*
 * Write `v` to range coder `c` using an escape/remainder scheme
 * parameterised by `log2`.
 *
 * From the visible lines: r starts as 1<<log2 (or 1 when log2 is negative);
 * decisions at context state+4+log2 are written as 1 and finally as 0; then
 * the low log2 bits of v are written most-significant first at contexts
 * state+31-i. The loop that emits the 1 decisions and updates v, r and log2
 * is not shown in this listing.
 */
676 static inline void put_symbol2(RangeCoder *c, uint8_t *state, int v, int log2){
678 int r= log2>=0 ? 1<<log2 : 1;
684 put_rac(c, state+4+log2, 1);
689 put_rac(c, state+4+log2, 0);
691 for(i=log2-1; i>=0; i--){
692 put_rac(c, state+31-i, (v>>i)&1);
/*
 * Read a value from range coder `c` using the same contexts that
 * put_symbol2 writes.
 *
 * From the visible lines: r starts as 1<<log2 (or 1 when log2 is negative);
 * decisions are read from context state+4+log2 while they are 1; then log2
 * low bits are read most-significant first from contexts state+31-i and
 * added into v. The body of the while loop and the return statement are
 * not shown in this listing.
 */
696 static inline int get_symbol2(RangeCoder *c, uint8_t *state, int log2){
698 int r= log2>=0 ? 1<<log2 : 1;
703 while(get_rac(c, state+4+log2)){
709 for(i=log2-1; i>=0; i--){
710 v+= get_rac(c, state+31-i)<<i;
/*
 * Generic lifting step on DWTELEM data.
 *
 * Each output is the source sample plus (or, when `inverse` is set, minus)
 * a prediction (mul * (sum of two reference samples) + add) >> shift.
 * dst, src and ref are walked with their own steps. At an edge the visible
 * code uses 2*ref[...] in place of the two-neighbour sum. mirror_left and
 * mirror_right are derived from `highpass` and the parity of `width`, and
 * w = (width>>1) - 1 + (highpass & width).
 *
 * The conditions that use mirror_left/mirror_right, the loop header and
 * the pointer adjustments are on lines not shown in this listing.
 */
716 static av_always_inline void
717 lift(DWTELEM *dst, DWTELEM *src, DWTELEM *ref,
718 int dst_step, int src_step, int ref_step,
719 int width, int mul, int add, int shift,
720 int highpass, int inverse){
721 const int mirror_left= !highpass;
722 const int mirror_right= (width&1) ^ highpass;
723 const int w= (width>>1) - 1 + (highpass & width);
726 #define LIFT(src, ref, inv) ((src) + ((inv) ? - (ref) : + (ref)))
728 dst[0] = LIFT(src[0], ((mul*2*ref[0]+add)>>shift), inverse);
735 LIFT(src[i*src_step],
736 ((mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add)>>shift),
742 LIFT(src[w*src_step],
743 ((mul*2*ref[w*ref_step]+add)>>shift),
/*
 * Generic lifting step on IDWTELEM data; the visible lines are the same as
 * those of lift() apart from the element type.
 *
 * Each output is the source sample plus (or, when `inverse` is set, minus)
 * (mul * (sum of two reference samples) + add) >> shift, with 2*ref[...]
 * used at an edge. mirror_left, mirror_right and w are derived from
 * `highpass` and `width`.
 *
 * The conditions that use mirror_left/mirror_right, the loop header and
 * the pointer adjustments are on lines not shown in this listing.
 */
748 static av_always_inline void
749 inv_lift(IDWTELEM *dst, IDWTELEM *src, IDWTELEM *ref,
750 int dst_step, int src_step, int ref_step,
751 int width, int mul, int add, int shift,
752 int highpass, int inverse){
753 const int mirror_left= !highpass;
754 const int mirror_right= (width&1) ^ highpass;
755 const int w= (width>>1) - 1 + (highpass & width);
758 #define LIFT(src, ref, inv) ((src) + ((inv) ? - (ref) : + (ref)))
760 dst[0] = LIFT(src[0], ((mul*2*ref[0]+add)>>shift), inverse);
767 LIFT(src[i*src_step],
768 ((mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add)>>shift),
774 LIFT(src[w*src_step],
775 ((mul*2*ref[w*ref_step]+add)>>shift),
/*
 * Special lifting step on DWTELEM data, built on the LIFTS macro.
 *
 * LIFTS has two arms: one yields src + ((ref + 4*src) >> shift), the other
 * yields -((-16*src + ref + add/4 + 1 + (5<<25)) / (5*4) - (1<<23)). The
 * line holding the selecting condition is not shown in this listing
 * (presumably it tests `inv`). The `ref` argument passed to LIFTS already
 * includes the mul/add terms: mul*(ref[i] + ref[i+1]) + add in the interior
 * and mul*2*ref[...] + add at an edge.
 * NOTE(review): the 5<<25 and 1<<23 constants look like a bias that keeps
 * the dividend positive so the division rounds consistently -- confirm.
 *
 * mirror_left, mirror_right and w are computed as in lift(); the conditions
 * and loop header that use them are not shown.
 */
781 static av_always_inline void
782 liftS(DWTELEM *dst, DWTELEM *src, DWTELEM *ref,
783 int dst_step, int src_step, int ref_step,
784 int width, int mul, int add, int shift,
785 int highpass, int inverse){
786 const int mirror_left= !highpass;
787 const int mirror_right= (width&1) ^ highpass;
788 const int w= (width>>1) - 1 + (highpass & width);
792 #define LIFTS(src, ref, inv) \
794 (src) + (((ref) + 4*(src))>>shift): \
795 -((-16*(src) + (ref) + add/4 + 1 + (5<<25))/(5*4) - (1<<23)))
797 dst[0] = LIFTS(src[0], mul*2*ref[0]+add, inverse);
804 LIFTS(src[i*src_step],
805 mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add,
811 LIFTS(src[w*src_step], mul*2*ref[w*ref_step]+add, inverse);
/*
 * Special lifting step on IDWTELEM data; the visible lines are the same as
 * those of liftS() apart from the element type.
 *
 * LIFTS has two arms: one yields src + ((ref + 4*src) >> shift), the other
 * yields -((-16*src + ref + add/4 + 1 + (5<<25)) / (5*4) - (1<<23)). The
 * line holding the selecting condition is not shown in this listing
 * (presumably it tests `inv`). The `ref` argument passed to LIFTS already
 * includes the mul/add terms.
 *
 * mirror_left, mirror_right and w are derived from `highpass` and `width`;
 * the conditions and loop header that use them are not shown.
 */
814 static av_always_inline void
815 inv_liftS(IDWTELEM *dst, IDWTELEM *src, IDWTELEM *ref,
816 int dst_step, int src_step, int ref_step,
817 int width, int mul, int add, int shift,
818 int highpass, int inverse){
819 const int mirror_left= !highpass;
820 const int mirror_right= (width&1) ^ highpass;
821 const int w= (width>>1) - 1 + (highpass & width);
825 #define LIFTS(src, ref, inv) \
827 (src) + (((ref) + 4*(src))>>shift): \
828 -((-16*(src) + (ref) + add/4 + 1 + (5<<25))/(5*4) - (1<<23)))
830 dst[0] = LIFTS(src[0], mul*2*ref[0]+add, inverse);
837 LIFTS(src[i*src_step],
838 mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add,
844 LIFTS(src[w*src_step], mul*2*ref[w*ref_step]+add, inverse);
/**
 * Horizontal 5/3 decomposition of one row of `width` samples, in place in b.
 *
 * The row is first de-interleaved into a temporary buffer (odd-indexed
 * samples go to temp[w2..]), then two lifting steps write the result back
 * into b, with the second band starting at b+w2.
 *
 * NOTE(review): both an unrolled A1..A4 loop and a pair of lift() calls are
 * present; presumably only one of them is compiled — confirm.
 */
849 static void horizontal_decompose53i(DWTELEM *b, int width){
/* Number of odd-indexed samples (rounded down). */
851 const int width2= width>>1;
/* Number of even-indexed samples (rounded up); offset of the second band. */
853 const int w2= (width+1)>>1;
855 for(x=0; x<width2; x++){
857 temp[x+w2]= b[2*x + 1];
/* Unrolled variant handling two positions per iteration. */
871 for(x=1; x+1<width2; x+=2){
875 A2 += (A1 + A3 + 2)>>2;
879 A1= temp[x+1+width2];
882 A4 += (A1 + A3 + 2)>>2;
888 A2 += (A1 + A3 + 2)>>2;
/* Step 1 (highpass=1): b[w2..] = temp[w2..] + ((-(neighbour sum)) >> 1). */
893 lift(b+w2, temp+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 0);
/* Step 2 (highpass=0): b[0..] = temp[0..] + ((neighbour sum + 2) >> 2). */
894 lift(b , temp , b+w2, 1, 1, 1, width, 1, 2, 2, 0, 0);
/**
 * Vertical 5/3 step across three rows: for each of `width` columns, subtract
 * (b0 + b2) >> 1 from b1. Only b1 is modified.
 */
898 static void vertical_decompose53iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
901 for(i=0; i<width; i++){
902 b1[i] -= (b0[i] + b2[i])>>1;
/**
 * Vertical 5/3 step across three rows: for each of `width` columns, add
 * (b0 + b2 + 2) >> 2 to b1. Only b1 is modified.
 */
906 static void vertical_decompose53iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
909 for(i=0; i<width; i++){
910 b1[i] += (b0[i] + b2[i] + 2)>>2;
/**
 * 2-D 5/3 decomposition of a width x height plane stored in `buffer` with
 * `stride` elements between rows.
 *
 * Rows are processed two at a time through a sliding window b0..b3. Row
 * indices go through mirror(), which presumably reflects out-of-range
 * indices back into [0, height-1] (TODO confirm).
 */
914 static void spatial_decompose53i(DWTELEM *buffer, int width, int height, int stride){
/* Window rows for the first iteration (y == -2). */
916 DWTELEM *b0= buffer + mirror(-2-1, height-1)*stride;
917 DWTELEM *b1= buffer + mirror(-2 , height-1)*stride;
919 for(y=-2; y<height; y+=2){
920 DWTELEM *b2= buffer + mirror(y+1, height-1)*stride;
921 DWTELEM *b3= buffer + mirror(y+2, height-1)*stride;
/*
 * The (unsigned) comparisons are true only for 0 <= row < height: a negative
 * row index converts to a large unsigned value and fails the test.
 */
923 if(y+1<(unsigned)height) horizontal_decompose53i(b2, width);
924 if(y+2<(unsigned)height) horizontal_decompose53i(b3, width);
/* Vertical steps run on rows whose horizontal pass is already done. */
926 if(y+1<(unsigned)height) vertical_decompose53iH0(b1, b2, b3, width);
927 if(y+0<(unsigned)height) vertical_decompose53iL0(b0, b1, b2, width);
/**
 * Horizontal 9/7 decomposition of one row of `width` samples, in place in b.
 *
 * Four chained lifting steps use the W_A*, W_B*, W_C*, W_D* constants in
 * that order. The first two read b with stride 2 (b+1 = odd samples, b =
 * even samples) and write into temp; the last two write back into b, with
 * the second band starting at b+w2.
 */
934 static void horizontal_decompose97i(DWTELEM *b, int width){
/* Number of even-indexed samples (rounded up); offset of the second band. */
936 const int w2= (width+1)>>1;
938 lift (temp+w2, b +1, b , 1, 2, 2, width, W_AM, W_AO, W_AS, 1, 1);
939 liftS(temp , b , temp+w2, 1, 2, 1, width, W_BM, W_BO, W_BS, 0, 0);
940 lift (b +w2, temp+w2, temp , 1, 1, 1, width, W_CM, W_CO, W_CS, 1, 0);
941 lift (b , temp , b +w2, 1, 1, 1, width, W_DM, W_DO, W_DS, 0, 0);
/**
 * Vertical 9/7 step "A": for each of `width` columns, subtract
 * (W_AM*(b0 + b2) + W_AO) >> W_AS from b1. Only b1 is modified.
 */
945 static void vertical_decompose97iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
948 for(i=0; i<width; i++){
949 b1[i] -= (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
/**
 * Vertical 9/7 step "C": for each of `width` columns, add
 * (W_CM*(b0 + b2) + W_CO) >> W_CS to b1. Only b1 is modified.
 */
953 static void vertical_decompose97iH1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
956 for(i=0; i<width; i++){
957 b1[i] += (W_CM*(b0[i] + b2[i])+W_CO)>>W_CS;
/**
 * Vertical 9/7 step "B" over `width` columns; only b1 is modified.
 *
 * NOTE(review): two alternative update statements are present (a shift form
 * and a division form); presumably a preprocessor conditional selects one —
 * confirm.
 */
961 static void vertical_decompose97iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
964 for(i=0; i<width; i++){
/* Shift form. */
966 b1[i] -= (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS;
/*
 * Division form: the (5<<27) bias equals (1<<23) after the division by
 * (5*16) and is subtracted again.
 */
968 b1[i] = (16*4*b1[i] - 4*(b0[i] + b2[i]) + W_BO*5 + (5<<27)) / (5*16) - (1<<23);
/**
 * Vertical 9/7 step "D": for each of `width` columns, add
 * (W_DM*(b0 + b2) + W_DO) >> W_DS to b1. Only b1 is modified.
 */
973 static void vertical_decompose97iL1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
976 for(i=0; i<width; i++){
977 b1[i] += (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS;
/**
 * 2-D 9/7 decomposition of a width x height plane stored in `buffer` with
 * `stride` elements between rows.
 *
 * Rows are processed two at a time through a sliding window b0..b5. Row
 * indices go through mirror(), which presumably reflects out-of-range
 * indices back into [0, height-1] (TODO confirm).
 */
981 static void spatial_decompose97i(DWTELEM *buffer, int width, int height, int stride){
/* Window rows for the first iteration (y == -4). */
983 DWTELEM *b0= buffer + mirror(-4-1, height-1)*stride;
984 DWTELEM *b1= buffer + mirror(-4 , height-1)*stride;
985 DWTELEM *b2= buffer + mirror(-4+1, height-1)*stride;
986 DWTELEM *b3= buffer + mirror(-4+2, height-1)*stride;
988 for(y=-4; y<height; y+=2){
989 DWTELEM *b4= buffer + mirror(y+3, height-1)*stride;
990 DWTELEM *b5= buffer + mirror(y+4, height-1)*stride;
/*
 * The (unsigned) comparisons are true only for 0 <= row < height: a negative
 * row index converts to a large unsigned value and fails the test.
 */
992 if(y+3<(unsigned)height) horizontal_decompose97i(b4, width);
993 if(y+4<(unsigned)height) horizontal_decompose97i(b5, width);
/* The four vertical steps, each one row pair behind the previous one. */
995 if(y+3<(unsigned)height) vertical_decompose97iH0(b3, b4, b5, width);
996 if(y+2<(unsigned)height) vertical_decompose97iL0(b2, b3, b4, width);
997 if(y+1<(unsigned)height) vertical_decompose97iH1(b1, b2, b3, width);
998 if(y+0<(unsigned)height) vertical_decompose97iL1(b0, b1, b2, width);
/**
 * Multi-level forward wavelet transform of `buffer`.
 *
 * For each level 0..decomposition_count-1 one decomposition pass (9/7 for
 * DWT_97, 5/3 for DWT_53) is applied with width and height shifted right by
 * `level` and stride shifted left by `level`, so each level works on every
 * 2^level-th row of the same buffer.
 */
1007 void ff_spatial_dwt(DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
1010 for(level=0; level<decomposition_count; level++){
1012 case DWT_97: spatial_decompose97i(buffer, width>>level, height>>level, stride<<level); break;
1013 case DWT_53: spatial_decompose53i(buffer, width>>level, height>>level, stride<<level); break;
/**
 * Horizontal 5/3 composition (inverse of the decomposition) of one row of
 * `width` samples, in place in b.
 *
 * Two inverse lifting steps read the two bands (b and b+w2) into temp, then
 * temp is re-interleaved into b (temp[w2..] supplies the odd-indexed
 * samples).
 *
 * NOTE(review): both an unrolled A1..A4 loop and a pair of inv_lift() calls
 * are present; presumably only one of them is compiled — confirm.
 */
1018 static void horizontal_compose53i(IDWTELEM *b, int width){
/* Scratch row: a variable-length array of `width` elements on the stack. */
1019 IDWTELEM temp[width];
1020 const int width2= width>>1;
/* Offset of the second band inside b and temp. */
1021 const int w2= (width+1)>>1;
/* Unrolled variant handling two positions per iteration. */
1033 for(x=1; x+1<width2; x+=2){
1037 A2 += (A1 + A3 + 2)>>2;
1041 A1= temp[x+1+width2];
1044 A4 += (A1 + A3 + 2)>>2;
1050 A2 += (A1 + A3 + 2)>>2;
/* Same coefficients as the forward steps, applied in reverse order with inverse=1. */
1054 inv_lift(temp , b , b+w2, 1, 1, 1, width, 1, 2, 2, 0, 1);
1055 inv_lift(temp+w2, b+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 1);
/* Re-interleave. */
1057 for(x=0; x<width2; x++){
1059 b[2*x + 1]= temp[x+w2];
/**
 * Vertical 5/3 inverse step: for each of `width` columns, add
 * (b0 + b2) >> 1 to b1. Only b1 is modified.
 */
1065 static void vertical_compose53iH0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
1068 for(i=0; i<width; i++){
1069 b1[i] += (b0[i] + b2[i])>>1;
/**
 * Vertical 5/3 inverse step: for each of `width` columns, subtract
 * (b0 + b2 + 2) >> 2 from b1. Only b1 is modified.
 */
1073 static void vertical_compose53iL0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
1076 for(i=0; i<width; i++){
1077 b1[i] -= (b0[i] + b2[i] + 2)>>2;
/**
 * Prime the 5/3 composition state for the slice-buffer variant: cs->b0 and
 * cs->b1 are set to the slice-buffer lines for rows mirror(-2) and
 * mirror(-1), each scaled by stride_line.
 */
1081 static void spatial_compose53i_buffered_init(dwt_compose_t *cs, slice_buffer * sb, int height, int stride_line){
1082 cs->b0 = slice_buffer_get_line(sb, mirror(-1-1, height-1) * stride_line);
1083 cs->b1 = slice_buffer_get_line(sb, mirror(-1 , height-1) * stride_line);
/**
 * Prime the 5/3 composition state for the flat-buffer variant: cs->b0 and
 * cs->b1 point at rows mirror(-2) and mirror(-1) of `buffer`, with `stride`
 * elements between rows.
 */
1087 static void spatial_compose53i_init(dwt_compose_t *cs, IDWTELEM *buffer, int height, int stride){
1088 cs->b0 = buffer + mirror(-1-1, height-1)*stride;
1089 cs->b1 = buffer + mirror(-1 , height-1)*stride;
/**
 * One iteration of the 5/3 composition on slice-buffer lines.
 *
 * Uses the two rows remembered in cs (b0, b1) plus the next two rows fetched
 * from the slice buffer (b2, b3): vertical inverse steps first, then the
 * horizontal composition of the rows that are vertically finished.
 * `y` is presumably the current row position taken from cs (TODO confirm).
 */
1093 static void spatial_compose53i_dy_buffered(dwt_compose_t *cs, slice_buffer * sb, int width, int height, int stride_line){
1096 IDWTELEM *b0= cs->b0;
1097 IDWTELEM *b1= cs->b1;
1098 IDWTELEM *b2= slice_buffer_get_line(sb, mirror(y+1, height-1) * stride_line);
1099 IDWTELEM *b3= slice_buffer_get_line(sb, mirror(y+2, height-1) * stride_line);
/* The (unsigned) comparisons are true only for 0 <= row < height. */
1101 if(y+1<(unsigned)height) vertical_compose53iL0(b1, b2, b3, width);
1102 if(y+0<(unsigned)height) vertical_compose53iH0(b0, b1, b2, width);
1104 if(y-1<(unsigned)height) horizontal_compose53i(b0, width);
1105 if(y+0<(unsigned)height) horizontal_compose53i(b1, width);
/**
 * One iteration of the 5/3 composition on a flat buffer.
 *
 * Uses the two rows remembered in cs (b0, b1) plus the next two rows of
 * `buffer` (b2, b3): vertical inverse steps first, then the horizontal
 * composition of the rows that are vertically finished.
 * `y` is presumably the current row position taken from cs (TODO confirm).
 */
1112 static void spatial_compose53i_dy(dwt_compose_t *cs, IDWTELEM *buffer, int width, int height, int stride){
1114 IDWTELEM *b0= cs->b0;
1115 IDWTELEM *b1= cs->b1;
1116 IDWTELEM *b2= buffer + mirror(y+1, height-1)*stride;
1117 IDWTELEM *b3= buffer + mirror(y+2, height-1)*stride;
/* The (unsigned) comparisons are true only for 0 <= row < height. */
1119 if(y+1<(unsigned)height) vertical_compose53iL0(b1, b2, b3, width);
1120 if(y+0<(unsigned)height) vertical_compose53iH0(b0, b1, b2, width);
1122 if(y-1<(unsigned)height) horizontal_compose53i(b0, width);
1123 if(y+0<(unsigned)height) horizontal_compose53i(b1, width);
/**
 * Full-plane 5/3 composition: initialise the state, then call the per-step
 * function until cs.y exceeds height. Marked av_unused.
 *
 * The loop relies on spatial_compose53i_dy() advancing cs.y — presumably it
 * does (TODO confirm).
 */
1130 static void av_unused spatial_compose53i(IDWTELEM *buffer, int width, int height, int stride){
1132 spatial_compose53i_init(&cs, buffer, height, stride);
1133 while(cs.y <= height)
1134 spatial_compose53i_dy(&cs, buffer, width, height, stride);
/**
 * Horizontal 9/7 composition of one row of `width` samples, in place in b.
 *
 * Four inverse lifting steps use the W_D*, W_C*, W_B*, W_A* constants in
 * that order. The first two read the two bands (b and b+w2) into temp; the
 * last two write back into b with stride 2, re-interleaving even (b) and
 * odd (b+1) samples.
 */
1138 void ff_snow_horizontal_compose97i(IDWTELEM *b, int width){
/* Scratch row: a variable-length array of `width` elements on the stack. */
1139 IDWTELEM temp[width];
/* Offset of the second band inside b and temp. */
1140 const int w2= (width+1)>>1;
1142 inv_lift (temp , b , b +w2, 1, 1, 1, width, W_DM, W_DO, W_DS, 0, 1);
1143 inv_lift (temp+w2, b +w2, temp , 1, 1, 1, width, W_CM, W_CO, W_CS, 1, 1);
1144 inv_liftS(b , temp , temp+w2, 2, 1, 1, width, W_BM, W_BO, W_BS, 0, 1);
1145 inv_lift (b+1 , temp+w2, b , 2, 1, 2, width, W_AM, W_AO, W_AS, 1, 0);
/**
 * Vertical 9/7 inverse step "A": for each of `width` columns, add
 * (W_AM*(b0 + b2) + W_AO) >> W_AS to b1. Only b1 is modified.
 */
1148 static void vertical_compose97iH0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
1151 for(i=0; i<width; i++){
1152 b1[i] += (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
/**
 * Vertical 9/7 inverse step "C": for each of `width` columns, subtract
 * (W_CM*(b0 + b2) + W_CO) >> W_CS from b1. Only b1 is modified.
 */
1156 static void vertical_compose97iH1(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
1159 for(i=0; i<width; i++){
1160 b1[i] -= (W_CM*(b0[i] + b2[i])+W_CO)>>W_CS;
/**
 * Vertical 9/7 inverse step "B" over `width` columns; only b1 is modified.
 *
 * NOTE(review): two alternative update statements are present, one without
 * and one with a 4*b1[i] term; presumably a preprocessor conditional selects
 * one — confirm.
 */
1164 static void vertical_compose97iL0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
1167 for(i=0; i<width; i++){
1169 b1[i] += (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS;
1171 b1[i] += (W_BM*(b0[i] + b2[i])+4*b1[i]+W_BO)>>W_BS;
/**
 * Vertical 9/7 inverse step "D": for each of `width` columns, subtract
 * (W_DM*(b0 + b2) + W_DO) >> W_DS from b1. Only b1 is modified.
 */
1176 static void vertical_compose97iL1(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
1179 for(i=0; i<width; i++){
1180 b1[i] -= (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS;
/**
 * Fused vertical 9/7 composition over six rows: for each of `width` columns
 * the D, C, B and A inverse steps are applied to b4, b3, b2 and b1 in that
 * order, each step reading the rows updated by the previous one.
 *
 * NOTE(review): two alternative "B" statements are present (without and with
 * a 4*b2[i] term); presumably a preprocessor conditional selects one —
 * confirm.
 */
1184 void ff_snow_vertical_compose97i(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, IDWTELEM *b3, IDWTELEM *b4, IDWTELEM *b5, int width){
1187 for(i=0; i<width; i++){
1188 b4[i] -= (W_DM*(b3[i] + b5[i])+W_DO)>>W_DS;
1189 b3[i] -= (W_CM*(b2[i] + b4[i])+W_CO)>>W_CS;
1191 b2[i] += (W_BM*(b1[i] + b3[i])+W_BO)>>W_BS;
1193 b2[i] += (W_BM*(b1[i] + b3[i])+4*b2[i]+W_BO)>>W_BS;
1195 b1[i] += (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
/**
 * Prime the 9/7 composition state for the slice-buffer variant: cs->b0..b3
 * are set to the slice-buffer lines for rows mirror(-4) .. mirror(-1), each
 * scaled by stride_line.
 */
1199 static void spatial_compose97i_buffered_init(dwt_compose_t *cs, slice_buffer * sb, int height, int stride_line){
1200 cs->b0 = slice_buffer_get_line(sb, mirror(-3-1, height-1) * stride_line);
1201 cs->b1 = slice_buffer_get_line(sb, mirror(-3 , height-1) * stride_line);
1202 cs->b2 = slice_buffer_get_line(sb, mirror(-3+1, height-1) * stride_line);
1203 cs->b3 = slice_buffer_get_line(sb, mirror(-3+2, height-1) * stride_line);
/**
 * Prime the 9/7 composition state for the flat-buffer variant: cs->b0..b3
 * point at rows mirror(-4) .. mirror(-1) of `buffer`, with `stride` elements
 * between rows.
 */
1207 static void spatial_compose97i_init(dwt_compose_t *cs, IDWTELEM *buffer, int height, int stride){
1208 cs->b0 = buffer + mirror(-3-1, height-1)*stride;
1209 cs->b1 = buffer + mirror(-3 , height-1)*stride;
1210 cs->b2 = buffer + mirror(-3+1, height-1)*stride;
1211 cs->b3 = buffer + mirror(-3+2, height-1)*stride;
/**
 * One iteration of the 9/7 composition on slice-buffer lines.
 *
 * Uses the four rows remembered in cs (b0..b3) plus the next two rows
 * fetched from the slice buffer (b4, b5). Away from the top and bottom
 * borders the fused dsp->vertical_compose97i is used; the individually
 * guarded steps are presumably the fallback for border rows (TODO confirm).
 * `y` is presumably the current row position taken from cs (TODO confirm).
 */
1215 static void spatial_compose97i_dy_buffered(DSPContext *dsp, dwt_compose_t *cs, slice_buffer * sb, int width, int height, int stride_line){
1218 IDWTELEM *b0= cs->b0;
1219 IDWTELEM *b1= cs->b1;
1220 IDWTELEM *b2= cs->b2;
1221 IDWTELEM *b3= cs->b3;
1222 IDWTELEM *b4= slice_buffer_get_line(sb, mirror(y + 3, height - 1) * stride_line);
1223 IDWTELEM *b5= slice_buffer_get_line(sb, mirror(y + 4, height - 1) * stride_line);
/* All six rows are real (non-mirrored) rows here, so no per-step guards are needed. */
1225 if(y>0 && y+4<height){
1226 dsp->vertical_compose97i(b0, b1, b2, b3, b4, b5, width);
/* Per-step path; the (unsigned) comparisons are true only for 0 <= row < height. */
1228 if(y+3<(unsigned)height) vertical_compose97iL1(b3, b4, b5, width);
1229 if(y+2<(unsigned)height) vertical_compose97iH1(b2, b3, b4, width);
1230 if(y+1<(unsigned)height) vertical_compose97iL0(b1, b2, b3, width);
1231 if(y+0<(unsigned)height) vertical_compose97iH0(b0, b1, b2, width);
/* Horizontal composition of the rows that are vertically finished. */
1234 if(y-1<(unsigned)height) dsp->horizontal_compose97i(b0, width);
1235 if(y+0<(unsigned)height) dsp->horizontal_compose97i(b1, width);
/**
 * One iteration of the 9/7 composition on a flat buffer.
 *
 * Uses the four rows remembered in cs (b0..b3) plus the next two rows of
 * `buffer` (b4, b5): the four vertical inverse steps, then the horizontal
 * composition of the rows that are vertically finished.
 * `y` is presumably the current row position taken from cs (TODO confirm).
 */
1244 static void spatial_compose97i_dy(dwt_compose_t *cs, IDWTELEM *buffer, int width, int height, int stride){
1246 IDWTELEM *b0= cs->b0;
1247 IDWTELEM *b1= cs->b1;
1248 IDWTELEM *b2= cs->b2;
1249 IDWTELEM *b3= cs->b3;
1250 IDWTELEM *b4= buffer + mirror(y+3, height-1)*stride;
1251 IDWTELEM *b5= buffer + mirror(y+4, height-1)*stride;
/* The (unsigned) comparisons are true only for 0 <= row < height. */
1253 if(y+3<(unsigned)height) vertical_compose97iL1(b3, b4, b5, width);
1254 if(y+2<(unsigned)height) vertical_compose97iH1(b2, b3, b4, width);
1255 if(y+1<(unsigned)height) vertical_compose97iL0(b1, b2, b3, width);
1256 if(y+0<(unsigned)height) vertical_compose97iH0(b0, b1, b2, width);
1258 if(y-1<(unsigned)height) ff_snow_horizontal_compose97i(b0, width);
1259 if(y+0<(unsigned)height) ff_snow_horizontal_compose97i(b1, width);
/**
 * Full-plane 9/7 composition: initialise the state, then call the per-step
 * function until cs.y exceeds height. Marked av_unused.
 *
 * The loop relies on spatial_compose97i_dy() advancing cs.y — presumably it
 * does (TODO confirm).
 */
1268 static void av_unused spatial_compose97i(IDWTELEM *buffer, int width, int height, int stride){
1270 spatial_compose97i_init(&cs, buffer, height, stride);
1271 while(cs.y <= height)
1272 spatial_compose97i_dy(&cs, buffer, width, height, stride);
/**
 * Initialise one composition state per decomposition level (cs[level]) for
 * the slice-buffer inverse transform, from the highest level down to 0.
 * Each level uses height >> level and stride_line << level. `width` is not
 * used by the visible calls.
 */
1275 static void ff_spatial_idwt_buffered_init(dwt_compose_t *cs, slice_buffer * sb, int width, int height, int stride_line, int type, int decomposition_count){
1277 for(level=decomposition_count-1; level>=0; level--){
1279 case DWT_97: spatial_compose97i_buffered_init(cs+level, sb, height>>level, stride_line<<level); break;
1280 case DWT_53: spatial_compose53i_buffered_init(cs+level, sb, height>>level, stride_line<<level); break;
/**
 * Initialise one composition state per decomposition level (cs[level]) for
 * the flat-buffer inverse transform, from the highest level down to 0.
 * Each level uses height >> level and stride << level. `width` is not used
 * by the visible calls.
 */
1285 static void ff_spatial_idwt_init(dwt_compose_t *cs, IDWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
1287 for(level=decomposition_count-1; level>=0; level--){
1289 case DWT_97: spatial_compose97i_init(cs+level, buffer, height>>level, stride<<level); break;
1290 case DWT_53: spatial_compose53i_init(cs+level, buffer, height>>level, stride<<level); break;
/**
 * Advance the flat-buffer inverse transform far enough to cover output rows
 * up to `y`.
 *
 * Levels are processed from the coarsest (decomposition_count-1) down to 0.
 * Each level is stepped while its row position cs[level].y is at most
 * min((y >> level) + support, height >> level).
 */
1295 static void ff_spatial_idwt_slice(dwt_compose_t *cs, IDWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count, int y){
/* Extra rows needed beyond y: 3 when type==1, 5 otherwise. */
1296 const int support = type==1 ? 3 : 5;
1300 for(level=decomposition_count-1; level>=0; level--){
1301 while(cs[level].y <= FFMIN((y>>level)+support, height>>level)){
1303 case DWT_97: spatial_compose97i_dy(cs+level, buffer, width>>level, height>>level, stride<<level);
1305 case DWT_53: spatial_compose53i_dy(cs+level, buffer, width>>level, height>>level, stride<<level);
/**
 * Advance the slice-buffer inverse transform far enough to cover output rows
 * up to `y`.
 *
 * Levels are processed from the coarsest (decomposition_count-1) down to 0.
 * Each level is stepped while its row position cs[level].y is at most
 * min((y >> level) + support, height >> level). Only the 9/7 path receives
 * the DSPContext.
 */
1312 static void ff_spatial_idwt_buffered_slice(DSPContext *dsp, dwt_compose_t *cs, slice_buffer * slice_buf, int width, int height, int stride_line, int type, int decomposition_count, int y){
/* Extra rows needed beyond y: 3 when type==1, 5 otherwise. */
1313 const int support = type==1 ? 3 : 5;
1317 for(level=decomposition_count-1; level>=0; level--){
1318 while(cs[level].y <= FFMIN((y>>level)+support, height>>level)){
1320 case DWT_97: spatial_compose97i_dy_buffered(dsp, cs+level, slice_buf, width>>level, height>>level, stride_line<<level);
1322 case DWT_53: spatial_compose53i_dy_buffered(cs+level, slice_buf, width>>level, height>>level, stride_line<<level);
/**
 * Full inverse wavelet transform of a flat buffer: initialise the per-level
 * states, then drive ff_spatial_idwt_slice() for y = 0, 4, 8, ... below
 * height.
 */
1329 static void ff_spatial_idwt(IDWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
/* One state per level; decomposition_count must not exceed MAX_DECOMPOSITIONS. */
1330 dwt_compose_t cs[MAX_DECOMPOSITIONS];
1332 ff_spatial_idwt_init(cs, buffer, width, height, stride, type, decomposition_count);
1333 for(y=0; y<height; y+=4)
1334 ff_spatial_idwt_slice(cs, buffer, width, height, stride, type, decomposition_count, y);
/**
 * Encode one subband with context modelling plus zero-run coding.
 *
 * The coefficient grid (b->width x b->height, read from src with `stride`)
 * is traversed twice. Both traversals gather the same neighbourhood for each
 * coefficient v: left (l), top-left (lt), top (t), top-right (rt) and the
 * co-located parent coefficient (p).
 *   - The first traversal only records run values into runs[] for positions
 *     whose whole neighbourhood is zero.
 *   - The second traversal writes symbols to the range coder s->c.
 *
 * An error is logged when fewer than w*40 bytes remain in the output buffer;
 * the value returned in that case is presumably an error code (TODO
 * confirm).
 */
1337 static int encode_subband_c0run(SnowContext *s, SubBand *b, IDWTELEM *src, IDWTELEM *parent, int stride, int orientation){
1338 const int w= b->width;
1339 const int h= b->height;
/* ---- first traversal: collect runs ---- */
1351 int /*ll=0, */l=0, lt=0, t=0, rt=0;
1352 v= src[x + y*stride];
1355 t= src[x + (y-1)*stride];
1357 lt= src[x - 1 + (y-1)*stride];
1360 rt= src[x + 1 + (y-1)*stride];
1364 l= src[x - 1 + y*stride];
/* NOTE(review): `ll` is commented out of the declaration above, so these two lines are presumably disabled — confirm. */
1366 if(orientation==1) ll= src[y + (x-2)*stride];
1367 else ll= src[x - 2 + y*stride];
/* Parent coefficient is only read when (px, py) lies inside the parent band. */
1373 if(px<b->parent->width && py<b->parent->height)
1374 p= parent[px + py*2*stride];
/* All-zero neighbourhood: this position is covered by run coding. */
1376 if(!(/*ll|*/l|lt|t|rt|p)){
1378 runs[run_index++]= run;
/* Remember how many runs were recorded and store the trailing run. */
1386 max_index= run_index;
1387 runs[run_index++]= run;
1389 run= runs[run_index++];
/* The run count is sent first, then the first run if there is one. */
1391 put_symbol2(&s->c, b->state[30], max_index, 0);
1392 if(run_index <= max_index)
1393 put_symbol2(&s->c, b->state[1], run, 3);
/* Output space check against a w*40 byte margin. */
1396 if(s->c.bytestream_end - s->c.bytestream < w*40){
1397 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
/* ---- second traversal: emit symbols ---- */
1402 int /*ll=0, */l=0, lt=0, t=0, rt=0;
1403 v= src[x + y*stride];
1406 t= src[x + (y-1)*stride];
1408 lt= src[x - 1 + (y-1)*stride];
1411 rt= src[x + 1 + (y-1)*stride];
1415 l= src[x - 1 + y*stride];
1417 if(orientation==1) ll= src[y + (x-2)*stride];
1418 else ll= src[x - 2 + y*stride];
1424 if(px<b->parent->width && py<b->parent->height)
1425 p= parent[px + py*2*stride];
/* Non-zero neighbourhood: code a zero/non-zero flag in a magnitude-derived context. */
1427 if(/*ll|*/l|lt|t|rt|p){
1428 int context= av_log2(/*FFABS(ll) + */3*FFABS(l) + FFABS(lt) + 2*FFABS(t) + FFABS(rt) + FFABS(p));
1430 put_rac(&s->c, &b->state[0][context], !!v);
/* Zero neighbourhood: consume the next recorded run. */
1433 run= runs[run_index++];
1435 if(run_index <= max_index)
1436 put_symbol2(&s->c, b->state[1], run, 3);
/* Magnitude minus one, then the sign in a context built from the signs/magnitudes of l and t. */
1444 int context= av_log2(/*FFABS(ll) + */3*FFABS(l) + FFABS(lt) + 2*FFABS(t) + FFABS(rt) + FFABS(p));
1445 int l2= 2*FFABS(l) + (l<0);
1446 int t2= 2*FFABS(t) + (t<0);
1448 put_symbol2(&s->c, b->state[context + 2], FFABS(v)-1, context-4);
1449 put_rac(&s->c, &b->state[0][16 + 1 + 3 + quant3bA[l2&0xFF] + 3*quant3bA[t2&0xFF]], v<0);
/**
 * Encode one subband. Forwards all arguments to encode_subband_c0run() and
 * returns its result; the commented-out calls name alternative coders that
 * are not used.
 */
1457 static int encode_subband(SnowContext *s, SubBand *b, IDWTELEM *src, IDWTELEM *parent, int stride, int orientation){
1458 // encode_subband_qtree(s, b, src, parent, stride, orientation);
1459 // encode_subband_z0run(s, b, src, parent, stride, orientation);
1460 return encode_subband_c0run(s, b, src, parent, stride, orientation);
1461 // encode_subband_dzr(s, b, src, parent, stride, orientation);
/**
 * Decode the coefficients of subband b from the range coder into the sparse
 * list b->x_coeff.
 *
 * Each list entry pairs a column x with a coded value. Decoded values are
 * stored as 2*(magnitude) + sign bit (see the two `v= 2*(...); v+= ...`
 * sequences). An entry with x == w+1 is written as an end marker.
 *
 * Neighbour values come from previously decoded entries (prev_xc) and,
 * when `parent` is non-NULL, from the parent band's list (parent_xc).
 */
1464 static inline void unpack_coeffs(SnowContext *s, SubBand *b, SubBand * parent, int orientation){
1465 const int w= b->width;
1466 const int h= b->height;
/* Write cursor and read cursors into this band's and the parent's lists. */
1471 x_and_coeff *xc= b->x_coeff;
1472 x_and_coeff *prev_xc= NULL;
1473 x_and_coeff *prev2_xc= xc;
1474 x_and_coeff *parent_xc= parent ? parent->x_coeff : NULL;
1475 x_and_coeff *prev_parent_xc= parent_xc;
/* Number of runs, then the first run length if there is one. */
1477 runs= get_symbol2(&s->c, b->state[30], 0);
1478 if(runs-- > 0) run= get_symbol2(&s->c, b->state[1], 3);
1483 int lt=0, t=0, rt=0;
1485 if(y && prev_xc->x == 0){
1497 if(prev_xc->x == x + 1)
/* Advance/peek in the parent list; x>>1 maps this column onto the parent band. */
1503 if(x>>1 > parent_xc->x){
1506 if(x>>1 == parent_xc->x){
1507 p= parent_xc->coeff;
/* Non-zero neighbourhood: zero flag, then magnitude and sign in context. */
1510 if(/*ll|*/l|lt|t|rt|p){
1511 int context= av_log2(/*FFABS(ll) + */3*(l>>1) + (lt>>1) + (t&~1) + (rt>>1) + (p>>1));
1513 v=get_rac(&s->c, &b->state[0][context]);
1515 v= 2*(get_symbol2(&s->c, b->state[context + 2], context-4) + 1);
1516 v+=get_rac(&s->c, &b->state[0][16 + 1 + 3 + quant3bA[l&0xFF] + 3*quant3bA[t&0xFF]]);
/* Zero neighbourhood: next run length, then a coefficient coded in the fixed (zero) context. */
1523 if(runs-- > 0) run= get_symbol2(&s->c, b->state[1], 3);
1525 v= 2*(get_symbol2(&s->c, b->state[0 + 2], 0-4) + 1);
1526 v+=get_rac(&s->c, &b->state[0][16 + 1 + 3]);
/* Limit how far a run may be skipped, based on the next entry of the previous row, the row end, and the parent. */
1535 if(y) max_run= FFMIN(run, prev_xc->x - x - 2);
1536 else max_run= FFMIN(run, w-x-1);
1538 max_run= FFMIN(max_run, 2*parent_xc->x - x - 1);
1544 (xc++)->x= w+1; //end marker
/* Skip to the parent's end marker (x == parent->width+1). */
1550 while(parent_xc->x != parent->width+1)
1553 prev_parent_xc= parent_xc;
1555 parent_xc= prev_parent_xc;
1560 (xc++)->x= w+1; //end marker
/**
 * Dequantise rows start_y .. h-1 of subband b from its sparse coefficient
 * list (b->x_coeff) into the slice buffer.
 *
 * save_state[0] carries the list position between calls so that successive
 * slices continue where the previous one stopped.
 */
1564 static inline void decode_subband_slice_buffered(SnowContext *s, SubBand *b, slice_buffer * sb, int start_y, int h, int save_state[1]){
1565 const int w= b->width;
/* Quantiser: clipped log-scale index -> multiplier (qmul) and rounding bias (qadd). */
1567 const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
1568 int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
1569 int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
1572 if(b->ibuf == s->spatial_idwt_buffer || s->qlog == LOSSLESS_QLOG){
1577 /* If we are on the second or later slice, restore our index. */
1579 new_index = save_state[0];
1582 for(y=start_y; y<h; y++){
/* Output row inside the slice buffer, zeroed before the non-zero coefficients are written. */
1585 IDWTELEM * line = slice_buffer_get_line(sb, y * b->stride_line + b->buf_y_offset) + b->buf_x_offset;
1586 memset(line, 0, b->width*sizeof(IDWTELEM));
1587 v = b->x_coeff[new_index].coeff;
1588 x = b->x_coeff[new_index++].x;
/*
 * v holds magnitude in the upper bits and the sign in bit 0. u is 0 or -1,
 * and (t^u) - u yields t when u == 0 and -t when u == -1.
 */
1590 register int t= ( (v>>1)*qmul + qadd)>>QEXPSHIFT;
1591 register int u= -(v&1);
1592 line[x] = (t^u) - u;
1594 v = b->x_coeff[new_index].coeff;
1595 x = b->x_coeff[new_index++].x;
1599 /* Save our variables for the next slice. */
1600 save_state[0] = new_index;
/**
 * Reset every adaptive coder state to MID_STATE: the per-band states of all
 * 3 planes, MAX_DECOMPOSITIONS levels and their orientations, plus the
 * header and block states.
 */
1605 static void reset_contexts(SnowContext *s){ //FIXME better initial contexts
1606 int plane_index, level, orientation;
1608 for(plane_index=0; plane_index<3; plane_index++){
1609 for(level=0; level<MAX_DECOMPOSITIONS; level++){
/* Level 0 covers orientations 0..3; every other level covers 1..3. */
1610 for(orientation=level ? 1:0; orientation<4; orientation++){
1611 memset(s->plane[plane_index].band[level][orientation].state, MID_STATE, sizeof(s->plane[plane_index].band[level][orientation].state));
1615 memset(s->header_state, MID_STATE, sizeof(s->header_state));
1616 memset(s->block_state, MID_STATE, sizeof(s->block_state));
/**
 * Allocate the zero-initialised block array s->block.
 *
 * w and h are the picture width and height divided by 2^LOG2_MB_SIZE; the
 * double negation around the shift makes the division round up (assuming an
 * arithmetic right shift of negative values). The array holds w*h entries
 * scaled by 4^block_max_depth.
 *
 * NOTE(review): no check of the av_mallocz() result is visible here —
 * confirm callers or following lines handle a NULL s->block.
 */
1619 static int alloc_blocks(SnowContext *s){
1620 int w= -((-s->avctx->width )>>LOG2_MB_SIZE);
1621 int h= -((-s->avctx->height)>>LOG2_MB_SIZE);
1626 s->block= av_mallocz(w * h * sizeof(BlockNode) << (s->block_max_depth*2));
/**
 * Copy range-coder state into d while keeping d's own output pointers:
 * d->bytestream and d->bytestream_start are saved first and written back
 * afterwards. The copy from s presumably happens in between (TODO confirm).
 */
1630 static inline void copy_rac_state(RangeCoder *d, RangeCoder *s){
1631 uint8_t *bytestream= d->bytestream;
1632 uint8_t *bytestream_start= d->bytestream_start;
1634 d->bytestream= bytestream;
1635 d->bytestream_start= bytestream_start;
/**
 * Walk a w x w block of pixels starting at pix, with line_size bytes between
 * rows. Presumably returns the sum of the pixel values (TODO confirm).
 */
1638 //near copy & paste from dsputil, FIXME
1639 static int pix_sum(uint8_t * pix, int line_size, int w)
1644 for (i = 0; i < w; i++) {
1645 for (j = 0; j < w; j++) {
/* Move from the end of this row to the start of the next one. */
1649 pix += line_size - w;
/**
 * Walk a w x w block of pixels starting at pix, with line_size bytes between
 * rows, using the ff_squareTbl lookup table. Presumably returns the sum of
 * the squared pixel values (TODO confirm).
 */
1654 //near copy & paste from dsputil, FIXME
1655 static int pix_norm1(uint8_t * pix, int line_size, int w)
/* Table pointer offset by 256 entries. */
1658 uint32_t *sq = ff_squareTbl + 256;
1661 for (i = 0; i < w; i++) {
1662 for (j = 0; j < w; j ++) {
/* Move from the end of this row to the start of the next one. */
1666 pix += line_size - w;
/**
 * Fill the square of finest-level block entries covered by block (x, y) at
 * tree depth `level` with one BlockNode value.
 *
 * `block` is presumably built from l, cb, cr, mx, my, ref and type (TODO
 * confirm).
 */
1671 static inline void set_blocks(SnowContext *s, int level, int x, int y, int l, int cb, int cr, int mx, int my, int ref, int type){
/* Row length of s->block at the finest depth. */
1672 const int w= s->b_width << s->block_max_depth;
/* Levels remaining below this block; it covers 2^rem_depth entries per side. */
1673 const int rem_depth= s->block_max_depth - level;
1674 const int index= (x + y*w) << rem_depth;
1675 const int block_w= 1<<rem_depth;
1688 for(j=0; j<block_w; j++){
1689 for(i=0; i<block_w; i++){
1690 s->block[index + i + j*w]= block;
/**
 * Point the motion-estimation context at the source and reference planes
 * for the block at pixel position (x, y).
 *
 * c->src[0][i] receives src[i] unchanged; c->ref[0][i] receives ref[i] plus
 * a per-plane offset. The two chroma offsets are (y*uvstride + x) >> 1.
 * ref2 and ref_index are not used by the visible statements.
 */
1695 static inline void init_ref(MotionEstContext *c, uint8_t *src[3], uint8_t *ref[3], uint8_t *ref2[3], int x, int y, int ref_index){
1696 const int offset[3]= {
1698 ((y*c->uvstride + x)>>1),
1699 ((y*c->uvstride + x)>>1),
1703 c->src[0][i]= src [i];
1704 c->ref[0][i]= ref [i] + offset[i];
/**
 * Predict a motion vector (*mx, *my) from the left, top and top-right
 * neighbours using mid_pred() (presumably the median of three — TODO
 * confirm).
 *
 * With a single reference frame the neighbours' vectors are used directly.
 * Otherwise each neighbour's vector is first scaled by
 * scale_mv_ref[ref][neighbour->ref], with +128 and >>8 rounding, so that
 * vectors pointing at different reference frames become comparable.
 */
1709 static inline void pred_mv(SnowContext *s, int *mx, int *my, int ref,
1710 const BlockNode *left, const BlockNode *top, const BlockNode *tr){
1711 if(s->ref_frames == 1){
1712 *mx = mid_pred(left->mx, top->mx, tr->mx);
1713 *my = mid_pred(left->my, top->my, tr->my);
/* Scale table row for the reference frame being predicted. */
1715 const int *scale = scale_mv_ref[ref];
1716 *mx = mid_pred((left->mx * scale[left->ref] + 128) >>8,
1717 (top ->mx * scale[top ->ref] + 128) >>8,
1718 (tr ->mx * scale[tr ->ref] + 128) >>8);
1719 *my = mid_pred((left->my * scale[left->ref] + 128) >>8,
1720 (top ->my * scale[top ->ref] + 128) >>8,
1721 (tr ->my * scale[tr ->ref] + 128) >>8);
1728 #define P_TOPRIGHT P[3]
1729 #define P_MEDIAN P[4]
1731 #define FLAG_QPEL 1 //must be 1
/**
 * Decide and encode the block at position (x, y) of quad-tree depth `level`.
 *
 * Visible stages:
 *   1. Gather neighbour blocks and derive coding contexts.
 *   2. Motion search over every reference frame, keeping the best score.
 *   3. Trial-encode an inter candidate into p_buffer/p_state and an intra
 *      (mean colour) candidate into i_buffer/i_state, each starting from a
 *      copy of s->block_state, and add the rate cost to each score.
 *   4. Unless at maximum depth, recurse into the four child blocks and
 *      compare their combined score (score2) against both candidates.
 *   5. Copy the chosen candidate's bytes and state back into the real coder
 *      and store the decision with set_blocks().
 *
 * The recursion sums the return values as scores, so the function
 * presumably returns the score of the chosen alternative (TODO confirm).
 */
1733 static int encode_q_branch(SnowContext *s, int level, int x, int y){
/* Scratch output and state for the inter (p_) and intra (i_) trial encodes. */
1734 uint8_t p_buffer[1024];
1735 uint8_t i_buffer[1024];
1736 uint8_t p_state[sizeof(s->block_state)];
1737 uint8_t i_state[sizeof(s->block_state)];
/* Real output position, restored when a candidate is committed. */
1739 uint8_t *pbbak= s->c.bytestream;
1740 uint8_t *pbbak_start= s->c.bytestream_start;
1741 int score, score2, iscore, i_len, p_len, block_s, sum, base_bits;
1742 const int w= s->b_width << s->block_max_depth;
1743 const int h= s->b_height << s->block_max_depth;
1744 const int rem_depth= s->block_max_depth - level;
1745 const int index= (x + y*w) << rem_depth;
/* Block size in luma pixels at this depth. */
1746 const int block_w= 1<<(LOG2_MB_SIZE - level);
1747 int trx= (x+1)<<rem_depth;
1748 int try= (y+1)<<rem_depth;
/* Neighbours; null_block stands in where a neighbour lies outside the picture. */
1749 const BlockNode *left = x ? &s->block[index-1] : &null_block;
1750 const BlockNode *top = y ? &s->block[index-w] : &null_block;
1751 const BlockNode *right = trx<w ? &s->block[index+1] : &null_block;
1752 const BlockNode *bottom= try<h ? &s->block[index+w] : &null_block;
1753 const BlockNode *tl = y && x ? &s->block[index-w-1] : left;
1754 const BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
/* Colour predictors come from the left neighbour. */
1755 int pl = left->color[0];
1756 int pcb= left->color[1];
1757 int pcr= left->color[2];
1761 const int stride= s->current_picture.linesize[0];
1762 const int uvstride= s->current_picture.linesize[1];
/* Source pixels of this block in the three planes (chroma offset halved). */
1763 uint8_t *current_data[3]= { s->input_picture.data[0] + (x + y* stride)*block_w,
1764 s->input_picture.data[1] + (x + y*uvstride)*block_w/2,
1765 s->input_picture.data[2] + (x + y*uvstride)*block_w/2};
1767 int16_t last_mv[3][2];
1768 int qpel= !!(s->avctx->flags & CODEC_FLAG_QPEL); //unused
1769 const int shift= 1+qpel;
1770 MotionEstContext *c= &s->m.me;
/* Coding contexts derived from the neighbours. */
1771 int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref);
1772 int mx_context= av_log2(2*FFABS(left->mx - top->mx));
1773 int my_context= av_log2(2*FFABS(left->my - top->my));
1774 int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
1775 int ref, best_ref, ref_score, ref_mx, ref_my;
1777 assert(sizeof(s->block_state) >= 256);
1779 set_blocks(s, level, x, y, pl, pcb, pcr, 0, 0, 0, BLOCK_INTRA);
1783 // clip predictors / edge ?
/* Motion-vector predictors for the search. */
1785 P_LEFT[0]= left->mx;
1786 P_LEFT[1]= left->my;
1789 P_TOPRIGHT[0]= tr->mx;
1790 P_TOPRIGHT[1]= tr->my;
/* Candidate vectors: this block's stored vector and its right/bottom neighbours'. */
1792 last_mv[0][0]= s->block[index].mx;
1793 last_mv[0][1]= s->block[index].my;
1794 last_mv[1][0]= right->mx;
1795 last_mv[1][1]= right->my;
1796 last_mv[2][0]= bottom->mx;
1797 last_mv[2][1]= bottom->my;
1804 assert(c-> stride == stride);
1805 assert(c->uvstride == uvstride);
1807 c->penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_cmp);
1808 c->sub_penalty_factor= get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_sub_cmp);
1809 c->mb_penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->mb_cmp);
/* Note: the index expression also assigns 1 to s->m.f_code. */
1810 c->current_mv_penalty= c->mv_penalty[s->m.f_code=1] + MAX_MV;
/* Search window relative to the block position, with a 16-2 pixel margin. */
1812 c->xmin = - x*block_w - 16+2;
1813 c->ymin = - y*block_w - 16+2;
1814 c->xmax = - (x+1)*block_w + (w<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-2;
1815 c->ymax = - (y+1)*block_w + (h<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-2;
/* Clamp the predictors to the search window (window limits scaled by `shift`). */
1817 if(P_LEFT[0] > (c->xmax<<shift)) P_LEFT[0] = (c->xmax<<shift);
1818 if(P_LEFT[1] > (c->ymax<<shift)) P_LEFT[1] = (c->ymax<<shift);
1819 if(P_TOP[0] > (c->xmax<<shift)) P_TOP[0] = (c->xmax<<shift);
1820 if(P_TOP[1] > (c->ymax<<shift)) P_TOP[1] = (c->ymax<<shift);
1821 if(P_TOPRIGHT[0] < (c->xmin<<shift)) P_TOPRIGHT[0]= (c->xmin<<shift);
1822 if(P_TOPRIGHT[0] > (c->xmax<<shift)) P_TOPRIGHT[0]= (c->xmax<<shift); //due to pmx no clip
1823 if(P_TOPRIGHT[1] > (c->ymax<<shift)) P_TOPRIGHT[1]= (c->ymax<<shift);
1825 P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]);
1826 P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);
/* Either the left or the median predictor is used; the selecting condition presumably tests for the first row (TODO confirm). */
1829 c->pred_x= P_LEFT[0];
1830 c->pred_y= P_LEFT[1];
1832 c->pred_x = P_MEDIAN[0];
1833 c->pred_y = P_MEDIAN[1];
/* Motion search in every reference frame. */
1838 for(ref=0; ref<s->ref_frames; ref++){
1839 init_ref(c, current_data, s->last_picture[ref].data, NULL, block_w*x, block_w*y, 0);
1841 ref_score= ff_epzs_motion_search(&s->m, &ref_mx, &ref_my, P, 0, /*ref_index*/ 0, last_mv,
1842 (1<<16)>>shift, level-LOG2_MB_SIZE+4, block_w);
1844 assert(ref_mx >= c->xmin);
1845 assert(ref_mx <= c->xmax);
1846 assert(ref_my >= c->ymin);
1847 assert(ref_my <= c->ymax);
/* The sub-pel search refines (ref_mx, ref_my); its score is then replaced by the mb score. */
1849 ref_score= c->sub_motion_search(&s->m, &ref_mx, &ref_my, ref_score, 0, 0, level-LOG2_MB_SIZE+4, block_w);
1850 ref_score= ff_get_mb_score(&s->m, ref_mx, ref_my, 0, 0, level-LOG2_MB_SIZE+4, block_w, 0);
/* Penalty that grows with the reference index. */
1851 ref_score+= 2*av_log2(2*ref)*c->penalty_factor;
/* Remember per-reference results when the cache arrays exist. */
1852 if(s->ref_mvs[ref]){
1853 s->ref_mvs[ref][index][0]= ref_mx;
1854 s->ref_mvs[ref][index][1]= ref_my;
1855 s->ref_scores[ref][index]= ref_score;
1857 if(score > ref_score){
1864 //FIXME if mb_cmp != SSE then intra cannot be compared currently and mb_penalty vs. lambda2
/* Bits already in the coder before the trial encodes, so only the increment is charged. */
1867 base_bits= get_rac_count(&s->c) - 8*(s->c.bytestream - s->c.bytestream_start);
/* ---- inter candidate ---- */
1869 pc.bytestream_start=
1870 pc.bytestream= p_buffer; //FIXME end/start? and at the other stoo
1871 memcpy(p_state, s->block_state, sizeof(s->block_state));
1873 if(level!=s->block_max_depth)
1874 put_rac(&pc, &p_state[4 + s_context], 1);
1875 put_rac(&pc, &p_state[1 + left->type + top->type], 0);
1876 if(s->ref_frames > 1)
1877 put_symbol(&pc, &p_state[128 + 1024 + 32*ref_context], best_ref, 0);
1878 pred_mv(s, &pmx, &pmy, best_ref, left, top, tr);
1879 put_symbol(&pc, &p_state[128 + 32*(mx_context + 16*!!best_ref)], mx - pmx, 1);
1880 put_symbol(&pc, &p_state[128 + 32*(my_context + 16*!!best_ref)], my - pmy, 1);
1881 p_len= pc.bytestream - pc.bytestream_start;
1882 score += (s->lambda2*(get_rac_count(&pc)-base_bits))>>FF_LAMBDA_SHIFT;
/* ---- intra candidate: rounded mean per plane; luma distortion is sum(p^2) - 2*l*sum + l^2*N ---- */
1884 block_s= block_w*block_w;
1885 sum = pix_sum(current_data[0], stride, block_w);
1886 l= (sum + block_s/2)/block_s;
1887 iscore = pix_norm1(current_data[0], stride, block_w) - 2*l*sum + l*l*block_s;
/* Chroma blocks are half size in each direction; their distortion terms are commented out. */
1889 block_s= block_w*block_w>>2;
1890 sum = pix_sum(current_data[1], uvstride, block_w>>1);
1891 cb= (sum + block_s/2)/block_s;
1892 // iscore += pix_norm1(&current_mb[1][0], uvstride, block_w>>1) - 2*cb*sum + cb*cb*block_s;
1893 sum = pix_sum(current_data[2], uvstride, block_w>>1);
1894 cr= (sum + block_s/2)/block_s;
1895 // iscore += pix_norm1(&current_mb[2][0], uvstride, block_w>>1) - 2*cr*sum + cr*cr*block_s;
1898 ic.bytestream_start=
1899 ic.bytestream= i_buffer; //FIXME end/start? and at the other stoo
1900 memcpy(i_state, s->block_state, sizeof(s->block_state));
1901 if(level!=s->block_max_depth)
1902 put_rac(&ic, &i_state[4 + s_context], 1);
1903 put_rac(&ic, &i_state[1 + left->type + top->type], 1);
1904 put_symbol(&ic, &i_state[32], l-pl , 1);
1905 put_symbol(&ic, &i_state[64], cb-pcb, 1);
1906 put_symbol(&ic, &i_state[96], cr-pcr, 1);
1907 i_len= ic.bytestream - ic.bytestream_start;
1908 iscore += (s->lambda2*(get_rac_count(&ic)-base_bits))>>FF_LAMBDA_SHIFT;
1910 // assert(score==256*256*256*64-1);
1911 assert(iscore < 255*255*256 + s->lambda2*10);
1912 assert(iscore >= 0);
1913 assert(l>=0 && l<=255);
1914 assert(pl>=0 && pl<=255);
/* Scene-change statistics from the two scores. */
1917 int varc= iscore >> 8;
1918 int vard= score >> 8;
1919 if (vard <= 64 || vard < varc)
1920 c->scene_change_score+= ff_sqrt(vard) - ff_sqrt(varc);
1922 c->scene_change_score+= s->m.qscale;
/* ---- split candidate: code "split" (0) and encode the four children for real ---- */
1925 if(level!=s->block_max_depth){
1926 put_rac(&s->c, &s->block_state[4 + s_context], 0);
1927 score2 = encode_q_branch(s, level+1, 2*x+0, 2*y+0);
1928 score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+0);
1929 score2+= encode_q_branch(s, level+1, 2*x+0, 2*y+1);
1930 score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+1);
1931 score2+= s->lambda2>>FF_LAMBDA_SHIFT; //FIXME exact split overhead
/* Split wins only if it beats both the inter and the intra candidate. */
1933 if(score2 < score && score2 < iscore)
/* ---- commit intra: rewind the output to pbbak and replay the intra bytes and state ---- */
1938 pred_mv(s, &pmx, &pmy, 0, left, top, tr);
1939 memcpy(pbbak, i_buffer, i_len);
1941 s->c.bytestream_start= pbbak_start;
1942 s->c.bytestream= pbbak + i_len;
1943 set_blocks(s, level, x, y, l, cb, cr, pmx, pmy, 0, BLOCK_INTRA);
1944 memcpy(s->block_state, i_state, sizeof(s->block_state));
/* ---- commit inter: rewind the output to pbbak and replay the inter bytes and state ---- */
1947 memcpy(pbbak, p_buffer, p_len);
1949 s->c.bytestream_start= pbbak_start;
1950 s->c.bytestream= pbbak + p_len;
1951 set_blocks(s, level, x, y, pl, pcb, pcr, mx, my, best_ref, 0);
1952 memcpy(s->block_state, p_state, sizeof(s->block_state));
/**
 * Return nonzero when two blocks carry the same coded information.
 *
 * If both are intra, only the three colour components are compared;
 * otherwise mx, my, ref and the BLOCK_INTRA bit of type must all match.
 * The differences are OR-ed together, so the result is 1 only when every
 * difference is zero.
 */
1957 static av_always_inline int same_block(BlockNode *a, BlockNode *b){
1958 if((a->type&BLOCK_INTRA) && (b->type&BLOCK_INTRA)){
1959 return !((a->color[0] - b->color[0]) | (a->color[1] - b->color[1]) | (a->color[2] - b->color[2]));
1961 return !((a->mx - b->mx) | (a->my - b->my) | (a->ref - b->ref) | ((a->type ^ b->type)&BLOCK_INTRA));
/**
 * Write the block at (x, y) of quad-tree depth `level` to the range coder,
 * using the decisions already stored in s->block (no search is done here).
 *
 * Below maximum depth, the block is coded as a leaf (flag 1) when it and its
 * right, bottom and bottom-right neighbours in s->block are identical;
 * otherwise flag 0 is written and the four children are coded recursively.
 * A leaf is coded either as intra (colour deltas against the left
 * neighbour) or as inter (optional reference index plus motion-vector
 * deltas against the predicted vector).
 */
1965 static void encode_q_branch2(SnowContext *s, int level, int x, int y){
1966 const int w= s->b_width << s->block_max_depth;
1967 const int rem_depth= s->block_max_depth - level;
1968 const int index= (x + y*w) << rem_depth;
1969 int trx= (x+1)<<rem_depth;
/* The stored decision for this block. */
1970 BlockNode *b= &s->block[index];
/* Neighbours; null_block stands in where a neighbour lies outside the picture. */
1971 const BlockNode *left = x ? &s->block[index-1] : &null_block;
1972 const BlockNode *top = y ? &s->block[index-w] : &null_block;
1973 const BlockNode *tl = y && x ? &s->block[index-w-1] : left;
1974 const BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
/* Colour predictors come from the left neighbour. */
1975 int pl = left->color[0];
1976 int pcb= left->color[1];
1977 int pcr= left->color[2];
/* Coding contexts; the mv contexts are offset by 16 when b->ref is nonzero. */
1979 int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref);
1980 int mx_context= av_log2(2*FFABS(left->mx - top->mx)) + 16*!!b->ref;
1981 int my_context= av_log2(2*FFABS(left->my - top->my)) + 16*!!b->ref;
1982 int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
1985 set_blocks(s, level, x, y, pl, pcb, pcr, 0, 0, 0, BLOCK_INTRA);
1989 if(level!=s->block_max_depth){
/* Leaf if the 2x2 group of stored entries is uniform. */
1990 if(same_block(b,b+1) && same_block(b,b+w) && same_block(b,b+w+1)){
1991 put_rac(&s->c, &s->block_state[4 + s_context], 1);
/* Otherwise split and code the four children. */
1993 put_rac(&s->c, &s->block_state[4 + s_context], 0);
1994 encode_q_branch2(s, level+1, 2*x+0, 2*y+0);
1995 encode_q_branch2(s, level+1, 2*x+1, 2*y+0);
1996 encode_q_branch2(s, level+1, 2*x+0, 2*y+1);
1997 encode_q_branch2(s, level+1, 2*x+1, 2*y+1);
/* Intra leaf: type flag 1, then colour deltas. */
2001 if(b->type & BLOCK_INTRA){
2002 pred_mv(s, &pmx, &pmy, 0, left, top, tr);
2003 put_rac(&s->c, &s->block_state[1 + (left->type&1) + (top->type&1)], 1);
2004 put_symbol(&s->c, &s->block_state[32], b->color[0]-pl , 1);
2005 put_symbol(&s->c, &s->block_state[64], b->color[1]-pcb, 1);
2006 put_symbol(&s->c, &s->block_state[96], b->color[2]-pcr, 1);
2007 set_blocks(s, level, x, y, b->color[0], b->color[1], b->color[2], pmx, pmy, 0, BLOCK_INTRA);
/* Inter leaf: type flag 0, reference index if more than one frame, then mv deltas. */
2009 pred_mv(s, &pmx, &pmy, b->ref, left, top, tr);
2010 put_rac(&s->c, &s->block_state[1 + (left->type&1) + (top->type&1)], 0);
2011 if(s->ref_frames > 1)
2012 put_symbol(&s->c, &s->block_state[128 + 1024 + 32*ref_context], b->ref, 0);
2013 put_symbol(&s->c, &s->block_state[128 + 32*mx_context], b->mx - pmx, 1);
2014 put_symbol(&s->c, &s->block_state[128 + 32*my_context], b->my - pmy, 1);
2015 set_blocks(s, level, x, y, pl, pcb, pcr, b->mx, b->my, b->ref, 0);
/**
 * Decode one node of the block quadtree from the range coder and store the
 * result in s->block through set_blocks().
 *
 * @param s     codec context; reads s->c and s->block_state, writes s->block
 * @param level depth of this node; callers start at 0 and recursion stops at
 *              s->block_max_depth
 * @param x     horizontal position of the node, in blocks of this level
 * @param y     vertical position of the node, in blocks of this level
 */
2019 static void decode_q_branch(SnowContext *s, int level, int x, int y){
// width of the block grid at the finest depth
2020 const int w= s->b_width << s->block_max_depth;
2021 const int rem_depth= s->block_max_depth - level;
// position of this node's first cell in the finest-depth grid
2022 const int index= (x + y*w) << rem_depth;
2023 int trx= (x+1)<<rem_depth;
// context neighbours: left/top fall back to null_block on the grid border,
// tl falls back to left and tr falls back to tl
2024 const BlockNode *left = x ? &s->block[index-1] : &null_block;
2025 const BlockNode *top = y ? &s->block[index-w] : &null_block;
2026 const BlockNode *tl = y && x ? &s->block[index-w-1] : left;
2027 const BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
// context for the split flag, built from the neighbours' levels
2028 int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
// NOTE(review): the condition guarding this reset to null_block values is not
// visible here -- presumably the keyframe case; confirm
2031 set_blocks(s, level, x, y, null_block.color[0], null_block.color[1], null_block.color[2], null_block.mx, null_block.my, null_block.ref, BLOCK_INTRA);
// leaf: maximum depth reached (no flag is read) or the coded flag is non-zero
2035 if(level==s->block_max_depth || get_rac(&s->c, &s->block_state[4 + s_context])){
// colour predictors are taken from the left neighbour
2037 int l = left->color[0];
2038 int cb= left->color[1];
2039 int cr= left->color[2];
2041 int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref);
// the tr terms are multiplied by 0, so only left/top contribute
2042 int mx_context= av_log2(2*FFABS(left->mx - top->mx)) + 0*av_log2(2*FFABS(tr->mx - top->mx));
2043 int my_context= av_log2(2*FFABS(left->my - top->my)) + 0*av_log2(2*FFABS(tr->my - top->my));
// one coded bit selects intra vs. inter
2045 type= get_rac(&s->c, &s->block_state[1 + left->type + top->type]) ? BLOCK_INTRA : 0;
// intra: colour deltas are added to the left-neighbour prediction
2048 pred_mv(s, &mx, &my, 0, left, top, tr);
2049 l += get_symbol(&s->c, &s->block_state[32], 1);
2050 cb+= get_symbol(&s->c, &s->block_state[64], 1);
2051 cr+= get_symbol(&s->c, &s->block_state[96], 1);
// inter: the reference index is only coded when several references exist
2053 if(s->ref_frames > 1)
2054 ref= get_symbol(&s->c, &s->block_state[128 + 1024 + 32*ref_context], 0);
// motion vector = pred_mv() prediction + coded delta
2055 pred_mv(s, &mx, &my, ref, left, top, tr);
2056 mx+= get_symbol(&s->c, &s->block_state[128 + 32*(mx_context + 16*!!ref)], 1);
2057 my+= get_symbol(&s->c, &s->block_state[128 + 32*(my_context + 16*!!ref)], 1);
2059 set_blocks(s, level, x, y, l, cb, cr, mx, my, ref, type);
// split node: decode the four children one level deeper
2061 decode_q_branch(s, level+1, 2*x+0, 2*y+0);
2062 decode_q_branch(s, level+1, 2*x+1, 2*y+0);
2063 decode_q_branch(s, level+1, 2*x+0, 2*y+1);
2064 decode_q_branch(s, level+1, 2*x+1, 2*y+1);
/**
 * Write the block quadtree to the range coder, one root node at a time.
 *
 * @param s      codec context
 * @param search non-zero selects encode_q_branch() unless the motion
 *               estimation method is ME_ITER; otherwise encode_q_branch2()
 *               is used
 */
2068 static void encode_blocks(SnowContext *s, int search){
// NOTE(review): the statement run for ME_ITER on non-keyframes is not visible
// here -- presumably the iterative motion search; confirm
2073 if(s->avctx->me_method == ME_ITER && !s->keyframe && search)
// refuse to continue when the remaining output buffer is below a rough bound
2077 if(s->c.bytestream_end - s->c.bytestream < w*MB_SIZE*MB_SIZE*3){ //FIXME nicer limit
2078 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2082 if(s->avctx->me_method == ME_ITER || !search)
2083 encode_q_branch2(s, 0, x, y);
2085 encode_q_branch (s, 0, x, y);
/**
 * Decode the block quadtree by calling decode_q_branch() at level 0 for
 * root position (x, y).
 */
2090 static void decode_blocks(SnowContext *s){
2097 decode_q_branch(s, 0, x, y);
/**
 * Sub-pel motion compensation of one b_w x b_h block.
 *
 * @param p      plane whose hcoeff/fast_mc/diag_mc settings select the filter;
 *               may be NULL, in which case the fixed (20,-5,1) kernel is used
 * @param dst    destination block
 * @param src    source pixels; the filters read up to HTAPS_MAX taps around
 *               each sample, so src must include that margin
 * @param tmp    scratch buffer supplied by the caller
 * @param stride line size in bytes of src (also used to size local buffers)
 * @param b_w    block width
 * @param b_h    block height
 * @param dx     horizontal sub-pel position, must be < 16
 * @param dy     vertical sub-pel position, must be < 16
 */
2102 static void mc_block(Plane *p, uint8_t *dst, const uint8_t *src, uint8_t *tmp, int stride, int b_w, int b_h, int dx, int dy){
2103 const static uint8_t weight[64]={
// indexed by dx + 16*dy; high nibble and low nibble each select an hpel[] plane
2114 const static uint8_t brane[256]={
2115 0x00,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x11,0x12,0x12,0x12,0x12,0x12,0x12,0x12,
2116 0x04,0x05,0xcc,0xcc,0xcc,0xcc,0xcc,0x41,0x15,0x16,0xcc,0xcc,0xcc,0xcc,0xcc,0x52,
2117 0x04,0xcc,0x05,0xcc,0xcc,0xcc,0x41,0xcc,0x15,0xcc,0x16,0xcc,0xcc,0xcc,0x52,0xcc,
2118 0x04,0xcc,0xcc,0x05,0xcc,0x41,0xcc,0xcc,0x15,0xcc,0xcc,0x16,0xcc,0x52,0xcc,0xcc,
2119 0x04,0xcc,0xcc,0xcc,0x41,0xcc,0xcc,0xcc,0x15,0xcc,0xcc,0xcc,0x16,0xcc,0xcc,0xcc,
2120 0x04,0xcc,0xcc,0x41,0xcc,0x05,0xcc,0xcc,0x15,0xcc,0xcc,0x52,0xcc,0x16,0xcc,0xcc,
2121 0x04,0xcc,0x41,0xcc,0xcc,0xcc,0x05,0xcc,0x15,0xcc,0x52,0xcc,0xcc,0xcc,0x16,0xcc,
2122 0x04,0x41,0xcc,0xcc,0xcc,0xcc,0xcc,0x05,0x15,0x52,0xcc,0xcc,0xcc,0xcc,0xcc,0x16,
2123 0x44,0x45,0x45,0x45,0x45,0x45,0x45,0x45,0x55,0x56,0x56,0x56,0x56,0x56,0x56,0x56,
2124 0x48,0x49,0xcc,0xcc,0xcc,0xcc,0xcc,0x85,0x59,0x5A,0xcc,0xcc,0xcc,0xcc,0xcc,0x96,
2125 0x48,0xcc,0x49,0xcc,0xcc,0xcc,0x85,0xcc,0x59,0xcc,0x5A,0xcc,0xcc,0xcc,0x96,0xcc,
2126 0x48,0xcc,0xcc,0x49,0xcc,0x85,0xcc,0xcc,0x59,0xcc,0xcc,0x5A,0xcc,0x96,0xcc,0xcc,
2127 0x48,0xcc,0xcc,0xcc,0x49,0xcc,0xcc,0xcc,0x59,0xcc,0xcc,0xcc,0x96,0xcc,0xcc,0xcc,
2128 0x48,0xcc,0xcc,0x85,0xcc,0x49,0xcc,0xcc,0x59,0xcc,0xcc,0x96,0xcc,0x5A,0xcc,0xcc,
2129 0x48,0xcc,0x85,0xcc,0xcc,0xcc,0x49,0xcc,0x59,0xcc,0x96,0xcc,0xcc,0xcc,0x5A,0xcc,
2130 0x48,0x85,0xcc,0xcc,0xcc,0xcc,0xcc,0x49,0x59,0x96,0xcc,0xcc,0xcc,0xcc,0xcc,0x5A,
2133 const static uint8_t needs[16]={
// variable-length scratch buffers sized from stride
2141 int16_t tmpIt [64*(32+HTAPS_MAX)];
2142 uint8_t tmp2t[3][stride*(32+HTAPS_MAX)];
2143 int16_t *tmpI= tmpIt;
2144 uint8_t *tmp2= tmp2t[0];
2145 const uint8_t *hpel[11];
2146 assert(dx<16 && dy<16);
// r/l: the two hpel[] planes to blend for this sub-pel position
2147 r= brane[dx + 16*dy]&15;
2148 l= brane[dx + 16*dy]>>4;
// b: union of the needs[] bits of both selected planes
2150 b= needs[l] | needs[r];
// NOTE(review): the statement executed when diag_mc is off is not visible here
2151 if(p && !p->diag_mc)
// pass 1: horizontal filter over b_h+HTAPS_MAX-1 rows
2155 for(y=0; y < b_h+HTAPS_MAX-1; y++){
2156 for(x=0; x < b_w; x++){
2157 int a_1=src[x + HTAPS_MAX/2-4];
2158 int a0= src[x + HTAPS_MAX/2-3];
2159 int a1= src[x + HTAPS_MAX/2-2];
2160 int a2= src[x + HTAPS_MAX/2-1];
2161 int a3= src[x + HTAPS_MAX/2+0];
2162 int a4= src[x + HTAPS_MAX/2+1];
2163 int a5= src[x + HTAPS_MAX/2+2];
2164 int a6= src[x + HTAPS_MAX/2+3];
// fixed symmetric 6-tap kernel when no plane is given or fast_mc is set,
// otherwise the plane's own 8-tap coefficients
2166 if(!p || p->fast_mc){
2167 am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5);
2171 am= p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6);
// saturate: a negative am becomes 0, an am above 255 becomes ~0
2176 if(am&(~255)) am= ~(am>>31);
// skip the left filter margin before the vertical pass
2185 src += HTAPS_MAX/2 - 1;
// pass 2: vertical filter over b_w+1 columns, rounded with +16>>5 or +32>>6
2189 for(y=0; y < b_h; y++){
2190 for(x=0; x < b_w+1; x++){
2191 int a_1=src[x + (HTAPS_MAX/2-4)*stride];
2192 int a0= src[x + (HTAPS_MAX/2-3)*stride];
2193 int a1= src[x + (HTAPS_MAX/2-2)*stride];
2194 int a2= src[x + (HTAPS_MAX/2-1)*stride];
2195 int a3= src[x + (HTAPS_MAX/2+0)*stride];
2196 int a4= src[x + (HTAPS_MAX/2+1)*stride];
2197 int a5= src[x + (HTAPS_MAX/2+2)*stride];
2198 int a6= src[x + (HTAPS_MAX/2+3)*stride];
2200 if(!p || p->fast_mc)
2201 am= (20*(a2+a3) - 5*(a1+a4) + (a0+a5) + 16)>>5;
2203 am= (p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6) + 32)>>6;
2205 if(am&(~255)) am= ~(am>>31);
// skip the top filter margin
2213 src += stride*(HTAPS_MAX/2 - 1);
// pass 3: vertical filter over the 16-bit intermediate tmpI (row pitch 64),
// rounded with +512>>10 or +2048>>12
2217 for(y=0; y < b_h; y++){
2218 for(x=0; x < b_w; x++){
2219 int a_1=tmpI[x + (HTAPS_MAX/2-4)*64];
2220 int a0= tmpI[x + (HTAPS_MAX/2-3)*64];
2221 int a1= tmpI[x + (HTAPS_MAX/2-2)*64];
2222 int a2= tmpI[x + (HTAPS_MAX/2-1)*64];
2223 int a3= tmpI[x + (HTAPS_MAX/2+0)*64];
2224 int a4= tmpI[x + (HTAPS_MAX/2+1)*64];
2225 int a5= tmpI[x + (HTAPS_MAX/2+2)*64];
2226 int a6= tmpI[x + (HTAPS_MAX/2+3)*64];
2228 if(!p || p->fast_mc)
2229 am= (20*(a2+a3) - 5*(a1+a4) + (a0+a5) + 512)>>10;
2231 am= (p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6) + 2048)>>12;
2232 if(am&(~255)) am= ~(am>>31);
// hpel[]: table of source planes (original and filtered) that the final
// blends index into
2241 hpel[ 1]= tmp2t[0] + stride*(HTAPS_MAX/2-1);
2246 hpel[ 6]= tmp2t[1] + 1;
2248 hpel[ 8]= src + stride;
2249 hpel[ 9]= hpel[1] + stride;
2250 hpel[10]= hpel[8] + 1;
// four-plane blend: the planes surrounding the position, weights sum to 64
2253 const uint8_t *src1= hpel[dx/8 + dy/8*4 ];
2254 const uint8_t *src2= hpel[dx/8 + dy/8*4+1];
2255 const uint8_t *src3= hpel[dx/8 + dy/8*4+4];
2256 const uint8_t *src4= hpel[dx/8 + dy/8*4+5];
2259 for(y=0; y < b_h; y++){
2260 for(x=0; x < b_w; x++){
2261 dst[x]= ((8-dx)*(8-dy)*src1[x] + dx*(8-dy)*src2[x]+
2262 (8-dx)* dy *src3[x] + dx* dy *src4[x]+32)>>6;
// two-plane blend of the planes chosen through brane[], rounded with +4>>3
2271 const uint8_t *src1= hpel[l];
2272 const uint8_t *src2= hpel[r];
2273 int a= weight[((dx&7) + (8*(dy&7)))];
2275 for(y=0; y < b_h; y++){
2276 for(x=0; x < b_w; x++){
2277 dst[x]= (a*src1[x] + b*src2[x] + 4)>>3;
/* mca(dx,dy,b_w) defines mc_block_hpel<dx><dy><b_w>(), a wrapper that calls
 * mc_block() with a NULL plane for a square b_w x b_w block at the fixed
 * sub-pel position (dx,dy). src is moved back by the filter margin
 * (HTAPS_MAX/2-1 columns and rows) before the call; the h argument is not
 * used in the lines visible here. */
2286 #define mca(dx,dy,b_w)\
2287 static void mc_block_hpel ## dx ## dy ## b_w(uint8_t *dst, const uint8_t *src, int stride, int h){\
2288 uint8_t tmp[stride*(b_w+HTAPS_MAX-1)];\
2290 mc_block(NULL, dst, src-(HTAPS_MAX/2-1)-(HTAPS_MAX/2-1)*stride, tmp, stride, b_w, b_w, dx, dy);\
/**
 * Produce the prediction of one block into dst: a flat colour for intra
 * blocks, a motion-compensated copy from a reference picture otherwise.
 *
 * @param s           codec context
 * @param dst         destination, written with line size stride
 * @param tmp         scratch buffer, used for edge emulation and by mc_block()
 * @param stride      line size of dst and of the reference plane
 * @param sx          x position of the block inside the plane
 * @param sy          y position of the block inside the plane
 * @param b_w         block width
 * @param b_h         block height
 * @param block       block parameters (type, colour, mx/my, ref)
 * @param plane_index plane to predict
 * @param w           plane width
 * @param h           plane height
 */
2302 static void pred_block(SnowContext *s, uint8_t *dst, uint8_t *tmp, int stride, int sx, int sy, int b_w, int b_h, BlockNode *block, int plane_index, int w, int h){
2303 if(block->type & BLOCK_INTRA){
2305 const int color = block->color[plane_index];
// the colour byte replicated into all four bytes of a 32-bit word
2306 const int color4= color*0x01010101;
// unrolled fill writing 32 bytes per row
2308 for(y=0; y < b_h; y++){
2309 *(uint32_t*)&dst[0 + y*stride]= color4;
2310 *(uint32_t*)&dst[4 + y*stride]= color4;
2311 *(uint32_t*)&dst[8 + y*stride]= color4;
2312 *(uint32_t*)&dst[12+ y*stride]= color4;
2313 *(uint32_t*)&dst[16+ y*stride]= color4;
2314 *(uint32_t*)&dst[20+ y*stride]= color4;
2315 *(uint32_t*)&dst[24+ y*stride]= color4;
2316 *(uint32_t*)&dst[28+ y*stride]= color4;
// unrolled fill writing 16 bytes per row
2319 for(y=0; y < b_h; y++){
2320 *(uint32_t*)&dst[0 + y*stride]= color4;
2321 *(uint32_t*)&dst[4 + y*stride]= color4;
2322 *(uint32_t*)&dst[8 + y*stride]= color4;
2323 *(uint32_t*)&dst[12+ y*stride]= color4;
// unrolled fill writing 8 bytes per row
2326 for(y=0; y < b_h; y++){
2327 *(uint32_t*)&dst[0 + y*stride]= color4;
2328 *(uint32_t*)&dst[4 + y*stride]= color4;
// unrolled fill writing 4 bytes per row
2331 for(y=0; y < b_h; y++){
2332 *(uint32_t*)&dst[0 + y*stride]= color4;
// generic per-pixel fill
2335 for(y=0; y < b_h; y++){
2336 for(x=0; x < b_w; x++){
2337 dst[x + y*stride]= color;
// inter: fetch from the reference picture selected by block->ref
2342 uint8_t *src= s->last_picture[block->ref].data[plane_index];
// plane 0 uses twice the vector scale of the other planes
2343 const int scale= plane_index ? s->mv_scale : 2*s->mv_scale;
2344 int mx= block->mx*scale;
2345 int my= block->my*scale;
// low 4 bits: sub-pel fraction; remaining bits: whole-pixel offset
2346 const int dx= mx&15;
2347 const int dy= my&15;
2348 const int tab_index= 3 - (b_w>>2) + (b_w>>4);
// start HTAPS_MAX/2-1 samples early so the filter taps have their margin
2349 sx += (mx>>4) - (HTAPS_MAX/2-1);
2350 sy += (my>>4) - (HTAPS_MAX/2-1);
2351 src += sx + sy*stride;
// the unsigned compare also catches negative sx/sy; in that case the source
// area is rebuilt in tmp with replicated edges
2352 if( (unsigned)sx >= w - b_w - (HTAPS_MAX-2)
2353 || (unsigned)sy >= h - b_h - (HTAPS_MAX-2)){
2354 ff_emulated_edge_mc(tmp + MB_SIZE, src, stride, b_w+HTAPS_MAX-1, b_h+HTAPS_MAX-1, sx, sy, w, h);
2357 // assert(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h);
2358 // assert(!(b_w&(b_w-1)));
2359 assert(b_w>1 && b_h>1);
2360 assert((tab_index>=0 && tab_index<4) || b_w==32);
// generic path: non quarter-pel position, unusual block shape, non
// power-of-two width, or fast_mc disabled for this plane
2361 if((dx&3) || (dy&3) || !(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h) || (b_w&(b_w-1)) || !s->plane[plane_index].fast_mc )
2362 mc_block(&s->plane[plane_index], dst, src, tmp, stride, b_w, b_h, dx, dy);
// otherwise the block is assembled from the H.264 qpel put functions;
// "+3" skips the filter margin added to sx/sy above
2365 for(y=0; y<b_h; y+=16){
2366 s->dsp.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + y*stride, src + 3 + (y+3)*stride,stride);
2367 s->dsp.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + 16 + y*stride, src + 19 + (y+3)*stride,stride);
2370 s->dsp.put_h264_qpel_pixels_tab[tab_index ][dy+(dx>>2)](dst,src + 3 + 3*stride,stride);
// wide block: two calls side by side
2371 else if(b_w==2*b_h){
2372 s->dsp.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst ,src + 3 + 3*stride,stride);
2373 s->dsp.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst+b_h,src + 3 + b_h + 3*stride,stride);
// remaining shape: two calls stacked vertically
2376 s->dsp.put_h264_qpel_pixels_tab[tab_index ][dy+(dx>>2)](dst ,src + 3 + 3*stride ,stride);
2377 s->dsp.put_h264_qpel_pixels_tab[tab_index ][dy+(dx>>2)](dst+b_w*stride,src + 3 + 3*stride+b_w*stride,stride);
/**
 * Blend four predicted blocks with their OBMC weights and combine the result
 * with the lines of a slice buffer.
 *
 * @param obmc        OBMC weight window
 * @param obmc_stride line size of the weight window (also used to locate its
 *                    four quadrants)
 * @param block       four predicted blocks; block[3] pairs with the first
 *                    quadrant, block[0] with the last
 * @param b_w         width of the area to process
 * @param b_h         height of the area to process
 * @param src_x       x offset into each slice-buffer line
 * @param src_y       first slice-buffer line
 * @param src_stride  line size of the predicted blocks and of dst8
 * @param sb          slice buffer supplying the coefficient lines
 * @param add         selects between adding into dst8 and subtracting from
 *                    the slice buffer (NOTE(review): the branch itself is not
 *                    visible here; confirm)
 * @param dst8        8-bit output picture
 */
2382 void ff_snow_inner_add_yblock(const uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h,
2383 int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8){
2386 for(y=0; y<b_h; y++){
2387 //FIXME ugly misuse of obmc_stride
// the four quadrants of the weight window for this row
2388 const uint8_t *obmc1= obmc + y*obmc_stride;
2389 const uint8_t *obmc2= obmc1+ (obmc_stride>>1);
2390 const uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1);
2391 const uint8_t *obmc4= obmc3+ (obmc_stride>>1);
2392 dst = slice_buffer_get_line(sb, src_y + y);
2393 for(x=0; x<b_w; x++){
// weighted sum of the four overlapping predictions
2394 int v= obmc1[x] * block[3][x + y*src_stride]
2395 +obmc2[x] * block[2][x + y*src_stride]
2396 +obmc3[x] * block[1][x + y*src_stride]
2397 +obmc4[x] * block[0][x + y*src_stride];
// rescale from OBMC weight precision to FRAC_BITS fixed point
2399 v <<= 8 - LOG2_OBMC_MAX;
2401 v >>= 8 - FRAC_BITS;
// add the coefficient, round to integer, saturate and store as a pixel
2404 v += dst[x + src_x];
2405 v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS;
2406 if(v&(~255)) v= ~(v>>31);
2407 dst8[x + y*src_stride] = v;
// alternative path: remove the prediction from the coefficient line
2409 dst[x + src_x] -= v;
2415 //FIXME name cleanup (b_w, block_w, b_width stuff)
/**
 * Predict the area covered by one OBMC window from the four blocks that
 * overlap it and add the weighted prediction to, or subtract it from, the
 * target buffer.
 *
 * @param s           codec context
 * @param sliced      non-zero: work on the slice buffer sb through
 *                    s->dsp.inner_add_yblock()
 * @param sb          slice buffer (used when sliced)
 * @param dst         coefficient buffer (used when not sliced)
 * @param dst8        8-bit picture output
 * @param obmc        OBMC weight window
 * @param src_x       x position of the window in the plane, may be negative
 * @param src_y       y position of the window in the plane, may be negative
 * @param b_w         window width before clipping
 * @param b_h         window height before clipping
 * @param w           plane width
 * @param h           plane height
 * @param dst_stride  line size of dst
 * @param src_stride  line size of dst8 and of the temporary predictions
 * @param obmc_stride line size of the weight window
 * @param b_x         block column of the top-left overlapping block
 * @param b_y         block row of the top-left overlapping block
 * @param add         non-zero adds the prediction, zero subtracts it
 * @param offset_dst  non-zero: dst is the origin of the plane and is advanced
 *                    to (src_x,src_y) here
 * @param plane_index plane being processed
 */
2416 static av_always_inline void add_yblock(SnowContext *s, int sliced, slice_buffer *sb, IDWTELEM *dst, uint8_t *dst8, const uint8_t *obmc, int src_x, int src_y, int b_w, int b_h, int w, int h, int dst_stride, int src_stride, int obmc_stride, int b_x, int b_y, int add, int offset_dst, int plane_index){
2417 const int b_width = s->b_width << s->block_max_depth;
2418 const int b_height= s->b_height << s->block_max_depth;
2419 const int b_stride= b_width;
// the four blocks whose windows overlap this area:
// left-top, right-top, left-bottom, right-bottom
2420 BlockNode *lt= &s->block[b_x + b_y*b_stride];
2421 BlockNode *rt= lt+1;
2422 BlockNode *lb= lt+b_stride;
2423 BlockNode *rb= lb+1;
2425 int tmp_step= src_stride >= 7*MB_SIZE ? MB_SIZE : MB_SIZE*src_stride;
2426 uint8_t tmp[src_stride*7*MB_SIZE]; //FIXME align
// special cases for windows on the right and bottom border of the block grid
2433 }else if(b_x + 1 >= b_width){
2440 }else if(b_y + 1 >= b_height){
// clip the window against the picture borders
2445 if(src_x<0){ //FIXME merge with prev & always round internal width up to *16
2448 if(!sliced && !offset_dst)
2451 }else if(src_x + b_w > w){
2455 obmc -= src_y*obmc_stride;
2457 if(!sliced && !offset_dst)
2458 dst -= src_y*dst_stride;
2460 }else if(src_y + b_h> h){
// nothing left after clipping
2464 if(b_w<=0 || b_h<=0) return;
2466 assert(src_stride > 2*MB_SIZE + 5);
2468 if(!sliced && offset_dst)
2469 dst += src_x + src_y*dst_stride;
2470 dst8+= src_x + src_y*src_stride;
2471 // src += src_x + src_y*src_stride;
2473 ptmp= tmp + 3*tmp_step;
// predict each of the four blocks; the same_block() tests allow identical
// neighbours to share a prediction instead of computing it again
2476 pred_block(s, block[0], tmp, src_stride, src_x, src_y, b_w, b_h, lt, plane_index, w, h);
2478 if(same_block(lt, rt)){
2483 pred_block(s, block[1], tmp, src_stride, src_x, src_y, b_w, b_h, rt, plane_index, w, h);
2486 if(same_block(lt, lb)){
2488 }else if(same_block(rt, lb)){
2493 pred_block(s, block[2], tmp, src_stride, src_x, src_y, b_w, b_h, lb, plane_index, w, h);
2496 if(same_block(lt, rb) ){
2498 }else if(same_block(rt, rb)){
2500 }else if(same_block(lb, rb)){
2504 pred_block(s, block[3], tmp, src_stride, src_x, src_y, b_w, b_h, rb, plane_index, w, h);
// variant 1: four separate passes, one per weight quadrant / prediction
2507 for(y=0; y<b_h; y++){
2508 for(x=0; x<b_w; x++){
2509 int v= obmc [x + y*obmc_stride] * block[3][x + y*src_stride] * (256/OBMC_MAX);
2510 if(add) dst[x + y*dst_stride] += v;
2511 else dst[x + y*dst_stride] -= v;
2514 for(y=0; y<b_h; y++){
2515 uint8_t *obmc2= obmc + (obmc_stride>>1);
2516 for(x=0; x<b_w; x++){
2517 int v= obmc2[x + y*obmc_stride] * block[2][x + y*src_stride] * (256/OBMC_MAX);
2518 if(add) dst[x + y*dst_stride] += v;
2519 else dst[x + y*dst_stride] -= v;
2522 for(y=0; y<b_h; y++){
2523 uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
2524 for(x=0; x<b_w; x++){
2525 int v= obmc3[x + y*obmc_stride] * block[1][x + y*src_stride] * (256/OBMC_MAX);
2526 if(add) dst[x + y*dst_stride] += v;
2527 else dst[x + y*dst_stride] -= v;
2530 for(y=0; y<b_h; y++){
2531 uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
2532 uint8_t *obmc4= obmc3+ (obmc_stride>>1);
2533 for(x=0; x<b_w; x++){
2534 int v= obmc4[x + y*obmc_stride] * block[0][x + y*src_stride] * (256/OBMC_MAX);
2535 if(add) dst[x + y*dst_stride] += v;
2536 else dst[x + y*dst_stride] -= v;
// variant 2 (slice buffer): delegate to the dsp implementation
2541 s->dsp.inner_add_yblock(obmc, obmc_stride, block, b_w, b_h, src_x,src_y, src_stride, sb, add, dst8);
// variant 3: single pass blending all four predictions per sample
2543 for(y=0; y<b_h; y++){
2544 //FIXME ugly misuse of obmc_stride
2545 const uint8_t *obmc1= obmc + y*obmc_stride;
2546 const uint8_t *obmc2= obmc1+ (obmc_stride>>1);
2547 const uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1);
2548 const uint8_t *obmc4= obmc3+ (obmc_stride>>1);
2549 for(x=0; x<b_w; x++){
2550 int v= obmc1[x] * block[3][x + y*src_stride]
2551 +obmc2[x] * block[2][x + y*src_stride]
2552 +obmc3[x] * block[1][x + y*src_stride]
2553 +obmc4[x] * block[0][x + y*src_stride];
// rescale from OBMC weight precision to FRAC_BITS fixed point
2555 v <<= 8 - LOG2_OBMC_MAX;
2557 v >>= 8 - FRAC_BITS;
// add the coefficient, round, saturate and store as a pixel
2560 v += dst[x + y*dst_stride];
2561 v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS;
2562 if(v&(~255)) v= ~(v>>31);
2563 dst8[x + y*src_stride] = v;
// alternative path: remove the prediction from the coefficients
2565 dst[x + y*dst_stride] -= v;
/**
 * Apply (or remove) the block prediction for one row of blocks, working on
 * the lines of a slice buffer.
 *
 * @param s           codec context
 * @param sb          slice buffer holding the coefficient lines
 * @param old_buffer  coefficient buffer forwarded to add_yblock() as dst
 * @param plane_index plane to process
 * @param add         forwarded to add_yblock()
 * @param mb_y        block row to process
 */
2572 static av_always_inline void predict_slice_buffered(SnowContext *s, slice_buffer * sb, IDWTELEM * old_buffer, int plane_index, int add, int mb_y){
2573 Plane *p= &s->plane[plane_index];
2574 const int mb_w= s->b_width << s->block_max_depth;
2575 const int mb_h= s->b_height << s->block_max_depth;
// planes other than 0 use half the block size and the next smaller OBMC window
2577 int block_size = MB_SIZE >> s->block_max_depth;
2578 int block_w = plane_index ? block_size/2 : block_size;
2579 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
2580 int obmc_stride= plane_index ? block_size : 2*block_size;
2581 int ref_stride= s->current_picture.linesize[plane_index];
2582 uint8_t *dst8= s->current_picture.data[plane_index];
// keyframes (or debug flag 512): no motion prediction, only the constant
// offset of 128 is handled
2586 if(s->keyframe || (s->avctx->debug&512)){
// convert the coefficients of this block row to pixels: add 128 and the
// rounding term, saturate, store
2591 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
2592 // DWTELEM * line = slice_buffer_get_line(sb, y);
2593 IDWTELEM * line = sb->line[y];
2595 // int v= buf[x + y*w] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
2596 int v= line[x] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
2598 if(v&(~255)) v= ~(v>>31);
2599 dst8[x + y*ref_stride]= v;
// opposite direction: remove the offset of 128 from the coefficients
2603 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
2604 // DWTELEM * line = slice_buffer_get_line(sb, y);
2605 IDWTELEM * line = sb->line[y];
2607 line[x] -= 128 << FRAC_BITS;
2608 // buf[x + y*w]-= 128<<FRAC_BITS;
// one OBMC window per block column, mb_w inclusive; each window starts half
// a block before the block position
2616 for(mb_x=0; mb_x<=mb_w; mb_x++){
2617 add_yblock(s, 1, sb, old_buffer, dst8, obmc,
2618 block_w*mb_x - block_w/2,
2619 block_w*mb_y - block_w/2,
2622 w, ref_stride, obmc_stride,
2624 add, 0, plane_index);
/**
 * Apply (or remove) the block prediction for one row of blocks, working on a
 * flat coefficient buffer.
 *
 * @param s           codec context
 * @param buf         coefficient buffer, addressed as buf[x + y*w]
 * @param plane_index plane to process
 * @param add         forwarded to add_yblock()
 * @param mb_y        block row to process
 */
2628 static av_always_inline void predict_slice(SnowContext *s, IDWTELEM *buf, int plane_index, int add, int mb_y){
2629 Plane *p= &s->plane[plane_index];
2630 const int mb_w= s->b_width << s->block_max_depth;
2631 const int mb_h= s->b_height << s->block_max_depth;
// planes other than 0 use half the block size and the next smaller OBMC window
2633 int block_size = MB_SIZE >> s->block_max_depth;
2634 int block_w = plane_index ? block_size/2 : block_size;
2635 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
2636 const int obmc_stride= plane_index ? block_size : 2*block_size;
2637 int ref_stride= s->current_picture.linesize[plane_index];
2638 uint8_t *dst8= s->current_picture.data[plane_index];
// keyframes (or debug flag 512): no motion prediction, only the constant
// offset of 128 is handled
2642 if(s->keyframe || (s->avctx->debug&512)){
// coefficients -> pixels: add 128 and the rounding term, saturate, store
2647 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
2649 int v= buf[x + y*w] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
2651 if(v&(~255)) v= ~(v>>31);
2652 dst8[x + y*ref_stride]= v;
// opposite direction: remove the offset of 128 from the coefficients
2656 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
2658 buf[x + y*w]-= 128<<FRAC_BITS;
// one OBMC window per block column, mb_w inclusive; each window starts half
// a block before the block position
2666 for(mb_x=0; mb_x<=mb_w; mb_x++){
2667 add_yblock(s, 0, NULL, buf, dst8, obmc,
2668 block_w*mb_x - block_w/2,
2669 block_w*mb_y - block_w/2,
2672 w, ref_stride, obmc_stride,
2674 add, 1, plane_index);
/**
 * Run predict_slice() over every block row of a plane.
 * The loop bound is inclusive, so rows 0..mb_h are processed.
 */
2678 static av_always_inline void predict_plane(SnowContext *s, IDWTELEM *buf, int plane_index, int add){
2679 const int mb_h= s->b_height << s->block_max_depth;
2681 for(mb_y=0; mb_y<=mb_h; mb_y++)
2682 predict_slice(s, buf, plane_index, add, mb_y);
/**
 * Estimate the intra colour of block (mb_x, mb_y) for one plane.
 *
 * The block is temporarily made intra with colour 0, the prediction of its
 * surroundings is rendered into a scratch buffer, and the returned value is
 * the OBMC-weighted quotient ab/aa of what the input picture still needs,
 * clipped to 0..255.
 *
 * @param s           codec context
 * @param mb_x        block column
 * @param mb_y        block row
 * @param plane_index plane to evaluate
 * @return colour value in the range 0..255
 */
2685 static int get_dc(SnowContext *s, int mb_x, int mb_y, int plane_index){
2687 Plane *p= &s->plane[plane_index];
2688 const int block_size = MB_SIZE >> s->block_max_depth;
2689 const int block_w = plane_index ? block_size/2 : block_size;
2690 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
2691 const int obmc_stride= plane_index ? block_size : 2*block_size;
2692 const int ref_stride= s->current_picture.linesize[plane_index];
2693 uint8_t *src= s-> input_picture.data[plane_index];
// per-plane region of the shared scratchpad
2694 IDWTELEM *dst= (IDWTELEM*)s->m.obmc_scratchpad + plane_index*block_size*block_size*4; //FIXME change to unsigned
2695 const int b_stride = s->b_width << s->block_max_depth;
2696 const int w= p->width;
2697 const int h= p->height;
2698 int index= mb_x + mb_y*b_stride;
2699 BlockNode *b= &s->block[index];
// copy of the block's current state, taken before it is modified below
2700 BlockNode backup= *b;
// make the block contribute nothing: intra with colour 0
2704 b->type|= BLOCK_INTRA;
2705 b->color[plane_index]= 0;
2706 memset(dst, 0, obmc_stride*obmc_stride*sizeof(IDWTELEM));
// i selects one of the four windows that overlap this block
// (offsets -1/0 in x and y)
2709 int mb_x2= mb_x + (i &1) - 1;
2710 int mb_y2= mb_y + (i>>1) - 1;
2711 int x= block_w*mb_x2 + block_w/2;
2712 int y= block_w*mb_y2 + block_w/2;
// add=0: the prediction is subtracted, so dst holds its negative
2714 add_yblock(s, 0, NULL, dst + ((i&1)+(i>>1)*obmc_stride)*block_w, NULL, obmc,
2715 x, y, block_w, block_w, w, h, obmc_stride, ref_stride, obmc_stride, mb_x2, mb_y2, 0, 0, plane_index);
// accumulate only over the part of the window inside the picture
2717 for(y2= FFMAX(y, 0); y2<FFMIN(h, y+block_w); y2++){
2718 for(x2= FFMAX(x, 0); x2<FFMIN(w, x+block_w); x2++){
2719 int index= x2-(block_w*mb_x - block_w/2) + (y2-(block_w*mb_y - block_w/2))*obmc_stride;
2720 int obmc_v= obmc[index];
// on picture borders, add the weight from the opposite half of the OBMC window
2722 if(y<0) obmc_v += obmc[index + block_w*obmc_stride];
2723 if(x<0) obmc_v += obmc[index + block_w];
2724 if(y+block_w>h) obmc_v += obmc[index - block_w*obmc_stride];
2725 if(x+block_w>w) obmc_v += obmc[index - block_w];
2726 //FIXME precalculate this or simplify it somehow else
// d: neighbours' prediction (sign restored) plus the rounding term
2728 d = -dst[index] + (1<<(FRAC_BITS-1));
2730 ab += (src[x2 + y2*ref_stride] - (d>>FRAC_BITS)) * obmc_v;
2731 aa += obmc_v * obmc_v; //FIXME precalculate this
// rounded quotient, scaled by the OBMC weight precision
2737 return av_clip(((ab<<LOG2_OBMC_MAX) + aa/2)/aa, 0, 255); //FIXME we should not need clipping
/**
 * Rough estimate of the cost of coding the block at (x, y), computed from
 * log2 magnitudes rather than from the actual range coder.
 *
 * @param s codec context
 * @param x block column
 * @param y block row
 * @param w width of the block in grid cells, used to locate the top-right
 *          neighbour
 * @return estimated cost for this block
 */
2740 static inline int get_block_bits(SnowContext *s, int x, int y, int w){
2741 const int b_stride = s->b_width << s->block_max_depth;
2742 const int b_height = s->b_height<< s->block_max_depth;
2743 int index= x + y*b_stride;
2744 const BlockNode *b = &s->block[index];
// neighbours used for prediction; border cells fall back to null_block or
// to another neighbour
2745 const BlockNode *left = x ? &s->block[index-1] : &null_block;
2746 const BlockNode *top = y ? &s->block[index-b_stride] : &null_block;
2747 const BlockNode *tl = y && x ? &s->block[index-b_stride-1] : left;
2748 const BlockNode *tr = y && x+w<b_stride ? &s->block[index-b_stride+w] : tl;
2750 // int mx_context= av_log2(2*FFABS(left->mx - top->mx));
2751 // int my_context= av_log2(2*FFABS(left->my - top->my));
// positions outside the block grid
// NOTE(review): the statement taken for them is not visible here
2753 if(x<0 || x>=b_stride || y>=b_height)
2760 00001XXXX 15-30 8-15
2762 //FIXME try accurate rate
2763 //FIXME intra and inter predictors if surrounding blocks are not the same type
// intra: cost grows with the colour differences to the left neighbour
2764 if(b->type & BLOCK_INTRA){
2765 return 3+2*( av_log2(2*FFABS(left->color[0] - b->color[0]))
2766 + av_log2(2*FFABS(left->color[1] - b->color[1]))
2767 + av_log2(2*FFABS(left->color[2] - b->color[2])));
// inter: cost from dmx/dmy (obtained via pred_mv()) and the reference index
2769 pred_mv(s, &dmx, &dmy, b->ref, left, top, tr);
2772 return 2*(1 + av_log2(2*FFABS(dmx)) //FIXME kill the 2* can be merged in lambda
2773 + av_log2(2*FFABS(dmy))
2774 + av_log2(2*b->ref));
/**
 * Rate-distortion score of the current parameters of block (mb_x, mb_y).
 *
 * The block is predicted over its full OBMC window, blended with the
 * precomputed contribution of the neighbours, written into the current
 * picture and compared against the input picture.
 *
 * @param s           codec context
 * @param mb_x        block column
 * @param mb_y        block row
 * @param plane_index plane to evaluate
 * @param obmc_edged  OBMC weights for this block, already adjusted for
 *                    picture borders by the caller
 * @return distortion + rate * penalty_factor
 */
2778 static int get_block_rd(SnowContext *s, int mb_x, int mb_y, int plane_index, const uint8_t *obmc_edged){
2779 Plane *p= &s->plane[plane_index];
2780 const int block_size = MB_SIZE >> s->block_max_depth;
2781 const int block_w = plane_index ? block_size/2 : block_size;
2782 const int obmc_stride= plane_index ? block_size : 2*block_size;
2783 const int ref_stride= s->current_picture.linesize[plane_index];
2784 uint8_t *dst= s->current_picture.data[plane_index];
2785 uint8_t *src= s-> input_picture.data[plane_index];
// neighbours' contribution, read from the shared scratchpad
2786 IDWTELEM *pred= (IDWTELEM*)s->m.obmc_scratchpad + plane_index*block_size*block_size*4;
// variable-length scratch buffers sized from the picture line size
2787 uint8_t cur[ref_stride*2*MB_SIZE]; //FIXME alignment
2788 uint8_t tmp[ref_stride*(2*MB_SIZE+HTAPS_MAX-1)];
2789 const int b_stride = s->b_width << s->block_max_depth;
2790 const int b_height = s->b_height<< s->block_max_depth;
2791 const int w= p->width;
2792 const int h= p->height;
2795 const int penalty_factor= get_penalty_factor(s->lambda, s->lambda2, s->avctx->me_cmp);
// top-left corner of the 2*block_w window, and the window clipped to the picture
2796 int sx= block_w*mb_x - block_w/2;
2797 int sy= block_w*mb_y - block_w/2;
2798 int x0= FFMAX(0,-sx);
2799 int y0= FFMAX(0,-sy);
2800 int x1= FFMIN(block_w*2, w-sx);
2801 int y1= FFMIN(block_w*2, h-sy);
// this block's own prediction over the full window
2804 pred_block(s, cur, tmp, ref_stride, sx, sy, block_w*2, block_w*2, &s->block[mb_x + mb_y*b_stride], plane_index, w, h);
// blend: own prediction * weight + neighbours' contribution, then back to pixels
2806 for(y=y0; y<y1; y++){
2807 const uint8_t *obmc1= obmc_edged + y*obmc_stride;
2808 const IDWTELEM *pred1 = pred + y*obmc_stride;
2809 uint8_t *cur1 = cur + y*ref_stride;
2810 uint8_t *dst1 = dst + sx + (sy+y)*ref_stride;
2811 for(x=x0; x<x1; x++){
2812 #if FRAC_BITS >= LOG2_OBMC_MAX
2813 int v = (cur1[x] * obmc1[x]) << (FRAC_BITS - LOG2_OBMC_MAX);
2815 int v = (cur1[x] * obmc1[x] + (1<<(LOG2_OBMC_MAX - FRAC_BITS-1))) >> (LOG2_OBMC_MAX - FRAC_BITS);
2817 v = (v + pred1[x]) >> FRAC_BITS;
2818 if(v&(~255)) v= ~(v>>31);
2823 /* copy the regions where obmc[] = (uint8_t)256 */
// only relevant for corner blocks when the weight precision is 8 bits
2824 if(LOG2_OBMC_MAX == 8
2825 && (mb_x == 0 || mb_x == b_stride-1)
2826 && (mb_y == 0 || mb_y == b_height-1)){
2835 for(y=y0; y<y1; y++)
2836 memcpy(dst + sx+x0 + (sy+y)*ref_stride, cur + x0 + y*ref_stride, x1-x0);
2840 /* FIXME rearrange dsputil to fit 32x32 cmp functions */
2841 /* FIXME check alignment of the cmp wavelet vs the encoding wavelet */
2842 /* FIXME cmps overlap but do not cover the wavelet's whole support.
2843 * So improving the score of one block is not strictly guaranteed
2844 * to improve the score of the whole frame, thus iterative motion
2845 * estimation does not always converge. */
// distortion: wavelet comparison for W97/W53, otherwise the dsp me_cmp function
2846 if(s->avctx->me_cmp == FF_CMP_W97)
2847 distortion = w97_32_c(&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, 32);
2848 else if(s->avctx->me_cmp == FF_CMP_W53)
2849 distortion = w53_32_c(&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, 32);
// a 32x32 window is compared as four 16x16 quarters
2853 int off = sx+16*(i&1) + (sy+16*(i>>1))*ref_stride;
2854 distortion += s->dsp.me_cmp[0](&s->m, src + off, dst + off, ref_stride, 16);
2859 distortion = s->dsp.me_cmp[0](&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, block_w*2);
// rate: this block plus nearby blocks, each estimated by get_block_bits()
2868 rate += get_block_bits(s, mb_x + (i&1) - (i>>1), mb_y + (i>>1), 1);
2870 if(mb_x == b_stride-2)
2871 rate += get_block_bits(s, mb_x + 1, mb_y + 1, 1);
2873 return distortion + rate*penalty_factor;
/**
 * Rate-distortion score for the 2x2 group of blocks whose top-left member is
 * (mb_x, mb_y).
 *
 * @param s           codec context
 * @param mb_x        block column of the group's top-left block
 * @param mb_y        block row of the group's top-left block
 * @param plane_index plane to evaluate
 * @return distortion + rate * penalty_factor
 */
2876 static int get_4block_rd(SnowContext *s, int mb_x, int mb_y, int plane_index){
2878 Plane *p= &s->plane[plane_index];
2879 const int block_size = MB_SIZE >> s->block_max_depth;
2880 const int block_w = plane_index ? block_size/2 : block_size;
2881 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
2882 const int obmc_stride= plane_index ? block_size : 2*block_size;
2883 const int ref_stride= s->current_picture.linesize[plane_index];
2884 uint8_t *dst= s->current_picture.data[plane_index];
2885 uint8_t *src= s-> input_picture.data[plane_index];
2886 //FIXME zero_dst is const but add_yblock changes dst if add is 0 (this is never the case for dst=zero_dst
2887 // const has only been removed from zero_dst to suppress a warning
2888 static IDWTELEM zero_dst[4096]; //FIXME
2889 const int b_stride = s->b_width << s->block_max_depth;
2890 const int w= p->width;
2891 const int h= p->height;
2894 const int penalty_factor= get_penalty_factor(s->lambda, s->lambda2, s->avctx->me_cmp);
// i walks a 3x3 arrangement of windows around the group (offsets -1..1)
2897 int mb_x2= mb_x + (i%3) - 1;
2898 int mb_y2= mb_y + (i/3) - 1;
2899 int x= block_w*mb_x2 + block_w/2;
2900 int y= block_w*mb_y2 + block_w/2;
// render the prediction into the current picture; add=1 with an all-zero
// coefficient buffer and dst_stride 0
2902 add_yblock(s, 0, NULL, zero_dst, dst, obmc,
2903 x, y, block_w, block_w, w, h, /*dst_stride*/0, ref_stride, obmc_stride, mb_x2, mb_y2, 1, 1, plane_index);
2905 //FIXME find a cleaner/simpler way to skip the outside stuff
// areas outside the picture are copied from the input so they add no distortion
2906 for(y2= y; y2<0; y2++)
2907 memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, block_w);
2908 for(y2= h; y2<y+block_w; y2++)
2909 memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, block_w);
2911 for(y2= y; y2<y+block_w; y2++)
2912 memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, -x);
2915 for(y2= y; y2<y+block_w; y2++)
2916 memcpy(dst + w + y2*ref_stride, src + w + y2*ref_stride, x+block_w - w);
// me_cmp[1] is used for 8-wide windows, me_cmp[0] for 16-wide ones
2919 assert(block_w== 8 || block_w==16);
2920 distortion += s->dsp.me_cmp[block_w==8](&s->m, src + x + y*ref_stride, dst + x + y*ref_stride, ref_stride, block_w);
2924 BlockNode *b= &s->block[mb_x+mb_y*b_stride];
// merged: all four blocks of the group carry identical parameters
2925 int merged= same_block(b,b+1) && same_block(b,b+b_stride) && same_block(b,b+b_stride+1);
2933 rate = get_block_bits(s, mb_x, mb_y, 2);
// when merged, the four blocks of the group itself (first four dxy entries)
// are skipped and only the surrounding ones are counted
2934 for(i=merged?4:0; i<9; i++){
2935 static const int dxy[9][2] = {{0,0},{1,0},{0,1},{1,1},{2,0},{2,1},{-1,2},{0,2},{1,2}};
2936 rate += get_block_bits(s, mb_x + dxy[i][0], mb_y + dxy[i][1], 1);
2939 return distortion + rate*penalty_factor;
/**
 * Try one candidate parameter set for block (mb_x, mb_y) and score it with
 * get_block_rd().
 *
 * @param s          codec context
 * @param mb_x       block column
 * @param mb_y       block row
 * @param p          candidate: three colours when intra; p[0], p[1] are used
 *                   for the cache key otherwise
 * @param intra      non-zero to test the candidate as an intra block
 * @param obmc_edged border-adjusted OBMC weights, forwarded to get_block_rd()
 * @param best_rd    best score so far (NOTE(review): how it is compared and
 *                   updated is not visible here; confirm)
 */
2942 static av_always_inline int check_block(SnowContext *s, int mb_x, int mb_y, int p[3], int intra, const uint8_t *obmc_edged, int *best_rd){
2943 const int b_stride= s->b_width << s->block_max_depth;
2944 BlockNode *block= &s->block[mb_x + mb_y * b_stride];
// copy of the block's current state, taken before the candidate is applied
2945 BlockNode backup= *block;
2946 int rd, index, value;
2948 assert(mb_x>=0 && mb_y>=0);
2949 assert(mb_x<b_stride);
// intra candidate: install the three colours
2952 block->color[0] = p[0];
2953 block->color[1] = p[1];
2954 block->color[2] = p[2];
2955 block->type |= BLOCK_INTRA;
// inter candidate: consult s->me_cache, keyed on p[0], p[1], the reference
// index and the current cache generation
2957 index= (p[0] + 31*p[1]) & (ME_CACHE_SIZE-1);
2958 value= s->me_cache_generation + (p[0]>>10) + (p[1]<<6) + (block->ref<<12);
2959 if(s->me_cache[index] == value)
2961 s->me_cache[index]= value;
2965 block->type &= ~BLOCK_INTRA;
// score the candidate on plane 0
2968 rd= get_block_rd(s, mb_x, mb_y, 0, obmc_edged);
2980 /* special case for int[2] args we discard afterwards,
2981 * fixes compilation problem with gcc 2.95 */
/* Wraps (p0, p1) in a local array and forwards it to check_block() with
 * intra = 0; returns whatever check_block() returns. */
2982 static av_always_inline int check_block_inter(SnowContext *s, int mb_x, int mb_y, int p0, int p1, const uint8_t *obmc_edged, int *best_rd){
2983 int p[2] = {p0, p1};
2984 return check_block(s, mb_x, mb_y, p, 0, obmc_edged, best_rd);
/**
 * Try one inter candidate for a whole 2x2 group of blocks and score it with
 * get_4block_rd().
 *
 * @param s       codec context
 * @param mb_x    block column of the group's top-left block, must be even
 * @param mb_y    block row of the group's top-left block, must be even
 * @param p0      first candidate value, used in the cache key
 * @param p1      second candidate value, used in the cache key
 * @param ref     candidate reference index
 * @param best_rd best score so far (NOTE(review): how it is compared and
 *                updated is not visible here; confirm)
 */
2987 static av_always_inline int check_4block_inter(SnowContext *s, int mb_x, int mb_y, int p0, int p1, int ref, int *best_rd){
2988 const int b_stride= s->b_width << s->block_max_depth;
2989 BlockNode *block= &s->block[mb_x + mb_y * b_stride];
// the four blocks of the group, saved so they can be restored
2990 BlockNode backup[4]= {block[0], block[1], block[b_stride], block[b_stride+1]};
2991 int rd, index, value;
2993 assert(mb_x>=0 && mb_y>=0);
2994 assert(mb_x<b_stride);
2995 assert(((mb_x|mb_y)&1) == 0);
// cache lookup keyed on the candidate, the reference index and the current
// cache generation
2997 index= (p0 + 31*p1) & (ME_CACHE_SIZE-1);
2998 value= s->me_cache_generation + (p0>>10) + (p1<<6) + (block->ref<<12);
2999 if(s->me_cache[index] == value)
3001 s->me_cache[index]= value;
// apply the candidate to the first block and replicate it to the other three
3006 block->type &= ~BLOCK_INTRA;
3007 block[1]= block[b_stride]= block[b_stride+1]= *block;
// score the group on plane 0
3009 rd= get_4block_rd(s, mb_x, mb_y, 0);
// put the saved blocks back
3016 block[0]= backup[0];
3017 block[1]= backup[1];
3018 block[b_stride]= backup[2];
3019 block[b_stride+1]= backup[3];
3024 static void iterative_me(SnowContext *s){
3025 int pass, mb_x, mb_y;
3026 const int b_width = s->b_width << s->block_max_depth;
3027 const int b_height= s->b_height << s->block_max_depth;
3028 const int b_stride= b_width;
3032 RangeCoder r = s->c;
3033 uint8_t state[sizeof(s->block_state)];
3034 memcpy(state, s->block_state, sizeof(s->block_state));
3035 for(mb_y= 0; mb_y<s->b_height; mb_y++)
3036 for(mb_x= 0; mb_x<s->b_width; mb_x++)
3037 encode_q_branch(s, 0, mb_x, mb_y);
3039 memcpy(s->block_state, state, sizeof(s->block_state));
3042 for(pass=0; pass<25; pass++){
3045 for(mb_y= 0; mb_y<b_height; mb_y++){
3046 for(mb_x= 0; mb_x<b_width; mb_x++){
3047 int dia_change, i, j, ref;
3048 int best_rd= INT_MAX, ref_rd;
3049 BlockNode backup, ref_b;
3050 const int index= mb_x + mb_y * b_stride;
3051 BlockNode *block= &s->block[index];
3052 BlockNode *tb = mb_y ? &s->block[index-b_stride ] : NULL;
3053 BlockNode *lb = mb_x ? &s->block[index -1] : NULL;
3054 BlockNode *rb = mb_x+1<b_width ? &s->block[index +1] : NULL;
3055 BlockNode *bb = mb_y+1<b_height ? &s->block[index+b_stride ] : NULL;
3056 BlockNode *tlb= mb_x && mb_y ? &s->block[index-b_stride-1] : NULL;
3057 BlockNode *trb= mb_x+1<b_width && mb_y ? &s->block[index-b_stride+1] : NULL;
3058 BlockNode *blb= mb_x && mb_y+1<b_height ? &s->block[index+b_stride-1] : NULL;
3059 BlockNode *brb= mb_x+1<b_width && mb_y+1<b_height ? &s->block[index+b_stride+1] : NULL;
3060 const int b_w= (MB_SIZE >> s->block_max_depth);
3061 uint8_t obmc_edged[b_w*2][b_w*2];
3063 if(pass && (block->type & BLOCK_OPT))
3065 block->type |= BLOCK_OPT;
3069 if(!s->me_cache_generation)
3070 memset(s->me_cache, 0, sizeof(s->me_cache));
3071 s->me_cache_generation += 1<<22;
3073 //FIXME precalculate
3076 memcpy(obmc_edged, obmc_tab[s->block_max_depth], b_w*b_w*4);
3078 for(y=0; y<b_w*2; y++)
3079 memset(obmc_edged[y], obmc_edged[y][0] + obmc_edged[y][b_w-1], b_w);
3080 if(mb_x==b_stride-1)
3081 for(y=0; y<b_w*2; y++)
3082 memset(obmc_edged[y]+b_w, obmc_edged[y][b_w] + obmc_edged[y][b_w*2-1], b_w);
3084 for(x=0; x<b_w*2; x++)
3085 obmc_edged[0][x] += obmc_edged[b_w-1][x];
3086 for(y=1; y<b_w; y++)
3087 memcpy(obmc_edged[y], obmc_edged[0], b_w*2);
3089 if(mb_y==b_height-1){
3090 for(x=0; x<b_w*2; x++)
3091 obmc_edged[b_w*2-1][x] += obmc_edged[b_w][x];
3092 for(y=b_w; y<b_w*2-1; y++)
3093 memcpy(obmc_edged[y], obmc_edged[b_w*2-1], b_w*2);
3097 //skip stuff outside the picture
3098 if(mb_x==0 || mb_y==0 || mb_x==b_width-1 || mb_y==b_height-1){
3099 uint8_t *src= s-> input_picture.data[0];
3100 uint8_t *dst= s->current_picture.data[0];
3101 const int stride= s->current_picture.linesize[0];
3102 const int block_w= MB_SIZE >> s->block_max_depth;
3103 const int sx= block_w*mb_x - block_w/2;
3104 const int sy= block_w*mb_y - block_w/2;
3105 const int w= s->plane[0].width;
3106 const int h= s->plane[0].height;
3110 memcpy(dst + sx + y*stride, src + sx + y*stride, block_w*2);
3111 for(y=h; y<sy+block_w*2; y++)
3112 memcpy(dst + sx + y*stride, src + sx + y*stride, block_w*2);
3114 for(y=sy; y<sy+block_w*2; y++)
3115 memcpy(dst + sx + y*stride, src + sx + y*stride, -sx);
3117 if(sx+block_w*2 > w){
3118 for(y=sy; y<sy+block_w*2; y++)
3119 memcpy(dst + w + y*stride, src + w + y*stride, sx+block_w*2 - w);
3123 // intra(black) = neighbors' contribution to the current block
3125 color[i]= get_dc(s, mb_x, mb_y, i);
3127 // get previous score (cannot be cached due to OBMC)
3128 if(pass > 0 && (block->type&BLOCK_INTRA)){
3129 int color0[3]= {block->color[0], block->color[1], block->color[2]};
3130 check_block(s, mb_x, mb_y, color0, 1, *obmc_edged, &best_rd);
3132 check_block_inter(s, mb_x, mb_y, block->mx, block->my, *obmc_edged, &best_rd);
3136 for(ref=0; ref < s->ref_frames; ref++){
3137 int16_t (*mvr)[2]= &s->ref_mvs[ref][index];
3138 if(s->ref_scores[ref][index] > s->ref_scores[ref_b.ref][index]*3/2) //FIXME tune threshold
3143 check_block_inter(s, mb_x, mb_y, mvr[0][0], mvr[0][1], *obmc_edged, &best_rd);
3144 check_block_inter(s, mb_x, mb_y, 0, 0, *obmc_edged, &best_rd);
3146 check_block_inter(s, mb_x, mb_y, mvr[-b_stride][0], mvr[-b_stride][1], *obmc_edged, &best_rd);
3148 check_block_inter(s, mb_x, mb_y, mvr[-1][0], mvr[-1][1], *obmc_edged, &best_rd);
3150 check_block_inter(s, mb_x, mb_y, mvr[1][0], mvr[1][1], *obmc_edged, &best_rd);
3152 check_block_inter(s, mb_x, mb_y, mvr[b_stride][0], mvr[b_stride][1], *obmc_edged, &best_rd);
3155 //FIXME avoid subpel interpolation / round to nearest integer
3158 for(i=0; i<FFMAX(s->avctx->dia_size, 1); i++){
3160 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+4*(i-j), block->my+(4*j), *obmc_edged, &best_rd);
3161 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx-4*(i-j), block->my-(4*j), *obmc_edged, &best_rd);
3162 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+4*(i-j), block->my-(4*j), *obmc_edged, &best_rd);
3163 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx-4*(i-j), block->my+(4*j), *obmc_edged, &best_rd);
3169 static const int square[8][2]= {{+1, 0},{-1, 0},{ 0,+1},{ 0,-1},{+1,+1},{-1,-1},{+1,-1},{-1,+1},};
3172 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+square[i][0], block->my+square[i][1], *obmc_edged, &best_rd);
3174 //FIXME or try the standard 2 pass qpel or similar
3176 mvr[0][0]= block->mx;
3177 mvr[0][1]= block->my;
3178 if(ref_rd > best_rd){
3186 check_block(s, mb_x, mb_y, color, 1, *obmc_edged, &best_rd);
3187 //FIXME RD style color selection
3189 if(!same_block(block, &backup)){
3190 if(tb ) tb ->type &= ~BLOCK_OPT;
3191 if(lb ) lb ->type &= ~BLOCK_OPT;
3192 if(rb ) rb ->type &= ~BLOCK_OPT;
3193 if(bb ) bb ->type &= ~BLOCK_OPT;
3194 if(tlb) tlb->type &= ~BLOCK_OPT;
3195 if(trb) trb->type &= ~BLOCK_OPT;
3196 if(blb) blb->type &= ~BLOCK_OPT;
3197 if(brb) brb->type &= ~BLOCK_OPT;
3202 av_log(NULL, AV_LOG_ERROR, "pass:%d changed:%d\n", pass, change);
3207 if(s->block_max_depth == 1){
3209 for(mb_y= 0; mb_y<b_height; mb_y+=2){
3210 for(mb_x= 0; mb_x<b_width; mb_x+=2){
3212 int best_rd, init_rd;
3213 const int index= mb_x + mb_y * b_stride;
3216 b[0]= &s->block[index];
3218 b[2]= b[0]+b_stride;
3220 if(same_block(b[0], b[1]) &&
3221 same_block(b[0], b[2]) &&
3222 same_block(b[0], b[3]))
3225 if(!s->me_cache_generation)
3226 memset(s->me_cache, 0, sizeof(s->me_cache));
3227 s->me_cache_generation += 1<<22;
3229 init_rd= best_rd= get_4block_rd(s, mb_x, mb_y, 0);
3231 //FIXME more multiref search?
3232 check_4block_inter(s, mb_x, mb_y,
3233 (b[0]->mx + b[1]->mx + b[2]->mx + b[3]->mx + 2) >> 2,
3234 (b[0]->my + b[1]->my + b[2]->my + b[3]->my + 2) >> 2, 0, &best_rd);
3237 if(!(b[i]->type&BLOCK_INTRA))
3238 check_4block_inter(s, mb_x, mb_y, b[i]->mx, b[i]->my, b[i]->ref, &best_rd);
3240 if(init_rd != best_rd)
3244 av_log(NULL, AV_LOG_ERROR, "pass:4mv changed:%d\n", change*4);
/**
 * Quantize the coefficients of one subband.
 *
 * Values are read from src and the quantized result is stored in dst; both
 * buffers are indexed as [x + y*stride].
 *
 * @param s      codec context; s->qlog is the frame-level quantizer index
 * @param b      subband to process (supplies width, height and its own qlog)
 * @param dst    output buffer for the quantized coefficients
 * @param src    input buffer of transformed coefficients
 * @param stride distance in elements between vertically adjacent samples
 * @param bias   nonzero: no rounding offset is used; zero: a rounding offset
 *               of (3*qmul)>>3 is used
 */
3248 static void quantize(SnowContext *s, SubBand *b, IDWTELEM *dst, DWTELEM *src, int stride, int bias){
3249 const int w= b->width;
3250 const int h= b->height;
// frame-level plus per-band quantizer index, clipped to [0, QROOT*16]
3251 const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
// step size: qexp[] entry picked by qlog&(QROOT-1), shifted left by (qlog>>QSHIFT) + ENCODER_EXTRA_BITS
3252 const int qmul= qexp[qlog&(QROOT-1)]<<((qlog>>QSHIFT) + ENCODER_EXTRA_BITS);
3253 int x,y, thres1, thres2;
// lossless mode: coefficients are copied through unchanged
3255 if(s->qlog == LOSSLESS_QLOG){
3258 dst[x + y*stride]= src[x + y*stride];
// from here on 'bias' holds the rounding offset, not the caller's flag
3262 bias= bias ? 0 : (3*qmul)>>3;
3263 thres1= ((qmul - bias)>>QEXPSHIFT) - 1;
3269 int i= src[x + y*stride];
// single unsigned compare: true when i+thres1 is negative or (for non-negative thres2) larger than thres2
3271 if((unsigned)(i+thres1) > thres2){
// this variant divides by qmul without adding a rounding offset
3274 i/= qmul; //FIXME optimize
3275 dst[x + y*stride]= i;
// result is stored negated -- presumably the path for negative inputs (TODO confirm)
3279 i/= qmul; //FIXME optimize
3280 dst[x + y*stride]= -i;
// presumably reached when the range test fails: the coefficient becomes zero
3283 dst[x + y*stride]= 0;
3289 int i= src[x + y*stride];
3291 if((unsigned)(i+thres1) > thres2){
// this variant adds the rounding offset before dividing
3294 i= (i + bias) / qmul; //FIXME optimize
3295 dst[x + y*stride]= i;
3299 i= (i + bias) / qmul; //FIXME optimize
3300 dst[x + y*stride]= -i;
3303 dst[x + y*stride]= 0;
/**
 * Dequantize rows [start_y, end_y) of a subband held in a slice buffer.
 *
 * Each row is obtained from sb with slice_buffer_get_line(), addressed through
 * the band's stride_line, buf_y_offset and buf_x_offset, and the dequantized
 * values are stored into that row. Returns immediately in lossless mode.
 *
 * NOTE(review): src and stride are referenced only by a commented-out line in
 * the code shown here.
 */
3309 static void dequantize_slice_buffered(SnowContext *s, slice_buffer * sb, SubBand *b, IDWTELEM *src, int stride, int start_y, int end_y){
3310 const int w= b->width;
// same quantizer index as quantize(), but the step size carries no ENCODER_EXTRA_BITS shift
3311 const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
3312 const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
// reconstruction offset derived from s->qbias
3313 const int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
3316 if(s->qlog == LOSSLESS_QLOG) return;
3318 for(y=start_y; y<end_y; y++){
3319 // DWTELEM * line = slice_buffer_get_line_from_address(sb, src + (y * stride));
3320 IDWTELEM * line = slice_buffer_get_line(sb, (y * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
// magnitude is scaled and the sign re-applied -- presumably the path for negative coefficients (TODO confirm)
3324 line[x]= -((-i*qmul + qadd)>>(QEXPSHIFT)); //FIXME try different bias
3326 line[x]= (( i*qmul + qadd)>>(QEXPSHIFT));
/**
 * Dequantize a subband in place.
 *
 * Coefficients are read from and written back to src[x + y*stride]. The step
 * size qmul and offset qadd are computed exactly as in
 * dequantize_slice_buffered(). Returns immediately in lossless mode.
 *
 * @param s      codec context (qlog, qbias)
 * @param b      subband to process (width, height, qlog)
 * @param src    band data, modified in place
 * @param stride distance in elements between vertically adjacent samples
 */
3332 static void dequantize(SnowContext *s, SubBand *b, IDWTELEM *src, int stride){
3333 const int w= b->width;
3334 const int h= b->height;
3335 const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
3336 const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
3337 const int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
3340 if(s->qlog == LOSSLESS_QLOG) return;
3344 int i= src[x + y*stride];
// magnitude is scaled and the sign re-applied -- presumably the path for negative coefficients (TODO confirm)
3346 src[x + y*stride]= -((-i*qmul + qadd)>>(QEXPSHIFT)); //FIXME try different bias
3348 src[x + y*stride]= (( i*qmul + qadd)>>(QEXPSHIFT));
/**
 * Replace each coefficient of a subband by its difference from a prediction
 * built from neighbouring coefficients.
 *
 * Samples are visited from the last row and column backwards. Every neighbour
 * used for a prediction lies earlier in raster order, so it still holds its
 * original value when it is read.
 *
 * @param src    band data, indexed as [x + y*stride], modified in place
 * @param stride distance in elements between vertically adjacent samples
 *
 * NOTE(review): the conditions choosing between the predictor variants below
 * (presumably use_median and x != 0) are not visible here -- confirm.
 */
3354 static void decorrelate(SnowContext *s, SubBand *b, IDWTELEM *src, int stride, int inverse, int use_median){
3355 const int w= b->width;
3356 const int h= b->height;
3359 for(y=h-1; y>=0; y--){
3360 for(x=w-1; x>=0; x--){
3361 int i= x + y*stride;
// variant 1: median of left, top and top-right; left neighbour alone on the first row or last column
3365 if(y && x+1<w) src[i] -= mid_pred(src[i - 1], src[i - stride], src[i - stride + 1]);
3366 else src[i] -= src[i - 1];
// variant 2: median of left, top and the gradient (left + top - top-left); left neighbour alone on the first row
3368 if(y) src[i] -= mid_pred(src[i - 1], src[i - stride], src[i - 1] + src[i - stride] - src[i - 1 - stride]);
3369 else src[i] -= src[i - 1];
// variant 3: top neighbour only -- presumably the x == 0 case, where no left neighbour exists
3372 if(y) src[i] -= src[i - stride];
/**
 * Add the neighbour-based prediction back onto rows [start_y, end_y) of a
 * subband held in a slice buffer. It uses the same predictor formulas as
 * decorrelate(), with += instead of -=.
 *
 * Rows are fetched from sb with slice_buffer_get_line(); 'prev' is used as the
 * row above the current one.
 *
 * NOTE(review): the assignment of 'prev' and the conditions choosing between
 * the predictor variants are not visible here -- confirm. src and stride are
 * referenced only by a commented-out line in the code shown.
 */
3378 static void correlate_slice_buffered(SnowContext *s, slice_buffer * sb, SubBand *b, IDWTELEM *src, int stride, int inverse, int use_median, int start_y, int end_y){
3379 const int w= b->width;
3382 IDWTELEM * line=0; // silence silly "could be used without having been initialized" warning
// fetch the row just above the first processed row (presumably only when start_y != 0)
3386 line = slice_buffer_get_line(sb, ((start_y - 1) * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
3388 for(y=start_y; y<end_y; y++){
3390 // line = slice_buffer_get_line_from_address(sb, src + (y * stride));
3391 line = slice_buffer_get_line(sb, (y * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
// variant 1: median of left, top and top-right; left neighbour alone on the first row or last column
3395 if(y && x+1<w) line[x] += mid_pred(line[x - 1], prev[x], prev[x + 1]);
3396 else line[x] += line[x - 1];
// variant 2: median of left, top and the gradient (left + top - top-left); left neighbour alone on the first row
3398 if(y) line[x] += mid_pred(line[x - 1], prev[x], line[x - 1] + prev[x] - prev[x - 1]);
3399 else line[x] += line[x - 1];
// variant 3: top neighbour only -- presumably the x == 0 case
3402 if(y) line[x] += prev[x];
/**
 * Add the neighbour-based prediction back onto every coefficient of a subband,
 * in place. The predictor formulas are those of decorrelate(), applied with +=
 * so that this undoes that function.
 *
 * @param src    band data, indexed as [x + y*stride], modified in place
 * @param stride distance in elements between vertically adjacent samples
 *
 * NOTE(review): the loop headers and the conditions choosing between the
 * predictor variants are not visible here. The loops presumably run forward so
 * that neighbours are already reconstructed -- confirm.
 */
3408 static void correlate(SnowContext *s, SubBand *b, IDWTELEM *src, int stride, int inverse, int use_median){
3409 const int w= b->width;
3410 const int h= b->height;
3415 int i= x + y*stride;
// variant 1: median of left, top and top-right; left neighbour alone on the first row or last column
3419 if(y && x+1<w) src[i] += mid_pred(src[i - 1], src[i - stride], src[i - stride + 1]);
3420 else src[i] += src[i - 1];
// variant 2: median of left, top and the gradient (left + top - top-left); left neighbour alone on the first row
3422 if(y) src[i] += mid_pred(src[i - 1], src[i - stride], src[i - 1] + src[i - stride] - src[i - 1 - stride]);
3423 else src[i] += src[i - 1];
// variant 3: top neighbour only -- presumably the x == 0 case
3426 if(y) src[i] += src[i - stride];
/**
 * Write the per-band qlog values to the header with put_symbol().
 *
 * Only planes 0 and 1 are written, and orientation 2 is skipped. The matching
 * decode_qlogs() does not read those values: it copies orientation 2 from
 * orientation 1 and plane 2 from plane 1.
 */
3432 static void encode_qlogs(SnowContext *s){
3433 int plane_index, level, orientation;
3435 for(plane_index=0; plane_index<2; plane_index++){
3436 for(level=0; level<s->spatial_decomposition_count; level++){
// level 0 starts at orientation 0, every other level at orientation 1
3437 for(orientation=level ? 1:0; orientation<4; orientation++){
3438 if(orientation==2) continue;
3439 put_symbol(&s->c, s->header_state, s->plane[plane_index].band[level][orientation].qlog, 1);
/**
 * Write the frame header to the range coder s->c.
 *
 * The visible code writes, in order: the keyframe flag, a group of
 * stream-level parameters, an optional update of the motion-compensation
 * filter (diag_mc, htaps, hcoeff) for planes 0 and 1, an optional new
 * spatial_decomposition_count, and finally several parameters coded as
 * differences against the remembered last_* values.
 *
 * NOTE(review): some guarding conditions (e.g. around the stream-level
 * parameters and the filter update) are not visible here -- confirm.
 */
3445 static void encode_header(SnowContext *s){
// the keyframe flag uses its own state array, reset to MID_STATE, rather than s->header_state
3449 memset(kstate, MID_STATE, sizeof(kstate));
3451 put_rac(&s->c, kstate, s->keyframe);
// on keyframes, or when always_reset is set, the remembered last_* values are cleared
3452 if(s->keyframe || s->always_reset){
3454 s->last_spatial_decomposition_type=
3458 s->last_block_max_depth= 0;
3459 for(plane_index=0; plane_index<2; plane_index++){
3460 Plane *p= &s->plane[plane_index];
3463 memset(p->last_hcoeff, 0, sizeof(p->last_hcoeff));
// stream-level parameters -- presumably written for keyframes only (TODO confirm)
3467 put_symbol(&s->c, s->header_state, s->version, 0);
3468 put_rac(&s->c, s->header_state, s->always_reset);
3469 put_symbol(&s->c, s->header_state, s->temporal_decomposition_type, 0);
3470 put_symbol(&s->c, s->header_state, s->temporal_decomposition_count, 0);
3471 put_symbol(&s->c, s->header_state, s->spatial_decomposition_count, 0);
3472 put_symbol(&s->c, s->header_state, s->colorspace_type, 0);
3473 put_symbol(&s->c, s->header_state, s->chroma_h_shift, 0);
3474 put_symbol(&s->c, s->header_state, s->chroma_v_shift, 0);
3475 put_rac(&s->c, s->header_state, s->spatial_scalability);
3476 // put_rac(&s->c, s->header_state, s->rate_scalability);
// stored minus one; decode_header() adds the one back
3477 put_symbol(&s->c, s->header_state, s->max_ref_frames-1, 0);
// update_mc becomes nonzero if any filter setting of plane 0 or 1 differs from the remembered one
3484 for(plane_index=0; plane_index<2; plane_index++){
3485 Plane *p= &s->plane[plane_index];
3486 update_mc |= p->last_htaps != p->htaps;
3487 update_mc |= p->last_diag_mc != p->diag_mc;
3488 update_mc |= !!memcmp(p->last_hcoeff, p->hcoeff, sizeof(p->hcoeff));
3490 put_rac(&s->c, s->header_state, update_mc);
// filter description: diag_mc flag, htaps coded as htaps/2-1, then |hcoeff[i]| for i = htaps/2 .. 1
3492 for(plane_index=0; plane_index<2; plane_index++){
3493 Plane *p= &s->plane[plane_index];
3494 put_rac(&s->c, s->header_state, p->diag_mc);
3495 put_symbol(&s->c, s->header_state, p->htaps/2-1, 0);
3496 for(i= p->htaps/2; i; i--)
// only the magnitude is written; hcoeff[0] is not written at all
3497 put_symbol(&s->c, s->header_state, FFABS(p->hcoeff[i]), 0);
// a flag tells whether a new spatial_decomposition_count follows
3500 if(s->last_spatial_decomposition_count != s->spatial_decomposition_count){
3501 put_rac(&s->c, s->header_state, 1);
3502 put_symbol(&s->c, s->header_state, s->spatial_decomposition_count, 0);
3505 put_rac(&s->c, s->header_state, 0);
// remaining parameters are coded as differences from the remembered last_* values
3508 put_symbol(&s->c, s->header_state, s->spatial_decomposition_type - s->last_spatial_decomposition_type, 1);
3509 put_symbol(&s->c, s->header_state, s->qlog - s->last_qlog , 1);
3510 put_symbol(&s->c, s->header_state, s->mv_scale - s->last_mv_scale, 1);
3511 put_symbol(&s->c, s->header_state, s->qbias - s->last_qbias , 1);
3512 put_symbol(&s->c, s->header_state, s->block_max_depth - s->last_block_max_depth, 1);
/**
 * Remember the current header parameters in the corresponding last_* fields.
 *
 * encode_header() compares against these values and codes differences from
 * them. The per-plane filter settings are stored for planes 0 and 1 only.
 */
3516 static void update_last_header_values(SnowContext *s){
3520 for(plane_index=0; plane_index<2; plane_index++){
3521 Plane *p= &s->plane[plane_index];
3522 p->last_diag_mc= p->diag_mc;
3523 p->last_htaps = p->htaps;
3524 memcpy(p->last_hcoeff, p->hcoeff, sizeof(p->hcoeff));
3528 s->last_spatial_decomposition_type = s->spatial_decomposition_type;
3529 s->last_qlog = s->qlog;
3530 s->last_qbias = s->qbias;
3531 s->last_mv_scale = s->mv_scale;
3532 s->last_block_max_depth = s->block_max_depth;
3533 s->last_spatial_decomposition_count = s->spatial_decomposition_count;
/**
 * Fill in the qlog of every band of all three planes.
 *
 * A value is read from the bitstream only for planes 0 and 1 with an
 * orientation other than 2; this matches what encode_qlogs() writes. Plane 2
 * reuses the values of plane 1, and orientation 2 reuses the value of
 * orientation 1 of the same level.
 */
3536 static void decode_qlogs(SnowContext *s){
3537 int plane_index, level, orientation;
3539 for(plane_index=0; plane_index<3; plane_index++){
3540 for(level=0; level<s->spatial_decomposition_count; level++){
// level 0 starts at orientation 0, every other level at orientation 1
3541 for(orientation=level ? 1:0; orientation<4; orientation++){
3543 if (plane_index==2) q= s->plane[1].band[level][orientation].qlog;
3544 else if(orientation==2) q= s->plane[plane_index].band[level][1].qlog;
3545 else q= get_symbol(&s->c, s->header_state, 1);
3546 s->plane[plane_index].band[level][orientation].qlog= q;
/**
 * Read the frame header from the range coder s->c.
 *
 * The reads mirror encode_header(): the keyframe flag, a group of
 * stream-level parameters, an optional motion-compensation filter update for
 * planes 0 and 1 (copied to plane 2), an optional new
 * spatial_decomposition_count, and several parameters coded as differences
 * that are added onto the current values.
 *
 * @return presumably 0 on success and a negative value after one of the
 *         logged errors; the return statements are not visible here (TODO
 *         confirm)
 *
 * NOTE(review): the three av_log() messages in this function have no trailing
 * newline, unlike the messages in encode_init().
 */
3552 static int decode_header(SnowContext *s){
// the keyframe flag uses its own state array, reset to MID_STATE, rather than s->header_state
3556 memset(kstate, MID_STATE, sizeof(kstate));
3558 s->keyframe= get_rac(&s->c, kstate);
// on keyframes, or when always_reset is set, the delta-coded parameters restart from zero
3559 if(s->keyframe || s->always_reset){
3561 s->spatial_decomposition_type=
3565 s->block_max_depth= 0;
// stream-level parameters -- presumably read for keyframes only (TODO confirm)
3568 s->version= get_symbol(&s->c, s->header_state, 0);
3570 av_log(s->avctx, AV_LOG_ERROR, "version %d not supported", s->version);
3573 s->always_reset= get_rac(&s->c, s->header_state);
3574 s->temporal_decomposition_type= get_symbol(&s->c, s->header_state, 0);
3575 s->temporal_decomposition_count= get_symbol(&s->c, s->header_state, 0);
3576 s->spatial_decomposition_count= get_symbol(&s->c, s->header_state, 0);
3577 s->colorspace_type= get_symbol(&s->c, s->header_state, 0);
3578 s->chroma_h_shift= get_symbol(&s->c, s->header_state, 0);
3579 s->chroma_v_shift= get_symbol(&s->c, s->header_state, 0);
3580 s->spatial_scalability= get_rac(&s->c, s->header_state);
3581 // s->rate_scalability= get_rac(&s->c, s->header_state);
// the encoder stores max_ref_frames-1
3582 s->max_ref_frames= get_symbol(&s->c, s->header_state, 0)+1;
// a set flag means a new filter description follows for planes 0 and 1
3588 if(get_rac(&s->c, s->header_state)){
3589 for(plane_index=0; plane_index<2; plane_index++){
3590 int htaps, i, sum=0;
3591 Plane *p= &s->plane[plane_index];
3592 p->diag_mc= get_rac(&s->c, s->header_state);
// the encoder stores htaps/2-1, so the decoded tap count is always even and at least 2
3593 htaps= get_symbol(&s->c, s->header_state, 0)*2 + 2;
3594 if((unsigned)htaps > HTAPS_MAX || htaps==0)
3597 for(i= htaps/2; i; i--){
// magnitudes are transmitted; the sign is negative for odd i and positive for even i
3598 p->hcoeff[i]= get_symbol(&s->c, s->header_state, 0) * (1-2*(i&1));
3599 sum += p->hcoeff[i];
// hcoeff[0] is not transmitted; it is derived as 32 minus the sum of the other taps
3601 p->hcoeff[0]= 32-sum;
// plane 2 shares the filter settings of plane 1
3603 s->plane[2].diag_mc= s->plane[1].diag_mc;
3604 s->plane[2].htaps = s->plane[1].htaps;
3605 memcpy(s->plane[2].hcoeff, s->plane[1].hcoeff, sizeof(s->plane[1].hcoeff));
// a set flag means a new spatial_decomposition_count follows
3607 if(get_rac(&s->c, s->header_state)){
3608 s->spatial_decomposition_count= get_symbol(&s->c, s->header_state, 0);
// the remaining parameters are coded as differences and added to the current values
3613 s->spatial_decomposition_type+= get_symbol(&s->c, s->header_state, 1);
3614 if(s->spatial_decomposition_type > 1){
3615 av_log(s->avctx, AV_LOG_ERROR, "spatial_decomposition_type %d not supported", s->spatial_decomposition_type);
3619 s->qlog += get_symbol(&s->c, s->header_state, 1);
3620 s->mv_scale += get_symbol(&s->c, s->header_state, 1);
3621 s->qbias += get_symbol(&s->c, s->header_state, 1);
3622 s->block_max_depth+= get_symbol(&s->c, s->header_state, 1);
// NOTE(review): the message says "too large" although the check also catches negative values
3623 if(s->block_max_depth > 1 || s->block_max_depth < 0){
3624 av_log(s->avctx, AV_LOG_ERROR, "block_max_depth= %d is too large", s->block_max_depth);
3625 s->block_max_depth= 0;
/**
 * Step a value v through QROOT iterations, multiplying it by 2^(1/QROOT) each
 * time, so that v doubles over the full loop.
 *
 * NOTE(review): presumably each value of v is stored into the qexp[] table
 * used by the quantizer functions; the store and the initial value of v are
 * not visible here -- confirm.
 */
3632 static void init_qexp(void){
3636 for(i=0; i<QROOT; i++){
3638 v *= pow(2, 1.0 / QROOT);
/**
 * Initialisation that uses only the codec context.
 *
 * The visible code initialises s->dsp, redirects entries of its qpel and
 * pixel function tables, allocates the two width*height transform buffers,
 * fills scale_mv_ref[][] and requests a buffer for s->mconly_picture.
 *
 * NOTE(review): in the lines shown, the results of av_mallocz() and
 * get_buffer() are not checked -- confirm whether failure is handled
 * elsewhere.
 */
3642 static int common_init(AVCodecContext *avctx){
3643 SnowContext *s = avctx->priv_data;
/* The macro body below points both the rounding and the no-rounding qpel
 * tables (block sizes [0] and [1]) at the put_h264_qpel_pixels_tab entries. */
3649 dsputil_init(&s->dsp, avctx);
3652 s->dsp.put_qpel_pixels_tab [0][dy+dx/4]=\
3653 s->dsp.put_no_rnd_qpel_pixels_tab[0][dy+dx/4]=\
3654 s->dsp.put_h264_qpel_pixels_tab[0][dy+dx/4];\
3655 s->dsp.put_qpel_pixels_tab [1][dy+dx/4]=\
3656 s->dsp.put_no_rnd_qpel_pixels_tab[1][dy+dx/4]=\
3657 s->dsp.put_h264_qpel_pixels_tab[1][dy+dx/4];
/* mcfh(dx,dy) points put_pixels_tab and put_no_rnd_pixels_tab at the
 * mc_block_hpel<dx><dy>16 / mc_block_hpel<dx><dy>8 functions named by token
 * pasting. */
3676 #define mcfh(dx,dy)\
3677 s->dsp.put_pixels_tab [0][dy/4+dx/8]=\
3678 s->dsp.put_no_rnd_pixels_tab[0][dy/4+dx/8]=\
3679 mc_block_hpel ## dx ## dy ## 16;\
3680 s->dsp.put_pixels_tab [1][dy/4+dx/8]=\
3681 s->dsp.put_no_rnd_pixels_tab[1][dy/4+dx/8]=\
3682 mc_block_hpel ## dx ## dy ## 8;
3692 // dec += FFMAX(s->chroma_h_shift, s->chroma_v_shift);
3694 width= s->avctx->width;
3695 height= s->avctx->height;
// one zero-initialised full-frame buffer per element type (IDWTELEM and DWTELEM)
3697 s->spatial_idwt_buffer= av_mallocz(width*height*sizeof(IDWTELEM));
3698 s->spatial_dwt_buffer= av_mallocz(width*height*sizeof(DWTELEM)); //FIXME this does not belong here
// scale_mv_ref[i][j] holds the ratio (i+1)/(j+1) scaled by 256 (integer division)
3700 for(i=0; i<MAX_REF_FRAMES; i++)
3701 for(j=0; j<MAX_REF_FRAMES; j++)
3702 scale_mv_ref[i][j] = 256*(i+1)/(j+1);
3704 s->avctx->get_buffer(s->avctx, &s->mconly_picture);
/**
 * Set up plane and subband geometry from parameters known once the header is
 * available (spatial_decomposition_count, chroma shifts).
 *
 * For each of the three planes it stores width/height, then for every
 * level/orientation fills in the SubBand: buffer pointers into the shared
 * transform buffers, stride, dimensions, slice-buffer offsets, parent link,
 * and a freshly allocated x_coeff array.
 *
 * NOTE(review): several guarding conditions (chroma-only shift, orientation
 * tests, level != 0 for the parent) are not visible here -- confirm.
 */
3709 static int common_init_after_header(AVCodecContext *avctx){
3710 SnowContext *s = avctx->priv_data;
3711 int plane_index, level, orientation;
3713 for(plane_index=0; plane_index<3; plane_index++){
3714 int w= s->avctx->width;
3715 int h= s->avctx->height;
// presumably applied to the chroma planes only (TODO confirm)
3718 w>>= s->chroma_h_shift;
3719 h>>= s->chroma_v_shift;
3721 s->plane[plane_index].width = w;
3722 s->plane[plane_index].height= h;
// levels are walked from the highest index down to 0
3724 for(level=s->spatial_decomposition_count-1; level>=0; level--){
3725 for(orientation=level ? 1 : 0; orientation<4; orientation++){
3726 SubBand *b= &s->plane[plane_index].band[level][orientation];
3728 b->buf= s->spatial_dwt_buffer;
// stride is the plane width shifted left by (spatial_decomposition_count - level)
3730 b->stride= s->plane[plane_index].width << (s->spatial_decomposition_count - level);
// rounding up applies when bit 0 of orientation is clear (width) / orientation is 0 or 1 (height)
3731 b->width = (w + !(orientation&1))>>1;
3732 b->height= (h + !(orientation>1))>>1;
3734 b->stride_line = 1 << (s->spatial_decomposition_count - level);
3735 b->buf_x_offset = 0;
3736 b->buf_y_offset = 0;
// horizontal offset -- presumably for orientations with bit 0 set (TODO confirm)
3740 b->buf_x_offset = (w+1)>>1;
// vertical offset of half a stride -- presumably for orientations > 1 (TODO confirm)
3743 b->buf += b->stride>>1;
3744 b->buf_y_offset = b->stride_line >> 1;
// ibuf addresses the same element offset inside spatial_idwt_buffer as buf does inside spatial_dwt_buffer
3746 b->ibuf= s->spatial_idwt_buffer + (b->buf - s->spatial_dwt_buffer);
// parent is the band with the same orientation one level lower
3749 b->parent= &s->plane[plane_index].band[level-1][orientation];
3750 //FIXME avoid this realloc
3751 av_freep(&b->x_coeff);
3752 b->x_coeff=av_mallocz(((b->width+1) * b->height+1)*sizeof(x_and_coeff));
/**
 * Convert a qscale value to a qlog index.
 *
 * Computes QROOT * log2(qscale / FF_QP2LAMBDA), rounds it with rint() and
 * adds the constant offset 61*QROOT/8.
 *
 * NOTE(review): qscale == 0 leads to log(0); encode_frame() in this file tests
 * the result and pict->quality afterwards -- confirm that covers this case.
 */
3762 static int qscale2qlog(int qscale){
3763 return rint(QROOT*log(qscale / (float)FF_QP2LAMBDA)/log(2))
3764 + 61*QROOT/8; //<64 >60
/**
 * Single-pass rate control: estimate the complexity of the luma plane, ask
 * the rate controller for a qscale and update s->lambda and s->qlog from it.
 *
 * @param s    codec context; s->qlog and s->lambda are updated
 * @param pict frame being encoded; pict->quality receives the chosen qscale
 * @return presumably delta_qlog (the change applied to s->qlog), with a
 *         special value when the rate controller reports an error;
 *         encode_frame() tests the result against INT_MIN (TODO confirm)
 */
3767 static int ratecontrol_1pass(SnowContext *s, AVFrame *pict)
3769 /* Estimate the frame's complexity as a sum of weighted dwt coefficients.
3770 * FIXME we know exact mv bits at this point,
3771 * but ratecontrol isn't set up to include them. */
3772 uint32_t coef_sum= 0;
3773 int level, orientation, delta_qlog;
3775 for(level=0; level<s->spatial_decomposition_count; level++){
3776 for(orientation=level ? 1 : 0; orientation<4; orientation++){
// only plane 0 is examined
3777 SubBand *b= &s->plane[0].band[level][orientation];
3778 IDWTELEM *buf= b->ibuf;
3779 const int w= b->width;
3780 const int h= b->height;
3781 const int stride= b->stride;
// weight uses a fixed base of 2*QROOT plus the band's qlog, not s->qlog
3782 const int qlog= av_clip(2*QROOT + b->qlog, 0, QROOT*16);
3783 const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
// 16.16 fixed-point reciprocal of qmul
3784 const int qdiv= (1<<16)/qmul;
3786 //FIXME this is ugly
// copy the band from the DWT buffer into the IDWT-typed buffer before measuring it
3789 buf[x+y*stride]= b->buf[x+y*stride];
3791 decorrelate(s, b, buf, stride, 1, 0);
3794 coef_sum+= abs(buf[x+y*stride]) * qdiv >> 16;
3798 /* ugly, ratecontrol just takes a sqrt again */
// squared in 64 bits, then scaled down by 16 bits
3799 coef_sum = (uint64_t)coef_sum * coef_sum >> 16;
3800 assert(coef_sum < INT_MAX);
// the estimate goes into mb_var_sum for I frames, otherwise into mc_mb_var_sum
3802 if(pict->pict_type == I_TYPE){
3803 s->m.current_picture.mb_var_sum= coef_sum;
3804 s->m.current_picture.mc_mb_var_sum= 0;
3806 s->m.current_picture.mc_mb_var_sum= coef_sum;
3807 s->m.current_picture.mb_var_sum= 0;
3810 pict->quality= ff_rate_estimate_qscale(&s->m, 1);
3811 if (pict->quality < 0)
3813 s->lambda= pict->quality * 3/2;
3814 delta_qlog= qscale2qlog(pict->quality) - s->qlog;
3815 s->qlog+= delta_qlog;
/**
 * Derive the qlog of every subband of plane p from the response of the
 * inverse transform to a single coefficient in that band.
 *
 * For each band the shared IDWT buffer is cleared, one coefficient at the
 * centre of the band is set to 256*16, the inverse DWT is run, and an error
 * measure over the whole plane is turned into b->qlog.
 *
 * NOTE(review): the accumulation into 'error' is not visible here; presumably
 * it sums d*d over all samples -- confirm. s->spatial_idwt_buffer is
 * overwritten by this function.
 */
3819 static void calculate_visual_weight(SnowContext *s, Plane *p){
3820 int width = p->width;
3821 int height= p->height;
3822 int level, orientation, x, y;
3824 for(level=0; level<s->spatial_decomposition_count; level++){
3825 for(orientation=level ? 1 : 0; orientation<4; orientation++){
3826 SubBand *b= &p->band[level][orientation];
3827 IDWTELEM *ibuf= b->ibuf;
3830 memset(s->spatial_idwt_buffer, 0, sizeof(*s->spatial_idwt_buffer)*width*height);
// single nonzero coefficient at the centre of this band
3831 ibuf[b->width/2 + b->height/2*b->stride]= 256*16;
3832 ff_spatial_idwt(s->spatial_idwt_buffer, width, height, width, s->spatial_decomposition_type, s->spatial_decomposition_count);
3833 for(y=0; y<height; y++){
3834 for(x=0; x<width; x++){
3835 int64_t d= s->spatial_idwt_buffer[x + y*width]*16;
// dividing by log(2^(1/QROOT)) expresses the value in qlog steps; +0.5 rounds before truncation to int
3840 b->qlog= (int)(log(352256.0/sqrt(error)) / log(pow(2.0, 1.0/QROOT))+0.5);
/**
 * Accumulate the squared difference between two reconstructions into a grid
 * of scores.
 *
 * Every sample (x, y) of the plane contributes (r0 - r1)^2 to the score cell
 * found by dividing its offset position by the band's step and by Q2_STEP.
 *
 * @param score        output grid, cleared first; row length is score_stride
 * @param score_stride number of cells per row of the score grid
 * @param r0, r1       the two reconstructions, indexed as [x + y*p->width]
 * @param level, orientation  select the band whose sampling grid is used
 *
 * NOTE(review): xo and yo are set on lines not visible here; presumably they
 * are the band's phase offsets -- confirm.
 */
3850 static void find_sse(SnowContext *s, Plane *p, int *score, int score_stride, IDWTELEM *r0, IDWTELEM *r1, int level, int orientation){
3851 SubBand *b= &p->band[level][orientation];
// distance in plane samples between neighbouring coefficients of this level
3855 int step= 1 << (s->spatial_decomposition_count - level);
3862 //FIXME bias for nonzero ?
// clears score_stride * ceil(p->height / Q2_STEP) cells
3864 memset(score, 0, sizeof(*score)*score_stride*((p->height + Q2_STEP-1)/Q2_STEP));
3865 for(y=0; y<p->height; y++){
3866 for(x=0; x<p->width; x++){
// nearest coefficient index (step/2 rounds), then the Q2_STEP-sized cell containing it
3867 int sx= (x-xo + step/2) / step / Q2_STEP;
3868 int sy= (y-yo + step/2) / step / Q2_STEP;
3869 int v= r0[x + y*p->width] - r1[x + y*p->width];
3870 assert(sx>=0 && sy>=0 && sx < score_stride);
3872 score[sx + sy*score_stride] += v*v;
// checks that the int accumulator has not gone negative
3873 assert(score[sx + sy*score_stride] >= 0);
/**
 * Dequantize every subband of plane p inside an arbitrary buffer.
 *
 * Each band is located in 'buffer' at the same element offset that b->ibuf has
 * inside s->spatial_idwt_buffer, and is handed to dequantize() with the band's
 * own stride.
 *
 * NOTE(review): width and height are not used in the lines shown.
 */
3878 static void dequantize_all(SnowContext *s, Plane *p, IDWTELEM *buffer, int width, int height){
3879 int level, orientation;
3881 for(level=0; level<s->spatial_decomposition_count; level++){
3882 for(orientation=level ? 1 : 0; orientation<4; orientation++){
3883 SubBand *b= &p->band[level][orientation];
3884 IDWTELEM *dst= buffer + (b->ibuf - s->spatial_idwt_buffer);
3886 dequantize(s, b, dst, b->stride);
/**
 * Forward-transform and quantize a plane, then try to reduce coefficient
 * magnitudes where that costs little reconstruction error.
 *
 * Steps visible in the code:
 *  1. run ff_spatial_dwt() on 'buffer' and quantize every band into
 *     best_dequant;
 *  2. for each band and each (xs, ys) phase of a Q2_STEP grid, score the
 *     current best with find_sse(), move the selected coefficients one step
 *     towards zero in a trial copy, score that, and apply the same step to
 *     best_dequant wherever the trial score is within 'threshold' of the best;
 *  3. copy best_dequant into s->spatial_idwt_buffer.
 *
 * NOTE(review): best_dequant and idwt2_buffer are variable-length arrays of
 * height*stride elements on the stack, plus two score arrays; large frames
 * need a correspondingly large stack.
 */
3891 static void dwt_quantize(SnowContext *s, Plane *p, DWTELEM *buffer, int width, int height, int stride, int type){
3892 int level, orientation, ys, xs, x, y, pass;
3893 IDWTELEM best_dequant[height * stride];
3894 IDWTELEM idwt2_buffer[height * stride];
3895 const int score_stride= (width + 10)/Q2_STEP;
3896 int best_score[(width + 10)/Q2_STEP * (height + 10)/Q2_STEP]; //FIXME size
3897 int score[(width + 10)/Q2_STEP * (height + 10)/Q2_STEP]; //FIXME size
// slack allowed when accepting a trial: lambda squared, scaled down by 6 bits
3898 int threshold= (s->m.lambda * s->m.lambda) >> 6;
3900 //FIXME pass the copy cleanly ?
3902 // memcpy(dwt_buffer, buffer, height * stride * sizeof(DWTELEM));
3903 ff_spatial_dwt(buffer, width, height, stride, type, s->spatial_decomposition_count);
// initial quantization of every band into best_dequant
3905 for(level=0; level<s->spatial_decomposition_count; level++){
3906 for(orientation=level ? 1 : 0; orientation<4; orientation++){
3907 SubBand *b= &p->band[level][orientation];
3908 IDWTELEM *dst= best_dequant + (b->ibuf - s->spatial_idwt_buffer);
3909 DWTELEM *src= buffer + (b-> buf - s->spatial_dwt_buffer);
3910 assert(src == b->buf); // code does not depend on this but it is true currently
3912 quantize(s, b, dst, src, b->stride, s->qbias);
// the refinement loop runs exactly once
3915 for(pass=0; pass<1; pass++){
3916 if(s->qbias == 0) //keyframe
3918 for(level=0; level<s->spatial_decomposition_count; level++){
3919 for(orientation=level ? 1 : 0; orientation<4; orientation++){
3920 SubBand *b= &p->band[level][orientation];
// dst addresses this band in the trial buffer, best_dst the same band in the accepted buffer
3921 IDWTELEM *dst= idwt2_buffer + (b->ibuf - s->spatial_idwt_buffer);
3922 IDWTELEM *best_dst= best_dequant + (b->ibuf - s->spatial_idwt_buffer);
3924 for(ys= 0; ys<Q2_STEP; ys++){
3925 for(xs= 0; xs<Q2_STEP; xs++){
// baseline: reconstruct from the current best and score it against s->spatial_idwt_buffer
3926 memcpy(idwt2_buffer, best_dequant, height * stride * sizeof(IDWTELEM));
3927 dequantize_all(s, p, idwt2_buffer, width, height);
3928 ff_spatial_idwt(idwt2_buffer, width, height, stride, type, s->spatial_decomposition_count);
3929 find_sse(s, p, best_score, score_stride, idwt2_buffer, s->spatial_idwt_buffer, level, orientation);
// trial: start again from the current best and move every Q2_STEP-th coefficient one step towards zero
3930 memcpy(idwt2_buffer, best_dequant, height * stride * sizeof(IDWTELEM));
3931 for(y=ys; y<b->height; y+= Q2_STEP){
3932 for(x=xs; x<b->width; x+= Q2_STEP){
3933 if(dst[x + y*b->stride]<0) dst[x + y*b->stride]++;
3934 if(dst[x + y*b->stride]>0) dst[x + y*b->stride]--;
3935 //FIXME try more than just --
3938 dequantize_all(s, p, idwt2_buffer, width, height);
3939 ff_spatial_idwt(idwt2_buffer, width, height, stride, type, s->spatial_decomposition_count);
3940 find_sse(s, p, score, score_stride, idwt2_buffer, s->spatial_idwt_buffer, level, orientation);
// accept the step per score cell when the trial is no worse than the baseline plus threshold
3941 for(y=ys; y<b->height; y+= Q2_STEP){
3942 for(x=xs; x<b->width; x+= Q2_STEP){
3943 int score_idx= x/Q2_STEP + (y/Q2_STEP)*score_stride;
3944 if(score[score_idx] <= best_score[score_idx] + threshold){
3945 best_score[score_idx]= score[score_idx];
3946 if(best_dst[x + y*b->stride]<0) best_dst[x + y*b->stride]++;
3947 if(best_dst[x + y*b->stride]>0) best_dst[x + y*b->stride]--;
3948 //FIXME copy instead
3957 memcpy(s->spatial_idwt_buffer, best_dequant, height * stride * sizeof(IDWTELEM)); //FIXME work with that directly instead of copy at the end
3960 #endif /* QUANTIZE2==1 */
/**
 * Encoder initialisation.
 *
 * Visible behaviour, in order:
 *  - logs an error unless strict_std_compliance is at most
 *    FF_COMPLIANCE_EXPERIMENTAL, and when the 9/7 wavelet is combined with
 *    QSCALE mode and global_quality 0 (lossless);
 *  - takes the wavelet type from avctx->prediction_method, forces both chroma
 *    shifts to 1, and derives mv_scale and block_max_depth from the QPEL and
 *    4MV flags;
 *  - gives all three planes the default filter (diag_mc 1, htaps 6,
 *    hcoeff 40/-10/2, fast_mc 1);
 *  - allocates the motion-estimation scratch buffers inside s->m and calls
 *    h263_encode_init();
 *  - clamps max_ref_frames to [1, MAX_REF_FRAMES];
 *  - allocates stats_out for pass 1 and initialises rate control unless a
 *    fixed qscale is used without pass 2;
 *  - accepts PIX_FMT_YUV420P and logs "pixel format not supported" in the
 *    switch on pix_fmt;
 *  - installs the comparison functions, requests a buffer for input_picture
 *    and, for ME_ITER, allocates ref_mvs/ref_scores per reference frame.
 *
 * NOTE(review): the return statements are not visible here, and the results
 * of the av_mallocz() and get_buffer() calls are not checked in the lines
 * shown -- confirm.
 */
3962 static int encode_init(AVCodecContext *avctx)
3964 SnowContext *s = avctx->priv_data;
3967 if(avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL){
3968 av_log(avctx, AV_LOG_ERROR, "This codec is under development, files encoded with it may not be decodable with future versions!!!\n"
3969 "Use vstrict=-2 / -strict -2 to use it anyway.\n");
3973 if(avctx->prediction_method == DWT_97
3974 && (avctx->flags & CODEC_FLAG_QSCALE)
3975 && avctx->global_quality == 0){
3976 av_log(avctx, AV_LOG_ERROR, "The 9/7 wavelet is incompatible with lossless mode.\n");
3980 s->spatial_decomposition_type= avctx->prediction_method; //FIXME add decorrelator type r transform_type
3982 s->chroma_h_shift= 1; //FIXME XXX
3983 s->chroma_v_shift= 1;
// mv_scale is 2 with QPEL and 4 without; 4MV enables one level of block splitting
3985 s->mv_scale = (avctx->flags & CODEC_FLAG_QPEL) ? 2 : 4;
3986 s->block_max_depth= (avctx->flags & CODEC_FLAG_4MV ) ? 1 : 0;
3988 for(plane_index=0; plane_index<3; plane_index++){
3989 s->plane[plane_index].diag_mc= 1;
3990 s->plane[plane_index].htaps= 6;
3991 s->plane[plane_index].hcoeff[0]= 40;
3992 s->plane[plane_index].hcoeff[1]= -10;
3993 s->plane[plane_index].hcoeff[2]= 2;
3994 s->plane[plane_index].fast_mc= 1;
4003 s->m.flags = avctx->flags;
4004 s->m.bit_rate= avctx->bit_rate;
4006 s->m.me.scratchpad= av_mallocz((avctx->width+64)*2*16*2*sizeof(uint8_t));
4007 s->m.me.map = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t));
4008 s->m.me.score_map = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t));
4009 s->m.obmc_scratchpad= av_mallocz(MB_SIZE*MB_SIZE*12*sizeof(uint32_t));
4010 h263_encode_init(&s->m); //mv_penalty
4012 s->max_ref_frames = FFMAX(FFMIN(avctx->refs, MAX_REF_FRAMES), 1);
4014 if(avctx->flags&CODEC_FLAG_PASS1){
4015 if(!avctx->stats_out)
4016 avctx->stats_out = av_mallocz(256);
4018 if((avctx->flags&CODEC_FLAG_PASS2) || !(avctx->flags&CODEC_FLAG_QSCALE)){
4019 if(ff_rate_control_init(&s->m) < 0)
// single-pass rate control is active only when neither QSCALE nor PASS2 is set
4022 s->pass1_rc= !(avctx->flags & (CODEC_FLAG_QSCALE|CODEC_FLAG_PASS2));
4024 avctx->coded_frame= &s->current_picture;
4025 switch(avctx->pix_fmt){
4026 // case PIX_FMT_YUV444P:
4027 // case PIX_FMT_YUV422P:
4028 case PIX_FMT_YUV420P:
4030 // case PIX_FMT_YUV411P:
4031 // case PIX_FMT_YUV410P:
4032 s->colorspace_type= 0;
4034 /* case PIX_FMT_RGB32:
4038 av_log(avctx, AV_LOG_ERROR, "pixel format not supported\n");
4041 // avcodec_get_chroma_sub_sample(avctx->pix_fmt, &s->chroma_h_shift, &s->chroma_v_shift);
4042 s->chroma_h_shift= 1;
4043 s->chroma_v_shift= 1;
4045 ff_set_cmp(&s->dsp, s->dsp.me_cmp, s->avctx->me_cmp);
4046 ff_set_cmp(&s->dsp, s->dsp.me_sub_cmp, s->avctx->me_sub_cmp);
4048 s->avctx->get_buffer(s->avctx, &s->input_picture);
4050 if(s->avctx->me_method == ME_ITER){
4052 int size= s->b_width * s->b_height << 2*s->block_max_depth;
4053 for(i=0; i<s->max_ref_frames; i++){
4054 s->ref_mvs[i]= av_mallocz(size*sizeof(int16_t[2]));
4055 s->ref_scores[i]= av_mallocz(size*sizeof(uint32_t));
// Compile-time switch: with the value 0, the "USE_HALFPEL_PLANE && ..." guard
// in frame_start() never calls halfpel_interpol().
4062 #define USE_HALFPEL_PLANE 0
// Build three half-pel interpolated copies of each plane of 'frame'.
// For every plane p, halfpel[1][p], halfpel[2][p] and halfpel[3][p] are newly
// allocated and filled with a 6-tap filter (weights 1, -5, 20, 20, -5, 1,
// rounded with +16 and shifted right by 5): [1] filters along a row
// (neighbours at i+-1..), [2] and [3] filter along a column (neighbours at
// i+-ls..).
// NOTE(review): [2] and [3] use the same expression; presumably 'src' is
// repointed (e.g. to halfpel[1][p]) before [3] is computed -- confirm.
// NOTE(review): the av_malloc() result is offset before any NULL check is
// visible -- confirm allocation failure is handled.
4064 static void halfpel_interpol(SnowContext *s, uint8_t *halfpel[4][4], AVFrame *frame){
// the filter reads beyond the picture area, which requires real edge padding
4067 assert(!(s->avctx->flags & CODEC_FLAG_EMU_EDGE));
4071 int w= s->avctx->width >>is_chroma;
4072 int h= s->avctx->height >>is_chroma;
4073 int ls= frame->linesize[p];
4074 uint8_t *src= frame->data[p];
// each buffer holds h + 2*EDGE_WIDTH lines; the stored pointer skips EDGE_WIDTH lines and EDGE_WIDTH columns
4076 halfpel[1][p]= (uint8_t*)av_malloc(ls * (h+2*EDGE_WIDTH)) + EDGE_WIDTH*(1+ls);
4077 halfpel[2][p]= (uint8_t*)av_malloc(ls * (h+2*EDGE_WIDTH)) + EDGE_WIDTH*(1+ls);
4078 halfpel[3][p]= (uint8_t*)av_malloc(ls * (h+2*EDGE_WIDTH)) + EDGE_WIDTH*(1+ls);
// horizontal filter
4085 halfpel[1][p][i]= (20*(src[i] + src[i+1]) - 5*(src[i-1] + src[i+2]) + (src[i-2] + src[i+3]) + 16 )>>5;
// vertical filter
4092 halfpel[2][p][i]= (20*(src[i] + src[i+ls]) - 5*(src[i-ls] + src[i+2*ls]) + (src[i-2*ls] + src[i+3*ls]) + 16 )>>5;
// vertical filter again, on whatever 'src' points to at this stage
4100 halfpel[3][p][i]= (20*(src[i] + src[i+ls]) - 5*(src[i-ls] + src[i+2*ls]) + (src[i-2*ls] + src[i+3*ls]) + 16 )>>5;
// Prepare the picture buffers for a new frame.
// Visible steps: pad the edges of the current picture (if any), shift the
// reference list down by one so the current picture becomes last_picture[0],
// reuse the oldest entry as the new current picture, request a buffer for it
// and copy the keyframe flag into it.
// Returns int; only the get_buffer() failure path is visible here, and it
// logs an error (presumably followed by a negative return -- confirm).
4108 static int frame_start(SnowContext *s){
4110 int w= s->avctx->width; //FIXME round up to x16 ?
4111 int h= s->avctx->height;
// extend the borders of all three planes; chroma uses half size and half edge width
4113 if(s->current_picture.data[0]){
4114 s->dsp.draw_edges(s->current_picture.data[0], s->current_picture.linesize[0], w , h , EDGE_WIDTH );
4115 s->dsp.draw_edges(s->current_picture.data[1], s->current_picture.linesize[1], w>>1, h>>1, EDGE_WIDTH/2);
4116 s->dsp.draw_edges(s->current_picture.data[2], s->current_picture.linesize[2], w>>1, h>>1, EDGE_WIDTH/2);
// rotate the reference list: the oldest entry is saved, the rest move up one slot
4119 tmp= s->last_picture[s->max_ref_frames-1];
4120 memmove(s->last_picture+1, s->last_picture, (s->max_ref_frames-1)*sizeof(AVFrame));
4121 memmove(s->halfpel_plane+1, s->halfpel_plane, (s->max_ref_frames-1)*sizeof(void*)*4*4);
4122 if(USE_HALFPEL_PLANE && s->current_picture.data[0])
4123 halfpel_interpol(s, s->halfpel_plane[0], &s->current_picture);
4124 s->last_picture[0]= s->current_picture;
4125 s->current_picture= tmp;
// walk the filled reference slots; the scan reacts to a keyframe in the previous slot
// (the action taken is not visible here -- presumably it stops counting usable references)
4131 for(i=0; i<s->max_ref_frames && s->last_picture[i].data[0]; i++)
4132 if(i && s->last_picture[i-1].key_frame)
4137 s->current_picture.reference= 1;
4138 if(s->avctx->get_buffer(s->avctx, &s->current_picture) < 0){
4139 av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed\n");
4143 s->current_picture.key_frame= s->keyframe;
4148 static int encode_frame(AVCodecContext *avctx, unsigned char *buf, int buf_size, void *data){
4149 SnowContext *s = avctx->priv_data;
4150 RangeCoder * const c= &s->c;
4151 AVFrame *pict = data;
4152 const int width= s->avctx->width;
4153 const int height= s->avctx->height;
4154 int level, orientation, plane_index, i, y;
4155 uint8_t rc_header_bak[sizeof(s->header_state)];
4156 uint8_t rc_block_bak[sizeof(s->block_state)];
4158 ff_init_range_encoder(c, buf, buf_size);
4159 ff_build_rac_states(c, 0.05*(1LL<<32), 256-8);
4163 for(y=0; y<(height>>shift); y++)
4164 memcpy(&s->input_picture.data[i][y * s->input_picture.linesize[i]],
4165 &pict->data[i][y * pict->linesize[i]],
4168 s->new_picture = *pict;
4170 s->m.picture_number= avctx->frame_number;
4171 if(avctx->flags&CODEC_FLAG_PASS2){
4173 pict->pict_type= s->m.rc_context.entry[avctx->frame_number].new_pict_type;
4174 s->keyframe= pict->pict_type==FF_I_TYPE;
4175 if(!(avctx->flags&CODEC_FLAG_QSCALE)) {
4176 pict->quality= ff_rate_estimate_qscale(&s->m, 0);
4177 if (pict->quality < 0)
4181 s->keyframe= avctx->gop_size==0 || avctx->frame_number % avctx->gop_size == 0;
4183 pict->pict_type= s->keyframe ? FF_I_TYPE : FF_P_TYPE;
4186 if(s->pass1_rc && avctx->frame_number == 0)
4187 pict->quality= 2*FF_QP2LAMBDA;
4189 s->qlog= qscale2qlog(pict->quality);
4190 s->lambda = pict->quality * 3/2;
4192 if(s->qlog < 0 || (!pict->quality && (avctx->flags & CODEC_FLAG_QSCALE))){
4193 s->qlog= LOSSLESS_QLOG;
4195 }//else keep previous frame's qlog until after motion estimation
4199 s->m.current_picture_ptr= &s->m.current_picture;
4200 if(pict->pict_type == P_TYPE){
4201 int block_width = (width +15)>>4;
4202 int block_height= (height+15)>>4;
4203 int stride= s->current_picture.linesize[0];
4205 assert(s->current_picture.data[0]);
4206 assert(s->last_picture[0].data[0]);
4208 s->m.avctx= s->avctx;
4209 s->m.current_picture.data[0]= s->current_picture.data[0];
4210 s->m. last_picture.data[0]= s->last_picture[0].data[0];
4211 s->m. new_picture.data[0]= s-> input_picture.data[0];
4212 s->m. last_picture_ptr= &s->m. last_picture;
4214 s->m. last_picture.linesize[0]=
4215 s->m. new_picture.linesize[0]=
4216 s->m.current_picture.linesize[0]= stride;
4217 s->m.uvlinesize= s->current_picture.linesize[1];
4219 s->m.height= height;
4220 s->m.mb_width = block_width;
4221 s->m.mb_height= block_height;
4222 s->m.mb_stride= s->m.mb_width+1;
4223 s->m.b8_stride= 2*s->m.mb_width+1;
4225 s->m.pict_type= pict->pict_type;
4226 s->m.me_method= s->avctx->me_method;
4227 s->m.me.scene_change_score=0;
4228 s->m.flags= s->avctx->flags;
4229 s->m.quarter_sample= (s->avctx->flags & CODEC_FLAG_QPEL)!=0;
4230 s->m.out_format= FMT_H263;
4231 s->m.unrestricted_mv= 1;
4233 s->m.lambda = s->lambda;
4234 s->m.qscale= (s->m.lambda*139 + FF_LAMBDA_SCALE*64) >> (FF_LAMBDA_SHIFT + 7);
4235 s->lambda2= s->m.lambda2= (s->m.lambda*s->m.lambda + FF_LAMBDA_SCALE/2) >> FF_LAMBDA_SHIFT;
4237 s->m.dsp= s->dsp; //move
4243 memcpy(rc_header_bak, s->header_state, sizeof(s->header_state));
4244 memcpy(rc_block_bak, s->block_state, sizeof(s->block_state));
4249 if(pict->pict_type == I_TYPE)
4250 s->spatial_decomposition_count= 5;
4252 s->spatial_decomposition_count= 5;
4254 s->m.pict_type = pict->pict_type;
4255 s->qbias= pict->pict_type == P_TYPE ? 2 : 0;
4257 common_init_after_header(avctx);
4259 if(s->last_spatial_decomposition_count != s->spatial_decomposition_count){
4260 for(plane_index=0; plane_index<3; plane_index++){
4261 calculate_visual_weight(s, &s->plane[plane_index]);
4266 s->m.misc_bits = 8*(s->c.bytestream - s->c.bytestream_start);
4267 encode_blocks(s, 1);
4268 s->m.mv_bits = 8*(s->c.bytestream - s->c.bytestream_start) - s->m.misc_bits;
4270 for(plane_index=0; plane_index<3; plane_index++){
4271 Plane *p= &s->plane[plane_index];
4275 // int bits= put_bits_count(&s->c.pb);
4277 if(!(avctx->flags2 & CODEC_FLAG2_MEMC_ONLY)){
4279 if(pict->data[plane_index]) //FIXME gray hack
4282 s->spatial_idwt_buffer[y*w + x]= pict->data[plane_index][y*pict->linesize[plane_index] + x]<<FRAC_BITS;
4285 predict_plane(s, s->spatial_idwt_buffer, plane_index, 0);
4288 && pict->pict_type == P_TYPE
4289 && !(avctx->flags&CODEC_FLAG_PASS2)
4290 && s->m.me.scene_change_score > s->avctx->scenechange_threshold){
4291 ff_init_range_encoder(c, buf, buf_size);
4292 ff_build_rac_states(c, 0.05*(1LL<<32), 256-8);
4293 pict->pict_type= FF_I_TYPE;
4295 s->current_picture.key_frame=1;
4299 if(s->qlog == LOSSLESS_QLOG){
4302 s->spatial_dwt_buffer[y*w + x]= (s->spatial_idwt_buffer[y*w + x] + (1<<(FRAC_BITS-1))-1)>>FRAC_BITS;
4308 s->spatial_dwt_buffer[y*w + x]=s->spatial_idwt_buffer[y*w + x]<<ENCODER_EXTRA_BITS;
4314 dwt_quantize(s, p, s->spatial_dwt_buffer, w, h, w, s->spatial_decomposition_type);
4316 ff_spatial_dwt(s->spatial_dwt_buffer, w, h, w, s->spatial_decomposition_type, s->spatial_decomposition_count);
4318 if(s->pass1_rc && plane_index==0){
4319 int delta_qlog = ratecontrol_1pass(s, pict);
4320 if (delta_qlog <= INT_MIN)
4323 //reordering qlog in the bitstream would eliminate this reset
4324 ff_init_range_encoder(c, buf, buf_size);
4325 memcpy(s->header_state, rc_header_bak, sizeof(s->header_state));
4326 memcpy(s->block_state, rc_block_bak, sizeof(s->block_state));
4328 encode_blocks(s, 0);
4332 for(level=0; level<s->spatial_decomposition_count; level++){
4333 for(orientation=level ? 1 : 0; orientation<4; orientation++){
4334 SubBand *b= &p->band[level][orientation];
4337 quantize(s, b, b->ibuf, b->buf, b->stride, s->qbias);
4339 decorrelate(s, b, b->ibuf, b->stride, pict->pict_type == P_TYPE, 0);
4340 encode_subband(s, b, b->ibuf, b->parent ? b->parent->ibuf : NULL, b->stride, orientation);
4341 assert(b->parent==NULL || b->parent->stride == b->stride*2);
4343 correlate(s, b, b->ibuf, b->stride, 1, 0);
4347 for(level=0; level<s->spatial_decomposition_count; level++){
4348 for(orientation=level ? 1 : 0; orientation<4; orientation++){
4349 SubBand *b= &p->band[level][orientation];
4351 dequantize(s, b, b->ibuf, b->stride);
4355 ff_spatial_idwt(s->spatial_idwt_buffer, w, h, w, s->spatial_decomposition_type, s->spatial_decomposition_count);
4356 if(s->qlog == LOSSLESS_QLOG){
4359 s->spatial_idwt_buffer[y*w + x]<<=FRAC_BITS;
4363 predict_plane(s, s->spatial_idwt_buffer, plane_index, 1);
4366 if(pict->pict_type == I_TYPE){
4369 s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x]=
4370 pict->data[plane_index][y*pict->linesize[plane_index] + x];
4374 memset(s->spatial_idwt_buffer, 0, sizeof(IDWTELEM)*w*h);
4375 predict_plane(s, s->spatial_idwt_buffer, plane_index, 1);
4378 if(s->avctx->flags&CODEC_FLAG_PSNR){
4381 if(pict->data[plane_index]) //FIXME gray hack
4384 int d= s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x] - pict->data[plane_index][y*pict->linesize[plane_index] + x];
4388 s->avctx->error[plane_index] += error;
4389 s->current_picture.error[plane_index] = error;
4394 update_last_header_values(s);
4396 if(s->last_picture[s->max_ref_frames-1].data[0]){
4397 avctx->release_buffer(avctx, &s->last_picture[s->max_ref_frames-1]);
4399 if(s->halfpel_plane[s->max_ref_frames-1][1+i/3][i%3])
4400 av_free(s->halfpel_plane[s->max_ref_frames-1][1+i/3][i%3] - EDGE_WIDTH*(1+s->current_picture.linesize[i%3]));
4403 s->current_picture.coded_picture_number = avctx->frame_number;
4404 s->current_picture.pict_type = pict->pict_type;
4405 s->current_picture.quality = pict->quality;
4406 s->m.frame_bits = 8*(s->c.bytestream - s->c.bytestream_start);
4407 s->m.p_tex_bits = s->m.frame_bits - s->m.misc_bits - s->m.mv_bits;
4408 s->m.current_picture.display_picture_number =
4409 s->m.current_picture.coded_picture_number = avctx->frame_number;
4410 s->m.current_picture.quality = pict->quality;
4411 s->m.total_bits += 8*(s->c.bytestream - s->c.bytestream_start);
4413 if (ff_rate_estimate_qscale(&s->m, 0) < 0)
4415 if(avctx->flags&CODEC_FLAG_PASS1)
4416 ff_write_pass1_stats(&s->m);
4417 s->m.last_pict_type = s->m.pict_type;
4418 avctx->frame_bits = s->m.frame_bits;
4419 avctx->mv_bits = s->m.mv_bits;
4420 avctx->misc_bits = s->m.misc_bits;
4421 avctx->p_tex_bits = s->m.p_tex_bits;
4425 return ff_rac_terminate(c);
/**
 * Releases the resources held by a SnowContext.
 *
 * Frees the DWT and IDWT work buffers, the motion-estimation scratch
 * areas stored under s->m, the block array, the per-reference ref_mvs /
 * ref_scores tables, and the x_coeff array of every subband visited by
 * the loops below. Reference pictures that still hold data are handed
 * back through avctx->release_buffer().
 *
 * @param s codec context whose buffers are released
 */
4428 static void common_end(SnowContext *s){
4429 int plane_index, level, orientation, i;
/* av_freep() receives the address of each pointer field. */
4431 av_freep(&s->spatial_dwt_buffer);
4432 av_freep(&s->spatial_idwt_buffer);
/* Scratch areas that live in the embedded s->m context. */
4434 av_freep(&s->m.me.scratchpad);
4435 av_freep(&s->m.me.map);
4436 av_freep(&s->m.me.score_map);
4437 av_freep(&s->m.obmc_scratchpad);
4439 av_freep(&s->block);
/* Every reference slot is visited, up to MAX_REF_FRAMES. */
4441 for(i=0; i<MAX_REF_FRAMES; i++){
4442 av_freep(&s->ref_mvs[i]);
4443 av_freep(&s->ref_scores[i]);
/* Only a slot whose data[0] is set is released. */
4444 if(s->last_picture[i].data[0])
4445 s->avctx->release_buffer(s->avctx, &s->last_picture[i]);
/* Walk the three planes and their subbands; level 0 has orientations 0..3,
 * higher levels have orientations 1..3. */
4448 for(plane_index=0; plane_index<3; plane_index++){
/* NOTE(review): the level loop is bounded by the current
 * s->spatial_decomposition_count, so subbands at higher levels are not
 * visited here. Confirm none of them can still own an x_coeff allocation
 * (e.g. after the count changed between frames). */
4449 for(level=s->spatial_decomposition_count-1; level>=0; level--){
4450 for(orientation=level ? 1 : 0; orientation<4; orientation++){
4451 SubBand *b= &s->plane[plane_index].band[level][orientation];
4453 av_freep(&b->x_coeff);
/**
 * Encoder teardown callback.
 *
 * The lines shown here fetch the SnowContext from avctx->priv_data and
 * free avctx->stats_out.
 *
 * @param avctx codec context being closed
 */
4459 static int encode_end(AVCodecContext *avctx)
/* NOTE(review): s is not used in the lines visible here; presumably it is
 * consumed by a statement that is not shown in this listing. Confirm. */
4461 SnowContext *s = avctx->priv_data;
4464 av_free(avctx->stats_out);
/**
 * Decoder initialisation callback.
 *
 * The visible line sets the pixel format on the codec context to
 * PIX_FMT_YUV420P.
 *
 * @param avctx codec context being opened
 */
4469 static int decode_init(AVCodecContext *avctx)
4471 avctx->pix_fmt= PIX_FMT_YUV420P;
/**
 * Decodes one Snow frame from a byte buffer.
 *
 * Sets up the range decoder over buf, parses the header, then for each of
 * the three planes unpacks the subband coefficients and reconstructs the
 * plane slice by slice (subband decode, dequantize, inverse DWT,
 * prediction). Afterwards the oldest reference picture is released and the
 * result is copied to the caller's AVFrame.
 *
 * @param avctx     codec context; avctx->priv_data is the SnowContext
 * @param data      destination, used as an AVFrame
 * @param data_size set to sizeof(AVFrame)
 * @param buf       input bytes handed to the range decoder
 * @param buf_size  number of bytes in buf
 *
 * NOTE(review): the return statements are not visible in this listing;
 * bytes_read is computed from the range coder position near the end.
 */
4478 static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, const uint8_t *buf, int buf_size){
4479 SnowContext *s = avctx->priv_data;
4480 RangeCoder * const c= &s->c;
4482 AVFrame *picture = data;
4483 int level, orientation, plane_index, i;
/* Point the range decoder at the input and build its state tables. */
4485 ff_init_range_decoder(c, buf, buf_size);
4486 ff_build_rac_states(c, 0.05*(1LL<<32), 256-8);
4488 s->current_picture.pict_type= FF_I_TYPE; //FIXME I vs. P
/* A negative result from decode_header() is tested for; the action taken
 * is not visible here. */
4489 if(decode_header(s)<0)
4491 common_init_after_header(avctx);
4493 // realloc slice buffer for the case that spatial_decomposition_count changed
4494 slice_buffer_destroy(&s->sb);
4495 slice_buffer_init(&s->sb, s->plane[0].height, (MB_SIZE >> s->block_max_depth) + s->spatial_decomposition_count * 8 + 1, s->plane[0].width, s->spatial_idwt_buffer);
/* Per plane: fast_mc is derived from diag_mc, a tap count of 6 and specific
 * hcoeff values. */
4497 for(plane_index=0; plane_index<3; plane_index++){
4498 Plane *p= &s->plane[plane_index];
4499 p->fast_mc= p->diag_mc && p->htaps==6 && p->hcoeff[0]==40
4500 && p->hcoeff[1]==-10
/* The block array is allocated on first use. */
4504 if(!s->block) alloc_blocks(s);
4507 //keyframe flag duplication mess FIXME
/* Debug output; it is emitted at AV_LOG_ERROR level. */
4508 if(avctx->debug&FF_DEBUG_PICT_INFO)
4509 av_log(avctx, AV_LOG_ERROR, "keyframe:%d qlog:%d\n", s->keyframe, s->qlog);
/* Main reconstruction loop over the three planes. */
4513 for(plane_index=0; plane_index<3; plane_index++){
4514 Plane *p= &s->plane[plane_index];
4518 int decode_state[MAX_DECOMPOSITIONS][4][1]; /* Stored state info for unpack_coeffs. 1 variable per instance. */
/* Debug bit 2048: build a prediction-only picture in s->mconly_picture. */
4520 if(s->avctx->debug&2048){
/* NOTE(review): this clears s->spatial_dwt_buffer (sized with DWTELEM),
 * while the predict_plane() call that follows is given
 * s->spatial_idwt_buffer. The encoder path in this file zeroes
 * spatial_idwt_buffer (sized with IDWTELEM) before the same call.
 * Confirm which buffer was meant to be zeroed here. */
4521 memset(s->spatial_dwt_buffer, 0, sizeof(DWTELEM)*w*h);
4522 predict_plane(s, s->spatial_idwt_buffer, plane_index, 1);
/* Copy the resulting sample from current_picture into mconly_picture. */
4526 int v= s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x];
4527 s->mconly_picture.data[plane_index][y*s->mconly_picture.linesize[plane_index] + x]= v;
/* Unpack coefficients for every subband; level 0 has orientations 0..3,
 * higher levels 1..3. */
4533 for(level=0; level<s->spatial_decomposition_count; level++){
4534 for(orientation=level ? 1 : 0; orientation<4; orientation++){
4535 SubBand *b= &p->band[level][orientation];
4536 unpack_coeffs(s, b, b->parent, orientation);
/* Slice geometry: planes other than plane 0 use half the block size. */
4542 const int mb_h= s->b_height << s->block_max_depth;
4543 const int block_size = MB_SIZE >> s->block_max_depth;
4544 const int block_w = plane_index ? block_size/2 : block_size;
4546 dwt_compose_t cs[MAX_DECOMPOSITIONS];
4551 ff_spatial_idwt_buffered_init(cs, &s->sb, w, h, 1, s->spatial_decomposition_type, s->spatial_decomposition_count);
/* mb_y runs from 0 through mb_h inclusive. */
4552 for(mb_y=0; mb_y<=mb_h; mb_y++){
4554 int slice_starty = block_w*mb_y;
4555 int slice_h = block_w*(mb_y+1);
/* Unless this is a keyframe (or debug bit 512 is set), the slice window is
 * moved up by half a block, with its start clamped at 0. */
4556 if (!(s->keyframe || s->avctx->debug&512)){
4557 slice_starty = FFMAX(0, slice_starty - (block_w >> 1));
4558 slice_h -= (block_w >> 1);
/* For each subband, work out which of its rows belong to this slice. */
4561 for(level=0; level<s->spatial_decomposition_count; level++){
4562 for(orientation=level ? 1 : 0; orientation<4; orientation++){
4563 SubBand *b= &p->band[level][orientation];
4566 int our_mb_start = mb_y;
4567 int our_mb_end = (mb_y + 1);
/* Row range at this level: the slice bounds shifted right by the number of
 * remaining decomposition levels, plus a margin; start_y is 0 for mb_y==0. */
4569 start_y = (mb_y ? ((block_w * our_mb_start) >> (s->spatial_decomposition_count - level)) + s->spatial_decomposition_count - level + extra: 0);
4570 end_y = (((block_w * our_mb_end) >> (s->spatial_decomposition_count - level)) + s->spatial_decomposition_count - level + extra);
/* Same half-block shift as above, scaled to this level and clamped at 0. */
4571 if (!(s->keyframe || s->avctx->debug&512)){
4572 start_y = FFMAX(0, start_y - (block_w >> (1+s->spatial_decomposition_count - level)));
4573 end_y = FFMAX(0, end_y - (block_w >> (1+s->spatial_decomposition_count - level)));
/* Never go past the subband's height. */
4575 start_y = FFMIN(b->height, start_y);
4576 end_y = FFMIN(b->height, end_y);
/* Skip the subband when the range is empty. */
4578 if (start_y != end_y){
/* Orientation 0: band[0][0] is decoded and correlated over a range shifted
 * down by one row (except at the top), then dequantized over
 * [start_y, end_y). */
4579 if (orientation == 0){
4580 SubBand * correlate_band = &p->band[0][0];
4581 int correlate_end_y = FFMIN(b->height, end_y + 1);
4582 int correlate_start_y = FFMIN(b->height, (start_y ? start_y + 1 : 0));
4583 decode_subband_slice_buffered(s, correlate_band, &s->sb, correlate_start_y, correlate_end_y, decode_state[0][0]);
4584 correlate_slice_buffered(s, &s->sb, correlate_band, correlate_band->ibuf, correlate_band->stride, 1, 0, correlate_start_y, correlate_end_y);
4585 dequantize_slice_buffered(s, &s->sb, correlate_band, correlate_band->ibuf, correlate_band->stride, start_y, end_y);
4588 decode_subband_slice_buffered(s, b, &s->sb, start_y, end_y, decode_state[level][orientation]);
/* Advance the buffered inverse DWT in steps of 4 rows up to slice_h. */
4593 for(; yd<slice_h; yd+=4){
4594 ff_spatial_idwt_buffered_slice(&s->dsp, cs, &s->sb, w, h, 1, s->spatial_decomposition_type, s->spatial_decomposition_count, yd);
/* Lossless mode: shift the reconstructed lines left by FRAC_BITS. */
4597 if(s->qlog == LOSSLESS_QLOG){
4598 for(; yq<slice_h && yq<h; yq++){
4599 IDWTELEM * line = slice_buffer_get_line(&s->sb, yq);
4601 line[x] <<= FRAC_BITS;
4606 predict_slice_buffered(s, &s->sb, s->spatial_idwt_buffer, plane_index, 1, mb_y);
/* Release the slice-buffer lines of this slice, clamped to the plane height. */
4608 y = FFMIN(p->height, slice_starty);
4609 end_y = FFMIN(p->height, slice_h);
4611 slice_buffer_release(&s->sb, y++);
4614 slice_buffer_flush(&s->sb);
/* Drop the oldest reference slot (index max_ref_frames-1) if it holds data. */
4621 if(s->last_picture[s->max_ref_frames-1].data[0]){
4622 avctx->release_buffer(avctx, &s->last_picture[s->max_ref_frames-1]);
/* The pointer is moved back by EDGE_WIDTH*(1+linesize) before av_free();
 * presumably this undoes an offset applied at allocation. Confirm. */
4624 if(s->halfpel_plane[s->max_ref_frames-1][1+i/3][i%3])
4625 av_free(s->halfpel_plane[s->max_ref_frames-1][1+i/3][i%3] - EDGE_WIDTH*(1+s->current_picture.linesize[i%3]));
/* Output: current_picture normally; mconly_picture is presumably the
 * alternative taken when debug bit 2048 is set (the else keyword is not
 * visible here). */
4628 if(!(s->avctx->debug&2048))
4629 *picture= s->current_picture;
4631 *picture= s->mconly_picture;
4633 *data_size = sizeof(AVFrame);
/* Number of input bytes the range coder has advanced over. */
4635 bytes_read= c->bytestream - c->bytestream_start;
4636 if(bytes_read ==0) av_log(s->avctx, AV_LOG_ERROR, "error at end of frame\n"); //FIXME
/**
 * Decoder teardown callback.
 *
 * The visible lines fetch the SnowContext from avctx->priv_data and
 * destroy its slice buffer.
 *
 * @param avctx codec context being closed
 */
4641 static int decode_end(AVCodecContext *avctx)
4643 SnowContext *s = avctx->priv_data;
4645 slice_buffer_destroy(&s->sb);
/* Codec descriptor for the Snow decoder. Only two initializer entries are
 * visible in this listing. */
4652 AVCodec snow_decoder = {
/* Presumably the private context size entry. TODO confirm against AVCodec. */
4656 sizeof(SnowContext),
/* Entry is 0; the CODEC_CAP_DR1 and CODEC_CAP_DRAW_HORIZ_BAND flags are
 * commented out. Presumably this is the capabilities entry. Confirm. */
4661 0 /*CODEC_CAP_DR1*/ /*| CODEC_CAP_DRAW_HORIZ_BAND*/,
/* Codec descriptor for the Snow encoder; it is compiled only when
 * CONFIG_SNOW_ENCODER is defined. Only one initializer entry is visible in
 * this listing. */
4665 #ifdef CONFIG_SNOW_ENCODER
4666 AVCodec snow_encoder = {
/* Presumably the private context size entry. TODO confirm against AVCodec. */
4670 sizeof(SnowContext),
4687 int buffer[2][width*height];
4690 s.spatial_decomposition_count=6;
4691 s.spatial_decomposition_type=1;
4693 printf("testing 5/3 DWT\n");
4694 for(i=0; i<width*height; i++)
4695 buffer[0][i]= buffer[1][i]= random()%54321 - 12345;
4697 ff_spatial_dwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4698 ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4700 for(i=0; i<width*height; i++)
4701 if(buffer[0][i]!= buffer[1][i]) printf("fsck: %d %d %d\n",i, buffer[0][i], buffer[1][i]);
4703 printf("testing 9/7 DWT\n");
4704 s.spatial_decomposition_type=0;
4705 for(i=0; i<width*height; i++)
4706 buffer[0][i]= buffer[1][i]= random()%54321 - 12345;
4708 ff_spatial_dwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4709 ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4711 for(i=0; i<width*height; i++)
4712 if(FFABS(buffer[0][i] - buffer[1][i])>20) printf("fsck: %d %d %d\n",i, buffer[0][i], buffer[1][i]);
4715 printf("testing AC coder\n");
4716 memset(s.header_state, 0, sizeof(s.header_state));
4717 ff_init_range_encoder(&s.c, buffer[0], 256*256);
4718 ff_init_cabac_states(&s.c, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64);
4720 for(i=-256; i<256; i++){
4721 put_symbol(&s.c, s.header_state, i*i*i/3*FFABS(i), 1);
4723 ff_rac_terminate(&s.c);
4725 memset(s.header_state, 0, sizeof(s.header_state));
4726 ff_init_range_decoder(&s.c, buffer[0], 256*256);
4727 ff_init_cabac_states(&s.c, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64);
4729 for(i=-256; i<256; i++){
4731 j= get_symbol(&s.c, s.header_state, 1);
4732 if(j!=i*i*i/3*FFABS(i)) printf("fsck: %d != %d\n", i, j);
4736 int level, orientation, x, y;
4737 int64_t errors[8][4];
4740 memset(errors, 0, sizeof(errors));
4741 s.spatial_decomposition_count=3;
4742 s.spatial_decomposition_type=0;
4743 for(level=0; level<s.spatial_decomposition_count; level++){
4744 for(orientation=level ? 1 : 0; orientation<4; orientation++){
4745 int w= width >> (s.spatial_decomposition_count-level);
4746 int h= height >> (s.spatial_decomposition_count-level);
4747 int stride= width << (s.spatial_decomposition_count-level);
4748 DWTELEM *buf= buffer[0];
4751 if(orientation&1) buf+=w;
4752 if(orientation>1) buf+=stride>>1;
4754 memset(buffer[0], 0, sizeof(int)*width*height);
4755 buf[w/2 + h/2*stride]= 256*256;
4756 ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4757 for(y=0; y<height; y++){
4758 for(x=0; x<width; x++){
4759 int64_t d= buffer[0][x + y*width];
4761 if(FFABS(width/2-x)<9 && FFABS(height/2-y)<9 && level==2) printf("%8"PRId64" ", d);
4763 if(FFABS(height/2-y)<9 && level==2) printf("\n");
4765 error= (int)(sqrt(error)+0.5);
4766 errors[level][orientation]= error;
4767 if(g) g=ff_gcd(g, error);
4771 printf("static int const visual_weight[][4]={\n");
4772 for(level=0; level<s.spatial_decomposition_count; level++){
4774 for(orientation=0; orientation<4; orientation++){
4775 printf("%8"PRId64",", errors[level][orientation]/g);
4782 int w= width >> (s.spatial_decomposition_count-level);
4783 //int h= height >> (s.spatial_decomposition_count-level);
4784 int stride= width << (s.spatial_decomposition_count-level);
4785 DWTELEM *buf= buffer[0];
4791 memset(buffer[0], 0, sizeof(int)*width*height);
4793 for(y=0; y<height; y++){
4794 for(x=0; x<width; x++){
4795 int tab[4]={0,2,3,1};
4796 buffer[0][x+width*y]= 256*256*tab[(x&1) + 2*(y&1)];
4799 ff_spatial_dwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4803 buf[x + y*stride ]=169;
4804 buf[x + y*stride-w]=64;
4807 ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4809 for(y=0; y<height; y++){
4810 for(x=0; x<width; x++){
4811 int64_t d= buffer[0][x + y*width];
4813 if(FFABS(width/2-x)<9 && FFABS(height/2-y)<9) printf("%8"PRId64" ", d);
4815 if(FFABS(height/2-y)<9) printf("\n");