2 * Copyright (C) 2004 Michael Niedermayer <michaelni@gmx.at>
4 * This file is part of FFmpeg.
6 * FFmpeg is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * FFmpeg is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with FFmpeg; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
25 #include "rangecoder.h"
27 #include "mpegvideo.h"
/**
 * 256-entry lookup table with three output levels (-1, 0, 1).
 * Indices 0, 1 and 255 map to 0, indices 2..127 map to 1 and
 * indices 128..254 map to -1.
 * NOTE(review): the layout suggests the index is a signed difference
 * reduced modulo 256 (128..255 standing for negative values) - confirm
 * against the code that reads this table.
 */
32 static const int8_t quant3[256]={
33 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
34 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
35 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
36 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
37 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
38 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
39 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
40 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
41 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
42 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
43 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
44 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
45 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
46 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
47 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
48 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 0,
/**
 * 256-entry lookup table with three output levels (-1, 0, 1).
 * Index 0 maps to 0, indices 1..127 map to 1 and indices 128..255
 * map to -1; unlike quant3 there is no zero entry next to index 0.
 * NOTE(review): presumably indexed by a signed difference reduced
 * modulo 256 - confirm against the code that reads this table.
 */
50 static const int8_t quant3b[256]={
51 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
52 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
53 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
54 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
55 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
56 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
57 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
58 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
59 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
60 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
61 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
62 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
63 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
64 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
65 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
66 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
/**
 * 256-entry lookup table with three output levels (-1, 0, 1).
 * Indices 0 and 1 map to 0; from index 2 onwards even indices map to 1
 * and odd indices map to -1, so the sign is carried by the lowest bit of
 * the index rather than by its upper half.
 * NOTE(review): the index encoding this implies is not visible here -
 * confirm against the code that reads this table.
 */
68 static const int8_t quant3bA[256]={
69 0, 0, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
70 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
71 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
72 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
73 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
74 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
75 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
76 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
77 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
78 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
79 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
80 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
81 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
82 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
83 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
84 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
/**
 * 256-entry lookup table with five output levels (-2..2).
 * Index 0 maps to 0, 1..3 to 1, 4..127 to 2, 128..252 to -2 and
 * 253..255 to -1.
 * NOTE(review): presumably indexed by a signed difference reduced
 * modulo 256 - confirm against the code that reads this table.
 */
86 static const int8_t quant5[256]={
87 0, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
88 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
89 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
90 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
91 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
92 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
93 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
94 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
95 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
96 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
97 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
98 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
99 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
100 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
101 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
102 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,-1,
/**
 * 256-entry lookup table with seven output levels (-3..3).
 * Index 0 maps to 0, 1..2 to 1, 3..39 to 2, 40..127 to 3,
 * 128..216 to -3, 217..253 to -2 and 254..255 to -1.
 * NOTE(review): presumably indexed by a signed difference reduced
 * modulo 256 - confirm against the code that reads this table.
 */
104 static const int8_t quant7[256]={
105 0, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
106 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
107 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3,
108 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
109 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
110 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
111 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
112 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
113 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
114 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
115 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
116 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
117 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
118 -3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-2,-2,-2,
119 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
120 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,
/**
 * 256-entry lookup table with nine output levels (-4..4).
 * Indices 0..127 hold the non-negative levels, growing from 0 to 4 as
 * the index rises; indices 128..255 hold the negative levels, running
 * from -4 up to -1 at index 255.
 * NOTE(review): presumably indexed by a signed difference reduced
 * modulo 256 - confirm against the code that reads this table.
 */
122 static const int8_t quant9[256]={
123 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3,
124 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
125 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
126 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
127 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
128 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
129 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
130 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
131 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
132 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
133 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
134 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
135 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
136 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
137 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,
138 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-1,-1,
/**
 * 256-entry lookup table with eleven output levels (-5..5).
 * Indices 0..127 hold the non-negative levels, growing from 0 to 5 as
 * the index rises; indices 128..255 hold the negative levels, running
 * from -5 up to -1 at index 255.
 * NOTE(review): presumably indexed by a signed difference reduced
 * modulo 256 - confirm against the code that reads this table.
 */
140 static const int8_t quant11[256]={
141 0, 1, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4,
142 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
143 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
144 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
145 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
146 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
147 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
148 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
149 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
150 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
151 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
152 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
153 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
154 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-4,-4,
155 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
156 -4,-4,-4,-4,-4,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-1,
/**
 * 256-entry lookup table with thirteen output levels (-6..6).
 * Indices 0..127 hold the non-negative levels, growing from 0 to 6 as
 * the index rises; indices 128..255 hold the negative levels, running
 * from -6 up to -1 at index 255.
 * NOTE(review): presumably indexed by a signed difference reduced
 * modulo 256 - confirm against the code that reads this table.
 */
158 static const int8_t quant13[256]={
159 0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
160 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
161 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
162 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
163 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
164 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
165 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
166 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
167 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
168 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
169 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
170 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
171 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-5,
172 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
173 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
174 -4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,-2,-2,-1,
/**
 * 1024 8-bit weights laid out as 32 rows of 32 values. The weights are 0
 * along the outer rows and rise to 255 in the two middle rows; the rows
 * appear mirror-symmetric both left/right and top/bottom.
 * NOTE(review): the name suggests an overlapped-block motion-compensation
 * window - confirm.
 * NOTE(review): obmc32 and obmc16 are each defined three times in the
 * visible text; presumably the variants are alternatives selected by
 * preprocessor conditionals that are not visible here - confirm which one
 * is actually compiled.
 */
178 static const uint8_t obmc32[1024]={
179 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
180 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0,
181 0, 0, 0, 4, 4, 4, 4, 8, 8, 12, 12, 12, 16, 16, 16, 16, 16, 16, 16, 16, 12, 12, 12, 8, 8, 4, 4, 4, 4, 0, 0, 0,
182 0, 0, 4, 4, 8, 8, 12, 16, 16, 20, 24, 24, 28, 28, 32, 32, 32, 32, 28, 28, 24, 24, 20, 16, 16, 12, 8, 8, 4, 4, 0, 0,
183 0, 0, 4, 8, 8, 12, 16, 24, 28, 32, 36, 40, 44, 48, 48, 48, 48, 48, 48, 44, 40, 36, 32, 28, 24, 16, 12, 8, 8, 4, 0, 0,
184 0, 4, 4, 8, 12, 20, 24, 32, 40, 44, 52, 56, 60, 64, 68, 72, 72, 68, 64, 60, 56, 52, 44, 40, 32, 24, 20, 12, 8, 4, 4, 0,
185 0, 4, 4, 12, 16, 24, 32, 40, 52, 60, 68, 76, 80, 88, 88, 92, 92, 88, 88, 80, 76, 68, 60, 52, 40, 32, 24, 16, 12, 4, 4, 0,
186 0, 4, 8, 16, 24, 32, 40, 52, 64, 76, 84, 92,100,108,112,116,116,112,108,100, 92, 84, 76, 64, 52, 40, 32, 24, 16, 8, 4, 0,
187 0, 4, 8, 16, 28, 40, 52, 64, 76, 88,100,112,124,132,136,140,140,136,132,124,112,100, 88, 76, 64, 52, 40, 28, 16, 8, 4, 0,
188 0, 4, 12, 20, 32, 44, 60, 76, 88,104,120,132,144,152,160,164,164,160,152,144,132,120,104, 88, 76, 60, 44, 32, 20, 12, 4, 0,
189 0, 4, 12, 24, 36, 48, 68, 84,100,120,136,152,164,176,180,184,184,180,176,164,152,136,120,100, 84, 68, 48, 36, 24, 12, 4, 0,
190 0, 4, 12, 24, 40, 56, 76, 92,112,132,152,168,180,192,204,208,208,204,192,180,168,152,132,112, 92, 76, 56, 40, 24, 12, 4, 0,
191 0, 4, 16, 28, 44, 60, 80,100,124,144,164,180,196,208,220,224,224,220,208,196,180,164,144,124,100, 80, 60, 44, 28, 16, 4, 0,
192 0, 8, 16, 28, 48, 64, 88,108,132,152,176,192,208,224,232,240,240,232,224,208,192,176,152,132,108, 88, 64, 48, 28, 16, 8, 0,
193 0, 4, 16, 32, 48, 68, 88,112,136,160,180,204,220,232,244,248,248,244,232,220,204,180,160,136,112, 88, 68, 48, 32, 16, 4, 0,
194 1, 8, 16, 32, 48, 72, 92,116,140,164,184,208,224,240,248,255,255,248,240,224,208,184,164,140,116, 92, 72, 48, 32, 16, 8, 1,
195 1, 8, 16, 32, 48, 72, 92,116,140,164,184,208,224,240,248,255,255,248,240,224,208,184,164,140,116, 92, 72, 48, 32, 16, 8, 1,
196 0, 4, 16, 32, 48, 68, 88,112,136,160,180,204,220,232,244,248,248,244,232,220,204,180,160,136,112, 88, 68, 48, 32, 16, 4, 0,
197 0, 8, 16, 28, 48, 64, 88,108,132,152,176,192,208,224,232,240,240,232,224,208,192,176,152,132,108, 88, 64, 48, 28, 16, 8, 0,
198 0, 4, 16, 28, 44, 60, 80,100,124,144,164,180,196,208,220,224,224,220,208,196,180,164,144,124,100, 80, 60, 44, 28, 16, 4, 0,
199 0, 4, 12, 24, 40, 56, 76, 92,112,132,152,168,180,192,204,208,208,204,192,180,168,152,132,112, 92, 76, 56, 40, 24, 12, 4, 0,
200 0, 4, 12, 24, 36, 48, 68, 84,100,120,136,152,164,176,180,184,184,180,176,164,152,136,120,100, 84, 68, 48, 36, 24, 12, 4, 0,
201 0, 4, 12, 20, 32, 44, 60, 76, 88,104,120,132,144,152,160,164,164,160,152,144,132,120,104, 88, 76, 60, 44, 32, 20, 12, 4, 0,
202 0, 4, 8, 16, 28, 40, 52, 64, 76, 88,100,112,124,132,136,140,140,136,132,124,112,100, 88, 76, 64, 52, 40, 28, 16, 8, 4, 0,
203 0, 4, 8, 16, 24, 32, 40, 52, 64, 76, 84, 92,100,108,112,116,116,112,108,100, 92, 84, 76, 64, 52, 40, 32, 24, 16, 8, 4, 0,
204 0, 4, 4, 12, 16, 24, 32, 40, 52, 60, 68, 76, 80, 88, 88, 92, 92, 88, 88, 80, 76, 68, 60, 52, 40, 32, 24, 16, 12, 4, 4, 0,
205 0, 4, 4, 8, 12, 20, 24, 32, 40, 44, 52, 56, 60, 64, 68, 72, 72, 68, 64, 60, 56, 52, 44, 40, 32, 24, 20, 12, 8, 4, 4, 0,
206 0, 0, 4, 8, 8, 12, 16, 24, 28, 32, 36, 40, 44, 48, 48, 48, 48, 48, 48, 44, 40, 36, 32, 28, 24, 16, 12, 8, 8, 4, 0, 0,
207 0, 0, 4, 4, 8, 8, 12, 16, 16, 20, 24, 24, 28, 28, 32, 32, 32, 32, 28, 28, 24, 24, 20, 16, 16, 12, 8, 8, 4, 4, 0, 0,
208 0, 0, 0, 4, 4, 4, 4, 8, 8, 12, 12, 12, 16, 16, 16, 16, 16, 16, 16, 16, 12, 12, 12, 8, 8, 4, 4, 4, 4, 0, 0, 0,
209 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0,
210 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
/**
 * 256 8-bit weights laid out as 16 rows of 16 values, small at the
 * border and peaking at 248 in the centre; the rows appear
 * mirror-symmetric both left/right and top/bottom.
 * NOTE(review): first of three visible definitions of obmc16; presumably
 * only one is compiled, chosen by preprocessor conditionals that are not
 * visible here - confirm.
 */
213 static const uint8_t obmc16[256]={
214 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0,
215 0, 4, 4, 8, 16, 20, 20, 24, 24, 20, 20, 16, 8, 4, 4, 0,
216 0, 4, 16, 24, 36, 44, 52, 60, 60, 52, 44, 36, 24, 16, 4, 0,
217 0, 8, 24, 44, 60, 80, 96,104,104, 96, 80, 60, 44, 24, 8, 0,
218 0, 16, 36, 60, 92,116,136,152,152,136,116, 92, 60, 36, 16, 0,
219 0, 20, 44, 80,116,152,180,196,196,180,152,116, 80, 44, 20, 0,
220 4, 20, 52, 96,136,180,212,228,228,212,180,136, 96, 52, 20, 4,
221 4, 24, 60,104,152,196,228,248,248,228,196,152,104, 60, 24, 4,
222 4, 24, 60,104,152,196,228,248,248,228,196,152,104, 60, 24, 4,
223 4, 20, 52, 96,136,180,212,228,228,212,180,136, 96, 52, 20, 4,
224 0, 20, 44, 80,116,152,180,196,196,180,152,116, 80, 44, 20, 0,
225 0, 16, 36, 60, 92,116,136,152,152,136,116, 92, 60, 36, 16, 0,
226 0, 8, 24, 44, 60, 80, 96,104,104, 96, 80, 60, 44, 24, 8, 0,
227 0, 4, 16, 24, 36, 44, 52, 60, 60, 52, 44, 36, 24, 16, 4, 0,
228 0, 4, 4, 8, 16, 20, 20, 24, 24, 20, 20, 16, 8, 4, 4, 0,
229 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0,
/**
 * Second visible definition of obmc32: 1024 8-bit weights as 32 rows of
 * 32 values. Compared with the first variant the border weights are not
 * all zero and the peak is 240 rather than 255; the rows appear
 * mirror-symmetric both left/right and top/bottom.
 * NOTE(review): presumably an alternative window shape selected by
 * preprocessor conditionals that are not visible here - confirm which
 * definition is actually compiled.
 */
233 static const uint8_t obmc32[1024]={
234 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0,
235 0, 4, 4, 4, 8, 8, 8, 12, 12, 16, 16, 16, 20, 20, 20, 24, 24, 20, 20, 20, 16, 16, 16, 12, 12, 8, 8, 8, 4, 4, 4, 0,
236 0, 4, 8, 8, 12, 12, 16, 20, 20, 24, 28, 28, 32, 32, 36, 40, 40, 36, 32, 32, 28, 28, 24, 20, 20, 16, 12, 12, 8, 8, 4, 0,
237 0, 4, 8, 12, 16, 20, 24, 28, 28, 32, 36, 40, 44, 48, 52, 56, 56, 52, 48, 44, 40, 36, 32, 28, 28, 24, 20, 16, 12, 8, 4, 0,
238 4, 8, 12, 16, 20, 24, 28, 32, 40, 44, 48, 52, 56, 60, 64, 68, 68, 64, 60, 56, 52, 48, 44, 40, 32, 28, 24, 20, 16, 12, 8, 4,
239 4, 8, 12, 20, 24, 32, 36, 40, 48, 52, 56, 64, 68, 76, 80, 84, 84, 80, 76, 68, 64, 56, 52, 48, 40, 36, 32, 24, 20, 12, 8, 4,
240 4, 8, 16, 24, 28, 36, 44, 48, 56, 60, 68, 76, 80, 88, 96,100,100, 96, 88, 80, 76, 68, 60, 56, 48, 44, 36, 28, 24, 16, 8, 4,
241 4, 12, 20, 28, 32, 40, 48, 56, 64, 72, 80, 88, 92,100,108,116,116,108,100, 92, 88, 80, 72, 64, 56, 48, 40, 32, 28, 20, 12, 4,
242 4, 12, 20, 28, 40, 48, 56, 64, 72, 80, 88, 96,108,116,124,132,132,124,116,108, 96, 88, 80, 72, 64, 56, 48, 40, 28, 20, 12, 4,
243 4, 16, 24, 32, 44, 52, 60, 72, 80, 92,100,108,120,128,136,148,148,136,128,120,108,100, 92, 80, 72, 60, 52, 44, 32, 24, 16, 4,
244 4, 16, 28, 36, 48, 56, 68, 80, 88,100,112,120,132,140,152,164,164,152,140,132,120,112,100, 88, 80, 68, 56, 48, 36, 28, 16, 4,
245 4, 16, 28, 40, 52, 64, 76, 88, 96,108,120,132,144,156,168,180,180,168,156,144,132,120,108, 96, 88, 76, 64, 52, 40, 28, 16, 4,
246 8, 20, 32, 44, 56, 68, 80, 92,108,120,132,144,156,168,180,192,192,180,168,156,144,132,120,108, 92, 80, 68, 56, 44, 32, 20, 8,
247 8, 20, 32, 48, 60, 76, 88,100,116,128,140,156,168,184,196,208,208,196,184,168,156,140,128,116,100, 88, 76, 60, 48, 32, 20, 8,
248 8, 20, 36, 52, 64, 80, 96,108,124,136,152,168,180,196,212,224,224,212,196,180,168,152,136,124,108, 96, 80, 64, 52, 36, 20, 8,
249 8, 24, 40, 56, 68, 84,100,116,132,148,164,180,192,208,224,240,240,224,208,192,180,164,148,132,116,100, 84, 68, 56, 40, 24, 8,
250 8, 24, 40, 56, 68, 84,100,116,132,148,164,180,192,208,224,240,240,224,208,192,180,164,148,132,116,100, 84, 68, 56, 40, 24, 8,
251 8, 20, 36, 52, 64, 80, 96,108,124,136,152,168,180,196,212,224,224,212,196,180,168,152,136,124,108, 96, 80, 64, 52, 36, 20, 8,
252 8, 20, 32, 48, 60, 76, 88,100,116,128,140,156,168,184,196,208,208,196,184,168,156,140,128,116,100, 88, 76, 60, 48, 32, 20, 8,
253 8, 20, 32, 44, 56, 68, 80, 92,108,120,132,144,156,168,180,192,192,180,168,156,144,132,120,108, 92, 80, 68, 56, 44, 32, 20, 8,
254 4, 16, 28, 40, 52, 64, 76, 88, 96,108,120,132,144,156,168,180,180,168,156,144,132,120,108, 96, 88, 76, 64, 52, 40, 28, 16, 4,
255 4, 16, 28, 36, 48, 56, 68, 80, 88,100,112,120,132,140,152,164,164,152,140,132,120,112,100, 88, 80, 68, 56, 48, 36, 28, 16, 4,
256 4, 16, 24, 32, 44, 52, 60, 72, 80, 92,100,108,120,128,136,148,148,136,128,120,108,100, 92, 80, 72, 60, 52, 44, 32, 24, 16, 4,
257 4, 12, 20, 28, 40, 48, 56, 64, 72, 80, 88, 96,108,116,124,132,132,124,116,108, 96, 88, 80, 72, 64, 56, 48, 40, 28, 20, 12, 4,
258 4, 12, 20, 28, 32, 40, 48, 56, 64, 72, 80, 88, 92,100,108,116,116,108,100, 92, 88, 80, 72, 64, 56, 48, 40, 32, 28, 20, 12, 4,
259 4, 8, 16, 24, 28, 36, 44, 48, 56, 60, 68, 76, 80, 88, 96,100,100, 96, 88, 80, 76, 68, 60, 56, 48, 44, 36, 28, 24, 16, 8, 4,
260 4, 8, 12, 20, 24, 32, 36, 40, 48, 52, 56, 64, 68, 76, 80, 84, 84, 80, 76, 68, 64, 56, 52, 48, 40, 36, 32, 24, 20, 12, 8, 4,
261 4, 8, 12, 16, 20, 24, 28, 32, 40, 44, 48, 52, 56, 60, 64, 68, 68, 64, 60, 56, 52, 48, 44, 40, 32, 28, 24, 20, 16, 12, 8, 4,
262 0, 4, 8, 12, 16, 20, 24, 28, 28, 32, 36, 40, 44, 48, 52, 56, 56, 52, 48, 44, 40, 36, 32, 28, 28, 24, 20, 16, 12, 8, 4, 0,
263 0, 4, 8, 8, 12, 12, 16, 20, 20, 24, 28, 28, 32, 32, 36, 40, 40, 36, 32, 32, 28, 28, 24, 20, 20, 16, 12, 12, 8, 8, 4, 0,
264 0, 4, 4, 4, 8, 8, 8, 12, 12, 16, 16, 16, 20, 20, 20, 24, 24, 20, 20, 20, 16, 16, 16, 12, 12, 8, 8, 8, 4, 4, 4, 0,
265 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0,
/**
 * Second visible definition of obmc16: 256 8-bit weights as 16 rows of
 * 16 values, rising from 0 in the corners to 224 in the centre; the rows
 * appear mirror-symmetric both left/right and top/bottom.
 * NOTE(review): presumably an alternative window shape selected by
 * preprocessor conditionals that are not visible here - confirm which
 * definition is actually compiled.
 */
268 static const uint8_t obmc16[256]={
269 0, 4, 4, 8, 8, 12, 12, 16, 16, 12, 12, 8, 8, 4, 4, 0,
270 4, 8, 16, 20, 28, 32, 40, 44, 44, 40, 32, 28, 20, 16, 8, 4,
271 4, 16, 24, 36, 44, 56, 64, 76, 76, 64, 56, 44, 36, 24, 16, 4,
272 8, 20, 36, 48, 64, 76, 92,104,104, 92, 76, 64, 48, 36, 20, 8,
273 8, 28, 44, 64, 80,100,116,136,136,116,100, 80, 64, 44, 28, 8,
274 12, 32, 56, 76,100,120,144,164,164,144,120,100, 76, 56, 32, 12,
275 12, 40, 64, 92,116,144,168,196,196,168,144,116, 92, 64, 40, 12,
276 16, 44, 76,104,136,164,196,224,224,196,164,136,104, 76, 44, 16,
277 16, 44, 76,104,136,164,196,224,224,196,164,136,104, 76, 44, 16,
278 12, 40, 64, 92,116,144,168,196,196,168,144,116, 92, 64, 40, 12,
279 12, 32, 56, 76,100,120,144,164,164,144,120,100, 76, 56, 32, 12,
280 8, 28, 44, 64, 80,100,116,136,136,116,100, 80, 64, 44, 28, 8,
281 8, 20, 36, 48, 64, 76, 92,104,104, 92, 76, 64, 48, 36, 20, 8,
282 4, 16, 24, 36, 44, 56, 64, 76, 76, 64, 56, 44, 36, 24, 16, 4,
283 4, 8, 16, 20, 28, 32, 40, 44, 44, 40, 32, 28, 20, 16, 8, 4,
284 0, 4, 4, 8, 8, 12, 12, 16, 16, 12, 12, 8, 8, 4, 4, 0,
/**
 * Third visible definition of obmc32: 1024 8-bit weights as 32 rows of
 * 32 values, 0 along the outer rows and peaking at 255 in the two middle
 * rows; the rows appear mirror-symmetric both left/right and top/bottom.
 * NOTE(review): presumably an alternative window shape selected by
 * preprocessor conditionals that are not visible here - confirm which
 * definition is actually compiled.
 */
288 static const uint8_t obmc32[1024]={
289 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
290 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 4, 4, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0,
291 0, 0, 0, 4, 4, 4, 4, 8, 8, 12, 12, 12, 12, 16, 16, 16, 16, 16, 16, 12, 12, 12, 12, 8, 8, 4, 4, 4, 4, 0, 0, 0,
292 0, 0, 4, 4, 4, 8, 8, 12, 16, 20, 20, 24, 28, 28, 28, 28, 28, 28, 28, 28, 24, 20, 20, 16, 12, 8, 8, 4, 4, 4, 0, 0,
293 0, 0, 4, 4, 8, 12, 16, 20, 24, 28, 36, 40, 44, 44, 48, 48, 48, 48, 44, 44, 40, 36, 28, 24, 20, 16, 12, 8, 4, 4, 0, 0,
294 0, 0, 4, 8, 12, 20, 24, 32, 36, 44, 48, 56, 60, 64, 68, 68, 68, 68, 64, 60, 56, 48, 44, 36, 32, 24, 20, 12, 8, 4, 0, 0,
295 0, 4, 4, 8, 16, 24, 32, 40, 48, 60, 68, 76, 80, 84, 88, 92, 92, 88, 84, 80, 76, 68, 60, 48, 40, 32, 24, 16, 8, 4, 4, 0,
296 0, 4, 8, 12, 20, 32, 40, 52, 64, 76, 84, 96,104,108,112,116,116,112,108,104, 96, 84, 76, 64, 52, 40, 32, 20, 12, 8, 4, 0,
297 0, 4, 8, 16, 24, 36, 48, 64, 76, 92,104,116,124,132,136,140,140,136,132,124,116,104, 92, 76, 64, 48, 36, 24, 16, 8, 4, 0,
298 0, 4, 12, 20, 28, 44, 60, 76, 92,104,120,136,148,156,160,164,164,160,156,148,136,120,104, 92, 76, 60, 44, 28, 20, 12, 4, 0,
299 0, 4, 12, 20, 36, 48, 68, 84,104,120,140,152,168,176,184,188,188,184,176,168,152,140,120,104, 84, 68, 48, 36, 20, 12, 4, 0,
300 0, 4, 12, 24, 36, 56, 76, 96,116,136,152,172,184,196,204,208,208,204,196,184,172,152,136,116, 96, 76, 56, 36, 24, 12, 4, 0,
301 0, 4, 12, 24, 44, 60, 80,104,124,148,168,184,200,212,224,228,228,224,212,200,184,168,148,124,104, 80, 60, 44, 24, 12, 4, 0,
302 0, 4, 12, 28, 44, 64, 84,108,132,156,176,196,212,228,236,240,240,236,228,212,196,176,156,132,108, 84, 64, 44, 28, 12, 4, 0,
303 0, 4, 16, 28, 48, 68, 88,112,136,160,184,204,224,236,244,252,252,244,236,224,204,184,160,136,112, 88, 68, 48, 28, 16, 4, 0,
304 1, 4, 16, 28, 48, 68, 92,116,140,164,188,208,228,240,252,255,255,252,240,228,208,188,164,140,116, 92, 68, 48, 28, 16, 4, 1,
305 1, 4, 16, 28, 48, 68, 92,116,140,164,188,208,228,240,252,255,255,252,240,228,208,188,164,140,116, 92, 68, 48, 28, 16, 4, 1,
306 0, 4, 16, 28, 48, 68, 88,112,136,160,184,204,224,236,244,252,252,244,236,224,204,184,160,136,112, 88, 68, 48, 28, 16, 4, 0,
307 0, 4, 12, 28, 44, 64, 84,108,132,156,176,196,212,228,236,240,240,236,228,212,196,176,156,132,108, 84, 64, 44, 28, 12, 4, 0,
308 0, 4, 12, 24, 44, 60, 80,104,124,148,168,184,200,212,224,228,228,224,212,200,184,168,148,124,104, 80, 60, 44, 24, 12, 4, 0,
309 0, 4, 12, 24, 36, 56, 76, 96,116,136,152,172,184,196,204,208,208,204,196,184,172,152,136,116, 96, 76, 56, 36, 24, 12, 4, 0,
310 0, 4, 12, 20, 36, 48, 68, 84,104,120,140,152,168,176,184,188,188,184,176,168,152,140,120,104, 84, 68, 48, 36, 20, 12, 4, 0,
311 0, 4, 12, 20, 28, 44, 60, 76, 92,104,120,136,148,156,160,164,164,160,156,148,136,120,104, 92, 76, 60, 44, 28, 20, 12, 4, 0,
312 0, 4, 8, 16, 24, 36, 48, 64, 76, 92,104,116,124,132,136,140,140,136,132,124,116,104, 92, 76, 64, 48, 36, 24, 16, 8, 4, 0,
313 0, 4, 8, 12, 20, 32, 40, 52, 64, 76, 84, 96,104,108,112,116,116,112,108,104, 96, 84, 76, 64, 52, 40, 32, 20, 12, 8, 4, 0,
314 0, 4, 4, 8, 16, 24, 32, 40, 48, 60, 68, 76, 80, 84, 88, 92, 92, 88, 84, 80, 76, 68, 60, 48, 40, 32, 24, 16, 8, 4, 4, 0,
315 0, 0, 4, 8, 12, 20, 24, 32, 36, 44, 48, 56, 60, 64, 68, 68, 68, 68, 64, 60, 56, 48, 44, 36, 32, 24, 20, 12, 8, 4, 0, 0,
316 0, 0, 4, 4, 8, 12, 16, 20, 24, 28, 36, 40, 44, 44, 48, 48, 48, 48, 44, 44, 40, 36, 28, 24, 20, 16, 12, 8, 4, 4, 0, 0,
317 0, 0, 4, 4, 4, 8, 8, 12, 16, 20, 20, 24, 28, 28, 28, 28, 28, 28, 28, 28, 24, 20, 20, 16, 12, 8, 8, 4, 4, 4, 0, 0,
318 0, 0, 0, 4, 4, 4, 4, 8, 8, 12, 12, 12, 12, 16, 16, 16, 16, 16, 16, 12, 12, 12, 12, 8, 8, 4, 4, 4, 4, 0, 0, 0,
319 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 4, 4, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0,
320 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
/**
 * Third visible definition of obmc16: 256 8-bit weights as 16 rows of
 * 16 values, small at the border and peaking at 252 in the centre; the
 * rows appear mirror-symmetric both left/right and top/bottom.
 * NOTE(review): presumably an alternative window shape selected by
 * preprocessor conditionals that are not visible here - confirm which
 * definition is actually compiled.
 */
323 static const uint8_t obmc16[256]={
324 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0,
325 0, 0, 4, 8, 12, 16, 20, 20, 20, 20, 16, 12, 8, 4, 0, 0,
326 0, 4, 12, 24, 32, 44, 52, 56, 56, 52, 44, 32, 24, 12, 4, 0,
327 0, 8, 24, 40, 60, 80, 96,104,104, 96, 80, 60, 40, 24, 8, 0,
328 0, 12, 32, 64, 92,120,140,152,152,140,120, 92, 64, 32, 12, 0,
329 4, 16, 44, 80,120,156,184,196,196,184,156,120, 80, 44, 16, 4,
330 4, 20, 52, 96,140,184,216,232,232,216,184,140, 96, 52, 20, 4,
331 0, 20, 56,104,152,196,232,252,252,232,196,152,104, 56, 20, 0,
332 0, 20, 56,104,152,196,232,252,252,232,196,152,104, 56, 20, 0,
333 4, 20, 52, 96,140,184,216,232,232,216,184,140, 96, 52, 20, 4,
334 4, 16, 44, 80,120,156,184,196,196,184,156,120, 80, 44, 16, 4,
335 0, 12, 32, 64, 92,120,140,152,152,140,120, 92, 64, 32, 12, 0,
336 0, 8, 24, 40, 60, 80, 96,104,104, 96, 80, 60, 40, 24, 8, 0,
337 0, 4, 12, 24, 32, 44, 52, 56, 56, 52, 44, 32, 24, 12, 4, 0,
338 0, 0, 4, 8, 12, 16, 20, 20, 20, 20, 16, 12, 8, 4, 0, 0,
339 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0,
/**
 * 64 8-bit weights laid out as 8 rows of 8 values, rising from 4 in the
 * corners to 196 in the centre. The table is mirror-symmetric both
 * left/right and top/bottom.
 * NOTE(review): the name suggests the 8-wide counterpart of the
 * obmc32/obmc16 windows - confirm.
 */
345 static const uint8_t obmc8[64]={
346 4, 12, 20, 28, 28, 20, 12, 4,
347 12, 36, 60, 84, 84, 60, 36, 12,
348 20, 60,100,140,140,100, 60, 20,
349 28, 84,140,196,196,140, 84, 28,
350 28, 84,140,196,196,140, 84, 28,
351 20, 60,100,140,140,100, 60, 20,
352 12, 36, 60, 84, 84, 60, 36, 12,
353 4, 12, 20, 28, 28, 20, 12, 4,
358 static const uint8_t obmc4[16]={
/**
 * Pointers to the four weight tables, ordered from the largest to the
 * smallest: index 0 is obmc32, 1 is obmc16, 2 is obmc8 and 3 is obmc4.
 */
366 static const uint8_t *obmc_tab[4]={
367 obmc32, obmc16, obmc8, obmc4
370 static int scale_mv_ref[MAX_REF_FRAMES][MAX_REF_FRAMES];
/**
 * Per-block record. Only part of the member list is visible in this
 * excerpt: the BLOCK_INTRA flag value and the level member.
 * NOTE(review): BLOCK_INTRA is #defined inside the struct body, so it is
 * still an ordinary file-scope macro; presumably it is a bit stored in a
 * type member that is not visible here - confirm.
 */
372 typedef struct BlockNode{
378 //#define TYPE_SPLIT 1
379 #define BLOCK_INTRA 1
381 //#define TYPE_NOCOLOR 4
382 uint8_t level; //FIXME merge into type?
/**
 * Constant BlockNode whose three colour components are all set to 128.
 * Any further initialisers are not visible in this excerpt; members
 * without an initialiser are zero, as for any static object.
 */
385 static const BlockNode null_block= { //FIXME add border maybe
386 .color= {128,128,128},
/* MB_SIZE is derived from its base-2 logarithm, so it is always a power
 * of two (16 with LOG2_MB_SIZE == 4). */
394 #define LOG2_MB_SIZE 4
395 #define MB_SIZE (1<<LOG2_MB_SIZE)
/* NOTE(review): presumably the number of extra precision bits the encoder
 * carries internally - confirm against the uses of this macro. */
396 #define ENCODER_EXTRA_BITS 4
399 typedef struct x_and_coeff{
/**
 * Per-subband record. Only some members are visible in this excerpt.
 */
404 typedef struct SubBand{
409 int qlog; ///< log(qscale)/log[2^(1/6)]
414 int stride_line; ///< Stride measured in lines, not pixels.
415 x_and_coeff * x_coeff;
416 struct SubBand *parent;
/* 7 + 512 groups of 32 bytes each.
 * NOTE(review): presumably adaptive coder contexts; 32 matches the state
 * offsets 0..31 used by put_symbol()/get_symbol() - confirm. */
417 uint8_t state[/*7*2*/ 7 + 512][32];
/**
 * Per-plane record. Only some members are visible in this excerpt.
 */
420 typedef struct Plane{
/* Four SubBand entries for each of up to MAX_DECOMPOSITIONS levels. */
423 SubBand band[MAX_DECOMPOSITIONS][4];
/* Current and previous filter coefficients, HTAPS_MAX/2 entries each.
 * NOTE(review): storing only half the taps suggests a symmetric filter,
 * and last_hcoeff presumably holds the previous frame's values - confirm. */
426 int8_t hcoeff[HTAPS_MAX/2];
431 int8_t last_hcoeff[HTAPS_MAX/2];
/**
 * Codec context. Only part of the member list is visible in this excerpt.
 * Several members come in current/last_ pairs (spatial decomposition type
 * and count, block max depth). QBIAS_SHIFT and ME_CACHE_SIZE are #defined
 * inside the struct body but are ordinary file-scope macros.
 * The MpegEncContext member is declared last; the declaration at the top
 * of the struct is commented out.
 */
435 typedef struct SnowContext{
436 // MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to make the motion estimation eventually independent of MpegEncContext, so this will be removed then (FIXME/XXX)
438 AVCodecContext *avctx;
442 AVFrame input_picture; ///< new_picture with the internal linesizes
443 AVFrame current_picture;
444 AVFrame last_picture[MAX_REF_FRAMES];
445 uint8_t *halfpel_plane[MAX_REF_FRAMES][4][4];
446 AVFrame mconly_picture;
447 // uint8_t q_context[16];
448 uint8_t header_state[32];
449 uint8_t block_state[128 + 32*128];
453 int spatial_decomposition_type;
454 int last_spatial_decomposition_type;
455 int temporal_decomposition_type;
456 int spatial_decomposition_count;
457 int last_spatial_decomposition_count;
458 int temporal_decomposition_count;
461 int16_t (*ref_mvs[MAX_REF_FRAMES])[2];
462 uint32_t *ref_scores[MAX_REF_FRAMES];
463 DWTELEM *spatial_dwt_buffer;
464 IDWTELEM *spatial_idwt_buffer;
468 int spatial_scalability;
478 #define QBIAS_SHIFT 3
482 int last_block_max_depth;
483 Plane plane[MAX_PLANES];
485 #define ME_CACHE_SIZE 1024
486 int me_cache[ME_CACHE_SIZE];
487 int me_cache_generation;
490 MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to make the motion estimation eventually independent of MpegEncContext, so this will be removed then (FIXME/XXX)
/* Returns the pointer already stored for line_num when it is non-NULL and
 * otherwise calls slice_buffer_load_line() to obtain one.
 * Both arguments are expanded more than once, so do not pass expressions
 * with side effects. */
501 #define slice_buffer_get_line(slice_buf, line_num) ((slice_buf)->line[line_num] ? (slice_buf)->line[line_num] : slice_buffer_load_line((slice_buf), (line_num)))
502 //#define slice_buffer_get_line(slice_buf, line_num) (slice_buffer_load_line((slice_buf), (line_num)))
504 static void iterative_me(SnowContext *s);
/**
 * Set up a slice buffer.
 *
 * Records the geometry and base buffer, allocates a zeroed table of
 * line_count line pointers, and allocates max_allocated_lines line
 * buffers of line_width elements each, kept on a stack of free buffers.
 *
 * @param buf                 slice buffer to initialise
 * @param line_count          number of entries in the line pointer table
 * @param max_allocated_lines number of line buffers to allocate
 * @param line_width          number of IDWTELEM elements per line buffer
 * @param base_buffer         stored in buf->base_buffer; not otherwise used here
 *
 * NOTE(review): none of the visible lines checks the av_mallocz()/
 * av_malloc() results, and the size products are computed without an
 * overflow check - confirm the callers bound these values.
 */
506 static void slice_buffer_init(slice_buffer * buf, int line_count, int max_allocated_lines, int line_width, IDWTELEM * base_buffer)
510 buf->base_buffer = base_buffer;
511 buf->line_count = line_count;
512 buf->line_width = line_width;
513 buf->data_count = max_allocated_lines;
514 buf->line = av_mallocz (sizeof(IDWTELEM *) * line_count);
515 buf->data_stack = av_malloc (sizeof(IDWTELEM *) * max_allocated_lines);
517 for (i = 0; i < max_allocated_lines; i++)
519 buf->data_stack[i] = av_malloc (sizeof(IDWTELEM) * line_width);
/* Every buffer starts out free: the top of the stack is the last slot. */
522 buf->data_stack_top = max_allocated_lines - 1;
/**
 * Attach a free line buffer to the given line.
 *
 * The visible statements pop the top entry of buf->data_stack and store
 * it in buf->line[line]; one path returns the pointer already stored for
 * the line. The stack must not be empty (asserted).
 *
 * @param buf  slice buffer
 * @param line index into buf->line
 *
 * NOTE(review): the condition guarding the early return and the final
 * return statement are not visible in this excerpt; offset is computed
 * but no use of it is visible.
 */
525 static IDWTELEM * slice_buffer_load_line(slice_buffer * buf, int line)
530 // av_log(NULL, AV_LOG_DEBUG, "Cache hit: %d\n", line);
532 assert(buf->data_stack_top >= 0);
533 // assert(!buf->line[line]);
535 return buf->line[line];
537 offset = buf->line_width * line;
538 buffer = buf->data_stack[buf->data_stack_top];
539 buf->data_stack_top--;
540 buf->line[line] = buffer;
542 // av_log(NULL, AV_LOG_DEBUG, "slice_buffer_load_line: line: %d remaining: %d\n", line, buf->data_stack_top + 1);
/**
 * Detach the buffer attached to a line and return it to the free stack.
 *
 * The line index must be within 0..line_count-1 and the line must have a
 * buffer attached (both asserted). The buffer is pushed back onto
 * buf->data_stack and the line's pointer is reset to NULL.
 *
 * @param buf  slice buffer
 * @param line index of the line to release
 *
 * NOTE(review): offset is computed but no use of it is visible here.
 */
547 static void slice_buffer_release(slice_buffer * buf, int line)
552 assert(line >= 0 && line < buf->line_count);
553 assert(buf->line[line]);
555 offset = buf->line_width * line;
556 buffer = buf->line[line];
557 buf->data_stack_top++;
558 buf->data_stack[buf->data_stack_top] = buffer;
559 buf->line[line] = NULL;
561 // av_log(NULL, AV_LOG_DEBUG, "slice_buffer_release: line: %d remaining: %d\n", line, buf->data_stack_top + 1);
/**
 * Walk all buf->line_count lines and release their buffers via
 * slice_buffer_release().
 *
 * NOTE(review): slice_buffer_release() asserts that the line has a buffer
 * attached, so presumably a guard that is not visible in this excerpt
 * skips lines without one - confirm.
 */
564 static void slice_buffer_flush(slice_buffer * buf)
567 for (i = 0; i < buf->line_count; i++)
571 // av_log(NULL, AV_LOG_DEBUG, "slice_buffer_flush: line: %d \n", i);
572 slice_buffer_release(buf, i);
/**
 * Free everything slice_buffer_init() allocated.
 *
 * Flushes first so that every line buffer is back on the free stack, then
 * frees the data_count line buffers, the stack array and the line pointer
 * table. av_freep() is given the address of each pointer.
 */
577 static void slice_buffer_destroy(slice_buffer * buf)
580 slice_buffer_flush(buf);
582 for (i = buf->data_count - 1; i >= 0; i--)
584 av_freep(&buf->data_stack[i]);
586 av_freep(&buf->data_stack);
587 av_freep(&buf->line);
591 // Avoid a name clash on SGI IRIX
/* NOTE(review): the statement the comment above refers to (presumably an
 * #undef of qexp) is not visible in this excerpt. */
/* QEXPSHIFT evaluates to 15 - FRAC_BITS. */
594 #define QEXPSHIFT (7-FRAC_BITS+8) //FIXME try to change this to 0
/* Table with QROOT byte entries; it has no initialiser here, so it is
 * zero until filled in elsewhere. */
595 static uint8_t qexp[QROOT];
/**
 * Loops while v lies outside 0..m. Both operands are compared as
 * unsigned, so a negative v also counts as out of range (for m >= 0).
 * NOTE(review): the loop body and return value are not visible in this
 * excerpt; the name and the callers in spatial_decompose53i() suggest it
 * reflects an out-of-range row index back into 0..m - confirm.
 */
597 static inline int mirror(int v, int m){
598 while((unsigned)v > (unsigned)m){
/**
 * Write the integer v with the binary coder c, using the contexts in
 * state.
 *
 * Context layout, as annotated on the lines below:
 *   state[0]       flag bit: 0 is written on the paths that go on to code
 *                  a magnitude, 1 on the remaining path (presumably
 *                  v == 0 - confirm)
 *   state[1..10]   exponent e = av_log2(|v|), written as a run of 1 bits
 *                  ended by a 0 bit
 *   state[11..21]  sign bit (v < 0)
 *   state[22..31]  bits of |v| below the leading one, highest first
 * Context indices are clamped (FFMIN(..., 9) or FFMIN(..., 10)) so large
 * exponents share the last context of each group.
 *
 * @param c         binary coder to write to
 * @param state     context array; offsets 0..31 are used
 * @param v         value to write
 * @param is_signed NOTE(review): presumably selects whether the sign bit
 *                  is written; the test is not visible here - confirm
 *
 * NOTE(review): three similar encodings of the same steps are visible;
 * presumably they are preprocessor alternatives whose conditionals are
 * not visible in this excerpt.
 */
605 static inline void put_symbol(RangeCoder *c, uint8_t *state, int v, int is_signed){
609 const int a= FFABS(v);
610 const int e= av_log2(a);
612 const int el= FFMIN(e, 10);
613 put_rac(c, state+0, 0);
616 put_rac(c, state+1+i, 1); //1..10
619 put_rac(c, state+1+9, 1); //1..10
621 put_rac(c, state+1+FFMIN(i,9), 0);
623 for(i=e-1; i>=el; i--){
624 put_rac(c, state+22+9, (a>>i)&1); //22..31
627 put_rac(c, state+22+i, (a>>i)&1); //22..31
631 put_rac(c, state+11 + el, v < 0); //11..21
634 put_rac(c, state+0, 0);
637 put_rac(c, state+1+i, 1); //1..10
639 put_rac(c, state+1+i, 0);
641 for(i=e-1; i>=0; i--){
642 put_rac(c, state+22+i, (a>>i)&1); //22..31
646 put_rac(c, state+11 + e, v < 0); //11..21
649 put_rac(c, state+1+FFMIN(i,9), 1); //1..10
651 put_rac(c, state+1+FFMIN(i,9), 0);
653 for(i=e-1; i>=0; i--){
654 put_rac(c, state+22+FFMIN(i,9), (a>>i)&1); //22..31
658 put_rac(c, state+11 + FFMIN(e,10), v < 0); //11..21
662 put_rac(c, state+0, 1);
/**
 * Read an integer with the binary coder c; counterpart of put_symbol().
 *
 * Reads the flag bit at state[0] first, then counts 1 bits from
 * state[1 + min(e, 9)] to obtain the exponent e, then reads e bits from
 * state[22 + min(i, 9)], highest first, doubling the accumulator for each
 * one. When is_signed is non-zero a sign bit is read from
 * state[11 + min(e, 10)].
 *
 * NOTE(review): what is returned on each path is not visible in this
 * excerpt; by symmetry with put_symbol() a set flag bit presumably means
 * the value 0, and a set sign bit a negated result - confirm.
 */
666 static inline int get_symbol(RangeCoder *c, uint8_t *state, int is_signed){
667 if(get_rac(c, state+0))
672 while(get_rac(c, state+1 + FFMIN(e,9))){ //1..10
677 for(i=e-1; i>=0; i--){
678 a += a + get_rac(c, state+22 + FFMIN(i,9)); //22..31
681 if(is_signed && get_rac(c, state+11 + FFMIN(e,10))) //11..21
/**
 * Write the value v with the binary coder c, starting from the step size
 * given by log2.
 *
 * The visible lines write escape bits at state[4 + log2] (1 to continue,
 * 0 to stop) and then the low log2 bits of v, highest first, at
 * state[31 - i]. r starts as 1 << log2, or 1 when log2 is negative.
 *
 * NOTE(review): the loop that emits the escape bits and adjusts v, r and
 * log2 between them is not visible in this excerpt, so the exact code
 * structure cannot be stated from here.
 */
688 static inline void put_symbol2(RangeCoder *c, uint8_t *state, int v, int log2){
690 int r= log2>=0 ? 1<<log2 : 1;
696 put_rac(c, state+4+log2, 1);
701 put_rac(c, state+4+log2, 0);
703 for(i=log2-1; i>=0; i--){
704 put_rac(c, state+31-i, (v>>i)&1);
/**
 * Read a value with the binary coder c; counterpart of put_symbol2().
 *
 * The visible lines read escape bits from state[4 + log2] while they are
 * 1, then read log2 low bits from state[31 - i], highest first, adding
 * each into v at bit position i. r starts as 1 << log2, or 1 when log2 is
 * negative.
 *
 * NOTE(review): the body of the escape loop and the return statement are
 * not visible in this excerpt.
 */
708 static inline int get_symbol2(RangeCoder *c, uint8_t *state, int log2){
710 int r= log2>=0 ? 1<<log2 : 1;
715 while(get_rac(c, state+4+log2)){
721 for(i=log2-1; i>=0; i--){
722 v+= get_rac(c, state+31-i)<<i;
/**
 * One lifting step on DWTELEM samples.
 *
 * Each output is the source sample plus (or, when inverse is set, minus)
 * ((mul * (sum of two neighbouring ref samples) + add) >> shift). At an
 * edge the single available ref sample is counted twice (mul*2*ref[..]),
 * which matches mirroring the signal at the border.
 *
 * @param dst, src, ref                 output, input and reference samples
 * @param dst_step, src_step, ref_step  element strides of the three arrays
 * @param width     length of the full signal; the loop bound w is derived from it
 * @param mul, add, shift  integer filter weight, rounding offset and shift
 * @param highpass  selects which edge uses the doubled ref sample
 *                  (mirror_left = !highpass, mirror_right = (width&1) ^ highpass)
 * @param inverse   non-zero subtracts the update instead of adding it
 *
 * NOTE(review): the loop and the mirror_left/mirror_right tests are not
 * visible in this excerpt. LIFT is #defined inside the function body but
 * is an ordinary file-scope macro.
 */
728 static av_always_inline void
729 lift(DWTELEM *dst, DWTELEM *src, DWTELEM *ref,
730 int dst_step, int src_step, int ref_step,
731 int width, int mul, int add, int shift,
732 int highpass, int inverse){
733 const int mirror_left= !highpass;
734 const int mirror_right= (width&1) ^ highpass;
735 const int w= (width>>1) - 1 + (highpass & width);
738 #define LIFT(src, ref, inv) ((src) + ((inv) ? - (ref) : + (ref)))
740 dst[0] = LIFT(src[0], ((mul*2*ref[0]+add)>>shift), inverse);
747 LIFT(src[i*src_step],
748 ((mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add)>>shift),
754 LIFT(src[w*src_step],
755 ((mul*2*ref[w*ref_step]+add)>>shift),
/**
 * Same lifting step as lift(), operating on IDWTELEM samples.
 *
 * Each output is the source sample plus (or, when inverse is set, minus)
 * ((mul * (sum of two neighbouring ref samples) + add) >> shift); at an
 * edge the single available ref sample is counted twice.
 *
 * @param dst, src, ref                 output, input and reference samples
 * @param dst_step, src_step, ref_step  element strides of the three arrays
 * @param width     length of the full signal; the loop bound w is derived from it
 * @param mul, add, shift  integer filter weight, rounding offset and shift
 * @param highpass  selects which edge uses the doubled ref sample
 * @param inverse   non-zero subtracts the update instead of adding it
 *
 * NOTE(review): the loop and the mirror_left/mirror_right tests are not
 * visible in this excerpt. LIFT is defined again here with the same
 * replacement text as in lift().
 */
760 static av_always_inline void
761 inv_lift(IDWTELEM *dst, IDWTELEM *src, IDWTELEM *ref,
762 int dst_step, int src_step, int ref_step,
763 int width, int mul, int add, int shift,
764 int highpass, int inverse){
765 const int mirror_left= !highpass;
766 const int mirror_right= (width&1) ^ highpass;
767 const int w= (width>>1) - 1 + (highpass & width);
770 #define LIFT(src, ref, inv) ((src) + ((inv) ? - (ref) : + (ref)))
772 dst[0] = LIFT(src[0], ((mul*2*ref[0]+add)>>shift), inverse);
779 LIFT(src[i*src_step],
780 ((mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add)>>shift),
786 LIFT(src[w*src_step],
787 ((mul*2*ref[w*ref_step]+add)>>shift),
/**
 * Lifting step on DWTELEM samples in which the update term also depends
 * on the source sample itself (see the LIFTS macro).
 *
 * The value passed as ref to LIFTS is mul * (sum of two neighbouring ref
 * samples) + add; at an edge the single available ref sample is counted
 * twice. LIFTS has two arms, one using a right shift by shift and one
 * using a division by 5*4 with large bias constants.
 *
 * @param dst, src, ref                 output, input and reference samples
 * @param dst_step, src_step, ref_step  element strides of the three arrays
 * @param width     length of the full signal; the loop bound w is derived from it
 * @param mul, add, shift  integer filter weight, rounding offset and shift
 * @param highpass  selects which edge uses the doubled ref sample
 * @param inverse   passed to LIFTS as its inv argument
 *
 * NOTE(review): the line of LIFTS that selects between its two arms is
 * not visible in this excerpt, nor are the loop and the mirror tests.
 * LIFTS reads shift and add from the enclosing function rather than
 * taking them as macro parameters.
 */
793 static av_always_inline void
794 liftS(DWTELEM *dst, DWTELEM *src, DWTELEM *ref,
795 int dst_step, int src_step, int ref_step,
796 int width, int mul, int add, int shift,
797 int highpass, int inverse){
798 const int mirror_left= !highpass;
799 const int mirror_right= (width&1) ^ highpass;
800 const int w= (width>>1) - 1 + (highpass & width);
804 #define LIFTS(src, ref, inv) \
806 (src) + (((ref) + 4*(src))>>shift): \
807 -((-16*(src) + (ref) + add/4 + 1 + (5<<25))/(5*4) - (1<<23)))
809 dst[0] = LIFTS(src[0], mul*2*ref[0]+add, inverse);
816 LIFTS(src[i*src_step],
817 mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add,
823 LIFTS(src[w*src_step], mul*2*ref[w*ref_step]+add, inverse);
/**
 * Same lifting step as liftS(), operating on IDWTELEM samples.
 *
 * The value passed as ref to LIFTS is mul * (sum of two neighbouring ref
 * samples) + add; at an edge the single available ref sample is counted
 * twice. LIFTS has two arms, one using a right shift by shift and one
 * using a division by 5*4 with large bias constants.
 *
 * @param dst, src, ref                 output, input and reference samples
 * @param dst_step, src_step, ref_step  element strides of the three arrays
 * @param width     length of the full signal; the loop bound w is derived from it
 * @param mul, add, shift  integer filter weight, rounding offset and shift
 * @param highpass  selects which edge uses the doubled ref sample
 * @param inverse   passed to LIFTS as its inv argument
 *
 * NOTE(review): the line of LIFTS that selects between its two arms is
 * not visible in this excerpt, nor are the loop and the mirror tests.
 * LIFTS reads shift and add from the enclosing function rather than
 * taking them as macro parameters.
 */
826 static av_always_inline void
827 inv_liftS(IDWTELEM *dst, IDWTELEM *src, IDWTELEM *ref,
828 int dst_step, int src_step, int ref_step,
829 int width, int mul, int add, int shift,
830 int highpass, int inverse){
831 const int mirror_left= !highpass;
832 const int mirror_right= (width&1) ^ highpass;
833 const int w= (width>>1) - 1 + (highpass & width);
837 #define LIFTS(src, ref, inv) \
839 (src) + (((ref) + 4*(src))>>shift): \
840 -((-16*(src) + (ref) + add/4 + 1 + (5<<25))/(5*4) - (1<<23)))
842 dst[0] = LIFTS(src[0], mul*2*ref[0]+add, inverse);
849 LIFTS(src[i*src_step],
850 mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add,
856 LIFTS(src[w*src_step], mul*2*ref[w*ref_step]+add, inverse);
/*
 * Horizontal 5/3 analysis of one row b of `width` samples, done in place.
 * The row is split into even/odd samples through a temporary array and
 * then processed by two lift() passes.
 */
861 static void horizontal_decompose53i(DWTELEM *b, int width){
// width2: number of odd-indexed samples; w2: number of even-indexed
// samples, which is also the offset of the second half.
863 const int width2= width>>1;
865 const int w2= (width+1)>>1;
867 for(x=0; x<width2; x++){
// Odd-indexed input samples go to the upper half of temp.
869 temp[x+w2]= b[2*x + 1];
// Hand-unrolled variant working on the scalars A1..A4.
// NOTE(review): whether this loop is compiled in depends on lines that are
// not shown here.
883 for(x=1; x+1<width2; x+=2){
887 A2 += (A1 + A3 + 2)>>2;
891 A1= temp[x+1+width2];
894 A4 += (A1 + A3 + 2)>>2;
900 A2 += (A1 + A3 + 2)>>2;
// Pass 1 (highpass=1): b[w2..] = temp[w2..] + ((-(neighbour sum)) >> 1).
905 lift(b+w2, temp+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 0);
// Pass 2 (highpass=0): b[0..] = temp[0..] + ((neighbour sum + 2) >> 2),
// using the values just written to b+w2 as reference.
906 lift(b , temp , b+w2, 1, 1, 1, width, 1, 2, 2, 0, 0);
/*
 * Vertical 5/3 analysis step on three rows: for every column i,
 * b1[i] is reduced by (b0[i] + b2[i]) >> 1. Only b1 is written.
 */
910 static void vertical_decompose53iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
913 for(i=0; i<width; i++){
914 b1[i] -= (b0[i] + b2[i])>>1;
/*
 * Vertical 5/3 analysis step on three rows: for every column i,
 * b1[i] is increased by (b0[i] + b2[i] + 2) >> 2 (the +2 rounds the
 * division by 4). Only b1 is written.
 */
918 static void vertical_decompose53iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
921 for(i=0; i<width; i++){
922 b1[i] += (b0[i] + b2[i] + 2)>>2;
/*
 * One level of the 2D 5/3 analysis on `buffer` (rows `stride` elements
 * apart). Rows are visited two at a time; each row is first transformed
 * horizontally and then used by the vertical steps, with b0..b3 acting as
 * a sliding window of row pointers. Row indices outside the picture are
 * folded back with mirror().
 */
926 static void spatial_decompose53i(DWTELEM *buffer, int width, int height, int stride){
// Window start: the mirrored rows for indices -3 and -2.
928 DWTELEM *b0= buffer + mirror(-2-1, height-1)*stride;
929 DWTELEM *b1= buffer + mirror(-2 , height-1)*stride;
931 for(y=-2; y<height; y+=2){
932 DWTELEM *b2= buffer + mirror(y+1, height-1)*stride;
933 DWTELEM *b3= buffer + mirror(y+2, height-1)*stride;
// The (unsigned) cast makes the comparison unsigned, so a negative row
// index becomes a huge value and fails the test: a single compare checks
// both 0 <= row and row < height.
936 if(y+1<(unsigned)height) horizontal_decompose53i(b2, width);
937 if(y+2<(unsigned)height) horizontal_decompose53i(b3, width);
938 STOP_TIMER("horizontal_decompose53i")}
// Vertical steps; each call modifies its middle row argument only.
941 if(y+1<(unsigned)height) vertical_decompose53iH0(b1, b2, b3, width);
942 if(y+0<(unsigned)height) vertical_decompose53iL0(b0, b1, b2, width);
943 STOP_TIMER("vertical_decompose53i*")}
/*
 * Horizontal 9/7 analysis of one row b, done in place through a temporary
 * array: four lifting passes using the W_A*, W_B*, W_C* and W_D* constants.
 */
950 static void horizontal_decompose97i(DWTELEM *b, int width){
// Number of even-indexed samples; offset of the second half.
952 const int w2= (width+1)>>1;
// Passes A and B read the interleaved row (source step 2) and write the
// de-interleaved halves into temp.
954 lift (temp+w2, b +1, b , 1, 2, 2, width, W_AM, W_AO, W_AS, 1, 1);
955 liftS(temp , b , temp+w2, 1, 2, 1, width, W_BM, W_BO, W_BS, 0, 0);
// Passes C and D write the final halves back to b (second half at b+w2).
956 lift (b +w2, temp+w2, temp , 1, 1, 1, width, W_CM, W_CO, W_CS, 1, 0);
957 lift (b , temp , b +w2, 1, 1, 1, width, W_DM, W_DO, W_DS, 0, 0);
/*
 * Vertical 9/7 analysis, step A: for every column i,
 * b1[i] -= (W_AM*(b0[i] + b2[i]) + W_AO) >> W_AS. Only b1 is written.
 */
961 static void vertical_decompose97iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
964 for(i=0; i<width; i++){
965 b1[i] -= (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
/*
 * Vertical 9/7 analysis, step C: for every column i,
 * b1[i] += (W_CM*(b0[i] + b2[i]) + W_CO) >> W_CS. Only b1 is written.
 */
969 static void vertical_decompose97iH1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
972 for(i=0; i<width; i++){
973 b1[i] += (W_CM*(b0[i] + b2[i])+W_CO)>>W_CS;
/*
 * Vertical 9/7 analysis, step B, applied to row b1 with neighbours b0, b2.
 * Two formulas appear below.
 * NOTE(review): presumably they are alternatives chosen by a preprocessor
 * conditional that is not shown here -- confirm before editing.
 */
977 static void vertical_decompose97iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
980 for(i=0; i<width; i++){
// Plain shift form of the step.
982 b1[i] -= (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS;
// Division form: (5<<27)/(5*16) equals 1<<23, so the bias added to the
// dividend is removed again by the trailing subtraction.
984 b1[i] = (16*4*b1[i] - 4*(b0[i] + b2[i]) + W_BO*5 + (5<<27)) / (5*16) - (1<<23);
/*
 * Vertical 9/7 analysis, step D: for every column i,
 * b1[i] += (W_DM*(b0[i] + b2[i]) + W_DO) >> W_DS. Only b1 is written.
 */
989 static void vertical_decompose97iL1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
992 for(i=0; i<width; i++){
993 b1[i] += (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS;
/*
 * One level of the 2D 9/7 analysis on `buffer` (rows `stride` elements
 * apart). Works like the 5/3 version but with a six-row window b0..b5 and
 * four vertical steps per iteration.
 */
997 static void spatial_decompose97i(DWTELEM *buffer, int width, int height, int stride){
// Window start: the mirrored rows for indices -5 .. -2.
999 DWTELEM *b0= buffer + mirror(-4-1, height-1)*stride;
1000 DWTELEM *b1= buffer + mirror(-4 , height-1)*stride;
1001 DWTELEM *b2= buffer + mirror(-4+1, height-1)*stride;
1002 DWTELEM *b3= buffer + mirror(-4+2, height-1)*stride;
1004 for(y=-4; y<height; y+=2){
1005 DWTELEM *b4= buffer + mirror(y+3, height-1)*stride;
1006 DWTELEM *b5= buffer + mirror(y+4, height-1)*stride;
// Unsigned comparison: rejects negative row indices and indices >= height
// with a single test.
1009 if(y+3<(unsigned)height) horizontal_decompose97i(b4, width);
1010 if(y+4<(unsigned)height) horizontal_decompose97i(b5, width);
1012 STOP_TIMER("horizontal_decompose97i")
// Vertical steps, newest rows first; each call modifies its middle row.
1016 if(y+3<(unsigned)height) vertical_decompose97iH0(b3, b4, b5, width);
1017 if(y+2<(unsigned)height) vertical_decompose97iL0(b2, b3, b4, width);
1018 if(y+1<(unsigned)height) vertical_decompose97iH1(b1, b2, b3, width);
1019 if(y+0<(unsigned)height) vertical_decompose97iL1(b0, b1, b2, width);
1022 STOP_TIMER("vertical_decompose97i")
/*
 * Forward 2D wavelet transform of `buffer` with `decomposition_count`
 * levels. `type` selects the 9/7 (DWT_97) or 5/3 (DWT_53) filter.
 */
1032 void ff_spatial_dwt(DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
1035 for(level=0; level<decomposition_count; level++){
// Each level works on a picture of half the previous size; doubling the
// stride makes it address every 2^level-th row of the buffer.
1037 case DWT_97: spatial_decompose97i(buffer, width>>level, height>>level, stride<<level); break;
1038 case DWT_53: spatial_decompose53i(buffer, width>>level, height>>level, stride<<level); break;
/*
 * Horizontal 5/3 synthesis of one row b of `width` samples, in place:
 * two inverse lifting passes into temp, then the two halves are
 * interleaved back into b.
 */
1043 static void horizontal_compose53i(IDWTELEM *b, int width){
// Variable-length array on the stack, one element per sample of the row.
1044 IDWTELEM temp[width];
1045 const int width2= width>>1;
1046 const int w2= (width+1)>>1;
// Hand-unrolled variant working on the scalars A1..A4.
// NOTE(review): whether this loop is compiled in depends on lines that are
// not shown here.
1058 for(x=1; x+1<width2; x+=2){
1062 A2 += (A1 + A3 + 2)>>2;
1066 A1= temp[x+1+width2];
1069 A4 += (A1 + A3 + 2)>>2;
1075 A2 += (A1 + A3 + 2)>>2;
// Same constants as the analysis passes, in reverse order and with the
// inverse flag set.
1079 inv_lift(temp , b , b+w2, 1, 1, 1, width, 1, 2, 2, 0, 1);
1080 inv_lift(temp+w2, b+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 1);
1082 for(x=0; x<width2; x++){
// Samples from the upper half of temp return to the odd positions.
1084 b[2*x + 1]= temp[x+w2];
/*
 * Vertical 5/3 synthesis step: for every column i,
 * b1[i] += (b0[i] + b2[i]) >> 1, i.e. the opposite sign of
 * vertical_decompose53iH0(). Only b1 is written.
 */
1090 static void vertical_compose53iH0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
1093 for(i=0; i<width; i++){
1094 b1[i] += (b0[i] + b2[i])>>1;
/*
 * Vertical 5/3 synthesis step: for every column i,
 * b1[i] -= (b0[i] + b2[i] + 2) >> 2, i.e. the opposite sign of
 * vertical_decompose53iL0(). Only b1 is written.
 */
1098 static void vertical_compose53iL0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
1101 for(i=0; i<width; i++){
1102 b1[i] -= (b0[i] + b2[i] + 2)>>2;
/*
 * Prepares the 5/3 synthesis state for the slice-buffer variant: the two
 * window rows b0 and b1 are set to the slice-buffer lines for the mirrored
 * row indices -2 and -1.
 */
1106 static void spatial_compose53i_buffered_init(dwt_compose_t *cs, slice_buffer * sb, int height, int stride_line){
1107 cs->b0 = slice_buffer_get_line(sb, mirror(-1-1, height-1) * stride_line);
1108 cs->b1 = slice_buffer_get_line(sb, mirror(-1 , height-1) * stride_line);
/*
 * Prepares the 5/3 synthesis state for a plain frame buffer: the two
 * window rows b0 and b1 point at the mirrored rows -2 and -1 of `buffer`.
 */
1112 static void spatial_compose53i_init(dwt_compose_t *cs, IDWTELEM *buffer, int height, int stride){
1113 cs->b0 = buffer + mirror(-1-1, height-1)*stride;
1114 cs->b1 = buffer + mirror(-1 , height-1)*stride;
/*
 * Advances the 5/3 synthesis by one step in the slice-buffer variant.
 * b0/b1 come from the saved state, b2/b3 are the next two (mirrored)
 * lines fetched from the slice buffer.
 */
1118 static void spatial_compose53i_dy_buffered(dwt_compose_t *cs, slice_buffer * sb, int width, int height, int stride_line){
1121 IDWTELEM *b0= cs->b0;
1122 IDWTELEM *b1= cs->b1;
1123 IDWTELEM *b2= slice_buffer_get_line(sb, mirror(y+1, height-1) * stride_line);
1124 IDWTELEM *b3= slice_buffer_get_line(sb, mirror(y+2, height-1) * stride_line);
// Vertical steps first. The unsigned comparison rejects negative row
// indices and indices >= height with a single test.
1127 if(y+1<(unsigned)height) vertical_compose53iL0(b1, b2, b3, width);
1128 if(y+0<(unsigned)height) vertical_compose53iH0(b0, b1, b2, width);
1129 STOP_TIMER("vertical_compose53i*")}
// Then the horizontal pass on rows y-1 and y, when they exist.
1132 if(y-1<(unsigned)height) horizontal_compose53i(b0, width);
1133 if(y+0<(unsigned)height) horizontal_compose53i(b1, width);
1134 STOP_TIMER("horizontal_compose53i")}
/*
 * Advances the 5/3 synthesis by one step on a plain frame buffer.
 * b0/b1 come from the saved state, b2/b3 are the next two (mirrored) rows
 * of `buffer`.
 */
1141 static void spatial_compose53i_dy(dwt_compose_t *cs, IDWTELEM *buffer, int width, int height, int stride){
1143 IDWTELEM *b0= cs->b0;
1144 IDWTELEM *b1= cs->b1;
1145 IDWTELEM *b2= buffer + mirror(y+1, height-1)*stride;
1146 IDWTELEM *b3= buffer + mirror(y+2, height-1)*stride;
// Vertical steps first. The unsigned comparison rejects negative row
// indices and indices >= height with a single test.
1149 if(y+1<(unsigned)height) vertical_compose53iL0(b1, b2, b3, width);
1150 if(y+0<(unsigned)height) vertical_compose53iH0(b0, b1, b2, width);
1151 STOP_TIMER("vertical_compose53i*")}
// Then the horizontal pass on rows y-1 and y, when they exist.
1154 if(y-1<(unsigned)height) horizontal_compose53i(b0, width);
1155 if(y+0<(unsigned)height) horizontal_compose53i(b1, width);
1156 STOP_TIMER("horizontal_compose53i")}
/*
 * One complete level of 2D 5/3 synthesis on `buffer`: initialises the
 * state and calls the per-step function for as long as cs.y <= height.
 */
1163 static void spatial_compose53i(IDWTELEM *buffer, int width, int height, int stride){
1165 spatial_compose53i_init(&cs, buffer, height, stride);
// NOTE(review): termination relies on spatial_compose53i_dy() advancing
// cs.y; that update is not among the lines shown here.
1166 while(cs.y <= height)
1167 spatial_compose53i_dy(&cs, buffer, width, height, stride);
/*
 * Horizontal 9/7 synthesis of one row b of `width` samples, in place.
 * Runs the four lifting passes D, C, B, A, i.e. the analysis passes in
 * reverse order.
 */
1171 void ff_snow_horizontal_compose97i(IDWTELEM *b, int width){
// Variable-length array on the stack, one element per sample of the row.
1172 IDWTELEM temp[width];
1173 const int w2= (width+1)>>1;
// Passes D and C read the two halves of b and write them to temp.
1175 inv_lift (temp , b , b +w2, 1, 1, 1, width, W_DM, W_DO, W_DS, 0, 1);
1176 inv_lift (temp+w2, b +w2, temp , 1, 1, 1, width, W_CM, W_CO, W_CS, 1, 1);
// Passes B and A write back to b with destination step 2, interleaving the
// two halves again (even positions from b, odd positions from b+1).
1177 inv_liftS(b , temp , temp+w2, 2, 1, 1, width, W_BM, W_BO, W_BS, 0, 1);
1178 inv_lift (b+1 , temp+w2, b , 2, 1, 2, width, W_AM, W_AO, W_AS, 1, 0);
/*
 * Vertical 9/7 synthesis, step A: for every column i,
 * b1[i] += (W_AM*(b0[i] + b2[i]) + W_AO) >> W_AS. Only b1 is written.
 */
1181 static void vertical_compose97iH0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
1184 for(i=0; i<width; i++){
1185 b1[i] += (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
/*
 * Vertical 9/7 synthesis, step C: for every column i,
 * b1[i] -= (W_CM*(b0[i] + b2[i]) + W_CO) >> W_CS. Only b1 is written.
 */
1189 static void vertical_compose97iH1(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
1192 for(i=0; i<width; i++){
1193 b1[i] -= (W_CM*(b0[i] + b2[i])+W_CO)>>W_CS;
/*
 * Vertical 9/7 synthesis, step B, applied to row b1 with neighbours b0, b2.
 * Two formulas appear below.
 * NOTE(review): presumably they are alternatives chosen by a preprocessor
 * conditional that is not shown here -- confirm before editing.
 */
1197 static void vertical_compose97iL0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
1200 for(i=0; i<width; i++){
// Plain form.
1202 b1[i] += (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS;
// Form with an extra 4*b1[i] inside the shifted term.
1204 b1[i] += (W_BM*(b0[i] + b2[i])+4*b1[i]+W_BO)>>W_BS;
/*
 * Vertical 9/7 synthesis, step D: for every column i,
 * b1[i] -= (W_DM*(b0[i] + b2[i]) + W_DO) >> W_DS. Only b1 is written.
 */
1209 static void vertical_compose97iL1(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
1212 for(i=0; i<width; i++){
1213 b1[i] -= (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS;
/*
 * Fused vertical 9/7 synthesis over a six-row window: performs steps D, C,
 * B and A for one column after another, using the same formulas as the
 * four separate vertical_compose97i* helpers. Rows b4, b3, b2 and b1 are
 * modified; b0 and b5 are only read.
 */
1217 void ff_snow_vertical_compose97i(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, IDWTELEM *b3, IDWTELEM *b4, IDWTELEM *b5, int width){
1220 for(i=0; i<width; i++){
// Order matters: each statement uses the value written by the previous one.
1221 b4[i] -= (W_DM*(b3[i] + b5[i])+W_DO)>>W_DS;
1222 b3[i] -= (W_CM*(b2[i] + b4[i])+W_CO)>>W_CS;
// NOTE(review): the next two statements are presumably alternatives chosen
// by a preprocessor conditional that is not shown here -- confirm.
1224 b2[i] += (W_BM*(b1[i] + b3[i])+W_BO)>>W_BS;
1226 b2[i] += (W_BM*(b1[i] + b3[i])+4*b2[i]+W_BO)>>W_BS;
1228 b1[i] += (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
/*
 * Prepares the 9/7 synthesis state for the slice-buffer variant: the four
 * window rows b0..b3 are set to the slice-buffer lines for the mirrored
 * row indices -4 .. -1.
 */
1232 static void spatial_compose97i_buffered_init(dwt_compose_t *cs, slice_buffer * sb, int height, int stride_line){
1233 cs->b0 = slice_buffer_get_line(sb, mirror(-3-1, height-1) * stride_line);
1234 cs->b1 = slice_buffer_get_line(sb, mirror(-3 , height-1) * stride_line);
1235 cs->b2 = slice_buffer_get_line(sb, mirror(-3+1, height-1) * stride_line);
1236 cs->b3 = slice_buffer_get_line(sb, mirror(-3+2, height-1) * stride_line);
/*
 * Prepares the 9/7 synthesis state for a plain frame buffer: the four
 * window rows b0..b3 point at the mirrored rows -4 .. -1 of `buffer`.
 */
1240 static void spatial_compose97i_init(dwt_compose_t *cs, IDWTELEM *buffer, int height, int stride){
1241 cs->b0 = buffer + mirror(-3-1, height-1)*stride;
1242 cs->b1 = buffer + mirror(-3 , height-1)*stride;
1243 cs->b2 = buffer + mirror(-3+1, height-1)*stride;
1244 cs->b3 = buffer + mirror(-3+2, height-1)*stride;
/*
 * Advances the 9/7 synthesis by one step in the slice-buffer variant.
 * b0..b3 come from the saved state, b4/b5 are the next two (mirrored)
 * lines fetched from the slice buffer. The vertical and horizontal kernels
 * are called through the DSPContext function pointers.
 */
1248 static void spatial_compose97i_dy_buffered(DSPContext *dsp, dwt_compose_t *cs, slice_buffer * sb, int width, int height, int stride_line){
1251 IDWTELEM *b0= cs->b0;
1252 IDWTELEM *b1= cs->b1;
1253 IDWTELEM *b2= cs->b2;
1254 IDWTELEM *b3= cs->b3;
1255 IDWTELEM *b4= slice_buffer_get_line(sb, mirror(y + 3, height - 1) * stride_line);
1256 IDWTELEM *b5= slice_buffer_get_line(sb, mirror(y + 4, height - 1) * stride_line);
// Away from the top and bottom borders all four vertical steps apply, so
// the fused six-row kernel is used.
1259 if(y>0 && y+4<height){
1260 dsp->vertical_compose97i(b0, b1, b2, b3, b4, b5, width);
// Near the borders each step is applied only if its row exists; the
// unsigned comparison also rejects negative row indices.
1262 if(y+3<(unsigned)height) vertical_compose97iL1(b3, b4, b5, width);
1263 if(y+2<(unsigned)height) vertical_compose97iH1(b2, b3, b4, width);
1264 if(y+1<(unsigned)height) vertical_compose97iL0(b1, b2, b3, width);
1265 if(y+0<(unsigned)height) vertical_compose97iH0(b0, b1, b2, width);
1268 STOP_TIMER("vertical_compose97i")}}
// Horizontal pass on rows y-1 and y, when they exist.
1271 if(y-1<(unsigned)height) dsp->horizontal_compose97i(b0, width);
1272 if(y+0<(unsigned)height) dsp->horizontal_compose97i(b1, width);
// Only guards the timing report below.
1273 if(width>400 && y+0<(unsigned)height){
1274 STOP_TIMER("horizontal_compose97i")}}
/*
 * Advances the 9/7 synthesis by one step on a plain frame buffer.
 * b0..b3 come from the saved state, b4/b5 are the next two (mirrored) rows
 * of `buffer`.
 */
1283 static void spatial_compose97i_dy(dwt_compose_t *cs, IDWTELEM *buffer, int width, int height, int stride){
1285 IDWTELEM *b0= cs->b0;
1286 IDWTELEM *b1= cs->b1;
1287 IDWTELEM *b2= cs->b2;
1288 IDWTELEM *b3= cs->b3;
1289 IDWTELEM *b4= buffer + mirror(y+3, height-1)*stride;
1290 IDWTELEM *b5= buffer + mirror(y+4, height-1)*stride;
// Each vertical step runs only if its middle row exists; the unsigned
// comparison also rejects negative row indices.
1293 if(y+3<(unsigned)height) vertical_compose97iL1(b3, b4, b5, width);
1294 if(y+2<(unsigned)height) vertical_compose97iH1(b2, b3, b4, width);
1295 if(y+1<(unsigned)height) vertical_compose97iL0(b1, b2, b3, width);
1296 if(y+0<(unsigned)height) vertical_compose97iH0(b0, b1, b2, width);
1298 STOP_TIMER("vertical_compose97i")}}
// Horizontal pass on rows y-1 and y, when they exist.
1301 if(y-1<(unsigned)height) ff_snow_horizontal_compose97i(b0, width);
1302 if(y+0<(unsigned)height) ff_snow_horizontal_compose97i(b1, width);
// Only guards the timing report below.
1303 if(width>400 && b0 <= b2){
1304 STOP_TIMER("horizontal_compose97i")}}
/*
 * One complete level of 2D 9/7 synthesis on `buffer`: initialises the
 * state and calls the per-step function for as long as cs.y <= height.
 */
1313 static void spatial_compose97i(IDWTELEM *buffer, int width, int height, int stride){
1315 spatial_compose97i_init(&cs, buffer, height, stride);
// NOTE(review): termination relies on spatial_compose97i_dy() advancing
// cs.y; that update is not among the lines shown here.
1316 while(cs.y <= height)
1317 spatial_compose97i_dy(&cs, buffer, width, height, stride);
/*
 * Initialises one dwt_compose_t per decomposition level (cs[level]) for
 * the slice-buffer inverse transform. Level `level` works on a picture of
 * height>>level whose lines are stride_line<<level apart.
 */
1320 static void ff_spatial_idwt_buffered_init(dwt_compose_t *cs, slice_buffer * sb, int width, int height, int stride_line, int type, int decomposition_count){
1322 for(level=decomposition_count-1; level>=0; level--){
1324 case DWT_97: spatial_compose97i_buffered_init(cs+level, sb, height>>level, stride_line<<level); break;
1325 case DWT_53: spatial_compose53i_buffered_init(cs+level, sb, height>>level, stride_line<<level); break;
/*
 * Initialises one dwt_compose_t per decomposition level (cs[level]) for
 * the inverse transform on a plain frame buffer. Level `level` works on a
 * picture of height>>level whose rows are stride<<level apart.
 */
1330 static void ff_spatial_idwt_init(dwt_compose_t *cs, IDWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
1332 for(level=decomposition_count-1; level>=0; level--){
1334 case DWT_97: spatial_compose97i_init(cs+level, buffer, height>>level, stride<<level); break;
1335 case DWT_53: spatial_compose53i_init(cs+level, buffer, height>>level, stride<<level); break;
/*
 * Runs the inverse transform far enough for output row `y`: starting at
 * the coarsest level, every level is advanced until its row counter
 * exceeds min((y>>level) + support, height>>level).
 */
1340 static void ff_spatial_idwt_slice(dwt_compose_t *cs, IDWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count, int y){
// Extra rows each level must be ahead of the requested one.
// NOTE(review): `type` is compared with the literal 1 although the
// dispatch below uses DWT_53/DWT_97; presumably DWT_53 == 1 -- confirm.
1341 const int support = type==1 ? 3 : 5;
1345 for(level=decomposition_count-1; level>=0; level--){
1346 while(cs[level].y <= FFMIN((y>>level)+support, height>>level)){
1348 case DWT_97: spatial_compose97i_dy(cs+level, buffer, width>>level, height>>level, stride<<level);
1350 case DWT_53: spatial_compose53i_dy(cs+level, buffer, width>>level, height>>level, stride<<level);
/*
 * Slice-buffer version of ff_spatial_idwt_slice(): advances every level,
 * coarsest first, until its row counter exceeds
 * min((y>>level) + support, height>>level).
 */
1357 static void ff_spatial_idwt_buffered_slice(DSPContext *dsp, dwt_compose_t *cs, slice_buffer * slice_buf, int width, int height, int stride_line, int type, int decomposition_count, int y){
// NOTE(review): `type` is compared with the literal 1 although the
// dispatch below uses DWT_53/DWT_97; presumably DWT_53 == 1 -- confirm.
1358 const int support = type==1 ? 3 : 5;
1362 for(level=decomposition_count-1; level>=0; level--){
1363 while(cs[level].y <= FFMIN((y>>level)+support, height>>level)){
1365 case DWT_97: spatial_compose97i_dy_buffered(dsp, cs+level, slice_buf, width>>level, height>>level, stride_line<<level);
1367 case DWT_53: spatial_compose53i_dy_buffered(cs+level, slice_buf, width>>level, height>>level, stride_line<<level);
/*
 * Complete inverse 2D wavelet transform of `buffer`: initialises the
 * per-level state and then requests the output in steps of four rows.
 */
1374 static void ff_spatial_idwt(IDWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
// One state per level; decomposition_count must not exceed
// MAX_DECOMPOSITIONS or the init/slice loops index past this array.
1375 dwt_compose_t cs[MAX_DECOMPOSITIONS];
1377 ff_spatial_idwt_init(cs, buffer, width, height, stride, type, decomposition_count);
1378 for(y=0; y<height; y+=4)
1379 ff_spatial_idwt_slice(cs, buffer, width, height, stride, type, decomposition_count, y);
/*
 * Range-codes the coefficients of subband `b` (samples in `src`, rows
 * `stride` apart) using neighbour-based contexts plus run lengths.
 * The body visits every coefficient twice: the first visit collects run
 * lengths into runs[], the second one emits the symbols.
 * For each coefficient v the neighbours l (left), lt (top-left), t (top),
 * rt (top-right) and the parent-band sample p form the context.
 */
1382 static int encode_subband_c0run(SnowContext *s, SubBand *b, IDWTELEM *src, IDWTELEM *parent, int stride, int orientation){
1383 const int w= b->width;
1384 const int h= b->height;
// ---- first visit: gather neighbours and collect runs ----
1396 int /*ll=0, */l=0, lt=0, t=0, rt=0;
1397 v= src[x + y*stride];
1400 t= src[x + (y-1)*stride];
1402 lt= src[x - 1 + (y-1)*stride];
1405 rt= src[x + 1 + (y-1)*stride];
1409 l= src[x - 1 + y*stride];
// NOTE(review): `ll` is commented out in the declaration above, so these
// two lines are presumably inside a disabled region -- confirm.
1411 if(orientation==1) ll= src[y + (x-2)*stride];
1412 else ll= src[x - 2 + y*stride];
// The parent sample is read only when (px,py) lies inside the parent band.
1418 if(px<b->parent->width && py<b->parent->height)
1419 p= parent[px + py*2*stride];
// All context inputs are zero: this position belongs to a run.
1421 if(!(/*ll|*/l|lt|t|rt|p)){
1423 runs[run_index++]= run;
1431 max_index= run_index;
1432 runs[run_index++]= run;
1434 run= runs[run_index++];
// The number of stored runs is coded first, then the first run length.
1436 put_symbol2(&s->c, b->state[30], max_index, 0);
1437 if(run_index <= max_index)
1438 put_symbol2(&s->c, b->state[1], run, 3);
// Fewer than w*40 bytes left in the output buffer: report an error.
1441 if(s->c.bytestream_end - s->c.bytestream < w*40){
1442 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
// ---- second visit: same neighbour gathering, now emitting symbols ----
1447 int /*ll=0, */l=0, lt=0, t=0, rt=0;
1448 v= src[x + y*stride];
1451 t= src[x + (y-1)*stride];
1453 lt= src[x - 1 + (y-1)*stride];
1456 rt= src[x + 1 + (y-1)*stride];
1460 l= src[x - 1 + y*stride];
1462 if(orientation==1) ll= src[y + (x-2)*stride];
1463 else ll= src[x - 2 + y*stride];
1469 if(px<b->parent->width && py<b->parent->height)
1470 p= parent[px + py*2*stride];
// Non-zero context: a significance bit (v != 0) is coded with a context
// equal to log2 of a weighted sum of the neighbour magnitudes.
1472 if(/*ll|*/l|lt|t|rt|p){
1473 int context= av_log2(/*FFABS(ll) + */3*FFABS(l) + FFABS(lt) + 2*FFABS(t) + FFABS(rt) + FFABS(p));
1475 put_rac(&s->c, &b->state[0][context], !!v);
// Zero context: take the next run length and code it if any remain.
1478 run= runs[run_index++];
1480 if(run_index <= max_index)
1481 put_symbol2(&s->c, b->state[1], run, 3);
// Magnitude minus one, then the sign; the sign context is derived from
// the left and top neighbours (magnitude and sign folded into l2/t2).
1489 int context= av_log2(/*FFABS(ll) + */3*FFABS(l) + FFABS(lt) + 2*FFABS(t) + FFABS(rt) + FFABS(p));
1490 int l2= 2*FFABS(l) + (l<0);
1491 int t2= 2*FFABS(t) + (t<0);
1493 put_symbol2(&s->c, b->state[context + 2], FFABS(v)-1, context-4);
1494 put_rac(&s->c, &b->state[0][16 + 1 + 3 + quant3bA[l2&0xFF] + 3*quant3bA[t2&0xFF]], v<0);
/*
 * Subband encoder entry point. Forwards all arguments to
 * encode_subband_c0run() and returns its result; the other calls are
 * commented out.
 */
1502 static int encode_subband(SnowContext *s, SubBand *b, IDWTELEM *src, IDWTELEM *parent, int stride, int orientation){
1503 // encode_subband_qtree(s, b, src, parent, stride, orientation);
1504 // encode_subband_z0run(s, b, src, parent, stride, orientation);
1505 return encode_subband_c0run(s, b, src, parent, stride, orientation);
1506 // encode_subband_dzr(s, b, src, parent, stride, orientation);
/*
 * Decodes the coefficients of subband `b` from the range coder into the
 * sparse list b->x_coeff. Each entry holds a column `x` and a coded value
 * `coeff`; an entry with x == w+1 is written as end marker.
 * Contexts use the neighbours l, lt, t, rt from the list entries already
 * decoded and p from the parent band's list.
 */
1509 static inline void unpack_coeffs(SnowContext *s, SubBand *b, SubBand * parent, int orientation){
1510 const int w= b->width;
1511 const int h= b->height;
// xc: write position. prev_xc / prev2_xc: cursors into entries written
// earlier. parent_xc: cursor into the parent band's list, NULL without a
// parent.
1516 x_and_coeff *xc= b->x_coeff;
1517 x_and_coeff *prev_xc= NULL;
1518 x_and_coeff *prev2_xc= xc;
1519 x_and_coeff *parent_xc= parent ? parent->x_coeff : NULL;
1520 x_and_coeff *prev_parent_xc= parent_xc;
// Number of runs, then the first run length if there is at least one.
1522 runs= get_symbol2(&s->c, b->state[30], 0);
1523 if(runs-- > 0) run= get_symbol2(&s->c, b->state[1], 3);
1528 int lt=0, t=0, rt=0;
1530 if(y && prev_xc->x == 0){
1542 if(prev_xc->x == x + 1)
// Column x of this band corresponds to column x>>1 of the parent band.
1548 if(x>>1 > parent_xc->x){
1551 if(x>>1 == parent_xc->x){
1552 p= parent_xc->coeff;
// Non-zero context: decode a significance bit, then magnitude and sign.
// The context sum uses l>>1, lt>>1, t&~1, rt>>1 and p>>1, i.e. bit 0 of
// each stored value is left out.
1555 if(/*ll|*/l|lt|t|rt|p){
1556 int context= av_log2(/*FFABS(ll) + */3*(l>>1) + (lt>>1) + (t&~1) + (rt>>1) + (p>>1));
1558 v=get_rac(&s->c, &b->state[0][context]);
// Stored value: 2*(decoded symbol + 1), plus the sign bit in bit 0.
1560 v= 2*(get_symbol2(&s->c, b->state[context + 2], context-4) + 1);
1561 v+=get_rac(&s->c, &b->state[0][16 + 1 + 3 + quant3bA[l&0xFF] + 3*quant3bA[t&0xFF]]);
// Zero context: fetch the next run length; the coefficient that ends a run
// is decoded with context 0.
1568 if(runs-- > 0) run= get_symbol2(&s->c, b->state[1], 3);
1570 v= 2*(get_symbol2(&s->c, b->state[0 + 2], 0-4) + 1);
1571 v+=get_rac(&s->c, &b->state[0][16 + 1 + 3]);
// Upper bound on how far the current run may be skipped in one go, taken
// from the next entry of the previous row (or the row end when y == 0)
// and from the next parent entry.
1580 if(y) max_run= FFMIN(run, prev_xc->x - x - 2);
1581 else max_run= FFMIN(run, w-x-1);
1583 max_run= FFMIN(max_run, 2*parent_xc->x - x - 1);
1589 (xc++)->x= w+1; //end marker
// Advance the parent cursor to the parent's end marker.
1595 while(parent_xc->x != parent->width+1)
1598 prev_parent_xc= parent_xc;
1600 parent_xc= prev_parent_xc;
1605 (xc++)->x= w+1; //end marker
/*
 * Dequantises the coefficients of subband `b` for rows start_y .. h-1 and
 * writes them into the matching slice-buffer lines. save_state[0] carries
 * the read position in b->x_coeff from one call to the next.
 */
1609 static inline void decode_subband_slice_buffered(SnowContext *s, SubBand *b, slice_buffer * sb, int start_y, int h, int save_state[1]){
1610 const int w= b->width;
// Quantiser: the low bits of qlog index qexp[], the high bits are a shift.
1612 const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
1613 int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
1614 int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
1619 if(b->ibuf == s->spatial_idwt_buffer || s->qlog == LOSSLESS_QLOG){
1624 /* If we are on the second or later slice, restore our index. */
1626 new_index = save_state[0];
1629 for(y=start_y; y<h; y++){
// Each line is cleared first; only positions present in the list are set.
1632 IDWTELEM * line = slice_buffer_get_line(sb, y * b->stride_line + b->buf_y_offset) + b->buf_x_offset;
1633 memset(line, 0, b->width*sizeof(IDWTELEM));
1634 v = b->x_coeff[new_index].coeff;
1635 x = b->x_coeff[new_index++].x;
// v>>1 is the magnitude, bit 0 the sign. With u = -(v&1), (t^u) - u
// negates t when the sign bit is set and leaves it unchanged otherwise.
1638 register int t= ( (v>>1)*qmul + qadd)>>QEXPSHIFT;
1639 register int u= -(v&1);
1640 line[x] = (t^u) - u;
1642 v = b->x_coeff[new_index].coeff;
1643 x = b->x_coeff[new_index++].x;
1646 if(w > 200 && start_y != 0/*level+1 == s->spatial_decomposition_count*/){
1647 STOP_TIMER("decode_subband")
1650 /* Save our variables for the next slice. */
1651 save_state[0] = new_index;
/*
 * Resets all adaptive range-coder states to MID_STATE: the per-subband
 * states of the three planes as well as the header and block states.
 */
1656 static void reset_contexts(SnowContext *s){ //FIXME better initial contexts
1657 int plane_index, level, orientation;
1659 for(plane_index=0; plane_index<3; plane_index++){
1660 for(level=0; level<MAX_DECOMPOSITIONS; level++){
// Orientation 0 is visited for level 0 only; every other level starts at
// orientation 1.
1661 for(orientation=level ? 1:0; orientation<4; orientation++){
1662 memset(s->plane[plane_index].band[level][orientation].state, MID_STATE, sizeof(s->plane[plane_index].band[level][orientation].state));
1666 memset(s->header_state, MID_STATE, sizeof(s->header_state));
1667 memset(s->block_state, MID_STATE, sizeof(s->block_state));
/*
 * Allocates the zero-initialised BlockNode array s->block for the current
 * picture size.
 */
1670 static int alloc_blocks(SnowContext *s){
// -((-x) >> n) rounds x / 2^n upwards. This relies on >> of a negative int
// being an arithmetic shift, which C leaves implementation-defined.
1671 int w= -((-s->avctx->width )>>LOG2_MB_SIZE);
1672 int h= -((-s->avctx->height)>>LOG2_MB_SIZE);
// w*h blocks, times 4^block_max_depth entries per block.
// NOTE(review): in the lines shown here the result of av_mallocz() is not
// checked and the size computation is not guarded against overflow.
1677 s->block= av_mallocz(w * h * sizeof(BlockNode) << (s->block_max_depth*2));
/*
 * Copies range-coder state towards `d` while keeping d's own output
 * pointers: d->bytestream and d->bytestream_start are saved first and
 * written back at the end.
 * NOTE(review): the copy itself is presumably on a line not shown here.
 */
1681 static inline void copy_rac_state(RangeCoder *d, RangeCoder *s){
1682 uint8_t *bytestream= d->bytestream;
1683 uint8_t *bytestream_start= d->bytestream_start;
1685 d->bytestream= bytestream;
1686 d->bytestream_start= bytestream_start;
/*
 * Walks a w x w block of 8-bit pixels starting at `pix`, with rows
 * `line_size` bytes apart (presumably summing the pixel values, as the
 * name says; the accumulation line is not shown here).
 */
1689 //near copy & paste from dsputil, FIXME
1690 static int pix_sum(uint8_t * pix, int line_size, int w)
1695 for (i = 0; i < w; i++) {
1696 for (j = 0; j < w; j++) {
// After w pixels of a row, skip to the start of the next row.
1700 pix += line_size - w;
/*
 * Walks a w x w block of 8-bit pixels starting at `pix`, with rows
 * `line_size` bytes apart, using the ff_squareTbl lookup table
 * (presumably accumulating the squared pixel values; the accumulation
 * line is not shown here).
 */
1705 //near copy & paste from dsputil, FIXME
1706 static int pix_norm1(uint8_t * pix, int line_size, int w)
// Table pointer moved forward by 256 entries, so sq[0] is entry 256 of
// ff_squareTbl.
1709 uint32_t *sq = ff_squareTbl + 256;
1712 for (i = 0; i < w; i++) {
1713 for (j = 0; j < w; j ++) {
// After w pixels of a row, skip to the start of the next row.
1717 pix += line_size - w;
/*
 * Fills the square of s->block entries covered by the block at (x, y) of
 * tree level `level` with one and the same BlockNode value (`block`,
 * presumably built from l/cb/cr, mx/my, ref and type on lines not shown).
 */
1722 static inline void set_blocks(SnowContext *s, int level, int x, int y, int l, int cb, int cr, int mx, int my, int ref, int type){
// Row length of s->block, counted in blocks of the deepest level.
1723 const int w= s->b_width << s->block_max_depth;
1724 const int rem_depth= s->block_max_depth - level;
// Index of the top-left deepest-level entry of this block.
1725 const int index= (x + y*w) << rem_depth;
// Side length of this block, counted in deepest-level entries.
1726 const int block_w= 1<<rem_depth;
1739 for(j=0; j<block_w; j++){
1740 for(i=0; i<block_w; i++){
1741 s->block[index + i + j*w]= block;
/*
 * Sets the source and reference plane pointers of the motion-estimation
 * context `c` for a block whose top-left corner is at pixel (x, y).
 */
1746 static inline void init_ref(MotionEstContext *c, uint8_t *src[3], uint8_t *ref[3], uint8_t *ref2[3], int x, int y, int ref_index){
// Per-plane byte offsets; the two entries visible here (presumably the
// chroma planes) use c->uvstride and halve the result.
1747 const int offset[3]= {
1749 ((y*c->uvstride + x)>>1),
1750 ((y*c->uvstride + x)>>1),
// src is stored as passed in, ref is moved forward by the plane offset.
1754 c->src[0][i]= src [i];
1755 c->ref[0][i]= ref [i] + offset[i];
/*
 * Motion-vector prediction: *mx and *my receive the component-wise median
 * (mid_pred) of the vectors of the left, top and top-right neighbours.
 */
1760 static inline void pred_mv(SnowContext *s, int *mx, int *my, int ref,
1761 const BlockNode *left, const BlockNode *top, const BlockNode *tr){
// Single reference frame: the neighbour vectors are used unchanged.
1762 if(s->ref_frames == 1){
1763 *mx = mid_pred(left->mx, top->mx, tr->mx);
1764 *my = mid_pred(left->my, top->my, tr->my);
// Otherwise each neighbour vector is first rescaled with the factor
// scale_mv_ref[ref][neighbour->ref]; +128 and >>8 round the 8-bit
// fixed-point product.
1766 const int *scale = scale_mv_ref[ref];
1767 *mx = mid_pred((left->mx * scale[left->ref] + 128) >>8,
1768 (top ->mx * scale[top ->ref] + 128) >>8,
1769 (tr ->mx * scale[tr ->ref] + 128) >>8);
1770 *my = mid_pred((left->my * scale[left->ref] + 128) >>8,
1771 (top ->my * scale[top ->ref] + 128) >>8,
1772 (tr ->my * scale[tr ->ref] + 128) >>8);
// Named slots of the predictor array P used by the motion search.
1779 #define P_TOPRIGHT P[3]
1780 #define P_MEDIAN P[4]
1782 #define FLAG_QPEL 1 //must be 1
/*
 * Encoder decision for one node (x, y) of the block quad-tree at `level`.
 * Three candidates are costed: an inter block (motion search over all
 * reference frames), an intra block (mean colour of the block) and, below
 * the maximum depth, a split into four children coded recursively.
 * The inter and intra symbols are written to scratch range coders
 * (p_buffer / i_buffer) so that the chosen candidate's bytes can be copied
 * into the real bitstream afterwards.
 */
1784 static int encode_q_branch(SnowContext *s, int level, int x, int y){
// Scratch output and scratch copies of the adaptive block states for the
// inter (p_*) and intra (i_*) candidates.
1785 uint8_t p_buffer[1024];
1786 uint8_t i_buffer[1024];
1787 uint8_t p_state[sizeof(s->block_state)];
1788 uint8_t i_state[sizeof(s->block_state)];
// Bitstream position before this node, needed to rewind later.
1790 uint8_t *pbbak= s->c.bytestream;
1791 uint8_t *pbbak_start= s->c.bytestream_start;
1792 int score, score2, iscore, i_len, p_len, block_s, sum, base_bits;
1793 const int w= s->b_width << s->block_max_depth;
1794 const int h= s->b_height << s->block_max_depth;
1795 const int rem_depth= s->block_max_depth - level;
1796 const int index= (x + y*w) << rem_depth;
// Block size in pixels at this level.
1797 const int block_w= 1<<(LOG2_MB_SIZE - level);
1798 int trx= (x+1)<<rem_depth;
1799 int try= (y+1)<<rem_depth;
// Neighbouring blocks; null_block stands in for neighbours outside the
// picture.
1800 const BlockNode *left = x ? &s->block[index-1] : &null_block;
1801 const BlockNode *top = y ? &s->block[index-w] : &null_block;
1802 const BlockNode *right = trx<w ? &s->block[index+1] : &null_block;
1803 const BlockNode *bottom= try<h ? &s->block[index+w] : &null_block;
1804 const BlockNode *tl = y && x ? &s->block[index-w-1] : left;
1805 const BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
// Colour prediction: the left neighbour's colour.
1806 int pl = left->color[0];
1807 int pcb= left->color[1];
1808 int pcr= left->color[2];
1812 const int stride= s->current_picture.linesize[0];
1813 const int uvstride= s->current_picture.linesize[1];
// Top-left pixel of this block in the three input planes; the two chroma
// offsets are halved.
1814 uint8_t *current_data[3]= { s->input_picture.data[0] + (x + y* stride)*block_w,
1815 s->input_picture.data[1] + (x + y*uvstride)*block_w/2,
1816 s->input_picture.data[2] + (x + y*uvstride)*block_w/2};
1818 int16_t last_mv[3][2];
1819 int qpel= !!(s->avctx->flags & CODEC_FLAG_QPEL); //unused
1820 const int shift= 1+qpel;
1821 MotionEstContext *c= &s->m.me;
// Context indices for the reference index, the vector components and the
// split flag, all derived from the neighbours.
1822 int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref);
1823 int mx_context= av_log2(2*FFABS(left->mx - top->mx));
1824 int my_context= av_log2(2*FFABS(left->my - top->my));
1825 int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
1826 int ref, best_ref, ref_score, ref_mx, ref_my;
1828 assert(sizeof(s->block_state) >= 256);
1830 set_blocks(s, level, x, y, pl, pcb, pcr, 0, 0, 0, BLOCK_INTRA);
1834 // clip predictors / edge ?
1836 P_LEFT[0]= left->mx;
1837 P_LEFT[1]= left->my;
1840 P_TOPRIGHT[0]= tr->mx;
1841 P_TOPRIGHT[1]= tr->my;
// Candidate vectors handed to the search: the vector currently stored for
// this block and those of the right and bottom neighbours.
1843 last_mv[0][0]= s->block[index].mx;
1844 last_mv[0][1]= s->block[index].my;
1845 last_mv[1][0]= right->mx;
1846 last_mv[1][1]= right->my;
1847 last_mv[2][0]= bottom->mx;
1848 last_mv[2][1]= bottom->my;
1855 assert(c-> stride == stride);
1856 assert(c->uvstride == uvstride);
1858 c->penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_cmp);
1859 c->sub_penalty_factor= get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_sub_cmp);
1860 c->mb_penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->mb_cmp);
// The assignment inside the index also sets s->m.f_code to 1.
1861 c->current_mv_penalty= c->mv_penalty[s->m.f_code=1] + MAX_MV;
// Search window relative to the block position, in full pixels: the
// picture area extended by 16-2 pixels on every side.
1863 c->xmin = - x*block_w - 16+2;
1864 c->ymin = - y*block_w - 16+2;
1865 c->xmax = - (x+1)*block_w + (w<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-2;
1866 c->ymax = - (y+1)*block_w + (h<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-2;
// Clip the predictors to the window (window limits scaled by `shift` to
// the vector precision).
1868 if(P_LEFT[0] > (c->xmax<<shift)) P_LEFT[0] = (c->xmax<<shift);
1869 if(P_LEFT[1] > (c->ymax<<shift)) P_LEFT[1] = (c->ymax<<shift);
1870 if(P_TOP[0] > (c->xmax<<shift)) P_TOP[0] = (c->xmax<<shift);
1871 if(P_TOP[1] > (c->ymax<<shift)) P_TOP[1] = (c->ymax<<shift);
1872 if(P_TOPRIGHT[0] < (c->xmin<<shift)) P_TOPRIGHT[0]= (c->xmin<<shift);
1873 if(P_TOPRIGHT[0] > (c->xmax<<shift)) P_TOPRIGHT[0]= (c->xmax<<shift); //due to pmx no clip
1874 if(P_TOPRIGHT[1] > (c->ymax<<shift)) P_TOPRIGHT[1]= (c->ymax<<shift);
1876 P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]);
1877 P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);
// Two alternatives for the search predictor: left neighbour or median
// (the selecting condition is not among the lines shown here).
1880 c->pred_x= P_LEFT[0];
1881 c->pred_y= P_LEFT[1];
1883 c->pred_x = P_MEDIAN[0];
1884 c->pred_y = P_MEDIAN[1];
// ---- inter candidate: motion search in every reference frame ----
1889 for(ref=0; ref<s->ref_frames; ref++){
1890 init_ref(c, current_data, s->last_picture[ref].data, NULL, block_w*x, block_w*y, 0);
1892 ref_score= ff_epzs_motion_search(&s->m, &ref_mx, &ref_my, P, 0, /*ref_index*/ 0, last_mv,
1893 (1<<16)>>shift, level-LOG2_MB_SIZE+4, block_w);
1895 assert(ref_mx >= c->xmin);
1896 assert(ref_mx <= c->xmax);
1897 assert(ref_my >= c->ymin);
1898 assert(ref_my <= c->ymax);
// Sub-pel refinement, then the final block score; on top comes a penalty
// that grows with the reference index.
1900 ref_score= c->sub_motion_search(&s->m, &ref_mx, &ref_my, ref_score, 0, 0, level-LOG2_MB_SIZE+4, block_w);
1901 ref_score= ff_get_mb_score(&s->m, ref_mx, ref_my, 0, 0, level-LOG2_MB_SIZE+4, block_w, 0);
1902 ref_score+= 2*av_log2(2*ref)*c->penalty_factor;
// Remember the per-reference result when the tables are allocated.
1903 if(s->ref_mvs[ref]){
1904 s->ref_mvs[ref][index][0]= ref_mx;
1905 s->ref_mvs[ref][index][1]= ref_my;
1906 s->ref_scores[ref][index]= ref_score;
1908 if(score > ref_score){
1915 //FIXME if mb_cmp != SSE then intra cannot be compared currently and mb_penalty vs. lambda2
// Bits produced so far, used as baseline for both candidates' rate.
1918 base_bits= get_rac_count(&s->c) - 8*(s->c.bytestream - s->c.bytestream_start);
// Code the inter candidate into the scratch coder `pc`.
1920 pc.bytestream_start=
1921 pc.bytestream= p_buffer; //FIXME end/start? and at the other stoo
1922 memcpy(p_state, s->block_state, sizeof(s->block_state));
// "not split" flag (only below maximum depth), then "not intra".
1924 if(level!=s->block_max_depth)
1925 put_rac(&pc, &p_state[4 + s_context], 1);
1926 put_rac(&pc, &p_state[1 + left->type + top->type], 0);
1927 if(s->ref_frames > 1)
1928 put_symbol(&pc, &p_state[128 + 1024 + 32*ref_context], best_ref, 0);
// The vector is coded as difference to the prediction.
1929 pred_mv(s, &pmx, &pmy, best_ref, left, top, tr);
1930 put_symbol(&pc, &p_state[128 + 32*(mx_context + 16*!!best_ref)], mx - pmx, 1);
1931 put_symbol(&pc, &p_state[128 + 32*(my_context + 16*!!best_ref)], my - pmy, 1);
1932 p_len= pc.bytestream - pc.bytestream_start;
// Rate term: lambda2 * bits spent, scaled by FF_LAMBDA_SHIFT.
1933 score += (s->lambda2*(get_rac_count(&pc)-base_bits))>>FF_LAMBDA_SHIFT;
// ---- intra candidate: block mean per plane ----
// l is the rounded luma mean; iscore = sum(p^2) - 2*l*sum(p) + l*l*N,
// i.e. the squared error of replacing the block by l.
1935 block_s= block_w*block_w;
1936 sum = pix_sum(current_data[0], stride, block_w);
1937 l= (sum + block_s/2)/block_s;
1938 iscore = pix_norm1(current_data[0], stride, block_w) - 2*l*sum + l*l*block_s;
// Chroma means over the quarter-size chroma blocks; their error is not
// added to iscore (the two lines doing so are commented out).
1940 block_s= block_w*block_w>>2;
1941 sum = pix_sum(current_data[1], uvstride, block_w>>1);
1942 cb= (sum + block_s/2)/block_s;
1943 // iscore += pix_norm1(&current_mb[1][0], uvstride, block_w>>1) - 2*cb*sum + cb*cb*block_s;
1944 sum = pix_sum(current_data[2], uvstride, block_w>>1);
1945 cr= (sum + block_s/2)/block_s;
1946 // iscore += pix_norm1(&current_mb[2][0], uvstride, block_w>>1) - 2*cr*sum + cr*cr*block_s;
// Code the intra candidate into the scratch coder `ic`.
1949 ic.bytestream_start=
1950 ic.bytestream= i_buffer; //FIXME end/start? and at the other stoo
1951 memcpy(i_state, s->block_state, sizeof(s->block_state));
1952 if(level!=s->block_max_depth)
1953 put_rac(&ic, &i_state[4 + s_context], 1);
1954 put_rac(&ic, &i_state[1 + left->type + top->type], 1);
// Colours are coded as difference to the left neighbour's colour.
1955 put_symbol(&ic, &i_state[32], l-pl , 1);
1956 put_symbol(&ic, &i_state[64], cb-pcb, 1);
1957 put_symbol(&ic, &i_state[96], cr-pcr, 1);
1958 i_len= ic.bytestream - ic.bytestream_start;
1959 iscore += (s->lambda2*(get_rac_count(&ic)-base_bits))>>FF_LAMBDA_SHIFT;
1961 // assert(score==256*256*256*64-1);
1962 assert(iscore < 255*255*256 + s->lambda2*10);
1963 assert(iscore >= 0);
1964 assert(l>=0 && l<=255);
1965 assert(pl>=0 && pl<=255);
// Scene-change statistics derived from the intra and inter scores.
1968 int varc= iscore >> 8;
1969 int vard= score >> 8;
1970 if (vard <= 64 || vard < varc)
1971 c->scene_change_score+= ff_sqrt(vard) - ff_sqrt(varc);
1973 c->scene_change_score+= s->m.qscale;
// ---- split candidate: code the four children recursively ----
1976 if(level!=s->block_max_depth){
1977 put_rac(&s->c, &s->block_state[4 + s_context], 0);
1978 score2 = encode_q_branch(s, level+1, 2*x+0, 2*y+0);
1979 score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+0);
1980 score2+= encode_q_branch(s, level+1, 2*x+0, 2*y+1);
1981 score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+1);
1982 score2+= s->lambda2>>FF_LAMBDA_SHIFT; //FIXME exact split overhead
// The split is kept only if it beats both unsplit candidates.
1984 if(score2 < score && score2 < iscore)
// Intra chosen: rewind the bitstream to the saved position and install the
// intra bytes, block data and adaptive states.
1989 pred_mv(s, &pmx, &pmy, 0, left, top, tr);
1990 memcpy(pbbak, i_buffer, i_len);
1992 s->c.bytestream_start= pbbak_start;
1993 s->c.bytestream= pbbak + i_len;
1994 set_blocks(s, level, x, y, l, cb, cr, pmx, pmy, 0, BLOCK_INTRA);
1995 memcpy(s->block_state, i_state, sizeof(s->block_state));
// Inter chosen: same procedure with the inter bytes and states.
1998 memcpy(pbbak, p_buffer, p_len);
2000 s->c.bytestream_start= pbbak_start;
2001 s->c.bytestream= pbbak + p_len;
2002 set_blocks(s, level, x, y, pl, pcb, pcr, mx, my, best_ref, 0);
2003 memcpy(s->block_state, p_state, sizeof(s->block_state));
/*
 * Returns non-zero if blocks a and b carry the same coded information.
 * Two intra blocks are compared by their three colour components; in the
 * other visible case motion vector, reference index and the intra bit of
 * the type must all match.
 */
2008 static av_always_inline int same_block(BlockNode *a, BlockNode *b){
2009 if((a->type&BLOCK_INTRA) && (b->type&BLOCK_INTRA)){
// OR-ing the differences yields zero only if every difference is zero.
2010 return !((a->color[0] - b->color[0]) | (a->color[1] - b->color[1]) | (a->color[2] - b->color[2]));
2012 return !((a->mx - b->mx) | (a->my - b->my) | (a->ref - b->ref) | ((a->type ^ b->type)&BLOCK_INTRA));
/*
 * Writes one node (x, y) of the block quad-tree at `level` to the
 * bitstream using the block data already stored in s->block; no search is
 * done here. A node is coded unsplit when its four quadrants hold the same
 * block data, otherwise the four children are coded recursively.
 */
2016 static void encode_q_branch2(SnowContext *s, int level, int x, int y){
2017 const int w= s->b_width << s->block_max_depth;
2018 const int rem_depth= s->block_max_depth - level;
2019 const int index= (x + y*w) << rem_depth;
2020 int trx= (x+1)<<rem_depth;
2021 BlockNode *b= &s->block[index];
// Neighbouring blocks; null_block stands in for neighbours outside the
// picture.
2022 const BlockNode *left = x ? &s->block[index-1] : &null_block;
2023 const BlockNode *top = y ? &s->block[index-w] : &null_block;
2024 const BlockNode *tl = y && x ? &s->block[index-w-1] : left;
2025 const BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
// Colour prediction: the left neighbour's colour.
2026 int pl = left->color[0];
2027 int pcb= left->color[1];
2028 int pcr= left->color[2];
// Context indices; here the 16*!!b->ref offset is already folded into the
// vector contexts.
2030 int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref);
2031 int mx_context= av_log2(2*FFABS(left->mx - top->mx)) + 16*!!b->ref;
2032 int my_context= av_log2(2*FFABS(left->my - top->my)) + 16*!!b->ref;
2033 int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
2036 set_blocks(s, level, x, y, pl, pcb, pcr, 0, 0, 0, BLOCK_INTRA);
2040 if(level!=s->block_max_depth){
// b, b+1, b+w and b+w+1 are compared: if all hold the same data the node
// is coded unsplit (flag 1), otherwise flag 0 and the four children follow.
2041 if(same_block(b,b+1) && same_block(b,b+w) && same_block(b,b+w+1)){
2042 put_rac(&s->c, &s->block_state[4 + s_context], 1);
2044 put_rac(&s->c, &s->block_state[4 + s_context], 0);
2045 encode_q_branch2(s, level+1, 2*x+0, 2*y+0);
2046 encode_q_branch2(s, level+1, 2*x+1, 2*y+0);
2047 encode_q_branch2(s, level+1, 2*x+0, 2*y+1);
2048 encode_q_branch2(s, level+1, 2*x+1, 2*y+1);
// Intra block: type flag 1, then the three colour differences to the left
// neighbour.
2052 if(b->type & BLOCK_INTRA){
2053 pred_mv(s, &pmx, &pmy, 0, left, top, tr);
2054 put_rac(&s->c, &s->block_state[1 + (left->type&1) + (top->type&1)], 1);
2055 put_symbol(&s->c, &s->block_state[32], b->color[0]-pl , 1);
2056 put_symbol(&s->c, &s->block_state[64], b->color[1]-pcb, 1);
2057 put_symbol(&s->c, &s->block_state[96], b->color[2]-pcr, 1);
2058 set_blocks(s, level, x, y, b->color[0], b->color[1], b->color[2], pmx, pmy, 0, BLOCK_INTRA);
// Inter block: type flag 0, the reference index if there is more than one
// reference frame, then the vector difference to the prediction.
2060 pred_mv(s, &pmx, &pmy, b->ref, left, top, tr);
2061 put_rac(&s->c, &s->block_state[1 + (left->type&1) + (top->type&1)], 0);
2062 if(s->ref_frames > 1)
2063 put_symbol(&s->c, &s->block_state[128 + 1024 + 32*ref_context], b->ref, 0);
2064 put_symbol(&s->c, &s->block_state[128 + 32*mx_context], b->mx - pmx, 1);
2065 put_symbol(&s->c, &s->block_state[128 + 32*my_context], b->my - pmy, 1);
2066 set_blocks(s, level, x, y, pl, pcb, pcr, b->mx, b->my, b->ref, 0);
/**
 * Read the block-tree node at (x, y) of tree level "level" from the range
 * coder s->c and store the result with set_blocks(). Recurses into the four
 * children when the node is split.
 */
2070 static void decode_q_branch(SnowContext *s, int level, int x, int y){
/* w is the row stride of s->block[]; index equals (x<<rem_depth) + (y<<rem_depth)*w */
2071 const int w= s->b_width << s->block_max_depth;
2072 const int rem_depth= s->block_max_depth - level;
2073 const int index= (x + y*w) << rem_depth;
2074 int trx= (x+1)<<rem_depth;
/* neighbours; null_block stands in at the left/top border, tl falls back to left and tr to tl */
2075 const BlockNode *left = x ? &s->block[index-1] : &null_block;
2076 const BlockNode *top = y ? &s->block[index-w] : &null_block;
2077 const BlockNode *tl = y && x ? &s->block[index-w-1] : left;
2078 const BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
2079 int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
/* NOTE(review): the condition guarding this call is not visible in this excerpt (presumably the keyframe case) -- confirm */
2082 set_blocks(s, level, x, y, null_block.color[0], null_block.color[1], null_block.color[2], null_block.mx, null_block.my, null_block.ref, BLOCK_INTRA);
/* a node is decoded here when it is at the maximum depth or when the flag read from the coder is set */
2086 if(level==s->block_max_depth || get_rac(&s->c, &s->block_state[4 + s_context])){
/* colour predictors come from the left neighbour */
2088 int l = left->color[0];
2089 int cb= left->color[1];
2090 int cr= left->color[2];
2092 int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref);
/* the tr terms are multiplied by 0 and therefore do not contribute */
2093 int mx_context= av_log2(2*FFABS(left->mx - top->mx)) + 0*av_log2(2*FFABS(tr->mx - top->mx));
2094 int my_context= av_log2(2*FFABS(left->my - top->my)) + 0*av_log2(2*FFABS(tr->my - top->my));
2096 type= get_rac(&s->c, &s->block_state[1 + left->type + top->type]) ? BLOCK_INTRA : 0;
/* intra: mv is the prediction, colours are the left neighbour's plus a coded difference */
2099 pred_mv(s, &mx, &my, 0, left, top, tr);
2100 l += get_symbol(&s->c, &s->block_state[32], 1);
2101 cb+= get_symbol(&s->c, &s->block_state[64], 1);
2102 cr+= get_symbol(&s->c, &s->block_state[96], 1);
/* inter: optional reference index, then mv = prediction + coded difference */
/* NOTE(review): no check of ref against s->ref_frames is visible in this excerpt; ref comes from the bitstream -- confirm it is validated */
2104 if(s->ref_frames > 1)
2105 ref= get_symbol(&s->c, &s->block_state[128 + 1024 + 32*ref_context], 0);
2106 pred_mv(s, &mx, &my, ref, left, top, tr);
2107 mx+= get_symbol(&s->c, &s->block_state[128 + 32*(mx_context + 16*!!ref)], 1);
2108 my+= get_symbol(&s->c, &s->block_state[128 + 32*(my_context + 16*!!ref)], 1);
2110 set_blocks(s, level, x, y, l, cb, cr, mx, my, ref, type);
/* split node: decode the four children of the next level */
2112 decode_q_branch(s, level+1, 2*x+0, 2*y+0);
2113 decode_q_branch(s, level+1, 2*x+1, 2*y+0);
2114 decode_q_branch(s, level+1, 2*x+0, 2*y+1);
2115 decode_q_branch(s, level+1, 2*x+1, 2*y+1);
/**
 * Encode the block tree of the frame, starting each tree at level 0.
 * "search" selects between encode_q_branch() and encode_q_branch2()
 * together with the configured motion estimation method (see below).
 */
2119 static void encode_blocks(SnowContext *s, int search){
/* NOTE(review): the statement controlled by this condition is not visible in this excerpt */
2124 if(s->avctx->me_method == ME_ITER && !s->keyframe && search)
/* give up with an error message when less than w*MB_SIZE*MB_SIZE*3 bytes remain in the output buffer */
2128 if(s->c.bytestream_end - s->c.bytestream < w*MB_SIZE*MB_SIZE*3){ //FIXME nicer limit
2129 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
/* encode_q_branch2() is used for ME_ITER or when no search is requested; encode_q_branch() is the other alternative */
2133 if(s->avctx->me_method == ME_ITER || !search)
2134 encode_q_branch2(s, 0, x, y);
2136 encode_q_branch (s, 0, x, y);
/**
 * Decode block trees by calling decode_q_branch() at level 0 for position
 * (x, y). The iteration over x and y is not visible in this excerpt.
 */
2141 static void decode_blocks(SnowContext *s){
2148 decode_q_branch(s, 0, x, y);
/**
 * Interpolate a b_w x b_h block from src into dst for the sub-pixel offset
 * (dx, dy); both offsets are asserted to be below 16.
 *
 * p selects the interpolation filter: with p == NULL or p->fast_mc set the
 * fixed taps 20, -5, 1 are used, otherwise the taps come from p->hcoeff[].
 */
2153 static void mc_block(Plane *p, uint8_t *dst, const uint8_t *src, uint8_t *tmp, int stride, int b_w, int b_h, int dx, int dy){
/* blend weight per (dx&7, dy&7); the table contents are not visible in this excerpt */
2154 const static uint8_t weight[64]={
/* indexed by dx + 16*dy; each entry packs two 4-bit values: the high nibble is read as l, the low nibble as r */
2165 const static uint8_t brane[256]={
2166 0x00,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x11,0x12,0x12,0x12,0x12,0x12,0x12,0x12,
2167 0x04,0x05,0xcc,0xcc,0xcc,0xcc,0xcc,0x41,0x15,0x16,0xcc,0xcc,0xcc,0xcc,0xcc,0x52,
2168 0x04,0xcc,0x05,0xcc,0xcc,0xcc,0x41,0xcc,0x15,0xcc,0x16,0xcc,0xcc,0xcc,0x52,0xcc,
2169 0x04,0xcc,0xcc,0x05,0xcc,0x41,0xcc,0xcc,0x15,0xcc,0xcc,0x16,0xcc,0x52,0xcc,0xcc,
2170 0x04,0xcc,0xcc,0xcc,0x41,0xcc,0xcc,0xcc,0x15,0xcc,0xcc,0xcc,0x16,0xcc,0xcc,0xcc,
2171 0x04,0xcc,0xcc,0x41,0xcc,0x05,0xcc,0xcc,0x15,0xcc,0xcc,0x52,0xcc,0x16,0xcc,0xcc,
2172 0x04,0xcc,0x41,0xcc,0xcc,0xcc,0x05,0xcc,0x15,0xcc,0x52,0xcc,0xcc,0xcc,0x16,0xcc,
2173 0x04,0x41,0xcc,0xcc,0xcc,0xcc,0xcc,0x05,0x15,0x52,0xcc,0xcc,0xcc,0xcc,0xcc,0x16,
2174 0x44,0x45,0x45,0x45,0x45,0x45,0x45,0x45,0x55,0x56,0x56,0x56,0x56,0x56,0x56,0x56,
2175 0x48,0x49,0xcc,0xcc,0xcc,0xcc,0xcc,0x85,0x59,0x5A,0xcc,0xcc,0xcc,0xcc,0xcc,0x96,
2176 0x48,0xcc,0x49,0xcc,0xcc,0xcc,0x85,0xcc,0x59,0xcc,0x5A,0xcc,0xcc,0xcc,0x96,0xcc,
2177 0x48,0xcc,0xcc,0x49,0xcc,0x85,0xcc,0xcc,0x59,0xcc,0xcc,0x5A,0xcc,0x96,0xcc,0xcc,
2178 0x48,0xcc,0xcc,0xcc,0x49,0xcc,0xcc,0xcc,0x59,0xcc,0xcc,0xcc,0x96,0xcc,0xcc,0xcc,
2179 0x48,0xcc,0xcc,0x85,0xcc,0x49,0xcc,0xcc,0x59,0xcc,0xcc,0x96,0xcc,0x5A,0xcc,0xcc,
2180 0x48,0xcc,0x85,0xcc,0xcc,0xcc,0x49,0xcc,0x59,0xcc,0x96,0xcc,0xcc,0xcc,0x5A,0xcc,
2181 0x48,0x85,0xcc,0xcc,0xcc,0xcc,0xcc,0x49,0x59,0x96,0xcc,0xcc,0xcc,0xcc,0xcc,0x5A,
/* looked up with l and r below; the two results are OR-ed into b. Table contents not visible in this excerpt */
2184 const static uint8_t needs[16]={
/* scratch buffers: tmpIt has rows of 64 int16_t; tmp2t is a variable-length array whose size depends on stride */
2192 int16_t tmpIt [64*(32+HTAPS_MAX)];
2193 uint8_t tmp2t[3][stride*(32+HTAPS_MAX)];
2194 int16_t *tmpI= tmpIt;
2195 uint8_t *tmp2= tmp2t[0];
2198 assert(dx<16 && dy<16);
2199 r= brane[dx + 16*dy]&15;
2200 l= brane[dx + 16*dy]>>4;
2202 b= needs[l] | needs[r];
/* NOTE(review): the statement controlled by this condition is not visible in this excerpt */
2203 if(p && !p->diag_mc)
/* first filter pass: taps are taken along a row of src (x direction), over b_h+HTAPS_MAX-1 rows */
2207 for(y=0; y < b_h+HTAPS_MAX-1; y++){
2208 for(x=0; x < b_w; x++){
2209 int a_1=src[x + HTAPS_MAX/2-4];
2210 int a0= src[x + HTAPS_MAX/2-3];
2211 int a1= src[x + HTAPS_MAX/2-2];
2212 int a2= src[x + HTAPS_MAX/2-1];
2213 int a3= src[x + HTAPS_MAX/2+0];
2214 int a4= src[x + HTAPS_MAX/2+1];
2215 int a5= src[x + HTAPS_MAX/2+2];
2216 int a6= src[x + HTAPS_MAX/2+3];
/* in this pass the filter sum is computed without rounding or shifting on the visible lines */
2218 if(!p || p->fast_mc){
2219 am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5);
2223 am= p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6);
/* saturation idiom: a value outside 0..255 becomes 0 if negative and -1 otherwise, which is 255 once stored in a uint8_t */
2228 if(am&(~255)) am= ~(am>>31);
2237 src += HTAPS_MAX/2 - 1;
/* second filter pass: taps are taken along a column of src (steps of stride), for b_w+1 columns */
2241 for(y=0; y < b_h; y++){
2242 for(x=0; x < b_w+1; x++){
2243 int a_1=src[x + (HTAPS_MAX/2-4)*stride];
2244 int a0= src[x + (HTAPS_MAX/2-3)*stride];
2245 int a1= src[x + (HTAPS_MAX/2-2)*stride];
2246 int a2= src[x + (HTAPS_MAX/2-1)*stride];
2247 int a3= src[x + (HTAPS_MAX/2+0)*stride];
2248 int a4= src[x + (HTAPS_MAX/2+1)*stride];
2249 int a5= src[x + (HTAPS_MAX/2+2)*stride];
2250 int a6= src[x + (HTAPS_MAX/2+3)*stride];
/* rounded and normalised: >>5 for the fixed taps, >>6 for p->hcoeff[] */
2252 if(!p || p->fast_mc)
2253 am= (20*(a2+a3) - 5*(a1+a4) + (a0+a5) + 16)>>5;
2255 am= (p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6) + 32)>>6;
2257 if(am&(~255)) am= ~(am>>31);
2265 src += stride*(HTAPS_MAX/2 - 1);
/* third filter pass: taps are taken along a column of the 16-bit buffer tmpI (row length 64) */
2269 for(y=0; y < b_h; y++){
2270 for(x=0; x < b_w; x++){
2271 int a_1=tmpI[x + (HTAPS_MAX/2-4)*64];
2272 int a0= tmpI[x + (HTAPS_MAX/2-3)*64];
2273 int a1= tmpI[x + (HTAPS_MAX/2-2)*64];
2274 int a2= tmpI[x + (HTAPS_MAX/2-1)*64];
2275 int a3= tmpI[x + (HTAPS_MAX/2+0)*64];
2276 int a4= tmpI[x + (HTAPS_MAX/2+1)*64];
2277 int a5= tmpI[x + (HTAPS_MAX/2+2)*64];
2278 int a6= tmpI[x + (HTAPS_MAX/2+3)*64];
/* the shift is twice that of the second pass (>>10 resp. >>12) */
2280 if(!p || p->fast_mc)
2281 am= (20*(a2+a3) - 5*(a1+a4) + (a0+a5) + 512)>>10;
2283 am= (p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6) + 2048)>>12;
2284 if(am&(~255)) am= ~(am>>31);
/* hpel[] collects pointers to the source and to the filtered planes; only some assignments are visible in this excerpt */
2293 hpel[ 1]= tmp2t[0] + stride*(HTAPS_MAX/2-1);
2298 hpel[ 6]= tmp2t[1] + 1;
2300 hpel[ 8]= src + stride;
2301 hpel[ 9]= hpel[1] + stride;
2302 hpel[10]= hpel[8] + 1;
/* first output variant: four neighbouring hpel[] planes are blended */
2305 uint8_t *src1= hpel[dx/8 + dy/8*4 ];
2306 uint8_t *src2= hpel[dx/8 + dy/8*4+1];
2307 uint8_t *src3= hpel[dx/8 + dy/8*4+4];
2308 uint8_t *src4= hpel[dx/8 + dy/8*4+5];
/* the four weights add up to 64, hence +32 and >>6. NOTE(review): dx and dy are presumably reduced to 0..7 on a line not shown -- confirm */
2311 for(y=0; y < b_h; y++){
2312 for(x=0; x < b_w; x++){
2313 dst[x]= ((8-dx)*(8-dy)*src1[x] + dx*(8-dy)*src2[x]+
2314 (8-dx)* dy *src3[x] + dx* dy *src4[x]+32)>>6;
/* second output variant: the two planes chosen through brane[] are blended with a weight from weight[] */
2323 uint8_t *src1= hpel[l];
2324 uint8_t *src2= hpel[r];
2325 int a= weight[((dx&7) + (8*(dy&7)))];
/* NOTE(review): b used below is presumably a local declared on a line not shown (the earlier b holds the needs[] mask) -- confirm */
2327 for(y=0; y < b_h; y++){
2328 for(x=0; x < b_w; x++){
2329 dst[x]= (a*src1[x] + b*src2[x] + 4)>>3;
2336 STOP_TIMER("mc_block")
/* mca(dx,dy,b_w) defines a function mc_block_hpel<dx><dy><b_w>() that calls
 * mc_block() with a NULL plane, a square b_w x b_w block and the source
 * pointer moved back by HTAPS_MAX/2-1 columns and rows. The argument h is
 * not used on the visible lines. */
2339 #define mca(dx,dy,b_w)\
2340 static void mc_block_hpel ## dx ## dy ## b_w(uint8_t *dst, const uint8_t *src, int stride, int h){\
2341 uint8_t tmp[stride*(b_w+HTAPS_MAX-1)];\
2343 mc_block(NULL, dst, src-(HTAPS_MAX/2-1)-(HTAPS_MAX/2-1)*stride, tmp, stride, b_w, b_w, dx, dy);\
/**
 * Write the prediction of one block into dst (row stride "stride").
 * Blocks with BLOCK_INTRA set are filled with block->color[plane_index];
 * the remaining code interpolates from s->last_picture[block->ref] at the
 * position given by the block's motion vector.
 */
2355 static void pred_block(SnowContext *s, uint8_t *dst, uint8_t *tmp, int stride, int sx, int sy, int b_w, int b_h, BlockNode *block, int plane_index, int w, int h){
2356 if(block->type & BLOCK_INTRA){
2358 const int color = block->color[plane_index];
/* the colour byte repeated in all four bytes of a 32-bit word (for color in 0..255) */
2359 const int color4= color*0x01010101;
/* unrolled fills writing 32, 16, 8 and 4 bytes per row; the conditions that pick a variant are not visible in this excerpt */
/* NOTE(review): the uint32_t stores assume dst is suitably aligned for 32-bit access -- confirm */
2361 for(y=0; y < b_h; y++){
2362 *(uint32_t*)&dst[0 + y*stride]= color4;
2363 *(uint32_t*)&dst[4 + y*stride]= color4;
2364 *(uint32_t*)&dst[8 + y*stride]= color4;
2365 *(uint32_t*)&dst[12+ y*stride]= color4;
2366 *(uint32_t*)&dst[16+ y*stride]= color4;
2367 *(uint32_t*)&dst[20+ y*stride]= color4;
2368 *(uint32_t*)&dst[24+ y*stride]= color4;
2369 *(uint32_t*)&dst[28+ y*stride]= color4;
2372 for(y=0; y < b_h; y++){
2373 *(uint32_t*)&dst[0 + y*stride]= color4;
2374 *(uint32_t*)&dst[4 + y*stride]= color4;
2375 *(uint32_t*)&dst[8 + y*stride]= color4;
2376 *(uint32_t*)&dst[12+ y*stride]= color4;
2379 for(y=0; y < b_h; y++){
2380 *(uint32_t*)&dst[0 + y*stride]= color4;
2381 *(uint32_t*)&dst[4 + y*stride]= color4;
2384 for(y=0; y < b_h; y++){
2385 *(uint32_t*)&dst[0 + y*stride]= color4;
/* generic byte-wise fill */
2388 for(y=0; y < b_h; y++){
2389 for(x=0; x < b_w; x++){
2390 dst[x + y*stride]= color;
/* motion-compensated prediction from the reference picture selected by block->ref */
2395 uint8_t *src= s->last_picture[block->ref].data[plane_index];
/* plane 0 uses twice the scale of the other planes */
2396 const int scale= plane_index ? s->mv_scale : 2*s->mv_scale;
2397 int mx= block->mx*scale;
2398 int my= block->my*scale;
/* low 4 bits: fractional offset passed to the interpolation; mx>>4 and my>>4 give the whole-sample displacement */
2399 const int dx= mx&15;
2400 const int dy= my&15;
2401 const int tab_index= 3 - (b_w>>2) + (b_w>>4);
/* start HTAPS_MAX/2-1 samples up and to the left of the displaced block position */
2402 sx += (mx>>4) - (HTAPS_MAX/2-1);
2403 sy += (my>>4) - (HTAPS_MAX/2-1);
2404 src += sx + sy*stride;
/* the unsigned comparison is true for negative sx/sy as well as for positions too close to the right/bottom edge */
2405 if( (unsigned)sx >= w - b_w - (HTAPS_MAX-2)
2406 || (unsigned)sy >= h - b_h - (HTAPS_MAX-2)){
2407 ff_emulated_edge_mc(tmp + MB_SIZE, src, stride, b_w+HTAPS_MAX-1, b_h+HTAPS_MAX-1, sx, sy, w, h);
2410 // assert(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h);
2411 // assert(!(b_w&(b_w-1)));
2412 assert(b_w>1 && b_h>1);
2413 assert(tab_index>=0 && tab_index<4 || b_w==32);
/* mc_block() is used unless dx and dy are multiples of 4, the shape is 1:1, 1:2 or 2:1, b_w is a power of two and fast_mc is set */
2414 if((dx&3) || (dy&3) || !(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h) || (b_w&(b_w-1)) || !s->plane[plane_index].fast_mc )
2415 mc_block(&s->plane[plane_index], dst, src, tmp, stride, b_w, b_h, dx, dy);
/* otherwise the h264 qpel functions are used, indexed by dy+(dx>>2); the branch conditions between the calls below are only partly visible */
/* two calls per 16 rows, the second one 16 columns to the right */
2418 for(y=0; y<b_h; y+=16){
2419 s->dsp.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + y*stride, src + 3 + (y+3)*stride,stride);
2420 s->dsp.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + 16 + y*stride, src + 19 + (y+3)*stride,stride);
2423 s->dsp.put_h264_qpel_pixels_tab[tab_index ][dy+(dx>>2)](dst,src + 3 + 3*stride,stride);
/* block twice as wide as high: two calls side by side, b_h columns apart */
2424 else if(b_w==2*b_h){
2425 s->dsp.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst ,src + 3 + 3*stride,stride);
2426 s->dsp.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst+b_h,src + 3 + b_h + 3*stride,stride);
/* two calls stacked vertically, b_w rows apart */
2429 s->dsp.put_h264_qpel_pixels_tab[tab_index ][dy+(dx>>2)](dst ,src + 3 + 3*stride ,stride);
2430 s->dsp.put_h264_qpel_pixels_tab[tab_index ][dy+(dx>>2)](dst+b_w*stride,src + 3 + 3*stride+b_w*stride,stride);
/**
 * Combine the four predictions block[0..3] with the weights from obmc for a
 * b_w x b_h area and apply the result to the lines of the slice buffer sb,
 * starting at line src_y and column src_x.
 */
2435 void ff_snow_inner_add_yblock(const uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h,
2436 int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8){
2439 for(y=0; y<b_h; y++){
2440 //FIXME ugly misuse of obmc_stride
/* four weight rows: obmc1 at row y, obmc2 half a row to the right, obmc3 obmc_stride/2 rows further down, obmc4 both */
2441 const uint8_t *obmc1= obmc + y*obmc_stride;
2442 const uint8_t *obmc2= obmc1+ (obmc_stride>>1);
2443 const uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1);
2444 const uint8_t *obmc4= obmc3+ (obmc_stride>>1);
2445 dst = slice_buffer_get_line(sb, src_y + y);
2446 for(x=0; x<b_w; x++){
/* weighted sum; note that obmc1 is paired with block[3] and obmc4 with block[0] */
2447 int v= obmc1[x] * block[3][x + y*src_stride]
2448 +obmc2[x] * block[2][x + y*src_stride]
2449 +obmc3[x] * block[1][x + y*src_stride]
2450 +obmc4[x] * block[0][x + y*src_stride];
/* NOTE(review): the conditions selecting between the two shifts below are not visible in this excerpt */
2452 v <<= 8 - LOG2_OBMC_MAX;
2454 v >>= 8 - FRAC_BITS;
/* add the buffer value, round, saturate to 8 bits and store into dst8 */
2457 v += dst[x + src_x];
2458 v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS;
2459 if(v&(~255)) v= ~(v>>31);
2460 dst8[x + y*src_stride] = v;
/* subtract the prediction from the buffer; NOTE(review): presumably the branch taken when add is 0 -- the branch lines are not visible */
2462 dst[x + src_x] -= v;
2469 //FIXME name cleanup (b_w, block_w, b_width stuff)
/**
 * Apply the overlapped prediction of the block position (b_x, b_y) to a
 * b_w x b_h area at (src_x, src_y). The four block nodes lt, rt, lb and rb
 * are predicted with pred_block() and blended with the obmc weights.
 * With sliced set, the work is handed to s->dsp.inner_add_yblock() on the
 * slice buffer sb; otherwise dst and dst8 are accessed directly.
 */
2469 static av_always_inline void add_yblock(SnowContext *s, int sliced, slice_buffer *sb, IDWTELEM *dst, uint8_t *dst8, const uint8_t *obmc, int src_x, int src_y, int b_w, int b_h, int w, int h, int dst_stride, int src_stride, int obmc_stride, int b_x, int b_y, int add, int offset_dst, int plane_index){
2470 const int b_width = s->b_width << s->block_max_depth;
2471 const int b_height= s->b_height << s->block_max_depth;
2472 const int b_stride= b_width;
/* the 2x2 group of block nodes whose top-left member is (b_x, b_y) */
2473 BlockNode *lt= &s->block[b_x + b_y*b_stride];
2474 BlockNode *rt= lt+1;
2475 BlockNode *lb= lt+b_stride;
2476 BlockNode *rb= lb+1;
2478 int tmp_step= src_stride >= 7*MB_SIZE ? MB_SIZE : MB_SIZE*src_stride;
/* variable-length scratch array on the stack; its size depends on src_stride */
2479 uint8_t tmp[src_stride*7*MB_SIZE]; //FIXME align
/* special cases for positions at the edge of the block array; the bodies are not visible in this excerpt */
2486 }else if(b_x + 1 >= b_width){
2493 }else if(b_y + 1 >= b_height){
/* restrict the area to the plane; only parts of these branches are visible */
2498 if(src_x<0){ //FIXME merge with prev & always round internal width upto *16
2501 if(!sliced && !offset_dst)
2504 }else if(src_x + b_w > w){
2508 obmc -= src_y*obmc_stride;
2510 if(!sliced && !offset_dst)
2511 dst -= src_y*dst_stride;
2513 }else if(src_y + b_h> h){
/* nothing left after restricting the area */
2517 if(b_w<=0 || b_h<=0) return;
2519 assert(src_stride > 2*MB_SIZE + 5);
2520 if(!sliced && offset_dst)
2521 dst += src_x + src_y*dst_stride;
2522 dst8+= src_x + src_y*src_stride;
2523 // src += src_x + src_y*src_stride;
2525 ptmp= tmp + 3*tmp_step;
/* predict each of the four nodes; same_block() is tested first for nodes that compare equal to an earlier one (what is done in that case is not visible) */
2528 pred_block(s, block[0], tmp, src_stride, src_x, src_y, b_w, b_h, lt, plane_index, w, h);
2530 if(same_block(lt, rt)){
2535 pred_block(s, block[1], tmp, src_stride, src_x, src_y, b_w, b_h, rt, plane_index, w, h);
2538 if(same_block(lt, lb)){
2540 }else if(same_block(rt, lb)){
2545 pred_block(s, block[2], tmp, src_stride, src_x, src_y, b_w, b_h, lb, plane_index, w, h);
2548 if(same_block(lt, rb) ){
2550 }else if(same_block(rt, rb)){
2552 }else if(same_block(lb, rb)){
2556 pred_block(s, block[3], tmp, src_stride, src_x, src_y, b_w, b_h, rb, plane_index, w, h);
/* variant with one loop per weight quadrant, scaling by 256/OBMC_MAX; NOTE(review): what enables this variant is not visible in this excerpt */
2559 for(y=0; y<b_h; y++){
2560 for(x=0; x<b_w; x++){
2561 int v= obmc [x + y*obmc_stride] * block[3][x + y*src_stride] * (256/OBMC_MAX);
2562 if(add) dst[x + y*dst_stride] += v;
2563 else dst[x + y*dst_stride] -= v;
2566 for(y=0; y<b_h; y++){
2567 uint8_t *obmc2= obmc + (obmc_stride>>1);
2568 for(x=0; x<b_w; x++){
2569 int v= obmc2[x + y*obmc_stride] * block[2][x + y*src_stride] * (256/OBMC_MAX);
2570 if(add) dst[x + y*dst_stride] += v;
2571 else dst[x + y*dst_stride] -= v;
2574 for(y=0; y<b_h; y++){
2575 uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
2576 for(x=0; x<b_w; x++){
2577 int v= obmc3[x + y*obmc_stride] * block[1][x + y*src_stride] * (256/OBMC_MAX);
2578 if(add) dst[x + y*dst_stride] += v;
2579 else dst[x + y*dst_stride] -= v;
2582 for(y=0; y<b_h; y++){
2583 uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
2584 uint8_t *obmc4= obmc3+ (obmc_stride>>1);
2585 for(x=0; x<b_w; x++){
2586 int v= obmc4[x + y*obmc_stride] * block[0][x + y*src_stride] * (256/OBMC_MAX);
2587 if(add) dst[x + y*dst_stride] += v;
2588 else dst[x + y*dst_stride] -= v;
/* slice-buffer variant: delegated to the dsp function */
2595 s->dsp.inner_add_yblock(obmc, obmc_stride, block, b_w, b_h, src_x,src_y, src_stride, sb, add, dst8);
2596 STOP_TIMER("inner_add_yblock")
/* direct variant: same computation on dst (row stride dst_stride) and dst8 (row stride src_stride) */
2598 for(y=0; y<b_h; y++){
2599 //FIXME ugly misuse of obmc_stride
2600 const uint8_t *obmc1= obmc + y*obmc_stride;
2601 const uint8_t *obmc2= obmc1+ (obmc_stride>>1);
2602 const uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1);
2603 const uint8_t *obmc4= obmc3+ (obmc_stride>>1);
2604 for(x=0; x<b_w; x++){
2605 int v= obmc1[x] * block[3][x + y*src_stride]
2606 +obmc2[x] * block[2][x + y*src_stride]
2607 +obmc3[x] * block[1][x + y*src_stride]
2608 +obmc4[x] * block[0][x + y*src_stride];
/* NOTE(review): the conditions selecting between the two shifts below are not visible in this excerpt */
2610 v <<= 8 - LOG2_OBMC_MAX;
2612 v >>= 8 - FRAC_BITS;
/* add the buffer value, round, saturate to 8 bits and store into dst8 */
2615 v += dst[x + y*dst_stride];
2616 v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS;
2617 if(v&(~255)) v= ~(v>>31);
2618 dst8[x + y*src_stride] = v;
/* subtract the prediction; NOTE(review): presumably the branch taken when add is 0 -- the branch lines are not visible */
2620 dst[x + y*dst_stride] -= v;
/**
 * Process block row mb_y of one plane on the slice buffer sb.
 * For keyframes (or with debug flag 512) the rows are only shifted by the
 * 128 offset; otherwise add_yblock() is called in sliced mode for every
 * block column of the row.
 */
2627 static av_always_inline void predict_slice_buffered(SnowContext *s, slice_buffer * sb, IDWTELEM * old_buffer, int plane_index, int add, int mb_y){
2628 Plane *p= &s->plane[plane_index];
2629 const int mb_w= s->b_width << s->block_max_depth;
2630 const int mb_h= s->b_height << s->block_max_depth;
/* planes other than 0 use half the block width and the next obmc_tab entry */
2632 int block_size = MB_SIZE >> s->block_max_depth;
2633 int block_w = plane_index ? block_size/2 : block_size;
2634 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
2635 int obmc_stride= plane_index ? block_size : 2*block_size;
2636 int ref_stride= s->current_picture.linesize[plane_index];
2637 uint8_t *dst8= s->current_picture.data[plane_index];
2642 if(s->keyframe || (s->avctx->debug&512)){
/* first loop: convert the rows of this block row to 8 bit with the 128 offset and rounding, saturated. NOTE(review): presumably the add case -- the branch lines are not visible */
2647 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++)
2649 // DWTELEM * line = slice_buffer_get_line(sb, y);
2650 IDWTELEM * line = sb->line[y];
2653 // int v= buf[x + y*w] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
2654 int v= line[x] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
2656 if(v&(~255)) v= ~(v>>31);
2657 dst8[x + y*ref_stride]= v;
/* second loop: remove the 128 offset from the rows in place */
2661 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++)
2663 // DWTELEM * line = slice_buffer_get_line(sb, y);
2664 IDWTELEM * line = sb->line[y];
2667 line[x] -= 128 << FRAC_BITS;
2668 // buf[x + y*w]-= 128<<FRAC_BITS;
/* mb_x runs up to and including mb_w; each area starts half a block before the block position */
2676 for(mb_x=0; mb_x<=mb_w; mb_x++){
2679 add_yblock(s, 1, sb, old_buffer, dst8, obmc,
2680 block_w*mb_x - block_w/2,
2681 block_w*mb_y - block_w/2,
2684 w, ref_stride, obmc_stride,
2686 add, 0, plane_index);
2688 STOP_TIMER("add_yblock")
2691 STOP_TIMER("predict_slice")
/**
 * Process block row mb_y of one plane on the full-frame buffer buf
 * (row length w). For keyframes (or with debug flag 512) the rows are only
 * shifted by the 128 offset; otherwise add_yblock() is called in non-sliced
 * mode for every block column of the row.
 */
2694 static av_always_inline void predict_slice(SnowContext *s, IDWTELEM *buf, int plane_index, int add, int mb_y){
2695 Plane *p= &s->plane[plane_index];
2696 const int mb_w= s->b_width << s->block_max_depth;
2697 const int mb_h= s->b_height << s->block_max_depth;
/* planes other than 0 use half the block width and the next obmc_tab entry */
2699 int block_size = MB_SIZE >> s->block_max_depth;
2700 int block_w = plane_index ? block_size/2 : block_size;
2701 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
2702 const int obmc_stride= plane_index ? block_size : 2*block_size;
2703 int ref_stride= s->current_picture.linesize[plane_index];
2704 uint8_t *dst8= s->current_picture.data[plane_index];
2709 if(s->keyframe || (s->avctx->debug&512)){
/* first loop: convert the rows of this block row to 8 bit with the 128 offset and rounding, saturated. NOTE(review): presumably the add case -- the branch lines are not visible */
2714 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
2716 int v= buf[x + y*w] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
2718 if(v&(~255)) v= ~(v>>31);
2719 dst8[x + y*ref_stride]= v;
/* second loop: remove the 128 offset from the rows in place */
2723 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
2725 buf[x + y*w]-= 128<<FRAC_BITS;
/* mb_x runs up to and including mb_w; each area starts half a block before the block position */
2733 for(mb_x=0; mb_x<=mb_w; mb_x++){
2736 add_yblock(s, 0, NULL, buf, dst8, obmc,
2737 block_w*mb_x - block_w/2,
2738 block_w*mb_y - block_w/2,
2741 w, ref_stride, obmc_stride,
2743 add, 1, plane_index);
2745 STOP_TIMER("add_yblock")
2748 STOP_TIMER("predict_slice")
2751 static av_always_inline void predict_plane(SnowContext *s, IDWTELEM *buf, int plane_index, int add){
2752 const int mb_h= s->b_height << s->block_max_depth;
2754 for(mb_y=0; mb_y<=mb_h; mb_y++)
2755 predict_slice(s, buf, plane_index, add, mb_y);
/**
 * Compute a colour value in 0..255 for the block at (mb_x, mb_y) in the
 * given plane from the input picture and the contribution of the
 * surrounding blocks, weighted with the obmc window.
 */
2758 static int get_dc(SnowContext *s, int mb_x, int mb_y, int plane_index){
2760 Plane *p= &s->plane[plane_index];
2761 const int block_size = MB_SIZE >> s->block_max_depth;
2762 const int block_w = plane_index ? block_size/2 : block_size;
2763 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
2764 const int obmc_stride= plane_index ? block_size : 2*block_size;
2765 const int ref_stride= s->current_picture.linesize[plane_index];
2766 uint8_t *src= s-> input_picture.data[plane_index];
/* per-plane scratch area inside s->m.obmc_scratchpad */
2767 IDWTELEM *dst= (IDWTELEM*)s->m.obmc_scratchpad + plane_index*block_size*block_size*4; //FIXME change to unsigned
2768 const int b_stride = s->b_width << s->block_max_depth;
2769 const int w= p->width;
2770 const int h= p->height;
2771 int index= mb_x + mb_y*b_stride;
2772 BlockNode *b= &s->block[index];
/* copy of the block node, taken before it is modified below; the restore is not visible in this excerpt */
2773 BlockNode backup= *b;
/* temporarily make the block intra with colour 0 so that only the other blocks contribute to dst */
2777 b->type|= BLOCK_INTRA;
2778 b->color[plane_index]= 0;
2779 memset(dst, 0, obmc_stride*obmc_stride*sizeof(IDWTELEM));
/* block positions mb_x-1..mb_x and mb_y-1..mb_y, selected by the two low bits of i */
2782 int mb_x2= mb_x + (i &1) - 1;
2783 int mb_y2= mb_y + (i>>1) - 1;
2784 int x= block_w*mb_x2 + block_w/2;
2785 int y= block_w*mb_y2 + block_w/2;
/* called with add=0, so the prediction is subtracted from the zeroed scratch area */
2787 add_yblock(s, 0, NULL, dst + ((i&1)+(i>>1)*obmc_stride)*block_w, NULL, obmc,
2788 x, y, block_w, block_w, w, h, obmc_stride, ref_stride, obmc_stride, mb_x2, mb_y2, 0, 0, plane_index);
/* accumulate over the part of this quadrant that lies inside the plane */
2790 for(y2= FFMAX(y, 0); y2<FFMIN(h, y+block_w); y2++){
2791 for(x2= FFMAX(x, 0); x2<FFMIN(w, x+block_w); x2++){
2792 int index= x2-(block_w*mb_x - block_w/2) + (y2-(block_w*mb_y - block_w/2))*obmc_stride;
2793 int obmc_v= obmc[index];
/* when the quadrant crosses a plane border, the weight block_w away in that direction is added */
2795 if(y<0) obmc_v += obmc[index + block_w*obmc_stride];
2796 if(x<0) obmc_v += obmc[index + block_w];
2797 if(y+block_w>h) obmc_v += obmc[index - block_w*obmc_stride];
2798 if(x+block_w>w) obmc_v += obmc[index - block_w];
2799 //FIXME precalc this or simplify it somehow else
/* negated scratch value plus rounding term */
2801 d = -dst[index] + (1<<(FRAC_BITS-1));
2803 ab += (src[x2 + y2*ref_stride] - (d>>FRAC_BITS)) * obmc_v;
2804 aa += obmc_v * obmc_v; //FIXME precalculate this
/* rounded quotient, clipped to 0..255. NOTE(review): aa is used as divisor; confirm it cannot be 0 here */
2810 return av_clip(((ab<<LOG2_OBMC_MAX) + aa/2)/aa, 0, 255); //FIXME we should not need clipping
/**
 * Return a bit-cost figure for the block node at (x, y), computed from
 * av_log2() of the differences against its neighbours. w is the width in
 * block units used to locate the top-right neighbour.
 */
2813 static inline int get_block_bits(SnowContext *s, int x, int y, int w){
2814 const int b_stride = s->b_width << s->block_max_depth;
2815 const int b_height = s->b_height<< s->block_max_depth;
2816 int index= x + y*b_stride;
/* the pointers are formed before the range check further down; nothing is read through them on these lines */
2817 const BlockNode *b = &s->block[index];
2818 const BlockNode *left = x ? &s->block[index-1] : &null_block;
2819 const BlockNode *top = y ? &s->block[index-b_stride] : &null_block;
2820 const BlockNode *tl = y && x ? &s->block[index-b_stride-1] : left;
2821 const BlockNode *tr = y && x+w<b_stride ? &s->block[index-b_stride+w] : tl;
2823 // int mx_context= av_log2(2*FFABS(left->mx - top->mx));
2824 // int my_context= av_log2(2*FFABS(left->my - top->my));
/* positions outside the block array; the statement executed in that case is not visible in this excerpt */
2826 if(x<0 || x>=b_stride || y>=b_height)
2833 00001XXXX 15-30 8-15
2835 //FIXME try accurate rate
2836 //FIXME intra and inter predictors if surrounding blocks aren't the same type
/* intra: cost derived from the colour differences against the left neighbour */
2837 if(b->type & BLOCK_INTRA){
2838 return 3+2*( av_log2(2*FFABS(left->color[0] - b->color[0]))
2839 + av_log2(2*FFABS(left->color[1] - b->color[1]))
2840 + av_log2(2*FFABS(left->color[2] - b->color[2])));
/* inter: cost derived from dmx, dmy and the reference index; how dmx/dmy are updated after pred_mv() is not visible in this excerpt */
2842 pred_mv(s, &dmx, &dmy, b->ref, left, top, tr);
2845 return 2*(1 + av_log2(2*FFABS(dmx)) //FIXME kill the 2* can be merged in lambda
2846 + av_log2(2*FFABS(dmy))
2847 + av_log2(2*b->ref));
/**
 * Return distortion + rate*penalty_factor for the block at (mb_x, mb_y)
 * with its current parameters. The block is predicted with pred_block(),
 * combined with the weights in obmc_edged and the values in the scratchpad,
 * and compared against the input picture.
 */
2851 static int get_block_rd(SnowContext *s, int mb_x, int mb_y, int plane_index, const uint8_t *obmc_edged){
2852 Plane *p= &s->plane[plane_index];
2853 const int block_size = MB_SIZE >> s->block_max_depth;
2854 const int block_w = plane_index ? block_size/2 : block_size;
2855 const int obmc_stride= plane_index ? block_size : 2*block_size;
2856 const int ref_stride= s->current_picture.linesize[plane_index];
2857 uint8_t *dst= s->current_picture.data[plane_index];
2858 uint8_t *src= s-> input_picture.data[plane_index];
/* per-plane area of s->m.obmc_scratchpad; read below as pred1 */
2859 IDWTELEM *pred= (IDWTELEM*)s->m.obmc_scratchpad + plane_index*block_size*block_size*4;
/* variable-length scratch arrays on the stack; their sizes depend on ref_stride */
2860 uint8_t cur[ref_stride*2*MB_SIZE]; //FIXME alignment
2861 uint8_t tmp[ref_stride*(2*MB_SIZE+HTAPS_MAX-1)];
2862 const int b_stride = s->b_width << s->block_max_depth;
2863 const int b_height = s->b_height<< s->block_max_depth;
2864 const int w= p->width;
2865 const int h= p->height;
2868 const int penalty_factor= get_penalty_factor(s->lambda, s->lambda2, s->avctx->me_cmp);
/* area of 2*block_w x 2*block_w starting half a block before the block position; x0..x1 and y0..y1 is the part inside the plane */
2869 int sx= block_w*mb_x - block_w/2;
2870 int sy= block_w*mb_y - block_w/2;
2871 int x0= FFMAX(0,-sx);
2872 int y0= FFMAX(0,-sy);
2873 int x1= FFMIN(block_w*2, w-sx);
2874 int y1= FFMIN(block_w*2, h-sy);
2877 pred_block(s, cur, tmp, ref_stride, sx, sy, block_w*2, block_w*2, &s->block[mb_x + mb_y*b_stride], plane_index, w, h);
/* weight the prediction, add the scratchpad value, shift down and saturate; the store of v is not visible in this excerpt */
2879 for(y=y0; y<y1; y++){
2880 const uint8_t *obmc1= obmc_edged + y*obmc_stride;
2881 const IDWTELEM *pred1 = pred + y*obmc_stride;
2882 uint8_t *cur1 = cur + y*ref_stride;
2883 uint8_t *dst1 = dst + sx + (sy+y)*ref_stride;
2884 for(x=x0; x<x1; x++){
2885 #if FRAC_BITS >= LOG2_OBMC_MAX
2886 int v = (cur1[x] * obmc1[x]) << (FRAC_BITS - LOG2_OBMC_MAX);
2888 int v = (cur1[x] * obmc1[x] + (1<<(LOG2_OBMC_MAX - FRAC_BITS-1))) >> (LOG2_OBMC_MAX - FRAC_BITS);
2890 v = (v + pred1[x]) >> FRAC_BITS;
2891 if(v&(~255)) v= ~(v>>31);
2896 /* copy the regions where obmc[] = (uint8_t)256 */
/* applies only to the four corner blocks and only when LOG2_OBMC_MAX is 8 */
2897 if(LOG2_OBMC_MAX == 8
2898 && (mb_x == 0 || mb_x == b_stride-1)
2899 && (mb_y == 0 || mb_y == b_height-1)){
2908 for(y=y0; y<y1; y++)
2909 memcpy(dst + sx+x0 + (sy+y)*ref_stride, cur + x0 + y*ref_stride, x1-x0);
2913 /* FIXME rearrange dsputil to fit 32x32 cmp functions */
2914 /* FIXME check alignment of the cmp wavelet vs the encoding wavelet */
2915 /* FIXME cmps overlap but don't cover the wavelet's whole support,
2916 * so improving the score of one block is not strictly guaranteed to
2917 * improve the score of the whole frame, so iterative motion est
2918 * doesn't always converge. */
/* distortion: wavelet comparison for FF_CMP_W97/FF_CMP_W53, otherwise s->dsp.me_cmp[0] on 16x16 tiles or on the whole area (selection not fully visible) */
2919 if(s->avctx->me_cmp == FF_CMP_W97)
2920 distortion = w97_32_c(&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, 32);
2921 else if(s->avctx->me_cmp == FF_CMP_W53)
2922 distortion = w53_32_c(&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, 32);
2926 int off = sx+16*(i&1) + (sy+16*(i>>1))*ref_stride;
2927 distortion += s->dsp.me_cmp[0](&s->m, src + off, dst + off, ref_stride, 16);
2932 distortion = s->dsp.me_cmp[0](&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, block_w*2);
/* rate: sum of get_block_bits() over this block and neighbouring positions; the loop bounds are not visible in this excerpt */
2941 rate += get_block_bits(s, mb_x + (i&1) - (i>>1), mb_y + (i>>1), 1);
2943 if(mb_x == b_stride-2)
2944 rate += get_block_bits(s, mb_x + 1, mb_y + 1, 1);
2946 return distortion + rate*penalty_factor;
/**
 * Return distortion + rate*penalty_factor for the 2x2 group of blocks whose
 * top-left member is (mb_x, mb_y). The prediction is written into the
 * current picture with add_yblock() and compared against the input picture.
 */
2949 static int get_4block_rd(SnowContext *s, int mb_x, int mb_y, int plane_index){
2951 Plane *p= &s->plane[plane_index];
2952 const int block_size = MB_SIZE >> s->block_max_depth;
2953 const int block_w = plane_index ? block_size/2 : block_size;
2954 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
2955 const int obmc_stride= plane_index ? block_size : 2*block_size;
2956 const int ref_stride= s->current_picture.linesize[plane_index];
2957 uint8_t *dst= s->current_picture.data[plane_index];
2958 uint8_t *src= s-> input_picture.data[plane_index];
/* static storage, hence all zero; handed to add_yblock() as its dst argument together with a dst_stride of 0 */
/* NOTE(review): the array is const while add_yblock() takes a non-const IDWTELEM * -- confirm it is never written in this mode */
2959 static const IDWTELEM zero_dst[4096]; //FIXME
2960 const int b_stride = s->b_width << s->block_max_depth;
2961 const int w= p->width;
2962 const int h= p->height;
2965 const int penalty_factor= get_penalty_factor(s->lambda, s->lambda2, s->avctx->me_cmp);
/* positions mb_x-1..mb_x+1 and mb_y-1..mb_y+1 as i runs over a 3x3 grid; the loop header is not visible in this excerpt */
2968 int mb_x2= mb_x + (i%3) - 1;
2969 int mb_y2= mb_y + (i/3) - 1;
2970 int x= block_w*mb_x2 + block_w/2;
2971 int y= block_w*mb_y2 + block_w/2;
2973 add_yblock(s, 0, NULL, zero_dst, dst, obmc,
2974 x, y, block_w, block_w, w, h, /*dst_stride*/0, ref_stride, obmc_stride, mb_x2, mb_y2, 1, 1, plane_index);
2976 //FIXME find a cleaner/simpler way to skip the outside stuff
/* copy input samples into the parts of the area outside the plane, so they match exactly in the comparison */
2977 for(y2= y; y2<0; y2++)
2978 memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, block_w);
2979 for(y2= h; y2<y+block_w; y2++)
2980 memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, block_w);
/* NOTE(review): the conditions guarding the next two loops (presumably x<0 and x+block_w>w) are not visible in this excerpt */
2982 for(y2= y; y2<y+block_w; y2++)
2983 memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, -x);
2986 for(y2= y; y2<y+block_w; y2++)
2987 memcpy(dst + w + y2*ref_stride, src + w + y2*ref_stride, x+block_w - w);
/* me_cmp[1] is used for block_w 8, me_cmp[0] for block_w 16 */
2990 assert(block_w== 8 || block_w==16);
2991 distortion += s->dsp.me_cmp[block_w==8](&s->m, src + x + y*ref_stride, dst + x + y*ref_stride, ref_stride, block_w);
/* merged is set when the four blocks of the group compare equal with same_block() */
2995 BlockNode *b= &s->block[mb_x+mb_y*b_stride];
2996 int merged= same_block(b,b+1) && same_block(b,b+b_stride) && same_block(b,b+b_stride+1);
/* rate: the first four dxy entries (the group itself) are skipped when merged. NOTE(review): the condition guarding the 2-wide call is not visible */
3004 rate = get_block_bits(s, mb_x, mb_y, 2);
3005 for(i=merged?4:0; i<9; i++){
3006 static const int dxy[9][2] = {{0,0},{1,0},{0,1},{1,1},{2,0},{2,1},{-1,2},{0,2},{1,2}};
3007 rate += get_block_bits(s, mb_x + dxy[i][0], mb_y + dxy[i][1], 1);
3010 return distortion + rate*penalty_factor;
/**
 * Try the candidate parameters p for the block at (mb_x, mb_y) and rate them
 * with get_block_rd(). p holds three colours for the intra case; only p[0]
 * and p[1] are read on the other visible lines. The comparison against
 * *best_rd and the return value are not visible in this excerpt.
 */
3013 static av_always_inline int check_block(SnowContext *s, int mb_x, int mb_y, int p[3], int intra, const uint8_t *obmc_edged, int *best_rd){
3014 const int b_stride= s->b_width << s->block_max_depth;
3015 BlockNode *block= &s->block[mb_x + mb_y * b_stride];
/* copy of the block node as it was on entry */
3016 BlockNode backup= *block;
3017 int rd, index, value;
3019 assert(mb_x>=0 && mb_y>=0);
3020 assert(mb_x<b_stride);
/* intra candidate: store the three colours and set BLOCK_INTRA */
3023 block->color[0] = p[0];
3024 block->color[1] = p[1];
3025 block->color[2] = p[2];
3026 block->type |= BLOCK_INTRA;
/* other candidates: look up s->me_cache[] under a key built from p[0], p[1], block->ref and the cache generation; a hit means this candidate was seen before (what happens then is not visible) */
3028 index= (p[0] + 31*p[1]) & (ME_CACHE_SIZE-1);
3029 value= s->me_cache_generation + (p[0]>>10) + (p[1]<<6) + (block->ref<<12);
3030 if(s->me_cache[index] == value)
3032 s->me_cache[index]= value;
3036 block->type &= ~BLOCK_INTRA;
/* rating is always done on plane 0 */
3039 rd= get_block_rd(s, mb_x, mb_y, 0, obmc_edged);
3051 /* special case for int[2] args we discard afterward, fixes compilation prob with gcc 2.95 */
3052 static av_always_inline int check_block_inter(SnowContext *s, int mb_x, int mb_y, int p0, int p1, const uint8_t *obmc_edged, int *best_rd){
3053 int p[2] = {p0, p1};
3054 return check_block(s, mb_x, mb_y, p, 0, obmc_edged, best_rd);
/**
 * Try one non-intra candidate (p0, p1, ref) for the 2x2 group of blocks
 * whose top-left member is (mb_x, mb_y) and rate it with get_4block_rd().
 * mb_x and mb_y are asserted to be even. The comparison against *best_rd
 * and the return value are not visible in this excerpt.
 */
3057 static av_always_inline int check_4block_inter(SnowContext *s, int mb_x, int mb_y, int p0, int p1, int ref, int *best_rd){
3058 const int b_stride= s->b_width << s->block_max_depth;
3059 BlockNode *block= &s->block[mb_x + mb_y * b_stride];
/* copies of the four block nodes of the group as they were on entry */
3060 BlockNode backup[4]= {block[0], block[1], block[b_stride], block[b_stride+1]};
3061 int rd, index, value;
3063 assert(mb_x>=0 && mb_y>=0);
3064 assert(mb_x<b_stride);
3065 assert(((mb_x|mb_y)&1) == 0);
/* look up s->me_cache[] under a key built from p0, p1, block->ref and the cache generation; what happens on a hit is not visible in this excerpt */
3067 index= (p0 + 31*p1) & (ME_CACHE_SIZE-1);
3068 value= s->me_cache_generation + (p0>>10) + (p1<<6) + (block->ref<<12);
3069 if(s->me_cache[index] == value)
3071 s->me_cache[index]= value;
/* clear BLOCK_INTRA and copy the first block node to the other three members of the group */
3076 block->type &= ~BLOCK_INTRA;
3077 block[1]= block[b_stride]= block[b_stride+1]= *block;
3079 rd= get_4block_rd(s, mb_x, mb_y, 0);
/* put the saved block nodes back; NOTE(review): presumably done only when the candidate is not better -- the condition is not visible */
3086 block[0]= backup[0];
3087 block[1]= backup[1];
3088 block[b_stride]= backup[2];
3089 block[b_stride+1]= backup[3];
/*
 * Iterative motion estimation over the whole block grid of s.
 *
 * The grid has (s->b_width << s->block_max_depth) columns and
 * (s->b_height << s->block_max_depth) rows of BlockNode entries in s->block.
 * At most 25 passes are made over the grid. For each block, candidate motion
 * vectors are scored through check_block_inter() and a candidate built from
 * the get_dc() colours is scored through check_block(); best_rd carries the
 * best score found so far. When s->block_max_depth is 1, a final stage scores
 * 2x2 groups of blocks through check_4block_inter().
 */
3094 static void iterative_me(SnowContext *s){
3095 int pass, mb_x, mb_y;
3096 const int b_width = s->b_width << s->block_max_depth;
3097 const int b_height= s->b_height << s->block_max_depth;
3098 const int b_stride= b_width;
// Keep copies of the range coder and of block_state, run encode_q_branch()
// (second argument 0) on every position of the s->b_width x s->b_height grid,
// then put block_state back.
3102 RangeCoder r = s->c;
3103 uint8_t state[sizeof(s->block_state)];
3104 memcpy(state, s->block_state, sizeof(s->block_state));
3105 for(mb_y= 0; mb_y<s->b_height; mb_y++)
3106 for(mb_x= 0; mb_x<s->b_width; mb_x++)
3107 encode_q_branch(s, 0, mb_x, mb_y);
3109 memcpy(s->block_state, state, sizeof(s->block_state));
3112 for(pass=0; pass<25; pass++){
3115 for(mb_y= 0; mb_y<b_height; mb_y++){
3116 for(mb_x= 0; mb_x<b_width; mb_x++){
3117 int dia_change, i, j, ref;
3118 int best_rd= INT_MAX, ref_rd;
3119 BlockNode backup, ref_b;
3120 const int index= mb_x + mb_y * b_stride;
3121 BlockNode *block= &s->block[index];
// Pointers to the eight surrounding blocks (top, left, right, bottom and the
// four diagonals); NULL where the neighbour would lie outside the grid.
3122 BlockNode *tb = mb_y ? &s->block[index-b_stride ] : NULL;
3123 BlockNode *lb = mb_x ? &s->block[index -1] : NULL;
3124 BlockNode *rb = mb_x+1<b_width ? &s->block[index +1] : NULL;
3125 BlockNode *bb = mb_y+1<b_height ? &s->block[index+b_stride ] : NULL;
3126 BlockNode *tlb= mb_x && mb_y ? &s->block[index-b_stride-1] : NULL;
3127 BlockNode *trb= mb_x+1<b_width && mb_y ? &s->block[index-b_stride+1] : NULL;
3128 BlockNode *blb= mb_x && mb_y+1<b_height ? &s->block[index+b_stride-1] : NULL;
3129 BlockNode *brb= mb_x+1<b_width && mb_y+1<b_height ? &s->block[index+b_stride+1] : NULL;
// b_w is the block edge length at the current depth; obmc_edged is a per-block
// copy of the OBMC window (2*b_w by 2*b_w bytes) that is adjusted below.
3130 const int b_w= (MB_SIZE >> s->block_max_depth);
3131 uint8_t obmc_edged[b_w*2][b_w*2];
// After the first pass, blocks that still carry BLOCK_OPT are tested here
// (presumably they are skipped); the flag is then set on this block.
3133 if(pass && (block->type & BLOCK_OPT))
3135 block->type |= BLOCK_OPT;
// me_cache is cleared whenever me_cache_generation is 0; the generation is
// advanced by 1<<22 for every block visited.
3139 if(!s->me_cache_generation)
3140 memset(s->me_cache, 0, sizeof(s->me_cache));
3141 s->me_cache_generation += 1<<22;
// Start from the OBMC window for this block size, then fold window weights
// together. The mb_x==b_stride-1 and mb_y==b_height-1 cases handle the right
// and bottom grid borders; the first memset loop and the first row merge are
// presumably the matching left and top border cases -- TODO confirm.
3146 memcpy(obmc_edged, obmc_tab[s->block_max_depth], b_w*b_w*4);
3148 for(y=0; y<b_w*2; y++)
3149 memset(obmc_edged[y], obmc_edged[y][0] + obmc_edged[y][b_w-1], b_w);
3150 if(mb_x==b_stride-1)
3151 for(y=0; y<b_w*2; y++)
3152 memset(obmc_edged[y]+b_w, obmc_edged[y][b_w] + obmc_edged[y][b_w*2-1], b_w);
3154 for(x=0; x<b_w*2; x++)
3155 obmc_edged[0][x] += obmc_edged[b_w-1][x];
3156 for(y=1; y<b_w; y++)
3157 memcpy(obmc_edged[y], obmc_edged[0], b_w*2);
3159 if(mb_y==b_height-1){
3160 for(x=0; x<b_w*2; x++)
3161 obmc_edged[b_w*2-1][x] += obmc_edged[b_w][x];
3162 for(y=b_w; y<b_w*2-1; y++)
3163 memcpy(obmc_edged[y], obmc_edged[b_w*2-1], b_w*2);
3167 //skip stuff outside the picture
// For blocks on the grid border, pixels of the 2*block_w square around the
// block are copied from the input picture (src) into the current picture (dst);
// the visible loops cover rows from h onwards, the -sx leftmost columns and the
// columns from w onwards.
3168 if(mb_x==0 || mb_y==0 || mb_x==b_width-1 || mb_y==b_height-1)
3170 uint8_t *src= s-> input_picture.data[0];
3171 uint8_t *dst= s->current_picture.data[0];
3172 const int stride= s->current_picture.linesize[0];
3173 const int block_w= MB_SIZE >> s->block_max_depth;
3174 const int sx= block_w*mb_x - block_w/2;
3175 const int sy= block_w*mb_y - block_w/2;
3176 const int w= s->plane[0].width;
3177 const int h= s->plane[0].height;
3181 memcpy(dst + sx + y*stride, src + sx + y*stride, block_w*2);
3182 for(y=h; y<sy+block_w*2; y++)
3183 memcpy(dst + sx + y*stride, src + sx + y*stride, block_w*2);
3185 for(y=sy; y<sy+block_w*2; y++)
3186 memcpy(dst + sx + y*stride, src + sx + y*stride, -sx);
3188 if(sx+block_w*2 > w){
3189 for(y=sy; y<sy+block_w*2; y++)
3190 memcpy(dst + w + y*stride, src + w + y*stride, sx+block_w*2 - w);
3194 // intra(black) = neighbors' contribution to the current block
3196 color[i]= get_dc(s, mb_x, mb_y, i);
3198 // get previous score (cannot be cached due to OBMC)
// Re-score the block as it currently stands: with its stored colours when it
// is an intra block on a later pass, with its stored motion vector otherwise
// (the else branch is presumed -- TODO confirm).
3199 if(pass > 0 && (block->type&BLOCK_INTRA)){
3200 int color0[3]= {block->color[0], block->color[1], block->color[2]};
3201 check_block(s, mb_x, mb_y, color0, 1, *obmc_edged, &best_rd);
3203 check_block_inter(s, mb_x, mb_y, block->mx, block->my, *obmc_edged, &best_rd);
// Try candidates for every reference frame. mvr points at this block's entry
// in ref_mvs[ref], so mvr[-1], mvr[1], mvr[-b_stride] and mvr[b_stride] are the
// stored vectors of the left, right, top and bottom neighbours. The score test
// compares this reference against 3/2 of the score of reference ref_b.ref.
3207 for(ref=0; ref < s->ref_frames; ref++){
3208 int16_t (*mvr)[2]= &s->ref_mvs[ref][index];
3209 if(s->ref_scores[ref][index] > s->ref_scores[ref_b.ref][index]*3/2) //FIXME tune threshold
3214 check_block_inter(s, mb_x, mb_y, mvr[0][0], mvr[0][1], *obmc_edged, &best_rd);
3215 check_block_inter(s, mb_x, mb_y, 0, 0, *obmc_edged, &best_rd);
3217 check_block_inter(s, mb_x, mb_y, mvr[-b_stride][0], mvr[-b_stride][1], *obmc_edged, &best_rd);
3219 check_block_inter(s, mb_x, mb_y, mvr[-1][0], mvr[-1][1], *obmc_edged, &best_rd);
3221 check_block_inter(s, mb_x, mb_y, mvr[1][0], mvr[1][1], *obmc_edged, &best_rd);
3223 check_block_inter(s, mb_x, mb_y, mvr[b_stride][0], mvr[b_stride][1], *obmc_edged, &best_rd);
3226 //FIXME avoid subpel interpol / round to nearest integer
// Diamond-shaped search around the current vector: offsets of 4*(i-j) and 4*j
// units in all four sign combinations, for i below FFMAX(dia_size, 1).
// dia_change collects whether any candidate was accepted.
3229 for(i=0; i<FFMAX(s->avctx->dia_size, 1); i++){
3231 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+4*(i-j), block->my+(4*j), *obmc_edged, &best_rd);
3232 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx-4*(i-j), block->my-(4*j), *obmc_edged, &best_rd);
3233 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+4*(i-j), block->my-(4*j), *obmc_edged, &best_rd);
3234 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx-4*(i-j), block->my+(4*j), *obmc_edged, &best_rd);
// Refinement over the eight one-unit offsets listed in square[].
3240 static const int square[8][2]= {{+1, 0},{-1, 0},{ 0,+1},{ 0,-1},{+1,+1},{-1,-1},{+1,-1},{-1,+1},};
3243 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+square[i][0], block->my+square[i][1], *obmc_edged, &best_rd);
3245 //FIXME or try the standard 2 pass qpel or similar
// Remember the vector found for this reference, then compare the score against
// the best reference so far (ref_rd).
3247 mvr[0][0]= block->mx;
3248 mvr[0][1]= block->my;
3249 if(ref_rd > best_rd){
// Finally score the candidate that uses the get_dc() colours.
3257 check_block(s, mb_x, mb_y, color, 1, *obmc_edged, &best_rd);
3258 //FIXME RD style color selection
// If the block differs from its backup, clear BLOCK_OPT on every existing
// neighbour, which makes the BLOCK_OPT test at the top of the loop body fail
// for them on the next pass.
3260 if(!same_block(block, &backup)){
3261 if(tb ) tb ->type &= ~BLOCK_OPT;
3262 if(lb ) lb ->type &= ~BLOCK_OPT;
3263 if(rb ) rb ->type &= ~BLOCK_OPT;
3264 if(bb ) bb ->type &= ~BLOCK_OPT;
3265 if(tlb) tlb->type &= ~BLOCK_OPT;
3266 if(trb) trb->type &= ~BLOCK_OPT;
3267 if(blb) blb->type &= ~BLOCK_OPT;
3268 if(brb) brb->type &= ~BLOCK_OPT;
3273 av_log(NULL, AV_LOG_ERROR, "pass:%d changed:%d\n", pass, change);
// 4-block stage, only when block_max_depth is 1: walk the grid in steps of two
// in both directions so that b[0..3] address one 2x2 group of blocks.
3278 if(s->block_max_depth == 1){
3280 for(mb_y= 0; mb_y<b_height; mb_y+=2){
3281 for(mb_x= 0; mb_x<b_width; mb_x+=2){
3283 int best_rd, init_rd;
3284 const int index= mb_x + mb_y * b_stride;
3287 b[0]= &s->block[index];
3289 b[2]= b[0]+b_stride;
// Test whether the four blocks of the group are already identical
// (presumably the group is then left alone -- TODO confirm).
3291 if(same_block(b[0], b[1]) &&
3292 same_block(b[0], b[2]) &&
3293 same_block(b[0], b[3]))
3296 if(!s->me_cache_generation)
3297 memset(s->me_cache, 0, sizeof(s->me_cache));
3298 s->me_cache_generation += 1<<22;
3300 init_rd= best_rd= get_4block_rd(s, mb_x, mb_y, 0);
3302 //FIXME more multiref search?
// First candidate: the rounded mean of the four motion vectors with reference 0.
3303 check_4block_inter(s, mb_x, mb_y,
3304 (b[0]->mx + b[1]->mx + b[2]->mx + b[3]->mx + 2) >> 2,
3305 (b[0]->my + b[1]->my + b[2]->my + b[3]->my + 2) >> 2, 0, &best_rd);
// Further candidates: the vector and reference of each non-intra block of the group.
3308 if(!(b[i]->type&BLOCK_INTRA))
3309 check_4block_inter(s, mb_x, mb_y, b[i]->mx, b[i]->my, b[i]->ref, &best_rd);
3311 if(init_rd != best_rd)
3315 av_log(NULL, AV_LOG_ERROR, "pass:4mv changed:%d\n", change*4);
/*
 * Quantizes the coefficients of subband b, reading from src and writing to dst
 * (both indexed as x + y*stride).
 *
 * The quantizer step qmul is taken from the qexp table using the clipped sum
 * of the global qlog (s->qlog) and the band's own qlog, and is additionally
 * shifted left by ENCODER_EXTRA_BITS. When s->qlog equals LOSSLESS_QLOG the
 * coefficients are copied without quantization.
 *
 * bias: non-zero selects a rounding offset of 0, zero selects 3/8 of qmul.
 */
3319 static void quantize(SnowContext *s, SubBand *b, IDWTELEM *dst, DWTELEM *src, int stride, int bias){
3320 const int level= b->level;
3321 const int w= b->width;
3322 const int h= b->height;
3323 const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
3324 const int qmul= qexp[qlog&(QROOT-1)]<<((qlog>>QSHIFT) + ENCODER_EXTRA_BITS);
3325 int x,y, thres1, thres2;
// Lossless mode: plain copy of every coefficient.
3328 if(s->qlog == LOSSLESS_QLOG){
3331 dst[x + y*stride]= src[x + y*stride];
3335 bias= bias ? 0 : (3*qmul)>>3;
3336 thres1= ((qmul - bias)>>QEXPSHIFT) - 1;
// Variant that divides by qmul without adding the bias (presumably selected
// when bias is 0 -- TODO confirm). The unsigned comparison tests in one step
// whether i lies outside the band around zero; values inside it are written
// as 0. Magnitudes are divided and the sign is re-applied on store.
3342 int i= src[x + y*stride];
3344 if((unsigned)(i+thres1) > thres2){
3347 i/= qmul; //FIXME optimize
3348 dst[x + y*stride]= i;
3352 i/= qmul; //FIXME optimize
3353 dst[x + y*stride]= -i;
3356 dst[x + y*stride]= 0;
// Variant that adds the rounding bias before dividing by qmul.
3362 int i= src[x + y*stride];
3364 if((unsigned)(i+thres1) > thres2){
3367 i= (i + bias) / qmul; //FIXME optimize
3368 dst[x + y*stride]= i;
3372 i= (i + bias) / qmul; //FIXME optimize
3373 dst[x + y*stride]= -i;
3376 dst[x + y*stride]= 0;
3380 if(level+1 == s->spatial_decomposition_count){
3381 // STOP_TIMER("quantize")
/*
 * Dequantizes rows start_y .. end_y-1 of subband b in place inside the slice
 * buffer sb.
 *
 * qmul is looked up in qexp from the clipped sum of the global and per-band
 * qlog; qadd is a rounding offset derived from s->qbias. Nothing is done when
 * s->qlog equals LOSSLESS_QLOG.
 */
3385 static void dequantize_slice_buffered(SnowContext *s, slice_buffer * sb, SubBand *b, IDWTELEM *src, int stride, int start_y, int end_y){
3386 const int w= b->width;
3387 const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
3388 const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
3389 const int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
3393 if(s->qlog == LOSSLESS_QLOG) return;
3395 for(y=start_y; y<end_y; y++){
3396 // DWTELEM * line = slice_buffer_get_line_from_address(sb, src + (y * stride));
// Row y of the band lives in slice-buffer line y*stride_line + buf_y_offset,
// starting at column buf_x_offset.
3397 IDWTELEM * line = slice_buffer_get_line(sb, (y * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
// Two scaling forms, presumably selected by the sign of i: the first negates,
// scales and negates back so that qadd is applied to the magnitude.
3401 line[x]= -((-i*qmul + qadd)>>(QEXPSHIFT)); //FIXME try different bias
3403 line[x]= (( i*qmul + qadd)>>(QEXPSHIFT));
3407 if(w > 200 /*level+1 == s->spatial_decomposition_count*/){
3408 STOP_TIMER("dquant")
/*
 * Dequantizes subband b in place in src (indexed as x + y*stride).
 *
 * Uses the same qmul / qadd derivation as dequantize_slice_buffered():
 * qmul from the qexp table, qadd from s->qbias. Nothing is done when
 * s->qlog equals LOSSLESS_QLOG.
 */
3412 static void dequantize(SnowContext *s, SubBand *b, IDWTELEM *src, int stride){
3413 const int w= b->width;
3414 const int h= b->height;
3415 const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
3416 const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
3417 const int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
3421 if(s->qlog == LOSSLESS_QLOG) return;
3425 int i= src[x + y*stride];
// Two scaling forms, presumably selected by the sign of i: the first negates,
// scales and negates back so that qadd is applied to the magnitude.
3427 src[x + y*stride]= -((-i*qmul + qadd)>>(QEXPSHIFT)); //FIXME try different bias
3429 src[x + y*stride]= (( i*qmul + qadd)>>(QEXPSHIFT));
3433 if(w > 200 /*level+1 == s->spatial_decomposition_count*/){
3434 STOP_TIMER("dquant")
/*
 * Replaces each coefficient of subband b (in src, indexed as x + y*stride) by
 * its difference from a prediction formed from neighbouring coefficients.
 *
 * The band is walked from the last row and last column backwards, so the left
 * and upper neighbours used for the prediction have not been modified yet when
 * they are read. Three predictor forms are visible: a median of left, top and
 * top-right; a median of left, top and left+top-topleft; and the plain top
 * neighbour. Which form runs presumably depends on use_median and on x -- the
 * selecting conditions should be confirmed against the full function.
 */
3438 static void decorrelate(SnowContext *s, SubBand *b, IDWTELEM *src, int stride, int inverse, int use_median){
3439 const int w= b->width;
3440 const int h= b->height;
3443 for(y=h-1; y>=0; y--){
3444 for(x=w-1; x>=0; x--){
3445 int i= x + y*stride;
// Median of left, top and top-right; falls back to the left neighbour on the
// first row or in the last column.
3449 if(y && x+1<w) src[i] -= mid_pred(src[i - 1], src[i - stride], src[i - stride + 1]);
3450 else src[i] -= src[i - 1];
// Median of left, top and the gradient left+top-topleft; left neighbour only
// on the first row.
3452 if(y) src[i] -= mid_pred(src[i - 1], src[i - stride], src[i - 1] + src[i - stride] - src[i - 1 - stride]);
3453 else src[i] -= src[i - 1];
// Top neighbour only (nothing is subtracted on the first row).
3456 if(y) src[i] -= src[i - stride];
/*
 * Adds the neighbour-based prediction back onto rows start_y .. end_y-1 of
 * subband b held in the slice buffer sb. The predictors are the same ones
 * decorrelate() subtracts, applied here with +=.
 *
 * `line` is the row being rebuilt and `prev` the row above it.
 */
3462 static void correlate_slice_buffered(SnowContext *s, slice_buffer * sb, SubBand *b, IDWTELEM *src, int stride, int inverse, int use_median, int start_y, int end_y){
3463 const int w= b->width;
3468 IDWTELEM * line=0; // silence silly "could be used without having been initialized" warning
// Fetch the row above start_y first (presumably only when start_y is not 0, so
// that it can serve as prev on the first iteration -- TODO confirm).
3472 line = slice_buffer_get_line(sb, ((start_y - 1) * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
3474 for(y=start_y; y<end_y; y++){
3476 // line = slice_buffer_get_line_from_address(sb, src + (y * stride));
3477 line = slice_buffer_get_line(sb, (y * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
// Median of left, top and top-right; left neighbour only on the first row or
// in the last column.
3481 if(y && x+1<w) line[x] += mid_pred(line[x - 1], prev[x], prev[x + 1]);
3482 else line[x] += line[x - 1];
// Median of left, top and the gradient left+top-topleft; left neighbour only
// on the first row.
3484 if(y) line[x] += mid_pred(line[x - 1], prev[x], line[x - 1] + prev[x] - prev[x - 1]);
3485 else line[x] += line[x - 1];
// Top neighbour only.
3488 if(y) line[x] += prev[x];
3493 // STOP_TIMER("correlate")
/*
 * Adds the neighbour-based prediction back onto every coefficient of subband b
 * in src (indexed as x + y*stride). The predictors are the same ones
 * decorrelate() subtracts, applied here with +=.
 */
3496 static void correlate(SnowContext *s, SubBand *b, IDWTELEM *src, int stride, int inverse, int use_median){
3497 const int w= b->width;
3498 const int h= b->height;
3503 int i= x + y*stride;
// Median of left, top and top-right; left neighbour only on the first row or
// in the last column.
3507 if(y && x+1<w) src[i] += mid_pred(src[i - 1], src[i - stride], src[i - stride + 1]);
3508 else src[i] += src[i - 1];
// Median of left, top and the gradient left+top-topleft; left neighbour only
// on the first row.
3510 if(y) src[i] += mid_pred(src[i - 1], src[i - stride], src[i - 1] + src[i - stride] - src[i - 1 - stride]);
3511 else src[i] += src[i - 1];
// Top neighbour only.
3514 if(y) src[i] += src[i - stride];
/*
 * Writes the per-subband qlog values to the range coder s->c using the
 * header_state contexts.
 *
 * Only planes 0 and 1 are written, and orientation 2 is skipped at every
 * level; decode_qlogs() fills those entries from plane 1 and orientation 1
 * respectively. Level 0 starts at orientation 0, all other levels at 1.
 */
3520 static void encode_qlogs(SnowContext *s){
3521 int plane_index, level, orientation;
3523 for(plane_index=0; plane_index<2; plane_index++){
3524 for(level=0; level<s->spatial_decomposition_count; level++){
3525 for(orientation=level ? 1:0; orientation<4; orientation++){
3526 if(orientation==2) continue;
3527 put_symbol(&s->c, s->header_state, s->plane[plane_index].band[level][orientation].qlog, 1);
/*
 * Writes the frame header to the range coder s->c.
 *
 * Order of the visible writes:
 *  - the keyframe flag, coded with a private context (kstate) set to MID_STATE;
 *  - the sequence-level fields (version, always_reset, temporal/spatial
 *    decomposition parameters, colorspace type, chroma shifts, spatial
 *    scalability flag, max_ref_frames-1), presumably for keyframes only;
 *    decode_header() reads them in the same order;
 *  - an update_mc flag followed by the per-plane motion-compensation filter
 *    parameters when they differ from the remembered last_* values;
 *  - a flag plus spatial_decomposition_count when that count changed;
 *  - the differences of spatial_decomposition_type, qlog, mv_scale, qbias and
 *    block_max_depth from their last_* values.
 * The last_* fields are updated at the end to the values just written.
 */
3533 static void encode_header(SnowContext *s){
3537 memset(kstate, MID_STATE, sizeof(kstate));
3539 put_rac(&s->c, kstate, s->keyframe);
// On keyframes, or whenever always_reset is set, the remembered last_* values
// are cleared, so the differences written further down start from zero.
3540 if(s->keyframe || s->always_reset){
3542 s->last_spatial_decomposition_type=
3546 s->last_block_max_depth= 0;
3547 for(plane_index=0; plane_index<2; plane_index++){
3548 Plane *p= &s->plane[plane_index];
3551 memset(p->last_hcoeff, 0, sizeof(p->last_hcoeff));
3555 put_symbol(&s->c, s->header_state, s->version, 0);
3556 put_rac(&s->c, s->header_state, s->always_reset);
3557 put_symbol(&s->c, s->header_state, s->temporal_decomposition_type, 0);
3558 put_symbol(&s->c, s->header_state, s->temporal_decomposition_count, 0);
3559 put_symbol(&s->c, s->header_state, s->spatial_decomposition_count, 0);
3560 put_symbol(&s->c, s->header_state, s->colorspace_type, 0);
3561 put_symbol(&s->c, s->header_state, s->chroma_h_shift, 0);
3562 put_symbol(&s->c, s->header_state, s->chroma_v_shift, 0);
3563 put_rac(&s->c, s->header_state, s->spatial_scalability);
3564 // put_rac(&s->c, s->header_state, s->rate_scalability);
3565 put_symbol(&s->c, s->header_state, s->max_ref_frames-1, 0);
// update_mc becomes non-zero when htaps, diag_mc or the hcoeff table of
// plane 0 or 1 differs from what was last transmitted.
3572 for(plane_index=0; plane_index<2; plane_index++){
3573 Plane *p= &s->plane[plane_index];
3574 update_mc |= p->last_htaps != p->htaps;
3575 update_mc |= p->last_diag_mc != p->diag_mc;
3576 update_mc |= !!memcmp(p->last_hcoeff, p->hcoeff, sizeof(p->hcoeff));
3578 put_rac(&s->c, s->header_state, update_mc);
// Filter parameters for planes 0 and 1. Coefficients htaps/2 .. 1 are written
// as absolute values; decode_header() restores the sign from the index parity
// and derives coefficient 0 itself.
3580 for(plane_index=0; plane_index<2; plane_index++){
3581 Plane *p= &s->plane[plane_index];
3582 put_rac(&s->c, s->header_state, p->diag_mc);
3583 put_symbol(&s->c, s->header_state, p->htaps/2-1, 0);
3584 for(i= p->htaps/2; i; i--)
3585 put_symbol(&s->c, s->header_state, FFABS(p->hcoeff[i]), 0);
3587 p->last_diag_mc= p->diag_mc;
3588 p->last_htaps= p->htaps;
3589 memcpy(p->last_hcoeff, p->hcoeff, sizeof(p->hcoeff));
// Flag 1 plus the new count when spatial_decomposition_count changed, flag 0 otherwise.
3592 if(s->last_spatial_decomposition_count != s->spatial_decomposition_count){
3593 put_rac(&s->c, s->header_state, 1);
3594 put_symbol(&s->c, s->header_state, s->spatial_decomposition_count, 0);
3597 put_rac(&s->c, s->header_state, 0);
// Signed differences from the previously transmitted values.
3600 put_symbol(&s->c, s->header_state, s->spatial_decomposition_type - s->last_spatial_decomposition_type, 1);
3601 put_symbol(&s->c, s->header_state, s->qlog - s->last_qlog , 1);
3602 put_symbol(&s->c, s->header_state, s->mv_scale - s->last_mv_scale, 1);
3603 put_symbol(&s->c, s->header_state, s->qbias - s->last_qbias , 1);
3604 put_symbol(&s->c, s->header_state, s->block_max_depth - s->last_block_max_depth, 1);
3606 s->last_spatial_decomposition_type= s->spatial_decomposition_type;
3607 s->last_qlog = s->qlog;
3608 s->last_qbias = s->qbias;
3609 s->last_mv_scale = s->mv_scale;
3610 s->last_block_max_depth = s->block_max_depth;
3611 s->last_spatial_decomposition_count= s->spatial_decomposition_count;
/*
 * Fills in the qlog of every subband of all three planes.
 *
 * Only planes 0 and 1 with orientation other than 2 are read from the range
 * coder (matching what encode_qlogs() writes). Plane 2 copies the value of
 * plane 1, and orientation 2 copies the value of orientation 1 of the same
 * plane and level.
 */
3614 static void decode_qlogs(SnowContext *s){
3615 int plane_index, level, orientation;
3617 for(plane_index=0; plane_index<3; plane_index++){
3618 for(level=0; level<s->spatial_decomposition_count; level++){
3619 for(orientation=level ? 1:0; orientation<4; orientation++){
3621 if (plane_index==2) q= s->plane[1].band[level][orientation].qlog;
3622 else if(orientation==2) q= s->plane[plane_index].band[level][1].qlog;
3623 else q= get_symbol(&s->c, s->header_state, 1);
3624 s->plane[plane_index].band[level][orientation].qlog= q;
/*
 * Reads the frame header from the range coder s->c into s.
 *
 * Mirrors encode_header(): the keyframe flag is decoded with a private context
 * (kstate) set to MID_STATE, then the sequence-level fields, the optional
 * motion-compensation filter parameters, the optional new
 * spatial_decomposition_count, and finally the signed differences that are
 * added onto spatial_decomposition_type, qlog, mv_scale, qbias and
 * block_max_depth.
 *
 * Returns an int status; the return statements are expected to follow the
 * error logs below -- TODO confirm the exact values.
 */
3630 static int decode_header(SnowContext *s){
3634 memset(kstate, MID_STATE, sizeof(kstate));
3636 s->keyframe= get_rac(&s->c, kstate);
// On keyframes, or when always_reset is set, the fields that are coded as
// differences are reset before the differences are added below.
3637 if(s->keyframe || s->always_reset){
3639 s->spatial_decomposition_type=
3643 s->block_max_depth= 0;
3646 s->version= get_symbol(&s->c, s->header_state, 0);
// NOTE(review): this message has no trailing newline, unlike the av_log()
// messages in encode_init().
3648 av_log(s->avctx, AV_LOG_ERROR, "version %d not supported", s->version);
3651 s->always_reset= get_rac(&s->c, s->header_state);
3652 s->temporal_decomposition_type= get_symbol(&s->c, s->header_state, 0);
3653 s->temporal_decomposition_count= get_symbol(&s->c, s->header_state, 0);
3654 s->spatial_decomposition_count= get_symbol(&s->c, s->header_state, 0);
3655 s->colorspace_type= get_symbol(&s->c, s->header_state, 0);
3656 s->chroma_h_shift= get_symbol(&s->c, s->header_state, 0);
3657 s->chroma_v_shift= get_symbol(&s->c, s->header_state, 0);
3658 s->spatial_scalability= get_rac(&s->c, s->header_state);
3659 // s->rate_scalability= get_rac(&s->c, s->header_state);
3660 s->max_ref_frames= get_symbol(&s->c, s->header_state, 0)+1;
// A set flag means new motion-compensation filter parameters follow for
// planes 0 and 1.
3666 if(get_rac(&s->c, s->header_state)){
3667 for(plane_index=0; plane_index<2; plane_index++){
3668 int htaps, i, sum=0;
3669 Plane *p= &s->plane[plane_index];
3670 p->diag_mc= get_rac(&s->c, s->header_state);
// The coded value is htaps/2-1; the result is range-checked against HTAPS_MAX.
3671 htaps= get_symbol(&s->c, s->header_state, 0)*2 + 2;
3672 if((unsigned)htaps > HTAPS_MAX || htaps==0)
// Coefficients arrive as magnitudes; odd indices are negated. Coefficient 0 is
// not transmitted and is derived as 32 minus the sum of the others.
3675 for(i= htaps/2; i; i--){
3676 p->hcoeff[i]= get_symbol(&s->c, s->header_state, 0) * (1-2*(i&1));
3677 sum += p->hcoeff[i];
3679 p->hcoeff[0]= 32-sum;
// Plane 2 reuses the filter parameters of plane 1.
3681 s->plane[2].diag_mc= s->plane[1].diag_mc;
3682 s->plane[2].htaps = s->plane[1].htaps;
3683 memcpy(s->plane[2].hcoeff, s->plane[1].hcoeff, sizeof(s->plane[1].hcoeff));
// A set flag means a new spatial_decomposition_count follows.
3685 if(get_rac(&s->c, s->header_state)){
3686 s->spatial_decomposition_count= get_symbol(&s->c, s->header_state, 0);
3691 s->spatial_decomposition_type+= get_symbol(&s->c, s->header_state, 1);
3692 if(s->spatial_decomposition_type > 1){
// NOTE(review): this message has no trailing newline either.
3693 av_log(s->avctx, AV_LOG_ERROR, "spatial_decomposition_type %d not supported", s->spatial_decomposition_type);
3697 s->qlog += get_symbol(&s->c, s->header_state, 1);
3698 s->mv_scale += get_symbol(&s->c, s->header_state, 1);
3699 s->qbias += get_symbol(&s->c, s->header_state, 1);
3700 s->block_max_depth+= get_symbol(&s->c, s->header_state, 1);
// Only depths 0 and 1 are accepted; anything else is logged and reset to 0.
// NOTE(review): the message says "too large" although negative values also end
// up here, and it has no trailing newline.
3701 if(s->block_max_depth > 1 || s->block_max_depth < 0){
3702 av_log(s->avctx, AV_LOG_ERROR, "block_max_depth= %d is too large", s->block_max_depth);
3703 s->block_max_depth= 0;
/*
 * Runs QROOT iterations in which v is multiplied by 2^(1/QROOT), so v grows
 * geometrically and doubles over the whole loop. Presumably each intermediate
 * value is stored into the qexp[] table used by the (de)quantizers -- TODO
 * confirm against the full function.
 */
3710 static void init_qexp(void){
3714 for(i=0; i<QROOT; i++){
3716 v *= pow(2, 1.0 / QROOT);
/*
 * Initialisation on the SnowContext stored in avctx->priv_data.
 *
 * Visible steps: initialise the DSP context, redirect entries of the qpel and
 * pixel function tables in s->dsp (the qpel entries [dy+dx/4] of both block
 * sizes are pointed at the corresponding put_h264_qpel_pixels_tab entries),
 * allocate the two full-frame wavelet buffers, fill the scale_mv_ref table and
 * request a buffer for mconly_picture.
 */
3720 static int common_init(AVCodecContext *avctx){
3721 SnowContext *s = avctx->priv_data;
3727 dsputil_init(&s->dsp, avctx);
3730 s->dsp.put_qpel_pixels_tab [0][dy+dx/4]=\
3731 s->dsp.put_no_rnd_qpel_pixels_tab[0][dy+dx/4]=\
3732 s->dsp.put_h264_qpel_pixels_tab[0][dy+dx/4];\
3733 s->dsp.put_qpel_pixels_tab [1][dy+dx/4]=\
3734 s->dsp.put_no_rnd_qpel_pixels_tab[1][dy+dx/4]=\
3735 s->dsp.put_h264_qpel_pixels_tab[1][dy+dx/4];
// mcfh(dx,dy): point the put_pixels_tab and put_no_rnd_pixels_tab entries
// [dy/4+dx/8] at the mc_block_hpel<dx><dy>16 and mc_block_hpel<dx><dy>8
// functions, whose names are built by token pasting.
3754 #define mcfh(dx,dy)\
3755 s->dsp.put_pixels_tab [0][dy/4+dx/8]=\
3756 s->dsp.put_no_rnd_pixels_tab[0][dy/4+dx/8]=\
3757 mc_block_hpel ## dx ## dy ## 16;\
3758 s->dsp.put_pixels_tab [1][dy/4+dx/8]=\
3759 s->dsp.put_no_rnd_pixels_tab[1][dy/4+dx/8]=\
3760 mc_block_hpel ## dx ## dy ## 8;
3770 // dec += FFMAX(s->chroma_h_shift, s->chroma_v_shift);
3772 width= s->avctx->width;
3773 height= s->avctx->height;
// One zero-initialised element per pixel for each buffer.
// NOTE(review): neither allocation result is checked in the lines shown here.
3775 s->spatial_idwt_buffer= av_mallocz(width*height*sizeof(IDWTELEM));
3776 s->spatial_dwt_buffer= av_mallocz(width*height*sizeof(DWTELEM)); //FIXME this does not belong here
// scale_mv_ref[i][j] holds the ratio (i+1)/(j+1) in units of 1/256.
3778 for(i=0; i<MAX_REF_FRAMES; i++)
3779 for(j=0; j<MAX_REF_FRAMES; j++)
3780 scale_mv_ref[i][j] = 256*(i+1)/(j+1);
// NOTE(review): the return value of get_buffer() is not used on this line.
3782 s->avctx->get_buffer(s->avctx, &s->mconly_picture);
/*
 * Sets up the per-plane and per-subband geometry once the header fields
 * (chroma shifts, spatial_decomposition_count) are known.
 *
 * For each of the three planes the width/height are stored, and for every
 * level (from the highest index down to 0) and orientation the SubBand gets
 * its buffer pointers, stride, dimensions and slice-buffer offsets. After each
 * level w and h are presumably halved for the next one -- TODO confirm.
 */
3787 static int common_init_after_header(AVCodecContext *avctx){
3788 SnowContext *s = avctx->priv_data;
3789 int plane_index, level, orientation;
3791 for(plane_index=0; plane_index<3; plane_index++){
3792 int w= s->avctx->width;
3793 int h= s->avctx->height;
// Chroma subsampling (presumably applied only when plane_index is not 0).
3796 w>>= s->chroma_h_shift;
3797 h>>= s->chroma_v_shift;
3799 s->plane[plane_index].width = w;
3800 s->plane[plane_index].height= h;
3802 //av_log(NULL, AV_LOG_DEBUG, "%d %d\n", w, h);
3803 for(level=s->spatial_decomposition_count-1; level>=0; level--){
3804 for(orientation=level ? 1 : 0; orientation<4; orientation++){
3805 SubBand *b= &s->plane[plane_index].band[level][orientation];
// All bands share the one spatial_dwt_buffer; stride and stride_line scale
// with 2^(spatial_decomposition_count - level).
3807 b->buf= s->spatial_dwt_buffer;
3809 b->stride= s->plane[plane_index].width << (s->spatial_decomposition_count - level);
// Half of the current size; bands with bit 0 of orientation clear round the
// width up, bands with orientation <= 1 round the height up.
3810 b->width = (w + !(orientation&1))>>1;
3811 b->height= (h + !(orientation>1))>>1;
3813 b->stride_line = 1 << (s->spatial_decomposition_count - level);
3814 b->buf_x_offset = 0;
3815 b->buf_y_offset = 0;
// Offsets moving the band to the right half and/or to the interleaved lower
// rows (presumably conditional on the orientation bits -- TODO confirm).
3819 b->buf_x_offset = (w+1)>>1;
3822 b->buf += b->stride>>1;
3823 b->buf_y_offset = b->stride_line >> 1;
// ibuf addresses the same element position inside spatial_idwt_buffer.
3825 b->ibuf= s->spatial_idwt_buffer + (b->buf - s->spatial_dwt_buffer);
3828 b->parent= &s->plane[plane_index].band[level-1][orientation];
3829 //FIXME avoid this realloc
// NOTE(review): the av_mallocz() result is not checked in the lines shown here.
3830 av_freep(&b->x_coeff);
3831 b->x_coeff=av_mallocz(((b->width+1) * b->height+1)*sizeof(x_and_coeff));
/*
 * Converts a qscale value (in FF_QP2LAMBDA units) to a qlog value:
 * QROOT times the base-2 logarithm of qscale/FF_QP2LAMBDA, rounded with
 * rint(), plus a constant offset of 61*QROOT/8.
 */
3841 static int qscale2qlog(int qscale){
3842 return rint(QROOT*log(qscale / (float)FF_QP2LAMBDA)/log(2))
3843 + 61*QROOT/8; //<64 >60
/*
 * Single-pass rate control: estimates the complexity of the frame from the
 * luma (plane 0) wavelet coefficients, hands it to the generic rate control
 * and derives the frame quality, lambda and qlog from the result.
 *
 * Returns an int; the computed delta_qlog is presumably the return value on
 * success, with an error value when the quality estimate is negative -- TODO
 * confirm.
 */
3846 static int ratecontrol_1pass(SnowContext *s, AVFrame *pict)
3848 /* estimate the frame's complexity as a sum of weighted dwt coefs.
3849 * FIXME we know exact mv bits at this point,
3850 * but ratecontrol isn't set up to include them. */
3851 uint32_t coef_sum= 0;
3852 int level, orientation, delta_qlog;
3854 for(level=0; level<s->spatial_decomposition_count; level++){
3855 for(orientation=level ? 1 : 0; orientation<4; orientation++){
3856 SubBand *b= &s->plane[0].band[level][orientation];
3857 IDWTELEM *buf= b->ibuf;
3858 const int w= b->width;
3859 const int h= b->height;
3860 const int stride= b->stride;
// Weight per band: the reciprocal (in 16.16 fixed point) of the quantizer step
// obtained from a fixed base qlog of 2*QROOT plus the band's own qlog.
3861 const int qlog= av_clip(2*QROOT + b->qlog, 0, QROOT*16);
3862 const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
3863 const int qdiv= (1<<16)/qmul;
3865 //FIXME this is ugly
// Copy the band from the forward-transform buffer into the IDWT buffer.
3868 buf[x+y*stride]= b->buf[x+y*stride];
3870 decorrelate(s, b, buf, stride, 1, 0);
// Accumulate the weighted absolute value of every coefficient.
3873 coef_sum+= abs(buf[x+y*stride]) * qdiv >> 16;
3877 /* ugly, ratecontrol just takes a sqrt again */
3878 coef_sum = (uint64_t)coef_sum * coef_sum >> 16;
3879 assert(coef_sum < INT_MAX);
// The estimate is stored as mb_var_sum for I frames; the other assignments
// store it as mc_mb_var_sum instead (presumably the non-I case).
3881 if(pict->pict_type == I_TYPE){
3882 s->m.current_picture.mb_var_sum= coef_sum;
3883 s->m.current_picture.mc_mb_var_sum= 0;
3885 s->m.current_picture.mc_mb_var_sum= coef_sum;
3886 s->m.current_picture.mb_var_sum= 0;
3889 pict->quality= ff_rate_estimate_qscale(&s->m, 1);
3890 if (pict->quality < 0)
// lambda is 3/2 of the quality; qlog is moved to the value matching the new quality.
3892 s->lambda= pict->quality * 3/2;
3893 delta_qlog= qscale2qlog(pict->quality) - s->qlog;
3894 s->qlog+= delta_qlog;
/*
 * Derives b->qlog for every subband of plane p from the response of the
 * inverse wavelet transform to a single coefficient.
 *
 * For each band the IDWT buffer is cleared, one coefficient near the centre of
 * the band is set to 256*16, the inverse transform is run over the plane, and
 * an `error` value (presumably accumulated from the outputs d) is converted to
 * a qlog on a logarithmic scale with QROOT steps per octave.
 */
3898 static void calculate_visual_weight(SnowContext *s, Plane *p){
3899 int width = p->width;
3900 int height= p->height;
3901 int level, orientation, x, y;
3903 for(level=0; level<s->spatial_decomposition_count; level++){
3904 for(orientation=level ? 1 : 0; orientation<4; orientation++){
3905 SubBand *b= &p->band[level][orientation];
3906 IDWTELEM *ibuf= b->ibuf;
// Impulse: everything zero except the coefficient at the middle of the band.
3909 memset(s->spatial_idwt_buffer, 0, sizeof(*s->spatial_idwt_buffer)*width*height);
3910 ibuf[b->width/2 + b->height/2*b->stride]= 256*16;
3911 ff_spatial_idwt(s->spatial_idwt_buffer, width, height, width, s->spatial_decomposition_type, s->spatial_decomposition_count);
3912 for(y=0; y<height; y++){
3913 for(x=0; x<width; x++){
3914 int64_t d= s->spatial_idwt_buffer[x + y*width]*16;
// log(352256/sqrt(error)) expressed in base 2^(1/QROOT), rounded to nearest.
3919 b->qlog= (int)(log(352256.0/sqrt(error)) / log(pow(2.0, 1.0/QROOT))+0.5);
3920 // av_log(NULL, AV_LOG_DEBUG, "%d %d %d\n", level, orientation, b->qlog/*, sqrt(error)*/);
/*
 * Accumulates the squared differences between the two plane-sized buffers r0
 * and r1 (both indexed as x + y*p->width) into the score[] grid.
 *
 * Every pixel is mapped to a score cell by removing the offsets xo/yo,
 * dividing by `step` (2^(spatial_decomposition_count - level)) with rounding,
 * and dividing by Q2_STEP. score[] is cleared first and has score_stride
 * entries per row.
 */
3930 static void find_sse(SnowContext *s, Plane *p, int *score, int score_stride, IDWTELEM *r0, IDWTELEM *r1, int level, int orientation){
3931 SubBand *b= &p->band[level][orientation];
3935 int step= 1 << (s->spatial_decomposition_count - level);
3942 //FIXME bias for non zero ?
3944 memset(score, 0, sizeof(*score)*score_stride*((p->height + Q2_STEP-1)/Q2_STEP));
3945 for(y=0; y<p->height; y++){
3946 for(x=0; x<p->width; x++){
3947 int sx= (x-xo + step/2) / step / Q2_STEP;
3948 int sy= (y-yo + step/2) / step / Q2_STEP;
3949 int v= r0[x + y*p->width] - r1[x + y*p->width];
3950 assert(sx>=0 && sy>=0 && sx < score_stride);
3952 score[sx + sy*score_stride] += v*v;
// Checks that the int accumulator has not gone negative.
3953 assert(score[sx + sy*score_stride] >= 0);
/*
 * Runs dequantize() on every subband of plane p inside `buffer`.
 *
 * `buffer` is laid out like s->spatial_idwt_buffer: each band is found at the
 * same element offset that its ibuf pointer has within that buffer.
 */
3958 static void dequantize_all(SnowContext *s, Plane *p, IDWTELEM *buffer, int width, int height){
3959 int level, orientation;
3961 for(level=0; level<s->spatial_decomposition_count; level++){
3962 for(orientation=level ? 1 : 0; orientation<4; orientation++){
3963 SubBand *b= &p->band[level][orientation];
3964 IDWTELEM *dst= buffer + (b->ibuf - s->spatial_idwt_buffer);
3966 dequantize(s, b, dst, b->stride);
/*
 * Forward-transforms `buffer`, quantizes every subband of plane p and leaves
 * the quantized coefficients in s->spatial_idwt_buffer.
 *
 * After the plain quantization into best_dequant, one refinement pass may run:
 * for every band and every (xs, ys) phase of a Q2_STEP grid, the magnitude of
 * the coefficients on that phase is reduced by one in a trial copy, both the
 * untouched and the trial version are dequantized and inverse-transformed, and
 * find_sse() scores each against s->spatial_idwt_buffer. Where the trial score
 * is not worse than the reference score by more than `threshold`, the
 * reduction is applied to best_dequant as well.
 *
 * NOTE(review): best_dequant and idwt2_buffer are variable-length arrays of
 * height*stride elements on the stack, and the two score arrays are VLAs too.
 */
3971 static void dwt_quantize(SnowContext *s, Plane *p, DWTELEM *buffer, int width, int height, int stride, int type){
3972 int level, orientation, ys, xs, x, y, pass;
3973 IDWTELEM best_dequant[height * stride];
3974 IDWTELEM idwt2_buffer[height * stride];
3975 const int score_stride= (width + 10)/Q2_STEP;
3976 int best_score[(width + 10)/Q2_STEP * (height + 10)/Q2_STEP]; //FIXME size
3977 int score[(width + 10)/Q2_STEP * (height + 10)/Q2_STEP]; //FIXME size
3978 int threshold= (s->m.lambda * s->m.lambda) >> 6;
3980 //FIXME pass the copy cleanly ?
3982 // memcpy(dwt_buffer, buffer, height * stride * sizeof(DWTELEM));
3983 ff_spatial_dwt(buffer, width, height, stride, type, s->spatial_decomposition_count);
// Plain quantization of every band into best_dequant.
3985 for(level=0; level<s->spatial_decomposition_count; level++){
3986 for(orientation=level ? 1 : 0; orientation<4; orientation++){
3987 SubBand *b= &p->band[level][orientation];
3988 IDWTELEM *dst= best_dequant + (b->ibuf - s->spatial_idwt_buffer);
3989 DWTELEM *src= buffer + (b-> buf - s->spatial_dwt_buffer);
3990 assert(src == b->buf); // code does not depend on this but it is true currently
3992 quantize(s, b, dst, src, b->stride, s->qbias);
// Refinement; the loop bound allows exactly one pass. The qbias == 0 test marks
// keyframes (presumably the refinement is skipped for them -- TODO confirm).
3995 for(pass=0; pass<1; pass++){
3996 if(s->qbias == 0) //keyframe
3998 for(level=0; level<s->spatial_decomposition_count; level++){
3999 for(orientation=level ? 1 : 0; orientation<4; orientation++){
4000 SubBand *b= &p->band[level][orientation];
4001 IDWTELEM *dst= idwt2_buffer + (b->ibuf - s->spatial_idwt_buffer);
4002 IDWTELEM *best_dst= best_dequant + (b->ibuf - s->spatial_idwt_buffer);
4004 for(ys= 0; ys<Q2_STEP; ys++){
4005 for(xs= 0; xs<Q2_STEP; xs++){
// Reference score: reconstruct from the current best coefficients.
4006 memcpy(idwt2_buffer, best_dequant, height * stride * sizeof(IDWTELEM));
4007 dequantize_all(s, p, idwt2_buffer, width, height);
4008 ff_spatial_idwt(idwt2_buffer, width, height, stride, type, s->spatial_decomposition_count);
4009 find_sse(s, p, best_score, score_stride, idwt2_buffer, s->spatial_idwt_buffer, level, orientation);
// Trial: move every coefficient on this grid phase one step towards zero.
4010 memcpy(idwt2_buffer, best_dequant, height * stride * sizeof(IDWTELEM));
4011 for(y=ys; y<b->height; y+= Q2_STEP){
4012 for(x=xs; x<b->width; x+= Q2_STEP){
4013 if(dst[x + y*b->stride]<0) dst[x + y*b->stride]++;
4014 if(dst[x + y*b->stride]>0) dst[x + y*b->stride]--;
4015 //FIXME try more than just --
4018 dequantize_all(s, p, idwt2_buffer, width, height);
4019 ff_spatial_idwt(idwt2_buffer, width, height, stride, type, s->spatial_decomposition_count);
4020 find_sse(s, p, score, score_stride, idwt2_buffer, s->spatial_idwt_buffer, level, orientation);
// Keep the reduction wherever the trial is within `threshold` of the reference.
4021 for(y=ys; y<b->height; y+= Q2_STEP){
4022 for(x=xs; x<b->width; x+= Q2_STEP){
4023 int score_idx= x/Q2_STEP + (y/Q2_STEP)*score_stride;
4024 if(score[score_idx] <= best_score[score_idx] + threshold){
4025 best_score[score_idx]= score[score_idx];
4026 if(best_dst[x + y*b->stride]<0) best_dst[x + y*b->stride]++;
4027 if(best_dst[x + y*b->stride]>0) best_dst[x + y*b->stride]--;
4028 //FIXME copy instead
4037 memcpy(s->spatial_idwt_buffer, best_dequant, height * stride * sizeof(IDWTELEM)); //FIXME work with that directly instead of copy at the end
/*
 * Encoder initialisation for the SnowContext stored in avctx->priv_data.
 *
 * Visible steps: check that experimental codecs are allowed, reject the 9/7
 * wavelet in combination with lossless settings, copy coding options from
 * avctx, set default motion-compensation filter parameters for all three
 * planes, allocate motion-estimation scratch buffers, set up rate control when
 * it is needed, accept only PIX_FMT_YUV420P, request the input picture buffer
 * and, for ME_ITER, allocate the per-reference motion-vector and score arrays.
 *
 * Returns an int status; error returns are expected after the av_log() error
 * messages below -- TODO confirm the exact values.
 */
4042 static int encode_init(AVCodecContext *avctx)
4044 SnowContext *s = avctx->priv_data;
// The codec may only be used with strict_std_compliance at or below
// FF_COMPLIANCE_EXPERIMENTAL.
4047 if(avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL){
4048 av_log(avctx, AV_LOG_ERROR, "this codec is under development, files encoded with it may not be decodable with future versions!!!\n"
4049 "use vstrict=-2 / -strict -2 to use it anyway\n");
// Fixed-quality mode with global_quality 0 is treated as a lossless request,
// which the DWT_97 wavelet cannot provide.
4053 if(avctx->prediction_method == DWT_97
4054 && (avctx->flags & CODEC_FLAG_QSCALE)
4055 && avctx->global_quality == 0){
4056 av_log(avctx, AV_LOG_ERROR, "the 9/7 wavelet is incompatible with lossless mode\n");
4060 s->spatial_decomposition_type= avctx->prediction_method; //FIXME add decorrelator type r transform_type
4062 s->chroma_h_shift= 1; //FIXME XXX
4063 s->chroma_v_shift= 1;
// mv_scale is 2 with the QPEL flag and 4 without; the 4MV flag enables block depth 1.
4065 s->mv_scale = (avctx->flags & CODEC_FLAG_QPEL) ? 2 : 4;
4066 s->block_max_depth= (avctx->flags & CODEC_FLAG_4MV ) ? 1 : 0;
// Default interpolation filter for every plane: 6 taps, coefficients 40, -10, 2.
4068 for(plane_index=0; plane_index<3; plane_index++){
4069 s->plane[plane_index].diag_mc= 1;
4070 s->plane[plane_index].htaps= 6;
4071 s->plane[plane_index].hcoeff[0]= 40;
4072 s->plane[plane_index].hcoeff[1]= -10;
4073 s->plane[plane_index].hcoeff[2]= 2;
4074 s->plane[plane_index].fast_mc= 1;
4083 s->m.flags = avctx->flags;
4084 s->m.bit_rate= avctx->bit_rate;
// Scratch buffers for the motion estimator held in s->m.
// NOTE(review): none of these allocation results is checked in the lines shown here.
4086 s->m.me.scratchpad= av_mallocz((avctx->width+64)*2*16*2*sizeof(uint8_t));
4087 s->m.me.map = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t));
4088 s->m.me.score_map = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t));
4089 s->m.obmc_scratchpad= av_mallocz(MB_SIZE*MB_SIZE*12*sizeof(uint32_t));
4090 h263_encode_init(&s->m); //mv_penalty
// Number of reference frames: avctx->refs limited to the range 1 .. MAX_REF_FRAMES.
4092 s->max_ref_frames = FFMAX(FFMIN(avctx->refs, MAX_REF_FRAMES), 1);
// First pass of a two-pass encode: make sure a 256-byte stats_out buffer exists.
4094 if(avctx->flags&CODEC_FLAG_PASS1){
4095 if(!avctx->stats_out)
4096 avctx->stats_out = av_mallocz(256);
// Rate control is initialised for second-pass encodes and whenever no fixed
// qscale is requested.
4098 if((avctx->flags&CODEC_FLAG_PASS2) || !(avctx->flags&CODEC_FLAG_QSCALE)){
4099 if(ff_rate_control_init(&s->m) < 0)
// pass1_rc is set when neither a fixed qscale nor second-pass mode is requested.
4102 s->pass1_rc= !(avctx->flags & (CODEC_FLAG_QSCALE|CODEC_FLAG_PASS2));
4104 avctx->coded_frame= &s->current_picture;
4105 switch(avctx->pix_fmt){
4106 // case PIX_FMT_YUV444P:
4107 // case PIX_FMT_YUV422P:
4108 case PIX_FMT_YUV420P:
4110 // case PIX_FMT_YUV411P:
4111 // case PIX_FMT_YUV410P:
4112 s->colorspace_type= 0;
4114 /* case PIX_FMT_RGB32:
4118 av_log(avctx, AV_LOG_ERROR, "pixel format not supported\n");
4121 // avcodec_get_chroma_sub_sample(avctx->pix_fmt, &s->chroma_h_shift, &s->chroma_v_shift);
4122 s->chroma_h_shift= 1;
4123 s->chroma_v_shift= 1;
4125 ff_set_cmp(&s->dsp, s->dsp.me_cmp, s->avctx->me_cmp);
4126 ff_set_cmp(&s->dsp, s->dsp.me_sub_cmp, s->avctx->me_sub_cmp);
4128 s->avctx->get_buffer(s->avctx, &s->input_picture);
// Iterative motion estimation keeps one motion vector and one score per block
// (at the deepest block level) for every reference frame.
4130 if(s->avctx->me_method == ME_ITER){
4132 int size= s->b_width * s->b_height << 2*s->block_max_depth;
4133 for(i=0; i<s->max_ref_frames; i++){
4134 s->ref_mvs[i]= av_mallocz(size*sizeof(int16_t[2]));
4135 s->ref_scores[i]= av_mallocz(size*sizeof(uint32_t));
/*
 * Builds half-pel interpolated planes of `frame` into halfpel[1..3][p].
 *
 * For each plane three buffers of ls * (h + 2*EDGE_WIDTH) bytes are allocated
 * and the stored pointers are advanced by EDGE_WIDTH rows plus EDGE_WIDTH
 * bytes. Each output sample is computed with the 6-tap filter
 * (1, -5, 20, 20, -5, 1), rounded and divided by 32: horizontally for
 * halfpel[1], vertically for halfpel[2]. halfpel[3] uses the vertical form as
 * well (presumably reading from the horizontally filtered plane -- TODO
 * confirm).
 *
 * The function asserts that CODEC_FLAG_EMU_EDGE is not set.
 *
 * NOTE(review): the av_malloc() results are offset and stored without a NULL
 * check in the lines shown here.
 */
4142 static void halfpel_interpol(SnowContext *s, uint8_t *halfpel[4][4], AVFrame *frame){
4145 assert(!(s->avctx->flags & CODEC_FLAG_EMU_EDGE));
4149 int w= s->avctx->width >>is_chroma;
4150 int h= s->avctx->height >>is_chroma;
4151 int ls= frame->linesize[p];
4152 uint8_t *src= frame->data[p];
4154 halfpel[1][p]= (uint8_t*)av_malloc(ls * (h+2*EDGE_WIDTH)) + EDGE_WIDTH*(1+ls);
4155 halfpel[2][p]= (uint8_t*)av_malloc(ls * (h+2*EDGE_WIDTH)) + EDGE_WIDTH*(1+ls);
4156 halfpel[3][p]= (uint8_t*)av_malloc(ls * (h+2*EDGE_WIDTH)) + EDGE_WIDTH*(1+ls);
// Horizontal half-pel sample between src[i] and src[i+1].
4163 halfpel[1][p][i]= (20*(src[i] + src[i+1]) - 5*(src[i-1] + src[i+2]) + (src[i-2] + src[i+3]) + 16 )>>5;
// Vertical half-pel sample between src[i] and src[i+ls].
4170 halfpel[2][p][i]= (20*(src[i] + src[i+ls]) - 5*(src[i-ls] + src[i+2*ls]) + (src[i-2*ls] + src[i+3*ls]) + 16 )>>5;
4178 halfpel[3][p][i]= (20*(src[i] + src[i+ls]) - 5*(src[i-ls] + src[i+2*ls]) + (src[i-2*ls] + src[i+3*ls]) + 16 )>>5;
/*
 * Prepares the picture buffers for a new frame.
 *
 * The edges of the current picture are extended, the reference list is rotated
 * so that the current picture becomes last_picture[0], and a new buffer is
 * requested for current_picture.
 *
 * Returns an int status; an error return is expected after the get_buffer()
 * failure message -- TODO confirm the exact value.
 */
4186 static int frame_start(SnowContext *s){
4188 int w= s->avctx->width; //FIXME round up to x16 ?
4189 int h= s->avctx->height;
// Pad all three planes of the finished picture; planes 1 and 2 use half the
// width, height and edge size.
4191 if(s->current_picture.data[0]){
4192 draw_edges(s->current_picture.data[0], s->current_picture.linesize[0], w , h , EDGE_WIDTH );
4193 draw_edges(s->current_picture.data[1], s->current_picture.linesize[1], w>>1, h>>1, EDGE_WIDTH/2);
4194 draw_edges(s->current_picture.data[2], s->current_picture.linesize[2], w>>1, h>>1, EDGE_WIDTH/2);
// Rotate the reference list: the oldest entry is saved in tmp, the remaining
// entries (and their half-pel planes) move up by one slot, the current picture
// becomes reference 0 and tmp becomes the new current picture.
4197 tmp= s->last_picture[s->max_ref_frames-1];
4198 memmove(s->last_picture+1, s->last_picture, (s->max_ref_frames-1)*sizeof(AVFrame));
4199 memmove(s->halfpel_plane+1, s->halfpel_plane, (s->max_ref_frames-1)*sizeof(void*)*4*4);
4200 #ifdef USE_HALFPEL_PLANE
4201 if(s->current_picture.data[0])
4202 halfpel_interpol(s, s->halfpel_plane[0], &s->current_picture);
4204 s->last_picture[0]= s->current_picture;
4205 s->current_picture= tmp;
// Walk the references that hold data; the test looks for a reference that
// follows a keyframe (presumably the count stops there -- TODO confirm).
4211 for(i=0; i<s->max_ref_frames && s->last_picture[i].data[0]; i++)
4212 if(i && s->last_picture[i-1].key_frame)
4217 s->current_picture.reference= 1;
4218 if(s->avctx->get_buffer(s->avctx, &s->current_picture) < 0){
4219 av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed\n");
4223 s->current_picture.key_frame= s->keyframe;
/**
 * Encode one picture into the output buffer using the range coder.
 *
 * @param avctx    codec context; avctx->priv_data is used as the SnowContext
 * @param buf      output buffer handed to ff_init_range_encoder()
 * @param buf_size size of buf, passed through to the range encoder
 * @param data     input picture, accessed as an AVFrame
 * @return the result of ff_rac_terminate() when the frame is encoded to completion
 */
4228 static int encode_frame(AVCodecContext *avctx, unsigned char *buf, int buf_size, void *data){
4229 SnowContext *s = avctx->priv_data;
4230 RangeCoder * const c= &s->c;
4231 AVFrame *pict = data;
4232 const int width= s->avctx->width;
4233 const int height= s->avctx->height;
4234 int level, orientation, plane_index, i, y;
/* Scratch copies of the coder context states; filled below and restored
 * after the range encoder is re-initialised for one-pass rate control. */
4235 uint8_t rc_header_bak[sizeof(s->header_state)];
4236 uint8_t rc_block_bak[sizeof(s->block_state)];
4238 ff_init_range_encoder(c, buf, buf_size);
4239 ff_build_rac_states(c, 0.05*(1LL<<32), 256-8);
/* Copy the caller's picture row by row into s->input_picture. */
4243 for(y=0; y<(height>>shift); y++)
4244 memcpy(&s->input_picture.data[i][y * s->input_picture.linesize[i]],
4245 &pict->data[i][y * pict->linesize[i]],
4248 s->new_picture = *pict;
4250 s->m.picture_number= avctx->frame_number;
/* Two-pass mode: the picture type is taken from the stored rate-control
 * entry for this frame number. */
4251 if(avctx->flags&CODEC_FLAG_PASS2){
4253 pict->pict_type= s->m.rc_context.entry[avctx->frame_number].new_pict_type;
4254 s->keyframe= pict->pict_type==FF_I_TYPE;
4255 if(!(avctx->flags&CODEC_FLAG_QSCALE)) {
4256 pict->quality= ff_rate_estimate_qscale(&s->m, 0);
4257 if (pict->quality < 0)
/* Keyframe when gop_size is 0 or on every gop_size-th frame. */
4261 s->keyframe= avctx->gop_size==0 || avctx->frame_number % avctx->gop_size == 0;
4263 pict->pict_type= s->keyframe ? FF_I_TYPE : FF_P_TYPE;
/* With s->pass1_rc set, the very first frame uses a fixed quality. */
4266 if(s->pass1_rc && avctx->frame_number == 0)
4267 pict->quality= 2*FF_QP2LAMBDA;
4269 s->qlog= qscale2qlog(pict->quality);
4270 s->lambda = pict->quality * 3/2;
/* Select lossless coding when qlog is negative, or when quality is 0 and
 * CODEC_FLAG_QSCALE is set. */
4272 if(s->qlog < 0 || (!pict->quality && (avctx->flags & CODEC_FLAG_QSCALE))){
4273 s->qlog= LOSSLESS_QLOG;
4275 }//else keep previous frame's qlog until after motion est
4279 s->m.current_picture_ptr= &s->m.current_picture;
/* P frames: populate the embedded s->m context (picture pointers, strides,
 * macroblock geometry, flags, lambda/qscale) from the Snow state. */
4280 if(pict->pict_type == P_TYPE){
/* Dimensions in 16x16 units, rounded up. */
4281 int block_width = (width +15)>>4;
4282 int block_height= (height+15)>>4;
4283 int stride= s->current_picture.linesize[0];
4285 assert(s->current_picture.data[0]);
4286 assert(s->last_picture[0].data[0]);
4288 s->m.avctx= s->avctx;
4289 s->m.current_picture.data[0]= s->current_picture.data[0];
4290 s->m. last_picture.data[0]= s->last_picture[0].data[0];
4291 s->m. new_picture.data[0]= s-> input_picture.data[0];
4292 s->m. last_picture_ptr= &s->m. last_picture;
4294 s->m. last_picture.linesize[0]=
4295 s->m. new_picture.linesize[0]=
4296 s->m.current_picture.linesize[0]= stride;
4297 s->m.uvlinesize= s->current_picture.linesize[1];
4299 s->m.height= height;
4300 s->m.mb_width = block_width;
4301 s->m.mb_height= block_height;
4302 s->m.mb_stride= s->m.mb_width+1;
4303 s->m.b8_stride= 2*s->m.mb_width+1;
4305 s->m.pict_type= pict->pict_type;
4306 s->m.me_method= s->avctx->me_method;
4307 s->m.me.scene_change_score=0;
4308 s->m.flags= s->avctx->flags;
4309 s->m.quarter_sample= (s->avctx->flags & CODEC_FLAG_QPEL)!=0;
4310 s->m.out_format= FMT_H263;
4311 s->m.unrestricted_mv= 1;
4313 s->m.lambda = s->lambda;
4314 s->m.qscale= (s->m.lambda*139 + FF_LAMBDA_SCALE*64) >> (FF_LAMBDA_SHIFT + 7);
4315 s->lambda2= s->m.lambda2= (s->m.lambda*s->m.lambda + FF_LAMBDA_SCALE/2) >> FF_LAMBDA_SHIFT;
4317 s->m.dsp= s->dsp; //move
/* Save the header/block coder states so they can be restored once the
 * range encoder is restarted after ratecontrol_1pass(). */
4323 memcpy(rc_header_bak, s->header_state, sizeof(s->header_state));
4324 memcpy(rc_block_bak, s->block_state, sizeof(s->block_state));
/* NOTE(review): both assignments below use the same value (5), so the
 * I_TYPE test currently has no effect on the decomposition count. */
4329 if(pict->pict_type == I_TYPE)
4330 s->spatial_decomposition_count= 5;
4332 s->spatial_decomposition_count= 5;
4334 s->m.pict_type = pict->pict_type;
4335 s->qbias= pict->pict_type == P_TYPE ? 2 : 0;
4337 common_init_after_header(avctx);
/* Recompute the per-plane visual weights when the decomposition count
 * differs from the last recorded one. */
4339 if(s->last_spatial_decomposition_count != s->spatial_decomposition_count){
4340 for(plane_index=0; plane_index<3; plane_index++){
4341 calculate_visual_weight(s, &s->plane[plane_index]);
/* Bit accounting: misc_bits is what has been written so far, mv_bits is
 * what encode_blocks() adds on top of that. */
4346 s->m.misc_bits = 8*(s->c.bytestream - s->c.bytestream_start);
4347 encode_blocks(s, 1);
4348 s->m.mv_bits = 8*(s->c.bytestream - s->c.bytestream_start) - s->m.misc_bits;
/* Per-plane coding loop. */
4350 for(plane_index=0; plane_index<3; plane_index++){
4351 Plane *p= &s->plane[plane_index];
4355 // int bits= put_bits_count(&s->c.pb);
4357 if(!(avctx->flags2 & CODEC_FLAG2_MEMC_ONLY)){
/* Load the input plane into spatial_idwt_buffer, scaled up by FRAC_BITS. */
4359 if(pict->data[plane_index]) //FIXME gray hack
4362 s->spatial_idwt_buffer[y*w + x]= pict->data[plane_index][y*pict->linesize[plane_index] + x]<<FRAC_BITS;
4365 predict_plane(s, s->spatial_idwt_buffer, plane_index, 0);
/* Scene change on a P frame outside pass 2: restart the range encoder and
 * turn the picture into an I frame / keyframe. */
4368 && pict->pict_type == P_TYPE
4369 && !(avctx->flags&CODEC_FLAG_PASS2)
4370 && s->m.me.scene_change_score > s->avctx->scenechange_threshold){
4371 ff_init_range_encoder(c, buf, buf_size);
4372 ff_build_rac_states(c, 0.05*(1LL<<32), 256-8);
4373 pict->pict_type= FF_I_TYPE;
4375 s->current_picture.key_frame=1;
/* Fill spatial_dwt_buffer from spatial_idwt_buffer: the lossless path
 * shifts the FRAC_BITS back out (with a rounding offset), the other path
 * shifts left by ENCODER_EXTRA_BITS. */
4379 if(s->qlog == LOSSLESS_QLOG){
4382 s->spatial_dwt_buffer[y*w + x]= (s->spatial_idwt_buffer[y*w + x] + (1<<(FRAC_BITS-1))-1)>>FRAC_BITS;
4388 s->spatial_dwt_buffer[y*w + x]=s->spatial_idwt_buffer[y*w + x]<<ENCODER_EXTRA_BITS;
4394 dwt_quantize(s, p, s->spatial_dwt_buffer, w, h, w, s->spatial_decomposition_type);
4396 ff_spatial_dwt(s->spatial_dwt_buffer, w, h, w, s->spatial_decomposition_type, s->spatial_decomposition_count);
/* One-pass rate control is evaluated once per frame, on plane 0. The range
 * encoder is then restarted, the saved coder states are restored and the
 * blocks are encoded again. */
4398 if(s->pass1_rc && plane_index==0){
4399 int delta_qlog = ratecontrol_1pass(s, pict);
4400 if (delta_qlog <= INT_MIN)
4403 //reordering qlog in the bitstream would eliminate this reset
4404 ff_init_range_encoder(c, buf, buf_size);
4405 memcpy(s->header_state, rc_header_bak, sizeof(s->header_state));
4406 memcpy(s->block_state, rc_block_bak, sizeof(s->block_state));
4408 encode_blocks(s, 0);
/* Code every subband: level 0 has all four orientations, higher levels
 * start at orientation 1. */
4412 for(level=0; level<s->spatial_decomposition_count; level++){
4413 for(orientation=level ? 1 : 0; orientation<4; orientation++){
4414 SubBand *b= &p->band[level][orientation];
4417 quantize(s, b, b->ibuf, b->buf, b->stride, s->qbias);
4419 decorrelate(s, b, b->ibuf, b->stride, pict->pict_type == P_TYPE, 0);
4420 encode_subband(s, b, b->ibuf, b->parent ? b->parent->ibuf : NULL, b->stride, orientation);
4421 assert(b->parent==NULL || b->parent->stride == b->stride*2);
/* presumably undoes decorrelate() so ibuf can be dequantized below -- TODO confirm */
4423 correlate(s, b, b->ibuf, b->stride, 1, 0);
4426 // av_log(NULL, AV_LOG_DEBUG, "plane:%d bits:%d\n", plane_index, put_bits_count(&s->c.pb) - bits);
/* Reconstruction: dequantize each subband, then run the inverse DWT. */
4428 for(level=0; level<s->spatial_decomposition_count; level++){
4429 for(orientation=level ? 1 : 0; orientation<4; orientation++){
4430 SubBand *b= &p->band[level][orientation];
4432 dequantize(s, b, b->ibuf, b->stride);
4436 ff_spatial_idwt(s->spatial_idwt_buffer, w, h, w, s->spatial_decomposition_type, s->spatial_decomposition_count);
/* Lossless: bring the samples back to FRAC_BITS fixed point. */
4437 if(s->qlog == LOSSLESS_QLOG){
4440 s->spatial_idwt_buffer[y*w + x]<<=FRAC_BITS;
4445 predict_plane(s, s->spatial_idwt_buffer, plane_index, 1);
4446 STOP_TIMER("pred-conv")}
/* I frames: the input pixels are copied directly into current_picture;
 * otherwise the buffer is zeroed before predict_plane() is called. */
4449 if(pict->pict_type == I_TYPE){
4452 s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x]=
4453 pict->data[plane_index][y*pict->linesize[plane_index] + x];
4457 memset(s->spatial_idwt_buffer, 0, sizeof(IDWTELEM)*w*h);
4458 predict_plane(s, s->spatial_idwt_buffer, plane_index, 1);
/* Optional PSNR statistics: compare current_picture against the input and
 * record the per-plane error. */
4461 if(s->avctx->flags&CODEC_FLAG_PSNR){
4464 if(pict->data[plane_index]) //FIXME gray hack
4467 int d= s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x] - pict->data[plane_index][y*pict->linesize[plane_index] + x];
4471 s->avctx->error[plane_index] += error;
4472 s->current_picture.error[plane_index] = error;
/* Release the last reference slot and free its halfpel_plane buffers. The
 * pointer is moved back by EDGE_WIDTH*(1+linesize) before av_free(),
 * presumably mirroring the offset applied at allocation -- TODO confirm. */
4476 if(s->last_picture[s->max_ref_frames-1].data[0]){
4477 avctx->release_buffer(avctx, &s->last_picture[s->max_ref_frames-1]);
4479 if(s->halfpel_plane[s->max_ref_frames-1][1+i/3][i%3])
4480 av_free(s->halfpel_plane[s->max_ref_frames-1][1+i/3][i%3] - EDGE_WIDTH*(1+s->current_picture.linesize[i%3]));
/* Per-frame bookkeeping: frame_bits is the total written; p_tex_bits is
 * what remains after subtracting misc_bits and mv_bits. */
4483 s->current_picture.coded_picture_number = avctx->frame_number;
4484 s->current_picture.pict_type = pict->pict_type;
4485 s->current_picture.quality = pict->quality;
4486 s->m.frame_bits = 8*(s->c.bytestream - s->c.bytestream_start);
4487 s->m.p_tex_bits = s->m.frame_bits - s->m.misc_bits - s->m.mv_bits;
4488 s->m.current_picture.display_picture_number =
4489 s->m.current_picture.coded_picture_number = avctx->frame_number;
4490 s->m.current_picture.quality = pict->quality;
4491 s->m.total_bits += 8*(s->c.bytestream - s->c.bytestream_start);
4493 if (ff_rate_estimate_qscale(&s->m, 0) < 0)
4495 if(avctx->flags&CODEC_FLAG_PASS1)
4496 ff_write_pass1_stats(&s->m);
4497 s->m.last_pict_type = s->m.pict_type;
/* Export the bit statistics through the codec context. */
4498 avctx->frame_bits = s->m.frame_bits;
4499 avctx->mv_bits = s->m.mv_bits;
4500 avctx->misc_bits = s->m.misc_bits;
4501 avctx->p_tex_bits = s->m.p_tex_bits;
4505 return ff_rac_terminate(c);
/**
 * Free the buffers owned by a SnowContext: the DWT/IDWT work buffers, the
 * motion-estimation scratch areas in s->m, the block array, the per-reference
 * mv/score tables and the x_coeff array of every subband. Any reference
 * picture that still has data is returned through release_buffer().
 */
4508 static void common_end(SnowContext *s){
4509 int plane_index, level, orientation, i;
4511 av_freep(&s->spatial_dwt_buffer);
4512 av_freep(&s->spatial_idwt_buffer);
4514 av_freep(&s->m.me.scratchpad);
4515 av_freep(&s->m.me.map);
4516 av_freep(&s->m.me.score_map);
4517 av_freep(&s->m.obmc_scratchpad);
4519 av_freep(&s->block);
/* Walk every possible reference slot, not only the ones in use. */
4521 for(i=0; i<MAX_REF_FRAMES; i++){
4522 av_freep(&s->ref_mvs[i]);
4523 av_freep(&s->ref_scores[i]);
4524 if(s->last_picture[i].data[0])
4525 s->avctx->release_buffer(s->avctx, &s->last_picture[i]);
/* Subbands: level 0 has four orientations, higher levels start at 1. */
4528 for(plane_index=0; plane_index<3; plane_index++){
4529 for(level=s->spatial_decomposition_count-1; level>=0; level--){
4530 for(orientation=level ? 1 : 0; orientation<4; orientation++){
4531 SubBand *b= &s->plane[plane_index].band[level][orientation];
4533 av_freep(&b->x_coeff);
/**
 * Encoder teardown callback.
 *
 * @param avctx codec context; avctx->priv_data is the SnowContext
 */
4539 static int encode_end(AVCodecContext *avctx)
4541 SnowContext *s = avctx->priv_data;
/* Free the statistics output buffer attached to the codec context. */
4544 av_free(avctx->stats_out);
/**
 * Decoder initialisation callback.
 *
 * @param avctx codec context to set up
 */
4549 static int decode_init(AVCodecContext *avctx)
/* The decoder always reports YUV 4:2:0 planar output. */
4551 avctx->pix_fmt= PIX_FMT_YUV420P;
/**
 * Decode one frame from a range-coded buffer.
 *
 * @param avctx     codec context; avctx->priv_data is used as the SnowContext
 * @param data      output, written as an AVFrame
 * @param data_size set to sizeof(AVFrame) once a picture has been stored
 * @param buf       input bitstream handed to ff_init_range_decoder()
 * @param buf_size  size of buf
 */
4558 static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, uint8_t *buf, int buf_size){
4559 SnowContext *s = avctx->priv_data;
4560 RangeCoder * const c= &s->c;
4562 AVFrame *picture = data;
4563 int level, orientation, plane_index, i;
4565 ff_init_range_decoder(c, buf, buf_size);
4566 ff_build_rac_states(c, 0.05*(1LL<<32), 256-8);
4568 s->current_picture.pict_type= FF_I_TYPE; //FIXME I vs. P
/* Parse the frame header; a negative result signals failure. */
4569 if(decode_header(s)<0)
4571 common_init_after_header(avctx);
4573 // realloc slice buffer for the case that spatial_decomposition_count changed
4574 slice_buffer_destroy(&s->sb);
4575 slice_buffer_init(&s->sb, s->plane[0].height, (MB_SIZE >> s->block_max_depth) + s->spatial_decomposition_count * 8 + 1, s->plane[0].width, s->spatial_idwt_buffer);
/* fast_mc is only enabled for one specific filter configuration
 * (diag_mc set, 6 taps and particular coefficient values). */
4577 for(plane_index=0; plane_index<3; plane_index++){
4578 Plane *p= &s->plane[plane_index];
4579 p->fast_mc= p->diag_mc && p->htaps==6 && p->hcoeff[0]==40
4580 && p->hcoeff[1]==-10
/* Allocate the block array on first use. */
4584 if(!s->block) alloc_blocks(s);
4587 //keyframe flag duplication mess FIXME
4588 if(avctx->debug&FF_DEBUG_PICT_INFO)
4589 av_log(avctx, AV_LOG_ERROR, "keyframe:%d qlog:%d\n", s->keyframe, s->qlog);
/* Per-plane decoding loop. */
4593 for(plane_index=0; plane_index<3; plane_index++){
4594 Plane *p= &s->plane[plane_index];
4598 int decode_state[MAX_DECOMPOSITIONS][4][1]; /* Stored state info for unpack_coeffs. 1 variable per instance. */
/* Debug bit 2048: build a prediction-only picture and copy it into
 * mconly_picture.
 * NOTE(review): the memset clears spatial_dwt_buffer while predict_plane()
 * is given spatial_idwt_buffer; the comparable path in encode_frame()
 * clears spatial_idwt_buffer -- confirm which buffer is intended. */
4600 if(s->avctx->debug&2048){
4601 memset(s->spatial_dwt_buffer, 0, sizeof(DWTELEM)*w*h);
4602 predict_plane(s, s->spatial_idwt_buffer, plane_index, 1);
4606 int v= s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x];
4607 s->mconly_picture.data[plane_index][y*s->mconly_picture.linesize[plane_index] + x]= v;
/* Unpack the coefficients of every subband (level 0 has four orientations,
 * higher levels start at orientation 1). */
4613 for(level=0; level<s->spatial_decomposition_count; level++){
4614 for(orientation=level ? 1 : 0; orientation<4; orientation++){
4615 SubBand *b= &p->band[level][orientation];
4616 unpack_coeffs(s, b, b->parent, orientation);
4619 STOP_TIMER("unpack coeffs");
4623 const int mb_h= s->b_height << s->block_max_depth;
4624 const int block_size = MB_SIZE >> s->block_max_depth;
/* Planes other than plane 0 use half the block size. */
4625 const int block_w = plane_index ? block_size/2 : block_size;
4627 dwt_compose_t cs[MAX_DECOMPOSITIONS];
4632 ff_spatial_idwt_buffered_init(cs, &s->sb, w, h, 1, s->spatial_decomposition_type, s->spatial_decomposition_count);
/* Slice loop, one iteration per block row; the bound is inclusive. */
4633 for(mb_y=0; mb_y<=mb_h; mb_y++){
4635 int slice_starty = block_w*mb_y;
4636 int slice_h = block_w*(mb_y+1);
/* For non-keyframes (unless debug bit 512 is set) the slice window is
 * moved up by half a block. */
4637 if (!(s->keyframe || s->avctx->debug&512)){
4638 slice_starty = FFMAX(0, slice_starty - (block_w >> 1));
4639 slice_h -= (block_w >> 1);
/* Work out, for each subband, which rows belong to this slice. */
4644 for(level=0; level<s->spatial_decomposition_count; level++){
4645 for(orientation=level ? 1 : 0; orientation<4; orientation++){
4646 SubBand *b= &p->band[level][orientation];
4649 int our_mb_start = mb_y;
4650 int our_mb_end = (mb_y + 1);
4652 start_y = (mb_y ? ((block_w * our_mb_start) >> (s->spatial_decomposition_count - level)) + s->spatial_decomposition_count - level + extra: 0);
4653 end_y = (((block_w * our_mb_end) >> (s->spatial_decomposition_count - level)) + s->spatial_decomposition_count - level + extra);
4654 if (!(s->keyframe || s->avctx->debug&512)){
4655 start_y = FFMAX(0, start_y - (block_w >> (1+s->spatial_decomposition_count - level)));
4656 end_y = FFMAX(0, end_y - (block_w >> (1+s->spatial_decomposition_count - level)));
/* Clamp the row range to the subband height. */
4658 start_y = FFMIN(b->height, start_y);
4659 end_y = FFMIN(b->height, end_y);
4661 if (start_y != end_y){
/* Orientation 0 is handled through band[0][0], which is additionally
 * passed through correlate_slice_buffered() and
 * dequantize_slice_buffered(). */
4662 if (orientation == 0){
4663 SubBand * correlate_band = &p->band[0][0];
4664 int correlate_end_y = FFMIN(b->height, end_y + 1);
4665 int correlate_start_y = FFMIN(b->height, (start_y ? start_y + 1 : 0));
4666 decode_subband_slice_buffered(s, correlate_band, &s->sb, correlate_start_y, correlate_end_y, decode_state[0][0]);
4667 correlate_slice_buffered(s, &s->sb, correlate_band, correlate_band->ibuf, correlate_band->stride, 1, 0, correlate_start_y, correlate_end_y);
4668 dequantize_slice_buffered(s, &s->sb, correlate_band, correlate_band->ibuf, correlate_band->stride, start_y, end_y);
4671 decode_subband_slice_buffered(s, b, &s->sb, start_y, end_y, decode_state[level][orientation]);
4675 STOP_TIMER("decode_subband_slice");
/* Run the buffered inverse DWT up to the end of this slice, advancing four
 * rows per call. */
4679 for(; yd<slice_h; yd+=4){
4680 ff_spatial_idwt_buffered_slice(&s->dsp, cs, &s->sb, w, h, 1, s->spatial_decomposition_type, s->spatial_decomposition_count, yd);
4682 STOP_TIMER("idwt slice");}
/* Lossless: scale each finished line up by FRAC_BITS. */
4685 if(s->qlog == LOSSLESS_QLOG){
4686 for(; yq<slice_h && yq<h; yq++){
4687 IDWTELEM * line = slice_buffer_get_line(&s->sb, yq);
4689 line[x] <<= FRAC_BITS;
4694 predict_slice_buffered(s, &s->sb, s->spatial_idwt_buffer, plane_index, 1, mb_y);
/* Release slice-buffer lines within the clamped window of this slice. */
4696 y = FFMIN(p->height, slice_starty);
4697 end_y = FFMIN(p->height, slice_h);
4699 slice_buffer_release(&s->sb, y++);
4702 slice_buffer_flush(&s->sb);
4704 STOP_TIMER("idwt + predict_slices")}
/* Release the last reference slot and free its halfpel_plane buffers. The
 * pointer is moved back by EDGE_WIDTH*(1+linesize) before av_free(),
 * presumably mirroring the offset applied at allocation -- TODO confirm. */
4709 if(s->last_picture[s->max_ref_frames-1].data[0]){
4710 avctx->release_buffer(avctx, &s->last_picture[s->max_ref_frames-1]);
4712 if(s->halfpel_plane[s->max_ref_frames-1][1+i/3][i%3])
4713 av_free(s->halfpel_plane[s->max_ref_frames-1][1+i/3][i%3] - EDGE_WIDTH*(1+s->current_picture.linesize[i%3]));
/* Output the decoded picture, or the prediction-only picture when debug
 * bit 2048 is set. */
4716 if(!(s->avctx->debug&2048))
4717 *picture= s->current_picture;
4719 *picture= s->mconly_picture;
4721 *data_size = sizeof(AVFrame);
/* Bytes consumed by the range decoder; zero is reported as an error. */
4723 bytes_read= c->bytestream - c->bytestream_start;
4724 if(bytes_read ==0) av_log(s->avctx, AV_LOG_ERROR, "error at end of frame\n"); //FIXME
/**
 * Decoder teardown callback.
 *
 * @param avctx codec context; avctx->priv_data is the SnowContext
 */
4729 static int decode_end(AVCodecContext *avctx)
4731 SnowContext *s = avctx->priv_data;
/* Destroy the slice buffer held in the context. */
4733 slice_buffer_destroy(&s->sb);
/* Codec table entry for the Snow decoder. */
4740 AVCodec snow_decoder = {
/* Size of the private context. */
4744 sizeof(SnowContext),
/* This entry is 0; the CODEC_CAP_* names appear only inside comments. */
4749 0 /*CODEC_CAP_DR1*/ /*| CODEC_CAP_DRAW_HORIZ_BAND*/,
4753 #ifdef CONFIG_SNOW_ENCODER
/* Codec table entry for the Snow encoder. */
4754 AVCodec snow_encoder = {
/* Size of the private context. */
4758 sizeof(SnowContext),
4775 int buffer[2][width*height];
4778 s.spatial_decomposition_count=6;
4779 s.spatial_decomposition_type=1;
4781 printf("testing 5/3 DWT\n");
4782 for(i=0; i<width*height; i++)
4783 buffer[0][i]= buffer[1][i]= random()%54321 - 12345;
4785 ff_spatial_dwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4786 ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4788 for(i=0; i<width*height; i++)
4789 if(buffer[0][i]!= buffer[1][i]) printf("fsck: %d %d %d\n",i, buffer[0][i], buffer[1][i]);
4791 printf("testing 9/7 DWT\n");
4792 s.spatial_decomposition_type=0;
4793 for(i=0; i<width*height; i++)
4794 buffer[0][i]= buffer[1][i]= random()%54321 - 12345;
4796 ff_spatial_dwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4797 ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4799 for(i=0; i<width*height; i++)
4800 if(FFABS(buffer[0][i] - buffer[1][i])>20) printf("fsck: %d %d %d\n",i, buffer[0][i], buffer[1][i]);
4803 printf("testing AC coder\n");
4804 memset(s.header_state, 0, sizeof(s.header_state));
4805 ff_init_range_encoder(&s.c, buffer[0], 256*256);
4806 ff_init_cabac_states(&s.c, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64);
4808 for(i=-256; i<256; i++){
4810 put_symbol(&s.c, s.header_state, i*i*i/3*FFABS(i), 1);
4811 STOP_TIMER("put_symbol")
4813 ff_rac_terminate(&s.c);
4815 memset(s.header_state, 0, sizeof(s.header_state));
4816 ff_init_range_decoder(&s.c, buffer[0], 256*256);
4817 ff_init_cabac_states(&s.c, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64);
4819 for(i=-256; i<256; i++){
4822 j= get_symbol(&s.c, s.header_state, 1);
4823 STOP_TIMER("get_symbol")
4824 if(j!=i*i*i/3*FFABS(i)) printf("fsck: %d != %d\n", i, j);
4828 int level, orientation, x, y;
4829 int64_t errors[8][4];
4832 memset(errors, 0, sizeof(errors));
4833 s.spatial_decomposition_count=3;
4834 s.spatial_decomposition_type=0;
4835 for(level=0; level<s.spatial_decomposition_count; level++){
4836 for(orientation=level ? 1 : 0; orientation<4; orientation++){
4837 int w= width >> (s.spatial_decomposition_count-level);
4838 int h= height >> (s.spatial_decomposition_count-level);
4839 int stride= width << (s.spatial_decomposition_count-level);
4840 DWTELEM *buf= buffer[0];
4843 if(orientation&1) buf+=w;
4844 if(orientation>1) buf+=stride>>1;
4846 memset(buffer[0], 0, sizeof(int)*width*height);
4847 buf[w/2 + h/2*stride]= 256*256;
4848 ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4849 for(y=0; y<height; y++){
4850 for(x=0; x<width; x++){
4851 int64_t d= buffer[0][x + y*width];
4853 if(FFABS(width/2-x)<9 && FFABS(height/2-y)<9 && level==2) printf("%8"PRId64" ", d);
4855 if(FFABS(height/2-y)<9 && level==2) printf("\n");
4857 error= (int)(sqrt(error)+0.5);
4858 errors[level][orientation]= error;
4859 if(g) g=ff_gcd(g, error);
4863 printf("static int const visual_weight[][4]={\n");
4864 for(level=0; level<s.spatial_decomposition_count; level++){
4866 for(orientation=0; orientation<4; orientation++){
4867 printf("%8"PRId64",", errors[level][orientation]/g);
4875 int w= width >> (s.spatial_decomposition_count-level);
4876 int h= height >> (s.spatial_decomposition_count-level);
4877 int stride= width << (s.spatial_decomposition_count-level);
4878 DWTELEM *buf= buffer[0];
4884 memset(buffer[0], 0, sizeof(int)*width*height);
4886 for(y=0; y<height; y++){
4887 for(x=0; x<width; x++){
4888 int tab[4]={0,2,3,1};
4889 buffer[0][x+width*y]= 256*256*tab[(x&1) + 2*(y&1)];
4892 ff_spatial_dwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4896 buf[x + y*stride ]=169;
4897 buf[x + y*stride-w]=64;
4900 ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4902 for(y=0; y<height; y++){
4903 for(x=0; x<width; x++){
4904 int64_t d= buffer[0][x + y*width];
4906 if(FFABS(width/2-x)<9 && FFABS(height/2-y)<9) printf("%8"PRId64" ", d);
4908 if(FFABS(height/2-y)<9) printf("\n");