2 * Copyright (C) 2004 Michael Niedermayer <michaelni@gmx.at>
4 * This file is part of FFmpeg.
6 * FFmpeg is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * FFmpeg is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with FFmpeg; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
25 #include "rangecoder.h"
27 #include "mpegvideo.h"
32 static const int8_t quant3[256]={
33 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
34 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
35 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
36 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
37 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
38 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
39 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
40 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
41 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
42 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
43 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
44 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
45 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
46 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
47 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
48 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 0,
50 static const int8_t quant3b[256]={
51 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
52 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
53 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
54 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
55 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
56 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
57 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
58 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
59 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
60 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
61 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
62 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
63 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
64 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
65 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
66 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
68 static const int8_t quant3bA[256]={
69 0, 0, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
70 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
71 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
72 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
73 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
74 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
75 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
76 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
77 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
78 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
79 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
80 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
81 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
82 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
83 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
84 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
86 static const int8_t quant5[256]={
87 0, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
88 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
89 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
90 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
91 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
92 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
93 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
94 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
95 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
96 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
97 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
98 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
99 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
100 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
101 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
102 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,-1,
104 static const int8_t quant7[256]={
105 0, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
106 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
107 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3,
108 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
109 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
110 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
111 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
112 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
113 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
114 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
115 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
116 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
117 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
118 -3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-2,-2,-2,
119 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
120 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,
122 static const int8_t quant9[256]={
123 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3,
124 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
125 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
126 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
127 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
128 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
129 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
130 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
131 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
132 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
133 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
134 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
135 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
136 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
137 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,
138 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-1,-1,
140 static const int8_t quant11[256]={
141 0, 1, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4,
142 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
143 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
144 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
145 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
146 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
147 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
148 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
149 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
150 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
151 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
152 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
153 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
154 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-4,-4,
155 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
156 -4,-4,-4,-4,-4,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-1,
158 static const int8_t quant13[256]={
159 0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
160 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
161 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
162 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
163 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
164 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
165 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
166 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
167 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
168 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
169 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
170 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
171 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-5,
172 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
173 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
174 -4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,-2,-2,-1,
178 static const uint8_t obmc32[1024]={
179 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
180 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0,
181 0, 0, 0, 4, 4, 4, 4, 8, 8, 12, 12, 12, 16, 16, 16, 16, 16, 16, 16, 16, 12, 12, 12, 8, 8, 4, 4, 4, 4, 0, 0, 0,
182 0, 0, 4, 4, 8, 8, 12, 16, 16, 20, 24, 24, 28, 28, 32, 32, 32, 32, 28, 28, 24, 24, 20, 16, 16, 12, 8, 8, 4, 4, 0, 0,
183 0, 0, 4, 8, 8, 12, 16, 24, 28, 32, 36, 40, 44, 48, 48, 48, 48, 48, 48, 44, 40, 36, 32, 28, 24, 16, 12, 8, 8, 4, 0, 0,
184 0, 4, 4, 8, 12, 20, 24, 32, 40, 44, 52, 56, 60, 64, 68, 72, 72, 68, 64, 60, 56, 52, 44, 40, 32, 24, 20, 12, 8, 4, 4, 0,
185 0, 4, 4, 12, 16, 24, 32, 40, 52, 60, 68, 76, 80, 88, 88, 92, 92, 88, 88, 80, 76, 68, 60, 52, 40, 32, 24, 16, 12, 4, 4, 0,
186 0, 4, 8, 16, 24, 32, 40, 52, 64, 76, 84, 92,100,108,112,116,116,112,108,100, 92, 84, 76, 64, 52, 40, 32, 24, 16, 8, 4, 0,
187 0, 4, 8, 16, 28, 40, 52, 64, 76, 88,100,112,124,132,136,140,140,136,132,124,112,100, 88, 76, 64, 52, 40, 28, 16, 8, 4, 0,
188 0, 4, 12, 20, 32, 44, 60, 76, 88,104,120,132,144,152,160,164,164,160,152,144,132,120,104, 88, 76, 60, 44, 32, 20, 12, 4, 0,
189 0, 4, 12, 24, 36, 48, 68, 84,100,120,136,152,164,176,180,184,184,180,176,164,152,136,120,100, 84, 68, 48, 36, 24, 12, 4, 0,
190 0, 4, 12, 24, 40, 56, 76, 92,112,132,152,168,180,192,204,208,208,204,192,180,168,152,132,112, 92, 76, 56, 40, 24, 12, 4, 0,
191 0, 4, 16, 28, 44, 60, 80,100,124,144,164,180,196,208,220,224,224,220,208,196,180,164,144,124,100, 80, 60, 44, 28, 16, 4, 0,
192 0, 8, 16, 28, 48, 64, 88,108,132,152,176,192,208,224,232,240,240,232,224,208,192,176,152,132,108, 88, 64, 48, 28, 16, 8, 0,
193 0, 4, 16, 32, 48, 68, 88,112,136,160,180,204,220,232,244,248,248,244,232,220,204,180,160,136,112, 88, 68, 48, 32, 16, 4, 0,
194 1, 8, 16, 32, 48, 72, 92,116,140,164,184,208,224,240,248,255,255,248,240,224,208,184,164,140,116, 92, 72, 48, 32, 16, 8, 1,
195 1, 8, 16, 32, 48, 72, 92,116,140,164,184,208,224,240,248,255,255,248,240,224,208,184,164,140,116, 92, 72, 48, 32, 16, 8, 1,
196 0, 4, 16, 32, 48, 68, 88,112,136,160,180,204,220,232,244,248,248,244,232,220,204,180,160,136,112, 88, 68, 48, 32, 16, 4, 0,
197 0, 8, 16, 28, 48, 64, 88,108,132,152,176,192,208,224,232,240,240,232,224,208,192,176,152,132,108, 88, 64, 48, 28, 16, 8, 0,
198 0, 4, 16, 28, 44, 60, 80,100,124,144,164,180,196,208,220,224,224,220,208,196,180,164,144,124,100, 80, 60, 44, 28, 16, 4, 0,
199 0, 4, 12, 24, 40, 56, 76, 92,112,132,152,168,180,192,204,208,208,204,192,180,168,152,132,112, 92, 76, 56, 40, 24, 12, 4, 0,
200 0, 4, 12, 24, 36, 48, 68, 84,100,120,136,152,164,176,180,184,184,180,176,164,152,136,120,100, 84, 68, 48, 36, 24, 12, 4, 0,
201 0, 4, 12, 20, 32, 44, 60, 76, 88,104,120,132,144,152,160,164,164,160,152,144,132,120,104, 88, 76, 60, 44, 32, 20, 12, 4, 0,
202 0, 4, 8, 16, 28, 40, 52, 64, 76, 88,100,112,124,132,136,140,140,136,132,124,112,100, 88, 76, 64, 52, 40, 28, 16, 8, 4, 0,
203 0, 4, 8, 16, 24, 32, 40, 52, 64, 76, 84, 92,100,108,112,116,116,112,108,100, 92, 84, 76, 64, 52, 40, 32, 24, 16, 8, 4, 0,
204 0, 4, 4, 12, 16, 24, 32, 40, 52, 60, 68, 76, 80, 88, 88, 92, 92, 88, 88, 80, 76, 68, 60, 52, 40, 32, 24, 16, 12, 4, 4, 0,
205 0, 4, 4, 8, 12, 20, 24, 32, 40, 44, 52, 56, 60, 64, 68, 72, 72, 68, 64, 60, 56, 52, 44, 40, 32, 24, 20, 12, 8, 4, 4, 0,
206 0, 0, 4, 8, 8, 12, 16, 24, 28, 32, 36, 40, 44, 48, 48, 48, 48, 48, 48, 44, 40, 36, 32, 28, 24, 16, 12, 8, 8, 4, 0, 0,
207 0, 0, 4, 4, 8, 8, 12, 16, 16, 20, 24, 24, 28, 28, 32, 32, 32, 32, 28, 28, 24, 24, 20, 16, 16, 12, 8, 8, 4, 4, 0, 0,
208 0, 0, 0, 4, 4, 4, 4, 8, 8, 12, 12, 12, 16, 16, 16, 16, 16, 16, 16, 16, 12, 12, 12, 8, 8, 4, 4, 4, 4, 0, 0, 0,
209 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0,
210 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
213 static const uint8_t obmc16[256]={
214 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0,
215 0, 4, 4, 8, 16, 20, 20, 24, 24, 20, 20, 16, 8, 4, 4, 0,
216 0, 4, 16, 24, 36, 44, 52, 60, 60, 52, 44, 36, 24, 16, 4, 0,
217 0, 8, 24, 44, 60, 80, 96,104,104, 96, 80, 60, 44, 24, 8, 0,
218 0, 16, 36, 60, 92,116,136,152,152,136,116, 92, 60, 36, 16, 0,
219 0, 20, 44, 80,116,152,180,196,196,180,152,116, 80, 44, 20, 0,
220 4, 20, 52, 96,136,180,212,228,228,212,180,136, 96, 52, 20, 4,
221 4, 24, 60,104,152,196,228,248,248,228,196,152,104, 60, 24, 4,
222 4, 24, 60,104,152,196,228,248,248,228,196,152,104, 60, 24, 4,
223 4, 20, 52, 96,136,180,212,228,228,212,180,136, 96, 52, 20, 4,
224 0, 20, 44, 80,116,152,180,196,196,180,152,116, 80, 44, 20, 0,
225 0, 16, 36, 60, 92,116,136,152,152,136,116, 92, 60, 36, 16, 0,
226 0, 8, 24, 44, 60, 80, 96,104,104, 96, 80, 60, 44, 24, 8, 0,
227 0, 4, 16, 24, 36, 44, 52, 60, 60, 52, 44, 36, 24, 16, 4, 0,
228 0, 4, 4, 8, 16, 20, 20, 24, 24, 20, 20, 16, 8, 4, 4, 0,
229 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0,
233 static const uint8_t obmc32[1024]={
234 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0,
235 0, 4, 4, 4, 8, 8, 8, 12, 12, 16, 16, 16, 20, 20, 20, 24, 24, 20, 20, 20, 16, 16, 16, 12, 12, 8, 8, 8, 4, 4, 4, 0,
236 0, 4, 8, 8, 12, 12, 16, 20, 20, 24, 28, 28, 32, 32, 36, 40, 40, 36, 32, 32, 28, 28, 24, 20, 20, 16, 12, 12, 8, 8, 4, 0,
237 0, 4, 8, 12, 16, 20, 24, 28, 28, 32, 36, 40, 44, 48, 52, 56, 56, 52, 48, 44, 40, 36, 32, 28, 28, 24, 20, 16, 12, 8, 4, 0,
238 4, 8, 12, 16, 20, 24, 28, 32, 40, 44, 48, 52, 56, 60, 64, 68, 68, 64, 60, 56, 52, 48, 44, 40, 32, 28, 24, 20, 16, 12, 8, 4,
239 4, 8, 12, 20, 24, 32, 36, 40, 48, 52, 56, 64, 68, 76, 80, 84, 84, 80, 76, 68, 64, 56, 52, 48, 40, 36, 32, 24, 20, 12, 8, 4,
240 4, 8, 16, 24, 28, 36, 44, 48, 56, 60, 68, 76, 80, 88, 96,100,100, 96, 88, 80, 76, 68, 60, 56, 48, 44, 36, 28, 24, 16, 8, 4,
241 4, 12, 20, 28, 32, 40, 48, 56, 64, 72, 80, 88, 92,100,108,116,116,108,100, 92, 88, 80, 72, 64, 56, 48, 40, 32, 28, 20, 12, 4,
242 4, 12, 20, 28, 40, 48, 56, 64, 72, 80, 88, 96,108,116,124,132,132,124,116,108, 96, 88, 80, 72, 64, 56, 48, 40, 28, 20, 12, 4,
243 4, 16, 24, 32, 44, 52, 60, 72, 80, 92,100,108,120,128,136,148,148,136,128,120,108,100, 92, 80, 72, 60, 52, 44, 32, 24, 16, 4,
244 4, 16, 28, 36, 48, 56, 68, 80, 88,100,112,120,132,140,152,164,164,152,140,132,120,112,100, 88, 80, 68, 56, 48, 36, 28, 16, 4,
245 4, 16, 28, 40, 52, 64, 76, 88, 96,108,120,132,144,156,168,180,180,168,156,144,132,120,108, 96, 88, 76, 64, 52, 40, 28, 16, 4,
246 8, 20, 32, 44, 56, 68, 80, 92,108,120,132,144,156,168,180,192,192,180,168,156,144,132,120,108, 92, 80, 68, 56, 44, 32, 20, 8,
247 8, 20, 32, 48, 60, 76, 88,100,116,128,140,156,168,184,196,208,208,196,184,168,156,140,128,116,100, 88, 76, 60, 48, 32, 20, 8,
248 8, 20, 36, 52, 64, 80, 96,108,124,136,152,168,180,196,212,224,224,212,196,180,168,152,136,124,108, 96, 80, 64, 52, 36, 20, 8,
249 8, 24, 40, 56, 68, 84,100,116,132,148,164,180,192,208,224,240,240,224,208,192,180,164,148,132,116,100, 84, 68, 56, 40, 24, 8,
250 8, 24, 40, 56, 68, 84,100,116,132,148,164,180,192,208,224,240,240,224,208,192,180,164,148,132,116,100, 84, 68, 56, 40, 24, 8,
251 8, 20, 36, 52, 64, 80, 96,108,124,136,152,168,180,196,212,224,224,212,196,180,168,152,136,124,108, 96, 80, 64, 52, 36, 20, 8,
252 8, 20, 32, 48, 60, 76, 88,100,116,128,140,156,168,184,196,208,208,196,184,168,156,140,128,116,100, 88, 76, 60, 48, 32, 20, 8,
253 8, 20, 32, 44, 56, 68, 80, 92,108,120,132,144,156,168,180,192,192,180,168,156,144,132,120,108, 92, 80, 68, 56, 44, 32, 20, 8,
254 4, 16, 28, 40, 52, 64, 76, 88, 96,108,120,132,144,156,168,180,180,168,156,144,132,120,108, 96, 88, 76, 64, 52, 40, 28, 16, 4,
255 4, 16, 28, 36, 48, 56, 68, 80, 88,100,112,120,132,140,152,164,164,152,140,132,120,112,100, 88, 80, 68, 56, 48, 36, 28, 16, 4,
256 4, 16, 24, 32, 44, 52, 60, 72, 80, 92,100,108,120,128,136,148,148,136,128,120,108,100, 92, 80, 72, 60, 52, 44, 32, 24, 16, 4,
257 4, 12, 20, 28, 40, 48, 56, 64, 72, 80, 88, 96,108,116,124,132,132,124,116,108, 96, 88, 80, 72, 64, 56, 48, 40, 28, 20, 12, 4,
258 4, 12, 20, 28, 32, 40, 48, 56, 64, 72, 80, 88, 92,100,108,116,116,108,100, 92, 88, 80, 72, 64, 56, 48, 40, 32, 28, 20, 12, 4,
259 4, 8, 16, 24, 28, 36, 44, 48, 56, 60, 68, 76, 80, 88, 96,100,100, 96, 88, 80, 76, 68, 60, 56, 48, 44, 36, 28, 24, 16, 8, 4,
260 4, 8, 12, 20, 24, 32, 36, 40, 48, 52, 56, 64, 68, 76, 80, 84, 84, 80, 76, 68, 64, 56, 52, 48, 40, 36, 32, 24, 20, 12, 8, 4,
261 4, 8, 12, 16, 20, 24, 28, 32, 40, 44, 48, 52, 56, 60, 64, 68, 68, 64, 60, 56, 52, 48, 44, 40, 32, 28, 24, 20, 16, 12, 8, 4,
262 0, 4, 8, 12, 16, 20, 24, 28, 28, 32, 36, 40, 44, 48, 52, 56, 56, 52, 48, 44, 40, 36, 32, 28, 28, 24, 20, 16, 12, 8, 4, 0,
263 0, 4, 8, 8, 12, 12, 16, 20, 20, 24, 28, 28, 32, 32, 36, 40, 40, 36, 32, 32, 28, 28, 24, 20, 20, 16, 12, 12, 8, 8, 4, 0,
264 0, 4, 4, 4, 8, 8, 8, 12, 12, 16, 16, 16, 20, 20, 20, 24, 24, 20, 20, 20, 16, 16, 16, 12, 12, 8, 8, 8, 4, 4, 4, 0,
265 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0,
268 static const uint8_t obmc16[256]={
269 0, 4, 4, 8, 8, 12, 12, 16, 16, 12, 12, 8, 8, 4, 4, 0,
270 4, 8, 16, 20, 28, 32, 40, 44, 44, 40, 32, 28, 20, 16, 8, 4,
271 4, 16, 24, 36, 44, 56, 64, 76, 76, 64, 56, 44, 36, 24, 16, 4,
272 8, 20, 36, 48, 64, 76, 92,104,104, 92, 76, 64, 48, 36, 20, 8,
273 8, 28, 44, 64, 80,100,116,136,136,116,100, 80, 64, 44, 28, 8,
274 12, 32, 56, 76,100,120,144,164,164,144,120,100, 76, 56, 32, 12,
275 12, 40, 64, 92,116,144,168,196,196,168,144,116, 92, 64, 40, 12,
276 16, 44, 76,104,136,164,196,224,224,196,164,136,104, 76, 44, 16,
277 16, 44, 76,104,136,164,196,224,224,196,164,136,104, 76, 44, 16,
278 12, 40, 64, 92,116,144,168,196,196,168,144,116, 92, 64, 40, 12,
279 12, 32, 56, 76,100,120,144,164,164,144,120,100, 76, 56, 32, 12,
280 8, 28, 44, 64, 80,100,116,136,136,116,100, 80, 64, 44, 28, 8,
281 8, 20, 36, 48, 64, 76, 92,104,104, 92, 76, 64, 48, 36, 20, 8,
282 4, 16, 24, 36, 44, 56, 64, 76, 76, 64, 56, 44, 36, 24, 16, 4,
283 4, 8, 16, 20, 28, 32, 40, 44, 44, 40, 32, 28, 20, 16, 8, 4,
284 0, 4, 4, 8, 8, 12, 12, 16, 16, 12, 12, 8, 8, 4, 4, 0,
288 static const uint8_t obmc32[1024]={
289 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
290 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 4, 4, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0,
291 0, 0, 0, 4, 4, 4, 4, 8, 8, 12, 12, 12, 12, 16, 16, 16, 16, 16, 16, 12, 12, 12, 12, 8, 8, 4, 4, 4, 4, 0, 0, 0,
292 0, 0, 4, 4, 4, 8, 8, 12, 16, 20, 20, 24, 28, 28, 28, 28, 28, 28, 28, 28, 24, 20, 20, 16, 12, 8, 8, 4, 4, 4, 0, 0,
293 0, 0, 4, 4, 8, 12, 16, 20, 24, 28, 36, 40, 44, 44, 48, 48, 48, 48, 44, 44, 40, 36, 28, 24, 20, 16, 12, 8, 4, 4, 0, 0,
294 0, 0, 4, 8, 12, 20, 24, 32, 36, 44, 48, 56, 60, 64, 68, 68, 68, 68, 64, 60, 56, 48, 44, 36, 32, 24, 20, 12, 8, 4, 0, 0,
295 0, 4, 4, 8, 16, 24, 32, 40, 48, 60, 68, 76, 80, 84, 88, 92, 92, 88, 84, 80, 76, 68, 60, 48, 40, 32, 24, 16, 8, 4, 4, 0,
296 0, 4, 8, 12, 20, 32, 40, 52, 64, 76, 84, 96,104,108,112,116,116,112,108,104, 96, 84, 76, 64, 52, 40, 32, 20, 12, 8, 4, 0,
297 0, 4, 8, 16, 24, 36, 48, 64, 76, 92,104,116,124,132,136,140,140,136,132,124,116,104, 92, 76, 64, 48, 36, 24, 16, 8, 4, 0,
298 0, 4, 12, 20, 28, 44, 60, 76, 92,104,120,136,148,156,160,164,164,160,156,148,136,120,104, 92, 76, 60, 44, 28, 20, 12, 4, 0,
299 0, 4, 12, 20, 36, 48, 68, 84,104,120,140,152,168,176,184,188,188,184,176,168,152,140,120,104, 84, 68, 48, 36, 20, 12, 4, 0,
300 0, 4, 12, 24, 36, 56, 76, 96,116,136,152,172,184,196,204,208,208,204,196,184,172,152,136,116, 96, 76, 56, 36, 24, 12, 4, 0,
301 0, 4, 12, 24, 44, 60, 80,104,124,148,168,184,200,212,224,228,228,224,212,200,184,168,148,124,104, 80, 60, 44, 24, 12, 4, 0,
302 0, 4, 12, 28, 44, 64, 84,108,132,156,176,196,212,228,236,240,240,236,228,212,196,176,156,132,108, 84, 64, 44, 28, 12, 4, 0,
303 0, 4, 16, 28, 48, 68, 88,112,136,160,184,204,224,236,244,252,252,244,236,224,204,184,160,136,112, 88, 68, 48, 28, 16, 4, 0,
304 1, 4, 16, 28, 48, 68, 92,116,140,164,188,208,228,240,252,255,255,252,240,228,208,188,164,140,116, 92, 68, 48, 28, 16, 4, 1,
305 1, 4, 16, 28, 48, 68, 92,116,140,164,188,208,228,240,252,255,255,252,240,228,208,188,164,140,116, 92, 68, 48, 28, 16, 4, 1,
306 0, 4, 16, 28, 48, 68, 88,112,136,160,184,204,224,236,244,252,252,244,236,224,204,184,160,136,112, 88, 68, 48, 28, 16, 4, 0,
307 0, 4, 12, 28, 44, 64, 84,108,132,156,176,196,212,228,236,240,240,236,228,212,196,176,156,132,108, 84, 64, 44, 28, 12, 4, 0,
308 0, 4, 12, 24, 44, 60, 80,104,124,148,168,184,200,212,224,228,228,224,212,200,184,168,148,124,104, 80, 60, 44, 24, 12, 4, 0,
309 0, 4, 12, 24, 36, 56, 76, 96,116,136,152,172,184,196,204,208,208,204,196,184,172,152,136,116, 96, 76, 56, 36, 24, 12, 4, 0,
310 0, 4, 12, 20, 36, 48, 68, 84,104,120,140,152,168,176,184,188,188,184,176,168,152,140,120,104, 84, 68, 48, 36, 20, 12, 4, 0,
311 0, 4, 12, 20, 28, 44, 60, 76, 92,104,120,136,148,156,160,164,164,160,156,148,136,120,104, 92, 76, 60, 44, 28, 20, 12, 4, 0,
312 0, 4, 8, 16, 24, 36, 48, 64, 76, 92,104,116,124,132,136,140,140,136,132,124,116,104, 92, 76, 64, 48, 36, 24, 16, 8, 4, 0,
313 0, 4, 8, 12, 20, 32, 40, 52, 64, 76, 84, 96,104,108,112,116,116,112,108,104, 96, 84, 76, 64, 52, 40, 32, 20, 12, 8, 4, 0,
314 0, 4, 4, 8, 16, 24, 32, 40, 48, 60, 68, 76, 80, 84, 88, 92, 92, 88, 84, 80, 76, 68, 60, 48, 40, 32, 24, 16, 8, 4, 4, 0,
315 0, 0, 4, 8, 12, 20, 24, 32, 36, 44, 48, 56, 60, 64, 68, 68, 68, 68, 64, 60, 56, 48, 44, 36, 32, 24, 20, 12, 8, 4, 0, 0,
316 0, 0, 4, 4, 8, 12, 16, 20, 24, 28, 36, 40, 44, 44, 48, 48, 48, 48, 44, 44, 40, 36, 28, 24, 20, 16, 12, 8, 4, 4, 0, 0,
317 0, 0, 4, 4, 4, 8, 8, 12, 16, 20, 20, 24, 28, 28, 28, 28, 28, 28, 28, 28, 24, 20, 20, 16, 12, 8, 8, 4, 4, 4, 0, 0,
318 0, 0, 0, 4, 4, 4, 4, 8, 8, 12, 12, 12, 12, 16, 16, 16, 16, 16, 16, 12, 12, 12, 12, 8, 8, 4, 4, 4, 4, 0, 0, 0,
319 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 4, 4, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0,
320 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
323 static const uint8_t obmc16[256]={
324 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0,
325 0, 0, 4, 8, 12, 16, 20, 20, 20, 20, 16, 12, 8, 4, 0, 0,
326 0, 4, 12, 24, 32, 44, 52, 56, 56, 52, 44, 32, 24, 12, 4, 0,
327 0, 8, 24, 40, 60, 80, 96,104,104, 96, 80, 60, 40, 24, 8, 0,
328 0, 12, 32, 64, 92,120,140,152,152,140,120, 92, 64, 32, 12, 0,
329 4, 16, 44, 80,120,156,184,196,196,184,156,120, 80, 44, 16, 4,
330 4, 20, 52, 96,140,184,216,232,232,216,184,140, 96, 52, 20, 4,
331 0, 20, 56,104,152,196,232,252,252,232,196,152,104, 56, 20, 0,
332 0, 20, 56,104,152,196,232,252,252,232,196,152,104, 56, 20, 0,
333 4, 20, 52, 96,140,184,216,232,232,216,184,140, 96, 52, 20, 4,
334 4, 16, 44, 80,120,156,184,196,196,184,156,120, 80, 44, 16, 4,
335 0, 12, 32, 64, 92,120,140,152,152,140,120, 92, 64, 32, 12, 0,
336 0, 8, 24, 40, 60, 80, 96,104,104, 96, 80, 60, 40, 24, 8, 0,
337 0, 4, 12, 24, 32, 44, 52, 56, 56, 52, 44, 32, 24, 12, 4, 0,
338 0, 0, 4, 8, 12, 16, 20, 20, 20, 20, 16, 12, 8, 4, 0, 0,
339 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0,
345 static const uint8_t obmc8[64]={
346 4, 12, 20, 28, 28, 20, 12, 4,
347 12, 36, 60, 84, 84, 60, 36, 12,
348 20, 60,100,140,140,100, 60, 20,
349 28, 84,140,196,196,140, 84, 28,
350 28, 84,140,196,196,140, 84, 28,
351 20, 60,100,140,140,100, 60, 20,
352 12, 36, 60, 84, 84, 60, 36, 12,
353 4, 12, 20, 28, 28, 20, 12, 4,
358 static const uint8_t obmc4[16]={
366 static const uint8_t *obmc_tab[4]={
367 obmc32, obmc16, obmc8, obmc4
370 static int scale_mv_ref[MAX_REF_FRAMES][MAX_REF_FRAMES];
372 typedef struct BlockNode{
378 //#define TYPE_SPLIT 1
379 #define BLOCK_INTRA 1
381 //#define TYPE_NOCOLOR 4
382 uint8_t level; //FIXME merge into type?
385 static const BlockNode null_block= { //FIXME add border maybe
386 .color= {128,128,128},
394 #define LOG2_MB_SIZE 4
395 #define MB_SIZE (1<<LOG2_MB_SIZE)
396 #define ENCODER_EXTRA_BITS 4
399 typedef struct x_and_coeff{
404 typedef struct SubBand{
409 int qlog; ///< log(qscale)/log[2^(1/6)]
414 int stride_line; ///< Stride measured in lines, not pixels.
415 x_and_coeff * x_coeff;
416 struct SubBand *parent;
417 uint8_t state[/*7*2*/ 7 + 512][32];
420 typedef struct Plane{
423 SubBand band[MAX_DECOMPOSITIONS][4];
426 int8_t hcoeff[HTAPS_MAX/2];
431 int8_t last_hcoeff[HTAPS_MAX/2];
435 typedef struct SnowContext{
436 // MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to make the motion estimation eventually independent of MpegEncContext, so this will be removed then (FIXME/XXX)
438 AVCodecContext *avctx;
442 AVFrame input_picture; ///< new_picture with the internal linesizes
443 AVFrame current_picture;
444 AVFrame last_picture[MAX_REF_FRAMES];
445 uint8_t *halfpel_plane[MAX_REF_FRAMES][4][4];
446 AVFrame mconly_picture;
447 // uint8_t q_context[16];
448 uint8_t header_state[32];
449 uint8_t block_state[128 + 32*128];
453 int spatial_decomposition_type;
454 int last_spatial_decomposition_type;
455 int temporal_decomposition_type;
456 int spatial_decomposition_count;
457 int temporal_decomposition_count;
460 int16_t (*ref_mvs[MAX_REF_FRAMES])[2];
461 uint32_t *ref_scores[MAX_REF_FRAMES];
462 DWTELEM *spatial_dwt_buffer;
463 IDWTELEM *spatial_idwt_buffer;
467 int spatial_scalability;
477 #define QBIAS_SHIFT 3
481 int last_block_max_depth;
482 Plane plane[MAX_PLANES];
484 #define ME_CACHE_SIZE 1024
485 int me_cache[ME_CACHE_SIZE];
486 int me_cache_generation;
489 MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to make the motion estimation eventually independent of MpegEncContext, so this will be removed then (FIXME/XXX)
500 #define slice_buffer_get_line(slice_buf, line_num) ((slice_buf)->line[line_num] ? (slice_buf)->line[line_num] : slice_buffer_load_line((slice_buf), (line_num)))
501 //#define slice_buffer_get_line(slice_buf, line_num) (slice_buffer_load_line((slice_buf), (line_num)))
503 static void iterative_me(SnowContext *s);
505 static void slice_buffer_init(slice_buffer * buf, int line_count, int max_allocated_lines, int line_width, IDWTELEM * base_buffer)
509 buf->base_buffer = base_buffer;
510 buf->line_count = line_count;
511 buf->line_width = line_width;
512 buf->data_count = max_allocated_lines;
513 buf->line = av_mallocz (sizeof(IDWTELEM *) * line_count);
514 buf->data_stack = av_malloc (sizeof(IDWTELEM *) * max_allocated_lines);
516 for (i = 0; i < max_allocated_lines; i++)
518 buf->data_stack[i] = av_malloc (sizeof(IDWTELEM) * line_width);
521 buf->data_stack_top = max_allocated_lines - 1;
524 static IDWTELEM * slice_buffer_load_line(slice_buffer * buf, int line)
529 // av_log(NULL, AV_LOG_DEBUG, "Cache hit: %d\n", line);
531 assert(buf->data_stack_top >= 0);
532 // assert(!buf->line[line]);
534 return buf->line[line];
536 offset = buf->line_width * line;
537 buffer = buf->data_stack[buf->data_stack_top];
538 buf->data_stack_top--;
539 buf->line[line] = buffer;
541 // av_log(NULL, AV_LOG_DEBUG, "slice_buffer_load_line: line: %d remaining: %d\n", line, buf->data_stack_top + 1);
546 static void slice_buffer_release(slice_buffer * buf, int line)
551 assert(line >= 0 && line < buf->line_count);
552 assert(buf->line[line]);
554 offset = buf->line_width * line;
555 buffer = buf->line[line];
556 buf->data_stack_top++;
557 buf->data_stack[buf->data_stack_top] = buffer;
558 buf->line[line] = NULL;
560 // av_log(NULL, AV_LOG_DEBUG, "slice_buffer_release: line: %d remaining: %d\n", line, buf->data_stack_top + 1);
563 static void slice_buffer_flush(slice_buffer * buf)
566 for (i = 0; i < buf->line_count; i++)
570 // av_log(NULL, AV_LOG_DEBUG, "slice_buffer_flush: line: %d \n", i);
571 slice_buffer_release(buf, i);
576 static void slice_buffer_destroy(slice_buffer * buf)
579 slice_buffer_flush(buf);
581 for (i = buf->data_count - 1; i >= 0; i--)
583 assert(buf->data_stack[i]);
584 av_freep(&buf->data_stack[i]);
586 assert(buf->data_stack);
587 av_freep(&buf->data_stack);
589 av_freep(&buf->line);
593 // Avoid a name clash on SGI IRIX
596 #define QEXPSHIFT (7-FRAC_BITS+8) //FIXME try to change this to 0
597 static uint8_t qexp[QROOT];
599 static inline int mirror(int v, int m){
600 while((unsigned)v > (unsigned)m){
607 static inline void put_symbol(RangeCoder *c, uint8_t *state, int v, int is_signed){
611 const int a= FFABS(v);
612 const int e= av_log2(a);
614 const int el= FFMIN(e, 10);
615 put_rac(c, state+0, 0);
618 put_rac(c, state+1+i, 1); //1..10
621 put_rac(c, state+1+9, 1); //1..10
623 put_rac(c, state+1+FFMIN(i,9), 0);
625 for(i=e-1; i>=el; i--){
626 put_rac(c, state+22+9, (a>>i)&1); //22..31
629 put_rac(c, state+22+i, (a>>i)&1); //22..31
633 put_rac(c, state+11 + el, v < 0); //11..21
636 put_rac(c, state+0, 0);
639 put_rac(c, state+1+i, 1); //1..10
641 put_rac(c, state+1+i, 0);
643 for(i=e-1; i>=0; i--){
644 put_rac(c, state+22+i, (a>>i)&1); //22..31
648 put_rac(c, state+11 + e, v < 0); //11..21
651 put_rac(c, state+1+FFMIN(i,9), 1); //1..10
653 put_rac(c, state+1+FFMIN(i,9), 0);
655 for(i=e-1; i>=0; i--){
656 put_rac(c, state+22+FFMIN(i,9), (a>>i)&1); //22..31
660 put_rac(c, state+11 + FFMIN(e,10), v < 0); //11..21
664 put_rac(c, state+0, 1);
668 static inline int get_symbol(RangeCoder *c, uint8_t *state, int is_signed){
669 if(get_rac(c, state+0))
674 while(get_rac(c, state+1 + FFMIN(e,9))){ //1..10
679 for(i=e-1; i>=0; i--){
680 a += a + get_rac(c, state+22 + FFMIN(i,9)); //22..31
683 if(is_signed && get_rac(c, state+11 + FFMIN(e,10))) //11..21
690 static inline void put_symbol2(RangeCoder *c, uint8_t *state, int v, int log2){
692 int r= log2>=0 ? 1<<log2 : 1;
698 put_rac(c, state+4+log2, 1);
703 put_rac(c, state+4+log2, 0);
705 for(i=log2-1; i>=0; i--){
706 put_rac(c, state+31-i, (v>>i)&1);
710 static inline int get_symbol2(RangeCoder *c, uint8_t *state, int log2){
712 int r= log2>=0 ? 1<<log2 : 1;
717 while(get_rac(c, state+4+log2)){
723 for(i=log2-1; i>=0; i--){
724 v+= get_rac(c, state+31-i)<<i;
730 static av_always_inline void
731 lift(DWTELEM *dst, DWTELEM *src, DWTELEM *ref,
732 int dst_step, int src_step, int ref_step,
733 int width, int mul, int add, int shift,
734 int highpass, int inverse){
735 const int mirror_left= !highpass;
736 const int mirror_right= (width&1) ^ highpass;
737 const int w= (width>>1) - 1 + (highpass & width);
740 #define LIFT(src, ref, inv) ((src) + ((inv) ? - (ref) : + (ref)))
742 dst[0] = LIFT(src[0], ((mul*2*ref[0]+add)>>shift), inverse);
749 LIFT(src[i*src_step],
750 ((mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add)>>shift),
756 LIFT(src[w*src_step],
757 ((mul*2*ref[w*ref_step]+add)>>shift),
762 static av_always_inline void
763 inv_lift(IDWTELEM *dst, IDWTELEM *src, IDWTELEM *ref,
764 int dst_step, int src_step, int ref_step,
765 int width, int mul, int add, int shift,
766 int highpass, int inverse){
767 const int mirror_left= !highpass;
768 const int mirror_right= (width&1) ^ highpass;
769 const int w= (width>>1) - 1 + (highpass & width);
772 #define LIFT(src, ref, inv) ((src) + ((inv) ? - (ref) : + (ref)))
774 dst[0] = LIFT(src[0], ((mul*2*ref[0]+add)>>shift), inverse);
781 LIFT(src[i*src_step],
782 ((mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add)>>shift),
788 LIFT(src[w*src_step],
789 ((mul*2*ref[w*ref_step]+add)>>shift),
795 static av_always_inline void
796 liftS(DWTELEM *dst, DWTELEM *src, DWTELEM *ref,
797 int dst_step, int src_step, int ref_step,
798 int width, int mul, int add, int shift,
799 int highpass, int inverse){
800 const int mirror_left= !highpass;
801 const int mirror_right= (width&1) ^ highpass;
802 const int w= (width>>1) - 1 + (highpass & width);
806 #define LIFTS(src, ref, inv) \
808 (src) + (((ref) + 4*(src))>>shift): \
809 -((-16*(src) + (ref) + add/4 + 1 + (5<<25))/(5*4) - (1<<23)))
811 dst[0] = LIFTS(src[0], mul*2*ref[0]+add, inverse);
818 LIFTS(src[i*src_step],
819 mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add,
825 LIFTS(src[w*src_step], mul*2*ref[w*ref_step]+add, inverse);
828 static av_always_inline void
829 inv_liftS(IDWTELEM *dst, IDWTELEM *src, IDWTELEM *ref,
830 int dst_step, int src_step, int ref_step,
831 int width, int mul, int add, int shift,
832 int highpass, int inverse){
833 const int mirror_left= !highpass;
834 const int mirror_right= (width&1) ^ highpass;
835 const int w= (width>>1) - 1 + (highpass & width);
839 #define LIFTS(src, ref, inv) \
841 (src) + (((ref) + 4*(src))>>shift): \
842 -((-16*(src) + (ref) + add/4 + 1 + (5<<25))/(5*4) - (1<<23)))
844 dst[0] = LIFTS(src[0], mul*2*ref[0]+add, inverse);
851 LIFTS(src[i*src_step],
852 mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add,
858 LIFTS(src[w*src_step], mul*2*ref[w*ref_step]+add, inverse);
863 static void horizontal_decompose53i(DWTELEM *b, int width){
865 const int width2= width>>1;
867 const int w2= (width+1)>>1;
869 for(x=0; x<width2; x++){
871 temp[x+w2]= b[2*x + 1];
885 for(x=1; x+1<width2; x+=2){
889 A2 += (A1 + A3 + 2)>>2;
893 A1= temp[x+1+width2];
896 A4 += (A1 + A3 + 2)>>2;
902 A2 += (A1 + A3 + 2)>>2;
907 lift(b+w2, temp+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 0);
908 lift(b , temp , b+w2, 1, 1, 1, width, 1, 2, 2, 0, 0);
912 static void vertical_decompose53iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
915 for(i=0; i<width; i++){
916 b1[i] -= (b0[i] + b2[i])>>1;
920 static void vertical_decompose53iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
923 for(i=0; i<width; i++){
924 b1[i] += (b0[i] + b2[i] + 2)>>2;
928 static void spatial_decompose53i(DWTELEM *buffer, int width, int height, int stride){
930 DWTELEM *b0= buffer + mirror(-2-1, height-1)*stride;
931 DWTELEM *b1= buffer + mirror(-2 , height-1)*stride;
933 for(y=-2; y<height; y+=2){
934 DWTELEM *b2= buffer + mirror(y+1, height-1)*stride;
935 DWTELEM *b3= buffer + mirror(y+2, height-1)*stride;
938 if(y+1<(unsigned)height) horizontal_decompose53i(b2, width);
939 if(y+2<(unsigned)height) horizontal_decompose53i(b3, width);
940 STOP_TIMER("horizontal_decompose53i")}
943 if(y+1<(unsigned)height) vertical_decompose53iH0(b1, b2, b3, width);
944 if(y+0<(unsigned)height) vertical_decompose53iL0(b0, b1, b2, width);
945 STOP_TIMER("vertical_decompose53i*")}
952 static void horizontal_decompose97i(DWTELEM *b, int width){
954 const int w2= (width+1)>>1;
956 lift (temp+w2, b +1, b , 1, 2, 2, width, W_AM, W_AO, W_AS, 1, 1);
957 liftS(temp , b , temp+w2, 1, 2, 1, width, W_BM, W_BO, W_BS, 0, 0);
958 lift (b +w2, temp+w2, temp , 1, 1, 1, width, W_CM, W_CO, W_CS, 1, 0);
959 lift (b , temp , b +w2, 1, 1, 1, width, W_DM, W_DO, W_DS, 0, 0);
963 static void vertical_decompose97iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
966 for(i=0; i<width; i++){
967 b1[i] -= (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
971 static void vertical_decompose97iH1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
974 for(i=0; i<width; i++){
975 b1[i] += (W_CM*(b0[i] + b2[i])+W_CO)>>W_CS;
979 static void vertical_decompose97iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
982 for(i=0; i<width; i++){
984 b1[i] -= (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS;
986 b1[i] = (16*4*b1[i] - 4*(b0[i] + b2[i]) + W_BO*5 + (5<<27)) / (5*16) - (1<<23);
991 static void vertical_decompose97iL1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
994 for(i=0; i<width; i++){
995 b1[i] += (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS;
999 static void spatial_decompose97i(DWTELEM *buffer, int width, int height, int stride){
1001 DWTELEM *b0= buffer + mirror(-4-1, height-1)*stride;
1002 DWTELEM *b1= buffer + mirror(-4 , height-1)*stride;
1003 DWTELEM *b2= buffer + mirror(-4+1, height-1)*stride;
1004 DWTELEM *b3= buffer + mirror(-4+2, height-1)*stride;
1006 for(y=-4; y<height; y+=2){
1007 DWTELEM *b4= buffer + mirror(y+3, height-1)*stride;
1008 DWTELEM *b5= buffer + mirror(y+4, height-1)*stride;
1011 if(y+3<(unsigned)height) horizontal_decompose97i(b4, width);
1012 if(y+4<(unsigned)height) horizontal_decompose97i(b5, width);
1014 STOP_TIMER("horizontal_decompose97i")
1018 if(y+3<(unsigned)height) vertical_decompose97iH0(b3, b4, b5, width);
1019 if(y+2<(unsigned)height) vertical_decompose97iL0(b2, b3, b4, width);
1020 if(y+1<(unsigned)height) vertical_decompose97iH1(b1, b2, b3, width);
1021 if(y+0<(unsigned)height) vertical_decompose97iL1(b0, b1, b2, width);
1024 STOP_TIMER("vertical_decompose97i")
1034 void ff_spatial_dwt(DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
1037 for(level=0; level<decomposition_count; level++){
1039 case DWT_97: spatial_decompose97i(buffer, width>>level, height>>level, stride<<level); break;
1040 case DWT_53: spatial_decompose53i(buffer, width>>level, height>>level, stride<<level); break;
1045 static void horizontal_compose53i(IDWTELEM *b, int width){
1046 IDWTELEM temp[width];
1047 const int width2= width>>1;
1048 const int w2= (width+1)>>1;
1060 for(x=1; x+1<width2; x+=2){
1064 A2 += (A1 + A3 + 2)>>2;
1068 A1= temp[x+1+width2];
1071 A4 += (A1 + A3 + 2)>>2;
1077 A2 += (A1 + A3 + 2)>>2;
1081 inv_lift(temp , b , b+w2, 1, 1, 1, width, 1, 2, 2, 0, 1);
1082 inv_lift(temp+w2, b+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 1);
1084 for(x=0; x<width2; x++){
1086 b[2*x + 1]= temp[x+w2];
1092 static void vertical_compose53iH0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
1095 for(i=0; i<width; i++){
1096 b1[i] += (b0[i] + b2[i])>>1;
1100 static void vertical_compose53iL0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
1103 for(i=0; i<width; i++){
1104 b1[i] -= (b0[i] + b2[i] + 2)>>2;
1108 static void spatial_compose53i_buffered_init(dwt_compose_t *cs, slice_buffer * sb, int height, int stride_line){
1109 cs->b0 = slice_buffer_get_line(sb, mirror(-1-1, height-1) * stride_line);
1110 cs->b1 = slice_buffer_get_line(sb, mirror(-1 , height-1) * stride_line);
1114 static void spatial_compose53i_init(dwt_compose_t *cs, IDWTELEM *buffer, int height, int stride){
1115 cs->b0 = buffer + mirror(-1-1, height-1)*stride;
1116 cs->b1 = buffer + mirror(-1 , height-1)*stride;
1120 static void spatial_compose53i_dy_buffered(dwt_compose_t *cs, slice_buffer * sb, int width, int height, int stride_line){
1123 IDWTELEM *b0= cs->b0;
1124 IDWTELEM *b1= cs->b1;
1125 IDWTELEM *b2= slice_buffer_get_line(sb, mirror(y+1, height-1) * stride_line);
1126 IDWTELEM *b3= slice_buffer_get_line(sb, mirror(y+2, height-1) * stride_line);
1129 if(y+1<(unsigned)height) vertical_compose53iL0(b1, b2, b3, width);
1130 if(y+0<(unsigned)height) vertical_compose53iH0(b0, b1, b2, width);
1131 STOP_TIMER("vertical_compose53i*")}
1134 if(y-1<(unsigned)height) horizontal_compose53i(b0, width);
1135 if(y+0<(unsigned)height) horizontal_compose53i(b1, width);
1136 STOP_TIMER("horizontal_compose53i")}
1143 static void spatial_compose53i_dy(dwt_compose_t *cs, IDWTELEM *buffer, int width, int height, int stride){
1145 IDWTELEM *b0= cs->b0;
1146 IDWTELEM *b1= cs->b1;
1147 IDWTELEM *b2= buffer + mirror(y+1, height-1)*stride;
1148 IDWTELEM *b3= buffer + mirror(y+2, height-1)*stride;
1151 if(y+1<(unsigned)height) vertical_compose53iL0(b1, b2, b3, width);
1152 if(y+0<(unsigned)height) vertical_compose53iH0(b0, b1, b2, width);
1153 STOP_TIMER("vertical_compose53i*")}
1156 if(y-1<(unsigned)height) horizontal_compose53i(b0, width);
1157 if(y+0<(unsigned)height) horizontal_compose53i(b1, width);
1158 STOP_TIMER("horizontal_compose53i")}
1165 static void spatial_compose53i(IDWTELEM *buffer, int width, int height, int stride){
1167 spatial_compose53i_init(&cs, buffer, height, stride);
1168 while(cs.y <= height)
1169 spatial_compose53i_dy(&cs, buffer, width, height, stride);
1173 void ff_snow_horizontal_compose97i(IDWTELEM *b, int width){
1174 IDWTELEM temp[width];
1175 const int w2= (width+1)>>1;
1177 inv_lift (temp , b , b +w2, 1, 1, 1, width, W_DM, W_DO, W_DS, 0, 1);
1178 inv_lift (temp+w2, b +w2, temp , 1, 1, 1, width, W_CM, W_CO, W_CS, 1, 1);
1179 inv_liftS(b , temp , temp+w2, 2, 1, 1, width, W_BM, W_BO, W_BS, 0, 1);
1180 inv_lift (b+1 , temp+w2, b , 2, 1, 2, width, W_AM, W_AO, W_AS, 1, 0);
1183 static void vertical_compose97iH0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
1186 for(i=0; i<width; i++){
1187 b1[i] += (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
1191 static void vertical_compose97iH1(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
1194 for(i=0; i<width; i++){
1195 b1[i] -= (W_CM*(b0[i] + b2[i])+W_CO)>>W_CS;
1199 static void vertical_compose97iL0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
1202 for(i=0; i<width; i++){
1204 b1[i] += (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS;
1206 b1[i] += (W_BM*(b0[i] + b2[i])+4*b1[i]+W_BO)>>W_BS;
1211 static void vertical_compose97iL1(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
1214 for(i=0; i<width; i++){
1215 b1[i] -= (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS;
1219 void ff_snow_vertical_compose97i(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, IDWTELEM *b3, IDWTELEM *b4, IDWTELEM *b5, int width){
1222 for(i=0; i<width; i++){
1223 b4[i] -= (W_DM*(b3[i] + b5[i])+W_DO)>>W_DS;
1224 b3[i] -= (W_CM*(b2[i] + b4[i])+W_CO)>>W_CS;
1226 b2[i] += (W_BM*(b1[i] + b3[i])+W_BO)>>W_BS;
1228 b2[i] += (W_BM*(b1[i] + b3[i])+4*b2[i]+W_BO)>>W_BS;
1230 b1[i] += (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
1234 static void spatial_compose97i_buffered_init(dwt_compose_t *cs, slice_buffer * sb, int height, int stride_line){
1235 cs->b0 = slice_buffer_get_line(sb, mirror(-3-1, height-1) * stride_line);
1236 cs->b1 = slice_buffer_get_line(sb, mirror(-3 , height-1) * stride_line);
1237 cs->b2 = slice_buffer_get_line(sb, mirror(-3+1, height-1) * stride_line);
1238 cs->b3 = slice_buffer_get_line(sb, mirror(-3+2, height-1) * stride_line);
1242 static void spatial_compose97i_init(dwt_compose_t *cs, IDWTELEM *buffer, int height, int stride){
1243 cs->b0 = buffer + mirror(-3-1, height-1)*stride;
1244 cs->b1 = buffer + mirror(-3 , height-1)*stride;
1245 cs->b2 = buffer + mirror(-3+1, height-1)*stride;
1246 cs->b3 = buffer + mirror(-3+2, height-1)*stride;
1250 static void spatial_compose97i_dy_buffered(DSPContext *dsp, dwt_compose_t *cs, slice_buffer * sb, int width, int height, int stride_line){
1253 IDWTELEM *b0= cs->b0;
1254 IDWTELEM *b1= cs->b1;
1255 IDWTELEM *b2= cs->b2;
1256 IDWTELEM *b3= cs->b3;
1257 IDWTELEM *b4= slice_buffer_get_line(sb, mirror(y + 3, height - 1) * stride_line);
1258 IDWTELEM *b5= slice_buffer_get_line(sb, mirror(y + 4, height - 1) * stride_line);
1261 if(y>0 && y+4<height){
1262 dsp->vertical_compose97i(b0, b1, b2, b3, b4, b5, width);
1264 if(y+3<(unsigned)height) vertical_compose97iL1(b3, b4, b5, width);
1265 if(y+2<(unsigned)height) vertical_compose97iH1(b2, b3, b4, width);
1266 if(y+1<(unsigned)height) vertical_compose97iL0(b1, b2, b3, width);
1267 if(y+0<(unsigned)height) vertical_compose97iH0(b0, b1, b2, width);
1270 STOP_TIMER("vertical_compose97i")}}
1273 if(y-1<(unsigned)height) dsp->horizontal_compose97i(b0, width);
1274 if(y+0<(unsigned)height) dsp->horizontal_compose97i(b1, width);
1275 if(width>400 && y+0<(unsigned)height){
1276 STOP_TIMER("horizontal_compose97i")}}
1285 static void spatial_compose97i_dy(dwt_compose_t *cs, IDWTELEM *buffer, int width, int height, int stride){
1287 IDWTELEM *b0= cs->b0;
1288 IDWTELEM *b1= cs->b1;
1289 IDWTELEM *b2= cs->b2;
1290 IDWTELEM *b3= cs->b3;
1291 IDWTELEM *b4= buffer + mirror(y+3, height-1)*stride;
1292 IDWTELEM *b5= buffer + mirror(y+4, height-1)*stride;
1295 if(y+3<(unsigned)height) vertical_compose97iL1(b3, b4, b5, width);
1296 if(y+2<(unsigned)height) vertical_compose97iH1(b2, b3, b4, width);
1297 if(y+1<(unsigned)height) vertical_compose97iL0(b1, b2, b3, width);
1298 if(y+0<(unsigned)height) vertical_compose97iH0(b0, b1, b2, width);
1300 STOP_TIMER("vertical_compose97i")}}
1303 if(y-1<(unsigned)height) ff_snow_horizontal_compose97i(b0, width);
1304 if(y+0<(unsigned)height) ff_snow_horizontal_compose97i(b1, width);
1305 if(width>400 && b0 <= b2){
1306 STOP_TIMER("horizontal_compose97i")}}
1315 static void spatial_compose97i(IDWTELEM *buffer, int width, int height, int stride){
1317 spatial_compose97i_init(&cs, buffer, height, stride);
1318 while(cs.y <= height)
1319 spatial_compose97i_dy(&cs, buffer, width, height, stride);
1322 static void ff_spatial_idwt_buffered_init(dwt_compose_t *cs, slice_buffer * sb, int width, int height, int stride_line, int type, int decomposition_count){
1324 for(level=decomposition_count-1; level>=0; level--){
1326 case DWT_97: spatial_compose97i_buffered_init(cs+level, sb, height>>level, stride_line<<level); break;
1327 case DWT_53: spatial_compose53i_buffered_init(cs+level, sb, height>>level, stride_line<<level); break;
1332 static void ff_spatial_idwt_init(dwt_compose_t *cs, IDWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
1334 for(level=decomposition_count-1; level>=0; level--){
1336 case DWT_97: spatial_compose97i_init(cs+level, buffer, height>>level, stride<<level); break;
1337 case DWT_53: spatial_compose53i_init(cs+level, buffer, height>>level, stride<<level); break;
1342 static void ff_spatial_idwt_slice(dwt_compose_t *cs, IDWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count, int y){
1343 const int support = type==1 ? 3 : 5;
1347 for(level=decomposition_count-1; level>=0; level--){
1348 while(cs[level].y <= FFMIN((y>>level)+support, height>>level)){
1350 case DWT_97: spatial_compose97i_dy(cs+level, buffer, width>>level, height>>level, stride<<level);
1352 case DWT_53: spatial_compose53i_dy(cs+level, buffer, width>>level, height>>level, stride<<level);
1359 static void ff_spatial_idwt_buffered_slice(DSPContext *dsp, dwt_compose_t *cs, slice_buffer * slice_buf, int width, int height, int stride_line, int type, int decomposition_count, int y){
1360 const int support = type==1 ? 3 : 5;
1364 for(level=decomposition_count-1; level>=0; level--){
1365 while(cs[level].y <= FFMIN((y>>level)+support, height>>level)){
1367 case DWT_97: spatial_compose97i_dy_buffered(dsp, cs+level, slice_buf, width>>level, height>>level, stride_line<<level);
1369 case DWT_53: spatial_compose53i_dy_buffered(cs+level, slice_buf, width>>level, height>>level, stride_line<<level);
1376 static void ff_spatial_idwt(IDWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
1377 dwt_compose_t cs[MAX_DECOMPOSITIONS];
1379 ff_spatial_idwt_init(cs, buffer, width, height, stride, type, decomposition_count);
1380 for(y=0; y<height; y+=4)
1381 ff_spatial_idwt_slice(cs, buffer, width, height, stride, type, decomposition_count, y);
1384 static int encode_subband_c0run(SnowContext *s, SubBand *b, IDWTELEM *src, IDWTELEM *parent, int stride, int orientation){
1385 const int w= b->width;
1386 const int h= b->height;
1398 int /*ll=0, */l=0, lt=0, t=0, rt=0;
1399 v= src[x + y*stride];
1402 t= src[x + (y-1)*stride];
1404 lt= src[x - 1 + (y-1)*stride];
1407 rt= src[x + 1 + (y-1)*stride];
1411 l= src[x - 1 + y*stride];
1413 if(orientation==1) ll= src[y + (x-2)*stride];
1414 else ll= src[x - 2 + y*stride];
1420 if(px<b->parent->width && py<b->parent->height)
1421 p= parent[px + py*2*stride];
1423 if(!(/*ll|*/l|lt|t|rt|p)){
1425 runs[run_index++]= run;
1433 max_index= run_index;
1434 runs[run_index++]= run;
1436 run= runs[run_index++];
1438 put_symbol2(&s->c, b->state[30], max_index, 0);
1439 if(run_index <= max_index)
1440 put_symbol2(&s->c, b->state[1], run, 3);
1443 if(s->c.bytestream_end - s->c.bytestream < w*40){
1444 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
1449 int /*ll=0, */l=0, lt=0, t=0, rt=0;
1450 v= src[x + y*stride];
1453 t= src[x + (y-1)*stride];
1455 lt= src[x - 1 + (y-1)*stride];
1458 rt= src[x + 1 + (y-1)*stride];
1462 l= src[x - 1 + y*stride];
1464 if(orientation==1) ll= src[y + (x-2)*stride];
1465 else ll= src[x - 2 + y*stride];
1471 if(px<b->parent->width && py<b->parent->height)
1472 p= parent[px + py*2*stride];
1474 if(/*ll|*/l|lt|t|rt|p){
1475 int context= av_log2(/*FFABS(ll) + */3*FFABS(l) + FFABS(lt) + 2*FFABS(t) + FFABS(rt) + FFABS(p));
1477 put_rac(&s->c, &b->state[0][context], !!v);
1480 run= runs[run_index++];
1482 if(run_index <= max_index)
1483 put_symbol2(&s->c, b->state[1], run, 3);
1491 int context= av_log2(/*FFABS(ll) + */3*FFABS(l) + FFABS(lt) + 2*FFABS(t) + FFABS(rt) + FFABS(p));
1492 int l2= 2*FFABS(l) + (l<0);
1493 int t2= 2*FFABS(t) + (t<0);
1495 put_symbol2(&s->c, b->state[context + 2], FFABS(v)-1, context-4);
1496 put_rac(&s->c, &b->state[0][16 + 1 + 3 + quant3bA[l2&0xFF] + 3*quant3bA[t2&0xFF]], v<0);
1504 static int encode_subband(SnowContext *s, SubBand *b, IDWTELEM *src, IDWTELEM *parent, int stride, int orientation){
1505 // encode_subband_qtree(s, b, src, parent, stride, orientation);
1506 // encode_subband_z0run(s, b, src, parent, stride, orientation);
1507 return encode_subband_c0run(s, b, src, parent, stride, orientation);
1508 // encode_subband_dzr(s, b, src, parent, stride, orientation);
1511 static inline void unpack_coeffs(SnowContext *s, SubBand *b, SubBand * parent, int orientation){
1512 const int w= b->width;
1513 const int h= b->height;
1518 x_and_coeff *xc= b->x_coeff;
1519 x_and_coeff *prev_xc= NULL;
1520 x_and_coeff *prev2_xc= xc;
1521 x_and_coeff *parent_xc= parent ? parent->x_coeff : NULL;
1522 x_and_coeff *prev_parent_xc= parent_xc;
1524 runs= get_symbol2(&s->c, b->state[30], 0);
1525 if(runs-- > 0) run= get_symbol2(&s->c, b->state[1], 3);
1530 int lt=0, t=0, rt=0;
1532 if(y && prev_xc->x == 0){
1544 if(prev_xc->x == x + 1)
1550 if(x>>1 > parent_xc->x){
1553 if(x>>1 == parent_xc->x){
1554 p= parent_xc->coeff;
1557 if(/*ll|*/l|lt|t|rt|p){
1558 int context= av_log2(/*FFABS(ll) + */3*(l>>1) + (lt>>1) + (t&~1) + (rt>>1) + (p>>1));
1560 v=get_rac(&s->c, &b->state[0][context]);
1562 v= 2*(get_symbol2(&s->c, b->state[context + 2], context-4) + 1);
1563 v+=get_rac(&s->c, &b->state[0][16 + 1 + 3 + quant3bA[l&0xFF] + 3*quant3bA[t&0xFF]]);
1570 if(runs-- > 0) run= get_symbol2(&s->c, b->state[1], 3);
1572 v= 2*(get_symbol2(&s->c, b->state[0 + 2], 0-4) + 1);
1573 v+=get_rac(&s->c, &b->state[0][16 + 1 + 3]);
1582 if(y) max_run= FFMIN(run, prev_xc->x - x - 2);
1583 else max_run= FFMIN(run, w-x-1);
1585 max_run= FFMIN(max_run, 2*parent_xc->x - x - 1);
1591 (xc++)->x= w+1; //end marker
1597 while(parent_xc->x != parent->width+1)
1600 prev_parent_xc= parent_xc;
1602 parent_xc= prev_parent_xc;
1607 (xc++)->x= w+1; //end marker
1611 static inline void decode_subband_slice_buffered(SnowContext *s, SubBand *b, slice_buffer * sb, int start_y, int h, int save_state[1]){
1612 const int w= b->width;
1614 const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
1615 int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
1616 int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
1621 if(b->ibuf == s->spatial_idwt_buffer || s->qlog == LOSSLESS_QLOG){
1626 /* If we are on the second or later slice, restore our index. */
1628 new_index = save_state[0];
1631 for(y=start_y; y<h; y++){
1634 IDWTELEM * line = slice_buffer_get_line(sb, y * b->stride_line + b->buf_y_offset) + b->buf_x_offset;
1635 memset(line, 0, b->width*sizeof(IDWTELEM));
1636 v = b->x_coeff[new_index].coeff;
1637 x = b->x_coeff[new_index++].x;
1640 register int t= ( (v>>1)*qmul + qadd)>>QEXPSHIFT;
1641 register int u= -(v&1);
1642 line[x] = (t^u) - u;
1644 v = b->x_coeff[new_index].coeff;
1645 x = b->x_coeff[new_index++].x;
1648 if(w > 200 && start_y != 0/*level+1 == s->spatial_decomposition_count*/){
1649 STOP_TIMER("decode_subband")
1652 /* Save our variables for the next slice. */
1653 save_state[0] = new_index;
1658 static void reset_contexts(SnowContext *s){ //FIXME better initial contexts
1659 int plane_index, level, orientation;
1661 for(plane_index=0; plane_index<3; plane_index++){
1662 for(level=0; level<s->spatial_decomposition_count; level++){
1663 for(orientation=level ? 1:0; orientation<4; orientation++){
1664 memset(s->plane[plane_index].band[level][orientation].state, MID_STATE, sizeof(s->plane[plane_index].band[level][orientation].state));
1668 memset(s->header_state, MID_STATE, sizeof(s->header_state));
1669 memset(s->block_state, MID_STATE, sizeof(s->block_state));
1672 static int alloc_blocks(SnowContext *s){
1673 int w= -((-s->avctx->width )>>LOG2_MB_SIZE);
1674 int h= -((-s->avctx->height)>>LOG2_MB_SIZE);
1679 s->block= av_mallocz(w * h * sizeof(BlockNode) << (s->block_max_depth*2));
1683 static inline void copy_rac_state(RangeCoder *d, RangeCoder *s){
1684 uint8_t *bytestream= d->bytestream;
1685 uint8_t *bytestream_start= d->bytestream_start;
1687 d->bytestream= bytestream;
1688 d->bytestream_start= bytestream_start;
1691 //near copy & paste from dsputil, FIXME
1692 static int pix_sum(uint8_t * pix, int line_size, int w)
1697 for (i = 0; i < w; i++) {
1698 for (j = 0; j < w; j++) {
1702 pix += line_size - w;
1707 //near copy & paste from dsputil, FIXME
1708 static int pix_norm1(uint8_t * pix, int line_size, int w)
1711 uint32_t *sq = ff_squareTbl + 256;
1714 for (i = 0; i < w; i++) {
1715 for (j = 0; j < w; j ++) {
1719 pix += line_size - w;
1724 static inline void set_blocks(SnowContext *s, int level, int x, int y, int l, int cb, int cr, int mx, int my, int ref, int type){
1725 const int w= s->b_width << s->block_max_depth;
1726 const int rem_depth= s->block_max_depth - level;
1727 const int index= (x + y*w) << rem_depth;
1728 const int block_w= 1<<rem_depth;
1741 for(j=0; j<block_w; j++){
1742 for(i=0; i<block_w; i++){
1743 s->block[index + i + j*w]= block;
1748 static inline void init_ref(MotionEstContext *c, uint8_t *src[3], uint8_t *ref[3], uint8_t *ref2[3], int x, int y, int ref_index){
1749 const int offset[3]= {
1751 ((y*c->uvstride + x)>>1),
1752 ((y*c->uvstride + x)>>1),
1756 c->src[0][i]= src [i];
1757 c->ref[0][i]= ref [i] + offset[i];
1762 static inline void pred_mv(SnowContext *s, int *mx, int *my, int ref,
1763 const BlockNode *left, const BlockNode *top, const BlockNode *tr){
1764 if(s->ref_frames == 1){
1765 *mx = mid_pred(left->mx, top->mx, tr->mx);
1766 *my = mid_pred(left->my, top->my, tr->my);
1768 const int *scale = scale_mv_ref[ref];
1769 *mx = mid_pred((left->mx * scale[left->ref] + 128) >>8,
1770 (top ->mx * scale[top ->ref] + 128) >>8,
1771 (tr ->mx * scale[tr ->ref] + 128) >>8);
1772 *my = mid_pred((left->my * scale[left->ref] + 128) >>8,
1773 (top ->my * scale[top ->ref] + 128) >>8,
1774 (tr ->my * scale[tr ->ref] + 128) >>8);
1781 #define P_TOPRIGHT P[3]
1782 #define P_MEDIAN P[4]
1784 #define FLAG_QPEL 1 //must be 1
1786 static int encode_q_branch(SnowContext *s, int level, int x, int y){
1787 uint8_t p_buffer[1024];
1788 uint8_t i_buffer[1024];
1789 uint8_t p_state[sizeof(s->block_state)];
1790 uint8_t i_state[sizeof(s->block_state)];
1792 uint8_t *pbbak= s->c.bytestream;
1793 uint8_t *pbbak_start= s->c.bytestream_start;
1794 int score, score2, iscore, i_len, p_len, block_s, sum, base_bits;
1795 const int w= s->b_width << s->block_max_depth;
1796 const int h= s->b_height << s->block_max_depth;
1797 const int rem_depth= s->block_max_depth - level;
1798 const int index= (x + y*w) << rem_depth;
1799 const int block_w= 1<<(LOG2_MB_SIZE - level);
1800 int trx= (x+1)<<rem_depth;
1801 int try= (y+1)<<rem_depth;
1802 const BlockNode *left = x ? &s->block[index-1] : &null_block;
1803 const BlockNode *top = y ? &s->block[index-w] : &null_block;
1804 const BlockNode *right = trx<w ? &s->block[index+1] : &null_block;
1805 const BlockNode *bottom= try<h ? &s->block[index+w] : &null_block;
1806 const BlockNode *tl = y && x ? &s->block[index-w-1] : left;
1807 const BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
1808 int pl = left->color[0];
1809 int pcb= left->color[1];
1810 int pcr= left->color[2];
1814 const int stride= s->current_picture.linesize[0];
1815 const int uvstride= s->current_picture.linesize[1];
1816 uint8_t *current_data[3]= { s->input_picture.data[0] + (x + y* stride)*block_w,
1817 s->input_picture.data[1] + (x + y*uvstride)*block_w/2,
1818 s->input_picture.data[2] + (x + y*uvstride)*block_w/2};
1820 int16_t last_mv[3][2];
1821 int qpel= !!(s->avctx->flags & CODEC_FLAG_QPEL); //unused
1822 const int shift= 1+qpel;
1823 MotionEstContext *c= &s->m.me;
1824 int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref);
1825 int mx_context= av_log2(2*FFABS(left->mx - top->mx));
1826 int my_context= av_log2(2*FFABS(left->my - top->my));
1827 int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
1828 int ref, best_ref, ref_score, ref_mx, ref_my;
1830 assert(sizeof(s->block_state) >= 256);
1832 set_blocks(s, level, x, y, pl, pcb, pcr, 0, 0, 0, BLOCK_INTRA);
1836 // clip predictors / edge ?
1838 P_LEFT[0]= left->mx;
1839 P_LEFT[1]= left->my;
1842 P_TOPRIGHT[0]= tr->mx;
1843 P_TOPRIGHT[1]= tr->my;
1845 last_mv[0][0]= s->block[index].mx;
1846 last_mv[0][1]= s->block[index].my;
1847 last_mv[1][0]= right->mx;
1848 last_mv[1][1]= right->my;
1849 last_mv[2][0]= bottom->mx;
1850 last_mv[2][1]= bottom->my;
1857 assert(c-> stride == stride);
1858 assert(c->uvstride == uvstride);
1860 c->penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_cmp);
1861 c->sub_penalty_factor= get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_sub_cmp);
1862 c->mb_penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->mb_cmp);
1863 c->current_mv_penalty= c->mv_penalty[s->m.f_code=1] + MAX_MV;
1865 c->xmin = - x*block_w - 16+2;
1866 c->ymin = - y*block_w - 16+2;
1867 c->xmax = - (x+1)*block_w + (w<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-2;
1868 c->ymax = - (y+1)*block_w + (h<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-2;
1870 if(P_LEFT[0] > (c->xmax<<shift)) P_LEFT[0] = (c->xmax<<shift);
1871 if(P_LEFT[1] > (c->ymax<<shift)) P_LEFT[1] = (c->ymax<<shift);
1872 if(P_TOP[0] > (c->xmax<<shift)) P_TOP[0] = (c->xmax<<shift);
1873 if(P_TOP[1] > (c->ymax<<shift)) P_TOP[1] = (c->ymax<<shift);
1874 if(P_TOPRIGHT[0] < (c->xmin<<shift)) P_TOPRIGHT[0]= (c->xmin<<shift);
1875 if(P_TOPRIGHT[0] > (c->xmax<<shift)) P_TOPRIGHT[0]= (c->xmax<<shift); //due to pmx no clip
1876 if(P_TOPRIGHT[1] > (c->ymax<<shift)) P_TOPRIGHT[1]= (c->ymax<<shift);
1878 P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]);
1879 P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);
1882 c->pred_x= P_LEFT[0];
1883 c->pred_y= P_LEFT[1];
1885 c->pred_x = P_MEDIAN[0];
1886 c->pred_y = P_MEDIAN[1];
1891 for(ref=0; ref<s->ref_frames; ref++){
1892 init_ref(c, current_data, s->last_picture[ref].data, NULL, block_w*x, block_w*y, 0);
1894 ref_score= ff_epzs_motion_search(&s->m, &ref_mx, &ref_my, P, 0, /*ref_index*/ 0, last_mv,
1895 (1<<16)>>shift, level-LOG2_MB_SIZE+4, block_w);
1897 assert(ref_mx >= c->xmin);
1898 assert(ref_mx <= c->xmax);
1899 assert(ref_my >= c->ymin);
1900 assert(ref_my <= c->ymax);
1902 ref_score= c->sub_motion_search(&s->m, &ref_mx, &ref_my, ref_score, 0, 0, level-LOG2_MB_SIZE+4, block_w);
1903 ref_score= ff_get_mb_score(&s->m, ref_mx, ref_my, 0, 0, level-LOG2_MB_SIZE+4, block_w, 0);
1904 ref_score+= 2*av_log2(2*ref)*c->penalty_factor;
1905 if(s->ref_mvs[ref]){
1906 s->ref_mvs[ref][index][0]= ref_mx;
1907 s->ref_mvs[ref][index][1]= ref_my;
1908 s->ref_scores[ref][index]= ref_score;
1910 if(score > ref_score){
1917 //FIXME if mb_cmp != SSE then intra cannot be compared currently and mb_penalty vs. lambda2
1920 base_bits= get_rac_count(&s->c) - 8*(s->c.bytestream - s->c.bytestream_start);
1922 pc.bytestream_start=
1923 pc.bytestream= p_buffer; //FIXME end/start? and at the other stoo
1924 memcpy(p_state, s->block_state, sizeof(s->block_state));
1926 if(level!=s->block_max_depth)
1927 put_rac(&pc, &p_state[4 + s_context], 1);
1928 put_rac(&pc, &p_state[1 + left->type + top->type], 0);
1929 if(s->ref_frames > 1)
1930 put_symbol(&pc, &p_state[128 + 1024 + 32*ref_context], best_ref, 0);
1931 pred_mv(s, &pmx, &pmy, best_ref, left, top, tr);
1932 put_symbol(&pc, &p_state[128 + 32*(mx_context + 16*!!best_ref)], mx - pmx, 1);
1933 put_symbol(&pc, &p_state[128 + 32*(my_context + 16*!!best_ref)], my - pmy, 1);
1934 p_len= pc.bytestream - pc.bytestream_start;
1935 score += (s->lambda2*(get_rac_count(&pc)-base_bits))>>FF_LAMBDA_SHIFT;
1937 block_s= block_w*block_w;
1938 sum = pix_sum(current_data[0], stride, block_w);
1939 l= (sum + block_s/2)/block_s;
1940 iscore = pix_norm1(current_data[0], stride, block_w) - 2*l*sum + l*l*block_s;
1942 block_s= block_w*block_w>>2;
1943 sum = pix_sum(current_data[1], uvstride, block_w>>1);
1944 cb= (sum + block_s/2)/block_s;
1945 // iscore += pix_norm1(¤t_mb[1][0], uvstride, block_w>>1) - 2*cb*sum + cb*cb*block_s;
1946 sum = pix_sum(current_data[2], uvstride, block_w>>1);
1947 cr= (sum + block_s/2)/block_s;
1948 // iscore += pix_norm1(¤t_mb[2][0], uvstride, block_w>>1) - 2*cr*sum + cr*cr*block_s;
1951 ic.bytestream_start=
1952 ic.bytestream= i_buffer; //FIXME end/start? and at the other stoo
1953 memcpy(i_state, s->block_state, sizeof(s->block_state));
1954 if(level!=s->block_max_depth)
1955 put_rac(&ic, &i_state[4 + s_context], 1);
1956 put_rac(&ic, &i_state[1 + left->type + top->type], 1);
1957 put_symbol(&ic, &i_state[32], l-pl , 1);
1958 put_symbol(&ic, &i_state[64], cb-pcb, 1);
1959 put_symbol(&ic, &i_state[96], cr-pcr, 1);
1960 i_len= ic.bytestream - ic.bytestream_start;
1961 iscore += (s->lambda2*(get_rac_count(&ic)-base_bits))>>FF_LAMBDA_SHIFT;
1963 // assert(score==256*256*256*64-1);
1964 assert(iscore < 255*255*256 + s->lambda2*10);
1965 assert(iscore >= 0);
1966 assert(l>=0 && l<=255);
1967 assert(pl>=0 && pl<=255);
1970 int varc= iscore >> 8;
1971 int vard= score >> 8;
1972 if (vard <= 64 || vard < varc)
1973 c->scene_change_score+= ff_sqrt(vard) - ff_sqrt(varc);
1975 c->scene_change_score+= s->m.qscale;
1978 if(level!=s->block_max_depth){
1979 put_rac(&s->c, &s->block_state[4 + s_context], 0);
1980 score2 = encode_q_branch(s, level+1, 2*x+0, 2*y+0);
1981 score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+0);
1982 score2+= encode_q_branch(s, level+1, 2*x+0, 2*y+1);
1983 score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+1);
1984 score2+= s->lambda2>>FF_LAMBDA_SHIFT; //FIXME exact split overhead
1986 if(score2 < score && score2 < iscore)
1991 pred_mv(s, &pmx, &pmy, 0, left, top, tr);
1992 memcpy(pbbak, i_buffer, i_len);
1994 s->c.bytestream_start= pbbak_start;
1995 s->c.bytestream= pbbak + i_len;
1996 set_blocks(s, level, x, y, l, cb, cr, pmx, pmy, 0, BLOCK_INTRA);
1997 memcpy(s->block_state, i_state, sizeof(s->block_state));
2000 memcpy(pbbak, p_buffer, p_len);
2002 s->c.bytestream_start= pbbak_start;
2003 s->c.bytestream= pbbak + p_len;
2004 set_blocks(s, level, x, y, pl, pcb, pcr, mx, my, best_ref, 0);
2005 memcpy(s->block_state, p_state, sizeof(s->block_state));
2010 static av_always_inline int same_block(BlockNode *a, BlockNode *b){
2011 if((a->type&BLOCK_INTRA) && (b->type&BLOCK_INTRA)){
2012 return !((a->color[0] - b->color[0]) | (a->color[1] - b->color[1]) | (a->color[2] - b->color[2]));
2014 return !((a->mx - b->mx) | (a->my - b->my) | (a->ref - b->ref) | ((a->type ^ b->type)&BLOCK_INTRA));
2018 static void encode_q_branch2(SnowContext *s, int level, int x, int y){
2019 const int w= s->b_width << s->block_max_depth;
2020 const int rem_depth= s->block_max_depth - level;
2021 const int index= (x + y*w) << rem_depth;
2022 int trx= (x+1)<<rem_depth;
2023 BlockNode *b= &s->block[index];
2024 const BlockNode *left = x ? &s->block[index-1] : &null_block;
2025 const BlockNode *top = y ? &s->block[index-w] : &null_block;
2026 const BlockNode *tl = y && x ? &s->block[index-w-1] : left;
2027 const BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
2028 int pl = left->color[0];
2029 int pcb= left->color[1];
2030 int pcr= left->color[2];
2032 int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref);
2033 int mx_context= av_log2(2*FFABS(left->mx - top->mx)) + 16*!!b->ref;
2034 int my_context= av_log2(2*FFABS(left->my - top->my)) + 16*!!b->ref;
2035 int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
2038 set_blocks(s, level, x, y, pl, pcb, pcr, 0, 0, 0, BLOCK_INTRA);
2042 if(level!=s->block_max_depth){
2043 if(same_block(b,b+1) && same_block(b,b+w) && same_block(b,b+w+1)){
2044 put_rac(&s->c, &s->block_state[4 + s_context], 1);
2046 put_rac(&s->c, &s->block_state[4 + s_context], 0);
2047 encode_q_branch2(s, level+1, 2*x+0, 2*y+0);
2048 encode_q_branch2(s, level+1, 2*x+1, 2*y+0);
2049 encode_q_branch2(s, level+1, 2*x+0, 2*y+1);
2050 encode_q_branch2(s, level+1, 2*x+1, 2*y+1);
2054 if(b->type & BLOCK_INTRA){
2055 pred_mv(s, &pmx, &pmy, 0, left, top, tr);
2056 put_rac(&s->c, &s->block_state[1 + (left->type&1) + (top->type&1)], 1);
2057 put_symbol(&s->c, &s->block_state[32], b->color[0]-pl , 1);
2058 put_symbol(&s->c, &s->block_state[64], b->color[1]-pcb, 1);
2059 put_symbol(&s->c, &s->block_state[96], b->color[2]-pcr, 1);
2060 set_blocks(s, level, x, y, b->color[0], b->color[1], b->color[2], pmx, pmy, 0, BLOCK_INTRA);
2062 pred_mv(s, &pmx, &pmy, b->ref, left, top, tr);
2063 put_rac(&s->c, &s->block_state[1 + (left->type&1) + (top->type&1)], 0);
2064 if(s->ref_frames > 1)
2065 put_symbol(&s->c, &s->block_state[128 + 1024 + 32*ref_context], b->ref, 0);
2066 put_symbol(&s->c, &s->block_state[128 + 32*mx_context], b->mx - pmx, 1);
2067 put_symbol(&s->c, &s->block_state[128 + 32*my_context], b->my - pmy, 1);
2068 set_blocks(s, level, x, y, pl, pcb, pcr, b->mx, b->my, b->ref, 0);
2072 static void decode_q_branch(SnowContext *s, int level, int x, int y){
2073 const int w= s->b_width << s->block_max_depth;
2074 const int rem_depth= s->block_max_depth - level;
2075 const int index= (x + y*w) << rem_depth;
2076 int trx= (x+1)<<rem_depth;
2077 const BlockNode *left = x ? &s->block[index-1] : &null_block;
2078 const BlockNode *top = y ? &s->block[index-w] : &null_block;
2079 const BlockNode *tl = y && x ? &s->block[index-w-1] : left;
2080 const BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
2081 int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
2084 set_blocks(s, level, x, y, null_block.color[0], null_block.color[1], null_block.color[2], null_block.mx, null_block.my, null_block.ref, BLOCK_INTRA);
2088 if(level==s->block_max_depth || get_rac(&s->c, &s->block_state[4 + s_context])){
2090 int l = left->color[0];
2091 int cb= left->color[1];
2092 int cr= left->color[2];
2094 int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref);
2095 int mx_context= av_log2(2*FFABS(left->mx - top->mx)) + 0*av_log2(2*FFABS(tr->mx - top->mx));
2096 int my_context= av_log2(2*FFABS(left->my - top->my)) + 0*av_log2(2*FFABS(tr->my - top->my));
2098 type= get_rac(&s->c, &s->block_state[1 + left->type + top->type]) ? BLOCK_INTRA : 0;
2101 pred_mv(s, &mx, &my, 0, left, top, tr);
2102 l += get_symbol(&s->c, &s->block_state[32], 1);
2103 cb+= get_symbol(&s->c, &s->block_state[64], 1);
2104 cr+= get_symbol(&s->c, &s->block_state[96], 1);
2106 if(s->ref_frames > 1)
2107 ref= get_symbol(&s->c, &s->block_state[128 + 1024 + 32*ref_context], 0);
2108 pred_mv(s, &mx, &my, ref, left, top, tr);
2109 mx+= get_symbol(&s->c, &s->block_state[128 + 32*(mx_context + 16*!!ref)], 1);
2110 my+= get_symbol(&s->c, &s->block_state[128 + 32*(my_context + 16*!!ref)], 1);
2112 set_blocks(s, level, x, y, l, cb, cr, mx, my, ref, type);
2114 decode_q_branch(s, level+1, 2*x+0, 2*y+0);
2115 decode_q_branch(s, level+1, 2*x+1, 2*y+0);
2116 decode_q_branch(s, level+1, 2*x+0, 2*y+1);
2117 decode_q_branch(s, level+1, 2*x+1, 2*y+1);
2121 static void encode_blocks(SnowContext *s, int search){
2126 if(s->avctx->me_method == ME_ITER && !s->keyframe && search)
2130 if(s->c.bytestream_end - s->c.bytestream < w*MB_SIZE*MB_SIZE*3){ //FIXME nicer limit
2131 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2135 if(s->avctx->me_method == ME_ITER || !search)
2136 encode_q_branch2(s, 0, x, y);
2138 encode_q_branch (s, 0, x, y);
2143 static void decode_blocks(SnowContext *s){
2150 decode_q_branch(s, 0, x, y);
2155 static void mc_block(Plane *p, uint8_t *dst, const uint8_t *src, uint8_t *tmp, int stride, int b_w, int b_h, int dx, int dy){
2156 const static uint8_t weight[64]={
2167 const static uint8_t brane[256]={
2168 0x00,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x11,0x12,0x12,0x12,0x12,0x12,0x12,0x12,
2169 0x04,0x05,0xcc,0xcc,0xcc,0xcc,0xcc,0x41,0x15,0x16,0xcc,0xcc,0xcc,0xcc,0xcc,0x52,
2170 0x04,0xcc,0x05,0xcc,0xcc,0xcc,0x41,0xcc,0x15,0xcc,0x16,0xcc,0xcc,0xcc,0x52,0xcc,
2171 0x04,0xcc,0xcc,0x05,0xcc,0x41,0xcc,0xcc,0x15,0xcc,0xcc,0x16,0xcc,0x52,0xcc,0xcc,
2172 0x04,0xcc,0xcc,0xcc,0x41,0xcc,0xcc,0xcc,0x15,0xcc,0xcc,0xcc,0x16,0xcc,0xcc,0xcc,
2173 0x04,0xcc,0xcc,0x41,0xcc,0x05,0xcc,0xcc,0x15,0xcc,0xcc,0x52,0xcc,0x16,0xcc,0xcc,
2174 0x04,0xcc,0x41,0xcc,0xcc,0xcc,0x05,0xcc,0x15,0xcc,0x52,0xcc,0xcc,0xcc,0x16,0xcc,
2175 0x04,0x41,0xcc,0xcc,0xcc,0xcc,0xcc,0x05,0x15,0x52,0xcc,0xcc,0xcc,0xcc,0xcc,0x16,
2176 0x44,0x45,0x45,0x45,0x45,0x45,0x45,0x45,0x55,0x56,0x56,0x56,0x56,0x56,0x56,0x56,
2177 0x48,0x49,0xcc,0xcc,0xcc,0xcc,0xcc,0x85,0x59,0x5A,0xcc,0xcc,0xcc,0xcc,0xcc,0x96,
2178 0x48,0xcc,0x49,0xcc,0xcc,0xcc,0x85,0xcc,0x59,0xcc,0x5A,0xcc,0xcc,0xcc,0x96,0xcc,
2179 0x48,0xcc,0xcc,0x49,0xcc,0x85,0xcc,0xcc,0x59,0xcc,0xcc,0x5A,0xcc,0x96,0xcc,0xcc,
2180 0x48,0xcc,0xcc,0xcc,0x49,0xcc,0xcc,0xcc,0x59,0xcc,0xcc,0xcc,0x96,0xcc,0xcc,0xcc,
2181 0x48,0xcc,0xcc,0x85,0xcc,0x49,0xcc,0xcc,0x59,0xcc,0xcc,0x96,0xcc,0x5A,0xcc,0xcc,
2182 0x48,0xcc,0x85,0xcc,0xcc,0xcc,0x49,0xcc,0x59,0xcc,0x96,0xcc,0xcc,0xcc,0x5A,0xcc,
2183 0x48,0x85,0xcc,0xcc,0xcc,0xcc,0xcc,0x49,0x59,0x96,0xcc,0xcc,0xcc,0xcc,0xcc,0x5A,
2186 const static uint8_t needs[16]={
2194 int16_t tmpIt [64*(32+HTAPS_MAX)];
2195 uint8_t tmp2t[3][stride*(32+HTAPS_MAX)];
2196 int16_t *tmpI= tmpIt;
2197 uint8_t *tmp2= tmp2t[0];
2200 assert(dx<16 && dy<16);
2201 r= brane[dx + 16*dy]&15;
2202 l= brane[dx + 16*dy]>>4;
2204 b= needs[l] | needs[r];
2205 if(p && !p->diag_mc)
2209 for(y=0; y < b_h+HTAPS_MAX-1; y++){
2210 for(x=0; x < b_w; x++){
2211 int a_1=src[x + HTAPS_MAX/2-4];
2212 int a0= src[x + HTAPS_MAX/2-3];
2213 int a1= src[x + HTAPS_MAX/2-2];
2214 int a2= src[x + HTAPS_MAX/2-1];
2215 int a3= src[x + HTAPS_MAX/2+0];
2216 int a4= src[x + HTAPS_MAX/2+1];
2217 int a5= src[x + HTAPS_MAX/2+2];
2218 int a6= src[x + HTAPS_MAX/2+3];
2220 if(!p || p->fast_mc){
2221 am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5);
2225 am= p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6);
2230 if(am&(~255)) am= ~(am>>31);
2239 src += HTAPS_MAX/2 - 1;
2243 for(y=0; y < b_h; y++){
2244 for(x=0; x < b_w+1; x++){
2245 int a_1=src[x + (HTAPS_MAX/2-4)*stride];
2246 int a0= src[x + (HTAPS_MAX/2-3)*stride];
2247 int a1= src[x + (HTAPS_MAX/2-2)*stride];
2248 int a2= src[x + (HTAPS_MAX/2-1)*stride];
2249 int a3= src[x + (HTAPS_MAX/2+0)*stride];
2250 int a4= src[x + (HTAPS_MAX/2+1)*stride];
2251 int a5= src[x + (HTAPS_MAX/2+2)*stride];
2252 int a6= src[x + (HTAPS_MAX/2+3)*stride];
2254 if(!p || p->fast_mc)
2255 am= (20*(a2+a3) - 5*(a1+a4) + (a0+a5) + 16)>>5;
2257 am= (p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6) + 32)>>6;
2259 if(am&(~255)) am= ~(am>>31);
2267 src += stride*(HTAPS_MAX/2 - 1);
2271 for(y=0; y < b_h; y++){
2272 for(x=0; x < b_w; x++){
2273 int a_1=tmpI[x + (HTAPS_MAX/2-4)*64];
2274 int a0= tmpI[x + (HTAPS_MAX/2-3)*64];
2275 int a1= tmpI[x + (HTAPS_MAX/2-2)*64];
2276 int a2= tmpI[x + (HTAPS_MAX/2-1)*64];
2277 int a3= tmpI[x + (HTAPS_MAX/2+0)*64];
2278 int a4= tmpI[x + (HTAPS_MAX/2+1)*64];
2279 int a5= tmpI[x + (HTAPS_MAX/2+2)*64];
2280 int a6= tmpI[x + (HTAPS_MAX/2+3)*64];
2282 if(!p || p->fast_mc)
2283 am= (20*(a2+a3) - 5*(a1+a4) + (a0+a5) + 512)>>10;
2285 am= (p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6) + 2048)>>12;
2286 if(am&(~255)) am= ~(am>>31);
2295 hpel[ 1]= tmp2t[0] + stride*(HTAPS_MAX/2-1);
2300 hpel[ 6]= tmp2t[1] + 1;
2302 hpel[ 8]= src + stride;
2303 hpel[ 9]= hpel[1] + stride;
2304 hpel[10]= hpel[8] + 1;
2307 uint8_t *src1= hpel[dx/8 + dy/8*4 ];
2308 uint8_t *src2= hpel[dx/8 + dy/8*4+1];
2309 uint8_t *src3= hpel[dx/8 + dy/8*4+4];
2310 uint8_t *src4= hpel[dx/8 + dy/8*4+5];
2313 for(y=0; y < b_h; y++){
2314 for(x=0; x < b_w; x++){
2315 dst[x]= ((8-dx)*(8-dy)*src1[x] + dx*(8-dy)*src2[x]+
2316 (8-dx)* dy *src3[x] + dx* dy *src4[x]+32)>>6;
2325 uint8_t *src1= hpel[l];
2326 uint8_t *src2= hpel[r];
2327 int a= weight[((dx&7) + (8*(dy&7)))];
2329 for(y=0; y < b_h; y++){
2330 for(x=0; x < b_w; x++){
2331 dst[x]= (a*src1[x] + b*src2[x] + 4)>>3;
2338 STOP_TIMER("mc_block")
2341 #define mca(dx,dy,b_w)\
2342 static void mc_block_hpel ## dx ## dy ## b_w(uint8_t *dst, const uint8_t *src, int stride, int h){\
2343 uint8_t tmp[stride*(b_w+HTAPS_MAX-1)];\
2345 mc_block(NULL, dst, src-(HTAPS_MAX/2-1)-(HTAPS_MAX/2-1)*stride, tmp, stride, b_w, b_w, dx, dy);\
2357 static void pred_block(SnowContext *s, uint8_t *dst, uint8_t *tmp, int stride, int sx, int sy, int b_w, int b_h, BlockNode *block, int plane_index, int w, int h){
2358 if(block->type & BLOCK_INTRA){
2360 const int color = block->color[plane_index];
2361 const int color4= color*0x01010101;
2363 for(y=0; y < b_h; y++){
2364 *(uint32_t*)&dst[0 + y*stride]= color4;
2365 *(uint32_t*)&dst[4 + y*stride]= color4;
2366 *(uint32_t*)&dst[8 + y*stride]= color4;
2367 *(uint32_t*)&dst[12+ y*stride]= color4;
2368 *(uint32_t*)&dst[16+ y*stride]= color4;
2369 *(uint32_t*)&dst[20+ y*stride]= color4;
2370 *(uint32_t*)&dst[24+ y*stride]= color4;
2371 *(uint32_t*)&dst[28+ y*stride]= color4;
2374 for(y=0; y < b_h; y++){
2375 *(uint32_t*)&dst[0 + y*stride]= color4;
2376 *(uint32_t*)&dst[4 + y*stride]= color4;
2377 *(uint32_t*)&dst[8 + y*stride]= color4;
2378 *(uint32_t*)&dst[12+ y*stride]= color4;
2381 for(y=0; y < b_h; y++){
2382 *(uint32_t*)&dst[0 + y*stride]= color4;
2383 *(uint32_t*)&dst[4 + y*stride]= color4;
2386 for(y=0; y < b_h; y++){
2387 *(uint32_t*)&dst[0 + y*stride]= color4;
2390 for(y=0; y < b_h; y++){
2391 for(x=0; x < b_w; x++){
2392 dst[x + y*stride]= color;
2397 uint8_t *src= s->last_picture[block->ref].data[plane_index];
2398 const int scale= plane_index ? s->mv_scale : 2*s->mv_scale;
2399 int mx= block->mx*scale;
2400 int my= block->my*scale;
2401 const int dx= mx&15;
2402 const int dy= my&15;
2403 const int tab_index= 3 - (b_w>>2) + (b_w>>4);
2404 sx += (mx>>4) - (HTAPS_MAX/2-1);
2405 sy += (my>>4) - (HTAPS_MAX/2-1);
2406 src += sx + sy*stride;
2407 if( (unsigned)sx >= w - b_w - (HTAPS_MAX-2)
2408 || (unsigned)sy >= h - b_h - (HTAPS_MAX-2)){
2409 ff_emulated_edge_mc(tmp + MB_SIZE, src, stride, b_w+HTAPS_MAX-1, b_h+HTAPS_MAX-1, sx, sy, w, h);
2412 // assert(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h);
2413 // assert(!(b_w&(b_w-1)));
2414 assert(b_w>1 && b_h>1);
2415 assert(tab_index>=0 && tab_index<4 || b_w==32);
2416 if((dx&3) || (dy&3) || !(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h) || (b_w&(b_w-1)) || !s->plane[plane_index].fast_mc )
2417 mc_block(&s->plane[plane_index], dst, src, tmp, stride, b_w, b_h, dx, dy);
2420 for(y=0; y<b_h; y+=16){
2421 s->dsp.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + y*stride, src + 3 + (y+3)*stride,stride);
2422 s->dsp.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + 16 + y*stride, src + 19 + (y+3)*stride,stride);
2425 s->dsp.put_h264_qpel_pixels_tab[tab_index ][dy+(dx>>2)](dst,src + 3 + 3*stride,stride);
2426 else if(b_w==2*b_h){
2427 s->dsp.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst ,src + 3 + 3*stride,stride);
2428 s->dsp.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst+b_h,src + 3 + b_h + 3*stride,stride);
2431 s->dsp.put_h264_qpel_pixels_tab[tab_index ][dy+(dx>>2)](dst ,src + 3 + 3*stride ,stride);
2432 s->dsp.put_h264_qpel_pixels_tab[tab_index ][dy+(dx>>2)](dst+b_w*stride,src + 3 + 3*stride+b_w*stride,stride);
2437 void ff_snow_inner_add_yblock(const uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h,
2438 int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8){
2441 for(y=0; y<b_h; y++){
2442 //FIXME ugly misuse of obmc_stride
2443 const uint8_t *obmc1= obmc + y*obmc_stride;
2444 const uint8_t *obmc2= obmc1+ (obmc_stride>>1);
2445 const uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1);
2446 const uint8_t *obmc4= obmc3+ (obmc_stride>>1);
2447 dst = slice_buffer_get_line(sb, src_y + y);
2448 for(x=0; x<b_w; x++){
2449 int v= obmc1[x] * block[3][x + y*src_stride]
2450 +obmc2[x] * block[2][x + y*src_stride]
2451 +obmc3[x] * block[1][x + y*src_stride]
2452 +obmc4[x] * block[0][x + y*src_stride];
2454 v <<= 8 - LOG2_OBMC_MAX;
2456 v >>= 8 - FRAC_BITS;
2459 v += dst[x + src_x];
2460 v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS;
2461 if(v&(~255)) v= ~(v>>31);
2462 dst8[x + y*src_stride] = v;
2464 dst[x + src_x] -= v;
2470 //FIXME name clenup (b_w, block_w, b_width stuff)
2471 static av_always_inline void add_yblock(SnowContext *s, int sliced, slice_buffer *sb, IDWTELEM *dst, uint8_t *dst8, const uint8_t *obmc, int src_x, int src_y, int b_w, int b_h, int w, int h, int dst_stride, int src_stride, int obmc_stride, int b_x, int b_y, int add, int offset_dst, int plane_index){
2472 const int b_width = s->b_width << s->block_max_depth;
2473 const int b_height= s->b_height << s->block_max_depth;
2474 const int b_stride= b_width;
2475 BlockNode *lt= &s->block[b_x + b_y*b_stride];
2476 BlockNode *rt= lt+1;
2477 BlockNode *lb= lt+b_stride;
2478 BlockNode *rb= lb+1;
2480 int tmp_step= src_stride >= 7*MB_SIZE ? MB_SIZE : MB_SIZE*src_stride;
2481 uint8_t tmp[src_stride*7*MB_SIZE]; //FIXME align
2488 }else if(b_x + 1 >= b_width){
2495 }else if(b_y + 1 >= b_height){
2500 if(src_x<0){ //FIXME merge with prev & always round internal width upto *16
2503 if(!sliced && !offset_dst)
2506 }else if(src_x + b_w > w){
2510 obmc -= src_y*obmc_stride;
2512 if(!sliced && !offset_dst)
2513 dst -= src_y*dst_stride;
2515 }else if(src_y + b_h> h){
2519 if(b_w<=0 || b_h<=0) return;
2521 assert(src_stride > 2*MB_SIZE + 5);
2522 if(!sliced && offset_dst)
2523 dst += src_x + src_y*dst_stride;
2524 dst8+= src_x + src_y*src_stride;
2525 // src += src_x + src_y*src_stride;
2527 ptmp= tmp + 3*tmp_step;
2530 pred_block(s, block[0], tmp, src_stride, src_x, src_y, b_w, b_h, lt, plane_index, w, h);
2532 if(same_block(lt, rt)){
2537 pred_block(s, block[1], tmp, src_stride, src_x, src_y, b_w, b_h, rt, plane_index, w, h);
2540 if(same_block(lt, lb)){
2542 }else if(same_block(rt, lb)){
2547 pred_block(s, block[2], tmp, src_stride, src_x, src_y, b_w, b_h, lb, plane_index, w, h);
2550 if(same_block(lt, rb) ){
2552 }else if(same_block(rt, rb)){
2554 }else if(same_block(lb, rb)){
2558 pred_block(s, block[3], tmp, src_stride, src_x, src_y, b_w, b_h, rb, plane_index, w, h);
2561 for(y=0; y<b_h; y++){
2562 for(x=0; x<b_w; x++){
2563 int v= obmc [x + y*obmc_stride] * block[3][x + y*src_stride] * (256/OBMC_MAX);
2564 if(add) dst[x + y*dst_stride] += v;
2565 else dst[x + y*dst_stride] -= v;
2568 for(y=0; y<b_h; y++){
2569 uint8_t *obmc2= obmc + (obmc_stride>>1);
2570 for(x=0; x<b_w; x++){
2571 int v= obmc2[x + y*obmc_stride] * block[2][x + y*src_stride] * (256/OBMC_MAX);
2572 if(add) dst[x + y*dst_stride] += v;
2573 else dst[x + y*dst_stride] -= v;
2576 for(y=0; y<b_h; y++){
2577 uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
2578 for(x=0; x<b_w; x++){
2579 int v= obmc3[x + y*obmc_stride] * block[1][x + y*src_stride] * (256/OBMC_MAX);
2580 if(add) dst[x + y*dst_stride] += v;
2581 else dst[x + y*dst_stride] -= v;
2584 for(y=0; y<b_h; y++){
2585 uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
2586 uint8_t *obmc4= obmc3+ (obmc_stride>>1);
2587 for(x=0; x<b_w; x++){
2588 int v= obmc4[x + y*obmc_stride] * block[0][x + y*src_stride] * (256/OBMC_MAX);
2589 if(add) dst[x + y*dst_stride] += v;
2590 else dst[x + y*dst_stride] -= v;
2597 s->dsp.inner_add_yblock(obmc, obmc_stride, block, b_w, b_h, src_x,src_y, src_stride, sb, add, dst8);
2598 STOP_TIMER("inner_add_yblock")
2600 for(y=0; y<b_h; y++){
2601 //FIXME ugly misuse of obmc_stride
2602 const uint8_t *obmc1= obmc + y*obmc_stride;
2603 const uint8_t *obmc2= obmc1+ (obmc_stride>>1);
2604 const uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1);
2605 const uint8_t *obmc4= obmc3+ (obmc_stride>>1);
2606 for(x=0; x<b_w; x++){
2607 int v= obmc1[x] * block[3][x + y*src_stride]
2608 +obmc2[x] * block[2][x + y*src_stride]
2609 +obmc3[x] * block[1][x + y*src_stride]
2610 +obmc4[x] * block[0][x + y*src_stride];
2612 v <<= 8 - LOG2_OBMC_MAX;
2614 v >>= 8 - FRAC_BITS;
2617 v += dst[x + y*dst_stride];
2618 v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS;
2619 if(v&(~255)) v= ~(v>>31);
2620 dst8[x + y*src_stride] = v;
2622 dst[x + y*dst_stride] -= v;
2629 static av_always_inline void predict_slice_buffered(SnowContext *s, slice_buffer * sb, IDWTELEM * old_buffer, int plane_index, int add, int mb_y){
2630 Plane *p= &s->plane[plane_index];
2631 const int mb_w= s->b_width << s->block_max_depth;
2632 const int mb_h= s->b_height << s->block_max_depth;
2634 int block_size = MB_SIZE >> s->block_max_depth;
2635 int block_w = plane_index ? block_size/2 : block_size;
2636 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
2637 int obmc_stride= plane_index ? block_size : 2*block_size;
2638 int ref_stride= s->current_picture.linesize[plane_index];
2639 uint8_t *dst8= s->current_picture.data[plane_index];
2644 if(s->keyframe || (s->avctx->debug&512)){
2649 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++)
2651 // DWTELEM * line = slice_buffer_get_line(sb, y);
2652 IDWTELEM * line = sb->line[y];
2655 // int v= buf[x + y*w] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
2656 int v= line[x] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
2658 if(v&(~255)) v= ~(v>>31);
2659 dst8[x + y*ref_stride]= v;
2663 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++)
2665 // DWTELEM * line = slice_buffer_get_line(sb, y);
2666 IDWTELEM * line = sb->line[y];
2669 line[x] -= 128 << FRAC_BITS;
2670 // buf[x + y*w]-= 128<<FRAC_BITS;
2678 for(mb_x=0; mb_x<=mb_w; mb_x++){
2681 add_yblock(s, 1, sb, old_buffer, dst8, obmc,
2682 block_w*mb_x - block_w/2,
2683 block_w*mb_y - block_w/2,
2686 w, ref_stride, obmc_stride,
2688 add, 0, plane_index);
2690 STOP_TIMER("add_yblock")
2693 STOP_TIMER("predict_slice")
2696 static av_always_inline void predict_slice(SnowContext *s, IDWTELEM *buf, int plane_index, int add, int mb_y){
2697 Plane *p= &s->plane[plane_index];
2698 const int mb_w= s->b_width << s->block_max_depth;
2699 const int mb_h= s->b_height << s->block_max_depth;
2701 int block_size = MB_SIZE >> s->block_max_depth;
2702 int block_w = plane_index ? block_size/2 : block_size;
2703 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
2704 const int obmc_stride= plane_index ? block_size : 2*block_size;
2705 int ref_stride= s->current_picture.linesize[plane_index];
2706 uint8_t *dst8= s->current_picture.data[plane_index];
2711 if(s->keyframe || (s->avctx->debug&512)){
2716 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
2718 int v= buf[x + y*w] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
2720 if(v&(~255)) v= ~(v>>31);
2721 dst8[x + y*ref_stride]= v;
2725 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
2727 buf[x + y*w]-= 128<<FRAC_BITS;
2735 for(mb_x=0; mb_x<=mb_w; mb_x++){
2738 add_yblock(s, 0, NULL, buf, dst8, obmc,
2739 block_w*mb_x - block_w/2,
2740 block_w*mb_y - block_w/2,
2743 w, ref_stride, obmc_stride,
2745 add, 1, plane_index);
2747 STOP_TIMER("add_yblock")
2750 STOP_TIMER("predict_slice")
2753 static av_always_inline void predict_plane(SnowContext *s, IDWTELEM *buf, int plane_index, int add){
2754 const int mb_h= s->b_height << s->block_max_depth;
2756 for(mb_y=0; mb_y<=mb_h; mb_y++)
2757 predict_slice(s, buf, plane_index, add, mb_y);
2760 static int get_dc(SnowContext *s, int mb_x, int mb_y, int plane_index){
2762 Plane *p= &s->plane[plane_index];
2763 const int block_size = MB_SIZE >> s->block_max_depth;
2764 const int block_w = plane_index ? block_size/2 : block_size;
2765 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
2766 const int obmc_stride= plane_index ? block_size : 2*block_size;
2767 const int ref_stride= s->current_picture.linesize[plane_index];
2768 uint8_t *src= s-> input_picture.data[plane_index];
2769 IDWTELEM *dst= (IDWTELEM*)s->m.obmc_scratchpad + plane_index*block_size*block_size*4; //FIXME change to unsigned
2770 const int b_stride = s->b_width << s->block_max_depth;
2771 const int w= p->width;
2772 const int h= p->height;
2773 int index= mb_x + mb_y*b_stride;
2774 BlockNode *b= &s->block[index];
2775 BlockNode backup= *b;
2779 b->type|= BLOCK_INTRA;
2780 b->color[plane_index]= 0;
2781 memset(dst, 0, obmc_stride*obmc_stride*sizeof(IDWTELEM));
2784 int mb_x2= mb_x + (i &1) - 1;
2785 int mb_y2= mb_y + (i>>1) - 1;
2786 int x= block_w*mb_x2 + block_w/2;
2787 int y= block_w*mb_y2 + block_w/2;
2789 add_yblock(s, 0, NULL, dst + ((i&1)+(i>>1)*obmc_stride)*block_w, NULL, obmc,
2790 x, y, block_w, block_w, w, h, obmc_stride, ref_stride, obmc_stride, mb_x2, mb_y2, 0, 0, plane_index);
2792 for(y2= FFMAX(y, 0); y2<FFMIN(h, y+block_w); y2++){
2793 for(x2= FFMAX(x, 0); x2<FFMIN(w, x+block_w); x2++){
2794 int index= x2-(block_w*mb_x - block_w/2) + (y2-(block_w*mb_y - block_w/2))*obmc_stride;
2795 int obmc_v= obmc[index];
2797 if(y<0) obmc_v += obmc[index + block_w*obmc_stride];
2798 if(x<0) obmc_v += obmc[index + block_w];
2799 if(y+block_w>h) obmc_v += obmc[index - block_w*obmc_stride];
2800 if(x+block_w>w) obmc_v += obmc[index - block_w];
2801 //FIXME precalc this or simplify it somehow else
2803 d = -dst[index] + (1<<(FRAC_BITS-1));
2805 ab += (src[x2 + y2*ref_stride] - (d>>FRAC_BITS)) * obmc_v;
2806 aa += obmc_v * obmc_v; //FIXME precalclate this
2812 return av_clip(((ab<<LOG2_OBMC_MAX) + aa/2)/aa, 0, 255); //FIXME we should not need clipping
2815 static inline int get_block_bits(SnowContext *s, int x, int y, int w){
2816 const int b_stride = s->b_width << s->block_max_depth;
2817 const int b_height = s->b_height<< s->block_max_depth;
2818 int index= x + y*b_stride;
2819 const BlockNode *b = &s->block[index];
2820 const BlockNode *left = x ? &s->block[index-1] : &null_block;
2821 const BlockNode *top = y ? &s->block[index-b_stride] : &null_block;
2822 const BlockNode *tl = y && x ? &s->block[index-b_stride-1] : left;
2823 const BlockNode *tr = y && x+w<b_stride ? &s->block[index-b_stride+w] : tl;
2825 // int mx_context= av_log2(2*FFABS(left->mx - top->mx));
2826 // int my_context= av_log2(2*FFABS(left->my - top->my));
2828 if(x<0 || x>=b_stride || y>=b_height)
2835 00001XXXX 15-30 8-15
2837 //FIXME try accurate rate
2838 //FIXME intra and inter predictors if surrounding blocks arent the same type
2839 if(b->type & BLOCK_INTRA){
2840 return 3+2*( av_log2(2*FFABS(left->color[0] - b->color[0]))
2841 + av_log2(2*FFABS(left->color[1] - b->color[1]))
2842 + av_log2(2*FFABS(left->color[2] - b->color[2])));
2844 pred_mv(s, &dmx, &dmy, b->ref, left, top, tr);
2847 return 2*(1 + av_log2(2*FFABS(dmx)) //FIXME kill the 2* can be merged in lambda
2848 + av_log2(2*FFABS(dmy))
2849 + av_log2(2*b->ref));
2853 static int get_block_rd(SnowContext *s, int mb_x, int mb_y, int plane_index, const uint8_t *obmc_edged){
2854 Plane *p= &s->plane[plane_index];
2855 const int block_size = MB_SIZE >> s->block_max_depth;
2856 const int block_w = plane_index ? block_size/2 : block_size;
2857 const int obmc_stride= plane_index ? block_size : 2*block_size;
2858 const int ref_stride= s->current_picture.linesize[plane_index];
2859 uint8_t *dst= s->current_picture.data[plane_index];
2860 uint8_t *src= s-> input_picture.data[plane_index];
2861 IDWTELEM *pred= (IDWTELEM*)s->m.obmc_scratchpad + plane_index*block_size*block_size*4;
2862 uint8_t cur[ref_stride*2*MB_SIZE]; //FIXME alignment
2863 uint8_t tmp[ref_stride*(2*MB_SIZE+HTAPS_MAX-1)];
2864 const int b_stride = s->b_width << s->block_max_depth;
2865 const int b_height = s->b_height<< s->block_max_depth;
2866 const int w= p->width;
2867 const int h= p->height;
2870 const int penalty_factor= get_penalty_factor(s->lambda, s->lambda2, s->avctx->me_cmp);
2871 int sx= block_w*mb_x - block_w/2;
2872 int sy= block_w*mb_y - block_w/2;
2873 int x0= FFMAX(0,-sx);
2874 int y0= FFMAX(0,-sy);
2875 int x1= FFMIN(block_w*2, w-sx);
2876 int y1= FFMIN(block_w*2, h-sy);
2879 pred_block(s, cur, tmp, ref_stride, sx, sy, block_w*2, block_w*2, &s->block[mb_x + mb_y*b_stride], plane_index, w, h);
2881 for(y=y0; y<y1; y++){
2882 const uint8_t *obmc1= obmc_edged + y*obmc_stride;
2883 const IDWTELEM *pred1 = pred + y*obmc_stride;
2884 uint8_t *cur1 = cur + y*ref_stride;
2885 uint8_t *dst1 = dst + sx + (sy+y)*ref_stride;
2886 for(x=x0; x<x1; x++){
2887 #if FRAC_BITS >= LOG2_OBMC_MAX
2888 int v = (cur1[x] * obmc1[x]) << (FRAC_BITS - LOG2_OBMC_MAX);
2890 int v = (cur1[x] * obmc1[x] + (1<<(LOG2_OBMC_MAX - FRAC_BITS-1))) >> (LOG2_OBMC_MAX - FRAC_BITS);
2892 v = (v + pred1[x]) >> FRAC_BITS;
2893 if(v&(~255)) v= ~(v>>31);
2898 /* copy the regions where obmc[] = (uint8_t)256 */
2899 if(LOG2_OBMC_MAX == 8
2900 && (mb_x == 0 || mb_x == b_stride-1)
2901 && (mb_y == 0 || mb_y == b_height-1)){
2910 for(y=y0; y<y1; y++)
2911 memcpy(dst + sx+x0 + (sy+y)*ref_stride, cur + x0 + y*ref_stride, x1-x0);
2915 /* FIXME rearrange dsputil to fit 32x32 cmp functions */
2916 /* FIXME check alignment of the cmp wavelet vs the encoding wavelet */
2917 /* FIXME cmps overlap but don't cover the wavelet's whole support,
2918 * so improving the score of one block is not strictly guaranteed to
2919 * improve the score of the whole frame, so iterative motion est
2920 * doesn't always converge. */
2921 if(s->avctx->me_cmp == FF_CMP_W97)
2922 distortion = w97_32_c(&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, 32);
2923 else if(s->avctx->me_cmp == FF_CMP_W53)
2924 distortion = w53_32_c(&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, 32);
2928 int off = sx+16*(i&1) + (sy+16*(i>>1))*ref_stride;
2929 distortion += s->dsp.me_cmp[0](&s->m, src + off, dst + off, ref_stride, 16);
2934 distortion = s->dsp.me_cmp[0](&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, block_w*2);
2943 rate += get_block_bits(s, mb_x + (i&1) - (i>>1), mb_y + (i>>1), 1);
2945 if(mb_x == b_stride-2)
2946 rate += get_block_bits(s, mb_x + 1, mb_y + 1, 1);
2948 return distortion + rate*penalty_factor;
2951 static int get_4block_rd(SnowContext *s, int mb_x, int mb_y, int plane_index){
2953 Plane *p= &s->plane[plane_index];
2954 const int block_size = MB_SIZE >> s->block_max_depth;
2955 const int block_w = plane_index ? block_size/2 : block_size;
2956 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
2957 const int obmc_stride= plane_index ? block_size : 2*block_size;
2958 const int ref_stride= s->current_picture.linesize[plane_index];
2959 uint8_t *dst= s->current_picture.data[plane_index];
2960 uint8_t *src= s-> input_picture.data[plane_index];
2961 static const IDWTELEM zero_dst[4096]; //FIXME
2962 const int b_stride = s->b_width << s->block_max_depth;
2963 const int w= p->width;
2964 const int h= p->height;
2967 const int penalty_factor= get_penalty_factor(s->lambda, s->lambda2, s->avctx->me_cmp);
2970 int mb_x2= mb_x + (i%3) - 1;
2971 int mb_y2= mb_y + (i/3) - 1;
2972 int x= block_w*mb_x2 + block_w/2;
2973 int y= block_w*mb_y2 + block_w/2;
2975 add_yblock(s, 0, NULL, zero_dst, dst, obmc,
2976 x, y, block_w, block_w, w, h, /*dst_stride*/0, ref_stride, obmc_stride, mb_x2, mb_y2, 1, 1, plane_index);
2978 //FIXME find a cleaner/simpler way to skip the outside stuff
2979 for(y2= y; y2<0; y2++)
2980 memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, block_w);
2981 for(y2= h; y2<y+block_w; y2++)
2982 memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, block_w);
2984 for(y2= y; y2<y+block_w; y2++)
2985 memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, -x);
2988 for(y2= y; y2<y+block_w; y2++)
2989 memcpy(dst + w + y2*ref_stride, src + w + y2*ref_stride, x+block_w - w);
2992 assert(block_w== 8 || block_w==16);
2993 distortion += s->dsp.me_cmp[block_w==8](&s->m, src + x + y*ref_stride, dst + x + y*ref_stride, ref_stride, block_w);
2997 BlockNode *b= &s->block[mb_x+mb_y*b_stride];
2998 int merged= same_block(b,b+1) && same_block(b,b+b_stride) && same_block(b,b+b_stride+1);
3006 rate = get_block_bits(s, mb_x, mb_y, 2);
3007 for(i=merged?4:0; i<9; i++){
3008 static const int dxy[9][2] = {{0,0},{1,0},{0,1},{1,1},{2,0},{2,1},{-1,2},{0,2},{1,2}};
3009 rate += get_block_bits(s, mb_x + dxy[i][0], mb_y + dxy[i][1], 1);
3012 return distortion + rate*penalty_factor;
3015 static av_always_inline int check_block(SnowContext *s, int mb_x, int mb_y, int p[3], int intra, const uint8_t *obmc_edged, int *best_rd){
3016 const int b_stride= s->b_width << s->block_max_depth;
3017 BlockNode *block= &s->block[mb_x + mb_y * b_stride];
3018 BlockNode backup= *block;
3019 int rd, index, value;
3021 assert(mb_x>=0 && mb_y>=0);
3022 assert(mb_x<b_stride);
3025 block->color[0] = p[0];
3026 block->color[1] = p[1];
3027 block->color[2] = p[2];
3028 block->type |= BLOCK_INTRA;
3030 index= (p[0] + 31*p[1]) & (ME_CACHE_SIZE-1);
3031 value= s->me_cache_generation + (p[0]>>10) + (p[1]<<6) + (block->ref<<12);
3032 if(s->me_cache[index] == value)
3034 s->me_cache[index]= value;
3038 block->type &= ~BLOCK_INTRA;
3041 rd= get_block_rd(s, mb_x, mb_y, 0, obmc_edged);
3053 /* special case for int[2] args we discard afterward, fixes compilation prob with gcc 2.95 */
3054 static av_always_inline int check_block_inter(SnowContext *s, int mb_x, int mb_y, int p0, int p1, const uint8_t *obmc_edged, int *best_rd){
3055 int p[2] = {p0, p1};
3056 return check_block(s, mb_x, mb_y, p, 0, obmc_edged, best_rd);
3059 static av_always_inline int check_4block_inter(SnowContext *s, int mb_x, int mb_y, int p0, int p1, int ref, int *best_rd){
3060 const int b_stride= s->b_width << s->block_max_depth;
3061 BlockNode *block= &s->block[mb_x + mb_y * b_stride];
3062 BlockNode backup[4]= {block[0], block[1], block[b_stride], block[b_stride+1]};
3063 int rd, index, value;
3065 assert(mb_x>=0 && mb_y>=0);
3066 assert(mb_x<b_stride);
3067 assert(((mb_x|mb_y)&1) == 0);
3069 index= (p0 + 31*p1) & (ME_CACHE_SIZE-1);
3070 value= s->me_cache_generation + (p0>>10) + (p1<<6) + (block->ref<<12);
3071 if(s->me_cache[index] == value)
3073 s->me_cache[index]= value;
3078 block->type &= ~BLOCK_INTRA;
3079 block[1]= block[b_stride]= block[b_stride+1]= *block;
3081 rd= get_4block_rd(s, mb_x, mb_y, 0);
3088 block[0]= backup[0];
3089 block[1]= backup[1];
3090 block[b_stride]= backup[2];
3091 block[b_stride+1]= backup[3];
3096 static void iterative_me(SnowContext *s){
3097 int pass, mb_x, mb_y;
3098 const int b_width = s->b_width << s->block_max_depth;
3099 const int b_height= s->b_height << s->block_max_depth;
3100 const int b_stride= b_width;
3104 RangeCoder r = s->c;
3105 uint8_t state[sizeof(s->block_state)];
3106 memcpy(state, s->block_state, sizeof(s->block_state));
3107 for(mb_y= 0; mb_y<s->b_height; mb_y++)
3108 for(mb_x= 0; mb_x<s->b_width; mb_x++)
3109 encode_q_branch(s, 0, mb_x, mb_y);
3111 memcpy(s->block_state, state, sizeof(s->block_state));
3114 for(pass=0; pass<25; pass++){
3117 for(mb_y= 0; mb_y<b_height; mb_y++){
3118 for(mb_x= 0; mb_x<b_width; mb_x++){
3119 int dia_change, i, j, ref;
3120 int best_rd= INT_MAX, ref_rd;
3121 BlockNode backup, ref_b;
3122 const int index= mb_x + mb_y * b_stride;
3123 BlockNode *block= &s->block[index];
3124 BlockNode *tb = mb_y ? &s->block[index-b_stride ] : NULL;
3125 BlockNode *lb = mb_x ? &s->block[index -1] : NULL;
3126 BlockNode *rb = mb_x+1<b_width ? &s->block[index +1] : NULL;
3127 BlockNode *bb = mb_y+1<b_height ? &s->block[index+b_stride ] : NULL;
3128 BlockNode *tlb= mb_x && mb_y ? &s->block[index-b_stride-1] : NULL;
3129 BlockNode *trb= mb_x+1<b_width && mb_y ? &s->block[index-b_stride+1] : NULL;
3130 BlockNode *blb= mb_x && mb_y+1<b_height ? &s->block[index+b_stride-1] : NULL;
3131 BlockNode *brb= mb_x+1<b_width && mb_y+1<b_height ? &s->block[index+b_stride+1] : NULL;
3132 const int b_w= (MB_SIZE >> s->block_max_depth);
3133 uint8_t obmc_edged[b_w*2][b_w*2];
3135 if(pass && (block->type & BLOCK_OPT))
3137 block->type |= BLOCK_OPT;
3141 if(!s->me_cache_generation)
3142 memset(s->me_cache, 0, sizeof(s->me_cache));
3143 s->me_cache_generation += 1<<22;
3148 memcpy(obmc_edged, obmc_tab[s->block_max_depth], b_w*b_w*4);
3150 for(y=0; y<b_w*2; y++)
3151 memset(obmc_edged[y], obmc_edged[y][0] + obmc_edged[y][b_w-1], b_w);
3152 if(mb_x==b_stride-1)
3153 for(y=0; y<b_w*2; y++)
3154 memset(obmc_edged[y]+b_w, obmc_edged[y][b_w] + obmc_edged[y][b_w*2-1], b_w);
3156 for(x=0; x<b_w*2; x++)
3157 obmc_edged[0][x] += obmc_edged[b_w-1][x];
3158 for(y=1; y<b_w; y++)
3159 memcpy(obmc_edged[y], obmc_edged[0], b_w*2);
3161 if(mb_y==b_height-1){
3162 for(x=0; x<b_w*2; x++)
3163 obmc_edged[b_w*2-1][x] += obmc_edged[b_w][x];
3164 for(y=b_w; y<b_w*2-1; y++)
3165 memcpy(obmc_edged[y], obmc_edged[b_w*2-1], b_w*2);
3169 //skip stuff outside the picture
3170 if(mb_x==0 || mb_y==0 || mb_x==b_width-1 || mb_y==b_height-1)
3172 uint8_t *src= s-> input_picture.data[0];
3173 uint8_t *dst= s->current_picture.data[0];
3174 const int stride= s->current_picture.linesize[0];
3175 const int block_w= MB_SIZE >> s->block_max_depth;
3176 const int sx= block_w*mb_x - block_w/2;
3177 const int sy= block_w*mb_y - block_w/2;
3178 const int w= s->plane[0].width;
3179 const int h= s->plane[0].height;
3183 memcpy(dst + sx + y*stride, src + sx + y*stride, block_w*2);
3184 for(y=h; y<sy+block_w*2; y++)
3185 memcpy(dst + sx + y*stride, src + sx + y*stride, block_w*2);
3187 for(y=sy; y<sy+block_w*2; y++)
3188 memcpy(dst + sx + y*stride, src + sx + y*stride, -sx);
3190 if(sx+block_w*2 > w){
3191 for(y=sy; y<sy+block_w*2; y++)
3192 memcpy(dst + w + y*stride, src + w + y*stride, sx+block_w*2 - w);
3196 // intra(black) = neighbors' contribution to the current block
3198 color[i]= get_dc(s, mb_x, mb_y, i);
3200 // get previous score (cannot be cached due to OBMC)
3201 if(pass > 0 && (block->type&BLOCK_INTRA)){
3202 int color0[3]= {block->color[0], block->color[1], block->color[2]};
3203 check_block(s, mb_x, mb_y, color0, 1, *obmc_edged, &best_rd);
3205 check_block_inter(s, mb_x, mb_y, block->mx, block->my, *obmc_edged, &best_rd);
3209 for(ref=0; ref < s->ref_frames; ref++){
3210 int16_t (*mvr)[2]= &s->ref_mvs[ref][index];
3211 if(s->ref_scores[ref][index] > s->ref_scores[ref_b.ref][index]*3/2) //FIXME tune threshold
3216 check_block_inter(s, mb_x, mb_y, mvr[0][0], mvr[0][1], *obmc_edged, &best_rd);
3217 check_block_inter(s, mb_x, mb_y, 0, 0, *obmc_edged, &best_rd);
3219 check_block_inter(s, mb_x, mb_y, mvr[-b_stride][0], mvr[-b_stride][1], *obmc_edged, &best_rd);
3221 check_block_inter(s, mb_x, mb_y, mvr[-1][0], mvr[-1][1], *obmc_edged, &best_rd);
3223 check_block_inter(s, mb_x, mb_y, mvr[1][0], mvr[1][1], *obmc_edged, &best_rd);
3225 check_block_inter(s, mb_x, mb_y, mvr[b_stride][0], mvr[b_stride][1], *obmc_edged, &best_rd);
3228 //FIXME avoid subpel interpol / round to nearest integer
3231 for(i=0; i<FFMAX(s->avctx->dia_size, 1); i++){
3233 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+4*(i-j), block->my+(4*j), *obmc_edged, &best_rd);
3234 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx-4*(i-j), block->my-(4*j), *obmc_edged, &best_rd);
3235 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+4*(i-j), block->my-(4*j), *obmc_edged, &best_rd);
3236 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx-4*(i-j), block->my+(4*j), *obmc_edged, &best_rd);
3242 static const int square[8][2]= {{+1, 0},{-1, 0},{ 0,+1},{ 0,-1},{+1,+1},{-1,-1},{+1,-1},{-1,+1},};
3245 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+square[i][0], block->my+square[i][1], *obmc_edged, &best_rd);
3247 //FIXME or try the standard 2 pass qpel or similar
3249 mvr[0][0]= block->mx;
3250 mvr[0][1]= block->my;
3251 if(ref_rd > best_rd){
3259 check_block(s, mb_x, mb_y, color, 1, *obmc_edged, &best_rd);
3260 //FIXME RD style color selection
3262 if(!same_block(block, &backup)){
3263 if(tb ) tb ->type &= ~BLOCK_OPT;
3264 if(lb ) lb ->type &= ~BLOCK_OPT;
3265 if(rb ) rb ->type &= ~BLOCK_OPT;
3266 if(bb ) bb ->type &= ~BLOCK_OPT;
3267 if(tlb) tlb->type &= ~BLOCK_OPT;
3268 if(trb) trb->type &= ~BLOCK_OPT;
3269 if(blb) blb->type &= ~BLOCK_OPT;
3270 if(brb) brb->type &= ~BLOCK_OPT;
3275 av_log(NULL, AV_LOG_ERROR, "pass:%d changed:%d\n", pass, change);
3280 if(s->block_max_depth == 1){
3282 for(mb_y= 0; mb_y<b_height; mb_y+=2){
3283 for(mb_x= 0; mb_x<b_width; mb_x+=2){
3285 int best_rd, init_rd;
3286 const int index= mb_x + mb_y * b_stride;
3289 b[0]= &s->block[index];
3291 b[2]= b[0]+b_stride;
3293 if(same_block(b[0], b[1]) &&
3294 same_block(b[0], b[2]) &&
3295 same_block(b[0], b[3]))
3298 if(!s->me_cache_generation)
3299 memset(s->me_cache, 0, sizeof(s->me_cache));
3300 s->me_cache_generation += 1<<22;
3302 init_rd= best_rd= get_4block_rd(s, mb_x, mb_y, 0);
3304 //FIXME more multiref search?
3305 check_4block_inter(s, mb_x, mb_y,
3306 (b[0]->mx + b[1]->mx + b[2]->mx + b[3]->mx + 2) >> 2,
3307 (b[0]->my + b[1]->my + b[2]->my + b[3]->my + 2) >> 2, 0, &best_rd);
3310 if(!(b[i]->type&BLOCK_INTRA))
3311 check_4block_inter(s, mb_x, mb_y, b[i]->mx, b[i]->my, b[i]->ref, &best_rd);
3313 if(init_rd != best_rd)
3317 av_log(NULL, AV_LOG_ERROR, "pass:4mv changed:%d\n", change*4);
3321 static void quantize(SnowContext *s, SubBand *b, IDWTELEM *dst, DWTELEM *src, int stride, int bias){
3322 const int level= b->level;
3323 const int w= b->width;
3324 const int h= b->height;
3325 const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
3326 const int qmul= qexp[qlog&(QROOT-1)]<<((qlog>>QSHIFT) + ENCODER_EXTRA_BITS);
3327 int x,y, thres1, thres2;
3330 if(s->qlog == LOSSLESS_QLOG){
3333 dst[x + y*stride]= src[x + y*stride];
3337 bias= bias ? 0 : (3*qmul)>>3;
3338 thres1= ((qmul - bias)>>QEXPSHIFT) - 1;
3344 int i= src[x + y*stride];
3346 if((unsigned)(i+thres1) > thres2){
3349 i/= qmul; //FIXME optimize
3350 dst[x + y*stride]= i;
3354 i/= qmul; //FIXME optimize
3355 dst[x + y*stride]= -i;
3358 dst[x + y*stride]= 0;
3364 int i= src[x + y*stride];
3366 if((unsigned)(i+thres1) > thres2){
3369 i= (i + bias) / qmul; //FIXME optimize
3370 dst[x + y*stride]= i;
3374 i= (i + bias) / qmul; //FIXME optimize
3375 dst[x + y*stride]= -i;
3378 dst[x + y*stride]= 0;
3382 if(level+1 == s->spatial_decomposition_count){
3383 // STOP_TIMER("quantize")
3387 static void dequantize_slice_buffered(SnowContext *s, slice_buffer * sb, SubBand *b, IDWTELEM *src, int stride, int start_y, int end_y){
3388 const int w= b->width;
3389 const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
3390 const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
3391 const int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
3395 if(s->qlog == LOSSLESS_QLOG) return;
3397 for(y=start_y; y<end_y; y++){
3398 // DWTELEM * line = slice_buffer_get_line_from_address(sb, src + (y * stride));
3399 IDWTELEM * line = slice_buffer_get_line(sb, (y * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
3403 line[x]= -((-i*qmul + qadd)>>(QEXPSHIFT)); //FIXME try different bias
3405 line[x]= (( i*qmul + qadd)>>(QEXPSHIFT));
3409 if(w > 200 /*level+1 == s->spatial_decomposition_count*/){
3410 STOP_TIMER("dquant")
3414 static void dequantize(SnowContext *s, SubBand *b, IDWTELEM *src, int stride){
3415 const int w= b->width;
3416 const int h= b->height;
3417 const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
3418 const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
3419 const int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
3423 if(s->qlog == LOSSLESS_QLOG) return;
3427 int i= src[x + y*stride];
3429 src[x + y*stride]= -((-i*qmul + qadd)>>(QEXPSHIFT)); //FIXME try different bias
3431 src[x + y*stride]= (( i*qmul + qadd)>>(QEXPSHIFT));
3435 if(w > 200 /*level+1 == s->spatial_decomposition_count*/){
3436 STOP_TIMER("dquant")
3440 static void decorrelate(SnowContext *s, SubBand *b, IDWTELEM *src, int stride, int inverse, int use_median){
3441 const int w= b->width;
3442 const int h= b->height;
3445 for(y=h-1; y>=0; y--){
3446 for(x=w-1; x>=0; x--){
3447 int i= x + y*stride;
3451 if(y && x+1<w) src[i] -= mid_pred(src[i - 1], src[i - stride], src[i - stride + 1]);
3452 else src[i] -= src[i - 1];
3454 if(y) src[i] -= mid_pred(src[i - 1], src[i - stride], src[i - 1] + src[i - stride] - src[i - 1 - stride]);
3455 else src[i] -= src[i - 1];
3458 if(y) src[i] -= src[i - stride];
3464 static void correlate_slice_buffered(SnowContext *s, slice_buffer * sb, SubBand *b, IDWTELEM *src, int stride, int inverse, int use_median, int start_y, int end_y){
3465 const int w= b->width;
3470 IDWTELEM * line=0; // silence silly "could be used without having been initialized" warning
3474 line = slice_buffer_get_line(sb, ((start_y - 1) * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
3476 for(y=start_y; y<end_y; y++){
3478 // line = slice_buffer_get_line_from_address(sb, src + (y * stride));
3479 line = slice_buffer_get_line(sb, (y * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
3483 if(y && x+1<w) line[x] += mid_pred(line[x - 1], prev[x], prev[x + 1]);
3484 else line[x] += line[x - 1];
3486 if(y) line[x] += mid_pred(line[x - 1], prev[x], line[x - 1] + prev[x] - prev[x - 1]);
3487 else line[x] += line[x - 1];
3490 if(y) line[x] += prev[x];
3495 // STOP_TIMER("correlate")
3498 static void correlate(SnowContext *s, SubBand *b, IDWTELEM *src, int stride, int inverse, int use_median){
3499 const int w= b->width;
3500 const int h= b->height;
3505 int i= x + y*stride;
3509 if(y && x+1<w) src[i] += mid_pred(src[i - 1], src[i - stride], src[i - stride + 1]);
3510 else src[i] += src[i - 1];
3512 if(y) src[i] += mid_pred(src[i - 1], src[i - stride], src[i - 1] + src[i - stride] - src[i - 1 - stride]);
3513 else src[i] += src[i - 1];
3516 if(y) src[i] += src[i - stride];
3522 static void encode_header(SnowContext *s){
3523 int plane_index, level, orientation, i;
3526 memset(kstate, MID_STATE, sizeof(kstate));
3528 put_rac(&s->c, kstate, s->keyframe);
3529 if(s->keyframe || s->always_reset){
3531 s->last_spatial_decomposition_type=
3535 s->last_block_max_depth= 0;
3536 for(plane_index=0; plane_index<2; plane_index++){
3537 Plane *p= &s->plane[plane_index];
3540 memset(p->last_hcoeff, 0, sizeof(p->last_hcoeff));
3544 put_symbol(&s->c, s->header_state, s->version, 0);
3545 put_rac(&s->c, s->header_state, s->always_reset);
3546 put_symbol(&s->c, s->header_state, s->temporal_decomposition_type, 0);
3547 put_symbol(&s->c, s->header_state, s->temporal_decomposition_count, 0);
3548 put_symbol(&s->c, s->header_state, s->spatial_decomposition_count, 0);
3549 put_symbol(&s->c, s->header_state, s->colorspace_type, 0);
3550 put_symbol(&s->c, s->header_state, s->chroma_h_shift, 0);
3551 put_symbol(&s->c, s->header_state, s->chroma_v_shift, 0);
3552 put_rac(&s->c, s->header_state, s->spatial_scalability);
3553 // put_rac(&s->c, s->header_state, s->rate_scalability);
3554 put_symbol(&s->c, s->header_state, s->max_ref_frames-1, 0);
3556 for(plane_index=0; plane_index<2; plane_index++){
3557 for(level=0; level<s->spatial_decomposition_count; level++){
3558 for(orientation=level ? 1:0; orientation<4; orientation++){
3559 if(orientation==2) continue;
3560 put_symbol(&s->c, s->header_state, s->plane[plane_index].band[level][orientation].qlog, 1);
3568 for(plane_index=0; plane_index<2; plane_index++){
3569 Plane *p= &s->plane[plane_index];
3570 update_mc |= p->last_htaps != p->htaps;
3571 update_mc |= p->last_diag_mc != p->diag_mc;
3572 update_mc |= !!memcmp(p->last_hcoeff, p->hcoeff, sizeof(p->hcoeff));
3574 if(!s->always_reset)
3575 put_rac(&s->c, s->header_state, update_mc);
3577 for(plane_index=0; plane_index<2; plane_index++){
3578 Plane *p= &s->plane[plane_index];
3579 put_rac(&s->c, s->header_state, p->diag_mc);
3580 put_symbol(&s->c, s->header_state, p->htaps/2-1, 0);
3581 for(i= p->htaps/2; i; i--)
3582 put_symbol(&s->c, s->header_state, FFABS(p->hcoeff[i]), 0);
3584 p->last_diag_mc= p->diag_mc;
3585 p->last_htaps= p->htaps;
3586 memcpy(p->last_hcoeff, p->hcoeff, sizeof(p->hcoeff));
3591 put_symbol(&s->c, s->header_state, s->spatial_decomposition_type - s->last_spatial_decomposition_type, 1);
3592 put_symbol(&s->c, s->header_state, s->qlog - s->last_qlog , 1);
3593 put_symbol(&s->c, s->header_state, s->mv_scale - s->last_mv_scale, 1);
3594 put_symbol(&s->c, s->header_state, s->qbias - s->last_qbias , 1);
3595 put_symbol(&s->c, s->header_state, s->block_max_depth - s->last_block_max_depth, 1);
3597 s->last_spatial_decomposition_type= s->spatial_decomposition_type;
3598 s->last_qlog = s->qlog;
3599 s->last_qbias = s->qbias;
3600 s->last_mv_scale = s->mv_scale;
3601 s->last_block_max_depth = s->block_max_depth;
3604 static int decode_header(SnowContext *s){
3605 int plane_index, level, orientation;
3608 memset(kstate, MID_STATE, sizeof(kstate));
3610 s->keyframe= get_rac(&s->c, kstate);
3611 if(s->keyframe || s->always_reset){
3613 s->spatial_decomposition_type=
3617 s->block_max_depth= 0;
3620 s->version= get_symbol(&s->c, s->header_state, 0);
3622 av_log(s->avctx, AV_LOG_ERROR, "version %d not supported", s->version);
3625 s->always_reset= get_rac(&s->c, s->header_state);
3626 s->temporal_decomposition_type= get_symbol(&s->c, s->header_state, 0);
3627 s->temporal_decomposition_count= get_symbol(&s->c, s->header_state, 0);
3628 s->spatial_decomposition_count= get_symbol(&s->c, s->header_state, 0);
3629 s->colorspace_type= get_symbol(&s->c, s->header_state, 0);
3630 s->chroma_h_shift= get_symbol(&s->c, s->header_state, 0);
3631 s->chroma_v_shift= get_symbol(&s->c, s->header_state, 0);
3632 s->spatial_scalability= get_rac(&s->c, s->header_state);
3633 // s->rate_scalability= get_rac(&s->c, s->header_state);
3634 s->max_ref_frames= get_symbol(&s->c, s->header_state, 0)+1;
3636 for(plane_index=0; plane_index<3; plane_index++){
3637 for(level=0; level<s->spatial_decomposition_count; level++){
3638 for(orientation=level ? 1:0; orientation<4; orientation++){
3640 if (plane_index==2) q= s->plane[1].band[level][orientation].qlog;
3641 else if(orientation==2) q= s->plane[plane_index].band[level][1].qlog;
3642 else q= get_symbol(&s->c, s->header_state, 1);
3643 s->plane[plane_index].band[level][orientation].qlog= q;
3650 if(s->always_reset || get_rac(&s->c, s->header_state)){
3651 for(plane_index=0; plane_index<2; plane_index++){
3652 int htaps, i, sum=0, absum=0;
3653 Plane *p= &s->plane[plane_index];
3654 p->diag_mc= get_rac(&s->c, s->header_state);
3655 htaps= get_symbol(&s->c, s->header_state, 0)*2 + 2;
3656 if((unsigned)htaps > HTAPS_MAX || htaps==0)
3659 for(i= htaps/2; i; i--){
3660 p->hcoeff[i]= get_symbol(&s->c, s->header_state, 0) * (1-2*(i&1));
3661 sum += p->hcoeff[i];
3663 p->hcoeff[0]= 32-sum;
3665 s->plane[2].diag_mc= s->plane[1].diag_mc;
3666 s->plane[2].htaps = s->plane[1].htaps;
3667 memcpy(s->plane[2].hcoeff, s->plane[1].hcoeff, sizeof(s->plane[1].hcoeff));
3671 s->spatial_decomposition_type+= get_symbol(&s->c, s->header_state, 1);
3672 if(s->spatial_decomposition_type > 1){
3673 av_log(s->avctx, AV_LOG_ERROR, "spatial_decomposition_type %d not supported", s->spatial_decomposition_type);
3677 s->qlog += get_symbol(&s->c, s->header_state, 1);
3678 s->mv_scale += get_symbol(&s->c, s->header_state, 1);
3679 s->qbias += get_symbol(&s->c, s->header_state, 1);
3680 s->block_max_depth+= get_symbol(&s->c, s->header_state, 1);
3681 if(s->block_max_depth > 1 || s->block_max_depth < 0){
3682 av_log(s->avctx, AV_LOG_ERROR, "block_max_depth= %d is too large", s->block_max_depth);
3683 s->block_max_depth= 0;
3690 static void init_qexp(void){
3694 for(i=0; i<QROOT; i++){
3696 v *= pow(2, 1.0 / QROOT);
3700 static int common_init(AVCodecContext *avctx){
3701 SnowContext *s = avctx->priv_data;
3703 int level, orientation, plane_index, dec;
3708 dsputil_init(&s->dsp, avctx);
3711 s->dsp.put_qpel_pixels_tab [0][dy+dx/4]=\
3712 s->dsp.put_no_rnd_qpel_pixels_tab[0][dy+dx/4]=\
3713 s->dsp.put_h264_qpel_pixels_tab[0][dy+dx/4];\
3714 s->dsp.put_qpel_pixels_tab [1][dy+dx/4]=\
3715 s->dsp.put_no_rnd_qpel_pixels_tab[1][dy+dx/4]=\
3716 s->dsp.put_h264_qpel_pixels_tab[1][dy+dx/4];
3735 #define mcfh(dx,dy)\
3736 s->dsp.put_pixels_tab [0][dy/4+dx/8]=\
3737 s->dsp.put_no_rnd_pixels_tab[0][dy/4+dx/8]=\
3738 mc_block_hpel ## dx ## dy ## 16;\
3739 s->dsp.put_pixels_tab [1][dy/4+dx/8]=\
3740 s->dsp.put_no_rnd_pixels_tab[1][dy/4+dx/8]=\
3741 mc_block_hpel ## dx ## dy ## 8;
3751 dec= s->spatial_decomposition_count= 5;
3752 s->spatial_decomposition_type= avctx->prediction_method; //FIXME add decorrelator type r transform_type
3754 s->chroma_h_shift= 1; //FIXME XXX
3755 s->chroma_v_shift= 1;
3757 // dec += FFMAX(s->chroma_h_shift, s->chroma_v_shift);
3759 width= s->avctx->width;
3760 height= s->avctx->height;
3762 s->spatial_idwt_buffer= av_mallocz(width*height*sizeof(IDWTELEM));
3763 s->spatial_dwt_buffer= av_mallocz(width*height*sizeof(DWTELEM)); //FIXME this doesnt belong here
3765 s->mv_scale= (s->avctx->flags & CODEC_FLAG_QPEL) ? 2 : 4;
3766 s->block_max_depth= (s->avctx->flags & CODEC_FLAG_4MV) ? 1 : 0;
3768 for(plane_index=0; plane_index<3; plane_index++){
3769 int w= s->avctx->width;
3770 int h= s->avctx->height;
3773 w>>= s->chroma_h_shift;
3774 h>>= s->chroma_v_shift;
3776 s->plane[plane_index].width = w;
3777 s->plane[plane_index].height= h;
3779 s->plane[plane_index].diag_mc= 1;
3780 s->plane[plane_index].htaps= 6;
3781 s->plane[plane_index].hcoeff[0]= 40;
3782 s->plane[plane_index].hcoeff[1]= -10;
3783 s->plane[plane_index].hcoeff[2]= 2;
3784 s->plane[plane_index].fast_mc= 1;
3786 //av_log(NULL, AV_LOG_DEBUG, "%d %d\n", w, h);
3787 for(level=s->spatial_decomposition_count-1; level>=0; level--){
3788 for(orientation=level ? 1 : 0; orientation<4; orientation++){
3789 SubBand *b= &s->plane[plane_index].band[level][orientation];
3791 b->buf= s->spatial_dwt_buffer;
3793 b->stride= s->plane[plane_index].width << (s->spatial_decomposition_count - level);
3794 b->width = (w + !(orientation&1))>>1;
3795 b->height= (h + !(orientation>1))>>1;
3797 b->stride_line = 1 << (s->spatial_decomposition_count - level);
3798 b->buf_x_offset = 0;
3799 b->buf_y_offset = 0;
3803 b->buf_x_offset = (w+1)>>1;
3806 b->buf += b->stride>>1;
3807 b->buf_y_offset = b->stride_line >> 1;
3809 b->ibuf= s->spatial_idwt_buffer + (b->buf - s->spatial_dwt_buffer);
3812 b->parent= &s->plane[plane_index].band[level-1][orientation];
3813 b->x_coeff=av_mallocz(((b->width+1) * b->height+1)*sizeof(x_and_coeff));
3820 for(i=0; i<MAX_REF_FRAMES; i++)
3821 for(j=0; j<MAX_REF_FRAMES; j++)
3822 scale_mv_ref[i][j] = 256*(i+1)/(j+1);
3826 width= s->width= avctx->width;
3827 height= s->height= avctx->height;
3829 assert(width && height);
3831 s->avctx->get_buffer(s->avctx, &s->mconly_picture);
3836 static int qscale2qlog(int qscale){
3837 return rint(QROOT*log(qscale / (float)FF_QP2LAMBDA)/log(2))
3838 + 61*QROOT/8; //<64 >60
3841 static int ratecontrol_1pass(SnowContext *s, AVFrame *pict)
3843 /* estimate the frame's complexity as a sum of weighted dwt coefs.
3844 * FIXME we know exact mv bits at this point,
3845 * but ratecontrol isn't set up to include them. */
3846 uint32_t coef_sum= 0;
3847 int level, orientation, delta_qlog;
3849 for(level=0; level<s->spatial_decomposition_count; level++){
3850 for(orientation=level ? 1 : 0; orientation<4; orientation++){
3851 SubBand *b= &s->plane[0].band[level][orientation];
3852 IDWTELEM *buf= b->ibuf;
3853 const int w= b->width;
3854 const int h= b->height;
3855 const int stride= b->stride;
3856 const int qlog= av_clip(2*QROOT + b->qlog, 0, QROOT*16);
3857 const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
3858 const int qdiv= (1<<16)/qmul;
3860 //FIXME this is ugly
3863 buf[x+y*stride]= b->buf[x+y*stride];
3865 decorrelate(s, b, buf, stride, 1, 0);
3868 coef_sum+= abs(buf[x+y*stride]) * qdiv >> 16;
3872 /* ugly, ratecontrol just takes a sqrt again */
3873 coef_sum = (uint64_t)coef_sum * coef_sum >> 16;
3874 assert(coef_sum < INT_MAX);
3876 if(pict->pict_type == I_TYPE){
3877 s->m.current_picture.mb_var_sum= coef_sum;
3878 s->m.current_picture.mc_mb_var_sum= 0;
3880 s->m.current_picture.mc_mb_var_sum= coef_sum;
3881 s->m.current_picture.mb_var_sum= 0;
3884 pict->quality= ff_rate_estimate_qscale(&s->m, 1);
3885 if (pict->quality < 0)
3887 s->lambda= pict->quality * 3/2;
3888 delta_qlog= qscale2qlog(pict->quality) - s->qlog;
3889 s->qlog+= delta_qlog;
3893 static void calculate_vissual_weight(SnowContext *s, Plane *p){
3894 int width = p->width;
3895 int height= p->height;
3896 int level, orientation, x, y;
3898 for(level=0; level<s->spatial_decomposition_count; level++){
3899 for(orientation=level ? 1 : 0; orientation<4; orientation++){
3900 SubBand *b= &p->band[level][orientation];
3901 IDWTELEM *ibuf= b->ibuf;
3904 memset(s->spatial_idwt_buffer, 0, sizeof(*s->spatial_idwt_buffer)*width*height);
3905 ibuf[b->width/2 + b->height/2*b->stride]= 256*16;
3906 ff_spatial_idwt(s->spatial_idwt_buffer, width, height, width, s->spatial_decomposition_type, s->spatial_decomposition_count);
3907 for(y=0; y<height; y++){
3908 for(x=0; x<width; x++){
3909 int64_t d= s->spatial_idwt_buffer[x + y*width]*16;
3914 b->qlog= (int)(log(352256.0/sqrt(error)) / log(pow(2.0, 1.0/QROOT))+0.5);
3915 // av_log(NULL, AV_LOG_DEBUG, "%d %d %d\n", level, orientation, b->qlog/*, sqrt(error)*/);
3920 static int encode_init(AVCodecContext *avctx)
3922 SnowContext *s = avctx->priv_data;
3925 if(avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL){
3926 av_log(avctx, AV_LOG_ERROR, "this codec is under development, files encoded with it may not be decodable with future versions!!!\n"
3927 "use vstrict=-2 / -strict -2 to use it anyway\n");
3931 if(avctx->prediction_method == DWT_97
3932 && (avctx->flags & CODEC_FLAG_QSCALE)
3933 && avctx->global_quality == 0){
3934 av_log(avctx, AV_LOG_ERROR, "the 9/7 wavelet is incompatible with lossless mode\n");
3944 s->m.flags = avctx->flags;
3945 s->m.bit_rate= avctx->bit_rate;
3947 s->m.me.scratchpad= av_mallocz((avctx->width+64)*2*16*2*sizeof(uint8_t));
3948 s->m.me.map = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t));
3949 s->m.me.score_map = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t));
3950 s->m.obmc_scratchpad= av_mallocz(MB_SIZE*MB_SIZE*12*sizeof(uint32_t));
3951 h263_encode_init(&s->m); //mv_penalty
3953 s->max_ref_frames = FFMAX(FFMIN(avctx->refs, MAX_REF_FRAMES), 1);
3955 if(avctx->flags&CODEC_FLAG_PASS1){
3956 if(!avctx->stats_out)
3957 avctx->stats_out = av_mallocz(256);
3959 if((avctx->flags&CODEC_FLAG_PASS2) || !(avctx->flags&CODEC_FLAG_QSCALE)){
3960 if(ff_rate_control_init(&s->m) < 0)
3963 s->pass1_rc= !(avctx->flags & (CODEC_FLAG_QSCALE|CODEC_FLAG_PASS2));
3965 for(plane_index=0; plane_index<3; plane_index++){
3966 calculate_vissual_weight(s, &s->plane[plane_index]);
3970 avctx->coded_frame= &s->current_picture;
3971 switch(avctx->pix_fmt){
3972 // case PIX_FMT_YUV444P:
3973 // case PIX_FMT_YUV422P:
3974 case PIX_FMT_YUV420P:
3976 // case PIX_FMT_YUV411P:
3977 // case PIX_FMT_YUV410P:
3978 s->colorspace_type= 0;
3980 /* case PIX_FMT_RGB32:
3984 av_log(avctx, AV_LOG_ERROR, "format not supported\n");
3987 // avcodec_get_chroma_sub_sample(avctx->pix_fmt, &s->chroma_h_shift, &s->chroma_v_shift);
3988 s->chroma_h_shift= 1;
3989 s->chroma_v_shift= 1;
3991 ff_set_cmp(&s->dsp, s->dsp.me_cmp, s->avctx->me_cmp);
3992 ff_set_cmp(&s->dsp, s->dsp.me_sub_cmp, s->avctx->me_sub_cmp);
3994 s->avctx->get_buffer(s->avctx, &s->input_picture);
3996 if(s->avctx->me_method == ME_ITER){
3998 int size= s->b_width * s->b_height << 2*s->block_max_depth;
3999 for(i=0; i<s->max_ref_frames; i++){
4000 s->ref_mvs[i]= av_mallocz(size*sizeof(int16_t[2]));
4001 s->ref_scores[i]= av_mallocz(size*sizeof(uint32_t));
4008 static void halfpel_interpol(SnowContext *s, uint8_t *halfpel[4][4], AVFrame *frame){
4011 assert(!(s->avctx->flags & CODEC_FLAG_EMU_EDGE));
4015 int w= s->avctx->width >>is_chroma;
4016 int h= s->avctx->height >>is_chroma;
4017 int ls= frame->linesize[p];
4018 uint8_t *src= frame->data[p];
4020 halfpel[1][p]= (uint8_t*)av_malloc(ls * (h+2*EDGE_WIDTH)) + EDGE_WIDTH*(1+ls);
4021 halfpel[2][p]= (uint8_t*)av_malloc(ls * (h+2*EDGE_WIDTH)) + EDGE_WIDTH*(1+ls);
4022 halfpel[3][p]= (uint8_t*)av_malloc(ls * (h+2*EDGE_WIDTH)) + EDGE_WIDTH*(1+ls);
4029 halfpel[1][p][i]= (20*(src[i] + src[i+1]) - 5*(src[i-1] + src[i+2]) + (src[i-2] + src[i+3]) + 16 )>>5;
4036 halfpel[2][p][i]= (20*(src[i] + src[i+ls]) - 5*(src[i-ls] + src[i+2*ls]) + (src[i-2*ls] + src[i+3*ls]) + 16 )>>5;
4044 halfpel[3][p][i]= (20*(src[i] + src[i+ls]) - 5*(src[i-ls] + src[i+2*ls]) + (src[i-2*ls] + src[i+3*ls]) + 16 )>>5;
4052 static int frame_start(SnowContext *s){
4054 int w= s->avctx->width; //FIXME round up to x16 ?
4055 int h= s->avctx->height;
4057 if(s->current_picture.data[0]){
4058 draw_edges(s->current_picture.data[0], s->current_picture.linesize[0], w , h , EDGE_WIDTH );
4059 draw_edges(s->current_picture.data[1], s->current_picture.linesize[1], w>>1, h>>1, EDGE_WIDTH/2);
4060 draw_edges(s->current_picture.data[2], s->current_picture.linesize[2], w>>1, h>>1, EDGE_WIDTH/2);
4063 tmp= s->last_picture[s->max_ref_frames-1];
4064 memmove(s->last_picture+1, s->last_picture, (s->max_ref_frames-1)*sizeof(AVFrame));
4065 memmove(s->halfpel_plane+1, s->halfpel_plane, (s->max_ref_frames-1)*sizeof(void*)*4*4);
4066 #ifdef USE_HALFPEL_PLANE
4067 if(s->current_picture.data[0])
4068 halfpel_interpol(s, s->halfpel_plane[0], &s->current_picture);
4070 s->last_picture[0]= s->current_picture;
4071 s->current_picture= tmp;
4077 for(i=0; i<s->max_ref_frames && s->last_picture[i].data[0]; i++)
4078 if(i && s->last_picture[i-1].key_frame)
4083 s->current_picture.reference= 1;
4084 if(s->avctx->get_buffer(s->avctx, &s->current_picture) < 0){
4085 av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed\n");
4089 s->current_picture.key_frame= s->keyframe;
4094 static int encode_frame(AVCodecContext *avctx, unsigned char *buf, int buf_size, void *data){
4095 SnowContext *s = avctx->priv_data;
4096 RangeCoder * const c= &s->c;
4097 AVFrame *pict = data;
4098 const int width= s->avctx->width;
4099 const int height= s->avctx->height;
4100 int level, orientation, plane_index, i, y;
4101 uint8_t rc_header_bak[sizeof(s->header_state)];
4102 uint8_t rc_block_bak[sizeof(s->block_state)];
4104 ff_init_range_encoder(c, buf, buf_size);
4105 ff_build_rac_states(c, 0.05*(1LL<<32), 256-8);
4109 for(y=0; y<(height>>shift); y++)
4110 memcpy(&s->input_picture.data[i][y * s->input_picture.linesize[i]],
4111 &pict->data[i][y * pict->linesize[i]],
4114 s->new_picture = *pict;
4116 s->m.picture_number= avctx->frame_number;
4117 if(avctx->flags&CODEC_FLAG_PASS2){
4119 pict->pict_type= s->m.rc_context.entry[avctx->frame_number].new_pict_type;
4120 s->keyframe= pict->pict_type==FF_I_TYPE;
4121 if(!(avctx->flags&CODEC_FLAG_QSCALE)) {
4122 pict->quality= ff_rate_estimate_qscale(&s->m, 0);
4123 if (pict->quality < 0)
4127 s->keyframe= avctx->gop_size==0 || avctx->frame_number % avctx->gop_size == 0;
4129 pict->pict_type= s->keyframe ? FF_I_TYPE : FF_P_TYPE;
4132 if(s->pass1_rc && avctx->frame_number == 0)
4133 pict->quality= 2*FF_QP2LAMBDA;
4135 s->qlog= qscale2qlog(pict->quality);
4136 s->lambda = pict->quality * 3/2;
4138 if(s->qlog < 0 || (!pict->quality && (avctx->flags & CODEC_FLAG_QSCALE))){
4139 s->qlog= LOSSLESS_QLOG;
4141 }//else keep previous frame's qlog until after motion est
4145 s->m.current_picture_ptr= &s->m.current_picture;
4146 if(pict->pict_type == P_TYPE){
4147 int block_width = (width +15)>>4;
4148 int block_height= (height+15)>>4;
4149 int stride= s->current_picture.linesize[0];
4151 assert(s->current_picture.data[0]);
4152 assert(s->last_picture[0].data[0]);
4154 s->m.avctx= s->avctx;
4155 s->m.current_picture.data[0]= s->current_picture.data[0];
4156 s->m. last_picture.data[0]= s->last_picture[0].data[0];
4157 s->m. new_picture.data[0]= s-> input_picture.data[0];
4158 s->m. last_picture_ptr= &s->m. last_picture;
4160 s->m. last_picture.linesize[0]=
4161 s->m. new_picture.linesize[0]=
4162 s->m.current_picture.linesize[0]= stride;
4163 s->m.uvlinesize= s->current_picture.linesize[1];
4165 s->m.height= height;
4166 s->m.mb_width = block_width;
4167 s->m.mb_height= block_height;
4168 s->m.mb_stride= s->m.mb_width+1;
4169 s->m.b8_stride= 2*s->m.mb_width+1;
4171 s->m.pict_type= pict->pict_type;
4172 s->m.me_method= s->avctx->me_method;
4173 s->m.me.scene_change_score=0;
4174 s->m.flags= s->avctx->flags;
4175 s->m.quarter_sample= (s->avctx->flags & CODEC_FLAG_QPEL)!=0;
4176 s->m.out_format= FMT_H263;
4177 s->m.unrestricted_mv= 1;
4179 s->m.lambda = s->lambda;
4180 s->m.qscale= (s->m.lambda*139 + FF_LAMBDA_SCALE*64) >> (FF_LAMBDA_SHIFT + 7);
4181 s->lambda2= s->m.lambda2= (s->m.lambda*s->m.lambda + FF_LAMBDA_SCALE/2) >> FF_LAMBDA_SHIFT;
4183 s->m.dsp= s->dsp; //move
4189 memcpy(rc_header_bak, s->header_state, sizeof(s->header_state));
4190 memcpy(rc_block_bak, s->block_state, sizeof(s->block_state));
4195 s->m.pict_type = pict->pict_type;
4196 s->qbias= pict->pict_type == P_TYPE ? 2 : 0;
4199 s->m.misc_bits = 8*(s->c.bytestream - s->c.bytestream_start);
4200 encode_blocks(s, 1);
4201 s->m.mv_bits = 8*(s->c.bytestream - s->c.bytestream_start) - s->m.misc_bits;
4203 for(plane_index=0; plane_index<3; plane_index++){
4204 Plane *p= &s->plane[plane_index];
4208 // int bits= put_bits_count(&s->c.pb);
4210 if(!(avctx->flags2 & CODEC_FLAG2_MEMC_ONLY)){
4212 if(pict->data[plane_index]) //FIXME gray hack
4215 s->spatial_idwt_buffer[y*w + x]= pict->data[plane_index][y*pict->linesize[plane_index] + x]<<FRAC_BITS;
4218 predict_plane(s, s->spatial_idwt_buffer, plane_index, 0);
4221 && pict->pict_type == P_TYPE
4222 && !(avctx->flags&CODEC_FLAG_PASS2)
4223 && s->m.me.scene_change_score > s->avctx->scenechange_threshold){
4224 ff_init_range_encoder(c, buf, buf_size);
4225 ff_build_rac_states(c, 0.05*(1LL<<32), 256-8);
4226 pict->pict_type= FF_I_TYPE;
4228 s->current_picture.key_frame=1;
4232 if(s->qlog == LOSSLESS_QLOG){
4235 s->spatial_dwt_buffer[y*w + x]= (s->spatial_idwt_buffer[y*w + x] + (1<<(FRAC_BITS-1))-1)>>FRAC_BITS;
4241 s->spatial_dwt_buffer[y*w + x]=s->spatial_idwt_buffer[y*w + x]<<ENCODER_EXTRA_BITS;
4246 ff_spatial_dwt(s->spatial_dwt_buffer, w, h, w, s->spatial_decomposition_type, s->spatial_decomposition_count);
4248 if(s->pass1_rc && plane_index==0){
4249 int delta_qlog = ratecontrol_1pass(s, pict);
4250 if (delta_qlog <= INT_MIN)
4253 //reordering qlog in the bitstream would eliminate this reset
4254 ff_init_range_encoder(c, buf, buf_size);
4255 memcpy(s->header_state, rc_header_bak, sizeof(s->header_state));
4256 memcpy(s->block_state, rc_block_bak, sizeof(s->block_state));
4258 encode_blocks(s, 0);
4262 for(level=0; level<s->spatial_decomposition_count; level++){
4263 for(orientation=level ? 1 : 0; orientation<4; orientation++){
4264 SubBand *b= &p->band[level][orientation];
4266 quantize(s, b, b->ibuf, b->buf, b->stride, s->qbias);
4268 decorrelate(s, b, b->ibuf, b->stride, pict->pict_type == P_TYPE, 0);
4269 encode_subband(s, b, b->ibuf, b->parent ? b->parent->ibuf : NULL, b->stride, orientation);
4270 assert(b->parent==NULL || b->parent->stride == b->stride*2);
4272 correlate(s, b, b->ibuf, b->stride, 1, 0);
4275 // av_log(NULL, AV_LOG_DEBUG, "plane:%d bits:%d\n", plane_index, put_bits_count(&s->c.pb) - bits);
4277 for(level=0; level<s->spatial_decomposition_count; level++){
4278 for(orientation=level ? 1 : 0; orientation<4; orientation++){
4279 SubBand *b= &p->band[level][orientation];
4281 dequantize(s, b, b->ibuf, b->stride);
4285 ff_spatial_idwt(s->spatial_idwt_buffer, w, h, w, s->spatial_decomposition_type, s->spatial_decomposition_count);
4286 if(s->qlog == LOSSLESS_QLOG){
4289 s->spatial_idwt_buffer[y*w + x]<<=FRAC_BITS;
4294 predict_plane(s, s->spatial_idwt_buffer, plane_index, 1);
4295 STOP_TIMER("pred-conv")}
4298 if(pict->pict_type == I_TYPE){
4301 s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x]=
4302 pict->data[plane_index][y*pict->linesize[plane_index] + x];
4306 memset(s->spatial_idwt_buffer, 0, sizeof(IDWTELEM)*w*h);
4307 predict_plane(s, s->spatial_idwt_buffer, plane_index, 1);
4310 if(s->avctx->flags&CODEC_FLAG_PSNR){
4313 if(pict->data[plane_index]) //FIXME gray hack
4316 int d= s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x] - pict->data[plane_index][y*pict->linesize[plane_index] + x];
4320 s->avctx->error[plane_index] += error;
4321 s->current_picture.error[plane_index] = error;
4325 if(s->last_picture[s->max_ref_frames-1].data[0]){
4326 avctx->release_buffer(avctx, &s->last_picture[s->max_ref_frames-1]);
4328 if(s->halfpel_plane[s->max_ref_frames-1][1+i/3][i%3])
4329 av_free(s->halfpel_plane[s->max_ref_frames-1][1+i/3][i%3] - EDGE_WIDTH*(1+s->current_picture.linesize[i%3]));
4332 s->current_picture.coded_picture_number = avctx->frame_number;
4333 s->current_picture.pict_type = pict->pict_type;
4334 s->current_picture.quality = pict->quality;
4335 s->m.frame_bits = 8*(s->c.bytestream - s->c.bytestream_start);
4336 s->m.p_tex_bits = s->m.frame_bits - s->m.misc_bits - s->m.mv_bits;
4337 s->m.current_picture.display_picture_number =
4338 s->m.current_picture.coded_picture_number = avctx->frame_number;
4339 s->m.current_picture.quality = pict->quality;
4340 s->m.total_bits += 8*(s->c.bytestream - s->c.bytestream_start);
4342 if (ff_rate_estimate_qscale(&s->m, 0) < 0)
4344 if(avctx->flags&CODEC_FLAG_PASS1)
4345 ff_write_pass1_stats(&s->m);
4346 s->m.last_pict_type = s->m.pict_type;
4347 avctx->frame_bits = s->m.frame_bits;
4348 avctx->mv_bits = s->m.mv_bits;
4349 avctx->misc_bits = s->m.misc_bits;
4350 avctx->p_tex_bits = s->m.p_tex_bits;
4354 return ff_rac_terminate(c);
4357 static void common_end(SnowContext *s){
4358 int plane_index, level, orientation, i;
4360 av_freep(&s->spatial_dwt_buffer);
4361 av_freep(&s->spatial_idwt_buffer);
4363 av_freep(&s->m.me.scratchpad);
4364 av_freep(&s->m.me.map);
4365 av_freep(&s->m.me.score_map);
4366 av_freep(&s->m.obmc_scratchpad);
4368 av_freep(&s->block);
4370 for(i=0; i<MAX_REF_FRAMES; i++){
4371 av_freep(&s->ref_mvs[i]);
4372 av_freep(&s->ref_scores[i]);
4373 if(s->last_picture[i].data[0])
4374 s->avctx->release_buffer(s->avctx, &s->last_picture[i]);
4377 for(plane_index=0; plane_index<3; plane_index++){
4378 for(level=s->spatial_decomposition_count-1; level>=0; level--){
4379 for(orientation=level ? 1 : 0; orientation<4; orientation++){
4380 SubBand *b= &s->plane[plane_index].band[level][orientation];
4382 av_freep(&b->x_coeff);
4388 static int encode_end(AVCodecContext *avctx)
4390 SnowContext *s = avctx->priv_data;
4393 av_free(avctx->stats_out);
4398 static int decode_init(AVCodecContext *avctx)
4400 SnowContext *s = avctx->priv_data;
4403 avctx->pix_fmt= PIX_FMT_YUV420P;
4407 block_size = MB_SIZE >> s->block_max_depth;
4408 slice_buffer_init(&s->sb, s->plane[0].height, (block_size) + (s->spatial_decomposition_count * (s->spatial_decomposition_count + 3)) + 1, s->plane[0].width, s->spatial_idwt_buffer);
4413 static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, uint8_t *buf, int buf_size){
4414 SnowContext *s = avctx->priv_data;
4415 RangeCoder * const c= &s->c;
4417 AVFrame *picture = data;
4418 int level, orientation, plane_index, i;
4420 ff_init_range_decoder(c, buf, buf_size);
4421 ff_build_rac_states(c, 0.05*(1LL<<32), 256-8);
4423 s->current_picture.pict_type= FF_I_TYPE; //FIXME I vs. P
4426 for(plane_index=0; plane_index<3; plane_index++){
4427 Plane *p= &s->plane[plane_index];
4428 p->fast_mc= p->diag_mc && p->htaps==6 && p->hcoeff[0]==40
4429 && p->hcoeff[1]==-10
4433 if(!s->block) alloc_blocks(s);
4436 //keyframe flag dupliaction mess FIXME
4437 if(avctx->debug&FF_DEBUG_PICT_INFO)
4438 av_log(avctx, AV_LOG_ERROR, "keyframe:%d qlog:%d\n", s->keyframe, s->qlog);
4442 for(plane_index=0; plane_index<3; plane_index++){
4443 Plane *p= &s->plane[plane_index];
4447 int decode_state[MAX_DECOMPOSITIONS][4][1]; /* Stored state info for unpack_coeffs. 1 variable per instance. */
4449 if(s->avctx->debug&2048){
4450 memset(s->spatial_dwt_buffer, 0, sizeof(DWTELEM)*w*h);
4451 predict_plane(s, s->spatial_idwt_buffer, plane_index, 1);
4455 int v= s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x];
4456 s->mconly_picture.data[plane_index][y*s->mconly_picture.linesize[plane_index] + x]= v;
4462 for(level=0; level<s->spatial_decomposition_count; level++){
4463 for(orientation=level ? 1 : 0; orientation<4; orientation++){
4464 SubBand *b= &p->band[level][orientation];
4465 unpack_coeffs(s, b, b->parent, orientation);
4468 STOP_TIMER("unpack coeffs");
4472 const int mb_h= s->b_height << s->block_max_depth;
4473 const int block_size = MB_SIZE >> s->block_max_depth;
4474 const int block_w = plane_index ? block_size/2 : block_size;
4476 dwt_compose_t cs[MAX_DECOMPOSITIONS];
4481 ff_spatial_idwt_buffered_init(cs, &s->sb, w, h, 1, s->spatial_decomposition_type, s->spatial_decomposition_count);
4482 for(mb_y=0; mb_y<=mb_h; mb_y++){
4484 int slice_starty = block_w*mb_y;
4485 int slice_h = block_w*(mb_y+1);
4486 if (!(s->keyframe || s->avctx->debug&512)){
4487 slice_starty = FFMAX(0, slice_starty - (block_w >> 1));
4488 slice_h -= (block_w >> 1);
4493 for(level=0; level<s->spatial_decomposition_count; level++){
4494 for(orientation=level ? 1 : 0; orientation<4; orientation++){
4495 SubBand *b= &p->band[level][orientation];
4498 int our_mb_start = mb_y;
4499 int our_mb_end = (mb_y + 1);
4501 start_y = (mb_y ? ((block_w * our_mb_start) >> (s->spatial_decomposition_count - level)) + s->spatial_decomposition_count - level + extra: 0);
4502 end_y = (((block_w * our_mb_end) >> (s->spatial_decomposition_count - level)) + s->spatial_decomposition_count - level + extra);
4503 if (!(s->keyframe || s->avctx->debug&512)){
4504 start_y = FFMAX(0, start_y - (block_w >> (1+s->spatial_decomposition_count - level)));
4505 end_y = FFMAX(0, end_y - (block_w >> (1+s->spatial_decomposition_count - level)));
4507 start_y = FFMIN(b->height, start_y);
4508 end_y = FFMIN(b->height, end_y);
4510 if (start_y != end_y){
4511 if (orientation == 0){
4512 SubBand * correlate_band = &p->band[0][0];
4513 int correlate_end_y = FFMIN(b->height, end_y + 1);
4514 int correlate_start_y = FFMIN(b->height, (start_y ? start_y + 1 : 0));
4515 decode_subband_slice_buffered(s, correlate_band, &s->sb, correlate_start_y, correlate_end_y, decode_state[0][0]);
4516 correlate_slice_buffered(s, &s->sb, correlate_band, correlate_band->ibuf, correlate_band->stride, 1, 0, correlate_start_y, correlate_end_y);
4517 dequantize_slice_buffered(s, &s->sb, correlate_band, correlate_band->ibuf, correlate_band->stride, start_y, end_y);
4520 decode_subband_slice_buffered(s, b, &s->sb, start_y, end_y, decode_state[level][orientation]);
4524 STOP_TIMER("decode_subband_slice");
4528 for(; yd<slice_h; yd+=4){
4529 ff_spatial_idwt_buffered_slice(&s->dsp, cs, &s->sb, w, h, 1, s->spatial_decomposition_type, s->spatial_decomposition_count, yd);
4531 STOP_TIMER("idwt slice");}
4534 if(s->qlog == LOSSLESS_QLOG){
4535 for(; yq<slice_h && yq<h; yq++){
4536 IDWTELEM * line = slice_buffer_get_line(&s->sb, yq);
4538 line[x] <<= FRAC_BITS;
4543 predict_slice_buffered(s, &s->sb, s->spatial_idwt_buffer, plane_index, 1, mb_y);
4545 y = FFMIN(p->height, slice_starty);
4546 end_y = FFMIN(p->height, slice_h);
4548 slice_buffer_release(&s->sb, y++);
4551 slice_buffer_flush(&s->sb);
4553 STOP_TIMER("idwt + predict_slices")}
4558 if(s->last_picture[s->max_ref_frames-1].data[0]){
4559 avctx->release_buffer(avctx, &s->last_picture[s->max_ref_frames-1]);
4561 if(s->halfpel_plane[s->max_ref_frames-1][1+i/3][i%3])
4562 av_free(s->halfpel_plane[s->max_ref_frames-1][1+i/3][i%3] - EDGE_WIDTH*(1+s->current_picture.linesize[i%3]));
4565 if(!(s->avctx->debug&2048))
4566 *picture= s->current_picture;
4568 *picture= s->mconly_picture;
4570 *data_size = sizeof(AVFrame);
4572 bytes_read= c->bytestream - c->bytestream_start;
4573 if(bytes_read ==0) av_log(s->avctx, AV_LOG_ERROR, "error at end of frame\n"); //FIXME
4578 static int decode_end(AVCodecContext *avctx)
4580 SnowContext *s = avctx->priv_data;
4582 slice_buffer_destroy(&s->sb);
4589 AVCodec snow_decoder = {
4593 sizeof(SnowContext),
4598 0 /*CODEC_CAP_DR1*/ /*| CODEC_CAP_DRAW_HORIZ_BAND*/,
4602 #ifdef CONFIG_SNOW_ENCODER
4603 AVCodec snow_encoder = {
4607 sizeof(SnowContext),
4624 int buffer[2][width*height];
4627 s.spatial_decomposition_count=6;
4628 s.spatial_decomposition_type=1;
4630 printf("testing 5/3 DWT\n");
4631 for(i=0; i<width*height; i++)
4632 buffer[0][i]= buffer[1][i]= random()%54321 - 12345;
4634 ff_spatial_dwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4635 ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4637 for(i=0; i<width*height; i++)
4638 if(buffer[0][i]!= buffer[1][i]) printf("fsck: %d %d %d\n",i, buffer[0][i], buffer[1][i]);
4640 printf("testing 9/7 DWT\n");
4641 s.spatial_decomposition_type=0;
4642 for(i=0; i<width*height; i++)
4643 buffer[0][i]= buffer[1][i]= random()%54321 - 12345;
4645 ff_spatial_dwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4646 ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4648 for(i=0; i<width*height; i++)
4649 if(FFABS(buffer[0][i] - buffer[1][i])>20) printf("fsck: %d %d %d\n",i, buffer[0][i], buffer[1][i]);
4652 printf("testing AC coder\n");
4653 memset(s.header_state, 0, sizeof(s.header_state));
4654 ff_init_range_encoder(&s.c, buffer[0], 256*256);
4655 ff_init_cabac_states(&s.c, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64);
4657 for(i=-256; i<256; i++){
4659 put_symbol(&s.c, s.header_state, i*i*i/3*FFABS(i), 1);
4660 STOP_TIMER("put_symbol")
4662 ff_rac_terminate(&s.c);
4664 memset(s.header_state, 0, sizeof(s.header_state));
4665 ff_init_range_decoder(&s.c, buffer[0], 256*256);
4666 ff_init_cabac_states(&s.c, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64);
4668 for(i=-256; i<256; i++){
4671 j= get_symbol(&s.c, s.header_state, 1);
4672 STOP_TIMER("get_symbol")
4673 if(j!=i*i*i/3*FFABS(i)) printf("fsck: %d != %d\n", i, j);
4677 int level, orientation, x, y;
4678 int64_t errors[8][4];
4681 memset(errors, 0, sizeof(errors));
4682 s.spatial_decomposition_count=3;
4683 s.spatial_decomposition_type=0;
4684 for(level=0; level<s.spatial_decomposition_count; level++){
4685 for(orientation=level ? 1 : 0; orientation<4; orientation++){
4686 int w= width >> (s.spatial_decomposition_count-level);
4687 int h= height >> (s.spatial_decomposition_count-level);
4688 int stride= width << (s.spatial_decomposition_count-level);
4689 DWTELEM *buf= buffer[0];
4692 if(orientation&1) buf+=w;
4693 if(orientation>1) buf+=stride>>1;
4695 memset(buffer[0], 0, sizeof(int)*width*height);
4696 buf[w/2 + h/2*stride]= 256*256;
4697 ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4698 for(y=0; y<height; y++){
4699 for(x=0; x<width; x++){
4700 int64_t d= buffer[0][x + y*width];
4702 if(FFABS(width/2-x)<9 && FFABS(height/2-y)<9 && level==2) printf("%8"PRId64" ", d);
4704 if(FFABS(height/2-y)<9 && level==2) printf("\n");
4706 error= (int)(sqrt(error)+0.5);
4707 errors[level][orientation]= error;
4708 if(g) g=ff_gcd(g, error);
4712 printf("static int const visual_weight[][4]={\n");
4713 for(level=0; level<s.spatial_decomposition_count; level++){
4715 for(orientation=0; orientation<4; orientation++){
4716 printf("%8"PRId64",", errors[level][orientation]/g);
4724 int w= width >> (s.spatial_decomposition_count-level);
4725 int h= height >> (s.spatial_decomposition_count-level);
4726 int stride= width << (s.spatial_decomposition_count-level);
4727 DWTELEM *buf= buffer[0];
4733 memset(buffer[0], 0, sizeof(int)*width*height);
4735 for(y=0; y<height; y++){
4736 for(x=0; x<width; x++){
4737 int tab[4]={0,2,3,1};
4738 buffer[0][x+width*y]= 256*256*tab[(x&1) + 2*(y&1)];
4741 ff_spatial_dwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4745 buf[x + y*stride ]=169;
4746 buf[x + y*stride-w]=64;
4749 ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4751 for(y=0; y<height; y++){
4752 for(x=0; x<width; x++){
4753 int64_t d= buffer[0][x + y*width];
4755 if(FFABS(width/2-x)<9 && FFABS(height/2-y)<9) printf("%8"PRId64" ", d);
4757 if(FFABS(height/2-y)<9) printf("\n");