2 * Copyright (C) 2004 Michael Niedermayer <michaelni@gmx.at>
4 * This file is part of FFmpeg.
6 * FFmpeg is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * FFmpeg is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with FFmpeg; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
25 #include "rangecoder.h"
28 #include "mpegvideo.h"
33 static const int8_t quant3[256]={
34 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
35 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
36 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
37 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
38 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
39 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
40 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
41 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
42 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
43 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
44 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
45 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
46 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
47 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
48 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
49 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 0,
51 static const int8_t quant3b[256]={
52 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
53 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
54 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
55 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
56 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
57 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
58 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
59 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
60 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
61 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
62 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
63 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
64 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
65 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
66 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
67 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
69 static const int8_t quant3bA[256]={
70 0, 0, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
71 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
72 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
73 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
74 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
75 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
76 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
77 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
78 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
79 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
80 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
81 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
82 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
83 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
84 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
85 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
87 static const int8_t quant5[256]={
88 0, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
89 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
90 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
91 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
92 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
93 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
94 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
95 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
96 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
97 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
98 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
99 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
100 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
101 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
102 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
103 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,-1,
105 static const int8_t quant7[256]={
106 0, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
107 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
108 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3,
109 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
110 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
111 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
112 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
113 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
114 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
115 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
116 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
117 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
118 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
119 -3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-2,-2,-2,
120 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
121 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,
123 static const int8_t quant9[256]={
124 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3,
125 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
126 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
127 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
128 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
129 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
130 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
131 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
132 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
133 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
134 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
135 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
136 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
137 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
138 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,
139 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-1,-1,
141 static const int8_t quant11[256]={
142 0, 1, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4,
143 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
144 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
145 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
146 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
147 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
148 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
149 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
150 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
151 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
152 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
153 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
154 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
155 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-4,-4,
156 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
157 -4,-4,-4,-4,-4,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-1,
159 static const int8_t quant13[256]={
160 0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
161 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
162 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
163 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
164 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
165 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
166 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
167 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
168 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
169 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
170 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
171 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
172 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-5,
173 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
174 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
175 -4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,-2,-2,-1,
179 static const uint8_t obmc32[1024]={
180 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
181 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0,
182 0, 0, 0, 4, 4, 4, 4, 8, 8, 12, 12, 12, 16, 16, 16, 16, 16, 16, 16, 16, 12, 12, 12, 8, 8, 4, 4, 4, 4, 0, 0, 0,
183 0, 0, 4, 4, 8, 8, 12, 16, 16, 20, 24, 24, 28, 28, 32, 32, 32, 32, 28, 28, 24, 24, 20, 16, 16, 12, 8, 8, 4, 4, 0, 0,
184 0, 0, 4, 8, 8, 12, 16, 24, 28, 32, 36, 40, 44, 48, 48, 48, 48, 48, 48, 44, 40, 36, 32, 28, 24, 16, 12, 8, 8, 4, 0, 0,
185 0, 4, 4, 8, 12, 20, 24, 32, 40, 44, 52, 56, 60, 64, 68, 72, 72, 68, 64, 60, 56, 52, 44, 40, 32, 24, 20, 12, 8, 4, 4, 0,
186 0, 4, 4, 12, 16, 24, 32, 40, 52, 60, 68, 76, 80, 88, 88, 92, 92, 88, 88, 80, 76, 68, 60, 52, 40, 32, 24, 16, 12, 4, 4, 0,
187 0, 4, 8, 16, 24, 32, 40, 52, 64, 76, 84, 92,100,108,112,116,116,112,108,100, 92, 84, 76, 64, 52, 40, 32, 24, 16, 8, 4, 0,
188 0, 4, 8, 16, 28, 40, 52, 64, 76, 88,100,112,124,132,136,140,140,136,132,124,112,100, 88, 76, 64, 52, 40, 28, 16, 8, 4, 0,
189 0, 4, 12, 20, 32, 44, 60, 76, 88,104,120,132,144,152,160,164,164,160,152,144,132,120,104, 88, 76, 60, 44, 32, 20, 12, 4, 0,
190 0, 4, 12, 24, 36, 48, 68, 84,100,120,136,152,164,176,180,184,184,180,176,164,152,136,120,100, 84, 68, 48, 36, 24, 12, 4, 0,
191 0, 4, 12, 24, 40, 56, 76, 92,112,132,152,168,180,192,204,208,208,204,192,180,168,152,132,112, 92, 76, 56, 40, 24, 12, 4, 0,
192 0, 4, 16, 28, 44, 60, 80,100,124,144,164,180,196,208,220,224,224,220,208,196,180,164,144,124,100, 80, 60, 44, 28, 16, 4, 0,
193 0, 8, 16, 28, 48, 64, 88,108,132,152,176,192,208,224,232,240,240,232,224,208,192,176,152,132,108, 88, 64, 48, 28, 16, 8, 0,
194 0, 4, 16, 32, 48, 68, 88,112,136,160,180,204,220,232,244,248,248,244,232,220,204,180,160,136,112, 88, 68, 48, 32, 16, 4, 0,
195 1, 8, 16, 32, 48, 72, 92,116,140,164,184,208,224,240,248,255,255,248,240,224,208,184,164,140,116, 92, 72, 48, 32, 16, 8, 1,
196 1, 8, 16, 32, 48, 72, 92,116,140,164,184,208,224,240,248,255,255,248,240,224,208,184,164,140,116, 92, 72, 48, 32, 16, 8, 1,
197 0, 4, 16, 32, 48, 68, 88,112,136,160,180,204,220,232,244,248,248,244,232,220,204,180,160,136,112, 88, 68, 48, 32, 16, 4, 0,
198 0, 8, 16, 28, 48, 64, 88,108,132,152,176,192,208,224,232,240,240,232,224,208,192,176,152,132,108, 88, 64, 48, 28, 16, 8, 0,
199 0, 4, 16, 28, 44, 60, 80,100,124,144,164,180,196,208,220,224,224,220,208,196,180,164,144,124,100, 80, 60, 44, 28, 16, 4, 0,
200 0, 4, 12, 24, 40, 56, 76, 92,112,132,152,168,180,192,204,208,208,204,192,180,168,152,132,112, 92, 76, 56, 40, 24, 12, 4, 0,
201 0, 4, 12, 24, 36, 48, 68, 84,100,120,136,152,164,176,180,184,184,180,176,164,152,136,120,100, 84, 68, 48, 36, 24, 12, 4, 0,
202 0, 4, 12, 20, 32, 44, 60, 76, 88,104,120,132,144,152,160,164,164,160,152,144,132,120,104, 88, 76, 60, 44, 32, 20, 12, 4, 0,
203 0, 4, 8, 16, 28, 40, 52, 64, 76, 88,100,112,124,132,136,140,140,136,132,124,112,100, 88, 76, 64, 52, 40, 28, 16, 8, 4, 0,
204 0, 4, 8, 16, 24, 32, 40, 52, 64, 76, 84, 92,100,108,112,116,116,112,108,100, 92, 84, 76, 64, 52, 40, 32, 24, 16, 8, 4, 0,
205 0, 4, 4, 12, 16, 24, 32, 40, 52, 60, 68, 76, 80, 88, 88, 92, 92, 88, 88, 80, 76, 68, 60, 52, 40, 32, 24, 16, 12, 4, 4, 0,
206 0, 4, 4, 8, 12, 20, 24, 32, 40, 44, 52, 56, 60, 64, 68, 72, 72, 68, 64, 60, 56, 52, 44, 40, 32, 24, 20, 12, 8, 4, 4, 0,
207 0, 0, 4, 8, 8, 12, 16, 24, 28, 32, 36, 40, 44, 48, 48, 48, 48, 48, 48, 44, 40, 36, 32, 28, 24, 16, 12, 8, 8, 4, 0, 0,
208 0, 0, 4, 4, 8, 8, 12, 16, 16, 20, 24, 24, 28, 28, 32, 32, 32, 32, 28, 28, 24, 24, 20, 16, 16, 12, 8, 8, 4, 4, 0, 0,
209 0, 0, 0, 4, 4, 4, 4, 8, 8, 12, 12, 12, 16, 16, 16, 16, 16, 16, 16, 16, 12, 12, 12, 8, 8, 4, 4, 4, 4, 0, 0, 0,
210 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0,
211 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
214 static const uint8_t obmc16[256]={
215 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0,
216 0, 4, 4, 8, 16, 20, 20, 24, 24, 20, 20, 16, 8, 4, 4, 0,
217 0, 4, 16, 24, 36, 44, 52, 60, 60, 52, 44, 36, 24, 16, 4, 0,
218 0, 8, 24, 44, 60, 80, 96,104,104, 96, 80, 60, 44, 24, 8, 0,
219 0, 16, 36, 60, 92,116,136,152,152,136,116, 92, 60, 36, 16, 0,
220 0, 20, 44, 80,116,152,180,196,196,180,152,116, 80, 44, 20, 0,
221 4, 20, 52, 96,136,180,212,228,228,212,180,136, 96, 52, 20, 4,
222 4, 24, 60,104,152,196,228,248,248,228,196,152,104, 60, 24, 4,
223 4, 24, 60,104,152,196,228,248,248,228,196,152,104, 60, 24, 4,
224 4, 20, 52, 96,136,180,212,228,228,212,180,136, 96, 52, 20, 4,
225 0, 20, 44, 80,116,152,180,196,196,180,152,116, 80, 44, 20, 0,
226 0, 16, 36, 60, 92,116,136,152,152,136,116, 92, 60, 36, 16, 0,
227 0, 8, 24, 44, 60, 80, 96,104,104, 96, 80, 60, 44, 24, 8, 0,
228 0, 4, 16, 24, 36, 44, 52, 60, 60, 52, 44, 36, 24, 16, 4, 0,
229 0, 4, 4, 8, 16, 20, 20, 24, 24, 20, 20, 16, 8, 4, 4, 0,
230 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0,
234 static const uint8_t obmc32[1024]={
235 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0,
236 0, 4, 4, 4, 8, 8, 8, 12, 12, 16, 16, 16, 20, 20, 20, 24, 24, 20, 20, 20, 16, 16, 16, 12, 12, 8, 8, 8, 4, 4, 4, 0,
237 0, 4, 8, 8, 12, 12, 16, 20, 20, 24, 28, 28, 32, 32, 36, 40, 40, 36, 32, 32, 28, 28, 24, 20, 20, 16, 12, 12, 8, 8, 4, 0,
238 0, 4, 8, 12, 16, 20, 24, 28, 28, 32, 36, 40, 44, 48, 52, 56, 56, 52, 48, 44, 40, 36, 32, 28, 28, 24, 20, 16, 12, 8, 4, 0,
239 4, 8, 12, 16, 20, 24, 28, 32, 40, 44, 48, 52, 56, 60, 64, 68, 68, 64, 60, 56, 52, 48, 44, 40, 32, 28, 24, 20, 16, 12, 8, 4,
240 4, 8, 12, 20, 24, 32, 36, 40, 48, 52, 56, 64, 68, 76, 80, 84, 84, 80, 76, 68, 64, 56, 52, 48, 40, 36, 32, 24, 20, 12, 8, 4,
241 4, 8, 16, 24, 28, 36, 44, 48, 56, 60, 68, 76, 80, 88, 96,100,100, 96, 88, 80, 76, 68, 60, 56, 48, 44, 36, 28, 24, 16, 8, 4,
242 4, 12, 20, 28, 32, 40, 48, 56, 64, 72, 80, 88, 92,100,108,116,116,108,100, 92, 88, 80, 72, 64, 56, 48, 40, 32, 28, 20, 12, 4,
243 4, 12, 20, 28, 40, 48, 56, 64, 72, 80, 88, 96,108,116,124,132,132,124,116,108, 96, 88, 80, 72, 64, 56, 48, 40, 28, 20, 12, 4,
244 4, 16, 24, 32, 44, 52, 60, 72, 80, 92,100,108,120,128,136,148,148,136,128,120,108,100, 92, 80, 72, 60, 52, 44, 32, 24, 16, 4,
245 4, 16, 28, 36, 48, 56, 68, 80, 88,100,112,120,132,140,152,164,164,152,140,132,120,112,100, 88, 80, 68, 56, 48, 36, 28, 16, 4,
246 4, 16, 28, 40, 52, 64, 76, 88, 96,108,120,132,144,156,168,180,180,168,156,144,132,120,108, 96, 88, 76, 64, 52, 40, 28, 16, 4,
247 8, 20, 32, 44, 56, 68, 80, 92,108,120,132,144,156,168,180,192,192,180,168,156,144,132,120,108, 92, 80, 68, 56, 44, 32, 20, 8,
248 8, 20, 32, 48, 60, 76, 88,100,116,128,140,156,168,184,196,208,208,196,184,168,156,140,128,116,100, 88, 76, 60, 48, 32, 20, 8,
249 8, 20, 36, 52, 64, 80, 96,108,124,136,152,168,180,196,212,224,224,212,196,180,168,152,136,124,108, 96, 80, 64, 52, 36, 20, 8,
250 8, 24, 40, 56, 68, 84,100,116,132,148,164,180,192,208,224,240,240,224,208,192,180,164,148,132,116,100, 84, 68, 56, 40, 24, 8,
251 8, 24, 40, 56, 68, 84,100,116,132,148,164,180,192,208,224,240,240,224,208,192,180,164,148,132,116,100, 84, 68, 56, 40, 24, 8,
252 8, 20, 36, 52, 64, 80, 96,108,124,136,152,168,180,196,212,224,224,212,196,180,168,152,136,124,108, 96, 80, 64, 52, 36, 20, 8,
253 8, 20, 32, 48, 60, 76, 88,100,116,128,140,156,168,184,196,208,208,196,184,168,156,140,128,116,100, 88, 76, 60, 48, 32, 20, 8,
254 8, 20, 32, 44, 56, 68, 80, 92,108,120,132,144,156,168,180,192,192,180,168,156,144,132,120,108, 92, 80, 68, 56, 44, 32, 20, 8,
255 4, 16, 28, 40, 52, 64, 76, 88, 96,108,120,132,144,156,168,180,180,168,156,144,132,120,108, 96, 88, 76, 64, 52, 40, 28, 16, 4,
256 4, 16, 28, 36, 48, 56, 68, 80, 88,100,112,120,132,140,152,164,164,152,140,132,120,112,100, 88, 80, 68, 56, 48, 36, 28, 16, 4,
257 4, 16, 24, 32, 44, 52, 60, 72, 80, 92,100,108,120,128,136,148,148,136,128,120,108,100, 92, 80, 72, 60, 52, 44, 32, 24, 16, 4,
258 4, 12, 20, 28, 40, 48, 56, 64, 72, 80, 88, 96,108,116,124,132,132,124,116,108, 96, 88, 80, 72, 64, 56, 48, 40, 28, 20, 12, 4,
259 4, 12, 20, 28, 32, 40, 48, 56, 64, 72, 80, 88, 92,100,108,116,116,108,100, 92, 88, 80, 72, 64, 56, 48, 40, 32, 28, 20, 12, 4,
260 4, 8, 16, 24, 28, 36, 44, 48, 56, 60, 68, 76, 80, 88, 96,100,100, 96, 88, 80, 76, 68, 60, 56, 48, 44, 36, 28, 24, 16, 8, 4,
261 4, 8, 12, 20, 24, 32, 36, 40, 48, 52, 56, 64, 68, 76, 80, 84, 84, 80, 76, 68, 64, 56, 52, 48, 40, 36, 32, 24, 20, 12, 8, 4,
262 4, 8, 12, 16, 20, 24, 28, 32, 40, 44, 48, 52, 56, 60, 64, 68, 68, 64, 60, 56, 52, 48, 44, 40, 32, 28, 24, 20, 16, 12, 8, 4,
263 0, 4, 8, 12, 16, 20, 24, 28, 28, 32, 36, 40, 44, 48, 52, 56, 56, 52, 48, 44, 40, 36, 32, 28, 28, 24, 20, 16, 12, 8, 4, 0,
264 0, 4, 8, 8, 12, 12, 16, 20, 20, 24, 28, 28, 32, 32, 36, 40, 40, 36, 32, 32, 28, 28, 24, 20, 20, 16, 12, 12, 8, 8, 4, 0,
265 0, 4, 4, 4, 8, 8, 8, 12, 12, 16, 16, 16, 20, 20, 20, 24, 24, 20, 20, 20, 16, 16, 16, 12, 12, 8, 8, 8, 4, 4, 4, 0,
266 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0,
269 static const uint8_t obmc16[256]={
270 0, 4, 4, 8, 8, 12, 12, 16, 16, 12, 12, 8, 8, 4, 4, 0,
271 4, 8, 16, 20, 28, 32, 40, 44, 44, 40, 32, 28, 20, 16, 8, 4,
272 4, 16, 24, 36, 44, 56, 64, 76, 76, 64, 56, 44, 36, 24, 16, 4,
273 8, 20, 36, 48, 64, 76, 92,104,104, 92, 76, 64, 48, 36, 20, 8,
274 8, 28, 44, 64, 80,100,116,136,136,116,100, 80, 64, 44, 28, 8,
275 12, 32, 56, 76,100,120,144,164,164,144,120,100, 76, 56, 32, 12,
276 12, 40, 64, 92,116,144,168,196,196,168,144,116, 92, 64, 40, 12,
277 16, 44, 76,104,136,164,196,224,224,196,164,136,104, 76, 44, 16,
278 16, 44, 76,104,136,164,196,224,224,196,164,136,104, 76, 44, 16,
279 12, 40, 64, 92,116,144,168,196,196,168,144,116, 92, 64, 40, 12,
280 12, 32, 56, 76,100,120,144,164,164,144,120,100, 76, 56, 32, 12,
281 8, 28, 44, 64, 80,100,116,136,136,116,100, 80, 64, 44, 28, 8,
282 8, 20, 36, 48, 64, 76, 92,104,104, 92, 76, 64, 48, 36, 20, 8,
283 4, 16, 24, 36, 44, 56, 64, 76, 76, 64, 56, 44, 36, 24, 16, 4,
284 4, 8, 16, 20, 28, 32, 40, 44, 44, 40, 32, 28, 20, 16, 8, 4,
285 0, 4, 4, 8, 8, 12, 12, 16, 16, 12, 12, 8, 8, 4, 4, 0,
289 static const uint8_t obmc32[1024]={
290 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
291 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 4, 4, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0,
292 0, 0, 0, 4, 4, 4, 4, 8, 8, 12, 12, 12, 12, 16, 16, 16, 16, 16, 16, 12, 12, 12, 12, 8, 8, 4, 4, 4, 4, 0, 0, 0,
293 0, 0, 4, 4, 4, 8, 8, 12, 16, 20, 20, 24, 28, 28, 28, 28, 28, 28, 28, 28, 24, 20, 20, 16, 12, 8, 8, 4, 4, 4, 0, 0,
294 0, 0, 4, 4, 8, 12, 16, 20, 24, 28, 36, 40, 44, 44, 48, 48, 48, 48, 44, 44, 40, 36, 28, 24, 20, 16, 12, 8, 4, 4, 0, 0,
295 0, 0, 4, 8, 12, 20, 24, 32, 36, 44, 48, 56, 60, 64, 68, 68, 68, 68, 64, 60, 56, 48, 44, 36, 32, 24, 20, 12, 8, 4, 0, 0,
296 0, 4, 4, 8, 16, 24, 32, 40, 48, 60, 68, 76, 80, 84, 88, 92, 92, 88, 84, 80, 76, 68, 60, 48, 40, 32, 24, 16, 8, 4, 4, 0,
297 0, 4, 8, 12, 20, 32, 40, 52, 64, 76, 84, 96,104,108,112,116,116,112,108,104, 96, 84, 76, 64, 52, 40, 32, 20, 12, 8, 4, 0,
298 0, 4, 8, 16, 24, 36, 48, 64, 76, 92,104,116,124,132,136,140,140,136,132,124,116,104, 92, 76, 64, 48, 36, 24, 16, 8, 4, 0,
299 0, 4, 12, 20, 28, 44, 60, 76, 92,104,120,136,148,156,160,164,164,160,156,148,136,120,104, 92, 76, 60, 44, 28, 20, 12, 4, 0,
300 0, 4, 12, 20, 36, 48, 68, 84,104,120,140,152,168,176,184,188,188,184,176,168,152,140,120,104, 84, 68, 48, 36, 20, 12, 4, 0,
301 0, 4, 12, 24, 36, 56, 76, 96,116,136,152,172,184,196,204,208,208,204,196,184,172,152,136,116, 96, 76, 56, 36, 24, 12, 4, 0,
302 0, 4, 12, 24, 44, 60, 80,104,124,148,168,184,200,212,224,228,228,224,212,200,184,168,148,124,104, 80, 60, 44, 24, 12, 4, 0,
303 0, 4, 12, 28, 44, 64, 84,108,132,156,176,196,212,228,236,240,240,236,228,212,196,176,156,132,108, 84, 64, 44, 28, 12, 4, 0,
304 0, 4, 16, 28, 48, 68, 88,112,136,160,184,204,224,236,244,252,252,244,236,224,204,184,160,136,112, 88, 68, 48, 28, 16, 4, 0,
305 1, 4, 16, 28, 48, 68, 92,116,140,164,188,208,228,240,252,255,255,252,240,228,208,188,164,140,116, 92, 68, 48, 28, 16, 4, 1,
306 1, 4, 16, 28, 48, 68, 92,116,140,164,188,208,228,240,252,255,255,252,240,228,208,188,164,140,116, 92, 68, 48, 28, 16, 4, 1,
307 0, 4, 16, 28, 48, 68, 88,112,136,160,184,204,224,236,244,252,252,244,236,224,204,184,160,136,112, 88, 68, 48, 28, 16, 4, 0,
308 0, 4, 12, 28, 44, 64, 84,108,132,156,176,196,212,228,236,240,240,236,228,212,196,176,156,132,108, 84, 64, 44, 28, 12, 4, 0,
309 0, 4, 12, 24, 44, 60, 80,104,124,148,168,184,200,212,224,228,228,224,212,200,184,168,148,124,104, 80, 60, 44, 24, 12, 4, 0,
310 0, 4, 12, 24, 36, 56, 76, 96,116,136,152,172,184,196,204,208,208,204,196,184,172,152,136,116, 96, 76, 56, 36, 24, 12, 4, 0,
311 0, 4, 12, 20, 36, 48, 68, 84,104,120,140,152,168,176,184,188,188,184,176,168,152,140,120,104, 84, 68, 48, 36, 20, 12, 4, 0,
312 0, 4, 12, 20, 28, 44, 60, 76, 92,104,120,136,148,156,160,164,164,160,156,148,136,120,104, 92, 76, 60, 44, 28, 20, 12, 4, 0,
313 0, 4, 8, 16, 24, 36, 48, 64, 76, 92,104,116,124,132,136,140,140,136,132,124,116,104, 92, 76, 64, 48, 36, 24, 16, 8, 4, 0,
314 0, 4, 8, 12, 20, 32, 40, 52, 64, 76, 84, 96,104,108,112,116,116,112,108,104, 96, 84, 76, 64, 52, 40, 32, 20, 12, 8, 4, 0,
315 0, 4, 4, 8, 16, 24, 32, 40, 48, 60, 68, 76, 80, 84, 88, 92, 92, 88, 84, 80, 76, 68, 60, 48, 40, 32, 24, 16, 8, 4, 4, 0,
316 0, 0, 4, 8, 12, 20, 24, 32, 36, 44, 48, 56, 60, 64, 68, 68, 68, 68, 64, 60, 56, 48, 44, 36, 32, 24, 20, 12, 8, 4, 0, 0,
317 0, 0, 4, 4, 8, 12, 16, 20, 24, 28, 36, 40, 44, 44, 48, 48, 48, 48, 44, 44, 40, 36, 28, 24, 20, 16, 12, 8, 4, 4, 0, 0,
318 0, 0, 4, 4, 4, 8, 8, 12, 16, 20, 20, 24, 28, 28, 28, 28, 28, 28, 28, 28, 24, 20, 20, 16, 12, 8, 8, 4, 4, 4, 0, 0,
319 0, 0, 0, 4, 4, 4, 4, 8, 8, 12, 12, 12, 12, 16, 16, 16, 16, 16, 16, 12, 12, 12, 12, 8, 8, 4, 4, 4, 4, 0, 0, 0,
320 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 4, 4, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0,
321 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
324 static const uint8_t obmc16[256]={
325 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0,
326 0, 0, 4, 8, 12, 16, 20, 20, 20, 20, 16, 12, 8, 4, 0, 0,
327 0, 4, 12, 24, 32, 44, 52, 56, 56, 52, 44, 32, 24, 12, 4, 0,
328 0, 8, 24, 40, 60, 80, 96,104,104, 96, 80, 60, 40, 24, 8, 0,
329 0, 12, 32, 64, 92,120,140,152,152,140,120, 92, 64, 32, 12, 0,
330 4, 16, 44, 80,120,156,184,196,196,184,156,120, 80, 44, 16, 4,
331 4, 20, 52, 96,140,184,216,232,232,216,184,140, 96, 52, 20, 4,
332 0, 20, 56,104,152,196,232,252,252,232,196,152,104, 56, 20, 0,
333 0, 20, 56,104,152,196,232,252,252,232,196,152,104, 56, 20, 0,
334 4, 20, 52, 96,140,184,216,232,232,216,184,140, 96, 52, 20, 4,
335 4, 16, 44, 80,120,156,184,196,196,184,156,120, 80, 44, 16, 4,
336 0, 12, 32, 64, 92,120,140,152,152,140,120, 92, 64, 32, 12, 0,
337 0, 8, 24, 40, 60, 80, 96,104,104, 96, 80, 60, 40, 24, 8, 0,
338 0, 4, 12, 24, 32, 44, 52, 56, 56, 52, 44, 32, 24, 12, 4, 0,
339 0, 0, 4, 8, 12, 16, 20, 20, 20, 20, 16, 12, 8, 4, 0, 0,
340 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0,
346 static const uint8_t obmc8[64]={
347 4, 12, 20, 28, 28, 20, 12, 4,
348 12, 36, 60, 84, 84, 60, 36, 12,
349 20, 60,100,140,140,100, 60, 20,
350 28, 84,140,196,196,140, 84, 28,
351 28, 84,140,196,196,140, 84, 28,
352 20, 60,100,140,140,100, 60, 20,
353 12, 36, 60, 84, 84, 60, 36, 12,
354 4, 12, 20, 28, 28, 20, 12, 4,
359 static const uint8_t obmc4[16]={
367 static const uint8_t * const obmc_tab[4]={
368 obmc32, obmc16, obmc8, obmc4
371 static int scale_mv_ref[MAX_REF_FRAMES][MAX_REF_FRAMES];
373 typedef struct BlockNode{
379 //#define TYPE_SPLIT 1
380 #define BLOCK_INTRA 1
382 //#define TYPE_NOCOLOR 4
383 uint8_t level; //FIXME merge into type?
386 static const BlockNode null_block= { //FIXME add border maybe
387 .color= {128,128,128},
395 #define LOG2_MB_SIZE 4
396 #define MB_SIZE (1<<LOG2_MB_SIZE)
397 #define ENCODER_EXTRA_BITS 4
400 typedef struct x_and_coeff{
405 typedef struct SubBand{
410 int qlog; ///< log(qscale)/log[2^(1/6)]
415 int stride_line; ///< Stride measured in lines, not pixels.
416 x_and_coeff * x_coeff;
417 struct SubBand *parent;
418 uint8_t state[/*7*2*/ 7 + 512][32];
421 typedef struct Plane{
424 SubBand band[MAX_DECOMPOSITIONS][4];
427 int8_t hcoeff[HTAPS_MAX/2];
432 int8_t last_hcoeff[HTAPS_MAX/2];
436 typedef struct SnowContext{
437 // MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to eventually make the motion estimation independent of MpegEncContext, so this will be removed then (FIXME/XXX)
439 AVCodecContext *avctx;
443 AVFrame input_picture; ///< new_picture with the internal linesizes
444 AVFrame current_picture;
445 AVFrame last_picture[MAX_REF_FRAMES];
446 uint8_t *halfpel_plane[MAX_REF_FRAMES][4][4];
447 AVFrame mconly_picture;
448 // uint8_t q_context[16];
449 uint8_t header_state[32];
450 uint8_t block_state[128 + 32*128];
454 int spatial_decomposition_type;
455 int last_spatial_decomposition_type;
456 int temporal_decomposition_type;
457 int spatial_decomposition_count;
458 int last_spatial_decomposition_count;
459 int temporal_decomposition_count;
462 int16_t (*ref_mvs[MAX_REF_FRAMES])[2];
463 uint32_t *ref_scores[MAX_REF_FRAMES];
464 DWTELEM *spatial_dwt_buffer;
465 IDWTELEM *spatial_idwt_buffer;
469 int spatial_scalability;
479 #define QBIAS_SHIFT 3
483 int last_block_max_depth;
484 Plane plane[MAX_PLANES];
486 #define ME_CACHE_SIZE 1024
487 int me_cache[ME_CACHE_SIZE];
488 int me_cache_generation;
491 MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to eventually make the motion estimation independent of MpegEncContext, so this will be removed then (FIXME/XXX)
504 #define slice_buffer_get_line(slice_buf, line_num) ((slice_buf)->line[line_num] ? (slice_buf)->line[line_num] : slice_buffer_load_line((slice_buf), (line_num)))
505 //#define slice_buffer_get_line(slice_buf, line_num) (slice_buffer_load_line((slice_buf), (line_num)))
507 static void iterative_me(SnowContext *s);
509 static void slice_buffer_init(slice_buffer * buf, int line_count, int max_allocated_lines, int line_width, IDWTELEM * base_buffer)
513 buf->base_buffer = base_buffer;
514 buf->line_count = line_count;
515 buf->line_width = line_width;
516 buf->data_count = max_allocated_lines;
517 buf->line = av_mallocz (sizeof(IDWTELEM *) * line_count);
518 buf->data_stack = av_malloc (sizeof(IDWTELEM *) * max_allocated_lines);
520 for(i = 0; i < max_allocated_lines; i++){
521 buf->data_stack[i] = av_malloc (sizeof(IDWTELEM) * line_width);
524 buf->data_stack_top = max_allocated_lines - 1;
527 static IDWTELEM * slice_buffer_load_line(slice_buffer * buf, int line)
531 assert(buf->data_stack_top >= 0);
532 // assert(!buf->line[line]);
534 return buf->line[line];
536 buffer = buf->data_stack[buf->data_stack_top];
537 buf->data_stack_top--;
538 buf->line[line] = buffer;
543 static void slice_buffer_release(slice_buffer * buf, int line)
548 assert(line >= 0 && line < buf->line_count);
549 assert(buf->line[line]);
551 offset = buf->line_width * line;
552 buffer = buf->line[line];
553 buf->data_stack_top++;
554 buf->data_stack[buf->data_stack_top] = buffer;
555 buf->line[line] = NULL;
558 static void slice_buffer_flush(slice_buffer * buf)
561 for(i = 0; i < buf->line_count; i++){
563 slice_buffer_release(buf, i);
567 static void slice_buffer_destroy(slice_buffer * buf)
570 slice_buffer_flush(buf);
572 for(i = buf->data_count - 1; i >= 0; i--){
573 av_freep(&buf->data_stack[i]);
575 av_freep(&buf->data_stack);
576 av_freep(&buf->line);
580 // Avoid a name clash on SGI IRIX
583 #define QEXPSHIFT (7-FRAC_BITS+8) //FIXME try to change this to 0
584 static uint8_t qexp[QROOT];
586 static inline int mirror(int v, int m){
587 while((unsigned)v > (unsigned)m){
594 static inline void put_symbol(RangeCoder *c, uint8_t *state, int v, int is_signed){
598 const int a= FFABS(v);
599 const int e= av_log2(a);
601 const int el= FFMIN(e, 10);
602 put_rac(c, state+0, 0);
605 put_rac(c, state+1+i, 1); //1..10
608 put_rac(c, state+1+9, 1); //1..10
610 put_rac(c, state+1+FFMIN(i,9), 0);
612 for(i=e-1; i>=el; i--){
613 put_rac(c, state+22+9, (a>>i)&1); //22..31
616 put_rac(c, state+22+i, (a>>i)&1); //22..31
620 put_rac(c, state+11 + el, v < 0); //11..21
623 put_rac(c, state+0, 0);
626 put_rac(c, state+1+i, 1); //1..10
628 put_rac(c, state+1+i, 0);
630 for(i=e-1; i>=0; i--){
631 put_rac(c, state+22+i, (a>>i)&1); //22..31
635 put_rac(c, state+11 + e, v < 0); //11..21
638 put_rac(c, state+1+FFMIN(i,9), 1); //1..10
640 put_rac(c, state+1+FFMIN(i,9), 0);
642 for(i=e-1; i>=0; i--){
643 put_rac(c, state+22+FFMIN(i,9), (a>>i)&1); //22..31
647 put_rac(c, state+11 + FFMIN(e,10), v < 0); //11..21
651 put_rac(c, state+0, 1);
655 static inline int get_symbol(RangeCoder *c, uint8_t *state, int is_signed){
656 if(get_rac(c, state+0))
661 while(get_rac(c, state+1 + FFMIN(e,9))){ //1..10
666 for(i=e-1; i>=0; i--){
667 a += a + get_rac(c, state+22 + FFMIN(i,9)); //22..31
670 if(is_signed && get_rac(c, state+11 + FFMIN(e,10))) //11..21
677 static inline void put_symbol2(RangeCoder *c, uint8_t *state, int v, int log2){
679 int r= log2>=0 ? 1<<log2 : 1;
685 put_rac(c, state+4+log2, 1);
690 put_rac(c, state+4+log2, 0);
692 for(i=log2-1; i>=0; i--){
693 put_rac(c, state+31-i, (v>>i)&1);
697 static inline int get_symbol2(RangeCoder *c, uint8_t *state, int log2){
699 int r= log2>=0 ? 1<<log2 : 1;
704 while(get_rac(c, state+4+log2)){
710 for(i=log2-1; i>=0; i--){
711 v+= get_rac(c, state+31-i)<<i;
717 static av_always_inline void
718 lift(DWTELEM *dst, DWTELEM *src, DWTELEM *ref,
719 int dst_step, int src_step, int ref_step,
720 int width, int mul, int add, int shift,
721 int highpass, int inverse){
722 const int mirror_left= !highpass;
723 const int mirror_right= (width&1) ^ highpass;
724 const int w= (width>>1) - 1 + (highpass & width);
727 #define LIFT(src, ref, inv) ((src) + ((inv) ? - (ref) : + (ref)))
729 dst[0] = LIFT(src[0], ((mul*2*ref[0]+add)>>shift), inverse);
736 LIFT(src[i*src_step],
737 ((mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add)>>shift),
743 LIFT(src[w*src_step],
744 ((mul*2*ref[w*ref_step]+add)>>shift),
749 static av_always_inline void
750 inv_lift(IDWTELEM *dst, IDWTELEM *src, IDWTELEM *ref,
751 int dst_step, int src_step, int ref_step,
752 int width, int mul, int add, int shift,
753 int highpass, int inverse){
754 const int mirror_left= !highpass;
755 const int mirror_right= (width&1) ^ highpass;
756 const int w= (width>>1) - 1 + (highpass & width);
759 #define LIFT(src, ref, inv) ((src) + ((inv) ? - (ref) : + (ref)))
761 dst[0] = LIFT(src[0], ((mul*2*ref[0]+add)>>shift), inverse);
768 LIFT(src[i*src_step],
769 ((mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add)>>shift),
775 LIFT(src[w*src_step],
776 ((mul*2*ref[w*ref_step]+add)>>shift),
782 static av_always_inline void
783 liftS(DWTELEM *dst, DWTELEM *src, DWTELEM *ref,
784 int dst_step, int src_step, int ref_step,
785 int width, int mul, int add, int shift,
786 int highpass, int inverse){
787 const int mirror_left= !highpass;
788 const int mirror_right= (width&1) ^ highpass;
789 const int w= (width>>1) - 1 + (highpass & width);
793 #define LIFTS(src, ref, inv) \
795 (src) + (((ref) + 4*(src))>>shift): \
796 -((-16*(src) + (ref) + add/4 + 1 + (5<<25))/(5*4) - (1<<23)))
798 dst[0] = LIFTS(src[0], mul*2*ref[0]+add, inverse);
805 LIFTS(src[i*src_step],
806 mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add,
812 LIFTS(src[w*src_step], mul*2*ref[w*ref_step]+add, inverse);
815 static av_always_inline void
816 inv_liftS(IDWTELEM *dst, IDWTELEM *src, IDWTELEM *ref,
817 int dst_step, int src_step, int ref_step,
818 int width, int mul, int add, int shift,
819 int highpass, int inverse){
820 const int mirror_left= !highpass;
821 const int mirror_right= (width&1) ^ highpass;
822 const int w= (width>>1) - 1 + (highpass & width);
826 #define LIFTS(src, ref, inv) \
828 (src) + (((ref) + 4*(src))>>shift): \
829 -((-16*(src) + (ref) + add/4 + 1 + (5<<25))/(5*4) - (1<<23)))
831 dst[0] = LIFTS(src[0], mul*2*ref[0]+add, inverse);
838 LIFTS(src[i*src_step],
839 mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add,
845 LIFTS(src[w*src_step], mul*2*ref[w*ref_step]+add, inverse);
850 static void horizontal_decompose53i(DWTELEM *b, int width){
852 const int width2= width>>1;
854 const int w2= (width+1)>>1;
856 for(x=0; x<width2; x++){
858 temp[x+w2]= b[2*x + 1];
872 for(x=1; x+1<width2; x+=2){
876 A2 += (A1 + A3 + 2)>>2;
880 A1= temp[x+1+width2];
883 A4 += (A1 + A3 + 2)>>2;
889 A2 += (A1 + A3 + 2)>>2;
894 lift(b+w2, temp+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 0);
895 lift(b , temp , b+w2, 1, 1, 1, width, 1, 2, 2, 0, 0);
899 static void vertical_decompose53iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
902 for(i=0; i<width; i++){
903 b1[i] -= (b0[i] + b2[i])>>1;
907 static void vertical_decompose53iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
910 for(i=0; i<width; i++){
911 b1[i] += (b0[i] + b2[i] + 2)>>2;
915 static void spatial_decompose53i(DWTELEM *buffer, int width, int height, int stride){
917 DWTELEM *b0= buffer + mirror(-2-1, height-1)*stride;
918 DWTELEM *b1= buffer + mirror(-2 , height-1)*stride;
920 for(y=-2; y<height; y+=2){
921 DWTELEM *b2= buffer + mirror(y+1, height-1)*stride;
922 DWTELEM *b3= buffer + mirror(y+2, height-1)*stride;
924 if(y+1<(unsigned)height) horizontal_decompose53i(b2, width);
925 if(y+2<(unsigned)height) horizontal_decompose53i(b3, width);
927 if(y+1<(unsigned)height) vertical_decompose53iH0(b1, b2, b3, width);
928 if(y+0<(unsigned)height) vertical_decompose53iL0(b0, b1, b2, width);
935 static void horizontal_decompose97i(DWTELEM *b, int width){
937 const int w2= (width+1)>>1;
939 lift (temp+w2, b +1, b , 1, 2, 2, width, W_AM, W_AO, W_AS, 1, 1);
940 liftS(temp , b , temp+w2, 1, 2, 1, width, W_BM, W_BO, W_BS, 0, 0);
941 lift (b +w2, temp+w2, temp , 1, 1, 1, width, W_CM, W_CO, W_CS, 1, 0);
942 lift (b , temp , b +w2, 1, 1, 1, width, W_DM, W_DO, W_DS, 0, 0);
946 static void vertical_decompose97iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
949 for(i=0; i<width; i++){
950 b1[i] -= (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
954 static void vertical_decompose97iH1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
957 for(i=0; i<width; i++){
958 b1[i] += (W_CM*(b0[i] + b2[i])+W_CO)>>W_CS;
962 static void vertical_decompose97iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
965 for(i=0; i<width; i++){
967 b1[i] -= (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS;
969 b1[i] = (16*4*b1[i] - 4*(b0[i] + b2[i]) + W_BO*5 + (5<<27)) / (5*16) - (1<<23);
974 static void vertical_decompose97iL1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
977 for(i=0; i<width; i++){
978 b1[i] += (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS;
982 static void spatial_decompose97i(DWTELEM *buffer, int width, int height, int stride){
984 DWTELEM *b0= buffer + mirror(-4-1, height-1)*stride;
985 DWTELEM *b1= buffer + mirror(-4 , height-1)*stride;
986 DWTELEM *b2= buffer + mirror(-4+1, height-1)*stride;
987 DWTELEM *b3= buffer + mirror(-4+2, height-1)*stride;
989 for(y=-4; y<height; y+=2){
990 DWTELEM *b4= buffer + mirror(y+3, height-1)*stride;
991 DWTELEM *b5= buffer + mirror(y+4, height-1)*stride;
993 if(y+3<(unsigned)height) horizontal_decompose97i(b4, width);
994 if(y+4<(unsigned)height) horizontal_decompose97i(b5, width);
996 if(y+3<(unsigned)height) vertical_decompose97iH0(b3, b4, b5, width);
997 if(y+2<(unsigned)height) vertical_decompose97iL0(b2, b3, b4, width);
998 if(y+1<(unsigned)height) vertical_decompose97iH1(b1, b2, b3, width);
999 if(y+0<(unsigned)height) vertical_decompose97iL1(b0, b1, b2, width);
1008 void ff_spatial_dwt(DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
1011 for(level=0; level<decomposition_count; level++){
1013 case DWT_97: spatial_decompose97i(buffer, width>>level, height>>level, stride<<level); break;
1014 case DWT_53: spatial_decompose53i(buffer, width>>level, height>>level, stride<<level); break;
1019 static void horizontal_compose53i(IDWTELEM *b, int width){
1020 IDWTELEM temp[width];
1021 const int width2= width>>1;
1022 const int w2= (width+1)>>1;
1034 for(x=1; x+1<width2; x+=2){
1038 A2 += (A1 + A3 + 2)>>2;
1042 A1= temp[x+1+width2];
1045 A4 += (A1 + A3 + 2)>>2;
1051 A2 += (A1 + A3 + 2)>>2;
1055 inv_lift(temp , b , b+w2, 1, 1, 1, width, 1, 2, 2, 0, 1);
1056 inv_lift(temp+w2, b+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 1);
1058 for(x=0; x<width2; x++){
1060 b[2*x + 1]= temp[x+w2];
1066 static void vertical_compose53iH0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
1069 for(i=0; i<width; i++){
1070 b1[i] += (b0[i] + b2[i])>>1;
1074 static void vertical_compose53iL0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
1077 for(i=0; i<width; i++){
1078 b1[i] -= (b0[i] + b2[i] + 2)>>2;
1082 static void spatial_compose53i_buffered_init(DWTCompose *cs, slice_buffer * sb, int height, int stride_line){
1083 cs->b0 = slice_buffer_get_line(sb, mirror(-1-1, height-1) * stride_line);
1084 cs->b1 = slice_buffer_get_line(sb, mirror(-1 , height-1) * stride_line);
1088 static void spatial_compose53i_init(DWTCompose *cs, IDWTELEM *buffer, int height, int stride){
1089 cs->b0 = buffer + mirror(-1-1, height-1)*stride;
1090 cs->b1 = buffer + mirror(-1 , height-1)*stride;
1094 static void spatial_compose53i_dy_buffered(DWTCompose *cs, slice_buffer * sb, int width, int height, int stride_line){
1097 IDWTELEM *b0= cs->b0;
1098 IDWTELEM *b1= cs->b1;
1099 IDWTELEM *b2= slice_buffer_get_line(sb, mirror(y+1, height-1) * stride_line);
1100 IDWTELEM *b3= slice_buffer_get_line(sb, mirror(y+2, height-1) * stride_line);
1102 if(y+1<(unsigned)height) vertical_compose53iL0(b1, b2, b3, width);
1103 if(y+0<(unsigned)height) vertical_compose53iH0(b0, b1, b2, width);
1105 if(y-1<(unsigned)height) horizontal_compose53i(b0, width);
1106 if(y+0<(unsigned)height) horizontal_compose53i(b1, width);
1113 static void spatial_compose53i_dy(DWTCompose *cs, IDWTELEM *buffer, int width, int height, int stride){
1115 IDWTELEM *b0= cs->b0;
1116 IDWTELEM *b1= cs->b1;
1117 IDWTELEM *b2= buffer + mirror(y+1, height-1)*stride;
1118 IDWTELEM *b3= buffer + mirror(y+2, height-1)*stride;
1120 if(y+1<(unsigned)height) vertical_compose53iL0(b1, b2, b3, width);
1121 if(y+0<(unsigned)height) vertical_compose53iH0(b0, b1, b2, width);
1123 if(y-1<(unsigned)height) horizontal_compose53i(b0, width);
1124 if(y+0<(unsigned)height) horizontal_compose53i(b1, width);
1131 static void av_unused spatial_compose53i(IDWTELEM *buffer, int width, int height, int stride){
1133 spatial_compose53i_init(&cs, buffer, height, stride);
1134 while(cs.y <= height)
1135 spatial_compose53i_dy(&cs, buffer, width, height, stride);
1139 void ff_snow_horizontal_compose97i(IDWTELEM *b, int width){
1140 IDWTELEM temp[width];
1141 const int w2= (width+1)>>1;
1143 inv_lift (temp , b , b +w2, 1, 1, 1, width, W_DM, W_DO, W_DS, 0, 1);
1144 inv_lift (temp+w2, b +w2, temp , 1, 1, 1, width, W_CM, W_CO, W_CS, 1, 1);
1145 inv_liftS(b , temp , temp+w2, 2, 1, 1, width, W_BM, W_BO, W_BS, 0, 1);
1146 inv_lift (b+1 , temp+w2, b , 2, 1, 2, width, W_AM, W_AO, W_AS, 1, 0);
1149 static void vertical_compose97iH0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
1152 for(i=0; i<width; i++){
1153 b1[i] += (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
1157 static void vertical_compose97iH1(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
1160 for(i=0; i<width; i++){
1161 b1[i] -= (W_CM*(b0[i] + b2[i])+W_CO)>>W_CS;
1165 static void vertical_compose97iL0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
1168 for(i=0; i<width; i++){
1170 b1[i] += (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS;
1172 b1[i] += (W_BM*(b0[i] + b2[i])+4*b1[i]+W_BO)>>W_BS;
1177 static void vertical_compose97iL1(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
1180 for(i=0; i<width; i++){
1181 b1[i] -= (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS;
1185 void ff_snow_vertical_compose97i(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, IDWTELEM *b3, IDWTELEM *b4, IDWTELEM *b5, int width){
1188 for(i=0; i<width; i++){
1189 b4[i] -= (W_DM*(b3[i] + b5[i])+W_DO)>>W_DS;
1190 b3[i] -= (W_CM*(b2[i] + b4[i])+W_CO)>>W_CS;
1192 b2[i] += (W_BM*(b1[i] + b3[i])+W_BO)>>W_BS;
1194 b2[i] += (W_BM*(b1[i] + b3[i])+4*b2[i]+W_BO)>>W_BS;
1196 b1[i] += (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
1200 static void spatial_compose97i_buffered_init(DWTCompose *cs, slice_buffer * sb, int height, int stride_line){
1201 cs->b0 = slice_buffer_get_line(sb, mirror(-3-1, height-1) * stride_line);
1202 cs->b1 = slice_buffer_get_line(sb, mirror(-3 , height-1) * stride_line);
1203 cs->b2 = slice_buffer_get_line(sb, mirror(-3+1, height-1) * stride_line);
1204 cs->b3 = slice_buffer_get_line(sb, mirror(-3+2, height-1) * stride_line);
1208 static void spatial_compose97i_init(DWTCompose *cs, IDWTELEM *buffer, int height, int stride){
1209 cs->b0 = buffer + mirror(-3-1, height-1)*stride;
1210 cs->b1 = buffer + mirror(-3 , height-1)*stride;
1211 cs->b2 = buffer + mirror(-3+1, height-1)*stride;
1212 cs->b3 = buffer + mirror(-3+2, height-1)*stride;
1216 static void spatial_compose97i_dy_buffered(DSPContext *dsp, DWTCompose *cs, slice_buffer * sb, int width, int height, int stride_line){
1219 IDWTELEM *b0= cs->b0;
1220 IDWTELEM *b1= cs->b1;
1221 IDWTELEM *b2= cs->b2;
1222 IDWTELEM *b3= cs->b3;
1223 IDWTELEM *b4= slice_buffer_get_line(sb, mirror(y + 3, height - 1) * stride_line);
1224 IDWTELEM *b5= slice_buffer_get_line(sb, mirror(y + 4, height - 1) * stride_line);
1226 if(y>0 && y+4<height){
1227 dsp->vertical_compose97i(b0, b1, b2, b3, b4, b5, width);
1229 if(y+3<(unsigned)height) vertical_compose97iL1(b3, b4, b5, width);
1230 if(y+2<(unsigned)height) vertical_compose97iH1(b2, b3, b4, width);
1231 if(y+1<(unsigned)height) vertical_compose97iL0(b1, b2, b3, width);
1232 if(y+0<(unsigned)height) vertical_compose97iH0(b0, b1, b2, width);
1235 if(y-1<(unsigned)height) dsp->horizontal_compose97i(b0, width);
1236 if(y+0<(unsigned)height) dsp->horizontal_compose97i(b1, width);
1245 static void spatial_compose97i_dy(DWTCompose *cs, IDWTELEM *buffer, int width, int height, int stride){
1247 IDWTELEM *b0= cs->b0;
1248 IDWTELEM *b1= cs->b1;
1249 IDWTELEM *b2= cs->b2;
1250 IDWTELEM *b3= cs->b3;
1251 IDWTELEM *b4= buffer + mirror(y+3, height-1)*stride;
1252 IDWTELEM *b5= buffer + mirror(y+4, height-1)*stride;
1254 if(y+3<(unsigned)height) vertical_compose97iL1(b3, b4, b5, width);
1255 if(y+2<(unsigned)height) vertical_compose97iH1(b2, b3, b4, width);
1256 if(y+1<(unsigned)height) vertical_compose97iL0(b1, b2, b3, width);
1257 if(y+0<(unsigned)height) vertical_compose97iH0(b0, b1, b2, width);
1259 if(y-1<(unsigned)height) ff_snow_horizontal_compose97i(b0, width);
1260 if(y+0<(unsigned)height) ff_snow_horizontal_compose97i(b1, width);
1269 static void av_unused spatial_compose97i(IDWTELEM *buffer, int width, int height, int stride){
1271 spatial_compose97i_init(&cs, buffer, height, stride);
1272 while(cs.y <= height)
1273 spatial_compose97i_dy(&cs, buffer, width, height, stride);
1276 static void ff_spatial_idwt_buffered_init(DWTCompose *cs, slice_buffer * sb, int width, int height, int stride_line, int type, int decomposition_count){
1278 for(level=decomposition_count-1; level>=0; level--){
1280 case DWT_97: spatial_compose97i_buffered_init(cs+level, sb, height>>level, stride_line<<level); break;
1281 case DWT_53: spatial_compose53i_buffered_init(cs+level, sb, height>>level, stride_line<<level); break;
1286 static void ff_spatial_idwt_init(DWTCompose *cs, IDWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
1288 for(level=decomposition_count-1; level>=0; level--){
1290 case DWT_97: spatial_compose97i_init(cs+level, buffer, height>>level, stride<<level); break;
1291 case DWT_53: spatial_compose53i_init(cs+level, buffer, height>>level, stride<<level); break;
1296 static void ff_spatial_idwt_slice(DWTCompose *cs, IDWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count, int y){
1297 const int support = type==1 ? 3 : 5;
1301 for(level=decomposition_count-1; level>=0; level--){
1302 while(cs[level].y <= FFMIN((y>>level)+support, height>>level)){
1304 case DWT_97: spatial_compose97i_dy(cs+level, buffer, width>>level, height>>level, stride<<level);
1306 case DWT_53: spatial_compose53i_dy(cs+level, buffer, width>>level, height>>level, stride<<level);
1313 static void ff_spatial_idwt_buffered_slice(DSPContext *dsp, DWTCompose *cs, slice_buffer * slice_buf, int width, int height, int stride_line, int type, int decomposition_count, int y){
1314 const int support = type==1 ? 3 : 5;
1318 for(level=decomposition_count-1; level>=0; level--){
1319 while(cs[level].y <= FFMIN((y>>level)+support, height>>level)){
1321 case DWT_97: spatial_compose97i_dy_buffered(dsp, cs+level, slice_buf, width>>level, height>>level, stride_line<<level);
1323 case DWT_53: spatial_compose53i_dy_buffered(cs+level, slice_buf, width>>level, height>>level, stride_line<<level);
1330 static void ff_spatial_idwt(IDWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
1331 DWTCompose cs[MAX_DECOMPOSITIONS];
1333 ff_spatial_idwt_init(cs, buffer, width, height, stride, type, decomposition_count);
1334 for(y=0; y<height; y+=4)
1335 ff_spatial_idwt_slice(cs, buffer, width, height, stride, type, decomposition_count, y);
1338 static int encode_subband_c0run(SnowContext *s, SubBand *b, IDWTELEM *src, IDWTELEM *parent, int stride, int orientation){
1339 const int w= b->width;
1340 const int h= b->height;
1352 int /*ll=0, */l=0, lt=0, t=0, rt=0;
1353 v= src[x + y*stride];
1356 t= src[x + (y-1)*stride];
1358 lt= src[x - 1 + (y-1)*stride];
1361 rt= src[x + 1 + (y-1)*stride];
1365 l= src[x - 1 + y*stride];
1367 if(orientation==1) ll= src[y + (x-2)*stride];
1368 else ll= src[x - 2 + y*stride];
1374 if(px<b->parent->width && py<b->parent->height)
1375 p= parent[px + py*2*stride];
1377 if(!(/*ll|*/l|lt|t|rt|p)){
1379 runs[run_index++]= run;
1387 max_index= run_index;
1388 runs[run_index++]= run;
1390 run= runs[run_index++];
1392 put_symbol2(&s->c, b->state[30], max_index, 0);
1393 if(run_index <= max_index)
1394 put_symbol2(&s->c, b->state[1], run, 3);
1397 if(s->c.bytestream_end - s->c.bytestream < w*40){
1398 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
1403 int /*ll=0, */l=0, lt=0, t=0, rt=0;
1404 v= src[x + y*stride];
1407 t= src[x + (y-1)*stride];
1409 lt= src[x - 1 + (y-1)*stride];
1412 rt= src[x + 1 + (y-1)*stride];
1416 l= src[x - 1 + y*stride];
1418 if(orientation==1) ll= src[y + (x-2)*stride];
1419 else ll= src[x - 2 + y*stride];
1425 if(px<b->parent->width && py<b->parent->height)
1426 p= parent[px + py*2*stride];
1428 if(/*ll|*/l|lt|t|rt|p){
1429 int context= av_log2(/*FFABS(ll) + */3*FFABS(l) + FFABS(lt) + 2*FFABS(t) + FFABS(rt) + FFABS(p));
1431 put_rac(&s->c, &b->state[0][context], !!v);
1434 run= runs[run_index++];
1436 if(run_index <= max_index)
1437 put_symbol2(&s->c, b->state[1], run, 3);
1445 int context= av_log2(/*FFABS(ll) + */3*FFABS(l) + FFABS(lt) + 2*FFABS(t) + FFABS(rt) + FFABS(p));
1446 int l2= 2*FFABS(l) + (l<0);
1447 int t2= 2*FFABS(t) + (t<0);
1449 put_symbol2(&s->c, b->state[context + 2], FFABS(v)-1, context-4);
1450 put_rac(&s->c, &b->state[0][16 + 1 + 3 + quant3bA[l2&0xFF] + 3*quant3bA[t2&0xFF]], v<0);
1458 static int encode_subband(SnowContext *s, SubBand *b, IDWTELEM *src, IDWTELEM *parent, int stride, int orientation){
1459 // encode_subband_qtree(s, b, src, parent, stride, orientation);
1460 // encode_subband_z0run(s, b, src, parent, stride, orientation);
1461 return encode_subband_c0run(s, b, src, parent, stride, orientation);
1462 // encode_subband_dzr(s, b, src, parent, stride, orientation);
1465 static inline void unpack_coeffs(SnowContext *s, SubBand *b, SubBand * parent, int orientation){
1466 const int w= b->width;
1467 const int h= b->height;
1472 x_and_coeff *xc= b->x_coeff;
1473 x_and_coeff *prev_xc= NULL;
1474 x_and_coeff *prev2_xc= xc;
1475 x_and_coeff *parent_xc= parent ? parent->x_coeff : NULL;
1476 x_and_coeff *prev_parent_xc= parent_xc;
1478 runs= get_symbol2(&s->c, b->state[30], 0);
1479 if(runs-- > 0) run= get_symbol2(&s->c, b->state[1], 3);
1484 int lt=0, t=0, rt=0;
1486 if(y && prev_xc->x == 0){
1498 if(prev_xc->x == x + 1)
1504 if(x>>1 > parent_xc->x){
1507 if(x>>1 == parent_xc->x){
1508 p= parent_xc->coeff;
1511 if(/*ll|*/l|lt|t|rt|p){
1512 int context= av_log2(/*FFABS(ll) + */3*(l>>1) + (lt>>1) + (t&~1) + (rt>>1) + (p>>1));
1514 v=get_rac(&s->c, &b->state[0][context]);
1516 v= 2*(get_symbol2(&s->c, b->state[context + 2], context-4) + 1);
1517 v+=get_rac(&s->c, &b->state[0][16 + 1 + 3 + quant3bA[l&0xFF] + 3*quant3bA[t&0xFF]]);
1524 if(runs-- > 0) run= get_symbol2(&s->c, b->state[1], 3);
1526 v= 2*(get_symbol2(&s->c, b->state[0 + 2], 0-4) + 1);
1527 v+=get_rac(&s->c, &b->state[0][16 + 1 + 3]);
1536 if(y) max_run= FFMIN(run, prev_xc->x - x - 2);
1537 else max_run= FFMIN(run, w-x-1);
1539 max_run= FFMIN(max_run, 2*parent_xc->x - x - 1);
1545 (xc++)->x= w+1; //end marker
1551 while(parent_xc->x != parent->width+1)
1554 prev_parent_xc= parent_xc;
1556 parent_xc= prev_parent_xc;
1561 (xc++)->x= w+1; //end marker
1565 static inline void decode_subband_slice_buffered(SnowContext *s, SubBand *b, slice_buffer * sb, int start_y, int h, int save_state[1]){
1566 const int w= b->width;
1568 const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
1569 int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
1570 int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
1573 if(b->ibuf == s->spatial_idwt_buffer || s->qlog == LOSSLESS_QLOG){
1578 /* If we are on the second or later slice, restore our index. */
1580 new_index = save_state[0];
1583 for(y=start_y; y<h; y++){
1586 IDWTELEM * line = slice_buffer_get_line(sb, y * b->stride_line + b->buf_y_offset) + b->buf_x_offset;
1587 memset(line, 0, b->width*sizeof(IDWTELEM));
1588 v = b->x_coeff[new_index].coeff;
1589 x = b->x_coeff[new_index++].x;
1591 register int t= ( (v>>1)*qmul + qadd)>>QEXPSHIFT;
1592 register int u= -(v&1);
1593 line[x] = (t^u) - u;
1595 v = b->x_coeff[new_index].coeff;
1596 x = b->x_coeff[new_index++].x;
1600 /* Save our variables for the next slice. */
1601 save_state[0] = new_index;
1606 static void reset_contexts(SnowContext *s){ //FIXME better initial contexts
1607 int plane_index, level, orientation;
1609 for(plane_index=0; plane_index<3; plane_index++){
1610 for(level=0; level<MAX_DECOMPOSITIONS; level++){
1611 for(orientation=level ? 1:0; orientation<4; orientation++){
1612 memset(s->plane[plane_index].band[level][orientation].state, MID_STATE, sizeof(s->plane[plane_index].band[level][orientation].state));
1616 memset(s->header_state, MID_STATE, sizeof(s->header_state));
1617 memset(s->block_state, MID_STATE, sizeof(s->block_state));
1620 static int alloc_blocks(SnowContext *s){
1621 int w= -((-s->avctx->width )>>LOG2_MB_SIZE);
1622 int h= -((-s->avctx->height)>>LOG2_MB_SIZE);
1628 s->block= av_mallocz(w * h * sizeof(BlockNode) << (s->block_max_depth*2));
1632 static inline void copy_rac_state(RangeCoder *d, RangeCoder *s){
1633 uint8_t *bytestream= d->bytestream;
1634 uint8_t *bytestream_start= d->bytestream_start;
1636 d->bytestream= bytestream;
1637 d->bytestream_start= bytestream_start;
1640 //near copy & paste from dsputil, FIXME
1641 static int pix_sum(uint8_t * pix, int line_size, int w)
1646 for (i = 0; i < w; i++) {
1647 for (j = 0; j < w; j++) {
1651 pix += line_size - w;
1656 //near copy & paste from dsputil, FIXME
1657 static int pix_norm1(uint8_t * pix, int line_size, int w)
1660 uint32_t *sq = ff_squareTbl + 256;
1663 for (i = 0; i < w; i++) {
1664 for (j = 0; j < w; j ++) {
1668 pix += line_size - w;
1673 static inline void set_blocks(SnowContext *s, int level, int x, int y, int l, int cb, int cr, int mx, int my, int ref, int type){
1674 const int w= s->b_width << s->block_max_depth;
1675 const int rem_depth= s->block_max_depth - level;
1676 const int index= (x + y*w) << rem_depth;
1677 const int block_w= 1<<rem_depth;
1690 for(j=0; j<block_w; j++){
1691 for(i=0; i<block_w; i++){
1692 s->block[index + i + j*w]= block;
1697 static inline void init_ref(MotionEstContext *c, uint8_t *src[3], uint8_t *ref[3], uint8_t *ref2[3], int x, int y, int ref_index){
1698 const int offset[3]= {
1700 ((y*c->uvstride + x)>>1),
1701 ((y*c->uvstride + x)>>1),
1705 c->src[0][i]= src [i];
1706 c->ref[0][i]= ref [i] + offset[i];
1711 static inline void pred_mv(SnowContext *s, int *mx, int *my, int ref,
1712 const BlockNode *left, const BlockNode *top, const BlockNode *tr){
1713 if(s->ref_frames == 1){
1714 *mx = mid_pred(left->mx, top->mx, tr->mx);
1715 *my = mid_pred(left->my, top->my, tr->my);
1717 const int *scale = scale_mv_ref[ref];
1718 *mx = mid_pred((left->mx * scale[left->ref] + 128) >>8,
1719 (top ->mx * scale[top ->ref] + 128) >>8,
1720 (tr ->mx * scale[tr ->ref] + 128) >>8);
1721 *my = mid_pred((left->my * scale[left->ref] + 128) >>8,
1722 (top ->my * scale[top ->ref] + 128) >>8,
1723 (tr ->my * scale[tr ->ref] + 128) >>8);
1730 #define P_TOPRIGHT P[3]
1731 #define P_MEDIAN P[4]
1733 #define FLAG_QPEL 1 //must be 1
1735 static int encode_q_branch(SnowContext *s, int level, int x, int y){
1736 uint8_t p_buffer[1024];
1737 uint8_t i_buffer[1024];
1738 uint8_t p_state[sizeof(s->block_state)];
1739 uint8_t i_state[sizeof(s->block_state)];
1741 uint8_t *pbbak= s->c.bytestream;
1742 uint8_t *pbbak_start= s->c.bytestream_start;
1743 int score, score2, iscore, i_len, p_len, block_s, sum, base_bits;
1744 const int w= s->b_width << s->block_max_depth;
1745 const int h= s->b_height << s->block_max_depth;
1746 const int rem_depth= s->block_max_depth - level;
1747 const int index= (x + y*w) << rem_depth;
1748 const int block_w= 1<<(LOG2_MB_SIZE - level);
1749 int trx= (x+1)<<rem_depth;
1750 int try= (y+1)<<rem_depth;
1751 const BlockNode *left = x ? &s->block[index-1] : &null_block;
1752 const BlockNode *top = y ? &s->block[index-w] : &null_block;
1753 const BlockNode *right = trx<w ? &s->block[index+1] : &null_block;
1754 const BlockNode *bottom= try<h ? &s->block[index+w] : &null_block;
1755 const BlockNode *tl = y && x ? &s->block[index-w-1] : left;
1756 const BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
1757 int pl = left->color[0];
1758 int pcb= left->color[1];
1759 int pcr= left->color[2];
1763 const int stride= s->current_picture.linesize[0];
1764 const int uvstride= s->current_picture.linesize[1];
1765 uint8_t *current_data[3]= { s->input_picture.data[0] + (x + y* stride)*block_w,
1766 s->input_picture.data[1] + (x + y*uvstride)*block_w/2,
1767 s->input_picture.data[2] + (x + y*uvstride)*block_w/2};
1769 int16_t last_mv[3][2];
1770 int qpel= !!(s->avctx->flags & CODEC_FLAG_QPEL); //unused
1771 const int shift= 1+qpel;
1772 MotionEstContext *c= &s->m.me;
1773 int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref);
1774 int mx_context= av_log2(2*FFABS(left->mx - top->mx));
1775 int my_context= av_log2(2*FFABS(left->my - top->my));
1776 int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
1777 int ref, best_ref, ref_score, ref_mx, ref_my;
1779 assert(sizeof(s->block_state) >= 256);
1781 set_blocks(s, level, x, y, pl, pcb, pcr, 0, 0, 0, BLOCK_INTRA);
1785 // clip predictors / edge ?
1787 P_LEFT[0]= left->mx;
1788 P_LEFT[1]= left->my;
1791 P_TOPRIGHT[0]= tr->mx;
1792 P_TOPRIGHT[1]= tr->my;
1794 last_mv[0][0]= s->block[index].mx;
1795 last_mv[0][1]= s->block[index].my;
1796 last_mv[1][0]= right->mx;
1797 last_mv[1][1]= right->my;
1798 last_mv[2][0]= bottom->mx;
1799 last_mv[2][1]= bottom->my;
1806 assert(c-> stride == stride);
1807 assert(c->uvstride == uvstride);
1809 c->penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_cmp);
1810 c->sub_penalty_factor= get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_sub_cmp);
1811 c->mb_penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->mb_cmp);
1812 c->current_mv_penalty= c->mv_penalty[s->m.f_code=1] + MAX_MV;
1814 c->xmin = - x*block_w - 16+2;
1815 c->ymin = - y*block_w - 16+2;
1816 c->xmax = - (x+1)*block_w + (w<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-2;
1817 c->ymax = - (y+1)*block_w + (h<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-2;
1819 if(P_LEFT[0] > (c->xmax<<shift)) P_LEFT[0] = (c->xmax<<shift);
1820 if(P_LEFT[1] > (c->ymax<<shift)) P_LEFT[1] = (c->ymax<<shift);
1821 if(P_TOP[0] > (c->xmax<<shift)) P_TOP[0] = (c->xmax<<shift);
1822 if(P_TOP[1] > (c->ymax<<shift)) P_TOP[1] = (c->ymax<<shift);
1823 if(P_TOPRIGHT[0] < (c->xmin<<shift)) P_TOPRIGHT[0]= (c->xmin<<shift);
1824 if(P_TOPRIGHT[0] > (c->xmax<<shift)) P_TOPRIGHT[0]= (c->xmax<<shift); //due to pmx no clip
1825 if(P_TOPRIGHT[1] > (c->ymax<<shift)) P_TOPRIGHT[1]= (c->ymax<<shift);
1827 P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]);
1828 P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);
1831 c->pred_x= P_LEFT[0];
1832 c->pred_y= P_LEFT[1];
1834 c->pred_x = P_MEDIAN[0];
1835 c->pred_y = P_MEDIAN[1];
1840 for(ref=0; ref<s->ref_frames; ref++){
1841 init_ref(c, current_data, s->last_picture[ref].data, NULL, block_w*x, block_w*y, 0);
1843 ref_score= ff_epzs_motion_search(&s->m, &ref_mx, &ref_my, P, 0, /*ref_index*/ 0, last_mv,
1844 (1<<16)>>shift, level-LOG2_MB_SIZE+4, block_w);
1846 assert(ref_mx >= c->xmin);
1847 assert(ref_mx <= c->xmax);
1848 assert(ref_my >= c->ymin);
1849 assert(ref_my <= c->ymax);
1851 ref_score= c->sub_motion_search(&s->m, &ref_mx, &ref_my, ref_score, 0, 0, level-LOG2_MB_SIZE+4, block_w);
1852 ref_score= ff_get_mb_score(&s->m, ref_mx, ref_my, 0, 0, level-LOG2_MB_SIZE+4, block_w, 0);
1853 ref_score+= 2*av_log2(2*ref)*c->penalty_factor;
1854 if(s->ref_mvs[ref]){
1855 s->ref_mvs[ref][index][0]= ref_mx;
1856 s->ref_mvs[ref][index][1]= ref_my;
1857 s->ref_scores[ref][index]= ref_score;
1859 if(score > ref_score){
1866 //FIXME if mb_cmp != SSE then intra cannot be compared currently and mb_penalty vs. lambda2
1869 base_bits= get_rac_count(&s->c) - 8*(s->c.bytestream - s->c.bytestream_start);
1871 pc.bytestream_start=
1872 pc.bytestream= p_buffer; //FIXME end/start? and at the other stoo
1873 memcpy(p_state, s->block_state, sizeof(s->block_state));
1875 if(level!=s->block_max_depth)
1876 put_rac(&pc, &p_state[4 + s_context], 1);
1877 put_rac(&pc, &p_state[1 + left->type + top->type], 0);
1878 if(s->ref_frames > 1)
1879 put_symbol(&pc, &p_state[128 + 1024 + 32*ref_context], best_ref, 0);
1880 pred_mv(s, &pmx, &pmy, best_ref, left, top, tr);
1881 put_symbol(&pc, &p_state[128 + 32*(mx_context + 16*!!best_ref)], mx - pmx, 1);
1882 put_symbol(&pc, &p_state[128 + 32*(my_context + 16*!!best_ref)], my - pmy, 1);
1883 p_len= pc.bytestream - pc.bytestream_start;
1884 score += (s->lambda2*(get_rac_count(&pc)-base_bits))>>FF_LAMBDA_SHIFT;
1886 block_s= block_w*block_w;
1887 sum = pix_sum(current_data[0], stride, block_w);
1888 l= (sum + block_s/2)/block_s;
1889 iscore = pix_norm1(current_data[0], stride, block_w) - 2*l*sum + l*l*block_s;
1891 block_s= block_w*block_w>>2;
1892 sum = pix_sum(current_data[1], uvstride, block_w>>1);
1893 cb= (sum + block_s/2)/block_s;
1894 // iscore += pix_norm1(¤t_mb[1][0], uvstride, block_w>>1) - 2*cb*sum + cb*cb*block_s;
1895 sum = pix_sum(current_data[2], uvstride, block_w>>1);
1896 cr= (sum + block_s/2)/block_s;
1897 // iscore += pix_norm1(¤t_mb[2][0], uvstride, block_w>>1) - 2*cr*sum + cr*cr*block_s;
1900 ic.bytestream_start=
1901 ic.bytestream= i_buffer; //FIXME end/start? and at the other stoo
1902 memcpy(i_state, s->block_state, sizeof(s->block_state));
1903 if(level!=s->block_max_depth)
1904 put_rac(&ic, &i_state[4 + s_context], 1);
1905 put_rac(&ic, &i_state[1 + left->type + top->type], 1);
1906 put_symbol(&ic, &i_state[32], l-pl , 1);
1907 put_symbol(&ic, &i_state[64], cb-pcb, 1);
1908 put_symbol(&ic, &i_state[96], cr-pcr, 1);
1909 i_len= ic.bytestream - ic.bytestream_start;
1910 iscore += (s->lambda2*(get_rac_count(&ic)-base_bits))>>FF_LAMBDA_SHIFT;
1912 // assert(score==256*256*256*64-1);
1913 assert(iscore < 255*255*256 + s->lambda2*10);
1914 assert(iscore >= 0);
1915 assert(l>=0 && l<=255);
1916 assert(pl>=0 && pl<=255);
1919 int varc= iscore >> 8;
1920 int vard= score >> 8;
1921 if (vard <= 64 || vard < varc)
1922 c->scene_change_score+= ff_sqrt(vard) - ff_sqrt(varc);
1924 c->scene_change_score+= s->m.qscale;
1927 if(level!=s->block_max_depth){
1928 put_rac(&s->c, &s->block_state[4 + s_context], 0);
1929 score2 = encode_q_branch(s, level+1, 2*x+0, 2*y+0);
1930 score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+0);
1931 score2+= encode_q_branch(s, level+1, 2*x+0, 2*y+1);
1932 score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+1);
1933 score2+= s->lambda2>>FF_LAMBDA_SHIFT; //FIXME exact split overhead
1935 if(score2 < score && score2 < iscore)
1940 pred_mv(s, &pmx, &pmy, 0, left, top, tr);
1941 memcpy(pbbak, i_buffer, i_len);
1943 s->c.bytestream_start= pbbak_start;
1944 s->c.bytestream= pbbak + i_len;
1945 set_blocks(s, level, x, y, l, cb, cr, pmx, pmy, 0, BLOCK_INTRA);
1946 memcpy(s->block_state, i_state, sizeof(s->block_state));
1949 memcpy(pbbak, p_buffer, p_len);
1951 s->c.bytestream_start= pbbak_start;
1952 s->c.bytestream= pbbak + p_len;
1953 set_blocks(s, level, x, y, pl, pcb, pcr, mx, my, best_ref, 0);
1954 memcpy(s->block_state, p_state, sizeof(s->block_state));
1959 static av_always_inline int same_block(BlockNode *a, BlockNode *b){
1960 if((a->type&BLOCK_INTRA) && (b->type&BLOCK_INTRA)){
1961 return !((a->color[0] - b->color[0]) | (a->color[1] - b->color[1]) | (a->color[2] - b->color[2]));
1963 return !((a->mx - b->mx) | (a->my - b->my) | (a->ref - b->ref) | ((a->type ^ b->type)&BLOCK_INTRA));
1967 static void encode_q_branch2(SnowContext *s, int level, int x, int y){
1968 const int w= s->b_width << s->block_max_depth;
1969 const int rem_depth= s->block_max_depth - level;
1970 const int index= (x + y*w) << rem_depth;
1971 int trx= (x+1)<<rem_depth;
1972 BlockNode *b= &s->block[index];
1973 const BlockNode *left = x ? &s->block[index-1] : &null_block;
1974 const BlockNode *top = y ? &s->block[index-w] : &null_block;
1975 const BlockNode *tl = y && x ? &s->block[index-w-1] : left;
1976 const BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
1977 int pl = left->color[0];
1978 int pcb= left->color[1];
1979 int pcr= left->color[2];
1981 int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref);
1982 int mx_context= av_log2(2*FFABS(left->mx - top->mx)) + 16*!!b->ref;
1983 int my_context= av_log2(2*FFABS(left->my - top->my)) + 16*!!b->ref;
1984 int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
1987 set_blocks(s, level, x, y, pl, pcb, pcr, 0, 0, 0, BLOCK_INTRA);
1991 if(level!=s->block_max_depth){
1992 if(same_block(b,b+1) && same_block(b,b+w) && same_block(b,b+w+1)){
1993 put_rac(&s->c, &s->block_state[4 + s_context], 1);
1995 put_rac(&s->c, &s->block_state[4 + s_context], 0);
1996 encode_q_branch2(s, level+1, 2*x+0, 2*y+0);
1997 encode_q_branch2(s, level+1, 2*x+1, 2*y+0);
1998 encode_q_branch2(s, level+1, 2*x+0, 2*y+1);
1999 encode_q_branch2(s, level+1, 2*x+1, 2*y+1);
2003 if(b->type & BLOCK_INTRA){
2004 pred_mv(s, &pmx, &pmy, 0, left, top, tr);
2005 put_rac(&s->c, &s->block_state[1 + (left->type&1) + (top->type&1)], 1);
2006 put_symbol(&s->c, &s->block_state[32], b->color[0]-pl , 1);
2007 put_symbol(&s->c, &s->block_state[64], b->color[1]-pcb, 1);
2008 put_symbol(&s->c, &s->block_state[96], b->color[2]-pcr, 1);
2009 set_blocks(s, level, x, y, b->color[0], b->color[1], b->color[2], pmx, pmy, 0, BLOCK_INTRA);
2011 pred_mv(s, &pmx, &pmy, b->ref, left, top, tr);
2012 put_rac(&s->c, &s->block_state[1 + (left->type&1) + (top->type&1)], 0);
2013 if(s->ref_frames > 1)
2014 put_symbol(&s->c, &s->block_state[128 + 1024 + 32*ref_context], b->ref, 0);
2015 put_symbol(&s->c, &s->block_state[128 + 32*mx_context], b->mx - pmx, 1);
2016 put_symbol(&s->c, &s->block_state[128 + 32*my_context], b->my - pmy, 1);
2017 set_blocks(s, level, x, y, pl, pcb, pcr, b->mx, b->my, b->ref, 0);
2021 static void decode_q_branch(SnowContext *s, int level, int x, int y){
2022 const int w= s->b_width << s->block_max_depth;
2023 const int rem_depth= s->block_max_depth - level;
2024 const int index= (x + y*w) << rem_depth;
2025 int trx= (x+1)<<rem_depth;
2026 const BlockNode *left = x ? &s->block[index-1] : &null_block;
2027 const BlockNode *top = y ? &s->block[index-w] : &null_block;
2028 const BlockNode *tl = y && x ? &s->block[index-w-1] : left;
2029 const BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
2030 int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
2033 set_blocks(s, level, x, y, null_block.color[0], null_block.color[1], null_block.color[2], null_block.mx, null_block.my, null_block.ref, BLOCK_INTRA);
2037 if(level==s->block_max_depth || get_rac(&s->c, &s->block_state[4 + s_context])){
2039 int l = left->color[0];
2040 int cb= left->color[1];
2041 int cr= left->color[2];
2043 int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref);
2044 int mx_context= av_log2(2*FFABS(left->mx - top->mx)) + 0*av_log2(2*FFABS(tr->mx - top->mx));
2045 int my_context= av_log2(2*FFABS(left->my - top->my)) + 0*av_log2(2*FFABS(tr->my - top->my));
2047 type= get_rac(&s->c, &s->block_state[1 + left->type + top->type]) ? BLOCK_INTRA : 0;
2050 pred_mv(s, &mx, &my, 0, left, top, tr);
2051 l += get_symbol(&s->c, &s->block_state[32], 1);
2052 cb+= get_symbol(&s->c, &s->block_state[64], 1);
2053 cr+= get_symbol(&s->c, &s->block_state[96], 1);
2055 if(s->ref_frames > 1)
2056 ref= get_symbol(&s->c, &s->block_state[128 + 1024 + 32*ref_context], 0);
2057 pred_mv(s, &mx, &my, ref, left, top, tr);
2058 mx+= get_symbol(&s->c, &s->block_state[128 + 32*(mx_context + 16*!!ref)], 1);
2059 my+= get_symbol(&s->c, &s->block_state[128 + 32*(my_context + 16*!!ref)], 1);
2061 set_blocks(s, level, x, y, l, cb, cr, mx, my, ref, type);
2063 decode_q_branch(s, level+1, 2*x+0, 2*y+0);
2064 decode_q_branch(s, level+1, 2*x+1, 2*y+0);
2065 decode_q_branch(s, level+1, 2*x+0, 2*y+1);
2066 decode_q_branch(s, level+1, 2*x+1, 2*y+1);
2070 static void encode_blocks(SnowContext *s, int search){
2075 if(s->avctx->me_method == ME_ITER && !s->keyframe && search)
2079 if(s->c.bytestream_end - s->c.bytestream < w*MB_SIZE*MB_SIZE*3){ //FIXME nicer limit
2080 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2084 if(s->avctx->me_method == ME_ITER || !search)
2085 encode_q_branch2(s, 0, x, y);
2087 encode_q_branch (s, 0, x, y);
2092 static void decode_blocks(SnowContext *s){
2099 decode_q_branch(s, 0, x, y);
2104 static void mc_block(Plane *p, uint8_t *dst, const uint8_t *src, uint8_t *tmp, int stride, int b_w, int b_h, int dx, int dy){
2105 static const uint8_t weight[64]={
2116 static const uint8_t brane[256]={
2117 0x00,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x11,0x12,0x12,0x12,0x12,0x12,0x12,0x12,
2118 0x04,0x05,0xcc,0xcc,0xcc,0xcc,0xcc,0x41,0x15,0x16,0xcc,0xcc,0xcc,0xcc,0xcc,0x52,
2119 0x04,0xcc,0x05,0xcc,0xcc,0xcc,0x41,0xcc,0x15,0xcc,0x16,0xcc,0xcc,0xcc,0x52,0xcc,
2120 0x04,0xcc,0xcc,0x05,0xcc,0x41,0xcc,0xcc,0x15,0xcc,0xcc,0x16,0xcc,0x52,0xcc,0xcc,
2121 0x04,0xcc,0xcc,0xcc,0x41,0xcc,0xcc,0xcc,0x15,0xcc,0xcc,0xcc,0x16,0xcc,0xcc,0xcc,
2122 0x04,0xcc,0xcc,0x41,0xcc,0x05,0xcc,0xcc,0x15,0xcc,0xcc,0x52,0xcc,0x16,0xcc,0xcc,
2123 0x04,0xcc,0x41,0xcc,0xcc,0xcc,0x05,0xcc,0x15,0xcc,0x52,0xcc,0xcc,0xcc,0x16,0xcc,
2124 0x04,0x41,0xcc,0xcc,0xcc,0xcc,0xcc,0x05,0x15,0x52,0xcc,0xcc,0xcc,0xcc,0xcc,0x16,
2125 0x44,0x45,0x45,0x45,0x45,0x45,0x45,0x45,0x55,0x56,0x56,0x56,0x56,0x56,0x56,0x56,
2126 0x48,0x49,0xcc,0xcc,0xcc,0xcc,0xcc,0x85,0x59,0x5A,0xcc,0xcc,0xcc,0xcc,0xcc,0x96,
2127 0x48,0xcc,0x49,0xcc,0xcc,0xcc,0x85,0xcc,0x59,0xcc,0x5A,0xcc,0xcc,0xcc,0x96,0xcc,
2128 0x48,0xcc,0xcc,0x49,0xcc,0x85,0xcc,0xcc,0x59,0xcc,0xcc,0x5A,0xcc,0x96,0xcc,0xcc,
2129 0x48,0xcc,0xcc,0xcc,0x49,0xcc,0xcc,0xcc,0x59,0xcc,0xcc,0xcc,0x96,0xcc,0xcc,0xcc,
2130 0x48,0xcc,0xcc,0x85,0xcc,0x49,0xcc,0xcc,0x59,0xcc,0xcc,0x96,0xcc,0x5A,0xcc,0xcc,
2131 0x48,0xcc,0x85,0xcc,0xcc,0xcc,0x49,0xcc,0x59,0xcc,0x96,0xcc,0xcc,0xcc,0x5A,0xcc,
2132 0x48,0x85,0xcc,0xcc,0xcc,0xcc,0xcc,0x49,0x59,0x96,0xcc,0xcc,0xcc,0xcc,0xcc,0x5A,
2135 static const uint8_t needs[16]={
2143 int16_t tmpIt [64*(32+HTAPS_MAX)];
2144 uint8_t tmp2t[3][stride*(32+HTAPS_MAX)];
2145 int16_t *tmpI= tmpIt;
2146 uint8_t *tmp2= tmp2t[0];
2147 const uint8_t *hpel[11];
2148 assert(dx<16 && dy<16);
2149 r= brane[dx + 16*dy]&15;
2150 l= brane[dx + 16*dy]>>4;
2152 b= needs[l] | needs[r];
2153 if(p && !p->diag_mc)
2157 for(y=0; y < b_h+HTAPS_MAX-1; y++){
2158 for(x=0; x < b_w; x++){
2159 int a_1=src[x + HTAPS_MAX/2-4];
2160 int a0= src[x + HTAPS_MAX/2-3];
2161 int a1= src[x + HTAPS_MAX/2-2];
2162 int a2= src[x + HTAPS_MAX/2-1];
2163 int a3= src[x + HTAPS_MAX/2+0];
2164 int a4= src[x + HTAPS_MAX/2+1];
2165 int a5= src[x + HTAPS_MAX/2+2];
2166 int a6= src[x + HTAPS_MAX/2+3];
2168 if(!p || p->fast_mc){
2169 am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5);
2173 am= p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6);
2178 if(am&(~255)) am= ~(am>>31);
2187 src += HTAPS_MAX/2 - 1;
2191 for(y=0; y < b_h; y++){
2192 for(x=0; x < b_w+1; x++){
2193 int a_1=src[x + (HTAPS_MAX/2-4)*stride];
2194 int a0= src[x + (HTAPS_MAX/2-3)*stride];
2195 int a1= src[x + (HTAPS_MAX/2-2)*stride];
2196 int a2= src[x + (HTAPS_MAX/2-1)*stride];
2197 int a3= src[x + (HTAPS_MAX/2+0)*stride];
2198 int a4= src[x + (HTAPS_MAX/2+1)*stride];
2199 int a5= src[x + (HTAPS_MAX/2+2)*stride];
2200 int a6= src[x + (HTAPS_MAX/2+3)*stride];
2202 if(!p || p->fast_mc)
2203 am= (20*(a2+a3) - 5*(a1+a4) + (a0+a5) + 16)>>5;
2205 am= (p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6) + 32)>>6;
2207 if(am&(~255)) am= ~(am>>31);
2215 src += stride*(HTAPS_MAX/2 - 1);
2219 for(y=0; y < b_h; y++){
2220 for(x=0; x < b_w; x++){
2221 int a_1=tmpI[x + (HTAPS_MAX/2-4)*64];
2222 int a0= tmpI[x + (HTAPS_MAX/2-3)*64];
2223 int a1= tmpI[x + (HTAPS_MAX/2-2)*64];
2224 int a2= tmpI[x + (HTAPS_MAX/2-1)*64];
2225 int a3= tmpI[x + (HTAPS_MAX/2+0)*64];
2226 int a4= tmpI[x + (HTAPS_MAX/2+1)*64];
2227 int a5= tmpI[x + (HTAPS_MAX/2+2)*64];
2228 int a6= tmpI[x + (HTAPS_MAX/2+3)*64];
2230 if(!p || p->fast_mc)
2231 am= (20*(a2+a3) - 5*(a1+a4) + (a0+a5) + 512)>>10;
2233 am= (p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6) + 2048)>>12;
2234 if(am&(~255)) am= ~(am>>31);
2243 hpel[ 1]= tmp2t[0] + stride*(HTAPS_MAX/2-1);
2248 hpel[ 6]= tmp2t[1] + 1;
2250 hpel[ 8]= src + stride;
2251 hpel[ 9]= hpel[1] + stride;
2252 hpel[10]= hpel[8] + 1;
2255 const uint8_t *src1= hpel[dx/8 + dy/8*4 ];
2256 const uint8_t *src2= hpel[dx/8 + dy/8*4+1];
2257 const uint8_t *src3= hpel[dx/8 + dy/8*4+4];
2258 const uint8_t *src4= hpel[dx/8 + dy/8*4+5];
2261 for(y=0; y < b_h; y++){
2262 for(x=0; x < b_w; x++){
2263 dst[x]= ((8-dx)*(8-dy)*src1[x] + dx*(8-dy)*src2[x]+
2264 (8-dx)* dy *src3[x] + dx* dy *src4[x]+32)>>6;
2273 const uint8_t *src1= hpel[l];
2274 const uint8_t *src2= hpel[r];
2275 int a= weight[((dx&7) + (8*(dy&7)))];
2277 for(y=0; y < b_h; y++){
2278 for(x=0; x < b_w; x++){
2279 dst[x]= (a*src1[x] + b*src2[x] + 4)>>3;
2288 #define mca(dx,dy,b_w)\
2289 static void mc_block_hpel ## dx ## dy ## b_w(uint8_t *dst, const uint8_t *src, int stride, int h){\
2290 uint8_t tmp[stride*(b_w+HTAPS_MAX-1)];\
2292 mc_block(NULL, dst, src-(HTAPS_MAX/2-1)-(HTAPS_MAX/2-1)*stride, tmp, stride, b_w, b_w, dx, dy);\
2304 static void pred_block(SnowContext *s, uint8_t *dst, uint8_t *tmp, int stride, int sx, int sy, int b_w, int b_h, BlockNode *block, int plane_index, int w, int h){
2305 if(block->type & BLOCK_INTRA){
2307 const int color = block->color[plane_index];
2308 const int color4= color*0x01010101;
2310 for(y=0; y < b_h; y++){
2311 *(uint32_t*)&dst[0 + y*stride]= color4;
2312 *(uint32_t*)&dst[4 + y*stride]= color4;
2313 *(uint32_t*)&dst[8 + y*stride]= color4;
2314 *(uint32_t*)&dst[12+ y*stride]= color4;
2315 *(uint32_t*)&dst[16+ y*stride]= color4;
2316 *(uint32_t*)&dst[20+ y*stride]= color4;
2317 *(uint32_t*)&dst[24+ y*stride]= color4;
2318 *(uint32_t*)&dst[28+ y*stride]= color4;
2321 for(y=0; y < b_h; y++){
2322 *(uint32_t*)&dst[0 + y*stride]= color4;
2323 *(uint32_t*)&dst[4 + y*stride]= color4;
2324 *(uint32_t*)&dst[8 + y*stride]= color4;
2325 *(uint32_t*)&dst[12+ y*stride]= color4;
2328 for(y=0; y < b_h; y++){
2329 *(uint32_t*)&dst[0 + y*stride]= color4;
2330 *(uint32_t*)&dst[4 + y*stride]= color4;
2333 for(y=0; y < b_h; y++){
2334 *(uint32_t*)&dst[0 + y*stride]= color4;
2337 for(y=0; y < b_h; y++){
2338 for(x=0; x < b_w; x++){
2339 dst[x + y*stride]= color;
2344 uint8_t *src= s->last_picture[block->ref].data[plane_index];
2345 const int scale= plane_index ? s->mv_scale : 2*s->mv_scale;
2346 int mx= block->mx*scale;
2347 int my= block->my*scale;
2348 const int dx= mx&15;
2349 const int dy= my&15;
2350 const int tab_index= 3 - (b_w>>2) + (b_w>>4);
2351 sx += (mx>>4) - (HTAPS_MAX/2-1);
2352 sy += (my>>4) - (HTAPS_MAX/2-1);
2353 src += sx + sy*stride;
2354 if( (unsigned)sx >= w - b_w - (HTAPS_MAX-2)
2355 || (unsigned)sy >= h - b_h - (HTAPS_MAX-2)){
2356 ff_emulated_edge_mc(tmp + MB_SIZE, src, stride, b_w+HTAPS_MAX-1, b_h+HTAPS_MAX-1, sx, sy, w, h);
2359 // assert(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h);
2360 // assert(!(b_w&(b_w-1)));
2361 assert(b_w>1 && b_h>1);
2362 assert((tab_index>=0 && tab_index<4) || b_w==32);
2363 if((dx&3) || (dy&3) || !(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h) || (b_w&(b_w-1)) || !s->plane[plane_index].fast_mc )
2364 mc_block(&s->plane[plane_index], dst, src, tmp, stride, b_w, b_h, dx, dy);
2367 for(y=0; y<b_h; y+=16){
2368 s->dsp.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + y*stride, src + 3 + (y+3)*stride,stride);
2369 s->dsp.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + 16 + y*stride, src + 19 + (y+3)*stride,stride);
2372 s->dsp.put_h264_qpel_pixels_tab[tab_index ][dy+(dx>>2)](dst,src + 3 + 3*stride,stride);
2373 else if(b_w==2*b_h){
2374 s->dsp.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst ,src + 3 + 3*stride,stride);
2375 s->dsp.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst+b_h,src + 3 + b_h + 3*stride,stride);
2378 s->dsp.put_h264_qpel_pixels_tab[tab_index ][dy+(dx>>2)](dst ,src + 3 + 3*stride ,stride);
2379 s->dsp.put_h264_qpel_pixels_tab[tab_index ][dy+(dx>>2)](dst+b_w*stride,src + 3 + 3*stride+b_w*stride,stride);
2384 void ff_snow_inner_add_yblock(const uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h,
2385 int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8){
2388 for(y=0; y<b_h; y++){
2389 //FIXME ugly misuse of obmc_stride
2390 const uint8_t *obmc1= obmc + y*obmc_stride;
2391 const uint8_t *obmc2= obmc1+ (obmc_stride>>1);
2392 const uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1);
2393 const uint8_t *obmc4= obmc3+ (obmc_stride>>1);
2394 dst = slice_buffer_get_line(sb, src_y + y);
2395 for(x=0; x<b_w; x++){
2396 int v= obmc1[x] * block[3][x + y*src_stride]
2397 +obmc2[x] * block[2][x + y*src_stride]
2398 +obmc3[x] * block[1][x + y*src_stride]
2399 +obmc4[x] * block[0][x + y*src_stride];
2401 v <<= 8 - LOG2_OBMC_MAX;
2403 v >>= 8 - FRAC_BITS;
2406 v += dst[x + src_x];
2407 v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS;
2408 if(v&(~255)) v= ~(v>>31);
2409 dst8[x + y*src_stride] = v;
2411 dst[x + src_x] -= v;
2417 //FIXME name cleanup (b_w, block_w, b_width stuff)
2418 static av_always_inline void add_yblock(SnowContext *s, int sliced, slice_buffer *sb, IDWTELEM *dst, uint8_t *dst8, const uint8_t *obmc, int src_x, int src_y, int b_w, int b_h, int w, int h, int dst_stride, int src_stride, int obmc_stride, int b_x, int b_y, int add, int offset_dst, int plane_index){
2419 const int b_width = s->b_width << s->block_max_depth;
2420 const int b_height= s->b_height << s->block_max_depth;
2421 const int b_stride= b_width;
2422 BlockNode *lt= &s->block[b_x + b_y*b_stride];
2423 BlockNode *rt= lt+1;
2424 BlockNode *lb= lt+b_stride;
2425 BlockNode *rb= lb+1;
2427 int tmp_step= src_stride >= 7*MB_SIZE ? MB_SIZE : MB_SIZE*src_stride;
2428 uint8_t *tmp = s->scratchbuf;
2435 }else if(b_x + 1 >= b_width){
2442 }else if(b_y + 1 >= b_height){
2447 if(src_x<0){ //FIXME merge with prev & always round internal width up to *16
2450 if(!sliced && !offset_dst)
2453 }else if(src_x + b_w > w){
2457 obmc -= src_y*obmc_stride;
2459 if(!sliced && !offset_dst)
2460 dst -= src_y*dst_stride;
2462 }else if(src_y + b_h> h){
2466 if(b_w<=0 || b_h<=0) return;
2468 assert(src_stride > 2*MB_SIZE + 5);
2470 if(!sliced && offset_dst)
2471 dst += src_x + src_y*dst_stride;
2472 dst8+= src_x + src_y*src_stride;
2473 // src += src_x + src_y*src_stride;
2475 ptmp= tmp + 3*tmp_step;
2478 pred_block(s, block[0], tmp, src_stride, src_x, src_y, b_w, b_h, lt, plane_index, w, h);
2480 if(same_block(lt, rt)){
2485 pred_block(s, block[1], tmp, src_stride, src_x, src_y, b_w, b_h, rt, plane_index, w, h);
2488 if(same_block(lt, lb)){
2490 }else if(same_block(rt, lb)){
2495 pred_block(s, block[2], tmp, src_stride, src_x, src_y, b_w, b_h, lb, plane_index, w, h);
2498 if(same_block(lt, rb) ){
2500 }else if(same_block(rt, rb)){
2502 }else if(same_block(lb, rb)){
2506 pred_block(s, block[3], tmp, src_stride, src_x, src_y, b_w, b_h, rb, plane_index, w, h);
2509 for(y=0; y<b_h; y++){
2510 for(x=0; x<b_w; x++){
2511 int v= obmc [x + y*obmc_stride] * block[3][x + y*src_stride] * (256/OBMC_MAX);
2512 if(add) dst[x + y*dst_stride] += v;
2513 else dst[x + y*dst_stride] -= v;
2516 for(y=0; y<b_h; y++){
2517 uint8_t *obmc2= obmc + (obmc_stride>>1);
2518 for(x=0; x<b_w; x++){
2519 int v= obmc2[x + y*obmc_stride] * block[2][x + y*src_stride] * (256/OBMC_MAX);
2520 if(add) dst[x + y*dst_stride] += v;
2521 else dst[x + y*dst_stride] -= v;
2524 for(y=0; y<b_h; y++){
2525 uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
2526 for(x=0; x<b_w; x++){
2527 int v= obmc3[x + y*obmc_stride] * block[1][x + y*src_stride] * (256/OBMC_MAX);
2528 if(add) dst[x + y*dst_stride] += v;
2529 else dst[x + y*dst_stride] -= v;
2532 for(y=0; y<b_h; y++){
2533 uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
2534 uint8_t *obmc4= obmc3+ (obmc_stride>>1);
2535 for(x=0; x<b_w; x++){
2536 int v= obmc4[x + y*obmc_stride] * block[0][x + y*src_stride] * (256/OBMC_MAX);
2537 if(add) dst[x + y*dst_stride] += v;
2538 else dst[x + y*dst_stride] -= v;
2543 s->dsp.inner_add_yblock(obmc, obmc_stride, block, b_w, b_h, src_x,src_y, src_stride, sb, add, dst8);
2545 for(y=0; y<b_h; y++){
2546 //FIXME ugly misuse of obmc_stride
2547 const uint8_t *obmc1= obmc + y*obmc_stride;
2548 const uint8_t *obmc2= obmc1+ (obmc_stride>>1);
2549 const uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1);
2550 const uint8_t *obmc4= obmc3+ (obmc_stride>>1);
2551 for(x=0; x<b_w; x++){
2552 int v= obmc1[x] * block[3][x + y*src_stride]
2553 +obmc2[x] * block[2][x + y*src_stride]
2554 +obmc3[x] * block[1][x + y*src_stride]
2555 +obmc4[x] * block[0][x + y*src_stride];
2557 v <<= 8 - LOG2_OBMC_MAX;
2559 v >>= 8 - FRAC_BITS;
2562 v += dst[x + y*dst_stride];
2563 v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS;
2564 if(v&(~255)) v= ~(v>>31);
2565 dst8[x + y*src_stride] = v;
2567 dst[x + y*dst_stride] -= v;
2575 static av_always_inline void predict_slice_buffered(SnowContext *s, slice_buffer * sb, IDWTELEM * old_buffer, int plane_index, int add, int mb_y){
2576 Plane *p= &s->plane[plane_index];
2577 const int mb_w= s->b_width << s->block_max_depth;
2578 const int mb_h= s->b_height << s->block_max_depth;
2580 int block_size = MB_SIZE >> s->block_max_depth;
2581 int block_w = plane_index ? block_size/2 : block_size;
2582 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
2583 int obmc_stride= plane_index ? block_size : 2*block_size;
2584 int ref_stride= s->current_picture.linesize[plane_index];
2585 uint8_t *dst8= s->current_picture.data[plane_index];
2589 if(s->keyframe || (s->avctx->debug&512)){
2594 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
2595 // DWTELEM * line = slice_buffer_get_line(sb, y);
2596 IDWTELEM * line = sb->line[y];
2598 // int v= buf[x + y*w] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
2599 int v= line[x] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
2601 if(v&(~255)) v= ~(v>>31);
2602 dst8[x + y*ref_stride]= v;
2606 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
2607 // DWTELEM * line = slice_buffer_get_line(sb, y);
2608 IDWTELEM * line = sb->line[y];
2610 line[x] -= 128 << FRAC_BITS;
2611 // buf[x + y*w]-= 128<<FRAC_BITS;
2619 for(mb_x=0; mb_x<=mb_w; mb_x++){
2620 add_yblock(s, 1, sb, old_buffer, dst8, obmc,
2621 block_w*mb_x - block_w/2,
2622 block_w*mb_y - block_w/2,
2625 w, ref_stride, obmc_stride,
2627 add, 0, plane_index);
2631 static av_always_inline void predict_slice(SnowContext *s, IDWTELEM *buf, int plane_index, int add, int mb_y){
2632 Plane *p= &s->plane[plane_index];
2633 const int mb_w= s->b_width << s->block_max_depth;
2634 const int mb_h= s->b_height << s->block_max_depth;
2636 int block_size = MB_SIZE >> s->block_max_depth;
2637 int block_w = plane_index ? block_size/2 : block_size;
2638 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
2639 const int obmc_stride= plane_index ? block_size : 2*block_size;
2640 int ref_stride= s->current_picture.linesize[plane_index];
2641 uint8_t *dst8= s->current_picture.data[plane_index];
2645 if(s->keyframe || (s->avctx->debug&512)){
2650 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
2652 int v= buf[x + y*w] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
2654 if(v&(~255)) v= ~(v>>31);
2655 dst8[x + y*ref_stride]= v;
2659 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
2661 buf[x + y*w]-= 128<<FRAC_BITS;
2669 for(mb_x=0; mb_x<=mb_w; mb_x++){
2670 add_yblock(s, 0, NULL, buf, dst8, obmc,
2671 block_w*mb_x - block_w/2,
2672 block_w*mb_y - block_w/2,
2675 w, ref_stride, obmc_stride,
2677 add, 1, plane_index);
2681 static av_always_inline void predict_plane(SnowContext *s, IDWTELEM *buf, int plane_index, int add){
2682 const int mb_h= s->b_height << s->block_max_depth;
2684 for(mb_y=0; mb_y<=mb_h; mb_y++)
2685 predict_slice(s, buf, plane_index, add, mb_y);
2688 static int get_dc(SnowContext *s, int mb_x, int mb_y, int plane_index){
2690 Plane *p= &s->plane[plane_index];
2691 const int block_size = MB_SIZE >> s->block_max_depth;
2692 const int block_w = plane_index ? block_size/2 : block_size;
2693 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
2694 const int obmc_stride= plane_index ? block_size : 2*block_size;
2695 const int ref_stride= s->current_picture.linesize[plane_index];
2696 uint8_t *src= s-> input_picture.data[plane_index];
2697 IDWTELEM *dst= (IDWTELEM*)s->m.obmc_scratchpad + plane_index*block_size*block_size*4; //FIXME change to unsigned
2698 const int b_stride = s->b_width << s->block_max_depth;
2699 const int w= p->width;
2700 const int h= p->height;
2701 int index= mb_x + mb_y*b_stride;
2702 BlockNode *b= &s->block[index];
2703 BlockNode backup= *b;
2707 b->type|= BLOCK_INTRA;
2708 b->color[plane_index]= 0;
2709 memset(dst, 0, obmc_stride*obmc_stride*sizeof(IDWTELEM));
2712 int mb_x2= mb_x + (i &1) - 1;
2713 int mb_y2= mb_y + (i>>1) - 1;
2714 int x= block_w*mb_x2 + block_w/2;
2715 int y= block_w*mb_y2 + block_w/2;
2717 add_yblock(s, 0, NULL, dst + ((i&1)+(i>>1)*obmc_stride)*block_w, NULL, obmc,
2718 x, y, block_w, block_w, w, h, obmc_stride, ref_stride, obmc_stride, mb_x2, mb_y2, 0, 0, plane_index);
2720 for(y2= FFMAX(y, 0); y2<FFMIN(h, y+block_w); y2++){
2721 for(x2= FFMAX(x, 0); x2<FFMIN(w, x+block_w); x2++){
2722 int index= x2-(block_w*mb_x - block_w/2) + (y2-(block_w*mb_y - block_w/2))*obmc_stride;
2723 int obmc_v= obmc[index];
2725 if(y<0) obmc_v += obmc[index + block_w*obmc_stride];
2726 if(x<0) obmc_v += obmc[index + block_w];
2727 if(y+block_w>h) obmc_v += obmc[index - block_w*obmc_stride];
2728 if(x+block_w>w) obmc_v += obmc[index - block_w];
2729 //FIXME precalculate this or simplify it somehow else
2731 d = -dst[index] + (1<<(FRAC_BITS-1));
2733 ab += (src[x2 + y2*ref_stride] - (d>>FRAC_BITS)) * obmc_v;
2734 aa += obmc_v * obmc_v; //FIXME precalculate this
2740 return av_clip(((ab<<LOG2_OBMC_MAX) + aa/2)/aa, 0, 255); //FIXME we should not need clipping
2743 static inline int get_block_bits(SnowContext *s, int x, int y, int w){
2744 const int b_stride = s->b_width << s->block_max_depth;
2745 const int b_height = s->b_height<< s->block_max_depth;
2746 int index= x + y*b_stride;
2747 const BlockNode *b = &s->block[index];
2748 const BlockNode *left = x ? &s->block[index-1] : &null_block;
2749 const BlockNode *top = y ? &s->block[index-b_stride] : &null_block;
2750 const BlockNode *tl = y && x ? &s->block[index-b_stride-1] : left;
2751 const BlockNode *tr = y && x+w<b_stride ? &s->block[index-b_stride+w] : tl;
2753 // int mx_context= av_log2(2*FFABS(left->mx - top->mx));
2754 // int my_context= av_log2(2*FFABS(left->my - top->my));
2756 if(x<0 || x>=b_stride || y>=b_height)
2763 00001XXXX 15-30 8-15
2765 //FIXME try accurate rate
2766 //FIXME intra and inter predictors if surrounding blocks are not the same type
2767 if(b->type & BLOCK_INTRA){
2768 return 3+2*( av_log2(2*FFABS(left->color[0] - b->color[0]))
2769 + av_log2(2*FFABS(left->color[1] - b->color[1]))
2770 + av_log2(2*FFABS(left->color[2] - b->color[2])));
2772 pred_mv(s, &dmx, &dmy, b->ref, left, top, tr);
2775 return 2*(1 + av_log2(2*FFABS(dmx)) //FIXME kill the 2* can be merged in lambda
2776 + av_log2(2*FFABS(dmy))
2777 + av_log2(2*b->ref));
2781 static int get_block_rd(SnowContext *s, int mb_x, int mb_y, int plane_index, const uint8_t *obmc_edged){
2782 Plane *p= &s->plane[plane_index];
2783 const int block_size = MB_SIZE >> s->block_max_depth;
2784 const int block_w = plane_index ? block_size/2 : block_size;
2785 const int obmc_stride= plane_index ? block_size : 2*block_size;
2786 const int ref_stride= s->current_picture.linesize[plane_index];
2787 uint8_t *dst= s->current_picture.data[plane_index];
2788 uint8_t *src= s-> input_picture.data[plane_index];
2789 IDWTELEM *pred= (IDWTELEM*)s->m.obmc_scratchpad + plane_index*block_size*block_size*4;
2790 uint8_t *cur = s->scratchbuf;
2791 uint8_t tmp[ref_stride*(2*MB_SIZE+HTAPS_MAX-1)];
2792 const int b_stride = s->b_width << s->block_max_depth;
2793 const int b_height = s->b_height<< s->block_max_depth;
2794 const int w= p->width;
2795 const int h= p->height;
2798 const int penalty_factor= get_penalty_factor(s->lambda, s->lambda2, s->avctx->me_cmp);
2799 int sx= block_w*mb_x - block_w/2;
2800 int sy= block_w*mb_y - block_w/2;
2801 int x0= FFMAX(0,-sx);
2802 int y0= FFMAX(0,-sy);
2803 int x1= FFMIN(block_w*2, w-sx);
2804 int y1= FFMIN(block_w*2, h-sy);
2807 pred_block(s, cur, tmp, ref_stride, sx, sy, block_w*2, block_w*2, &s->block[mb_x + mb_y*b_stride], plane_index, w, h);
2809 for(y=y0; y<y1; y++){
2810 const uint8_t *obmc1= obmc_edged + y*obmc_stride;
2811 const IDWTELEM *pred1 = pred + y*obmc_stride;
2812 uint8_t *cur1 = cur + y*ref_stride;
2813 uint8_t *dst1 = dst + sx + (sy+y)*ref_stride;
2814 for(x=x0; x<x1; x++){
2815 #if FRAC_BITS >= LOG2_OBMC_MAX
2816 int v = (cur1[x] * obmc1[x]) << (FRAC_BITS - LOG2_OBMC_MAX);
2818 int v = (cur1[x] * obmc1[x] + (1<<(LOG2_OBMC_MAX - FRAC_BITS-1))) >> (LOG2_OBMC_MAX - FRAC_BITS);
2820 v = (v + pred1[x]) >> FRAC_BITS;
2821 if(v&(~255)) v= ~(v>>31);
2826 /* copy the regions where obmc[] = (uint8_t)256 */
2827 if(LOG2_OBMC_MAX == 8
2828 && (mb_x == 0 || mb_x == b_stride-1)
2829 && (mb_y == 0 || mb_y == b_height-1)){
2838 for(y=y0; y<y1; y++)
2839 memcpy(dst + sx+x0 + (sy+y)*ref_stride, cur + x0 + y*ref_stride, x1-x0);
2843 /* FIXME rearrange dsputil to fit 32x32 cmp functions */
2844 /* FIXME check alignment of the cmp wavelet vs the encoding wavelet */
2845 /* FIXME cmps overlap but do not cover the wavelet's whole support.
2846 * So improving the score of one block is not strictly guaranteed
2847 * to improve the score of the whole frame, thus iterative motion
2848 * estimation does not always converge. */
2849 if(s->avctx->me_cmp == FF_CMP_W97)
2850 distortion = w97_32_c(&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, 32);
2851 else if(s->avctx->me_cmp == FF_CMP_W53)
2852 distortion = w53_32_c(&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, 32);
2856 int off = sx+16*(i&1) + (sy+16*(i>>1))*ref_stride;
2857 distortion += s->dsp.me_cmp[0](&s->m, src + off, dst + off, ref_stride, 16);
2862 distortion = s->dsp.me_cmp[0](&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, block_w*2);
2871 rate += get_block_bits(s, mb_x + (i&1) - (i>>1), mb_y + (i>>1), 1);
2873 if(mb_x == b_stride-2)
2874 rate += get_block_bits(s, mb_x + 1, mb_y + 1, 1);
2876 return distortion + rate*penalty_factor;
2879 static int get_4block_rd(SnowContext *s, int mb_x, int mb_y, int plane_index){
2881 Plane *p= &s->plane[plane_index];
2882 const int block_size = MB_SIZE >> s->block_max_depth;
2883 const int block_w = plane_index ? block_size/2 : block_size;
2884 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
2885 const int obmc_stride= plane_index ? block_size : 2*block_size;
2886 const int ref_stride= s->current_picture.linesize[plane_index];
2887 uint8_t *dst= s->current_picture.data[plane_index];
2888 uint8_t *src= s-> input_picture.data[plane_index];
2889 //FIXME zero_dst is const but add_yblock changes dst if add is 0 (this is never the case for dst=zero_dst
2890 // const has only been removed from zero_dst to suppress a warning
2891 static IDWTELEM zero_dst[4096]; //FIXME
2892 const int b_stride = s->b_width << s->block_max_depth;
2893 const int w= p->width;
2894 const int h= p->height;
2897 const int penalty_factor= get_penalty_factor(s->lambda, s->lambda2, s->avctx->me_cmp);
2900 int mb_x2= mb_x + (i%3) - 1;
2901 int mb_y2= mb_y + (i/3) - 1;
2902 int x= block_w*mb_x2 + block_w/2;
2903 int y= block_w*mb_y2 + block_w/2;
2905 add_yblock(s, 0, NULL, zero_dst, dst, obmc,
2906 x, y, block_w, block_w, w, h, /*dst_stride*/0, ref_stride, obmc_stride, mb_x2, mb_y2, 1, 1, plane_index);
2908 //FIXME find a cleaner/simpler way to skip the outside stuff
2909 for(y2= y; y2<0; y2++)
2910 memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, block_w);
2911 for(y2= h; y2<y+block_w; y2++)
2912 memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, block_w);
2914 for(y2= y; y2<y+block_w; y2++)
2915 memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, -x);
2918 for(y2= y; y2<y+block_w; y2++)
2919 memcpy(dst + w + y2*ref_stride, src + w + y2*ref_stride, x+block_w - w);
2922 assert(block_w== 8 || block_w==16);
2923 distortion += s->dsp.me_cmp[block_w==8](&s->m, src + x + y*ref_stride, dst + x + y*ref_stride, ref_stride, block_w);
2927 BlockNode *b= &s->block[mb_x+mb_y*b_stride];
2928 int merged= same_block(b,b+1) && same_block(b,b+b_stride) && same_block(b,b+b_stride+1);
2936 rate = get_block_bits(s, mb_x, mb_y, 2);
2937 for(i=merged?4:0; i<9; i++){
2938 static const int dxy[9][2] = {{0,0},{1,0},{0,1},{1,1},{2,0},{2,1},{-1,2},{0,2},{1,2}};
2939 rate += get_block_bits(s, mb_x + dxy[i][0], mb_y + dxy[i][1], 1);
2942 return distortion + rate*penalty_factor;
2945 static av_always_inline int check_block(SnowContext *s, int mb_x, int mb_y, int p[3], int intra, const uint8_t *obmc_edged, int *best_rd){
2946 const int b_stride= s->b_width << s->block_max_depth;
2947 BlockNode *block= &s->block[mb_x + mb_y * b_stride];
2948 BlockNode backup= *block;
2949 int rd, index, value;
2951 assert(mb_x>=0 && mb_y>=0);
2952 assert(mb_x<b_stride);
2955 block->color[0] = p[0];
2956 block->color[1] = p[1];
2957 block->color[2] = p[2];
2958 block->type |= BLOCK_INTRA;
2960 index= (p[0] + 31*p[1]) & (ME_CACHE_SIZE-1);
2961 value= s->me_cache_generation + (p[0]>>10) + (p[1]<<6) + (block->ref<<12);
2962 if(s->me_cache[index] == value)
2964 s->me_cache[index]= value;
2968 block->type &= ~BLOCK_INTRA;
2971 rd= get_block_rd(s, mb_x, mb_y, 0, obmc_edged);
2983 /* special case for int[2] args we discard afterwards,
2984 * fixes compilation problem with gcc 2.95 */
2985 static av_always_inline int check_block_inter(SnowContext *s, int mb_x, int mb_y, int p0, int p1, const uint8_t *obmc_edged, int *best_rd){
2986 int p[2] = {p0, p1};
2987 return check_block(s, mb_x, mb_y, p, 0, obmc_edged, best_rd);
2990 static av_always_inline int check_4block_inter(SnowContext *s, int mb_x, int mb_y, int p0, int p1, int ref, int *best_rd){
2991 const int b_stride= s->b_width << s->block_max_depth;
2992 BlockNode *block= &s->block[mb_x + mb_y * b_stride];
2993 BlockNode backup[4]= {block[0], block[1], block[b_stride], block[b_stride+1]};
2994 int rd, index, value;
2996 assert(mb_x>=0 && mb_y>=0);
2997 assert(mb_x<b_stride);
2998 assert(((mb_x|mb_y)&1) == 0);
3000 index= (p0 + 31*p1) & (ME_CACHE_SIZE-1);
3001 value= s->me_cache_generation + (p0>>10) + (p1<<6) + (block->ref<<12);
3002 if(s->me_cache[index] == value)
3004 s->me_cache[index]= value;
3009 block->type &= ~BLOCK_INTRA;
3010 block[1]= block[b_stride]= block[b_stride+1]= *block;
3012 rd= get_4block_rd(s, mb_x, mb_y, 0);
3019 block[0]= backup[0];
3020 block[1]= backup[1];
3021 block[b_stride]= backup[2];
3022 block[b_stride+1]= backup[3];
3027 static void iterative_me(SnowContext *s){
3028 int pass, mb_x, mb_y;
3029 const int b_width = s->b_width << s->block_max_depth;
3030 const int b_height= s->b_height << s->block_max_depth;
3031 const int b_stride= b_width;
3035 RangeCoder r = s->c;
3036 uint8_t state[sizeof(s->block_state)];
3037 memcpy(state, s->block_state, sizeof(s->block_state));
3038 for(mb_y= 0; mb_y<s->b_height; mb_y++)
3039 for(mb_x= 0; mb_x<s->b_width; mb_x++)
3040 encode_q_branch(s, 0, mb_x, mb_y);
3042 memcpy(s->block_state, state, sizeof(s->block_state));
3045 for(pass=0; pass<25; pass++){
3048 for(mb_y= 0; mb_y<b_height; mb_y++){
3049 for(mb_x= 0; mb_x<b_width; mb_x++){
3050 int dia_change, i, j, ref;
3051 int best_rd= INT_MAX, ref_rd;
3052 BlockNode backup, ref_b;
3053 const int index= mb_x + mb_y * b_stride;
3054 BlockNode *block= &s->block[index];
3055 BlockNode *tb = mb_y ? &s->block[index-b_stride ] : NULL;
3056 BlockNode *lb = mb_x ? &s->block[index -1] : NULL;
3057 BlockNode *rb = mb_x+1<b_width ? &s->block[index +1] : NULL;
3058 BlockNode *bb = mb_y+1<b_height ? &s->block[index+b_stride ] : NULL;
3059 BlockNode *tlb= mb_x && mb_y ? &s->block[index-b_stride-1] : NULL;
3060 BlockNode *trb= mb_x+1<b_width && mb_y ? &s->block[index-b_stride+1] : NULL;
3061 BlockNode *blb= mb_x && mb_y+1<b_height ? &s->block[index+b_stride-1] : NULL;
3062 BlockNode *brb= mb_x+1<b_width && mb_y+1<b_height ? &s->block[index+b_stride+1] : NULL;
3063 const int b_w= (MB_SIZE >> s->block_max_depth);
3064 uint8_t obmc_edged[b_w*2][b_w*2];
3066 if(pass && (block->type & BLOCK_OPT))
3068 block->type |= BLOCK_OPT;
3072 if(!s->me_cache_generation)
3073 memset(s->me_cache, 0, sizeof(s->me_cache));
3074 s->me_cache_generation += 1<<22;
3076 //FIXME precalculate
3079 memcpy(obmc_edged, obmc_tab[s->block_max_depth], b_w*b_w*4);
3081 for(y=0; y<b_w*2; y++)
3082 memset(obmc_edged[y], obmc_edged[y][0] + obmc_edged[y][b_w-1], b_w);
3083 if(mb_x==b_stride-1)
3084 for(y=0; y<b_w*2; y++)
3085 memset(obmc_edged[y]+b_w, obmc_edged[y][b_w] + obmc_edged[y][b_w*2-1], b_w);
3087 for(x=0; x<b_w*2; x++)
3088 obmc_edged[0][x] += obmc_edged[b_w-1][x];
3089 for(y=1; y<b_w; y++)
3090 memcpy(obmc_edged[y], obmc_edged[0], b_w*2);
3092 if(mb_y==b_height-1){
3093 for(x=0; x<b_w*2; x++)
3094 obmc_edged[b_w*2-1][x] += obmc_edged[b_w][x];
3095 for(y=b_w; y<b_w*2-1; y++)
3096 memcpy(obmc_edged[y], obmc_edged[b_w*2-1], b_w*2);
3100 //skip stuff outside the picture
3101 if(mb_x==0 || mb_y==0 || mb_x==b_width-1 || mb_y==b_height-1){
3102 uint8_t *src= s-> input_picture.data[0];
3103 uint8_t *dst= s->current_picture.data[0];
3104 const int stride= s->current_picture.linesize[0];
3105 const int block_w= MB_SIZE >> s->block_max_depth;
3106 const int sx= block_w*mb_x - block_w/2;
3107 const int sy= block_w*mb_y - block_w/2;
3108 const int w= s->plane[0].width;
3109 const int h= s->plane[0].height;
3113 memcpy(dst + sx + y*stride, src + sx + y*stride, block_w*2);
3114 for(y=h; y<sy+block_w*2; y++)
3115 memcpy(dst + sx + y*stride, src + sx + y*stride, block_w*2);
3117 for(y=sy; y<sy+block_w*2; y++)
3118 memcpy(dst + sx + y*stride, src + sx + y*stride, -sx);
3120 if(sx+block_w*2 > w){
3121 for(y=sy; y<sy+block_w*2; y++)
3122 memcpy(dst + w + y*stride, src + w + y*stride, sx+block_w*2 - w);
3126 // intra(black) = neighbors' contribution to the current block
3128 color[i]= get_dc(s, mb_x, mb_y, i);
3130 // get previous score (cannot be cached due to OBMC)
3131 if(pass > 0 && (block->type&BLOCK_INTRA)){
3132 int color0[3]= {block->color[0], block->color[1], block->color[2]};
3133 check_block(s, mb_x, mb_y, color0, 1, *obmc_edged, &best_rd);
3135 check_block_inter(s, mb_x, mb_y, block->mx, block->my, *obmc_edged, &best_rd);
3139 for(ref=0; ref < s->ref_frames; ref++){
3140 int16_t (*mvr)[2]= &s->ref_mvs[ref][index];
3141 if(s->ref_scores[ref][index] > s->ref_scores[ref_b.ref][index]*3/2) //FIXME tune threshold
3146 check_block_inter(s, mb_x, mb_y, mvr[0][0], mvr[0][1], *obmc_edged, &best_rd);
3147 check_block_inter(s, mb_x, mb_y, 0, 0, *obmc_edged, &best_rd);
3149 check_block_inter(s, mb_x, mb_y, mvr[-b_stride][0], mvr[-b_stride][1], *obmc_edged, &best_rd);
3151 check_block_inter(s, mb_x, mb_y, mvr[-1][0], mvr[-1][1], *obmc_edged, &best_rd);
3153 check_block_inter(s, mb_x, mb_y, mvr[1][0], mvr[1][1], *obmc_edged, &best_rd);
3155 check_block_inter(s, mb_x, mb_y, mvr[b_stride][0], mvr[b_stride][1], *obmc_edged, &best_rd);
3158 //FIXME avoid subpel interpolation / round to nearest integer
3161 for(i=0; i<FFMAX(s->avctx->dia_size, 1); i++){
3163 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+4*(i-j), block->my+(4*j), *obmc_edged, &best_rd);
3164 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx-4*(i-j), block->my-(4*j), *obmc_edged, &best_rd);
3165 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+4*(i-j), block->my-(4*j), *obmc_edged, &best_rd);
3166 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx-4*(i-j), block->my+(4*j), *obmc_edged, &best_rd);
3172 static const int square[8][2]= {{+1, 0},{-1, 0},{ 0,+1},{ 0,-1},{+1,+1},{-1,-1},{+1,-1},{-1,+1},};
3175 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+square[i][0], block->my+square[i][1], *obmc_edged, &best_rd);
3177 //FIXME or try the standard 2 pass qpel or similar
3179 mvr[0][0]= block->mx;
3180 mvr[0][1]= block->my;
3181 if(ref_rd > best_rd){
3189 check_block(s, mb_x, mb_y, color, 1, *obmc_edged, &best_rd);
3190 //FIXME RD style color selection
3192 if(!same_block(block, &backup)){
3193 if(tb ) tb ->type &= ~BLOCK_OPT;
3194 if(lb ) lb ->type &= ~BLOCK_OPT;
3195 if(rb ) rb ->type &= ~BLOCK_OPT;
3196 if(bb ) bb ->type &= ~BLOCK_OPT;
3197 if(tlb) tlb->type &= ~BLOCK_OPT;
3198 if(trb) trb->type &= ~BLOCK_OPT;
3199 if(blb) blb->type &= ~BLOCK_OPT;
3200 if(brb) brb->type &= ~BLOCK_OPT;
3205 av_log(NULL, AV_LOG_ERROR, "pass:%d changed:%d\n", pass, change);
3210 if(s->block_max_depth == 1){
3212 for(mb_y= 0; mb_y<b_height; mb_y+=2){
3213 for(mb_x= 0; mb_x<b_width; mb_x+=2){
3215 int best_rd, init_rd;
3216 const int index= mb_x + mb_y * b_stride;
3219 b[0]= &s->block[index];
3221 b[2]= b[0]+b_stride;
3223 if(same_block(b[0], b[1]) &&
3224 same_block(b[0], b[2]) &&
3225 same_block(b[0], b[3]))
3228 if(!s->me_cache_generation)
3229 memset(s->me_cache, 0, sizeof(s->me_cache));
3230 s->me_cache_generation += 1<<22;
3232 init_rd= best_rd= get_4block_rd(s, mb_x, mb_y, 0);
3234 //FIXME more multiref search?
3235 check_4block_inter(s, mb_x, mb_y,
3236 (b[0]->mx + b[1]->mx + b[2]->mx + b[3]->mx + 2) >> 2,
3237 (b[0]->my + b[1]->my + b[2]->my + b[3]->my + 2) >> 2, 0, &best_rd);
3240 if(!(b[i]->type&BLOCK_INTRA))
3241 check_4block_inter(s, mb_x, mb_y, b[i]->mx, b[i]->my, b[i]->ref, &best_rd);
3243 if(init_rd != best_rd)
3247 av_log(NULL, AV_LOG_ERROR, "pass:4mv changed:%d\n", change*4);
3251 static void quantize(SnowContext *s, SubBand *b, IDWTELEM *dst, DWTELEM *src, int stride, int bias){
3252 const int w= b->width;
3253 const int h= b->height;
3254 const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
3255 const int qmul= qexp[qlog&(QROOT-1)]<<((qlog>>QSHIFT) + ENCODER_EXTRA_BITS);
3256 int x,y, thres1, thres2;
3258 if(s->qlog == LOSSLESS_QLOG){
3261 dst[x + y*stride]= src[x + y*stride];
3265 bias= bias ? 0 : (3*qmul)>>3;
3266 thres1= ((qmul - bias)>>QEXPSHIFT) - 1;
3272 int i= src[x + y*stride];
3274 if((unsigned)(i+thres1) > thres2){
3277 i/= qmul; //FIXME optimize
3278 dst[x + y*stride]= i;
3282 i/= qmul; //FIXME optimize
3283 dst[x + y*stride]= -i;
3286 dst[x + y*stride]= 0;
3292 int i= src[x + y*stride];
3294 if((unsigned)(i+thres1) > thres2){
3297 i= (i + bias) / qmul; //FIXME optimize
3298 dst[x + y*stride]= i;
3302 i= (i + bias) / qmul; //FIXME optimize
3303 dst[x + y*stride]= -i;
3306 dst[x + y*stride]= 0;
3312 static void dequantize_slice_buffered(SnowContext *s, slice_buffer * sb, SubBand *b, IDWTELEM *src, int stride, int start_y, int end_y){
3313 const int w= b->width;
3314 const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
3315 const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
3316 const int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
3319 if(s->qlog == LOSSLESS_QLOG) return;
3321 for(y=start_y; y<end_y; y++){
3322 // DWTELEM * line = slice_buffer_get_line_from_address(sb, src + (y * stride));
3323 IDWTELEM * line = slice_buffer_get_line(sb, (y * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
3327 line[x]= -((-i*qmul + qadd)>>(QEXPSHIFT)); //FIXME try different bias
3329 line[x]= (( i*qmul + qadd)>>(QEXPSHIFT));
3335 static void dequantize(SnowContext *s, SubBand *b, IDWTELEM *src, int stride){
3336 const int w= b->width;
3337 const int h= b->height;
3338 const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
3339 const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
3340 const int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
3343 if(s->qlog == LOSSLESS_QLOG) return;
3347 int i= src[x + y*stride];
3349 src[x + y*stride]= -((-i*qmul + qadd)>>(QEXPSHIFT)); //FIXME try different bias
3351 src[x + y*stride]= (( i*qmul + qadd)>>(QEXPSHIFT));
3357 static void decorrelate(SnowContext *s, SubBand *b, IDWTELEM *src, int stride, int inverse, int use_median){
3358 const int w= b->width;
3359 const int h= b->height;
3362 for(y=h-1; y>=0; y--){
3363 for(x=w-1; x>=0; x--){
3364 int i= x + y*stride;
3368 if(y && x+1<w) src[i] -= mid_pred(src[i - 1], src[i - stride], src[i - stride + 1]);
3369 else src[i] -= src[i - 1];
3371 if(y) src[i] -= mid_pred(src[i - 1], src[i - stride], src[i - 1] + src[i - stride] - src[i - 1 - stride]);
3372 else src[i] -= src[i - 1];
3375 if(y) src[i] -= src[i - stride];
3381 static void correlate_slice_buffered(SnowContext *s, slice_buffer * sb, SubBand *b, IDWTELEM *src, int stride, int inverse, int use_median, int start_y, int end_y){
3382 const int w= b->width;
3385 IDWTELEM * line=0; // silence silly "could be used without having been initialized" warning
3389 line = slice_buffer_get_line(sb, ((start_y - 1) * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
3391 for(y=start_y; y<end_y; y++){
3393 // line = slice_buffer_get_line_from_address(sb, src + (y * stride));
3394 line = slice_buffer_get_line(sb, (y * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
3398 if(y && x+1<w) line[x] += mid_pred(line[x - 1], prev[x], prev[x + 1]);
3399 else line[x] += line[x - 1];
3401 if(y) line[x] += mid_pred(line[x - 1], prev[x], line[x - 1] + prev[x] - prev[x - 1]);
3402 else line[x] += line[x - 1];
3405 if(y) line[x] += prev[x];
3411 static void correlate(SnowContext *s, SubBand *b, IDWTELEM *src, int stride, int inverse, int use_median){
3412 const int w= b->width;
3413 const int h= b->height;
3418 int i= x + y*stride;
3422 if(y && x+1<w) src[i] += mid_pred(src[i - 1], src[i - stride], src[i - stride + 1]);
3423 else src[i] += src[i - 1];
3425 if(y) src[i] += mid_pred(src[i - 1], src[i - stride], src[i - 1] + src[i - stride] - src[i - 1 - stride]);
3426 else src[i] += src[i - 1];
3429 if(y) src[i] += src[i - stride];
3435 static void encode_qlogs(SnowContext *s){
3436 int plane_index, level, orientation;
3438 for(plane_index=0; plane_index<2; plane_index++){
3439 for(level=0; level<s->spatial_decomposition_count; level++){
3440 for(orientation=level ? 1:0; orientation<4; orientation++){
3441 if(orientation==2) continue;
3442 put_symbol(&s->c, s->header_state, s->plane[plane_index].band[level][orientation].qlog, 1);
3448 static void encode_header(SnowContext *s){
3452 memset(kstate, MID_STATE, sizeof(kstate));
3454 put_rac(&s->c, kstate, s->keyframe);
3455 if(s->keyframe || s->always_reset){
3457 s->last_spatial_decomposition_type=
3461 s->last_block_max_depth= 0;
3462 for(plane_index=0; plane_index<2; plane_index++){
3463 Plane *p= &s->plane[plane_index];
3466 memset(p->last_hcoeff, 0, sizeof(p->last_hcoeff));
3470 put_symbol(&s->c, s->header_state, s->version, 0);
3471 put_rac(&s->c, s->header_state, s->always_reset);
3472 put_symbol(&s->c, s->header_state, s->temporal_decomposition_type, 0);
3473 put_symbol(&s->c, s->header_state, s->temporal_decomposition_count, 0);
3474 put_symbol(&s->c, s->header_state, s->spatial_decomposition_count, 0);
3475 put_symbol(&s->c, s->header_state, s->colorspace_type, 0);
3476 put_symbol(&s->c, s->header_state, s->chroma_h_shift, 0);
3477 put_symbol(&s->c, s->header_state, s->chroma_v_shift, 0);
3478 put_rac(&s->c, s->header_state, s->spatial_scalability);
3479 // put_rac(&s->c, s->header_state, s->rate_scalability);
3480 put_symbol(&s->c, s->header_state, s->max_ref_frames-1, 0);
3487 for(plane_index=0; plane_index<2; plane_index++){
3488 Plane *p= &s->plane[plane_index];
3489 update_mc |= p->last_htaps != p->htaps;
3490 update_mc |= p->last_diag_mc != p->diag_mc;
3491 update_mc |= !!memcmp(p->last_hcoeff, p->hcoeff, sizeof(p->hcoeff));
3493 put_rac(&s->c, s->header_state, update_mc);
3495 for(plane_index=0; plane_index<2; plane_index++){
3496 Plane *p= &s->plane[plane_index];
3497 put_rac(&s->c, s->header_state, p->diag_mc);
3498 put_symbol(&s->c, s->header_state, p->htaps/2-1, 0);
3499 for(i= p->htaps/2; i; i--)
3500 put_symbol(&s->c, s->header_state, FFABS(p->hcoeff[i]), 0);
3503 if(s->last_spatial_decomposition_count != s->spatial_decomposition_count){
3504 put_rac(&s->c, s->header_state, 1);
3505 put_symbol(&s->c, s->header_state, s->spatial_decomposition_count, 0);
3508 put_rac(&s->c, s->header_state, 0);
3511 put_symbol(&s->c, s->header_state, s->spatial_decomposition_type - s->last_spatial_decomposition_type, 1);
3512 put_symbol(&s->c, s->header_state, s->qlog - s->last_qlog , 1);
3513 put_symbol(&s->c, s->header_state, s->mv_scale - s->last_mv_scale, 1);
3514 put_symbol(&s->c, s->header_state, s->qbias - s->last_qbias , 1);
3515 put_symbol(&s->c, s->header_state, s->block_max_depth - s->last_block_max_depth, 1);
3519 static void update_last_header_values(SnowContext *s){
3523 for(plane_index=0; plane_index<2; plane_index++){
3524 Plane *p= &s->plane[plane_index];
3525 p->last_diag_mc= p->diag_mc;
3526 p->last_htaps = p->htaps;
3527 memcpy(p->last_hcoeff, p->hcoeff, sizeof(p->hcoeff));
3531 s->last_spatial_decomposition_type = s->spatial_decomposition_type;
3532 s->last_qlog = s->qlog;
3533 s->last_qbias = s->qbias;
3534 s->last_mv_scale = s->mv_scale;
3535 s->last_block_max_depth = s->block_max_depth;
3536 s->last_spatial_decomposition_count = s->spatial_decomposition_count;
3539 static void decode_qlogs(SnowContext *s){
3540 int plane_index, level, orientation;
3542 for(plane_index=0; plane_index<3; plane_index++){
3543 for(level=0; level<s->spatial_decomposition_count; level++){
3544 for(orientation=level ? 1:0; orientation<4; orientation++){
3546 if (plane_index==2) q= s->plane[1].band[level][orientation].qlog;
3547 else if(orientation==2) q= s->plane[plane_index].band[level][1].qlog;
3548 else q= get_symbol(&s->c, s->header_state, 1);
3549 s->plane[plane_index].band[level][orientation].qlog= q;
3555 #define GET_S(dst, check) \
3556 tmp= get_symbol(&s->c, s->header_state, 0);\
3558 av_log(s->avctx, AV_LOG_ERROR, "Error " #dst " is %d\n", tmp);\
3563 static int decode_header(SnowContext *s){
3564 int plane_index, tmp;
3567 memset(kstate, MID_STATE, sizeof(kstate));
3569 s->keyframe= get_rac(&s->c, kstate);
3570 if(s->keyframe || s->always_reset){
3572 s->spatial_decomposition_type=
3576 s->block_max_depth= 0;
3579 GET_S(s->version, tmp <= 0U)
3580 s->always_reset= get_rac(&s->c, s->header_state);
3581 s->temporal_decomposition_type= get_symbol(&s->c, s->header_state, 0);
3582 s->temporal_decomposition_count= get_symbol(&s->c, s->header_state, 0);
3583 GET_S(s->spatial_decomposition_count, 0 < tmp && tmp <= MAX_DECOMPOSITIONS)
3584 s->colorspace_type= get_symbol(&s->c, s->header_state, 0);
3585 s->chroma_h_shift= get_symbol(&s->c, s->header_state, 0);
3586 s->chroma_v_shift= get_symbol(&s->c, s->header_state, 0);
3587 s->spatial_scalability= get_rac(&s->c, s->header_state);
3588 // s->rate_scalability= get_rac(&s->c, s->header_state);
3589 GET_S(s->max_ref_frames, tmp < (unsigned)MAX_REF_FRAMES)
3590 s->max_ref_frames++;
3596 if(get_rac(&s->c, s->header_state)){
3597 for(plane_index=0; plane_index<2; plane_index++){
3598 int htaps, i, sum=0;
3599 Plane *p= &s->plane[plane_index];
3600 p->diag_mc= get_rac(&s->c, s->header_state);
3601 htaps= get_symbol(&s->c, s->header_state, 0)*2 + 2;
3602 if((unsigned)htaps > HTAPS_MAX || htaps==0)
3605 for(i= htaps/2; i; i--){
3606 p->hcoeff[i]= get_symbol(&s->c, s->header_state, 0) * (1-2*(i&1));
3607 sum += p->hcoeff[i];
3609 p->hcoeff[0]= 32-sum;
3611 s->plane[2].diag_mc= s->plane[1].diag_mc;
3612 s->plane[2].htaps = s->plane[1].htaps;
3613 memcpy(s->plane[2].hcoeff, s->plane[1].hcoeff, sizeof(s->plane[1].hcoeff));
3615 if(get_rac(&s->c, s->header_state)){
3616 GET_S(s->spatial_decomposition_count, 0 < tmp && tmp <= MAX_DECOMPOSITIONS)
3621 s->spatial_decomposition_type+= get_symbol(&s->c, s->header_state, 1);
3622 if(s->spatial_decomposition_type > 1U){
3623 av_log(s->avctx, AV_LOG_ERROR, "spatial_decomposition_type %d not supported", s->spatial_decomposition_type);
3626 if(FFMIN(s->avctx-> width>>s->chroma_h_shift,
3627 s->avctx->height>>s->chroma_v_shift) >> (s->spatial_decomposition_count-1) <= 0){
3628 av_log(s->avctx, AV_LOG_ERROR, "spatial_decomposition_count %d too large for size", s->spatial_decomposition_count);
3632 s->qlog += get_symbol(&s->c, s->header_state, 1);
3633 s->mv_scale += get_symbol(&s->c, s->header_state, 1);
3634 s->qbias += get_symbol(&s->c, s->header_state, 1);
3635 s->block_max_depth+= get_symbol(&s->c, s->header_state, 1);
3636 if(s->block_max_depth > 1 || s->block_max_depth < 0){
3637 av_log(s->avctx, AV_LOG_ERROR, "block_max_depth= %d is too large", s->block_max_depth);
3638 s->block_max_depth= 0;
3645 static void init_qexp(void){
3649 for(i=0; i<QROOT; i++){
3651 v *= pow(2, 1.0 / QROOT);
3655 static av_cold int common_init(AVCodecContext *avctx){
3656 SnowContext *s = avctx->priv_data;
3661 s->max_ref_frames=1; //just make sure its not an invalid value in case of no initial keyframe
3663 dsputil_init(&s->dsp, avctx);
3666 s->dsp.put_qpel_pixels_tab [0][dy+dx/4]=\
3667 s->dsp.put_no_rnd_qpel_pixels_tab[0][dy+dx/4]=\
3668 s->dsp.put_h264_qpel_pixels_tab[0][dy+dx/4];\
3669 s->dsp.put_qpel_pixels_tab [1][dy+dx/4]=\
3670 s->dsp.put_no_rnd_qpel_pixels_tab[1][dy+dx/4]=\
3671 s->dsp.put_h264_qpel_pixels_tab[1][dy+dx/4];
3690 #define mcfh(dx,dy)\
3691 s->dsp.put_pixels_tab [0][dy/4+dx/8]=\
3692 s->dsp.put_no_rnd_pixels_tab[0][dy/4+dx/8]=\
3693 mc_block_hpel ## dx ## dy ## 16;\
3694 s->dsp.put_pixels_tab [1][dy/4+dx/8]=\
3695 s->dsp.put_no_rnd_pixels_tab[1][dy/4+dx/8]=\
3696 mc_block_hpel ## dx ## dy ## 8;
3706 // dec += FFMAX(s->chroma_h_shift, s->chroma_v_shift);
3708 width= s->avctx->width;
3709 height= s->avctx->height;
3711 s->spatial_idwt_buffer= av_mallocz(width*height*sizeof(IDWTELEM));
3712 s->spatial_dwt_buffer= av_mallocz(width*height*sizeof(DWTELEM)); //FIXME this does not belong here
3714 for(i=0; i<MAX_REF_FRAMES; i++)
3715 for(j=0; j<MAX_REF_FRAMES; j++)
3716 scale_mv_ref[i][j] = 256*(i+1)/(j+1);
3718 s->avctx->get_buffer(s->avctx, &s->mconly_picture);
3719 s->scratchbuf = av_malloc(s->mconly_picture.linesize[0]*7*MB_SIZE);
3724 static int common_init_after_header(AVCodecContext *avctx){
3725 SnowContext *s = avctx->priv_data;
3726 int plane_index, level, orientation;
3728 for(plane_index=0; plane_index<3; plane_index++){
3729 int w= s->avctx->width;
3730 int h= s->avctx->height;
3733 w>>= s->chroma_h_shift;
3734 h>>= s->chroma_v_shift;
3736 s->plane[plane_index].width = w;
3737 s->plane[plane_index].height= h;
3739 for(level=s->spatial_decomposition_count-1; level>=0; level--){
3740 for(orientation=level ? 1 : 0; orientation<4; orientation++){
3741 SubBand *b= &s->plane[plane_index].band[level][orientation];
3743 b->buf= s->spatial_dwt_buffer;
3745 b->stride= s->plane[plane_index].width << (s->spatial_decomposition_count - level);
3746 b->width = (w + !(orientation&1))>>1;
3747 b->height= (h + !(orientation>1))>>1;
3749 b->stride_line = 1 << (s->spatial_decomposition_count - level);
3750 b->buf_x_offset = 0;
3751 b->buf_y_offset = 0;
3755 b->buf_x_offset = (w+1)>>1;
3758 b->buf += b->stride>>1;
3759 b->buf_y_offset = b->stride_line >> 1;
3761 b->ibuf= s->spatial_idwt_buffer + (b->buf - s->spatial_dwt_buffer);
3764 b->parent= &s->plane[plane_index].band[level-1][orientation];
3765 //FIXME avoid this realloc
3766 av_freep(&b->x_coeff);
3767 b->x_coeff=av_mallocz(((b->width+1) * b->height+1)*sizeof(x_and_coeff));
3777 static int qscale2qlog(int qscale){
3778 return rint(QROOT*log(qscale / (float)FF_QP2LAMBDA)/log(2))
3779 + 61*QROOT/8; //<64 >60
3782 static int ratecontrol_1pass(SnowContext *s, AVFrame *pict)
3784 /* Estimate the frame's complexity as a sum of weighted dwt coefficients.
3785 * FIXME we know exact mv bits at this point,
3786 * but ratecontrol isn't set up to include them. */
3787 uint32_t coef_sum= 0;
3788 int level, orientation, delta_qlog;
3790 for(level=0; level<s->spatial_decomposition_count; level++){
3791 for(orientation=level ? 1 : 0; orientation<4; orientation++){
3792 SubBand *b= &s->plane[0].band[level][orientation];
3793 IDWTELEM *buf= b->ibuf;
3794 const int w= b->width;
3795 const int h= b->height;
3796 const int stride= b->stride;
3797 const int qlog= av_clip(2*QROOT + b->qlog, 0, QROOT*16);
3798 const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
3799 const int qdiv= (1<<16)/qmul;
3801 //FIXME this is ugly
3804 buf[x+y*stride]= b->buf[x+y*stride];
3806 decorrelate(s, b, buf, stride, 1, 0);
3809 coef_sum+= abs(buf[x+y*stride]) * qdiv >> 16;
3813 /* ugly, ratecontrol just takes a sqrt again */
3814 coef_sum = (uint64_t)coef_sum * coef_sum >> 16;
3815 assert(coef_sum < INT_MAX);
3817 if(pict->pict_type == FF_I_TYPE){
3818 s->m.current_picture.mb_var_sum= coef_sum;
3819 s->m.current_picture.mc_mb_var_sum= 0;
3821 s->m.current_picture.mc_mb_var_sum= coef_sum;
3822 s->m.current_picture.mb_var_sum= 0;
3825 pict->quality= ff_rate_estimate_qscale(&s->m, 1);
3826 if (pict->quality < 0)
3828 s->lambda= pict->quality * 3/2;
3829 delta_qlog= qscale2qlog(pict->quality) - s->qlog;
3830 s->qlog+= delta_qlog;
3834 static void calculate_visual_weight(SnowContext *s, Plane *p){
3835 int width = p->width;
3836 int height= p->height;
3837 int level, orientation, x, y;
3839 for(level=0; level<s->spatial_decomposition_count; level++){
3840 for(orientation=level ? 1 : 0; orientation<4; orientation++){
3841 SubBand *b= &p->band[level][orientation];
3842 IDWTELEM *ibuf= b->ibuf;
3845 memset(s->spatial_idwt_buffer, 0, sizeof(*s->spatial_idwt_buffer)*width*height);
3846 ibuf[b->width/2 + b->height/2*b->stride]= 256*16;
3847 ff_spatial_idwt(s->spatial_idwt_buffer, width, height, width, s->spatial_decomposition_type, s->spatial_decomposition_count);
3848 for(y=0; y<height; y++){
3849 for(x=0; x<width; x++){
3850 int64_t d= s->spatial_idwt_buffer[x + y*width]*16;
3855 b->qlog= (int)(log(352256.0/sqrt(error)) / log(pow(2.0, 1.0/QROOT))+0.5);
3865 static void find_sse(SnowContext *s, Plane *p, int *score, int score_stride, IDWTELEM *r0, IDWTELEM *r1, int level, int orientation){
3866 SubBand *b= &p->band[level][orientation];
3870 int step= 1 << (s->spatial_decomposition_count - level);
3877 //FIXME bias for nonzero ?
3879 memset(score, 0, sizeof(*score)*score_stride*((p->height + Q2_STEP-1)/Q2_STEP));
3880 for(y=0; y<p->height; y++){
3881 for(x=0; x<p->width; x++){
3882 int sx= (x-xo + step/2) / step / Q2_STEP;
3883 int sy= (y-yo + step/2) / step / Q2_STEP;
3884 int v= r0[x + y*p->width] - r1[x + y*p->width];
3885 assert(sx>=0 && sy>=0 && sx < score_stride);
3887 score[sx + sy*score_stride] += v*v;
3888 assert(score[sx + sy*score_stride] >= 0);
3893 static void dequantize_all(SnowContext *s, Plane *p, IDWTELEM *buffer, int width, int height){
3894 int level, orientation;
3896 for(level=0; level<s->spatial_decomposition_count; level++){
3897 for(orientation=level ? 1 : 0; orientation<4; orientation++){
3898 SubBand *b= &p->band[level][orientation];
3899 IDWTELEM *dst= buffer + (b->ibuf - s->spatial_idwt_buffer);
3901 dequantize(s, b, dst, b->stride);
3906 static void dwt_quantize(SnowContext *s, Plane *p, DWTELEM *buffer, int width, int height, int stride, int type){
3907 int level, orientation, ys, xs, x, y, pass;
3908 IDWTELEM best_dequant[height * stride];
3909 IDWTELEM idwt2_buffer[height * stride];
3910 const int score_stride= (width + 10)/Q2_STEP;
3911 int best_score[(width + 10)/Q2_STEP * (height + 10)/Q2_STEP]; //FIXME size
3912 int score[(width + 10)/Q2_STEP * (height + 10)/Q2_STEP]; //FIXME size
3913 int threshold= (s->m.lambda * s->m.lambda) >> 6;
3915 //FIXME pass the copy cleanly ?
3917 // memcpy(dwt_buffer, buffer, height * stride * sizeof(DWTELEM));
3918 ff_spatial_dwt(buffer, width, height, stride, type, s->spatial_decomposition_count);
3920 for(level=0; level<s->spatial_decomposition_count; level++){
3921 for(orientation=level ? 1 : 0; orientation<4; orientation++){
3922 SubBand *b= &p->band[level][orientation];
3923 IDWTELEM *dst= best_dequant + (b->ibuf - s->spatial_idwt_buffer);
3924 DWTELEM *src= buffer + (b-> buf - s->spatial_dwt_buffer);
3925 assert(src == b->buf); // code does not depend on this but it is true currently
3927 quantize(s, b, dst, src, b->stride, s->qbias);
3930 for(pass=0; pass<1; pass++){
3931 if(s->qbias == 0) //keyframe
3933 for(level=0; level<s->spatial_decomposition_count; level++){
3934 for(orientation=level ? 1 : 0; orientation<4; orientation++){
3935 SubBand *b= &p->band[level][orientation];
3936 IDWTELEM *dst= idwt2_buffer + (b->ibuf - s->spatial_idwt_buffer);
3937 IDWTELEM *best_dst= best_dequant + (b->ibuf - s->spatial_idwt_buffer);
3939 for(ys= 0; ys<Q2_STEP; ys++){
3940 for(xs= 0; xs<Q2_STEP; xs++){
3941 memcpy(idwt2_buffer, best_dequant, height * stride * sizeof(IDWTELEM));
3942 dequantize_all(s, p, idwt2_buffer, width, height);
3943 ff_spatial_idwt(idwt2_buffer, width, height, stride, type, s->spatial_decomposition_count);
3944 find_sse(s, p, best_score, score_stride, idwt2_buffer, s->spatial_idwt_buffer, level, orientation);
3945 memcpy(idwt2_buffer, best_dequant, height * stride * sizeof(IDWTELEM));
3946 for(y=ys; y<b->height; y+= Q2_STEP){
3947 for(x=xs; x<b->width; x+= Q2_STEP){
3948 if(dst[x + y*b->stride]<0) dst[x + y*b->stride]++;
3949 if(dst[x + y*b->stride]>0) dst[x + y*b->stride]--;
3950 //FIXME try more than just --
3953 dequantize_all(s, p, idwt2_buffer, width, height);
3954 ff_spatial_idwt(idwt2_buffer, width, height, stride, type, s->spatial_decomposition_count);
3955 find_sse(s, p, score, score_stride, idwt2_buffer, s->spatial_idwt_buffer, level, orientation);
3956 for(y=ys; y<b->height; y+= Q2_STEP){
3957 for(x=xs; x<b->width; x+= Q2_STEP){
3958 int score_idx= x/Q2_STEP + (y/Q2_STEP)*score_stride;
3959 if(score[score_idx] <= best_score[score_idx] + threshold){
3960 best_score[score_idx]= score[score_idx];
3961 if(best_dst[x + y*b->stride]<0) best_dst[x + y*b->stride]++;
3962 if(best_dst[x + y*b->stride]>0) best_dst[x + y*b->stride]--;
3963 //FIXME copy instead
3972 memcpy(s->spatial_idwt_buffer, best_dequant, height * stride * sizeof(IDWTELEM)); //FIXME work with that directly instead of copy at the end
3975 #endif /* QUANTIZE2==1 */
3977 static av_cold int encode_init(AVCodecContext *avctx)
3979 SnowContext *s = avctx->priv_data;
3982 if(avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL){
3983 av_log(avctx, AV_LOG_ERROR, "This codec is under development, files encoded with it may not be decodable with future versions!!!\n"
3984 "Use vstrict=-2 / -strict -2 to use it anyway.\n");
3988 if(avctx->prediction_method == DWT_97
3989 && (avctx->flags & CODEC_FLAG_QSCALE)
3990 && avctx->global_quality == 0){
3991 av_log(avctx, AV_LOG_ERROR, "The 9/7 wavelet is incompatible with lossless mode.\n");
3995 s->spatial_decomposition_type= avctx->prediction_method; //FIXME add decorrelator type r transform_type
3997 s->chroma_h_shift= 1; //FIXME XXX
3998 s->chroma_v_shift= 1;
4000 s->mv_scale = (avctx->flags & CODEC_FLAG_QPEL) ? 2 : 4;
4001 s->block_max_depth= (avctx->flags & CODEC_FLAG_4MV ) ? 1 : 0;
4003 for(plane_index=0; plane_index<3; plane_index++){
4004 s->plane[plane_index].diag_mc= 1;
4005 s->plane[plane_index].htaps= 6;
4006 s->plane[plane_index].hcoeff[0]= 40;
4007 s->plane[plane_index].hcoeff[1]= -10;
4008 s->plane[plane_index].hcoeff[2]= 2;
4009 s->plane[plane_index].fast_mc= 1;
4018 s->m.flags = avctx->flags;
4019 s->m.bit_rate= avctx->bit_rate;
4022 s->m.me.scratchpad= av_mallocz((avctx->width+64)*2*16*2*sizeof(uint8_t));
4023 s->m.me.map = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t));
4024 s->m.me.score_map = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t));
4025 s->m.obmc_scratchpad= av_mallocz(MB_SIZE*MB_SIZE*12*sizeof(uint32_t));
4026 h263_encode_init(&s->m); //mv_penalty
4028 s->max_ref_frames = FFMAX(FFMIN(avctx->refs, MAX_REF_FRAMES), 1);
4030 if(avctx->flags&CODEC_FLAG_PASS1){
4031 if(!avctx->stats_out)
4032 avctx->stats_out = av_mallocz(256);
4034 if((avctx->flags&CODEC_FLAG_PASS2) || !(avctx->flags&CODEC_FLAG_QSCALE)){
4035 if(ff_rate_control_init(&s->m) < 0)
4038 s->pass1_rc= !(avctx->flags & (CODEC_FLAG_QSCALE|CODEC_FLAG_PASS2));
4040 avctx->coded_frame= &s->current_picture;
4041 switch(avctx->pix_fmt){
4042 // case PIX_FMT_YUV444P:
4043 // case PIX_FMT_YUV422P:
4044 case PIX_FMT_YUV420P:
4046 // case PIX_FMT_YUV411P:
4047 // case PIX_FMT_YUV410P:
4048 s->colorspace_type= 0;
4050 /* case PIX_FMT_RGB32:
4054 av_log(avctx, AV_LOG_ERROR, "pixel format not supported\n");
4057 // avcodec_get_chroma_sub_sample(avctx->pix_fmt, &s->chroma_h_shift, &s->chroma_v_shift);
4058 s->chroma_h_shift= 1;
4059 s->chroma_v_shift= 1;
4061 ff_set_cmp(&s->dsp, s->dsp.me_cmp, s->avctx->me_cmp);
4062 ff_set_cmp(&s->dsp, s->dsp.me_sub_cmp, s->avctx->me_sub_cmp);
4064 s->avctx->get_buffer(s->avctx, &s->input_picture);
4066 if(s->avctx->me_method == ME_ITER){
4068 int size= s->b_width * s->b_height << 2*s->block_max_depth;
4069 for(i=0; i<s->max_ref_frames; i++){
4070 s->ref_mvs[i]= av_mallocz(size*sizeof(int16_t[2]));
4071 s->ref_scores[i]= av_mallocz(size*sizeof(uint32_t));
4078 #define USE_HALFPEL_PLANE 0
4080 static void halfpel_interpol(SnowContext *s, uint8_t *halfpel[4][4], AVFrame *frame){
4083 assert(!(s->avctx->flags & CODEC_FLAG_EMU_EDGE));
4087 int w= s->avctx->width >>is_chroma;
4088 int h= s->avctx->height >>is_chroma;
4089 int ls= frame->linesize[p];
4090 uint8_t *src= frame->data[p];
4092 halfpel[1][p]= (uint8_t*)av_malloc(ls * (h+2*EDGE_WIDTH)) + EDGE_WIDTH*(1+ls);
4093 halfpel[2][p]= (uint8_t*)av_malloc(ls * (h+2*EDGE_WIDTH)) + EDGE_WIDTH*(1+ls);
4094 halfpel[3][p]= (uint8_t*)av_malloc(ls * (h+2*EDGE_WIDTH)) + EDGE_WIDTH*(1+ls);
4101 halfpel[1][p][i]= (20*(src[i] + src[i+1]) - 5*(src[i-1] + src[i+2]) + (src[i-2] + src[i+3]) + 16 )>>5;
4108 halfpel[2][p][i]= (20*(src[i] + src[i+ls]) - 5*(src[i-ls] + src[i+2*ls]) + (src[i-2*ls] + src[i+3*ls]) + 16 )>>5;
4116 halfpel[3][p][i]= (20*(src[i] + src[i+ls]) - 5*(src[i-ls] + src[i+2*ls]) + (src[i-2*ls] + src[i+3*ls]) + 16 )>>5;
4124 static void release_buffer(AVCodecContext *avctx){
4125 SnowContext *s = avctx->priv_data;
4128 if(s->last_picture[s->max_ref_frames-1].data[0]){
4129 avctx->release_buffer(avctx, &s->last_picture[s->max_ref_frames-1]);
4131 if(s->halfpel_plane[s->max_ref_frames-1][1+i/3][i%3])
4132 av_free(s->halfpel_plane[s->max_ref_frames-1][1+i/3][i%3] - EDGE_WIDTH*(1+s->current_picture.linesize[i%3]));
4136 static int frame_start(SnowContext *s){
4138 int w= s->avctx->width; //FIXME round up to x16 ?
4139 int h= s->avctx->height;
4141 if(s->current_picture.data[0]){
4142 s->dsp.draw_edges(s->current_picture.data[0], s->current_picture.linesize[0], w , h , EDGE_WIDTH );
4143 s->dsp.draw_edges(s->current_picture.data[1], s->current_picture.linesize[1], w>>1, h>>1, EDGE_WIDTH/2);
4144 s->dsp.draw_edges(s->current_picture.data[2], s->current_picture.linesize[2], w>>1, h>>1, EDGE_WIDTH/2);
4147 release_buffer(s->avctx);
4149 tmp= s->last_picture[s->max_ref_frames-1];
4150 memmove(s->last_picture+1, s->last_picture, (s->max_ref_frames-1)*sizeof(AVFrame));
4151 memmove(s->halfpel_plane+1, s->halfpel_plane, (s->max_ref_frames-1)*sizeof(void*)*4*4);
4152 if(USE_HALFPEL_PLANE && s->current_picture.data[0])
4153 halfpel_interpol(s, s->halfpel_plane[0], &s->current_picture);
4154 s->last_picture[0]= s->current_picture;
4155 s->current_picture= tmp;
4161 for(i=0; i<s->max_ref_frames && s->last_picture[i].data[0]; i++)
4162 if(i && s->last_picture[i-1].key_frame)
4165 if(s->ref_frames==0){
4166 av_log(s->avctx,AV_LOG_ERROR, "No reference frames\n");
4171 s->current_picture.reference= 1;
4172 if(s->avctx->get_buffer(s->avctx, &s->current_picture) < 0){
4173 av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed\n");
4177 s->current_picture.key_frame= s->keyframe;
4182 static int encode_frame(AVCodecContext *avctx, unsigned char *buf, int buf_size, void *data){
4183 SnowContext *s = avctx->priv_data;
4184 RangeCoder * const c= &s->c;
4185 AVFrame *pict = data;
4186 const int width= s->avctx->width;
4187 const int height= s->avctx->height;
4188 int level, orientation, plane_index, i, y;
4189 uint8_t rc_header_bak[sizeof(s->header_state)];
4190 uint8_t rc_block_bak[sizeof(s->block_state)];
4192 ff_init_range_encoder(c, buf, buf_size);
4193 ff_build_rac_states(c, 0.05*(1LL<<32), 256-8);
4197 for(y=0; y<(height>>shift); y++)
4198 memcpy(&s->input_picture.data[i][y * s->input_picture.linesize[i]],
4199 &pict->data[i][y * pict->linesize[i]],
4202 s->new_picture = *pict;
4204 s->m.picture_number= avctx->frame_number;
4205 if(avctx->flags&CODEC_FLAG_PASS2){
4207 pict->pict_type= s->m.rc_context.entry[avctx->frame_number].new_pict_type;
4208 s->keyframe= pict->pict_type==FF_I_TYPE;
4209 if(!(avctx->flags&CODEC_FLAG_QSCALE)) {
4210 pict->quality= ff_rate_estimate_qscale(&s->m, 0);
4211 if (pict->quality < 0)
4215 s->keyframe= avctx->gop_size==0 || avctx->frame_number % avctx->gop_size == 0;
4217 pict->pict_type= s->keyframe ? FF_I_TYPE : FF_P_TYPE;
4220 if(s->pass1_rc && avctx->frame_number == 0)
4221 pict->quality= 2*FF_QP2LAMBDA;
4223 s->qlog= qscale2qlog(pict->quality);
4224 s->lambda = pict->quality * 3/2;
4226 if(s->qlog < 0 || (!pict->quality && (avctx->flags & CODEC_FLAG_QSCALE))){
4227 s->qlog= LOSSLESS_QLOG;
4229 }//else keep previous frame's qlog until after motion estimation
4233 s->m.current_picture_ptr= &s->m.current_picture;
4234 if(pict->pict_type == FF_P_TYPE){
4235 int block_width = (width +15)>>4;
4236 int block_height= (height+15)>>4;
4237 int stride= s->current_picture.linesize[0];
4239 assert(s->current_picture.data[0]);
4240 assert(s->last_picture[0].data[0]);
4242 s->m.avctx= s->avctx;
4243 s->m.current_picture.data[0]= s->current_picture.data[0];
4244 s->m. last_picture.data[0]= s->last_picture[0].data[0];
4245 s->m. new_picture.data[0]= s-> input_picture.data[0];
4246 s->m. last_picture_ptr= &s->m. last_picture;
4248 s->m. last_picture.linesize[0]=
4249 s->m. new_picture.linesize[0]=
4250 s->m.current_picture.linesize[0]= stride;
4251 s->m.uvlinesize= s->current_picture.linesize[1];
4253 s->m.height= height;
4254 s->m.mb_width = block_width;
4255 s->m.mb_height= block_height;
4256 s->m.mb_stride= s->m.mb_width+1;
4257 s->m.b8_stride= 2*s->m.mb_width+1;
4259 s->m.pict_type= pict->pict_type;
4260 s->m.me_method= s->avctx->me_method;
4261 s->m.me.scene_change_score=0;
4262 s->m.flags= s->avctx->flags;
4263 s->m.quarter_sample= (s->avctx->flags & CODEC_FLAG_QPEL)!=0;
4264 s->m.out_format= FMT_H263;
4265 s->m.unrestricted_mv= 1;
4267 s->m.lambda = s->lambda;
4268 s->m.qscale= (s->m.lambda*139 + FF_LAMBDA_SCALE*64) >> (FF_LAMBDA_SHIFT + 7);
4269 s->lambda2= s->m.lambda2= (s->m.lambda*s->m.lambda + FF_LAMBDA_SCALE/2) >> FF_LAMBDA_SHIFT;
4271 s->m.dsp= s->dsp; //move
4277 memcpy(rc_header_bak, s->header_state, sizeof(s->header_state));
4278 memcpy(rc_block_bak, s->block_state, sizeof(s->block_state));
4283 if(pict->pict_type == FF_I_TYPE)
4284 s->spatial_decomposition_count= 5;
4286 s->spatial_decomposition_count= 5;
4288 s->m.pict_type = pict->pict_type;
4289 s->qbias= pict->pict_type == FF_P_TYPE ? 2 : 0;
4291 common_init_after_header(avctx);
4293 if(s->last_spatial_decomposition_count != s->spatial_decomposition_count){
4294 for(plane_index=0; plane_index<3; plane_index++){
4295 calculate_visual_weight(s, &s->plane[plane_index]);
4300 s->m.misc_bits = 8*(s->c.bytestream - s->c.bytestream_start);
4301 encode_blocks(s, 1);
4302 s->m.mv_bits = 8*(s->c.bytestream - s->c.bytestream_start) - s->m.misc_bits;
4304 for(plane_index=0; plane_index<3; plane_index++){
4305 Plane *p= &s->plane[plane_index];
4309 // int bits= put_bits_count(&s->c.pb);
4311 if(!(avctx->flags2 & CODEC_FLAG2_MEMC_ONLY)){
4313 if(pict->data[plane_index]) //FIXME gray hack
4316 s->spatial_idwt_buffer[y*w + x]= pict->data[plane_index][y*pict->linesize[plane_index] + x]<<FRAC_BITS;
4319 predict_plane(s, s->spatial_idwt_buffer, plane_index, 0);
4322 && pict->pict_type == FF_P_TYPE
4323 && !(avctx->flags&CODEC_FLAG_PASS2)
4324 && s->m.me.scene_change_score > s->avctx->scenechange_threshold){
4325 ff_init_range_encoder(c, buf, buf_size);
4326 ff_build_rac_states(c, 0.05*(1LL<<32), 256-8);
4327 pict->pict_type= FF_I_TYPE;
4329 s->current_picture.key_frame=1;
4333 if(s->qlog == LOSSLESS_QLOG){
4336 s->spatial_dwt_buffer[y*w + x]= (s->spatial_idwt_buffer[y*w + x] + (1<<(FRAC_BITS-1))-1)>>FRAC_BITS;
4342 s->spatial_dwt_buffer[y*w + x]=s->spatial_idwt_buffer[y*w + x]<<ENCODER_EXTRA_BITS;
4348 dwt_quantize(s, p, s->spatial_dwt_buffer, w, h, w, s->spatial_decomposition_type);
4350 ff_spatial_dwt(s->spatial_dwt_buffer, w, h, w, s->spatial_decomposition_type, s->spatial_decomposition_count);
4352 if(s->pass1_rc && plane_index==0){
4353 int delta_qlog = ratecontrol_1pass(s, pict);
4354 if (delta_qlog <= INT_MIN)
4357 //reordering qlog in the bitstream would eliminate this reset
4358 ff_init_range_encoder(c, buf, buf_size);
4359 memcpy(s->header_state, rc_header_bak, sizeof(s->header_state));
4360 memcpy(s->block_state, rc_block_bak, sizeof(s->block_state));
4362 encode_blocks(s, 0);
4366 for(level=0; level<s->spatial_decomposition_count; level++){
4367 for(orientation=level ? 1 : 0; orientation<4; orientation++){
4368 SubBand *b= &p->band[level][orientation];
4371 quantize(s, b, b->ibuf, b->buf, b->stride, s->qbias);
4373 decorrelate(s, b, b->ibuf, b->stride, pict->pict_type == FF_P_TYPE, 0);
4374 encode_subband(s, b, b->ibuf, b->parent ? b->parent->ibuf : NULL, b->stride, orientation);
4375 assert(b->parent==NULL || b->parent->stride == b->stride*2);
4377 correlate(s, b, b->ibuf, b->stride, 1, 0);
4381 for(level=0; level<s->spatial_decomposition_count; level++){
4382 for(orientation=level ? 1 : 0; orientation<4; orientation++){
4383 SubBand *b= &p->band[level][orientation];
4385 dequantize(s, b, b->ibuf, b->stride);
4389 ff_spatial_idwt(s->spatial_idwt_buffer, w, h, w, s->spatial_decomposition_type, s->spatial_decomposition_count);
4390 if(s->qlog == LOSSLESS_QLOG){
4393 s->spatial_idwt_buffer[y*w + x]<<=FRAC_BITS;
4397 predict_plane(s, s->spatial_idwt_buffer, plane_index, 1);
4400 if(pict->pict_type == FF_I_TYPE){
4403 s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x]=
4404 pict->data[plane_index][y*pict->linesize[plane_index] + x];
4408 memset(s->spatial_idwt_buffer, 0, sizeof(IDWTELEM)*w*h);
4409 predict_plane(s, s->spatial_idwt_buffer, plane_index, 1);
4412 if(s->avctx->flags&CODEC_FLAG_PSNR){
4415 if(pict->data[plane_index]) //FIXME gray hack
4418 int d= s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x] - pict->data[plane_index][y*pict->linesize[plane_index] + x];
4422 s->avctx->error[plane_index] += error;
4423 s->current_picture.error[plane_index] = error;
4428 update_last_header_values(s);
4430 release_buffer(avctx);
4432 s->current_picture.coded_picture_number = avctx->frame_number;
4433 s->current_picture.pict_type = pict->pict_type;
4434 s->current_picture.quality = pict->quality;
4435 s->m.frame_bits = 8*(s->c.bytestream - s->c.bytestream_start);
4436 s->m.p_tex_bits = s->m.frame_bits - s->m.misc_bits - s->m.mv_bits;
4437 s->m.current_picture.display_picture_number =
4438 s->m.current_picture.coded_picture_number = avctx->frame_number;
4439 s->m.current_picture.quality = pict->quality;
4440 s->m.total_bits += 8*(s->c.bytestream - s->c.bytestream_start);
4442 if (ff_rate_estimate_qscale(&s->m, 0) < 0)
4444 if(avctx->flags&CODEC_FLAG_PASS1)
4445 ff_write_pass1_stats(&s->m);
4446 s->m.last_pict_type = s->m.pict_type;
4447 avctx->frame_bits = s->m.frame_bits;
4448 avctx->mv_bits = s->m.mv_bits;
4449 avctx->misc_bits = s->m.misc_bits;
4450 avctx->p_tex_bits = s->m.p_tex_bits;
4454 return ff_rac_terminate(c);
4457 static av_cold void common_end(SnowContext *s){
4458 int plane_index, level, orientation, i;
4460 av_freep(&s->spatial_dwt_buffer);
4461 av_freep(&s->spatial_idwt_buffer);
4464 av_freep(&s->m.me.scratchpad);
4465 av_freep(&s->m.me.map);
4466 av_freep(&s->m.me.score_map);
4467 av_freep(&s->m.obmc_scratchpad);
4469 av_freep(&s->block);
4470 av_freep(&s->scratchbuf);
4472 for(i=0; i<MAX_REF_FRAMES; i++){
4473 av_freep(&s->ref_mvs[i]);
4474 av_freep(&s->ref_scores[i]);
4475 if(s->last_picture[i].data[0])
4476 s->avctx->release_buffer(s->avctx, &s->last_picture[i]);
4479 for(plane_index=0; plane_index<3; plane_index++){
4480 for(level=s->spatial_decomposition_count-1; level>=0; level--){
4481 for(orientation=level ? 1 : 0; orientation<4; orientation++){
4482 SubBand *b= &s->plane[plane_index].band[level][orientation];
4484 av_freep(&b->x_coeff);
4490 static av_cold int encode_end(AVCodecContext *avctx)
4492 SnowContext *s = avctx->priv_data;
4495 av_free(avctx->stats_out);
4500 static av_cold int decode_init(AVCodecContext *avctx)
4502 avctx->pix_fmt= PIX_FMT_YUV420P;
4509 static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, AVPacket *avpkt){
4510 const uint8_t *buf = avpkt->data;
4511 int buf_size = avpkt->size;
4512 SnowContext *s = avctx->priv_data;
4513 RangeCoder * const c= &s->c;
4515 AVFrame *picture = data;
4516 int level, orientation, plane_index;
4518 ff_init_range_decoder(c, buf, buf_size);
4519 ff_build_rac_states(c, 0.05*(1LL<<32), 256-8);
4521 s->current_picture.pict_type= FF_I_TYPE; //FIXME I vs. P
4522 if(decode_header(s)<0)
4524 common_init_after_header(avctx);
4526 // realloc slice buffer for the case that spatial_decomposition_count changed
4527 slice_buffer_destroy(&s->sb);
4528 slice_buffer_init(&s->sb, s->plane[0].height, (MB_SIZE >> s->block_max_depth) + s->spatial_decomposition_count * 8 + 1, s->plane[0].width, s->spatial_idwt_buffer);
4530 for(plane_index=0; plane_index<3; plane_index++){
4531 Plane *p= &s->plane[plane_index];
4532 p->fast_mc= p->diag_mc && p->htaps==6 && p->hcoeff[0]==40
4533 && p->hcoeff[1]==-10
4539 if(frame_start(s) < 0)
4541 //keyframe flag duplication mess FIXME
4542 if(avctx->debug&FF_DEBUG_PICT_INFO)
4543 av_log(avctx, AV_LOG_ERROR, "keyframe:%d qlog:%d\n", s->keyframe, s->qlog);
4547 for(plane_index=0; plane_index<3; plane_index++){
4548 Plane *p= &s->plane[plane_index];
4552 int decode_state[MAX_DECOMPOSITIONS][4][1]; /* Stored state info for unpack_coeffs. 1 variable per instance. */
4554 if(s->avctx->debug&2048){
4555 memset(s->spatial_dwt_buffer, 0, sizeof(DWTELEM)*w*h);
4556 predict_plane(s, s->spatial_idwt_buffer, plane_index, 1);
4560 int v= s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x];
4561 s->mconly_picture.data[plane_index][y*s->mconly_picture.linesize[plane_index] + x]= v;
4567 for(level=0; level<s->spatial_decomposition_count; level++){
4568 for(orientation=level ? 1 : 0; orientation<4; orientation++){
4569 SubBand *b= &p->band[level][orientation];
4570 unpack_coeffs(s, b, b->parent, orientation);
4576 const int mb_h= s->b_height << s->block_max_depth;
4577 const int block_size = MB_SIZE >> s->block_max_depth;
4578 const int block_w = plane_index ? block_size/2 : block_size;
4580 DWTCompose cs[MAX_DECOMPOSITIONS];
4585 ff_spatial_idwt_buffered_init(cs, &s->sb, w, h, 1, s->spatial_decomposition_type, s->spatial_decomposition_count);
4586 for(mb_y=0; mb_y<=mb_h; mb_y++){
4588 int slice_starty = block_w*mb_y;
4589 int slice_h = block_w*(mb_y+1);
4590 if (!(s->keyframe || s->avctx->debug&512)){
4591 slice_starty = FFMAX(0, slice_starty - (block_w >> 1));
4592 slice_h -= (block_w >> 1);
4595 for(level=0; level<s->spatial_decomposition_count; level++){
4596 for(orientation=level ? 1 : 0; orientation<4; orientation++){
4597 SubBand *b= &p->band[level][orientation];
4600 int our_mb_start = mb_y;
4601 int our_mb_end = (mb_y + 1);
4603 start_y = (mb_y ? ((block_w * our_mb_start) >> (s->spatial_decomposition_count - level)) + s->spatial_decomposition_count - level + extra: 0);
4604 end_y = (((block_w * our_mb_end) >> (s->spatial_decomposition_count - level)) + s->spatial_decomposition_count - level + extra);
4605 if (!(s->keyframe || s->avctx->debug&512)){
4606 start_y = FFMAX(0, start_y - (block_w >> (1+s->spatial_decomposition_count - level)));
4607 end_y = FFMAX(0, end_y - (block_w >> (1+s->spatial_decomposition_count - level)));
4609 start_y = FFMIN(b->height, start_y);
4610 end_y = FFMIN(b->height, end_y);
4612 if (start_y != end_y){
4613 if (orientation == 0){
4614 SubBand * correlate_band = &p->band[0][0];
4615 int correlate_end_y = FFMIN(b->height, end_y + 1);
4616 int correlate_start_y = FFMIN(b->height, (start_y ? start_y + 1 : 0));
4617 decode_subband_slice_buffered(s, correlate_band, &s->sb, correlate_start_y, correlate_end_y, decode_state[0][0]);
4618 correlate_slice_buffered(s, &s->sb, correlate_band, correlate_band->ibuf, correlate_band->stride, 1, 0, correlate_start_y, correlate_end_y);
4619 dequantize_slice_buffered(s, &s->sb, correlate_band, correlate_band->ibuf, correlate_band->stride, start_y, end_y);
4622 decode_subband_slice_buffered(s, b, &s->sb, start_y, end_y, decode_state[level][orientation]);
4627 for(; yd<slice_h; yd+=4){
4628 ff_spatial_idwt_buffered_slice(&s->dsp, cs, &s->sb, w, h, 1, s->spatial_decomposition_type, s->spatial_decomposition_count, yd);
4631 if(s->qlog == LOSSLESS_QLOG){
4632 for(; yq<slice_h && yq<h; yq++){
4633 IDWTELEM * line = slice_buffer_get_line(&s->sb, yq);
4635 line[x] <<= FRAC_BITS;
4640 predict_slice_buffered(s, &s->sb, s->spatial_idwt_buffer, plane_index, 1, mb_y);
4642 y = FFMIN(p->height, slice_starty);
4643 end_y = FFMIN(p->height, slice_h);
4645 slice_buffer_release(&s->sb, y++);
4648 slice_buffer_flush(&s->sb);
4655 release_buffer(avctx);
4657 if(!(s->avctx->debug&2048))
4658 *picture= s->current_picture;
4660 *picture= s->mconly_picture;
4662 *data_size = sizeof(AVFrame);
4664 bytes_read= c->bytestream - c->bytestream_start;
4665 if(bytes_read ==0) av_log(s->avctx, AV_LOG_ERROR, "error at end of frame\n"); //FIXME
4670 static av_cold int decode_end(AVCodecContext *avctx)
4672 SnowContext *s = avctx->priv_data;
4674 slice_buffer_destroy(&s->sb);
4681 AVCodec snow_decoder = {
4685 sizeof(SnowContext),
4690 0 /*CODEC_CAP_DR1*/ /*| CODEC_CAP_DRAW_HORIZ_BAND*/,
4692 .long_name = NULL_IF_CONFIG_SMALL("Snow"),
4695 #if CONFIG_SNOW_ENCODER
4696 AVCodec snow_encoder = {
4700 sizeof(SnowContext),
4704 .long_name = NULL_IF_CONFIG_SMALL("Snow"),
4714 #include "libavutil/lfg.h"
4719 int buffer[2][width*height];
4723 s.spatial_decomposition_count=6;
4724 s.spatial_decomposition_type=1;
4726 av_lfg_init(&prng, 1);
4728 printf("testing 5/3 DWT\n");
4729 for(i=0; i<width*height; i++)
4730 buffer[0][i] = buffer[1][i] = av_lfg_get(&prng) % 54321 - 12345;
4732 ff_spatial_dwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4733 ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4735 for(i=0; i<width*height; i++)
4736 if(buffer[0][i]!= buffer[1][i]) printf("fsck: %6d %12d %7d\n",i, buffer[0][i], buffer[1][i]);
4738 printf("testing 9/7 DWT\n");
4739 s.spatial_decomposition_type=0;
4740 for(i=0; i<width*height; i++)
4741 buffer[0][i] = buffer[1][i] = av_lfg_get(&prng) % 54321 - 12345;
4743 ff_spatial_dwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4744 ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4746 for(i=0; i<width*height; i++)
4747 if(FFABS(buffer[0][i] - buffer[1][i])>20) printf("fsck: %6d %12d %7d\n",i, buffer[0][i], buffer[1][i]);
4750 printf("testing AC coder\n");
4751 memset(s.header_state, 0, sizeof(s.header_state));
4752 ff_init_range_encoder(&s.c, buffer[0], 256*256);
4753 ff_init_cabac_states(&s.c, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64);
4755 for(i=-256; i<256; i++){
4756 put_symbol(&s.c, s.header_state, i*i*i/3*FFABS(i), 1);
4758 ff_rac_terminate(&s.c);
4760 memset(s.header_state, 0, sizeof(s.header_state));
4761 ff_init_range_decoder(&s.c, buffer[0], 256*256);
4762 ff_init_cabac_states(&s.c, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64);
4764 for(i=-256; i<256; i++){
4766 j= get_symbol(&s.c, s.header_state, 1);
4767 if(j!=i*i*i/3*FFABS(i)) printf("fsck: %d != %d\n", i, j);
4771 int level, orientation, x, y;
4772 int64_t errors[8][4];
4775 memset(errors, 0, sizeof(errors));
4776 s.spatial_decomposition_count=3;
4777 s.spatial_decomposition_type=0;
4778 for(level=0; level<s.spatial_decomposition_count; level++){
4779 for(orientation=level ? 1 : 0; orientation<4; orientation++){
4780 int w= width >> (s.spatial_decomposition_count-level);
4781 int h= height >> (s.spatial_decomposition_count-level);
4782 int stride= width << (s.spatial_decomposition_count-level);
4783 DWTELEM *buf= buffer[0];
4786 if(orientation&1) buf+=w;
4787 if(orientation>1) buf+=stride>>1;
4789 memset(buffer[0], 0, sizeof(int)*width*height);
4790 buf[w/2 + h/2*stride]= 256*256;
4791 ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4792 for(y=0; y<height; y++){
4793 for(x=0; x<width; x++){
4794 int64_t d= buffer[0][x + y*width];
4796 if(FFABS(width/2-x)<9 && FFABS(height/2-y)<9 && level==2) printf("%8"PRId64" ", d);
4798 if(FFABS(height/2-y)<9 && level==2) printf("\n");
4800 error= (int)(sqrt(error)+0.5);
4801 errors[level][orientation]= error;
4802 if(g) g=av_gcd(g, error);
4806 printf("static int const visual_weight[][4]={\n");
4807 for(level=0; level<s.spatial_decomposition_count; level++){
4809 for(orientation=0; orientation<4; orientation++){
4810 printf("%8"PRId64",", errors[level][orientation]/g);
4817 int w= width >> (s.spatial_decomposition_count-level);
4818 //int h= height >> (s.spatial_decomposition_count-level);
4819 int stride= width << (s.spatial_decomposition_count-level);
4820 DWTELEM *buf= buffer[0];
4826 memset(buffer[0], 0, sizeof(int)*width*height);
4828 for(y=0; y<height; y++){
4829 for(x=0; x<width; x++){
4830 int tab[4]={0,2,3,1};
4831 buffer[0][x+width*y]= 256*256*tab[(x&1) + 2*(y&1)];
4834 ff_spatial_dwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4838 buf[x + y*stride ]=169;
4839 buf[x + y*stride-w]=64;
4842 ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4844 for(y=0; y<height; y++){
4845 for(x=0; x<width; x++){
4846 int64_t d= buffer[0][x + y*width];
4848 if(FFABS(width/2-x)<9 && FFABS(height/2-y)<9) printf("%8"PRId64" ", d);
4850 if(FFABS(height/2-y)<9) printf("\n");