2 * Copyright (C) 2004 Michael Niedermayer <michaelni@gmx.at>
4 * This file is part of FFmpeg.
6 * FFmpeg is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * FFmpeg is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with FFmpeg; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
25 #include "rangecoder.h"
28 #include "mpegvideo.h"
/* 256-entry lookup mapping a byte-sized index to one of three levels (-1, 0, 1).
 * Indices 0 and 1 and index 255 map to 0, 2..127 map to 1, 128..254 map to -1.
 * NOTE(review): presumably indexed by a signed difference masked to 8 bits,
 * so the upper half represents negative inputs - confirm at the use sites. */
33 static const int8_t quant3[256]={
34 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
35 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
36 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
37 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
38 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
39 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
40 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
41 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
42 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
43 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
44 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
45 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
46 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
47 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
48 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
49 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 0,
/* Three-level lookup like quant3 but with no dead zone around zero:
 * only index 0 maps to 0, 1..127 map to 1 and 128..255 map to -1.
 * NOTE(review): presumably indexed by an 8-bit wrapped difference - confirm. */
51 static const int8_t quant3b[256]={
52 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
53 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
54 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
55 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
56 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
57 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
58 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
59 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
60 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
61 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
62 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
63 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
64 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
65 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
66 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
67 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
/* Three-level lookup with an interleaved sign layout: indices 0 and 1 map
 * to 0, after that every even index maps to 1 and every odd index to -1.
 * NOTE(review): presumably indexed by a value whose low bit carries the
 * sign (e.g. a folded absolute value) - confirm at the use sites. */
69 static const int8_t quant3bA[256]={
70 0, 0, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
71 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
72 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
73 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
74 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
75 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
76 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
77 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
78 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
79 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
80 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
81 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
82 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
83 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
84 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
85 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
/* 256-entry lookup producing five levels (-2..2): index 0 maps to 0,
 * 1..3 map to 1, 4..127 map to 2, 128..252 map to -2, 253..255 map to -1.
 * NOTE(review): presumably indexed by an 8-bit wrapped difference - confirm. */
87 static const int8_t quant5[256]={
88 0, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
89 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
90 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
91 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
92 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
93 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
94 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
95 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
96 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
97 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
98 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
99 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
100 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
101 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
102 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
103 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,-1,
/* 256-entry lookup producing seven levels (-3..3). The lower half of the
 * index range holds the non-negative levels with thresholds growing away
 * from index 0; the upper half holds the negative levels, approaching -1
 * towards index 255.
 * NOTE(review): presumably indexed by an 8-bit wrapped difference - confirm. */
105 static const int8_t quant7[256]={
106 0, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
107 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
108 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3,
109 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
110 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
111 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
112 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
113 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
114 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
115 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
116 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
117 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
118 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
119 -3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-2,-2,-2,
120 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
121 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,
/* 256-entry lookup producing nine levels (-4..4). The lower half of the
 * index range holds the non-negative levels with thresholds growing away
 * from index 0; the upper half holds the negative levels, approaching -1
 * towards index 255.
 * NOTE(review): presumably indexed by an 8-bit wrapped difference - confirm. */
123 static const int8_t quant9[256]={
124 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3,
125 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
126 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
127 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
128 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
129 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
130 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
131 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
132 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
133 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
134 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
135 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
136 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
137 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
138 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,
139 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-1,-1,
/* 256-entry lookup producing eleven levels (-5..5). The lower half of the
 * index range holds the non-negative levels with thresholds growing away
 * from index 0; the upper half holds the negative levels, approaching -1
 * at index 255.
 * NOTE(review): presumably indexed by an 8-bit wrapped difference - confirm. */
141 static const int8_t quant11[256]={
142 0, 1, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4,
143 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
144 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
145 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
146 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
147 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
148 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
149 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
150 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
151 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
152 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
153 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
154 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
155 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-4,-4,
156 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
157 -4,-4,-4,-4,-4,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-1,
/* 256-entry lookup producing thirteen levels (-6..6). The lower half of the
 * index range holds the non-negative levels with thresholds growing away
 * from index 0; the upper half holds the negative levels, approaching -1
 * at index 255.
 * NOTE(review): presumably indexed by an 8-bit wrapped difference - confirm. */
159 static const int8_t quant13[256]={
160 0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
161 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
162 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
163 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
164 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
165 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
166 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
167 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
168 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
169 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
170 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
171 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
172 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-5,
173 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
174 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
175 -4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,-2,-2,-1,
/* 32x32 table of 8-bit weights stored row by row (1024 entries). The rows
 * read the same left-to-right and top-to-bottom, weights are zero on the
 * outer border and peak at 255 in the four centre entries.
 * NOTE(review): presumably an overlapped-block motion compensation window.
 * The name obmc32 is defined more than once in this file; presumably
 * preprocessor conditionals select a single variant - confirm. */
179 static const uint8_t obmc32[1024]={
180 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
181 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0,
182 0, 0, 0, 4, 4, 4, 4, 8, 8, 12, 12, 12, 16, 16, 16, 16, 16, 16, 16, 16, 12, 12, 12, 8, 8, 4, 4, 4, 4, 0, 0, 0,
183 0, 0, 4, 4, 8, 8, 12, 16, 16, 20, 24, 24, 28, 28, 32, 32, 32, 32, 28, 28, 24, 24, 20, 16, 16, 12, 8, 8, 4, 4, 0, 0,
184 0, 0, 4, 8, 8, 12, 16, 24, 28, 32, 36, 40, 44, 48, 48, 48, 48, 48, 48, 44, 40, 36, 32, 28, 24, 16, 12, 8, 8, 4, 0, 0,
185 0, 4, 4, 8, 12, 20, 24, 32, 40, 44, 52, 56, 60, 64, 68, 72, 72, 68, 64, 60, 56, 52, 44, 40, 32, 24, 20, 12, 8, 4, 4, 0,
186 0, 4, 4, 12, 16, 24, 32, 40, 52, 60, 68, 76, 80, 88, 88, 92, 92, 88, 88, 80, 76, 68, 60, 52, 40, 32, 24, 16, 12, 4, 4, 0,
187 0, 4, 8, 16, 24, 32, 40, 52, 64, 76, 84, 92,100,108,112,116,116,112,108,100, 92, 84, 76, 64, 52, 40, 32, 24, 16, 8, 4, 0,
188 0, 4, 8, 16, 28, 40, 52, 64, 76, 88,100,112,124,132,136,140,140,136,132,124,112,100, 88, 76, 64, 52, 40, 28, 16, 8, 4, 0,
189 0, 4, 12, 20, 32, 44, 60, 76, 88,104,120,132,144,152,160,164,164,160,152,144,132,120,104, 88, 76, 60, 44, 32, 20, 12, 4, 0,
190 0, 4, 12, 24, 36, 48, 68, 84,100,120,136,152,164,176,180,184,184,180,176,164,152,136,120,100, 84, 68, 48, 36, 24, 12, 4, 0,
191 0, 4, 12, 24, 40, 56, 76, 92,112,132,152,168,180,192,204,208,208,204,192,180,168,152,132,112, 92, 76, 56, 40, 24, 12, 4, 0,
192 0, 4, 16, 28, 44, 60, 80,100,124,144,164,180,196,208,220,224,224,220,208,196,180,164,144,124,100, 80, 60, 44, 28, 16, 4, 0,
193 0, 8, 16, 28, 48, 64, 88,108,132,152,176,192,208,224,232,240,240,232,224,208,192,176,152,132,108, 88, 64, 48, 28, 16, 8, 0,
194 0, 4, 16, 32, 48, 68, 88,112,136,160,180,204,220,232,244,248,248,244,232,220,204,180,160,136,112, 88, 68, 48, 32, 16, 4, 0,
195 1, 8, 16, 32, 48, 72, 92,116,140,164,184,208,224,240,248,255,255,248,240,224,208,184,164,140,116, 92, 72, 48, 32, 16, 8, 1,
196 1, 8, 16, 32, 48, 72, 92,116,140,164,184,208,224,240,248,255,255,248,240,224,208,184,164,140,116, 92, 72, 48, 32, 16, 8, 1,
197 0, 4, 16, 32, 48, 68, 88,112,136,160,180,204,220,232,244,248,248,244,232,220,204,180,160,136,112, 88, 68, 48, 32, 16, 4, 0,
198 0, 8, 16, 28, 48, 64, 88,108,132,152,176,192,208,224,232,240,240,232,224,208,192,176,152,132,108, 88, 64, 48, 28, 16, 8, 0,
199 0, 4, 16, 28, 44, 60, 80,100,124,144,164,180,196,208,220,224,224,220,208,196,180,164,144,124,100, 80, 60, 44, 28, 16, 4, 0,
200 0, 4, 12, 24, 40, 56, 76, 92,112,132,152,168,180,192,204,208,208,204,192,180,168,152,132,112, 92, 76, 56, 40, 24, 12, 4, 0,
201 0, 4, 12, 24, 36, 48, 68, 84,100,120,136,152,164,176,180,184,184,180,176,164,152,136,120,100, 84, 68, 48, 36, 24, 12, 4, 0,
202 0, 4, 12, 20, 32, 44, 60, 76, 88,104,120,132,144,152,160,164,164,160,152,144,132,120,104, 88, 76, 60, 44, 32, 20, 12, 4, 0,
203 0, 4, 8, 16, 28, 40, 52, 64, 76, 88,100,112,124,132,136,140,140,136,132,124,112,100, 88, 76, 64, 52, 40, 28, 16, 8, 4, 0,
204 0, 4, 8, 16, 24, 32, 40, 52, 64, 76, 84, 92,100,108,112,116,116,112,108,100, 92, 84, 76, 64, 52, 40, 32, 24, 16, 8, 4, 0,
205 0, 4, 4, 12, 16, 24, 32, 40, 52, 60, 68, 76, 80, 88, 88, 92, 92, 88, 88, 80, 76, 68, 60, 52, 40, 32, 24, 16, 12, 4, 4, 0,
206 0, 4, 4, 8, 12, 20, 24, 32, 40, 44, 52, 56, 60, 64, 68, 72, 72, 68, 64, 60, 56, 52, 44, 40, 32, 24, 20, 12, 8, 4, 4, 0,
207 0, 0, 4, 8, 8, 12, 16, 24, 28, 32, 36, 40, 44, 48, 48, 48, 48, 48, 48, 44, 40, 36, 32, 28, 24, 16, 12, 8, 8, 4, 0, 0,
208 0, 0, 4, 4, 8, 8, 12, 16, 16, 20, 24, 24, 28, 28, 32, 32, 32, 32, 28, 28, 24, 24, 20, 16, 16, 12, 8, 8, 4, 4, 0, 0,
209 0, 0, 0, 4, 4, 4, 4, 8, 8, 12, 12, 12, 16, 16, 16, 16, 16, 16, 16, 16, 12, 12, 12, 8, 8, 4, 4, 4, 4, 0, 0, 0,
210 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0,
211 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
/* 16x16 table of 8-bit weights stored row by row (256 entries), symmetric
 * left-to-right and top-to-bottom, peaking at 248 in the four centre entries.
 * NOTE(review): presumably the 16x16 overlapped-block motion compensation
 * window. The name obmc16 is defined more than once in this file; presumably
 * preprocessor conditionals select a single variant - confirm. */
214 static const uint8_t obmc16[256]={
215 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0,
216 0, 4, 4, 8, 16, 20, 20, 24, 24, 20, 20, 16, 8, 4, 4, 0,
217 0, 4, 16, 24, 36, 44, 52, 60, 60, 52, 44, 36, 24, 16, 4, 0,
218 0, 8, 24, 44, 60, 80, 96,104,104, 96, 80, 60, 44, 24, 8, 0,
219 0, 16, 36, 60, 92,116,136,152,152,136,116, 92, 60, 36, 16, 0,
220 0, 20, 44, 80,116,152,180,196,196,180,152,116, 80, 44, 20, 0,
221 4, 20, 52, 96,136,180,212,228,228,212,180,136, 96, 52, 20, 4,
222 4, 24, 60,104,152,196,228,248,248,228,196,152,104, 60, 24, 4,
223 4, 24, 60,104,152,196,228,248,248,228,196,152,104, 60, 24, 4,
224 4, 20, 52, 96,136,180,212,228,228,212,180,136, 96, 52, 20, 4,
225 0, 20, 44, 80,116,152,180,196,196,180,152,116, 80, 44, 20, 0,
226 0, 16, 36, 60, 92,116,136,152,152,136,116, 92, 60, 36, 16, 0,
227 0, 8, 24, 44, 60, 80, 96,104,104, 96, 80, 60, 44, 24, 8, 0,
228 0, 4, 16, 24, 36, 44, 52, 60, 60, 52, 44, 36, 24, 16, 4, 0,
229 0, 4, 4, 8, 16, 20, 20, 24, 24, 20, 20, 16, 8, 4, 4, 0,
230 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0,
/* Alternative 32x32 weight table (1024 entries, row by row). Compared with
 * the other obmc32 variants it has a flatter shape: the border rows and
 * columns are not all zero and the centre peaks at 240 rather than 255.
 * NOTE(review): presumably an overlapped-block motion compensation window.
 * The name obmc32 is defined more than once in this file; presumably
 * preprocessor conditionals select a single variant - confirm. */
234 static const uint8_t obmc32[1024]={
235 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0,
236 0, 4, 4, 4, 8, 8, 8, 12, 12, 16, 16, 16, 20, 20, 20, 24, 24, 20, 20, 20, 16, 16, 16, 12, 12, 8, 8, 8, 4, 4, 4, 0,
237 0, 4, 8, 8, 12, 12, 16, 20, 20, 24, 28, 28, 32, 32, 36, 40, 40, 36, 32, 32, 28, 28, 24, 20, 20, 16, 12, 12, 8, 8, 4, 0,
238 0, 4, 8, 12, 16, 20, 24, 28, 28, 32, 36, 40, 44, 48, 52, 56, 56, 52, 48, 44, 40, 36, 32, 28, 28, 24, 20, 16, 12, 8, 4, 0,
239 4, 8, 12, 16, 20, 24, 28, 32, 40, 44, 48, 52, 56, 60, 64, 68, 68, 64, 60, 56, 52, 48, 44, 40, 32, 28, 24, 20, 16, 12, 8, 4,
240 4, 8, 12, 20, 24, 32, 36, 40, 48, 52, 56, 64, 68, 76, 80, 84, 84, 80, 76, 68, 64, 56, 52, 48, 40, 36, 32, 24, 20, 12, 8, 4,
241 4, 8, 16, 24, 28, 36, 44, 48, 56, 60, 68, 76, 80, 88, 96,100,100, 96, 88, 80, 76, 68, 60, 56, 48, 44, 36, 28, 24, 16, 8, 4,
242 4, 12, 20, 28, 32, 40, 48, 56, 64, 72, 80, 88, 92,100,108,116,116,108,100, 92, 88, 80, 72, 64, 56, 48, 40, 32, 28, 20, 12, 4,
243 4, 12, 20, 28, 40, 48, 56, 64, 72, 80, 88, 96,108,116,124,132,132,124,116,108, 96, 88, 80, 72, 64, 56, 48, 40, 28, 20, 12, 4,
244 4, 16, 24, 32, 44, 52, 60, 72, 80, 92,100,108,120,128,136,148,148,136,128,120,108,100, 92, 80, 72, 60, 52, 44, 32, 24, 16, 4,
245 4, 16, 28, 36, 48, 56, 68, 80, 88,100,112,120,132,140,152,164,164,152,140,132,120,112,100, 88, 80, 68, 56, 48, 36, 28, 16, 4,
246 4, 16, 28, 40, 52, 64, 76, 88, 96,108,120,132,144,156,168,180,180,168,156,144,132,120,108, 96, 88, 76, 64, 52, 40, 28, 16, 4,
247 8, 20, 32, 44, 56, 68, 80, 92,108,120,132,144,156,168,180,192,192,180,168,156,144,132,120,108, 92, 80, 68, 56, 44, 32, 20, 8,
248 8, 20, 32, 48, 60, 76, 88,100,116,128,140,156,168,184,196,208,208,196,184,168,156,140,128,116,100, 88, 76, 60, 48, 32, 20, 8,
249 8, 20, 36, 52, 64, 80, 96,108,124,136,152,168,180,196,212,224,224,212,196,180,168,152,136,124,108, 96, 80, 64, 52, 36, 20, 8,
250 8, 24, 40, 56, 68, 84,100,116,132,148,164,180,192,208,224,240,240,224,208,192,180,164,148,132,116,100, 84, 68, 56, 40, 24, 8,
251 8, 24, 40, 56, 68, 84,100,116,132,148,164,180,192,208,224,240,240,224,208,192,180,164,148,132,116,100, 84, 68, 56, 40, 24, 8,
252 8, 20, 36, 52, 64, 80, 96,108,124,136,152,168,180,196,212,224,224,212,196,180,168,152,136,124,108, 96, 80, 64, 52, 36, 20, 8,
253 8, 20, 32, 48, 60, 76, 88,100,116,128,140,156,168,184,196,208,208,196,184,168,156,140,128,116,100, 88, 76, 60, 48, 32, 20, 8,
254 8, 20, 32, 44, 56, 68, 80, 92,108,120,132,144,156,168,180,192,192,180,168,156,144,132,120,108, 92, 80, 68, 56, 44, 32, 20, 8,
255 4, 16, 28, 40, 52, 64, 76, 88, 96,108,120,132,144,156,168,180,180,168,156,144,132,120,108, 96, 88, 76, 64, 52, 40, 28, 16, 4,
256 4, 16, 28, 36, 48, 56, 68, 80, 88,100,112,120,132,140,152,164,164,152,140,132,120,112,100, 88, 80, 68, 56, 48, 36, 28, 16, 4,
257 4, 16, 24, 32, 44, 52, 60, 72, 80, 92,100,108,120,128,136,148,148,136,128,120,108,100, 92, 80, 72, 60, 52, 44, 32, 24, 16, 4,
258 4, 12, 20, 28, 40, 48, 56, 64, 72, 80, 88, 96,108,116,124,132,132,124,116,108, 96, 88, 80, 72, 64, 56, 48, 40, 28, 20, 12, 4,
259 4, 12, 20, 28, 32, 40, 48, 56, 64, 72, 80, 88, 92,100,108,116,116,108,100, 92, 88, 80, 72, 64, 56, 48, 40, 32, 28, 20, 12, 4,
260 4, 8, 16, 24, 28, 36, 44, 48, 56, 60, 68, 76, 80, 88, 96,100,100, 96, 88, 80, 76, 68, 60, 56, 48, 44, 36, 28, 24, 16, 8, 4,
261 4, 8, 12, 20, 24, 32, 36, 40, 48, 52, 56, 64, 68, 76, 80, 84, 84, 80, 76, 68, 64, 56, 52, 48, 40, 36, 32, 24, 20, 12, 8, 4,
262 4, 8, 12, 16, 20, 24, 28, 32, 40, 44, 48, 52, 56, 60, 64, 68, 68, 64, 60, 56, 52, 48, 44, 40, 32, 28, 24, 20, 16, 12, 8, 4,
263 0, 4, 8, 12, 16, 20, 24, 28, 28, 32, 36, 40, 44, 48, 52, 56, 56, 52, 48, 44, 40, 36, 32, 28, 28, 24, 20, 16, 12, 8, 4, 0,
264 0, 4, 8, 8, 12, 12, 16, 20, 20, 24, 28, 28, 32, 32, 36, 40, 40, 36, 32, 32, 28, 28, 24, 20, 20, 16, 12, 12, 8, 8, 4, 0,
265 0, 4, 4, 4, 8, 8, 8, 12, 12, 16, 16, 16, 20, 20, 20, 24, 24, 20, 20, 20, 16, 16, 16, 12, 12, 8, 8, 8, 4, 4, 4, 0,
266 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0,
/* Alternative 16x16 weight table (256 entries, row by row), symmetric in
 * both directions, with non-zero border values and a centre peak of 224.
 * NOTE(review): presumably the 16x16 overlapped-block motion compensation
 * window. The name obmc16 is defined more than once in this file; presumably
 * preprocessor conditionals select a single variant - confirm. */
269 static const uint8_t obmc16[256]={
270 0, 4, 4, 8, 8, 12, 12, 16, 16, 12, 12, 8, 8, 4, 4, 0,
271 4, 8, 16, 20, 28, 32, 40, 44, 44, 40, 32, 28, 20, 16, 8, 4,
272 4, 16, 24, 36, 44, 56, 64, 76, 76, 64, 56, 44, 36, 24, 16, 4,
273 8, 20, 36, 48, 64, 76, 92,104,104, 92, 76, 64, 48, 36, 20, 8,
274 8, 28, 44, 64, 80,100,116,136,136,116,100, 80, 64, 44, 28, 8,
275 12, 32, 56, 76,100,120,144,164,164,144,120,100, 76, 56, 32, 12,
276 12, 40, 64, 92,116,144,168,196,196,168,144,116, 92, 64, 40, 12,
277 16, 44, 76,104,136,164,196,224,224,196,164,136,104, 76, 44, 16,
278 16, 44, 76,104,136,164,196,224,224,196,164,136,104, 76, 44, 16,
279 12, 40, 64, 92,116,144,168,196,196,168,144,116, 92, 64, 40, 12,
280 12, 32, 56, 76,100,120,144,164,164,144,120,100, 76, 56, 32, 12,
281 8, 28, 44, 64, 80,100,116,136,136,116,100, 80, 64, 44, 28, 8,
282 8, 20, 36, 48, 64, 76, 92,104,104, 92, 76, 64, 48, 36, 20, 8,
283 4, 16, 24, 36, 44, 56, 64, 76, 76, 64, 56, 44, 36, 24, 16, 4,
284 4, 8, 16, 20, 28, 32, 40, 44, 44, 40, 32, 28, 20, 16, 8, 4,
285 0, 4, 4, 8, 8, 12, 12, 16, 16, 12, 12, 8, 8, 4, 4, 0,
/* 32x32 table of 8-bit weights stored row by row (1024 entries), symmetric
 * left-to-right and top-to-bottom, zero on the outer border and peaking at
 * 255 in the four centre entries.
 * NOTE(review): presumably an overlapped-block motion compensation window.
 * The name obmc32 is defined more than once in this file; presumably
 * preprocessor conditionals select a single variant - confirm. */
289 static const uint8_t obmc32[1024]={
290 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
291 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 4, 4, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0,
292 0, 0, 0, 4, 4, 4, 4, 8, 8, 12, 12, 12, 12, 16, 16, 16, 16, 16, 16, 12, 12, 12, 12, 8, 8, 4, 4, 4, 4, 0, 0, 0,
293 0, 0, 4, 4, 4, 8, 8, 12, 16, 20, 20, 24, 28, 28, 28, 28, 28, 28, 28, 28, 24, 20, 20, 16, 12, 8, 8, 4, 4, 4, 0, 0,
294 0, 0, 4, 4, 8, 12, 16, 20, 24, 28, 36, 40, 44, 44, 48, 48, 48, 48, 44, 44, 40, 36, 28, 24, 20, 16, 12, 8, 4, 4, 0, 0,
295 0, 0, 4, 8, 12, 20, 24, 32, 36, 44, 48, 56, 60, 64, 68, 68, 68, 68, 64, 60, 56, 48, 44, 36, 32, 24, 20, 12, 8, 4, 0, 0,
296 0, 4, 4, 8, 16, 24, 32, 40, 48, 60, 68, 76, 80, 84, 88, 92, 92, 88, 84, 80, 76, 68, 60, 48, 40, 32, 24, 16, 8, 4, 4, 0,
297 0, 4, 8, 12, 20, 32, 40, 52, 64, 76, 84, 96,104,108,112,116,116,112,108,104, 96, 84, 76, 64, 52, 40, 32, 20, 12, 8, 4, 0,
298 0, 4, 8, 16, 24, 36, 48, 64, 76, 92,104,116,124,132,136,140,140,136,132,124,116,104, 92, 76, 64, 48, 36, 24, 16, 8, 4, 0,
299 0, 4, 12, 20, 28, 44, 60, 76, 92,104,120,136,148,156,160,164,164,160,156,148,136,120,104, 92, 76, 60, 44, 28, 20, 12, 4, 0,
300 0, 4, 12, 20, 36, 48, 68, 84,104,120,140,152,168,176,184,188,188,184,176,168,152,140,120,104, 84, 68, 48, 36, 20, 12, 4, 0,
301 0, 4, 12, 24, 36, 56, 76, 96,116,136,152,172,184,196,204,208,208,204,196,184,172,152,136,116, 96, 76, 56, 36, 24, 12, 4, 0,
302 0, 4, 12, 24, 44, 60, 80,104,124,148,168,184,200,212,224,228,228,224,212,200,184,168,148,124,104, 80, 60, 44, 24, 12, 4, 0,
303 0, 4, 12, 28, 44, 64, 84,108,132,156,176,196,212,228,236,240,240,236,228,212,196,176,156,132,108, 84, 64, 44, 28, 12, 4, 0,
304 0, 4, 16, 28, 48, 68, 88,112,136,160,184,204,224,236,244,252,252,244,236,224,204,184,160,136,112, 88, 68, 48, 28, 16, 4, 0,
305 1, 4, 16, 28, 48, 68, 92,116,140,164,188,208,228,240,252,255,255,252,240,228,208,188,164,140,116, 92, 68, 48, 28, 16, 4, 1,
306 1, 4, 16, 28, 48, 68, 92,116,140,164,188,208,228,240,252,255,255,252,240,228,208,188,164,140,116, 92, 68, 48, 28, 16, 4, 1,
307 0, 4, 16, 28, 48, 68, 88,112,136,160,184,204,224,236,244,252,252,244,236,224,204,184,160,136,112, 88, 68, 48, 28, 16, 4, 0,
308 0, 4, 12, 28, 44, 64, 84,108,132,156,176,196,212,228,236,240,240,236,228,212,196,176,156,132,108, 84, 64, 44, 28, 12, 4, 0,
309 0, 4, 12, 24, 44, 60, 80,104,124,148,168,184,200,212,224,228,228,224,212,200,184,168,148,124,104, 80, 60, 44, 24, 12, 4, 0,
310 0, 4, 12, 24, 36, 56, 76, 96,116,136,152,172,184,196,204,208,208,204,196,184,172,152,136,116, 96, 76, 56, 36, 24, 12, 4, 0,
311 0, 4, 12, 20, 36, 48, 68, 84,104,120,140,152,168,176,184,188,188,184,176,168,152,140,120,104, 84, 68, 48, 36, 20, 12, 4, 0,
312 0, 4, 12, 20, 28, 44, 60, 76, 92,104,120,136,148,156,160,164,164,160,156,148,136,120,104, 92, 76, 60, 44, 28, 20, 12, 4, 0,
313 0, 4, 8, 16, 24, 36, 48, 64, 76, 92,104,116,124,132,136,140,140,136,132,124,116,104, 92, 76, 64, 48, 36, 24, 16, 8, 4, 0,
314 0, 4, 8, 12, 20, 32, 40, 52, 64, 76, 84, 96,104,108,112,116,116,112,108,104, 96, 84, 76, 64, 52, 40, 32, 20, 12, 8, 4, 0,
315 0, 4, 4, 8, 16, 24, 32, 40, 48, 60, 68, 76, 80, 84, 88, 92, 92, 88, 84, 80, 76, 68, 60, 48, 40, 32, 24, 16, 8, 4, 4, 0,
316 0, 0, 4, 8, 12, 20, 24, 32, 36, 44, 48, 56, 60, 64, 68, 68, 68, 68, 64, 60, 56, 48, 44, 36, 32, 24, 20, 12, 8, 4, 0, 0,
317 0, 0, 4, 4, 8, 12, 16, 20, 24, 28, 36, 40, 44, 44, 48, 48, 48, 48, 44, 44, 40, 36, 28, 24, 20, 16, 12, 8, 4, 4, 0, 0,
318 0, 0, 4, 4, 4, 8, 8, 12, 16, 20, 20, 24, 28, 28, 28, 28, 28, 28, 28, 28, 24, 20, 20, 16, 12, 8, 8, 4, 4, 4, 0, 0,
319 0, 0, 0, 4, 4, 4, 4, 8, 8, 12, 12, 12, 12, 16, 16, 16, 16, 16, 16, 12, 12, 12, 12, 8, 8, 4, 4, 4, 4, 0, 0, 0,
320 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 4, 4, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0,
321 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
/* 16x16 table of 8-bit weights stored row by row (256 entries), symmetric
 * in both directions and peaking at 252 in the four centre entries.
 * NOTE(review): presumably the 16x16 overlapped-block motion compensation
 * window. The name obmc16 is defined more than once in this file; presumably
 * preprocessor conditionals select a single variant - confirm. */
324 static const uint8_t obmc16[256]={
325 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0,
326 0, 0, 4, 8, 12, 16, 20, 20, 20, 20, 16, 12, 8, 4, 0, 0,
327 0, 4, 12, 24, 32, 44, 52, 56, 56, 52, 44, 32, 24, 12, 4, 0,
328 0, 8, 24, 40, 60, 80, 96,104,104, 96, 80, 60, 40, 24, 8, 0,
329 0, 12, 32, 64, 92,120,140,152,152,140,120, 92, 64, 32, 12, 0,
330 4, 16, 44, 80,120,156,184,196,196,184,156,120, 80, 44, 16, 4,
331 4, 20, 52, 96,140,184,216,232,232,216,184,140, 96, 52, 20, 4,
332 0, 20, 56,104,152,196,232,252,252,232,196,152,104, 56, 20, 0,
333 0, 20, 56,104,152,196,232,252,252,232,196,152,104, 56, 20, 0,
334 4, 20, 52, 96,140,184,216,232,232,216,184,140, 96, 52, 20, 4,
335 4, 16, 44, 80,120,156,184,196,196,184,156,120, 80, 44, 16, 4,
336 0, 12, 32, 64, 92,120,140,152,152,140,120, 92, 64, 32, 12, 0,
337 0, 8, 24, 40, 60, 80, 96,104,104, 96, 80, 60, 40, 24, 8, 0,
338 0, 4, 12, 24, 32, 44, 52, 56, 56, 52, 44, 32, 24, 12, 4, 0,
339 0, 0, 4, 8, 12, 16, 20, 20, 20, 20, 16, 12, 8, 4, 0, 0,
340 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0,
/* 8x8 table of 8-bit weights stored row by row (64 entries). Every entry is
 * the product of a row factor and a column factor from the sequence
 * 1,3,5,7,7,5,3,1, scaled by 4 (corner 4, centre 196).
 * NOTE(review): presumably the 8x8 overlapped-block motion compensation
 * window - confirm. */
346 static const uint8_t obmc8[64]={
347 4, 12, 20, 28, 28, 20, 12, 4,
348 12, 36, 60, 84, 84, 60, 36, 12,
349 20, 60,100,140,140,100, 60, 20,
350 28, 84,140,196,196,140, 84, 28,
351 28, 84,140,196,196,140, 84, 28,
352 20, 60,100,140,140,100, 60, 20,
353 12, 36, 60, 84, 84, 60, 36, 12,
354 4, 12, 20, 28, 28, 20, 12, 4,
/* 16-entry table of 8-bit weights.
 * NOTE(review): presumably the 4x4 counterpart of obmc8 - confirm. */
359 static const uint8_t obmc4[16]={
/* The four weight tables collected in one array, ordered from the largest
 * (obmc32 at index 0) to the smallest (obmc4 at index 3).
 * NOTE(review): presumably indexed by a block-size level - confirm. */
367 static const uint8_t * const obmc_tab[4]={
368 obmc32, obmc16, obmc8, obmc4
/* Square table with one entry per ordered pair of reference-frame indices.
 * NOTE(review): presumably holds motion-vector scale factors between two
 * references; where it is filled in is not shown here - confirm. */
371 static int scale_mv_ref[MAX_REF_FRAMES][MAX_REF_FRAMES];
/* Per-block record. BLOCK_INTRA (value 1) is a flag constant defined
 * alongside the fields; level is an 8-bit field the FIXME below suggests
 * could be folded into a type field.
 * NOTE(review): presumably one node of the motion/colour block tree - confirm. */
373 typedef struct BlockNode{
379 //#define TYPE_SPLIT 1
380 #define BLOCK_INTRA 1
382 //#define TYPE_NOCOLOR 4
383 uint8_t level; //FIXME merge into type?
/* Constant BlockNode whose three colour components are all 128.
 * NOTE(review): presumably substituted for neighbours that lie outside the
 * picture - confirm at the use sites. */
386 static const BlockNode null_block= { //FIXME add border maybe
387 .color= {128,128,128},
/* Size constants: MB_SIZE is 1 << LOG2_MB_SIZE, i.e. 16.
 * NOTE(review): ENCODER_EXTRA_BITS is presumably extra fractional precision
 * kept on the encoder side - confirm at the use sites. */
395 #define LOG2_MB_SIZE 4
396 #define MB_SIZE (1<<LOG2_MB_SIZE)
397 #define ENCODER_EXTRA_BITS 4
/* NOTE(review): presumably pairs a horizontal position with a coefficient
 * value, going by the name; SubBand keeps a pointer to these - confirm. */
400 typedef struct x_and_coeff{
/* Description of one subband: quantiser log (qlog), line stride, a pointer
 * to x_and_coeff entries, a link to a parent subband, and a bank of
 * (7 + 512) x 32 adaptive states.
 * NOTE(review): the states are presumably range-coder contexts - confirm. */
405 typedef struct SubBand{
410 int qlog; ///< log(qscale)/log[2^(1/6)]
415 int stride_line; ///< Stride measured in lines, not pixels.
416 x_and_coeff * x_coeff;
417 struct SubBand *parent;
418 uint8_t state[/*7*2*/ 7 + 512][32];
/* Per-plane state: four subbands for each of up to MAX_DECOMPOSITIONS
 * levels, plus the current and the previous set of HTAPS_MAX/2 signed
 * 8-bit filter coefficients.
 * NOTE(review): hcoeff presumably holds half-pel interpolation taps and
 * last_hcoeff the previous frame's values - confirm. */
421 typedef struct Plane{
424 SubBand band[MAX_DECOMPOSITIONS][4];
427 int8_t hcoeff[HTAPS_MAX/2];
432 int8_t last_hcoeff[HTAPS_MAX/2];
/* Top-level codec state. Holds the codec context pointer, the input /
 * current / reference pictures and their half-pel planes, adaptive state
 * arrays for the header and for blocks, the current and "last_" copies of
 * the decomposition settings, per-reference motion-vector and score arrays,
 * the DWT and IDWT work buffers, the per-plane data, a motion-estimation
 * cache (ME_CACHE_SIZE entries plus a generation counter) and an embedded
 * MpegEncContext that, per its own comment, is only there for motion
 * estimation. */
436 typedef struct SnowContext{
437 // MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to eventually make the motion estimation independent of MpegEncContext, so this will be removed then (FIXME/XXX)
439 AVCodecContext *avctx;
443 AVFrame input_picture; ///< new_picture with the internal linesizes
444 AVFrame current_picture;
445 AVFrame last_picture[MAX_REF_FRAMES];
446 uint8_t *halfpel_plane[MAX_REF_FRAMES][4][4];
447 AVFrame mconly_picture;
448 // uint8_t q_context[16];
449 uint8_t header_state[32];
450 uint8_t block_state[128 + 32*128];
454 int spatial_decomposition_type;
455 int last_spatial_decomposition_type;
456 int temporal_decomposition_type;
457 int spatial_decomposition_count;
458 int last_spatial_decomposition_count;
459 int temporal_decomposition_count;
462 int16_t (*ref_mvs[MAX_REF_FRAMES])[2];
463 uint32_t *ref_scores[MAX_REF_FRAMES];
464 DWTELEM *spatial_dwt_buffer;
465 IDWTELEM *spatial_idwt_buffer;
469 int spatial_scalability;
479 #define QBIAS_SHIFT 3
483 int last_block_max_depth;
484 Plane plane[MAX_PLANES];
486 #define ME_CACHE_SIZE 1024
487 int me_cache[ME_CACHE_SIZE];
488 int me_cache_generation;
491 MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to eventually make the motion estimation independent of MpegEncContext, so this will be removed then (FIXME/XXX)
/* Fetch the buffer for line_num: use the pointer already stored in
 * line[line_num] when it is non-NULL, otherwise call
 * slice_buffer_load_line() to attach one.
 * NOTE(review): both arguments are expanded more than once, so they must
 * not have side effects. */
504 #define slice_buffer_get_line(slice_buf, line_num) ((slice_buf)->line[line_num] ? (slice_buf)->line[line_num] : slice_buffer_load_line((slice_buf), (line_num)))
505 //#define slice_buffer_get_line(slice_buf, line_num) (slice_buffer_load_line((slice_buf), (line_num)))
/**
 * Set up a slice buffer.
 *
 * Stores the geometry and the base buffer pointer, allocates a zeroed table
 * of line_count line pointers, and allocates a stack of max_allocated_lines
 * line buffers, each line_width elements wide. The stack top is left
 * pointing at the last allocated buffer, i.e. every buffer starts out free.
 *
 * @param buf                 slice buffer to initialise
 * @param line_count          number of entries in the line pointer table
 * @param max_allocated_lines number of line buffers placed on the stack
 * @param line_width          length of each line buffer in IDWTELEM units
 * @param base_buffer         stored in buf->base_buffer
 *
 * NOTE(review): no check of the av_malloc()/av_mallocz() results is visible
 * here; confirm whether allocation failure is handled.
 */
507 static void slice_buffer_init(slice_buffer * buf, int line_count, int max_allocated_lines, int line_width, IDWTELEM * base_buffer)
511 buf->base_buffer = base_buffer;
512 buf->line_count = line_count;
513 buf->line_width = line_width;
514 buf->data_count = max_allocated_lines;
515 buf->line = av_mallocz (sizeof(IDWTELEM *) * line_count);
516 buf->data_stack = av_malloc (sizeof(IDWTELEM *) * max_allocated_lines);
518 for(i = 0; i < max_allocated_lines; i++){
519 buf->data_stack[i] = av_malloc (sizeof(IDWTELEM) * line_width);
522 buf->data_stack_top = max_allocated_lines - 1;
/**
 * Attach a free line buffer to the given line and return it.
 *
 * Asserts that the free stack is not empty, pops the top buffer from
 * data_stack and records it in buf->line[line]. There is also a path that
 * returns the existing buf->line[line]; presumably it is taken when the
 * line is already attached - confirm.
 *
 * @param buf  slice buffer
 * @param line index into buf->line
 */
525 static IDWTELEM * slice_buffer_load_line(slice_buffer * buf, int line)
529 assert(buf->data_stack_top >= 0);
530 // assert(!buf->line[line]);
532 return buf->line[line];
534 buffer = buf->data_stack[buf->data_stack_top];
535 buf->data_stack_top--;
536 buf->line[line] = buffer;
/**
 * Detach the buffer of one line and return it to the free stack.
 *
 * Asserts that line is within [0, line_count) and that a buffer is
 * currently attached, pushes that buffer back onto data_stack and clears
 * the line's slot to NULL.
 *
 * @param buf  slice buffer
 * @param line index of the line to release
 */
541 static void slice_buffer_release(slice_buffer * buf, int line)
545 assert(line >= 0 && line < buf->line_count);
546 assert(buf->line[line]);
548 buffer = buf->line[line];
549 buf->data_stack_top++;
550 buf->data_stack[buf->data_stack_top] = buffer;
551 buf->line[line] = NULL;
/**
 * Walk over every line index and hand lines back via slice_buffer_release().
 * NOTE(review): slice_buffer_release() asserts that the line is attached,
 * so presumably only attached lines are released here - confirm.
 */
554 static void slice_buffer_flush(slice_buffer * buf)
557 for(i = 0; i < buf->line_count; i++){
559 slice_buffer_release(buf, i);
/**
 * Free everything owned by a slice buffer.
 *
 * Flushes first so that the line buffers are back on the stack, then frees
 * each stack entry from the last to the first, and finally frees the stack
 * array and the line pointer table. av_freep() is given the address of
 * each pointer.
 */
563 static void slice_buffer_destroy(slice_buffer * buf)
566 slice_buffer_flush(buf);
568 for(i = buf->data_count - 1; i >= 0; i--){
569 av_freep(&buf->data_stack[i]);
571 av_freep(&buf->data_stack);
572 av_freep(&buf->line);
576 // Avoid a name clash on SGI IRIX
/* QEXPSHIFT is a shift amount derived from FRAC_BITS (15 - FRAC_BITS);
 * qexp is a table of QROOT bytes.
 * NOTE(review): qexp is presumably a quantiser exponent table filled in at
 * init time; the initialisation is not shown here - confirm. */
579 #define QEXPSHIFT (7-FRAC_BITS+8) //FIXME try to change this to 0
580 static uint8_t qexp[QROOT];
/**
 * Bring index v into the range [0, m].
 *
 * The loop runs for as long as v, compared as unsigned, exceeds m; because
 * of the unsigned comparison a negative v also counts as out of range.
 * NOTE(review): presumably v is reflected at the boundaries on each pass,
 * going by the name - confirm.
 */
582 static inline int mirror(int v, int m){
583 while((unsigned)v > (unsigned)m){
/**
 * Write the integer v to the range coder with adaptive binary states.
 *
 * State layout, following the inline notes: state[0] is the zero flag,
 * state[1..10] carry the unary-coded exponent e = av_log2(|v|),
 * state[11..21] carry the sign, state[22..31] carry the mantissa bits
 * (sent from the most significant downwards).
 *
 * A non-zero value starts with a 0 on state[0]; the 1 written on state[0]
 * is presumably the zero-value case, matching what get_symbol() tests.
 *
 * NOTE(review): several alternative encodings of the non-zero case appear
 * below (with and without clamping of the state index); presumably only
 * one is active in a given build - confirm against the surrounding
 * preprocessor conditionals.
 *
 * @param c         range coder to write to
 * @param state     adaptive states, at least 32 entries going by the
 *                  indices used here
 * @param v         value to encode
 * @param is_signed presumably enables the sign bit - TODO confirm
 */
590 static inline void put_symbol(RangeCoder *c, uint8_t *state, int v, int is_signed){
594 const int a= FFABS(v);
595 const int e= av_log2(a);
597 const int el= FFMIN(e, 10);
598 put_rac(c, state+0, 0);
601 put_rac(c, state+1+i, 1); //1..10
604 put_rac(c, state+1+9, 1); //1..10
606 put_rac(c, state+1+FFMIN(i,9), 0);
608 for(i=e-1; i>=el; i--){
609 put_rac(c, state+22+9, (a>>i)&1); //22..31
612 put_rac(c, state+22+i, (a>>i)&1); //22..31
616 put_rac(c, state+11 + el, v < 0); //11..21
619 put_rac(c, state+0, 0);
622 put_rac(c, state+1+i, 1); //1..10
624 put_rac(c, state+1+i, 0);
626 for(i=e-1; i>=0; i--){
627 put_rac(c, state+22+i, (a>>i)&1); //22..31
631 put_rac(c, state+11 + e, v < 0); //11..21
634 put_rac(c, state+1+FFMIN(i,9), 1); //1..10
636 put_rac(c, state+1+9, 0);
638 for(i=e-1; i>=0; i--){
639 put_rac(c, state+22+FFMIN(i,9), (a>>i)&1); //22..31
643 put_rac(c, state+11 + 10, v < 0); //11..21
647 put_rac(c, state+0, 1);
/**
 * Read an integer written by put_symbol() from the range coder.
 *
 * Reads the flag on state[0] first (presumably returning 0 when it is set).
 * Otherwise the exponent is read in unary using state[1 + min(e, 9)], the
 * mantissa bits are shifted in from the top using state[22 + min(i, 9)],
 * and, when is_signed is non-zero, a sign bit is read from
 * state[11 + min(e, 10)].
 *
 * @param c         range coder to read from
 * @param state     adaptive states, same layout as in put_symbol()
 * @param is_signed non-zero if a sign bit follows the magnitude
 */
651 static inline int get_symbol(RangeCoder *c, uint8_t *state, int is_signed){
652 if(get_rac(c, state+0))
657 while(get_rac(c, state+1 + FFMIN(e,9))){ //1..10
662 for(i=e-1; i>=0; i--){
663 a += a + get_rac(c, state+22 + FFMIN(i,9)); //22..31
666 e= -(is_signed && get_rac(c, state+11 + FFMIN(e,10))); //11..21
/**
 * Write v to the range coder using a code steered by log2.
 *
 * r starts at 1 << log2 (or 1 when log2 is negative). Flags are written on
 * state[4 + log2]: 1s first, then a terminating 0. After that the low
 * log2 bits of v are written, most significant first, on state[31 - i].
 * NOTE(review): presumably an escape (Exp-Golomb-like) scheme in which
 * log2 and r grow with every 1 flag - confirm.
 *
 * @param c     range coder to write to
 * @param state adaptive states
 * @param v     value to encode
 * @param log2  starting bit count; values below 0 start with r = 1
 */
671 static inline void put_symbol2(RangeCoder *c, uint8_t *state, int v, int log2){
673 int r= log2>=0 ? 1<<log2 : 1;
679 put_rac(c, state+4+log2, 1);
684 put_rac(c, state+4+log2, 0);
686 for(i=log2-1; i>=0; i--){
687 put_rac(c, state+31-i, (v>>i)&1);
/**
 * Read a value written by put_symbol2().
 *
 * r starts at 1 << log2 (or 1 when log2 is negative). Flags are read from
 * state[4 + log2] until one comes back 0, then log2 low bits are read,
 * most significant first, from state[31 - i] and added into v.
 * NOTE(review): presumably log2 and r are advanced on each 1 flag - confirm.
 *
 * @param c     range coder to read from
 * @param state adaptive states
 * @param log2  starting bit count; values below 0 start with r = 1
 */
691 static inline int get_symbol2(RangeCoder *c, uint8_t *state, int log2){
693 int r= log2>=0 ? 1<<log2 : 1;
698 while(get_rac(c, state+4+log2)){
704 for(i=log2-1; i>=0; i--){
705 v+= get_rac(c, state+31-i)<<i;
/**
 * One lifting step on a row of DWTELEM samples.
 *
 * Each output is src plus (or, when inverse is set, minus) the prediction
 * (mul * (ref[i] + ref[i+1]) + add) >> shift. The first and last outputs
 * use a single reference sample counted twice (mul * 2 * ref) in place of
 * the two-neighbour sum; mirror_left and mirror_right are derived from
 * highpass and the parity of width for that purpose.
 *
 * @param dst, src, ref              output, input and reference samples
 * @param dst_step, src_step, ref_step element strides of the three arrays
 * @param width                      full row width; w is derived from half of it
 * @param mul, add, shift            filter multiplier, rounding offset, right shift
 * @param highpass                   1 when producing the high-pass half, 0 for low-pass
 * @param inverse                    non-zero subtracts the prediction instead of adding it
 */
711 static av_always_inline void
712 lift(DWTELEM *dst, DWTELEM *src, DWTELEM *ref,
713 int dst_step, int src_step, int ref_step,
714 int width, int mul, int add, int shift,
715 int highpass, int inverse){
716 const int mirror_left= !highpass;
717 const int mirror_right= (width&1) ^ highpass;
718 const int w= (width>>1) - 1 + (highpass & width);
721 #define LIFT(src, ref, inv) ((src) + ((inv) ? - (ref) : + (ref)))
723 dst[0] = LIFT(src[0], ((mul*2*ref[0]+add)>>shift), inverse);
730 LIFT(src[i*src_step],
731 ((mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add)>>shift),
737 LIFT(src[w*src_step],
738 ((mul*2*ref[w*ref_step]+add)>>shift),
/**
 * Same lifting step as lift(), operating on IDWTELEM samples.
 *
 * Each output is src plus (or, when inverse is set, minus)
 * (mul * (ref[i] + ref[i+1]) + add) >> shift; the first and last outputs
 * use a single reference sample counted twice (mul * 2 * ref).
 *
 * @param dst, src, ref              output, input and reference samples
 * @param dst_step, src_step, ref_step element strides of the three arrays
 * @param width                      full row width; w is derived from half of it
 * @param mul, add, shift            filter multiplier, rounding offset, right shift
 * @param highpass                   1 when producing the high-pass half, 0 for low-pass
 * @param inverse                    non-zero subtracts the prediction instead of adding it
 */
743 static av_always_inline void
744 inv_lift(IDWTELEM *dst, IDWTELEM *src, IDWTELEM *ref,
745 int dst_step, int src_step, int ref_step,
746 int width, int mul, int add, int shift,
747 int highpass, int inverse){
748 const int mirror_left= !highpass;
749 const int mirror_right= (width&1) ^ highpass;
750 const int w= (width>>1) - 1 + (highpass & width);
753 #define LIFT(src, ref, inv) ((src) + ((inv) ? - (ref) : + (ref)))
755 dst[0] = LIFT(src[0], ((mul*2*ref[0]+add)>>shift), inverse);
762 LIFT(src[i*src_step],
763 ((mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add)>>shift),
769 LIFT(src[w*src_step],
770 ((mul*2*ref[w*ref_step]+add)>>shift),
/**
 * Lifting step variant for DWTELEM samples whose update also depends on the
 * source sample itself.
 *
 * The LIFTS macro receives mul * (ref[i] + ref[i+1]) + add as its ref
 * argument (mul * 2 * ref + add at the first and last sample). One arm
 * computes src + ((ref + 4 * src) >> shift); the other computes the
 * matching reverse mapping with a division by 20, biased by 5 << 25 and
 * 1 << 23 so that the dividend stays positive.
 * NOTE(review): the arms are presumably selected by inv - confirm.
 * LIFTS reads shift and add directly from the enclosing function scope.
 *
 * @param dst, src, ref              output, input and reference samples
 * @param dst_step, src_step, ref_step element strides of the three arrays
 * @param width                      full row width
 * @param mul, add, shift            filter multiplier, rounding offset, right shift
 * @param highpass                   1 when producing the high-pass half, 0 for low-pass
 * @param inverse                    passed to LIFTS as inv
 */
776 static av_always_inline void
777 liftS(DWTELEM *dst, DWTELEM *src, DWTELEM *ref,
778 int dst_step, int src_step, int ref_step,
779 int width, int mul, int add, int shift,
780 int highpass, int inverse){
781 const int mirror_left= !highpass;
782 const int mirror_right= (width&1) ^ highpass;
783 const int w= (width>>1) - 1 + (highpass & width);
787 #define LIFTS(src, ref, inv) \
789 (src) + (((ref) + 4*(src))>>shift): \
790 -((-16*(src) + (ref) + add/4 + 1 + (5<<25))/(5*4) - (1<<23)))
792 dst[0] = LIFTS(src[0], mul*2*ref[0]+add, inverse);
799 LIFTS(src[i*src_step],
800 mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add,
806 LIFTS(src[w*src_step], mul*2*ref[w*ref_step]+add, inverse);
/**
 * Same lifting step variant as liftS(), operating on IDWTELEM samples.
 *
 * The LIFTS macro receives mul * (ref[i] + ref[i+1]) + add as its ref
 * argument (mul * 2 * ref + add at the first and last sample). One arm
 * computes src + ((ref + 4 * src) >> shift); the other computes the
 * matching reverse mapping with a division by 20, biased by 5 << 25 and
 * 1 << 23 so that the dividend stays positive.
 * NOTE(review): the arms are presumably selected by inv - confirm.
 * LIFTS reads shift and add directly from the enclosing function scope.
 *
 * @param dst, src, ref              output, input and reference samples
 * @param dst_step, src_step, ref_step element strides of the three arrays
 * @param width                      full row width
 * @param mul, add, shift            filter multiplier, rounding offset, right shift
 * @param highpass                   1 when producing the high-pass half, 0 for low-pass
 * @param inverse                    passed to LIFTS as inv
 */
809 static av_always_inline void
810 inv_liftS(IDWTELEM *dst, IDWTELEM *src, IDWTELEM *ref,
811 int dst_step, int src_step, int ref_step,
812 int width, int mul, int add, int shift,
813 int highpass, int inverse){
814 const int mirror_left= !highpass;
815 const int mirror_right= (width&1) ^ highpass;
816 const int w= (width>>1) - 1 + (highpass & width);
820 #define LIFTS(src, ref, inv) \
822 (src) + (((ref) + 4*(src))>>shift): \
823 -((-16*(src) + (ref) + add/4 + 1 + (5<<25))/(5*4) - (1<<23)))
825 dst[0] = LIFTS(src[0], mul*2*ref[0]+add, inverse);
832 LIFTS(src[i*src_step],
833 mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add,
839 LIFTS(src[w*src_step], mul*2*ref[w*ref_step]+add, inverse);
/**
 * Horizontal forward pass of the 53i transform on one row of `width`
 * samples, in place in b.
 */
844 static void horizontal_decompose53i(DWTELEM *b, int width){
/* width2 = number of odd-indexed samples, w2 = number of even-indexed ones. */
846 const int width2= width>>1;
848 const int w2= (width+1)>>1;
/* De-interleave: odd-indexed samples go to the upper part of temp. */
850 for(x=0; x<width2; x++){
852 temp[x+w2]= b[2*x + 1];
/* NOTE(review): the A1..A4 statements below look like a hand-unrolled
 * alternative to the two lift() calls - confirm which one is compiled. */
866 for(x=1; x+1<width2; x+=2){
870 A2 += (A1 + A3 + 2)>>2;
874 A1= temp[x+1+width2];
877 A4 += (A1 + A3 + 2)>>2;
883 A2 += (A1 + A3 + 2)>>2;
/* First pass writes the upper half of b (mul=-1, add=0, shift=1, highpass=1);
 * second pass writes the lower half from temp and that result
 * (mul=1, add=2, shift=2, highpass=0). Argument order presumably matches
 * the liftS() parameter list. */
888 lift(b+w2, temp+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 0);
889 lift(b , temp , b+w2, 1, 1, 1, width, 1, 2, 2, 0, 0);
/**
 * For each of the first `width` entries, subtract from b1 the sum of the
 * matching b0 and b2 entries shifted right by one. b0 and b2 are only read.
 */
893 static void vertical_decompose53iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
896 for(i=0; i<width; i++){
897 b1[i] -= (b0[i] + b2[i])>>1;
/**
 * For each of the first `width` entries, add to b1 the sum of the matching
 * b0 and b2 entries plus 2, shifted right by two. b0 and b2 are only read.
 */
901 static void vertical_decompose53iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
904 for(i=0; i<width; i++){
905 b1[i] += (b0[i] + b2[i] + 2)>>2;
/**
 * Forward 53i transform of a width x height plane stored in buffer with the
 * given row stride, processing two rows per loop iteration.
 */
909 static void spatial_decompose53i(DWTELEM *buffer, int width, int height, int stride){
/* Start with row pointers for indices -3 and -2; mirror() presumably
 * reflects out-of-range row indices back into [0, height-1]. */
911 DWTELEM *b0= buffer + mirror(-2-1, height-1)*stride;
912 DWTELEM *b1= buffer + mirror(-2 , height-1)*stride;
914 for(y=-2; y<height; y+=2){
915 DWTELEM *b2= buffer + mirror(y+1, height-1)*stride;
916 DWTELEM *b3= buffer + mirror(y+2, height-1)*stride;
/* Comparing against (unsigned)height converts the left side to unsigned,
 * so a negative row index fails the test: one compare checks 0 <= row < height. */
918 if(y+1<(unsigned)height) horizontal_decompose53i(b2, width);
919 if(y+2<(unsigned)height) horizontal_decompose53i(b3, width);
/* Vertical steps run only once the row they modify is a real row. */
921 if(y+1<(unsigned)height) vertical_decompose53iH0(b1, b2, b3, width);
922 if(y+0<(unsigned)height) vertical_decompose53iL0(b0, b1, b2, width);
/**
 * Horizontal forward pass of the 97i transform on one row of `width`
 * samples, in place in b, as four lifting passes using the W_A*, W_B*,
 * W_C* and W_D* constants.
 */
929 static void horizontal_decompose97i(DWTELEM *b, int width){
/* Number of even-indexed samples; offset of the second half in b and temp. */
931 const int w2= (width+1)>>1;
/* The first two passes read b with stride 2 (odd samples start at b+1) and
 * write the two halves of temp contiguously; the last two passes write the
 * halves of b back from temp. Only the first call passes inverse=1. */
933 lift (temp+w2, b +1, b , 1, 2, 2, width, W_AM, W_AO, W_AS, 1, 1);
934 liftS(temp , b , temp+w2, 1, 2, 1, width, W_BM, W_BO, W_BS, 0, 0);
935 lift (b +w2, temp+w2, temp , 1, 1, 1, width, W_CM, W_CO, W_CS, 1, 0);
936 lift (b , temp , b +w2, 1, 1, 1, width, W_DM, W_DO, W_DS, 0, 0);
/**
 * For each of the first `width` entries, subtract from b1 the value
 * (W_AM*(b0+b2)+W_AO)>>W_AS. b0 and b2 are only read.
 */
940 static void vertical_decompose97iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
943 for(i=0; i<width; i++){
944 b1[i] -= (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
/**
 * For each of the first `width` entries, add to b1 the value
 * (W_CM*(b0+b2)+W_CO)>>W_CS. b0 and b2 are only read.
 */
948 static void vertical_decompose97iH1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
951 for(i=0; i<width; i++){
952 b1[i] += (W_CM*(b0[i] + b2[i])+W_CO)>>W_CS;
/**
 * "B" step of the forward 97i transform applied column-wise to b1, reading
 * b0 and b2.
 */
956 static void vertical_decompose97iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
959 for(i=0; i<width; i++){
/* NOTE(review): the two assignments below look like alternative forms of
 * the same step (shift-based vs. division-based, mirroring the two LIFTS
 * formulas in liftS) - confirm which one is compiled. */
961 b1[i] -= (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS;
963 b1[i] = (16*4*b1[i] - 4*(b0[i] + b2[i]) + W_BO*5 + (5<<27)) / (5*16) - (1<<23);
/**
 * For each of the first `width` entries, add to b1 the value
 * (W_DM*(b0+b2)+W_DO)>>W_DS. b0 and b2 are only read.
 */
968 static void vertical_decompose97iL1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
971 for(i=0; i<width; i++){
972 b1[i] += (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS;
/**
 * Forward 97i transform of a width x height plane stored in buffer with the
 * given row stride, advancing two rows per loop iteration over a six-row
 * window b0..b5.
 */
976 static void spatial_decompose97i(DWTELEM *buffer, int width, int height, int stride){
/* Initial window rows -5..-2; mirror() presumably reflects out-of-range
 * indices back into [0, height-1]. */
978 DWTELEM *b0= buffer + mirror(-4-1, height-1)*stride;
979 DWTELEM *b1= buffer + mirror(-4 , height-1)*stride;
980 DWTELEM *b2= buffer + mirror(-4+1, height-1)*stride;
981 DWTELEM *b3= buffer + mirror(-4+2, height-1)*stride;
983 for(y=-4; y<height; y+=2){
984 DWTELEM *b4= buffer + mirror(y+3, height-1)*stride;
985 DWTELEM *b5= buffer + mirror(y+4, height-1)*stride;
/* The unsigned comparison rejects negative row indices as well as indices
 * that are >= height. New rows are transformed horizontally first. */
987 if(y+3<(unsigned)height) horizontal_decompose97i(b4, width);
988 if(y+4<(unsigned)height) horizontal_decompose97i(b5, width);
/* The four vertical steps trail each other by one row. */
990 if(y+3<(unsigned)height) vertical_decompose97iH0(b3, b4, b5, width);
991 if(y+2<(unsigned)height) vertical_decompose97iL0(b2, b3, b4, width);
992 if(y+1<(unsigned)height) vertical_decompose97iH1(b1, b2, b3, width);
993 if(y+0<(unsigned)height) vertical_decompose97iL1(b0, b1, b2, width);
/**
 * Apply decomposition_count levels of the forward transform selected by
 * type (DWT_97 or DWT_53) to buffer.
 */
1002 void ff_spatial_dwt(DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
1005 for(level=0; level<decomposition_count; level++){
/* Each level works on dimensions shifted right by level, with the row
 * stride shifted left by level. */
1007 case DWT_97: spatial_decompose97i(buffer, width>>level, height>>level, stride<<level); break;
1008 case DWT_53: spatial_decompose53i(buffer, width>>level, height>>level, stride<<level); break;
/**
 * Horizontal inverse pass of the 53i transform on one row of `width`
 * samples, in place in b.
 */
1013 static void horizontal_compose53i(IDWTELEM *b, int width){
/* Scratch row sized by width (variable-length array on the stack). */
1014 IDWTELEM temp[width];
1015 const int width2= width>>1;
1016 const int w2= (width+1)>>1;
/* NOTE(review): the A1..A4 statements below look like a hand-unrolled
 * alternative to the two inv_lift() calls - confirm which one is compiled. */
1028 for(x=1; x+1<width2; x+=2){
1032 A2 += (A1 + A3 + 2)>>2;
1036 A1= temp[x+1+width2];
1039 A4 += (A1 + A3 + 2)>>2;
1045 A2 += (A1 + A3 + 2)>>2;
/* Two passes with inverse=1: the lower half of temp is rebuilt from both
 * halves of b, then the upper half from b+w2 and the new lower half. */
1049 inv_lift(temp , b , b+w2, 1, 1, 1, width, 1, 2, 2, 0, 1);
1050 inv_lift(temp+w2, b+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 1);
/* Re-interleave: the upper half of temp supplies the odd-indexed samples. */
1052 for(x=0; x<width2; x++){
1054 b[2*x + 1]= temp[x+w2];
/**
 * For each of the first `width` entries, add to b1 the sum of the matching
 * b0 and b2 entries shifted right by one (undoes vertical_decompose53iH0).
 */
1060 static void vertical_compose53iH0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
1063 for(i=0; i<width; i++){
1064 b1[i] += (b0[i] + b2[i])>>1;
/**
 * For each of the first `width` entries, subtract from b1 the sum of the
 * matching b0 and b2 entries plus 2, shifted right by two (undoes
 * vertical_decompose53iL0).
 */
1068 static void vertical_compose53iL0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
1071 for(i=0; i<width; i++){
1072 b1[i] -= (b0[i] + b2[i] + 2)>>2;
/**
 * Initialise the 53i inverse-transform state cs for slice-buffer operation:
 * b0 and b1 are set to the lines for row indices -2 and -1 after mirror(),
 * scaled by stride_line and fetched from sb.
 */
1076 static void spatial_compose53i_buffered_init(DWTCompose *cs, slice_buffer * sb, int height, int stride_line){
1077 cs->b0 = slice_buffer_get_line(sb, mirror(-1-1, height-1) * stride_line);
1078 cs->b1 = slice_buffer_get_line(sb, mirror(-1 , height-1) * stride_line);
/**
 * Initialise the 53i inverse-transform state cs for a flat buffer: b0 and b1
 * point at the rows for indices -2 and -1 after mirror(), using the given
 * row stride.
 */
1082 static void spatial_compose53i_init(DWTCompose *cs, IDWTELEM *buffer, int height, int stride){
1083 cs->b0 = buffer + mirror(-1-1, height-1)*stride;
1084 cs->b1 = buffer + mirror(-1 , height-1)*stride;
/**
 * One step of the 53i inverse transform on slice-buffer lines: vertical
 * steps first, then the horizontal pass on the rows that are complete.
 */
1088 static void spatial_compose53i_dy_buffered(DWTCompose *cs, slice_buffer * sb, int width, int height, int stride_line){
/* b0/b1 come from the saved state; b2/b3 are the next two lines. */
1091 IDWTELEM *b0= cs->b0;
1092 IDWTELEM *b1= cs->b1;
1093 IDWTELEM *b2= slice_buffer_get_line(sb, mirror(y+1, height-1) * stride_line);
1094 IDWTELEM *b3= slice_buffer_get_line(sb, mirror(y+2, height-1) * stride_line);
/* The unsigned comparison rejects negative row indices and indices >= height. */
1096 if(y+1<(unsigned)height) vertical_compose53iL0(b1, b2, b3, width);
1097 if(y+0<(unsigned)height) vertical_compose53iH0(b0, b1, b2, width);
1099 if(y-1<(unsigned)height) horizontal_compose53i(b0, width);
1100 if(y+0<(unsigned)height) horizontal_compose53i(b1, width);
/**
 * One step of the 53i inverse transform on a flat buffer: vertical steps
 * first, then the horizontal pass on the rows that are complete.
 */
1107 static void spatial_compose53i_dy(DWTCompose *cs, IDWTELEM *buffer, int width, int height, int stride){
/* b0/b1 come from the saved state; b2/b3 are the next two rows. */
1109 IDWTELEM *b0= cs->b0;
1110 IDWTELEM *b1= cs->b1;
1111 IDWTELEM *b2= buffer + mirror(y+1, height-1)*stride;
1112 IDWTELEM *b3= buffer + mirror(y+2, height-1)*stride;
/* The unsigned comparison rejects negative row indices and indices >= height. */
1114 if(y+1<(unsigned)height) vertical_compose53iL0(b1, b2, b3, width);
1115 if(y+0<(unsigned)height) vertical_compose53iH0(b0, b1, b2, width);
1117 if(y-1<(unsigned)height) horizontal_compose53i(b0, width);
1118 if(y+0<(unsigned)height) horizontal_compose53i(b1, width);
/**
 * Whole-plane 53i inverse transform: initialise the state, then call the
 * per-step function until cs.y exceeds height.
 */
1125 static void av_unused spatial_compose53i(IDWTELEM *buffer, int width, int height, int stride){
1127 spatial_compose53i_init(&cs, buffer, height, stride);
1128 while(cs.y <= height)
1129 spatial_compose53i_dy(&cs, buffer, width, height, stride);
/**
 * Horizontal inverse pass of the 97i transform on one row of `width`
 * samples, in place in b, as four lifting passes applied in D, C, B, A order.
 */
1133 void ff_snow_horizontal_compose97i(IDWTELEM *b, int width){
/* Scratch row sized by width (variable-length array on the stack). */
1134 IDWTELEM temp[width];
1135 const int w2= (width+1)>>1;
/* The first two passes build both halves of temp from b; the last two write
 * b back with destination stride 2, i.e. interleaved (even samples at b,
 * odd samples at b+1). Only the last call passes inverse=0. */
1137 inv_lift (temp , b , b +w2, 1, 1, 1, width, W_DM, W_DO, W_DS, 0, 1);
1138 inv_lift (temp+w2, b +w2, temp , 1, 1, 1, width, W_CM, W_CO, W_CS, 1, 1);
1139 inv_liftS(b , temp , temp+w2, 2, 1, 1, width, W_BM, W_BO, W_BS, 0, 1);
1140 inv_lift (b+1 , temp+w2, b , 2, 1, 2, width, W_AM, W_AO, W_AS, 1, 0);
/**
 * For each of the first `width` entries, add to b1 the value
 * (W_AM*(b0+b2)+W_AO)>>W_AS (undoes vertical_decompose97iH0).
 */
1143 static void vertical_compose97iH0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
1146 for(i=0; i<width; i++){
1147 b1[i] += (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
/**
 * For each of the first `width` entries, subtract from b1 the value
 * (W_CM*(b0+b2)+W_CO)>>W_CS (undoes vertical_decompose97iH1).
 */
1151 static void vertical_compose97iH1(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
1154 for(i=0; i<width; i++){
1155 b1[i] -= (W_CM*(b0[i] + b2[i])+W_CO)>>W_CS;
/**
 * "B" step of the inverse 97i transform applied column-wise to b1, reading
 * b0 and b2.
 */
1159 static void vertical_compose97iL0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
1162 for(i=0; i<width; i++){
/* NOTE(review): the two updates below look like alternative forms of the
 * same step (the second adds a 4*b1 term, as the first LIFTS formula
 * does) - confirm which one is compiled. */
1164 b1[i] += (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS;
1166 b1[i] += (W_BM*(b0[i] + b2[i])+4*b1[i]+W_BO)>>W_BS;
/**
 * For each of the first `width` entries, subtract from b1 the value
 * (W_DM*(b0+b2)+W_DO)>>W_DS (undoes vertical_decompose97iL1).
 */
1171 static void vertical_compose97iL1(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
1174 for(i=0; i<width; i++){
1175 b1[i] -= (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS;
/**
 * Fused vertical inverse 97i step over a six-row window: per column it
 * applies the D step to b4, the C step to b3, the B step to b2 and the
 * A step to b1, in that order, so each step sees the rows updated before it.
 */
1179 void ff_snow_vertical_compose97i(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, IDWTELEM *b3, IDWTELEM *b4, IDWTELEM *b5, int width){
1182 for(i=0; i<width; i++){
1183 b4[i] -= (W_DM*(b3[i] + b5[i])+W_DO)>>W_DS;
1184 b3[i] -= (W_CM*(b2[i] + b4[i])+W_CO)>>W_CS;
/* NOTE(review): the two b2 updates look like alternative forms of the
 * B step, as in vertical_compose97iL0 - confirm which one is compiled. */
1186 b2[i] += (W_BM*(b1[i] + b3[i])+W_BO)>>W_BS;
1188 b2[i] += (W_BM*(b1[i] + b3[i])+4*b2[i]+W_BO)>>W_BS;
1190 b1[i] += (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
/**
 * Initialise the 97i inverse-transform state cs for slice-buffer operation:
 * b0..b3 are set to the lines for row indices -4..-1 after mirror(), scaled
 * by stride_line and fetched from sb.
 */
1194 static void spatial_compose97i_buffered_init(DWTCompose *cs, slice_buffer * sb, int height, int stride_line){
1195 cs->b0 = slice_buffer_get_line(sb, mirror(-3-1, height-1) * stride_line);
1196 cs->b1 = slice_buffer_get_line(sb, mirror(-3 , height-1) * stride_line);
1197 cs->b2 = slice_buffer_get_line(sb, mirror(-3+1, height-1) * stride_line);
1198 cs->b3 = slice_buffer_get_line(sb, mirror(-3+2, height-1) * stride_line);
/**
 * Initialise the 97i inverse-transform state cs for a flat buffer: b0..b3
 * point at the rows for indices -4..-1 after mirror(), using the given row
 * stride.
 */
1202 static void spatial_compose97i_init(DWTCompose *cs, IDWTELEM *buffer, int height, int stride){
1203 cs->b0 = buffer + mirror(-3-1, height-1)*stride;
1204 cs->b1 = buffer + mirror(-3 , height-1)*stride;
1205 cs->b2 = buffer + mirror(-3+1, height-1)*stride;
1206 cs->b3 = buffer + mirror(-3+2, height-1)*stride;
/**
 * One step of the 97i inverse transform on slice-buffer lines, using the
 * function pointers in dsp where available.
 */
1210 static void spatial_compose97i_dy_buffered(DSPContext *dsp, DWTCompose *cs, slice_buffer * sb, int width, int height, int stride_line){
/* b0..b3 come from the saved state; b4/b5 are the next two lines. */
1213 IDWTELEM *b0= cs->b0;
1214 IDWTELEM *b1= cs->b1;
1215 IDWTELEM *b2= cs->b2;
1216 IDWTELEM *b3= cs->b3;
1217 IDWTELEM *b4= slice_buffer_get_line(sb, mirror(y + 3, height - 1) * stride_line);
1218 IDWTELEM *b5= slice_buffer_get_line(sb, mirror(y + 4, height - 1) * stride_line);
/* When the whole six-row window lies inside the picture, the fused
 * vertical routine handles all four steps in one call. */
1220 if(y>0 && y+4<height){
1221 dsp->vertical_compose97i(b0, b1, b2, b3, b4, b5, width);
/* Presumably the fallback branch: individual steps, each guarded by an
 * unsigned range check that also rejects negative row indices. */
1223 if(y+3<(unsigned)height) vertical_compose97iL1(b3, b4, b5, width);
1224 if(y+2<(unsigned)height) vertical_compose97iH1(b2, b3, b4, width);
1225 if(y+1<(unsigned)height) vertical_compose97iL0(b1, b2, b3, width);
1226 if(y+0<(unsigned)height) vertical_compose97iH0(b0, b1, b2, width);
/* Horizontal pass on the two rows whose vertical processing is finished. */
1229 if(y-1<(unsigned)height) dsp->horizontal_compose97i(b0, width);
1230 if(y+0<(unsigned)height) dsp->horizontal_compose97i(b1, width);
/**
 * One step of the 97i inverse transform on a flat buffer: four guarded
 * vertical steps followed by the horizontal pass on the finished rows.
 */
1239 static void spatial_compose97i_dy(DWTCompose *cs, IDWTELEM *buffer, int width, int height, int stride){
/* b0..b3 come from the saved state; b4/b5 are the next two rows. */
1241 IDWTELEM *b0= cs->b0;
1242 IDWTELEM *b1= cs->b1;
1243 IDWTELEM *b2= cs->b2;
1244 IDWTELEM *b3= cs->b3;
1245 IDWTELEM *b4= buffer + mirror(y+3, height-1)*stride;
1246 IDWTELEM *b5= buffer + mirror(y+4, height-1)*stride;
/* The unsigned comparison rejects negative row indices and indices >= height. */
1248 if(y+3<(unsigned)height) vertical_compose97iL1(b3, b4, b5, width);
1249 if(y+2<(unsigned)height) vertical_compose97iH1(b2, b3, b4, width);
1250 if(y+1<(unsigned)height) vertical_compose97iL0(b1, b2, b3, width);
1251 if(y+0<(unsigned)height) vertical_compose97iH0(b0, b1, b2, width);
1253 if(y-1<(unsigned)height) ff_snow_horizontal_compose97i(b0, width);
1254 if(y+0<(unsigned)height) ff_snow_horizontal_compose97i(b1, width);
/**
 * Whole-plane 97i inverse transform: initialise the state, then call the
 * per-step function until cs.y exceeds height.
 */
1263 static void av_unused spatial_compose97i(IDWTELEM *buffer, int width, int height, int stride){
1265 spatial_compose97i_init(&cs, buffer, height, stride);
1266 while(cs.y <= height)
1267 spatial_compose97i_dy(&cs, buffer, width, height, stride);
/**
 * Initialise one DWTCompose state per decomposition level (cs[level]) for
 * slice-buffered inverse transformation, iterating from the highest level
 * index down to 0.
 */
1270 static void ff_spatial_idwt_buffered_init(DWTCompose *cs, slice_buffer * sb, int width, int height, int stride_line, int type, int decomposition_count){
1272 for(level=decomposition_count-1; level>=0; level--){
/* Each level sees height shifted right and the line stride shifted left
 * by level. */
1274 case DWT_97: spatial_compose97i_buffered_init(cs+level, sb, height>>level, stride_line<<level); break;
1275 case DWT_53: spatial_compose53i_buffered_init(cs+level, sb, height>>level, stride_line<<level); break;
/**
 * Advance the slice-buffered inverse transform of every level far enough to
 * cover output row y.
 */
1280 static void ff_spatial_idwt_buffered_slice(DSPContext *dsp, DWTCompose *cs, slice_buffer * slice_buf, int width, int height, int stride_line, int type, int decomposition_count, int y){
/* Extra rows to process beyond y at each level: 3 when type is 1, else 5. */
1281 const int support = type==1 ? 3 : 5;
1285 for(level=decomposition_count-1; level>=0; level--){
/* Step this level until its row counter passes the target, capped at the
 * level's height. */
1286 while(cs[level].y <= FFMIN((y>>level)+support, height>>level)){
1288 case DWT_97: spatial_compose97i_dy_buffered(dsp, cs+level, slice_buf, width>>level, height>>level, stride_line<<level);
1290 case DWT_53: spatial_compose53i_dy_buffered(cs+level, slice_buf, width>>level, height>>level, stride_line<<level);
/**
 * Decode the coefficients of subband b from the range coder s->c into the
 * sparse list b->x_coeff, whose entries hold a column x and a coded value.
 * An entry with x = w+1 is written as an end marker (presumably once per row).
 *
 * @param parent  parent subband used for context, or NULL
 */
1297 static inline void unpack_coeffs(SnowContext *s, SubBand *b, SubBand * parent, int orientation){
1298 const int w= b->width;
1299 const int h= b->height;
/* Cursors into this band's list and, when a parent exists, into the
 * parent's list. */
1304 x_and_coeff *xc= b->x_coeff;
1305 x_and_coeff *prev_xc= NULL;
1306 x_and_coeff *prev2_xc= xc;
1307 x_and_coeff *parent_xc= parent ? parent->x_coeff : NULL;
1308 x_and_coeff *prev_parent_xc= parent_xc;
/* Read the number of runs, then the first run length if there is one. */
1310 runs= get_symbol2(&s->c, b->state[30], 0);
1311 if(runs-- > 0) run= get_symbol2(&s->c, b->state[1], 3);
/* Context values; judging by the names these are the left-top, top and
 * right-top neighbours, read through prev_xc. */
1316 int lt=0, t=0, rt=0;
1318 if(y && prev_xc->x == 0){
1330 if(prev_xc->x == x + 1)
/* The parent entry is matched at half the column index. */
1336 if(x>>1 > parent_xc->x){
1339 if(x>>1 == parent_xc->x){
1340 p= parent_xc->coeff;
/* If any context value is non-zero, a context derived from them selects
 * the coder state. */
1343 if(/*ll|*/l|lt|t|rt|p){
1344 int context= av_log2(/*FFABS(ll) + */3*(l>>1) + (lt>>1) + (t&~1) + (rt>>1) + (p>>1));
1346 v=get_rac(&s->c, &b->state[0][context]);
/* Coded value layout: 2*(symbol+1) with one more decoded bit added as bit 0. */
1348 v= 2*(get_symbol2(&s->c, b->state[context + 2], context-4) + 1);
1349 v+=get_rac(&s->c, &b->state[0][16 + 1 + 3 + quant3bA[l&0xFF] + 3*quant3bA[t&0xFF]]);
/* Same decoding with fixed state indices (context 0). */
1356 if(runs-- > 0) run= get_symbol2(&s->c, b->state[1], 3);
1358 v= 2*(get_symbol2(&s->c, b->state[0 + 2], 0-4) + 1);
1359 v+=get_rac(&s->c, &b->state[0][16 + 1 + 3]);
/* Limit the skip length by the remaining run and by the next entry of the
 * previous row (or the row end on the first row), then by the parent entry. */
1368 if(y) max_run= FFMIN(run, prev_xc->x - x - 2);
1369 else max_run= FFMIN(run, w-x-1);
1371 max_run= FFMIN(max_run, 2*parent_xc->x - x - 1);
1377 (xc++)->x= w+1; //end marker
/* Advance the parent cursor to its end marker (x == parent->width+1). */
1383 while(parent_xc->x != parent->width+1)
1386 prev_parent_xc= parent_xc;
1388 parent_xc= prev_parent_xc;
1393 (xc++)->x= w+1; //end marker
/**
 * Dequantise rows start_y..h-1 of subband b from its sparse list b->x_coeff
 * into the matching slice-buffer lines.
 *
 * @param save_state  save_state[0] carries the list index from one call to
 *                    the next
 */
1397 static inline void decode_subband_slice_buffered(SnowContext *s, SubBand *b, slice_buffer * sb, int start_y, int h, int save_state[1]){
1398 const int w= b->width;
/* Quantiser scale from the clipped sum of the global and band qlog, and
 * the bias derived from it. */
1400 const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
1401 int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
1402 int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
1405 if(b->ibuf == s->spatial_idwt_buffer || s->qlog == LOSSLESS_QLOG){
1410 /* If we are on the second or later slice, restore our index. */
1412 new_index = save_state[0];
1415 for(y=start_y; y<h; y++){
/* Clear the band's part of the line before scattering coefficients. */
1418 IDWTELEM * line = slice_buffer_get_line(sb, y * b->stride_line + b->buf_y_offset) + b->buf_x_offset;
1419 memset(line, 0, b->width*sizeof(IDWTELEM));
1420 v = b->x_coeff[new_index].coeff;
1421 x = b->x_coeff[new_index++].x;
/* v>>1 is scaled by qmul; bit 0 of v selects the sign: u is 0 or -1, and
 * (t^u) - u yields t or -t respectively. */
1423 register int t= ( (v>>1)*qmul + qadd)>>QEXPSHIFT;
1424 register int u= -(v&1);
1425 line[x] = (t^u) - u;
1427 v = b->x_coeff[new_index].coeff;
1428 x = b->x_coeff[new_index++].x;
1432 /* Save our variables for the next slice. */
1433 save_state[0] = new_index;
/**
 * Set every coder state byte to MID_STATE: the per-band state arrays of all
 * three planes, plus the header and block state arrays.
 */
1438 static void reset_contexts(SnowContext *s){ //FIXME better initial contexts
1439 int plane_index, level, orientation;
1441 for(plane_index=0; plane_index<3; plane_index++){
1442 for(level=0; level<MAX_DECOMPOSITIONS; level++){
/* Level 0 has four orientations (0..3); higher levels only 1..3. */
1443 for(orientation=level ? 1:0; orientation<4; orientation++){
1444 memset(s->plane[plane_index].band[level][orientation].state, MID_STATE, sizeof(s->plane[plane_index].band[level][orientation].state));
1448 memset(s->header_state, MID_STATE, sizeof(s->header_state));
1449 memset(s->block_state, MID_STATE, sizeof(s->block_state));
/**
 * Allocate the zero-initialised BlockNode array s->block for the current
 * picture dimensions.
 */
1452 static int alloc_blocks(SnowContext *s){
/* -((-x)>>n) rounds x / 2^n upwards (relies on arithmetic right shift of
 * negative values). */
1453 int w= -((-s->avctx->width )>>LOG2_MB_SIZE);
1454 int h= -((-s->avctx->height)>>LOG2_MB_SIZE);
/* One node per smallest block: each depth level quadruples the count.
 * NOTE(review): no overflow guard on the size computation or check of the
 * allocation result is visible here - confirm. */
1460 s->block= av_mallocz(w * h * sizeof(BlockNode) << (s->block_max_depth*2));
/**
 * Update range coder d from s while keeping d's own bytestream pointers:
 * they are saved first and written back at the end. Presumably the coder
 * state itself is copied from s in between.
 */
1464 static inline void copy_rac_state(RangeCoder *d, RangeCoder *s){
1465 uint8_t *bytestream= d->bytestream;
1466 uint8_t *bytestream_start= d->bytestream_start;
1468 d->bytestream= bytestream;
1469 d->bytestream_start= bytestream_start;
/**
 * Store one block description into every smallest-size node covered by the
 * block at (x, y) on the given tree level.
 */
1472 static inline void set_blocks(SnowContext *s, int level, int x, int y, int l, int cb, int cr, int mx, int my, int ref, int type){
/* Row length of s->block in smallest-size nodes. */
1473 const int w= s->b_width << s->block_max_depth;
/* Levels remaining below this one; the block spans 2^rem_depth nodes per
 * side. */
1474 const int rem_depth= s->block_max_depth - level;
1475 const int index= (x + y*w) << rem_depth;
1476 const int block_w= 1<<rem_depth;
/* Fill the block_w x block_w square starting at index. */
1489 for(j=0; j<block_w; j++){
1490 for(i=0; i<block_w; i++){
1491 s->block[index + i + j*w]= block;
/**
 * Set the source and reference plane pointers of the motion estimation
 * context c for position (x, y).
 */
1496 static inline void init_ref(MotionEstContext *c, uint8_t *src[3], uint8_t *ref[3], uint8_t *ref2[3], int x, int y, int ref_index){
/* Per-plane byte offsets; the two entries below use the uvstride-based
 * offset shifted right by one. */
1497 const int offset[3]= {
1499 ((y*c->uvstride + x)>>1),
1500 ((y*c->uvstride + x)>>1),
/* src pointers are stored as given; ref pointers get the plane offset. */
1504 c->src[0][i]= src [i];
1505 c->ref[0][i]= ref [i] + offset[i];
/**
 * Predict a motion vector from the left, top and top-right neighbours and
 * store it in *mx / *my.
 *
 * @param ref  reference frame index the prediction is made for
 */
1510 static inline void pred_mv(SnowContext *s, int *mx, int *my, int ref,
1511 const BlockNode *left, const BlockNode *top, const BlockNode *tr){
/* Single reference frame: combine the neighbours' components directly
 * (mid_pred presumably returns the median of its three arguments). */
1512 if(s->ref_frames == 1){
1513 *mx = mid_pred(left->mx, top->mx, tr->mx);
1514 *my = mid_pred(left->my, top->my, tr->my);
/* Otherwise each neighbour's vector is first rescaled with the
 * scale_mv_ref entry for (ref, neighbour's ref): multiply, add 128,
 * shift right by 8. */
1516 const int *scale = scale_mv_ref[ref];
1517 *mx = mid_pred((left->mx * scale[left->ref] + 128) >>8,
1518 (top ->mx * scale[top ->ref] + 128) >>8,
1519 (tr ->mx * scale[tr ->ref] + 128) >>8);
1520 *my = mid_pred((left->my * scale[left->ref] + 128) >>8,
1521 (top ->my * scale[top ->ref] + 128) >>8,
1522 (tr ->my * scale[tr ->ref] + 128) >>8);
/**
 * Return non-zero if blocks a and b are equivalent for prediction.
 */
1526 static av_always_inline int same_block(BlockNode *a, BlockNode *b){
/* Both intra: equal when all three colour components match. OR-ing the
 * differences gives zero only if every difference is zero. */
1527 if((a->type&BLOCK_INTRA) && (b->type&BLOCK_INTRA)){
1528 return !((a->color[0] - b->color[0]) | (a->color[1] - b->color[1]) | (a->color[2] - b->color[2]));
/* Otherwise: equal when motion vector, reference index and the intra
 * flag all match. */
1530 return !((a->mx - b->mx) | (a->my - b->my) | (a->ref - b->ref) | ((a->type ^ b->type)&BLOCK_INTRA));
/**
 * Decode the block at (x, y) on the given tree level from the range coder:
 * either a leaf (colour or motion data) or a split into four children on
 * level+1.
 */
1534 static void decode_q_branch(SnowContext *s, int level, int x, int y){
1535 const int w= s->b_width << s->block_max_depth;
1536 const int rem_depth= s->block_max_depth - level;
1537 const int index= (x + y*w) << rem_depth;
1538 int trx= (x+1)<<rem_depth;
/* Neighbour nodes; at picture borders null_block (or another neighbour)
 * is substituted. */
1539 const BlockNode *left = x ? &s->block[index-1] : &null_block;
1540 const BlockNode *top = y ? &s->block[index-w] : &null_block;
1541 const BlockNode *tl = y && x ? &s->block[index-w-1] : left;
1542 const BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
/* Context for the split decision, from the neighbours' levels. */
1543 int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
/* Fill with the null_block values as an intra block (the guarding
 * condition is presumably the keyframe case - confirm). */
1546 set_blocks(s, level, x, y, null_block.color[0], null_block.color[1], null_block.color[2], null_block.mx, null_block.my, null_block.ref, BLOCK_INTRA);
/* Leaf if the maximum depth is reached or the decoded bit says so. */
1550 if(level==s->block_max_depth || get_rac(&s->c, &s->block_state[4 + s_context])){
/* Colours are predicted from the left neighbour. */
1552 int l = left->color[0];
1553 int cb= left->color[1];
1554 int cr= left->color[2];
/* The second term of the mx/my contexts is multiplied by 0, so only the
 * left/top difference contributes. */
1556 int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref);
1557 int mx_context= av_log2(2*FFABS(left->mx - top->mx)) + 0*av_log2(2*FFABS(tr->mx - top->mx));
1558 int my_context= av_log2(2*FFABS(left->my - top->my)) + 0*av_log2(2*FFABS(tr->my - top->my));
1560 type= get_rac(&s->c, &s->block_state[1 + left->type + top->type]) ? BLOCK_INTRA : 0;
/* Colour deltas are added to the left-neighbour prediction. */
1563 pred_mv(s, &mx, &my, 0, left, top, tr);
1564 l += get_symbol(&s->c, &s->block_state[32], 1);
1565 cb+= get_symbol(&s->c, &s->block_state[64], 1);
1566 cr+= get_symbol(&s->c, &s->block_state[96], 1);
/* Reference index (only coded with more than one reference frame), then
 * motion vector deltas on top of the prediction. */
1568 if(s->ref_frames > 1)
1569 ref= get_symbol(&s->c, &s->block_state[128 + 1024 + 32*ref_context], 0);
1570 pred_mv(s, &mx, &my, ref, left, top, tr);
1571 mx+= get_symbol(&s->c, &s->block_state[128 + 32*(mx_context + 16*!!ref)], 1);
1572 my+= get_symbol(&s->c, &s->block_state[128 + 32*(my_context + 16*!!ref)], 1);
1574 set_blocks(s, level, x, y, l, cb, cr, mx, my, ref, type);
/* Split: recurse into the four children. */
1576 decode_q_branch(s, level+1, 2*x+0, 2*y+0);
1577 decode_q_branch(s, level+1, 2*x+1, 2*y+0);
1578 decode_q_branch(s, level+1, 2*x+0, 2*y+1);
1579 decode_q_branch(s, level+1, 2*x+1, 2*y+1);
/**
 * Decode the block tree by calling decode_q_branch() at level 0 for each
 * (x, y) visited (the loop bounds are presumably the top-level block grid).
 */
1583 static void decode_blocks(SnowContext *s){
1590 decode_q_branch(s, 0, x, y);
/**
 * Sub-pixel motion compensation of one b_w x b_h block from src into dst,
 * for a fractional offset (dx, dy) with both components below 16.
 * Intermediate filtered planes are built first and then blended.
 *
 * @param p  plane whose filter settings (hcoeff, fast_mc, diag_mc) are used;
 *           when NULL the fixed 20/-5/1 filter is applied
 */
1595 static void mc_block(Plane *p, uint8_t *dst, const uint8_t *src, uint8_t *tmp, int stride, int b_w, int b_h, int dx, int dy){
1596 static const uint8_t weight[64]={
/* Indexed by dx + 16*dy; the low and high nibbles select the two hpel[]
 * planes to blend. 0xcc entries presumably mark combinations handled by
 * the other path - confirm. */
1607 static const uint8_t brane[256]={
1608 0x00,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x11,0x12,0x12,0x12,0x12,0x12,0x12,0x12,
1609 0x04,0x05,0xcc,0xcc,0xcc,0xcc,0xcc,0x41,0x15,0x16,0xcc,0xcc,0xcc,0xcc,0xcc,0x52,
1610 0x04,0xcc,0x05,0xcc,0xcc,0xcc,0x41,0xcc,0x15,0xcc,0x16,0xcc,0xcc,0xcc,0x52,0xcc,
1611 0x04,0xcc,0xcc,0x05,0xcc,0x41,0xcc,0xcc,0x15,0xcc,0xcc,0x16,0xcc,0x52,0xcc,0xcc,
1612 0x04,0xcc,0xcc,0xcc,0x41,0xcc,0xcc,0xcc,0x15,0xcc,0xcc,0xcc,0x16,0xcc,0xcc,0xcc,
1613 0x04,0xcc,0xcc,0x41,0xcc,0x05,0xcc,0xcc,0x15,0xcc,0xcc,0x52,0xcc,0x16,0xcc,0xcc,
1614 0x04,0xcc,0x41,0xcc,0xcc,0xcc,0x05,0xcc,0x15,0xcc,0x52,0xcc,0xcc,0xcc,0x16,0xcc,
1615 0x04,0x41,0xcc,0xcc,0xcc,0xcc,0xcc,0x05,0x15,0x52,0xcc,0xcc,0xcc,0xcc,0xcc,0x16,
1616 0x44,0x45,0x45,0x45,0x45,0x45,0x45,0x45,0x55,0x56,0x56,0x56,0x56,0x56,0x56,0x56,
1617 0x48,0x49,0xcc,0xcc,0xcc,0xcc,0xcc,0x85,0x59,0x5A,0xcc,0xcc,0xcc,0xcc,0xcc,0x96,
1618 0x48,0xcc,0x49,0xcc,0xcc,0xcc,0x85,0xcc,0x59,0xcc,0x5A,0xcc,0xcc,0xcc,0x96,0xcc,
1619 0x48,0xcc,0xcc,0x49,0xcc,0x85,0xcc,0xcc,0x59,0xcc,0xcc,0x5A,0xcc,0x96,0xcc,0xcc,
1620 0x48,0xcc,0xcc,0xcc,0x49,0xcc,0xcc,0xcc,0x59,0xcc,0xcc,0xcc,0x96,0xcc,0xcc,0xcc,
1621 0x48,0xcc,0xcc,0x85,0xcc,0x49,0xcc,0xcc,0x59,0xcc,0xcc,0x96,0xcc,0x5A,0xcc,0xcc,
1622 0x48,0xcc,0x85,0xcc,0xcc,0xcc,0x49,0xcc,0x59,0xcc,0x96,0xcc,0xcc,0xcc,0x5A,0xcc,
1623 0x48,0x85,0xcc,0xcc,0xcc,0xcc,0xcc,0x49,0x59,0x96,0xcc,0xcc,0xcc,0xcc,0xcc,0x5A,
/* Bit mask of intermediate planes required per hpel[] index. */
1626 static const uint8_t needs[16]={
/* Scratch buffers on the stack; tmp2t is a variable-length array sized by
 * stride. */
1634 int16_t tmpIt [64*(32+HTAPS_MAX)];
1635 uint8_t tmp2t[3][stride*(32+HTAPS_MAX)];
1636 int16_t *tmpI= tmpIt;
1637 uint8_t *tmp2= tmp2t[0];
1638 const uint8_t *hpel[11];
1639 assert(dx<16 && dy<16);
1640 r= brane[dx + 16*dy]&15;
1641 l= brane[dx + 16*dy]>>4;
1643 b= needs[l] | needs[r];
1644 if(p && !p->diag_mc)
/* Horizontal filter pass over b_h+HTAPS_MAX-1 rows, so the later vertical
 * pass has the rows its taps need. */
1648 for(y=0; y < b_h+HTAPS_MAX-1; y++){
1649 for(x=0; x < b_w; x++){
1650 int a_1=src[x + HTAPS_MAX/2-4];
1651 int a0= src[x + HTAPS_MAX/2-3];
1652 int a1= src[x + HTAPS_MAX/2-2];
1653 int a2= src[x + HTAPS_MAX/2-1];
1654 int a3= src[x + HTAPS_MAX/2+0];
1655 int a4= src[x + HTAPS_MAX/2+1];
1656 int a5= src[x + HTAPS_MAX/2+2];
1657 int a6= src[x + HTAPS_MAX/2+3];
/* Fixed 6-tap 20/-5/1 filter without a plane or in fast mode, otherwise
 * the plane's own 8-tap symmetric coefficients. */
1659 if(!p || p->fast_mc){
1660 am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5);
1664 am= p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6);
/* Clamp to 0..255: negative values give ~(-1) = 0, values above 255 give
 * ~0, whose low byte is 255. */
1669 if(am&(~255)) am= ~(am>>31);
1678 src += HTAPS_MAX/2 - 1;
/* Vertical filter pass directly on the source pixels. */
1682 for(y=0; y < b_h; y++){
1683 for(x=0; x < b_w+1; x++){
1684 int a_1=src[x + (HTAPS_MAX/2-4)*stride];
1685 int a0= src[x + (HTAPS_MAX/2-3)*stride];
1686 int a1= src[x + (HTAPS_MAX/2-2)*stride];
1687 int a2= src[x + (HTAPS_MAX/2-1)*stride];
1688 int a3= src[x + (HTAPS_MAX/2+0)*stride];
1689 int a4= src[x + (HTAPS_MAX/2+1)*stride];
1690 int a5= src[x + (HTAPS_MAX/2+2)*stride];
1691 int a6= src[x + (HTAPS_MAX/2+3)*stride];
/* Rounding offset and shift differ between the two filters: +16 >>5 for the
 * fixed filter, +32 >>6 for the plane's coefficients. */
1693 if(!p || p->fast_mc)
1694 am= (20*(a2+a3) - 5*(a1+a4) + (a0+a5) + 16)>>5;
1696 am= (p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6) + 32)>>6;
1698 if(am&(~255)) am= ~(am>>31);
1706 src += stride*(HTAPS_MAX/2 - 1);
/* Vertical filter pass on the 16-bit intermediate tmpI (row pitch 64);
 * the larger rounding/shift accounts for the scale left by the first pass. */
1710 for(y=0; y < b_h; y++){
1711 for(x=0; x < b_w; x++){
1712 int a_1=tmpI[x + (HTAPS_MAX/2-4)*64];
1713 int a0= tmpI[x + (HTAPS_MAX/2-3)*64];
1714 int a1= tmpI[x + (HTAPS_MAX/2-2)*64];
1715 int a2= tmpI[x + (HTAPS_MAX/2-1)*64];
1716 int a3= tmpI[x + (HTAPS_MAX/2+0)*64];
1717 int a4= tmpI[x + (HTAPS_MAX/2+1)*64];
1718 int a5= tmpI[x + (HTAPS_MAX/2+2)*64];
1719 int a6= tmpI[x + (HTAPS_MAX/2+3)*64];
1721 if(!p || p->fast_mc)
1722 am= (20*(a2+a3) - 5*(a1+a4) + (a0+a5) + 512)>>10;
1724 am= (p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6) + 2048)>>12;
1725 if(am&(~255)) am= ~(am>>31);
/* Table of candidate planes; some entries alias others shifted by one
 * pixel or one row. */
1734 hpel[ 1]= tmp2t[0] + stride*(HTAPS_MAX/2-1);
1739 hpel[ 6]= tmp2t[1] + 1;
1741 hpel[ 8]= src + stride;
1742 hpel[ 9]= hpel[1] + stride;
1743 hpel[10]= hpel[8] + 1;
/* Bilinear blend of the four planes surrounding the position.
 * NOTE(review): the weights are written as (8-dx) and (8-dy), so dx and dy
 * have presumably been reduced to 0..7 on a line not shown - confirm. */
1746 const uint8_t *src1= hpel[dx/8 + dy/8*4 ];
1747 const uint8_t *src2= hpel[dx/8 + dy/8*4+1];
1748 const uint8_t *src3= hpel[dx/8 + dy/8*4+4];
1749 const uint8_t *src4= hpel[dx/8 + dy/8*4+5];
1752 for(y=0; y < b_h; y++){
1753 for(x=0; x < b_w; x++){
1754 dst[x]= ((8-dx)*(8-dy)*src1[x] + dx*(8-dy)*src2[x]+
1755 (8-dx)* dy *src3[x] + dx* dy *src4[x]+32)>>6;
/* Two-plane blend using the planes chosen through brane[]; a is taken from
 * weight[], and a and b are presumably complementary (summing to 8). */
1764 const uint8_t *src1= hpel[l];
1765 const uint8_t *src2= hpel[r];
1766 int a= weight[((dx&7) + (8*(dy&7)))];
1768 for(y=0; y < b_h; y++){
1769 for(x=0; x < b_w; x++){
1770 dst[x]= (a*src1[x] + b*src2[x] + 4)>>3;
/* Generates a wrapper named mc_block_hpel<dx><dy><b_w> around mc_block() for
 * a fixed (dx, dy) and a square b_w block, with no Plane (NULL) and a scratch
 * buffer on the stack. src is moved back by HTAPS_MAX/2-1 columns and rows
 * before the call. */
1779 #define mca(dx,dy,b_w)\
1780 static void mc_block_hpel ## dx ## dy ## b_w(uint8_t *dst, const uint8_t *src, int stride, int h){\
1781 uint8_t tmp[stride*(b_w+HTAPS_MAX-1)];\
1783 mc_block(NULL, dst, src-(HTAPS_MAX/2-1)-(HTAPS_MAX/2-1)*stride, tmp, stride, b_w, b_w, dx, dy);\
/**
 * Write the prediction for one block into dst: a flat colour for intra
 * blocks, otherwise a motion-compensated copy from a previous picture.
 *
 * @param sx,sy  position of the block in the plane
 * @param w,h    plane dimensions used for the edge check
 */
1795 static void pred_block(SnowContext *s, uint8_t *dst, uint8_t *tmp, int stride, int sx, int sy, int b_w, int b_h, BlockNode *block, int plane_index, int w, int h){
1796 if(block->type & BLOCK_INTRA){
/* color4 repeats the 8-bit colour in all four bytes of a 32-bit word. */
1798 const int color = block->color[plane_index];
1799 const int color4= color*0x01010101;
/* Row fills of 32, 16, 8 and 4 bytes using 32-bit stores (presumably
 * selected by b_w), followed by a generic per-pixel fill.
 * NOTE(review): the casts assume dst is suitably aligned for uint32_t. */
1801 for(y=0; y < b_h; y++){
1802 *(uint32_t*)&dst[0 + y*stride]= color4;
1803 *(uint32_t*)&dst[4 + y*stride]= color4;
1804 *(uint32_t*)&dst[8 + y*stride]= color4;
1805 *(uint32_t*)&dst[12+ y*stride]= color4;
1806 *(uint32_t*)&dst[16+ y*stride]= color4;
1807 *(uint32_t*)&dst[20+ y*stride]= color4;
1808 *(uint32_t*)&dst[24+ y*stride]= color4;
1809 *(uint32_t*)&dst[28+ y*stride]= color4;
1812 for(y=0; y < b_h; y++){
1813 *(uint32_t*)&dst[0 + y*stride]= color4;
1814 *(uint32_t*)&dst[4 + y*stride]= color4;
1815 *(uint32_t*)&dst[8 + y*stride]= color4;
1816 *(uint32_t*)&dst[12+ y*stride]= color4;
1819 for(y=0; y < b_h; y++){
1820 *(uint32_t*)&dst[0 + y*stride]= color4;
1821 *(uint32_t*)&dst[4 + y*stride]= color4;
1824 for(y=0; y < b_h; y++){
1825 *(uint32_t*)&dst[0 + y*stride]= color4;
1828 for(y=0; y < b_h; y++){
1829 for(x=0; x < b_w; x++){
1830 dst[x + y*stride]= color;
/* Inter block: fetch from the reference picture selected by block->ref. */
1835 uint8_t *src= s->last_picture[block->ref].data[plane_index];
/* The scale is doubled for plane 0 compared with the other planes. */
1836 const int scale= plane_index ? s->mv_scale : 2*s->mv_scale;
1837 int mx= block->mx*scale;
1838 int my= block->my*scale;
/* Low 4 bits are the fractional offset, the rest the integer offset. */
1839 const int dx= mx&15;
1840 const int dy= my&15;
1841 const int tab_index= 3 - (b_w>>2) + (b_w>>4);
/* Step back by the filter's leading taps. */
1842 sx += (mx>>4) - (HTAPS_MAX/2-1);
1843 sy += (my>>4) - (HTAPS_MAX/2-1);
1844 src += sx + sy*stride;
/* If the filter footprint leaves the plane (the unsigned cast also catches
 * negative positions), build an edge-extended copy in tmp. */
1845 if( (unsigned)sx >= w - b_w - (HTAPS_MAX-2)
1846 || (unsigned)sy >= h - b_h - (HTAPS_MAX-2)){
1847 ff_emulated_edge_mc(tmp + MB_SIZE, src, stride, b_w+HTAPS_MAX-1, b_h+HTAPS_MAX-1, sx, sy, w, h);
1850 // assert(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h);
1851 // assert(!(b_w&(b_w-1)));
1852 assert(b_w>1 && b_h>1);
1853 assert((tab_index>=0 && tab_index<4) || b_w==32);
/* Generic path: used for positions that are not quarter-pel aligned, for
 * unusual block shapes, or when fast_mc is off for this plane. */
1854 if((dx&3) || (dy&3) || !(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h) || (b_w&(b_w-1)) || !s->plane[plane_index].fast_mc )
1855 mc_block(&s->plane[plane_index], dst, src, tmp, stride, b_w, b_h, dx, dy);
/* Otherwise the qpel function table is used; src + 3 + 3*stride is passed
 * as the source position. Blocks appear to be tiled as 16-wide pieces
 * (presumably b_w==32) or as two halves when width and height differ by a
 * factor of two. */
1858 for(y=0; y<b_h; y+=16){
1859 s->dsp.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + y*stride, src + 3 + (y+3)*stride,stride);
1860 s->dsp.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + 16 + y*stride, src + 19 + (y+3)*stride,stride);
1863 s->dsp.put_h264_qpel_pixels_tab[tab_index ][dy+(dx>>2)](dst,src + 3 + 3*stride,stride);
1864 else if(b_w==2*b_h){
1865 s->dsp.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst ,src + 3 + 3*stride,stride);
1866 s->dsp.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst+b_h,src + 3 + b_h + 3*stride,stride);
1869 s->dsp.put_h264_qpel_pixels_tab[tab_index ][dy+(dx>>2)](dst ,src + 3 + 3*stride ,stride);
1870 s->dsp.put_h264_qpel_pixels_tab[tab_index ][dy+(dx>>2)](dst+b_w*stride,src + 3 + 3*stride+b_w*stride,stride);
/**
 * Blend four block predictions with the obmc weight table and combine the
 * result with the slice-buffer lines, for a b_w x b_h area.
 *
 * @param block  four prediction buffers, addressed with src_stride
 * @param add    selects between the two output forms at the end of the
 *               inner loop
 * @param dst8   8-bit output, written with src_stride
 */
1875 void ff_snow_inner_add_yblock(const uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h,
1876 int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8){
1879 for(y=0; y<b_h; y++){
1880 //FIXME ugly misuse of obmc_stride
/* Four weight rows: same row at column offsets 0 and obmc_stride/2, and
 * the row obmc_stride/2 further down at the same two offsets. */
1881 const uint8_t *obmc1= obmc + y*obmc_stride;
1882 const uint8_t *obmc2= obmc1+ (obmc_stride>>1);
1883 const uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1);
1884 const uint8_t *obmc4= obmc3+ (obmc_stride>>1);
1885 dst = slice_buffer_get_line(sb, src_y + y);
1886 for(x=0; x<b_w; x++){
/* Weights and predictions are paired in opposite order: obmc1 goes with
 * block[3], obmc4 with block[0]. */
1887 int v= obmc1[x] * block[3][x + y*src_stride]
1888 +obmc2[x] * block[2][x + y*src_stride]
1889 +obmc3[x] * block[1][x + y*src_stride]
1890 +obmc4[x] * block[0][x + y*src_stride];
/* Rescale v (conditions selecting these two shifts are presumably
 * compile-time checks on the constants - confirm). */
1892 v <<= 8 - LOG2_OBMC_MAX;
1894 v >>= 8 - FRAC_BITS;
/* Add the line value to v, round, clamp to 0..255 and store as 8 bit ... */
1897 v += dst[x + src_x];
1898 v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS;
1899 if(v&(~255)) v= ~(v>>31);
1900 dst8[x + y*src_stride] = v;
/* ... or subtract v from the line value. */
1902 dst[x + src_x] -= v;
1908 //FIXME name cleanup (b_w, block_w, b_width stuff)
/**
 * Overlapped block prediction for one block position: predicts from the
 * four nodes around (b_x, b_y) (lt, rt, lb, rb), blends them with the obmc
 * weights and adds to or subtracts from the destination.
 *
 * @param sliced      selects the slice-buffer (sb) path over the flat dst
 *                    path
 * @param add         selects add versus subtract in the output step
 * @param offset_dst  controls whether dst is offset by the block position
 */
1909 static av_always_inline void add_yblock(SnowContext *s, int sliced, slice_buffer *sb, IDWTELEM *dst, uint8_t *dst8, const uint8_t *obmc, int src_x, int src_y, int b_w, int b_h, int w, int h, int dst_stride, int src_stride, int obmc_stride, int b_x, int b_y, int add, int offset_dst, int plane_index){
1910 const int b_width = s->b_width << s->block_max_depth;
1911 const int b_height= s->b_height << s->block_max_depth;
1912 const int b_stride= b_width;
/* The four neighbouring nodes: left/right top, left/right bottom. */
1913 BlockNode *lt= &s->block[b_x + b_y*b_stride];
1914 BlockNode *rt= lt+1;
1915 BlockNode *lb= lt+b_stride;
1916 BlockNode *rb= lb+1;
/* Spacing between the prediction buffers carved out of the scratch buffer. */
1918 int tmp_step= src_stride >= 7*MB_SIZE ? MB_SIZE : MB_SIZE*src_stride;
1919 uint8_t *tmp = s->scratchbuf;
/* Border handling for node positions on the right / bottom edge. */
1926 }else if(b_x + 1 >= b_width){
1933 }else if(b_y + 1 >= b_height){
/* Clip the block against the plane borders, adjusting obmc and dst. */
1938 if(src_x<0){ //FIXME merge with prev & always round internal width up to *16
1941 if(!sliced && !offset_dst)
1944 }else if(src_x + b_w > w){
1948 obmc -= src_y*obmc_stride;
1950 if(!sliced && !offset_dst)
1951 dst -= src_y*dst_stride;
1953 }else if(src_y + b_h> h){
/* Nothing left after clipping. */
1957 if(b_w<=0 || b_h<=0) return;
1959 assert(src_stride > 2*MB_SIZE + 5);
1961 if(!sliced && offset_dst)
1962 dst += src_x + src_y*dst_stride;
1963 dst8+= src_x + src_y*src_stride;
1964 // src += src_x + src_y*src_stride;
1966 ptmp= tmp + 3*tmp_step;
/* Predict from each of the four nodes; same_block() checks against the
 * nodes already handled, presumably to reuse an existing prediction. */
1969 pred_block(s, block[0], tmp, src_stride, src_x, src_y, b_w, b_h, lt, plane_index, w, h);
1971 if(same_block(lt, rt)){
1976 pred_block(s, block[1], tmp, src_stride, src_x, src_y, b_w, b_h, rt, plane_index, w, h);
1979 if(same_block(lt, lb)){
1981 }else if(same_block(rt, lb)){
1986 pred_block(s, block[2], tmp, src_stride, src_x, src_y, b_w, b_h, lb, plane_index, w, h);
1989 if(same_block(lt, rb) ){
1991 }else if(same_block(rt, rb)){
1993 }else if(same_block(lb, rb)){
1997 pred_block(s, block[3], tmp, src_stride, src_x, src_y, b_w, b_h, rb, plane_index, w, h);
/* The four loops below accumulate one weighted prediction each directly into
 * dst. NOTE(review): they look like an alternative to the combined loop
 * at the end - confirm which one is compiled. */
2000 for(y=0; y<b_h; y++){
2001 for(x=0; x<b_w; x++){
2002 int v= obmc [x + y*obmc_stride] * block[3][x + y*src_stride] * (256/OBMC_MAX);
2003 if(add) dst[x + y*dst_stride] += v;
2004 else dst[x + y*dst_stride] -= v;
2007 for(y=0; y<b_h; y++){
2008 uint8_t *obmc2= obmc + (obmc_stride>>1);
2009 for(x=0; x<b_w; x++){
2010 int v= obmc2[x + y*obmc_stride] * block[2][x + y*src_stride] * (256/OBMC_MAX);
2011 if(add) dst[x + y*dst_stride] += v;
2012 else dst[x + y*dst_stride] -= v;
2015 for(y=0; y<b_h; y++){
2016 uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
2017 for(x=0; x<b_w; x++){
2018 int v= obmc3[x + y*obmc_stride] * block[1][x + y*src_stride] * (256/OBMC_MAX);
2019 if(add) dst[x + y*dst_stride] += v;
2020 else dst[x + y*dst_stride] -= v;
2023 for(y=0; y<b_h; y++){
2024 uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
2025 uint8_t *obmc4= obmc3+ (obmc_stride>>1);
2026 for(x=0; x<b_w; x++){
2027 int v= obmc4[x + y*obmc_stride] * block[0][x + y*src_stride] * (256/OBMC_MAX);
2028 if(add) dst[x + y*dst_stride] += v;
2029 else dst[x + y*dst_stride] -= v;
/* The blend is delegated to the dsp function, which works on sb
 * (presumably the sliced case - confirm). */
2034 s->dsp.inner_add_yblock(obmc, obmc_stride, block, b_w, b_h, src_x,src_y, src_stride, sb, add, dst8);
/* Flat-buffer blend: same arithmetic as ff_snow_inner_add_yblock(), but
 * addressing dst with dst_stride. */
2036 for(y=0; y<b_h; y++){
2037 //FIXME ugly misuse of obmc_stride
2038 const uint8_t *obmc1= obmc + y*obmc_stride;
2039 const uint8_t *obmc2= obmc1+ (obmc_stride>>1);
2040 const uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1);
2041 const uint8_t *obmc4= obmc3+ (obmc_stride>>1);
2042 for(x=0; x<b_w; x++){
2043 int v= obmc1[x] * block[3][x + y*src_stride]
2044 +obmc2[x] * block[2][x + y*src_stride]
2045 +obmc3[x] * block[1][x + y*src_stride]
2046 +obmc4[x] * block[0][x + y*src_stride];
2048 v <<= 8 - LOG2_OBMC_MAX;
2050 v >>= 8 - FRAC_BITS;
/* Add dst to v, round, clamp to 0..255 and store as 8 bit, or subtract v
 * from dst. */
2053 v += dst[x + y*dst_stride];
2054 v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS;
2055 if(v&(~255)) v= ~(v>>31);
2056 dst8[x + y*src_stride] = v;
2058 dst[x + y*dst_stride] -= v;
/**
 * Apply the block prediction for one row of blocks (mb_y) of a plane, using
 * slice-buffer lines.
 *
 * @param add  forwarded to add_yblock(); also selects the keyframe handling
 */
2066 static av_always_inline void predict_slice_buffered(SnowContext *s, slice_buffer * sb, IDWTELEM * old_buffer, int plane_index, int add, int mb_y){
2067 Plane *p= &s->plane[plane_index];
2068 const int mb_w= s->b_width << s->block_max_depth;
2069 const int mb_h= s->b_height << s->block_max_depth;
/* Planes other than 0 use half the block size and the next obmc table. */
2071 int block_size = MB_SIZE >> s->block_max_depth;
2072 int block_w = plane_index ? block_size/2 : block_size;
2073 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
2074 int obmc_stride= plane_index ? block_size : 2*block_size;
2075 int ref_stride= s->current_picture.linesize[plane_index];
2076 uint8_t *dst8= s->current_picture.data[plane_index];
/* Keyframes (or debug flag 512) use no motion prediction: only the 128
 * offset is applied to the rows of this block row. */
2080 if(s->keyframe || (s->avctx->debug&512)){
2085 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
2086 // DWTELEM * line = slice_buffer_get_line(sb, y);
2087 IDWTELEM * line = sb->line[y];
2089 // int v= buf[x + y*w] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
/* Add the 128 offset and the rounding term; after a shift (not shown) the
 * value is clamped to 0..255 and stored as 8 bit. */
2090 int v= line[x] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
2092 if(v&(~255)) v= ~(v>>31);
2093 dst8[x + y*ref_stride]= v;
/* Presumably the !add case: subtract the 128 offset from the line. */
2097 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
2098 // DWTELEM * line = slice_buffer_get_line(sb, y);
2099 IDWTELEM * line = sb->line[y];
2101 line[x] -= 128 << FRAC_BITS;
2102 // buf[x + y*w]-= 128<<FRAC_BITS;
/* mb_w+1 block positions per row; each block is shifted up and left by half
 * a block. */
2110 for(mb_x=0; mb_x<=mb_w; mb_x++){
2111 add_yblock(s, 1, sb, old_buffer, dst8, obmc,
2112 block_w*mb_x - block_w/2,
2113 block_w*mb_y - block_w/2,
2116 w, ref_stride, obmc_stride,
2118 add, 0, plane_index);
/**
 * Prediction step for one block row (mb_y) of one plane, operating on a
 * flat coefficient buffer.
 *
 * Coefficients are addressed as buf[x + y*w]; 8-bit samples are written to
 * s->current_picture.data[plane_index].  In the keyframe / (debug & 512)
 * branch only the level shift by 128<<FRAC_BITS is applied; the remaining
 * code calls add_yblock() once per block column with a NULL slice buffer.
 *
 * NOTE(review): local declarations, the tests on add, the exit from the
 * keyframe branch and the closing braces are not visible in this chunk;
 * confirm the branch structure against the complete file.
 */
2122 static av_always_inline void predict_slice(SnowContext *s, IDWTELEM *buf, int plane_index, int add, int mb_y){
2123 Plane *p= &s->plane[plane_index];
2124 const int mb_w= s->b_width << s->block_max_depth;
2125 const int mb_h= s->b_height << s->block_max_depth;
// plane 0 uses the full block size, the other planes half of it
2127 int block_size = MB_SIZE >> s->block_max_depth;
2128 int block_w = plane_index ? block_size/2 : block_size;
2129 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
2130 const int obmc_stride= plane_index ? block_size : 2*block_size;
2131 int ref_stride= s->current_picture.linesize[plane_index];
2132 uint8_t *dst8= s->current_picture.data[plane_index];
2136 if(s->keyframe || (s->avctx->debug&512)){
// rows belonging to this block row, clipped to h
2141 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
// level shift by 128 plus a rounding term of half an output step
2143 int v= buf[x + y*w] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
// clamp: negative v becomes 0, v above 255 is stored as 255
2145 if(v&(~255)) v= ~(v>>31);
2146 dst8[x + y*ref_stride]= v;
// second pass over the same rows: remove the 128 level shift in place
2150 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
2152 buf[x + y*w]-= 128<<FRAC_BITS;
// mb_x runs to mb_w inclusive; each block origin is moved up and left by
// block_w/2
2160 for(mb_x=0; mb_x<=mb_w; mb_x++){
2161 add_yblock(s, 0, NULL, buf, dst8, obmc,
2162 block_w*mb_x - block_w/2,
2163 block_w*mb_y - block_w/2,
2166 w, ref_stride, obmc_stride,
2168 add, 1, plane_index);
/**
 * Run predict_slice() on every block row of a plane.
 *
 * mb_y goes from 0 to mb_h inclusive, where
 * mb_h = s->b_height << s->block_max_depth; buf, plane_index and add are
 * passed through unchanged.
 */
2172 static av_always_inline void predict_plane(SnowContext *s, IDWTELEM *buf, int plane_index, int add){
2173 const int mb_h= s->b_height << s->block_max_depth;
2175 for(mb_y=0; mb_y<=mb_h; mb_y++)
2176 predict_slice(s, buf, plane_index, add, mb_y);
/**
 * Dequantize rows start_y .. end_y-1 of subband b in place inside the slice
 * buffer.
 *
 * The multiplier qmul comes from qexp[], indexed by the clipped sum of the
 * frame qlog and the band qlog; qadd is a bias derived from s->qbias.
 * Returns without touching anything when s->qlog == LOSSLESS_QLOG.
 * src and stride are not referenced by the statements visible in this chunk.
 */
2179 static void dequantize_slice_buffered(SnowContext *s, slice_buffer * sb, SubBand *b, IDWTELEM *src, int stride, int start_y, int end_y){
2180 const int w= b->width;
2181 const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
// low bits of qlog select the table entry, the remaining bits a power-of-two scale
2182 const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
2183 const int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
2186 if(s->qlog == LOSSLESS_QLOG) return;
2188 for(y=start_y; y<end_y; y++){
2189 // DWTELEM * line = slice_buffer_get_line_from_address(sb, src + (y * stride));
// band row y is slice-buffer row y*stride_line + buf_y_offset, starting at
// column buf_x_offset
2190 IDWTELEM * line = slice_buffer_get_line(sb, (y * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
// NOTE(review): i is presumably the quantized value read from line[x]; its
// definition and the test choosing between the two assignments below are
// not visible in this chunk
2194 line[x]= -((-i*qmul + qadd)>>(QEXPSHIFT)); //FIXME try different bias
2196 line[x]= (( i*qmul + qadd)>>(QEXPSHIFT));
/**
 * Add a prediction formed from already processed neighbours to every
 * coefficient of rows start_y .. end_y-1 of subband b, working on rows
 * fetched from the slice buffer.
 *
 * Predictors visible below: median of line[x-1], prev[x], prev[x+1];
 * median of line[x-1], prev[x] and line[x-1]+prev[x]-prev[x-1];
 * line[x-1] alone; prev[x] alone.
 *
 * NOTE(review): the tests that pick a predictor (use_median, position of x)
 * and the maintenance of prev are not visible in this chunk; prev is
 * presumably the previously processed row.  inverse, src and stride are not
 * referenced by the visible statements.
 */
2202 static void correlate_slice_buffered(SnowContext *s, slice_buffer * sb, SubBand *b, IDWTELEM *src, int stride, int inverse, int use_median, int start_y, int end_y){
2203 const int w= b->width;
2206 IDWTELEM * line=0; // silence silly "could be used without having been initialized" warning
// fetch the row just above the first one to be processed
2210 line = slice_buffer_get_line(sb, ((start_y - 1) * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
2212 for(y=start_y; y<end_y; y++){
2214 // line = slice_buffer_get_line_from_address(sb, src + (y * stride));
2215 line = slice_buffer_get_line(sb, (y * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
// row 0 (y == 0) has no row above it, so only line[x-1] is used there
2219 if(y && x+1<w) line[x] += mid_pred(line[x - 1], prev[x], prev[x + 1]);
2220 else line[x] += line[x - 1];
2222 if(y) line[x] += mid_pred(line[x - 1], prev[x], line[x - 1] + prev[x] - prev[x - 1]);
2223 else line[x] += line[x - 1];
2226 if(y) line[x] += prev[x];
/**
 * Fill in the qlog value of every subband of the three planes.
 *
 * Plane 2 copies the value of the same band of plane 1, orientation 2 copies
 * orientation 1 of the same plane and level, and every other band reads its
 * value from the range coder with get_symbol(..., 1).
 * Level 0 covers orientations 0..3, the other levels 1..3.
 */
2232 static void decode_qlogs(SnowContext *s){
2233 int plane_index, level, orientation;
2235 for(plane_index=0; plane_index<3; plane_index++){
2236 for(level=0; level<s->spatial_decomposition_count; level++){
2237 for(orientation=level ? 1:0; orientation<4; orientation++){
2239 if (plane_index==2) q= s->plane[1].band[level][orientation].qlog;
2240 else if(orientation==2) q= s->plane[plane_index].band[level][1].qlog;
2241 else q= get_symbol(&s->c, s->header_state, 1);
2242 s->plane[plane_index].band[level][orientation].qlog= q;
/**
 * Header-parsing helper: reads a symbol into tmp with get_symbol(..., 0)
 * and contains an error message of the form "Error <dst> is <tmp>".
 * Relies on s and tmp being in scope where it is expanded.
 *
 * NOTE(review): the lines that evaluate check and store into dst are not
 * visible in this chunk.
 */
2248 #define GET_S(dst, check) \
2249 tmp= get_symbol(&s->c, s->header_state, 0);\
2251 av_log(s->avctx, AV_LOG_ERROR, "Error " #dst " is %d\n", tmp);\
/**
 * Parse the per-frame header from the range coder s->c into s.
 *
 * Visible steps: read the keyframe flag; clear the incrementally coded
 * parameters on keyframes or when always_reset is set; read the stream-level
 * fields (version, decomposition settings, colourspace, chroma shifts,
 * reference frame count); optionally read new interpolation filter taps and
 * a new spatial_decomposition_count; finally apply deltas to
 * spatial_decomposition_type, qlog, mv_scale, qbias and block_max_depth and
 * range-check the results.
 *
 * NOTE(review): the conditions guarding the keyframe-only and
 * non-keyframe-only sections and all return statements are not visible in
 * this chunk.
 */
2256 static int decode_header(SnowContext *s){
2257 int plane_index, tmp;
// the keyframe flag is decoded with its own state, initialised to MID_STATE
2260 memset(kstate, MID_STATE, sizeof(kstate));
2262 s->keyframe= get_rac(&s->c, kstate);
2263 if(s->keyframe || s->always_reset){
// chained assignment: every field named in it is set to 0
2265 s->spatial_decomposition_type=
2269 s->block_max_depth= 0;
// NOTE(review): tmp is a signed int; comparing it with 0U converts it to
// unsigned, so this condition is true only for tmp == 0 - confirm intent
2272 GET_S(s->version, tmp <= 0U)
2273 s->always_reset= get_rac(&s->c, s->header_state);
2274 s->temporal_decomposition_type= get_symbol(&s->c, s->header_state, 0);
2275 s->temporal_decomposition_count= get_symbol(&s->c, s->header_state, 0);
2276 GET_S(s->spatial_decomposition_count, 0 < tmp && tmp <= MAX_DECOMPOSITIONS)
2277 s->colorspace_type= get_symbol(&s->c, s->header_state, 0);
2278 s->chroma_h_shift= get_symbol(&s->c, s->header_state, 0);
2279 s->chroma_v_shift= get_symbol(&s->c, s->header_state, 0);
2280 s->spatial_scalability= get_rac(&s->c, s->header_state);
2281 // s->rate_scalability= get_rac(&s->c, s->header_state);
// the unsigned comparison also fails for negative tmp; the accepted value
// is incremented by one on the next line
2282 GET_S(s->max_ref_frames, tmp < (unsigned)MAX_REF_FRAMES)
2283 s->max_ref_frames++;
// a set flag announces new interpolation filter parameters for planes 0 and 1
2289 if(get_rac(&s->c, s->header_state)){
2290 for(plane_index=0; plane_index<2; plane_index++){
2291 int htaps, i, sum=0;
2292 Plane *p= &s->plane[plane_index];
2293 p->diag_mc= get_rac(&s->c, s->header_state);
// tap count is coded as (htaps-2)/2, so htaps is even and at least 2
2294 htaps= get_symbol(&s->c, s->header_state, 0)*2 + 2;
2295 if((unsigned)htaps > HTAPS_MAX || htaps==0)
2298 for(i= htaps/2; i; i--){
// magnitudes are coded unsigned; odd i gets a negative sign, even i positive
2299 p->hcoeff[i]= get_symbol(&s->c, s->header_state, 0) * (1-2*(i&1));
2300 sum += p->hcoeff[i];
// hcoeff[0] is not transmitted: it is 32 minus the sum of the others
2302 p->hcoeff[0]= 32-sum;
// plane 2 reuses the filter settings of plane 1
2304 s->plane[2].diag_mc= s->plane[1].diag_mc;
2305 s->plane[2].htaps = s->plane[1].htaps;
2306 memcpy(s->plane[2].hcoeff, s->plane[1].hcoeff, sizeof(s->plane[1].hcoeff));
// a second flag announces a new spatial_decomposition_count
2308 if(get_rac(&s->c, s->header_state)){
2309 GET_S(s->spatial_decomposition_count, 0 < tmp && tmp <= MAX_DECOMPOSITIONS)
// the remaining parameters are updated by adding a symbol read with
// get_symbol(..., 1) to the previous value
2314 s->spatial_decomposition_type+= get_symbol(&s->c, s->header_state, 1);
2315 if(s->spatial_decomposition_type > 1U){
// NOTE(review): unlike the other av_log() calls in this file, this message
// has no trailing newline
2316 av_log(s->avctx, AV_LOG_ERROR, "spatial_decomposition_type %d not supported", s->spatial_decomposition_type);
// the smaller chroma dimension, shifted down by (count-1), must stay positive
2319 if(FFMIN(s->avctx-> width>>s->chroma_h_shift,
2320 s->avctx->height>>s->chroma_v_shift) >> (s->spatial_decomposition_count-1) <= 0){
// NOTE(review): message has no trailing newline
2321 av_log(s->avctx, AV_LOG_ERROR, "spatial_decomposition_count %d too large for size", s->spatial_decomposition_count);
2325 s->qlog += get_symbol(&s->c, s->header_state, 1);
2326 s->mv_scale += get_symbol(&s->c, s->header_state, 1);
2327 s->qbias += get_symbol(&s->c, s->header_state, 1);
2328 s->block_max_depth+= get_symbol(&s->c, s->header_state, 1);
// only depths 0 and 1 are accepted; the field is reset to 0 on failure
2329 if(s->block_max_depth > 1 || s->block_max_depth < 0){
// NOTE(review): message has no trailing newline, and it says "too large"
// even when the value is negative
2330 av_log(s->avctx, AV_LOG_ERROR, "block_max_depth= %d is too large", s->block_max_depth);
2331 s->block_max_depth= 0;
/**
 * Table initialisation helper: loops i over 0 .. QROOT-1 and multiplies a
 * running value v by 2^(1/QROOT) on every step, so v doubles over one full
 * loop.
 *
 * NOTE(review): the initial value of v and the statement that stores it
 * (presumably into qexp[]) are not visible in this chunk.
 */
2338 static void init_qexp(void){
2342 for(i=0; i<QROOT; i++){
2344 v *= pow(2, 1.0 / QROOT);
/**
 * Initialisation shared by the decoder and the encoder.
 *
 * Visible steps: give max_ref_frames a valid default, initialise the DSP
 * function tables, point the qpel and plain put-pixel table entries at the
 * H.264 qpel / mc_block_hpel* implementations, allocate the two
 * width*height transform buffers, fill scale_mv_ref[][] and allocate the
 * scratch buffer from the line size of a freshly requested mconly_picture.
 *
 * The six continuation lines after dsputil_init() are the body of a helper
 * macro whose #define line is not visible in this chunk: they copy
 * put_h264_qpel_pixels_tab[n][dy+dx/4] into both put_qpel_pixels_tab and
 * put_no_rnd_qpel_pixels_tab for n = 0 and 1.
 *
 * NOTE(review): none of the allocations below is checked in the visible
 * lines, and the return value of get_buffer() is ignored before
 * mconly_picture.linesize[0] is used.
 */
2348 static av_cold int common_init(AVCodecContext *avctx){
2349 SnowContext *s = avctx->priv_data;
2354 s->max_ref_frames=1; //just make sure its not an invalid value in case of no initial keyframe
2356 dsputil_init(&s->dsp, avctx);
2359 s->dsp.put_qpel_pixels_tab [0][dy+dx/4]=\
2360 s->dsp.put_no_rnd_qpel_pixels_tab[0][dy+dx/4]=\
2361 s->dsp.put_h264_qpel_pixels_tab[0][dy+dx/4];\
2362 s->dsp.put_qpel_pixels_tab [1][dy+dx/4]=\
2363 s->dsp.put_no_rnd_qpel_pixels_tab[1][dy+dx/4]=\
2364 s->dsp.put_h264_qpel_pixels_tab[1][dy+dx/4];
// mcfh(dx,dy): install mc_block_hpel<dx><dy>16 and ...8 at index dy/4+dx/8
// of the put_pixels and put_no_rnd_pixels tables (entries 0 and 1)
2383 #define mcfh(dx,dy)\
2384 s->dsp.put_pixels_tab [0][dy/4+dx/8]=\
2385 s->dsp.put_no_rnd_pixels_tab[0][dy/4+dx/8]=\
2386 mc_block_hpel ## dx ## dy ## 16;\
2387 s->dsp.put_pixels_tab [1][dy/4+dx/8]=\
2388 s->dsp.put_no_rnd_pixels_tab[1][dy/4+dx/8]=\
2389 mc_block_hpel ## dx ## dy ## 8;
2399 // dec += FFMAX(s->chroma_h_shift, s->chroma_v_shift);
2401 width= s->avctx->width;
2402 height= s->avctx->height;
// one element per pixel of the full frame for each transform buffer
// NOTE(review): width*height*sizeof(...) is not guarded against overflow here
2404 s->spatial_idwt_buffer= av_mallocz(width*height*sizeof(IDWTELEM));
2405 s->spatial_dwt_buffer= av_mallocz(width*height*sizeof(DWTELEM)); //FIXME this does not belong here
// scale_mv_ref[i][j] = 256*(i+1)/(j+1), integer division
2407 for(i=0; i<MAX_REF_FRAMES; i++)
2408 for(j=0; j<MAX_REF_FRAMES; j++)
2409 scale_mv_ref[i][j] = 256*(i+1)/(j+1);
2411 s->avctx->get_buffer(s->avctx, &s->mconly_picture);
2412 s->scratchbuf = av_malloc(s->mconly_picture.linesize[0]*7*MB_SIZE);
/**
 * Set up plane and subband geometry once the header values
 * (chroma shifts, spatial_decomposition_count) are known.
 *
 * For every plane the width/height are stored, then for every level (from
 * the highest index down to 0) and orientation the SubBand gets its buffer
 * pointers, stride, dimensions, slice-buffer offsets, parent pointer and a
 * freshly allocated x_coeff array.
 *
 * NOTE(review): several conditions (the test applying the chroma shifts,
 * the orientation tests around the offset updates, the level test before
 * setting parent) and the per-level halving of w/h are not visible in this
 * chunk; the x_coeff allocation is not checked in the visible lines.
 */
2417 static int common_init_after_header(AVCodecContext *avctx){
2418 SnowContext *s = avctx->priv_data;
2419 int plane_index, level, orientation;
2421 for(plane_index=0; plane_index<3; plane_index++){
2422 int w= s->avctx->width;
2423 int h= s->avctx->height;
// presumably applied to the chroma planes only - guarding test not visible
2426 w>>= s->chroma_h_shift;
2427 h>>= s->chroma_v_shift;
2429 s->plane[plane_index].width = w;
2430 s->plane[plane_index].height= h;
2432 for(level=s->spatial_decomposition_count-1; level>=0; level--){
2433 for(orientation=level ? 1 : 0; orientation<4; orientation++){
2434 SubBand *b= &s->plane[plane_index].band[level][orientation];
2436 b->buf= s->spatial_dwt_buffer;
// distance between consecutive band rows, in elements of the full-size buffer
2438 b->stride= s->plane[plane_index].width << (s->spatial_decomposition_count - level);
// halve the size; the width rounds up for even orientations (0, 2) and the
// height rounds up for orientations 0 and 1
2439 b->width = (w + !(orientation&1))>>1;
2440 b->height= (h + !(orientation>1))>>1;
// the same row spacing expressed in slice-buffer lines
2442 b->stride_line = 1 << (s->spatial_decomposition_count - level);
2443 b->buf_x_offset = 0;
2444 b->buf_y_offset = 0;
2448 b->buf_x_offset = (w+1)>>1;
2451 b->buf += b->stride>>1;
2452 b->buf_y_offset = b->stride_line >> 1;
// ibuf sits at the same element offset inside the IDWT buffer as buf does
// inside the DWT buffer
2454 b->ibuf= s->spatial_idwt_buffer + (b->buf - s->spatial_dwt_buffer);
2457 b->parent= &s->plane[plane_index].band[level-1][orientation];
2458 //FIXME avoid this realloc
2459 av_freep(&b->x_coeff);
2460 b->x_coeff=av_mallocz(((b->width+1) * b->height+1)*sizeof(x_and_coeff));
/**
 * Accumulate squared differences between two plane-sized buffers into a
 * coarse score grid.
 *
 * The score array is cleared first; then for every pixel (x, y) of plane p
 * the squared difference r0 - r1 is added to the cell (sx, sy), obtained by
 * dividing the offset-corrected position by step and by Q2_STEP, where
 * step = 1 << (spatial_decomposition_count - level).
 *
 * NOTE(review): the definitions of xo and yo are not visible in this chunk.
 */
2475 static void find_sse(SnowContext *s, Plane *p, int *score, int score_stride, IDWTELEM *r0, IDWTELEM *r1, int level, int orientation){
2476 SubBand *b= &p->band[level][orientation];
2480 int step= 1 << (s->spatial_decomposition_count - level);
2487 //FIXME bias for nonzero ?
// clear score_stride * ceil(height / Q2_STEP) cells
2489 memset(score, 0, sizeof(*score)*score_stride*((p->height + Q2_STEP-1)/Q2_STEP));
2490 for(y=0; y<p->height; y++){
2491 for(x=0; x<p->width; x++){
// map the pixel to its score cell, rounding to the nearest band sample
2492 int sx= (x-xo + step/2) / step / Q2_STEP;
2493 int sy= (y-yo + step/2) / step / Q2_STEP;
2494 int v= r0[x + y*p->width] - r1[x + y*p->width];
2495 assert(sx>=0 && sy>=0 && sx < score_stride);
2497 score[sx + sy*score_stride] += v*v;
// a negative sum would mean the int accumulator overflowed
2498 assert(score[sx + sy*score_stride] >= 0);
/**
 * Dequantize every subband of plane p inside buffer.
 *
 * Each band is located in buffer at the same element offset that its ibuf
 * pointer has inside s->spatial_idwt_buffer, and dequantize() is called
 * with the band's own stride.  width and height are not referenced by the
 * visible statements.
 */
2503 static void dequantize_all(SnowContext *s, Plane *p, IDWTELEM *buffer, int width, int height){
2504 int level, orientation;
2506 for(level=0; level<s->spatial_decomposition_count; level++){
2507 for(orientation=level ? 1 : 0; orientation<4; orientation++){
2508 SubBand *b= &p->band[level][orientation];
2509 IDWTELEM *dst= buffer + (b->ibuf - s->spatial_idwt_buffer);
2511 dequantize(s, b, dst, b->stride);
/**
 * Forward-transform buffer, quantize every subband and then try to improve
 * the result by shrinking individual coefficients.
 *
 * After the initial quantization into best_dequant, each band is visited
 * and, for every phase (xs, ys) of a Q2_STEP x Q2_STEP grid, a reference
 * score is computed, every coefficient on that grid phase is moved one step
 * towards zero in a trial copy, the trial is scored, and the change is kept
 * in best_dequant wherever the trial score does not exceed the reference
 * score by more than threshold.  The final coefficients are copied to
 * s->spatial_idwt_buffer.
 *
 * NOTE(review): best_dequant and idwt2_buffer are variable-length arrays of
 * height*stride elements on the stack, which may be very large for big
 * frames.  The statement guarded by the qbias == 0 test is not visible in
 * this chunk.
 */
2516 static void dwt_quantize(SnowContext *s, Plane *p, DWTELEM *buffer, int width, int height, int stride, int type){
2517 int level, orientation, ys, xs, x, y, pass;
2518 IDWTELEM best_dequant[height * stride];
2519 IDWTELEM idwt2_buffer[height * stride];
2520 const int score_stride= (width + 10)/Q2_STEP;
2521 int best_score[(width + 10)/Q2_STEP * (height + 10)/Q2_STEP]; //FIXME size
2522 int score[(width + 10)/Q2_STEP * (height + 10)/Q2_STEP]; //FIXME size
// improvement margin derived from the squared lambda
2523 int threshold= (s->m.lambda * s->m.lambda) >> 6;
2525 //FIXME pass the copy cleanly ?
2527 // memcpy(dwt_buffer, buffer, height * stride * sizeof(DWTELEM));
2528 ff_spatial_dwt(buffer, width, height, stride, type, s->spatial_decomposition_count);
// initial quantization of every band into best_dequant
2530 for(level=0; level<s->spatial_decomposition_count; level++){
2531 for(orientation=level ? 1 : 0; orientation<4; orientation++){
2532 SubBand *b= &p->band[level][orientation];
2533 IDWTELEM *dst= best_dequant + (b->ibuf - s->spatial_idwt_buffer);
2534 DWTELEM *src= buffer + (b-> buf - s->spatial_dwt_buffer);
2535 assert(src == b->buf); // code does not depend on this but it is true currently
2537 quantize(s, b, dst, src, b->stride, s->qbias);
// the loop bound allows exactly one refinement pass
2540 for(pass=0; pass<1; pass++){
2541 if(s->qbias == 0) //keyframe
2543 for(level=0; level<s->spatial_decomposition_count; level++){
2544 for(orientation=level ? 1 : 0; orientation<4; orientation++){
2545 SubBand *b= &p->band[level][orientation];
// dst addresses the trial copy, best_dst the accepted coefficients
2546 IDWTELEM *dst= idwt2_buffer + (b->ibuf - s->spatial_idwt_buffer);
2547 IDWTELEM *best_dst= best_dequant + (b->ibuf - s->spatial_idwt_buffer);
2549 for(ys= 0; ys<Q2_STEP; ys++){
2550 for(xs= 0; xs<Q2_STEP; xs++){
// reference: reconstruct from the accepted coefficients and score it
2551 memcpy(idwt2_buffer, best_dequant, height * stride * sizeof(IDWTELEM));
2552 dequantize_all(s, p, idwt2_buffer, width, height);
2553 ff_spatial_idwt(idwt2_buffer, width, height, stride, type, s->spatial_decomposition_count);
2554 find_sse(s, p, best_score, score_stride, idwt2_buffer, s->spatial_idwt_buffer, level, orientation);
// trial: start again from the accepted coefficients and move every
// coefficient of this grid phase one step towards zero
2555 memcpy(idwt2_buffer, best_dequant, height * stride * sizeof(IDWTELEM));
2556 for(y=ys; y<b->height; y+= Q2_STEP){
2557 for(x=xs; x<b->width; x+= Q2_STEP){
2558 if(dst[x + y*b->stride]<0) dst[x + y*b->stride]++;
2559 if(dst[x + y*b->stride]>0) dst[x + y*b->stride]--;
2560 //FIXME try more than just --
2563 dequantize_all(s, p, idwt2_buffer, width, height);
2564 ff_spatial_idwt(idwt2_buffer, width, height, stride, type, s->spatial_decomposition_count);
2565 find_sse(s, p, score, score_stride, idwt2_buffer, s->spatial_idwt_buffer, level, orientation);
// accept the shrink for each cell whose trial score is within threshold of
// the reference score
2566 for(y=ys; y<b->height; y+= Q2_STEP){
2567 for(x=xs; x<b->width; x+= Q2_STEP){
2568 int score_idx= x/Q2_STEP + (y/Q2_STEP)*score_stride;
2569 if(score[score_idx] <= best_score[score_idx] + threshold){
2570 best_score[score_idx]= score[score_idx];
2571 if(best_dst[x + y*b->stride]<0) best_dst[x + y*b->stride]++;
2572 if(best_dst[x + y*b->stride]>0) best_dst[x + y*b->stride]--;
2573 //FIXME copy instead
2582 memcpy(s->spatial_idwt_buffer, best_dequant, height * stride * sizeof(IDWTELEM)); //FIXME work with that directly instead of copy at the end
2585 #endif /* QUANTIZE2==1 */
/* Compile-time switch: when non-zero, frame_start() calls halfpel_interpol()
 * for the current picture before it becomes a reference. */
2587 #define USE_HALFPEL_PLANE 0
/**
 * Build half-pel interpolated copies of frame in halfpel[1..3][p].
 *
 * For each plane three buffers of ls * (h + 2*EDGE_WIDTH) bytes are
 * allocated and the stored pointer is advanced by EDGE_WIDTH*(1+ls), i.e.
 * EDGE_WIDTH rows plus EDGE_WIDTH bytes into the allocation.  Samples are
 * produced with the 6-tap filter (1, -5, 20, 20, -5, 1) with rounding and a
 * shift by 5: horizontally for halfpel[1], vertically for halfpel[2] and
 * halfpel[3].
 *
 * NOTE(review): the av_malloc() results are offset before any NULL check is
 * visible, so a failed allocation yields a bogus non-NULL pointer.  The
 * loop headers, the definition of is_chroma and any reassignment of src
 * before the halfpel[3] pass are not visible in this chunk.
 */
2589 static void halfpel_interpol(SnowContext *s, uint8_t *halfpel[4][4], AVFrame *frame){
// the filters read outside the picture area, so padded edges are required
2592 assert(!(s->avctx->flags & CODEC_FLAG_EMU_EDGE));
2596 int w= s->avctx->width >>is_chroma;
2597 int h= s->avctx->height >>is_chroma;
2598 int ls= frame->linesize[p];
2599 uint8_t *src= frame->data[p];
2601 halfpel[1][p]= (uint8_t*)av_malloc(ls * (h+2*EDGE_WIDTH)) + EDGE_WIDTH*(1+ls);
2602 halfpel[2][p]= (uint8_t*)av_malloc(ls * (h+2*EDGE_WIDTH)) + EDGE_WIDTH*(1+ls);
2603 halfpel[3][p]= (uint8_t*)av_malloc(ls * (h+2*EDGE_WIDTH)) + EDGE_WIDTH*(1+ls);
// horizontal filter: taps at i-2 .. i+3
2610 halfpel[1][p][i]= (20*(src[i] + src[i+1]) - 5*(src[i-1] + src[i+2]) + (src[i-2] + src[i+3]) + 16 )>>5;
// vertical filter: taps at rows -2 .. +3 relative to i
2617 halfpel[2][p][i]= (20*(src[i] + src[i+ls]) - 5*(src[i-ls] + src[i+2*ls]) + (src[i-2*ls] + src[i+3*ls]) + 16 )>>5;
2625 halfpel[3][p][i]= (20*(src[i] + src[i+ls]) - 5*(src[i-ls] + src[i+2*ls]) + (src[i-2*ls] + src[i+3*ls]) + 16 )>>5;
/**
 * Release the oldest reference picture (slot max_ref_frames-1) and free the
 * half-pel planes attached to that slot.
 *
 * The pointer passed to av_free() is moved back by
 * EDGE_WIDTH*(1+linesize), undoing the offset applied when the plane was
 * allocated in halfpel_interpol().
 *
 * NOTE(review): the loop over i is not visible in this chunk.
 */
2633 static void release_buffer(AVCodecContext *avctx){
2634 SnowContext *s = avctx->priv_data;
2637 if(s->last_picture[s->max_ref_frames-1].data[0]){
2638 avctx->release_buffer(avctx, &s->last_picture[s->max_ref_frames-1]);
// i indexes halfpel_plane[...][1 + i/3][i%3] of the oldest slot
2640 if(s->halfpel_plane[s->max_ref_frames-1][1+i/3][i%3])
2641 av_free(s->halfpel_plane[s->max_ref_frames-1][1+i/3][i%3] - EDGE_WIDTH*(1+s->current_picture.linesize[i%3]));
/**
 * Prepare the picture state for coding or decoding a new frame.
 *
 * Visible steps: pad the edges of the current picture (if any), release the
 * oldest reference, shift the reference list by one so that the current
 * picture becomes last_picture[0], count the usable references, and request
 * a new buffer for current_picture.
 *
 * NOTE(review): the keyframe test around the reference counting, the
 * assignment of s->ref_frames and the return statements are not visible in
 * this chunk.
 */
2645 static int frame_start(SnowContext *s){
2647 int w= s->avctx->width; //FIXME round up to x16 ?
2648 int h= s->avctx->height;
// pad the picture that is about to become a reference; planes 1 and 2 use
// half the size and half the edge width
2650 if(s->current_picture.data[0]){
2651 s->dsp.draw_edges(s->current_picture.data[0], s->current_picture.linesize[0], w , h , EDGE_WIDTH );
2652 s->dsp.draw_edges(s->current_picture.data[1], s->current_picture.linesize[1], w>>1, h>>1, EDGE_WIDTH/2);
2653 s->dsp.draw_edges(s->current_picture.data[2], s->current_picture.linesize[2], w>>1, h>>1, EDGE_WIDTH/2);
2656 release_buffer(s->avctx);
// rotate: the oldest slot is recycled as the new current picture and every
// other reference moves one position back
2658 tmp= s->last_picture[s->max_ref_frames-1];
2659 memmove(s->last_picture+1, s->last_picture, (s->max_ref_frames-1)*sizeof(AVFrame));
// each halfpel_plane slot is moved as a block of 4*4 pointers
2660 memmove(s->halfpel_plane+1, s->halfpel_plane, (s->max_ref_frames-1)*sizeof(void*)*4*4);
2661 if(USE_HALFPEL_PLANE && s->current_picture.data[0])
2662 halfpel_interpol(s, s->halfpel_plane[0], &s->current_picture);
2663 s->last_picture[0]= s->current_picture;
2664 s->current_picture= tmp;
// walk the reference list until an empty slot; the key_frame test on the
// previous entry presumably ends the walk after a keyframe (the guarded
// statement is not visible here)
2670 for(i=0; i<s->max_ref_frames && s->last_picture[i].data[0]; i++)
2671 if(i && s->last_picture[i-1].key_frame)
2674 if(s->ref_frames==0){
2675 av_log(s->avctx,AV_LOG_ERROR, "No reference frames\n");
2680 s->current_picture.reference= 1;
2681 if(s->avctx->get_buffer(s->avctx, &s->current_picture) < 0){
2682 av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed\n");
2686 s->current_picture.key_frame= s->keyframe;
/**
 * Free everything owned by the context that is shared between decoder and
 * encoder: the transform buffers, the motion-estimation scratch areas, the
 * block array, the scratch buffer, the per-reference motion vector / score
 * arrays, any reference pictures still held, and the x_coeff array of every
 * subband.
 *
 * av_freep() is used for the allocations, so the freed pointers are reset
 * to NULL.
 */
2691 static av_cold void common_end(SnowContext *s){
2692 int plane_index, level, orientation, i;
2694 av_freep(&s->spatial_dwt_buffer);
2695 av_freep(&s->spatial_idwt_buffer);
2698 av_freep(&s->m.me.scratchpad);
2699 av_freep(&s->m.me.map);
2700 av_freep(&s->m.me.score_map);
2701 av_freep(&s->m.obmc_scratchpad);
2703 av_freep(&s->block);
2704 av_freep(&s->scratchbuf);
2706 for(i=0; i<MAX_REF_FRAMES; i++){
2707 av_freep(&s->ref_mvs[i]);
2708 av_freep(&s->ref_scores[i]);
// only slots that actually hold picture data are handed back
2709 if(s->last_picture[i].data[0])
2710 s->avctx->release_buffer(s->avctx, &s->last_picture[i]);
// same band iteration order as in common_init_after_header()
2713 for(plane_index=0; plane_index<3; plane_index++){
2714 for(level=s->spatial_decomposition_count-1; level>=0; level--){
2715 for(orientation=level ? 1 : 0; orientation<4; orientation++){
2716 SubBand *b= &s->plane[plane_index].band[level][orientation];
2718 av_freep(&b->x_coeff);
/**
 * Decoder init callback: announces PIX_FMT_YUV420P as the output format.
 *
 * NOTE(review): the rest of the body (braces, further initialisation calls,
 * return value) is not visible in this chunk.
 */
2724 static av_cold int decode_init(AVCodecContext *avctx)
2726 avctx->pix_fmt= PIX_FMT_YUV420P;
/**
 * Decode one packet into a picture.
 *
 * Visible steps: start the range decoder on the packet data, parse the
 * header, rebuild the band geometry and the slice buffer, start a new
 * frame, then for each of the three planes unpack the coefficients and run
 * the slice-based pipeline (subband decoding, correlation and
 * dequantization of band 0/0, inverse DWT, prediction) one block row at a
 * time.  Finally the oldest reference is released and either
 * current_picture or, with debug flag 2048, mconly_picture is returned
 * through data.
 *
 * NOTE(review): error returns, several local declarations (w, h, x, y, yd,
 * yq, extra, start_y, end_y, ...) and the block-decoding call are not
 * visible in this chunk.
 */
2733 static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, AVPacket *avpkt){
2734 const uint8_t *buf = avpkt->data;
2735 int buf_size = avpkt->size;
2736 SnowContext *s = avctx->priv_data;
2737 RangeCoder * const c= &s->c;
2739 AVFrame *picture = data;
2740 int level, orientation, plane_index;
2742 ff_init_range_decoder(c, buf, buf_size);
2743 ff_build_rac_states(c, 0.05*(1LL<<32), 256-8);
2745 s->current_picture.pict_type= FF_I_TYPE; //FIXME I vs. P
2746 if(decode_header(s)<0)
2748 common_init_after_header(avctx);
2750 // realloc slice buffer for the case that spatial_decomposition_count changed
2751 slice_buffer_destroy(&s->sb);
2752 slice_buffer_init(&s->sb, s->plane[0].height, (MB_SIZE >> s->block_max_depth) + s->spatial_decomposition_count * 8 + 1, s->plane[0].width, s->spatial_idwt_buffer);
// fast_mc is enabled only when the filter matches one specific coefficient
// set (the end of the condition is not visible here)
2754 for(plane_index=0; plane_index<3; plane_index++){
2755 Plane *p= &s->plane[plane_index];
2756 p->fast_mc= p->diag_mc && p->htaps==6 && p->hcoeff[0]==40
2757 && p->hcoeff[1]==-10
2763 if(frame_start(s) < 0)
2765 //keyframe flag duplication mess FIXME
// NOTE(review): this debug information is emitted at AV_LOG_ERROR level
2766 if(avctx->debug&FF_DEBUG_PICT_INFO)
2767 av_log(avctx, AV_LOG_ERROR, "keyframe:%d qlog:%d\n", s->keyframe, s->qlog);
2771 for(plane_index=0; plane_index<3; plane_index++){
2772 Plane *p= &s->plane[plane_index];
2776 int decode_state[MAX_DECOMPOSITIONS][4][1]; /* Stored state info for unpack_coeffs. 1 variable per instance. */
// debug flag 2048: build a prediction-only picture and copy it to
// mconly_picture
2778 if(s->avctx->debug&2048){
2779 memset(s->spatial_dwt_buffer, 0, sizeof(DWTELEM)*w*h);
2780 predict_plane(s, s->spatial_idwt_buffer, plane_index, 1);
2784 int v= s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x];
2785 s->mconly_picture.data[plane_index][y*s->mconly_picture.linesize[plane_index] + x]= v;
// entropy-decode the coefficients of every band of this plane
2791 for(level=0; level<s->spatial_decomposition_count; level++){
2792 for(orientation=level ? 1 : 0; orientation<4; orientation++){
2793 SubBand *b= &p->band[level][orientation];
2794 unpack_coeffs(s, b, b->parent, orientation);
2800 const int mb_h= s->b_height << s->block_max_depth;
2801 const int block_size = MB_SIZE >> s->block_max_depth;
2802 const int block_w = plane_index ? block_size/2 : block_size;
2804 DWTCompose cs[MAX_DECOMPOSITIONS];
2809 ff_spatial_idwt_buffered_init(cs, &s->sb, w, h, 1, s->spatial_decomposition_type, s->spatial_decomposition_count);
// one iteration per block row, mb_h inclusive
2810 for(mb_y=0; mb_y<=mb_h; mb_y++){
2812 int slice_starty = block_w*mb_y;
2813 int slice_h = block_w*(mb_y+1);
// outside the keyframe / debug&512 case the slice is moved up by half a block
2814 if (!(s->keyframe || s->avctx->debug&512)){
2815 slice_starty = FFMAX(0, slice_starty - (block_w >> 1));
2816 slice_h -= (block_w >> 1);
2819 for(level=0; level<s->spatial_decomposition_count; level++){
2820 for(orientation=level ? 1 : 0; orientation<4; orientation++){
2821 SubBand *b= &p->band[level][orientation];
2824 int our_mb_start = mb_y;
2825 int our_mb_end = (mb_y + 1);
// band rows needed for this slice: the slice bounds scaled down to the
// band's resolution plus a level-dependent margin
2827 start_y = (mb_y ? ((block_w * our_mb_start) >> (s->spatial_decomposition_count - level)) + s->spatial_decomposition_count - level + extra: 0);
2828 end_y = (((block_w * our_mb_end) >> (s->spatial_decomposition_count - level)) + s->spatial_decomposition_count - level + extra);
2829 if (!(s->keyframe || s->avctx->debug&512)){
2830 start_y = FFMAX(0, start_y - (block_w >> (1+s->spatial_decomposition_count - level)));
2831 end_y = FFMAX(0, end_y - (block_w >> (1+s->spatial_decomposition_count - level)));
2833 start_y = FFMIN(b->height, start_y);
2834 end_y = FFMIN(b->height, end_y);
2836 if (start_y != end_y){
// orientation 0 always works on band[0][0]: it is decoded and correlated
// one row ahead of the rows that are dequantized
2837 if (orientation == 0){
2838 SubBand * correlate_band = &p->band[0][0];
2839 int correlate_end_y = FFMIN(b->height, end_y + 1);
2840 int correlate_start_y = FFMIN(b->height, (start_y ? start_y + 1 : 0));
2841 decode_subband_slice_buffered(s, correlate_band, &s->sb, correlate_start_y, correlate_end_y, decode_state[0][0]);
2842 correlate_slice_buffered(s, &s->sb, correlate_band, correlate_band->ibuf, correlate_band->stride, 1, 0, correlate_start_y, correlate_end_y);
2843 dequantize_slice_buffered(s, &s->sb, correlate_band, correlate_band->ibuf, correlate_band->stride, start_y, end_y);
2846 decode_subband_slice_buffered(s, b, &s->sb, start_y, end_y, decode_state[level][orientation]);
// the inverse transform advances in steps of 4 rows up to the slice end
2851 for(; yd<slice_h; yd+=4){
2852 ff_spatial_idwt_buffered_slice(&s->dsp, cs, &s->sb, w, h, 1, s->spatial_decomposition_type, s->spatial_decomposition_count, yd);
// lossless mode: scale the rows up by FRAC_BITS
2855 if(s->qlog == LOSSLESS_QLOG){
2856 for(; yq<slice_h && yq<h; yq++){
2857 IDWTELEM * line = slice_buffer_get_line(&s->sb, yq);
2859 line[x] <<= FRAC_BITS;
2864 predict_slice_buffered(s, &s->sb, s->spatial_idwt_buffer, plane_index, 1, mb_y);
// rows of the finished slice are handed back to the slice buffer
2866 y = FFMIN(p->height, slice_starty);
2867 end_y = FFMIN(p->height, slice_h);
2869 slice_buffer_release(&s->sb, y++);
2872 slice_buffer_flush(&s->sb);
2879 release_buffer(avctx);
2881 if(!(s->avctx->debug&2048))
2882 *picture= s->current_picture;
2884 *picture= s->mconly_picture;
2886 *data_size = sizeof(AVFrame);
// number of bytes the range decoder has consumed from the packet
2888 bytes_read= c->bytestream - c->bytestream_start;
2889 if(bytes_read ==0) av_log(s->avctx, AV_LOG_ERROR, "error at end of frame\n"); //FIXME
/**
 * Decoder close callback: destroys the slice buffer s->sb.
 *
 * NOTE(review): any further cleanup and the return statement are not
 * visible in this chunk.
 */
2894 static av_cold int decode_end(AVCodecContext *avctx)
2896 SnowContext *s = avctx->priv_data;
2898 slice_buffer_destroy(&s->sb);
/**
 * Codec registration entry for the Snow decoder.
 *
 * Visible initialisers: the private context size (sizeof(SnowContext)), the
 * capability flags (CODEC_CAP_DR1; CODEC_CAP_DRAW_HORIZ_BAND is commented
 * out) and the long name.  The remaining initialisers are not visible in
 * this chunk.
 */
2905 AVCodec snow_decoder = {
2909 sizeof(SnowContext),
2914 CODEC_CAP_DR1 /*| CODEC_CAP_DRAW_HORIZ_BAND*/,
2916 .long_name = NULL_IF_CONFIG_SMALL("Snow"),
2919 #if CONFIG_SNOW_ENCODER
// Encoder init callback.
//
// Visible steps: refuse to run unless strict_std_compliance allows
// experimental codecs, reject the 9/7 wavelet when QSCALE is set with
// global_quality 0, take transform type / mv_scale / block_max_depth from
// the codec context, install the default 6-tap filter (40, -10, 2) on all
// planes, allocate the motion-estimation scratch areas, clamp the reference
// frame count to 1..MAX_REF_FRAMES, set up two-pass statistics and rate
// control, accept only PIX_FMT_YUV420P, and allocate the input picture and
// (for ME_ITER) the per-reference motion vector and score arrays.
//
// NOTE(review): none of the allocations below is checked in the visible
// lines and the return value of get_buffer() is ignored; the return
// statements and some declarations are not visible in this chunk.
2920 static av_cold int encode_init(AVCodecContext *avctx)
2922 SnowContext *s = avctx->priv_data;
2925 if(avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL){
2926 av_log(avctx, AV_LOG_ERROR, "This codec is under development, files encoded with it may not be decodable with future versions!!!\n"
2927 "Use vstrict=-2 / -strict -2 to use it anyway.\n");
2931 if(avctx->prediction_method == DWT_97
2932 && (avctx->flags & CODEC_FLAG_QSCALE)
2933 && avctx->global_quality == 0){
2934 av_log(avctx, AV_LOG_ERROR, "The 9/7 wavelet is incompatible with lossless mode.\n");
2938 s->spatial_decomposition_type= avctx->prediction_method; //FIXME add decorrelator type r transform_type
// mv_scale is 2 with the QPEL flag and 4 without; block_max_depth is 1 with
// the 4MV flag and 0 without
2940 s->mv_scale = (avctx->flags & CODEC_FLAG_QPEL) ? 2 : 4;
2941 s->block_max_depth= (avctx->flags & CODEC_FLAG_4MV ) ? 1 : 0;
// same coefficient set that decode_frame() tests for when enabling fast_mc
2943 for(plane_index=0; plane_index<3; plane_index++){
2944 s->plane[plane_index].diag_mc= 1;
2945 s->plane[plane_index].htaps= 6;
2946 s->plane[plane_index].hcoeff[0]= 40;
2947 s->plane[plane_index].hcoeff[1]= -10;
2948 s->plane[plane_index].hcoeff[2]= 2;
2949 s->plane[plane_index].fast_mc= 1;
2958 s->m.flags = avctx->flags;
2959 s->m.bit_rate= avctx->bit_rate;
2962 s->m.me.scratchpad= av_mallocz((avctx->width+64)*2*16*2*sizeof(uint8_t));
2963 s->m.me.map = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t));
2964 s->m.me.score_map = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t));
2965 s->m.obmc_scratchpad= av_mallocz(MB_SIZE*MB_SIZE*12*sizeof(uint32_t));
2966 h263_encode_init(&s->m); //mv_penalty
// clamp the requested reference count to the range 1..MAX_REF_FRAMES
2968 s->max_ref_frames = FFMAX(FFMIN(avctx->refs, MAX_REF_FRAMES), 1);
// first pass: make sure a 256-byte statistics buffer exists
2970 if(avctx->flags&CODEC_FLAG_PASS1){
2971 if(!avctx->stats_out)
2972 avctx->stats_out = av_mallocz(256);
// rate control is needed for the second pass and whenever QSCALE is not set
2974 if((avctx->flags&CODEC_FLAG_PASS2) || !(avctx->flags&CODEC_FLAG_QSCALE)){
2975 if(ff_rate_control_init(&s->m) < 0)
// pass1_rc is set when neither the QSCALE nor the PASS2 flag is present
2978 s->pass1_rc= !(avctx->flags & (CODEC_FLAG_QSCALE|CODEC_FLAG_PASS2));
2980 avctx->coded_frame= &s->current_picture;
2981 switch(avctx->pix_fmt){
2982 // case PIX_FMT_YUV444P:
2983 // case PIX_FMT_YUV422P:
2984 case PIX_FMT_YUV420P:
2986 // case PIX_FMT_YUV411P:
2987 // case PIX_FMT_YUV410P:
2988 s->colorspace_type= 0;
2990 /* case PIX_FMT_RGB32:
2994 av_log(avctx, AV_LOG_ERROR, "pixel format not supported\n");
2997 // avcodec_get_chroma_sub_sample(avctx->pix_fmt, &s->chroma_h_shift, &s->chroma_v_shift);
// both chroma shifts are hard-coded to 1
2998 s->chroma_h_shift= 1;
2999 s->chroma_v_shift= 1;
3001 ff_set_cmp(&s->dsp, s->dsp.me_cmp, s->avctx->me_cmp);
3002 ff_set_cmp(&s->dsp, s->dsp.me_sub_cmp, s->avctx->me_sub_cmp);
3004 s->avctx->get_buffer(s->avctx, &s->input_picture);
// iterative motion estimation keeps one motion vector and one score per
// smallest-size block for every reference frame
3006 if(s->avctx->me_method == ME_ITER){
3008 int size= s->b_width * s->b_height << 2*s->block_max_depth;
3009 for(i=0; i<s->max_ref_frames; i++){
3010 s->ref_mvs[i]= av_mallocz(size*sizeof(int16_t[2]));
3011 s->ref_scores[i]= av_mallocz(size*sizeof(uint32_t));
3018 //near copy & paste from dsputil, FIXME
// Walks a w x w block of pixels starting at pix, whose rows are line_size
// bytes apart, and returns an int (presumably the sum of the samples, as
// the name says - the accumulation statements are not visible in this
// chunk).
3019 static int pix_sum(uint8_t * pix, int line_size, int w)
3024 for (i = 0; i < w; i++) {
3025 for (j = 0; j < w; j++) {
// skip from the end of this row to the start of the next one (assumes the
// inner loop advanced pix by w)
3029 pix += line_size - w;
3034 //near copy & paste from dsputil, FIXME
// Walks a w x w block of pixels starting at pix, whose rows are line_size
// bytes apart, and returns an int (presumably the sum of the squared
// samples, looked up through sq - the accumulation statements are not
// visible in this chunk).
3035 static int pix_norm1(uint8_t * pix, int line_size, int w)
// sq points 256 entries into ff_squareTbl
3038 uint32_t *sq = ff_squareTbl + 256;
3041 for (i = 0; i < w; i++) {
3042 for (j = 0; j < w; j ++) {
// skip from the end of this row to the start of the next one (assumes the
// inner loop advanced pix by w)
3046 pix += line_size - w;
// Named slots of the motion vector predictor array P: entry 3 is the
// top-right neighbour, entry 4 the median predictor.
3054 #define P_TOPRIGHT P[3]
3055 #define P_MEDIAN P[4]
// flag bit for quarter-pel motion search; the original comment requires the
// value to stay 1
3057 #define FLAG_QPEL 1 //must be 1
3059 static int encode_q_branch(SnowContext *s, int level, int x, int y){
3060 uint8_t p_buffer[1024];
3061 uint8_t i_buffer[1024];
3062 uint8_t p_state[sizeof(s->block_state)];
3063 uint8_t i_state[sizeof(s->block_state)];
3065 uint8_t *pbbak= s->c.bytestream;
3066 uint8_t *pbbak_start= s->c.bytestream_start;
3067 int score, score2, iscore, i_len, p_len, block_s, sum, base_bits;
3068 const int w= s->b_width << s->block_max_depth;
3069 const int h= s->b_height << s->block_max_depth;
3070 const int rem_depth= s->block_max_depth - level;
3071 const int index= (x + y*w) << rem_depth;
3072 const int block_w= 1<<(LOG2_MB_SIZE - level);
3073 int trx= (x+1)<<rem_depth;
3074 int try= (y+1)<<rem_depth;
3075 const BlockNode *left = x ? &s->block[index-1] : &null_block;
3076 const BlockNode *top = y ? &s->block[index-w] : &null_block;
3077 const BlockNode *right = trx<w ? &s->block[index+1] : &null_block;
3078 const BlockNode *bottom= try<h ? &s->block[index+w] : &null_block;
3079 const BlockNode *tl = y && x ? &s->block[index-w-1] : left;
3080 const BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
3081 int pl = left->color[0];
3082 int pcb= left->color[1];
3083 int pcr= left->color[2];
3087 const int stride= s->current_picture.linesize[0];
3088 const int uvstride= s->current_picture.linesize[1];
3089 uint8_t *current_data[3]= { s->input_picture.data[0] + (x + y* stride)*block_w,
3090 s->input_picture.data[1] + (x + y*uvstride)*block_w/2,
3091 s->input_picture.data[2] + (x + y*uvstride)*block_w/2};
3093 int16_t last_mv[3][2];
3094 int qpel= !!(s->avctx->flags & CODEC_FLAG_QPEL); //unused
3095 const int shift= 1+qpel;
3096 MotionEstContext *c= &s->m.me;
3097 int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref);
3098 int mx_context= av_log2(2*FFABS(left->mx - top->mx));
3099 int my_context= av_log2(2*FFABS(left->my - top->my));
3100 int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
3101 int ref, best_ref, ref_score, ref_mx, ref_my;
3103 assert(sizeof(s->block_state) >= 256);
3105 set_blocks(s, level, x, y, pl, pcb, pcr, 0, 0, 0, BLOCK_INTRA);
3109 // clip predictors / edge ?
3111 P_LEFT[0]= left->mx;
3112 P_LEFT[1]= left->my;
3115 P_TOPRIGHT[0]= tr->mx;
3116 P_TOPRIGHT[1]= tr->my;
3118 last_mv[0][0]= s->block[index].mx;
3119 last_mv[0][1]= s->block[index].my;
3120 last_mv[1][0]= right->mx;
3121 last_mv[1][1]= right->my;
3122 last_mv[2][0]= bottom->mx;
3123 last_mv[2][1]= bottom->my;
3130 assert(c-> stride == stride);
3131 assert(c->uvstride == uvstride);
3133 c->penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_cmp);
3134 c->sub_penalty_factor= get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_sub_cmp);
3135 c->mb_penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->mb_cmp);
3136 c->current_mv_penalty= c->mv_penalty[s->m.f_code=1] + MAX_MV;
3138 c->xmin = - x*block_w - 16+3;
3139 c->ymin = - y*block_w - 16+3;
3140 c->xmax = - (x+1)*block_w + (w<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-3;
3141 c->ymax = - (y+1)*block_w + (h<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-3;
3143 if(P_LEFT[0] > (c->xmax<<shift)) P_LEFT[0] = (c->xmax<<shift);
3144 if(P_LEFT[1] > (c->ymax<<shift)) P_LEFT[1] = (c->ymax<<shift);
3145 if(P_TOP[0] > (c->xmax<<shift)) P_TOP[0] = (c->xmax<<shift);
3146 if(P_TOP[1] > (c->ymax<<shift)) P_TOP[1] = (c->ymax<<shift);
3147 if(P_TOPRIGHT[0] < (c->xmin<<shift)) P_TOPRIGHT[0]= (c->xmin<<shift);
3148 if(P_TOPRIGHT[0] > (c->xmax<<shift)) P_TOPRIGHT[0]= (c->xmax<<shift); //due to pmx no clip
3149 if(P_TOPRIGHT[1] > (c->ymax<<shift)) P_TOPRIGHT[1]= (c->ymax<<shift);
3151 P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]);
3152 P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);
3155 c->pred_x= P_LEFT[0];
3156 c->pred_y= P_LEFT[1];
3158 c->pred_x = P_MEDIAN[0];
3159 c->pred_y = P_MEDIAN[1];
3164 for(ref=0; ref<s->ref_frames; ref++){
3165 init_ref(c, current_data, s->last_picture[ref].data, NULL, block_w*x, block_w*y, 0);
3167 ref_score= ff_epzs_motion_search(&s->m, &ref_mx, &ref_my, P, 0, /*ref_index*/ 0, last_mv,
3168 (1<<16)>>shift, level-LOG2_MB_SIZE+4, block_w);
3170 assert(ref_mx >= c->xmin);
3171 assert(ref_mx <= c->xmax);
3172 assert(ref_my >= c->ymin);
3173 assert(ref_my <= c->ymax);
3175 ref_score= c->sub_motion_search(&s->m, &ref_mx, &ref_my, ref_score, 0, 0, level-LOG2_MB_SIZE+4, block_w);
3176 ref_score= ff_get_mb_score(&s->m, ref_mx, ref_my, 0, 0, level-LOG2_MB_SIZE+4, block_w, 0);
3177 ref_score+= 2*av_log2(2*ref)*c->penalty_factor;
3178 if(s->ref_mvs[ref]){
3179 s->ref_mvs[ref][index][0]= ref_mx;
3180 s->ref_mvs[ref][index][1]= ref_my;
3181 s->ref_scores[ref][index]= ref_score;
3183 if(score > ref_score){
3190 //FIXME if mb_cmp != SSE then intra cannot be compared currently and mb_penalty vs. lambda2
3193 base_bits= get_rac_count(&s->c) - 8*(s->c.bytestream - s->c.bytestream_start);
3195 pc.bytestream_start=
3196 pc.bytestream= p_buffer; //FIXME end/start? and at the other stoo
3197 memcpy(p_state, s->block_state, sizeof(s->block_state));
3199 if(level!=s->block_max_depth)
3200 put_rac(&pc, &p_state[4 + s_context], 1);
3201 put_rac(&pc, &p_state[1 + left->type + top->type], 0);
3202 if(s->ref_frames > 1)
3203 put_symbol(&pc, &p_state[128 + 1024 + 32*ref_context], best_ref, 0);
3204 pred_mv(s, &pmx, &pmy, best_ref, left, top, tr);
3205 put_symbol(&pc, &p_state[128 + 32*(mx_context + 16*!!best_ref)], mx - pmx, 1);
3206 put_symbol(&pc, &p_state[128 + 32*(my_context + 16*!!best_ref)], my - pmy, 1);
3207 p_len= pc.bytestream - pc.bytestream_start;
3208 score += (s->lambda2*(get_rac_count(&pc)-base_bits))>>FF_LAMBDA_SHIFT;
3210 block_s= block_w*block_w;
3211 sum = pix_sum(current_data[0], stride, block_w);
3212 l= (sum + block_s/2)/block_s;
3213 iscore = pix_norm1(current_data[0], stride, block_w) - 2*l*sum + l*l*block_s;
3215 block_s= block_w*block_w>>2;
3216 sum = pix_sum(current_data[1], uvstride, block_w>>1);
3217 cb= (sum + block_s/2)/block_s;
3218 // iscore += pix_norm1(&current_mb[1][0], uvstride, block_w>>1) - 2*cb*sum + cb*cb*block_s;
3219 sum = pix_sum(current_data[2], uvstride, block_w>>1);
3220 cr= (sum + block_s/2)/block_s;
3221 // iscore += pix_norm1(&current_mb[2][0], uvstride, block_w>>1) - 2*cr*sum + cr*cr*block_s;
3224 ic.bytestream_start=
3225 ic.bytestream= i_buffer; //FIXME end/start? and at the other stoo
3226 memcpy(i_state, s->block_state, sizeof(s->block_state));
3227 if(level!=s->block_max_depth)
3228 put_rac(&ic, &i_state[4 + s_context], 1);
3229 put_rac(&ic, &i_state[1 + left->type + top->type], 1);
3230 put_symbol(&ic, &i_state[32], l-pl , 1);
3231 put_symbol(&ic, &i_state[64], cb-pcb, 1);
3232 put_symbol(&ic, &i_state[96], cr-pcr, 1);
3233 i_len= ic.bytestream - ic.bytestream_start;
3234 iscore += (s->lambda2*(get_rac_count(&ic)-base_bits))>>FF_LAMBDA_SHIFT;
3236 // assert(score==256*256*256*64-1);
3237 assert(iscore < 255*255*256 + s->lambda2*10);
3238 assert(iscore >= 0);
3239 assert(l>=0 && l<=255);
3240 assert(pl>=0 && pl<=255);
3243 int varc= iscore >> 8;
3244 int vard= score >> 8;
3245 if (vard <= 64 || vard < varc)
3246 c->scene_change_score+= ff_sqrt(vard) - ff_sqrt(varc);
3248 c->scene_change_score+= s->m.qscale;
3251 if(level!=s->block_max_depth){
3252 put_rac(&s->c, &s->block_state[4 + s_context], 0);
3253 score2 = encode_q_branch(s, level+1, 2*x+0, 2*y+0);
3254 score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+0);
3255 score2+= encode_q_branch(s, level+1, 2*x+0, 2*y+1);
3256 score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+1);
3257 score2+= s->lambda2>>FF_LAMBDA_SHIFT; //FIXME exact split overhead
3259 if(score2 < score && score2 < iscore)
3264 pred_mv(s, &pmx, &pmy, 0, left, top, tr);
3265 memcpy(pbbak, i_buffer, i_len);
3267 s->c.bytestream_start= pbbak_start;
3268 s->c.bytestream= pbbak + i_len;
3269 set_blocks(s, level, x, y, l, cb, cr, pmx, pmy, 0, BLOCK_INTRA);
3270 memcpy(s->block_state, i_state, sizeof(s->block_state));
3273 memcpy(pbbak, p_buffer, p_len);
3275 s->c.bytestream_start= pbbak_start;
3276 s->c.bytestream= pbbak + p_len;
3277 set_blocks(s, level, x, y, pl, pcb, pcr, mx, my, best_ref, 0);
3278 memcpy(s->block_state, p_state, sizeof(s->block_state));
/**
 * Write the block at quadtree position (x, y) of the given level to the
 * range coder s->c, using the parameters already stored in s->block.
 * When level differs from s->block_max_depth a split flag is coded first and
 * the four child blocks are written recursively unless they are all identical.
 */
3283 static void encode_q_branch2(SnowContext *s, int level, int x, int y){
3284 const int w= s->b_width << s->block_max_depth;
3285 const int rem_depth= s->block_max_depth - level;
3286 const int index= (x + y*w) << rem_depth;
3287 int trx= (x+1)<<rem_depth;
3288 BlockNode *b= &s->block[index];
// Neighbouring blocks used as coding context; null_block stands in when
// x or y is 0, and tl/tr fall back to left/tl when unavailable.
3289 const BlockNode *left = x ? &s->block[index-1] : &null_block;
3290 const BlockNode *top = y ? &s->block[index-w] : &null_block;
3291 const BlockNode *tl = y && x ? &s->block[index-w-1] : left;
3292 const BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
// Colour predictors are taken from the left neighbour.
3293 int pl = left->color[0];
3294 int pcb= left->color[1];
3295 int pcr= left->color[2];
// Context indices derived from the neighbours' reference index, motion
// vector difference and quadtree level.
3297 int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref);
3298 int mx_context= av_log2(2*FFABS(left->mx - top->mx)) + 16*!!b->ref;
3299 int my_context= av_log2(2*FFABS(left->my - top->my)) + 16*!!b->ref;
3300 int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
3303 set_blocks(s, level, x, y, pl, pcb, pcr, 0, 0, 0, BLOCK_INTRA);
3307 if(level!=s->block_max_depth){
// Flag 1: the four sub-blocks match (presumably coded as one block below).
// Flag 0: the block is split and each quadrant is written recursively.
3308 if(same_block(b,b+1) && same_block(b,b+w) && same_block(b,b+w+1)){
3309 put_rac(&s->c, &s->block_state[4 + s_context], 1);
3311 put_rac(&s->c, &s->block_state[4 + s_context], 0);
3312 encode_q_branch2(s, level+1, 2*x+0, 2*y+0);
3313 encode_q_branch2(s, level+1, 2*x+1, 2*y+0);
3314 encode_q_branch2(s, level+1, 2*x+0, 2*y+1);
3315 encode_q_branch2(s, level+1, 2*x+1, 2*y+1);
// Intra block: type flag 1, then the three colour components as differences
// from the left neighbour's colour.
3319 if(b->type & BLOCK_INTRA){
3320 pred_mv(s, &pmx, &pmy, 0, left, top, tr);
3321 put_rac(&s->c, &s->block_state[1 + (left->type&1) + (top->type&1)], 1);
3322 put_symbol(&s->c, &s->block_state[32], b->color[0]-pl , 1);
3323 put_symbol(&s->c, &s->block_state[64], b->color[1]-pcb, 1);
3324 put_symbol(&s->c, &s->block_state[96], b->color[2]-pcr, 1);
3325 set_blocks(s, level, x, y, b->color[0], b->color[1], b->color[2], pmx, pmy, 0, BLOCK_INTRA);
// Inter block: type flag 0, the reference index (only when more than one
// reference frame is in use) and the motion vector as a difference from the
// pred_mv() prediction.
3327 pred_mv(s, &pmx, &pmy, b->ref, left, top, tr);
3328 put_rac(&s->c, &s->block_state[1 + (left->type&1) + (top->type&1)], 0);
3329 if(s->ref_frames > 1)
3330 put_symbol(&s->c, &s->block_state[128 + 1024 + 32*ref_context], b->ref, 0);
3331 put_symbol(&s->c, &s->block_state[128 + 32*mx_context], b->mx - pmx, 1);
3332 put_symbol(&s->c, &s->block_state[128 + 32*my_context], b->my - pmy, 1);
3333 set_blocks(s, level, x, y, pl, pcb, pcr, b->mx, b->my, b->ref, 0);
/**
 * Estimate a flat colour (DC) value for block (mb_x, mb_y) in plane
 * plane_index.
 * The block is temporarily marked intra with colour 0, the surrounding
 * blocks are accumulated with add_yblock(), and the input samples minus that
 * accumulated value are summed with OBMC weights.
 * @return the weighted quotient ab/aa scaled by 1<<LOG2_OBMC_MAX, rounded
 *         and clipped to 0..255
 */
3337 static int get_dc(SnowContext *s, int mb_x, int mb_y, int plane_index){
3339 Plane *p= &s->plane[plane_index];
3340 const int block_size = MB_SIZE >> s->block_max_depth;
// Planes other than 0 use half the block size and the next obmc_tab entry.
3341 const int block_w = plane_index ? block_size/2 : block_size;
3342 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
3343 const int obmc_stride= plane_index ? block_size : 2*block_size;
3344 const int ref_stride= s->current_picture.linesize[plane_index];
3345 uint8_t *src= s-> input_picture.data[plane_index];
3346 IDWTELEM *dst= (IDWTELEM*)s->m.obmc_scratchpad + plane_index*block_size*block_size*4; //FIXME change to unsigned
3347 const int b_stride = s->b_width << s->block_max_depth;
3348 const int w= p->width;
3349 const int h= p->height;
3350 int index= mb_x + mb_y*b_stride;
3351 BlockNode *b= &s->block[index];
// Copy of the block as it was on entry.
3352 BlockNode backup= *b;
3356 b->type|= BLOCK_INTRA;
3357 b->color[plane_index]= 0;
3358 memset(dst, 0, obmc_stride*obmc_stride*sizeof(IDWTELEM));
// (i&1, i>>1) selects one of the neighbouring block positions around
// (mb_x, mb_y); x, y is the top-left sample of the quadrant handled.
3361 int mb_x2= mb_x + (i &1) - 1;
3362 int mb_y2= mb_y + (i>>1) - 1;
3363 int x= block_w*mb_x2 + block_w/2;
3364 int y= block_w*mb_y2 + block_w/2;
3366 add_yblock(s, 0, NULL, dst + ((i&1)+(i>>1)*obmc_stride)*block_w, NULL, obmc,
3367 x, y, block_w, block_w, w, h, obmc_stride, ref_stride, obmc_stride, mb_x2, mb_y2, 0, 0, plane_index);
// Only samples inside the picture (0..w-1, 0..h-1) are visited.
3369 for(y2= FFMAX(y, 0); y2<FFMIN(h, y+block_w); y2++){
3370 for(x2= FFMAX(x, 0); x2<FFMIN(w, x+block_w); x2++){
3371 int index= x2-(block_w*mb_x - block_w/2) + (y2-(block_w*mb_y - block_w/2))*obmc_stride;
3372 int obmc_v= obmc[index];
// When the quadrant crosses a picture edge, the weight of the mirrored
// window position is added to obmc_v.
3374 if(y<0) obmc_v += obmc[index + block_w*obmc_stride];
3375 if(x<0) obmc_v += obmc[index + block_w];
3376 if(y+block_w>h) obmc_v += obmc[index - block_w*obmc_stride];
3377 if(x+block_w>w) obmc_v += obmc[index - block_w];
3378 //FIXME precalculate this or simplify it somehow else
3380 d = -dst[index] + (1<<(FRAC_BITS-1));
// ab accumulates weight * (input sample - rounded accumulated value),
// aa accumulates the squared weights.
3382 ab += (src[x2 + y2*ref_stride] - (d>>FRAC_BITS)) * obmc_v;
3383 aa += obmc_v * obmc_v; //FIXME precalculate this
3389 return av_clip(((ab<<LOG2_OBMC_MAX) + aa/2)/aa, 0, 255); //FIXME we should not need clipping
/**
 * Rough estimate of the number of bits needed to code block (x, y).
 * Intra blocks are costed from the colour differences to the left neighbour;
 * other blocks from dmx/dmy (filled in by pred_mv()) and the reference index.
 * @param w block width in block units, used to locate the top-right neighbour
 */
3392 static inline int get_block_bits(SnowContext *s, int x, int y, int w){
3393 const int b_stride = s->b_width << s->block_max_depth;
3394 const int b_height = s->b_height<< s->block_max_depth;
3395 int index= x + y*b_stride;
3396 const BlockNode *b = &s->block[index];
// null_block stands in for neighbours when x or y is 0.
3397 const BlockNode *left = x ? &s->block[index-1] : &null_block;
3398 const BlockNode *top = y ? &s->block[index-b_stride] : &null_block;
3399 const BlockNode *tl = y && x ? &s->block[index-b_stride-1] : left;
3400 const BlockNode *tr = y && x+w<b_stride ? &s->block[index-b_stride+w] : tl;
3402 // int mx_context= av_log2(2*FFABS(left->mx - top->mx));
3403 // int my_context= av_log2(2*FFABS(left->my - top->my));
// Positions left of, right of or below the block grid are special-cased.
3405 if(x<0 || x>=b_stride || y>=b_height)
3412 00001XXXX 15-30 8-15
3414 //FIXME try accurate rate
3415 //FIXME intra and inter predictors if surrounding blocks are not the same type
3416 if(b->type & BLOCK_INTRA){
3417 return 3+2*( av_log2(2*FFABS(left->color[0] - b->color[0]))
3418 + av_log2(2*FFABS(left->color[1] - b->color[1]))
3419 + av_log2(2*FFABS(left->color[2] - b->color[2])));
// NOTE(review): dmx/dmy are presumably turned into differences against
// b->mx/b->my before being costed - confirm.
3421 pred_mv(s, &dmx, &dmy, b->ref, left, top, tr);
3424 return 2*(1 + av_log2(2*FFABS(dmx)) //FIXME kill the 2* can be merged in lambda
3425 + av_log2(2*FFABS(dmy))
3426 + av_log2(2*b->ref));
/**
 * Rate-distortion cost of the current parameters of block (mb_x, mb_y).
 * The block is predicted with pred_block(), blended with the values in the
 * obmc_scratchpad using the obmc_edged weights, and compared against the
 * input picture with the configured me_cmp function.
 * @param obmc_edged OBMC weight window, addressed with obmc_stride per row
 * @return distortion + rate*penalty_factor
 */
3430 static int get_block_rd(SnowContext *s, int mb_x, int mb_y, int plane_index, const uint8_t *obmc_edged){
3431 Plane *p= &s->plane[plane_index];
3432 const int block_size = MB_SIZE >> s->block_max_depth;
3433 const int block_w = plane_index ? block_size/2 : block_size;
3434 const int obmc_stride= plane_index ? block_size : 2*block_size;
3435 const int ref_stride= s->current_picture.linesize[plane_index];
3436 uint8_t *dst= s->current_picture.data[plane_index];
3437 uint8_t *src= s-> input_picture.data[plane_index];
3438 IDWTELEM *pred= (IDWTELEM*)s->m.obmc_scratchpad + plane_index*block_size*block_size*4;
3439 uint8_t *cur = s->scratchbuf;
// Variable-length array sized by the picture stride; passed to pred_block().
3440 uint8_t tmp[ref_stride*(2*MB_SIZE+HTAPS_MAX-1)];
3441 const int b_stride = s->b_width << s->block_max_depth;
3442 const int b_height = s->b_height<< s->block_max_depth;
3443 const int w= p->width;
3444 const int h= p->height;
3447 const int penalty_factor= get_penalty_factor(s->lambda, s->lambda2, s->avctx->me_cmp);
// (sx, sy) is the top-left sample of the 2*block_w square window; x0..x1 and
// y0..y1 restrict it to the part lying inside the picture.
3448 int sx= block_w*mb_x - block_w/2;
3449 int sy= block_w*mb_y - block_w/2;
3450 int x0= FFMAX(0,-sx);
3451 int y0= FFMAX(0,-sy);
3452 int x1= FFMIN(block_w*2, w-sx);
3453 int y1= FFMIN(block_w*2, h-sy);
3456 pred_block(s, cur, tmp, ref_stride, sx, sy, block_w*2, block_w*2, &s->block[mb_x + mb_y*b_stride], plane_index, w, h);
3458 for(y=y0; y<y1; y++){
3459 const uint8_t *obmc1= obmc_edged + y*obmc_stride;
3460 const IDWTELEM *pred1 = pred + y*obmc_stride;
3461 uint8_t *cur1 = cur + y*ref_stride;
3462 uint8_t *dst1 = dst + sx + (sy+y)*ref_stride;
3463 for(x=x0; x<x1; x++){
// Weight the predicted sample and bring it to FRAC_BITS precision; which
// form is used depends on FRAC_BITS versus LOG2_OBMC_MAX.
3464 #if FRAC_BITS >= LOG2_OBMC_MAX
3465 int v = (cur1[x] * obmc1[x]) << (FRAC_BITS - LOG2_OBMC_MAX);
3467 int v = (cur1[x] * obmc1[x] + (1<<(LOG2_OBMC_MAX - FRAC_BITS-1))) >> (LOG2_OBMC_MAX - FRAC_BITS);
3469 v = (v + pred1[x]) >> FRAC_BITS;
// Out-of-range v becomes 0 when negative and ~0 when above 255
// (i.e. 255 once stored in a byte).
3470 if(v&(~255)) v= ~(v>>31);
3475 /* copy the regions where obmc[] = (uint8_t)256 */
3476 if(LOG2_OBMC_MAX == 8
3477 && (mb_x == 0 || mb_x == b_stride-1)
3478 && (mb_y == 0 || mb_y == b_height-1)){
3487 for(y=y0; y<y1; y++)
3488 memcpy(dst + sx+x0 + (sy+y)*ref_stride, cur + x0 + y*ref_stride, x1-x0);
3492 /* FIXME rearrange dsputil to fit 32x32 cmp functions */
3493 /* FIXME check alignment of the cmp wavelet vs the encoding wavelet */
3494 /* FIXME cmps overlap but do not cover the wavelet's whole support.
3495 * So improving the score of one block is not strictly guaranteed
3496 * to improve the score of the whole frame, thus iterative motion
3497 * estimation does not always converge. */
// The wavelet comparison functions are called directly on a 32-wide area;
// other comparison functions go through s->dsp.me_cmp.
3498 if(s->avctx->me_cmp == FF_CMP_W97)
3499 distortion = w97_32_c(&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, 32);
3500 else if(s->avctx->me_cmp == FF_CMP_W53)
3501 distortion = w53_32_c(&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, 32);
3505 int off = sx+16*(i&1) + (sy+16*(i>>1))*ref_stride;
3506 distortion += s->dsp.me_cmp[0](&s->m, src + off, dst + off, ref_stride, 16);
3511 distortion = s->dsp.me_cmp[0](&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, block_w*2);
// Rate: estimated bits of this block and of nearby blocks, via
// get_block_bits().
3520 rate += get_block_bits(s, mb_x + (i&1) - (i>>1), mb_y + (i>>1), 1);
3522 if(mb_x == b_stride-2)
3523 rate += get_block_bits(s, mb_x + 1, mb_y + 1, 1);
3525 return distortion + rate*penalty_factor;
/**
 * Rate-distortion cost of the 2x2 group of blocks whose top-left member is
 * (mb_x, mb_y).
 * Neighbouring block positions (mb_x2, mb_y2) are rendered into the current
 * picture with add_yblock(), parts outside the picture are filled from the
 * input so they do not contribute, and the distortion is accumulated per
 * block with me_cmp.
 * @return distortion + rate*penalty_factor
 */
3528 static int get_4block_rd(SnowContext *s, int mb_x, int mb_y, int plane_index){
3530 Plane *p= &s->plane[plane_index];
3531 const int block_size = MB_SIZE >> s->block_max_depth;
3532 const int block_w = plane_index ? block_size/2 : block_size;
3533 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
3534 const int obmc_stride= plane_index ? block_size : 2*block_size;
3535 const int ref_stride= s->current_picture.linesize[plane_index];
3536 uint8_t *dst= s->current_picture.data[plane_index];
3537 uint8_t *src= s-> input_picture.data[plane_index];
3538 //FIXME zero_dst is const but add_yblock changes dst if add is 0 (this is never the case for dst=zero_dst
3539 // const has only been removed from zero_dst to suppress a warning
// Function-level static: a single buffer shared by every call.
3540 static IDWTELEM zero_dst[4096]; //FIXME
3541 const int b_stride = s->b_width << s->block_max_depth;
3542 const int w= p->width;
3543 const int h= p->height;
3546 const int penalty_factor= get_penalty_factor(s->lambda, s->lambda2, s->avctx->me_cmp);
// i%3 and i/3 walk a 3-wide grid of block positions starting one block up
// and to the left of (mb_x, mb_y).
3549 int mb_x2= mb_x + (i%3) - 1;
3550 int mb_y2= mb_y + (i/3) - 1;
3551 int x= block_w*mb_x2 + block_w/2;
3552 int y= block_w*mb_y2 + block_w/2;
3554 add_yblock(s, 0, NULL, zero_dst, dst, obmc,
3555 x, y, block_w, block_w, w, h, /*dst_stride*/0, ref_stride, obmc_stride, mb_x2, mb_y2, 1, 1, plane_index);
// Rows and columns outside the picture are copied from src to dst.
3557 //FIXME find a cleaner/simpler way to skip the outside stuff
3558 for(y2= y; y2<0; y2++)
3559 memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, block_w);
3560 for(y2= h; y2<y+block_w; y2++)
3561 memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, block_w);
3563 for(y2= y; y2<y+block_w; y2++)
3564 memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, -x);
3567 for(y2= y; y2<y+block_w; y2++)
3568 memcpy(dst + w + y2*ref_stride, src + w + y2*ref_stride, x+block_w - w);
// me_cmp index 1 is used for 8-wide blocks, index 0 for 16-wide ones.
3571 assert(block_w== 8 || block_w==16);
3572 distortion += s->dsp.me_cmp[block_w==8](&s->m, src + x + y*ref_stride, dst + x + y*ref_stride, ref_stride, block_w);
3576 BlockNode *b= &s->block[mb_x+mb_y*b_stride];
// merged: all four blocks of the group are identical.
3577 int merged= same_block(b,b+1) && same_block(b,b+b_stride) && same_block(b,b+b_stride+1);
3585 rate = get_block_bits(s, mb_x, mb_y, 2);
// A merged group skips the first four dxy entries (the group's own blocks)
// and only adds the surrounding positions.
3586 for(i=merged?4:0; i<9; i++){
3587 static const int dxy[9][2] = {{0,0},{1,0},{0,1},{1,1},{2,0},{2,1},{-1,2},{0,2},{1,2}};
3588 rate += get_block_bits(s, mb_x + dxy[i][0], mb_y + dxy[i][1], 1);
3591 return distortion + rate*penalty_factor;
/**
 * Initialise one DWTCompose state per decomposition level, from the highest
 * level down to 0. Level n is set up with height>>n and stride<<n; the init
 * routine is chosen by type (DWT_97 or DWT_53).
 */
3594 static void ff_spatial_idwt_init(DWTCompose *cs, IDWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
3596 for(level=decomposition_count-1; level>=0; level--){
3598 case DWT_97: spatial_compose97i_init(cs+level, buffer, height>>level, stride<<level); break;
3599 case DWT_53: spatial_compose53i_init(cs+level, buffer, height>>level, stride<<level); break;
/**
 * Advance the inverse transform far enough for row y.
 * For each level, from the highest down to 0, the per-row compose step is
 * repeated while cs[level].y has not passed (y>>level)+support, bounded by
 * height>>level.
 */
3604 static void ff_spatial_idwt_slice(DWTCompose *cs, IDWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count, int y){
// Extra rows needed beyond y: 3 when type==1, otherwise 5.
3605 const int support = type==1 ? 3 : 5;
3609 for(level=decomposition_count-1; level>=0; level--){
3610 while(cs[level].y <= FFMIN((y>>level)+support, height>>level)){
3612 case DWT_97: spatial_compose97i_dy(cs+level, buffer, width>>level, height>>level, stride<<level);
3614 case DWT_53: spatial_compose53i_dy(cs+level, buffer, width>>level, height>>level, stride<<level);
/**
 * Run the whole inverse transform on buffer: initialise the per-level
 * compose states, then process the picture with ff_spatial_idwt_slice() in
 * steps of 4 rows.
 */
3621 static void ff_spatial_idwt(IDWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
3622 DWTCompose cs[MAX_DECOMPOSITIONS];
3624 ff_spatial_idwt_init(cs, buffer, width, height, stride, type, decomposition_count);
3625 for(y=0; y<height; y+=4)
3626 ff_spatial_idwt_slice(cs, buffer, width, height, stride, type, decomposition_count, y);
/**
 * Write the coefficients of subband b (read from src) to the range coder
 * s->c.
 * A first scan gathers run lengths into runs[] for coefficients whose
 * neighbourhood (l, lt, t, rt and the parent coefficient p) is entirely zero.
 * A second scan writes significance flags for coefficients with a non-zero
 * neighbourhood, the run lengths, and magnitude and sign of non-zero values.
 * Logs "encoded frame too large" when fewer than w*40 bytes are left in the
 * output buffer.
 */
3629 static int encode_subband_c0run(SnowContext *s, SubBand *b, IDWTELEM *src, IDWTELEM *parent, int stride, int orientation){
3630 const int w= b->width;
3631 const int h= b->height;
// First scan: v is the current coefficient; t, lt, rt are the neighbours in
// the row above and l the one to the left.
3643 int /*ll=0, */l=0, lt=0, t=0, rt=0;
3644 v= src[x + y*stride];
3647 t= src[x + (y-1)*stride];
3649 lt= src[x - 1 + (y-1)*stride];
3652 rt= src[x + 1 + (y-1)*stride];
3656 l= src[x - 1 + y*stride];
3658 if(orientation==1) ll= src[y + (x-2)*stride];
3659 else ll= src[x - 2 + y*stride];
// p is the co-located parent coefficient, when it lies inside the parent band.
3665 if(px<b->parent->width && py<b->parent->height)
3666 p= parent[px + py*2*stride];
// All-zero neighbourhood: the coefficient takes part in run-length coding.
3668 if(!(/*ll|*/l|lt|t|rt|p)){
3670 runs[run_index++]= run;
3678 max_index= run_index;
3679 runs[run_index++]= run;
3681 run= runs[run_index++];
// The number of runs is written first, followed by the first run length.
3683 put_symbol2(&s->c, b->state[30], max_index, 0);
3684 if(run_index <= max_index)
3685 put_symbol2(&s->c, b->state[1], run, 3);
3688 if(s->c.bytestream_end - s->c.bytestream < w*40){
3689 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
// Second scan: the same neighbourhood is gathered again for each coefficient.
3694 int /*ll=0, */l=0, lt=0, t=0, rt=0;
3695 v= src[x + y*stride];
3698 t= src[x + (y-1)*stride];
3700 lt= src[x - 1 + (y-1)*stride];
3703 rt= src[x + 1 + (y-1)*stride];
3707 l= src[x - 1 + y*stride];
3709 if(orientation==1) ll= src[y + (x-2)*stride];
3710 else ll= src[x - 2 + y*stride];
3716 if(px<b->parent->width && py<b->parent->height)
3717 p= parent[px + py*2*stride];
// Non-zero neighbourhood: code whether v is non-zero, with a context chosen
// from the weighted neighbour magnitudes.
3719 if(/*ll|*/l|lt|t|rt|p){
3720 int context= av_log2(/*FFABS(ll) + */3*FFABS(l) + FFABS(lt) + 2*FFABS(t) + FFABS(rt) + FFABS(p));
3722 put_rac(&s->c, &b->state[0][context], !!v);
3725 run= runs[run_index++];
3727 if(run_index <= max_index)
3728 put_symbol2(&s->c, b->state[1], run, 3);
// Non-zero coefficient: magnitude minus one, then the sign with a context
// derived from the left and top neighbours.
3736 int context= av_log2(/*FFABS(ll) + */3*FFABS(l) + FFABS(lt) + 2*FFABS(t) + FFABS(rt) + FFABS(p));
3737 int l2= 2*FFABS(l) + (l<0);
3738 int t2= 2*FFABS(t) + (t<0);
3740 put_symbol2(&s->c, b->state[context + 2], FFABS(v)-1, context-4);
3741 put_rac(&s->c, &b->state[0][16 + 1 + 3 + quant3bA[l2&0xFF] + 3*quant3bA[t2&0xFF]], v<0);
/**
 * Encode one subband. Forwards all arguments to encode_subband_c0run() and
 * returns its result; the other coder variants are commented out.
 */
3749 static int encode_subband(SnowContext *s, SubBand *b, IDWTELEM *src, IDWTELEM *parent, int stride, int orientation){
3750 // encode_subband_qtree(s, b, src, parent, stride, orientation);
3751 // encode_subband_z0run(s, b, src, parent, stride, orientation);
3752 return encode_subband_c0run(s, b, src, parent, stride, orientation);
3753 // encode_subband_dzr(s, b, src, parent, stride, orientation);
/**
 * Try the candidate p for block (mb_x, mb_y) and evaluate it with
 * get_block_rd() on plane 0.
 * p holds a colour triple for an intra candidate; otherwise p[0] and p[1]
 * are hashed into s->me_cache so that a repeated candidate can be detected
 * (presumably a motion vector - see check_block_inter()).
 * @param best_rd best cost found so far (presumably updated on improvement)
 */
3756 static av_always_inline int check_block(SnowContext *s, int mb_x, int mb_y, int p[3], int intra, const uint8_t *obmc_edged, int *best_rd){
3757 const int b_stride= s->b_width << s->block_max_depth;
3758 BlockNode *block= &s->block[mb_x + mb_y * b_stride];
// Copy of the block as it was on entry.
3759 BlockNode backup= *block;
3760 int rd, index, value;
3762 assert(mb_x>=0 && mb_y>=0);
3763 assert(mb_x<b_stride);
3766 block->color[0] = p[0];
3767 block->color[1] = p[1];
3768 block->color[2] = p[2];
3769 block->type |= BLOCK_INTRA;
// Cache slot and tag built from the candidate, the current cache generation
// and the block's reference index; an equal tag means the candidate was seen.
3771 index= (p[0] + 31*p[1]) & (ME_CACHE_SIZE-1);
3772 value= s->me_cache_generation + (p[0]>>10) + (p[1]<<6) + (block->ref<<12);
3773 if(s->me_cache[index] == value)
3775 s->me_cache[index]= value;
3779 block->type &= ~BLOCK_INTRA;
3782 rd= get_block_rd(s, mb_x, mb_y, 0, obmc_edged);
3794 /* special case for int[2] args we discard afterwards,
3795 * fixes compilation problem with gcc 2.95 */
/**
 * Convenience wrapper around check_block() for non-intra candidates: packs
 * (p0, p1) into a local array and calls check_block() with intra set to 0.
 * @return the value returned by check_block()
 */
3796 static av_always_inline int check_block_inter(SnowContext *s, int mb_x, int mb_y, int p0, int p1, const uint8_t *obmc_edged, int *best_rd){
3797 int p[2] = {p0, p1};
3798 return check_block(s, mb_x, mb_y, p, 0, obmc_edged, best_rd);
/**
 * Try one common non-intra candidate (p0, p1, ref) for the 2x2 group of
 * blocks whose top-left member is (mb_x, mb_y), evaluated with
 * get_4block_rd() on plane 0. mb_x and mb_y must both be even.
 * The four original blocks are kept in backup[] and written back at the end
 * of the listing (presumably only when the candidate is rejected - confirm).
 */
3801 static av_always_inline int check_4block_inter(SnowContext *s, int mb_x, int mb_y, int p0, int p1, int ref, int *best_rd){
3802 const int b_stride= s->b_width << s->block_max_depth;
3803 BlockNode *block= &s->block[mb_x + mb_y * b_stride];
3804 BlockNode backup[4]= {block[0], block[1], block[b_stride], block[b_stride+1]};
3805 int rd, index, value;
3807 assert(mb_x>=0 && mb_y>=0);
3808 assert(mb_x<b_stride);
3809 assert(((mb_x|mb_y)&1) == 0);
// Cache slot and tag for this candidate; an equal tag means it was seen
// in the current cache generation.
3811 index= (p0 + 31*p1) & (ME_CACHE_SIZE-1);
3812 value= s->me_cache_generation + (p0>>10) + (p1<<6) + (block->ref<<12);
3813 if(s->me_cache[index] == value)
3815 s->me_cache[index]= value;
// The top-left block is made non-intra and copied to the other three.
3820 block->type &= ~BLOCK_INTRA;
3821 block[1]= block[b_stride]= block[b_stride+1]= *block;
3823 rd= get_4block_rd(s, mb_x, mb_y, 0);
3830 block[0]= backup[0];
3831 block[1]= backup[1];
3832 block[b_stride]= backup[2];
3833 block[b_stride+1]= backup[3];
/**
 * Iteratively refine the block parameters stored in s->block.
 * Steps visible here:
 *  1. encode_q_branch() is run over all macroblocks to obtain initial
 *     parameters; block_state is saved before and restored afterwards.
 *  2. Up to 25 passes visit every block and try candidates through
 *     check_block() / check_block_inter(); when a block changes, BLOCK_OPT
 *     is cleared on its eight neighbours.
 *  3. When block_max_depth is 1, each 2x2 group of blocks is tested with
 *     common parameters through check_4block_inter().
 */
3838 static void iterative_me(SnowContext *s){
3839 int pass, mb_x, mb_y;
3840 const int b_width = s->b_width << s->block_max_depth;
3841 const int b_height= s->b_height << s->block_max_depth;
3842 const int b_stride= b_width;
// r holds a copy of the range coder (presumably restored after the initial
// search - confirm); state holds a copy of block_state.
3846 RangeCoder r = s->c;
3847 uint8_t state[sizeof(s->block_state)];
3848 memcpy(state, s->block_state, sizeof(s->block_state));
3849 for(mb_y= 0; mb_y<s->b_height; mb_y++)
3850 for(mb_x= 0; mb_x<s->b_width; mb_x++)
3851 encode_q_branch(s, 0, mb_x, mb_y);
3853 memcpy(s->block_state, state, sizeof(s->block_state));
3856 for(pass=0; pass<25; pass++){
3859 for(mb_y= 0; mb_y<b_height; mb_y++){
3860 for(mb_x= 0; mb_x<b_width; mb_x++){
3861 int dia_change, i, j, ref;
3862 int best_rd= INT_MAX, ref_rd;
3863 BlockNode backup, ref_b;
3864 const int index= mb_x + mb_y * b_stride;
3865 BlockNode *block= &s->block[index];
// The eight neighbours of the block; NULL where the neighbour would lie
// outside the block grid.
3866 BlockNode *tb = mb_y ? &s->block[index-b_stride ] : NULL;
3867 BlockNode *lb = mb_x ? &s->block[index -1] : NULL;
3868 BlockNode *rb = mb_x+1<b_width ? &s->block[index +1] : NULL;
3869 BlockNode *bb = mb_y+1<b_height ? &s->block[index+b_stride ] : NULL;
3870 BlockNode *tlb= mb_x && mb_y ? &s->block[index-b_stride-1] : NULL;
3871 BlockNode *trb= mb_x+1<b_width && mb_y ? &s->block[index-b_stride+1] : NULL;
3872 BlockNode *blb= mb_x && mb_y+1<b_height ? &s->block[index+b_stride-1] : NULL;
3873 BlockNode *brb= mb_x+1<b_width && mb_y+1<b_height ? &s->block[index+b_stride+1] : NULL;
3874 const int b_w= (MB_SIZE >> s->block_max_depth);
// Per-block copy of the OBMC window (variable-length array), adjusted below
// for blocks on the picture border.
3875 uint8_t obmc_edged[b_w*2][b_w*2];
// After the first pass, blocks still flagged BLOCK_OPT are special-cased
// (presumably skipped - confirm).
3877 if(pass && (block->type & BLOCK_OPT))
3879 block->type |= BLOCK_OPT;
// A new cache generation per block; the cache is cleared whenever the
// generation counter is 0.
3883 if(!s->me_cache_generation)
3884 memset(s->me_cache, 0, sizeof(s->me_cache));
3885 s->me_cache_generation += 1<<22;
3887 //FIXME precalculate
3890 memcpy(obmc_edged, obmc_tab[s->block_max_depth], b_w*b_w*4);
// Border blocks: the weights of the window half that falls outside the
// picture are folded onto the half inside.
3892 for(y=0; y<b_w*2; y++)
3893 memset(obmc_edged[y], obmc_edged[y][0] + obmc_edged[y][b_w-1], b_w);
3894 if(mb_x==b_stride-1)
3895 for(y=0; y<b_w*2; y++)
3896 memset(obmc_edged[y]+b_w, obmc_edged[y][b_w] + obmc_edged[y][b_w*2-1], b_w);
3898 for(x=0; x<b_w*2; x++)
3899 obmc_edged[0][x] += obmc_edged[b_w-1][x];
3900 for(y=1; y<b_w; y++)
3901 memcpy(obmc_edged[y], obmc_edged[0], b_w*2);
3903 if(mb_y==b_height-1){
3904 for(x=0; x<b_w*2; x++)
3905 obmc_edged[b_w*2-1][x] += obmc_edged[b_w][x];
3906 for(y=b_w; y<b_w*2-1; y++)
3907 memcpy(obmc_edged[y], obmc_edged[b_w*2-1], b_w*2);
3911 //skip stuff outside the picture
3912 if(mb_x==0 || mb_y==0 || mb_x==b_width-1 || mb_y==b_height-1){
3913 uint8_t *src= s-> input_picture.data[0];
3914 uint8_t *dst= s->current_picture.data[0];
3915 const int stride= s->current_picture.linesize[0];
3916 const int block_w= MB_SIZE >> s->block_max_depth;
3917 const int sx= block_w*mb_x - block_w/2;
3918 const int sy= block_w*mb_y - block_w/2;
3919 const int w= s->plane[0].width;
3920 const int h= s->plane[0].height;
// Parts of the window outside the picture are copied from the input to the
// current picture.
3924 memcpy(dst + sx + y*stride, src + sx + y*stride, block_w*2);
3925 for(y=h; y<sy+block_w*2; y++)
3926 memcpy(dst + sx + y*stride, src + sx + y*stride, block_w*2);
3928 for(y=sy; y<sy+block_w*2; y++)
3929 memcpy(dst + sx + y*stride, src + sx + y*stride, -sx);
3931 if(sx+block_w*2 > w){
3932 for(y=sy; y<sy+block_w*2; y++)
3933 memcpy(dst + w + y*stride, src + w + y*stride, sx+block_w*2 - w);
3937 // intra(black) = neighbors' contribution to the current block
3939 color[i]= get_dc(s, mb_x, mb_y, i);
3941 // get previous score (cannot be cached due to OBMC)
3942 if(pass > 0 && (block->type&BLOCK_INTRA)){
3943 int color0[3]= {block->color[0], block->color[1], block->color[2]};
3944 check_block(s, mb_x, mb_y, color0, 1, *obmc_edged, &best_rd);
3946 check_block_inter(s, mb_x, mb_y, block->mx, block->my, *obmc_edged, &best_rd);
// For each reference frame: try the stored vector of this block, the zero
// vector and the stored vectors of the four direct neighbours, then refine.
3950 for(ref=0; ref < s->ref_frames; ref++){
3951 int16_t (*mvr)[2]= &s->ref_mvs[ref][index];
3952 if(s->ref_scores[ref][index] > s->ref_scores[ref_b.ref][index]*3/2) //FIXME tune threshold
3957 check_block_inter(s, mb_x, mb_y, mvr[0][0], mvr[0][1], *obmc_edged, &best_rd);
3958 check_block_inter(s, mb_x, mb_y, 0, 0, *obmc_edged, &best_rd);
3960 check_block_inter(s, mb_x, mb_y, mvr[-b_stride][0], mvr[-b_stride][1], *obmc_edged, &best_rd);
3962 check_block_inter(s, mb_x, mb_y, mvr[-1][0], mvr[-1][1], *obmc_edged, &best_rd);
3964 check_block_inter(s, mb_x, mb_y, mvr[1][0], mvr[1][1], *obmc_edged, &best_rd);
3966 check_block_inter(s, mb_x, mb_y, mvr[b_stride][0], mvr[b_stride][1], *obmc_edged, &best_rd);
3969 //FIXME avoid subpel interpolation / round to nearest integer
// Diamond-shaped search around the current vector in steps of 4 units.
3972 for(i=0; i<FFMAX(s->avctx->dia_size, 1); i++){
3974 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+4*(i-j), block->my+(4*j), *obmc_edged, &best_rd);
3975 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx-4*(i-j), block->my-(4*j), *obmc_edged, &best_rd);
3976 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+4*(i-j), block->my-(4*j), *obmc_edged, &best_rd);
3977 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx-4*(i-j), block->my+(4*j), *obmc_edged, &best_rd);
// Refinement over the eight positions one unit away from the current vector.
3983 static const int square[8][2]= {{+1, 0},{-1, 0},{ 0,+1},{ 0,-1},{+1,+1},{-1,-1},{+1,-1},{-1,+1},};
3986 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+square[i][0], block->my+square[i][1], *obmc_edged, &best_rd);
3988 //FIXME or try the standard 2 pass qpel or similar
// The vector found for this reference frame is stored back into ref_mvs.
3990 mvr[0][0]= block->mx;
3991 mvr[0][1]= block->my;
3992 if(ref_rd > best_rd){
// Finally the intra candidate with the get_dc() colours is tried.
4000 check_block(s, mb_x, mb_y, color, 1, *obmc_edged, &best_rd);
4001 //FIXME RD style color selection
// The block changed: its neighbours must be re-examined in a later pass.
4003 if(!same_block(block, &backup)){
4004 if(tb ) tb ->type &= ~BLOCK_OPT;
4005 if(lb ) lb ->type &= ~BLOCK_OPT;
4006 if(rb ) rb ->type &= ~BLOCK_OPT;
4007 if(bb ) bb ->type &= ~BLOCK_OPT;
4008 if(tlb) tlb->type &= ~BLOCK_OPT;
4009 if(trb) trb->type &= ~BLOCK_OPT;
4010 if(blb) blb->type &= ~BLOCK_OPT;
4011 if(brb) brb->type &= ~BLOCK_OPT;
// NOTE(review): logged at error level with a NULL context although this is a
// progress message - consider s->avctx and a lower level.
4016 av_log(NULL, AV_LOG_ERROR, "pass:%d changed:%d\n", pass, change);
// 2x2 merge stage: groups are visited in steps of two blocks in each direction.
4021 if(s->block_max_depth == 1){
4023 for(mb_y= 0; mb_y<b_height; mb_y+=2){
4024 for(mb_x= 0; mb_x<b_width; mb_x+=2){
4026 int best_rd, init_rd;
4027 const int index= mb_x + mb_y * b_stride;
4030 b[0]= &s->block[index];
4032 b[2]= b[0]+b_stride;
// Groups whose four blocks are already identical are special-cased
// (presumably skipped - confirm).
4034 if(same_block(b[0], b[1]) &&
4035 same_block(b[0], b[2]) &&
4036 same_block(b[0], b[3]))
4039 if(!s->me_cache_generation)
4040 memset(s->me_cache, 0, sizeof(s->me_cache));
4041 s->me_cache_generation += 1<<22;
4043 init_rd= best_rd= get_4block_rd(s, mb_x, mb_y, 0);
4045 //FIXME more multiref search?
// Candidates: the rounded average of the four vectors with reference 0, then
// the parameters of each non-intra block of the group.
4046 check_4block_inter(s, mb_x, mb_y,
4047 (b[0]->mx + b[1]->mx + b[2]->mx + b[3]->mx + 2) >> 2,
4048 (b[0]->my + b[1]->my + b[2]->my + b[3]->my + 2) >> 2, 0, &best_rd);
4051 if(!(b[i]->type&BLOCK_INTRA))
4052 check_4block_inter(s, mb_x, mb_y, b[i]->mx, b[i]->my, b[i]->ref, &best_rd);
4054 if(init_rd != best_rd)
4058 av_log(NULL, AV_LOG_ERROR, "pass:4mv changed:%d\n", change*4);
/**
 * Write the block information of the frame to the range coder s->c.
 * With me_method ME_ITER, on a non-keyframe and with search set, an extra
 * step runs first (presumably iterative_me() - confirm). Each block is then
 * written with encode_q_branch2() when me_method is ME_ITER or search is 0,
 * otherwise with encode_q_branch().
 * Logs "encoded frame too large" when fewer than w*MB_SIZE*MB_SIZE*3 bytes
 * are left in the output buffer.
 */
4062 static void encode_blocks(SnowContext *s, int search){
4067 if(s->avctx->me_method == ME_ITER && !s->keyframe && search)
4071 if(s->c.bytestream_end - s->c.bytestream < w*MB_SIZE*MB_SIZE*3){ //FIXME nicer limit
4072 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
4076 if(s->avctx->me_method == ME_ITER || !search)
4077 encode_q_branch2(s, 0, x, y);
4079 encode_q_branch (s, 0, x, y);
/**
 * Quantise the coefficients of subband b from src into dst.
 * With s->qlog == LOSSLESS_QLOG the values are copied unchanged. Otherwise
 * values inside the dead zone given by thres1/thres2 become 0 and the rest
 * are divided by qmul, with the sign applied after the division.
 * @param bias when non-zero no rounding offset is used; when zero the offset
 *             is (3*qmul)>>3
 */
4084 static void quantize(SnowContext *s, SubBand *b, IDWTELEM *dst, DWTELEM *src, int stride, int bias){
4085 const int w= b->width;
4086 const int h= b->height;
// The quantiser step comes from the qexp table entry for the low bits of
// qlog, shifted by the remaining bits plus ENCODER_EXTRA_BITS.
4087 const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
4088 const int qmul= qexp[qlog&(QROOT-1)]<<((qlog>>QSHIFT) + ENCODER_EXTRA_BITS);
4089 int x,y, thres1, thres2;
4091 if(s->qlog == LOSSLESS_QLOG){
4094 dst[x + y*stride]= src[x + y*stride];
4098 bias= bias ? 0 : (3*qmul)>>3;
4099 thres1= ((qmul - bias)>>QEXPSHIFT) - 1;
// Variant without a rounding offset.
4105 int i= src[x + y*stride];
// A single unsigned comparison tests whether i lies outside the dead zone.
4107 if((unsigned)(i+thres1) > thres2){
4110 i/= qmul; //FIXME optimize
4111 dst[x + y*stride]= i;
4115 i/= qmul; //FIXME optimize
4116 dst[x + y*stride]= -i;
4119 dst[x + y*stride]= 0;
// Variant that adds bias before dividing.
4125 int i= src[x + y*stride];
4127 if((unsigned)(i+thres1) > thres2){
4130 i= (i + bias) / qmul; //FIXME optimize
4131 dst[x + y*stride]= i;
4135 i= (i + bias) / qmul; //FIXME optimize
4136 dst[x + y*stride]= -i;
4139 dst[x + y*stride]= 0;
/**
 * Dequantise the coefficients of subband b in place.
 * Nothing is done when s->qlog == LOSSLESS_QLOG. Otherwise each value is
 * scaled as (|i|*qmul + qadd) >> QEXPSHIFT with its sign kept.
 */
4145 static void dequantize(SnowContext *s, SubBand *b, IDWTELEM *src, int stride){
4146 const int w= b->width;
4147 const int h= b->height;
4148 const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
4149 const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
// Reconstruction offset derived from s->qbias.
4150 const int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
4153 if(s->qlog == LOSSLESS_QLOG) return;
4157 int i= src[x + y*stride];
// The first form negates i before and after scaling (presumably the
// negative-value branch), the second handles the remaining values.
4159 src[x + y*stride]= -((-i*qmul + qadd)>>(QEXPSHIFT)); //FIXME try different bias
4161 src[x + y*stride]= (( i*qmul + qadd)>>(QEXPSHIFT));
/**
 * Subtract a neighbour-based prediction from every coefficient of subband b,
 * in place. The scan runs from the bottom-right coefficient to the top-left
 * one, so the neighbours used (left, top, top-left, top-right) still hold
 * their original values when read.
 */
4167 static void decorrelate(SnowContext *s, SubBand *b, IDWTELEM *src, int stride, int inverse, int use_median){
4168 const int w= b->width;
4169 const int h= b->height;
4172 for(y=h-1; y>=0; y--){
4173 for(x=w-1; x>=0; x--){
4174 int i= x + y*stride;
// Predictor variants: median of left, top and top-right; median of left, top
// and left+top-topleft; plain left; or plain top (presumably selected by
// use_median and the position - confirm).
4178 if(y && x+1<w) src[i] -= mid_pred(src[i - 1], src[i - stride], src[i - stride + 1]);
4179 else src[i] -= src[i - 1];
4181 if(y) src[i] -= mid_pred(src[i - 1], src[i - stride], src[i - 1] + src[i - stride] - src[i - 1 - stride]);
4182 else src[i] -= src[i - 1];
4185 if(y) src[i] -= src[i - stride];
/**
 * Add the neighbour-based prediction back onto every coefficient of subband
 * b, in place. The predictor expressions are the ones decorrelate()
 * subtracts.
 */
4191 static void correlate(SnowContext *s, SubBand *b, IDWTELEM *src, int stride, int inverse, int use_median){
4192 const int w= b->width;
4193 const int h= b->height;
4198 int i= x + y*stride;
// Predictor variants: median of left, top and top-right; median of left, top
// and left+top-topleft; plain left; or plain top (presumably selected by
// use_median and the position - confirm).
4202 if(y && x+1<w) src[i] += mid_pred(src[i - 1], src[i - stride], src[i - stride + 1]);
4203 else src[i] += src[i - 1];
4205 if(y) src[i] += mid_pred(src[i - 1], src[i - stride], src[i - 1] + src[i - stride] - src[i - 1 - stride]);
4206 else src[i] += src[i - 1];
4209 if(y) src[i] += src[i - stride];
/**
 * Write the per-band qlog values to the range coder s->c as signed symbols
 * using s->header_state.
 * Planes 0 and 1 are covered. Level 0 starts at orientation 0, higher levels
 * at orientation 1, and orientation 2 is never written.
 */
4215 static void encode_qlogs(SnowContext *s){
4216 int plane_index, level, orientation;
4218 for(plane_index=0; plane_index<2; plane_index++){
4219 for(level=0; level<s->spatial_decomposition_count; level++){
4220 for(orientation=level ? 1:0; orientation<4; orientation++){
4221 if(orientation==2) continue;
4222 put_symbol(&s->c, s->header_state, s->plane[plane_index].band[level][orientation].qlog, 1);
/**
 * Write the frame header to the range coder s->c.
 * In order: the keyframe flag; the stream parameters (version, always_reset,
 * decomposition settings, colourspace, chroma shifts, spatial scalability,
 * reference frame count); a flag and description of the motion-compensation
 * filter when it differs from the last one written; the spatial
 * decomposition count when it changed; and finally the decomposition type,
 * qlog, mv_scale, qbias and block_max_depth as signed differences from the
 * s->last_* values.
 * NOTE(review): the stream parameters are presumably written for keyframes
 * only - confirm.
 */
4228 static void encode_header(SnowContext *s){
4232 memset(kstate, MID_STATE, sizeof(kstate));
4234 put_rac(&s->c, kstate, s->keyframe);
// On a keyframe, or with always_reset set, the remembered last_* values are
// reset so that the differences below are taken against a known baseline.
4235 if(s->keyframe || s->always_reset){
4237 s->last_spatial_decomposition_type=
4241 s->last_block_max_depth= 0;
4242 for(plane_index=0; plane_index<2; plane_index++){
4243 Plane *p= &s->plane[plane_index];
4246 memset(p->last_hcoeff, 0, sizeof(p->last_hcoeff));
4250 put_symbol(&s->c, s->header_state, s->version, 0);
4251 put_rac(&s->c, s->header_state, s->always_reset);
4252 put_symbol(&s->c, s->header_state, s->temporal_decomposition_type, 0);
4253 put_symbol(&s->c, s->header_state, s->temporal_decomposition_count, 0);
4254 put_symbol(&s->c, s->header_state, s->spatial_decomposition_count, 0);
4255 put_symbol(&s->c, s->header_state, s->colorspace_type, 0);
4256 put_symbol(&s->c, s->header_state, s->chroma_h_shift, 0);
4257 put_symbol(&s->c, s->header_state, s->chroma_v_shift, 0);
4258 put_rac(&s->c, s->header_state, s->spatial_scalability);
4259 // put_rac(&s->c, s->header_state, s->rate_scalability);
4260 put_symbol(&s->c, s->header_state, s->max_ref_frames-1, 0);
// update_mc becomes non-zero when any filter parameter of planes 0 or 1
// differs from the last one remembered.
4267 for(plane_index=0; plane_index<2; plane_index++){
4268 Plane *p= &s->plane[plane_index];
4269 update_mc |= p->last_htaps != p->htaps;
4270 update_mc |= p->last_diag_mc != p->diag_mc;
4271 update_mc |= !!memcmp(p->last_hcoeff, p->hcoeff, sizeof(p->hcoeff));
4273 put_rac(&s->c, s->header_state, update_mc);
// Filter description per plane: diag_mc flag, htaps/2-1, then the absolute
// values of hcoeff[htaps/2] down to hcoeff[1].
4275 for(plane_index=0; plane_index<2; plane_index++){
4276 Plane *p= &s->plane[plane_index];
4277 put_rac(&s->c, s->header_state, p->diag_mc);
4278 put_symbol(&s->c, s->header_state, p->htaps/2-1, 0);
4279 for(i= p->htaps/2; i; i--)
4280 put_symbol(&s->c, s->header_state, FFABS(p->hcoeff[i]), 0);
// Flag 1 followed by the new count when it changed, otherwise flag 0.
4283 if(s->last_spatial_decomposition_count != s->spatial_decomposition_count){
4284 put_rac(&s->c, s->header_state, 1);
4285 put_symbol(&s->c, s->header_state, s->spatial_decomposition_count, 0);
4288 put_rac(&s->c, s->header_state, 0);
4291 put_symbol(&s->c, s->header_state, s->spatial_decomposition_type - s->last_spatial_decomposition_type, 1);
4292 put_symbol(&s->c, s->header_state, s->qlog - s->last_qlog , 1);
4293 put_symbol(&s->c, s->header_state, s->mv_scale - s->last_mv_scale, 1);
4294 put_symbol(&s->c, s->header_state, s->qbias - s->last_qbias , 1);
4295 put_symbol(&s->c, s->header_state, s->block_max_depth - s->last_block_max_depth, 1);
/**
 * Remember the header values of the current frame in the last_* fields.
 * encode_header() compares against these fields and codes several values as
 * differences to them, so they have to follow the values actually in use.
 */
4299 static void update_last_header_values(SnowContext *s){
/* motion compensation filter state of planes 0 and 1 */
4303 for(plane_index=0; plane_index<2; plane_index++){
4304 Plane *p= &s->plane[plane_index];
4305 p->last_diag_mc= p->diag_mc;
4306 p->last_htaps = p->htaps;
4307 memcpy(p->last_hcoeff, p->hcoeff, sizeof(p->hcoeff));
/* frame-level values */
4311 s->last_spatial_decomposition_type = s->spatial_decomposition_type;
4312 s->last_qlog = s->qlog;
4313 s->last_qbias = s->qbias;
4314 s->last_mv_scale = s->mv_scale;
4315 s->last_block_max_depth = s->block_max_depth;
4316 s->last_spatial_decomposition_count = s->spatial_decomposition_count;
/**
 * Convert a quality value to a qlog.
 * @param qscale quality value; it is divided by FF_QP2LAMBDA before the logarithm is taken
 * @return QROOT * log2(qscale / FF_QP2LAMBDA) rounded with rint(), plus the
 *         constant offset 61*QROOT/8
 */
4319 static int qscale2qlog(int qscale){
4320 return rint(QROOT*log(qscale / (float)FF_QP2LAMBDA)/log(2))
4321 + 61*QROOT/8; //<64 >60
/**
 * One-pass rate control step.
 * Estimates the complexity of the frame from the plane 0 subbands, asks
 * ff_rate_estimate_qscale() for a quality, stores it in pict->quality and
 * adjusts s->lambda and s->qlog to match.
 * NOTE(review): the caller stores the result as delta_qlog and treats a value
 * <= INT_MIN as failure -- confirm the return statements agree with that.
 */
4324 static int ratecontrol_1pass(SnowContext *s, AVFrame *pict)
4326 /* Estimate the frame's complexity as a sum of weighted dwt coefficients.
4327 * FIXME we know exact mv bits at this point,
4328 * but ratecontrol isn't set up to include them. */
4329 uint32_t coef_sum= 0;
4330 int level, orientation, delta_qlog;
4332 for(level=0; level<s->spatial_decomposition_count; level++){
4333 for(orientation=level ? 1 : 0; orientation<4; orientation++){
4334 SubBand *b= &s->plane[0].band[level][orientation];
4335 IDWTELEM *buf= b->ibuf;
4336 const int w= b->width;
4337 const int h= b->height;
4338 const int stride= b->stride;
/* band quantizer: qlog offset by 2*QROOT and clipped to [0, QROOT*16];
 * qmul is the qexp[] entry shifted by qlog>>QSHIFT, qdiv its reciprocal scaled by 1<<16 */
4339 const int qlog= av_clip(2*QROOT + b->qlog, 0, QROOT*16);
4340 const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
4341 const int qdiv= (1<<16)/qmul;
4343 //FIXME this is ugly
/* copy the band from b->buf into b->ibuf so decorrelate() can run on it */
4346 buf[x+y*stride]= b->buf[x+y*stride];
4348 decorrelate(s, b, buf, stride, 1, 0);
/* add up the coefficient magnitudes, each weighted by qdiv (the >>16 removes its scale) */
4351 coef_sum+= abs(buf[x+y*stride]) * qdiv >> 16;
4355 /* ugly, ratecontrol just takes a sqrt again */
4356 coef_sum = (uint64_t)coef_sum * coef_sum >> 16;
4357 assert(coef_sum < INT_MAX);
/* I frames report the estimate as mb_var_sum, the other case reports it as
 * mc_mb_var_sum; the unused field is cleared */
4359 if(pict->pict_type == FF_I_TYPE){
4360 s->m.current_picture.mb_var_sum= coef_sum;
4361 s->m.current_picture.mc_mb_var_sum= 0;
4363 s->m.current_picture.mc_mb_var_sum= coef_sum;
4364 s->m.current_picture.mb_var_sum= 0;
/* a negative quality from the rate estimator is checked for right below */
4367 pict->quality= ff_rate_estimate_qscale(&s->m, 1);
4368 if (pict->quality < 0)
/* lambda is 3/2 of the quality; s->qlog is moved by delta_qlog to the value
 * matching the new quality */
4370 s->lambda= pict->quality * 3/2;
4371 delta_qlog= qscale2qlog(pict->quality) - s->qlog;
4372 s->qlog+= delta_qlog;
/**
 * Set the qlog of every subband of plane p.
 * For each subband the work buffer is cleared, one coefficient is placed in
 * the middle of the band, the inverse DWT is run, and b->qlog is derived from
 * the resulting error value.
 */
4376 static void calculate_visual_weight(SnowContext *s, Plane *p){
4377 int width = p->width;
4378 int height= p->height;
4379 int level, orientation, x, y;
4381 for(level=0; level<s->spatial_decomposition_count; level++){
4382 for(orientation=level ? 1 : 0; orientation<4; orientation++){
4383 SubBand *b= &p->band[level][orientation];
4384 IDWTELEM *ibuf= b->ibuf;
/* NOTE(review): presumably b->ibuf points into s->spatial_idwt_buffer, so the
 * write below lands in the buffer that was just cleared -- confirm */
4387 memset(s->spatial_idwt_buffer, 0, sizeof(*s->spatial_idwt_buffer)*width*height);
4388 ibuf[b->width/2 + b->height/2*b->stride]= 256*16;
4389 ff_spatial_idwt(s->spatial_idwt_buffer, width, height, width, s->spatial_decomposition_type, s->spatial_decomposition_count);
/* walk the reconstructed plane; every sample is scaled by 16
 * NOTE(review): error is presumably the sum of d*d over these samples -- confirm */
4390 for(y=0; y<height; y++){
4391 for(x=0; x<width; x++){
4392 int64_t d= s->spatial_idwt_buffer[x + y*width]*16;
/* qlog = logarithm to base 2^(1/QROOT) of 352256/sqrt(error), rounded by adding 0.5 */
4397 b->qlog= (int)(log(352256.0/sqrt(error)) / log(pow(2.0, 1.0/QROOT))+0.5);
/**
 * Encode one picture with the range coder.
 *
 * @param avctx    codec context; avctx->priv_data is used as the SnowContext
 * @param buf      output buffer handed to ff_init_range_encoder()
 * @param buf_size size of buf, handed to ff_init_range_encoder() with it
 * @param data     input picture, used as an AVFrame
 * @return the result of ff_rac_terminate() at the end of the function
 *         NOTE(review): confirm what the rate-control failure checks return
 */
4402 static int encode_frame(AVCodecContext *avctx, unsigned char *buf, int buf_size, void *data){
4403 SnowContext *s = avctx->priv_data;
4404 RangeCoder * const c= &s->c;
4405 AVFrame *pict = data;
4406 const int width= s->avctx->width;
4407 const int height= s->avctx->height;
4408 int level, orientation, plane_index, i, y;
/* scratch copies of the coder contexts; they are filled before the first
 * encode_blocks() call and copied back after one-pass rate control */
4409 uint8_t rc_header_bak[sizeof(s->header_state)];
4410 uint8_t rc_block_bak[sizeof(s->block_state)];
4412 ff_init_range_encoder(c, buf, buf_size);
4413 ff_build_rac_states(c, 0.05*(1LL<<32), 256-8);
/* copy the caller's picture row by row into s->input_picture; each side uses its own linesize */
4417 for(y=0; y<(height>>shift); y++)
4418 memcpy(&s->input_picture.data[i][y * s->input_picture.linesize[i]],
4419 &pict->data[i][y * pict->linesize[i]],
4422 s->new_picture = *pict;
4424 s->m.picture_number= avctx->frame_number;
/* second pass: the picture type comes from the rate control entries and,
 * unless CODEC_FLAG_QSCALE is set, so does the quality */
4425 if(avctx->flags&CODEC_FLAG_PASS2){
4427 pict->pict_type= s->m.rc_context.entry[avctx->frame_number].new_pict_type;
4428 s->keyframe= pict->pict_type==FF_I_TYPE;
4429 if(!(avctx->flags&CODEC_FLAG_QSCALE)) {
4430 pict->quality= ff_rate_estimate_qscale(&s->m, 0);
4431 if (pict->quality < 0)
/* other case: a keyframe whenever gop_size is 0 or the frame number is a multiple of gop_size */
4435 s->keyframe= avctx->gop_size==0 || avctx->frame_number % avctx->gop_size == 0;
4437 pict->pict_type= s->keyframe ? FF_I_TYPE : FF_P_TYPE;
/* one-pass rate control has no estimate for the very first frame, so a fixed quality is used */
4440 if(s->pass1_rc && avctx->frame_number == 0)
4441 pict->quality= 2*FF_QP2LAMBDA;
4443 s->qlog= qscale2qlog(pict->quality);
4444 s->lambda = pict->quality * 3/2;
/* a negative qlog, or quality 0 together with CODEC_FLAG_QSCALE, selects LOSSLESS_QLOG */
4446 if(s->qlog < 0 || (!pict->quality && (avctx->flags & CODEC_FLAG_QSCALE))){
4447 s->qlog= LOSSLESS_QLOG;
4449 }//else keep previous frame's qlog until after motion estimation
4453 s->m.current_picture_ptr= &s->m.current_picture;
/* P frames: fill the embedded MpegEncContext (s->m) with the plane 0 pointers,
 * line sizes, block geometry and lambda values
 * NOTE(review): presumably consumed by motion estimation inside encode_blocks() -- confirm */
4454 if(pict->pict_type == FF_P_TYPE){
4455 int block_width = (width +15)>>4;
4456 int block_height= (height+15)>>4;
4457 int stride= s->current_picture.linesize[0];
4459 assert(s->current_picture.data[0]);
4460 assert(s->last_picture[0].data[0]);
4462 s->m.avctx= s->avctx;
4463 s->m.current_picture.data[0]= s->current_picture.data[0];
4464 s->m. last_picture.data[0]= s->last_picture[0].data[0];
4465 s->m. new_picture.data[0]= s-> input_picture.data[0];
4466 s->m. last_picture_ptr= &s->m. last_picture;
4468 s->m. last_picture.linesize[0]=
4469 s->m. new_picture.linesize[0]=
4470 s->m.current_picture.linesize[0]= stride;
4471 s->m.uvlinesize= s->current_picture.linesize[1];
4473 s->m.height= height;
/* geometry in units of 16x16 blocks, rounded up */
4474 s->m.mb_width = block_width;
4475 s->m.mb_height= block_height;
4476 s->m.mb_stride= s->m.mb_width+1;
4477 s->m.b8_stride= 2*s->m.mb_width+1;
4479 s->m.pict_type= pict->pict_type;
4480 s->m.me_method= s->avctx->me_method;
4481 s->m.me.scene_change_score=0;
4482 s->m.flags= s->avctx->flags;
4483 s->m.quarter_sample= (s->avctx->flags & CODEC_FLAG_QPEL)!=0;
4484 s->m.out_format= FMT_H263;
4485 s->m.unrestricted_mv= 1;
/* qscale and lambda2 are both derived from lambda with rounding */
4487 s->m.lambda = s->lambda;
4488 s->m.qscale= (s->m.lambda*139 + FF_LAMBDA_SCALE*64) >> (FF_LAMBDA_SHIFT + 7);
4489 s->lambda2= s->m.lambda2= (s->m.lambda*s->m.lambda + FF_LAMBDA_SCALE/2) >> FF_LAMBDA_SHIFT;
4491 s->m.dsp= s->dsp; //move
/* save the coder contexts so they can be restored if the frame has to be
 * coded again after one-pass rate control */
4497 memcpy(rc_header_bak, s->header_state, sizeof(s->header_state));
4498 memcpy(rc_block_bak, s->block_state, sizeof(s->block_state));
/* both assignments below use the same decomposition count of 5 */
4503 if(pict->pict_type == FF_I_TYPE)
4504 s->spatial_decomposition_count= 5;
4506 s->spatial_decomposition_count= 5;
4508 s->m.pict_type = pict->pict_type;
4509 s->qbias= pict->pict_type == FF_P_TYPE ? 2 : 0;
4511 common_init_after_header(avctx);
/* a changed decomposition count requires fresh subband qlogs for all three planes */
4513 if(s->last_spatial_decomposition_count != s->spatial_decomposition_count){
4514 for(plane_index=0; plane_index<3; plane_index++){
4515 calculate_visual_weight(s, &s->plane[plane_index]);
/* bit accounting: misc_bits is what has been written before the blocks,
 * mv_bits is what encode_blocks() adds on top of that */
4520 s->m.misc_bits = 8*(s->c.bytestream - s->c.bytestream_start);
4521 encode_blocks(s, 1);
4522 s->m.mv_bits = 8*(s->c.bytestream - s->c.bytestream_start) - s->m.misc_bits;
/* per-plane coding loop */
4524 for(plane_index=0; plane_index<3; plane_index++){
4525 Plane *p= &s->plane[plane_index];
4529 // int bits= put_bits_count(&s->c.pb);
4531 if(!(avctx->flags2 & CODEC_FLAG2_MEMC_ONLY)){
4533 if(pict->data[plane_index]) //FIXME gray hack
/* load the input samples, scaled up by FRAC_BITS, into the work buffer */
4536 s->spatial_idwt_buffer[y*w + x]= pict->data[plane_index][y*pict->linesize[plane_index] + x]<<FRAC_BITS;
/* NOTE(review): final argument 0 here and 1 after reconstruction; presumably
 * 0 removes the prediction and 1 adds it back -- confirm against predict_plane() */
4539 predict_plane(s, s->spatial_idwt_buffer, plane_index, 0);
4542 && pict->pict_type == FF_P_TYPE
4543 && !(avctx->flags&CODEC_FLAG_PASS2)
4544 && s->m.me.scene_change_score > s->avctx->scenechange_threshold){
/* scene change on a P frame outside pass 2: restart the range coder and code
 * the picture as an I frame instead
 * NOTE(review): confirm how encoding is restarted after this point */
4545 ff_init_range_encoder(c, buf, buf_size);
4546 ff_build_rac_states(c, 0.05*(1LL<<32), 256-8);
4547 pict->pict_type= FF_I_TYPE;
4549 s->current_picture.key_frame=1;
/* lossless: shift the samples back down by FRAC_BITS with a rounding offset;
 * the other assignment below shifts them up by ENCODER_EXTRA_BITS instead */
4553 if(s->qlog == LOSSLESS_QLOG){
4556 s->spatial_dwt_buffer[y*w + x]= (s->spatial_idwt_buffer[y*w + x] + (1<<(FRAC_BITS-1))-1)>>FRAC_BITS;
4562 s->spatial_dwt_buffer[y*w + x]=s->spatial_idwt_buffer[y*w + x]<<ENCODER_EXTRA_BITS;
/* forward transform: dwt_quantize() or the plain ff_spatial_dwt()
 * NOTE(review): confirm the condition that selects between the two */
4568 dwt_quantize(s, p, s->spatial_dwt_buffer, w, h, w, s->spatial_decomposition_type);
4570 ff_spatial_dwt(s->spatial_dwt_buffer, w, h, w, s->spatial_decomposition_type, s->spatial_decomposition_count);
/* one-pass rate control runs once plane 0 is transformed; afterwards the coder
 * is reset to the saved contexts and the blocks are coded again */
4572 if(s->pass1_rc && plane_index==0){
4573 int delta_qlog = ratecontrol_1pass(s, pict);
4574 if (delta_qlog <= INT_MIN)
4577 //reordering qlog in the bitstream would eliminate this reset
4578 ff_init_range_encoder(c, buf, buf_size);
4579 memcpy(s->header_state, rc_header_bak, sizeof(s->header_state));
4580 memcpy(s->block_state, rc_block_bak, sizeof(s->block_state));
4582 encode_blocks(s, 0);
/* per subband: quantize b->buf into b->ibuf, decorrelate, write the band to
 * the bitstream, then run correlate() on the same buffer */
4586 for(level=0; level<s->spatial_decomposition_count; level++){
4587 for(orientation=level ? 1 : 0; orientation<4; orientation++){
4588 SubBand *b= &p->band[level][orientation];
4591 quantize(s, b, b->ibuf, b->buf, b->stride, s->qbias);
4593 decorrelate(s, b, b->ibuf, b->stride, pict->pict_type == FF_P_TYPE, 0);
4594 encode_subband(s, b, b->ibuf, b->parent ? b->parent->ibuf : NULL, b->stride, orientation);
4595 assert(b->parent==NULL || b->parent->stride == b->stride*2);
4597 correlate(s, b, b->ibuf, b->stride, 1, 0);
/* reconstruction: dequantize every subband, run the inverse transform, undo
 * the lossless scaling and call predict_plane() with final argument 1 */
4601 for(level=0; level<s->spatial_decomposition_count; level++){
4602 for(orientation=level ? 1 : 0; orientation<4; orientation++){
4603 SubBand *b= &p->band[level][orientation];
4605 dequantize(s, b, b->ibuf, b->stride);
4609 ff_spatial_idwt(s->spatial_idwt_buffer, w, h, w, s->spatial_decomposition_type, s->spatial_decomposition_count);
4610 if(s->qlog == LOSSLESS_QLOG){
4613 s->spatial_idwt_buffer[y*w + x]<<=FRAC_BITS;
4617 predict_plane(s, s->spatial_idwt_buffer, plane_index, 1);
/* NOTE(review): presumably the CODEC_FLAG2_MEMC_ONLY path -- I frames copy the
 * input picture unchanged, other frames are rebuilt from an all-zero buffer */
4620 if(pict->pict_type == FF_I_TYPE){
4623 s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x]=
4624 pict->data[plane_index][y*pict->linesize[plane_index] + x];
4628 memset(s->spatial_idwt_buffer, 0, sizeof(IDWTELEM)*w*h);
4629 predict_plane(s, s->spatial_idwt_buffer, plane_index, 1);
/* PSNR statistics: d is the per-sample difference between the reconstructed
 * and the input picture; the resulting error is recorded per plane */
4632 if(s->avctx->flags&CODEC_FLAG_PSNR){
4635 if(pict->data[plane_index]) //FIXME gray hack
4638 int d= s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x] - pict->data[plane_index][y*pict->linesize[plane_index] + x];
4642 s->avctx->error[plane_index] += error;
4643 s->current_picture.error[plane_index] = error;
/* remember this frame's header values for the next header's delta coding */
4648 update_last_header_values(s);
4650 release_buffer(avctx);
/* per-frame bookkeeping and bit statistics for rate control and the caller */
4652 s->current_picture.coded_picture_number = avctx->frame_number;
4653 s->current_picture.pict_type = pict->pict_type;
4654 s->current_picture.quality = pict->quality;
4655 s->m.frame_bits = 8*(s->c.bytestream - s->c.bytestream_start);
/* texture bits are whatever remains after misc and motion vector bits */
4656 s->m.p_tex_bits = s->m.frame_bits - s->m.misc_bits - s->m.mv_bits;
4657 s->m.current_picture.display_picture_number =
4658 s->m.current_picture.coded_picture_number = avctx->frame_number;
4659 s->m.current_picture.quality = pict->quality;
4660 s->m.total_bits += 8*(s->c.bytestream - s->c.bytestream_start);
/* NOTE(review): presumably only reached with one-pass rate control; a negative
 * result is checked for here -- confirm the guard */
4662 if (ff_rate_estimate_qscale(&s->m, 0) < 0)
4664 if(avctx->flags&CODEC_FLAG_PASS1)
4665 ff_write_pass1_stats(&s->m);
4666 s->m.last_pict_type = s->m.pict_type;
4667 avctx->frame_bits = s->m.frame_bits;
4668 avctx->mv_bits = s->m.mv_bits;
4669 avctx->misc_bits = s->m.misc_bits;
4670 avctx->p_tex_bits = s->m.p_tex_bits;
/* finish the range coder; its result is the function's return value */
4674 return ff_rac_terminate(c);
/**
 * Encoder teardown.
 * Frees the buffer held in avctx->stats_out with av_free().
 * NOTE(review): confirm what else is released through s and what is returned.
 */
4677 static av_cold int encode_end(AVCodecContext *avctx)
4679 SnowContext *s = avctx->priv_data;
4682 av_free(avctx->stats_out);
/* Codec table entry for the Snow encoder.
 * NOTE(review): sizeof(SnowContext) is a positional initializer, presumably the
 * size of the private context -- confirm against the AVCodec field order. */
4687 AVCodec snow_encoder = {
4691 sizeof(SnowContext),
4695 .long_name = NULL_IF_CONFIG_SMALL("Snow"),
4705 #include "libavutil/lfg.h"
4710 int buffer[2][width*height];
4714 s.spatial_decomposition_count=6;
4715 s.spatial_decomposition_type=1;
4717 av_lfg_init(&prng, 1);
4719 printf("testing 5/3 DWT\n");
4720 for(i=0; i<width*height; i++)
4721 buffer[0][i] = buffer[1][i] = av_lfg_get(&prng) % 54321 - 12345;
4723 ff_spatial_dwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4724 ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4726 for(i=0; i<width*height; i++)
4727 if(buffer[0][i]!= buffer[1][i]) printf("fsck: %6d %12d %7d\n",i, buffer[0][i], buffer[1][i]);
4729 printf("testing 9/7 DWT\n");
4730 s.spatial_decomposition_type=0;
4731 for(i=0; i<width*height; i++)
4732 buffer[0][i] = buffer[1][i] = av_lfg_get(&prng) % 54321 - 12345;
4734 ff_spatial_dwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4735 ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4737 for(i=0; i<width*height; i++)
4738 if(FFABS(buffer[0][i] - buffer[1][i])>20) printf("fsck: %6d %12d %7d\n",i, buffer[0][i], buffer[1][i]);
4741 printf("testing AC coder\n");
4742 memset(s.header_state, 0, sizeof(s.header_state));
4743 ff_init_range_encoder(&s.c, buffer[0], 256*256);
4744 ff_init_cabac_states(&s.c, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64);
4746 for(i=-256; i<256; i++){
4747 put_symbol(&s.c, s.header_state, i*i*i/3*FFABS(i), 1);
4749 ff_rac_terminate(&s.c);
4751 memset(s.header_state, 0, sizeof(s.header_state));
4752 ff_init_range_decoder(&s.c, buffer[0], 256*256);
4753 ff_init_cabac_states(&s.c, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64);
4755 for(i=-256; i<256; i++){
4757 j= get_symbol(&s.c, s.header_state, 1);
4758 if(j!=i*i*i/3*FFABS(i)) printf("fsck: %d != %d\n", i, j);
4762 int level, orientation, x, y;
4763 int64_t errors[8][4];
4766 memset(errors, 0, sizeof(errors));
4767 s.spatial_decomposition_count=3;
4768 s.spatial_decomposition_type=0;
4769 for(level=0; level<s.spatial_decomposition_count; level++){
4770 for(orientation=level ? 1 : 0; orientation<4; orientation++){
4771 int w= width >> (s.spatial_decomposition_count-level);
4772 int h= height >> (s.spatial_decomposition_count-level);
4773 int stride= width << (s.spatial_decomposition_count-level);
4774 DWTELEM *buf= buffer[0];
4777 if(orientation&1) buf+=w;
4778 if(orientation>1) buf+=stride>>1;
4780 memset(buffer[0], 0, sizeof(int)*width*height);
4781 buf[w/2 + h/2*stride]= 256*256;
4782 ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4783 for(y=0; y<height; y++){
4784 for(x=0; x<width; x++){
4785 int64_t d= buffer[0][x + y*width];
4787 if(FFABS(width/2-x)<9 && FFABS(height/2-y)<9 && level==2) printf("%8"PRId64" ", d);
4789 if(FFABS(height/2-y)<9 && level==2) printf("\n");
4791 error= (int)(sqrt(error)+0.5);
4792 errors[level][orientation]= error;
4793 if(g) g=av_gcd(g, error);
4797 printf("static int const visual_weight[][4]={\n");
4798 for(level=0; level<s.spatial_decomposition_count; level++){
4800 for(orientation=0; orientation<4; orientation++){
4801 printf("%8"PRId64",", errors[level][orientation]/g);
4808 int w= width >> (s.spatial_decomposition_count-level);
4809 //int h= height >> (s.spatial_decomposition_count-level);
4810 int stride= width << (s.spatial_decomposition_count-level);
4811 DWTELEM *buf= buffer[0];
4817 memset(buffer[0], 0, sizeof(int)*width*height);
4819 for(y=0; y<height; y++){
4820 for(x=0; x<width; x++){
4821 int tab[4]={0,2,3,1};
4822 buffer[0][x+width*y]= 256*256*tab[(x&1) + 2*(y&1)];
4825 ff_spatial_dwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4829 buf[x + y*stride ]=169;
4830 buf[x + y*stride-w]=64;
4833 ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4835 for(y=0; y<height; y++){
4836 for(x=0; x<width; x++){
4837 int64_t d= buffer[0][x + y*width];
4839 if(FFABS(width/2-x)<9 && FFABS(height/2-y)<9) printf("%8"PRId64" ", d);
4841 if(FFABS(height/2-y)<9) printf("\n");