2 * Copyright (C) 2004 Michael Niedermayer <michaelni@gmx.at>
4 * This file is part of FFmpeg.
6 * FFmpeg is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * FFmpeg is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with FFmpeg; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
25 #include "rangecoder.h"
28 #include "mpegvideo.h"
33 static const int8_t quant3[256]={
34 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
35 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
36 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
37 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
38 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
39 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
40 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
41 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
42 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
43 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
44 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
45 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
46 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
47 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
48 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
49 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 0,
51 static const int8_t quant3b[256]={
52 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
53 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
54 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
55 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
56 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
57 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
58 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
59 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
60 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
61 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
62 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
63 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
64 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
65 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
66 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
67 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
69 static const int8_t quant3bA[256]={
70 0, 0, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
71 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
72 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
73 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
74 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
75 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
76 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
77 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
78 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
79 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
80 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
81 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
82 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
83 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
84 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
85 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
87 static const int8_t quant5[256]={
88 0, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
89 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
90 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
91 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
92 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
93 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
94 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
95 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
96 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
97 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
98 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
99 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
100 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
101 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
102 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
103 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,-1,
105 static const int8_t quant7[256]={
106 0, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
107 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
108 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3,
109 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
110 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
111 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
112 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
113 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
114 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
115 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
116 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
117 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
118 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
119 -3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-2,-2,-2,
120 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
121 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,
123 static const int8_t quant9[256]={
124 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3,
125 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
126 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
127 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
128 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
129 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
130 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
131 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
132 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
133 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
134 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
135 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
136 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
137 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
138 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,
139 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-1,-1,
141 static const int8_t quant11[256]={
142 0, 1, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4,
143 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
144 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
145 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
146 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
147 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
148 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
149 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
150 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
151 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
152 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
153 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
154 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
155 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-4,-4,
156 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
157 -4,-4,-4,-4,-4,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-1,
159 static const int8_t quant13[256]={
160 0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
161 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
162 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
163 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
164 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
165 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
166 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
167 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
168 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
169 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
170 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
171 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
172 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-5,
173 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
174 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
175 -4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,-2,-2,-1,
179 static const uint8_t obmc32[1024]={
180 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
181 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0,
182 0, 0, 0, 4, 4, 4, 4, 8, 8, 12, 12, 12, 16, 16, 16, 16, 16, 16, 16, 16, 12, 12, 12, 8, 8, 4, 4, 4, 4, 0, 0, 0,
183 0, 0, 4, 4, 8, 8, 12, 16, 16, 20, 24, 24, 28, 28, 32, 32, 32, 32, 28, 28, 24, 24, 20, 16, 16, 12, 8, 8, 4, 4, 0, 0,
184 0, 0, 4, 8, 8, 12, 16, 24, 28, 32, 36, 40, 44, 48, 48, 48, 48, 48, 48, 44, 40, 36, 32, 28, 24, 16, 12, 8, 8, 4, 0, 0,
185 0, 4, 4, 8, 12, 20, 24, 32, 40, 44, 52, 56, 60, 64, 68, 72, 72, 68, 64, 60, 56, 52, 44, 40, 32, 24, 20, 12, 8, 4, 4, 0,
186 0, 4, 4, 12, 16, 24, 32, 40, 52, 60, 68, 76, 80, 88, 88, 92, 92, 88, 88, 80, 76, 68, 60, 52, 40, 32, 24, 16, 12, 4, 4, 0,
187 0, 4, 8, 16, 24, 32, 40, 52, 64, 76, 84, 92,100,108,112,116,116,112,108,100, 92, 84, 76, 64, 52, 40, 32, 24, 16, 8, 4, 0,
188 0, 4, 8, 16, 28, 40, 52, 64, 76, 88,100,112,124,132,136,140,140,136,132,124,112,100, 88, 76, 64, 52, 40, 28, 16, 8, 4, 0,
189 0, 4, 12, 20, 32, 44, 60, 76, 88,104,120,132,144,152,160,164,164,160,152,144,132,120,104, 88, 76, 60, 44, 32, 20, 12, 4, 0,
190 0, 4, 12, 24, 36, 48, 68, 84,100,120,136,152,164,176,180,184,184,180,176,164,152,136,120,100, 84, 68, 48, 36, 24, 12, 4, 0,
191 0, 4, 12, 24, 40, 56, 76, 92,112,132,152,168,180,192,204,208,208,204,192,180,168,152,132,112, 92, 76, 56, 40, 24, 12, 4, 0,
192 0, 4, 16, 28, 44, 60, 80,100,124,144,164,180,196,208,220,224,224,220,208,196,180,164,144,124,100, 80, 60, 44, 28, 16, 4, 0,
193 0, 8, 16, 28, 48, 64, 88,108,132,152,176,192,208,224,232,240,240,232,224,208,192,176,152,132,108, 88, 64, 48, 28, 16, 8, 0,
194 0, 4, 16, 32, 48, 68, 88,112,136,160,180,204,220,232,244,248,248,244,232,220,204,180,160,136,112, 88, 68, 48, 32, 16, 4, 0,
195 1, 8, 16, 32, 48, 72, 92,116,140,164,184,208,224,240,248,255,255,248,240,224,208,184,164,140,116, 92, 72, 48, 32, 16, 8, 1,
196 1, 8, 16, 32, 48, 72, 92,116,140,164,184,208,224,240,248,255,255,248,240,224,208,184,164,140,116, 92, 72, 48, 32, 16, 8, 1,
197 0, 4, 16, 32, 48, 68, 88,112,136,160,180,204,220,232,244,248,248,244,232,220,204,180,160,136,112, 88, 68, 48, 32, 16, 4, 0,
198 0, 8, 16, 28, 48, 64, 88,108,132,152,176,192,208,224,232,240,240,232,224,208,192,176,152,132,108, 88, 64, 48, 28, 16, 8, 0,
199 0, 4, 16, 28, 44, 60, 80,100,124,144,164,180,196,208,220,224,224,220,208,196,180,164,144,124,100, 80, 60, 44, 28, 16, 4, 0,
200 0, 4, 12, 24, 40, 56, 76, 92,112,132,152,168,180,192,204,208,208,204,192,180,168,152,132,112, 92, 76, 56, 40, 24, 12, 4, 0,
201 0, 4, 12, 24, 36, 48, 68, 84,100,120,136,152,164,176,180,184,184,180,176,164,152,136,120,100, 84, 68, 48, 36, 24, 12, 4, 0,
202 0, 4, 12, 20, 32, 44, 60, 76, 88,104,120,132,144,152,160,164,164,160,152,144,132,120,104, 88, 76, 60, 44, 32, 20, 12, 4, 0,
203 0, 4, 8, 16, 28, 40, 52, 64, 76, 88,100,112,124,132,136,140,140,136,132,124,112,100, 88, 76, 64, 52, 40, 28, 16, 8, 4, 0,
204 0, 4, 8, 16, 24, 32, 40, 52, 64, 76, 84, 92,100,108,112,116,116,112,108,100, 92, 84, 76, 64, 52, 40, 32, 24, 16, 8, 4, 0,
205 0, 4, 4, 12, 16, 24, 32, 40, 52, 60, 68, 76, 80, 88, 88, 92, 92, 88, 88, 80, 76, 68, 60, 52, 40, 32, 24, 16, 12, 4, 4, 0,
206 0, 4, 4, 8, 12, 20, 24, 32, 40, 44, 52, 56, 60, 64, 68, 72, 72, 68, 64, 60, 56, 52, 44, 40, 32, 24, 20, 12, 8, 4, 4, 0,
207 0, 0, 4, 8, 8, 12, 16, 24, 28, 32, 36, 40, 44, 48, 48, 48, 48, 48, 48, 44, 40, 36, 32, 28, 24, 16, 12, 8, 8, 4, 0, 0,
208 0, 0, 4, 4, 8, 8, 12, 16, 16, 20, 24, 24, 28, 28, 32, 32, 32, 32, 28, 28, 24, 24, 20, 16, 16, 12, 8, 8, 4, 4, 0, 0,
209 0, 0, 0, 4, 4, 4, 4, 8, 8, 12, 12, 12, 16, 16, 16, 16, 16, 16, 16, 16, 12, 12, 12, 8, 8, 4, 4, 4, 4, 0, 0, 0,
210 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0,
211 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
214 static const uint8_t obmc16[256]={
215 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0,
216 0, 4, 4, 8, 16, 20, 20, 24, 24, 20, 20, 16, 8, 4, 4, 0,
217 0, 4, 16, 24, 36, 44, 52, 60, 60, 52, 44, 36, 24, 16, 4, 0,
218 0, 8, 24, 44, 60, 80, 96,104,104, 96, 80, 60, 44, 24, 8, 0,
219 0, 16, 36, 60, 92,116,136,152,152,136,116, 92, 60, 36, 16, 0,
220 0, 20, 44, 80,116,152,180,196,196,180,152,116, 80, 44, 20, 0,
221 4, 20, 52, 96,136,180,212,228,228,212,180,136, 96, 52, 20, 4,
222 4, 24, 60,104,152,196,228,248,248,228,196,152,104, 60, 24, 4,
223 4, 24, 60,104,152,196,228,248,248,228,196,152,104, 60, 24, 4,
224 4, 20, 52, 96,136,180,212,228,228,212,180,136, 96, 52, 20, 4,
225 0, 20, 44, 80,116,152,180,196,196,180,152,116, 80, 44, 20, 0,
226 0, 16, 36, 60, 92,116,136,152,152,136,116, 92, 60, 36, 16, 0,
227 0, 8, 24, 44, 60, 80, 96,104,104, 96, 80, 60, 44, 24, 8, 0,
228 0, 4, 16, 24, 36, 44, 52, 60, 60, 52, 44, 36, 24, 16, 4, 0,
229 0, 4, 4, 8, 16, 20, 20, 24, 24, 20, 20, 16, 8, 4, 4, 0,
230 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0,
234 static const uint8_t obmc32[1024]={
235 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0,
236 0, 4, 4, 4, 8, 8, 8, 12, 12, 16, 16, 16, 20, 20, 20, 24, 24, 20, 20, 20, 16, 16, 16, 12, 12, 8, 8, 8, 4, 4, 4, 0,
237 0, 4, 8, 8, 12, 12, 16, 20, 20, 24, 28, 28, 32, 32, 36, 40, 40, 36, 32, 32, 28, 28, 24, 20, 20, 16, 12, 12, 8, 8, 4, 0,
238 0, 4, 8, 12, 16, 20, 24, 28, 28, 32, 36, 40, 44, 48, 52, 56, 56, 52, 48, 44, 40, 36, 32, 28, 28, 24, 20, 16, 12, 8, 4, 0,
239 4, 8, 12, 16, 20, 24, 28, 32, 40, 44, 48, 52, 56, 60, 64, 68, 68, 64, 60, 56, 52, 48, 44, 40, 32, 28, 24, 20, 16, 12, 8, 4,
240 4, 8, 12, 20, 24, 32, 36, 40, 48, 52, 56, 64, 68, 76, 80, 84, 84, 80, 76, 68, 64, 56, 52, 48, 40, 36, 32, 24, 20, 12, 8, 4,
241 4, 8, 16, 24, 28, 36, 44, 48, 56, 60, 68, 76, 80, 88, 96,100,100, 96, 88, 80, 76, 68, 60, 56, 48, 44, 36, 28, 24, 16, 8, 4,
242 4, 12, 20, 28, 32, 40, 48, 56, 64, 72, 80, 88, 92,100,108,116,116,108,100, 92, 88, 80, 72, 64, 56, 48, 40, 32, 28, 20, 12, 4,
243 4, 12, 20, 28, 40, 48, 56, 64, 72, 80, 88, 96,108,116,124,132,132,124,116,108, 96, 88, 80, 72, 64, 56, 48, 40, 28, 20, 12, 4,
244 4, 16, 24, 32, 44, 52, 60, 72, 80, 92,100,108,120,128,136,148,148,136,128,120,108,100, 92, 80, 72, 60, 52, 44, 32, 24, 16, 4,
245 4, 16, 28, 36, 48, 56, 68, 80, 88,100,112,120,132,140,152,164,164,152,140,132,120,112,100, 88, 80, 68, 56, 48, 36, 28, 16, 4,
246 4, 16, 28, 40, 52, 64, 76, 88, 96,108,120,132,144,156,168,180,180,168,156,144,132,120,108, 96, 88, 76, 64, 52, 40, 28, 16, 4,
247 8, 20, 32, 44, 56, 68, 80, 92,108,120,132,144,156,168,180,192,192,180,168,156,144,132,120,108, 92, 80, 68, 56, 44, 32, 20, 8,
248 8, 20, 32, 48, 60, 76, 88,100,116,128,140,156,168,184,196,208,208,196,184,168,156,140,128,116,100, 88, 76, 60, 48, 32, 20, 8,
249 8, 20, 36, 52, 64, 80, 96,108,124,136,152,168,180,196,212,224,224,212,196,180,168,152,136,124,108, 96, 80, 64, 52, 36, 20, 8,
250 8, 24, 40, 56, 68, 84,100,116,132,148,164,180,192,208,224,240,240,224,208,192,180,164,148,132,116,100, 84, 68, 56, 40, 24, 8,
251 8, 24, 40, 56, 68, 84,100,116,132,148,164,180,192,208,224,240,240,224,208,192,180,164,148,132,116,100, 84, 68, 56, 40, 24, 8,
252 8, 20, 36, 52, 64, 80, 96,108,124,136,152,168,180,196,212,224,224,212,196,180,168,152,136,124,108, 96, 80, 64, 52, 36, 20, 8,
253 8, 20, 32, 48, 60, 76, 88,100,116,128,140,156,168,184,196,208,208,196,184,168,156,140,128,116,100, 88, 76, 60, 48, 32, 20, 8,
254 8, 20, 32, 44, 56, 68, 80, 92,108,120,132,144,156,168,180,192,192,180,168,156,144,132,120,108, 92, 80, 68, 56, 44, 32, 20, 8,
255 4, 16, 28, 40, 52, 64, 76, 88, 96,108,120,132,144,156,168,180,180,168,156,144,132,120,108, 96, 88, 76, 64, 52, 40, 28, 16, 4,
256 4, 16, 28, 36, 48, 56, 68, 80, 88,100,112,120,132,140,152,164,164,152,140,132,120,112,100, 88, 80, 68, 56, 48, 36, 28, 16, 4,
257 4, 16, 24, 32, 44, 52, 60, 72, 80, 92,100,108,120,128,136,148,148,136,128,120,108,100, 92, 80, 72, 60, 52, 44, 32, 24, 16, 4,
258 4, 12, 20, 28, 40, 48, 56, 64, 72, 80, 88, 96,108,116,124,132,132,124,116,108, 96, 88, 80, 72, 64, 56, 48, 40, 28, 20, 12, 4,
259 4, 12, 20, 28, 32, 40, 48, 56, 64, 72, 80, 88, 92,100,108,116,116,108,100, 92, 88, 80, 72, 64, 56, 48, 40, 32, 28, 20, 12, 4,
260 4, 8, 16, 24, 28, 36, 44, 48, 56, 60, 68, 76, 80, 88, 96,100,100, 96, 88, 80, 76, 68, 60, 56, 48, 44, 36, 28, 24, 16, 8, 4,
261 4, 8, 12, 20, 24, 32, 36, 40, 48, 52, 56, 64, 68, 76, 80, 84, 84, 80, 76, 68, 64, 56, 52, 48, 40, 36, 32, 24, 20, 12, 8, 4,
262 4, 8, 12, 16, 20, 24, 28, 32, 40, 44, 48, 52, 56, 60, 64, 68, 68, 64, 60, 56, 52, 48, 44, 40, 32, 28, 24, 20, 16, 12, 8, 4,
263 0, 4, 8, 12, 16, 20, 24, 28, 28, 32, 36, 40, 44, 48, 52, 56, 56, 52, 48, 44, 40, 36, 32, 28, 28, 24, 20, 16, 12, 8, 4, 0,
264 0, 4, 8, 8, 12, 12, 16, 20, 20, 24, 28, 28, 32, 32, 36, 40, 40, 36, 32, 32, 28, 28, 24, 20, 20, 16, 12, 12, 8, 8, 4, 0,
265 0, 4, 4, 4, 8, 8, 8, 12, 12, 16, 16, 16, 20, 20, 20, 24, 24, 20, 20, 20, 16, 16, 16, 12, 12, 8, 8, 8, 4, 4, 4, 0,
266 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0,
269 static const uint8_t obmc16[256]={
270 0, 4, 4, 8, 8, 12, 12, 16, 16, 12, 12, 8, 8, 4, 4, 0,
271 4, 8, 16, 20, 28, 32, 40, 44, 44, 40, 32, 28, 20, 16, 8, 4,
272 4, 16, 24, 36, 44, 56, 64, 76, 76, 64, 56, 44, 36, 24, 16, 4,
273 8, 20, 36, 48, 64, 76, 92,104,104, 92, 76, 64, 48, 36, 20, 8,
274 8, 28, 44, 64, 80,100,116,136,136,116,100, 80, 64, 44, 28, 8,
275 12, 32, 56, 76,100,120,144,164,164,144,120,100, 76, 56, 32, 12,
276 12, 40, 64, 92,116,144,168,196,196,168,144,116, 92, 64, 40, 12,
277 16, 44, 76,104,136,164,196,224,224,196,164,136,104, 76, 44, 16,
278 16, 44, 76,104,136,164,196,224,224,196,164,136,104, 76, 44, 16,
279 12, 40, 64, 92,116,144,168,196,196,168,144,116, 92, 64, 40, 12,
280 12, 32, 56, 76,100,120,144,164,164,144,120,100, 76, 56, 32, 12,
281 8, 28, 44, 64, 80,100,116,136,136,116,100, 80, 64, 44, 28, 8,
282 8, 20, 36, 48, 64, 76, 92,104,104, 92, 76, 64, 48, 36, 20, 8,
283 4, 16, 24, 36, 44, 56, 64, 76, 76, 64, 56, 44, 36, 24, 16, 4,
284 4, 8, 16, 20, 28, 32, 40, 44, 44, 40, 32, 28, 20, 16, 8, 4,
285 0, 4, 4, 8, 8, 12, 12, 16, 16, 12, 12, 8, 8, 4, 4, 0,
289 static const uint8_t obmc32[1024]={
290 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
291 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 4, 4, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0,
292 0, 0, 0, 4, 4, 4, 4, 8, 8, 12, 12, 12, 12, 16, 16, 16, 16, 16, 16, 12, 12, 12, 12, 8, 8, 4, 4, 4, 4, 0, 0, 0,
293 0, 0, 4, 4, 4, 8, 8, 12, 16, 20, 20, 24, 28, 28, 28, 28, 28, 28, 28, 28, 24, 20, 20, 16, 12, 8, 8, 4, 4, 4, 0, 0,
294 0, 0, 4, 4, 8, 12, 16, 20, 24, 28, 36, 40, 44, 44, 48, 48, 48, 48, 44, 44, 40, 36, 28, 24, 20, 16, 12, 8, 4, 4, 0, 0,
295 0, 0, 4, 8, 12, 20, 24, 32, 36, 44, 48, 56, 60, 64, 68, 68, 68, 68, 64, 60, 56, 48, 44, 36, 32, 24, 20, 12, 8, 4, 0, 0,
296 0, 4, 4, 8, 16, 24, 32, 40, 48, 60, 68, 76, 80, 84, 88, 92, 92, 88, 84, 80, 76, 68, 60, 48, 40, 32, 24, 16, 8, 4, 4, 0,
297 0, 4, 8, 12, 20, 32, 40, 52, 64, 76, 84, 96,104,108,112,116,116,112,108,104, 96, 84, 76, 64, 52, 40, 32, 20, 12, 8, 4, 0,
298 0, 4, 8, 16, 24, 36, 48, 64, 76, 92,104,116,124,132,136,140,140,136,132,124,116,104, 92, 76, 64, 48, 36, 24, 16, 8, 4, 0,
299 0, 4, 12, 20, 28, 44, 60, 76, 92,104,120,136,148,156,160,164,164,160,156,148,136,120,104, 92, 76, 60, 44, 28, 20, 12, 4, 0,
300 0, 4, 12, 20, 36, 48, 68, 84,104,120,140,152,168,176,184,188,188,184,176,168,152,140,120,104, 84, 68, 48, 36, 20, 12, 4, 0,
301 0, 4, 12, 24, 36, 56, 76, 96,116,136,152,172,184,196,204,208,208,204,196,184,172,152,136,116, 96, 76, 56, 36, 24, 12, 4, 0,
302 0, 4, 12, 24, 44, 60, 80,104,124,148,168,184,200,212,224,228,228,224,212,200,184,168,148,124,104, 80, 60, 44, 24, 12, 4, 0,
303 0, 4, 12, 28, 44, 64, 84,108,132,156,176,196,212,228,236,240,240,236,228,212,196,176,156,132,108, 84, 64, 44, 28, 12, 4, 0,
304 0, 4, 16, 28, 48, 68, 88,112,136,160,184,204,224,236,244,252,252,244,236,224,204,184,160,136,112, 88, 68, 48, 28, 16, 4, 0,
305 1, 4, 16, 28, 48, 68, 92,116,140,164,188,208,228,240,252,255,255,252,240,228,208,188,164,140,116, 92, 68, 48, 28, 16, 4, 1,
306 1, 4, 16, 28, 48, 68, 92,116,140,164,188,208,228,240,252,255,255,252,240,228,208,188,164,140,116, 92, 68, 48, 28, 16, 4, 1,
307 0, 4, 16, 28, 48, 68, 88,112,136,160,184,204,224,236,244,252,252,244,236,224,204,184,160,136,112, 88, 68, 48, 28, 16, 4, 0,
308 0, 4, 12, 28, 44, 64, 84,108,132,156,176,196,212,228,236,240,240,236,228,212,196,176,156,132,108, 84, 64, 44, 28, 12, 4, 0,
309 0, 4, 12, 24, 44, 60, 80,104,124,148,168,184,200,212,224,228,228,224,212,200,184,168,148,124,104, 80, 60, 44, 24, 12, 4, 0,
310 0, 4, 12, 24, 36, 56, 76, 96,116,136,152,172,184,196,204,208,208,204,196,184,172,152,136,116, 96, 76, 56, 36, 24, 12, 4, 0,
311 0, 4, 12, 20, 36, 48, 68, 84,104,120,140,152,168,176,184,188,188,184,176,168,152,140,120,104, 84, 68, 48, 36, 20, 12, 4, 0,
312 0, 4, 12, 20, 28, 44, 60, 76, 92,104,120,136,148,156,160,164,164,160,156,148,136,120,104, 92, 76, 60, 44, 28, 20, 12, 4, 0,
313 0, 4, 8, 16, 24, 36, 48, 64, 76, 92,104,116,124,132,136,140,140,136,132,124,116,104, 92, 76, 64, 48, 36, 24, 16, 8, 4, 0,
314 0, 4, 8, 12, 20, 32, 40, 52, 64, 76, 84, 96,104,108,112,116,116,112,108,104, 96, 84, 76, 64, 52, 40, 32, 20, 12, 8, 4, 0,
315 0, 4, 4, 8, 16, 24, 32, 40, 48, 60, 68, 76, 80, 84, 88, 92, 92, 88, 84, 80, 76, 68, 60, 48, 40, 32, 24, 16, 8, 4, 4, 0,
316 0, 0, 4, 8, 12, 20, 24, 32, 36, 44, 48, 56, 60, 64, 68, 68, 68, 68, 64, 60, 56, 48, 44, 36, 32, 24, 20, 12, 8, 4, 0, 0,
317 0, 0, 4, 4, 8, 12, 16, 20, 24, 28, 36, 40, 44, 44, 48, 48, 48, 48, 44, 44, 40, 36, 28, 24, 20, 16, 12, 8, 4, 4, 0, 0,
318 0, 0, 4, 4, 4, 8, 8, 12, 16, 20, 20, 24, 28, 28, 28, 28, 28, 28, 28, 28, 24, 20, 20, 16, 12, 8, 8, 4, 4, 4, 0, 0,
319 0, 0, 0, 4, 4, 4, 4, 8, 8, 12, 12, 12, 12, 16, 16, 16, 16, 16, 16, 12, 12, 12, 12, 8, 8, 4, 4, 4, 4, 0, 0, 0,
320 0, 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 4, 4, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0,
321 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
324 static const uint8_t obmc16[256]={
325 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0,
326 0, 0, 4, 8, 12, 16, 20, 20, 20, 20, 16, 12, 8, 4, 0, 0,
327 0, 4, 12, 24, 32, 44, 52, 56, 56, 52, 44, 32, 24, 12, 4, 0,
328 0, 8, 24, 40, 60, 80, 96,104,104, 96, 80, 60, 40, 24, 8, 0,
329 0, 12, 32, 64, 92,120,140,152,152,140,120, 92, 64, 32, 12, 0,
330 4, 16, 44, 80,120,156,184,196,196,184,156,120, 80, 44, 16, 4,
331 4, 20, 52, 96,140,184,216,232,232,216,184,140, 96, 52, 20, 4,
332 0, 20, 56,104,152,196,232,252,252,232,196,152,104, 56, 20, 0,
333 0, 20, 56,104,152,196,232,252,252,232,196,152,104, 56, 20, 0,
334 4, 20, 52, 96,140,184,216,232,232,216,184,140, 96, 52, 20, 4,
335 4, 16, 44, 80,120,156,184,196,196,184,156,120, 80, 44, 16, 4,
336 0, 12, 32, 64, 92,120,140,152,152,140,120, 92, 64, 32, 12, 0,
337 0, 8, 24, 40, 60, 80, 96,104,104, 96, 80, 60, 40, 24, 8, 0,
338 0, 4, 12, 24, 32, 44, 52, 56, 56, 52, 44, 32, 24, 12, 4, 0,
339 0, 0, 4, 8, 12, 16, 20, 20, 20, 20, 16, 12, 8, 4, 0, 0,
340 0, 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0,
346 static const uint8_t obmc8[64]={
347 4, 12, 20, 28, 28, 20, 12, 4,
348 12, 36, 60, 84, 84, 60, 36, 12,
349 20, 60,100,140,140,100, 60, 20,
350 28, 84,140,196,196,140, 84, 28,
351 28, 84,140,196,196,140, 84, 28,
352 20, 60,100,140,140,100, 60, 20,
353 12, 36, 60, 84, 84, 60, 36, 12,
354 4, 12, 20, 28, 28, 20, 12, 4,
359 static const uint8_t obmc4[16]={
367 static const uint8_t * const obmc_tab[4]={
368 obmc32, obmc16, obmc8, obmc4
371 static int scale_mv_ref[MAX_REF_FRAMES][MAX_REF_FRAMES];
373 typedef struct BlockNode{
379 //#define TYPE_SPLIT 1
380 #define BLOCK_INTRA 1
382 //#define TYPE_NOCOLOR 4
383 uint8_t level; //FIXME merge into type?
386 static const BlockNode null_block= { //FIXME add border maybe
387 .color= {128,128,128},
395 #define LOG2_MB_SIZE 4
396 #define MB_SIZE (1<<LOG2_MB_SIZE)
397 #define ENCODER_EXTRA_BITS 4
400 typedef struct x_and_coeff{
405 typedef struct SubBand{
410 int qlog; ///< log(qscale)/log[2^(1/6)]
415 int stride_line; ///< Stride measured in lines, not pixels.
416 x_and_coeff * x_coeff;
417 struct SubBand *parent;
418 uint8_t state[/*7*2*/ 7 + 512][32];
421 typedef struct Plane{
424 SubBand band[MAX_DECOMPOSITIONS][4];
427 int8_t hcoeff[HTAPS_MAX/2];
432 int8_t last_hcoeff[HTAPS_MAX/2];
436 typedef struct SnowContext{
438 AVCodecContext *avctx;
442 AVFrame input_picture; ///< new_picture with the internal linesizes
443 AVFrame current_picture;
444 AVFrame last_picture[MAX_REF_FRAMES];
445 uint8_t *halfpel_plane[MAX_REF_FRAMES][4][4];
446 AVFrame mconly_picture;
447 // uint8_t q_context[16];
448 uint8_t header_state[32];
449 uint8_t block_state[128 + 32*128];
453 int spatial_decomposition_type;
454 int last_spatial_decomposition_type;
455 int temporal_decomposition_type;
456 int spatial_decomposition_count;
457 int last_spatial_decomposition_count;
458 int temporal_decomposition_count;
461 int16_t (*ref_mvs[MAX_REF_FRAMES])[2];
462 uint32_t *ref_scores[MAX_REF_FRAMES];
463 DWTELEM *spatial_dwt_buffer;
464 IDWTELEM *spatial_idwt_buffer;
468 int spatial_scalability;
478 #define QBIAS_SHIFT 3
482 int last_block_max_depth;
483 Plane plane[MAX_PLANES];
485 #define ME_CACHE_SIZE 1024
486 int me_cache[ME_CACHE_SIZE];
487 int me_cache_generation;
490 MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to eventually make the motion estimation independent of MpegEncContext, so this will be removed then (FIXME/XXX)
503 #define slice_buffer_get_line(slice_buf, line_num) ((slice_buf)->line[line_num] ? (slice_buf)->line[line_num] : slice_buffer_load_line((slice_buf), (line_num)))
504 //#define slice_buffer_get_line(slice_buf, line_num) (slice_buffer_load_line((slice_buf), (line_num)))
506 static void slice_buffer_init(slice_buffer * buf, int line_count, int max_allocated_lines, int line_width, IDWTELEM * base_buffer)
510 buf->base_buffer = base_buffer;
511 buf->line_count = line_count;
512 buf->line_width = line_width;
513 buf->data_count = max_allocated_lines;
514 buf->line = av_mallocz (sizeof(IDWTELEM *) * line_count);
515 buf->data_stack = av_malloc (sizeof(IDWTELEM *) * max_allocated_lines);
517 for(i = 0; i < max_allocated_lines; i++){
518 buf->data_stack[i] = av_malloc (sizeof(IDWTELEM) * line_width);
521 buf->data_stack_top = max_allocated_lines - 1;
524 static IDWTELEM * slice_buffer_load_line(slice_buffer * buf, int line)
528 assert(buf->data_stack_top >= 0);
529 // assert(!buf->line[line]);
531 return buf->line[line];
533 buffer = buf->data_stack[buf->data_stack_top];
534 buf->data_stack_top--;
535 buf->line[line] = buffer;
540 static void slice_buffer_release(slice_buffer * buf, int line)
544 assert(line >= 0 && line < buf->line_count);
545 assert(buf->line[line]);
547 buffer = buf->line[line];
548 buf->data_stack_top++;
549 buf->data_stack[buf->data_stack_top] = buffer;
550 buf->line[line] = NULL;
553 static void slice_buffer_flush(slice_buffer * buf)
556 for(i = 0; i < buf->line_count; i++){
558 slice_buffer_release(buf, i);
562 static void slice_buffer_destroy(slice_buffer * buf)
565 slice_buffer_flush(buf);
567 for(i = buf->data_count - 1; i >= 0; i--){
568 av_freep(&buf->data_stack[i]);
570 av_freep(&buf->data_stack);
571 av_freep(&buf->line);
575 // Avoid a name clash on SGI IRIX
578 #define QEXPSHIFT (7-FRAC_BITS+8) //FIXME try to change this to 0
579 static uint8_t qexp[QROOT];
581 static inline int mirror(int v, int m){
582 while((unsigned)v > (unsigned)m){
589 static inline void put_symbol(RangeCoder *c, uint8_t *state, int v, int is_signed){
593 const int a= FFABS(v);
594 const int e= av_log2(a);
596 const int el= FFMIN(e, 10);
597 put_rac(c, state+0, 0);
600 put_rac(c, state+1+i, 1); //1..10
603 put_rac(c, state+1+9, 1); //1..10
605 put_rac(c, state+1+FFMIN(i,9), 0);
607 for(i=e-1; i>=el; i--){
608 put_rac(c, state+22+9, (a>>i)&1); //22..31
611 put_rac(c, state+22+i, (a>>i)&1); //22..31
615 put_rac(c, state+11 + el, v < 0); //11..21
618 put_rac(c, state+0, 0);
621 put_rac(c, state+1+i, 1); //1..10
623 put_rac(c, state+1+i, 0);
625 for(i=e-1; i>=0; i--){
626 put_rac(c, state+22+i, (a>>i)&1); //22..31
630 put_rac(c, state+11 + e, v < 0); //11..21
633 put_rac(c, state+1+FFMIN(i,9), 1); //1..10
635 put_rac(c, state+1+9, 0);
637 for(i=e-1; i>=0; i--){
638 put_rac(c, state+22+FFMIN(i,9), (a>>i)&1); //22..31
642 put_rac(c, state+11 + 10, v < 0); //11..21
646 put_rac(c, state+0, 1);
650 static inline int get_symbol(RangeCoder *c, uint8_t *state, int is_signed){
651 if(get_rac(c, state+0))
656 while(get_rac(c, state+1 + FFMIN(e,9))){ //1..10
661 for(i=e-1; i>=0; i--){
662 a += a + get_rac(c, state+22 + FFMIN(i,9)); //22..31
665 e= -(is_signed && get_rac(c, state+11 + FFMIN(e,10))); //11..21
670 static inline void put_symbol2(RangeCoder *c, uint8_t *state, int v, int log2){
672 int r= log2>=0 ? 1<<log2 : 1;
678 put_rac(c, state+4+log2, 1);
683 put_rac(c, state+4+log2, 0);
685 for(i=log2-1; i>=0; i--){
686 put_rac(c, state+31-i, (v>>i)&1);
690 static inline int get_symbol2(RangeCoder *c, uint8_t *state, int log2){
692 int r= log2>=0 ? 1<<log2 : 1;
697 while(get_rac(c, state+4+log2)){
703 for(i=log2-1; i>=0; i--){
704 v+= get_rac(c, state+31-i)<<i;
710 static av_always_inline void
711 lift(DWTELEM *dst, DWTELEM *src, DWTELEM *ref,
712 int dst_step, int src_step, int ref_step,
713 int width, int mul, int add, int shift,
714 int highpass, int inverse){
715 const int mirror_left= !highpass;
716 const int mirror_right= (width&1) ^ highpass;
717 const int w= (width>>1) - 1 + (highpass & width);
720 #define LIFT(src, ref, inv) ((src) + ((inv) ? - (ref) : + (ref)))
722 dst[0] = LIFT(src[0], ((mul*2*ref[0]+add)>>shift), inverse);
729 LIFT(src[i*src_step],
730 ((mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add)>>shift),
736 LIFT(src[w*src_step],
737 ((mul*2*ref[w*ref_step]+add)>>shift),
742 static av_always_inline void
743 inv_lift(IDWTELEM *dst, IDWTELEM *src, IDWTELEM *ref,
744 int dst_step, int src_step, int ref_step,
745 int width, int mul, int add, int shift,
746 int highpass, int inverse){
747 const int mirror_left= !highpass;
748 const int mirror_right= (width&1) ^ highpass;
749 const int w= (width>>1) - 1 + (highpass & width);
752 #define LIFT(src, ref, inv) ((src) + ((inv) ? - (ref) : + (ref)))
754 dst[0] = LIFT(src[0], ((mul*2*ref[0]+add)>>shift), inverse);
761 LIFT(src[i*src_step],
762 ((mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add)>>shift),
768 LIFT(src[w*src_step],
769 ((mul*2*ref[w*ref_step]+add)>>shift),
775 static av_always_inline void
776 liftS(DWTELEM *dst, DWTELEM *src, DWTELEM *ref,
777 int dst_step, int src_step, int ref_step,
778 int width, int mul, int add, int shift,
779 int highpass, int inverse){
780 const int mirror_left= !highpass;
781 const int mirror_right= (width&1) ^ highpass;
782 const int w= (width>>1) - 1 + (highpass & width);
786 #define LIFTS(src, ref, inv) \
788 (src) + (((ref) + 4*(src))>>shift): \
789 -((-16*(src) + (ref) + add/4 + 1 + (5<<25))/(5*4) - (1<<23)))
791 dst[0] = LIFTS(src[0], mul*2*ref[0]+add, inverse);
798 LIFTS(src[i*src_step],
799 mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add,
805 LIFTS(src[w*src_step], mul*2*ref[w*ref_step]+add, inverse);
808 static av_always_inline void
809 inv_liftS(IDWTELEM *dst, IDWTELEM *src, IDWTELEM *ref,
810 int dst_step, int src_step, int ref_step,
811 int width, int mul, int add, int shift,
812 int highpass, int inverse){
813 const int mirror_left= !highpass;
814 const int mirror_right= (width&1) ^ highpass;
815 const int w= (width>>1) - 1 + (highpass & width);
819 #define LIFTS(src, ref, inv) \
821 (src) + (((ref) + 4*(src))>>shift): \
822 -((-16*(src) + (ref) + add/4 + 1 + (5<<25))/(5*4) - (1<<23)))
824 dst[0] = LIFTS(src[0], mul*2*ref[0]+add, inverse);
831 LIFTS(src[i*src_step],
832 mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add,
838 LIFTS(src[w*src_step], mul*2*ref[w*ref_step]+add, inverse);
843 static void horizontal_decompose53i(DWTELEM *b, int width){
845 const int width2= width>>1;
847 const int w2= (width+1)>>1;
849 for(x=0; x<width2; x++){
851 temp[x+w2]= b[2*x + 1];
865 for(x=1; x+1<width2; x+=2){
869 A2 += (A1 + A3 + 2)>>2;
873 A1= temp[x+1+width2];
876 A4 += (A1 + A3 + 2)>>2;
882 A2 += (A1 + A3 + 2)>>2;
887 lift(b+w2, temp+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 0);
888 lift(b , temp , b+w2, 1, 1, 1, width, 1, 2, 2, 0, 0);
892 static void vertical_decompose53iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
895 for(i=0; i<width; i++){
896 b1[i] -= (b0[i] + b2[i])>>1;
900 static void vertical_decompose53iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
903 for(i=0; i<width; i++){
904 b1[i] += (b0[i] + b2[i] + 2)>>2;
908 static void spatial_decompose53i(DWTELEM *buffer, int width, int height, int stride){
910 DWTELEM *b0= buffer + mirror(-2-1, height-1)*stride;
911 DWTELEM *b1= buffer + mirror(-2 , height-1)*stride;
913 for(y=-2; y<height; y+=2){
914 DWTELEM *b2= buffer + mirror(y+1, height-1)*stride;
915 DWTELEM *b3= buffer + mirror(y+2, height-1)*stride;
917 if(y+1<(unsigned)height) horizontal_decompose53i(b2, width);
918 if(y+2<(unsigned)height) horizontal_decompose53i(b3, width);
920 if(y+1<(unsigned)height) vertical_decompose53iH0(b1, b2, b3, width);
921 if(y+0<(unsigned)height) vertical_decompose53iL0(b0, b1, b2, width);
928 static void horizontal_decompose97i(DWTELEM *b, int width){
930 const int w2= (width+1)>>1;
932 lift (temp+w2, b +1, b , 1, 2, 2, width, W_AM, W_AO, W_AS, 1, 1);
933 liftS(temp , b , temp+w2, 1, 2, 1, width, W_BM, W_BO, W_BS, 0, 0);
934 lift (b +w2, temp+w2, temp , 1, 1, 1, width, W_CM, W_CO, W_CS, 1, 0);
935 lift (b , temp , b +w2, 1, 1, 1, width, W_DM, W_DO, W_DS, 0, 0);
939 static void vertical_decompose97iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
942 for(i=0; i<width; i++){
943 b1[i] -= (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
947 static void vertical_decompose97iH1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
950 for(i=0; i<width; i++){
951 b1[i] += (W_CM*(b0[i] + b2[i])+W_CO)>>W_CS;
955 static void vertical_decompose97iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
958 for(i=0; i<width; i++){
960 b1[i] -= (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS;
962 b1[i] = (16*4*b1[i] - 4*(b0[i] + b2[i]) + W_BO*5 + (5<<27)) / (5*16) - (1<<23);
967 static void vertical_decompose97iL1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
970 for(i=0; i<width; i++){
971 b1[i] += (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS;
975 static void spatial_decompose97i(DWTELEM *buffer, int width, int height, int stride){
977 DWTELEM *b0= buffer + mirror(-4-1, height-1)*stride;
978 DWTELEM *b1= buffer + mirror(-4 , height-1)*stride;
979 DWTELEM *b2= buffer + mirror(-4+1, height-1)*stride;
980 DWTELEM *b3= buffer + mirror(-4+2, height-1)*stride;
982 for(y=-4; y<height; y+=2){
983 DWTELEM *b4= buffer + mirror(y+3, height-1)*stride;
984 DWTELEM *b5= buffer + mirror(y+4, height-1)*stride;
986 if(y+3<(unsigned)height) horizontal_decompose97i(b4, width);
987 if(y+4<(unsigned)height) horizontal_decompose97i(b5, width);
989 if(y+3<(unsigned)height) vertical_decompose97iH0(b3, b4, b5, width);
990 if(y+2<(unsigned)height) vertical_decompose97iL0(b2, b3, b4, width);
991 if(y+1<(unsigned)height) vertical_decompose97iH1(b1, b2, b3, width);
992 if(y+0<(unsigned)height) vertical_decompose97iL1(b0, b1, b2, width);
1001 void ff_spatial_dwt(DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
1004 for(level=0; level<decomposition_count; level++){
1006 case DWT_97: spatial_decompose97i(buffer, width>>level, height>>level, stride<<level); break;
1007 case DWT_53: spatial_decompose53i(buffer, width>>level, height>>level, stride<<level); break;
1012 static void horizontal_compose53i(IDWTELEM *b, int width){
1013 IDWTELEM temp[width];
1014 const int width2= width>>1;
1015 const int w2= (width+1)>>1;
1018 for(x=0; x<width2; x++){
1020 temp[2*x + 1]= b[x+w2];
1025 b[0] = temp[0] - ((temp[1]+1)>>1);
1026 for(x=2; x<width-1; x+=2){
1027 b[x ] = temp[x ] - ((temp[x-1] + temp[x+1]+2)>>2);
1028 b[x-1] = temp[x-1] + ((b [x-2] + b [x ]+1)>>1);
1031 b[x ] = temp[x ] - ((temp[x-1]+1)>>1);
1032 b[x-1] = temp[x-1] + ((b [x-2] + b [x ]+1)>>1);
1034 b[x-1] = temp[x-1] + b[x-2];
1037 static void vertical_compose53iH0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
1040 for(i=0; i<width; i++){
1041 b1[i] += (b0[i] + b2[i])>>1;
1045 static void vertical_compose53iL0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
1048 for(i=0; i<width; i++){
1049 b1[i] -= (b0[i] + b2[i] + 2)>>2;
1053 static void spatial_compose53i_buffered_init(DWTCompose *cs, slice_buffer * sb, int height, int stride_line){
1054 cs->b0 = slice_buffer_get_line(sb, mirror(-1-1, height-1) * stride_line);
1055 cs->b1 = slice_buffer_get_line(sb, mirror(-1 , height-1) * stride_line);
1059 static void spatial_compose53i_init(DWTCompose *cs, IDWTELEM *buffer, int height, int stride){
1060 cs->b0 = buffer + mirror(-1-1, height-1)*stride;
1061 cs->b1 = buffer + mirror(-1 , height-1)*stride;
1065 static void spatial_compose53i_dy_buffered(DWTCompose *cs, slice_buffer * sb, int width, int height, int stride_line){
1068 IDWTELEM *b0= cs->b0;
1069 IDWTELEM *b1= cs->b1;
1070 IDWTELEM *b2= slice_buffer_get_line(sb, mirror(y+1, height-1) * stride_line);
1071 IDWTELEM *b3= slice_buffer_get_line(sb, mirror(y+2, height-1) * stride_line);
1073 if(y+1<(unsigned)height && y<(unsigned)height){
1076 for(x=0; x<width; x++){
1077 b2[x] -= (b1[x] + b3[x] + 2)>>2;
1078 b1[x] += (b0[x] + b2[x])>>1;
1081 if(y+1<(unsigned)height) vertical_compose53iL0(b1, b2, b3, width);
1082 if(y+0<(unsigned)height) vertical_compose53iH0(b0, b1, b2, width);
1085 if(y-1<(unsigned)height) horizontal_compose53i(b0, width);
1086 if(y+0<(unsigned)height) horizontal_compose53i(b1, width);
1093 static void spatial_compose53i_dy(DWTCompose *cs, IDWTELEM *buffer, int width, int height, int stride){
1095 IDWTELEM *b0= cs->b0;
1096 IDWTELEM *b1= cs->b1;
1097 IDWTELEM *b2= buffer + mirror(y+1, height-1)*stride;
1098 IDWTELEM *b3= buffer + mirror(y+2, height-1)*stride;
1100 if(y+1<(unsigned)height) vertical_compose53iL0(b1, b2, b3, width);
1101 if(y+0<(unsigned)height) vertical_compose53iH0(b0, b1, b2, width);
1103 if(y-1<(unsigned)height) horizontal_compose53i(b0, width);
1104 if(y+0<(unsigned)height) horizontal_compose53i(b1, width);
1111 static void av_unused spatial_compose53i(IDWTELEM *buffer, int width, int height, int stride){
1113 spatial_compose53i_init(&cs, buffer, height, stride);
1114 while(cs.y <= height)
1115 spatial_compose53i_dy(&cs, buffer, width, height, stride);
1119 void ff_snow_horizontal_compose97i(IDWTELEM *b, int width){
1120 IDWTELEM temp[width];
1121 const int w2= (width+1)>>1;
1123 #if 0 //maybe more understadable but slower
1124 inv_lift (temp , b , b +w2, 2, 1, 1, width, W_DM, W_DO, W_DS, 0, 1);
1125 inv_lift (temp+1 , b +w2, temp , 2, 1, 2, width, W_CM, W_CO, W_CS, 1, 1);
1127 inv_liftS(b , temp , temp+1 , 2, 2, 2, width, W_BM, W_BO, W_BS, 0, 1);
1128 inv_lift (b+1 , temp+1 , b , 2, 2, 2, width, W_AM, W_AO, W_AS, 1, 0);
1131 temp[0] = b[0] - ((3*b[w2]+2)>>2);
1132 for(x=1; x<(width>>1); x++){
1133 temp[2*x ] = b[x ] - ((3*(b [x+w2-1] + b[x+w2])+4)>>3);
1134 temp[2*x-1] = b[x+w2-1] - temp[2*x-2] - temp[2*x];
1137 temp[2*x ] = b[x ] - ((3*b [x+w2-1]+2)>>2);
1138 temp[2*x-1] = b[x+w2-1] - temp[2*x-2] - temp[2*x];
1140 temp[2*x-1] = b[x+w2-1] - 2*temp[2*x-2];
1142 b[0] = temp[0] + ((2*temp[0] + temp[1]+4)>>3);
1143 for(x=2; x<width-1; x+=2){
1144 b[x ] = temp[x ] + ((4*temp[x ] + temp[x-1] + temp[x+1]+8)>>4);
1145 b[x-1] = temp[x-1] + ((3*(b [x-2] + b [x ] ))>>1);
1148 b[x ] = temp[x ] + ((2*temp[x ] + temp[x-1]+4)>>3);
1149 b[x-1] = temp[x-1] + ((3*(b [x-2] + b [x ] ))>>1);
1151 b[x-1] = temp[x-1] + 3*b [x-2];
1155 static void vertical_compose97iH0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
1158 for(i=0; i<width; i++){
1159 b1[i] += (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
1163 static void vertical_compose97iH1(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
1166 for(i=0; i<width; i++){
1167 b1[i] -= (W_CM*(b0[i] + b2[i])+W_CO)>>W_CS;
1171 static void vertical_compose97iL0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
1174 for(i=0; i<width; i++){
1176 b1[i] += (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS;
1178 b1[i] += (W_BM*(b0[i] + b2[i])+4*b1[i]+W_BO)>>W_BS;
1183 static void vertical_compose97iL1(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
1186 for(i=0; i<width; i++){
1187 b1[i] -= (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS;
1191 void ff_snow_vertical_compose97i(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, IDWTELEM *b3, IDWTELEM *b4, IDWTELEM *b5, int width){
1194 for(i=0; i<width; i++){
1195 b4[i] -= (W_DM*(b3[i] + b5[i])+W_DO)>>W_DS;
1196 b3[i] -= (W_CM*(b2[i] + b4[i])+W_CO)>>W_CS;
1198 b2[i] += (W_BM*(b1[i] + b3[i])+W_BO)>>W_BS;
1200 b2[i] += (W_BM*(b1[i] + b3[i])+4*b2[i]+W_BO)>>W_BS;
1202 b1[i] += (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
1206 static void spatial_compose97i_buffered_init(DWTCompose *cs, slice_buffer * sb, int height, int stride_line){
1207 cs->b0 = slice_buffer_get_line(sb, mirror(-3-1, height-1) * stride_line);
1208 cs->b1 = slice_buffer_get_line(sb, mirror(-3 , height-1) * stride_line);
1209 cs->b2 = slice_buffer_get_line(sb, mirror(-3+1, height-1) * stride_line);
1210 cs->b3 = slice_buffer_get_line(sb, mirror(-3+2, height-1) * stride_line);
1214 static void spatial_compose97i_init(DWTCompose *cs, IDWTELEM *buffer, int height, int stride){
1215 cs->b0 = buffer + mirror(-3-1, height-1)*stride;
1216 cs->b1 = buffer + mirror(-3 , height-1)*stride;
1217 cs->b2 = buffer + mirror(-3+1, height-1)*stride;
1218 cs->b3 = buffer + mirror(-3+2, height-1)*stride;
1222 static void spatial_compose97i_dy_buffered(DSPContext *dsp, DWTCompose *cs, slice_buffer * sb, int width, int height, int stride_line){
1225 IDWTELEM *b0= cs->b0;
1226 IDWTELEM *b1= cs->b1;
1227 IDWTELEM *b2= cs->b2;
1228 IDWTELEM *b3= cs->b3;
1229 IDWTELEM *b4= slice_buffer_get_line(sb, mirror(y + 3, height - 1) * stride_line);
1230 IDWTELEM *b5= slice_buffer_get_line(sb, mirror(y + 4, height - 1) * stride_line);
1232 if(y>0 && y+4<height){
1233 dsp->vertical_compose97i(b0, b1, b2, b3, b4, b5, width);
1235 if(y+3<(unsigned)height) vertical_compose97iL1(b3, b4, b5, width);
1236 if(y+2<(unsigned)height) vertical_compose97iH1(b2, b3, b4, width);
1237 if(y+1<(unsigned)height) vertical_compose97iL0(b1, b2, b3, width);
1238 if(y+0<(unsigned)height) vertical_compose97iH0(b0, b1, b2, width);
1241 if(y-1<(unsigned)height) dsp->horizontal_compose97i(b0, width);
1242 if(y+0<(unsigned)height) dsp->horizontal_compose97i(b1, width);
1251 static void spatial_compose97i_dy(DWTCompose *cs, IDWTELEM *buffer, int width, int height, int stride){
1253 IDWTELEM *b0= cs->b0;
1254 IDWTELEM *b1= cs->b1;
1255 IDWTELEM *b2= cs->b2;
1256 IDWTELEM *b3= cs->b3;
1257 IDWTELEM *b4= buffer + mirror(y+3, height-1)*stride;
1258 IDWTELEM *b5= buffer + mirror(y+4, height-1)*stride;
1260 if(y+3<(unsigned)height) vertical_compose97iL1(b3, b4, b5, width);
1261 if(y+2<(unsigned)height) vertical_compose97iH1(b2, b3, b4, width);
1262 if(y+1<(unsigned)height) vertical_compose97iL0(b1, b2, b3, width);
1263 if(y+0<(unsigned)height) vertical_compose97iH0(b0, b1, b2, width);
1265 if(y-1<(unsigned)height) ff_snow_horizontal_compose97i(b0, width);
1266 if(y+0<(unsigned)height) ff_snow_horizontal_compose97i(b1, width);
1275 static void av_unused spatial_compose97i(IDWTELEM *buffer, int width, int height, int stride){
1277 spatial_compose97i_init(&cs, buffer, height, stride);
1278 while(cs.y <= height)
1279 spatial_compose97i_dy(&cs, buffer, width, height, stride);
1282 static void ff_spatial_idwt_buffered_init(DWTCompose *cs, slice_buffer * sb, int width, int height, int stride_line, int type, int decomposition_count){
1284 for(level=decomposition_count-1; level>=0; level--){
1286 case DWT_97: spatial_compose97i_buffered_init(cs+level, sb, height>>level, stride_line<<level); break;
1287 case DWT_53: spatial_compose53i_buffered_init(cs+level, sb, height>>level, stride_line<<level); break;
1292 static void ff_spatial_idwt_buffered_slice(DSPContext *dsp, DWTCompose *cs, slice_buffer * slice_buf, int width, int height, int stride_line, int type, int decomposition_count, int y){
1293 const int support = type==1 ? 3 : 5;
1297 for(level=decomposition_count-1; level>=0; level--){
1298 while(cs[level].y <= FFMIN((y>>level)+support, height>>level)){
1300 case DWT_97: spatial_compose97i_dy_buffered(dsp, cs+level, slice_buf, width>>level, height>>level, stride_line<<level);
1302 case DWT_53: spatial_compose53i_dy_buffered(cs+level, slice_buf, width>>level, height>>level, stride_line<<level);
1309 static inline void unpack_coeffs(SnowContext *s, SubBand *b, SubBand * parent, int orientation){
1310 const int w= b->width;
1311 const int h= b->height;
1315 x_and_coeff *xc= b->x_coeff;
1316 x_and_coeff *prev_xc= NULL;
1317 x_and_coeff *prev2_xc= xc;
1318 x_and_coeff *parent_xc= parent ? parent->x_coeff : NULL;
1319 x_and_coeff *prev_parent_xc= parent_xc;
1321 runs= get_symbol2(&s->c, b->state[30], 0);
1322 if(runs-- > 0) run= get_symbol2(&s->c, b->state[1], 3);
1327 int lt=0, t=0, rt=0;
1329 if(y && prev_xc->x == 0){
1341 if(prev_xc->x == x + 1)
1347 if(x>>1 > parent_xc->x){
1350 if(x>>1 == parent_xc->x){
1351 p= parent_xc->coeff;
1354 if(/*ll|*/l|lt|t|rt|p){
1355 int context= av_log2(/*FFABS(ll) + */3*(l>>1) + (lt>>1) + (t&~1) + (rt>>1) + (p>>1));
1357 v=get_rac(&s->c, &b->state[0][context]);
1359 v= 2*(get_symbol2(&s->c, b->state[context + 2], context-4) + 1);
1360 v+=get_rac(&s->c, &b->state[0][16 + 1 + 3 + quant3bA[l&0xFF] + 3*quant3bA[t&0xFF]]);
1367 if(runs-- > 0) run= get_symbol2(&s->c, b->state[1], 3);
1369 v= 2*(get_symbol2(&s->c, b->state[0 + 2], 0-4) + 1);
1370 v+=get_rac(&s->c, &b->state[0][16 + 1 + 3]);
1379 if(y) max_run= FFMIN(run, prev_xc->x - x - 2);
1380 else max_run= FFMIN(run, w-x-1);
1382 max_run= FFMIN(max_run, 2*parent_xc->x - x - 1);
1388 (xc++)->x= w+1; //end marker
1394 while(parent_xc->x != parent->width+1)
1397 prev_parent_xc= parent_xc;
1399 parent_xc= prev_parent_xc;
1404 (xc++)->x= w+1; //end marker
1407 static inline void decode_subband_slice_buffered(SnowContext *s, SubBand *b, slice_buffer * sb, int start_y, int h, int save_state[1]){
1408 const int w= b->width;
1410 const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
1411 int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
1412 int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
1415 if(b->ibuf == s->spatial_idwt_buffer || s->qlog == LOSSLESS_QLOG){
1420 /* If we are on the second or later slice, restore our index. */
1422 new_index = save_state[0];
1425 for(y=start_y; y<h; y++){
1428 IDWTELEM * line = slice_buffer_get_line(sb, y * b->stride_line + b->buf_y_offset) + b->buf_x_offset;
1429 memset(line, 0, b->width*sizeof(IDWTELEM));
1430 v = b->x_coeff[new_index].coeff;
1431 x = b->x_coeff[new_index++].x;
1433 register int t= ( (v>>1)*qmul + qadd)>>QEXPSHIFT;
1434 register int u= -(v&1);
1435 line[x] = (t^u) - u;
1437 v = b->x_coeff[new_index].coeff;
1438 x = b->x_coeff[new_index++].x;
1442 /* Save our variables for the next slice. */
1443 save_state[0] = new_index;
1448 static void reset_contexts(SnowContext *s){ //FIXME better initial contexts
1449 int plane_index, level, orientation;
1451 for(plane_index=0; plane_index<3; plane_index++){
1452 for(level=0; level<MAX_DECOMPOSITIONS; level++){
1453 for(orientation=level ? 1:0; orientation<4; orientation++){
1454 memset(s->plane[plane_index].band[level][orientation].state, MID_STATE, sizeof(s->plane[plane_index].band[level][orientation].state));
1458 memset(s->header_state, MID_STATE, sizeof(s->header_state));
1459 memset(s->block_state, MID_STATE, sizeof(s->block_state));
1462 static int alloc_blocks(SnowContext *s){
1463 int w= -((-s->avctx->width )>>LOG2_MB_SIZE);
1464 int h= -((-s->avctx->height)>>LOG2_MB_SIZE);
1470 s->block= av_mallocz(w * h * sizeof(BlockNode) << (s->block_max_depth*2));
1474 static inline void copy_rac_state(RangeCoder *d, RangeCoder *s){
1475 uint8_t *bytestream= d->bytestream;
1476 uint8_t *bytestream_start= d->bytestream_start;
1478 d->bytestream= bytestream;
1479 d->bytestream_start= bytestream_start;
1482 static inline void set_blocks(SnowContext *s, int level, int x, int y, int l, int cb, int cr, int mx, int my, int ref, int type){
1483 const int w= s->b_width << s->block_max_depth;
1484 const int rem_depth= s->block_max_depth - level;
1485 const int index= (x + y*w) << rem_depth;
1486 const int block_w= 1<<rem_depth;
1499 for(j=0; j<block_w; j++){
1500 for(i=0; i<block_w; i++){
1501 s->block[index + i + j*w]= block;
1506 static inline void init_ref(MotionEstContext *c, uint8_t *src[3], uint8_t *ref[3], uint8_t *ref2[3], int x, int y, int ref_index){
1507 const int offset[3]= {
1509 ((y*c->uvstride + x)>>1),
1510 ((y*c->uvstride + x)>>1),
1514 c->src[0][i]= src [i];
1515 c->ref[0][i]= ref [i] + offset[i];
1520 static inline void pred_mv(SnowContext *s, int *mx, int *my, int ref,
1521 const BlockNode *left, const BlockNode *top, const BlockNode *tr){
1522 if(s->ref_frames == 1){
1523 *mx = mid_pred(left->mx, top->mx, tr->mx);
1524 *my = mid_pred(left->my, top->my, tr->my);
1526 const int *scale = scale_mv_ref[ref];
1527 *mx = mid_pred((left->mx * scale[left->ref] + 128) >>8,
1528 (top ->mx * scale[top ->ref] + 128) >>8,
1529 (tr ->mx * scale[tr ->ref] + 128) >>8);
1530 *my = mid_pred((left->my * scale[left->ref] + 128) >>8,
1531 (top ->my * scale[top ->ref] + 128) >>8,
1532 (tr ->my * scale[tr ->ref] + 128) >>8);
1536 static av_always_inline int same_block(BlockNode *a, BlockNode *b){
1537 if((a->type&BLOCK_INTRA) && (b->type&BLOCK_INTRA)){
1538 return !((a->color[0] - b->color[0]) | (a->color[1] - b->color[1]) | (a->color[2] - b->color[2]));
1540 return !((a->mx - b->mx) | (a->my - b->my) | (a->ref - b->ref) | ((a->type ^ b->type)&BLOCK_INTRA));
1544 static void decode_q_branch(SnowContext *s, int level, int x, int y){
1545 const int w= s->b_width << s->block_max_depth;
1546 const int rem_depth= s->block_max_depth - level;
1547 const int index= (x + y*w) << rem_depth;
1548 int trx= (x+1)<<rem_depth;
1549 const BlockNode *left = x ? &s->block[index-1] : &null_block;
1550 const BlockNode *top = y ? &s->block[index-w] : &null_block;
1551 const BlockNode *tl = y && x ? &s->block[index-w-1] : left;
1552 const BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
1553 int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
1556 set_blocks(s, level, x, y, null_block.color[0], null_block.color[1], null_block.color[2], null_block.mx, null_block.my, null_block.ref, BLOCK_INTRA);
1560 if(level==s->block_max_depth || get_rac(&s->c, &s->block_state[4 + s_context])){
1562 int l = left->color[0];
1563 int cb= left->color[1];
1564 int cr= left->color[2];
1566 int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref);
1567 int mx_context= av_log2(2*FFABS(left->mx - top->mx)) + 0*av_log2(2*FFABS(tr->mx - top->mx));
1568 int my_context= av_log2(2*FFABS(left->my - top->my)) + 0*av_log2(2*FFABS(tr->my - top->my));
1570 type= get_rac(&s->c, &s->block_state[1 + left->type + top->type]) ? BLOCK_INTRA : 0;
1573 pred_mv(s, &mx, &my, 0, left, top, tr);
1574 l += get_symbol(&s->c, &s->block_state[32], 1);
1575 cb+= get_symbol(&s->c, &s->block_state[64], 1);
1576 cr+= get_symbol(&s->c, &s->block_state[96], 1);
1578 if(s->ref_frames > 1)
1579 ref= get_symbol(&s->c, &s->block_state[128 + 1024 + 32*ref_context], 0);
1580 pred_mv(s, &mx, &my, ref, left, top, tr);
1581 mx+= get_symbol(&s->c, &s->block_state[128 + 32*(mx_context + 16*!!ref)], 1);
1582 my+= get_symbol(&s->c, &s->block_state[128 + 32*(my_context + 16*!!ref)], 1);
1584 set_blocks(s, level, x, y, l, cb, cr, mx, my, ref, type);
1586 decode_q_branch(s, level+1, 2*x+0, 2*y+0);
1587 decode_q_branch(s, level+1, 2*x+1, 2*y+0);
1588 decode_q_branch(s, level+1, 2*x+0, 2*y+1);
1589 decode_q_branch(s, level+1, 2*x+1, 2*y+1);
1593 static void decode_blocks(SnowContext *s){
1600 decode_q_branch(s, 0, x, y);
1605 static void mc_block(Plane *p, uint8_t *dst, const uint8_t *src, uint8_t *tmp, int stride, int b_w, int b_h, int dx, int dy){
1606 static const uint8_t weight[64]={
1617 static const uint8_t brane[256]={
1618 0x00,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x11,0x12,0x12,0x12,0x12,0x12,0x12,0x12,
1619 0x04,0x05,0xcc,0xcc,0xcc,0xcc,0xcc,0x41,0x15,0x16,0xcc,0xcc,0xcc,0xcc,0xcc,0x52,
1620 0x04,0xcc,0x05,0xcc,0xcc,0xcc,0x41,0xcc,0x15,0xcc,0x16,0xcc,0xcc,0xcc,0x52,0xcc,
1621 0x04,0xcc,0xcc,0x05,0xcc,0x41,0xcc,0xcc,0x15,0xcc,0xcc,0x16,0xcc,0x52,0xcc,0xcc,
1622 0x04,0xcc,0xcc,0xcc,0x41,0xcc,0xcc,0xcc,0x15,0xcc,0xcc,0xcc,0x16,0xcc,0xcc,0xcc,
1623 0x04,0xcc,0xcc,0x41,0xcc,0x05,0xcc,0xcc,0x15,0xcc,0xcc,0x52,0xcc,0x16,0xcc,0xcc,
1624 0x04,0xcc,0x41,0xcc,0xcc,0xcc,0x05,0xcc,0x15,0xcc,0x52,0xcc,0xcc,0xcc,0x16,0xcc,
1625 0x04,0x41,0xcc,0xcc,0xcc,0xcc,0xcc,0x05,0x15,0x52,0xcc,0xcc,0xcc,0xcc,0xcc,0x16,
1626 0x44,0x45,0x45,0x45,0x45,0x45,0x45,0x45,0x55,0x56,0x56,0x56,0x56,0x56,0x56,0x56,
1627 0x48,0x49,0xcc,0xcc,0xcc,0xcc,0xcc,0x85,0x59,0x5A,0xcc,0xcc,0xcc,0xcc,0xcc,0x96,
1628 0x48,0xcc,0x49,0xcc,0xcc,0xcc,0x85,0xcc,0x59,0xcc,0x5A,0xcc,0xcc,0xcc,0x96,0xcc,
1629 0x48,0xcc,0xcc,0x49,0xcc,0x85,0xcc,0xcc,0x59,0xcc,0xcc,0x5A,0xcc,0x96,0xcc,0xcc,
1630 0x48,0xcc,0xcc,0xcc,0x49,0xcc,0xcc,0xcc,0x59,0xcc,0xcc,0xcc,0x96,0xcc,0xcc,0xcc,
1631 0x48,0xcc,0xcc,0x85,0xcc,0x49,0xcc,0xcc,0x59,0xcc,0xcc,0x96,0xcc,0x5A,0xcc,0xcc,
1632 0x48,0xcc,0x85,0xcc,0xcc,0xcc,0x49,0xcc,0x59,0xcc,0x96,0xcc,0xcc,0xcc,0x5A,0xcc,
1633 0x48,0x85,0xcc,0xcc,0xcc,0xcc,0xcc,0x49,0x59,0x96,0xcc,0xcc,0xcc,0xcc,0xcc,0x5A,
1636 static const uint8_t needs[16]={
1644 int16_t tmpIt [64*(32+HTAPS_MAX)];
1645 uint8_t tmp2t[3][stride*(32+HTAPS_MAX)];
1646 int16_t *tmpI= tmpIt;
1647 uint8_t *tmp2= tmp2t[0];
1648 const uint8_t *hpel[11];
1649 assert(dx<16 && dy<16);
1650 r= brane[dx + 16*dy]&15;
1651 l= brane[dx + 16*dy]>>4;
1653 b= needs[l] | needs[r];
1654 if(p && !p->diag_mc)
1658 for(y=0; y < b_h+HTAPS_MAX-1; y++){
1659 for(x=0; x < b_w; x++){
1660 int a_1=src[x + HTAPS_MAX/2-4];
1661 int a0= src[x + HTAPS_MAX/2-3];
1662 int a1= src[x + HTAPS_MAX/2-2];
1663 int a2= src[x + HTAPS_MAX/2-1];
1664 int a3= src[x + HTAPS_MAX/2+0];
1665 int a4= src[x + HTAPS_MAX/2+1];
1666 int a5= src[x + HTAPS_MAX/2+2];
1667 int a6= src[x + HTAPS_MAX/2+3];
1669 if(!p || p->fast_mc){
1670 am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5);
1674 am= p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6);
1679 if(am&(~255)) am= ~(am>>31);
1688 src += HTAPS_MAX/2 - 1;
1692 for(y=0; y < b_h; y++){
1693 for(x=0; x < b_w+1; x++){
1694 int a_1=src[x + (HTAPS_MAX/2-4)*stride];
1695 int a0= src[x + (HTAPS_MAX/2-3)*stride];
1696 int a1= src[x + (HTAPS_MAX/2-2)*stride];
1697 int a2= src[x + (HTAPS_MAX/2-1)*stride];
1698 int a3= src[x + (HTAPS_MAX/2+0)*stride];
1699 int a4= src[x + (HTAPS_MAX/2+1)*stride];
1700 int a5= src[x + (HTAPS_MAX/2+2)*stride];
1701 int a6= src[x + (HTAPS_MAX/2+3)*stride];
1703 if(!p || p->fast_mc)
1704 am= (20*(a2+a3) - 5*(a1+a4) + (a0+a5) + 16)>>5;
1706 am= (p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6) + 32)>>6;
1708 if(am&(~255)) am= ~(am>>31);
1716 src += stride*(HTAPS_MAX/2 - 1);
1720 for(y=0; y < b_h; y++){
1721 for(x=0; x < b_w; x++){
1722 int a_1=tmpI[x + (HTAPS_MAX/2-4)*64];
1723 int a0= tmpI[x + (HTAPS_MAX/2-3)*64];
1724 int a1= tmpI[x + (HTAPS_MAX/2-2)*64];
1725 int a2= tmpI[x + (HTAPS_MAX/2-1)*64];
1726 int a3= tmpI[x + (HTAPS_MAX/2+0)*64];
1727 int a4= tmpI[x + (HTAPS_MAX/2+1)*64];
1728 int a5= tmpI[x + (HTAPS_MAX/2+2)*64];
1729 int a6= tmpI[x + (HTAPS_MAX/2+3)*64];
1731 if(!p || p->fast_mc)
1732 am= (20*(a2+a3) - 5*(a1+a4) + (a0+a5) + 512)>>10;
1734 am= (p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6) + 2048)>>12;
1735 if(am&(~255)) am= ~(am>>31);
1744 hpel[ 1]= tmp2t[0] + stride*(HTAPS_MAX/2-1);
1749 hpel[ 6]= tmp2t[1] + 1;
1751 hpel[ 8]= src + stride;
1752 hpel[ 9]= hpel[1] + stride;
1753 hpel[10]= hpel[8] + 1;
1756 const uint8_t *src1= hpel[dx/8 + dy/8*4 ];
1757 const uint8_t *src2= hpel[dx/8 + dy/8*4+1];
1758 const uint8_t *src3= hpel[dx/8 + dy/8*4+4];
1759 const uint8_t *src4= hpel[dx/8 + dy/8*4+5];
1762 for(y=0; y < b_h; y++){
1763 for(x=0; x < b_w; x++){
1764 dst[x]= ((8-dx)*(8-dy)*src1[x] + dx*(8-dy)*src2[x]+
1765 (8-dx)* dy *src3[x] + dx* dy *src4[x]+32)>>6;
1774 const uint8_t *src1= hpel[l];
1775 const uint8_t *src2= hpel[r];
1776 int a= weight[((dx&7) + (8*(dy&7)))];
1778 for(y=0; y < b_h; y++){
1779 for(x=0; x < b_w; x++){
1780 dst[x]= (a*src1[x] + b*src2[x] + 4)>>3;
1789 #define mca(dx,dy,b_w)\
1790 static void mc_block_hpel ## dx ## dy ## b_w(uint8_t *dst, const uint8_t *src, int stride, int h){\
1791 uint8_t tmp[stride*(b_w+HTAPS_MAX-1)];\
1793 mc_block(NULL, dst, src-(HTAPS_MAX/2-1)-(HTAPS_MAX/2-1)*stride, tmp, stride, b_w, b_w, dx, dy);\
1805 static void pred_block(SnowContext *s, uint8_t *dst, uint8_t *tmp, int stride, int sx, int sy, int b_w, int b_h, BlockNode *block, int plane_index, int w, int h){
1806 if(block->type & BLOCK_INTRA){
1808 const int color = block->color[plane_index];
1809 const int color4= color*0x01010101;
1811 for(y=0; y < b_h; y++){
1812 *(uint32_t*)&dst[0 + y*stride]= color4;
1813 *(uint32_t*)&dst[4 + y*stride]= color4;
1814 *(uint32_t*)&dst[8 + y*stride]= color4;
1815 *(uint32_t*)&dst[12+ y*stride]= color4;
1816 *(uint32_t*)&dst[16+ y*stride]= color4;
1817 *(uint32_t*)&dst[20+ y*stride]= color4;
1818 *(uint32_t*)&dst[24+ y*stride]= color4;
1819 *(uint32_t*)&dst[28+ y*stride]= color4;
1822 for(y=0; y < b_h; y++){
1823 *(uint32_t*)&dst[0 + y*stride]= color4;
1824 *(uint32_t*)&dst[4 + y*stride]= color4;
1825 *(uint32_t*)&dst[8 + y*stride]= color4;
1826 *(uint32_t*)&dst[12+ y*stride]= color4;
1829 for(y=0; y < b_h; y++){
1830 *(uint32_t*)&dst[0 + y*stride]= color4;
1831 *(uint32_t*)&dst[4 + y*stride]= color4;
1834 for(y=0; y < b_h; y++){
1835 *(uint32_t*)&dst[0 + y*stride]= color4;
1838 for(y=0; y < b_h; y++){
1839 for(x=0; x < b_w; x++){
1840 dst[x + y*stride]= color;
1845 uint8_t *src= s->last_picture[block->ref].data[plane_index];
1846 const int scale= plane_index ? s->mv_scale : 2*s->mv_scale;
1847 int mx= block->mx*scale;
1848 int my= block->my*scale;
1849 const int dx= mx&15;
1850 const int dy= my&15;
1851 const int tab_index= 3 - (b_w>>2) + (b_w>>4);
1852 sx += (mx>>4) - (HTAPS_MAX/2-1);
1853 sy += (my>>4) - (HTAPS_MAX/2-1);
1854 src += sx + sy*stride;
1855 if( (unsigned)sx >= w - b_w - (HTAPS_MAX-2)
1856 || (unsigned)sy >= h - b_h - (HTAPS_MAX-2)){
1857 ff_emulated_edge_mc(tmp + MB_SIZE, src, stride, b_w+HTAPS_MAX-1, b_h+HTAPS_MAX-1, sx, sy, w, h);
1860 // assert(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h);
1861 // assert(!(b_w&(b_w-1)));
1862 assert(b_w>1 && b_h>1);
1863 assert((tab_index>=0 && tab_index<4) || b_w==32);
1864 if((dx&3) || (dy&3) || !(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h) || (b_w&(b_w-1)) || !s->plane[plane_index].fast_mc )
1865 mc_block(&s->plane[plane_index], dst, src, tmp, stride, b_w, b_h, dx, dy);
1868 for(y=0; y<b_h; y+=16){
1869 s->dsp.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + y*stride, src + 3 + (y+3)*stride,stride);
1870 s->dsp.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + 16 + y*stride, src + 19 + (y+3)*stride,stride);
1873 s->dsp.put_h264_qpel_pixels_tab[tab_index ][dy+(dx>>2)](dst,src + 3 + 3*stride,stride);
1874 else if(b_w==2*b_h){
1875 s->dsp.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst ,src + 3 + 3*stride,stride);
1876 s->dsp.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst+b_h,src + 3 + b_h + 3*stride,stride);
1879 s->dsp.put_h264_qpel_pixels_tab[tab_index ][dy+(dx>>2)](dst ,src + 3 + 3*stride ,stride);
1880 s->dsp.put_h264_qpel_pixels_tab[tab_index ][dy+(dx>>2)](dst+b_w*stride,src + 3 + 3*stride+b_w*stride,stride);
1885 void ff_snow_inner_add_yblock(const uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h,
1886 int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8){
1889 for(y=0; y<b_h; y++){
1890 //FIXME ugly misuse of obmc_stride
1891 const uint8_t *obmc1= obmc + y*obmc_stride;
1892 const uint8_t *obmc2= obmc1+ (obmc_stride>>1);
1893 const uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1);
1894 const uint8_t *obmc4= obmc3+ (obmc_stride>>1);
1895 dst = slice_buffer_get_line(sb, src_y + y);
1896 for(x=0; x<b_w; x++){
1897 int v= obmc1[x] * block[3][x + y*src_stride]
1898 +obmc2[x] * block[2][x + y*src_stride]
1899 +obmc3[x] * block[1][x + y*src_stride]
1900 +obmc4[x] * block[0][x + y*src_stride];
1902 v <<= 8 - LOG2_OBMC_MAX;
1904 v >>= 8 - FRAC_BITS;
1907 v += dst[x + src_x];
1908 v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS;
1909 if(v&(~255)) v= ~(v>>31);
1910 dst8[x + y*src_stride] = v;
1912 dst[x + src_x] -= v;
1918 //FIXME name cleanup (b_w, block_w, b_width stuff)
1919 static av_always_inline void add_yblock(SnowContext *s, int sliced, slice_buffer *sb, IDWTELEM *dst, uint8_t *dst8, const uint8_t *obmc, int src_x, int src_y, int b_w, int b_h, int w, int h, int dst_stride, int src_stride, int obmc_stride, int b_x, int b_y, int add, int offset_dst, int plane_index){
1920 const int b_width = s->b_width << s->block_max_depth;
1921 const int b_height= s->b_height << s->block_max_depth;
1922 const int b_stride= b_width;
1923 BlockNode *lt= &s->block[b_x + b_y*b_stride];
1924 BlockNode *rt= lt+1;
1925 BlockNode *lb= lt+b_stride;
1926 BlockNode *rb= lb+1;
1928 int tmp_step= src_stride >= 7*MB_SIZE ? MB_SIZE : MB_SIZE*src_stride;
1929 uint8_t *tmp = s->scratchbuf;
1936 }else if(b_x + 1 >= b_width){
1943 }else if(b_y + 1 >= b_height){
1948 if(src_x<0){ //FIXME merge with prev & always round internal width up to *16
1951 if(!sliced && !offset_dst)
1954 }else if(src_x + b_w > w){
1958 obmc -= src_y*obmc_stride;
1960 if(!sliced && !offset_dst)
1961 dst -= src_y*dst_stride;
1963 }else if(src_y + b_h> h){
1967 if(b_w<=0 || b_h<=0) return;
1969 assert(src_stride > 2*MB_SIZE + 5);
1971 if(!sliced && offset_dst)
1972 dst += src_x + src_y*dst_stride;
1973 dst8+= src_x + src_y*src_stride;
1974 // src += src_x + src_y*src_stride;
1976 ptmp= tmp + 3*tmp_step;
1979 pred_block(s, block[0], tmp, src_stride, src_x, src_y, b_w, b_h, lt, plane_index, w, h);
1981 if(same_block(lt, rt)){
1986 pred_block(s, block[1], tmp, src_stride, src_x, src_y, b_w, b_h, rt, plane_index, w, h);
1989 if(same_block(lt, lb)){
1991 }else if(same_block(rt, lb)){
1996 pred_block(s, block[2], tmp, src_stride, src_x, src_y, b_w, b_h, lb, plane_index, w, h);
1999 if(same_block(lt, rb) ){
2001 }else if(same_block(rt, rb)){
2003 }else if(same_block(lb, rb)){
2007 pred_block(s, block[3], tmp, src_stride, src_x, src_y, b_w, b_h, rb, plane_index, w, h);
2010 for(y=0; y<b_h; y++){
2011 for(x=0; x<b_w; x++){
2012 int v= obmc [x + y*obmc_stride] * block[3][x + y*src_stride] * (256/OBMC_MAX);
2013 if(add) dst[x + y*dst_stride] += v;
2014 else dst[x + y*dst_stride] -= v;
2017 for(y=0; y<b_h; y++){
2018 uint8_t *obmc2= obmc + (obmc_stride>>1);
2019 for(x=0; x<b_w; x++){
2020 int v= obmc2[x + y*obmc_stride] * block[2][x + y*src_stride] * (256/OBMC_MAX);
2021 if(add) dst[x + y*dst_stride] += v;
2022 else dst[x + y*dst_stride] -= v;
2025 for(y=0; y<b_h; y++){
2026 uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
2027 for(x=0; x<b_w; x++){
2028 int v= obmc3[x + y*obmc_stride] * block[1][x + y*src_stride] * (256/OBMC_MAX);
2029 if(add) dst[x + y*dst_stride] += v;
2030 else dst[x + y*dst_stride] -= v;
2033 for(y=0; y<b_h; y++){
2034 uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
2035 uint8_t *obmc4= obmc3+ (obmc_stride>>1);
2036 for(x=0; x<b_w; x++){
2037 int v= obmc4[x + y*obmc_stride] * block[0][x + y*src_stride] * (256/OBMC_MAX);
2038 if(add) dst[x + y*dst_stride] += v;
2039 else dst[x + y*dst_stride] -= v;
2044 s->dsp.inner_add_yblock(obmc, obmc_stride, block, b_w, b_h, src_x,src_y, src_stride, sb, add, dst8);
2046 for(y=0; y<b_h; y++){
2047 //FIXME ugly misuse of obmc_stride
2048 const uint8_t *obmc1= obmc + y*obmc_stride;
2049 const uint8_t *obmc2= obmc1+ (obmc_stride>>1);
2050 const uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1);
2051 const uint8_t *obmc4= obmc3+ (obmc_stride>>1);
2052 for(x=0; x<b_w; x++){
2053 int v= obmc1[x] * block[3][x + y*src_stride]
2054 +obmc2[x] * block[2][x + y*src_stride]
2055 +obmc3[x] * block[1][x + y*src_stride]
2056 +obmc4[x] * block[0][x + y*src_stride];
2058 v <<= 8 - LOG2_OBMC_MAX;
2060 v >>= 8 - FRAC_BITS;
2063 v += dst[x + y*dst_stride];
2064 v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS;
2065 if(v&(~255)) v= ~(v>>31);
2066 dst8[x + y*src_stride] = v;
2068 dst[x + y*dst_stride] -= v;
2076 static av_always_inline void predict_slice_buffered(SnowContext *s, slice_buffer * sb, IDWTELEM * old_buffer, int plane_index, int add, int mb_y){
2077 Plane *p= &s->plane[plane_index];
2078 const int mb_w= s->b_width << s->block_max_depth;
2079 const int mb_h= s->b_height << s->block_max_depth;
2081 int block_size = MB_SIZE >> s->block_max_depth;
2082 int block_w = plane_index ? block_size/2 : block_size;
2083 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
2084 int obmc_stride= plane_index ? block_size : 2*block_size;
2085 int ref_stride= s->current_picture.linesize[plane_index];
2086 uint8_t *dst8= s->current_picture.data[plane_index];
2090 if(s->keyframe || (s->avctx->debug&512)){
2095 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
2096 // DWTELEM * line = slice_buffer_get_line(sb, y);
2097 IDWTELEM * line = sb->line[y];
2099 // int v= buf[x + y*w] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
2100 int v= line[x] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
2102 if(v&(~255)) v= ~(v>>31);
2103 dst8[x + y*ref_stride]= v;
2107 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
2108 // DWTELEM * line = slice_buffer_get_line(sb, y);
2109 IDWTELEM * line = sb->line[y];
2111 line[x] -= 128 << FRAC_BITS;
2112 // buf[x + y*w]-= 128<<FRAC_BITS;
2120 for(mb_x=0; mb_x<=mb_w; mb_x++){
2121 add_yblock(s, 1, sb, old_buffer, dst8, obmc,
2122 block_w*mb_x - block_w/2,
2123 block_w*mb_y - block_w/2,
2126 w, ref_stride, obmc_stride,
2128 add, 0, plane_index);
2132 static av_always_inline void predict_slice(SnowContext *s, IDWTELEM *buf, int plane_index, int add, int mb_y){
2133 Plane *p= &s->plane[plane_index];
2134 const int mb_w= s->b_width << s->block_max_depth;
2135 const int mb_h= s->b_height << s->block_max_depth;
2137 int block_size = MB_SIZE >> s->block_max_depth;
2138 int block_w = plane_index ? block_size/2 : block_size;
2139 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
2140 const int obmc_stride= plane_index ? block_size : 2*block_size;
2141 int ref_stride= s->current_picture.linesize[plane_index];
2142 uint8_t *dst8= s->current_picture.data[plane_index];
2146 if(s->keyframe || (s->avctx->debug&512)){
2151 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
2153 int v= buf[x + y*w] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
2155 if(v&(~255)) v= ~(v>>31);
2156 dst8[x + y*ref_stride]= v;
2160 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
2162 buf[x + y*w]-= 128<<FRAC_BITS;
2170 for(mb_x=0; mb_x<=mb_w; mb_x++){
2171 add_yblock(s, 0, NULL, buf, dst8, obmc,
2172 block_w*mb_x - block_w/2,
2173 block_w*mb_y - block_w/2,
2176 w, ref_stride, obmc_stride,
2178 add, 1, plane_index);
2182 static av_always_inline void predict_plane(SnowContext *s, IDWTELEM *buf, int plane_index, int add){
2183 const int mb_h= s->b_height << s->block_max_depth;
2185 for(mb_y=0; mb_y<=mb_h; mb_y++)
2186 predict_slice(s, buf, plane_index, add, mb_y);
2189 static void dequantize_slice_buffered(SnowContext *s, slice_buffer * sb, SubBand *b, IDWTELEM *src, int stride, int start_y, int end_y){
2190 const int w= b->width;
2191 const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
2192 const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
2193 const int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
2196 if(s->qlog == LOSSLESS_QLOG) return;
2198 for(y=start_y; y<end_y; y++){
2199 // DWTELEM * line = slice_buffer_get_line_from_address(sb, src + (y * stride));
2200 IDWTELEM * line = slice_buffer_get_line(sb, (y * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
2204 line[x]= -((-i*qmul + qadd)>>(QEXPSHIFT)); //FIXME try different bias
2206 line[x]= (( i*qmul + qadd)>>(QEXPSHIFT));
2212 static void correlate_slice_buffered(SnowContext *s, slice_buffer * sb, SubBand *b, IDWTELEM *src, int stride, int inverse, int use_median, int start_y, int end_y){
2213 const int w= b->width;
2216 IDWTELEM * line=0; // silence silly "could be used without having been initialized" warning
2220 line = slice_buffer_get_line(sb, ((start_y - 1) * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
2222 for(y=start_y; y<end_y; y++){
2224 // line = slice_buffer_get_line_from_address(sb, src + (y * stride));
2225 line = slice_buffer_get_line(sb, (y * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
2229 if(y && x+1<w) line[x] += mid_pred(line[x - 1], prev[x], prev[x + 1]);
2230 else line[x] += line[x - 1];
2232 if(y) line[x] += mid_pred(line[x - 1], prev[x], line[x - 1] + prev[x] - prev[x - 1]);
2233 else line[x] += line[x - 1];
2236 if(y) line[x] += prev[x];
2242 static void decode_qlogs(SnowContext *s){
2243 int plane_index, level, orientation;
2245 for(plane_index=0; plane_index<3; plane_index++){
2246 for(level=0; level<s->spatial_decomposition_count; level++){
2247 for(orientation=level ? 1:0; orientation<4; orientation++){
2249 if (plane_index==2) q= s->plane[1].band[level][orientation].qlog;
2250 else if(orientation==2) q= s->plane[plane_index].band[level][1].qlog;
2251 else q= get_symbol(&s->c, s->header_state, 1);
2252 s->plane[plane_index].band[level][orientation].qlog= q;
2258 #define GET_S(dst, check) \
2259 tmp= get_symbol(&s->c, s->header_state, 0);\
2261 av_log(s->avctx, AV_LOG_ERROR, "Error " #dst " is %d\n", tmp);\
2266 static int decode_header(SnowContext *s){
2267 int plane_index, tmp;
2270 memset(kstate, MID_STATE, sizeof(kstate));
2272 s->keyframe= get_rac(&s->c, kstate);
2273 if(s->keyframe || s->always_reset){
2275 s->spatial_decomposition_type=
2279 s->block_max_depth= 0;
2282 GET_S(s->version, tmp <= 0U)
2283 s->always_reset= get_rac(&s->c, s->header_state);
2284 s->temporal_decomposition_type= get_symbol(&s->c, s->header_state, 0);
2285 s->temporal_decomposition_count= get_symbol(&s->c, s->header_state, 0);
2286 GET_S(s->spatial_decomposition_count, 0 < tmp && tmp <= MAX_DECOMPOSITIONS)
2287 s->colorspace_type= get_symbol(&s->c, s->header_state, 0);
2288 s->chroma_h_shift= get_symbol(&s->c, s->header_state, 0);
2289 s->chroma_v_shift= get_symbol(&s->c, s->header_state, 0);
2290 s->spatial_scalability= get_rac(&s->c, s->header_state);
2291 // s->rate_scalability= get_rac(&s->c, s->header_state);
2292 GET_S(s->max_ref_frames, tmp < (unsigned)MAX_REF_FRAMES)
2293 s->max_ref_frames++;
2299 if(get_rac(&s->c, s->header_state)){
2300 for(plane_index=0; plane_index<2; plane_index++){
2301 int htaps, i, sum=0;
2302 Plane *p= &s->plane[plane_index];
2303 p->diag_mc= get_rac(&s->c, s->header_state);
2304 htaps= get_symbol(&s->c, s->header_state, 0)*2 + 2;
2305 if((unsigned)htaps > HTAPS_MAX || htaps==0)
2308 for(i= htaps/2; i; i--){
2309 p->hcoeff[i]= get_symbol(&s->c, s->header_state, 0) * (1-2*(i&1));
2310 sum += p->hcoeff[i];
2312 p->hcoeff[0]= 32-sum;
2314 s->plane[2].diag_mc= s->plane[1].diag_mc;
2315 s->plane[2].htaps = s->plane[1].htaps;
2316 memcpy(s->plane[2].hcoeff, s->plane[1].hcoeff, sizeof(s->plane[1].hcoeff));
2318 if(get_rac(&s->c, s->header_state)){
2319 GET_S(s->spatial_decomposition_count, 0 < tmp && tmp <= MAX_DECOMPOSITIONS)
2324 s->spatial_decomposition_type+= get_symbol(&s->c, s->header_state, 1);
2325 if(s->spatial_decomposition_type > 1U){
2326 av_log(s->avctx, AV_LOG_ERROR, "spatial_decomposition_type %d not supported", s->spatial_decomposition_type);
2329 if(FFMIN(s->avctx-> width>>s->chroma_h_shift,
2330 s->avctx->height>>s->chroma_v_shift) >> (s->spatial_decomposition_count-1) <= 0){
2331 av_log(s->avctx, AV_LOG_ERROR, "spatial_decomposition_count %d too large for size", s->spatial_decomposition_count);
2335 s->qlog += get_symbol(&s->c, s->header_state, 1);
2336 s->mv_scale += get_symbol(&s->c, s->header_state, 1);
2337 s->qbias += get_symbol(&s->c, s->header_state, 1);
2338 s->block_max_depth+= get_symbol(&s->c, s->header_state, 1);
2339 if(s->block_max_depth > 1 || s->block_max_depth < 0){
2340 av_log(s->avctx, AV_LOG_ERROR, "block_max_depth= %d is too large", s->block_max_depth);
2341 s->block_max_depth= 0;
2348 static void init_qexp(void){
2352 for(i=0; i<QROOT; i++){
2354 v *= pow(2, 1.0 / QROOT);
2358 static av_cold int common_init(AVCodecContext *avctx){
2359 SnowContext *s = avctx->priv_data;
2364 s->max_ref_frames=1; //just make sure its not an invalid value in case of no initial keyframe
2366 dsputil_init(&s->dsp, avctx);
2369 s->dsp.put_qpel_pixels_tab [0][dy+dx/4]=\
2370 s->dsp.put_no_rnd_qpel_pixels_tab[0][dy+dx/4]=\
2371 s->dsp.put_h264_qpel_pixels_tab[0][dy+dx/4];\
2372 s->dsp.put_qpel_pixels_tab [1][dy+dx/4]=\
2373 s->dsp.put_no_rnd_qpel_pixels_tab[1][dy+dx/4]=\
2374 s->dsp.put_h264_qpel_pixels_tab[1][dy+dx/4];
2393 #define mcfh(dx,dy)\
2394 s->dsp.put_pixels_tab [0][dy/4+dx/8]=\
2395 s->dsp.put_no_rnd_pixels_tab[0][dy/4+dx/8]=\
2396 mc_block_hpel ## dx ## dy ## 16;\
2397 s->dsp.put_pixels_tab [1][dy/4+dx/8]=\
2398 s->dsp.put_no_rnd_pixels_tab[1][dy/4+dx/8]=\
2399 mc_block_hpel ## dx ## dy ## 8;
2409 // dec += FFMAX(s->chroma_h_shift, s->chroma_v_shift);
2411 width= s->avctx->width;
2412 height= s->avctx->height;
2414 s->spatial_idwt_buffer= av_mallocz(width*height*sizeof(IDWTELEM));
2415 s->spatial_dwt_buffer= av_mallocz(width*height*sizeof(DWTELEM)); //FIXME this does not belong here
2417 for(i=0; i<MAX_REF_FRAMES; i++)
2418 for(j=0; j<MAX_REF_FRAMES; j++)
2419 scale_mv_ref[i][j] = 256*(i+1)/(j+1);
2421 s->avctx->get_buffer(s->avctx, &s->mconly_picture);
2422 s->scratchbuf = av_malloc(s->mconly_picture.linesize[0]*7*MB_SIZE);
2427 static int common_init_after_header(AVCodecContext *avctx){
2428 SnowContext *s = avctx->priv_data;
2429 int plane_index, level, orientation;
2431 for(plane_index=0; plane_index<3; plane_index++){
2432 int w= s->avctx->width;
2433 int h= s->avctx->height;
2436 w>>= s->chroma_h_shift;
2437 h>>= s->chroma_v_shift;
2439 s->plane[plane_index].width = w;
2440 s->plane[plane_index].height= h;
2442 for(level=s->spatial_decomposition_count-1; level>=0; level--){
2443 for(orientation=level ? 1 : 0; orientation<4; orientation++){
2444 SubBand *b= &s->plane[plane_index].band[level][orientation];
2446 b->buf= s->spatial_dwt_buffer;
2448 b->stride= s->plane[plane_index].width << (s->spatial_decomposition_count - level);
2449 b->width = (w + !(orientation&1))>>1;
2450 b->height= (h + !(orientation>1))>>1;
2452 b->stride_line = 1 << (s->spatial_decomposition_count - level);
2453 b->buf_x_offset = 0;
2454 b->buf_y_offset = 0;
2458 b->buf_x_offset = (w+1)>>1;
2461 b->buf += b->stride>>1;
2462 b->buf_y_offset = b->stride_line >> 1;
2464 b->ibuf= s->spatial_idwt_buffer + (b->buf - s->spatial_dwt_buffer);
2467 b->parent= &s->plane[plane_index].band[level-1][orientation];
2468 //FIXME avoid this realloc
2469 av_freep(&b->x_coeff);
2470 b->x_coeff=av_mallocz(((b->width+1) * b->height+1)*sizeof(x_and_coeff));
2485 static void find_sse(SnowContext *s, Plane *p, int *score, int score_stride, IDWTELEM *r0, IDWTELEM *r1, int level, int orientation){
2486 SubBand *b= &p->band[level][orientation];
2490 int step= 1 << (s->spatial_decomposition_count - level);
2497 //FIXME bias for nonzero ?
2499 memset(score, 0, sizeof(*score)*score_stride*((p->height + Q2_STEP-1)/Q2_STEP));
2500 for(y=0; y<p->height; y++){
2501 for(x=0; x<p->width; x++){
2502 int sx= (x-xo + step/2) / step / Q2_STEP;
2503 int sy= (y-yo + step/2) / step / Q2_STEP;
2504 int v= r0[x + y*p->width] - r1[x + y*p->width];
2505 assert(sx>=0 && sy>=0 && sx < score_stride);
2507 score[sx + sy*score_stride] += v*v;
2508 assert(score[sx + sy*score_stride] >= 0);
2513 static void dequantize_all(SnowContext *s, Plane *p, IDWTELEM *buffer, int width, int height){
2514 int level, orientation;
2516 for(level=0; level<s->spatial_decomposition_count; level++){
2517 for(orientation=level ? 1 : 0; orientation<4; orientation++){
2518 SubBand *b= &p->band[level][orientation];
2519 IDWTELEM *dst= buffer + (b->ibuf - s->spatial_idwt_buffer);
2521 dequantize(s, b, dst, b->stride);
2526 static void dwt_quantize(SnowContext *s, Plane *p, DWTELEM *buffer, int width, int height, int stride, int type){
2527 int level, orientation, ys, xs, x, y, pass;
2528 IDWTELEM best_dequant[height * stride];
2529 IDWTELEM idwt2_buffer[height * stride];
2530 const int score_stride= (width + 10)/Q2_STEP;
2531 int best_score[(width + 10)/Q2_STEP * (height + 10)/Q2_STEP]; //FIXME size
2532 int score[(width + 10)/Q2_STEP * (height + 10)/Q2_STEP]; //FIXME size
2533 int threshold= (s->m.lambda * s->m.lambda) >> 6;
2535 //FIXME pass the copy cleanly ?
2537 // memcpy(dwt_buffer, buffer, height * stride * sizeof(DWTELEM));
2538 ff_spatial_dwt(buffer, width, height, stride, type, s->spatial_decomposition_count);
2540 for(level=0; level<s->spatial_decomposition_count; level++){
2541 for(orientation=level ? 1 : 0; orientation<4; orientation++){
2542 SubBand *b= &p->band[level][orientation];
2543 IDWTELEM *dst= best_dequant + (b->ibuf - s->spatial_idwt_buffer);
2544 DWTELEM *src= buffer + (b-> buf - s->spatial_dwt_buffer);
2545 assert(src == b->buf); // code does not depend on this but it is true currently
2547 quantize(s, b, dst, src, b->stride, s->qbias);
2550 for(pass=0; pass<1; pass++){
2551 if(s->qbias == 0) //keyframe
2553 for(level=0; level<s->spatial_decomposition_count; level++){
2554 for(orientation=level ? 1 : 0; orientation<4; orientation++){
2555 SubBand *b= &p->band[level][orientation];
2556 IDWTELEM *dst= idwt2_buffer + (b->ibuf - s->spatial_idwt_buffer);
2557 IDWTELEM *best_dst= best_dequant + (b->ibuf - s->spatial_idwt_buffer);
2559 for(ys= 0; ys<Q2_STEP; ys++){
2560 for(xs= 0; xs<Q2_STEP; xs++){
2561 memcpy(idwt2_buffer, best_dequant, height * stride * sizeof(IDWTELEM));
2562 dequantize_all(s, p, idwt2_buffer, width, height);
2563 ff_spatial_idwt(idwt2_buffer, width, height, stride, type, s->spatial_decomposition_count);
2564 find_sse(s, p, best_score, score_stride, idwt2_buffer, s->spatial_idwt_buffer, level, orientation);
2565 memcpy(idwt2_buffer, best_dequant, height * stride * sizeof(IDWTELEM));
2566 for(y=ys; y<b->height; y+= Q2_STEP){
2567 for(x=xs; x<b->width; x+= Q2_STEP){
2568 if(dst[x + y*b->stride]<0) dst[x + y*b->stride]++;
2569 if(dst[x + y*b->stride]>0) dst[x + y*b->stride]--;
2570 //FIXME try more than just --
2573 dequantize_all(s, p, idwt2_buffer, width, height);
2574 ff_spatial_idwt(idwt2_buffer, width, height, stride, type, s->spatial_decomposition_count);
2575 find_sse(s, p, score, score_stride, idwt2_buffer, s->spatial_idwt_buffer, level, orientation);
2576 for(y=ys; y<b->height; y+= Q2_STEP){
2577 for(x=xs; x<b->width; x+= Q2_STEP){
2578 int score_idx= x/Q2_STEP + (y/Q2_STEP)*score_stride;
2579 if(score[score_idx] <= best_score[score_idx] + threshold){
2580 best_score[score_idx]= score[score_idx];
2581 if(best_dst[x + y*b->stride]<0) best_dst[x + y*b->stride]++;
2582 if(best_dst[x + y*b->stride]>0) best_dst[x + y*b->stride]--;
2583 //FIXME copy instead
2592 memcpy(s->spatial_idwt_buffer, best_dequant, height * stride * sizeof(IDWTELEM)); //FIXME work with that directly instead of copy at the end
2595 #endif /* QUANTIZE2==1 */
2597 #define USE_HALFPEL_PLANE 0
2599 static void halfpel_interpol(SnowContext *s, uint8_t *halfpel[4][4], AVFrame *frame){
2602 assert(!(s->avctx->flags & CODEC_FLAG_EMU_EDGE));
2606 int w= s->avctx->width >>is_chroma;
2607 int h= s->avctx->height >>is_chroma;
2608 int ls= frame->linesize[p];
2609 uint8_t *src= frame->data[p];
2611 halfpel[1][p]= (uint8_t*)av_malloc(ls * (h+2*EDGE_WIDTH)) + EDGE_WIDTH*(1+ls);
2612 halfpel[2][p]= (uint8_t*)av_malloc(ls * (h+2*EDGE_WIDTH)) + EDGE_WIDTH*(1+ls);
2613 halfpel[3][p]= (uint8_t*)av_malloc(ls * (h+2*EDGE_WIDTH)) + EDGE_WIDTH*(1+ls);
2620 halfpel[1][p][i]= (20*(src[i] + src[i+1]) - 5*(src[i-1] + src[i+2]) + (src[i-2] + src[i+3]) + 16 )>>5;
2627 halfpel[2][p][i]= (20*(src[i] + src[i+ls]) - 5*(src[i-ls] + src[i+2*ls]) + (src[i-2*ls] + src[i+3*ls]) + 16 )>>5;
2635 halfpel[3][p][i]= (20*(src[i] + src[i+ls]) - 5*(src[i-ls] + src[i+2*ls]) + (src[i-2*ls] + src[i+3*ls]) + 16 )>>5;
2643 static void release_buffer(AVCodecContext *avctx){
2644 SnowContext *s = avctx->priv_data;
2647 if(s->last_picture[s->max_ref_frames-1].data[0]){
2648 avctx->release_buffer(avctx, &s->last_picture[s->max_ref_frames-1]);
2650 if(s->halfpel_plane[s->max_ref_frames-1][1+i/3][i%3])
2651 av_free(s->halfpel_plane[s->max_ref_frames-1][1+i/3][i%3] - EDGE_WIDTH*(1+s->current_picture.linesize[i%3]));
2655 static int frame_start(SnowContext *s){
2657 int w= s->avctx->width; //FIXME round up to x16 ?
2658 int h= s->avctx->height;
2660 if(s->current_picture.data[0]){
2661 s->dsp.draw_edges(s->current_picture.data[0], s->current_picture.linesize[0], w , h , EDGE_WIDTH );
2662 s->dsp.draw_edges(s->current_picture.data[1], s->current_picture.linesize[1], w>>1, h>>1, EDGE_WIDTH/2);
2663 s->dsp.draw_edges(s->current_picture.data[2], s->current_picture.linesize[2], w>>1, h>>1, EDGE_WIDTH/2);
2666 release_buffer(s->avctx);
2668 tmp= s->last_picture[s->max_ref_frames-1];
2669 memmove(s->last_picture+1, s->last_picture, (s->max_ref_frames-1)*sizeof(AVFrame));
2670 memmove(s->halfpel_plane+1, s->halfpel_plane, (s->max_ref_frames-1)*sizeof(void*)*4*4);
2671 if(USE_HALFPEL_PLANE && s->current_picture.data[0])
2672 halfpel_interpol(s, s->halfpel_plane[0], &s->current_picture);
2673 s->last_picture[0]= s->current_picture;
2674 s->current_picture= tmp;
2680 for(i=0; i<s->max_ref_frames && s->last_picture[i].data[0]; i++)
2681 if(i && s->last_picture[i-1].key_frame)
2684 if(s->ref_frames==0){
2685 av_log(s->avctx,AV_LOG_ERROR, "No reference frames\n");
2690 s->current_picture.reference= 1;
2691 if(s->avctx->get_buffer(s->avctx, &s->current_picture) < 0){
2692 av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed\n");
2696 s->current_picture.key_frame= s->keyframe;
2701 static av_cold void common_end(SnowContext *s){
2702 int plane_index, level, orientation, i;
2704 av_freep(&s->spatial_dwt_buffer);
2705 av_freep(&s->spatial_idwt_buffer);
2708 av_freep(&s->m.me.scratchpad);
2709 av_freep(&s->m.me.map);
2710 av_freep(&s->m.me.score_map);
2711 av_freep(&s->m.obmc_scratchpad);
2713 av_freep(&s->block);
2714 av_freep(&s->scratchbuf);
2716 for(i=0; i<MAX_REF_FRAMES; i++){
2717 av_freep(&s->ref_mvs[i]);
2718 av_freep(&s->ref_scores[i]);
2719 if(s->last_picture[i].data[0])
2720 s->avctx->release_buffer(s->avctx, &s->last_picture[i]);
2723 for(plane_index=0; plane_index<3; plane_index++){
2724 for(level=s->spatial_decomposition_count-1; level>=0; level--){
2725 for(orientation=level ? 1 : 0; orientation<4; orientation++){
2726 SubBand *b= &s->plane[plane_index].band[level][orientation];
2728 av_freep(&b->x_coeff);
2732 if (s->mconly_picture.data[0])
2733 s->avctx->release_buffer(s->avctx, &s->mconly_picture);
2734 if (s->current_picture.data[0])
2735 s->avctx->release_buffer(s->avctx, &s->current_picture);
2738 static av_cold int decode_init(AVCodecContext *avctx)
2740 avctx->pix_fmt= PIX_FMT_YUV420P;
2747 static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, AVPacket *avpkt){
2748 const uint8_t *buf = avpkt->data;
2749 int buf_size = avpkt->size;
2750 SnowContext *s = avctx->priv_data;
2751 RangeCoder * const c= &s->c;
2753 AVFrame *picture = data;
2754 int level, orientation, plane_index;
2756 ff_init_range_decoder(c, buf, buf_size);
2757 ff_build_rac_states(c, 0.05*(1LL<<32), 256-8);
2759 s->current_picture.pict_type= FF_I_TYPE; //FIXME I vs. P
2760 if(decode_header(s)<0)
2762 common_init_after_header(avctx);
2764 // realloc slice buffer for the case that spatial_decomposition_count changed
2765 slice_buffer_destroy(&s->sb);
2766 slice_buffer_init(&s->sb, s->plane[0].height, (MB_SIZE >> s->block_max_depth) + s->spatial_decomposition_count * 8 + 1, s->plane[0].width, s->spatial_idwt_buffer);
2768 for(plane_index=0; plane_index<3; plane_index++){
2769 Plane *p= &s->plane[plane_index];
2770 p->fast_mc= p->diag_mc && p->htaps==6 && p->hcoeff[0]==40
2771 && p->hcoeff[1]==-10
2777 if(frame_start(s) < 0)
2779 //keyframe flag duplication mess FIXME
2780 if(avctx->debug&FF_DEBUG_PICT_INFO)
2781 av_log(avctx, AV_LOG_ERROR, "keyframe:%d qlog:%d\n", s->keyframe, s->qlog);
2785 for(plane_index=0; plane_index<3; plane_index++){
2786 Plane *p= &s->plane[plane_index];
2790 int decode_state[MAX_DECOMPOSITIONS][4][1]; /* Stored state info for unpack_coeffs. 1 variable per instance. */
2792 if(s->avctx->debug&2048){
2793 memset(s->spatial_dwt_buffer, 0, sizeof(DWTELEM)*w*h);
2794 predict_plane(s, s->spatial_idwt_buffer, plane_index, 1);
2798 int v= s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x];
2799 s->mconly_picture.data[plane_index][y*s->mconly_picture.linesize[plane_index] + x]= v;
2805 for(level=0; level<s->spatial_decomposition_count; level++){
2806 for(orientation=level ? 1 : 0; orientation<4; orientation++){
2807 SubBand *b= &p->band[level][orientation];
2808 unpack_coeffs(s, b, b->parent, orientation);
2814 const int mb_h= s->b_height << s->block_max_depth;
2815 const int block_size = MB_SIZE >> s->block_max_depth;
2816 const int block_w = plane_index ? block_size/2 : block_size;
2818 DWTCompose cs[MAX_DECOMPOSITIONS];
2823 ff_spatial_idwt_buffered_init(cs, &s->sb, w, h, 1, s->spatial_decomposition_type, s->spatial_decomposition_count);
2824 for(mb_y=0; mb_y<=mb_h; mb_y++){
2826 int slice_starty = block_w*mb_y;
2827 int slice_h = block_w*(mb_y+1);
2828 if (!(s->keyframe || s->avctx->debug&512)){
2829 slice_starty = FFMAX(0, slice_starty - (block_w >> 1));
2830 slice_h -= (block_w >> 1);
2833 for(level=0; level<s->spatial_decomposition_count; level++){
2834 for(orientation=level ? 1 : 0; orientation<4; orientation++){
2835 SubBand *b= &p->band[level][orientation];
2838 int our_mb_start = mb_y;
2839 int our_mb_end = (mb_y + 1);
2841 start_y = (mb_y ? ((block_w * our_mb_start) >> (s->spatial_decomposition_count - level)) + s->spatial_decomposition_count - level + extra: 0);
2842 end_y = (((block_w * our_mb_end) >> (s->spatial_decomposition_count - level)) + s->spatial_decomposition_count - level + extra);
2843 if (!(s->keyframe || s->avctx->debug&512)){
2844 start_y = FFMAX(0, start_y - (block_w >> (1+s->spatial_decomposition_count - level)));
2845 end_y = FFMAX(0, end_y - (block_w >> (1+s->spatial_decomposition_count - level)));
2847 start_y = FFMIN(b->height, start_y);
2848 end_y = FFMIN(b->height, end_y);
2850 if (start_y != end_y){
2851 if (orientation == 0){
2852 SubBand * correlate_band = &p->band[0][0];
2853 int correlate_end_y = FFMIN(b->height, end_y + 1);
2854 int correlate_start_y = FFMIN(b->height, (start_y ? start_y + 1 : 0));
2855 decode_subband_slice_buffered(s, correlate_band, &s->sb, correlate_start_y, correlate_end_y, decode_state[0][0]);
2856 correlate_slice_buffered(s, &s->sb, correlate_band, correlate_band->ibuf, correlate_band->stride, 1, 0, correlate_start_y, correlate_end_y);
2857 dequantize_slice_buffered(s, &s->sb, correlate_band, correlate_band->ibuf, correlate_band->stride, start_y, end_y);
2860 decode_subband_slice_buffered(s, b, &s->sb, start_y, end_y, decode_state[level][orientation]);
2865 for(; yd<slice_h; yd+=4){
2866 ff_spatial_idwt_buffered_slice(&s->dsp, cs, &s->sb, w, h, 1, s->spatial_decomposition_type, s->spatial_decomposition_count, yd);
2869 if(s->qlog == LOSSLESS_QLOG){
2870 for(; yq<slice_h && yq<h; yq++){
2871 IDWTELEM * line = slice_buffer_get_line(&s->sb, yq);
2873 line[x] <<= FRAC_BITS;
2878 predict_slice_buffered(s, &s->sb, s->spatial_idwt_buffer, plane_index, 1, mb_y);
2880 y = FFMIN(p->height, slice_starty);
2881 end_y = FFMIN(p->height, slice_h);
2883 slice_buffer_release(&s->sb, y++);
2886 slice_buffer_flush(&s->sb);
2893 release_buffer(avctx);
2895 if(!(s->avctx->debug&2048))
2896 *picture= s->current_picture;
2898 *picture= s->mconly_picture;
2900 *data_size = sizeof(AVFrame);
2902 bytes_read= c->bytestream - c->bytestream_start;
2903 if(bytes_read ==0) av_log(s->avctx, AV_LOG_ERROR, "error at end of frame\n"); //FIXME
2908 static av_cold int decode_end(AVCodecContext *avctx)
2910 SnowContext *s = avctx->priv_data;
2912 slice_buffer_destroy(&s->sb);
2919 AVCodec snow_decoder = {
2923 sizeof(SnowContext),
2928 CODEC_CAP_DR1 /*| CODEC_CAP_DRAW_HORIZ_BAND*/,
2930 .long_name = NULL_IF_CONFIG_SMALL("Snow"),
2933 #if CONFIG_SNOW_ENCODER
2934 static av_cold int encode_init(AVCodecContext *avctx)
2936 SnowContext *s = avctx->priv_data;
2939 if(avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL){
2940 av_log(avctx, AV_LOG_ERROR, "This codec is under development, files encoded with it may not be decodable with future versions!!!\n"
2941 "Use vstrict=-2 / -strict -2 to use it anyway.\n");
2945 if(avctx->prediction_method == DWT_97
2946 && (avctx->flags & CODEC_FLAG_QSCALE)
2947 && avctx->global_quality == 0){
2948 av_log(avctx, AV_LOG_ERROR, "The 9/7 wavelet is incompatible with lossless mode.\n");
2952 s->spatial_decomposition_type= avctx->prediction_method; //FIXME add decorrelator type r transform_type
2954 s->mv_scale = (avctx->flags & CODEC_FLAG_QPEL) ? 2 : 4;
2955 s->block_max_depth= (avctx->flags & CODEC_FLAG_4MV ) ? 1 : 0;
2957 for(plane_index=0; plane_index<3; plane_index++){
2958 s->plane[plane_index].diag_mc= 1;
2959 s->plane[plane_index].htaps= 6;
2960 s->plane[plane_index].hcoeff[0]= 40;
2961 s->plane[plane_index].hcoeff[1]= -10;
2962 s->plane[plane_index].hcoeff[2]= 2;
2963 s->plane[plane_index].fast_mc= 1;
2972 s->m.flags = avctx->flags;
2973 s->m.bit_rate= avctx->bit_rate;
2976 s->m.me.scratchpad= av_mallocz((avctx->width+64)*2*16*2*sizeof(uint8_t));
2977 s->m.me.map = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t));
2978 s->m.me.score_map = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t));
2979 s->m.obmc_scratchpad= av_mallocz(MB_SIZE*MB_SIZE*12*sizeof(uint32_t));
2980 h263_encode_init(&s->m); //mv_penalty
2982 s->max_ref_frames = FFMAX(FFMIN(avctx->refs, MAX_REF_FRAMES), 1);
2984 if(avctx->flags&CODEC_FLAG_PASS1){
2985 if(!avctx->stats_out)
2986 avctx->stats_out = av_mallocz(256);
2988 if((avctx->flags&CODEC_FLAG_PASS2) || !(avctx->flags&CODEC_FLAG_QSCALE)){
2989 if(ff_rate_control_init(&s->m) < 0)
2992 s->pass1_rc= !(avctx->flags & (CODEC_FLAG_QSCALE|CODEC_FLAG_PASS2));
2994 avctx->coded_frame= &s->current_picture;
2995 switch(avctx->pix_fmt){
2996 // case PIX_FMT_YUV444P:
2997 // case PIX_FMT_YUV422P:
2998 case PIX_FMT_YUV420P:
3000 // case PIX_FMT_YUV411P:
3001 // case PIX_FMT_YUV410P:
3002 s->colorspace_type= 0;
3004 /* case PIX_FMT_RGB32:
3008 av_log(avctx, AV_LOG_ERROR, "pixel format not supported\n");
3011 // avcodec_get_chroma_sub_sample(avctx->pix_fmt, &s->chroma_h_shift, &s->chroma_v_shift);
3012 s->chroma_h_shift= 1;
3013 s->chroma_v_shift= 1;
3015 ff_set_cmp(&s->dsp, s->dsp.me_cmp, s->avctx->me_cmp);
3016 ff_set_cmp(&s->dsp, s->dsp.me_sub_cmp, s->avctx->me_sub_cmp);
3018 s->avctx->get_buffer(s->avctx, &s->input_picture);
3020 if(s->avctx->me_method == ME_ITER){
3022 int size= s->b_width * s->b_height << 2*s->block_max_depth;
3023 for(i=0; i<s->max_ref_frames; i++){
3024 s->ref_mvs[i]= av_mallocz(size*sizeof(int16_t[2]));
3025 s->ref_scores[i]= av_mallocz(size*sizeof(uint32_t));
3032 //near copy & paste from dsputil, FIXME
3033 static int pix_sum(uint8_t * pix, int line_size, int w)
3038 for (i = 0; i < w; i++) {
3039 for (j = 0; j < w; j++) {
3043 pix += line_size - w;
3048 //near copy & paste from dsputil, FIXME
3049 static int pix_norm1(uint8_t * pix, int line_size, int w)
3052 uint32_t *sq = ff_squareTbl + 256;
3055 for (i = 0; i < w; i++) {
3056 for (j = 0; j < w; j ++) {
3060 pix += line_size - w;
3068 #define P_TOPRIGHT P[3]
3069 #define P_MEDIAN P[4]
3071 #define FLAG_QPEL 1 //must be 1
3073 static int encode_q_branch(SnowContext *s, int level, int x, int y){
3074 uint8_t p_buffer[1024];
3075 uint8_t i_buffer[1024];
3076 uint8_t p_state[sizeof(s->block_state)];
3077 uint8_t i_state[sizeof(s->block_state)];
3079 uint8_t *pbbak= s->c.bytestream;
3080 uint8_t *pbbak_start= s->c.bytestream_start;
3081 int score, score2, iscore, i_len, p_len, block_s, sum, base_bits;
3082 const int w= s->b_width << s->block_max_depth;
3083 const int h= s->b_height << s->block_max_depth;
3084 const int rem_depth= s->block_max_depth - level;
3085 const int index= (x + y*w) << rem_depth;
3086 const int block_w= 1<<(LOG2_MB_SIZE - level);
3087 int trx= (x+1)<<rem_depth;
3088 int try= (y+1)<<rem_depth;
3089 const BlockNode *left = x ? &s->block[index-1] : &null_block;
3090 const BlockNode *top = y ? &s->block[index-w] : &null_block;
3091 const BlockNode *right = trx<w ? &s->block[index+1] : &null_block;
3092 const BlockNode *bottom= try<h ? &s->block[index+w] : &null_block;
3093 const BlockNode *tl = y && x ? &s->block[index-w-1] : left;
3094 const BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
3095 int pl = left->color[0];
3096 int pcb= left->color[1];
3097 int pcr= left->color[2];
3101 const int stride= s->current_picture.linesize[0];
3102 const int uvstride= s->current_picture.linesize[1];
3103 uint8_t *current_data[3]= { s->input_picture.data[0] + (x + y* stride)*block_w,
3104 s->input_picture.data[1] + (x + y*uvstride)*block_w/2,
3105 s->input_picture.data[2] + (x + y*uvstride)*block_w/2};
3107 int16_t last_mv[3][2];
3108 int qpel= !!(s->avctx->flags & CODEC_FLAG_QPEL); //unused
3109 const int shift= 1+qpel;
3110 MotionEstContext *c= &s->m.me;
3111 int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref);
3112 int mx_context= av_log2(2*FFABS(left->mx - top->mx));
3113 int my_context= av_log2(2*FFABS(left->my - top->my));
3114 int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
3115 int ref, best_ref, ref_score, ref_mx, ref_my;
3117 assert(sizeof(s->block_state) >= 256);
3119 set_blocks(s, level, x, y, pl, pcb, pcr, 0, 0, 0, BLOCK_INTRA);
3123 // clip predictors / edge ?
3125 P_LEFT[0]= left->mx;
3126 P_LEFT[1]= left->my;
3129 P_TOPRIGHT[0]= tr->mx;
3130 P_TOPRIGHT[1]= tr->my;
3132 last_mv[0][0]= s->block[index].mx;
3133 last_mv[0][1]= s->block[index].my;
3134 last_mv[1][0]= right->mx;
3135 last_mv[1][1]= right->my;
3136 last_mv[2][0]= bottom->mx;
3137 last_mv[2][1]= bottom->my;
3144 assert(c-> stride == stride);
3145 assert(c->uvstride == uvstride);
3147 c->penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_cmp);
3148 c->sub_penalty_factor= get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_sub_cmp);
3149 c->mb_penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->mb_cmp);
3150 c->current_mv_penalty= c->mv_penalty[s->m.f_code=1] + MAX_MV;
3152 c->xmin = - x*block_w - 16+3;
3153 c->ymin = - y*block_w - 16+3;
3154 c->xmax = - (x+1)*block_w + (w<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-3;
3155 c->ymax = - (y+1)*block_w + (h<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-3;
3157 if(P_LEFT[0] > (c->xmax<<shift)) P_LEFT[0] = (c->xmax<<shift);
3158 if(P_LEFT[1] > (c->ymax<<shift)) P_LEFT[1] = (c->ymax<<shift);
3159 if(P_TOP[0] > (c->xmax<<shift)) P_TOP[0] = (c->xmax<<shift);
3160 if(P_TOP[1] > (c->ymax<<shift)) P_TOP[1] = (c->ymax<<shift);
3161 if(P_TOPRIGHT[0] < (c->xmin<<shift)) P_TOPRIGHT[0]= (c->xmin<<shift);
3162 if(P_TOPRIGHT[0] > (c->xmax<<shift)) P_TOPRIGHT[0]= (c->xmax<<shift); //due to pmx no clip
3163 if(P_TOPRIGHT[1] > (c->ymax<<shift)) P_TOPRIGHT[1]= (c->ymax<<shift);
3165 P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]);
3166 P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);
3169 c->pred_x= P_LEFT[0];
3170 c->pred_y= P_LEFT[1];
3172 c->pred_x = P_MEDIAN[0];
3173 c->pred_y = P_MEDIAN[1];
3178 for(ref=0; ref<s->ref_frames; ref++){
3179 init_ref(c, current_data, s->last_picture[ref].data, NULL, block_w*x, block_w*y, 0);
3181 ref_score= ff_epzs_motion_search(&s->m, &ref_mx, &ref_my, P, 0, /*ref_index*/ 0, last_mv,
3182 (1<<16)>>shift, level-LOG2_MB_SIZE+4, block_w);
3184 assert(ref_mx >= c->xmin);
3185 assert(ref_mx <= c->xmax);
3186 assert(ref_my >= c->ymin);
3187 assert(ref_my <= c->ymax);
3189 ref_score= c->sub_motion_search(&s->m, &ref_mx, &ref_my, ref_score, 0, 0, level-LOG2_MB_SIZE+4, block_w);
3190 ref_score= ff_get_mb_score(&s->m, ref_mx, ref_my, 0, 0, level-LOG2_MB_SIZE+4, block_w, 0);
3191 ref_score+= 2*av_log2(2*ref)*c->penalty_factor;
3192 if(s->ref_mvs[ref]){
3193 s->ref_mvs[ref][index][0]= ref_mx;
3194 s->ref_mvs[ref][index][1]= ref_my;
3195 s->ref_scores[ref][index]= ref_score;
3197 if(score > ref_score){
3204 //FIXME if mb_cmp != SSE then intra cannot be compared currently and mb_penalty vs. lambda2
3207 base_bits= get_rac_count(&s->c) - 8*(s->c.bytestream - s->c.bytestream_start);
3209 pc.bytestream_start=
3210 pc.bytestream= p_buffer; //FIXME end/start? and at the other stoo
3211 memcpy(p_state, s->block_state, sizeof(s->block_state));
3213 if(level!=s->block_max_depth)
3214 put_rac(&pc, &p_state[4 + s_context], 1);
3215 put_rac(&pc, &p_state[1 + left->type + top->type], 0);
3216 if(s->ref_frames > 1)
3217 put_symbol(&pc, &p_state[128 + 1024 + 32*ref_context], best_ref, 0);
3218 pred_mv(s, &pmx, &pmy, best_ref, left, top, tr);
3219 put_symbol(&pc, &p_state[128 + 32*(mx_context + 16*!!best_ref)], mx - pmx, 1);
3220 put_symbol(&pc, &p_state[128 + 32*(my_context + 16*!!best_ref)], my - pmy, 1);
3221 p_len= pc.bytestream - pc.bytestream_start;
3222 score += (s->lambda2*(get_rac_count(&pc)-base_bits))>>FF_LAMBDA_SHIFT;
3224 block_s= block_w*block_w;
3225 sum = pix_sum(current_data[0], stride, block_w);
3226 l= (sum + block_s/2)/block_s;
3227 iscore = pix_norm1(current_data[0], stride, block_w) - 2*l*sum + l*l*block_s;
3229 block_s= block_w*block_w>>2;
3230 sum = pix_sum(current_data[1], uvstride, block_w>>1);
3231 cb= (sum + block_s/2)/block_s;
3232 // iscore += pix_norm1(¤t_mb[1][0], uvstride, block_w>>1) - 2*cb*sum + cb*cb*block_s;
3233 sum = pix_sum(current_data[2], uvstride, block_w>>1);
3234 cr= (sum + block_s/2)/block_s;
3235 // iscore += pix_norm1(¤t_mb[2][0], uvstride, block_w>>1) - 2*cr*sum + cr*cr*block_s;
3238 ic.bytestream_start=
3239 ic.bytestream= i_buffer; //FIXME end/start? and at the other stoo
3240 memcpy(i_state, s->block_state, sizeof(s->block_state));
3241 if(level!=s->block_max_depth)
3242 put_rac(&ic, &i_state[4 + s_context], 1);
3243 put_rac(&ic, &i_state[1 + left->type + top->type], 1);
3244 put_symbol(&ic, &i_state[32], l-pl , 1);
3245 put_symbol(&ic, &i_state[64], cb-pcb, 1);
3246 put_symbol(&ic, &i_state[96], cr-pcr, 1);
3247 i_len= ic.bytestream - ic.bytestream_start;
3248 iscore += (s->lambda2*(get_rac_count(&ic)-base_bits))>>FF_LAMBDA_SHIFT;
3250 // assert(score==256*256*256*64-1);
3251 assert(iscore < 255*255*256 + s->lambda2*10);
3252 assert(iscore >= 0);
3253 assert(l>=0 && l<=255);
3254 assert(pl>=0 && pl<=255);
3257 int varc= iscore >> 8;
3258 int vard= score >> 8;
3259 if (vard <= 64 || vard < varc)
3260 c->scene_change_score+= ff_sqrt(vard) - ff_sqrt(varc);
3262 c->scene_change_score+= s->m.qscale;
3265 if(level!=s->block_max_depth){
3266 put_rac(&s->c, &s->block_state[4 + s_context], 0);
3267 score2 = encode_q_branch(s, level+1, 2*x+0, 2*y+0);
3268 score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+0);
3269 score2+= encode_q_branch(s, level+1, 2*x+0, 2*y+1);
3270 score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+1);
3271 score2+= s->lambda2>>FF_LAMBDA_SHIFT; //FIXME exact split overhead
3273 if(score2 < score && score2 < iscore)
3278 pred_mv(s, &pmx, &pmy, 0, left, top, tr);
3279 memcpy(pbbak, i_buffer, i_len);
3281 s->c.bytestream_start= pbbak_start;
3282 s->c.bytestream= pbbak + i_len;
3283 set_blocks(s, level, x, y, l, cb, cr, pmx, pmy, 0, BLOCK_INTRA);
3284 memcpy(s->block_state, i_state, sizeof(s->block_state));
3287 memcpy(pbbak, p_buffer, p_len);
3289 s->c.bytestream_start= pbbak_start;
3290 s->c.bytestream= pbbak + p_len;
3291 set_blocks(s, level, x, y, pl, pcb, pcr, mx, my, best_ref, 0);
3292 memcpy(s->block_state, p_state, sizeof(s->block_state));
3297 static void encode_q_branch2(SnowContext *s, int level, int x, int y){
3298 const int w= s->b_width << s->block_max_depth;
3299 const int rem_depth= s->block_max_depth - level;
3300 const int index= (x + y*w) << rem_depth;
3301 int trx= (x+1)<<rem_depth;
3302 BlockNode *b= &s->block[index];
3303 const BlockNode *left = x ? &s->block[index-1] : &null_block;
3304 const BlockNode *top = y ? &s->block[index-w] : &null_block;
3305 const BlockNode *tl = y && x ? &s->block[index-w-1] : left;
3306 const BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
3307 int pl = left->color[0];
3308 int pcb= left->color[1];
3309 int pcr= left->color[2];
3311 int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref);
3312 int mx_context= av_log2(2*FFABS(left->mx - top->mx)) + 16*!!b->ref;
3313 int my_context= av_log2(2*FFABS(left->my - top->my)) + 16*!!b->ref;
3314 int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
3317 set_blocks(s, level, x, y, pl, pcb, pcr, 0, 0, 0, BLOCK_INTRA);
3321 if(level!=s->block_max_depth){
3322 if(same_block(b,b+1) && same_block(b,b+w) && same_block(b,b+w+1)){
3323 put_rac(&s->c, &s->block_state[4 + s_context], 1);
3325 put_rac(&s->c, &s->block_state[4 + s_context], 0);
3326 encode_q_branch2(s, level+1, 2*x+0, 2*y+0);
3327 encode_q_branch2(s, level+1, 2*x+1, 2*y+0);
3328 encode_q_branch2(s, level+1, 2*x+0, 2*y+1);
3329 encode_q_branch2(s, level+1, 2*x+1, 2*y+1);
3333 if(b->type & BLOCK_INTRA){
3334 pred_mv(s, &pmx, &pmy, 0, left, top, tr);
3335 put_rac(&s->c, &s->block_state[1 + (left->type&1) + (top->type&1)], 1);
3336 put_symbol(&s->c, &s->block_state[32], b->color[0]-pl , 1);
3337 put_symbol(&s->c, &s->block_state[64], b->color[1]-pcb, 1);
3338 put_symbol(&s->c, &s->block_state[96], b->color[2]-pcr, 1);
3339 set_blocks(s, level, x, y, b->color[0], b->color[1], b->color[2], pmx, pmy, 0, BLOCK_INTRA);
3341 pred_mv(s, &pmx, &pmy, b->ref, left, top, tr);
3342 put_rac(&s->c, &s->block_state[1 + (left->type&1) + (top->type&1)], 0);
3343 if(s->ref_frames > 1)
3344 put_symbol(&s->c, &s->block_state[128 + 1024 + 32*ref_context], b->ref, 0);
3345 put_symbol(&s->c, &s->block_state[128 + 32*mx_context], b->mx - pmx, 1);
3346 put_symbol(&s->c, &s->block_state[128 + 32*my_context], b->my - pmy, 1);
3347 set_blocks(s, level, x, y, pl, pcb, pcr, b->mx, b->my, b->ref, 0);
3351 static int get_dc(SnowContext *s, int mb_x, int mb_y, int plane_index){
3353 Plane *p= &s->plane[plane_index];
3354 const int block_size = MB_SIZE >> s->block_max_depth;
3355 const int block_w = plane_index ? block_size/2 : block_size;
3356 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
3357 const int obmc_stride= plane_index ? block_size : 2*block_size;
3358 const int ref_stride= s->current_picture.linesize[plane_index];
3359 uint8_t *src= s-> input_picture.data[plane_index];
3360 IDWTELEM *dst= (IDWTELEM*)s->m.obmc_scratchpad + plane_index*block_size*block_size*4; //FIXME change to unsigned
3361 const int b_stride = s->b_width << s->block_max_depth;
3362 const int w= p->width;
3363 const int h= p->height;
3364 int index= mb_x + mb_y*b_stride;
3365 BlockNode *b= &s->block[index];
3366 BlockNode backup= *b;
3370 b->type|= BLOCK_INTRA;
3371 b->color[plane_index]= 0;
3372 memset(dst, 0, obmc_stride*obmc_stride*sizeof(IDWTELEM));
3375 int mb_x2= mb_x + (i &1) - 1;
3376 int mb_y2= mb_y + (i>>1) - 1;
3377 int x= block_w*mb_x2 + block_w/2;
3378 int y= block_w*mb_y2 + block_w/2;
3380 add_yblock(s, 0, NULL, dst + ((i&1)+(i>>1)*obmc_stride)*block_w, NULL, obmc,
3381 x, y, block_w, block_w, w, h, obmc_stride, ref_stride, obmc_stride, mb_x2, mb_y2, 0, 0, plane_index);
3383 for(y2= FFMAX(y, 0); y2<FFMIN(h, y+block_w); y2++){
3384 for(x2= FFMAX(x, 0); x2<FFMIN(w, x+block_w); x2++){
3385 int index= x2-(block_w*mb_x - block_w/2) + (y2-(block_w*mb_y - block_w/2))*obmc_stride;
3386 int obmc_v= obmc[index];
3388 if(y<0) obmc_v += obmc[index + block_w*obmc_stride];
3389 if(x<0) obmc_v += obmc[index + block_w];
3390 if(y+block_w>h) obmc_v += obmc[index - block_w*obmc_stride];
3391 if(x+block_w>w) obmc_v += obmc[index - block_w];
3392 //FIXME precalculate this or simplify it somehow else
3394 d = -dst[index] + (1<<(FRAC_BITS-1));
3396 ab += (src[x2 + y2*ref_stride] - (d>>FRAC_BITS)) * obmc_v;
3397 aa += obmc_v * obmc_v; //FIXME precalculate this
3403 return av_clip(((ab<<LOG2_OBMC_MAX) + aa/2)/aa, 0, 255); //FIXME we should not need clipping
3406 static inline int get_block_bits(SnowContext *s, int x, int y, int w){
3407 const int b_stride = s->b_width << s->block_max_depth;
3408 const int b_height = s->b_height<< s->block_max_depth;
3409 int index= x + y*b_stride;
3410 const BlockNode *b = &s->block[index];
3411 const BlockNode *left = x ? &s->block[index-1] : &null_block;
3412 const BlockNode *top = y ? &s->block[index-b_stride] : &null_block;
3413 const BlockNode *tl = y && x ? &s->block[index-b_stride-1] : left;
3414 const BlockNode *tr = y && x+w<b_stride ? &s->block[index-b_stride+w] : tl;
3416 // int mx_context= av_log2(2*FFABS(left->mx - top->mx));
3417 // int my_context= av_log2(2*FFABS(left->my - top->my));
3419 if(x<0 || x>=b_stride || y>=b_height)
3426 00001XXXX 15-30 8-15
3428 //FIXME try accurate rate
3429 //FIXME intra and inter predictors if surrounding blocks are not the same type
3430 if(b->type & BLOCK_INTRA){
3431 return 3+2*( av_log2(2*FFABS(left->color[0] - b->color[0]))
3432 + av_log2(2*FFABS(left->color[1] - b->color[1]))
3433 + av_log2(2*FFABS(left->color[2] - b->color[2])));
3435 pred_mv(s, &dmx, &dmy, b->ref, left, top, tr);
3438 return 2*(1 + av_log2(2*FFABS(dmx)) //FIXME kill the 2* can be merged in lambda
3439 + av_log2(2*FFABS(dmy))
3440 + av_log2(2*b->ref));
3444 static int get_block_rd(SnowContext *s, int mb_x, int mb_y, int plane_index, const uint8_t *obmc_edged){
3445 Plane *p= &s->plane[plane_index];
3446 const int block_size = MB_SIZE >> s->block_max_depth;
3447 const int block_w = plane_index ? block_size/2 : block_size;
3448 const int obmc_stride= plane_index ? block_size : 2*block_size;
3449 const int ref_stride= s->current_picture.linesize[plane_index];
3450 uint8_t *dst= s->current_picture.data[plane_index];
3451 uint8_t *src= s-> input_picture.data[plane_index];
3452 IDWTELEM *pred= (IDWTELEM*)s->m.obmc_scratchpad + plane_index*block_size*block_size*4;
3453 uint8_t *cur = s->scratchbuf;
3454 uint8_t tmp[ref_stride*(2*MB_SIZE+HTAPS_MAX-1)];
3455 const int b_stride = s->b_width << s->block_max_depth;
3456 const int b_height = s->b_height<< s->block_max_depth;
3457 const int w= p->width;
3458 const int h= p->height;
3461 const int penalty_factor= get_penalty_factor(s->lambda, s->lambda2, s->avctx->me_cmp);
3462 int sx= block_w*mb_x - block_w/2;
3463 int sy= block_w*mb_y - block_w/2;
3464 int x0= FFMAX(0,-sx);
3465 int y0= FFMAX(0,-sy);
3466 int x1= FFMIN(block_w*2, w-sx);
3467 int y1= FFMIN(block_w*2, h-sy);
3470 pred_block(s, cur, tmp, ref_stride, sx, sy, block_w*2, block_w*2, &s->block[mb_x + mb_y*b_stride], plane_index, w, h);
3472 for(y=y0; y<y1; y++){
3473 const uint8_t *obmc1= obmc_edged + y*obmc_stride;
3474 const IDWTELEM *pred1 = pred + y*obmc_stride;
3475 uint8_t *cur1 = cur + y*ref_stride;
3476 uint8_t *dst1 = dst + sx + (sy+y)*ref_stride;
3477 for(x=x0; x<x1; x++){
3478 #if FRAC_BITS >= LOG2_OBMC_MAX
3479 int v = (cur1[x] * obmc1[x]) << (FRAC_BITS - LOG2_OBMC_MAX);
3481 int v = (cur1[x] * obmc1[x] + (1<<(LOG2_OBMC_MAX - FRAC_BITS-1))) >> (LOG2_OBMC_MAX - FRAC_BITS);
3483 v = (v + pred1[x]) >> FRAC_BITS;
3484 if(v&(~255)) v= ~(v>>31);
3489 /* copy the regions where obmc[] = (uint8_t)256 */
3490 if(LOG2_OBMC_MAX == 8
3491 && (mb_x == 0 || mb_x == b_stride-1)
3492 && (mb_y == 0 || mb_y == b_height-1)){
3501 for(y=y0; y<y1; y++)
3502 memcpy(dst + sx+x0 + (sy+y)*ref_stride, cur + x0 + y*ref_stride, x1-x0);
3506 /* FIXME rearrange dsputil to fit 32x32 cmp functions */
3507 /* FIXME check alignment of the cmp wavelet vs the encoding wavelet */
3508 /* FIXME cmps overlap but do not cover the wavelet's whole support.
3509 * So improving the score of one block is not strictly guaranteed
3510 * to improve the score of the whole frame, thus iterative motion
3511 * estimation does not always converge. */
3512 if(s->avctx->me_cmp == FF_CMP_W97)
3513 distortion = w97_32_c(&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, 32);
3514 else if(s->avctx->me_cmp == FF_CMP_W53)
3515 distortion = w53_32_c(&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, 32);
3519 int off = sx+16*(i&1) + (sy+16*(i>>1))*ref_stride;
3520 distortion += s->dsp.me_cmp[0](&s->m, src + off, dst + off, ref_stride, 16);
3525 distortion = s->dsp.me_cmp[0](&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, block_w*2);
3534 rate += get_block_bits(s, mb_x + (i&1) - (i>>1), mb_y + (i>>1), 1);
3536 if(mb_x == b_stride-2)
3537 rate += get_block_bits(s, mb_x + 1, mb_y + 1, 1);
3539 return distortion + rate*penalty_factor;
3542 static int get_4block_rd(SnowContext *s, int mb_x, int mb_y, int plane_index){
3544 Plane *p= &s->plane[plane_index];
3545 const int block_size = MB_SIZE >> s->block_max_depth;
3546 const int block_w = plane_index ? block_size/2 : block_size;
3547 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
3548 const int obmc_stride= plane_index ? block_size : 2*block_size;
3549 const int ref_stride= s->current_picture.linesize[plane_index];
3550 uint8_t *dst= s->current_picture.data[plane_index];
3551 uint8_t *src= s-> input_picture.data[plane_index];
3552 //FIXME zero_dst is const but add_yblock changes dst if add is 0 (this is never the case for dst=zero_dst
3553 // const has only been removed from zero_dst to suppress a warning
3554 static IDWTELEM zero_dst[4096]; //FIXME
3555 const int b_stride = s->b_width << s->block_max_depth;
3556 const int w= p->width;
3557 const int h= p->height;
3560 const int penalty_factor= get_penalty_factor(s->lambda, s->lambda2, s->avctx->me_cmp);
3563 int mb_x2= mb_x + (i%3) - 1;
3564 int mb_y2= mb_y + (i/3) - 1;
3565 int x= block_w*mb_x2 + block_w/2;
3566 int y= block_w*mb_y2 + block_w/2;
3568 add_yblock(s, 0, NULL, zero_dst, dst, obmc,
3569 x, y, block_w, block_w, w, h, /*dst_stride*/0, ref_stride, obmc_stride, mb_x2, mb_y2, 1, 1, plane_index);
3571 //FIXME find a cleaner/simpler way to skip the outside stuff
3572 for(y2= y; y2<0; y2++)
3573 memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, block_w);
3574 for(y2= h; y2<y+block_w; y2++)
3575 memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, block_w);
3577 for(y2= y; y2<y+block_w; y2++)
3578 memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, -x);
3581 for(y2= y; y2<y+block_w; y2++)
3582 memcpy(dst + w + y2*ref_stride, src + w + y2*ref_stride, x+block_w - w);
3585 assert(block_w== 8 || block_w==16);
3586 distortion += s->dsp.me_cmp[block_w==8](&s->m, src + x + y*ref_stride, dst + x + y*ref_stride, ref_stride, block_w);
3590 BlockNode *b= &s->block[mb_x+mb_y*b_stride];
3591 int merged= same_block(b,b+1) && same_block(b,b+b_stride) && same_block(b,b+b_stride+1);
3599 rate = get_block_bits(s, mb_x, mb_y, 2);
3600 for(i=merged?4:0; i<9; i++){
3601 static const int dxy[9][2] = {{0,0},{1,0},{0,1},{1,1},{2,0},{2,1},{-1,2},{0,2},{1,2}};
3602 rate += get_block_bits(s, mb_x + dxy[i][0], mb_y + dxy[i][1], 1);
3605 return distortion + rate*penalty_factor;
3608 static void ff_spatial_idwt_init(DWTCompose *cs, IDWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
3610 for(level=decomposition_count-1; level>=0; level--){
3612 case DWT_97: spatial_compose97i_init(cs+level, buffer, height>>level, stride<<level); break;
3613 case DWT_53: spatial_compose53i_init(cs+level, buffer, height>>level, stride<<level); break;
3618 static void ff_spatial_idwt_slice(DWTCompose *cs, IDWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count, int y){
3619 const int support = type==1 ? 3 : 5;
3623 for(level=decomposition_count-1; level>=0; level--){
3624 while(cs[level].y <= FFMIN((y>>level)+support, height>>level)){
3626 case DWT_97: spatial_compose97i_dy(cs+level, buffer, width>>level, height>>level, stride<<level);
3628 case DWT_53: spatial_compose53i_dy(cs+level, buffer, width>>level, height>>level, stride<<level);
3635 static void ff_spatial_idwt(IDWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
3636 DWTCompose cs[MAX_DECOMPOSITIONS];
3638 ff_spatial_idwt_init(cs, buffer, width, height, stride, type, decomposition_count);
3639 for(y=0; y<height; y+=4)
3640 ff_spatial_idwt_slice(cs, buffer, width, height, stride, type, decomposition_count, y);
3643 static int encode_subband_c0run(SnowContext *s, SubBand *b, IDWTELEM *src, IDWTELEM *parent, int stride, int orientation){
3644 const int w= b->width;
3645 const int h= b->height;
3657 int /*ll=0, */l=0, lt=0, t=0, rt=0;
3658 v= src[x + y*stride];
3661 t= src[x + (y-1)*stride];
3663 lt= src[x - 1 + (y-1)*stride];
3666 rt= src[x + 1 + (y-1)*stride];
3670 l= src[x - 1 + y*stride];
3672 if(orientation==1) ll= src[y + (x-2)*stride];
3673 else ll= src[x - 2 + y*stride];
3679 if(px<b->parent->width && py<b->parent->height)
3680 p= parent[px + py*2*stride];
3682 if(!(/*ll|*/l|lt|t|rt|p)){
3684 runs[run_index++]= run;
3692 max_index= run_index;
3693 runs[run_index++]= run;
3695 run= runs[run_index++];
3697 put_symbol2(&s->c, b->state[30], max_index, 0);
3698 if(run_index <= max_index)
3699 put_symbol2(&s->c, b->state[1], run, 3);
3702 if(s->c.bytestream_end - s->c.bytestream < w*40){
3703 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
3708 int /*ll=0, */l=0, lt=0, t=0, rt=0;
3709 v= src[x + y*stride];
3712 t= src[x + (y-1)*stride];
3714 lt= src[x - 1 + (y-1)*stride];
3717 rt= src[x + 1 + (y-1)*stride];
3721 l= src[x - 1 + y*stride];
3723 if(orientation==1) ll= src[y + (x-2)*stride];
3724 else ll= src[x - 2 + y*stride];
3730 if(px<b->parent->width && py<b->parent->height)
3731 p= parent[px + py*2*stride];
3733 if(/*ll|*/l|lt|t|rt|p){
3734 int context= av_log2(/*FFABS(ll) + */3*FFABS(l) + FFABS(lt) + 2*FFABS(t) + FFABS(rt) + FFABS(p));
3736 put_rac(&s->c, &b->state[0][context], !!v);
3739 run= runs[run_index++];
3741 if(run_index <= max_index)
3742 put_symbol2(&s->c, b->state[1], run, 3);
3750 int context= av_log2(/*FFABS(ll) + */3*FFABS(l) + FFABS(lt) + 2*FFABS(t) + FFABS(rt) + FFABS(p));
3751 int l2= 2*FFABS(l) + (l<0);
3752 int t2= 2*FFABS(t) + (t<0);
3754 put_symbol2(&s->c, b->state[context + 2], FFABS(v)-1, context-4);
3755 put_rac(&s->c, &b->state[0][16 + 1 + 3 + quant3bA[l2&0xFF] + 3*quant3bA[t2&0xFF]], v<0);
3763 static int encode_subband(SnowContext *s, SubBand *b, IDWTELEM *src, IDWTELEM *parent, int stride, int orientation){
3764 // encode_subband_qtree(s, b, src, parent, stride, orientation);
3765 // encode_subband_z0run(s, b, src, parent, stride, orientation);
3766 return encode_subband_c0run(s, b, src, parent, stride, orientation);
3767 // encode_subband_dzr(s, b, src, parent, stride, orientation);
3770 static av_always_inline int check_block(SnowContext *s, int mb_x, int mb_y, int p[3], int intra, const uint8_t *obmc_edged, int *best_rd){
3771 const int b_stride= s->b_width << s->block_max_depth;
3772 BlockNode *block= &s->block[mb_x + mb_y * b_stride];
3773 BlockNode backup= *block;
3774 int rd, index, value;
3776 assert(mb_x>=0 && mb_y>=0);
3777 assert(mb_x<b_stride);
3780 block->color[0] = p[0];
3781 block->color[1] = p[1];
3782 block->color[2] = p[2];
3783 block->type |= BLOCK_INTRA;
3785 index= (p[0] + 31*p[1]) & (ME_CACHE_SIZE-1);
3786 value= s->me_cache_generation + (p[0]>>10) + (p[1]<<6) + (block->ref<<12);
3787 if(s->me_cache[index] == value)
3789 s->me_cache[index]= value;
3793 block->type &= ~BLOCK_INTRA;
3796 rd= get_block_rd(s, mb_x, mb_y, 0, obmc_edged);
3808 /* special case for int[2] args we discard afterwards,
3809 * fixes compilation problem with gcc 2.95 */
3810 static av_always_inline int check_block_inter(SnowContext *s, int mb_x, int mb_y, int p0, int p1, const uint8_t *obmc_edged, int *best_rd){
3811 int p[2] = {p0, p1};
3812 return check_block(s, mb_x, mb_y, p, 0, obmc_edged, best_rd);
3815 static av_always_inline int check_4block_inter(SnowContext *s, int mb_x, int mb_y, int p0, int p1, int ref, int *best_rd){
3816 const int b_stride= s->b_width << s->block_max_depth;
3817 BlockNode *block= &s->block[mb_x + mb_y * b_stride];
3818 BlockNode backup[4]= {block[0], block[1], block[b_stride], block[b_stride+1]};
3819 int rd, index, value;
3821 assert(mb_x>=0 && mb_y>=0);
3822 assert(mb_x<b_stride);
3823 assert(((mb_x|mb_y)&1) == 0);
3825 index= (p0 + 31*p1) & (ME_CACHE_SIZE-1);
3826 value= s->me_cache_generation + (p0>>10) + (p1<<6) + (block->ref<<12);
3827 if(s->me_cache[index] == value)
3829 s->me_cache[index]= value;
3834 block->type &= ~BLOCK_INTRA;
3835 block[1]= block[b_stride]= block[b_stride+1]= *block;
3837 rd= get_4block_rd(s, mb_x, mb_y, 0);
3844 block[0]= backup[0];
3845 block[1]= backup[1];
3846 block[b_stride]= backup[2];
3847 block[b_stride+1]= backup[3];
3852 static void iterative_me(SnowContext *s){
3853 int pass, mb_x, mb_y;
3854 const int b_width = s->b_width << s->block_max_depth;
3855 const int b_height= s->b_height << s->block_max_depth;
3856 const int b_stride= b_width;
3860 RangeCoder r = s->c;
3861 uint8_t state[sizeof(s->block_state)];
3862 memcpy(state, s->block_state, sizeof(s->block_state));
3863 for(mb_y= 0; mb_y<s->b_height; mb_y++)
3864 for(mb_x= 0; mb_x<s->b_width; mb_x++)
3865 encode_q_branch(s, 0, mb_x, mb_y);
3867 memcpy(s->block_state, state, sizeof(s->block_state));
3870 for(pass=0; pass<25; pass++){
3873 for(mb_y= 0; mb_y<b_height; mb_y++){
3874 for(mb_x= 0; mb_x<b_width; mb_x++){
3875 int dia_change, i, j, ref;
3876 int best_rd= INT_MAX, ref_rd;
3877 BlockNode backup, ref_b;
3878 const int index= mb_x + mb_y * b_stride;
3879 BlockNode *block= &s->block[index];
3880 BlockNode *tb = mb_y ? &s->block[index-b_stride ] : NULL;
3881 BlockNode *lb = mb_x ? &s->block[index -1] : NULL;
3882 BlockNode *rb = mb_x+1<b_width ? &s->block[index +1] : NULL;
3883 BlockNode *bb = mb_y+1<b_height ? &s->block[index+b_stride ] : NULL;
3884 BlockNode *tlb= mb_x && mb_y ? &s->block[index-b_stride-1] : NULL;
3885 BlockNode *trb= mb_x+1<b_width && mb_y ? &s->block[index-b_stride+1] : NULL;
3886 BlockNode *blb= mb_x && mb_y+1<b_height ? &s->block[index+b_stride-1] : NULL;
3887 BlockNode *brb= mb_x+1<b_width && mb_y+1<b_height ? &s->block[index+b_stride+1] : NULL;
3888 const int b_w= (MB_SIZE >> s->block_max_depth);
3889 uint8_t obmc_edged[b_w*2][b_w*2];
3891 if(pass && (block->type & BLOCK_OPT))
3893 block->type |= BLOCK_OPT;
3897 if(!s->me_cache_generation)
3898 memset(s->me_cache, 0, sizeof(s->me_cache));
3899 s->me_cache_generation += 1<<22;
3901 //FIXME precalculate
3904 memcpy(obmc_edged, obmc_tab[s->block_max_depth], b_w*b_w*4);
3906 for(y=0; y<b_w*2; y++)
3907 memset(obmc_edged[y], obmc_edged[y][0] + obmc_edged[y][b_w-1], b_w);
3908 if(mb_x==b_stride-1)
3909 for(y=0; y<b_w*2; y++)
3910 memset(obmc_edged[y]+b_w, obmc_edged[y][b_w] + obmc_edged[y][b_w*2-1], b_w);
3912 for(x=0; x<b_w*2; x++)
3913 obmc_edged[0][x] += obmc_edged[b_w-1][x];
3914 for(y=1; y<b_w; y++)
3915 memcpy(obmc_edged[y], obmc_edged[0], b_w*2);
3917 if(mb_y==b_height-1){
3918 for(x=0; x<b_w*2; x++)
3919 obmc_edged[b_w*2-1][x] += obmc_edged[b_w][x];
3920 for(y=b_w; y<b_w*2-1; y++)
3921 memcpy(obmc_edged[y], obmc_edged[b_w*2-1], b_w*2);
3925 //skip stuff outside the picture
3926 if(mb_x==0 || mb_y==0 || mb_x==b_width-1 || mb_y==b_height-1){
3927 uint8_t *src= s-> input_picture.data[0];
3928 uint8_t *dst= s->current_picture.data[0];
3929 const int stride= s->current_picture.linesize[0];
3930 const int block_w= MB_SIZE >> s->block_max_depth;
3931 const int sx= block_w*mb_x - block_w/2;
3932 const int sy= block_w*mb_y - block_w/2;
3933 const int w= s->plane[0].width;
3934 const int h= s->plane[0].height;
3938 memcpy(dst + sx + y*stride, src + sx + y*stride, block_w*2);
3939 for(y=h; y<sy+block_w*2; y++)
3940 memcpy(dst + sx + y*stride, src + sx + y*stride, block_w*2);
3942 for(y=sy; y<sy+block_w*2; y++)
3943 memcpy(dst + sx + y*stride, src + sx + y*stride, -sx);
3945 if(sx+block_w*2 > w){
3946 for(y=sy; y<sy+block_w*2; y++)
3947 memcpy(dst + w + y*stride, src + w + y*stride, sx+block_w*2 - w);
3951 // intra(black) = neighbors' contribution to the current block
3953 color[i]= get_dc(s, mb_x, mb_y, i);
3955 // get previous score (cannot be cached due to OBMC)
3956 if(pass > 0 && (block->type&BLOCK_INTRA)){
3957 int color0[3]= {block->color[0], block->color[1], block->color[2]};
3958 check_block(s, mb_x, mb_y, color0, 1, *obmc_edged, &best_rd);
3960 check_block_inter(s, mb_x, mb_y, block->mx, block->my, *obmc_edged, &best_rd);
3964 for(ref=0; ref < s->ref_frames; ref++){
3965 int16_t (*mvr)[2]= &s->ref_mvs[ref][index];
3966 if(s->ref_scores[ref][index] > s->ref_scores[ref_b.ref][index]*3/2) //FIXME tune threshold
3971 check_block_inter(s, mb_x, mb_y, mvr[0][0], mvr[0][1], *obmc_edged, &best_rd);
3972 check_block_inter(s, mb_x, mb_y, 0, 0, *obmc_edged, &best_rd);
3974 check_block_inter(s, mb_x, mb_y, mvr[-b_stride][0], mvr[-b_stride][1], *obmc_edged, &best_rd);
3976 check_block_inter(s, mb_x, mb_y, mvr[-1][0], mvr[-1][1], *obmc_edged, &best_rd);
3978 check_block_inter(s, mb_x, mb_y, mvr[1][0], mvr[1][1], *obmc_edged, &best_rd);
3980 check_block_inter(s, mb_x, mb_y, mvr[b_stride][0], mvr[b_stride][1], *obmc_edged, &best_rd);
3983 //FIXME avoid subpel interpolation / round to nearest integer
3986 for(i=0; i<FFMAX(s->avctx->dia_size, 1); i++){
3988 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+4*(i-j), block->my+(4*j), *obmc_edged, &best_rd);
3989 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx-4*(i-j), block->my-(4*j), *obmc_edged, &best_rd);
3990 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+4*(i-j), block->my-(4*j), *obmc_edged, &best_rd);
3991 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx-4*(i-j), block->my+(4*j), *obmc_edged, &best_rd);
3997 static const int square[8][2]= {{+1, 0},{-1, 0},{ 0,+1},{ 0,-1},{+1,+1},{-1,-1},{+1,-1},{-1,+1},};
4000 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+square[i][0], block->my+square[i][1], *obmc_edged, &best_rd);
4002 //FIXME or try the standard 2 pass qpel or similar
4004 mvr[0][0]= block->mx;
4005 mvr[0][1]= block->my;
4006 if(ref_rd > best_rd){
4014 check_block(s, mb_x, mb_y, color, 1, *obmc_edged, &best_rd);
4015 //FIXME RD style color selection
4017 if(!same_block(block, &backup)){
4018 if(tb ) tb ->type &= ~BLOCK_OPT;
4019 if(lb ) lb ->type &= ~BLOCK_OPT;
4020 if(rb ) rb ->type &= ~BLOCK_OPT;
4021 if(bb ) bb ->type &= ~BLOCK_OPT;
4022 if(tlb) tlb->type &= ~BLOCK_OPT;
4023 if(trb) trb->type &= ~BLOCK_OPT;
4024 if(blb) blb->type &= ~BLOCK_OPT;
4025 if(brb) brb->type &= ~BLOCK_OPT;
4030 av_log(s->avctx, AV_LOG_ERROR, "pass:%d changed:%d\n", pass, change);
4035 if(s->block_max_depth == 1){
4037 for(mb_y= 0; mb_y<b_height; mb_y+=2){
4038 for(mb_x= 0; mb_x<b_width; mb_x+=2){
4040 int best_rd, init_rd;
4041 const int index= mb_x + mb_y * b_stride;
4044 b[0]= &s->block[index];
4046 b[2]= b[0]+b_stride;
4048 if(same_block(b[0], b[1]) &&
4049 same_block(b[0], b[2]) &&
4050 same_block(b[0], b[3]))
4053 if(!s->me_cache_generation)
4054 memset(s->me_cache, 0, sizeof(s->me_cache));
4055 s->me_cache_generation += 1<<22;
4057 init_rd= best_rd= get_4block_rd(s, mb_x, mb_y, 0);
4059 //FIXME more multiref search?
4060 check_4block_inter(s, mb_x, mb_y,
4061 (b[0]->mx + b[1]->mx + b[2]->mx + b[3]->mx + 2) >> 2,
4062 (b[0]->my + b[1]->my + b[2]->my + b[3]->my + 2) >> 2, 0, &best_rd);
4065 if(!(b[i]->type&BLOCK_INTRA))
4066 check_4block_inter(s, mb_x, mb_y, b[i]->mx, b[i]->my, b[i]->ref, &best_rd);
4068 if(init_rd != best_rd)
4072 av_log(s->avctx, AV_LOG_ERROR, "pass:4mv changed:%d\n", change*4);
4076 static void encode_blocks(SnowContext *s, int search){
4081 if(s->avctx->me_method == ME_ITER && !s->keyframe && search)
4085 if(s->c.bytestream_end - s->c.bytestream < w*MB_SIZE*MB_SIZE*3){ //FIXME nicer limit
4086 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
4090 if(s->avctx->me_method == ME_ITER || !search)
4091 encode_q_branch2(s, 0, x, y);
4093 encode_q_branch (s, 0, x, y);
4098 static void quantize(SnowContext *s, SubBand *b, IDWTELEM *dst, DWTELEM *src, int stride, int bias){
4099 const int w= b->width;
4100 const int h= b->height;
4101 const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
4102 const int qmul= qexp[qlog&(QROOT-1)]<<((qlog>>QSHIFT) + ENCODER_EXTRA_BITS);
4103 int x,y, thres1, thres2;
4105 if(s->qlog == LOSSLESS_QLOG){
4108 dst[x + y*stride]= src[x + y*stride];
4112 bias= bias ? 0 : (3*qmul)>>3;
4113 thres1= ((qmul - bias)>>QEXPSHIFT) - 1;
4119 int i= src[x + y*stride];
4121 if((unsigned)(i+thres1) > thres2){
4124 i/= qmul; //FIXME optimize
4125 dst[x + y*stride]= i;
4129 i/= qmul; //FIXME optimize
4130 dst[x + y*stride]= -i;
4133 dst[x + y*stride]= 0;
4139 int i= src[x + y*stride];
4141 if((unsigned)(i+thres1) > thres2){
4144 i= (i + bias) / qmul; //FIXME optimize
4145 dst[x + y*stride]= i;
4149 i= (i + bias) / qmul; //FIXME optimize
4150 dst[x + y*stride]= -i;
4153 dst[x + y*stride]= 0;
4159 static void dequantize(SnowContext *s, SubBand *b, IDWTELEM *src, int stride){
4160 const int w= b->width;
4161 const int h= b->height;
4162 const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
4163 const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
4164 const int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
4167 if(s->qlog == LOSSLESS_QLOG) return;
4171 int i= src[x + y*stride];
4173 src[x + y*stride]= -((-i*qmul + qadd)>>(QEXPSHIFT)); //FIXME try different bias
4175 src[x + y*stride]= (( i*qmul + qadd)>>(QEXPSHIFT));
4181 static void decorrelate(SnowContext *s, SubBand *b, IDWTELEM *src, int stride, int inverse, int use_median){
4182 const int w= b->width;
4183 const int h= b->height;
4186 for(y=h-1; y>=0; y--){
4187 for(x=w-1; x>=0; x--){
4188 int i= x + y*stride;
4192 if(y && x+1<w) src[i] -= mid_pred(src[i - 1], src[i - stride], src[i - stride + 1]);
4193 else src[i] -= src[i - 1];
4195 if(y) src[i] -= mid_pred(src[i - 1], src[i - stride], src[i - 1] + src[i - stride] - src[i - 1 - stride]);
4196 else src[i] -= src[i - 1];
4199 if(y) src[i] -= src[i - stride];
4205 static void correlate(SnowContext *s, SubBand *b, IDWTELEM *src, int stride, int inverse, int use_median){
4206 const int w= b->width;
4207 const int h= b->height;
4212 int i= x + y*stride;
4216 if(y && x+1<w) src[i] += mid_pred(src[i - 1], src[i - stride], src[i - stride + 1]);
4217 else src[i] += src[i - 1];
4219 if(y) src[i] += mid_pred(src[i - 1], src[i - stride], src[i - 1] + src[i - stride] - src[i - 1 - stride]);
4220 else src[i] += src[i - 1];
4223 if(y) src[i] += src[i - stride];
4229 static void encode_qlogs(SnowContext *s){
4230 int plane_index, level, orientation;
4232 for(plane_index=0; plane_index<2; plane_index++){
4233 for(level=0; level<s->spatial_decomposition_count; level++){
4234 for(orientation=level ? 1:0; orientation<4; orientation++){
4235 if(orientation==2) continue;
4236 put_symbol(&s->c, s->header_state, s->plane[plane_index].band[level][orientation].qlog, 1);
4242 static void encode_header(SnowContext *s){
4246 memset(kstate, MID_STATE, sizeof(kstate));
4248 put_rac(&s->c, kstate, s->keyframe);
4249 if(s->keyframe || s->always_reset){
4251 s->last_spatial_decomposition_type=
4255 s->last_block_max_depth= 0;
4256 for(plane_index=0; plane_index<2; plane_index++){
4257 Plane *p= &s->plane[plane_index];
4260 memset(p->last_hcoeff, 0, sizeof(p->last_hcoeff));
4264 put_symbol(&s->c, s->header_state, s->version, 0);
4265 put_rac(&s->c, s->header_state, s->always_reset);
4266 put_symbol(&s->c, s->header_state, s->temporal_decomposition_type, 0);
4267 put_symbol(&s->c, s->header_state, s->temporal_decomposition_count, 0);
4268 put_symbol(&s->c, s->header_state, s->spatial_decomposition_count, 0);
4269 put_symbol(&s->c, s->header_state, s->colorspace_type, 0);
4270 put_symbol(&s->c, s->header_state, s->chroma_h_shift, 0);
4271 put_symbol(&s->c, s->header_state, s->chroma_v_shift, 0);
4272 put_rac(&s->c, s->header_state, s->spatial_scalability);
4273 // put_rac(&s->c, s->header_state, s->rate_scalability);
4274 put_symbol(&s->c, s->header_state, s->max_ref_frames-1, 0);
4281 for(plane_index=0; plane_index<2; plane_index++){
4282 Plane *p= &s->plane[plane_index];
4283 update_mc |= p->last_htaps != p->htaps;
4284 update_mc |= p->last_diag_mc != p->diag_mc;
4285 update_mc |= !!memcmp(p->last_hcoeff, p->hcoeff, sizeof(p->hcoeff));
4287 put_rac(&s->c, s->header_state, update_mc);
4289 for(plane_index=0; plane_index<2; plane_index++){
4290 Plane *p= &s->plane[plane_index];
4291 put_rac(&s->c, s->header_state, p->diag_mc);
4292 put_symbol(&s->c, s->header_state, p->htaps/2-1, 0);
4293 for(i= p->htaps/2; i; i--)
4294 put_symbol(&s->c, s->header_state, FFABS(p->hcoeff[i]), 0);
4297 if(s->last_spatial_decomposition_count != s->spatial_decomposition_count){
4298 put_rac(&s->c, s->header_state, 1);
4299 put_symbol(&s->c, s->header_state, s->spatial_decomposition_count, 0);
4302 put_rac(&s->c, s->header_state, 0);
4305 put_symbol(&s->c, s->header_state, s->spatial_decomposition_type - s->last_spatial_decomposition_type, 1);
4306 put_symbol(&s->c, s->header_state, s->qlog - s->last_qlog , 1);
4307 put_symbol(&s->c, s->header_state, s->mv_scale - s->last_mv_scale, 1);
4308 put_symbol(&s->c, s->header_state, s->qbias - s->last_qbias , 1);
4309 put_symbol(&s->c, s->header_state, s->block_max_depth - s->last_block_max_depth, 1);
4313 static void update_last_header_values(SnowContext *s){
4317 for(plane_index=0; plane_index<2; plane_index++){
4318 Plane *p= &s->plane[plane_index];
4319 p->last_diag_mc= p->diag_mc;
4320 p->last_htaps = p->htaps;
4321 memcpy(p->last_hcoeff, p->hcoeff, sizeof(p->hcoeff));
4325 s->last_spatial_decomposition_type = s->spatial_decomposition_type;
4326 s->last_qlog = s->qlog;
4327 s->last_qbias = s->qbias;
4328 s->last_mv_scale = s->mv_scale;
4329 s->last_block_max_depth = s->block_max_depth;
4330 s->last_spatial_decomposition_count = s->spatial_decomposition_count;
4333 static int qscale2qlog(int qscale){
4334 return rint(QROOT*log(qscale / (float)FF_QP2LAMBDA)/log(2))
4335 + 61*QROOT/8; //<64 >60
4338 static int ratecontrol_1pass(SnowContext *s, AVFrame *pict)
4340 /* Estimate the frame's complexity as a sum of weighted dwt coefficients.
4341 * FIXME we know exact mv bits at this point,
4342 * but ratecontrol isn't set up to include them. */
4343 uint32_t coef_sum= 0;
4344 int level, orientation, delta_qlog;
4346 for(level=0; level<s->spatial_decomposition_count; level++){
4347 for(orientation=level ? 1 : 0; orientation<4; orientation++){
4348 SubBand *b= &s->plane[0].band[level][orientation];
4349 IDWTELEM *buf= b->ibuf;
4350 const int w= b->width;
4351 const int h= b->height;
4352 const int stride= b->stride;
4353 const int qlog= av_clip(2*QROOT + b->qlog, 0, QROOT*16);
4354 const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
4355 const int qdiv= (1<<16)/qmul;
4357 //FIXME this is ugly
4360 buf[x+y*stride]= b->buf[x+y*stride];
4362 decorrelate(s, b, buf, stride, 1, 0);
4365 coef_sum+= abs(buf[x+y*stride]) * qdiv >> 16;
4369 /* ugly, ratecontrol just takes a sqrt again */
4370 coef_sum = (uint64_t)coef_sum * coef_sum >> 16;
4371 assert(coef_sum < INT_MAX);
4373 if(pict->pict_type == FF_I_TYPE){
4374 s->m.current_picture.mb_var_sum= coef_sum;
4375 s->m.current_picture.mc_mb_var_sum= 0;
4377 s->m.current_picture.mc_mb_var_sum= coef_sum;
4378 s->m.current_picture.mb_var_sum= 0;
4381 pict->quality= ff_rate_estimate_qscale(&s->m, 1);
4382 if (pict->quality < 0)
4384 s->lambda= pict->quality * 3/2;
4385 delta_qlog= qscale2qlog(pict->quality) - s->qlog;
4386 s->qlog+= delta_qlog;
4390 static void calculate_visual_weight(SnowContext *s, Plane *p){
4391 int width = p->width;
4392 int height= p->height;
4393 int level, orientation, x, y;
4395 for(level=0; level<s->spatial_decomposition_count; level++){
4396 for(orientation=level ? 1 : 0; orientation<4; orientation++){
4397 SubBand *b= &p->band[level][orientation];
4398 IDWTELEM *ibuf= b->ibuf;
4401 memset(s->spatial_idwt_buffer, 0, sizeof(*s->spatial_idwt_buffer)*width*height);
4402 ibuf[b->width/2 + b->height/2*b->stride]= 256*16;
4403 ff_spatial_idwt(s->spatial_idwt_buffer, width, height, width, s->spatial_decomposition_type, s->spatial_decomposition_count);
4404 for(y=0; y<height; y++){
4405 for(x=0; x<width; x++){
4406 int64_t d= s->spatial_idwt_buffer[x + y*width]*16;
4411 b->qlog= (int)(log(352256.0/sqrt(error)) / log(pow(2.0, 1.0/QROOT))+0.5);
4416 static int encode_frame(AVCodecContext *avctx, unsigned char *buf, int buf_size, void *data){
4417 SnowContext *s = avctx->priv_data;
4418 RangeCoder * const c= &s->c;
4419 AVFrame *pict = data;
4420 const int width= s->avctx->width;
4421 const int height= s->avctx->height;
4422 int level, orientation, plane_index, i, y;
4423 uint8_t rc_header_bak[sizeof(s->header_state)];
4424 uint8_t rc_block_bak[sizeof(s->block_state)];
4426 ff_init_range_encoder(c, buf, buf_size);
4427 ff_build_rac_states(c, 0.05*(1LL<<32), 256-8);
4431 for(y=0; y<(height>>shift); y++)
4432 memcpy(&s->input_picture.data[i][y * s->input_picture.linesize[i]],
4433 &pict->data[i][y * pict->linesize[i]],
4436 s->new_picture = *pict;
4438 s->m.picture_number= avctx->frame_number;
4439 if(avctx->flags&CODEC_FLAG_PASS2){
4441 pict->pict_type= s->m.rc_context.entry[avctx->frame_number].new_pict_type;
4442 s->keyframe= pict->pict_type==FF_I_TYPE;
4443 if(!(avctx->flags&CODEC_FLAG_QSCALE)) {
4444 pict->quality= ff_rate_estimate_qscale(&s->m, 0);
4445 if (pict->quality < 0)
4449 s->keyframe= avctx->gop_size==0 || avctx->frame_number % avctx->gop_size == 0;
4451 pict->pict_type= s->keyframe ? FF_I_TYPE : FF_P_TYPE;
4454 if(s->pass1_rc && avctx->frame_number == 0)
4455 pict->quality= 2*FF_QP2LAMBDA;
4457 s->qlog= qscale2qlog(pict->quality);
4458 s->lambda = pict->quality * 3/2;
4460 if(s->qlog < 0 || (!pict->quality && (avctx->flags & CODEC_FLAG_QSCALE))){
4461 s->qlog= LOSSLESS_QLOG;
4463 }//else keep previous frame's qlog until after motion estimation
4467 s->m.current_picture_ptr= &s->m.current_picture;
4468 s->m.last_picture.pts= s->m.current_picture.pts;
4469 s->m.current_picture.pts= pict->pts;
4470 if(pict->pict_type == FF_P_TYPE){
4471 int block_width = (width +15)>>4;
4472 int block_height= (height+15)>>4;
4473 int stride= s->current_picture.linesize[0];
4475 assert(s->current_picture.data[0]);
4476 assert(s->last_picture[0].data[0]);
4478 s->m.avctx= s->avctx;
4479 s->m.current_picture.data[0]= s->current_picture.data[0];
4480 s->m. last_picture.data[0]= s->last_picture[0].data[0];
4481 s->m. new_picture.data[0]= s-> input_picture.data[0];
4482 s->m. last_picture_ptr= &s->m. last_picture;
4484 s->m. last_picture.linesize[0]=
4485 s->m. new_picture.linesize[0]=
4486 s->m.current_picture.linesize[0]= stride;
4487 s->m.uvlinesize= s->current_picture.linesize[1];
4489 s->m.height= height;
4490 s->m.mb_width = block_width;
4491 s->m.mb_height= block_height;
4492 s->m.mb_stride= s->m.mb_width+1;
4493 s->m.b8_stride= 2*s->m.mb_width+1;
4495 s->m.pict_type= pict->pict_type;
4496 s->m.me_method= s->avctx->me_method;
4497 s->m.me.scene_change_score=0;
4498 s->m.flags= s->avctx->flags;
4499 s->m.quarter_sample= (s->avctx->flags & CODEC_FLAG_QPEL)!=0;
4500 s->m.out_format= FMT_H263;
4501 s->m.unrestricted_mv= 1;
4503 s->m.lambda = s->lambda;
4504 s->m.qscale= (s->m.lambda*139 + FF_LAMBDA_SCALE*64) >> (FF_LAMBDA_SHIFT + 7);
4505 s->lambda2= s->m.lambda2= (s->m.lambda*s->m.lambda + FF_LAMBDA_SCALE/2) >> FF_LAMBDA_SHIFT;
4507 s->m.dsp= s->dsp; //move
4513 memcpy(rc_header_bak, s->header_state, sizeof(s->header_state));
4514 memcpy(rc_block_bak, s->block_state, sizeof(s->block_state));
4519 if(pict->pict_type == FF_I_TYPE)
4520 s->spatial_decomposition_count= 5;
4522 s->spatial_decomposition_count= 5;
4524 s->m.pict_type = pict->pict_type;
4525 s->qbias= pict->pict_type == FF_P_TYPE ? 2 : 0;
4527 common_init_after_header(avctx);
4529 if(s->last_spatial_decomposition_count != s->spatial_decomposition_count){
4530 for(plane_index=0; plane_index<3; plane_index++){
4531 calculate_visual_weight(s, &s->plane[plane_index]);
4536 s->m.misc_bits = 8*(s->c.bytestream - s->c.bytestream_start);
4537 encode_blocks(s, 1);
4538 s->m.mv_bits = 8*(s->c.bytestream - s->c.bytestream_start) - s->m.misc_bits;
4540 for(plane_index=0; plane_index<3; plane_index++){
4541 Plane *p= &s->plane[plane_index];
4545 // int bits= put_bits_count(&s->c.pb);
4547 if(!(avctx->flags2 & CODEC_FLAG2_MEMC_ONLY)){
4549 if(pict->data[plane_index]) //FIXME gray hack
4552 s->spatial_idwt_buffer[y*w + x]= pict->data[plane_index][y*pict->linesize[plane_index] + x]<<FRAC_BITS;
4555 predict_plane(s, s->spatial_idwt_buffer, plane_index, 0);
4558 && pict->pict_type == FF_P_TYPE
4559 && !(avctx->flags&CODEC_FLAG_PASS2)
4560 && s->m.me.scene_change_score > s->avctx->scenechange_threshold){
4561 ff_init_range_encoder(c, buf, buf_size);
4562 ff_build_rac_states(c, 0.05*(1LL<<32), 256-8);
4563 pict->pict_type= FF_I_TYPE;
4565 s->current_picture.key_frame=1;
4569 if(s->qlog == LOSSLESS_QLOG){
4572 s->spatial_dwt_buffer[y*w + x]= (s->spatial_idwt_buffer[y*w + x] + (1<<(FRAC_BITS-1))-1)>>FRAC_BITS;
4578 s->spatial_dwt_buffer[y*w + x]=s->spatial_idwt_buffer[y*w + x]<<ENCODER_EXTRA_BITS;
4584 dwt_quantize(s, p, s->spatial_dwt_buffer, w, h, w, s->spatial_decomposition_type);
4586 ff_spatial_dwt(s->spatial_dwt_buffer, w, h, w, s->spatial_decomposition_type, s->spatial_decomposition_count);
4588 if(s->pass1_rc && plane_index==0){
4589 int delta_qlog = ratecontrol_1pass(s, pict);
4590 if (delta_qlog <= INT_MIN)
4593 //reordering qlog in the bitstream would eliminate this reset
4594 ff_init_range_encoder(c, buf, buf_size);
4595 memcpy(s->header_state, rc_header_bak, sizeof(s->header_state));
4596 memcpy(s->block_state, rc_block_bak, sizeof(s->block_state));
4598 encode_blocks(s, 0);
4602 for(level=0; level<s->spatial_decomposition_count; level++){
4603 for(orientation=level ? 1 : 0; orientation<4; orientation++){
4604 SubBand *b= &p->band[level][orientation];
4607 quantize(s, b, b->ibuf, b->buf, b->stride, s->qbias);
4609 decorrelate(s, b, b->ibuf, b->stride, pict->pict_type == FF_P_TYPE, 0);
4610 encode_subband(s, b, b->ibuf, b->parent ? b->parent->ibuf : NULL, b->stride, orientation);
4611 assert(b->parent==NULL || b->parent->stride == b->stride*2);
4613 correlate(s, b, b->ibuf, b->stride, 1, 0);
4617 for(level=0; level<s->spatial_decomposition_count; level++){
4618 for(orientation=level ? 1 : 0; orientation<4; orientation++){
4619 SubBand *b= &p->band[level][orientation];
4621 dequantize(s, b, b->ibuf, b->stride);
4625 ff_spatial_idwt(s->spatial_idwt_buffer, w, h, w, s->spatial_decomposition_type, s->spatial_decomposition_count);
4626 if(s->qlog == LOSSLESS_QLOG){
4629 s->spatial_idwt_buffer[y*w + x]<<=FRAC_BITS;
4633 predict_plane(s, s->spatial_idwt_buffer, plane_index, 1);
4636 if(pict->pict_type == FF_I_TYPE){
4639 s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x]=
4640 pict->data[plane_index][y*pict->linesize[plane_index] + x];
4644 memset(s->spatial_idwt_buffer, 0, sizeof(IDWTELEM)*w*h);
4645 predict_plane(s, s->spatial_idwt_buffer, plane_index, 1);
4648 if(s->avctx->flags&CODEC_FLAG_PSNR){
4651 if(pict->data[plane_index]) //FIXME gray hack
4654 int d= s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x] - pict->data[plane_index][y*pict->linesize[plane_index] + x];
4658 s->avctx->error[plane_index] += error;
4659 s->current_picture.error[plane_index] = error;
4664 update_last_header_values(s);
4666 release_buffer(avctx);
4668 s->current_picture.coded_picture_number = avctx->frame_number;
4669 s->current_picture.pict_type = pict->pict_type;
4670 s->current_picture.quality = pict->quality;
4671 s->m.frame_bits = 8*(s->c.bytestream - s->c.bytestream_start);
4672 s->m.p_tex_bits = s->m.frame_bits - s->m.misc_bits - s->m.mv_bits;
4673 s->m.current_picture.display_picture_number =
4674 s->m.current_picture.coded_picture_number = avctx->frame_number;
4675 s->m.current_picture.quality = pict->quality;
4676 s->m.total_bits += 8*(s->c.bytestream - s->c.bytestream_start);
4678 if (ff_rate_estimate_qscale(&s->m, 0) < 0)
4680 if(avctx->flags&CODEC_FLAG_PASS1)
4681 ff_write_pass1_stats(&s->m);
4682 s->m.last_pict_type = s->m.pict_type;
4683 avctx->frame_bits = s->m.frame_bits;
4684 avctx->mv_bits = s->m.mv_bits;
4685 avctx->misc_bits = s->m.misc_bits;
4686 avctx->p_tex_bits = s->m.p_tex_bits;
4690 return ff_rac_terminate(c);
4693 static av_cold int encode_end(AVCodecContext *avctx)
4695 SnowContext *s = avctx->priv_data;
4698 if (s->input_picture.data[0])
4699 avctx->release_buffer(avctx, &s->input_picture);
4700 av_free(avctx->stats_out);
4705 AVCodec snow_encoder = {
4709 sizeof(SnowContext),
4713 .long_name = NULL_IF_CONFIG_SMALL("Snow"),
4723 #include "libavutil/lfg.h"
4728 int buffer[2][width*height];
4732 s.spatial_decomposition_count=6;
4733 s.spatial_decomposition_type=1;
4735 av_lfg_init(&prng, 1);
4737 printf("testing 5/3 DWT\n");
4738 for(i=0; i<width*height; i++)
4739 buffer[0][i] = buffer[1][i] = av_lfg_get(&prng) % 54321 - 12345;
4741 ff_spatial_dwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4742 ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4744 for(i=0; i<width*height; i++)
4745 if(buffer[0][i]!= buffer[1][i]) printf("fsck: %6d %12d %7d\n",i, buffer[0][i], buffer[1][i]);
4747 printf("testing 9/7 DWT\n");
4748 s.spatial_decomposition_type=0;
4749 for(i=0; i<width*height; i++)
4750 buffer[0][i] = buffer[1][i] = av_lfg_get(&prng) % 54321 - 12345;
4752 ff_spatial_dwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4753 ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4755 for(i=0; i<width*height; i++)
4756 if(FFABS(buffer[0][i] - buffer[1][i])>20) printf("fsck: %6d %12d %7d\n",i, buffer[0][i], buffer[1][i]);
4759 printf("testing AC coder\n");
4760 memset(s.header_state, 0, sizeof(s.header_state));
4761 ff_init_range_encoder(&s.c, buffer[0], 256*256);
4762 ff_init_cabac_states(&s.c, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64);
4764 for(i=-256; i<256; i++){
4765 put_symbol(&s.c, s.header_state, i*i*i/3*FFABS(i), 1);
4767 ff_rac_terminate(&s.c);
4769 memset(s.header_state, 0, sizeof(s.header_state));
4770 ff_init_range_decoder(&s.c, buffer[0], 256*256);
4771 ff_init_cabac_states(&s.c, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64);
4773 for(i=-256; i<256; i++){
4775 j= get_symbol(&s.c, s.header_state, 1);
4776 if(j!=i*i*i/3*FFABS(i)) printf("fsck: %d != %d\n", i, j);
4780 int level, orientation, x, y;
4781 int64_t errors[8][4];
4784 memset(errors, 0, sizeof(errors));
4785 s.spatial_decomposition_count=3;
4786 s.spatial_decomposition_type=0;
4787 for(level=0; level<s.spatial_decomposition_count; level++){
4788 for(orientation=level ? 1 : 0; orientation<4; orientation++){
4789 int w= width >> (s.spatial_decomposition_count-level);
4790 int h= height >> (s.spatial_decomposition_count-level);
4791 int stride= width << (s.spatial_decomposition_count-level);
4792 DWTELEM *buf= buffer[0];
4795 if(orientation&1) buf+=w;
4796 if(orientation>1) buf+=stride>>1;
4798 memset(buffer[0], 0, sizeof(int)*width*height);
4799 buf[w/2 + h/2*stride]= 256*256;
4800 ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4801 for(y=0; y<height; y++){
4802 for(x=0; x<width; x++){
4803 int64_t d= buffer[0][x + y*width];
4805 if(FFABS(width/2-x)<9 && FFABS(height/2-y)<9 && level==2) printf("%8"PRId64" ", d);
4807 if(FFABS(height/2-y)<9 && level==2) printf("\n");
4809 error= (int)(sqrt(error)+0.5);
4810 errors[level][orientation]= error;
4811 if(g) g=av_gcd(g, error);
4815 printf("static int const visual_weight[][4]={\n");
4816 for(level=0; level<s.spatial_decomposition_count; level++){
4818 for(orientation=0; orientation<4; orientation++){
4819 printf("%8"PRId64",", errors[level][orientation]/g);
4826 int w= width >> (s.spatial_decomposition_count-level);
4827 //int h= height >> (s.spatial_decomposition_count-level);
4828 int stride= width << (s.spatial_decomposition_count-level);
4829 DWTELEM *buf= buffer[0];
4835 memset(buffer[0], 0, sizeof(int)*width*height);
4837 for(y=0; y<height; y++){
4838 for(x=0; x<width; x++){
4839 int tab[4]={0,2,3,1};
4840 buffer[0][x+width*y]= 256*256*tab[(x&1) + 2*(y&1)];
4843 ff_spatial_dwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4847 buf[x + y*stride ]=169;
4848 buf[x + y*stride-w]=64;
4851 ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4853 for(y=0; y<height; y++){
4854 for(x=0; x<width; x++){
4855 int64_t d= buffer[0][x + y*width];
4857 if(FFABS(width/2-x)<9 && FFABS(height/2-y)<9) printf("%8"PRId64" ", d);
4859 if(FFABS(height/2-y)<9) printf("\n");