2 * Copyright (C) 2004 Michael Niedermayer <michaelni@gmx.at>
4 * This library is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU Lesser General Public
6 * License as published by the Free Software Foundation; either
7 * version 2 of the License, or (at your option) any later version.
9 * This library is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * Lesser General Public License for more details.
14 * You should have received a copy of the GNU Lesser General Public
15 * License along with this library; if not, write to the Free Software
16 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
24 #include "rangecoder.h"
26 #include "mpegvideo.h"
31 static const int8_t quant3[256]={
32 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
33 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
34 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
35 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
36 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
37 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
38 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
39 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
40 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
41 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
42 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
43 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
44 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
45 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
46 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
47 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 0,
49 static const int8_t quant3b[256]={
50 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
51 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
52 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
53 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
54 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
55 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
56 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
57 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
58 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
59 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
60 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
61 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
62 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
63 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
64 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
65 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
67 static const int8_t quant3bA[256]={
68 0, 0, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
69 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
70 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
71 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
72 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
73 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
74 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
75 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
76 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
77 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
78 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
79 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
80 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
81 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
82 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
83 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
85 static const int8_t quant5[256]={
86 0, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
87 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
88 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
89 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
90 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
91 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
92 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
93 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
94 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
95 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
96 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
97 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
98 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
99 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
100 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
101 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,-1,
103 static const int8_t quant7[256]={
104 0, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
105 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
106 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3,
107 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
108 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
109 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
110 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
111 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
112 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
113 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
114 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
115 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
116 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
117 -3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-2,-2,-2,
118 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
119 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,
121 static const int8_t quant9[256]={
122 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3,
123 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
124 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
125 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
126 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
127 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
128 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
129 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
130 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
131 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
132 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
133 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
134 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
135 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
136 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,
137 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-1,-1,
139 static const int8_t quant11[256]={
140 0, 1, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4,
141 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
142 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
143 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
144 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
145 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
146 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
147 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
148 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
149 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
150 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
151 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
152 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
153 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-4,-4,
154 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
155 -4,-4,-4,-4,-4,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-1,
157 static const int8_t quant13[256]={
158 0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
159 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
160 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
161 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
162 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
163 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
164 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
165 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
166 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
167 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
168 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
169 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
170 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-5,
171 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
172 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
173 -4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,-2,-2,-1,
177 static const uint8_t obmc32[1024]={
178 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
179 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
180 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
181 0, 0, 1, 1, 2, 2, 3, 4, 4, 5, 6, 6, 7, 7, 8, 8, 8, 8, 7, 7, 6, 6, 5, 4, 4, 3, 2, 2, 1, 1, 0, 0,
182 0, 0, 1, 2, 2, 3, 4, 6, 7, 8, 9,10,11,12,12,12,12,12,12,11,10, 9, 8, 7, 6, 4, 3, 2, 2, 1, 0, 0,
183 0, 1, 1, 2, 3, 5, 6, 8,10,11,13,14,15,16,17,18,18,17,16,15,14,13,11,10, 8, 6, 5, 3, 2, 1, 1, 0,
184 0, 1, 1, 3, 4, 6, 8,10,13,15,17,19,20,22,22,23,23,22,22,20,19,17,15,13,10, 8, 6, 4, 3, 1, 1, 0,
185 0, 1, 2, 4, 6, 8,10,13,16,19,21,23,25,27,28,29,29,28,27,25,23,21,19,16,13,10, 8, 6, 4, 2, 1, 0,
186 0, 1, 2, 4, 7,10,13,16,19,22,25,28,31,33,34,35,35,34,33,31,28,25,22,19,16,13,10, 7, 4, 2, 1, 0,
187 0, 1, 3, 5, 8,11,15,19,22,26,30,33,36,38,40,41,41,40,38,36,33,30,26,22,19,15,11, 8, 5, 3, 1, 0,
188 0, 1, 3, 6, 9,12,17,21,25,30,34,38,41,44,45,46,46,45,44,41,38,34,30,25,21,17,12, 9, 6, 3, 1, 0,
189 0, 1, 3, 6,10,14,19,23,28,33,38,42,45,48,51,52,52,51,48,45,42,38,33,28,23,19,14,10, 6, 3, 1, 0,
190 0, 1, 4, 7,11,15,20,25,31,36,41,45,49,52,55,56,56,55,52,49,45,41,36,31,25,20,15,11, 7, 4, 1, 0,
191 0, 2, 4, 7,12,16,22,27,33,38,44,48,52,56,58,60,60,58,56,52,48,44,38,33,27,22,16,12, 7, 4, 2, 0,
192 0, 1, 4, 8,12,17,22,28,34,40,45,51,55,58,61,62,62,61,58,55,51,45,40,34,28,22,17,12, 8, 4, 1, 0,
193 0, 2, 4, 8,12,18,23,29,35,41,46,52,56,60,62,64,64,62,60,56,52,46,41,35,29,23,18,12, 8, 4, 2, 0,
194 0, 2, 4, 8,12,18,23,29,35,41,46,52,56,60,62,64,64,62,60,56,52,46,41,35,29,23,18,12, 8, 4, 2, 0,
195 0, 1, 4, 8,12,17,22,28,34,40,45,51,55,58,61,62,62,61,58,55,51,45,40,34,28,22,17,12, 8, 4, 1, 0,
196 0, 2, 4, 7,12,16,22,27,33,38,44,48,52,56,58,60,60,58,56,52,48,44,38,33,27,22,16,12, 7, 4, 2, 0,
197 0, 1, 4, 7,11,15,20,25,31,36,41,45,49,52,55,56,56,55,52,49,45,41,36,31,25,20,15,11, 7, 4, 1, 0,
198 0, 1, 3, 6,10,14,19,23,28,33,38,42,45,48,51,52,52,51,48,45,42,38,33,28,23,19,14,10, 6, 3, 1, 0,
199 0, 1, 3, 6, 9,12,17,21,25,30,34,38,41,44,45,46,46,45,44,41,38,34,30,25,21,17,12, 9, 6, 3, 1, 0,
200 0, 1, 3, 5, 8,11,15,19,22,26,30,33,36,38,40,41,41,40,38,36,33,30,26,22,19,15,11, 8, 5, 3, 1, 0,
201 0, 1, 2, 4, 7,10,13,16,19,22,25,28,31,33,34,35,35,34,33,31,28,25,22,19,16,13,10, 7, 4, 2, 1, 0,
202 0, 1, 2, 4, 6, 8,10,13,16,19,21,23,25,27,28,29,29,28,27,25,23,21,19,16,13,10, 8, 6, 4, 2, 1, 0,
203 0, 1, 1, 3, 4, 6, 8,10,13,15,17,19,20,22,22,23,23,22,22,20,19,17,15,13,10, 8, 6, 4, 3, 1, 1, 0,
204 0, 1, 1, 2, 3, 5, 6, 8,10,11,13,14,15,16,17,18,18,17,16,15,14,13,11,10, 8, 6, 5, 3, 2, 1, 1, 0,
205 0, 0, 1, 2, 2, 3, 4, 6, 7, 8, 9,10,11,12,12,12,12,12,12,11,10, 9, 8, 7, 6, 4, 3, 2, 2, 1, 0, 0,
206 0, 0, 1, 1, 2, 2, 3, 4, 4, 5, 6, 6, 7, 7, 8, 8, 8, 8, 7, 7, 6, 6, 5, 4, 4, 3, 2, 2, 1, 1, 0, 0,
207 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
208 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
209 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
212 static const uint8_t obmc16[256]={
213 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
214 0, 1, 1, 2, 4, 5, 5, 6, 6, 5, 5, 4, 2, 1, 1, 0,
215 0, 1, 4, 6, 9,11,13,15,15,13,11, 9, 6, 4, 1, 0,
216 0, 2, 6,11,15,20,24,26,26,24,20,15,11, 6, 2, 0,
217 0, 4, 9,15,23,29,34,38,38,34,29,23,15, 9, 4, 0,
218 0, 5,11,20,29,38,45,49,49,45,38,29,20,11, 5, 0,
219 1, 5,13,24,34,45,53,57,57,53,45,34,24,13, 5, 1,
220 1, 6,15,26,38,49,57,62,62,57,49,38,26,15, 6, 1,
221 1, 6,15,26,38,49,57,62,62,57,49,38,26,15, 6, 1,
222 1, 5,13,24,34,45,53,57,57,53,45,34,24,13, 5, 1,
223 0, 5,11,20,29,38,45,49,49,45,38,29,20,11, 5, 0,
224 0, 4, 9,15,23,29,34,38,38,34,29,23,15, 9, 4, 0,
225 0, 2, 6,11,15,20,24,26,26,24,20,15,11, 6, 2, 0,
226 0, 1, 4, 6, 9,11,13,15,15,13,11, 9, 6, 4, 1, 0,
227 0, 1, 1, 2, 4, 5, 5, 6, 6, 5, 5, 4, 2, 1, 1, 0,
228 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
232 static const uint8_t obmc32[1024]={
233 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0,
234 0, 4, 4, 4, 8, 8, 8, 12, 12, 16, 16, 16, 20, 20, 20, 24, 24, 20, 20, 20, 16, 16, 16, 12, 12, 8, 8, 8, 4, 4, 4, 0,
235 0, 4, 8, 8, 12, 12, 16, 20, 20, 24, 28, 28, 32, 32, 36, 40, 40, 36, 32, 32, 28, 28, 24, 20, 20, 16, 12, 12, 8, 8, 4, 0,
236 0, 4, 8, 12, 16, 20, 24, 28, 28, 32, 36, 40, 44, 48, 52, 56, 56, 52, 48, 44, 40, 36, 32, 28, 28, 24, 20, 16, 12, 8, 4, 0,
237 4, 8, 12, 16, 20, 24, 28, 32, 40, 44, 48, 52, 56, 60, 64, 68, 68, 64, 60, 56, 52, 48, 44, 40, 32, 28, 24, 20, 16, 12, 8, 4,
238 4, 8, 12, 20, 24, 32, 36, 40, 48, 52, 56, 64, 68, 76, 80, 84, 84, 80, 76, 68, 64, 56, 52, 48, 40, 36, 32, 24, 20, 12, 8, 4,
239 4, 8, 16, 24, 28, 36, 44, 48, 56, 60, 68, 76, 80, 88, 96,100,100, 96, 88, 80, 76, 68, 60, 56, 48, 44, 36, 28, 24, 16, 8, 4,
240 4, 12, 20, 28, 32, 40, 48, 56, 64, 72, 80, 88, 92,100,108,116,116,108,100, 92, 88, 80, 72, 64, 56, 48, 40, 32, 28, 20, 12, 4,
241 4, 12, 20, 28, 40, 48, 56, 64, 72, 80, 88, 96,108,116,124,132,132,124,116,108, 96, 88, 80, 72, 64, 56, 48, 40, 28, 20, 12, 4,
242 4, 16, 24, 32, 44, 52, 60, 72, 80, 92,100,108,120,128,136,148,148,136,128,120,108,100, 92, 80, 72, 60, 52, 44, 32, 24, 16, 4,
243 4, 16, 28, 36, 48, 56, 68, 80, 88,100,112,120,132,140,152,164,164,152,140,132,120,112,100, 88, 80, 68, 56, 48, 36, 28, 16, 4,
244 4, 16, 28, 40, 52, 64, 76, 88, 96,108,120,132,144,156,168,180,180,168,156,144,132,120,108, 96, 88, 76, 64, 52, 40, 28, 16, 4,
245 8, 20, 32, 44, 56, 68, 80, 92,108,120,132,144,156,168,180,192,192,180,168,156,144,132,120,108, 92, 80, 68, 56, 44, 32, 20, 8,
246 8, 20, 32, 48, 60, 76, 88,100,116,128,140,156,168,184,196,208,208,196,184,168,156,140,128,116,100, 88, 76, 60, 48, 32, 20, 8,
247 8, 20, 36, 52, 64, 80, 96,108,124,136,152,168,180,196,212,224,224,212,196,180,168,152,136,124,108, 96, 80, 64, 52, 36, 20, 8,
248 8, 24, 40, 56, 68, 84,100,116,132,148,164,180,192,208,224,240,240,224,208,192,180,164,148,132,116,100, 84, 68, 56, 40, 24, 8,
249 8, 24, 40, 56, 68, 84,100,116,132,148,164,180,192,208,224,240,240,224,208,192,180,164,148,132,116,100, 84, 68, 56, 40, 24, 8,
250 8, 20, 36, 52, 64, 80, 96,108,124,136,152,168,180,196,212,224,224,212,196,180,168,152,136,124,108, 96, 80, 64, 52, 36, 20, 8,
251 8, 20, 32, 48, 60, 76, 88,100,116,128,140,156,168,184,196,208,208,196,184,168,156,140,128,116,100, 88, 76, 60, 48, 32, 20, 8,
252 8, 20, 32, 44, 56, 68, 80, 92,108,120,132,144,156,168,180,192,192,180,168,156,144,132,120,108, 92, 80, 68, 56, 44, 32, 20, 8,
253 4, 16, 28, 40, 52, 64, 76, 88, 96,108,120,132,144,156,168,180,180,168,156,144,132,120,108, 96, 88, 76, 64, 52, 40, 28, 16, 4,
254 4, 16, 28, 36, 48, 56, 68, 80, 88,100,112,120,132,140,152,164,164,152,140,132,120,112,100, 88, 80, 68, 56, 48, 36, 28, 16, 4,
255 4, 16, 24, 32, 44, 52, 60, 72, 80, 92,100,108,120,128,136,148,148,136,128,120,108,100, 92, 80, 72, 60, 52, 44, 32, 24, 16, 4,
256 4, 12, 20, 28, 40, 48, 56, 64, 72, 80, 88, 96,108,116,124,132,132,124,116,108, 96, 88, 80, 72, 64, 56, 48, 40, 28, 20, 12, 4,
257 4, 12, 20, 28, 32, 40, 48, 56, 64, 72, 80, 88, 92,100,108,116,116,108,100, 92, 88, 80, 72, 64, 56, 48, 40, 32, 28, 20, 12, 4,
258 4, 8, 16, 24, 28, 36, 44, 48, 56, 60, 68, 76, 80, 88, 96,100,100, 96, 88, 80, 76, 68, 60, 56, 48, 44, 36, 28, 24, 16, 8, 4,
259 4, 8, 12, 20, 24, 32, 36, 40, 48, 52, 56, 64, 68, 76, 80, 84, 84, 80, 76, 68, 64, 56, 52, 48, 40, 36, 32, 24, 20, 12, 8, 4,
260 4, 8, 12, 16, 20, 24, 28, 32, 40, 44, 48, 52, 56, 60, 64, 68, 68, 64, 60, 56, 52, 48, 44, 40, 32, 28, 24, 20, 16, 12, 8, 4,
261 0, 4, 8, 12, 16, 20, 24, 28, 28, 32, 36, 40, 44, 48, 52, 56, 56, 52, 48, 44, 40, 36, 32, 28, 28, 24, 20, 16, 12, 8, 4, 0,
262 0, 4, 8, 8, 12, 12, 16, 20, 20, 24, 28, 28, 32, 32, 36, 40, 40, 36, 32, 32, 28, 28, 24, 20, 20, 16, 12, 12, 8, 8, 4, 0,
263 0, 4, 4, 4, 8, 8, 8, 12, 12, 16, 16, 16, 20, 20, 20, 24, 24, 20, 20, 20, 16, 16, 16, 12, 12, 8, 8, 8, 4, 4, 4, 0,
264 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0,
267 static const uint8_t obmc16[256]={
268 0, 4, 4, 8, 8, 12, 12, 16, 16, 12, 12, 8, 8, 4, 4, 0,
269 4, 8, 16, 20, 28, 32, 40, 44, 44, 40, 32, 28, 20, 16, 8, 4,
270 4, 16, 24, 36, 44, 56, 64, 76, 76, 64, 56, 44, 36, 24, 16, 4,
271 8, 20, 36, 48, 64, 76, 92,104,104, 92, 76, 64, 48, 36, 20, 8,
272 8, 28, 44, 64, 80,100,116,136,136,116,100, 80, 64, 44, 28, 8,
273 12, 32, 56, 76,100,120,144,164,164,144,120,100, 76, 56, 32, 12,
274 12, 40, 64, 92,116,144,168,196,196,168,144,116, 92, 64, 40, 12,
275 16, 44, 76,104,136,164,196,224,224,196,164,136,104, 76, 44, 16,
276 16, 44, 76,104,136,164,196,224,224,196,164,136,104, 76, 44, 16,
277 12, 40, 64, 92,116,144,168,196,196,168,144,116, 92, 64, 40, 12,
278 12, 32, 56, 76,100,120,144,164,164,144,120,100, 76, 56, 32, 12,
279 8, 28, 44, 64, 80,100,116,136,136,116,100, 80, 64, 44, 28, 8,
280 8, 20, 36, 48, 64, 76, 92,104,104, 92, 76, 64, 48, 36, 20, 8,
281 4, 16, 24, 36, 44, 56, 64, 76, 76, 64, 56, 44, 36, 24, 16, 4,
282 4, 8, 16, 20, 28, 32, 40, 44, 44, 40, 32, 28, 20, 16, 8, 4,
283 0, 4, 4, 8, 8, 12, 12, 16, 16, 12, 12, 8, 8, 4, 4, 0,
287 static const uint8_t obmc32[1024]={
288 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
289 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
290 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
291 0, 0, 1, 1, 1, 2, 2, 3, 4, 5, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 6, 5, 5, 4, 3, 2, 2, 1, 1, 1, 0, 0,
292 0, 0, 1, 1, 2, 3, 4, 5, 6, 7, 9,10,11,11,12,12,12,12,11,11,10, 9, 7, 6, 5, 4, 3, 2, 1, 1, 0, 0,
293 0, 0, 1, 2, 3, 5, 6, 8, 9,11,12,14,15,16,17,17,17,17,16,15,14,12,11, 9, 8, 6, 5, 3, 2, 1, 0, 0,
294 0, 1, 1, 2, 4, 6, 8,10,12,15,17,19,20,21,22,23,23,22,21,20,19,17,15,12,10, 8, 6, 4, 2, 1, 1, 0,
295 0, 1, 2, 3, 5, 8,10,13,16,19,21,24,26,27,28,29,29,28,27,26,24,21,19,16,13,10, 8, 5, 3, 2, 1, 0,
296 0, 1, 2, 4, 6, 9,12,16,19,23,26,29,31,33,34,35,35,34,33,31,29,26,23,19,16,12, 9, 6, 4, 2, 1, 0,
297 0, 1, 3, 5, 7,11,15,19,23,26,30,34,37,39,40,41,41,40,39,37,34,30,26,23,19,15,11, 7, 5, 3, 1, 0,
298 0, 1, 3, 5, 9,12,17,21,26,30,35,38,42,44,46,47,47,46,44,42,38,35,30,26,21,17,12, 9, 5, 3, 1, 0,
299 0, 1, 3, 6, 9,14,19,24,29,34,38,43,46,49,51,52,52,51,49,46,43,38,34,29,24,19,14, 9, 6, 3, 1, 0,
300 0, 1, 3, 6,11,15,20,26,31,37,42,46,50,53,56,57,57,56,53,50,46,42,37,31,26,20,15,11, 6, 3, 1, 0,
301 0, 1, 3, 7,11,16,21,27,33,39,44,49,53,57,59,60,60,59,57,53,49,44,39,33,27,21,16,11, 7, 3, 1, 0,
302 0, 1, 4, 7,12,17,22,28,34,40,46,51,56,59,61,63,63,61,59,56,51,46,40,34,28,22,17,12, 7, 4, 1, 0,
303 0, 1, 4, 7,12,17,23,29,35,41,47,52,57,60,63,64,64,63,60,57,52,47,41,35,29,23,17,12, 7, 4, 1, 0,
304 0, 1, 4, 7,12,17,23,29,35,41,47,52,57,60,63,64,64,63,60,57,52,47,41,35,29,23,17,12, 7, 4, 1, 0,
305 0, 1, 4, 7,12,17,22,28,34,40,46,51,56,59,61,63,63,61,59,56,51,46,40,34,28,22,17,12, 7, 4, 1, 0,
306 0, 1, 3, 7,11,16,21,27,33,39,44,49,53,57,59,60,60,59,57,53,49,44,39,33,27,21,16,11, 7, 3, 1, 0,
307 0, 1, 3, 6,11,15,20,26,31,37,42,46,50,53,56,57,57,56,53,50,46,42,37,31,26,20,15,11, 6, 3, 1, 0,
308 0, 1, 3, 6, 9,14,19,24,29,34,38,43,46,49,51,52,52,51,49,46,43,38,34,29,24,19,14, 9, 6, 3, 1, 0,
309 0, 1, 3, 5, 9,12,17,21,26,30,35,38,42,44,46,47,47,46,44,42,38,35,30,26,21,17,12, 9, 5, 3, 1, 0,
310 0, 1, 3, 5, 7,11,15,19,23,26,30,34,37,39,40,41,41,40,39,37,34,30,26,23,19,15,11, 7, 5, 3, 1, 0,
311 0, 1, 2, 4, 6, 9,12,16,19,23,26,29,31,33,34,35,35,34,33,31,29,26,23,19,16,12, 9, 6, 4, 2, 1, 0,
312 0, 1, 2, 3, 5, 8,10,13,16,19,21,24,26,27,28,29,29,28,27,26,24,21,19,16,13,10, 8, 5, 3, 2, 1, 0,
313 0, 1, 1, 2, 4, 6, 8,10,12,15,17,19,20,21,22,23,23,22,21,20,19,17,15,12,10, 8, 6, 4, 2, 1, 1, 0,
314 0, 0, 1, 2, 3, 5, 6, 8, 9,11,12,14,15,16,17,17,17,17,16,15,14,12,11, 9, 8, 6, 5, 3, 2, 1, 0, 0,
315 0, 0, 1, 1, 2, 3, 4, 5, 6, 7, 9,10,11,11,12,12,12,12,11,11,10, 9, 7, 6, 5, 4, 3, 2, 1, 1, 0, 0,
316 0, 0, 1, 1, 1, 2, 2, 3, 4, 5, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 6, 5, 5, 4, 3, 2, 2, 1, 1, 1, 0, 0,
317 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
318 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
319 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
322 static const uint8_t obmc16[256]={
323 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
324 0, 0, 1, 2, 3, 4, 5, 5, 5, 5, 4, 3, 2, 1, 0, 0,
325 0, 1, 3, 6, 8,11,13,14,14,13,11, 8, 6, 3, 1, 0,
326 0, 2, 6,10,15,20,24,26,26,24,20,15,10, 6, 2, 0,
327 0, 3, 8,16,23,30,35,38,38,35,30,23,16, 8, 3, 0,
328 1, 4,11,20,30,39,46,49,49,46,39,30,20,11, 4, 1,
329 1, 5,13,24,35,46,54,58,58,54,46,35,24,13, 5, 1,
330 0, 5,14,26,38,49,58,63,63,58,49,38,26,14, 5, 0,
331 0, 5,14,26,38,49,58,63,63,58,49,38,26,14, 5, 0,
332 1, 5,13,24,35,46,54,58,58,54,46,35,24,13, 5, 1,
333 1, 4,11,20,30,39,46,49,49,46,39,30,20,11, 4, 1,
334 0, 3, 8,16,23,30,35,38,38,35,30,23,16, 8, 3, 0,
335 0, 2, 6,10,15,20,24,26,26,24,20,15,10, 6, 2, 0,
336 0, 1, 3, 6, 8,11,13,14,14,13,11, 8, 6, 3, 1, 0,
337 0, 0, 1, 2, 3, 4, 5, 5, 5, 5, 4, 3, 2, 1, 0, 0,
338 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
344 static const uint8_t obmc8[64]={
345 4, 12, 20, 28, 28, 20, 12, 4,
346 12, 36, 60, 84, 84, 60, 36, 12,
347 20, 60,100,140,140,100, 60, 20,
348 28, 84,140,196,196,140, 84, 28,
349 28, 84,140,196,196,140, 84, 28,
350 20, 60,100,140,140,100, 60, 20,
351 12, 36, 60, 84, 84, 60, 36, 12,
352 4, 12, 20, 28, 28, 20, 12, 4,
357 static const uint8_t obmc4[16]={
/* Pointers to the four obmc* tables, ordered from the largest table to the
 * smallest: index 0 -> obmc32, 1 -> obmc16, 2 -> obmc8, 3 -> obmc4. */
365 static const uint8_t *obmc_tab[4]={
366 obmc32, obmc16, obmc8, obmc4
/* Per-block record. Only part of the member list is visible in this view. */
369 typedef struct BlockNode{
374 //#define TYPE_SPLIT 1
/* NOTE(review): presumably a flag value for a type member declared on a line
 * that is not visible here - confirm against the full struct. */
375 #define BLOCK_INTRA 1
377 //#define TYPE_NOCOLOR 4
378 uint8_t level; //FIXME merge into type?
/* Constant BlockNode whose three color components are all 128. Any further
 * initializers are not visible in this view. */
381 static const BlockNode null_block= { //FIXME add border maybe
382 .color= {128,128,128},
/* MB_SIZE is defined through its base-2 logarithm, so it is always a power
 * of two; with LOG2_MB_SIZE == 4 it evaluates to 16. */
389 #define LOG2_MB_SIZE 4
390 #define MB_SIZE (1<<LOG2_MB_SIZE)
/* NOTE(review): members are not visible in this view; going by the name this
 * presumably pairs an x position with a coefficient value - confirm. */
392 typedef struct x_and_coeff{
/* Description of one subband. Only part of the member list is visible in
 * this view. */
397 typedef struct SubBand{
402 int qlog; ///< log(qscale)/log[2^(1/6)]
406 int stride_line; ///< Stride measured in lines, not pixels.
/* pointer to x_and_coeff entries; ownership is not visible from here */
407 x_and_coeff * x_coeff;
/* link to another SubBand of the same type */
408 struct SubBand *parent;
/* 7 + 512 = 519 context sets of 32 bytes each */
409 uint8_t state[/*7*2*/ 7 + 512][32];
/* Per-plane data. Only the subband array is visible in this view. */
412 typedef struct Plane{
/* four SubBand entries for each of up to MAX_DECOMPOSITIONS levels */
415 SubBand band[MAX_DECOMPOSITIONS][4];
/* Top-level codec state. Only part of the member list is visible in this
 * view; the comments below describe the visible members only. */
418 typedef struct SnowContext{
419 // MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to make the motion estimation eventually independent of MpegEncContext, so this will be removed then (FIXME/XXX)
421 AVCodecContext *avctx;
425 AVFrame input_picture; ///< new_picture with the internal linesizes
426 AVFrame current_picture;
427 AVFrame last_picture;
428 AVFrame mconly_picture;
429 // uint8_t q_context[16];
/* byte arrays with "state" in the name; presumably range coder contexts
 * (compare the uint8_t *state arguments of put_symbol/get_symbol) - confirm */
430 uint8_t header_state[32];
431 uint8_t block_state[128 + 32*128];
435 int spatial_decomposition_type;
436 int temporal_decomposition_type;
437 int spatial_decomposition_count;
438 int temporal_decomposition_count;
439 DWTELEM *spatial_dwt_buffer;
443 int spatial_scalability;
/* NOTE(review): defined inside the struct body, but like every #define it is
 * visible for the rest of the translation unit, not scoped to the struct. */
449 #define QBIAS_SHIFT 3
453 Plane plane[MAX_PLANES];
/* number of int entries in me_cache below */
455 #define ME_CACHE_SIZE 1024
456 int me_cache[ME_CACHE_SIZE];
457 int me_cache_generation;
460 MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to make the motion estimation eventually independent of MpegEncContext, so this will be removed then (FIXME/XXX)
/* Return the buffer currently bound to line_num, or call
 * slice_buffer_load_line() to bind one when the table entry is NULL.
 * Both macro arguments are expanded more than once, so they must not have
 * side effects. */
471 #define slice_buffer_get_line(slice_buf, line_num) ((slice_buf)->line[line_num] ? (slice_buf)->line[line_num] : slice_buffer_load_line((slice_buf), (line_num)))
472 //#define slice_buffer_get_line(slice_buf, line_num) (slice_buffer_load_line((slice_buf), (line_num)))
/* Forward declaration; the definition is not part of this view. */
474 static void iterative_me(SnowContext *s);
/**
 * Initialise a slice_buffer.
 *
 * Records the geometry, allocates the per-line pointer table with
 * av_mallocz(), and pre-allocates max_allocated_lines row buffers of
 * line_width DWTELEMs each, which are kept on data_stack.
 *
 * @param buf                 descriptor to fill in
 * @param line_count          number of entries in the per-line pointer table
 * @param max_allocated_lines number of row buffers to pre-allocate
 * @param line_width          number of DWTELEMs per row buffer
 * @param base_buffer         stored in buf->base_buffer; not otherwise used here
 *
 * NOTE(review): none of the allocation results are checked in the visible
 * lines - confirm how allocation failure is meant to be handled.
 */
476 static void slice_buffer_init(slice_buffer * buf, int line_count, int max_allocated_lines, int line_width, DWTELEM * base_buffer)
480 buf->base_buffer = base_buffer;
481 buf->line_count = line_count;
482 buf->line_width = line_width;
483 buf->data_count = max_allocated_lines;
484 buf->line = (DWTELEM * *) av_mallocz (sizeof(DWTELEM *) * line_count);
485 buf->data_stack = (DWTELEM * *) av_malloc (sizeof(DWTELEM *) * max_allocated_lines);
/* one row buffer per stack slot */
487 for (i = 0; i < max_allocated_lines; i++)
489 buf->data_stack[i] = (DWTELEM *) av_malloc (sizeof(DWTELEM) * line_width);
/* the stack starts full: top indexes the last pre-allocated row */
492 buf->data_stack_top = max_allocated_lines - 1;
/**
 * Bind a free row buffer to the given line.
 *
 * Takes the buffer at the top of data_stack, decrements data_stack_top and
 * stores the buffer in buf->line[line].
 *
 * @param buf  slice buffer
 * @param line index into buf->line; no range check is visible here
 *
 * NOTE(review): the early "return buf->line[line]" is presumably guarded by
 * a check on a line that is not visible here (the line being bound already).
 * NOTE(review): offset is assigned but no use of it is visible - confirm
 * whether it is dead.
 */
495 static DWTELEM * slice_buffer_load_line(slice_buffer * buf, int line)
500 // av_log(NULL, AV_LOG_DEBUG, "Cache hit: %d\n", line);
/* a free buffer must be available on the stack */
502 assert(buf->data_stack_top >= 0);
503 // assert(!buf->line[line]);
505 return buf->line[line];
507 offset = buf->line_width * line;
/* pop the top free buffer and attach it to this line */
508 buffer = buf->data_stack[buf->data_stack_top];
509 buf->data_stack_top--;
510 buf->line[line] = buffer;
512 // av_log(NULL, AV_LOG_DEBUG, "slice_buffer_load_line: line: %d remaining: %d\n", line, buf->data_stack_top + 1);
/**
 * Detach the row buffer bound to the given line and push it back onto
 * data_stack, leaving buf->line[line] NULL.
 *
 * @param buf  slice buffer
 * @param line index of a line that currently has a buffer bound (asserted)
 *
 * NOTE(review): offset is assigned but no use of it is visible - confirm
 * whether it is dead.
 */
517 static void slice_buffer_release(slice_buffer * buf, int line)
522 assert(line >= 0 && line < buf->line_count);
523 assert(buf->line[line]);
525 offset = buf->line_width * line;
526 buffer = buf->line[line];
/* push the buffer back onto the free stack */
527 buf->data_stack_top++;
528 buf->data_stack[buf->data_stack_top] = buffer;
529 buf->line[line] = NULL;
531 // av_log(NULL, AV_LOG_DEBUG, "slice_buffer_release: line: %d remaining: %d\n", line, buf->data_stack_top + 1);
/**
 * Walk every line index and hand lines back via slice_buffer_release().
 *
 * NOTE(review): slice_buffer_release() asserts that the line has a buffer
 * bound, so the call is presumably guarded by a check on a line that is not
 * visible here - confirm.
 */
534 static void slice_buffer_flush(slice_buffer * buf)
537 for (i = 0; i < buf->line_count; i++)
541 // av_log(NULL, AV_LOG_DEBUG, "slice_buffer_flush: line: %d \n", i);
542 slice_buffer_release(buf, i);
/**
 * Free everything owned by the slice buffer: flush first, then free each
 * entry of data_stack, the data_stack array itself and the per-line pointer
 * table. av_freep() is passed the address of each pointer.
 */
547 static void slice_buffer_destroy(slice_buffer * buf)
/* flush before the data_stack entries are freed below */
550 slice_buffer_flush(buf);
552 for (i = buf->data_count - 1; i >= 0; i--)
554 assert(buf->data_stack[i]);
555 av_freep(&buf->data_stack[i]);
557 assert(buf->data_stack);
558 av_freep(&buf->data_stack);
560 av_freep(&buf->line);
564 // Avoid a name clash on SGI IRIX
/* QEXPSHIFT depends on FRAC_BITS, which is defined outside this view. */
567 #define QEXPSHIFT (7-FRAC_BITS+8) //FIXME try to change this to 0
/* Table with QROOT entries; where it is filled is not visible here. */
568 static uint8_t qexp[QROOT];
/**
 * Only the signature and the loop condition are visible in this view.
 * The loop runs while v, compared as unsigned, exceeds m. Assuming m is
 * non-negative, the unsigned comparison means negative v also enters the
 * loop.
 * NOTE(review): the loop body and return statement are not visible here;
 * going by the name this presumably reflects v into the range 0..m - confirm.
 */
570 static inline int mirror(int v, int m){
571 while((unsigned)v > (unsigned)m){
/**
 * Write the integer v with the range coder c, using the contexts in state.
 *
 * Context offsets as used in the visible lines: state[0] carries a leading
 * flag, state[1..10] are written with 1s followed by a 0, state[11..21]
 * carry the bit (v < 0), and state[22..31] carry bits of a taken from bit
 * e-1 downwards.
 *
 * NOTE(review): the visible lines look like three alternative codings of the
 * same scheme, presumably selected by preprocessor conditionals that are not
 * visible in this view. The declaration of a and the conditions around the
 * state[0] flag are not visible either.
 */
578 static inline void put_symbol(RangeCoder *c, uint8_t *state, int v, int is_signed){
583 const int e= av_log2(a);
/* first variant: indices above the context range share the last context */
585 const int el= FFMIN(e, 10);
586 put_rac(c, state+0, 0);
589 put_rac(c, state+1+i, 1); //1..10
592 put_rac(c, state+1+9, 1); //1..10
594 put_rac(c, state+1+FFMIN(i,9), 0);
596 for(i=e-1; i>=el; i--){
597 put_rac(c, state+22+9, (a>>i)&1); //22..31
600 put_rac(c, state+22+i, (a>>i)&1); //22..31
604 put_rac(c, state+11 + el, v < 0); //11..21
/* second variant: context indices are used unclamped */
607 put_rac(c, state+0, 0);
610 put_rac(c, state+1+i, 1); //1..10
612 put_rac(c, state+1+i, 0);
614 for(i=e-1; i>=0; i--){
615 put_rac(c, state+22+i, (a>>i)&1); //22..31
619 put_rac(c, state+11 + e, v < 0); //11..21
/* third variant: context indices are clamped with FFMIN at each use */
622 put_rac(c, state+1+FFMIN(i,9), 1); //1..10
624 put_rac(c, state+1+FFMIN(i,9), 0);
626 for(i=e-1; i>=0; i--){
627 put_rac(c, state+22+FFMIN(i,9), (a>>i)&1); //22..31
631 put_rac(c, state+11 + FFMIN(e,10), v < 0); //11..21
/* the other value of the state[0] flag; presumably the v == 0 case - confirm */
635 put_rac(c, state+0, 1);
/**
 * Read an integer from the range coder c using the contexts in state.
 * The context offsets match the clamped variant of put_symbol():
 * state[0] flag, state[1..10] for the run of 1 bits, state[22..31] for the
 * value bits, state[11..21] for the sign (read only when is_signed is set).
 * NOTE(review): the statements executed for each condition below are not
 * visible in this view.
 */
639 static inline int get_symbol(RangeCoder *c, uint8_t *state, int is_signed){
640 if(get_rac(c, state+0))
/* loop while the coder yields 1 bits; context index clamped to 9 */
645 while(get_rac(c, state+1 + FFMIN(e,9))){ //1..10
650 for(i=e-1; i>=0; i--){
/* a = 2*a + next bit */
651 a += a + get_rac(c, state+22 + FFMIN(i,9)); //22..31
654 if(is_signed && get_rac(c, state+11 + FFMIN(e,10))) //11..21
/**
 * Write v with the range coder c using a scheme parameterised by log2.
 * Visible behaviour: flag bits are written with context state[4+log2], then
 * the low log2 bits of v are written most-significant first with contexts
 * state[31-i].
 * NOTE(review): the loop and conditions around the flag writes, and how r
 * and log2 change, are not visible in this view.
 */
661 static inline void put_symbol2(RangeCoder *c, uint8_t *state, int v, int log2){
/* r = 2^log2, or 1 when log2 is negative */
663 int r= log2>=0 ? 1<<log2 : 1;
669 put_rac(c, state+4+log2, 1);
674 put_rac(c, state+4+log2, 0);
676 for(i=log2-1; i>=0; i--){
677 put_rac(c, state+31-i, (v>>i)&1);
/**
 * Read a value with the range coder c using the same context offsets as
 * put_symbol2(): state[4+log2] for the flag bits, state[31-i] for the low
 * bits.
 * NOTE(review): the body of the while loop is not visible in this view.
 */
681 static inline int get_symbol2(RangeCoder *c, uint8_t *state, int log2){
/* r = 2^log2, or 1 when log2 is negative */
683 int r= log2>=0 ? 1<<log2 : 1;
688 while(get_rac(c, state+4+log2)){
694 for(i=log2-1; i>=0; i--){
/* add bit i of the value */
695 v+= get_rac(c, state+31-i)<<i;
/**
 * One lifting step: each dst sample is the src sample plus (or, when inverse
 * is set, minus) a scaled sum of two neighbouring ref samples,
 * (mul*(ref[i] + ref[i+1]) + add) >> shift.
 *
 * At the ends, where only one neighbour is used, that neighbour is counted
 * twice (mul*2*ref[...]).
 *
 * dst_step, src_step and ref_step are the element strides of the arrays.
 *
 * NOTE(review): the loop and the conditions that use mirror_left and
 * mirror_right are not visible in this view.
 */
701 static always_inline void lift(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int dst_step, int src_step, int ref_step, int width, int mul, int add, int shift, int highpass, int inverse){
702 const int mirror_left= !highpass;
703 const int mirror_right= (width&1) ^ highpass;
/* assuming highpass is 0 or 1, the last term is 1 only for highpass with odd width */
704 const int w= (width>>1) - 1 + (highpass & width);
/* src - ref when inv is set, src + ref otherwise */
707 #define LIFT(src, ref, inv) ((src) + ((inv) ? - (ref) : + (ref)))
/* left edge: single neighbour counted twice */
709 dst[0] = LIFT(src[0], ((mul*2*ref[0]+add)>>shift), inverse);
/* interior: two neighbouring ref samples */
715 dst[i*dst_step] = LIFT(src[i*src_step], ((mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add)>>shift), inverse);
/* right edge: single neighbour counted twice */
719 dst[w*dst_step] = LIFT(src[w*src_step], ((mul*2*ref[w*ref_step]+add)>>shift), inverse);
/**
 * Lifting step with the same signature and edge layout as lift(), but the
 * neighbour term r starts from 3*(ref[i] + ref[i+1]) (3*2*ref[w] at the
 * right edge) instead of a multiplication by mul.
 *
 * NOTE(review): no use of mul is visible in this view. Lines between the
 * visible ones may modify r before it is used, and the loop and edge
 * conditions are not visible either.
 */
724 static always_inline void lift5(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int dst_step, int src_step, int ref_step, int width, int mul, int add, int shift, int highpass, int inverse){
725 const int mirror_left= !highpass;
726 const int mirror_right= (width&1) ^ highpass;
727 const int w= (width>>1) - 1 + (highpass & width);
/* left edge; the declaration of this r is not visible here */
734 dst[0] = LIFT(src[0], ((r+add)>>shift), inverse);
/* interior: three times the sum of the two neighbours */
740 int r= 3*(ref[i*ref_step] + ref[(i+1)*ref_step]);
743 dst[i*dst_step] = LIFT(src[i*src_step], ((r+add)>>shift), inverse);
/* right edge: single neighbour counted twice */
747 int r= 3*2*ref[w*ref_step];
750 dst[w*dst_step] = LIFT(src[w*src_step], ((r+add)>>shift), inverse);
/**
 * Lifting step with the same signature and edge layout as lift(), using the
 * LIFTS formula, which has different forward and inverse expressions.
 *
 * NOTE(review): the loop and the conditions that use mirror_left and
 * mirror_right are not visible in this view.
 */
756 static always_inline void liftS(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int dst_step, int src_step, int ref_step, int width, int mul, int add, int shift, int highpass, int inverse){
757 const int mirror_left= !highpass;
758 const int mirror_right= (width&1) ^ highpass;
759 const int w= (width>>1) - 1 + (highpass & width);
/* LIFTS reads shift from the enclosing function, not from a macro argument.
 * Forward branch: (5<<27)/(5*16) equals 1<<23, so the bias added before the
 * division is removed again by the trailing subtraction; presumably this
 * keeps the dividend non-negative so that the division rounds downwards -
 * confirm. */
763 #define LIFTS(src, ref, inv) ((inv) ? (src) - (((ref) - 4*(src))>>shift): (16*4*(src) + 4*(ref) + 8 + (5<<27))/(5*16) - (1<<23))
/* left edge: single neighbour counted twice */
765 dst[0] = LIFTS(src[0], mul*2*ref[0]+add, inverse);
/* interior: two neighbouring ref samples */
771 dst[i*dst_step] = LIFTS(src[i*src_step], mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add, inverse);
/* right edge: single neighbour counted twice */
775 dst[w*dst_step] = LIFTS(src[w*src_step], mul*2*ref[w*ref_step]+add, inverse);
/**
 * In-place lifting pass over one row.
 *
 * Every second sample, starting at index start, is updated with a rounded,
 * shifted weighted sum of other samples of the same row:
 * (sum + (1<<shift)/2) >> shift is added, or subtracted when inverse is set.
 *
 * @param dst     row of width samples, modified in place
 * @param coeffs  weights, indexed by i
 * @param n       used in the tap position x + 2*i - n + 1
 * @param shift   right shift applied to the rounded sum
 * @param start   index of the first sample to update
 * @param inverse non-zero to subtract instead of add
 *
 * NOTE(review): the loop over i and the handling of negative tap positions
 * are on lines that are not visible in this view.
 */
781 static void inplace_lift(DWTELEM *dst, int width, int *coeffs, int n, int shift, int start, int inverse){
784 for(x=start; x<width; x+=2){
/* tap position; always an odd distance from x */
788 int x2= x + 2*i - n + 1;
/* positions past the right end are reflected about the last sample */
790 else if(x2>=width) x2= 2*width-x2-2;
/* the product is computed in 64 bits */
791 sum += coeffs[i]*(int64_t)dst[x2];
793 if(inverse) dst[x] -= (sum + (1<<shift)/2)>>shift;
794 else dst[x] += (sum + (1<<shift)/2)>>shift;
/**
 * Vertical counterpart of inplace_lift(): every second row, starting at row
 * start, is updated column by column with a rounded, shifted weighted sum of
 * samples from other rows of the same column.
 *
 * @param dst     buffer addressed as dst[x + y*stride], modified in place
 * @param stride  distance in elements between vertically adjacent samples
 * @param coeffs  weights, indexed by i
 * @param n       used in the tap row y + 2*i - n + 1
 * @param shift   right shift applied to the rounded sum
 * @param inverse non-zero to subtract instead of add
 *
 * NOTE(review): the loop over i and the handling of negative tap rows are on
 * lines that are not visible in this view.
 */
798 static void inplace_liftV(DWTELEM *dst, int width, int height, int stride, int *coeffs, int n, int shift, int start, int inverse){
800 for(y=start; y<height; y+=2){
801 for(x=0; x<width; x++){
/* tap row; always an odd distance from y */
805 int y2= y + 2*i - n + 1;
/* rows past the bottom are reflected about the last row */
807 else if(y2>=height) y2= 2*height-y2-2;
/* the product is computed in 64 bits */
808 sum += coeffs[i]*(int64_t)dst[x + y2*stride];
810 if(inverse) dst[x + y*stride] -= (sum + (1<<shift)/2)>>shift;
811 else dst[x + y*stride] += (sum + (1<<shift)/2)>>shift;
/* Alternative sets of COEFFS1..COEFFS4 weight arrays (compound literals of
 * int) that are passed to inplace_lift()/inplace_liftV().
 * NOTE(review): the same macro names are defined several times below; the
 * #elif/#else/#endif lines that select one set, and the matching N*, SHIFT*,
 * LX* and SCALEX definitions, are not visible in this view. */
820 #if 0 // more accurate 9/7
823 #define COEFFS1 (int[]){-25987,-25987}
826 #define COEFFS2 (int[]){-27777,-27777}
829 #define COEFFS3 (int[]){28931,28931}
832 #define COEFFS4 (int[]){14533,14533}
836 #define COEFFS1 (int[]){1,-9,-9,1}
839 #define COEFFS2 (int[]){-1,5,5,-1}
852 #define COEFFS1 (int[]){1,1}
855 #define COEFFS2 (int[]){-1,-1}
868 #define COEFFS2 (int[]){-1,-1}
871 #define COEFFS3 (int[]){-1,-1}
874 #define COEFFS4 (int[]){-5,29,29,-5}
879 #define COEFFS1 (int[]){-203,-203}
882 #define COEFFS2 (int[]){-217,-217}
885 #define COEFFS3 (int[]){113,113}
888 #define COEFFS4 (int[]){227,227}
896 #define COEFFS2 (int[]){-1,-1}
899 #define COEFFS3 (int[]){-1,-1}
902 #define COEFFS4 (int[]){3,3}
906 #define COEFFS1 (int[]){1,-9,-9,1}
909 #define COEFFS2 (int[]){1,1}
919 #define COEFFS1 (int[]){1,-9,-9,1}
922 #define COEFFS2 (int[]){-1,5,5,-1}
/**
 * Forward transform of one row: four in-place lifting passes (COEFFS1..4,
 * alternating between start offsets LX1 and LX0), followed by a
 * de-interleave through temp that places odd-indexed samples from offset w2
 * onwards.
 *
 * NOTE(review): the declaration of temp and the store of the even-indexed
 * samples are on lines that are not visible in this view.
 * NOTE(review): the memcpy size uses sizeof(int) although b is DWTELEM *;
 * this is only correct while DWTELEM has the size of int - confirm, or
 * consider sizeof(*b).
 */
930 static void horizontal_decomposeX(DWTELEM *b, int width){
/* number of odd-indexed samples */
932 const int width2= width>>1;
/* number of even-indexed samples; also the offset of the odd half in temp */
933 const int w2= (width+1)>>1;
936 inplace_lift(b, width, COEFFS1, N1, SHIFT1, LX1, 0);
937 inplace_lift(b, width, COEFFS2, N2, SHIFT2, LX0, 0);
938 inplace_lift(b, width, COEFFS3, N3, SHIFT3, LX1, 0);
939 inplace_lift(b, width, COEFFS4, N4, SHIFT4, LX0, 0);
941 for(x=0; x<width2; x++){
943 temp[x+w2]= b[2*x + 1];
947 memcpy(b, temp, width*sizeof(int));
/**
 * Inverse of horizontal_decomposeX() for one row: copy the row to temp,
 * re-interleave (samples from offset w2 onwards go back to odd indices),
 * then undo the four lifting passes in reverse order (COEFFS4 first) with
 * the inverse flag set.
 *
 * NOTE(review): the declaration of temp and the store of the even-indexed
 * samples are on lines that are not visible in this view.
 * NOTE(review): the memcpy size uses sizeof(int) although b is DWTELEM *;
 * this is only correct while DWTELEM has the size of int - confirm.
 */
950 static void horizontal_composeX(DWTELEM *b, int width){
952 const int width2= width>>1;
954 const int w2= (width+1)>>1;
956 memcpy(temp, b, width*sizeof(int));
957 for(x=0; x<width2; x++){
959 b[2*x + 1]= temp[x+w2];
/* lifting passes undone in the reverse order of the forward transform */
964 inplace_lift(b, width, COEFFS4, N4, SHIFT4, LX0, 1);
965 inplace_lift(b, width, COEFFS3, N3, SHIFT3, LX1, 1);
966 inplace_lift(b, width, COEFFS2, N2, SHIFT2, LX0, 1);
967 inplace_lift(b, width, COEFFS1, N1, SHIFT1, LX1, 1);
/**
 * Forward 2-D "X" wavelet on a width x height region of `buffer`
 * (row pitch `stride`).
 *
 * Steps, in order: multiply every sample by SCALEX, apply
 * horizontal_decomposeX() to each row, then run four vertical lifting passes
 * with inplace_liftV() (inverse = 0).
 */
970 static void spatial_decomposeX(DWTELEM *buffer, int width, int height, int stride){
973 for(y=0; y<height; y++){
974 for(x=0; x<width; x++){
975 buffer[y*stride + x] *= SCALEX;
979 for(y=0; y<height; y++){
980 horizontal_decomposeX(buffer + y*stride, width);
983 inplace_liftV(buffer, width, height, stride, COEFFS1, N1, SHIFT1, LX1, 0);
984 inplace_liftV(buffer, width, height, stride, COEFFS2, N2, SHIFT2, LX0, 0);
985 inplace_liftV(buffer, width, height, stride, COEFFS3, N3, SHIFT3, LX1, 0);
986 inplace_liftV(buffer, width, height, stride, COEFFS4, N4, SHIFT4, LX0, 0);
/**
 * Inverse 2-D "X" wavelet on a width x height region of `buffer`
 * (row pitch `stride`).
 *
 * Mirrors spatial_decomposeX() step by step in reverse: the four vertical
 * lifting passes are undone in the order COEFFS4..COEFFS1 (inverse = 1),
 * each row goes through horizontal_composeX(), and finally every sample is
 * divided by SCALEX.
 */
989 static void spatial_composeX(DWTELEM *buffer, int width, int height, int stride){
992 inplace_liftV(buffer, width, height, stride, COEFFS4, N4, SHIFT4, LX0, 1);
993 inplace_liftV(buffer, width, height, stride, COEFFS3, N3, SHIFT3, LX1, 1);
994 inplace_liftV(buffer, width, height, stride, COEFFS2, N2, SHIFT2, LX0, 1);
995 inplace_liftV(buffer, width, height, stride, COEFFS1, N1, SHIFT1, LX1, 1);
997 for(y=0; y<height; y++){
998 horizontal_composeX(buffer + y*stride, width);
1001 for(y=0; y<height; y++){
1002 for(x=0; x<width; x++){
1003 buffer[y*stride + x] /= SCALEX;
/**
 * Forward horizontal 5/3 pass on one row of `width` samples.
 *
 * The row is de-interleaved into the variable-length scratch array `temp`
 * (odd-position samples land at offset w2 onwards), and the two lift() calls
 * at the end write the results back into the two halves of b (b+w2 first,
 * then b).
 * NOTE(review): the A1..A4 loop below looks like a hand-unrolled form of the
 * same lifting; the preprocessor lines deciding whether it or the lift()
 * calls are compiled are not visible in this listing -- confirm.
 */
1008 static void horizontal_decompose53i(DWTELEM *b, int width){
1009 DWTELEM temp[width];
1010 const int width2= width>>1;
// offset of the second half of the row; rounded up for odd widths
1012 const int w2= (width+1)>>1;
1014 for(x=0; x<width2; x++){
1016 temp[x+w2]= b[2*x + 1];
1030 for(x=1; x+1<width2; x+=2){
1034 A2 += (A1 + A3 + 2)>>2;
1038 A1= temp[x+1+width2];
1041 A4 += (A1 + A3 + 2)>>2;
1047 A2 += (A1 + A3 + 2)>>2;
1052 lift(b+w2, temp+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 0);
1053 lift(b , temp , b+w2, 1, 1, 1, width, 1, 2, 2, 0, 0);
/**
 * Forward 5/3 vertical step on row b1: for each of the `width` columns,
 * subtracts (b0[i] + b2[i])>>1 from b1[i]. b0 and b2 are only read.
 */
1057 static void vertical_decompose53iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1060 for(i=0; i<width; i++){
1061 b1[i] -= (b0[i] + b2[i])>>1;
/**
 * Forward 5/3 vertical step on row b1: for each of the `width` columns,
 * adds (b0[i] + b2[i] + 2)>>2 to b1[i] (the +2 rounds the >>2).
 * b0 and b2 are only read.
 */
1065 static void vertical_decompose53iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1068 for(i=0; i<width; i++){
1069 b1[i] += (b0[i] + b2[i] + 2)>>2;
/**
 * Forward 2-D 5/3 transform of a width x height region, done in a single
 * sweep over the rows with a sliding window of row pointers b0..b3.
 *
 * Each iteration transforms rows y+1 and y+2 horizontally, then applies the
 * H0 step to row y+1 and the L0 step to row y. Row numbers outside the image
 * are mapped through mirror() when forming the pointers.
 * NOTE(review): the statements advancing b0/b1 at the end of the loop body
 * are not visible in this listing.
 */
1073 static void spatial_decompose53i(DWTELEM *buffer, int width, int height, int stride){
1075 DWTELEM *b0= buffer + mirror(-2-1, height-1)*stride;
1076 DWTELEM *b1= buffer + mirror(-2 , height-1)*stride;
1078 for(y=-2; y<height; y+=2){
1079 DWTELEM *b2= buffer + mirror(y+1, height-1)*stride;
1080 DWTELEM *b3= buffer + mirror(y+2, height-1)*stride;
// comparing against (unsigned)height also rejects negative rows: a negative int converts to a huge unsigned value
1083 if(y+1<(unsigned)height) horizontal_decompose53i(b2, width);
1084 if(y+2<(unsigned)height) horizontal_decompose53i(b3, width);
1085 STOP_TIMER("horizontal_decompose53i")}
// each guard tests the row that the call modifies (its middle argument)
1088 if(y+1<(unsigned)height) vertical_decompose53iH0(b1, b2, b3, width);
1089 if(y+0<(unsigned)height) vertical_decompose53iL0(b0, b1, b2, width);
1090 STOP_TIMER("vertical_decompose53i*")}
/**
 * Forward horizontal 9/7 pass on one row of `width` samples.
 *
 * Four lifting passes are chained, using the W_A*, W_B*, W_C* and W_D*
 * constants in that order. The first two passes read b and write the two
 * halves of the scratch row `temp`; the last two read temp and write the two
 * halves of b. The second half of either row starts at offset w2.
 * The exact roles of the numeric arguments are defined by lift(), liftS()
 * and lift5(), which are not visible in this listing.
 */
1097 static void horizontal_decompose97i(DWTELEM *b, int width){
1098 DWTELEM temp[width];
1099 const int w2= (width+1)>>1;
1101 lift (temp+w2, b +1, b , 1, 2, 2, width, -W_AM, W_AO, W_AS, 1, 0);
1102 liftS(temp , b , temp+w2, 1, 2, 1, width, -W_BM, W_BO, W_BS, 0, 0);
1103 lift5(b +w2, temp+w2, temp , 1, 1, 1, width, W_CM, W_CO, W_CS, 1, 0);
1104 lift (b , temp , b +w2, 1, 1, 1, width, W_DM, W_DO, W_DS, 0, 0);
/**
 * Forward 9/7 vertical step "H0" on row b1: for each of the `width` columns,
 * subtracts (W_AM*(b0[i] + b2[i]) + W_AO)>>W_AS from b1[i].
 * b0 and b2 are only read.
 */
1108 static void vertical_decompose97iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1111 for(i=0; i<width; i++){
1112 b1[i] -= (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
/**
 * Forward 9/7 vertical step "H1" on row b1: adds a W_C-weighted, offset and
 * right-shifted sum of b0[i] and b2[i] to b1[i] for each column.
 * NOTE(review): two formulations are visible (a direct W_CM multiply and one
 * built from r = 3*(b0+b2)); the preprocessor lines choosing between them and
 * the intermediate updates of r are not visible in this listing.
 */
1116 static void vertical_decompose97iH1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1119 for(i=0; i<width; i++){
1121 b1[i] += (W_CM*(b0[i] + b2[i])+W_CO)>>W_CS;
1123 int r= 3*(b0[i] + b2[i]);
1126 b1[i] += (r+W_CO)>>W_CS;
/**
 * Forward 9/7 vertical step "L0" on row b1, computed per column from b1[i]
 * and the neighbours b0[i], b2[i].
 * NOTE(review): two formulations are visible; the preprocessor lines
 * choosing between them are not visible in this listing.
 */
1131 static void vertical_decompose97iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1134 for(i=0; i<width; i++){
1136 b1[i] -= (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS;
// (5<<27)/(5*16) == 1<<23, so the bias added before the division is removed exactly afterwards;
// presumably it keeps the dividend non-negative so that '/' rounds consistently -- confirm
1138 b1[i] = (16*4*b1[i] - 4*(b0[i] + b2[i]) + 8*5 + (5<<27)) / (5*16) - (1<<23);
/**
 * Forward 9/7 vertical step "L1" on row b1: for each of the `width` columns,
 * adds (W_DM*(b0[i] + b2[i]) + W_DO)>>W_DS to b1[i].
 * b0 and b2 are only read.
 */
1143 static void vertical_decompose97iL1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1146 for(i=0; i<width; i++){
1147 b1[i] += (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS;
/**
 * Forward 2-D 9/7 transform of a width x height region, done in a single
 * sweep over the rows with a sliding window of six row pointers b0..b5.
 *
 * Each iteration transforms rows y+3 and y+4 horizontally, then applies the
 * four vertical steps to four consecutive rows: H0 to row y+3, L0 to row y+2,
 * H1 to row y+1 and L1 to row y. Row numbers outside the image are mapped
 * through mirror() when forming the pointers.
 * NOTE(review): the statements advancing b0..b3 at the end of the loop body
 * are not visible in this listing.
 */
1151 static void spatial_decompose97i(DWTELEM *buffer, int width, int height, int stride){
1153 DWTELEM *b0= buffer + mirror(-4-1, height-1)*stride;
1154 DWTELEM *b1= buffer + mirror(-4 , height-1)*stride;
1155 DWTELEM *b2= buffer + mirror(-4+1, height-1)*stride;
1156 DWTELEM *b3= buffer + mirror(-4+2, height-1)*stride;
1158 for(y=-4; y<height; y+=2){
1159 DWTELEM *b4= buffer + mirror(y+3, height-1)*stride;
1160 DWTELEM *b5= buffer + mirror(y+4, height-1)*stride;
// comparing against (unsigned)height also rejects negative rows: a negative int converts to a huge unsigned value
1163 if(y+3<(unsigned)height) horizontal_decompose97i(b4, width);
1164 if(y+4<(unsigned)height) horizontal_decompose97i(b5, width);
1166 STOP_TIMER("horizontal_decompose97i")
// each guard tests the row that the call modifies (its middle argument)
1170 if(y+3<(unsigned)height) vertical_decompose97iH0(b3, b4, b5, width);
1171 if(y+2<(unsigned)height) vertical_decompose97iL0(b2, b3, b4, width);
1172 if(y+1<(unsigned)height) vertical_decompose97iH1(b1, b2, b3, width);
1173 if(y+0<(unsigned)height) vertical_decompose97iL1(b0, b1, b2, width);
1176 STOP_TIMER("vertical_decompose97i")
/**
 * Forward multi-level 2-D wavelet transform of `buffer`, in place.
 *
 * For level = 0 .. decomposition_count-1 one decomposition is applied to a
 * region of (width>>level) x (height>>level) samples using a row pitch of
 * stride<<level. The case labels select the filter: 0 = 9/7, 1 = 5/3,
 * 2 = the experimental "X" wavelet (the switch expression is not visible in
 * this listing; presumably `type`).
 */
1186 void ff_spatial_dwt(DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
1189 for(level=0; level<decomposition_count; level++){
1191 case 0: spatial_decompose97i(buffer, width>>level, height>>level, stride<<level); break;
1192 case 1: spatial_decompose53i(buffer, width>>level, height>>level, stride<<level); break;
1193 case 2: spatial_decomposeX (buffer, width>>level, height>>level, stride<<level); break;
/**
 * Inverse horizontal 5/3 pass on one row of `width` samples.
 *
 * The two lift() calls (final argument 1) read the two halves of b and write
 * the two halves of the scratch row `temp`; the closing loop re-interleaves
 * temp into b, with the half starting at offset w2 supplying the odd
 * positions.
 * NOTE(review): the A1..A4 loop below looks like a hand-unrolled form of the
 * same lifting; the preprocessor lines deciding whether it or the lift()
 * calls are compiled are not visible in this listing -- confirm.
 */
1198 static void horizontal_compose53i(DWTELEM *b, int width){
1199 DWTELEM temp[width];
1200 const int width2= width>>1;
1201 const int w2= (width+1)>>1;
1213 for(x=1; x+1<width2; x+=2){
1217 A2 += (A1 + A3 + 2)>>2;
1221 A1= temp[x+1+width2];
1224 A4 += (A1 + A3 + 2)>>2;
1230 A2 += (A1 + A3 + 2)>>2;
1234 lift(temp , b , b+w2, 1, 1, 1, width, 1, 2, 2, 0, 1);
1235 lift(temp+w2, b+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 1);
1237 for(x=0; x<width2; x++){
1239 b[2*x + 1]= temp[x+w2];
/**
 * Inverse 5/3 vertical step on row b1: for each of the `width` columns,
 * adds (b0[i] + b2[i])>>1 to b1[i]. b0 and b2 are only read.
 */
1245 static void vertical_compose53iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1248 for(i=0; i<width; i++){
1249 b1[i] += (b0[i] + b2[i])>>1;
/**
 * Inverse 5/3 vertical step on row b1: for each of the `width` columns,
 * subtracts (b0[i] + b2[i] + 2)>>2 from b1[i]. b0 and b2 are only read.
 */
1253 static void vertical_compose53iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1256 for(i=0; i<width; i++){
1257 b1[i] -= (b0[i] + b2[i] + 2)>>2;
/**
 * Prepares the incremental 5/3 composer state `cs` for a slice buffer:
 * b0 and b1 are set to the slice-buffer lines for rows -2 and -1 after
 * mapping through mirror(). stride_line scales a row number into the line
 * index passed to slice_buffer_get_line().
 * NOTE(review): the initialisation of cs->y is not visible in this listing.
 */
1261 static void spatial_compose53i_buffered_init(dwt_compose_t *cs, slice_buffer * sb, int height, int stride_line){
1262 cs->b0 = slice_buffer_get_line(sb, mirror(-1-1, height-1) * stride_line);
1263 cs->b1 = slice_buffer_get_line(sb, mirror(-1 , height-1) * stride_line);
/**
 * Prepares the incremental 5/3 composer state `cs` for a plain buffer:
 * b0 and b1 are pointed at rows -2 and -1 of `buffer` after mapping through
 * mirror(), with a row pitch of `stride`.
 * NOTE(review): the initialisation of cs->y is not visible in this listing.
 */
1267 static void spatial_compose53i_init(dwt_compose_t *cs, DWTELEM *buffer, int height, int stride){
1268 cs->b0 = buffer + mirror(-1-1, height-1)*stride;
1269 cs->b1 = buffer + mirror(-1 , height-1)*stride;
/**
 * Advances the slice-buffer 5/3 composer by one step.
 *
 * With y the current row (presumably read from cs->y; its declaration is not
 * visible in this listing), the step applies the inverse vertical L0 step to
 * row y+1 and H0 to row y, then runs the inverse horizontal pass on rows y-1
 * and y. b0/b1 come from the saved state; b2/b3 are fetched from the slice
 * buffer for rows y+1 and y+2 after mirroring.
 * NOTE(review): the statements storing the new b0/b1/y back into cs are not
 * visible in this listing.
 */
1273 static void spatial_compose53i_dy_buffered(dwt_compose_t *cs, slice_buffer * sb, int width, int height, int stride_line){
1276 DWTELEM *b0= cs->b0;
1277 DWTELEM *b1= cs->b1;
1278 DWTELEM *b2= slice_buffer_get_line(sb, mirror(y+1, height-1) * stride_line);
1279 DWTELEM *b3= slice_buffer_get_line(sb, mirror(y+2, height-1) * stride_line);
// comparing against (unsigned)height also rejects negative rows; each guard tests the row being modified
1282 if(y+1<(unsigned)height) vertical_compose53iL0(b1, b2, b3, width);
1283 if(y+0<(unsigned)height) vertical_compose53iH0(b0, b1, b2, width);
1284 STOP_TIMER("vertical_compose53i*")}
1287 if(y-1<(unsigned)height) horizontal_compose53i(b0, width);
1288 if(y+0<(unsigned)height) horizontal_compose53i(b1, width);
1289 STOP_TIMER("horizontal_compose53i")}
/**
 * Advances the plain-buffer 5/3 composer by one step.
 *
 * With y the current row (presumably read from cs->y; its declaration is not
 * visible in this listing), the step applies the inverse vertical L0 step to
 * row y+1 and H0 to row y, then runs the inverse horizontal pass on rows y-1
 * and y. b0/b1 come from the saved state; b2/b3 point at rows y+1 and y+2 of
 * `buffer` after mirroring.
 * NOTE(review): the statements storing the new b0/b1/y back into cs are not
 * visible in this listing.
 */
1296 static void spatial_compose53i_dy(dwt_compose_t *cs, DWTELEM *buffer, int width, int height, int stride){
1298 DWTELEM *b0= cs->b0;
1299 DWTELEM *b1= cs->b1;
1300 DWTELEM *b2= buffer + mirror(y+1, height-1)*stride;
1301 DWTELEM *b3= buffer + mirror(y+2, height-1)*stride;
// comparing against (unsigned)height also rejects negative rows; each guard tests the row being modified
1304 if(y+1<(unsigned)height) vertical_compose53iL0(b1, b2, b3, width);
1305 if(y+0<(unsigned)height) vertical_compose53iH0(b0, b1, b2, width);
1306 STOP_TIMER("vertical_compose53i*")}
1309 if(y-1<(unsigned)height) horizontal_compose53i(b0, width);
1310 if(y+0<(unsigned)height) horizontal_compose53i(b1, width);
1311 STOP_TIMER("horizontal_compose53i")}
/**
 * Inverse 2-D 5/3 transform of a whole width x height region: initialises a
 * composer state and keeps calling spatial_compose53i_dy() until cs.y has
 * passed `height`.
 */
1318 static void spatial_compose53i(DWTELEM *buffer, int width, int height, int stride){
1320 spatial_compose53i_init(&cs, buffer, height, stride);
1321 while(cs.y <= height)
1322 spatial_compose53i_dy(&cs, buffer, width, height, stride);
/**
 * Inverse horizontal 9/7 pass on one row of `width` samples.
 *
 * Chains the four lifting passes with the W_D*, W_C*, W_B* and W_A*
 * constants, in that order, and a final argument of 1. The first two passes
 * read the halves of b and write the halves of the scratch row `temp`; the
 * last two write b with a destination step of 2, i.e. b and b+1 receive the
 * interleaved result.
 * The exact roles of the numeric arguments are defined by lift(), liftS()
 * and lift5(), which are not visible in this listing.
 */
1326 void ff_snow_horizontal_compose97i(DWTELEM *b, int width){
1327 DWTELEM temp[width];
1328 const int w2= (width+1)>>1;
1330 lift (temp , b , b +w2, 1, 1, 1, width, W_DM, W_DO, W_DS, 0, 1);
1331 lift5(temp+w2, b +w2, temp , 1, 1, 1, width, W_CM, W_CO, W_CS, 1, 1);
1332 liftS(b , temp , temp+w2, 2, 1, 1, width, -W_BM, W_BO, W_BS, 0, 1);
1333 lift (b+1 , temp+w2, b , 2, 1, 2, width, -W_AM, W_AO, W_AS, 1, 1);
/**
 * Inverse 9/7 vertical step "H0" on row b1: for each of the `width` columns,
 * adds (W_AM*(b0[i] + b2[i]) + W_AO)>>W_AS to b1[i].
 * b0 and b2 are only read.
 */
1336 static void vertical_compose97iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1339 for(i=0; i<width; i++){
1340 b1[i] += (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
/**
 * Inverse 9/7 vertical step "H1" on row b1: subtracts a W_C-weighted, offset
 * and right-shifted sum of b0[i] and b2[i] from b1[i] for each column.
 * NOTE(review): two formulations are visible (a direct W_CM multiply and one
 * built from r = 3*(b0+b2)); the preprocessor lines choosing between them and
 * the intermediate updates of r are not visible in this listing.
 */
1344 static void vertical_compose97iH1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1347 for(i=0; i<width; i++){
1349 b1[i] -= (W_CM*(b0[i] + b2[i])+W_CO)>>W_CS;
1351 int r= 3*(b0[i] + b2[i]);
1354 b1[i] -= (r+W_CO)>>W_CS;
/**
 * Inverse 9/7 vertical step "L0" on row b1, computed per column from b1[i]
 * and the neighbours b0[i], b2[i].
 * NOTE(review): two formulations are visible -- the second also feeds
 * 4*b1[i] into the shifted sum; the preprocessor lines choosing between them
 * are not visible in this listing.
 */
1359 static void vertical_compose97iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1362 for(i=0; i<width; i++){
1364 b1[i] += (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS;
1366 b1[i] += (W_BM*(b0[i] + b2[i])+4*b1[i]+W_BO)>>W_BS;
/**
 * Inverse 9/7 vertical step "L1" on row b1: for each of the `width` columns,
 * subtracts (W_DM*(b0[i] + b2[i]) + W_DO)>>W_DS from b1[i].
 * b0 and b2 are only read.
 */
1371 static void vertical_compose97iL1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1374 for(i=0; i<width; i++){
1375 b1[i] -= (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS;
/**
 * Fused inverse 9/7 vertical step over six consecutive rows b0..b5.
 *
 * For each column the four inverse lifting updates are applied back to back:
 * the W_D update on b4 (from b3, b5), the W_C update on b3 (from b2, b4),
 * the W_B update on b2 (from b1, b3) and the W_A update on b1 (from b0, b2).
 * b0 and b5 are only read.
 * NOTE(review): alternative formulations of the W_C and W_B updates are
 * visible; the preprocessor lines choosing between them are not visible in
 * this listing.
 */
1379 void ff_snow_vertical_compose97i(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, DWTELEM *b3, DWTELEM *b4, DWTELEM *b5, int width){
1382 for(i=0; i<width; i++){
1386 b4[i] -= (W_DM*(b3[i] + b5[i])+W_DO)>>W_DS;
1388 b3[i] -= (W_CM*(b2[i] + b4[i])+W_CO)>>W_CS;
1390 r= 3*(b2[i] + b4[i]);
1393 b3[i] -= (r+W_CO)>>W_CS;
1396 b2[i] += (W_BM*(b1[i] + b3[i])+W_BO)>>W_BS;
1398 b2[i] += (W_BM*(b1[i] + b3[i])+4*b2[i]+W_BO)>>W_BS;
1400 b1[i] += (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
/**
 * Prepares the incremental 9/7 composer state `cs` for a slice buffer:
 * b0..b3 are set to the slice-buffer lines for rows -4, -3, -2 and -1 after
 * mapping through mirror(). stride_line scales a row number into the line
 * index passed to slice_buffer_get_line().
 * NOTE(review): the initialisation of cs->y is not visible in this listing.
 */
1404 static void spatial_compose97i_buffered_init(dwt_compose_t *cs, slice_buffer * sb, int height, int stride_line){
1405 cs->b0 = slice_buffer_get_line(sb, mirror(-3-1, height-1) * stride_line);
1406 cs->b1 = slice_buffer_get_line(sb, mirror(-3 , height-1) * stride_line);
1407 cs->b2 = slice_buffer_get_line(sb, mirror(-3+1, height-1) * stride_line);
1408 cs->b3 = slice_buffer_get_line(sb, mirror(-3+2, height-1) * stride_line);
/**
 * Prepares the incremental 9/7 composer state `cs` for a plain buffer:
 * b0..b3 are pointed at rows -4, -3, -2 and -1 of `buffer` after mapping
 * through mirror(), with a row pitch of `stride`.
 * NOTE(review): the initialisation of cs->y is not visible in this listing.
 */
1412 static void spatial_compose97i_init(dwt_compose_t *cs, DWTELEM *buffer, int height, int stride){
1413 cs->b0 = buffer + mirror(-3-1, height-1)*stride;
1414 cs->b1 = buffer + mirror(-3 , height-1)*stride;
1415 cs->b2 = buffer + mirror(-3+1, height-1)*stride;
1416 cs->b3 = buffer + mirror(-3+2, height-1)*stride;
/**
 * Advances the slice-buffer 9/7 composer by one step.
 *
 * With y the current row (presumably read from cs->y; its declaration is not
 * visible in this listing), the step undoes the vertical lifting on rows
 * y+3 (L1), y+2 (H1), y+1 (L0) and y (H0), then runs the inverse horizontal
 * pass on rows y-1 and y through the function pointers in `dsp`.
 * NOTE(review): the statements storing the new b0..b3/y back into cs are not
 * visible in this listing.
 */
1420 static void spatial_compose97i_dy_buffered(DSPContext *dsp, dwt_compose_t *cs, slice_buffer * sb, int width, int height, int stride_line){
1423 DWTELEM *b0= cs->b0;
1424 DWTELEM *b1= cs->b1;
1425 DWTELEM *b2= cs->b2;
1426 DWTELEM *b3= cs->b3;
1427 DWTELEM *b4= slice_buffer_get_line(sb, mirror(y + 3, height - 1) * stride_line);
1428 DWTELEM *b5= slice_buffer_get_line(sb, mirror(y + 4, height - 1) * stride_line);
// away from the top and bottom edges all four target rows are inside the image, so the fused dsp routine is used
1431 if(y>0 && y+4<height){
1432 dsp->vertical_compose97i(b0, b1, b2, b3, b4, b5, width);
// otherwise each step is guarded on its own target row; the unsigned compare also rejects negative rows
1434 if(y+3<(unsigned)height) vertical_compose97iL1(b3, b4, b5, width);
1435 if(y+2<(unsigned)height) vertical_compose97iH1(b2, b3, b4, width);
1436 if(y+1<(unsigned)height) vertical_compose97iL0(b1, b2, b3, width);
1437 if(y+0<(unsigned)height) vertical_compose97iH0(b0, b1, b2, width);
1440 STOP_TIMER("vertical_compose97i")}}
1443 if(y-1<(unsigned)height) dsp->horizontal_compose97i(b0, width);
1444 if(y+0<(unsigned)height) dsp->horizontal_compose97i(b1, width);
1445 if(width>400 && y+0<(unsigned)height){
1446 STOP_TIMER("horizontal_compose97i")}}
/**
 * Advances the plain-buffer 9/7 composer by one step.
 *
 * With y the current row (presumably read from cs->y; its declaration is not
 * visible in this listing), the step undoes the vertical lifting on rows
 * y+3 (L1), y+2 (H1), y+1 (L0) and y (H0), then runs
 * ff_snow_horizontal_compose97i() on rows y-1 and y.
 * NOTE(review): the statements storing the new b0..b3/y back into cs are not
 * visible in this listing.
 */
1455 static void spatial_compose97i_dy(dwt_compose_t *cs, DWTELEM *buffer, int width, int height, int stride){
1457 DWTELEM *b0= cs->b0;
1458 DWTELEM *b1= cs->b1;
1459 DWTELEM *b2= cs->b2;
1460 DWTELEM *b3= cs->b3;
1461 DWTELEM *b4= buffer + mirror(y+3, height-1)*stride;
1462 DWTELEM *b5= buffer + mirror(y+4, height-1)*stride;
// each guard tests the row the call modifies (its middle argument); the unsigned compare also rejects negative rows
1465 if(y+3<(unsigned)height) vertical_compose97iL1(b3, b4, b5, width);
1466 if(y+2<(unsigned)height) vertical_compose97iH1(b2, b3, b4, width);
1467 if(y+1<(unsigned)height) vertical_compose97iL0(b1, b2, b3, width);
1468 if(y+0<(unsigned)height) vertical_compose97iH0(b0, b1, b2, width);
1470 STOP_TIMER("vertical_compose97i")}}
1473 if(y-1<(unsigned)height) ff_snow_horizontal_compose97i(b0, width);
1474 if(y+0<(unsigned)height) ff_snow_horizontal_compose97i(b1, width);
1475 if(width>400 && b0 <= b2){
1476 STOP_TIMER("horizontal_compose97i")}}
/**
 * Inverse 2-D 9/7 transform of a whole width x height region: initialises a
 * composer state and keeps calling spatial_compose97i_dy() until cs.y has
 * passed `height`.
 */
1485 static void spatial_compose97i(DWTELEM *buffer, int width, int height, int stride){
1487 spatial_compose97i_init(&cs, buffer, height, stride);
1488 while(cs.y <= height)
1489 spatial_compose97i_dy(&cs, buffer, width, height, stride);
/**
 * Initialises one composer state per decomposition level (cs[level]) for the
 * slice-buffer inverse transform, going from the coarsest level down to 0.
 *
 * Level `level` works on height>>level rows with a line step of
 * stride_line<<level. Case 0 sets up the 9/7 composer and case 1 the 5/3
 * composer; case 2 (the "X" wavelet) has no slice-buffer implementation and
 * only logs an error. The switch expression is not visible in this listing;
 * presumably `type`.
 */
1492 static void ff_spatial_idwt_buffered_init(dwt_compose_t *cs, slice_buffer * sb, int width, int height, int stride_line, int type, int decomposition_count){
1494 for(level=decomposition_count-1; level>=0; level--){
1496 case 0: spatial_compose97i_buffered_init(cs+level, sb, height>>level, stride_line<<level); break;
1497 case 1: spatial_compose53i_buffered_init(cs+level, sb, height>>level, stride_line<<level); break;
1498 /* not slicified yet */
1499 case 2: /*spatial_composeX(buffer, width>>level, height>>level, stride<<level); break;*/
1500 av_log(NULL, AV_LOG_ERROR, "spatial_composeX neither buffered nor slicified yet.\n"); break;
/**
 * Initialises one composer state per decomposition level (cs[level]) for the
 * plain-buffer inverse transform, going from the coarsest level down to 0.
 *
 * Level `level` works on height>>level rows with a row pitch of
 * stride<<level. Case 0 sets up the 9/7 composer and case 1 the 5/3 composer.
 * Case 2 (the "X" wavelet) has no incremental form, so the whole level is
 * composed right here. The switch expression is not visible in this listing;
 * presumably `type`.
 */
1505 static void ff_spatial_idwt_init(dwt_compose_t *cs, DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
1507 for(level=decomposition_count-1; level>=0; level--){
1509 case 0: spatial_compose97i_init(cs+level, buffer, height>>level, stride<<level); break;
1510 case 1: spatial_compose53i_init(cs+level, buffer, height>>level, stride<<level); break;
1511 /* not slicified yet */
1512 case 2: spatial_composeX(buffer, width>>level, height>>level, stride<<level); break;
/**
 * Advances the plain-buffer inverse transform far enough to cover output
 * row `y`.
 *
 * From the coarsest level down to 0, cs[level] is stepped until its row
 * counter exceeds min((y>>level) + support, height>>level).
 * NOTE(review): the switch header and the statements following each case
 * call are not visible in this listing.
 */
1517 static void ff_spatial_idwt_slice(dwt_compose_t *cs, DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count, int y){
// look-ahead in rows: 3 for type 1 (5/3), 5 otherwise
1518 const int support = type==1 ? 3 : 5;
1522 for(level=decomposition_count-1; level>=0; level--){
1523 while(cs[level].y <= FFMIN((y>>level)+support, height>>level)){
1525 case 0: spatial_compose97i_dy(cs+level, buffer, width>>level, height>>level, stride<<level);
1527 case 1: spatial_compose53i_dy(cs+level, buffer, width>>level, height>>level, stride<<level);
/**
 * Advances the slice-buffer inverse transform far enough to cover output
 * row `y`.
 *
 * From the coarsest level down to 0, cs[level] is stepped until its row
 * counter exceeds min((y>>level) + support, height>>level). `dsp` is passed
 * on to the 9/7 step only.
 * NOTE(review): the switch header and the statements following each case
 * call are not visible in this listing.
 */
1535 static void ff_spatial_idwt_buffered_slice(DSPContext *dsp, dwt_compose_t *cs, slice_buffer * slice_buf, int width, int height, int stride_line, int type, int decomposition_count, int y){
// look-ahead in rows: 3 for type 1 (5/3), 5 otherwise
1536 const int support = type==1 ? 3 : 5;
1540 for(level=decomposition_count-1; level>=0; level--){
1541 while(cs[level].y <= FFMIN((y>>level)+support, height>>level)){
1543 case 0: spatial_compose97i_dy_buffered(dsp, cs+level, slice_buf, width>>level, height>>level, stride_line<<level);
1545 case 1: spatial_compose53i_dy_buffered(cs+level, slice_buf, width>>level, height>>level, stride_line<<level);
/**
 * Inverse multi-level 2-D wavelet transform of `buffer`, in place.
 *
 * Two paths are visible: one composes every level with spatial_composeX(),
 * coarsest level first; the other sets up per-level composer states and then
 * drives ff_spatial_idwt_slice() over the image in steps of four rows.
 * NOTE(review): the condition selecting between the two paths is not visible
 * in this listing (presumably type==2 for the first) -- confirm.
 */
1553 static void ff_spatial_idwt(DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
1556 for(level=decomposition_count-1; level>=0; level--)
1557 spatial_composeX (buffer, width>>level, height>>level, stride<<level);
1559 dwt_compose_t cs[MAX_DECOMPOSITIONS];
1561 ff_spatial_idwt_init(cs, buffer, width, height, stride, type, decomposition_count);
1562 for(y=0; y<height; y+=4)
1563 ff_spatial_idwt_slice(cs, buffer, width, height, stride, type, decomposition_count, y);
/**
 * Writes the coefficients of subband `b` to the range coder s->c.
 *
 * The visible code scans the w x h coefficients twice. The first scan looks
 * at each position's neighbours (left l, top-left lt, top t, top-right rt)
 * and the parent-band sample p, and where all are zero records zero-run
 * lengths in runs[]. The second scan emits the data: run lengths where the
 * neighbourhood is empty, otherwise a context-coded significance flag,
 * followed for non-zero values by magnitude-1 and a sign bit.
 *
 * @param src         coefficients of this band, row pitch `stride`
 * @param parent      coefficients of the parent band (read at row pitch
 *                    2*stride)
 * @param orientation used only by the `ll` lines visible below
 * @return value not visible in this listing; encode_subband() forwards it
 */
1567 static int encode_subband_c0run(SnowContext *s, SubBand *b, DWTELEM *src, DWTELEM *parent, int stride, int orientation){
1568 const int w= b->width;
1569 const int h= b->height;
1581 int /*ll=0, */l=0, lt=0, t=0, rt=0;
1582 v= src[x + y*stride];
1585 t= src[x + (y-1)*stride];
1587 lt= src[x - 1 + (y-1)*stride];
1590 rt= src[x + 1 + (y-1)*stride];
1594 l= src[x - 1 + y*stride];
// NOTE(review): `ll` is commented out of the declaration above, so these two lines are presumably in a disabled region not visible here
1596 if(orientation==1) ll= src[y + (x-2)*stride];
1597 else ll= src[x - 2 + y*stride];
// the parent sample is used only when (px, py) lies inside the parent band
1603 if(px<b->parent->width && py<b->parent->height)
1604 p= parent[px + py*2*stride];
// empty neighbourhood: this position belongs to the run-length part
1606 if(!(/*ll|*/l|lt|t|rt|p)){
1608 runs[run_index++]= run;
1616 max_index= run_index;
1617 runs[run_index++]= run;
1619 run= runs[run_index++];
// the number of runs goes out first, then the first run length if there is one
1621 put_symbol2(&s->c, b->state[30], max_index, 0);
1622 if(run_index <= max_index)
1623 put_symbol2(&s->c, b->state[1], run, 3);
// output-space check: fewer than w*40 bytes left in the coder's buffer is reported as an error
1626 if(s->c.bytestream_end - s->c.bytestream < w*40){
1627 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
1632 int /*ll=0, */l=0, lt=0, t=0, rt=0;
1633 v= src[x + y*stride];
1636 t= src[x + (y-1)*stride];
1638 lt= src[x - 1 + (y-1)*stride];
1641 rt= src[x + 1 + (y-1)*stride];
1645 l= src[x - 1 + y*stride];
1647 if(orientation==1) ll= src[y + (x-2)*stride];
1648 else ll= src[x - 2 + y*stride];
1654 if(px<b->parent->width && py<b->parent->height)
1655 p= parent[px + py*2*stride];
1657 if(/*ll|*/l|lt|t|rt|p){
// context = log2 of a weighted sum of neighbour magnitudes
1658 int context= av_log2(/*ABS(ll) + */3*ABS(l) + ABS(lt) + 2*ABS(t) + ABS(rt) + ABS(p));
// significance flag: is v non-zero?
1660 put_rac(&s->c, &b->state[0][context], !!v);
1663 run= runs[run_index++];
1665 if(run_index <= max_index)
1666 put_symbol2(&s->c, b->state[1], run, 3);
1674 int context= av_log2(/*ABS(ll) + */3*ABS(l) + ABS(lt) + 2*ABS(t) + ABS(rt) + ABS(p));
// fold each neighbour into 2*|value| + sign bit before indexing quant3bA
1675 int l2= 2*ABS(l) + (l<0);
1676 int t2= 2*ABS(t) + (t<0);
// magnitude minus one, then the sign with a context derived from the left and top neighbours
1678 put_symbol2(&s->c, b->state[context + 2], ABS(v)-1, context-4);
1679 put_rac(&s->c, &b->state[0][16 + 1 + 3 + quant3bA[l2&0xFF] + 3*quant3bA[t2&0xFF]], v<0);
/**
 * Subband encoder entry point: forwards all arguments to
 * encode_subband_c0run() and returns its result. The commented-out calls
 * name other coders that are not used.
 */
1687 static int encode_subband(SnowContext *s, SubBand *b, DWTELEM *src, DWTELEM *parent, int stride, int orientation){
1688 // encode_subband_qtree(s, b, src, parent, stride, orientation);
1689 // encode_subband_z0run(s, b, src, parent, stride, orientation);
1690 return encode_subband_c0run(s, b, src, parent, stride, orientation);
1691 // encode_subband_dzr(s, b, src, parent, stride, orientation);
/**
 * Reads the coefficients of subband `b` from the range coder s->c into the
 * sparse list b->x_coeff.
 *
 * Each list entry holds a column `x` and a coded value; entries with
 * x = w+1 are written as end markers. Decoded values are stored as
 * 2*(magnitude symbol + 1) plus a sign bit in the low bit. The previous row
 * of this band (prev_xc) and the parent band's list (parent_xc) are walked
 * in parallel to supply the neighbour and parent values used for context
 * selection.
 *
 * @param parent      parent band, or NULL (then parent_xc is NULL)
 * @param orientation not referenced in the lines visible in this listing
 */
1694 static inline void unpack_coeffs(SnowContext *s, SubBand *b, SubBand * parent, int orientation){
1695 const int w= b->width;
1696 const int h= b->height;
1701 x_and_coeff *xc= b->x_coeff;
1702 x_and_coeff *prev_xc= NULL;
1703 x_and_coeff *prev2_xc= xc;
1704 x_and_coeff *parent_xc= parent ? parent->x_coeff : NULL;
1705 x_and_coeff *prev_parent_xc= parent_xc;
// number of runs first, then the first run length if there is one
1707 runs= get_symbol2(&s->c, b->state[30], 0);
1708 if(runs-- > 0) run= get_symbol2(&s->c, b->state[1], 3);
1713 int lt=0, t=0, rt=0;
1715 if(y && prev_xc->x == 0){
1727 if(prev_xc->x == x + 1)
1733 if(x>>1 > parent_xc->x){
// the parent band is half the width, so column x maps to parent column x>>1
1736 if(x>>1 == parent_xc->x){
1737 p= parent_xc->coeff;
1740 if(/*ll|*/l|lt|t|rt|p){
// neighbours are in the stored form (magnitude in the upper bits, sign in bit 0), hence >>1 and &~1
1741 int context= av_log2(/*ABS(ll) + */3*(l>>1) + (lt>>1) + (t&~1) + (rt>>1) + (p>>1));
1743 v=get_rac(&s->c, &b->state[0][context]);
// non-zero: decode magnitude-1, then put the sign into bit 0
1745 v= 2*(get_symbol2(&s->c, b->state[context + 2], context-4) + 1);
1746 v+=get_rac(&s->c, &b->state[0][16 + 1 + 3 + quant3bA[l&0xFF] + 3*quant3bA[t&0xFF]]);
1753 if(runs-- > 0) run= get_symbol2(&s->c, b->state[1], 3);
1755 v= 2*(get_symbol2(&s->c, b->state[0 + 2], 0-4) + 1);
1756 v+=get_rac(&s->c, &b->state[0][16 + 1 + 3]);
// limit how far the run may be skipped at once, using the next entry of the previous row / parent row
1765 if(y) max_run= FFMIN(run, prev_xc->x - x - 2);
1766 else max_run= FFMIN(run, w-x-1);
1768 max_run= FFMIN(max_run, 2*parent_xc->x - x - 1);
1774 (xc++)->x= w+1; //end marker
// advance to the parent's end marker (x == parent->width+1)
1780 while(parent_xc->x != parent->width+1)
1783 prev_parent_xc= parent_xc;
1785 parent_xc= prev_parent_xc;
1790 (xc++)->x= w+1; //end marker
/**
 * Dequantises rows start_y .. h-1 of subband `b` from its sparse list
 * b->x_coeff into the slice buffer `sb`.
 *
 * Each target line is cleared and the listed coefficients are then written
 * at their columns. A stored value v carries the magnitude in v>>1 and the
 * sign in bit 0.
 *
 * @param save_state one-element array carrying the x_coeff read index from
 *                   one slice call to the next
 */
1794 static inline void decode_subband_slice_buffered(SnowContext *s, SubBand *b, slice_buffer * sb, int start_y, int h, int save_state[1]){
1795 const int w= b->width;
// quantiser: clipped log-scale index -> multiplier qmul, plus a bias qadd derived from s->qbias
1797 const int qlog= clip(s->qlog + b->qlog, 0, QROOT*16);
1798 int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
1799 int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
// NOTE(review): the body of this special case is not visible in this listing
1804 if(b->buf == s->spatial_dwt_buffer || s->qlog == LOSSLESS_QLOG){
1809 /* If we are on the second or later slice, restore our index. */
1811 new_index = save_state[0];
1814 for(y=start_y; y<h; y++){
1817 DWTELEM * line = slice_buffer_get_line(sb, y * b->stride_line + b->buf_y_offset) + b->buf_x_offset;
1818 memset(line, 0, b->width*sizeof(DWTELEM));
1819 v = b->x_coeff[new_index].coeff;
1820 x = b->x_coeff[new_index++].x;
// t is the dequantised magnitude; u is 0 or -1, and (t^u)-u negates t when u is -1
1823 register int t= ( (v>>1)*qmul + qadd)>>QEXPSHIFT;
1824 register int u= -(v&1);
1825 line[x] = (t^u) - u;
1827 v = b->x_coeff[new_index].coeff;
1828 x = b->x_coeff[new_index++].x;
1831 if(w > 200 && start_y != 0/*level+1 == s->spatial_decomposition_count*/){
1832 STOP_TIMER("decode_subband")
1835 /* Save our variables for the next slice. */
1836 save_state[0] = new_index;
/**
 * Resets all adaptive coder states to MID_STATE: the per-band state tables
 * of all three planes (every level below s->spatial_decomposition_count),
 * plus s->header_state and s->block_state.
 */
1841 static void reset_contexts(SnowContext *s){
1842 int plane_index, level, orientation;
1844 for(plane_index=0; plane_index<3; plane_index++){
1845 for(level=0; level<s->spatial_decomposition_count; level++){
// orientation 0 is visited at level 0 only
1846 for(orientation=level ? 1:0; orientation<4; orientation++){
1847 memset(s->plane[plane_index].band[level][orientation].state, MID_STATE, sizeof(s->plane[plane_index].band[level][orientation].state));
1851 memset(s->header_state, MID_STATE, sizeof(s->header_state));
1852 memset(s->block_state, MID_STATE, sizeof(s->block_state));
/**
 * Allocates the zero-initialised block array s->block, sized from the frame
 * dimensions in s->avctx.
 *
 * NOTE(review): in the visible lines the result of av_mallocz() is not
 * checked, and the size expression is evaluated without an overflow check --
 * confirm against the lines not visible in this listing.
 */
1855 static int alloc_blocks(SnowContext *s){
// -((-v)>>k) rounds v / 2^k upwards (relies on arithmetic right shift of negative values)
1856 int w= -((-s->avctx->width )>>LOG2_MB_SIZE);
1857 int h= -((-s->avctx->height)>>LOG2_MB_SIZE);
// w*h blocks, times 4^block_max_depth entries per block
1862 s->block= av_mallocz(w * h * sizeof(BlockNode) << (s->block_max_depth*2));
/**
 * Updates range coder `d` while keeping d's own bytestream and
 * bytestream_start pointers: both are saved on entry and written back at the
 * end.
 * NOTE(review): the statement between save and restore is not visible in
 * this listing; presumably it copies *s into *d -- confirm.
 */
1866 static inline void copy_rac_state(RangeCoder *d, RangeCoder *s){
1867 uint8_t *bytestream= d->bytestream;
1868 uint8_t *bytestream_start= d->bytestream_start;
1870 d->bytestream= bytestream;
1871 d->bytestream_start= bytestream_start;
/**
 * Walks a w x w block of 8-bit pixels starting at `pix`, with rows
 * line_size bytes apart.
 * NOTE(review): the accumulation and return statements are not visible in
 * this listing; going by the name and by how callers use the result, it
 * returns the sum of the pixel values -- confirm.
 */
1874 //near copy & paste from dsputil, FIXME
1875 static int pix_sum(uint8_t * pix, int line_size, int w)
1880 for (i = 0; i < w; i++) {
1881 for (j = 0; j < w; j++) {
// skip from the end of this block row to the start of the next one
1885 pix += line_size - w;
/**
 * Walks a w x w block of 8-bit pixels starting at `pix`, with rows
 * line_size bytes apart, using the lookup table squareTbl.
 * NOTE(review): the accumulation and return statements are not visible in
 * this listing; going by the name and the table used, it presumably returns
 * the sum of squared pixel values -- confirm.
 */
1890 //near copy & paste from dsputil, FIXME
1891 static int pix_norm1(uint8_t * pix, int line_size, int w)
// sq points 256 entries into the table
1894 uint32_t *sq = squareTbl + 256;
1897 for (i = 0; i < w; i++) {
1898 for (j = 0; j < w; j ++) {
// skip from the end of this block row to the start of the next one
1902 pix += line_size - w;
/**
 * Stores one block description into every finest-level entry of s->block
 * covered by the block at (x, y) of quadtree `level`.
 *
 * The covered area is block_w x block_w entries, where
 * block_w = 1 << (block_max_depth - level), in an array that is
 * s->b_width << block_max_depth entries wide.
 * NOTE(review): the construction of `block` from l, cb, cr, mx, my and type
 * is not visible in this listing.
 */
1907 static inline void set_blocks(SnowContext *s, int level, int x, int y, int l, int cb, int cr, int mx, int my, int type){
1908 const int w= s->b_width << s->block_max_depth;
1909 const int rem_depth= s->block_max_depth - level;
// top-left entry of the covered area
1910 const int index= (x + y*w) << rem_depth;
1911 const int block_w= 1<<rem_depth;
1923 for(j=0; j<block_w; j++){
1924 for(i=0; i<block_w; i++){
1925 s->block[index + i + j*w]= block;
/**
 * Sets up the source and reference plane pointers of motion estimation
 * context `c` for a block at pixel position (x, y).
 *
 * For each plane i, c->src[0][i] is set to src[i] and c->ref[0][i] to ref[i]
 * advanced by a per-plane offset. The two chroma offsets are
 * (y*uvstride + x)>>1.
 * NOTE(review): the luma offset and the loop header are not visible in this
 * listing, and ref2 / ref_index are not used in the visible lines.
 */
1930 static inline void init_ref(MotionEstContext *c, uint8_t *src[3], uint8_t *ref[3], uint8_t *ref2[3], int x, int y, int ref_index){
1931 const int offset[3]= {
1933 ((y*c->uvstride + x)>>1),
1934 ((y*c->uvstride + x)>>1),
1938 c->src[0][i]= src [i];
1939 c->ref[0][i]= ref [i] + offset[i];
/* Named slots of the motion-vector predictor array P used by encode_q_branch():
 * P[3] holds the top-right neighbour's vector and P[4] the median predictor. */
1947 #define P_TOPRIGHT P[3]
1948 #define P_MEDIAN P[4]
1950 #define FLAG_QPEL 1 //must be 1
/**
 * Decides how to code the block at (x, y) of quadtree `level` and writes the
 * decision to the range coder s->c.
 *
 * Visible flow:
 *  - gather neighbour blocks and derive predictors and coder contexts;
 *  - run a motion search for an inter candidate and trial-encode it into
 *    p_buffer using a private copy (p_state) of s->block_state;
 *  - compute a flat-colour intra candidate from the block means and
 *    trial-encode it into i_buffer / i_state in the same way;
 *  - add a lambda2-weighted bit cost to each distortion score;
 *  - unless already at block_max_depth, also try splitting into four
 *    children via recursion;
 *  - copy the chosen candidate's bytes to the real output position and adopt
 *    its coder state and block parameters.
 *
 * NOTE(review): the return statements, several declarations (mx, my, P, pc,
 * ic, l, cb, cr) and the conditions joining these steps are not visible in
 * this listing. The recursion adds up the children's return values as
 * scores, so the function evidently returns the score of what it coded.
 */
1952 static int encode_q_branch(SnowContext *s, int level, int x, int y){
// scratch output and state copies for the two trial encodings
1953 uint8_t p_buffer[1024];
1954 uint8_t i_buffer[1024];
1955 uint8_t p_state[sizeof(s->block_state)];
1956 uint8_t i_state[sizeof(s->block_state)];
// where the real coder is writing, so the winning candidate can be copied there
1958 uint8_t *pbbak= s->c.bytestream;
1959 uint8_t *pbbak_start= s->c.bytestream_start;
1960 int score, score2, iscore, i_len, p_len, block_s, sum;
1961 const int w= s->b_width << s->block_max_depth;
1962 const int h= s->b_height << s->block_max_depth;
1963 const int rem_depth= s->block_max_depth - level;
1964 const int index= (x + y*w) << rem_depth;
// block size in pixels at this level
1965 const int block_w= 1<<(LOG2_MB_SIZE - level);
1966 int trx= (x+1)<<rem_depth;
1967 int try= (y+1)<<rem_depth;
// neighbouring blocks; null_block stands in at picture borders
1968 BlockNode *left = x ? &s->block[index-1] : &null_block;
1969 BlockNode *top = y ? &s->block[index-w] : &null_block;
1970 BlockNode *right = trx<w ? &s->block[index+1] : &null_block;
1971 BlockNode *bottom= try<h ? &s->block[index+w] : &null_block;
1972 BlockNode *tl = y && x ? &s->block[index-w-1] : left;
1973 BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
// colour is predicted from the left block, the motion vector from the median of left/top/top-right
1974 int pl = left->color[0];
1975 int pcb= left->color[1];
1976 int pcr= left->color[2];
1977 int pmx= mid_pred(left->mx, top->mx, tr->mx);
1978 int pmy= mid_pred(left->my, top->my, tr->my);
1981 const int stride= s->current_picture.linesize[0];
1982 const int uvstride= s->current_picture.linesize[1];
1983 uint8_t *current_data[3]= { s->input_picture.data[0] + (x + y* stride)*block_w,
1984 s->input_picture.data[1] + (x + y*uvstride)*block_w/2,
1985 s->input_picture.data[2] + (x + y*uvstride)*block_w/2};
1987 int16_t last_mv[3][2];
1988 int qpel= !!(s->avctx->flags & CODEC_FLAG_QPEL); //unused
1989 const int shift= 1+qpel;
1990 MotionEstContext *c= &s->m.me;
// coder contexts derived from how much the neighbours disagree and how deeply they are split
1991 int mx_context= av_log2(2*ABS(left->mx - top->mx));
1992 int my_context= av_log2(2*ABS(left->my - top->my));
1993 int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
1995 assert(sizeof(s->block_state) >= 256);
// NOTE(review): presumably guarded by a condition not visible in this listing
1997 set_blocks(s, level, x, y, pl, pcb, pcr, pmx, pmy, BLOCK_INTRA);
2001 // clip predictors / edge ?
2003 P_LEFT[0]= left->mx;
2004 P_LEFT[1]= left->my;
2007 P_TOPRIGHT[0]= tr->mx;
2008 P_TOPRIGHT[1]= tr->my;
// candidate vectors: this block's stored vector and those of the right and bottom neighbours
2010 last_mv[0][0]= s->block[index].mx;
2011 last_mv[0][1]= s->block[index].my;
2012 last_mv[1][0]= right->mx;
2013 last_mv[1][1]= right->my;
2014 last_mv[2][0]= bottom->mx;
2015 last_mv[2][1]= bottom->my;
2022 init_ref(c, current_data, s->last_picture.data, NULL, block_w*x, block_w*y, 0);
2024 assert(s->m.me. stride == stride);
2025 assert(s->m.me.uvstride == uvstride);
2027 c->penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_cmp);
2028 c->sub_penalty_factor= get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_sub_cmp);
2029 c->mb_penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->mb_cmp);
2030 c->current_mv_penalty= c->mv_penalty[s->m.f_code=1] + MAX_MV;
// search window relative to the block position
2032 c->xmin = - x*block_w - 16+2;
2033 c->ymin = - y*block_w - 16+2;
2034 c->xmax = - (x+1)*block_w + (w<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-2;
2035 c->ymax = - (y+1)*block_w + (h<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-2;
// clip the predictors to the window, scaled by `shift` to predictor units
2037 if(P_LEFT[0] > (c->xmax<<shift)) P_LEFT[0] = (c->xmax<<shift);
2038 if(P_LEFT[1] > (c->ymax<<shift)) P_LEFT[1] = (c->ymax<<shift);
2039 if(P_TOP[0] > (c->xmax<<shift)) P_TOP[0] = (c->xmax<<shift);
2040 if(P_TOP[1] > (c->ymax<<shift)) P_TOP[1] = (c->ymax<<shift);
2041 if(P_TOPRIGHT[0] < (c->xmin<<shift)) P_TOPRIGHT[0]= (c->xmin<<shift);
2042 if(P_TOPRIGHT[0] > (c->xmax<<shift)) P_TOPRIGHT[0]= (c->xmax<<shift); //due to pmx no clip
2043 if(P_TOPRIGHT[1] > (c->ymax<<shift)) P_TOPRIGHT[1]= (c->ymax<<shift);
2045 P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]);
2046 P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);
// NOTE(review): the condition choosing between the left and the median predictor is not visible in this listing
2049 c->pred_x= P_LEFT[0];
2050 c->pred_y= P_LEFT[1];
2052 c->pred_x = P_MEDIAN[0];
2053 c->pred_y = P_MEDIAN[1];
// inter candidate: full search, sub-pel refinement, then the final block score
2056 score= ff_epzs_motion_search(&s->m, &mx, &my, P, 0, /*ref_index*/ 0, last_mv,
2057 (1<<16)>>shift, level-LOG2_MB_SIZE+4, block_w);
2059 assert(mx >= c->xmin);
2060 assert(mx <= c->xmax);
2061 assert(my >= c->ymin);
2062 assert(my <= c->ymax);
2064 score= s->m.me.sub_motion_search(&s->m, &mx, &my, score, 0, 0, level-LOG2_MB_SIZE+4, block_w);
2065 score= ff_get_mb_score(&s->m, mx, my, 0, 0, level-LOG2_MB_SIZE+4, block_w, 0);
2066 //FIXME if mb_cmp != SSE then intra cant be compared currently and mb_penalty vs. lambda2
// trial-encode the inter candidate into p_buffer with a private copy of the block state
2070 pc.bytestream_start=
2071 pc.bytestream= p_buffer; //FIXME end/start? and at the other stoo
2072 memcpy(p_state, s->block_state, sizeof(s->block_state));
2074 if(level!=s->block_max_depth)
2075 put_rac(&pc, &p_state[4 + s_context], 1);
2076 put_rac(&pc, &p_state[1 + left->type + top->type], 0);
2077 put_symbol(&pc, &p_state[128 + 32*mx_context], mx - pmx, 1);
2078 put_symbol(&pc, &p_state[128 + 32*my_context], my - pmy, 1);
2079 p_len= pc.bytestream - pc.bytestream_start;
// rate term in bits: whole bytes written, pending bytes, and the change in log2(range); weighted by lambda2
2080 score += (s->lambda2*(p_len*8
2081 + (pc.outstanding_count - s->c.outstanding_count)*8
2082 + (-av_log2(pc.range) + av_log2(s->c.range))
2083 ))>>FF_LAMBDA_SHIFT;
// intra candidate: l is the rounded mean of the luma block
2085 block_s= block_w*block_w;
2086 sum = pix_sum(current_data[0], stride, block_w);
2087 l= (sum + block_s/2)/block_s;
// squared error against the flat value l, assuming pix_norm1() yields the sum of squared samples
2088 iscore = pix_norm1(current_data[0], stride, block_w) - 2*l*sum + l*l*block_s;
// chroma means, taken over half-size blocks
2090 block_s= block_w*block_w>>2;
2091 sum = pix_sum(current_data[1], uvstride, block_w>>1);
2092 cb= (sum + block_s/2)/block_s;
2093 // iscore += pix_norm1(&current_mb[1][0], uvstride, block_w>>1) - 2*cb*sum + cb*cb*block_s;
2094 sum = pix_sum(current_data[2], uvstride, block_w>>1);
2095 cr= (sum + block_s/2)/block_s;
2096 // iscore += pix_norm1(&current_mb[2][0], uvstride, block_w>>1) - 2*cr*sum + cr*cr*block_s;
// trial-encode the intra candidate into i_buffer with its own copy of the block state
2099 ic.bytestream_start=
2100 ic.bytestream= i_buffer; //FIXME end/start? and at the other stoo
2101 memcpy(i_state, s->block_state, sizeof(s->block_state));
2102 if(level!=s->block_max_depth)
2103 put_rac(&ic, &i_state[4 + s_context], 1);
2104 put_rac(&ic, &i_state[1 + left->type + top->type], 1);
2105 put_symbol(&ic, &i_state[32], l-pl , 1);
2106 put_symbol(&ic, &i_state[64], cb-pcb, 1);
2107 put_symbol(&ic, &i_state[96], cr-pcr, 1);
2108 i_len= ic.bytestream - ic.bytestream_start;
2109 iscore += (s->lambda2*(i_len*8
2110 + (ic.outstanding_count - s->c.outstanding_count)*8
2111 + (-av_log2(ic.range) + av_log2(s->c.range))
2112 ))>>FF_LAMBDA_SHIFT;
2114 // assert(score==256*256*256*64-1);
2115 assert(iscore < 255*255*256 + s->lambda2*10);
2116 assert(iscore >= 0);
2117 assert(l>=0 && l<=255);
2118 assert(pl>=0 && pl<=255);
// scene-change statistic built from both scores
2121 int varc= iscore >> 8;
2122 int vard= score >> 8;
2123 if (vard <= 64 || vard < varc)
2124 c->scene_change_score+= ff_sqrt(vard) - ff_sqrt(varc);
2126 c->scene_change_score+= s->m.qscale;
// third alternative: write the split flag (0) and code the four children recursively
2129 if(level!=s->block_max_depth){
2130 put_rac(&s->c, &s->block_state[4 + s_context], 0);
2131 score2 = encode_q_branch(s, level+1, 2*x+0, 2*y+0);
2132 score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+0);
2133 score2+= encode_q_branch(s, level+1, 2*x+0, 2*y+1);
2134 score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+1);
2135 score2+= s->lambda2>>FF_LAMBDA_SHIFT; //FIXME exact split overhead
// the split is kept only if it beats both unsplit candidates
2137 if(score2 < score && score2 < iscore)
// commit the intra candidate: copy its bytes to the real output and adopt its state
2142 memcpy(pbbak, i_buffer, i_len);
2144 s->c.bytestream_start= pbbak_start;
2145 s->c.bytestream= pbbak + i_len;
2146 set_blocks(s, level, x, y, l, cb, cr, pmx, pmy, BLOCK_INTRA);
2147 memcpy(s->block_state, i_state, sizeof(s->block_state));
// commit the inter candidate in the same way
2150 memcpy(pbbak, p_buffer, p_len);
2152 s->c.bytestream_start= pbbak_start;
2153 s->c.bytestream= pbbak + p_len;
2154 set_blocks(s, level, x, y, pl, pcb, pcr, mx, my, 0);
2155 memcpy(s->block_state, p_state, sizeof(s->block_state));
/**
 * Tells whether two block descriptions are equivalent.
 *
 * If both have BLOCK_INTRA set, they match when all three colour components
 * are equal. Otherwise they match when mx and my are equal and both agree on
 * the BLOCK_INTRA bit. OR-ing the differences yields zero only when every
 * one of them is zero.
 *
 * @return non-zero when the blocks match, 0 otherwise
 */
2160 static always_inline int same_block(BlockNode *a, BlockNode *b){
2161 if((a->type&BLOCK_INTRA) && (b->type&BLOCK_INTRA)){
2162 return !((a->color[0] - b->color[0]) | (a->color[1] - b->color[1]) | (a->color[2] - b->color[2]));
2164 return !((a->mx - b->mx) | (a->my - b->my) | ((a->type ^ b->type)&BLOCK_INTRA));
/**
 * Writes the block at (x, y) of quadtree `level` to the range coder s->c,
 * using the block parameters already stored in s->block (no search is done
 * here).
 *
 * Below the maximum depth, a flag of 1 is written when the entries at b,
 * b+1, b+w and b+w+1 all match (same_block); otherwise a flag of 0 is
 * written and the four children are coded recursively. A block coded here is
 * written either as intra (a type flag of 1 and three colour deltas against
 * the left neighbour) or as inter (a type flag of 0 and the motion vector
 * deltas against the median predictor).
 * NOTE(review): the conditions and returns joining these parts are not
 * visible in this listing.
 */
2168 static void encode_q_branch2(SnowContext *s, int level, int x, int y){
2169 const int w= s->b_width << s->block_max_depth;
2170 const int rem_depth= s->block_max_depth - level;
2171 const int index= (x + y*w) << rem_depth;
2172 int trx= (x+1)<<rem_depth;
2173 BlockNode *b= &s->block[index];
// neighbouring blocks; null_block stands in at picture borders
2174 BlockNode *left = x ? &s->block[index-1] : &null_block;
2175 BlockNode *top = y ? &s->block[index-w] : &null_block;
2176 BlockNode *tl = y && x ? &s->block[index-w-1] : left;
2177 BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
// colour is predicted from the left block, the motion vector from the median of left/top/top-right
2178 int pl = left->color[0];
2179 int pcb= left->color[1];
2180 int pcr= left->color[2];
2181 int pmx= mid_pred(left->mx, top->mx, tr->mx);
2182 int pmy= mid_pred(left->my, top->my, tr->my);
2183 int mx_context= av_log2(2*ABS(left->mx - top->mx));
2184 int my_context= av_log2(2*ABS(left->my - top->my));
2185 int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
// NOTE(review): presumably guarded by a condition not visible in this listing
2188 set_blocks(s, level, x, y, pl, pcb, pcr, pmx, pmy, BLOCK_INTRA);
2192 if(level!=s->block_max_depth){
// all four entries identical: flag 1, the block is coded at this level
2193 if(same_block(b,b+1) && same_block(b,b+w) && same_block(b,b+w+1)){
2194 put_rac(&s->c, &s->block_state[4 + s_context], 1);
// otherwise flag 0 and recurse into the four children
2196 put_rac(&s->c, &s->block_state[4 + s_context], 0);
2197 encode_q_branch2(s, level+1, 2*x+0, 2*y+0);
2198 encode_q_branch2(s, level+1, 2*x+1, 2*y+0);
2199 encode_q_branch2(s, level+1, 2*x+0, 2*y+1);
2200 encode_q_branch2(s, level+1, 2*x+1, 2*y+1);
2204 if(b->type & BLOCK_INTRA){
// intra: type flag 1, then colour deltas against the left neighbour
2205 put_rac(&s->c, &s->block_state[1 + (left->type&1) + (top->type&1)], 1);
2206 put_symbol(&s->c, &s->block_state[32], b->color[0]-pl , 1);
2207 put_symbol(&s->c, &s->block_state[64], b->color[1]-pcb, 1);
2208 put_symbol(&s->c, &s->block_state[96], b->color[2]-pcr, 1);
2209 set_blocks(s, level, x, y, b->color[0], b->color[1], b->color[2], pmx, pmy, BLOCK_INTRA);
// inter: type flag 0, then motion vector deltas against the median predictor
2211 put_rac(&s->c, &s->block_state[1 + (left->type&1) + (top->type&1)], 0);
2212 put_symbol(&s->c, &s->block_state[128 + 32*mx_context], b->mx - pmx, 1);
2213 put_symbol(&s->c, &s->block_state[128 + 32*my_context], b->my - pmy, 1);
2214 set_blocks(s, level, x, y, pl, pcb, pcr, b->mx, b->my, 0);
/* Read the block-tree node at (x, y) of the given level from the range coder
 * s->c and store it with set_blocks(). A node is read as a leaf when level
 * equals s->block_max_depth or when the flag read in context 4 + s_context is
 * set; the four recursive calls at the end handle the children. */
2218 static void decode_q_branch(SnowContext *s, int level, int x, int y){
2219 const int w= s->b_width << s->block_max_depth;
2220 const int rem_depth= s->block_max_depth - level;
2221 const int index= (x + y*w) << rem_depth;
2222 int trx= (x+1)<<rem_depth;
// Neighbours outside the picture fall back to null_block or to another neighbour.
2223 BlockNode *left = x ? &s->block[index-1] : &null_block;
2224 BlockNode *top = y ? &s->block[index-w] : &null_block;
2225 BlockNode *tl = y && x ? &s->block[index-w-1] : left;
2226 BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
2227 int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
// NOTE(review): the condition guarding this call is not visible in this listing - confirm.
2230 set_blocks(s, level, x, y, null_block.color[0], null_block.color[1], null_block.color[2], null_block.mx, null_block.my, BLOCK_INTRA);
2234 if(level==s->block_max_depth || get_rac(&s->c, &s->block_state[4 + s_context])){
// Predictors: colors from the left neighbour, motion vector from the median
// of left, top and top-right.
2236 int l = left->color[0];
2237 int cb= left->color[1];
2238 int cr= left->color[2];
2239 int mx= mid_pred(left->mx, top->mx, tr->mx);
2240 int my= mid_pred(left->my, top->my, tr->my);
// The terms multiplied by 0 do not contribute to the context value.
2241 int mx_context= av_log2(2*ABS(left->mx - top->mx)) + 0*av_log2(2*ABS(tr->mx - top->mx));
2242 int my_context= av_log2(2*ABS(left->my - top->my)) + 0*av_log2(2*ABS(tr->my - top->my));
2244 type= get_rac(&s->c, &s->block_state[1 + left->type + top->type]) ? BLOCK_INTRA : 0;
// Color deltas are added to the color predictors.
// NOTE(review): the branch selecting between color and motion deltas is not visible here - confirm.
2247 l += get_symbol(&s->c, &s->block_state[32], 1);
2248 cb+= get_symbol(&s->c, &s->block_state[64], 1);
2249 cr+= get_symbol(&s->c, &s->block_state[96], 1);
// Motion vector deltas are added to the median predictor.
2251 mx+= get_symbol(&s->c, &s->block_state[128 + 32*mx_context], 1);
2252 my+= get_symbol(&s->c, &s->block_state[128 + 32*my_context], 1);
2254 set_blocks(s, level, x, y, l, cb, cr, mx, my, type);
// Children of this node, one level deeper.
2256 decode_q_branch(s, level+1, 2*x+0, 2*y+0);
2257 decode_q_branch(s, level+1, 2*x+1, 2*y+0);
2258 decode_q_branch(s, level+1, 2*x+0, 2*y+1);
2259 decode_q_branch(s, level+1, 2*x+1, 2*y+1);
/* Encode the block tree of the current frame, one level-0 node per (x, y).
 * With me_method == ME_ITER the nodes are written by encode_q_branch2(),
 * the other call uses encode_q_branch().
 * NOTE(review): the loops providing x, y and w are not visible in this listing. */
2263 static void encode_blocks(SnowContext *s){
// NOTE(review): the statement controlled by this condition is not visible here - confirm.
2268 if(s->avctx->me_method == ME_ITER && !s->keyframe)
// Refuse to continue when fewer than w*MB_SIZE*MB_SIZE*3 bytes remain in the output buffer.
2272 if(s->c.bytestream_end - s->c.bytestream < w*MB_SIZE*MB_SIZE*3){ //FIXME nicer limit
2273 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2277 if(s->avctx->me_method == ME_ITER)
2278 encode_q_branch2(s, 0, x, y);
2280 encode_q_branch (s, 0, x, y);
/* Decode the block tree of the current frame by calling decode_q_branch()
 * on level 0 for node (x, y).
 * NOTE(review): the loops providing x and y are not visible in this listing. */
2285 static void decode_blocks(SnowContext *s){
2292 decode_q_branch(s, 0, x, y);
/* Two-pass sub-pel interpolation of a b_w x b_h block with tmp as the
 * intermediate buffer. The first loop nest covers b_h+5 rows and b_w columns
 * and is controlled by dx; the second covers b_h rows, reads six consecutive
 * tmp rows per output sample and is controlled by dy. In both passes a 6-tap
 * value am = 20*(a2+a3) - 5*(a1+a4) + (a0+a5) is blended with the a2 sample
 * for fractions below 8 and with the a3 sample for fractions from 8 up.
 * NOTE(review): the lines loading from src and storing to tmp/dst are not
 * visible in this listing - presumably src is the input and dst the output. */
2297 static void mc_block(uint8_t *dst, uint8_t *src, uint8_t *tmp, int stride, int b_w, int b_h, int dx, int dy){
// First pass: five extra rows are produced because the second pass needs six rows per output row.
2300 for(y=0; y < b_h+5; y++){
2301 for(x=0; x < b_w; x++){
2308 // int am= 9*(a1+a2) - (a0+a3);
2309 int am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5);
2310 // int am= 18*(a2+a3) - 2*(a1+a4);
2311 // int aL= (-7*a0 + 105*a1 + 35*a2 - 5*a3)>>3;
2312 // int aR= (-7*a3 + 105*a2 + 35*a1 - 5*a0)>>3;
2314 // if(b_w==16) am= 8*(a1+a2);
// Linear blend with weights (8-dx, dx) or (16-dx, dx-8); +128 rounds before the shift by 8.
2316 if(dx<8) am = (32*a2*( 8-dx) + am* dx + 128)>>8;
2317 else am = ( am*(16-dx) + 32*a3*(dx-8) + 128)>>8;
2319 /* FIXME Try increasing tmp buffer to 16 bits and not clipping here. Should give marginally better results. - Robert*/
// Out-of-range values become 0 when negative and all-ones (255 as a byte) when
// above 255, assuming >> on a negative int is an arithmetic shift.
2320 if(am&(~255)) am= ~(am>>31);
2324 /* if (dx< 4) tmp[x + y*stride]= (16*a1*( 4-dx) + aL* dx + 32)>>6;
2325 else if(dx< 8) tmp[x + y*stride]= ( aL*( 8-dx) + am*(dx- 4) + 32)>>6;
2326 else if(dx<12) tmp[x + y*stride]= ( am*(12-dx) + aR*(dx- 8) + 32)>>6;
2327 else tmp[x + y*stride]= ( aR*(16-dx) + 16*a2*(dx-12) + 32)>>6;*/
// Move tmp back by the b_h+5 rows covered by the first pass.
2332 tmp -= (b_h+5)*stride;
// Second pass: same filter applied down the columns of tmp, controlled by dy.
2334 for(y=0; y < b_h; y++){
2335 for(x=0; x < b_w; x++){
2336 int a0= tmp[x + 0*stride];
2337 int a1= tmp[x + 1*stride];
2338 int a2= tmp[x + 2*stride];
2339 int a3= tmp[x + 3*stride];
2340 int a4= tmp[x + 4*stride];
2341 int a5= tmp[x + 5*stride];
2342 int am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5);
2343 // int am= 18*(a2+a3) - 2*(a1+a4);
2344 /* int aL= (-7*a0 + 105*a1 + 35*a2 - 5*a3)>>3;
2345 int aR= (-7*a3 + 105*a2 + 35*a1 - 5*a0)>>3;*/
2347 // if(b_w==16) am= 8*(a1+a2);
2349 if(dy<8) am = (32*a2*( 8-dy) + am* dy + 128)>>8;
2350 else am = ( am*(16-dy) + 32*a3*(dy-8) + 128)>>8;
// Same clamp as in the first pass.
2352 if(am&(~255)) am= ~(am>>31);
2355 /* if (dy< 4) tmp[x + y*stride]= (16*a1*( 4-dy) + aL* dy + 32)>>6;
2356 else if(dy< 8) tmp[x + y*stride]= ( aL*( 8-dy) + am*(dy- 4) + 32)>>6;
2357 else if(dy<12) tmp[x + y*stride]= ( am*(12-dy) + aR*(dy- 8) + 32)>>6;
2358 else tmp[x + y*stride]= ( aR*(16-dy) + 16*a2*(dy-12) + 32)>>6;*/
2363 STOP_TIMER("mc_block")
/* mca(dx,dy,b_w) defines a function mc_block_hpel<dx><dy><b_w>(dst, src, stride, h)
 * that runs mc_block() on a b_w x b_w block with the fixed fractional offsets
 * dx, dy. The source pointer is moved back by two columns and two rows before
 * the call, and the scratch buffer is a variable-length array of
 * stride*(b_w+5) bytes on the stack.
 * NOTE(review): no visible line uses the h parameter - confirm against the full macro. */
2366 #define mca(dx,dy,b_w)\
2367 static void mc_block_hpel ## dx ## dy ## b_w(uint8_t *dst, uint8_t *src, int stride, int h){\
2368 uint8_t tmp[stride*(b_w+5)];\
2370 mc_block(dst, src-2-2*stride, tmp, stride, b_w, b_w, dx, dy);\
/* Produce the prediction of one b_w x b_h block into dst.
 * For a node with BLOCK_INTRA set, dst is filled with the node's color for
 * plane_index. For other nodes the motion vector is scaled, split into a
 * fractional part (dx, dy) and the block is interpolated from src, either by
 * mc_block() or by the H.264 quarter-pel functions in s->dsp.
 * tmp is scratch space; sx, sy give the block position and w, h the plane size.
 * NOTE(review): the lines selecting between the fill loops, and those adjusting
 * sx/sy by the integer part of the vector, are not visible in this listing. */
2382 static void pred_block(SnowContext *s, uint8_t *dst, uint8_t *src, uint8_t *tmp, int stride, int sx, int sy, int b_w, int b_h, BlockNode *block, int plane_index, int w, int h){
2383 if(block->type & BLOCK_INTRA){
2385 const int color = block->color[plane_index];
// color replicated into four bytes so that one 32-bit store fills four pixels.
2386 const int color4= color*0x01010101;
// 32 bytes per row.
2388 for(y=0; y < b_h; y++){
2389 *(uint32_t*)&dst[0 + y*stride]= color4;
2390 *(uint32_t*)&dst[4 + y*stride]= color4;
2391 *(uint32_t*)&dst[8 + y*stride]= color4;
2392 *(uint32_t*)&dst[12+ y*stride]= color4;
2393 *(uint32_t*)&dst[16+ y*stride]= color4;
2394 *(uint32_t*)&dst[20+ y*stride]= color4;
2395 *(uint32_t*)&dst[24+ y*stride]= color4;
2396 *(uint32_t*)&dst[28+ y*stride]= color4;
// 16 bytes per row.
2399 for(y=0; y < b_h; y++){
2400 *(uint32_t*)&dst[0 + y*stride]= color4;
2401 *(uint32_t*)&dst[4 + y*stride]= color4;
2402 *(uint32_t*)&dst[8 + y*stride]= color4;
2403 *(uint32_t*)&dst[12+ y*stride]= color4;
// 8 bytes per row.
2406 for(y=0; y < b_h; y++){
2407 *(uint32_t*)&dst[0 + y*stride]= color4;
2408 *(uint32_t*)&dst[4 + y*stride]= color4;
// 4 bytes per row.
2411 for(y=0; y < b_h; y++){
2412 *(uint32_t*)&dst[0 + y*stride]= color4;
// Generic width: one byte at a time.
2415 for(y=0; y < b_h; y++){
2416 for(x=0; x < b_w; x++){
2417 dst[x + y*stride]= color;
// Plane 0 uses twice the scale of the other planes.
2422 const int scale= plane_index ? s->mv_scale : 2*s->mv_scale;
2423 int mx= block->mx*scale;
2424 int my= block->my*scale;
// Fractional part of the scaled vector, in sixteenths.
2425 const int dx= mx&15;
2426 const int dy= my&15;
2427 const int tab_index= 3 - (b_w>>2) + (b_w>>4);
2430 src += sx + sy*stride;
// The unsigned casts make negative sx/sy fail the bound as well, so blocks
// reaching outside the plane on any side go through ff_emulated_edge_mc().
2431 if( (unsigned)sx >= w - b_w - 4
2432 || (unsigned)sy >= h - b_h - 4){
2433 ff_emulated_edge_mc(tmp + MB_SIZE, src, stride, b_w+5, b_h+5, sx, sy, w, h);
2436 // assert(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h);
2437 // assert(!(b_w&(b_w-1)));
2438 assert(b_w>1 && b_h>1);
2439 assert(tab_index>=0 && tab_index<4 || b_w==32);
// mc_block() handles fractions that are not multiples of 4, aspect ratios other
// than 1:1, 1:2 and 2:1, and widths that are not a power of two.
2440 if((dx&3) || (dy&3) || !(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h) || (b_w&(b_w-1)))
2441 mc_block(dst, src, tmp, stride, b_w, b_h, dx, dy);
// Two 16-wide calls per group of 16 rows, covering 32 columns.
2444 for(y=0; y<b_h; y+=16){
2445 s->dsp.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + y*stride, src + 2 + (y+2)*stride,stride);
2446 s->dsp.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + 16 + y*stride, src + 18 + (y+2)*stride,stride);
2449 s->dsp.put_h264_qpel_pixels_tab[tab_index ][dy+(dx>>2)](dst,src + 2 + 2*stride,stride);
// Block twice as wide as high: two calls side by side, b_h columns apart.
2450 else if(b_w==2*b_h){
2451 s->dsp.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst ,src + 2 + 2*stride,stride);
2452 s->dsp.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst+b_h,src + 2 + b_h + 2*stride,stride);
// Two calls stacked vertically, b_w rows apart.
2455 s->dsp.put_h264_qpel_pixels_tab[tab_index ][dy+(dx>>2)](dst ,src + 2 + 2*stride ,stride);
2456 s->dsp.put_h264_qpel_pixels_tab[tab_index ][dy+(dx>>2)](dst+b_w*stride,src + 2 + 2*stride+b_w*stride,stride);
/* Blend the four predictions block[0..3] with the OBMC window and combine the
 * result with the slice buffer line by line. For every row, four weight
 * pointers into the obmc table are set up (offsets 0, obmc_stride/2,
 * obmc_stride*obmc_stride/2 and their sum) and the weighted sum of the four
 * predictions is formed per pixel. One visible path adds the slice-buffer
 * value, rounds, clamps and stores the pixel to dst8; the other subtracts the
 * value from the slice-buffer line.
 * NOTE(review): the conditions choosing between the scaling statements and
 * between the two paths are not visible in this listing - presumably the
 * latter is the add parameter; confirm. */
2461 void ff_snow_inner_add_yblock(uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h,
2462 int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8){
2465 for(y=0; y<b_h; y++){
2466 //FIXME ugly misuse of obmc_stride
2467 uint8_t *obmc1= obmc + y*obmc_stride;
2468 uint8_t *obmc2= obmc1+ (obmc_stride>>1);
2469 uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1);
2470 uint8_t *obmc4= obmc3+ (obmc_stride>>1);
// Slice-buffer line that corresponds to this row of the block.
2471 dst = slice_buffer_get_line(sb, src_y + y);
2472 for(x=0; x<b_w; x++){
// Note the reversed pairing: obmc1 weights block[3], obmc4 weights block[0].
2473 int v= obmc1[x] * block[3][x + y*src_stride]
2474 +obmc2[x] * block[2][x + y*src_stride]
2475 +obmc3[x] * block[1][x + y*src_stride]
2476 +obmc4[x] * block[0][x + y*src_stride];
2478 v <<= 8 - LOG2_OBMC_MAX;
2480 v += 1<<(7 - FRAC_BITS);
2481 v >>= 8 - FRAC_BITS;
// Combine with the slice buffer, round to pixel precision, clamp and store.
2484 v += dst[x + src_x];
2485 v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS;
2486 if(v&(~255)) v= ~(v>>31);
2487 dst8[x + y*src_stride] = v;
// Remove the prediction from the slice-buffer value.
2489 dst[x + src_x] -= v;
//FIXME name cleanup (b_w, block_w, b_width stuff)
/* Slice-buffer variant of the OBMC block add: predict the area at
 * (src_x, src_y) from the four block nodes lt, rt, lb, rb around block
 * position (b_x, b_y) and hand the four predictions to
 * s->dsp.inner_add_yblock(). The visible branches deal with blocks on the
 * last block column/row and with areas reaching outside the w x h plane;
 * nothing is done when the clipped size is not positive.
 * NOTE(review): many statements of this function (the bodies of the clipping
 * branches and the reuse of an earlier prediction when same_block() matches)
 * are not visible in this listing. */
2496 static always_inline void add_yblock_buffered(SnowContext *s, slice_buffer * sb, DWTELEM *old_dst, uint8_t *dst8, uint8_t *src, uint8_t *obmc, int src_x, int src_y, int b_w, int b_h, int w, int h, int dst_stride, int src_stride, int obmc_stride, int b_x, int b_y, int add, int plane_index){
2497 DWTELEM * dst = NULL;
2498 const int b_width = s->b_width << s->block_max_depth;
2499 const int b_height= s->b_height << s->block_max_depth;
2500 const int b_stride= b_width;
// The four block nodes whose predictions overlap in this area.
2501 BlockNode *lt= &s->block[b_x + b_y*b_stride];
2502 BlockNode *rt= lt+1;
2503 BlockNode *lb= lt+b_stride;
2504 BlockNode *rb= lb+1;
2506 int tmp_step= src_stride >= 7*MB_SIZE ? MB_SIZE : MB_SIZE*src_stride;
// Variable-length scratch array on the stack, src_stride*7*MB_SIZE bytes.
2507 uint8_t tmp[src_stride*7*MB_SIZE]; //FIXME align
2514 }else if(b_x + 1 >= b_width){
2521 }else if(b_y + 1 >= b_height){
2526 if(src_x<0){ //FIXME merge with prev & always round internal width upto *16
2530 }else if(src_x + b_w > w){
2534 obmc -= src_y*obmc_stride;
2537 }else if(src_y + b_h> h){
2541 if(b_w<=0 || b_h<=0) return;
2543 assert(src_stride > 2*MB_SIZE + 5);
2544 // old_dst += src_x + src_y*dst_stride;
2545 dst8+= src_x + src_y*src_stride;
2546 // src += src_x + src_y*src_stride;
2548 ptmp= tmp + 3*tmp_step;
// One prediction per neighbouring node; the same_block() tests compare a node
// with the ones already handled.
2551 pred_block(s, block[0], src, tmp, src_stride, src_x, src_y, b_w, b_h, lt, plane_index, w, h);
2553 if(same_block(lt, rt)){
2558 pred_block(s, block[1], src, tmp, src_stride, src_x, src_y, b_w, b_h, rt, plane_index, w, h);
2561 if(same_block(lt, lb)){
2563 }else if(same_block(rt, lb)){
2568 pred_block(s, block[2], src, tmp, src_stride, src_x, src_y, b_w, b_h, lb, plane_index, w, h);
2571 if(same_block(lt, rb) ){
2573 }else if(same_block(rt, rb)){
2575 }else if(same_block(lb, rb)){
2579 pred_block(s, block[3], src, tmp, src_stride, src_x, src_y, b_w, b_h, rb, plane_index, w, h);
// NOTE(review): dst is initialised to NULL above and no visible line reassigns
// it, so the four per-quadrant loops below would dereference NULL if they were
// active; presumably they are disabled by preprocessor lines not shown - confirm.
2582 for(y=0; y<b_h; y++){
2583 for(x=0; x<b_w; x++){
2584 int v= obmc [x + y*obmc_stride] * block[3][x + y*src_stride] * (256/OBMC_MAX);
2585 if(add) dst[x + y*dst_stride] += v;
2586 else dst[x + y*dst_stride] -= v;
2589 for(y=0; y<b_h; y++){
2590 uint8_t *obmc2= obmc + (obmc_stride>>1);
2591 for(x=0; x<b_w; x++){
2592 int v= obmc2[x + y*obmc_stride] * block[2][x + y*src_stride] * (256/OBMC_MAX);
2593 if(add) dst[x + y*dst_stride] += v;
2594 else dst[x + y*dst_stride] -= v;
2597 for(y=0; y<b_h; y++){
2598 uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
2599 for(x=0; x<b_w; x++){
2600 int v= obmc3[x + y*obmc_stride] * block[1][x + y*src_stride] * (256/OBMC_MAX);
2601 if(add) dst[x + y*dst_stride] += v;
2602 else dst[x + y*dst_stride] -= v;
2605 for(y=0; y<b_h; y++){
2606 uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
2607 uint8_t *obmc4= obmc3+ (obmc_stride>>1);
2608 for(x=0; x<b_w; x++){
2609 int v= obmc4[x + y*obmc_stride] * block[0][x + y*src_stride] * (256/OBMC_MAX);
2610 if(add) dst[x + y*dst_stride] += v;
2611 else dst[x + y*dst_stride] -= v;
// Weighted blend and slice-buffer update, done by the dsp function.
2619 s->dsp.inner_add_yblock(obmc, obmc_stride, block, b_w, b_h, src_x,src_y, src_stride, sb, add, dst8);
2620 STOP_TIMER("Inner add y block")
//FIXME name cleanup (b_w, block_w, b_width stuff)
/* OBMC block add on a plain DWTELEM buffer: predict the area at
 * (src_x, src_y) from the four block nodes lt, rt, lb, rb around block
 * position (b_x, b_y), blend the predictions with the obmc window and combine
 * the result with dst. In the combined loop at the end, one visible path adds
 * dst, rounds, clamps and writes the pixel to dst8, the other subtracts the
 * blended value from dst. The visible branches deal with blocks on the last
 * block column/row and with areas reaching outside the w x h plane; nothing
 * is done when the clipped size is not positive.
 * NOTE(review): many statements (bodies of the clipping branches, reuse of an
 * earlier prediction when same_block() matches, and the lines choosing between
 * the per-quadrant loops and the combined loop) are not visible in this
 * listing; the use of offset_dst is not visible either. */
2626 static always_inline void add_yblock(SnowContext *s, DWTELEM *dst, uint8_t *dst8, uint8_t *src, uint8_t *obmc, int src_x, int src_y, int b_w, int b_h, int w, int h, int dst_stride, int src_stride, int obmc_stride, int b_x, int b_y, int add, int offset_dst, int plane_index){
2627 const int b_width = s->b_width << s->block_max_depth;
2628 const int b_height= s->b_height << s->block_max_depth;
2629 const int b_stride= b_width;
// The four block nodes whose predictions overlap in this area.
2630 BlockNode *lt= &s->block[b_x + b_y*b_stride];
2631 BlockNode *rt= lt+1;
2632 BlockNode *lb= lt+b_stride;
2633 BlockNode *rb= lb+1;
2635 int tmp_step= src_stride >= 7*MB_SIZE ? MB_SIZE : MB_SIZE*src_stride;
// Variable-length scratch array on the stack, src_stride*7*MB_SIZE bytes.
2636 uint8_t tmp[src_stride*7*MB_SIZE]; //FIXME align
2643 }else if(b_x + 1 >= b_width){
2650 }else if(b_y + 1 >= b_height){
2655 if(src_x<0){ //FIXME merge with prev & always round internal width upto *16
2661 }else if(src_x + b_w > w){
2665 obmc -= src_y*obmc_stride;
2668 dst -= src_y*dst_stride;
2670 }else if(src_y + b_h> h){
2674 if(b_w<=0 || b_h<=0) return;
2676 assert(src_stride > 2*MB_SIZE + 5);
// Move both outputs to the top-left corner of the area.
2678 dst += src_x + src_y*dst_stride;
2679 dst8+= src_x + src_y*src_stride;
2680 // src += src_x + src_y*src_stride;
2682 ptmp= tmp + 3*tmp_step;
// One prediction per neighbouring node; the same_block() tests compare a node
// with the ones already handled.
2685 pred_block(s, block[0], src, tmp, src_stride, src_x, src_y, b_w, b_h, lt, plane_index, w, h);
2687 if(same_block(lt, rt)){
2692 pred_block(s, block[1], src, tmp, src_stride, src_x, src_y, b_w, b_h, rt, plane_index, w, h);
2695 if(same_block(lt, lb)){
2697 }else if(same_block(rt, lb)){
2702 pred_block(s, block[2], src, tmp, src_stride, src_x, src_y, b_w, b_h, lb, plane_index, w, h);
2705 if(same_block(lt, rb) ){
2707 }else if(same_block(rt, rb)){
2709 }else if(same_block(lb, rb)){
2713 pred_block(s, block[3], src, tmp, src_stride, src_x, src_y, b_w, b_h, rb, plane_index, w, h);
// Per-quadrant form: each loop applies one quadrant of the obmc window to one
// prediction and adds to or subtracts from dst depending on add.
2716 for(y=0; y<b_h; y++){
2717 for(x=0; x<b_w; x++){
2718 int v= obmc [x + y*obmc_stride] * block[3][x + y*src_stride] * (256/OBMC_MAX);
2719 if(add) dst[x + y*dst_stride] += v;
2720 else dst[x + y*dst_stride] -= v;
2723 for(y=0; y<b_h; y++){
2724 uint8_t *obmc2= obmc + (obmc_stride>>1);
2725 for(x=0; x<b_w; x++){
2726 int v= obmc2[x + y*obmc_stride] * block[2][x + y*src_stride] * (256/OBMC_MAX);
2727 if(add) dst[x + y*dst_stride] += v;
2728 else dst[x + y*dst_stride] -= v;
2731 for(y=0; y<b_h; y++){
2732 uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
2733 for(x=0; x<b_w; x++){
2734 int v= obmc3[x + y*obmc_stride] * block[1][x + y*src_stride] * (256/OBMC_MAX);
2735 if(add) dst[x + y*dst_stride] += v;
2736 else dst[x + y*dst_stride] -= v;
2739 for(y=0; y<b_h; y++){
2740 uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
2741 uint8_t *obmc4= obmc3+ (obmc_stride>>1);
2742 for(x=0; x<b_w; x++){
2743 int v= obmc4[x + y*obmc_stride] * block[0][x + y*src_stride] * (256/OBMC_MAX);
2744 if(add) dst[x + y*dst_stride] += v;
2745 else dst[x + y*dst_stride] -= v;
// Combined form: all four weighted predictions are summed per pixel.
2749 for(y=0; y<b_h; y++){
2750 //FIXME ugly misuse of obmc_stride
2751 uint8_t *obmc1= obmc + y*obmc_stride;
2752 uint8_t *obmc2= obmc1+ (obmc_stride>>1);
2753 uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1);
2754 uint8_t *obmc4= obmc3+ (obmc_stride>>1);
2755 for(x=0; x<b_w; x++){
2756 int v= obmc1[x] * block[3][x + y*src_stride]
2757 +obmc2[x] * block[2][x + y*src_stride]
2758 +obmc3[x] * block[1][x + y*src_stride]
2759 +obmc4[x] * block[0][x + y*src_stride];
2761 v <<= 8 - LOG2_OBMC_MAX;
2763 v += 1<<(7 - FRAC_BITS);
2764 v >>= 8 - FRAC_BITS;
// Combine with dst, round to pixel precision, clamp and store the pixel.
2767 v += dst[x + y*dst_stride];
2768 v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS;
2769 if(v&(~255)) v= ~(v>>31);
2770 dst8[x + y*src_stride] = v;
// Remove the prediction from dst.
2772 dst[x + y*dst_stride] -= v;
/* Apply the prediction for block row mb_y of one plane, working on the lines
 * held in the slice buffer sb. When s->keyframe is set or bit 512 of
 * avctx->debug is set, the visible code only shifts the rows
 * block_w*mb_y .. min(h, block_w*(mb_y+1))-1 by the constant 128<<FRAC_BITS:
 * one path adds it with rounding, clamps and writes pixels to the current
 * picture, the other subtracts it from the line. The remaining code calls
 * add_yblock_buffered() for mb_x = 0 .. mb_w inclusive, with each block origin
 * moved up and left by block_w/2.
 * NOTE(review): several lines (inner x loops, the test on add, the remaining
 * arguments of the add_yblock_buffered() call) are not visible in this listing. */
2779 static always_inline void predict_slice_buffered(SnowContext *s, slice_buffer * sb, DWTELEM * old_buffer, int plane_index, int add, int mb_y){
2780 Plane *p= &s->plane[plane_index];
2781 const int mb_w= s->b_width << s->block_max_depth;
2782 const int mb_h= s->b_height << s->block_max_depth;
2784 int block_size = MB_SIZE >> s->block_max_depth;
// Planes other than 0 use half the block width and the next obmc table.
2785 int block_w = plane_index ? block_size/2 : block_size;
2786 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
2787 int obmc_stride= plane_index ? block_size : 2*block_size;
2788 int ref_stride= s->current_picture.linesize[plane_index];
2789 uint8_t *ref = s->last_picture.data[plane_index];
2790 uint8_t *dst8= s->current_picture.data[plane_index];
2795 if(s->keyframe || (s->avctx->debug&512)){
2800 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++)
2802 // DWTELEM * line = slice_buffer_get_line(sb, y);
2803 DWTELEM * line = sb->line[y];
2806 // int v= buf[x + y*w] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
// Add the constant 128 (in fixed point) plus a rounding term, then clamp.
2807 int v= line[x] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
2809 if(v&(~255)) v= ~(v>>31);
2810 dst8[x + y*ref_stride]= v;
2814 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++)
2816 // DWTELEM * line = slice_buffer_get_line(sb, y);
2817 DWTELEM * line = sb->line[y];
// Remove the constant 128 (in fixed point) from the line.
2820 line[x] -= 128 << FRAC_BITS;
2821 // buf[x + y*w]-= 128<<FRAC_BITS;
// mb_w+1 iterations: the half-block offset below makes the blocks straddle block boundaries.
2829 for(mb_x=0; mb_x<=mb_w; mb_x++){
2832 add_yblock_buffered(s, sb, old_buffer, dst8, ref, obmc,
2833 block_w*mb_x - block_w/2,
2834 block_w*mb_y - block_w/2,
2837 w, ref_stride, obmc_stride,
2841 STOP_TIMER("add_yblock")
2844 STOP_TIMER("predict_slice")
/* Apply the prediction for block row mb_y of one plane on the coefficient
 * buffer buf. When s->keyframe is set or bit 512 of avctx->debug is set, the
 * visible code only shifts the rows block_w*mb_y .. min(h, block_w*(mb_y+1))-1
 * by the constant 128<<FRAC_BITS: one path adds it with rounding, clamps and
 * writes pixels to the current picture, the other subtracts it from buf. The
 * remaining code calls add_yblock() for mb_x = 0 .. mb_w inclusive, with each
 * block origin moved up and left by block_w/2.
 * NOTE(review): several lines (inner x loops, the test on add, the remaining
 * arguments of the add_yblock() call) are not visible in this listing. */
2847 static always_inline void predict_slice(SnowContext *s, DWTELEM *buf, int plane_index, int add, int mb_y){
2848 Plane *p= &s->plane[plane_index];
2849 const int mb_w= s->b_width << s->block_max_depth;
2850 const int mb_h= s->b_height << s->block_max_depth;
2852 int block_size = MB_SIZE >> s->block_max_depth;
// Planes other than 0 use half the block width and the next obmc table.
2853 int block_w = plane_index ? block_size/2 : block_size;
2854 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
2855 const int obmc_stride= plane_index ? block_size : 2*block_size;
2856 int ref_stride= s->current_picture.linesize[plane_index];
2857 uint8_t *ref = s->last_picture.data[plane_index];
2858 uint8_t *dst8= s->current_picture.data[plane_index];
2863 if(s->keyframe || (s->avctx->debug&512)){
2868 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
// Add the constant 128 (in fixed point) plus a rounding term, then clamp.
2870 int v= buf[x + y*w] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
2872 if(v&(~255)) v= ~(v>>31);
2873 dst8[x + y*ref_stride]= v;
2877 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
// Remove the constant 128 (in fixed point) from the buffer.
2879 buf[x + y*w]-= 128<<FRAC_BITS;
// mb_w+1 iterations: the half-block offset below makes the blocks straddle block boundaries.
2887 for(mb_x=0; mb_x<=mb_w; mb_x++){
2890 add_yblock(s, buf, dst8, ref, obmc,
2891 block_w*mb_x - block_w/2,
2892 block_w*mb_y - block_w/2,
2895 w, ref_stride, obmc_stride,
2897 add, 1, plane_index);
2899 STOP_TIMER("add_yblock")
2902 STOP_TIMER("predict_slice")
/* Run predict_slice() on every block row of a plane, mb_y = 0 .. mb_h
 * inclusive, passing buf, plane_index and add through unchanged. */
2905 static always_inline void predict_plane(SnowContext *s, DWTELEM *buf, int plane_index, int add){
2906 const int mb_h= s->b_height << s->block_max_depth;
2908 for(mb_y=0; mb_y<=mb_h; mb_y++)
2909 predict_slice(s, buf, plane_index, add, mb_y);
/* Compute a color value in 0..255 for block (mb_x, mb_y) of one plane.
 * The block is temporarily marked BLOCK_INTRA with color 0 for this plane, the
 * scratch area dst is cleared, and add_yblock() is called with add = 0 for the
 * neighbouring positions (mb_x2, mb_y2), so that dst holds the negated OBMC
 * prediction. The sums ab (weighted difference between source pixel and that
 * prediction) and aa (squared weights) are accumulated, and the result is the
 * rounded quotient ab*2^LOG2_OBMC_MAX / aa, clipped to 0..255.
 * NOTE(review): the loop over i, the restore of the saved node and the
 * declarations of ab/aa are not visible in this listing; a zero aa would make
 * the final division undefined - confirm it cannot happen. */
2912 static int get_dc(SnowContext *s, int mb_x, int mb_y, int plane_index){
2914 Plane *p= &s->plane[plane_index];
2915 const int block_size = MB_SIZE >> s->block_max_depth;
2916 const int block_w = plane_index ? block_size/2 : block_size;
2917 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
2918 const int obmc_stride= plane_index ? block_size : 2*block_size;
2919 const int ref_stride= s->current_picture.linesize[plane_index];
2920 uint8_t *ref= s-> last_picture.data[plane_index];
2921 uint8_t *src= s-> input_picture.data[plane_index];
// Per-plane scratch area inside the shared obmc_scratchpad.
2922 DWTELEM *dst= (DWTELEM*)s->m.obmc_scratchpad + plane_index*block_size*block_size*4;
2923 const int b_stride = s->b_width << s->block_max_depth;
2924 const int w= p->width;
2925 const int h= p->height;
2926 int index= mb_x + mb_y*b_stride;
2927 BlockNode *b= &s->block[index];
// Copy of the node taken before it is modified below.
2928 BlockNode backup= *b;
2932 b->type|= BLOCK_INTRA;
2933 b->color[plane_index]= 0;
2934 memset(dst, 0, obmc_stride*obmc_stride*sizeof(DWTELEM));
// i selects one of the four positions at offsets (-1 or 0, -1 or 0) from the block.
2937 int mb_x2= mb_x + (i &1) - 1;
2938 int mb_y2= mb_y + (i>>1) - 1;
2939 int x= block_w*mb_x2 + block_w/2;
2940 int y= block_w*mb_y2 + block_w/2;
// add = 0: the prediction is subtracted from the cleared scratch area.
2942 add_yblock(s, dst + ((i&1)+(i>>1)*obmc_stride)*block_w, NULL, ref, obmc,
2943 x, y, block_w, block_w, w, h, obmc_stride, ref_stride, obmc_stride, mb_x2, mb_y2, 0, 0, plane_index);
// Only pixels inside the plane are visited.
2945 for(y2= FFMAX(y, 0); y2<FFMIN(h, y+block_w); y2++){
2946 for(x2= FFMAX(x, 0); x2<FFMIN(w, x+block_w); x2++){
2947 int index= x2-(block_w*mb_x - block_w/2) + (y2-(block_w*mb_y - block_w/2))*obmc_stride;
2948 int obmc_v= obmc[index];
// At the plane border the weight of the missing neighbour is folded into this block.
2950 if(y<0) obmc_v += obmc[index + block_w*obmc_stride];
2951 if(x<0) obmc_v += obmc[index + block_w];
2952 if(y+block_w>h) obmc_v += obmc[index - block_w*obmc_stride];
2953 if(x+block_w>w) obmc_v += obmc[index - block_w];
2954 //FIXME precalc this or simplify it somehow else
// dst holds the negated prediction, hence the sign flip; the constant rounds the shift below.
2956 d = -dst[index] + (1<<(FRAC_BITS-1));
2958 ab += (src[x2 + y2*ref_stride] - (d>>FRAC_BITS)) * obmc_v;
2959 aa += obmc_v * obmc_v; //FIXME precalculate this
2965 return clip(((ab<<LOG2_OBMC_MAX) + aa/2)/aa, 0, 255); //FIXME we should not need clipping
/* Estimate the number of bits needed to code block node (x, y).
 * w is the width of the node in blocks and is used only to locate the
 * top-right neighbour. For a node with BLOCK_INTRA set the estimate is
 * 3 + 2 * (sum of av_log2(2*|difference|) over the three colors against the
 * left neighbour); for other nodes it is 2 * (1 + av_log2(2*|dmx|) +
 * av_log2(2*|dmy|)), where dmx, dmy are the differences to the median of the
 * left, top and top-right motion vectors.
 * NOTE(review): the statement executed when (x, y) lies outside the block
 * grid is not visible in this listing; note that the node pointers above that
 * test are formed before the range check. */
2968 static inline int get_block_bits(SnowContext *s, int x, int y, int w){
2969 const int b_stride = s->b_width << s->block_max_depth;
2970 const int b_height = s->b_height<< s->block_max_depth;
2971 int index= x + y*b_stride;
2972 BlockNode *b = &s->block[index];
// Neighbours outside the picture fall back to null_block or to another neighbour.
2973 BlockNode *left = x ? &s->block[index-1] : &null_block;
2974 BlockNode *top = y ? &s->block[index-b_stride] : &null_block;
2975 BlockNode *tl = y && x ? &s->block[index-b_stride-1] : left;
2976 BlockNode *tr = y && x+w<b_stride ? &s->block[index-b_stride+w] : tl;
2978 // int mx_context= av_log2(2*ABS(left->mx - top->mx));
2979 // int my_context= av_log2(2*ABS(left->my - top->my));
2981 if(x<0 || x>=b_stride || y>=b_height)
2983 dmx= b->mx - mid_pred(left->mx, top->mx, tr->mx);
2984 dmy= b->my - mid_pred(left->my, top->my, tr->my);
2990 00001XXXX 15-30 8-15
2992 //FIXME try accurate rate
2993 //FIXME intra and inter predictors if surrounding blocks are not the same type
2994 if(b->type & BLOCK_INTRA){
2995 return 3+2*( av_log2(2*ABS(left->color[0] - b->color[0]))
2996 + av_log2(2*ABS(left->color[1] - b->color[1]))
2997 + av_log2(2*ABS(left->color[2] - b->color[2])));
2999 return 2*(1 + av_log2(2*ABS(dmx))
3000 + av_log2(2*ABS(dmy))); //FIXME kill the 2* can be merged in lambda
/* Rate-distortion cost of block (mb_x, mb_y) of one plane with its current
 * parameters. The block is predicted over a 2*block_w square into cur,
 * blended per pixel with the weights in obmc_edged and the neighbour
 * contribution in pred, and clamped. Distortion is measured with
 * s->dsp.me_cmp[0] between the input picture and the current picture, rate is
 * accumulated from get_block_bits(), and the result is
 * distortion + rate * penalty_factor.
 * NOTE(review): the store of the blended pixel, the loops over i and the
 * choice between the two distortion computations are not visible in this
 * listing. */
3003 static int get_block_rd(SnowContext *s, int mb_x, int mb_y, int plane_index, const uint8_t *obmc_edged){
3004 Plane *p= &s->plane[plane_index];
3005 const int block_size = MB_SIZE >> s->block_max_depth;
3006 const int block_w = plane_index ? block_size/2 : block_size;
3007 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
3008 const int obmc_stride= plane_index ? block_size : 2*block_size;
3009 const int ref_stride= s->current_picture.linesize[plane_index];
3010 uint8_t *ref= s-> last_picture.data[plane_index];
3011 uint8_t *dst= s->current_picture.data[plane_index];
3012 uint8_t *src= s-> input_picture.data[plane_index];
3013 DWTELEM *pred= (DWTELEM*)s->m.obmc_scratchpad + plane_index*block_size*block_size*4;
// Both scratch arrays are variable-length arrays on the stack, sized by ref_stride.
3014 uint8_t cur[ref_stride*2*MB_SIZE]; //FIXME alignment
3015 uint8_t tmp[ref_stride*(2*MB_SIZE+5)];
3016 const int b_stride = s->b_width << s->block_max_depth;
3017 const int b_height = s->b_height<< s->block_max_depth;
3018 const int w= p->width;
3019 const int h= p->height;
3022 const int penalty_factor= get_penalty_factor(s->lambda, s->lambda2, s->avctx->me_cmp);
// Top-left corner of the 2*block_w square, and the part of it inside the plane.
3023 int sx= block_w*mb_x - block_w/2;
3024 int sy= block_w*mb_y - block_w/2;
3025 int x0= FFMAX(0,-sx);
3026 int y0= FFMAX(0,-sy);
3027 int x1= FFMIN(block_w*2, w-sx);
3028 int y1= FFMIN(block_w*2, h-sy);
3031 pred_block(s, cur, ref, tmp, ref_stride, sx, sy, block_w*2, block_w*2, &s->block[mb_x + mb_y*b_stride], plane_index, w, h);
3033 for(y=y0; y<y1; y++){
3034 const uint8_t *obmc1= obmc_edged + y*obmc_stride;
3035 const DWTELEM *pred1 = pred + y*obmc_stride;
3036 uint8_t *cur1 = cur + y*ref_stride;
3037 uint8_t *dst1 = dst + sx + (sy+y)*ref_stride;
3038 for(x=x0; x<x1; x++){
// Weighted own prediction plus neighbour contribution, reduced to pixel precision and clamped.
3039 int v = (cur1[x] * obmc1[x]) << (FRAC_BITS - LOG2_OBMC_MAX);
3040 v = (v + pred1[x]) >> FRAC_BITS;
3041 if(v&(~255)) v= ~(v>>31);
3046 /* copy the regions where obmc[] = (uint8_t)256 */
// Applies only to the four corner blocks of the block grid.
3047 if(LOG2_OBMC_MAX == 8
3048 && (mb_x == 0 || mb_x == b_stride-1)
3049 && (mb_y == 0 || mb_y == b_height-1)){
3058 for(y=y0; y<y1; y++)
3059 memcpy(dst + sx+x0 + (sy+y)*ref_stride, cur + x0 + y*ref_stride, x1-x0);
3062 //FIXME sad/ssd can be broken up, but wavelet cmp should be one 32x32 block
// Distortion summed over 16x16 parts addressed by i ...
3066 int off = sx+16*(i&1) + (sy+16*(i>>1))*ref_stride;
3067 distortion += s->dsp.me_cmp[0](&s->m, src + off, dst + off, ref_stride, 16);
// ... or measured in one call over block_w*2 rows.
3071 distortion = s->dsp.me_cmp[0](&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, block_w*2);
// Rate of the nodes addressed by i relative to this block.
3080 rate += get_block_bits(s, mb_x + (i&1) - (i>>1), mb_y + (i>>1), 1);
3082 if(mb_x == b_stride-2)
3083 rate += get_block_bits(s, mb_x + 1, mb_y + 1, 1);
3085 return distortion + rate*penalty_factor;
/* Rate-distortion cost of the 2x2 group of blocks whose top-left node is
 * (mb_x, mb_y). For each position of a 3x3 neighbourhood (offsets i%3-1,
 * i/3-1) the area is reconstructed into the current picture with add_yblock()
 * on a zero coefficient buffer, parts outside the plane are copied from the
 * input picture so that they add no distortion, and the distortion is measured
 * with s->dsp.me_cmp. Rate comes from get_block_bits(); when the four nodes
 * are identical per same_block() the loop starts at entry 4 of dxy, skipping
 * the four per-node entries. Returns distortion + rate * penalty_factor.
 * NOTE(review): the loop over i and the conditions around the copy loops are
 * not visible in this listing. */
3088 static int get_4block_rd(SnowContext *s, int mb_x, int mb_y, int plane_index){
3090 Plane *p= &s->plane[plane_index];
3091 const int block_size = MB_SIZE >> s->block_max_depth;
3092 const int block_w = plane_index ? block_size/2 : block_size;
3093 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
3094 const int obmc_stride= plane_index ? block_size : 2*block_size;
3095 const int ref_stride= s->current_picture.linesize[plane_index];
3096 uint8_t *ref= s-> last_picture.data[plane_index];
3097 uint8_t *dst= s->current_picture.data[plane_index];
3098 uint8_t *src= s-> input_picture.data[plane_index];
// Static storage, hence zero-initialised; passed to add_yblock() with a stride of 0.
3099 const static DWTELEM zero_dst[4096]; //FIXME
3100 const int b_stride = s->b_width << s->block_max_depth;
3101 const int b_height = s->b_height<< s->block_max_depth;
3102 const int w= p->width;
3103 const int h= p->height;
3106 const int penalty_factor= get_penalty_factor(s->lambda, s->lambda2, s->avctx->me_cmp);
3109 int mb_x2= mb_x + (i%3) - 1;
3110 int mb_y2= mb_y + (i/3) - 1;
3111 int x= block_w*mb_x2 + block_w/2;
3112 int y= block_w*mb_y2 + block_w/2;
3114 add_yblock(s, zero_dst, dst, ref, obmc,
3115 x, y, block_w, block_w, w, h, /*dst_stride*/0, ref_stride, obmc_stride, mb_x2, mb_y2, 1, 1, plane_index);
3117 //FIXME find a cleaner/simpler way to skip the outside stuff
// Rows above the plane and rows below it.
3118 for(y2= y; y2<0; y2++)
3119 memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, block_w);
3120 for(y2= h; y2<y+block_w; y2++)
3121 memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, block_w);
// Columns left of the plane (-x bytes per row).
3123 for(y2= y; y2<y+block_w; y2++)
3124 memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, -x);
// Columns right of the plane.
3127 for(y2= y; y2<y+block_w; y2++)
3128 memcpy(dst + w + y2*ref_stride, src + w + y2*ref_stride, x+block_w - w);
// me_cmp index 1 is used for 8-wide blocks, index 0 for 16-wide ones.
3131 assert(block_w== 8 || block_w==16);
3132 distortion += s->dsp.me_cmp[block_w==8](&s->m, src + x + y*ref_stride, dst + x + y*ref_stride, ref_stride, block_w);
3136 BlockNode *b= &s->block[mb_x+mb_y*b_stride];
3137 int merged= same_block(b,b+1) && same_block(b,b+b_stride) && same_block(b,b+b_stride+1);
3145 rate = get_block_bits(s, mb_x, mb_y, 2);
3146 for(i=merged?4:0; i<9; i++){
3147 static const int dxy[9][2] = {{0,0},{1,0},{0,1},{1,1},{2,0},{2,1},{-1,2},{0,2},{1,2}};
3148 rate += get_block_bits(s, mb_x + dxy[i][0], mb_y + dxy[i][1], 1);
3151 return distortion + rate*penalty_factor;
/* Try candidate parameters p for block (mb_x, mb_y) and evaluate them with
 * get_block_rd() on plane 0. The node is saved in backup before it is changed.
 * The visible intra path stores p[0..2] as colors and sets BLOCK_INTRA; the
 * other path looks the candidate up in s->me_cache (so that a candidate
 * already seen in this cache generation can be recognised) and clears
 * BLOCK_INTRA.
 * NOTE(review): the test on intra, the assignment of the motion vector, the
 * comparison against *best_rd, the restore from backup and the return
 * statements are not visible in this listing. */
3154 static always_inline int check_block(SnowContext *s, int mb_x, int mb_y, int p[3], int intra, const uint8_t *obmc_edged, int *best_rd){
3155 const int b_stride= s->b_width << s->block_max_depth;
3156 BlockNode *block= &s->block[mb_x + mb_y * b_stride];
3157 BlockNode backup= *block;
3158 int rd, index, value;
3160 assert(mb_x>=0 && mb_y>=0);
3161 assert(mb_x<b_stride);
3164 block->color[0] = p[0];
3165 block->color[1] = p[1];
3166 block->color[2] = p[2];
3167 block->type |= BLOCK_INTRA;
// Cache slot and tag are both derived from p[0] and p[1]; the tag also carries the generation.
3169 index= (p[0] + 31*p[1]) & (ME_CACHE_SIZE-1);
3170 value= s->me_cache_generation + (p[0]>>10) + (p[1]<<6);
3171 if(s->me_cache[index] == value)
3173 s->me_cache[index]= value;
3177 block->type &= ~BLOCK_INTRA;
3180 rd= get_block_rd(s, mb_x, mb_y, 0, obmc_edged);
/* Inter-only wrapper around check_block(): packs the two values p0, p1 into a
 * local int[2] and calls check_block() with intra = 0, returning its result.
 * special case for int[2] args we discard afterward, fixes compilation prob with gcc 2.95 */
3193 static always_inline int check_block_inter(SnowContext *s, int mb_x, int mb_y, int p0, int p1, const uint8_t *obmc_edged, int *best_rd){
3194 int p[2] = {p0, p1};
3195 return check_block(s, mb_x, mb_y, p, 0, obmc_edged, best_rd);
/* Try one candidate (p0, p1) for the whole 2x2 group of nodes whose top-left
 * node is (mb_x, mb_y); both coordinates must be even. The four nodes are
 * saved, the candidate is looked up in s->me_cache, BLOCK_INTRA is cleared on
 * the first node, that node is copied onto the other three and the group is
 * evaluated with get_4block_rd() on plane 0. The last four assignments put the
 * saved nodes back.
 * NOTE(review): the assignment of the motion vector, the comparison against
 * *best_rd, the condition under which the nodes are restored and the return
 * statements are not visible in this listing. */
3198 static always_inline int check_4block_inter(SnowContext *s, int mb_x, int mb_y, int p0, int p1, int *best_rd){
3199 const int b_stride= s->b_width << s->block_max_depth;
3200 BlockNode *block= &s->block[mb_x + mb_y * b_stride];
3201 BlockNode backup[4]= {block[0], block[1], block[b_stride], block[b_stride+1]};
3202 int rd, index, value;
3204 assert(mb_x>=0 && mb_y>=0);
3205 assert(mb_x<b_stride);
3206 assert(((mb_x|mb_y)&1) == 0);
// Cache slot and tag are both derived from p0 and p1; the tag also carries the generation.
3208 index= (p0 + 31*p1) & (ME_CACHE_SIZE-1);
3209 value= s->me_cache_generation + (p0>>10) + (p1<<6);
3210 if(s->me_cache[index] == value)
3212 s->me_cache[index]= value;
3216 block->type &= ~BLOCK_INTRA;
// Make all four nodes of the group identical to the first one.
3217 block[1]= block[b_stride]= block[b_stride+1]= *block;
3219 rd= get_4block_rd(s, mb_x, mb_y, 0);
3226 block[0]= backup[0];
3227 block[1]= backup[1];
3228 block[b_stride]= backup[2];
3229 block[b_stride+1]= backup[3];
3234 static void iterative_me(SnowContext *s){
3235 int pass, mb_x, mb_y;
3236 const int b_width = s->b_width << s->block_max_depth;
3237 const int b_height= s->b_height << s->block_max_depth;
3238 const int b_stride= b_width;
3242 RangeCoder r = s->c;
3243 uint8_t state[sizeof(s->block_state)];
3244 memcpy(state, s->block_state, sizeof(s->block_state));
3245 for(mb_y= 0; mb_y<s->b_height; mb_y++)
3246 for(mb_x= 0; mb_x<s->b_width; mb_x++)
3247 encode_q_branch(s, 0, mb_x, mb_y);
3249 memcpy(s->block_state, state, sizeof(s->block_state));
3252 for(pass=0; pass<50; pass++){
3255 for(mb_y= 0; mb_y<b_height; mb_y++){
3256 for(mb_x= 0; mb_x<b_width; mb_x++){
3257 int dia_change, i, j;
3258 int best_rd= INT_MAX;
3260 const int index= mb_x + mb_y * b_stride;
3261 BlockNode *block= &s->block[index];
3262 BlockNode *tb = mb_y ? &s->block[index-b_stride ] : &null_block;
3263 BlockNode *lb = mb_x ? &s->block[index -1] : &null_block;
3264 BlockNode *rb = mb_x+1<b_width ? &s->block[index +1] : &null_block;
3265 BlockNode *bb = mb_y+1<b_height ? &s->block[index+b_stride ] : &null_block;
3266 BlockNode *tlb= mb_x && mb_y ? &s->block[index-b_stride-1] : &null_block;
3267 BlockNode *trb= mb_x+1<b_width && mb_y ? &s->block[index-b_stride+1] : &null_block;
3268 BlockNode *blb= mb_x && mb_y+1<b_height ? &s->block[index+b_stride-1] : &null_block;
3269 BlockNode *brb= mb_x+1<b_width && mb_y+1<b_height ? &s->block[index+b_stride+1] : &null_block;
3270 const int b_w= (MB_SIZE >> s->block_max_depth);
3271 uint8_t obmc_edged[b_w*2][b_w*2];
3273 if(pass && (block->type & BLOCK_OPT))
3275 block->type |= BLOCK_OPT;
3279 if(!s->me_cache_generation)
3280 memset(s->me_cache, 0, sizeof(s->me_cache));
3281 s->me_cache_generation += 1<<22;
3286 memcpy(obmc_edged, obmc_tab[s->block_max_depth], b_w*b_w*4);
3288 for(y=0; y<b_w*2; y++)
3289 memset(obmc_edged[y], obmc_edged[y][0] + obmc_edged[y][b_w-1], b_w);
3290 if(mb_x==b_stride-1)
3291 for(y=0; y<b_w*2; y++)
3292 memset(obmc_edged[y]+b_w, obmc_edged[y][b_w] + obmc_edged[y][b_w*2-1], b_w);
3294 for(x=0; x<b_w*2; x++)
3295 obmc_edged[0][x] += obmc_edged[b_w-1][x];
3296 for(y=1; y<b_w; y++)
3297 memcpy(obmc_edged[y], obmc_edged[0], b_w*2);
3299 if(mb_y==b_height-1){
3300 for(x=0; x<b_w*2; x++)
3301 obmc_edged[b_w*2-1][x] += obmc_edged[b_w][x];
3302 for(y=b_w; y<b_w*2-1; y++)
3303 memcpy(obmc_edged[y], obmc_edged[b_w*2-1], b_w*2);
3307 //skip stuff outside the picture
3308 if(mb_x==0 || mb_y==0 || mb_x==b_width-1 || mb_y==b_height-1)
3310 uint8_t *src= s-> input_picture.data[0];
3311 uint8_t *dst= s->current_picture.data[0];
3312 const int stride= s->current_picture.linesize[0];
3313 const int block_w= MB_SIZE >> s->block_max_depth;
3314 const int sx= block_w*mb_x - block_w/2;
3315 const int sy= block_w*mb_y - block_w/2;
3316 const int w= s->plane[0].width;
3317 const int h= s->plane[0].height;
3321 memcpy(dst + sx + y*stride, src + sx + y*stride, block_w*2);
3322 for(y=h; y<sy+block_w*2; y++)
3323 memcpy(dst + sx + y*stride, src + sx + y*stride, block_w*2);
3325 for(y=sy; y<sy+block_w*2; y++)
3326 memcpy(dst + sx + y*stride, src + sx + y*stride, -sx);
3328 if(sx+block_w*2 > w){
3329 for(y=sy; y<sy+block_w*2; y++)
3330 memcpy(dst + w + y*stride, src + w + y*stride, sx+block_w*2 - w);
3334 // intra(black) = neighbors' contribution to the current block
3336 color[i]= get_dc(s, mb_x, mb_y, i);
3338 // get previous score (cant be cached due to OBMC)
3339 if(pass > 0 && (block->type&BLOCK_INTRA)){
3340 int color0[3]= {block->color[0], block->color[1], block->color[2]};
3341 check_block(s, mb_x, mb_y, color0, 1, *obmc_edged, &best_rd);
3343 check_block_inter(s, mb_x, mb_y, block->mx, block->my, *obmc_edged, &best_rd);
3345 check_block_inter(s, mb_x, mb_y, 0, 0, *obmc_edged, &best_rd);
3346 check_block_inter(s, mb_x, mb_y, tb->mx, tb->my, *obmc_edged, &best_rd);
3347 check_block_inter(s, mb_x, mb_y, lb->mx, lb->my, *obmc_edged, &best_rd);
3348 check_block_inter(s, mb_x, mb_y, rb->mx, rb->my, *obmc_edged, &best_rd);
3349 check_block_inter(s, mb_x, mb_y, bb->mx, bb->my, *obmc_edged, &best_rd);
3352 //FIXME avoid subpel interpol / round to nearest integer
3355 for(i=0; i<FFMAX(s->avctx->dia_size, 1); i++){
3357 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+4*(i-j), block->my+(4*j), *obmc_edged, &best_rd);
3358 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx-4*(i-j), block->my-(4*j), *obmc_edged, &best_rd);
3359 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+4*(i-j), block->my-(4*j), *obmc_edged, &best_rd);
3360 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx-4*(i-j), block->my+(4*j), *obmc_edged, &best_rd);
3366 static const int square[8][2]= {{+1, 0},{-1, 0},{ 0,+1},{ 0,-1},{+1,+1},{-1,-1},{+1,-1},{-1,+1},};
3369 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+square[i][0], block->my+square[i][1], *obmc_edged, &best_rd);
3371 //FIXME or try the standard 2 pass qpel or similar
3373 check_block(s, mb_x, mb_y, color, 1, *obmc_edged, &best_rd);
3374 //FIXME RD style color selection
3376 if(!same_block(block, &backup)){
3377 if(tb != &null_block) tb ->type &= ~BLOCK_OPT;
3378 if(lb != &null_block) lb ->type &= ~BLOCK_OPT;
3379 if(rb != &null_block) rb ->type &= ~BLOCK_OPT;
3380 if(bb != &null_block) bb ->type &= ~BLOCK_OPT;
3381 if(tlb!= &null_block) tlb->type &= ~BLOCK_OPT;
3382 if(trb!= &null_block) trb->type &= ~BLOCK_OPT;
3383 if(blb!= &null_block) blb->type &= ~BLOCK_OPT;
3384 if(brb!= &null_block) brb->type &= ~BLOCK_OPT;
3389 av_log(NULL, AV_LOG_ERROR, "pass:%d changed:%d\n", pass, change);
3394 if(s->block_max_depth == 1){
3396 for(mb_y= 0; mb_y<b_height; mb_y+=2){
3397 for(mb_x= 0; mb_x<b_width; mb_x+=2){
3398 int dia_change, i, j;
3399 int best_rd, init_rd;
3400 const int index= mb_x + mb_y * b_stride;
3403 b[0]= &s->block[index];
3405 b[2]= b[0]+b_stride;
3407 if(same_block(b[0], b[1]) &&
3408 same_block(b[0], b[2]) &&
3409 same_block(b[0], b[3]))
3412 if(!s->me_cache_generation)
3413 memset(s->me_cache, 0, sizeof(s->me_cache));
3414 s->me_cache_generation += 1<<22;
3416 init_rd= best_rd= get_4block_rd(s, mb_x, mb_y, 0);
3418 check_4block_inter(s, mb_x, mb_y,
3419 (b[0]->mx + b[1]->mx + b[2]->mx + b[3]->mx + 2) >> 2,
3420 (b[0]->my + b[1]->my + b[2]->my + b[3]->my + 2) >> 2, &best_rd);
3423 if(!(b[i]->type&BLOCK_INTRA))
3424 check_4block_inter(s, mb_x, mb_y, b[i]->mx, b[i]->my, &best_rd);
3426 if(init_rd != best_rd)
3430 av_log(NULL, AV_LOG_ERROR, "pass:4mv changed:%d\n", change*4);
/**
 * Quantizes the coefficients of one subband in place.
 *
 * @param s      codec context; s->qlog is combined with b->qlog to select the
 *               step size, and s->qlog == LOSSLESS_QLOG disables quantization
 * @param b      subband whose level, width, height and qlog are read
 * @param src    coefficient array, addressed as src[x + y*stride]
 * @param stride distance between vertically adjacent coefficients in src
 * @param bias   non-zero: no rounding offset is used; zero: the offset
 *               becomes (3*qmul)>>3
 *
 * NOTE(review): the embedded line numbers skip values, so the loop headers,
 * braces, else lines and the thres2 assignment are not visible in this listing.
 */
3434 static void quantize(SnowContext *s, SubBand *b, DWTELEM *src, int stride, int bias){
3435 const int level= b->level;
3436 const int w= b->width;
3437 const int h= b->height;
// global plus per-band quantizer index, clipped to [0, QROOT*16]
3438 const int qlog= clip(s->qlog + b->qlog, 0, QROOT*16);
// step size: table entry for the low bits of qlog, scaled up by qlog>>QSHIFT
3439 const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
3440 int x,y, thres1, thres2;
// lossless mode: the coefficients are left untouched
3443 if(s->qlog == LOSSLESS_QLOG) return;
// the parameter is reused from here on to hold the actual rounding offset
3445 bias= bias ? 0 : (3*qmul)>>3;
3446 thres1= ((qmul - bias)>>QEXPSHIFT) - 1;
// First variant of the per-coefficient body: divides by qmul without adding
// the offset (presumably the bias==0 path -- the selecting test is not visible).
3452 int i= src[x + y*stride];
// the unsigned cast makes this a single range check: values with
// 0 <= i+thres1 <= thres2 do not pass
3454 if((unsigned)(i+thres1) > thres2){
3457 i/= qmul; //FIXME optimize
3458 src[x + y*stride]= i;
// result stored negated; presumably the branch for negative input, with i
// made positive on a line that is not visible
3462 i/= qmul; //FIXME optimize
3463 src[x + y*stride]= -i;
// presumably the else of the range check: coefficient becomes 0
3466 src[x + y*stride]= 0;
// Second variant of the per-coefficient body: adds the offset before dividing.
3472 int i= src[x + y*stride];
3474 if((unsigned)(i+thres1) > thres2){
3477 i= (i + bias) / qmul; //FIXME optimize
3478 src[x + y*stride]= i;
3482 i= (i + bias) / qmul; //FIXME optimize
3483 src[x + y*stride]= -i;
3486 src[x + y*stride]= 0;
// only a commented-out timer call is visible under this condition
3490 if(level+1 == s->spatial_decomposition_count){
3491 // STOP_TIMER("quantize")
/**
 * Dequantizes rows start_y .. end_y-1 of subband b inside slice buffer sb.
 *
 * Each row is fetched with slice_buffer_get_line() at row index
 * y*b->stride_line + b->buf_y_offset and then advanced by b->buf_x_offset.
 * In the visible lines src appears only in commented-out code and stride is
 * not referenced. Nothing is done when s->qlog == LOSSLESS_QLOG.
 *
 * NOTE(review): the embedded line numbers skip values; the inner x loop and
 * the sign test selecting between the two stores are not visible here.
 */
3495 static void dequantize_slice_buffered(SnowContext *s, slice_buffer * sb, SubBand *b, DWTELEM *src, int stride, int start_y, int end_y){
3496 const int w= b->width;
// same step-size derivation as in quantize(): clipped qlog -> qexp[] entry << shift
3497 const int qlog= clip(s->qlog + b->qlog, 0, QROOT*16);
3498 const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
// reconstruction offset derived from the qbias stored in the context
3499 const int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
3503 if(s->qlog == LOSSLESS_QLOG) return;
3505 for(y=start_y; y<end_y; y++){
3506 // DWTELEM * line = slice_buffer_get_line_from_address(sb, src + (y * stride));
3507 DWTELEM * line = slice_buffer_get_line(sb, (y * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
// scales -i and negates the result; presumably taken for negative i
3511 line[x]= -((-i*qmul + qadd)>>(QEXPSHIFT)); //FIXME try different bias
// scales i directly; presumably taken for positive i
3513 line[x]= (( i*qmul + qadd)>>(QEXPSHIFT));
// timing instrumentation, limited to bands wider than 200
3517 if(w > 200 /*level+1 == s->spatial_decomposition_count*/){
3518 STOP_TIMER("dquant")
/**
 * Dequantizes the coefficients of subband b in place.
 *
 * @param s      codec context supplying qlog and qbias
 * @param b      subband whose width, height and qlog are read
 * @param src    coefficient array, addressed as src[x + y*stride]
 * @param stride distance between vertically adjacent coefficients in src
 *
 * Nothing is done when s->qlog == LOSSLESS_QLOG.
 *
 * NOTE(review): the embedded line numbers skip values; the loops and the
 * sign test selecting between the two stores are not visible here.
 */
3522 static void dequantize(SnowContext *s, SubBand *b, DWTELEM *src, int stride){
3523 const int w= b->width;
3524 const int h= b->height;
// step size and reconstruction offset, computed as in dequantize_slice_buffered()
3525 const int qlog= clip(s->qlog + b->qlog, 0, QROOT*16);
3526 const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
3527 const int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
3531 if(s->qlog == LOSSLESS_QLOG) return;
3535 int i= src[x + y*stride];
// scales -i and negates the result; presumably taken for negative i
3537 src[x + y*stride]= -((-i*qmul + qadd)>>(QEXPSHIFT)); //FIXME try different bias
// scales i directly; presumably taken for positive i
3539 src[x + y*stride]= (( i*qmul + qadd)>>(QEXPSHIFT));
// timing instrumentation, limited to bands wider than 200
3543 if(w > 200 /*level+1 == s->spatial_decomposition_count*/){
3544 STOP_TIMER("dquant")
/**
 * Replaces each coefficient of subband b by its difference from a prediction
 * built from already-present neighbours (left, top, top-right or top-left).
 *
 * The band is walked from the bottom-right coefficient to the top-left one,
 * so the left and upper neighbours read for a prediction have not yet been
 * overwritten when they are used.
 *
 * @param src    coefficient array, addressed as src[x + y*stride]
 * @param stride distance between vertically adjacent coefficients in src
 *
 * NOTE(review): the embedded line numbers skip values; the tests that choose
 * between the three predictor forms below (presumably on x and use_median)
 * and any use of `inverse` are not visible in this listing.
 */
3548 static void decorrelate(SnowContext *s, SubBand *b, DWTELEM *src, int stride, int inverse, int use_median){
3549 const int w= b->width;
3550 const int h= b->height;
3553 for(y=h-1; y>=0; y--){
3554 for(x=w-1; x>=0; x--){
3555 int i= x + y*stride;
// predictor 1: median of left, top and top-right; plain left in the first
// row or last column
3559 if(y && x+1<w) src[i] -= mid_pred(src[i - 1], src[i - stride], src[i - stride + 1]);
3560 else src[i] -= src[i - 1];
// predictor 2: median of left, top and (left + top - top-left); plain left
// in the first row
3562 if(y) src[i] -= mid_pred(src[i - 1], src[i - stride], src[i - 1] + src[i - stride] - src[i - 1 - stride]);
3563 else src[i] -= src[i - 1];
// predictor 3: top neighbour only (presumably the x == 0 column)
3566 if(y) src[i] -= src[i - stride];
/**
 * Adds the neighbour-based prediction back onto rows start_y .. end_y-1 of
 * subband b, operating on lines fetched from slice buffer sb.
 *
 * The predictor forms are the additive counterparts of the ones subtracted in
 * decorrelate(): median of left/top/top-right, median of left/top/gradient,
 * or the top neighbour alone. `prev` is read as the row above the current one.
 *
 * NOTE(review): the embedded line numbers skip values; the inner x loop, the
 * assignments to `prev`, and the tests choosing a predictor are not visible.
 * In the visible lines src and stride appear only in commented-out code.
 */
3572 static void correlate_slice_buffered(SnowContext *s, slice_buffer * sb, SubBand *b, DWTELEM *src, int stride, int inverse, int use_median, int start_y, int end_y){
3573 const int w= b->width;
// fetch the row preceding start_y (presumably guarded by a start_y test that
// is not visible)
3582 line = slice_buffer_get_line(sb, ((start_y - 1) * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
3584 for(y=start_y; y<end_y; y++){
3586 // line = slice_buffer_get_line_from_address(sb, src + (y * stride));
3587 line = slice_buffer_get_line(sb, (y * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
// median of left, top and top-right; plain left in the first row or last column
3591 if(y && x+1<w) line[x] += mid_pred(line[x - 1], prev[x], prev[x + 1]);
3592 else line[x] += line[x - 1];
// median of left, top and (left + top - top-left); plain left in the first row
3594 if(y) line[x] += mid_pred(line[x - 1], prev[x], line[x - 1] + prev[x] - prev[x - 1]);
3595 else line[x] += line[x - 1];
// top neighbour only (presumably the x == 0 column)
3598 if(y) line[x] += prev[x];
3603 // STOP_TIMER("correlate")
/**
 * Adds the neighbour-based prediction back onto each coefficient of subband b
 * in place; the additive counterpart of decorrelate().
 *
 * @param src    coefficient array, addressed as src[x + y*stride]
 * @param stride distance between vertically adjacent coefficients in src
 *
 * NOTE(review): the embedded line numbers skip values; the loop headers (and
 * therefore the iteration order) and the tests choosing between the three
 * predictor forms are not visible in this listing.
 */
3606 static void correlate(SnowContext *s, SubBand *b, DWTELEM *src, int stride, int inverse, int use_median){
3607 const int w= b->width;
3608 const int h= b->height;
3613 int i= x + y*stride;
// median of left, top and top-right; plain left in the first row or last column
3617 if(y && x+1<w) src[i] += mid_pred(src[i - 1], src[i - stride], src[i - stride + 1]);
3618 else src[i] += src[i - 1];
// median of left, top and (left + top - top-left); plain left in the first row
3620 if(y) src[i] += mid_pred(src[i - 1], src[i - stride], src[i - 1] + src[i - stride] - src[i - 1 - stride]);
3621 else src[i] += src[i - 1];
// top neighbour only (presumably the x == 0 column)
3624 if(y) src[i] += src[i - stride];
/**
 * Writes the frame header fields of s to the range coder s->c.
 *
 * The keyframe flag is coded with a local state array reset to MID_STATE; all
 * other fields use s->header_state. Fields are written in the order listed
 * below, which decode_header() reads back in the same order.
 *
 * NOTE(review): the embedded line numbers skip values; the body of the
 * keyframe/always_reset test and any condition around the stream-level fields
 * (version .. per-band qlog) are not visible in this listing.
 */
3630 static void encode_header(SnowContext *s){
3631 int plane_index, level, orientation;
3634 memset(kstate, MID_STATE, sizeof(kstate));
3636 put_rac(&s->c, kstate, s->keyframe);
3637 if(s->keyframe || s->always_reset)
3640 put_symbol(&s->c, s->header_state, s->version, 0);
3641 put_rac(&s->c, s->header_state, s->always_reset);
3642 put_symbol(&s->c, s->header_state, s->temporal_decomposition_type, 0);
3643 put_symbol(&s->c, s->header_state, s->temporal_decomposition_count, 0);
3644 put_symbol(&s->c, s->header_state, s->spatial_decomposition_count, 0);
3645 put_symbol(&s->c, s->header_state, s->colorspace_type, 0);
3646 put_symbol(&s->c, s->header_state, s->chroma_h_shift, 0);
3647 put_symbol(&s->c, s->header_state, s->chroma_v_shift, 0);
3648 put_rac(&s->c, s->header_state, s->spatial_scalability);
3649 // put_rac(&s->c, s->header_state, s->rate_scalability);
// Per-band qlog values: only planes 0 and 1 are written, and orientation 2 is
// skipped; decode_header() fills plane 2 from plane 1 and orientation 2 from
// orientation 1.
3651 for(plane_index=0; plane_index<2; plane_index++){
3652 for(level=0; level<s->spatial_decomposition_count; level++){
3653 for(orientation=level ? 1:0; orientation<4; orientation++){
3654 if(orientation==2) continue;
3655 put_symbol(&s->c, s->header_state, s->plane[plane_index].band[level][orientation].qlog, 1);
// per-frame fields (last argument 1 marks the values written as signed)
3660 put_symbol(&s->c, s->header_state, s->spatial_decomposition_type, 0);
3661 put_symbol(&s->c, s->header_state, s->qlog, 1);
3662 put_symbol(&s->c, s->header_state, s->mv_scale, 0);
3663 put_symbol(&s->c, s->header_state, s->qbias, 1);
3664 put_symbol(&s->c, s->header_state, s->block_max_depth, 0);
/**
 * Reads the frame header from the range coder s->c into s, mirroring the
 * field order of encode_header().
 *
 * Error messages are logged for an unsupported version, for a
 * spatial_decomposition_type greater than 2, and for a block_max_depth greater
 * than 1 (which is then reset to 0).
 *
 * NOTE(review): the embedded line numbers skip values; the version test, the
 * return statements and the condition around the stream-level fields are not
 * visible, so the exact return values cannot be stated from this listing.
 * NOTE(review): the two av_log() messages for version and
 * spatial_decomposition_type have no trailing "\n", unlike most other log
 * calls in this file.
 */
3667 static int decode_header(SnowContext *s){
3668 int plane_index, level, orientation;
3671 memset(kstate, MID_STATE, sizeof(kstate));
// the keyframe flag uses its own freshly reset state, as in encode_header()
3673 s->keyframe= get_rac(&s->c, kstate);
3674 if(s->keyframe || s->always_reset)
3677 s->version= get_symbol(&s->c, s->header_state, 0);
3679 av_log(s->avctx, AV_LOG_ERROR, "version %d not supported", s->version);
3682 s->always_reset= get_rac(&s->c, s->header_state);
3683 s->temporal_decomposition_type= get_symbol(&s->c, s->header_state, 0);
3684 s->temporal_decomposition_count= get_symbol(&s->c, s->header_state, 0);
3685 s->spatial_decomposition_count= get_symbol(&s->c, s->header_state, 0);
3686 s->colorspace_type= get_symbol(&s->c, s->header_state, 0);
3687 s->chroma_h_shift= get_symbol(&s->c, s->header_state, 0);
3688 s->chroma_v_shift= get_symbol(&s->c, s->header_state, 0);
3689 s->spatial_scalability= get_rac(&s->c, s->header_state);
3690 // s->rate_scalability= get_rac(&s->c, s->header_state);
// Per-band qlog: plane 2 reuses plane 1, orientation 2 reuses orientation 1,
// everything else is read from the stream.
3692 for(plane_index=0; plane_index<3; plane_index++){
3693 for(level=0; level<s->spatial_decomposition_count; level++){
3694 for(orientation=level ? 1:0; orientation<4; orientation++){
3696 if (plane_index==2) q= s->plane[1].band[level][orientation].qlog;
3697 else if(orientation==2) q= s->plane[plane_index].band[level][1].qlog;
3698 else q= get_symbol(&s->c, s->header_state, 1);
3699 s->plane[plane_index].band[level][orientation].qlog= q;
3705 s->spatial_decomposition_type= get_symbol(&s->c, s->header_state, 0);
3706 if(s->spatial_decomposition_type > 2){
3707 av_log(s->avctx, AV_LOG_ERROR, "spatial_decomposition_type %d not supported", s->spatial_decomposition_type);
3711 s->qlog= get_symbol(&s->c, s->header_state, 1);
3712 s->mv_scale= get_symbol(&s->c, s->header_state, 0);
3713 s->qbias= get_symbol(&s->c, s->header_state, 1);
3714 s->block_max_depth= get_symbol(&s->c, s->header_state, 0);
// out-of-range depth is reported and replaced by 0
3715 if(s->block_max_depth > 1){
3716 av_log(s->avctx, AV_LOG_ERROR, "block_max_depth= %d is too large", s->block_max_depth);
3717 s->block_max_depth= 0;
/**
 * Runs QROOT iterations in which v is multiplied by 2^(1/QROOT) each time.
 *
 * NOTE(review): the embedded line numbers skip values; the declaration and
 * initial value of v and the statement that stores it are not visible.
 * Presumably this fills the qexp[] table read by quantize()/dequantize() --
 * confirm against the full source.
 */
3724 static void init_qexp(void){
3728 for(i=0; i<QROOT; i++){
3730 v *= pow(2, 1.0 / QROOT);
/**
 * Initialisation shared by encoder and decoder: sets up the DSP function
 * tables, fixes the decomposition count and chroma shifts, allocates the
 * wavelet coefficient buffer and lays out every subband of the three planes
 * inside that buffer.
 *
 * NOTE(review): the embedded line numbers skip values; the #define lines that
 * open the first macro, the macro invocations, several conditions in the
 * subband loop and the return statement are not visible in this listing.
 * NOTE(review): in the visible lines the results of av_mallocz() and
 * get_buffer() are not checked.
 */
3734 static int common_init(AVCodecContext *avctx){
3735 SnowContext *s = avctx->priv_data;
3737 int level, orientation, plane_index, dec;
3741 dsputil_init(&s->dsp, avctx);
3744 s->dsp.put_qpel_pixels_tab [0][dy+dx/4]=\
3745 s->dsp.put_no_rnd_qpel_pixels_tab[0][dy+dx/4]=\
3746 s->dsp.put_h264_qpel_pixels_tab[0][dy+dx/4];\
3747 s->dsp.put_qpel_pixels_tab [1][dy+dx/4]=\
3748 s->dsp.put_no_rnd_qpel_pixels_tab[1][dy+dx/4]=\
3749 s->dsp.put_h264_qpel_pixels_tab[1][dy+dx/4];
// The macro body above points the qpel tables (rounding and no-rounding) at
// the h264 qpel functions. mcfh() below installs mc_block_hpel<dx><dy>16 and
// ...8 into the put_pixels tables (rounding and no-rounding alike).
3768 #define mcfh(dx,dy)\
3769 s->dsp.put_pixels_tab [0][dy/4+dx/8]=\
3770 s->dsp.put_no_rnd_pixels_tab[0][dy/4+dx/8]=\
3771 mc_block_hpel ## dx ## dy ## 16;\
3772 s->dsp.put_pixels_tab [1][dy/4+dx/8]=\
3773 s->dsp.put_no_rnd_pixels_tab[1][dy/4+dx/8]=\
3774 mc_block_hpel ## dx ## dy ## 8;
// fixed number of spatial decomposition levels
3784 dec= s->spatial_decomposition_count= 5;
3785 s->spatial_decomposition_type= avctx->prediction_method; //FIXME add decorrelator type r transform_type
3787 s->chroma_h_shift= 1; //FIXME XXX
3788 s->chroma_v_shift= 1;
3790 // dec += FFMAX(s->chroma_h_shift, s->chroma_v_shift);
3792 width= s->avctx->width;
3793 height= s->avctx->height;
// one DWTELEM per luma-sized pixel; shared by all planes and subbands
3795 s->spatial_dwt_buffer= av_mallocz(width*height*sizeof(DWTELEM));
// motion vector scale and block split depth follow the QPEL / 4MV flags
3797 s->mv_scale= (s->avctx->flags & CODEC_FLAG_QPEL) ? 2 : 4;
3798 s->block_max_depth= (s->avctx->flags & CODEC_FLAG_4MV) ? 1 : 0;
3800 for(plane_index=0; plane_index<3; plane_index++){
3801 int w= s->avctx->width;
3802 int h= s->avctx->height;
// chroma subsampling (presumably applied only for plane_index != 0; the
// test is not visible)
3805 w>>= s->chroma_h_shift;
3806 h>>= s->chroma_v_shift;
3808 s->plane[plane_index].width = w;
3809 s->plane[plane_index].height= h;
3810 //av_log(NULL, AV_LOG_DEBUG, "%d %d\n", w, h);
// subbands are laid out from the highest level index down to level 0
3811 for(level=s->spatial_decomposition_count-1; level>=0; level--){
3812 for(orientation=level ? 1 : 0; orientation<4; orientation++){
3813 SubBand *b= &s->plane[plane_index].band[level][orientation];
3815 b->buf= s->spatial_dwt_buffer;
3817 b->stride= s->plane[plane_index].width << (s->spatial_decomposition_count - level);
// width rounds up for orientations 0 and 2, height rounds up for 0 and 1
3818 b->width = (w + !(orientation&1))>>1;
3819 b->height= (h + !(orientation>1))>>1;
// the same layout expressed in slice-buffer terms (line step and offsets)
3821 b->stride_line = 1 << (s->spatial_decomposition_count - level);
3822 b->buf_x_offset = 0;
3823 b->buf_y_offset = 0;
// horizontal offset (presumably for orientations with bit 0 set; the test
// is not visible)
3827 b->buf_x_offset = (w+1)>>1;
// vertical offset of half a stride (presumably for orientation > 1; the
// test is not visible)
3830 b->buf += b->stride>>1;
3831 b->buf_y_offset = b->stride_line >> 1;
// parent is the same orientation one level lower (presumably only when
// level != 0; the test is not visible)
3835 b->parent= &s->plane[plane_index].band[level-1][orientation];
3836 b->x_coeff=av_mallocz(((b->width+1) * b->height+1)*sizeof(x_and_coeff));
3845 width= s->width= avctx->width;
3846 height= s->height= avctx->height;
3848 assert(width && height);
3850 s->avctx->get_buffer(s->avctx, &s->mconly_picture);
/**
 * Derives a per-subband qlog for plane p from the subband's synthesis
 * response.
 *
 * For every subband the coefficient buffer is cleared, a single coefficient
 * of value 256*256 is placed at the centre of the band, the inverse DWT is
 * run, and the output samples are scanned; b->qlog is then set from
 * log(352256/sqrt(error)) expressed in steps of 2^(1/QROOT), rounded.
 *
 * NOTE(review): the embedded line numbers skip values; the declaration of
 * `error` and the statement accumulating it from d are not visible.
 * NOTE(review): the memset size uses sizeof(int), while common_init()
 * allocates the buffer with sizeof(DWTELEM) -- confirm the two types have the
 * same size.
 */
3856 static void calculate_vissual_weight(SnowContext *s, Plane *p){
3857 int width = p->width;
3858 int height= p->height;
3859 int level, orientation, x, y;
3861 for(level=0; level<s->spatial_decomposition_count; level++){
3862 for(orientation=level ? 1 : 0; orientation<4; orientation++){
3863 SubBand *b= &p->band[level][orientation];
3864 DWTELEM *buf= b->buf;
3867 memset(s->spatial_dwt_buffer, 0, sizeof(int)*width*height);
// single impulse at the centre of this band
3868 buf[b->width/2 + b->height/2*b->stride]= 256*256;
3869 ff_spatial_idwt(s->spatial_dwt_buffer, width, height, width, s->spatial_decomposition_type, s->spatial_decomposition_count);
3870 for(y=0; y<height; y++){
3871 for(x=0; x<width; x++){
3872 int64_t d= s->spatial_dwt_buffer[x + y*width];
3877 b->qlog= (int)(log(352256.0/sqrt(error)) / log(pow(2.0, 1.0/QROOT))+0.5);
3878 // av_log(NULL, AV_LOG_DEBUG, "%d %d %d\n", level, orientation, b->qlog/*, sqrt(error)*/);
/**
 * Encoder initialisation: checks the compliance level, allocates the motion
 * estimation scratch buffers inside s->m, sets up two-pass state, computes the
 * per-band visual weights, validates the pixel format and acquires the input
 * picture buffer.
 *
 * NOTE(review): the embedded line numbers skip values; the return statements,
 * the call that performs the shared initialisation, and the switch's break /
 * default lines are not visible in this listing.
 * NOTE(review): in the visible lines the results of av_mallocz() and
 * get_buffer() are not checked.
 */
3883 static int encode_init(AVCodecContext *avctx)
3885 SnowContext *s = avctx->priv_data;
// the encoder is refused unless the caller lowers strict_std_compliance to
// FF_COMPLIANCE_EXPERIMENTAL or below
3888 if(avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL){
3889 av_log(avctx, AV_LOG_ERROR, "this codec is under development, files encoded with it may not be decodable with future versions!!!\n"
3890 "use vstrict=-2 / -strict -2 to use it anyway\n");
3900 s->m.flags = avctx->flags;
3901 s->m.bit_rate= avctx->bit_rate;
// scratch buffers of the embedded context s->m
3903 s->m.me.scratchpad= av_mallocz((avctx->width+64)*2*16*2*sizeof(uint8_t));
3904 s->m.me.map = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t));
3905 s->m.me.score_map = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t));
3906 s->m.obmc_scratchpad= av_mallocz(MB_SIZE*MB_SIZE*12*sizeof(uint32_t));
3907 h263_encode_init(&s->m); //mv_penalty
// first pass: make sure a 256-byte stats_out buffer exists
3909 if(avctx->flags&CODEC_FLAG_PASS1){
3910 if(!avctx->stats_out)
3911 avctx->stats_out = av_mallocz(256);
// second pass: rate control must initialise successfully
3913 if(avctx->flags&CODEC_FLAG_PASS2){
3914 if(ff_rate_control_init(&s->m) < 0)
3918 for(plane_index=0; plane_index<3; plane_index++){
3919 calculate_vissual_weight(s, &s->plane[plane_index]);
3923 avctx->coded_frame= &s->current_picture;
// only PIX_FMT_YUV420P has an active case label; the others are commented out
3924 switch(avctx->pix_fmt){
3925 // case PIX_FMT_YUV444P:
3926 // case PIX_FMT_YUV422P:
3927 case PIX_FMT_YUV420P:
3929 // case PIX_FMT_YUV411P:
3930 // case PIX_FMT_YUV410P:
3931 s->colorspace_type= 0;
3933 /* case PIX_FMT_RGBA32:
3937 av_log(avctx, AV_LOG_ERROR, "format not supported\n");
3940 // avcodec_get_chroma_sub_sample(avctx->pix_fmt, &s->chroma_h_shift, &s->chroma_v_shift);
// chroma subsampling is hard-coded rather than derived from the pixel format
3941 s->chroma_h_shift= 1;
3942 s->chroma_v_shift= 1;
// select the comparison functions requested through me_cmp / me_sub_cmp
3944 ff_set_cmp(&s->dsp, s->dsp.me_cmp, s->avctx->me_cmp);
3945 ff_set_cmp(&s->dsp, s->dsp.me_sub_cmp, s->avctx->me_sub_cmp);
3947 s->avctx->get_buffer(s->avctx, &s->input_picture);
/**
 * Prepares the picture buffers for a new frame: extends the edges of the
 * current picture (if it has data), swaps last_picture and current_picture,
 * then requests a fresh buffer for current_picture, marked as a reference.
 *
 * An error is logged when get_buffer() fails.
 *
 * NOTE(review): the embedded line numbers skip values; the declaration of tmp
 * and the return statements are not visible, so the exact return values
 * cannot be stated from this listing.
 */
3952 static int frame_start(SnowContext *s){
3954 int w= s->avctx->width; //FIXME round up to x16 ?
3955 int h= s->avctx->height;
// luma uses the full size and EDGE_WIDTH; both chroma planes use half of each
3957 if(s->current_picture.data[0]){
3958 draw_edges(s->current_picture.data[0], s->current_picture.linesize[0], w , h , EDGE_WIDTH );
3959 draw_edges(s->current_picture.data[1], s->current_picture.linesize[1], w>>1, h>>1, EDGE_WIDTH/2);
3960 draw_edges(s->current_picture.data[2], s->current_picture.linesize[2], w>>1, h>>1, EDGE_WIDTH/2);
// swap: the picture just finished becomes the reference
3963 tmp= s->last_picture;
3964 s->last_picture= s->current_picture;
3965 s->current_picture= tmp;
3967 s->current_picture.reference= 1;
3968 if(s->avctx->get_buffer(s->avctx, &s->current_picture) < 0){
3969 av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed\n");
/**
 * Encodes one frame into buf.
 *
 * Visible steps: initialise the range coder on buf, copy the caller's picture
 * into s->input_picture, choose the frame type and qlog, mirror settings into
 * the embedded context s->m for P frames, then for each of the three planes
 * run prediction, forward DWT, quantization and subband coding followed by
 * the matching reconstruction (dequantize, inverse DWT, prediction). Finally
 * the previous reference is released and statistics are updated.
 *
 * @param avctx    codec context; priv_data is the SnowContext
 * @param buf      output buffer handed to the range encoder
 * @param buf_size size of buf as passed to ff_init_range_encoder()
 * @param data     input picture, used as an AVFrame
 * @return the value returned by ff_rac_terminate()
 *
 * NOTE(review): the embedded line numbers skip values; many loop headers,
 * else lines, and calls (e.g. those writing the header and the block data)
 * are not visible in this listing.
 */
3976 static int encode_frame(AVCodecContext *avctx, unsigned char *buf, int buf_size, void *data){
3977 SnowContext *s = avctx->priv_data;
3978 RangeCoder * const c= &s->c;
3979 AVFrame *pict = data;
3980 const int width= s->avctx->width;
3981 const int height= s->avctx->height;
3982 int level, orientation, plane_index, i, y;
3984 ff_init_range_encoder(c, buf, buf_size);
3985 ff_build_rac_states(c, 0.05*(1LL<<32), 256-8);
// row-by-row copy of the caller's planes into input_picture (the enclosing
// plane loop and the definition of `shift` are not visible)
3989 for(y=0; y<(height>>shift); y++)
3990 memcpy(&s->input_picture.data[i][y * s->input_picture.linesize[i]],
3991 &pict->data[i][y * pict->linesize[i]],
3994 s->new_picture = *pict;
// second pass: frame type comes from the rate-control entry for this frame,
// and quality is estimated unless CODEC_FLAG_QSCALE is set
3996 if(avctx->flags&CODEC_FLAG_PASS2){
3998 pict->pict_type= s->m.rc_context.entry[avctx->frame_number].new_pict_type;
3999 s->keyframe= pict->pict_type==FF_I_TYPE;
4000 s->m.picture_number= avctx->frame_number;
4001 if(!(avctx->flags&CODEC_FLAG_QSCALE))
4002 pict->quality= ff_rate_estimate_qscale(&s->m, 0);
// presumably the non-PASS2 branch: keyframe when gop_size is 0 or the frame
// number is a multiple of gop_size
4004 s->keyframe= avctx->gop_size==0 || avctx->frame_number % avctx->gop_size == 0;
4005 pict->pict_type= s->keyframe ? FF_I_TYPE : FF_P_TYPE;
// qlog from log2(quality / FF_QP2LAMBDA) in units of 1/QROOT, plus a fixed
// offset; LOSSLESS_QLOG is assigned under a condition that is not visible
4009 s->qlog= rint(QROOT*log(pict->quality / (float)FF_QP2LAMBDA)/log(2));
4011 s->qlog += 61*QROOT/8;
4013 s->qlog= LOSSLESS_QLOG;
4017 s->current_picture.key_frame= s->keyframe;
4019 s->m.current_picture_ptr= &s->m.current_picture;
// P frames: mirror luma pointers, strides, dimensions and settings into s->m
4020 if(pict->pict_type == P_TYPE){
4021 int block_width = (width +15)>>4;
4022 int block_height= (height+15)>>4;
4023 int stride= s->current_picture.linesize[0];
4025 assert(s->current_picture.data[0]);
4026 assert(s->last_picture.data[0]);
4028 s->m.avctx= s->avctx;
4029 s->m.current_picture.data[0]= s->current_picture.data[0];
4030 s->m. last_picture.data[0]= s-> last_picture.data[0];
4031 s->m. new_picture.data[0]= s-> input_picture.data[0];
4032 s->m. last_picture_ptr= &s->m. last_picture;
4034 s->m. last_picture.linesize[0]=
4035 s->m. new_picture.linesize[0]=
4036 s->m.current_picture.linesize[0]= stride;
4037 s->m.uvlinesize= s->current_picture.linesize[1];
4039 s->m.height= height;
// dimensions in 16x16 units, rounded up
4040 s->m.mb_width = block_width;
4041 s->m.mb_height= block_height;
4042 s->m.mb_stride= s->m.mb_width+1;
4043 s->m.b8_stride= 2*s->m.mb_width+1;
4045 s->m.pict_type= pict->pict_type;
4046 s->m.me_method= s->avctx->me_method;
4047 s->m.me.scene_change_score=0;
4048 s->m.flags= s->avctx->flags;
4049 s->m.quarter_sample= (s->avctx->flags & CODEC_FLAG_QPEL)!=0;
4050 s->m.out_format= FMT_H263;
4051 s->m.unrestricted_mv= 1;
// lambda values derived from the picture quality
4053 s->lambda = s->m.lambda= pict->quality * 3/2; //FIXME bug somewhere else
4054 s->m.qscale= (s->m.lambda*139 + FF_LAMBDA_SCALE*64) >> (FF_LAMBDA_SHIFT + 7);
4055 s->lambda2= s->m.lambda2= (s->m.lambda*s->m.lambda + FF_LAMBDA_SCALE/2) >> FF_LAMBDA_SHIFT;
4057 s->m.dsp= s->dsp; //move
// dequantization bias: 2 for P frames, 0 otherwise
4064 s->qbias= pict->pict_type == P_TYPE ? 2 : 0;
// bit accounting: bits written so far, then bits written since that point
// (the calls between the two measurements are not visible)
4067 s->m.misc_bits = 8*(s->c.bytestream - s->c.bytestream_start);
4069 s->m.mv_bits = 8*(s->c.bytestream - s->c.bytestream_start) - s->m.misc_bits;
4071 for(plane_index=0; plane_index<3; plane_index++){
4072 Plane *p= &s->plane[plane_index];
4076 // int bits= put_bits_count(&s->c.pb);
// load the input plane into the coefficient buffer, scaled up by FRAC_BITS
4079 if(pict->data[plane_index]) //FIXME gray hack
4082 s->spatial_dwt_buffer[y*w + x]= pict->data[plane_index][y*pict->linesize[plane_index] + x]<<FRAC_BITS;
// prediction step with last argument 0 (the reconstruction below passes 1)
4085 predict_plane(s, s->spatial_dwt_buffer, plane_index, 0);
// scene change on a P frame: restart the range encoder and switch to an
// I frame (the first part of this condition and the restart jump are not visible)
4088 && pict->pict_type == P_TYPE
4089 && s->m.me.scene_change_score > s->avctx->scenechange_threshold){
4090 ff_init_range_encoder(c, buf, buf_size);
4091 ff_build_rac_states(c, 0.05*(1LL<<32), 256-8);
4092 pict->pict_type= FF_I_TYPE;
// lossless: drop the FRAC_BITS fractional bits with rounding
4098 if(s->qlog == LOSSLESS_QLOG){
4101 s->spatial_dwt_buffer[y*w + x]= (s->spatial_dwt_buffer[y*w + x] + (1<<(FRAC_BITS-1))-1)>>FRAC_BITS;
4106 ff_spatial_dwt(s->spatial_dwt_buffer, w, h, w, s->spatial_decomposition_type, s->spatial_decomposition_count);
// per subband: quantize, decorrelate, entropy-code, then correlate again
4108 for(level=0; level<s->spatial_decomposition_count; level++){
4109 for(orientation=level ? 1 : 0; orientation<4; orientation++){
4110 SubBand *b= &p->band[level][orientation];
4112 quantize(s, b, b->buf, b->stride, s->qbias);
4114 decorrelate(s, b, b->buf, b->stride, pict->pict_type == P_TYPE, 0);
4115 encode_subband(s, b, b->buf, b->parent ? b->parent->buf : NULL, b->stride, orientation);
4116 assert(b->parent==NULL || b->parent->stride == b->stride*2);
4118 correlate(s, b, b->buf, b->stride, 1, 0);
4121 // av_log(NULL, AV_LOG_DEBUG, "plane:%d bits:%d\n", plane_index, put_bits_count(&s->c.pb) - bits);
// reconstruction: dequantize every subband, then inverse DWT
4123 for(level=0; level<s->spatial_decomposition_count; level++){
4124 for(orientation=level ? 1 : 0; orientation<4; orientation++){
4125 SubBand *b= &p->band[level][orientation];
4127 dequantize(s, b, b->buf, b->stride);
4131 ff_spatial_idwt(s->spatial_dwt_buffer, w, h, w, s->spatial_decomposition_type, s->spatial_decomposition_count);
// lossless: restore the FRAC_BITS scaling removed before the forward DWT
4132 if(s->qlog == LOSSLESS_QLOG){
4135 s->spatial_dwt_buffer[y*w + x]<<=FRAC_BITS;
4140 predict_plane(s, s->spatial_dwt_buffer, plane_index, 1);
4141 STOP_TIMER("pred-conv")}
// PSNR: compare the reconstructed plane with the input plane (the
// accumulation of d into `error` is not visible)
4142 if(s->avctx->flags&CODEC_FLAG_PSNR){
4145 if(pict->data[plane_index]) //FIXME gray hack
4148 int d= s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x] - pict->data[plane_index][y*pict->linesize[plane_index] + x];
4152 s->avctx->error[plane_index] += error;
4153 s->current_picture.error[plane_index] = error;
// the previous reference picture is no longer needed
4157 if(s->last_picture.data[0])
4158 avctx->release_buffer(avctx, &s->last_picture);
4160 s->current_picture.coded_picture_number = avctx->frame_number;
4161 s->current_picture.pict_type = pict->pict_type;
4162 s->current_picture.quality = pict->quality;
// first pass: texture bits are whatever remains after misc and mv bits
4163 if(avctx->flags&CODEC_FLAG_PASS1){
4164 s->m.p_tex_bits = 8*(s->c.bytestream - s->c.bytestream_start) - s->m.misc_bits - s->m.mv_bits;
4165 s->m.current_picture.display_picture_number =
4166 s->m.current_picture.coded_picture_number = avctx->frame_number;
4167 s->m.pict_type = pict->pict_type;
4168 s->m.current_picture.quality = pict->quality;
4169 ff_write_pass1_stats(&s->m);
4171 if(avctx->flags&CODEC_FLAG_PASS2){
4172 s->m.total_bits += 8*(s->c.bytestream - s->c.bytestream_start);
4177 return ff_rac_terminate(c);
/**
 * Frees the buffers owned by the context: the coefficient buffer, the
 * scratch buffers inside s->m, the block array and every subband's x_coeff
 * array. av_freep() is given the address of each pointer.
 *
 * NOTE(review): the embedded line numbers skip values, so further statements
 * of this function may exist that are not visible in this listing.
 */
4180 static void common_end(SnowContext *s){
4181 int plane_index, level, orientation;
4183 av_freep(&s->spatial_dwt_buffer);
4185 av_freep(&s->m.me.scratchpad);
4186 av_freep(&s->m.me.map);
4187 av_freep(&s->m.me.score_map);
4188 av_freep(&s->m.obmc_scratchpad);
4190 av_freep(&s->block);
// same plane / level / orientation traversal as the allocation in common_init()
4192 for(plane_index=0; plane_index<3; plane_index++){
4193 for(level=s->spatial_decomposition_count-1; level>=0; level--){
4194 for(orientation=level ? 1 : 0; orientation<4; orientation++){
4195 SubBand *b= &s->plane[plane_index].band[level][orientation];
4197 av_freep(&b->x_coeff);
/**
 * Encoder teardown; the visible lines free avctx->stats_out.
 *
 * NOTE(review): the embedded line numbers skip values; any call releasing the
 * shared context state and the return statement are not visible here.
 */
4203 static int encode_end(AVCodecContext *avctx)
4205 SnowContext *s = avctx->priv_data;
4208 av_free(avctx->stats_out);
/**
 * Decoder initialisation: fixes the output pixel format to PIX_FMT_YUV420P
 * and initialises the slice buffer s->sb from the luma plane dimensions.
 *
 * NOTE(review): the embedded line numbers skip values; the declaration of
 * block_size, any call performing the shared initialisation and the return
 * statement are not visible in this listing.
 */
4213 static int decode_init(AVCodecContext *avctx)
4215 SnowContext *s = avctx->priv_data;
4218 avctx->pix_fmt= PIX_FMT_YUV420P;
4222 block_size = MB_SIZE >> s->block_max_depth;
// third argument: block_size + count*(count+3) + 1, with count being
// spatial_decomposition_count -- presumably the number of lines kept in the
// buffer; confirm against slice_buffer_init()
4223 slice_buffer_init(&s->sb, s->plane[0].height, (block_size) + (s->spatial_decomposition_count * (s->spatial_decomposition_count + 3)) + 1, s->plane[0].width, s->spatial_dwt_buffer);
/**
 * Decodes one frame from buf.
 *
 * Visible steps: initialise the range decoder on buf, allocate the block
 * array on first use, then for each of the three planes unpack all subband
 * coefficients and reconstruct the plane slice by slice (subband decode,
 * correlate, dequantize, inverse DWT, prediction). Afterwards the previous
 * reference is released and *picture / *data_size are filled in.
 *
 * @param avctx     codec context; priv_data is the SnowContext
 * @param data      output, written as an AVFrame
 * @param data_size set to sizeof(AVFrame)
 * @param buf       input bitstream
 * @param buf_size  size of buf as passed to ff_init_range_decoder()
 *
 * NOTE(review): the embedded line numbers skip values; the header and block
 * decoding calls, several loop headers and declarations, and the return
 * statement are not visible in this listing.
 */
4228 static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, uint8_t *buf, int buf_size){
4229 SnowContext *s = avctx->priv_data;
4230 RangeCoder * const c= &s->c;
4232 AVFrame *picture = data;
4233 int level, orientation, plane_index;
4235 ff_init_range_decoder(c, buf, buf_size);
4236 ff_build_rac_states(c, 0.05*(1LL<<32), 256-8);
4238 s->current_picture.pict_type= FF_I_TYPE; //FIXME I vs. P
// block array is allocated lazily on the first frame
4240 if(!s->block) alloc_blocks(s);
4243 //keyframe flag duplication mess FIXME
4244 if(avctx->debug&FF_DEBUG_PICT_INFO)
4245 av_log(avctx, AV_LOG_ERROR, "keyframe:%d qlog:%d\n", s->keyframe, s->qlog);
4249 for(plane_index=0; plane_index<3; plane_index++){
4250 Plane *p= &s->plane[plane_index];
4254 int decode_state[MAX_DECOMPOSITIONS][4][1]; /* Stored state info for unpack_coeffs. 1 variable per instance. */
// debug bit 2048: produce the prediction-only picture and copy it into
// mconly_picture (returned instead of current_picture at the end)
4256 if(s->avctx->debug&2048){
4257 memset(s->spatial_dwt_buffer, 0, sizeof(DWTELEM)*w*h);
4258 predict_plane(s, s->spatial_dwt_buffer, plane_index, 1);
4262 int v= s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x];
4263 s->mconly_picture.data[plane_index][y*s->mconly_picture.linesize[plane_index] + x]= v;
// entropy-decode all subbands of this plane
4269 for(level=0; level<s->spatial_decomposition_count; level++){
4270 for(orientation=level ? 1 : 0; orientation<4; orientation++){
4271 SubBand *b= &p->band[level][orientation];
4272 unpack_coeffs(s, b, b->parent, orientation);
4275 STOP_TIMER("unpack coeffs");
// slice-based reconstruction; chroma planes use half the luma block size
4279 const int mb_h= s->b_height << s->block_max_depth;
4280 const int block_size = MB_SIZE >> s->block_max_depth;
4281 const int block_w = plane_index ? block_size/2 : block_size;
4283 dwt_compose_t cs[MAX_DECOMPOSITIONS];
4288 ff_spatial_idwt_buffered_init(cs, &s->sb, w, h, 1, s->spatial_decomposition_type, s->spatial_decomposition_count);
// note the inclusive bound: one iteration more than there are block rows
4289 for(mb_y=0; mb_y<=mb_h; mb_y++){
4291 int slice_starty = block_w*mb_y;
4292 int slice_h = block_w*(mb_y+1);
// unless this is a keyframe (or debug bit 512 is set) the slice is moved
// up by half a block
4293 if (!(s->keyframe || s->avctx->debug&512)){
4294 slice_starty = FFMAX(0, slice_starty - (block_w >> 1));
4295 slice_h -= (block_w >> 1);
4300 for(level=0; level<s->spatial_decomposition_count; level++){
4301 for(orientation=level ? 1 : 0; orientation<4; orientation++){
4302 SubBand *b= &p->band[level][orientation];
4305 int our_mb_start = mb_y;
4306 int our_mb_end = (mb_y + 1);
// row range of this subband needed for the slice: block rows scaled down
// to the band's resolution plus a margin (`extra` is defined on a line
// that is not visible)
4308 start_y = (mb_y ? ((block_w * our_mb_start) >> (s->spatial_decomposition_count - level)) + s->spatial_decomposition_count - level + extra: 0);
4309 end_y = (((block_w * our_mb_end) >> (s->spatial_decomposition_count - level)) + s->spatial_decomposition_count - level + extra);
4310 if (!(s->keyframe || s->avctx->debug&512)){
4311 start_y = FFMAX(0, start_y - (block_w >> (1+s->spatial_decomposition_count - level)));
4312 end_y = FFMAX(0, end_y - (block_w >> (1+s->spatial_decomposition_count - level)));
// clamp to the band height
4314 start_y = FFMIN(b->height, start_y);
4315 end_y = FFMIN(b->height, end_y);
4317 if (start_y != end_y){
// orientation 0 (band[0][0]) is additionally correlated and dequantized;
// its decode/correlate range runs one row ahead of the dequantize range
4318 if (orientation == 0){
4319 SubBand * correlate_band = &p->band[0][0];
4320 int correlate_end_y = FFMIN(b->height, end_y + 1);
4321 int correlate_start_y = FFMIN(b->height, (start_y ? start_y + 1 : 0));
4322 decode_subband_slice_buffered(s, correlate_band, &s->sb, correlate_start_y, correlate_end_y, decode_state[0][0]);
4323 correlate_slice_buffered(s, &s->sb, correlate_band, correlate_band->buf, correlate_band->stride, 1, 0, correlate_start_y, correlate_end_y);
4324 dequantize_slice_buffered(s, &s->sb, correlate_band, correlate_band->buf, correlate_band->stride, start_y, end_y);
// presumably the branch for the other orientations (the else line is not visible)
4327 decode_subband_slice_buffered(s, b, &s->sb, start_y, end_y, decode_state[level][orientation]);
4331 STOP_TIMER("decode_subband_slice");
// advance the inverse DWT in steps of 4 rows up to the end of this slice
4335 for(; yd<slice_h; yd+=4){
4336 ff_spatial_idwt_buffered_slice(&s->dsp, cs, &s->sb, w, h, 1, s->spatial_decomposition_type, s->spatial_decomposition_count, yd);
4338 STOP_TIMER("idwt slice");}
// lossless: scale the reconstructed rows up by FRAC_BITS
4341 if(s->qlog == LOSSLESS_QLOG){
4342 for(; yq<slice_h && yq<h; yq++){
4343 DWTELEM * line = slice_buffer_get_line(&s->sb, yq);
4345 line[x] <<= FRAC_BITS;
4350 predict_slice_buffered(s, &s->sb, s->spatial_dwt_buffer, plane_index, 1, mb_y);
// release the slice-buffer lines of the finished slice
4352 y = FFMIN(p->height, slice_starty);
4353 end_y = FFMIN(p->height, slice_h);
4355 slice_buffer_release(&s->sb, y++);
4358 slice_buffer_flush(&s->sb);
4360 STOP_TIMER("idwt + predict_slices")}
4365 if(s->last_picture.data[0])
4366 avctx->release_buffer(avctx, &s->last_picture);
// output: the decoded picture, or the prediction-only picture when debug bit
// 2048 is set
4368 if(!(s->avctx->debug&2048))
4369 *picture= s->current_picture;
4371 *picture= s->mconly_picture;
4373 *data_size = sizeof(AVFrame);
// number of bytes consumed by the range decoder; zero is reported as an error
4375 bytes_read= c->bytestream - c->bytestream_start;
4376 if(bytes_read ==0) av_log(s->avctx, AV_LOG_ERROR, "error at end of frame\n"); //FIXME
/**
 * Decoder teardown; the visible lines destroy the slice buffer s->sb.
 *
 * NOTE(review): the embedded line numbers skip values; any call releasing the
 * shared context state and the return statement are not visible here.
 */
4381 static int decode_end(AVCodecContext *avctx)
4383 SnowContext *s = avctx->priv_data;
4385 slice_buffer_destroy(&s->sb);
/*
 * Codec registration entry for the decoder. The private context size is
 * sizeof(SnowContext); the capabilities field is 0, with CODEC_CAP_DR1 and
 * CODEC_CAP_DRAW_HORIZ_BAND left commented out.
 * NOTE(review): the remaining initializer fields (name, type, id, callbacks)
 * are on lines that are not visible in this listing.
 */
4392 AVCodec snow_decoder = {
4396 sizeof(SnowContext),
4401 0 /*CODEC_CAP_DR1*/ /*| CODEC_CAP_DRAW_HORIZ_BAND*/,
4405 #ifdef CONFIG_ENCODERS
/*
 * Codec registration entry for the encoder, compiled only when
 * CONFIG_ENCODERS is defined. The private context size is sizeof(SnowContext).
 * NOTE(review): the remaining initializer fields are on lines that are not
 * visible in this listing.
 */
4406 AVCodec snow_encoder = {
4410 sizeof(SnowContext),
4426 int buffer[2][width*height];
4429 s.spatial_decomposition_count=6;
4430 s.spatial_decomposition_type=1;
4432 printf("testing 5/3 DWT\n");
4433 for(i=0; i<width*height; i++)
4434 buffer[0][i]= buffer[1][i]= random()%54321 - 12345;
4436 ff_spatial_dwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4437 ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4439 for(i=0; i<width*height; i++)
4440 if(buffer[0][i]!= buffer[1][i]) printf("fsck: %d %d %d\n",i, buffer[0][i], buffer[1][i]);
4442 printf("testing 9/7 DWT\n");
4443 s.spatial_decomposition_type=0;
4444 for(i=0; i<width*height; i++)
4445 buffer[0][i]= buffer[1][i]= random()%54321 - 12345;
4447 ff_spatial_dwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4448 ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4450 for(i=0; i<width*height; i++)
4451 if(ABS(buffer[0][i] - buffer[1][i])>20) printf("fsck: %d %d %d\n",i, buffer[0][i], buffer[1][i]);
4454 printf("testing AC coder\n");
4455 memset(s.header_state, 0, sizeof(s.header_state));
4456 ff_init_range_encoder(&s.c, buffer[0], 256*256);
4457 ff_init_cabac_states(&s.c, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64);
4459 for(i=-256; i<256; i++){
4461 put_symbol(&s.c, s.header_state, i*i*i/3*ABS(i), 1);
4462 STOP_TIMER("put_symbol")
4464 ff_rac_terminate(&s.c);
4466 memset(s.header_state, 0, sizeof(s.header_state));
4467 ff_init_range_decoder(&s.c, buffer[0], 256*256);
4468 ff_init_cabac_states(&s.c, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64);
4470 for(i=-256; i<256; i++){
4473 j= get_symbol(&s.c, s.header_state, 1);
4474 STOP_TIMER("get_symbol")
4475 if(j!=i*i*i/3*ABS(i)) printf("fsck: %d != %d\n", i, j);
4479 int level, orientation, x, y;
4480 int64_t errors[8][4];
4483 memset(errors, 0, sizeof(errors));
4484 s.spatial_decomposition_count=3;
4485 s.spatial_decomposition_type=0;
4486 for(level=0; level<s.spatial_decomposition_count; level++){
4487 for(orientation=level ? 1 : 0; orientation<4; orientation++){
4488 int w= width >> (s.spatial_decomposition_count-level);
4489 int h= height >> (s.spatial_decomposition_count-level);
4490 int stride= width << (s.spatial_decomposition_count-level);
4491 DWTELEM *buf= buffer[0];
4494 if(orientation&1) buf+=w;
4495 if(orientation>1) buf+=stride>>1;
4497 memset(buffer[0], 0, sizeof(int)*width*height);
4498 buf[w/2 + h/2*stride]= 256*256;
4499 ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4500 for(y=0; y<height; y++){
4501 for(x=0; x<width; x++){
4502 int64_t d= buffer[0][x + y*width];
4504 if(ABS(width/2-x)<9 && ABS(height/2-y)<9 && level==2) printf("%8lld ", d);
4506 if(ABS(height/2-y)<9 && level==2) printf("\n");
4508 error= (int)(sqrt(error)+0.5);
4509 errors[level][orientation]= error;
4510 if(g) g=ff_gcd(g, error);
4514 printf("static int const visual_weight[][4]={\n");
4515 for(level=0; level<s.spatial_decomposition_count; level++){
4517 for(orientation=0; orientation<4; orientation++){
4518 printf("%8lld,", errors[level][orientation]/g);
4526 int w= width >> (s.spatial_decomposition_count-level);
4527 int h= height >> (s.spatial_decomposition_count-level);
4528 int stride= width << (s.spatial_decomposition_count-level);
4529 DWTELEM *buf= buffer[0];
4535 memset(buffer[0], 0, sizeof(int)*width*height);
4537 for(y=0; y<height; y++){
4538 for(x=0; x<width; x++){
4539 int tab[4]={0,2,3,1};
4540 buffer[0][x+width*y]= 256*256*tab[(x&1) + 2*(y&1)];
4543 ff_spatial_dwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4547 buf[x + y*stride ]=169;
4548 buf[x + y*stride-w]=64;
4551 ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4553 for(y=0; y<height; y++){
4554 for(x=0; x<width; x++){
4555 int64_t d= buffer[0][x + y*width];
4557 if(ABS(width/2-x)<9 && ABS(height/2-y)<9) printf("%8lld ", d);
4559 if(ABS(height/2-y)<9) printf("\n");