2 * Copyright (C) 2004 Michael Niedermayer <michaelni@gmx.at>
4 * This library is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU Lesser General Public
6 * License as published by the Free Software Foundation; either
7 * version 2 of the License, or (at your option) any later version.
9 * This library is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * Lesser General Public License for more details.
14 * You should have received a copy of the GNU Lesser General Public
15 * License along with this library; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
23 #include "rangecoder.h"
26 #include "mpegvideo.h"
/** First dimension of Plane.band[][4]. */
31 #define MAX_DECOMPOSITIONS 8
/** 2^QSHIFT (QSHIFT is defined outside this excerpt); length of the qexp[] table. */
35 #define QROOT (1<<QSHIFT)
/** NOTE(review): presumably the qlog value that requests lossless coding -- confirm at the users, which are not shown here. */
36 #define LOSSLESS_QLOG -128
/**
 * 256-entry lookup table with outputs in {-1, 0, +1}.
 * Indices 0, 1 and 255 map to 0, indices 2..127 map to +1 and
 * indices 128..254 map to -1.
 * NOTE(review): the layout is consistent with indexing by the low 8 bits of
 * a signed value (128..255 standing for -128..-1) -- confirm at the call
 * sites, which are outside this excerpt.
 */
39 static const int8_t quant3[256]={
40 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
41 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
42 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
43 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
44 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
45 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
46 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
47 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
48 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
49 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
50 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
51 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
52 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
53 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
54 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
55 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 0,
/**
 * 256-entry lookup table with outputs in {-1, 0, +1}.
 * Index 0 maps to 0, indices 1..127 map to +1 and indices 128..255 map
 * to -1 (unlike quant3, indices 1 and 255 are not mapped to 0).
 * NOTE(review): consistent with indexing by the low 8 bits of a signed
 * value -- confirm at the call sites, which are outside this excerpt.
 */
57 static const int8_t quant3b[256]={
58 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
59 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
60 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
61 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
62 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
63 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
64 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
65 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
66 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
67 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
68 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
69 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
70 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
71 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
72 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
73 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
/**
 * 256-entry lookup table with outputs in {-1, 0, +1}.
 * Indices 0 and 1 map to 0; from index 2 onwards even indices map to +1
 * and odd indices map to -1.
 * NOTE(review): the intended meaning of the index is not visible in this
 * excerpt -- confirm at the call sites.
 */
75 static const int8_t quant3bA[256]={
76 0, 0, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
77 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
78 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
79 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
80 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
81 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
82 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
83 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
84 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
85 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
86 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
87 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
88 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
89 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
90 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
91 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
/**
 * 256-entry lookup table with outputs in [-2, 2].
 * Index 0 maps to 0, 1..3 to +1, 4..127 to +2, 128..252 to -2 and
 * 253..255 to -1.
 * NOTE(review): consistent with indexing by the low 8 bits of a signed
 * value -- confirm at the call sites, which are outside this excerpt.
 */
93 static const int8_t quant5[256]={
94 0, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
95 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
96 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
97 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
98 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
99 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
100 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
101 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
102 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
103 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
104 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
105 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
106 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
107 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
108 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
109 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,-1,
/**
 * 256-entry lookup table with outputs in [-3, 3].
 * Index 0 maps to 0; indices 1..127 map to positive levels that grow with
 * the index, indices 128..255 map to negative levels that approach -1 as
 * the index approaches 255.
 * NOTE(review): consistent with indexing by the low 8 bits of a signed
 * value -- confirm at the call sites, which are outside this excerpt.
 */
111 static const int8_t quant7[256]={
112 0, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
113 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
114 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3,
115 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
116 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
117 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
118 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
119 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
120 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
121 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
122 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
123 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
124 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
125 -3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-2,-2,-2,
126 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
127 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,
/**
 * 256-entry lookup table with outputs in [-4, 4].
 * Index 0 maps to 0; indices 1..127 map to positive levels that grow with
 * the index, indices 128..255 map to negative levels that approach -1 as
 * the index approaches 255.
 * NOTE(review): consistent with indexing by the low 8 bits of a signed
 * value -- confirm at the call sites, which are outside this excerpt.
 */
129 static const int8_t quant9[256]={
130 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3,
131 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
132 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
133 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
134 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
135 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
136 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
137 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
138 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
139 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
140 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
141 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
142 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
143 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
144 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,
145 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-1,-1,
/**
 * 256-entry lookup table with outputs in [-5, 5].
 * Index 0 maps to 0; indices 1..127 map to positive levels that grow with
 * the index, indices 128..255 map to negative levels that approach -1 as
 * the index approaches 255.
 * NOTE(review): consistent with indexing by the low 8 bits of a signed
 * value -- confirm at the call sites, which are outside this excerpt.
 */
147 static const int8_t quant11[256]={
148 0, 1, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4,
149 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
150 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
151 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
152 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
153 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
154 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
155 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
156 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
157 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
158 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
159 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
160 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
161 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-4,-4,
162 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
163 -4,-4,-4,-4,-4,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-1,
/**
 * 256-entry lookup table with outputs in [-6, 6].
 * Index 0 maps to 0; indices 1..127 map to positive levels that grow with
 * the index, indices 128..255 map to negative levels that approach -1 as
 * the index approaches 255.
 * NOTE(review): consistent with indexing by the low 8 bits of a signed
 * value -- confirm at the call sites, which are outside this excerpt.
 */
165 static const int8_t quant13[256]={
166 0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
167 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
168 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
169 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
170 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
171 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
172 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
173 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
174 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
175 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
176 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
177 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
178 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-5,
179 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
180 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
181 -4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,-2,-2,-1,
/** Base-2 logarithm of OBMC_MAX. */
184 #define LOG2_OBMC_MAX 6
/** 1<<LOG2_OBMC_MAX, i.e. 64. NOTE(review): presumably the fixed-point scale of the obmc* weight tables -- confirm at the users, which are not shown here. */
185 #define OBMC_MAX (1<<(LOG2_OBMC_MAX))
/**
 * 1024 weights laid out as 32 rows of 32 values; the values are 0 along the
 * border and rise to 64 at the centre.
 * NOTE(review): this excerpt contains three definitions named obmc32 (and
 * three named obmc16). The preprocessor conditionals that presumably select
 * exactly one pair are not visible here -- check the full file to see which
 * variant is compiled.
 */
187 static const uint8_t obmc32[1024]={
188 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
189 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
190 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
191 0, 0, 1, 1, 2, 2, 3, 4, 4, 5, 6, 6, 7, 7, 8, 8, 8, 8, 7, 7, 6, 6, 5, 4, 4, 3, 2, 2, 1, 1, 0, 0,
192 0, 0, 1, 2, 2, 3, 4, 6, 7, 8, 9,10,11,12,12,12,12,12,12,11,10, 9, 8, 7, 6, 4, 3, 2, 2, 1, 0, 0,
193 0, 1, 1, 2, 3, 5, 6, 8,10,11,13,14,15,16,17,18,18,17,16,15,14,13,11,10, 8, 6, 5, 3, 2, 1, 1, 0,
194 0, 1, 1, 3, 4, 6, 8,10,13,15,17,19,20,22,22,23,23,22,22,20,19,17,15,13,10, 8, 6, 4, 3, 1, 1, 0,
195 0, 1, 2, 4, 6, 8,10,13,16,19,21,23,25,27,28,29,29,28,27,25,23,21,19,16,13,10, 8, 6, 4, 2, 1, 0,
196 0, 1, 2, 4, 7,10,13,16,19,22,25,28,31,33,34,35,35,34,33,31,28,25,22,19,16,13,10, 7, 4, 2, 1, 0,
197 0, 1, 3, 5, 8,11,15,19,22,26,30,33,36,38,40,41,41,40,38,36,33,30,26,22,19,15,11, 8, 5, 3, 1, 0,
198 0, 1, 3, 6, 9,12,17,21,25,30,34,38,41,44,45,46,46,45,44,41,38,34,30,25,21,17,12, 9, 6, 3, 1, 0,
199 0, 1, 3, 6,10,14,19,23,28,33,38,42,45,48,51,52,52,51,48,45,42,38,33,28,23,19,14,10, 6, 3, 1, 0,
200 0, 1, 4, 7,11,15,20,25,31,36,41,45,49,52,55,56,56,55,52,49,45,41,36,31,25,20,15,11, 7, 4, 1, 0,
201 0, 2, 4, 7,12,16,22,27,33,38,44,48,52,56,58,60,60,58,56,52,48,44,38,33,27,22,16,12, 7, 4, 2, 0,
202 0, 1, 4, 8,12,17,22,28,34,40,45,51,55,58,61,62,62,61,58,55,51,45,40,34,28,22,17,12, 8, 4, 1, 0,
203 0, 2, 4, 8,12,18,23,29,35,41,46,52,56,60,62,64,64,62,60,56,52,46,41,35,29,23,18,12, 8, 4, 2, 0,
204 0, 2, 4, 8,12,18,23,29,35,41,46,52,56,60,62,64,64,62,60,56,52,46,41,35,29,23,18,12, 8, 4, 2, 0,
205 0, 1, 4, 8,12,17,22,28,34,40,45,51,55,58,61,62,62,61,58,55,51,45,40,34,28,22,17,12, 8, 4, 1, 0,
206 0, 2, 4, 7,12,16,22,27,33,38,44,48,52,56,58,60,60,58,56,52,48,44,38,33,27,22,16,12, 7, 4, 2, 0,
207 0, 1, 4, 7,11,15,20,25,31,36,41,45,49,52,55,56,56,55,52,49,45,41,36,31,25,20,15,11, 7, 4, 1, 0,
208 0, 1, 3, 6,10,14,19,23,28,33,38,42,45,48,51,52,52,51,48,45,42,38,33,28,23,19,14,10, 6, 3, 1, 0,
209 0, 1, 3, 6, 9,12,17,21,25,30,34,38,41,44,45,46,46,45,44,41,38,34,30,25,21,17,12, 9, 6, 3, 1, 0,
210 0, 1, 3, 5, 8,11,15,19,22,26,30,33,36,38,40,41,41,40,38,36,33,30,26,22,19,15,11, 8, 5, 3, 1, 0,
211 0, 1, 2, 4, 7,10,13,16,19,22,25,28,31,33,34,35,35,34,33,31,28,25,22,19,16,13,10, 7, 4, 2, 1, 0,
212 0, 1, 2, 4, 6, 8,10,13,16,19,21,23,25,27,28,29,29,28,27,25,23,21,19,16,13,10, 8, 6, 4, 2, 1, 0,
213 0, 1, 1, 3, 4, 6, 8,10,13,15,17,19,20,22,22,23,23,22,22,20,19,17,15,13,10, 8, 6, 4, 3, 1, 1, 0,
214 0, 1, 1, 2, 3, 5, 6, 8,10,11,13,14,15,16,17,18,18,17,16,15,14,13,11,10, 8, 6, 5, 3, 2, 1, 1, 0,
215 0, 0, 1, 2, 2, 3, 4, 6, 7, 8, 9,10,11,12,12,12,12,12,12,11,10, 9, 8, 7, 6, 4, 3, 2, 2, 1, 0, 0,
216 0, 0, 1, 1, 2, 2, 3, 4, 4, 5, 6, 6, 7, 7, 8, 8, 8, 8, 7, 7, 6, 6, 5, 4, 4, 3, 2, 2, 1, 1, 0, 0,
217 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
218 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
219 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
/**
 * 256 weights laid out as 16 rows of 16 values; the corners are 0 and the
 * values rise to 62 at the centre.
 * NOTE(review): one of three definitions named obmc16 in this excerpt; the
 * preprocessor conditionals that presumably select one are not visible here.
 */
222 static const uint8_t obmc16[256]={
223 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
224 0, 1, 1, 2, 4, 5, 5, 6, 6, 5, 5, 4, 2, 1, 1, 0,
225 0, 1, 4, 6, 9,11,13,15,15,13,11, 9, 6, 4, 1, 0,
226 0, 2, 6,11,15,20,24,26,26,24,20,15,11, 6, 2, 0,
227 0, 4, 9,15,23,29,34,38,38,34,29,23,15, 9, 4, 0,
228 0, 5,11,20,29,38,45,49,49,45,38,29,20,11, 5, 0,
229 1, 5,13,24,34,45,53,57,57,53,45,34,24,13, 5, 1,
230 1, 6,15,26,38,49,57,62,62,57,49,38,26,15, 6, 1,
231 1, 6,15,26,38,49,57,62,62,57,49,38,26,15, 6, 1,
232 1, 5,13,24,34,45,53,57,57,53,45,34,24,13, 5, 1,
233 0, 5,11,20,29,38,45,49,49,45,38,29,20,11, 5, 0,
234 0, 4, 9,15,23,29,34,38,38,34,29,23,15, 9, 4, 0,
235 0, 2, 6,11,15,20,24,26,26,24,20,15,11, 6, 2, 0,
236 0, 1, 4, 6, 9,11,13,15,15,13,11, 9, 6, 4, 1, 0,
237 0, 1, 1, 2, 4, 5, 5, 6, 6, 5, 5, 4, 2, 1, 1, 0,
238 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
/**
 * 1024 weights laid out as 32 rows of 32 values; the corners are 0 and the
 * values rise to 60 at the centre.
 * NOTE(review): one of three definitions named obmc32 in this excerpt; the
 * preprocessor conditionals that presumably select one are not visible here.
 */
242 static const uint8_t obmc32[1024]={
243 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0,
244 0, 1, 1, 1, 2, 2, 2, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 5, 5, 5, 4, 4, 4, 3, 3, 2, 2, 2, 1, 1, 1, 0,
245 0, 1, 2, 2, 3, 3, 4, 5, 5, 6, 7, 7, 8, 8, 9,10,10, 9, 8, 8, 7, 7, 6, 5, 5, 4, 3, 3, 2, 2, 1, 0,
246 0, 1, 2, 3, 4, 5, 6, 7, 7, 8, 9,10,11,12,13,14,14,13,12,11,10, 9, 8, 7, 7, 6, 5, 4, 3, 2, 1, 0,
247 1, 2, 3, 4, 5, 6, 7, 8,10,11,12,13,14,15,16,17,17,16,15,14,13,12,11,10, 8, 7, 6, 5, 4, 3, 2, 1,
248 1, 2, 3, 5, 6, 8, 9,10,12,13,14,16,17,19,20,21,21,20,19,17,16,14,13,12,10, 9, 8, 6, 5, 3, 2, 1,
249 1, 2, 4, 6, 7, 9,11,12,14,15,17,19,20,22,24,25,25,24,22,20,19,17,15,14,12,11, 9, 7, 6, 4, 2, 1,
250 1, 3, 5, 7, 8,10,12,14,16,18,20,22,23,25,27,29,29,27,25,23,22,20,18,16,14,12,10, 8, 7, 5, 3, 1,
251 1, 3, 5, 7,10,12,14,16,18,20,22,24,27,29,31,33,33,31,29,27,24,22,20,18,16,14,12,10, 7, 5, 3, 1,
252 1, 4, 6, 8,11,13,15,18,20,23,25,27,30,32,34,37,37,34,32,30,27,25,23,20,18,15,13,11, 8, 6, 4, 1,
253 1, 4, 7, 9,12,14,17,20,22,25,28,30,33,35,38,41,41,38,35,33,30,28,25,22,20,17,14,12, 9, 7, 4, 1,
254 1, 4, 7,10,13,16,19,22,24,27,30,33,36,39,42,45,45,42,39,36,33,30,27,24,22,19,16,13,10, 7, 4, 1,
255 2, 5, 8,11,14,17,20,23,27,30,33,36,39,42,45,48,48,45,42,39,36,33,30,27,23,20,17,14,11, 8, 5, 2,
256 2, 5, 8,12,15,19,22,25,29,32,35,39,42,46,49,52,52,49,46,42,39,35,32,29,25,22,19,15,12, 8, 5, 2,
257 2, 5, 9,13,16,20,24,27,31,34,38,42,45,49,53,56,56,53,49,45,42,38,34,31,27,24,20,16,13, 9, 5, 2,
258 2, 6,10,14,17,21,25,29,33,37,41,45,48,52,56,60,60,56,52,48,45,41,37,33,29,25,21,17,14,10, 6, 2,
259 2, 6,10,14,17,21,25,29,33,37,41,45,48,52,56,60,60,56,52,48,45,41,37,33,29,25,21,17,14,10, 6, 2,
260 2, 5, 9,13,16,20,24,27,31,34,38,42,45,49,53,56,56,53,49,45,42,38,34,31,27,24,20,16,13, 9, 5, 2,
261 2, 5, 8,12,15,19,22,25,29,32,35,39,42,46,49,52,52,49,46,42,39,35,32,29,25,22,19,15,12, 8, 5, 2,
262 2, 5, 8,11,14,17,20,23,27,30,33,36,39,42,45,48,48,45,42,39,36,33,30,27,23,20,17,14,11, 8, 5, 2,
263 1, 4, 7,10,13,16,19,22,24,27,30,33,36,39,42,45,45,42,39,36,33,30,27,24,22,19,16,13,10, 7, 4, 1,
264 1, 4, 7, 9,12,14,17,20,22,25,28,30,33,35,38,41,41,38,35,33,30,28,25,22,20,17,14,12, 9, 7, 4, 1,
265 1, 4, 6, 8,11,13,15,18,20,23,25,27,30,32,34,37,37,34,32,30,27,25,23,20,18,15,13,11, 8, 6, 4, 1,
266 1, 3, 5, 7,10,12,14,16,18,20,22,24,27,29,31,33,33,31,29,27,24,22,20,18,16,14,12,10, 7, 5, 3, 1,
267 1, 3, 5, 7, 8,10,12,14,16,18,20,22,23,25,27,29,29,27,25,23,22,20,18,16,14,12,10, 8, 7, 5, 3, 1,
268 1, 2, 4, 6, 7, 9,11,12,14,15,17,19,20,22,24,25,25,24,22,20,19,17,15,14,12,11, 9, 7, 6, 4, 2, 1,
269 1, 2, 3, 5, 6, 8, 9,10,12,13,14,16,17,19,20,21,21,20,19,17,16,14,13,12,10, 9, 8, 6, 5, 3, 2, 1,
270 1, 2, 3, 4, 5, 6, 7, 8,10,11,12,13,14,15,16,17,17,16,15,14,13,12,11,10, 8, 7, 6, 5, 4, 3, 2, 1,
271 0, 1, 2, 3, 4, 5, 6, 7, 7, 8, 9,10,11,12,13,14,14,13,12,11,10, 9, 8, 7, 7, 6, 5, 4, 3, 2, 1, 0,
272 0, 1, 2, 2, 3, 3, 4, 5, 5, 6, 7, 7, 8, 8, 9,10,10, 9, 8, 8, 7, 7, 6, 5, 5, 4, 3, 3, 2, 2, 1, 0,
273 0, 1, 1, 1, 2, 2, 2, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 5, 5, 5, 4, 4, 4, 3, 3, 2, 2, 2, 1, 1, 1, 0,
274 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0,
/**
 * 256 weights laid out as 16 rows of 16 values; the corners are 0 and the
 * values rise to 56 at the centre.
 * NOTE(review): one of three definitions named obmc16 in this excerpt; the
 * preprocessor conditionals that presumably select one are not visible here.
 */
277 static const uint8_t obmc16[256]={
278 0, 1, 1, 2, 2, 3, 3, 4, 4, 3, 3, 2, 2, 1, 1, 0,
279 1, 2, 4, 5, 7, 8,10,11,11,10, 8, 7, 5, 4, 2, 1,
280 1, 4, 6, 9,11,14,16,19,19,16,14,11, 9, 6, 4, 1,
281 2, 5, 9,12,16,19,23,26,26,23,19,16,12, 9, 5, 2,
282 2, 7,11,16,20,25,29,34,34,29,25,20,16,11, 7, 2,
283 3, 8,14,19,25,30,36,41,41,36,30,25,19,14, 8, 3,
284 3,10,16,23,29,36,42,49,49,42,36,29,23,16,10, 3,
285 4,11,19,26,34,41,49,56,56,49,41,34,26,19,11, 4,
286 4,11,19,26,34,41,49,56,56,49,41,34,26,19,11, 4,
287 3,10,16,23,29,36,42,49,49,42,36,29,23,16,10, 3,
288 3, 8,14,19,25,30,36,41,41,36,30,25,19,14, 8, 3,
289 2, 7,11,16,20,25,29,34,34,29,25,20,16,11, 7, 2,
290 2, 5, 9,12,16,19,23,26,26,23,19,16,12, 9, 5, 2,
291 1, 4, 6, 9,11,14,16,19,19,16,14,11, 9, 6, 4, 1,
292 1, 2, 4, 5, 7, 8,10,11,11,10, 8, 7, 5, 4, 2, 1,
293 0, 1, 1, 2, 2, 3, 3, 4, 4, 3, 3, 2, 2, 1, 1, 0,
/**
 * 1024 weights laid out as 32 rows of 32 values; the values are 0 along the
 * border and rise to 64 at the centre.
 * NOTE(review): one of three definitions named obmc32 in this excerpt; the
 * preprocessor conditionals that presumably select one are not visible here.
 */
297 static const uint8_t obmc32[1024]={
298 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
299 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
300 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
301 0, 0, 1, 1, 1, 2, 2, 3, 4, 5, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 6, 5, 5, 4, 3, 2, 2, 1, 1, 1, 0, 0,
302 0, 0, 1, 1, 2, 3, 4, 5, 6, 7, 9,10,11,11,12,12,12,12,11,11,10, 9, 7, 6, 5, 4, 3, 2, 1, 1, 0, 0,
303 0, 0, 1, 2, 3, 5, 6, 8, 9,11,12,14,15,16,17,17,17,17,16,15,14,12,11, 9, 8, 6, 5, 3, 2, 1, 0, 0,
304 0, 1, 1, 2, 4, 6, 8,10,12,15,17,19,20,21,22,23,23,22,21,20,19,17,15,12,10, 8, 6, 4, 2, 1, 1, 0,
305 0, 1, 2, 3, 5, 8,10,13,16,19,21,24,26,27,28,29,29,28,27,26,24,21,19,16,13,10, 8, 5, 3, 2, 1, 0,
306 0, 1, 2, 4, 6, 9,12,16,19,23,26,29,31,33,34,35,35,34,33,31,29,26,23,19,16,12, 9, 6, 4, 2, 1, 0,
307 0, 1, 3, 5, 7,11,15,19,23,26,30,34,37,39,40,41,41,40,39,37,34,30,26,23,19,15,11, 7, 5, 3, 1, 0,
308 0, 1, 3, 5, 9,12,17,21,26,30,35,38,42,44,46,47,47,46,44,42,38,35,30,26,21,17,12, 9, 5, 3, 1, 0,
309 0, 1, 3, 6, 9,14,19,24,29,34,38,43,46,49,51,52,52,51,49,46,43,38,34,29,24,19,14, 9, 6, 3, 1, 0,
310 0, 1, 3, 6,11,15,20,26,31,37,42,46,50,53,56,57,57,56,53,50,46,42,37,31,26,20,15,11, 6, 3, 1, 0,
311 0, 1, 3, 7,11,16,21,27,33,39,44,49,53,57,59,60,60,59,57,53,49,44,39,33,27,21,16,11, 7, 3, 1, 0,
312 0, 1, 4, 7,12,17,22,28,34,40,46,51,56,59,61,63,63,61,59,56,51,46,40,34,28,22,17,12, 7, 4, 1, 0,
313 0, 1, 4, 7,12,17,23,29,35,41,47,52,57,60,63,64,64,63,60,57,52,47,41,35,29,23,17,12, 7, 4, 1, 0,
314 0, 1, 4, 7,12,17,23,29,35,41,47,52,57,60,63,64,64,63,60,57,52,47,41,35,29,23,17,12, 7, 4, 1, 0,
315 0, 1, 4, 7,12,17,22,28,34,40,46,51,56,59,61,63,63,61,59,56,51,46,40,34,28,22,17,12, 7, 4, 1, 0,
316 0, 1, 3, 7,11,16,21,27,33,39,44,49,53,57,59,60,60,59,57,53,49,44,39,33,27,21,16,11, 7, 3, 1, 0,
317 0, 1, 3, 6,11,15,20,26,31,37,42,46,50,53,56,57,57,56,53,50,46,42,37,31,26,20,15,11, 6, 3, 1, 0,
318 0, 1, 3, 6, 9,14,19,24,29,34,38,43,46,49,51,52,52,51,49,46,43,38,34,29,24,19,14, 9, 6, 3, 1, 0,
319 0, 1, 3, 5, 9,12,17,21,26,30,35,38,42,44,46,47,47,46,44,42,38,35,30,26,21,17,12, 9, 5, 3, 1, 0,
320 0, 1, 3, 5, 7,11,15,19,23,26,30,34,37,39,40,41,41,40,39,37,34,30,26,23,19,15,11, 7, 5, 3, 1, 0,
321 0, 1, 2, 4, 6, 9,12,16,19,23,26,29,31,33,34,35,35,34,33,31,29,26,23,19,16,12, 9, 6, 4, 2, 1, 0,
322 0, 1, 2, 3, 5, 8,10,13,16,19,21,24,26,27,28,29,29,28,27,26,24,21,19,16,13,10, 8, 5, 3, 2, 1, 0,
323 0, 1, 1, 2, 4, 6, 8,10,12,15,17,19,20,21,22,23,23,22,21,20,19,17,15,12,10, 8, 6, 4, 2, 1, 1, 0,
324 0, 0, 1, 2, 3, 5, 6, 8, 9,11,12,14,15,16,17,17,17,17,16,15,14,12,11, 9, 8, 6, 5, 3, 2, 1, 0, 0,
325 0, 0, 1, 1, 2, 3, 4, 5, 6, 7, 9,10,11,11,12,12,12,12,11,11,10, 9, 7, 6, 5, 4, 3, 2, 1, 1, 0, 0,
326 0, 0, 1, 1, 1, 2, 2, 3, 4, 5, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 6, 5, 5, 4, 3, 2, 2, 1, 1, 1, 0, 0,
327 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
328 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
329 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
/**
 * 256 weights laid out as 16 rows of 16 values; the corners are 0 and the
 * values rise to 63 at the centre.
 * NOTE(review): one of three definitions named obmc16 in this excerpt; the
 * preprocessor conditionals that presumably select one are not visible here.
 */
332 static const uint8_t obmc16[256]={
333 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
334 0, 0, 1, 2, 3, 4, 5, 5, 5, 5, 4, 3, 2, 1, 0, 0,
335 0, 1, 3, 6, 8,11,13,14,14,13,11, 8, 6, 3, 1, 0,
336 0, 2, 6,10,15,20,24,26,26,24,20,15,10, 6, 2, 0,
337 0, 3, 8,16,23,30,35,38,38,35,30,23,16, 8, 3, 0,
338 1, 4,11,20,30,39,46,49,49,46,39,30,20,11, 4, 1,
339 1, 5,13,24,35,46,54,58,58,54,46,35,24,13, 5, 1,
340 0, 5,14,26,38,49,58,63,63,58,49,38,26,14, 5, 0,
341 0, 5,14,26,38,49,58,63,63,58,49,38,26,14, 5, 0,
342 1, 5,13,24,35,46,54,58,58,54,46,35,24,13, 5, 1,
343 1, 4,11,20,30,39,46,49,49,46,39,30,20,11, 4, 1,
344 0, 3, 8,16,23,30,35,38,38,35,30,23,16, 8, 3, 0,
345 0, 2, 6,10,15,20,24,26,26,24,20,15,10, 6, 2, 0,
346 0, 1, 3, 6, 8,11,13,14,14,13,11, 8, 6, 3, 1, 0,
347 0, 0, 1, 2, 3, 4, 5, 5, 5, 5, 4, 3, 2, 1, 0, 0,
348 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
/**
 * 64 weights laid out as 8 rows of 8 values.
 * Entry (y, x) equals w[y]*w[x] with w = {1,3,5,7,7,5,3,1}, so the corners
 * are 1 and the four centre entries are 49.
 */
354 static const uint8_t obmc8[64]={
355 1, 3, 5, 7, 7, 5, 3, 1,
356 3, 9,15,21,21,15, 9, 3,
357 5,15,25,35,35,25,15, 5,
358 7,21,35,49,49,35,21, 7,
359 7,21,35,49,49,35,21, 7,
360 5,15,25,35,35,25,15, 5,
361 3, 9,15,21,21,15, 9, 3,
362 1, 3, 5, 7, 7, 5, 3, 1,
/** 16-entry weight table; its initializer rows are not visible in this excerpt. */
367 static const uint8_t obmc4[16]={
/**
 * Pointers to the four weight tables, ordered from the largest (obmc32,
 * 1024 entries) to the smallest (obmc4, 16 entries).
 * NOTE(review): how the index is derived is not visible in this excerpt.
 */
375 static const uint8_t *obmc_tab[4]={
376 obmc32, obmc16, obmc8, obmc4
/**
 * Per-block node. Only part of the member list is visible in this excerpt
 * (the null_block initializer also sets a `color` member).
 */
379 typedef struct BlockNode{
384 //#define TYPE_SPLIT 1
// NOTE(review): presumably a flag bit for a type member that is not visible here -- confirm.
385 #define BLOCK_INTRA 1
387 //#define TYPE_NOCOLOR 4
388 uint8_t level; //FIXME merge into type?
/**
 * Constant BlockNode whose three colour components are all 128.
 * Any further initializers are not visible in this excerpt.
 */
391 static const BlockNode null_block= { //FIXME add border maybe
392 .color= {128,128,128},
/** Base-2 logarithm of MB_SIZE. */
399 #define LOG2_MB_SIZE 4
/** 1<<LOG2_MB_SIZE, i.e. 16. */
400 #define MB_SIZE (1<<LOG2_MB_SIZE)
/** Element type of SubBand.x_coeff; its members are not visible in this excerpt. */
402 typedef struct x_and_coeff{
/**
 * One subband of a plane (Plane holds MAX_DECOMPOSITIONS x 4 of these).
 * Only part of the member list is visible in this excerpt.
 */
407 typedef struct SubBand{
412 int qlog; ///< log(qscale)/log[2^(1/6)]
416 int stride_line; ///< Stride measured in lines, not pixels.
417 x_and_coeff * x_coeff;
// Link to another SubBand; NOTE(review): presumably the matching band of the next coarser level -- confirm.
418 struct SubBand *parent;
// 7 + 512 contexts of 32 state bytes each (32 matches the state+0..state+31 offsets used by put_symbol/get_symbol).
419 uint8_t state[/*7*2*/ 7 + 512][32];
/** One image plane. Only the subband array is visible in this excerpt. */
422 typedef struct Plane{
// Up to MAX_DECOMPOSITIONS entries of 4 subbands each.
425 SubBand band[MAX_DECOMPOSITIONS][4];
// NOTE(review): the struct header is not visible in this excerpt; these members
// match the fields that slice_buffer_init() assigns on a slice_buffer.
428 /** Used to minimize the amount of memory used in order to optimize cache performance. **/
430 DWTELEM * * line; ///< For use by idwt and predict_slices.
431 DWTELEM * * data_stack; ///< Used for internal purposes.
436 DWTELEM * base_buffer; ///< Buffer that this structure is caching.
/**
 * Codec context. Only part of the member list is visible in this excerpt.
 */
439 typedef struct SnowContext{
440 // MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to make the motion estimation eventually independent of MpegEncContext, so this will be removed then (FIXME/XXX)
442 AVCodecContext *avctx;
446 AVFrame input_picture; ///< new_picture with the internal linesizes
447 AVFrame current_picture;
448 AVFrame last_picture;
449 AVFrame mconly_picture;
450 // uint8_t q_context[16];
// Adaptive state bytes; 32 matches the state+0..state+31 offsets used by put_symbol/get_symbol.
451 uint8_t header_state[32];
452 uint8_t block_state[128 + 32*128];
456 int spatial_decomposition_type;
457 int temporal_decomposition_type;
458 int spatial_decomposition_count;
459 int temporal_decomposition_count;
460 DWTELEM *spatial_dwt_buffer;
464 int spatial_scalability;
470 #define QBIAS_SHIFT 3
474 Plane plane[MAX_PLANES];
476 #define ME_CACHE_SIZE 1024
477 int me_cache[ME_CACHE_SIZE];
478 int me_cache_generation;
481 MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to make the motion estimation eventually independent of MpegEncContext, so this will be removed then (FIXME/XXX)
/**
 * Yields the buffer bound to line_num, calling slice_buffer_load_line()
 * when the slot in (slice_buf)->line[] is still NULL.
 * Both arguments are expanded more than once, so they must be free of
 * side effects.
 */
492 #define slice_buffer_get_line(slice_buf, line_num) ((slice_buf)->line[line_num] ? (slice_buf)->line[line_num] : slice_buffer_load_line((slice_buf), (line_num)))
493 //#define slice_buffer_get_line(slice_buf, line_num) (slice_buffer_load_line((slice_buf), (line_num)))
/** Forward declaration; the definition is outside this excerpt. */
495 static void iterative_me(SnowContext *s);
/**
 * Sets up a slice_buffer with a per-line pointer table and a pool of
 * preallocated line buffers.
 *
 * @param buf                  buffer object to initialise
 * @param line_count           number of entries in the buf->line[] table
 * @param max_allocated_lines  number of line buffers allocated for the pool
 * @param line_width           length of each pooled buffer, in DWTELEMs
 * @param base_buffer          stored in buf->base_buffer
 *
 * NOTE(review): no check of the av_mallocz()/av_malloc() results is visible
 * in this excerpt -- confirm whether allocation failure is handled.
 */
497 static void slice_buffer_init(slice_buffer * buf, int line_count, int max_allocated_lines, int line_width, DWTELEM * base_buffer)
501 buf->base_buffer = base_buffer;
502 buf->line_count = line_count;
503 buf->line_width = line_width;
504 buf->data_count = max_allocated_lines;
// Zero-filled table: every line slot starts out NULL (nothing bound yet).
505 buf->line = (DWTELEM * *) av_mallocz (sizeof(DWTELEM *) * line_count);
// Stack of free line buffers.
506 buf->data_stack = (DWTELEM * *) av_malloc (sizeof(DWTELEM *) * max_allocated_lines);
508 for (i = 0; i < max_allocated_lines; i++)
510 buf->data_stack[i] = (DWTELEM *) av_malloc (sizeof(DWTELEM) * line_width);
// The stack starts full: its top is the last allocated buffer.
513 buf->data_stack_top = max_allocated_lines - 1;
/**
 * Takes a free buffer from the pool and binds it to slot `line`.
 *
 * @param buf   slice buffer
 * @param line  index into buf->line[]
 *
 * NOTE(review): several statements are missing from this excerpt, including
 * the condition guarding the early return, any use of `offset` and the
 * final return statement -- read the full function before relying on this
 * description.
 */
516 static DWTELEM * slice_buffer_load_line(slice_buffer * buf, int line)
521 // av_log(NULL, AV_LOG_DEBUG, "Cache hit: %d\n", line);
// The pool must not be empty.
523 assert(buf->data_stack_top >= 0);
524 // assert(!buf->line[line]);
// Early return of an already bound line (the guarding condition is not visible here).
526 return buf->line[line];
528 offset = buf->line_width * line;
// Pop the top buffer from the free stack and bind it to the slot.
529 buffer = buf->data_stack[buf->data_stack_top];
530 buf->data_stack_top--;
531 buf->line[line] = buffer;
533 // av_log(NULL, AV_LOG_DEBUG, "slice_buffer_load_line: line: %d remaining: %d\n", line, buf->data_stack_top + 1);
/**
 * Unbinds slot `line` and returns its buffer to the free pool.
 *
 * @param buf   slice buffer
 * @param line  index into buf->line[]; asserted to be in range and bound
 *
 * NOTE(review): `offset` is computed but no use of it is visible in this
 * excerpt.
 */
538 static void slice_buffer_release(slice_buffer * buf, int line)
543 assert(line >= 0 && line < buf->line_count);
544 assert(buf->line[line]);
546 offset = buf->line_width * line;
// Push the buffer back onto the free stack and clear the slot.
547 buffer = buf->line[line];
548 buf->data_stack_top++;
549 buf->data_stack[buf->data_stack_top] = buffer;
550 buf->line[line] = NULL;
552 // av_log(NULL, AV_LOG_DEBUG, "slice_buffer_release: line: %d remaining: %d\n", line, buf->data_stack_top + 1);
/**
 * Walks all buf->line_count slots and releases them with
 * slice_buffer_release().
 * NOTE(review): slice_buffer_release() asserts that the slot is bound, so a
 * guard on buf->line[i] presumably sits in the lines missing from this
 * excerpt -- confirm.
 */
555 static void slice_buffer_flush(slice_buffer * buf)
558 for (i = 0; i < buf->line_count; i++)
562 // av_log(NULL, AV_LOG_DEBUG, "slice_buffer_flush: line: %d \n", i);
563 slice_buffer_release(buf, i);
/**
 * Releases the storage owned by a slice_buffer: flushes the bound lines so
 * every pooled buffer is back on the stack, frees each pooled buffer, then
 * frees the stack array itself.
 * NOTE(review): freeing of the buf->line table is not visible in this
 * excerpt -- confirm that it happens in the missing lines.
 */
568 static void slice_buffer_destroy(slice_buffer * buf)
571 slice_buffer_flush(buf);
// Free the data_count pooled buffers, last to first.
573 for (i = buf->data_count - 1; i >= 0; i--)
575 assert(buf->data_stack[i]);
576 av_free(buf->data_stack[i]);
578 assert(buf->data_stack);
579 av_free(buf->data_stack);
585 // Avoid a name clash on SGI IRIX
// NOTE(review): the statement this comment refers to is not visible in this excerpt.
// Shift derived from FRAC_BITS, which is defined outside this excerpt.
588 #define QEXPSHIFT (7-FRAC_BITS+8) //FIXME try to change this to 0
// QROOT-entry byte table; its initialisation is outside this excerpt.
589 static uint8_t qexp[QROOT];
/**
 * Maps v into the range [0, m].
 * The loop runs while v lies outside [0, m]; the unsigned comparison also
 * catches negative v, which converts to a large unsigned value.
 * NOTE(review): the loop body is not visible in this excerpt; presumably it
 * reflects v at the range borders, as the name suggests -- confirm.
 */
591 static inline int mirror(int v, int m){
592 while((unsigned)v > (unsigned)m){
/**
 * Writes the integer v with the range coder using adaptive binary states.
 *
 * State layout as used by the visible put_rac() calls:
 *   state[0]       flag bit (0 is written before a coded magnitude, 1 on
 *                  the path at the end of the function)
 *   state[1..10]   unary-coded exponent e = av_log2(a)
 *   state[11..21]  sign bit (v < 0)
 *   state[22..31]  mantissa bits of a, most significant first
 *
 * @param c          range coder
 * @param state      adaptive state bytes, offsets 0..31 are used
 * @param v          value to code
 * @param is_signed  NOTE(review): its use is not visible in this excerpt;
 *                   get_symbol() reads a sign bit only when it is non-zero
 *
 * NOTE(review): the excerpt shows three alternative codings of the same
 * fields (clamped via `el`, unclamped, and clamped via FFMIN); the
 * conditionals that presumably select one, and the definition of `a`, are
 * not visible here.
 */
599 static inline void put_symbol(RangeCoder *c, uint8_t *state, int v, int is_signed){
604 const int e= av_log2(a);
606 const int el= FFMIN(e, 10);
607 put_rac(c, state+0, 0);
610 put_rac(c, state+1+i, 1); //1..10
613 put_rac(c, state+1+9, 1); //1..10
615 put_rac(c, state+1+FFMIN(i,9), 0);
617 for(i=e-1; i>=el; i--){
618 put_rac(c, state+22+9, (a>>i)&1); //22..31
621 put_rac(c, state+22+i, (a>>i)&1); //22..31
625 put_rac(c, state+11 + el, v < 0); //11..21
628 put_rac(c, state+0, 0);
631 put_rac(c, state+1+i, 1); //1..10
633 put_rac(c, state+1+i, 0);
635 for(i=e-1; i>=0; i--){
636 put_rac(c, state+22+i, (a>>i)&1); //22..31
640 put_rac(c, state+11 + e, v < 0); //11..21
643 put_rac(c, state+1+FFMIN(i,9), 1); //1..10
645 put_rac(c, state+1+FFMIN(i,9), 0);
647 for(i=e-1; i>=0; i--){
648 put_rac(c, state+22+FFMIN(i,9), (a>>i)&1); //22..31
652 put_rac(c, state+11 + FFMIN(e,10), v < 0); //11..21
656 put_rac(c, state+0, 1);
/**
 * Reads an integer with the range coder; counterpart of put_symbol().
 *
 * Visible steps: test the flag at state[0]; count the unary exponent e with
 * contexts state[1 + min(e,9)]; read e mantissa bits, most significant
 * first, with contexts state[22 + min(i,9)]; when is_signed is non-zero,
 * read a sign bit with context state[11 + min(e,10)].
 *
 * NOTE(review): the statements executed for each outcome (including the
 * returns) are not visible in this excerpt.
 */
660 static inline int get_symbol(RangeCoder *c, uint8_t *state, int is_signed){
661 if(get_rac(c, state+0))
666 while(get_rac(c, state+1 + FFMIN(e,9))){ //1..10
671 for(i=e-1; i>=0; i--){
672 a += a + get_rac(c, state+22 + FFMIN(i,9)); //22..31
675 if(is_signed && get_rac(c, state+11 + FFMIN(e,10))) //11..21
/**
 * Writes v with an adaptive code parameterised by log2.
 *
 * Visible steps: r starts at 1<<log2 (or 1 when log2 is negative); bits
 * using context state[4+log2] are written as 1 and then 0, after which the
 * low log2 bits of v are written, most significant first, with contexts
 * state[31-i].
 *
 * NOTE(review): the loop that adjusts v, r and log2 between those writes is
 * not visible in this excerpt.
 */
682 static inline void put_symbol2(RangeCoder *c, uint8_t *state, int v, int log2){
684 int r= log2>=0 ? 1<<log2 : 1;
690 put_rac(c, state+4+log2, 1);
695 put_rac(c, state+4+log2, 0);
697 for(i=log2-1; i>=0; i--){
698 put_rac(c, state+31-i, (v>>i)&1);
/**
 * Reads a value written by put_symbol2() with the same log2 parameter.
 *
 * Visible steps: loop while the bit with context state[4+log2] is 1, then
 * read log2 low-order bits, most significant first, with contexts
 * state[31-i] and add them into v.
 *
 * NOTE(review): the body of the first loop and the return statement are not
 * visible in this excerpt.
 */
702 static inline int get_symbol2(RangeCoder *c, uint8_t *state, int log2){
704 int r= log2>=0 ? 1<<log2 : 1;
709 while(get_rac(c, state+4+log2)){
715 for(i=log2-1; i>=0; i--){
716 v+= get_rac(c, state+31-i)<<i;
/**
 * One lifting step: dst[i] = src[i] +/- ((mul*(ref[i] + ref[i+1]) + add) >> shift),
 * where the sign is '-' when `inverse` is non-zero.
 *
 * @param dst, src, ref                  output, input and reference samples
 * @param dst_step, src_step, ref_step   element strides of the three arrays
 * @param width     length of the full (interleaved) signal
 * @param mul, add, shift  integer filter weight, rounding offset and shift
 * @param highpass  non-zero for the high-pass step; selects which border is mirrored
 * @param inverse   non-zero to undo the step
 *
 * At a mirrored border the missing neighbour is replaced by the existing
 * one, giving the 2*ref[...] terms below.
 * NOTE(review): the conditions that apply mirror_left/mirror_right and the
 * loop header are not visible in this excerpt.
 */
722 static always_inline void lift(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int dst_step, int src_step, int ref_step, int width, int mul, int add, int shift, int highpass, int inverse){
723 const int mirror_left= !highpass;
724 const int mirror_right= (width&1) ^ highpass;
725 const int w= (width>>1) - 1 + (highpass & width);
728 #define LIFT(src, ref, inv) ((src) + ((inv) ? - (ref) : + (ref)))
// Left border sample.
730 dst[0] = LIFT(src[0], ((mul*2*ref[0]+add)>>shift), inverse);
// Interior samples.
736 dst[i*dst_step] = LIFT(src[i*src_step], ((mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add)>>shift), inverse);
// Right border sample.
740 dst[w*dst_step] = LIFT(src[w*src_step], ((mul*2*ref[w*ref_step]+add)>>shift), inverse);
/**
 * Lifting step with the same signature and border handling as lift(), but
 * with the prediction term built in a local `r` that starts from
 * 3*(ref[i] + ref[i+1]) (3*2*ref[w] at the right border).
 *
 * NOTE(review): the statements that further modify `r`, the border
 * conditions and the loop header are not visible in this excerpt, so the
 * exact filter cannot be stated from here.
 */
744 static always_inline void lift5(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int dst_step, int src_step, int ref_step, int width, int mul, int add, int shift, int highpass, int inverse){
745 const int mirror_left= !highpass;
746 const int mirror_right= (width&1) ^ highpass;
747 const int w= (width>>1) - 1 + (highpass & width);
754 dst[0] = LIFT(src[0], ((r+add)>>shift), inverse);
760 int r= 3*(ref[i*ref_step] + ref[(i+1)*ref_step]);
763 dst[i*dst_step] = LIFT(src[i*src_step], ((r+add)>>shift), inverse);
767 int r= 3*2*ref[w*ref_step];
770 dst[w*dst_step] = LIFT(src[w*src_step], ((r+add)>>shift), inverse);
/**
 * Lifting step with the same signature and border handling as lift(), but
 * using the LIFTS update rule instead of LIFT.
 *
 * LIFTS(src, ref, inv):
 *   inverse: src - ((ref - 4*src) >> shift)
 *   forward: (16*4*src + 4*ref + 8 + (5<<27)) / (5*16) - (1<<23)
 * NOTE(review): the (5<<27) bias and the matching -(1<<23) look like a way
 * to keep the dividend non-negative so that the division rounds
 * consistently -- confirm. The macro uses `shift` from the enclosing scope.
 * The border conditions and loop header are not visible in this excerpt.
 */
774 static always_inline void liftS(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int dst_step, int src_step, int ref_step, int width, int mul, int add, int shift, int highpass, int inverse){
775 const int mirror_left= !highpass;
776 const int mirror_right= (width&1) ^ highpass;
777 const int w= (width>>1) - 1 + (highpass & width);
781 #define LIFTS(src, ref, inv) ((inv) ? (src) - (((ref) - 4*(src))>>shift): (16*4*(src) + 4*(ref) + 8 + (5<<27))/(5*16) - (1<<23))
783 dst[0] = LIFTS(src[0], mul*2*ref[0]+add, inverse);
789 dst[i*dst_step] = LIFTS(src[i*src_step], mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add, inverse);
793 dst[w*dst_step] = LIFTS(src[w*src_step], mul*2*ref[w*ref_step]+add, inverse);
/**
 * Generic in-place lifting step over one row.
 *
 * For every second sample x = start, start+2, ... a weighted sum of
 * neighbouring samples dst[x + 2*i - n + 1] is formed with weights
 * coeffs[i], rounded by (1<<shift)/2, shifted right by `shift`, and then
 * added to dst[x] (subtracted when `inverse` is non-zero).
 *
 * @param dst      row to transform in place
 * @param width    number of samples in the row
 * @param coeffs   filter taps
 * @param n        number of taps (NOTE(review): the tap loop header is not visible in this excerpt)
 * @param shift    right shift applied to the rounded sum
 * @param start    first sample index to update
 * @param inverse  non-zero to undo the step
 */
798 static void inplace_lift(DWTELEM *dst, int width, int *coeffs, int n, int shift, int start, int inverse){
801 for(x=start; x<width; x+=2){
805 int x2= x + 2*i - n + 1;
// Indices past the right edge are reflected about the last sample (width-1).
807 else if(x2>=width) x2= 2*width-x2-2;
// 64-bit product of tap and sample.
808 sum += coeffs[i]*(int64_t)dst[x2];
810 if(inverse) dst[x] -= (sum + (1<<shift)/2)>>shift;
811 else dst[x] += (sum + (1<<shift)/2)>>shift;
/**
 * Vertical counterpart of inplace_lift(): the same lifting step applied
 * down every column of a width x height block.
 *
 * For every second row y = start, start+2, ... and every column x, a
 * weighted sum of dst[x + y2*stride] with y2 = y + 2*i - n + 1 is rounded,
 * shifted right by `shift`, and added to (or, for `inverse`, subtracted
 * from) dst[x + y*stride].
 *
 * @param stride  distance between vertically adjacent samples, in elements
 * (other parameters as in inplace_lift(); the tap loop header is not
 * visible in this excerpt)
 */
815 static void inplace_liftV(DWTELEM *dst, int width, int height, int stride, int *coeffs, int n, int shift, int start, int inverse){
817 for(y=start; y<height; y+=2){
818 for(x=0; x<width; x++){
822 int y2= y + 2*i - n + 1;
// Rows past the bottom edge are reflected about the last row (height-1).
824 else if(y2>=height) y2= 2*height-y2-2;
825 sum += coeffs[i]*(int64_t)dst[x + y2*stride];
827 if(inverse) dst[x + y*stride] -= (sum + (1<<shift)/2)>>shift;
828 else dst[x + y*stride] += (sum + (1<<shift)/2)>>shift;
// Alternative sets of lifting taps COEFFS1..COEFFS4, each a compound-literal
// int array, passed to inplace_lift()/inplace_liftV() by the *X transform
// functions.
// NOTE(review): most of the #if/#elif/#endif directives that select one set,
// and the companion N1..N4, SHIFT1..SHIFT4, LX0/LX1 and SCALEX macros, are not
// visible in this excerpt; check the full file to see which set is active.
837 #if 0 // more accurate 9/7
840 #define COEFFS1 (int[]){-25987,-25987}
843 #define COEFFS2 (int[]){-27777,-27777}
846 #define COEFFS3 (int[]){28931,28931}
849 #define COEFFS4 (int[]){14533,14533}
853 #define COEFFS1 (int[]){1,-9,-9,1}
856 #define COEFFS2 (int[]){-1,5,5,-1}
869 #define COEFFS1 (int[]){1,1}
872 #define COEFFS2 (int[]){-1,-1}
885 #define COEFFS2 (int[]){-1,-1}
888 #define COEFFS3 (int[]){-1,-1}
891 #define COEFFS4 (int[]){-5,29,29,-5}
896 #define COEFFS1 (int[]){-203,-203}
899 #define COEFFS2 (int[]){-217,-217}
902 #define COEFFS3 (int[]){113,113}
905 #define COEFFS4 (int[]){227,227}
913 #define COEFFS2 (int[]){-1,-1}
916 #define COEFFS3 (int[]){-1,-1}
919 #define COEFFS4 (int[]){3,3}
923 #define COEFFS1 (int[]){1,-9,-9,1}
926 #define COEFFS2 (int[]){1,1}
936 #define COEFFS1 (int[]){1,-9,-9,1}
939 #define COEFFS2 (int[]){-1,5,5,-1}
/**
 * Forward transform of one row: four in-place lifting steps
 * (COEFFS1..COEFFS4, in that order), then a de-interleave that moves the
 * odd-indexed samples into the upper part of the row, starting at
 * w2 = (width+1)/2.
 *
 * @param b      row to transform in place
 * @param width  number of samples in the row
 *
 * NOTE(review): the copy back into b uses width*sizeof(int) although the
 * elements are DWTELEM; that is only correct while DWTELEM has the size of
 * int -- confirm or switch to sizeof(DWTELEM).
 * The declaration of `temp` and the even-sample copy are not visible in
 * this excerpt.
 */
947 static void horizontal_decomposeX(DWTELEM *b, int width){
949 const int width2= width>>1;
950 const int w2= (width+1)>>1;
953 inplace_lift(b, width, COEFFS1, N1, SHIFT1, LX1, 0);
954 inplace_lift(b, width, COEFFS2, N2, SHIFT2, LX0, 0);
955 inplace_lift(b, width, COEFFS3, N3, SHIFT3, LX1, 0);
956 inplace_lift(b, width, COEFFS4, N4, SHIFT4, LX0, 0);
958 for(x=0; x<width2; x++){
// Odd-indexed samples go to the upper part of temp.
960 temp[x+w2]= b[2*x + 1];
964 memcpy(b, temp, width*sizeof(int));
/**
 * Inverse of horizontal_decomposeX(): re-interleaves the two halves of the
 * row, then undoes the four lifting steps in reverse order (COEFFS4 first,
 * COEFFS1 last) with inverse=1.
 *
 * @param b      row to reconstruct in place
 * @param width  number of samples in the row
 *
 * NOTE(review): the copy into temp uses width*sizeof(int) although the
 * elements are DWTELEM; that is only correct while DWTELEM has the size of
 * int -- confirm or switch to sizeof(DWTELEM).
 * The declaration of `temp` and the even-sample copy are not visible in
 * this excerpt.
 */
967 static void horizontal_composeX(DWTELEM *b, int width){
969 const int width2= width>>1;
971 const int w2= (width+1)>>1;
973 memcpy(temp, b, width*sizeof(int));
974 for(x=0; x<width2; x++){
// Samples from the upper part of temp return to the odd positions.
976 b[2*x + 1]= temp[x+w2];
981 inplace_lift(b, width, COEFFS4, N4, SHIFT4, LX0, 1);
982 inplace_lift(b, width, COEFFS3, N3, SHIFT3, LX1, 1);
983 inplace_lift(b, width, COEFFS2, N2, SHIFT2, LX0, 1);
984 inplace_lift(b, width, COEFFS1, N1, SHIFT1, LX1, 1);
/**
 * Forward 2-D transform of a width x height block:
 *  1. every sample is multiplied by SCALEX,
 *  2. each row is transformed with horizontal_decomposeX(),
 *  3. the four lifting steps are applied vertically with inplace_liftV().
 *
 * @param buffer  block to transform in place
 * @param stride  distance between rows, in elements
 *
 * NOTE(review): no vertical de-interleave is visible in this excerpt.
 */
987 static void spatial_decomposeX(DWTELEM *buffer, int width, int height, int stride){
990 for(y=0; y<height; y++){
991 for(x=0; x<width; x++){
992 buffer[y*stride + x] *= SCALEX;
996 for(y=0; y<height; y++){
997 horizontal_decomposeX(buffer + y*stride, width);
1000 inplace_liftV(buffer, width, height, stride, COEFFS1, N1, SHIFT1, LX1, 0);
1001 inplace_liftV(buffer, width, height, stride, COEFFS2, N2, SHIFT2, LX0, 0);
1002 inplace_liftV(buffer, width, height, stride, COEFFS3, N3, SHIFT3, LX1, 0);
1003 inplace_liftV(buffer, width, height, stride, COEFFS4, N4, SHIFT4, LX0, 0);
/* One level of the inverse 2D "X" wavelet: undoes the vertical lifting steps
 * in reverse order (last argument 1), inverse-transforms each row with
 * horizontal_composeX(), and finally divides every sample by SCALEX. */
1006 static void spatial_composeX(DWTELEM *buffer, int width, int height, int stride){
1009 inplace_liftV(buffer, width, height, stride, COEFFS4, N4, SHIFT4, LX0, 1);
1010 inplace_liftV(buffer, width, height, stride, COEFFS3, N3, SHIFT3, LX1, 1);
1011 inplace_liftV(buffer, width, height, stride, COEFFS2, N2, SHIFT2, LX0, 1);
1012 inplace_liftV(buffer, width, height, stride, COEFFS1, N1, SHIFT1, LX1, 1);
1014 for(y=0; y<height; y++){
1015 horizontal_composeX(buffer + y*stride, width);
1018 for(y=0; y<height; y++){
1019 for(x=0; x<width; x++){
1020 buffer[y*stride + x] /= SCALEX;
/* Forward horizontal 5/3 integer wavelet on one row b of `width` samples,
 * using a variable-length scratch row temp.
 * NOTE(review): both a hand-unrolled A1..A4 loop and a pair of lift() calls
 * are visible below; presumably they are alternative implementations chosen
 * by preprocessor conditionals that are not part of this excerpt. */
1025 static void horizontal_decompose53i(DWTELEM *b, int width){
1026 DWTELEM temp[width];
1027 const int width2= width>>1;
1029 const int w2= (width+1)>>1;
/* Odd-indexed samples are gathered into temp from offset w2 onwards. */
1031 for(x=0; x<width2; x++){
1033 temp[x+w2]= b[2*x + 1];
1047 for(x=1; x+1<width2; x+=2){
1051 A2 += (A1 + A3 + 2)>>2;
1055 A1= temp[x+1+width2];
1058 A4 += (A1 + A3 + 2)>>2;
1064 A2 += (A1 + A3 + 2)>>2;
/* Two lifting steps writing the results into b at offset w2 and at offset 0. */
1069 lift(b+w2, temp+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 0);
1070 lift(b , temp , b+w2, 1, 1, 1, width, 1, 2, 2, 0, 0);
/* Forward 5/3 vertical step on row b1: for every column i below `width`,
 * subtracts (b0[i] + b2[i])>>1 from b1[i]. b0 and b2 are only read. */
1074 static void vertical_decompose53iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1077 for(i=0; i<width; i++){
1078 b1[i] -= (b0[i] + b2[i])>>1;
/* Forward 5/3 vertical step on row b1: for every column i below `width`,
 * adds (b0[i] + b2[i] + 2)>>2 to b1[i]. b0 and b2 are only read. */
1082 static void vertical_decompose53iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1085 for(i=0; i<width; i++){
1086 b1[i] += (b0[i] + b2[i] + 2)>>2;
/* One level of the forward 2D 5/3 wavelet over a width x height region.
 * Walks the rows two at a time starting at y=-2, transforming each newly
 * reached row horizontally and then applying the two vertical lifting steps
 * on a sliding window of rows b0..b3.
 * Row indices go through mirror(); presumably it reflects out-of-range
 * indices back into [0, height-1] -- confirm against its definition. */
1090 static void spatial_decompose53i(DWTELEM *buffer, int width, int height, int stride){
1092 DWTELEM *b0= buffer + mirror(-2-1, height-1)*stride;
1093 DWTELEM *b1= buffer + mirror(-2 , height-1)*stride;
1095 for(y=-2; y<height; y+=2){
1096 DWTELEM *b2= buffer + mirror(y+1, height-1)*stride;
1097 DWTELEM *b3= buffer + mirror(y+2, height-1)*stride;
/* Comparing against (unsigned)height converts the left side to unsigned, so
 * a single test rejects both negative rows and rows >= height. */
1100 if(y+1<(unsigned)height) horizontal_decompose53i(b2, width);
1101 if(y+2<(unsigned)height) horizontal_decompose53i(b3, width);
1102 STOP_TIMER("horizontal_decompose53i")}
1105 if(y+1<(unsigned)height) vertical_decompose53iH0(b1, b2, b3, width);
1106 if(y+0<(unsigned)height) vertical_decompose53iL0(b0, b1, b2, width);
1107 STOP_TIMER("vertical_decompose53i*")}
/* Forward horizontal 9/7 integer wavelet on one row b of `width` samples.
 * Four lifting steps (lift, liftS, lift5, lift) parameterised by the W_A*,
 * W_B*, W_C* and W_D* constants alternate between b and the scratch row temp;
 * the final two steps write the result back into b, at offsets w2 and 0. */
1184 static void horizontal_decompose97i(DWTELEM *b, int width){
1185 DWTELEM temp[width];
1186 const int w2= (width+1)>>1;
1188 lift (temp+w2, b +1, b , 1, 2, 2, width, -W_AM, W_AO, W_AS, 1, 0);
1189 liftS(temp , b , temp+w2, 1, 2, 1, width, -W_BM, W_BO, W_BS, 0, 0);
1190 lift5(b +w2, temp+w2, temp , 1, 1, 1, width, W_CM, W_CO, W_CS, 1, 0);
1191 lift (b , temp , b +w2, 1, 1, 1, width, W_DM, W_DO, W_DS, 0, 0);
/* Forward 9/7 vertical step "A" on row b1: for every column i below `width`,
 * subtracts (W_AM*(b0[i] + b2[i]) + W_AO)>>W_AS from b1[i]. */
1195 static void vertical_decompose97iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1198 for(i=0; i<width; i++){
1199 b1[i] -= (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
/* Forward 9/7 vertical step "C" on row b1, adding a term derived from
 * b0[i] + b2[i] to b1[i] for every column.
 * NOTE(review): two forms of the update are visible (a direct W_CM multiply
 * and one built from r = 3*(b0+b2)); presumably a preprocessor conditional
 * not shown here selects one, and further lines adjusting r are missing. */
1203 static void vertical_decompose97iH1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1206 for(i=0; i<width; i++){
1208 b1[i] += (W_CM*(b0[i] + b2[i])+W_CO)>>W_CS;
1210 int r= 3*(b0[i] + b2[i]);
1213 b1[i] += (r+W_CO)>>W_CS;
/* Forward 9/7 vertical step "B" on row b1, using neighbours b0 and b2.
 * NOTE(review): two forms of the update are visible (a shift-based one using
 * W_BM/W_BO/W_BS and a division-based one with large bias constants);
 * presumably a preprocessor conditional not shown here selects one. */
1218 static void vertical_decompose97iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1221 for(i=0; i<width; i++){
1223 b1[i] -= (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS;
1225 b1[i] = (16*4*b1[i] - 4*(b0[i] + b2[i]) + 8*5 + (5<<27)) / (5*16) - (1<<23);
/* Forward 9/7 vertical step "D" on row b1: for every column i below `width`,
 * adds (W_DM*(b0[i] + b2[i]) + W_DO)>>W_DS to b1[i]. */
1230 static void vertical_decompose97iL1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1233 for(i=0; i<width; i++){
1234 b1[i] += (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS;
/* One level of the forward 2D 9/7 wavelet over a width x height region.
 * Walks the rows two at a time starting at y=-4 with a six-row window
 * b0..b5: each newly reached row is transformed horizontally, then the four
 * vertical lifting steps are applied, each lagging one row behind the
 * previous one.
 * Row indices go through mirror(); presumably it reflects out-of-range
 * indices back into [0, height-1] -- confirm against its definition. */
1238 static void spatial_decompose97i(DWTELEM *buffer, int width, int height, int stride){
1240 DWTELEM *b0= buffer + mirror(-4-1, height-1)*stride;
1241 DWTELEM *b1= buffer + mirror(-4 , height-1)*stride;
1242 DWTELEM *b2= buffer + mirror(-4+1, height-1)*stride;
1243 DWTELEM *b3= buffer + mirror(-4+2, height-1)*stride;
1245 for(y=-4; y<height; y+=2){
1246 DWTELEM *b4= buffer + mirror(y+3, height-1)*stride;
1247 DWTELEM *b5= buffer + mirror(y+4, height-1)*stride;
/* The unsigned comparison rejects negative rows and rows >= height at once. */
1250 if(y+3<(unsigned)height) horizontal_decompose97i(b4, width);
1251 if(y+4<(unsigned)height) horizontal_decompose97i(b5, width);
1253 STOP_TIMER("horizontal_decompose97i")
1257 if(y+3<(unsigned)height) vertical_decompose97iH0(b3, b4, b5, width);
1258 if(y+2<(unsigned)height) vertical_decompose97iL0(b2, b3, b4, width);
1259 if(y+1<(unsigned)height) vertical_decompose97iH1(b1, b2, b3, width);
1260 if(y+0<(unsigned)height) vertical_decompose97iL1(b0, b1, b2, width);
1263 STOP_TIMER("vertical_decompose97i")
/* Forward multi-level 2D wavelet transform of buffer, in place.
 * For each level from 0 to decomposition_count-1 one decomposition is run on
 * a region whose width and height are shifted right by `level` and whose row
 * stride is shifted left by `level`. The case labels map 0 to the 9/7
 * transform, 1 to the 5/3 transform and 2 to the generic "X" transform;
 * presumably the switch is on `type` (the switch line is not shown here). */
1273 void ff_spatial_dwt(DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
1276 for(level=0; level<decomposition_count; level++){
1278 case 0: spatial_decompose97i(buffer, width>>level, height>>level, stride<<level); break;
1279 case 1: spatial_decompose53i(buffer, width>>level, height>>level, stride<<level); break;
1280 case 2: spatial_decomposeX (buffer, width>>level, height>>level, stride<<level); break;
/* Inverse horizontal 5/3 integer wavelet on one row b of `width` samples,
 * using a variable-length scratch row temp; the final loop writes the
 * samples held from offset w2 of temp back to the odd positions of b.
 * NOTE(review): both a hand-unrolled A1..A4 loop and a pair of lift() calls
 * are visible below; presumably they are alternative implementations chosen
 * by preprocessor conditionals that are not part of this excerpt. */
1285 static void horizontal_compose53i(DWTELEM *b, int width){
1286 DWTELEM temp[width];
1287 const int width2= width>>1;
1288 const int w2= (width+1)>>1;
1300 for(x=1; x+1<width2; x+=2){
1304 A2 += (A1 + A3 + 2)>>2;
1308 A1= temp[x+1+width2];
1311 A4 += (A1 + A3 + 2)>>2;
1317 A2 += (A1 + A3 + 2)>>2;
/* Same two lifting steps as the forward transform, in the opposite order and
 * with the last argument set to 1. */
1321 lift(temp , b , b+w2, 1, 1, 1, width, 1, 2, 2, 0, 1);
1322 lift(temp+w2, b+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 1);
1324 for(x=0; x<width2; x++){
1326 b[2*x + 1]= temp[x+w2];
/* Inverse 5/3 vertical step on row b1: for every column i below `width`,
 * adds (b0[i] + b2[i])>>1 to b1[i]. b0 and b2 are only read. */
1332 static void vertical_compose53iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1335 for(i=0; i<width; i++){
1336 b1[i] += (b0[i] + b2[i])>>1;
/* Inverse 5/3 vertical step on row b1: for every column i below `width`,
 * subtracts (b0[i] + b2[i] + 2)>>2 from b1[i]. b0 and b2 are only read. */
1340 static void vertical_compose53iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1343 for(i=0; i<width; i++){
1344 b1[i] -= (b0[i] + b2[i] + 2)>>2;
/* Prepares the incremental 5/3 inverse-transform state for the slice-buffer
 * variant: cs->b0 and cs->b1 are set to the slice-buffer lines for the
 * mirrored row indices -2 and -1 (each multiplied by stride_line).
 * NOTE(review): any further state initialisation is not visible here. */
1348 static void spatial_compose53i_buffered_init(dwt_compose_t *cs, slice_buffer * sb, int height, int stride_line){
1349 cs->b0 = slice_buffer_get_line(sb, mirror(-1-1, height-1) * stride_line);
1350 cs->b1 = slice_buffer_get_line(sb, mirror(-1 , height-1) * stride_line);
/* Prepares the incremental 5/3 inverse-transform state for a flat buffer:
 * cs->b0 and cs->b1 are set to the rows at the mirrored indices -2 and -1.
 * NOTE(review): any further state initialisation is not visible here. */
1354 static void spatial_compose53i_init(dwt_compose_t *cs, DWTELEM *buffer, int height, int stride){
1355 cs->b0 = buffer + mirror(-1-1, height-1)*stride;
1356 cs->b1 = buffer + mirror(-1 , height-1)*stride;
/* One step of the incremental 5/3 inverse transform, slice-buffer variant.
 * Takes the two carried rows from cs, fetches the next two rows from the
 * slice buffer, undoes the vertical lifting steps and then inverse-transforms
 * the two oldest rows horizontally.
 * NOTE(review): the declaration of y and the update of cs at the end are not
 * visible here; presumably y is the current row position stored in cs. */
1360 static void spatial_compose53i_dy_buffered(dwt_compose_t *cs, slice_buffer * sb, int width, int height, int stride_line){
1363 DWTELEM *b0= cs->b0;
1364 DWTELEM *b1= cs->b1;
1365 DWTELEM *b2= slice_buffer_get_line(sb, mirror(y+1, height-1) * stride_line);
1366 DWTELEM *b3= slice_buffer_get_line(sb, mirror(y+2, height-1) * stride_line);
/* The unsigned comparison rejects negative rows and rows >= height at once. */
1369 if(y+1<(unsigned)height) vertical_compose53iL0(b1, b2, b3, width);
1370 if(y+0<(unsigned)height) vertical_compose53iH0(b0, b1, b2, width);
1371 STOP_TIMER("vertical_compose53i*")}
1374 if(y-1<(unsigned)height) horizontal_compose53i(b0, width);
1375 if(y+0<(unsigned)height) horizontal_compose53i(b1, width);
1376 STOP_TIMER("horizontal_compose53i")}
/* One step of the incremental 5/3 inverse transform on a flat buffer.
 * Takes the two carried rows from cs, addresses the next two rows through
 * mirror(), undoes the vertical lifting steps and then inverse-transforms the
 * two oldest rows horizontally.
 * NOTE(review): the declaration of y and the update of cs at the end are not
 * visible here; presumably y is the current row position stored in cs. */
1383 static void spatial_compose53i_dy(dwt_compose_t *cs, DWTELEM *buffer, int width, int height, int stride){
1385 DWTELEM *b0= cs->b0;
1386 DWTELEM *b1= cs->b1;
1387 DWTELEM *b2= buffer + mirror(y+1, height-1)*stride;
1388 DWTELEM *b3= buffer + mirror(y+2, height-1)*stride;
/* The unsigned comparison rejects negative rows and rows >= height at once. */
1391 if(y+1<(unsigned)height) vertical_compose53iL0(b1, b2, b3, width);
1392 if(y+0<(unsigned)height) vertical_compose53iH0(b0, b1, b2, width);
1393 STOP_TIMER("vertical_compose53i*")}
1396 if(y-1<(unsigned)height) horizontal_compose53i(b0, width);
1397 if(y+0<(unsigned)height) horizontal_compose53i(b1, width);
1398 STOP_TIMER("horizontal_compose53i")}
/* Full (non-sliced) one-level inverse 5/3 transform: initialises a local
 * compose state and keeps calling spatial_compose53i_dy() for as long as
 * cs.y <= height. */
1405 static void spatial_compose53i(DWTELEM *buffer, int width, int height, int stride){
1407 spatial_compose53i_init(&cs, buffer, height, stride);
1408 while(cs.y <= height)
1409 spatial_compose53i_dy(&cs, buffer, width, height, stride);
/* Inverse horizontal 9/7 integer wavelet on one row b of `width` samples.
 * Runs the same four lifting steps as horizontal_decompose97i() in the
 * opposite order (D, C, B, A) with the last argument set to 1; the last two
 * steps write into b with a destination step of 2, at offsets 0 and 1. */
1413 static void horizontal_compose97i(DWTELEM *b, int width){
1414 DWTELEM temp[width];
1415 const int w2= (width+1)>>1;
1417 lift (temp , b , b +w2, 1, 1, 1, width, W_DM, W_DO, W_DS, 0, 1);
1418 lift5(temp+w2, b +w2, temp , 1, 1, 1, width, W_CM, W_CO, W_CS, 1, 1);
1419 liftS(b , temp , temp+w2, 2, 1, 1, width, -W_BM, W_BO, W_BS, 0, 1);
1420 lift (b+1 , temp+w2, b , 2, 1, 2, width, -W_AM, W_AO, W_AS, 1, 1);
/* Inverse 9/7 vertical step "A" on row b1: for every column i below `width`,
 * adds (W_AM*(b0[i] + b2[i]) + W_AO)>>W_AS to b1[i]. */
1423 static void vertical_compose97iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1426 for(i=0; i<width; i++){
1427 b1[i] += (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
/* Inverse 9/7 vertical step "C" on row b1, subtracting a term derived from
 * b0[i] + b2[i] from b1[i] for every column.
 * NOTE(review): two forms of the update are visible (a direct W_CM multiply
 * and one built from r = 3*(b0+b2)); presumably a preprocessor conditional
 * not shown here selects one, and further lines adjusting r are missing. */
1431 static void vertical_compose97iH1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1434 for(i=0; i<width; i++){
1436 b1[i] -= (W_CM*(b0[i] + b2[i])+W_CO)>>W_CS;
1438 int r= 3*(b0[i] + b2[i]);
1441 b1[i] -= (r+W_CO)>>W_CS;
/* Inverse 9/7 vertical step "B" on row b1, using neighbours b0 and b2.
 * NOTE(review): two forms of the update are visible (with and without the
 * extra 4*b1[i] term); presumably a preprocessor conditional not shown here
 * selects one of them. */
1446 static void vertical_compose97iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1449 for(i=0; i<width; i++){
1451 b1[i] += (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS;
1453 b1[i] += (W_BM*(b0[i] + b2[i])+4*b1[i]+W_BO)>>W_BS;
/* Inverse 9/7 vertical step "D" on row b1: for every column i below `width`,
 * subtracts (W_DM*(b0[i] + b2[i]) + W_DO)>>W_DS from b1[i]. */
1458 static void vertical_compose97iL1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1461 for(i=0; i<width; i++){
1462 b1[i] -= (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS;
/* Fused inverse 9/7 vertical pass: for each column applies the D step to b4,
 * the C step to b3, the B step to b2 and the A step to b1 in that order, so
 * each step sees the row just updated by the previous one.
 * NOTE(review): the C and B steps each appear in two forms; presumably
 * preprocessor conditionals not shown here select one form of each. */
1466 static void vertical_compose97i(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, DWTELEM *b3, DWTELEM *b4, DWTELEM *b5, int width){
1469 for(i=0; i<width; i++){
1473 b4[i] -= (W_DM*(b3[i] + b5[i])+W_DO)>>W_DS;
1475 b3[i] -= (W_CM*(b2[i] + b4[i])+W_CO)>>W_CS;
1477 r= 3*(b2[i] + b4[i]);
1480 b3[i] -= (r+W_CO)>>W_CS;
1483 b2[i] += (W_BM*(b1[i] + b3[i])+W_BO)>>W_BS;
1485 b2[i] += (W_BM*(b1[i] + b3[i])+4*b2[i]+W_BO)>>W_BS;
1487 b1[i] += (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
/* Prepares the incremental 9/7 inverse-transform state for the slice-buffer
 * variant: cs->b0..cs->b3 are set to the slice-buffer lines for the mirrored
 * row indices -4, -3, -2 and -1 (each multiplied by stride_line).
 * NOTE(review): any further state initialisation is not visible here. */
1491 static void spatial_compose97i_buffered_init(dwt_compose_t *cs, slice_buffer * sb, int height, int stride_line){
1492 cs->b0 = slice_buffer_get_line(sb, mirror(-3-1, height-1) * stride_line);
1493 cs->b1 = slice_buffer_get_line(sb, mirror(-3 , height-1) * stride_line);
1494 cs->b2 = slice_buffer_get_line(sb, mirror(-3+1, height-1) * stride_line);
1495 cs->b3 = slice_buffer_get_line(sb, mirror(-3+2, height-1) * stride_line);
/* Prepares the incremental 9/7 inverse-transform state for a flat buffer:
 * cs->b0..cs->b3 are set to the rows at the mirrored indices -4, -3, -2, -1.
 * NOTE(review): any further state initialisation is not visible here. */
1499 static void spatial_compose97i_init(dwt_compose_t *cs, DWTELEM *buffer, int height, int stride){
1500 cs->b0 = buffer + mirror(-3-1, height-1)*stride;
1501 cs->b1 = buffer + mirror(-3 , height-1)*stride;
1502 cs->b2 = buffer + mirror(-3+1, height-1)*stride;
1503 cs->b3 = buffer + mirror(-3+2, height-1)*stride;
/* One step of the incremental 9/7 inverse transform, slice-buffer variant.
 * Takes the four carried rows from cs, fetches the next two rows from the
 * slice buffer, undoes the vertical lifting steps and then inverse-transforms
 * the two oldest rows horizontally.
 * NOTE(review): the declaration of y and the update of cs at the end are not
 * visible here; presumably y is the current row position stored in cs. */
1507 static void spatial_compose97i_dy_buffered(dwt_compose_t *cs, slice_buffer * sb, int width, int height, int stride_line){
1510 DWTELEM *b0= cs->b0;
1511 DWTELEM *b1= cs->b1;
1512 DWTELEM *b2= cs->b2;
1513 DWTELEM *b3= cs->b3;
1514 DWTELEM *b4= slice_buffer_get_line(sb, mirror(y + 3, height - 1) * stride_line);
1515 DWTELEM *b5= slice_buffer_get_line(sb, mirror(y + 4, height - 1) * stride_line);
/* Away from the top and bottom edges the fused routine handles all four
 * steps; the individually guarded calls below are presumably the else branch
 * for the edge rows (the else line itself is not shown here). */
1518 if(y>0 && y+4<height){
1519 vertical_compose97i(b0, b1, b2, b3, b4, b5, width);
1521 if(y+3<(unsigned)height) vertical_compose97iL1(b3, b4, b5, width);
1522 if(y+2<(unsigned)height) vertical_compose97iH1(b2, b3, b4, width);
1523 if(y+1<(unsigned)height) vertical_compose97iL0(b1, b2, b3, width);
1524 if(y+0<(unsigned)height) vertical_compose97iH0(b0, b1, b2, width);
1527 STOP_TIMER("vertical_compose97i")}}
1530 if(y-1<(unsigned)height) horizontal_compose97i(b0, width);
1531 if(y+0<(unsigned)height) horizontal_compose97i(b1, width);
1532 if(width>400 && y+0<(unsigned)height){
1533 STOP_TIMER("horizontal_compose97i")}}
/* One step of the incremental 9/7 inverse transform on a flat buffer.
 * Takes the four carried rows from cs, addresses the next two rows through
 * mirror(), undoes the four vertical lifting steps (each guarded so that only
 * rows inside [0, height) are touched) and then inverse-transforms the two
 * oldest rows horizontally.
 * NOTE(review): the declaration of y and the update of cs at the end are not
 * visible here; presumably y is the current row position stored in cs. */
1542 static void spatial_compose97i_dy(dwt_compose_t *cs, DWTELEM *buffer, int width, int height, int stride){
1544 DWTELEM *b0= cs->b0;
1545 DWTELEM *b1= cs->b1;
1546 DWTELEM *b2= cs->b2;
1547 DWTELEM *b3= cs->b3;
1548 DWTELEM *b4= buffer + mirror(y+3, height-1)*stride;
1549 DWTELEM *b5= buffer + mirror(y+4, height-1)*stride;
1552 if(y+3<(unsigned)height) vertical_compose97iL1(b3, b4, b5, width);
1553 if(y+2<(unsigned)height) vertical_compose97iH1(b2, b3, b4, width);
1554 if(y+1<(unsigned)height) vertical_compose97iL0(b1, b2, b3, width);
1555 if(y+0<(unsigned)height) vertical_compose97iH0(b0, b1, b2, width);
1557 STOP_TIMER("vertical_compose97i")}}
1560 if(y-1<(unsigned)height) horizontal_compose97i(b0, width);
1561 if(y+0<(unsigned)height) horizontal_compose97i(b1, width);
1562 if(width>400 && b0 <= b2){
1563 STOP_TIMER("horizontal_compose97i")}}
/* Full (non-sliced) one-level inverse 9/7 transform: initialises a local
 * compose state and keeps calling spatial_compose97i_dy() for as long as
 * cs.y <= height. */
1572 static void spatial_compose97i(DWTELEM *buffer, int width, int height, int stride){
1574 spatial_compose97i_init(&cs, buffer, height, stride);
1575 while(cs.y <= height)
1576 spatial_compose97i_dy(&cs, buffer, width, height, stride);
/* Initialises one compose state per decomposition level (cs[level]) for the
 * slice-buffer inverse transform, going from the highest level index down to
 * level 0. Each level uses height>>level and stride_line<<level.
 * Case 0 sets up the 9/7 state and case 1 the 5/3 state; case 2 (the generic
 * "X" wavelet) is not supported in this mode and only logs an error.
 * Presumably the switch is on `type` (the switch line is not shown here). */
1579 void ff_spatial_idwt_buffered_init(dwt_compose_t *cs, slice_buffer * sb, int width, int height, int stride_line, int type, int decomposition_count){
1581 for(level=decomposition_count-1; level>=0; level--){
1583 case 0: spatial_compose97i_buffered_init(cs+level, sb, height>>level, stride_line<<level); break;
1584 case 1: spatial_compose53i_buffered_init(cs+level, sb, height>>level, stride_line<<level); break;
1585 /* not slicified yet */
1586 case 2: /*spatial_composeX(buffer, width>>level, height>>level, stride<<level); break;*/
1587 av_log(NULL, AV_LOG_ERROR, "spatial_composeX neither buffered nor slicified yet.\n"); break;
/* Initialises one compose state per decomposition level (cs[level]) for the
 * flat-buffer inverse transform, going from the highest level index down to
 * level 0. Case 0 sets up the 9/7 state and case 1 the 5/3 state.
 * Case 2 has no incremental form: it calls spatial_composeX() right here, so
 * that level is fully inverse-transformed during initialisation.
 * Presumably the switch is on `type` (the switch line is not shown here). */
1592 void ff_spatial_idwt_init(dwt_compose_t *cs, DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
1594 for(level=decomposition_count-1; level>=0; level--){
1596 case 0: spatial_compose97i_init(cs+level, buffer, height>>level, stride<<level); break;
1597 case 1: spatial_compose53i_init(cs+level, buffer, height>>level, stride<<level); break;
1598 /* not slicified yet */
1599 case 2: spatial_composeX(buffer, width>>level, height>>level, stride<<level); break;
/* Advances the incremental inverse transform on a flat buffer far enough for
 * output row y. For every level, from the highest index down to 0, the
 * per-level step function is called repeatedly while that level's row
 * position cs[level].y has not passed min((y>>level)+support, height>>level).
 * `support` is 3 when type is 1 (5/3) and 5 otherwise. */
1604 void ff_spatial_idwt_slice(dwt_compose_t *cs, DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count, int y){
1605 const int support = type==1 ? 3 : 5;
1609 for(level=decomposition_count-1; level>=0; level--){
1610 while(cs[level].y <= FFMIN((y>>level)+support, height>>level)){
1612 case 0: spatial_compose97i_dy(cs+level, buffer, width>>level, height>>level, stride<<level);
1614 case 1: spatial_compose53i_dy(cs+level, buffer, width>>level, height>>level, stride<<level);
/* Slice-buffer counterpart of ff_spatial_idwt_slice(): advances the
 * incremental inverse transform far enough for output row y. For every
 * level, from the highest index down to 0, the buffered per-level step
 * function is called repeatedly while cs[level].y has not passed
 * min((y>>level)+support, height>>level).
 * `support` is 3 when type is 1 (5/3) and 5 otherwise. */
1622 void ff_spatial_idwt_buffered_slice(dwt_compose_t *cs, slice_buffer * slice_buf, int width, int height, int stride_line, int type, int decomposition_count, int y){
1623 const int support = type==1 ? 3 : 5;
1627 for(level=decomposition_count-1; level>=0; level--){
1628 while(cs[level].y <= FFMIN((y>>level)+support, height>>level)){
1630 case 0: spatial_compose97i_dy_buffered(cs+level, slice_buf, width>>level, height>>level, stride_line<<level);
1632 case 1: spatial_compose53i_dy_buffered(cs+level, slice_buf, width>>level, height>>level, stride_line<<level);
/* Complete inverse multi-level 2D wavelet transform of buffer, in place.
 * Two paths are visible: a loop that calls spatial_composeX() for every level
 * from the highest index down to 0, and a path that initialises per-level
 * compose states and then drives ff_spatial_idwt_slice() over the image in
 * steps of four rows.
 * NOTE(review): the condition choosing between the two paths is not shown
 * here; presumably the first path is taken for the generic "X" wavelet. */
1640 void ff_spatial_idwt(DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
1643 for(level=decomposition_count-1; level>=0; level--)
1644 spatial_composeX (buffer, width>>level, height>>level, stride<<level);
1646 dwt_compose_t cs[MAX_DECOMPOSITIONS];
1648 ff_spatial_idwt_init(cs, buffer, width, height, stride, type, decomposition_count);
1649 for(y=0; y<height; y+=4)
1650 ff_spatial_idwt_slice(cs, buffer, width, height, stride, type, decomposition_count, y);
/* Range-codes the coefficients of subband b (read from src, rows `stride`
 * apart) using neighbour-based contexts plus run lengths.
 * The visible code makes two passes over the band: the first collects run
 * lengths into runs[] for positions whose neighbours and parent sample are
 * all zero, the second writes the run count, the runs and the coefficients.
 * NOTE(review): loop headers, several declarations and the return statements
 * are not shown in this excerpt; the notes below cover visible lines only. */
1654 static int encode_subband_c0run(SnowContext *s, SubBand *b, DWTELEM *src, DWTELEM *parent, int stride, int orientation){
1655 const int w= b->width;
1656 const int h= b->height;
/* First pass: v is the current coefficient; t, lt, rt and l are its top,
 * top-left, top-right and left neighbours, p the sample from the parent band. */
1668 int /*ll=0, */l=0, lt=0, t=0, rt=0;
1669 v= src[x + y*stride];
1672 t= src[x + (y-1)*stride];
1674 lt= src[x - 1 + (y-1)*stride];
1677 rt= src[x + 1 + (y-1)*stride];
1681 l= src[x - 1 + y*stride];
/* NOTE(review): ll is assigned here although its declaration is commented
 * out above; presumably these two lines are compiled out. */
1683 if(orientation==1) ll= src[y + (x-2)*stride];
1684 else ll= src[x - 2 + y*stride];
1690 if(px<b->parent->width && py<b->parent->height)
1691 p= parent[px + py*2*stride];
/* With an all-zero context the coefficient is covered by run-length coding. */
1693 if(!(/*ll|*/l|lt|t|rt|p)){
1695 runs[run_index++]= run;
1703 max_index= run_index;
1704 runs[run_index++]= run;
1706 run= runs[run_index++];
/* The number of runs is written first, followed by the first run length. */
1708 put_symbol2(&s->c, b->state[30], max_index, 0);
1709 if(run_index <= max_index)
1710 put_symbol2(&s->c, b->state[1], run, 3);
/* Reports an error when fewer than w*40 bytes are left in the output buffer. */
1713 if(s->c.bytestream_end - s->c.bytestream < w*40){
1714 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
/* Second pass: the same neighbourhood is gathered again for each position. */
1719 int /*ll=0, */l=0, lt=0, t=0, rt=0;
1720 v= src[x + y*stride];
1723 t= src[x + (y-1)*stride];
1725 lt= src[x - 1 + (y-1)*stride];
1728 rt= src[x + 1 + (y-1)*stride];
1732 l= src[x - 1 + y*stride];
1734 if(orientation==1) ll= src[y + (x-2)*stride];
1735 else ll= src[x - 2 + y*stride];
1741 if(px<b->parent->width && py<b->parent->height)
1742 p= parent[px + py*2*stride];
/* Non-zero context: code a zero/non-zero flag in a context chosen from the
 * log2 of a weighted sum of the neighbour magnitudes. */
1744 if(/*ll|*/l|lt|t|rt|p){
1745 int context= av_log2(/*ABS(ll) + */3*ABS(l) + ABS(lt) + 2*ABS(t) + ABS(rt) + ABS(p));
1747 put_rac(&s->c, &b->state[0][context], !!v);
1750 run= runs[run_index++];
1752 if(run_index <= max_index)
1753 put_symbol2(&s->c, b->state[1], run, 3);
/* Magnitude minus one is coded with a context-dependent state set; the sign
 * bit uses a context derived from the left and top neighbours. */
1761 int context= av_log2(/*ABS(ll) + */3*ABS(l) + ABS(lt) + 2*ABS(t) + ABS(rt) + ABS(p));
1762 int l2= 2*ABS(l) + (l<0);
1763 int t2= 2*ABS(t) + (t<0);
1765 put_symbol2(&s->c, b->state[context + 2], ABS(v)-1, context-4);
1766 put_rac(&s->c, &b->state[0][16 + 1 + 3 + quant3bA[l2&0xFF] + 3*quant3bA[t2&0xFF]], v<0);
/* Encodes one subband by forwarding all arguments to encode_subband_c0run()
 * and returning its result; the other coder variants are commented out. */
1774 static int encode_subband(SnowContext *s, SubBand *b, DWTELEM *src, DWTELEM *parent, int stride, int orientation){
1775 // encode_subband_qtree(s, b, src, parent, stride, orientation);
1776 // encode_subband_z0run(s, b, src, parent, stride, orientation);
1777 return encode_subband_c0run(s, b, src, parent, stride, orientation);
1778 // encode_subband_dzr(s, b, src, parent, stride, orientation);
/* Decodes the coefficients of subband b from the range coder into the list
 * b->x_coeff of (x, coeff) entries; an entry with x == w+1 is written as an
 * end marker. Entries of the previous row (prev_xc) and of the parent band
 * (parent_xc) supply the coding contexts.
 * Decoded values are packed as 2*magnitude plus a sign bit in bit 0.
 * NOTE(review): loop headers and many statements are not shown in this
 * excerpt; the notes below cover visible lines only. */
1781 static inline void unpack_coeffs(SnowContext *s, SubBand *b, SubBand * parent, int orientation){
1782 const int w= b->width;
1783 const int h= b->height;
1788 x_and_coeff *xc= b->x_coeff;
1789 x_and_coeff *prev_xc= NULL;
1790 x_and_coeff *prev2_xc= xc;
1791 x_and_coeff *parent_xc= parent ? parent->x_coeff : NULL;
1792 x_and_coeff *prev_parent_xc= parent_xc;
/* Number of runs first, then (if there is at least one) the first run length. */
1794 runs= get_symbol2(&s->c, b->state[30], 0);
1795 if(runs-- > 0) run= get_symbol2(&s->c, b->state[1], 3);
1800 int lt=0, t=0, rt=0;
1802 if(y && prev_xc->x == 0){
1814 if(prev_xc->x == x + 1)
/* The parent band has half the resolution, hence the comparison with x>>1. */
1820 if(x>>1 > parent_xc->x){
1823 if(x>>1 == parent_xc->x){
1824 p= parent_xc->coeff;
/* Non-zero context: the neighbours are in the packed form, so >>1 (or
 * masking off bit 0) is used to weight their magnitudes. */
1827 if(/*ll|*/l|lt|t|rt|p){
1828 int context= av_log2(/*ABS(ll) + */3*(l>>1) + (lt>>1) + (t&~1) + (rt>>1) + (p>>1));
1830 v=get_rac(&s->c, &b->state[0][context]);
1832 v= 2*(get_symbol2(&s->c, b->state[context + 2], context-4) + 1);
1833 v+=get_rac(&s->c, &b->state[0][16 + 1 + 3 + quant3bA[l&0xFF] + 3*quant3bA[t&0xFF]]);
/* Zero context: the coefficient that ends a run is read with context 0. */
1840 if(runs-- > 0) run= get_symbol2(&s->c, b->state[1], 3);
1842 v= 2*(get_symbol2(&s->c, b->state[0 + 2], 0-4) + 1);
1843 v+=get_rac(&s->c, &b->state[0][16 + 1 + 3]);
/* max_run limits how far the run may be skipped in one go. */
1852 if(y) max_run= FFMIN(run, prev_xc->x - x - 2);
1853 else max_run= FFMIN(run, w-x-1);
1855 max_run= FFMIN(max_run, 2*parent_xc->x - x - 1);
1861 (xc++)->x= w+1; //end marker
1867 while(parent_xc->x != parent->width+1)
1870 prev_parent_xc= parent_xc;
1872 parent_xc= prev_parent_xc;
1877 (xc++)->x= w+1; //end marker
/* Dequantises the coefficients previously stored in b->x_coeff for rows
 * start_y..h-1 of subband b and writes them into the slice buffer.
 * Each output line is cleared first; each stored entry (x, v) then becomes
 * ((v>>1)*qmul + qadd)>>QEXPSHIFT, negated when bit 0 of v is set.
 * save_state[0] carries the position in b->x_coeff from one call to the next.
 * NOTE(review): several control-flow lines are not shown in this excerpt. */
1881 static inline void decode_subband_slice_buffered(SnowContext *s, SubBand *b, slice_buffer * sb, int start_y, int h, int save_state[1]){
1882 const int w= b->width;
/* Quantiser: qlog is the sum of the global and per-band values passed through
 * clip(); qmul is a table entry shifted by the integer part of qlog. */
1884 const int qlog= clip(s->qlog + b->qlog, 0, QROOT*16);
1885 int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
1886 int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
1891 if(b->buf == s->spatial_dwt_buffer || s->qlog == LOSSLESS_QLOG){
1896 /* If we are on the second or later slice, restore our index. */
1898 new_index = save_state[0];
1901 for(y=start_y; y<h; y++){
1904 DWTELEM * line = slice_buffer_get_line(sb, y * b->stride_line + b->buf_y_offset) + b->buf_x_offset;
1905 memset(line, 0, b->width*sizeof(DWTELEM));
1906 v = b->x_coeff[new_index].coeff;
1907 x = b->x_coeff[new_index++].x;
/* u is 0 or -1, so (t^u) - u yields t or -t without a branch. */
1910 register int t= ( (v>>1)*qmul + qadd)>>QEXPSHIFT;
1911 register int u= -(v&1);
1912 line[x] = (t^u) - u;
1914 v = b->x_coeff[new_index].coeff;
1915 x = b->x_coeff[new_index++].x;
1918 if(w > 200 && start_y != 0/*level+1 == s->spatial_decomposition_count*/){
1919 STOP_TIMER("decode_subband")
1922 /* Save our variables for the next slice. */
1923 save_state[0] = new_index;
/* Resets all adaptive range-coder states to MID_STATE: the state array of
 * every subband of the three planes (orientation 0 exists only on level 0,
 * other levels start at orientation 1), plus the header and block states. */
1928 static void reset_contexts(SnowContext *s){
1929 int plane_index, level, orientation;
1931 for(plane_index=0; plane_index<3; plane_index++){
1932 for(level=0; level<s->spatial_decomposition_count; level++){
1933 for(orientation=level ? 1:0; orientation<4; orientation++){
1934 memset(s->plane[plane_index].band[level][orientation].state, MID_STATE, sizeof(s->plane[plane_index].band[level][orientation].state));
1938 memset(s->header_state, MID_STATE, sizeof(s->header_state));
1939 memset(s->block_state, MID_STATE, sizeof(s->block_state));
/* Allocates the zero-initialised block array s->block.
 * w and h are the picture dimensions divided by the block size
 * (1<<LOG2_MB_SIZE); negating before and after the shift rounds up, assuming
 * right shifts of negative values are arithmetic. The array holds w*h
 * entries multiplied by 4^block_max_depth.
 * NOTE(review): the result of av_mallocz() is not checked in the visible
 * lines, and the return statement is not shown here. */
1942 static int alloc_blocks(SnowContext *s){
1943 int w= -((-s->avctx->width )>>LOG2_MB_SIZE);
1944 int h= -((-s->avctx->height)>>LOG2_MB_SIZE);
1949 s->block= av_mallocz(w * h * sizeof(BlockNode) << (s->block_max_depth*2));
/* Saves d's bytestream and bytestream_start pointers on entry and writes them
 * back at the end, so d keeps its own output position.
 * NOTE(review): the statement between the save and the restore is not shown
 * here; presumably it copies the coder state of s into d. */
1953 static inline void copy_rac_state(RangeCoder *d, RangeCoder *s){
1954 uint8_t *bytestream= d->bytestream;
1955 uint8_t *bytestream_start= d->bytestream_start;
1957 d->bytestream= bytestream;
1958 d->bytestream_start= bytestream_start;
1961 //near copy & paste from dsputil, FIXME
/* Walks a w x w block of pixels starting at pix, with rows line_size bytes
 * apart (pix is advanced by line_size - w after each row).
 * NOTE(review): the accumulation and return lines are not shown here;
 * presumably the function returns the sum of the pixel values. */
1962 static int pix_sum(uint8_t * pix, int line_size, int w)
1967 for (i = 0; i < w; i++) {
1968 for (j = 0; j < w; j++) {
1972 pix += line_size - w;
1977 //near copy & paste from dsputil, FIXME
/* Walks a w x w block of pixels starting at pix, with rows line_size bytes
 * apart, using the lookup table sq (squareTbl offset by 256).
 * NOTE(review): the accumulation and return lines are not shown here;
 * presumably the function returns the sum of the squared pixel values. */
1978 static int pix_norm1(uint8_t * pix, int line_size, int w)
1981 uint32_t *sq = squareTbl + 256;
1984 for (i = 0; i < w; i++) {
1985 for (j = 0; j < w; j ++) {
1989 pix += line_size - w;
/* Writes the same BlockNode value into every entry of s->block covered by
 * the block at (x, y) on quadtree level `level`: a square of
 * 1<<(block_max_depth - level) entries per side, starting at `index`, in an
 * array that is w entries wide.
 * NOTE(review): the construction of `block` is not shown here; presumably it
 * is filled from l, cb, cr, mx, my and type. */
1994 static inline void set_blocks(SnowContext *s, int level, int x, int y, int l, int cb, int cr, int mx, int my, int type){
1995 const int w= s->b_width << s->block_max_depth;
1996 const int rem_depth= s->block_max_depth - level;
1997 const int index= (x + y*w) << rem_depth;
1998 const int block_w= 1<<rem_depth;
2010 for(j=0; j<block_w; j++){
2011 for(i=0; i<block_w; i++){
2012 s->block[index + i + j*w]= block;
/* Points the motion-estimation context at the planes to compare: c->src[0][i]
 * is set to src[i] unchanged, and c->ref[0][i] to ref[i] plus a per-plane
 * offset for position (x, y). The two chroma offsets are
 * (y*uvstride + x)>>1; the first offset is not shown in this excerpt.
 * ref2 and ref_index are not used by any visible line. */
2017 static inline void init_ref(MotionEstContext *c, uint8_t *src[3], uint8_t *ref[3], uint8_t *ref2[3], int x, int y, int ref_index){
2018 const int offset[3]= {
2020 ((y*c->uvstride + x)>>1),
2021 ((y*c->uvstride + x)>>1),
2025 c->src[0][i]= src [i];
2026 c->ref[0][i]= ref [i] + offset[i];
/* Names for slots of the motion-vector predictor array P used by
 * encode_q_branch(), and the flag value for quarter-pel search. */
2034 #define P_TOPRIGHT P[3]
2035 #define P_MEDIAN P[4]
2037 #define FLAG_QPEL 1 //must be 1
/* Encoder-side decision for the block at (x, y) on quadtree level `level`.
 * The visible code builds two candidates in scratch range coders -- an inter
 * candidate (motion vector found by motion search) and an intra candidate
 * (flat colour l/cb/cr) -- scores both as distortion plus lambda2-weighted
 * bit cost, optionally recurses into the four child blocks, and then commits
 * one of the candidates to s->c and s->block.
 * NOTE(review): several declarations, branch headers and the return
 * statements are not shown in this excerpt; presumably the returned value is
 * the score of the chosen alternative. */
2039 static int encode_q_branch(SnowContext *s, int level, int x, int y){
2040 uint8_t p_buffer[1024];
2041 uint8_t i_buffer[1024];
2042 uint8_t p_state[sizeof(s->block_state)];
2043 uint8_t i_state[sizeof(s->block_state)];
2045 uint8_t *pbbak= s->c.bytestream;
2046 uint8_t *pbbak_start= s->c.bytestream_start;
2047 int score, score2, iscore, i_len, p_len, block_s, sum;
2048 const int w= s->b_width << s->block_max_depth;
2049 const int h= s->b_height << s->block_max_depth;
2050 const int rem_depth= s->block_max_depth - level;
2051 const int index= (x + y*w) << rem_depth;
2052 const int block_w= 1<<(LOG2_MB_SIZE - level);
2053 int trx= (x+1)<<rem_depth;
2054 int try= (y+1)<<rem_depth;
/* Neighbouring blocks; null_block stands in where a neighbour lies outside
 * the block array. */
2055 BlockNode *left = x ? &s->block[index-1] : &null_block;
2056 BlockNode *top = y ? &s->block[index-w] : &null_block;
2057 BlockNode *right = trx<w ? &s->block[index+1] : &null_block;
2058 BlockNode *bottom= try<h ? &s->block[index+w] : &null_block;
2059 BlockNode *tl = y && x ? &s->block[index-w-1] : left;
2060 BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
/* Predictors: colour from the left block, motion vector as the median of the
 * left, top and top-right blocks. */
2061 int pl = left->color[0];
2062 int pcb= left->color[1];
2063 int pcr= left->color[2];
2064 int pmx= mid_pred(left->mx, top->mx, tr->mx);
2065 int pmy= mid_pred(left->my, top->my, tr->my);
2068 const int stride= s->current_picture.linesize[0];
2069 const int uvstride= s->current_picture.linesize[1];
2070 uint8_t *current_data[3]= { s->input_picture.data[0] + (x + y* stride)*block_w,
2071 s->input_picture.data[1] + (x + y*uvstride)*block_w/2,
2072 s->input_picture.data[2] + (x + y*uvstride)*block_w/2};
2074 int16_t last_mv[3][2];
2075 int qpel= !!(s->avctx->flags & CODEC_FLAG_QPEL); //unused
2076 const int shift= 1+qpel;
2077 MotionEstContext *c= &s->m.me;
/* Entropy-coding contexts derived from the neighbours. */
2078 int mx_context= av_log2(2*ABS(left->mx - top->mx));
2079 int my_context= av_log2(2*ABS(left->my - top->my));
2080 int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
2082 assert(sizeof(s->block_state) >= 256);
2084 set_blocks(s, level, x, y, pl, pcb, pcr, pmx, pmy, BLOCK_INTRA);
2088 // clip predictors / edge ?
2090 P_LEFT[0]= left->mx;
2091 P_LEFT[1]= left->my;
2094 P_TOPRIGHT[0]= tr->mx;
2095 P_TOPRIGHT[1]= tr->my;
/* Extra search candidates: the vector currently stored for this block and
 * those of the right and bottom neighbours. */
2097 last_mv[0][0]= s->block[index].mx;
2098 last_mv[0][1]= s->block[index].my;
2099 last_mv[1][0]= right->mx;
2100 last_mv[1][1]= right->my;
2101 last_mv[2][0]= bottom->mx;
2102 last_mv[2][1]= bottom->my;
2109 init_ref(c, current_data, s->last_picture.data, NULL, block_w*x, block_w*y, 0);
2111 assert(s->m.me. stride == stride);
2112 assert(s->m.me.uvstride == uvstride);
2114 c->penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_cmp);
2115 c->sub_penalty_factor= get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_sub_cmp);
2116 c->mb_penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->mb_cmp);
2117 c->current_mv_penalty= c->mv_penalty[s->m.f_code=1] + MAX_MV;
/* Search window relative to this block, with a margin of 16-2 around the
 * area covered by the block grid. */
2119 c->xmin = - x*block_w - 16+2;
2120 c->ymin = - y*block_w - 16+2;
2121 c->xmax = - (x+1)*block_w + (w<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-2;
2122 c->ymax = - (y+1)*block_w + (h<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-2;
/* Clamp the predictors to the search window (window limits are shifted by
 * `shift` to match the predictor units). */
2124 if(P_LEFT[0] > (c->xmax<<shift)) P_LEFT[0] = (c->xmax<<shift);
2125 if(P_LEFT[1] > (c->ymax<<shift)) P_LEFT[1] = (c->ymax<<shift);
2126 if(P_TOP[0] > (c->xmax<<shift)) P_TOP[0] = (c->xmax<<shift);
2127 if(P_TOP[1] > (c->ymax<<shift)) P_TOP[1] = (c->ymax<<shift);
2128 if(P_TOPRIGHT[0] < (c->xmin<<shift)) P_TOPRIGHT[0]= (c->xmin<<shift);
2129 if(P_TOPRIGHT[0] > (c->xmax<<shift)) P_TOPRIGHT[0]= (c->xmax<<shift); //due to pmx no clip
2130 if(P_TOPRIGHT[1] > (c->ymax<<shift)) P_TOPRIGHT[1]= (c->ymax<<shift);
2132 P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]);
2133 P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);
2136 c->pred_x= P_LEFT[0];
2137 c->pred_y= P_LEFT[1];
2139 c->pred_x = P_MEDIAN[0];
2140 c->pred_y = P_MEDIAN[1];
2143 score= ff_epzs_motion_search(&s->m, &mx, &my, P, 0, /*ref_index*/ 0, last_mv,
2144 (1<<16)>>shift, level-LOG2_MB_SIZE+4, block_w);
2146 assert(mx >= c->xmin);
2147 assert(mx <= c->xmax);
2148 assert(my >= c->ymin);
2149 assert(my <= c->ymax);
2151 score= s->m.me.sub_motion_search(&s->m, &mx, &my, score, 0, 0, level-LOG2_MB_SIZE+4, block_w);
2152 score= ff_get_mb_score(&s->m, mx, my, 0, 0, level-LOG2_MB_SIZE+4, block_w, 0);
2153 //FIXME if mb_cmp != SSE then intra can't be compared currently and mb_penalty vs. lambda2
/* Inter candidate: coded into p_buffer with a private copy of the states. */
2157 pc.bytestream_start=
2158 pc.bytestream= p_buffer; //FIXME end/start? and at the other stoo
2159 memcpy(p_state, s->block_state, sizeof(s->block_state));
2161 if(level!=s->block_max_depth)
2162 put_rac(&pc, &p_state[4 + s_context], 1);
2163 put_rac(&pc, &p_state[1 + left->type + top->type], 0);
2164 put_symbol(&pc, &p_state[128 + 32*mx_context], mx - pmx, 1);
2165 put_symbol(&pc, &p_state[128 + 32*my_context], my - pmy, 1);
2166 p_len= pc.bytestream - pc.bytestream_start;
/* Rate term: bytes written, outstanding bytes and the change in coder range,
 * all relative to the main coder, weighted by lambda2. */
2167 score += (s->lambda2*(p_len*8
2168 + (pc.outstanding_count - s->c.outstanding_count)*8
2169 + (-av_log2(pc.range) + av_log2(s->c.range))
2170 ))>>FF_LAMBDA_SHIFT;
/* Intra candidate: l, cb and cr are the rounded per-plane averages
 * (sum/block_s); iscore starts from the luma plane only. */
2172 block_s= block_w*block_w;
2173 sum = pix_sum(current_data[0], stride, block_w);
2174 l= (sum + block_s/2)/block_s;
2175 iscore = pix_norm1(current_data[0], stride, block_w) - 2*l*sum + l*l*block_s;
2177 block_s= block_w*block_w>>2;
2178 sum = pix_sum(current_data[1], uvstride, block_w>>1);
2179 cb= (sum + block_s/2)/block_s;
2180 // iscore += pix_norm1(&current_mb[1][0], uvstride, block_w>>1) - 2*cb*sum + cb*cb*block_s;
2181 sum = pix_sum(current_data[2], uvstride, block_w>>1);
2182 cr= (sum + block_s/2)/block_s;
2183 // iscore += pix_norm1(&current_mb[2][0], uvstride, block_w>>1) - 2*cr*sum + cr*cr*block_s;
2186 ic.bytestream_start=
2187 ic.bytestream= i_buffer; //FIXME end/start? and at the other stoo
2188 memcpy(i_state, s->block_state, sizeof(s->block_state));
2189 if(level!=s->block_max_depth)
2190 put_rac(&ic, &i_state[4 + s_context], 1);
2191 put_rac(&ic, &i_state[1 + left->type + top->type], 1);
2192 put_symbol(&ic, &i_state[32], l-pl , 1);
2193 put_symbol(&ic, &i_state[64], cb-pcb, 1);
2194 put_symbol(&ic, &i_state[96], cr-pcr, 1);
2195 i_len= ic.bytestream - ic.bytestream_start;
2196 iscore += (s->lambda2*(i_len*8
2197 + (ic.outstanding_count - s->c.outstanding_count)*8
2198 + (-av_log2(ic.range) + av_log2(s->c.range))
2199 ))>>FF_LAMBDA_SHIFT;
2201 // assert(score==256*256*256*64-1);
2202 assert(iscore < 255*255*256 + s->lambda2*10);
2203 assert(iscore >= 0);
2204 assert(l>=0 && l<=255);
2205 assert(pl>=0 && pl<=255);
/* Scene-change statistics accumulated from the two scores. */
2208 int varc= iscore >> 8;
2209 int vard= score >> 8;
2210 if (vard <= 64 || vard < varc)
2211 c->scene_change_score+= ff_sqrt(vard) - ff_sqrt(varc);
2213 c->scene_change_score+= s->m.qscale;
/* Split candidate: signal a split in the main coder and encode the four
 * children recursively, summing their scores. */
2216 if(level!=s->block_max_depth){
2217 put_rac(&s->c, &s->block_state[4 + s_context], 0);
2218 score2 = encode_q_branch(s, level+1, 2*x+0, 2*y+0);
2219 score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+0);
2220 score2+= encode_q_branch(s, level+1, 2*x+0, 2*y+1);
2221 score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+1);
2222 score2+= s->lambda2>>FF_LAMBDA_SHIFT; //FIXME exact split overhead
2224 if(score2 < score && score2 < iscore)
/* Commit the intra candidate: copy its bytes to the saved output position
 * and adopt its states and block values. */
2229 memcpy(pbbak, i_buffer, i_len);
2231 s->c.bytestream_start= pbbak_start;
2232 s->c.bytestream= pbbak + i_len;
2233 set_blocks(s, level, x, y, l, cb, cr, pmx, pmy, BLOCK_INTRA);
2234 memcpy(s->block_state, i_state, sizeof(s->block_state));
/* Commit the inter candidate in the same way. */
2237 memcpy(pbbak, p_buffer, p_len);
2239 s->c.bytestream_start= pbbak_start;
2240 s->c.bytestream= pbbak + p_len;
2241 set_blocks(s, level, x, y, pl, pcb, pcr, mx, my, 0);
2242 memcpy(s->block_state, p_state, sizeof(s->block_state));
/* Returns non-zero when blocks a and b carry the same coding information.
 * Two intra blocks are compared by their three colour components; otherwise
 * the motion vectors and the BLOCK_INTRA bit of the types must match.
 * Differences are OR-ed together so a single test covers all fields. */
2247 static always_inline int same_block(BlockNode *a, BlockNode *b){
2248 if((a->type&BLOCK_INTRA) && (b->type&BLOCK_INTRA)){
2249 return !((a->color[0] - b->color[0]) | (a->color[1] - b->color[1]) | (a->color[2] - b->color[2]));
2251 return !((a->mx - b->mx) | (a->my - b->my) | ((a->type ^ b->type)&BLOCK_INTRA));
/* Writes the block at (x, y) on quadtree level `level` to the range coder
 * using the values already stored in s->block, without any search.
 * Below the maximum depth, a block whose three neighbours b+1, b+w and b+w+1
 * equal it is coded as a leaf (flag 1); otherwise flag 0 is written and the
 * four children are encoded recursively. A leaf is coded as an intra flag
 * followed by colour or motion-vector differences against the predictors.
 * NOTE(review): some branch headers and returns are not shown here. */
2255 static void encode_q_branch2(SnowContext *s, int level, int x, int y){
2256 const int w= s->b_width << s->block_max_depth;
2257 const int rem_depth= s->block_max_depth - level;
2258 const int index= (x + y*w) << rem_depth;
2259 int trx= (x+1)<<rem_depth;
2260 BlockNode *b= &s->block[index];
/* Neighbouring blocks; null_block stands in where a neighbour lies outside
 * the block array. */
2261 BlockNode *left = x ? &s->block[index-1] : &null_block;
2262 BlockNode *top = y ? &s->block[index-w] : &null_block;
2263 BlockNode *tl = y && x ? &s->block[index-w-1] : left;
2264 BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
/* Predictors: colour from the left block, motion vector as the median of the
 * left, top and top-right blocks. */
2265 int pl = left->color[0];
2266 int pcb= left->color[1];
2267 int pcr= left->color[2];
2268 int pmx= mid_pred(left->mx, top->mx, tr->mx);
2269 int pmy= mid_pred(left->my, top->my, tr->my);
2270 int mx_context= av_log2(2*ABS(left->mx - top->mx));
2271 int my_context= av_log2(2*ABS(left->my - top->my));
2272 int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
2275 set_blocks(s, level, x, y, pl, pcb, pcr, pmx, pmy, BLOCK_INTRA);
2279 if(level!=s->block_max_depth){
2280 if(same_block(b,b+1) && same_block(b,b+w) && same_block(b,b+w+1)){
2281 put_rac(&s->c, &s->block_state[4 + s_context], 1);
2283 put_rac(&s->c, &s->block_state[4 + s_context], 0);
2284 encode_q_branch2(s, level+1, 2*x+0, 2*y+0);
2285 encode_q_branch2(s, level+1, 2*x+1, 2*y+0);
2286 encode_q_branch2(s, level+1, 2*x+0, 2*y+1);
2287 encode_q_branch2(s, level+1, 2*x+1, 2*y+1);
/* Leaf: intra blocks send colour differences, others send motion-vector
 * differences; set_blocks() then stores the final values for the region. */
2291 if(b->type & BLOCK_INTRA){
2292 put_rac(&s->c, &s->block_state[1 + (left->type&1) + (top->type&1)], 1);
2293 put_symbol(&s->c, &s->block_state[32], b->color[0]-pl , 1);
2294 put_symbol(&s->c, &s->block_state[64], b->color[1]-pcb, 1);
2295 put_symbol(&s->c, &s->block_state[96], b->color[2]-pcr, 1);
2296 set_blocks(s, level, x, y, b->color[0], b->color[1], b->color[2], pmx, pmy, BLOCK_INTRA);
2298 put_rac(&s->c, &s->block_state[1 + (left->type&1) + (top->type&1)], 0);
2299 put_symbol(&s->c, &s->block_state[128 + 32*mx_context], b->mx - pmx, 1);
2300 put_symbol(&s->c, &s->block_state[128 + 32*my_context], b->my - pmy, 1);
2301 set_blocks(s, level, x, y, pl, pcb, pcr, b->mx, b->my, 0);
/**
 * Reads the block node at (x, y) of quadtree depth `level` from the range
 * coder and stores the result with set_blocks(). Uses the same neighbour
 * selection, predictors and contexts as the encoder side.
 */
2305 static void decode_q_branch(SnowContext *s, int level, int x, int y){
2306 const int w= s->b_width << s->block_max_depth;
2307 const int rem_depth= s->block_max_depth - level;
2308 const int index= (x + y*w) << rem_depth;
2309 int trx= (x+1)<<rem_depth;
// Neighbours that would lie outside the block array fall back to null_block
// (left/top) or to another neighbour (tl -> left, tr -> tl).
2310 BlockNode *left = x ? &s->block[index-1] : &null_block;
2311 BlockNode *top = y ? &s->block[index-w] : &null_block;
2312 BlockNode *tl = y && x ? &s->block[index-w-1] : left;
2313 BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
2314 int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
// NOTE(review): fills the blocks with null_block's values as BLOCK_INTRA; the
// condition guarding this call is not visible here — confirm when it applies.
2317 set_blocks(s, level, x, y, null_block.color[0], null_block.color[1], null_block.color[2], null_block.mx, null_block.my, BLOCK_INTRA);
// A leaf is decoded at the maximum depth (no flag is read there) or when the
// flag read from the range coder is 1.
2321 if(level==s->block_max_depth || get_rac(&s->c, &s->block_state[4 + s_context])){
// Start from the predictors; the decoded deltas are added below.
2323 int l = left->color[0];
2324 int cb= left->color[1];
2325 int cr= left->color[2];
2326 int mx= mid_pred(left->mx, top->mx, tr->mx);
2327 int my= mid_pred(left->my, top->my, tr->my);
// The "0*" terms contribute nothing; only the left/top difference picks the context.
2328 int mx_context= av_log2(2*ABS(left->mx - top->mx)) + 0*av_log2(2*ABS(tr->mx - top->mx));
2329 int my_context= av_log2(2*ABS(left->my - top->my)) + 0*av_log2(2*ABS(tr->my - top->my));
2331 type= get_rac(&s->c, &s->block_state[1 + left->type + top->type]) ? BLOCK_INTRA : 0;
// Colour deltas (NOTE(review): presumably read only for intra blocks, motion
// vector deltas otherwise — the branch lines are not visible here).
2334 l += get_symbol(&s->c, &s->block_state[32], 1);
2335 cb+= get_symbol(&s->c, &s->block_state[64], 1);
2336 cr+= get_symbol(&s->c, &s->block_state[96], 1);
2338 mx+= get_symbol(&s->c, &s->block_state[128 + 32*mx_context], 1);
2339 my+= get_symbol(&s->c, &s->block_state[128 + 32*my_context], 1);
2341 set_blocks(s, level, x, y, l, cb, cr, mx, my, type);
// Otherwise recurse into the four children at the next depth.
2343 decode_q_branch(s, level+1, 2*x+0, 2*y+0);
2344 decode_q_branch(s, level+1, 2*x+1, 2*y+0);
2345 decode_q_branch(s, level+1, 2*x+0, 2*y+1);
2346 decode_q_branch(s, level+1, 2*x+1, 2*y+1);
/**
 * Encodes the block tree of the current frame, starting each tree at level 0.
 * With me_method == ME_ITER the already-decided blocks are written by
 * encode_q_branch2(); otherwise encode_q_branch() is used.
 */
2350 static void encode_blocks(SnowContext *s){
// NOTE(review): the statement executed for ME_ITER on non-keyframes is not
// visible here — presumably the iterative motion search; confirm.
2355 if(s->avctx->me_method == ME_ITER && !s->keyframe)
// Report an error when the remaining output space drops below a rough bound.
2359 if(s->c.bytestream_end - s->c.bytestream < w*MB_SIZE*MB_SIZE*3){ //FIXME nicer limit
2360 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2364 if(s->avctx->me_method == ME_ITER)
2365 encode_q_branch2(s, 0, x, y);
2367 encode_q_branch (s, 0, x, y);
/**
 * Decodes the block tree of the current frame by calling decode_q_branch()
 * at level 0 (NOTE(review): the loops supplying x and y are not visible here).
 */
2372 static void decode_blocks(SnowContext *s){
2379 decode_q_branch(s, 0, x, y);
/**
 * Sub-pel interpolation of a b_w x b_h block in two passes that go through
 * tmp: the first pass covers b_h+5 rows and is controlled by dx, the second
 * pass reads six vertically adjacent tmp rows per output row and is
 * controlled by dy. Each pass blends the nearest sample with a six-tap
 * (1,-5,20,20,-5,1) filter result.
 */
2384 static void mc_block(uint8_t *dst, uint8_t *src, uint8_t *tmp, int stride, int b_w, int b_h, int dx, int dy){
// First pass: b_h+5 rows, because the second pass needs six input rows per
// output row.
2387 for(y=0; y < b_h+5; y++){
2388 for(x=0; x < b_w; x++){
2395 // int am= 9*(a1+a2) - (a0+a3);
// Six-tap filter; the coefficients sum to 32, so am is scaled by 32.
2396 int am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5);
2397 // int am= 18*(a2+a3) - 2*(a1+a4);
2398 // int aL= (-7*a0 + 105*a1 + 35*a2 - 5*a3)>>3;
2399 // int aR= (-7*a3 + 105*a2 + 35*a1 - 5*a0)>>3;
2401 // if(b_w==16) am= 8*(a1+a2);
// Linear blend between the sample on one side (a2 or a3, scaled by 32 to
// match am) and the filter result, weighted by dx; total weight is 256.
2403 if(dx<8) am = (32*a2*( 8-dx) + am* dx + 128)>>8;
2404 else am = ( am*(16-dx) + 32*a3*(dx-8) + 128)>>8;
2406 /* FIXME Try increasing tmp buffer to 16 bits and not clipping here. Should give marginally better results. - Robert*/
// Clamp: for values outside 0..255, ~(am>>31) gives 0 for negative am and
// all-ones (255 when truncated to 8 bits) for positive am.
2407 if(am&(~255)) am= ~(am>>31);
2411 /* if (dx< 4) tmp[x + y*stride]= (16*a1*( 4-dx) + aL* dx + 32)>>6;
2412 else if(dx< 8) tmp[x + y*stride]= ( aL*( 8-dx) + am*(dx- 4) + 32)>>6;
2413 else if(dx<12) tmp[x + y*stride]= ( am*(12-dx) + aR*(dx- 8) + 32)>>6;
2414 else tmp[x + y*stride]= ( aR*(16-dx) + 16*a2*(dx-12) + 32)>>6;*/
// Rewind tmp by the b_h+5 rows covered by the first pass.
2419 tmp -= (b_h+5)*stride;
// Second pass: same filter applied down the columns of tmp, weighted by dy.
2421 for(y=0; y < b_h; y++){
2422 for(x=0; x < b_w; x++){
2423 int a0= tmp[x + 0*stride];
2424 int a1= tmp[x + 1*stride];
2425 int a2= tmp[x + 2*stride];
2426 int a3= tmp[x + 3*stride];
2427 int a4= tmp[x + 4*stride];
2428 int a5= tmp[x + 5*stride];
2429 int am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5);
2430 // int am= 18*(a2+a3) - 2*(a1+a4);
2431 /* int aL= (-7*a0 + 105*a1 + 35*a2 - 5*a3)>>3;
2432 int aR= (-7*a3 + 105*a2 + 35*a1 - 5*a0)>>3;*/
2434 // if(b_w==16) am= 8*(a1+a2);
2436 if(dy<8) am = (32*a2*( 8-dy) + am* dy + 128)>>8;
2437 else am = ( am*(16-dy) + 32*a3*(dy-8) + 128)>>8;
// Same clamp to 0..255 as in the first pass.
2439 if(am&(~255)) am= ~(am>>31);
2442 /* if (dy< 4) tmp[x + y*stride]= (16*a1*( 4-dy) + aL* dy + 32)>>6;
2443 else if(dy< 8) tmp[x + y*stride]= ( aL*( 8-dy) + am*(dy- 4) + 32)>>6;
2444 else if(dy<12) tmp[x + y*stride]= ( am*(12-dy) + aR*(dy- 8) + 32)>>6;
2445 else tmp[x + y*stride]= ( aR*(16-dy) + 16*a2*(dy-12) + 32)>>6;*/
2450 STOP_TIMER("mc_block")
/*
 * mca(dx, dy, b_w) defines a wrapper mc_block_hpel<dx><dy><b_w>() that calls
 * mc_block() for a b_w x b_w block with fixed dx/dy, using a scratch array of
 * stride*(b_w+5) bytes on the stack. src is moved back by two columns and two
 * rows (src-2-2*stride) before the call.
 */
2453 #define mca(dx,dy,b_w)\
2454 static void mc_block_hpel ## dx ## dy ## b_w(uint8_t *dst, uint8_t *src, int stride, int h){\
2455 uint8_t tmp[stride*(b_w+5)];\
2457 mc_block(dst, src-2-2*stride, tmp, stride, b_w, b_w, dx, dy);\
/**
 * Produces the prediction for one block into dst.
 * Intra blocks are filled with the block's colour for the given plane; other
 * blocks are motion compensated from src with the block's scaled motion
 * vector, through mc_block() or the H.264 qpel functions in s->dsp.
 */
2469 static void pred_block(SnowContext *s, uint8_t *dst, uint8_t *src, uint8_t *tmp, int stride, int sx, int sy, int b_w, int b_h, BlockNode *block, int plane_index, int w, int h){
2470 if(block->type & BLOCK_INTRA){
2472 const int color = block->color[plane_index];
// The colour byte replicated into all four bytes of a 32-bit word.
2473 const int color4= color*0x01010101;
// Constant fill with 32-bit stores: 16, 8 and 4 bytes per row, then a generic
// per-byte loop (NOTE(review): the width tests selecting each variant are not
// visible here).
2475 for(y=0; y < b_h; y++){
2476 *(uint32_t*)&dst[0 + y*stride]= color4;
2477 *(uint32_t*)&dst[4 + y*stride]= color4;
2478 *(uint32_t*)&dst[8 + y*stride]= color4;
2479 *(uint32_t*)&dst[12+ y*stride]= color4;
2482 for(y=0; y < b_h; y++){
2483 *(uint32_t*)&dst[0 + y*stride]= color4;
2484 *(uint32_t*)&dst[4 + y*stride]= color4;
2487 for(y=0; y < b_h; y++){
2488 *(uint32_t*)&dst[0 + y*stride]= color4;
2491 for(y=0; y < b_h; y++){
2492 for(x=0; x < b_w; x++){
2493 dst[x + y*stride]= color;
// Inter block: plane 0 uses twice the motion vector scale of the other planes.
2498 const int scale= plane_index ? s->mv_scale : 2*s->mv_scale;
2499 int mx= block->mx*scale;
2500 int my= block->my*scale;
// The low four bits are the sub-pel position in 1/16 units.
2501 const int dx= mx&15;
2502 const int dy= my&15;
2505 src += sx + sy*stride;
// When the block plus filter margin is not fully inside the picture, build an
// edge-extended copy in tmp (the unsigned casts also catch negative sx/sy).
2506 if( (unsigned)sx >= w - b_w - 4
2507 || (unsigned)sy >= h - b_h - 4){
2508 ff_emulated_edge_mc(tmp + MB_SIZE, src, stride, b_w+5, b_h+5, sx, sy, w, h);
2511 assert(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h);
2512 assert(!(b_w&(b_w-1)));
2513 assert(b_w>1 && b_h>1);
// Positions that are not multiples of 4 (quarter-pel) or blocks with a side
// of 2 go through mc_block(); the rest use the qpel table, with non-square
// blocks handled as two square halves.
2514 if((dx&3) || (dy&3) || b_w==2 || b_h==2)
2515 mc_block(dst, src, tmp, stride, b_w, b_h, dx, dy);
2517 s->dsp.put_h264_qpel_pixels_tab[2-(b_w>>3)][dy+(dx>>2)](dst,src + 2 + 2*stride,stride);
2518 else if(b_w==2*b_h){
2519 s->dsp.put_h264_qpel_pixels_tab[2-(b_h>>3)][dy+(dx>>2)](dst ,src + 2 + 2*stride,stride);
2520 s->dsp.put_h264_qpel_pixels_tab[2-(b_h>>3)][dy+(dx>>2)](dst+b_h,src + 2 + b_h + 2*stride,stride);
2523 s->dsp.put_h264_qpel_pixels_tab[2-(b_w>>3)][dy+(dx>>2)](dst ,src + 2 + 2*stride ,stride);
2524 s->dsp.put_h264_qpel_pixels_tab[2-(b_w>>3)][dy+(dx>>2)](dst+b_w*stride,src + 2 + 2*stride+b_w*stride,stride);
//FIXME name cleanup (b_w, block_w, b_width stuff)
/**
 * Overlapped block motion compensation for one b_w x b_h area, working on
 * slice buffer lines. The area is covered by up to four blocks (lt, rt, lb,
 * rb); each distinct block is predicted with pred_block() and the predictions
 * are combined with the weights in obmc. The combined value is then applied
 * to the slice buffer line and/or written to dst8.
 */
2530 static always_inline void add_yblock_buffered(SnowContext *s, slice_buffer * sb, DWTELEM *old_dst, uint8_t *dst8, uint8_t *src, uint8_t *obmc, int src_x, int src_y, int b_w, int b_h, int w, int h, int dst_stride, int src_stride, int obmc_stride, int b_x, int b_y, int add, int plane_index){
2531 DWTELEM * dst = NULL;
2532 const int b_width = s->b_width << s->block_max_depth;
2533 const int b_height= s->b_height << s->block_max_depth;
2534 const int b_stride= b_width;
// The four blocks overlapping this area: left/right x top/bottom.
2535 BlockNode *lt= &s->block[b_x + b_y*b_stride];
2536 BlockNode *rt= lt+1;
2537 BlockNode *lb= lt+b_stride;
2538 BlockNode *rb= lb+1;
2540 int tmp_step= src_stride >= 7*MB_SIZE ? MB_SIZE : MB_SIZE*src_stride;
2541 uint8_t tmp[src_stride*7*MB_SIZE]; //FIXME align
// Border handling for the block grid and for the pixel area (the bodies of
// these branches are not visible here).
2548 }else if(b_x + 1 >= b_width){
2555 }else if(b_y + 1 >= b_height){
2560 if(src_x<0){ //FIXME merge with prev & always round internal width up to *16
2564 }else if(src_x + b_w > w){
2568 obmc -= src_y*obmc_stride;
2571 }else if(src_y + b_h> h){
2575 if(b_w<=0 || b_h<=0) return;
2577 assert(src_stride > 2*MB_SIZE + 5);
2578 // old_dst += src_x + src_y*dst_stride;
2579 dst8+= src_x + src_y*src_stride;
2580 // src += src_x + src_y*src_stride;
2582 ptmp= tmp + 3*tmp_step;
// Predict each of the four blocks; the same_block() tests let a block that
// equals an earlier one skip its own pred_block() call.
2585 pred_block(s, block[0], src, tmp, src_stride, src_x, src_y, b_w, b_h, lt, plane_index, w, h);
2587 if(same_block(lt, rt)){
2592 pred_block(s, block[1], src, tmp, src_stride, src_x, src_y, b_w, b_h, rt, plane_index, w, h);
2595 if(same_block(lt, lb)){
2597 }else if(same_block(rt, lb)){
2602 pred_block(s, block[2], src, tmp, src_stride, src_x, src_y, b_w, b_h, lb, plane_index, w, h);
2605 if(same_block(lt, rb) ){
2607 }else if(same_block(rt, rb)){
2609 }else if(same_block(lb, rb)){
2613 pred_block(s, block[3], src, tmp, src_stride, src_x, src_y, b_w, b_h, rb, plane_index, w, h);
// NOTE(review): the four per-prediction loops below use dst, which is still
// NULL at this point; presumably they are compiled out by a preprocessor
// conditional that is not visible here — confirm.
2616 for(y=0; y<b_h; y++){
2617 for(x=0; x<b_w; x++){
2618 int v= obmc [x + y*obmc_stride] * block[3][x + y*src_stride] * (256/OBMC_MAX);
2619 if(add) dst[x + y*dst_stride] += v;
2620 else dst[x + y*dst_stride] -= v;
2623 for(y=0; y<b_h; y++){
2624 uint8_t *obmc2= obmc + (obmc_stride>>1);
2625 for(x=0; x<b_w; x++){
2626 int v= obmc2[x + y*obmc_stride] * block[2][x + y*src_stride] * (256/OBMC_MAX);
2627 if(add) dst[x + y*dst_stride] += v;
2628 else dst[x + y*dst_stride] -= v;
2631 for(y=0; y<b_h; y++){
2632 uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
2633 for(x=0; x<b_w; x++){
2634 int v= obmc3[x + y*obmc_stride] * block[1][x + y*src_stride] * (256/OBMC_MAX);
2635 if(add) dst[x + y*dst_stride] += v;
2636 else dst[x + y*dst_stride] -= v;
2639 for(y=0; y<b_h; y++){
2640 uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
2641 uint8_t *obmc4= obmc3+ (obmc_stride>>1);
2642 for(x=0; x<b_w; x++){
2643 int v= obmc4[x + y*obmc_stride] * block[0][x + y*src_stride] * (256/OBMC_MAX);
2644 if(add) dst[x + y*dst_stride] += v;
2645 else dst[x + y*dst_stride] -= v;
// Combined loop: one pass per row, fetching the slice buffer line for that row.
2653 for(y=0; y<b_h; y++){
2654 //FIXME ugly misuse of obmc_stride
// The four weight quadrants of the obmc window: obmc1 at this row, obmc2 half
// a row to the right, obmc3/obmc4 the same two positions half a window down.
2655 uint8_t *obmc1= obmc + y*obmc_stride;
2656 uint8_t *obmc2= obmc1+ (obmc_stride>>1);
2657 uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1);
2658 uint8_t *obmc4= obmc3+ (obmc_stride>>1);
2659 dst = slice_buffer_get_line(sb, src_y + y);
2660 for(x=0; x<b_w; x++){
2661 int v= obmc1[x] * block[3][x + y*src_stride]
2662 +obmc2[x] * block[2][x + y*src_stride]
2663 +obmc3[x] * block[1][x + y*src_stride]
2664 +obmc4[x] * block[0][x + y*src_stride];
// Rescale the weighted sum (NOTE(review): the preprocessor/branch lines that
// choose between the shifts below are not visible here).
2666 v <<= 8 - LOG2_OBMC_MAX;
2668 v += 1<<(7 - FRAC_BITS);
2669 v >>= 8 - FRAC_BITS;
2672 // v += old_dst[x + y*dst_stride];
// Reconstruction: add the buffer value, round away the fractional bits, clamp
// to 0..255 and store the pixel (presumably the `add` case — confirm).
2673 v += dst[x + src_x];
2674 v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS;
2675 if(v&(~255)) v= ~(v>>31);
2676 dst8[x + y*src_stride] = v;
2678 // old_dst[x + y*dst_stride] -= v;
// Otherwise the prediction is subtracted from the buffer value.
2679 dst[x + src_x] -= v;
2683 STOP_TIMER("Inner add y block")
//FIXME name cleanup (b_w, block_w, b_width stuff)
/**
 * Overlapped block motion compensation for one b_w x b_h area, working on a
 * plain DWTELEM plane (dst, dst_stride). The area is covered by up to four
 * blocks (lt, rt, lb, rb); each distinct block is predicted with pred_block()
 * and the predictions are combined with the weights in obmc. The combined
 * value is then applied to dst and/or written to dst8.
 */
2689 static always_inline void add_yblock(SnowContext *s, DWTELEM *dst, uint8_t *dst8, uint8_t *src, uint8_t *obmc, int src_x, int src_y, int b_w, int b_h, int w, int h, int dst_stride, int src_stride, int obmc_stride, int b_x, int b_y, int add, int plane_index){
2690 const int b_width = s->b_width << s->block_max_depth;
2691 const int b_height= s->b_height << s->block_max_depth;
2692 const int b_stride= b_width;
// The four blocks overlapping this area: left/right x top/bottom.
2693 BlockNode *lt= &s->block[b_x + b_y*b_stride];
2694 BlockNode *rt= lt+1;
2695 BlockNode *lb= lt+b_stride;
2696 BlockNode *rb= lb+1;
2698 int tmp_step= src_stride >= 7*MB_SIZE ? MB_SIZE : MB_SIZE*src_stride;
2699 uint8_t tmp[src_stride*7*MB_SIZE]; //FIXME align
// Border handling for the block grid and for the pixel area (the bodies of
// these branches are not visible here).
2706 }else if(b_x + 1 >= b_width){
2713 }else if(b_y + 1 >= b_height){
2718 if(src_x<0){ //FIXME merge with prev & always round internal width up to *16
2722 }else if(src_x + b_w > w){
2726 obmc -= src_y*obmc_stride;
2729 }else if(src_y + b_h> h){
2733 if(b_w<=0 || b_h<=0) return;
2735 assert(src_stride > 2*MB_SIZE + 5);
// Move both output pointers to the top-left corner of the area.
2736 dst += src_x + src_y*dst_stride;
2737 dst8+= src_x + src_y*src_stride;
2738 // src += src_x + src_y*src_stride;
2740 ptmp= tmp + 3*tmp_step;
// Predict each of the four blocks; the same_block() tests let a block that
// equals an earlier one skip its own pred_block() call.
2743 pred_block(s, block[0], src, tmp, src_stride, src_x, src_y, b_w, b_h, lt, plane_index, w, h);
2745 if(same_block(lt, rt)){
2750 pred_block(s, block[1], src, tmp, src_stride, src_x, src_y, b_w, b_h, rt, plane_index, w, h);
2753 if(same_block(lt, lb)){
2755 }else if(same_block(rt, lb)){
2760 pred_block(s, block[2], src, tmp, src_stride, src_x, src_y, b_w, b_h, lb, plane_index, w, h);
2763 if(same_block(lt, rb) ){
2765 }else if(same_block(rt, rb)){
2767 }else if(same_block(lb, rb)){
2771 pred_block(s, block[3], src, tmp, src_stride, src_x, src_y, b_w, b_h, rb, plane_index, w, h);
// NOTE(review): four per-prediction loops followed by a combined loop that
// does the same weighting; presumably only one variant is compiled, selected
// by a preprocessor conditional that is not visible here — confirm.
2774 for(y=0; y<b_h; y++){
2775 for(x=0; x<b_w; x++){
2776 int v= obmc [x + y*obmc_stride] * block[3][x + y*src_stride] * (256/OBMC_MAX);
2777 if(add) dst[x + y*dst_stride] += v;
2778 else dst[x + y*dst_stride] -= v;
2781 for(y=0; y<b_h; y++){
2782 uint8_t *obmc2= obmc + (obmc_stride>>1);
2783 for(x=0; x<b_w; x++){
2784 int v= obmc2[x + y*obmc_stride] * block[2][x + y*src_stride] * (256/OBMC_MAX);
2785 if(add) dst[x + y*dst_stride] += v;
2786 else dst[x + y*dst_stride] -= v;
2789 for(y=0; y<b_h; y++){
2790 uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
2791 for(x=0; x<b_w; x++){
2792 int v= obmc3[x + y*obmc_stride] * block[1][x + y*src_stride] * (256/OBMC_MAX);
2793 if(add) dst[x + y*dst_stride] += v;
2794 else dst[x + y*dst_stride] -= v;
2797 for(y=0; y<b_h; y++){
2798 uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
2799 uint8_t *obmc4= obmc3+ (obmc_stride>>1);
2800 for(x=0; x<b_w; x++){
2801 int v= obmc4[x + y*obmc_stride] * block[0][x + y*src_stride] * (256/OBMC_MAX);
2802 if(add) dst[x + y*dst_stride] += v;
2803 else dst[x + y*dst_stride] -= v;
// Combined loop over the rows of the area.
2807 for(y=0; y<b_h; y++){
2808 //FIXME ugly misuse of obmc_stride
// The four weight quadrants of the obmc window: obmc1 at this row, obmc2 half
// a row to the right, obmc3/obmc4 the same two positions half a window down.
2809 uint8_t *obmc1= obmc + y*obmc_stride;
2810 uint8_t *obmc2= obmc1+ (obmc_stride>>1);
2811 uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1);
2812 uint8_t *obmc4= obmc3+ (obmc_stride>>1);
2813 for(x=0; x<b_w; x++){
2814 int v= obmc1[x] * block[3][x + y*src_stride]
2815 +obmc2[x] * block[2][x + y*src_stride]
2816 +obmc3[x] * block[1][x + y*src_stride]
2817 +obmc4[x] * block[0][x + y*src_stride];
// Rescale the weighted sum (NOTE(review): the preprocessor/branch lines that
// choose between the shifts below are not visible here).
2819 v <<= 8 - LOG2_OBMC_MAX;
2821 v += 1<<(7 - FRAC_BITS);
2822 v >>= 8 - FRAC_BITS;
// Reconstruction: add the buffer value, round away the fractional bits, clamp
// to 0..255 and store the pixel (presumably the `add` case — confirm).
2825 v += dst[x + y*dst_stride];
2826 v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS;
2827 if(v&(~255)) v= ~(v>>31);
2828 dst8[x + y*src_stride] = v;
// Otherwise the prediction is subtracted from the buffer value.
2830 dst[x + y*dst_stride] -= v;
/**
 * Applies the prediction for one row of blocks (mb_y) of a plane, working on
 * slice buffer lines. On keyframes (or with debug flag 512) no motion
 * compensation is done and a constant 128 offset is used instead; otherwise
 * add_yblock_buffered() is called for every block position in the row.
 */
2837 static always_inline void predict_slice_buffered(SnowContext *s, slice_buffer * sb, DWTELEM * old_buffer, int plane_index, int add, int mb_y){
2838 Plane *p= &s->plane[plane_index];
2839 const int mb_w= s->b_width << s->block_max_depth;
2840 const int mb_h= s->b_height << s->block_max_depth;
// Blocks in planes other than 0 are half the size of those in plane 0.
2842 int block_size = MB_SIZE >> s->block_max_depth;
2843 int block_w = plane_index ? block_size/2 : block_size;
2844 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
2845 int obmc_stride= plane_index ? block_size : 2*block_size;
2846 int ref_stride= s->current_picture.linesize[plane_index];
2847 uint8_t *ref = s->last_picture.data[plane_index];
2848 uint8_t *dst8= s->current_picture.data[plane_index];
2853 if(s->keyframe || (s->avctx->debug&512)){
// First loop: add 128 (in FRAC_BITS fixed point) plus rounding, clamp and
// store to dst8. Second loop: subtract the same offset from the buffer.
// NOTE(review): presumably selected by `add` — the branch is not visible here.
2858 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++)
2860 // DWTELEM * line = slice_buffer_get_line(sb, y);
2861 DWTELEM * line = sb->line[y];
2864 // int v= buf[x + y*w] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
2865 int v= line[x] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
2867 if(v&(~255)) v= ~(v>>31);
2868 dst8[x + y*ref_stride]= v;
2872 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++)
2874 // DWTELEM * line = slice_buffer_get_line(sb, y);
2875 DWTELEM * line = sb->line[y];
2878 line[x] -= 128 << FRAC_BITS;
2879 // buf[x + y*w]-= 128<<FRAC_BITS;
// mb_w+1 positions: each area is shifted up/left by half a block, so one
// extra position is needed to reach the right edge.
2887 for(mb_x=0; mb_x<=mb_w; mb_x++){
2890 add_yblock_buffered(s, sb, old_buffer, dst8, ref, obmc,
2891 block_w*mb_x - block_w/2,
2892 block_w*mb_y - block_w/2,
2895 w, ref_stride, obmc_stride,
2899 STOP_TIMER("add_yblock")
2902 STOP_TIMER("predict_slice")
/**
 * Applies the prediction for one row of blocks (mb_y) of a plane, working on
 * the flat coefficient buffer buf. On keyframes (or with debug flag 512) no
 * motion compensation is done and a constant 128 offset is used instead;
 * otherwise add_yblock() is called for every block position in the row.
 */
2905 static always_inline void predict_slice(SnowContext *s, DWTELEM *buf, int plane_index, int add, int mb_y){
2906 Plane *p= &s->plane[plane_index];
2907 const int mb_w= s->b_width << s->block_max_depth;
2908 const int mb_h= s->b_height << s->block_max_depth;
// Blocks in planes other than 0 are half the size of those in plane 0.
2910 int block_size = MB_SIZE >> s->block_max_depth;
2911 int block_w = plane_index ? block_size/2 : block_size;
2912 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
2913 const int obmc_stride= plane_index ? block_size : 2*block_size;
2914 int ref_stride= s->current_picture.linesize[plane_index];
2915 uint8_t *ref = s->last_picture.data[plane_index];
2916 uint8_t *dst8= s->current_picture.data[plane_index];
2921 if(s->keyframe || (s->avctx->debug&512)){
// First loop: add 128 (in FRAC_BITS fixed point) plus rounding, clamp and
// store to dst8. Second loop: subtract the same offset from buf.
// NOTE(review): presumably selected by `add` — the branch is not visible here.
2926 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
2928 int v= buf[x + y*w] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
2930 if(v&(~255)) v= ~(v>>31);
2931 dst8[x + y*ref_stride]= v;
2935 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
2937 buf[x + y*w]-= 128<<FRAC_BITS;
// mb_w+1 positions: each area is shifted up/left by half a block, so one
// extra position is needed to reach the right edge.
2945 for(mb_x=0; mb_x<=mb_w; mb_x++){
2948 add_yblock(s, buf, dst8, ref, obmc,
2949 block_w*mb_x - block_w/2,
2950 block_w*mb_y - block_w/2,
2953 w, ref_stride, obmc_stride,
2957 STOP_TIMER("add_yblock")
2960 STOP_TIMER("predict_slice")
/**
 * Runs predict_slice() for every block row of a plane. The loop bound is
 * inclusive, so mb_h+1 rows are processed.
 */
2963 static always_inline void predict_plane(SnowContext *s, DWTELEM *buf, int plane_index, int add){
2964 const int mb_h= s->b_height << s->block_max_depth;
2966 for(mb_y=0; mb_y<=mb_h; mb_y++)
2967 predict_slice(s, buf, plane_index, add, mb_y);
/**
 * Estimates an intra colour for block (mb_x, mb_y) in the given plane.
 * The block is temporarily turned into an intra block of colour 0, the
 * surrounding prediction is rendered with add_yblock(), and the obmc-weighted
 * difference between input and prediction is accumulated.
 * @return the estimate, clipped to 0..255
 */
2970 static int get_dc(SnowContext *s, int mb_x, int mb_y, int plane_index){
2972 Plane *p= &s->plane[plane_index];
2973 const int block_size = MB_SIZE >> s->block_max_depth;
2974 const int block_w = plane_index ? block_size/2 : block_size;
2975 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
2976 const int obmc_stride= plane_index ? block_size : 2*block_size;
2977 const int ref_stride= s->current_picture.linesize[plane_index];
2978 uint8_t *ref= s-> last_picture.data[plane_index];
2979 uint8_t *dst= s->current_picture.data[plane_index];
2980 uint8_t *src= s-> input_picture.data[plane_index];
2981 const static DWTELEM zero_dst[4096]; //FIXME
2982 const int b_stride = s->b_width << s->block_max_depth;
2983 const int w= p->width;
2984 const int h= p->height;
2985 int index= mb_x + mb_y*b_stride;
2986 BlockNode *b= &s->block[index];
// Keep a copy of the block so the temporary change below can be undone
// (NOTE(review): the restore statement is not visible here).
2987 BlockNode backup= *b;
2991 b->type|= BLOCK_INTRA;
2992 b->color[plane_index]= 0;
// For i = 0..3: the 2x2 group of block-sized areas overlapping this block.
2995 int mb_x2= mb_x + (i &1) - 1;
2996 int mb_y2= mb_y + (i>>1) - 1;
2997 int x= block_w*mb_x2 + block_w/2;
2998 int y= block_w*mb_y2 + block_w/2;
// Render the prediction into dst; zero_dst with stride 0 serves as the
// coefficient input.
3000 add_yblock(s, zero_dst, dst, ref, obmc,
3001 x, y, block_w, block_w, w, h, /*dst_stride*/0, ref_stride, obmc_stride, mb_x2, mb_y2, 1, plane_index);
// Visit the part of the area that lies inside the picture.
3003 for(y2= FFMAX(y, 0); y2<FFMIN(h, y+block_w); y2++){
3004 for(x2= FFMAX(x, 0); x2<FFMIN(w, x+block_w); x2++){
3005 int index= x2-(block_w*mb_x - block_w/2) + (y2-(block_w*mb_y - block_w/2))*obmc_stride;
3006 int obmc_v= obmc[index];
// At picture borders, add the weight from the opposite half of the obmc window.
3007 if(y<0) obmc_v += obmc[index + block_w*obmc_stride];
3008 if(x<0) obmc_v += obmc[index + block_w];
3009 if(y+block_w>h) obmc_v += obmc[index - block_w*obmc_stride];
3010 if(x+block_w>w) obmc_v += obmc[index - block_w];
3011 //FIXME precalc this or simplify it somehow else
// ab: weighted sum of (input - prediction); aa: sum of squared weights.
3013 ab += (src[x2 + y2*ref_stride] - dst[x2 + y2*ref_stride]) * obmc_v;
3014 aa += obmc_v * obmc_v; //FIXME precalculate this
// Rounded ratio ab*64/aa, limited to the 8-bit range.
3020 return clip(((ab<<6) + aa/2)/aa, 0, 255); //FIXME we shouldn't need clipping
/**
 * Computes a rate-distortion cost for block (mb_x, mb_y) in the given plane.
 * Distortion is measured with s->dsp.me_cmp over the block-sized areas that
 * overlap the block after rendering the prediction; rate is a rough bit
 * estimate for the parameters of nearby blocks.
 * @return distortion + rate * penalty_factor
 */
3023 static int get_block_rd(SnowContext *s, int mb_x, int mb_y, int plane_index){
3025 Plane *p= &s->plane[plane_index];
3026 const int block_size = MB_SIZE >> s->block_max_depth;
3027 const int block_w = plane_index ? block_size/2 : block_size;
3028 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
3029 const int obmc_stride= plane_index ? block_size : 2*block_size;
3030 const int ref_stride= s->current_picture.linesize[plane_index];
3031 uint8_t *ref= s-> last_picture.data[plane_index];
3032 uint8_t *dst= s->current_picture.data[plane_index];
3033 uint8_t *src= s-> input_picture.data[plane_index];
3034 const static DWTELEM zero_dst[4096]; //FIXME
3035 const int b_stride = s->b_width << s->block_max_depth;
3036 const int b_height = s->b_height<< s->block_max_depth;
3037 const int w= p->width;
3038 const int h= p->height;
3041 const int penalty_factor= get_penalty_factor(s->lambda, s->lambda2, s->avctx->me_cmp);
// For i = 0..3: the 2x2 group of block-sized areas overlapping this block.
3044 int mb_x2= mb_x + (i &1) - 1;
3045 int mb_y2= mb_y + (i>>1) - 1;
3046 int x= block_w*mb_x2 + block_w/2;
3047 int y= block_w*mb_y2 + block_w/2;
// Render the prediction into dst; zero_dst with stride 0 serves as the
// coefficient input.
3049 add_yblock(s, zero_dst, dst, ref, obmc,
3050 x, y, block_w, block_w, w, h, /*dst_stride*/0, ref_stride, obmc_stride, mb_x2, mb_y2, 1, plane_index);
3052 //FIXME find a cleaner/simpler way to skip the outside stuff
// Rows/columns of the area outside the picture are copied from src to dst so
// they compare equal and add nothing to the distortion.
3053 for(y2= y; y2<0; y2++)
3054 memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, block_w);
3055 for(y2= h; y2<y+block_w; y2++)
3056 memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, block_w);
3058 for(y2= y; y2<y+block_w; y2++)
3059 memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, -x);
3062 for(y2= y; y2<y+block_w; y2++)
3063 memcpy(dst + w + y2*ref_stride, src + w + y2*ref_stride, x+block_w - w);
// me_cmp[0] is used for 16-wide blocks and me_cmp[1] for 8-wide ones.
3066 assert(block_w== 8 || block_w==16);
3067 distortion += s->dsp.me_cmp[block_w==8](&s->m, src + x + y*ref_stride, dst + x + y*ref_stride, ref_stride, block_w);
// Rate part: for each index i, a block at or next to (mb_x, mb_y) and its
// motion vector difference against the median predictor.
3076 int x= mb_x + (i&1) - (i>>1);
3077 int y= mb_y + (i>>1);
3078 int index= x + y*b_stride;
3079 BlockNode *b = &s->block[index];
3080 BlockNode *left = x ? &s->block[index-1] : &null_block;
3081 BlockNode *top = y ? &s->block[index-b_stride] : &null_block;
3082 BlockNode *tl = y && x ? &s->block[index-b_stride-1] : left;
3083 BlockNode *tr = y && x+1<b_stride ? &s->block[index-b_stride+1] : tl;
3084 int dmx= b->mx - mid_pred(left->mx, top->mx, tr->mx);
3085 int dmy= b->my - mid_pred(left->my, top->my, tr->my);
3086 // int mx_context= av_log2(2*ABS(left->mx - top->mx));
3087 // int my_context= av_log2(2*ABS(left->my - top->my));
3089 if(x<0 || x>=b_stride || y>=b_height)
3096 00001XXXX 15-30 8-15
3098 //FIXME try accurate rate
3099 //FIXME intra and inter predictors if surrounding blocks aren't the same type
// Approximate bit cost: grows with log2 of the colour or vector difference.
3100 if(b->type & BLOCK_INTRA){
3101 rate += 3+2*( av_log2(2*ABS(left->color[0] - b->color[0]))
3102 + av_log2(2*ABS(left->color[1] - b->color[1]))
3103 + av_log2(2*ABS(left->color[2] - b->color[2])));
3105 rate += 2*(1 + av_log2(2*ABS(dmx))
3106 + av_log2(2*ABS(dmy))); //FIXME kill the 2* can be merged in lambda
3110 return distortion + rate*penalty_factor;
/**
 * Tries candidate parameters p for block (mb_x, mb_y) and evaluates their
 * rate-distortion cost with get_block_rd() on plane 0.
 * With intra set, p holds three colour components; otherwise p[0] and p[1]
 * are used (callers pass a motion vector there).
 * NOTE(review): the comparison against *best_rd, the restore from backup and
 * the return value are on lines not visible here — confirm.
 */
3113 static always_inline int check_block(SnowContext *s, int mb_x, int mb_y, int p[3], int intra, int *best_rd){
3114 const int b_stride= s->b_width << s->block_max_depth;
3115 BlockNode *block= &s->block[mb_x + mb_y * b_stride];
3116 BlockNode backup= *block;
3117 int rd, index, value;
3119 assert(mb_x>=0 && mb_y>=0);
3120 assert(mb_x<b_stride);
// Intra candidate: install the colours and mark the block intra.
3123 block->color[0] = p[0];
3124 block->color[1] = p[1];
3125 block->color[2] = p[2];
3126 block->type |= BLOCK_INTRA;
// Other candidates: p[0], p[1] are hashed into me_cache. The stored value
// combines the current generation with bits of p, so a match means the same
// candidate was already seen in this generation.
3128 index= (p[0] + 31*p[1]) & (ME_CACHE_SIZE-1);
3129 value= s->me_cache_generation + (p[0]>>10) + (p[1]<<6);
3130 if(s->me_cache[index] == value)
3132 s->me_cache[index]= value;
3136 block->type &= ~BLOCK_INTRA;
3139 rd= get_block_rd(s, mb_x, mb_y, 0);
/* special case for int[2] args we discard afterward, fixes compilation prob with gcc 2.95 */
/**
 * Convenience wrapper around check_block() that packs the two values p0, p1
 * into a local array. Note that the array has two elements while
 * check_block() declares p[3]; p[2] must therefore not be read on this path.
 */
3152 static always_inline int check_block_inter(SnowContext *s, int mb_x, int mb_y, int p0, int p1, int intra, int *best_rd){
3153 int p[2] = {p0, p1};
3154 return check_block(s, mb_x, mb_y, p, intra, best_rd);
/**
 * Iterative motion estimation: runs up to 50 passes over all blocks and, for
 * each block, tries a set of candidate motion vectors (current, zero,
 * neighbours, diamond and square refinements) and an intra candidate via
 * check_block()/check_block_inter(). Blocks flagged BLOCK_OPT are skipped on
 * later passes; when a block changes, the flag is cleared on its eight
 * neighbours so they are revisited.
 */
3157 static void iterative_me(SnowContext *s){
3158 int pass, mb_x, mb_y;
3159 const int b_width = s->b_width << s->block_max_depth;
3160 const int b_height= s->b_height << s->block_max_depth;
3161 const int b_stride= b_width;
3164 for(pass=0; pass<50; pass++){
3167 for(mb_y= 0; mb_y<b_height; mb_y++){
3168 for(mb_x= 0; mb_x<b_width; mb_x++){
3169 int dia_change, i, j;
3170 int best_rd= INT_MAX;
3172 const int index= mb_x + mb_y * b_stride;
3173 BlockNode *block= &s->block[index];
// Neighbour pointers; null_block stands in for neighbours outside the grid.
// The right/bottom tests must use mb_x+1 / mb_y+1: mb_x<b_width and
// mb_y<b_height always hold inside these loops, which made the last column
// and last row point at the next row or past the end of s->block, and those
// pointers are both read (candidate vectors) and written (BLOCK_OPT cleared).
3174 BlockNode *tb = mb_y ? &s->block[index-b_stride ] : &null_block;
3175 BlockNode *lb = mb_x ? &s->block[index -1] : &null_block;
3176 BlockNode *rb = mb_x+1<b_width ? &s->block[index +1] : &null_block;
3177 BlockNode *bb = mb_y+1<b_height ? &s->block[index+b_stride ] : &null_block;
3178 BlockNode *tlb= mb_x && mb_y ? &s->block[index-b_stride-1] : &null_block;
3179 BlockNode *trb= mb_x+1<b_width && mb_y ? &s->block[index-b_stride+1] : &null_block;
3180 BlockNode *blb= mb_x && mb_y+1<b_height ? &s->block[index+b_stride-1] : &null_block;
3181 BlockNode *brb= mb_x+1<b_width && mb_y+1<b_height ? &s->block[index+b_stride+1] : &null_block;
// After the first pass, blocks still flagged BLOCK_OPT are not re-examined.
3183 if(pass && (block->type & BLOCK_OPT))
3185 block->type |= BLOCK_OPT;
// Start a new cache generation; the cache is cleared when the counter is zero.
3189 if(!s->me_cache_generation)
3190 memset(s->me_cache, 0, sizeof(s->me_cache));
3191 s->me_cache_generation += 1<<22;
3193 // get previous score (can't be cached due to OBMC)
3194 check_block_inter(s, mb_x, mb_y, block->mx, block->my, 0, &best_rd);
3195 check_block_inter(s, mb_x, mb_y, 0, 0, 0, &best_rd);
3196 check_block_inter(s, mb_x, mb_y, tb->mx, tb->my, 0, &best_rd);
3197 check_block_inter(s, mb_x, mb_y, lb->mx, lb->my, 0, &best_rd);
3198 check_block_inter(s, mb_x, mb_y, rb->mx, rb->my, 0, &best_rd);
3199 check_block_inter(s, mb_x, mb_y, bb->mx, bb->my, 0, &best_rd);
3202 //FIXME avoid subpel interpol / round to nearest integer
// Diamond search around the current vector in steps of 4 units.
3205 for(i=0; i<FFMAX(s->avctx->dia_size, 1); i++){
3207 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+4*(i-j), block->my+(4*j), 0, &best_rd);
3208 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx-4*(i-j), block->my-(4*j), 0, &best_rd);
3209 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+4*(i-j), block->my-(4*j), 0, &best_rd);
3210 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx-4*(i-j), block->my+(4*j), 0, &best_rd);
// Refinement over the eight positions one unit away.
3216 static const int square[8][2]= {{+1, 0},{-1, 0},{ 0,+1},{ 0,-1},{+1,+1},{-1,-1},{+1,-1},{-1,+1},};
3219 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+square[i][0], block->my+square[i][1], 0, &best_rd);
3221 //FIXME or try the standard 2 pass qpel or similar
// Intra candidate: one colour per plane from get_dc().
3224 color[i]= get_dc(s, mb_x, mb_y, i);
3226 check_block(s, mb_x, mb_y, color, 1, &best_rd);
3227 //FIXME RD style color selection
// The block changed: its real neighbours must be examined again.
3229 if(!same_block(block, &backup)){
3230 if(tb != &null_block) tb ->type &= ~BLOCK_OPT;
3231 if(lb != &null_block) lb ->type &= ~BLOCK_OPT;
3232 if(rb != &null_block) rb ->type &= ~BLOCK_OPT;
3233 if(bb != &null_block) bb ->type &= ~BLOCK_OPT;
3234 if(tlb!= &null_block) tlb->type &= ~BLOCK_OPT;
3235 if(trb!= &null_block) trb->type &= ~BLOCK_OPT;
3236 if(blb!= &null_block) blb->type &= ~BLOCK_OPT;
3237 if(brb!= &null_block) brb->type &= ~BLOCK_OPT;
3242 av_log(NULL, AV_LOG_ERROR, "pass:%d changed:%d\n", pass, change);
/**
 * Quantises the coefficients of subband b in place.
 * The step size qmul is derived from s->qlog + b->qlog; nothing is done in
 * lossless mode. Coefficients that fail the threshold test are set to 0, the
 * others are divided by qmul with their sign preserved.
 */
3248 static void quantize(SnowContext *s, SubBand *b, DWTELEM *src, int stride, int bias){
3249 const int level= b->level;
3250 const int w= b->width;
3251 const int h= b->height;
// qlog selects a qexp table entry (low bits) and a left shift (high bits).
3252 const int qlog= clip(s->qlog + b->qlog, 0, QROOT*16);
3253 const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
3254 int x,y, thres1, thres2;
3257 if(s->qlog == LOSSLESS_QLOG) return;
// A non-zero `bias` argument selects zero rounding bias; otherwise 3/8 of qmul.
3259 bias= bias ? 0 : (3*qmul)>>3;
3260 thres1= ((qmul - bias)>>QEXPSHIFT) - 1;
// First variant: division without adding the bias (NOTE(review): the
// condition choosing between the two variants is not visible here).
3266 int i= src[x + y*stride];
// A single unsigned comparison tests whether i is outside the dead zone.
3268 if((unsigned)(i+thres1) > thres2){
3271 i/= qmul; //FIXME optimize
3272 src[x + y*stride]= i;
3276 i/= qmul; //FIXME optimize
3277 src[x + y*stride]= -i;
3280 src[x + y*stride]= 0;
// Second variant: the bias is added before the division.
3286 int i= src[x + y*stride];
3288 if((unsigned)(i+thres1) > thres2){
3291 i= (i + bias) / qmul; //FIXME optimize
3292 src[x + y*stride]= i;
3296 i= (i + bias) / qmul; //FIXME optimize
3297 src[x + y*stride]= -i;
3300 src[x + y*stride]= 0;
3304 if(level+1 == s->spatial_decomposition_count){
3305 // STOP_TIMER("quantize")
/**
 * Dequantises rows start_y..end_y-1 of subband b, working on slice buffer
 * lines. Each coefficient is multiplied by qmul, offset by qadd and shifted
 * down by QEXPSHIFT, handling the two signs symmetrically. Nothing is done in
 * lossless mode.
 */
3309 static void dequantize_slice_buffered(SnowContext *s, slice_buffer * sb, SubBand *b, DWTELEM *src, int stride, int start_y, int end_y){
3310 const int w= b->width;
3311 const int qlog= clip(s->qlog + b->qlog, 0, QROOT*16);
3312 const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
3313 const int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
3317 if(s->qlog == LOSSLESS_QLOG) return;
3319 for(y=start_y; y<end_y; y++){
3320 // DWTELEM * line = slice_buffer_get_line_from_address(sb, src + (y * stride));
// Locate this subband's row inside the slice buffer.
3321 DWTELEM * line = slice_buffer_get_line(sb, (y * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
// Two forms, one negating before and after scaling (NOTE(review): presumably
// chosen by the sign of i — the tests are not visible here).
3325 line[x]= -((-i*qmul + qadd)>>(QEXPSHIFT)); //FIXME try different bias
3327 line[x]= (( i*qmul + qadd)>>(QEXPSHIFT));
3331 if(w > 200 /*level+1 == s->spatial_decomposition_count*/){
3332 STOP_TIMER("dquant")
/**
 * Dequantises the coefficients of subband b in place in src.
 * Each coefficient is multiplied by qmul, offset by qadd and shifted down by
 * QEXPSHIFT, handling the two signs symmetrically. Nothing is done in
 * lossless mode.
 */
3336 static void dequantize(SnowContext *s, SubBand *b, DWTELEM *src, int stride){
3337 const int w= b->width;
3338 const int h= b->height;
3339 const int qlog= clip(s->qlog + b->qlog, 0, QROOT*16);
3340 const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
3341 const int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
3345 if(s->qlog == LOSSLESS_QLOG) return;
3349 int i= src[x + y*stride];
// Two forms, one negating before and after scaling (NOTE(review): presumably
// chosen by the sign of i — the tests are not visible here).
3351 src[x + y*stride]= -((-i*qmul + qadd)>>(QEXPSHIFT)); //FIXME try different bias
3353 src[x + y*stride]= (( i*qmul + qadd)>>(QEXPSHIFT));
3357 if(w > 200 /*level+1 == s->spatial_decomposition_count*/){
3358 STOP_TIMER("dquant")
/**
 * Replaces each coefficient of subband b by its difference from a predictor
 * built from already-visited neighbours (left, top, top-right / top-left).
 * The scan runs from the bottom-right corner backwards, so the neighbours
 * used as predictors still hold their original values.
 */
3362 static void decorrelate(SnowContext *s, SubBand *b, DWTELEM *src, int stride, int inverse, int use_median){
3363 const int w= b->width;
3364 const int h= b->height;
3367 for(y=h-1; y>=0; y--){
3368 for(x=w-1; x>=0; x--){
3369 int i= x + y*stride;
// Three predictor forms follow (NOTE(review): the tests choosing between them
// are not visible here): median of left/top/top-right; median of left, top
// and the gradient left+top-topleft; and plain top.
3373 if(y && x+1<w) src[i] -= mid_pred(src[i - 1], src[i - stride], src[i - stride + 1]);
3374 else src[i] -= src[i - 1];
3376 if(y) src[i] -= mid_pred(src[i - 1], src[i - stride], src[i - 1] + src[i - stride] - src[i - 1 - stride]);
3377 else src[i] -= src[i - 1];
3380 if(y) src[i] -= src[i - stride];
/**
 * Undoes the decorrelation for rows start_y..end_y-1 of subband b, working on
 * slice buffer lines: the predictor built from the left neighbour and the
 * previous row (prev) is added back to each coefficient.
 */
3386 static void correlate_slice_buffered(SnowContext *s, slice_buffer * sb, SubBand *b, DWTELEM *src, int stride, int inverse, int use_median, int start_y, int end_y){
3387 const int w= b->width;
// Line of the row just above start_y (NOTE(review): presumably only fetched
// when start_y is non-zero — the guard is not visible here).
3396 line = slice_buffer_get_line(sb, ((start_y - 1) * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
3398 for(y=start_y; y<end_y; y++){
3400 // line = slice_buffer_get_line_from_address(sb, src + (y * stride));
3401 line = slice_buffer_get_line(sb, (y * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
// Three predictor forms follow (NOTE(review): the tests choosing between them
// are not visible here): median of left/top/top-right; median of left, top
// and the gradient left+top-topleft; and plain top.
3405 if(y && x+1<w) line[x] += mid_pred(line[x - 1], prev[x], prev[x + 1]);
3406 else line[x] += line[x - 1];
3408 if(y) line[x] += mid_pred(line[x - 1], prev[x], line[x - 1] + prev[x] - prev[x - 1]);
3409 else line[x] += line[x - 1];
3412 if(y) line[x] += prev[x];
3417 // STOP_TIMER("correlate")
/**
 * Undoes the decorrelation of subband b in place in src: the predictor built
 * from the left, top and top-right / top-left neighbours is added back to
 * each coefficient.
 */
3420 static void correlate(SnowContext *s, SubBand *b, DWTELEM *src, int stride, int inverse, int use_median){
3421 const int w= b->width;
3422 const int h= b->height;
3427 int i= x + y*stride;
// Three predictor forms follow (NOTE(review): the tests choosing between them
// are not visible here): median of left/top/top-right; median of left, top
// and the gradient left+top-topleft; and plain top.
3431 if(y && x+1<w) src[i] += mid_pred(src[i - 1], src[i - stride], src[i - stride + 1]);
3432 else src[i] += src[i - 1];
3434 if(y) src[i] += mid_pred(src[i - 1], src[i - stride], src[i - 1] + src[i - stride] - src[i - 1 - stride]);
3435 else src[i] += src[i - 1];
3438 if(y) src[i] += src[i - stride];
/**
 * Writes the frame header to the range coder: the keyframe flag, a block of
 * stream-level parameters and per-band qlog values, and the per-frame
 * parameters (decomposition type, qlog, mv_scale, qbias, block_max_depth).
 */
3444 static void encode_header(SnowContext *s){
3445 int plane_index, level, orientation;
// The keyframe flag is coded with its own state, reset to MID_STATE each time.
3448 memset(kstate, MID_STATE, sizeof(kstate));
3450 put_rac(&s->c, kstate, s->keyframe);
// NOTE(review): the statement executed here is not visible — presumably a
// reset of the coder contexts; confirm.
3451 if(s->keyframe || s->always_reset)
// Stream-level parameters (NOTE(review): presumably written on keyframes
// only — the guard is not visible here).
3454 put_symbol(&s->c, s->header_state, s->version, 0);
3455 put_rac(&s->c, s->header_state, s->always_reset);
3456 put_symbol(&s->c, s->header_state, s->temporal_decomposition_type, 0);
3457 put_symbol(&s->c, s->header_state, s->temporal_decomposition_count, 0);
3458 put_symbol(&s->c, s->header_state, s->spatial_decomposition_count, 0);
3459 put_symbol(&s->c, s->header_state, s->colorspace_type, 0);
3460 put_symbol(&s->c, s->header_state, s->chroma_h_shift, 0);
3461 put_symbol(&s->c, s->header_state, s->chroma_v_shift, 0);
3462 put_rac(&s->c, s->header_state, s->spatial_scalability);
3463 // put_rac(&s->c, s->header_state, s->rate_scalability);
// Per-band qlog: only planes 0 and 1 are written, and orientation 2 is skipped.
3465 for(plane_index=0; plane_index<2; plane_index++){
3466 for(level=0; level<s->spatial_decomposition_count; level++){
3467 for(orientation=level ? 1:0; orientation<4; orientation++){
3468 if(orientation==2) continue;
3469 put_symbol(&s->c, s->header_state, s->plane[plane_index].band[level][orientation].qlog, 1);
// Per-frame parameters.
3474 put_symbol(&s->c, s->header_state, s->spatial_decomposition_type, 0);
3475 put_symbol(&s->c, s->header_state, s->qlog, 1);
3476 put_symbol(&s->c, s->header_state, s->mv_scale, 0);
3477 put_symbol(&s->c, s->header_state, s->qbias, 1);
3478 put_symbol(&s->c, s->header_state, s->block_max_depth, 0);
// Reads the frame header from the range coder s->c into s, mirroring the
// field order used by encode_header().
// The keyframe flag is decoded with a private state array (kstate) reset to
// MID_STATE on every call; every other field uses s->header_state.
// Returns int; NOTE(review): the return statements are not visible in this
// view, so the exact error codes cannot be confirmed from here.
// NOTE(review): the three av_log() messages below have no trailing newline.
// NOTE(review): no range check on spatial_decomposition_count is visible
// here, yet it bounds loops over band[level][...] and decode_frame() sizes
// its state array with MAX_DECOMPOSITIONS - confirm it is validated.
3481 static int decode_header(SnowContext *s){
3482 int plane_index, level, orientation;
3485 memset(kstate, MID_STATE, sizeof(kstate));
// The keyframe flag is the first thing read.
3487 s->keyframe= get_rac(&s->c, kstate);
3488 if(s->keyframe || s->always_reset)
3491 s->version= get_symbol(&s->c, s->header_state, 0);
3493 av_log(s->avctx, AV_LOG_ERROR, "version %d not supported", s->version);
3496 s->always_reset= get_rac(&s->c, s->header_state);
3497 s->temporal_decomposition_type= get_symbol(&s->c, s->header_state, 0);
3498 s->temporal_decomposition_count= get_symbol(&s->c, s->header_state, 0);
3499 s->spatial_decomposition_count= get_symbol(&s->c, s->header_state, 0);
3500 s->colorspace_type= get_symbol(&s->c, s->header_state, 0);
3501 s->chroma_h_shift= get_symbol(&s->c, s->header_state, 0);
3502 s->chroma_v_shift= get_symbol(&s->c, s->header_state, 0);
3503 s->spatial_scalability= get_rac(&s->c, s->header_state);
3504 // s->rate_scalability= get_rac(&s->c, s->header_state);
// Per-band qlog values. A symbol is read only for planes 0 and 1 and for
// orientations other than 2; plane 2 reuses plane 1 and orientation 2
// reuses orientation 1 of the same plane and level.
3506 for(plane_index=0; plane_index<3; plane_index++){
3507 for(level=0; level<s->spatial_decomposition_count; level++){
3508 for(orientation=level ? 1:0; orientation<4; orientation++){
3510 if (plane_index==2) q= s->plane[1].band[level][orientation].qlog;
3511 else if(orientation==2) q= s->plane[plane_index].band[level][1].qlog;
3512 else q= get_symbol(&s->c, s->header_state, 1);
3513 s->plane[plane_index].band[level][orientation].qlog= q;
// Decomposition types above 2 are reported as unsupported.
3519 s->spatial_decomposition_type= get_symbol(&s->c, s->header_state, 0);
3520 if(s->spatial_decomposition_type > 2){
3521 av_log(s->avctx, AV_LOG_ERROR, "spatial_decomposition_type %d not supported", s->spatial_decomposition_type);
3525 s->qlog= get_symbol(&s->c, s->header_state, 1);
3526 s->mv_scale= get_symbol(&s->c, s->header_state, 0);
3527 s->qbias= get_symbol(&s->c, s->header_state, 1);
// A block_max_depth above 1 is reported and the field is reset to 0.
3528 s->block_max_depth= get_symbol(&s->c, s->header_state, 0);
3529 if(s->block_max_depth > 1){
3530 av_log(s->avctx, AV_LOG_ERROR, "block_max_depth= %d is too large", s->block_max_depth);
3531 s->block_max_depth= 0;
// Steps a running value v through QROOT successive multiplications by
// 2^(1/QROOT), i.e. one doubling spread over QROOT equal ratio steps.
// NOTE(review): the declarations of i and v and the statement that consumes
// v are not visible in this view; presumably a lookup table is filled per
// iteration - confirm against the full file.
3538 static void init_qexp(){
3542 for(i=0; i<QROOT; i++){
3544 v *= pow(2, 1.0 / QROOT);
// Initialisation shared by encoder and decoder: DSP function tables, the
// DWT work buffer, default stream parameters and the per-plane / per-band
// subband geometry.
//   avctx   codec context; its priv_data is used as the SnowContext
// Returns int; NOTE(review): the return statement is not visible here.
// NOTE(review): neither the av_mallocz() results nor the get_buffer() result
// are checked in the lines visible here - confirm.
3548 static int common_init(AVCodecContext *avctx){
3549 SnowContext *s = avctx->priv_data;
3551 int level, orientation, plane_index, dec;
// Set up the DSP context; the assignments that follow route the qpel tables
// (indices 0 and 1) to the H.264 qpel implementations.
3555 dsputil_init(&s->dsp, avctx);
3558 s->dsp.put_qpel_pixels_tab [0][dy+dx/4]=\
3559 s->dsp.put_no_rnd_qpel_pixels_tab[0][dy+dx/4]=\
3560 s->dsp.put_h264_qpel_pixels_tab[0][dy+dx/4];\
3561 s->dsp.put_qpel_pixels_tab [1][dy+dx/4]=\
3562 s->dsp.put_no_rnd_qpel_pixels_tab[1][dy+dx/4]=\
3563 s->dsp.put_h264_qpel_pixels_tab[1][dy+dx/4];
// mcfh(dx,dy) installs the helpers named mc_block_hpel<dx><dy>16 and
// mc_block_hpel<dx><dy>8 into entry dy/4+dx/8 of both the put_pixels and
// put_no_rnd_pixels tables (table 0 and table 1 respectively).
3582 #define mcfh(dx,dy)\
3583 s->dsp.put_pixels_tab [0][dy/4+dx/8]=\
3584 s->dsp.put_no_rnd_pixels_tab[0][dy/4+dx/8]=\
3585 mc_block_hpel ## dx ## dy ## 16;\
3586 s->dsp.put_pixels_tab [1][dy/4+dx/8]=\
3587 s->dsp.put_no_rnd_pixels_tab[1][dy/4+dx/8]=\
3588 mc_block_hpel ## dx ## dy ## 8;
// Default stream parameters: five spatial decomposition levels, wavelet type
// taken from avctx->prediction_method, chroma subsampled by one shift in
// each direction.
3598 dec= s->spatial_decomposition_count= 5;
3599 s->spatial_decomposition_type= avctx->prediction_method; //FIXME add decorrelator type r transform_type
3601 s->chroma_h_shift= 1; //FIXME XXX
3602 s->chroma_v_shift= 1;
3604 // dec += FFMAX(s->chroma_h_shift, s->chroma_v_shift);
3606 width= s->avctx->width;
3607 height= s->avctx->height;
// One zero-initialised DWTELEM per pixel of the full-size frame.
3609 s->spatial_dwt_buffer= av_mallocz(width*height*sizeof(DWTELEM));
// mv_scale is 2 with CODEC_FLAG_QPEL and 4 otherwise; block_max_depth is 1
// with CODEC_FLAG_4MV and 0 otherwise.
3611 s->mv_scale= (s->avctx->flags & CODEC_FLAG_QPEL) ? 2 : 4;
3612 s->block_max_depth= (s->avctx->flags & CODEC_FLAG_4MV) ? 1 : 0;
// Per-plane subband layout.
3614 for(plane_index=0; plane_index<3; plane_index++){
3615 int w= s->avctx->width;
3616 int h= s->avctx->height;
// NOTE(review): the guard around these two shifts is not visible here;
// presumably they apply only to the chroma planes - confirm.
3619 w>>= s->chroma_h_shift;
3620 h>>= s->chroma_v_shift;
3622 s->plane[plane_index].width = w;
3623 s->plane[plane_index].height= h;
3624 //av_log(NULL, AV_LOG_DEBUG, "%d %d\n", w, h);
// Levels are walked from the highest index down to 0; level 0 has four
// orientations (0..3), every other level has three (1..3).
3625 for(level=s->spatial_decomposition_count-1; level>=0; level--){
3626 for(orientation=level ? 1 : 0; orientation<4; orientation++){
3627 SubBand *b= &s->plane[plane_index].band[level][orientation];
// Every band points into the shared DWT buffer.
3629 b->buf= s->spatial_dwt_buffer;
// Row stride grows by a factor of two per remaining decomposition step.
3631 b->stride= s->plane[plane_index].width << (s->spatial_decomposition_count - level);
// Half of w / h: rounded up for even orientations (width) and for
// orientations 0 and 1 (height), rounded down otherwise.
3632 b->width = (w + !(orientation&1))>>1;
3633 b->height= (h + !(orientation>1))>>1;
// Line-based addressing used with the slice buffer.
3635 b->stride_line = 1 << (s->spatial_decomposition_count - level);
3636 b->buf_x_offset = 0;
3637 b->buf_y_offset = 0;
// NOTE(review): the conditions selecting the offsets below are not visible
// here; presumably they depend on the orientation bits - confirm.
3641 b->buf_x_offset = (w+1)>>1;
3644 b->buf += b->stride>>1;
3645 b->buf_y_offset = b->stride_line >> 1;
// Parent is the same orientation one level lower.
3649 b->parent= &s->plane[plane_index].band[level-1][orientation];
// Zeroed array of (width+1)*height+1 x_and_coeff entries per band.
3650 b->x_coeff=av_mallocz(((b->width+1) * b->height+1)*sizeof(x_and_coeff));
// Cache the frame size in the context and fetch the mconly_picture buffer.
3659 width= s->width= avctx->width;
3660 height= s->height= avctx->height;
3662 assert(width && height);
3664 s->avctx->get_buffer(s->avctx, &s->mconly_picture);
// Derives a qlog value for every subband of plane p from the response of the
// inverse DWT to a single impulse placed in that subband.
//   s   codec context; s->spatial_dwt_buffer is overwritten as scratch space
//   p   plane whose band[level][orientation].qlog fields are set
3670 static void calculate_vissual_weight(SnowContext *s, Plane *p){
3671 int width = p->width;
3672 int height= p->height;
3673 int level, orientation, x, y;
3675 for(level=0; level<s->spatial_decomposition_count; level++){
3676 for(orientation=level ? 1 : 0; orientation<4; orientation++){
3677 SubBand *b= &p->band[level][orientation];
3678 DWTELEM *buf= b->buf;
// Clear the work buffer, then set one coefficient at the band centre.
// NOTE(review): the clear uses sizeof(int) while common_init() allocates the
// buffer with sizeof(DWTELEM); this assumes both have the same size.
3681 memset(s->spatial_dwt_buffer, 0, sizeof(int)*width*height);
3682 buf[b->width/2 + b->height/2*b->stride]= 256*256;
// Transform the impulse back to the spatial domain and visit every sample.
3683 ff_spatial_idwt(s->spatial_dwt_buffer, width, height, width, s->spatial_decomposition_type, s->spatial_decomposition_count);
3684 for(y=0; y<height; y++){
3685 for(x=0; x<width; x++){
3686 int64_t d= s->spatial_dwt_buffer[x + y*width];
// NOTE(review): the accumulation into error is not visible in this view.
// qlog is the logarithm of 352256/sqrt(error) in base 2^(1/QROOT); adding
// 0.5 before the int cast rounds positive results to nearest.
3691 b->qlog= (int)(log(352256.0/sqrt(error)) / log(pow(2.0, 1.0/QROOT))+0.5);
3692 // av_log(NULL, AV_LOG_DEBUG, "%d %d %d\n", level, orientation, b->qlog/*, sqrt(error)*/);
// Encoder initialisation: sets up the embedded MpegEncContext (s->m) used for
// motion estimation and rate control, computes the per-band weights and
// selects the pixel format handling.
//   avctx   codec context; its priv_data is used as the SnowContext
// Returns int; NOTE(review): the return statements are not visible here.
3697 static int encode_init(AVCodecContext *avctx)
3699 SnowContext *s = avctx->priv_data;
// The codec is reported as experimental unless strict_std_compliance is at or
// below FF_COMPLIANCE_EXPERIMENTAL.
3702 if(avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL){
3703 av_log(avctx, AV_LOG_ERROR, "this codec is under development, files encoded with it may not be decodable with future versions!!!\n"
3704 "use vstrict=-2 / -strict -2 to use it anyway\n");
// Mirror the user settings into s->m and allocate its motion estimation
// scratch areas (all zero-initialised).
3714 s->m.flags = avctx->flags;
3715 s->m.bit_rate= avctx->bit_rate;
3717 s->m.me.scratchpad= av_mallocz((avctx->width+64)*2*16*2*sizeof(uint8_t));
3718 s->m.me.map = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t));
3719 s->m.me.score_map = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t));
3720 h263_encode_init(&s->m); //mv_penalty
// First pass: make sure a 256 byte stats_out buffer exists.
3722 if(avctx->flags&CODEC_FLAG_PASS1){
3723 if(!avctx->stats_out)
3724 avctx->stats_out = av_mallocz(256);
// Second pass: rate control is initialised from s->m.
3726 if(avctx->flags&CODEC_FLAG_PASS2){
3727 if(ff_rate_control_init(&s->m) < 0)
// Per-band qlog weights for all three planes.
3731 for(plane_index=0; plane_index<3; plane_index++){
3732 calculate_vissual_weight(s, &s->plane[plane_index]);
// Only PIX_FMT_YUV420P has an active case label; the other formats are
// commented out and anything else is reported as unsupported.
3736 avctx->coded_frame= &s->current_picture;
3737 switch(avctx->pix_fmt){
3738 // case PIX_FMT_YUV444P:
3739 // case PIX_FMT_YUV422P:
3740 case PIX_FMT_YUV420P:
3742 // case PIX_FMT_YUV411P:
3743 // case PIX_FMT_YUV410P:
3744 s->colorspace_type= 0;
3746 /* case PIX_FMT_RGBA32:
3750 av_log(avctx, AV_LOG_ERROR, "format not supported\n");
// Chroma subsampling is hard-coded instead of being queried from pix_fmt.
3753 // avcodec_get_chroma_sub_sample(avctx->pix_fmt, &s->chroma_h_shift, &s->chroma_v_shift);
3754 s->chroma_h_shift= 1;
3755 s->chroma_v_shift= 1;
// Comparison functions for motion estimation come from the user settings.
3757 ff_set_cmp(&s->dsp, s->dsp.me_cmp, s->avctx->me_cmp);
3758 ff_set_cmp(&s->dsp, s->dsp.me_sub_cmp, s->avctx->me_sub_cmp);
// NOTE(review): the get_buffer() result is not checked in the visible lines.
3760 s->avctx->get_buffer(s->avctx, &s->input_picture);
// Prepares the picture buffers for a new frame: the picture coded so far gets
// its edges drawn and becomes last_picture, then a fresh buffer is requested
// for current_picture.
// Returns int; NOTE(review): the return statements are not visible here.
3765 static int frame_start(SnowContext *s){
3767 int w= s->avctx->width; //FIXME round up to x16 ?
3768 int h= s->avctx->height;
// Edges are drawn only when current_picture already holds data; the chroma
// planes use half the dimensions and half the edge width.
3770 if(s->current_picture.data[0]){
3771 draw_edges(s->current_picture.data[0], s->current_picture.linesize[0], w , h , EDGE_WIDTH );
3772 draw_edges(s->current_picture.data[1], s->current_picture.linesize[1], w>>1, h>>1, EDGE_WIDTH/2);
3773 draw_edges(s->current_picture.data[2], s->current_picture.linesize[2], w>>1, h>>1, EDGE_WIDTH/2);
// Swap last_picture and current_picture through tmp.
3776 tmp= s->last_picture;
3777 s->last_picture= s->current_picture;
3778 s->current_picture= tmp;
// Mark the new picture as a reference and request its buffer.
3780 s->current_picture.reference= 1;
3781 if(s->avctx->get_buffer(s->avctx, &s->current_picture) < 0){
3782 av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed\n");
// Encodes one input picture with the range coder.
//   avctx     codec context; its priv_data is used as the SnowContext
//   buf       output buffer handed to ff_init_range_encoder()
//   buf_size  size passed along with buf
//   data      input picture, used as an AVFrame
// Returns the value of ff_rac_terminate() on the range coder.
// NOTE(review): many control-flow lines (loop headers, else branches, header
// and block coding calls) are not visible in this view; the stage comments
// below describe only what the visible statements do.
3789 static int encode_frame(AVCodecContext *avctx, unsigned char *buf, int buf_size, void *data){
3790 SnowContext *s = avctx->priv_data;
3791 RangeCoder * const c= &s->c;
3792 AVFrame *pict = data;
3793 const int width= s->avctx->width;
3794 const int height= s->avctx->height;
3795 int level, orientation, plane_index, i, y;
// Start a fresh range coder over the output buffer.
3797 ff_init_range_encoder(c, buf, buf_size);
3798 ff_build_rac_states(c, 0.05*(1LL<<32), 256-8);
// Copy the caller's picture row by row into s->input_picture.
3802 for(y=0; y<(height>>shift); y++)
3803 memcpy(&s->input_picture.data[i][y * s->input_picture.linesize[i]],
3804 &pict->data[i][y * pict->linesize[i]],
3807 s->new_picture = *pict;
// Second pass: picture type and quality come from the rate control entry for
// this frame number.
3809 if(avctx->flags&CODEC_FLAG_PASS2){
3811 pict->pict_type= s->m.rc_context.entry[avctx->frame_number].new_pict_type;
3812 s->keyframe= pict->pict_type==FF_I_TYPE;
3813 s->m.picture_number= avctx->frame_number;
3814 pict->quality= ff_rate_estimate_qscale(&s->m, 0);
// Otherwise a keyframe is forced when gop_size is 0 or the frame number is a
// multiple of gop_size.
3816 s->keyframe= avctx->gop_size==0 || avctx->frame_number % avctx->gop_size == 0;
3817 pict->pict_type= s->keyframe ? FF_I_TYPE : FF_P_TYPE;
// qlog is QROOT * log2(quality / FF_QP2LAMBDA) plus a fixed offset, or
// LOSSLESS_QLOG. NOTE(review): the condition choosing between the two is not
// visible here; presumably it tests pict->quality - confirm.
3821 s->qlog= rint(QROOT*log(pict->quality / (float)FF_QP2LAMBDA)/log(2));
3823 s->qlog += 61*QROOT/8;
3825 s->qlog= LOSSLESS_QLOG;
3829 s->current_picture.key_frame= s->keyframe;
// For P pictures the embedded MpegEncContext is filled in so that the shared
// motion estimation code can run on 16x16 blocks of the luma plane.
3831 s->m.current_picture_ptr= &s->m.current_picture;
3832 if(pict->pict_type == P_TYPE){
3833 int block_width = (width +15)>>4;
3834 int block_height= (height+15)>>4;
3835 int stride= s->current_picture.linesize[0];
3837 assert(s->current_picture.data[0]);
3838 assert(s->last_picture.data[0]);
3840 s->m.avctx= s->avctx;
3841 s->m.current_picture.data[0]= s->current_picture.data[0];
3842 s->m. last_picture.data[0]= s-> last_picture.data[0];
3843 s->m. new_picture.data[0]= s-> input_picture.data[0];
3844 s->m. last_picture_ptr= &s->m. last_picture;
3846 s->m. last_picture.linesize[0]=
3847 s->m. new_picture.linesize[0]=
3848 s->m.current_picture.linesize[0]= stride;
3849 s->m.uvlinesize= s->current_picture.linesize[1];
3851 s->m.height= height;
3852 s->m.mb_width = block_width;
3853 s->m.mb_height= block_height;
3854 s->m.mb_stride= s->m.mb_width+1;
3855 s->m.b8_stride= 2*s->m.mb_width+1;
3857 s->m.pict_type= pict->pict_type;
3858 s->m.me_method= s->avctx->me_method;
3859 s->m.me.scene_change_score=0;
3860 s->m.flags= s->avctx->flags;
3861 s->m.quarter_sample= (s->avctx->flags & CODEC_FLAG_QPEL)!=0;
3862 s->m.out_format= FMT_H263;
3863 s->m.unrestricted_mv= 1;
// Lagrange multipliers are derived from the picture quality; the 3/2 factor
// is flagged as a workaround by the original FIXME.
3865 s->lambda = s->m.lambda= pict->quality * 3/2; //FIXME bug somewhere else
3866 s->m.qscale= (s->m.lambda*139 + FF_LAMBDA_SCALE*64) >> (FF_LAMBDA_SHIFT + 7);
3867 s->lambda2= s->m.lambda2= (s->m.lambda*s->m.lambda + FF_LAMBDA_SCALE/2) >> FF_LAMBDA_SHIFT;
3869 s->m.dsp= s->dsp; //move
// Quantiser bias is 2 for P pictures and 0 otherwise.
3876 s->qbias= pict->pict_type == P_TYPE ? 2 : 0;
// Bit accounting: misc_bits is the byte position at the first snapshot times
// eight, mv_bits is the growth between the two snapshots.
3879 s->m.misc_bits = 8*(s->c.bytestream - s->c.bytestream_start);
3881 s->m.mv_bits = 8*(s->c.bytestream - s->c.bytestream_start) - s->m.misc_bits;
// Each plane is transformed, quantised, coded and reconstructed in turn.
3883 for(plane_index=0; plane_index<3; plane_index++){
3884 Plane *p= &s->plane[plane_index];
3888 // int bits= put_bits_count(&s->c.pb);
// Load the input samples into the DWT buffer, scaled up by FRAC_BITS.
3891 if(pict->data[plane_index]) //FIXME gray hack
3894 s->spatial_dwt_buffer[y*w + x]= pict->data[plane_index][y*pict->linesize[plane_index] + x]<<FRAC_BITS;
// presumably subtracts the motion compensated prediction (last argument 0;
// the call further down uses 1) - confirm against predict_plane().
3897 predict_plane(s, s->spatial_dwt_buffer, plane_index, 0);
// Scene change on a P picture: the range coder is restarted and the picture
// type is switched to I.
3900 && pict->pict_type == P_TYPE
3901 && s->m.me.scene_change_score > s->avctx->scenechange_threshold){
3902 ff_init_range_encoder(c, buf, buf_size);
3903 ff_build_rac_states(c, 0.05*(1LL<<32), 256-8);
3904 pict->pict_type= FF_I_TYPE;
// Lossless mode: drop the FRAC_BITS fractional part before the transform.
3910 if(s->qlog == LOSSLESS_QLOG){
3913 s->spatial_dwt_buffer[y*w + x]= (s->spatial_dwt_buffer[y*w + x] + (1<<(FRAC_BITS-1))-1)>>FRAC_BITS;
// Forward wavelet transform of the whole plane.
3918 ff_spatial_dwt(s->spatial_dwt_buffer, w, h, w, s->spatial_decomposition_type, s->spatial_decomposition_count);
// Per band: quantise, decorrelate, entropy code, then run correlate() so the
// buffer can be used for reconstruction below.
3920 for(level=0; level<s->spatial_decomposition_count; level++){
3921 for(orientation=level ? 1 : 0; orientation<4; orientation++){
3922 SubBand *b= &p->band[level][orientation];
3924 quantize(s, b, b->buf, b->stride, s->qbias);
3926 decorrelate(s, b, b->buf, b->stride, pict->pict_type == P_TYPE, 0);
3927 encode_subband(s, b, b->buf, b->parent ? b->parent->buf : NULL, b->stride, orientation);
3928 assert(b->parent==NULL || b->parent->stride == b->stride*2);
3930 correlate(s, b, b->buf, b->stride, 1, 0);
3933 // av_log(NULL, AV_LOG_DEBUG, "plane:%d bits:%d\n", plane_index, put_bits_count(&s->c.pb) - bits);
// Reconstruction: dequantise every band and run the inverse transform.
3935 for(level=0; level<s->spatial_decomposition_count; level++){
3936 for(orientation=level ? 1 : 0; orientation<4; orientation++){
3937 SubBand *b= &p->band[level][orientation];
3939 dequantize(s, b, b->buf, b->stride);
3943 ff_spatial_idwt(s->spatial_dwt_buffer, w, h, w, s->spatial_decomposition_type, s->spatial_decomposition_count);
// Lossless mode: restore the FRAC_BITS scaling removed before the transform.
3944 if(s->qlog == LOSSLESS_QLOG){
3947 s->spatial_dwt_buffer[y*w + x]<<=FRAC_BITS;
// Second predict_plane() pass, this time with last argument 1.
3952 predict_plane(s, s->spatial_dwt_buffer, plane_index, 1);
3953 STOP_TIMER("pred-conv")}
// With CODEC_FLAG_PSNR the per-plane difference between the reconstructed
// and the input picture is gathered into the error fields.
3954 if(s->avctx->flags&CODEC_FLAG_PSNR){
3957 if(pict->data[plane_index]) //FIXME gray hack
3960 int d= s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x] - pict->data[plane_index][y*pict->linesize[plane_index] + x];
3964 s->avctx->error[plane_index] += error;
3965 s->current_picture.error[plane_index] = error;
// The previous reference picture is released once it holds data.
3969 if(s->last_picture.data[0])
3970 avctx->release_buffer(avctx, &s->last_picture);
// Publish per-frame metadata on the coded picture.
3972 s->current_picture.coded_picture_number = avctx->frame_number;
3973 s->current_picture.pict_type = pict->pict_type;
3974 s->current_picture.quality = pict->quality;
// First pass: texture bits are whatever was written beyond misc and mv bits;
// the statistics line is produced by ff_write_pass1_stats().
3975 if(avctx->flags&CODEC_FLAG_PASS1){
3976 s->m.p_tex_bits = 8*(s->c.bytestream - s->c.bytestream_start) - s->m.misc_bits - s->m.mv_bits;
3977 s->m.current_picture.display_picture_number =
3978 s->m.current_picture.coded_picture_number = avctx->frame_number;
3979 s->m.pict_type = pict->pict_type;
3980 s->m.current_picture.quality = pict->quality;
3981 ff_write_pass1_stats(&s->m);
// Second pass: keep the running total of produced bits up to date.
3983 if(avctx->flags&CODEC_FLAG_PASS2){
3984 s->m.total_bits += 8*(s->c.bytestream - s->c.bytestream_start);
// Finish the range coder; its result is the return value of this function.
3989 return ff_rac_terminate(c);
// Releases the allocations held by the context: the DWT work buffer, the
// motion estimation scratch areas, the block array and the x_coeff array of
// every subband. av_freep() is used throughout, taking the address of each
// pointer field.
3992 static void common_end(SnowContext *s){
3993 int plane_index, level, orientation;
3995 av_freep(&s->spatial_dwt_buffer);
3997 av_freep(&s->m.me.scratchpad);
3998 av_freep(&s->m.me.map);
3999 av_freep(&s->m.me.score_map);
4001 av_freep(&s->block);
// Same plane / level / orientation walk that common_init() uses when it
// allocates x_coeff.
4003 for(plane_index=0; plane_index<3; plane_index++){
4004 for(level=s->spatial_decomposition_count-1; level>=0; level--){
4005 for(orientation=level ? 1 : 0; orientation<4; orientation++){
4006 SubBand *b= &s->plane[plane_index].band[level][orientation];
4008 av_freep(&b->x_coeff);
// Encoder teardown. The visible lines free avctx->stats_out, which
// encode_init() allocates for first-pass encoding.
// NOTE(review): the remaining statements and the return value are not
// visible in this view.
4014 static int encode_end(AVCodecContext *avctx)
4016 SnowContext *s = avctx->priv_data;
4019 av_free(avctx->stats_out);
// Decoder initialisation: fixes the output pixel format to PIX_FMT_YUV420P
// and sets up the slice buffer used by the line-based inverse DWT.
// NOTE(review): the remaining statements and the return value are not
// visible in this view.
4024 static int decode_init(AVCodecContext *avctx)
4026 SnowContext *s = avctx->priv_data;
4029 avctx->pix_fmt= PIX_FMT_YUV420P;
// Block size is MB_SIZE halved once per level of block_max_depth.
4033 block_size = MB_SIZE >> s->block_max_depth;
// The third argument grows with the block size and with the decomposition
// count (presumably the number of lines kept alive at once - confirm against
// slice_buffer_init()).
4034 slice_buffer_init(&s->sb, s->plane[0].height, (block_size) + (s->spatial_decomposition_count * (s->spatial_decomposition_count + 3)) + 1, s->plane[0].width, s->spatial_dwt_buffer);
// Decodes one coded frame from buf and hands the resulting picture back
// through data.
//   avctx      codec context; its priv_data is used as the SnowContext
//   data       output, written as an AVFrame
//   data_size  set to sizeof(AVFrame)
//   buf        coded input handed to ff_init_range_decoder()
//   buf_size   size passed along with buf
// NOTE(review): several control-flow lines (header decoding, loop headers,
// the return statement) are not visible in this view; the stage comments
// below describe only what the visible statements do.
4039 static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, uint8_t *buf, int buf_size){
4040 SnowContext *s = avctx->priv_data;
4041 RangeCoder * const c= &s->c;
4043 AVFrame *picture = data;
4044 int level, orientation, plane_index;
// Start a fresh range decoder over the input buffer.
4046 ff_init_range_decoder(c, buf, buf_size);
4047 ff_build_rac_states(c, 0.05*(1LL<<32), 256-8);
4049 s->current_picture.pict_type= FF_I_TYPE; //FIXME I vs. P
// The block array is allocated lazily on first use.
4051 if(!s->block) alloc_blocks(s);
4054 //keyframe flag duplication mess FIXME
4055 if(avctx->debug&FF_DEBUG_PICT_INFO)
4056 av_log(avctx, AV_LOG_ERROR, "keyframe:%d qlog:%d\n", s->keyframe, s->qlog);
// Each plane is decoded in turn.
4060 for(plane_index=0; plane_index<3; plane_index++){
4061 Plane *p= &s->plane[plane_index];
4065 int decode_state[MAX_DECOMPOSITIONS][4][1]; /* Stored state info for unpack_coeffs. 1 variable per instance. */
// Debug bit 2048: build a prediction-only picture from a zeroed buffer and
// copy it sample by sample into mconly_picture.
4067 if(s->avctx->debug&2048){
4068 memset(s->spatial_dwt_buffer, 0, sizeof(DWTELEM)*w*h);
4069 predict_plane(s, s->spatial_dwt_buffer, plane_index, 1);
4073 int v= s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x];
4074 s->mconly_picture.data[plane_index][y*s->mconly_picture.linesize[plane_index] + x]= v;
// Entropy decode the coefficients of every band of this plane.
4080 for(level=0; level<s->spatial_decomposition_count; level++){
4081 for(orientation=level ? 1 : 0; orientation<4; orientation++){
4082 SubBand *b= &p->band[level][orientation];
4083 unpack_coeffs(s, b, b->parent, orientation);
4086 STOP_TIMER("unpack coeffs");
// Slice geometry: chroma planes use half the block width of plane 0.
4090 const int mb_h= s->b_height << s->block_max_depth;
4091 const int block_size = MB_SIZE >> s->block_max_depth;
4092 const int block_w = plane_index ? block_size/2 : block_size;
4094 dwt_compose_t cs[MAX_DECOMPOSITIONS];
// The plane is reconstructed slice by slice; note the inclusive upper bound,
// so mb_h+1 slices are visited.
4099 ff_spatial_idwt_buffered_init(cs, &s->sb, w, h, 1, s->spatial_decomposition_type, s->spatial_decomposition_count);
4100 for(mb_y=0; mb_y<=mb_h; mb_y++){
// Rows covered by this slice; for non-keyframes (unless debug bit 512 is set)
// the range is moved up by half a block, clamped at row 0.
4102 int slice_starty = block_w*mb_y;
4103 int slice_h = block_w*(mb_y+1);
4104 if (!(s->keyframe || s->avctx->debug&512)){
4105 slice_starty = FFMAX(0, slice_starty - (block_w >> 1));
4106 slice_h -= (block_w >> 1);
// For every band, work out which of its rows this slice needs and decode
// exactly those rows.
4111 for(level=0; level<s->spatial_decomposition_count; level++){
4112 for(orientation=level ? 1 : 0; orientation<4; orientation++){
4113 SubBand *b= &p->band[level][orientation];
4116 int our_mb_start = mb_y;
4117 int our_mb_end = (mb_y + 1);
// The slice rows are scaled down to the resolution of this level and padded
// by a margin that depends on the level (plus extra, whose definition is not
// visible here); the first slice always starts at row 0.
4119 start_y = (mb_y ? ((block_w * our_mb_start) >> (s->spatial_decomposition_count - level)) + s->spatial_decomposition_count - level + extra: 0);
4120 end_y = (((block_w * our_mb_end) >> (s->spatial_decomposition_count - level)) + s->spatial_decomposition_count - level + extra);
4121 if (!(s->keyframe || s->avctx->debug&512)){
4122 start_y = FFMAX(0, start_y - (block_w >> (1+s->spatial_decomposition_count - level)));
4123 end_y = FFMAX(0, end_y - (block_w >> (1+s->spatial_decomposition_count - level)));
// Clamp both bounds to the height of the band.
4125 start_y = FFMIN(b->height, start_y);
4126 end_y = FFMIN(b->height, end_y);
4128 if (start_y != end_y){
// Orientation 0 (band[0][0]) is decoded one row ahead of the other bands,
// then correlated over that range and dequantised over start_y..end_y.
4129 if (orientation == 0){
4130 SubBand * correlate_band = &p->band[0][0];
4131 int correlate_end_y = FFMIN(b->height, end_y + 1);
4132 int correlate_start_y = FFMIN(b->height, (start_y ? start_y + 1 : 0));
4133 decode_subband_slice_buffered(s, correlate_band, &s->sb, correlate_start_y, correlate_end_y, decode_state[0][0]);
4134 correlate_slice_buffered(s, &s->sb, correlate_band, correlate_band->buf, correlate_band->stride, 1, 0, correlate_start_y, correlate_end_y);
4135 dequantize_slice_buffered(s, &s->sb, correlate_band, correlate_band->buf, correlate_band->stride, start_y, end_y);
4138 decode_subband_slice_buffered(s, b, &s->sb, start_y, end_y, decode_state[level][orientation]);
4142 STOP_TIMER("decode_subband_slice");
// Inverse DWT advances four rows per call until the slice end is reached.
4146 for(; yd<slice_h; yd+=4){
4147 ff_spatial_idwt_buffered_slice(cs, &s->sb, w, h, 1, s->spatial_decomposition_type, s->spatial_decomposition_count, yd);
4149 STOP_TIMER("idwt slice");}
// Lossless mode: scale the reconstructed rows back up by FRAC_BITS.
4152 if(s->qlog == LOSSLESS_QLOG){
4153 for(; yq<slice_h && yq<h; yq++){
4154 DWTELEM * line = slice_buffer_get_line(&s->sb, yq);
4156 line[x] <<= FRAC_BITS;
// Run prediction for this slice.
4161 predict_slice_buffered(s, &s->sb, s->spatial_dwt_buffer, plane_index, 1, mb_y);
// Rows of the slice, clamped to the plane height, are handed back to the
// slice buffer.
4163 y = FFMIN(p->height, slice_starty);
4164 end_y = FFMIN(p->height, slice_h);
4166 slice_buffer_release(&s->sb, y++);
4169 slice_buffer_flush(&s->sb);
4171 STOP_TIMER("idwt + predict_slices")}
// The previous reference picture is released once it holds data.
4176 if(s->last_picture.data[0])
4177 avctx->release_buffer(avctx, &s->last_picture);
// Output is current_picture, or mconly_picture when debug bit 2048 is set.
4179 if(!(s->avctx->debug&2048))
4180 *picture= s->current_picture;
4182 *picture= s->mconly_picture;
4184 *data_size = sizeof(AVFrame);
// Number of input bytes consumed by the range decoder; zero is reported as
// an error.
4186 bytes_read= c->bytestream - c->bytestream_start;
4187 if(bytes_read ==0) av_log(s->avctx, AV_LOG_ERROR, "error at end of frame\n"); //FIXME
// Decoder teardown. The visible lines destroy the slice buffer that
// decode_init() sets up.
// NOTE(review): the remaining statements and the return value are not
// visible in this view.
4192 static int decode_end(AVCodecContext *avctx)
4194 SnowContext *s = avctx->priv_data;
4196 slice_buffer_destroy(&s->sb);
// Codec descriptor for the Snow decoder. The private context size is
// sizeof(SnowContext). The 0 entry has CODEC_CAP_DR1 and
// CODEC_CAP_DRAW_HORIZ_BAND commented out (presumably the capabilities
// field - confirm against the AVCodec layout).
4203 AVCodec snow_decoder = {
4207 sizeof(SnowContext),
4212 0 /*CODEC_CAP_DR1*/ /*| CODEC_CAP_DRAW_HORIZ_BAND*/,
4216 #ifdef CONFIG_ENCODERS
// Codec descriptor for the Snow encoder; only compiled when CONFIG_ENCODERS
// is defined. The private context size is sizeof(SnowContext).
4217 AVCodec snow_encoder = {
4221 sizeof(SnowContext),
4237 int buffer[2][width*height];
4240 s.spatial_decomposition_count=6;
4241 s.spatial_decomposition_type=1;
4243 printf("testing 5/3 DWT\n");
4244 for(i=0; i<width*height; i++)
4245 buffer[0][i]= buffer[1][i]= random()%54321 - 12345;
4247 ff_spatial_dwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4248 ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4250 for(i=0; i<width*height; i++)
4251 if(buffer[0][i]!= buffer[1][i]) printf("fsck: %d %d %d\n",i, buffer[0][i], buffer[1][i]);
4253 printf("testing 9/7 DWT\n");
4254 s.spatial_decomposition_type=0;
4255 for(i=0; i<width*height; i++)
4256 buffer[0][i]= buffer[1][i]= random()%54321 - 12345;
4258 ff_spatial_dwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4259 ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4261 for(i=0; i<width*height; i++)
4262 if(ABS(buffer[0][i] - buffer[1][i])>20) printf("fsck: %d %d %d\n",i, buffer[0][i], buffer[1][i]);
4265 printf("testing AC coder\n");
4266 memset(s.header_state, 0, sizeof(s.header_state));
4267 ff_init_range_encoder(&s.c, buffer[0], 256*256);
4268 ff_init_cabac_states(&s.c, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64);
4270 for(i=-256; i<256; i++){
4272 put_symbol(&s.c, s.header_state, i*i*i/3*ABS(i), 1);
4273 STOP_TIMER("put_symbol")
4275 ff_rac_terminate(&s.c);
4277 memset(s.header_state, 0, sizeof(s.header_state));
4278 ff_init_range_decoder(&s.c, buffer[0], 256*256);
4279 ff_init_cabac_states(&s.c, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64);
4281 for(i=-256; i<256; i++){
4284 j= get_symbol(&s.c, s.header_state, 1);
4285 STOP_TIMER("get_symbol")
4286 if(j!=i*i*i/3*ABS(i)) printf("fsck: %d != %d\n", i, j);
4290 int level, orientation, x, y;
4291 int64_t errors[8][4];
4294 memset(errors, 0, sizeof(errors));
4295 s.spatial_decomposition_count=3;
4296 s.spatial_decomposition_type=0;
4297 for(level=0; level<s.spatial_decomposition_count; level++){
4298 for(orientation=level ? 1 : 0; orientation<4; orientation++){
4299 int w= width >> (s.spatial_decomposition_count-level);
4300 int h= height >> (s.spatial_decomposition_count-level);
4301 int stride= width << (s.spatial_decomposition_count-level);
4302 DWTELEM *buf= buffer[0];
4305 if(orientation&1) buf+=w;
4306 if(orientation>1) buf+=stride>>1;
4308 memset(buffer[0], 0, sizeof(int)*width*height);
4309 buf[w/2 + h/2*stride]= 256*256;
4310 ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4311 for(y=0; y<height; y++){
4312 for(x=0; x<width; x++){
4313 int64_t d= buffer[0][x + y*width];
4315 if(ABS(width/2-x)<9 && ABS(height/2-y)<9 && level==2) printf("%8lld ", d);
4317 if(ABS(height/2-y)<9 && level==2) printf("\n");
4319 error= (int)(sqrt(error)+0.5);
4320 errors[level][orientation]= error;
4321 if(g) g=ff_gcd(g, error);
4325 printf("static int const visual_weight[][4]={\n");
4326 for(level=0; level<s.spatial_decomposition_count; level++){
4328 for(orientation=0; orientation<4; orientation++){
4329 printf("%8lld,", errors[level][orientation]/g);
4337 int w= width >> (s.spatial_decomposition_count-level);
4338 int h= height >> (s.spatial_decomposition_count-level);
4339 int stride= width << (s.spatial_decomposition_count-level);
4340 DWTELEM *buf= buffer[0];
4346 memset(buffer[0], 0, sizeof(int)*width*height);
4348 for(y=0; y<height; y++){
4349 for(x=0; x<width; x++){
4350 int tab[4]={0,2,3,1};
4351 buffer[0][x+width*y]= 256*256*tab[(x&1) + 2*(y&1)];
4354 ff_spatial_dwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4358 buf[x + y*stride ]=169;
4359 buf[x + y*stride-w]=64;
4362 ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4364 for(y=0; y<height; y++){
4365 for(x=0; x<width; x++){
4366 int64_t d= buffer[0][x + y*width];
4368 if(ABS(width/2-x)<9 && ABS(height/2-y)<9) printf("%8lld ", d);
4370 if(ABS(height/2-y)<9) printf("\n");