2 * Copyright (C) 2004 Michael Niedermayer <michaelni@gmx.at>
4 * This library is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU Lesser General Public
6 * License as published by the Free Software Foundation; either
7 * version 2 of the License, or (at your option) any later version.
9 * This library is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * Lesser General Public License for more details.
14 * You should have received a copy of the GNU Lesser General Public
15 * License along with this library; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
23 #include "rangecoder.h"
26 #include "mpegvideo.h"
31 #define MAX_DECOMPOSITIONS 8
35 #define QROOT (1<<QSHIFT)
36 #define LOSSLESS_QLOG -128
39 static const int8_t quant3[256]={
40 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
41 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
42 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
43 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
44 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
45 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
46 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
47 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
48 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
49 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
50 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
51 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
52 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
53 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
54 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
55 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 0,
57 static const int8_t quant3b[256]={
58 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
59 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
60 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
61 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
62 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
63 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
64 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
65 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
66 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
67 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
68 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
69 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
70 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
71 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
72 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
73 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
75 static const int8_t quant3bA[256]={
76 0, 0, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
77 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
78 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
79 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
80 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
81 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
82 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
83 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
84 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
85 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
86 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
87 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
88 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
89 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
90 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
91 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
93 static const int8_t quant5[256]={
94 0, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
95 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
96 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
97 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
98 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
99 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
100 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
101 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
102 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
103 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
104 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
105 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
106 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
107 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
108 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
109 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,-1,
111 static const int8_t quant7[256]={
112 0, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
113 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
114 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3,
115 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
116 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
117 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
118 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
119 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
120 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
121 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
122 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
123 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
124 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
125 -3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-2,-2,-2,
126 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
127 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,
129 static const int8_t quant9[256]={
130 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3,
131 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
132 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
133 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
134 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
135 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
136 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
137 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
138 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
139 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
140 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
141 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
142 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
143 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
144 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,
145 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-1,-1,
147 static const int8_t quant11[256]={
148 0, 1, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4,
149 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
150 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
151 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
152 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
153 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
154 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
155 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
156 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
157 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
158 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
159 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
160 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
161 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-4,-4,
162 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
163 -4,-4,-4,-4,-4,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-1,
165 static const int8_t quant13[256]={
166 0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
167 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
168 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
169 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
170 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
171 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
172 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
173 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
174 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
175 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
176 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
177 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
178 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-5,
179 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
180 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
181 -4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,-2,-2,-1,
184 #define LOG2_OBMC_MAX 6
185 #define OBMC_MAX (1<<(LOG2_OBMC_MAX))
187 static const uint8_t obmc32[1024]={
188 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
189 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
190 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
191 0, 0, 1, 1, 2, 2, 3, 4, 4, 5, 6, 6, 7, 7, 8, 8, 8, 8, 7, 7, 6, 6, 5, 4, 4, 3, 2, 2, 1, 1, 0, 0,
192 0, 0, 1, 2, 2, 3, 4, 6, 7, 8, 9,10,11,12,12,12,12,12,12,11,10, 9, 8, 7, 6, 4, 3, 2, 2, 1, 0, 0,
193 0, 1, 1, 2, 3, 5, 6, 8,10,11,13,14,15,16,17,18,18,17,16,15,14,13,11,10, 8, 6, 5, 3, 2, 1, 1, 0,
194 0, 1, 1, 3, 4, 6, 8,10,13,15,17,19,20,22,22,23,23,22,22,20,19,17,15,13,10, 8, 6, 4, 3, 1, 1, 0,
195 0, 1, 2, 4, 6, 8,10,13,16,19,21,23,25,27,28,29,29,28,27,25,23,21,19,16,13,10, 8, 6, 4, 2, 1, 0,
196 0, 1, 2, 4, 7,10,13,16,19,22,25,28,31,33,34,35,35,34,33,31,28,25,22,19,16,13,10, 7, 4, 2, 1, 0,
197 0, 1, 3, 5, 8,11,15,19,22,26,30,33,36,38,40,41,41,40,38,36,33,30,26,22,19,15,11, 8, 5, 3, 1, 0,
198 0, 1, 3, 6, 9,12,17,21,25,30,34,38,41,44,45,46,46,45,44,41,38,34,30,25,21,17,12, 9, 6, 3, 1, 0,
199 0, 1, 3, 6,10,14,19,23,28,33,38,42,45,48,51,52,52,51,48,45,42,38,33,28,23,19,14,10, 6, 3, 1, 0,
200 0, 1, 4, 7,11,15,20,25,31,36,41,45,49,52,55,56,56,55,52,49,45,41,36,31,25,20,15,11, 7, 4, 1, 0,
201 0, 2, 4, 7,12,16,22,27,33,38,44,48,52,56,58,60,60,58,56,52,48,44,38,33,27,22,16,12, 7, 4, 2, 0,
202 0, 1, 4, 8,12,17,22,28,34,40,45,51,55,58,61,62,62,61,58,55,51,45,40,34,28,22,17,12, 8, 4, 1, 0,
203 0, 2, 4, 8,12,18,23,29,35,41,46,52,56,60,62,64,64,62,60,56,52,46,41,35,29,23,18,12, 8, 4, 2, 0,
204 0, 2, 4, 8,12,18,23,29,35,41,46,52,56,60,62,64,64,62,60,56,52,46,41,35,29,23,18,12, 8, 4, 2, 0,
205 0, 1, 4, 8,12,17,22,28,34,40,45,51,55,58,61,62,62,61,58,55,51,45,40,34,28,22,17,12, 8, 4, 1, 0,
206 0, 2, 4, 7,12,16,22,27,33,38,44,48,52,56,58,60,60,58,56,52,48,44,38,33,27,22,16,12, 7, 4, 2, 0,
207 0, 1, 4, 7,11,15,20,25,31,36,41,45,49,52,55,56,56,55,52,49,45,41,36,31,25,20,15,11, 7, 4, 1, 0,
208 0, 1, 3, 6,10,14,19,23,28,33,38,42,45,48,51,52,52,51,48,45,42,38,33,28,23,19,14,10, 6, 3, 1, 0,
209 0, 1, 3, 6, 9,12,17,21,25,30,34,38,41,44,45,46,46,45,44,41,38,34,30,25,21,17,12, 9, 6, 3, 1, 0,
210 0, 1, 3, 5, 8,11,15,19,22,26,30,33,36,38,40,41,41,40,38,36,33,30,26,22,19,15,11, 8, 5, 3, 1, 0,
211 0, 1, 2, 4, 7,10,13,16,19,22,25,28,31,33,34,35,35,34,33,31,28,25,22,19,16,13,10, 7, 4, 2, 1, 0,
212 0, 1, 2, 4, 6, 8,10,13,16,19,21,23,25,27,28,29,29,28,27,25,23,21,19,16,13,10, 8, 6, 4, 2, 1, 0,
213 0, 1, 1, 3, 4, 6, 8,10,13,15,17,19,20,22,22,23,23,22,22,20,19,17,15,13,10, 8, 6, 4, 3, 1, 1, 0,
214 0, 1, 1, 2, 3, 5, 6, 8,10,11,13,14,15,16,17,18,18,17,16,15,14,13,11,10, 8, 6, 5, 3, 2, 1, 1, 0,
215 0, 0, 1, 2, 2, 3, 4, 6, 7, 8, 9,10,11,12,12,12,12,12,12,11,10, 9, 8, 7, 6, 4, 3, 2, 2, 1, 0, 0,
216 0, 0, 1, 1, 2, 2, 3, 4, 4, 5, 6, 6, 7, 7, 8, 8, 8, 8, 7, 7, 6, 6, 5, 4, 4, 3, 2, 2, 1, 1, 0, 0,
217 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
218 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
219 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
222 static const uint8_t obmc16[256]={
223 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
224 0, 1, 1, 2, 4, 5, 5, 6, 6, 5, 5, 4, 2, 1, 1, 0,
225 0, 1, 4, 6, 9,11,13,15,15,13,11, 9, 6, 4, 1, 0,
226 0, 2, 6,11,15,20,24,26,26,24,20,15,11, 6, 2, 0,
227 0, 4, 9,15,23,29,34,38,38,34,29,23,15, 9, 4, 0,
228 0, 5,11,20,29,38,45,49,49,45,38,29,20,11, 5, 0,
229 1, 5,13,24,34,45,53,57,57,53,45,34,24,13, 5, 1,
230 1, 6,15,26,38,49,57,62,62,57,49,38,26,15, 6, 1,
231 1, 6,15,26,38,49,57,62,62,57,49,38,26,15, 6, 1,
232 1, 5,13,24,34,45,53,57,57,53,45,34,24,13, 5, 1,
233 0, 5,11,20,29,38,45,49,49,45,38,29,20,11, 5, 0,
234 0, 4, 9,15,23,29,34,38,38,34,29,23,15, 9, 4, 0,
235 0, 2, 6,11,15,20,24,26,26,24,20,15,11, 6, 2, 0,
236 0, 1, 4, 6, 9,11,13,15,15,13,11, 9, 6, 4, 1, 0,
237 0, 1, 1, 2, 4, 5, 5, 6, 6, 5, 5, 4, 2, 1, 1, 0,
238 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
242 static const uint8_t obmc32[1024]={
243 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0,
244 0, 1, 1, 1, 2, 2, 2, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 5, 5, 5, 4, 4, 4, 3, 3, 2, 2, 2, 1, 1, 1, 0,
245 0, 1, 2, 2, 3, 3, 4, 5, 5, 6, 7, 7, 8, 8, 9,10,10, 9, 8, 8, 7, 7, 6, 5, 5, 4, 3, 3, 2, 2, 1, 0,
246 0, 1, 2, 3, 4, 5, 6, 7, 7, 8, 9,10,11,12,13,14,14,13,12,11,10, 9, 8, 7, 7, 6, 5, 4, 3, 2, 1, 0,
247 1, 2, 3, 4, 5, 6, 7, 8,10,11,12,13,14,15,16,17,17,16,15,14,13,12,11,10, 8, 7, 6, 5, 4, 3, 2, 1,
248 1, 2, 3, 5, 6, 8, 9,10,12,13,14,16,17,19,20,21,21,20,19,17,16,14,13,12,10, 9, 8, 6, 5, 3, 2, 1,
249 1, 2, 4, 6, 7, 9,11,12,14,15,17,19,20,22,24,25,25,24,22,20,19,17,15,14,12,11, 9, 7, 6, 4, 2, 1,
250 1, 3, 5, 7, 8,10,12,14,16,18,20,22,23,25,27,29,29,27,25,23,22,20,18,16,14,12,10, 8, 7, 5, 3, 1,
251 1, 3, 5, 7,10,12,14,16,18,20,22,24,27,29,31,33,33,31,29,27,24,22,20,18,16,14,12,10, 7, 5, 3, 1,
252 1, 4, 6, 8,11,13,15,18,20,23,25,27,30,32,34,37,37,34,32,30,27,25,23,20,18,15,13,11, 8, 6, 4, 1,
253 1, 4, 7, 9,12,14,17,20,22,25,28,30,33,35,38,41,41,38,35,33,30,28,25,22,20,17,14,12, 9, 7, 4, 1,
254 1, 4, 7,10,13,16,19,22,24,27,30,33,36,39,42,45,45,42,39,36,33,30,27,24,22,19,16,13,10, 7, 4, 1,
255 2, 5, 8,11,14,17,20,23,27,30,33,36,39,42,45,48,48,45,42,39,36,33,30,27,23,20,17,14,11, 8, 5, 2,
256 2, 5, 8,12,15,19,22,25,29,32,35,39,42,46,49,52,52,49,46,42,39,35,32,29,25,22,19,15,12, 8, 5, 2,
257 2, 5, 9,13,16,20,24,27,31,34,38,42,45,49,53,56,56,53,49,45,42,38,34,31,27,24,20,16,13, 9, 5, 2,
258 2, 6,10,14,17,21,25,29,33,37,41,45,48,52,56,60,60,56,52,48,45,41,37,33,29,25,21,17,14,10, 6, 2,
259 2, 6,10,14,17,21,25,29,33,37,41,45,48,52,56,60,60,56,52,48,45,41,37,33,29,25,21,17,14,10, 6, 2,
260 2, 5, 9,13,16,20,24,27,31,34,38,42,45,49,53,56,56,53,49,45,42,38,34,31,27,24,20,16,13, 9, 5, 2,
261 2, 5, 8,12,15,19,22,25,29,32,35,39,42,46,49,52,52,49,46,42,39,35,32,29,25,22,19,15,12, 8, 5, 2,
262 2, 5, 8,11,14,17,20,23,27,30,33,36,39,42,45,48,48,45,42,39,36,33,30,27,23,20,17,14,11, 8, 5, 2,
263 1, 4, 7,10,13,16,19,22,24,27,30,33,36,39,42,45,45,42,39,36,33,30,27,24,22,19,16,13,10, 7, 4, 1,
264 1, 4, 7, 9,12,14,17,20,22,25,28,30,33,35,38,41,41,38,35,33,30,28,25,22,20,17,14,12, 9, 7, 4, 1,
265 1, 4, 6, 8,11,13,15,18,20,23,25,27,30,32,34,37,37,34,32,30,27,25,23,20,18,15,13,11, 8, 6, 4, 1,
266 1, 3, 5, 7,10,12,14,16,18,20,22,24,27,29,31,33,33,31,29,27,24,22,20,18,16,14,12,10, 7, 5, 3, 1,
267 1, 3, 5, 7, 8,10,12,14,16,18,20,22,23,25,27,29,29,27,25,23,22,20,18,16,14,12,10, 8, 7, 5, 3, 1,
268 1, 2, 4, 6, 7, 9,11,12,14,15,17,19,20,22,24,25,25,24,22,20,19,17,15,14,12,11, 9, 7, 6, 4, 2, 1,
269 1, 2, 3, 5, 6, 8, 9,10,12,13,14,16,17,19,20,21,21,20,19,17,16,14,13,12,10, 9, 8, 6, 5, 3, 2, 1,
270 1, 2, 3, 4, 5, 6, 7, 8,10,11,12,13,14,15,16,17,17,16,15,14,13,12,11,10, 8, 7, 6, 5, 4, 3, 2, 1,
271 0, 1, 2, 3, 4, 5, 6, 7, 7, 8, 9,10,11,12,13,14,14,13,12,11,10, 9, 8, 7, 7, 6, 5, 4, 3, 2, 1, 0,
272 0, 1, 2, 2, 3, 3, 4, 5, 5, 6, 7, 7, 8, 8, 9,10,10, 9, 8, 8, 7, 7, 6, 5, 5, 4, 3, 3, 2, 2, 1, 0,
273 0, 1, 1, 1, 2, 2, 2, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 5, 5, 5, 4, 4, 4, 3, 3, 2, 2, 2, 1, 1, 1, 0,
274 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0,
277 static const uint8_t obmc16[256]={
278 0, 1, 1, 2, 2, 3, 3, 4, 4, 3, 3, 2, 2, 1, 1, 0,
279 1, 2, 4, 5, 7, 8,10,11,11,10, 8, 7, 5, 4, 2, 1,
280 1, 4, 6, 9,11,14,16,19,19,16,14,11, 9, 6, 4, 1,
281 2, 5, 9,12,16,19,23,26,26,23,19,16,12, 9, 5, 2,
282 2, 7,11,16,20,25,29,34,34,29,25,20,16,11, 7, 2,
283 3, 8,14,19,25,30,36,41,41,36,30,25,19,14, 8, 3,
284 3,10,16,23,29,36,42,49,49,42,36,29,23,16,10, 3,
285 4,11,19,26,34,41,49,56,56,49,41,34,26,19,11, 4,
286 4,11,19,26,34,41,49,56,56,49,41,34,26,19,11, 4,
287 3,10,16,23,29,36,42,49,49,42,36,29,23,16,10, 3,
288 3, 8,14,19,25,30,36,41,41,36,30,25,19,14, 8, 3,
289 2, 7,11,16,20,25,29,34,34,29,25,20,16,11, 7, 2,
290 2, 5, 9,12,16,19,23,26,26,23,19,16,12, 9, 5, 2,
291 1, 4, 6, 9,11,14,16,19,19,16,14,11, 9, 6, 4, 1,
292 1, 2, 4, 5, 7, 8,10,11,11,10, 8, 7, 5, 4, 2, 1,
293 0, 1, 1, 2, 2, 3, 3, 4, 4, 3, 3, 2, 2, 1, 1, 0,
297 static const uint8_t obmc32[1024]={
298 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
299 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
300 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
301 0, 0, 1, 1, 1, 2, 2, 3, 4, 5, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 6, 5, 5, 4, 3, 2, 2, 1, 1, 1, 0, 0,
302 0, 0, 1, 1, 2, 3, 4, 5, 6, 7, 9,10,11,11,12,12,12,12,11,11,10, 9, 7, 6, 5, 4, 3, 2, 1, 1, 0, 0,
303 0, 0, 1, 2, 3, 5, 6, 8, 9,11,12,14,15,16,17,17,17,17,16,15,14,12,11, 9, 8, 6, 5, 3, 2, 1, 0, 0,
304 0, 1, 1, 2, 4, 6, 8,10,12,15,17,19,20,21,22,23,23,22,21,20,19,17,15,12,10, 8, 6, 4, 2, 1, 1, 0,
305 0, 1, 2, 3, 5, 8,10,13,16,19,21,24,26,27,28,29,29,28,27,26,24,21,19,16,13,10, 8, 5, 3, 2, 1, 0,
306 0, 1, 2, 4, 6, 9,12,16,19,23,26,29,31,33,34,35,35,34,33,31,29,26,23,19,16,12, 9, 6, 4, 2, 1, 0,
307 0, 1, 3, 5, 7,11,15,19,23,26,30,34,37,39,40,41,41,40,39,37,34,30,26,23,19,15,11, 7, 5, 3, 1, 0,
308 0, 1, 3, 5, 9,12,17,21,26,30,35,38,42,44,46,47,47,46,44,42,38,35,30,26,21,17,12, 9, 5, 3, 1, 0,
309 0, 1, 3, 6, 9,14,19,24,29,34,38,43,46,49,51,52,52,51,49,46,43,38,34,29,24,19,14, 9, 6, 3, 1, 0,
310 0, 1, 3, 6,11,15,20,26,31,37,42,46,50,53,56,57,57,56,53,50,46,42,37,31,26,20,15,11, 6, 3, 1, 0,
311 0, 1, 3, 7,11,16,21,27,33,39,44,49,53,57,59,60,60,59,57,53,49,44,39,33,27,21,16,11, 7, 3, 1, 0,
312 0, 1, 4, 7,12,17,22,28,34,40,46,51,56,59,61,63,63,61,59,56,51,46,40,34,28,22,17,12, 7, 4, 1, 0,
313 0, 1, 4, 7,12,17,23,29,35,41,47,52,57,60,63,64,64,63,60,57,52,47,41,35,29,23,17,12, 7, 4, 1, 0,
314 0, 1, 4, 7,12,17,23,29,35,41,47,52,57,60,63,64,64,63,60,57,52,47,41,35,29,23,17,12, 7, 4, 1, 0,
315 0, 1, 4, 7,12,17,22,28,34,40,46,51,56,59,61,63,63,61,59,56,51,46,40,34,28,22,17,12, 7, 4, 1, 0,
316 0, 1, 3, 7,11,16,21,27,33,39,44,49,53,57,59,60,60,59,57,53,49,44,39,33,27,21,16,11, 7, 3, 1, 0,
317 0, 1, 3, 6,11,15,20,26,31,37,42,46,50,53,56,57,57,56,53,50,46,42,37,31,26,20,15,11, 6, 3, 1, 0,
318 0, 1, 3, 6, 9,14,19,24,29,34,38,43,46,49,51,52,52,51,49,46,43,38,34,29,24,19,14, 9, 6, 3, 1, 0,
319 0, 1, 3, 5, 9,12,17,21,26,30,35,38,42,44,46,47,47,46,44,42,38,35,30,26,21,17,12, 9, 5, 3, 1, 0,
320 0, 1, 3, 5, 7,11,15,19,23,26,30,34,37,39,40,41,41,40,39,37,34,30,26,23,19,15,11, 7, 5, 3, 1, 0,
321 0, 1, 2, 4, 6, 9,12,16,19,23,26,29,31,33,34,35,35,34,33,31,29,26,23,19,16,12, 9, 6, 4, 2, 1, 0,
322 0, 1, 2, 3, 5, 8,10,13,16,19,21,24,26,27,28,29,29,28,27,26,24,21,19,16,13,10, 8, 5, 3, 2, 1, 0,
323 0, 1, 1, 2, 4, 6, 8,10,12,15,17,19,20,21,22,23,23,22,21,20,19,17,15,12,10, 8, 6, 4, 2, 1, 1, 0,
324 0, 0, 1, 2, 3, 5, 6, 8, 9,11,12,14,15,16,17,17,17,17,16,15,14,12,11, 9, 8, 6, 5, 3, 2, 1, 0, 0,
325 0, 0, 1, 1, 2, 3, 4, 5, 6, 7, 9,10,11,11,12,12,12,12,11,11,10, 9, 7, 6, 5, 4, 3, 2, 1, 1, 0, 0,
326 0, 0, 1, 1, 1, 2, 2, 3, 4, 5, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 6, 5, 5, 4, 3, 2, 2, 1, 1, 1, 0, 0,
327 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
328 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
329 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
332 static const uint8_t obmc16[256]={
333 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
334 0, 0, 1, 2, 3, 4, 5, 5, 5, 5, 4, 3, 2, 1, 0, 0,
335 0, 1, 3, 6, 8,11,13,14,14,13,11, 8, 6, 3, 1, 0,
336 0, 2, 6,10,15,20,24,26,26,24,20,15,10, 6, 2, 0,
337 0, 3, 8,16,23,30,35,38,38,35,30,23,16, 8, 3, 0,
338 1, 4,11,20,30,39,46,49,49,46,39,30,20,11, 4, 1,
339 1, 5,13,24,35,46,54,58,58,54,46,35,24,13, 5, 1,
340 0, 5,14,26,38,49,58,63,63,58,49,38,26,14, 5, 0,
341 0, 5,14,26,38,49,58,63,63,58,49,38,26,14, 5, 0,
342 1, 5,13,24,35,46,54,58,58,54,46,35,24,13, 5, 1,
343 1, 4,11,20,30,39,46,49,49,46,39,30,20,11, 4, 1,
344 0, 3, 8,16,23,30,35,38,38,35,30,23,16, 8, 3, 0,
345 0, 2, 6,10,15,20,24,26,26,24,20,15,10, 6, 2, 0,
346 0, 1, 3, 6, 8,11,13,14,14,13,11, 8, 6, 3, 1, 0,
347 0, 0, 1, 2, 3, 4, 5, 5, 5, 5, 4, 3, 2, 1, 0, 0,
348 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
354 static const uint8_t obmc8[64]={
355 1, 3, 5, 7, 7, 5, 3, 1,
356 3, 9,15,21,21,15, 9, 3,
357 5,15,25,35,35,25,15, 5,
358 7,21,35,49,49,35,21, 7,
359 7,21,35,49,49,35,21, 7,
360 5,15,25,35,35,25,15, 5,
361 3, 9,15,21,21,15, 9, 3,
362 1, 3, 5, 7, 7, 5, 3, 1,
367 static const uint8_t obmc4[16]={
/**
 * Table of pointers to the OBMC weight tables, ordered from the largest
 * (obmc32, 1024 entries) to the smallest (obmc4, 16 entries).
 * NOTE(review): how callers pick the index is not visible in this excerpt.
 */
375 static const uint8_t *obmc_tab[4]={
376 obmc32, obmc16, obmc8, obmc4
/**
 * Per-block description node.
 * Only part of the definition is visible in this excerpt: the BLOCK_INTRA
 * flag value and the `level` member. Other members are not shown here.
 */
379 typedef struct BlockNode{
384 //#define TYPE_SPLIT 1
// Flag value 1. NOTE(review): presumably tested against a type field that is not visible here -- confirm.
385 #define BLOCK_INTRA 1
387 //#define TYPE_NOCOLOR 4
388 uint8_t level; //FIXME merge into type?
/**
 * Constant BlockNode whose three color components are all set to 128.
 * Any further initializers are not visible in this excerpt.
 */
391 static const BlockNode null_block= { //FIXME add border maybe
392 .color= {128,128,128},
// MB_SIZE is derived from its base-2 logarithm: 1<<4 == 16.
399 #define LOG2_MB_SIZE 4
400 #define MB_SIZE (1<<LOG2_MB_SIZE)
/**
 * Element type pointed to by SubBand::x_coeff.
 * The members are not visible in this excerpt.
 */
402 typedef struct x_and_coeff{
/**
 * Description of one wavelet subband.
 * Only some members are visible in this excerpt.
 */
407 typedef struct SubBand{
412 int qlog; ///< log(qscale)/log[2^(1/6)]
416 int stride_line; ///< Stride measured in lines, not pixels.
417 x_and_coeff * x_coeff;
// Link to another SubBand. NOTE(review): presumably the co-located band of the next coarser level -- confirm.
418 struct SubBand *parent;
// 519 (7 + 512) rows of 32 state bytes each. put_symbol()/get_symbol() address
// offsets 0..31 of the state pointer they are given, which matches one row.
419 uint8_t state[/*7*2*/ 7 + 512][32];
/**
 * Per-plane data. Only the subband array is visible in this excerpt.
 */
422 typedef struct Plane{
// Up to MAX_DECOMPOSITIONS (8) decomposition levels, 4 SubBand slots per level.
425 SubBand band[MAX_DECOMPOSITIONS][4];
// Members of the slice_buffer structure used by the slice_buffer_* functions.
// The struct's opening line and its integer members (line_count, line_width,
// data_count, data_stack_top) are not visible in this excerpt.
428 /** Used to minimize the amount of memory used in order to optimize cache performance. **/
// One pointer per line; zero-filled by slice_buffer_init(), so NULL means no buffer is bound.
430 DWTELEM * * line; ///< For use by idwt and predict_slices.
// Stack of unbound line buffers; buffers are popped by slice_buffer_load_line()
// and pushed back by slice_buffer_release().
431 DWTELEM * * data_stack; ///< Used for internal purposes.
436 DWTELEM * base_buffer; ///< Buffer that this structure is caching.
/**
 * Codec context.
 * Only part of the definition is visible in this excerpt; members are
 * documented only where the visible code shows how they are laid out.
 */
439 typedef struct SnowContext{
440 // MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to make the motion estimation eventually independent of MpegEncContext, so this will be removed then (FIXME/XXX)
442 AVCodecContext *avctx;
446 AVFrame input_picture; ///< new_picture with the internal linesizes
447 AVFrame current_picture;
448 AVFrame last_picture;
449 AVFrame mconly_picture;
450 // uint8_t q_context[16];
// 32 state bytes: the size of one put_symbol()/get_symbol() context (offsets 0..31).
451 uint8_t header_state[32];
// 128 + 32*128 == 4224 state bytes.
452 uint8_t block_state[128 + 32*128];
456 int spatial_decomposition_type;
457 int temporal_decomposition_type;
458 int spatial_decomposition_count;
459 int temporal_decomposition_count;
460 DWTELEM *spatial_dwt_buffer;
464 int spatial_scalability;
470 #define QBIAS_SHIFT 3
474 Plane plane[MAX_PLANES];
476 #define ME_CACHE_SIZE 1024
477 int me_cache[ME_CACHE_SIZE];
478 int me_cache_generation;
481 MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to make the motion estimation eventually independent of MpegEncContext, so this will be removed then (FIXME/XXX)
/**
 * Yields the buffer bound to line_num: the stored pointer when it is non-NULL,
 * otherwise the result of slice_buffer_load_line().
 * Both macro arguments are expanded more than once, so they must not have
 * side effects.
 */
492 #define slice_buffer_get_line(slice_buf, line_num) ((slice_buf)->line[line_num] ? (slice_buf)->line[line_num] : slice_buffer_load_line((slice_buf), (line_num)))
493 //#define slice_buffer_get_line(slice_buf, line_num) (slice_buffer_load_line((slice_buf), (line_num)))
// Forward declaration; the definition is not in this excerpt.
495 static void iterative_me(SnowContext *s);
/**
 * Initializes a slice_buffer.
 *
 * @param buf                  structure to fill in
 * @param line_count           number of entries in the per-line pointer table
 * @param max_allocated_lines  number of line buffers allocated up front
 * @param line_width           number of DWTELEM elements per line buffer
 * @param base_buffer          stored in buf->base_buffer
 *
 * NOTE(review): no check of the allocation results is visible in this excerpt.
 */
497 static void slice_buffer_init(slice_buffer * buf, int line_count, int max_allocated_lines, int line_width, DWTELEM * base_buffer)
501 buf->base_buffer = base_buffer;
502 buf->line_count = line_count;
503 buf->line_width = line_width;
504 buf->data_count = max_allocated_lines;
// Zero-filled table: every line starts out with no buffer bound to it.
505 buf->line = (DWTELEM * *) av_mallocz (sizeof(DWTELEM *) * line_count);
506 buf->data_stack = (DWTELEM * *) av_malloc (sizeof(DWTELEM *) * max_allocated_lines);
// Fill the stack with max_allocated_lines buffers of line_width elements each.
508 for (i = 0; i < max_allocated_lines; i++)
510 buf->data_stack[i] = (DWTELEM *) av_malloc (sizeof(DWTELEM) * line_width);
// The stack is full: its top is the last allocated buffer.
513 buf->data_stack_top = max_allocated_lines - 1;
/**
 * Binds a free buffer from the data stack to the given line.
 *
 * @param buf   slice buffer
 * @param line  index into buf->line
 * @return pointer to a line buffer. NOTE(review): the final return statement
 *         is not visible in this excerpt.
 *
 * Several lines of this function are missing from the excerpt, including the
 * condition that guards the early return below.
 */
516 static DWTELEM * slice_buffer_load_line(slice_buffer * buf, int line)
521 // av_log(NULL, AV_LOG_DEBUG, "Cache hit: %d\n", line);
// At least one unbound buffer must be left on the stack.
523 assert(buf->data_stack_top >= 0);
524 // assert(!buf->line[line]);
// Early exit returning the pointer already stored for this line.
// NOTE(review): presumably taken only when that pointer is non-NULL -- confirm.
526 return buf->line[line];
// Element offset of this line (line_width * line); its use is not visible here.
528 offset = buf->line_width * line;
// Pop the top buffer off the stack and bind it to the line.
529 buffer = buf->data_stack[buf->data_stack_top];
530 buf->data_stack_top--;
531 buf->line[line] = buffer;
533 // av_log(NULL, AV_LOG_DEBUG, "slice_buffer_load_line: line: %d remaining: %d\n", line, buf->data_stack_top + 1);
/**
 * Unbinds the buffer attached to a line and pushes it back onto the data stack.
 *
 * @param buf   slice buffer
 * @param line  index into buf->line; asserted to be in range and to have a
 *              buffer bound
 */
538 static void slice_buffer_release(slice_buffer * buf, int line)
543 assert(line >= 0 && line < buf->line_count);
544 assert(buf->line[line]);
// Element offset of this line (line_width * line); its use is not visible here.
546 offset = buf->line_width * line;
// Push the line's buffer back on the stack and mark the line as unbound.
547 buffer = buf->line[line];
548 buf->data_stack_top++;
549 buf->data_stack[buf->data_stack_top] = buffer;
550 buf->line[line] = NULL;
552 // av_log(NULL, AV_LOG_DEBUG, "slice_buffer_release: line: %d remaining: %d\n", line, buf->data_stack_top + 1);
/**
 * Walks all buf->line_count lines and releases them back to the data stack.
 *
 * NOTE(review): slice_buffer_release() asserts that the line has a buffer
 * bound. A guard that skips unbound lines is presumably on a line missing
 * from this excerpt -- confirm.
 */
555 static void slice_buffer_flush(slice_buffer * buf)
558 for (i = 0; i < buf->line_count; i++)
562 // av_log(NULL, AV_LOG_DEBUG, "slice_buffer_flush: line: %d \n", i);
563 slice_buffer_release(buf, i);
/**
 * Releases the memory owned by a slice_buffer.
 *
 * Flushes first so that every buffer is back on the data stack, then frees
 * each of the buf->data_count buffers and the stack array itself.
 * NOTE(review): freeing of buf->line is not visible in this excerpt; the
 * function continues past the last visible line -- confirm.
 */
568 static void slice_buffer_destroy(slice_buffer * buf)
571 slice_buffer_flush(buf);
// Free the line buffers from the last stack slot down to the first.
573 for (i = buf->data_count - 1; i >= 0; i--)
575 assert(buf->data_stack[i]);
576 av_free(buf->data_stack[i]);
578 assert(buf->data_stack);
579 av_free(buf->data_stack);
585 // Avoid a name clash on SGI IRIX
// Expands to (15 - FRAC_BITS); FRAC_BITS is defined outside this excerpt.
588 #define QEXPSHIFT (7-FRAC_BITS+8) //FIXME try to change this to 0
// Table with QROOT byte entries. NOTE(review): where it is filled is not visible in this excerpt.
589 static uint8_t qexp[QROOT];
/**
 * Reflects an index at a boundary.
 *
 * The only branch visible in this excerpt maps v > m to 2*m - v, i.e. it
 * reflects around m. The other branches are not shown.
 * NOTE(review): presumably negative v is reflected around 0 and in-range v is
 * returned unchanged -- confirm.
 */
591 static inline int mirror(int v, int m){
593 else if(v>m) return 2*m-v;
/**
 * Writes the integer v through the range coder using adaptive binary contexts.
 *
 * @param c          range coder
 * @param state      context bytes; the visible code addresses offsets 0..31
 * @param v          value to write
 * @param is_signed  NOTE(review): presumably enables the sign bit; the lines
 *                   that test it are not visible in this excerpt
 *
 * Context layout, as used by the visible put_rac() calls:
 *   state+0       first flag: 0 is written before the exponent/mantissa path,
 *                 1 on the remaining path
 *   state+1..10   exponent, written as a run of 1s ended by a 0
 *   state+11..21  sign (v < 0)
 *   state+22..31  mantissa bits of a, from bit e-1 downwards
 *
 * Three similar encoding sequences are visible below. NOTE(review): they look
 * like alternatives separated by conditionals missing from this excerpt, and
 * `a` is presumably the absolute value of v -- confirm both.
 */
597 static inline void put_symbol(RangeCoder *c, uint8_t *state, int v, int is_signed){
602 const int e= av_log2(a);
// First sequence: exponent contexts capped at index 9, sign context capped at 10.
604 const int el= FFMIN(e, 10);
605 put_rac(c, state+0, 0);
608 put_rac(c, state+1+i, 1); //1..10
611 put_rac(c, state+1+9, 1); //1..10
613 put_rac(c, state+1+FFMIN(i,9), 0);
// Mantissa bits at position el and above all share context 22+9.
615 for(i=e-1; i>=el; i--){
616 put_rac(c, state+22+9, (a>>i)&1); //22..31
619 put_rac(c, state+22+i, (a>>i)&1); //22..31
623 put_rac(c, state+11 + el, v < 0); //11..21
// Second sequence: context indices are used without clamping.
626 put_rac(c, state+0, 0);
629 put_rac(c, state+1+i, 1); //1..10
631 put_rac(c, state+1+i, 0);
633 for(i=e-1; i>=0; i--){
634 put_rac(c, state+22+i, (a>>i)&1); //22..31
638 put_rac(c, state+11 + e, v < 0); //11..21
// Third sequence: every context index is clamped with FFMIN.
641 put_rac(c, state+1+FFMIN(i,9), 1); //1..10
643 put_rac(c, state+1+FFMIN(i,9), 0);
645 for(i=e-1; i>=0; i--){
646 put_rac(c, state+22+FFMIN(i,9), (a>>i)&1); //22..31
650 put_rac(c, state+11 + FFMIN(e,10), v < 0); //11..21
// Remaining path: only the first flag is written, with value 1.
// NOTE(review): presumably the v == 0 case -- confirm.
654 put_rac(c, state+0, 1);
/**
 * Reads an integer through the range coder; counterpart of put_symbol().
 *
 * @param c          range coder
 * @param state      context bytes; the visible code addresses offsets 0..31
 * @param is_signed  when non-zero, a sign bit is read from state+11+min(e,10)
 * @return the decoded value. NOTE(review): the return statements are not
 *         visible in this excerpt.
 */
658 static inline int get_symbol(RangeCoder *c, uint8_t *state, int is_signed){
// First flag. NOTE(review): presumably a set flag means the value is 0 -- confirm.
659 if(get_rac(c, state+0))
// Exponent: count 1 bits; the context index is clamped to 9.
664 while(get_rac(c, state+1 + FFMIN(e,9))){ //1..10
// Mantissa: shift in one bit per iteration, from bit e-1 down to bit 0.
669 for(i=e-1; i>=0; i--){
670 a += a + get_rac(c, state+22 + FFMIN(i,9)); //22..31
673 if(is_signed && get_rac(c, state+11 + FFMIN(e,10))) //11..21
/**
 * Writes v through the range coder with a code whose step size starts from log2.
 *
 * @param c      range coder
 * @param state  context bytes; the visible code addresses state+4+log2 and
 *               state+31-i
 * @param v      value to write
 * @param log2   initial bit count; r starts at 1<<log2 (or 1 when log2 < 0)
 *
 * NOTE(review): the loop that updates v, r and log2 is not visible in this
 * excerpt.
 */
680 static inline void put_symbol2(RangeCoder *c, uint8_t *state, int v, int log2){
682 int r= log2>=0 ? 1<<log2 : 1;
// A 1 is written in context 4+log2. NOTE(review): presumably once per escape step -- confirm.
688 put_rac(c, state+4+log2, 1);
// A 0 in context 4+log2 ends the prefix.
693 put_rac(c, state+4+log2, 0);
// The low log2 bits of v follow, most significant first, in context 31-i.
695 for(i=log2-1; i>=0; i--){
696 put_rac(c, state+31-i, (v>>i)&1);
/**
 * Reads a value written by put_symbol2().
 *
 * @param c      range coder
 * @param state  context bytes; the visible code addresses state+4+log2 and
 *               state+31-i
 * @param log2   initial bit count; r starts at 1<<log2 (or 1 when log2 < 0)
 * @return the decoded value. NOTE(review): the return statement is not
 *         visible in this excerpt.
 */
700 static inline int get_symbol2(RangeCoder *c, uint8_t *state, int log2){
702 int r= log2>=0 ? 1<<log2 : 1;
// Prefix: keep reading while the bit in context 4+log2 is 1 (loop body not visible).
707 while(get_rac(c, state+4+log2)){
// Suffix: log2 bits, most significant first, each read in context 31-i.
713 for(i=log2-1; i>=0; i--){
714 v+= get_rac(c, state+31-i)<<i;
/**
 * One lifting step with a two-tap predictor.
 *
 * For interior samples:
 *   dst[i] = src[i] +/- ((mul*(ref[i] + ref[i+1]) + add) >> shift)
 * The term is subtracted when `inverse` is non-zero and added otherwise.
 * Each array is indexed with its own step. At dst[0] and dst[w] the pair of
 * neighbours is replaced by twice a single ref sample.
 * NOTE(review): the conditions selecting the edge cases (presumably
 * mirror_left / mirror_right) and the loop header are not visible here.
 *
 * @param width     full (low + high) signal width
 * @param highpass  NOTE(review): presumably 1 when dst is the high-pass half -- confirm
 */
720 static always_inline void lift(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int dst_step, int src_step, int ref_step, int width, int mul, int add, int shift, int highpass, int inverse){
721 const int mirror_left= !highpass;
722 const int mirror_right= (width&1) ^ highpass;
723 const int w= (width>>1) - 1 + (highpass & width);
// LIFT yields src - ref when inv is non-zero, src + ref otherwise.
726 #define LIFT(src, ref, inv) ((src) + ((inv) ? - (ref) : + (ref)))
728 dst[0] = LIFT(src[0], ((mul*2*ref[0]+add)>>shift), inverse);
734 dst[i*dst_step] = LIFT(src[i*src_step], ((mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add)>>shift), inverse);
738 dst[w*dst_step] = LIFT(src[w*src_step], ((mul*2*ref[w*ref_step]+add)>>shift), inverse);
/**
 * Lifting step variant whose predictor starts from 3 * (ref[i] + ref[i+1]),
 * or 3*2*ref[w] at the right edge.
 *
 * The result is dst = src +/- ((r + add) >> shift) using the LIFT macro.
 * NOTE(review): further adjustments to r, the left-edge computation of r, and
 * the edge conditions are on lines not visible in this excerpt. `mul` is not
 * used by any visible line.
 */
742 static always_inline void lift5(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int dst_step, int src_step, int ref_step, int width, int mul, int add, int shift, int highpass, int inverse){
743 const int mirror_left= !highpass;
744 const int mirror_right= (width&1) ^ highpass;
745 const int w= (width>>1) - 1 + (highpass & width);
752 dst[0] = LIFT(src[0], ((r+add)>>shift), inverse);
758 int r= 3*(ref[i*ref_step] + ref[(i+1)*ref_step]);
761 dst[i*dst_step] = LIFT(src[i*src_step], ((r+add)>>shift), inverse);
765 int r= 3*2*ref[w*ref_step];
768 dst[w*dst_step] = LIFT(src[w*src_step], ((r+add)>>shift), inverse);
/**
 * Lifting step variant built on the LIFTS macro.
 *
 * The value passed as LIFTS's `ref` is mul*(ref[i] + ref[i+1]) + add for
 * interior samples and mul*2*ref[..] + add at dst[0] and dst[w].
 * NOTE(review): the edge conditions and the loop header are not visible in
 * this excerpt.
 */
772 static always_inline void liftS(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int dst_step, int src_step, int ref_step, int width, int mul, int add, int shift, int highpass, int inverse){
773 const int mirror_left= !highpass;
774 const int mirror_right= (width&1) ^ highpass;
775 const int w= (width>>1) - 1 + (highpass & width);
// Inverse:  src - ((ref - 4*src) >> shift); `shift` is taken from the enclosing scope.
// Forward:  (64*src + 4*ref + 8 + (5<<27)) / 80 - (1<<23).
// (5<<27)/(5*16) equals 1<<23, so the added bias is subtracted back out after
// the division. NOTE(review): presumably it keeps the dividend non-negative -- confirm.
779 #define LIFTS(src, ref, inv) ((inv) ? (src) - (((ref) - 4*(src))>>shift): (16*4*(src) + 4*(ref) + 8 + (5<<27))/(5*16) - (1<<23))
781 dst[0] = LIFTS(src[0], mul*2*ref[0]+add, inverse);
787 dst[i*dst_step] = LIFTS(src[i*src_step], mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add, inverse);
791 dst[w*dst_step] = LIFTS(src[w*src_step], mul*2*ref[w*ref_step]+add, inverse);
/**
 * In-place horizontal lifting step with an n-tap filter.
 *
 * Every second sample, starting at `start`, is updated from its neighbours at
 * the other parity:
 *   dst[x] +/- = (sum_i coeffs[i]*dst[x + 2*i - n + 1] + (1<<shift)/2) >> shift
 * The update is subtracted when `inverse` is non-zero and added otherwise.
 *
 * @param dst     row of `width` samples, modified in place
 * @param coeffs  filter taps. NOTE(review): presumably n entries; the loop
 *                over i is not visible in this excerpt
 * @param shift   right shift applied to the sum, with (1<<shift)/2 added first
 */
796 static void inplace_lift(DWTELEM *dst, int width, int *coeffs, int n, int shift, int start, int inverse){
799 for(x=start; x<width; x+=2){
// Tap positions are centred on x and have the opposite parity to x.
803 int x2= x + 2*i - n + 1;
// Reflect positions past the right edge. NOTE(review): the left-edge branch is not visible here.
805 else if(x2>=width) x2= 2*width-x2-2;
// Each product is computed in 64 bits.
806 sum += coeffs[i]*(int64_t)dst[x2];
808 if(inverse) dst[x] -= (sum + (1<<shift)/2)>>shift;
809 else dst[x] += (sum + (1<<shift)/2)>>shift;
/**
 * In-place vertical lifting step with an n-tap filter.
 *
 * Same update as inplace_lift(), applied along columns: every second row
 * starting at `start` is updated from rows y + 2*i - n + 1.
 *
 * @param dst     top-left sample of a width x height area
 * @param stride  distance in elements between vertically adjacent samples
 * @param coeffs  filter taps. NOTE(review): presumably n entries; the loop
 *                over i is not visible in this excerpt
 */
813 static void inplace_liftV(DWTELEM *dst, int width, int height, int stride, int *coeffs, int n, int shift, int start, int inverse){
815 for(y=start; y<height; y+=2){
816 for(x=0; x<width; x++){
820 int y2= y + 2*i - n + 1;
// Reflect rows past the bottom edge. NOTE(review): the top-edge branch is not visible here.
822 else if(y2>=height) y2= 2*height-y2-2;
823 sum += coeffs[i]*(int64_t)dst[x + y2*stride];
825 if(inverse) dst[x + y*stride] -= (sum + (1<<shift)/2)>>shift;
826 else dst[x + y*stride] += (sum + (1<<shift)/2)>>shift;
// Filter-tap sets for the generic lifting transform below. Each COEFFSn is a
// compound-literal int array passed to inplace_lift()/inplace_liftV().
// The same macro names are defined several times. NOTE(review): these are
// presumably alternatives in #if/#elif branches whose directives, and the
// matching N*, SHIFT*, LX* and SCALEX definitions, are not visible in this
// excerpt -- confirm.
835 #if 0 // more accurate 9/7
838 #define COEFFS1 (int[]){-25987,-25987}
841 #define COEFFS2 (int[]){-27777,-27777}
844 #define COEFFS3 (int[]){28931,28931}
847 #define COEFFS4 (int[]){14533,14533}
851 #define COEFFS1 (int[]){1,-9,-9,1}
854 #define COEFFS2 (int[]){-1,5,5,-1}
867 #define COEFFS1 (int[]){1,1}
870 #define COEFFS2 (int[]){-1,-1}
883 #define COEFFS2 (int[]){-1,-1}
886 #define COEFFS3 (int[]){-1,-1}
889 #define COEFFS4 (int[]){-5,29,29,-5}
894 #define COEFFS1 (int[]){-203,-203}
897 #define COEFFS2 (int[]){-217,-217}
900 #define COEFFS3 (int[]){113,113}
903 #define COEFFS4 (int[]){227,227}
911 #define COEFFS2 (int[]){-1,-1}
914 #define COEFFS3 (int[]){-1,-1}
917 #define COEFFS4 (int[]){3,3}
921 #define COEFFS1 (int[]){1,-9,-9,1}
924 #define COEFFS2 (int[]){1,1}
934 #define COEFFS1 (int[]){1,-9,-9,1}
937 #define COEFFS2 (int[]){-1,5,5,-1}
/**
 * Forward horizontal transform of one row using the generic lifting steps.
 *
 * Applies lifting steps 1..4 in place (inverse = 0), then de-interleaves the
 * row through `temp` and copies the result back into b.
 *
 * @param b      row of `width` samples, transformed in place
 * @param width  number of samples in the row
 */
945 static void horizontal_decomposeX(DWTELEM *b, int width){
947 const int width2= width>>1;
948 const int w2= (width+1)>>1;
951 inplace_lift(b, width, COEFFS1, N1, SHIFT1, LX1, 0);
952 inplace_lift(b, width, COEFFS2, N2, SHIFT2, LX0, 0);
953 inplace_lift(b, width, COEFFS3, N3, SHIFT3, LX1, 0);
954 inplace_lift(b, width, COEFFS4, N4, SHIFT4, LX0, 0);
// Odd-indexed samples go to the second half, which starts at w2.
// NOTE(review): the store of the even-indexed samples is not visible in this excerpt.
956 for(x=0; x<width2; x++){
958 temp[x+w2]= b[2*x + 1];
// NOTE(review): the copy size uses sizeof(int) while the elements are DWTELEM;
// the two only agree if DWTELEM is int-sized -- confirm.
962 memcpy(b, temp, width*sizeof(int));
/**
 * Inverse of horizontal_decomposeX() for one row.
 *
 * Copies the row to `temp`, re-interleaves it into b, then undoes the lifting
 * steps in reverse order (4..1) with inverse = 1.
 *
 * @param b      row of `width` samples, transformed in place
 * @param width  number of samples in the row
 */
965 static void horizontal_composeX(DWTELEM *b, int width){
967 const int width2= width>>1;
969 const int w2= (width+1)>>1;
// NOTE(review): the copy size uses sizeof(int) while the elements are DWTELEM;
// the two only agree if DWTELEM is int-sized -- confirm.
971 memcpy(temp, b, width*sizeof(int));
// Samples from the second half (from w2 on) return to the odd positions.
// NOTE(review): the restore of the even positions is not visible in this excerpt.
972 for(x=0; x<width2; x++){
974 b[2*x + 1]= temp[x+w2];
979 inplace_lift(b, width, COEFFS4, N4, SHIFT4, LX0, 1);
980 inplace_lift(b, width, COEFFS3, N3, SHIFT3, LX1, 1);
981 inplace_lift(b, width, COEFFS2, N2, SHIFT2, LX0, 1);
982 inplace_lift(b, width, COEFFS1, N1, SHIFT1, LX1, 1);
/**
 * Forward 2-D transform of a width x height area using the generic lifting steps.
 *
 * Order of operations: multiply every sample by SCALEX, run
 * horizontal_decomposeX() on each row, then apply the four vertical lifting
 * steps (inverse = 0).
 *
 * @param buffer  top-left sample, modified in place
 * @param stride  distance in elements between consecutive rows
 */
985 static void spatial_decomposeX(DWTELEM *buffer, int width, int height, int stride){
988 for(y=0; y<height; y++){
989 for(x=0; x<width; x++){
990 buffer[y*stride + x] *= SCALEX;
994 for(y=0; y<height; y++){
995 horizontal_decomposeX(buffer + y*stride, width);
998 inplace_liftV(buffer, width, height, stride, COEFFS1, N1, SHIFT1, LX1, 0);
999 inplace_liftV(buffer, width, height, stride, COEFFS2, N2, SHIFT2, LX0, 0);
1000 inplace_liftV(buffer, width, height, stride, COEFFS3, N3, SHIFT3, LX1, 0);
1001 inplace_liftV(buffer, width, height, stride, COEFFS4, N4, SHIFT4, LX0, 0);
/* Inverse of spatial_decomposeX: undoes the vertical lifting steps in reverse
 * order (COEFFS4..COEFFS1, last argument 1), applies horizontal_composeX() to
 * each row, and finally divides every sample by SCALEX. */
1004 static void spatial_composeX(DWTELEM *buffer, int width, int height, int stride){
1007 inplace_liftV(buffer, width, height, stride, COEFFS4, N4, SHIFT4, LX0, 1);
1008 inplace_liftV(buffer, width, height, stride, COEFFS3, N3, SHIFT3, LX1, 1);
1009 inplace_liftV(buffer, width, height, stride, COEFFS2, N2, SHIFT2, LX0, 1);
1010 inplace_liftV(buffer, width, height, stride, COEFFS1, N1, SHIFT1, LX1, 1);
/* Row pass. */
1012 for(y=0; y<height; y++){
1013 horizontal_composeX(buffer + y*stride, width);
/* Remove the SCALEX factor applied by the forward transform (integer division). */
1016 for(y=0; y<height; y++){
1017 for(x=0; x<width; x++){
1018 buffer[y*stride + x] /= SCALEX;
/* Forward horizontal pass of the "53i" transform on one row of `width`
 * samples. Uses a stack array temp[width] (variable-length) as scratch; odd
 * samples are collected from index w2 on, and the two lift() calls at the end
 * write the results to b+w2 and b. */
1023 static void horizontal_decompose53i(DWTELEM *b, int width){
1024 DWTELEM temp[width];
1025 const int width2= width>>1;
1027 const int w2= (width+1)>>1;
1029 for(x=0; x<width2; x++){
1031 temp[x+w2]= b[2*x + 1];
/* Hand-unrolled variant processing two positions per iteration; presumably an
 * alternative to the lift() calls below -- TODO confirm which one is compiled. */
1045 for(x=1; x+1<width2; x+=2){
1049 A2 += (A1 + A3 + 2)>>2;
1053 A1= temp[x+1+width2];
1056 A4 += (A1 + A3 + 2)>>2;
1062 A2 += (A1 + A3 + 2)>>2;
1067 lift(b+w2, temp+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 0);
1068 lift(b , temp , b+w2, 1, 1, 1, width, 1, 2, 2, 0, 0);
/* Vertical step of the forward 53i transform: for each of the `width`
 * columns, subtracts (b0 + b2) >> 1 from the sample in row b1, where b0 and
 * b2 are the neighbouring rows. Only b1 is modified. */
1072 static void vertical_decompose53iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1075 for(i=0; i<width; i++){
1076 b1[i] -= (b0[i] + b2[i])>>1;
/* Vertical step of the forward 53i transform: for each of the `width`
 * columns, adds (b0 + b2 + 2) >> 2 to the sample in row b1. Only b1 is
 * modified. */
1080 static void vertical_decompose53iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1083 for(i=0; i<width; i++){
1084 b1[i] += (b0[i] + b2[i] + 2)>>2;
/* Forward 2-D 53i transform of a width x height region (rows `stride`
 * elements apart). Walks the rows two at a time starting at y=-2; row indices
 * outside the image are mapped back inside by mirror(). For every step the
 * two newly reached rows are transformed horizontally, then the vertical H0
 * and L0 steps are applied to the sliding window of rows b0..b3. */
1088 static void spatial_decompose53i(DWTELEM *buffer, int width, int height, int stride){
1090 DWTELEM *b0= buffer + mirror(-2-1, height-1)*stride;
1091 DWTELEM *b1= buffer + mirror(-2 , height-1)*stride;
1093 for(y=-2; y<height; y+=2){
1094 DWTELEM *b2= buffer + mirror(y+1, height-1)*stride;
1095 DWTELEM *b3= buffer + mirror(y+2, height-1)*stride;
/* The pointer comparisons presumably skip rows that were only reached through
 * mirroring, so that no row is processed twice -- TODO confirm. */
1098 if(b1 <= b3) horizontal_decompose53i(b2, width);
1099 if(y+2 < height) horizontal_decompose53i(b3, width);
1100 STOP_TIMER("horizontal_decompose53i")}
1103 if(b1 <= b3) vertical_decompose53iH0(b1, b2, b3, width);
1104 if(b0 <= b2) vertical_decompose53iL0(b0, b1, b2, width);
1105 STOP_TIMER("vertical_decompose53i*")}
/* Forward horizontal pass of the "97i" transform on one row. Four lifting
 * passes (lift, liftS, lift5, lift) using the W_A*, W_B*, W_C* and W_D*
 * constants alternate between b and the stack scratch array temp[width]; the
 * last two passes write their output to b+w2 and b respectively. */
1182 static void horizontal_decompose97i(DWTELEM *b, int width){
1183 DWTELEM temp[width];
1184 const int w2= (width+1)>>1;
1186 lift (temp+w2, b +1, b , 1, 2, 2, width, -W_AM, W_AO, W_AS, 1, 0);
1187 liftS(temp , b , temp+w2, 1, 2, 1, width, -W_BM, W_BO, W_BS, 0, 0);
1188 lift5(b +w2, temp+w2, temp , 1, 1, 1, width, W_CM, W_CO, W_CS, 1, 0);
1189 lift (b , temp , b +w2, 1, 1, 1, width, W_DM, W_DO, W_DS, 0, 0);
/* Vertical step "A" of the forward 97i transform: for each column subtracts
 * (W_AM*(b0 + b2) + W_AO) >> W_AS from row b1. Only b1 is modified. */
1193 static void vertical_decompose97iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1196 for(i=0; i<width; i++){
1197 b1[i] -= (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
/* Vertical step "C" of the forward 97i transform: adds a rounded, shifted
 * multiple of (b0 + b2) to row b1. Two formulations are present (a direct
 * W_CM multiply, and one that starts from r = 3*(b0+b2)); presumably they are
 * alternative preprocessor branches -- TODO confirm. */
1201 static void vertical_decompose97iH1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1204 for(i=0; i<width; i++){
1206 b1[i] += (W_CM*(b0[i] + b2[i])+W_CO)>>W_CS;
1208 int r= 3*(b0[i] + b2[i]);
1211 b1[i] += (r+W_CO)>>W_CS;
/* Vertical step "B" of the forward 97i transform, updating row b1 from its
 * neighbours b0 and b2. Two formulations are present: a shift-based one using
 * W_BM/W_BO/W_BS and a division-based one with large bias terms ((5<<27) and
 * (1<<23)); presumably alternative preprocessor branches -- TODO confirm. */
1216 static void vertical_decompose97iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1219 for(i=0; i<width; i++){
1221 b1[i] -= (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS;
1223 b1[i] = (16*4*b1[i] - 4*(b0[i] + b2[i]) + 8*5 + (5<<27)) / (5*16) - (1<<23);
/* Vertical step "D" of the forward 97i transform: for each column adds
 * (W_DM*(b0 + b2) + W_DO) >> W_DS to row b1. Only b1 is modified. */
1228 static void vertical_decompose97iL1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1231 for(i=0; i<width; i++){
1232 b1[i] += (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS;
/* Forward 2-D 97i transform of a width x height region (rows `stride`
 * elements apart). Advances two rows per iteration starting at y=-4, keeping
 * a window of six rows b0..b5 whose indices are folded into the image by
 * mirror(). Newly reached rows are transformed horizontally first; then the
 * four vertical steps H0, L0, H1, L1 are applied to successively older rows
 * of the window. */
1236 static void spatial_decompose97i(DWTELEM *buffer, int width, int height, int stride){
1238 DWTELEM *b0= buffer + mirror(-4-1, height-1)*stride;
1239 DWTELEM *b1= buffer + mirror(-4 , height-1)*stride;
1240 DWTELEM *b2= buffer + mirror(-4+1, height-1)*stride;
1241 DWTELEM *b3= buffer + mirror(-4+2, height-1)*stride;
1243 for(y=-4; y<height; y+=2){
1244 DWTELEM *b4= buffer + mirror(y+3, height-1)*stride;
1245 DWTELEM *b5= buffer + mirror(y+4, height-1)*stride;
/* The pointer comparisons presumably skip rows that were only reached through
 * mirroring -- TODO confirm. */
1248 if(b3 <= b5) horizontal_decompose97i(b4, width);
1249 if(y+4 < height) horizontal_decompose97i(b5, width);
1251 STOP_TIMER("horizontal_decompose97i")
1255 if(b3 <= b5) vertical_decompose97iH0(b3, b4, b5, width);
1256 if(b2 <= b4) vertical_decompose97iL0(b2, b3, b4, width);
1257 if(b1 <= b3) vertical_decompose97iH1(b1, b2, b3, width);
1258 if(b0 <= b2) vertical_decompose97iL1(b0, b1, b2, width);
1261 STOP_TIMER("vertical_decompose97i")
/* Multi-level forward wavelet transform. For level 0..decomposition_count-1
 * one of the spatial_decompose* routines is run on the buffer with width and
 * height shifted right by `level` and stride shifted left by `level`.
 * Case labels: 0 -> 97i, 1 -> 53i, 2 -> X (the switch is presumably on
 * `type` -- TODO confirm). */
1271 void ff_spatial_dwt(DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
1274 for(level=0; level<decomposition_count; level++){
1276 case 0: spatial_decompose97i(buffer, width>>level, height>>level, stride<<level); break;
1277 case 1: spatial_decompose53i(buffer, width>>level, height>>level, stride<<level); break;
1278 case 2: spatial_decomposeX (buffer, width>>level, height>>level, stride<<level); break;
/* Inverse horizontal pass of the 53i transform on one row. The two lift()
 * calls (last argument 1) produce their results in the stack scratch array
 * temp[width]; the final loop writes the values stored from temp+w2 back to
 * the odd positions of b. */
1283 static void horizontal_compose53i(DWTELEM *b, int width){
1284 DWTELEM temp[width];
1285 const int width2= width>>1;
1286 const int w2= (width+1)>>1;
/* Hand-unrolled variant; presumably an alternative to the lift() calls below
 * -- TODO confirm which one is compiled. */
1298 for(x=1; x+1<width2; x+=2){
1302 A2 += (A1 + A3 + 2)>>2;
1306 A1= temp[x+1+width2];
1309 A4 += (A1 + A3 + 2)>>2;
1315 A2 += (A1 + A3 + 2)>>2;
1319 lift(temp , b , b+w2, 1, 1, 1, width, 1, 2, 2, 0, 1);
1320 lift(temp+w2, b+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 1);
/* Re-interleave. */
1322 for(x=0; x<width2; x++){
1324 b[2*x + 1]= temp[x+w2];
/* Inverse of vertical_decompose53iH0: for each column adds (b0 + b2) >> 1
 * back to row b1. Only b1 is modified. */
1330 static void vertical_compose53iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1333 for(i=0; i<width; i++){
1334 b1[i] += (b0[i] + b2[i])>>1;
/* Inverse of vertical_decompose53iL0: for each column subtracts
 * (b0 + b2 + 2) >> 2 from row b1. Only b1 is modified. */
1338 static void vertical_compose53iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1341 for(i=0; i<width; i++){
1342 b1[i] -= (b0[i] + b2[i] + 2)>>2;
/* Prepares the 53i inverse-transform state for the slice-buffer variant:
 * stores in cs->b0 and cs->b1 the slice-buffer lines for the mirrored row
 * indices -2 and -1 (each multiplied by stride_line). */
1346 static void spatial_compose53i_buffered_init(dwt_compose_t *cs, slice_buffer * sb, int height, int stride_line){
1347 cs->b0 = slice_buffer_get_line(sb, mirror(-1-1, height-1) * stride_line);
1348 cs->b1 = slice_buffer_get_line(sb, mirror(-1 , height-1) * stride_line);
/* Prepares the 53i inverse-transform state for a plain buffer: cs->b0 and
 * cs->b1 point at the rows with mirrored indices -2 and -1. */
1352 static void spatial_compose53i_init(dwt_compose_t *cs, DWTELEM *buffer, int height, int stride){
1353 cs->b0 = buffer + mirror(-1-1, height-1)*stride;
1354 cs->b1 = buffer + mirror(-1 , height-1)*stride;
/* One incremental step of the 53i inverse transform on a slice buffer.
 * Fetches the two next rows (mirrored indices y+1 and y+2), applies the
 * vertical L0 and H0 inverse steps to the window b0..b3 and then the
 * horizontal inverse to b0 and b1. `y` is presumably the current row kept in
 * cs -- TODO confirm. */
1358 static void spatial_compose53i_dy_buffered(dwt_compose_t *cs, slice_buffer * sb, int width, int height, int stride_line){
1360 int mirror0 = mirror(y-1, height-1);
1361 int mirror1 = mirror(y , height-1);
1362 int mirror2 = mirror(y+1, height-1);
1363 int mirror3 = mirror(y+2, height-1);
1365 DWTELEM *b0= cs->b0;
1366 DWTELEM *b1= cs->b1;
1367 DWTELEM *b2= slice_buffer_get_line(sb, mirror2 * stride_line);
1368 DWTELEM *b3= slice_buffer_get_line(sb, mirror3 * stride_line);
/* Row indices are compared here (the non-buffered variant compares row
 * pointers instead) to decide whether a step applies. */
1371 if(mirror1 <= mirror3) vertical_compose53iL0(b1, b2, b3, width);
1372 if(mirror0 <= mirror2) vertical_compose53iH0(b0, b1, b2, width);
1373 STOP_TIMER("vertical_compose53i*")}
1376 if(y-1 >= 0) horizontal_compose53i(b0, width);
1377 if(mirror0 <= mirror2) horizontal_compose53i(b1, width);
1378 STOP_TIMER("horizontal_compose53i")}
/* One incremental step of the 53i inverse transform on a plain buffer.
 * Takes b0/b1 from the saved state, locates the next two rows (mirrored
 * indices y+1 and y+2), applies the vertical L0 and H0 inverse steps and then
 * the horizontal inverse to b0 and b1. */
1385 static void spatial_compose53i_dy(dwt_compose_t *cs, DWTELEM *buffer, int width, int height, int stride){
1387 DWTELEM *b0= cs->b0;
1388 DWTELEM *b1= cs->b1;
1389 DWTELEM *b2= buffer + mirror(y+1, height-1)*stride;
1390 DWTELEM *b3= buffer + mirror(y+2, height-1)*stride;
1393 if(b1 <= b3) vertical_compose53iL0(b1, b2, b3, width);
1394 if(b0 <= b2) vertical_compose53iH0(b0, b1, b2, width);
1395 STOP_TIMER("vertical_compose53i*")}
1398 if(y-1 >= 0) horizontal_compose53i(b0, width);
1399 if(b0 <= b2) horizontal_compose53i(b1, width);
1400 STOP_TIMER("horizontal_compose53i")}
/* Full 53i inverse transform of one level: initialises a local compose state
 * and repeats spatial_compose53i_dy() for as long as cs.y <= height. */
1407 static void spatial_compose53i(DWTELEM *buffer, int width, int height, int stride){
1409 spatial_compose53i_init(&cs, buffer, height, stride);
1410 while(cs.y <= height)
1411 spatial_compose53i_dy(&cs, buffer, width, height, stride);
/* Inverse horizontal pass of the 97i transform on one row: the four lifting
 * passes of horizontal_decompose97i are undone in the opposite order
 * (D, C, B, A constants) with the last argument set to 1. The first two
 * passes write to the stack scratch array temp[width]; the last two write
 * into b and b+1. */
1415 static void horizontal_compose97i(DWTELEM *b, int width){
1416 DWTELEM temp[width];
1417 const int w2= (width+1)>>1;
1419 lift (temp , b , b +w2, 1, 1, 1, width, W_DM, W_DO, W_DS, 0, 1);
1420 lift5(temp+w2, b +w2, temp , 1, 1, 1, width, W_CM, W_CO, W_CS, 1, 1);
1421 liftS(b , temp , temp+w2, 2, 1, 1, width, -W_BM, W_BO, W_BS, 0, 1);
1422 lift (b+1 , temp+w2, b , 2, 1, 2, width, -W_AM, W_AO, W_AS, 1, 1);
/* Inverse of vertical_decompose97iH0: for each column adds
 * (W_AM*(b0 + b2) + W_AO) >> W_AS back to row b1. Only b1 is modified. */
1425 static void vertical_compose97iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1428 for(i=0; i<width; i++){
1429 b1[i] += (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
/* Inverse of vertical_decompose97iH1: subtracts the "C" lifting term from row
 * b1. As in the forward routine, two formulations are present (direct W_CM
 * multiply, and one starting from r = 3*(b0+b2)); presumably alternative
 * preprocessor branches -- TODO confirm. */
1433 static void vertical_compose97iH1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1436 for(i=0; i<width; i++){
1438 b1[i] -= (W_CM*(b0[i] + b2[i])+W_CO)>>W_CS;
1440 int r= 3*(b0[i] + b2[i]);
1443 b1[i] -= (r+W_CO)>>W_CS;
/* Inverse of vertical_decompose97iL0: adds the "B" lifting term to row b1.
 * Two formulations are present, the second one also feeding 4*b1 into the
 * shifted sum; presumably alternative preprocessor branches -- TODO confirm. */
1448 static void vertical_compose97iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1451 for(i=0; i<width; i++){
1453 b1[i] += (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS;
1455 b1[i] += (W_BM*(b0[i] + b2[i])+4*b1[i]+W_BO)>>W_BS;
/* Inverse of vertical_decompose97iL1: for each column subtracts
 * (W_DM*(b0 + b2) + W_DO) >> W_DS from row b1. Only b1 is modified. */
1460 static void vertical_compose97iL1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1463 for(i=0; i<width; i++){
1464 b1[i] -= (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS;
/* Fused vertical inverse 97i step over six rows: per column it performs the
 * L1 step on b4, the H1 step on b3, the L0 step on b2 and the H0 step on b1,
 * in that order, so every step sees the already updated neighbour. The H1
 * and L0 steps each appear in two formulations (presumably alternative
 * preprocessor branches -- TODO confirm). */
1468 static void vertical_compose97i(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, DWTELEM *b3, DWTELEM *b4, DWTELEM *b5, int width){
1471 for(i=0; i<width; i++){
1475 b4[i] -= (W_DM*(b3[i] + b5[i])+W_DO)>>W_DS;
1477 b3[i] -= (W_CM*(b2[i] + b4[i])+W_CO)>>W_CS;
1479 r= 3*(b2[i] + b4[i]);
1482 b3[i] -= (r+W_CO)>>W_CS;
1485 b2[i] += (W_BM*(b1[i] + b3[i])+W_BO)>>W_BS;
1487 b2[i] += (W_BM*(b1[i] + b3[i])+4*b2[i]+W_BO)>>W_BS;
1489 b1[i] += (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
/* Prepares the 97i inverse-transform state for the slice-buffer variant:
 * cs->b0..cs->b3 receive the slice-buffer lines for the mirrored row indices
 * -4, -3, -2 and -1 (each multiplied by stride_line). */
1493 static void spatial_compose97i_buffered_init(dwt_compose_t *cs, slice_buffer * sb, int height, int stride_line){
1494 cs->b0 = slice_buffer_get_line(sb, mirror(-3-1, height-1) * stride_line);
1495 cs->b1 = slice_buffer_get_line(sb, mirror(-3 , height-1) * stride_line);
1496 cs->b2 = slice_buffer_get_line(sb, mirror(-3+1, height-1) * stride_line);
1497 cs->b3 = slice_buffer_get_line(sb, mirror(-3+2, height-1) * stride_line);
/* Prepares the 97i inverse-transform state for a plain buffer: cs->b0..cs->b3
 * point at the rows with mirrored indices -4, -3, -2 and -1. */
1501 static void spatial_compose97i_init(dwt_compose_t *cs, DWTELEM *buffer, int height, int stride){
1502 cs->b0 = buffer + mirror(-3-1, height-1)*stride;
1503 cs->b1 = buffer + mirror(-3 , height-1)*stride;
1504 cs->b2 = buffer + mirror(-3+1, height-1)*stride;
1505 cs->b3 = buffer + mirror(-3+2, height-1)*stride;
/* One incremental step of the 97i inverse transform on a slice buffer.
 * Rows b0..b3 come from the saved state, b4/b5 are fetched for the mirrored
 * indices y+3 and y+4. Away from the top and bottom borders the fused
 * vertical_compose97i() is used; otherwise the four vertical steps are run
 * individually, each guarded by a comparison of mirrored row indices.
 * Afterwards b0 and b1 get the horizontal inverse. `y` is presumably the
 * current row kept in cs -- TODO confirm. */
1509 static void spatial_compose97i_dy_buffered(dwt_compose_t *cs, slice_buffer * sb, int width, int height, int stride_line){
1512 int mirror0 = mirror(y - 1, height - 1);
1513 int mirror1 = mirror(y + 0, height - 1);
1514 int mirror2 = mirror(y + 1, height - 1);
1515 int mirror3 = mirror(y + 2, height - 1);
1516 int mirror4 = mirror(y + 3, height - 1);
1517 int mirror5 = mirror(y + 4, height - 1);
1518 DWTELEM *b0= cs->b0;
1519 DWTELEM *b1= cs->b1;
1520 DWTELEM *b2= cs->b2;
1521 DWTELEM *b3= cs->b3;
1522 DWTELEM *b4= slice_buffer_get_line(sb, mirror4 * stride_line);
1523 DWTELEM *b5= slice_buffer_get_line(sb, mirror5 * stride_line);
/* Interior rows: all four vertical steps in one fused loop. */
1526 if(y>0 && y+4<height){
1527 vertical_compose97i(b0, b1, b2, b3, b4, b5, width);
/* Border rows: step by step. */
1529 if(mirror3 <= mirror5) vertical_compose97iL1(b3, b4, b5, width);
1530 if(mirror2 <= mirror4) vertical_compose97iH1(b2, b3, b4, width);
1531 if(mirror1 <= mirror3) vertical_compose97iL0(b1, b2, b3, width);
1532 if(mirror0 <= mirror2) vertical_compose97iH0(b0, b1, b2, width);
1535 STOP_TIMER("vertical_compose97i")}}
1538 if(y-1>= 0) horizontal_compose97i(b0, width);
1539 if(mirror0 <= mirror2) horizontal_compose97i(b1, width);
/* Profiling only. */
1540 if(width>400 && mirror0 <= mirror2){
1541 STOP_TIMER("horizontal_compose97i")}}
/* One incremental step of the 97i inverse transform on a plain buffer.
 * Rows b0..b3 come from the saved state, b4/b5 are the rows with mirrored
 * indices y+3 and y+4. The four vertical inverse steps run individually,
 * each guarded by a row-pointer comparison; then b0 and b1 get the horizontal
 * inverse. */
1550 static void spatial_compose97i_dy(dwt_compose_t *cs, DWTELEM *buffer, int width, int height, int stride){
1552 DWTELEM *b0= cs->b0;
1553 DWTELEM *b1= cs->b1;
1554 DWTELEM *b2= cs->b2;
1555 DWTELEM *b3= cs->b3;
1556 DWTELEM *b4= buffer + mirror(y+3, height-1)*stride;
1557 DWTELEM *b5= buffer + mirror(y+4, height-1)*stride;
/* The trailing "&& 0" makes this condition always false, so the branch it
 * guards never runs. */
1559 if(stride == width && y+4 < height && 0){
1561 for(x=0; x<width/2; x++)
1568 if(b3 <= b5) vertical_compose97iL1(b3, b4, b5, width);
1569 if(b2 <= b4) vertical_compose97iH1(b2, b3, b4, width);
1570 if(b1 <= b3) vertical_compose97iL0(b1, b2, b3, width);
1571 if(b0 <= b2) vertical_compose97iH0(b0, b1, b2, width);
1573 STOP_TIMER("vertical_compose97i")}}
1576 if(y-1>= 0) horizontal_compose97i(b0, width);
1577 if(b0 <= b2) horizontal_compose97i(b1, width);
/* Profiling only. */
1578 if(width>400 && b0 <= b2){
1579 STOP_TIMER("horizontal_compose97i")}}
/* Full 97i inverse transform of one level: initialises a local compose state
 * and repeats spatial_compose97i_dy() for as long as cs.y <= height. */
1588 static void spatial_compose97i(DWTELEM *buffer, int width, int height, int stride){
1590 spatial_compose97i_init(&cs, buffer, height, stride);
1591 while(cs.y <= height)
1592 spatial_compose97i_dy(&cs, buffer, width, height, stride);
/* Initialises one compose state per decomposition level (cs[level]) for the
 * slice-buffer inverse transform, from the last level down to level 0.
 * Height is shifted right and stride_line left by `level`. Case 2 has no
 * buffered implementation and only logs an error. */
1595 void ff_spatial_idwt_buffered_init(dwt_compose_t *cs, slice_buffer * sb, int width, int height, int stride_line, int type, int decomposition_count){
1597 for(level=decomposition_count-1; level>=0; level--){
1599 case 0: spatial_compose97i_buffered_init(cs+level, sb, height>>level, stride_line<<level); break;
1600 case 1: spatial_compose53i_buffered_init(cs+level, sb, height>>level, stride_line<<level); break;
1601 /* not slicified yet */
1602 case 2: /*spatial_composeX(buffer, width>>level, height>>level, stride<<level); break;*/
1603 av_log(NULL, AV_LOG_ERROR, "spatial_composeX neither buffered nor slicified yet.\n"); break;
/* Initialises one compose state per decomposition level (cs[level]) for the
 * plain-buffer inverse transform, from the last level down to level 0.
 * For case 2 there is no incremental implementation, so the whole level is
 * inverse-transformed right here with spatial_composeX(). */
1608 void ff_spatial_idwt_init(dwt_compose_t *cs, DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
1610 for(level=decomposition_count-1; level>=0; level--){
1612 case 0: spatial_compose97i_init(cs+level, buffer, height>>level, stride<<level); break;
1613 case 1: spatial_compose53i_init(cs+level, buffer, height>>level, stride<<level); break;
1614 /* not slicified yet */
1615 case 2: spatial_composeX(buffer, width>>level, height>>level, stride<<level); break;
/* Advances the incremental inverse transform far enough for output row `y`.
 * For every level, starting with the last one, the per-level _dy() step is
 * repeated while cs[level].y <= min((y>>level) + support, height>>level).
 * `support` is 3 for type 1 and 5 otherwise. */
1620 void ff_spatial_idwt_slice(dwt_compose_t *cs, DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count, int y){
1621 const int support = type==1 ? 3 : 5;
1625 for(level=decomposition_count-1; level>=0; level--){
1626 while(cs[level].y <= FFMIN((y>>level)+support, height>>level)){
1628 case 0: spatial_compose97i_dy(cs+level, buffer, width>>level, height>>level, stride<<level);
1630 case 1: spatial_compose53i_dy(cs+level, buffer, width>>level, height>>level, stride<<level);
/* Slice-buffer counterpart of ff_spatial_idwt_slice(): for every level,
 * starting with the last one, repeats the buffered _dy() step while
 * cs[level].y <= min((y>>level) + support, height>>level). `support` is 3 for
 * type 1 and 5 otherwise. */
1638 void ff_spatial_idwt_buffered_slice(dwt_compose_t *cs, slice_buffer * slice_buf, int width, int height, int stride_line, int type, int decomposition_count, int y){
1639 const int support = type==1 ? 3 : 5;
1643 for(level=decomposition_count-1; level>=0; level--){
1644 while(cs[level].y <= FFMIN((y>>level)+support, height>>level)){
1646 case 0: spatial_compose97i_dy_buffered(cs+level, slice_buf, width>>level, height>>level, stride_line<<level);
1648 case 1: spatial_compose53i_dy_buffered(cs+level, slice_buf, width>>level, height>>level, stride_line<<level);
/* Complete multi-level inverse wavelet transform of `buffer`.
 * Two paths are present: one runs spatial_composeX() per level from the last
 * level down to 0; the other sets up per-level compose states and feeds the
 * image through ff_spatial_idwt_slice() four rows at a time. Which path is
 * taken presumably depends on `type` -- TODO confirm. */
1656 void ff_spatial_idwt(DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
1659 for(level=decomposition_count-1; level>=0; level--)
1660 spatial_composeX (buffer, width>>level, height>>level, stride<<level);
1662 dwt_compose_t cs[MAX_DECOMPOSITIONS];
1664 ff_spatial_idwt_init(cs, buffer, width, height, stride, type, decomposition_count);
1665 for(y=0; y<height; y+=4)
1666 ff_spatial_idwt_slice(cs, buffer, width, height, stride, type, decomposition_count, y);
/* Entropy-codes one subband with zero-run coding.
 * The coefficients are visited twice with identical neighbourhood logic:
 *  - pass 1 collects, in runs[], the run lengths over positions whose
 *    neighbours (left, top-left, top, top-right) and parent are all zero;
 *  - pass 2 writes the symbols: a significance bit when the neighbourhood is
 *    non-zero, otherwise the run lengths, followed by magnitude and sign of
 *    every non-zero coefficient.
 * An error is logged when fewer than w*40 bytes remain in the output buffer.
 * NOTE(review): the return values are not shown here -- confirm the error
 * convention against the caller. */
1670 static int encode_subband_c0run(SnowContext *s, SubBand *b, DWTELEM *src, DWTELEM *parent, int stride, int orientation){
1671 const int w= b->width;
1672 const int h= b->height;
/* ---- pass 1: gather run lengths ---- */
1684 int /*ll=0, */l=0, lt=0, t=0, rt=0;
1685 v= src[x + y*stride];
1688 t= src[x + (y-1)*stride];
1690 lt= src[x - 1 + (y-1)*stride];
1693 rt= src[x + 1 + (y-1)*stride];
1697 l= src[x - 1 + y*stride];
1699 if(orientation==1) ll= src[y + (x-2)*stride];
1700 else ll= src[x - 2 + y*stride];
/* Parent coefficient, only if (px,py) lies inside the parent band. */
1706 if(px<b->parent->width && py<b->parent->height)
1707 p= parent[px + py*2*stride];
/* All-zero context: this position belongs to run coding. */
1709 if(!(/*ll|*/l|lt|t|rt|p)){
1711 runs[run_index++]= run;
1719 max_index= run_index;
1720 runs[run_index++]= run;
1722 run= runs[run_index++];
/* ---- pass 2: write the bitstream ---- */
/* Number of recorded runs first, then the first run length. */
1724 put_symbol2(&s->c, b->state[30], max_index, 0);
1725 if(run_index <= max_index)
1726 put_symbol2(&s->c, b->state[1], run, 3);
/* Guard against running out of output space. */
1729 if(s->c.bytestream_end - s->c.bytestream < w*40){
1730 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
1735 int /*ll=0, */l=0, lt=0, t=0, rt=0;
1736 v= src[x + y*stride];
1739 t= src[x + (y-1)*stride];
1741 lt= src[x - 1 + (y-1)*stride];
1744 rt= src[x + 1 + (y-1)*stride];
1748 l= src[x - 1 + y*stride];
1750 if(orientation==1) ll= src[y + (x-2)*stride];
1751 else ll= src[x - 2 + y*stride];
1757 if(px<b->parent->width && py<b->parent->height)
1758 p= parent[px + py*2*stride];
/* Non-zero neighbourhood: code "is v non-zero" with a context derived from
 * the weighted neighbour magnitudes. */
1760 if(/*ll|*/l|lt|t|rt|p){
1761 int context= av_log2(/*ABS(ll) + */3*ABS(l) + ABS(lt) + 2*ABS(t) + ABS(rt) + ABS(p));
1763 put_rac(&s->c, &b->state[0][context], !!v);
/* Zero neighbourhood: take the next run length. */
1766 run= runs[run_index++];
1768 if(run_index <= max_index)
1769 put_symbol2(&s->c, b->state[1], run, 3);
/* Magnitude (ABS(v)-1) and sign of a non-zero coefficient; the sign context
 * is built from the left and top neighbours. */
1777 int context= av_log2(/*ABS(ll) + */3*ABS(l) + ABS(lt) + 2*ABS(t) + ABS(rt) + ABS(p));
1778 int l2= 2*ABS(l) + (l<0);
1779 int t2= 2*ABS(t) + (t<0);
1781 put_symbol2(&s->c, b->state[context + 2], ABS(v)-1, context-4);
1782 put_rac(&s->c, &b->state[0][16 + 1 + 3 + quant3bA[l2&0xFF] + 3*quant3bA[t2&0xFF]], v<0);
/* Encodes one subband. Thin wrapper that forwards every argument to
 * encode_subband_c0run() and returns its result; the other coders are only
 * referenced in commented-out lines. */
1790 static int encode_subband(SnowContext *s, SubBand *b, DWTELEM *src, DWTELEM *parent, int stride, int orientation){
1791 // encode_subband_qtree(s, b, src, parent, stride, orientation);
1792 // encode_subband_z0run(s, b, src, parent, stride, orientation);
1793 return encode_subband_c0run(s, b, src, parent, stride, orientation);
1794 // encode_subband_dzr(s, b, src, parent, stride, orientation);
/* Decodes the coefficients of subband b from the range coder into the sparse
 * list b->x_coeff of (x, coeff) entries. Every row is closed with an entry
 * whose x equals w+1 ("end marker"). The previous row's entries (prev_xc) and
 * the parent band's entries (parent_xc) are walked in parallel to rebuild the
 * context used for each position.
 * Stored values pack magnitude and sign: v = 2*magnitude + sign bit. */
1797 static inline void unpack_coeffs(SnowContext *s, SubBand *b, SubBand * parent, int orientation){
1798 const int w= b->width;
1799 const int h= b->height;
1804 x_and_coeff *xc= b->x_coeff;
1805 x_and_coeff *prev_xc= NULL;
1806 x_and_coeff *prev2_xc= xc;
1807 x_and_coeff *parent_xc= parent ? parent->x_coeff : NULL;
1808 x_and_coeff *prev_parent_xc= parent_xc;
/* Number of runs, then the first run length if there is one. */
1810 runs= get_symbol2(&s->c, b->state[30], 0);
1811 if(runs-- > 0) run= get_symbol2(&s->c, b->state[1], 3);
1816 int lt=0, t=0, rt=0;
1818 if(y && prev_xc->x == 0){
1830 if(prev_xc->x == x + 1)
/* Locate the parent entry covering column x>>1. */
1836 if(x>>1 > parent_xc->x){
1839 if(x>>1 == parent_xc->x){
1840 p= parent_xc->coeff;
/* Non-zero neighbourhood: significance bit, then magnitude and sign. */
1843 if(/*ll|*/l|lt|t|rt|p){
1844 int context= av_log2(/*ABS(ll) + */3*(l>>1) + (lt>>1) + (t&~1) + (rt>>1) + (p>>1));
1846 v=get_rac(&s->c, &b->state[0][context]);
1848 v= 2*(get_symbol2(&s->c, b->state[context + 2], context-4) + 1);
1849 v+=get_rac(&s->c, &b->state[0][16 + 1 + 3 + quant3bA[l&0xFF] + 3*quant3bA[t&0xFF]]);
/* Zero neighbourhood: run handling; a coefficient here uses context 0. */
1856 if(runs-- > 0) run= get_symbol2(&s->c, b->state[1], 3);
1858 v= 2*(get_symbol2(&s->c, b->state[0 + 2], 0-4) + 1);
1859 v+=get_rac(&s->c, &b->state[0][16 + 1 + 3]);
/* Limit how far the current run may be skipped in one go, based on the next
 * entry of the previous row, the row end and the next parent entry. */
1868 if(y) max_run= FFMIN(run, prev_xc->x - x - 2);
1869 else max_run= FFMIN(run, w-x-1);
1871 max_run= FFMIN(max_run, 2*parent_xc->x - x - 1);
1877 (xc++)->x= w+1; //end marker
/* Move the parent pointer to its row's end marker. */
1883 while(parent_xc->x != parent->width+1)
1886 prev_parent_xc= parent_xc;
1888 parent_xc= prev_parent_xc;
1893 (xc++)->x= w+1; //end marker
/* Dequantises rows start_y..h-1 of subband b from its sparse coefficient list
 * (b->x_coeff) into the slice buffer. Each target line is cleared and then
 * the listed coefficients are written at their x positions.
 * save_state[0] carries the list index from one slice to the next. */
1897 static inline void decode_subband_slice_buffered(SnowContext *s, SubBand *b, slice_buffer * sb, int start_y, int h, int save_state[1]){
1898 const int w= b->width;
/* Quantiser: qlog clipped to [0, QROOT*16], converted to a multiplier through
 * the qexp table; qadd is the bias term derived from s->qbias. */
1900 const int qlog= clip(s->qlog + b->qlog, 0, QROOT*16);
1901 int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
1902 int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
/* Special case for the band stored at the start of the DWT buffer and for
 * lossless mode (body not shown here). */
1907 if(b->buf == s->spatial_dwt_buffer || s->qlog == LOSSLESS_QLOG){
1912 /* If we are on the second or later slice, restore our index. */
1914 new_index = save_state[0];
1917 for(y=start_y; y<h; y++){
1920 DWTELEM * line = slice_buffer_get_line(sb, y * b->stride_line + b->buf_y_offset) + b->buf_x_offset;
1921 memset(line, 0, b->width*sizeof(DWTELEM));
1922 v = b->x_coeff[new_index].coeff;
1923 x = b->x_coeff[new_index++].x;
/* v holds magnitude in the upper bits and sign in bit 0: t is the scaled
 * magnitude, u is 0 or -1, and (t^u)-u negates t when the sign bit is set. */
1926 register int t= ( (v>>1)*qmul + qadd)>>QEXPSHIFT;
1927 register int u= -(v&1);
1928 line[x] = (t^u) - u;
1930 v = b->x_coeff[new_index].coeff;
1931 x = b->x_coeff[new_index++].x;
/* Profiling only. */
1934 if(w > 200 && start_y != 0/*level+1 == s->spatial_decomposition_count*/){
1935 STOP_TIMER("decode_subband")
1938 /* Save our variables for the next slice. */
1939 save_state[0] = new_index;
/* Resets all adaptive coder states to MID_STATE: the state arrays of every
 * subband in the three planes, plus s->header_state and s->block_state.
 * Orientation 0 is visited at level 0 only; higher levels start at
 * orientation 1. */
1944 static void reset_contexts(SnowContext *s){
1945 int plane_index, level, orientation;
1947 for(plane_index=0; plane_index<3; plane_index++){
1948 for(level=0; level<s->spatial_decomposition_count; level++){
1949 for(orientation=level ? 1:0; orientation<4; orientation++){
1950 memset(s->plane[plane_index].band[level][orientation].state, MID_STATE, sizeof(s->plane[plane_index].band[level][orientation].state));
1954 memset(s->header_state, MID_STATE, sizeof(s->header_state));
1955 memset(s->block_state, MID_STATE, sizeof(s->block_state));
/* Allocates the zero-initialised block array s->block.
 * w and h are the picture width and height shifted right by LOG2_MB_SIZE; the
 * negate/shift/negate form makes the result round up instead of down.
 * The element count w*h is multiplied by 4^block_max_depth through the
 * left shift by (block_max_depth*2).
 * NOTE(review): the allocation result is not checked in the lines shown --
 * confirm how a failed av_mallocz() is handled. */
1958 static int alloc_blocks(SnowContext *s){
1959 int w= -((-s->avctx->width )>>LOG2_MB_SIZE);
1960 int h= -((-s->avctx->height)>>LOG2_MB_SIZE);
1965 s->block= av_mallocz(w * h * sizeof(BlockNode) << (s->block_max_depth*2));
/* Copies range-coder state from s to d while keeping d's own output position:
 * d->bytestream and d->bytestream_start are saved before the copy and put
 * back afterwards. (The copy itself is presumably a struct assignment between
 * the two lines -- TODO confirm.) */
1969 static inline void copy_rac_state(RangeCoder *d, RangeCoder *s){
1970 uint8_t *bytestream= d->bytestream;
1971 uint8_t *bytestream_start= d->bytestream_start;
1973 d->bytestream= bytestream;
1974 d->bytestream_start= bytestream_start;
1977 //near copy & paste from dsputil, FIXME
/* Iterates over a w x w block of 8-bit pixels starting at pix, with rows
 * line_size bytes apart, and returns an int; judging by the name it is the
 * sum of the pixel values (accumulation not shown here -- TODO confirm). */
1978 static int pix_sum(uint8_t * pix, int line_size, int w)
1983 for (i = 0; i < w; i++) {
1984 for (j = 0; j < w; j++) {
/* Skip from the end of this row to the start of the next one. */
1988 pix += line_size - w;
1993 //near copy & paste from dsputil, FIXME
/* Iterates over a w x w block of 8-bit pixels (rows line_size bytes apart)
 * using the lookup table sq = squareTbl + 256; presumably returns the sum of
 * squared pixel values (accumulation not shown here -- TODO confirm). */
1994 static int pix_norm1(uint8_t * pix, int line_size, int w)
1997 uint32_t *sq = squareTbl + 256;
2000 for (i = 0; i < w; i++) {
2001 for (j = 0; j < w; j ++) {
/* Skip from the end of this row to the start of the next one. */
2005 pix += line_size - w;
/* Writes the same BlockNode value into every finest-level cell covered by the
 * block (x, y) of tree level `level`.
 * The block array is w = b_width << block_max_depth cells wide; a block at
 * `level` spans block_w = 1 << (block_max_depth - level) cells per side and
 * starts at index (x + y*w) << rem_depth.
 * `block` is presumably assembled from l, cb, cr, mx, my, type and level
 * (not shown here -- TODO confirm). */
2010 static inline void set_blocks(SnowContext *s, int level, int x, int y, int l, int cb, int cr, int mx, int my, int type){
2011 const int w= s->b_width << s->block_max_depth;
2012 const int rem_depth= s->block_max_depth - level;
2013 const int index= (x + y*w) << rem_depth;
2014 const int block_w= 1<<rem_depth;
2026 for(j=0; j<block_w; j++){
2027 for(i=0; i<block_w; i++){
2028 s->block[index + i + j*w]= block;
/* Sets up the source and reference plane pointers of the motion-estimation
 * context for the block at pixel position (x, y): c->src[0][i] gets the
 * source plane pointer unchanged, c->ref[0][i] gets the reference plane
 * pointer advanced by a per-plane offset. The two chroma offsets are
 * (y*uvstride + x) >> 1. */
2033 static inline void init_ref(MotionEstContext *c, uint8_t *src[3], uint8_t *ref[3], uint8_t *ref2[3], int x, int y, int ref_index){
2034 const int offset[3]= {
2036 ((y*c->uvstride + x)>>1),
2037 ((y*c->uvstride + x)>>1),
2041 c->src[0][i]= src [i];
2042 c->ref[0][i]= ref [i] + offset[i];
/* Named slots of the motion-vector predictor array P used by
 * encode_q_branch(): index 3 is the top-right neighbour, index 4 the median
 * predictor. */
2050 #define P_TOPRIGHT P[3]
2051 #define P_MEDIAN P[4]
2053 #define FLAG_QPEL 1 //must be 1
/* Rate-distortion decision for one node (x, y) of the block quad-tree at
 * depth `level`.
 * Steps visible in this function:
 *  1. derive colour and motion-vector predictors from the neighbouring
 *     blocks (left, top, top-left, top-right);
 *  2. run the motion search and trial-encode the block as inter into
 *     p_buffer, giving `score`;
 *  3. compute the block's mean colours, trial-encode it as intra into
 *     i_buffer, giving `iscore`;
 *  4. if deeper levels exist, encode the four children for real and compare
 *     their summed cost `score2` against both alternatives;
 *  5. otherwise copy the chosen trial bitstream to the real output position
 *     and store the decision with set_blocks().
 * Returns an int cost (return statements not shown here). */
2055 static int encode_q_branch(SnowContext *s, int level, int x, int y){
/* Scratch bitstreams and coder-state copies for the inter (p_) and intra (i_)
 * trial encodings. */
2056 uint8_t p_buffer[1024];
2057 uint8_t i_buffer[1024];
2058 uint8_t p_state[sizeof(s->block_state)];
2059 uint8_t i_state[sizeof(s->block_state)];
/* Remember where the real bitstream stood on entry. */
2061 uint8_t *pbbak= s->c.bytestream;
2062 uint8_t *pbbak_start= s->c.bytestream_start;
2063 int score, score2, iscore, i_len, p_len, block_s, sum;
2064 const int w= s->b_width << s->block_max_depth;
2065 const int h= s->b_height << s->block_max_depth;
2066 const int rem_depth= s->block_max_depth - level;
2067 const int index= (x + y*w) << rem_depth;
2068 const int block_w= 1<<(LOG2_MB_SIZE - level);
2069 int trx= (x+1)<<rem_depth;
2070 int try= (y+1)<<rem_depth;
/* Neighbours; positions outside the block array fall back to null_block (or
 * to left / tl for the diagonal ones). */
2071 BlockNode *left = x ? &s->block[index-1] : &null_block;
2072 BlockNode *top = y ? &s->block[index-w] : &null_block;
2073 BlockNode *right = trx<w ? &s->block[index+1] : &null_block;
2074 BlockNode *bottom= try<h ? &s->block[index+w] : &null_block;
2075 BlockNode *tl = y && x ? &s->block[index-w-1] : left;
2076 BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
/* Predictors: colours from the left block, motion vector as component-wise
 * median of left, top and top-right. */
2077 int pl = left->color[0];
2078 int pcb= left->color[1];
2079 int pcr= left->color[2];
2080 int pmx= mid_pred(left->mx, top->mx, tr->mx);
2081 int pmy= mid_pred(left->my, top->my, tr->my);
2084 const int stride= s->current_picture.linesize[0];
2085 const int uvstride= s->current_picture.linesize[1];
/* Pointers to this block in the three input planes (chroma at half size). */
2086 uint8_t *current_data[3]= { s->input_picture.data[0] + (x + y* stride)*block_w,
2087 s->input_picture.data[1] + (x + y*uvstride)*block_w/2,
2088 s->input_picture.data[2] + (x + y*uvstride)*block_w/2};
2090 int16_t last_mv[3][2];
2091 int qpel= !!(s->avctx->flags & CODEC_FLAG_QPEL); //unused
2092 const int shift= 1+qpel;
2093 MotionEstContext *c= &s->m.me;
/* Context indices for coding the mv deltas and the split flag. */
2094 int mx_context= av_log2(2*ABS(left->mx - top->mx));
2095 int my_context= av_log2(2*ABS(left->my - top->my));
2096 int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
2098 assert(sizeof(s->block_state) >= 256);
2100 set_blocks(s, level, x, y, pl, pcb, pcr, pmx, pmy, BLOCK_INTRA);
2104 // clip predictors / edge ?
2106 P_LEFT[0]= left->mx;
2107 P_LEFT[1]= left->my;
2110 P_TOPRIGHT[0]= tr->mx;
2111 P_TOPRIGHT[1]= tr->my;
/* Candidate vectors for the search: current, right and bottom blocks. */
2113 last_mv[0][0]= s->block[index].mx;
2114 last_mv[0][1]= s->block[index].my;
2115 last_mv[1][0]= right->mx;
2116 last_mv[1][1]= right->my;
2117 last_mv[2][0]= bottom->mx;
2118 last_mv[2][1]= bottom->my;
2125 init_ref(c, current_data, s->last_picture.data, NULL, block_w*x, block_w*y, 0);
2127 assert(s->m.me. stride == stride);
2128 assert(s->m.me.uvstride == uvstride);
2130 c->penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_cmp);
2131 c->sub_penalty_factor= get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_sub_cmp);
2132 c->mb_penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->mb_cmp);
2133 c->current_mv_penalty= c->mv_penalty[s->m.f_code=1] + MAX_MV;
/* Allowed motion-vector range relative to this block's position. */
2135 c->xmin = - x*block_w - 16+2;
2136 c->ymin = - y*block_w - 16+2;
2137 c->xmax = - (x+1)*block_w + (w<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-2;
2138 c->ymax = - (y+1)*block_w + (h<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-2;
/* Clamp the predictors to that range (range limits scaled by `shift`). */
2140 if(P_LEFT[0] > (c->xmax<<shift)) P_LEFT[0] = (c->xmax<<shift);
2141 if(P_LEFT[1] > (c->ymax<<shift)) P_LEFT[1] = (c->ymax<<shift);
2142 if(P_TOP[0] > (c->xmax<<shift)) P_TOP[0] = (c->xmax<<shift);
2143 if(P_TOP[1] > (c->ymax<<shift)) P_TOP[1] = (c->ymax<<shift);
2144 if(P_TOPRIGHT[0] < (c->xmin<<shift)) P_TOPRIGHT[0]= (c->xmin<<shift);
2145 if(P_TOPRIGHT[0] > (c->xmax<<shift)) P_TOPRIGHT[0]= (c->xmax<<shift); //due to pmx no clip
2146 if(P_TOPRIGHT[1] > (c->ymax<<shift)) P_TOPRIGHT[1]= (c->ymax<<shift);
2148 P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]);
2149 P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);
/* Search centre: either the left predictor or the median (the selecting
 * condition is not shown here). */
2152 c->pred_x= P_LEFT[0];
2153 c->pred_y= P_LEFT[1];
2155 c->pred_x = P_MEDIAN[0];
2156 c->pred_y = P_MEDIAN[1];
/* Integer search, sub-pel refinement, then the final comparison score. */
2159 score= ff_epzs_motion_search(&s->m, &mx, &my, P, 0, /*ref_index*/ 0, last_mv,
2160 (1<<16)>>shift, level-LOG2_MB_SIZE+4, block_w);
2162 assert(mx >= c->xmin);
2163 assert(mx <= c->xmax);
2164 assert(my >= c->ymin);
2165 assert(my <= c->ymax);
2167 score= s->m.me.sub_motion_search(&s->m, &mx, &my, score, 0, 0, level-LOG2_MB_SIZE+4, block_w);
2168 score= ff_get_mb_score(&s->m, mx, my, 0, 0, level-LOG2_MB_SIZE+4, block_w, 0);
2169 //FIXME if mb_cmp != SSE then intra cant be compared currently and mb_penalty vs. lambda2
/* ---- inter trial encoding into p_buffer with a private state copy ---- */
2173 pc.bytestream_start=
2174 pc.bytestream= p_buffer; //FIXME end/start? and at the other stoo
2175 memcpy(p_state, s->block_state, sizeof(s->block_state));
/* Leaf flag 1 (only coded when deeper levels exist), type bit 0 = inter,
 * then the two mv deltas against the predictor. */
2177 if(level!=s->block_max_depth)
2178 put_rac(&pc, &p_state[4 + s_context], 1);
2179 put_rac(&pc, &p_state[1 + left->type + top->type], 0);
2180 put_symbol(&pc, &p_state[128 + 32*mx_context], mx - pmx, 1);
2181 put_symbol(&pc, &p_state[128 + 32*my_context], my - pmy, 1);
2182 p_len= pc.bytestream - pc.bytestream_start;
/* Add the rate term: bytes written plus pending bytes (in bits) plus the
 * change in log2 of the coder range, weighted by lambda2. */
2183 score += (s->lambda2*(p_len*8
2184 + (pc.outstanding_count - s->c.outstanding_count)*8
2185 + (-av_log2(pc.range) + av_log2(s->c.range))
2186 ))>>FF_LAMBDA_SHIFT;
/* ---- intra cost: rounded mean luma l and squared deviation from it ---- */
2188 block_s= block_w*block_w;
2189 sum = pix_sum(current_data[0], stride, block_w);
2190 l= (sum + block_s/2)/block_s;
2191 iscore = pix_norm1(current_data[0], stride, block_w) - 2*l*sum + l*l*block_s;
/* Rounded mean of both chroma planes (quarter the luma sample count). */
2193 block_s= block_w*block_w>>2;
2194 sum = pix_sum(current_data[1], uvstride, block_w>>1);
2195 cb= (sum + block_s/2)/block_s;
2196 // iscore += pix_norm1(&current_mb[1][0], uvstride, block_w>>1) - 2*cb*sum + cb*cb*block_s;
2197 sum = pix_sum(current_data[2], uvstride, block_w>>1);
2198 cr= (sum + block_s/2)/block_s;
2199 // iscore += pix_norm1(&current_mb[2][0], uvstride, block_w>>1) - 2*cr*sum + cr*cr*block_s;
/* ---- intra trial encoding into i_buffer with a private state copy ---- */
2202 ic.bytestream_start=
2203 ic.bytestream= i_buffer; //FIXME end/start? and at the other stoo
2204 memcpy(i_state, s->block_state, sizeof(s->block_state));
2205 if(level!=s->block_max_depth)
2206 put_rac(&ic, &i_state[4 + s_context], 1);
2207 put_rac(&ic, &i_state[1 + left->type + top->type], 1);
2208 put_symbol(&ic, &i_state[32], l-pl , 1);
2209 put_symbol(&ic, &i_state[64], cb-pcb, 1);
2210 put_symbol(&ic, &i_state[96], cr-pcr, 1);
2211 i_len= ic.bytestream - ic.bytestream_start;
2212 iscore += (s->lambda2*(i_len*8
2213 + (ic.outstanding_count - s->c.outstanding_count)*8
2214 + (-av_log2(ic.range) + av_log2(s->c.range))
2215 ))>>FF_LAMBDA_SHIFT;
2217 // assert(score==256*256*256*64-1);
2218 assert(iscore < 255*255*256 + s->lambda2*10);
2219 assert(iscore >= 0);
2220 assert(l>=0 && l<=255);
2221 assert(pl>=0 && pl<=255);
/* Scene-change statistic fed from the two costs. */
2224 int varc= iscore >> 8;
2225 int vard= score >> 8;
2226 if (vard <= 64 || vard < varc)
2227 c->scene_change_score+= ff_sqrt(vard) - ff_sqrt(varc);
2229 c->scene_change_score+= s->m.qscale;
/* ---- split alternative: code flag 0 for real and recurse into the four
 * children; their summed cost competes with score and iscore ---- */
2232 if(level!=s->block_max_depth){
2233 put_rac(&s->c, &s->block_state[4 + s_context], 0);
2234 score2 = encode_q_branch(s, level+1, 2*x+0, 2*y+0);
2235 score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+0);
2236 score2+= encode_q_branch(s, level+1, 2*x+0, 2*y+1);
2237 score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+1);
2238 score2+= s->lambda2>>FF_LAMBDA_SHIFT; //FIXME exact split overhead
2240 if(score2 < score && score2 < iscore)
/* ---- intra chosen: splice the intra trial bytes into the real stream at the
 * saved position and commit block data and coder states ---- */
2245 memcpy(pbbak, i_buffer, i_len);
2247 s->c.bytestream_start= pbbak_start;
2248 s->c.bytestream= pbbak + i_len;
2249 set_blocks(s, level, x, y, l, cb, cr, pmx, pmy, BLOCK_INTRA);
2250 memcpy(s->block_state, i_state, sizeof(s->block_state));
/* ---- inter chosen: same with the inter trial bytes ---- */
2253 memcpy(pbbak, p_buffer, p_len);
2255 s->c.bytestream_start= pbbak_start;
2256 s->c.bytestream= pbbak + p_len;
2257 set_blocks(s, level, x, y, pl, pcb, pcr, mx, my, 0);
2258 memcpy(s->block_state, p_state, sizeof(s->block_state));
/* Returns non-zero when blocks a and b carry the same coded information:
 *  - both intra: all three colour components are equal;
 *  - otherwise: equal motion vector and equal BLOCK_INTRA flag.
 * The differences are OR-ed together so that a single negation tests them
 * all at once. */
2263 static always_inline int same_block(BlockNode *a, BlockNode *b){
2264 if((a->type&BLOCK_INTRA) && (b->type&BLOCK_INTRA)){
2265 return !((a->color[0] - b->color[0]) | (a->color[1] - b->color[1]) | (a->color[2] - b->color[2]));
2267 return !((a->mx - b->mx) | (a->my - b->my) | ((a->type ^ b->type)&BLOCK_INTRA));
/* Writes the already decided block tree node (x, y) at depth `level` to the
 * bitstream; no search is done here. If the four cells b, b+1, b+w and b+w+1
 * are equal according to same_block(), a "leaf" flag (1) is coded, otherwise
 * flag 0 is coded and the four children are written recursively. For a leaf,
 * the type bit and either the colour deltas (intra) or the motion-vector
 * deltas (inter) against the neighbour-based predictors follow. */
2271 static void encode_q_branch2(SnowContext *s, int level, int x, int y){
2272 const int w= s->b_width << s->block_max_depth;
2273 const int rem_depth= s->block_max_depth - level;
2274 const int index= (x + y*w) << rem_depth;
2275 int trx= (x+1)<<rem_depth;
2276 BlockNode *b= &s->block[index];
/* Neighbours; positions outside the array fall back to null_block / left / tl. */
2277 BlockNode *left = x ? &s->block[index-1] : &null_block;
2278 BlockNode *top = y ? &s->block[index-w] : &null_block;
2279 BlockNode *tl = y && x ? &s->block[index-w-1] : left;
2280 BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
/* Predictors: colours from the left block, mv as median of left/top/tr. */
2281 int pl = left->color[0];
2282 int pcb= left->color[1];
2283 int pcr= left->color[2];
2284 int pmx= mid_pred(left->mx, top->mx, tr->mx);
2285 int pmy= mid_pred(left->my, top->my, tr->my);
2286 int mx_context= av_log2(2*ABS(left->mx - top->mx));
2287 int my_context= av_log2(2*ABS(left->my - top->my));
2288 int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
2291 set_blocks(s, level, x, y, pl, pcb, pcr, pmx, pmy, BLOCK_INTRA);
2295 if(level!=s->block_max_depth){
2296 if(same_block(b,b+1) && same_block(b,b+w) && same_block(b,b+w+1)){
2297 put_rac(&s->c, &s->block_state[4 + s_context], 1);
2299 put_rac(&s->c, &s->block_state[4 + s_context], 0);
2300 encode_q_branch2(s, level+1, 2*x+0, 2*y+0);
2301 encode_q_branch2(s, level+1, 2*x+1, 2*y+0);
2302 encode_q_branch2(s, level+1, 2*x+0, 2*y+1);
2303 encode_q_branch2(s, level+1, 2*x+1, 2*y+1);
/* Leaf payload. */
2307 if(b->type & BLOCK_INTRA){
2308 put_rac(&s->c, &s->block_state[1 + (left->type&1) + (top->type&1)], 1);
2309 put_symbol(&s->c, &s->block_state[32], b->color[0]-pl , 1);
2310 put_symbol(&s->c, &s->block_state[64], b->color[1]-pcb, 1);
2311 put_symbol(&s->c, &s->block_state[96], b->color[2]-pcr, 1);
2312 set_blocks(s, level, x, y, b->color[0], b->color[1], b->color[2], pmx, pmy, BLOCK_INTRA);
2314 put_rac(&s->c, &s->block_state[1 + (left->type&1) + (top->type&1)], 0);
2315 put_symbol(&s->c, &s->block_state[128 + 32*mx_context], b->mx - pmx, 1);
2316 put_symbol(&s->c, &s->block_state[128 + 32*my_context], b->my - pmy, 1);
2317 set_blocks(s, level, x, y, pl, pcb, pcr, b->mx, b->my, 0);
/**
 * Read the block at (x, y) of tree level `level` from the range coder and
 * store it with set_blocks().
 *
 * At the maximum depth, or when the flag read on context 4 + s_context is
 * set, one block is decoded: the colour starts from the left neighbour, the
 * motion vector from mid_pred(left, top, tr), and deltas read from the
 * bitstream are added.  Otherwise the four children at level+1 are decoded
 * recursively.
 */
2321 static void decode_q_branch(SnowContext *s, int level, int x, int y){
2322 const int w= s->b_width << s->block_max_depth;
2323 const int rem_depth= s->block_max_depth - level;
2324 const int index= (x + y*w) << rem_depth;
2325 int trx= (x+1)<<rem_depth;
2326 BlockNode *left = x ? &s->block[index-1] : &null_block;
2327 BlockNode *top = y ? &s->block[index-w] : &null_block;
2328 BlockNode *tl = y && x ? &s->block[index-w-1] : left;
2329 BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
2330 int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
2333 set_blocks(s, level, x, y, null_block.color[0], null_block.color[1], null_block.color[2], null_block.mx, null_block.my, BLOCK_INTRA);
2337 if(level==s->block_max_depth || get_rac(&s->c, &s->block_state[4 + s_context])){
2339 int l = left->color[0];
2340 int cb= left->color[1];
2341 int cr= left->color[2];
2342 int mx= mid_pred(left->mx, top->mx, tr->mx);
2343 int my= mid_pred(left->my, top->my, tr->my);
// The terms multiplied by 0 contribute nothing to the context value.
2344 int mx_context= av_log2(2*ABS(left->mx - top->mx)) + 0*av_log2(2*ABS(tr->mx - top->mx));
2345 int my_context= av_log2(2*ABS(left->my - top->my)) + 0*av_log2(2*ABS(tr->my - top->my));
// NOTE(review): the context here is left->type + top->type, while
// encode_q_branch2 uses (left->type&1) + (top->type&1).  The two agree only
// if no type bit other than bit 0 is ever set on the decoder side — confirm.
2347 type= get_rac(&s->c, &s->block_state[1 + left->type + top->type]) ? BLOCK_INTRA : 0;
2350 l += get_symbol(&s->c, &s->block_state[32], 1);
2351 cb+= get_symbol(&s->c, &s->block_state[64], 1);
2352 cr+= get_symbol(&s->c, &s->block_state[96], 1);
2354 mx+= get_symbol(&s->c, &s->block_state[128 + 32*mx_context], 1);
2355 my+= get_symbol(&s->c, &s->block_state[128 + 32*my_context], 1);
2357 set_blocks(s, level, x, y, l, cb, cr, mx, my, type);
2359 decode_q_branch(s, level+1, 2*x+0, 2*y+0);
2360 decode_q_branch(s, level+1, 2*x+1, 2*y+0);
2361 decode_q_branch(s, level+1, 2*x+0, 2*y+1);
2362 decode_q_branch(s, level+1, 2*x+1, 2*y+1);
/**
 * Encode the block data of the current frame, starting each tree at level 0.
 *
 * With me_method == ME_ITER the pre-computed blocks are written by
 * encode_q_branch2(); otherwise encode_q_branch() is used.  An error is
 * logged when fewer than w*MB_SIZE*MB_SIZE*3 bytes remain in the range
 * coder's output buffer.
 * NOTE(review): the statement guarded by the first ME_ITER check is not
 * visible here — presumably the iterative motion search; confirm.
 */
2366 static void encode_blocks(SnowContext *s){
2371 if(s->avctx->me_method == ME_ITER && !s->keyframe)
2375 if(s->c.bytestream_end - s->c.bytestream < w*MB_SIZE*MB_SIZE*3){ //FIXME nicer limit
2376 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2380 if(s->avctx->me_method == ME_ITER)
2381 encode_q_branch2(s, 0, x, y);
2383 encode_q_branch (s, 0, x, y);
/**
 * Decode the block data of the current frame by calling decode_q_branch()
 * at tree level 0 for each (x, y).
 */
2388 static void decode_blocks(SnowContext *s){
2395 decode_q_branch(s, 0, x, y);
/**
 * Sub-pixel motion compensation of one b_w x b_h block in two passes.
 *
 * The first pass runs over b_h+5 rows and filters horizontally with the
 * 6-tap kernel (1, -5, 20, 20, -5, 1); the second pass applies the same
 * kernel vertically to rows of tmp.  In each pass the half-sample value
 * `am` is blended with the nearer full sample: with a2 for a fraction below
 * 8, with a3 otherwise, using weights that sum to 256.
 * NOTE(review): the store statements are not visible here — presumably pass
 * one writes tmp and pass two writes dst; confirm.
 *
 * @param tmp    scratch buffer addressed with `stride`; rewound by
 *               (b_h+5)*stride between the passes
 * @param dx,dy  fractional position, compared against 8 and subtracted from
 *               16, i.e. presumably in 0..15 — confirm against callers
 */
2400 static void mc_block(uint8_t *dst, uint8_t *src, uint8_t *tmp, int stride, int b_w, int b_h, int dx, int dy){
2403 for(y=0; y < b_h+5; y++){
2404 for(x=0; x < b_w; x++){
2411 // int am= 9*(a1+a2) - (a0+a3);
2412 int am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5);
2413 // int am= 18*(a2+a3) - 2*(a1+a4);
2414 // int aL= (-7*a0 + 105*a1 + 35*a2 - 5*a3)>>3;
2415 // int aR= (-7*a3 + 105*a2 + 35*a1 - 5*a0)>>3;
2417 // if(b_w==16) am= 8*(a1+a2);
2419 if(dx<8) am = (32*a2*( 8-dx) + am* dx + 128)>>8;
2420 else am = ( am*(16-dx) + 32*a3*(dx-8) + 128)>>8;
2422 /* FIXME Try increasing tmp buffer to 16 bits and not clipping here. Should give marginally better results. - Robert*/
// Clamp: a negative am becomes 0; a value above 255 becomes -1, whose low
// byte is 255 once stored in a uint8_t.
2423 if(am&(~255)) am= ~(am>>31);
2427 /* if (dx< 4) tmp[x + y*stride]= (16*a1*( 4-dx) + aL* dx + 32)>>6;
2428 else if(dx< 8) tmp[x + y*stride]= ( aL*( 8-dx) + am*(dx- 4) + 32)>>6;
2429 else if(dx<12) tmp[x + y*stride]= ( am*(12-dx) + aR*(dx- 8) + 32)>>6;
2430 else tmp[x + y*stride]= ( aR*(16-dx) + 16*a2*(dx-12) + 32)>>6;*/
2435 tmp -= (b_h+5)*stride;
2437 for(y=0; y < b_h; y++){
2438 for(x=0; x < b_w; x++){
// Six vertically adjacent samples of the horizontally filtered data.
2439 int a0= tmp[x + 0*stride];
2440 int a1= tmp[x + 1*stride];
2441 int a2= tmp[x + 2*stride];
2442 int a3= tmp[x + 3*stride];
2443 int a4= tmp[x + 4*stride];
2444 int a5= tmp[x + 5*stride];
2445 int am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5);
2446 // int am= 18*(a2+a3) - 2*(a1+a4);
2447 /* int aL= (-7*a0 + 105*a1 + 35*a2 - 5*a3)>>3;
2448 int aR= (-7*a3 + 105*a2 + 35*a1 - 5*a0)>>3;*/
2450 // if(b_w==16) am= 8*(a1+a2);
2452 if(dy<8) am = (32*a2*( 8-dy) + am* dy + 128)>>8;
2453 else am = ( am*(16-dy) + 32*a3*(dy-8) + 128)>>8;
2455 if(am&(~255)) am= ~(am>>31);
2458 /* if (dy< 4) tmp[x + y*stride]= (16*a1*( 4-dy) + aL* dy + 32)>>6;
2459 else if(dy< 8) tmp[x + y*stride]= ( aL*( 8-dy) + am*(dy- 4) + 32)>>6;
2460 else if(dy<12) tmp[x + y*stride]= ( am*(12-dy) + aR*(dy- 8) + 32)>>6;
2461 else tmp[x + y*stride]= ( aR*(16-dy) + 16*a2*(dy-12) + 32)>>6;*/
2466 STOP_TIMER("mc_block")
/*
 * mca(dx,dy,b_w) defines a function mc_block_hpel<dx><dy><b_w> that calls
 * mc_block() for a square b_w x b_w block at the fixed fractional position
 * (dx, dy).  src is moved back by 2 columns and 2 rows before the call, and
 * the scratch buffer is a variable-length array of stride*(b_w+5) bytes on
 * the stack.
 * NOTE(review): the parameter `h` is not used in the visible lines, and the
 * VLA size grows with the caller's stride — confirm both are intended.
 */
2469 #define mca(dx,dy,b_w)\
2470 static void mc_block_hpel ## dx ## dy ## b_w(uint8_t *dst, uint8_t *src, int stride, int h){\
2471 uint8_t tmp[stride*(b_w+5)];\
2473 mc_block(dst, src-2-2*stride, tmp, stride, b_w, b_w, dx, dy);\
/**
 * Produce the prediction of one b_w x b_h block into dst.
 *
 * Intra blocks are filled with the constant block->color[plane_index].
 * Otherwise the motion vector is scaled by s->mv_scale (doubled for plane 0),
 * its low four bits give the fractional position (dx, dy), and the block is
 * interpolated from src at (sx, sy).
 * NOTE(review): the lines that add the integer part of the vector to sx/sy
 * are not visible here — confirm.
 */
2485 static void pred_block(SnowContext *s, uint8_t *dst, uint8_t *src, uint8_t *tmp, int stride, int sx, int sy, int b_w, int b_h, BlockNode *block, int plane_index, int w, int h){
2486 if(block->type & BLOCK_INTRA){
2488 const int color= block->color[plane_index];
2489 for(y=0; y < b_h; y++){
2490 for(x=0; x < b_w; x++){
2491 dst[x + y*stride]= color;
2495 const int scale= plane_index ? s->mv_scale : 2*s->mv_scale;
2496 int mx= block->mx*scale;
2497 int my= block->my*scale;
2498 const int dx= mx&15;
2499 const int dy= my&15;
2502 src += sx + sy*stride;
// The unsigned casts make negative sx/sy compare as very large, so one test
// covers both "before the picture" and "too close to the far edge".  In that
// case a (b_w+5) x (b_h+5) edge-extended copy is built at tmp + MB_SIZE.
2503 if( (unsigned)sx >= w - b_w - 4
2504 || (unsigned)sy >= h - b_h - 4){
2505 ff_emulated_edge_mc(tmp + MB_SIZE, src, stride, b_w+5, b_h+5, sx, sy, w, h);
// mc_block() handles every case the table below cannot: fractions that are
// not multiples of 4, non-square blocks, or sizes other than 4, 8 and 16.
2508 if((dx&3) || (dy&3) || b_w!=b_h || (b_w!=4 && b_w!=8 && b_w!=16))
2509 mc_block(dst, src, tmp, stride, b_w, b_h, dx, dy);
2511 s->dsp.put_h264_qpel_pixels_tab[2-(b_w>>3)][dy+(dx>>2)](dst,src + 2 + 2*stride,stride);
2515 //FIXME name cleanup (b_w, block_w, b_width stuff)
/**
 * Slice-buffer variant of add_yblock(): combine the predictions of the four
 * blocks (lt, rt, lb, rb) that meet at block position (b_x, b_y), weighted
 * by four windows taken from the obmc table, for the b_w x b_h area at
 * (src_x, src_y).
 *
 * Coefficient rows are fetched with slice_buffer_get_line(sb, src_y + y) and
 * indexed at x + src_x; 8-bit output goes to dst8 with stride src_stride.
 * The area is clipped against the block grid and against w/h; nothing is
 * done if it becomes empty.
 * NOTE(review): the statements under the same_block() tests are not visible
 * here — presumably an already computed prediction is reused when two
 * neighbours are identical; confirm.
 */
2516 static always_inline void add_yblock_buffered(SnowContext *s, slice_buffer * sb, DWTELEM *old_dst, uint8_t *dst8, uint8_t *src, uint8_t *obmc, int src_x, int src_y, int b_w, int b_h, int w, int h, int dst_stride, int src_stride, int obmc_stride, int b_x, int b_y, int add, int plane_index){
2517 DWTELEM * dst = NULL;
2518 const int b_width = s->b_width << s->block_max_depth;
2519 const int b_height= s->b_height << s->block_max_depth;
2520 const int b_stride= b_width;
2521 BlockNode *lt= &s->block[b_x + b_y*b_stride];
2522 BlockNode *rt= lt+1;
2523 BlockNode *lb= lt+b_stride;
2524 BlockNode *rb= lb+1;
2526 int tmp_step= src_stride >= 7*MB_SIZE ? MB_SIZE : MB_SIZE*src_stride;
// Variable-length scratch array on the stack; its size scales with src_stride.
2527 uint8_t tmp[src_stride*7*MB_SIZE]; //FIXME align
2534 }else if(b_x + 1 >= b_width){
2541 }else if(b_y + 1 >= b_height){
2546 if(src_x<0){ //FIXME merge with prev & always round internal width up to *16
2550 }else if(src_x + b_w > w){
2554 obmc -= src_y*obmc_stride;
2557 }else if(src_y + b_h> h){
2561 if(b_w<=0 || b_h<=0) return;
2563 assert(src_stride > 2*MB_SIZE + 5);
2564 // old_dst += src_x + src_y*dst_stride;
2565 dst8+= src_x + src_y*src_stride;
2566 // src += src_x + src_y*src_stride;
2568 ptmp= tmp + 3*tmp_step;
2571 pred_block(s, block[0], src, tmp, src_stride, src_x, src_y, b_w, b_h, lt, plane_index, w, h);
2573 if(same_block(lt, rt)){
2578 pred_block(s, block[1], src, tmp, src_stride, src_x, src_y, b_w, b_h, rt, plane_index, w, h);
2581 if(same_block(lt, lb)){
2583 }else if(same_block(rt, lb)){
2588 pred_block(s, block[2], src, tmp, src_stride, src_x, src_y, b_w, b_h, lb, plane_index, w, h);
2591 if(same_block(lt, rb) ){
2593 }else if(same_block(rt, rb)){
2595 }else if(same_block(lb, rb)){
2599 pred_block(s, block[3], src, tmp, src_stride, src_x, src_y, b_w, b_h, rb, plane_index, w, h);
// NOTE(review): the four loops below write through dst, which is still NULL
// at this point (it is only assigned inside the last loop).  Presumably they
// are disabled by preprocessor lines not visible here — confirm.
2602 for(y=0; y<b_h; y++){
2603 for(x=0; x<b_w; x++){
2604 int v= obmc [x + y*obmc_stride] * block[3][x + y*src_stride] * (256/OBMC_MAX);
2605 if(add) dst[x + y*dst_stride] += v;
2606 else dst[x + y*dst_stride] -= v;
2609 for(y=0; y<b_h; y++){
2610 uint8_t *obmc2= obmc + (obmc_stride>>1);
2611 for(x=0; x<b_w; x++){
2612 int v= obmc2[x + y*obmc_stride] * block[2][x + y*src_stride] * (256/OBMC_MAX);
2613 if(add) dst[x + y*dst_stride] += v;
2614 else dst[x + y*dst_stride] -= v;
2617 for(y=0; y<b_h; y++){
2618 uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
2619 for(x=0; x<b_w; x++){
2620 int v= obmc3[x + y*obmc_stride] * block[1][x + y*src_stride] * (256/OBMC_MAX);
2621 if(add) dst[x + y*dst_stride] += v;
2622 else dst[x + y*dst_stride] -= v;
2625 for(y=0; y<b_h; y++){
2626 uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
2627 uint8_t *obmc4= obmc3+ (obmc_stride>>1);
2628 for(x=0; x<b_w; x++){
2629 int v= obmc4[x + y*obmc_stride] * block[0][x + y*src_stride] * (256/OBMC_MAX);
2630 if(add) dst[x + y*dst_stride] += v;
2631 else dst[x + y*dst_stride] -= v;
2639 for(y=0; y<b_h; y++){
2640 //FIXME ugly misuse of obmc_stride
// The four windows are the quadrants of the obmc table: obmc1 at the current
// row, obmc2 half a row to the right, obmc3 half the table further down,
// obmc4 both.
2641 uint8_t *obmc1= obmc + y*obmc_stride;
2642 uint8_t *obmc2= obmc1+ (obmc_stride>>1);
2643 uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1);
2644 uint8_t *obmc4= obmc3+ (obmc_stride>>1);
2645 dst = slice_buffer_get_line(sb, src_y + y);
2646 for(x=0; x<b_w; x++){
// Each window is paired with the prediction of the diagonally opposite
// block: obmc1 with rb (block[3]) ... obmc4 with lt (block[0]).
2647 int v= obmc1[x] * block[3][x + y*src_stride]
2648 +obmc2[x] * block[2][x + y*src_stride]
2649 +obmc3[x] * block[1][x + y*src_stride]
2650 +obmc4[x] * block[0][x + y*src_stride];
2652 v <<= 8 - LOG2_OBMC_MAX;
2654 v += 1<<(7 - FRAC_BITS);
2655 v >>= 8 - FRAC_BITS;
// NOTE(review): the branch lines are not visible — presumably the next five
// statements run when `add` is set (reconstruct, round, clamp, store to
// dst8) and the final subtraction runs otherwise; confirm.
2658 // v += old_dst[x + y*dst_stride];
2659 v += dst[x + src_x];
2660 v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS;
2661 if(v&(~255)) v= ~(v>>31);
2662 dst8[x + y*src_stride] = v;
2664 // old_dst[x + y*dst_stride] -= v;
2665 dst[x + src_x] -= v;
2669 STOP_TIMER("Inner add y block")
2674 //FIXME name cleanup (b_w, block_w, b_width stuff)
/**
 * Combine the predictions of the four blocks (lt, rt, lb, rb) that meet at
 * block position (b_x, b_y), weighted by four windows taken from the obmc
 * table, for the b_w x b_h area at (src_x, src_y).
 *
 * dst is a coefficient plane with stride dst_stride; dst8 is the 8-bit
 * picture with stride src_stride.  The area is clipped against the block
 * grid and against w/h; nothing is done if it becomes empty.
 * NOTE(review): the statements under the same_block() tests are not visible
 * here — presumably an already computed prediction is reused when two
 * neighbours are identical; confirm.
 */
2675 static always_inline void add_yblock(SnowContext *s, DWTELEM *dst, uint8_t *dst8, uint8_t *src, uint8_t *obmc, int src_x, int src_y, int b_w, int b_h, int w, int h, int dst_stride, int src_stride, int obmc_stride, int b_x, int b_y, int add, int plane_index){
2676 const int b_width = s->b_width << s->block_max_depth;
2677 const int b_height= s->b_height << s->block_max_depth;
2678 const int b_stride= b_width;
2679 BlockNode *lt= &s->block[b_x + b_y*b_stride];
2680 BlockNode *rt= lt+1;
2681 BlockNode *lb= lt+b_stride;
2682 BlockNode *rb= lb+1;
2684 int tmp_step= src_stride >= 7*MB_SIZE ? MB_SIZE : MB_SIZE*src_stride;
// Variable-length scratch array on the stack; its size scales with src_stride.
2685 uint8_t tmp[src_stride*7*MB_SIZE]; //FIXME align
2692 }else if(b_x + 1 >= b_width){
2699 }else if(b_y + 1 >= b_height){
2704 if(src_x<0){ //FIXME merge with prev & always round internal width up to *16
2708 }else if(src_x + b_w > w){
2712 obmc -= src_y*obmc_stride;
2715 }else if(src_y + b_h> h){
2719 if(b_w<=0 || b_h<=0) return;
2721 assert(src_stride > 2*MB_SIZE + 5);
2722 dst += src_x + src_y*dst_stride;
2723 dst8+= src_x + src_y*src_stride;
2724 // src += src_x + src_y*src_stride;
2726 ptmp= tmp + 3*tmp_step;
2729 pred_block(s, block[0], src, tmp, src_stride, src_x, src_y, b_w, b_h, lt, plane_index, w, h);
2731 if(same_block(lt, rt)){
2736 pred_block(s, block[1], src, tmp, src_stride, src_x, src_y, b_w, b_h, rt, plane_index, w, h);
2739 if(same_block(lt, lb)){
2741 }else if(same_block(rt, lb)){
2746 pred_block(s, block[2], src, tmp, src_stride, src_x, src_y, b_w, b_h, lb, plane_index, w, h);
2749 if(same_block(lt, rb) ){
2751 }else if(same_block(rt, rb)){
2753 }else if(same_block(lb, rb)){
2757 pred_block(s, block[3], src, tmp, src_stride, src_x, src_y, b_w, b_h, rb, plane_index, w, h);
// One pass per neighbour: each adds (or subtracts, when add == 0) that
// neighbour's weighted prediction into dst.
// NOTE(review): these four loops and the combined loop after them look like
// alternative implementations, presumably selected by preprocessor lines
// not visible here — confirm.
2760 for(y=0; y<b_h; y++){
2761 for(x=0; x<b_w; x++){
2762 int v= obmc [x + y*obmc_stride] * block[3][x + y*src_stride] * (256/OBMC_MAX);
2763 if(add) dst[x + y*dst_stride] += v;
2764 else dst[x + y*dst_stride] -= v;
2767 for(y=0; y<b_h; y++){
2768 uint8_t *obmc2= obmc + (obmc_stride>>1);
2769 for(x=0; x<b_w; x++){
2770 int v= obmc2[x + y*obmc_stride] * block[2][x + y*src_stride] * (256/OBMC_MAX);
2771 if(add) dst[x + y*dst_stride] += v;
2772 else dst[x + y*dst_stride] -= v;
2775 for(y=0; y<b_h; y++){
2776 uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
2777 for(x=0; x<b_w; x++){
2778 int v= obmc3[x + y*obmc_stride] * block[1][x + y*src_stride] * (256/OBMC_MAX);
2779 if(add) dst[x + y*dst_stride] += v;
2780 else dst[x + y*dst_stride] -= v;
2783 for(y=0; y<b_h; y++){
2784 uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
2785 uint8_t *obmc4= obmc3+ (obmc_stride>>1);
2786 for(x=0; x<b_w; x++){
2787 int v= obmc4[x + y*obmc_stride] * block[0][x + y*src_stride] * (256/OBMC_MAX);
2788 if(add) dst[x + y*dst_stride] += v;
2789 else dst[x + y*dst_stride] -= v;
2793 for(y=0; y<b_h; y++){
2794 //FIXME ugly misuse of obmc_stride
// The four windows are the quadrants of the obmc table: obmc1 at the current
// row, obmc2 half a row to the right, obmc3 half the table further down,
// obmc4 both.
2795 uint8_t *obmc1= obmc + y*obmc_stride;
2796 uint8_t *obmc2= obmc1+ (obmc_stride>>1);
2797 uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1);
2798 uint8_t *obmc4= obmc3+ (obmc_stride>>1);
2799 for(x=0; x<b_w; x++){
2800 int v= obmc1[x] * block[3][x + y*src_stride]
2801 +obmc2[x] * block[2][x + y*src_stride]
2802 +obmc3[x] * block[1][x + y*src_stride]
2803 +obmc4[x] * block[0][x + y*src_stride];
2805 v <<= 8 - LOG2_OBMC_MAX;
2807 v += 1<<(7 - FRAC_BITS);
2808 v >>= 8 - FRAC_BITS;
// NOTE(review): the branch lines are not visible — presumably the next four
// statements run when `add` is set (reconstruct, round, clamp, store to
// dst8) and the final subtraction runs otherwise; confirm.
2811 v += dst[x + y*dst_stride];
2812 v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS;
2813 if(v&(~255)) v= ~(v>>31);
2814 dst8[x + y*src_stride] = v;
2816 dst[x + y*dst_stride] -= v;
/**
 * Apply the prediction to one row of blocks (mb_y) of a plane, working on
 * slice-buffer lines.
 *
 * For keyframes (or when debug bit 512 is set) no motion prediction is used:
 * the visible loops either convert the lines to 8 bit with a +128 offset and
 * store them in dst8, or subtract 128<<FRAC_BITS from the lines.  Otherwise
 * add_yblock_buffered() is called for mb_x = 0..mb_w inclusive, each area
 * being shifted by half a block up and to the left of its block.
 * NOTE(review): which of the two keyframe loops belongs to add != 0 is not
 * visible here — presumably the 8-bit store; confirm.
 */
2823 static always_inline void predict_slice_buffered(SnowContext *s, slice_buffer * sb, DWTELEM * old_buffer, int plane_index, int add, int mb_y){
2824 Plane *p= &s->plane[plane_index];
2825 const int mb_w= s->b_width << s->block_max_depth;
2826 const int mb_h= s->b_height << s->block_max_depth;
2828 int block_size = MB_SIZE >> s->block_max_depth;
// Planes other than 0 use half the block size and the next-smaller window.
2829 int block_w = plane_index ? block_size/2 : block_size;
2830 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
2831 int obmc_stride= plane_index ? block_size : 2*block_size;
2832 int ref_stride= s->current_picture.linesize[plane_index];
2833 uint8_t *ref = s->last_picture.data[plane_index];
2834 uint8_t *dst8= s->current_picture.data[plane_index];
2839 if(s->keyframe || (s->avctx->debug&512)){
2844 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++)
2846 // DWTELEM * line = slice_buffer_get_line(sb, y);
// The line pointer is read straight from sb->line[] here instead of going
// through slice_buffer_get_line().
2847 DWTELEM * line = sb->line[y];
2850 // int v= buf[x + y*w] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
2851 int v= line[x] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
2853 if(v&(~255)) v= ~(v>>31);
2854 dst8[x + y*ref_stride]= v;
2858 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++)
2860 // DWTELEM * line = slice_buffer_get_line(sb, y);
2861 DWTELEM * line = sb->line[y];
2864 line[x] -= 128 << FRAC_BITS;
2865 // buf[x + y*w]-= 128<<FRAC_BITS;
2873 for(mb_x=0; mb_x<=mb_w; mb_x++){
2876 add_yblock_buffered(s, sb, old_buffer, dst8, ref, obmc,
2877 block_w*mb_x - block_w/2,
2878 block_w*mb_y - block_w/2,
2881 w, ref_stride, obmc_stride,
2885 STOP_TIMER("add_yblock")
2888 STOP_TIMER("predict_slice")
/**
 * Apply the prediction to one row of blocks (mb_y) of a plane held in the
 * contiguous coefficient buffer buf, addressed as buf[x + y*w].
 *
 * For keyframes (or when debug bit 512 is set) no motion prediction is used:
 * the visible loops either convert buf to 8 bit with a +128 offset and store
 * it in dst8, or subtract 128<<FRAC_BITS from buf.  Otherwise add_yblock()
 * is called for mb_x = 0..mb_w inclusive, each area being shifted by half a
 * block up and to the left of its block.
 * NOTE(review): which of the two keyframe loops belongs to add != 0 is not
 * visible here — presumably the 8-bit store; confirm.
 */
2891 static always_inline void predict_slice(SnowContext *s, DWTELEM *buf, int plane_index, int add, int mb_y){
2892 Plane *p= &s->plane[plane_index];
2893 const int mb_w= s->b_width << s->block_max_depth;
2894 const int mb_h= s->b_height << s->block_max_depth;
2896 int block_size = MB_SIZE >> s->block_max_depth;
// Planes other than 0 use half the block size and the next-smaller window.
2897 int block_w = plane_index ? block_size/2 : block_size;
2898 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
2899 const int obmc_stride= plane_index ? block_size : 2*block_size;
2900 int ref_stride= s->current_picture.linesize[plane_index];
2901 uint8_t *ref = s->last_picture.data[plane_index];
2902 uint8_t *dst8= s->current_picture.data[plane_index];
2907 if(s->keyframe || (s->avctx->debug&512)){
2912 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
2914 int v= buf[x + y*w] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
2916 if(v&(~255)) v= ~(v>>31);
2917 dst8[x + y*ref_stride]= v;
2921 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
2923 buf[x + y*w]-= 128<<FRAC_BITS;
2931 for(mb_x=0; mb_x<=mb_w; mb_x++){
2934 add_yblock(s, buf, dst8, ref, obmc,
2935 block_w*mb_x - block_w/2,
2936 block_w*mb_y - block_w/2,
2939 w, ref_stride, obmc_stride,
2943 STOP_TIMER("add_yblock")
2946 STOP_TIMER("predict_slice")
/**
 * Run predict_slice() over a whole plane, for mb_y = 0..mb_h inclusive.
 * The extra iteration matches the half-block shift applied to each area in
 * predict_slice().
 */
2949 static always_inline void predict_plane(SnowContext *s, DWTELEM *buf, int plane_index, int add){
2950 const int mb_h= s->b_height << s->block_max_depth;
2952 for(mb_y=0; mb_y<=mb_h; mb_y++)
2953 predict_slice(s, buf, plane_index, add, mb_y);
/**
 * Estimate the intra colour of block (mb_x, mb_y) for one plane.
 *
 * The block is temporarily marked intra with colour 0 and the four areas
 * that overlap it are rendered with add_yblock().  The colour is then the
 * weighted fit of the residual: ab sums (src - dst) * weight, aa sums
 * weight^2, and the result is ((ab<<6) + aa/2) / aa clipped to 0..255.
 * NOTE(review): the block is saved in `backup` but the restore is not visible
 * here — confirm it is put back before returning.
 */
2956 static int get_dc(SnowContext *s, int mb_x, int mb_y, int plane_index){
2958 Plane *p= &s->plane[plane_index];
2959 const int block_size = MB_SIZE >> s->block_max_depth;
2960 const int block_w = plane_index ? block_size/2 : block_size;
2961 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
2962 const int obmc_stride= plane_index ? block_size : 2*block_size;
2963 const int ref_stride= s->current_picture.linesize[plane_index];
2964 uint8_t *ref= s-> last_picture.data[plane_index];
2965 uint8_t *dst= s->current_picture.data[plane_index];
2966 uint8_t *src= s-> input_picture.data[plane_index];
// NOTE(review): zero_dst is const but is passed below as add_yblock()'s
// non-const dst argument (with dst_stride 0) — confirm add_yblock() never
// writes through it on this path.
2967 const static DWTELEM zero_dst[4096]; //FIXME
2968 const int b_stride = s->b_width << s->block_max_depth;
2969 const int w= p->width;
2970 const int h= p->height;
2971 int index= mb_x + mb_y*b_stride;
2972 BlockNode *b= &s->block[index];
2973 BlockNode backup= *b;
2977 b->type|= BLOCK_INTRA;
2978 b->color[plane_index]= 0;
// i selects one of the four neighbouring block positions (column offset from
// bit 0, row offset from bit 1); (x, y) is the top-left of its area.
2981 int mb_x2= mb_x + (i &1) - 1;
2982 int mb_y2= mb_y + (i>>1) - 1;
2983 int x= block_w*mb_x2 + block_w/2;
2984 int y= block_w*mb_y2 + block_w/2;
2986 add_yblock(s, zero_dst, dst, ref, obmc,
2987 x, y, block_w, block_w, w, h, /*dst_stride*/0, ref_stride, obmc_stride, mb_x2, mb_y2, 1, plane_index);
2989 for(y2= FFMAX(y, 0); y2<FFMIN(h, y+block_w); y2++){
2990 for(x2= FFMAX(x, 0); x2<FFMIN(w, x+block_w); x2++){
// Weight of this block's window at the pixel; where the area sticks out of
// the picture, the weight from the opposite half of the window is added.
2991 int index= x2-(block_w*mb_x - block_w/2) + (y2-(block_w*mb_y - block_w/2))*obmc_stride;
2992 int obmc_v= obmc[index];
2993 if(y<0) obmc_v += obmc[index + block_w*obmc_stride];
2994 if(x<0) obmc_v += obmc[index + block_w];
2995 if(y+block_w>h) obmc_v += obmc[index - block_w*obmc_stride];
2996 if(x+block_w>w) obmc_v += obmc[index - block_w];
2997 //FIXME precalc this or simplify it somehow else
2999 ab += (src[x2 + y2*ref_stride] - dst[x2 + y2*ref_stride]) * obmc_v;
3000 aa += obmc_v * obmc_v; //FIXME precalculate this
3006 return clip(((ab<<6) + aa/2)/aa, 0, 255); //FIXME we shouldn't need clipping
/**
 * Rate-distortion score of the current state of block (mb_x, mb_y).
 *
 * Distortion: the four areas that overlap the block are rendered with
 * add_yblock() and compared with the input picture through s->dsp.me_cmp;
 * parts of an area outside the picture are first overwritten with the source
 * pixels.  Rate: for each nearby block an approximate bit count is derived
 * from av_log2() of its colour or motion-vector deltas.
 *
 * @return distortion + rate * penalty_factor
 */
3009 static int get_block_rd(SnowContext *s, int mb_x, int mb_y, int plane_index){
3011 Plane *p= &s->plane[plane_index];
3012 const int block_size = MB_SIZE >> s->block_max_depth;
3013 const int block_w = plane_index ? block_size/2 : block_size;
3014 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
3015 const int obmc_stride= plane_index ? block_size : 2*block_size;
3016 const int ref_stride= s->current_picture.linesize[plane_index];
3017 uint8_t *ref= s-> last_picture.data[plane_index];
3018 uint8_t *dst= s->current_picture.data[plane_index];
3019 uint8_t *src= s-> input_picture.data[plane_index];
3020 const static DWTELEM zero_dst[4096]; //FIXME
3021 const int b_stride = s->b_width << s->block_max_depth;
3022 const int b_height = s->b_height<< s->block_max_depth;
3023 const int w= p->width;
3024 const int h= p->height;
3027 const int penalty_factor= get_penalty_factor(s->lambda, s->lambda2, s->avctx->me_cmp);
3030 int mb_x2= mb_x + (i &1) - 1;
3031 int mb_y2= mb_y + (i>>1) - 1;
3032 int x= block_w*mb_x2 + block_w/2;
3033 int y= block_w*mb_y2 + block_w/2;
3035 add_yblock(s, zero_dst, dst, ref, obmc,
3036 x, y, block_w, block_w, w, h, /*dst_stride*/0, ref_stride, obmc_stride, mb_x2, mb_y2, 1, plane_index);
3038 //FIXME find a cleaner/simpler way to skip the outside stuff
// Copy src into dst for rows above and below the picture.
3039 for(y2= y; y2<0; y2++)
3040 memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, block_w);
3041 for(y2= h; y2<y+block_w; y2++)
3042 memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, block_w);
// Same for columns left and right of the picture.
// NOTE(review): the guards are not visible — presumably x<0 and
// x+block_w>w, which keep the memcpy lengths positive; confirm.
3044 for(y2= y; y2<y+block_w; y2++)
3045 memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, -x);
3048 for(y2= y; y2<y+block_w; y2++)
3049 memcpy(dst + w + y2*ref_stride, src + w + y2*ref_stride, x+block_w - w);
3052 assert(block_w== 8 || block_w==16);
3053 distortion += s->dsp.me_cmp[block_w==8](&s->m, src + x + y*ref_stride, dst + x + y*ref_stride, ref_stride, block_w);
3062 int x= mb_x + (i&1) - (i>>1);
3063 int y= mb_y + (i>>1);
3064 int index= x + y*b_stride;
// NOTE(review): b, left, top and tr are dereferenced for dmx/dmy before the
// range check on x and y further down, so an out-of-range neighbour may be
// read (not used) first — confirm this cannot leave s->block.
3065 BlockNode *b = &s->block[index];
3066 BlockNode *left = x ? &s->block[index-1] : &null_block;
3067 BlockNode *top = y ? &s->block[index-b_stride] : &null_block;
3068 BlockNode *tl = y && x ? &s->block[index-b_stride-1] : left;
3069 BlockNode *tr = y && x+1<b_stride ? &s->block[index-b_stride+1] : tl;
3070 int dmx= b->mx - mid_pred(left->mx, top->mx, tr->mx);
3071 int dmy= b->my - mid_pred(left->my, top->my, tr->my);
3072 // int mx_context= av_log2(2*ABS(left->mx - top->mx));
3073 // int my_context= av_log2(2*ABS(left->my - top->my));
3075 if(x<0 || x>=b_stride || y>=b_height)
3082 00001XXXX 15-30 8-15
3084 //FIXME try accurate rate
3085 //FIXME intra and inter predictors if surrounding blocks aren't the same type
3086 if(b->type & BLOCK_INTRA){
3087 rate += 3+2*( av_log2(2*ABS(left->color[0] - b->color[0]))
3088 + av_log2(2*ABS(left->color[1] - b->color[1]))
3089 + av_log2(2*ABS(left->color[2] - b->color[2])));
3091 rate += 2*(1 + av_log2(2*ABS(dmx))
3092 + av_log2(2*ABS(dmy))); //FIXME kill the 2* can be merged in lambda
3096 return distortion + rate*penalty_factor;
/**
 * Try one candidate for block (mb_x, mb_y) and score it with
 * get_block_rd() on plane 0.
 *
 * With `intra` set, p[0..2] are the three colours; otherwise the cache key
 * is built from p[0] and p[1] only.  Candidates whose me_cache slot already
 * holds the same value are detected.
 * NOTE(review): the lines that assign mx/my, compare rd with *best_rd,
 * restore `backup` and return are not visible here — presumably p[0]/p[1]
 * are the motion vector and the return value says whether the candidate
 * was kept; confirm.
 * NOTE(review): p is declared int[3] but the non-intra callers in
 * iterative_me() pass two-element arrays, so p[2] must only be read in the
 * intra case — confirm.
 */
3099 static always_inline int check_block(SnowContext *s, int mb_x, int mb_y, int p[3], int intra, int *best_rd){
3100 const int b_stride= s->b_width << s->block_max_depth;
3101 BlockNode *block= &s->block[mb_x + mb_y * b_stride];
3102 BlockNode backup= *block;
3103 int rd, index, value;
3105 assert(mb_x>=0 && mb_y>=0);
3106 assert(mb_x<b_stride);
3109 block->color[0] = p[0];
3110 block->color[1] = p[1];
3111 block->color[2] = p[2];
3112 block->type |= BLOCK_INTRA;
// Cache slot from a small hash of the candidate; the stored value mixes in
// the current generation so older entries do not match.
3114 index= (p[0] + 31*p[1]) & (ME_CACHE_SIZE-1);
3115 value= s->me_cache_generation + (p[0]>>10) + (p[1]<<6);
3116 if(s->me_cache[index] == value)
3118 s->me_cache[index]= value;
3122 block->type &= ~BLOCK_INTRA;
3125 rd= get_block_rd(s, mb_x, mb_y, 0);
/**
 * Iterative motion search over the whole block grid, up to 50 passes.
 *
 * For each block several candidates are scored with check_block(): its
 * current vector, the zero vector, the vectors of the top, left, right and
 * bottom neighbours, a diamond pattern in steps of 4, the eight positions of
 * `square`, and finally an intra candidate whose colours come from get_dc().
 * A block marked BLOCK_OPT is skipped on later passes; when a block changes,
 * BLOCK_OPT is cleared on its eight neighbours so they are revisited.
 */
3137 static void iterative_me(SnowContext *s){
3138 int pass, mb_x, mb_y;
3139 const int b_width = s->b_width << s->block_max_depth;
3140 const int b_height= s->b_height << s->block_max_depth;
3141 const int b_stride= b_width;
3144 for(pass=0; pass<50; pass++){
3147 for(mb_y= 0; mb_y<b_height; mb_y++){
3148 for(mb_x= 0; mb_x<b_width; mb_x++){
3149 int dia_change, i, j;
3150 int best_rd= INT_MAX;
3152 const int index= mb_x + mb_y * b_stride;
3153 BlockNode *block= &s->block[index];
// NOTE(review): inside these loops mb_x<b_width and mb_y<b_height are always
// true, so rb, bb, trb, blb and brb never fall back to null_block.  In the
// last column index+1 is the first block of the next row, and in the last
// row index+b_stride is past the b_width*b_height grid.  The tests
// presumably should be mb_x+1<b_width and mb_y+1<b_height — confirm.
3154 BlockNode *tb = mb_y ? &s->block[index-b_stride ] : &null_block;
3155 BlockNode *lb = mb_x ? &s->block[index -1] : &null_block;
3156 BlockNode *rb = mb_x<b_width ? &s->block[index +1] : &null_block;
3157 BlockNode *bb = mb_y<b_height ? &s->block[index+b_stride ] : &null_block;
3158 BlockNode *tlb= mb_x && mb_y ? &s->block[index-b_stride-1] : &null_block;
3159 BlockNode *trb= mb_x<b_width && mb_y ? &s->block[index-b_stride+1] : &null_block;
3160 BlockNode *blb= mb_x && mb_y<b_height ? &s->block[index+b_stride-1] : &null_block;
3161 BlockNode *brb= mb_x<b_width && mb_y<b_height ? &s->block[index+b_stride+1] : &null_block;
3163 if(pass && (block->type & BLOCK_OPT))
3165 block->type |= BLOCK_OPT;
// A new cache generation per block; the cache is cleared whenever the
// generation counter is zero.
3169 if(!s->me_cache_generation)
3170 memset(s->me_cache, 0, sizeof(s->me_cache));
3171 s->me_cache_generation += 1<<22;
3173 // get previous score (can't be cached due to OBMC)
3174 check_block(s, mb_x, mb_y, (int[2]){block->mx, block->my}, 0, &best_rd);
3175 check_block(s, mb_x, mb_y, (int[2]){0, 0}, 0, &best_rd);
3176 check_block(s, mb_x, mb_y, (int[2]){tb->mx, tb->my}, 0, &best_rd);
3177 check_block(s, mb_x, mb_y, (int[2]){lb->mx, lb->my}, 0, &best_rd);
3178 check_block(s, mb_x, mb_y, (int[2]){rb->mx, rb->my}, 0, &best_rd);
3179 check_block(s, mb_x, mb_y, (int[2]){bb->mx, bb->my}, 0, &best_rd);
3182 //FIXME avoid subpel interpol / round to nearest integer
3185 for(i=0; i<FFMAX(s->avctx->dia_size, 1); i++){
3187 dia_change |= check_block(s, mb_x, mb_y, (int[2]){block->mx+4*(i-j), block->my+(4*j)}, 0, &best_rd);
3188 dia_change |= check_block(s, mb_x, mb_y, (int[2]){block->mx-4*(i-j), block->my-(4*j)}, 0, &best_rd);
3189 dia_change |= check_block(s, mb_x, mb_y, (int[2]){block->mx+4*(i-j), block->my-(4*j)}, 0, &best_rd);
3190 dia_change |= check_block(s, mb_x, mb_y, (int[2]){block->mx-4*(i-j), block->my+(4*j)}, 0, &best_rd);
3196 static const int square[8][2]= {{+1, 0},{-1, 0},{ 0,+1},{ 0,-1},{+1,+1},{-1,-1},{+1,-1},{-1,+1},};
3199 dia_change |= check_block(s, mb_x, mb_y, (int[2]){block->mx+square[i][0], block->my+square[i][1]}, 0, &best_rd);
3201 //FIXME or try the standard 2 pass qpel or similar
3204 color[i]= get_dc(s, mb_x, mb_y, i);
3206 check_block(s, mb_x, mb_y, color, 1, &best_rd);
3207 //FIXME RD style color selection
3209 if(!same_block(block, &backup)){
3210 if(tb != &null_block) tb ->type &= ~BLOCK_OPT;
3211 if(lb != &null_block) lb ->type &= ~BLOCK_OPT;
3212 if(rb != &null_block) rb ->type &= ~BLOCK_OPT;
3213 if(bb != &null_block) bb ->type &= ~BLOCK_OPT;
3214 if(tlb!= &null_block) tlb->type &= ~BLOCK_OPT;
3215 if(trb!= &null_block) trb->type &= ~BLOCK_OPT;
3216 if(blb!= &null_block) blb->type &= ~BLOCK_OPT;
3217 if(brb!= &null_block) brb->type &= ~BLOCK_OPT;
3222 av_log(NULL, AV_LOG_ERROR, "pass:%d changed:%d\n", pass, change);
/**
 * Quantize the coefficients of subband b in place.
 *
 * The step qmul comes from the qexp table, indexed by the low bits of
 * s->qlog + b->qlog (clipped to 0..QROOT*16) and shifted by the high bits.
 * Nothing is done when s->qlog == LOSSLESS_QLOG.  Coefficients that fail the
 * threshold test are set to 0, the others are divided by qmul.
 *
 * @param bias  only tested for zero: a non-zero value gives a rounding bias
 *              of 0, zero gives (3*qmul)>>3
 * NOTE(review): two loop variants are visible, one dividing i and one
 * dividing i + bias; the selecting condition and thres2 are not visible
 * here — confirm.
 */
3228 static void quantize(SnowContext *s, SubBand *b, DWTELEM *src, int stride, int bias){
3229 const int level= b->level;
3230 const int w= b->width;
3231 const int h= b->height;
3232 const int qlog= clip(s->qlog + b->qlog, 0, QROOT*16);
3233 const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
3234 int x,y, thres1, thres2;
3237 if(s->qlog == LOSSLESS_QLOG) return;
3239 bias= bias ? 0 : (3*qmul)>>3;
3240 thres1= ((qmul - bias)>>QEXPSHIFT) - 1;
3246 int i= src[x + y*stride];
// One unsigned comparison tests whether i lies outside a range around zero.
3248 if((unsigned)(i+thres1) > thres2){
3251 i/= qmul; //FIXME optimize
3252 src[x + y*stride]= i;
3256 i/= qmul; //FIXME optimize
3257 src[x + y*stride]= -i;
3260 src[x + y*stride]= 0;
3266 int i= src[x + y*stride];
3268 if((unsigned)(i+thres1) > thres2){
3271 i= (i + bias) / qmul; //FIXME optimize
3272 src[x + y*stride]= i;
3276 i= (i + bias) / qmul; //FIXME optimize
3277 src[x + y*stride]= -i;
3280 src[x + y*stride]= 0;
3284 if(level+1 == s->spatial_decomposition_count){
3285 // STOP_TIMER("quantize")
/**
 * Dequantize rows start_y..end_y-1 of subband b inside the slice buffer.
 *
 * Each row is located with slice_buffer_get_line() from b->stride_line,
 * b->buf_y_offset and b->buf_x_offset.  Values are scaled by qmul, offset by
 * qadd = (s->qbias*qmul)>>QBIAS_SHIFT and shifted right by QEXPSHIFT; one
 * form negates before and after so the magnitude is rounded the same way.
 * Nothing is done when s->qlog == LOSSLESS_QLOG.
 * NOTE(review): the sign tests selecting the two forms are not visible here —
 * presumably i<0 and i>0; confirm.
 */
3289 static void dequantize_slice_buffered(SnowContext *s, slice_buffer * sb, SubBand *b, DWTELEM *src, int stride, int start_y, int end_y){
3290 const int w= b->width;
3291 const int qlog= clip(s->qlog + b->qlog, 0, QROOT*16);
3292 const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
3293 const int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
3297 if(s->qlog == LOSSLESS_QLOG) return;
3299 for(y=start_y; y<end_y; y++){
3300 // DWTELEM * line = slice_buffer_get_line_from_address(sb, src + (y * stride));
3301 DWTELEM * line = slice_buffer_get_line(sb, (y * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
3305 line[x]= -((-i*qmul + qadd)>>(QEXPSHIFT)); //FIXME try different bias
3307 line[x]= (( i*qmul + qadd)>>(QEXPSHIFT));
3311 if(w > 200 /*level+1 == s->spatial_decomposition_count*/){
3312 STOP_TIMER("dquant")
/**
 * Dequantize subband b in place in a plain buffer addressed as
 * src[x + y*stride].
 *
 * Values are scaled by qmul, offset by qadd = (s->qbias*qmul)>>QBIAS_SHIFT
 * and shifted right by QEXPSHIFT; one form negates before and after so the
 * magnitude is rounded the same way.  Nothing is done when
 * s->qlog == LOSSLESS_QLOG.
 * NOTE(review): the sign tests selecting the two forms are not visible here —
 * presumably i<0 and i>0; confirm.
 */
3316 static void dequantize(SnowContext *s, SubBand *b, DWTELEM *src, int stride){
3317 const int w= b->width;
3318 const int h= b->height;
3319 const int qlog= clip(s->qlog + b->qlog, 0, QROOT*16);
3320 const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
3321 const int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
3325 if(s->qlog == LOSSLESS_QLOG) return;
3329 int i= src[x + y*stride];
3331 src[x + y*stride]= -((-i*qmul + qadd)>>(QEXPSHIFT)); //FIXME try different bias
3333 src[x + y*stride]= (( i*qmul + qadd)>>(QEXPSHIFT));
3337 if(w > 200 /*level+1 == s->spatial_decomposition_count*/){
3338 STOP_TIMER("dquant")
/**
 * Replace each coefficient of subband b by its difference from a predictor
 * built from the left, top and top-right or top-left neighbours.
 *
 * The scan runs from the last row and column backwards, so every predictor
 * input (at a lower index) still holds its original value when it is read.
 * NOTE(review): the tests choosing between the predictor variants
 * (presumably on x and use_median) are not visible here — confirm.
 */
3342 static void decorrelate(SnowContext *s, SubBand *b, DWTELEM *src, int stride, int inverse, int use_median){
3343 const int w= b->width;
3344 const int h= b->height;
3347 for(y=h-1; y>=0; y--){
3348 for(x=w-1; x>=0; x--){
3349 int i= x + y*stride;
// Variant 1: median of left, top and top-right.
3353 if(y && x+1<w) src[i] -= mid_pred(src[i - 1], src[i - stride], src[i - stride + 1]);
3354 else src[i] -= src[i - 1];
// Variant 2: median of left, top and the gradient left + top - top-left.
3356 if(y) src[i] -= mid_pred(src[i - 1], src[i - stride], src[i - 1] + src[i - stride] - src[i - 1 - stride]);
3357 else src[i] -= src[i - 1];
// No left neighbour used: predict from the sample above only.
3360 if(y) src[i] -= src[i - stride];
/**
 * Add the spatial predictor back to rows start_y..end_y-1 of subband b
 * inside the slice buffer (the additive counterpart of decorrelate()).
 *
 * The current row is `line`; the row above is read through `prev`.
 * NOTE(review): the assignments to `prev` and the tests choosing between the
 * predictor variants are not visible here — confirm.
 */
3366 static void correlate_slice_buffered(SnowContext *s, slice_buffer * sb, SubBand *b, DWTELEM *src, int stride, int inverse, int use_median, int start_y, int end_y){
3367 const int w= b->width;
// Row start_y - 1 is fetched before the loop.
3376 line = slice_buffer_get_line(sb, ((start_y - 1) * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
3378 for(y=start_y; y<end_y; y++){
3380 // line = slice_buffer_get_line_from_address(sb, src + (y * stride));
3381 line = slice_buffer_get_line(sb, (y * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
3385 if(y && x+1<w) line[x] += mid_pred(line[x - 1], prev[x], prev[x + 1]);
3386 else line[x] += line[x - 1];
3388 if(y) line[x] += mid_pred(line[x - 1], prev[x], line[x - 1] + prev[x] - prev[x - 1]);
3389 else line[x] += line[x - 1];
3392 if(y) line[x] += prev[x];
3397 // STOP_TIMER("correlate")
/**
 * Add the spatial predictor back to every coefficient of subband b in a
 * plain buffer addressed as src[x + y*stride]; it uses the same predictors
 * as decorrelate() with += instead of -=.
 * NOTE(review): the loop headers are not visible here — presumably a forward
 * scan, so that each predictor input is already reconstructed; confirm.
 */
3400 static void correlate(SnowContext *s, SubBand *b, DWTELEM *src, int stride, int inverse, int use_median){
3401 const int w= b->width;
3402 const int h= b->height;
3407 int i= x + y*stride;
3411 if(y && x+1<w) src[i] += mid_pred(src[i - 1], src[i - stride], src[i - stride + 1]);
3412 else src[i] += src[i - 1];
3414 if(y) src[i] += mid_pred(src[i - 1], src[i - stride], src[i - 1] + src[i - stride] - src[i - 1 - stride]);
3415 else src[i] += src[i - 1];
3418 if(y) src[i] += src[i - stride];
/**
 * Write the frame header to the range coder.
 *
 * The keyframe flag is coded first with a freshly reset state (kstate).  The
 * stream-level fields (version ... spatial_scalability and the per-band
 * qlog values) and the per-frame fields (spatial_decomposition_type, qlog,
 * mv_scale, qbias, block_max_depth) follow in the order decode_header()
 * reads them.
 * NOTE(review): the guards are not visible — presumably the stream-level
 * fields are only written for keyframes; confirm.
 */
3424 static void encode_header(SnowContext *s){
3425 int plane_index, level, orientation;
3428 memset(kstate, MID_STATE, sizeof(kstate));
3430 put_rac(&s->c, kstate, s->keyframe);
3431 if(s->keyframe || s->always_reset)
3434 put_symbol(&s->c, s->header_state, s->version, 0);
3435 put_rac(&s->c, s->header_state, s->always_reset);
3436 put_symbol(&s->c, s->header_state, s->temporal_decomposition_type, 0);
3437 put_symbol(&s->c, s->header_state, s->temporal_decomposition_count, 0);
3438 put_symbol(&s->c, s->header_state, s->spatial_decomposition_count, 0);
3439 put_symbol(&s->c, s->header_state, s->colorspace_type, 0);
3440 put_symbol(&s->c, s->header_state, s->chroma_h_shift, 0);
3441 put_symbol(&s->c, s->header_state, s->chroma_v_shift, 0);
3442 put_rac(&s->c, s->header_state, s->spatial_scalability);
3443 // put_rac(&s->c, s->header_state, s->rate_scalability);
// Only planes 0 and 1 are written and orientation 2 is skipped;
// decode_header() fills plane 2 from plane 1 and orientation 2 from
// orientation 1.
3445 for(plane_index=0; plane_index<2; plane_index++){
3446 for(level=0; level<s->spatial_decomposition_count; level++){
3447 for(orientation=level ? 1:0; orientation<4; orientation++){
3448 if(orientation==2) continue;
3449 put_symbol(&s->c, s->header_state, s->plane[plane_index].band[level][orientation].qlog, 1);
3454 put_symbol(&s->c, s->header_state, s->spatial_decomposition_type, 0);
3455 put_symbol(&s->c, s->header_state, s->qlog, 1);
3456 put_symbol(&s->c, s->header_state, s->mv_scale, 0);
3457 put_symbol(&s->c, s->header_state, s->qbias, 1);
3458 put_symbol(&s->c, s->header_state, s->block_max_depth, 0);
/**
 * Read the frame header from the range coder into s, mirroring
 * encode_header().
 *
 * Errors are logged for an unsupported version, for
 * spatial_decomposition_type > 2 and for block_max_depth > 1 (which is then
 * reset to 0).
 * NOTE(review): the return statements are not visible here — presumably a
 * negative value on these errors and 0 on success; confirm.
 * NOTE(review): the three error messages have no trailing "\n", unlike the
 * other av_log() calls in this file — confirm whether that is intended.
 */
3461 static int decode_header(SnowContext *s){
3462 int plane_index, level, orientation;
3465 memset(kstate, MID_STATE, sizeof(kstate));
3467 s->keyframe= get_rac(&s->c, kstate);
3468 if(s->keyframe || s->always_reset)
3471 s->version= get_symbol(&s->c, s->header_state, 0);
3473 av_log(s->avctx, AV_LOG_ERROR, "version %d not supported", s->version);
3476 s->always_reset= get_rac(&s->c, s->header_state);
3477 s->temporal_decomposition_type= get_symbol(&s->c, s->header_state, 0);
3478 s->temporal_decomposition_count= get_symbol(&s->c, s->header_state, 0);
3479 s->spatial_decomposition_count= get_symbol(&s->c, s->header_state, 0);
3480 s->colorspace_type= get_symbol(&s->c, s->header_state, 0);
3481 s->chroma_h_shift= get_symbol(&s->c, s->header_state, 0);
3482 s->chroma_v_shift= get_symbol(&s->c, s->header_state, 0);
3483 s->spatial_scalability= get_rac(&s->c, s->header_state);
3484 // s->rate_scalability= get_rac(&s->c, s->header_state);
3486 for(plane_index=0; plane_index<3; plane_index++){
3487 for(level=0; level<s->spatial_decomposition_count; level++){
3488 for(orientation=level ? 1:0; orientation<4; orientation++){
// Plane 2 reuses the value of plane 1 and orientation 2 reuses orientation
// 1; only the remaining values are read from the bitstream.
3490 if (plane_index==2) q= s->plane[1].band[level][orientation].qlog;
3491 else if(orientation==2) q= s->plane[plane_index].band[level][1].qlog;
3492 else q= get_symbol(&s->c, s->header_state, 1);
3493 s->plane[plane_index].band[level][orientation].qlog= q;
3499 s->spatial_decomposition_type= get_symbol(&s->c, s->header_state, 0);
3500 if(s->spatial_decomposition_type > 2){
3501 av_log(s->avctx, AV_LOG_ERROR, "spatial_decomposition_type %d not supported", s->spatial_decomposition_type);
3505 s->qlog= get_symbol(&s->c, s->header_state, 1);
3506 s->mv_scale= get_symbol(&s->c, s->header_state, 0);
3507 s->qbias= get_symbol(&s->c, s->header_state, 1);
3508 s->block_max_depth= get_symbol(&s->c, s->header_state, 0);
3509 if(s->block_max_depth > 1){
3510 av_log(s->avctx, AV_LOG_ERROR, "block_max_depth= %d is too large", s->block_max_depth);
3511 s->block_max_depth= 0;
/**
 * Runs QROOT iterations, multiplying v by 2^(1/QROOT) on each one, so v
 * grows by a factor of 2 over the whole loop.
 *
 * NOTE(review): the declaration/initial value of v and the statement that
 * stores the result are not visible in this excerpt; presumably this fills
 * a quantizer exponent table -- confirm against the full file.
 */
3518 static void init_qexp(){
3522 for(i=0; i<QROOT; i++){
3524 v *= pow(2, 1.0 / QROOT);
/**
 * Initialises the SnowContext stored in avctx->priv_data.
 *
 * Visible steps: initialise the DSP context and route its qpel / half-pel
 * "put" tables (both rounding and no-rounding variants) to the h264 qpel and
 * mc_block_hpel functions; set default decomposition parameters (5 spatial
 * levels, 4:2:0-style chroma shifts of 1); allocate the spatial DWT buffer;
 * derive mv_scale and block_max_depth from the codec flags; compute the
 * geometry of every subband of every plane and allocate its x_coeff array;
 * finally request a buffer for mconly_picture.
 *
 * NOTE(review): some lines (local declarations, the first macro's #define
 * line, the conditions guarding the chroma shift and the per-orientation
 * offsets, braces, the return statement) are not visible in this excerpt.
 */
3528 static int common_init(AVCodecContext *avctx){
3529 SnowContext *s = avctx->priv_data;
3531 int level, orientation, plane_index, dec;
3535 dsputil_init(&s->dsp, avctx);
3538 s->dsp.put_qpel_pixels_tab [0][dy+dx/4]=\
3539 s->dsp.put_no_rnd_qpel_pixels_tab[0][dy+dx/4]=\
3540 s->dsp.put_h264_qpel_pixels_tab[0][dy+dx/4];\
3541 s->dsp.put_qpel_pixels_tab [1][dy+dx/4]=\
3542 s->dsp.put_no_rnd_qpel_pixels_tab[1][dy+dx/4]=\
3543 s->dsp.put_h264_qpel_pixels_tab[1][dy+dx/4];
/* Half-pel variant: table index [0] gets the "...16" function and index [1]
 * the "...8" function, whose names are pasted together from dx and dy. */
3562 #define mcfh(dx,dy)\
3563 s->dsp.put_pixels_tab [0][dy/4+dx/8]=\
3564 s->dsp.put_no_rnd_pixels_tab[0][dy/4+dx/8]=\
3565 mc_block_hpel ## dx ## dy ## 16;\
3566 s->dsp.put_pixels_tab [1][dy/4+dx/8]=\
3567 s->dsp.put_no_rnd_pixels_tab[1][dy/4+dx/8]=\
3568 mc_block_hpel ## dx ## dy ## 8;
// Hard-coded defaults: 5 decomposition levels; the wavelet type comes from
// avctx->prediction_method.
3578 dec= s->spatial_decomposition_count= 5;
3579 s->spatial_decomposition_type= avctx->prediction_method; //FIXME add decorrelator type or transform_type
3581 s->chroma_h_shift= 1; //FIXME XXX
3582 s->chroma_v_shift= 1;
3584 // dec += FFMAX(s->chroma_h_shift, s->chroma_v_shift);
3586 width= s->avctx->width;
3587 height= s->avctx->height;
// One full-frame buffer of DWT elements, zero-initialised.
3589 s->spatial_dwt_buffer= av_mallocz(width*height*sizeof(DWTELEM));
// QPEL selects mv_scale 2 instead of 4; 4MV enables one level of block splitting.
3591 s->mv_scale= (s->avctx->flags & CODEC_FLAG_QPEL) ? 2 : 4;
3592 s->block_max_depth= (s->avctx->flags & CODEC_FLAG_4MV) ? 1 : 0;
3594 for(plane_index=0; plane_index<3; plane_index++){
3595 int w= s->avctx->width;
3596 int h= s->avctx->height;
// NOTE(review): the condition guarding these shifts is not visible here;
// presumably they apply to the chroma planes only.
3599 w>>= s->chroma_h_shift;
3600 h>>= s->chroma_v_shift;
3602 s->plane[plane_index].width = w;
3603 s->plane[plane_index].height= h;
3604 //av_log(NULL, AV_LOG_DEBUG, "%d %d\n", w, h);
// Walk the levels from the highest index down to 0 and describe each band.
3605 for(level=s->spatial_decomposition_count-1; level>=0; level--){
3606 for(orientation=level ? 1 : 0; orientation<4; orientation++){
3607 SubBand *b= &s->plane[plane_index].band[level][orientation];
// All bands live inside the shared DWT buffer.
3609 b->buf= s->spatial_dwt_buffer;
// Stride is the plane width scaled by 2^(count - level).
3611 b->stride= s->plane[plane_index].width << (s->spatial_decomposition_count - level);
// Half size; rounded up when bit 0 of orientation is clear (width) or
// when orientation is 0 or 1 (height).
3612 b->width = (w + !(orientation&1))>>1;
3613 b->height= (h + !(orientation>1))>>1;
3615 b->stride_line = 1 << (s->spatial_decomposition_count - level);
3616 b->buf_x_offset = 0;
3617 b->buf_y_offset = 0;
// NOTE(review): the conditions selecting the horizontal and vertical offsets
// below are on lines not visible in this excerpt.
3621 b->buf_x_offset = (w+1)>>1;
3624 b->buf += b->stride>>1;
3625 b->buf_y_offset = b->stride_line >> 1;
// Parent is the same orientation one level lower (guard not visible here).
3629 b->parent= &s->plane[plane_index].band[level-1][orientation];
// (width+1)*height+1 zero-initialised x_and_coeff entries per band.
3630 b->x_coeff=av_mallocz(((b->width+1) * b->height+1)*sizeof(x_and_coeff));
3639 width= s->width= avctx->width;
3640 height= s->height= avctx->height;
3642 assert(width && height);
// NOTE(review): the return value of get_buffer() is not checked on this line.
3644 s->avctx->get_buffer(s->avctx, &s->mconly_picture);
/**
 * Derives a qlog value for every subband of plane p.
 *
 * For each band the shared DWT buffer is cleared, a single coefficient of
 * value 256*256 is placed at the centre of the band, and the inverse DWT is
 * run over the whole plane. The resulting samples are then scanned and the
 * band's qlog is set from log(352256/sqrt(error)) expressed in units of
 * 1/QROOT octaves, rounded to nearest.
 *
 * NOTE(review): the declaration and accumulation of `error` are on lines not
 * visible in this excerpt; presumably it sums d*d over all samples.
 */
3650 static void calculate_vissual_weight(SnowContext *s, Plane *p){
3651 int width = p->width;
3652 int height= p->height;
3653 int level, orientation, x, y;
3655 for(level=0; level<s->spatial_decomposition_count; level++){
3656 for(orientation=level ? 1 : 0; orientation<4; orientation++){
3657 SubBand *b= &p->band[level][orientation];
3658 DWTELEM *buf= b->buf;
// NOTE(review): the clear uses sizeof(int) while the buffer is allocated
// with sizeof(DWTELEM) elsewhere in this file; this assumes both have the
// same size.
3661 memset(s->spatial_dwt_buffer, 0, sizeof(int)*width*height);
// Single impulse in the middle of this band.
3662 buf[b->width/2 + b->height/2*b->stride]= 256*256;
3663 ff_spatial_idwt(s->spatial_dwt_buffer, width, height, width, s->spatial_decomposition_type, s->spatial_decomposition_count);
3664 for(y=0; y<height; y++){
3665 for(x=0; x<width; x++){
3666 int64_t d= s->spatial_dwt_buffer[x + y*width];
// log(x)/log(2^(1/QROOT)) is log2(x)*QROOT; the +0.5 rounds before the int cast.
3671 b->qlog= (int)(log(352256.0/sqrt(error)) / log(pow(2.0, 1.0/QROOT))+0.5);
3672 // av_log(NULL, AV_LOG_DEBUG, "%d %d %d\n", level, orientation, b->qlog/*, sqrt(error)*/);
/**
 * Encoder initialisation.
 *
 * Visible steps: refuse to run unless strict_std_compliance is at most
 * FF_COMPLIANCE_EXPERIMENTAL; copy flags and bit rate into the embedded
 * MpegEncContext (s->m) and allocate its motion-estimation scratchpad and
 * maps; set up two-pass rate control buffers/state when the PASS1/PASS2
 * flags are set; compute per-band qlog weights for all three planes; accept
 * only PIX_FMT_YUV420P; configure the comparison functions and request a
 * buffer for input_picture.
 *
 * NOTE(review): braces, return statements and some calls are on lines not
 * visible in this excerpt. The get_buffer() call at the end does not have
 * its return value checked on the visible line.
 */
3677 static int encode_init(AVCodecContext *avctx)
3679 SnowContext *s = avctx->priv_data;
3682 if(avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL){
3683 av_log(avctx, AV_LOG_ERROR, "this codec is under development, files encoded with it may not be decodable with future versions!!!\n"
3684 "use vstrict=-2 / -strict -2 to use it anyway\n");
// Mirror the relevant settings into the embedded mpegvideo context.
3694 s->m.flags = avctx->flags;
3695 s->m.bit_rate= avctx->bit_rate;
// Zero-initialised motion-estimation work buffers.
3697 s->m.me.scratchpad= av_mallocz((avctx->width+64)*2*16*2*sizeof(uint8_t));
3698 s->m.me.map = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t));
3699 s->m.me.score_map = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t));
3700 h263_encode_init(&s->m); //mv_penalty
// First pass: make sure a 256-byte stats_out buffer exists.
3702 if(avctx->flags&CODEC_FLAG_PASS1){
3703 if(!avctx->stats_out)
3704 avctx->stats_out = av_mallocz(256);
// Second pass: rate control must initialise successfully.
3706 if(avctx->flags&CODEC_FLAG_PASS2){
3707 if(ff_rate_control_init(&s->m) < 0)
3711 for(plane_index=0; plane_index<3; plane_index++){
3712 calculate_vissual_weight(s, &s->plane[plane_index]);
3716 avctx->coded_frame= &s->current_picture;
// Only YUV420P is enabled; the other formats are commented out.
3717 switch(avctx->pix_fmt){
3718 // case PIX_FMT_YUV444P:
3719 // case PIX_FMT_YUV422P:
3720 case PIX_FMT_YUV420P:
3722 // case PIX_FMT_YUV411P:
3723 // case PIX_FMT_YUV410P:
3724 s->colorspace_type= 0;
3726 /* case PIX_FMT_RGBA32:
3730 av_log(avctx, AV_LOG_ERROR, "format not supported\n");
3733 // avcodec_get_chroma_sub_sample(avctx->pix_fmt, &s->chroma_h_shift, &s->chroma_v_shift);
3734 s->chroma_h_shift= 1;
3735 s->chroma_v_shift= 1;
3737 ff_set_cmp(&s->dsp, s->dsp.me_cmp, s->avctx->me_cmp);
3738 ff_set_cmp(&s->dsp, s->dsp.me_sub_cmp, s->avctx->me_sub_cmp);
3740 s->avctx->get_buffer(s->avctx, &s->input_picture);
/**
 * Prepares the picture buffers for a new frame.
 *
 * If a current picture exists, its three planes get their edges drawn
 * (chroma at half size and half edge width). The current picture then
 * becomes the last picture and a fresh reference buffer is requested for the
 * new current picture; a get_buffer() failure is logged.
 *
 * NOTE(review): the declaration of tmp, the braces and the return statements
 * are on lines not visible in this excerpt.
 */
3745 static int frame_start(SnowContext *s){
3747 int w= s->avctx->width; //FIXME round up to x16 ?
3748 int h= s->avctx->height;
3750 if(s->current_picture.data[0]){
3751 draw_edges(s->current_picture.data[0], s->current_picture.linesize[0], w , h , EDGE_WIDTH );
3752 draw_edges(s->current_picture.data[1], s->current_picture.linesize[1], w>>1, h>>1, EDGE_WIDTH/2);
3753 draw_edges(s->current_picture.data[2], s->current_picture.linesize[2], w>>1, h>>1, EDGE_WIDTH/2);
// Swap last and current picture.
3756 tmp= s->last_picture;
3757 s->last_picture= s->current_picture;
3758 s->current_picture= tmp;
3760 s->current_picture.reference= 1;
3761 if(s->avctx->get_buffer(s->avctx, &s->current_picture) < 0){
3762 av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed\n");
/**
 * Encodes one frame.
 *
 * @param avctx    codec context; avctx->priv_data is the SnowContext
 * @param buf      output buffer handed to the range encoder
 * @param buf_size size of buf as passed to ff_init_range_encoder()
 * @param data     input picture, used as an AVFrame
 * @return the value of ff_rac_terminate() on the range coder
 *
 * Visible steps: initialise the range coder; copy the input picture; choose
 * picture type and quality (from rate control in pass 2, otherwise from
 * gop_size); derive qlog; for P-frames configure the embedded MpegEncContext
 * for motion estimation; then, per plane, build the residual, transform,
 * quantize and code every subband, and reconstruct the plane so that it can
 * serve as a reference. Optionally accumulates PSNR error and two-pass
 * statistics.
 *
 * NOTE(review): many lines (loop headers, conditions, braces, calls such as
 * the header/block encoding) are not visible in this excerpt; the inline
 * notes below describe only what the visible lines do.
 */
3769 static int encode_frame(AVCodecContext *avctx, unsigned char *buf, int buf_size, void *data){
3770 SnowContext *s = avctx->priv_data;
3771 RangeCoder * const c= &s->c;
3772 AVFrame *pict = data;
3773 const int width= s->avctx->width;
3774 const int height= s->avctx->height;
3775 int level, orientation, plane_index, i, y;
3777 ff_init_range_encoder(c, buf, buf_size);
3778 ff_build_rac_states(c, 0.05*(1LL<<32), 256-8);
// Row-by-row copy of the caller's picture into s->input_picture.
3782 for(y=0; y<(height>>shift); y++)
3783 memcpy(&s->input_picture.data[i][y * s->input_picture.linesize[i]],
3784 &pict->data[i][y * pict->linesize[i]],
3787 s->new_picture = *pict;
// Pass 2: picture type and quality come from the rate-control entries.
3789 if(avctx->flags&CODEC_FLAG_PASS2){
3791 pict->pict_type= s->m.rc_context.entry[avctx->frame_number].new_pict_type;
3792 s->keyframe= pict->pict_type==FF_I_TYPE;
3793 s->m.picture_number= avctx->frame_number;
3794 pict->quality= ff_rate_estimate_qscale(&s->m, 0);
// Otherwise a keyframe is emitted when gop_size is 0 or every gop_size frames.
3796 s->keyframe= avctx->gop_size==0 || avctx->frame_number % avctx->gop_size == 0;
3797 pict->pict_type= s->keyframe ? FF_I_TYPE : FF_P_TYPE;
// qlog is QROOT*log2(quality/FF_QP2LAMBDA) plus an offset, or LOSSLESS_QLOG
// (the condition choosing between them is not visible here).
3801 s->qlog= rint(QROOT*log(pict->quality / (float)FF_QP2LAMBDA)/log(2));
3803 s->qlog += 61*QROOT/8;
3805 s->qlog= LOSSLESS_QLOG;
3809 s->current_picture.key_frame= s->keyframe;
3811 s->m.current_picture_ptr= &s->m.current_picture;
// P-frames: point the embedded mpegvideo context at our pictures and fill in
// the fields read by its motion-estimation code.
3812 if(pict->pict_type == P_TYPE){
3813 int block_width = (width +15)>>4;
3814 int block_height= (height+15)>>4;
3815 int stride= s->current_picture.linesize[0];
3817 assert(s->current_picture.data[0]);
3818 assert(s->last_picture.data[0]);
3820 s->m.avctx= s->avctx;
3821 s->m.current_picture.data[0]= s->current_picture.data[0];
3822 s->m. last_picture.data[0]= s-> last_picture.data[0];
3823 s->m. new_picture.data[0]= s-> input_picture.data[0];
3824 s->m. last_picture_ptr= &s->m. last_picture;
3826 s->m. last_picture.linesize[0]=
3827 s->m. new_picture.linesize[0]=
3828 s->m.current_picture.linesize[0]= stride;
3829 s->m.uvlinesize= s->current_picture.linesize[1];
3831 s->m.height= height;
3832 s->m.mb_width = block_width;
3833 s->m.mb_height= block_height;
3834 s->m.mb_stride= s->m.mb_width+1;
3835 s->m.b8_stride= 2*s->m.mb_width+1;
3837 s->m.pict_type= pict->pict_type;
3838 s->m.me_method= s->avctx->me_method;
3839 s->m.me.scene_change_score=0;
3840 s->m.flags= s->avctx->flags;
3841 s->m.quarter_sample= (s->avctx->flags & CODEC_FLAG_QPEL)!=0;
3842 s->m.out_format= FMT_H263;
3843 s->m.unrestricted_mv= 1;
3845 s->lambda = s->m.lambda= pict->quality * 3/2; //FIXME bug somewhere else
3846 s->m.qscale= (s->m.lambda*139 + FF_LAMBDA_SCALE*64) >> (FF_LAMBDA_SHIFT + 7);
3847 s->lambda2= s->m.lambda2= (s->m.lambda*s->m.lambda + FF_LAMBDA_SCALE/2) >> FF_LAMBDA_SHIFT;
3849 s->m.dsp= s->dsp; //move
// Quantizer bias: 2 for P-frames, 0 otherwise.
3856 s->qbias= pict->pict_type == P_TYPE ? 2 : 0;
// Bit accounting: bytes written so far count as misc bits; whatever is
// written between the two statements below counts as mv bits.
3859 s->m.misc_bits = 8*(s->c.bytestream - s->c.bytestream_start);
3861 s->m.mv_bits = 8*(s->c.bytestream - s->c.bytestream_start) - s->m.misc_bits;
3863 for(plane_index=0; plane_index<3; plane_index++){
3864 Plane *p= &s->plane[plane_index];
3868 // int bits= put_bits_count(&s->c.pb);
// Load the source samples into the DWT buffer, scaled up by FRAC_BITS.
3871 if(pict->data[plane_index]) //FIXME gray hack
3874 s->spatial_dwt_buffer[y*w + x]= pict->data[plane_index][y*pict->linesize[plane_index] + x]<<FRAC_BITS;
// predict_plane() with last argument 0 is used on the encode side here and
// with 1 after reconstruction below.
3877 predict_plane(s, s->spatial_dwt_buffer, plane_index, 0);
3880 && pict->pict_type == P_TYPE
3881 && s->m.me.scene_change_score > s->avctx->scenechange_threshold){
// Scene change on a P-frame: restart the range coder and switch to an I-frame.
3882 ff_init_range_encoder(c, buf, buf_size);
3883 ff_build_rac_states(c, 0.05*(1LL<<32), 256-8);
3884 pict->pict_type= FF_I_TYPE;
// Lossless mode: drop the fractional bits (with rounding) before the DWT.
3890 if(s->qlog == LOSSLESS_QLOG){
3893 s->spatial_dwt_buffer[y*w + x]= (s->spatial_dwt_buffer[y*w + x] + (1<<(FRAC_BITS-1))-1)>>FRAC_BITS;
3898 ff_spatial_dwt(s->spatial_dwt_buffer, w, h, w, s->spatial_decomposition_type, s->spatial_decomposition_count);
// Quantize and entropy-code each subband. NOTE(review): the conditions
// guarding quantize()/decorrelate()/correlate() are not visible here.
3900 for(level=0; level<s->spatial_decomposition_count; level++){
3901 for(orientation=level ? 1 : 0; orientation<4; orientation++){
3902 SubBand *b= &p->band[level][orientation];
3904 quantize(s, b, b->buf, b->stride, s->qbias);
3906 decorrelate(s, b, b->buf, b->stride, pict->pict_type == P_TYPE, 0);
3907 encode_subband(s, b, b->buf, b->parent ? b->parent->buf : NULL, b->stride, orientation);
3908 assert(b->parent==NULL || b->parent->stride == b->stride*2);
3910 correlate(s, b, b->buf, b->stride, 1, 0);
3913 // av_log(NULL, AV_LOG_DEBUG, "plane:%d bits:%d\n", plane_index, put_bits_count(&s->c.pb) - bits);
// Reconstruction: dequantize every band, inverse transform, restore the
// fractional bits in lossless mode, then apply the prediction.
3915 for(level=0; level<s->spatial_decomposition_count; level++){
3916 for(orientation=level ? 1 : 0; orientation<4; orientation++){
3917 SubBand *b= &p->band[level][orientation];
3919 dequantize(s, b, b->buf, b->stride);
3923 ff_spatial_idwt(s->spatial_dwt_buffer, w, h, w, s->spatial_decomposition_type, s->spatial_decomposition_count);
3924 if(s->qlog == LOSSLESS_QLOG){
3927 s->spatial_dwt_buffer[y*w + x]<<=FRAC_BITS;
3932 predict_plane(s, s->spatial_dwt_buffer, plane_index, 1);
3933 STOP_TIMER("pred-conv")}
// Optional PSNR bookkeeping: per-plane error between reconstruction and input.
3934 if(s->avctx->flags&CODEC_FLAG_PSNR){
3937 if(pict->data[plane_index]) //FIXME gray hack
3940 int d= s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x] - pict->data[plane_index][y*pict->linesize[plane_index] + x];
3944 s->avctx->error[plane_index] += error;
3945 s->current_picture.error[plane_index] = error;
// The previous reference is released here.
3949 if(s->last_picture.data[0])
3950 avctx->release_buffer(avctx, &s->last_picture);
3952 s->current_picture.coded_picture_number = avctx->frame_number;
3953 s->current_picture.pict_type = pict->pict_type;
3954 s->current_picture.quality = pict->quality;
// Pass 1: texture bits are the total minus misc and mv bits; write the stats.
3955 if(avctx->flags&CODEC_FLAG_PASS1){
3956 s->m.p_tex_bits = 8*(s->c.bytestream - s->c.bytestream_start) - s->m.misc_bits - s->m.mv_bits;
3957 s->m.current_picture.display_picture_number =
3958 s->m.current_picture.coded_picture_number = avctx->frame_number;
3959 s->m.pict_type = pict->pict_type;
3960 s->m.current_picture.quality = pict->quality;
3961 ff_write_pass1_stats(&s->m);
// Pass 2: keep the running total of emitted bits up to date.
3963 if(avctx->flags&CODEC_FLAG_PASS2){
3964 s->m.total_bits += 8*(s->c.bytestream - s->c.bytestream_start);
3969 return ff_rac_terminate(c);
/**
 * Releases the memory owned by the SnowContext: the spatial DWT buffer, the
 * motion-estimation scratchpad and maps, the block array, and the x_coeff
 * array of every subband in all three planes. av_freep() is used throughout,
 * taking the address of each pointer.
 */
3972 static void common_end(SnowContext *s){
3973 int plane_index, level, orientation;
3975 av_freep(&s->spatial_dwt_buffer);
3977 av_freep(&s->m.me.scratchpad);
3978 av_freep(&s->m.me.map);
3979 av_freep(&s->m.me.score_map);
3981 av_freep(&s->block);
// Same band iteration order as used when the x_coeff arrays are allocated.
3983 for(plane_index=0; plane_index<3; plane_index++){
3984 for(level=s->spatial_decomposition_count-1; level>=0; level--){
3985 for(orientation=level ? 1 : 0; orientation<4; orientation++){
3986 SubBand *b= &s->plane[plane_index].band[level][orientation];
3988 av_freep(&b->x_coeff);
/**
 * Encoder teardown. The visible lines fetch the SnowContext and free
 * avctx->stats_out.
 *
 * NOTE(review): other statements and the return value are on lines not
 * visible in this excerpt.
 */
3994 static int encode_end(AVCodecContext *avctx)
3996 SnowContext *s = avctx->priv_data;
3999 av_free(avctx->stats_out);
/**
 * Decoder initialisation. The visible lines set the output pixel format to
 * PIX_FMT_YUV420P and initialise the slice buffer (s->sb) from plane 0's
 * height and width, with a line count derived from the block size and the
 * number of spatial decompositions, backed by s->spatial_dwt_buffer.
 *
 * NOTE(review): the declaration of block_size, other setup calls and the
 * return statement are on lines not visible in this excerpt.
 */
4004 static int decode_init(AVCodecContext *avctx)
4006 SnowContext *s = avctx->priv_data;
4009 avctx->pix_fmt= PIX_FMT_YUV420P;
4013 block_size = MB_SIZE >> s->block_max_depth;
4014 slice_buffer_init(&s->sb, s->plane[0].height, (block_size) + (s->spatial_decomposition_count * (s->spatial_decomposition_count + 2)) + 1, s->plane[0].width, s->spatial_dwt_buffer);
/**
 * Decodes one frame.
 *
 * @param avctx     codec context; avctx->priv_data is the SnowContext
 * @param data      output, written as an AVFrame
 * @param data_size set to sizeof(AVFrame)
 * @param buf       input bitstream handed to the range decoder
 * @param buf_size  size of buf as passed to ff_init_range_decoder()
 *
 * Visible steps: initialise the range decoder; allocate the block array on
 * first use; then per plane unpack all subband coefficients and run a
 * slice-based pipeline (decode/dequantize subband rows, inverse DWT,
 * prediction) one macroblock row at a time; release the previous reference
 * and hand out either the decoded picture or, with debug flag 2048, the
 * motion-compensation-only picture.
 *
 * NOTE(review): many lines (header parsing, local declarations, braces, the
 * return statement) are not visible in this excerpt.
 */
4019 static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, uint8_t *buf, int buf_size){
4020 SnowContext *s = avctx->priv_data;
4021 RangeCoder * const c= &s->c;
4023 AVFrame *picture = data;
4024 int level, orientation, plane_index;
4026 ff_init_range_decoder(c, buf, buf_size);
4027 ff_build_rac_states(c, 0.05*(1LL<<32), 256-8);
4029 s->current_picture.pict_type= FF_I_TYPE; //FIXME I vs. P
4031 if(!s->block) alloc_blocks(s);
4034 //keyframe flag duplication mess FIXME
// Debug output only, although it is logged at AV_LOG_ERROR level.
4035 if(avctx->debug&FF_DEBUG_PICT_INFO)
4036 av_log(avctx, AV_LOG_ERROR, "keyframe:%d qlog:%d\n", s->keyframe, s->qlog);
4040 for(plane_index=0; plane_index<3; plane_index++){
4041 Plane *p= &s->plane[plane_index];
4045 int decode_state[MAX_DECOMPOSITIONS][4][1]; /* Stored state info for unpack_coeffs. 1 variable per instance. */
// debug & 2048: build a prediction-only picture (zero residual plus the
// prediction) and copy it into mconly_picture.
4047 if(s->avctx->debug&2048){
4048 memset(s->spatial_dwt_buffer, 0, sizeof(DWTELEM)*w*h);
4049 predict_plane(s, s->spatial_dwt_buffer, plane_index, 1);
4053 int v= s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x];
4054 s->mconly_picture.data[plane_index][y*s->mconly_picture.linesize[plane_index] + x]= v;
// Unpack the coefficients of every subband of this plane.
4060 for(level=0; level<s->spatial_decomposition_count; level++){
4061 for(orientation=level ? 1 : 0; orientation<4; orientation++){
4062 SubBand *b= &p->band[level][orientation];
4063 unpack_coeffs(s, b, b->parent, orientation);
4066 STOP_TIMER("unpack coeffs");
4070 const int mb_h= s->b_height << s->block_max_depth;
4071 const int block_size = MB_SIZE >> s->block_max_depth;
// Non-zero plane indices use half the block size.
4072 const int block_w = plane_index ? block_size/2 : block_size;
4074 dwt_compose_t cs[MAX_DECOMPOSITIONS];
4079 ff_spatial_idwt_buffered_init(cs, &s->sb, w, h, 1, s->spatial_decomposition_type, s->spatial_decomposition_count);
// One slice per block row; the loop bound is inclusive (mb_y <= mb_h).
4080 for(mb_y=0; mb_y<=mb_h; mb_y++){
4082 int slice_starty = block_w*mb_y;
4083 int slice_h = block_w*(mb_y+1);
// Unless this is a keyframe (or debug & 512), the slice window is moved up by
// half a block.
4084 if (!(s->keyframe || s->avctx->debug&512)){
4085 slice_starty = FFMAX(0, slice_starty - (block_w >> 1));
4086 slice_h -= (block_w >> 1);
4091 for(level=0; level<s->spatial_decomposition_count; level++){
4092 for(orientation=level ? 1 : 0; orientation<4; orientation++){
4093 SubBand *b= &p->band[level][orientation];
// Map the block-row range to a row range in this band: scale by the band's
// level and add a (count - level + 2) row margin, then clamp to the band height.
4096 int our_mb_start = mb_y;
4097 int our_mb_end = (mb_y + 1);
4098 start_y = (mb_y ? ((block_w * our_mb_start) >> (s->spatial_decomposition_count - level)) + s->spatial_decomposition_count - level + 2: 0);
4099 end_y = (((block_w * our_mb_end) >> (s->spatial_decomposition_count - level)) + s->spatial_decomposition_count - level + 2);
4100 if (!(s->keyframe || s->avctx->debug&512)){
4101 start_y = FFMAX(0, start_y - (block_w >> (1+s->spatial_decomposition_count - level)));
4102 end_y = FFMAX(0, end_y - (block_w >> (1+s->spatial_decomposition_count - level)));
4104 start_y = FFMIN(b->height, start_y);
4105 end_y = FFMIN(b->height, end_y);
4107 if (start_y != end_y){
// Orientation 0 is handled through band[0][0]: decode and correlate one row
// further than the rows that get dequantized.
4108 if (orientation == 0){
4109 SubBand * correlate_band = &p->band[0][0];
4110 int correlate_end_y = FFMIN(b->height, end_y + 1);
4111 int correlate_start_y = FFMIN(b->height, (start_y ? start_y + 1 : 0));
4112 decode_subband_slice_buffered(s, correlate_band, &s->sb, correlate_start_y, correlate_end_y, decode_state[0][0]);
4113 correlate_slice_buffered(s, &s->sb, correlate_band, correlate_band->buf, correlate_band->stride, 1, 0, correlate_start_y, correlate_end_y);
4114 dequantize_slice_buffered(s, &s->sb, correlate_band, correlate_band->buf, correlate_band->stride, start_y, end_y);
4117 decode_subband_slice_buffered(s, b, &s->sb, start_y, end_y, decode_state[level][orientation]);
4121 STOP_TIMER("decode_subband_slice");
// Advance the inverse DWT in steps of 4 lines up to the end of this slice.
4125 for(; yd<slice_h; yd+=4){
4126 ff_spatial_idwt_buffered_slice(cs, &s->sb, w, h, 1, s->spatial_decomposition_type, s->spatial_decomposition_count, yd);
4128 STOP_TIMER("idwt slice");}
// Lossless mode: restore the fractional bits on each newly produced line.
4131 if(s->qlog == LOSSLESS_QLOG){
4132 for(; yq<slice_h && yq<h; yq++){
4133 DWTELEM * line = slice_buffer_get_line(&s->sb, yq);
4135 line[x] <<= FRAC_BITS;
4140 predict_slice_buffered(s, &s->sb, s->spatial_dwt_buffer, plane_index, 1, mb_y);
// Release slice-buffer lines of this slice, clamped to the plane height.
4142 y = FFMIN(p->height, slice_starty);
4143 end_y = FFMIN(p->height, slice_h);
4145 slice_buffer_release(&s->sb, y++);
4148 slice_buffer_flush(&s->sb);
4150 STOP_TIMER("idwt + predict_slices")}
4155 if(s->last_picture.data[0])
4156 avctx->release_buffer(avctx, &s->last_picture);
// Output: the decoded picture normally, the prediction-only picture with
// debug & 2048.
4158 if(!(s->avctx->debug&2048))
4159 *picture= s->current_picture;
4161 *picture= s->mconly_picture;
4163 *data_size = sizeof(AVFrame);
4165 bytes_read= c->bytestream - c->bytestream_start;
4166 if(bytes_read ==0) av_log(s->avctx, AV_LOG_ERROR, "error at end of frame\n"); //FIXME
/**
 * Decoder teardown. The visible lines fetch the SnowContext and destroy the
 * slice buffer (s->sb).
 *
 * NOTE(review): other statements and the return value are on lines not
 * visible in this excerpt.
 */
4171 static int decode_end(AVCodecContext *avctx)
4173 SnowContext *s = avctx->priv_data;
4175 slice_buffer_destroy(&s->sb);
/**
 * Codec descriptor for the Snow decoder. Its private context size is
 * sizeof(SnowContext); the 0 entry has the CODEC_CAP_DR1 and
 * CODEC_CAP_DRAW_HORIZ_BAND flags commented out (presumably the capabilities
 * field -- the other initialisers are not visible in this excerpt).
 */
4182 AVCodec snow_decoder = {
4186 sizeof(SnowContext),
4191 0 /*CODEC_CAP_DR1*/ /*| CODEC_CAP_DRAW_HORIZ_BAND*/,
/* Codec descriptor for the Snow encoder, compiled only when CONFIG_ENCODERS
 * is defined. Its private context size is sizeof(SnowContext); the other
 * initialisers are not visible in this excerpt. */
4195 #ifdef CONFIG_ENCODERS
4196 AVCodec snow_encoder = {
4200 sizeof(SnowContext),
4216 int buffer[2][width*height];
4219 s.spatial_decomposition_count=6;
4220 s.spatial_decomposition_type=1;
4222 printf("testing 5/3 DWT\n");
4223 for(i=0; i<width*height; i++)
4224 buffer[0][i]= buffer[1][i]= random()%54321 - 12345;
4226 ff_spatial_dwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4227 ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4229 for(i=0; i<width*height; i++)
4230 if(buffer[0][i]!= buffer[1][i]) printf("fsck: %d %d %d\n",i, buffer[0][i], buffer[1][i]);
4232 printf("testing 9/7 DWT\n");
4233 s.spatial_decomposition_type=0;
4234 for(i=0; i<width*height; i++)
4235 buffer[0][i]= buffer[1][i]= random()%54321 - 12345;
4237 ff_spatial_dwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4238 ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4240 for(i=0; i<width*height; i++)
4241 if(ABS(buffer[0][i] - buffer[1][i])>20) printf("fsck: %d %d %d\n",i, buffer[0][i], buffer[1][i]);
4244 printf("testing AC coder\n");
4245 memset(s.header_state, 0, sizeof(s.header_state));
4246 ff_init_range_encoder(&s.c, buffer[0], 256*256);
4247 ff_init_cabac_states(&s.c, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64);
4249 for(i=-256; i<256; i++){
4251 put_symbol(&s.c, s.header_state, i*i*i/3*ABS(i), 1);
4252 STOP_TIMER("put_symbol")
4254 ff_rac_terminate(&s.c);
4256 memset(s.header_state, 0, sizeof(s.header_state));
4257 ff_init_range_decoder(&s.c, buffer[0], 256*256);
4258 ff_init_cabac_states(&s.c, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64);
4260 for(i=-256; i<256; i++){
4263 j= get_symbol(&s.c, s.header_state, 1);
4264 STOP_TIMER("get_symbol")
4265 if(j!=i*i*i/3*ABS(i)) printf("fsck: %d != %d\n", i, j);
4269 int level, orientation, x, y;
4270 int64_t errors[8][4];
4273 memset(errors, 0, sizeof(errors));
4274 s.spatial_decomposition_count=3;
4275 s.spatial_decomposition_type=0;
4276 for(level=0; level<s.spatial_decomposition_count; level++){
4277 for(orientation=level ? 1 : 0; orientation<4; orientation++){
4278 int w= width >> (s.spatial_decomposition_count-level);
4279 int h= height >> (s.spatial_decomposition_count-level);
4280 int stride= width << (s.spatial_decomposition_count-level);
4281 DWTELEM *buf= buffer[0];
4284 if(orientation&1) buf+=w;
4285 if(orientation>1) buf+=stride>>1;
4287 memset(buffer[0], 0, sizeof(int)*width*height);
4288 buf[w/2 + h/2*stride]= 256*256;
4289 ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4290 for(y=0; y<height; y++){
4291 for(x=0; x<width; x++){
4292 int64_t d= buffer[0][x + y*width];
4294 if(ABS(width/2-x)<9 && ABS(height/2-y)<9 && level==2) printf("%8lld ", d);
4296 if(ABS(height/2-y)<9 && level==2) printf("\n");
4298 error= (int)(sqrt(error)+0.5);
4299 errors[level][orientation]= error;
4300 if(g) g=ff_gcd(g, error);
4304 printf("static int const visual_weight[][4]={\n");
4305 for(level=0; level<s.spatial_decomposition_count; level++){
4307 for(orientation=0; orientation<4; orientation++){
4308 printf("%8lld,", errors[level][orientation]/g);
4316 int w= width >> (s.spatial_decomposition_count-level);
4317 int h= height >> (s.spatial_decomposition_count-level);
4318 int stride= width << (s.spatial_decomposition_count-level);
4319 DWTELEM *buf= buffer[0];
4325 memset(buffer[0], 0, sizeof(int)*width*height);
4327 for(y=0; y<height; y++){
4328 for(x=0; x<width; x++){
4329 int tab[4]={0,2,3,1};
4330 buffer[0][x+width*y]= 256*256*tab[(x&1) + 2*(y&1)];
4333 ff_spatial_dwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4337 buf[x + y*stride ]=169;
4338 buf[x + y*stride-w]=64;
4341 ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4343 for(y=0; y<height; y++){
4344 for(x=0; x<width; x++){
4345 int64_t d= buffer[0][x + y*width];
4347 if(ABS(width/2-x)<9 && ABS(height/2-y)<9) printf("%8lld ", d);
4349 if(ABS(height/2-y)<9) printf("\n");