2 * Copyright (C) 2004 Michael Niedermayer <michaelni@gmx.at>
4 * This library is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU Lesser General Public
6 * License as published by the Free Software Foundation; either
7 * version 2 of the License, or (at your option) any later version.
9 * This library is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * Lesser General Public License for more details.
14 * You should have received a copy of the GNU Lesser General Public
15 * License along with this library; if not, write to the Free Software
16 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23 #include "rangecoder.h"
26 #include "mpegvideo.h"
31 #define MAX_DECOMPOSITIONS 8
35 #define QROOT (1<<QSHIFT)
36 #define LOSSLESS_QLOG -128
/**
 * 256-entry lookup table with values in {-1, 0, 1}.
 * As listed: indices 0-1 map to 0, 2-127 to 1, 128-254 to -1 and 255 to 0.
 * NOTE(review): presumably indexed by a signed difference reduced to 8 bits,
 * so that 128-255 stand for negative inputs; the users of this table are not
 * visible here -- confirm.
 */
39 static const int8_t quant3[256]={
40 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
41 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
42 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
43 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
44 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
45 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
46 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
47 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
48 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
49 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
50 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
51 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
52 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
53 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
54 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
55 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 0,
/**
 * 256-entry lookup table with values in {-1, 0, 1}.
 * As listed: index 0 maps to 0, 1-127 to 1 and 128-255 to -1, i.e. unlike
 * quant3 only index 0 yields 0.
 * NOTE(review): presumably indexed by a signed difference reduced to 8 bits;
 * confirm against the callers.
 */
57 static const int8_t quant3b[256]={
58 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
59 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
60 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
61 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
62 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
63 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
64 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
65 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
66 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
67 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
68 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
69 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
70 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
71 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
72 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
73 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
/**
 * 256-entry lookup table with values in {-1, 0, 1}.
 * As listed: indices 0 and 1 map to 0; from index 2 on, even indices map to 1
 * and odd indices map to -1.
 * NOTE(review): the alternating layout suggests the index interleaves sign
 * and magnitude in some way; the users are not visible here -- confirm.
 */
75 static const int8_t quant3bA[256]={
76 0, 0, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
77 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
78 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
79 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
80 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
81 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
82 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
83 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
84 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
85 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
86 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
87 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
88 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
89 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
90 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
91 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
/**
 * 256-entry lookup table with values in [-2, 2].
 * As listed: index 0 maps to 0, 1-3 to 1, 4-127 to 2, 128-252 to -2 and
 * 253-255 to -1.
 * NOTE(review): presumably indexed by a signed difference reduced to 8 bits
 * (upper half = negative inputs); confirm against the callers.
 */
93 static const int8_t quant5[256]={
94 0, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
95 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
96 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
97 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
98 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
99 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
100 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
101 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
102 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
103 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
104 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
105 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
106 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
107 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
108 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
109 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,-1,
/**
 * 256-entry lookup table with values in [-3, 3].
 * The first 128 entries are non-negative and non-decreasing (0 up to 3); the
 * last 128 entries are negative, rising from -3 to -1 towards index 255.
 * NOTE(review): presumably indexed by a signed difference reduced to 8 bits
 * (upper half = negative inputs); confirm against the callers.
 */
111 static const int8_t quant7[256]={
112 0, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
113 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
114 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3,
115 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
116 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
117 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
118 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
119 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
120 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
121 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
122 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
123 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
124 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
125 -3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-2,-2,-2,
126 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
127 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,
/**
 * 256-entry lookup table with values in [-4, 4].
 * The first 128 entries are non-negative and non-decreasing (0 up to 4); the
 * last 128 entries are negative, rising from -4 to -1 towards index 255.
 * NOTE(review): presumably indexed by a signed difference reduced to 8 bits
 * (upper half = negative inputs); confirm against the callers.
 */
129 static const int8_t quant9[256]={
130 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3,
131 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
132 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
133 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
134 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
135 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
136 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
137 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
138 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
139 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
140 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
141 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
142 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
143 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
144 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,
145 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-1,-1,
/**
 * 256-entry lookup table with values in [-5, 5].
 * The first 128 entries are non-negative and non-decreasing (0 up to 5); the
 * last 128 entries are negative, rising from -5 to -1 towards index 255.
 * NOTE(review): presumably indexed by a signed difference reduced to 8 bits
 * (upper half = negative inputs); confirm against the callers.
 */
147 static const int8_t quant11[256]={
148 0, 1, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4,
149 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
150 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
151 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
152 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
153 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
154 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
155 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
156 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
157 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
158 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
159 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
160 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
161 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-4,-4,
162 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
163 -4,-4,-4,-4,-4,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-1,
/**
 * 256-entry lookup table with values in [-6, 6].
 * The first 128 entries are non-negative and non-decreasing (0 up to 6); the
 * last 128 entries are negative, rising from -6 to -1 towards index 255.
 * NOTE(review): presumably indexed by a signed difference reduced to 8 bits
 * (upper half = negative inputs); confirm against the callers.
 */
165 static const int8_t quant13[256]={
166 0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
167 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
168 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
169 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
170 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
171 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
172 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
173 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
174 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
175 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
176 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
177 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
178 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-5,
179 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
180 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
181 -4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,-2,-2,-1,
// Scale of the obmc* weight tables: OBMC_MAX == 1<<6 == 64.
// NOTE(review): presumably the value that overlapping weights add up to; the
// code that uses these tables is not visible here -- confirm.
184 #define LOG2_OBMC_MAX 6
185 #define OBMC_MAX (1<<(LOG2_OBMC_MAX))
/**
 * 1024-entry weight table laid out as 32 rows of 32 entries.
 * First of three obmc32 definitions in this file. The outermost rows and
 * columns are 0 and the four centre entries are 64.
 * NOTE(review): the three definitions share one name, so presumably only one
 * is compiled, selected by preprocessor conditionals that are not visible
 * here -- confirm which one is active.
 */
187 static const uint8_t obmc32[1024]={
188 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
189 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
190 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
191 0, 0, 1, 1, 2, 2, 3, 4, 4, 5, 6, 6, 7, 7, 8, 8, 8, 8, 7, 7, 6, 6, 5, 4, 4, 3, 2, 2, 1, 1, 0, 0,
192 0, 0, 1, 2, 2, 3, 4, 6, 7, 8, 9,10,11,12,12,12,12,12,12,11,10, 9, 8, 7, 6, 4, 3, 2, 2, 1, 0, 0,
193 0, 1, 1, 2, 3, 5, 6, 8,10,11,13,14,15,16,17,18,18,17,16,15,14,13,11,10, 8, 6, 5, 3, 2, 1, 1, 0,
194 0, 1, 1, 3, 4, 6, 8,10,13,15,17,19,20,22,22,23,23,22,22,20,19,17,15,13,10, 8, 6, 4, 3, 1, 1, 0,
195 0, 1, 2, 4, 6, 8,10,13,16,19,21,23,25,27,28,29,29,28,27,25,23,21,19,16,13,10, 8, 6, 4, 2, 1, 0,
196 0, 1, 2, 4, 7,10,13,16,19,22,25,28,31,33,34,35,35,34,33,31,28,25,22,19,16,13,10, 7, 4, 2, 1, 0,
197 0, 1, 3, 5, 8,11,15,19,22,26,30,33,36,38,40,41,41,40,38,36,33,30,26,22,19,15,11, 8, 5, 3, 1, 0,
198 0, 1, 3, 6, 9,12,17,21,25,30,34,38,41,44,45,46,46,45,44,41,38,34,30,25,21,17,12, 9, 6, 3, 1, 0,
199 0, 1, 3, 6,10,14,19,23,28,33,38,42,45,48,51,52,52,51,48,45,42,38,33,28,23,19,14,10, 6, 3, 1, 0,
200 0, 1, 4, 7,11,15,20,25,31,36,41,45,49,52,55,56,56,55,52,49,45,41,36,31,25,20,15,11, 7, 4, 1, 0,
201 0, 2, 4, 7,12,16,22,27,33,38,44,48,52,56,58,60,60,58,56,52,48,44,38,33,27,22,16,12, 7, 4, 2, 0,
202 0, 1, 4, 8,12,17,22,28,34,40,45,51,55,58,61,62,62,61,58,55,51,45,40,34,28,22,17,12, 8, 4, 1, 0,
203 0, 2, 4, 8,12,18,23,29,35,41,46,52,56,60,62,64,64,62,60,56,52,46,41,35,29,23,18,12, 8, 4, 2, 0,
204 0, 2, 4, 8,12,18,23,29,35,41,46,52,56,60,62,64,64,62,60,56,52,46,41,35,29,23,18,12, 8, 4, 2, 0,
205 0, 1, 4, 8,12,17,22,28,34,40,45,51,55,58,61,62,62,61,58,55,51,45,40,34,28,22,17,12, 8, 4, 1, 0,
206 0, 2, 4, 7,12,16,22,27,33,38,44,48,52,56,58,60,60,58,56,52,48,44,38,33,27,22,16,12, 7, 4, 2, 0,
207 0, 1, 4, 7,11,15,20,25,31,36,41,45,49,52,55,56,56,55,52,49,45,41,36,31,25,20,15,11, 7, 4, 1, 0,
208 0, 1, 3, 6,10,14,19,23,28,33,38,42,45,48,51,52,52,51,48,45,42,38,33,28,23,19,14,10, 6, 3, 1, 0,
209 0, 1, 3, 6, 9,12,17,21,25,30,34,38,41,44,45,46,46,45,44,41,38,34,30,25,21,17,12, 9, 6, 3, 1, 0,
210 0, 1, 3, 5, 8,11,15,19,22,26,30,33,36,38,40,41,41,40,38,36,33,30,26,22,19,15,11, 8, 5, 3, 1, 0,
211 0, 1, 2, 4, 7,10,13,16,19,22,25,28,31,33,34,35,35,34,33,31,28,25,22,19,16,13,10, 7, 4, 2, 1, 0,
212 0, 1, 2, 4, 6, 8,10,13,16,19,21,23,25,27,28,29,29,28,27,25,23,21,19,16,13,10, 8, 6, 4, 2, 1, 0,
213 0, 1, 1, 3, 4, 6, 8,10,13,15,17,19,20,22,22,23,23,22,22,20,19,17,15,13,10, 8, 6, 4, 3, 1, 1, 0,
214 0, 1, 1, 2, 3, 5, 6, 8,10,11,13,14,15,16,17,18,18,17,16,15,14,13,11,10, 8, 6, 5, 3, 2, 1, 1, 0,
215 0, 0, 1, 2, 2, 3, 4, 6, 7, 8, 9,10,11,12,12,12,12,12,12,11,10, 9, 8, 7, 6, 4, 3, 2, 2, 1, 0, 0,
216 0, 0, 1, 1, 2, 2, 3, 4, 4, 5, 6, 6, 7, 7, 8, 8, 8, 8, 7, 7, 6, 6, 5, 4, 4, 3, 2, 2, 1, 1, 0, 0,
217 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
218 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
219 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
/**
 * 256-entry weight table laid out as 16 rows of 16 entries.
 * First of three obmc16 definitions in this file; the four centre entries
 * are 62.
 * NOTE(review): the three definitions share one name, so presumably only one
 * is compiled, selected by preprocessor conditionals that are not visible
 * here -- confirm which one is active.
 */
222 static const uint8_t obmc16[256]={
223 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
224 0, 1, 1, 2, 4, 5, 5, 6, 6, 5, 5, 4, 2, 1, 1, 0,
225 0, 1, 4, 6, 9,11,13,15,15,13,11, 9, 6, 4, 1, 0,
226 0, 2, 6,11,15,20,24,26,26,24,20,15,11, 6, 2, 0,
227 0, 4, 9,15,23,29,34,38,38,34,29,23,15, 9, 4, 0,
228 0, 5,11,20,29,38,45,49,49,45,38,29,20,11, 5, 0,
229 1, 5,13,24,34,45,53,57,57,53,45,34,24,13, 5, 1,
230 1, 6,15,26,38,49,57,62,62,57,49,38,26,15, 6, 1,
231 1, 6,15,26,38,49,57,62,62,57,49,38,26,15, 6, 1,
232 1, 5,13,24,34,45,53,57,57,53,45,34,24,13, 5, 1,
233 0, 5,11,20,29,38,45,49,49,45,38,29,20,11, 5, 0,
234 0, 4, 9,15,23,29,34,38,38,34,29,23,15, 9, 4, 0,
235 0, 2, 6,11,15,20,24,26,26,24,20,15,11, 6, 2, 0,
236 0, 1, 4, 6, 9,11,13,15,15,13,11, 9, 6, 4, 1, 0,
237 0, 1, 1, 2, 4, 5, 5, 6, 6, 5, 5, 4, 2, 1, 1, 0,
238 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
/**
 * 1024-entry weight table laid out as 32 rows of 32 entries.
 * Second of three obmc32 definitions in this file; the four centre entries
 * are 60 and the border entries are small but not all zero.
 * NOTE(review): the three definitions share one name, so presumably only one
 * is compiled, selected by preprocessor conditionals that are not visible
 * here -- confirm which one is active.
 */
242 static const uint8_t obmc32[1024]={
243 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0,
244 0, 1, 1, 1, 2, 2, 2, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 5, 5, 5, 4, 4, 4, 3, 3, 2, 2, 2, 1, 1, 1, 0,
245 0, 1, 2, 2, 3, 3, 4, 5, 5, 6, 7, 7, 8, 8, 9,10,10, 9, 8, 8, 7, 7, 6, 5, 5, 4, 3, 3, 2, 2, 1, 0,
246 0, 1, 2, 3, 4, 5, 6, 7, 7, 8, 9,10,11,12,13,14,14,13,12,11,10, 9, 8, 7, 7, 6, 5, 4, 3, 2, 1, 0,
247 1, 2, 3, 4, 5, 6, 7, 8,10,11,12,13,14,15,16,17,17,16,15,14,13,12,11,10, 8, 7, 6, 5, 4, 3, 2, 1,
248 1, 2, 3, 5, 6, 8, 9,10,12,13,14,16,17,19,20,21,21,20,19,17,16,14,13,12,10, 9, 8, 6, 5, 3, 2, 1,
249 1, 2, 4, 6, 7, 9,11,12,14,15,17,19,20,22,24,25,25,24,22,20,19,17,15,14,12,11, 9, 7, 6, 4, 2, 1,
250 1, 3, 5, 7, 8,10,12,14,16,18,20,22,23,25,27,29,29,27,25,23,22,20,18,16,14,12,10, 8, 7, 5, 3, 1,
251 1, 3, 5, 7,10,12,14,16,18,20,22,24,27,29,31,33,33,31,29,27,24,22,20,18,16,14,12,10, 7, 5, 3, 1,
252 1, 4, 6, 8,11,13,15,18,20,23,25,27,30,32,34,37,37,34,32,30,27,25,23,20,18,15,13,11, 8, 6, 4, 1,
253 1, 4, 7, 9,12,14,17,20,22,25,28,30,33,35,38,41,41,38,35,33,30,28,25,22,20,17,14,12, 9, 7, 4, 1,
254 1, 4, 7,10,13,16,19,22,24,27,30,33,36,39,42,45,45,42,39,36,33,30,27,24,22,19,16,13,10, 7, 4, 1,
255 2, 5, 8,11,14,17,20,23,27,30,33,36,39,42,45,48,48,45,42,39,36,33,30,27,23,20,17,14,11, 8, 5, 2,
256 2, 5, 8,12,15,19,22,25,29,32,35,39,42,46,49,52,52,49,46,42,39,35,32,29,25,22,19,15,12, 8, 5, 2,
257 2, 5, 9,13,16,20,24,27,31,34,38,42,45,49,53,56,56,53,49,45,42,38,34,31,27,24,20,16,13, 9, 5, 2,
258 2, 6,10,14,17,21,25,29,33,37,41,45,48,52,56,60,60,56,52,48,45,41,37,33,29,25,21,17,14,10, 6, 2,
259 2, 6,10,14,17,21,25,29,33,37,41,45,48,52,56,60,60,56,52,48,45,41,37,33,29,25,21,17,14,10, 6, 2,
260 2, 5, 9,13,16,20,24,27,31,34,38,42,45,49,53,56,56,53,49,45,42,38,34,31,27,24,20,16,13, 9, 5, 2,
261 2, 5, 8,12,15,19,22,25,29,32,35,39,42,46,49,52,52,49,46,42,39,35,32,29,25,22,19,15,12, 8, 5, 2,
262 2, 5, 8,11,14,17,20,23,27,30,33,36,39,42,45,48,48,45,42,39,36,33,30,27,23,20,17,14,11, 8, 5, 2,
263 1, 4, 7,10,13,16,19,22,24,27,30,33,36,39,42,45,45,42,39,36,33,30,27,24,22,19,16,13,10, 7, 4, 1,
264 1, 4, 7, 9,12,14,17,20,22,25,28,30,33,35,38,41,41,38,35,33,30,28,25,22,20,17,14,12, 9, 7, 4, 1,
265 1, 4, 6, 8,11,13,15,18,20,23,25,27,30,32,34,37,37,34,32,30,27,25,23,20,18,15,13,11, 8, 6, 4, 1,
266 1, 3, 5, 7,10,12,14,16,18,20,22,24,27,29,31,33,33,31,29,27,24,22,20,18,16,14,12,10, 7, 5, 3, 1,
267 1, 3, 5, 7, 8,10,12,14,16,18,20,22,23,25,27,29,29,27,25,23,22,20,18,16,14,12,10, 8, 7, 5, 3, 1,
268 1, 2, 4, 6, 7, 9,11,12,14,15,17,19,20,22,24,25,25,24,22,20,19,17,15,14,12,11, 9, 7, 6, 4, 2, 1,
269 1, 2, 3, 5, 6, 8, 9,10,12,13,14,16,17,19,20,21,21,20,19,17,16,14,13,12,10, 9, 8, 6, 5, 3, 2, 1,
270 1, 2, 3, 4, 5, 6, 7, 8,10,11,12,13,14,15,16,17,17,16,15,14,13,12,11,10, 8, 7, 6, 5, 4, 3, 2, 1,
271 0, 1, 2, 3, 4, 5, 6, 7, 7, 8, 9,10,11,12,13,14,14,13,12,11,10, 9, 8, 7, 7, 6, 5, 4, 3, 2, 1, 0,
272 0, 1, 2, 2, 3, 3, 4, 5, 5, 6, 7, 7, 8, 8, 9,10,10, 9, 8, 8, 7, 7, 6, 5, 5, 4, 3, 3, 2, 2, 1, 0,
273 0, 1, 1, 1, 2, 2, 2, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 5, 5, 5, 4, 4, 4, 3, 3, 2, 2, 2, 1, 1, 1, 0,
274 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0,
/**
 * 256-entry weight table laid out as 16 rows of 16 entries.
 * Second of three obmc16 definitions in this file; the four centre entries
 * are 56.
 * NOTE(review): the three definitions share one name, so presumably only one
 * is compiled, selected by preprocessor conditionals that are not visible
 * here -- confirm which one is active.
 */
277 static const uint8_t obmc16[256]={
278 0, 1, 1, 2, 2, 3, 3, 4, 4, 3, 3, 2, 2, 1, 1, 0,
279 1, 2, 4, 5, 7, 8,10,11,11,10, 8, 7, 5, 4, 2, 1,
280 1, 4, 6, 9,11,14,16,19,19,16,14,11, 9, 6, 4, 1,
281 2, 5, 9,12,16,19,23,26,26,23,19,16,12, 9, 5, 2,
282 2, 7,11,16,20,25,29,34,34,29,25,20,16,11, 7, 2,
283 3, 8,14,19,25,30,36,41,41,36,30,25,19,14, 8, 3,
284 3,10,16,23,29,36,42,49,49,42,36,29,23,16,10, 3,
285 4,11,19,26,34,41,49,56,56,49,41,34,26,19,11, 4,
286 4,11,19,26,34,41,49,56,56,49,41,34,26,19,11, 4,
287 3,10,16,23,29,36,42,49,49,42,36,29,23,16,10, 3,
288 3, 8,14,19,25,30,36,41,41,36,30,25,19,14, 8, 3,
289 2, 7,11,16,20,25,29,34,34,29,25,20,16,11, 7, 2,
290 2, 5, 9,12,16,19,23,26,26,23,19,16,12, 9, 5, 2,
291 1, 4, 6, 9,11,14,16,19,19,16,14,11, 9, 6, 4, 1,
292 1, 2, 4, 5, 7, 8,10,11,11,10, 8, 7, 5, 4, 2, 1,
293 0, 1, 1, 2, 2, 3, 3, 4, 4, 3, 3, 2, 2, 1, 1, 0,
/**
 * 1024-entry weight table laid out as 32 rows of 32 entries.
 * Third of three obmc32 definitions in this file. The outermost rows and
 * columns are 0 and the four centre entries are 64.
 * NOTE(review): the three definitions share one name, so presumably only one
 * is compiled, selected by preprocessor conditionals that are not visible
 * here -- confirm which one is active.
 */
297 static const uint8_t obmc32[1024]={
298 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
299 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
300 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
301 0, 0, 1, 1, 1, 2, 2, 3, 4, 5, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 6, 5, 5, 4, 3, 2, 2, 1, 1, 1, 0, 0,
302 0, 0, 1, 1, 2, 3, 4, 5, 6, 7, 9,10,11,11,12,12,12,12,11,11,10, 9, 7, 6, 5, 4, 3, 2, 1, 1, 0, 0,
303 0, 0, 1, 2, 3, 5, 6, 8, 9,11,12,14,15,16,17,17,17,17,16,15,14,12,11, 9, 8, 6, 5, 3, 2, 1, 0, 0,
304 0, 1, 1, 2, 4, 6, 8,10,12,15,17,19,20,21,22,23,23,22,21,20,19,17,15,12,10, 8, 6, 4, 2, 1, 1, 0,
305 0, 1, 2, 3, 5, 8,10,13,16,19,21,24,26,27,28,29,29,28,27,26,24,21,19,16,13,10, 8, 5, 3, 2, 1, 0,
306 0, 1, 2, 4, 6, 9,12,16,19,23,26,29,31,33,34,35,35,34,33,31,29,26,23,19,16,12, 9, 6, 4, 2, 1, 0,
307 0, 1, 3, 5, 7,11,15,19,23,26,30,34,37,39,40,41,41,40,39,37,34,30,26,23,19,15,11, 7, 5, 3, 1, 0,
308 0, 1, 3, 5, 9,12,17,21,26,30,35,38,42,44,46,47,47,46,44,42,38,35,30,26,21,17,12, 9, 5, 3, 1, 0,
309 0, 1, 3, 6, 9,14,19,24,29,34,38,43,46,49,51,52,52,51,49,46,43,38,34,29,24,19,14, 9, 6, 3, 1, 0,
310 0, 1, 3, 6,11,15,20,26,31,37,42,46,50,53,56,57,57,56,53,50,46,42,37,31,26,20,15,11, 6, 3, 1, 0,
311 0, 1, 3, 7,11,16,21,27,33,39,44,49,53,57,59,60,60,59,57,53,49,44,39,33,27,21,16,11, 7, 3, 1, 0,
312 0, 1, 4, 7,12,17,22,28,34,40,46,51,56,59,61,63,63,61,59,56,51,46,40,34,28,22,17,12, 7, 4, 1, 0,
313 0, 1, 4, 7,12,17,23,29,35,41,47,52,57,60,63,64,64,63,60,57,52,47,41,35,29,23,17,12, 7, 4, 1, 0,
314 0, 1, 4, 7,12,17,23,29,35,41,47,52,57,60,63,64,64,63,60,57,52,47,41,35,29,23,17,12, 7, 4, 1, 0,
315 0, 1, 4, 7,12,17,22,28,34,40,46,51,56,59,61,63,63,61,59,56,51,46,40,34,28,22,17,12, 7, 4, 1, 0,
316 0, 1, 3, 7,11,16,21,27,33,39,44,49,53,57,59,60,60,59,57,53,49,44,39,33,27,21,16,11, 7, 3, 1, 0,
317 0, 1, 3, 6,11,15,20,26,31,37,42,46,50,53,56,57,57,56,53,50,46,42,37,31,26,20,15,11, 6, 3, 1, 0,
318 0, 1, 3, 6, 9,14,19,24,29,34,38,43,46,49,51,52,52,51,49,46,43,38,34,29,24,19,14, 9, 6, 3, 1, 0,
319 0, 1, 3, 5, 9,12,17,21,26,30,35,38,42,44,46,47,47,46,44,42,38,35,30,26,21,17,12, 9, 5, 3, 1, 0,
320 0, 1, 3, 5, 7,11,15,19,23,26,30,34,37,39,40,41,41,40,39,37,34,30,26,23,19,15,11, 7, 5, 3, 1, 0,
321 0, 1, 2, 4, 6, 9,12,16,19,23,26,29,31,33,34,35,35,34,33,31,29,26,23,19,16,12, 9, 6, 4, 2, 1, 0,
322 0, 1, 2, 3, 5, 8,10,13,16,19,21,24,26,27,28,29,29,28,27,26,24,21,19,16,13,10, 8, 5, 3, 2, 1, 0,
323 0, 1, 1, 2, 4, 6, 8,10,12,15,17,19,20,21,22,23,23,22,21,20,19,17,15,12,10, 8, 6, 4, 2, 1, 1, 0,
324 0, 0, 1, 2, 3, 5, 6, 8, 9,11,12,14,15,16,17,17,17,17,16,15,14,12,11, 9, 8, 6, 5, 3, 2, 1, 0, 0,
325 0, 0, 1, 1, 2, 3, 4, 5, 6, 7, 9,10,11,11,12,12,12,12,11,11,10, 9, 7, 6, 5, 4, 3, 2, 1, 1, 0, 0,
326 0, 0, 1, 1, 1, 2, 2, 3, 4, 5, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 6, 5, 5, 4, 3, 2, 2, 1, 1, 1, 0, 0,
327 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
328 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
329 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
/**
 * 256-entry weight table laid out as 16 rows of 16 entries.
 * Third of three obmc16 definitions in this file; the four centre entries
 * are 63.
 * NOTE(review): the three definitions share one name, so presumably only one
 * is compiled, selected by preprocessor conditionals that are not visible
 * here -- confirm which one is active.
 */
332 static const uint8_t obmc16[256]={
333 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
334 0, 0, 1, 2, 3, 4, 5, 5, 5, 5, 4, 3, 2, 1, 0, 0,
335 0, 1, 3, 6, 8,11,13,14,14,13,11, 8, 6, 3, 1, 0,
336 0, 2, 6,10,15,20,24,26,26,24,20,15,10, 6, 2, 0,
337 0, 3, 8,16,23,30,35,38,38,35,30,23,16, 8, 3, 0,
338 1, 4,11,20,30,39,46,49,49,46,39,30,20,11, 4, 1,
339 1, 5,13,24,35,46,54,58,58,54,46,35,24,13, 5, 1,
340 0, 5,14,26,38,49,58,63,63,58,49,38,26,14, 5, 0,
341 0, 5,14,26,38,49,58,63,63,58,49,38,26,14, 5, 0,
342 1, 5,13,24,35,46,54,58,58,54,46,35,24,13, 5, 1,
343 1, 4,11,20,30,39,46,49,49,46,39,30,20,11, 4, 1,
344 0, 3, 8,16,23,30,35,38,38,35,30,23,16, 8, 3, 0,
345 0, 2, 6,10,15,20,24,26,26,24,20,15,10, 6, 2, 0,
346 0, 1, 3, 6, 8,11,13,14,14,13,11, 8, 6, 3, 1, 0,
347 0, 0, 1, 2, 3, 4, 5, 5, 5, 5, 4, 3, 2, 1, 0, 0,
348 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
/**
 * 64-entry weight table laid out as 8 rows of 8 entries.
 * Every entry is the product of a row weight and a column weight taken from
 * the sequence 1,3,5,7,7,5,3,1 (e.g. 3*5 = 15, 7*7 = 49), so the table is
 * symmetric in both directions.
 */
354 static const uint8_t obmc8[64]={
355 1, 3, 5, 7, 7, 5, 3, 1,
356 3, 9,15,21,21,15, 9, 3,
357 5,15,25,35,35,25,15, 5,
358 7,21,35,49,49,35,21, 7,
359 7,21,35,49,49,35,21, 7,
360 5,15,25,35,35,25,15, 5,
361 3, 9,15,21,21,15, 9, 3,
362 1, 3, 5, 7, 7, 5, 3, 1,
// 16-entry weight table, the smallest of the obmc* tables.
// NOTE(review): its entries are not visible here; presumably a 4x4 layout
// analogous to obmc8 -- confirm.
367 static const uint8_t obmc4[16]={
// Pointers to the weight tables, ordered from the largest (obmc32, index 0)
// to the smallest (obmc4, index 3).
// NOTE(review): what the index represents is not visible here -- confirm.
375 static const uint8_t *obmc_tab[4]={
376 obmc32, obmc16, obmc8, obmc4
/**
 * Per-block record.
 * NOTE(review): only the level member and the BLOCK_INTRA flag value are
 * visible here. null_block initialises a .color member, so further members
 * exist that are not shown.
 */
379 typedef struct BlockNode{
384 //#define TYPE_SPLIT 1
// Flag value 1; presumably stored in a type member (see the FIXME on level)
// -- confirm.
385 #define BLOCK_INTRA 1
387 //#define TYPE_NOCOLOR 4
388 uint8_t level; //FIXME merge into type?
// Constant BlockNode whose three color components are all 128.
// NOTE(review): presumably a default used where no real block is available;
// the remaining initialisers and the users are not visible here -- confirm.
391 static const BlockNode null_block= { //FIXME add border maybe
392 .color= {128,128,128},
// MB_SIZE == 1<<4 == 16.
// NOTE(review): presumably the macroblock edge length in pixels -- confirm.
399 #define LOG2_MB_SIZE 4
400 #define MB_SIZE (1<<LOG2_MB_SIZE)
// Element type of SubBand.x_coeff.
// NOTE(review): the members are not visible here; the name suggests an
// (x position, coefficient) pair -- confirm.
402 typedef struct x_and_coeff{
/**
 * Description of one subband of a decomposed plane.
 * NOTE(review): only part of the member list is visible here.
 */
407 typedef struct SubBand{
412 int qlog; ///< log(qscale)/log[2^(1/6)]
416 int stride_line; ///< Stride measured in lines, not pixels.
417 x_and_coeff * x_coeff;
// Link to another SubBand; presumably the same orientation one decomposition
// level coarser -- confirm.
418 struct SubBand *parent;
// 7 + 512 sets of 32 adaptive state bytes. The 32-byte row size matches the
// state+0..state+31 offsets used by put_symbol()/get_symbol().
419 uint8_t state[/*7*2*/ 7 + 512][32];
/**
 * One image plane.
 * NOTE(review): only the subband array is visible here.
 */
422 typedef struct Plane{
// Up to MAX_DECOMPOSITIONS (8) levels with 4 SubBand slots each.
// NOTE(review): presumably one slot per orientation -- confirm.
425 SubBand band[MAX_DECOMPOSITIONS][4];
428 /** Used to minimize the amount of memory used in order to optimize cache performance. **/
// NOTE(review): the struct header and several members (line_count, line_width,
// data_count, data_stack_top, all used by the slice_buffer_* functions) are
// not visible here.
430 DWTELEM * * line; ///< For use by idwt and predict_slices. One pointer per line; NULL while the line is not loaded.
431 DWTELEM * * data_stack; ///< Used for internal purposes. Stack of line buffers currently not assigned to any line.
436 DWTELEM * base_buffer; ///< Buffer that this structure is caching.
/**
 * Codec state.
 * NOTE(review): only part of the member list is visible here.
 */
439 typedef struct SnowContext{
440 // MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to make the motion estimation eventually independent of MpegEncContext, so this will be removed then (FIXME/XXX)
442 AVCodecContext *avctx;
446 AVFrame input_picture; ///< new_picture with the internal linesizes
447 AVFrame current_picture;
448 AVFrame last_picture;
449 AVFrame mconly_picture;
450 // uint8_t q_context[16];
// Adaptive state bytes; presumably passed as the state argument of
// put_symbol()/get_symbol() for header and block data -- confirm.
451 uint8_t header_state[32];
452 uint8_t block_state[128 + 32*128];
456 int spatial_decomposition_type;
457 int temporal_decomposition_type;
458 int spatial_decomposition_count;
459 int temporal_decomposition_count;
460 DWTELEM *spatial_dwt_buffer;
464 int spatial_scalability;
470 #define QBIAS_SHIFT 3
474 Plane plane[MAX_PLANES];
// NOTE(review): how me_cache and me_cache_generation are used is not visible
// here.
476 #define ME_CACHE_SIZE 1024
477 int me_cache[ME_CACHE_SIZE];
478 int me_cache_generation;
481 MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to make the motion estimation eventually independent of MpegEncContext, so this will be removed then (FIXME/XXX)
// Returns the buffer for line line_num: the cached pointer when the entry is
// non-NULL, otherwise the result of slice_buffer_load_line().
// Both arguments are expanded more than once, so they must be free of side
// effects.
492 #define slice_buffer_get_line(slice_buf, line_num) ((slice_buf)->line[line_num] ? (slice_buf)->line[line_num] : slice_buffer_load_line((slice_buf), (line_num)))
493 //#define slice_buffer_get_line(slice_buf, line_num) (slice_buffer_load_line((slice_buf), (line_num)))
495 static void iterative_me(SnowContext *s);
/**
 * Set up a slice_buffer and allocate its line storage.
 * @param buf                 descriptor to fill in
 * @param line_count          number of entries in the per-line pointer table
 * @param max_allocated_lines number of line buffers allocated up front
 * @param line_width          length of each line buffer, in DWTELEM units
 * @param base_buffer         stored as buf->base_buffer
 * NOTE(review): no NULL check of the av_mallocz()/av_malloc() results is
 * visible here.
 */
497 static void slice_buffer_init(slice_buffer * buf, int line_count, int max_allocated_lines, int line_width, DWTELEM * base_buffer)
501 buf->base_buffer = base_buffer;
502 buf->line_count = line_count;
503 buf->line_width = line_width;
504 buf->data_count = max_allocated_lines;
// Zero-initialised, so every line starts out as "not loaded" (NULL).
505 buf->line = (DWTELEM * *) av_mallocz (sizeof(DWTELEM *) * line_count);
506 buf->data_stack = (DWTELEM * *) av_malloc (sizeof(DWTELEM *) * max_allocated_lines);
508 for (i = 0; i < max_allocated_lines; i++)
510 buf->data_stack[i] = (DWTELEM *) av_malloc (sizeof(DWTELEM) * line_width);
// The stack starts full: its top is the last buffer allocated above.
513 buf->data_stack_top = max_allocated_lines - 1;
/**
 * Assign a free line buffer to the given line.
 * Pops the top entry of buf->data_stack and stores it in buf->line[line].
 * @param buf  slice buffer
 * @param line index into buf->line
 * NOTE(review): the early "return buf->line[line]" is presumably guarded by
 * a check that the line is already loaded, and the final return and the use
 * of offset are not visible here -- confirm.
 */
516 static DWTELEM * slice_buffer_load_line(slice_buffer * buf, int line)
521 // av_log(NULL, AV_LOG_DEBUG, "Cache hit: %d\n", line);
// A free buffer must be available before one can be popped.
523 assert(buf->data_stack_top >= 0);
524 // assert(!buf->line[line]);
526 return buf->line[line];
528 offset = buf->line_width * line;
529 buffer = buf->data_stack[buf->data_stack_top];
530 buf->data_stack_top--;
531 buf->line[line] = buffer;
533 // av_log(NULL, AV_LOG_DEBUG, "slice_buffer_load_line: line: %d remaining: %d\n", line, buf->data_stack_top + 1);
/**
 * Give the buffer of a loaded line back to the free stack.
 * Pushes buf->line[line] onto buf->data_stack and marks the line as not
 * loaded by setting its entry to NULL.
 * @param buf  slice buffer
 * @param line index of a loaded line, 0 <= line < buf->line_count
 *             (both conditions are only enforced through assert())
 * NOTE(review): offset is computed but no use of it is visible here.
 */
538 static void slice_buffer_release(slice_buffer * buf, int line)
543 assert(line >= 0 && line < buf->line_count);
544 assert(buf->line[line]);
546 offset = buf->line_width * line;
547 buffer = buf->line[line];
548 buf->data_stack_top++;
549 buf->data_stack[buf->data_stack_top] = buffer;
550 buf->line[line] = NULL;
552 // av_log(NULL, AV_LOG_DEBUG, "slice_buffer_release: line: %d remaining: %d\n", line, buf->data_stack_top + 1);
/**
 * Walk over all buf->line_count lines and release them.
 * NOTE(review): slice_buffer_release() asserts that the line is loaded, so
 * the call is presumably guarded by a check of buf->line[i] that is not
 * visible here -- confirm.
 */
555 static void slice_buffer_flush(slice_buffer * buf)
558 for (i = 0; i < buf->line_count; i++)
562 // av_log(NULL, AV_LOG_DEBUG, "slice_buffer_flush: line: %d \n", i);
563 slice_buffer_release(buf, i);
/**
 * Free the storage owned by a slice_buffer.
 * Flushes first so that every line buffer is back on buf->data_stack, then
 * frees the buf->data_count line buffers and the stack array itself.
 * NOTE(review): freeing of buf->line is not visible here -- confirm it
 * happens.
 */
568 static void slice_buffer_destroy(slice_buffer * buf)
571 slice_buffer_flush(buf);
573 for (i = buf->data_count - 1; i >= 0; i--)
575 assert(buf->data_stack[i]);
576 av_free(buf->data_stack[i]);
578 assert(buf->data_stack);
579 av_free(buf->data_stack);
585 // Avoid a name clash on SGI IRIX
// QEXPSHIFT depends on FRAC_BITS, which is defined outside the visible part
// of the file.
588 #define QEXPSHIFT (7-FRAC_BITS+8) //FIXME try to change this to 0
// Table with QROOT (1<<QSHIFT) entries; not const, so presumably filled in at
// run time -- the initialisation is not visible here.
589 static uint8_t qexp[QROOT];
/**
 * Bring index v into the range 0..m.
 * The loop runs while v lies outside [0, m]; the unsigned comparison makes a
 * negative v count as out of range as well.
 * NOTE(review): the loop body is not visible here; judging by the name it
 * reflects v at the borders -- confirm.
 */
591 static inline int mirror(int v, int m){
592 while((unsigned)v > (unsigned)m){
/**
 * Write integer v to the range coder c using the adaptive states in state.
 * State layout, taken from the inline comments and offsets below:
 *   state[0]       flag bit: 0 is written on the paths that go on to code an
 *                  exponent, 1 otherwise (presumably for v == 0)
 *   state[1..10]   exponent e = av_log2(a), coded in unary
 *   state[11..21]  sign bit (v < 0), written at offset 11 + e (clamped to 10
 *                  in two of the variants)
 *   state[22..31]  mantissa bits of a, most significant first
 * @param is_signed  presumably selects whether the sign bit is written
 * NOTE(review): a is presumably the magnitude of v. The statements below are
 * several alternative encodings of the same steps (with and without FFMIN
 * clamping of the state index), presumably selected by conditionals that are
 * not visible here -- confirm.
 */
599 static inline void put_symbol(RangeCoder *c, uint8_t *state, int v, int is_signed){
604 const int e= av_log2(a);
606 const int el= FFMIN(e, 10);
607 put_rac(c, state+0, 0);
610 put_rac(c, state+1+i, 1); //1..10
613 put_rac(c, state+1+9, 1); //1..10
615 put_rac(c, state+1+FFMIN(i,9), 0);
617 for(i=e-1; i>=el; i--){
618 put_rac(c, state+22+9, (a>>i)&1); //22..31
621 put_rac(c, state+22+i, (a>>i)&1); //22..31
625 put_rac(c, state+11 + el, v < 0); //11..21
628 put_rac(c, state+0, 0);
631 put_rac(c, state+1+i, 1); //1..10
633 put_rac(c, state+1+i, 0);
635 for(i=e-1; i>=0; i--){
636 put_rac(c, state+22+i, (a>>i)&1); //22..31
640 put_rac(c, state+11 + e, v < 0); //11..21
643 put_rac(c, state+1+FFMIN(i,9), 1); //1..10
645 put_rac(c, state+1+FFMIN(i,9), 0);
647 for(i=e-1; i>=0; i--){
648 put_rac(c, state+22+FFMIN(i,9), (a>>i)&1); //22..31
652 put_rac(c, state+11 + FFMIN(e,10), v < 0); //11..21
656 put_rac(c, state+0, 1);
/**
 * Read an integer from the range coder c; counterpart of put_symbol().
 * Uses the same state layout: state[0] flag, state[1..10] unary exponent,
 * state[11..21] sign, state[22..31] mantissa bits (most significant first).
 * State indices are clamped with FFMIN so they stay within those ranges.
 * NOTE(review): the statements taken when the state[0] flag is set, the
 * initial values of e and a, and the return statements are not visible here;
 * presumably a set flag yields 0 and a set sign bit negates the result --
 * confirm.
 */
660 static inline int get_symbol(RangeCoder *c, uint8_t *state, int is_signed){
661 if(get_rac(c, state+0))
666 while(get_rac(c, state+1 + FFMIN(e,9))){ //1..10
671 for(i=e-1; i>=0; i--){
// Shift the bits read so far left by one and append the next one.
672 a += a + get_rac(c, state+22 + FFMIN(i,9)); //22..31
675 if(is_signed && get_rac(c, state+11 + FFMIN(e,10))) //11..21
/**
 * Write v to the range coder c with a parameter log2 controlling the split
 * between a prefix and raw low bits.
 * Visible steps: bits are written with state[4+log2] (1 to continue, 0 to
 * stop), then the low log2 bits of v are written most significant first
 * using state[31-i].
 * NOTE(review): the loop around the prefix bits and the updates of v, r and
 * log2 inside it are not visible here; presumably an adaptive Golomb-style
 * code -- confirm. The caller must keep 4+log2 within the state array.
 */
682 static inline void put_symbol2(RangeCoder *c, uint8_t *state, int v, int log2){
// Step size 2^log2; a negative log2 is treated as step 1.
684 int r= log2>=0 ? 1<<log2 : 1;
690 put_rac(c, state+4+log2, 1);
695 put_rac(c, state+4+log2, 0);
697 for(i=log2-1; i>=0; i--){
698 put_rac(c, state+31-i, (v>>i)&1);
/**
 * Read a value written by put_symbol2() with the same log2 parameter.
 * Visible steps: prefix bits are read with state[4+log2] until a 0 is seen,
 * then log2 low bits are read most significant first with state[31-i] and
 * added into v.
 * NOTE(review): the body of the prefix loop and the return statement are not
 * visible here -- confirm.
 */
702 static inline int get_symbol2(RangeCoder *c, uint8_t *state, int log2){
// Step size 2^log2; a negative log2 is treated as step 1.
704 int r= log2>=0 ? 1<<log2 : 1;
709 while(get_rac(c, state+4+log2)){
715 for(i=log2-1; i>=0; i--){
716 v+= get_rac(c, state+31-i)<<i;
/**
 * One lifting step with a two-tap symmetric predictor.
 * For interior samples:
 *   dst[i] = src[i] +/- ((mul*(ref[i] + ref[i+1]) + add) >> shift)
 * with + for the forward direction and - when inverse is set. At the borders
 * the single available neighbour is counted twice (2*ref[0], 2*ref[w]).
 * @param dst,src,ref                 output, input and reference samples
 * @param dst_step,src_step,ref_step  element strides of the three arrays
 * @param width     length of the full (low + high) signal
 * @param mul,add,shift  predictor scale, rounding offset and right shift
 * @param highpass  1 when the high band is being updated, else 0
 * @param inverse   nonzero to undo the step
 * NOTE(review): the conditions that select the border statements (presumably
 * mirror_left / mirror_right) and the loop header are not visible here.
 * dst[0] uses unit indexing for all three arrays while the other statements
 * scale by the step arguments. The right shift is applied to possibly
 * negative values, so it relies on arithmetic shift behaviour.
 */
722 static always_inline void lift(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int dst_step, int src_step, int ref_step, int width, int mul, int add, int shift, int highpass, int inverse){
723 const int mirror_left= !highpass;
724 const int mirror_right= (width&1) ^ highpass;
// Index of the last sample handled here.
725 const int w= (width>>1) - 1 + (highpass & width);
728 #define LIFT(src, ref, inv) ((src) + ((inv) ? - (ref) : + (ref)))
730 dst[0] = LIFT(src[0], ((mul*2*ref[0]+add)>>shift), inverse);
736 dst[i*dst_step] = LIFT(src[i*src_step], ((mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add)>>shift), inverse);
740 dst[w*dst_step] = LIFT(src[w*src_step], ((mul*2*ref[w*ref_step]+add)>>shift), inverse);
/**
 * Lifting step with the same signature and border handling as lift(), but
 * with the predictor built from r = 3*(ref[i] + ref[i+1]) (3*2*ref[w] at the
 * right border) instead of mul*(...).
 *   dst[i] = src[i] +/- ((r + add) >> shift)
 * NOTE(review): the statements between the computation of r and its use, the
 * left-border value of r and the border conditions are not visible here; r
 * is presumably refined further before it is used. No use of mul is visible
 * -- confirm.
 */
744 static always_inline void lift5(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int dst_step, int src_step, int ref_step, int width, int mul, int add, int shift, int highpass, int inverse){
745 const int mirror_left= !highpass;
746 const int mirror_right= (width&1) ^ highpass;
// Index of the last sample handled here.
747 const int w= (width>>1) - 1 + (highpass & width);
754 dst[0] = LIFT(src[0], ((r+add)>>shift), inverse);
760 int r= 3*(ref[i*ref_step] + ref[(i+1)*ref_step]);
763 dst[i*dst_step] = LIFT(src[i*src_step], ((r+add)>>shift), inverse);
767 int r= 3*2*ref[w*ref_step];
770 dst[w*dst_step] = LIFT(src[w*src_step], ((r+add)>>shift), inverse);
/**
 * Lifting step with the same signature and border handling as lift(), but
 * with a different update rule given by LIFTS:
 *   inverse: dst = src - ((ref - 4*src) >> shift)
 *   forward: dst = (64*src + 4*ref + 8 + (5<<27)) / 80 - (1<<23)
 * where ref stands for mul*(ref[i] + ref[i+1]) + add (2*ref[...] at the
 * borders).
 * (5<<27)/(5*16) equals 1<<23, so the bias added before the division is
 * removed again by the subtraction; presumably it keeps the dividend
 * non-negative so that the division rounds downwards -- confirm.
 * NOTE(review): the forward branch of LIFTS does not use shift. The border
 * conditions and the loop header are not visible here.
 */
774 static always_inline void liftS(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int dst_step, int src_step, int ref_step, int width, int mul, int add, int shift, int highpass, int inverse){
775 const int mirror_left= !highpass;
776 const int mirror_right= (width&1) ^ highpass;
// Index of the last sample handled here.
777 const int w= (width>>1) - 1 + (highpass & width);
781 #define LIFTS(src, ref, inv) ((inv) ? (src) - (((ref) - 4*(src))>>shift): (16*4*(src) + 4*(ref) + 8 + (5<<27))/(5*16) - (1<<23))
783 dst[0] = LIFTS(src[0], mul*2*ref[0]+add, inverse);
789 dst[i*dst_step] = LIFTS(src[i*src_step], mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add, inverse);
793 dst[w*dst_step] = LIFTS(src[w*src_step], mul*2*ref[w*ref_step]+add, inverse);
/**
 * Generic in-place lifting step on one row.
 * Every second sample, starting at index start, is updated with a rounded
 * weighted sum of n samples of opposite parity around it:
 *   sum    = sum over i of coeffs[i] * dst[x + 2*i - n + 1]
 *   dst[x] += (sum + (1<<shift)/2) >> shift    (-= when inverse is set)
 * @param dst     row, modified in place
 * @param width   number of samples in the row
 * @param coeffs  n filter taps
 * @param shift   right shift applied to the weighted sum
 * @param start   first index to update (selects odd or even samples)
 * @param inverse nonzero to undo the step
 * Tap positions past the right end are reflected (2*width - x2 - 2).
 * NOTE(review): the branch this "else if" belongs to (presumably reflection
 * at the left end) and the declaration of sum are not visible here. The taps
 * are widened to int64_t before multiplying.
 */
798 static void inplace_lift(DWTELEM *dst, int width, int *coeffs, int n, int shift, int start, int inverse){
801 for(x=start; x<width; x+=2){
805 int x2= x + 2*i - n + 1;
807 else if(x2>=width) x2= 2*width-x2-2;
808 sum += coeffs[i]*(int64_t)dst[x2];
810 if(inverse) dst[x] -= (sum + (1<<shift)/2)>>shift;
811 else dst[x] += (sum + (1<<shift)/2)>>shift;
/**
 * Vertical counterpart of inplace_lift(): the same lifting step applied down
 * the columns of a 2-D buffer.
 * Every second row, starting at row start, is updated column by column with
 *   sum = sum over i of coeffs[i] * dst[x + (y + 2*i - n + 1)*stride]
 *   dst[x + y*stride] += (sum + (1<<shift)/2) >> shift   (-= when inverse)
 * @param dst     buffer, modified in place
 * @param width   number of columns processed
 * @param height  number of rows
 * @param stride  distance between rows, in elements
 * Row positions past the bottom are reflected (2*height - y2 - 2).
 * NOTE(review): the branch this "else if" belongs to (presumably reflection
 * at the top) and the declaration of sum are not visible here.
 */
815 static void inplace_liftV(DWTELEM *dst, int width, int height, int stride, int *coeffs, int n, int shift, int start, int inverse){
817 for(y=start; y<height; y+=2){
818 for(x=0; x<width; x++){
822 int y2= y + 2*i - n + 1;
824 else if(y2>=height) y2= 2*height-y2-2;
825 sum += coeffs[i]*(int64_t)dst[x + y2*stride];
827 if(inverse) dst[x + y*stride] -= (sum + (1<<shift)/2)>>shift;
828 else dst[x + y*stride] += (sum + (1<<shift)/2)>>shift;
// Tap sets (as int[] compound literals) for the lifting steps of the *X
// transform functions, which pass them to inplace_lift()/inplace_liftV().
// NOTE(review): COEFFS1..COEFFS4 are defined several times below, presumably
// one set per branch of the conditional opened by the "#if 0" line. The
// #elif/#else/#endif lines and the matching N*, SHIFT*, LX* and SCALEX
// definitions are not visible here -- confirm which set is active.
837 #if 0 // more accurate 9/7
840 #define COEFFS1 (int[]){-25987,-25987}
843 #define COEFFS2 (int[]){-27777,-27777}
846 #define COEFFS3 (int[]){28931,28931}
849 #define COEFFS4 (int[]){14533,14533}
853 #define COEFFS1 (int[]){1,-9,-9,1}
856 #define COEFFS2 (int[]){-1,5,5,-1}
869 #define COEFFS1 (int[]){1,1}
872 #define COEFFS2 (int[]){-1,-1}
885 #define COEFFS2 (int[]){-1,-1}
888 #define COEFFS3 (int[]){-1,-1}
891 #define COEFFS4 (int[]){-5,29,29,-5}
896 #define COEFFS1 (int[]){-203,-203}
899 #define COEFFS2 (int[]){-217,-217}
902 #define COEFFS3 (int[]){113,113}
905 #define COEFFS4 (int[]){227,227}
913 #define COEFFS2 (int[]){-1,-1}
916 #define COEFFS3 (int[]){-1,-1}
919 #define COEFFS4 (int[]){3,3}
923 #define COEFFS1 (int[]){1,-9,-9,1}
926 #define COEFFS2 (int[]){1,1}
936 #define COEFFS1 (int[]){1,-9,-9,1}
939 #define COEFFS2 (int[]){-1,5,5,-1}
/**
 * Forward transform of one row using the configured COEFFS/N/SHIFT/LX sets.
 * Applies four in-place lifting steps (1..4, alternating between the LX1 and
 * LX0 sample phases), then de-interleaves the row: odd-indexed samples go to
 * the second part starting at w2 = (width+1)/2, and the result is copied
 * back into b.
 * @param b     row of width samples, transformed in place
 * @param width number of samples
 * NOTE(review): the declaration of temp and the copy of the even-indexed
 * samples are not visible here. The memcpy size uses sizeof(int) although b
 * is DWTELEM*; that is only right if DWTELEM has the size of int -- confirm.
 */
947 static void horizontal_decomposeX(DWTELEM *b, int width){
949 const int width2= width>>1;
950 const int w2= (width+1)>>1;
953 inplace_lift(b, width, COEFFS1, N1, SHIFT1, LX1, 0);
954 inplace_lift(b, width, COEFFS2, N2, SHIFT2, LX0, 0);
955 inplace_lift(b, width, COEFFS3, N3, SHIFT3, LX1, 0);
956 inplace_lift(b, width, COEFFS4, N4, SHIFT4, LX0, 0);
958 for(x=0; x<width2; x++){
960 temp[x+w2]= b[2*x + 1];
964 memcpy(b, temp, width*sizeof(int));
/**
 * Inverse of horizontal_decomposeX() for one row.
 * Re-interleaves the row (the samples stored from w2 = (width+1)/2 onwards
 * return to the odd positions) and then undoes the four lifting steps in
 * reverse order (4..1) with the inverse flag set.
 * @param b     row of width samples, transformed in place
 * @param width number of samples
 * NOTE(review): the declaration of temp and the copy to the even positions
 * are not visible here. The memcpy size uses sizeof(int) although b is
 * DWTELEM*; that is only right if DWTELEM has the size of int -- confirm.
 */
967 static void horizontal_composeX(DWTELEM *b, int width){
969 const int width2= width>>1;
971 const int w2= (width+1)>>1;
973 memcpy(temp, b, width*sizeof(int));
974 for(x=0; x<width2; x++){
976 b[2*x + 1]= temp[x+w2];
981 inplace_lift(b, width, COEFFS4, N4, SHIFT4, LX0, 1);
982 inplace_lift(b, width, COEFFS3, N3, SHIFT3, LX1, 1);
983 inplace_lift(b, width, COEFFS2, N2, SHIFT2, LX0, 1);
984 inplace_lift(b, width, COEFFS1, N1, SHIFT1, LX1, 1);
/**
 * Forward 2-D transform of a width x height region, in place.
 * Steps, in order: multiply every sample by SCALEX, transform each row with
 * horizontal_decomposeX(), then apply the four vertical lifting steps (1..4)
 * with inplace_liftV().
 * @param buffer top-left sample of the region
 * @param stride distance between rows, in elements
 */
987 static void spatial_decomposeX(DWTELEM *buffer, int width, int height, int stride){
990 for(y=0; y<height; y++){
991 for(x=0; x<width; x++){
992 buffer[y*stride + x] *= SCALEX;
996 for(y=0; y<height; y++){
997 horizontal_decomposeX(buffer + y*stride, width);
1000 inplace_liftV(buffer, width, height, stride, COEFFS1, N1, SHIFT1, LX1, 0);
1001 inplace_liftV(buffer, width, height, stride, COEFFS2, N2, SHIFT2, LX0, 0);
1002 inplace_liftV(buffer, width, height, stride, COEFFS3, N3, SHIFT3, LX1, 0);
1003 inplace_liftV(buffer, width, height, stride, COEFFS4, N4, SHIFT4, LX0, 0);
/* One level of the inverse 2D transform with the generic lifting filter.
 * Mirrors spatial_decomposeX in reverse: vertical lifting passes
 * COEFFS4..COEFFS1 (final argument 1), horizontal_composeX on each row, and
 * finally a division of every sample by SCALEX. */
1006 static void spatial_composeX(DWTELEM *buffer, int width, int height, int stride){
1009 inplace_liftV(buffer, width, height, stride, COEFFS4, N4, SHIFT4, LX0, 1);
1010 inplace_liftV(buffer, width, height, stride, COEFFS3, N3, SHIFT3, LX1, 1);
1011 inplace_liftV(buffer, width, height, stride, COEFFS2, N2, SHIFT2, LX0, 1);
1012 inplace_liftV(buffer, width, height, stride, COEFFS1, N1, SHIFT1, LX1, 1);
1014 for(y=0; y<height; y++){
1015 horizontal_composeX(buffer + y*stride, width);
1018 for(y=0; y<height; y++){
1019 for(x=0; x<width; x++){
// Integer division: undoes the SCALEX multiplication of the forward pass.
1020 buffer[y*stride + x] /= SCALEX;
/* Forward integer 5/3 transform of one row `b` of `width` samples.
 * `temp` is a variable-length array on the stack, so stack use grows with
 * `width`.
 * NOTE(review): the hand-written A1..A4 loop and the two lift() calls at the
 * end both appear to perform the lifting update; presumably one of them is
 * disabled by preprocessor lines not visible in this excerpt -- confirm. */
1025 static void horizontal_decompose53i(DWTELEM *b, int width){
1026 DWTELEM temp[width];
1027 const int width2= width>>1;
1029 const int w2= (width+1)>>1;
1031 for(x=0; x<width2; x++){
// Odd-indexed samples are gathered into the second half of temp.
1033 temp[x+w2]= b[2*x + 1];
1047 for(x=1; x+1<width2; x+=2){
1051 A2 += (A1 + A3 + 2)>>2;
1055 A1= temp[x+1+width2];
1058 A4 += (A1 + A3 + 2)>>2;
1064 A2 += (A1 + A3 + 2)>>2;
// The meaning of lift()'s numeric arguments is defined where lift() is
// declared, outside this excerpt.
1069 lift(b+w2, temp+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 0);
1070 lift(b , temp , b+w2, 1, 1, 1, width, 1, 2, 2, 0, 0);
/* Forward 5/3 vertical step on row b1, column by column:
 * subtracts (b0 + b2) >> 1, where b0 and b2 are the neighbouring rows.
 * Exact inverse: vertical_compose53iH0. */
1074 static void vertical_decompose53iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1077 for(i=0; i<width; i++){
1078 b1[i] -= (b0[i] + b2[i])>>1;
/* Forward 5/3 vertical step on row b1, column by column:
 * adds (b0 + b2 + 2) >> 2, where b0 and b2 are the neighbouring rows.
 * Exact inverse: vertical_compose53iL0. */
1082 static void vertical_decompose53iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1085 for(i=0; i<width; i++){
1086 b1[i] += (b0[i] + b2[i] + 2)>>2;
/* One level of the forward 2D integer 5/3 transform, processed two rows per
 * iteration with a sliding window of row pointers (b0..b3).
 * Row indices outside the plane are reflected with mirror(); b0/b1 start at
 * the mirrored rows -3 and -2.
 * The tests of the form `y+k<(unsigned)height` convert the left side to
 * unsigned, so (assuming y is a signed int) a negative row index compares as
 * a huge value and fails: one comparison checks 0 <= y+k < height.
 * NOTE(review): the lines that advance the window at the end of each
 * iteration are not visible in this excerpt. */
1090 static void spatial_decompose53i(DWTELEM *buffer, int width, int height, int stride){
1092 DWTELEM *b0= buffer + mirror(-2-1, height-1)*stride;
1093 DWTELEM *b1= buffer + mirror(-2 , height-1)*stride;
1095 for(y=-2; y<height; y+=2){
1096 DWTELEM *b2= buffer + mirror(y+1, height-1)*stride;
1097 DWTELEM *b3= buffer + mirror(y+2, height-1)*stride;
// The two newly entered rows are transformed horizontally first.
1100 if(y+1<(unsigned)height) horizontal_decompose53i(b2, width);
1101 if(y+2<(unsigned)height) horizontal_decompose53i(b3, width);
1102 STOP_TIMER("horizontal_decompose53i")}
// Vertical lifting: H0 on the newer row, then L0 on the row before it.
1105 if(y+1<(unsigned)height) vertical_decompose53iH0(b1, b2, b3, width);
1106 if(y+0<(unsigned)height) vertical_decompose53iL0(b0, b1, b2, width);
1107 STOP_TIMER("vertical_decompose53i*")}
/* Forward integer 9/7 transform of one row `b` of `width` samples.
 * Four lifting passes, parameterised by the W_A*, W_B*, W_C* and W_D*
 * constants, ping-pong between `b` and the stack VLA `temp`; the last two
 * passes write the final values into `b` and `b + w2`.
 * The meaning of the numeric arguments is defined by lift()/liftS()/lift5(),
 * which are outside this excerpt. */
1184 static void horizontal_decompose97i(DWTELEM *b, int width){
1185 DWTELEM temp[width];
1186 const int w2= (width+1)>>1;
1188 lift (temp+w2, b +1, b , 1, 2, 2, width, -W_AM, W_AO, W_AS, 1, 0);
1189 liftS(temp , b , temp+w2, 1, 2, 1, width, -W_BM, W_BO, W_BS, 0, 0);
1190 lift5(b +w2, temp+w2, temp , 1, 1, 1, width, W_CM, W_CO, W_CS, 1, 0);
1191 lift (b , temp , b +w2, 1, 1, 1, width, W_DM, W_DO, W_DS, 0, 0);
/* Forward 9/7 vertical step "A" on row b1, column by column:
 * subtracts (W_AM*(b0 + b2) + W_AO) >> W_AS.
 * Exact inverse: vertical_compose97iH0. */
1195 static void vertical_decompose97iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1198 for(i=0; i<width; i++){
1199 b1[i] -= (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
/* Forward 9/7 vertical step "C" on row b1, column by column: adds a scaled
 * sum of the neighbouring rows b0 and b2.
 * NOTE(review): two forms of the update are visible (a direct W_CM multiply
 * and one that starts from r = 3*(b0+b2)); presumably they are alternatives
 * selected by preprocessor lines not visible here, and the lines that refine
 * `r` are also missing -- confirm. */
1203 static void vertical_decompose97iH1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1206 for(i=0; i<width; i++){
1208 b1[i] += (W_CM*(b0[i] + b2[i])+W_CO)>>W_CS;
1210 int r= 3*(b0[i] + b2[i]);
1213 b1[i] += (r+W_CO)>>W_CS;
/* Forward 9/7 vertical step "B" on row b1, using neighbouring rows b0, b2.
 * NOTE(review): two forms are visible -- a shift-based update with
 * W_BM/W_BO/W_BS and a division-based one; presumably they are alternatives
 * selected by preprocessor lines not visible here -- confirm.
 * In the division form, (5<<27) is added before dividing by 5*16 and
 * (1<<23) subtracted afterwards; since (5<<27)/(5*16) == 1<<23 the bias
 * cancels, and it looks intended to keep the dividend non-negative so the
 * truncating division rounds consistently -- TODO confirm. */
1218 static void vertical_decompose97iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1221 for(i=0; i<width; i++){
1223 b1[i] -= (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS;
1225 b1[i] = (16*4*b1[i] - 4*(b0[i] + b2[i]) + 8*5 + (5<<27)) / (5*16) - (1<<23);
/* Forward 9/7 vertical step "D" on row b1, column by column:
 * adds (W_DM*(b0 + b2) + W_DO) >> W_DS.
 * Exact inverse: vertical_compose97iL1. */
1230 static void vertical_decompose97iL1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1233 for(i=0; i<width; i++){
1234 b1[i] += (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS;
/* One level of the forward 2D integer 9/7 transform, processed two rows per
 * iteration with a sliding window of six row pointers (b0..b5).
 * Row indices outside the plane are reflected with mirror(); b0..b3 start at
 * the mirrored rows -5..-2 and the loop starts at y = -4.
 * `y+k<(unsigned)height` is a single-comparison range check: a negative
 * left side converts to a huge unsigned value and fails (assuming y is a
 * signed int).
 * NOTE(review): the lines that advance the window at the end of each
 * iteration are not visible in this excerpt. */
1238 static void spatial_decompose97i(DWTELEM *buffer, int width, int height, int stride){
1240 DWTELEM *b0= buffer + mirror(-4-1, height-1)*stride;
1241 DWTELEM *b1= buffer + mirror(-4 , height-1)*stride;
1242 DWTELEM *b2= buffer + mirror(-4+1, height-1)*stride;
1243 DWTELEM *b3= buffer + mirror(-4+2, height-1)*stride;
1245 for(y=-4; y<height; y+=2){
1246 DWTELEM *b4= buffer + mirror(y+3, height-1)*stride;
1247 DWTELEM *b5= buffer + mirror(y+4, height-1)*stride;
// The two newly entered rows are transformed horizontally first.
1250 if(y+3<(unsigned)height) horizontal_decompose97i(b4, width);
1251 if(y+4<(unsigned)height) horizontal_decompose97i(b5, width);
1253 STOP_TIMER("horizontal_decompose97i")
// The four vertical lifting steps run newest row first, each one row
// behind the previous step.
1257 if(y+3<(unsigned)height) vertical_decompose97iH0(b3, b4, b5, width);
1258 if(y+2<(unsigned)height) vertical_decompose97iL0(b2, b3, b4, width);
1259 if(y+1<(unsigned)height) vertical_decompose97iH1(b1, b2, b3, width);
1260 if(y+0<(unsigned)height) vertical_decompose97iL1(b0, b1, b2, width);
1263 STOP_TIMER("vertical_decompose97i")
/* Forward multi-level 2D wavelet transform of `buffer`, in place.
 * For each level 0..decomposition_count-1 the matching single-level routine
 * is called with width and height shifted right by `level` and the stride
 * shifted left by `level`.
 * The visible case labels map 0 -> 9/7, 1 -> 5/3, 2 -> generic lifting
 * filter; the switch expression itself is not visible here (presumably
 * `type`). */
1273 void ff_spatial_dwt(DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
1276 for(level=0; level<decomposition_count; level++){
1278 case 0: spatial_decompose97i(buffer, width>>level, height>>level, stride<<level); break;
1279 case 1: spatial_decompose53i(buffer, width>>level, height>>level, stride<<level); break;
1280 case 2: spatial_decomposeX (buffer, width>>level, height>>level, stride<<level); break;
/* Inverse integer 5/3 transform of one row `b` of `width` samples.
 * The lifting passes write into the stack VLA `temp`; the result is then
 * interleaved back into `b` (second-half samples go to odd positions).
 * NOTE(review): the hand-written A1..A4 loop and the two lift() calls both
 * appear to perform the lifting update; presumably one of them is disabled
 * by preprocessor lines not visible in this excerpt -- confirm. */
1285 static void horizontal_compose53i(DWTELEM *b, int width){
1286 DWTELEM temp[width];
1287 const int width2= width>>1;
1288 const int w2= (width+1)>>1;
1300 for(x=1; x+1<width2; x+=2){
1304 A2 += (A1 + A3 + 2)>>2;
1308 A1= temp[x+1+width2];
1311 A4 += (A1 + A3 + 2)>>2;
1317 A2 += (A1 + A3 + 2)>>2;
// Same coefficient arguments as in horizontal_decompose53i, applied in the
// opposite order and with the last argument set to 1.
1321 lift(temp , b , b+w2, 1, 1, 1, width, 1, 2, 2, 0, 1);
1322 lift(temp+w2, b+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 1);
1324 for(x=0; x<width2; x++){
1326 b[2*x + 1]= temp[x+w2];
/* Inverse 5/3 vertical step on row b1, column by column:
 * adds back (b0 + b2) >> 1, undoing vertical_decompose53iH0. */
1332 static void vertical_compose53iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1335 for(i=0; i<width; i++){
1336 b1[i] += (b0[i] + b2[i])>>1;
/* Inverse 5/3 vertical step on row b1, column by column:
 * subtracts (b0 + b2 + 2) >> 2, undoing vertical_decompose53iL0. */
1340 static void vertical_compose53iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1343 for(i=0; i<width; i++){
1344 b1[i] -= (b0[i] + b2[i] + 2)>>2;
/* Prepares the incremental inverse 5/3 state `cs` for a slice buffer:
 * b0 and b1 are set to the lines for the mirrored rows -2 and -1.
 * NOTE(review): the initialisation of cs->y is not visible in this excerpt. */
1348 static void spatial_compose53i_buffered_init(dwt_compose_t *cs, slice_buffer * sb, int height, int stride_line){
1349 cs->b0 = slice_buffer_get_line(sb, mirror(-1-1, height-1) * stride_line);
1350 cs->b1 = slice_buffer_get_line(sb, mirror(-1 , height-1) * stride_line);
/* Prepares the incremental inverse 5/3 state `cs` for a flat buffer:
 * b0 and b1 point at the mirrored rows -2 and -1 of `buffer`.
 * NOTE(review): the initialisation of cs->y is not visible in this excerpt. */
1354 static void spatial_compose53i_init(dwt_compose_t *cs, DWTELEM *buffer, int height, int stride){
1355 cs->b0 = buffer + mirror(-1-1, height-1)*stride;
1356 cs->b1 = buffer + mirror(-1 , height-1)*stride;
/* One incremental step of the inverse 5/3 transform on a slice buffer.
 * Takes b0/b1 from the state, fetches the lines for rows y+1 and y+2
 * (mirrored at the borders), runs the two vertical inverse steps, then the
 * horizontal inverse on the rows that are now vertically complete.
 * `y` is declared on a line not visible here (presumably cs->y), as is the
 * code that stores the advanced window back into `cs`.
 * `expr<(unsigned)height` is a single-comparison 0 <= expr < height check. */
1360 static void spatial_compose53i_dy_buffered(dwt_compose_t *cs, slice_buffer * sb, int width, int height, int stride_line){
1363 DWTELEM *b0= cs->b0;
1364 DWTELEM *b1= cs->b1;
1365 DWTELEM *b2= slice_buffer_get_line(sb, mirror(y+1, height-1) * stride_line);
1366 DWTELEM *b3= slice_buffer_get_line(sb, mirror(y+2, height-1) * stride_line);
1369 if(y+1<(unsigned)height) vertical_compose53iL0(b1, b2, b3, width);
1370 if(y+0<(unsigned)height) vertical_compose53iH0(b0, b1, b2, width);
1371 STOP_TIMER("vertical_compose53i*")}
1374 if(y-1<(unsigned)height) horizontal_compose53i(b0, width);
1375 if(y+0<(unsigned)height) horizontal_compose53i(b1, width);
1376 STOP_TIMER("horizontal_compose53i")}
/* One incremental step of the inverse 5/3 transform on a flat buffer.
 * Same sequence as spatial_compose53i_dy_buffered, but rows are addressed
 * directly as buffer + row*stride.
 * `y` is declared on a line not visible here (presumably cs->y), as is the
 * code that stores the advanced window back into `cs`. */
1383 static void spatial_compose53i_dy(dwt_compose_t *cs, DWTELEM *buffer, int width, int height, int stride){
1385 DWTELEM *b0= cs->b0;
1386 DWTELEM *b1= cs->b1;
1387 DWTELEM *b2= buffer + mirror(y+1, height-1)*stride;
1388 DWTELEM *b3= buffer + mirror(y+2, height-1)*stride;
1391 if(y+1<(unsigned)height) vertical_compose53iL0(b1, b2, b3, width);
1392 if(y+0<(unsigned)height) vertical_compose53iH0(b0, b1, b2, width);
1393 STOP_TIMER("vertical_compose53i*")}
1396 if(y-1<(unsigned)height) horizontal_compose53i(b0, width);
1397 if(y+0<(unsigned)height) horizontal_compose53i(b1, width);
1398 STOP_TIMER("horizontal_compose53i")}
/* Complete single-level inverse 5/3 transform of a flat buffer:
 * initialises a local state `cs` and repeats the incremental step until
 * cs.y has moved past `height`. */
1405 static void spatial_compose53i(DWTELEM *buffer, int width, int height, int stride){
1407 spatial_compose53i_init(&cs, buffer, height, stride);
1408 while(cs.y <= height)
1409 spatial_compose53i_dy(&cs, buffer, width, height, stride);
/* Inverse integer 9/7 transform of one row `b` of `width` samples.
 * Applies the four lifting passes of horizontal_decompose97i in reverse
 * order (D, C, B, A) with the last argument set to 1, going through the
 * stack VLA `temp`; the final two passes write into `b` and `b+1`.
 * The meaning of the numeric arguments is defined by lift()/liftS()/lift5(),
 * which are outside this excerpt. */
1413 static void horizontal_compose97i(DWTELEM *b, int width){
1414 DWTELEM temp[width];
1415 const int w2= (width+1)>>1;
1417 lift (temp , b , b +w2, 1, 1, 1, width, W_DM, W_DO, W_DS, 0, 1);
1418 lift5(temp+w2, b +w2, temp , 1, 1, 1, width, W_CM, W_CO, W_CS, 1, 1);
1419 liftS(b , temp , temp+w2, 2, 1, 1, width, -W_BM, W_BO, W_BS, 0, 1);
1420 lift (b+1 , temp+w2, b , 2, 1, 2, width, -W_AM, W_AO, W_AS, 1, 1);
/* Inverse 9/7 vertical step "A" on row b1, column by column:
 * adds back (W_AM*(b0 + b2) + W_AO) >> W_AS, undoing
 * vertical_decompose97iH0. */
1423 static void vertical_compose97iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1426 for(i=0; i<width; i++){
1427 b1[i] += (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
/* Inverse 9/7 vertical step "C" on row b1: subtracts the term that
 * vertical_decompose97iH1 adds.
 * NOTE(review): two forms of the update are visible (direct W_CM multiply
 * and one starting from r = 3*(b0+b2)); presumably alternatives selected by
 * preprocessor lines not visible here, with the lines refining `r` also
 * missing -- confirm. */
1431 static void vertical_compose97iH1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1434 for(i=0; i<width; i++){
1436 b1[i] -= (W_CM*(b0[i] + b2[i])+W_CO)>>W_CS;
1438 int r= 3*(b0[i] + b2[i]);
1441 b1[i] -= (r+W_CO)>>W_CS;
/* Inverse 9/7 vertical step "B" on row b1, using neighbouring rows b0, b2.
 * NOTE(review): two forms are visible -- one without and one with an extra
 * 4*b1[i] term inside the shifted sum; presumably alternatives selected by
 * preprocessor lines not visible here -- confirm which one is active. */
1446 static void vertical_compose97iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1449 for(i=0; i<width; i++){
1451 b1[i] += (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS;
1453 b1[i] += (W_BM*(b0[i] + b2[i])+4*b1[i]+W_BO)>>W_BS;
/* Inverse 9/7 vertical step "D" on row b1, column by column:
 * subtracts (W_DM*(b0 + b2) + W_DO) >> W_DS, undoing
 * vertical_decompose97iL1. */
1458 static void vertical_compose97iL1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1461 for(i=0; i<width; i++){
1462 b1[i] -= (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS;
/* Fused version of the four inverse 9/7 vertical steps for one window of
 * six rows: per column it updates b4 (step D), b3 (step C), b2 (step B)
 * and b1 (step A), in that order, so each step sees the already-updated
 * neighbour from the step before.
 * NOTE(review): steps C and B each appear twice in different forms;
 * presumably alternatives selected by preprocessor lines not visible in
 * this excerpt -- confirm. */
1466 static void vertical_compose97i(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, DWTELEM *b3, DWTELEM *b4, DWTELEM *b5, int width){
1469 for(i=0; i<width; i++){
1473 b4[i] -= (W_DM*(b3[i] + b5[i])+W_DO)>>W_DS;
1475 b3[i] -= (W_CM*(b2[i] + b4[i])+W_CO)>>W_CS;
1477 r= 3*(b2[i] + b4[i]);
1480 b3[i] -= (r+W_CO)>>W_CS;
1483 b2[i] += (W_BM*(b1[i] + b3[i])+W_BO)>>W_BS;
1485 b2[i] += (W_BM*(b1[i] + b3[i])+4*b2[i]+W_BO)>>W_BS;
1487 b1[i] += (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
/* Prepares the incremental inverse 9/7 state `cs` for a slice buffer:
 * b0..b3 are set to the lines for the mirrored rows -4, -3, -2 and -1.
 * NOTE(review): the initialisation of cs->y is not visible in this excerpt. */
1491 static void spatial_compose97i_buffered_init(dwt_compose_t *cs, slice_buffer * sb, int height, int stride_line){
1492 cs->b0 = slice_buffer_get_line(sb, mirror(-3-1, height-1) * stride_line);
1493 cs->b1 = slice_buffer_get_line(sb, mirror(-3 , height-1) * stride_line);
1494 cs->b2 = slice_buffer_get_line(sb, mirror(-3+1, height-1) * stride_line);
1495 cs->b3 = slice_buffer_get_line(sb, mirror(-3+2, height-1) * stride_line);
/* Prepares the incremental inverse 9/7 state `cs` for a flat buffer:
 * b0..b3 point at the mirrored rows -4, -3, -2 and -1 of `buffer`.
 * NOTE(review): the initialisation of cs->y is not visible in this excerpt. */
1499 static void spatial_compose97i_init(dwt_compose_t *cs, DWTELEM *buffer, int height, int stride){
1500 cs->b0 = buffer + mirror(-3-1, height-1)*stride;
1501 cs->b1 = buffer + mirror(-3 , height-1)*stride;
1502 cs->b2 = buffer + mirror(-3+1, height-1)*stride;
1503 cs->b3 = buffer + mirror(-3+2, height-1)*stride;
/* One incremental step of the inverse 9/7 transform on a slice buffer.
 * Takes b0..b3 from the state and fetches the lines for rows y+3 and y+4
 * (mirrored at the borders). When the whole window lies inside the plane
 * (y>0 && y+4<height) the fused vertical_compose97i() is used; the four
 * individually range-checked steps that follow are presumably the `else`
 * branch for border rows (the `else` line is not visible). Afterwards the
 * rows that are vertically complete are inverse-transformed horizontally.
 * `y` is declared on a line not visible here (presumably cs->y), as is the
 * code that stores the advanced window back into `cs`. */
1507 static void spatial_compose97i_dy_buffered(dwt_compose_t *cs, slice_buffer * sb, int width, int height, int stride_line){
1510 DWTELEM *b0= cs->b0;
1511 DWTELEM *b1= cs->b1;
1512 DWTELEM *b2= cs->b2;
1513 DWTELEM *b3= cs->b3;
1514 DWTELEM *b4= slice_buffer_get_line(sb, mirror(y + 3, height - 1) * stride_line);
1515 DWTELEM *b5= slice_buffer_get_line(sb, mirror(y + 4, height - 1) * stride_line);
1518 if(y>0 && y+4<height){
1519 vertical_compose97i(b0, b1, b2, b3, b4, b5, width);
1521 if(y+3<(unsigned)height) vertical_compose97iL1(b3, b4, b5, width);
1522 if(y+2<(unsigned)height) vertical_compose97iH1(b2, b3, b4, width);
1523 if(y+1<(unsigned)height) vertical_compose97iL0(b1, b2, b3, width);
1524 if(y+0<(unsigned)height) vertical_compose97iH0(b0, b1, b2, width);
1527 STOP_TIMER("vertical_compose97i")}}
1530 if(y-1<(unsigned)height) horizontal_compose97i(b0, width);
1531 if(y+0<(unsigned)height) horizontal_compose97i(b1, width);
// This condition only gates the profiling timer below.
1532 if(width>400 && y+0<(unsigned)height){
1533 STOP_TIMER("horizontal_compose97i")}}
/* One incremental step of the inverse 9/7 transform on a flat buffer.
 * Takes b0..b3 from the state, addresses rows y+3 and y+4 directly in
 * `buffer` (mirrored at the borders), runs the four range-checked vertical
 * inverse steps and then the horizontal inverse on the completed rows.
 * `y` is declared on a line not visible here (presumably cs->y), as is the
 * code that stores the advanced window back into `cs`. */
1542 static void spatial_compose97i_dy(dwt_compose_t *cs, DWTELEM *buffer, int width, int height, int stride){
1544 DWTELEM *b0= cs->b0;
1545 DWTELEM *b1= cs->b1;
1546 DWTELEM *b2= cs->b2;
1547 DWTELEM *b3= cs->b3;
1548 DWTELEM *b4= buffer + mirror(y+3, height-1)*stride;
1549 DWTELEM *b5= buffer + mirror(y+4, height-1)*stride;
1552 if(y+3<(unsigned)height) vertical_compose97iL1(b3, b4, b5, width);
1553 if(y+2<(unsigned)height) vertical_compose97iH1(b2, b3, b4, width);
1554 if(y+1<(unsigned)height) vertical_compose97iL0(b1, b2, b3, width);
1555 if(y+0<(unsigned)height) vertical_compose97iH0(b0, b1, b2, width);
1557 STOP_TIMER("vertical_compose97i")}}
1560 if(y-1<(unsigned)height) horizontal_compose97i(b0, width);
1561 if(y+0<(unsigned)height) horizontal_compose97i(b1, width);
// This condition only gates the profiling timer below.
// NOTE(review): it differs from the buffered variant, which tests
// y+0<(unsigned)height instead of b0 <= b2.
1562 if(width>400 && b0 <= b2){
1563 STOP_TIMER("horizontal_compose97i")}}
/* Complete single-level inverse 9/7 transform of a flat buffer:
 * initialises a local state `cs` and repeats the incremental step until
 * cs.y has moved past `height`. */
1572 static void spatial_compose97i(DWTELEM *buffer, int width, int height, int stride){
1574 spatial_compose97i_init(&cs, buffer, height, stride);
1575 while(cs.y <= height)
1576 spatial_compose97i_dy(&cs, buffer, width, height, stride);
/* Initialises one dwt_compose_t per decomposition level (cs[level]) for the
 * slice-buffer based inverse transform, from the coarsest level down to 0.
 * Height is shifted right and stride_line left by `level`.
 * Case 0 uses the 9/7 init, case 1 the 5/3 init; case 2 (generic filter) is
 * unsupported here and only logs an error. The switch expression is not
 * visible in this excerpt (presumably `type`). */
1579 static void ff_spatial_idwt_buffered_init(dwt_compose_t *cs, slice_buffer * sb, int width, int height, int stride_line, int type, int decomposition_count){
1581 for(level=decomposition_count-1; level>=0; level--){
1583 case 0: spatial_compose97i_buffered_init(cs+level, sb, height>>level, stride_line<<level); break;
1584 case 1: spatial_compose53i_buffered_init(cs+level, sb, height>>level, stride_line<<level); break;
1585 /* not slicified yet */
1586 case 2: /*spatial_composeX(buffer, width>>level, height>>level, stride<<level); break;*/
1587 av_log(NULL, AV_LOG_ERROR, "spatial_composeX neither buffered nor slicified yet.\n"); break;
/* Initialises one dwt_compose_t per decomposition level (cs[level]) for the
 * flat-buffer inverse transform, from the coarsest level down to 0.
 * Case 0 uses the 9/7 init and case 1 the 5/3 init. Case 2 has no
 * incremental form: it runs the complete spatial_composeX() for that level
 * right here. The switch expression is not visible in this excerpt
 * (presumably `type`). */
1592 static void ff_spatial_idwt_init(dwt_compose_t *cs, DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
1594 for(level=decomposition_count-1; level>=0; level--){
1596 case 0: spatial_compose97i_init(cs+level, buffer, height>>level, stride<<level); break;
1597 case 1: spatial_compose53i_init(cs+level, buffer, height>>level, stride<<level); break;
1598 /* not slicified yet */
1599 case 2: spatial_composeX(buffer, width>>level, height>>level, stride<<level); break;
/* Advances the flat-buffer inverse transform far enough to cover output
 * row `y`.
 * `support` is 3 for type 1 (5/3) and 5 otherwise. For every level,
 * coarsest first, the incremental step is repeated while
 * cs[level].y <= min((y>>level)+support, height>>level).
 * NOTE(review): only cases 0 and 1 are visible; the switch expression, the
 * `break` lines and any handling of type 2 are not visible in this excerpt. */
1604 static void ff_spatial_idwt_slice(dwt_compose_t *cs, DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count, int y){
1605 const int support = type==1 ? 3 : 5;
1609 for(level=decomposition_count-1; level>=0; level--){
1610 while(cs[level].y <= FFMIN((y>>level)+support, height>>level)){
1612 case 0: spatial_compose97i_dy(cs+level, buffer, width>>level, height>>level, stride<<level);
1614 case 1: spatial_compose53i_dy(cs+level, buffer, width>>level, height>>level, stride<<level);
/* Slice-buffer counterpart of ff_spatial_idwt_slice: advances the inverse
 * transform far enough to cover output row `y`.
 * `support` is 3 for type 1 (5/3) and 5 otherwise. For every level,
 * coarsest first, the buffered incremental step is repeated while
 * cs[level].y <= min((y>>level)+support, height>>level).
 * NOTE(review): only cases 0 and 1 are visible; the switch expression, the
 * `break` lines and any handling of type 2 are not visible in this excerpt. */
1622 static void ff_spatial_idwt_buffered_slice(dwt_compose_t *cs, slice_buffer * slice_buf, int width, int height, int stride_line, int type, int decomposition_count, int y){
1623 const int support = type==1 ? 3 : 5;
1627 for(level=decomposition_count-1; level>=0; level--){
1628 while(cs[level].y <= FFMIN((y>>level)+support, height>>level)){
1630 case 0: spatial_compose97i_dy_buffered(cs+level, slice_buf, width>>level, height>>level, stride_line<<level);
1632 case 1: spatial_compose53i_dy_buffered(cs+level, slice_buf, width>>level, height>>level, stride_line<<level);
/* Complete multi-level inverse 2D wavelet transform of a flat buffer.
 * Two paths are visible: a loop that runs spatial_composeX() for every
 * level from coarsest to finest, and a path that initialises per-level
 * states and then calls ff_spatial_idwt_slice() for y = 0, 4, 8, ...
 * NOTE(review): the condition choosing between them is not visible in this
 * excerpt; presumably the first path is taken for type 2 -- confirm.
 * The local state array holds MAX_DECOMPOSITIONS entries, so
 * decomposition_count must not exceed that. */
1640 static void ff_spatial_idwt(DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
1643 for(level=decomposition_count-1; level>=0; level--)
1644 spatial_composeX (buffer, width>>level, height>>level, stride<<level);
1646 dwt_compose_t cs[MAX_DECOMPOSITIONS];
1648 ff_spatial_idwt_init(cs, buffer, width, height, stride, type, decomposition_count);
1649 for(y=0; y<height; y+=4)
1650 ff_spatial_idwt_slice(cs, buffer, width, height, stride, type, decomposition_count, y);
/* Range-codes the quantised coefficients of subband `b` (samples in `src`,
 * parent-band samples in `parent`) using context modelling plus run lengths.
 *
 * Two passes over the band are visible:
 *  1. For each coefficient the causal neighbours (left, top-left, top,
 *     top-right) and the parent coefficient are fetched; positions where all
 *     of them are zero contribute to run lengths stored in runs[].
 *  2. The number of runs (max_index) and the first run are written, then for
 *     each coefficient either a significance bit is coded with a context
 *     derived from the neighbours, or the run counter is consumed; non-zero
 *     values get their magnitude-1 and a sign bit coded.
 *
 * If fewer than w*40 bytes remain in the output buffer an error is logged.
 * NOTE(review): loop headers, the neighbour bounds checks, the return
 * statements and the declaration of runs[] are not visible in this excerpt. */
1654 static int encode_subband_c0run(SnowContext *s, SubBand *b, DWTELEM *src, DWTELEM *parent, int stride, int orientation){
1655 const int w= b->width;
1656 const int h= b->height;
1668 int /*ll=0, */l=0, lt=0, t=0, rt=0;
1669 v= src[x + y*stride];
1672 t= src[x + (y-1)*stride];
1674 lt= src[x - 1 + (y-1)*stride];
1677 rt= src[x + 1 + (y-1)*stride];
1681 l= src[x - 1 + y*stride];
// NOTE(review): `ll` is only declared inside a comment above, so the next
// two lines are presumably inside a commented-out region whose delimiters
// are not visible in this excerpt.
1683 if(orientation==1) ll= src[y + (x-2)*stride];
1684 else ll= src[x - 2 + y*stride];
1690 if(px<b->parent->width && py<b->parent->height)
// The parent plane is indexed with twice the stride of this band.
1691 p= parent[px + py*2*stride];
// All context neighbours zero: this position is covered by run coding.
1693 if(!(/*ll|*/l|lt|t|rt|p)){
1695 runs[run_index++]= run;
1703 max_index= run_index;
1704 runs[run_index++]= run;
1706 run= runs[run_index++];
1708 put_symbol2(&s->c, b->state[30], max_index, 0);
1709 if(run_index <= max_index)
1710 put_symbol2(&s->c, b->state[1], run, 3);
1713 if(s->c.bytestream_end - s->c.bytestream < w*40){
1714 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
1719 int /*ll=0, */l=0, lt=0, t=0, rt=0;
1720 v= src[x + y*stride];
1723 t= src[x + (y-1)*stride];
1725 lt= src[x - 1 + (y-1)*stride];
1728 rt= src[x + 1 + (y-1)*stride];
1732 l= src[x - 1 + y*stride];
// NOTE(review): same as above -- `ll` has no live declaration, so these two
// lines are presumably commented out in the full file.
1734 if(orientation==1) ll= src[y + (x-2)*stride];
1735 else ll= src[x - 2 + y*stride];
1741 if(px<b->parent->width && py<b->parent->height)
1742 p= parent[px + py*2*stride];
1744 if(/*ll|*/l|lt|t|rt|p){
// Context = log2 of a weighted sum of neighbour magnitudes.
1745 int context= av_log2(/*ABS(ll) + */3*ABS(l) + ABS(lt) + 2*ABS(t) + ABS(rt) + ABS(p));
// Significance bit: 1 if the coefficient is non-zero.
1747 put_rac(&s->c, &b->state[0][context], !!v);
1750 run= runs[run_index++];
1752 if(run_index <= max_index)
1753 put_symbol2(&s->c, b->state[1], run, 3);
1761 int context= av_log2(/*ABS(ll) + */3*ABS(l) + ABS(lt) + 2*ABS(t) + ABS(rt) + ABS(p));
// l2/t2 pack magnitude (times two) and sign (low bit) of left/top.
1762 int l2= 2*ABS(l) + (l<0);
1763 int t2= 2*ABS(t) + (t<0);
// Magnitude minus one, then the sign bit with a context built from the
// quantised left/top neighbours.
1765 put_symbol2(&s->c, b->state[context + 2], ABS(v)-1, context-4);
1766 put_rac(&s->c, &b->state[0][16 + 1 + 3 + quant3bA[l2&0xFF] + 3*quant3bA[t2&0xFF]], v<0);
/* Encodes one subband by forwarding all arguments to
 * encode_subband_c0run() and returning its result. The commented-out calls
 * name alternative coders that are not used. */
1774 static int encode_subband(SnowContext *s, SubBand *b, DWTELEM *src, DWTELEM *parent, int stride, int orientation){
1775 // encode_subband_qtree(s, b, src, parent, stride, orientation);
1776 // encode_subband_z0run(s, b, src, parent, stride, orientation);
1777 return encode_subband_c0run(s, b, src, parent, stride, orientation);
1778 // encode_subband_dzr(s, b, src, parent, stride, orientation);
/* Range-decodes the coefficients of subband `b` into the sparse list
 * b->x_coeff, whose entries are (x, coeff) pairs. Every row is terminated by
 * an entry with x == w+1, as written in the visible "end marker" lines.
 *
 * Decoded values use a packed form: the visible code builds
 * v = 2*(magnitude) + sign bit, and the context computation uses l>>1,
 * t&~1 etc. to strip that sign bit from the neighbours.
 * prev_xc / parent_xc walk the previous row's list and the parent band's
 * list so that top and parent neighbours can be looked up by x position.
 *
 * NOTE(review): loop headers, neighbour bookkeeping and several branches
 * are not visible in this excerpt; `parent` may be NULL (see parent_xc
 * below), so the parent_xc dereferences are presumably guarded on lines not
 * shown -- confirm. */
1781 static inline void unpack_coeffs(SnowContext *s, SubBand *b, SubBand * parent, int orientation){
1782 const int w= b->width;
1783 const int h= b->height;
1788 x_and_coeff *xc= b->x_coeff;
1789 x_and_coeff *prev_xc= NULL;
1790 x_and_coeff *prev2_xc= xc;
1791 x_and_coeff *parent_xc= parent ? parent->x_coeff : NULL;
1792 x_and_coeff *prev_parent_xc= parent_xc;
// Number of coded runs, then the first run length if there is one.
1794 runs= get_symbol2(&s->c, b->state[30], 0);
1795 if(runs-- > 0) run= get_symbol2(&s->c, b->state[1], 3);
1800 int lt=0, t=0, rt=0;
// prev_xc is NULL for the first row; the `y &&` test keeps it from being
// dereferenced there.
1802 if(y && prev_xc->x == 0){
1814 if(prev_xc->x == x + 1)
1820 if(x>>1 > parent_xc->x){
1823 if(x>>1 == parent_xc->x){
1824 p= parent_xc->coeff;
1827 if(/*ll|*/l|lt|t|rt|p){
1828 int context= av_log2(/*ABS(ll) + */3*(l>>1) + (lt>>1) + (t&~1) + (rt>>1) + (p>>1));
1830 v=get_rac(&s->c, &b->state[0][context]);
1832 v= 2*(get_symbol2(&s->c, b->state[context + 2], context-4) + 1);
1833 v+=get_rac(&s->c, &b->state[0][16 + 1 + 3 + quant3bA[l&0xFF] + 3*quant3bA[t&0xFF]]);
1840 if(runs-- > 0) run= get_symbol2(&s->c, b->state[1], 3);
// Coefficient reached at the end of a run: context 0 is used.
1842 v= 2*(get_symbol2(&s->c, b->state[0 + 2], 0-4) + 1);
1843 v+=get_rac(&s->c, &b->state[0][16 + 1 + 3]);
// Limit how far the run may be skipped in one go, based on the next
// entry of the previous row, the row end, and the parent list.
1852 if(y) max_run= FFMIN(run, prev_xc->x - x - 2);
1853 else max_run= FFMIN(run, w-x-1);
1855 max_run= FFMIN(max_run, 2*parent_xc->x - x - 1);
1861 (xc++)->x= w+1; //end marker
1867 while(parent_xc->x != parent->width+1)
1870 prev_parent_xc= parent_xc;
1872 parent_xc= prev_parent_xc;
1877 (xc++)->x= w+1; //end marker
/* Dequantises rows start_y..h-1 of subband `b` from its sparse coefficient
 * list (b->x_coeff) into the slice buffer `sb`.
 *
 * save_state[0] carries the read position in b->x_coeff between calls, so
 * the band can be decoded slice by slice.
 * qmul/qadd are derived from the clipped quantiser index
 * (s->qlog + b->qlog); the visible `if` singles out the lowest band
 * (b->buf == s->spatial_dwt_buffer) and lossless mode, but what it does is
 * not visible in this excerpt.
 *
 * Each output line is zeroed first; list entries then fill in the non-zero
 * positions. */
1881 static inline void decode_subband_slice_buffered(SnowContext *s, SubBand *b, slice_buffer * sb, int start_y, int h, int save_state[1]){
1882 const int w= b->width;
1884 const int qlog= clip(s->qlog + b->qlog, 0, QROOT*16);
1885 int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
1886 int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
1891 if(b->buf == s->spatial_dwt_buffer || s->qlog == LOSSLESS_QLOG){
1896 /* If we are on the second or later slice, restore our index. */
1898 new_index = save_state[0];
1901 for(y=start_y; y<h; y++){
1904 DWTELEM * line = slice_buffer_get_line(sb, y * b->stride_line + b->buf_y_offset) + b->buf_x_offset;
1905 memset(line, 0, b->width*sizeof(DWTELEM));
1906 v = b->x_coeff[new_index].coeff;
1907 x = b->x_coeff[new_index++].x;
// v packs magnitude (v>>1) and sign (v&1). u is 0 or -1, so (t^u) - u
// yields t or -t without a branch.
1910 register int t= ( (v>>1)*qmul + qadd)>>QEXPSHIFT;
1911 register int u= -(v&1);
1912 line[x] = (t^u) - u;
1914 v = b->x_coeff[new_index].coeff;
1915 x = b->x_coeff[new_index++].x;
1918 if(w > 200 && start_y != 0/*level+1 == s->spatial_decomposition_count*/){
1919 STOP_TIMER("decode_subband")
1922 /* Save our variables for the next slice. */
1923 save_state[0] = new_index;
/* Resets all adaptive range-coder states to MID_STATE: the per-band states
 * of all three planes, plus the header and block states.
 * Orientation 0 is only visited at level 0; for every other level the loop
 * starts at orientation 1. */
1928 static void reset_contexts(SnowContext *s){
1929 int plane_index, level, orientation;
1931 for(plane_index=0; plane_index<3; plane_index++){
1932 for(level=0; level<s->spatial_decomposition_count; level++){
1933 for(orientation=level ? 1:0; orientation<4; orientation++){
1934 memset(s->plane[plane_index].band[level][orientation].state, MID_STATE, sizeof(s->plane[plane_index].band[level][orientation].state));
1938 memset(s->header_state, MID_STATE, sizeof(s->header_state));
1939 memset(s->block_state, MID_STATE, sizeof(s->block_state));
/* Allocates the zero-initialised block array s->block.
 * w and h use the negate/shift/negate idiom to round the picture
 * dimensions divided by 2^LOG2_MB_SIZE up instead of down.
 * The array holds w*h nodes times 4^block_max_depth (the << of
 * block_max_depth*2).
 * NOTE(review): no check of the av_mallocz() result and no return statement
 * are visible in this excerpt -- confirm that allocation failure is handled. */
1942 static int alloc_blocks(SnowContext *s){
1943 int w= -((-s->avctx->width )>>LOG2_MB_SIZE);
1944 int h= -((-s->avctx->height)>>LOG2_MB_SIZE);
1949 s->block= av_mallocz(w * h * sizeof(BlockNode) << (s->block_max_depth*2));
/* Saves d's bytestream pointers and restores them at the end, so whatever
 * happens in between leaves d writing to its own buffer.
 * NOTE(review): the statement between save and restore is not visible in
 * this excerpt; presumably it copies the coder state from `s` into `d` --
 * confirm. */
1953 static inline void copy_rac_state(RangeCoder *d, RangeCoder *s){
1954 uint8_t *bytestream= d->bytestream;
1955 uint8_t *bytestream_start= d->bytestream_start;
1957 d->bytestream= bytestream;
1958 d->bytestream_start= bytestream_start;
1961 //near copy & paste from dsputil, FIXME
/* Walks a w x w block of bytes starting at `pix`, with rows `line_size`
 * bytes apart. After each row the pointer is advanced by line_size - w to
 * reach the start of the next row.
 * NOTE(review): the accumulation and return lines are not visible in this
 * excerpt; by its name the function presumably returns the sum of the
 * pixels -- confirm. */
1962 static int pix_sum(uint8_t * pix, int line_size, int w)
1967 for (i = 0; i < w; i++) {
1968 for (j = 0; j < w; j++) {
1972 pix += line_size - w;
1977 //near copy & paste from dsputil, FIXME
/* Walks a w x w block of bytes starting at `pix`, with rows `line_size`
 * bytes apart, using the lookup table `sq` (squareTbl offset by 256).
 * NOTE(review): the accumulation and return lines are not visible in this
 * excerpt; presumably the function returns the sum of squared pixel values
 * -- confirm. */
1978 static int pix_norm1(uint8_t * pix, int line_size, int w)
1981 uint32_t *sq = squareTbl + 256;
1984 for (i = 0; i < w; i++) {
1985 for (j = 0; j < w; j ++) {
1989 pix += line_size - w;
/* Writes one BlockNode value into every finest-level cell covered by the
 * block at (x, y) of the given quadtree `level`.
 * A block at `level` spans block_w = 2^(block_max_depth - level) cells in
 * each direction; `w` is the row length of s->block at the finest level.
 * NOTE(review): the lines that build `block` from l, cb, cr, mx, my and
 * type are not visible in this excerpt. */
1994 static inline void set_blocks(SnowContext *s, int level, int x, int y, int l, int cb, int cr, int mx, int my, int type){
1995 const int w= s->b_width << s->block_max_depth;
1996 const int rem_depth= s->block_max_depth - level;
1997 const int index= (x + y*w) << rem_depth;
1998 const int block_w= 1<<rem_depth;
2010 for(j=0; j<block_w; j++){
2011 for(i=0; i<block_w; i++){
2012 s->block[index + i + j*w]= block;
/* Points the motion-estimation context `c` at the source planes and at the
 * reference planes offset to position (x, y).
 * The two visible offset entries (chroma) use c->uvstride and halve the
 * combined offset; the first entry (luma) is not visible in this excerpt.
 * NOTE(review): the loop over `i`, and any use of ref2 and ref_index, are
 * also not visible. */
2017 static inline void init_ref(MotionEstContext *c, uint8_t *src[3], uint8_t *ref[3], uint8_t *ref2[3], int x, int y, int ref_index){
2018 const int offset[3]= {
2020 ((y*c->uvstride + x)>>1),
2021 ((y*c->uvstride + x)>>1),
2025 c->src[0][i]= src [i];
2026 c->ref[0][i]= ref [i] + offset[i];
/* Named slots in the motion-vector predictor array P: index 3 is the
 * top-right neighbour and index 4 the median predictor. */
2034 #define P_TOPRIGHT P[3]
2035 #define P_MEDIAN P[4]
2037 #define FLAG_QPEL 1 //must be 1
/* Encoder-side mode decision for one node (x, y) of the block quadtree at
 * depth `level`.
 *
 * Visible steps:
 *  - derive predictors (colour from the left block, median motion vector
 *    from left/top/top-right) and coding contexts from neighbouring blocks;
 *  - run a motion search and score the inter candidate, coding it with a
 *    scratch range coder `pc` into p_buffer / p_state;
 *  - compute the block's mean luma/chroma, score the intra candidate and
 *    code it with a scratch range coder `ic` into i_buffer / i_state;
 *  - add the rate term (bytes written, outstanding bytes and range
 *    difference, weighted by lambda2) to each score;
 *  - if level != block_max_depth, recursively score the four children and
 *    compare that split score with the two leaf scores;
 *  - copy the chosen candidate's bytes to the real output position (pbbak),
 *    store the block with set_blocks() and adopt the matching state copy.
 *
 * The return value is a score: the recursive calls' results are summed
 * into score2.
 * NOTE(review): several declarations, the intra/inter selection condition,
 * the restore of coder state around the recursion and all return
 * statements are not visible in this excerpt.
 * NOTE(review): p_buffer and i_buffer are fixed 1024-byte scratch buffers;
 * no bounds check on the scratch coders is visible here. */
2039 static int encode_q_branch(SnowContext *s, int level, int x, int y){
2040 uint8_t p_buffer[1024];
2041 uint8_t i_buffer[1024];
2042 uint8_t p_state[sizeof(s->block_state)];
2043 uint8_t i_state[sizeof(s->block_state)];
// Remember the real output position; candidates are coded to scratch
// buffers and the winner is copied back here.
2045 uint8_t *pbbak= s->c.bytestream;
2046 uint8_t *pbbak_start= s->c.bytestream_start;
2047 int score, score2, iscore, i_len, p_len, block_s, sum;
2048 const int w= s->b_width << s->block_max_depth;
2049 const int h= s->b_height << s->block_max_depth;
2050 const int rem_depth= s->block_max_depth - level;
2051 const int index= (x + y*w) << rem_depth;
2052 const int block_w= 1<<(LOG2_MB_SIZE - level);
2053 int trx= (x+1)<<rem_depth;
2054 int try= (y+1)<<rem_depth;
// Neighbours outside the picture fall back to null_block.
2055 BlockNode *left = x ? &s->block[index-1] : &null_block;
2056 BlockNode *top = y ? &s->block[index-w] : &null_block;
2057 BlockNode *right = trx<w ? &s->block[index+1] : &null_block;
2058 BlockNode *bottom= try<h ? &s->block[index+w] : &null_block;
2059 BlockNode *tl = y && x ? &s->block[index-w-1] : left;
2060 BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
2061 int pl = left->color[0];
2062 int pcb= left->color[1];
2063 int pcr= left->color[2];
2064 int pmx= mid_pred(left->mx, top->mx, tr->mx);
2065 int pmy= mid_pred(left->my, top->my, tr->my);
2068 const int stride= s->current_picture.linesize[0];
2069 const int uvstride= s->current_picture.linesize[1];
2070 uint8_t *current_data[3]= { s->input_picture.data[0] + (x + y* stride)*block_w,
2071 s->input_picture.data[1] + (x + y*uvstride)*block_w/2,
2072 s->input_picture.data[2] + (x + y*uvstride)*block_w/2};
2074 int16_t last_mv[3][2];
2075 int qpel= !!(s->avctx->flags & CODEC_FLAG_QPEL); //unused
2076 const int shift= 1+qpel;
2077 MotionEstContext *c= &s->m.me;
2078 int mx_context= av_log2(2*ABS(left->mx - top->mx));
2079 int my_context= av_log2(2*ABS(left->my - top->my));
2080 int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
2082 assert(sizeof(s->block_state) >= 256);
2084 set_blocks(s, level, x, y, pl, pcb, pcr, pmx, pmy, BLOCK_INTRA);
2088 // clip predictors / edge ?
2090 P_LEFT[0]= left->mx;
2091 P_LEFT[1]= left->my;
2094 P_TOPRIGHT[0]= tr->mx;
2095 P_TOPRIGHT[1]= tr->my;
2097 last_mv[0][0]= s->block[index].mx;
2098 last_mv[0][1]= s->block[index].my;
2099 last_mv[1][0]= right->mx;
2100 last_mv[1][1]= right->my;
2101 last_mv[2][0]= bottom->mx;
2102 last_mv[2][1]= bottom->my;
2109 init_ref(c, current_data, s->last_picture.data, NULL, block_w*x, block_w*y, 0);
2111 assert(s->m.me. stride == stride);
2112 assert(s->m.me.uvstride == uvstride);
2114 c->penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_cmp);
2115 c->sub_penalty_factor= get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_sub_cmp);
2116 c->mb_penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->mb_cmp);
2117 c->current_mv_penalty= c->mv_penalty[s->m.f_code=1] + MAX_MV;
// Search window relative to this block's position, with a margin of
// 16-2 beyond the block grid on each side.
2119 c->xmin = - x*block_w - 16+2;
2120 c->ymin = - y*block_w - 16+2;
2121 c->xmax = - (x+1)*block_w + (w<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-2;
2122 c->ymax = - (y+1)*block_w + (h<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-2;
// Clamp the predictors into the search window.
2124 if(P_LEFT[0] > (c->xmax<<shift)) P_LEFT[0] = (c->xmax<<shift);
2125 if(P_LEFT[1] > (c->ymax<<shift)) P_LEFT[1] = (c->ymax<<shift);
2126 if(P_TOP[0] > (c->xmax<<shift)) P_TOP[0] = (c->xmax<<shift);
2127 if(P_TOP[1] > (c->ymax<<shift)) P_TOP[1] = (c->ymax<<shift);
2128 if(P_TOPRIGHT[0] < (c->xmin<<shift)) P_TOPRIGHT[0]= (c->xmin<<shift);
2129 if(P_TOPRIGHT[0] > (c->xmax<<shift)) P_TOPRIGHT[0]= (c->xmax<<shift); //due to pmx no clip
2130 if(P_TOPRIGHT[1] > (c->ymax<<shift)) P_TOPRIGHT[1]= (c->ymax<<shift);
2132 P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]);
2133 P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);
2136 c->pred_x= P_LEFT[0];
2137 c->pred_y= P_LEFT[1];
2139 c->pred_x = P_MEDIAN[0];
2140 c->pred_y = P_MEDIAN[1];
2143 score= ff_epzs_motion_search(&s->m, &mx, &my, P, 0, /*ref_index*/ 0, last_mv,
2144 (1<<16)>>shift, level-LOG2_MB_SIZE+4, block_w);
2146 assert(mx >= c->xmin);
2147 assert(mx <= c->xmax);
2148 assert(my >= c->ymin);
2149 assert(my <= c->ymax);
2151 score= s->m.me.sub_motion_search(&s->m, &mx, &my, score, 0, 0, level-LOG2_MB_SIZE+4, block_w);
2152 score= ff_get_mb_score(&s->m, mx, my, 0, 0, level-LOG2_MB_SIZE+4, block_w, 0);
2153 //FIXME if mb_cmp != SSE then intra can't be compared currently and mb_penalty vs. lambda2
// Inter candidate: coded into the scratch buffer with a private state copy.
2157 pc.bytestream_start=
2158 pc.bytestream= p_buffer; //FIXME end/start? and at the other too
2159 memcpy(p_state, s->block_state, sizeof(s->block_state));
2161 if(level!=s->block_max_depth)
2162 put_rac(&pc, &p_state[4 + s_context], 1);
2163 put_rac(&pc, &p_state[1 + left->type + top->type], 0);
2164 put_symbol(&pc, &p_state[128 + 32*mx_context], mx - pmx, 1);
2165 put_symbol(&pc, &p_state[128 + 32*my_context], my - pmy, 1);
2166 p_len= pc.bytestream - pc.bytestream_start;
// Rate term: bytes written plus pending bytes (times 8) plus the change
// in log2 of the coder range, weighted by lambda2.
2167 score += (s->lambda2*(p_len*8
2168 + (pc.outstanding_count - s->c.outstanding_count)*8
2169 + (-av_log2(pc.range) + av_log2(s->c.range))
2170 ))>>FF_LAMBDA_SHIFT;
// Intra candidate: l/cb/cr are the rounded block means; iscore is
// sum(p^2) - 2*l*sum(p) + l*l*N, i.e. the squared error of coding luma
// as the constant l.
2172 block_s= block_w*block_w;
2173 sum = pix_sum(current_data[0], stride, block_w);
2174 l= (sum + block_s/2)/block_s;
2175 iscore = pix_norm1(current_data[0], stride, block_w) - 2*l*sum + l*l*block_s;
2177 block_s= block_w*block_w>>2;
2178 sum = pix_sum(current_data[1], uvstride, block_w>>1);
2179 cb= (sum + block_s/2)/block_s;
2180 // iscore += pix_norm1(&current_mb[1][0], uvstride, block_w>>1) - 2*cb*sum + cb*cb*block_s;
2181 sum = pix_sum(current_data[2], uvstride, block_w>>1);
2182 cr= (sum + block_s/2)/block_s;
2183 // iscore += pix_norm1(&current_mb[2][0], uvstride, block_w>>1) - 2*cr*sum + cr*cr*block_s;
2186 ic.bytestream_start=
2187 ic.bytestream= i_buffer; //FIXME end/start? and at the other too
2188 memcpy(i_state, s->block_state, sizeof(s->block_state));
2189 if(level!=s->block_max_depth)
2190 put_rac(&ic, &i_state[4 + s_context], 1);
2191 put_rac(&ic, &i_state[1 + left->type + top->type], 1);
2192 put_symbol(&ic, &i_state[32], l-pl , 1);
2193 put_symbol(&ic, &i_state[64], cb-pcb, 1);
2194 put_symbol(&ic, &i_state[96], cr-pcr, 1);
2195 i_len= ic.bytestream - ic.bytestream_start;
2196 iscore += (s->lambda2*(i_len*8
2197 + (ic.outstanding_count - s->c.outstanding_count)*8
2198 + (-av_log2(ic.range) + av_log2(s->c.range))
2199 ))>>FF_LAMBDA_SHIFT;
2201 // assert(score==256*256*256*64-1);
2202 assert(iscore < 255*255*256 + s->lambda2*10);
2203 assert(iscore >= 0);
2204 assert(l>=0 && l<=255);
2205 assert(pl>=0 && pl<=255);
// Accumulate the scene-change measure from both candidate scores.
2208 int varc= iscore >> 8;
2209 int vard= score >> 8;
2210 if (vard <= 64 || vard < varc)
2211 c->scene_change_score+= ff_sqrt(vard) - ff_sqrt(varc);
2213 c->scene_change_score+= s->m.qscale;
// Try splitting: write the "split" flag to the real coder and let the four
// children encode themselves; their scores are summed into score2.
2216 if(level!=s->block_max_depth){
2217 put_rac(&s->c, &s->block_state[4 + s_context], 0);
2218 score2 = encode_q_branch(s, level+1, 2*x+0, 2*y+0);
2219 score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+0);
2220 score2+= encode_q_branch(s, level+1, 2*x+0, 2*y+1);
2221 score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+1);
2222 score2+= s->lambda2>>FF_LAMBDA_SHIFT; //FIXME exact split overhead
2224 if(score2 < score && score2 < iscore)
// Intra chosen: commit the scratch bytes and state to the real coder.
2229 memcpy(pbbak, i_buffer, i_len);
2231 s->c.bytestream_start= pbbak_start;
2232 s->c.bytestream= pbbak + i_len;
2233 set_blocks(s, level, x, y, l, cb, cr, pmx, pmy, BLOCK_INTRA);
2234 memcpy(s->block_state, i_state, sizeof(s->block_state));
// Inter chosen: commit the scratch bytes and state to the real coder.
2237 memcpy(pbbak, p_buffer, p_len);
2239 s->c.bytestream_start= pbbak_start;
2240 s->c.bytestream= pbbak + p_len;
2241 set_blocks(s, level, x, y, pl, pcb, pcr, mx, my, 0);
2242 memcpy(s->block_state, p_state, sizeof(s->block_state));
/* Returns non-zero when blocks a and b would be coded identically.
 * If both are intra, only the three colour components are compared.
 * Otherwise the motion vectors and the BLOCK_INTRA bit of `type` must all
 * match (the `else` line between the two returns is not visible here).
 * The differences are OR-ed together so one test covers all fields. */
2247 static always_inline int same_block(BlockNode *a, BlockNode *b){
2248 if((a->type&BLOCK_INTRA) && (b->type&BLOCK_INTRA)){
2249 return !((a->color[0] - b->color[0]) | (a->color[1] - b->color[1]) | (a->color[2] - b->color[2]));
2251 return !((a->mx - b->mx) | (a->my - b->my) | ((a->type ^ b->type)&BLOCK_INTRA));
/* Writes the already-decided block data for node (x, y) at depth `level`
 * to the bitstream, with no mode decision of its own.
 *
 * Below the maximum depth, the node is coded as a leaf (flag 1) only when
 * the block and its right, lower and lower-right neighbours in s->block
 * compare equal via same_block(); otherwise a split flag (0) is written and
 * the four children are coded recursively.
 * A leaf then codes its type bit followed by either the colour deltas
 * (intra) or the motion-vector deltas (inter) against the predictors, and
 * re-stores the block with set_blocks().
 *
 * NOTE(review): the `else`/`return` lines separating the branches are not
 * visible in this excerpt. */
2255 static void encode_q_branch2(SnowContext *s, int level, int x, int y){
2256 const int w= s->b_width << s->block_max_depth;
2257 const int rem_depth= s->block_max_depth - level;
2258 const int index= (x + y*w) << rem_depth;
2259 int trx= (x+1)<<rem_depth;
2260 BlockNode *b= &s->block[index];
// Neighbours outside the picture fall back to null_block.
2261 BlockNode *left = x ? &s->block[index-1] : &null_block;
2262 BlockNode *top = y ? &s->block[index-w] : &null_block;
2263 BlockNode *tl = y && x ? &s->block[index-w-1] : left;
2264 BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
2265 int pl = left->color[0];
2266 int pcb= left->color[1];
2267 int pcr= left->color[2];
2268 int pmx= mid_pred(left->mx, top->mx, tr->mx);
2269 int pmy= mid_pred(left->my, top->my, tr->my);
2270 int mx_context= av_log2(2*ABS(left->mx - top->mx));
2271 int my_context= av_log2(2*ABS(left->my - top->my));
2272 int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
2275 set_blocks(s, level, x, y, pl, pcb, pcr, pmx, pmy, BLOCK_INTRA);
2279 if(level!=s->block_max_depth){
// NOTE(review): b+1, b+w and b+w+1 are the adjacent finest-level cells;
// whether that matches the four children for every rem_depth is not
// evident from the visible lines -- confirm.
2280 if(same_block(b,b+1) && same_block(b,b+w) && same_block(b,b+w+1)){
2281 put_rac(&s->c, &s->block_state[4 + s_context], 1);
2283 put_rac(&s->c, &s->block_state[4 + s_context], 0);
2284 encode_q_branch2(s, level+1, 2*x+0, 2*y+0);
2285 encode_q_branch2(s, level+1, 2*x+1, 2*y+0);
2286 encode_q_branch2(s, level+1, 2*x+0, 2*y+1);
2287 encode_q_branch2(s, level+1, 2*x+1, 2*y+1);
2291 if(b->type & BLOCK_INTRA){
2292 put_rac(&s->c, &s->block_state[1 + (left->type&1) + (top->type&1)], 1);
2293 put_symbol(&s->c, &s->block_state[32], b->color[0]-pl , 1);
2294 put_symbol(&s->c, &s->block_state[64], b->color[1]-pcb, 1);
2295 put_symbol(&s->c, &s->block_state[96], b->color[2]-pcr, 1);
2296 set_blocks(s, level, x, y, b->color[0], b->color[1], b->color[2], pmx, pmy, BLOCK_INTRA);
2298 put_rac(&s->c, &s->block_state[1 + (left->type&1) + (top->type&1)], 0);
2299 put_symbol(&s->c, &s->block_state[128 + 32*mx_context], b->mx - pmx, 1);
2300 put_symbol(&s->c, &s->block_state[128 + 32*my_context], b->my - pmy, 1);
2301 set_blocks(s, level, x, y, pl, pcb, pcr, b->mx, b->my, 0);
/*
 * decode_q_branch: reads the quadtree node at (x, y) of the given level from
 * the range coder s->c and stores the result with set_blocks(). Uses the same
 * neighbour selection and contexts as the encoder functions in this file.
 * NOTE(review): some original lines (else branches, declarations, braces) are
 * absent from this listing; confirm the branch structure against the full file.
 */
2305 static void decode_q_branch(SnowContext *s, int level, int x, int y){
2306 const int w= s->b_width << s->block_max_depth;
2307 const int rem_depth= s->block_max_depth - level;
2308 const int index= (x + y*w) << rem_depth;
2309 int trx= (x+1)<<rem_depth;
// left/top use null_block at the picture edge; tl falls back to left, tr to tl.
2310 BlockNode *left = x ? &s->block[index-1] : &null_block;
2311 BlockNode *top = y ? &s->block[index-w] : &null_block;
2312 BlockNode *tl = y && x ? &s->block[index-w-1] : left;
2313 BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
2314 int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
// NOTE(review): the condition guarding this call is not visible here.
2317 set_blocks(s, level, x, y, null_block.color[0], null_block.color[1], null_block.color[2], null_block.mx, null_block.my, BLOCK_INTRA);
// A leaf is decoded when the maximum depth is reached or the coded flag is 1
// (the flag is not read at maximum depth because of the short-circuit ||).
2321 if(level==s->block_max_depth || get_rac(&s->c, &s->block_state[4 + s_context])){
// Start from the predictors: colour from left, mv from mid_pred() of left/top/tr.
2323 int l = left->color[0];
2324 int cb= left->color[1];
2325 int cr= left->color[2];
2326 int mx= mid_pred(left->mx, top->mx, tr->mx);
2327 int my= mid_pred(left->my, top->my, tr->my);
// The terms multiplied by 0 contribute nothing to the context.
2328 int mx_context= av_log2(2*ABS(left->mx - top->mx)) + 0*av_log2(2*ABS(tr->mx - top->mx));
2329 int my_context= av_log2(2*ABS(left->my - top->my)) + 0*av_log2(2*ABS(tr->my - top->my));
// NOTE(review): encode_q_branch2 masks the neighbour types with &1 when forming
// this context; confirm that types seen by the decoder never carry other bits.
2331 type= get_rac(&s->c, &s->block_state[1 + left->type + top->type]) ? BLOCK_INTRA : 0;
// Residuals are added to the predictors (colour residuals and mv residuals;
// the branch selecting between them is not visible in this listing).
2334 l += get_symbol(&s->c, &s->block_state[32], 1);
2335 cb+= get_symbol(&s->c, &s->block_state[64], 1);
2336 cr+= get_symbol(&s->c, &s->block_state[96], 1);
2338 mx+= get_symbol(&s->c, &s->block_state[128 + 32*mx_context], 1);
2339 my+= get_symbol(&s->c, &s->block_state[128 + 32*my_context], 1);
2341 set_blocks(s, level, x, y, l, cb, cr, mx, my, type);
// Non-leaf path: recurse into the four quadrants of the next level.
2343 decode_q_branch(s, level+1, 2*x+0, 2*y+0);
2344 decode_q_branch(s, level+1, 2*x+1, 2*y+0);
2345 decode_q_branch(s, level+1, 2*x+0, 2*y+1);
2346 decode_q_branch(s, level+1, 2*x+1, 2*y+1);
/*
 * encode_blocks: writes the block tree by calling encode_q_branch2() or
 * encode_q_branch() at level 0.
 * NOTE(review): the loops, local declarations and the statements controlled by
 * the first and third conditions are not visible in this listing.
 */
2350 static void encode_blocks(SnowContext *s){
2355 if(s->avctx->me_method == ME_ITER && !s->keyframe)
// Error is logged when the remaining output space is below w*MB_SIZE*MB_SIZE*3 bytes.
2359 if(s->c.bytestream_end - s->c.bytestream < w*MB_SIZE*MB_SIZE*3){ //FIXME nicer limit
2360 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
// ME_ITER selects encode_q_branch2; encode_q_branch is presumably the else path.
2364 if(s->avctx->me_method == ME_ITER)
2365 encode_q_branch2(s, 0, x, y);
2367 encode_q_branch (s, 0, x, y);
/*
 * decode_blocks: calls decode_q_branch() at level 0 for block position (x, y).
 * NOTE(review): the loops over x and y are not visible in this listing.
 */
2372 static void decode_blocks(SnowContext *s){
2379 decode_q_branch(s, 0, x, y);
/*
 * mc_block: two-pass sub-pixel interpolation of a b_w x b_h block.
 * The first loop nest covers b_h+5 rows and filters with the fractional
 * offset dx; the second reads six vertically adjacent rows of tmp and
 * filters with dy. dx and dy are compared against 8 and 16, i.e. they are
 * treated as sixteenths.
 * NOTE(review): the loads of a0..a5 in the first pass and the stores of both
 * passes are not visible in this listing.
 */
2384 static void mc_block(uint8_t *dst, uint8_t *src, uint8_t *tmp, int stride, int b_w, int b_h, int dx, int dy){
2387 for(y=0; y < b_h+5; y++){
2388 for(x=0; x < b_w; x++){
2395 // int am= 9*(a1+a2) - (a0+a3);
// Six-tap filter with weights (1,-5,20,20,-5,1); the taps sum to 32.
2396 int am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5);
2397 // int am= 18*(a2+a3) - 2*(a1+a4);
2398 // int aL= (-7*a0 + 105*a1 + 35*a2 - 5*a3)>>3;
2399 // int aR= (-7*a3 + 105*a2 + 35*a1 - 5*a0)>>3;
2401 // if(b_w==16) am= 8*(a1+a2);
// Linear blend between the nearer sample (scaled by 32) and the filtered
// value, with rounding (+128) before the shift by 8.
2403 if(dx<8) am = (32*a2*( 8-dx) + am* dx + 128)>>8;
2404 else am = ( am*(16-dx) + 32*a3*(dx-8) + 128)>>8;
2406 /* FIXME Try increasing tmp buffer to 16 bits and not clipping here. Should give marginally better results. - Robert*/
// Clip: out-of-range values become 0 (negative) or -1, i.e. 255 in 8 bits (too large).
2407 if(am&(~255)) am= ~(am>>31);
2411 /* if (dx< 4) tmp[x + y*stride]= (16*a1*( 4-dx) + aL* dx + 32)>>6;
2412 else if(dx< 8) tmp[x + y*stride]= ( aL*( 8-dx) + am*(dx- 4) + 32)>>6;
2413 else if(dx<12) tmp[x + y*stride]= ( am*(12-dx) + aR*(dx- 8) + 32)>>6;
2414 else tmp[x + y*stride]= ( aR*(16-dx) + 16*a2*(dx-12) + 32)>>6;*/
// Rewind tmp by the b_h+5 rows of the first pass (the advance itself is not visible here).
2419 tmp -= (b_h+5)*stride;
2421 for(y=0; y < b_h; y++){
2422 for(x=0; x < b_w; x++){
2423 int a0= tmp[x + 0*stride];
2424 int a1= tmp[x + 1*stride];
2425 int a2= tmp[x + 2*stride];
2426 int a3= tmp[x + 3*stride];
2427 int a4= tmp[x + 4*stride];
2428 int a5= tmp[x + 5*stride];
2429 int am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5);
2430 // int am= 18*(a2+a3) - 2*(a1+a4);
2431 /* int aL= (-7*a0 + 105*a1 + 35*a2 - 5*a3)>>3;
2432 int aR= (-7*a3 + 105*a2 + 35*a1 - 5*a0)>>3;*/
2434 // if(b_w==16) am= 8*(a1+a2);
// Same blend and clip as the first pass, using dy.
2436 if(dy<8) am = (32*a2*( 8-dy) + am* dy + 128)>>8;
2437 else am = ( am*(16-dy) + 32*a3*(dy-8) + 128)>>8;
2439 if(am&(~255)) am= ~(am>>31);
2442 /* if (dy< 4) tmp[x + y*stride]= (16*a1*( 4-dy) + aL* dy + 32)>>6;
2443 else if(dy< 8) tmp[x + y*stride]= ( aL*( 8-dy) + am*(dy- 4) + 32)>>6;
2444 else if(dy<12) tmp[x + y*stride]= ( am*(12-dy) + aR*(dy- 8) + 32)>>6;
2445 else tmp[x + y*stride]= ( aR*(16-dy) + 16*a2*(dy-12) + 32)>>6;*/
2450 STOP_TIMER("mc_block")
/*
 * mca(dx,dy,b_w): defines a function mc_block_hpel<dx><dy><b_w>() that calls
 * mc_block() for a square b_w x b_w block with fixed dx/dy, using a
 * variable-length scratch array of stride*(b_w+5) bytes. The source pointer
 * is moved 2 columns left and 2 rows up before the call.
 * NOTE(review): the parameter h is not referenced in the lines visible here.
 */
2453 #define mca(dx,dy,b_w)\
2454 static void mc_block_hpel ## dx ## dy ## b_w(uint8_t *dst, uint8_t *src, int stride, int h){\
2455 uint8_t tmp[stride*(b_w+5)];\
2457 mc_block(dst, src-2-2*stride, tmp, stride, b_w, b_w, dx, dy);\
/*
 * pred_block: writes the prediction for one block into dst.
 * Intra blocks are filled with block->color[plane_index]; other blocks are
 * motion compensated from src using block->mx/my, through mc_block() or the
 * put_h264_qpel_pixels_tab functions of s->dsp.
 * NOTE(review): the width tests selecting the unrolled fill loops, the
 * sx/sy adjustment by the motion vector and several else/brace lines are not
 * visible in this listing.
 */
2469 static void pred_block(SnowContext *s, uint8_t *dst, uint8_t *src, uint8_t *tmp, int stride, int sx, int sy, int b_w, int b_h, BlockNode *block, int plane_index, int w, int h){
2470 if(block->type & BLOCK_INTRA){
2472 const int color = block->color[plane_index];
// Replicate the colour into all four bytes for 32-bit stores.
2473 const int color4= color*0x01010101;
// Unrolled fills: 8, 4, 2 and 1 stores per row cover 32, 16, 8 and 4 bytes.
// NOTE(review): these stores access a uint8_t buffer through uint32_t pointers
// and so rely on dst being suitably aligned.
2475 for(y=0; y < b_h; y++){
2476 *(uint32_t*)&dst[0 + y*stride]= color4;
2477 *(uint32_t*)&dst[4 + y*stride]= color4;
2478 *(uint32_t*)&dst[8 + y*stride]= color4;
2479 *(uint32_t*)&dst[12+ y*stride]= color4;
2480 *(uint32_t*)&dst[16+ y*stride]= color4;
2481 *(uint32_t*)&dst[20+ y*stride]= color4;
2482 *(uint32_t*)&dst[24+ y*stride]= color4;
2483 *(uint32_t*)&dst[28+ y*stride]= color4;
2486 for(y=0; y < b_h; y++){
2487 *(uint32_t*)&dst[0 + y*stride]= color4;
2488 *(uint32_t*)&dst[4 + y*stride]= color4;
2489 *(uint32_t*)&dst[8 + y*stride]= color4;
2490 *(uint32_t*)&dst[12+ y*stride]= color4;
2493 for(y=0; y < b_h; y++){
2494 *(uint32_t*)&dst[0 + y*stride]= color4;
2495 *(uint32_t*)&dst[4 + y*stride]= color4;
2498 for(y=0; y < b_h; y++){
2499 *(uint32_t*)&dst[0 + y*stride]= color4;
// Generic byte-wise fill.
2502 for(y=0; y < b_h; y++){
2503 for(x=0; x < b_w; x++){
2504 dst[x + y*stride]= color;
// Motion vectors are scaled by s->mv_scale for plane_index != 0 and by twice that for plane 0.
2509 const int scale= plane_index ? s->mv_scale : 2*s->mv_scale;
2510 int mx= block->mx*scale;
2511 int my= block->my*scale;
// Fractional part of the scaled vector in sixteenths.
2512 const int dx= mx&15;
2513 const int dy= my&15;
// Evaluates to 0/1/2/3 for b_w 16/8/4/2 (and to -3 for b_w 32, which the assert below permits).
2514 const int tab_index= 3 - (b_w>>2) + (b_w>>4);
2517 src += sx + sy*stride;
// The unsigned casts make negative sx/sy take this branch as well.
2518 if( (unsigned)sx >= w - b_w - 4
2519 || (unsigned)sy >= h - b_h - 4){
2520 ff_emulated_edge_mc(tmp + MB_SIZE, src, stride, b_w+5, b_h+5, sx, sy, w, h);
2523 assert(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h);
2524 assert(!(b_w&(b_w-1)));
2525 assert(b_w>1 && b_h>1);
2526 assert(tab_index>=0 && tab_index<4 || b_w==32);
// Offsets that are not a multiple of 4 sixteenths go through mc_block().
2527 if((dx&3) || (dy&3))
2528 mc_block(dst, src, tmp, stride, b_w, b_h, dx, dy);
// Remaining calls use the qpel table, indexed by dy+(dx>>2). The first loop
// covers a 32-wide block as two 16-wide columns per 16 rows.
2531 for(y=0; y<b_h; y+=16){
2532 s->dsp.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + y*stride, src + 2 + (y+2)*stride,stride);
2533 s->dsp.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + 16 + y*stride, src + 18 + (y+2)*stride,stride);
2536 s->dsp.put_h264_qpel_pixels_tab[tab_index ][dy+(dx>>2)](dst,src + 2 + 2*stride,stride);
// Wide block (b_w == 2*b_h): two calls side by side, b_h columns apart.
2537 else if(b_w==2*b_h){
2538 s->dsp.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst ,src + 2 + 2*stride,stride);
2539 s->dsp.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst+b_h,src + 2 + b_h + 2*stride,stride);
// Two calls stacked vertically, b_w rows apart.
2542 s->dsp.put_h264_qpel_pixels_tab[tab_index ][dy+(dx>>2)](dst ,src + 2 + 2*stride ,stride);
2543 s->dsp.put_h264_qpel_pixels_tab[tab_index ][dy+(dx>>2)](dst+b_w*stride,src + 2 + 2*stride+b_w*stride,stride);
/*
 * add_yblock_buffered: slice-buffer variant of add_yblock().
 * Builds up to four predictions (one per neighbouring BlockNode lt/rt/lb/rb)
 * with pred_block(), weights them with four quadrants of the obmc window and
 * combines the result with the slice-buffer line obtained from
 * slice_buffer_get_line(sb, src_y + y).
 * NOTE(review): the clipping of the block at picture/block-array borders, the
 * reuse of predictions for equal blocks and the add/non-add branch in the
 * final loop are only partly visible in this listing.
 */
2548 //FIXME name cleanup (b_w, block_w, b_width stuff)
2549 static always_inline void add_yblock_buffered(SnowContext *s, slice_buffer * sb, DWTELEM *old_dst, uint8_t *dst8, uint8_t *src, uint8_t *obmc, int src_x, int src_y, int b_w, int b_h, int w, int h, int dst_stride, int src_stride, int obmc_stride, int b_x, int b_y, int add, int plane_index){
2550 DWTELEM * dst = NULL;
2551 const int b_width = s->b_width << s->block_max_depth;
2552 const int b_height= s->b_height << s->block_max_depth;
2553 const int b_stride= b_width;
// The 2x2 group of block nodes whose predictions overlap in this area.
2554 BlockNode *lt= &s->block[b_x + b_y*b_stride];
2555 BlockNode *rt= lt+1;
2556 BlockNode *lb= lt+b_stride;
2557 BlockNode *rb= lb+1;
2559 int tmp_step= src_stride >= 7*MB_SIZE ? MB_SIZE : MB_SIZE*src_stride;
// Variable-length scratch buffer on the stack; its size grows with src_stride.
2560 uint8_t tmp[src_stride*7*MB_SIZE]; //FIXME align
2567 }else if(b_x + 1 >= b_width){
2574 }else if(b_y + 1 >= b_height){
2579 if(src_x<0){ //FIXME merge with prev & always round internal width up to *16
2583 }else if(src_x + b_w > w){
2587 obmc -= src_y*obmc_stride;
2590 }else if(src_y + b_h> h){
// Nothing left to do once clipping has emptied the block.
2594 if(b_w<=0 || b_h<=0) return;
2596 assert(src_stride > 2*MB_SIZE + 5);
2597 // old_dst += src_x + src_y*dst_stride;
2598 dst8+= src_x + src_y*src_stride;
2599 // src += src_x + src_y*src_stride;
2601 ptmp= tmp + 3*tmp_step;
// One prediction per block node; the same_block() tests presumably let equal
// blocks share a prediction instead of recomputing it.
2604 pred_block(s, block[0], src, tmp, src_stride, src_x, src_y, b_w, b_h, lt, plane_index, w, h);
2606 if(same_block(lt, rt)){
2611 pred_block(s, block[1], src, tmp, src_stride, src_x, src_y, b_w, b_h, rt, plane_index, w, h);
2614 if(same_block(lt, lb)){
2616 }else if(same_block(rt, lb)){
2621 pred_block(s, block[2], src, tmp, src_stride, src_x, src_y, b_w, b_h, lb, plane_index, w, h);
2624 if(same_block(lt, rb) ){
2626 }else if(same_block(rt, rb)){
2628 }else if(same_block(lb, rb)){
2632 pred_block(s, block[3], src, tmp, src_stride, src_x, src_y, b_w, b_h, rb, plane_index, w, h);
// NOTE(review): dst is initialised to NULL above and no assignment precedes
// the following four loops in the visible lines; confirm whether they are
// compiled out in the full file.
2635 for(y=0; y<b_h; y++){
2636 for(x=0; x<b_w; x++){
2637 int v= obmc [x + y*obmc_stride] * block[3][x + y*src_stride] * (256/OBMC_MAX);
2638 if(add) dst[x + y*dst_stride] += v;
2639 else dst[x + y*dst_stride] -= v;
2642 for(y=0; y<b_h; y++){
2643 uint8_t *obmc2= obmc + (obmc_stride>>1);
2644 for(x=0; x<b_w; x++){
2645 int v= obmc2[x + y*obmc_stride] * block[2][x + y*src_stride] * (256/OBMC_MAX);
2646 if(add) dst[x + y*dst_stride] += v;
2647 else dst[x + y*dst_stride] -= v;
2650 for(y=0; y<b_h; y++){
2651 uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
2652 for(x=0; x<b_w; x++){
2653 int v= obmc3[x + y*obmc_stride] * block[1][x + y*src_stride] * (256/OBMC_MAX);
2654 if(add) dst[x + y*dst_stride] += v;
2655 else dst[x + y*dst_stride] -= v;
2658 for(y=0; y<b_h; y++){
2659 uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
2660 uint8_t *obmc4= obmc3+ (obmc_stride>>1);
2661 for(x=0; x<b_w; x++){
2662 int v= obmc4[x + y*obmc_stride] * block[0][x + y*src_stride] * (256/OBMC_MAX);
2663 if(add) dst[x + y*dst_stride] += v;
2664 else dst[x + y*dst_stride] -= v;
// Combined loop: obmc1..obmc4 address the four quadrants of the window
// (offsets of half a row and half the window height).
2672 for(y=0; y<b_h; y++){
2673 //FIXME ugly misuse of obmc_stride
2674 uint8_t *obmc1= obmc + y*obmc_stride;
2675 uint8_t *obmc2= obmc1+ (obmc_stride>>1);
2676 uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1);
2677 uint8_t *obmc4= obmc3+ (obmc_stride>>1);
2678 dst = slice_buffer_get_line(sb, src_y + y);
2679 for(x=0; x<b_w; x++){
2680 int v= obmc1[x] * block[3][x + y*src_stride]
2681 +obmc2[x] * block[2][x + y*src_stride]
2682 +obmc3[x] * block[1][x + y*src_stride]
2683 +obmc4[x] * block[0][x + y*src_stride];
// Rescale the weighted sum, then round down to FRAC_BITS fractional bits.
2685 v <<= 8 - LOG2_OBMC_MAX;
2687 v += 1<<(7 - FRAC_BITS);
2688 v >>= 8 - FRAC_BITS;
2691 // v += old_dst[x + y*dst_stride];
// Add the buffered value, round to an integer sample, clip to 0..255 and store it in dst8.
2692 v += dst[x + src_x];
2693 v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS;
2694 if(v&(~255)) v= ~(v>>31);
2695 dst8[x + y*src_stride] = v;
2697 // old_dst[x + y*dst_stride] -= v;
2698 dst[x + src_x] -= v;
2702 STOP_TIMER("Inner add y block")
/*
 * add_yblock: overlapped block motion compensation for one block area.
 * Builds up to four predictions (one per neighbouring BlockNode lt/rt/lb/rb)
 * with pred_block(), weights them with four quadrants of the obmc window and
 * combines the result with the coefficient buffer dst; dst8 receives the
 * clipped 8-bit samples in the visible reconstruction path.
 * NOTE(review): the border clipping, the reuse of predictions for equal
 * blocks and the add/non-add branch in the final loop are only partly visible
 * in this listing.
 */
2707 //FIXME name cleanup (b_w, block_w, b_width stuff)
2708 static always_inline void add_yblock(SnowContext *s, DWTELEM *dst, uint8_t *dst8, uint8_t *src, uint8_t *obmc, int src_x, int src_y, int b_w, int b_h, int w, int h, int dst_stride, int src_stride, int obmc_stride, int b_x, int b_y, int add, int offset_dst, int plane_index){
2709 const int b_width = s->b_width << s->block_max_depth;
2710 const int b_height= s->b_height << s->block_max_depth;
2711 const int b_stride= b_width;
// The 2x2 group of block nodes whose predictions overlap in this area.
2712 BlockNode *lt= &s->block[b_x + b_y*b_stride];
2713 BlockNode *rt= lt+1;
2714 BlockNode *lb= lt+b_stride;
2715 BlockNode *rb= lb+1;
2717 int tmp_step= src_stride >= 7*MB_SIZE ? MB_SIZE : MB_SIZE*src_stride;
// Variable-length scratch buffer on the stack; its size grows with src_stride.
2718 uint8_t tmp[src_stride*7*MB_SIZE]; //FIXME align
2725 }else if(b_x + 1 >= b_width){
2732 }else if(b_y + 1 >= b_height){
2737 if(src_x<0){ //FIXME merge with prev & always round internal width up to *16
2743 }else if(src_x + b_w > w){
2747 obmc -= src_y*obmc_stride;
2750 dst -= src_y*dst_stride;
2752 }else if(src_y + b_h> h){
// Nothing left to do once clipping has emptied the block.
2756 if(b_w<=0 || b_h<=0) return;
2758 assert(src_stride > 2*MB_SIZE + 5);
2760 dst += src_x + src_y*dst_stride;
2761 dst8+= src_x + src_y*src_stride;
2762 // src += src_x + src_y*src_stride;
2764 ptmp= tmp + 3*tmp_step;
// One prediction per block node; the same_block() tests presumably let equal
// blocks share a prediction instead of recomputing it.
2767 pred_block(s, block[0], src, tmp, src_stride, src_x, src_y, b_w, b_h, lt, plane_index, w, h);
2769 if(same_block(lt, rt)){
2774 pred_block(s, block[1], src, tmp, src_stride, src_x, src_y, b_w, b_h, rt, plane_index, w, h);
2777 if(same_block(lt, lb)){
2779 }else if(same_block(rt, lb)){
2784 pred_block(s, block[2], src, tmp, src_stride, src_x, src_y, b_w, b_h, lb, plane_index, w, h);
2787 if(same_block(lt, rb) ){
2789 }else if(same_block(rt, rb)){
2791 }else if(same_block(lb, rb)){
2795 pred_block(s, block[3], src, tmp, src_stride, src_x, src_y, b_w, b_h, rb, plane_index, w, h);
// Per-quadrant variant: each loop applies one window quadrant to one prediction.
// NOTE(review): whether these four loops are compiled in is not visible here.
2798 for(y=0; y<b_h; y++){
2799 for(x=0; x<b_w; x++){
2800 int v= obmc [x + y*obmc_stride] * block[3][x + y*src_stride] * (256/OBMC_MAX);
2801 if(add) dst[x + y*dst_stride] += v;
2802 else dst[x + y*dst_stride] -= v;
2805 for(y=0; y<b_h; y++){
2806 uint8_t *obmc2= obmc + (obmc_stride>>1);
2807 for(x=0; x<b_w; x++){
2808 int v= obmc2[x + y*obmc_stride] * block[2][x + y*src_stride] * (256/OBMC_MAX);
2809 if(add) dst[x + y*dst_stride] += v;
2810 else dst[x + y*dst_stride] -= v;
2813 for(y=0; y<b_h; y++){
2814 uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
2815 for(x=0; x<b_w; x++){
2816 int v= obmc3[x + y*obmc_stride] * block[1][x + y*src_stride] * (256/OBMC_MAX);
2817 if(add) dst[x + y*dst_stride] += v;
2818 else dst[x + y*dst_stride] -= v;
2821 for(y=0; y<b_h; y++){
2822 uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
2823 uint8_t *obmc4= obmc3+ (obmc_stride>>1);
2824 for(x=0; x<b_w; x++){
2825 int v= obmc4[x + y*obmc_stride] * block[0][x + y*src_stride] * (256/OBMC_MAX);
2826 if(add) dst[x + y*dst_stride] += v;
2827 else dst[x + y*dst_stride] -= v;
// Combined loop: obmc1..obmc4 address the four quadrants of the window
// (offsets of half a row and half the window height).
2831 for(y=0; y<b_h; y++){
2832 //FIXME ugly misuse of obmc_stride
2833 uint8_t *obmc1= obmc + y*obmc_stride;
2834 uint8_t *obmc2= obmc1+ (obmc_stride>>1);
2835 uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1);
2836 uint8_t *obmc4= obmc3+ (obmc_stride>>1);
2837 for(x=0; x<b_w; x++){
2838 int v= obmc1[x] * block[3][x + y*src_stride]
2839 +obmc2[x] * block[2][x + y*src_stride]
2840 +obmc3[x] * block[1][x + y*src_stride]
2841 +obmc4[x] * block[0][x + y*src_stride];
// Rescale the weighted sum, then round down to FRAC_BITS fractional bits.
2843 v <<= 8 - LOG2_OBMC_MAX;
2845 v += 1<<(7 - FRAC_BITS);
2846 v >>= 8 - FRAC_BITS;
// Add the buffered value, round to an integer sample, clip to 0..255 and store it in dst8.
2849 v += dst[x + y*dst_stride];
2850 v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS;
2851 if(v&(~255)) v= ~(v>>31);
2852 dst8[x + y*src_stride] = v;
2854 dst[x + y*dst_stride] -= v;
/*
 * predict_slice_buffered: processes the block row mb_y of one plane using
 * slice-buffer lines. For keyframes (or when debug bit 512 is set) the lines
 * are only offset by 128 and, in one path, clipped into dst8; otherwise
 * add_yblock_buffered() is called for every block column of the row.
 * NOTE(review): the w/h declarations, the add test and the inner x loops are
 * not visible in this listing.
 */
2861 static always_inline void predict_slice_buffered(SnowContext *s, slice_buffer * sb, DWTELEM * old_buffer, int plane_index, int add, int mb_y){
2862 Plane *p= &s->plane[plane_index];
2863 const int mb_w= s->b_width << s->block_max_depth;
2864 const int mb_h= s->b_height << s->block_max_depth;
2866 int block_size = MB_SIZE >> s->block_max_depth;
// Planes other than 0 use half the block size and the next obmc_tab entry.
2867 int block_w = plane_index ? block_size/2 : block_size;
2868 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
2869 int obmc_stride= plane_index ? block_size : 2*block_size;
2870 int ref_stride= s->current_picture.linesize[plane_index];
2871 uint8_t *ref = s->last_picture.data[plane_index];
2872 uint8_t *dst8= s->current_picture.data[plane_index];
2877 if(s->keyframe || (s->avctx->debug&512)){
// Rows of this block row, limited to the plane height h.
2882 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++)
2884 // DWTELEM * line = slice_buffer_get_line(sb, y);
2885 DWTELEM * line = sb->line[y];
2888 // int v= buf[x + y*w] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
// Add the 128 offset plus a rounding term, then clip to 0..255 for the 8-bit output.
2889 int v= line[x] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
2891 if(v&(~255)) v= ~(v>>31);
2892 dst8[x + y*ref_stride]= v;
2896 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++)
2898 // DWTELEM * line = slice_buffer_get_line(sb, y);
2899 DWTELEM * line = sb->line[y];
// Other path: remove the 128 offset from the buffered line.
2902 line[x] -= 128 << FRAC_BITS;
2903 // buf[x + y*w]-= 128<<FRAC_BITS;
// mb_x runs to mb_w inclusive; block areas start half a block before each block origin.
2911 for(mb_x=0; mb_x<=mb_w; mb_x++){
2914 add_yblock_buffered(s, sb, old_buffer, dst8, ref, obmc,
2915 block_w*mb_x - block_w/2,
2916 block_w*mb_y - block_w/2,
2919 w, ref_stride, obmc_stride,
2923 STOP_TIMER("add_yblock")
2926 STOP_TIMER("predict_slice")
/*
 * predict_slice: processes the block row mb_y of one plane on the flat
 * coefficient buffer buf. For keyframes (or when debug bit 512 is set) the
 * samples are only offset by 128 and, in one path, clipped into dst8;
 * otherwise add_yblock() is called for every block column of the row.
 * NOTE(review): the w/h declarations, the add test and the inner x loops are
 * not visible in this listing.
 */
2929 static always_inline void predict_slice(SnowContext *s, DWTELEM *buf, int plane_index, int add, int mb_y){
2930 Plane *p= &s->plane[plane_index];
2931 const int mb_w= s->b_width << s->block_max_depth;
2932 const int mb_h= s->b_height << s->block_max_depth;
2934 int block_size = MB_SIZE >> s->block_max_depth;
// Planes other than 0 use half the block size and the next obmc_tab entry.
2935 int block_w = plane_index ? block_size/2 : block_size;
2936 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
2937 const int obmc_stride= plane_index ? block_size : 2*block_size;
2938 int ref_stride= s->current_picture.linesize[plane_index];
2939 uint8_t *ref = s->last_picture.data[plane_index];
2940 uint8_t *dst8= s->current_picture.data[plane_index];
2945 if(s->keyframe || (s->avctx->debug&512)){
// Rows of this block row, limited to the plane height h.
2950 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
// Add the 128 offset plus a rounding term, then clip to 0..255 for the 8-bit output.
2952 int v= buf[x + y*w] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
2954 if(v&(~255)) v= ~(v>>31);
2955 dst8[x + y*ref_stride]= v;
2959 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
// Other path: remove the 128 offset from the buffer.
2961 buf[x + y*w]-= 128<<FRAC_BITS;
// mb_x runs to mb_w inclusive; block areas start half a block before each block origin.
2969 for(mb_x=0; mb_x<=mb_w; mb_x++){
2972 add_yblock(s, buf, dst8, ref, obmc,
2973 block_w*mb_x - block_w/2,
2974 block_w*mb_y - block_w/2,
2977 w, ref_stride, obmc_stride,
2979 add, 1, plane_index);
2981 STOP_TIMER("add_yblock")
2984 STOP_TIMER("predict_slice")
/*
 * predict_plane: runs predict_slice() for every block row of a plane,
 * mb_y = 0 .. mb_h inclusive (one more row than mb_h).
 */
2987 static always_inline void predict_plane(SnowContext *s, DWTELEM *buf, int plane_index, int add){
2988 const int mb_h= s->b_height << s->block_max_depth;
2990 for(mb_y=0; mb_y<=mb_h; mb_y++)
2991 predict_slice(s, buf, plane_index, add, mb_y);
/*
 * get_dc: computes a colour value for block (mb_x, mb_y) of one plane and
 * returns it clipped to 0..255.
 * The block is temporarily made intra with colour 0, the neighbours'
 * contribution is accumulated into a scratch area through add_yblock(), and
 * the result is the rounded ratio ab/aa (scaled by 64) of two sums weighted
 * by the obmc window; presumably a least-squares fit of the block colour.
 * NOTE(review): the loop over i, the declarations of ab/aa and the restore of
 * the backed-up block are not visible in this listing.
 */
2994 static int get_dc(SnowContext *s, int mb_x, int mb_y, int plane_index){
2996 Plane *p= &s->plane[plane_index];
2997 const int block_size = MB_SIZE >> s->block_max_depth;
2998 const int block_w = plane_index ? block_size/2 : block_size;
2999 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
3000 const int obmc_stride= plane_index ? block_size : 2*block_size;
3001 const int ref_stride= s->current_picture.linesize[plane_index];
3002 uint8_t *ref= s-> last_picture.data[plane_index];
3003 uint8_t *src= s-> input_picture.data[plane_index];
// Per-plane region of the shared scratchpad.
3004 DWTELEM *dst= (DWTELEM*)s->m.obmc_scratchpad + plane_index*block_size*block_size*4;
3005 const int b_stride = s->b_width << s->block_max_depth;
3006 const int w= p->width;
3007 const int h= p->height;
3008 int index= mb_x + mb_y*b_stride;
3009 BlockNode *b= &s->block[index];
// Copy of the block taken before it is modified below.
3010 BlockNode backup= *b;
// Make the block intra with colour 0 so that only the neighbours contribute.
3014 b->type|= BLOCK_INTRA;
3015 b->color[plane_index]= 0;
3016 memset(dst, 0, obmc_stride*obmc_stride*sizeof(DWTELEM));
// i selects one of the four block areas overlapping this block (offsets -1/0 in x and y).
3019 int mb_x2= mb_x + (i &1) - 1;
3020 int mb_y2= mb_y + (i>>1) - 1;
3021 int x= block_w*mb_x2 + block_w/2;
3022 int y= block_w*mb_y2 + block_w/2;
3024 add_yblock(s, dst + ((i&1)+(i>>1)*obmc_stride)*block_w, NULL, ref, obmc,
3025 x, y, block_w, block_w, w, h, obmc_stride, ref_stride, obmc_stride, mb_x2, mb_y2, 0, 0, plane_index);
// Only samples inside the picture are accumulated.
3027 for(y2= FFMAX(y, 0); y2<FFMIN(h, y+block_w); y2++){
3028 for(x2= FFMAX(x, 0); x2<FFMIN(w, x+block_w); x2++){
3029 int index= x2-(block_w*mb_x - block_w/2) + (y2-(block_w*mb_y - block_w/2))*obmc_stride;
3030 int obmc_v= obmc[index];
// At picture borders the weight of the missing neighbour is folded into this block's weight.
3032 if(y<0) obmc_v += obmc[index + block_w*obmc_stride];
3033 if(x<0) obmc_v += obmc[index + block_w];
3034 if(y+block_w>h) obmc_v += obmc[index - block_w*obmc_stride];
3035 if(x+block_w>w) obmc_v += obmc[index - block_w];
3036 //FIXME precalc this or simplify it somehow else
3038 d = -dst[index] + (1<<(FRAC_BITS-1));
3040 ab += (src[x2 + y2*ref_stride] - (d>>FRAC_BITS)) * obmc_v;
3041 aa += obmc_v * obmc_v; //FIXME precalculate this
// NOTE(review): aa is the divisor here and must be non-zero.
3047 return clip(((ab<<6) + aa/2)/aa, 0, 255); //FIXME we shouldn't need clipping
/*
 * get_block_bits: returns a bit-cost estimate for the block at (x, y).
 * Intra blocks are costed from the colour differences to the left block,
 * other blocks from the motion vector difference to the mid_pred() predictor
 * of left/top/top-right. w is the offset used to locate the top-right
 * neighbour.
 * NOTE(review): the value returned by the range check and the declarations of
 * dmx/dmy are not visible in this listing.
 */
3050 static inline int get_block_bits(SnowContext *s, int x, int y, int w){
3051 const int b_stride = s->b_width << s->block_max_depth;
3052 const int b_height = s->b_height<< s->block_max_depth;
3053 int index= x + y*b_stride;
// NOTE(review): these pointers are formed before the range check on x/y below.
3054 BlockNode *b = &s->block[index];
3055 BlockNode *left = x ? &s->block[index-1] : &null_block;
3056 BlockNode *top = y ? &s->block[index-b_stride] : &null_block;
3057 BlockNode *tl = y && x ? &s->block[index-b_stride-1] : left;
3058 BlockNode *tr = y && x+w<b_stride ? &s->block[index-b_stride+w] : tl;
3060 // int mx_context= av_log2(2*ABS(left->mx - top->mx));
3061 // int my_context= av_log2(2*ABS(left->my - top->my));
// Positions outside the block array are handled separately.
3063 if(x<0 || x>=b_stride || y>=b_height)
3065 dmx= b->mx - mid_pred(left->mx, top->mx, tr->mx);
3066 dmy= b->my - mid_pred(left->my, top->my, tr->my);
3072 00001XXXX 15-30 8-15
3074 //FIXME try accurate rate
3075 //FIXME intra and inter predictors if surrounding blocks aren't the same type
3076 if(b->type & BLOCK_INTRA){
3077 return 3+2*( av_log2(2*ABS(left->color[0] - b->color[0]))
3078 + av_log2(2*ABS(left->color[1] - b->color[1]))
3079 + av_log2(2*ABS(left->color[2] - b->color[2])));
3081 return 2*(1 + av_log2(2*ABS(dmx))
3082 + av_log2(2*ABS(dmy))); //FIXME kill the 2* can be merged in lambda
/*
 * get_block_rd: returns distortion + rate*penalty_factor for the block at
 * (mb_x, mb_y).
 * The block's own prediction (pred_block into cur) is weighted with
 * obmc_edged and added to the neighbours' contribution read from the
 * scratchpad (pred); distortion comes from s->dsp.me_cmp[0] and rate from
 * get_block_bits().
 * NOTE(review): the store of v, the loops around the me_cmp / get_block_bits
 * calls and their conditions are not visible in this listing.
 */
3085 static int get_block_rd(SnowContext *s, int mb_x, int mb_y, int plane_index, const uint8_t *obmc_edged){
3086 Plane *p= &s->plane[plane_index];
3087 const int block_size = MB_SIZE >> s->block_max_depth;
3088 const int block_w = plane_index ? block_size/2 : block_size;
3089 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
3090 const int obmc_stride= plane_index ? block_size : 2*block_size;
3091 const int ref_stride= s->current_picture.linesize[plane_index];
3092 uint8_t *ref= s-> last_picture.data[plane_index];
3093 uint8_t *dst= s->current_picture.data[plane_index];
3094 uint8_t *src= s-> input_picture.data[plane_index];
3095 DWTELEM *pred= (DWTELEM*)s->m.obmc_scratchpad + plane_index*block_size*block_size*4;
// Variable-length stack buffers sized by the picture stride.
3096 uint8_t cur[ref_stride*2*MB_SIZE]; //FIXME alignment
3097 uint8_t tmp[ref_stride*(2*MB_SIZE+5)];
3098 const int b_stride = s->b_width << s->block_max_depth;
3099 const int b_height = s->b_height<< s->block_max_depth;
3100 const int w= p->width;
3101 const int h= p->height;
3104 const int penalty_factor= get_penalty_factor(s->lambda, s->lambda2, s->avctx->me_cmp);
// Top-left corner of the 2*block_w square influenced by this block.
3105 int sx= block_w*mb_x - block_w/2;
3106 int sy= block_w*mb_y - block_w/2;
// Part of that square lying inside the picture.
3107 const int x0= FFMAX(0,-sx);
3108 const int y0= FFMAX(0,-sy);
3109 const int x1= FFMIN(block_w*2, w-sx);
3110 const int y1= FFMIN(block_w*2, h-sy);
3113 pred_block(s, cur, ref, tmp, ref_stride, sx, sy, block_w*2, block_w*2, &s->block[mb_x + mb_y*b_stride], plane_index, w, h);
3115 for(y=y0; y<y1; y++){
3116 const uint8_t *obmc1= obmc_edged + y*obmc_stride;
3117 const DWTELEM *pred1 = pred + y*obmc_stride;
3118 uint8_t *cur1 = cur + y*ref_stride;
3119 uint8_t *dst1 = dst + sx + (sy+y)*ref_stride;
3120 for(x=x0; x<x1; x++){
// Weighted own prediction plus the neighbours' part, rounded down and clipped to 0..255.
3121 int v = (cur1[x] * obmc1[x]) << (FRAC_BITS - LOG2_OBMC_MAX);
3122 v = (v + pred1[x]) >> FRAC_BITS;
3123 if(v&(~255)) v= ~(v>>31);
3128 //FIXME sad/ssd can be broken up, but wavelet cmp should be one 32x32 block
// Distortion measured in four 16x16 pieces in one path, in a single call in the other.
3132 int off = sx+16*(i&1) + (sy+16*(i>>1))*ref_stride;
3133 distortion += s->dsp.me_cmp[0](&s->m, src + off, dst + off, ref_stride, 16);
3137 distortion = s->dsp.me_cmp[0](&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, block_w*2);
// Rate: bits of this block and of neighbours whose prediction depends on it.
3146 rate += get_block_bits(s, mb_x + (i&1) - (i>>1), mb_y + (i>>1), 1);
3148 if(mb_x == b_stride-2)
3149 rate += get_block_bits(s, mb_x + 1, mb_y + 1, 1);
3151 return distortion + rate*penalty_factor;
/*
 * get_4block_rd: returns distortion + rate*penalty_factor for the 2x2 group
 * of blocks starting at (mb_x, mb_y).
 * add_yblock() reconstructs each of the i-indexed surrounding block areas
 * (a 3-wide grid, see i%3 and i/3) into dst; areas outside the picture are
 * copied from src so that they add no distortion; s->dsp.me_cmp measures each
 * area and get_block_bits() supplies the rate.
 * NOTE(review): the loop over i, the conditions around the memcpy loops and
 * the use of `merged` are not visible in this listing.
 */
3154 static int get_4block_rd(SnowContext *s, int mb_x, int mb_y, int plane_index){
3156 Plane *p= &s->plane[plane_index];
3157 const int block_size = MB_SIZE >> s->block_max_depth;
3158 const int block_w = plane_index ? block_size/2 : block_size;
3159 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
3160 const int obmc_stride= plane_index ? block_size : 2*block_size;
3161 const int ref_stride= s->current_picture.linesize[plane_index];
3162 uint8_t *ref= s-> last_picture.data[plane_index];
3163 uint8_t *dst= s->current_picture.data[plane_index];
3164 uint8_t *src= s-> input_picture.data[plane_index];
// All-zero coefficient buffer passed to add_yblock() with a dst_stride of 0.
// NOTE(review): it is declared const but passed as a non-const DWTELEM*;
// confirm add_yblock never writes through it on this path.
3165 const static DWTELEM zero_dst[4096]; //FIXME
3166 const int b_stride = s->b_width << s->block_max_depth;
3167 const int b_height = s->b_height<< s->block_max_depth;
3168 const int w= p->width;
3169 const int h= p->height;
3172 const int penalty_factor= get_penalty_factor(s->lambda, s->lambda2, s->avctx->me_cmp);
3175 int mb_x2= mb_x + (i%3) - 1;
3176 int mb_y2= mb_y + (i/3) - 1;
3177 int x= block_w*mb_x2 + block_w/2;
3178 int y= block_w*mb_y2 + block_w/2;
3180 add_yblock(s, zero_dst, dst, ref, obmc,
3181 x, y, block_w, block_w, w, h, /*dst_stride*/0, ref_stride, obmc_stride, mb_x2, mb_y2, 1, 1, plane_index);
3183 //FIXME find a cleaner/simpler way to skip the outside stuff
// Rows above the picture (y2 < 0) and below it (y2 >= h).
3184 for(y2= y; y2<0; y2++)
3185 memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, block_w);
3186 for(y2= h; y2<y+block_w; y2++)
3187 memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, block_w);
// Columns left of the picture (-x bytes) and right of it (x+block_w-w bytes).
3189 for(y2= y; y2<y+block_w; y2++)
3190 memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, -x);
3193 for(y2= y; y2<y+block_w; y2++)
3194 memcpy(dst + w + y2*ref_stride, src + w + y2*ref_stride, x+block_w - w);
// me_cmp index 0 is used for 16-wide blocks, index 1 for 8-wide blocks.
3197 assert(block_w== 8 || block_w==16);
3198 distortion += s->dsp.me_cmp[block_w==8](&s->m, src + x + y*ref_stride, dst + x + y*ref_stride, ref_stride, block_w);
3202 BlockNode *b= &s->block[mb_x+mb_y*b_stride];
// True when the four blocks of the group compare equal via same_block().
3203 int merged= same_block(b,b+1) && same_block(b,b+b_stride) && same_block(b,b+b_stride+1);
3211 rate = get_block_bits(s, mb_x, mb_y, 2);
// When merged, the first four offsets (the group itself) are skipped.
3212 for(i=merged?4:0; i<9; i++){
3213 static const int dxy[9][2] = {{0,0},{1,0},{0,1},{1,1},{2,0},{2,1},{-1,2},{0,2},{1,2}};
3214 rate += get_block_bits(s, mb_x + dxy[i][0], mb_y + dxy[i][1], 1);
3217 return distortion + rate*penalty_factor;
/*
 * check_block: tries the candidate p[] for block (mb_x, mb_y) and scores it
 * with get_block_rd() on plane 0.
 * Visible behaviour: the block is backed up; one path stores p[0..2] as
 * colours and marks the block intra; another path hashes p[0], p[1] into
 * s->me_cache and clears the intra flag.
 * NOTE(review): the test on `intra`, the early return on a cache hit, the
 * assignment of mx/my, the comparison with *best_rd and the restore from
 * `backup` are not visible in this listing.
 */
3220 static always_inline int check_block(SnowContext *s, int mb_x, int mb_y, int p[3], int intra, const uint8_t *obmc_edged, int *best_rd){
3221 const int b_stride= s->b_width << s->block_max_depth;
3222 BlockNode *block= &s->block[mb_x + mb_y * b_stride];
3223 BlockNode backup= *block;
3224 int rd, index, value;
3226 assert(mb_x>=0 && mb_y>=0);
3227 assert(mb_x<b_stride);
3230 block->color[0] = p[0];
3231 block->color[1] = p[1];
3232 block->color[2] = p[2];
3233 block->type |= BLOCK_INTRA;
// Cache slot and tag derived from the candidate and the current generation.
3235 index= (p[0] + 31*p[1]) & (ME_CACHE_SIZE-1);
3236 value= s->me_cache_generation + (p[0]>>10) + (p[1]<<6);
// A matching tag means this candidate was already recorded in this generation.
3237 if(s->me_cache[index] == value)
3239 s->me_cache[index]= value;
3243 block->type &= ~BLOCK_INTRA;
3246 rd= get_block_rd(s, mb_x, mb_y, 0, obmc_edged);
3258 /* special case for int[2] args we discard afterward, fixes compilation prob with gcc 2.95 */
/*
 * check_block_inter: packs (p0, p1) into a local array and forwards to
 * check_block().
 * NOTE(review): check_block declares its parameter as int p[3] and reads p[2]
 * on the path that stores colours; this wrapper supplies only two elements,
 * so callers presumably always pass intra == 0 here -- confirm.
 */
3259 static always_inline int check_block_inter(SnowContext *s, int mb_x, int mb_y, int p0, int p1, int intra, const uint8_t *obmc_edged, int *best_rd){
3260 int p[2] = {p0, p1};
3261 return check_block(s, mb_x, mb_y, p, intra, obmc_edged, best_rd);
/*
 * check_4block_inter: tries the candidate (p0, p1) for the whole 2x2 group of
 * blocks starting at (mb_x, mb_y), which must have even coordinates, and
 * scores it with get_4block_rd() on plane 0.
 * The four blocks are backed up first; the top-left block is made non-intra
 * and copied to the other three; the backup can be written back afterwards.
 * NOTE(review): the early return on a cache hit, the assignment of mx/my, the
 * comparison with *best_rd and the condition for restoring the backup are not
 * visible in this listing.
 */
3264 static always_inline int check_4block_inter(SnowContext *s, int mb_x, int mb_y, int p0, int p1, int *best_rd){
3265 const int b_stride= s->b_width << s->block_max_depth;
3266 BlockNode *block= &s->block[mb_x + mb_y * b_stride];
3267 BlockNode backup[4]= {block[0], block[1], block[b_stride], block[b_stride+1]};
3268 int rd, index, value;
3270 assert(mb_x>=0 && mb_y>=0);
3271 assert(mb_x<b_stride);
3272 assert(((mb_x|mb_y)&1) == 0);
// Cache slot and tag derived from the candidate and the current generation.
3274 index= (p0 + 31*p1) & (ME_CACHE_SIZE-1);
3275 value= s->me_cache_generation + (p0>>10) + (p1<<6);
3276 if(s->me_cache[index] == value)
3278 s->me_cache[index]= value;
3282 block->type &= ~BLOCK_INTRA;
// Replicate the candidate block over the other three blocks of the group.
3283 block[1]= block[b_stride]= block[b_stride+1]= *block;
3285 rd= get_4block_rd(s, mb_x, mb_y, 0);
3292 block[0]= backup[0];
3293 block[1]= backup[1];
3294 block[b_stride]= backup[2];
3295 block[b_stride+1]= backup[3];
3300 static void iterative_me(SnowContext *s){
3301 int pass, mb_x, mb_y;
3302 const int b_width = s->b_width << s->block_max_depth;
3303 const int b_height= s->b_height << s->block_max_depth;
3304 const int b_stride= b_width;
3307 for(pass=0; pass<50; pass++){
3310 for(mb_y= 0; mb_y<b_height; mb_y++){
3311 for(mb_x= 0; mb_x<b_width; mb_x++){
3312 int dia_change, i, j;
3313 int best_rd= INT_MAX;
3315 const int index= mb_x + mb_y * b_stride;
3316 BlockNode *block= &s->block[index];
3317 BlockNode *tb = mb_y ? &s->block[index-b_stride ] : &null_block;
3318 BlockNode *lb = mb_x ? &s->block[index -1] : &null_block;
3319 BlockNode *rb = mb_x+1<b_width ? &s->block[index +1] : &null_block;
3320 BlockNode *bb = mb_y+1<b_height ? &s->block[index+b_stride ] : &null_block;
3321 BlockNode *tlb= mb_x && mb_y ? &s->block[index-b_stride-1] : &null_block;
3322 BlockNode *trb= mb_x+1<b_width && mb_y ? &s->block[index-b_stride+1] : &null_block;
3323 BlockNode *blb= mb_x && mb_y+1<b_height ? &s->block[index+b_stride-1] : &null_block;
3324 BlockNode *brb= mb_x+1<b_width && mb_y+1<b_height ? &s->block[index+b_stride+1] : &null_block;
3325 const int b_w= (MB_SIZE >> s->block_max_depth);
3326 uint8_t obmc_edged[b_w*2][b_w*2];
3328 if(pass && (block->type & BLOCK_OPT))
3330 block->type |= BLOCK_OPT;
3334 if(!s->me_cache_generation)
3335 memset(s->me_cache, 0, sizeof(s->me_cache));
3336 s->me_cache_generation += 1<<22;
3341 memcpy(obmc_edged, obmc_tab[s->block_max_depth], b_w*b_w*4);
3343 for(y=0; y<b_w*2; y++)
3344 memset(obmc_edged[y], obmc_edged[y][0] + obmc_edged[y][b_w-1], b_w);
3345 if(mb_x==b_stride-1)
3346 for(y=0; y<b_w*2; y++)
3347 memset(obmc_edged[y]+b_w, obmc_edged[y][b_w] + obmc_edged[y][b_w*2-1], b_w);
3349 for(x=0; x<b_w*2; x++)
3350 obmc_edged[0][x] += obmc_edged[b_w-1][x];
3351 for(y=1; y<b_w; y++)
3352 memcpy(obmc_edged[y], obmc_edged[0], b_w*2);
3354 if(mb_y==b_height-1){
3355 for(x=0; x<b_w*2; x++)
3356 obmc_edged[b_w*2-1][x] += obmc_edged[b_w][x];
3357 for(y=b_w; y<b_w*2-1; y++)
3358 memcpy(obmc_edged[y], obmc_edged[b_w*2-1], b_w*2);
3362 //skip stuff outside the picture
3363 if(mb_x==0 || mb_y==0 || mb_x==b_width-1 || mb_y==b_height-1)
3365 uint8_t *src= s-> input_picture.data[0];
3366 uint8_t *dst= s->current_picture.data[0];
3367 const int stride= s->current_picture.linesize[0];
3368 const int block_w= MB_SIZE >> s->block_max_depth;
3369 const int sx= block_w*mb_x - block_w/2;
3370 const int sy= block_w*mb_y - block_w/2;
3371 const int w= s->plane[0].width;
3372 const int h= s->plane[0].height;
3376 memcpy(dst + sx + y*stride, src + sx + y*stride, block_w*2);
3377 for(y=h; y<sy+block_w*2; y++)
3378 memcpy(dst + sx + y*stride, src + sx + y*stride, block_w*2);
3380 for(y=sy; y<sy+block_w*2; y++)
3381 memcpy(dst + sx + y*stride, src + sx + y*stride, -sx);
3383 if(sx+block_w*2 > w){
3384 for(y=sy; y<sy+block_w*2; y++)
3385 memcpy(dst + w + y*stride, src + w + y*stride, sx+block_w*2 - w);
3389 // intra(black) = neighbors' contribution to the current block
3391 color[i]= get_dc(s, mb_x, mb_y, i);
3393 // get previous score (can't be cached due to OBMC)
3394 if(pass > 0 && (block->type&BLOCK_INTRA)){
3395 int color0[3]= {block->color[0], block->color[1], block->color[2]};
3396 check_block(s, mb_x, mb_y, color0, 1, *obmc_edged, &best_rd);
3398 check_block_inter(s, mb_x, mb_y, block->mx, block->my, 0, *obmc_edged, &best_rd);
3400 check_block_inter(s, mb_x, mb_y, 0, 0, 0, *obmc_edged, &best_rd);
3401 check_block_inter(s, mb_x, mb_y, tb->mx, tb->my, 0, *obmc_edged, &best_rd);
3402 check_block_inter(s, mb_x, mb_y, lb->mx, lb->my, 0, *obmc_edged, &best_rd);
3403 check_block_inter(s, mb_x, mb_y, rb->mx, rb->my, 0, *obmc_edged, &best_rd);
3404 check_block_inter(s, mb_x, mb_y, bb->mx, bb->my, 0, *obmc_edged, &best_rd);
3407 //FIXME avoid subpel interpol / round to nearest integer
3410 for(i=0; i<FFMAX(s->avctx->dia_size, 1); i++){
3412 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+4*(i-j), block->my+(4*j), 0, *obmc_edged, &best_rd);
3413 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx-4*(i-j), block->my-(4*j), 0, *obmc_edged, &best_rd);
3414 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+4*(i-j), block->my-(4*j), 0, *obmc_edged, &best_rd);
3415 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx-4*(i-j), block->my+(4*j), 0, *obmc_edged, &best_rd);
3421 static const int square[8][2]= {{+1, 0},{-1, 0},{ 0,+1},{ 0,-1},{+1,+1},{-1,-1},{+1,-1},{-1,+1},};
3424 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+square[i][0], block->my+square[i][1], 0, *obmc_edged, &best_rd);
3426 //FIXME or try the standard 2 pass qpel or similar
3428 check_block(s, mb_x, mb_y, color, 1, *obmc_edged, &best_rd);
3429 //FIXME RD style color selection
3431 if(!same_block(block, &backup)){
3432 if(tb != &null_block) tb ->type &= ~BLOCK_OPT;
3433 if(lb != &null_block) lb ->type &= ~BLOCK_OPT;
3434 if(rb != &null_block) rb ->type &= ~BLOCK_OPT;
3435 if(bb != &null_block) bb ->type &= ~BLOCK_OPT;
3436 if(tlb!= &null_block) tlb->type &= ~BLOCK_OPT;
3437 if(trb!= &null_block) trb->type &= ~BLOCK_OPT;
3438 if(blb!= &null_block) blb->type &= ~BLOCK_OPT;
3439 if(brb!= &null_block) brb->type &= ~BLOCK_OPT;
3444 av_log(NULL, AV_LOG_ERROR, "pass:%d changed:%d\n", pass, change);
3449 if(s->block_max_depth == 1){
3451 for(mb_y= 0; mb_y<b_height; mb_y+=2){
3452 for(mb_x= 0; mb_x<b_width; mb_x+=2){
3453 int dia_change, i, j;
3454 int best_rd, init_rd;
3455 const int index= mb_x + mb_y * b_stride;
3458 b[0]= &s->block[index];
3460 b[2]= b[0]+b_stride;
3462 if(same_block(b[0], b[1]) &&
3463 same_block(b[0], b[2]) &&
3464 same_block(b[0], b[3]))
3467 if(!s->me_cache_generation)
3468 memset(s->me_cache, 0, sizeof(s->me_cache));
3469 s->me_cache_generation += 1<<22;
3471 init_rd= best_rd= get_4block_rd(s, mb_x, mb_y, 0);
3473 check_4block_inter(s, mb_x, mb_y,
3474 (b[0]->mx + b[1]->mx + b[2]->mx + b[3]->mx + 2) >> 2,
3475 (b[0]->my + b[1]->my + b[2]->my + b[3]->my + 2) >> 2, &best_rd);
3478 if(!(b[i]->type&BLOCK_INTRA))
3479 check_4block_inter(s, mb_x, mb_y, b[i]->mx, b[i]->my, &best_rd);
3481 if(init_rd != best_rd)
3485 av_log(NULL, AV_LOG_ERROR, "pass:4mv changed:%d\n", change*4);
/*
 * Quantize the coefficients of one subband in place.
 *
 * s      codec context; s->qlog (global quantizer, log scale) and
 *        s->spatial_decomposition_count are read
 * b      subband; its level, width, height and per-band qlog are read
 * src    coefficients, addressed as src[x + y*stride]
 * stride distance between vertically adjacent coefficients, in elements
 * bias   non-zero selects a rounding bias of 0, zero selects (3*qmul)>>3
 *
 * Returns without touching src when s->qlog equals LOSSLESS_QLOG.
 */
3489 static void quantize(SnowContext *s, SubBand *b, DWTELEM *src, int stride, int bias){
3490 const int level= b->level;
3491 const int w= b->width;
3492 const int h= b->height;
// effective quantizer = global + per-band value, limited to [0, QROOT*16]
3493 const int qlog= clip(s->qlog + b->qlog, 0, QROOT*16);
// the low bits of qlog select a qexp[] entry, the remaining bits become a left shift
3494 const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
3495 int x,y, thres1, thres2;
3498 if(s->qlog == LOSSLESS_QLOG) return;
// from here on "bias" holds the rounding offset, no longer the caller's flag
3500 bias= bias ? 0 : (3*qmul)>>3;
3501 thres1= ((qmul - bias)>>QEXPSHIFT) - 1;
// first variant: plain division by qmul, no rounding offset added
3507 int i= src[x + y*stride];
// a single unsigned comparison tests i against a range around zero;
// presumably the coefficients failing it are the ones stored as 0 below
3509 if((unsigned)(i+thres1) > thres2){
3512 i/= qmul; //FIXME optimize
3513 src[x + y*stride]= i;
3517 i/= qmul; //FIXME optimize
// stored negated, so presumably i held the magnitude of a negative input here
3518 src[x + y*stride]= -i;
3521 src[x + y*stride]= 0;
// second variant: same scheme, with the rounding offset added before dividing
3527 int i= src[x + y*stride];
3529 if((unsigned)(i+thres1) > thres2){
3532 i= (i + bias) / qmul; //FIXME optimize
3533 src[x + y*stride]= i;
3537 i= (i + bias) / qmul; //FIXME optimize
3538 src[x + y*stride]= -i;
3541 src[x + y*stride]= 0;
// true only for the last decomposition level; the timer call inside is disabled
3545 if(level+1 == s->spatial_decomposition_count){
3546 // STOP_TIMER("quantize")
/*
 * Dequantize rows start_y (inclusive) to end_y (exclusive) of one subband
 * whose rows are fetched from a slice buffer.
 *
 * s        codec context; s->qlog and s->qbias are read
 * sb       slice buffer the rows are fetched from
 * b        subband; width, qlog, stride_line and buf_x/y_offset are read
 * src, stride  appear only in a commented-out line of the visible code
 * start_y, end_y  row range, in subband rows
 *
 * Returns without doing anything when s->qlog equals LOSSLESS_QLOG.
 */
3550 static void dequantize_slice_buffered(SnowContext *s, slice_buffer * sb, SubBand *b, DWTELEM *src, int stride, int start_y, int end_y){
3551 const int w= b->width;
// same qlog -> qmul derivation as used by quantize()
3552 const int qlog= clip(s->qlog + b->qlog, 0, QROOT*16);
3553 const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
// reconstruction offset, proportional to the quantizer step
3554 const int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
3558 if(s->qlog == LOSSLESS_QLOG) return;
3560 for(y=start_y; y<end_y; y++){
3561 // DWTELEM * line = slice_buffer_get_line_from_address(sb, src + (y * stride));
// subband row y maps to slice-buffer line y*stride_line + buf_y_offset, starting at column buf_x_offset
3562 DWTELEM * line = slice_buffer_get_line(sb, (y * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
// scales -i and negates the result, so qadd is applied to the magnitude (presumably the i<0 case)
3566 line[x]= -((-i*qmul + qadd)>>(QEXPSHIFT)); //FIXME try different bias
3568 line[x]= (( i*qmul + qadd)>>(QEXPSHIFT));
// timing is reported only for subbands wider than 200 coefficients
3572 if(w > 200 /*level+1 == s->spatial_decomposition_count*/){
3573 STOP_TIMER("dquant")
/*
 * Dequantize the coefficients of one subband in place.
 *
 * s      codec context; s->qlog and s->qbias are read
 * b      subband; width, height and per-band qlog are read
 * src    coefficients, addressed as src[x + y*stride]
 * stride distance between vertically adjacent coefficients, in elements
 *
 * Returns without touching src when s->qlog equals LOSSLESS_QLOG.
 */
3577 static void dequantize(SnowContext *s, SubBand *b, DWTELEM *src, int stride){
3578 const int w= b->width;
3579 const int h= b->height;
// same qlog -> qmul derivation as used by quantize()
3580 const int qlog= clip(s->qlog + b->qlog, 0, QROOT*16);
3581 const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
// reconstruction offset, proportional to the quantizer step
3582 const int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
3586 if(s->qlog == LOSSLESS_QLOG) return;
3590 int i= src[x + y*stride];
// scales -i and negates the result, so qadd is applied to the magnitude (presumably the i<0 case)
3592 src[x + y*stride]= -((-i*qmul + qadd)>>(QEXPSHIFT)); //FIXME try different bias
3594 src[x + y*stride]= (( i*qmul + qadd)>>(QEXPSHIFT));
// timing is reported only for subbands wider than 200 coefficients
3598 if(w > 200 /*level+1 == s->spatial_decomposition_count*/){
3599 STOP_TIMER("dquant")
/*
 * Replace every coefficient of a subband by its difference to a prediction
 * built from neighbouring coefficients (left, top, top-right, top-left).
 *
 * s          codec context (not referenced in the visible lines)
 * b          subband; width and height are read
 * src        coefficients, addressed as src[x + y*stride]; modified in place
 * stride     distance between vertically adjacent coefficients, in elements
 * inverse    not referenced in the visible lines
 * use_median presumably selects between the two mid_pred() predictors below
 *            (the selecting condition is not visible here)
 */
3603 static void decorrelate(SnowContext *s, SubBand *b, DWTELEM *src, int stride, int inverse, int use_median){
3604 const int w= b->width;
3605 const int h= b->height;
// walk from the bottom-right corner backwards: every neighbour read below has a
// smaller index and therefore still holds its original, not yet subtracted value
3608 for(y=h-1; y>=0; y--){
3609 for(x=w-1; x>=0; x--){
3610 int i= x + y*stride;
// predictor A: median of left, top and top-right; plain left when on the first row or last column
3614 if(y && x+1<w) src[i] -= mid_pred(src[i - 1], src[i - stride], src[i - stride + 1]);
3615 else src[i] -= src[i - 1];
// predictor B: median of left, top and the gradient left + top - top-left; plain left on the first row
3617 if(y) src[i] -= mid_pred(src[i - 1], src[i - stride], src[i - 1] + src[i - stride] - src[i - 1 - stride]);
3618 else src[i] -= src[i - 1];
// predictor using only the top neighbour (presumably the x==0 column, where no left neighbour exists)
3621 if(y) src[i] -= src[i - stride];
/*
 * Add the neighbour-based prediction back to rows start_y (inclusive) to
 * end_y (exclusive) of a subband stored in a slice buffer; this uses the same
 * predictors as decorrelate() with += instead of -=.
 *
 * s          codec context (not referenced in the visible lines)
 * sb         slice buffer the rows are fetched from
 * b          subband; width, stride_line and buf_x/y_offset are read
 * src, stride  appear only in a commented-out line of the visible code
 * inverse    not referenced in the visible lines
 * use_median presumably selects between the two mid_pred() predictors below
 * start_y, end_y  row range, in subband rows
 */
3627 static void correlate_slice_buffered(SnowContext *s, slice_buffer * sb, SubBand *b, DWTELEM *src, int stride, int inverse, int use_median, int start_y, int end_y){
3628 const int w= b->width;
// fetch the row above the first one to process (presumably only when start_y > 0)
3637 line = slice_buffer_get_line(sb, ((start_y - 1) * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
3639 for(y=start_y; y<end_y; y++){
3641 // line = slice_buffer_get_line_from_address(sb, src + (y * stride));
// subband row y maps to slice-buffer line y*stride_line + buf_y_offset, starting at column buf_x_offset
3642 line = slice_buffer_get_line(sb, (y * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
// predictor A: median of left, top and top-right; plain left when on the first row or last column
3646 if(y && x+1<w) line[x] += mid_pred(line[x - 1], prev[x], prev[x + 1]);
3647 else line[x] += line[x - 1];
// predictor B: median of left, top and the gradient left + top - top-left; plain left on the first row
3649 if(y) line[x] += mid_pred(line[x - 1], prev[x], line[x - 1] + prev[x] - prev[x - 1]);
3650 else line[x] += line[x - 1];
// predictor using only the top neighbour
3653 if(y) line[x] += prev[x];
3658 // STOP_TIMER("correlate")
/*
 * Add the neighbour-based prediction back to every coefficient of a subband,
 * in place; this uses the same predictors as decorrelate() with += instead of -=.
 *
 * s          codec context (not referenced in the visible lines)
 * b          subband; width and height are read
 * src        coefficients, addressed as src[x + y*stride]
 * stride     distance between vertically adjacent coefficients, in elements
 * inverse    not referenced in the visible lines
 * use_median presumably selects between the two mid_pred() predictors below
 */
3661 static void correlate(SnowContext *s, SubBand *b, DWTELEM *src, int stride, int inverse, int use_median){
3662 const int w= b->width;
3663 const int h= b->height;
3668 int i= x + y*stride;
// predictor A: median of left, top and top-right; plain left when on the first row or last column
3672 if(y && x+1<w) src[i] += mid_pred(src[i - 1], src[i - stride], src[i - stride + 1]);
3673 else src[i] += src[i - 1];
// predictor B: median of left, top and the gradient left + top - top-left; plain left on the first row
3675 if(y) src[i] += mid_pred(src[i - 1], src[i - stride], src[i - 1] + src[i - stride] - src[i - 1 - stride]);
3676 else src[i] += src[i - 1];
// predictor using only the top neighbour
3679 if(y) src[i] += src[i - stride];
/*
 * Write the frame header to the range coder s->c.
 *
 * The keyframe flag is coded with a local state array (kstate) that is reset
 * to MID_STATE on every call; all other fields use s->header_state.
 * The field order written here is the order decode_header() reads.
 */
3685 static void encode_header(SnowContext *s){
3686 int plane_index, level, orientation;
3689 memset(kstate, MID_STATE, sizeof(kstate));
3691 put_rac(&s->c, kstate, s->keyframe);
3692 if(s->keyframe || s->always_reset)
// stream-level parameters (presumably written for keyframes only; the guarding
// condition is not visible here)
3695 put_symbol(&s->c, s->header_state, s->version, 0);
3696 put_rac(&s->c, s->header_state, s->always_reset);
3697 put_symbol(&s->c, s->header_state, s->temporal_decomposition_type, 0);
3698 put_symbol(&s->c, s->header_state, s->temporal_decomposition_count, 0);
3699 put_symbol(&s->c, s->header_state, s->spatial_decomposition_count, 0);
3700 put_symbol(&s->c, s->header_state, s->colorspace_type, 0);
3701 put_symbol(&s->c, s->header_state, s->chroma_h_shift, 0);
3702 put_symbol(&s->c, s->header_state, s->chroma_v_shift, 0);
3703 put_rac(&s->c, s->header_state, s->spatial_scalability);
3704 // put_rac(&s->c, s->header_state, s->rate_scalability);
// per-band quantizer offsets: only planes 0 and 1 are written, and orientation 2
// is skipped; decode_header() fills those entries from plane 1 and orientation 1
3706 for(plane_index=0; plane_index<2; plane_index++){
3707 for(level=0; level<s->spatial_decomposition_count; level++){
3708 for(orientation=level ? 1:0; orientation<4; orientation++){
3709 if(orientation==2) continue;
3710 put_symbol(&s->c, s->header_state, s->plane[plane_index].band[level][orientation].qlog, 1);
// per-frame parameters; the last put_symbol() argument is 1 for the fields that
// decode_header() reads back as signed values (qlog, qbias)
3715 put_symbol(&s->c, s->header_state, s->spatial_decomposition_type, 0);
3716 put_symbol(&s->c, s->header_state, s->qlog, 1);
3717 put_symbol(&s->c, s->header_state, s->mv_scale, 0);
3718 put_symbol(&s->c, s->header_state, s->qbias, 1);
3719 put_symbol(&s->c, s->header_state, s->block_max_depth, 0);
/*
 * Read the frame header from the range coder s->c into the context.
 *
 * Fields are read in the order encode_header() writes them. The keyframe
 * flag uses a local state array reset to MID_STATE on every call; everything
 * else uses s->header_state.
 *
 * Returns an int; the return statements are not visible in this excerpt
 * (presumably negative after one of the error messages below, 0 otherwise).
 */
3722 static int decode_header(SnowContext *s){
3723 int plane_index, level, orientation;
3726 memset(kstate, MID_STATE, sizeof(kstate));
3728 s->keyframe= get_rac(&s->c, kstate);
3729 if(s->keyframe || s->always_reset)
3732 s->version= get_symbol(&s->c, s->header_state, 0);
// NOTE(review): the message has no trailing "\n", unlike other av_log() calls in this file
3734 av_log(s->avctx, AV_LOG_ERROR, "version %d not supported", s->version);
3737 s->always_reset= get_rac(&s->c, s->header_state);
3738 s->temporal_decomposition_type= get_symbol(&s->c, s->header_state, 0);
3739 s->temporal_decomposition_count= get_symbol(&s->c, s->header_state, 0);
// NOTE(review): this value comes straight from the bitstream and bounds the
// band[level] loops below; no range check against MAX_DECOMPOSITIONS is visible
// here -- confirm one exists
3740 s->spatial_decomposition_count= get_symbol(&s->c, s->header_state, 0);
3741 s->colorspace_type= get_symbol(&s->c, s->header_state, 0);
3742 s->chroma_h_shift= get_symbol(&s->c, s->header_state, 0);
3743 s->chroma_v_shift= get_symbol(&s->c, s->header_state, 0);
3744 s->spatial_scalability= get_rac(&s->c, s->header_state);
3745 // s->rate_scalability= get_rac(&s->c, s->header_state);
// per-band quantizer offsets: plane 2 copies plane 1, orientation 2 copies
// orientation 1 of the same plane and level, everything else is read from the stream
3747 for(plane_index=0; plane_index<3; plane_index++){
3748 for(level=0; level<s->spatial_decomposition_count; level++){
3749 for(orientation=level ? 1:0; orientation<4; orientation++){
3751 if (plane_index==2) q= s->plane[1].band[level][orientation].qlog;
3752 else if(orientation==2) q= s->plane[plane_index].band[level][1].qlog;
3753 else q= get_symbol(&s->c, s->header_state, 1);
3754 s->plane[plane_index].band[level][orientation].qlog= q;
3760 s->spatial_decomposition_type= get_symbol(&s->c, s->header_state, 0);
3761 if(s->spatial_decomposition_type > 2){
// NOTE(review): message has no trailing "\n"
3762 av_log(s->avctx, AV_LOG_ERROR, "spatial_decomposition_type %d not supported", s->spatial_decomposition_type);
3766 s->qlog= get_symbol(&s->c, s->header_state, 1);
3767 s->mv_scale= get_symbol(&s->c, s->header_state, 0);
3768 s->qbias= get_symbol(&s->c, s->header_state, 1);
3769 s->block_max_depth= get_symbol(&s->c, s->header_state, 0);
3770 if(s->block_max_depth > 1){
// NOTE(review): message has no trailing "\n"
3771 av_log(s->avctx, AV_LOG_ERROR, "block_max_depth= %d is too large", s->block_max_depth);
// reset to a valid value so later code never sees the rejected depth
3772 s->block_max_depth= 0;
/*
 * Runs QROOT iterations, multiplying v by 2^(1/QROOT) in each one.
 * The statement that stores v is not visible in this excerpt; presumably
 * it fills the qexp[] table used by quantize()/dequantize().
 */
3779 static void init_qexp(void){
3783 for(i=0; i<QROOT; i++){
3785 v *= pow(2, 1.0 / QROOT);
/*
 * Initialisation of the SnowContext stored in avctx->priv_data: DSP function
 * tables, default coding parameters, the DWT work buffer and the geometry
 * of every subband of every plane. It is presumably shared by encoder and
 * decoder (the callers are not visible in this excerpt).
 *
 * NOTE(review): none of the av_mallocz() results and not the get_buffer()
 * result are checked in the visible code.
 */
3789 static int common_init(AVCodecContext *avctx){
3790 SnowContext *s = avctx->priv_data;
3792 int level, orientation, plane_index, dec;
3796 dsputil_init(&s->dsp, avctx);
// macro body: point both the rounding and no-rounding qpel "put" tables at the h264 qpel functions
3799 s->dsp.put_qpel_pixels_tab [0][dy+dx/4]=\
3800 s->dsp.put_no_rnd_qpel_pixels_tab[0][dy+dx/4]=\
3801 s->dsp.put_h264_qpel_pixels_tab[0][dy+dx/4];\
3802 s->dsp.put_qpel_pixels_tab [1][dy+dx/4]=\
3803 s->dsp.put_no_rnd_qpel_pixels_tab[1][dy+dx/4]=\
3804 s->dsp.put_h264_qpel_pixels_tab[1][dy+dx/4];
// mcfh(dx,dy): install mc_block_hpel<dx><dy>16 / ...8 into the half-pel "put" tables
3823 #define mcfh(dx,dy)\
3824 s->dsp.put_pixels_tab [0][dy/4+dx/8]=\
3825 s->dsp.put_no_rnd_pixels_tab[0][dy/4+dx/8]=\
3826 mc_block_hpel ## dx ## dy ## 16;\
3827 s->dsp.put_pixels_tab [1][dy/4+dx/8]=\
3828 s->dsp.put_no_rnd_pixels_tab[1][dy/4+dx/8]=\
3829 mc_block_hpel ## dx ## dy ## 8;
// defaults: 5 decomposition levels, wavelet type taken from avctx->prediction_method
3839 dec= s->spatial_decomposition_count= 5;
3840 s->spatial_decomposition_type= avctx->prediction_method; //FIXME add decorrelator type r transform_type
3842 s->chroma_h_shift= 1; //FIXME XXX
3843 s->chroma_v_shift= 1;
3845 // dec += FFMAX(s->chroma_h_shift, s->chroma_v_shift);
3847 width= s->avctx->width;
3848 height= s->avctx->height;
// one zero-initialised full-resolution coefficient buffer; all planes' subbands point into it (see b->buf below)
3850 s->spatial_dwt_buffer= av_mallocz(width*height*sizeof(DWTELEM));
3852 s->mv_scale= (s->avctx->flags & CODEC_FLAG_QPEL) ? 2 : 4;
3853 s->block_max_depth= (s->avctx->flags & CODEC_FLAG_4MV) ? 1 : 0;
3855 for(plane_index=0; plane_index<3; plane_index++){
3856 int w= s->avctx->width;
3857 int h= s->avctx->height;
// chroma subsampling (presumably applied to the chroma planes only; the condition is not visible)
3860 w>>= s->chroma_h_shift;
3861 h>>= s->chroma_v_shift;
3863 s->plane[plane_index].width = w;
3864 s->plane[plane_index].height= h;
3865 //av_log(NULL, AV_LOG_DEBUG, "%d %d\n", w, h);
// walk the levels from the highest index down, setting up each subband's geometry
3866 for(level=s->spatial_decomposition_count-1; level>=0; level--){
3867 for(orientation=level ? 1 : 0; orientation<4; orientation++){
3868 SubBand *b= &s->plane[plane_index].band[level][orientation];
3870 b->buf= s->spatial_dwt_buffer;
// the row stride doubles for every level below the top one
3872 b->stride= s->plane[plane_index].width << (s->spatial_decomposition_count - level);
// halve the dimensions: rounded up when the respective orientation test is false, rounded down otherwise
3873 b->width = (w + !(orientation&1))>>1;
3874 b->height= (h + !(orientation>1))>>1;
3876 b->stride_line = 1 << (s->spatial_decomposition_count - level);
3877 b->buf_x_offset = 0;
3878 b->buf_y_offset = 0;
// horizontal offset of the band inside a row (presumably applied when orientation&1)
3882 b->buf_x_offset = (w+1)>>1;
// vertical offset: start half a stride further (presumably applied when orientation>1)
3885 b->buf += b->stride>>1;
3886 b->buf_y_offset = b->stride_line >> 1;
// parent = same orientation one level lower (presumably set only when level > 0)
3890 b->parent= &s->plane[plane_index].band[level-1][orientation];
3891 b->x_coeff=av_mallocz(((b->width+1) * b->height+1)*sizeof(x_and_coeff));
3900 width= s->width= avctx->width;
3901 height= s->height= avctx->height;
3903 assert(width && height);
// NOTE(review): return value ignored here, whereas frame_start() checks get_buffer() < 0
3905 s->avctx->get_buffer(s->avctx, &s->mconly_picture);
/*
 * Derive b->qlog for every subband of plane p from the response of the
 * inverse DWT to a single non-zero coefficient placed in that subband.
 *
 * s  codec context; spatial_dwt_buffer is used as scratch space and overwritten
 * p  plane whose band[][] entries receive their qlog
 */
3911 static void calculate_vissual_weight(SnowContext *s, Plane *p){
3912 int width = p->width;
3913 int height= p->height;
3914 int level, orientation, x, y;
3916 for(level=0; level<s->spatial_decomposition_count; level++){
3917 for(orientation=level ? 1 : 0; orientation<4; orientation++){
3918 SubBand *b= &p->band[level][orientation];
3919 DWTELEM *buf= b->buf;
// NOTE(review): sizeof(int) is used here while the buffer is allocated with
// sizeof(DWTELEM) in common_init(); equivalent only if DWTELEM is int -- confirm
3922 memset(s->spatial_dwt_buffer, 0, sizeof(int)*width*height);
// a single impulse in the middle of this subband ...
3923 buf[b->width/2 + b->height/2*b->stride]= 256*256;
// ... transformed back to the spatial domain
3924 ff_spatial_idwt(s->spatial_dwt_buffer, width, height, width, s->spatial_decomposition_type, s->spatial_decomposition_count);
3925 for(y=0; y<height; y++){
3926 for(x=0; x<width; x++){
// 64-bit copy of the sample (the accumulation into "error" is not visible here)
3927 int64_t d= s->spatial_dwt_buffer[x + y*width];
// log to base 2^(1/QROOT) of 352256/sqrt(error), rounded to the nearest integer
3932 b->qlog= (int)(log(352256.0/sqrt(error)) / log(pow(2.0, 1.0/QROOT))+0.5);
3933 // av_log(NULL, AV_LOG_DEBUG, "%d %d %d\n", level, orientation, b->qlog/*, sqrt(error)*/);
/*
 * Encoder initialisation: refuses to run unless experimental codecs are
 * allowed, allocates motion-estimation scratch memory in s->m, sets up
 * two-pass state, computes per-band quantizer weights and validates the
 * pixel format.
 *
 * NOTE(review): the av_mallocz() results and the final get_buffer() result
 * are not checked in the visible code.
 */
3938 static int encode_init(AVCodecContext *avctx)
3940 SnowContext *s = avctx->priv_data;
// the encoder is only usable when the user has opted in to experimental codecs
3943 if(avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL){
3944 av_log(avctx, AV_LOG_ERROR, "this codec is under development, files encoded with it may not be decodable with future versions!!!\n"
3945 "use vstrict=-2 / -strict -2 to use it anyway\n");
3955 s->m.flags = avctx->flags;
3956 s->m.bit_rate= avctx->bit_rate;
// scratch buffers for the motion estimation code operating on s->m
3958 s->m.me.scratchpad= av_mallocz((avctx->width+64)*2*16*2*sizeof(uint8_t));
3959 s->m.me.map = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t));
3960 s->m.me.score_map = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t));
3961 s->m.obmc_scratchpad= av_mallocz(MB_SIZE*MB_SIZE*12*sizeof(uint32_t));
3962 h263_encode_init(&s->m); //mv_penalty
// first pass: make sure a 256-byte stats buffer exists
3964 if(avctx->flags&CODEC_FLAG_PASS1){
3965 if(!avctx->stats_out)
3966 avctx->stats_out = av_mallocz(256);
// second pass: rate control must initialise successfully
3968 if(avctx->flags&CODEC_FLAG_PASS2){
3969 if(ff_rate_control_init(&s->m) < 0)
3973 for(plane_index=0; plane_index<3; plane_index++){
3974 calculate_vissual_weight(s, &s->plane[plane_index]);
3978 avctx->coded_frame= &s->current_picture;
// only PIX_FMT_YUV420P is an active case label; the other formats are commented out
3979 switch(avctx->pix_fmt){
3980 // case PIX_FMT_YUV444P:
3981 // case PIX_FMT_YUV422P:
3982 case PIX_FMT_YUV420P:
3984 // case PIX_FMT_YUV411P:
3985 // case PIX_FMT_YUV410P:
3986 s->colorspace_type= 0;
3988 /* case PIX_FMT_RGBA32:
3992 av_log(avctx, AV_LOG_ERROR, "format not supported\n");
3995 // avcodec_get_chroma_sub_sample(avctx->pix_fmt, &s->chroma_h_shift, &s->chroma_v_shift);
// chroma subsampling hard-coded to 2x2, matching the only accepted pixel format
3996 s->chroma_h_shift= 1;
3997 s->chroma_v_shift= 1;
// select the comparison functions requested by the user for motion estimation
3999 ff_set_cmp(&s->dsp, s->dsp.me_cmp, s->avctx->me_cmp);
4000 ff_set_cmp(&s->dsp, s->dsp.me_sub_cmp, s->avctx->me_sub_cmp);
// NOTE(review): return value ignored here, whereas frame_start() checks get_buffer() < 0
4002 s->avctx->get_buffer(s->avctx, &s->input_picture);
/*
 * Per-frame picture management: extends the edges of the picture decoded or
 * encoded so far, makes it the reference (last_picture) and obtains a new
 * buffer for current_picture.
 *
 * Logs an error when get_buffer() fails; the return statements are not
 * visible in this excerpt.
 */
4007 static int frame_start(SnowContext *s){
4009 int w= s->avctx->width; //FIXME round up to x16 ?
4010 int h= s->avctx->height;
// pad the existing picture beyond its borders; chroma planes use half the size and half the edge width
4012 if(s->current_picture.data[0]){
4013 draw_edges(s->current_picture.data[0], s->current_picture.linesize[0], w , h , EDGE_WIDTH );
4014 draw_edges(s->current_picture.data[1], s->current_picture.linesize[1], w>>1, h>>1, EDGE_WIDTH/2);
4015 draw_edges(s->current_picture.data[2], s->current_picture.linesize[2], w>>1, h>>1, EDGE_WIDTH/2);
// swap current and last picture
4018 tmp= s->last_picture;
4019 s->last_picture= s->current_picture;
4020 s->current_picture= tmp;
4022 s->current_picture.reference= 1;
4023 if(s->avctx->get_buffer(s->avctx, &s->current_picture) < 0){
4024 av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed\n");
/*
 * Encode one frame.
 *
 * avctx     codec context; avctx->priv_data is the SnowContext
 * buf       output buffer the range coder writes into
 * buf_size  size of buf in bytes
 * data      input picture (AVFrame *)
 *
 * Returns the value of ff_rac_terminate() on the range coder.
 */
4031 static int encode_frame(AVCodecContext *avctx, unsigned char *buf, int buf_size, void *data){
4032 SnowContext *s = avctx->priv_data;
4033 RangeCoder * const c= &s->c;
4034 AVFrame *pict = data;
4035 const int width= s->avctx->width;
4036 const int height= s->avctx->height;
4037 int level, orientation, plane_index, i, y;
4039 ff_init_range_encoder(c, buf, buf_size);
4040 ff_build_rac_states(c, 0.05*(1LL<<32), 256-8);
// copy the caller's planes row by row into s->input_picture
4044 for(y=0; y<(height>>shift); y++)
4045 memcpy(&s->input_picture.data[i][y * s->input_picture.linesize[i]],
4046 &pict->data[i][y * pict->linesize[i]],
4049 s->new_picture = *pict;
// second pass: picture type and quality come from the rate control data
4051 if(avctx->flags&CODEC_FLAG_PASS2){
4053 pict->pict_type= s->m.rc_context.entry[avctx->frame_number].new_pict_type;
4054 s->keyframe= pict->pict_type==FF_I_TYPE;
4055 s->m.picture_number= avctx->frame_number;
4056 pict->quality= ff_rate_estimate_qscale(&s->m, 0);
// otherwise: keyframe when gop_size is 0 or the frame number is a multiple of gop_size
4058 s->keyframe= avctx->gop_size==0 || avctx->frame_number % avctx->gop_size == 0;
4059 pict->pict_type= s->keyframe ? FF_I_TYPE : FF_P_TYPE;
// quantizer on the log scale: QROOT * log2(quality / FF_QP2LAMBDA), plus a fixed offset
4063 s->qlog= rint(QROOT*log(pict->quality / (float)FF_QP2LAMBDA)/log(2));
4065 s->qlog += 61*QROOT/8;
// alternative: lossless coding (the selecting condition is not visible here)
4067 s->qlog= LOSSLESS_QLOG;
4071 s->current_picture.key_frame= s->keyframe;
4073 s->m.current_picture_ptr= &s->m.current_picture;
// P frames: fill in the fields of s->m that the motion estimation code reads
4074 if(pict->pict_type == P_TYPE){
4075 int block_width = (width +15)>>4;
4076 int block_height= (height+15)>>4;
4077 int stride= s->current_picture.linesize[0];
4079 assert(s->current_picture.data[0]);
4080 assert(s->last_picture.data[0]);
4082 s->m.avctx= s->avctx;
4083 s->m.current_picture.data[0]= s->current_picture.data[0];
4084 s->m. last_picture.data[0]= s-> last_picture.data[0];
4085 s->m. new_picture.data[0]= s-> input_picture.data[0];
4086 s->m. last_picture_ptr= &s->m. last_picture;
4088 s->m. last_picture.linesize[0]=
4089 s->m. new_picture.linesize[0]=
4090 s->m.current_picture.linesize[0]= stride;
4091 s->m.uvlinesize= s->current_picture.linesize[1];
4093 s->m.height= height;
// frame size in 16x16 blocks, rounded up
4094 s->m.mb_width = block_width;
4095 s->m.mb_height= block_height;
4096 s->m.mb_stride= s->m.mb_width+1;
4097 s->m.b8_stride= 2*s->m.mb_width+1;
4099 s->m.pict_type= pict->pict_type;
4100 s->m.me_method= s->avctx->me_method;
4101 s->m.me.scene_change_score=0;
4102 s->m.flags= s->avctx->flags;
4103 s->m.quarter_sample= (s->avctx->flags & CODEC_FLAG_QPEL)!=0;
4104 s->m.out_format= FMT_H263;
4105 s->m.unrestricted_mv= 1;
// rate-distortion multipliers derived from the frame quality
4107 s->lambda = s->m.lambda= pict->quality * 3/2; //FIXME bug somewhere else
4108 s->m.qscale= (s->m.lambda*139 + FF_LAMBDA_SCALE*64) >> (FF_LAMBDA_SHIFT + 7);
4109 s->lambda2= s->m.lambda2= (s->m.lambda*s->m.lambda + FF_LAMBDA_SCALE/2) >> FF_LAMBDA_SHIFT;
4111 s->m.dsp= s->dsp; //move
// dequantization bias: 2 for P frames, 0 otherwise
4118 s->qbias= pict->pict_type == P_TYPE ? 2 : 0;
// bit accounting from the number of bytes the range coder has produced so far
4121 s->m.misc_bits = 8*(s->c.bytestream - s->c.bytestream_start);
4123 s->m.mv_bits = 8*(s->c.bytestream - s->c.bytestream_start) - s->m.misc_bits;
4125 for(plane_index=0; plane_index<3; plane_index++){
4126 Plane *p= &s->plane[plane_index];
4130 // int bits= put_bits_count(&s->c.pb);
// load the input plane into the work buffer, scaled up by FRAC_BITS
4133 if(pict->data[plane_index]) //FIXME gray hack
4136 s->spatial_dwt_buffer[y*w + x]= pict->data[plane_index][y*pict->linesize[plane_index] + x]<<FRAC_BITS;
// last argument 0 here, 1 after reconstruction below (presumably subtract vs. add the prediction)
4139 predict_plane(s, s->spatial_dwt_buffer, plane_index, 0);
// scene change detected on a P frame: restart the range coder and switch to an I frame
4142 && pict->pict_type == P_TYPE
4143 && s->m.me.scene_change_score > s->avctx->scenechange_threshold){
4144 ff_init_range_encoder(c, buf, buf_size);
4145 ff_build_rac_states(c, 0.05*(1LL<<32), 256-8);
4146 pict->pict_type= FF_I_TYPE;
// lossless: drop the FRAC_BITS fraction again, with rounding
4152 if(s->qlog == LOSSLESS_QLOG){
4155 s->spatial_dwt_buffer[y*w + x]= (s->spatial_dwt_buffer[y*w + x] + (1<<(FRAC_BITS-1))-1)>>FRAC_BITS;
4160 ff_spatial_dwt(s->spatial_dwt_buffer, w, h, w, s->spatial_decomposition_type, s->spatial_decomposition_count);
// per subband: quantize, decorrelate, entropy-code, then undo the decorrelation
4162 for(level=0; level<s->spatial_decomposition_count; level++){
4163 for(orientation=level ? 1 : 0; orientation<4; orientation++){
4164 SubBand *b= &p->band[level][orientation];
4166 quantize(s, b, b->buf, b->stride, s->qbias);
4168 decorrelate(s, b, b->buf, b->stride, pict->pict_type == P_TYPE, 0);
4169 encode_subband(s, b, b->buf, b->parent ? b->parent->buf : NULL, b->stride, orientation);
4170 assert(b->parent==NULL || b->parent->stride == b->stride*2);
4172 correlate(s, b, b->buf, b->stride, 1, 0);
4175 // av_log(NULL, AV_LOG_DEBUG, "plane:%d bits:%d\n", plane_index, put_bits_count(&s->c.pb) - bits);
// reconstruction: dequantize every subband, then run the inverse DWT
4177 for(level=0; level<s->spatial_decomposition_count; level++){
4178 for(orientation=level ? 1 : 0; orientation<4; orientation++){
4179 SubBand *b= &p->band[level][orientation];
4181 dequantize(s, b, b->buf, b->stride);
4185 ff_spatial_idwt(s->spatial_dwt_buffer, w, h, w, s->spatial_decomposition_type, s->spatial_decomposition_count);
// lossless: restore the FRAC_BITS scaling removed before the forward transform
4186 if(s->qlog == LOSSLESS_QLOG){
4189 s->spatial_dwt_buffer[y*w + x]<<=FRAC_BITS;
4194 predict_plane(s, s->spatial_dwt_buffer, plane_index, 1);
4195 STOP_TIMER("pred-conv")}
// optional PSNR statistics: difference between reconstruction and input, per plane
4196 if(s->avctx->flags&CODEC_FLAG_PSNR){
4199 if(pict->data[plane_index]) //FIXME gray hack
4202 int d= s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x] - pict->data[plane_index][y*pict->linesize[plane_index] + x];
4206 s->avctx->error[plane_index] += error;
4207 s->current_picture.error[plane_index] = error;
// the previous reference picture is no longer needed
4211 if(s->last_picture.data[0])
4212 avctx->release_buffer(avctx, &s->last_picture);
4214 s->current_picture.coded_picture_number = avctx->frame_number;
4215 s->current_picture.pict_type = pict->pict_type;
4216 s->current_picture.quality = pict->quality;
// first pass: hand this frame's statistics to the rate control code
4217 if(avctx->flags&CODEC_FLAG_PASS1){
4218 s->m.p_tex_bits = 8*(s->c.bytestream - s->c.bytestream_start) - s->m.misc_bits - s->m.mv_bits;
4219 s->m.current_picture.display_picture_number =
4220 s->m.current_picture.coded_picture_number = avctx->frame_number;
4221 s->m.pict_type = pict->pict_type;
4222 s->m.current_picture.quality = pict->quality;
4223 ff_write_pass1_stats(&s->m);
4225 if(avctx->flags&CODEC_FLAG_PASS2){
4226 s->m.total_bits += 8*(s->c.bytestream - s->c.bytestream_start);
4231 return ff_rac_terminate(c);
/*
 * Free the memory owned by the context: the DWT work buffer, the
 * motion-estimation scratch buffers in s->m, the block array and the
 * x_coeff array of every subband. av_freep() takes the address of each
 * pointer (presumably so it can clear the pointer after freeing).
 */
4234 static void common_end(SnowContext *s){
4235 int plane_index, level, orientation;
4237 av_freep(&s->spatial_dwt_buffer);
4239 av_freep(&s->m.me.scratchpad);
4240 av_freep(&s->m.me.map);
4241 av_freep(&s->m.me.score_map);
4242 av_freep(&s->m.obmc_scratchpad);
4244 av_freep(&s->block);
// same band iteration order as the allocation loop in common_init()
4246 for(plane_index=0; plane_index<3; plane_index++){
4247 for(level=s->spatial_decomposition_count-1; level>=0; level--){
4248 for(orientation=level ? 1 : 0; orientation<4; orientation++){
4249 SubBand *b= &s->plane[plane_index].band[level][orientation];
4251 av_freep(&b->x_coeff);
/*
 * Encoder teardown. The visible code frees avctx->stats_out; any further
 * cleanup (presumably a call to common_end()) is not visible in this excerpt.
 */
4257 static int encode_end(AVCodecContext *avctx)
4259 SnowContext *s = avctx->priv_data;
4262 av_free(avctx->stats_out);
/*
 * Decoder initialisation: fixes the output pixel format to PIX_FMT_YUV420P
 * and sets up the slice buffer used for row-wise inverse DWT.
 */
4267 static int decode_init(AVCodecContext *avctx)
4269 SnowContext *s = avctx->priv_data;
4272 avctx->pix_fmt= PIX_FMT_YUV420P;
4276 block_size = MB_SIZE >> s->block_max_depth;
// slice buffer sized from the luma plane; the third argument (presumably the number of
// lines kept available) grows with the block size and the decomposition count
4277 slice_buffer_init(&s->sb, s->plane[0].height, (block_size) + (s->spatial_decomposition_count * (s->spatial_decomposition_count + 3)) + 1, s->plane[0].width, s->spatial_dwt_buffer);
/*
 * Decode one frame.
 *
 * avctx      codec context; avctx->priv_data is the SnowContext
 * data       output AVFrame, filled with current_picture (or with
 *            mconly_picture when debug bit 2048 is set)
 * data_size  set to sizeof(AVFrame)
 * buf        input bitstream handed to the range decoder
 * buf_size   size of buf in bytes
 *
 * The return statement is not visible in this excerpt (presumably the
 * number of bytes consumed, see bytes_read below).
 */
4282 static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, uint8_t *buf, int buf_size){
4283 SnowContext *s = avctx->priv_data;
4284 RangeCoder * const c= &s->c;
4286 AVFrame *picture = data;
4287 int level, orientation, plane_index;
4289 ff_init_range_decoder(c, buf, buf_size);
4290 ff_build_rac_states(c, 0.05*(1LL<<32), 256-8);
4292 s->current_picture.pict_type= FF_I_TYPE; //FIXME I vs. P
// the block array is allocated on first use
4294 if(!s->block) alloc_blocks(s);
4297 //keyframe flag duplication mess FIXME
4298 if(avctx->debug&FF_DEBUG_PICT_INFO)
4299 av_log(avctx, AV_LOG_ERROR, "keyframe:%d qlog:%d\n", s->keyframe, s->qlog);
4303 for(plane_index=0; plane_index<3; plane_index++){
4304 Plane *p= &s->plane[plane_index];
4308 int decode_state[MAX_DECOMPOSITIONS][4][1]; /* Stored state info for unpack_coeffs. 1 variable per instance. */
// debug bit 2048: additionally produce a picture built from the prediction alone
4310 if(s->avctx->debug&2048){
4311 memset(s->spatial_dwt_buffer, 0, sizeof(DWTELEM)*w*h);
4312 predict_plane(s, s->spatial_dwt_buffer, plane_index, 1);
4316 int v= s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x];
4317 s->mconly_picture.data[plane_index][y*s->mconly_picture.linesize[plane_index] + x]= v;
// entropy-decode the coefficients of every subband
4323 for(level=0; level<s->spatial_decomposition_count; level++){
4324 for(orientation=level ? 1 : 0; orientation<4; orientation++){
4325 SubBand *b= &p->band[level][orientation];
4326 unpack_coeffs(s, b, b->parent, orientation);
4329 STOP_TIMER("unpack coeffs");
4333 const int mb_h= s->b_height << s->block_max_depth;
4334 const int block_size = MB_SIZE >> s->block_max_depth;
// block size for this plane: halved for plane_index != 0
4335 const int block_w = plane_index ? block_size/2 : block_size;
4337 dwt_compose_t cs[MAX_DECOMPOSITIONS];
4342 ff_spatial_idwt_buffered_init(cs, &s->sb, w, h, 1, s->spatial_decomposition_type, s->spatial_decomposition_count);
// process the plane one block row at a time (mb_h + 1 iterations)
4343 for(mb_y=0; mb_y<=mb_h; mb_y++){
4345 int slice_starty = block_w*mb_y;
4346 int slice_h = block_w*(mb_y+1);
// unless this is a keyframe (or debug bit 512 is set) the slice is shifted up by half a block
4347 if (!(s->keyframe || s->avctx->debug&512)){
4348 slice_starty = FFMAX(0, slice_starty - (block_w >> 1));
4349 slice_h -= (block_w >> 1);
4354 for(level=0; level<s->spatial_decomposition_count; level++){
4355 for(orientation=level ? 1 : 0; orientation<4; orientation++){
4356 SubBand *b= &p->band[level][orientation];
4359 int our_mb_start = mb_y;
4360 int our_mb_end = (mb_y + 1);
// convert the block-row range into a row range of this subband, plus a per-level margin
4362 start_y = (mb_y ? ((block_w * our_mb_start) >> (s->spatial_decomposition_count - level)) + s->spatial_decomposition_count - level + extra: 0);
4363 end_y = (((block_w * our_mb_end) >> (s->spatial_decomposition_count - level)) + s->spatial_decomposition_count - level + extra);
4364 if (!(s->keyframe || s->avctx->debug&512)){
4365 start_y = FFMAX(0, start_y - (block_w >> (1+s->spatial_decomposition_count - level)));
4366 end_y = FFMAX(0, end_y - (block_w >> (1+s->spatial_decomposition_count - level)));
// never run past the bottom of the subband
4368 start_y = FFMIN(b->height, start_y);
4369 end_y = FFMIN(b->height, end_y);
4371 if (start_y != end_y){
// orientation 0: decode and correlate one row ahead of the rows being dequantized
4372 if (orientation == 0){
4373 SubBand * correlate_band = &p->band[0][0];
4374 int correlate_end_y = FFMIN(b->height, end_y + 1);
4375 int correlate_start_y = FFMIN(b->height, (start_y ? start_y + 1 : 0));
4376 decode_subband_slice_buffered(s, correlate_band, &s->sb, correlate_start_y, correlate_end_y, decode_state[0][0]);
4377 correlate_slice_buffered(s, &s->sb, correlate_band, correlate_band->buf, correlate_band->stride, 1, 0, correlate_start_y, correlate_end_y);
4378 dequantize_slice_buffered(s, &s->sb, correlate_band, correlate_band->buf, correlate_band->stride, start_y, end_y);
4381 decode_subband_slice_buffered(s, b, &s->sb, start_y, end_y, decode_state[level][orientation]);
4385 STOP_TIMER("decode_subband_slice");
// inverse DWT, advanced in steps of 4 rows up to the end of this slice
4389 for(; yd<slice_h; yd+=4){
4390 ff_spatial_idwt_buffered_slice(cs, &s->sb, w, h, 1, s->spatial_decomposition_type, s->spatial_decomposition_count, yd);
4392 STOP_TIMER("idwt slice");}
// lossless: scale the rows up by FRAC_BITS
4395 if(s->qlog == LOSSLESS_QLOG){
4396 for(; yq<slice_h && yq<h; yq++){
4397 DWTELEM * line = slice_buffer_get_line(&s->sb, yq);
4399 line[x] <<= FRAC_BITS;
4404 predict_slice_buffered(s, &s->sb, s->spatial_dwt_buffer, plane_index, 1, mb_y);
// release the slice-buffer lines belonging to this slice
4406 y = FFMIN(p->height, slice_starty);
4407 end_y = FFMIN(p->height, slice_h);
4409 slice_buffer_release(&s->sb, y++);
4412 slice_buffer_flush(&s->sb);
4414 STOP_TIMER("idwt + predict_slices")}
// the previous reference picture is no longer needed
4419 if(s->last_picture.data[0])
4420 avctx->release_buffer(avctx, &s->last_picture);
4422 if(!(s->avctx->debug&2048))
4423 *picture= s->current_picture;
4425 *picture= s->mconly_picture;
4427 *data_size = sizeof(AVFrame);
// number of input bytes the range decoder has consumed
4429 bytes_read= c->bytestream - c->bytestream_start;
4430 if(bytes_read ==0) av_log(s->avctx, AV_LOG_ERROR, "error at end of frame\n"); //FIXME
/*
 * Decoder teardown. The visible code destroys the slice buffer created in
 * decode_init(); any further cleanup is not visible in this excerpt.
 */
4435 static int decode_end(AVCodecContext *avctx)
4437 SnowContext *s = avctx->priv_data;
4439 slice_buffer_destroy(&s->sb);
/*
 * Codec descriptor for the Snow decoder. Only two initializer entries are
 * visible in this excerpt: sizeof(SnowContext) (presumably the private
 * context size) and a capabilities value of 0.
 */
4446 AVCodec snow_decoder = {
4450 sizeof(SnowContext),
// both capability flags are commented out, so the entry evaluates to 0
4455 0 /*CODEC_CAP_DR1*/ /*| CODEC_CAP_DRAW_HORIZ_BAND*/,
/*
 * Codec descriptor for the Snow encoder, compiled only when CONFIG_ENCODERS
 * is defined. Only the sizeof(SnowContext) entry (presumably the private
 * context size) is visible in this excerpt.
 */
4459 #ifdef CONFIG_ENCODERS
4460 AVCodec snow_encoder = {
4464 sizeof(SnowContext),
4480 int buffer[2][width*height];
4483 s.spatial_decomposition_count=6;
4484 s.spatial_decomposition_type=1;
4486 printf("testing 5/3 DWT\n");
4487 for(i=0; i<width*height; i++)
4488 buffer[0][i]= buffer[1][i]= random()%54321 - 12345;
4490 ff_spatial_dwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4491 ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4493 for(i=0; i<width*height; i++)
4494 if(buffer[0][i]!= buffer[1][i]) printf("fsck: %d %d %d\n",i, buffer[0][i], buffer[1][i]);
4496 printf("testing 9/7 DWT\n");
4497 s.spatial_decomposition_type=0;
4498 for(i=0; i<width*height; i++)
4499 buffer[0][i]= buffer[1][i]= random()%54321 - 12345;
4501 ff_spatial_dwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4502 ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4504 for(i=0; i<width*height; i++)
4505 if(ABS(buffer[0][i] - buffer[1][i])>20) printf("fsck: %d %d %d\n",i, buffer[0][i], buffer[1][i]);
4508 printf("testing AC coder\n");
4509 memset(s.header_state, 0, sizeof(s.header_state));
4510 ff_init_range_encoder(&s.c, buffer[0], 256*256);
4511 ff_init_cabac_states(&s.c, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64);
4513 for(i=-256; i<256; i++){
4515 put_symbol(&s.c, s.header_state, i*i*i/3*ABS(i), 1);
4516 STOP_TIMER("put_symbol")
4518 ff_rac_terminate(&s.c);
4520 memset(s.header_state, 0, sizeof(s.header_state));
4521 ff_init_range_decoder(&s.c, buffer[0], 256*256);
4522 ff_init_cabac_states(&s.c, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64);
4524 for(i=-256; i<256; i++){
4527 j= get_symbol(&s.c, s.header_state, 1);
4528 STOP_TIMER("get_symbol")
4529 if(j!=i*i*i/3*ABS(i)) printf("fsck: %d != %d\n", i, j);
4533 int level, orientation, x, y;
4534 int64_t errors[8][4];
4537 memset(errors, 0, sizeof(errors));
4538 s.spatial_decomposition_count=3;
4539 s.spatial_decomposition_type=0;
4540 for(level=0; level<s.spatial_decomposition_count; level++){
4541 for(orientation=level ? 1 : 0; orientation<4; orientation++){
4542 int w= width >> (s.spatial_decomposition_count-level);
4543 int h= height >> (s.spatial_decomposition_count-level);
4544 int stride= width << (s.spatial_decomposition_count-level);
4545 DWTELEM *buf= buffer[0];
4548 if(orientation&1) buf+=w;
4549 if(orientation>1) buf+=stride>>1;
4551 memset(buffer[0], 0, sizeof(int)*width*height);
4552 buf[w/2 + h/2*stride]= 256*256;
4553 ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4554 for(y=0; y<height; y++){
4555 for(x=0; x<width; x++){
4556 int64_t d= buffer[0][x + y*width];
4558 if(ABS(width/2-x)<9 && ABS(height/2-y)<9 && level==2) printf("%8lld ", d);
4560 if(ABS(height/2-y)<9 && level==2) printf("\n");
4562 error= (int)(sqrt(error)+0.5);
4563 errors[level][orientation]= error;
4564 if(g) g=ff_gcd(g, error);
4568 printf("static int const visual_weight[][4]={\n");
4569 for(level=0; level<s.spatial_decomposition_count; level++){
4571 for(orientation=0; orientation<4; orientation++){
4572 printf("%8lld,", errors[level][orientation]/g);
4580 int w= width >> (s.spatial_decomposition_count-level);
4581 int h= height >> (s.spatial_decomposition_count-level);
4582 int stride= width << (s.spatial_decomposition_count-level);
4583 DWTELEM *buf= buffer[0];
4589 memset(buffer[0], 0, sizeof(int)*width*height);
4591 for(y=0; y<height; y++){
4592 for(x=0; x<width; x++){
4593 int tab[4]={0,2,3,1};
4594 buffer[0][x+width*y]= 256*256*tab[(x&1) + 2*(y&1)];
4597 ff_spatial_dwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4601 buf[x + y*stride ]=169;
4602 buf[x + y*stride-w]=64;
4605 ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4607 for(y=0; y<height; y++){
4608 for(x=0; x<width; x++){
4609 int64_t d= buffer[0][x + y*width];
4611 if(ABS(width/2-x)<9 && ABS(height/2-y)<9) printf("%8lld ", d);
4613 if(ABS(height/2-y)<9) printf("\n");