2 * Copyright (C) 2004 Michael Niedermayer <michaelni@gmx.at>
4 * This library is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU Lesser General Public
6 * License as published by the Free Software Foundation; either
7 * version 2 of the License, or (at your option) any later version.
9 * This library is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * Lesser General Public License for more details.
14 * You should have received a copy of the GNU Lesser General Public
15 * License along with this library; if not, write to the Free Software
16 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
24 #include "rangecoder.h"
26 #include "mpegvideo.h"
// quant3: 256-entry lookup table with three output levels (-1, 0, +1).
// In the visible rows the first 128 entries are 0,0 followed by +1, and the
// last 128 entries are -1 followed by a single trailing 0.
// NOTE(review): presumably indexed by a value reduced to 8 bits -- confirm at the use sites.
31 static const int8_t quant3[256]={
32 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
33 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
34 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
35 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
36 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
37 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
38 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
39 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
40 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
41 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
42 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
43 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
44 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
45 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
46 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
47 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 0,
// quant3b: 256-entry lookup table with levels -1, 0, +1. Unlike quant3, only
// entry 0 is zero in the visible rows: entries 1..127 are +1 and 128..255 are -1.
49 static const int8_t quant3b[256]={
50 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
51 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
52 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
53 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
54 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
55 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
56 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
57 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
58 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
59 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
60 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
61 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
62 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
63 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
64 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
65 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
// quant3bA: 256-entry lookup table with levels -1, 0, +1. The visible rows start
// with 0,0 and then alternate +1,-1 for every remaining even/odd index pair.
67 static const int8_t quant3bA[256]={
68 0, 0, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
69 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
70 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
71 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
72 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
73 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
74 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
75 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
76 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
77 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
78 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
79 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
80 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
81 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
82 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
83 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
// quant5: 256-entry lookup table with five output levels (-2..+2).
// The first 128 entries are non-negative and the last 128 are negative.
85 static const int8_t quant5[256]={
86 0, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
87 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
88 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
89 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
90 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
91 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
92 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
93 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
94 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
95 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
96 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
97 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
98 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
99 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
100 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
101 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,-1,
// quant7: 256-entry lookup table with seven output levels (-3..+3).
// The first 128 entries are non-negative and the last 128 are negative.
103 static const int8_t quant7[256]={
104 0, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
105 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
106 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3,
107 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
108 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
109 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
110 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
111 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
112 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
113 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
114 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
115 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
116 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
117 -3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-2,-2,-2,
118 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
119 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,
// quant9: 256-entry lookup table with nine output levels (-4..+4).
// The first 128 entries are non-negative and the last 128 are negative.
121 static const int8_t quant9[256]={
122 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3,
123 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
124 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
125 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
126 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
127 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
128 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
129 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
130 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
131 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
132 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
133 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
134 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
135 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
136 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,
137 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-1,-1,
// quant11: 256-entry lookup table with eleven output levels (-5..+5).
// The first 128 entries are non-negative and the last 128 are negative.
139 static const int8_t quant11[256]={
140 0, 1, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4,
141 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
142 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
143 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
144 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
145 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
146 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
147 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
148 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
149 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
150 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
151 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
152 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
153 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-4,-4,
154 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
155 -4,-4,-4,-4,-4,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-1,
// quant13: 256-entry lookup table with thirteen output levels (-6..+6).
// The first 128 entries are non-negative and the last 128 are negative.
157 static const int8_t quant13[256]={
158 0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
159 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
160 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
161 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
162 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
163 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
164 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
165 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
166 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
167 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
168 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
169 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
170 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-5,
171 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
172 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
173 -4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,-2,-2,-1,
// obmc32 (first variant): 1024 weights written as 32 rows of 32 values, rising
// from 0 on the border to 64 in the centre.
// NOTE(review): obmc32 is defined more than once in this file; the variants are
// presumably selected by preprocessor conditionals that are not visible here.
177 static const uint8_t obmc32[1024]={
178 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
179 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
180 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
181 0, 0, 1, 1, 2, 2, 3, 4, 4, 5, 6, 6, 7, 7, 8, 8, 8, 8, 7, 7, 6, 6, 5, 4, 4, 3, 2, 2, 1, 1, 0, 0,
182 0, 0, 1, 2, 2, 3, 4, 6, 7, 8, 9,10,11,12,12,12,12,12,12,11,10, 9, 8, 7, 6, 4, 3, 2, 2, 1, 0, 0,
183 0, 1, 1, 2, 3, 5, 6, 8,10,11,13,14,15,16,17,18,18,17,16,15,14,13,11,10, 8, 6, 5, 3, 2, 1, 1, 0,
184 0, 1, 1, 3, 4, 6, 8,10,13,15,17,19,20,22,22,23,23,22,22,20,19,17,15,13,10, 8, 6, 4, 3, 1, 1, 0,
185 0, 1, 2, 4, 6, 8,10,13,16,19,21,23,25,27,28,29,29,28,27,25,23,21,19,16,13,10, 8, 6, 4, 2, 1, 0,
186 0, 1, 2, 4, 7,10,13,16,19,22,25,28,31,33,34,35,35,34,33,31,28,25,22,19,16,13,10, 7, 4, 2, 1, 0,
187 0, 1, 3, 5, 8,11,15,19,22,26,30,33,36,38,40,41,41,40,38,36,33,30,26,22,19,15,11, 8, 5, 3, 1, 0,
188 0, 1, 3, 6, 9,12,17,21,25,30,34,38,41,44,45,46,46,45,44,41,38,34,30,25,21,17,12, 9, 6, 3, 1, 0,
189 0, 1, 3, 6,10,14,19,23,28,33,38,42,45,48,51,52,52,51,48,45,42,38,33,28,23,19,14,10, 6, 3, 1, 0,
190 0, 1, 4, 7,11,15,20,25,31,36,41,45,49,52,55,56,56,55,52,49,45,41,36,31,25,20,15,11, 7, 4, 1, 0,
191 0, 2, 4, 7,12,16,22,27,33,38,44,48,52,56,58,60,60,58,56,52,48,44,38,33,27,22,16,12, 7, 4, 2, 0,
192 0, 1, 4, 8,12,17,22,28,34,40,45,51,55,58,61,62,62,61,58,55,51,45,40,34,28,22,17,12, 8, 4, 1, 0,
193 0, 2, 4, 8,12,18,23,29,35,41,46,52,56,60,62,64,64,62,60,56,52,46,41,35,29,23,18,12, 8, 4, 2, 0,
194 0, 2, 4, 8,12,18,23,29,35,41,46,52,56,60,62,64,64,62,60,56,52,46,41,35,29,23,18,12, 8, 4, 2, 0,
195 0, 1, 4, 8,12,17,22,28,34,40,45,51,55,58,61,62,62,61,58,55,51,45,40,34,28,22,17,12, 8, 4, 1, 0,
196 0, 2, 4, 7,12,16,22,27,33,38,44,48,52,56,58,60,60,58,56,52,48,44,38,33,27,22,16,12, 7, 4, 2, 0,
197 0, 1, 4, 7,11,15,20,25,31,36,41,45,49,52,55,56,56,55,52,49,45,41,36,31,25,20,15,11, 7, 4, 1, 0,
198 0, 1, 3, 6,10,14,19,23,28,33,38,42,45,48,51,52,52,51,48,45,42,38,33,28,23,19,14,10, 6, 3, 1, 0,
199 0, 1, 3, 6, 9,12,17,21,25,30,34,38,41,44,45,46,46,45,44,41,38,34,30,25,21,17,12, 9, 6, 3, 1, 0,
200 0, 1, 3, 5, 8,11,15,19,22,26,30,33,36,38,40,41,41,40,38,36,33,30,26,22,19,15,11, 8, 5, 3, 1, 0,
201 0, 1, 2, 4, 7,10,13,16,19,22,25,28,31,33,34,35,35,34,33,31,28,25,22,19,16,13,10, 7, 4, 2, 1, 0,
202 0, 1, 2, 4, 6, 8,10,13,16,19,21,23,25,27,28,29,29,28,27,25,23,21,19,16,13,10, 8, 6, 4, 2, 1, 0,
203 0, 1, 1, 3, 4, 6, 8,10,13,15,17,19,20,22,22,23,23,22,22,20,19,17,15,13,10, 8, 6, 4, 3, 1, 1, 0,
204 0, 1, 1, 2, 3, 5, 6, 8,10,11,13,14,15,16,17,18,18,17,16,15,14,13,11,10, 8, 6, 5, 3, 2, 1, 1, 0,
205 0, 0, 1, 2, 2, 3, 4, 6, 7, 8, 9,10,11,12,12,12,12,12,12,11,10, 9, 8, 7, 6, 4, 3, 2, 2, 1, 0, 0,
206 0, 0, 1, 1, 2, 2, 3, 4, 4, 5, 6, 6, 7, 7, 8, 8, 8, 8, 7, 7, 6, 6, 5, 4, 4, 3, 2, 2, 1, 1, 0, 0,
207 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
208 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
209 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
// obmc16 (first variant): 256 weights written as 16 rows of 16 values, peaking
// at 62 in the centre.
// NOTE(review): obmc16 is defined more than once in this file; the variants are
// presumably selected by preprocessor conditionals that are not visible here.
212 static const uint8_t obmc16[256]={
213 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
214 0, 1, 1, 2, 4, 5, 5, 6, 6, 5, 5, 4, 2, 1, 1, 0,
215 0, 1, 4, 6, 9,11,13,15,15,13,11, 9, 6, 4, 1, 0,
216 0, 2, 6,11,15,20,24,26,26,24,20,15,11, 6, 2, 0,
217 0, 4, 9,15,23,29,34,38,38,34,29,23,15, 9, 4, 0,
218 0, 5,11,20,29,38,45,49,49,45,38,29,20,11, 5, 0,
219 1, 5,13,24,34,45,53,57,57,53,45,34,24,13, 5, 1,
220 1, 6,15,26,38,49,57,62,62,57,49,38,26,15, 6, 1,
221 1, 6,15,26,38,49,57,62,62,57,49,38,26,15, 6, 1,
222 1, 5,13,24,34,45,53,57,57,53,45,34,24,13, 5, 1,
223 0, 5,11,20,29,38,45,49,49,45,38,29,20,11, 5, 0,
224 0, 4, 9,15,23,29,34,38,38,34,29,23,15, 9, 4, 0,
225 0, 2, 6,11,15,20,24,26,26,24,20,15,11, 6, 2, 0,
226 0, 1, 4, 6, 9,11,13,15,15,13,11, 9, 6, 4, 1, 0,
227 0, 1, 1, 2, 4, 5, 5, 6, 6, 5, 5, 4, 2, 1, 1, 0,
228 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
// obmc32 (second variant): 1024 weights written as 32 rows of 32 values, all
// multiples of 4, rising from 0 in the corners to 240 in the centre.
// NOTE(review): one of several obmc32 definitions; the selecting preprocessor
// conditionals are not visible here.
232 static const uint8_t obmc32[1024]={
233 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0,
234 0, 4, 4, 4, 8, 8, 8, 12, 12, 16, 16, 16, 20, 20, 20, 24, 24, 20, 20, 20, 16, 16, 16, 12, 12, 8, 8, 8, 4, 4, 4, 0,
235 0, 4, 8, 8, 12, 12, 16, 20, 20, 24, 28, 28, 32, 32, 36, 40, 40, 36, 32, 32, 28, 28, 24, 20, 20, 16, 12, 12, 8, 8, 4, 0,
236 0, 4, 8, 12, 16, 20, 24, 28, 28, 32, 36, 40, 44, 48, 52, 56, 56, 52, 48, 44, 40, 36, 32, 28, 28, 24, 20, 16, 12, 8, 4, 0,
237 4, 8, 12, 16, 20, 24, 28, 32, 40, 44, 48, 52, 56, 60, 64, 68, 68, 64, 60, 56, 52, 48, 44, 40, 32, 28, 24, 20, 16, 12, 8, 4,
238 4, 8, 12, 20, 24, 32, 36, 40, 48, 52, 56, 64, 68, 76, 80, 84, 84, 80, 76, 68, 64, 56, 52, 48, 40, 36, 32, 24, 20, 12, 8, 4,
239 4, 8, 16, 24, 28, 36, 44, 48, 56, 60, 68, 76, 80, 88, 96,100,100, 96, 88, 80, 76, 68, 60, 56, 48, 44, 36, 28, 24, 16, 8, 4,
240 4, 12, 20, 28, 32, 40, 48, 56, 64, 72, 80, 88, 92,100,108,116,116,108,100, 92, 88, 80, 72, 64, 56, 48, 40, 32, 28, 20, 12, 4,
241 4, 12, 20, 28, 40, 48, 56, 64, 72, 80, 88, 96,108,116,124,132,132,124,116,108, 96, 88, 80, 72, 64, 56, 48, 40, 28, 20, 12, 4,
242 4, 16, 24, 32, 44, 52, 60, 72, 80, 92,100,108,120,128,136,148,148,136,128,120,108,100, 92, 80, 72, 60, 52, 44, 32, 24, 16, 4,
243 4, 16, 28, 36, 48, 56, 68, 80, 88,100,112,120,132,140,152,164,164,152,140,132,120,112,100, 88, 80, 68, 56, 48, 36, 28, 16, 4,
244 4, 16, 28, 40, 52, 64, 76, 88, 96,108,120,132,144,156,168,180,180,168,156,144,132,120,108, 96, 88, 76, 64, 52, 40, 28, 16, 4,
245 8, 20, 32, 44, 56, 68, 80, 92,108,120,132,144,156,168,180,192,192,180,168,156,144,132,120,108, 92, 80, 68, 56, 44, 32, 20, 8,
246 8, 20, 32, 48, 60, 76, 88,100,116,128,140,156,168,184,196,208,208,196,184,168,156,140,128,116,100, 88, 76, 60, 48, 32, 20, 8,
247 8, 20, 36, 52, 64, 80, 96,108,124,136,152,168,180,196,212,224,224,212,196,180,168,152,136,124,108, 96, 80, 64, 52, 36, 20, 8,
248 8, 24, 40, 56, 68, 84,100,116,132,148,164,180,192,208,224,240,240,224,208,192,180,164,148,132,116,100, 84, 68, 56, 40, 24, 8,
249 8, 24, 40, 56, 68, 84,100,116,132,148,164,180,192,208,224,240,240,224,208,192,180,164,148,132,116,100, 84, 68, 56, 40, 24, 8,
250 8, 20, 36, 52, 64, 80, 96,108,124,136,152,168,180,196,212,224,224,212,196,180,168,152,136,124,108, 96, 80, 64, 52, 36, 20, 8,
251 8, 20, 32, 48, 60, 76, 88,100,116,128,140,156,168,184,196,208,208,196,184,168,156,140,128,116,100, 88, 76, 60, 48, 32, 20, 8,
252 8, 20, 32, 44, 56, 68, 80, 92,108,120,132,144,156,168,180,192,192,180,168,156,144,132,120,108, 92, 80, 68, 56, 44, 32, 20, 8,
253 4, 16, 28, 40, 52, 64, 76, 88, 96,108,120,132,144,156,168,180,180,168,156,144,132,120,108, 96, 88, 76, 64, 52, 40, 28, 16, 4,
254 4, 16, 28, 36, 48, 56, 68, 80, 88,100,112,120,132,140,152,164,164,152,140,132,120,112,100, 88, 80, 68, 56, 48, 36, 28, 16, 4,
255 4, 16, 24, 32, 44, 52, 60, 72, 80, 92,100,108,120,128,136,148,148,136,128,120,108,100, 92, 80, 72, 60, 52, 44, 32, 24, 16, 4,
256 4, 12, 20, 28, 40, 48, 56, 64, 72, 80, 88, 96,108,116,124,132,132,124,116,108, 96, 88, 80, 72, 64, 56, 48, 40, 28, 20, 12, 4,
257 4, 12, 20, 28, 32, 40, 48, 56, 64, 72, 80, 88, 92,100,108,116,116,108,100, 92, 88, 80, 72, 64, 56, 48, 40, 32, 28, 20, 12, 4,
258 4, 8, 16, 24, 28, 36, 44, 48, 56, 60, 68, 76, 80, 88, 96,100,100, 96, 88, 80, 76, 68, 60, 56, 48, 44, 36, 28, 24, 16, 8, 4,
259 4, 8, 12, 20, 24, 32, 36, 40, 48, 52, 56, 64, 68, 76, 80, 84, 84, 80, 76, 68, 64, 56, 52, 48, 40, 36, 32, 24, 20, 12, 8, 4,
260 4, 8, 12, 16, 20, 24, 28, 32, 40, 44, 48, 52, 56, 60, 64, 68, 68, 64, 60, 56, 52, 48, 44, 40, 32, 28, 24, 20, 16, 12, 8, 4,
261 0, 4, 8, 12, 16, 20, 24, 28, 28, 32, 36, 40, 44, 48, 52, 56, 56, 52, 48, 44, 40, 36, 32, 28, 28, 24, 20, 16, 12, 8, 4, 0,
262 0, 4, 8, 8, 12, 12, 16, 20, 20, 24, 28, 28, 32, 32, 36, 40, 40, 36, 32, 32, 28, 28, 24, 20, 20, 16, 12, 12, 8, 8, 4, 0,
263 0, 4, 4, 4, 8, 8, 8, 12, 12, 16, 16, 16, 20, 20, 20, 24, 24, 20, 20, 20, 16, 16, 16, 12, 12, 8, 8, 8, 4, 4, 4, 0,
264 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0,
// obmc16 (second variant): 256 weights written as 16 rows of 16 values, all
// multiples of 4, rising from 0 in the corners to 224 in the centre.
// NOTE(review): one of several obmc16 definitions; the selecting preprocessor
// conditionals are not visible here.
267 static const uint8_t obmc16[256]={
268 0, 4, 4, 8, 8, 12, 12, 16, 16, 12, 12, 8, 8, 4, 4, 0,
269 4, 8, 16, 20, 28, 32, 40, 44, 44, 40, 32, 28, 20, 16, 8, 4,
270 4, 16, 24, 36, 44, 56, 64, 76, 76, 64, 56, 44, 36, 24, 16, 4,
271 8, 20, 36, 48, 64, 76, 92,104,104, 92, 76, 64, 48, 36, 20, 8,
272 8, 28, 44, 64, 80,100,116,136,136,116,100, 80, 64, 44, 28, 8,
273 12, 32, 56, 76,100,120,144,164,164,144,120,100, 76, 56, 32, 12,
274 12, 40, 64, 92,116,144,168,196,196,168,144,116, 92, 64, 40, 12,
275 16, 44, 76,104,136,164,196,224,224,196,164,136,104, 76, 44, 16,
276 16, 44, 76,104,136,164,196,224,224,196,164,136,104, 76, 44, 16,
277 12, 40, 64, 92,116,144,168,196,196,168,144,116, 92, 64, 40, 12,
278 12, 32, 56, 76,100,120,144,164,164,144,120,100, 76, 56, 32, 12,
279 8, 28, 44, 64, 80,100,116,136,136,116,100, 80, 64, 44, 28, 8,
280 8, 20, 36, 48, 64, 76, 92,104,104, 92, 76, 64, 48, 36, 20, 8,
281 4, 16, 24, 36, 44, 56, 64, 76, 76, 64, 56, 44, 36, 24, 16, 4,
282 4, 8, 16, 20, 28, 32, 40, 44, 44, 40, 32, 28, 20, 16, 8, 4,
283 0, 4, 4, 8, 8, 12, 12, 16, 16, 12, 12, 8, 8, 4, 4, 0,
// obmc32 (third variant): 1024 weights written as 32 rows of 32 values, rising
// from 0 on the border to 64 in the centre.
// NOTE(review): one of several obmc32 definitions; the selecting preprocessor
// conditionals are not visible here.
287 static const uint8_t obmc32[1024]={
288 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
289 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
290 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
291 0, 0, 1, 1, 1, 2, 2, 3, 4, 5, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 6, 5, 5, 4, 3, 2, 2, 1, 1, 1, 0, 0,
292 0, 0, 1, 1, 2, 3, 4, 5, 6, 7, 9,10,11,11,12,12,12,12,11,11,10, 9, 7, 6, 5, 4, 3, 2, 1, 1, 0, 0,
293 0, 0, 1, 2, 3, 5, 6, 8, 9,11,12,14,15,16,17,17,17,17,16,15,14,12,11, 9, 8, 6, 5, 3, 2, 1, 0, 0,
294 0, 1, 1, 2, 4, 6, 8,10,12,15,17,19,20,21,22,23,23,22,21,20,19,17,15,12,10, 8, 6, 4, 2, 1, 1, 0,
295 0, 1, 2, 3, 5, 8,10,13,16,19,21,24,26,27,28,29,29,28,27,26,24,21,19,16,13,10, 8, 5, 3, 2, 1, 0,
296 0, 1, 2, 4, 6, 9,12,16,19,23,26,29,31,33,34,35,35,34,33,31,29,26,23,19,16,12, 9, 6, 4, 2, 1, 0,
297 0, 1, 3, 5, 7,11,15,19,23,26,30,34,37,39,40,41,41,40,39,37,34,30,26,23,19,15,11, 7, 5, 3, 1, 0,
298 0, 1, 3, 5, 9,12,17,21,26,30,35,38,42,44,46,47,47,46,44,42,38,35,30,26,21,17,12, 9, 5, 3, 1, 0,
299 0, 1, 3, 6, 9,14,19,24,29,34,38,43,46,49,51,52,52,51,49,46,43,38,34,29,24,19,14, 9, 6, 3, 1, 0,
300 0, 1, 3, 6,11,15,20,26,31,37,42,46,50,53,56,57,57,56,53,50,46,42,37,31,26,20,15,11, 6, 3, 1, 0,
301 0, 1, 3, 7,11,16,21,27,33,39,44,49,53,57,59,60,60,59,57,53,49,44,39,33,27,21,16,11, 7, 3, 1, 0,
302 0, 1, 4, 7,12,17,22,28,34,40,46,51,56,59,61,63,63,61,59,56,51,46,40,34,28,22,17,12, 7, 4, 1, 0,
303 0, 1, 4, 7,12,17,23,29,35,41,47,52,57,60,63,64,64,63,60,57,52,47,41,35,29,23,17,12, 7, 4, 1, 0,
304 0, 1, 4, 7,12,17,23,29,35,41,47,52,57,60,63,64,64,63,60,57,52,47,41,35,29,23,17,12, 7, 4, 1, 0,
305 0, 1, 4, 7,12,17,22,28,34,40,46,51,56,59,61,63,63,61,59,56,51,46,40,34,28,22,17,12, 7, 4, 1, 0,
306 0, 1, 3, 7,11,16,21,27,33,39,44,49,53,57,59,60,60,59,57,53,49,44,39,33,27,21,16,11, 7, 3, 1, 0,
307 0, 1, 3, 6,11,15,20,26,31,37,42,46,50,53,56,57,57,56,53,50,46,42,37,31,26,20,15,11, 6, 3, 1, 0,
308 0, 1, 3, 6, 9,14,19,24,29,34,38,43,46,49,51,52,52,51,49,46,43,38,34,29,24,19,14, 9, 6, 3, 1, 0,
309 0, 1, 3, 5, 9,12,17,21,26,30,35,38,42,44,46,47,47,46,44,42,38,35,30,26,21,17,12, 9, 5, 3, 1, 0,
310 0, 1, 3, 5, 7,11,15,19,23,26,30,34,37,39,40,41,41,40,39,37,34,30,26,23,19,15,11, 7, 5, 3, 1, 0,
311 0, 1, 2, 4, 6, 9,12,16,19,23,26,29,31,33,34,35,35,34,33,31,29,26,23,19,16,12, 9, 6, 4, 2, 1, 0,
312 0, 1, 2, 3, 5, 8,10,13,16,19,21,24,26,27,28,29,29,28,27,26,24,21,19,16,13,10, 8, 5, 3, 2, 1, 0,
313 0, 1, 1, 2, 4, 6, 8,10,12,15,17,19,20,21,22,23,23,22,21,20,19,17,15,12,10, 8, 6, 4, 2, 1, 1, 0,
314 0, 0, 1, 2, 3, 5, 6, 8, 9,11,12,14,15,16,17,17,17,17,16,15,14,12,11, 9, 8, 6, 5, 3, 2, 1, 0, 0,
315 0, 0, 1, 1, 2, 3, 4, 5, 6, 7, 9,10,11,11,12,12,12,12,11,11,10, 9, 7, 6, 5, 4, 3, 2, 1, 1, 0, 0,
316 0, 0, 1, 1, 1, 2, 2, 3, 4, 5, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 6, 5, 5, 4, 3, 2, 2, 1, 1, 1, 0, 0,
317 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
318 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
319 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
// obmc16 (third variant): 256 weights written as 16 rows of 16 values, peaking
// at 63 in the centre.
// NOTE(review): one of several obmc16 definitions; the selecting preprocessor
// conditionals are not visible here.
322 static const uint8_t obmc16[256]={
323 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
324 0, 0, 1, 2, 3, 4, 5, 5, 5, 5, 4, 3, 2, 1, 0, 0,
325 0, 1, 3, 6, 8,11,13,14,14,13,11, 8, 6, 3, 1, 0,
326 0, 2, 6,10,15,20,24,26,26,24,20,15,10, 6, 2, 0,
327 0, 3, 8,16,23,30,35,38,38,35,30,23,16, 8, 3, 0,
328 1, 4,11,20,30,39,46,49,49,46,39,30,20,11, 4, 1,
329 1, 5,13,24,35,46,54,58,58,54,46,35,24,13, 5, 1,
330 0, 5,14,26,38,49,58,63,63,58,49,38,26,14, 5, 0,
331 0, 5,14,26,38,49,58,63,63,58,49,38,26,14, 5, 0,
332 1, 5,13,24,35,46,54,58,58,54,46,35,24,13, 5, 1,
333 1, 4,11,20,30,39,46,49,49,46,39,30,20,11, 4, 1,
334 0, 3, 8,16,23,30,35,38,38,35,30,23,16, 8, 3, 0,
335 0, 2, 6,10,15,20,24,26,26,24,20,15,10, 6, 2, 0,
336 0, 1, 3, 6, 8,11,13,14,14,13,11, 8, 6, 3, 1, 0,
337 0, 0, 1, 2, 3, 4, 5, 5, 5, 5, 4, 3, 2, 1, 0, 0,
338 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
// obmc8: 64 weights written as 8 rows of 8 values. The table is separable:
// every entry equals 4*a*b with a and b taken from {1,3,5,7,7,5,3,1}, giving a
// minimum of 4 in the corners and a maximum of 196 in the centre.
344 static const uint8_t obmc8[64]={
345 4, 12, 20, 28, 28, 20, 12, 4,
346 12, 36, 60, 84, 84, 60, 36, 12,
347 20, 60,100,140,140,100, 60, 20,
348 28, 84,140,196,196,140, 84, 28,
349 28, 84,140,196,196,140, 84, 28,
350 20, 60,100,140,140,100, 60, 20,
351 12, 36, 60, 84, 84, 60, 36, 12,
352 4, 12, 20, 28, 28, 20, 12, 4,
// obmc4: 16-entry weight table; its values are not visible in this excerpt.
357 static const uint8_t obmc4[16]={
// obmc_tab: the four weight tables ordered from largest to smallest, so index 0
// is obmc32 (1024 entries) and index 3 is obmc4 (16 entries).
365 static const uint8_t *obmc_tab[4]={
366 obmc32, obmc16, obmc8, obmc4
// BlockNode: per-block record. Only part of the member list is visible in this
// excerpt (null_block additionally shows a three-component `color` member).
369 typedef struct BlockNode{
375 //#define TYPE_SPLIT 1
// NOTE(review): presumably a flag value for a block-type member that is not visible here -- confirm.
376 #define BLOCK_INTRA 1
378 //#define TYPE_NOCOLOR 4
379 uint8_t level; //FIXME merge into type?
// null_block: constant BlockNode whose three color components are all 128.
// Any further initialisers are not visible in this excerpt.
382 static const BlockNode null_block= { //FIXME add border maybe
383 .color= {128,128,128},
// MB_SIZE is derived from its base-2 logarithm: 1<<4 evaluates to 16.
391 #define LOG2_MB_SIZE 4
392 #define MB_SIZE (1<<LOG2_MB_SIZE)
// x_and_coeff: element type of the x_coeff array pointed to by SubBand.
// Its members are not visible in this excerpt.
394 typedef struct x_and_coeff{
// SubBand: description of one subband (only part of the member list is visible).
399 typedef struct SubBand{
404 int qlog; ///< log(qscale)/log[2^(1/6)]
408 int stride_line; ///< Stride measured in lines, not pixels.
// Pointer to x_and_coeff entries; ownership and length are not visible here.
409 x_and_coeff * x_coeff;
// Link to another SubBand; NOTE(review): presumably the matching band of the next coarser level -- confirm.
410 struct SubBand *parent;
// 7 + 512 groups of 32 one-byte states; the inline note records an earlier 7*2 sizing.
411 uint8_t state[/*7*2*/ 7 + 512][32];
// Plane: per-plane data (only part of the member list is visible).
414 typedef struct Plane{
// Up to MAX_DECOMPOSITIONS levels with four subbands each.
417 SubBand band[MAX_DECOMPOSITIONS][4];
// SnowContext: codec-wide state (only part of the member list is visible).
420 typedef struct SnowContext{
421 // MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to make the motion estimation eventually independent of MpegEncContext, so this will be removed then (FIXME/XXX)
423 AVCodecContext *avctx;
427 AVFrame input_picture; ///< new_picture with the internal linesizes
428 AVFrame current_picture;
// One entry per reference frame, up to MAX_REF_FRAMES.
429 AVFrame last_picture[MAX_REF_FRAMES];
430 AVFrame mconly_picture;
431 // uint8_t q_context[16];
// Byte-sized state arrays; NOTE(review): presumably range-coder contexts for header and block data -- confirm.
432 uint8_t header_state[32];
433 uint8_t block_state[128 + 32*128];
437 int spatial_decomposition_type;
438 int temporal_decomposition_type;
439 int spatial_decomposition_count;
440 int temporal_decomposition_count;
// Per reference frame: a pointer to an array of int16_t pairs, and a pointer to 32-bit scores.
443 int16_t (*ref_mvs[MAX_REF_FRAMES])[2];
444 uint32_t *ref_scores[MAX_REF_FRAMES];
445 DWTELEM *spatial_dwt_buffer;
449 int spatial_scalability;
456 #define QBIAS_SHIFT 3
460 Plane plane[MAX_PLANES];
// me_cache holds ME_CACHE_SIZE ints and is accompanied by a generation counter.
462 #define ME_CACHE_SIZE 1024
463 int me_cache[ME_CACHE_SIZE];
464 int me_cache_generation;
467 MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to make the motion estimation eventually independent of MpegEncContext, so this will be removed then (FIXME/XXX)
// slice_buffer_get_line: yields the pointer already stored in line[line_num]
// when it is non-NULL, otherwise falls back to slice_buffer_load_line().
// Both macro arguments are expanded more than once, so they must be free of side effects.
478 #define slice_buffer_get_line(slice_buf, line_num) ((slice_buf)->line[line_num] ? (slice_buf)->line[line_num] : slice_buffer_load_line((slice_buf), (line_num)))
479 //#define slice_buffer_get_line(slice_buf, line_num) (slice_buffer_load_line((slice_buf), (line_num)))
// Forward declaration; the definition is not in this part of the file.
481 static void iterative_me(SnowContext *s);
// Initialise a slice buffer: record the backing buffer and geometry, allocate a
// table of line_count line pointers, and pre-allocate max_allocated_lines line
// buffers of line_width DWTELEMs each, kept on data_stack.
// NOTE(review): none of the allocation results are checked in the lines visible here.
483 static void slice_buffer_init(slice_buffer * buf, int line_count, int max_allocated_lines, int line_width, DWTELEM * base_buffer)
487 buf->base_buffer = base_buffer;
488 buf->line_count = line_count;
489 buf->line_width = line_width;
490 buf->data_count = max_allocated_lines;
// av_mallocz zero-fills, so every line slot starts out NULL (no line loaded).
491 buf->line = (DWTELEM * *) av_mallocz (sizeof(DWTELEM *) * line_count);
492 buf->data_stack = (DWTELEM * *) av_malloc (sizeof(DWTELEM *) * max_allocated_lines);
494 for (i = 0; i < max_allocated_lines; i++)
496 buf->data_stack[i] = (DWTELEM *) av_malloc (sizeof(DWTELEM) * line_width);
// The stack starts full: the top index is the last pre-allocated buffer.
499 buf->data_stack_top = max_allocated_lines - 1;
// Attach a free line buffer to slot `line`: the buffer on top of data_stack is
// popped and stored in buf->line[line].
// NOTE(review): the conditions guarding the early return and the final return
// statement are not visible in this excerpt.
502 static DWTELEM * slice_buffer_load_line(slice_buffer * buf, int line)
507 // av_log(NULL, AV_LOG_DEBUG, "Cache hit: %d\n", line);
// At least one free buffer must remain on the stack.
509 assert(buf->data_stack_top >= 0);
510 // assert(!buf->line[line]);
512 return buf->line[line];
// NOTE(review): offset is computed here but its use is not visible in this excerpt.
514 offset = buf->line_width * line;
515 buffer = buf->data_stack[buf->data_stack_top];
516 buf->data_stack_top--;
517 buf->line[line] = buffer;
519 // av_log(NULL, AV_LOG_DEBUG, "slice_buffer_load_line: line: %d remaining: %d\n", line, buf->data_stack_top + 1);
// Detach the buffer held by slot `line` and push it back onto data_stack so it
// can be reused; the slot is reset to NULL.
// The slot must be in range and currently loaded (both are asserted).
524 static void slice_buffer_release(slice_buffer * buf, int line)
529 assert(line >= 0 && line < buf->line_count);
530 assert(buf->line[line]);
// NOTE(review): offset is computed here but its use is not visible in this excerpt.
532 offset = buf->line_width * line;
533 buffer = buf->line[line];
534 buf->data_stack_top++;
535 buf->data_stack[buf->data_stack_top] = buffer;
536 buf->line[line] = NULL;
538 // av_log(NULL, AV_LOG_DEBUG, "slice_buffer_release: line: %d remaining: %d\n", line, buf->data_stack_top + 1);
// Walk all line_count slots and release them back to the buffer stack.
// NOTE(review): slice_buffer_release asserts that the slot is loaded, so a guard
// on buf->line[i] presumably sits on a line not visible here -- confirm.
541 static void slice_buffer_flush(slice_buffer * buf)
544 for (i = 0; i < buf->line_count; i++)
548 // av_log(NULL, AV_LOG_DEBUG, "slice_buffer_flush: line: %d \n", i);
549 slice_buffer_release(buf, i);
// Free everything owned by the slice buffer: flush first so that every line
// buffer is back on data_stack, then free each buffer, the stack array and the
// line pointer table. buf itself and base_buffer are not freed here.
554 static void slice_buffer_destroy(slice_buffer * buf)
557 slice_buffer_flush(buf);
// data_count is the number of buffers allocated at init time.
559 for (i = buf->data_count - 1; i >= 0; i--)
561 assert(buf->data_stack[i]);
562 av_freep(&buf->data_stack[i]);
564 assert(buf->data_stack);
565 av_freep(&buf->data_stack);
567 av_freep(&buf->line);
571 // Avoid a name clash on SGI IRIX
// QEXPSHIFT depends on FRAC_BITS, which is defined outside this excerpt.
574 #define QEXPSHIFT (7-FRAC_BITS+8) //FIXME try to change this to 0
// File-local table of QROOT bytes. It has no initialiser, so it starts zero-filled
// and is presumably populated at run time before use.
575 static uint8_t qexp[QROOT];
// mirror: loops while v, compared as unsigned, exceeds m. Because of the unsigned
// comparison a negative v also enters the loop. The loop body and the return
// statement are not visible in this excerpt.
577 static inline int mirror(int v, int m){
578 while((unsigned)v > (unsigned)m){
// put_symbol: write integer v with the range coder using adaptive states.
// State layout, as annotated on the calls below:
//   state+0       first decision (0 is written before a magnitude, 1 on the last visible line)
//   state+1..10   exponent, written as a run of 1 bits closed by a 0
//   state+11..21  sign bit (v < 0)
//   state+22..31  bits of a, most significant first
// NOTE(review): the definition of `a` is not visible (presumably |v|), and three
// similar emit sequences appear below; they look like alternative implementations
// selected by conditionals that are not visible in this excerpt.
585 static inline void put_symbol(RangeCoder *c, uint8_t *state, int v, int is_signed){
590 const int e= av_log2(a);
// el caps the exponent at 10 so that the state indices stay inside the annotated ranges.
592 const int el= FFMIN(e, 10);
593 put_rac(c, state+0, 0);
596 put_rac(c, state+1+i, 1); //1..10
599 put_rac(c, state+1+9, 1); //1..10
601 put_rac(c, state+1+FFMIN(i,9), 0);
// Bits at positions >= el all share the state at offset 22+9.
603 for(i=e-1; i>=el; i--){
604 put_rac(c, state+22+9, (a>>i)&1); //22..31
607 put_rac(c, state+22+i, (a>>i)&1); //22..31
611 put_rac(c, state+11 + el, v < 0); //11..21
614 put_rac(c, state+0, 0);
617 put_rac(c, state+1+i, 1); //1..10
619 put_rac(c, state+1+i, 0);
621 for(i=e-1; i>=0; i--){
622 put_rac(c, state+22+i, (a>>i)&1); //22..31
626 put_rac(c, state+11 + e, v < 0); //11..21
629 put_rac(c, state+1+FFMIN(i,9), 1); //1..10
631 put_rac(c, state+1+FFMIN(i,9), 0);
633 for(i=e-1; i>=0; i--){
634 put_rac(c, state+22+FFMIN(i,9), (a>>i)&1); //22..31
638 put_rac(c, state+11 + FFMIN(e,10), v < 0); //11..21
642 put_rac(c, state+0, 1);
// get_symbol: read an integer with the range coder, using the same state offsets
// as put_symbol (0: first decision, 1..10: exponent, 11..21: sign, 22..31: bits).
// NOTE(review): the statements executed by the two `if` lines and the return
// statements are not visible in this excerpt.
646 static inline int get_symbol(RangeCoder *c, uint8_t *state, int is_signed){
647 if(get_rac(c, state+0))
// Count the run of 1 bits; exponents above 9 share the state at offset 1+9.
652 while(get_rac(c, state+1 + FFMIN(e,9))){ //1..10
657 for(i=e-1; i>=0; i--){
// Shift a left by one and append the next decoded bit.
658 a += a + get_rac(c, state+22 + FFMIN(i,9)); //22..31
// The sign bit is only read when is_signed is non-zero.
661 if(is_signed && get_rac(c, state+11 + FFMIN(e,10))) //11..21
// put_symbol2: write v using states indexed by a running log2 value.
// Visible steps: 1 bits are written to state+4+log2, then a closing 0, then the
// low log2 bits of v (most significant first) to state+31-i.
// NOTE(review): the loop that updates log2, r and v between these steps is not
// visible in this excerpt.
668 static inline void put_symbol2(RangeCoder *c, uint8_t *state, int v, int log2){
// r starts as 2^log2, or 1 when log2 is negative.
670 int r= log2>=0 ? 1<<log2 : 1;
676 put_rac(c, state+4+log2, 1);
681 put_rac(c, state+4+log2, 0);
683 for(i=log2-1; i>=0; i--){
684 put_rac(c, state+31-i, (v>>i)&1);
// get_symbol2: read a value written by the scheme used in put_symbol2. It consumes
// 1 bits from state+4+log2 and then adds log2 low-order bits from state+31-i.
// NOTE(review): the body of the while loop and the return statement are not
// visible in this excerpt.
688 static inline int get_symbol2(RangeCoder *c, uint8_t *state, int log2){
// r starts as 2^log2, or 1 when log2 is negative.
690 int r= log2>=0 ? 1<<log2 : 1;
695 while(get_rac(c, state+4+log2)){
701 for(i=log2-1; i>=0; i--){
702 v+= get_rac(c, state+31-i)<<i;
// lift: one lifting step with a two-tap predictor.
// Each output is src plus (or, when inverse is non-zero, minus)
// (mul*(sum of two neighbouring ref samples) + add) >> shift.
// dst/src/ref are walked with their own element steps.
// NOTE(review): the conditions selecting the edge statements and the loop header
// are not visible in this excerpt.
708 static always_inline void lift(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int dst_step, int src_step, int ref_step, int width, int mul, int add, int shift, int highpass, int inverse){
709 const int mirror_left= !highpass;
710 const int mirror_right= (width&1) ^ highpass;
711 const int w= (width>>1) - 1 + (highpass & width);
// LIFT adds the prediction in the forward direction and subtracts it when inverting.
714 #define LIFT(src, ref, inv) ((src) + ((inv) ? - (ref) : + (ref)))
// Left edge: the single available neighbour is counted twice.
716 dst[0] = LIFT(src[0], ((mul*2*ref[0]+add)>>shift), inverse);
722 dst[i*dst_step] = LIFT(src[i*src_step], ((mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add)>>shift), inverse);
// Right edge: the single available neighbour is counted twice.
726 dst[w*dst_step] = LIFT(src[w*src_step], ((mul*2*ref[w*ref_step]+add)>>shift), inverse);
// lift5: lifting step with the same interface and edge layout as lift().
// In the visible lines the prediction r starts from 3 times the neighbouring
// ref samples before (r+add)>>shift is applied through LIFT.
// NOTE(review): r is further modified on lines not visible in this excerpt, and
// mul is not used in any visible line.
731 static always_inline void lift5(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int dst_step, int src_step, int ref_step, int width, int mul, int add, int shift, int highpass, int inverse){
732 const int mirror_left= !highpass;
733 const int mirror_right= (width&1) ^ highpass;
734 const int w= (width>>1) - 1 + (highpass & width);
741 dst[0] = LIFT(src[0], ((r+add)>>shift), inverse);
747 int r= 3*(ref[i*ref_step] + ref[(i+1)*ref_step]);
750 dst[i*dst_step] = LIFT(src[i*src_step], ((r+add)>>shift), inverse);
// Right edge: the single available neighbour is counted twice.
754 int r= 3*2*ref[w*ref_step];
757 dst[w*dst_step] = LIFT(src[w*src_step], ((r+add)>>shift), inverse);
// liftS: lifting step with the same interface and edge layout as lift(), but
// the update is performed by the LIFTS macro instead of a plain add/subtract.
763 static always_inline void liftS(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int dst_step, int src_step, int ref_step, int width, int mul, int add, int shift, int highpass, int inverse){
764 const int mirror_left= !highpass;
765 const int mirror_right= (width&1) ^ highpass;
766 const int w= (width>>1) - 1 + (highpass & width);
// Inverse: src - ((ref - 4*src) >> shift), using liftS's shift parameter.
// Forward: (64*src + 4*ref + 8) / 80. The (5<<27) bias equals (1<<23)*(5*16) and
// cancels against the trailing - (1<<23); presumably it keeps the dividend
// non-negative so that the integer division rounds downward.
770 #define LIFTS(src, ref, inv) ((inv) ? (src) - (((ref) - 4*(src))>>shift): (16*4*(src) + 4*(ref) + 8 + (5<<27))/(5*16) - (1<<23))
// Left edge: the single available neighbour is counted twice.
772 dst[0] = LIFTS(src[0], mul*2*ref[0]+add, inverse);
778 dst[i*dst_step] = LIFTS(src[i*src_step], mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add, inverse);
// Right edge: the single available neighbour is counted twice.
782 dst[w*dst_step] = LIFTS(src[w*src_step], mul*2*ref[w*ref_step]+add, inverse);
// inplace_lift: apply an n-tap lifting filter in place to every second sample of
// dst, starting at index `start`. For each such x the taps read the samples at
// x + 2*i - n + 1, i.e. samples of the opposite parity around x.
// The rounded sum ((sum + half) >> shift) is added, or subtracted when inverse is set.
788 static void inplace_lift(DWTELEM *dst, int width, int *coeffs, int n, int shift, int start, int inverse){
791 for(x=start; x<width; x+=2){
795 int x2= x + 2*i - n + 1;
// Indices past the right end are reflected back into the buffer; the handling of
// negative indices is on a line not visible in this excerpt.
797 else if(x2>=width) x2= 2*width-x2-2;
// The product is formed in 64 bits.
798 sum += coeffs[i]*(int64_t)dst[x2];
800 if(inverse) dst[x] -= (sum + (1<<shift)/2)>>shift;
801 else dst[x] += (sum + (1<<shift)/2)>>shift;
// inplace_liftV: vertical counterpart of inplace_lift. For every second row
// starting at `start`, and for every column x, the n taps read rows
// y + 2*i - n + 1 of the same column (rows are `stride` elements apart).
// The rounded sum is added, or subtracted when inverse is set.
805 static void inplace_liftV(DWTELEM *dst, int width, int height, int stride, int *coeffs, int n, int shift, int start, int inverse){
807 for(y=start; y<height; y+=2){
808 for(x=0; x<width; x++){
812 int y2= y + 2*i - n + 1;
// Rows past the bottom are reflected back into the image; the handling of
// negative rows is on a line not visible in this excerpt.
814 else if(y2>=height) y2= 2*height-y2-2;
815 sum += coeffs[i]*(int64_t)dst[x + y2*stride];
817 if(inverse) dst[x + y*stride] -= (sum + (1<<shift)/2)>>shift;
818 else dst[x + y*stride] += (sum + (1<<shift)/2)>>shift;
// Alternative coefficient sets for the generic lifting filters. Each COEFFSn is a
// compound-literal int array passed as `coeffs` to inplace_lift / inplace_liftV.
// NOTE(review): the #elif/#else/#endif lines separating the alternatives, and the
// matching Nn / SHIFTn / LXn definitions, are not visible in this excerpt; the
// repeated definitions below belong to different preprocessor branches.
827 #if 0 // more accurate 9/7
830 #define COEFFS1 (int[]){-25987,-25987}
833 #define COEFFS2 (int[]){-27777,-27777}
836 #define COEFFS3 (int[]){28931,28931}
839 #define COEFFS4 (int[]){14533,14533}
843 #define COEFFS1 (int[]){1,-9,-9,1}
846 #define COEFFS2 (int[]){-1,5,5,-1}
859 #define COEFFS1 (int[]){1,1}
862 #define COEFFS2 (int[]){-1,-1}
875 #define COEFFS2 (int[]){-1,-1}
878 #define COEFFS3 (int[]){-1,-1}
881 #define COEFFS4 (int[]){-5,29,29,-5}
886 #define COEFFS1 (int[]){-203,-203}
889 #define COEFFS2 (int[]){-217,-217}
892 #define COEFFS3 (int[]){113,113}
895 #define COEFFS4 (int[]){227,227}
903 #define COEFFS2 (int[]){-1,-1}
906 #define COEFFS3 (int[]){-1,-1}
909 #define COEFFS4 (int[]){3,3}
913 #define COEFFS1 (int[]){1,-9,-9,1}
916 #define COEFFS2 (int[]){1,1}
926 #define COEFFS1 (int[]){1,-9,-9,1}
929 #define COEFFS2 (int[]){-1,5,5,-1}
// horizontal_decomposeX: forward transform of one row of `width` samples.
// Four in-place lifting passes are applied (COEFFS1..COEFFS4, alternating start
// offsets LX1/LX0), then the row is de-interleaved through temp so that the
// odd-indexed samples end up from index w2 onward.
// NOTE(review): the declaration of temp and the copy of the even-indexed samples
// are not visible in this excerpt.
937 static void horizontal_decomposeX(DWTELEM *b, int width){
939 const int width2= width>>1;
// w2 = ceil(width/2): offset at which the odd-indexed samples are stored.
940 const int w2= (width+1)>>1;
943 inplace_lift(b, width, COEFFS1, N1, SHIFT1, LX1, 0);
944 inplace_lift(b, width, COEFFS2, N2, SHIFT2, LX0, 0);
945 inplace_lift(b, width, COEFFS3, N3, SHIFT3, LX1, 0);
946 inplace_lift(b, width, COEFFS4, N4, SHIFT4, LX0, 0);
948 for(x=0; x<width2; x++){
950 temp[x+w2]= b[2*x + 1];
// NOTE(review): the byte count uses sizeof(int) although b holds DWTELEM; this
// is only correct if DWTELEM has the size of int -- confirm.
954 memcpy(b, temp, width*sizeof(int));
// horizontal_composeX: inverse of horizontal_decomposeX for one row.
// The row is copied to temp and re-interleaved (samples stored from index w2
// onward return to the odd positions), then the four lifting passes are undone
// in reverse order (COEFFS4 first) with the inverse flag set.
// NOTE(review): the declaration of temp and the restore of the even positions
// are not visible in this excerpt.
957 static void horizontal_composeX(DWTELEM *b, int width){
959 const int width2= width>>1;
961 const int w2= (width+1)>>1;
// NOTE(review): the byte count uses sizeof(int) although b holds DWTELEM; this
// is only correct if DWTELEM has the size of int -- confirm.
963 memcpy(temp, b, width*sizeof(int));
964 for(x=0; x<width2; x++){
966 b[2*x + 1]= temp[x+w2];
971 inplace_lift(b, width, COEFFS4, N4, SHIFT4, LX0, 1);
972 inplace_lift(b, width, COEFFS3, N3, SHIFT3, LX1, 1);
973 inplace_lift(b, width, COEFFS2, N2, SHIFT2, LX0, 1);
974 inplace_lift(b, width, COEFFS1, N1, SHIFT1, LX1, 1);
/**
 * One level of the forward 2-D transform with the generic lifting filters.
 * Multiplies every sample by SCALEX, transforms each row with
 * horizontal_decomposeX(), then runs the four vertical lifting passes over
 * the whole width x height area (rows `stride` elements apart).
 */
977 static void spatial_decomposeX(DWTELEM *buffer, int width, int height, int stride){
/* pre-scale all samples by SCALEX */
980 for(y=0; y<height; y++){
981 for(x=0; x<width; x++){
982 buffer[y*stride + x] *= SCALEX;
/* horizontal pass, one row at a time */
986 for(y=0; y<height; y++){
987 horizontal_decomposeX(buffer + y*stride, width);
/* vertical pass: same coefficient sets as the horizontal one, inverse == 0 */
990 inplace_liftV(buffer, width, height, stride, COEFFS1, N1, SHIFT1, LX1, 0);
991 inplace_liftV(buffer, width, height, stride, COEFFS2, N2, SHIFT2, LX0, 0);
992 inplace_liftV(buffer, width, height, stride, COEFFS3, N3, SHIFT3, LX1, 0);
993 inplace_liftV(buffer, width, height, stride, COEFFS4, N4, SHIFT4, LX0, 0);
/**
 * One level of the inverse 2-D transform with the generic lifting filters.
 * Undoes spatial_decomposeX() in reverse order: vertical lifting passes with
 * inverse == 1 (COEFFS4 first), horizontal_composeX() on every row, and
 * finally a division of every sample by SCALEX.
 */
996 static void spatial_composeX(DWTELEM *buffer, int width, int height, int stride){
/* vertical pass, steps in the opposite order of the forward transform */
999 inplace_liftV(buffer, width, height, stride, COEFFS4, N4, SHIFT4, LX0, 1);
1000 inplace_liftV(buffer, width, height, stride, COEFFS3, N3, SHIFT3, LX1, 1);
1001 inplace_liftV(buffer, width, height, stride, COEFFS2, N2, SHIFT2, LX0, 1);
1002 inplace_liftV(buffer, width, height, stride, COEFFS1, N1, SHIFT1, LX1, 1);
/* horizontal pass, one row at a time */
1004 for(y=0; y<height; y++){
1005 horizontal_composeX(buffer + y*stride, width);
/* remove the SCALEX factor applied by the forward transform */
1008 for(y=0; y<height; y++){
1009 for(x=0; x<width; x++){
1010 buffer[y*stride + x] /= SCALEX;
/**
 * Forward horizontal 5/3 integer transform of one row of `width` samples.
 * Uses a stack scratch row `temp` and leaves the result in `b`; the two
 * lift() calls at the end write to b+w2 and b respectively.
 */
1015 static void horizontal_decompose53i(DWTELEM *b, int width){
/* variable-length array: stack usage grows with the row width */
1016 DWTELEM temp[width];
1017 const int width2= width>>1;
/* ceil(width/2): offset of the second half inside temp and b */
1019 const int w2= (width+1)>>1;
/* odd-indexed samples b[2*x+1] are gathered at temp[w2 + x] */
1021 for(x=0; x<width2; x++){
1023 temp[x+w2]= b[2*x + 1];
/* NOTE(review): the A1..A4 loop below looks like a hand-unrolled variant of
 * the lifting step; presumably a disabled alternative to the lift() calls
 * that follow -- confirm which one is compiled */
1037 for(x=1; x+1<width2; x+=2){
1041 A2 += (A1 + A3 + 2)>>2;
1045 A1= temp[x+1+width2];
1048 A4 += (A1 + A3 + 2)>>2;
1054 A2 += (A1 + A3 + 2)>>2;
/* two lifting steps; see lift() for the meaning of the numeric arguments */
1059 lift(b+w2, temp+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 0);
1060 lift(b , temp , b+w2, 1, 1, 1, width, 1, 2, 2, 0, 0);
/**
 * Forward vertical 5/3 step on row b1: subtracts from every sample the
 * floor-halved sum of the samples in the same column of rows b0 and b2.
 */
1064 static void vertical_decompose53iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1067 for(i=0; i<width; i++){
1068 b1[i] -= (b0[i] + b2[i])>>1;
/**
 * Forward vertical 5/3 step on row b1: adds to every sample a quarter of the
 * sum of the same-column samples of rows b0 and b2 (the +2 rounds the >>2).
 */
1072 static void vertical_decompose53iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1075 for(i=0; i<width; i++){
1076 b1[i] += (b0[i] + b2[i] + 2)>>2;
/**
 * One level of the forward 2-D 5/3 integer transform, processed two rows per
 * iteration. Each iteration transforms the next two rows horizontally and
 * then applies the vertical steps to rows that already have both neighbours.
 * Row indices outside the picture are folded back with mirror().
 */
1080 static void spatial_decompose53i(DWTELEM *buffer, int width, int height, int stride){
/* row pointers for the two rows preceding the first iteration (y = -2) */
1082 DWTELEM *b0= buffer + mirror(-2-1, height-1)*stride;
1083 DWTELEM *b1= buffer + mirror(-2 , height-1)*stride;
/* starts two rows above the picture so the vertical filter is primed */
1085 for(y=-2; y<height; y+=2){
1086 DWTELEM *b2= buffer + mirror(y+1, height-1)*stride;
1087 DWTELEM *b3= buffer + mirror(y+2, height-1)*stride;
/* the comparison against (unsigned)height rejects negative row numbers as
 * well as rows >= height, so only real rows are processed (once each) */
1090 if(y+1<(unsigned)height) horizontal_decompose53i(b2, width);
1091 if(y+2<(unsigned)height) horizontal_decompose53i(b3, width);
1092 STOP_TIMER("horizontal_decompose53i")}
/* vertical steps: row y+1 (b2) first, then row y (b1), which uses the updated b2 */
1095 if(y+1<(unsigned)height) vertical_decompose53iH0(b1, b2, b3, width);
1096 if(y+0<(unsigned)height) vertical_decompose53iL0(b0, b1, b2, width);
1097 STOP_TIMER("vertical_decompose53i*")}
/**
 * Forward horizontal 9/7 integer transform of one row of `width` samples.
 * Four lifting steps parameterised by the W_A*, W_B*, W_C* and W_D*
 * constants are chained through the stack scratch row `temp`; the last two
 * steps write the result back into `b` (b+w2 and b).
 */
1104 static void horizontal_decompose97i(DWTELEM *b, int width){
/* variable-length array: stack usage grows with the row width */
1105 DWTELEM temp[width];
/* ceil(width/2): offset of the second half inside temp and b */
1106 const int w2= (width+1)>>1;
/* see lift()/liftS()/lift5() for the meaning of the numeric arguments */
1108 lift (temp+w2, b +1, b , 1, 2, 2, width, -W_AM, W_AO, W_AS, 1, 0);
1109 liftS(temp , b , temp+w2, 1, 2, 1, width, -W_BM, W_BO, W_BS, 0, 0);
1110 lift5(b +w2, temp+w2, temp , 1, 1, 1, width, W_CM, W_CO, W_CS, 1, 0);
1111 lift (b , temp , b +w2, 1, 1, 1, width, W_DM, W_DO, W_DS, 0, 0);
/**
 * Forward vertical 9/7 step "A" on row b1: subtracts
 * (W_AM*(b0+b2) + W_AO) >> W_AS from every sample, column by column.
 */
1115 static void vertical_decompose97iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1118 for(i=0; i<width; i++){
1119 b1[i] -= (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
/**
 * Forward vertical 9/7 step "C" on row b1: adds a W_C*-weighted, shifted sum
 * of the same-column samples of rows b0 and b2.
 * NOTE(review): two formulations are visible (a direct W_CM multiply and one
 * starting from r = 3*(b0+b2)); they are presumably alternatives selected by
 * the preprocessor -- confirm.
 */
1123 static void vertical_decompose97iH1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1126 for(i=0; i<width; i++){
1128 b1[i] += (W_CM*(b0[i] + b2[i])+W_CO)>>W_CS;
1130 int r= 3*(b0[i] + b2[i]);
1133 b1[i] += (r+W_CO)>>W_CS;
/**
 * Forward vertical 9/7 step "B" on row b1, computed from the same-column
 * samples of rows b0 and b2.
 * NOTE(review): two formulations are visible (a W_BM multiply/shift and a
 * closed-form division by 5*16); they are presumably alternatives selected
 * by the preprocessor -- confirm.
 */
1138 static void vertical_decompose97iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1141 for(i=0; i<width; i++){
1143 b1[i] -= (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS;
/* the (5<<27) bias before the division and the -(1<<23) after it cancel
 * ((5<<27)/(5*16) == 1<<23); presumably this keeps the dividend positive so
 * the C division rounds like a floor -- confirm */
1145 b1[i] = (16*4*b1[i] - 4*(b0[i] + b2[i]) + 8*5 + (5<<27)) / (5*16) - (1<<23);
/**
 * Forward vertical 9/7 step "D" on row b1: adds
 * (W_DM*(b0+b2) + W_DO) >> W_DS to every sample, column by column.
 */
1150 static void vertical_decompose97iL1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1153 for(i=0; i<width; i++){
1154 b1[i] += (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS;
/**
 * One level of the forward 2-D 9/7 integer transform, processed two rows per
 * iteration. Each iteration transforms two new rows horizontally and then
 * runs the four vertical lifting steps, each on the row that has just
 * received all of its inputs. Out-of-picture rows are folded with mirror().
 */
1158 static void spatial_decompose97i(DWTELEM *buffer, int width, int height, int stride){
/* row pointers for the four rows preceding the first iteration (y = -4) */
1160 DWTELEM *b0= buffer + mirror(-4-1, height-1)*stride;
1161 DWTELEM *b1= buffer + mirror(-4 , height-1)*stride;
1162 DWTELEM *b2= buffer + mirror(-4+1, height-1)*stride;
1163 DWTELEM *b3= buffer + mirror(-4+2, height-1)*stride;
/* starts four rows above the picture so the vertical filter chain is primed */
1165 for(y=-4; y<height; y+=2){
1166 DWTELEM *b4= buffer + mirror(y+3, height-1)*stride;
1167 DWTELEM *b5= buffer + mirror(y+4, height-1)*stride;
/* the comparison against (unsigned)height rejects negative row numbers as
 * well as rows >= height */
1170 if(y+3<(unsigned)height) horizontal_decompose97i(b4, width);
1171 if(y+4<(unsigned)height) horizontal_decompose97i(b5, width);
1173 STOP_TIMER("horizontal_decompose97i")
/* vertical steps in dependency order: row y+3, then y+2, y+1 and y */
1177 if(y+3<(unsigned)height) vertical_decompose97iH0(b3, b4, b5, width);
1178 if(y+2<(unsigned)height) vertical_decompose97iL0(b2, b3, b4, width);
1179 if(y+1<(unsigned)height) vertical_decompose97iH1(b1, b2, b3, width);
1180 if(y+0<(unsigned)height) vertical_decompose97iL1(b0, b1, b2, width);
1183 STOP_TIMER("vertical_decompose97i")
/**
 * Forward multi-level 2-D wavelet transform of `buffer`, in place.
 *
 * For level 0 .. decomposition_count-1 the selected single-level transform
 * is run on an area of (width>>level) x (height>>level) samples with a row
 * distance of stride<<level.
 *
 * @param type 0 = 9/7 integer, 1 = 5/3 integer, 2 = generic "X" filters
 */
1193 void ff_spatial_dwt(DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
1196 for(level=0; level<decomposition_count; level++){
1198 case 0: spatial_decompose97i(buffer, width>>level, height>>level, stride<<level); break;
1199 case 1: spatial_decompose53i(buffer, width>>level, height>>level, stride<<level); break;
1200 case 2: spatial_decomposeX (buffer, width>>level, height>>level, stride<<level); break;
/**
 * Inverse horizontal 5/3 integer transform of one row of `width` samples.
 * The two lift() calls write into the stack scratch row `temp`, whose second
 * half is then interleaved back into the odd positions of `b`.
 */
1205 static void horizontal_compose53i(DWTELEM *b, int width){
/* variable-length array: stack usage grows with the row width */
1206 DWTELEM temp[width];
1207 const int width2= width>>1;
/* ceil(width/2): offset of the second half inside temp and b */
1208 const int w2= (width+1)>>1;
/* NOTE(review): the A1..A4 loop below looks like a hand-unrolled variant of
 * the lifting step; presumably a disabled alternative to the lift() calls
 * that follow -- confirm which one is compiled */
1220 for(x=1; x+1<width2; x+=2){
1224 A2 += (A1 + A3 + 2)>>2;
1228 A1= temp[x+1+width2];
1231 A4 += (A1 + A3 + 2)>>2;
1237 A2 += (A1 + A3 + 2)>>2;
/* same two steps as the forward transform in reverse order; the last
 * argument is 1 here where the forward calls pass 0 */
1241 lift(temp , b , b+w2, 1, 1, 1, width, 1, 2, 2, 0, 1);
1242 lift(temp+w2, b+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 1);
/* temp[w2 + x] returns to the odd position b[2*x+1] */
1244 for(x=0; x<width2; x++){
1246 b[2*x + 1]= temp[x+w2];
/**
 * Inverse of vertical_decompose53iH0(): adds back the floor-halved sum of
 * the same-column samples of rows b0 and b2 to every sample of row b1.
 */
1252 static void vertical_compose53iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1255 for(i=0; i<width; i++){
1256 b1[i] += (b0[i] + b2[i])>>1;
/**
 * Inverse of vertical_decompose53iL0(): subtracts the rounded quarter of the
 * sum of the same-column samples of rows b0 and b2 from every sample of b1.
 */
1260 static void vertical_compose53iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1263 for(i=0; i<width; i++){
1264 b1[i] -= (b0[i] + b2[i] + 2)>>2;
/**
 * Prepares the 5/3 inverse-transform state `cs` for slice-buffer operation:
 * b0 and b1 are set to the slice-buffer lines for the mirrored rows -2 and
 * -1, each scaled by `stride_line`.
 */
1268 static void spatial_compose53i_buffered_init(dwt_compose_t *cs, slice_buffer * sb, int height, int stride_line){
1269 cs->b0 = slice_buffer_get_line(sb, mirror(-1-1, height-1) * stride_line);
1270 cs->b1 = slice_buffer_get_line(sb, mirror(-1 , height-1) * stride_line);
/**
 * Prepares the 5/3 inverse-transform state `cs` for a plain frame buffer:
 * b0 and b1 point at the mirrored rows -2 and -1 of `buffer`.
 */
1274 static void spatial_compose53i_init(dwt_compose_t *cs, DWTELEM *buffer, int height, int stride){
1275 cs->b0 = buffer + mirror(-1-1, height-1)*stride;
1276 cs->b1 = buffer + mirror(-1 , height-1)*stride;
/**
 * Advances the slice-buffered 5/3 inverse transform by one step: fetches the
 * next two lines from the slice buffer, applies the vertical inverse steps
 * and then the horizontal inverse transform to the rows that are complete.
 * `y` is presumably the current row position kept in cs -- confirm.
 */
1280 static void spatial_compose53i_dy_buffered(dwt_compose_t *cs, slice_buffer * sb, int width, int height, int stride_line){
1283 DWTELEM *b0= cs->b0;
1284 DWTELEM *b1= cs->b1;
1285 DWTELEM *b2= slice_buffer_get_line(sb, mirror(y+1, height-1) * stride_line);
1286 DWTELEM *b3= slice_buffer_get_line(sb, mirror(y+2, height-1) * stride_line);
/* the unsigned comparisons skip row numbers that are negative or >= height */
1289 if(y+1<(unsigned)height) vertical_compose53iL0(b1, b2, b3, width);
1290 if(y+0<(unsigned)height) vertical_compose53iH0(b0, b1, b2, width);
1291 STOP_TIMER("vertical_compose53i*")}
/* rows y-1 and y are vertically complete now and can be finished horizontally */
1294 if(y-1<(unsigned)height) horizontal_compose53i(b0, width);
1295 if(y+0<(unsigned)height) horizontal_compose53i(b1, width);
1296 STOP_TIMER("horizontal_compose53i")}
/**
 * Advances the 5/3 inverse transform on a plain frame buffer by one step:
 * addresses the next two rows of `buffer`, applies the vertical inverse
 * steps and then the horizontal inverse transform to the completed rows.
 * `y` is presumably the current row position kept in cs -- confirm.
 */
1303 static void spatial_compose53i_dy(dwt_compose_t *cs, DWTELEM *buffer, int width, int height, int stride){
1305 DWTELEM *b0= cs->b0;
1306 DWTELEM *b1= cs->b1;
1307 DWTELEM *b2= buffer + mirror(y+1, height-1)*stride;
1308 DWTELEM *b3= buffer + mirror(y+2, height-1)*stride;
/* the unsigned comparisons skip row numbers that are negative or >= height */
1311 if(y+1<(unsigned)height) vertical_compose53iL0(b1, b2, b3, width);
1312 if(y+0<(unsigned)height) vertical_compose53iH0(b0, b1, b2, width);
1313 STOP_TIMER("vertical_compose53i*")}
/* rows y-1 and y are vertically complete now and can be finished horizontally */
1316 if(y-1<(unsigned)height) horizontal_compose53i(b0, width);
1317 if(y+0<(unsigned)height) horizontal_compose53i(b1, width);
1318 STOP_TIMER("horizontal_compose53i")}
/**
 * One complete level of the inverse 2-D 5/3 transform on a frame buffer:
 * initialises a local compose state and steps it with
 * spatial_compose53i_dy() for as long as cs.y <= height.
 */
1325 static void spatial_compose53i(DWTELEM *buffer, int width, int height, int stride){
1327 spatial_compose53i_init(&cs, buffer, height, stride);
1328 while(cs.y <= height)
1329 spatial_compose53i_dy(&cs, buffer, width, height, stride);
/**
 * Inverse horizontal 9/7 integer transform of one row of `width` samples.
 * Applies the D, C, B and A lifting steps (the reverse of the forward order)
 * through the stack scratch row `temp`; the last two steps write with a
 * destination step of 2, placing results at even (b) and odd (b+1) positions.
 */
1333 void ff_snow_horizontal_compose97i(DWTELEM *b, int width){
/* variable-length array: stack usage grows with the row width */
1334 DWTELEM temp[width];
/* ceil(width/2): offset of the second half inside temp and b */
1335 const int w2= (width+1)>>1;
/* see lift()/liftS()/lift5() for the meaning of the numeric arguments; the
 * last argument is 1 here where the forward transform passes 0 */
1337 lift (temp , b , b +w2, 1, 1, 1, width, W_DM, W_DO, W_DS, 0, 1);
1338 lift5(temp+w2, b +w2, temp , 1, 1, 1, width, W_CM, W_CO, W_CS, 1, 1);
1339 liftS(b , temp , temp+w2, 2, 1, 1, width, -W_BM, W_BO, W_BS, 0, 1);
1340 lift (b+1 , temp+w2, b , 2, 1, 2, width, -W_AM, W_AO, W_AS, 1, 1);
/**
 * Inverse of vertical_decompose97iH0(): adds
 * (W_AM*(b0+b2) + W_AO) >> W_AS back to every sample of row b1.
 */
1343 static void vertical_compose97iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1346 for(i=0; i<width; i++){
1347 b1[i] += (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
/**
 * Inverse of vertical_decompose97iH1(): subtracts the W_C*-weighted, shifted
 * sum of the same-column samples of rows b0 and b2 from row b1.
 * NOTE(review): two formulations are visible (a direct W_CM multiply and one
 * starting from r = 3*(b0+b2)); they are presumably alternatives selected by
 * the preprocessor -- confirm.
 */
1351 static void vertical_compose97iH1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1354 for(i=0; i<width; i++){
1356 b1[i] -= (W_CM*(b0[i] + b2[i])+W_CO)>>W_CS;
1358 int r= 3*(b0[i] + b2[i]);
1361 b1[i] -= (r+W_CO)>>W_CS;
/**
 * Inverse of vertical_decompose97iL0(): adds a W_B*-weighted, shifted term
 * built from the same-column samples of rows b0 and b2 to row b1.
 * NOTE(review): two formulations are visible (with and without the extra
 * 4*b1[i] term); they are presumably alternatives selected by the
 * preprocessor -- confirm.
 */
1366 static void vertical_compose97iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1369 for(i=0; i<width; i++){
1371 b1[i] += (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS;
1373 b1[i] += (W_BM*(b0[i] + b2[i])+4*b1[i]+W_BO)>>W_BS;
/**
 * Inverse of vertical_decompose97iL1(): subtracts
 * (W_DM*(b0+b2) + W_DO) >> W_DS from every sample of row b1.
 */
1378 static void vertical_compose97iL1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1381 for(i=0; i<width; i++){
1382 b1[i] -= (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS;
/**
 * Fused vertical inverse 9/7 step over six consecutive rows b0..b5.
 * For every column it performs, in this order, the D step on b4, the C step
 * on b3, the B step on b2 and the A step on b1 -- the same formulas as
 * vertical_compose97iL1/H1/L0/H0 applied to (b3,b4,b5), (b2,b3,b4),
 * (b1,b2,b3) and (b0,b1,b2).
 * NOTE(review): for the C and B steps two formulations are visible; they are
 * presumably preprocessor alternatives -- confirm.
 */
1386 void ff_snow_vertical_compose97i(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, DWTELEM *b3, DWTELEM *b4, DWTELEM *b5, int width){
1389 for(i=0; i<width; i++){
/* D step (matches vertical_compose97iL1) */
1393 b4[i] -= (W_DM*(b3[i] + b5[i])+W_DO)>>W_DS;
/* C step (matches vertical_compose97iH1) */
1395 b3[i] -= (W_CM*(b2[i] + b4[i])+W_CO)>>W_CS;
1397 r= 3*(b2[i] + b4[i]);
1400 b3[i] -= (r+W_CO)>>W_CS;
/* B step (matches vertical_compose97iL0) */
1403 b2[i] += (W_BM*(b1[i] + b3[i])+W_BO)>>W_BS;
1405 b2[i] += (W_BM*(b1[i] + b3[i])+4*b2[i]+W_BO)>>W_BS;
/* A step (matches vertical_compose97iH0) */
1407 b1[i] += (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
/**
 * Prepares the 9/7 inverse-transform state `cs` for slice-buffer operation:
 * b0..b3 are set to the slice-buffer lines for the mirrored rows -4..-1,
 * each scaled by `stride_line`.
 */
1411 static void spatial_compose97i_buffered_init(dwt_compose_t *cs, slice_buffer * sb, int height, int stride_line){
1412 cs->b0 = slice_buffer_get_line(sb, mirror(-3-1, height-1) * stride_line);
1413 cs->b1 = slice_buffer_get_line(sb, mirror(-3 , height-1) * stride_line);
1414 cs->b2 = slice_buffer_get_line(sb, mirror(-3+1, height-1) * stride_line);
1415 cs->b3 = slice_buffer_get_line(sb, mirror(-3+2, height-1) * stride_line);
/**
 * Prepares the 9/7 inverse-transform state `cs` for a plain frame buffer:
 * b0..b3 point at the mirrored rows -4..-1 of `buffer`.
 */
1419 static void spatial_compose97i_init(dwt_compose_t *cs, DWTELEM *buffer, int height, int stride){
1420 cs->b0 = buffer + mirror(-3-1, height-1)*stride;
1421 cs->b1 = buffer + mirror(-3 , height-1)*stride;
1422 cs->b2 = buffer + mirror(-3+1, height-1)*stride;
1423 cs->b3 = buffer + mirror(-3+2, height-1)*stride;
/**
 * Advances the slice-buffered 9/7 inverse transform by one step: fetches the
 * next two lines from the slice buffer, applies the vertical inverse steps
 * and then the horizontal inverse transform (through `dsp`) to the rows that
 * are complete. `y` is presumably the current row position kept in cs.
 */
1427 static void spatial_compose97i_dy_buffered(DSPContext *dsp, dwt_compose_t *cs, slice_buffer * sb, int width, int height, int stride_line){
1430 DWTELEM *b0= cs->b0;
1431 DWTELEM *b1= cs->b1;
1432 DWTELEM *b2= cs->b2;
1433 DWTELEM *b3= cs->b3;
1434 DWTELEM *b4= slice_buffer_get_line(sb, mirror(y + 3, height - 1) * stride_line);
1435 DWTELEM *b5= slice_buffer_get_line(sb, mirror(y + 4, height - 1) * stride_line);
/* away from the top and bottom borders all four steps apply, so the fused
 * dsp routine is used; the per-step calls below handle the border rows
 * (presumably the else branch of this test) */
1438 if(y>0 && y+4<height){
1439 dsp->vertical_compose97i(b0, b1, b2, b3, b4, b5, width);
1441 if(y+3<(unsigned)height) vertical_compose97iL1(b3, b4, b5, width);
1442 if(y+2<(unsigned)height) vertical_compose97iH1(b2, b3, b4, width);
1443 if(y+1<(unsigned)height) vertical_compose97iL0(b1, b2, b3, width);
1444 if(y+0<(unsigned)height) vertical_compose97iH0(b0, b1, b2, width);
1447 STOP_TIMER("vertical_compose97i")}}
/* rows y-1 and y are vertically complete now and can be finished horizontally */
1450 if(y-1<(unsigned)height) dsp->horizontal_compose97i(b0, width);
1451 if(y+0<(unsigned)height) dsp->horizontal_compose97i(b1, width);
/* condition only gates the profiling timer */
1452 if(width>400 && y+0<(unsigned)height){
1453 STOP_TIMER("horizontal_compose97i")}}
/**
 * Advances the 9/7 inverse transform on a plain frame buffer by one step:
 * addresses the next two rows of `buffer`, applies the four vertical inverse
 * steps and then the horizontal inverse transform to the completed rows.
 * `y` is presumably the current row position kept in cs -- confirm.
 */
1462 static void spatial_compose97i_dy(dwt_compose_t *cs, DWTELEM *buffer, int width, int height, int stride){
1464 DWTELEM *b0= cs->b0;
1465 DWTELEM *b1= cs->b1;
1466 DWTELEM *b2= cs->b2;
1467 DWTELEM *b3= cs->b3;
1468 DWTELEM *b4= buffer + mirror(y+3, height-1)*stride;
1469 DWTELEM *b5= buffer + mirror(y+4, height-1)*stride;
/* vertical steps in dependency order; the unsigned comparisons skip row
 * numbers that are negative or >= height */
1472 if(y+3<(unsigned)height) vertical_compose97iL1(b3, b4, b5, width);
1473 if(y+2<(unsigned)height) vertical_compose97iH1(b2, b3, b4, width);
1474 if(y+1<(unsigned)height) vertical_compose97iL0(b1, b2, b3, width);
1475 if(y+0<(unsigned)height) vertical_compose97iH0(b0, b1, b2, width);
1477 STOP_TIMER("vertical_compose97i")}}
/* rows y-1 and y are vertically complete now and can be finished horizontally */
1480 if(y-1<(unsigned)height) ff_snow_horizontal_compose97i(b0, width);
1481 if(y+0<(unsigned)height) ff_snow_horizontal_compose97i(b1, width);
/* condition only gates the profiling timer */
1482 if(width>400 && b0 <= b2){
1483 STOP_TIMER("horizontal_compose97i")}}
/**
 * One complete level of the inverse 2-D 9/7 transform on a frame buffer:
 * initialises a local compose state and steps it with
 * spatial_compose97i_dy() for as long as cs.y <= height.
 */
1492 static void spatial_compose97i(DWTELEM *buffer, int width, int height, int stride){
1494 spatial_compose97i_init(&cs, buffer, height, stride);
1495 while(cs.y <= height)
1496 spatial_compose97i_dy(&cs, buffer, width, height, stride);
/**
 * Initialises one compose state per decomposition level (cs[level]) for the
 * slice-buffered inverse transform, from the coarsest level down to level 0.
 * Each level uses height>>level rows and a line stride of stride_line<<level.
 * Type 2 (generic "X" filters) has no buffered implementation and only logs
 * an error.
 */
1499 static void ff_spatial_idwt_buffered_init(dwt_compose_t *cs, slice_buffer * sb, int width, int height, int stride_line, int type, int decomposition_count){
1501 for(level=decomposition_count-1; level>=0; level--){
1503 case 0: spatial_compose97i_buffered_init(cs+level, sb, height>>level, stride_line<<level); break;
1504 case 1: spatial_compose53i_buffered_init(cs+level, sb, height>>level, stride_line<<level); break;
1505 /* not slicified yet */
1506 case 2: /*spatial_composeX(buffer, width>>level, height>>level, stride<<level); break;*/
1507 av_log(NULL, AV_LOG_ERROR, "spatial_composeX neither buffered nor slicified yet.\n"); break;
/**
 * Initialises one compose state per decomposition level (cs[level]) for the
 * sliced inverse transform on a frame buffer, from the coarsest level down
 * to level 0. For type 2 there is no sliced variant, so the whole level is
 * inverse-transformed right here with spatial_composeX().
 */
1512 static void ff_spatial_idwt_init(dwt_compose_t *cs, DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
1514 for(level=decomposition_count-1; level>=0; level--){
1516 case 0: spatial_compose97i_init(cs+level, buffer, height>>level, stride<<level); break;
1517 case 1: spatial_compose53i_init(cs+level, buffer, height>>level, stride<<level); break;
1518 /* not slicified yet */
1519 case 2: spatial_composeX(buffer, width>>level, height>>level, stride<<level); break;
/**
 * Runs the inverse transform on a frame buffer far enough that output row
 * `y` is available. For every level, coarsest first, the level's compose
 * state is stepped until its row position passes
 * min((y>>level) + support, height>>level).
 */
1524 static void ff_spatial_idwt_slice(dwt_compose_t *cs, DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count, int y){
/* look-ahead in rows: 3 for the 5/3 filter (type 1), 5 otherwise */
1525 const int support = type==1 ? 3 : 5;
1529 for(level=decomposition_count-1; level>=0; level--){
1530 while(cs[level].y <= FFMIN((y>>level)+support, height>>level)){
1532 case 0: spatial_compose97i_dy(cs+level, buffer, width>>level, height>>level, stride<<level);
1534 case 1: spatial_compose53i_dy(cs+level, buffer, width>>level, height>>level, stride<<level);
/**
 * Slice-buffer counterpart of ff_spatial_idwt_slice(): for every level,
 * coarsest first, steps the level's compose state until its row position
 * passes min((y>>level) + support, height>>level).
 */
1542 static void ff_spatial_idwt_buffered_slice(DSPContext *dsp, dwt_compose_t *cs, slice_buffer * slice_buf, int width, int height, int stride_line, int type, int decomposition_count, int y){
/* look-ahead in rows: 3 for the 5/3 filter (type 1), 5 otherwise */
1543 const int support = type==1 ? 3 : 5;
1547 for(level=decomposition_count-1; level>=0; level--){
1548 while(cs[level].y <= FFMIN((y>>level)+support, height>>level)){
1550 case 0: spatial_compose97i_dy_buffered(dsp, cs+level, slice_buf, width>>level, height>>level, stride_line<<level);
1552 case 1: spatial_compose53i_dy_buffered(cs+level, slice_buf, width>>level, height>>level, stride_line<<level);
/**
 * Full inverse multi-level 2-D wavelet transform of `buffer`, in place.
 * Two paths are visible: a direct per-level spatial_composeX() loop
 * (presumably taken for type 2 -- confirm) and the sliced path, which
 * initialises per-level states and then requests the output four rows at a
 * time through ff_spatial_idwt_slice().
 */
1560 static void ff_spatial_idwt(DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
1563 for(level=decomposition_count-1; level>=0; level--)
1564 spatial_composeX (buffer, width>>level, height>>level, stride<<level);
/* one compose state per decomposition level */
1566 dwt_compose_t cs[MAX_DECOMPOSITIONS];
1568 ff_spatial_idwt_init(cs, buffer, width, height, stride, type, decomposition_count);
1569 for(y=0; y<height; y+=4)
1570 ff_spatial_idwt_slice(cs, buffer, width, height, stride, type, decomposition_count, y);
/**
 * Encodes the coefficients of subband `b` (w x h samples read from `src`)
 * with the range coder s->c, using context modelling plus zero-run coding.
 *
 * The visible code makes two passes over the band. The first pass collects
 * run lengths in runs[] for positions whose context (left, top-left, top,
 * top-right neighbour and parent coefficient) is entirely zero. The second
 * pass writes, per coefficient, either a significance bit (non-zero context)
 * or consumes the run lengths, followed by magnitude and sign.
 *
 * @param parent  coefficients of the parent band, or presumably NULL if none
 * @return NOTE(review): return statements are not visible here; an error is
 *         logged when the output buffer runs low, presumably with a negative
 *         return -- confirm
 */
1574 static int encode_subband_c0run(SnowContext *s, SubBand *b, DWTELEM *src, DWTELEM *parent, int stride, int orientation){
1575 const int w= b->width;
1576 const int h= b->height;
/* ---- pass 1: gather neighbours of each coefficient and collect runs ---- */
1588 int /*ll=0, */l=0, lt=0, t=0, rt=0;
1589 v= src[x + y*stride];
1592 t= src[x + (y-1)*stride];
1594 lt= src[x - 1 + (y-1)*stride];
1597 rt= src[x + 1 + (y-1)*stride];
1601 l= src[x - 1 + y*stride];
/* NOTE(review): `ll` is assigned here although its declaration above is
 * commented out, so these lines are presumably compiled out -- confirm */
1603 if(orientation==1) ll= src[y + (x-2)*stride];
1604 else ll= src[x - 2 + y*stride];
/* parent coefficient, only if (px,py) lies inside the parent band */
1610 if(px<b->parent->width && py<b->parent->height)
1611 p= parent[px + py*2*stride];
/* all-zero context: this position is covered by run-length coding */
1613 if(!(/*ll|*/l|lt|t|rt|p)){
1615 runs[run_index++]= run;
1623 max_index= run_index;
1624 runs[run_index++]= run;
1626 run= runs[run_index++];
/* the number of runs is written first (the decoder reads it with state[30]) */
1628 put_symbol2(&s->c, b->state[30], max_index, 0);
1629 if(run_index <= max_index)
1630 put_symbol2(&s->c, b->state[1], run, 3);
/* output-space check: fewer than w*40 bytes left in the bytestream */
1633 if(s->c.bytestream_end - s->c.bytestream < w*40){
1634 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
/* ---- pass 2: same neighbourhood gathering, now emitting symbols ---- */
1639 int /*ll=0, */l=0, lt=0, t=0, rt=0;
1640 v= src[x + y*stride];
1643 t= src[x + (y-1)*stride];
1645 lt= src[x - 1 + (y-1)*stride];
1648 rt= src[x + 1 + (y-1)*stride];
1652 l= src[x - 1 + y*stride];
1654 if(orientation==1) ll= src[y + (x-2)*stride];
1655 else ll= src[x - 2 + y*stride];
1661 if(px<b->parent->width && py<b->parent->height)
1662 p= parent[px + py*2*stride];
/* non-zero context: code an explicit "coefficient is non-zero" bit, with the
 * context chosen from the log2 of a weighted sum of neighbour magnitudes */
1664 if(/*ll|*/l|lt|t|rt|p){
1665 int context= av_log2(/*ABS(ll) + */3*ABS(l) + ABS(lt) + 2*ABS(t) + ABS(rt) + ABS(p));
1667 put_rac(&s->c, &b->state[0][context], !!v);
/* zero context: advance through the precomputed run lengths */
1670 run= runs[run_index++];
1672 if(run_index <= max_index)
1673 put_symbol2(&s->c, b->state[1], run, 3);
/* magnitude and sign of a non-zero coefficient */
1681 int context= av_log2(/*ABS(ll) + */3*ABS(l) + ABS(lt) + 2*ABS(t) + ABS(rt) + ABS(p));
/* l2/t2 pack magnitude*2 plus a sign bit, used to pick the sign context */
1682 int l2= 2*ABS(l) + (l<0);
1683 int t2= 2*ABS(t) + (t<0);
/* magnitude is coded minus one because zero was ruled out already */
1685 put_symbol2(&s->c, b->state[context + 2], ABS(v)-1, context-4);
1686 put_rac(&s->c, &b->state[0][16 + 1 + 3 + quant3bA[l2&0xFF] + 3*quant3bA[t2&0xFF]], v<0);
/**
 * Encodes one subband. Currently a thin wrapper that forwards all arguments
 * to encode_subband_c0run() and returns its result; the other coder variants
 * are commented out.
 */
1694 static int encode_subband(SnowContext *s, SubBand *b, DWTELEM *src, DWTELEM *parent, int stride, int orientation){
1695 // encode_subband_qtree(s, b, src, parent, stride, orientation);
1696 // encode_subband_z0run(s, b, src, parent, stride, orientation);
1697 return encode_subband_c0run(s, b, src, parent, stride, orientation);
1698 // encode_subband_dzr(s, b, src, parent, stride, orientation);
/**
 * Decodes the coefficients of subband `b` from the range coder s->c into the
 * sparse list b->x_coeff. Each list entry stores a column `x` and a packed
 * value `coeff` = 2*magnitude + sign bit; an entry with x == w+1 is written
 * as an end marker (presumably one per row -- confirm).
 *
 * @param parent  parent subband whose x_coeff list supplies the parent
 *                context, or NULL
 */
1701 static inline void unpack_coeffs(SnowContext *s, SubBand *b, SubBand * parent, int orientation){
1702 const int w= b->width;
1703 const int h= b->height;
/* xc: write cursor; prev_xc/prev2_xc: cursors into earlier output (used for
 * the top neighbours); parent_xc: cursor into the parent band's list */
1708 x_and_coeff *xc= b->x_coeff;
1709 x_and_coeff *prev_xc= NULL;
1710 x_and_coeff *prev2_xc= xc;
1711 x_and_coeff *parent_xc= parent ? parent->x_coeff : NULL;
1712 x_and_coeff *prev_parent_xc= parent_xc;
/* number of runs first, then the first run length (mirrors the encoder) */
1714 runs= get_symbol2(&s->c, b->state[30], 0);
1715 if(runs-- > 0) run= get_symbol2(&s->c, b->state[1], 3);
1720 int lt=0, t=0, rt=0;
/* previous row has a coefficient in column 0 */
1722 if(y && prev_xc->x == 0){
1734 if(prev_xc->x == x + 1)
/* advance the parent cursor to column x>>1 and pick up its value if present */
1740 if(x>>1 > parent_xc->x){
1743 if(x>>1 == parent_xc->x){
1744 p= parent_xc->coeff;
/* non-zero context: values are packed as 2*magnitude+sign, so >>1 yields the
 * magnitude and (t&~1) equals twice the magnitude of t */
1747 if(/*ll|*/l|lt|t|rt|p){
1748 int context= av_log2(/*ABS(ll) + */3*(l>>1) + (lt>>1) + (t&~1) + (rt>>1) + (p>>1));
/* significance bit, then magnitude-1 and the sign bit packed into v */
1750 v=get_rac(&s->c, &b->state[0][context]);
1752 v= 2*(get_symbol2(&s->c, b->state[context + 2], context-4) + 1);
1753 v+=get_rac(&s->c, &b->state[0][16 + 1 + 3 + quant3bA[l&0xFF] + 3*quant3bA[t&0xFF]]);
/* zero context: fetch the next run, then decode a coefficient with context 0 */
1760 if(runs-- > 0) run= get_symbol2(&s->c, b->state[1], 3);
1762 v= 2*(get_symbol2(&s->c, b->state[0 + 2], 0-4) + 1);
1763 v+=get_rac(&s->c, &b->state[0][16 + 1 + 3]);
/* limit how far the current run may be skipped in one go, based on the next
 * entry of the previous row, the row end and the next parent entry */
1772 if(y) max_run= FFMIN(run, prev_xc->x - x - 2);
1773 else max_run= FFMIN(run, w-x-1);
1775 max_run= FFMIN(max_run, 2*parent_xc->x - x - 1);
1781 (xc++)->x= w+1; //end marker
/* skip the rest of the parent's entries up to its end marker */
1787 while(parent_xc->x != parent->width+1)
1790 prev_parent_xc= parent_xc;
1792 parent_xc= prev_parent_xc;
1797 (xc++)->x= w+1; //end marker
/**
 * Dequantises rows start_y .. h-1 of subband `b` from its sparse coefficient
 * list b->x_coeff into the slice buffer `sb`.
 *
 * @param save_state  save_state[0] carries the read position inside
 *                    b->x_coeff from one call (slice) to the next
 */
1801 static inline void decode_subband_slice_buffered(SnowContext *s, SubBand *b, slice_buffer * sb, int start_y, int h, int save_state[1]){
1802 const int w= b->width;
/* quantiser: qlog is clipped to [0, QROOT*16]; the low bits index qexp[] and
 * the remaining bits become a left shift */
1804 const int qlog= clip(s->qlog + b->qlog, 0, QROOT*16);
1805 int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
1806 int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
/* NOTE(review): special case for the band located at the start of the DWT
 * buffer and for lossless mode; the body is not visible here, presumably it
 * neutralises qmul/qadd -- confirm */
1811 if(b->buf == s->spatial_dwt_buffer || s->qlog == LOSSLESS_QLOG){
1816 /* If we are on the second or later slice, restore our index. */
1818 new_index = save_state[0];
1821 for(y=start_y; y<h; y++){
/* output line for row y of this band; cleared before coefficients are placed */
1824 DWTELEM * line = slice_buffer_get_line(sb, y * b->stride_line + b->buf_y_offset) + b->buf_x_offset;
1825 memset(line, 0, b->width*sizeof(DWTELEM));
1826 v = b->x_coeff[new_index].coeff;
1827 x = b->x_coeff[new_index++].x;
/* v packs 2*magnitude + sign bit: scale the magnitude, then (t^u)-u negates
 * t when the sign bit is set (u is 0 or -1) */
1830 register int t= ( (v>>1)*qmul + qadd)>>QEXPSHIFT;
1831 register int u= -(v&1);
1832 line[x] = (t^u) - u;
1834 v = b->x_coeff[new_index].coeff;
1835 x = b->x_coeff[new_index++].x;
1838 if(w > 200 && start_y != 0/*level+1 == s->spatial_decomposition_count*/){
1839 STOP_TIMER("decode_subband")
1842 /* Save our variables for the next slice. */
1843 save_state[0] = new_index;
/**
 * Resets all adaptive range-coder states to MID_STATE: the per-band states
 * of all three planes, plus the header and block states of `s`.
 */
1848 static void reset_contexts(SnowContext *s){
1849 int plane_index, level, orientation;
1851 for(plane_index=0; plane_index<3; plane_index++){
1852 for(level=0; level<s->spatial_decomposition_count; level++){
/* orientation 0 is visited only for level 0; other levels start at 1 */
1853 for(orientation=level ? 1:0; orientation<4; orientation++){
1854 memset(s->plane[plane_index].band[level][orientation].state, MID_STATE, sizeof(s->plane[plane_index].band[level][orientation].state));
1858 memset(s->header_state, MID_STATE, sizeof(s->header_state));
1859 memset(s->block_state, MID_STATE, sizeof(s->block_state));
/**
 * Allocates the zero-initialised BlockNode array s->block for the current
 * picture size: w*h nodes, multiplied by 4^block_max_depth.
 * NOTE(review): no check of the av_mallocz() result and no return statement
 * are visible here -- confirm that allocation failure is handled.
 */
1862 static int alloc_blocks(SnowContext *s){
/* -((-n)>>k) rounds n / 2^k upwards, so partial blocks at the right and
 * bottom picture edges are counted (relies on an arithmetic right shift of
 * the negative value) */
1863 int w= -((-s->avctx->width )>>LOG2_MB_SIZE);
1864 int h= -((-s->avctx->height)>>LOG2_MB_SIZE);
1869 s->block= av_mallocz(w * h * sizeof(BlockNode) << (s->block_max_depth*2));
/**
 * Copies range-coder state from `s` to `d` while keeping d's own output
 * pointers: d->bytestream and d->bytestream_start are saved first and
 * written back at the end (the copy in between is presumably a struct
 * assignment -- confirm).
 */
1873 static inline void copy_rac_state(RangeCoder *d, RangeCoder *s){
1874 uint8_t *bytestream= d->bytestream;
1875 uint8_t *bytestream_start= d->bytestream_start;
1877 d->bytestream= bytestream;
1878 d->bytestream_start= bytestream_start;
1881 //near copy & paste from dsputil, FIXME
/**
 * Walks a w x w square of pixels starting at `pix`, with rows `line_size`
 * bytes apart; by its name and its use in encode_q_branch() it presumably
 * returns the sum of the pixel values (the accumulation is not visible).
 */
1882 static int pix_sum(uint8_t * pix, int line_size, int w)
1887 for (i = 0; i < w; i++) {
1888 for (j = 0; j < w; j++) {
/* move from the end of this row to the start of the next one */
1892 pix += line_size - w;
1897 //near copy & paste from dsputil, FIXME
/**
 * Walks a w x w square of pixels starting at `pix`, with rows `line_size`
 * bytes apart, using the squareTbl lookup table; presumably returns the sum
 * of the squared pixel values (the accumulation is not visible).
 */
1898 static int pix_norm1(uint8_t * pix, int line_size, int w)
/* table pointer offset by 256, presumably so negative indices are valid */
1901 uint32_t *sq = squareTbl + 256;
1904 for (i = 0; i < w; i++) {
1905 for (j = 0; j < w; j ++) {
/* move from the end of this row to the start of the next one */
1909 pix += line_size - w;
/**
 * Stores one BlockNode value into every finest-level cell covered by the
 * block at (x, y) of quad-tree depth `level`. `block` is presumably built
 * from the colour (l, cb, cr), motion vector (mx, my), ref and type
 * arguments in lines not visible here.
 */
1914 static inline void set_blocks(SnowContext *s, int level, int x, int y, int l, int cb, int cr, int mx, int my, int ref, int type){
/* width of the block array in finest-level cells */
1915 const int w= s->b_width << s->block_max_depth;
1916 const int rem_depth= s->block_max_depth - level;
/* index of the top-left finest-level cell of this block */
1917 const int index= (x + y*w) << rem_depth;
/* side length of this block in finest-level cells */
1918 const int block_w= 1<<rem_depth;
1931 for(j=0; j<block_w; j++){
1932 for(i=0; i<block_w; i++){
1933 s->block[index + i + j*w]= block;
/**
 * Points the motion-estimation context `c` at the source and reference
 * planes for a block at pixel position (x, y): c->src[0][i] receives src[i]
 * unchanged and c->ref[0][i] receives ref[i] advanced by a per-plane offset.
 * `ref2` and `ref_index` are not used in the visible lines.
 */
1938 static inline void init_ref(MotionEstContext *c, uint8_t *src[3], uint8_t *ref[3], uint8_t *ref2[3], int x, int y, int ref_index){
1939 const int offset[3]= {
1941 ((y*c->uvstride + x)>>1),
1942 ((y*c->uvstride + x)>>1),
1946 c->src[0][i]= src [i];
1947 c->ref[0][i]= ref [i] + offset[i];
/* Named slots of the motion-vector predictor array P used by
 * encode_q_branch(): P[3] is the top-right neighbour, P[4] the median. */
1955 #define P_TOPRIGHT P[3]
1956 #define P_MEDIAN P[4]
1958 #define FLAG_QPEL 1 //must be 1
/**
 * Encoder mode decision for the quad-tree block at (x, y) on depth `level`.
 *
 * Three candidates are costed: an inter block (motion search over all
 * reference frames), an intra block (one colour per plane) and, unless the
 * maximum depth is reached, a split into four children handled recursively.
 * The inter and intra candidates are trial-encoded into local buffers with
 * copies of the block coder state; the winner's bytes and state are copied
 * into s->c / s->block_state and the block array is updated via set_blocks().
 *
 * @return NOTE(review): the return statements are not visible here;
 *         presumably the score of the chosen candidate -- confirm
 */
1960 static int encode_q_branch(SnowContext *s, int level, int x, int y){
/* scratch output and state copies for the inter (p_) and intra (i_) trials */
1961 uint8_t p_buffer[1024];
1962 uint8_t i_buffer[1024];
1963 uint8_t p_state[sizeof(s->block_state)];
1964 uint8_t i_state[sizeof(s->block_state)];
/* position of the real output stream before this block, restored later */
1966 uint8_t *pbbak= s->c.bytestream;
1967 uint8_t *pbbak_start= s->c.bytestream_start;
1968 int score, score2, iscore, i_len, p_len, block_s, sum;
/* block-array dimensions in finest-level cells */
1969 const int w= s->b_width << s->block_max_depth;
1970 const int h= s->b_height << s->block_max_depth;
1971 const int rem_depth= s->block_max_depth - level;
1972 const int index= (x + y*w) << rem_depth;
/* block size in pixels at this level */
1973 const int block_w= 1<<(LOG2_MB_SIZE - level);
1974 int trx= (x+1)<<rem_depth;
1975 int try= (y+1)<<rem_depth;
/* neighbouring blocks; null_block stands in outside the picture */
1976 BlockNode *left = x ? &s->block[index-1] : &null_block;
1977 BlockNode *top = y ? &s->block[index-w] : &null_block;
1978 BlockNode *right = trx<w ? &s->block[index+1] : &null_block;
1979 BlockNode *bottom= try<h ? &s->block[index+w] : &null_block;
1980 BlockNode *tl = y && x ? &s->block[index-w-1] : left;
1981 BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
/* predictors: colour from the left block, motion vector as component-wise
 * median of left, top and top-right */
1982 int pl = left->color[0];
1983 int pcb= left->color[1];
1984 int pcr= left->color[2];
1985 int pmx= mid_pred(left->mx, top->mx, tr->mx);
1986 int pmy= mid_pred(left->my, top->my, tr->my);
1989 const int stride= s->current_picture.linesize[0];
1990 const int uvstride= s->current_picture.linesize[1];
/* top-left pixel of this block in each input plane (chroma at half size) */
1991 uint8_t *current_data[3]= { s->input_picture.data[0] + (x + y* stride)*block_w,
1992 s->input_picture.data[1] + (x + y*uvstride)*block_w/2,
1993 s->input_picture.data[2] + (x + y*uvstride)*block_w/2};
1995 int16_t last_mv[3][2];
1996 int qpel= !!(s->avctx->flags & CODEC_FLAG_QPEL); //unused
1997 const int shift= 1+qpel;
1998 MotionEstContext *c= &s->m.me;
/* entropy-coding contexts derived from the neighbours */
1999 int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref);
2000 int mx_context= av_log2(2*ABS(left->mx - top->mx));
2001 int my_context= av_log2(2*ABS(left->my - top->my));
2002 int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
2003 int ref, best_ref, ref_score, ref_mx, ref_my;
2005 assert(sizeof(s->block_state) >= 256);
/* store the predicted values for this block before the search */
2007 set_blocks(s, level, x, y, pl, pcb, pcr, pmx, pmy, 0, BLOCK_INTRA);
2011 // clip predictors / edge ?
2013 P_LEFT[0]= left->mx;
2014 P_LEFT[1]= left->my;
2017 P_TOPRIGHT[0]= tr->mx;
2018 P_TOPRIGHT[1]= tr->my;
/* candidate vectors handed to the motion search: this block's stored vector
 * plus those of the right and bottom neighbours */
2020 last_mv[0][0]= s->block[index].mx;
2021 last_mv[0][1]= s->block[index].my;
2022 last_mv[1][0]= right->mx;
2023 last_mv[1][1]= right->my;
2024 last_mv[2][0]= bottom->mx;
2025 last_mv[2][1]= bottom->my;
2032 assert(s->m.me. stride == stride);
2033 assert(s->m.me.uvstride == uvstride);
2035 c->penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_cmp);
2036 c->sub_penalty_factor= get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_sub_cmp);
2037 c->mb_penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->mb_cmp);
/* note: this expression also sets s->m.f_code to 1 as a side effect */
2038 c->current_mv_penalty= c->mv_penalty[s->m.f_code=1] + MAX_MV;
/* search window relative to the block position, reaching 16-2 pixels past
 * each picture edge */
2040 c->xmin = - x*block_w - 16+2;
2041 c->ymin = - y*block_w - 16+2;
2042 c->xmax = - (x+1)*block_w + (w<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-2;
2043 c->ymax = - (y+1)*block_w + (h<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-2;
/* clamp the predictors into the search window (window is in pixels, the
 * vectors carry `shift` fractional bits) */
2045 if(P_LEFT[0] > (c->xmax<<shift)) P_LEFT[0] = (c->xmax<<shift);
2046 if(P_LEFT[1] > (c->ymax<<shift)) P_LEFT[1] = (c->ymax<<shift);
2047 if(P_TOP[0] > (c->xmax<<shift)) P_TOP[0] = (c->xmax<<shift);
2048 if(P_TOP[1] > (c->ymax<<shift)) P_TOP[1] = (c->ymax<<shift);
2049 if(P_TOPRIGHT[0] < (c->xmin<<shift)) P_TOPRIGHT[0]= (c->xmin<<shift);
2050 if(P_TOPRIGHT[0] > (c->xmax<<shift)) P_TOPRIGHT[0]= (c->xmax<<shift); //due to pmx no clip
2051 if(P_TOPRIGHT[1] > (c->ymax<<shift)) P_TOPRIGHT[1]= (c->ymax<<shift);
2053 P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]);
2054 P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);
/* prediction used for vector cost: left or median (selecting condition is
 * not visible here) */
2057 c->pred_x= P_LEFT[0];
2058 c->pred_y= P_LEFT[1];
2060 c->pred_x = P_MEDIAN[0];
2061 c->pred_y = P_MEDIAN[1];
/* ---- inter candidate: motion search in every reference frame ---- */
2066 for(ref=0; ref<s->ref_frames; ref++){
2067 init_ref(c, current_data, s->last_picture[ref].data, NULL, block_w*x, block_w*y, 0);
2069 ref_score= ff_epzs_motion_search(&s->m, &ref_mx, &ref_my, P, 0, /*ref_index*/ 0, last_mv,
2070 (1<<16)>>shift, level-LOG2_MB_SIZE+4, block_w);
2072 assert(ref_mx >= c->xmin);
2073 assert(ref_mx <= c->xmax);
2074 assert(ref_my >= c->ymin);
2075 assert(ref_my <= c->ymax);
/* sub-pel refinement, then the final block score for the refined vector */
2077 ref_score= s->m.me.sub_motion_search(&s->m, &ref_mx, &ref_my, ref_score, 0, 0, level-LOG2_MB_SIZE+4, block_w);
2078 ref_score= ff_get_mb_score(&s->m, ref_mx, ref_my, 0, 0, level-LOG2_MB_SIZE+4, block_w, 0);
/* penalty that grows with the reference index */
2079 ref_score+= 2*av_log2(2*ref)*c->penalty_factor;
/* optionally remember the per-reference result for this block */
2080 if(s->ref_mvs[ref]){
2081 s->ref_mvs[ref][index][0]= ref_mx;
2082 s->ref_mvs[ref][index][1]= ref_my;
2083 s->ref_scores[ref][index]= ref_score;
2085 if(score > ref_score){
2092 //FIXME if mb_cmp != SSE then intra cant be compared currently and mb_penalty vs. lambda2
/* trial-encode the inter block into p_buffer with a copy of the coder state */
2096 pc.bytestream_start=
2097 pc.bytestream= p_buffer; //FIXME end/start? and at the other stoo
2098 memcpy(p_state, s->block_state, sizeof(s->block_state));
/* flag 1 = "not split" (the split path below writes 0 to the same context) */
2100 if(level!=s->block_max_depth)
2101 put_rac(&pc, &p_state[4 + s_context], 1);
/* NOTE(review): encode_q_branch2() masks the neighbour types with &1 for
 * this context, this function does not -- confirm the two agree */
2102 put_rac(&pc, &p_state[1 + left->type + top->type], 0);
2103 if(s->ref_frames > 1)
2104 put_symbol(&pc, &p_state[128 + 1024 + 32*ref_context], best_ref, 0);
2105 put_symbol(&pc, &p_state[128 + 32*(mx_context + 16*!!best_ref)], mx - pmx, 1);
2106 put_symbol(&pc, &p_state[128 + 32*(my_context + 16*!!best_ref)], my - pmy, 1);
/* rate term: bytes written, outstanding bytes and range change converted to
 * bits, weighted by lambda2 */
2107 p_len= pc.bytestream - pc.bytestream_start;
2108 score += (s->lambda2*(p_len*8
2109 + (pc.outstanding_count - s->c.outstanding_count)*8
2110 + (-av_log2(pc.range) + av_log2(s->c.range))
2111 ))>>FF_LAMBDA_SHIFT;
/* ---- intra candidate: l is the rounded mean of the luma block; iscore has
 * the form sum(p^2) - 2*l*sum(p) + l^2*N, i.e. the squared error against l
 * (assuming pix_norm1() returns the sum of squares) ---- */
2113 block_s= block_w*block_w;
2114 sum = pix_sum(current_data[0], stride, block_w);
2115 l= (sum + block_s/2)/block_s;
2116 iscore = pix_norm1(current_data[0], stride, block_w) - 2*l*sum + l*l*block_s;
/* chroma means over the half-size blocks; their error is not added to iscore */
2118 block_s= block_w*block_w>>2;
2119 sum = pix_sum(current_data[1], uvstride, block_w>>1);
2120 cb= (sum + block_s/2)/block_s;
2121 // iscore += pix_norm1(&current_mb[1][0], uvstride, block_w>>1) - 2*cb*sum + cb*cb*block_s;
2122 sum = pix_sum(current_data[2], uvstride, block_w>>1);
2123 cr= (sum + block_s/2)/block_s;
2124 // iscore += pix_norm1(&current_mb[2][0], uvstride, block_w>>1) - 2*cr*sum + cr*cr*block_s;
/* trial-encode the intra block into i_buffer with a copy of the coder state */
2127 ic.bytestream_start=
2128 ic.bytestream= i_buffer; //FIXME end/start? and at the other stoo
2129 memcpy(i_state, s->block_state, sizeof(s->block_state));
2130 if(level!=s->block_max_depth)
2131 put_rac(&ic, &i_state[4 + s_context], 1);
2132 put_rac(&ic, &i_state[1 + left->type + top->type], 1);
/* colours are coded as differences from the left block's colours */
2133 put_symbol(&ic, &i_state[32], l-pl , 1);
2134 put_symbol(&ic, &i_state[64], cb-pcb, 1);
2135 put_symbol(&ic, &i_state[96], cr-pcr, 1);
2136 i_len= ic.bytestream - ic.bytestream_start;
2137 iscore += (s->lambda2*(i_len*8
2138 + (ic.outstanding_count - s->c.outstanding_count)*8
2139 + (-av_log2(ic.range) + av_log2(s->c.range))
2140 ))>>FF_LAMBDA_SHIFT;
2142 // assert(score==256*256*256*64-1);
2143 assert(iscore < 255*255*256 + s->lambda2*10);
2144 assert(iscore >= 0);
2145 assert(l>=0 && l<=255);
2146 assert(pl>=0 && pl<=255);
/* scene-change statistics from the scaled intra and inter scores */
2149 int varc= iscore >> 8;
2150 int vard= score >> 8;
2151 if (vard <= 64 || vard < varc)
2152 c->scene_change_score+= ff_sqrt(vard) - ff_sqrt(varc);
2154 c->scene_change_score+= s->m.qscale;
/* ---- split candidate: write the split flag (0) to the real stream and
 * encode the four children recursively ---- */
2157 if(level!=s->block_max_depth){
2158 put_rac(&s->c, &s->block_state[4 + s_context], 0);
2159 score2 = encode_q_branch(s, level+1, 2*x+0, 2*y+0);
2160 score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+0);
2161 score2+= encode_q_branch(s, level+1, 2*x+0, 2*y+1);
2162 score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+1);
2163 score2+= s->lambda2>>FF_LAMBDA_SHIFT; //FIXME exact split overhead
/* the split is kept only if it beats both unsplit candidates */
2165 if(score2 < score && score2 < iscore)
/* intra chosen: rewind the stream to pbbak and install the intra trial */
2170 memcpy(pbbak, i_buffer, i_len);
2172 s->c.bytestream_start= pbbak_start;
2173 s->c.bytestream= pbbak + i_len;
2174 set_blocks(s, level, x, y, l, cb, cr, pmx, pmy, 0, BLOCK_INTRA);
2175 memcpy(s->block_state, i_state, sizeof(s->block_state));
/* inter chosen: rewind the stream to pbbak and install the inter trial */
2178 memcpy(pbbak, p_buffer, p_len);
2180 s->c.bytestream_start= pbbak_start;
2181 s->c.bytestream= pbbak + p_len;
2182 set_blocks(s, level, x, y, pl, pcb, pcr, mx, my, best_ref, 0);
2183 memcpy(s->block_state, p_state, sizeof(s->block_state));
/**
 * Tells whether two block nodes carry the same coding parameters.
 * If both are intra, the three colour components are compared; otherwise
 * the motion vector, the reference index and the intra bit of the type are
 * compared.
 *
 * @return non-zero if the compared fields are all equal, 0 otherwise
 */
2188 static always_inline int same_block(BlockNode *a, BlockNode *b){
2189 if((a->type&BLOCK_INTRA) && (b->type&BLOCK_INTRA)){
/* OR of the differences is zero only if every difference is zero */
2190 return !((a->color[0] - b->color[0]) | (a->color[1] - b->color[1]) | (a->color[2] - b->color[2]));
2192 return !((a->mx - b->mx) | (a->my - b->my) | (a->ref - b->ref) | ((a->type ^ b->type)&BLOCK_INTRA));
/**
 * Write the block-tree symbols for the block at (x, y) on the given level to
 * the range coder s->c, taking type, color, motion vector and ref from the
 * nodes already stored in s->block.
 * Below the maximum depth a flag is written first: 1 when the four nodes
 * b, b+1, b+w and b+w+1 are the same, 0 followed by a recursion into the four
 * children at level+1.
 * NOTE(review): several lines of this function (closing braces, else
 * branches, returns and the condition guarding the first set_blocks() call)
 * are not visible in this listing.
 */
2196 static void encode_q_branch2(SnowContext *s, int level, int x, int y){
/* w is the width of the node array in nodes of the finest level */
2197 const int w= s->b_width << s->block_max_depth;
2198 const int rem_depth= s->block_max_depth - level;
/* (x, y) are in units of this level; the shift converts to finest-level units */
2199 const int index= (x + y*w) << rem_depth;
2200 int trx= (x+1)<<rem_depth;
2201 BlockNode *b= &s->block[index];
/* neighbours outside the picture are replaced by null_block (left/top) or by
 * another neighbour (tl falls back to left, tr falls back to tl) */
2202 BlockNode *left = x ? &s->block[index-1] : &null_block;
2203 BlockNode *top = y ? &s->block[index-w] : &null_block;
2204 BlockNode *tl = y && x ? &s->block[index-w-1] : left;
/* the top-right node is only used when x is even or level is 0 */
2205 BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
/* color predictors are copied from the left neighbour */
2206 int pl = left->color[0];
2207 int pcb= left->color[1];
2208 int pcr= left->color[2];
/* motion vector predictor: mid_pred() of left, top and top-right */
2209 int pmx= mid_pred(left->mx, top->mx, tr->mx);
2210 int pmy= mid_pred(left->my, top->my, tr->my);
2211 int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref);
/* the motion vector contexts are shifted by 16 when this block's ref is nonzero */
2212 int mx_context= av_log2(2*ABS(left->mx - top->mx)) + 16*!!b->ref;
2213 int my_context= av_log2(2*ABS(left->my - top->my)) + 16*!!b->ref;
2214 int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
/* NOTE(review): the condition guarding this call is not visible here; as
 * written it stores the predicted values as an intra block */
2217 set_blocks(s, level, x, y, pl, pcb, pcr, pmx, pmy, 0, BLOCK_INTRA);
2221 if(level!=s->block_max_depth){
2222 if(same_block(b,b+1) && same_block(b,b+w) && same_block(b,b+w+1)){
/* all four nodes agree: flag 1 */
2223 put_rac(&s->c, &s->block_state[4 + s_context], 1);
/* flag 0, then the four children in the order top-left, top-right,
 * bottom-left, bottom-right (presumably the else branch; not visible) */
2225 put_rac(&s->c, &s->block_state[4 + s_context], 0);
2226 encode_q_branch2(s, level+1, 2*x+0, 2*y+0);
2227 encode_q_branch2(s, level+1, 2*x+1, 2*y+0);
2228 encode_q_branch2(s, level+1, 2*x+0, 2*y+1);
2229 encode_q_branch2(s, level+1, 2*x+1, 2*y+1);
2233 if(b->type & BLOCK_INTRA){
/* type flag 1, context chosen by bit 0 of the left and top types */
2234 put_rac(&s->c, &s->block_state[1 + (left->type&1) + (top->type&1)], 1);
/* colors are written as differences to the left neighbour's colors */
2235 put_symbol(&s->c, &s->block_state[32], b->color[0]-pl , 1);
2236 put_symbol(&s->c, &s->block_state[64], b->color[1]-pcb, 1);
2237 put_symbol(&s->c, &s->block_state[96], b->color[2]-pcr, 1);
2238 set_blocks(s, level, x, y, b->color[0], b->color[1], b->color[2], pmx, pmy, 0, BLOCK_INTRA);
/* type flag 0 (presumably the else branch; not visible) */
2240 put_rac(&s->c, &s->block_state[1 + (left->type&1) + (top->type&1)], 0);
/* ref is only written when more than one reference frame is in use */
2241 if(s->ref_frames > 1)
2242 put_symbol(&s->c, &s->block_state[128 + 1024 + 32*ref_context], b->ref, 0);
/* motion vector components are written as differences to the predictor */
2243 put_symbol(&s->c, &s->block_state[128 + 32*mx_context], b->mx - pmx, 1);
2244 put_symbol(&s->c, &s->block_state[128 + 32*my_context], b->my - pmy, 1);
2245 set_blocks(s, level, x, y, pl, pcb, pcr, b->mx, b->my, b->ref, 0);
/**
 * Read the block-tree symbols for the block at (x, y) on the given level from
 * the range coder s->c and store the decoded values with set_blocks().
 * A block is decoded as a leaf when level equals s->block_max_depth or the
 * flag read from the coder is nonzero; the four recursive calls at the end
 * handle the children at level+1.
 * NOTE(review): the declarations of type and ref, closing braces, else
 * branches and the conditions guarding the first set_blocks() call and the
 * color reads are not visible in this listing.
 */
2249 static void decode_q_branch(SnowContext *s, int level, int x, int y){
2250 const int w= s->b_width << s->block_max_depth;
2251 const int rem_depth= s->block_max_depth - level;
2252 const int index= (x + y*w) << rem_depth;
2253 int trx= (x+1)<<rem_depth;
/* neighbours outside the picture are replaced by null_block (left/top) or by
 * another neighbour (tl falls back to left, tr falls back to tl) */
2254 BlockNode *left = x ? &s->block[index-1] : &null_block;
2255 BlockNode *top = y ? &s->block[index-w] : &null_block;
2256 BlockNode *tl = y && x ? &s->block[index-w-1] : left;
2257 BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
2258 int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
/* NOTE(review): the condition guarding this call is not visible here; as
 * written it stores null_block's values as an intra block */
2261 set_blocks(s, level, x, y, null_block.color[0], null_block.color[1], null_block.color[2], null_block.mx, null_block.my, null_block.ref, BLOCK_INTRA);
/* the flag is not read at all at the maximum depth (short-circuit ||) */
2265 if(level==s->block_max_depth || get_rac(&s->c, &s->block_state[4 + s_context])){
/* predictors: colors from the left neighbour, motion vector from mid_pred() */
2267 int l = left->color[0];
2268 int cb= left->color[1];
2269 int cr= left->color[2];
2270 int mx= mid_pred(left->mx, top->mx, tr->mx);
2271 int my= mid_pred(left->my, top->my, tr->my);
2273 int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref);
/* the second term is multiplied by 0 and therefore contributes nothing */
2274 int mx_context= av_log2(2*ABS(left->mx - top->mx)) + 0*av_log2(2*ABS(tr->mx - top->mx));
2275 int my_context= av_log2(2*ABS(left->my - top->my)) + 0*av_log2(2*ABS(tr->my - top->my));
/* NOTE(review): the context uses the full type values here, whereas
 * encode_q_branch2() masks them with &1 -- the two only agree while
 * type never holds bits other than bit 0; confirm */
2277 type= get_rac(&s->c, &s->block_state[1 + left->type + top->type]) ? BLOCK_INTRA : 0;
/* color differences are added to the predictors (presumably only for intra
 * blocks; the condition is not visible) */
2280 l += get_symbol(&s->c, &s->block_state[32], 1);
2281 cb+= get_symbol(&s->c, &s->block_state[64], 1);
2282 cr+= get_symbol(&s->c, &s->block_state[96], 1);
/* ref is only read when more than one reference frame is in use */
2284 if(s->ref_frames > 1)
2285 ref= get_symbol(&s->c, &s->block_state[128 + 1024 + 32*ref_context], 0);
/* the motion vector contexts are shifted by 16 when ref is nonzero */
2286 mx+= get_symbol(&s->c, &s->block_state[128 + 32*(mx_context + 16*!!ref)], 1);
2287 my+= get_symbol(&s->c, &s->block_state[128 + 32*(my_context + 16*!!ref)], 1);
2289 set_blocks(s, level, x, y, l, cb, cr, mx, my, ref, type);
/* children in the order top-left, top-right, bottom-left, bottom-right
 * (presumably the else branch; not visible) */
2291 decode_q_branch(s, level+1, 2*x+0, 2*y+0);
2292 decode_q_branch(s, level+1, 2*x+1, 2*y+0);
2293 decode_q_branch(s, level+1, 2*x+0, 2*y+1);
2294 decode_q_branch(s, level+1, 2*x+1, 2*y+1);
/**
 * Emit the block data through encode_q_branch() or encode_q_branch2(),
 * starting each call at level 0.
 * NOTE(review): the loops supplying x and y, the declaration of w, and the
 * statements controlled by the first two conditions are not visible in this
 * listing.
 */
2298 static void encode_blocks(SnowContext *s){
/* taken only for ME_ITER on non-keyframes; the controlled statement is not visible */
2303 if(s->avctx->me_method == ME_ITER && !s->keyframe)
/* compares the remaining output space against w*MB_SIZE*MB_SIZE*3 bytes */
2307 if(s->c.bytestream_end - s->c.bytestream < w*MB_SIZE*MB_SIZE*3){ //FIXME nicer limit
2308 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
/* ME_ITER selects encode_q_branch2(); the other call presumably belongs to
 * an else branch that is not visible */
2312 if(s->avctx->me_method == ME_ITER)
2313 encode_q_branch2(s, 0, x, y);
2315 encode_q_branch (s, 0, x, y);
/**
 * Decode block data by calling decode_q_branch() at level 0.
 * NOTE(review): the loops supplying x and y are not visible in this listing.
 */
2320 static void decode_blocks(SnowContext *s){
2327 decode_q_branch(s, 0, x, y);
/**
 * Sub-pel interpolation of a b_w x b_h block in two passes.
 * The first loop nest runs over b_h+5 rows and filters by dx; the second
 * reads six consecutive rows of tmp per output sample and filters by dy.
 * Both passes use the 6-tap kernel (1,-5,20,20,-5,1), whose taps sum to 32,
 * and blend its result with the nearer full sample (a2 or a3) scaled by 32.
 * NOTE(review): the loads of a0..a5 in the first pass, the stores to tmp and
 * dst and the pointer increments are not visible in this listing.
 */
2332 static void mc_block(uint8_t *dst, uint8_t *src, uint8_t *tmp, int stride, int b_w, int b_h, int dx, int dy){
/* first pass: 5 rows more than b_h, as needed by the 6-tap filter of the second pass */
2335 for(y=0; y < b_h+5; y++){
2336 for(x=0; x < b_w; x++){
2343 // int am= 9*(a1+a2) - (a0+a3);
2344 int am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5);
2345 // int am= 18*(a2+a3) - 2*(a1+a4);
2346 // int aL= (-7*a0 + 105*a1 + 35*a2 - 5*a3)>>3;
2347 // int aR= (-7*a3 + 105*a2 + 35*a1 - 5*a0)>>3;
2349 // if(b_w==16) am= 8*(a1+a2);
/* linear blend with weights summing to 8 on values scaled by 32, hence the
 * rounding constant 128 and the shift by 8; dx<8 blends towards a2, else a3 */
2351 if(dx<8) am = (32*a2*( 8-dx) + am* dx + 128)>>8;
2352 else am = ( am*(16-dx) + 32*a3*(dx-8) + 128)>>8;
2354 /* FIXME Try increasing tmp buffer to 16 bits and not clipping here. Should give marginally better results. - Robert*/
/* values outside 0..255: negative am becomes 0, am > 255 becomes ~0 (255 once
 * truncated to 8 bits); assumes 32-bit int and arithmetic right shift */
2355 if(am&(~255)) am= ~(am>>31);
2359 /* if (dx< 4) tmp[x + y*stride]= (16*a1*( 4-dx) + aL* dx + 32)>>6;
2360 else if(dx< 8) tmp[x + y*stride]= ( aL*( 8-dx) + am*(dx- 4) + 32)>>6;
2361 else if(dx<12) tmp[x + y*stride]= ( am*(12-dx) + aR*(dx- 8) + 32)>>6;
2362 else tmp[x + y*stride]= ( aR*(16-dx) + 16*a2*(dx-12) + 32)>>6;*/
/* moves tmp back by the b_h+5 rows covered by the first pass */
2367 tmp -= (b_h+5)*stride;
/* second pass: same filter applied down the columns of tmp */
2369 for(y=0; y < b_h; y++){
2370 for(x=0; x < b_w; x++){
2371 int a0= tmp[x + 0*stride];
2372 int a1= tmp[x + 1*stride];
2373 int a2= tmp[x + 2*stride];
2374 int a3= tmp[x + 3*stride];
2375 int a4= tmp[x + 4*stride];
2376 int a5= tmp[x + 5*stride];
2377 int am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5);
2378 // int am= 18*(a2+a3) - 2*(a1+a4);
2379 /* int aL= (-7*a0 + 105*a1 + 35*a2 - 5*a3)>>3;
2380 int aR= (-7*a3 + 105*a2 + 35*a1 - 5*a0)>>3;*/
2382 // if(b_w==16) am= 8*(a1+a2);
/* same blend as in the first pass, driven by dy */
2384 if(dy<8) am = (32*a2*( 8-dy) + am* dy + 128)>>8;
2385 else am = ( am*(16-dy) + 32*a3*(dy-8) + 128)>>8;
/* same clipping as in the first pass */
2387 if(am&(~255)) am= ~(am>>31);
2390 /* if (dy< 4) tmp[x + y*stride]= (16*a1*( 4-dy) + aL* dy + 32)>>6;
2391 else if(dy< 8) tmp[x + y*stride]= ( aL*( 8-dy) + am*(dy- 4) + 32)>>6;
2392 else if(dy<12) tmp[x + y*stride]= ( am*(12-dy) + aR*(dy- 8) + 32)>>6;
2393 else tmp[x + y*stride]= ( aR*(16-dy) + 16*a2*(dy-12) + 32)>>6;*/
2398 STOP_TIMER("mc_block")
/*
 * mca(dx,dy,b_w) defines a static function named mc_block_hpel<dx><dy><b_w>
 * with the signature (dst, src, stride, h). The generated function allocates
 * a variable-length scratch array of stride*(b_w+5) bytes and calls
 * mc_block() for a b_w x b_w block with src moved back by 2 columns and
 * 2 rows.
 * NOTE(review): h is not referenced in the lines visible here, and the end of
 * the macro body is not visible in this listing.
 */
2401 #define mca(dx,dy,b_w)\
2402 static void mc_block_hpel ## dx ## dy ## b_w(uint8_t *dst, uint8_t *src, int stride, int h){\
2403 uint8_t tmp[stride*(b_w+5)];\
2405 mc_block(dst, src-2-2*stride, tmp, stride, b_w, b_w, dx, dy);\
/**
 * Write the prediction of one block into dst.
 * Intra blocks are filled with the constant block->color[plane_index];
 * other blocks are motion compensated from
 * s->last_picture[block->ref].data[plane_index], using mc_block() or the
 * H.264 qpel functions in s->dsp.
 * NOTE(review): the conditions choosing between the fill loops and between
 * the qpel calls, some declarations, else branches and closing braces are
 * not visible in this listing.
 */
2417 static void pred_block(SnowContext *s, uint8_t *dst, uint8_t *tmp, int stride, int sx, int sy, int b_w, int b_h, BlockNode *block, int plane_index, int w, int h){
2418 if(block->type & BLOCK_INTRA){
2420 const int color = block->color[plane_index];
/* the color byte replicated into all four bytes of a 32-bit word */
2421 const int color4= color*0x01010101;
/* fills 32 bytes per row with eight 32-bit stores (presumably the b_w==32 case) */
2423 for(y=0; y < b_h; y++){
2424 *(uint32_t*)&dst[0 + y*stride]= color4;
2425 *(uint32_t*)&dst[4 + y*stride]= color4;
2426 *(uint32_t*)&dst[8 + y*stride]= color4;
2427 *(uint32_t*)&dst[12+ y*stride]= color4;
2428 *(uint32_t*)&dst[16+ y*stride]= color4;
2429 *(uint32_t*)&dst[20+ y*stride]= color4;
2430 *(uint32_t*)&dst[24+ y*stride]= color4;
2431 *(uint32_t*)&dst[28+ y*stride]= color4;
/* fills 16 bytes per row (presumably the b_w==16 case) */
2434 for(y=0; y < b_h; y++){
2435 *(uint32_t*)&dst[0 + y*stride]= color4;
2436 *(uint32_t*)&dst[4 + y*stride]= color4;
2437 *(uint32_t*)&dst[8 + y*stride]= color4;
2438 *(uint32_t*)&dst[12+ y*stride]= color4;
/* fills 8 bytes per row (presumably the b_w==8 case) */
2441 for(y=0; y < b_h; y++){
2442 *(uint32_t*)&dst[0 + y*stride]= color4;
2443 *(uint32_t*)&dst[4 + y*stride]= color4;
/* fills 4 bytes per row (presumably the b_w==4 case) */
2446 for(y=0; y < b_h; y++){
2447 *(uint32_t*)&dst[0 + y*stride]= color4;
/* byte-wise fill for any b_w */
2450 for(y=0; y < b_h; y++){
2451 for(x=0; x < b_w; x++){
2452 dst[x + y*stride]= color;
/* motion compensated path (presumably the else branch; not visible) */
2457 uint8_t *src= s->last_picture[block->ref].data[plane_index];
/* plane 0 uses twice the scale of the other planes */
2458 const int scale= plane_index ? s->mv_scale : 2*s->mv_scale;
2459 int mx= block->mx*scale;
2460 int my= block->my*scale;
/* dx/dy are the low four bits of the scaled vector, i.e. 0..15 */
2461 const int dx= mx&15;
2462 const int dy= my&15;
/* b_w 16, 8, 4, 2 map to 0, 1, 2, 3; b_w 32 yields -3, which the assert
 * below exempts */
2463 const int tab_index= 3 - (b_w>>2) + (b_w>>4);
2466 src += sx + sy*stride;
/* the unsigned casts make negative sx/sy compare as large values, so one
 * comparison per axis catches both sides of the range */
2467 if( (unsigned)sx >= w - b_w - 4
2468 || (unsigned)sy >= h - b_h - 4){
/* requests a (b_w+5) x (b_h+5) area at tmp + MB_SIZE */
2469 ff_emulated_edge_mc(tmp + MB_SIZE, src, stride, b_w+5, b_h+5, sx, sy, w, h);
2472 // assert(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h);
2473 // assert(!(b_w&(b_w-1)));
2474 assert(b_w>1 && b_h>1);
2475 assert(tab_index>=0 && tab_index<4 || b_w==32);
/* mc_block() is used when dx or dy is not a multiple of 4, when the block is
 * neither square nor 2:1 / 1:2, or when b_w is not a power of two */
2476 if((dx&3) || (dy&3) || !(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h) || (b_w&(b_w-1)))
2477 mc_block(dst, src, tmp, stride, b_w, b_h, dx, dy);
/* below, dx and dy are multiples of 4, so dy+(dx>>2) lies in 0..15 */
/* 16 rows per step with two 16-wide calls per step (presumably the b_w==32 case) */
2480 for(y=0; y<b_h; y+=16){
2481 s->dsp.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + y*stride, src + 2 + (y+2)*stride,stride);
2482 s->dsp.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + 16 + y*stride, src + 18 + (y+2)*stride,stride);
/* single call (presumably the b_w==b_h case; the condition is not visible) */
2485 s->dsp.put_h264_qpel_pixels_tab[tab_index ][dy+(dx>>2)](dst,src + 2 + 2*stride,stride);
/* block twice as wide as high: two calls side by side with the next table */
2486 else if(b_w==2*b_h){
2487 s->dsp.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst ,src + 2 + 2*stride,stride);
2488 s->dsp.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst+b_h,src + 2 + b_h + 2*stride,stride);
/* two calls stacked vertically, b_w rows apart (presumably the 2*b_w==b_h case) */
2491 s->dsp.put_h264_qpel_pixels_tab[tab_index ][dy+(dx>>2)](dst ,src + 2 + 2*stride ,stride);
2492 s->dsp.put_h264_qpel_pixels_tab[tab_index ][dy+(dx>>2)](dst+b_w*stride,src + 2 + 2*stride+b_w*stride,stride);
/**
 * Combine the four predictions block[0..3] with the weights from obmc and
 * apply the result to the lines of the slice buffer sb.
 * For each sample, the weighted sum v of the four predictions is formed and
 * rescaled. The visible statements then either add the slice buffer value,
 * round, clip and store to dst8, or subtract v from the slice buffer value.
 * NOTE(review): the test selecting between those two actions (presumably on
 * add), the declarations of x, y and dst and the closing braces are not
 * visible in this listing.
 */
2497 void ff_snow_inner_add_yblock(uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h,
2498 int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8){
2501 for(y=0; y<b_h; y++){
2502 //FIXME ugly misuse of obmc_stride
/* obmc1..obmc4 point into the same row of four regions of the weight table:
 * offset 0, half a stride to the right, obmc_stride>>1 rows down, and both;
 * the row offset uses obmc_stride as if it were also the table height */
2503 uint8_t *obmc1= obmc + y*obmc_stride;
2504 uint8_t *obmc2= obmc1+ (obmc_stride>>1);
2505 uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1);
2506 uint8_t *obmc4= obmc3+ (obmc_stride>>1);
2507 dst = slice_buffer_get_line(sb, src_y + y);
2508 for(x=0; x<b_w; x++){
/* note the reversed pairing: obmc1 weights block[3], obmc4 weights block[0] */
2509 int v= obmc1[x] * block[3][x + y*src_stride]
2510 +obmc2[x] * block[2][x + y*src_stride]
2511 +obmc3[x] * block[1][x + y*src_stride]
2512 +obmc4[x] * block[0][x + y*src_stride];
2514 v <<= 8 - LOG2_OBMC_MAX;
/* rounding constant for the following right shift by 8 - FRAC_BITS */
2516 v += 1<<(7 - FRAC_BITS);
2517 v >>= 8 - FRAC_BITS;
/* add the slice buffer value, round to integer, clip to 0..255 and store */
2520 v += dst[x + src_x];
2521 v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS;
2522 if(v&(~255)) v= ~(v>>31);
2523 dst8[x + y*src_stride] = v;
/* alternative action: remove the prediction from the slice buffer */
2525 dst[x + src_x] -= v;
2531 //FIXME name cleanup (b_w, block_w, b_width stuff)
/**
 * Predict one block area from the four block nodes around (b_x, b_y) and
 * apply the result to the slice buffer sb through s->dsp.inner_add_yblock().
 * NOTE(review): large parts of this function are not visible in this listing:
 * the declarations of block[], ptmp, x and y, the bodies of the border
 * handling conditions, the bodies of the same_block() tests and any
 * preprocessor lines.
 */
2532 static always_inline void add_yblock_buffered(SnowContext *s, slice_buffer * sb, DWTELEM *old_dst, uint8_t *dst8, uint8_t *obmc, int src_x, int src_y, int b_w, int b_h, int w, int h, int dst_stride, int src_stride, int obmc_stride, int b_x, int b_y, int add, int plane_index){
2533 DWTELEM * dst = NULL;
2534 const int b_width = s->b_width << s->block_max_depth;
2535 const int b_height= s->b_height << s->block_max_depth;
2536 const int b_stride= b_width;
/* the four nodes: at (b_x, b_y), to its right, below it, and below-right */
2537 BlockNode *lt= &s->block[b_x + b_y*b_stride];
2538 BlockNode *rt= lt+1;
2539 BlockNode *lb= lt+b_stride;
2540 BlockNode *rb= lb+1;
2542 int tmp_step= src_stride >= 7*MB_SIZE ? MB_SIZE : MB_SIZE*src_stride;
/* variable-length scratch array on the stack */
2543 uint8_t tmp[src_stride*7*MB_SIZE]; //FIXME align
/* positions at or beyond the right / bottom edge of the node array; the
 * handling itself is not visible */
2550 }else if(b_x + 1 >= b_width){
2557 }else if(b_y + 1 >= b_height){
/* areas reaching outside the picture; only the obmc adjustment for the top
 * edge is visible */
2562 if(src_x<0){ //FIXME merge with prev & always round internal width up to *16
2566 }else if(src_x + b_w > w){
2570 obmc -= src_y*obmc_stride;
2573 }else if(src_y + b_h> h){
/* nothing to do for an empty area */
2577 if(b_w<=0 || b_h<=0) return;
2579 assert(src_stride > 2*MB_SIZE + 5);
2580 // old_dst += src_x + src_y*dst_stride;
2581 dst8+= src_x + src_y*src_stride;
2582 // src += src_x + src_y*src_stride;
2584 ptmp= tmp + 3*tmp_step;
/* one prediction per node; the same_block() tests presumably let identical
 * nodes share a prediction instead of recomputing it (bodies not visible) */
2587 pred_block(s, block[0], tmp, src_stride, src_x, src_y, b_w, b_h, lt, plane_index, w, h);
2589 if(same_block(lt, rt)){
2594 pred_block(s, block[1], tmp, src_stride, src_x, src_y, b_w, b_h, rt, plane_index, w, h);
2597 if(same_block(lt, lb)){
2599 }else if(same_block(rt, lb)){
2604 pred_block(s, block[2], tmp, src_stride, src_x, src_y, b_w, b_h, lb, plane_index, w, h);
2607 if(same_block(lt, rb) ){
2609 }else if(same_block(rt, rb)){
2611 }else if(same_block(lb, rb)){
2615 pred_block(s, block[3], tmp, src_stride, src_x, src_y, b_w, b_h, rb, plane_index, w, h);
/* NOTE(review): the four loops below write through dst, which in the visible
 * lines is only ever set to NULL; they are presumably disabled by
 * preprocessor lines that are not visible -- confirm */
2618 for(y=0; y<b_h; y++){
2619 for(x=0; x<b_w; x++){
2620 int v= obmc [x + y*obmc_stride] * block[3][x + y*src_stride] * (256/OBMC_MAX);
2621 if(add) dst[x + y*dst_stride] += v;
2622 else dst[x + y*dst_stride] -= v;
2625 for(y=0; y<b_h; y++){
2626 uint8_t *obmc2= obmc + (obmc_stride>>1);
2627 for(x=0; x<b_w; x++){
2628 int v= obmc2[x + y*obmc_stride] * block[2][x + y*src_stride] * (256/OBMC_MAX);
2629 if(add) dst[x + y*dst_stride] += v;
2630 else dst[x + y*dst_stride] -= v;
2633 for(y=0; y<b_h; y++){
2634 uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
2635 for(x=0; x<b_w; x++){
2636 int v= obmc3[x + y*obmc_stride] * block[1][x + y*src_stride] * (256/OBMC_MAX);
2637 if(add) dst[x + y*dst_stride] += v;
2638 else dst[x + y*dst_stride] -= v;
2641 for(y=0; y<b_h; y++){
2642 uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
2643 uint8_t *obmc4= obmc3+ (obmc_stride>>1);
2644 for(x=0; x<b_w; x++){
2645 int v= obmc4[x + y*obmc_stride] * block[0][x + y*src_stride] * (256/OBMC_MAX);
2646 if(add) dst[x + y*dst_stride] += v;
2647 else dst[x + y*dst_stride] -= v;
/* the weighting and the update of the slice buffer are delegated to the dsp hook */
2655 s->dsp.inner_add_yblock(obmc, obmc_stride, block, b_w, b_h, src_x,src_y, src_stride, sb, add, dst8);
2656 STOP_TIMER("Inner add y block")
2661 //FIXME name cleanup (b_w, block_w, b_width stuff)
/**
 * Predict one block area from the four block nodes around (b_x, b_y), weight
 * the four predictions with obmc and apply the result to dst and dst8.
 * NOTE(review): large parts of this function are not visible in this listing:
 * the declarations of block[], ptmp, x and y, the bodies of the border
 * handling conditions, the bodies of the same_block() tests, any use of
 * offset_dst and any preprocessor lines.
 */
2662 static always_inline void add_yblock(SnowContext *s, DWTELEM *dst, uint8_t *dst8, const uint8_t *obmc, int src_x, int src_y, int b_w, int b_h, int w, int h, int dst_stride, int src_stride, int obmc_stride, int b_x, int b_y, int add, int offset_dst, int plane_index){
2663 const int b_width = s->b_width << s->block_max_depth;
2664 const int b_height= s->b_height << s->block_max_depth;
2665 const int b_stride= b_width;
/* the four nodes: at (b_x, b_y), to its right, below it, and below-right */
2666 BlockNode *lt= &s->block[b_x + b_y*b_stride];
2667 BlockNode *rt= lt+1;
2668 BlockNode *lb= lt+b_stride;
2669 BlockNode *rb= lb+1;
2671 int tmp_step= src_stride >= 7*MB_SIZE ? MB_SIZE : MB_SIZE*src_stride;
/* variable-length scratch array on the stack */
2672 uint8_t tmp[src_stride*7*MB_SIZE]; //FIXME align
/* positions at or beyond the right / bottom edge of the node array; the
 * handling itself is not visible */
2679 }else if(b_x + 1 >= b_width){
2686 }else if(b_y + 1 >= b_height){
/* areas reaching outside the picture; only the obmc and dst adjustments for
 * the top edge are visible */
2691 if(src_x<0){ //FIXME merge with prev & always round internal width up to *16
2697 }else if(src_x + b_w > w){
2701 obmc -= src_y*obmc_stride;
2704 dst -= src_y*dst_stride;
2706 }else if(src_y + b_h> h){
/* nothing to do for an empty area */
2710 if(b_w<=0 || b_h<=0) return;
2712 assert(src_stride > 2*MB_SIZE + 5);
/* move both outputs to the top-left sample of the area */
2714 dst += src_x + src_y*dst_stride;
2715 dst8+= src_x + src_y*src_stride;
2716 // src += src_x + src_y*src_stride;
2718 ptmp= tmp + 3*tmp_step;
/* one prediction per node; the same_block() tests presumably let identical
 * nodes share a prediction instead of recomputing it (bodies not visible) */
2721 pred_block(s, block[0], tmp, src_stride, src_x, src_y, b_w, b_h, lt, plane_index, w, h);
2723 if(same_block(lt, rt)){
2728 pred_block(s, block[1], tmp, src_stride, src_x, src_y, b_w, b_h, rt, plane_index, w, h);
2731 if(same_block(lt, lb)){
2733 }else if(same_block(rt, lb)){
2738 pred_block(s, block[2], tmp, src_stride, src_x, src_y, b_w, b_h, lb, plane_index, w, h);
2741 if(same_block(lt, rb) ){
2743 }else if(same_block(rt, rb)){
2745 }else if(same_block(lb, rb)){
2749 pred_block(s, block[3], tmp, src_stride, src_x, src_y, b_w, b_h, rb, plane_index, w, h);
/* NOTE(review): the four per-prediction loops below and the combined loop
 * after them both apply the weighted predictions to dst; presumably only one
 * variant is compiled, selected by preprocessor lines that are not visible
 * -- confirm */
2752 for(y=0; y<b_h; y++){
2753 for(x=0; x<b_w; x++){
2754 int v= obmc [x + y*obmc_stride] * block[3][x + y*src_stride] * (256/OBMC_MAX);
2755 if(add) dst[x + y*dst_stride] += v;
2756 else dst[x + y*dst_stride] -= v;
2759 for(y=0; y<b_h; y++){
2760 uint8_t *obmc2= obmc + (obmc_stride>>1);
2761 for(x=0; x<b_w; x++){
2762 int v= obmc2[x + y*obmc_stride] * block[2][x + y*src_stride] * (256/OBMC_MAX);
2763 if(add) dst[x + y*dst_stride] += v;
2764 else dst[x + y*dst_stride] -= v;
2767 for(y=0; y<b_h; y++){
2768 uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
2769 for(x=0; x<b_w; x++){
2770 int v= obmc3[x + y*obmc_stride] * block[1][x + y*src_stride] * (256/OBMC_MAX);
2771 if(add) dst[x + y*dst_stride] += v;
2772 else dst[x + y*dst_stride] -= v;
2775 for(y=0; y<b_h; y++){
2776 uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
2777 uint8_t *obmc4= obmc3+ (obmc_stride>>1);
2778 for(x=0; x<b_w; x++){
2779 int v= obmc4[x + y*obmc_stride] * block[0][x + y*src_stride] * (256/OBMC_MAX);
2780 if(add) dst[x + y*dst_stride] += v;
2781 else dst[x + y*dst_stride] -= v;
/* combined variant: all four weighted predictions are summed per sample */
2785 for(y=0; y<b_h; y++){
2786 //FIXME ugly misuse of obmc_stride
/* obmc1..obmc4 point into the same row of four regions of the weight table:
 * offset 0, half a stride to the right, obmc_stride>>1 rows down, and both */
2787 uint8_t *obmc1= obmc + y*obmc_stride;
2788 uint8_t *obmc2= obmc1+ (obmc_stride>>1);
2789 uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1);
2790 uint8_t *obmc4= obmc3+ (obmc_stride>>1);
2791 for(x=0; x<b_w; x++){
/* note the reversed pairing: obmc1 weights block[3], obmc4 weights block[0] */
2792 int v= obmc1[x] * block[3][x + y*src_stride]
2793 +obmc2[x] * block[2][x + y*src_stride]
2794 +obmc3[x] * block[1][x + y*src_stride]
2795 +obmc4[x] * block[0][x + y*src_stride];
2797 v <<= 8 - LOG2_OBMC_MAX;
/* rounding constant for the following right shift by 8 - FRAC_BITS */
2799 v += 1<<(7 - FRAC_BITS);
2800 v >>= 8 - FRAC_BITS;
/* add the dst value, round to integer, clip to 0..255 and store to dst8
 * (presumably the add case; the condition is not visible) */
2803 v += dst[x + y*dst_stride];
2804 v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS;
2805 if(v&(~255)) v= ~(v>>31);
2806 dst8[x + y*src_stride] = v;
/* alternative action: remove the prediction from dst */
2808 dst[x + y*dst_stride] -= v;
/**
 * Apply the prediction for block row mb_y of one plane to the slice buffer.
 * When s->keyframe is set or bit 512 of s->avctx->debug is set, the visible
 * statements either convert the slice buffer lines to 8-bit samples in the
 * current picture (adding 128 and rounding) or subtract 128<<FRAC_BITS from
 * them. The last part calls add_yblock_buffered() for mb_x = 0..mb_w.
 * NOTE(review): the declarations of w, h, x, y and mb_x, the test selecting
 * between the two keyframe loops (presumably on add), returns and several
 * call arguments are not visible in this listing.
 */
2815 static always_inline void predict_slice_buffered(SnowContext *s, slice_buffer * sb, DWTELEM * old_buffer, int plane_index, int add, int mb_y){
2816 Plane *p= &s->plane[plane_index];
2817 const int mb_w= s->b_width << s->block_max_depth;
2818 const int mb_h= s->b_height << s->block_max_depth;
2820 int block_size = MB_SIZE >> s->block_max_depth;
/* planes other than 0 use half the block size and the next obmc table */
2821 int block_w = plane_index ? block_size/2 : block_size;
2822 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
2823 int obmc_stride= plane_index ? block_size : 2*block_size;
2824 int ref_stride= s->current_picture.linesize[plane_index];
2825 uint8_t *dst8= s->current_picture.data[plane_index];
2830 if(s->keyframe || (s->avctx->debug&512)){
/* rows belonging to this block row, limited to h */
2835 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++)
2837 // DWTELEM * line = slice_buffer_get_line(sb, y);
2838 DWTELEM * line = sb->line[y];
2841 // int v= buf[x + y*w] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
/* add the offset 128 and the rounding constant in fixed point */
2842 int v= line[x] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
/* values outside 0..255 become 0 or ~0 (255 once stored as 8 bits) */
2844 if(v&(~255)) v= ~(v>>31);
2845 dst8[x + y*ref_stride]= v;
/* second variant: remove the offset 128 from the slice buffer lines */
2849 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++)
2851 // DWTELEM * line = slice_buffer_get_line(sb, y);
2852 DWTELEM * line = sb->line[y];
2855 line[x] -= 128 << FRAC_BITS;
2856 // buf[x + y*w]-= 128<<FRAC_BITS;
/* mb_x runs up to and including mb_w; each area starts half a block to the
 * left of and above the block position */
2864 for(mb_x=0; mb_x<=mb_w; mb_x++){
2867 add_yblock_buffered(s, sb, old_buffer, dst8, obmc,
2868 block_w*mb_x - block_w/2,
2869 block_w*mb_y - block_w/2,
2872 w, ref_stride, obmc_stride,
2876 STOP_TIMER("add_yblock")
2879 STOP_TIMER("predict_slice")
/**
 * Apply the prediction for block row mb_y of one plane to the buffer buf.
 * When s->keyframe is set or bit 512 of s->avctx->debug is set, the visible
 * statements either convert buf to 8-bit samples in the current picture
 * (adding 128 and rounding) or subtract 128<<FRAC_BITS from buf. The last
 * part calls add_yblock() for mb_x = 0..mb_w.
 * NOTE(review): the declarations of w, h, x, y and mb_x, the test selecting
 * between the two keyframe loops (presumably on add), returns and several
 * call arguments are not visible in this listing.
 */
2882 static always_inline void predict_slice(SnowContext *s, DWTELEM *buf, int plane_index, int add, int mb_y){
2883 Plane *p= &s->plane[plane_index];
2884 const int mb_w= s->b_width << s->block_max_depth;
2885 const int mb_h= s->b_height << s->block_max_depth;
2887 int block_size = MB_SIZE >> s->block_max_depth;
/* planes other than 0 use half the block size and the next obmc table */
2888 int block_w = plane_index ? block_size/2 : block_size;
2889 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
2890 const int obmc_stride= plane_index ? block_size : 2*block_size;
2891 int ref_stride= s->current_picture.linesize[plane_index];
2892 uint8_t *dst8= s->current_picture.data[plane_index];
2897 if(s->keyframe || (s->avctx->debug&512)){
/* rows belonging to this block row, limited to h */
2902 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
/* add the offset 128 and the rounding constant in fixed point */
2904 int v= buf[x + y*w] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
/* values outside 0..255 become 0 or ~0 (255 once stored as 8 bits) */
2906 if(v&(~255)) v= ~(v>>31);
2907 dst8[x + y*ref_stride]= v;
/* second variant: remove the offset 128 from buf */
2911 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
2913 buf[x + y*w]-= 128<<FRAC_BITS;
/* mb_x runs up to and including mb_w; each area starts half a block to the
 * left of and above the block position */
2921 for(mb_x=0; mb_x<=mb_w; mb_x++){
2924 add_yblock(s, buf, dst8, obmc,
2925 block_w*mb_x - block_w/2,
2926 block_w*mb_y - block_w/2,
2929 w, ref_stride, obmc_stride,
2931 add, 1, plane_index);
2933 STOP_TIMER("add_yblock")
2936 STOP_TIMER("predict_slice")
/**
 * Run predict_slice() on every block row of a plane.
 * NOTE(review): the declaration of mb_y is not visible in this listing.
 */
2939 static always_inline void predict_plane(SnowContext *s, DWTELEM *buf, int plane_index, int add){
2940 const int mb_h= s->b_height << s->block_max_depth;
/* mb_y runs up to and including mb_h, i.e. one more row than mb_h */
2942 for(mb_y=0; mb_y<=mb_h; mb_y++)
2943 predict_slice(s, buf, plane_index, add, mb_y);
/**
 * Compute a color value in 0..255 for the block at (mb_x, mb_y) of one plane.
 * The block is switched to intra with color 0, the prediction of the
 * surrounding area is accumulated (subtracted) into a scratch buffer with
 * add_yblock(), and the sums ab and aa are formed over the samples inside the
 * picture. The result is the rounded quotient of ab<<LOG2_OBMC_MAX and aa,
 * clipped to 0..255.
 * NOTE(review): the loop over i, the declarations of ab, aa, d, i, x2 and y2
 * and the statement restoring the block from backup are not visible in this
 * listing.
 */
2946 static int get_dc(SnowContext *s, int mb_x, int mb_y, int plane_index){
2948 Plane *p= &s->plane[plane_index];
2949 const int block_size = MB_SIZE >> s->block_max_depth;
/* planes other than 0 use half the block size and the next obmc table */
2950 const int block_w = plane_index ? block_size/2 : block_size;
2951 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
2952 const int obmc_stride= plane_index ? block_size : 2*block_size;
2953 const int ref_stride= s->current_picture.linesize[plane_index];
2954 uint8_t *src= s-> input_picture.data[plane_index];
/* per-plane region of the scratchpad, block_size*block_size*4 elements apart */
2955 DWTELEM *dst= (DWTELEM*)s->m.obmc_scratchpad + plane_index*block_size*block_size*4;
2956 const int b_stride = s->b_width << s->block_max_depth;
2957 const int w= p->width;
2958 const int h= p->height;
2959 int index= mb_x + mb_y*b_stride;
2960 BlockNode *b= &s->block[index];
/* copy of the node taken before it is modified below */
2961 BlockNode backup= *b;
/* with color 0 this block contributes nothing to the accumulated prediction */
2965 b->type|= BLOCK_INTRA;
2966 b->color[plane_index]= 0;
2967 memset(dst, 0, obmc_stride*obmc_stride*sizeof(DWTELEM));
/* i selects one of the four quadrants around the block: bit 0 is the column,
 * bit 1 the row */
2970 int mb_x2= mb_x + (i &1) - 1;
2971 int mb_y2= mb_y + (i>>1) - 1;
2972 int x= block_w*mb_x2 + block_w/2;
2973 int y= block_w*mb_y2 + block_w/2;
/* add=0, so the prediction is subtracted from dst; dst8 is NULL */
2975 add_yblock(s, dst + ((i&1)+(i>>1)*obmc_stride)*block_w, NULL, obmc,
2976 x, y, block_w, block_w, w, h, obmc_stride, ref_stride, obmc_stride, mb_x2, mb_y2, 0, 0, plane_index);
/* only samples inside the picture are visited */
2978 for(y2= FFMAX(y, 0); y2<FFMIN(h, y+block_w); y2++){
2979 for(x2= FFMAX(x, 0); x2<FFMIN(w, x+block_w); x2++){
/* position of the sample relative to the top-left of the block's obmc window */
2980 int index= x2-(block_w*mb_x - block_w/2) + (y2-(block_w*mb_y - block_w/2))*obmc_stride;
2981 int obmc_v= obmc[index];
/* when the quadrant crosses a picture border, the weight found one block_w
 * away in the table is added to this sample's weight */
2983 if(y<0) obmc_v += obmc[index + block_w*obmc_stride];
2984 if(x<0) obmc_v += obmc[index + block_w];
2985 if(y+block_w>h) obmc_v += obmc[index - block_w*obmc_stride];
2986 if(x+block_w>w) obmc_v += obmc[index - block_w];
2987 //FIXME precalc this or simplify it somehow else
/* dst holds the negated prediction; negate it back and add the rounding constant */
2989 d = -dst[index] + (1<<(FRAC_BITS-1));
/* weighted difference between the input sample and the prediction so far */
2991 ab += (src[x2 + y2*ref_stride] - (d>>FRAC_BITS)) * obmc_v;
2992 aa += obmc_v * obmc_v; //FIXME precalculate this
/* aa/2 rounds the division to nearest */
2998 return clip(((ab<<LOG2_OBMC_MAX) + aa/2)/aa, 0, 255); //FIXME we shouldn't need clipping
/**
 * Estimate the cost of the block node at (x, y) from log2 magnitudes.
 * Intra nodes return 3 + 2 * (sum of av_log2 of twice the absolute color
 * differences to the left neighbour). Other nodes return
 * 2 * (1 + av_log2 of twice |dmx| + av_log2 of twice |dmy| + av_log2(2*ref)),
 * where dmx/dmy are the differences to the mid_pred() predictor.
 * @param w offset of the top-right neighbour in nodes
 * NOTE(review): the declarations of dmx and dmy, the statement controlled by
 * the range check and the delimiters of the bit-length table comment are not
 * visible in this listing.
 */
3001 static inline int get_block_bits(SnowContext *s, int x, int y, int w){
3002 const int b_stride = s->b_width << s->block_max_depth;
3003 const int b_height = s->b_height<< s->block_max_depth;
3004 int index= x + y*b_stride;
3005 BlockNode *b = &s->block[index];
/* neighbours outside the picture are replaced by null_block (left/top) or by
 * another neighbour (tl falls back to left, tr falls back to tl) */
3006 BlockNode *left = x ? &s->block[index-1] : &null_block;
3007 BlockNode *top = y ? &s->block[index-b_stride] : &null_block;
3008 BlockNode *tl = y && x ? &s->block[index-b_stride-1] : left;
3009 BlockNode *tr = y && x+w<b_stride ? &s->block[index-b_stride+w] : tl;
3011 // int mx_context= av_log2(2*ABS(left->mx - top->mx));
3012 // int my_context= av_log2(2*ABS(left->my - top->my));
/* positions outside the node array; the pointers above are formed before
 * this test but not dereferenced before it */
3014 if(x<0 || x>=b_stride || y>=b_height)
3016 dmx= b->mx - mid_pred(left->mx, top->mx, tr->mx);
3017 dmy= b->my - mid_pred(left->my, top->my, tr->my);
3023 00001XXXX 15-30 8-15
3025 //FIXME try accurate rate
3026 //FIXME intra and inter predictors if surrounding blocks aren't the same type
3027 if(b->type & BLOCK_INTRA){
3028 return 3+2*( av_log2(2*ABS(left->color[0] - b->color[0]))
3029 + av_log2(2*ABS(left->color[1] - b->color[1]))
3030 + av_log2(2*ABS(left->color[2] - b->color[2])));
3032 return 2*(1 + av_log2(2*ABS(dmx)) //FIXME kill the 2* can be merged in lambda
3033 + av_log2(2*ABS(dmy))
3034 + av_log2(2*b->ref));
/**
 * Compute the cost "distortion + rate*penalty_factor" of the block at
 * (mb_x, mb_y) with its current parameters.
 * The block is predicted over a 2*block_w square with pred_block(), weighted
 * with obmc_edged and combined with the values in the obmc scratchpad. The
 * distortion comes from w97_32_c(), w53_32_c() or s->dsp.me_cmp[0], and the
 * rate from get_block_bits() of this and neighbouring nodes.
 * NOTE(review): the declarations of distortion, rate, x, y and i, the store
 * of the blended sample, the loops over i, some conditions and the closing
 * braces are not visible in this listing.
 */
3037 static int get_block_rd(SnowContext *s, int mb_x, int mb_y, int plane_index, const uint8_t *obmc_edged){
3038 Plane *p= &s->plane[plane_index];
3039 const int block_size = MB_SIZE >> s->block_max_depth;
/* planes other than 0 use half the block size and the next obmc table */
3040 const int block_w = plane_index ? block_size/2 : block_size;
3041 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
3042 const int obmc_stride= plane_index ? block_size : 2*block_size;
3043 const int ref_stride= s->current_picture.linesize[plane_index];
3044 uint8_t *dst= s->current_picture.data[plane_index];
3045 uint8_t *src= s-> input_picture.data[plane_index];
3046 DWTELEM *pred= (DWTELEM*)s->m.obmc_scratchpad + plane_index*block_size*block_size*4;
/* variable-length scratch arrays on the stack, sized by the line size */
3047 uint8_t cur[ref_stride*2*MB_SIZE]; //FIXME alignment
3048 uint8_t tmp[ref_stride*(2*MB_SIZE+5)];
3049 const int b_stride = s->b_width << s->block_max_depth;
3050 const int b_height = s->b_height<< s->block_max_depth;
3051 const int w= p->width;
3052 const int h= p->height;
3055 const int penalty_factor= get_penalty_factor(s->lambda, s->lambda2, s->avctx->me_cmp);
/* top-left corner of the 2*block_w window, half a block before the block */
3056 int sx= block_w*mb_x - block_w/2;
3057 int sy= block_w*mb_y - block_w/2;
/* [x0,x1) x [y0,y1) is the part of the window that lies inside the picture */
3058 int x0= FFMAX(0,-sx);
3059 int y0= FFMAX(0,-sy);
3060 int x1= FFMIN(block_w*2, w-sx);
3061 int y1= FFMIN(block_w*2, h-sy);
3064 pred_block(s, cur, tmp, ref_stride, sx, sy, block_w*2, block_w*2, &s->block[mb_x + mb_y*b_stride], plane_index, w, h);
3066 for(y=y0; y<y1; y++){
3067 const uint8_t *obmc1= obmc_edged + y*obmc_stride;
3068 const DWTELEM *pred1 = pred + y*obmc_stride;
3069 uint8_t *cur1 = cur + y*ref_stride;
3070 uint8_t *dst1 = dst + sx + (sy+y)*ref_stride;
3071 for(x=x0; x<x1; x++){
/* weight this block's prediction, add the scratchpad value, drop the
 * fractional bits and clip to 0..255 */
3072 int v = (cur1[x] * obmc1[x]) << (FRAC_BITS - LOG2_OBMC_MAX);
3073 v = (v + pred1[x]) >> FRAC_BITS;
3074 if(v&(~255)) v= ~(v>>31);
3079 /* copy the regions where obmc[] = (uint8_t)256 */
/* applies only to the four corner blocks, and only when LOG2_OBMC_MAX is 8 */
3080 if(LOG2_OBMC_MAX == 8
3081 && (mb_x == 0 || mb_x == b_stride-1)
3082 && (mb_y == 0 || mb_y == b_height-1)){
3091 for(y=y0; y<y1; y++)
3092 memcpy(dst + sx+x0 + (sy+y)*ref_stride, cur + x0 + y*ref_stride, x1-x0);
3096 /* FIXME rearrange dsputil to fit 32x32 cmp functions */
3097 /* FIXME check alignment of the cmp wavelet vs the encoding wavelet */
3098 /* FIXME cmps overlap but don't cover the wavelet's whole support,
3099 * so improving the score of one block is not strictly guaranteed to
3100 * improve the score of the whole frame, so iterative motion est
3101 * doesn't always converge. */
/* the two wavelet comparison functions are called directly with height 32 */
3102 if(s->avctx->me_cmp == FF_CMP_W97)
3103 distortion = w97_32_c(&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, 32);
3104 else if(s->avctx->me_cmp == FF_CMP_W53)
3105 distortion = w53_32_c(&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, 32);
/* 16x16 comparisons at the quadrant offsets given by the bits of i
 * (presumably the 32-wide case; loop and condition are not visible) */
3109 int off = sx+16*(i&1) + (sy+16*(i>>1))*ref_stride;
3110 distortion += s->dsp.me_cmp[0](&s->m, src + off, dst + off, ref_stride, 16);
/* single comparison over 2*block_w rows */
3115 distortion = s->dsp.me_cmp[0](&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, block_w*2);
/* bits of this node and of nodes around it, selected by the bits of i */
3124 rate += get_block_bits(s, mb_x + (i&1) - (i>>1), mb_y + (i>>1), 1);
/* extra node in the next row when the block is in the second-to-last column */
3126 if(mb_x == b_stride-2)
3127 rate += get_block_bits(s, mb_x + 1, mb_y + 1, 1);
3129 return distortion + rate*penalty_factor;
/**
 * Compute the cost "distortion + rate*penalty_factor" for the 2x2 group of
 * nodes whose top-left node is (mb_x, mb_y).
 * For each of the positions selected by i, the area is rendered into the
 * current picture with add_yblock(), the parts outside the picture are
 * overwritten with the input, and the distortion is accumulated with
 * s->dsp.me_cmp. The rate is accumulated with get_block_bits().
 * NOTE(review): the loop over i, the declarations of distortion, rate, i and
 * y2, the conditions guarding the left/right border copies and the lines
 * between the computation of merged and the rate calculation are not visible
 * in this listing.
 */
3132 static int get_4block_rd(SnowContext *s, int mb_x, int mb_y, int plane_index){
3134 Plane *p= &s->plane[plane_index];
3135 const int block_size = MB_SIZE >> s->block_max_depth;
/* planes other than 0 use half the block size and the next obmc table */
3136 const int block_w = plane_index ? block_size/2 : block_size;
3137 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
3138 const int obmc_stride= plane_index ? block_size : 2*block_size;
3139 const int ref_stride= s->current_picture.linesize[plane_index];
3140 uint8_t *dst= s->current_picture.data[plane_index];
3141 uint8_t *src= s-> input_picture.data[plane_index];
/* zero-initialised (static storage), passed to add_yblock() as its dst argument */
3142 const static DWTELEM zero_dst[4096]; //FIXME
3143 const int b_stride = s->b_width << s->block_max_depth;
3144 const int b_height = s->b_height<< s->block_max_depth;
3145 const int w= p->width;
3146 const int h= p->height;
3149 const int penalty_factor= get_penalty_factor(s->lambda, s->lambda2, s->avctx->me_cmp);
/* i addresses a 3x3 grid of positions starting one node left of and above
 * (mb_x, mb_y) */
3152 int mb_x2= mb_x + (i%3) - 1;
3153 int mb_y2= mb_y + (i/3) - 1;
3154 int x= block_w*mb_x2 + block_w/2;
3155 int y= block_w*mb_y2 + block_w/2;
/* add=1 with a dst stride of 0, so every row reads the start of zero_dst;
 * the 8-bit result goes to the current picture */
3157 add_yblock(s, zero_dst, dst, obmc,
3158 x, y, block_w, block_w, w, h, /*dst_stride*/0, ref_stride, obmc_stride, mb_x2, mb_y2, 1, 1, plane_index);
3160 //FIXME find a cleaner/simpler way to skip the outside stuff
/* rows above and below the picture are copied from the input, so they add no
 * difference between src and dst */
3161 for(y2= y; y2<0; y2++)
3162 memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, block_w);
3163 for(y2= h; y2<y+block_w; y2++)
3164 memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, block_w);
/* the -x columns left of the picture (presumably only when x<0; the
 * condition is not visible) */
3166 for(y2= y; y2<y+block_w; y2++)
3167 memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, -x);
/* the columns right of the picture (presumably only when x+block_w > w; the
 * condition is not visible) */
3170 for(y2= y; y2<y+block_w; y2++)
3171 memcpy(dst + w + y2*ref_stride, src + w + y2*ref_stride, x+block_w - w);
/* me_cmp[1] is used for block_w 8 and me_cmp[0] for block_w 16 */
3174 assert(block_w== 8 || block_w==16);
3175 distortion += s->dsp.me_cmp[block_w==8](&s->m, src + x + y*ref_stride, dst + x + y*ref_stride, ref_stride, block_w);
3179 BlockNode *b= &s->block[mb_x+mb_y*b_stride];
/* nonzero when all four nodes of the group are the same */
3180 int merged= same_block(b,b+1) && same_block(b,b+b_stride) && same_block(b,b+b_stride+1);
3188 rate = get_block_bits(s, mb_x, mb_y, 2);
/* a merged group skips the first four entries of dxy, i.e. its own four nodes */
3189 for(i=merged?4:0; i<9; i++){
3190 static const int dxy[9][2] = {{0,0},{1,0},{0,1},{1,1},{2,0},{2,1},{-1,2},{0,2},{1,2}};
3191 rate += get_block_bits(s, mb_x + dxy[i][0], mb_y + dxy[i][1], 1);
3194 return distortion + rate*penalty_factor;
/**
 * Try the parameters p for the block node at (mb_x, mb_y) and evaluate them
 * with get_block_rd() on plane 0.
 * The visible statements set the node's colors to p[0..2] and mark it intra,
 * or compute a cache slot from p[0], p[1] and block->ref and clear the intra
 * flag.
 * NOTE(review): the condition on intra, the assignment of the motion vector,
 * the statement taken on a cache hit, the comparison against *best_rd, the
 * restore from backup and the return statements are not visible in this
 * listing.
 */
3197 static always_inline int check_block(SnowContext *s, int mb_x, int mb_y, int p[3], int intra, const uint8_t *obmc_edged, int *best_rd){
3198 const int b_stride= s->b_width << s->block_max_depth;
3199 BlockNode *block= &s->block[mb_x + mb_y * b_stride];
/* copy of the node taken before it is modified below */
3200 BlockNode backup= *block;
3201 int rd, index, value;
3203 assert(mb_x>=0 && mb_y>=0);
3204 assert(mb_x<b_stride);
/* intra candidate: p holds the three colors */
3207 block->color[0] = p[0];
3208 block->color[1] = p[1];
3209 block->color[2] = p[2];
3210 block->type |= BLOCK_INTRA;
/* the slot is chosen from p[0] and p[1]; the stored value combines the cache
 * generation with bits of p[0], p[1] and the ref */
3212 index= (p[0] + 31*p[1]) & (ME_CACHE_SIZE-1);
3213 value= s->me_cache_generation + (p[0]>>10) + (p[1]<<6) + (block->ref<<12);
/* a matching entry means this candidate was seen before; the statement taken
 * in that case is not visible */
3214 if(s->me_cache[index] == value)
3216 s->me_cache[index]= value;
3220 block->type &= ~BLOCK_INTRA;
3223 rd= get_block_rd(s, mb_x, mb_y, 0, obmc_edged);
3235 /* special case for int[2] args we discard afterward, fixes compilation prob with gcc 2.95 */
/**
 * Wrapper around check_block() for a non-intra candidate: packs p0 and p1
 * into a local two-element array and passes 0 as the intra argument.
 * @return the value returned by check_block()
 */
3236 static always_inline int check_block_inter(SnowContext *s, int mb_x, int mb_y, int p0, int p1, const uint8_t *obmc_edged, int *best_rd){
3237 int p[2] = {p0, p1};
3238 return check_block(s, mb_x, mb_y, p, 0, obmc_edged, best_rd);
/**
 * Try one non-intra candidate (p0, p1, ref) for the whole 2x2 group of nodes
 * whose top-left node is (mb_x, mb_y) and evaluate it with get_4block_rd()
 * on plane 0. The visible statements copy the first node to the other three
 * and restore all four nodes from backup.
 * NOTE(review): the assignment of p0, p1 and ref to the node, the statement
 * taken on a cache hit, the comparison against *best_rd, the condition
 * guarding the restore and the return statements are not visible in this
 * listing.
 */
3241 static always_inline int check_4block_inter(SnowContext *s, int mb_x, int mb_y, int p0, int p1, int ref, int *best_rd){
3242 const int b_stride= s->b_width << s->block_max_depth;
3243 BlockNode *block= &s->block[mb_x + mb_y * b_stride];
/* copies of the four nodes of the group, taken before they are modified */
3244 BlockNode backup[4]= {block[0], block[1], block[b_stride], block[b_stride+1]};
3245 int rd, index, value;
3247 assert(mb_x>=0 && mb_y>=0);
3248 assert(mb_x<b_stride);
/* both coordinates must be even */
3249 assert(((mb_x|mb_y)&1) == 0);
/* the slot is chosen from p0 and p1; the stored value combines the cache
 * generation with bits of p0, p1 and block->ref */
3251 index= (p0 + 31*p1) & (ME_CACHE_SIZE-1);
3252 value= s->me_cache_generation + (p0>>10) + (p1<<6) + (block->ref<<12);
/* a matching entry means this candidate was seen before; the statement taken
 * in that case is not visible */
3253 if(s->me_cache[index] == value)
3255 s->me_cache[index]= value;
3260 block->type &= ~BLOCK_INTRA;
/* give all four nodes of the group the same parameters */
3261 block[1]= block[b_stride]= block[b_stride+1]= *block;
3263 rd= get_4block_rd(s, mb_x, mb_y, 0);
/* put the original four nodes back */
3270 block[0]= backup[0];
3271 block[1]= backup[1];
3272 block[b_stride]= backup[2];
3273 block[b_stride+1]= backup[3];
/**
 * Iterative motion estimation over the whole block grid of the frame.
 *
 * Stages that can be read off the code below:
 *  - every block is run through encode_q_branch() once; s->block_state is
 *    saved before and copied back after, and a copy of the range coder s->c
 *    is taken in r (writing r back to s->c is presumably done as well
 *    -- TODO confirm);
 *  - up to 25 refinement passes visit each block. For each block the OBMC
 *    window is copied from obmc_tab and adjusted at picture borders, source
 *    pixels are copied into the current picture where the window reaches
 *    outside the plane, and candidates are scored with check_block_inter()
 *    and check_block(): the current choice, the stored per-reference vector
 *    and those of its four neighbours, the zero vector, a diamond pattern in
 *    steps of 4, a square pattern in steps of 1 and finally the intra colour
 *    obtained from get_dc();
 *  - when s->block_max_depth is 1, a last pass walks the grid in 2x2 groups
 *    and tries to give each group a common vector with check_4block_inter().
 *
 * The number of changes per pass is reported through av_log().
 */
3278 static void iterative_me(SnowContext *s){
3279 int pass, mb_x, mb_y;
3280 const int b_width = s->b_width << s->block_max_depth;
3281 const int b_height= s->b_height << s->block_max_depth;
3282 const int b_stride= b_width;
// snapshot of the range coder and of the context state, taken before the
// trial encode of all blocks below
3286 RangeCoder r = s->c;
3287 uint8_t state[sizeof(s->block_state)];
3288 memcpy(state, s->block_state, sizeof(s->block_state));
3289 for(mb_y= 0; mb_y<s->b_height; mb_y++)
3290 for(mb_x= 0; mb_x<s->b_width; mb_x++)
3291 encode_q_branch(s, 0, mb_x, mb_y);
3293 memcpy(s->block_state, state, sizeof(s->block_state));
3296 for(pass=0; pass<25; pass++){
3299 for(mb_y= 0; mb_y<b_height; mb_y++){
3300 for(mb_x= 0; mb_x<b_width; mb_x++){
3301 int dia_change, i, j, ref;
3302 int best_rd= INT_MAX, ref_rd;
3303 BlockNode backup, ref_b;
3304 const int index= mb_x + mb_y * b_stride;
3305 BlockNode *block= &s->block[index];
// pointers to the eight neighbouring blocks, NULL where the grid ends
3306 BlockNode *tb = mb_y ? &s->block[index-b_stride ] : NULL;
3307 BlockNode *lb = mb_x ? &s->block[index -1] : NULL;
3308 BlockNode *rb = mb_x+1<b_width ? &s->block[index +1] : NULL;
3309 BlockNode *bb = mb_y+1<b_height ? &s->block[index+b_stride ] : NULL;
3310 BlockNode *tlb= mb_x && mb_y ? &s->block[index-b_stride-1] : NULL;
3311 BlockNode *trb= mb_x+1<b_width && mb_y ? &s->block[index-b_stride+1] : NULL;
3312 BlockNode *blb= mb_x && mb_y+1<b_height ? &s->block[index+b_stride-1] : NULL;
3313 BlockNode *brb= mb_x+1<b_width && mb_y+1<b_height ? &s->block[index+b_stride+1] : NULL;
3314 const int b_w= (MB_SIZE >> s->block_max_depth);
3315 uint8_t obmc_edged[b_w*2][b_w*2];
// BLOCK_OPT flags a block as already optimised; after the first pass such
// blocks are presumably skipped -- TODO confirm
3317 if(pass && (block->type & BLOCK_OPT))
3319 block->type |= BLOCK_OPT;
// new me_cache generation for this block; the table itself is only wiped
// when the generation counter is zero
3323 if(!s->me_cache_generation)
3324 memset(s->me_cache, 0, sizeof(s->me_cache));
3325 s->me_cache_generation += 1<<22;
// start from the plain OBMC window, then fold weights together along the
// picture borders
3330 memcpy(obmc_edged, obmc_tab[s->block_max_depth], b_w*b_w*4);
3332 for(y=0; y<b_w*2; y++)
3333 memset(obmc_edged[y], obmc_edged[y][0] + obmc_edged[y][b_w-1], b_w);
3334 if(mb_x==b_stride-1)
3335 for(y=0; y<b_w*2; y++)
3336 memset(obmc_edged[y]+b_w, obmc_edged[y][b_w] + obmc_edged[y][b_w*2-1], b_w);
3338 for(x=0; x<b_w*2; x++)
3339 obmc_edged[0][x] += obmc_edged[b_w-1][x];
3340 for(y=1; y<b_w; y++)
3341 memcpy(obmc_edged[y], obmc_edged[0], b_w*2);
3343 if(mb_y==b_height-1){
3344 for(x=0; x<b_w*2; x++)
3345 obmc_edged[b_w*2-1][x] += obmc_edged[b_w][x];
3346 for(y=b_w; y<b_w*2-1; y++)
3347 memcpy(obmc_edged[y], obmc_edged[b_w*2-1], b_w*2);
3351 //skip stuff outside the picture
3352 if(mb_x==0 || mb_y==0 || mb_x==b_width-1 || mb_y==b_height-1)
3354 uint8_t *src= s-> input_picture.data[0];
3355 uint8_t *dst= s->current_picture.data[0];
3356 const int stride= s->current_picture.linesize[0];
3357 const int block_w= MB_SIZE >> s->block_max_depth;
// the window is twice the block size and starts half a block before it
3358 const int sx= block_w*mb_x - block_w/2;
3359 const int sy= block_w*mb_y - block_w/2;
3360 const int w= s->plane[0].width;
3361 const int h= s->plane[0].height;
3365 memcpy(dst + sx + y*stride, src + sx + y*stride, block_w*2);
3366 for(y=h; y<sy+block_w*2; y++)
3367 memcpy(dst + sx + y*stride, src + sx + y*stride, block_w*2);
3369 for(y=sy; y<sy+block_w*2; y++)
3370 memcpy(dst + sx + y*stride, src + sx + y*stride, -sx);
3372 if(sx+block_w*2 > w){
3373 for(y=sy; y<sy+block_w*2; y++)
3374 memcpy(dst + w + y*stride, src + w + y*stride, sx+block_w*2 - w);
3378 // intra(black) = neighbors' contribution to the current block
3380 color[i]= get_dc(s, mb_x, mb_y, i);
3382 // get previous score (cannot be cached due to OBMC)
3383 if(pass > 0 && (block->type&BLOCK_INTRA)){
3384 int color0[3]= {block->color[0], block->color[1], block->color[2]};
3385 check_block(s, mb_x, mb_y, color0, 1, *obmc_edged, &best_rd);
3387 check_block_inter(s, mb_x, mb_y, block->mx, block->my, *obmc_edged, &best_rd);
3391 for(ref=0; ref < s->ref_frames; ref++){
3392 int16_t (*mvr)[2]= &s->ref_mvs[ref][index];
// compares the stored score of this reference with 3/2 of the score stored
// for reference ref_b.ref
3393 if(s->ref_scores[ref][index] > s->ref_scores[ref_b.ref][index]*3/2) //FIXME tune threshold
// candidates: stored vector of this block, zero vector, stored vectors of
// the top, left, right and bottom neighbours
3398 check_block_inter(s, mb_x, mb_y, mvr[0][0], mvr[0][1], *obmc_edged, &best_rd);
3399 check_block_inter(s, mb_x, mb_y, 0, 0, *obmc_edged, &best_rd);
3401 check_block_inter(s, mb_x, mb_y, mvr[-b_stride][0], mvr[-b_stride][1], *obmc_edged, &best_rd);
3403 check_block_inter(s, mb_x, mb_y, mvr[-1][0], mvr[-1][1], *obmc_edged, &best_rd);
3405 check_block_inter(s, mb_x, mb_y, mvr[1][0], mvr[1][1], *obmc_edged, &best_rd);
3407 check_block_inter(s, mb_x, mb_y, mvr[b_stride][0], mvr[b_stride][1], *obmc_edged, &best_rd);
3410 //FIXME avoid subpel interpol / round to nearest integer
// diamond pattern around the current vector, step 4, size from dia_size
3413 for(i=0; i<FFMAX(s->avctx->dia_size, 1); i++){
3415 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+4*(i-j), block->my+(4*j), *obmc_edged, &best_rd);
3416 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx-4*(i-j), block->my-(4*j), *obmc_edged, &best_rd);
3417 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+4*(i-j), block->my-(4*j), *obmc_edged, &best_rd);
3418 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx-4*(i-j), block->my+(4*j), *obmc_edged, &best_rd);
// the eight positions one step away from the current vector
3424 static const int square[8][2]= {{+1, 0},{-1, 0},{ 0,+1},{ 0,-1},{+1,+1},{-1,-1},{+1,-1},{-1,+1},};
3427 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+square[i][0], block->my+square[i][1], *obmc_edged, &best_rd);
3429 //FIXME or try the standard 2 pass qpel or similar
// remember the vector reached for this reference
3431 mvr[0][0]= block->mx;
3432 mvr[0][1]= block->my;
3433 if(ref_rd > best_rd){
// last candidate: intra block with the colour from get_dc()
3441 check_block(s, mb_x, mb_y, color, 1, *obmc_edged, &best_rd);
3442 //FIXME RD style color selection
// the block differs from its backup: all eight neighbours lose BLOCK_OPT
3444 if(!same_block(block, &backup)){
3445 if(tb ) tb ->type &= ~BLOCK_OPT;
3446 if(lb ) lb ->type &= ~BLOCK_OPT;
3447 if(rb ) rb ->type &= ~BLOCK_OPT;
3448 if(bb ) bb ->type &= ~BLOCK_OPT;
3449 if(tlb) tlb->type &= ~BLOCK_OPT;
3450 if(trb) trb->type &= ~BLOCK_OPT;
3451 if(blb) blb->type &= ~BLOCK_OPT;
3452 if(brb) brb->type &= ~BLOCK_OPT;
3457 av_log(NULL, AV_LOG_ERROR, "pass:%d changed:%d\n", pass, change);
// 4mv merge pass: only when blocks may be split one level
3462 if(s->block_max_depth == 1){
3464 for(mb_y= 0; mb_y<b_height; mb_y+=2){
3465 for(mb_x= 0; mb_x<b_width; mb_x+=2){
3467 int best_rd, init_rd;
3468 const int index= mb_x + mb_y * b_stride;
3471 b[0]= &s->block[index];
3473 b[2]= b[0]+b_stride;
// tests whether the four blocks of the group are already identical
3475 if(same_block(b[0], b[1]) &&
3476 same_block(b[0], b[2]) &&
3477 same_block(b[0], b[3]))
3480 if(!s->me_cache_generation)
3481 memset(s->me_cache, 0, sizeof(s->me_cache));
3482 s->me_cache_generation += 1<<22;
3484 init_rd= best_rd= get_4block_rd(s, mb_x, mb_y, 0);
3486 //FIXME more multiref search?
// first candidate: rounded average of the four vectors, reference 0
3487 check_4block_inter(s, mb_x, mb_y,
3488 (b[0]->mx + b[1]->mx + b[2]->mx + b[3]->mx + 2) >> 2,
3489 (b[0]->my + b[1]->my + b[2]->my + b[3]->my + 2) >> 2, 0, &best_rd);
// then the vector and reference of each non-intra block of the group
3492 if(!(b[i]->type&BLOCK_INTRA))
3493 check_4block_inter(s, mb_x, mb_y, b[i]->mx, b[i]->my, b[i]->ref, &best_rd);
3495 if(init_rd != best_rd)
3499 av_log(NULL, AV_LOG_ERROR, "pass:4mv changed:%d\n", change*4);
/**
 * Quantizes one subband in place.
 * The step qmul comes from the qexp table, indexed by s->qlog + b->qlog
 * clipped to 0..QROOT*16. Nothing is done when s->qlog is LOSSLESS_QLOG.
 * Coefficients are tested with (unsigned)(i+thres1) > thres2; a coefficient
 * is either divided by qmul (with or without the rounding bias added first,
 * depending on which of the two loops runs) and stored with its sign, or
 * stored as 0.
 * @param s      codec context, supplies the frame level qlog
 * @param b      subband description (level, width, height, qlog)
 * @param src    coefficients of the subband, modified in place
 * @param stride distance between two rows of src, in elements
 * @param bias   non-zero selects a rounding bias of 0, zero selects
 *               (3*qmul)>>3
 */
3503 static void quantize(SnowContext *s, SubBand *b, DWTELEM *src, int stride, int bias){
3504 const int level= b->level;
3505 const int w= b->width;
3506 const int h= b->height;
3507 const int qlog= clip(s->qlog + b->qlog, 0, QROOT*16);
3508 const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
3509 int x,y, thres1, thres2;
3512 if(s->qlog == LOSSLESS_QLOG) return;
3514 bias= bias ? 0 : (3*qmul)>>3;
3515 thres1= ((qmul - bias)>>QEXPSHIFT) - 1;
// variant that divides without adding the bias
3521 int i= src[x + y*stride];
3523 if((unsigned)(i+thres1) > thres2){
3526 i/= qmul; //FIXME optimize
3527 src[x + y*stride]= i;
3531 i/= qmul; //FIXME optimize
3532 src[x + y*stride]= -i;
3535 src[x + y*stride]= 0;
// variant that adds the bias before dividing
3541 int i= src[x + y*stride];
3543 if((unsigned)(i+thres1) > thres2){
3546 i= (i + bias) / qmul; //FIXME optimize
3547 src[x + y*stride]= i;
3551 i= (i + bias) / qmul; //FIXME optimize
3552 src[x + y*stride]= -i;
3555 src[x + y*stride]= 0;
3559 if(level+1 == s->spatial_decomposition_count){
3560 // STOP_TIMER("quantize")
/**
 * Dequantizes rows start_y .. end_y-1 of a subband held in a slice buffer.
 * Each row is fetched with slice_buffer_get_line() using the stride_line and
 * buffer offsets of the band. A value is multiplied by qmul, offset by
 * qadd = (s->qbias*qmul)>>QBIAS_SHIFT and shifted right by QEXPSHIFT; one of
 * the two forms negates before and after, presumably for negative
 * coefficients -- TODO confirm. Nothing is done when s->qlog is
 * LOSSLESS_QLOG.
 * NOTE(review): src and stride appear only in the commented-out line lookup.
 */
3564 static void dequantize_slice_buffered(SnowContext *s, slice_buffer * sb, SubBand *b, DWTELEM *src, int stride, int start_y, int end_y){
3565 const int w= b->width;
3566 const int qlog= clip(s->qlog + b->qlog, 0, QROOT*16);
3567 const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
3568 const int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
3572 if(s->qlog == LOSSLESS_QLOG) return;
3574 for(y=start_y; y<end_y; y++){
3575 // DWTELEM * line = slice_buffer_get_line_from_address(sb, src + (y * stride));
3576 DWTELEM * line = slice_buffer_get_line(sb, (y * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
3580 line[x]= -((-i*qmul + qadd)>>(QEXPSHIFT)); //FIXME try different bias
3582 line[x]= (( i*qmul + qadd)>>(QEXPSHIFT));
// timing is only reported for bands wider than 200 coefficients
3586 if(w > 200 /*level+1 == s->spatial_decomposition_count*/){
3587 STOP_TIMER("dquant")
/**
 * Dequantizes one subband in place in a plain coefficient array.
 * Uses the same step qmul and offset qadd = (s->qbias*qmul)>>QBIAS_SHIFT as
 * dequantize_slice_buffered(): a value is multiplied by qmul, offset by qadd
 * and shifted right by QEXPSHIFT, with a second form that negates before and
 * after (presumably for negative coefficients -- TODO confirm).
 * Nothing is done when s->qlog is LOSSLESS_QLOG.
 * @param src    coefficients of the subband, modified in place
 * @param stride distance between two rows of src, in elements
 */
3591 static void dequantize(SnowContext *s, SubBand *b, DWTELEM *src, int stride){
3592 const int w= b->width;
3593 const int h= b->height;
3594 const int qlog= clip(s->qlog + b->qlog, 0, QROOT*16);
3595 const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
3596 const int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
3600 if(s->qlog == LOSSLESS_QLOG) return;
3604 int i= src[x + y*stride];
3606 src[x + y*stride]= -((-i*qmul + qadd)>>(QEXPSHIFT)); //FIXME try different bias
3608 src[x + y*stride]= (( i*qmul + qadd)>>(QEXPSHIFT));
// timing is only reported for bands wider than 200 coefficients
3612 if(w > 200 /*level+1 == s->spatial_decomposition_count*/){
3613 STOP_TIMER("dquant")
/**
 * Replaces every coefficient of a subband by its difference from a
 * prediction made from neighbouring coefficients.
 * Rows and columns are walked from the bottom-right corner backwards, so the
 * left and upper neighbours read by a predictor still hold their original
 * values. Three predictors appear below: the median of left, top and
 * top-right; the median of left, top and left+top-topleft; and the plain
 * left or top neighbour as fallback on the first row or column.
 * NOTE(review): which predictor is used presumably depends on use_median and
 * on x being 0 -- TODO confirm.
 */
3617 static void decorrelate(SnowContext *s, SubBand *b, DWTELEM *src, int stride, int inverse, int use_median){
3618 const int w= b->width;
3619 const int h= b->height;
3622 for(y=h-1; y>=0; y--){
3623 for(x=w-1; x>=0; x--){
3624 int i= x + y*stride;
3628 if(y && x+1<w) src[i] -= mid_pred(src[i - 1], src[i - stride], src[i - stride + 1]);
3629 else src[i] -= src[i - 1];
3631 if(y) src[i] -= mid_pred(src[i - 1], src[i - stride], src[i - 1] + src[i - stride] - src[i - 1 - stride]);
3632 else src[i] -= src[i - 1];
3635 if(y) src[i] -= src[i - stride];
/**
 * Adds the neighbour prediction back onto rows start_y .. end_y-1 of a
 * subband held in a slice buffer; the predictors are the same three forms
 * that decorrelate() subtracts.
 * The row above the first one is fetched up front and each further row is
 * fetched with slice_buffer_get_line(); prev is presumably kept pointing at
 * the row above the current one -- TODO confirm.
 */
3641 static void correlate_slice_buffered(SnowContext *s, slice_buffer * sb, SubBand *b, DWTELEM *src, int stride, int inverse, int use_median, int start_y, int end_y){
3642 const int w= b->width;
3651 line = slice_buffer_get_line(sb, ((start_y - 1) * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
3653 for(y=start_y; y<end_y; y++){
3655 // line = slice_buffer_get_line_from_address(sb, src + (y * stride));
3656 line = slice_buffer_get_line(sb, (y * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
3660 if(y && x+1<w) line[x] += mid_pred(line[x - 1], prev[x], prev[x + 1]);
3661 else line[x] += line[x - 1];
3663 if(y) line[x] += mid_pred(line[x - 1], prev[x], line[x - 1] + prev[x] - prev[x - 1]);
3664 else line[x] += line[x - 1];
3667 if(y) line[x] += prev[x];
3672 // STOP_TIMER("correlate")
/**
 * Adds the neighbour prediction back onto every coefficient of a subband in
 * a plain coefficient array. The three predictor forms are the ones that
 * decorrelate() subtracts: median of left, top and top-right; median of
 * left, top and left+top-topleft; plain left or top neighbour.
 * @param src    coefficients of the subband, modified in place
 * @param stride distance between two rows of src, in elements
 */
3675 static void correlate(SnowContext *s, SubBand *b, DWTELEM *src, int stride, int inverse, int use_median){
3676 const int w= b->width;
3677 const int h= b->height;
3682 int i= x + y*stride;
3686 if(y && x+1<w) src[i] += mid_pred(src[i - 1], src[i - stride], src[i - stride + 1]);
3687 else src[i] += src[i - 1];
3689 if(y) src[i] += mid_pred(src[i - 1], src[i - stride], src[i - 1] + src[i - stride] - src[i - 1 - stride]);
3690 else src[i] += src[i - 1];
3693 if(y) src[i] += src[i - stride];
/**
 * Writes the frame header to the range coder s->c.
 * The keyframe flag is coded first with its own freshly initialised state
 * (kstate); all other fields use s->header_state. Field order as written
 * below: version, always_reset, temporal decomposition type and count,
 * spatial decomposition count, colorspace type, chroma shifts,
 * spatial_scalability, max_ref_frames-1, the band qlog values, then spatial
 * decomposition type, qlog, mv_scale, qbias and block_max_depth.
 * Band qlog values are written for planes 0 and 1 only and orientation 2 is
 * skipped; decode_header() fills those entries in from the coded ones.
 * NOTE(review): the first group of fields is presumably written for
 * keyframes only -- TODO confirm.
 */
3699 static void encode_header(SnowContext *s){
3700 int plane_index, level, orientation;
3703 memset(kstate, MID_STATE, sizeof(kstate));
3705 put_rac(&s->c, kstate, s->keyframe);
3706 if(s->keyframe || s->always_reset)
3709 put_symbol(&s->c, s->header_state, s->version, 0);
3710 put_rac(&s->c, s->header_state, s->always_reset);
3711 put_symbol(&s->c, s->header_state, s->temporal_decomposition_type, 0);
3712 put_symbol(&s->c, s->header_state, s->temporal_decomposition_count, 0);
3713 put_symbol(&s->c, s->header_state, s->spatial_decomposition_count, 0);
3714 put_symbol(&s->c, s->header_state, s->colorspace_type, 0);
3715 put_symbol(&s->c, s->header_state, s->chroma_h_shift, 0);
3716 put_symbol(&s->c, s->header_state, s->chroma_v_shift, 0);
3717 put_rac(&s->c, s->header_state, s->spatial_scalability);
3718 // put_rac(&s->c, s->header_state, s->rate_scalability);
3719 put_symbol(&s->c, s->header_state, s->max_ref_frames-1, 0);
3721 for(plane_index=0; plane_index<2; plane_index++){
3722 for(level=0; level<s->spatial_decomposition_count; level++){
3723 for(orientation=level ? 1:0; orientation<4; orientation++){
3724 if(orientation==2) continue;
3725 put_symbol(&s->c, s->header_state, s->plane[plane_index].band[level][orientation].qlog, 1);
3730 put_symbol(&s->c, s->header_state, s->spatial_decomposition_type, 0);
3731 put_symbol(&s->c, s->header_state, s->qlog, 1);
3732 put_symbol(&s->c, s->header_state, s->mv_scale, 0);
3733 put_symbol(&s->c, s->header_state, s->qbias, 1);
3734 put_symbol(&s->c, s->header_state, s->block_max_depth, 0);
/**
 * Reads the frame header from the range coder s->c into the context.
 * Fields are read in the order encode_header() writes them. Band qlog values
 * are only read for planes 0 and 1 and orientations other than 2: plane 2
 * copies its values from plane 1, and orientation 2 copies from
 * orientation 1 of the same plane and level.
 * An unsupported version or a spatial_decomposition_type above 2 is logged;
 * a block_max_depth outside 0..1 is logged and reset to 0.
 * @return int status; the exact values are not shown here -- presumably 0 on
 *         success and negative on error, TODO confirm
 */
3737 static int decode_header(SnowContext *s){
3738 int plane_index, level, orientation;
3741 memset(kstate, MID_STATE, sizeof(kstate));
3743 s->keyframe= get_rac(&s->c, kstate);
3744 if(s->keyframe || s->always_reset)
3747 s->version= get_symbol(&s->c, s->header_state, 0);
3749 av_log(s->avctx, AV_LOG_ERROR, "version %d not supported", s->version);
3752 s->always_reset= get_rac(&s->c, s->header_state);
3753 s->temporal_decomposition_type= get_symbol(&s->c, s->header_state, 0);
3754 s->temporal_decomposition_count= get_symbol(&s->c, s->header_state, 0);
3755 s->spatial_decomposition_count= get_symbol(&s->c, s->header_state, 0);
3756 s->colorspace_type= get_symbol(&s->c, s->header_state, 0);
3757 s->chroma_h_shift= get_symbol(&s->c, s->header_state, 0);
3758 s->chroma_v_shift= get_symbol(&s->c, s->header_state, 0);
3759 s->spatial_scalability= get_rac(&s->c, s->header_state);
3760 // s->rate_scalability= get_rac(&s->c, s->header_state);
3761 s->max_ref_frames= get_symbol(&s->c, s->header_state, 0)+1;
3763 for(plane_index=0; plane_index<3; plane_index++){
3764 for(level=0; level<s->spatial_decomposition_count; level++){
3765 for(orientation=level ? 1:0; orientation<4; orientation++){
// only planes 0 and 1 with orientation != 2 are coded in the stream
3767 if (plane_index==2) q= s->plane[1].band[level][orientation].qlog;
3768 else if(orientation==2) q= s->plane[plane_index].band[level][1].qlog;
3769 else q= get_symbol(&s->c, s->header_state, 1);
3770 s->plane[plane_index].band[level][orientation].qlog= q;
3776 s->spatial_decomposition_type= get_symbol(&s->c, s->header_state, 0);
3777 if(s->spatial_decomposition_type > 2){
3778 av_log(s->avctx, AV_LOG_ERROR, "spatial_decomposition_type %d not supported", s->spatial_decomposition_type);
3782 s->qlog= get_symbol(&s->c, s->header_state, 1);
3783 s->mv_scale= get_symbol(&s->c, s->header_state, 0);
3784 s->qbias= get_symbol(&s->c, s->header_state, 1);
3785 s->block_max_depth= get_symbol(&s->c, s->header_state, 0);
3786 if(s->block_max_depth > 1 || s->block_max_depth < 0){
3787 av_log(s->avctx, AV_LOG_ERROR, "block_max_depth= %d is too large", s->block_max_depth);
3788 s->block_max_depth= 0;
/**
 * Steps a value v through QROOT iterations, multiplying it by 2^(1/QROOT)
 * each time, so it doubles over the whole loop.
 * NOTE(review): v is presumably stored into the qexp[] table that the
 * quantizer functions index -- TODO confirm.
 */
3795 static void init_qexp(void){
3799 for(i=0; i<QROOT; i++){
3801 v *= pow(2, 1.0 / QROOT);
/**
 * Sets up the SnowContext stored in avctx->priv_data.
 *
 * What the code below does:
 *  - initialises s->dsp and overrides its qpel and pixel function tables
 *    (qpel entries from the h264 qpel tables, pixel entries from the
 *    mc_block_hpel functions);
 *  - fixes the spatial decomposition count at 5, takes the decomposition
 *    type from avctx->prediction_method and sets both chroma shifts to 1;
 *  - allocates spatial_dwt_buffer with width*height DWTELEMs;
 *  - derives mv_scale from CODEC_FLAG_QPEL and block_max_depth from
 *    CODEC_FLAG_4MV;
 *  - for every plane, level and orientation fills in the SubBand: buffer
 *    pointer, stride, width, height, slice-buffer stride and offsets, parent
 *    band and a zeroed x_coeff array;
 *  - requests a buffer for s->mconly_picture through get_buffer().
 *
 * NOTE(review): none of the av_mallocz() results nor the get_buffer() result
 * are checked in the lines shown.
 */
3805 static int common_init(AVCodecContext *avctx){
3806 SnowContext *s = avctx->priv_data;
3808 int level, orientation, plane_index, dec;
3812 dsputil_init(&s->dsp, avctx);
3815 s->dsp.put_qpel_pixels_tab [0][dy+dx/4]=\
3816 s->dsp.put_no_rnd_qpel_pixels_tab[0][dy+dx/4]=\
3817 s->dsp.put_h264_qpel_pixels_tab[0][dy+dx/4];\
3818 s->dsp.put_qpel_pixels_tab [1][dy+dx/4]=\
3819 s->dsp.put_no_rnd_qpel_pixels_tab[1][dy+dx/4]=\
3820 s->dsp.put_h264_qpel_pixels_tab[1][dy+dx/4];
3839 #define mcfh(dx,dy)\
3840 s->dsp.put_pixels_tab [0][dy/4+dx/8]=\
3841 s->dsp.put_no_rnd_pixels_tab[0][dy/4+dx/8]=\
3842 mc_block_hpel ## dx ## dy ## 16;\
3843 s->dsp.put_pixels_tab [1][dy/4+dx/8]=\
3844 s->dsp.put_no_rnd_pixels_tab[1][dy/4+dx/8]=\
3845 mc_block_hpel ## dx ## dy ## 8;
3855 dec= s->spatial_decomposition_count= 5;
3856 s->spatial_decomposition_type= avctx->prediction_method; //FIXME add decorrelator type r transform_type
3858 s->chroma_h_shift= 1; //FIXME XXX
3859 s->chroma_v_shift= 1;
3861 // dec += FFMAX(s->chroma_h_shift, s->chroma_v_shift);
3863 width= s->avctx->width;
3864 height= s->avctx->height;
3866 s->spatial_dwt_buffer= av_mallocz(width*height*sizeof(DWTELEM));
3868 s->mv_scale= (s->avctx->flags & CODEC_FLAG_QPEL) ? 2 : 4;
3869 s->block_max_depth= (s->avctx->flags & CODEC_FLAG_4MV) ? 1 : 0;
3871 for(plane_index=0; plane_index<3; plane_index++){
3872 int w= s->avctx->width;
3873 int h= s->avctx->height;
// plane size reduced by the chroma shifts (presumably for planes 1 and 2
// only -- TODO confirm)
3876 w>>= s->chroma_h_shift;
3877 h>>= s->chroma_v_shift;
3879 s->plane[plane_index].width = w;
3880 s->plane[plane_index].height= h;
3881 //av_log(NULL, AV_LOG_DEBUG, "%d %d\n", w, h);
// bands are laid out from the highest level index down to level 0
3882 for(level=s->spatial_decomposition_count-1; level>=0; level--){
3883 for(orientation=level ? 1 : 0; orientation<4; orientation++){
3884 SubBand *b= &s->plane[plane_index].band[level][orientation];
3886 b->buf= s->spatial_dwt_buffer;
3888 b->stride= s->plane[plane_index].width << (s->spatial_decomposition_count - level);
3889 b->width = (w + !(orientation&1))>>1;
3890 b->height= (h + !(orientation>1))>>1;
3892 b->stride_line = 1 << (s->spatial_decomposition_count - level);
3893 b->buf_x_offset = 0;
3894 b->buf_y_offset = 0;
3898 b->buf_x_offset = (w+1)>>1;
3901 b->buf += b->stride>>1;
3902 b->buf_y_offset = b->stride_line >> 1;
3906 b->parent= &s->plane[plane_index].band[level-1][orientation];
3907 b->x_coeff=av_mallocz(((b->width+1) * b->height+1)*sizeof(x_and_coeff));
3916 width= s->width= avctx->width;
3917 height= s->height= avctx->height;
3919 assert(width && height);
3921 s->avctx->get_buffer(s->avctx, &s->mconly_picture);
/**
 * Converts a quality value on the lambda scale to the codec qlog scale.
 * Computes rint(QROOT * log2(qscale / FF_QP2LAMBDA)) and adds the constant
 * offset 61*QROOT/8, so every doubling of qscale raises the result by QROOT.
 * @param qscale quality value, divided by FF_QP2LAMBDA before the logarithm
 * @return the matching qlog
 */
3926 static int qscale2qlog(int qscale){
3927 return rint(QROOT*log(qscale / (float)FF_QP2LAMBDA)/log(2))
3928 + 61*QROOT/8; //<64 >60
/**
 * One-pass rate control: picks the quality of the current frame from a
 * complexity estimate of its luma wavelet coefficients.
 * For every subband of plane 0 the absolute coefficients are weighted by
 * qdiv = (1<<16)/qmul, where qmul uses a fixed qlog of 2*QROOT + b->qlog, and
 * summed. The sum is squared and scaled down by 16 bits, then stored as
 * mb_var_sum for I frames or mc_mb_var_sum for other frames before
 * ff_rate_estimate_qscale() is called. The result sets pict->quality,
 * s->lambda (quality * 3/2) and s->qlog.
 * NOTE(review): the decorrelate()/correlate() pair around the summation
 * presumably applies to a subset of the bands only -- TODO confirm.
 */
3931 static void ratecontrol_1pass(SnowContext *s, AVFrame *pict)
3933 /* estimate the frame's complexity as a sum of weighted dwt coefs.
3934 * FIXME we know exact mv bits at this point,
3935 * but ratecontrol isn't set up to include them. */
3936 uint32_t coef_sum= 0;
3937 int level, orientation;
3939 for(level=0; level<s->spatial_decomposition_count; level++){
3940 for(orientation=level ? 1 : 0; orientation<4; orientation++){
3941 SubBand *b= &s->plane[0].band[level][orientation];
3942 DWTELEM *buf= b->buf;
3943 const int w= b->width;
3944 const int h= b->height;
3945 const int stride= b->stride;
3946 const int qlog= clip(2*QROOT + b->qlog, 0, QROOT*16);
3947 const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
3948 const int qdiv= (1<<16)/qmul;
3951 decorrelate(s, b, buf, stride, 1, 0);
3954 coef_sum+= abs(buf[x+y*stride]) * qdiv >> 16;
3956 correlate(s, b, buf, stride, 1, 0);
3960 /* ugly, ratecontrol just takes a sqrt again */
3961 coef_sum = (uint64_t)coef_sum * coef_sum >> 16;
3962 assert(coef_sum < INT_MAX);
3964 if(pict->pict_type == I_TYPE){
3965 s->m.current_picture.mb_var_sum= coef_sum;
3966 s->m.current_picture.mc_mb_var_sum= 0;
3968 s->m.current_picture.mc_mb_var_sum= coef_sum;
3969 s->m.current_picture.mb_var_sum= 0;
3972 pict->quality= ff_rate_estimate_qscale(&s->m, 1);
3973 s->lambda= pict->quality * 3/2;
3974 s->qlog= qscale2qlog(pict->quality);
/**
 * Computes a qlog value for every subband of plane p.
 * For each band the shared spatial_dwt_buffer is cleared, a single
 * coefficient of value 256*256 is placed at the centre of the band, the
 * inverse wavelet transform is run and the output samples are read back.
 * b->qlog is then set from log(352256/sqrt(error)) expressed in units of
 * 1/QROOT octave and rounded.
 * NOTE(review): error is presumably the sum of squared output samples
 * -- TODO confirm. The memset uses sizeof(int) while the buffer is allocated
 * with sizeof(DWTELEM) in common_init(); the two only agree if DWTELEM has
 * the size of int.
 */
3977 static void calculate_vissual_weight(SnowContext *s, Plane *p){
3978 int width = p->width;
3979 int height= p->height;
3980 int level, orientation, x, y;
3982 for(level=0; level<s->spatial_decomposition_count; level++){
3983 for(orientation=level ? 1 : 0; orientation<4; orientation++){
3984 SubBand *b= &p->band[level][orientation];
3985 DWTELEM *buf= b->buf;
3988 memset(s->spatial_dwt_buffer, 0, sizeof(int)*width*height);
3989 buf[b->width/2 + b->height/2*b->stride]= 256*256;
3990 ff_spatial_idwt(s->spatial_dwt_buffer, width, height, width, s->spatial_decomposition_type, s->spatial_decomposition_count);
3991 for(y=0; y<height; y++){
3992 for(x=0; x<width; x++){
3993 int64_t d= s->spatial_dwt_buffer[x + y*width];
3998 b->qlog= (int)(log(352256.0/sqrt(error)) / log(pow(2.0, 1.0/QROOT))+0.5);
3999 // av_log(NULL, AV_LOG_DEBUG, "%d %d %d\n", level, orientation, b->qlog/*, sqrt(error)*/);
/**
 * Encoder initialisation.
 *
 * What the code below does, in order:
 *  - logs an error when avctx->strict_std_compliance is above
 *    FF_COMPLIANCE_EXPERIMENTAL, because the bitstream is still under
 *    development;
 *  - copies flags and bit rate into the MpegEncContext s->m and allocates
 *    its motion estimation scratchpad, map, score_map and obmc_scratchpad;
 *  - calls h263_encode_init() for the mv_penalty table;
 *  - clamps max_ref_frames to 1..MAX_REF_FRAMES from avctx->refs;
 *  - allocates a 256 byte stats_out buffer in first-pass mode if none is set;
 *  - initialises rate control when CODEC_FLAG_PASS2 is set or
 *    CODEC_FLAG_QSCALE is not set, and enables pass1_rc when neither flag is
 *    set;
 *  - computes the per-band qlog values of all three planes;
 *  - accepts PIX_FMT_YUV420P as the only uncommented pixel format case and
 *    logs an error otherwise; both chroma shifts are set to 1;
 *  - selects the comparison functions for me_cmp and me_sub_cmp;
 *  - requests a buffer for s->input_picture;
 *  - for ME_ITER allocates the per-reference ref_mvs and ref_scores arrays,
 *    one entry per block at the maximum block depth.
 *
 * NOTE(review): the allocation and get_buffer() results are not checked in
 * the lines shown.
 */
4004 static int encode_init(AVCodecContext *avctx)
4006 SnowContext *s = avctx->priv_data;
4009 if(avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL){
4010 av_log(avctx, AV_LOG_ERROR, "this codec is under development, files encoded with it may not be decodable with future versions!!!\n"
4011 "use vstrict=-2 / -strict -2 to use it anyway\n");
4021 s->m.flags = avctx->flags;
4022 s->m.bit_rate= avctx->bit_rate;
4024 s->m.me.scratchpad= av_mallocz((avctx->width+64)*2*16*2*sizeof(uint8_t));
4025 s->m.me.map = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t));
4026 s->m.me.score_map = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t));
4027 s->m.obmc_scratchpad= av_mallocz(MB_SIZE*MB_SIZE*12*sizeof(uint32_t));
4028 h263_encode_init(&s->m); //mv_penalty
4030 s->max_ref_frames = FFMAX(FFMIN(avctx->refs, MAX_REF_FRAMES), 1);
4032 if(avctx->flags&CODEC_FLAG_PASS1){
4033 if(!avctx->stats_out)
4034 avctx->stats_out = av_mallocz(256);
4036 if((avctx->flags&CODEC_FLAG_PASS2) || !(avctx->flags&CODEC_FLAG_QSCALE)){
4037 if(ff_rate_control_init(&s->m) < 0)
4040 s->pass1_rc= !(avctx->flags & (CODEC_FLAG_QSCALE|CODEC_FLAG_PASS2));
4042 for(plane_index=0; plane_index<3; plane_index++){
4043 calculate_vissual_weight(s, &s->plane[plane_index]);
4047 avctx->coded_frame= &s->current_picture;
4048 switch(avctx->pix_fmt){
4049 // case PIX_FMT_YUV444P:
4050 // case PIX_FMT_YUV422P:
4051 case PIX_FMT_YUV420P:
4053 // case PIX_FMT_YUV411P:
4054 // case PIX_FMT_YUV410P:
4055 s->colorspace_type= 0;
4057 /* case PIX_FMT_RGBA32:
4061 av_log(avctx, AV_LOG_ERROR, "format not supported\n");
4064 // avcodec_get_chroma_sub_sample(avctx->pix_fmt, &s->chroma_h_shift, &s->chroma_v_shift);
4065 s->chroma_h_shift= 1;
4066 s->chroma_v_shift= 1;
4068 ff_set_cmp(&s->dsp, s->dsp.me_cmp, s->avctx->me_cmp);
4069 ff_set_cmp(&s->dsp, s->dsp.me_sub_cmp, s->avctx->me_sub_cmp);
4071 s->avctx->get_buffer(s->avctx, &s->input_picture);
4073 if(s->avctx->me_method == ME_ITER){
4075 int size= s->b_width * s->b_height << 2*s->block_max_depth;
4076 for(i=0; i<s->max_ref_frames; i++){
4077 s->ref_mvs[i]= av_mallocz(size*sizeof(int16_t[2]));
4078 s->ref_scores[i]= av_mallocz(size*sizeof(uint32_t));
/**
 * Prepares the picture buffers for the next frame.
 * If a current picture exists, its three planes are passed to draw_edges()
 * with EDGE_WIDTH (half size and half width for the two chroma planes). The
 * reference list last_picture[] is then shifted by one entry, the previous
 * current picture becomes last_picture[0], and the entry that fell off the
 * end is reused as the new current_picture. A buffer for it is requested
 * through get_buffer() with reference set to 1; a failure is logged.
 * The loop over last_picture[] inspects the key_frame flag of the preceding
 * entry, presumably to limit the references in use -- TODO confirm.
 * @return int status; presumably negative when get_buffer() fails
 *         -- TODO confirm
 */
4085 static int frame_start(SnowContext *s){
4087 int w= s->avctx->width; //FIXME round up to x16 ?
4088 int h= s->avctx->height;
4090 if(s->current_picture.data[0]){
4091 draw_edges(s->current_picture.data[0], s->current_picture.linesize[0], w , h , EDGE_WIDTH );
4092 draw_edges(s->current_picture.data[1], s->current_picture.linesize[1], w>>1, h>>1, EDGE_WIDTH/2);
4093 draw_edges(s->current_picture.data[2], s->current_picture.linesize[2], w>>1, h>>1, EDGE_WIDTH/2);
// rotate the reference list: oldest entry out, current picture in front
4096 tmp= s->last_picture[s->max_ref_frames-1];
4097 memmove(s->last_picture+1, s->last_picture, (s->max_ref_frames-1)*sizeof(AVFrame));
4098 s->last_picture[0]= s->current_picture;
4099 s->current_picture= tmp;
4105 for(i=0; i<s->max_ref_frames && s->last_picture[i].data[0]; i++)
4106 if(i && s->last_picture[i-1].key_frame)
4111 s->current_picture.reference= 1;
4112 if(s->avctx->get_buffer(s->avctx, &s->current_picture) < 0){
4113 av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed\n");
4117 s->current_picture.key_frame= s->keyframe;
/**
 * Encodes one frame into buf with the range coder.
 *
 * Steps that can be read off the code below:
 *  - the range encoder is initialised on buf and the input planes are copied
 *    row by row into s->input_picture;
 *  - picture type and quality are chosen: from the two-pass rate control
 *    entries under CODEC_FLAG_PASS2, otherwise a keyframe is forced when
 *    gop_size is 0 or the frame number is a multiple of gop_size;
 *  - qlog and lambda are derived from the quality; a negative qlog, or a
 *    zero quality with CODEC_FLAG_QSCALE, switches to LOSSLESS_QLOG;
 *  - for P frames the MpegEncContext s->m is filled in with pictures, sizes
 *    and lambda values;
 *  - each of the three planes is loaded into spatial_dwt_buffer, predicted,
 *    wavelet transformed, quantized, decorrelated and entropy coded, then
 *    dequantized, inverse transformed and predicted again to rebuild the
 *    reference picture;
 *  - a P frame whose scene_change_score exceeds the threshold restarts the
 *    range encoder and is coded as an I frame instead (not in second pass);
 *  - with CODEC_FLAG_PSNR a per-plane error is accumulated;
 *  - the oldest reference is released and the bit counters and picture
 *    fields used by rate control are updated.
 *
 * @param buf      output buffer for the coded frame
 * @param buf_size size of buf in bytes
 * @param data     the input AVFrame
 * @return the value of ff_rac_terminate() on the return path shown
 */
4122 static int encode_frame(AVCodecContext *avctx, unsigned char *buf, int buf_size, void *data){
4123 SnowContext *s = avctx->priv_data;
4124 RangeCoder * const c= &s->c;
4125 AVFrame *pict = data;
4126 const int width= s->avctx->width;
4127 const int height= s->avctx->height;
4128 int level, orientation, plane_index, i, y;
4130 ff_init_range_encoder(c, buf, buf_size);
4131 ff_build_rac_states(c, 0.05*(1LL<<32), 256-8);
// copy the caller's planes into the encoder's own input picture
4135 for(y=0; y<(height>>shift); y++)
4136 memcpy(&s->input_picture.data[i][y * s->input_picture.linesize[i]],
4137 &pict->data[i][y * pict->linesize[i]],
4140 s->new_picture = *pict;
4142 s->m.picture_number= avctx->frame_number;
4143 if(avctx->flags&CODEC_FLAG_PASS2){
4145 pict->pict_type= s->m.rc_context.entry[avctx->frame_number].new_pict_type;
4146 s->keyframe= pict->pict_type==FF_I_TYPE;
4147 if(!(avctx->flags&CODEC_FLAG_QSCALE))
4148 pict->quality= ff_rate_estimate_qscale(&s->m, 0);
4150 s->keyframe= avctx->gop_size==0 || avctx->frame_number % avctx->gop_size == 0;
4152 pict->pict_type= s->keyframe ? FF_I_TYPE : FF_P_TYPE;
// one-pass rate control has no estimate yet for the very first frame
4155 if(s->pass1_rc && avctx->frame_number == 0)
4156 pict->quality= 2*FF_QP2LAMBDA;
4158 s->qlog= qscale2qlog(pict->quality);
4159 s->lambda = pict->quality * 3/2;
4161 if(s->qlog < 0 || (!pict->quality && (avctx->flags & CODEC_FLAG_QSCALE))){
4162 s->qlog= LOSSLESS_QLOG;
4164 }//else keep previous frame's qlog until after motion est
4168 s->m.current_picture_ptr= &s->m.current_picture;
4169 if(pict->pict_type == P_TYPE){
// size of the frame in 16x16 units, rounded up
4170 int block_width = (width +15)>>4;
4171 int block_height= (height+15)>>4;
4172 int stride= s->current_picture.linesize[0];
4174 assert(s->current_picture.data[0]);
4175 assert(s->last_picture[0].data[0]);
// hand the luma planes and frame geometry to the MpegEncContext
4177 s->m.avctx= s->avctx;
4178 s->m.current_picture.data[0]= s->current_picture.data[0];
4179 s->m. last_picture.data[0]= s->last_picture[0].data[0];
4180 s->m. new_picture.data[0]= s-> input_picture.data[0];
4181 s->m. last_picture_ptr= &s->m. last_picture;
4183 s->m. last_picture.linesize[0]=
4184 s->m. new_picture.linesize[0]=
4185 s->m.current_picture.linesize[0]= stride;
4186 s->m.uvlinesize= s->current_picture.linesize[1];
4188 s->m.height= height;
4189 s->m.mb_width = block_width;
4190 s->m.mb_height= block_height;
4191 s->m.mb_stride= s->m.mb_width+1;
4192 s->m.b8_stride= 2*s->m.mb_width+1;
4194 s->m.pict_type= pict->pict_type;
4195 s->m.me_method= s->avctx->me_method;
4196 s->m.me.scene_change_score=0;
4197 s->m.flags= s->avctx->flags;
4198 s->m.quarter_sample= (s->avctx->flags & CODEC_FLAG_QPEL)!=0;
4199 s->m.out_format= FMT_H263;
4200 s->m.unrestricted_mv= 1;
4202 s->m.lambda = s->lambda;
4203 s->m.qscale= (s->m.lambda*139 + FF_LAMBDA_SCALE*64) >> (FF_LAMBDA_SHIFT + 7);
4204 s->lambda2= s->m.lambda2= (s->m.lambda*s->m.lambda + FF_LAMBDA_SCALE/2) >> FF_LAMBDA_SHIFT;
4206 s->m.dsp= s->dsp; //move
4213 s->m.pict_type = pict->pict_type;
4214 s->qbias= pict->pict_type == P_TYPE ? 2 : 0;
// bits written so far are booked as misc bits, the next chunk as mv bits
4217 s->m.misc_bits = 8*(s->c.bytestream - s->c.bytestream_start);
4219 s->m.mv_bits = 8*(s->c.bytestream - s->c.bytestream_start) - s->m.misc_bits;
4221 for(plane_index=0; plane_index<3; plane_index++){
4222 Plane *p= &s->plane[plane_index];
4226 // int bits= put_bits_count(&s->c.pb);
4229 if(pict->data[plane_index]) //FIXME gray hack
4232 s->spatial_dwt_buffer[y*w + x]= pict->data[plane_index][y*pict->linesize[plane_index] + x]<<FRAC_BITS;
4235 predict_plane(s, s->spatial_dwt_buffer, plane_index, 0);
4238 && pict->pict_type == P_TYPE
4239 && !(avctx->flags&CODEC_FLAG_PASS2)
4240 && s->m.me.scene_change_score > s->avctx->scenechange_threshold){
// scene change: discard what was written and code the frame as intra
4241 ff_init_range_encoder(c, buf, buf_size);
4242 ff_build_rac_states(c, 0.05*(1LL<<32), 256-8);
4243 pict->pict_type= FF_I_TYPE;
4245 s->current_picture.key_frame=1;
4250 if(s->qlog == LOSSLESS_QLOG){
// lossless: drop the fractional bits again, with rounding
4253 s->spatial_dwt_buffer[y*w + x]= (s->spatial_dwt_buffer[y*w + x] + (1<<(FRAC_BITS-1))-1)>>FRAC_BITS;
4258 ff_spatial_dwt(s->spatial_dwt_buffer, w, h, w, s->spatial_decomposition_type, s->spatial_decomposition_count);
4260 if(s->pass1_rc && plane_index==0)
4261 ratecontrol_1pass(s, pict);
4263 for(level=0; level<s->spatial_decomposition_count; level++){
4264 for(orientation=level ? 1 : 0; orientation<4; orientation++){
4265 SubBand *b= &p->band[level][orientation];
4267 quantize(s, b, b->buf, b->stride, s->qbias);
4269 decorrelate(s, b, b->buf, b->stride, pict->pict_type == P_TYPE, 0);
4270 encode_subband(s, b, b->buf, b->parent ? b->parent->buf : NULL, b->stride, orientation);
4271 assert(b->parent==NULL || b->parent->stride == b->stride*2);
4273 correlate(s, b, b->buf, b->stride, 1, 0);
4276 // av_log(NULL, AV_LOG_DEBUG, "plane:%d bits:%d\n", plane_index, put_bits_count(&s->c.pb) - bits);
// rebuild the plane from the quantized coefficients
4278 for(level=0; level<s->spatial_decomposition_count; level++){
4279 for(orientation=level ? 1 : 0; orientation<4; orientation++){
4280 SubBand *b= &p->band[level][orientation];
4282 dequantize(s, b, b->buf, b->stride);
4286 ff_spatial_idwt(s->spatial_dwt_buffer, w, h, w, s->spatial_decomposition_type, s->spatial_decomposition_count);
4287 if(s->qlog == LOSSLESS_QLOG){
4290 s->spatial_dwt_buffer[y*w + x]<<=FRAC_BITS;
4295 predict_plane(s, s->spatial_dwt_buffer, plane_index, 1);
4296 STOP_TIMER("pred-conv")}
4297 if(s->avctx->flags&CODEC_FLAG_PSNR){
4300 if(pict->data[plane_index]) //FIXME gray hack
4303 int d= s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x] - pict->data[plane_index][y*pict->linesize[plane_index] + x];
4307 s->avctx->error[plane_index] += error;
4308 s->current_picture.error[plane_index] = error;
4312 if(s->last_picture[s->max_ref_frames-1].data[0])
4313 avctx->release_buffer(avctx, &s->last_picture[s->max_ref_frames-1]);
4315 s->current_picture.coded_picture_number = avctx->frame_number;
4316 s->current_picture.pict_type = pict->pict_type;
4317 s->current_picture.quality = pict->quality;
// texture bits are what remains after misc and mv bits
4318 s->m.frame_bits = 8*(s->c.bytestream - s->c.bytestream_start);
4319 s->m.p_tex_bits = s->m.frame_bits - s->m.misc_bits - s->m.mv_bits;
4320 s->m.current_picture.display_picture_number =
4321 s->m.current_picture.coded_picture_number = avctx->frame_number;
4322 s->m.current_picture.quality = pict->quality;
4323 s->m.total_bits += 8*(s->c.bytestream - s->c.bytestream_start);
4325 ff_rate_estimate_qscale(&s->m, 0);
4326 if(avctx->flags&CODEC_FLAG_PASS1)
4327 ff_write_pass1_stats(&s->m);
4328 s->m.last_pict_type = s->m.pict_type;
4332 return ff_rac_terminate(c);
/**
 * Frees what the context owns: the wavelet buffer, the motion estimation
 * buffers of s->m, the block array, the per-reference ref_mvs and ref_scores
 * arrays and the x_coeff array of every subband. Reference pictures that
 * still hold data are handed back through release_buffer().
 * av_freep() takes the address of each pointer.
 */
4335 static void common_end(SnowContext *s){
4336 int plane_index, level, orientation, i;
4338 av_freep(&s->spatial_dwt_buffer);
4340 av_freep(&s->m.me.scratchpad);
4341 av_freep(&s->m.me.map);
4342 av_freep(&s->m.me.score_map);
4343 av_freep(&s->m.obmc_scratchpad);
4345 av_freep(&s->block);
// all MAX_REF_FRAMES slots are visited, not only max_ref_frames
4347 for(i=0; i<MAX_REF_FRAMES; i++){
4348 av_freep(&s->ref_mvs[i]);
4349 av_freep(&s->ref_scores[i]);
4350 if(s->last_picture[i].data[0])
4351 s->avctx->release_buffer(s->avctx, &s->last_picture[i]);
4354 for(plane_index=0; plane_index<3; plane_index++){
4355 for(level=s->spatial_decomposition_count-1; level>=0; level--){
4356 for(orientation=level ? 1 : 0; orientation<4; orientation++){
4357 SubBand *b= &s->plane[plane_index].band[level][orientation];
4359 av_freep(&b->x_coeff);
/**
 * Encoder teardown: frees the first-pass statistics buffer
 * avctx->stats_out.
 * NOTE(review): the shared context cleanup is presumably invoked here as
 * well -- TODO confirm.
 */
4365 static int encode_end(AVCodecContext *avctx)
4367 SnowContext *s = avctx->priv_data;
4370 av_free(avctx->stats_out);
/**
 * Decoder initialisation: sets the output pixel format to PIX_FMT_YUV420P
 * and initialises the slice buffer s->sb.
 * The slice buffer is given the luma plane height and width, the shared
 * spatial_dwt_buffer, and a line count of block_size + n*(n+3) + 1, where n
 * is the spatial decomposition count and block_size is
 * MB_SIZE >> block_max_depth.
 */
4375 static int decode_init(AVCodecContext *avctx)
4377 SnowContext *s = avctx->priv_data;
4380 avctx->pix_fmt= PIX_FMT_YUV420P;
4384 block_size = MB_SIZE >> s->block_max_depth;
4385 slice_buffer_init(&s->sb, s->plane[0].height, (block_size) + (s->spatial_decomposition_count * (s->spatial_decomposition_count + 3)) + 1, s->plane[0].width, s->spatial_dwt_buffer);
4390 static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, uint8_t *buf, int buf_size){
4391 SnowContext *s = avctx->priv_data;
4392 RangeCoder * const c= &s->c;
4394 AVFrame *picture = data;
4395 int level, orientation, plane_index;
4397 ff_init_range_decoder(c, buf, buf_size);
4398 ff_build_rac_states(c, 0.05*(1LL<<32), 256-8);
4400 s->current_picture.pict_type= FF_I_TYPE; //FIXME I vs. P
4402 if(!s->block) alloc_blocks(s);
4405 //keyframe flag dupliaction mess FIXME
4406 if(avctx->debug&FF_DEBUG_PICT_INFO)
4407 av_log(avctx, AV_LOG_ERROR, "keyframe:%d qlog:%d\n", s->keyframe, s->qlog);
4411 for(plane_index=0; plane_index<3; plane_index++){
4412 Plane *p= &s->plane[plane_index];
4416 int decode_state[MAX_DECOMPOSITIONS][4][1]; /* Stored state info for unpack_coeffs. 1 variable per instance. */
4418 if(s->avctx->debug&2048){
4419 memset(s->spatial_dwt_buffer, 0, sizeof(DWTELEM)*w*h);
4420 predict_plane(s, s->spatial_dwt_buffer, plane_index, 1);
4424 int v= s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x];
4425 s->mconly_picture.data[plane_index][y*s->mconly_picture.linesize[plane_index] + x]= v;
4431 for(level=0; level<s->spatial_decomposition_count; level++){
4432 for(orientation=level ? 1 : 0; orientation<4; orientation++){
4433 SubBand *b= &p->band[level][orientation];
4434 unpack_coeffs(s, b, b->parent, orientation);
4437 STOP_TIMER("unpack coeffs");
4441 const int mb_h= s->b_height << s->block_max_depth;
4442 const int block_size = MB_SIZE >> s->block_max_depth;
4443 const int block_w = plane_index ? block_size/2 : block_size;
4445 dwt_compose_t cs[MAX_DECOMPOSITIONS];
4450 ff_spatial_idwt_buffered_init(cs, &s->sb, w, h, 1, s->spatial_decomposition_type, s->spatial_decomposition_count);
4451 for(mb_y=0; mb_y<=mb_h; mb_y++){
4453 int slice_starty = block_w*mb_y;
4454 int slice_h = block_w*(mb_y+1);
4455 if (!(s->keyframe || s->avctx->debug&512)){
4456 slice_starty = FFMAX(0, slice_starty - (block_w >> 1));
4457 slice_h -= (block_w >> 1);
4462 for(level=0; level<s->spatial_decomposition_count; level++){
4463 for(orientation=level ? 1 : 0; orientation<4; orientation++){
4464 SubBand *b= &p->band[level][orientation];
4467 int our_mb_start = mb_y;
4468 int our_mb_end = (mb_y + 1);
4470 start_y = (mb_y ? ((block_w * our_mb_start) >> (s->spatial_decomposition_count - level)) + s->spatial_decomposition_count - level + extra: 0);
4471 end_y = (((block_w * our_mb_end) >> (s->spatial_decomposition_count - level)) + s->spatial_decomposition_count - level + extra);
4472 if (!(s->keyframe || s->avctx->debug&512)){
4473 start_y = FFMAX(0, start_y - (block_w >> (1+s->spatial_decomposition_count - level)));
4474 end_y = FFMAX(0, end_y - (block_w >> (1+s->spatial_decomposition_count - level)));
4476 start_y = FFMIN(b->height, start_y);
4477 end_y = FFMIN(b->height, end_y);
4479 if (start_y != end_y){
4480 if (orientation == 0){
4481 SubBand * correlate_band = &p->band[0][0];
4482 int correlate_end_y = FFMIN(b->height, end_y + 1);
4483 int correlate_start_y = FFMIN(b->height, (start_y ? start_y + 1 : 0));
4484 decode_subband_slice_buffered(s, correlate_band, &s->sb, correlate_start_y, correlate_end_y, decode_state[0][0]);
4485 correlate_slice_buffered(s, &s->sb, correlate_band, correlate_band->buf, correlate_band->stride, 1, 0, correlate_start_y, correlate_end_y);
4486 dequantize_slice_buffered(s, &s->sb, correlate_band, correlate_band->buf, correlate_band->stride, start_y, end_y);
4489 decode_subband_slice_buffered(s, b, &s->sb, start_y, end_y, decode_state[level][orientation]);
4493 STOP_TIMER("decode_subband_slice");
4497 for(; yd<slice_h; yd+=4){
4498 ff_spatial_idwt_buffered_slice(&s->dsp, cs, &s->sb, w, h, 1, s->spatial_decomposition_type, s->spatial_decomposition_count, yd);
4500 STOP_TIMER("idwt slice");}
4503 if(s->qlog == LOSSLESS_QLOG){
4504 for(; yq<slice_h && yq<h; yq++){
4505 DWTELEM * line = slice_buffer_get_line(&s->sb, yq);
4507 line[x] <<= FRAC_BITS;
4512 predict_slice_buffered(s, &s->sb, s->spatial_dwt_buffer, plane_index, 1, mb_y);
4514 y = FFMIN(p->height, slice_starty);
4515 end_y = FFMIN(p->height, slice_h);
4517 slice_buffer_release(&s->sb, y++);
4520 slice_buffer_flush(&s->sb);
4522 STOP_TIMER("idwt + predict_slices")}
4527 if(s->last_picture[s->max_ref_frames-1].data[0])
4528 avctx->release_buffer(avctx, &s->last_picture[s->max_ref_frames-1]);
4530 if(!(s->avctx->debug&2048))
4531 *picture= s->current_picture;
4533 *picture= s->mconly_picture;
4535 *data_size = sizeof(AVFrame);
4537 bytes_read= c->bytestream - c->bytestream_start;
4538 if(bytes_read ==0) av_log(s->avctx, AV_LOG_ERROR, "error at end of frame\n"); //FIXME
/*
 * Decoder teardown: fetches the SnowContext stored in avctx->priv_data and
 * destroys the slice buffer embedded in it (s->sb).
 * NOTE(review): the embedded line numbers jump (4543, 4545, 4547), so the
 * braces and any remaining statements of this function are not visible in
 * this view -- confirm against the full file before relying on this summary.
 */
4543 static int decode_end(AVCodecContext *avctx)
4545 SnowContext *s = avctx->priv_data;
/* Tear down the slice buffer that lives inside the context. */
4547 slice_buffer_destroy(&s->sb);
/*
 * Codec descriptor for the Snow decoder.
 * Only two initializer entries are visible in this view: sizeof(SnowContext)
 * (presumably the private-context size -- TODO confirm against the AVCodec
 * field order) and a literal 0 whose commented-out alternatives are
 * CODEC_CAP_DR1 and CODEC_CAP_DRAW_HORIZ_BAND (presumably the capabilities
 * field -- TODO confirm).
 */
4554 AVCodec snow_decoder = {
4558 sizeof(SnowContext),
4563 0 /*CODEC_CAP_DR1*/ /*| CODEC_CAP_DRAW_HORIZ_BAND*/,
/* The encoder descriptor below is compiled only when CONFIG_ENCODERS is defined. */
4567 #ifdef CONFIG_ENCODERS
/*
 * Codec descriptor for the Snow encoder.
 * The only initializer entry visible in this view is sizeof(SnowContext)
 * (presumably the private-context size -- TODO confirm against the AVCodec
 * field order); the remaining entries are not shown here.
 */
4568 AVCodec snow_encoder = {
4572 sizeof(SnowContext),
4588 int buffer[2][width*height];
4591 s.spatial_decomposition_count=6;
4592 s.spatial_decomposition_type=1;
4594 printf("testing 5/3 DWT\n");
4595 for(i=0; i<width*height; i++)
4596 buffer[0][i]= buffer[1][i]= random()%54321 - 12345;
4598 ff_spatial_dwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4599 ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4601 for(i=0; i<width*height; i++)
4602 if(buffer[0][i]!= buffer[1][i]) printf("fsck: %d %d %d\n",i, buffer[0][i], buffer[1][i]);
4604 printf("testing 9/7 DWT\n");
4605 s.spatial_decomposition_type=0;
4606 for(i=0; i<width*height; i++)
4607 buffer[0][i]= buffer[1][i]= random()%54321 - 12345;
4609 ff_spatial_dwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4610 ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4612 for(i=0; i<width*height; i++)
4613 if(ABS(buffer[0][i] - buffer[1][i])>20) printf("fsck: %d %d %d\n",i, buffer[0][i], buffer[1][i]);
4616 printf("testing AC coder\n");
4617 memset(s.header_state, 0, sizeof(s.header_state));
4618 ff_init_range_encoder(&s.c, buffer[0], 256*256);
4619 ff_init_cabac_states(&s.c, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64);
4621 for(i=-256; i<256; i++){
4623 put_symbol(&s.c, s.header_state, i*i*i/3*ABS(i), 1);
4624 STOP_TIMER("put_symbol")
4626 ff_rac_terminate(&s.c);
4628 memset(s.header_state, 0, sizeof(s.header_state));
4629 ff_init_range_decoder(&s.c, buffer[0], 256*256);
4630 ff_init_cabac_states(&s.c, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64);
4632 for(i=-256; i<256; i++){
4635 j= get_symbol(&s.c, s.header_state, 1);
4636 STOP_TIMER("get_symbol")
4637 if(j!=i*i*i/3*ABS(i)) printf("fsck: %d != %d\n", i, j);
4641 int level, orientation, x, y;
4642 int64_t errors[8][4];
4645 memset(errors, 0, sizeof(errors));
4646 s.spatial_decomposition_count=3;
4647 s.spatial_decomposition_type=0;
4648 for(level=0; level<s.spatial_decomposition_count; level++){
4649 for(orientation=level ? 1 : 0; orientation<4; orientation++){
4650 int w= width >> (s.spatial_decomposition_count-level);
4651 int h= height >> (s.spatial_decomposition_count-level);
4652 int stride= width << (s.spatial_decomposition_count-level);
4653 DWTELEM *buf= buffer[0];
4656 if(orientation&1) buf+=w;
4657 if(orientation>1) buf+=stride>>1;
4659 memset(buffer[0], 0, sizeof(int)*width*height);
4660 buf[w/2 + h/2*stride]= 256*256;
4661 ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4662 for(y=0; y<height; y++){
4663 for(x=0; x<width; x++){
4664 int64_t d= buffer[0][x + y*width];
4666 if(ABS(width/2-x)<9 && ABS(height/2-y)<9 && level==2) printf("%8lld ", d);
4668 if(ABS(height/2-y)<9 && level==2) printf("\n");
4670 error= (int)(sqrt(error)+0.5);
4671 errors[level][orientation]= error;
4672 if(g) g=ff_gcd(g, error);
4676 printf("static int const visual_weight[][4]={\n");
4677 for(level=0; level<s.spatial_decomposition_count; level++){
4679 for(orientation=0; orientation<4; orientation++){
4680 printf("%8lld,", errors[level][orientation]/g);
4688 int w= width >> (s.spatial_decomposition_count-level);
4689 int h= height >> (s.spatial_decomposition_count-level);
4690 int stride= width << (s.spatial_decomposition_count-level);
4691 DWTELEM *buf= buffer[0];
4697 memset(buffer[0], 0, sizeof(int)*width*height);
4699 for(y=0; y<height; y++){
4700 for(x=0; x<width; x++){
4701 int tab[4]={0,2,3,1};
4702 buffer[0][x+width*y]= 256*256*tab[(x&1) + 2*(y&1)];
4705 ff_spatial_dwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4709 buf[x + y*stride ]=169;
4710 buf[x + y*stride-w]=64;
4713 ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4715 for(y=0; y<height; y++){
4716 for(x=0; x<width; x++){
4717 int64_t d= buffer[0][x + y*width];
4719 if(ABS(width/2-x)<9 && ABS(height/2-y)<9) printf("%8lld ", d);
4721 if(ABS(height/2-y)<9) printf("\n");