2 * Copyright (C) 2004 Michael Niedermayer <michaelni@gmx.at>
4 * This library is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU Lesser General Public
6 * License as published by the Free Software Foundation; either
7 * version 2 of the License, or (at your option) any later version.
9 * This library is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * Lesser General Public License for more details.
14 * You should have received a copy of the GNU Lesser General Public
15 * License along with this library; if not, write to the Free Software
16 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
24 #include "rangecoder.h"
26 #include "mpegvideo.h"
31 static const int8_t quant3[256]={
32 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
33 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
34 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
35 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
36 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
37 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
38 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
39 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
40 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
41 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
42 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
43 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
44 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
45 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
46 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
47 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 0,
49 static const int8_t quant3b[256]={
50 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
51 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
52 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
53 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
54 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
55 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
56 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
57 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
58 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
59 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
60 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
61 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
62 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
63 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
64 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
65 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
67 static const int8_t quant3bA[256]={
68 0, 0, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
69 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
70 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
71 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
72 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
73 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
74 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
75 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
76 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
77 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
78 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
79 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
80 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
81 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
82 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
83 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
85 static const int8_t quant5[256]={
86 0, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
87 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
88 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
89 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
90 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
91 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
92 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
93 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
94 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
95 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
96 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
97 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
98 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
99 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
100 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
101 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,-1,
103 static const int8_t quant7[256]={
104 0, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
105 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
106 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3,
107 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
108 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
109 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
110 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
111 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
112 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
113 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
114 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
115 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
116 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
117 -3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-2,-2,-2,
118 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
119 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,
121 static const int8_t quant9[256]={
122 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3,
123 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
124 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
125 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
126 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
127 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
128 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
129 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
130 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
131 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
132 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
133 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
134 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
135 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
136 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,
137 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-1,-1,
139 static const int8_t quant11[256]={
140 0, 1, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4,
141 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
142 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
143 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
144 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
145 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
146 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
147 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
148 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
149 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
150 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
151 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
152 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
153 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-4,-4,
154 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
155 -4,-4,-4,-4,-4,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-1,
157 static const int8_t quant13[256]={
158 0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
159 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
160 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
161 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
162 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
163 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
164 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
165 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
166 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
167 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
168 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
169 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
170 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-5,
171 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
172 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
173 -4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,-2,-2,-1,
177 static const uint8_t obmc32[1024]={
178 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
179 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
180 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
181 0, 0, 1, 1, 2, 2, 3, 4, 4, 5, 6, 6, 7, 7, 8, 8, 8, 8, 7, 7, 6, 6, 5, 4, 4, 3, 2, 2, 1, 1, 0, 0,
182 0, 0, 1, 2, 2, 3, 4, 6, 7, 8, 9,10,11,12,12,12,12,12,12,11,10, 9, 8, 7, 6, 4, 3, 2, 2, 1, 0, 0,
183 0, 1, 1, 2, 3, 5, 6, 8,10,11,13,14,15,16,17,18,18,17,16,15,14,13,11,10, 8, 6, 5, 3, 2, 1, 1, 0,
184 0, 1, 1, 3, 4, 6, 8,10,13,15,17,19,20,22,22,23,23,22,22,20,19,17,15,13,10, 8, 6, 4, 3, 1, 1, 0,
185 0, 1, 2, 4, 6, 8,10,13,16,19,21,23,25,27,28,29,29,28,27,25,23,21,19,16,13,10, 8, 6, 4, 2, 1, 0,
186 0, 1, 2, 4, 7,10,13,16,19,22,25,28,31,33,34,35,35,34,33,31,28,25,22,19,16,13,10, 7, 4, 2, 1, 0,
187 0, 1, 3, 5, 8,11,15,19,22,26,30,33,36,38,40,41,41,40,38,36,33,30,26,22,19,15,11, 8, 5, 3, 1, 0,
188 0, 1, 3, 6, 9,12,17,21,25,30,34,38,41,44,45,46,46,45,44,41,38,34,30,25,21,17,12, 9, 6, 3, 1, 0,
189 0, 1, 3, 6,10,14,19,23,28,33,38,42,45,48,51,52,52,51,48,45,42,38,33,28,23,19,14,10, 6, 3, 1, 0,
190 0, 1, 4, 7,11,15,20,25,31,36,41,45,49,52,55,56,56,55,52,49,45,41,36,31,25,20,15,11, 7, 4, 1, 0,
191 0, 2, 4, 7,12,16,22,27,33,38,44,48,52,56,58,60,60,58,56,52,48,44,38,33,27,22,16,12, 7, 4, 2, 0,
192 0, 1, 4, 8,12,17,22,28,34,40,45,51,55,58,61,62,62,61,58,55,51,45,40,34,28,22,17,12, 8, 4, 1, 0,
193 0, 2, 4, 8,12,18,23,29,35,41,46,52,56,60,62,64,64,62,60,56,52,46,41,35,29,23,18,12, 8, 4, 2, 0,
194 0, 2, 4, 8,12,18,23,29,35,41,46,52,56,60,62,64,64,62,60,56,52,46,41,35,29,23,18,12, 8, 4, 2, 0,
195 0, 1, 4, 8,12,17,22,28,34,40,45,51,55,58,61,62,62,61,58,55,51,45,40,34,28,22,17,12, 8, 4, 1, 0,
196 0, 2, 4, 7,12,16,22,27,33,38,44,48,52,56,58,60,60,58,56,52,48,44,38,33,27,22,16,12, 7, 4, 2, 0,
197 0, 1, 4, 7,11,15,20,25,31,36,41,45,49,52,55,56,56,55,52,49,45,41,36,31,25,20,15,11, 7, 4, 1, 0,
198 0, 1, 3, 6,10,14,19,23,28,33,38,42,45,48,51,52,52,51,48,45,42,38,33,28,23,19,14,10, 6, 3, 1, 0,
199 0, 1, 3, 6, 9,12,17,21,25,30,34,38,41,44,45,46,46,45,44,41,38,34,30,25,21,17,12, 9, 6, 3, 1, 0,
200 0, 1, 3, 5, 8,11,15,19,22,26,30,33,36,38,40,41,41,40,38,36,33,30,26,22,19,15,11, 8, 5, 3, 1, 0,
201 0, 1, 2, 4, 7,10,13,16,19,22,25,28,31,33,34,35,35,34,33,31,28,25,22,19,16,13,10, 7, 4, 2, 1, 0,
202 0, 1, 2, 4, 6, 8,10,13,16,19,21,23,25,27,28,29,29,28,27,25,23,21,19,16,13,10, 8, 6, 4, 2, 1, 0,
203 0, 1, 1, 3, 4, 6, 8,10,13,15,17,19,20,22,22,23,23,22,22,20,19,17,15,13,10, 8, 6, 4, 3, 1, 1, 0,
204 0, 1, 1, 2, 3, 5, 6, 8,10,11,13,14,15,16,17,18,18,17,16,15,14,13,11,10, 8, 6, 5, 3, 2, 1, 1, 0,
205 0, 0, 1, 2, 2, 3, 4, 6, 7, 8, 9,10,11,12,12,12,12,12,12,11,10, 9, 8, 7, 6, 4, 3, 2, 2, 1, 0, 0,
206 0, 0, 1, 1, 2, 2, 3, 4, 4, 5, 6, 6, 7, 7, 8, 8, 8, 8, 7, 7, 6, 6, 5, 4, 4, 3, 2, 2, 1, 1, 0, 0,
207 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
208 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
209 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
212 static const uint8_t obmc16[256]={
213 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
214 0, 1, 1, 2, 4, 5, 5, 6, 6, 5, 5, 4, 2, 1, 1, 0,
215 0, 1, 4, 6, 9,11,13,15,15,13,11, 9, 6, 4, 1, 0,
216 0, 2, 6,11,15,20,24,26,26,24,20,15,11, 6, 2, 0,
217 0, 4, 9,15,23,29,34,38,38,34,29,23,15, 9, 4, 0,
218 0, 5,11,20,29,38,45,49,49,45,38,29,20,11, 5, 0,
219 1, 5,13,24,34,45,53,57,57,53,45,34,24,13, 5, 1,
220 1, 6,15,26,38,49,57,62,62,57,49,38,26,15, 6, 1,
221 1, 6,15,26,38,49,57,62,62,57,49,38,26,15, 6, 1,
222 1, 5,13,24,34,45,53,57,57,53,45,34,24,13, 5, 1,
223 0, 5,11,20,29,38,45,49,49,45,38,29,20,11, 5, 0,
224 0, 4, 9,15,23,29,34,38,38,34,29,23,15, 9, 4, 0,
225 0, 2, 6,11,15,20,24,26,26,24,20,15,11, 6, 2, 0,
226 0, 1, 4, 6, 9,11,13,15,15,13,11, 9, 6, 4, 1, 0,
227 0, 1, 1, 2, 4, 5, 5, 6, 6, 5, 5, 4, 2, 1, 1, 0,
228 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
232 static const uint8_t obmc32[1024]={
233 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0,
234 0, 4, 4, 4, 8, 8, 8, 12, 12, 16, 16, 16, 20, 20, 20, 24, 24, 20, 20, 20, 16, 16, 16, 12, 12, 8, 8, 8, 4, 4, 4, 0,
235 0, 4, 8, 8, 12, 12, 16, 20, 20, 24, 28, 28, 32, 32, 36, 40, 40, 36, 32, 32, 28, 28, 24, 20, 20, 16, 12, 12, 8, 8, 4, 0,
236 0, 4, 8, 12, 16, 20, 24, 28, 28, 32, 36, 40, 44, 48, 52, 56, 56, 52, 48, 44, 40, 36, 32, 28, 28, 24, 20, 16, 12, 8, 4, 0,
237 4, 8, 12, 16, 20, 24, 28, 32, 40, 44, 48, 52, 56, 60, 64, 68, 68, 64, 60, 56, 52, 48, 44, 40, 32, 28, 24, 20, 16, 12, 8, 4,
238 4, 8, 12, 20, 24, 32, 36, 40, 48, 52, 56, 64, 68, 76, 80, 84, 84, 80, 76, 68, 64, 56, 52, 48, 40, 36, 32, 24, 20, 12, 8, 4,
239 4, 8, 16, 24, 28, 36, 44, 48, 56, 60, 68, 76, 80, 88, 96,100,100, 96, 88, 80, 76, 68, 60, 56, 48, 44, 36, 28, 24, 16, 8, 4,
240 4, 12, 20, 28, 32, 40, 48, 56, 64, 72, 80, 88, 92,100,108,116,116,108,100, 92, 88, 80, 72, 64, 56, 48, 40, 32, 28, 20, 12, 4,
241 4, 12, 20, 28, 40, 48, 56, 64, 72, 80, 88, 96,108,116,124,132,132,124,116,108, 96, 88, 80, 72, 64, 56, 48, 40, 28, 20, 12, 4,
242 4, 16, 24, 32, 44, 52, 60, 72, 80, 92,100,108,120,128,136,148,148,136,128,120,108,100, 92, 80, 72, 60, 52, 44, 32, 24, 16, 4,
243 4, 16, 28, 36, 48, 56, 68, 80, 88,100,112,120,132,140,152,164,164,152,140,132,120,112,100, 88, 80, 68, 56, 48, 36, 28, 16, 4,
244 4, 16, 28, 40, 52, 64, 76, 88, 96,108,120,132,144,156,168,180,180,168,156,144,132,120,108, 96, 88, 76, 64, 52, 40, 28, 16, 4,
245 8, 20, 32, 44, 56, 68, 80, 92,108,120,132,144,156,168,180,192,192,180,168,156,144,132,120,108, 92, 80, 68, 56, 44, 32, 20, 8,
246 8, 20, 32, 48, 60, 76, 88,100,116,128,140,156,168,184,196,208,208,196,184,168,156,140,128,116,100, 88, 76, 60, 48, 32, 20, 8,
247 8, 20, 36, 52, 64, 80, 96,108,124,136,152,168,180,196,212,224,224,212,196,180,168,152,136,124,108, 96, 80, 64, 52, 36, 20, 8,
248 8, 24, 40, 56, 68, 84,100,116,132,148,164,180,192,208,224,240,240,224,208,192,180,164,148,132,116,100, 84, 68, 56, 40, 24, 8,
249 8, 24, 40, 56, 68, 84,100,116,132,148,164,180,192,208,224,240,240,224,208,192,180,164,148,132,116,100, 84, 68, 56, 40, 24, 8,
250 8, 20, 36, 52, 64, 80, 96,108,124,136,152,168,180,196,212,224,224,212,196,180,168,152,136,124,108, 96, 80, 64, 52, 36, 20, 8,
251 8, 20, 32, 48, 60, 76, 88,100,116,128,140,156,168,184,196,208,208,196,184,168,156,140,128,116,100, 88, 76, 60, 48, 32, 20, 8,
252 8, 20, 32, 44, 56, 68, 80, 92,108,120,132,144,156,168,180,192,192,180,168,156,144,132,120,108, 92, 80, 68, 56, 44, 32, 20, 8,
253 4, 16, 28, 40, 52, 64, 76, 88, 96,108,120,132,144,156,168,180,180,168,156,144,132,120,108, 96, 88, 76, 64, 52, 40, 28, 16, 4,
254 4, 16, 28, 36, 48, 56, 68, 80, 88,100,112,120,132,140,152,164,164,152,140,132,120,112,100, 88, 80, 68, 56, 48, 36, 28, 16, 4,
255 4, 16, 24, 32, 44, 52, 60, 72, 80, 92,100,108,120,128,136,148,148,136,128,120,108,100, 92, 80, 72, 60, 52, 44, 32, 24, 16, 4,
256 4, 12, 20, 28, 40, 48, 56, 64, 72, 80, 88, 96,108,116,124,132,132,124,116,108, 96, 88, 80, 72, 64, 56, 48, 40, 28, 20, 12, 4,
257 4, 12, 20, 28, 32, 40, 48, 56, 64, 72, 80, 88, 92,100,108,116,116,108,100, 92, 88, 80, 72, 64, 56, 48, 40, 32, 28, 20, 12, 4,
258 4, 8, 16, 24, 28, 36, 44, 48, 56, 60, 68, 76, 80, 88, 96,100,100, 96, 88, 80, 76, 68, 60, 56, 48, 44, 36, 28, 24, 16, 8, 4,
259 4, 8, 12, 20, 24, 32, 36, 40, 48, 52, 56, 64, 68, 76, 80, 84, 84, 80, 76, 68, 64, 56, 52, 48, 40, 36, 32, 24, 20, 12, 8, 4,
260 4, 8, 12, 16, 20, 24, 28, 32, 40, 44, 48, 52, 56, 60, 64, 68, 68, 64, 60, 56, 52, 48, 44, 40, 32, 28, 24, 20, 16, 12, 8, 4,
261 0, 4, 8, 12, 16, 20, 24, 28, 28, 32, 36, 40, 44, 48, 52, 56, 56, 52, 48, 44, 40, 36, 32, 28, 28, 24, 20, 16, 12, 8, 4, 0,
262 0, 4, 8, 8, 12, 12, 16, 20, 20, 24, 28, 28, 32, 32, 36, 40, 40, 36, 32, 32, 28, 28, 24, 20, 20, 16, 12, 12, 8, 8, 4, 0,
263 0, 4, 4, 4, 8, 8, 8, 12, 12, 16, 16, 16, 20, 20, 20, 24, 24, 20, 20, 20, 16, 16, 16, 12, 12, 8, 8, 8, 4, 4, 4, 0,
264 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0,
267 static const uint8_t obmc16[256]={
268 0, 4, 4, 8, 8, 12, 12, 16, 16, 12, 12, 8, 8, 4, 4, 0,
269 4, 8, 16, 20, 28, 32, 40, 44, 44, 40, 32, 28, 20, 16, 8, 4,
270 4, 16, 24, 36, 44, 56, 64, 76, 76, 64, 56, 44, 36, 24, 16, 4,
271 8, 20, 36, 48, 64, 76, 92,104,104, 92, 76, 64, 48, 36, 20, 8,
272 8, 28, 44, 64, 80,100,116,136,136,116,100, 80, 64, 44, 28, 8,
273 12, 32, 56, 76,100,120,144,164,164,144,120,100, 76, 56, 32, 12,
274 12, 40, 64, 92,116,144,168,196,196,168,144,116, 92, 64, 40, 12,
275 16, 44, 76,104,136,164,196,224,224,196,164,136,104, 76, 44, 16,
276 16, 44, 76,104,136,164,196,224,224,196,164,136,104, 76, 44, 16,
277 12, 40, 64, 92,116,144,168,196,196,168,144,116, 92, 64, 40, 12,
278 12, 32, 56, 76,100,120,144,164,164,144,120,100, 76, 56, 32, 12,
279 8, 28, 44, 64, 80,100,116,136,136,116,100, 80, 64, 44, 28, 8,
280 8, 20, 36, 48, 64, 76, 92,104,104, 92, 76, 64, 48, 36, 20, 8,
281 4, 16, 24, 36, 44, 56, 64, 76, 76, 64, 56, 44, 36, 24, 16, 4,
282 4, 8, 16, 20, 28, 32, 40, 44, 44, 40, 32, 28, 20, 16, 8, 4,
283 0, 4, 4, 8, 8, 12, 12, 16, 16, 12, 12, 8, 8, 4, 4, 0,
287 static const uint8_t obmc32[1024]={
288 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
289 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
290 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
291 0, 0, 1, 1, 1, 2, 2, 3, 4, 5, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 6, 5, 5, 4, 3, 2, 2, 1, 1, 1, 0, 0,
292 0, 0, 1, 1, 2, 3, 4, 5, 6, 7, 9,10,11,11,12,12,12,12,11,11,10, 9, 7, 6, 5, 4, 3, 2, 1, 1, 0, 0,
293 0, 0, 1, 2, 3, 5, 6, 8, 9,11,12,14,15,16,17,17,17,17,16,15,14,12,11, 9, 8, 6, 5, 3, 2, 1, 0, 0,
294 0, 1, 1, 2, 4, 6, 8,10,12,15,17,19,20,21,22,23,23,22,21,20,19,17,15,12,10, 8, 6, 4, 2, 1, 1, 0,
295 0, 1, 2, 3, 5, 8,10,13,16,19,21,24,26,27,28,29,29,28,27,26,24,21,19,16,13,10, 8, 5, 3, 2, 1, 0,
296 0, 1, 2, 4, 6, 9,12,16,19,23,26,29,31,33,34,35,35,34,33,31,29,26,23,19,16,12, 9, 6, 4, 2, 1, 0,
297 0, 1, 3, 5, 7,11,15,19,23,26,30,34,37,39,40,41,41,40,39,37,34,30,26,23,19,15,11, 7, 5, 3, 1, 0,
298 0, 1, 3, 5, 9,12,17,21,26,30,35,38,42,44,46,47,47,46,44,42,38,35,30,26,21,17,12, 9, 5, 3, 1, 0,
299 0, 1, 3, 6, 9,14,19,24,29,34,38,43,46,49,51,52,52,51,49,46,43,38,34,29,24,19,14, 9, 6, 3, 1, 0,
300 0, 1, 3, 6,11,15,20,26,31,37,42,46,50,53,56,57,57,56,53,50,46,42,37,31,26,20,15,11, 6, 3, 1, 0,
301 0, 1, 3, 7,11,16,21,27,33,39,44,49,53,57,59,60,60,59,57,53,49,44,39,33,27,21,16,11, 7, 3, 1, 0,
302 0, 1, 4, 7,12,17,22,28,34,40,46,51,56,59,61,63,63,61,59,56,51,46,40,34,28,22,17,12, 7, 4, 1, 0,
303 0, 1, 4, 7,12,17,23,29,35,41,47,52,57,60,63,64,64,63,60,57,52,47,41,35,29,23,17,12, 7, 4, 1, 0,
304 0, 1, 4, 7,12,17,23,29,35,41,47,52,57,60,63,64,64,63,60,57,52,47,41,35,29,23,17,12, 7, 4, 1, 0,
305 0, 1, 4, 7,12,17,22,28,34,40,46,51,56,59,61,63,63,61,59,56,51,46,40,34,28,22,17,12, 7, 4, 1, 0,
306 0, 1, 3, 7,11,16,21,27,33,39,44,49,53,57,59,60,60,59,57,53,49,44,39,33,27,21,16,11, 7, 3, 1, 0,
307 0, 1, 3, 6,11,15,20,26,31,37,42,46,50,53,56,57,57,56,53,50,46,42,37,31,26,20,15,11, 6, 3, 1, 0,
308 0, 1, 3, 6, 9,14,19,24,29,34,38,43,46,49,51,52,52,51,49,46,43,38,34,29,24,19,14, 9, 6, 3, 1, 0,
309 0, 1, 3, 5, 9,12,17,21,26,30,35,38,42,44,46,47,47,46,44,42,38,35,30,26,21,17,12, 9, 5, 3, 1, 0,
310 0, 1, 3, 5, 7,11,15,19,23,26,30,34,37,39,40,41,41,40,39,37,34,30,26,23,19,15,11, 7, 5, 3, 1, 0,
311 0, 1, 2, 4, 6, 9,12,16,19,23,26,29,31,33,34,35,35,34,33,31,29,26,23,19,16,12, 9, 6, 4, 2, 1, 0,
312 0, 1, 2, 3, 5, 8,10,13,16,19,21,24,26,27,28,29,29,28,27,26,24,21,19,16,13,10, 8, 5, 3, 2, 1, 0,
313 0, 1, 1, 2, 4, 6, 8,10,12,15,17,19,20,21,22,23,23,22,21,20,19,17,15,12,10, 8, 6, 4, 2, 1, 1, 0,
314 0, 0, 1, 2, 3, 5, 6, 8, 9,11,12,14,15,16,17,17,17,17,16,15,14,12,11, 9, 8, 6, 5, 3, 2, 1, 0, 0,
315 0, 0, 1, 1, 2, 3, 4, 5, 6, 7, 9,10,11,11,12,12,12,12,11,11,10, 9, 7, 6, 5, 4, 3, 2, 1, 1, 0, 0,
316 0, 0, 1, 1, 1, 2, 2, 3, 4, 5, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 6, 5, 5, 4, 3, 2, 2, 1, 1, 1, 0, 0,
317 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
318 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
319 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
322 static const uint8_t obmc16[256]={
323 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
324 0, 0, 1, 2, 3, 4, 5, 5, 5, 5, 4, 3, 2, 1, 0, 0,
325 0, 1, 3, 6, 8,11,13,14,14,13,11, 8, 6, 3, 1, 0,
326 0, 2, 6,10,15,20,24,26,26,24,20,15,10, 6, 2, 0,
327 0, 3, 8,16,23,30,35,38,38,35,30,23,16, 8, 3, 0,
328 1, 4,11,20,30,39,46,49,49,46,39,30,20,11, 4, 1,
329 1, 5,13,24,35,46,54,58,58,54,46,35,24,13, 5, 1,
330 0, 5,14,26,38,49,58,63,63,58,49,38,26,14, 5, 0,
331 0, 5,14,26,38,49,58,63,63,58,49,38,26,14, 5, 0,
332 1, 5,13,24,35,46,54,58,58,54,46,35,24,13, 5, 1,
333 1, 4,11,20,30,39,46,49,49,46,39,30,20,11, 4, 1,
334 0, 3, 8,16,23,30,35,38,38,35,30,23,16, 8, 3, 0,
335 0, 2, 6,10,15,20,24,26,26,24,20,15,10, 6, 2, 0,
336 0, 1, 3, 6, 8,11,13,14,14,13,11, 8, 6, 3, 1, 0,
337 0, 0, 1, 2, 3, 4, 5, 5, 5, 5, 4, 3, 2, 1, 0, 0,
338 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
344 static const uint8_t obmc8[64]={
345 4, 12, 20, 28, 28, 20, 12, 4,
346 12, 36, 60, 84, 84, 60, 36, 12,
347 20, 60,100,140,140,100, 60, 20,
348 28, 84,140,196,196,140, 84, 28,
349 28, 84,140,196,196,140, 84, 28,
350 20, 60,100,140,140,100, 60, 20,
351 12, 36, 60, 84, 84, 60, 36, 12,
352 4, 12, 20, 28, 28, 20, 12, 4,
357 static const uint8_t obmc4[16]={
365 static const uint8_t *obmc_tab[4]={
366 obmc32, obmc16, obmc8, obmc4
369 static int scale_mv_ref[MAX_REF_FRAMES][MAX_REF_FRAMES];
371 typedef struct BlockNode{
377 //#define TYPE_SPLIT 1
378 #define BLOCK_INTRA 1
380 //#define TYPE_NOCOLOR 4
381 uint8_t level; //FIXME merge into type?
384 static const BlockNode null_block= { //FIXME add border maybe
385 .color= {128,128,128},
393 #define LOG2_MB_SIZE 4
394 #define MB_SIZE (1<<LOG2_MB_SIZE)
396 typedef struct x_and_coeff{
401 typedef struct SubBand{
406 int qlog; ///< log(qscale)/log[2^(1/6)]
410 int stride_line; ///< Stride measured in lines, not pixels.
411 x_and_coeff * x_coeff;
412 struct SubBand *parent;
413 uint8_t state[/*7*2*/ 7 + 512][32];
416 typedef struct Plane{
419 SubBand band[MAX_DECOMPOSITIONS][4];
422 typedef struct SnowContext{
423 // MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to make the motion estimation eventually independant of MpegEncContext, so this will be removed then (FIXME/XXX)
425 AVCodecContext *avctx;
429 AVFrame input_picture; ///< new_picture with the internal linesizes
430 AVFrame current_picture;
431 AVFrame last_picture[MAX_REF_FRAMES];
432 AVFrame mconly_picture;
433 // uint8_t q_context[16];
434 uint8_t header_state[32];
435 uint8_t block_state[128 + 32*128];
439 int spatial_decomposition_type;
440 int temporal_decomposition_type;
441 int spatial_decomposition_count;
442 int temporal_decomposition_count;
445 int16_t (*ref_mvs[MAX_REF_FRAMES])[2];
446 uint32_t *ref_scores[MAX_REF_FRAMES];
447 DWTELEM *spatial_dwt_buffer;
451 int spatial_scalability;
458 #define QBIAS_SHIFT 3
462 Plane plane[MAX_PLANES];
464 #define ME_CACHE_SIZE 1024
465 int me_cache[ME_CACHE_SIZE];
466 int me_cache_generation;
469 MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to make the motion estimation eventually independant of MpegEncContext, so this will be removed then (FIXME/XXX)
480 #define slice_buffer_get_line(slice_buf, line_num) ((slice_buf)->line[line_num] ? (slice_buf)->line[line_num] : slice_buffer_load_line((slice_buf), (line_num)))
481 //#define slice_buffer_get_line(slice_buf, line_num) (slice_buffer_load_line((slice_buf), (line_num)))
483 static void iterative_me(SnowContext *s);
485 static void slice_buffer_init(slice_buffer * buf, int line_count, int max_allocated_lines, int line_width, DWTELEM * base_buffer)
489 buf->base_buffer = base_buffer;
490 buf->line_count = line_count;
491 buf->line_width = line_width;
492 buf->data_count = max_allocated_lines;
493 buf->line = (DWTELEM * *) av_mallocz (sizeof(DWTELEM *) * line_count);
494 buf->data_stack = (DWTELEM * *) av_malloc (sizeof(DWTELEM *) * max_allocated_lines);
496 for (i = 0; i < max_allocated_lines; i++)
498 buf->data_stack[i] = (DWTELEM *) av_malloc (sizeof(DWTELEM) * line_width);
501 buf->data_stack_top = max_allocated_lines - 1;
504 static DWTELEM * slice_buffer_load_line(slice_buffer * buf, int line)
509 // av_log(NULL, AV_LOG_DEBUG, "Cache hit: %d\n", line);
511 assert(buf->data_stack_top >= 0);
512 // assert(!buf->line[line]);
514 return buf->line[line];
516 offset = buf->line_width * line;
517 buffer = buf->data_stack[buf->data_stack_top];
518 buf->data_stack_top--;
519 buf->line[line] = buffer;
521 // av_log(NULL, AV_LOG_DEBUG, "slice_buffer_load_line: line: %d remaining: %d\n", line, buf->data_stack_top + 1);
526 static void slice_buffer_release(slice_buffer * buf, int line)
531 assert(line >= 0 && line < buf->line_count);
532 assert(buf->line[line]);
534 offset = buf->line_width * line;
535 buffer = buf->line[line];
536 buf->data_stack_top++;
537 buf->data_stack[buf->data_stack_top] = buffer;
538 buf->line[line] = NULL;
540 // av_log(NULL, AV_LOG_DEBUG, "slice_buffer_release: line: %d remaining: %d\n", line, buf->data_stack_top + 1);
543 static void slice_buffer_flush(slice_buffer * buf)
546 for (i = 0; i < buf->line_count; i++)
550 // av_log(NULL, AV_LOG_DEBUG, "slice_buffer_flush: line: %d \n", i);
551 slice_buffer_release(buf, i);
556 static void slice_buffer_destroy(slice_buffer * buf)
559 slice_buffer_flush(buf);
561 for (i = buf->data_count - 1; i >= 0; i--)
563 assert(buf->data_stack[i]);
564 av_freep(&buf->data_stack[i]);
566 assert(buf->data_stack);
567 av_freep(&buf->data_stack);
569 av_freep(&buf->line);
573 // Avoid a name clash on SGI IRIX
576 #define QEXPSHIFT (7-FRAC_BITS+8) //FIXME try to change this to 0
577 static uint8_t qexp[QROOT];
579 static inline int mirror(int v, int m){
580 while((unsigned)v > (unsigned)m){
587 static inline void put_symbol(RangeCoder *c, uint8_t *state, int v, int is_signed){
592 const int e= av_log2(a);
594 const int el= FFMIN(e, 10);
595 put_rac(c, state+0, 0);
598 put_rac(c, state+1+i, 1); //1..10
601 put_rac(c, state+1+9, 1); //1..10
603 put_rac(c, state+1+FFMIN(i,9), 0);
605 for(i=e-1; i>=el; i--){
606 put_rac(c, state+22+9, (a>>i)&1); //22..31
609 put_rac(c, state+22+i, (a>>i)&1); //22..31
613 put_rac(c, state+11 + el, v < 0); //11..21
616 put_rac(c, state+0, 0);
619 put_rac(c, state+1+i, 1); //1..10
621 put_rac(c, state+1+i, 0);
623 for(i=e-1; i>=0; i--){
624 put_rac(c, state+22+i, (a>>i)&1); //22..31
628 put_rac(c, state+11 + e, v < 0); //11..21
631 put_rac(c, state+1+FFMIN(i,9), 1); //1..10
633 put_rac(c, state+1+FFMIN(i,9), 0);
635 for(i=e-1; i>=0; i--){
636 put_rac(c, state+22+FFMIN(i,9), (a>>i)&1); //22..31
640 put_rac(c, state+11 + FFMIN(e,10), v < 0); //11..21
644 put_rac(c, state+0, 1);
648 static inline int get_symbol(RangeCoder *c, uint8_t *state, int is_signed){
649 if(get_rac(c, state+0))
654 while(get_rac(c, state+1 + FFMIN(e,9))){ //1..10
659 for(i=e-1; i>=0; i--){
660 a += a + get_rac(c, state+22 + FFMIN(i,9)); //22..31
663 if(is_signed && get_rac(c, state+11 + FFMIN(e,10))) //11..21
670 static inline void put_symbol2(RangeCoder *c, uint8_t *state, int v, int log2){
672 int r= log2>=0 ? 1<<log2 : 1;
678 put_rac(c, state+4+log2, 1);
683 put_rac(c, state+4+log2, 0);
685 for(i=log2-1; i>=0; i--){
686 put_rac(c, state+31-i, (v>>i)&1);
690 static inline int get_symbol2(RangeCoder *c, uint8_t *state, int log2){
692 int r= log2>=0 ? 1<<log2 : 1;
697 while(get_rac(c, state+4+log2)){
703 for(i=log2-1; i>=0; i--){
704 v+= get_rac(c, state+31-i)<<i;
710 static always_inline void lift(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int dst_step, int src_step, int ref_step, int width, int mul, int add, int shift, int highpass, int inverse){
711 const int mirror_left= !highpass;
712 const int mirror_right= (width&1) ^ highpass;
713 const int w= (width>>1) - 1 + (highpass & width);
716 #define LIFT(src, ref, inv) ((src) + ((inv) ? - (ref) : + (ref)))
718 dst[0] = LIFT(src[0], ((mul*2*ref[0]+add)>>shift), inverse);
724 dst[i*dst_step] = LIFT(src[i*src_step], ((mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add)>>shift), inverse);
728 dst[w*dst_step] = LIFT(src[w*src_step], ((mul*2*ref[w*ref_step]+add)>>shift), inverse);
733 static always_inline void lift5(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int dst_step, int src_step, int ref_step, int width, int mul, int add, int shift, int highpass, int inverse){
734 const int mirror_left= !highpass;
735 const int mirror_right= (width&1) ^ highpass;
736 const int w= (width>>1) - 1 + (highpass & width);
743 dst[0] = LIFT(src[0], ((r+add)>>shift), inverse);
749 int r= 3*(ref[i*ref_step] + ref[(i+1)*ref_step]);
752 dst[i*dst_step] = LIFT(src[i*src_step], ((r+add)>>shift), inverse);
756 int r= 3*2*ref[w*ref_step];
759 dst[w*dst_step] = LIFT(src[w*src_step], ((r+add)>>shift), inverse);
765 static always_inline void liftS(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int dst_step, int src_step, int ref_step, int width, int mul, int add, int shift, int highpass, int inverse){
766 const int mirror_left= !highpass;
767 const int mirror_right= (width&1) ^ highpass;
768 const int w= (width>>1) - 1 + (highpass & width);
772 #define LIFTS(src, ref, inv) ((inv) ? (src) - (((ref) - 4*(src))>>shift): (16*4*(src) + 4*(ref) + 8 + (5<<27))/(5*16) - (1<<23))
774 dst[0] = LIFTS(src[0], mul*2*ref[0]+add, inverse);
780 dst[i*dst_step] = LIFTS(src[i*src_step], mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add, inverse);
784 dst[w*dst_step] = LIFTS(src[w*src_step], mul*2*ref[w*ref_step]+add, inverse);
790 static void inplace_lift(DWTELEM *dst, int width, int *coeffs, int n, int shift, int start, int inverse){
793 for(x=start; x<width; x+=2){
797 int x2= x + 2*i - n + 1;
799 else if(x2>=width) x2= 2*width-x2-2;
800 sum += coeffs[i]*(int64_t)dst[x2];
802 if(inverse) dst[x] -= (sum + (1<<shift)/2)>>shift;
803 else dst[x] += (sum + (1<<shift)/2)>>shift;
807 static void inplace_liftV(DWTELEM *dst, int width, int height, int stride, int *coeffs, int n, int shift, int start, int inverse){
809 for(y=start; y<height; y+=2){
810 for(x=0; x<width; x++){
814 int y2= y + 2*i - n + 1;
816 else if(y2>=height) y2= 2*height-y2-2;
817 sum += coeffs[i]*(int64_t)dst[x + y2*stride];
819 if(inverse) dst[x + y*stride] -= (sum + (1<<shift)/2)>>shift;
820 else dst[x + y*stride] += (sum + (1<<shift)/2)>>shift;
829 #if 0 // more accurate 9/7
832 #define COEFFS1 (int[]){-25987,-25987}
835 #define COEFFS2 (int[]){-27777,-27777}
838 #define COEFFS3 (int[]){28931,28931}
841 #define COEFFS4 (int[]){14533,14533}
845 #define COEFFS1 (int[]){1,-9,-9,1}
848 #define COEFFS2 (int[]){-1,5,5,-1}
861 #define COEFFS1 (int[]){1,1}
864 #define COEFFS2 (int[]){-1,-1}
877 #define COEFFS2 (int[]){-1,-1}
880 #define COEFFS3 (int[]){-1,-1}
883 #define COEFFS4 (int[]){-5,29,29,-5}
888 #define COEFFS1 (int[]){-203,-203}
891 #define COEFFS2 (int[]){-217,-217}
894 #define COEFFS3 (int[]){113,113}
897 #define COEFFS4 (int[]){227,227}
905 #define COEFFS2 (int[]){-1,-1}
908 #define COEFFS3 (int[]){-1,-1}
911 #define COEFFS4 (int[]){3,3}
915 #define COEFFS1 (int[]){1,-9,-9,1}
918 #define COEFFS2 (int[]){1,1}
928 #define COEFFS1 (int[]){1,-9,-9,1}
931 #define COEFFS2 (int[]){-1,5,5,-1}
939 static void horizontal_decomposeX(DWTELEM *b, int width){
941 const int width2= width>>1;
942 const int w2= (width+1)>>1;
945 inplace_lift(b, width, COEFFS1, N1, SHIFT1, LX1, 0);
946 inplace_lift(b, width, COEFFS2, N2, SHIFT2, LX0, 0);
947 inplace_lift(b, width, COEFFS3, N3, SHIFT3, LX1, 0);
948 inplace_lift(b, width, COEFFS4, N4, SHIFT4, LX0, 0);
950 for(x=0; x<width2; x++){
952 temp[x+w2]= b[2*x + 1];
956 memcpy(b, temp, width*sizeof(int));
959 static void horizontal_composeX(DWTELEM *b, int width){
961 const int width2= width>>1;
963 const int w2= (width+1)>>1;
965 memcpy(temp, b, width*sizeof(int));
966 for(x=0; x<width2; x++){
968 b[2*x + 1]= temp[x+w2];
973 inplace_lift(b, width, COEFFS4, N4, SHIFT4, LX0, 1);
974 inplace_lift(b, width, COEFFS3, N3, SHIFT3, LX1, 1);
975 inplace_lift(b, width, COEFFS2, N2, SHIFT2, LX0, 1);
976 inplace_lift(b, width, COEFFS1, N1, SHIFT1, LX1, 1);
979 static void spatial_decomposeX(DWTELEM *buffer, int width, int height, int stride){
982 for(y=0; y<height; y++){
983 for(x=0; x<width; x++){
984 buffer[y*stride + x] *= SCALEX;
988 for(y=0; y<height; y++){
989 horizontal_decomposeX(buffer + y*stride, width);
992 inplace_liftV(buffer, width, height, stride, COEFFS1, N1, SHIFT1, LX1, 0);
993 inplace_liftV(buffer, width, height, stride, COEFFS2, N2, SHIFT2, LX0, 0);
994 inplace_liftV(buffer, width, height, stride, COEFFS3, N3, SHIFT3, LX1, 0);
995 inplace_liftV(buffer, width, height, stride, COEFFS4, N4, SHIFT4, LX0, 0);
998 static void spatial_composeX(DWTELEM *buffer, int width, int height, int stride){
1001 inplace_liftV(buffer, width, height, stride, COEFFS4, N4, SHIFT4, LX0, 1);
1002 inplace_liftV(buffer, width, height, stride, COEFFS3, N3, SHIFT3, LX1, 1);
1003 inplace_liftV(buffer, width, height, stride, COEFFS2, N2, SHIFT2, LX0, 1);
1004 inplace_liftV(buffer, width, height, stride, COEFFS1, N1, SHIFT1, LX1, 1);
1006 for(y=0; y<height; y++){
1007 horizontal_composeX(buffer + y*stride, width);
1010 for(y=0; y<height; y++){
1011 for(x=0; x<width; x++){
1012 buffer[y*stride + x] /= SCALEX;
1017 static void horizontal_decompose53i(DWTELEM *b, int width){
1018 DWTELEM temp[width];
1019 const int width2= width>>1;
1021 const int w2= (width+1)>>1;
1023 for(x=0; x<width2; x++){
1025 temp[x+w2]= b[2*x + 1];
1039 for(x=1; x+1<width2; x+=2){
1043 A2 += (A1 + A3 + 2)>>2;
1047 A1= temp[x+1+width2];
1050 A4 += (A1 + A3 + 2)>>2;
1056 A2 += (A1 + A3 + 2)>>2;
1061 lift(b+w2, temp+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 0);
1062 lift(b , temp , b+w2, 1, 1, 1, width, 1, 2, 2, 0, 0);
1066 static void vertical_decompose53iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1069 for(i=0; i<width; i++){
1070 b1[i] -= (b0[i] + b2[i])>>1;
1074 static void vertical_decompose53iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1077 for(i=0; i<width; i++){
1078 b1[i] += (b0[i] + b2[i] + 2)>>2;
1082 static void spatial_decompose53i(DWTELEM *buffer, int width, int height, int stride){
1084 DWTELEM *b0= buffer + mirror(-2-1, height-1)*stride;
1085 DWTELEM *b1= buffer + mirror(-2 , height-1)*stride;
1087 for(y=-2; y<height; y+=2){
1088 DWTELEM *b2= buffer + mirror(y+1, height-1)*stride;
1089 DWTELEM *b3= buffer + mirror(y+2, height-1)*stride;
1092 if(y+1<(unsigned)height) horizontal_decompose53i(b2, width);
1093 if(y+2<(unsigned)height) horizontal_decompose53i(b3, width);
1094 STOP_TIMER("horizontal_decompose53i")}
1097 if(y+1<(unsigned)height) vertical_decompose53iH0(b1, b2, b3, width);
1098 if(y+0<(unsigned)height) vertical_decompose53iL0(b0, b1, b2, width);
1099 STOP_TIMER("vertical_decompose53i*")}
1106 static void horizontal_decompose97i(DWTELEM *b, int width){
1107 DWTELEM temp[width];
1108 const int w2= (width+1)>>1;
1110 lift (temp+w2, b +1, b , 1, 2, 2, width, -W_AM, W_AO, W_AS, 1, 0);
1111 liftS(temp , b , temp+w2, 1, 2, 1, width, -W_BM, W_BO, W_BS, 0, 0);
1112 lift5(b +w2, temp+w2, temp , 1, 1, 1, width, W_CM, W_CO, W_CS, 1, 0);
1113 lift (b , temp , b +w2, 1, 1, 1, width, W_DM, W_DO, W_DS, 0, 0);
1117 static void vertical_decompose97iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1120 for(i=0; i<width; i++){
1121 b1[i] -= (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
1125 static void vertical_decompose97iH1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1128 for(i=0; i<width; i++){
1130 b1[i] += (W_CM*(b0[i] + b2[i])+W_CO)>>W_CS;
1132 int r= 3*(b0[i] + b2[i]);
1135 b1[i] += (r+W_CO)>>W_CS;
1140 static void vertical_decompose97iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1143 for(i=0; i<width; i++){
1145 b1[i] -= (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS;
1147 b1[i] = (16*4*b1[i] - 4*(b0[i] + b2[i]) + 8*5 + (5<<27)) / (5*16) - (1<<23);
1152 static void vertical_decompose97iL1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1155 for(i=0; i<width; i++){
1156 b1[i] += (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS;
1160 static void spatial_decompose97i(DWTELEM *buffer, int width, int height, int stride){
1162 DWTELEM *b0= buffer + mirror(-4-1, height-1)*stride;
1163 DWTELEM *b1= buffer + mirror(-4 , height-1)*stride;
1164 DWTELEM *b2= buffer + mirror(-4+1, height-1)*stride;
1165 DWTELEM *b3= buffer + mirror(-4+2, height-1)*stride;
1167 for(y=-4; y<height; y+=2){
1168 DWTELEM *b4= buffer + mirror(y+3, height-1)*stride;
1169 DWTELEM *b5= buffer + mirror(y+4, height-1)*stride;
1172 if(y+3<(unsigned)height) horizontal_decompose97i(b4, width);
1173 if(y+4<(unsigned)height) horizontal_decompose97i(b5, width);
1175 STOP_TIMER("horizontal_decompose97i")
1179 if(y+3<(unsigned)height) vertical_decompose97iH0(b3, b4, b5, width);
1180 if(y+2<(unsigned)height) vertical_decompose97iL0(b2, b3, b4, width);
1181 if(y+1<(unsigned)height) vertical_decompose97iH1(b1, b2, b3, width);
1182 if(y+0<(unsigned)height) vertical_decompose97iL1(b0, b1, b2, width);
1185 STOP_TIMER("vertical_decompose97i")
1195 void ff_spatial_dwt(DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
1198 for(level=0; level<decomposition_count; level++){
1200 case DWT_97: spatial_decompose97i(buffer, width>>level, height>>level, stride<<level); break;
1201 case DWT_53: spatial_decompose53i(buffer, width>>level, height>>level, stride<<level); break;
1202 case DWT_X: spatial_decomposeX (buffer, width>>level, height>>level, stride<<level); break;
1207 static void horizontal_compose53i(DWTELEM *b, int width){
1208 DWTELEM temp[width];
1209 const int width2= width>>1;
1210 const int w2= (width+1)>>1;
1222 for(x=1; x+1<width2; x+=2){
1226 A2 += (A1 + A3 + 2)>>2;
1230 A1= temp[x+1+width2];
1233 A4 += (A1 + A3 + 2)>>2;
1239 A2 += (A1 + A3 + 2)>>2;
1243 lift(temp , b , b+w2, 1, 1, 1, width, 1, 2, 2, 0, 1);
1244 lift(temp+w2, b+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 1);
1246 for(x=0; x<width2; x++){
1248 b[2*x + 1]= temp[x+w2];
1254 static void vertical_compose53iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1257 for(i=0; i<width; i++){
1258 b1[i] += (b0[i] + b2[i])>>1;
1262 static void vertical_compose53iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1265 for(i=0; i<width; i++){
1266 b1[i] -= (b0[i] + b2[i] + 2)>>2;
1270 static void spatial_compose53i_buffered_init(dwt_compose_t *cs, slice_buffer * sb, int height, int stride_line){
1271 cs->b0 = slice_buffer_get_line(sb, mirror(-1-1, height-1) * stride_line);
1272 cs->b1 = slice_buffer_get_line(sb, mirror(-1 , height-1) * stride_line);
1276 static void spatial_compose53i_init(dwt_compose_t *cs, DWTELEM *buffer, int height, int stride){
1277 cs->b0 = buffer + mirror(-1-1, height-1)*stride;
1278 cs->b1 = buffer + mirror(-1 , height-1)*stride;
1282 static void spatial_compose53i_dy_buffered(dwt_compose_t *cs, slice_buffer * sb, int width, int height, int stride_line){
1285 DWTELEM *b0= cs->b0;
1286 DWTELEM *b1= cs->b1;
1287 DWTELEM *b2= slice_buffer_get_line(sb, mirror(y+1, height-1) * stride_line);
1288 DWTELEM *b3= slice_buffer_get_line(sb, mirror(y+2, height-1) * stride_line);
1291 if(y+1<(unsigned)height) vertical_compose53iL0(b1, b2, b3, width);
1292 if(y+0<(unsigned)height) vertical_compose53iH0(b0, b1, b2, width);
1293 STOP_TIMER("vertical_compose53i*")}
1296 if(y-1<(unsigned)height) horizontal_compose53i(b0, width);
1297 if(y+0<(unsigned)height) horizontal_compose53i(b1, width);
1298 STOP_TIMER("horizontal_compose53i")}
1305 static void spatial_compose53i_dy(dwt_compose_t *cs, DWTELEM *buffer, int width, int height, int stride){
1307 DWTELEM *b0= cs->b0;
1308 DWTELEM *b1= cs->b1;
1309 DWTELEM *b2= buffer + mirror(y+1, height-1)*stride;
1310 DWTELEM *b3= buffer + mirror(y+2, height-1)*stride;
1313 if(y+1<(unsigned)height) vertical_compose53iL0(b1, b2, b3, width);
1314 if(y+0<(unsigned)height) vertical_compose53iH0(b0, b1, b2, width);
1315 STOP_TIMER("vertical_compose53i*")}
1318 if(y-1<(unsigned)height) horizontal_compose53i(b0, width);
1319 if(y+0<(unsigned)height) horizontal_compose53i(b1, width);
1320 STOP_TIMER("horizontal_compose53i")}
1327 static void spatial_compose53i(DWTELEM *buffer, int width, int height, int stride){
1329 spatial_compose53i_init(&cs, buffer, height, stride);
1330 while(cs.y <= height)
1331 spatial_compose53i_dy(&cs, buffer, width, height, stride);
1335 void ff_snow_horizontal_compose97i(DWTELEM *b, int width){
1336 DWTELEM temp[width];
1337 const int w2= (width+1)>>1;
1339 lift (temp , b , b +w2, 1, 1, 1, width, W_DM, W_DO, W_DS, 0, 1);
1340 lift5(temp+w2, b +w2, temp , 1, 1, 1, width, W_CM, W_CO, W_CS, 1, 1);
1341 liftS(b , temp , temp+w2, 2, 1, 1, width, -W_BM, W_BO, W_BS, 0, 1);
1342 lift (b+1 , temp+w2, b , 2, 1, 2, width, -W_AM, W_AO, W_AS, 1, 1);
1345 static void vertical_compose97iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1348 for(i=0; i<width; i++){
1349 b1[i] += (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
1353 static void vertical_compose97iH1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1356 for(i=0; i<width; i++){
1358 b1[i] -= (W_CM*(b0[i] + b2[i])+W_CO)>>W_CS;
1360 int r= 3*(b0[i] + b2[i]);
1363 b1[i] -= (r+W_CO)>>W_CS;
1368 static void vertical_compose97iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1371 for(i=0; i<width; i++){
1373 b1[i] += (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS;
1375 b1[i] += (W_BM*(b0[i] + b2[i])+4*b1[i]+W_BO)>>W_BS;
1380 static void vertical_compose97iL1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1383 for(i=0; i<width; i++){
1384 b1[i] -= (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS;
1388 void ff_snow_vertical_compose97i(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, DWTELEM *b3, DWTELEM *b4, DWTELEM *b5, int width){
1391 for(i=0; i<width; i++){
1395 b4[i] -= (W_DM*(b3[i] + b5[i])+W_DO)>>W_DS;
1397 b3[i] -= (W_CM*(b2[i] + b4[i])+W_CO)>>W_CS;
1399 r= 3*(b2[i] + b4[i]);
1402 b3[i] -= (r+W_CO)>>W_CS;
1405 b2[i] += (W_BM*(b1[i] + b3[i])+W_BO)>>W_BS;
1407 b2[i] += (W_BM*(b1[i] + b3[i])+4*b2[i]+W_BO)>>W_BS;
1409 b1[i] += (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
1413 static void spatial_compose97i_buffered_init(dwt_compose_t *cs, slice_buffer * sb, int height, int stride_line){
1414 cs->b0 = slice_buffer_get_line(sb, mirror(-3-1, height-1) * stride_line);
1415 cs->b1 = slice_buffer_get_line(sb, mirror(-3 , height-1) * stride_line);
1416 cs->b2 = slice_buffer_get_line(sb, mirror(-3+1, height-1) * stride_line);
1417 cs->b3 = slice_buffer_get_line(sb, mirror(-3+2, height-1) * stride_line);
1421 static void spatial_compose97i_init(dwt_compose_t *cs, DWTELEM *buffer, int height, int stride){
1422 cs->b0 = buffer + mirror(-3-1, height-1)*stride;
1423 cs->b1 = buffer + mirror(-3 , height-1)*stride;
1424 cs->b2 = buffer + mirror(-3+1, height-1)*stride;
1425 cs->b3 = buffer + mirror(-3+2, height-1)*stride;
1429 static void spatial_compose97i_dy_buffered(DSPContext *dsp, dwt_compose_t *cs, slice_buffer * sb, int width, int height, int stride_line){
1432 DWTELEM *b0= cs->b0;
1433 DWTELEM *b1= cs->b1;
1434 DWTELEM *b2= cs->b2;
1435 DWTELEM *b3= cs->b3;
1436 DWTELEM *b4= slice_buffer_get_line(sb, mirror(y + 3, height - 1) * stride_line);
1437 DWTELEM *b5= slice_buffer_get_line(sb, mirror(y + 4, height - 1) * stride_line);
1440 if(y>0 && y+4<height){
1441 dsp->vertical_compose97i(b0, b1, b2, b3, b4, b5, width);
1443 if(y+3<(unsigned)height) vertical_compose97iL1(b3, b4, b5, width);
1444 if(y+2<(unsigned)height) vertical_compose97iH1(b2, b3, b4, width);
1445 if(y+1<(unsigned)height) vertical_compose97iL0(b1, b2, b3, width);
1446 if(y+0<(unsigned)height) vertical_compose97iH0(b0, b1, b2, width);
1449 STOP_TIMER("vertical_compose97i")}}
1452 if(y-1<(unsigned)height) dsp->horizontal_compose97i(b0, width);
1453 if(y+0<(unsigned)height) dsp->horizontal_compose97i(b1, width);
1454 if(width>400 && y+0<(unsigned)height){
1455 STOP_TIMER("horizontal_compose97i")}}
1464 static void spatial_compose97i_dy(dwt_compose_t *cs, DWTELEM *buffer, int width, int height, int stride){
1466 DWTELEM *b0= cs->b0;
1467 DWTELEM *b1= cs->b1;
1468 DWTELEM *b2= cs->b2;
1469 DWTELEM *b3= cs->b3;
1470 DWTELEM *b4= buffer + mirror(y+3, height-1)*stride;
1471 DWTELEM *b5= buffer + mirror(y+4, height-1)*stride;
1474 if(y+3<(unsigned)height) vertical_compose97iL1(b3, b4, b5, width);
1475 if(y+2<(unsigned)height) vertical_compose97iH1(b2, b3, b4, width);
1476 if(y+1<(unsigned)height) vertical_compose97iL0(b1, b2, b3, width);
1477 if(y+0<(unsigned)height) vertical_compose97iH0(b0, b1, b2, width);
1479 STOP_TIMER("vertical_compose97i")}}
1482 if(y-1<(unsigned)height) ff_snow_horizontal_compose97i(b0, width);
1483 if(y+0<(unsigned)height) ff_snow_horizontal_compose97i(b1, width);
1484 if(width>400 && b0 <= b2){
1485 STOP_TIMER("horizontal_compose97i")}}
1494 static void spatial_compose97i(DWTELEM *buffer, int width, int height, int stride){
1496 spatial_compose97i_init(&cs, buffer, height, stride);
1497 while(cs.y <= height)
1498 spatial_compose97i_dy(&cs, buffer, width, height, stride);
1501 static void ff_spatial_idwt_buffered_init(dwt_compose_t *cs, slice_buffer * sb, int width, int height, int stride_line, int type, int decomposition_count){
1503 for(level=decomposition_count-1; level>=0; level--){
1505 case DWT_97: spatial_compose97i_buffered_init(cs+level, sb, height>>level, stride_line<<level); break;
1506 case DWT_53: spatial_compose53i_buffered_init(cs+level, sb, height>>level, stride_line<<level); break;
1507 /* not slicified yet */
1508 case DWT_X: /*spatial_composeX(buffer, width>>level, height>>level, stride<<level); break;*/
1509 av_log(NULL, AV_LOG_ERROR, "spatial_composeX neither buffered nor slicified yet.\n"); break;
1514 static void ff_spatial_idwt_init(dwt_compose_t *cs, DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
1516 for(level=decomposition_count-1; level>=0; level--){
1518 case DWT_97: spatial_compose97i_init(cs+level, buffer, height>>level, stride<<level); break;
1519 case DWT_53: spatial_compose53i_init(cs+level, buffer, height>>level, stride<<level); break;
1520 /* not slicified yet */
1521 case DWT_X: spatial_composeX(buffer, width>>level, height>>level, stride<<level); break;
1526 static void ff_spatial_idwt_slice(dwt_compose_t *cs, DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count, int y){
1527 const int support = type==1 ? 3 : 5;
1531 for(level=decomposition_count-1; level>=0; level--){
1532 while(cs[level].y <= FFMIN((y>>level)+support, height>>level)){
1534 case DWT_97: spatial_compose97i_dy(cs+level, buffer, width>>level, height>>level, stride<<level);
1536 case DWT_53: spatial_compose53i_dy(cs+level, buffer, width>>level, height>>level, stride<<level);
1544 static void ff_spatial_idwt_buffered_slice(DSPContext *dsp, dwt_compose_t *cs, slice_buffer * slice_buf, int width, int height, int stride_line, int type, int decomposition_count, int y){
1545 const int support = type==1 ? 3 : 5;
1549 for(level=decomposition_count-1; level>=0; level--){
1550 while(cs[level].y <= FFMIN((y>>level)+support, height>>level)){
1552 case DWT_97: spatial_compose97i_dy_buffered(dsp, cs+level, slice_buf, width>>level, height>>level, stride_line<<level);
1554 case DWT_53: spatial_compose53i_dy_buffered(cs+level, slice_buf, width>>level, height>>level, stride_line<<level);
1562 static void ff_spatial_idwt(DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
1565 for(level=decomposition_count-1; level>=0; level--)
1566 spatial_composeX (buffer, width>>level, height>>level, stride<<level);
1568 dwt_compose_t cs[MAX_DECOMPOSITIONS];
1570 ff_spatial_idwt_init(cs, buffer, width, height, stride, type, decomposition_count);
1571 for(y=0; y<height; y+=4)
1572 ff_spatial_idwt_slice(cs, buffer, width, height, stride, type, decomposition_count, y);
1576 static int encode_subband_c0run(SnowContext *s, SubBand *b, DWTELEM *src, DWTELEM *parent, int stride, int orientation){
1577 const int w= b->width;
1578 const int h= b->height;
1590 int /*ll=0, */l=0, lt=0, t=0, rt=0;
1591 v= src[x + y*stride];
1594 t= src[x + (y-1)*stride];
1596 lt= src[x - 1 + (y-1)*stride];
1599 rt= src[x + 1 + (y-1)*stride];
1603 l= src[x - 1 + y*stride];
1605 if(orientation==1) ll= src[y + (x-2)*stride];
1606 else ll= src[x - 2 + y*stride];
1612 if(px<b->parent->width && py<b->parent->height)
1613 p= parent[px + py*2*stride];
1615 if(!(/*ll|*/l|lt|t|rt|p)){
1617 runs[run_index++]= run;
1625 max_index= run_index;
1626 runs[run_index++]= run;
1628 run= runs[run_index++];
1630 put_symbol2(&s->c, b->state[30], max_index, 0);
1631 if(run_index <= max_index)
1632 put_symbol2(&s->c, b->state[1], run, 3);
1635 if(s->c.bytestream_end - s->c.bytestream < w*40){
1636 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
1641 int /*ll=0, */l=0, lt=0, t=0, rt=0;
1642 v= src[x + y*stride];
1645 t= src[x + (y-1)*stride];
1647 lt= src[x - 1 + (y-1)*stride];
1650 rt= src[x + 1 + (y-1)*stride];
1654 l= src[x - 1 + y*stride];
1656 if(orientation==1) ll= src[y + (x-2)*stride];
1657 else ll= src[x - 2 + y*stride];
1663 if(px<b->parent->width && py<b->parent->height)
1664 p= parent[px + py*2*stride];
1666 if(/*ll|*/l|lt|t|rt|p){
1667 int context= av_log2(/*ABS(ll) + */3*ABS(l) + ABS(lt) + 2*ABS(t) + ABS(rt) + ABS(p));
1669 put_rac(&s->c, &b->state[0][context], !!v);
1672 run= runs[run_index++];
1674 if(run_index <= max_index)
1675 put_symbol2(&s->c, b->state[1], run, 3);
1683 int context= av_log2(/*ABS(ll) + */3*ABS(l) + ABS(lt) + 2*ABS(t) + ABS(rt) + ABS(p));
1684 int l2= 2*ABS(l) + (l<0);
1685 int t2= 2*ABS(t) + (t<0);
1687 put_symbol2(&s->c, b->state[context + 2], ABS(v)-1, context-4);
1688 put_rac(&s->c, &b->state[0][16 + 1 + 3 + quant3bA[l2&0xFF] + 3*quant3bA[t2&0xFF]], v<0);
1696 static int encode_subband(SnowContext *s, SubBand *b, DWTELEM *src, DWTELEM *parent, int stride, int orientation){
1697 // encode_subband_qtree(s, b, src, parent, stride, orientation);
1698 // encode_subband_z0run(s, b, src, parent, stride, orientation);
1699 return encode_subband_c0run(s, b, src, parent, stride, orientation);
1700 // encode_subband_dzr(s, b, src, parent, stride, orientation);
1703 static inline void unpack_coeffs(SnowContext *s, SubBand *b, SubBand * parent, int orientation){
1704 const int w= b->width;
1705 const int h= b->height;
1710 x_and_coeff *xc= b->x_coeff;
1711 x_and_coeff *prev_xc= NULL;
1712 x_and_coeff *prev2_xc= xc;
1713 x_and_coeff *parent_xc= parent ? parent->x_coeff : NULL;
1714 x_and_coeff *prev_parent_xc= parent_xc;
1716 runs= get_symbol2(&s->c, b->state[30], 0);
1717 if(runs-- > 0) run= get_symbol2(&s->c, b->state[1], 3);
1722 int lt=0, t=0, rt=0;
1724 if(y && prev_xc->x == 0){
1736 if(prev_xc->x == x + 1)
1742 if(x>>1 > parent_xc->x){
1745 if(x>>1 == parent_xc->x){
1746 p= parent_xc->coeff;
1749 if(/*ll|*/l|lt|t|rt|p){
1750 int context= av_log2(/*ABS(ll) + */3*(l>>1) + (lt>>1) + (t&~1) + (rt>>1) + (p>>1));
1752 v=get_rac(&s->c, &b->state[0][context]);
1754 v= 2*(get_symbol2(&s->c, b->state[context + 2], context-4) + 1);
1755 v+=get_rac(&s->c, &b->state[0][16 + 1 + 3 + quant3bA[l&0xFF] + 3*quant3bA[t&0xFF]]);
1762 if(runs-- > 0) run= get_symbol2(&s->c, b->state[1], 3);
1764 v= 2*(get_symbol2(&s->c, b->state[0 + 2], 0-4) + 1);
1765 v+=get_rac(&s->c, &b->state[0][16 + 1 + 3]);
1774 if(y) max_run= FFMIN(run, prev_xc->x - x - 2);
1775 else max_run= FFMIN(run, w-x-1);
1777 max_run= FFMIN(max_run, 2*parent_xc->x - x - 1);
1783 (xc++)->x= w+1; //end marker
1789 while(parent_xc->x != parent->width+1)
1792 prev_parent_xc= parent_xc;
1794 parent_xc= prev_parent_xc;
1799 (xc++)->x= w+1; //end marker
1803 static inline void decode_subband_slice_buffered(SnowContext *s, SubBand *b, slice_buffer * sb, int start_y, int h, int save_state[1]){
1804 const int w= b->width;
1806 const int qlog= clip(s->qlog + b->qlog, 0, QROOT*16);
1807 int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
1808 int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
1813 if(b->buf == s->spatial_dwt_buffer || s->qlog == LOSSLESS_QLOG){
1818 /* If we are on the second or later slice, restore our index. */
1820 new_index = save_state[0];
1823 for(y=start_y; y<h; y++){
1826 DWTELEM * line = slice_buffer_get_line(sb, y * b->stride_line + b->buf_y_offset) + b->buf_x_offset;
1827 memset(line, 0, b->width*sizeof(DWTELEM));
1828 v = b->x_coeff[new_index].coeff;
1829 x = b->x_coeff[new_index++].x;
1832 register int t= ( (v>>1)*qmul + qadd)>>QEXPSHIFT;
1833 register int u= -(v&1);
1834 line[x] = (t^u) - u;
1836 v = b->x_coeff[new_index].coeff;
1837 x = b->x_coeff[new_index++].x;
1840 if(w > 200 && start_y != 0/*level+1 == s->spatial_decomposition_count*/){
1841 STOP_TIMER("decode_subband")
1844 /* Save our variables for the next slice. */
1845 save_state[0] = new_index;
1850 static void reset_contexts(SnowContext *s){
1851 int plane_index, level, orientation;
1853 for(plane_index=0; plane_index<3; plane_index++){
1854 for(level=0; level<s->spatial_decomposition_count; level++){
1855 for(orientation=level ? 1:0; orientation<4; orientation++){
1856 memset(s->plane[plane_index].band[level][orientation].state, MID_STATE, sizeof(s->plane[plane_index].band[level][orientation].state));
1860 memset(s->header_state, MID_STATE, sizeof(s->header_state));
1861 memset(s->block_state, MID_STATE, sizeof(s->block_state));
1864 static int alloc_blocks(SnowContext *s){
1865 int w= -((-s->avctx->width )>>LOG2_MB_SIZE);
1866 int h= -((-s->avctx->height)>>LOG2_MB_SIZE);
1871 s->block= av_mallocz(w * h * sizeof(BlockNode) << (s->block_max_depth*2));
1875 static inline void copy_rac_state(RangeCoder *d, RangeCoder *s){
1876 uint8_t *bytestream= d->bytestream;
1877 uint8_t *bytestream_start= d->bytestream_start;
1879 d->bytestream= bytestream;
1880 d->bytestream_start= bytestream_start;
1883 //near copy & paste from dsputil, FIXME
1884 static int pix_sum(uint8_t * pix, int line_size, int w)
1889 for (i = 0; i < w; i++) {
1890 for (j = 0; j < w; j++) {
1894 pix += line_size - w;
1899 //near copy & paste from dsputil, FIXME
1900 static int pix_norm1(uint8_t * pix, int line_size, int w)
1903 uint32_t *sq = squareTbl + 256;
1906 for (i = 0; i < w; i++) {
1907 for (j = 0; j < w; j ++) {
1911 pix += line_size - w;
1916 static inline void set_blocks(SnowContext *s, int level, int x, int y, int l, int cb, int cr, int mx, int my, int ref, int type){
1917 const int w= s->b_width << s->block_max_depth;
1918 const int rem_depth= s->block_max_depth - level;
1919 const int index= (x + y*w) << rem_depth;
1920 const int block_w= 1<<rem_depth;
1933 for(j=0; j<block_w; j++){
1934 for(i=0; i<block_w; i++){
1935 s->block[index + i + j*w]= block;
1940 static inline void init_ref(MotionEstContext *c, uint8_t *src[3], uint8_t *ref[3], uint8_t *ref2[3], int x, int y, int ref_index){
1941 const int offset[3]= {
1943 ((y*c->uvstride + x)>>1),
1944 ((y*c->uvstride + x)>>1),
1948 c->src[0][i]= src [i];
1949 c->ref[0][i]= ref [i] + offset[i];
1954 static inline void pred_mv(SnowContext *s, int *mx, int *my, int ref,
1955 BlockNode *left, BlockNode *top, BlockNode *tr){
1956 if(s->ref_frames == 1){
1957 *mx = mid_pred(left->mx, top->mx, tr->mx);
1958 *my = mid_pred(left->my, top->my, tr->my);
1960 const int *scale = scale_mv_ref[ref];
1961 *mx = mid_pred(left->mx * scale[left->ref] + 128 >>8,
1962 top ->mx * scale[top ->ref] + 128 >>8,
1963 tr ->mx * scale[tr ->ref] + 128 >>8);
1964 *my = mid_pred(left->my * scale[left->ref] + 128 >>8,
1965 top ->my * scale[top ->ref] + 128 >>8,
1966 tr ->my * scale[tr ->ref] + 128 >>8);
1973 #define P_TOPRIGHT P[3]
1974 #define P_MEDIAN P[4]
1976 #define FLAG_QPEL 1 //must be 1
1978 static int encode_q_branch(SnowContext *s, int level, int x, int y){
1979 uint8_t p_buffer[1024];
1980 uint8_t i_buffer[1024];
1981 uint8_t p_state[sizeof(s->block_state)];
1982 uint8_t i_state[sizeof(s->block_state)];
1984 uint8_t *pbbak= s->c.bytestream;
1985 uint8_t *pbbak_start= s->c.bytestream_start;
1986 int score, score2, iscore, i_len, p_len, block_s, sum;
1987 const int w= s->b_width << s->block_max_depth;
1988 const int h= s->b_height << s->block_max_depth;
1989 const int rem_depth= s->block_max_depth - level;
1990 const int index= (x + y*w) << rem_depth;
1991 const int block_w= 1<<(LOG2_MB_SIZE - level);
1992 int trx= (x+1)<<rem_depth;
1993 int try= (y+1)<<rem_depth;
1994 BlockNode *left = x ? &s->block[index-1] : &null_block;
1995 BlockNode *top = y ? &s->block[index-w] : &null_block;
1996 BlockNode *right = trx<w ? &s->block[index+1] : &null_block;
1997 BlockNode *bottom= try<h ? &s->block[index+w] : &null_block;
1998 BlockNode *tl = y && x ? &s->block[index-w-1] : left;
1999 BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
2000 int pl = left->color[0];
2001 int pcb= left->color[1];
2002 int pcr= left->color[2];
2006 const int stride= s->current_picture.linesize[0];
2007 const int uvstride= s->current_picture.linesize[1];
2008 uint8_t *current_data[3]= { s->input_picture.data[0] + (x + y* stride)*block_w,
2009 s->input_picture.data[1] + (x + y*uvstride)*block_w/2,
2010 s->input_picture.data[2] + (x + y*uvstride)*block_w/2};
2012 int16_t last_mv[3][2];
2013 int qpel= !!(s->avctx->flags & CODEC_FLAG_QPEL); //unused
2014 const int shift= 1+qpel;
2015 MotionEstContext *c= &s->m.me;
2016 int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref);
2017 int mx_context= av_log2(2*ABS(left->mx - top->mx));
2018 int my_context= av_log2(2*ABS(left->my - top->my));
2019 int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
2020 int ref, best_ref, ref_score, ref_mx, ref_my;
2022 assert(sizeof(s->block_state) >= 256);
2024 set_blocks(s, level, x, y, pl, pcb, pcr, 0, 0, 0, BLOCK_INTRA);
2028 // clip predictors / edge ?
2030 P_LEFT[0]= left->mx;
2031 P_LEFT[1]= left->my;
2034 P_TOPRIGHT[0]= tr->mx;
2035 P_TOPRIGHT[1]= tr->my;
2037 last_mv[0][0]= s->block[index].mx;
2038 last_mv[0][1]= s->block[index].my;
2039 last_mv[1][0]= right->mx;
2040 last_mv[1][1]= right->my;
2041 last_mv[2][0]= bottom->mx;
2042 last_mv[2][1]= bottom->my;
2049 assert(s->m.me. stride == stride);
2050 assert(s->m.me.uvstride == uvstride);
2052 c->penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_cmp);
2053 c->sub_penalty_factor= get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_sub_cmp);
2054 c->mb_penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->mb_cmp);
2055 c->current_mv_penalty= c->mv_penalty[s->m.f_code=1] + MAX_MV;
2057 c->xmin = - x*block_w - 16+2;
2058 c->ymin = - y*block_w - 16+2;
2059 c->xmax = - (x+1)*block_w + (w<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-2;
2060 c->ymax = - (y+1)*block_w + (h<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-2;
2062 if(P_LEFT[0] > (c->xmax<<shift)) P_LEFT[0] = (c->xmax<<shift);
2063 if(P_LEFT[1] > (c->ymax<<shift)) P_LEFT[1] = (c->ymax<<shift);
2064 if(P_TOP[0] > (c->xmax<<shift)) P_TOP[0] = (c->xmax<<shift);
2065 if(P_TOP[1] > (c->ymax<<shift)) P_TOP[1] = (c->ymax<<shift);
2066 if(P_TOPRIGHT[0] < (c->xmin<<shift)) P_TOPRIGHT[0]= (c->xmin<<shift);
2067 if(P_TOPRIGHT[0] > (c->xmax<<shift)) P_TOPRIGHT[0]= (c->xmax<<shift); //due to pmx no clip
2068 if(P_TOPRIGHT[1] > (c->ymax<<shift)) P_TOPRIGHT[1]= (c->ymax<<shift);
2070 P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]);
2071 P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);
2074 c->pred_x= P_LEFT[0];
2075 c->pred_y= P_LEFT[1];
2077 c->pred_x = P_MEDIAN[0];
2078 c->pred_y = P_MEDIAN[1];
2083 for(ref=0; ref<s->ref_frames; ref++){
2084 init_ref(c, current_data, s->last_picture[ref].data, NULL, block_w*x, block_w*y, 0);
2086 ref_score= ff_epzs_motion_search(&s->m, &ref_mx, &ref_my, P, 0, /*ref_index*/ 0, last_mv,
2087 (1<<16)>>shift, level-LOG2_MB_SIZE+4, block_w);
2089 assert(ref_mx >= c->xmin);
2090 assert(ref_mx <= c->xmax);
2091 assert(ref_my >= c->ymin);
2092 assert(ref_my <= c->ymax);
2094 ref_score= s->m.me.sub_motion_search(&s->m, &ref_mx, &ref_my, ref_score, 0, 0, level-LOG2_MB_SIZE+4, block_w);
2095 ref_score= ff_get_mb_score(&s->m, ref_mx, ref_my, 0, 0, level-LOG2_MB_SIZE+4, block_w, 0);
2096 ref_score+= 2*av_log2(2*ref)*c->penalty_factor;
2097 if(s->ref_mvs[ref]){
2098 s->ref_mvs[ref][index][0]= ref_mx;
2099 s->ref_mvs[ref][index][1]= ref_my;
2100 s->ref_scores[ref][index]= ref_score;
2102 if(score > ref_score){
2109 //FIXME if mb_cmp != SSE then intra cant be compared currently and mb_penalty vs. lambda2
2113 pc.bytestream_start=
2114 pc.bytestream= p_buffer; //FIXME end/start? and at the other stoo
2115 memcpy(p_state, s->block_state, sizeof(s->block_state));
2117 if(level!=s->block_max_depth)
2118 put_rac(&pc, &p_state[4 + s_context], 1);
2119 put_rac(&pc, &p_state[1 + left->type + top->type], 0);
2120 if(s->ref_frames > 1)
2121 put_symbol(&pc, &p_state[128 + 1024 + 32*ref_context], best_ref, 0);
2122 pred_mv(s, &pmx, &pmy, best_ref, left, top, tr);
2123 put_symbol(&pc, &p_state[128 + 32*(mx_context + 16*!!best_ref)], mx - pmx, 1);
2124 put_symbol(&pc, &p_state[128 + 32*(my_context + 16*!!best_ref)], my - pmy, 1);
2125 p_len= pc.bytestream - pc.bytestream_start;
2126 score += (s->lambda2*(p_len*8
2127 + (pc.outstanding_count - s->c.outstanding_count)*8
2128 + (-av_log2(pc.range) + av_log2(s->c.range))
2129 ))>>FF_LAMBDA_SHIFT;
2131 block_s= block_w*block_w;
2132 sum = pix_sum(current_data[0], stride, block_w);
2133 l= (sum + block_s/2)/block_s;
2134 iscore = pix_norm1(current_data[0], stride, block_w) - 2*l*sum + l*l*block_s;
2136 block_s= block_w*block_w>>2;
2137 sum = pix_sum(current_data[1], uvstride, block_w>>1);
2138 cb= (sum + block_s/2)/block_s;
2139 // iscore += pix_norm1(¤t_mb[1][0], uvstride, block_w>>1) - 2*cb*sum + cb*cb*block_s;
2140 sum = pix_sum(current_data[2], uvstride, block_w>>1);
2141 cr= (sum + block_s/2)/block_s;
2142 // iscore += pix_norm1(¤t_mb[2][0], uvstride, block_w>>1) - 2*cr*sum + cr*cr*block_s;
2145 ic.bytestream_start=
2146 ic.bytestream= i_buffer; //FIXME end/start? and at the other stoo
2147 memcpy(i_state, s->block_state, sizeof(s->block_state));
2148 if(level!=s->block_max_depth)
2149 put_rac(&ic, &i_state[4 + s_context], 1);
2150 put_rac(&ic, &i_state[1 + left->type + top->type], 1);
2151 put_symbol(&ic, &i_state[32], l-pl , 1);
2152 put_symbol(&ic, &i_state[64], cb-pcb, 1);
2153 put_symbol(&ic, &i_state[96], cr-pcr, 1);
2154 i_len= ic.bytestream - ic.bytestream_start;
2155 iscore += (s->lambda2*(i_len*8
2156 + (ic.outstanding_count - s->c.outstanding_count)*8
2157 + (-av_log2(ic.range) + av_log2(s->c.range))
2158 ))>>FF_LAMBDA_SHIFT;
2160 // assert(score==256*256*256*64-1);
2161 assert(iscore < 255*255*256 + s->lambda2*10);
2162 assert(iscore >= 0);
2163 assert(l>=0 && l<=255);
2164 assert(pl>=0 && pl<=255);
2167 int varc= iscore >> 8;
2168 int vard= score >> 8;
2169 if (vard <= 64 || vard < varc)
2170 c->scene_change_score+= ff_sqrt(vard) - ff_sqrt(varc);
2172 c->scene_change_score+= s->m.qscale;
2175 if(level!=s->block_max_depth){
2176 put_rac(&s->c, &s->block_state[4 + s_context], 0);
2177 score2 = encode_q_branch(s, level+1, 2*x+0, 2*y+0);
2178 score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+0);
2179 score2+= encode_q_branch(s, level+1, 2*x+0, 2*y+1);
2180 score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+1);
2181 score2+= s->lambda2>>FF_LAMBDA_SHIFT; //FIXME exact split overhead
2183 if(score2 < score && score2 < iscore)
2188 pred_mv(s, &pmx, &pmy, 0, left, top, tr);
2189 memcpy(pbbak, i_buffer, i_len);
2191 s->c.bytestream_start= pbbak_start;
2192 s->c.bytestream= pbbak + i_len;
2193 set_blocks(s, level, x, y, l, cb, cr, pmx, pmy, 0, BLOCK_INTRA);
2194 memcpy(s->block_state, i_state, sizeof(s->block_state));
2197 memcpy(pbbak, p_buffer, p_len);
2199 s->c.bytestream_start= pbbak_start;
2200 s->c.bytestream= pbbak + p_len;
2201 set_blocks(s, level, x, y, pl, pcb, pcr, mx, my, best_ref, 0);
2202 memcpy(s->block_state, p_state, sizeof(s->block_state));
2207 static always_inline int same_block(BlockNode *a, BlockNode *b){
2208 if((a->type&BLOCK_INTRA) && (b->type&BLOCK_INTRA)){
2209 return !((a->color[0] - b->color[0]) | (a->color[1] - b->color[1]) | (a->color[2] - b->color[2]));
2211 return !((a->mx - b->mx) | (a->my - b->my) | (a->ref - b->ref) | ((a->type ^ b->type)&BLOCK_INTRA));
2215 static void encode_q_branch2(SnowContext *s, int level, int x, int y){
2216 const int w= s->b_width << s->block_max_depth;
2217 const int rem_depth= s->block_max_depth - level;
2218 const int index= (x + y*w) << rem_depth;
2219 int trx= (x+1)<<rem_depth;
2220 BlockNode *b= &s->block[index];
2221 BlockNode *left = x ? &s->block[index-1] : &null_block;
2222 BlockNode *top = y ? &s->block[index-w] : &null_block;
2223 BlockNode *tl = y && x ? &s->block[index-w-1] : left;
2224 BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
2225 int pl = left->color[0];
2226 int pcb= left->color[1];
2227 int pcr= left->color[2];
2229 int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref);
2230 int mx_context= av_log2(2*ABS(left->mx - top->mx)) + 16*!!b->ref;
2231 int my_context= av_log2(2*ABS(left->my - top->my)) + 16*!!b->ref;
2232 int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
2235 set_blocks(s, level, x, y, pl, pcb, pcr, 0, 0, 0, BLOCK_INTRA);
2239 if(level!=s->block_max_depth){
2240 if(same_block(b,b+1) && same_block(b,b+w) && same_block(b,b+w+1)){
2241 put_rac(&s->c, &s->block_state[4 + s_context], 1);
2243 put_rac(&s->c, &s->block_state[4 + s_context], 0);
2244 encode_q_branch2(s, level+1, 2*x+0, 2*y+0);
2245 encode_q_branch2(s, level+1, 2*x+1, 2*y+0);
2246 encode_q_branch2(s, level+1, 2*x+0, 2*y+1);
2247 encode_q_branch2(s, level+1, 2*x+1, 2*y+1);
2251 if(b->type & BLOCK_INTRA){
2252 pred_mv(s, &pmx, &pmy, 0, left, top, tr);
2253 put_rac(&s->c, &s->block_state[1 + (left->type&1) + (top->type&1)], 1);
2254 put_symbol(&s->c, &s->block_state[32], b->color[0]-pl , 1);
2255 put_symbol(&s->c, &s->block_state[64], b->color[1]-pcb, 1);
2256 put_symbol(&s->c, &s->block_state[96], b->color[2]-pcr, 1);
2257 set_blocks(s, level, x, y, b->color[0], b->color[1], b->color[2], pmx, pmy, 0, BLOCK_INTRA);
2259 pred_mv(s, &pmx, &pmy, b->ref, left, top, tr);
2260 put_rac(&s->c, &s->block_state[1 + (left->type&1) + (top->type&1)], 0);
2261 if(s->ref_frames > 1)
2262 put_symbol(&s->c, &s->block_state[128 + 1024 + 32*ref_context], b->ref, 0);
2263 put_symbol(&s->c, &s->block_state[128 + 32*mx_context], b->mx - pmx, 1);
2264 put_symbol(&s->c, &s->block_state[128 + 32*my_context], b->my - pmy, 1);
2265 set_blocks(s, level, x, y, pl, pcb, pcr, b->mx, b->my, b->ref, 0);
2269 static void decode_q_branch(SnowContext *s, int level, int x, int y){
2270 const int w= s->b_width << s->block_max_depth;
2271 const int rem_depth= s->block_max_depth - level;
2272 const int index= (x + y*w) << rem_depth;
2273 int trx= (x+1)<<rem_depth;
2274 BlockNode *left = x ? &s->block[index-1] : &null_block;
2275 BlockNode *top = y ? &s->block[index-w] : &null_block;
2276 BlockNode *tl = y && x ? &s->block[index-w-1] : left;
2277 BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
2278 int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
2281 set_blocks(s, level, x, y, null_block.color[0], null_block.color[1], null_block.color[2], null_block.mx, null_block.my, null_block.ref, BLOCK_INTRA);
2285 if(level==s->block_max_depth || get_rac(&s->c, &s->block_state[4 + s_context])){
2287 int l = left->color[0];
2288 int cb= left->color[1];
2289 int cr= left->color[2];
2290 int mx= mid_pred(left->mx, top->mx, tr->mx);
2291 int my= mid_pred(left->my, top->my, tr->my);
2293 int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref);
2294 int mx_context= av_log2(2*ABS(left->mx - top->mx)) + 0*av_log2(2*ABS(tr->mx - top->mx));
2295 int my_context= av_log2(2*ABS(left->my - top->my)) + 0*av_log2(2*ABS(tr->my - top->my));
2297 type= get_rac(&s->c, &s->block_state[1 + left->type + top->type]) ? BLOCK_INTRA : 0;
2300 pred_mv(s, &mx, &my, 0, left, top, tr);
2301 l += get_symbol(&s->c, &s->block_state[32], 1);
2302 cb+= get_symbol(&s->c, &s->block_state[64], 1);
2303 cr+= get_symbol(&s->c, &s->block_state[96], 1);
2305 if(s->ref_frames > 1)
2306 ref= get_symbol(&s->c, &s->block_state[128 + 1024 + 32*ref_context], 0);
2307 pred_mv(s, &mx, &my, ref, left, top, tr);
2308 mx+= get_symbol(&s->c, &s->block_state[128 + 32*(mx_context + 16*!!ref)], 1);
2309 my+= get_symbol(&s->c, &s->block_state[128 + 32*(my_context + 16*!!ref)], 1);
2311 set_blocks(s, level, x, y, l, cb, cr, mx, my, ref, type);
2313 decode_q_branch(s, level+1, 2*x+0, 2*y+0);
2314 decode_q_branch(s, level+1, 2*x+1, 2*y+0);
2315 decode_q_branch(s, level+1, 2*x+0, 2*y+1);
2316 decode_q_branch(s, level+1, 2*x+1, 2*y+1);
2320 static void encode_blocks(SnowContext *s, int search){
2325 if(s->avctx->me_method == ME_ITER && !s->keyframe && search)
2329 if(s->c.bytestream_end - s->c.bytestream < w*MB_SIZE*MB_SIZE*3){ //FIXME nicer limit
2330 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2334 if(s->avctx->me_method == ME_ITER || !search)
2335 encode_q_branch2(s, 0, x, y);
2337 encode_q_branch (s, 0, x, y);
2342 static void decode_blocks(SnowContext *s){
2349 decode_q_branch(s, 0, x, y);
2354 static void mc_block(uint8_t *dst, uint8_t *src, uint8_t *tmp, int stride, int b_w, int b_h, int dx, int dy){
2357 for(y=0; y < b_h+5; y++){
2358 for(x=0; x < b_w; x++){
2365 // int am= 9*(a1+a2) - (a0+a3);
2366 int am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5);
2367 // int am= 18*(a2+a3) - 2*(a1+a4);
2368 // int aL= (-7*a0 + 105*a1 + 35*a2 - 5*a3)>>3;
2369 // int aR= (-7*a3 + 105*a2 + 35*a1 - 5*a0)>>3;
2371 // if(b_w==16) am= 8*(a1+a2);
2373 if(dx<8) am = (32*a2*( 8-dx) + am* dx + 128)>>8;
2374 else am = ( am*(16-dx) + 32*a3*(dx-8) + 128)>>8;
2376 /* FIXME Try increasing tmp buffer to 16 bits and not clipping here. Should give marginally better results. - Robert*/
2377 if(am&(~255)) am= ~(am>>31);
2381 /* if (dx< 4) tmp[x + y*stride]= (16*a1*( 4-dx) + aL* dx + 32)>>6;
2382 else if(dx< 8) tmp[x + y*stride]= ( aL*( 8-dx) + am*(dx- 4) + 32)>>6;
2383 else if(dx<12) tmp[x + y*stride]= ( am*(12-dx) + aR*(dx- 8) + 32)>>6;
2384 else tmp[x + y*stride]= ( aR*(16-dx) + 16*a2*(dx-12) + 32)>>6;*/
2389 tmp -= (b_h+5)*stride;
2391 for(y=0; y < b_h; y++){
2392 for(x=0; x < b_w; x++){
2393 int a0= tmp[x + 0*stride];
2394 int a1= tmp[x + 1*stride];
2395 int a2= tmp[x + 2*stride];
2396 int a3= tmp[x + 3*stride];
2397 int a4= tmp[x + 4*stride];
2398 int a5= tmp[x + 5*stride];
2399 int am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5);
2400 // int am= 18*(a2+a3) - 2*(a1+a4);
2401 /* int aL= (-7*a0 + 105*a1 + 35*a2 - 5*a3)>>3;
2402 int aR= (-7*a3 + 105*a2 + 35*a1 - 5*a0)>>3;*/
2404 // if(b_w==16) am= 8*(a1+a2);
2406 if(dy<8) am = (32*a2*( 8-dy) + am* dy + 128)>>8;
2407 else am = ( am*(16-dy) + 32*a3*(dy-8) + 128)>>8;
2409 if(am&(~255)) am= ~(am>>31);
2412 /* if (dy< 4) tmp[x + y*stride]= (16*a1*( 4-dy) + aL* dy + 32)>>6;
2413 else if(dy< 8) tmp[x + y*stride]= ( aL*( 8-dy) + am*(dy- 4) + 32)>>6;
2414 else if(dy<12) tmp[x + y*stride]= ( am*(12-dy) + aR*(dy- 8) + 32)>>6;
2415 else tmp[x + y*stride]= ( aR*(16-dy) + 16*a2*(dy-12) + 32)>>6;*/
2420 STOP_TIMER("mc_block")
2423 #define mca(dx,dy,b_w)\
2424 static void mc_block_hpel ## dx ## dy ## b_w(uint8_t *dst, uint8_t *src, int stride, int h){\
2425 uint8_t tmp[stride*(b_w+5)];\
2427 mc_block(dst, src-2-2*stride, tmp, stride, b_w, b_w, dx, dy);\
2439 static void pred_block(SnowContext *s, uint8_t *dst, uint8_t *tmp, int stride, int sx, int sy, int b_w, int b_h, BlockNode *block, int plane_index, int w, int h){
2440 if(block->type & BLOCK_INTRA){
2442 const int color = block->color[plane_index];
2443 const int color4= color*0x01010101;
2445 for(y=0; y < b_h; y++){
2446 *(uint32_t*)&dst[0 + y*stride]= color4;
2447 *(uint32_t*)&dst[4 + y*stride]= color4;
2448 *(uint32_t*)&dst[8 + y*stride]= color4;
2449 *(uint32_t*)&dst[12+ y*stride]= color4;
2450 *(uint32_t*)&dst[16+ y*stride]= color4;
2451 *(uint32_t*)&dst[20+ y*stride]= color4;
2452 *(uint32_t*)&dst[24+ y*stride]= color4;
2453 *(uint32_t*)&dst[28+ y*stride]= color4;
2456 for(y=0; y < b_h; y++){
2457 *(uint32_t*)&dst[0 + y*stride]= color4;
2458 *(uint32_t*)&dst[4 + y*stride]= color4;
2459 *(uint32_t*)&dst[8 + y*stride]= color4;
2460 *(uint32_t*)&dst[12+ y*stride]= color4;
2463 for(y=0; y < b_h; y++){
2464 *(uint32_t*)&dst[0 + y*stride]= color4;
2465 *(uint32_t*)&dst[4 + y*stride]= color4;
2468 for(y=0; y < b_h; y++){
2469 *(uint32_t*)&dst[0 + y*stride]= color4;
2472 for(y=0; y < b_h; y++){
2473 for(x=0; x < b_w; x++){
2474 dst[x + y*stride]= color;
2479 uint8_t *src= s->last_picture[block->ref].data[plane_index];
2480 const int scale= plane_index ? s->mv_scale : 2*s->mv_scale;
2481 int mx= block->mx*scale;
2482 int my= block->my*scale;
2483 const int dx= mx&15;
2484 const int dy= my&15;
2485 const int tab_index= 3 - (b_w>>2) + (b_w>>4);
2488 src += sx + sy*stride;
2489 if( (unsigned)sx >= w - b_w - 4
2490 || (unsigned)sy >= h - b_h - 4){
2491 ff_emulated_edge_mc(tmp + MB_SIZE, src, stride, b_w+5, b_h+5, sx, sy, w, h);
2494 // assert(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h);
2495 // assert(!(b_w&(b_w-1)));
2496 assert(b_w>1 && b_h>1);
2497 assert(tab_index>=0 && tab_index<4 || b_w==32);
2498 if((dx&3) || (dy&3) || !(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h) || (b_w&(b_w-1)))
2499 mc_block(dst, src, tmp, stride, b_w, b_h, dx, dy);
2502 for(y=0; y<b_h; y+=16){
2503 s->dsp.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + y*stride, src + 2 + (y+2)*stride,stride);
2504 s->dsp.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + 16 + y*stride, src + 18 + (y+2)*stride,stride);
2507 s->dsp.put_h264_qpel_pixels_tab[tab_index ][dy+(dx>>2)](dst,src + 2 + 2*stride,stride);
2508 else if(b_w==2*b_h){
2509 s->dsp.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst ,src + 2 + 2*stride,stride);
2510 s->dsp.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst+b_h,src + 2 + b_h + 2*stride,stride);
2513 s->dsp.put_h264_qpel_pixels_tab[tab_index ][dy+(dx>>2)](dst ,src + 2 + 2*stride ,stride);
2514 s->dsp.put_h264_qpel_pixels_tab[tab_index ][dy+(dx>>2)](dst+b_w*stride,src + 2 + 2*stride+b_w*stride,stride);
2519 void ff_snow_inner_add_yblock(uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h,
2520 int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8){
2523 for(y=0; y<b_h; y++){
2524 //FIXME ugly missue of obmc_stride
2525 uint8_t *obmc1= obmc + y*obmc_stride;
2526 uint8_t *obmc2= obmc1+ (obmc_stride>>1);
2527 uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1);
2528 uint8_t *obmc4= obmc3+ (obmc_stride>>1);
2529 dst = slice_buffer_get_line(sb, src_y + y);
2530 for(x=0; x<b_w; x++){
2531 int v= obmc1[x] * block[3][x + y*src_stride]
2532 +obmc2[x] * block[2][x + y*src_stride]
2533 +obmc3[x] * block[1][x + y*src_stride]
2534 +obmc4[x] * block[0][x + y*src_stride];
2536 v <<= 8 - LOG2_OBMC_MAX;
2538 v += 1<<(7 - FRAC_BITS);
2539 v >>= 8 - FRAC_BITS;
2542 v += dst[x + src_x];
2543 v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS;
2544 if(v&(~255)) v= ~(v>>31);
2545 dst8[x + y*src_stride] = v;
2547 dst[x + src_x] -= v;
2553 //FIXME name clenup (b_w, block_w, b_width stuff)
2554 static always_inline void add_yblock(SnowContext *s, int sliced, slice_buffer *sb, DWTELEM *dst, uint8_t *dst8, const uint8_t *obmc, int src_x, int src_y, int b_w, int b_h, int w, int h, int dst_stride, int src_stride, int obmc_stride, int b_x, int b_y, int add, int offset_dst, int plane_index){
2555 const int b_width = s->b_width << s->block_max_depth;
2556 const int b_height= s->b_height << s->block_max_depth;
2557 const int b_stride= b_width;
2558 BlockNode *lt= &s->block[b_x + b_y*b_stride];
2559 BlockNode *rt= lt+1;
2560 BlockNode *lb= lt+b_stride;
2561 BlockNode *rb= lb+1;
2563 int tmp_step= src_stride >= 7*MB_SIZE ? MB_SIZE : MB_SIZE*src_stride;
2564 uint8_t tmp[src_stride*7*MB_SIZE]; //FIXME align
2571 }else if(b_x + 1 >= b_width){
2578 }else if(b_y + 1 >= b_height){
2583 if(src_x<0){ //FIXME merge with prev & always round internal width upto *16
2586 if(!sliced && !offset_dst)
2589 }else if(src_x + b_w > w){
2593 obmc -= src_y*obmc_stride;
2595 if(!sliced && !offset_dst)
2596 dst -= src_y*dst_stride;
2598 }else if(src_y + b_h> h){
2602 if(b_w<=0 || b_h<=0) return;
2604 assert(src_stride > 2*MB_SIZE + 5);
2605 if(!sliced && offset_dst)
2606 dst += src_x + src_y*dst_stride;
2607 dst8+= src_x + src_y*src_stride;
2608 // src += src_x + src_y*src_stride;
2610 ptmp= tmp + 3*tmp_step;
2613 pred_block(s, block[0], tmp, src_stride, src_x, src_y, b_w, b_h, lt, plane_index, w, h);
2615 if(same_block(lt, rt)){
2620 pred_block(s, block[1], tmp, src_stride, src_x, src_y, b_w, b_h, rt, plane_index, w, h);
2623 if(same_block(lt, lb)){
2625 }else if(same_block(rt, lb)){
2630 pred_block(s, block[2], tmp, src_stride, src_x, src_y, b_w, b_h, lb, plane_index, w, h);
2633 if(same_block(lt, rb) ){
2635 }else if(same_block(rt, rb)){
2637 }else if(same_block(lb, rb)){
2641 pred_block(s, block[3], tmp, src_stride, src_x, src_y, b_w, b_h, rb, plane_index, w, h);
2644 for(y=0; y<b_h; y++){
2645 for(x=0; x<b_w; x++){
2646 int v= obmc [x + y*obmc_stride] * block[3][x + y*src_stride] * (256/OBMC_MAX);
2647 if(add) dst[x + y*dst_stride] += v;
2648 else dst[x + y*dst_stride] -= v;
2651 for(y=0; y<b_h; y++){
2652 uint8_t *obmc2= obmc + (obmc_stride>>1);
2653 for(x=0; x<b_w; x++){
2654 int v= obmc2[x + y*obmc_stride] * block[2][x + y*src_stride] * (256/OBMC_MAX);
2655 if(add) dst[x + y*dst_stride] += v;
2656 else dst[x + y*dst_stride] -= v;
2659 for(y=0; y<b_h; y++){
2660 uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
2661 for(x=0; x<b_w; x++){
2662 int v= obmc3[x + y*obmc_stride] * block[1][x + y*src_stride] * (256/OBMC_MAX);
2663 if(add) dst[x + y*dst_stride] += v;
2664 else dst[x + y*dst_stride] -= v;
2667 for(y=0; y<b_h; y++){
2668 uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
2669 uint8_t *obmc4= obmc3+ (obmc_stride>>1);
2670 for(x=0; x<b_w; x++){
2671 int v= obmc4[x + y*obmc_stride] * block[0][x + y*src_stride] * (256/OBMC_MAX);
2672 if(add) dst[x + y*dst_stride] += v;
2673 else dst[x + y*dst_stride] -= v;
2680 s->dsp.inner_add_yblock(obmc, obmc_stride, block, b_w, b_h, src_x,src_y, src_stride, sb, add, dst8);
2681 STOP_TIMER("inner_add_yblock")
2683 for(y=0; y<b_h; y++){
2684 //FIXME ugly missue of obmc_stride
2685 uint8_t *obmc1= obmc + y*obmc_stride;
2686 uint8_t *obmc2= obmc1+ (obmc_stride>>1);
2687 uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1);
2688 uint8_t *obmc4= obmc3+ (obmc_stride>>1);
2689 for(x=0; x<b_w; x++){
2690 int v= obmc1[x] * block[3][x + y*src_stride]
2691 +obmc2[x] * block[2][x + y*src_stride]
2692 +obmc3[x] * block[1][x + y*src_stride]
2693 +obmc4[x] * block[0][x + y*src_stride];
2695 v <<= 8 - LOG2_OBMC_MAX;
2697 v += 1<<(7 - FRAC_BITS);
2698 v >>= 8 - FRAC_BITS;
2701 v += dst[x + y*dst_stride];
2702 v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS;
2703 if(v&(~255)) v= ~(v>>31);
2704 dst8[x + y*src_stride] = v;
2706 dst[x + y*dst_stride] -= v;
2713 static always_inline void predict_slice_buffered(SnowContext *s, slice_buffer * sb, DWTELEM * old_buffer, int plane_index, int add, int mb_y){
2714 Plane *p= &s->plane[plane_index];
2715 const int mb_w= s->b_width << s->block_max_depth;
2716 const int mb_h= s->b_height << s->block_max_depth;
2718 int block_size = MB_SIZE >> s->block_max_depth;
2719 int block_w = plane_index ? block_size/2 : block_size;
2720 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
2721 int obmc_stride= plane_index ? block_size : 2*block_size;
2722 int ref_stride= s->current_picture.linesize[plane_index];
2723 uint8_t *dst8= s->current_picture.data[plane_index];
2728 if(s->keyframe || (s->avctx->debug&512)){
2733 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++)
2735 // DWTELEM * line = slice_buffer_get_line(sb, y);
2736 DWTELEM * line = sb->line[y];
2739 // int v= buf[x + y*w] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
2740 int v= line[x] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
2742 if(v&(~255)) v= ~(v>>31);
2743 dst8[x + y*ref_stride]= v;
2747 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++)
2749 // DWTELEM * line = slice_buffer_get_line(sb, y);
2750 DWTELEM * line = sb->line[y];
2753 line[x] -= 128 << FRAC_BITS;
2754 // buf[x + y*w]-= 128<<FRAC_BITS;
2762 for(mb_x=0; mb_x<=mb_w; mb_x++){
2765 add_yblock(s, 1, sb, old_buffer, dst8, obmc,
2766 block_w*mb_x - block_w/2,
2767 block_w*mb_y - block_w/2,
2770 w, ref_stride, obmc_stride,
2772 add, 0, plane_index);
2774 STOP_TIMER("add_yblock")
2777 STOP_TIMER("predict_slice")
2780 static always_inline void predict_slice(SnowContext *s, DWTELEM *buf, int plane_index, int add, int mb_y){
2781 Plane *p= &s->plane[plane_index];
2782 const int mb_w= s->b_width << s->block_max_depth;
2783 const int mb_h= s->b_height << s->block_max_depth;
2785 int block_size = MB_SIZE >> s->block_max_depth;
2786 int block_w = plane_index ? block_size/2 : block_size;
2787 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
2788 const int obmc_stride= plane_index ? block_size : 2*block_size;
2789 int ref_stride= s->current_picture.linesize[plane_index];
2790 uint8_t *dst8= s->current_picture.data[plane_index];
2795 if(s->keyframe || (s->avctx->debug&512)){
2800 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
2802 int v= buf[x + y*w] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
2804 if(v&(~255)) v= ~(v>>31);
2805 dst8[x + y*ref_stride]= v;
2809 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
2811 buf[x + y*w]-= 128<<FRAC_BITS;
2819 for(mb_x=0; mb_x<=mb_w; mb_x++){
2822 add_yblock(s, 0, NULL, buf, dst8, obmc,
2823 block_w*mb_x - block_w/2,
2824 block_w*mb_y - block_w/2,
2827 w, ref_stride, obmc_stride,
2829 add, 1, plane_index);
2831 STOP_TIMER("add_yblock")
2834 STOP_TIMER("predict_slice")
2837 static always_inline void predict_plane(SnowContext *s, DWTELEM *buf, int plane_index, int add){
2838 const int mb_h= s->b_height << s->block_max_depth;
2840 for(mb_y=0; mb_y<=mb_h; mb_y++)
2841 predict_slice(s, buf, plane_index, add, mb_y);
2844 static int get_dc(SnowContext *s, int mb_x, int mb_y, int plane_index){
2846 Plane *p= &s->plane[plane_index];
2847 const int block_size = MB_SIZE >> s->block_max_depth;
2848 const int block_w = plane_index ? block_size/2 : block_size;
2849 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
2850 const int obmc_stride= plane_index ? block_size : 2*block_size;
2851 const int ref_stride= s->current_picture.linesize[plane_index];
2852 uint8_t *src= s-> input_picture.data[plane_index];
2853 DWTELEM *dst= (DWTELEM*)s->m.obmc_scratchpad + plane_index*block_size*block_size*4;
2854 const int b_stride = s->b_width << s->block_max_depth;
2855 const int w= p->width;
2856 const int h= p->height;
2857 int index= mb_x + mb_y*b_stride;
2858 BlockNode *b= &s->block[index];
2859 BlockNode backup= *b;
2863 b->type|= BLOCK_INTRA;
2864 b->color[plane_index]= 0;
2865 memset(dst, 0, obmc_stride*obmc_stride*sizeof(DWTELEM));
2868 int mb_x2= mb_x + (i &1) - 1;
2869 int mb_y2= mb_y + (i>>1) - 1;
2870 int x= block_w*mb_x2 + block_w/2;
2871 int y= block_w*mb_y2 + block_w/2;
2873 add_yblock(s, 0, NULL, dst + ((i&1)+(i>>1)*obmc_stride)*block_w, NULL, obmc,
2874 x, y, block_w, block_w, w, h, obmc_stride, ref_stride, obmc_stride, mb_x2, mb_y2, 0, 0, plane_index);
2876 for(y2= FFMAX(y, 0); y2<FFMIN(h, y+block_w); y2++){
2877 for(x2= FFMAX(x, 0); x2<FFMIN(w, x+block_w); x2++){
2878 int index= x2-(block_w*mb_x - block_w/2) + (y2-(block_w*mb_y - block_w/2))*obmc_stride;
2879 int obmc_v= obmc[index];
2881 if(y<0) obmc_v += obmc[index + block_w*obmc_stride];
2882 if(x<0) obmc_v += obmc[index + block_w];
2883 if(y+block_w>h) obmc_v += obmc[index - block_w*obmc_stride];
2884 if(x+block_w>w) obmc_v += obmc[index - block_w];
2885 //FIXME precalc this or simplify it somehow else
2887 d = -dst[index] + (1<<(FRAC_BITS-1));
2889 ab += (src[x2 + y2*ref_stride] - (d>>FRAC_BITS)) * obmc_v;
2890 aa += obmc_v * obmc_v; //FIXME precalclate this
2896 return clip(((ab<<LOG2_OBMC_MAX) + aa/2)/aa, 0, 255); //FIXME we shouldnt need cliping
2899 static inline int get_block_bits(SnowContext *s, int x, int y, int w){
2900 const int b_stride = s->b_width << s->block_max_depth;
2901 const int b_height = s->b_height<< s->block_max_depth;
2902 int index= x + y*b_stride;
2903 BlockNode *b = &s->block[index];
2904 BlockNode *left = x ? &s->block[index-1] : &null_block;
2905 BlockNode *top = y ? &s->block[index-b_stride] : &null_block;
2906 BlockNode *tl = y && x ? &s->block[index-b_stride-1] : left;
2907 BlockNode *tr = y && x+w<b_stride ? &s->block[index-b_stride+w] : tl;
2909 // int mx_context= av_log2(2*ABS(left->mx - top->mx));
2910 // int my_context= av_log2(2*ABS(left->my - top->my));
2912 if(x<0 || x>=b_stride || y>=b_height)
2919 00001XXXX 15-30 8-15
2921 //FIXME try accurate rate
2922 //FIXME intra and inter predictors if surrounding blocks arent the same type
2923 if(b->type & BLOCK_INTRA){
2924 return 3+2*( av_log2(2*ABS(left->color[0] - b->color[0]))
2925 + av_log2(2*ABS(left->color[1] - b->color[1]))
2926 + av_log2(2*ABS(left->color[2] - b->color[2])));
2928 pred_mv(s, &dmx, &dmy, b->ref, left, top, tr);
2931 return 2*(1 + av_log2(2*ABS(dmx)) //FIXME kill the 2* can be merged in lambda
2932 + av_log2(2*ABS(dmy))
2933 + av_log2(2*b->ref));
2937 static int get_block_rd(SnowContext *s, int mb_x, int mb_y, int plane_index, const uint8_t *obmc_edged){
2938 Plane *p= &s->plane[plane_index];
2939 const int block_size = MB_SIZE >> s->block_max_depth;
2940 const int block_w = plane_index ? block_size/2 : block_size;
2941 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
2942 const int obmc_stride= plane_index ? block_size : 2*block_size;
2943 const int ref_stride= s->current_picture.linesize[plane_index];
2944 uint8_t *dst= s->current_picture.data[plane_index];
2945 uint8_t *src= s-> input_picture.data[plane_index];
2946 DWTELEM *pred= (DWTELEM*)s->m.obmc_scratchpad + plane_index*block_size*block_size*4;
2947 uint8_t cur[ref_stride*2*MB_SIZE]; //FIXME alignment
2948 uint8_t tmp[ref_stride*(2*MB_SIZE+5)];
2949 const int b_stride = s->b_width << s->block_max_depth;
2950 const int b_height = s->b_height<< s->block_max_depth;
2951 const int w= p->width;
2952 const int h= p->height;
2955 const int penalty_factor= get_penalty_factor(s->lambda, s->lambda2, s->avctx->me_cmp);
2956 int sx= block_w*mb_x - block_w/2;
2957 int sy= block_w*mb_y - block_w/2;
2958 int x0= FFMAX(0,-sx);
2959 int y0= FFMAX(0,-sy);
2960 int x1= FFMIN(block_w*2, w-sx);
2961 int y1= FFMIN(block_w*2, h-sy);
2964 pred_block(s, cur, tmp, ref_stride, sx, sy, block_w*2, block_w*2, &s->block[mb_x + mb_y*b_stride], plane_index, w, h);
2966 for(y=y0; y<y1; y++){
2967 const uint8_t *obmc1= obmc_edged + y*obmc_stride;
2968 const DWTELEM *pred1 = pred + y*obmc_stride;
2969 uint8_t *cur1 = cur + y*ref_stride;
2970 uint8_t *dst1 = dst + sx + (sy+y)*ref_stride;
2971 for(x=x0; x<x1; x++){
2972 int v = (cur1[x] * obmc1[x]) << (FRAC_BITS - LOG2_OBMC_MAX);
2973 v = (v + pred1[x]) >> FRAC_BITS;
2974 if(v&(~255)) v= ~(v>>31);
2979 /* copy the regions where obmc[] = (uint8_t)256 */
2980 if(LOG2_OBMC_MAX == 8
2981 && (mb_x == 0 || mb_x == b_stride-1)
2982 && (mb_y == 0 || mb_y == b_height-1)){
2991 for(y=y0; y<y1; y++)
2992 memcpy(dst + sx+x0 + (sy+y)*ref_stride, cur + x0 + y*ref_stride, x1-x0);
2996 /* FIXME rearrange dsputil to fit 32x32 cmp functions */
2997 /* FIXME check alignment of the cmp wavelet vs the encoding wavelet */
2998 /* FIXME cmps overlap but don't cover the wavelet's whole support,
2999 * so improving the score of one block is not strictly guaranteed to
3000 * improve the score of the whole frame, so iterative motion est
3001 * doesn't always converge. */
3002 if(s->avctx->me_cmp == FF_CMP_W97)
3003 distortion = w97_32_c(&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, 32);
3004 else if(s->avctx->me_cmp == FF_CMP_W53)
3005 distortion = w53_32_c(&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, 32);
3009 int off = sx+16*(i&1) + (sy+16*(i>>1))*ref_stride;
3010 distortion += s->dsp.me_cmp[0](&s->m, src + off, dst + off, ref_stride, 16);
3015 distortion = s->dsp.me_cmp[0](&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, block_w*2);
3024 rate += get_block_bits(s, mb_x + (i&1) - (i>>1), mb_y + (i>>1), 1);
3026 if(mb_x == b_stride-2)
3027 rate += get_block_bits(s, mb_x + 1, mb_y + 1, 1);
3029 return distortion + rate*penalty_factor;
3032 static int get_4block_rd(SnowContext *s, int mb_x, int mb_y, int plane_index){
3034 Plane *p= &s->plane[plane_index];
3035 const int block_size = MB_SIZE >> s->block_max_depth;
3036 const int block_w = plane_index ? block_size/2 : block_size;
3037 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
3038 const int obmc_stride= plane_index ? block_size : 2*block_size;
3039 const int ref_stride= s->current_picture.linesize[plane_index];
3040 uint8_t *dst= s->current_picture.data[plane_index];
3041 uint8_t *src= s-> input_picture.data[plane_index];
3042 static const DWTELEM zero_dst[4096]; //FIXME
3043 const int b_stride = s->b_width << s->block_max_depth;
3044 const int b_height = s->b_height<< s->block_max_depth;
3045 const int w= p->width;
3046 const int h= p->height;
3049 const int penalty_factor= get_penalty_factor(s->lambda, s->lambda2, s->avctx->me_cmp);
3052 int mb_x2= mb_x + (i%3) - 1;
3053 int mb_y2= mb_y + (i/3) - 1;
3054 int x= block_w*mb_x2 + block_w/2;
3055 int y= block_w*mb_y2 + block_w/2;
3057 add_yblock(s, 0, NULL, zero_dst, dst, obmc,
3058 x, y, block_w, block_w, w, h, /*dst_stride*/0, ref_stride, obmc_stride, mb_x2, mb_y2, 1, 1, plane_index);
3060 //FIXME find a cleaner/simpler way to skip the outside stuff
3061 for(y2= y; y2<0; y2++)
3062 memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, block_w);
3063 for(y2= h; y2<y+block_w; y2++)
3064 memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, block_w);
3066 for(y2= y; y2<y+block_w; y2++)
3067 memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, -x);
3070 for(y2= y; y2<y+block_w; y2++)
3071 memcpy(dst + w + y2*ref_stride, src + w + y2*ref_stride, x+block_w - w);
3074 assert(block_w== 8 || block_w==16);
3075 distortion += s->dsp.me_cmp[block_w==8](&s->m, src + x + y*ref_stride, dst + x + y*ref_stride, ref_stride, block_w);
3079 BlockNode *b= &s->block[mb_x+mb_y*b_stride];
3080 int merged= same_block(b,b+1) && same_block(b,b+b_stride) && same_block(b,b+b_stride+1);
3088 rate = get_block_bits(s, mb_x, mb_y, 2);
3089 for(i=merged?4:0; i<9; i++){
3090 static const int dxy[9][2] = {{0,0},{1,0},{0,1},{1,1},{2,0},{2,1},{-1,2},{0,2},{1,2}};
3091 rate += get_block_bits(s, mb_x + dxy[i][0], mb_y + dxy[i][1], 1);
3094 return distortion + rate*penalty_factor;
3097 static always_inline int check_block(SnowContext *s, int mb_x, int mb_y, int p[3], int intra, const uint8_t *obmc_edged, int *best_rd){
3098 const int b_stride= s->b_width << s->block_max_depth;
3099 BlockNode *block= &s->block[mb_x + mb_y * b_stride];
3100 BlockNode backup= *block;
3101 int rd, index, value;
3103 assert(mb_x>=0 && mb_y>=0);
3104 assert(mb_x<b_stride);
3107 block->color[0] = p[0];
3108 block->color[1] = p[1];
3109 block->color[2] = p[2];
3110 block->type |= BLOCK_INTRA;
3112 index= (p[0] + 31*p[1]) & (ME_CACHE_SIZE-1);
3113 value= s->me_cache_generation + (p[0]>>10) + (p[1]<<6) + (block->ref<<12);
3114 if(s->me_cache[index] == value)
3116 s->me_cache[index]= value;
3120 block->type &= ~BLOCK_INTRA;
3123 rd= get_block_rd(s, mb_x, mb_y, 0, obmc_edged);
3135 /* special case for int[2] args we discard afterward, fixes compilation prob with gcc 2.95 */
3136 static always_inline int check_block_inter(SnowContext *s, int mb_x, int mb_y, int p0, int p1, const uint8_t *obmc_edged, int *best_rd){
3137 int p[2] = {p0, p1};
3138 return check_block(s, mb_x, mb_y, p, 0, obmc_edged, best_rd);
3141 static always_inline int check_4block_inter(SnowContext *s, int mb_x, int mb_y, int p0, int p1, int ref, int *best_rd){
3142 const int b_stride= s->b_width << s->block_max_depth;
3143 BlockNode *block= &s->block[mb_x + mb_y * b_stride];
3144 BlockNode backup[4]= {block[0], block[1], block[b_stride], block[b_stride+1]};
3145 int rd, index, value;
3147 assert(mb_x>=0 && mb_y>=0);
3148 assert(mb_x<b_stride);
3149 assert(((mb_x|mb_y)&1) == 0);
3151 index= (p0 + 31*p1) & (ME_CACHE_SIZE-1);
3152 value= s->me_cache_generation + (p0>>10) + (p1<<6) + (block->ref<<12);
3153 if(s->me_cache[index] == value)
3155 s->me_cache[index]= value;
3160 block->type &= ~BLOCK_INTRA;
3161 block[1]= block[b_stride]= block[b_stride+1]= *block;
3163 rd= get_4block_rd(s, mb_x, mb_y, 0);
3170 block[0]= backup[0];
3171 block[1]= backup[1];
3172 block[b_stride]= backup[2];
3173 block[b_stride+1]= backup[3];
3178 static void iterative_me(SnowContext *s){
3179 int pass, mb_x, mb_y;
3180 const int b_width = s->b_width << s->block_max_depth;
3181 const int b_height= s->b_height << s->block_max_depth;
3182 const int b_stride= b_width;
3186 RangeCoder r = s->c;
3187 uint8_t state[sizeof(s->block_state)];
3188 memcpy(state, s->block_state, sizeof(s->block_state));
3189 for(mb_y= 0; mb_y<s->b_height; mb_y++)
3190 for(mb_x= 0; mb_x<s->b_width; mb_x++)
3191 encode_q_branch(s, 0, mb_x, mb_y);
3193 memcpy(s->block_state, state, sizeof(s->block_state));
3196 for(pass=0; pass<25; pass++){
3199 for(mb_y= 0; mb_y<b_height; mb_y++){
3200 for(mb_x= 0; mb_x<b_width; mb_x++){
3201 int dia_change, i, j, ref;
3202 int best_rd= INT_MAX, ref_rd;
3203 BlockNode backup, ref_b;
3204 const int index= mb_x + mb_y * b_stride;
3205 BlockNode *block= &s->block[index];
3206 BlockNode *tb = mb_y ? &s->block[index-b_stride ] : NULL;
3207 BlockNode *lb = mb_x ? &s->block[index -1] : NULL;
3208 BlockNode *rb = mb_x+1<b_width ? &s->block[index +1] : NULL;
3209 BlockNode *bb = mb_y+1<b_height ? &s->block[index+b_stride ] : NULL;
3210 BlockNode *tlb= mb_x && mb_y ? &s->block[index-b_stride-1] : NULL;
3211 BlockNode *trb= mb_x+1<b_width && mb_y ? &s->block[index-b_stride+1] : NULL;
3212 BlockNode *blb= mb_x && mb_y+1<b_height ? &s->block[index+b_stride-1] : NULL;
3213 BlockNode *brb= mb_x+1<b_width && mb_y+1<b_height ? &s->block[index+b_stride+1] : NULL;
3214 const int b_w= (MB_SIZE >> s->block_max_depth);
3215 uint8_t obmc_edged[b_w*2][b_w*2];
3217 if(pass && (block->type & BLOCK_OPT))
3219 block->type |= BLOCK_OPT;
3223 if(!s->me_cache_generation)
3224 memset(s->me_cache, 0, sizeof(s->me_cache));
3225 s->me_cache_generation += 1<<22;
3230 memcpy(obmc_edged, obmc_tab[s->block_max_depth], b_w*b_w*4);
3232 for(y=0; y<b_w*2; y++)
3233 memset(obmc_edged[y], obmc_edged[y][0] + obmc_edged[y][b_w-1], b_w);
3234 if(mb_x==b_stride-1)
3235 for(y=0; y<b_w*2; y++)
3236 memset(obmc_edged[y]+b_w, obmc_edged[y][b_w] + obmc_edged[y][b_w*2-1], b_w);
3238 for(x=0; x<b_w*2; x++)
3239 obmc_edged[0][x] += obmc_edged[b_w-1][x];
3240 for(y=1; y<b_w; y++)
3241 memcpy(obmc_edged[y], obmc_edged[0], b_w*2);
3243 if(mb_y==b_height-1){
3244 for(x=0; x<b_w*2; x++)
3245 obmc_edged[b_w*2-1][x] += obmc_edged[b_w][x];
3246 for(y=b_w; y<b_w*2-1; y++)
3247 memcpy(obmc_edged[y], obmc_edged[b_w*2-1], b_w*2);
3251 //skip stuff outside the picture
3252 if(mb_x==0 || mb_y==0 || mb_x==b_width-1 || mb_y==b_height-1)
3254 uint8_t *src= s-> input_picture.data[0];
3255 uint8_t *dst= s->current_picture.data[0];
3256 const int stride= s->current_picture.linesize[0];
3257 const int block_w= MB_SIZE >> s->block_max_depth;
3258 const int sx= block_w*mb_x - block_w/2;
3259 const int sy= block_w*mb_y - block_w/2;
3260 const int w= s->plane[0].width;
3261 const int h= s->plane[0].height;
3265 memcpy(dst + sx + y*stride, src + sx + y*stride, block_w*2);
3266 for(y=h; y<sy+block_w*2; y++)
3267 memcpy(dst + sx + y*stride, src + sx + y*stride, block_w*2);
3269 for(y=sy; y<sy+block_w*2; y++)
3270 memcpy(dst + sx + y*stride, src + sx + y*stride, -sx);
3272 if(sx+block_w*2 > w){
3273 for(y=sy; y<sy+block_w*2; y++)
3274 memcpy(dst + w + y*stride, src + w + y*stride, sx+block_w*2 - w);
3278 // intra(black) = neighbors' contribution to the current block
3280 color[i]= get_dc(s, mb_x, mb_y, i);
3282 // get previous score (cant be cached due to OBMC)
3283 if(pass > 0 && (block->type&BLOCK_INTRA)){
3284 int color0[3]= {block->color[0], block->color[1], block->color[2]};
3285 check_block(s, mb_x, mb_y, color0, 1, *obmc_edged, &best_rd);
3287 check_block_inter(s, mb_x, mb_y, block->mx, block->my, *obmc_edged, &best_rd);
3291 for(ref=0; ref < s->ref_frames; ref++){
3292 int16_t (*mvr)[2]= &s->ref_mvs[ref][index];
3293 if(s->ref_scores[ref][index] > s->ref_scores[ref_b.ref][index]*3/2) //FIXME tune threshold
3298 check_block_inter(s, mb_x, mb_y, mvr[0][0], mvr[0][1], *obmc_edged, &best_rd);
3299 check_block_inter(s, mb_x, mb_y, 0, 0, *obmc_edged, &best_rd);
3301 check_block_inter(s, mb_x, mb_y, mvr[-b_stride][0], mvr[-b_stride][1], *obmc_edged, &best_rd);
3303 check_block_inter(s, mb_x, mb_y, mvr[-1][0], mvr[-1][1], *obmc_edged, &best_rd);
3305 check_block_inter(s, mb_x, mb_y, mvr[1][0], mvr[1][1], *obmc_edged, &best_rd);
3307 check_block_inter(s, mb_x, mb_y, mvr[b_stride][0], mvr[b_stride][1], *obmc_edged, &best_rd);
3310 //FIXME avoid subpel interpol / round to nearest integer
3313 for(i=0; i<FFMAX(s->avctx->dia_size, 1); i++){
3315 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+4*(i-j), block->my+(4*j), *obmc_edged, &best_rd);
3316 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx-4*(i-j), block->my-(4*j), *obmc_edged, &best_rd);
3317 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+4*(i-j), block->my-(4*j), *obmc_edged, &best_rd);
3318 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx-4*(i-j), block->my+(4*j), *obmc_edged, &best_rd);
3324 static const int square[8][2]= {{+1, 0},{-1, 0},{ 0,+1},{ 0,-1},{+1,+1},{-1,-1},{+1,-1},{-1,+1},};
3327 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+square[i][0], block->my+square[i][1], *obmc_edged, &best_rd);
3329 //FIXME or try the standard 2 pass qpel or similar
3331 mvr[0][0]= block->mx;
3332 mvr[0][1]= block->my;
3333 if(ref_rd > best_rd){
3341 check_block(s, mb_x, mb_y, color, 1, *obmc_edged, &best_rd);
3342 //FIXME RD style color selection
3344 if(!same_block(block, &backup)){
3345 if(tb ) tb ->type &= ~BLOCK_OPT;
3346 if(lb ) lb ->type &= ~BLOCK_OPT;
3347 if(rb ) rb ->type &= ~BLOCK_OPT;
3348 if(bb ) bb ->type &= ~BLOCK_OPT;
3349 if(tlb) tlb->type &= ~BLOCK_OPT;
3350 if(trb) trb->type &= ~BLOCK_OPT;
3351 if(blb) blb->type &= ~BLOCK_OPT;
3352 if(brb) brb->type &= ~BLOCK_OPT;
3357 av_log(NULL, AV_LOG_ERROR, "pass:%d changed:%d\n", pass, change);
3362 if(s->block_max_depth == 1){
3364 for(mb_y= 0; mb_y<b_height; mb_y+=2){
3365 for(mb_x= 0; mb_x<b_width; mb_x+=2){
3367 int best_rd, init_rd;
3368 const int index= mb_x + mb_y * b_stride;
3371 b[0]= &s->block[index];
3373 b[2]= b[0]+b_stride;
3375 if(same_block(b[0], b[1]) &&
3376 same_block(b[0], b[2]) &&
3377 same_block(b[0], b[3]))
3380 if(!s->me_cache_generation)
3381 memset(s->me_cache, 0, sizeof(s->me_cache));
3382 s->me_cache_generation += 1<<22;
3384 init_rd= best_rd= get_4block_rd(s, mb_x, mb_y, 0);
3386 //FIXME more multiref search?
3387 check_4block_inter(s, mb_x, mb_y,
3388 (b[0]->mx + b[1]->mx + b[2]->mx + b[3]->mx + 2) >> 2,
3389 (b[0]->my + b[1]->my + b[2]->my + b[3]->my + 2) >> 2, 0, &best_rd);
3392 if(!(b[i]->type&BLOCK_INTRA))
3393 check_4block_inter(s, mb_x, mb_y, b[i]->mx, b[i]->my, b[i]->ref, &best_rd);
3395 if(init_rd != best_rd)
3399 av_log(NULL, AV_LOG_ERROR, "pass:4mv changed:%d\n", change*4);
3403 static void quantize(SnowContext *s, SubBand *b, DWTELEM *src, int stride, int bias){
3404 const int level= b->level;
3405 const int w= b->width;
3406 const int h= b->height;
3407 const int qlog= clip(s->qlog + b->qlog, 0, QROOT*16);
3408 const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
3409 int x,y, thres1, thres2;
3412 if(s->qlog == LOSSLESS_QLOG) return;
3414 bias= bias ? 0 : (3*qmul)>>3;
3415 thres1= ((qmul - bias)>>QEXPSHIFT) - 1;
3421 int i= src[x + y*stride];
3423 if((unsigned)(i+thres1) > thres2){
3426 i/= qmul; //FIXME optimize
3427 src[x + y*stride]= i;
3431 i/= qmul; //FIXME optimize
3432 src[x + y*stride]= -i;
3435 src[x + y*stride]= 0;
3441 int i= src[x + y*stride];
3443 if((unsigned)(i+thres1) > thres2){
3446 i= (i + bias) / qmul; //FIXME optimize
3447 src[x + y*stride]= i;
3451 i= (i + bias) / qmul; //FIXME optimize
3452 src[x + y*stride]= -i;
3455 src[x + y*stride]= 0;
3459 if(level+1 == s->spatial_decomposition_count){
3460 // STOP_TIMER("quantize")
3464 static void dequantize_slice_buffered(SnowContext *s, slice_buffer * sb, SubBand *b, DWTELEM *src, int stride, int start_y, int end_y){
3465 const int w= b->width;
3466 const int qlog= clip(s->qlog + b->qlog, 0, QROOT*16);
3467 const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
3468 const int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
3472 if(s->qlog == LOSSLESS_QLOG) return;
3474 for(y=start_y; y<end_y; y++){
3475 // DWTELEM * line = slice_buffer_get_line_from_address(sb, src + (y * stride));
3476 DWTELEM * line = slice_buffer_get_line(sb, (y * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
3480 line[x]= -((-i*qmul + qadd)>>(QEXPSHIFT)); //FIXME try different bias
3482 line[x]= (( i*qmul + qadd)>>(QEXPSHIFT));
3486 if(w > 200 /*level+1 == s->spatial_decomposition_count*/){
3487 STOP_TIMER("dquant")
3491 static void dequantize(SnowContext *s, SubBand *b, DWTELEM *src, int stride){
3492 const int w= b->width;
3493 const int h= b->height;
3494 const int qlog= clip(s->qlog + b->qlog, 0, QROOT*16);
3495 const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
3496 const int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
3500 if(s->qlog == LOSSLESS_QLOG) return;
3504 int i= src[x + y*stride];
3506 src[x + y*stride]= -((-i*qmul + qadd)>>(QEXPSHIFT)); //FIXME try different bias
3508 src[x + y*stride]= (( i*qmul + qadd)>>(QEXPSHIFT));
3512 if(w > 200 /*level+1 == s->spatial_decomposition_count*/){
3513 STOP_TIMER("dquant")
3517 static void decorrelate(SnowContext *s, SubBand *b, DWTELEM *src, int stride, int inverse, int use_median){
3518 const int w= b->width;
3519 const int h= b->height;
3522 for(y=h-1; y>=0; y--){
3523 for(x=w-1; x>=0; x--){
3524 int i= x + y*stride;
3528 if(y && x+1<w) src[i] -= mid_pred(src[i - 1], src[i - stride], src[i - stride + 1]);
3529 else src[i] -= src[i - 1];
3531 if(y) src[i] -= mid_pred(src[i - 1], src[i - stride], src[i - 1] + src[i - stride] - src[i - 1 - stride]);
3532 else src[i] -= src[i - 1];
3535 if(y) src[i] -= src[i - stride];
3541 static void correlate_slice_buffered(SnowContext *s, slice_buffer * sb, SubBand *b, DWTELEM *src, int stride, int inverse, int use_median, int start_y, int end_y){
3542 const int w= b->width;
3551 line = slice_buffer_get_line(sb, ((start_y - 1) * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
3553 for(y=start_y; y<end_y; y++){
3555 // line = slice_buffer_get_line_from_address(sb, src + (y * stride));
3556 line = slice_buffer_get_line(sb, (y * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
3560 if(y && x+1<w) line[x] += mid_pred(line[x - 1], prev[x], prev[x + 1]);
3561 else line[x] += line[x - 1];
3563 if(y) line[x] += mid_pred(line[x - 1], prev[x], line[x - 1] + prev[x] - prev[x - 1]);
3564 else line[x] += line[x - 1];
3567 if(y) line[x] += prev[x];
3572 // STOP_TIMER("correlate")
3575 static void correlate(SnowContext *s, SubBand *b, DWTELEM *src, int stride, int inverse, int use_median){
3576 const int w= b->width;
3577 const int h= b->height;
3582 int i= x + y*stride;
3586 if(y && x+1<w) src[i] += mid_pred(src[i - 1], src[i - stride], src[i - stride + 1]);
3587 else src[i] += src[i - 1];
3589 if(y) src[i] += mid_pred(src[i - 1], src[i - stride], src[i - 1] + src[i - stride] - src[i - 1 - stride]);
3590 else src[i] += src[i - 1];
3593 if(y) src[i] += src[i - stride];
3599 static void encode_header(SnowContext *s){
3600 int plane_index, level, orientation;
3603 memset(kstate, MID_STATE, sizeof(kstate));
3605 put_rac(&s->c, kstate, s->keyframe);
3606 if(s->keyframe || s->always_reset)
3609 put_symbol(&s->c, s->header_state, s->version, 0);
3610 put_rac(&s->c, s->header_state, s->always_reset);
3611 put_symbol(&s->c, s->header_state, s->temporal_decomposition_type, 0);
3612 put_symbol(&s->c, s->header_state, s->temporal_decomposition_count, 0);
3613 put_symbol(&s->c, s->header_state, s->spatial_decomposition_count, 0);
3614 put_symbol(&s->c, s->header_state, s->colorspace_type, 0);
3615 put_symbol(&s->c, s->header_state, s->chroma_h_shift, 0);
3616 put_symbol(&s->c, s->header_state, s->chroma_v_shift, 0);
3617 put_rac(&s->c, s->header_state, s->spatial_scalability);
3618 // put_rac(&s->c, s->header_state, s->rate_scalability);
3619 put_symbol(&s->c, s->header_state, s->max_ref_frames-1, 0);
3621 for(plane_index=0; plane_index<2; plane_index++){
3622 for(level=0; level<s->spatial_decomposition_count; level++){
3623 for(orientation=level ? 1:0; orientation<4; orientation++){
3624 if(orientation==2) continue;
3625 put_symbol(&s->c, s->header_state, s->plane[plane_index].band[level][orientation].qlog, 1);
3630 put_symbol(&s->c, s->header_state, s->spatial_decomposition_type, 0);
3631 put_symbol(&s->c, s->header_state, s->qlog, 1);
3632 put_symbol(&s->c, s->header_state, s->mv_scale, 0);
3633 put_symbol(&s->c, s->header_state, s->qbias, 1);
3634 put_symbol(&s->c, s->header_state, s->block_max_depth, 0);
3637 static int decode_header(SnowContext *s){
3638 int plane_index, level, orientation;
3641 memset(kstate, MID_STATE, sizeof(kstate));
3643 s->keyframe= get_rac(&s->c, kstate);
3644 if(s->keyframe || s->always_reset)
3647 s->version= get_symbol(&s->c, s->header_state, 0);
3649 av_log(s->avctx, AV_LOG_ERROR, "version %d not supported", s->version);
3652 s->always_reset= get_rac(&s->c, s->header_state);
3653 s->temporal_decomposition_type= get_symbol(&s->c, s->header_state, 0);
3654 s->temporal_decomposition_count= get_symbol(&s->c, s->header_state, 0);
3655 s->spatial_decomposition_count= get_symbol(&s->c, s->header_state, 0);
3656 s->colorspace_type= get_symbol(&s->c, s->header_state, 0);
3657 s->chroma_h_shift= get_symbol(&s->c, s->header_state, 0);
3658 s->chroma_v_shift= get_symbol(&s->c, s->header_state, 0);
3659 s->spatial_scalability= get_rac(&s->c, s->header_state);
3660 // s->rate_scalability= get_rac(&s->c, s->header_state);
3661 s->max_ref_frames= get_symbol(&s->c, s->header_state, 0)+1;
3663 for(plane_index=0; plane_index<3; plane_index++){
3664 for(level=0; level<s->spatial_decomposition_count; level++){
3665 for(orientation=level ? 1:0; orientation<4; orientation++){
3667 if (plane_index==2) q= s->plane[1].band[level][orientation].qlog;
3668 else if(orientation==2) q= s->plane[plane_index].band[level][1].qlog;
3669 else q= get_symbol(&s->c, s->header_state, 1);
3670 s->plane[plane_index].band[level][orientation].qlog= q;
3676 s->spatial_decomposition_type= get_symbol(&s->c, s->header_state, 0);
3677 if(s->spatial_decomposition_type > 2){
3678 av_log(s->avctx, AV_LOG_ERROR, "spatial_decomposition_type %d not supported", s->spatial_decomposition_type);
3682 s->qlog= get_symbol(&s->c, s->header_state, 1);
3683 s->mv_scale= get_symbol(&s->c, s->header_state, 0);
3684 s->qbias= get_symbol(&s->c, s->header_state, 1);
3685 s->block_max_depth= get_symbol(&s->c, s->header_state, 0);
3686 if(s->block_max_depth > 1 || s->block_max_depth < 0){
3687 av_log(s->avctx, AV_LOG_ERROR, "block_max_depth= %d is too large", s->block_max_depth);
3688 s->block_max_depth= 0;
3695 static void init_qexp(void){
3699 for(i=0; i<QROOT; i++){
3701 v *= pow(2, 1.0 / QROOT);
3705 static int common_init(AVCodecContext *avctx){
3706 SnowContext *s = avctx->priv_data;
3708 int level, orientation, plane_index, dec;
3713 dsputil_init(&s->dsp, avctx);
3716 s->dsp.put_qpel_pixels_tab [0][dy+dx/4]=\
3717 s->dsp.put_no_rnd_qpel_pixels_tab[0][dy+dx/4]=\
3718 s->dsp.put_h264_qpel_pixels_tab[0][dy+dx/4];\
3719 s->dsp.put_qpel_pixels_tab [1][dy+dx/4]=\
3720 s->dsp.put_no_rnd_qpel_pixels_tab[1][dy+dx/4]=\
3721 s->dsp.put_h264_qpel_pixels_tab[1][dy+dx/4];
3740 #define mcfh(dx,dy)\
3741 s->dsp.put_pixels_tab [0][dy/4+dx/8]=\
3742 s->dsp.put_no_rnd_pixels_tab[0][dy/4+dx/8]=\
3743 mc_block_hpel ## dx ## dy ## 16;\
3744 s->dsp.put_pixels_tab [1][dy/4+dx/8]=\
3745 s->dsp.put_no_rnd_pixels_tab[1][dy/4+dx/8]=\
3746 mc_block_hpel ## dx ## dy ## 8;
3756 dec= s->spatial_decomposition_count= 5;
3757 s->spatial_decomposition_type= avctx->prediction_method; //FIXME add decorrelator type r transform_type
3759 s->chroma_h_shift= 1; //FIXME XXX
3760 s->chroma_v_shift= 1;
3762 // dec += FFMAX(s->chroma_h_shift, s->chroma_v_shift);
3764 width= s->avctx->width;
3765 height= s->avctx->height;
3767 s->spatial_dwt_buffer= av_mallocz(width*height*sizeof(DWTELEM));
3769 s->mv_scale= (s->avctx->flags & CODEC_FLAG_QPEL) ? 2 : 4;
3770 s->block_max_depth= (s->avctx->flags & CODEC_FLAG_4MV) ? 1 : 0;
3772 for(plane_index=0; plane_index<3; plane_index++){
3773 int w= s->avctx->width;
3774 int h= s->avctx->height;
3777 w>>= s->chroma_h_shift;
3778 h>>= s->chroma_v_shift;
3780 s->plane[plane_index].width = w;
3781 s->plane[plane_index].height= h;
3782 //av_log(NULL, AV_LOG_DEBUG, "%d %d\n", w, h);
3783 for(level=s->spatial_decomposition_count-1; level>=0; level--){
3784 for(orientation=level ? 1 : 0; orientation<4; orientation++){
3785 SubBand *b= &s->plane[plane_index].band[level][orientation];
3787 b->buf= s->spatial_dwt_buffer;
3789 b->stride= s->plane[plane_index].width << (s->spatial_decomposition_count - level);
3790 b->width = (w + !(orientation&1))>>1;
3791 b->height= (h + !(orientation>1))>>1;
3793 b->stride_line = 1 << (s->spatial_decomposition_count - level);
3794 b->buf_x_offset = 0;
3795 b->buf_y_offset = 0;
3799 b->buf_x_offset = (w+1)>>1;
3802 b->buf += b->stride>>1;
3803 b->buf_y_offset = b->stride_line >> 1;
3807 b->parent= &s->plane[plane_index].band[level-1][orientation];
3808 b->x_coeff=av_mallocz(((b->width+1) * b->height+1)*sizeof(x_and_coeff));
3815 for(i=0; i<MAX_REF_FRAMES; i++)
3816 for(j=0; j<MAX_REF_FRAMES; j++)
3817 scale_mv_ref[i][j] = 256*(i+1)/(j+1);
3821 width= s->width= avctx->width;
3822 height= s->height= avctx->height;
3824 assert(width && height);
3826 s->avctx->get_buffer(s->avctx, &s->mconly_picture);
3831 static int qscale2qlog(int qscale){
3832 return rint(QROOT*log(qscale / (float)FF_QP2LAMBDA)/log(2))
3833 + 61*QROOT/8; //<64 >60
3836 static int ratecontrol_1pass(SnowContext *s, AVFrame *pict)
3838 /* estimate the frame's complexity as a sum of weighted dwt coefs.
3839 * FIXME we know exact mv bits at this point,
3840 * but ratecontrol isn't set up to include them. */
3841 uint32_t coef_sum= 0;
3842 int level, orientation, delta_qlog;
3844 for(level=0; level<s->spatial_decomposition_count; level++){
3845 for(orientation=level ? 1 : 0; orientation<4; orientation++){
3846 SubBand *b= &s->plane[0].band[level][orientation];
3847 DWTELEM *buf= b->buf;
3848 const int w= b->width;
3849 const int h= b->height;
3850 const int stride= b->stride;
3851 const int qlog= clip(2*QROOT + b->qlog, 0, QROOT*16);
3852 const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
3853 const int qdiv= (1<<16)/qmul;
3856 decorrelate(s, b, buf, stride, 1, 0);
3859 coef_sum+= abs(buf[x+y*stride]) * qdiv >> 16;
3861 correlate(s, b, buf, stride, 1, 0);
3865 /* ugly, ratecontrol just takes a sqrt again */
3866 coef_sum = (uint64_t)coef_sum * coef_sum >> 16;
3867 assert(coef_sum < INT_MAX);
3869 if(pict->pict_type == I_TYPE){
3870 s->m.current_picture.mb_var_sum= coef_sum;
3871 s->m.current_picture.mc_mb_var_sum= 0;
3873 s->m.current_picture.mc_mb_var_sum= coef_sum;
3874 s->m.current_picture.mb_var_sum= 0;
3877 pict->quality= ff_rate_estimate_qscale(&s->m, 1);
3878 if (pict->quality < 0)
3880 s->lambda= pict->quality * 3/2;
3881 delta_qlog= qscale2qlog(pict->quality) - s->qlog;
3882 s->qlog+= delta_qlog;
3886 static void calculate_vissual_weight(SnowContext *s, Plane *p){
3887 int width = p->width;
3888 int height= p->height;
3889 int level, orientation, x, y;
3891 for(level=0; level<s->spatial_decomposition_count; level++){
3892 for(orientation=level ? 1 : 0; orientation<4; orientation++){
3893 SubBand *b= &p->band[level][orientation];
3894 DWTELEM *buf= b->buf;
3897 memset(s->spatial_dwt_buffer, 0, sizeof(int)*width*height);
3898 buf[b->width/2 + b->height/2*b->stride]= 256*256;
3899 ff_spatial_idwt(s->spatial_dwt_buffer, width, height, width, s->spatial_decomposition_type, s->spatial_decomposition_count);
3900 for(y=0; y<height; y++){
3901 for(x=0; x<width; x++){
3902 int64_t d= s->spatial_dwt_buffer[x + y*width];
3907 b->qlog= (int)(log(352256.0/sqrt(error)) / log(pow(2.0, 1.0/QROOT))+0.5);
3908 // av_log(NULL, AV_LOG_DEBUG, "%d %d %d\n", level, orientation, b->qlog/*, sqrt(error)*/);
3913 static int encode_init(AVCodecContext *avctx)
3915 SnowContext *s = avctx->priv_data;
3918 if(avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL){
3919 av_log(avctx, AV_LOG_ERROR, "this codec is under development, files encoded with it may not be decodable with future versions!!!\n"
3920 "use vstrict=-2 / -strict -2 to use it anyway\n");
3924 if(avctx->prediction_method == DWT_97
3925 && (avctx->flags & CODEC_FLAG_QSCALE)
3926 && avctx->global_quality == 0){
3927 av_log(avctx, AV_LOG_ERROR, "the 9/7 wavelet is incompatible with lossless mode\n");
3937 s->m.flags = avctx->flags;
3938 s->m.bit_rate= avctx->bit_rate;
3940 s->m.me.scratchpad= av_mallocz((avctx->width+64)*2*16*2*sizeof(uint8_t));
3941 s->m.me.map = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t));
3942 s->m.me.score_map = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t));
3943 s->m.obmc_scratchpad= av_mallocz(MB_SIZE*MB_SIZE*12*sizeof(uint32_t));
3944 h263_encode_init(&s->m); //mv_penalty
3946 s->max_ref_frames = FFMAX(FFMIN(avctx->refs, MAX_REF_FRAMES), 1);
3948 if(avctx->flags&CODEC_FLAG_PASS1){
3949 if(!avctx->stats_out)
3950 avctx->stats_out = av_mallocz(256);
3952 if((avctx->flags&CODEC_FLAG_PASS2) || !(avctx->flags&CODEC_FLAG_QSCALE)){
3953 if(ff_rate_control_init(&s->m) < 0)
3956 s->pass1_rc= !(avctx->flags & (CODEC_FLAG_QSCALE|CODEC_FLAG_PASS2));
3958 for(plane_index=0; plane_index<3; plane_index++){
3959 calculate_vissual_weight(s, &s->plane[plane_index]);
3963 avctx->coded_frame= &s->current_picture;
3964 switch(avctx->pix_fmt){
3965 // case PIX_FMT_YUV444P:
3966 // case PIX_FMT_YUV422P:
3967 case PIX_FMT_YUV420P:
3969 // case PIX_FMT_YUV411P:
3970 // case PIX_FMT_YUV410P:
3971 s->colorspace_type= 0;
3973 /* case PIX_FMT_RGBA32:
3977 av_log(avctx, AV_LOG_ERROR, "format not supported\n");
3980 // avcodec_get_chroma_sub_sample(avctx->pix_fmt, &s->chroma_h_shift, &s->chroma_v_shift);
3981 s->chroma_h_shift= 1;
3982 s->chroma_v_shift= 1;
3984 ff_set_cmp(&s->dsp, s->dsp.me_cmp, s->avctx->me_cmp);
3985 ff_set_cmp(&s->dsp, s->dsp.me_sub_cmp, s->avctx->me_sub_cmp);
3987 s->avctx->get_buffer(s->avctx, &s->input_picture);
3989 if(s->avctx->me_method == ME_ITER){
3991 int size= s->b_width * s->b_height << 2*s->block_max_depth;
3992 for(i=0; i<s->max_ref_frames; i++){
3993 s->ref_mvs[i]= av_mallocz(size*sizeof(int16_t[2]));
3994 s->ref_scores[i]= av_mallocz(size*sizeof(uint32_t));
4001 static int frame_start(SnowContext *s){
4003 int w= s->avctx->width; //FIXME round up to x16 ?
4004 int h= s->avctx->height;
4006 if(s->current_picture.data[0]){
4007 draw_edges(s->current_picture.data[0], s->current_picture.linesize[0], w , h , EDGE_WIDTH );
4008 draw_edges(s->current_picture.data[1], s->current_picture.linesize[1], w>>1, h>>1, EDGE_WIDTH/2);
4009 draw_edges(s->current_picture.data[2], s->current_picture.linesize[2], w>>1, h>>1, EDGE_WIDTH/2);
4012 tmp= s->last_picture[s->max_ref_frames-1];
4013 memmove(s->last_picture+1, s->last_picture, (s->max_ref_frames-1)*sizeof(AVFrame));
4014 s->last_picture[0]= s->current_picture;
4015 s->current_picture= tmp;
4021 for(i=0; i<s->max_ref_frames && s->last_picture[i].data[0]; i++)
4022 if(i && s->last_picture[i-1].key_frame)
4027 s->current_picture.reference= 1;
4028 if(s->avctx->get_buffer(s->avctx, &s->current_picture) < 0){
4029 av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed\n");
4033 s->current_picture.key_frame= s->keyframe;
4038 static int encode_frame(AVCodecContext *avctx, unsigned char *buf, int buf_size, void *data){
4039 SnowContext *s = avctx->priv_data;
4040 RangeCoder * const c= &s->c;
4041 AVFrame *pict = data;
4042 const int width= s->avctx->width;
4043 const int height= s->avctx->height;
4044 int level, orientation, plane_index, i, y;
4045 uint8_t rc_header_bak[sizeof(s->header_state)];
4046 uint8_t rc_block_bak[sizeof(s->block_state)];
4048 ff_init_range_encoder(c, buf, buf_size);
4049 ff_build_rac_states(c, 0.05*(1LL<<32), 256-8);
4053 for(y=0; y<(height>>shift); y++)
4054 memcpy(&s->input_picture.data[i][y * s->input_picture.linesize[i]],
4055 &pict->data[i][y * pict->linesize[i]],
4058 s->new_picture = *pict;
4060 s->m.picture_number= avctx->frame_number;
4061 if(avctx->flags&CODEC_FLAG_PASS2){
4063 pict->pict_type= s->m.rc_context.entry[avctx->frame_number].new_pict_type;
4064 s->keyframe= pict->pict_type==FF_I_TYPE;
4065 if(!(avctx->flags&CODEC_FLAG_QSCALE)) {
4066 pict->quality= ff_rate_estimate_qscale(&s->m, 0);
4067 if (pict->quality < 0)
4071 s->keyframe= avctx->gop_size==0 || avctx->frame_number % avctx->gop_size == 0;
4073 pict->pict_type= s->keyframe ? FF_I_TYPE : FF_P_TYPE;
4076 if(s->pass1_rc && avctx->frame_number == 0)
4077 pict->quality= 2*FF_QP2LAMBDA;
4079 s->qlog= qscale2qlog(pict->quality);
4080 s->lambda = pict->quality * 3/2;
4082 if(s->qlog < 0 || (!pict->quality && (avctx->flags & CODEC_FLAG_QSCALE))){
4083 s->qlog= LOSSLESS_QLOG;
4085 }//else keep previous frame's qlog until after motion est
4089 s->m.current_picture_ptr= &s->m.current_picture;
4090 if(pict->pict_type == P_TYPE){
4091 int block_width = (width +15)>>4;
4092 int block_height= (height+15)>>4;
4093 int stride= s->current_picture.linesize[0];
4095 assert(s->current_picture.data[0]);
4096 assert(s->last_picture[0].data[0]);
4098 s->m.avctx= s->avctx;
4099 s->m.current_picture.data[0]= s->current_picture.data[0];
4100 s->m. last_picture.data[0]= s->last_picture[0].data[0];
4101 s->m. new_picture.data[0]= s-> input_picture.data[0];
4102 s->m. last_picture_ptr= &s->m. last_picture;
4104 s->m. last_picture.linesize[0]=
4105 s->m. new_picture.linesize[0]=
4106 s->m.current_picture.linesize[0]= stride;
4107 s->m.uvlinesize= s->current_picture.linesize[1];
4109 s->m.height= height;
4110 s->m.mb_width = block_width;
4111 s->m.mb_height= block_height;
4112 s->m.mb_stride= s->m.mb_width+1;
4113 s->m.b8_stride= 2*s->m.mb_width+1;
4115 s->m.pict_type= pict->pict_type;
4116 s->m.me_method= s->avctx->me_method;
4117 s->m.me.scene_change_score=0;
4118 s->m.flags= s->avctx->flags;
4119 s->m.quarter_sample= (s->avctx->flags & CODEC_FLAG_QPEL)!=0;
4120 s->m.out_format= FMT_H263;
4121 s->m.unrestricted_mv= 1;
4123 s->m.lambda = s->lambda;
4124 s->m.qscale= (s->m.lambda*139 + FF_LAMBDA_SCALE*64) >> (FF_LAMBDA_SHIFT + 7);
4125 s->lambda2= s->m.lambda2= (s->m.lambda*s->m.lambda + FF_LAMBDA_SCALE/2) >> FF_LAMBDA_SHIFT;
4127 s->m.dsp= s->dsp; //move
4133 memcpy(rc_header_bak, s->header_state, sizeof(s->header_state));
4134 memcpy(rc_block_bak, s->block_state, sizeof(s->block_state));
4139 s->m.pict_type = pict->pict_type;
4140 s->qbias= pict->pict_type == P_TYPE ? 2 : 0;
4143 s->m.misc_bits = 8*(s->c.bytestream - s->c.bytestream_start);
4144 encode_blocks(s, 1);
4145 s->m.mv_bits = 8*(s->c.bytestream - s->c.bytestream_start) - s->m.misc_bits;
4147 for(plane_index=0; plane_index<3; plane_index++){
4148 Plane *p= &s->plane[plane_index];
4152 // int bits= put_bits_count(&s->c.pb);
4154 if(!(avctx->flags2 & CODEC_FLAG2_MEMC_ONLY)){
4156 if(pict->data[plane_index]) //FIXME gray hack
4159 s->spatial_dwt_buffer[y*w + x]= pict->data[plane_index][y*pict->linesize[plane_index] + x]<<FRAC_BITS;
4162 predict_plane(s, s->spatial_dwt_buffer, plane_index, 0);
4165 && pict->pict_type == P_TYPE
4166 && !(avctx->flags&CODEC_FLAG_PASS2)
4167 && s->m.me.scene_change_score > s->avctx->scenechange_threshold){
4168 ff_init_range_encoder(c, buf, buf_size);
4169 ff_build_rac_states(c, 0.05*(1LL<<32), 256-8);
4170 pict->pict_type= FF_I_TYPE;
4172 s->current_picture.key_frame=1;
4177 if(s->qlog == LOSSLESS_QLOG){
4180 s->spatial_dwt_buffer[y*w + x]= (s->spatial_dwt_buffer[y*w + x] + (1<<(FRAC_BITS-1))-1)>>FRAC_BITS;
4185 ff_spatial_dwt(s->spatial_dwt_buffer, w, h, w, s->spatial_decomposition_type, s->spatial_decomposition_count);
4187 if(s->pass1_rc && plane_index==0){
4188 int delta_qlog = ratecontrol_1pass(s, pict);
4192 //reordering qlog in the bitstream would eliminate this reset
4193 ff_init_range_encoder(c, buf, buf_size);
4194 memcpy(s->header_state, rc_header_bak, sizeof(s->header_state));
4195 memcpy(s->block_state, rc_block_bak, sizeof(s->block_state));
4197 encode_blocks(s, 0);
4201 for(level=0; level<s->spatial_decomposition_count; level++){
4202 for(orientation=level ? 1 : 0; orientation<4; orientation++){
4203 SubBand *b= &p->band[level][orientation];
4205 quantize(s, b, b->buf, b->stride, s->qbias);
4207 decorrelate(s, b, b->buf, b->stride, pict->pict_type == P_TYPE, 0);
4208 encode_subband(s, b, b->buf, b->parent ? b->parent->buf : NULL, b->stride, orientation);
4209 assert(b->parent==NULL || b->parent->stride == b->stride*2);
4211 correlate(s, b, b->buf, b->stride, 1, 0);
4214 // av_log(NULL, AV_LOG_DEBUG, "plane:%d bits:%d\n", plane_index, put_bits_count(&s->c.pb) - bits);
4216 for(level=0; level<s->spatial_decomposition_count; level++){
4217 for(orientation=level ? 1 : 0; orientation<4; orientation++){
4218 SubBand *b= &p->band[level][orientation];
4220 dequantize(s, b, b->buf, b->stride);
4224 ff_spatial_idwt(s->spatial_dwt_buffer, w, h, w, s->spatial_decomposition_type, s->spatial_decomposition_count);
4225 if(s->qlog == LOSSLESS_QLOG){
4228 s->spatial_dwt_buffer[y*w + x]<<=FRAC_BITS;
4233 predict_plane(s, s->spatial_dwt_buffer, plane_index, 1);
4234 STOP_TIMER("pred-conv")}
4237 if(pict->pict_type == I_TYPE){
4240 s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x]=
4241 pict->data[plane_index][y*pict->linesize[plane_index] + x];
4245 memset(s->spatial_dwt_buffer, 0, sizeof(DWTELEM)*w*h);
4246 predict_plane(s, s->spatial_dwt_buffer, plane_index, 1);
4249 if(s->avctx->flags&CODEC_FLAG_PSNR){
4252 if(pict->data[plane_index]) //FIXME gray hack
4255 int d= s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x] - pict->data[plane_index][y*pict->linesize[plane_index] + x];
4259 s->avctx->error[plane_index] += error;
4260 s->current_picture.error[plane_index] = error;
4264 if(s->last_picture[s->max_ref_frames-1].data[0])
4265 avctx->release_buffer(avctx, &s->last_picture[s->max_ref_frames-1]);
4267 s->current_picture.coded_picture_number = avctx->frame_number;
4268 s->current_picture.pict_type = pict->pict_type;
4269 s->current_picture.quality = pict->quality;
4270 s->m.frame_bits = 8*(s->c.bytestream - s->c.bytestream_start);
4271 s->m.p_tex_bits = s->m.frame_bits - s->m.misc_bits - s->m.mv_bits;
4272 s->m.current_picture.display_picture_number =
4273 s->m.current_picture.coded_picture_number = avctx->frame_number;
4274 s->m.current_picture.quality = pict->quality;
4275 s->m.total_bits += 8*(s->c.bytestream - s->c.bytestream_start);
4277 if (ff_rate_estimate_qscale(&s->m, 0) < 0)
4279 if(avctx->flags&CODEC_FLAG_PASS1)
4280 ff_write_pass1_stats(&s->m);
4281 s->m.last_pict_type = s->m.pict_type;
4285 return ff_rac_terminate(c);
4288 static void common_end(SnowContext *s){
4289 int plane_index, level, orientation, i;
4291 av_freep(&s->spatial_dwt_buffer);
4293 av_freep(&s->m.me.scratchpad);
4294 av_freep(&s->m.me.map);
4295 av_freep(&s->m.me.score_map);
4296 av_freep(&s->m.obmc_scratchpad);
4298 av_freep(&s->block);
4300 for(i=0; i<MAX_REF_FRAMES; i++){
4301 av_freep(&s->ref_mvs[i]);
4302 av_freep(&s->ref_scores[i]);
4303 if(s->last_picture[i].data[0])
4304 s->avctx->release_buffer(s->avctx, &s->last_picture[i]);
4307 for(plane_index=0; plane_index<3; plane_index++){
4308 for(level=s->spatial_decomposition_count-1; level>=0; level--){
4309 for(orientation=level ? 1 : 0; orientation<4; orientation++){
4310 SubBand *b= &s->plane[plane_index].band[level][orientation];
4312 av_freep(&b->x_coeff);
4318 static int encode_end(AVCodecContext *avctx)
4320 SnowContext *s = avctx->priv_data;
4323 av_free(avctx->stats_out);
4328 static int decode_init(AVCodecContext *avctx)
4330 SnowContext *s = avctx->priv_data;
4333 avctx->pix_fmt= PIX_FMT_YUV420P;
4337 block_size = MB_SIZE >> s->block_max_depth;
4338 slice_buffer_init(&s->sb, s->plane[0].height, (block_size) + (s->spatial_decomposition_count * (s->spatial_decomposition_count + 3)) + 1, s->plane[0].width, s->spatial_dwt_buffer);
4343 static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, uint8_t *buf, int buf_size){
4344 SnowContext *s = avctx->priv_data;
4345 RangeCoder * const c= &s->c;
4347 AVFrame *picture = data;
4348 int level, orientation, plane_index;
4350 ff_init_range_decoder(c, buf, buf_size);
4351 ff_build_rac_states(c, 0.05*(1LL<<32), 256-8);
4353 s->current_picture.pict_type= FF_I_TYPE; //FIXME I vs. P
4355 if(!s->block) alloc_blocks(s);
4358 //keyframe flag dupliaction mess FIXME
4359 if(avctx->debug&FF_DEBUG_PICT_INFO)
4360 av_log(avctx, AV_LOG_ERROR, "keyframe:%d qlog:%d\n", s->keyframe, s->qlog);
4364 for(plane_index=0; plane_index<3; plane_index++){
4365 Plane *p= &s->plane[plane_index];
4369 int decode_state[MAX_DECOMPOSITIONS][4][1]; /* Stored state info for unpack_coeffs. 1 variable per instance. */
4371 if(s->avctx->debug&2048){
4372 memset(s->spatial_dwt_buffer, 0, sizeof(DWTELEM)*w*h);
4373 predict_plane(s, s->spatial_dwt_buffer, plane_index, 1);
4377 int v= s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x];
4378 s->mconly_picture.data[plane_index][y*s->mconly_picture.linesize[plane_index] + x]= v;
4384 for(level=0; level<s->spatial_decomposition_count; level++){
4385 for(orientation=level ? 1 : 0; orientation<4; orientation++){
4386 SubBand *b= &p->band[level][orientation];
4387 unpack_coeffs(s, b, b->parent, orientation);
4390 STOP_TIMER("unpack coeffs");
4394 const int mb_h= s->b_height << s->block_max_depth;
4395 const int block_size = MB_SIZE >> s->block_max_depth;
4396 const int block_w = plane_index ? block_size/2 : block_size;
4398 dwt_compose_t cs[MAX_DECOMPOSITIONS];
4403 ff_spatial_idwt_buffered_init(cs, &s->sb, w, h, 1, s->spatial_decomposition_type, s->spatial_decomposition_count);
4404 for(mb_y=0; mb_y<=mb_h; mb_y++){
4406 int slice_starty = block_w*mb_y;
4407 int slice_h = block_w*(mb_y+1);
4408 if (!(s->keyframe || s->avctx->debug&512)){
4409 slice_starty = FFMAX(0, slice_starty - (block_w >> 1));
4410 slice_h -= (block_w >> 1);
4415 for(level=0; level<s->spatial_decomposition_count; level++){
4416 for(orientation=level ? 1 : 0; orientation<4; orientation++){
4417 SubBand *b= &p->band[level][orientation];
4420 int our_mb_start = mb_y;
4421 int our_mb_end = (mb_y + 1);
4423 start_y = (mb_y ? ((block_w * our_mb_start) >> (s->spatial_decomposition_count - level)) + s->spatial_decomposition_count - level + extra: 0);
4424 end_y = (((block_w * our_mb_end) >> (s->spatial_decomposition_count - level)) + s->spatial_decomposition_count - level + extra);
4425 if (!(s->keyframe || s->avctx->debug&512)){
4426 start_y = FFMAX(0, start_y - (block_w >> (1+s->spatial_decomposition_count - level)));
4427 end_y = FFMAX(0, end_y - (block_w >> (1+s->spatial_decomposition_count - level)));
4429 start_y = FFMIN(b->height, start_y);
4430 end_y = FFMIN(b->height, end_y);
4432 if (start_y != end_y){
4433 if (orientation == 0){
4434 SubBand * correlate_band = &p->band[0][0];
4435 int correlate_end_y = FFMIN(b->height, end_y + 1);
4436 int correlate_start_y = FFMIN(b->height, (start_y ? start_y + 1 : 0));
4437 decode_subband_slice_buffered(s, correlate_band, &s->sb, correlate_start_y, correlate_end_y, decode_state[0][0]);
4438 correlate_slice_buffered(s, &s->sb, correlate_band, correlate_band->buf, correlate_band->stride, 1, 0, correlate_start_y, correlate_end_y);
4439 dequantize_slice_buffered(s, &s->sb, correlate_band, correlate_band->buf, correlate_band->stride, start_y, end_y);
4442 decode_subband_slice_buffered(s, b, &s->sb, start_y, end_y, decode_state[level][orientation]);
4446 STOP_TIMER("decode_subband_slice");
4450 for(; yd<slice_h; yd+=4){
4451 ff_spatial_idwt_buffered_slice(&s->dsp, cs, &s->sb, w, h, 1, s->spatial_decomposition_type, s->spatial_decomposition_count, yd);
4453 STOP_TIMER("idwt slice");}
4456 if(s->qlog == LOSSLESS_QLOG){
4457 for(; yq<slice_h && yq<h; yq++){
4458 DWTELEM * line = slice_buffer_get_line(&s->sb, yq);
4460 line[x] <<= FRAC_BITS;
4465 predict_slice_buffered(s, &s->sb, s->spatial_dwt_buffer, plane_index, 1, mb_y);
4467 y = FFMIN(p->height, slice_starty);
4468 end_y = FFMIN(p->height, slice_h);
4470 slice_buffer_release(&s->sb, y++);
4473 slice_buffer_flush(&s->sb);
4475 STOP_TIMER("idwt + predict_slices")}
4480 if(s->last_picture[s->max_ref_frames-1].data[0])
4481 avctx->release_buffer(avctx, &s->last_picture[s->max_ref_frames-1]);
4483 if(!(s->avctx->debug&2048))
4484 *picture= s->current_picture;
4486 *picture= s->mconly_picture;
4488 *data_size = sizeof(AVFrame);
4490 bytes_read= c->bytestream - c->bytestream_start;
4491 if(bytes_read ==0) av_log(s->avctx, AV_LOG_ERROR, "error at end of frame\n"); //FIXME
4496 static int decode_end(AVCodecContext *avctx)
4498 SnowContext *s = avctx->priv_data;
4500 slice_buffer_destroy(&s->sb);
4507 AVCodec snow_decoder = {
4511 sizeof(SnowContext),
4516 0 /*CODEC_CAP_DR1*/ /*| CODEC_CAP_DRAW_HORIZ_BAND*/,
4520 #ifdef CONFIG_ENCODERS
4521 AVCodec snow_encoder = {
4525 sizeof(SnowContext),
4541 int buffer[2][width*height];
4544 s.spatial_decomposition_count=6;
4545 s.spatial_decomposition_type=1;
4547 printf("testing 5/3 DWT\n");
4548 for(i=0; i<width*height; i++)
4549 buffer[0][i]= buffer[1][i]= random()%54321 - 12345;
4551 ff_spatial_dwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4552 ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4554 for(i=0; i<width*height; i++)
4555 if(buffer[0][i]!= buffer[1][i]) printf("fsck: %d %d %d\n",i, buffer[0][i], buffer[1][i]);
4557 printf("testing 9/7 DWT\n");
4558 s.spatial_decomposition_type=0;
4559 for(i=0; i<width*height; i++)
4560 buffer[0][i]= buffer[1][i]= random()%54321 - 12345;
4562 ff_spatial_dwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4563 ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4565 for(i=0; i<width*height; i++)
4566 if(ABS(buffer[0][i] - buffer[1][i])>20) printf("fsck: %d %d %d\n",i, buffer[0][i], buffer[1][i]);
4569 printf("testing AC coder\n");
4570 memset(s.header_state, 0, sizeof(s.header_state));
4571 ff_init_range_encoder(&s.c, buffer[0], 256*256);
4572 ff_init_cabac_states(&s.c, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64);
4574 for(i=-256; i<256; i++){
4576 put_symbol(&s.c, s.header_state, i*i*i/3*ABS(i), 1);
4577 STOP_TIMER("put_symbol")
4579 ff_rac_terminate(&s.c);
4581 memset(s.header_state, 0, sizeof(s.header_state));
4582 ff_init_range_decoder(&s.c, buffer[0], 256*256);
4583 ff_init_cabac_states(&s.c, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64);
4585 for(i=-256; i<256; i++){
4588 j= get_symbol(&s.c, s.header_state, 1);
4589 STOP_TIMER("get_symbol")
4590 if(j!=i*i*i/3*ABS(i)) printf("fsck: %d != %d\n", i, j);
4594 int level, orientation, x, y;
4595 int64_t errors[8][4];
4598 memset(errors, 0, sizeof(errors));
4599 s.spatial_decomposition_count=3;
4600 s.spatial_decomposition_type=0;
4601 for(level=0; level<s.spatial_decomposition_count; level++){
4602 for(orientation=level ? 1 : 0; orientation<4; orientation++){
4603 int w= width >> (s.spatial_decomposition_count-level);
4604 int h= height >> (s.spatial_decomposition_count-level);
4605 int stride= width << (s.spatial_decomposition_count-level);
4606 DWTELEM *buf= buffer[0];
4609 if(orientation&1) buf+=w;
4610 if(orientation>1) buf+=stride>>1;
4612 memset(buffer[0], 0, sizeof(int)*width*height);
4613 buf[w/2 + h/2*stride]= 256*256;
4614 ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4615 for(y=0; y<height; y++){
4616 for(x=0; x<width; x++){
4617 int64_t d= buffer[0][x + y*width];
4619 if(ABS(width/2-x)<9 && ABS(height/2-y)<9 && level==2) printf("%8lld ", d);
4621 if(ABS(height/2-y)<9 && level==2) printf("\n");
4623 error= (int)(sqrt(error)+0.5);
4624 errors[level][orientation]= error;
4625 if(g) g=ff_gcd(g, error);
4629 printf("static int const visual_weight[][4]={\n");
4630 for(level=0; level<s.spatial_decomposition_count; level++){
4632 for(orientation=0; orientation<4; orientation++){
4633 printf("%8lld,", errors[level][orientation]/g);
4641 int w= width >> (s.spatial_decomposition_count-level);
4642 int h= height >> (s.spatial_decomposition_count-level);
4643 int stride= width << (s.spatial_decomposition_count-level);
4644 DWTELEM *buf= buffer[0];
4650 memset(buffer[0], 0, sizeof(int)*width*height);
4652 for(y=0; y<height; y++){
4653 for(x=0; x<width; x++){
4654 int tab[4]={0,2,3,1};
4655 buffer[0][x+width*y]= 256*256*tab[(x&1) + 2*(y&1)];
4658 ff_spatial_dwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4662 buf[x + y*stride ]=169;
4663 buf[x + y*stride-w]=64;
4666 ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4668 for(y=0; y<height; y++){
4669 for(x=0; x<width; x++){
4670 int64_t d= buffer[0][x + y*width];
4672 if(ABS(width/2-x)<9 && ABS(height/2-y)<9) printf("%8lld ", d);
4674 if(ABS(height/2-y)<9) printf("\n");