2 * Copyright (C) 2004 Michael Niedermayer <michaelni@gmx.at>
4 * This library is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU Lesser General Public
6 * License as published by the Free Software Foundation; either
7 * version 2 of the License, or (at your option) any later version.
9 * This library is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * Lesser General Public License for more details.
14 * You should have received a copy of the GNU Lesser General Public
15 * License along with this library; if not, write to the Free Software
16 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
24 #include "rangecoder.h"
26 #include "mpegvideo.h"
/* 256-entry lookup table with values in {-1, 0, 1}. As listed here, entries 0, 1
 * and 255 are 0, entries 2..127 are +1 and entries 128..254 are -1.
 * NOTE(review): presumably indexed by a difference reduced to 8 bits, so the upper
 * half stands for negative inputs -- confirm at the use sites. */
31 static const int8_t quant3[256]={
32 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
33 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
34 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
35 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
36 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
37 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
38 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
39 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
40 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
41 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
42 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
43 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
44 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
45 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
46 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
47 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 0,
/* 256-entry lookup table with values in {-1, 0, 1}: entry 0 is 0, entries 1..127
 * are +1 and entries 128..255 are -1 (as listed in the rows below). */
49 static const int8_t quant3b[256]={
50 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
51 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
52 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
53 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
54 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
55 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
56 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
57 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
58 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
59 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
60 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
61 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
62 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
63 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
64 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
65 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
/* 256-entry lookup table with values in {-1, 0, 1}: entries 0 and 1 are 0; from
 * entry 2 on, even indices hold +1 and odd indices hold -1 (as listed below). */
67 static const int8_t quant3bA[256]={
68 0, 0, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
69 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
70 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
71 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
72 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
73 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
74 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
75 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
76 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
77 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
78 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
79 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
80 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
81 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
82 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
83 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
/* 256-entry lookup table with values in -2..2. Entry 0 is 0, the lower half rises
 * to +2 and the upper half (128..255) holds the negative levels, ending in -1.
 * NOTE(review): presumably indexed by a difference reduced to 8 bits -- confirm. */
85 static const int8_t quant5[256]={
86 0, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
87 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
88 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
89 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
90 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
91 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
92 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
93 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
94 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
95 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
96 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
97 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
98 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
99 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
100 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
101 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,-1,
/* 256-entry lookup table with values in -3..3. Entry 0 is 0, the lower half rises
 * to +3 and the upper half (128..255) holds the negative levels, ending in -1. */
103 static const int8_t quant7[256]={
104 0, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
105 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
106 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3,
107 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
108 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
109 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
110 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
111 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
112 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
113 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
114 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
115 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
116 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
117 -3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-2,-2,-2,
118 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
119 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,
/* 256-entry lookup table with values in -4..4. Entry 0 is 0, the lower half rises
 * to +4 and the upper half (128..255) holds the negative levels, ending in -1. */
121 static const int8_t quant9[256]={
122 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3,
123 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
124 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
125 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
126 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
127 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
128 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
129 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
130 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
131 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
132 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
133 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
134 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
135 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
136 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,
137 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-1,-1,
/* 256-entry lookup table with values in -5..5. Entry 0 is 0, the lower half rises
 * to +5 and the upper half (128..255) holds the negative levels, ending in -1. */
139 static const int8_t quant11[256]={
140 0, 1, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4,
141 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
142 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
143 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
144 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
145 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
146 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
147 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
148 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
149 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
150 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
151 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
152 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
153 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-4,-4,
154 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
155 -4,-4,-4,-4,-4,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-1,
/* 256-entry lookup table with values in -6..6. Entry 0 is 0, the lower half rises
 * to +6 and the upper half (128..255) holds the negative levels, ending in -1. */
157 static const int8_t quant13[256]={
158 0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
159 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
160 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
161 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
162 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
163 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
164 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
165 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
166 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
167 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
168 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
169 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
170 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-5,
171 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
172 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
173 -4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,-2,-2,-1,
/* 1024-entry weight table, listed as 32 rows of 32; values grow from 0 at the
 * border to 64 in the centre.
 * NOTE(review): this file contains three definitions named obmc32 (and three named
 * obmc16). They are presumably alternatives selected by preprocessor conditionals
 * that are not visible in this listing -- confirm which one is compiled. */
177 static const uint8_t obmc32[1024]={
178 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
179 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
180 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
181 0, 0, 1, 1, 2, 2, 3, 4, 4, 5, 6, 6, 7, 7, 8, 8, 8, 8, 7, 7, 6, 6, 5, 4, 4, 3, 2, 2, 1, 1, 0, 0,
182 0, 0, 1, 2, 2, 3, 4, 6, 7, 8, 9,10,11,12,12,12,12,12,12,11,10, 9, 8, 7, 6, 4, 3, 2, 2, 1, 0, 0,
183 0, 1, 1, 2, 3, 5, 6, 8,10,11,13,14,15,16,17,18,18,17,16,15,14,13,11,10, 8, 6, 5, 3, 2, 1, 1, 0,
184 0, 1, 1, 3, 4, 6, 8,10,13,15,17,19,20,22,22,23,23,22,22,20,19,17,15,13,10, 8, 6, 4, 3, 1, 1, 0,
185 0, 1, 2, 4, 6, 8,10,13,16,19,21,23,25,27,28,29,29,28,27,25,23,21,19,16,13,10, 8, 6, 4, 2, 1, 0,
186 0, 1, 2, 4, 7,10,13,16,19,22,25,28,31,33,34,35,35,34,33,31,28,25,22,19,16,13,10, 7, 4, 2, 1, 0,
187 0, 1, 3, 5, 8,11,15,19,22,26,30,33,36,38,40,41,41,40,38,36,33,30,26,22,19,15,11, 8, 5, 3, 1, 0,
188 0, 1, 3, 6, 9,12,17,21,25,30,34,38,41,44,45,46,46,45,44,41,38,34,30,25,21,17,12, 9, 6, 3, 1, 0,
189 0, 1, 3, 6,10,14,19,23,28,33,38,42,45,48,51,52,52,51,48,45,42,38,33,28,23,19,14,10, 6, 3, 1, 0,
190 0, 1, 4, 7,11,15,20,25,31,36,41,45,49,52,55,56,56,55,52,49,45,41,36,31,25,20,15,11, 7, 4, 1, 0,
191 0, 2, 4, 7,12,16,22,27,33,38,44,48,52,56,58,60,60,58,56,52,48,44,38,33,27,22,16,12, 7, 4, 2, 0,
192 0, 1, 4, 8,12,17,22,28,34,40,45,51,55,58,61,62,62,61,58,55,51,45,40,34,28,22,17,12, 8, 4, 1, 0,
193 0, 2, 4, 8,12,18,23,29,35,41,46,52,56,60,62,64,64,62,60,56,52,46,41,35,29,23,18,12, 8, 4, 2, 0,
194 0, 2, 4, 8,12,18,23,29,35,41,46,52,56,60,62,64,64,62,60,56,52,46,41,35,29,23,18,12, 8, 4, 2, 0,
195 0, 1, 4, 8,12,17,22,28,34,40,45,51,55,58,61,62,62,61,58,55,51,45,40,34,28,22,17,12, 8, 4, 1, 0,
196 0, 2, 4, 7,12,16,22,27,33,38,44,48,52,56,58,60,60,58,56,52,48,44,38,33,27,22,16,12, 7, 4, 2, 0,
197 0, 1, 4, 7,11,15,20,25,31,36,41,45,49,52,55,56,56,55,52,49,45,41,36,31,25,20,15,11, 7, 4, 1, 0,
198 0, 1, 3, 6,10,14,19,23,28,33,38,42,45,48,51,52,52,51,48,45,42,38,33,28,23,19,14,10, 6, 3, 1, 0,
199 0, 1, 3, 6, 9,12,17,21,25,30,34,38,41,44,45,46,46,45,44,41,38,34,30,25,21,17,12, 9, 6, 3, 1, 0,
200 0, 1, 3, 5, 8,11,15,19,22,26,30,33,36,38,40,41,41,40,38,36,33,30,26,22,19,15,11, 8, 5, 3, 1, 0,
201 0, 1, 2, 4, 7,10,13,16,19,22,25,28,31,33,34,35,35,34,33,31,28,25,22,19,16,13,10, 7, 4, 2, 1, 0,
202 0, 1, 2, 4, 6, 8,10,13,16,19,21,23,25,27,28,29,29,28,27,25,23,21,19,16,13,10, 8, 6, 4, 2, 1, 0,
203 0, 1, 1, 3, 4, 6, 8,10,13,15,17,19,20,22,22,23,23,22,22,20,19,17,15,13,10, 8, 6, 4, 3, 1, 1, 0,
204 0, 1, 1, 2, 3, 5, 6, 8,10,11,13,14,15,16,17,18,18,17,16,15,14,13,11,10, 8, 6, 5, 3, 2, 1, 1, 0,
205 0, 0, 1, 2, 2, 3, 4, 6, 7, 8, 9,10,11,12,12,12,12,12,12,11,10, 9, 8, 7, 6, 4, 3, 2, 2, 1, 0, 0,
206 0, 0, 1, 1, 2, 2, 3, 4, 4, 5, 6, 6, 7, 7, 8, 8, 8, 8, 7, 7, 6, 6, 5, 4, 4, 3, 2, 2, 1, 1, 0, 0,
207 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
208 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
209 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
/* 256-entry weight table, listed as 16 rows of 16; the largest value is 62, in the
 * centre 2x2.
 * NOTE(review): one of three same-named obmc16 definitions in this file; presumably
 * selected by a preprocessor conditional not visible here -- confirm. */
212 static const uint8_t obmc16[256]={
213 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
214 0, 1, 1, 2, 4, 5, 5, 6, 6, 5, 5, 4, 2, 1, 1, 0,
215 0, 1, 4, 6, 9,11,13,15,15,13,11, 9, 6, 4, 1, 0,
216 0, 2, 6,11,15,20,24,26,26,24,20,15,11, 6, 2, 0,
217 0, 4, 9,15,23,29,34,38,38,34,29,23,15, 9, 4, 0,
218 0, 5,11,20,29,38,45,49,49,45,38,29,20,11, 5, 0,
219 1, 5,13,24,34,45,53,57,57,53,45,34,24,13, 5, 1,
220 1, 6,15,26,38,49,57,62,62,57,49,38,26,15, 6, 1,
221 1, 6,15,26,38,49,57,62,62,57,49,38,26,15, 6, 1,
222 1, 5,13,24,34,45,53,57,57,53,45,34,24,13, 5, 1,
223 0, 5,11,20,29,38,45,49,49,45,38,29,20,11, 5, 0,
224 0, 4, 9,15,23,29,34,38,38,34,29,23,15, 9, 4, 0,
225 0, 2, 6,11,15,20,24,26,26,24,20,15,11, 6, 2, 0,
226 0, 1, 4, 6, 9,11,13,15,15,13,11, 9, 6, 4, 1, 0,
227 0, 1, 1, 2, 4, 5, 5, 6, 6, 5, 5, 4, 2, 1, 1, 0,
228 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
/* 1024-entry weight table, listed as 32 rows of 32; every listed value is a
 * multiple of 4 and the largest value is 240, in the centre 2x2.
 * NOTE(review): one of three same-named obmc32 definitions in this file; presumably
 * selected by a preprocessor conditional not visible here -- confirm. */
232 static const uint8_t obmc32[1024]={
233 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0,
234 0, 4, 4, 4, 8, 8, 8, 12, 12, 16, 16, 16, 20, 20, 20, 24, 24, 20, 20, 20, 16, 16, 16, 12, 12, 8, 8, 8, 4, 4, 4, 0,
235 0, 4, 8, 8, 12, 12, 16, 20, 20, 24, 28, 28, 32, 32, 36, 40, 40, 36, 32, 32, 28, 28, 24, 20, 20, 16, 12, 12, 8, 8, 4, 0,
236 0, 4, 8, 12, 16, 20, 24, 28, 28, 32, 36, 40, 44, 48, 52, 56, 56, 52, 48, 44, 40, 36, 32, 28, 28, 24, 20, 16, 12, 8, 4, 0,
237 4, 8, 12, 16, 20, 24, 28, 32, 40, 44, 48, 52, 56, 60, 64, 68, 68, 64, 60, 56, 52, 48, 44, 40, 32, 28, 24, 20, 16, 12, 8, 4,
238 4, 8, 12, 20, 24, 32, 36, 40, 48, 52, 56, 64, 68, 76, 80, 84, 84, 80, 76, 68, 64, 56, 52, 48, 40, 36, 32, 24, 20, 12, 8, 4,
239 4, 8, 16, 24, 28, 36, 44, 48, 56, 60, 68, 76, 80, 88, 96,100,100, 96, 88, 80, 76, 68, 60, 56, 48, 44, 36, 28, 24, 16, 8, 4,
240 4, 12, 20, 28, 32, 40, 48, 56, 64, 72, 80, 88, 92,100,108,116,116,108,100, 92, 88, 80, 72, 64, 56, 48, 40, 32, 28, 20, 12, 4,
241 4, 12, 20, 28, 40, 48, 56, 64, 72, 80, 88, 96,108,116,124,132,132,124,116,108, 96, 88, 80, 72, 64, 56, 48, 40, 28, 20, 12, 4,
242 4, 16, 24, 32, 44, 52, 60, 72, 80, 92,100,108,120,128,136,148,148,136,128,120,108,100, 92, 80, 72, 60, 52, 44, 32, 24, 16, 4,
243 4, 16, 28, 36, 48, 56, 68, 80, 88,100,112,120,132,140,152,164,164,152,140,132,120,112,100, 88, 80, 68, 56, 48, 36, 28, 16, 4,
244 4, 16, 28, 40, 52, 64, 76, 88, 96,108,120,132,144,156,168,180,180,168,156,144,132,120,108, 96, 88, 76, 64, 52, 40, 28, 16, 4,
245 8, 20, 32, 44, 56, 68, 80, 92,108,120,132,144,156,168,180,192,192,180,168,156,144,132,120,108, 92, 80, 68, 56, 44, 32, 20, 8,
246 8, 20, 32, 48, 60, 76, 88,100,116,128,140,156,168,184,196,208,208,196,184,168,156,140,128,116,100, 88, 76, 60, 48, 32, 20, 8,
247 8, 20, 36, 52, 64, 80, 96,108,124,136,152,168,180,196,212,224,224,212,196,180,168,152,136,124,108, 96, 80, 64, 52, 36, 20, 8,
248 8, 24, 40, 56, 68, 84,100,116,132,148,164,180,192,208,224,240,240,224,208,192,180,164,148,132,116,100, 84, 68, 56, 40, 24, 8,
249 8, 24, 40, 56, 68, 84,100,116,132,148,164,180,192,208,224,240,240,224,208,192,180,164,148,132,116,100, 84, 68, 56, 40, 24, 8,
250 8, 20, 36, 52, 64, 80, 96,108,124,136,152,168,180,196,212,224,224,212,196,180,168,152,136,124,108, 96, 80, 64, 52, 36, 20, 8,
251 8, 20, 32, 48, 60, 76, 88,100,116,128,140,156,168,184,196,208,208,196,184,168,156,140,128,116,100, 88, 76, 60, 48, 32, 20, 8,
252 8, 20, 32, 44, 56, 68, 80, 92,108,120,132,144,156,168,180,192,192,180,168,156,144,132,120,108, 92, 80, 68, 56, 44, 32, 20, 8,
253 4, 16, 28, 40, 52, 64, 76, 88, 96,108,120,132,144,156,168,180,180,168,156,144,132,120,108, 96, 88, 76, 64, 52, 40, 28, 16, 4,
254 4, 16, 28, 36, 48, 56, 68, 80, 88,100,112,120,132,140,152,164,164,152,140,132,120,112,100, 88, 80, 68, 56, 48, 36, 28, 16, 4,
255 4, 16, 24, 32, 44, 52, 60, 72, 80, 92,100,108,120,128,136,148,148,136,128,120,108,100, 92, 80, 72, 60, 52, 44, 32, 24, 16, 4,
256 4, 12, 20, 28, 40, 48, 56, 64, 72, 80, 88, 96,108,116,124,132,132,124,116,108, 96, 88, 80, 72, 64, 56, 48, 40, 28, 20, 12, 4,
257 4, 12, 20, 28, 32, 40, 48, 56, 64, 72, 80, 88, 92,100,108,116,116,108,100, 92, 88, 80, 72, 64, 56, 48, 40, 32, 28, 20, 12, 4,
258 4, 8, 16, 24, 28, 36, 44, 48, 56, 60, 68, 76, 80, 88, 96,100,100, 96, 88, 80, 76, 68, 60, 56, 48, 44, 36, 28, 24, 16, 8, 4,
259 4, 8, 12, 20, 24, 32, 36, 40, 48, 52, 56, 64, 68, 76, 80, 84, 84, 80, 76, 68, 64, 56, 52, 48, 40, 36, 32, 24, 20, 12, 8, 4,
260 4, 8, 12, 16, 20, 24, 28, 32, 40, 44, 48, 52, 56, 60, 64, 68, 68, 64, 60, 56, 52, 48, 44, 40, 32, 28, 24, 20, 16, 12, 8, 4,
261 0, 4, 8, 12, 16, 20, 24, 28, 28, 32, 36, 40, 44, 48, 52, 56, 56, 52, 48, 44, 40, 36, 32, 28, 28, 24, 20, 16, 12, 8, 4, 0,
262 0, 4, 8, 8, 12, 12, 16, 20, 20, 24, 28, 28, 32, 32, 36, 40, 40, 36, 32, 32, 28, 28, 24, 20, 20, 16, 12, 12, 8, 8, 4, 0,
263 0, 4, 4, 4, 8, 8, 8, 12, 12, 16, 16, 16, 20, 20, 20, 24, 24, 20, 20, 20, 16, 16, 16, 12, 12, 8, 8, 8, 4, 4, 4, 0,
264 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0,
/* 256-entry weight table, listed as 16 rows of 16; every listed value is a
 * multiple of 4 and the largest value is 224, in the centre 2x2.
 * NOTE(review): one of three same-named obmc16 definitions in this file; presumably
 * selected by a preprocessor conditional not visible here -- confirm. */
267 static const uint8_t obmc16[256]={
268 0, 4, 4, 8, 8, 12, 12, 16, 16, 12, 12, 8, 8, 4, 4, 0,
269 4, 8, 16, 20, 28, 32, 40, 44, 44, 40, 32, 28, 20, 16, 8, 4,
270 4, 16, 24, 36, 44, 56, 64, 76, 76, 64, 56, 44, 36, 24, 16, 4,
271 8, 20, 36, 48, 64, 76, 92,104,104, 92, 76, 64, 48, 36, 20, 8,
272 8, 28, 44, 64, 80,100,116,136,136,116,100, 80, 64, 44, 28, 8,
273 12, 32, 56, 76,100,120,144,164,164,144,120,100, 76, 56, 32, 12,
274 12, 40, 64, 92,116,144,168,196,196,168,144,116, 92, 64, 40, 12,
275 16, 44, 76,104,136,164,196,224,224,196,164,136,104, 76, 44, 16,
276 16, 44, 76,104,136,164,196,224,224,196,164,136,104, 76, 44, 16,
277 12, 40, 64, 92,116,144,168,196,196,168,144,116, 92, 64, 40, 12,
278 12, 32, 56, 76,100,120,144,164,164,144,120,100, 76, 56, 32, 12,
279 8, 28, 44, 64, 80,100,116,136,136,116,100, 80, 64, 44, 28, 8,
280 8, 20, 36, 48, 64, 76, 92,104,104, 92, 76, 64, 48, 36, 20, 8,
281 4, 16, 24, 36, 44, 56, 64, 76, 76, 64, 56, 44, 36, 24, 16, 4,
282 4, 8, 16, 20, 28, 32, 40, 44, 44, 40, 32, 28, 20, 16, 8, 4,
283 0, 4, 4, 8, 8, 12, 12, 16, 16, 12, 12, 8, 8, 4, 4, 0,
/* 1024-entry weight table, listed as 32 rows of 32; values grow from 0 at the
 * border to 64 in the centre.
 * NOTE(review): one of three same-named obmc32 definitions in this file; presumably
 * selected by a preprocessor conditional not visible here -- confirm. */
287 static const uint8_t obmc32[1024]={
288 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
289 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
290 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
291 0, 0, 1, 1, 1, 2, 2, 3, 4, 5, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 6, 5, 5, 4, 3, 2, 2, 1, 1, 1, 0, 0,
292 0, 0, 1, 1, 2, 3, 4, 5, 6, 7, 9,10,11,11,12,12,12,12,11,11,10, 9, 7, 6, 5, 4, 3, 2, 1, 1, 0, 0,
293 0, 0, 1, 2, 3, 5, 6, 8, 9,11,12,14,15,16,17,17,17,17,16,15,14,12,11, 9, 8, 6, 5, 3, 2, 1, 0, 0,
294 0, 1, 1, 2, 4, 6, 8,10,12,15,17,19,20,21,22,23,23,22,21,20,19,17,15,12,10, 8, 6, 4, 2, 1, 1, 0,
295 0, 1, 2, 3, 5, 8,10,13,16,19,21,24,26,27,28,29,29,28,27,26,24,21,19,16,13,10, 8, 5, 3, 2, 1, 0,
296 0, 1, 2, 4, 6, 9,12,16,19,23,26,29,31,33,34,35,35,34,33,31,29,26,23,19,16,12, 9, 6, 4, 2, 1, 0,
297 0, 1, 3, 5, 7,11,15,19,23,26,30,34,37,39,40,41,41,40,39,37,34,30,26,23,19,15,11, 7, 5, 3, 1, 0,
298 0, 1, 3, 5, 9,12,17,21,26,30,35,38,42,44,46,47,47,46,44,42,38,35,30,26,21,17,12, 9, 5, 3, 1, 0,
299 0, 1, 3, 6, 9,14,19,24,29,34,38,43,46,49,51,52,52,51,49,46,43,38,34,29,24,19,14, 9, 6, 3, 1, 0,
300 0, 1, 3, 6,11,15,20,26,31,37,42,46,50,53,56,57,57,56,53,50,46,42,37,31,26,20,15,11, 6, 3, 1, 0,
301 0, 1, 3, 7,11,16,21,27,33,39,44,49,53,57,59,60,60,59,57,53,49,44,39,33,27,21,16,11, 7, 3, 1, 0,
302 0, 1, 4, 7,12,17,22,28,34,40,46,51,56,59,61,63,63,61,59,56,51,46,40,34,28,22,17,12, 7, 4, 1, 0,
303 0, 1, 4, 7,12,17,23,29,35,41,47,52,57,60,63,64,64,63,60,57,52,47,41,35,29,23,17,12, 7, 4, 1, 0,
304 0, 1, 4, 7,12,17,23,29,35,41,47,52,57,60,63,64,64,63,60,57,52,47,41,35,29,23,17,12, 7, 4, 1, 0,
305 0, 1, 4, 7,12,17,22,28,34,40,46,51,56,59,61,63,63,61,59,56,51,46,40,34,28,22,17,12, 7, 4, 1, 0,
306 0, 1, 3, 7,11,16,21,27,33,39,44,49,53,57,59,60,60,59,57,53,49,44,39,33,27,21,16,11, 7, 3, 1, 0,
307 0, 1, 3, 6,11,15,20,26,31,37,42,46,50,53,56,57,57,56,53,50,46,42,37,31,26,20,15,11, 6, 3, 1, 0,
308 0, 1, 3, 6, 9,14,19,24,29,34,38,43,46,49,51,52,52,51,49,46,43,38,34,29,24,19,14, 9, 6, 3, 1, 0,
309 0, 1, 3, 5, 9,12,17,21,26,30,35,38,42,44,46,47,47,46,44,42,38,35,30,26,21,17,12, 9, 5, 3, 1, 0,
310 0, 1, 3, 5, 7,11,15,19,23,26,30,34,37,39,40,41,41,40,39,37,34,30,26,23,19,15,11, 7, 5, 3, 1, 0,
311 0, 1, 2, 4, 6, 9,12,16,19,23,26,29,31,33,34,35,35,34,33,31,29,26,23,19,16,12, 9, 6, 4, 2, 1, 0,
312 0, 1, 2, 3, 5, 8,10,13,16,19,21,24,26,27,28,29,29,28,27,26,24,21,19,16,13,10, 8, 5, 3, 2, 1, 0,
313 0, 1, 1, 2, 4, 6, 8,10,12,15,17,19,20,21,22,23,23,22,21,20,19,17,15,12,10, 8, 6, 4, 2, 1, 1, 0,
314 0, 0, 1, 2, 3, 5, 6, 8, 9,11,12,14,15,16,17,17,17,17,16,15,14,12,11, 9, 8, 6, 5, 3, 2, 1, 0, 0,
315 0, 0, 1, 1, 2, 3, 4, 5, 6, 7, 9,10,11,11,12,12,12,12,11,11,10, 9, 7, 6, 5, 4, 3, 2, 1, 1, 0, 0,
316 0, 0, 1, 1, 1, 2, 2, 3, 4, 5, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 6, 5, 5, 4, 3, 2, 2, 1, 1, 1, 0, 0,
317 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
318 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
319 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
/* 256-entry weight table, listed as 16 rows of 16; the largest value is 63, in the
 * centre 2x2.
 * NOTE(review): one of three same-named obmc16 definitions in this file; presumably
 * selected by a preprocessor conditional not visible here -- confirm. */
322 static const uint8_t obmc16[256]={
323 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
324 0, 0, 1, 2, 3, 4, 5, 5, 5, 5, 4, 3, 2, 1, 0, 0,
325 0, 1, 3, 6, 8,11,13,14,14,13,11, 8, 6, 3, 1, 0,
326 0, 2, 6,10,15,20,24,26,26,24,20,15,10, 6, 2, 0,
327 0, 3, 8,16,23,30,35,38,38,35,30,23,16, 8, 3, 0,
328 1, 4,11,20,30,39,46,49,49,46,39,30,20,11, 4, 1,
329 1, 5,13,24,35,46,54,58,58,54,46,35,24,13, 5, 1,
330 0, 5,14,26,38,49,58,63,63,58,49,38,26,14, 5, 0,
331 0, 5,14,26,38,49,58,63,63,58,49,38,26,14, 5, 0,
332 1, 5,13,24,35,46,54,58,58,54,46,35,24,13, 5, 1,
333 1, 4,11,20,30,39,46,49,49,46,39,30,20,11, 4, 1,
334 0, 3, 8,16,23,30,35,38,38,35,30,23,16, 8, 3, 0,
335 0, 2, 6,10,15,20,24,26,26,24,20,15,10, 6, 2, 0,
336 0, 1, 3, 6, 8,11,13,14,14,13,11, 8, 6, 3, 1, 0,
337 0, 0, 1, 2, 3, 4, 5, 5, 5, 5, 4, 3, 2, 1, 0, 0,
338 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
/* 64-entry weight table, listed as 8 rows of 8. Each row reads the same in both
 * directions and the rows mirror top-to-bottom; the largest value is 196, in the
 * centre 2x2. */
344 static const uint8_t obmc8[64]={
345 4, 12, 20, 28, 28, 20, 12, 4,
346 12, 36, 60, 84, 84, 60, 36, 12,
347 20, 60,100,140,140,100, 60, 20,
348 28, 84,140,196,196,140, 84, 28,
349 28, 84,140,196,196,140, 84, 28,
350 20, 60,100,140,140,100, 60, 20,
351 12, 36, 60, 84, 84, 60, 36, 12,
352 4, 12, 20, 28, 28, 20, 12, 4,
/* 16-entry weight table, the smallest of the obmc* tables referenced by obmc_tab.
 * The data rows are not visible in this listing. */
357 static const uint8_t obmc4[16]={
/* Pointers to the four weight tables, ordered from the largest (obmc32, index 0)
 * to the smallest (obmc4, index 3).
 * NOTE(review): presumably indexed by a block-size level -- confirm at the callers. */
365 static const uint8_t *obmc_tab[4]={
366 obmc32, obmc16, obmc8, obmc4
/* Per-block node type. Only a few members are visible in this listing, so the full
 * layout cannot be described from here. */
369 typedef struct BlockNode{
375 //#define TYPE_SPLIT 1
// Flag value 1. NOTE(review): presumably tested against a type member of this struct -- confirm.
376 #define BLOCK_INTRA 1
378 //#define TYPE_NOCOLOR 4
379 uint8_t level; //FIXME merge into type?
/* Constant default BlockNode. The visible initialiser sets all three color
 * components to 128; any further initialisers are not visible in this listing. */
382 static const BlockNode null_block= { //FIXME add border maybe
383 .color= {128,128,128},
// MB_SIZE is derived from its base-2 logarithm: 1<<4 == 16.
391 #define LOG2_MB_SIZE 4
392 #define MB_SIZE (1<<LOG2_MB_SIZE)
/* Element type of the SubBand.x_coeff array. Its members are not visible in this
 * listing. */
394 typedef struct x_and_coeff{
/* Description of one wavelet subband. Only part of the member list is visible in
 * this listing. */
399 typedef struct SubBand{
404 int qlog; ///< log(qscale)/log[2^(1/6)]
408 int stride_line; ///< Stride measured in lines, not pixels.
409 x_and_coeff * x_coeff;
// Link to another SubBand. NOTE(review): presumably the same orientation at the next coarser level -- confirm.
410 struct SubBand *parent;
// 7 + 512 rows of 32 bytes. The symbol coders in this file address offsets 0..31 of
// the state pointer they are given, so one row is presumably one coder context.
411 uint8_t state[/*7*2*/ 7 + 512][32];
/* Per-plane data. Only the subband array is visible in this listing. */
414 typedef struct Plane{
// MAX_DECOMPOSITIONS groups of 4 subbands. NOTE(review): presumably [level][orientation] -- confirm.
417 SubBand band[MAX_DECOMPOSITIONS][4];
/* Top-level codec state: frames, coder contexts, decomposition settings, per-plane
 * data and a motion-estimation cache. Only part of the member list is visible in
 * this listing. */
420 typedef struct SnowContext{
421 // MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to make the motion estimation eventually independent of MpegEncContext, so this will be removed then (FIXME/XXX)
423 AVCodecContext *avctx;
427 AVFrame input_picture; ///< new_picture with the internal linesizes
428 AVFrame current_picture;
429 AVFrame last_picture[MAX_REF_FRAMES];
430 AVFrame mconly_picture;
431 // uint8_t q_context[16];
// 32 bytes: the size of one put_symbol()/get_symbol() context as addressed in this file.
432 uint8_t header_state[32];
433 uint8_t block_state[128 + 32*128];
437 int spatial_decomposition_type;
438 int temporal_decomposition_type;
439 int spatial_decomposition_count;
440 int temporal_decomposition_count;
// One entry per reference frame; each points to an array of 2-element int16_t vectors.
443 int16_t (*ref_mvs[MAX_REF_FRAMES])[2];
444 uint32_t *ref_scores[MAX_REF_FRAMES];
445 DWTELEM *spatial_dwt_buffer;
449 int spatial_scalability;
456 #define QBIAS_SHIFT 3
460 Plane plane[MAX_PLANES];
462 #define ME_CACHE_SIZE 1024
463 int me_cache[ME_CACHE_SIZE];
464 int me_cache_generation;
467 MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to make the motion estimation eventually independent of MpegEncContext, so this will be removed then (FIXME/XXX)
// Returns the line pointer already stored in slice_buf->line[line_num] when it is
// non-NULL, otherwise falls back to slice_buffer_load_line().
// Both arguments are expanded more than once, so they must be free of side effects.
478 #define slice_buffer_get_line(slice_buf, line_num) ((slice_buf)->line[line_num] ? (slice_buf)->line[line_num] : slice_buffer_load_line((slice_buf), (line_num)))
479 //#define slice_buffer_get_line(slice_buf, line_num) (slice_buffer_load_line((slice_buf), (line_num)))
// Forward declaration; the definition is not within this part of the file.
481 static void iterative_me(SnowContext *s);
/* Set up a slice_buffer: a table of line_count line pointers (zero-initialised)
 * backed by a stack of max_allocated_lines pre-allocated line buffers, each holding
 * line_width DWTELEMs.
 * NOTE(review): no allocation result is checked in the visible lines. */
483 static void slice_buffer_init(slice_buffer * buf, int line_count, int max_allocated_lines, int line_width, DWTELEM * base_buffer)
487 buf->base_buffer = base_buffer;
488 buf->line_count = line_count;
489 buf->line_width = line_width;
490 buf->data_count = max_allocated_lines;
// av_mallocz: every line starts out as NULL, i.e. "not loaded".
491 buf->line = (DWTELEM * *) av_mallocz (sizeof(DWTELEM *) * line_count);
492 buf->data_stack = (DWTELEM * *) av_malloc (sizeof(DWTELEM *) * max_allocated_lines);
494 for (i = 0; i < max_allocated_lines; i++)
496 buf->data_stack[i] = (DWTELEM *) av_malloc (sizeof(DWTELEM) * line_width);
// All buffers are free initially, so the top of the stack is its last slot.
499 buf->data_stack_top = max_allocated_lines - 1;
/* Attach a free buffer from the stack to the given line and record it in
 * buf->line[line].
 * NOTE(review): some lines of the body are missing from this listing, including the
 * condition guarding the early return and the final return statement. */
502 static DWTELEM * slice_buffer_load_line(slice_buffer * buf, int line)
507 // av_log(NULL, AV_LOG_DEBUG, "Cache hit: %d\n", line);
// A free buffer must be available.
509 assert(buf->data_stack_top >= 0);
510 // assert(!buf->line[line]);
512 return buf->line[line];
514 offset = buf->line_width * line;
// Pop the top free buffer and bind it to this line.
515 buffer = buf->data_stack[buf->data_stack_top];
516 buf->data_stack_top--;
517 buf->line[line] = buffer;
519 // av_log(NULL, AV_LOG_DEBUG, "slice_buffer_load_line: line: %d remaining: %d\n", line, buf->data_stack_top + 1);
/* Return the buffer bound to the given line to the free stack and mark the line as
 * not loaded. The line must be in range and currently loaded (asserted). */
524 static void slice_buffer_release(slice_buffer * buf, int line)
529 assert(line >= 0 && line < buf->line_count);
530 assert(buf->line[line]);
532 offset = buf->line_width * line;
// Push the buffer back onto the free stack, then clear the line entry.
533 buffer = buf->line[line];
534 buf->data_stack_top++;
535 buf->data_stack[buf->data_stack_top] = buffer;
536 buf->line[line] = NULL;
538 // av_log(NULL, AV_LOG_DEBUG, "slice_buffer_release: line: %d remaining: %d\n", line, buf->data_stack_top + 1);
/* Walk all line slots and release them.
 * NOTE(review): the lines between the loop header and the release call are missing
 * from this listing; since slice_buffer_release() asserts that the line is loaded,
 * the call is presumably guarded by a check of buf->line[i] -- confirm. */
541 static void slice_buffer_flush(slice_buffer * buf)
544 for (i = 0; i < buf->line_count; i++)
548 // av_log(NULL, AV_LOG_DEBUG, "slice_buffer_flush: line: %d \n", i);
549 slice_buffer_release(buf, i);
/* Free everything slice_buffer_init() allocated: flush first so the line buffers
 * are back on the stack, then free each buffer, the stack and the line table. */
554 static void slice_buffer_destroy(slice_buffer * buf)
557 slice_buffer_flush(buf);
559 for (i = buf->data_count - 1; i >= 0; i--)
561 assert(buf->data_stack[i]);
562 av_freep(&buf->data_stack[i]);
564 assert(buf->data_stack);
565 av_freep(&buf->data_stack);
567 av_freep(&buf->line);
571 // Avoid a name clash on SGI IRIX
// Shift amount derived from FRAC_BITS; with the formula below it equals 15 - FRAC_BITS.
574 #define QEXPSHIFT (7-FRAC_BITS+8) //FIXME try to change this to 0
// Table of QROOT bytes. It has no initialiser here, so it is presumably filled at
// run time elsewhere in the file -- confirm.
575 static uint8_t qexp[QROOT];
/* Fold v into the range 0..m. Only the loop header is visible in this listing.
 * The unsigned comparison is true both for v > m and for negative v (which becomes
 * a large unsigned value), assuming m is non-negative. */
577 static inline int mirror(int v, int m){
578 while((unsigned)v > (unsigned)m){
/* Encode integer v with the range coder, using adaptive contexts from state.
 * Context layout, per the inline offset comments: state[0] is the zero flag,
 * 1..10 the exponent in unary, 11..21 the sign, 22..31 the mantissa bits.
 * e is floor(log2(|v|)); the sign is coded only when is_signed is set.
 * NOTE(review): several similar encoding sequences are visible below; they are
 * presumably alternatives under preprocessor conditionals that this listing does
 * not show. Some lines (loop headers, conditions) are missing as well. */
585 static inline void put_symbol(RangeCoder *c, uint8_t *state, int v, int is_signed){
590 const int e= av_log2(a);
// Exponent clamped to 10; it selects the sign context below.
592 const int el= FFMIN(e, 10);
// Zero flag: 0 means "v is non-zero".
593 put_rac(c, state+0, 0);
596 put_rac(c, state+1+i, 1); //1..10
// Exponent bits beyond the clamp share one context.
599 put_rac(c, state+1+9, 1); //1..10
// Terminating 0 of the unary exponent.
601 put_rac(c, state+1+FFMIN(i,9), 0);
// Mantissa bits, most significant first; the high bits share context 22+9.
603 for(i=e-1; i>=el; i--){
604 put_rac(c, state+22+9, (a>>i)&1); //22..31
607 put_rac(c, state+22+i, (a>>i)&1); //22..31
611 put_rac(c, state+11 + el, v < 0); //11..21
614 put_rac(c, state+0, 0);
617 put_rac(c, state+1+i, 1); //1..10
619 put_rac(c, state+1+i, 0);
621 for(i=e-1; i>=0; i--){
622 put_rac(c, state+22+i, (a>>i)&1); //22..31
626 put_rac(c, state+11 + e, v < 0); //11..21
629 put_rac(c, state+1+FFMIN(i,9), 1); //1..10
631 put_rac(c, state+1+FFMIN(i,9), 0);
633 for(i=e-1; i>=0; i--){
634 put_rac(c, state+22+FFMIN(i,9), (a>>i)&1); //22..31
638 put_rac(c, state+11 + FFMIN(e,10), v < 0); //11..21
// Zero flag set to 1. NOTE(review): presumably the v == 0 branch -- confirm.
642 put_rac(c, state+0, 1);
/* Decode an integer written by put_symbol(), using the same context layout:
 * state[0] zero flag, 1..10 exponent, 11..21 sign, 22..31 mantissa bits.
 * NOTE(review): the return statements are missing from this listing. */
646 static inline int get_symbol(RangeCoder *c, uint8_t *state, int is_signed){
// Zero flag.
647 if(get_rac(c, state+0))
// Unary exponent; contexts above index 9 are shared.
652 while(get_rac(c, state+1 + FFMIN(e,9))){ //1..10
// Rebuild the magnitude bit by bit, most significant first (a = 2*a + bit).
657 for(i=e-1; i>=0; i--){
658 a += a + get_rac(c, state+22 + FFMIN(i,9)); //22..31
// A sign bit is read only for signed symbols.
661 if(is_signed && get_rac(c, state+11 + FFMIN(e,10))) //11..21
/* Encode v with a bucket size that starts at 2^log2 (1 when log2 is negative).
 * A 1 bit is written per step at context state[4+log2], a 0 bit ends the prefix,
 * and the low log2 bits of v follow, most significant first, at contexts state[31-i].
 * NOTE(review): the loop that updates v, r and log2 is only partly visible in this
 * listing. */
668 static inline void put_symbol2(RangeCoder *c, uint8_t *state, int v, int log2){
670 int r= log2>=0 ? 1<<log2 : 1;
676 put_rac(c, state+4+log2, 1);
681 put_rac(c, state+4+log2, 0);
683 for(i=log2-1; i>=0; i--){
684 put_rac(c, state+31-i, (v>>i)&1);
/* Decode a value written by put_symbol2(): read prefix bits at context
 * state[4+log2] while they are 1, then add the low bits read at contexts state[31-i].
 * NOTE(review): the body of the prefix loop and the return statement are missing
 * from this listing. */
688 static inline int get_symbol2(RangeCoder *c, uint8_t *state, int log2){
690 int r= log2>=0 ? 1<<log2 : 1;
695 while(get_rac(c, state+4+log2)){
701 for(i=log2-1; i>=0; i--){
702 v+= get_rac(c, state+31-i)<<i;
/* One lifting step: each dst sample is the src sample plus (forward) or minus
 * (inverse != 0) a prediction built from two neighbouring ref samples:
 *   (mul*(ref[i] + ref[i+1]) + add) >> shift
 * dst_step, src_step and ref_step are the element strides of the three arrays.
 * At an edge the single available ref sample is counted twice (mul*2*ref[...]).
 * NOTE(review): the conditions and loop header around the three assignments are
 * missing from this listing; the edge cases are presumably guarded by mirror_left
 * and mirror_right -- confirm. */
708 static always_inline void lift(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int dst_step, int src_step, int ref_step, int width, int mul, int add, int shift, int highpass, int inverse){
709 const int mirror_left= !highpass;
710 const int mirror_right= (width&1) ^ highpass;
// Number of interior samples, each with two ref neighbours.
711 const int w= (width>>1) - 1 + (highpass & width);
// Adds ref to src for the forward step, subtracts it for the inverse step.
714 #define LIFT(src, ref, inv) ((src) + ((inv) ? - (ref) : + (ref)))
716 dst[0] = LIFT(src[0], ((mul*2*ref[0]+add)>>shift), inverse);
722 dst[i*dst_step] = LIFT(src[i*src_step], ((mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add)>>shift), inverse);
726 dst[w*dst_step] = LIFT(src[w*src_step], ((mul*2*ref[w*ref_step]+add)>>shift), inverse);
/* Lifting step with the same structure as lift(), but the prediction starts from
 * 3*(ref[i] + ref[i+1]) (or 3*2*ref[...] at an edge) instead of using mul.
 * NOTE(review): lines that further adjust r before it is used, and the surrounding
 * conditions and loop header, are missing from this listing. mul is not referenced
 * in the visible lines. */
731 static always_inline void lift5(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int dst_step, int src_step, int ref_step, int width, int mul, int add, int shift, int highpass, int inverse){
732 const int mirror_left= !highpass;
733 const int mirror_right= (width&1) ^ highpass;
734 const int w= (width>>1) - 1 + (highpass & width);
741 dst[0] = LIFT(src[0], ((r+add)>>shift), inverse);
747 int r= 3*(ref[i*ref_step] + ref[(i+1)*ref_step]);
750 dst[i*dst_step] = LIFT(src[i*src_step], ((r+add)>>shift), inverse);
754 int r= 3*2*ref[w*ref_step];
757 dst[w*dst_step] = LIFT(src[w*src_step], ((r+add)>>shift), inverse);
/* Lifting step with the same structure as lift(), but the update is done by the
 * LIFTS macro, whose forward and inverse forms are different formulas.
 * NOTE(review): the conditions and loop header around the three assignments are
 * missing from this listing. */
763 static always_inline void liftS(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int dst_step, int src_step, int ref_step, int width, int mul, int add, int shift, int highpass, int inverse){
764 const int mirror_left= !highpass;
765 const int mirror_right= (width&1) ^ highpass;
766 const int w= (width>>1) - 1 + (highpass & width);
// inverse: src - ((ref - 4*src) >> shift)
// forward: (64*src + 4*ref + 8) / 80, computed with a bias of 5<<27 that is removed
// again as 1<<23 after the division ((5<<27)/(5*16) == 1<<23). The bias presumably
// keeps the dividend non-negative so the division rounds the same way for all inputs.
// The macro reads `shift` from the enclosing function.
770 #define LIFTS(src, ref, inv) ((inv) ? (src) - (((ref) - 4*(src))>>shift): (16*4*(src) + 4*(ref) + 8 + (5<<27))/(5*16) - (1<<23))
772 dst[0] = LIFTS(src[0], mul*2*ref[0]+add, inverse);
778 dst[i*dst_step] = LIFTS(src[i*src_step], mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add, inverse);
782 dst[w*dst_step] = LIFTS(src[w*src_step], mul*2*ref[w*ref_step]+add, inverse);
/* In-place lifting pass over one row. For every second sample, starting at `start`,
 * a weighted sum of n neighbours (weights coeffs[0..n-1], positions x + 2*i - n + 1)
 * is rounded, shifted right by `shift`, and added to the sample (subtracted when
 * inverse is set).
 * NOTE(review): the inner loop header and the left-edge handling are missing from
 * this listing. */
788 static void inplace_lift(DWTELEM *dst, int width, int *coeffs, int n, int shift, int start, int inverse){
791 for(x=start; x<width; x+=2){
795 int x2= x + 2*i - n + 1;
// Reflect positions past the right edge back into the row.
797 else if(x2>=width) x2= 2*width-x2-2;
// 64-bit product, so large coefficients cannot overflow the multiplication.
798 sum += coeffs[i]*(int64_t)dst[x2];
// (1<<shift)/2 is the rounding offset for the shift.
800 if(inverse) dst[x] -= (sum + (1<<shift)/2)>>shift;
801 else dst[x] += (sum + (1<<shift)/2)>>shift;
/* Vertical counterpart of inplace_lift(): for every second row, starting at `start`,
 * each sample is updated from n vertical neighbours (rows y + 2*i - n + 1), addressed
 * as dst[x + row*stride].
 * NOTE(review): the inner loop header and the top-edge handling are missing from
 * this listing. */
805 static void inplace_liftV(DWTELEM *dst, int width, int height, int stride, int *coeffs, int n, int shift, int start, int inverse){
807 for(y=start; y<height; y+=2){
808 for(x=0; x<width; x++){
812 int y2= y + 2*i - n + 1;
// Reflect rows past the bottom edge back into the image.
814 else if(y2>=height) y2= 2*height-y2-2;
815 sum += coeffs[i]*(int64_t)dst[x + y2*stride];
817 if(inverse) dst[x + y*stride] -= (sum + (1<<shift)/2)>>shift;
818 else dst[x + y*stride] += (sum + (1<<shift)/2)>>shift;
// Coefficient arrays (compound literals) for the lifting passes made by
// horizontal_decomposeX()/spatial_decomposeX() and their inverses.
// NOTE(review): COEFFS1..COEFFS4 are defined several times below. Apart from the
// first `#if 0`, the preprocessor conditionals that choose one set, and the matching
// N*/SHIFT*/LX* definitions, are missing from this listing.
827 #if 0 // more accurate 9/7
830 #define COEFFS1 (int[]){-25987,-25987}
833 #define COEFFS2 (int[]){-27777,-27777}
836 #define COEFFS3 (int[]){28931,28931}
839 #define COEFFS4 (int[]){14533,14533}
843 #define COEFFS1 (int[]){1,-9,-9,1}
846 #define COEFFS2 (int[]){-1,5,5,-1}
859 #define COEFFS1 (int[]){1,1}
862 #define COEFFS2 (int[]){-1,-1}
875 #define COEFFS2 (int[]){-1,-1}
878 #define COEFFS3 (int[]){-1,-1}
881 #define COEFFS4 (int[]){-5,29,29,-5}
886 #define COEFFS1 (int[]){-203,-203}
889 #define COEFFS2 (int[]){-217,-217}
892 #define COEFFS3 (int[]){113,113}
895 #define COEFFS4 (int[]){227,227}
903 #define COEFFS2 (int[]){-1,-1}
906 #define COEFFS3 (int[]){-1,-1}
909 #define COEFFS4 (int[]){3,3}
913 #define COEFFS1 (int[]){1,-9,-9,1}
916 #define COEFFS2 (int[]){1,1}
926 #define COEFFS1 (int[]){1,-9,-9,1}
929 #define COEFFS2 (int[]){-1,5,5,-1}
/* Forward transform of one row: four in-place lifting passes (alternating start
 * positions LX1 and LX0), then a de-interleave through temp that moves the
 * odd-position samples to the second part of the row, starting at index w2.
 * NOTE(review): the declaration of temp and the copy of the even-position samples
 * are missing from this listing. */
937 static void horizontal_decomposeX(DWTELEM *b, int width){
// Number of odd-position samples.
939 const int width2= width>>1;
// Number of even-position samples (rounded up); the odd samples are stored from here.
940 const int w2= (width+1)>>1;
943 inplace_lift(b, width, COEFFS1, N1, SHIFT1, LX1, 0);
944 inplace_lift(b, width, COEFFS2, N2, SHIFT2, LX0, 0);
945 inplace_lift(b, width, COEFFS3, N3, SHIFT3, LX1, 0);
946 inplace_lift(b, width, COEFFS4, N4, SHIFT4, LX0, 0);
948 for(x=0; x<width2; x++){
950 temp[x+w2]= b[2*x + 1];
// NOTE(review): the size is computed with sizeof(int) although b is DWTELEM*; this is
// only correct if DWTELEM has the size of int -- confirm.
954 memcpy(b, temp, width*sizeof(int));
/* Inverse of horizontal_decomposeX() for one row: re-interleave the two parts of
 * the row through temp, then undo the four lifting passes in reverse order
 * (COEFFS4 first) with inverse = 1.
 * NOTE(review): the declaration of temp and the copy of the even-position samples
 * are missing from this listing. */
957 static void horizontal_composeX(DWTELEM *b, int width){
959 const int width2= width>>1;
961 const int w2= (width+1)>>1;
// NOTE(review): sizeof(int) is used for DWTELEM data; only correct if the sizes match.
963 memcpy(temp, b, width*sizeof(int));
964 for(x=0; x<width2; x++){
// Samples stored from index w2 go back to the odd positions.
966 b[2*x + 1]= temp[x+w2];
971 inplace_lift(b, width, COEFFS4, N4, SHIFT4, LX0, 1);
972 inplace_lift(b, width, COEFFS3, N3, SHIFT3, LX1, 1);
973 inplace_lift(b, width, COEFFS2, N2, SHIFT2, LX0, 1);
974 inplace_lift(b, width, COEFFS1, N1, SHIFT1, LX1, 1);
/*
 * Forward 2-D "X" wavelet transform of one decomposition level.
 * Order of operations: multiply every sample by SCALEX, transform each row
 * horizontally, then run the four vertical lifting passes over the block.
 */
977 static void spatial_decomposeX(DWTELEM *buffer, int width, int height, int stride){
980 for(y=0; y<height; y++){
981 for(x=0; x<width; x++){
982 buffer[y*stride + x] *= SCALEX;
986 for(y=0; y<height; y++){
987 horizontal_decomposeX(buffer + y*stride, width);
990 inplace_liftV(buffer, width, height, stride, COEFFS1, N1, SHIFT1, LX1, 0);
991 inplace_liftV(buffer, width, height, stride, COEFFS2, N2, SHIFT2, LX0, 0);
992 inplace_liftV(buffer, width, height, stride, COEFFS3, N3, SHIFT3, LX1, 0);
993 inplace_liftV(buffer, width, height, stride, COEFFS4, N4, SHIFT4, LX0, 0);
/*
 * Inverse 2-D "X" wavelet transform of one decomposition level; mirrors
 * spatial_decomposeX() step by step in reverse: the vertical lifting passes
 * are undone (COEFFS4 first, inverse flag 1), each row is recomposed
 * horizontally, and finally every sample is divided by SCALEX.
 */
996 static void spatial_composeX(DWTELEM *buffer, int width, int height, int stride){
999 inplace_liftV(buffer, width, height, stride, COEFFS4, N4, SHIFT4, LX0, 1);
1000 inplace_liftV(buffer, width, height, stride, COEFFS3, N3, SHIFT3, LX1, 1);
1001 inplace_liftV(buffer, width, height, stride, COEFFS2, N2, SHIFT2, LX0, 1);
1002 inplace_liftV(buffer, width, height, stride, COEFFS1, N1, SHIFT1, LX1, 1);
1004 for(y=0; y<height; y++){
1005 horizontal_composeX(buffer + y*stride, width);
1008 for(y=0; y<height; y++){
1009 for(x=0; x<width; x++){
1010 buffer[y*stride + x] /= SCALEX;
/*
 * Forward horizontal 5/3 transform of one row of `width` samples.
 * temp is a variable-length scratch array of `width` elements on the stack.
 * The odd-indexed samples are gathered into temp starting at offset
 * w2 = (width+1)/2; the two lift() calls at the end pass b+w2 and b as
 * their first argument.
 * NOTE(review): the A1..A4 loop and the lift() calls both appear here;
 * presumably one of them is disabled by a preprocessor conditional that is
 * not visible - confirm.  Also assumes lift()'s first argument is the
 * destination - confirm.
 */
1015 static void horizontal_decompose53i(DWTELEM *b, int width){
1016 DWTELEM temp[width];
1017 const int width2= width>>1;
1019 const int w2= (width+1)>>1;
1021 for(x=0; x<width2; x++){
1023 temp[x+w2]= b[2*x + 1];
1037 for(x=1; x+1<width2; x+=2){
1041 A2 += (A1 + A3 + 2)>>2;
1045 A1= temp[x+1+width2];
1048 A4 += (A1 + A3 + 2)>>2;
1054 A2 += (A1 + A3 + 2)>>2;
1059 lift(b+w2, temp+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 0);
1060 lift(b , temp , b+w2, 1, 1, 1, width, 1, 2, 2, 0, 0);
/*
 * Forward vertical 5/3 lifting step on row b1 using its neighbours b0 and b2:
 * for each of the `width` columns, b1 -= (b0 + b2) >> 1.
 */
1064 static void vertical_decompose53iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1067 for(i=0; i<width; i++){
1068 b1[i] -= (b0[i] + b2[i])>>1;
/*
 * Forward vertical 5/3 lifting step on row b1 using its neighbours b0 and b2:
 * for each of the `width` columns, b1 += (b0 + b2 + 2) >> 2
 * (the +2 rounds the division by 4 to nearest).
 */
1072 static void vertical_decompose53iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1075 for(i=0; i<width; i++){
1076 b1[i] += (b0[i] + b2[i] + 2)>>2;
/*
 * Forward 2-D 5/3 transform of one decomposition level in a single
 * top-to-bottom sweep.  b0/b1 start on rows -3 and -2 (mapped into the block
 * by mirror()); each iteration picks up rows y+1 and y+2, transforms them
 * horizontally, and then applies the two vertical lifting steps.
 * The `idx < (unsigned)height` tests are single-comparison range checks:
 * a negative int index converts to a very large unsigned value and fails.
 */
1080 static void spatial_decompose53i(DWTELEM *buffer, int width, int height, int stride){
1082 DWTELEM *b0= buffer + mirror(-2-1, height-1)*stride;
1083 DWTELEM *b1= buffer + mirror(-2 , height-1)*stride;
1085 for(y=-2; y<height; y+=2){
1086 DWTELEM *b2= buffer + mirror(y+1, height-1)*stride;
1087 DWTELEM *b3= buffer + mirror(y+2, height-1)*stride;
// Horizontal pass only for rows that really exist (not mirrored duplicates).
1090 if(y+1<(unsigned)height) horizontal_decompose53i(b2, width);
1091 if(y+2<(unsigned)height) horizontal_decompose53i(b3, width);
1092 STOP_TIMER("horizontal_decompose53i")}
// Vertical pass: H0 updates b2 from b1/b3, then L0 updates b1 from b0/b2.
1095 if(y+1<(unsigned)height) vertical_decompose53iH0(b1, b2, b3, width);
1096 if(y+0<(unsigned)height) vertical_decompose53iL0(b0, b1, b2, width);
1097 STOP_TIMER("vertical_decompose53i*")}
/*
 * Forward horizontal 9/7 transform of one row of `width` samples.
 * Four lifting stages are chained, parameterised by the W_A*, W_B*, W_C* and
 * W_D* constants; the stages ping-pong between b and the stack scratch array
 * temp, with the second half of each buffer starting at w2 = (width+1)/2.
 * NOTE(review): argument roles of lift()/liftS()/lift5() (destination,
 * source, reference, steps) are assumed from call shape - confirm.
 */
1104 static void horizontal_decompose97i(DWTELEM *b, int width){
1105 DWTELEM temp[width];
1106 const int w2= (width+1)>>1;
1108 lift (temp+w2, b +1, b , 1, 2, 2, width, -W_AM, W_AO, W_AS, 1, 0);
1109 liftS(temp , b , temp+w2, 1, 2, 1, width, -W_BM, W_BO, W_BS, 0, 0);
1110 lift5(b +w2, temp+w2, temp , 1, 1, 1, width, W_CM, W_CO, W_CS, 1, 0);
1111 lift (b , temp , b +w2, 1, 1, 1, width, W_DM, W_DO, W_DS, 0, 0);
/*
 * Forward vertical 9/7 lifting step "A" on row b1 from neighbours b0 and b2:
 * b1 -= (W_AM*(b0 + b2) + W_AO) >> W_AS for each of the `width` columns.
 */
1115 static void vertical_decompose97iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1118 for(i=0; i<width; i++){
1119 b1[i] -= (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
/*
 * Forward vertical 9/7 lifting step "C" on row b1 from neighbours b0 and b2;
 * the result is added to b1.
 * NOTE(review): two formulations are visible (a direct W_CM multiply and one
 * that starts from r = 3*(b0+b2)); presumably a preprocessor conditional that
 * is not shown selects one of them - confirm.
 */
1123 static void vertical_decompose97iH1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1126 for(i=0; i<width; i++){
1128 b1[i] += (W_CM*(b0[i] + b2[i])+W_CO)>>W_CS;
1130 int r= 3*(b0[i] + b2[i]);
1133 b1[i] += (r+W_CO)>>W_CS;
/*
 * Forward vertical 9/7 lifting step "B" on row b1 from neighbours b0 and b2.
 * NOTE(review): two formulations are visible (a W_BM multiply-and-shift that
 * is subtracted from b1, and a closed-form assignment using a division by
 * 5*16 with a (5<<27) bias that is removed again via -(1<<23)); presumably a
 * preprocessor conditional that is not shown selects one of them - confirm.
 */
1138 static void vertical_decompose97iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1141 for(i=0; i<width; i++){
1143 b1[i] -= (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS;
1145 b1[i] = (16*4*b1[i] - 4*(b0[i] + b2[i]) + 8*5 + (5<<27)) / (5*16) - (1<<23);
/*
 * Forward vertical 9/7 lifting step "D" on row b1 from neighbours b0 and b2:
 * b1 += (W_DM*(b0 + b2) + W_DO) >> W_DS for each of the `width` columns.
 */
1150 static void vertical_decompose97iL1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1153 for(i=0; i<width; i++){
1154 b1[i] += (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS;
/*
 * Forward 2-D 9/7 transform of one decomposition level in a single
 * top-to-bottom sweep over a six-row window.  b0..b3 start on rows -5..-2
 * (mapped into the block by mirror()); each iteration picks up rows y+3 and
 * y+4, transforms them horizontally and applies the four vertical lifting
 * steps, each one row pair behind the previous.
 * The `idx < (unsigned)height` tests are single-comparison range checks:
 * a negative int index converts to a very large unsigned value and fails.
 */
1158 static void spatial_decompose97i(DWTELEM *buffer, int width, int height, int stride){
1160 DWTELEM *b0= buffer + mirror(-4-1, height-1)*stride;
1161 DWTELEM *b1= buffer + mirror(-4 , height-1)*stride;
1162 DWTELEM *b2= buffer + mirror(-4+1, height-1)*stride;
1163 DWTELEM *b3= buffer + mirror(-4+2, height-1)*stride;
1165 for(y=-4; y<height; y+=2){
1166 DWTELEM *b4= buffer + mirror(y+3, height-1)*stride;
1167 DWTELEM *b5= buffer + mirror(y+4, height-1)*stride;
// Horizontal pass only for rows that really exist.
1170 if(y+3<(unsigned)height) horizontal_decompose97i(b4, width);
1171 if(y+4<(unsigned)height) horizontal_decompose97i(b5, width);
1173 STOP_TIMER("horizontal_decompose97i")
// Vertical lifting steps; the middle argument is the row being updated.
1177 if(y+3<(unsigned)height) vertical_decompose97iH0(b3, b4, b5, width);
1178 if(y+2<(unsigned)height) vertical_decompose97iL0(b2, b3, b4, width);
1179 if(y+1<(unsigned)height) vertical_decompose97iH1(b1, b2, b3, width);
1180 if(y+0<(unsigned)height) vertical_decompose97iL1(b0, b1, b2, width);
1183 STOP_TIMER("vertical_decompose97i")
/*
 * Forward multi-level 2-D wavelet transform, in place on `buffer`.
 * For each level 0..decomposition_count-1 the transform selected by the case
 * label (0: 9/7 integer, 1: 5/3 integer, 2: generic "X") is run on a block of
 * (width>>level) x (height>>level) samples addressed with a row pitch of
 * stride<<level.
 * NOTE(review): the switch expression is not visible; presumably `type`.
 */
1193 void ff_spatial_dwt(DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
1196 for(level=0; level<decomposition_count; level++){
1198 case 0: spatial_decompose97i(buffer, width>>level, height>>level, stride<<level); break;
1199 case 1: spatial_decompose53i(buffer, width>>level, height>>level, stride<<level); break;
1200 case 2: spatial_decomposeX (buffer, width>>level, height>>level, stride<<level); break;
/*
 * Inverse horizontal 5/3 transform of one row of `width` samples.
 * The two lift() calls (last argument 1) pass temp and temp+w2 as their first
 * argument; the final loop writes the second half of temp (from offset w2)
 * back to the odd positions of b.
 * temp is a variable-length scratch array of `width` elements on the stack.
 * NOTE(review): the A1..A4 loop and the lift() calls both appear here;
 * presumably one of them is disabled by a preprocessor conditional that is
 * not visible - confirm.
 */
1205 static void horizontal_compose53i(DWTELEM *b, int width){
1206 DWTELEM temp[width];
1207 const int width2= width>>1;
1208 const int w2= (width+1)>>1;
1220 for(x=1; x+1<width2; x+=2){
1224 A2 += (A1 + A3 + 2)>>2;
1228 A1= temp[x+1+width2];
1231 A4 += (A1 + A3 + 2)>>2;
1237 A2 += (A1 + A3 + 2)>>2;
1241 lift(temp , b , b+w2, 1, 1, 1, width, 1, 2, 2, 0, 1);
1242 lift(temp+w2, b+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 1);
1244 for(x=0; x<width2; x++){
1246 b[2*x + 1]= temp[x+w2];
/*
 * Inverse of vertical_decompose53iH0(): for each of the `width` columns,
 * b1 += (b0 + b2) >> 1.
 */
1252 static void vertical_compose53iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1255 for(i=0; i<width; i++){
1256 b1[i] += (b0[i] + b2[i])>>1;
/*
 * Inverse of vertical_decompose53iL0(): for each of the `width` columns,
 * b1 -= (b0 + b2 + 2) >> 2.
 */
1260 static void vertical_compose53iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1263 for(i=0; i<width; i++){
1264 b1[i] -= (b0[i] + b2[i] + 2)>>2;
/*
 * Prepares a 5/3 composer that reads its rows through a slice buffer:
 * cs->b0 and cs->b1 are set to the lines for rows -2 and -1 after mirror()
 * has mapped them into [0, height-1]; the line index passed to the slice
 * buffer is that row times stride_line.
 */
1268 static void spatial_compose53i_buffered_init(dwt_compose_t *cs, slice_buffer * sb, int height, int stride_line){
1269 cs->b0 = slice_buffer_get_line(sb, mirror(-1-1, height-1) * stride_line);
1270 cs->b1 = slice_buffer_get_line(sb, mirror(-1 , height-1) * stride_line);
/*
 * Prepares a 5/3 composer working directly on `buffer`: cs->b0 and cs->b1
 * are pointed at rows -2 and -1 after mirror() has mapped them into
 * [0, height-1], using a row pitch of `stride`.
 */
1274 static void spatial_compose53i_init(dwt_compose_t *cs, DWTELEM *buffer, int height, int stride){
1275 cs->b0 = buffer + mirror(-1-1, height-1)*stride;
1276 cs->b1 = buffer + mirror(-1 , height-1)*stride;
/*
 * One step of the slice-buffered inverse 5/3 transform.  Takes the two rows
 * saved in cs (b0, b1), fetches rows y+1 and y+2 from the slice buffer,
 * undoes the two vertical lifting steps and then recomposes b0 and b1
 * horizontally.
 * The `idx < (unsigned)height` tests are single-comparison range checks that
 * also reject negative indices.
 * NOTE(review): y is not declared in the visible lines; presumably it is the
 * composer's current row taken from cs - confirm.
 */
1280 static void spatial_compose53i_dy_buffered(dwt_compose_t *cs, slice_buffer * sb, int width, int height, int stride_line){
1283 DWTELEM *b0= cs->b0;
1284 DWTELEM *b1= cs->b1;
1285 DWTELEM *b2= slice_buffer_get_line(sb, mirror(y+1, height-1) * stride_line);
1286 DWTELEM *b3= slice_buffer_get_line(sb, mirror(y+2, height-1) * stride_line);
// Vertical inverse: L0 updates b2 from b1/b3, then H0 updates b1 from b0/b2.
1289 if(y+1<(unsigned)height) vertical_compose53iL0(b1, b2, b3, width);
1290 if(y+0<(unsigned)height) vertical_compose53iH0(b0, b1, b2, width);
1291 STOP_TIMER("vertical_compose53i*")}
// Horizontal inverse on the two rows handled by this step.
1294 if(y-1<(unsigned)height) horizontal_compose53i(b0, width);
1295 if(y+0<(unsigned)height) horizontal_compose53i(b1, width);
1296 STOP_TIMER("horizontal_compose53i")}
/*
 * One step of the inverse 5/3 transform working directly on `buffer`.
 * Same sequence as the slice-buffered variant: rows b0/b1 come from cs,
 * rows y+1 and y+2 are addressed via mirror() and `stride`, the two vertical
 * lifting steps are undone and b0/b1 are then recomposed horizontally.
 * The `idx < (unsigned)height` tests are single-comparison range checks that
 * also reject negative indices.
 * NOTE(review): y is not declared in the visible lines; presumably it is the
 * composer's current row taken from cs - confirm.
 */
1303 static void spatial_compose53i_dy(dwt_compose_t *cs, DWTELEM *buffer, int width, int height, int stride){
1305 DWTELEM *b0= cs->b0;
1306 DWTELEM *b1= cs->b1;
1307 DWTELEM *b2= buffer + mirror(y+1, height-1)*stride;
1308 DWTELEM *b3= buffer + mirror(y+2, height-1)*stride;
1311 if(y+1<(unsigned)height) vertical_compose53iL0(b1, b2, b3, width);
1312 if(y+0<(unsigned)height) vertical_compose53iH0(b0, b1, b2, width);
1313 STOP_TIMER("vertical_compose53i*")}
1316 if(y-1<(unsigned)height) horizontal_compose53i(b0, width);
1317 if(y+0<(unsigned)height) horizontal_compose53i(b1, width);
1318 STOP_TIMER("horizontal_compose53i")}
/*
 * Whole-block inverse 5/3 transform of one decomposition level: initialises
 * a local composer and keeps stepping it with spatial_compose53i_dy() while
 * its row counter cs.y is <= height.
 */
1325 static void spatial_compose53i(DWTELEM *buffer, int width, int height, int stride){
1327 spatial_compose53i_init(&cs, buffer, height, stride);
1328 while(cs.y <= height)
1329 spatial_compose53i_dy(&cs, buffer, width, height, stride);
/*
 * Inverse horizontal 9/7 transform of one row of `width` samples.
 * The four lifting stages of horizontal_decompose97i() are undone in reverse
 * order (D, C, B, A; last argument 1).  The first two stages write into the
 * stack scratch array temp, the last two write back into b and b+1.
 * NOTE(review): the leading "2" step argument in the last two calls
 * presumably strides the destination so that the results are interleaved
 * into even/odd positions of b - confirm against lift()'s signature.
 */
1333 void ff_snow_horizontal_compose97i(DWTELEM *b, int width){
1334 DWTELEM temp[width];
1335 const int w2= (width+1)>>1;
1337 lift (temp , b , b +w2, 1, 1, 1, width, W_DM, W_DO, W_DS, 0, 1);
1338 lift5(temp+w2, b +w2, temp , 1, 1, 1, width, W_CM, W_CO, W_CS, 1, 1);
1339 liftS(b , temp , temp+w2, 2, 1, 1, width, -W_BM, W_BO, W_BS, 0, 1);
1340 lift (b+1 , temp+w2, b , 2, 1, 2, width, -W_AM, W_AO, W_AS, 1, 1);
/*
 * Inverse of vertical_decompose97iH0() (step "A"): for each of the `width`
 * columns, b1 += (W_AM*(b0 + b2) + W_AO) >> W_AS.
 */
1343 static void vertical_compose97iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1346 for(i=0; i<width; i++){
1347 b1[i] += (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
/*
 * Inverse of vertical_decompose97iH1() (step "C"): the value that the
 * forward step added to b1 is subtracted again.
 * NOTE(review): two formulations are visible (a direct W_CM multiply and one
 * that starts from r = 3*(b0+b2)); presumably a preprocessor conditional that
 * is not shown selects one of them - confirm.
 */
1351 static void vertical_compose97iH1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1354 for(i=0; i<width; i++){
1356 b1[i] -= (W_CM*(b0[i] + b2[i])+W_CO)>>W_CS;
1358 int r= 3*(b0[i] + b2[i]);
1361 b1[i] -= (r+W_CO)>>W_CS;
/*
 * Inverse vertical 9/7 lifting step "B" on row b1 from neighbours b0 and b2.
 * NOTE(review): two formulations are visible (with and without the extra
 * 4*b1 term inside the shifted sum); presumably a preprocessor conditional
 * that is not shown selects one of them - confirm.
 */
1366 static void vertical_compose97iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1369 for(i=0; i<width; i++){
1371 b1[i] += (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS;
1373 b1[i] += (W_BM*(b0[i] + b2[i])+4*b1[i]+W_BO)>>W_BS;
/*
 * Inverse of vertical_decompose97iL1() (step "D"): for each of the `width`
 * columns, b1 -= (W_DM*(b0 + b2) + W_DO) >> W_DS.
 */
1378 static void vertical_compose97iL1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1381 for(i=0; i<width; i++){
1382 b1[i] -= (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS;
/*
 * Fused inverse vertical 9/7 step over six consecutive rows b0..b5.
 * Per column it performs, in this order, the same four updates that
 * vertical_compose97iL1(b3,b4,b5), H1(b2,b3,b4), L0(b1,b2,b3) and
 * H0(b0,b1,b2) would perform: b4 (step D), b3 (step C), b2 (step B),
 * b1 (step A).  b0 and b5 are only read.
 * NOTE(review): steps C and B each appear in two formulations; presumably
 * preprocessor conditionals that are not shown select one of each - confirm.
 */
1386 void ff_snow_vertical_compose97i(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, DWTELEM *b3, DWTELEM *b4, DWTELEM *b5, int width){
1389 for(i=0; i<width; i++){
1393 b4[i] -= (W_DM*(b3[i] + b5[i])+W_DO)>>W_DS;
1395 b3[i] -= (W_CM*(b2[i] + b4[i])+W_CO)>>W_CS;
1397 r= 3*(b2[i] + b4[i]);
1400 b3[i] -= (r+W_CO)>>W_CS;
1403 b2[i] += (W_BM*(b1[i] + b3[i])+W_BO)>>W_BS;
1405 b2[i] += (W_BM*(b1[i] + b3[i])+4*b2[i]+W_BO)>>W_BS;
1407 b1[i] += (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
/*
 * Prepares a 9/7 composer that reads its rows through a slice buffer:
 * cs->b0..cs->b3 are set to the lines for rows -4..-1 after mirror() has
 * mapped them into [0, height-1]; the line index passed to the slice buffer
 * is that row times stride_line.
 */
1411 static void spatial_compose97i_buffered_init(dwt_compose_t *cs, slice_buffer * sb, int height, int stride_line){
1412 cs->b0 = slice_buffer_get_line(sb, mirror(-3-1, height-1) * stride_line);
1413 cs->b1 = slice_buffer_get_line(sb, mirror(-3 , height-1) * stride_line);
1414 cs->b2 = slice_buffer_get_line(sb, mirror(-3+1, height-1) * stride_line);
1415 cs->b3 = slice_buffer_get_line(sb, mirror(-3+2, height-1) * stride_line);
/*
 * Prepares a 9/7 composer working directly on `buffer`: cs->b0..cs->b3 are
 * pointed at rows -4..-1 after mirror() has mapped them into [0, height-1],
 * using a row pitch of `stride`.
 */
1419 static void spatial_compose97i_init(dwt_compose_t *cs, DWTELEM *buffer, int height, int stride){
1420 cs->b0 = buffer + mirror(-3-1, height-1)*stride;
1421 cs->b1 = buffer + mirror(-3 , height-1)*stride;
1422 cs->b2 = buffer + mirror(-3+1, height-1)*stride;
1423 cs->b3 = buffer + mirror(-3+2, height-1)*stride;
/*
 * One step of the slice-buffered inverse 9/7 transform.  Rows b0..b3 come
 * from the composer state, rows y+3 and y+4 are fetched from the slice
 * buffer.  Away from the top and bottom borders (y > 0 and y+4 < height) the
 * fused dsp->vertical_compose97i() is used; otherwise the four vertical
 * steps are run individually, each guarded by a range check.  Afterwards b0
 * and b1 are recomposed horizontally through dsp->horizontal_compose97i().
 * The `idx < (unsigned)height` tests are single-comparison range checks that
 * also reject negative indices.
 * NOTE(review): y is not declared in the visible lines; presumably it is the
 * composer's current row taken from cs - confirm.
 */
1427 static void spatial_compose97i_dy_buffered(DSPContext *dsp, dwt_compose_t *cs, slice_buffer * sb, int width, int height, int stride_line){
1430 DWTELEM *b0= cs->b0;
1431 DWTELEM *b1= cs->b1;
1432 DWTELEM *b2= cs->b2;
1433 DWTELEM *b3= cs->b3;
1434 DWTELEM *b4= slice_buffer_get_line(sb, mirror(y + 3, height - 1) * stride_line);
1435 DWTELEM *b5= slice_buffer_get_line(sb, mirror(y + 4, height - 1) * stride_line);
// Interior rows: all six rows are valid, so the fused routine can be used.
1438 if(y>0 && y+4<height){
1439 dsp->vertical_compose97i(b0, b1, b2, b3, b4, b5, width);
// Border rows: run only the steps whose target row exists.
1441 if(y+3<(unsigned)height) vertical_compose97iL1(b3, b4, b5, width);
1442 if(y+2<(unsigned)height) vertical_compose97iH1(b2, b3, b4, width);
1443 if(y+1<(unsigned)height) vertical_compose97iL0(b1, b2, b3, width);
1444 if(y+0<(unsigned)height) vertical_compose97iH0(b0, b1, b2, width);
1447 STOP_TIMER("vertical_compose97i")}}
1450 if(y-1<(unsigned)height) dsp->horizontal_compose97i(b0, width);
1451 if(y+0<(unsigned)height) dsp->horizontal_compose97i(b1, width);
1452 if(width>400 && y+0<(unsigned)height){
1453 STOP_TIMER("horizontal_compose97i")}}
/*
 * One step of the inverse 9/7 transform working directly on `buffer`.
 * Rows b0..b3 come from the composer state, rows y+3 and y+4 are addressed
 * via mirror() and `stride`.  The four vertical steps are run individually,
 * each guarded by a range check, and b0/b1 are then recomposed horizontally
 * with ff_snow_horizontal_compose97i().
 * The `idx < (unsigned)height` tests are single-comparison range checks that
 * also reject negative indices.
 * NOTE(review): y is not declared in the visible lines; presumably it is the
 * composer's current row taken from cs - confirm.
 */
1462 static void spatial_compose97i_dy(dwt_compose_t *cs, DWTELEM *buffer, int width, int height, int stride){
1464 DWTELEM *b0= cs->b0;
1465 DWTELEM *b1= cs->b1;
1466 DWTELEM *b2= cs->b2;
1467 DWTELEM *b3= cs->b3;
1468 DWTELEM *b4= buffer + mirror(y+3, height-1)*stride;
1469 DWTELEM *b5= buffer + mirror(y+4, height-1)*stride;
1472 if(y+3<(unsigned)height) vertical_compose97iL1(b3, b4, b5, width);
1473 if(y+2<(unsigned)height) vertical_compose97iH1(b2, b3, b4, width);
1474 if(y+1<(unsigned)height) vertical_compose97iL0(b1, b2, b3, width);
1475 if(y+0<(unsigned)height) vertical_compose97iH0(b0, b1, b2, width);
1477 STOP_TIMER("vertical_compose97i")}}
1480 if(y-1<(unsigned)height) ff_snow_horizontal_compose97i(b0, width);
1481 if(y+0<(unsigned)height) ff_snow_horizontal_compose97i(b1, width);
1482 if(width>400 && b0 <= b2){
1483 STOP_TIMER("horizontal_compose97i")}}
/*
 * Whole-block inverse 9/7 transform of one decomposition level: initialises
 * a local composer and keeps stepping it with spatial_compose97i_dy() while
 * its row counter cs.y is <= height.
 */
1492 static void spatial_compose97i(DWTELEM *buffer, int width, int height, int stride){
1494 spatial_compose97i_init(&cs, buffer, height, stride);
1495 while(cs.y <= height)
1496 spatial_compose97i_dy(&cs, buffer, width, height, stride);
/*
 * Initialises one composer per decomposition level (cs[level]) for the
 * slice-buffered inverse transform, from the highest level down to 0.
 * Each level works on height>>level rows with a line pitch of
 * stride_line<<level.  Case 0 sets up the 9/7 composer, case 1 the 5/3
 * composer; case 2 (generic "X" wavelet) is not supported in buffered mode
 * and only logs an error.
 * NOTE(review): the switch expression is not visible; presumably `type`.
 */
1499 static void ff_spatial_idwt_buffered_init(dwt_compose_t *cs, slice_buffer * sb, int width, int height, int stride_line, int type, int decomposition_count){
1501 for(level=decomposition_count-1; level>=0; level--){
1503 case 0: spatial_compose97i_buffered_init(cs+level, sb, height>>level, stride_line<<level); break;
1504 case 1: spatial_compose53i_buffered_init(cs+level, sb, height>>level, stride_line<<level); break;
1505 /* not slicified yet */
1506 case 2: /*spatial_composeX(buffer, width>>level, height>>level, stride<<level); break;*/
1507 av_log(NULL, AV_LOG_ERROR, "spatial_composeX neither buffered nor slicified yet.\n"); break;
/*
 * Initialises one composer per decomposition level (cs[level]) for the
 * sliced inverse transform on a plain buffer, from the highest level down
 * to 0.  Case 0 sets up the 9/7 composer and case 1 the 5/3 composer.
 * Case 2 has no sliced implementation: the whole level is composed
 * immediately with spatial_composeX() instead of being deferred.
 * NOTE(review): the switch expression is not visible; presumably `type`.
 */
1512 static void ff_spatial_idwt_init(dwt_compose_t *cs, DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
1514 for(level=decomposition_count-1; level>=0; level--){
1516 case 0: spatial_compose97i_init(cs+level, buffer, height>>level, stride<<level); break;
1517 case 1: spatial_compose53i_init(cs+level, buffer, height>>level, stride<<level); break;
1518 /* not slicified yet */
1519 case 2: spatial_composeX(buffer, width>>level, height>>level, stride<<level); break;
/*
 * Advances the sliced inverse transform far enough to finish output row `y`.
 * `support` is 3 for type 1 (5/3) and 5 otherwise.  Going from the highest
 * level down to 0, each level's composer is stepped until its row counter
 * exceeds min((y>>level) + support, height>>level).
 */
1524 static void ff_spatial_idwt_slice(dwt_compose_t *cs, DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count, int y){
1525 const int support = type==1 ? 3 : 5;
1529 for(level=decomposition_count-1; level>=0; level--){
1530 while(cs[level].y <= FFMIN((y>>level)+support, height>>level)){
1532 case 0: spatial_compose97i_dy(cs+level, buffer, width>>level, height>>level, stride<<level);
1534 case 1: spatial_compose53i_dy(cs+level, buffer, width>>level, height>>level, stride<<level);
/*
 * Slice-buffer counterpart of ff_spatial_idwt_slice(): advances the inverse
 * transform far enough to finish output row `y`.  `support` is 3 for type 1
 * (5/3) and 5 otherwise.  Going from the highest level down to 0, each
 * level's composer is stepped until its row counter exceeds
 * min((y>>level) + support, height>>level).  The 9/7 path receives `dsp`
 * so it can use the DSP-context routines.
 */
1542 static void ff_spatial_idwt_buffered_slice(DSPContext *dsp, dwt_compose_t *cs, slice_buffer * slice_buf, int width, int height, int stride_line, int type, int decomposition_count, int y){
1543 const int support = type==1 ? 3 : 5;
1547 for(level=decomposition_count-1; level>=0; level--){
1548 while(cs[level].y <= FFMIN((y>>level)+support, height>>level)){
1550 case 0: spatial_compose97i_dy_buffered(dsp, cs+level, slice_buf, width>>level, height>>level, stride_line<<level);
1552 case 1: spatial_compose53i_dy_buffered(cs+level, slice_buf, width>>level, height>>level, stride_line<<level);
/*
 * Full inverse multi-level 2-D wavelet transform, in place on `buffer`.
 * Two paths are visible: a loop that runs spatial_composeX() for every level
 * from the highest down to 0, and a sliced path that initialises one
 * composer per level and then processes the picture four rows at a time.
 * NOTE(review): the condition choosing between the two paths is not visible;
 * presumably the first is taken for type 2 - confirm.
 */
1560 static void ff_spatial_idwt(DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
1563 for(level=decomposition_count-1; level>=0; level--)
1564 spatial_composeX (buffer, width>>level, height>>level, stride<<level);
1566 dwt_compose_t cs[MAX_DECOMPOSITIONS];
1568 ff_spatial_idwt_init(cs, buffer, width, height, stride, type, decomposition_count);
1569 for(y=0; y<height; y+=4)
1570 ff_spatial_idwt_slice(cs, buffer, width, height, stride, type, decomposition_count, y);
/*
 * Encodes the coefficients of sub-band `b` (read from `src`, row pitch
 * `stride`) with the range coder s->c, using zero-run coding where the
 * local context is empty.
 *
 * Both visible passes look at the same causal neighbourhood of each
 * coefficient v: t (above), lt (above-left), rt (above-right), l (left), and
 * the co-located parent coefficient p read from `parent`.
 *  - First pass: where the whole neighbourhood is zero, run lengths are
 *    collected into runs[].
 *  - The number of runs (max_index) and the first run are then written.
 *  - Second pass: where the neighbourhood is non-zero, a significance bit
 *    (!!v) is coded with a context derived from log2 of the weighted
 *    neighbour magnitudes; otherwise runs are consumed.  Non-zero
 *    coefficients get their magnitude minus one and their sign coded.
 * An error is logged when fewer than w*40 bytes remain in the output buffer.
 * NOTE(review): loop headers, the px/py computation and the return
 * statements are not visible here; the description above covers only what
 * the visible lines show.
 */
1574 static int encode_subband_c0run(SnowContext *s, SubBand *b, DWTELEM *src, DWTELEM *parent, int stride, int orientation){
1575 const int w= b->width;
1576 const int h= b->height;
1588 int /*ll=0, */l=0, lt=0, t=0, rt=0;
1589 v= src[x + y*stride];
1592 t= src[x + (y-1)*stride];
1594 lt= src[x - 1 + (y-1)*stride];
1597 rt= src[x + 1 + (y-1)*stride];
1601 l= src[x - 1 + y*stride];
1603 if(orientation==1) ll= src[y + (x-2)*stride];
1604 else ll= src[x - 2 + y*stride];
// Parent coefficient is only read when (px, py) lies inside the parent band.
1610 if(px<b->parent->width && py<b->parent->height)
1611 p= parent[px + py*2*stride];
// Empty neighbourhood: this position is covered by run-length coding.
1613 if(!(/*ll|*/l|lt|t|rt|p)){
1615 runs[run_index++]= run;
1623 max_index= run_index;
1624 runs[run_index++]= run;
1626 run= runs[run_index++];
// Header for the run list: count first, then the first run length (if any).
1628 put_symbol2(&s->c, b->state[30], max_index, 0);
1629 if(run_index <= max_index)
1630 put_symbol2(&s->c, b->state[1], run, 3);
// Guard against running out of output space.
1633 if(s->c.bytestream_end - s->c.bytestream < w*40){
1634 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
1639 int /*ll=0, */l=0, lt=0, t=0, rt=0;
1640 v= src[x + y*stride];
1643 t= src[x + (y-1)*stride];
1645 lt= src[x - 1 + (y-1)*stride];
1648 rt= src[x + 1 + (y-1)*stride];
1652 l= src[x - 1 + y*stride];
1654 if(orientation==1) ll= src[y + (x-2)*stride];
1655 else ll= src[x - 2 + y*stride];
1661 if(px<b->parent->width && py<b->parent->height)
1662 p= parent[px + py*2*stride];
// Non-empty neighbourhood: code an explicit zero/non-zero flag.
1664 if(/*ll|*/l|lt|t|rt|p){
1665 int context= av_log2(/*ABS(ll) + */3*ABS(l) + ABS(lt) + 2*ABS(t) + ABS(rt) + ABS(p));
1667 put_rac(&s->c, &b->state[0][context], !!v);
1670 run= runs[run_index++];
1672 if(run_index <= max_index)
1673 put_symbol2(&s->c, b->state[1], run, 3);
1681 int context= av_log2(/*ABS(ll) + */3*ABS(l) + ABS(lt) + 2*ABS(t) + ABS(rt) + ABS(p));
// l2/t2 pack magnitude and sign of the left/top neighbour for the sign context.
1682 int l2= 2*ABS(l) + (l<0);
1683 int t2= 2*ABS(t) + (t<0);
// Magnitude minus one, then the sign bit (1 for negative).
1685 put_symbol2(&s->c, b->state[context + 2], ABS(v)-1, context-4);
1686 put_rac(&s->c, &b->state[0][16 + 1 + 3 + quant3bA[l2&0xFF] + 3*quant3bA[t2&0xFF]], v<0);
/*
 * Sub-band encoder entry point.  Forwards all arguments to
 * encode_subband_c0run() and returns its result; the other coder variants
 * are commented out.
 */
1694 static int encode_subband(SnowContext *s, SubBand *b, DWTELEM *src, DWTELEM *parent, int stride, int orientation){
1695 // encode_subband_qtree(s, b, src, parent, stride, orientation);
1696 // encode_subband_z0run(s, b, src, parent, stride, orientation);
1697 return encode_subband_c0run(s, b, src, parent, stride, orientation);
1698 // encode_subband_dzr(s, b, src, parent, stride, orientation);
/*
 * Decodes the coefficients of sub-band `b` from the range coder s->c into
 * the sparse list b->x_coeff of (x, coeff) entries.
 *
 * What the visible lines establish:
 *  - the number of runs is read with state[30], run lengths with state[1];
 *  - where the neighbourhood (l, lt, t, rt, parent p) is non-zero, a
 *    significance bit is read with a log2-based context;
 *  - a decoded coefficient is stored as 2*(magnitude) + sign bit, i.e. the
 *    sign lives in bit 0 of `coeff`;
 *  - neighbour values come from the previous row's list (prev_xc) and the
 *    parent band's list (parent_xc), both walked by x position;
 *  - entries with x == w+1 are written as end markers.
 * NOTE(review): loop structure and the end-of-row bookkeeping are only
 * partly visible; treat the above as a summary of the visible lines.
 */
1701 static inline void unpack_coeffs(SnowContext *s, SubBand *b, SubBand * parent, int orientation){
1702 const int w= b->width;
1703 const int h= b->height;
1708 x_and_coeff *xc= b->x_coeff;
1709 x_and_coeff *prev_xc= NULL;
1710 x_and_coeff *prev2_xc= xc;
1711 x_and_coeff *parent_xc= parent ? parent->x_coeff : NULL;
1712 x_and_coeff *prev_parent_xc= parent_xc;
// Run list header: number of runs, then the first run length if there is one.
1714 runs= get_symbol2(&s->c, b->state[30], 0);
1715 if(runs-- > 0) run= get_symbol2(&s->c, b->state[1], 3);
1720 int lt=0, t=0, rt=0;
1722 if(y && prev_xc->x == 0){
1734 if(prev_xc->x == x + 1)
1740 if(x>>1 > parent_xc->x){
// Parent entry sits at half the horizontal position of this band.
1743 if(x>>1 == parent_xc->x){
1744 p= parent_xc->coeff;
1747 if(/*ll|*/l|lt|t|rt|p){
1748 int context= av_log2(/*ABS(ll) + */3*(l>>1) + (lt>>1) + (t&~1) + (rt>>1) + (p>>1));
1750 v=get_rac(&s->c, &b->state[0][context]);
// Magnitude in the upper bits, sign added into bit 0.
1752 v= 2*(get_symbol2(&s->c, b->state[context + 2], context-4) + 1);
1753 v+=get_rac(&s->c, &b->state[0][16 + 1 + 3 + quant3bA[l&0xFF] + 3*quant3bA[t&0xFF]]);
1760 if(runs-- > 0) run= get_symbol2(&s->c, b->state[1], 3);
1762 v= 2*(get_symbol2(&s->c, b->state[0 + 2], 0-4) + 1);
1763 v+=get_rac(&s->c, &b->state[0][16 + 1 + 3]);
// Limit how far a run may be skipped in one go.
1772 if(y) max_run= FFMIN(run, prev_xc->x - x - 2);
1773 else max_run= FFMIN(run, w-x-1);
1775 max_run= FFMIN(max_run, 2*parent_xc->x - x - 1);
1781 (xc++)->x= w+1; //end marker
1787 while(parent_xc->x != parent->width+1)
1790 prev_parent_xc= parent_xc;
1792 parent_xc= prev_parent_xc;
1797 (xc++)->x= w+1; //end marker
/*
 * Dequantises rows start_y..h-1 of sub-band `b` from its sparse coefficient
 * list (b->x_coeff) into the slice buffer `sb`.
 *
 * The quantiser step qmul is derived from the clipped sum of the global and
 * per-band qlog, and qadd is the matching bias.  For every row the target
 * line is cleared and each listed coefficient is written at its x position.
 * save_state[0] carries the list position from one slice call to the next.
 * NOTE(review): the body of the b->buf / LOSSLESS_QLOG special case and the
 * inner loop header are not visible here.
 */
1801 static inline void decode_subband_slice_buffered(SnowContext *s, SubBand *b, slice_buffer * sb, int start_y, int h, int save_state[1]){
1802 const int w= b->width;
1804 const int qlog= clip(s->qlog + b->qlog, 0, QROOT*16);
1805 int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
1806 int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
1811 if(b->buf == s->spatial_dwt_buffer || s->qlog == LOSSLESS_QLOG){
1816 /* If we are on the second or later slice, restore our index. */
1818 new_index = save_state[0];
1821 for(y=start_y; y<h; y++){
1824 DWTELEM * line = slice_buffer_get_line(sb, y * b->stride_line + b->buf_y_offset) + b->buf_x_offset;
// Start from an all-zero row; only listed coefficients are filled in.
1825 memset(line, 0, b->width*sizeof(DWTELEM));
1826 v = b->x_coeff[new_index].coeff;
1827 x = b->x_coeff[new_index++].x;
// v>>1 is the magnitude, bit 0 the sign; (t^u)-u negates t when u == -1.
1830 register int t= ( (v>>1)*qmul + qadd)>>QEXPSHIFT;
1831 register int u= -(v&1);
1832 line[x] = (t^u) - u;
1834 v = b->x_coeff[new_index].coeff;
1835 x = b->x_coeff[new_index++].x;
1838 if(w > 200 && start_y != 0/*level+1 == s->spatial_decomposition_count*/){
1839 STOP_TIMER("decode_subband")
1842 /* Save our variables for the next slice. */
1843 save_state[0] = new_index;
/*
 * Resets all adaptive range-coder states to MID_STATE: the state tables of
 * every sub-band in the three planes, plus the header and block states.
 * For level 0 all four orientations are reset; for higher levels orientation
 * 0 is skipped (the loop starts at 1).
 */
1848 static void reset_contexts(SnowContext *s){
1849 int plane_index, level, orientation;
1851 for(plane_index=0; plane_index<3; plane_index++){
1852 for(level=0; level<s->spatial_decomposition_count; level++){
1853 for(orientation=level ? 1:0; orientation<4; orientation++){
1854 memset(s->plane[plane_index].band[level][orientation].state, MID_STATE, sizeof(s->plane[plane_index].band[level][orientation].state));
1858 memset(s->header_state, MID_STATE, sizeof(s->header_state));
1859 memset(s->block_state, MID_STATE, sizeof(s->block_state));
/*
 * Allocates the zero-initialised block array s->block.
 * w and h are the picture dimensions divided by 2^LOG2_MB_SIZE and rounded
 * up (the -((-x)>>n) idiom relies on right-shifting a negative int rounding
 * toward minus infinity).  The array holds w*h BlockNode entries scaled by
 * 4^block_max_depth.
 * NOTE(review): the allocation result is not checked in the visible lines,
 * and the return statement is not visible.
 */
1862 static int alloc_blocks(SnowContext *s){
1863 int w= -((-s->avctx->width )>>LOG2_MB_SIZE);
1864 int h= -((-s->avctx->height)>>LOG2_MB_SIZE);
1869 s->block= av_mallocz(w * h * sizeof(BlockNode) << (s->block_max_depth*2));
/*
 * Copies range-coder state from `s` to `d` while keeping d's own output
 * position: d->bytestream and d->bytestream_start are saved up front and
 * written back at the end.
 * NOTE(review): the statement between save and restore is not visible;
 * presumably a struct copy of *s into *d - confirm.
 */
1873 static inline void copy_rac_state(RangeCoder *d, RangeCoder *s){
1874 uint8_t *bytestream= d->bytestream;
1875 uint8_t *bytestream_start= d->bytestream_start;
1877 d->bytestream= bytestream;
1878 d->bytestream_start= bytestream_start;
//near copy & paste from dsputil, FIXME
/*
 * Walks a w x w square of pixels starting at `pix`; after each row of w
 * pixels the pointer is advanced by line_size - w to reach the next row.
 * NOTE(review): the accumulation and return statements are not visible;
 * presumably returns the sum of the pixel values - confirm.
 */
1882 static int pix_sum(uint8_t * pix, int line_size, int w)
1887 for (i = 0; i < w; i++) {
1888 for (j = 0; j < w; j++) {
1892 pix += line_size - w;
//near copy & paste from dsputil, FIXME
/*
 * Walks a w x w square of pixels starting at `pix`, using the lookup table
 * squareTbl offset by 256 entries; after each row of w pixels the pointer is
 * advanced by line_size - w to reach the next row.
 * NOTE(review): the accumulation and return statements are not visible;
 * presumably returns the sum of squared pixel values - confirm.
 */
1898 static int pix_norm1(uint8_t * pix, int line_size, int w)
1901 uint32_t *sq = squareTbl + 256;
1904 for (i = 0; i < w; i++) {
1905 for (j = 0; j < w; j ++) {
1909 pix += line_size - w;
/*
 * Writes the same BlockNode value into every finest-level cell covered by
 * the quadtree block (x, y) at `level`.
 * The block grid is w = b_width << block_max_depth cells wide; a block at
 * `level` covers block_w x block_w cells (block_w = 1 << rem_depth) starting
 * at cell index (x + y*w) << rem_depth.
 * NOTE(review): the construction of `block` is not visible; presumably it is
 * filled from l, cb, cr, mx, my, ref, type and level - confirm.
 */
1914 static inline void set_blocks(SnowContext *s, int level, int x, int y, int l, int cb, int cr, int mx, int my, int ref, int type){
1915 const int w= s->b_width << s->block_max_depth;
1916 const int rem_depth= s->block_max_depth - level;
1917 const int index= (x + y*w) << rem_depth;
1918 const int block_w= 1<<rem_depth;
1931 for(j=0; j<block_w; j++){
1932 for(i=0; i<block_w; i++){
1933 s->block[index + i + j*w]= block;
/*
 * Points the motion-estimation context at the source and reference planes
 * for a block at pixel position (x, y): c->src[0][i] is set to src[i] and
 * c->ref[0][i] to ref[i] plus a per-plane offset.  The two chroma offsets
 * are (y*uvstride + x) >> 1.
 * NOTE(review): the luma offset, the loop header and any use of ref2 /
 * ref_index are not visible here.
 */
1938 static inline void init_ref(MotionEstContext *c, uint8_t *src[3], uint8_t *ref[3], uint8_t *ref2[3], int x, int y, int ref_index){
1939 const int offset[3]= {
1941 ((y*c->uvstride + x)>>1),
1942 ((y*c->uvstride + x)>>1),
1946 c->src[0][i]= src [i];
1947 c->ref[0][i]= ref [i] + offset[i];
/* Named slots of the motion-vector predictor array P used by encode_q_branch(). */
1955 #define P_TOPRIGHT P[3]
1956 #define P_MEDIAN P[4]
1958 #define FLAG_QPEL 1 //must be 1
/*
 * Rate-distortion decision for the quadtree block (x, y) at `level`.
 *
 * Three candidates are evaluated:
 *  - inter: motion search over all s->ref_frames reference pictures, keeping
 *    the best-scoring reference and vector; its syntax elements are coded
 *    into p_buffer with a private copy of the block state (p_state);
 *  - intra: the block is represented by its mean luma/chroma (l, cb, cr);
 *    its syntax elements are coded into i_buffer with i_state;
 *  - split: unless already at block_max_depth, the four child blocks are
 *    encoded recursively and their scores summed (score2).
 * Each candidate's score has its estimated bit cost, weighted by lambda2,
 * added to it.  For whichever unsplit candidate is chosen, its buffered
 * bytes and state copy are installed into the real coder s->c and
 * s->block_state, and the block grid is updated through set_blocks().
 * NOTE(review): the comparisons selecting the winner, the coder-state
 * save/restore around the recursion and the return statements are only
 * partly visible; presumably the score of the chosen option is returned.
 */
1960 static int encode_q_branch(SnowContext *s, int level, int x, int y){
1961 uint8_t p_buffer[1024];
1962 uint8_t i_buffer[1024];
1963 uint8_t p_state[sizeof(s->block_state)];
1964 uint8_t i_state[sizeof(s->block_state)];
1966 uint8_t *pbbak= s->c.bytestream;
1967 uint8_t *pbbak_start= s->c.bytestream_start;
1968 int score, score2, iscore, i_len, p_len, block_s, sum;
1969 const int w= s->b_width << s->block_max_depth;
1970 const int h= s->b_height << s->block_max_depth;
1971 const int rem_depth= s->block_max_depth - level;
1972 const int index= (x + y*w) << rem_depth;
1973 const int block_w= 1<<(LOG2_MB_SIZE - level);
1974 int trx= (x+1)<<rem_depth;
1975 int try= (y+1)<<rem_depth;
// Neighbouring blocks; null_block stands in at the picture borders.
1976 BlockNode *left = x ? &s->block[index-1] : &null_block;
1977 BlockNode *top = y ? &s->block[index-w] : &null_block;
1978 BlockNode *right = trx<w ? &s->block[index+1] : &null_block;
1979 BlockNode *bottom= try<h ? &s->block[index+w] : &null_block;
1980 BlockNode *tl = y && x ? &s->block[index-w-1] : left;
1981 BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
// Predictors: colour from the left block, motion vector as the median of
// left, top and top-right.
1982 int pl = left->color[0];
1983 int pcb= left->color[1];
1984 int pcr= left->color[2];
1985 int pmx= mid_pred(left->mx, top->mx, tr->mx);
1986 int pmy= mid_pred(left->my, top->my, tr->my);
1989 const int stride= s->current_picture.linesize[0];
1990 const int uvstride= s->current_picture.linesize[1];
1991 uint8_t *current_data[3]= { s->input_picture.data[0] + (x + y* stride)*block_w,
1992 s->input_picture.data[1] + (x + y*uvstride)*block_w/2,
1993 s->input_picture.data[2] + (x + y*uvstride)*block_w/2};
1995 int16_t last_mv[3][2];
1996 int qpel= !!(s->avctx->flags & CODEC_FLAG_QPEL); //unused
1997 const int shift= 1+qpel;
1998 MotionEstContext *c= &s->m.me;
// Context indices for the range coder, derived from the neighbours.
1999 int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref);
2000 int mx_context= av_log2(2*ABS(left->mx - top->mx));
2001 int my_context= av_log2(2*ABS(left->my - top->my));
2002 int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
2003 int ref, best_ref, ref_score, ref_mx, ref_my;
2005 assert(sizeof(s->block_state) >= 256);
2007 set_blocks(s, level, x, y, pl, pcb, pcr, pmx, pmy, 0, BLOCK_INTRA);
2011 // clip predictors / edge ?
2013 P_LEFT[0]= left->mx;
2014 P_LEFT[1]= left->my;
2017 P_TOPRIGHT[0]= tr->mx;
2018 P_TOPRIGHT[1]= tr->my;
2020 last_mv[0][0]= s->block[index].mx;
2021 last_mv[0][1]= s->block[index].my;
2022 last_mv[1][0]= right->mx;
2023 last_mv[1][1]= right->my;
2024 last_mv[2][0]= bottom->mx;
2025 last_mv[2][1]= bottom->my;
2032 assert(s->m.me. stride == stride);
2033 assert(s->m.me.uvstride == uvstride);
2035 c->penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_cmp);
2036 c->sub_penalty_factor= get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_sub_cmp);
2037 c->mb_penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->mb_cmp);
// Note the assignment inside the index: f_code is set to 1 as a side effect.
2038 c->current_mv_penalty= c->mv_penalty[s->m.f_code=1] + MAX_MV;
// Motion search window relative to this block's position.
2040 c->xmin = - x*block_w - 16+2;
2041 c->ymin = - y*block_w - 16+2;
2042 c->xmax = - (x+1)*block_w + (w<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-2;
2043 c->ymax = - (y+1)*block_w + (h<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-2;
// Clamp the predictors to the search window (window limits scaled by shift).
2045 if(P_LEFT[0] > (c->xmax<<shift)) P_LEFT[0] = (c->xmax<<shift);
2046 if(P_LEFT[1] > (c->ymax<<shift)) P_LEFT[1] = (c->ymax<<shift);
2047 if(P_TOP[0] > (c->xmax<<shift)) P_TOP[0] = (c->xmax<<shift);
2048 if(P_TOP[1] > (c->ymax<<shift)) P_TOP[1] = (c->ymax<<shift);
2049 if(P_TOPRIGHT[0] < (c->xmin<<shift)) P_TOPRIGHT[0]= (c->xmin<<shift);
2050 if(P_TOPRIGHT[0] > (c->xmax<<shift)) P_TOPRIGHT[0]= (c->xmax<<shift); //due to pmx no clip
2051 if(P_TOPRIGHT[1] > (c->ymax<<shift)) P_TOPRIGHT[1]= (c->ymax<<shift);
2053 P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]);
2054 P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);
2057 c->pred_x= P_LEFT[0];
2058 c->pred_y= P_LEFT[1];
2060 c->pred_x = P_MEDIAN[0];
2061 c->pred_y = P_MEDIAN[1];
// Inter candidate: search every reference picture.
2066 for(ref=0; ref<s->ref_frames; ref++){
2067 init_ref(c, current_data, s->last_picture[ref].data, NULL, block_w*x, block_w*y, 0);
2069 ref_score= ff_epzs_motion_search(&s->m, &ref_mx, &ref_my, P, 0, /*ref_index*/ 0, last_mv,
2070 (1<<16)>>shift, level-LOG2_MB_SIZE+4, block_w);
2072 assert(ref_mx >= c->xmin);
2073 assert(ref_mx <= c->xmax);
2074 assert(ref_my >= c->ymin);
2075 assert(ref_my <= c->ymax);
2077 ref_score= s->m.me.sub_motion_search(&s->m, &ref_mx, &ref_my, ref_score, 0, 0, level-LOG2_MB_SIZE+4, block_w);
2078 ref_score= ff_get_mb_score(&s->m, ref_mx, ref_my, 0, 0, level-LOG2_MB_SIZE+4, block_w, 0);
// Penalise higher reference indices.
2079 ref_score+= 2*av_log2(2*ref)*c->penalty_factor;
// Per-reference results are recorded when the storage for them exists.
2080 if(s->ref_mvs[ref]){
2081 s->ref_mvs[ref][index][0]= ref_mx;
2082 s->ref_mvs[ref][index][1]= ref_my;
2083 s->ref_scores[ref][index]= ref_score;
2085 if(score > ref_score){
2092 //FIXME if mb_cmp != SSE then intra can't be compared currently and mb_penalty vs. lambda2
// Trial-encode the inter candidate into p_buffer using a private state copy.
2096 pc.bytestream_start=
2097 pc.bytestream= p_buffer; //FIXME end/start? and at the other too
2098 memcpy(p_state, s->block_state, sizeof(s->block_state));
2100 if(level!=s->block_max_depth)
2101 put_rac(&pc, &p_state[4 + s_context], 1);
2102 put_rac(&pc, &p_state[1 + left->type + top->type], 0);
2103 if(s->ref_frames > 1)
2104 put_symbol(&pc, &p_state[128 + 1024 + 32*ref_context], best_ref, 0);
2105 put_symbol(&pc, &p_state[128 + 32*(mx_context + 16*!!best_ref)], mx - pmx, 1);
2106 put_symbol(&pc, &p_state[128 + 32*(my_context + 16*!!best_ref)], my - pmy, 1);
2107 p_len= pc.bytestream - pc.bytestream_start;
// Add the bit-cost estimate: whole bytes written, outstanding bytes, and the
// change in log2(range), all relative to the real coder s->c.
2108 score += (s->lambda2*(p_len*8
2109 + (pc.outstanding_count - s->c.outstanding_count)*8
2110 + (-av_log2(pc.range) + av_log2(s->c.range))
2111 ))>>FF_LAMBDA_SHIFT;
// Intra candidate: l is the rounded luma mean; iscore is the squared error
// of representing the block by that mean.
2113 block_s= block_w*block_w;
2114 sum = pix_sum(current_data[0], stride, block_w);
2115 l= (sum + block_s/2)/block_s;
2116 iscore = pix_norm1(current_data[0], stride, block_w) - 2*l*sum + l*l*block_s;
// Chroma means over the half-size chroma block.
2118 block_s= block_w*block_w>>2;
2119 sum = pix_sum(current_data[1], uvstride, block_w>>1);
2120 cb= (sum + block_s/2)/block_s;
2121 // iscore += pix_norm1(&current_mb[1][0], uvstride, block_w>>1) - 2*cb*sum + cb*cb*block_s;
2122 sum = pix_sum(current_data[2], uvstride, block_w>>1);
2123 cr= (sum + block_s/2)/block_s;
2124 // iscore += pix_norm1(&current_mb[2][0], uvstride, block_w>>1) - 2*cr*sum + cr*cr*block_s;
// Trial-encode the intra candidate into i_buffer using a private state copy.
2127 ic.bytestream_start=
2128 ic.bytestream= i_buffer; //FIXME end/start? and at the other too
2129 memcpy(i_state, s->block_state, sizeof(s->block_state));
2130 if(level!=s->block_max_depth)
2131 put_rac(&ic, &i_state[4 + s_context], 1);
2132 put_rac(&ic, &i_state[1 + left->type + top->type], 1);
2133 put_symbol(&ic, &i_state[32], l-pl , 1);
2134 put_symbol(&ic, &i_state[64], cb-pcb, 1);
2135 put_symbol(&ic, &i_state[96], cr-pcr, 1);
2136 i_len= ic.bytestream - ic.bytestream_start;
2137 iscore += (s->lambda2*(i_len*8
2138 + (ic.outstanding_count - s->c.outstanding_count)*8
2139 + (-av_log2(ic.range) + av_log2(s->c.range))
2140 ))>>FF_LAMBDA_SHIFT;
2142 // assert(score==256*256*256*64-1);
2143 assert(iscore < 255*255*256 + s->lambda2*10);
2144 assert(iscore >= 0);
2145 assert(l>=0 && l<=255);
2146 assert(pl>=0 && pl<=255);
// Scene-change statistics from the intra and inter scores.
2149 int varc= iscore >> 8;
2150 int vard= score >> 8;
2151 if (vard <= 64 || vard < varc)
2152 c->scene_change_score+= ff_sqrt(vard) - ff_sqrt(varc);
2154 c->scene_change_score+= s->m.qscale;
// Split candidate: code the split flag (0) and recurse into the four children.
2157 if(level!=s->block_max_depth){
2158 put_rac(&s->c, &s->block_state[4 + s_context], 0);
2159 score2 = encode_q_branch(s, level+1, 2*x+0, 2*y+0);
2160 score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+0);
2161 score2+= encode_q_branch(s, level+1, 2*x+0, 2*y+1);
2162 score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+1);
2163 score2+= s->lambda2>>FF_LAMBDA_SHIFT; //FIXME exact split overhead
2165 if(score2 < score && score2 < iscore)
// Intra chosen: install the buffered bytes, block data and coder state.
2170 memcpy(pbbak, i_buffer, i_len);
2172 s->c.bytestream_start= pbbak_start;
2173 s->c.bytestream= pbbak + i_len;
2174 set_blocks(s, level, x, y, l, cb, cr, pmx, pmy, 0, BLOCK_INTRA);
2175 memcpy(s->block_state, i_state, sizeof(s->block_state));
// Inter chosen: install the buffered bytes, block data and coder state.
2178 memcpy(pbbak, p_buffer, p_len);
2180 s->c.bytestream_start= pbbak_start;
2181 s->c.bytestream= pbbak + p_len;
2182 set_blocks(s, level, x, y, pl, pcb, pcr, mx, my, best_ref, 0);
2183 memcpy(s->block_state, p_state, sizeof(s->block_state));
/*
 * Returns non-zero when blocks a and b carry the same prediction data.
 * If both are intra, their three colour components must match; otherwise
 * the motion vector, the reference index and the BLOCK_INTRA flag must all
 * match.  The differences are OR-ed together so a single test covers all
 * fields.
 */
2188 static always_inline int same_block(BlockNode *a, BlockNode *b){
2189 if((a->type&BLOCK_INTRA) && (b->type&BLOCK_INTRA)){
2190 return !((a->color[0] - b->color[0]) | (a->color[1] - b->color[1]) | (a->color[2] - b->color[2]));
2192 return !((a->mx - b->mx) | (a->my - b->my) | (a->ref - b->ref) | ((a->type ^ b->type)&BLOCK_INTRA));
/**
 * Writes the block-tree node at (x, y) of depth `level` to the range coder
 * s->c, taking every coded value from the BlockNode entries already stored in
 * s->block.
 *
 * Nodes above s->block_max_depth carry a one-bit flag: 1 is coded when the
 * node's cells compare equal (same_block), 0 is followed by the four child
 * nodes through recursive calls. A node that is not split is coded as a type
 * bit followed by colour residuals (intra) or an optional reference index and
 * motion-vector residuals (inter).
 */
2196 static void encode_q_branch2(SnowContext *s, int level, int x, int y){
// w is the block-grid width at the deepest level; index is this node's cell in that grid.
2197 const int w= s->b_width << s->block_max_depth;
2198 const int rem_depth= s->block_max_depth - level;
2199 const int index= (x + y*w) << rem_depth;
2200 int trx= (x+1)<<rem_depth;
2201 BlockNode *b= &s->block[index];
// Neighbouring nodes used as predictors; null_block stands in when x or y is 0.
2202 BlockNode *left = x ? &s->block[index-1] : &null_block;
2203 BlockNode *top = y ? &s->block[index-w] : &null_block;
2204 BlockNode *tl = y && x ? &s->block[index-w-1] : left;
2205 BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
// Colour predictors come from the left neighbour.
2206 int pl = left->color[0];
2207 int pcb= left->color[1];
2208 int pcr= left->color[2];
// Motion-vector predictors: mid_pred() of left, top and top-right (presumably the median).
2209 int pmx= mid_pred(left->mx, top->mx, tr->mx);
2210 int pmy= mid_pred(left->my, top->my, tr->my);
// Context numbers selecting which adaptive state of s->block_state codes each symbol;
// the mx/my contexts are offset by 16 when this block's reference index is non-zero.
2211 int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref);
2212 int mx_context= av_log2(2*ABS(left->mx - top->mx)) + 16*!!b->ref;
2213 int my_context= av_log2(2*ABS(left->my - top->my)) + 16*!!b->ref;
2214 int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
// NOTE(review): whatever condition guards this call is not present in this listing -- confirm when it runs.
2217 set_blocks(s, level, x, y, pl, pcb, pcr, pmx, pmy, 0, BLOCK_INTRA);
// Above the maximum depth: code 1 if b matches b+1, b+w and b+w+1, otherwise code 0 and descend.
2221 if(level!=s->block_max_depth){
2222 if(same_block(b,b+1) && same_block(b,b+w) && same_block(b,b+w+1)){
2223 put_rac(&s->c, &s->block_state[4 + s_context], 1);
2225 put_rac(&s->c, &s->block_state[4 + s_context], 0);
2226 encode_q_branch2(s, level+1, 2*x+0, 2*y+0);
2227 encode_q_branch2(s, level+1, 2*x+1, 2*y+0);
2228 encode_q_branch2(s, level+1, 2*x+0, 2*y+1);
2229 encode_q_branch2(s, level+1, 2*x+1, 2*y+1);
// Intra: type bit 1, then the three colour components as differences from the left predictors.
2233 if(b->type & BLOCK_INTRA){
2234 put_rac(&s->c, &s->block_state[1 + (left->type&1) + (top->type&1)], 1);
2235 put_symbol(&s->c, &s->block_state[32], b->color[0]-pl , 1);
2236 put_symbol(&s->c, &s->block_state[64], b->color[1]-pcb, 1);
2237 put_symbol(&s->c, &s->block_state[96], b->color[2]-pcr, 1);
2238 set_blocks(s, level, x, y, b->color[0], b->color[1], b->color[2], pmx, pmy, 0, BLOCK_INTRA);
// Inter: type bit 0, the reference index only when more than one reference frame exists,
// then the motion vector as differences from the predictors.
2240 put_rac(&s->c, &s->block_state[1 + (left->type&1) + (top->type&1)], 0);
2241 if(s->ref_frames > 1)
2242 put_symbol(&s->c, &s->block_state[128 + 1024 + 32*ref_context], b->ref, 0);
2243 put_symbol(&s->c, &s->block_state[128 + 32*mx_context], b->mx - pmx, 1);
2244 put_symbol(&s->c, &s->block_state[128 + 32*my_context], b->my - pmy, 1);
2245 set_blocks(s, level, x, y, pl, pcb, pcr, b->mx, b->my, b->ref, 0);
/**
 * Reads the block-tree node at (x, y) of depth `level` from the range coder
 * s->c and stores the decoded values with set_blocks().
 *
 * At the maximum depth, or when the decoded flag is 1, the node's type and its
 * colour or reference/motion-vector residuals are read and added to predictors
 * taken from the neighbouring nodes. Otherwise the four child nodes are
 * decoded recursively.
 */
2249 static void decode_q_branch(SnowContext *s, int level, int x, int y){
// w is the block-grid width at the deepest level; index is this node's cell in that grid.
2250 const int w= s->b_width << s->block_max_depth;
2251 const int rem_depth= s->block_max_depth - level;
2252 const int index= (x + y*w) << rem_depth;
2253 int trx= (x+1)<<rem_depth;
// Neighbouring nodes used as predictors; null_block stands in when x or y is 0.
2254 BlockNode *left = x ? &s->block[index-1] : &null_block;
2255 BlockNode *top = y ? &s->block[index-w] : &null_block;
2256 BlockNode *tl = y && x ? &s->block[index-w-1] : left;
2257 BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
2258 int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
// NOTE(review): whatever condition guards this call is not present in this listing -- confirm when it runs.
2261 set_blocks(s, level, x, y, null_block.color[0], null_block.color[1], null_block.color[2], null_block.mx, null_block.my, null_block.ref, BLOCK_INTRA);
// Node is coded here when no deeper level exists or the decoded flag is 1.
2265 if(level==s->block_max_depth || get_rac(&s->c, &s->block_state[4 + s_context])){
// Predictors: colour from the left neighbour, motion vector from mid_pred() of left/top/tr.
2267 int l = left->color[0];
2268 int cb= left->color[1];
2269 int cr= left->color[2];
2270 int mx= mid_pred(left->mx, top->mx, tr->mx);
2271 int my= mid_pred(left->my, top->my, tr->my);
// Context numbers; the second term of mx_context/my_context is multiplied by 0 and contributes nothing.
2273 int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref);
2274 int mx_context= av_log2(2*ABS(left->mx - top->mx)) + 0*av_log2(2*ABS(tr->mx - top->mx));
2275 int my_context= av_log2(2*ABS(left->my - top->my)) + 0*av_log2(2*ABS(tr->my - top->my));
// Type bit: 1 means BLOCK_INTRA, 0 means inter.
2277 type= get_rac(&s->c, &s->block_state[1 + left->type + top->type]) ? BLOCK_INTRA : 0;
// Colour residuals are added to the predictors (the test on `type` selecting this path is not present in this listing).
2280 l += get_symbol(&s->c, &s->block_state[32], 1);
2281 cb+= get_symbol(&s->c, &s->block_state[64], 1);
2282 cr+= get_symbol(&s->c, &s->block_state[96], 1);
// Reference index is only present with more than one reference frame; mv contexts are offset by 16 when ref is non-zero.
2284 if(s->ref_frames > 1)
2285 ref= get_symbol(&s->c, &s->block_state[128 + 1024 + 32*ref_context], 0);
2286 mx+= get_symbol(&s->c, &s->block_state[128 + 32*(mx_context + 16*!!ref)], 1);
2287 my+= get_symbol(&s->c, &s->block_state[128 + 32*(my_context + 16*!!ref)], 1);
2289 set_blocks(s, level, x, y, l, cb, cr, mx, my, ref, type);
// Split node: decode the four children at the next level.
2291 decode_q_branch(s, level+1, 2*x+0, 2*y+0);
2292 decode_q_branch(s, level+1, 2*x+1, 2*y+0);
2293 decode_q_branch(s, level+1, 2*x+0, 2*y+1);
2294 decode_q_branch(s, level+1, 2*x+1, 2*y+1);
/**
 * Codes the top-level (level 0) block-tree nodes of the current frame.
 *
 * NOTE(review): the loops over x and y and the statements guarded by the first
 * two tests are not present in this listing; the comments below describe only
 * what the visible lines show.
 */
2298 static void encode_blocks(SnowContext *s){
// Extra step for iterative motion estimation on non-keyframes (the guarded statement is not visible here).
2303 if(s->avctx->me_method == ME_ITER && !s->keyframe)
// Refuse to continue when fewer than w*MB_SIZE*MB_SIZE*3 bytes remain in the output buffer.
2307 if(s->c.bytestream_end - s->c.bytestream < w*MB_SIZE*MB_SIZE*3){ //FIXME nicer limit
2308 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
// ME_ITER codes the stored block data with encode_q_branch2(); encode_q_branch() is the other coding path
// (the keyword joining the two calls is not present in this listing).
2312 if(s->avctx->me_method == ME_ITER)
2313 encode_q_branch2(s, 0, x, y);
2315 encode_q_branch (s, 0, x, y);
/**
 * Decodes top-level (level 0) block-tree nodes with decode_q_branch().
 *
 * NOTE(review): the declarations and the loops that supply x and y are not
 * present in this listing.
 */
2320 static void decode_blocks(SnowContext *s){
2327 decode_q_branch(s, 0, x, y);
/**
 * Sub-pel interpolation of a b_w x b_h block in two passes.
 *
 * The first pass runs over b_h+5 rows; the second pass filters the
 * intermediate rows held in tmp vertically (taps at tmp[x + k*stride],
 * k = 0..5). Both passes use the 6-tap kernel (1, -5, 20, 20, -5, 1), whose
 * gain is 32, and then blend linearly between the unfiltered sample and the
 * filtered value according to dx (first pass) or dy (second pass), in steps
 * of 1/16.
 *
 * NOTE(review): the lines that load a0..a5 in the first pass and the lines
 * that store `am` are not present in this listing.
 */
2332 static void mc_block(uint8_t *dst, uint8_t *src, uint8_t *tmp, int stride, int b_w, int b_h, int dx, int dy){
// First pass: five rows more than the block height, which supplies the rows the 6-tap vertical pass needs.
2335 for(y=0; y < b_h+5; y++){
2336 for(x=0; x < b_w; x++){
2343 // int am= 9*(a1+a2) - (a0+a3);
2344 int am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5);
2345 // int am= 18*(a2+a3) - 2*(a1+a4);
2346 // int aL= (-7*a0 + 105*a1 + 35*a2 - 5*a3)>>3;
2347 // int aR= (-7*a3 + 105*a2 + 35*a1 - 5*a0)>>3;
2349 // if(b_w==16) am= 8*(a1+a2);
// Blend: dx=0 gives a2, dx=8 gives the filtered value, dx=16 would give a3; the weights sum to 256.
2351 if(dx<8) am = (32*a2*( 8-dx) + am* dx + 128)>>8;
2352 else am = ( am*(16-dx) + 32*a3*(dx-8) + 128)>>8;
2354 /* FIXME Try increasing tmp buffer to 16 bits and not clipping here. Should give marginally better results. - Robert*/
// Clip to 0..255: a negative am becomes 0, an am above 255 becomes -1 (0xFF once stored in a uint8_t).
2355 if(am&(~255)) am= ~(am>>31);
2359 /* if (dx< 4) tmp[x + y*stride]= (16*a1*( 4-dx) + aL* dx + 32)>>6;
2360 else if(dx< 8) tmp[x + y*stride]= ( aL*( 8-dx) + am*(dx- 4) + 32)>>6;
2361 else if(dx<12) tmp[x + y*stride]= ( am*(12-dx) + aR*(dx- 8) + 32)>>6;
2362 else tmp[x + y*stride]= ( aR*(16-dx) + 16*a2*(dx-12) + 32)>>6;*/
// Move tmp back by the b_h+5 rows of the first pass (presumably it was advanced once per row above).
2367 tmp -= (b_h+5)*stride;
// Second pass: vertical 6-tap filter over the intermediate rows, blended according to dy.
2369 for(y=0; y < b_h; y++){
2370 for(x=0; x < b_w; x++){
2371 int a0= tmp[x + 0*stride];
2372 int a1= tmp[x + 1*stride];
2373 int a2= tmp[x + 2*stride];
2374 int a3= tmp[x + 3*stride];
2375 int a4= tmp[x + 4*stride];
2376 int a5= tmp[x + 5*stride];
2377 int am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5);
2378 // int am= 18*(a2+a3) - 2*(a1+a4);
2379 /* int aL= (-7*a0 + 105*a1 + 35*a2 - 5*a3)>>3;
2380 int aR= (-7*a3 + 105*a2 + 35*a1 - 5*a0)>>3;*/
2382 // if(b_w==16) am= 8*(a1+a2);
2384 if(dy<8) am = (32*a2*( 8-dy) + am* dy + 128)>>8;
2385 else am = ( am*(16-dy) + 32*a3*(dy-8) + 128)>>8;
// Same clip to 0..255 as in the first pass.
2387 if(am&(~255)) am= ~(am>>31);
2390 /* if (dy< 4) tmp[x + y*stride]= (16*a1*( 4-dy) + aL* dy + 32)>>6;
2391 else if(dy< 8) tmp[x + y*stride]= ( aL*( 8-dy) + am*(dy- 4) + 32)>>6;
2392 else if(dy<12) tmp[x + y*stride]= ( am*(12-dy) + aR*(dy- 8) + 32)>>6;
2393 else tmp[x + y*stride]= ( aR*(16-dy) + 16*a2*(dy-12) + 32)>>6;*/
2398 STOP_TIMER("mc_block")
/*
 * mca(dx,dy,b_w) defines mc_block_hpel<dx><dy><b_w>(), a fixed-position wrapper
 * around mc_block() for a square b_w x b_w block. It allocates the intermediate
 * buffer on the stack (stride*(b_w+5) bytes, a variable-length array) and moves
 * src back by 2 columns and 2 rows before the call.
 * NOTE(review): the visible lines never use the `h` parameter; the rest of the
 * macro body is not present in this listing.
 */
2401 #define mca(dx,dy,b_w)\
2402 static void mc_block_hpel ## dx ## dy ## b_w(uint8_t *dst, uint8_t *src, int stride, int h){\
2403 uint8_t tmp[stride*(b_w+5)];\
2405 mc_block(dst, src-2-2*stride, tmp, stride, b_w, b_w, dx, dy);\
/**
 * Writes the prediction for one block into dst.
 *
 * Intra blocks are filled with the block's colour for the given plane. Other
 * blocks are motion-compensated from s->last_picture[block->ref], either with
 * the generic mc_block() or with the put_h264_qpel_pixels_tab routines.
 *
 * @param dst         destination, b_w x b_h samples with line size `stride`
 * @param tmp         scratch buffer, used for edge emulation and by mc_block()
 * @param sx, sy      block position in the plane
 * @param w, h        plane dimensions used for the border test
 *
 * NOTE(review): the width tests choosing between the fill loops, the mv offset
 * applied to sx/sy, and several `else` lines are not present in this listing.
 */
2417 static void pred_block(SnowContext *s, uint8_t *dst, uint8_t *tmp, int stride, int sx, int sy, int b_w, int b_h, BlockNode *block, int plane_index, int w, int h){
2418 if(block->type & BLOCK_INTRA){
// color4 repeats the colour byte four times so a row can be filled with 32-bit stores.
2420 const int color = block->color[plane_index];
2421 const int color4= color*0x01010101;
// NOTE(review): the uint32_t stores below assume dst is suitably aligned for 4-byte access -- confirm.
// Fill 32 bytes per row.
2423 for(y=0; y < b_h; y++){
2424 *(uint32_t*)&dst[0 + y*stride]= color4;
2425 *(uint32_t*)&dst[4 + y*stride]= color4;
2426 *(uint32_t*)&dst[8 + y*stride]= color4;
2427 *(uint32_t*)&dst[12+ y*stride]= color4;
2428 *(uint32_t*)&dst[16+ y*stride]= color4;
2429 *(uint32_t*)&dst[20+ y*stride]= color4;
2430 *(uint32_t*)&dst[24+ y*stride]= color4;
2431 *(uint32_t*)&dst[28+ y*stride]= color4;
// Fill 16 bytes per row.
2434 for(y=0; y < b_h; y++){
2435 *(uint32_t*)&dst[0 + y*stride]= color4;
2436 *(uint32_t*)&dst[4 + y*stride]= color4;
2437 *(uint32_t*)&dst[8 + y*stride]= color4;
2438 *(uint32_t*)&dst[12+ y*stride]= color4;
// Fill 8 bytes per row.
2441 for(y=0; y < b_h; y++){
2442 *(uint32_t*)&dst[0 + y*stride]= color4;
2443 *(uint32_t*)&dst[4 + y*stride]= color4;
// Fill 4 bytes per row.
2446 for(y=0; y < b_h; y++){
2447 *(uint32_t*)&dst[0 + y*stride]= color4;
// Generic byte-wise fill for any width.
2450 for(y=0; y < b_h; y++){
2451 for(x=0; x < b_w; x++){
2452 dst[x + y*stride]= color;
// Inter block: read from the reference picture selected by block->ref.
2457 uint8_t *src= s->last_picture[block->ref].data[plane_index];
// Plane 0 uses twice the motion-vector scale of the other planes.
2458 const int scale= plane_index ? s->mv_scale : 2*s->mv_scale;
2459 int mx= block->mx*scale;
2460 int my= block->my*scale;
// Fractional part of the scaled vector, in 1/16 sample units.
2461 const int dx= mx&15;
2462 const int dy= my&15;
2463 const int tab_index= 3 - (b_w>>2) + (b_w>>4);
2466 src += sx + sy*stride;
// Source area too close to (or outside) the plane border; the unsigned cast also catches negative sx/sy.
// An edge-extended copy of (b_w+5) x (b_h+5) samples is built in tmp + MB_SIZE.
2467 if( (unsigned)sx >= w - b_w - 4
2468 || (unsigned)sy >= h - b_h - 4){
2469 ff_emulated_edge_mc(tmp + MB_SIZE, src, stride, b_w+5, b_h+5, sx, sy, w, h);
2472 // assert(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h);
2473 // assert(!(b_w&(b_w-1)));
2474 assert(b_w>1 && b_h>1);
2475 assert(tab_index>=0 && tab_index<4 || b_w==32);
// Generic path: fractional position not a multiple of 4/16, block neither square nor 2:1/1:2,
// or width not a power of two.
2476 if((dx&3) || (dy&3) || !(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h) || (b_w&(b_w-1)))
2477 mc_block(dst, src, tmp, stride, b_w, b_h, dx, dy);
// Two 16-wide calls per 16 rows (covers a 32-sample-wide block).
2480 for(y=0; y<b_h; y+=16){
2481 s->dsp.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + y*stride, src + 2 + (y+2)*stride,stride);
2482 s->dsp.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + 16 + y*stride, src + 18 + (y+2)*stride,stride);
// Single call with the table entry for this width.
2485 s->dsp.put_h264_qpel_pixels_tab[tab_index ][dy+(dx>>2)](dst,src + 2 + 2*stride,stride);
// Block twice as wide as high: two side-by-side calls with the next table entry.
2486 else if(b_w==2*b_h){
2487 s->dsp.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst ,src + 2 + 2*stride,stride);
2488 s->dsp.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst+b_h,src + 2 + b_h + 2*stride,stride);
// Two calls stacked vertically, b_w rows apart.
2491 s->dsp.put_h264_qpel_pixels_tab[tab_index ][dy+(dx>>2)](dst ,src + 2 + 2*stride ,stride);
2492 s->dsp.put_h264_qpel_pixels_tab[tab_index ][dy+(dx>>2)](dst+b_w*stride,src + 2 + 2*stride+b_w*stride,stride);
/**
 * Blends the four predictions block[0..3] with the OBMC window, one row at a
 * time, and combines the result with the slice-buffer line for that row.
 *
 * Two outcomes are visible: the weighted value is added to the line, rounded,
 * clipped to 0..255 and stored in dst8; or it is subtracted from the line in
 * place.
 * NOTE(review): the test choosing between them (presumably on `add`) and the
 * declarations of x, y and dst are not present in this listing.
 */
2497 void ff_snow_inner_add_yblock(uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h,
2498 int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8){
2501 for(y=0; y<b_h; y++){
2502 //FIXME ugly misuse of obmc_stride
// Row pointers into the four window quadrants: obmc1 top-left, obmc2 half a stride to the right,
// obmc3 (obmc_stride>>1) rows further down, obmc4 both.
2503 uint8_t *obmc1= obmc + y*obmc_stride;
2504 uint8_t *obmc2= obmc1+ (obmc_stride>>1);
2505 uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1);
2506 uint8_t *obmc4= obmc3+ (obmc_stride>>1);
2507 dst = slice_buffer_get_line(sb, src_y + y);
2508 for(x=0; x<b_w; x++){
// Weighted sum of the four predictions; the quadrants pair with block[] in reverse order.
2509 int v= obmc1[x] * block[3][x + y*src_stride]
2510 +obmc2[x] * block[2][x + y*src_stride]
2511 +obmc3[x] * block[1][x + y*src_stride]
2512 +obmc4[x] * block[0][x + y*src_stride];
// Rescale to FRAC_BITS fractional bits, assuming the four weights sum to 1<<LOG2_OBMC_MAX -- TODO confirm.
2514 v <<= 8 - LOG2_OBMC_MAX;
2516 v += 1<<(7 - FRAC_BITS);
2517 v >>= 8 - FRAC_BITS;
// Add to the line, round to integer, clip to 0..255 and store in the 8-bit picture.
2520 v += dst[x + src_x];
2521 v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS;
2522 if(v&(~255)) v= ~(v>>31);
2523 dst8[x + y*src_stride] = v;
// Otherwise subtract the prediction from the line in place.
2525 dst[x + src_x] -= v;
2531 //FIXME name cleanup (b_w, block_w, b_width stuff)
/**
 * Slice-buffer variant of add_yblock(): builds the predictions of the four
 * blocks surrounding an OBMC window (lt, rt, lb, rb), then hands the weighting
 * and accumulation to s->dsp.inner_add_yblock().
 *
 * NOTE(review): many lines of this function (edge handling bodies, block[]
 * assignments, preprocessor lines) are not present in this listing.
 */
2532 static always_inline void add_yblock_buffered(SnowContext *s, slice_buffer * sb, DWTELEM *old_dst, uint8_t *dst8, uint8_t *obmc, int src_x, int src_y, int b_w, int b_h, int w, int h, int dst_stride, int src_stride, int obmc_stride, int b_x, int b_y, int add, int plane_index){
2533 DWTELEM * dst = NULL;
2534 const int b_width = s->b_width << s->block_max_depth;
2535 const int b_height= s->b_height << s->block_max_depth;
2536 const int b_stride= b_width;
// The four block nodes whose predictions overlap in this window.
2537 BlockNode *lt= &s->block[b_x + b_y*b_stride];
2538 BlockNode *rt= lt+1;
2539 BlockNode *lb= lt+b_stride;
2540 BlockNode *rb= lb+1;
// Scratch space for the predictions: a variable-length array on the stack, sized from src_stride.
2542 int tmp_step= src_stride >= 7*MB_SIZE ? MB_SIZE : MB_SIZE*src_stride;
2543 uint8_t tmp[src_stride*7*MB_SIZE]; //FIXME align
// Block-grid edge cases (the adjustments made in each branch are not visible here).
2550 }else if(b_x + 1 >= b_width){
2557 }else if(b_y + 1 >= b_height){
// Window clipping against the plane borders.
2562 if(src_x<0){ //FIXME merge with prev & always round internal width upto *16
2566 }else if(src_x + b_w > w){
2570 obmc -= src_y*obmc_stride;
2573 }else if(src_y + b_h> h){
// Nothing left to do once clipping has removed the whole window.
2577 if(b_w<=0 || b_h<=0) return;
2579 assert(src_stride > 2*MB_SIZE + 5);
2580 // old_dst += src_x + src_y*dst_stride;
2581 dst8+= src_x + src_y*src_stride;
2582 // src += src_x + src_y*src_stride;
2584 ptmp= tmp + 3*tmp_step;
// Predictions for lt, rt, lb and rb; the same_block() tests presumably let identical neighbours
// share one prediction (the sharing assignments are not visible here).
2587 pred_block(s, block[0], tmp, src_stride, src_x, src_y, b_w, b_h, lt, plane_index, w, h);
2589 if(same_block(lt, rt)){
2594 pred_block(s, block[1], tmp, src_stride, src_x, src_y, b_w, b_h, rt, plane_index, w, h);
2597 if(same_block(lt, lb)){
2599 }else if(same_block(rt, lb)){
2604 pred_block(s, block[2], tmp, src_stride, src_x, src_y, b_w, b_h, lb, plane_index, w, h);
2607 if(same_block(lt, rb) ){
2609 }else if(same_block(rt, rb)){
2611 }else if(same_block(lb, rb)){
2615 pred_block(s, block[3], tmp, src_stride, src_x, src_y, b_w, b_h, rb, plane_index, w, h);
// NOTE(review): dst is initialised to NULL above and no visible line assigns it, so the four
// per-quadrant loops below would dereference NULL if compiled. Presumably they sit inside a
// disabled preprocessor region that this listing does not show -- confirm.
2618 for(y=0; y<b_h; y++){
2619 for(x=0; x<b_w; x++){
2620 int v= obmc [x + y*obmc_stride] * block[3][x + y*src_stride] * (256/OBMC_MAX);
2621 if(add) dst[x + y*dst_stride] += v;
2622 else dst[x + y*dst_stride] -= v;
2625 for(y=0; y<b_h; y++){
2626 uint8_t *obmc2= obmc + (obmc_stride>>1);
2627 for(x=0; x<b_w; x++){
2628 int v= obmc2[x + y*obmc_stride] * block[2][x + y*src_stride] * (256/OBMC_MAX);
2629 if(add) dst[x + y*dst_stride] += v;
2630 else dst[x + y*dst_stride] -= v;
2633 for(y=0; y<b_h; y++){
2634 uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
2635 for(x=0; x<b_w; x++){
2636 int v= obmc3[x + y*obmc_stride] * block[1][x + y*src_stride] * (256/OBMC_MAX);
2637 if(add) dst[x + y*dst_stride] += v;
2638 else dst[x + y*dst_stride] -= v;
2641 for(y=0; y<b_h; y++){
2642 uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
2643 uint8_t *obmc4= obmc3+ (obmc_stride>>1);
2644 for(x=0; x<b_w; x++){
2645 int v= obmc4[x + y*obmc_stride] * block[0][x + y*src_stride] * (256/OBMC_MAX);
2646 if(add) dst[x + y*dst_stride] += v;
2647 else dst[x + y*dst_stride] -= v;
// Weighting and accumulation through the DSP function pointer, reading lines from the slice buffer.
2655 s->dsp.inner_add_yblock(obmc, obmc_stride, block, b_w, b_h, src_x,src_y, src_stride, sb, add, dst8);
2656 STOP_TIMER("Inner add y block")
2661 //FIXME name cleanup (b_w, block_w, b_width stuff)
/**
 * Builds the predictions of the four blocks surrounding an OBMC window
 * (lt, rt, lb, rb), weights them with the window and combines the result with
 * the DWTELEM buffer dst.
 *
 * In the final loop the weighted value is either added to dst, rounded,
 * clipped to 0..255 and stored in dst8, or subtracted from dst in place.
 * NOTE(review): many lines of this function (edge handling bodies, block[]
 * assignments, the tests on `add`/`offset_dst`, preprocessor lines) are not
 * present in this listing.
 */
2662 static always_inline void add_yblock(SnowContext *s, DWTELEM *dst, uint8_t *dst8, uint8_t *obmc, int src_x, int src_y, int b_w, int b_h, int w, int h, int dst_stride, int src_stride, int obmc_stride, int b_x, int b_y, int add, int offset_dst, int plane_index){
2663 const int b_width = s->b_width << s->block_max_depth;
2664 const int b_height= s->b_height << s->block_max_depth;
2665 const int b_stride= b_width;
// The four block nodes whose predictions overlap in this window.
2666 BlockNode *lt= &s->block[b_x + b_y*b_stride];
2667 BlockNode *rt= lt+1;
2668 BlockNode *lb= lt+b_stride;
2669 BlockNode *rb= lb+1;
// Scratch space for the predictions: a variable-length array on the stack, sized from src_stride.
2671 int tmp_step= src_stride >= 7*MB_SIZE ? MB_SIZE : MB_SIZE*src_stride;
2672 uint8_t tmp[src_stride*7*MB_SIZE]; //FIXME align
// Block-grid edge cases (the adjustments made in each branch are not visible here).
2679 }else if(b_x + 1 >= b_width){
2686 }else if(b_y + 1 >= b_height){
// Window clipping against the plane borders.
2691 if(src_x<0){ //FIXME merge with prev & always round internal width upto *16
2697 }else if(src_x + b_w > w){
2701 obmc -= src_y*obmc_stride;
2704 dst -= src_y*dst_stride;
2706 }else if(src_y + b_h> h){
// Nothing left to do once clipping has removed the whole window.
2710 if(b_w<=0 || b_h<=0) return;
2712 assert(src_stride > 2*MB_SIZE + 5);
// Move both outputs to the window's top-left sample.
2714 dst += src_x + src_y*dst_stride;
2715 dst8+= src_x + src_y*src_stride;
2716 // src += src_x + src_y*src_stride;
2718 ptmp= tmp + 3*tmp_step;
// Predictions for lt, rt, lb and rb; the same_block() tests presumably let identical neighbours
// share one prediction (the sharing assignments are not visible here).
2721 pred_block(s, block[0], tmp, src_stride, src_x, src_y, b_w, b_h, lt, plane_index, w, h);
2723 if(same_block(lt, rt)){
2728 pred_block(s, block[1], tmp, src_stride, src_x, src_y, b_w, b_h, rt, plane_index, w, h);
2731 if(same_block(lt, lb)){
2733 }else if(same_block(rt, lb)){
2738 pred_block(s, block[2], tmp, src_stride, src_x, src_y, b_w, b_h, lb, plane_index, w, h);
2741 if(same_block(lt, rb) ){
2743 }else if(same_block(rt, rb)){
2745 }else if(same_block(lb, rb)){
2749 pred_block(s, block[3], tmp, src_stride, src_x, src_y, b_w, b_h, rb, plane_index, w, h);
// Per-quadrant form: each loop applies one window quadrant to one prediction.
// NOTE(review): this duplicates the combined loop further down; presumably only one of the two
// is compiled (the preprocessor lines are not visible here) -- confirm.
2752 for(y=0; y<b_h; y++){
2753 for(x=0; x<b_w; x++){
2754 int v= obmc [x + y*obmc_stride] * block[3][x + y*src_stride] * (256/OBMC_MAX);
2755 if(add) dst[x + y*dst_stride] += v;
2756 else dst[x + y*dst_stride] -= v;
2759 for(y=0; y<b_h; y++){
2760 uint8_t *obmc2= obmc + (obmc_stride>>1);
2761 for(x=0; x<b_w; x++){
2762 int v= obmc2[x + y*obmc_stride] * block[2][x + y*src_stride] * (256/OBMC_MAX);
2763 if(add) dst[x + y*dst_stride] += v;
2764 else dst[x + y*dst_stride] -= v;
2767 for(y=0; y<b_h; y++){
2768 uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
2769 for(x=0; x<b_w; x++){
2770 int v= obmc3[x + y*obmc_stride] * block[1][x + y*src_stride] * (256/OBMC_MAX);
2771 if(add) dst[x + y*dst_stride] += v;
2772 else dst[x + y*dst_stride] -= v;
2775 for(y=0; y<b_h; y++){
2776 uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
2777 uint8_t *obmc4= obmc3+ (obmc_stride>>1);
2778 for(x=0; x<b_w; x++){
2779 int v= obmc4[x + y*obmc_stride] * block[0][x + y*src_stride] * (256/OBMC_MAX);
2780 if(add) dst[x + y*dst_stride] += v;
2781 else dst[x + y*dst_stride] -= v;
// Combined form: all four quadrants are applied in one pass per row.
2785 for(y=0; y<b_h; y++){
2786 //FIXME ugly misuse of obmc_stride
// Row pointers into the four window quadrants: obmc1 top-left, obmc2 half a stride to the right,
// obmc3 (obmc_stride>>1) rows further down, obmc4 both.
2787 uint8_t *obmc1= obmc + y*obmc_stride;
2788 uint8_t *obmc2= obmc1+ (obmc_stride>>1);
2789 uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1);
2790 uint8_t *obmc4= obmc3+ (obmc_stride>>1);
2791 for(x=0; x<b_w; x++){
2792 int v= obmc1[x] * block[3][x + y*src_stride]
2793 +obmc2[x] * block[2][x + y*src_stride]
2794 +obmc3[x] * block[1][x + y*src_stride]
2795 +obmc4[x] * block[0][x + y*src_stride];
// Rescale to FRAC_BITS fractional bits, assuming the four weights sum to 1<<LOG2_OBMC_MAX -- TODO confirm.
2797 v <<= 8 - LOG2_OBMC_MAX;
2799 v += 1<<(7 - FRAC_BITS);
2800 v >>= 8 - FRAC_BITS;
// Add to dst, round to integer, clip to 0..255 and store in the 8-bit picture.
2803 v += dst[x + y*dst_stride];
2804 v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS;
2805 if(v&(~255)) v= ~(v>>31);
2806 dst8[x + y*src_stride] = v;
// Otherwise subtract the prediction from dst in place.
2808 dst[x + y*dst_stride] -= v;
/**
 * Applies the prediction to one row of blocks (mb_y) of a plane whose
 * coefficients live in a slice buffer.
 *
 * For keyframes, or when bit 512 of avctx->debug is set, the visible code only
 * applies a 128 offset per sample (added and written to the 8-bit picture, or
 * subtracted in place). The other path calls add_yblock_buffered() for every
 * window in the row.
 * NOTE(review): the tests on `add`, the x loops, the shift by FRAC_BITS and
 * several declarations are not present in this listing.
 */
2815 static always_inline void predict_slice_buffered(SnowContext *s, slice_buffer * sb, DWTELEM * old_buffer, int plane_index, int add, int mb_y){
2816 Plane *p= &s->plane[plane_index];
2817 const int mb_w= s->b_width << s->block_max_depth;
2818 const int mb_h= s->b_height << s->block_max_depth;
// Planes other than 0 use half the block size and the OBMC table one entry further on.
2820 int block_size = MB_SIZE >> s->block_max_depth;
2821 int block_w = plane_index ? block_size/2 : block_size;
2822 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
2823 int obmc_stride= plane_index ? block_size : 2*block_size;
2824 int ref_stride= s->current_picture.linesize[plane_index];
2825 uint8_t *dst8= s->current_picture.data[plane_index];
// No motion-compensated prediction in this case, only the 128 offset.
2830 if(s->keyframe || (s->avctx->debug&512)){
// Rows of this slice, limited to the plane height.
2835 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++)
2837 // DWTELEM * line = slice_buffer_get_line(sb, y);
2838 DWTELEM * line = sb->line[y];
2841 // int v= buf[x + y*w] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
// Add the 128 offset plus rounding, clip to 0..255 and store in the 8-bit picture.
2842 int v= line[x] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
2844 if(v&(~255)) v= ~(v>>31);
2845 dst8[x + y*ref_stride]= v;
2849 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++)
2851 // DWTELEM * line = slice_buffer_get_line(sb, y);
2852 DWTELEM * line = sb->line[y];
// Remove the 128 offset in place.
2855 line[x] -= 128 << FRAC_BITS;
2856 // buf[x + y*w]-= 128<<FRAC_BITS;
// One window per block boundary, mb_w+1 in total; each starts half a block before its block position.
2864 for(mb_x=0; mb_x<=mb_w; mb_x++){
2867 add_yblock_buffered(s, sb, old_buffer, dst8, obmc,
2868 block_w*mb_x - block_w/2,
2869 block_w*mb_y - block_w/2,
2872 w, ref_stride, obmc_stride,
2876 STOP_TIMER("add_yblock")
2879 STOP_TIMER("predict_slice")
/**
 * Applies the prediction to one row of blocks (mb_y) of a plane stored in the
 * flat DWTELEM buffer buf (indexed as buf[x + y*w]).
 *
 * For keyframes, or when bit 512 of avctx->debug is set, the visible code only
 * applies a 128 offset per sample (added and written to the 8-bit picture, or
 * subtracted in place). The other path calls add_yblock() for every window in
 * the row.
 * NOTE(review): the tests on `add`, the x loops, the shift by FRAC_BITS and
 * several declarations are not present in this listing.
 */
2882 static always_inline void predict_slice(SnowContext *s, DWTELEM *buf, int plane_index, int add, int mb_y){
2883 Plane *p= &s->plane[plane_index];
2884 const int mb_w= s->b_width << s->block_max_depth;
2885 const int mb_h= s->b_height << s->block_max_depth;
// Planes other than 0 use half the block size and the OBMC table one entry further on.
2887 int block_size = MB_SIZE >> s->block_max_depth;
2888 int block_w = plane_index ? block_size/2 : block_size;
2889 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
2890 const int obmc_stride= plane_index ? block_size : 2*block_size;
2891 int ref_stride= s->current_picture.linesize[plane_index];
2892 uint8_t *dst8= s->current_picture.data[plane_index];
// No motion-compensated prediction in this case, only the 128 offset.
2897 if(s->keyframe || (s->avctx->debug&512)){
// Rows of this slice, limited to the plane height.
2902 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
// Add the 128 offset plus rounding, clip to 0..255 and store in the 8-bit picture.
2904 int v= buf[x + y*w] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
2906 if(v&(~255)) v= ~(v>>31);
2907 dst8[x + y*ref_stride]= v;
2911 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
// Remove the 128 offset in place.
2913 buf[x + y*w]-= 128<<FRAC_BITS;
// One window per block boundary, mb_w+1 in total; each starts half a block before its block position.
2921 for(mb_x=0; mb_x<=mb_w; mb_x++){
2924 add_yblock(s, buf, dst8, obmc,
2925 block_w*mb_x - block_w/2,
2926 block_w*mb_y - block_w/2,
2929 w, ref_stride, obmc_stride,
2931 add, 1, plane_index);
2933 STOP_TIMER("add_yblock")
2936 STOP_TIMER("predict_slice")
2939 static always_inline void predict_plane(SnowContext *s, DWTELEM *buf, int plane_index, int add){
2940 const int mb_h= s->b_height << s->block_max_depth;
2942 for(mb_y=0; mb_y<=mb_h; mb_y++)
2943 predict_slice(s, buf, plane_index, add, mb_y);
/**
 * Computes an intra colour value (0..255) for block (mb_x, mb_y) in the given
 * plane.
 *
 * The block is temporarily marked intra with colour 0, the prediction
 * contributed by the surrounding windows is accumulated into a scratch buffer
 * with add_yblock(), and the OBMC-weighted difference between the input
 * picture and that prediction (ab) is divided by the sum of squared weights
 * (aa).
 * NOTE(review): the loop over i, the declarations of ab/aa/d and whatever
 * restores `backup` into the block are not present in this listing.
 */
2946 static int get_dc(SnowContext *s, int mb_x, int mb_y, int plane_index){
2948 Plane *p= &s->plane[plane_index];
// Planes other than 0 use half the block size and the OBMC table one entry further on.
2949 const int block_size = MB_SIZE >> s->block_max_depth;
2950 const int block_w = plane_index ? block_size/2 : block_size;
2951 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
2952 const int obmc_stride= plane_index ? block_size : 2*block_size;
2953 const int ref_stride= s->current_picture.linesize[plane_index];
2954 uint8_t *src= s-> input_picture.data[plane_index];
// Scratch accumulator taken from s->m.obmc_scratchpad, one region per plane.
2955 DWTELEM *dst= (DWTELEM*)s->m.obmc_scratchpad + plane_index*block_size*block_size*4;
2956 const int b_stride = s->b_width << s->block_max_depth;
2957 const int w= p->width;
2958 const int h= p->height;
2959 int index= mb_x + mb_y*b_stride;
2960 BlockNode *b= &s->block[index];
// Copy of the block taken before it is modified below.
2961 BlockNode backup= *b;
// Make the block intra with colour 0 so that it adds nothing to the accumulated prediction.
2965 b->type|= BLOCK_INTRA;
2966 b->color[plane_index]= 0;
2967 memset(dst, 0, obmc_stride*obmc_stride*sizeof(DWTELEM));
// Window i covers one of the 2x2 windows around the block (offsets -1/0 in each direction).
2970 int mb_x2= mb_x + (i &1) - 1;
2971 int mb_y2= mb_y + (i>>1) - 1;
2972 int x= block_w*mb_x2 + block_w/2;
2973 int y= block_w*mb_y2 + block_w/2;
// Called with add=0, so the weighted prediction is subtracted from the scratch buffer.
2975 add_yblock(s, dst + ((i&1)+(i>>1)*obmc_stride)*block_w, NULL, obmc,
2976 x, y, block_w, block_w, w, h, obmc_stride, ref_stride, obmc_stride, mb_x2, mb_y2, 0, 0, plane_index);
// Only the samples of this window that lie inside the plane are visited.
2978 for(y2= FFMAX(y, 0); y2<FFMIN(h, y+block_w); y2++){
2979 for(x2= FFMAX(x, 0); x2<FFMIN(w, x+block_w); x2++){
2980 int index= x2-(block_w*mb_x - block_w/2) + (y2-(block_w*mb_y - block_w/2))*obmc_stride;
2981 int obmc_v= obmc[index];
// When the window crosses a plane border, the weight from the opposite half of the OBMC table is added in.
2983 if(y<0) obmc_v += obmc[index + block_w*obmc_stride];
2984 if(x<0) obmc_v += obmc[index + block_w];
2985 if(y+block_w>h) obmc_v += obmc[index - block_w*obmc_stride];
2986 if(x+block_w>w) obmc_v += obmc[index - block_w];
2987 //FIXME precalc this or simplify it somehow else
// d is the accumulated prediction with its sign restored, plus rounding.
2989 d = -dst[index] + (1<<(FRAC_BITS-1));
2991 ab += (src[x2 + y2*ref_stride] - (d>>FRAC_BITS)) * obmc_v;
2992 aa += obmc_v * obmc_v; //FIXME precalculate this
// Rounded quotient ab*2^LOG2_OBMC_MAX / aa, clipped to 0..255.
2998 return clip(((ab<<LOG2_OBMC_MAX) + aa/2)/aa, 0, 255); //FIXME we shouldn't need clipping
3001 static inline int get_block_bits(SnowContext *s, int x, int y, int w){
3002 const int b_stride = s->b_width << s->block_max_depth;
3003 const int b_height = s->b_height<< s->block_max_depth;
3004 int index= x + y*b_stride;
3005 BlockNode *b = &s->block[index];
3006 BlockNode *left = x ? &s->block[index-1] : &null_block;
3007 BlockNode *top = y ? &s->block[index-b_stride] : &null_block;
3008 BlockNode *tl = y && x ? &s->block[index-b_stride-1] : left;
3009 BlockNode *tr = y && x+w<b_stride ? &s->block[index-b_stride+w] : tl;
3011 // int mx_context= av_log2(2*ABS(left->mx - top->mx));
3012 // int my_context= av_log2(2*ABS(left->my - top->my));
3014 if(x<0 || x>=b_stride || y>=b_height)
3016 dmx= b->mx - mid_pred(left->mx, top->mx, tr->mx);
3017 dmy= b->my - mid_pred(left->my, top->my, tr->my);
3023 00001XXXX 15-30 8-15
3025 //FIXME try accurate rate
3026 //FIXME intra and inter predictors if surrounding blocks arent the same type
3027 if(b->type & BLOCK_INTRA){
3028 return 3+2*( av_log2(2*ABS(left->color[0] - b->color[0]))
3029 + av_log2(2*ABS(left->color[1] - b->color[1]))
3030 + av_log2(2*ABS(left->color[2] - b->color[2])));
3032 return 2*(1 + av_log2(2*ABS(dmx)) //FIXME kill the 2* can be merged in lambda
3033 + av_log2(2*ABS(dmy))
3034 + av_log2(2*b->ref));
/**
 * Returns a rate-distortion cost, distortion + rate*penalty_factor, for the
 * current state of block (mb_x, mb_y).
 *
 * The block's prediction over its 2*block_w square window is weighted with
 * obmc_edged, combined with the values in the scratch buffer `pred`, and
 * compared against the input picture with s->dsp.me_cmp. The rate comes from
 * get_block_bits() of the block and some neighbours.
 * NOTE(review): the store of the blended sample, the loops over i and several
 * tests are not present in this listing.
 */
3037 static int get_block_rd(SnowContext *s, int mb_x, int mb_y, int plane_index, const uint8_t *obmc_edged){
3038 Plane *p= &s->plane[plane_index];
3039 const int block_size = MB_SIZE >> s->block_max_depth;
3040 const int block_w = plane_index ? block_size/2 : block_size;
3041 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
3042 const int obmc_stride= plane_index ? block_size : 2*block_size;
3043 const int ref_stride= s->current_picture.linesize[plane_index];
3044 uint8_t *dst= s->current_picture.data[plane_index];
3045 uint8_t *src= s-> input_picture.data[plane_index];
3046 DWTELEM *pred= (DWTELEM*)s->m.obmc_scratchpad + plane_index*block_size*block_size*4;
// Variable-length arrays on the stack; their size grows with the picture line size.
3047 uint8_t cur[ref_stride*2*MB_SIZE]; //FIXME alignment
3048 uint8_t tmp[ref_stride*(2*MB_SIZE+5)];
3049 const int b_stride = s->b_width << s->block_max_depth;
3050 const int b_height = s->b_height<< s->block_max_depth;
3051 const int w= p->width;
3052 const int h= p->height;
3055 const int penalty_factor= get_penalty_factor(s->lambda, s->lambda2, s->avctx->me_cmp);
// (sx, sy) is the top-left corner of the window; x0..x1 and y0..y1 are the part of it inside the plane.
3056 int sx= block_w*mb_x - block_w/2;
3057 int sy= block_w*mb_y - block_w/2;
3058 int x0= FFMAX(0,-sx);
3059 int y0= FFMAX(0,-sy);
3060 int x1= FFMIN(block_w*2, w-sx);
3061 int y1= FFMIN(block_w*2, h-sy);
// Prediction of this block alone over the whole window, written to cur.
3064 pred_block(s, cur, tmp, ref_stride, sx, sy, block_w*2, block_w*2, &s->block[mb_x + mb_y*b_stride], plane_index, w, h);
3066 for(y=y0; y<y1; y++){
3067 const uint8_t *obmc1= obmc_edged + y*obmc_stride;
3068 const DWTELEM *pred1 = pred + y*obmc_stride;
3069 uint8_t *cur1 = cur + y*ref_stride;
3070 uint8_t *dst1 = dst + sx + (sy+y)*ref_stride;
3071 for(x=x0; x<x1; x++){
// Weight this block's prediction, add the scratch-buffer value, drop the fractional bits and clip to 0..255.
3072 int v = (cur1[x] * obmc1[x]) << (FRAC_BITS - LOG2_OBMC_MAX);
3073 v = (v + pred1[x]) >> FRAC_BITS;
3074 if(v&(~255)) v= ~(v>>31);
3079 /* copy the regions where obmc[] = (uint8_t)256 */
// Only at the four corner blocks of the grid, and only when the weights are 8-bit.
3080 if(LOG2_OBMC_MAX == 8
3081 && (mb_x == 0 || mb_x == b_stride-1)
3082 && (mb_y == 0 || mb_y == b_height-1)){
3091 for(y=y0; y<y1; y++)
3092 memcpy(dst + sx+x0 + (sy+y)*ref_stride, cur + x0 + y*ref_stride, x1-x0);
3095 //FIXME sad/ssd can be broken up, but wavelet cmp should be one 32x32 block
// Distortion measured in 16x16 pieces, one per quadrant i.
3099 int off = sx+16*(i&1) + (sy+16*(i>>1))*ref_stride;
3100 distortion += s->dsp.me_cmp[0](&s->m, src + off, dst + off, ref_stride, 16);
// Distortion measured over the whole window in one call.
3104 distortion = s->dsp.me_cmp[0](&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, block_w*2);
// Rate: bits of the block and neighbours selected by i (x offset may be -1, which get_block_bits() must tolerate).
3113 rate += get_block_bits(s, mb_x + (i&1) - (i>>1), mb_y + (i>>1), 1);
3115 if(mb_x == b_stride-2)
3116 rate += get_block_bits(s, mb_x + 1, mb_y + 1, 1);
3118 return distortion + rate*penalty_factor;
/**
 * Returns a rate-distortion cost, distortion + rate*penalty_factor, for the
 * 2x2 group of blocks whose top-left member is (mb_x, mb_y).
 *
 * Nine windows (a 3x3 arrangement around the group) are reconstructed into
 * the current picture with add_yblock() and compared against the input
 * picture. The rate comes from get_block_bits().
 * NOTE(review): the loop over i, the tests selecting the memcpy branches and
 * the handling of `merged` are not present in this listing.
 */
3121 static int get_4block_rd(SnowContext *s, int mb_x, int mb_y, int plane_index){
3123 Plane *p= &s->plane[plane_index];
3124 const int block_size = MB_SIZE >> s->block_max_depth;
3125 const int block_w = plane_index ? block_size/2 : block_size;
3126 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
3127 const int obmc_stride= plane_index ? block_size : 2*block_size;
3128 const int ref_stride= s->current_picture.linesize[plane_index];
3129 uint8_t *dst= s->current_picture.data[plane_index];
3130 uint8_t *src= s-> input_picture.data[plane_index];
// All-zero buffer passed to add_yblock() as its DWTELEM input, together with a dst_stride of 0.
3131 const static DWTELEM zero_dst[4096]; //FIXME
3132 const int b_stride = s->b_width << s->block_max_depth;
3133 const int b_height = s->b_height<< s->block_max_depth;
3134 const int w= p->width;
3135 const int h= p->height;
3138 const int penalty_factor= get_penalty_factor(s->lambda, s->lambda2, s->avctx->me_cmp);
// Window i of the 3x3 arrangement; (x, y) is its top-left sample.
3141 int mb_x2= mb_x + (i%3) - 1;
3142 int mb_y2= mb_y + (i/3) - 1;
3143 int x= block_w*mb_x2 + block_w/2;
3144 int y= block_w*mb_y2 + block_w/2;
// add=1: the prediction is rounded, clipped and written into the current picture.
3146 add_yblock(s, zero_dst, dst, obmc,
3147 x, y, block_w, block_w, w, h, /*dst_stride*/0, ref_stride, obmc_stride, mb_x2, mb_y2, 1, 1, plane_index);
3149 //FIXME find a cleaner/simpler way to skip the outside stuff
// Parts of the window outside the plane are copied from the input picture, so src and dst are identical there.
3150 for(y2= y; y2<0; y2++)
3151 memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, block_w);
3152 for(y2= h; y2<y+block_w; y2++)
3153 memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, block_w);
3155 for(y2= y; y2<y+block_w; y2++)
3156 memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, -x);
3159 for(y2= y; y2<y+block_w; y2++)
3160 memcpy(dst + w + y2*ref_stride, src + w + y2*ref_stride, x+block_w - w);
// me_cmp[0] is used for block_w 16, me_cmp[1] for block_w 8.
3163 assert(block_w== 8 || block_w==16);
3164 distortion += s->dsp.me_cmp[block_w==8](&s->m, src + x + y*ref_stride, dst + x + y*ref_stride, ref_stride, block_w);
// merged: all four blocks of the group compare equal.
3168 BlockNode *b= &s->block[mb_x+mb_y*b_stride];
3169 int merged= same_block(b,b+1) && same_block(b,b+b_stride) && same_block(b,b+b_stride+1);
3177 rate = get_block_bits(s, mb_x, mb_y, 2);
// When merged, the four entries for the group itself (dxy[0..3]) are skipped and only the neighbours are counted.
3178 for(i=merged?4:0; i<9; i++){
3179 static const int dxy[9][2] = {{0,0},{1,0},{0,1},{1,1},{2,0},{2,1},{-1,2},{0,2},{1,2}};
3180 rate += get_block_bits(s, mb_x + dxy[i][0], mb_y + dxy[i][1], 1);
3183 return distortion + rate*penalty_factor;
/**
 * Tries candidate values for block (mb_x, mb_y) and measures their cost with
 * get_block_rd().
 *
 * Two ways of applying p[] are visible: as three colour components with
 * BLOCK_INTRA set, or as an inter candidate that first consults s->me_cache.
 * NOTE(review): the test on `intra`, the assignments of the motion vector, the
 * comparison against *best_rd, the use of `backup` and all return statements
 * are not present in this listing.
 */
3186 static always_inline int check_block(SnowContext *s, int mb_x, int mb_y, int p[3], int intra, const uint8_t *obmc_edged, int *best_rd){
3187 const int b_stride= s->b_width << s->block_max_depth;
3188 BlockNode *block= &s->block[mb_x + mb_y * b_stride];
// Copy of the block taken before it is modified below.
3189 BlockNode backup= *block;
3190 int rd, index, value;
3192 assert(mb_x>=0 && mb_y>=0);
3193 assert(mb_x<b_stride);
// Intra candidate: p[0..2] are the colour components.
3196 block->color[0] = p[0];
3197 block->color[1] = p[1];
3198 block->color[2] = p[2];
3199 block->type |= BLOCK_INTRA;
// Inter candidate: look up (p[0], p[1], ref) in the cache of already-tested candidates.
// The slot comes from a hash of p[0], p[1]; the stored value combines the cache generation with the candidate.
// NOTE(review): p[1]<<6 left-shifts a value that may be negative, which is undefined in ISO C -- confirm the range of p[1].
3201 index= (p[0] + 31*p[1]) & (ME_CACHE_SIZE-1);
3202 value= s->me_cache_generation + (p[0]>>10) + (p[1]<<6) + (block->ref<<12);
3203 if(s->me_cache[index] == value)
3205 s->me_cache[index]= value;
3209 block->type &= ~BLOCK_INTRA;
// Cost of the block with the candidate applied (plane 0 only).
3212 rd= get_block_rd(s, mb_x, mb_y, 0, obmc_edged);
3224 /* special case for int[2] args we discard afterward, fixes compilation prob with gcc 2.95 */
3225 static always_inline int check_block_inter(SnowContext *s, int mb_x, int mb_y, int p0, int p1, const uint8_t *obmc_edged, int *best_rd){
3226 int p[2] = {p0, p1};
3227 return check_block(s, mb_x, mb_y, p, 0, obmc_edged, best_rd);
/**
 * Tries one inter candidate (p0, p1, ref) for the whole 2x2 group of blocks
 * whose top-left member is (mb_x, mb_y) and measures its cost with
 * get_4block_rd().
 *
 * The top-left block is copied over the other three members before the cost
 * is computed; the four original blocks are kept in `backup` and can be
 * written back.
 * NOTE(review): the assignments of mx/my/ref, the comparison against *best_rd
 * and all return statements are not present in this listing, so the condition
 * under which the backup is restored cannot be stated here.
 */
3230 static always_inline int check_4block_inter(SnowContext *s, int mb_x, int mb_y, int p0, int p1, int ref, int *best_rd){
3231 const int b_stride= s->b_width << s->block_max_depth;
3232 BlockNode *block= &s->block[mb_x + mb_y * b_stride];
// Saved copies of the four blocks of the group.
3233 BlockNode backup[4]= {block[0], block[1], block[b_stride], block[b_stride+1]};
3234 int rd, index, value;
3236 assert(mb_x>=0 && mb_y>=0);
3237 assert(mb_x<b_stride);
// The group must start on even block coordinates.
3238 assert(((mb_x|mb_y)&1) == 0);
// Look the candidate up in the cache of already-tested candidates (same hashing as in check_block()).
// NOTE(review): p1<<6 left-shifts a value that may be negative, which is undefined in ISO C -- confirm the range of p1.
3240 index= (p0 + 31*p1) & (ME_CACHE_SIZE-1);
3241 value= s->me_cache_generation + (p0>>10) + (p1<<6) + (block->ref<<12);
3242 if(s->me_cache[index] == value)
3244 s->me_cache[index]= value;
// Make the top-left block inter and replicate it over the other three.
3249 block->type &= ~BLOCK_INTRA;
3250 block[1]= block[b_stride]= block[b_stride+1]= *block;
3252 rd= get_4block_rd(s, mb_x, mb_y, 0);
// Write the saved blocks back.
3259 block[0]= backup[0];
3260 block[1]= backup[1];
3261 block[b_stride]= backup[2];
3262 block[b_stride+1]= backup[3];
/**
 * Iterative motion estimation over the whole block grid.
 *
 * Visible steps: an initial encode_q_branch() call for every top-level block
 * (s->block_state is saved before and copied back afterwards), then up to 50
 * refinement passes over every block at the finest depth, and, when
 * block_max_depth is 1, a final pass over aligned 2x2 groups of blocks using
 * the 4-block helpers.
 *
 * NOTE(review): this listing omits some lines of the function (declarations,
 * braces and a few statements); the comments below describe only what is
 * visible.
 */
3267 static void iterative_me(SnowContext *s){
3268 int pass, mb_x, mb_y;
/* Grid dimensions at the finest block depth; rows are b_width entries apart. */
3269 const int b_width = s->b_width << s->block_max_depth;
3270 const int b_height= s->b_height << s->block_max_depth;
3271 const int b_stride= b_width;
/* Copy the range coder and the block contexts before the initial search;
 * block_state is copied back from the snapshot once the search is done. */
3275 RangeCoder r = s->c;
3276 uint8_t state[sizeof(s->block_state)];
3277 memcpy(state, s->block_state, sizeof(s->block_state));
3278 for(mb_y= 0; mb_y<s->b_height; mb_y++)
3279 for(mb_x= 0; mb_x<s->b_width; mb_x++)
3280 encode_q_branch(s, 0, mb_x, mb_y);
3282 memcpy(s->block_state, state, sizeof(s->block_state));
/* Refinement passes (at most 50). */
3285 for(pass=0; pass<50; pass++){
3288 for(mb_y= 0; mb_y<b_height; mb_y++){
3289 for(mb_x= 0; mb_x<b_width; mb_x++){
3290 int dia_change, i, j, ref;
3291 int best_rd= INT_MAX, ref_rd;
3292 BlockNode backup, ref_b;
3293 const int index= mb_x + mb_y * b_stride;
3294 BlockNode *block= &s->block[index];
/* The eight neighbours of the current block; null_block stands in for any
 * neighbour that would fall outside the grid. */
3295 BlockNode *tb = mb_y ? &s->block[index-b_stride ] : &null_block;
3296 BlockNode *lb = mb_x ? &s->block[index -1] : &null_block;
3297 BlockNode *rb = mb_x+1<b_width ? &s->block[index +1] : &null_block;
3298 BlockNode *bb = mb_y+1<b_height ? &s->block[index+b_stride ] : &null_block;
3299 BlockNode *tlb= mb_x && mb_y ? &s->block[index-b_stride-1] : &null_block;
3300 BlockNode *trb= mb_x+1<b_width && mb_y ? &s->block[index-b_stride+1] : &null_block;
3301 BlockNode *blb= mb_x && mb_y+1<b_height ? &s->block[index+b_stride-1] : &null_block;
3302 BlockNode *brb= mb_x+1<b_width && mb_y+1<b_height ? &s->block[index+b_stride+1] : &null_block;
3303 const int b_w= (MB_SIZE >> s->block_max_depth);
3304 uint8_t obmc_edged[b_w*2][b_w*2];
/* After the first pass, blocks still flagged BLOCK_OPT are tested here
 * (presumably skipped -- the statement guarded by this test is not shown);
 * the flag is then set on the block being processed. */
3306 if(pass && (block->type & BLOCK_OPT))
3308 block->type |= BLOCK_OPT;
/* Start a new cache generation; the cache is cleared whenever the generation
 * counter is 0. */
3312 if(!s->me_cache_generation)
3313 memset(s->me_cache, 0, sizeof(s->me_cache));
3314 s->me_cache_generation += 1<<22;
/* Local copy of the OBMC window for this depth. The loops below merge window
 * weights for blocks on the grid border (right edge: mb_x==b_stride-1, bottom
 * edge: mb_y==b_height-1; the guards of the other two cases are not shown). */
3319 memcpy(obmc_edged, obmc_tab[s->block_max_depth], b_w*b_w*4);
3321 for(y=0; y<b_w*2; y++)
3322 memset(obmc_edged[y], obmc_edged[y][0] + obmc_edged[y][b_w-1], b_w);
3323 if(mb_x==b_stride-1)
3324 for(y=0; y<b_w*2; y++)
3325 memset(obmc_edged[y]+b_w, obmc_edged[y][b_w] + obmc_edged[y][b_w*2-1], b_w);
3327 for(x=0; x<b_w*2; x++)
3328 obmc_edged[0][x] += obmc_edged[b_w-1][x];
3329 for(y=1; y<b_w; y++)
3330 memcpy(obmc_edged[y], obmc_edged[0], b_w*2);
3332 if(mb_y==b_height-1){
3333 for(x=0; x<b_w*2; x++)
3334 obmc_edged[b_w*2-1][x] += obmc_edged[b_w][x];
3335 for(y=b_w; y<b_w*2-1; y++)
3336 memcpy(obmc_edged[y], obmc_edged[b_w*2-1], b_w*2);
3340 //skip stuff outside the picture
/* For blocks on the grid border, the parts of the 2*block_w window that lie
 * outside the plane (rows >= h, the -sx bytes left of column 0, columns >= w)
 * are copied from the input picture into the current picture. */
3341 if(mb_x==0 || mb_y==0 || mb_x==b_width-1 || mb_y==b_height-1)
3343 uint8_t *src= s-> input_picture.data[0];
3344 uint8_t *dst= s->current_picture.data[0];
3345 const int stride= s->current_picture.linesize[0];
3346 const int block_w= MB_SIZE >> s->block_max_depth;
/* Top-left corner of the window: half a block above/left of the block. */
3347 const int sx= block_w*mb_x - block_w/2;
3348 const int sy= block_w*mb_y - block_w/2;
3349 const int w= s->plane[0].width;
3350 const int h= s->plane[0].height;
3354 memcpy(dst + sx + y*stride, src + sx + y*stride, block_w*2);
3355 for(y=h; y<sy+block_w*2; y++)
3356 memcpy(dst + sx + y*stride, src + sx + y*stride, block_w*2);
3358 for(y=sy; y<sy+block_w*2; y++)
3359 memcpy(dst + sx + y*stride, src + sx + y*stride, -sx);
3361 if(sx+block_w*2 > w){
3362 for(y=sy; y<sy+block_w*2; y++)
3363 memcpy(dst + w + y*stride, src + w + y*stride, sx+block_w*2 - w);
3367 // intra(black) = neighbors' contribution to the current block
3369 color[i]= get_dc(s, mb_x, mb_y, i);
3371 // get previous score (can't be cached due to OBMC)
3372 if(pass > 0 && (block->type&BLOCK_INTRA)){
3373 int color0[3]= {block->color[0], block->color[1], block->color[2]};
3374 check_block(s, mb_x, mb_y, color0, 1, *obmc_edged, &best_rd);
3376 check_block_inter(s, mb_x, mb_y, block->mx, block->my, *obmc_edged, &best_rd);
/* Try candidate vectors for every reference frame: the vector stored for this
 * block, the zero vector, and the stored vectors of the top, left, right and
 * bottom neighbours (the guards on the neighbour candidates are not shown). */
3380 for(ref=0; ref < s->ref_frames; ref++){
3381 int16_t (*mvr)[2]= &s->ref_mvs[ref][index];
3382 if(s->ref_scores[ref][index] > s->ref_scores[ref_b.ref][index]*3/2) //FIXME tune threshold
3387 check_block_inter(s, mb_x, mb_y, mvr[0][0], mvr[0][1], *obmc_edged, &best_rd);
3388 check_block_inter(s, mb_x, mb_y, 0, 0, *obmc_edged, &best_rd);
3390 check_block_inter(s, mb_x, mb_y, mvr[-b_stride][0], mvr[-b_stride][1], *obmc_edged, &best_rd);
3392 check_block_inter(s, mb_x, mb_y, mvr[-1][0], mvr[-1][1], *obmc_edged, &best_rd);
3394 check_block_inter(s, mb_x, mb_y, mvr[1][0], mvr[1][1], *obmc_edged, &best_rd);
3396 check_block_inter(s, mb_x, mb_y, mvr[b_stride][0], mvr[b_stride][1], *obmc_edged, &best_rd);
3399 //FIXME avoid subpel interpol / round to nearest integer
/* Diamond-shaped search around the current vector: offsets are multiples of 4
 * and the radius index runs up to max(dia_size, 1). */
3402 for(i=0; i<FFMAX(s->avctx->dia_size, 1); i++){
3404 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+4*(i-j), block->my+(4*j), *obmc_edged, &best_rd);
3405 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx-4*(i-j), block->my-(4*j), *obmc_edged, &best_rd);
3406 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+4*(i-j), block->my-(4*j), *obmc_edged, &best_rd);
3407 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx-4*(i-j), block->my+(4*j), *obmc_edged, &best_rd);
/* Then the eight positions at offsets of +-1 around the current vector. */
3413 static const int square[8][2]= {{+1, 0},{-1, 0},{ 0,+1},{ 0,-1},{+1,+1},{-1,-1},{+1,-1},{-1,+1},};
3416 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+square[i][0], block->my+square[i][1], *obmc_edged, &best_rd);
3418 //FIXME or try the standard 2 pass qpel or similar
/* Remember the vector reached for this reference. */
3420 mvr[0][0]= block->mx;
3421 mvr[0][1]= block->my;
3422 if(ref_rd > best_rd){
/* Same call form as the BLOCK_INTRA case above, here with the colour obtained
 * from get_dc() -- presumably the intra candidate. */
3430 check_block(s, mb_x, mb_y, color, 1, *obmc_edged, &best_rd);
3431 //FIXME RD style color selection
/* The block differs from its backup: clear BLOCK_OPT on every existing
 * neighbour (the flag consulted by the test at the top of the loop body). */
3433 if(!same_block(block, &backup)){
3434 if(tb != &null_block) tb ->type &= ~BLOCK_OPT;
3435 if(lb != &null_block) lb ->type &= ~BLOCK_OPT;
3436 if(rb != &null_block) rb ->type &= ~BLOCK_OPT;
3437 if(bb != &null_block) bb ->type &= ~BLOCK_OPT;
3438 if(tlb!= &null_block) tlb->type &= ~BLOCK_OPT;
3439 if(trb!= &null_block) trb->type &= ~BLOCK_OPT;
3440 if(blb!= &null_block) blb->type &= ~BLOCK_OPT;
3441 if(brb!= &null_block) brb->type &= ~BLOCK_OPT;
/* NOTE(review): per-pass statistics are logged at AV_LOG_ERROR with a NULL
 * context -- confirm this level is intended. */
3446 av_log(NULL, AV_LOG_ERROR, "pass:%d changed:%d\n", pass, change);
/* With one level of block splitting, visit each aligned 2x2 group of blocks. */
3451 if(s->block_max_depth == 1){
3453 for(mb_y= 0; mb_y<b_height; mb_y+=2){
3454 for(mb_x= 0; mb_x<b_width; mb_x+=2){
3455 int dia_change, i, j;
3456 int best_rd, init_rd;
3457 const int index= mb_x + mb_y * b_stride;
3460 b[0]= &s->block[index];
3462 b[2]= b[0]+b_stride;
/* Groups whose four blocks already match are detected here (the statement
 * guarded by this test is not shown). */
3464 if(same_block(b[0], b[1]) &&
3465 same_block(b[0], b[2]) &&
3466 same_block(b[0], b[3]))
3469 if(!s->me_cache_generation)
3470 memset(s->me_cache, 0, sizeof(s->me_cache));
3471 s->me_cache_generation += 1<<22;
/* Baseline cost of the group as it currently stands. */
3473 init_rd= best_rd= get_4block_rd(s, mb_x, mb_y, 0);
3475 //FIXME more multiref search?
/* Candidate: the rounded average of the four vectors, with reference 0. */
3476 check_4block_inter(s, mb_x, mb_y,
3477 (b[0]->mx + b[1]->mx + b[2]->mx + b[3]->mx + 2) >> 2,
3478 (b[0]->my + b[1]->my + b[2]->my + b[3]->my + 2) >> 2, 0, &best_rd);
/* Candidates: the vector and reference of each non-intra block of the group. */
3481 if(!(b[i]->type&BLOCK_INTRA))
3482 check_4block_inter(s, mb_x, mb_y, b[i]->mx, b[i]->my, b[i]->ref, &best_rd);
3484 if(init_rd != best_rd)
3488 av_log(NULL, AV_LOG_ERROR, "pass:4mv changed:%d\n", change*4);
/**
 * Quantize the coefficients of one subband in place.
 *
 * @param s      codec context; s->qlog plus the band's qlog selects the step
 * @param b      subband supplying level, width, height and its qlog offset
 * @param src    coefficients, addressed as src[x + y*stride]
 * @param stride distance between rows of src, in elements
 * @param bias   non-zero selects no rounding offset; zero selects (3*qmul)>>3
 *
 * Returns without touching src when s->qlog equals LOSSLESS_QLOG.
 * NOTE(review): loop headers and the branch conditions are not part of this
 * listing.
 */
3492 static void quantize(SnowContext *s, SubBand *b, DWTELEM *src, int stride, int bias){
3493 const int level= b->level;
3494 const int w= b->width;
3495 const int h= b->height;
/* Quantizer step: table entry for the fractional part of qlog, shifted by its
 * integer part. */
3496 const int qlog= clip(s->qlog + b->qlog, 0, QROOT*16);
3497 const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
3498 int x,y, thres1, thres2;
3501 if(s->qlog == LOSSLESS_QLOG) return;
3503 bias= bias ? 0 : (3*qmul)>>3;
3504 thres1= ((qmul - bias)>>QEXPSHIFT) - 1;
/* First visible variant: plain division by qmul, no offset added. The
 * unsigned comparison is a one-test range check; the zero store below is
 * presumably its else branch. */
3510 int i= src[x + y*stride];
3512 if((unsigned)(i+thres1) > thres2){
3515 i/= qmul; //FIXME optimize
3516 src[x + y*stride]= i;
3520 i/= qmul; //FIXME optimize
3521 src[x + y*stride]= -i;
3524 src[x + y*stride]= 0;
/* Second visible variant: the offset `bias` is added before dividing. */
3530 int i= src[x + y*stride];
3532 if((unsigned)(i+thres1) > thres2){
3535 i= (i + bias) / qmul; //FIXME optimize
3536 src[x + y*stride]= i;
3540 i= (i + bias) / qmul; //FIXME optimize
3541 src[x + y*stride]= -i;
3544 src[x + y*stride]= 0;
3548 if(level+1 == s->spatial_decomposition_count){
3549 // STOP_TIMER("quantize")
/**
 * Dequantize rows start_y .. end_y-1 of a subband held in a slice buffer.
 *
 * Each row is fetched with slice_buffer_get_line() using the band's
 * stride_line and buffer offsets. Two reconstruction forms are visible:
 * (i*qmul + qadd) >> QEXPSHIFT and its negated counterpart applied to -i
 * (presumably for negative i; the selecting condition is not shown).
 * src and stride are not referenced in the visible lines.
 * Returns immediately when s->qlog equals LOSSLESS_QLOG.
 */
3553 static void dequantize_slice_buffered(SnowContext *s, slice_buffer * sb, SubBand *b, DWTELEM *src, int stride, int start_y, int end_y){
3554 const int w= b->width;
3555 const int qlog= clip(s->qlog + b->qlog, 0, QROOT*16);
3556 const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
/* Reconstruction offset derived from the bias stored in the context. */
3557 const int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
3561 if(s->qlog == LOSSLESS_QLOG) return;
3563 for(y=start_y; y<end_y; y++){
3564 // DWTELEM * line = slice_buffer_get_line_from_address(sb, src + (y * stride));
3565 DWTELEM * line = slice_buffer_get_line(sb, (y * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
3569 line[x]= -((-i*qmul + qadd)>>(QEXPSHIFT)); //FIXME try different bias
3571 line[x]= (( i*qmul + qadd)>>(QEXPSHIFT));
3575 if(w > 200 /*level+1 == s->spatial_decomposition_count*/){
3576 STOP_TIMER("dquant")
/**
 * Dequantize a whole subband in place.
 *
 * Coefficients are addressed as src[x + y*stride]. Two reconstruction forms
 * are visible: (i*qmul + qadd) >> QEXPSHIFT and its negated counterpart
 * applied to -i (presumably for negative i; the selecting condition and the
 * loop headers are not shown).
 * Returns immediately when s->qlog equals LOSSLESS_QLOG.
 */
3580 static void dequantize(SnowContext *s, SubBand *b, DWTELEM *src, int stride){
3581 const int w= b->width;
3582 const int h= b->height;
3583 const int qlog= clip(s->qlog + b->qlog, 0, QROOT*16);
3584 const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
/* Reconstruction offset derived from the bias stored in the context. */
3585 const int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
3589 if(s->qlog == LOSSLESS_QLOG) return;
3593 int i= src[x + y*stride];
3595 src[x + y*stride]= -((-i*qmul + qadd)>>(QEXPSHIFT)); //FIXME try different bias
3597 src[x + y*stride]= (( i*qmul + qadd)>>(QEXPSHIFT));
3601 if(w > 200 /*level+1 == s->spatial_decomposition_count*/){
3602 STOP_TIMER("dquant")
/**
 * Replace each coefficient of a subband by its difference from a prediction
 * built from neighbouring coefficients (left, top, top-right or top-left).
 *
 * The band is walked from the last row to the first and from the last column
 * to the first, so the neighbours read for a prediction have not been
 * modified yet. Three predictor forms are visible; the conditions choosing
 * between them (which involve the parameters not referenced in the visible
 * lines) are not part of this listing.
 */
3606 static void decorrelate(SnowContext *s, SubBand *b, DWTELEM *src, int stride, int inverse, int use_median){
3607 const int w= b->width;
3608 const int h= b->height;
3611 for(y=h-1; y>=0; y--){
3612 for(x=w-1; x>=0; x--){
3613 int i= x + y*stride;
/* Median of left, top and top-right; plain left neighbour otherwise. */
3617 if(y && x+1<w) src[i] -= mid_pred(src[i - 1], src[i - stride], src[i - stride + 1]);
3618 else src[i] -= src[i - 1];
/* Median of left, top and the gradient left + top - top-left. */
3620 if(y) src[i] -= mid_pred(src[i - 1], src[i - stride], src[i - 1] + src[i - stride] - src[i - 1 - stride]);
3621 else src[i] -= src[i - 1];
/* Top neighbour only. */
3624 if(y) src[i] -= src[i - stride];
/**
 * Add the neighbour-based prediction back onto rows start_y .. end_y-1 of a
 * subband stored in a slice buffer (the additive counterpart of the
 * subtraction done by decorrelate()).
 *
 * Rows are fetched with slice_buffer_get_line(); `prev` refers to the row
 * above the current one (its assignment is not part of this listing, but the
 * row start_y-1 is fetched before the loop). The conditions selecting among
 * the three predictor forms are not shown either.
 */
3630 static void correlate_slice_buffered(SnowContext *s, slice_buffer * sb, SubBand *b, DWTELEM *src, int stride, int inverse, int use_median, int start_y, int end_y){
3631 const int w= b->width;
3640 line = slice_buffer_get_line(sb, ((start_y - 1) * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
3642 for(y=start_y; y<end_y; y++){
3644 // line = slice_buffer_get_line_from_address(sb, src + (y * stride));
3645 line = slice_buffer_get_line(sb, (y * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
/* Median of left, top and top-right; plain left neighbour otherwise. */
3649 if(y && x+1<w) line[x] += mid_pred(line[x - 1], prev[x], prev[x + 1]);
3650 else line[x] += line[x - 1];
/* Median of left, top and the gradient left + top - top-left. */
3652 if(y) line[x] += mid_pred(line[x - 1], prev[x], line[x - 1] + prev[x] - prev[x - 1]);
3653 else line[x] += line[x - 1];
/* Top neighbour only. */
3656 if(y) line[x] += prev[x];
3661 // STOP_TIMER("correlate")
/**
 * Add the neighbour-based prediction back onto every coefficient of a
 * subband in place (the additive counterpart of decorrelate()).
 *
 * Coefficients are addressed as src[x + y*stride]. The loop headers and the
 * conditions selecting among the three predictor forms are not part of this
 * listing.
 */
3664 static void correlate(SnowContext *s, SubBand *b, DWTELEM *src, int stride, int inverse, int use_median){
3665 const int w= b->width;
3666 const int h= b->height;
3671 int i= x + y*stride;
/* Median of left, top and top-right; plain left neighbour otherwise. */
3675 if(y && x+1<w) src[i] += mid_pred(src[i - 1], src[i - stride], src[i - stride + 1]);
3676 else src[i] += src[i - 1];
/* Median of left, top and the gradient left + top - top-left. */
3678 if(y) src[i] += mid_pred(src[i - 1], src[i - stride], src[i - 1] + src[i - stride] - src[i - 1 - stride]);
3679 else src[i] += src[i - 1];
/* Top neighbour only. */
3682 if(y) src[i] += src[i - stride];
/**
 * Write the frame header with the range coder.
 *
 * The keyframe flag is coded with a private context (kstate) initialised to
 * MID_STATE; all other fields use s->header_state. Field order must match
 * what decode_header() reads.
 *
 * NOTE(review): the statement guarded by the keyframe/always_reset test and
 * any conditional grouping of the fields are not part of this listing.
 */
3688 static void encode_header(SnowContext *s){
3689 int plane_index, level, orientation;
3692 memset(kstate, MID_STATE, sizeof(kstate));
3694 put_rac(&s->c, kstate, s->keyframe);
3695 if(s->keyframe || s->always_reset)
3698 put_symbol(&s->c, s->header_state, s->version, 0);
3699 put_rac(&s->c, s->header_state, s->always_reset);
3700 put_symbol(&s->c, s->header_state, s->temporal_decomposition_type, 0);
3701 put_symbol(&s->c, s->header_state, s->temporal_decomposition_count, 0);
3702 put_symbol(&s->c, s->header_state, s->spatial_decomposition_count, 0);
3703 put_symbol(&s->c, s->header_state, s->colorspace_type, 0);
3704 put_symbol(&s->c, s->header_state, s->chroma_h_shift, 0);
3705 put_symbol(&s->c, s->header_state, s->chroma_v_shift, 0);
3706 put_rac(&s->c, s->header_state, s->spatial_scalability);
3707 // put_rac(&s->c, s->header_state, s->rate_scalability);
/* Stored minus one; decode_header() adds the one back. */
3708 put_symbol(&s->c, s->header_state, s->max_ref_frames-1, 0);
/* Band qlog values are written for planes 0 and 1 only, and orientation 2 is
 * left out; decode_header() fills plane 2 from plane 1 and orientation 2 from
 * orientation 1. */
3710 for(plane_index=0; plane_index<2; plane_index++){
3711 for(level=0; level<s->spatial_decomposition_count; level++){
3712 for(orientation=level ? 1:0; orientation<4; orientation++){
3713 if(orientation==2) continue;
3714 put_symbol(&s->c, s->header_state, s->plane[plane_index].band[level][orientation].qlog, 1);
3719 put_symbol(&s->c, s->header_state, s->spatial_decomposition_type, 0);
3720 put_symbol(&s->c, s->header_state, s->qlog, 1);
3721 put_symbol(&s->c, s->header_state, s->mv_scale, 0);
3722 put_symbol(&s->c, s->header_state, s->qbias, 1);
3723 put_symbol(&s->c, s->header_state, s->block_max_depth, 0);
/**
 * Read the frame header with the range coder and store it in the context.
 *
 * The keyframe flag is decoded with a private context (kstate) initialised to
 * MID_STATE; all other fields use s->header_state, in the order written by
 * encode_header(). Unsupported version, spatial_decomposition_type or
 * block_max_depth values are reported through av_log(); the error messages
 * end in a newline like every other log message in this file.
 *
 * NOTE(review): the return statements and the conditions around some fields
 * are not part of this listing. No range check on
 * spatial_decomposition_count, max_ref_frames or the chroma shifts is visible
 * before those values are used -- confirm they are validated.
 */
3726 static int decode_header(SnowContext *s){
3727 int plane_index, level, orientation;
3730 memset(kstate, MID_STATE, sizeof(kstate));
3732 s->keyframe= get_rac(&s->c, kstate);
3733 if(s->keyframe || s->always_reset)
3736 s->version= get_symbol(&s->c, s->header_state, 0);
3738 av_log(s->avctx, AV_LOG_ERROR, "version %d not supported\n", s->version);
3741 s->always_reset= get_rac(&s->c, s->header_state);
3742 s->temporal_decomposition_type= get_symbol(&s->c, s->header_state, 0);
3743 s->temporal_decomposition_count= get_symbol(&s->c, s->header_state, 0);
3744 s->spatial_decomposition_count= get_symbol(&s->c, s->header_state, 0);
3745 s->colorspace_type= get_symbol(&s->c, s->header_state, 0);
3746 s->chroma_h_shift= get_symbol(&s->c, s->header_state, 0);
3747 s->chroma_v_shift= get_symbol(&s->c, s->header_state, 0);
3748 s->spatial_scalability= get_rac(&s->c, s->header_state);
3749 // s->rate_scalability= get_rac(&s->c, s->header_state);
/* The stream stores max_ref_frames minus one. */
3750 s->max_ref_frames= get_symbol(&s->c, s->header_state, 0)+1;
/* Only planes 0 and 1 carry qlog values in the stream: plane 2 reuses the
 * value of plane 1 and orientation 2 reuses the value of orientation 1. */
3752 for(plane_index=0; plane_index<3; plane_index++){
3753 for(level=0; level<s->spatial_decomposition_count; level++){
3754 for(orientation=level ? 1:0; orientation<4; orientation++){
3756 if (plane_index==2) q= s->plane[1].band[level][orientation].qlog;
3757 else if(orientation==2) q= s->plane[plane_index].band[level][1].qlog;
3758 else q= get_symbol(&s->c, s->header_state, 1);
3759 s->plane[plane_index].band[level][orientation].qlog= q;
3765 s->spatial_decomposition_type= get_symbol(&s->c, s->header_state, 0);
3766 if(s->spatial_decomposition_type > 2){
3767 av_log(s->avctx, AV_LOG_ERROR, "spatial_decomposition_type %d not supported\n", s->spatial_decomposition_type);
3771 s->qlog= get_symbol(&s->c, s->header_state, 1);
3772 s->mv_scale= get_symbol(&s->c, s->header_state, 0);
3773 s->qbias= get_symbol(&s->c, s->header_state, 1);
3774 s->block_max_depth= get_symbol(&s->c, s->header_state, 0);
/* Only depths 0 and 1 are accepted; anything else is reported and reset to 0. */
3775 if(s->block_max_depth > 1 || s->block_max_depth < 0){
3776 av_log(s->avctx, AV_LOG_ERROR, "block_max_depth= %d is too large\n", s->block_max_depth);
3777 s->block_max_depth= 0;
/**
 * Initialisation helper for the quantizer scale table.
 *
 * The visible loop runs QROOT times and multiplies v by 2^(1/QROOT) on each
 * iteration, so v doubles over the whole loop. Presumably each step is stored
 * into qexp[] -- the store and the initial value of v are not part of this
 * listing.
 */
3784 static void init_qexp(void){
3788 for(i=0; i<QROOT; i++){
3790 v *= pow(2, 1.0 / QROOT);
/**
 * Initialisation shared by the encoder and the decoder.
 *
 * Visible work: initialise the DSP context and override several of its
 * motion-compensation function tables, fix the decomposition count and chroma
 * shifts, allocate the wavelet buffer, derive mv_scale and block_max_depth
 * from the codec flags, lay out every subband of the three planes inside the
 * wavelet buffer, and request a buffer for mconly_picture.
 *
 * NOTE(review): parts of the function (including the #define line of the
 * first macro and the return statement) are not part of this listing.
 */
3794 static int common_init(AVCodecContext *avctx){
3795 SnowContext *s = avctx->priv_data;
3797 int level, orientation, plane_index, dec;
3801 dsputil_init(&s->dsp, avctx);
/* Body of a macro whose #define line is not shown: it points the qpel put
 * tables (sizes 0 and 1) at the h264 qpel functions. */
3804 s->dsp.put_qpel_pixels_tab [0][dy+dx/4]=\
3805 s->dsp.put_no_rnd_qpel_pixels_tab[0][dy+dx/4]=\
3806 s->dsp.put_h264_qpel_pixels_tab[0][dy+dx/4];\
3807 s->dsp.put_qpel_pixels_tab [1][dy+dx/4]=\
3808 s->dsp.put_no_rnd_qpel_pixels_tab[1][dy+dx/4]=\
3809 s->dsp.put_h264_qpel_pixels_tab[1][dy+dx/4];
/* mcfh(dx,dy): point the plain put tables at the mc_block_hpel<dx><dy>16 and
 * mc_block_hpel<dx><dy>8 functions. */
3828 #define mcfh(dx,dy)\
3829 s->dsp.put_pixels_tab [0][dy/4+dx/8]=\
3830 s->dsp.put_no_rnd_pixels_tab[0][dy/4+dx/8]=\
3831 mc_block_hpel ## dx ## dy ## 16;\
3832 s->dsp.put_pixels_tab [1][dy/4+dx/8]=\
3833 s->dsp.put_no_rnd_pixels_tab[1][dy/4+dx/8]=\
3834 mc_block_hpel ## dx ## dy ## 8;
/* The decomposition count is fixed at 5 here. */
3844 dec= s->spatial_decomposition_count= 5;
3845 s->spatial_decomposition_type= avctx->prediction_method; //FIXME add decorrelator type r transform_type
3847 s->chroma_h_shift= 1; //FIXME XXX
3848 s->chroma_v_shift= 1;
3850 // dec += FFMAX(s->chroma_h_shift, s->chroma_v_shift);
3852 width= s->avctx->width;
3853 height= s->avctx->height;
/* One DWTELEM per pixel of the full-size frame.
 * NOTE(review): no check of the allocation result is visible. */
3855 s->spatial_dwt_buffer= av_mallocz(width*height*sizeof(DWTELEM));
/* CODEC_FLAG_QPEL selects mv_scale 2 (otherwise 4); CODEC_FLAG_4MV selects
 * one level of block splitting. */
3857 s->mv_scale= (s->avctx->flags & CODEC_FLAG_QPEL) ? 2 : 4;
3858 s->block_max_depth= (s->avctx->flags & CODEC_FLAG_4MV) ? 1 : 0;
3860 for(plane_index=0; plane_index<3; plane_index++){
3861 int w= s->avctx->width;
3862 int h= s->avctx->height;
/* Chroma subsampling (the guard on these two shifts is not shown). */
3865 w>>= s->chroma_h_shift;
3866 h>>= s->chroma_v_shift;
3868 s->plane[plane_index].width = w;
3869 s->plane[plane_index].height= h;
3870 //av_log(NULL, AV_LOG_DEBUG, "%d %d\n", w, h);
/* Walk the levels from the highest index down, describing where each band
 * lives inside the shared wavelet buffer. */
3871 for(level=s->spatial_decomposition_count-1; level>=0; level--){
3872 for(orientation=level ? 1 : 0; orientation<4; orientation++){
3873 SubBand *b= &s->plane[plane_index].band[level][orientation];
3875 b->buf= s->spatial_dwt_buffer;
3877 b->stride= s->plane[plane_index].width << (s->spatial_decomposition_count - level);
/* Half of w/h, rounded up for even orientations (width) and for
 * orientations 0 and 1 (height). */
3878 b->width = (w + !(orientation&1))>>1;
3879 b->height= (h + !(orientation>1))>>1;
3881 b->stride_line = 1 << (s->spatial_decomposition_count - level);
3882 b->buf_x_offset = 0;
3883 b->buf_y_offset = 0;
/* Offsets applied for some orientations (the selecting conditions are not
 * shown). */
3887 b->buf_x_offset = (w+1)>>1;
3890 b->buf += b->stride>>1;
3891 b->buf_y_offset = b->stride_line >> 1;
3895 b->parent= &s->plane[plane_index].band[level-1][orientation];
/* NOTE(review): no check of the allocation result is visible. */
3896 b->x_coeff=av_mallocz(((b->width+1) * b->height+1)*sizeof(x_and_coeff));
3905 width= s->width= avctx->width;
3906 height= s->height= avctx->height;
3908 assert(width && height);
/* NOTE(review): the return value of get_buffer() is not used here. */
3910 s->avctx->get_buffer(s->avctx, &s->mconly_picture);
/**
 * Single-pass rate control: estimate the frame complexity from the luma
 * wavelet coefficients and derive the frame quality and lambda from it.
 *
 * The estimate is stored in mb_var_sum for I frames and in mc_mb_var_sum for
 * other frames before ff_rate_estimate_qscale() is called; the result is
 * written to pict->quality and s->lambda is set to 3/2 of it.
 */
3915 static void ratecontrol_1pass(SnowContext *s, AVFrame *pict)
3917 /* estimate the frame's complexity as a sum of weighted dwt coefs.
3918 * FIXME we know exact mv bits at this point,
3919 * but ratecontrol isn't set up to include them. */
3920 uint32_t coef_sum= 0;
3921 int level, orientation;
3923 for(level=0; level<s->spatial_decomposition_count; level++){
3924 for(orientation=level ? 1 : 0; orientation<4; orientation++){
3925 SubBand *b= &s->plane[0].band[level][orientation];
3926 DWTELEM *buf= b->buf;
3927 const int w= b->width;
3928 const int h= b->height;
3929 const int stride= b->stride;
/* Per-band weight: the reciprocal (16.16 fixed point) of the step for
 * 2*QROOT plus the band's qlog. */
3930 const int qlog= clip(2*QROOT + b->qlog, 0, QROOT*16);
3931 const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
3932 const int qdiv= (1<<16)/qmul;
/* decorrelate() and correlate() bracket the summation (their guards, if any,
 * are not shown), so the buffer is restored afterwards. */
3935 decorrelate(s, b, buf, stride, 1, 0);
3938 coef_sum+= abs(buf[x+y*stride]) * qdiv >> 16;
3940 correlate(s, b, buf, stride, 1, 0);
3944 /* ugly, ratecontrol just takes a sqrt again */
3945 coef_sum = (uint64_t)coef_sum * coef_sum >> 16;
3946 assert(coef_sum < INT_MAX);
3948 if(pict->pict_type == I_TYPE){
3949 s->m.current_picture.mb_var_sum= coef_sum;
3950 s->m.current_picture.mc_mb_var_sum= 0;
3952 s->m.current_picture.mc_mb_var_sum= coef_sum;
3953 s->m.current_picture.mb_var_sum= 0;
3956 pict->quality= ff_rate_estimate_qscale(&s->m, 1);
3957 s->lambda= pict->quality * 3/2;
/**
 * Derive the qlog of every subband of plane p from the response of the
 * inverse wavelet transform to a single coefficient in that band.
 *
 * For each band the wavelet buffer is cleared, one coefficient near the
 * centre of the band is set to 256*256, the inverse transform is run, and the
 * band's qlog is computed from the square root of an error value accumulated
 * over the reconstructed plane (the accumulation itself is not part of this
 * listing).
 *
 * The buffer is cleared in units of sizeof(DWTELEM), matching how it is
 * allocated in common_init() and cleared in decode_frame(), rather than
 * assuming that DWTELEM has the size of int.
 */
3960 static void calculate_vissual_weight(SnowContext *s, Plane *p){
3961 int width = p->width;
3962 int height= p->height;
3963 int level, orientation, x, y;
3965 for(level=0; level<s->spatial_decomposition_count; level++){
3966 for(orientation=level ? 1 : 0; orientation<4; orientation++){
3967 SubBand *b= &p->band[level][orientation];
3968 DWTELEM *buf= b->buf;
3971 memset(s->spatial_dwt_buffer, 0, sizeof(DWTELEM)*width*height);
/* Single test coefficient in the middle of the band. */
3972 buf[b->width/2 + b->height/2*b->stride]= 256*256;
3973 ff_spatial_idwt(s->spatial_dwt_buffer, width, height, width, s->spatial_decomposition_type, s->spatial_decomposition_count);
3974 for(y=0; y<height; y++){
3975 for(x=0; x<width; x++){
3976 int64_t d= s->spatial_dwt_buffer[x + y*width];
/* Logarithm to base 2^(1/QROOT), rounded to the nearest integer. */
3981 b->qlog= (int)(log(352256.0/sqrt(error)) / log(pow(2.0, 1.0/QROOT))+0.5);
3982 // av_log(NULL, AV_LOG_DEBUG, "%d %d %d\n", level, orientation, b->qlog/*, sqrt(error)*/);
/**
 * Encoder initialisation.
 *
 * Visible work: refuse to run unless strict_std_compliance allows
 * experimental codecs, allocate the motion-estimation scratch buffers, clamp
 * max_ref_frames to 1..MAX_REF_FRAMES, set up rate control unless a fixed
 * qscale is requested, compute the band weights of all three planes, select
 * the colourspace from the pixel format, and allocate the per-reference
 * vector/score arrays when me_method is ME_ITER.
 *
 * NOTE(review): return statements and some calls are not part of this
 * listing; no check of the av_mallocz() results is visible.
 */
3987 static int encode_init(AVCodecContext *avctx)
3989 SnowContext *s = avctx->priv_data;
3992 if(avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL){
3993 av_log(avctx, AV_LOG_ERROR, "this codec is under development, files encoded with it may not be decodable with future versions!!!\n"
3994 "use vstrict=-2 / -strict -2 to use it anyway\n");
4004 s->m.flags = avctx->flags;
4005 s->m.bit_rate= avctx->bit_rate;
// Scratch buffers used through the embedded MpegEncContext s->m.
4007 s->m.me.scratchpad= av_mallocz((avctx->width+64)*2*16*2*sizeof(uint8_t));
4008 s->m.me.map = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t));
4009 s->m.me.score_map = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t));
4010 s->m.obmc_scratchpad= av_mallocz(MB_SIZE*MB_SIZE*12*sizeof(uint32_t));
4011 h263_encode_init(&s->m); //mv_penalty
// Clamp the requested reference count to 1..MAX_REF_FRAMES.
4013 s->max_ref_frames = FFMAX(FFMIN(avctx->refs, MAX_REF_FRAMES), 1);
// First pass of two-pass encoding: make sure a 256-byte stats buffer exists.
4015 if(avctx->flags&CODEC_FLAG_PASS1){
4016 if(!avctx->stats_out)
4017 avctx->stats_out = av_mallocz(256);
// Rate control is only initialised when no fixed qscale is requested.
4019 if(!(avctx->flags&CODEC_FLAG_QSCALE)){
4020 if(ff_rate_control_init(&s->m) < 0)
// Single-pass rate control: neither fixed qscale nor second pass.
4023 s->pass1_rc= !(avctx->flags & (CODEC_FLAG_QSCALE|CODEC_FLAG_PASS2));
4025 for(plane_index=0; plane_index<3; plane_index++){
4026 calculate_vissual_weight(s, &s->plane[plane_index]);
4030 avctx->coded_frame= &s->current_picture;
// Only PIX_FMT_YUV420P has an active case label; the others are commented out.
4031 switch(avctx->pix_fmt){
4032 // case PIX_FMT_YUV444P:
4033 // case PIX_FMT_YUV422P:
4034 case PIX_FMT_YUV420P:
4036 // case PIX_FMT_YUV411P:
4037 // case PIX_FMT_YUV410P:
4038 s->colorspace_type= 0;
// NOTE(review): the block comment opened on the next line is not closed within this listing.
4040 /* case PIX_FMT_RGBA32:
4044 av_log(avctx, AV_LOG_ERROR, "format not supported\n");
4047 // avcodec_get_chroma_sub_sample(avctx->pix_fmt, &s->chroma_h_shift, &s->chroma_v_shift);
4048 s->chroma_h_shift= 1;
4049 s->chroma_v_shift= 1;
4051 ff_set_cmp(&s->dsp, s->dsp.me_cmp, s->avctx->me_cmp);
4052 ff_set_cmp(&s->dsp, s->dsp.me_sub_cmp, s->avctx->me_sub_cmp);
// NOTE(review): the return value of get_buffer() is not used here.
4054 s->avctx->get_buffer(s->avctx, &s->input_picture);
// Iterative ME keeps one vector and one score per finest-depth block and reference.
4056 if(s->avctx->me_method == ME_ITER){
4058 int size= s->b_width * s->b_height << 2*s->block_max_depth;
4059 for(i=0; i<s->max_ref_frames; i++){
4060 s->ref_mvs[i]= av_mallocz(size*sizeof(int16_t[2]));
4061 s->ref_scores[i]= av_mallocz(size*sizeof(uint32_t));
// Prepare the picture buffers for a new frame.
//
// Visible work: extend the edges of the current picture (if it has data),
// rotate the reference list so that the current picture becomes
// last_picture[0] and the oldest entry is reused as the new current picture,
// walk the reference list, then obtain a fresh buffer for the current picture
// and copy the keyframe flag into it.
// A get_buffer() failure is logged; the return statements are not part of
// this listing.
4068 static int frame_start(SnowContext *s){
4070 int w= s->avctx->width; //FIXME round up to x16 ?
4071 int h= s->avctx->height;
// Luma uses the full size and EDGE_WIDTH; both chroma planes use half of each.
4073 if(s->current_picture.data[0]){
4074 draw_edges(s->current_picture.data[0], s->current_picture.linesize[0], w , h , EDGE_WIDTH );
4075 draw_edges(s->current_picture.data[1], s->current_picture.linesize[1], w>>1, h>>1, EDGE_WIDTH/2);
4076 draw_edges(s->current_picture.data[2], s->current_picture.linesize[2], w>>1, h>>1, EDGE_WIDTH/2);
// Shift the reference list by one entry; memmove is used because the ranges overlap.
4079 tmp= s->last_picture[s->max_ref_frames-1];
4080 memmove(s->last_picture+1, s->last_picture, (s->max_ref_frames-1)*sizeof(AVFrame));
4081 s->last_picture[0]= s->current_picture;
4082 s->current_picture= tmp;
// Walk the references that have data; the statement guarded by the key_frame
// test is not shown (presumably it ends the walk at a keyframe).
4088 for(i=0; i<s->max_ref_frames && s->last_picture[i].data[0]; i++)
4089 if(i && s->last_picture[i-1].key_frame)
4094 s->current_picture.reference= 1;
4095 if(s->avctx->get_buffer(s->avctx, &s->current_picture) < 0){
4096 av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed\n");
4100 s->current_picture.key_frame= s->keyframe;
// Encode one frame into buf and return the value of ff_rac_terminate().
//
// avctx    codec context holding the SnowContext in priv_data
// buf      output buffer handed to the range encoder
// buf_size size of buf as passed to ff_init_range_encoder()
// data     the input AVFrame
//
// Visible stages: copy the input planes, choose picture type and quantizer,
// set up the embedded MpegEncContext for P frames, then for each plane run
// prediction, forward wavelet transform, quantization and subband coding,
// followed by the reconstruction path (dequantize, inverse transform,
// prediction) and the bookkeeping for rate control.
// NOTE(review): many lines of this function are not part of this listing.
4105 static int encode_frame(AVCodecContext *avctx, unsigned char *buf, int buf_size, void *data){
4106 SnowContext *s = avctx->priv_data;
4107 RangeCoder * const c= &s->c;
4108 AVFrame *pict = data;
4109 const int width= s->avctx->width;
4110 const int height= s->avctx->height;
4111 int level, orientation, plane_index, i, y;
4113 ff_init_range_encoder(c, buf, buf_size);
4114 ff_build_rac_states(c, 0.05*(1LL<<32), 256-8);
// Row-by-row copy of plane i of the caller's frame into s->input_picture
// (the enclosing loop over i and the byte count are not shown).
4118 for(y=0; y<(height>>shift); y++)
4119 memcpy(&s->input_picture.data[i][y * s->input_picture.linesize[i]],
4120 &pict->data[i][y * pict->linesize[i]],
4123 s->new_picture = *pict;
4125 s->m.picture_number= avctx->frame_number;
// Second pass: picture type comes from the rate-control entry of this frame.
4126 if(avctx->flags&CODEC_FLAG_PASS2){
4128 pict->pict_type= s->m.rc_context.entry[avctx->frame_number].new_pict_type;
4129 s->keyframe= pict->pict_type==FF_I_TYPE;
4130 if(!(avctx->flags&CODEC_FLAG_QSCALE))
4131 pict->quality= ff_rate_estimate_qscale(&s->m, 0);
// Otherwise: keyframe when gop_size is 0 or on every gop_size-th frame.
4133 s->keyframe= avctx->gop_size==0 || avctx->frame_number % avctx->gop_size == 0;
4135 pict->pict_type= s->keyframe ? FF_I_TYPE : FF_P_TYPE;
// Single-pass rate control starts from a fixed quality on the first frame.
4138 if(s->pass1_rc && avctx->frame_number == 0)
4139 pict->quality= 2*FF_QP2LAMBDA;
// qlog is QROOT times the base-2 logarithm of quality/FF_QP2LAMBDA, plus 61*QROOT/8.
4141 s->qlog= rint(QROOT*log(pict->quality / (float)FF_QP2LAMBDA)/log(2));
4143 s->qlog += 61*QROOT/8;
4144 s->lambda = pict->quality * 3/2;
// Negative qlog, or zero quality with a fixed qscale, selects lossless mode.
4146 if(s->qlog < 0 || (!pict->quality && (avctx->flags & CODEC_FLAG_QSCALE))){
4147 s->qlog= LOSSLESS_QLOG;
4149 }//else keep previous frame's qlog until after motion est
4153 s->m.current_picture_ptr= &s->m.current_picture;
// P frames: fill in the MpegEncContext fields used by motion estimation.
4154 if(pict->pict_type == P_TYPE){
// Picture size in 16x16 units, rounded up.
4155 int block_width = (width +15)>>4;
4156 int block_height= (height+15)>>4;
4157 int stride= s->current_picture.linesize[0];
4159 assert(s->current_picture.data[0]);
4160 assert(s->last_picture[0].data[0]);
4162 s->m.avctx= s->avctx;
4163 s->m.current_picture.data[0]= s->current_picture.data[0];
4164 s->m. last_picture.data[0]= s->last_picture[0].data[0];
4165 s->m. new_picture.data[0]= s-> input_picture.data[0];
4166 s->m. last_picture_ptr= &s->m. last_picture;
4168 s->m. last_picture.linesize[0]=
4169 s->m. new_picture.linesize[0]=
4170 s->m.current_picture.linesize[0]= stride;
4171 s->m.uvlinesize= s->current_picture.linesize[1];
4173 s->m.height= height;
4174 s->m.mb_width = block_width;
4175 s->m.mb_height= block_height;
4176 s->m.mb_stride= s->m.mb_width+1;
4177 s->m.b8_stride= 2*s->m.mb_width+1;
4179 s->m.pict_type= pict->pict_type;
4180 s->m.me_method= s->avctx->me_method;
4181 s->m.me.scene_change_score=0;
4182 s->m.flags= s->avctx->flags;
4183 s->m.quarter_sample= (s->avctx->flags & CODEC_FLAG_QPEL)!=0;
4184 s->m.out_format= FMT_H263;
4185 s->m.unrestricted_mv= 1;
4187 s->m.lambda = s->lambda;
4188 s->m.qscale= (s->m.lambda*139 + FF_LAMBDA_SCALE*64) >> (FF_LAMBDA_SHIFT + 7);
4189 s->lambda2= s->m.lambda2= (s->m.lambda*s->m.lambda + FF_LAMBDA_SCALE/2) >> FF_LAMBDA_SHIFT;
4191 s->m.dsp= s->dsp; //move
4198 s->m.pict_type = pict->pict_type;
// Dequantization bias: 2 for P frames, 0 otherwise.
4199 s->qbias= pict->pict_type == P_TYPE ? 2 : 0;
// Bit counts are taken from the range coder's byte position: first the bits
// written so far, then the bits added since (the calls in between are not shown).
4202 s->m.misc_bits = 8*(s->c.bytestream - s->c.bytestream_start);
4204 s->m.mv_bits = 8*(s->c.bytestream - s->c.bytestream_start) - s->m.misc_bits;
4206 for(plane_index=0; plane_index<3; plane_index++){
4207 Plane *p= &s->plane[plane_index];
4211 // int bits= put_bits_count(&s->c.pb);
// Load the input samples, scaled up by FRAC_BITS, into the wavelet buffer.
4214 if(pict->data[plane_index]) //FIXME gray hack
4217 s->spatial_dwt_buffer[y*w + x]= pict->data[plane_index][y*pict->linesize[plane_index] + x]<<FRAC_BITS;
4220 predict_plane(s, s->spatial_dwt_buffer, plane_index, 0);
// Scene change on a P frame outside pass 2: the range encoder is
// re-initialised and the frame is marked as a keyframe (the first part of the
// condition and the statement that restarts encoding are not shown).
4223 && pict->pict_type == P_TYPE
4224 && !(avctx->flags&CODEC_FLAG_PASS2)
4225 && s->m.me.scene_change_score > s->avctx->scenechange_threshold){
4226 ff_init_range_encoder(c, buf, buf_size);
4227 ff_build_rac_states(c, 0.05*(1LL<<32), 256-8);
4228 pict->pict_type= FF_I_TYPE;
4230 s->current_picture.key_frame=1;
// Lossless mode: round the samples back down by FRAC_BITS before the transform.
4235 if(s->qlog == LOSSLESS_QLOG){
4238 s->spatial_dwt_buffer[y*w + x]= (s->spatial_dwt_buffer[y*w + x] + (1<<(FRAC_BITS-1))-1)>>FRAC_BITS;
4243 ff_spatial_dwt(s->spatial_dwt_buffer, w, h, w, s->spatial_decomposition_type, s->spatial_decomposition_count);
// Single-pass rate control runs once, after the luma transform.
4245 if(s->pass1_rc && plane_index==0)
4246 ratecontrol_1pass(s, pict);
// Per band: quantize, decorrelate, entropy-code, then correlate again
// (guards on some of these calls are not shown).
4248 for(level=0; level<s->spatial_decomposition_count; level++){
4249 for(orientation=level ? 1 : 0; orientation<4; orientation++){
4250 SubBand *b= &p->band[level][orientation];
4252 quantize(s, b, b->buf, b->stride, s->qbias);
4254 decorrelate(s, b, b->buf, b->stride, pict->pict_type == P_TYPE, 0);
4255 encode_subband(s, b, b->buf, b->parent ? b->parent->buf : NULL, b->stride, orientation);
4256 assert(b->parent==NULL || b->parent->stride == b->stride*2);
4258 correlate(s, b, b->buf, b->stride, 1, 0);
4261 // av_log(NULL, AV_LOG_DEBUG, "plane:%d bits:%d\n", plane_index, put_bits_count(&s->c.pb) - bits);
// Reconstruction path: dequantize every band and run the inverse transform.
4263 for(level=0; level<s->spatial_decomposition_count; level++){
4264 for(orientation=level ? 1 : 0; orientation<4; orientation++){
4265 SubBand *b= &p->band[level][orientation];
4267 dequantize(s, b, b->buf, b->stride);
4271 ff_spatial_idwt(s->spatial_dwt_buffer, w, h, w, s->spatial_decomposition_type, s->spatial_decomposition_count);
// Lossless mode: scale the samples back up by FRAC_BITS.
4272 if(s->qlog == LOSSLESS_QLOG){
4275 s->spatial_dwt_buffer[y*w + x]<<=FRAC_BITS;
4280 predict_plane(s, s->spatial_dwt_buffer, plane_index, 1);
4281 STOP_TIMER("pred-conv")}
// With CODEC_FLAG_PSNR: per-plane error from the difference between the
// reconstructed and the input samples (the accumulation is not shown).
4282 if(s->avctx->flags&CODEC_FLAG_PSNR){
4285 if(pict->data[plane_index]) //FIXME gray hack
4288 int d= s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x] - pict->data[plane_index][y*pict->linesize[plane_index] + x];
4292 s->avctx->error[plane_index] += error;
4293 s->current_picture.error[plane_index] = error;
// Release the oldest reference if it holds data.
4297 if(s->last_picture[s->max_ref_frames-1].data[0])
4298 avctx->release_buffer(avctx, &s->last_picture[s->max_ref_frames-1]);
// Frame statistics for the caller and for rate control.
4300 s->current_picture.coded_picture_number = avctx->frame_number;
4301 s->current_picture.pict_type = pict->pict_type;
4302 s->current_picture.quality = pict->quality;
4303 s->m.frame_bits = 8*(s->c.bytestream - s->c.bytestream_start);
4304 s->m.p_tex_bits = s->m.frame_bits - s->m.misc_bits - s->m.mv_bits;
4305 s->m.current_picture.display_picture_number =
4306 s->m.current_picture.coded_picture_number = avctx->frame_number;
4307 s->m.current_picture.quality = pict->quality;
4308 s->m.total_bits += 8*(s->c.bytestream - s->c.bytestream_start);
4310 ff_rate_estimate_qscale(&s->m, 0);
4311 if(avctx->flags&CODEC_FLAG_PASS1)
4312 ff_write_pass1_stats(&s->m);
4313 s->m.last_pict_type = s->m.pict_type;
4317 return ff_rac_terminate(c);
// Free the resources held by the context: the wavelet buffer, the
// motion-estimation scratch buffers, the block array, the per-reference
// vector/score arrays, any reference pictures that still hold data, and the
// x_coeff array of every subband.
4320 static void common_end(SnowContext *s){
4321 int plane_index, level, orientation, i;
4323 av_freep(&s->spatial_dwt_buffer);
4325 av_freep(&s->m.me.scratchpad);
4326 av_freep(&s->m.me.map);
4327 av_freep(&s->m.me.score_map);
4328 av_freep(&s->m.obmc_scratchpad);
4330 av_freep(&s->block);
// All MAX_REF_FRAMES slots are visited, not just max_ref_frames of them.
4332 for(i=0; i<MAX_REF_FRAMES; i++){
4333 av_freep(&s->ref_mvs[i]);
4334 av_freep(&s->ref_scores[i]);
4335 if(s->last_picture[i].data[0])
4336 s->avctx->release_buffer(s->avctx, &s->last_picture[i]);
// Same band iteration order as the one used when x_coeff is allocated in common_init().
4339 for(plane_index=0; plane_index<3; plane_index++){
4340 for(level=s->spatial_decomposition_count-1; level>=0; level--){
4341 for(orientation=level ? 1 : 0; orientation<4; orientation++){
4342 SubBand *b= &s->plane[plane_index].band[level][orientation];
4344 av_freep(&b->x_coeff);
// Encoder teardown: frees avctx->stats_out (the remaining statements,
// including the return, are not part of this listing).
4350 static int encode_end(AVCodecContext *avctx)
4352 SnowContext *s = avctx->priv_data;
4355 av_free(avctx->stats_out);
// Decoder initialisation: selects PIX_FMT_YUV420P output and sets up the
// slice buffer for the luma plane (other statements, including the return,
// are not part of this listing).
4360 static int decode_init(AVCodecContext *avctx)
4362 SnowContext *s = avctx->priv_data;
4365 avctx->pix_fmt= PIX_FMT_YUV420P;
4369 block_size = MB_SIZE >> s->block_max_depth;
// The third argument (presumably the number of buffered lines -- confirm
// against slice_buffer_init) is block_size + count*(count+3) + 1, where count
// is spatial_decomposition_count.
4370 slice_buffer_init(&s->sb, s->plane[0].height, (block_size) + (s->spatial_decomposition_count * (s->spatial_decomposition_count + 3)) + 1, s->plane[0].width, s->spatial_dwt_buffer);
// Decode one frame from buf into the picture pointed to by data.
//
// avctx     codec context holding the SnowContext in priv_data
// data      receives the output AVFrame
// data_size set to sizeof(AVFrame)
// buf       input bitstream handed to the range decoder
// buf_size  size of buf as passed to ff_init_range_decoder()
//
// Visible stages per plane: unpack the coefficients of every band, then
// process the plane slice by slice -- subband decoding, dequantization,
// inverse wavelet transform, prediction -- releasing slice-buffer lines as
// they are finished.
// NOTE(review): many lines of this function (including the header parsing
// call and the return statement) are not part of this listing.
4375 static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, uint8_t *buf, int buf_size){
4376 SnowContext *s = avctx->priv_data;
4377 RangeCoder * const c= &s->c;
4379 AVFrame *picture = data;
4380 int level, orientation, plane_index;
4382 ff_init_range_decoder(c, buf, buf_size);
4383 ff_build_rac_states(c, 0.05*(1LL<<32), 256-8);
4385 s->current_picture.pict_type= FF_I_TYPE; //FIXME I vs. P
// The block array is allocated on first use.
4387 if(!s->block) alloc_blocks(s);
4390 //keyframe flag duplication mess FIXME
4391 if(avctx->debug&FF_DEBUG_PICT_INFO)
4392 av_log(avctx, AV_LOG_ERROR, "keyframe:%d qlog:%d\n", s->keyframe, s->qlog);
4396 for(plane_index=0; plane_index<3; plane_index++){
4397 Plane *p= &s->plane[plane_index];
4401 int decode_state[MAX_DECOMPOSITIONS][4][1]; /* Stored state info for unpack_coeffs. 1 variable per instance. */
// Debug flag 2048: run the prediction on a zeroed buffer and copy the result
// into mconly_picture, which is the picture returned at the end in that mode.
4403 if(s->avctx->debug&2048){
4404 memset(s->spatial_dwt_buffer, 0, sizeof(DWTELEM)*w*h);
4405 predict_plane(s, s->spatial_dwt_buffer, plane_index, 1);
4409 int v= s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x];
4410 s->mconly_picture.data[plane_index][y*s->mconly_picture.linesize[plane_index] + x]= v;
// Entropy-decode the coefficients of every band of this plane.
4416 for(level=0; level<s->spatial_decomposition_count; level++){
4417 for(orientation=level ? 1 : 0; orientation<4; orientation++){
4418 SubBand *b= &p->band[level][orientation];
4419 unpack_coeffs(s, b, b->parent, orientation);
4422 STOP_TIMER("unpack coeffs");
4426 const int mb_h= s->b_height << s->block_max_depth;
4427 const int block_size = MB_SIZE >> s->block_max_depth;
// Chroma planes use half the luma block size.
4428 const int block_w = plane_index ? block_size/2 : block_size;
4430 dwt_compose_t cs[MAX_DECOMPOSITIONS];
4435 ff_spatial_idwt_buffered_init(cs, &s->sb, w, h, 1, s->spatial_decomposition_type, s->spatial_decomposition_count);
// One iteration per block row, plus one extra (mb_y runs up to mb_h inclusive).
4436 for(mb_y=0; mb_y<=mb_h; mb_y++){
4438 int slice_starty = block_w*mb_y;
4439 int slice_h = block_w*(mb_y+1);
// Unless this is a keyframe or debug flag 512 is set, the slice window is
// moved up by half a block.
4440 if (!(s->keyframe || s->avctx->debug&512)){
4441 slice_starty = FFMAX(0, slice_starty - (block_w >> 1));
4442 slice_h -= (block_w >> 1);
4447 for(level=0; level<s->spatial_decomposition_count; level++){
4448 for(orientation=level ? 1 : 0; orientation<4; orientation++){
4449 SubBand *b= &p->band[level][orientation];
4452 int our_mb_start = mb_y;
4453 int our_mb_end = (mb_y + 1);
// Row range of this band needed for the slice: block rows scaled down to the
// band's level, plus a margin, clamped to the band height below.
4455 start_y = (mb_y ? ((block_w * our_mb_start) >> (s->spatial_decomposition_count - level)) + s->spatial_decomposition_count - level + extra: 0);
4456 end_y = (((block_w * our_mb_end) >> (s->spatial_decomposition_count - level)) + s->spatial_decomposition_count - level + extra);
4457 if (!(s->keyframe || s->avctx->debug&512)){
4458 start_y = FFMAX(0, start_y - (block_w >> (1+s->spatial_decomposition_count - level)));
4459 end_y = FFMAX(0, end_y - (block_w >> (1+s->spatial_decomposition_count - level)));
4461 start_y = FFMIN(b->height, start_y);
4462 end_y = FFMIN(b->height, end_y);
4464 if (start_y != end_y){
// Orientation 0: band [0][0] is decoded and correlated one row ahead of the
// rows that are dequantized.
4465 if (orientation == 0){
4466 SubBand * correlate_band = &p->band[0][0];
4467 int correlate_end_y = FFMIN(b->height, end_y + 1);
4468 int correlate_start_y = FFMIN(b->height, (start_y ? start_y + 1 : 0));
4469 decode_subband_slice_buffered(s, correlate_band, &s->sb, correlate_start_y, correlate_end_y, decode_state[0][0]);
4470 correlate_slice_buffered(s, &s->sb, correlate_band, correlate_band->buf, correlate_band->stride, 1, 0, correlate_start_y, correlate_end_y);
4471 dequantize_slice_buffered(s, &s->sb, correlate_band, correlate_band->buf, correlate_band->stride, start_y, end_y);
4474 decode_subband_slice_buffered(s, b, &s->sb, start_y, end_y, decode_state[level][orientation]);
4478 STOP_TIMER("decode_subband_slice");
// Inverse transform of the slice, advancing 4 rows per call.
4482 for(; yd<slice_h; yd+=4){
4483 ff_spatial_idwt_buffered_slice(&s->dsp, cs, &s->sb, w, h, 1, s->spatial_decomposition_type, s->spatial_decomposition_count, yd);
4485 STOP_TIMER("idwt slice");}
// Lossless mode: scale the reconstructed rows up by FRAC_BITS.
4488 if(s->qlog == LOSSLESS_QLOG){
4489 for(; yq<slice_h && yq<h; yq++){
4490 DWTELEM * line = slice_buffer_get_line(&s->sb, yq);
4492 line[x] <<= FRAC_BITS;
4497 predict_slice_buffered(s, &s->sb, s->spatial_dwt_buffer, plane_index, 1, mb_y);
// Release the slice-buffer lines of this slice, clamped to the plane height.
4499 y = FFMIN(p->height, slice_starty);
4500 end_y = FFMIN(p->height, slice_h);
4502 slice_buffer_release(&s->sb, y++);
4505 slice_buffer_flush(&s->sb);
4507 STOP_TIMER("idwt + predict_slices")}
// Release the oldest reference if it holds data.
4512 if(s->last_picture[s->max_ref_frames-1].data[0])
4513 avctx->release_buffer(avctx, &s->last_picture[s->max_ref_frames-1]);
// Output: the decoded picture, or the prediction-only picture in debug mode 2048.
4515 if(!(s->avctx->debug&2048))
4516 *picture= s->current_picture;
4518 *picture= s->mconly_picture;
4520 *data_size = sizeof(AVFrame);
4522 bytes_read= c->bytestream - c->bytestream_start;
4523 if(bytes_read ==0) av_log(s->avctx, AV_LOG_ERROR, "error at end of frame\n"); //FIXME
/**
 * Decoder teardown for the Snow codec.
 *
 * Fetches the SnowContext stored in avctx->priv_data and destroys its
 * slice buffer (s->sb) via slice_buffer_destroy().
 *
 * NOTE(review): presumably installed as the close callback of snow_decoder,
 * and presumably followed by further shared cleanup and a status return;
 * those lines are not visible in this excerpt -- confirm against the full file.
 *
 * @param avctx codec context whose priv_data holds the SnowContext
 */
4528 static int decode_end(AVCodecContext *avctx)
4530 SnowContext *s = avctx->priv_data;
/* Destroy the slice buffer embedded in the context. */
4532 slice_buffer_destroy(&s->sb);
/**
 * Codec descriptor for the Snow decoder.
 *
 * The initializer is positional, so each entry's meaning depends on the
 * field order of AVCodec (declared outside this file excerpt).
 */
4539 AVCodec snow_decoder = {
/* Presumably the private-context size field -- TODO confirm against AVCodec. */
4543 sizeof(SnowContext),
/* Presumably the capabilities field: currently 0, with CODEC_CAP_DR1 and
 * CODEC_CAP_DRAW_HORIZ_BAND left commented out -- TODO confirm. */
4548 0 /*CODEC_CAP_DR1*/ /*| CODEC_CAP_DRAW_HORIZ_BAND*/,
/* The encoder descriptor is only compiled when CONFIG_ENCODERS is defined. */
4552 #ifdef CONFIG_ENCODERS
/**
 * Codec descriptor for the Snow encoder.
 *
 * Positional initializer; entry meanings depend on the field order of
 * AVCodec (declared outside this file excerpt).
 */
4553 AVCodec snow_encoder = {
/* Presumably the private-context size field -- TODO confirm against AVCodec. */
4557 sizeof(SnowContext),
4573 int buffer[2][width*height];
4576 s.spatial_decomposition_count=6;
4577 s.spatial_decomposition_type=1;
4579 printf("testing 5/3 DWT\n");
4580 for(i=0; i<width*height; i++)
4581 buffer[0][i]= buffer[1][i]= random()%54321 - 12345;
4583 ff_spatial_dwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4584 ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4586 for(i=0; i<width*height; i++)
4587 if(buffer[0][i]!= buffer[1][i]) printf("fsck: %d %d %d\n",i, buffer[0][i], buffer[1][i]);
4589 printf("testing 9/7 DWT\n");
4590 s.spatial_decomposition_type=0;
4591 for(i=0; i<width*height; i++)
4592 buffer[0][i]= buffer[1][i]= random()%54321 - 12345;
4594 ff_spatial_dwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4595 ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4597 for(i=0; i<width*height; i++)
4598 if(ABS(buffer[0][i] - buffer[1][i])>20) printf("fsck: %d %d %d\n",i, buffer[0][i], buffer[1][i]);
4601 printf("testing AC coder\n");
4602 memset(s.header_state, 0, sizeof(s.header_state));
4603 ff_init_range_encoder(&s.c, buffer[0], 256*256);
4604 ff_init_cabac_states(&s.c, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64);
4606 for(i=-256; i<256; i++){
4608 put_symbol(&s.c, s.header_state, i*i*i/3*ABS(i), 1);
4609 STOP_TIMER("put_symbol")
4611 ff_rac_terminate(&s.c);
4613 memset(s.header_state, 0, sizeof(s.header_state));
4614 ff_init_range_decoder(&s.c, buffer[0], 256*256);
4615 ff_init_cabac_states(&s.c, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64);
4617 for(i=-256; i<256; i++){
4620 j= get_symbol(&s.c, s.header_state, 1);
4621 STOP_TIMER("get_symbol")
4622 if(j!=i*i*i/3*ABS(i)) printf("fsck: %d != %d\n", i, j);
4626 int level, orientation, x, y;
4627 int64_t errors[8][4];
4630 memset(errors, 0, sizeof(errors));
4631 s.spatial_decomposition_count=3;
4632 s.spatial_decomposition_type=0;
4633 for(level=0; level<s.spatial_decomposition_count; level++){
4634 for(orientation=level ? 1 : 0; orientation<4; orientation++){
4635 int w= width >> (s.spatial_decomposition_count-level);
4636 int h= height >> (s.spatial_decomposition_count-level);
4637 int stride= width << (s.spatial_decomposition_count-level);
4638 DWTELEM *buf= buffer[0];
4641 if(orientation&1) buf+=w;
4642 if(orientation>1) buf+=stride>>1;
4644 memset(buffer[0], 0, sizeof(int)*width*height);
4645 buf[w/2 + h/2*stride]= 256*256;
4646 ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4647 for(y=0; y<height; y++){
4648 for(x=0; x<width; x++){
4649 int64_t d= buffer[0][x + y*width];
4651 if(ABS(width/2-x)<9 && ABS(height/2-y)<9 && level==2) printf("%8lld ", d);
4653 if(ABS(height/2-y)<9 && level==2) printf("\n");
4655 error= (int)(sqrt(error)+0.5);
4656 errors[level][orientation]= error;
4657 if(g) g=ff_gcd(g, error);
4661 printf("static int const visual_weight[][4]={\n");
4662 for(level=0; level<s.spatial_decomposition_count; level++){
4664 for(orientation=0; orientation<4; orientation++){
4665 printf("%8lld,", errors[level][orientation]/g);
4673 int w= width >> (s.spatial_decomposition_count-level);
4674 int h= height >> (s.spatial_decomposition_count-level);
4675 int stride= width << (s.spatial_decomposition_count-level);
4676 DWTELEM *buf= buffer[0];
4682 memset(buffer[0], 0, sizeof(int)*width*height);
4684 for(y=0; y<height; y++){
4685 for(x=0; x<width; x++){
4686 int tab[4]={0,2,3,1};
4687 buffer[0][x+width*y]= 256*256*tab[(x&1) + 2*(y&1)];
4690 ff_spatial_dwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4694 buf[x + y*stride ]=169;
4695 buf[x + y*stride-w]=64;
4698 ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4700 for(y=0; y<height; y++){
4701 for(x=0; x<width; x++){
4702 int64_t d= buffer[0][x + y*width];
4704 if(ABS(width/2-x)<9 && ABS(height/2-y)<9) printf("%8lld ", d);
4706 if(ABS(height/2-y)<9) printf("\n");