2 * Copyright (C) 2004 Michael Niedermayer <michaelni@gmx.at>
4 * This file is part of FFmpeg.
6 * FFmpeg is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * FFmpeg is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with FFmpeg; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
26 #include "rangecoder.h"
28 #include "mpegvideo.h"
/* 256-entry lookup table with values in {-1, 0, 1}: entries 0, 1 and 255 are 0,
 * entries 2..127 are 1 and entries 128..254 are -1.
 * NOTE(review): presumably indexed by a difference reduced to 8 bits, so the
 * upper half would correspond to negative inputs - confirm against the users. */
33 static const int8_t quant3[256]={
34 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
35 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
36 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
37 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
38 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
39 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
40 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
41 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
42 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
43 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
44 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
45 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
46 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
47 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
48 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
49 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 0,
/* 256-entry lookup table with values in {-1, 0, 1}: entry 0 is 0,
 * entries 1..127 are 1 and entries 128..255 are -1. */
51 static const int8_t quant3b[256]={
52 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
53 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
54 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
55 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
56 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
57 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
58 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
59 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
60 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
61 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
62 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
63 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
64 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
65 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
66 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
67 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
/* 256-entry lookup table with values in {-1, 0, 1}: entries 0 and 1 are 0;
 * from entry 2 onwards even indices hold 1 and odd indices hold -1. */
69 static const int8_t quant3bA[256]={
70 0, 0, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
71 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
72 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
73 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
74 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
75 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
76 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
77 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
78 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
79 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
80 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
81 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
82 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
83 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
84 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
85 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
/* 256-entry lookup table with values in -2..2: entry 0 is 0, entries 1..3 are 1,
 * entries 4..127 are 2, entries 128..252 are -2 and entries 253..255 are -1. */
87 static const int8_t quant5[256]={
88 0, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
89 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
90 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
91 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
92 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
93 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
94 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
95 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
96 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
97 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
98 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
99 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
100 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
101 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
102 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
103 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,-1,
/* 256-entry lookup table with values in -3..3: entries 0..127 are non-negative
 * (0 only at entry 0), entries 128..255 are negative. */
105 static const int8_t quant7[256]={
106 0, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
107 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
108 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3,
109 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
110 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
111 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
112 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
113 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
114 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
115 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
116 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
117 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
118 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
119 -3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-2,-2,-2,
120 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
121 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,
/* 256-entry lookup table with values in -4..4: entries 0..127 are non-negative
 * (0 only at entry 0), entries 128..255 are negative. */
123 static const int8_t quant9[256]={
124 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3,
125 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
126 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
127 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
128 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
129 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
130 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
131 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
132 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
133 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
134 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
135 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
136 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
137 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
138 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,
139 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-1,-1,
/* 256-entry lookup table with values in -5..5: entries 0..127 are non-negative
 * (0 only at entry 0), entries 128..255 are negative. */
141 static const int8_t quant11[256]={
142 0, 1, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4,
143 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
144 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
145 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
146 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
147 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
148 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
149 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
150 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
151 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
152 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
153 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
154 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
155 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-4,-4,
156 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
157 -4,-4,-4,-4,-4,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-1,
/* 256-entry lookup table with values in -6..6: entries 0..127 are non-negative
 * (0 only at entry 0), entries 128..255 are negative. */
159 static const int8_t quant13[256]={
160 0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
161 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
162 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
163 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
164 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
165 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
166 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
167 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
168 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
169 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
170 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
171 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
172 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-5,
173 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
174 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
175 -4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,-2,-2,-1,
/* 1024-entry table laid out as 32 rows of 32 unsigned weights; the largest
 * value is 64 (centre entries) and the bottom 16 rows repeat the top 16 in
 * reverse order.
 * NOTE(review): obmc32 is defined three times in this excerpt; presumably the
 * variants are selected by preprocessor conditionals that are not visible here. */
179 static const uint8_t obmc32[1024]={
180 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
181 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
182 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
183 0, 0, 1, 1, 2, 2, 3, 4, 4, 5, 6, 6, 7, 7, 8, 8, 8, 8, 7, 7, 6, 6, 5, 4, 4, 3, 2, 2, 1, 1, 0, 0,
184 0, 0, 1, 2, 2, 3, 4, 6, 7, 8, 9,10,11,12,12,12,12,12,12,11,10, 9, 8, 7, 6, 4, 3, 2, 2, 1, 0, 0,
185 0, 1, 1, 2, 3, 5, 6, 8,10,11,13,14,15,16,17,18,18,17,16,15,14,13,11,10, 8, 6, 5, 3, 2, 1, 1, 0,
186 0, 1, 1, 3, 4, 6, 8,10,13,15,17,19,20,22,22,23,23,22,22,20,19,17,15,13,10, 8, 6, 4, 3, 1, 1, 0,
187 0, 1, 2, 4, 6, 8,10,13,16,19,21,23,25,27,28,29,29,28,27,25,23,21,19,16,13,10, 8, 6, 4, 2, 1, 0,
188 0, 1, 2, 4, 7,10,13,16,19,22,25,28,31,33,34,35,35,34,33,31,28,25,22,19,16,13,10, 7, 4, 2, 1, 0,
189 0, 1, 3, 5, 8,11,15,19,22,26,30,33,36,38,40,41,41,40,38,36,33,30,26,22,19,15,11, 8, 5, 3, 1, 0,
190 0, 1, 3, 6, 9,12,17,21,25,30,34,38,41,44,45,46,46,45,44,41,38,34,30,25,21,17,12, 9, 6, 3, 1, 0,
191 0, 1, 3, 6,10,14,19,23,28,33,38,42,45,48,51,52,52,51,48,45,42,38,33,28,23,19,14,10, 6, 3, 1, 0,
192 0, 1, 4, 7,11,15,20,25,31,36,41,45,49,52,55,56,56,55,52,49,45,41,36,31,25,20,15,11, 7, 4, 1, 0,
193 0, 2, 4, 7,12,16,22,27,33,38,44,48,52,56,58,60,60,58,56,52,48,44,38,33,27,22,16,12, 7, 4, 2, 0,
194 0, 1, 4, 8,12,17,22,28,34,40,45,51,55,58,61,62,62,61,58,55,51,45,40,34,28,22,17,12, 8, 4, 1, 0,
195 0, 2, 4, 8,12,18,23,29,35,41,46,52,56,60,62,64,64,62,60,56,52,46,41,35,29,23,18,12, 8, 4, 2, 0,
196 0, 2, 4, 8,12,18,23,29,35,41,46,52,56,60,62,64,64,62,60,56,52,46,41,35,29,23,18,12, 8, 4, 2, 0,
197 0, 1, 4, 8,12,17,22,28,34,40,45,51,55,58,61,62,62,61,58,55,51,45,40,34,28,22,17,12, 8, 4, 1, 0,
198 0, 2, 4, 7,12,16,22,27,33,38,44,48,52,56,58,60,60,58,56,52,48,44,38,33,27,22,16,12, 7, 4, 2, 0,
199 0, 1, 4, 7,11,15,20,25,31,36,41,45,49,52,55,56,56,55,52,49,45,41,36,31,25,20,15,11, 7, 4, 1, 0,
200 0, 1, 3, 6,10,14,19,23,28,33,38,42,45,48,51,52,52,51,48,45,42,38,33,28,23,19,14,10, 6, 3, 1, 0,
201 0, 1, 3, 6, 9,12,17,21,25,30,34,38,41,44,45,46,46,45,44,41,38,34,30,25,21,17,12, 9, 6, 3, 1, 0,
202 0, 1, 3, 5, 8,11,15,19,22,26,30,33,36,38,40,41,41,40,38,36,33,30,26,22,19,15,11, 8, 5, 3, 1, 0,
203 0, 1, 2, 4, 7,10,13,16,19,22,25,28,31,33,34,35,35,34,33,31,28,25,22,19,16,13,10, 7, 4, 2, 1, 0,
204 0, 1, 2, 4, 6, 8,10,13,16,19,21,23,25,27,28,29,29,28,27,25,23,21,19,16,13,10, 8, 6, 4, 2, 1, 0,
205 0, 1, 1, 3, 4, 6, 8,10,13,15,17,19,20,22,22,23,23,22,22,20,19,17,15,13,10, 8, 6, 4, 3, 1, 1, 0,
206 0, 1, 1, 2, 3, 5, 6, 8,10,11,13,14,15,16,17,18,18,17,16,15,14,13,11,10, 8, 6, 5, 3, 2, 1, 1, 0,
207 0, 0, 1, 2, 2, 3, 4, 6, 7, 8, 9,10,11,12,12,12,12,12,12,11,10, 9, 8, 7, 6, 4, 3, 2, 2, 1, 0, 0,
208 0, 0, 1, 1, 2, 2, 3, 4, 4, 5, 6, 6, 7, 7, 8, 8, 8, 8, 7, 7, 6, 6, 5, 4, 4, 3, 2, 2, 1, 1, 0, 0,
209 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
210 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
211 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
/* 256-entry table laid out as 16 rows of 16 unsigned weights; the largest value
 * is 62 (centre entries) and the bottom 8 rows repeat the top 8 in reverse order.
 * NOTE(review): obmc16 is defined three times in this excerpt; presumably the
 * variants are selected by preprocessor conditionals that are not visible here. */
214 static const uint8_t obmc16[256]={
215 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
216 0, 1, 1, 2, 4, 5, 5, 6, 6, 5, 5, 4, 2, 1, 1, 0,
217 0, 1, 4, 6, 9,11,13,15,15,13,11, 9, 6, 4, 1, 0,
218 0, 2, 6,11,15,20,24,26,26,24,20,15,11, 6, 2, 0,
219 0, 4, 9,15,23,29,34,38,38,34,29,23,15, 9, 4, 0,
220 0, 5,11,20,29,38,45,49,49,45,38,29,20,11, 5, 0,
221 1, 5,13,24,34,45,53,57,57,53,45,34,24,13, 5, 1,
222 1, 6,15,26,38,49,57,62,62,57,49,38,26,15, 6, 1,
223 1, 6,15,26,38,49,57,62,62,57,49,38,26,15, 6, 1,
224 1, 5,13,24,34,45,53,57,57,53,45,34,24,13, 5, 1,
225 0, 5,11,20,29,38,45,49,49,45,38,29,20,11, 5, 0,
226 0, 4, 9,15,23,29,34,38,38,34,29,23,15, 9, 4, 0,
227 0, 2, 6,11,15,20,24,26,26,24,20,15,11, 6, 2, 0,
228 0, 1, 4, 6, 9,11,13,15,15,13,11, 9, 6, 4, 1, 0,
229 0, 1, 1, 2, 4, 5, 5, 6, 6, 5, 5, 4, 2, 1, 1, 0,
230 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
/* Second visible variant of the 32x32 weight table: every entry is a multiple
 * of 4, the largest value is 240 (centre entries) and the bottom 16 rows repeat
 * the top 16 in reverse order.
 * NOTE(review): presumably selected by a preprocessor conditional that is not
 * visible in this excerpt. */
234 static const uint8_t obmc32[1024]={
235 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0,
236 0, 4, 4, 4, 8, 8, 8, 12, 12, 16, 16, 16, 20, 20, 20, 24, 24, 20, 20, 20, 16, 16, 16, 12, 12, 8, 8, 8, 4, 4, 4, 0,
237 0, 4, 8, 8, 12, 12, 16, 20, 20, 24, 28, 28, 32, 32, 36, 40, 40, 36, 32, 32, 28, 28, 24, 20, 20, 16, 12, 12, 8, 8, 4, 0,
238 0, 4, 8, 12, 16, 20, 24, 28, 28, 32, 36, 40, 44, 48, 52, 56, 56, 52, 48, 44, 40, 36, 32, 28, 28, 24, 20, 16, 12, 8, 4, 0,
239 4, 8, 12, 16, 20, 24, 28, 32, 40, 44, 48, 52, 56, 60, 64, 68, 68, 64, 60, 56, 52, 48, 44, 40, 32, 28, 24, 20, 16, 12, 8, 4,
240 4, 8, 12, 20, 24, 32, 36, 40, 48, 52, 56, 64, 68, 76, 80, 84, 84, 80, 76, 68, 64, 56, 52, 48, 40, 36, 32, 24, 20, 12, 8, 4,
241 4, 8, 16, 24, 28, 36, 44, 48, 56, 60, 68, 76, 80, 88, 96,100,100, 96, 88, 80, 76, 68, 60, 56, 48, 44, 36, 28, 24, 16, 8, 4,
242 4, 12, 20, 28, 32, 40, 48, 56, 64, 72, 80, 88, 92,100,108,116,116,108,100, 92, 88, 80, 72, 64, 56, 48, 40, 32, 28, 20, 12, 4,
243 4, 12, 20, 28, 40, 48, 56, 64, 72, 80, 88, 96,108,116,124,132,132,124,116,108, 96, 88, 80, 72, 64, 56, 48, 40, 28, 20, 12, 4,
244 4, 16, 24, 32, 44, 52, 60, 72, 80, 92,100,108,120,128,136,148,148,136,128,120,108,100, 92, 80, 72, 60, 52, 44, 32, 24, 16, 4,
245 4, 16, 28, 36, 48, 56, 68, 80, 88,100,112,120,132,140,152,164,164,152,140,132,120,112,100, 88, 80, 68, 56, 48, 36, 28, 16, 4,
246 4, 16, 28, 40, 52, 64, 76, 88, 96,108,120,132,144,156,168,180,180,168,156,144,132,120,108, 96, 88, 76, 64, 52, 40, 28, 16, 4,
247 8, 20, 32, 44, 56, 68, 80, 92,108,120,132,144,156,168,180,192,192,180,168,156,144,132,120,108, 92, 80, 68, 56, 44, 32, 20, 8,
248 8, 20, 32, 48, 60, 76, 88,100,116,128,140,156,168,184,196,208,208,196,184,168,156,140,128,116,100, 88, 76, 60, 48, 32, 20, 8,
249 8, 20, 36, 52, 64, 80, 96,108,124,136,152,168,180,196,212,224,224,212,196,180,168,152,136,124,108, 96, 80, 64, 52, 36, 20, 8,
250 8, 24, 40, 56, 68, 84,100,116,132,148,164,180,192,208,224,240,240,224,208,192,180,164,148,132,116,100, 84, 68, 56, 40, 24, 8,
251 8, 24, 40, 56, 68, 84,100,116,132,148,164,180,192,208,224,240,240,224,208,192,180,164,148,132,116,100, 84, 68, 56, 40, 24, 8,
252 8, 20, 36, 52, 64, 80, 96,108,124,136,152,168,180,196,212,224,224,212,196,180,168,152,136,124,108, 96, 80, 64, 52, 36, 20, 8,
253 8, 20, 32, 48, 60, 76, 88,100,116,128,140,156,168,184,196,208,208,196,184,168,156,140,128,116,100, 88, 76, 60, 48, 32, 20, 8,
254 8, 20, 32, 44, 56, 68, 80, 92,108,120,132,144,156,168,180,192,192,180,168,156,144,132,120,108, 92, 80, 68, 56, 44, 32, 20, 8,
255 4, 16, 28, 40, 52, 64, 76, 88, 96,108,120,132,144,156,168,180,180,168,156,144,132,120,108, 96, 88, 76, 64, 52, 40, 28, 16, 4,
256 4, 16, 28, 36, 48, 56, 68, 80, 88,100,112,120,132,140,152,164,164,152,140,132,120,112,100, 88, 80, 68, 56, 48, 36, 28, 16, 4,
257 4, 16, 24, 32, 44, 52, 60, 72, 80, 92,100,108,120,128,136,148,148,136,128,120,108,100, 92, 80, 72, 60, 52, 44, 32, 24, 16, 4,
258 4, 12, 20, 28, 40, 48, 56, 64, 72, 80, 88, 96,108,116,124,132,132,124,116,108, 96, 88, 80, 72, 64, 56, 48, 40, 28, 20, 12, 4,
259 4, 12, 20, 28, 32, 40, 48, 56, 64, 72, 80, 88, 92,100,108,116,116,108,100, 92, 88, 80, 72, 64, 56, 48, 40, 32, 28, 20, 12, 4,
260 4, 8, 16, 24, 28, 36, 44, 48, 56, 60, 68, 76, 80, 88, 96,100,100, 96, 88, 80, 76, 68, 60, 56, 48, 44, 36, 28, 24, 16, 8, 4,
261 4, 8, 12, 20, 24, 32, 36, 40, 48, 52, 56, 64, 68, 76, 80, 84, 84, 80, 76, 68, 64, 56, 52, 48, 40, 36, 32, 24, 20, 12, 8, 4,
262 4, 8, 12, 16, 20, 24, 28, 32, 40, 44, 48, 52, 56, 60, 64, 68, 68, 64, 60, 56, 52, 48, 44, 40, 32, 28, 24, 20, 16, 12, 8, 4,
263 0, 4, 8, 12, 16, 20, 24, 28, 28, 32, 36, 40, 44, 48, 52, 56, 56, 52, 48, 44, 40, 36, 32, 28, 28, 24, 20, 16, 12, 8, 4, 0,
264 0, 4, 8, 8, 12, 12, 16, 20, 20, 24, 28, 28, 32, 32, 36, 40, 40, 36, 32, 32, 28, 28, 24, 20, 20, 16, 12, 12, 8, 8, 4, 0,
265 0, 4, 4, 4, 8, 8, 8, 12, 12, 16, 16, 16, 20, 20, 20, 24, 24, 20, 20, 20, 16, 16, 16, 12, 12, 8, 8, 8, 4, 4, 4, 0,
266 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0,
/* Second visible variant of the 16x16 weight table: every entry is a multiple
 * of 4, the largest value is 224 (centre entries) and the bottom 8 rows repeat
 * the top 8 in reverse order.
 * NOTE(review): presumably selected by a preprocessor conditional that is not
 * visible in this excerpt. */
269 static const uint8_t obmc16[256]={
270 0, 4, 4, 8, 8, 12, 12, 16, 16, 12, 12, 8, 8, 4, 4, 0,
271 4, 8, 16, 20, 28, 32, 40, 44, 44, 40, 32, 28, 20, 16, 8, 4,
272 4, 16, 24, 36, 44, 56, 64, 76, 76, 64, 56, 44, 36, 24, 16, 4,
273 8, 20, 36, 48, 64, 76, 92,104,104, 92, 76, 64, 48, 36, 20, 8,
274 8, 28, 44, 64, 80,100,116,136,136,116,100, 80, 64, 44, 28, 8,
275 12, 32, 56, 76,100,120,144,164,164,144,120,100, 76, 56, 32, 12,
276 12, 40, 64, 92,116,144,168,196,196,168,144,116, 92, 64, 40, 12,
277 16, 44, 76,104,136,164,196,224,224,196,164,136,104, 76, 44, 16,
278 16, 44, 76,104,136,164,196,224,224,196,164,136,104, 76, 44, 16,
279 12, 40, 64, 92,116,144,168,196,196,168,144,116, 92, 64, 40, 12,
280 12, 32, 56, 76,100,120,144,164,164,144,120,100, 76, 56, 32, 12,
281 8, 28, 44, 64, 80,100,116,136,136,116,100, 80, 64, 44, 28, 8,
282 8, 20, 36, 48, 64, 76, 92,104,104, 92, 76, 64, 48, 36, 20, 8,
283 4, 16, 24, 36, 44, 56, 64, 76, 76, 64, 56, 44, 36, 24, 16, 4,
284 4, 8, 16, 20, 28, 32, 40, 44, 44, 40, 32, 28, 20, 16, 8, 4,
285 0, 4, 4, 8, 8, 12, 12, 16, 16, 12, 12, 8, 8, 4, 4, 0,
/* Third visible variant of the 32x32 weight table: the largest value is 64
 * (centre entries) and the bottom 16 rows repeat the top 16 in reverse order.
 * NOTE(review): presumably selected by a preprocessor conditional that is not
 * visible in this excerpt. */
289 static const uint8_t obmc32[1024]={
290 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
291 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
292 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
293 0, 0, 1, 1, 1, 2, 2, 3, 4, 5, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 6, 5, 5, 4, 3, 2, 2, 1, 1, 1, 0, 0,
294 0, 0, 1, 1, 2, 3, 4, 5, 6, 7, 9,10,11,11,12,12,12,12,11,11,10, 9, 7, 6, 5, 4, 3, 2, 1, 1, 0, 0,
295 0, 0, 1, 2, 3, 5, 6, 8, 9,11,12,14,15,16,17,17,17,17,16,15,14,12,11, 9, 8, 6, 5, 3, 2, 1, 0, 0,
296 0, 1, 1, 2, 4, 6, 8,10,12,15,17,19,20,21,22,23,23,22,21,20,19,17,15,12,10, 8, 6, 4, 2, 1, 1, 0,
297 0, 1, 2, 3, 5, 8,10,13,16,19,21,24,26,27,28,29,29,28,27,26,24,21,19,16,13,10, 8, 5, 3, 2, 1, 0,
298 0, 1, 2, 4, 6, 9,12,16,19,23,26,29,31,33,34,35,35,34,33,31,29,26,23,19,16,12, 9, 6, 4, 2, 1, 0,
299 0, 1, 3, 5, 7,11,15,19,23,26,30,34,37,39,40,41,41,40,39,37,34,30,26,23,19,15,11, 7, 5, 3, 1, 0,
300 0, 1, 3, 5, 9,12,17,21,26,30,35,38,42,44,46,47,47,46,44,42,38,35,30,26,21,17,12, 9, 5, 3, 1, 0,
301 0, 1, 3, 6, 9,14,19,24,29,34,38,43,46,49,51,52,52,51,49,46,43,38,34,29,24,19,14, 9, 6, 3, 1, 0,
302 0, 1, 3, 6,11,15,20,26,31,37,42,46,50,53,56,57,57,56,53,50,46,42,37,31,26,20,15,11, 6, 3, 1, 0,
303 0, 1, 3, 7,11,16,21,27,33,39,44,49,53,57,59,60,60,59,57,53,49,44,39,33,27,21,16,11, 7, 3, 1, 0,
304 0, 1, 4, 7,12,17,22,28,34,40,46,51,56,59,61,63,63,61,59,56,51,46,40,34,28,22,17,12, 7, 4, 1, 0,
305 0, 1, 4, 7,12,17,23,29,35,41,47,52,57,60,63,64,64,63,60,57,52,47,41,35,29,23,17,12, 7, 4, 1, 0,
306 0, 1, 4, 7,12,17,23,29,35,41,47,52,57,60,63,64,64,63,60,57,52,47,41,35,29,23,17,12, 7, 4, 1, 0,
307 0, 1, 4, 7,12,17,22,28,34,40,46,51,56,59,61,63,63,61,59,56,51,46,40,34,28,22,17,12, 7, 4, 1, 0,
308 0, 1, 3, 7,11,16,21,27,33,39,44,49,53,57,59,60,60,59,57,53,49,44,39,33,27,21,16,11, 7, 3, 1, 0,
309 0, 1, 3, 6,11,15,20,26,31,37,42,46,50,53,56,57,57,56,53,50,46,42,37,31,26,20,15,11, 6, 3, 1, 0,
310 0, 1, 3, 6, 9,14,19,24,29,34,38,43,46,49,51,52,52,51,49,46,43,38,34,29,24,19,14, 9, 6, 3, 1, 0,
311 0, 1, 3, 5, 9,12,17,21,26,30,35,38,42,44,46,47,47,46,44,42,38,35,30,26,21,17,12, 9, 5, 3, 1, 0,
312 0, 1, 3, 5, 7,11,15,19,23,26,30,34,37,39,40,41,41,40,39,37,34,30,26,23,19,15,11, 7, 5, 3, 1, 0,
313 0, 1, 2, 4, 6, 9,12,16,19,23,26,29,31,33,34,35,35,34,33,31,29,26,23,19,16,12, 9, 6, 4, 2, 1, 0,
314 0, 1, 2, 3, 5, 8,10,13,16,19,21,24,26,27,28,29,29,28,27,26,24,21,19,16,13,10, 8, 5, 3, 2, 1, 0,
315 0, 1, 1, 2, 4, 6, 8,10,12,15,17,19,20,21,22,23,23,22,21,20,19,17,15,12,10, 8, 6, 4, 2, 1, 1, 0,
316 0, 0, 1, 2, 3, 5, 6, 8, 9,11,12,14,15,16,17,17,17,17,16,15,14,12,11, 9, 8, 6, 5, 3, 2, 1, 0, 0,
317 0, 0, 1, 1, 2, 3, 4, 5, 6, 7, 9,10,11,11,12,12,12,12,11,11,10, 9, 7, 6, 5, 4, 3, 2, 1, 1, 0, 0,
318 0, 0, 1, 1, 1, 2, 2, 3, 4, 5, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 6, 5, 5, 4, 3, 2, 2, 1, 1, 1, 0, 0,
319 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
320 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
321 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
/* Third visible variant of the 16x16 weight table: the largest value is 63
 * (centre entries) and the bottom 8 rows repeat the top 8 in reverse order.
 * NOTE(review): presumably selected by a preprocessor conditional that is not
 * visible in this excerpt. */
324 static const uint8_t obmc16[256]={
325 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
326 0, 0, 1, 2, 3, 4, 5, 5, 5, 5, 4, 3, 2, 1, 0, 0,
327 0, 1, 3, 6, 8,11,13,14,14,13,11, 8, 6, 3, 1, 0,
328 0, 2, 6,10,15,20,24,26,26,24,20,15,10, 6, 2, 0,
329 0, 3, 8,16,23,30,35,38,38,35,30,23,16, 8, 3, 0,
330 1, 4,11,20,30,39,46,49,49,46,39,30,20,11, 4, 1,
331 1, 5,13,24,35,46,54,58,58,54,46,35,24,13, 5, 1,
332 0, 5,14,26,38,49,58,63,63,58,49,38,26,14, 5, 0,
333 0, 5,14,26,38,49,58,63,63,58,49,38,26,14, 5, 0,
334 1, 5,13,24,35,46,54,58,58,54,46,35,24,13, 5, 1,
335 1, 4,11,20,30,39,46,49,49,46,39,30,20,11, 4, 1,
336 0, 3, 8,16,23,30,35,38,38,35,30,23,16, 8, 3, 0,
337 0, 2, 6,10,15,20,24,26,26,24,20,15,10, 6, 2, 0,
338 0, 1, 3, 6, 8,11,13,14,14,13,11, 8, 6, 3, 1, 0,
339 0, 0, 1, 2, 3, 4, 5, 5, 5, 5, 4, 3, 2, 1, 0, 0,
340 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
/* 64-entry table laid out as 8 rows of 8 unsigned weights; the largest value is
 * 196 (centre entries) and the bottom 4 rows repeat the top 4 in reverse order. */
346 static const uint8_t obmc8[64]={
347 4, 12, 20, 28, 28, 20, 12, 4,
348 12, 36, 60, 84, 84, 60, 36, 12,
349 20, 60,100,140,140,100, 60, 20,
350 28, 84,140,196,196,140, 84, 28,
351 28, 84,140,196,196,140, 84, 28,
352 20, 60,100,140,140,100, 60, 20,
353 12, 36, 60, 84, 84, 60, 36, 12,
354 4, 12, 20, 28, 28, 20, 12, 4,
/* 16-entry table of unsigned weights. NOTE(review): the initialiser rows are
 * not visible in this excerpt. */
359 static const uint8_t obmc4[16]={
/* Pointers to the four weight tables, ordered from the largest (obmc32, index 0)
 * to the smallest (obmc4, index 3). */
367 static const uint8_t *obmc_tab[4]={
368 obmc32, obmc16, obmc8, obmc4
/* MAX_REF_FRAMES x MAX_REF_FRAMES table of ints with file scope.
 * NOTE(review): presumably motion-vector scale factors between pairs of
 * reference frames - confirm where the table is filled and read. */
371 static int scale_mv_ref[MAX_REF_FRAMES][MAX_REF_FRAMES];
/* Per-block node structure. Only the BLOCK_INTRA flag value (1) and the
 * `level` member are visible in this excerpt; the other members and the
 * closing of the typedef are not shown. */
373 typedef struct BlockNode{
379 //#define TYPE_SPLIT 1
380 #define BLOCK_INTRA 1
382 //#define TYPE_NOCOLOR 4
383 uint8_t level; //FIXME merge into type?
/* Constant BlockNode whose color member is initialised to {128,128,128}.
 * NOTE(review): any further initialisers are not visible in this excerpt. */
386 static const BlockNode null_block= { //FIXME add border maybe
387 .color= {128,128,128},
/* Block size constants: MB_SIZE is 1 << LOG2_MB_SIZE, i.e. 16 with the value below. */
395 #define LOG2_MB_SIZE 4
396 #define MB_SIZE (1<<LOG2_MB_SIZE)
/* Element type pointed to by SubBand.x_coeff.
 * NOTE(review): the members are not visible in this excerpt. */
398 typedef struct x_and_coeff{
/* Description of one sub-band. Visible members: the quantiser log value, a
 * stride expressed in lines, a pointer to x_and_coeff entries, a pointer to the
 * parent sub-band and a (7 + 512) x 32 array of state bytes.
 * NOTE(review): further members exist in the original but are not visible here. */
403 typedef struct SubBand{
408 int qlog; ///< log(qscale)/log[2^(1/6)]
412 int stride_line; ///< Stride measured in lines, not pixels.
413 x_and_coeff * x_coeff;
414 struct SubBand *parent;
415 uint8_t state[/*7*2*/ 7 + 512][32];
/* Per-plane data. Visible member: a MAX_DECOMPOSITIONS x 4 array of SubBand.
 * NOTE(review): presumably indexed by decomposition level and then by one of
 * four orientations - confirm against the code that fills it. */
418 typedef struct Plane{
421 SubBand band[MAX_DECOMPOSITIONS][4];
/* Top-level codec context. The visible members cover the AVCodecContext
 * pointer, the input/current/reference pictures, the header and block state
 * bytes, the decomposition settings, per-reference motion vector and score
 * arrays, the DWT buffer, the planes, a motion-estimation cache and an embedded
 * MpegEncContext. QBIAS_SHIFT and ME_CACHE_SIZE are defined inside the struct
 * body. NOTE(review): many members are not visible in this excerpt. */
424 typedef struct SnowContext{
425 // MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to make the motion estimation eventually independent of MpegEncContext, so this will be removed then (FIXME/XXX)
427 AVCodecContext *avctx;
431 AVFrame input_picture; ///< new_picture with the internal linesizes
432 AVFrame current_picture;
433 AVFrame last_picture[MAX_REF_FRAMES];
434 AVFrame mconly_picture;
435 // uint8_t q_context[16];
436 uint8_t header_state[32];
437 uint8_t block_state[128 + 32*128];
441 int spatial_decomposition_type;
442 int last_spatial_decomposition_type;
443 int temporal_decomposition_type;
444 int spatial_decomposition_count;
445 int temporal_decomposition_count;
448 int16_t (*ref_mvs[MAX_REF_FRAMES])[2];
449 uint32_t *ref_scores[MAX_REF_FRAMES];
450 DWTELEM *spatial_dwt_buffer;
454 int spatial_scalability;
464 #define QBIAS_SHIFT 3
468 int last_block_max_depth;
469 Plane plane[MAX_PLANES];
471 #define ME_CACHE_SIZE 1024
472 int me_cache[ME_CACHE_SIZE];
473 int me_cache_generation;
476 MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to make the motion estimation eventually independent of MpegEncContext, so this will be removed then (FIXME/XXX)
/* Yield the buffer for line_num: the cached pointer slice_buf->line[line_num]
 * when it is non-NULL, otherwise the result of slice_buffer_load_line().
 * Both macro arguments are evaluated more than once, so they must be free of
 * side effects. The commented-out alternative always calls the loader. */
487 #define slice_buffer_get_line(slice_buf, line_num) ((slice_buf)->line[line_num] ? (slice_buf)->line[line_num] : slice_buffer_load_line((slice_buf), (line_num)))
488 //#define slice_buffer_get_line(slice_buf, line_num) (slice_buffer_load_line((slice_buf), (line_num)))
/* Forward declaration; the definition is not visible in this excerpt. */
490 static void iterative_me(SnowContext *s);
/**
 * Set up a slice_buffer.
 *
 * Stores base_buffer, line_count and line_width, records max_allocated_lines
 * as data_count, allocates a zero-filled table of line_count line pointers and
 * a stack of max_allocated_lines line buffers holding line_width DWTELEMs each.
 * The stack starts out full: data_stack_top is max_allocated_lines - 1.
 *
 * NOTE(review): no NULL check on the av_malloc()/av_mallocz() results is
 * visible in this excerpt - confirm how allocation failure is handled.
 */
492 static void slice_buffer_init(slice_buffer * buf, int line_count, int max_allocated_lines, int line_width, DWTELEM * base_buffer)
496 buf->base_buffer = base_buffer;
497 buf->line_count = line_count;
498 buf->line_width = line_width;
499 buf->data_count = max_allocated_lines;
500 buf->line = (DWTELEM * *) av_mallocz (sizeof(DWTELEM *) * line_count);
501 buf->data_stack = (DWTELEM * *) av_malloc (sizeof(DWTELEM *) * max_allocated_lines);
503 for (i = 0; i < max_allocated_lines; i++)
505 buf->data_stack[i] = (DWTELEM *) av_malloc (sizeof(DWTELEM) * line_width);
508 buf->data_stack_top = max_allocated_lines - 1;
/**
 * Attach a line buffer to the given line and return it.
 *
 * The top entry of the free stack (data_stack[data_stack_top]) is popped and
 * stored in buf->line[line]. An assert requires that the stack is not empty.
 *
 * NOTE(review): the condition guarding the early `return buf->line[line]` and
 * the use of `offset` are not visible in this excerpt; presumably the early
 * return fires when the line already has a buffer - confirm.
 */
511 static DWTELEM * slice_buffer_load_line(slice_buffer * buf, int line)
516 // av_log(NULL, AV_LOG_DEBUG, "Cache hit: %d\n", line);
518 assert(buf->data_stack_top >= 0);
519 // assert(!buf->line[line]);
521 return buf->line[line];
523 offset = buf->line_width * line;
524 buffer = buf->data_stack[buf->data_stack_top];
525 buf->data_stack_top--;
526 buf->line[line] = buffer;
528 // av_log(NULL, AV_LOG_DEBUG, "slice_buffer_load_line: line: %d remaining: %d\n", line, buf->data_stack_top + 1);
/**
 * Detach the buffer of the given line and return it to the free stack.
 *
 * Asserts that line is within [0, line_count) and that the line currently has
 * a buffer; the buffer is pushed onto data_stack and buf->line[line] is reset
 * to NULL.
 */
533 static void slice_buffer_release(slice_buffer * buf, int line)
538 assert(line >= 0 && line < buf->line_count);
539 assert(buf->line[line]);
541 offset = buf->line_width * line;
542 buffer = buf->line[line];
543 buf->data_stack_top++;
544 buf->data_stack[buf->data_stack_top] = buffer;
545 buf->line[line] = NULL;
547 // av_log(NULL, AV_LOG_DEBUG, "slice_buffer_release: line: %d remaining: %d\n", line, buf->data_stack_top + 1);
/**
 * Walk all line_count lines and release their buffers.
 *
 * NOTE(review): slice_buffer_release() asserts that the line has a buffer, so
 * the call is presumably guarded by a check that is not visible in this
 * excerpt - confirm.
 */
550 static void slice_buffer_flush(slice_buffer * buf)
553 for (i = 0; i < buf->line_count; i++)
557 // av_log(NULL, AV_LOG_DEBUG, "slice_buffer_flush: line: %d \n", i);
558 slice_buffer_release(buf, i);
/**
 * Free everything owned by a slice_buffer.
 *
 * Flushes the buffer first so that every line buffer is back on the stack,
 * then frees each of the data_count stack entries, the stack array itself and
 * the line pointer table. av_freep() is given the address of each pointer.
 */
563 static void slice_buffer_destroy(slice_buffer * buf)
566 slice_buffer_flush(buf);
568 for (i = buf->data_count - 1; i >= 0; i--)
570 assert(buf->data_stack[i]);
571 av_freep(&buf->data_stack[i]);
573 assert(buf->data_stack);
574 av_freep(&buf->data_stack);
576 av_freep(&buf->line);
580 // Avoid a name clash on SGI IRIX
/* QEXPSHIFT evaluates to 15 - FRAC_BITS; qexp is a QROOT-entry byte table.
 * NOTE(review): where qexp is filled and how QEXPSHIFT is applied is not
 * visible in this excerpt. */
583 #define QEXPSHIFT (7-FRAC_BITS+8) //FIXME try to change this to 0
584 static uint8_t qexp[QROOT];
/**
 * Bring v into the range [0, m].
 *
 * The loop runs while v is outside that range; the unsigned comparison also
 * treats negative v as out of range.
 * NOTE(review): the loop body is not visible in this excerpt; presumably it
 * reflects v at the range ends - confirm.
 */
586 static inline int mirror(int v, int m){
587 while((unsigned)v > (unsigned)m){
/**
 * Write the integer v with the range coder c using the adaptive states in state[].
 *
 * State layout, as given by the inline comments and the offsets used:
 *   state[0]       flag bit written first (0 before a magnitude is coded, 1 on
 *                  the other path - presumably the v == 0 case, confirm)
 *   state[1..10]   bits signalling the exponent e = av_log2(|v|)
 *   state[11..21]  sign bit (v < 0), written only when signed coding applies
 *   state[22..31]  magnitude bits (a>>i)&1, from bit e-1 downwards
 * Offsets for large exponents are clamped with FFMIN (to 9 for the exponent
 * and magnitude states, to 10 for the sign state).
 *
 * NOTE(review): three near-identical write sequences are visible; presumably
 * they are alternative implementations selected by preprocessor conditionals
 * that are not visible in this excerpt.
 */
594 static inline void put_symbol(RangeCoder *c, uint8_t *state, int v, int is_signed){
598 const int a= FFABS(v);
599 const int e= av_log2(a);
601 const int el= FFMIN(e, 10);
602 put_rac(c, state+0, 0);
605 put_rac(c, state+1+i, 1); //1..10
608 put_rac(c, state+1+9, 1); //1..10
610 put_rac(c, state+1+FFMIN(i,9), 0);
612 for(i=e-1; i>=el; i--){
613 put_rac(c, state+22+9, (a>>i)&1); //22..31
616 put_rac(c, state+22+i, (a>>i)&1); //22..31
620 put_rac(c, state+11 + el, v < 0); //11..21
623 put_rac(c, state+0, 0);
626 put_rac(c, state+1+i, 1); //1..10
628 put_rac(c, state+1+i, 0);
630 for(i=e-1; i>=0; i--){
631 put_rac(c, state+22+i, (a>>i)&1); //22..31
635 put_rac(c, state+11 + e, v < 0); //11..21
638 put_rac(c, state+1+FFMIN(i,9), 1); //1..10
640 put_rac(c, state+1+FFMIN(i,9), 0);
642 for(i=e-1; i>=0; i--){
643 put_rac(c, state+22+FFMIN(i,9), (a>>i)&1); //22..31
647 put_rac(c, state+11 + FFMIN(e,10), v < 0); //11..21
651 put_rac(c, state+0, 1);
/**
 * Read an integer with the range coder c using the adaptive states in state[].
 *
 * Reads the flag at state[0] first, then counts the exponent e from the bits
 * at state[1 + min(e,9)], rebuilds the magnitude by shifting in e bits read
 * from state[22 + min(i,9)] (highest bit first) and, when is_signed is set,
 * reads the sign from state[11 + min(e,10)].
 * NOTE(review): the statements taken when the state[0] flag or the sign bit is
 * set, and the return statements, are not visible in this excerpt.
 */
655 static inline int get_symbol(RangeCoder *c, uint8_t *state, int is_signed){
656 if(get_rac(c, state+0))
661 while(get_rac(c, state+1 + FFMIN(e,9))){ //1..10
666 for(i=e-1; i>=0; i--){
667 a += a + get_rac(c, state+22 + FFMIN(i,9)); //22..31
670 if(is_signed && get_rac(c, state+11 + FFMIN(e,10))) //11..21
/**
 * Write v with the range coder c using a scheme parameterised by log2.
 *
 * r starts at 1 << log2 (or 1 when log2 is negative). Bits equal to 1 and then
 * 0 are written with the state at state[4 + log2], followed by the bits
 * (v>>i)&1 for i = log2-1 down to 0, each using the state at state[31 - i].
 * NOTE(review): the loop around the "1" bits and any updates of v, r and log2
 * are not visible in this excerpt.
 */
677 static inline void put_symbol2(RangeCoder *c, uint8_t *state, int v, int log2){
679 int r= log2>=0 ? 1<<log2 : 1;
685 put_rac(c, state+4+log2, 1);
690 put_rac(c, state+4+log2, 0);
692 for(i=log2-1; i>=0; i--){
693 put_rac(c, state+31-i, (v>>i)&1);
/**
 * Read a value written in the put_symbol2() layout.
 *
 * r starts at 1 << log2 (or 1 when log2 is negative). The function loops while
 * the bit read with the state at state[4 + log2] is set, then adds
 * get_rac(...) << i for i = log2-1 down to 0, using the state at state[31 - i].
 * NOTE(review): the body of the while loop and the return statement are not
 * visible in this excerpt.
 */
697 static inline int get_symbol2(RangeCoder *c, uint8_t *state, int log2){
699 int r= log2>=0 ? 1<<log2 : 1;
704 while(get_rac(c, state+4+log2)){
710 for(i=log2-1; i>=0; i--){
711 v+= get_rac(c, state+31-i)<<i;
/**
 * One lifting step over strided samples.
 *
 * Each output is src plus (or, when inverse is non-zero, minus) a correction
 * computed from ref: (mul * (ref[i] + ref[i+1]) + add) >> shift for interior
 * samples, and (mul * 2 * ref + add) >> shift for the first sample (index 0)
 * and the last one (index w). dst, src and ref are addressed with their own
 * step sizes. The LIFT() macro defined here applies the sign of the correction.
 *
 * mirror_left is !highpass, mirror_right is (width&1) ^ highpass and
 * w is (width>>1) - 1 + (highpass & width).
 * NOTE(review): the conditions and loop header around the three assignments
 * are not visible in this excerpt; presumably the edge assignments are guarded
 * by mirror_left / mirror_right - confirm.
 */
717 static av_always_inline void lift(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int dst_step, int src_step, int ref_step, int width, int mul, int add, int shift, int highpass, int inverse){
718 const int mirror_left= !highpass;
719 const int mirror_right= (width&1) ^ highpass;
720 const int w= (width>>1) - 1 + (highpass & width);
723 #define LIFT(src, ref, inv) ((src) + ((inv) ? - (ref) : + (ref)))
725 dst[0] = LIFT(src[0], ((mul*2*ref[0]+add)>>shift), inverse);
731 dst[i*dst_step] = LIFT(src[i*src_step], ((mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add)>>shift), inverse);
735 dst[w*dst_step] = LIFT(src[w*src_step], ((mul*2*ref[w*ref_step]+add)>>shift), inverse);
/**
 * Lifting step variant whose correction is built in a local r.
 *
 * For interior samples r starts as 3 * (ref[i] + ref[i+1]); at the last sample
 * (index w) it starts as 3 * 2 * ref[w]. The output is LIFT(src, (r + add) >>
 * shift, inverse), i.e. src plus or minus the shifted correction.
 * mirror_left, mirror_right and w are computed exactly as in lift().
 * NOTE(review): the computation of r for the first sample, any further
 * adjustment of r, and the surrounding conditions/loop are not visible in
 * this excerpt.
 */
740 static av_always_inline void lift5(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int dst_step, int src_step, int ref_step, int width, int mul, int add, int shift, int highpass, int inverse){
741 const int mirror_left= !highpass;
742 const int mirror_right= (width&1) ^ highpass;
743 const int w= (width>>1) - 1 + (highpass & width);
750 dst[0] = LIFT(src[0], ((r+add)>>shift), inverse);
756 int r= 3*(ref[i*ref_step] + ref[(i+1)*ref_step]);
759 dst[i*dst_step] = LIFT(src[i*src_step], ((r+add)>>shift), inverse);
763 int r= 3*2*ref[w*ref_step];
766 dst[w*dst_step] = LIFT(src[w*src_step], ((r+add)>>shift), inverse);
/**
 * Lifting step variant built on the LIFTS() macro defined here.
 *
 * LIFTS(src, ref, inv) evaluates to
 *   inverse: src - ((ref - 4*src) >> shift)
 *   forward: (16*4*src + 4*ref + 8 + (5<<27)) / (5*16) - (1<<23)
 * The macro uses the enclosing function's `shift`. The ref argument passed in
 * is mul * (ref[i] + ref[i+1]) + add for interior samples and
 * mul * 2 * ref + add for the first sample (index 0) and the last (index w).
 * mirror_left, mirror_right and w are computed exactly as in lift().
 * NOTE(review): the conditions and loop header around the three assignments
 * are not visible in this excerpt.
 */
772 static av_always_inline void liftS(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int dst_step, int src_step, int ref_step, int width, int mul, int add, int shift, int highpass, int inverse){
773 const int mirror_left= !highpass;
774 const int mirror_right= (width&1) ^ highpass;
775 const int w= (width>>1) - 1 + (highpass & width);
779 #define LIFTS(src, ref, inv) ((inv) ? (src) - (((ref) - 4*(src))>>shift): (16*4*(src) + 4*(ref) + 8 + (5<<27))/(5*16) - (1<<23))
781 dst[0] = LIFTS(src[0], mul*2*ref[0]+add, inverse);
787 dst[i*dst_step] = LIFTS(src[i*src_step], mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add, inverse);
791 dst[w*dst_step] = LIFTS(src[w*src_step], mul*2*ref[w*ref_step]+add, inverse);
/**
 * Generic in-place lifting step on one row.
 *
 * For every second sample starting at `start`, a weighted sum of neighbouring
 * samples is formed: tap i uses position x2 = x + 2*i - n + 1, and positions at
 * or beyond `width` are reflected to 2*width - x2 - 2. The products are
 * accumulated after widening the sample to int64_t. The rounded, shifted sum
 * ((sum + (1<<shift)/2) >> shift) is added to dst[x], or subtracted when
 * inverse is non-zero.
 * NOTE(review): the loop over the n taps and the handling of negative x2 are
 * not visible in this excerpt.
 */
797 static void inplace_lift(DWTELEM *dst, int width, int *coeffs, int n, int shift, int start, int inverse){
800 for(x=start; x<width; x+=2){
804 int x2= x + 2*i - n + 1;
806 else if(x2>=width) x2= 2*width-x2-2;
807 sum += coeffs[i]*(int64_t)dst[x2];
809 if(inverse) dst[x] -= (sum + (1<<shift)/2)>>shift;
810 else dst[x] += (sum + (1<<shift)/2)>>shift;
/**
 * Vertical counterpart of inplace_lift(): the same step applied down columns.
 *
 * For every second row starting at `start` and every column x, tap i uses row
 * y2 = y + 2*i - n + 1, with rows at or beyond `height` reflected to
 * 2*height - y2 - 2. Samples are addressed as dst[x + row*stride]. The
 * rounded, shifted sum is added to the sample, or subtracted when inverse is
 * non-zero.
 * NOTE(review): the loop over the n taps and the handling of negative y2 are
 * not visible in this excerpt.
 */
814 static void inplace_liftV(DWTELEM *dst, int width, int height, int stride, int *coeffs, int n, int shift, int start, int inverse){
816 for(y=start; y<height; y+=2){
817 for(x=0; x<width; x++){
821 int y2= y + 2*i - n + 1;
823 else if(y2>=height) y2= 2*height-y2-2;
824 sum += coeffs[i]*(int64_t)dst[x + y2*stride];
826 if(inverse) dst[x + y*stride] -= (sum + (1<<shift)/2)>>shift;
827 else dst[x + y*stride] += (sum + (1<<shift)/2)>>shift;
/* Coefficient sets (COEFFS1..COEFFS4) passed to inplace_lift() and
 * inplace_liftV(). Each macro expands to an int array compound literal with
 * two or four taps. Several alternative groups are visible; the first follows
 * `#if 0 // more accurate 9/7`.
 * NOTE(review): the conditionals that select the remaining groups, and the
 * matching N*, SHIFT*, LX* and SCALEX definitions, are not visible in this
 * excerpt. */
836 #if 0 // more accurate 9/7
839 #define COEFFS1 (int[]){-25987,-25987}
842 #define COEFFS2 (int[]){-27777,-27777}
845 #define COEFFS3 (int[]){28931,28931}
848 #define COEFFS4 (int[]){14533,14533}
852 #define COEFFS1 (int[]){1,-9,-9,1}
855 #define COEFFS2 (int[]){-1,5,5,-1}
868 #define COEFFS1 (int[]){1,1}
871 #define COEFFS2 (int[]){-1,-1}
884 #define COEFFS2 (int[]){-1,-1}
887 #define COEFFS3 (int[]){-1,-1}
890 #define COEFFS4 (int[]){-5,29,29,-5}
895 #define COEFFS1 (int[]){-203,-203}
898 #define COEFFS2 (int[]){-217,-217}
901 #define COEFFS3 (int[]){113,113}
904 #define COEFFS4 (int[]){227,227}
912 #define COEFFS2 (int[]){-1,-1}
915 #define COEFFS3 (int[]){-1,-1}
918 #define COEFFS4 (int[]){3,3}
922 #define COEFFS1 (int[]){1,-9,-9,1}
925 #define COEFFS2 (int[]){1,1}
935 #define COEFFS1 (int[]){1,-9,-9,1}
938 #define COEFFS2 (int[]){-1,5,5,-1}
/**
 * Forward transform of one row using the generic lifting steps.
 *
 * Applies four forward inplace_lift() passes (COEFFS1 at LX1, COEFFS2 at LX0,
 * COEFFS3 at LX1, COEFFS4 at LX0), then de-interleaves the row through temp:
 * the odd samples b[2*x + 1] go to temp[x + w2], where w2 = (width+1)>>1, and
 * temp is copied back over b.
 * NOTE(review): the memcpy size uses sizeof(int), which assumes DWTELEM has
 * the size of int - confirm. The declaration of temp and the placement of the
 * even samples are not visible in this excerpt.
 */
946 static void horizontal_decomposeX(DWTELEM *b, int width){
948 const int width2= width>>1;
949 const int w2= (width+1)>>1;
952 inplace_lift(b, width, COEFFS1, N1, SHIFT1, LX1, 0);
953 inplace_lift(b, width, COEFFS2, N2, SHIFT2, LX0, 0);
954 inplace_lift(b, width, COEFFS3, N3, SHIFT3, LX1, 0);
955 inplace_lift(b, width, COEFFS4, N4, SHIFT4, LX0, 0);
957 for(x=0; x<width2; x++){
959 temp[x+w2]= b[2*x + 1];
963 memcpy(b, temp, width*sizeof(int));
/**
 * Inverse of horizontal_decomposeX() for one row.
 * Copies the row into temp, writes temp[x + w2] back to the odd positions
 * b[2*x + 1], then undoes the four lifting passes in reverse order
 * (COEFFS4 first, COEFFS1 last) with inverse=1.
 * NOTE(review): the copy is sized with sizeof(int) although the row is
 * declared as DWTELEM; this is only equivalent if DWTELEM is int - confirm.
 */
966 static void horizontal_composeX(DWTELEM *b, int width){
968 const int width2= width>>1;
970 const int w2= (width+1)>>1;
972 memcpy(temp, b, width*sizeof(int));
973 for(x=0; x<width2; x++){
975 b[2*x + 1]= temp[x+w2];
980 inplace_lift(b, width, COEFFS4, N4, SHIFT4, LX0, 1);
981 inplace_lift(b, width, COEFFS3, N3, SHIFT3, LX1, 1);
982 inplace_lift(b, width, COEFFS2, N2, SHIFT2, LX0, 1);
983 inplace_lift(b, width, COEFFS1, N1, SHIFT1, LX1, 1);
/**
 * Forward 2-D transform of a width x height region using the generic
 * lifting kernel. Three stages are visible: every sample is multiplied by
 * SCALEX, every row is passed to horizontal_decomposeX(), and finally the
 * four lifting passes are applied vertically with inplace_liftV().
 */
986 static void spatial_decomposeX(DWTELEM *buffer, int width, int height, int stride){
989 for(y=0; y<height; y++){
990 for(x=0; x<width; x++){
991 buffer[y*stride + x] *= SCALEX;
995 for(y=0; y<height; y++){
996 horizontal_decomposeX(buffer + y*stride, width);
999 inplace_liftV(buffer, width, height, stride, COEFFS1, N1, SHIFT1, LX1, 0);
1000 inplace_liftV(buffer, width, height, stride, COEFFS2, N2, SHIFT2, LX0, 0);
1001 inplace_liftV(buffer, width, height, stride, COEFFS3, N3, SHIFT3, LX1, 0);
1002 inplace_liftV(buffer, width, height, stride, COEFFS4, N4, SHIFT4, LX0, 0);
/**
 * Inverse of spatial_decomposeX(): the vertical lifting passes are undone in
 * reverse order (inverse=1), each row is passed to horizontal_composeX(),
 * and every sample is divided by SCALEX.
 */
1005 static void spatial_composeX(DWTELEM *buffer, int width, int height, int stride){
1008 inplace_liftV(buffer, width, height, stride, COEFFS4, N4, SHIFT4, LX0, 1);
1009 inplace_liftV(buffer, width, height, stride, COEFFS3, N3, SHIFT3, LX1, 1);
1010 inplace_liftV(buffer, width, height, stride, COEFFS2, N2, SHIFT2, LX0, 1);
1011 inplace_liftV(buffer, width, height, stride, COEFFS1, N1, SHIFT1, LX1, 1);
1013 for(y=0; y<height; y++){
1014 horizontal_composeX(buffer + y*stride, width);
1017 for(y=0; y<height; y++){
1018 for(x=0; x<width; x++){
1019 buffer[y*stride + x] /= SCALEX;
/**
 * Forward horizontal "53i" step for one row, using a width-sized scratch
 * array on the stack. Odd samples b[2*x + 1] are gathered at temp[x + w2];
 * the function ends with two lift() calls, the first writing to b+w2 and the
 * second to b. The A1..A4 loop in between belongs to a code path whose
 * surrounding lines are not part of this listing.
 */
1024 static void horizontal_decompose53i(DWTELEM *b, int width){
1025 DWTELEM temp[width];
1026 const int width2= width>>1;
1028 const int w2= (width+1)>>1;
1030 for(x=0; x<width2; x++){
1032 temp[x+w2]= b[2*x + 1];
1046 for(x=1; x+1<width2; x+=2){
1050 A2 += (A1 + A3 + 2)>>2;
1054 A1= temp[x+1+width2];
1057 A4 += (A1 + A3 + 2)>>2;
1063 A2 += (A1 + A3 + 2)>>2;
1068 lift(b+w2, temp+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 0);
1069 lift(b , temp , b+w2, 1, 1, 1, width, 1, 2, 2, 0, 0);
/**
 * For every column i in [0, width): subtracts (b0[i] + b2[i]) >> 1 from
 * b1[i]. b0 and b2 are the rows on either side of b1 in the callers.
 */
1073 static void vertical_decompose53iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1076 for(i=0; i<width; i++){
1077 b1[i] -= (b0[i] + b2[i])>>1;
/**
 * For every column i in [0, width): adds (b0[i] + b2[i] + 2) >> 2 to b1[i];
 * the +2 rounds the division by four.
 */
1081 static void vertical_decompose53iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1084 for(i=0; i<width; i++){
1085 b1[i] += (b0[i] + b2[i] + 2)>>2;
/**
 * Forward 2-D "53i" transform, processed two rows per iteration starting at
 * y = -2. Row indices are passed through mirror(idx, height-1) before being
 * scaled by stride. Each step is guarded by an (unsigned) comparison, so a
 * step whose row index is negative or >= height is skipped (assuming y is a
 * signed int; its declaration is not part of this listing).
 * Rows are transformed horizontally first, then the H0 and L0 vertical
 * steps are applied.
 */
1089 static void spatial_decompose53i(DWTELEM *buffer, int width, int height, int stride){
1091 DWTELEM *b0= buffer + mirror(-2-1, height-1)*stride;
1092 DWTELEM *b1= buffer + mirror(-2 , height-1)*stride;
1094 for(y=-2; y<height; y+=2){
1095 DWTELEM *b2= buffer + mirror(y+1, height-1)*stride;
1096 DWTELEM *b3= buffer + mirror(y+2, height-1)*stride;
1099 if(y+1<(unsigned)height) horizontal_decompose53i(b2, width);
1100 if(y+2<(unsigned)height) horizontal_decompose53i(b3, width);
1101 STOP_TIMER("horizontal_decompose53i")}
1104 if(y+1<(unsigned)height) vertical_decompose53iH0(b1, b2, b3, width);
1105 if(y+0<(unsigned)height) vertical_decompose53iL0(b0, b1, b2, width);
1106 STOP_TIMER("vertical_decompose53i*")}
/**
 * Forward horizontal "97i" step for one row. Four lifting calls are chained
 * through a width-sized stack scratch array: the first two read b and write
 * temp+w2 and temp, the last two read temp and write b+w2 and b. The
 * multiplier/offset/shift triples come from the W_AM..W_DS constants.
 */
1113 static void horizontal_decompose97i(DWTELEM *b, int width){
1114 DWTELEM temp[width];
1115 const int w2= (width+1)>>1;
1117 lift (temp+w2, b +1, b , 1, 2, 2, width, -W_AM, W_AO, W_AS, 1, 0);
1118 liftS(temp , b , temp+w2, 1, 2, 1, width, -W_BM, W_BO, W_BS, 0, 0);
1119 lift5(b +w2, temp+w2, temp , 1, 1, 1, width, W_CM, W_CO, W_CS, 1, 0);
1120 lift (b , temp , b +w2, 1, 1, 1, width, W_DM, W_DO, W_DS, 0, 0);
/**
 * For every column i in [0, width): subtracts
 * (W_AM*(b0[i] + b2[i]) + W_AO) >> W_AS from b1[i].
 */
1124 static void vertical_decompose97iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1127 for(i=0; i<width; i++){
1128 b1[i] -= (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
/**
 * For every column i in [0, width): adds a W_CM/W_CO/W_CS based term of
 * b0[i] + b2[i] to b1[i]. Two formulations are visible (a direct one and one
 * built from r = 3*(b0[i] + b2[i])); the lines that select between them and
 * the remaining steps on r are not part of this listing.
 */
1132 static void vertical_decompose97iH1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1135 for(i=0; i<width; i++){
1137 b1[i] += (W_CM*(b0[i] + b2[i])+W_CO)>>W_CS;
1139 int r= 3*(b0[i] + b2[i]);
1142 b1[i] += (r+W_CO)>>W_CS;
/**
 * For every column i in [0, width): updates b1[i] from b0[i] + b2[i].
 * Two formulations are visible: a W_BM/W_BO/W_BS shift form, and a form that
 * divides by 5*16 after adding a 5<<27 bias which is removed again by
 * subtracting 1<<23 (the bias equals (1<<23) * (5*16)). The lines selecting
 * between them are not part of this listing.
 */
1147 static void vertical_decompose97iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1150 for(i=0; i<width; i++){
1152 b1[i] -= (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS;
1154 b1[i] = (16*4*b1[i] - 4*(b0[i] + b2[i]) + 8*5 + (5<<27)) / (5*16) - (1<<23);
/**
 * For every column i in [0, width): adds
 * (W_DM*(b0[i] + b2[i]) + W_DO) >> W_DS to b1[i].
 */
1159 static void vertical_decompose97iL1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1162 for(i=0; i<width; i++){
1163 b1[i] += (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS;
/**
 * Forward 2-D "97i" transform, processed two rows per iteration starting at
 * y = -4 with six row pointers b0..b5. Row indices are passed through
 * mirror(idx, height-1) before being scaled by stride. Each step is guarded
 * by an (unsigned) comparison, so a step whose row index is negative or
 * >= height is skipped (assuming y is a signed int; its declaration is not
 * part of this listing). New rows are transformed horizontally, then the
 * vertical steps run in the order H0, L0, H1, L1.
 */
1167 static void spatial_decompose97i(DWTELEM *buffer, int width, int height, int stride){
1169 DWTELEM *b0= buffer + mirror(-4-1, height-1)*stride;
1170 DWTELEM *b1= buffer + mirror(-4 , height-1)*stride;
1171 DWTELEM *b2= buffer + mirror(-4+1, height-1)*stride;
1172 DWTELEM *b3= buffer + mirror(-4+2, height-1)*stride;
1174 for(y=-4; y<height; y+=2){
1175 DWTELEM *b4= buffer + mirror(y+3, height-1)*stride;
1176 DWTELEM *b5= buffer + mirror(y+4, height-1)*stride;
1179 if(y+3<(unsigned)height) horizontal_decompose97i(b4, width);
1180 if(y+4<(unsigned)height) horizontal_decompose97i(b5, width);
1182 STOP_TIMER("horizontal_decompose97i")
1186 if(y+3<(unsigned)height) vertical_decompose97iH0(b3, b4, b5, width);
1187 if(y+2<(unsigned)height) vertical_decompose97iL0(b2, b3, b4, width);
1188 if(y+1<(unsigned)height) vertical_decompose97iH1(b1, b2, b3, width);
1189 if(y+0<(unsigned)height) vertical_decompose97iL1(b0, b1, b2, width);
1192 STOP_TIMER("vertical_decompose97i")
/**
 * Forward multi-level transform. For level 0 .. decomposition_count-1 the
 * decomposer matching the DWT_97 / DWT_53 / DWT_X case is called on the same
 * buffer with width and height shifted right by level and stride shifted
 * left by level. The selecting statement itself is not part of this listing
 * (presumably a switch on type).
 */
1202 void ff_spatial_dwt(DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
1205 for(level=0; level<decomposition_count; level++){
1207 case DWT_97: spatial_decompose97i(buffer, width>>level, height>>level, stride<<level); break;
1208 case DWT_53: spatial_decompose53i(buffer, width>>level, height>>level, stride<<level); break;
1209 case DWT_X: spatial_decomposeX (buffer, width>>level, height>>level, stride<<level); break;
/**
 * Inverse horizontal "53i" step for one row, using a width-sized stack
 * scratch array. Two lift() calls fill temp and temp+w2 from b and b+w2,
 * after which temp[x + w2] is written back to the odd positions b[2*x + 1].
 * The A1..A4 loop belongs to a code path whose surrounding lines are not
 * part of this listing.
 */
1214 static void horizontal_compose53i(DWTELEM *b, int width){
1215 DWTELEM temp[width];
1216 const int width2= width>>1;
1217 const int w2= (width+1)>>1;
1229 for(x=1; x+1<width2; x+=2){
1233 A2 += (A1 + A3 + 2)>>2;
1237 A1= temp[x+1+width2];
1240 A4 += (A1 + A3 + 2)>>2;
1246 A2 += (A1 + A3 + 2)>>2;
1250 lift(temp , b , b+w2, 1, 1, 1, width, 1, 2, 2, 0, 1);
1251 lift(temp+w2, b+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 1);
1253 for(x=0; x<width2; x++){
1255 b[2*x + 1]= temp[x+w2];
/**
 * For every column i in [0, width): adds (b0[i] + b2[i]) >> 1 to b1[i],
 * the opposite sign of vertical_decompose53iH0().
 */
1261 static void vertical_compose53iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1264 for(i=0; i<width; i++){
1265 b1[i] += (b0[i] + b2[i])>>1;
/**
 * For every column i in [0, width): subtracts (b0[i] + b2[i] + 2) >> 2 from
 * b1[i], the opposite sign of vertical_decompose53iL0().
 */
1269 static void vertical_compose53iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1272 for(i=0; i<width; i++){
1273 b1[i] -= (b0[i] + b2[i] + 2)>>2;
/**
 * Primes a compose state for the slice-buffer based "53i" inverse: cs->b0
 * and cs->b1 are set to the slice-buffer lines for mirrored row indices -2
 * and -1, each multiplied by stride_line. Further initialisation (if any) is
 * on lines not part of this listing.
 */
1277 static void spatial_compose53i_buffered_init(dwt_compose_t *cs, slice_buffer * sb, int height, int stride_line){
1278 cs->b0 = slice_buffer_get_line(sb, mirror(-1-1, height-1) * stride_line);
1279 cs->b1 = slice_buffer_get_line(sb, mirror(-1 , height-1) * stride_line);
/**
 * Primes a compose state for the plain-buffer "53i" inverse: cs->b0 and
 * cs->b1 point at the rows for mirrored indices -2 and -1. Further
 * initialisation (if any) is on lines not part of this listing.
 */
1283 static void spatial_compose53i_init(dwt_compose_t *cs, DWTELEM *buffer, int height, int stride){
1284 cs->b0 = buffer + mirror(-1-1, height-1)*stride;
1285 cs->b1 = buffer + mirror(-1 , height-1)*stride;
/**
 * One step of the slice-buffer based "53i" inverse. Uses the two rows kept
 * in cs plus two newly fetched slice-buffer lines, applies the vertical L0
 * and H0 compose steps and then the horizontal compose on b0 and b1. Each
 * call is guarded by an (unsigned) comparison so that steps whose row index
 * falls outside [0, height) are skipped. The declaration of y and the state
 * update at the end are not part of this listing.
 */
1289 static void spatial_compose53i_dy_buffered(dwt_compose_t *cs, slice_buffer * sb, int width, int height, int stride_line){
1292 DWTELEM *b0= cs->b0;
1293 DWTELEM *b1= cs->b1;
1294 DWTELEM *b2= slice_buffer_get_line(sb, mirror(y+1, height-1) * stride_line);
1295 DWTELEM *b3= slice_buffer_get_line(sb, mirror(y+2, height-1) * stride_line);
1298 if(y+1<(unsigned)height) vertical_compose53iL0(b1, b2, b3, width);
1299 if(y+0<(unsigned)height) vertical_compose53iH0(b0, b1, b2, width);
1300 STOP_TIMER("vertical_compose53i*")}
1303 if(y-1<(unsigned)height) horizontal_compose53i(b0, width);
1304 if(y+0<(unsigned)height) horizontal_compose53i(b1, width);
1305 STOP_TIMER("horizontal_compose53i")}
/**
 * One step of the plain-buffer "53i" inverse; same sequence as
 * spatial_compose53i_dy_buffered() but rows are addressed directly as
 * buffer + mirror(idx, height-1)*stride. The declaration of y and the state
 * update at the end are not part of this listing.
 */
1312 static void spatial_compose53i_dy(dwt_compose_t *cs, DWTELEM *buffer, int width, int height, int stride){
1314 DWTELEM *b0= cs->b0;
1315 DWTELEM *b1= cs->b1;
1316 DWTELEM *b2= buffer + mirror(y+1, height-1)*stride;
1317 DWTELEM *b3= buffer + mirror(y+2, height-1)*stride;
1320 if(y+1<(unsigned)height) vertical_compose53iL0(b1, b2, b3, width);
1321 if(y+0<(unsigned)height) vertical_compose53iH0(b0, b1, b2, width);
1322 STOP_TIMER("vertical_compose53i*")}
1325 if(y-1<(unsigned)height) horizontal_compose53i(b0, width);
1326 if(y+0<(unsigned)height) horizontal_compose53i(b1, width);
1327 STOP_TIMER("horizontal_compose53i")}
/**
 * Whole-picture "53i" inverse: initialises a compose state and keeps calling
 * spatial_compose53i_dy() while cs.y <= height.
 */
1334 static void spatial_compose53i(DWTELEM *buffer, int width, int height, int stride){
1336 spatial_compose53i_init(&cs, buffer, height, stride);
1337 while(cs.y <= height)
1338 spatial_compose53i_dy(&cs, buffer, width, height, stride);
/**
 * Inverse horizontal "97i" step for one row. The four lifting calls mirror
 * horizontal_decompose97i() in reverse order (D, C, B, A constants) with the
 * last argument set to 1; the first two write into a width-sized stack
 * scratch array, the last two write the result back into b.
 */
1342 void ff_snow_horizontal_compose97i(DWTELEM *b, int width){
1343 DWTELEM temp[width];
1344 const int w2= (width+1)>>1;
1346 lift (temp , b , b +w2, 1, 1, 1, width, W_DM, W_DO, W_DS, 0, 1);
1347 lift5(temp+w2, b +w2, temp , 1, 1, 1, width, W_CM, W_CO, W_CS, 1, 1);
1348 liftS(b , temp , temp+w2, 2, 1, 1, width, -W_BM, W_BO, W_BS, 0, 1);
1349 lift (b+1 , temp+w2, b , 2, 1, 2, width, -W_AM, W_AO, W_AS, 1, 1);
/**
 * For every column i in [0, width): adds
 * (W_AM*(b0[i] + b2[i]) + W_AO) >> W_AS to b1[i].
 */
1352 static void vertical_compose97iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1355 for(i=0; i<width; i++){
1356 b1[i] += (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
/**
 * For every column i in [0, width): subtracts a W_CM/W_CO/W_CS based term of
 * b0[i] + b2[i] from b1[i]. Two formulations are visible; the lines that
 * select between them and the remaining steps on r are not part of this
 * listing.
 */
1360 static void vertical_compose97iH1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1363 for(i=0; i<width; i++){
1365 b1[i] -= (W_CM*(b0[i] + b2[i])+W_CO)>>W_CS;
1367 int r= 3*(b0[i] + b2[i]);
1370 b1[i] -= (r+W_CO)>>W_CS;
/**
 * For every column i in [0, width): adds a W_BM/W_BO/W_BS based term of
 * b0[i] + b2[i] to b1[i]. Two formulations are visible, the second also
 * feeding 4*b1[i] into the shifted sum; the lines selecting between them are
 * not part of this listing.
 */
1375 static void vertical_compose97iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1378 for(i=0; i<width; i++){
1380 b1[i] += (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS;
1382 b1[i] += (W_BM*(b0[i] + b2[i])+4*b1[i]+W_BO)>>W_BS;
/**
 * For every column i in [0, width): subtracts
 * (W_DM*(b0[i] + b2[i]) + W_DO) >> W_DS from b1[i].
 */
1387 static void vertical_compose97iL1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1390 for(i=0; i<width; i++){
1391 b1[i] -= (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS;
/**
 * Combined vertical "97i" compose over six consecutive rows. For each column
 * the updates are applied in the order b4 (W_D constants), b3 (W_C), b2
 * (W_B), b1 (W_A), so each step sees the already updated neighbour row.
 * Alternative formulations for the b3 and b2 updates are visible; the lines
 * selecting between them are not part of this listing.
 */
1395 void ff_snow_vertical_compose97i(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, DWTELEM *b3, DWTELEM *b4, DWTELEM *b5, int width){
1398 for(i=0; i<width; i++){
1402 b4[i] -= (W_DM*(b3[i] + b5[i])+W_DO)>>W_DS;
1404 b3[i] -= (W_CM*(b2[i] + b4[i])+W_CO)>>W_CS;
1406 r= 3*(b2[i] + b4[i]);
1409 b3[i] -= (r+W_CO)>>W_CS;
1412 b2[i] += (W_BM*(b1[i] + b3[i])+W_BO)>>W_BS;
1414 b2[i] += (W_BM*(b1[i] + b3[i])+4*b2[i]+W_BO)>>W_BS;
1416 b1[i] += (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
/**
 * Primes a compose state for the slice-buffer based "97i" inverse: cs->b0 to
 * cs->b3 are set to the slice-buffer lines for mirrored row indices -4 to
 * -1, each multiplied by stride_line. Further initialisation (if any) is on
 * lines not part of this listing.
 */
1420 static void spatial_compose97i_buffered_init(dwt_compose_t *cs, slice_buffer * sb, int height, int stride_line){
1421 cs->b0 = slice_buffer_get_line(sb, mirror(-3-1, height-1) * stride_line);
1422 cs->b1 = slice_buffer_get_line(sb, mirror(-3 , height-1) * stride_line);
1423 cs->b2 = slice_buffer_get_line(sb, mirror(-3+1, height-1) * stride_line);
1424 cs->b3 = slice_buffer_get_line(sb, mirror(-3+2, height-1) * stride_line);
/**
 * Primes a compose state for the plain-buffer "97i" inverse: cs->b0 to
 * cs->b3 point at the rows for mirrored indices -4 to -1. Further
 * initialisation (if any) is on lines not part of this listing.
 */
1428 static void spatial_compose97i_init(dwt_compose_t *cs, DWTELEM *buffer, int height, int stride){
1429 cs->b0 = buffer + mirror(-3-1, height-1)*stride;
1430 cs->b1 = buffer + mirror(-3 , height-1)*stride;
1431 cs->b2 = buffer + mirror(-3+1, height-1)*stride;
1432 cs->b3 = buffer + mirror(-3+2, height-1)*stride;
/**
 * One step of the slice-buffer based "97i" inverse. Uses the four rows kept
 * in cs plus two newly fetched slice-buffer lines. When y>0 and y+4<height
 * every individual guard below would be true, and the combined
 * dsp->vertical_compose97i() is called; the four individually guarded calls
 * are also visible, but the line joining the two paths is not part of this
 * listing. Afterwards b0 and b1 are composed horizontally through dsp. The
 * declaration of y and the state update at the end are not part of this
 * listing.
 */
1436 static void spatial_compose97i_dy_buffered(DSPContext *dsp, dwt_compose_t *cs, slice_buffer * sb, int width, int height, int stride_line){
1439 DWTELEM *b0= cs->b0;
1440 DWTELEM *b1= cs->b1;
1441 DWTELEM *b2= cs->b2;
1442 DWTELEM *b3= cs->b3;
1443 DWTELEM *b4= slice_buffer_get_line(sb, mirror(y + 3, height - 1) * stride_line);
1444 DWTELEM *b5= slice_buffer_get_line(sb, mirror(y + 4, height - 1) * stride_line);
1447 if(y>0 && y+4<height){
1448 dsp->vertical_compose97i(b0, b1, b2, b3, b4, b5, width);
1450 if(y+3<(unsigned)height) vertical_compose97iL1(b3, b4, b5, width);
1451 if(y+2<(unsigned)height) vertical_compose97iH1(b2, b3, b4, width);
1452 if(y+1<(unsigned)height) vertical_compose97iL0(b1, b2, b3, width);
1453 if(y+0<(unsigned)height) vertical_compose97iH0(b0, b1, b2, width);
1456 STOP_TIMER("vertical_compose97i")}}
1459 if(y-1<(unsigned)height) dsp->horizontal_compose97i(b0, width);
1460 if(y+0<(unsigned)height) dsp->horizontal_compose97i(b1, width);
1461 if(width>400 && y+0<(unsigned)height){
1462 STOP_TIMER("horizontal_compose97i")}}
/**
 * One step of the plain-buffer "97i" inverse. Rows b4 and b5 are addressed
 * as buffer + mirror(idx, height-1)*stride; the vertical steps run in the
 * order L1, H1, L0, H0, each guarded so that row indices outside [0, height)
 * are skipped, followed by ff_snow_horizontal_compose97i() on b0 and b1.
 * The declaration of y and the state update at the end are not part of this
 * listing.
 */
1471 static void spatial_compose97i_dy(dwt_compose_t *cs, DWTELEM *buffer, int width, int height, int stride){
1473 DWTELEM *b0= cs->b0;
1474 DWTELEM *b1= cs->b1;
1475 DWTELEM *b2= cs->b2;
1476 DWTELEM *b3= cs->b3;
1477 DWTELEM *b4= buffer + mirror(y+3, height-1)*stride;
1478 DWTELEM *b5= buffer + mirror(y+4, height-1)*stride;
1481 if(y+3<(unsigned)height) vertical_compose97iL1(b3, b4, b5, width);
1482 if(y+2<(unsigned)height) vertical_compose97iH1(b2, b3, b4, width);
1483 if(y+1<(unsigned)height) vertical_compose97iL0(b1, b2, b3, width);
1484 if(y+0<(unsigned)height) vertical_compose97iH0(b0, b1, b2, width);
1486 STOP_TIMER("vertical_compose97i")}}
1489 if(y-1<(unsigned)height) ff_snow_horizontal_compose97i(b0, width);
1490 if(y+0<(unsigned)height) ff_snow_horizontal_compose97i(b1, width);
// NOTE(review): the buffered variant gates this timer on y+0<(unsigned)height
// rather than on b0 <= b2 - confirm whether the difference is intended
1491 if(width>400 && b0 <= b2){
1492 STOP_TIMER("horizontal_compose97i")}}
/**
 * Whole-picture "97i" inverse: initialises a compose state and keeps calling
 * spatial_compose97i_dy() while cs.y <= height.
 */
1501 static void spatial_compose97i(DWTELEM *buffer, int width, int height, int stride){
1503 spatial_compose97i_init(&cs, buffer, height, stride);
1504 while(cs.y <= height)
1505 spatial_compose97i_dy(&cs, buffer, width, height, stride);
/**
 * Initialises one compose state per decomposition level (cs + level) for the
 * slice-buffer based inverse transform, walking from the highest level down
 * to 0 with height shifted right and stride_line shifted left by level.
 * For DWT_X only an error is logged, since no buffered variant exists.
 */
1508 static void ff_spatial_idwt_buffered_init(dwt_compose_t *cs, slice_buffer * sb, int width, int height, int stride_line, int type, int decomposition_count){
1510 for(level=decomposition_count-1; level>=0; level--){
1512 case DWT_97: spatial_compose97i_buffered_init(cs+level, sb, height>>level, stride_line<<level); break;
1513 case DWT_53: spatial_compose53i_buffered_init(cs+level, sb, height>>level, stride_line<<level); break;
1514 /* not slicified yet */
1515 case DWT_X: /*spatial_composeX(buffer, width>>level, height>>level, stride<<level); break;*/
1516 av_log(NULL, AV_LOG_ERROR, "spatial_composeX neither buffered nor slicified yet.\n"); break;
/**
 * Initialises one compose state per decomposition level (cs + level) for the
 * plain-buffer inverse transform, walking from the highest level down to 0.
 * For DWT_X there is no incremental variant, so the complete
 * spatial_composeX() for that level is run here instead.
 */
1521 static void ff_spatial_idwt_init(dwt_compose_t *cs, DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
1523 for(level=decomposition_count-1; level>=0; level--){
1525 case DWT_97: spatial_compose97i_init(cs+level, buffer, height>>level, stride<<level); break;
1526 case DWT_53: spatial_compose53i_init(cs+level, buffer, height>>level, stride<<level); break;
1527 /* not slicified yet */
1528 case DWT_X: spatial_composeX(buffer, width>>level, height>>level, stride<<level); break;
/**
 * Advances the plain-buffer inverse transform far enough for output row y.
 * From the highest level down to 0, the per-level _dy step is repeated
 * while that level's cs[level].y is at most
 * min((y>>level) + support, height>>level). support is 3 when type==1 and 5
 * otherwise.
 */
1533 static void ff_spatial_idwt_slice(dwt_compose_t *cs, DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count, int y){
1534 const int support = type==1 ? 3 : 5;
1538 for(level=decomposition_count-1; level>=0; level--){
1539 while(cs[level].y <= FFMIN((y>>level)+support, height>>level)){
1541 case DWT_97: spatial_compose97i_dy(cs+level, buffer, width>>level, height>>level, stride<<level);
1543 case DWT_53: spatial_compose53i_dy(cs+level, buffer, width>>level, height>>level, stride<<level);
/**
 * Slice-buffer counterpart of ff_spatial_idwt_slice(): from the highest
 * level down to 0, the buffered per-level _dy step is repeated while
 * cs[level].y is at most min((y>>level) + support, height>>level).
 * support is 3 when type==1 and 5 otherwise.
 */
1551 static void ff_spatial_idwt_buffered_slice(DSPContext *dsp, dwt_compose_t *cs, slice_buffer * slice_buf, int width, int height, int stride_line, int type, int decomposition_count, int y){
1552 const int support = type==1 ? 3 : 5;
1556 for(level=decomposition_count-1; level>=0; level--){
1557 while(cs[level].y <= FFMIN((y>>level)+support, height>>level)){
1559 case DWT_97: spatial_compose97i_dy_buffered(dsp, cs+level, slice_buf, width>>level, height>>level, stride_line<<level);
1561 case DWT_53: spatial_compose53i_dy_buffered(cs+level, slice_buf, width>>level, height>>level, stride_line<<level);
/**
 * Whole-picture inverse transform. Two paths are visible: one runs
 * spatial_composeX() for every level from the highest down to 0, the other
 * sets up per-level compose states and drives ff_spatial_idwt_slice() in
 * steps of four rows. The condition choosing between them is not part of
 * this listing.
 */
1569 static void ff_spatial_idwt(DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
1572 for(level=decomposition_count-1; level>=0; level--)
1573 spatial_composeX (buffer, width>>level, height>>level, stride<<level);
1575 dwt_compose_t cs[MAX_DECOMPOSITIONS];
1577 ff_spatial_idwt_init(cs, buffer, width, height, stride, type, decomposition_count);
1578 for(y=0; y<height; y+=4)
1579 ff_spatial_idwt_slice(cs, buffer, width, height, stride, type, decomposition_count, y);
/**
 * Writes the coefficients of one subband to the range coder s->c.
 * Two scans over the subband are visible. Both gather the neighbours of each
 * coefficient v (left l, top-left lt, top t, top-right rt, and the co-located
 * parent coefficient p when px/py lie inside the parent band). The first
 * scan stores run lengths in runs[] for positions whose context
 * l|lt|t|rt|p is all zero; the second scan emits the symbols, using a
 * context-coded significance flag when the context is non-zero and the
 * stored runs otherwise. Before each row of the second scan the remaining
 * output space is checked against w*40 bytes and an error is logged if it is
 * too small. Loop headers, the run bookkeeping and the return statements are
 * on lines not part of this listing.
 */
1583 static int encode_subband_c0run(SnowContext *s, SubBand *b, DWTELEM *src, DWTELEM *parent, int stride, int orientation){
1584 const int w= b->width;
1585 const int h= b->height;
1597 int /*ll=0, */l=0, lt=0, t=0, rt=0;
1598 v= src[x + y*stride];
1601 t= src[x + (y-1)*stride];
1603 lt= src[x - 1 + (y-1)*stride];
1606 rt= src[x + 1 + (y-1)*stride];
1610 l= src[x - 1 + y*stride];
1612 if(orientation==1) ll= src[y + (x-2)*stride];
1613 else ll= src[x - 2 + y*stride];
1619 if(px<b->parent->width && py<b->parent->height)
1620 p= parent[px + py*2*stride];
1622 if(!(/*ll|*/l|lt|t|rt|p)){
1624 runs[run_index++]= run;
1632 max_index= run_index;
1633 runs[run_index++]= run;
1635 run= runs[run_index++];
1637 put_symbol2(&s->c, b->state[30], max_index, 0);
1638 if(run_index <= max_index)
1639 put_symbol2(&s->c, b->state[1], run, 3);
1642 if(s->c.bytestream_end - s->c.bytestream < w*40){
1643 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
1648 int /*ll=0, */l=0, lt=0, t=0, rt=0;
1649 v= src[x + y*stride];
1652 t= src[x + (y-1)*stride];
1654 lt= src[x - 1 + (y-1)*stride];
1657 rt= src[x + 1 + (y-1)*stride];
1661 l= src[x - 1 + y*stride];
1663 if(orientation==1) ll= src[y + (x-2)*stride];
1664 else ll= src[x - 2 + y*stride];
1670 if(px<b->parent->width && py<b->parent->height)
1671 p= parent[px + py*2*stride];
1673 if(/*ll|*/l|lt|t|rt|p){
1674 int context= av_log2(/*FFABS(ll) + */3*FFABS(l) + FFABS(lt) + 2*FFABS(t) + FFABS(rt) + FFABS(p));
1676 put_rac(&s->c, &b->state[0][context], !!v);
1679 run= runs[run_index++];
1681 if(run_index <= max_index)
1682 put_symbol2(&s->c, b->state[1], run, 3);
1690 int context= av_log2(/*FFABS(ll) + */3*FFABS(l) + FFABS(lt) + 2*FFABS(t) + FFABS(rt) + FFABS(p));
1691 int l2= 2*FFABS(l) + (l<0);
1692 int t2= 2*FFABS(t) + (t<0);
1694 put_symbol2(&s->c, b->state[context + 2], FFABS(v)-1, context-4);
1695 put_rac(&s->c, &b->state[0][16 + 1 + 3 + quant3bA[l2&0xFF] + 3*quant3bA[t2&0xFF]], v<0);
/**
 * Subband encoder entry point; forwards all arguments to
 * encode_subband_c0run() and returns its result. The commented-out calls
 * name alternative coders that are not used.
 */
1703 static int encode_subband(SnowContext *s, SubBand *b, DWTELEM *src, DWTELEM *parent, int stride, int orientation){
1704 // encode_subband_qtree(s, b, src, parent, stride, orientation);
1705 // encode_subband_z0run(s, b, src, parent, stride, orientation);
1706 return encode_subband_c0run(s, b, src, parent, stride, orientation);
1707 // encode_subband_dzr(s, b, src, parent, stride, orientation);
/**
 * Reads the coefficients of one subband from the range coder s->c into the
 * sparse list b->x_coeff, whose entries carry an x position and a coeff
 * value. Decoded values are built as 2*(magnitude symbol + 1) plus one
 * further decoded bit in bit 0. An entry with x = w+1 is appended as an end
 * marker. prev_xc and parent_xc walk the previously written row and the
 * parent band's list to form the contexts; loop headers and most of the
 * pointer bookkeeping are on lines not part of this listing.
 */
1710 static inline void unpack_coeffs(SnowContext *s, SubBand *b, SubBand * parent, int orientation){
1711 const int w= b->width;
1712 const int h= b->height;
1717 x_and_coeff *xc= b->x_coeff;
1718 x_and_coeff *prev_xc= NULL;
1719 x_and_coeff *prev2_xc= xc;
1720 x_and_coeff *parent_xc= parent ? parent->x_coeff : NULL;
1721 x_and_coeff *prev_parent_xc= parent_xc;
1723 runs= get_symbol2(&s->c, b->state[30], 0);
1724 if(runs-- > 0) run= get_symbol2(&s->c, b->state[1], 3);
1729 int lt=0, t=0, rt=0;
1731 if(y && prev_xc->x == 0){
1743 if(prev_xc->x == x + 1)
1749 if(x>>1 > parent_xc->x){
1752 if(x>>1 == parent_xc->x){
1753 p= parent_xc->coeff;
1756 if(/*ll|*/l|lt|t|rt|p){
1757 int context= av_log2(/*FFABS(ll) + */3*(l>>1) + (lt>>1) + (t&~1) + (rt>>1) + (p>>1));
1759 v=get_rac(&s->c, &b->state[0][context]);
1761 v= 2*(get_symbol2(&s->c, b->state[context + 2], context-4) + 1);
1762 v+=get_rac(&s->c, &b->state[0][16 + 1 + 3 + quant3bA[l&0xFF] + 3*quant3bA[t&0xFF]]);
1769 if(runs-- > 0) run= get_symbol2(&s->c, b->state[1], 3);
1771 v= 2*(get_symbol2(&s->c, b->state[0 + 2], 0-4) + 1);
1772 v+=get_rac(&s->c, &b->state[0][16 + 1 + 3]);
1781 if(y) max_run= FFMIN(run, prev_xc->x - x - 2);
1782 else max_run= FFMIN(run, w-x-1);
1784 max_run= FFMIN(max_run, 2*parent_xc->x - x - 1);
1790 (xc++)->x= w+1; //end marker
1796 while(parent_xc->x != parent->width+1)
1799 prev_parent_xc= parent_xc;
1801 parent_xc= prev_parent_xc;
1806 (xc++)->x= w+1; //end marker
/**
 * Dequantises rows start_y .. h-1 of one subband from b->x_coeff into the
 * slice buffer. Each target line is cleared first, then every stored
 * coefficient is written at its x position.
 * save_state[0] carries the read index into b->x_coeff from one slice call
 * to the next.
 */
1810 static inline void decode_subband_slice_buffered(SnowContext *s, SubBand *b, slice_buffer * sb, int start_y, int h, int save_state[1]){
1811 const int w= b->width;
1813 const int qlog= clip(s->qlog + b->qlog, 0, QROOT*16);
1814 int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
1815 int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
1820 if(b->buf == s->spatial_dwt_buffer || s->qlog == LOSSLESS_QLOG){
1825 /* If we are on the second or later slice, restore our index. */
1827 new_index = save_state[0];
1830 for(y=start_y; y<h; y++){
1833 DWTELEM * line = slice_buffer_get_line(sb, y * b->stride_line + b->buf_y_offset) + b->buf_x_offset;
1834 memset(line, 0, b->width*sizeof(DWTELEM));
1835 v = b->x_coeff[new_index].coeff;
1836 x = b->x_coeff[new_index++].x;
// v>>1 is the quantised magnitude and bit 0 the sign: u is 0 or -1, and
// (t^u) - u yields t or -t respectively
1839 register int t= ( (v>>1)*qmul + qadd)>>QEXPSHIFT;
1840 register int u= -(v&1);
1841 line[x] = (t^u) - u;
1843 v = b->x_coeff[new_index].coeff;
1844 x = b->x_coeff[new_index++].x;
1847 if(w > 200 && start_y != 0/*level+1 == s->spatial_decomposition_count*/){
1848 STOP_TIMER("decode_subband")
1851 /* Save our variables for the next slice. */
1852 save_state[0] = new_index;
/**
 * Resets all adaptive coder states to MID_STATE: the state array of every
 * subband of the three planes, plus s->header_state and s->block_state.
 * For level 0 the orientations 0..3 are visited, for higher levels 1..3.
 */
1857 static void reset_contexts(SnowContext *s){ //FIXME better initial contexts
1858 int plane_index, level, orientation;
1860 for(plane_index=0; plane_index<3; plane_index++){
1861 for(level=0; level<s->spatial_decomposition_count; level++){
1862 for(orientation=level ? 1:0; orientation<4; orientation++){
1863 memset(s->plane[plane_index].band[level][orientation].state, MID_STATE, sizeof(s->plane[plane_index].band[level][orientation].state));
1867 memset(s->header_state, MID_STATE, sizeof(s->header_state));
1868 memset(s->block_state, MID_STATE, sizeof(s->block_state));
/**
 * Allocates the zero-initialised block array s->block.
 * w and h are the picture dimensions divided by 1<<LOG2_MB_SIZE, rounded up
 * (negate, shift, negate); the element count is w*h scaled by
 * 4^block_max_depth.
 * NOTE(review): the av_mallocz() result is not checked on the visible lines;
 * confirm that the missing lines or the caller handle a NULL s->block.
 */
1871 static int alloc_blocks(SnowContext *s){
1872 int w= -((-s->avctx->width )>>LOG2_MB_SIZE);
1873 int h= -((-s->avctx->height)>>LOG2_MB_SIZE);
1878 s->block= av_mallocz(w * h * sizeof(BlockNode) << (s->block_max_depth*2));
/**
 * Saves d's bytestream and bytestream_start pointers and writes them back at
 * the end, so whatever is done to *d in between (on a line not part of this
 * listing, presumably a copy from *s) leaves d's output position untouched.
 */
1882 static inline void copy_rac_state(RangeCoder *d, RangeCoder *s){
1883 uint8_t *bytestream= d->bytestream;
1884 uint8_t *bytestream_start= d->bytestream_start;
1886 d->bytestream= bytestream;
1887 d->bytestream_start= bytestream_start;
1890 //near copy & paste from dsputil, FIXME
/**
 * Walks a w x w square of pixels starting at pix; after each row of w
 * pixels the pointer is advanced by line_size - w to reach the next row.
 * The accumulation and the return statement are on lines not part of this
 * listing (the name suggests the sum of the pixel values is returned).
 */
1891 static int pix_sum(uint8_t * pix, int line_size, int w)
1896 for (i = 0; i < w; i++) {
1897 for (j = 0; j < w; j++) {
1901 pix += line_size - w;
1906 //near copy & paste from dsputil, FIXME
/**
 * Walks a w x w square of pixels starting at pix, with sq pointing 256
 * entries into ff_squareTbl as the lookup table. After each row of w pixels
 * the pointer is advanced by line_size - w. The accumulation and the return
 * statement are on lines not part of this listing (the name suggests the sum
 * of squared pixel values is returned).
 */
1907 static int pix_norm1(uint8_t * pix, int line_size, int w)
1910 uint32_t *sq = ff_squareTbl + 256;
1913 for (i = 0; i < w; i++) {
1914 for (j = 0; j < w; j ++) {
1918 pix += line_size - w;
/**
 * Fills the block_w x block_w square of s->block entries that a block at
 * (x, y) on the given quadtree level covers with one BlockNode value.
 * The array is addressed at the finest granularity: its row length is
 * b_width << block_max_depth and block_w is 1 << (block_max_depth - level).
 * The construction of `block` from the remaining parameters is on lines not
 * part of this listing.
 */
1923 static inline void set_blocks(SnowContext *s, int level, int x, int y, int l, int cb, int cr, int mx, int my, int ref, int type){
1924 const int w= s->b_width << s->block_max_depth;
1925 const int rem_depth= s->block_max_depth - level;
1926 const int index= (x + y*w) << rem_depth;
1927 const int block_w= 1<<rem_depth;
1940 for(j=0; j<block_w; j++){
1941 for(i=0; i<block_w; i++){
1942 s->block[index + i + j*w]= block;
/**
 * Points the motion-estimation context at the source planes and at the
 * reference planes shifted to pixel position (x, y): c->src[0][i] receives
 * src[i] and c->ref[0][i] receives ref[i] + offset[i]. The two visible
 * offsets use (y*uvstride + x) >> 1; the first offset entry and the loop
 * over i are on lines not part of this listing.
 */
1947 static inline void init_ref(MotionEstContext *c, uint8_t *src[3], uint8_t *ref[3], uint8_t *ref2[3], int x, int y, int ref_index){
1948 const int offset[3]= {
1950 ((y*c->uvstride + x)>>1),
1951 ((y*c->uvstride + x)>>1),
1955 c->src[0][i]= src [i];
1956 c->ref[0][i]= ref [i] + offset[i];
/**
 * Predicts a motion vector from the left, top and top-right neighbours.
 * With a single reference frame the prediction is mid_pred() of the three
 * neighbour components. Otherwise each neighbour component is first
 * multiplied by scale_mv_ref[ref][neighbour->ref] and rounded back with
 * (+128) >> 8. Note that "a*b + 128 >>8" parses as (a*b + 128) >> 8.
 * @param mx,my receive the predicted vector
 * @param ref   reference frame index the prediction is made for
 */
1961 static inline void pred_mv(SnowContext *s, int *mx, int *my, int ref,
1962 BlockNode *left, BlockNode *top, BlockNode *tr){
1963 if(s->ref_frames == 1){
1964 *mx = mid_pred(left->mx, top->mx, tr->mx);
1965 *my = mid_pred(left->my, top->my, tr->my);
1967 const int *scale = scale_mv_ref[ref];
1968 *mx = mid_pred(left->mx * scale[left->ref] + 128 >>8,
1969 top ->mx * scale[top ->ref] + 128 >>8,
1970 tr ->mx * scale[tr ->ref] + 128 >>8);
1971 *my = mid_pred(left->my * scale[left->ref] + 128 >>8,
1972 top ->my * scale[top ->ref] + 128 >>8,
1973 tr ->my * scale[tr ->ref] + 128 >>8);
/* Shorthand names for entries of the predictor array P that
 * encode_q_branch() fills and hands to the motion search. */
1980 #define P_TOPRIGHT P[3]
1981 #define P_MEDIAN P[4]
1983 #define FLAG_QPEL 1 //must be 1
/**
 * Chooses and writes the coding of one block at (x, y) on the given
 * quadtree level. The visible stages are:
 *  - gather the neighbouring blocks (out-of-picture neighbours fall back to
 *    null_block) and derive the coder contexts from them;
 *  - run a motion search against every reference frame and keep per-ref
 *    vectors and scores in s->ref_mvs / s->ref_scores when allocated;
 *  - trial-encode the inter candidate into p_buffer/p_state and add its bit
 *    cost, weighted by lambda2, to score;
 *  - compute mean luma/chroma values, trial-encode the intra candidate into
 *    i_buffer/i_state and form iscore the same way;
 *  - when level != block_max_depth, recurse into the four children and
 *    compare their total score2 with score and iscore;
 *  - copy the chosen trial bitstream back to the saved output position,
 *    record the decision with set_blocks() and adopt the matching state.
 * The conditions selecting between the final paths and the return
 * statements are on lines not part of this listing.
 */
1985 static int encode_q_branch(SnowContext *s, int level, int x, int y){
1986 uint8_t p_buffer[1024];
1987 uint8_t i_buffer[1024];
1988 uint8_t p_state[sizeof(s->block_state)];
1989 uint8_t i_state[sizeof(s->block_state)];
1991 uint8_t *pbbak= s->c.bytestream;
1992 uint8_t *pbbak_start= s->c.bytestream_start;
1993 int score, score2, iscore, i_len, p_len, block_s, sum;
1994 const int w= s->b_width << s->block_max_depth;
1995 const int h= s->b_height << s->block_max_depth;
1996 const int rem_depth= s->block_max_depth - level;
1997 const int index= (x + y*w) << rem_depth;
1998 const int block_w= 1<<(LOG2_MB_SIZE - level);
1999 int trx= (x+1)<<rem_depth;
2000 int try= (y+1)<<rem_depth;
2001 BlockNode *left = x ? &s->block[index-1] : &null_block;
2002 BlockNode *top = y ? &s->block[index-w] : &null_block;
2003 BlockNode *right = trx<w ? &s->block[index+1] : &null_block;
2004 BlockNode *bottom= try<h ? &s->block[index+w] : &null_block;
2005 BlockNode *tl = y && x ? &s->block[index-w-1] : left;
2006 BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
2007 int pl = left->color[0];
2008 int pcb= left->color[1];
2009 int pcr= left->color[2];
2013 const int stride= s->current_picture.linesize[0];
2014 const int uvstride= s->current_picture.linesize[1];
2015 uint8_t *current_data[3]= { s->input_picture.data[0] + (x + y* stride)*block_w,
2016 s->input_picture.data[1] + (x + y*uvstride)*block_w/2,
2017 s->input_picture.data[2] + (x + y*uvstride)*block_w/2};
2019 int16_t last_mv[3][2];
2020 int qpel= !!(s->avctx->flags & CODEC_FLAG_QPEL); //unused
2021 const int shift= 1+qpel;
2022 MotionEstContext *c= &s->m.me;
2023 int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref);
2024 int mx_context= av_log2(2*FFABS(left->mx - top->mx));
2025 int my_context= av_log2(2*FFABS(left->my - top->my));
2026 int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
2027 int ref, best_ref, ref_score, ref_mx, ref_my;
2029 assert(sizeof(s->block_state) >= 256);
2031 set_blocks(s, level, x, y, pl, pcb, pcr, 0, 0, 0, BLOCK_INTRA);
2035 // clip predictors / edge ?
2037 P_LEFT[0]= left->mx;
2038 P_LEFT[1]= left->my;
2041 P_TOPRIGHT[0]= tr->mx;
2042 P_TOPRIGHT[1]= tr->my;
2044 last_mv[0][0]= s->block[index].mx;
2045 last_mv[0][1]= s->block[index].my;
2046 last_mv[1][0]= right->mx;
2047 last_mv[1][1]= right->my;
2048 last_mv[2][0]= bottom->mx;
2049 last_mv[2][1]= bottom->my;
2056 assert(s->m.me. stride == stride);
2057 assert(s->m.me.uvstride == uvstride);
2059 c->penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_cmp);
2060 c->sub_penalty_factor= get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_sub_cmp);
2061 c->mb_penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->mb_cmp);
2062 c->current_mv_penalty= c->mv_penalty[s->m.f_code=1] + MAX_MV;
// search window relative to this block, extending 16-2 pixels past the
// block grid on every side
2064 c->xmin = - x*block_w - 16+2;
2065 c->ymin = - y*block_w - 16+2;
2066 c->xmax = - (x+1)*block_w + (w<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-2;
2067 c->ymax = - (y+1)*block_w + (h<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-2;
2069 if(P_LEFT[0] > (c->xmax<<shift)) P_LEFT[0] = (c->xmax<<shift);
2070 if(P_LEFT[1] > (c->ymax<<shift)) P_LEFT[1] = (c->ymax<<shift);
2071 if(P_TOP[0] > (c->xmax<<shift)) P_TOP[0] = (c->xmax<<shift);
2072 if(P_TOP[1] > (c->ymax<<shift)) P_TOP[1] = (c->ymax<<shift);
2073 if(P_TOPRIGHT[0] < (c->xmin<<shift)) P_TOPRIGHT[0]= (c->xmin<<shift);
2074 if(P_TOPRIGHT[0] > (c->xmax<<shift)) P_TOPRIGHT[0]= (c->xmax<<shift); //due to pmx no clip
2075 if(P_TOPRIGHT[1] > (c->ymax<<shift)) P_TOPRIGHT[1]= (c->ymax<<shift);
2077 P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]);
2078 P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);
2081 c->pred_x= P_LEFT[0];
2082 c->pred_y= P_LEFT[1];
2084 c->pred_x = P_MEDIAN[0];
2085 c->pred_y = P_MEDIAN[1];
2090 for(ref=0; ref<s->ref_frames; ref++){
2091 init_ref(c, current_data, s->last_picture[ref].data, NULL, block_w*x, block_w*y, 0);
2093 ref_score= ff_epzs_motion_search(&s->m, &ref_mx, &ref_my, P, 0, /*ref_index*/ 0, last_mv,
2094 (1<<16)>>shift, level-LOG2_MB_SIZE+4, block_w);
2096 assert(ref_mx >= c->xmin);
2097 assert(ref_mx <= c->xmax);
2098 assert(ref_my >= c->ymin);
2099 assert(ref_my <= c->ymax);
2101 ref_score= s->m.me.sub_motion_search(&s->m, &ref_mx, &ref_my, ref_score, 0, 0, level-LOG2_MB_SIZE+4, block_w);
2102 ref_score= ff_get_mb_score(&s->m, ref_mx, ref_my, 0, 0, level-LOG2_MB_SIZE+4, block_w, 0);
// higher reference indices are penalised
2103 ref_score+= 2*av_log2(2*ref)*c->penalty_factor;
2104 if(s->ref_mvs[ref]){
2105 s->ref_mvs[ref][index][0]= ref_mx;
2106 s->ref_mvs[ref][index][1]= ref_my;
2107 s->ref_scores[ref][index]= ref_score;
2109 if(score > ref_score){
2116 //FIXME if mb_cmp != SSE then intra can't be compared currently and mb_penalty vs. lambda2
2120 pc.bytestream_start=
2121 pc.bytestream= p_buffer; //FIXME end/start? and at the others too
2122 memcpy(p_state, s->block_state, sizeof(s->block_state));
2124 if(level!=s->block_max_depth)
2125 put_rac(&pc, &p_state[4 + s_context], 1);
2126 put_rac(&pc, &p_state[1 + left->type + top->type], 0);
2127 if(s->ref_frames > 1)
2128 put_symbol(&pc, &p_state[128 + 1024 + 32*ref_context], best_ref, 0);
2129 pred_mv(s, &pmx, &pmy, best_ref, left, top, tr);
2130 put_symbol(&pc, &p_state[128 + 32*(mx_context + 16*!!best_ref)], mx - pmx, 1);
2131 put_symbol(&pc, &p_state[128 + 32*(my_context + 16*!!best_ref)], my - pmy, 1);
2132 p_len= pc.bytestream - pc.bytestream_start;
2133 score += (s->lambda2*(p_len*8
2134 + (pc.outstanding_count - s->c.outstanding_count)*8
2135 + (-av_log2(pc.range) + av_log2(s->c.range))
2136 ))>>FF_LAMBDA_SHIFT;
2138 block_s= block_w*block_w;
2139 sum = pix_sum(current_data[0], stride, block_w);
2140 l= (sum + block_s/2)/block_s;
2141 iscore = pix_norm1(current_data[0], stride, block_w) - 2*l*sum + l*l*block_s;
2143 block_s= block_w*block_w>>2;
2144 sum = pix_sum(current_data[1], uvstride, block_w>>1);
2145 cb= (sum + block_s/2)/block_s;
2146 // iscore += pix_norm1(&current_mb[1][0], uvstride, block_w>>1) - 2*cb*sum + cb*cb*block_s;
2147 sum = pix_sum(current_data[2], uvstride, block_w>>1);
2148 cr= (sum + block_s/2)/block_s;
2149 // iscore += pix_norm1(&current_mb[2][0], uvstride, block_w>>1) - 2*cr*sum + cr*cr*block_s;
2152 ic.bytestream_start=
2153 ic.bytestream= i_buffer; //FIXME end/start? and at the others too
2154 memcpy(i_state, s->block_state, sizeof(s->block_state));
2155 if(level!=s->block_max_depth)
2156 put_rac(&ic, &i_state[4 + s_context], 1);
2157 put_rac(&ic, &i_state[1 + left->type + top->type], 1);
2158 put_symbol(&ic, &i_state[32], l-pl , 1);
2159 put_symbol(&ic, &i_state[64], cb-pcb, 1);
2160 put_symbol(&ic, &i_state[96], cr-pcr, 1);
2161 i_len= ic.bytestream - ic.bytestream_start;
2162 iscore += (s->lambda2*(i_len*8
2163 + (ic.outstanding_count - s->c.outstanding_count)*8
2164 + (-av_log2(ic.range) + av_log2(s->c.range))
2165 ))>>FF_LAMBDA_SHIFT;
2167 // assert(score==256*256*256*64-1);
2168 assert(iscore < 255*255*256 + s->lambda2*10);
2169 assert(iscore >= 0);
2170 assert(l>=0 && l<=255);
2171 assert(pl>=0 && pl<=255);
2174 int varc= iscore >> 8;
2175 int vard= score >> 8;
2176 if (vard <= 64 || vard < varc)
2177 c->scene_change_score+= ff_sqrt(vard) - ff_sqrt(varc);
2179 c->scene_change_score+= s->m.qscale;
2182 if(level!=s->block_max_depth){
2183 put_rac(&s->c, &s->block_state[4 + s_context], 0);
2184 score2 = encode_q_branch(s, level+1, 2*x+0, 2*y+0);
2185 score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+0);
2186 score2+= encode_q_branch(s, level+1, 2*x+0, 2*y+1);
2187 score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+1);
2188 score2+= s->lambda2>>FF_LAMBDA_SHIFT; //FIXME exact split overhead
2190 if(score2 < score && score2 < iscore)
2195 pred_mv(s, &pmx, &pmy, 0, left, top, tr);
2196 memcpy(pbbak, i_buffer, i_len);
2198 s->c.bytestream_start= pbbak_start;
2199 s->c.bytestream= pbbak + i_len;
2200 set_blocks(s, level, x, y, l, cb, cr, pmx, pmy, 0, BLOCK_INTRA);
2201 memcpy(s->block_state, i_state, sizeof(s->block_state));
2204 memcpy(pbbak, p_buffer, p_len);
2206 s->c.bytestream_start= pbbak_start;
2207 s->c.bytestream= pbbak + p_len;
2208 set_blocks(s, level, x, y, pl, pcb, pcr, mx, my, best_ref, 0);
2209 memcpy(s->block_state, p_state, sizeof(s->block_state));
/**
 * Tests whether two block nodes carry the same prediction parameters.
 *
 * @param a first block node
 * @param b second block node
 * @return nonzero if the two nodes are considered identical, 0 otherwise
 */
2214 static av_always_inline int same_block(BlockNode *a, BlockNode *b){
2215 if((a->type&BLOCK_INTRA) && (b->type&BLOCK_INTRA)){
/* Both intra: only the three colour components matter. OR-ing the
 * differences gives 0 only when every component matches. */
2216 return !((a->color[0] - b->color[0]) | (a->color[1] - b->color[1]) | (a->color[2] - b->color[2]));
/* NOTE(review): the line separating the two returns is not visible in this
 * excerpt; presumably this is the else branch. It compares the motion
 * vector, the reference index and the BLOCK_INTRA bit of the type. */
2218 return !((a->mx - b->mx) | (a->my - b->my) | (a->ref - b->ref) | ((a->type ^ b->type)&BLOCK_INTRA));
/**
 * Writes the block at (x, y) of the given tree level to the range coder
 * s->c, using the block parameters already stored in s->block.
 *
 * Below the maximum depth a split flag is coded first: 1 when the four
 * nodes b, b+1, b+w and b+w+1 are all same_block()-equal, otherwise 0
 * followed by a recursive call for each of the four children.
 *
 * @param s     codec context
 * @param level current depth in the block tree
 * @param x     block column at this level
 * @param y     block row at this level
 */
2222 static void encode_q_branch2(SnowContext *s, int level, int x, int y){
2223 const int w= s->b_width << s->block_max_depth;
2224 const int rem_depth= s->block_max_depth - level;
2225 const int index= (x + y*w) << rem_depth;
2226 int trx= (x+1)<<rem_depth;
2227 BlockNode *b= &s->block[index];
/* Neighbours fall back to null_block (left/top) or to another neighbour
 * (tl/tr) when they lie outside the block array. */
2228 BlockNode *left = x ? &s->block[index-1] : &null_block;
2229 BlockNode *top = y ? &s->block[index-w] : &null_block;
2230 BlockNode *tl = y && x ? &s->block[index-w-1] : left;
2231 BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
/* The colour prediction is simply the left neighbour's colour. */
2232 int pl = left->color[0];
2233 int pcb= left->color[1];
2234 int pcr= left->color[2];
/* Context indices for the reference and motion vector symbols; a nonzero
 * b->ref adds an offset of 16 to the mv contexts. */
2236 int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref);
2237 int mx_context= av_log2(2*FFABS(left->mx - top->mx)) + 16*!!b->ref;
2238 int my_context= av_log2(2*FFABS(left->my - top->my)) + 16*!!b->ref;
2239 int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
/* NOTE(review): the condition guarding this call (and any return after it)
 * is not visible in this excerpt. */
2242 set_blocks(s, level, x, y, pl, pcb, pcr, 0, 0, 0, BLOCK_INTRA);
2246 if(level!=s->block_max_depth){
2247 if(same_block(b,b+1) && same_block(b,b+w) && same_block(b,b+w+1)){
/* All four nodes agree: code the flag as 1. */
2248 put_rac(&s->c, &s->block_state[4 + s_context], 1);
/* NOTE(review): presumably the else branch (its line is not visible):
 * code the flag as 0 and descend into the four children. */
2250 put_rac(&s->c, &s->block_state[4 + s_context], 0);
2251 encode_q_branch2(s, level+1, 2*x+0, 2*y+0);
2252 encode_q_branch2(s, level+1, 2*x+1, 2*y+0);
2253 encode_q_branch2(s, level+1, 2*x+0, 2*y+1);
2254 encode_q_branch2(s, level+1, 2*x+1, 2*y+1);
2258 if(b->type & BLOCK_INTRA){
/* Intra block: type flag 1, then the colour coded as a difference
 * against the left neighbour. */
2259 pred_mv(s, &pmx, &pmy, 0, left, top, tr);
2260 put_rac(&s->c, &s->block_state[1 + (left->type&1) + (top->type&1)], 1);
2261 put_symbol(&s->c, &s->block_state[32], b->color[0]-pl , 1);
2262 put_symbol(&s->c, &s->block_state[64], b->color[1]-pcb, 1);
2263 put_symbol(&s->c, &s->block_state[96], b->color[2]-pcr, 1);
2264 set_blocks(s, level, x, y, b->color[0], b->color[1], b->color[2], pmx, pmy, 0, BLOCK_INTRA);
/* NOTE(review): presumably the non-intra branch (the else line is not
 * visible): type flag 0, the reference index when more than one reference
 * frame is in use, then the motion vector as a difference against the
 * pred_mv() prediction. */
2266 pred_mv(s, &pmx, &pmy, b->ref, left, top, tr);
2267 put_rac(&s->c, &s->block_state[1 + (left->type&1) + (top->type&1)], 0);
2268 if(s->ref_frames > 1)
2269 put_symbol(&s->c, &s->block_state[128 + 1024 + 32*ref_context], b->ref, 0);
2270 put_symbol(&s->c, &s->block_state[128 + 32*mx_context], b->mx - pmx, 1);
2271 put_symbol(&s->c, &s->block_state[128 + 32*my_context], b->my - pmy, 1);
2272 set_blocks(s, level, x, y, pl, pcb, pcr, b->mx, b->my, b->ref, 0);
/**
 * Reads the block at (x, y) of the given tree level from the range coder
 * s->c and stores the result with set_blocks(), recursing into the four
 * children when the block is split.
 *
 * @param s     codec context
 * @param level current depth in the block tree
 * @param x     block column at this level
 * @param y     block row at this level
 */
2276 static void decode_q_branch(SnowContext *s, int level, int x, int y){
2277 const int w= s->b_width << s->block_max_depth;
2278 const int rem_depth= s->block_max_depth - level;
2279 const int index= (x + y*w) << rem_depth;
2280 int trx= (x+1)<<rem_depth;
/* Neighbours fall back to null_block (left/top) or to another neighbour
 * (tl/tr) when they lie outside the block array. */
2281 BlockNode *left = x ? &s->block[index-1] : &null_block;
2282 BlockNode *top = y ? &s->block[index-w] : &null_block;
2283 BlockNode *tl = y && x ? &s->block[index-w-1] : left;
2284 BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
2285 int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
/* NOTE(review): the condition guarding this call (and any return after it)
 * is not visible in this excerpt. */
2288 set_blocks(s, level, x, y, null_block.color[0], null_block.color[1], null_block.color[2], null_block.mx, null_block.my, null_block.ref, BLOCK_INTRA);
/* A leaf is decoded at maximum depth, or when the coded flag is 1. */
2292 if(level==s->block_max_depth || get_rac(&s->c, &s->block_state[4 + s_context])){
/* Start from the predictions: left neighbour's colour and the median of
 * the left/top/top-right motion vectors. */
2294 int l = left->color[0];
2295 int cb= left->color[1];
2296 int cr= left->color[2];
2297 int mx= mid_pred(left->mx, top->mx, tr->mx);
2298 int my= mid_pred(left->my, top->my, tr->my);
/* The tr-based terms below are multiplied by 0, so they do not affect
 * the context values. */
2300 int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref);
2301 int mx_context= av_log2(2*FFABS(left->mx - top->mx)) + 0*av_log2(2*FFABS(tr->mx - top->mx));
2302 int my_context= av_log2(2*FFABS(left->my - top->my)) + 0*av_log2(2*FFABS(tr->my - top->my));
2304 type= get_rac(&s->c, &s->block_state[1 + left->type + top->type]) ? BLOCK_INTRA : 0;
/* NOTE(review): the if/else lines choosing between the intra path (colour
 * deltas) and the inter path (reference index and motion vector deltas)
 * are not visible in this excerpt. */
2307 pred_mv(s, &mx, &my, 0, left, top, tr);
2308 l += get_symbol(&s->c, &s->block_state[32], 1);
2309 cb+= get_symbol(&s->c, &s->block_state[64], 1);
2310 cr+= get_symbol(&s->c, &s->block_state[96], 1);
2312 if(s->ref_frames > 1)
2313 ref= get_symbol(&s->c, &s->block_state[128 + 1024 + 32*ref_context], 0);
2314 pred_mv(s, &mx, &my, ref, left, top, tr);
2315 mx+= get_symbol(&s->c, &s->block_state[128 + 32*(mx_context + 16*!!ref)], 1);
2316 my+= get_symbol(&s->c, &s->block_state[128 + 32*(my_context + 16*!!ref)], 1);
2318 set_blocks(s, level, x, y, l, cb, cr, mx, my, ref, type);
/* NOTE(review): presumably the else branch of the leaf test above (its line
 * is not visible): decode the four children one level deeper. */
2320 decode_q_branch(s, level+1, 2*x+0, 2*y+0);
2321 decode_q_branch(s, level+1, 2*x+1, 2*y+0);
2322 decode_q_branch(s, level+1, 2*x+0, 2*y+1);
2323 decode_q_branch(s, level+1, 2*x+1, 2*y+1);
/**
 * Encodes the block information of the frame, one level-0 block at a time.
 *
 * @param s      codec context
 * @param search when zero, blocks are written with encode_q_branch2();
 *               it is also one of the conditions tested before the loop
 */
2327 static void encode_blocks(SnowContext *s, int search){
/* NOTE(review): the statement executed when this condition holds is not
 * visible in this excerpt. */
2332 if(s->avctx->me_method == ME_ITER && !s->keyframe && search)
/* Bail-out check on the remaining output buffer space; the statement after
 * the log call is not visible here. */
2336 if(s->c.bytestream_end - s->c.bytestream < w*MB_SIZE*MB_SIZE*3){ //FIXME nicer limit
2337 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
/* encode_q_branch2() only writes stored block data; the alternative call
 * below is presumably the else branch (its line is not visible). */
2341 if(s->avctx->me_method == ME_ITER || !search)
2342 encode_q_branch2(s, 0, x, y);
2344 encode_q_branch (s, 0, x, y);
/**
 * Decodes block information by calling decode_q_branch() at level 0 for
 * block coordinates (x, y).
 * NOTE(review): the loops producing x and y are not visible in this excerpt.
 *
 * @param s codec context
 */
2349 static void decode_blocks(SnowContext *s){
2356 decode_q_branch(s, 0, x, y);
/**
 * Sub-pixel motion compensation of one block, done in two passes that both
 * use the 6-tap kernel (1, -5, 20, 20, -5, 1).
 *
 * The first pass runs over b_h+5 rows of b_w samples and blends the filter
 * output with a neighbouring sample according to dx; the second pass reads
 * six vertically adjacent samples from tmp and blends according to dy.
 * NOTE(review): the sample loads of the first pass and the stores of both
 * passes are not visible in this excerpt.
 *
 * @param dst    destination block
 * @param src    source samples
 * @param tmp    scratch buffer holding the first-pass result
 * @param stride line size used for indexing tmp (and presumably src/dst)
 * @param b_w    block width
 * @param b_h    block height
 * @param dx     horizontal fraction; compared against 8 and weighted as
 *               dx, 8-dx, dx-8 and 16-dx below
 * @param dy     vertical fraction, used the same way as dx
 */
2361 static void mc_block(uint8_t *dst, uint8_t *src, uint8_t *tmp, int stride, int b_w, int b_h, int dx, int dy){
/* First pass: b_h+5 rows, because the second pass needs five extra rows
 * for its 6-tap support. */
2364 for(y=0; y < b_h+5; y++){
2365 for(x=0; x < b_w; x++){
2372 // int am= 9*(a1+a2) - (a0+a3);
2373 int am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5);
2374 // int am= 18*(a2+a3) - 2*(a1+a4);
2375 // int aL= (-7*a0 + 105*a1 + 35*a2 - 5*a3)>>3;
2376 // int aR= (-7*a3 + 105*a2 + 35*a1 - 5*a0)>>3;
2378 // if(b_w==16) am= 8*(a1+a2);
/* Blend the filter output am with a2 (dx<8) or a3 (dx>=8); a raw sample
 * is weighted by 32 to match the gain of the kernel (taps sum to 32). */
2380 if(dx<8) am = (32*a2*( 8-dx) + am* dx + 128)>>8;
2381 else am = ( am*(16-dx) + 32*a3*(dx-8) + 128)>>8;
2383 /* FIXME Try increasing tmp buffer to 16 bits and not clipping here. Should give marginally better results. - Robert*/
/* Clip: for values outside 0..255, negative ones become 0 and large ones
 * become all-ones (255 once stored in a byte). */
2384 if(am&(~255)) am= ~(am>>31);
2388 /* if (dx< 4) tmp[x + y*stride]= (16*a1*( 4-dx) + aL* dx + 32)>>6;
2389 else if(dx< 8) tmp[x + y*stride]= ( aL*( 8-dx) + am*(dx- 4) + 32)>>6;
2390 else if(dx<12) tmp[x + y*stride]= ( am*(12-dx) + aR*(dx- 8) + 32)>>6;
2391 else tmp[x + y*stride]= ( aR*(16-dx) + 16*a2*(dx-12) + 32)>>6;*/
/* Rewind tmp by the b_h+5 rows covered by the first pass. */
2396 tmp -= (b_h+5)*stride;
/* Second pass: same kernel applied vertically to the first-pass result. */
2398 for(y=0; y < b_h; y++){
2399 for(x=0; x < b_w; x++){
2400 int a0= tmp[x + 0*stride];
2401 int a1= tmp[x + 1*stride];
2402 int a2= tmp[x + 2*stride];
2403 int a3= tmp[x + 3*stride];
2404 int a4= tmp[x + 4*stride];
2405 int a5= tmp[x + 5*stride];
2406 int am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5);
2407 // int am= 18*(a2+a3) - 2*(a1+a4);
2408 /* int aL= (-7*a0 + 105*a1 + 35*a2 - 5*a3)>>3;
2409 int aR= (-7*a3 + 105*a2 + 35*a1 - 5*a0)>>3;*/
2411 // if(b_w==16) am= 8*(a1+a2);
2413 if(dy<8) am = (32*a2*( 8-dy) + am* dy + 128)>>8;
2414 else am = ( am*(16-dy) + 32*a3*(dy-8) + 128)>>8;
/* Same clip as in the first pass. */
2416 if(am&(~255)) am= ~(am>>31);
2419 /* if (dy< 4) tmp[x + y*stride]= (16*a1*( 4-dy) + aL* dy + 32)>>6;
2420 else if(dy< 8) tmp[x + y*stride]= ( aL*( 8-dy) + am*(dy- 4) + 32)>>6;
2421 else if(dy<12) tmp[x + y*stride]= ( am*(12-dy) + aR*(dy- 8) + 32)>>6;
2422 else tmp[x + y*stride]= ( aR*(16-dy) + 16*a2*(dy-12) + 32)>>6;*/
2427 STOP_TIMER("mc_block")
/**
 * Generates a function mc_block_hpel<dx><dy><b_w>() that calls mc_block()
 * for a square b_w x b_w block with fixed dx and dy, using a
 * variable-length scratch array of stride*(b_w+5) bytes on the stack.
 * The source pointer is moved back by two columns and two rows before the
 * call. The generated function's h parameter is not used in the visible
 * lines.
 * NOTE(review): the end of the macro body is not visible in this excerpt.
 */
2430 #define mca(dx,dy,b_w)\
2431 static void mc_block_hpel ## dx ## dy ## b_w(uint8_t *dst, uint8_t *src, int stride, int h){\
2432 uint8_t tmp[stride*(b_w+5)];\
2434 mc_block(dst, src-2-2*stride, tmp, stride, b_w, b_w, dx, dy);\
/**
 * Produces the prediction of one block into dst.
 *
 * Intra blocks are filled with the constant block->color[plane_index].
 * Other blocks are motion compensated from
 * s->last_picture[block->ref].data[plane_index], either with mc_block() or
 * with the put_h264_qpel_pixels_tab functions of s->dsp.
 *
 * @param s           codec context
 * @param dst         destination buffer
 * @param tmp         scratch buffer (used for edge emulation and mc_block())
 * @param stride      line size of dst and of the reference plane
 * @param sx          x position of the block in the plane
 * @param sy          y position of the block in the plane
 * @param b_w         block width
 * @param b_h         block height
 * @param block       block node supplying type, colour, mv and ref
 * @param plane_index plane to predict
 * @param w           plane width
 * @param h           plane height
 */
2446 static void pred_block(SnowContext *s, uint8_t *dst, uint8_t *tmp, int stride, int sx, int sy, int b_w, int b_h, BlockNode *block, int plane_index, int w, int h){
2447 if(block->type & BLOCK_INTRA){
2449 const int color = block->color[plane_index];
/* The colour byte replicated into all four bytes of a 32-bit word. */
2450 const int color4= color*0x01010101;
/* NOTE(review): the conditions selecting between the following unrolled
 * fills (32, 16, 8 and 4 bytes per row) and the generic byte loop are not
 * visible in this excerpt; presumably they test b_w. */
2452 for(y=0; y < b_h; y++){
2453 *(uint32_t*)&dst[0 + y*stride]= color4;
2454 *(uint32_t*)&dst[4 + y*stride]= color4;
2455 *(uint32_t*)&dst[8 + y*stride]= color4;
2456 *(uint32_t*)&dst[12+ y*stride]= color4;
2457 *(uint32_t*)&dst[16+ y*stride]= color4;
2458 *(uint32_t*)&dst[20+ y*stride]= color4;
2459 *(uint32_t*)&dst[24+ y*stride]= color4;
2460 *(uint32_t*)&dst[28+ y*stride]= color4;
2463 for(y=0; y < b_h; y++){
2464 *(uint32_t*)&dst[0 + y*stride]= color4;
2465 *(uint32_t*)&dst[4 + y*stride]= color4;
2466 *(uint32_t*)&dst[8 + y*stride]= color4;
2467 *(uint32_t*)&dst[12+ y*stride]= color4;
2470 for(y=0; y < b_h; y++){
2471 *(uint32_t*)&dst[0 + y*stride]= color4;
2472 *(uint32_t*)&dst[4 + y*stride]= color4;
2475 for(y=0; y < b_h; y++){
2476 *(uint32_t*)&dst[0 + y*stride]= color4;
2479 for(y=0; y < b_h; y++){
2480 for(x=0; x < b_w; x++){
2481 dst[x + y*stride]= color;
/* NOTE(review): presumably the non-intra branch starts here (the else line
 * is not visible). */
2486 uint8_t *src= s->last_picture[block->ref].data[plane_index];
/* Plane 0 uses twice the mv scale of the other planes. */
2487 const int scale= plane_index ? s->mv_scale : 2*s->mv_scale;
2488 int mx= block->mx*scale;
2489 int my= block->my*scale;
/* The low four bits of the scaled vector are the sub-pixel fraction. */
2490 const int dx= mx&15;
2491 const int dy= my&15;
2492 const int tab_index= 3 - (b_w>>2) + (b_w>>4);
2495 src += sx + sy*stride;
/* Too close to (or beyond) the plane border: build an edge-extended copy
 * of the needed (b_w+5)x(b_h+5) area in tmp. The unsigned casts make
 * negative positions fail the test as well. */
2496 if( (unsigned)sx >= w - b_w - 4
2497 || (unsigned)sy >= h - b_h - 4){
2498 ff_emulated_edge_mc(tmp + MB_SIZE, src, stride, b_w+5, b_h+5, sx, sy, w, h);
2501 // assert(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h);
2502 // assert(!(b_w&(b_w-1)));
2503 assert(b_w>1 && b_h>1);
2504 assert(tab_index>=0 && tab_index<4 || b_w==32);
/* Use the generic mc_block() when the fraction is not a multiple of 4, the
 * block is neither square nor 2:1 / 1:2, or b_w is not a power of two. */
2505 if((dx&3) || (dy&3) || !(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h) || (b_w&(b_w-1)))
2506 mc_block(dst, src, tmp, stride, b_w, b_h, dx, dy);
/* NOTE(review): the conditions selecting between the following dsp-based
 * cases are only partly visible in this excerpt. Each case covers the
 * block with one or more calls, the later ones offset by 16 columns, b_h
 * columns or b_w rows. */
2509 for(y=0; y<b_h; y+=16){
2510 s->dsp.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + y*stride, src + 2 + (y+2)*stride,stride);
2511 s->dsp.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + 16 + y*stride, src + 18 + (y+2)*stride,stride);
2514 s->dsp.put_h264_qpel_pixels_tab[tab_index ][dy+(dx>>2)](dst,src + 2 + 2*stride,stride);
2515 else if(b_w==2*b_h){
2516 s->dsp.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst ,src + 2 + 2*stride,stride);
2517 s->dsp.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst+b_h,src + 2 + b_h + 2*stride,stride);
2520 s->dsp.put_h264_qpel_pixels_tab[tab_index ][dy+(dx>>2)](dst ,src + 2 + 2*stride ,stride);
2521 s->dsp.put_h264_qpel_pixels_tab[tab_index ][dy+(dx>>2)](dst+b_w*stride,src + 2 + 2*stride+b_w*stride,stride);
/**
 * Combines four block predictions with their OBMC weights and applies the
 * result to the lines of a slice buffer.
 *
 * For every sample the weighted sum
 * obmc1*block[3] + obmc2*block[2] + obmc3*block[1] + obmc4*block[0]
 * is formed and rescaled; it is then either added to the slice buffer
 * value, rounded, clipped and stored in dst8, or subtracted from the slice
 * buffer value.
 * NOTE(review): the if/else lines choosing between these two actions are
 * not visible in this excerpt; presumably they test add.
 *
 * @param obmc        weight table; four regions of it are addressed below
 * @param obmc_stride line size of the weight table
 * @param block       the four predicted blocks
 * @param b_w         block width
 * @param b_h         block height
 * @param src_x       x offset into each slice buffer line
 * @param src_y       first slice buffer line
 * @param src_stride  line size of block[] and dst8
 * @param sb          slice buffer
 * @param add         see the note above
 * @param dst8        8-bit output
 */
2526 void ff_snow_inner_add_yblock(uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h,
2527 int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8){
2530 for(y=0; y<b_h; y++){
2531 //FIXME ugly misuse of obmc_stride
/* obmc2 is half a row to the right of obmc1, obmc3 is obmc_stride/2 rows
 * below obmc1, and obmc4 is half a row to the right of obmc3. */
2532 uint8_t *obmc1= obmc + y*obmc_stride;
2533 uint8_t *obmc2= obmc1+ (obmc_stride>>1);
2534 uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1);
2535 uint8_t *obmc4= obmc3+ (obmc_stride>>1);
2536 dst = slice_buffer_get_line(sb, src_y + y);
2537 for(x=0; x<b_w; x++){
2538 int v= obmc1[x] * block[3][x + y*src_stride]
2539 +obmc2[x] * block[2][x + y*src_stride]
2540 +obmc3[x] * block[1][x + y*src_stride]
2541 +obmc4[x] * block[0][x + y*src_stride];
/* Rescale the weighted sum: first up by 8 - LOG2_OBMC_MAX bits, then a
 * rounded shift down by 8 - FRAC_BITS bits. */
2543 v <<= 8 - LOG2_OBMC_MAX;
2545 v += 1<<(7 - FRAC_BITS);
2546 v >>= 8 - FRAC_BITS;
/* Add to the slice buffer value, round away FRAC_BITS fractional bits,
 * clip to 0..255 and store the 8-bit result. */
2549 v += dst[x + src_x];
2550 v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS;
2551 if(v&(~255)) v= ~(v>>31);
2552 dst8[x + y*src_stride] = v;
/* Other action: subtract the prediction from the slice buffer value. */
2554 dst[x + src_x] -= v;
2560 //FIXME name cleanup (b_w, block_w, b_width stuff)
/**
 * Predicts one block area from up to four neighbouring block nodes and
 * adds it to, or subtracts it from, the destination.
 *
 * The four nodes lt, rt, lb and rb are the ones at (b_x, b_y), to its
 * right, below it and below-right. Each gets a prediction from
 * pred_block(); same_block() is tested first so that identical nodes can
 * presumably share one prediction (the sharing statements are not visible
 * in this excerpt). The predictions are then blended with the OBMC weights.
 *
 * @param s           codec context
 * @param sliced      nonzero to work on the slice buffer sb through
 *                    s->dsp.inner_add_yblock()
 * @param sb          slice buffer (used when sliced)
 * @param dst         coefficient buffer (used when not sliced)
 * @param dst8        8-bit output picture plane
 * @param obmc        OBMC weight table
 * @param src_x       x position of the area in the plane, may be negative
 * @param src_y       y position of the area in the plane, may be negative
 * @param b_w         area width
 * @param b_h         area height
 * @param w           plane width
 * @param h           plane height
 * @param dst_stride  line size of dst
 * @param src_stride  line size of dst8 and of the temporary predictions
 * @param obmc_stride line size of the OBMC table
 * @param b_x         block column
 * @param b_y         block row
 * @param add         selects adding versus subtracting the prediction
 * @param offset_dst  controls whether dst is offset by the area position
 * @param plane_index plane being processed
 */
2561 static av_always_inline void add_yblock(SnowContext *s, int sliced, slice_buffer *sb, DWTELEM *dst, uint8_t *dst8, const uint8_t *obmc, int src_x, int src_y, int b_w, int b_h, int w, int h, int dst_stride, int src_stride, int obmc_stride, int b_x, int b_y, int add, int offset_dst, int plane_index){
2562 const int b_width = s->b_width << s->block_max_depth;
2563 const int b_height= s->b_height << s->block_max_depth;
2564 const int b_stride= b_width;
2565 BlockNode *lt= &s->block[b_x + b_y*b_stride];
2566 BlockNode *rt= lt+1;
2567 BlockNode *lb= lt+b_stride;
2568 BlockNode *rb= lb+1;
2570 int tmp_step= src_stride >= 7*MB_SIZE ? MB_SIZE : MB_SIZE*src_stride;
/* Variable-length scratch area on the stack for the predictions. */
2571 uint8_t tmp[src_stride*7*MB_SIZE]; //FIXME align
/* NOTE(review): the bodies of the following block-index and picture-edge
 * tests are mostly not visible in this excerpt. */
2578 }else if(b_x + 1 >= b_width){
2585 }else if(b_y + 1 >= b_height){
2590 if(src_x<0){ //FIXME merge with prev & always round internal width up to *16
2593 if(!sliced && !offset_dst)
2596 }else if(src_x + b_w > w){
2600 obmc -= src_y*obmc_stride;
2602 if(!sliced && !offset_dst)
2603 dst -= src_y*dst_stride;
2605 }else if(src_y + b_h> h){
/* Nothing is left of the area: done. */
2609 if(b_w<=0 || b_h<=0) return;
2611 assert(src_stride > 2*MB_SIZE + 5);
2612 if(!sliced && offset_dst)
2613 dst += src_x + src_y*dst_stride;
2614 dst8+= src_x + src_y*src_stride;
2615 // src += src_x + src_y*src_stride;
2617 ptmp= tmp + 3*tmp_step;
/* One prediction per distinct neighbour node. */
2620 pred_block(s, block[0], tmp, src_stride, src_x, src_y, b_w, b_h, lt, plane_index, w, h);
2622 if(same_block(lt, rt)){
2627 pred_block(s, block[1], tmp, src_stride, src_x, src_y, b_w, b_h, rt, plane_index, w, h);
2630 if(same_block(lt, lb)){
2632 }else if(same_block(rt, lb)){
2637 pred_block(s, block[2], tmp, src_stride, src_x, src_y, b_w, b_h, lb, plane_index, w, h);
2640 if(same_block(lt, rb) ){
2642 }else if(same_block(rt, rb)){
2644 }else if(same_block(lb, rb)){
2648 pred_block(s, block[3], tmp, src_stride, src_x, src_y, b_w, b_h, rb, plane_index, w, h);
/* NOTE(review): the preprocessor or conditional lines selecting between
 * the accumulation variants below are not visible in this excerpt. This
 * first variant applies each of the four weighted predictions in its own
 * pass over the area. */
2651 for(y=0; y<b_h; y++){
2652 for(x=0; x<b_w; x++){
2653 int v= obmc [x + y*obmc_stride] * block[3][x + y*src_stride] * (256/OBMC_MAX);
2654 if(add) dst[x + y*dst_stride] += v;
2655 else dst[x + y*dst_stride] -= v;
2658 for(y=0; y<b_h; y++){
2659 uint8_t *obmc2= obmc + (obmc_stride>>1);
2660 for(x=0; x<b_w; x++){
2661 int v= obmc2[x + y*obmc_stride] * block[2][x + y*src_stride] * (256/OBMC_MAX);
2662 if(add) dst[x + y*dst_stride] += v;
2663 else dst[x + y*dst_stride] -= v;
2666 for(y=0; y<b_h; y++){
2667 uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
2668 for(x=0; x<b_w; x++){
2669 int v= obmc3[x + y*obmc_stride] * block[1][x + y*src_stride] * (256/OBMC_MAX);
2670 if(add) dst[x + y*dst_stride] += v;
2671 else dst[x + y*dst_stride] -= v;
2674 for(y=0; y<b_h; y++){
2675 uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
2676 uint8_t *obmc4= obmc3+ (obmc_stride>>1);
2677 for(x=0; x<b_w; x++){
2678 int v= obmc4[x + y*obmc_stride] * block[0][x + y*src_stride] * (256/OBMC_MAX);
2679 if(add) dst[x + y*dst_stride] += v;
2680 else dst[x + y*dst_stride] -= v;
/* Slice-buffer path: delegate the blend to the dsp function. */
2687 s->dsp.inner_add_yblock(obmc, obmc_stride, block, b_w, b_h, src_x,src_y, src_stride, sb, add, dst8);
2688 STOP_TIMER("inner_add_yblock")
/* Path working directly on dst: the same blend, with dst indexed through
 * dst_stride. */
2690 for(y=0; y<b_h; y++){
2691 //FIXME ugly misuse of obmc_stride
2692 uint8_t *obmc1= obmc + y*obmc_stride;
2693 uint8_t *obmc2= obmc1+ (obmc_stride>>1);
2694 uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1);
2695 uint8_t *obmc4= obmc3+ (obmc_stride>>1);
2696 for(x=0; x<b_w; x++){
2697 int v= obmc1[x] * block[3][x + y*src_stride]
2698 +obmc2[x] * block[2][x + y*src_stride]
2699 +obmc3[x] * block[1][x + y*src_stride]
2700 +obmc4[x] * block[0][x + y*src_stride];
2702 v <<= 8 - LOG2_OBMC_MAX;
2704 v += 1<<(7 - FRAC_BITS);
2705 v >>= 8 - FRAC_BITS;
/* Add, round, clip to 0..255 and store the 8-bit result ... */
2708 v += dst[x + y*dst_stride];
2709 v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS;
2710 if(v&(~255)) v= ~(v>>31);
2711 dst8[x + y*src_stride] = v;
/* ... or subtract the prediction (the if/else lines are not visible). */
2713 dst[x + y*dst_stride] -= v;
/**
 * Applies the block prediction for one row of blocks (mb_y) of a plane to
 * the lines of a slice buffer.
 *
 * For keyframes, or when bit 512 of s->avctx->debug is set, no motion
 * prediction is used: the lines are converted to 8-bit samples with an
 * offset of 128, or the offset of 128 is removed from them.
 * NOTE(review): the lines choosing between those two actions are not
 * visible in this excerpt; presumably they test add.
 * Otherwise add_yblock() is called for every block column 0..mb_w.
 *
 * @param s           codec context
 * @param sb          slice buffer holding the coefficient lines
 * @param old_buffer  passed to add_yblock() as its dst argument
 * @param plane_index plane being processed
 * @param add         passed on to add_yblock()
 * @param mb_y        block row
 */
2720 static av_always_inline void predict_slice_buffered(SnowContext *s, slice_buffer * sb, DWTELEM * old_buffer, int plane_index, int add, int mb_y){
2721 Plane *p= &s->plane[plane_index];
2722 const int mb_w= s->b_width << s->block_max_depth;
2723 const int mb_h= s->b_height << s->block_max_depth;
2725 int block_size = MB_SIZE >> s->block_max_depth;
/* Planes other than 0 use half the block size and the OBMC table of the
 * next depth. */
2726 int block_w = plane_index ? block_size/2 : block_size;
2727 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
2728 int obmc_stride= plane_index ? block_size : 2*block_size;
2729 int ref_stride= s->current_picture.linesize[plane_index];
2730 uint8_t *dst8= s->current_picture.data[plane_index];
2735 if(s->keyframe || (s->avctx->debug&512)){
2740 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++)
2742 // DWTELEM * line = slice_buffer_get_line(sb, y);
2743 DWTELEM * line = sb->line[y];
2746 // int v= buf[x + y*w] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
/* Add the offset of 128 plus a rounding term, then clip to 0..255. */
2747 int v= line[x] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
2749 if(v&(~255)) v= ~(v>>31);
2750 dst8[x + y*ref_stride]= v;
2754 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++)
2756 // DWTELEM * line = slice_buffer_get_line(sb, y);
2757 DWTELEM * line = sb->line[y];
2760 line[x] -= 128 << FRAC_BITS;
2761 // buf[x + y*w]-= 128<<FRAC_BITS;
/* The column loop includes mb_w itself; each area starts half a block
 * before the block position. */
2769 for(mb_x=0; mb_x<=mb_w; mb_x++){
2772 add_yblock(s, 1, sb, old_buffer, dst8, obmc,
2773 block_w*mb_x - block_w/2,
2774 block_w*mb_y - block_w/2,
2777 w, ref_stride, obmc_stride,
2779 add, 0, plane_index);
2781 STOP_TIMER("add_yblock")
2784 STOP_TIMER("predict_slice")
/**
 * Applies the block prediction for one row of blocks (mb_y) of a plane to
 * a full coefficient buffer.
 *
 * For keyframes, or when bit 512 of s->avctx->debug is set, no motion
 * prediction is used: the samples are converted to 8 bits with an offset
 * of 128, or the offset of 128 is removed from buf.
 * NOTE(review): the lines choosing between those two actions are not
 * visible in this excerpt; presumably they test add.
 * Otherwise add_yblock() is called for every block column 0..mb_w.
 *
 * @param s           codec context
 * @param buf         coefficient buffer, indexed as buf[x + y*w]
 * @param plane_index plane being processed
 * @param add         passed on to add_yblock()
 * @param mb_y        block row
 */
2787 static av_always_inline void predict_slice(SnowContext *s, DWTELEM *buf, int plane_index, int add, int mb_y){
2788 Plane *p= &s->plane[plane_index];
2789 const int mb_w= s->b_width << s->block_max_depth;
2790 const int mb_h= s->b_height << s->block_max_depth;
2792 int block_size = MB_SIZE >> s->block_max_depth;
/* Planes other than 0 use half the block size and the OBMC table of the
 * next depth. */
2793 int block_w = plane_index ? block_size/2 : block_size;
2794 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
2795 const int obmc_stride= plane_index ? block_size : 2*block_size;
2796 int ref_stride= s->current_picture.linesize[plane_index];
2797 uint8_t *dst8= s->current_picture.data[plane_index];
2802 if(s->keyframe || (s->avctx->debug&512)){
2807 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
/* Add the offset of 128 plus a rounding term, then clip to 0..255. */
2809 int v= buf[x + y*w] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
2811 if(v&(~255)) v= ~(v>>31);
2812 dst8[x + y*ref_stride]= v;
2816 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
2818 buf[x + y*w]-= 128<<FRAC_BITS;
/* The column loop includes mb_w itself; each area starts half a block
 * before the block position. */
2826 for(mb_x=0; mb_x<=mb_w; mb_x++){
2829 add_yblock(s, 0, NULL, buf, dst8, obmc,
2830 block_w*mb_x - block_w/2,
2831 block_w*mb_y - block_w/2,
2834 w, ref_stride, obmc_stride,
2836 add, 1, plane_index);
2838 STOP_TIMER("add_yblock")
2841 STOP_TIMER("predict_slice")
/**
 * Applies the block prediction to a whole plane by calling predict_slice()
 * for every block row from 0 to mb_h inclusive.
 *
 * @param s           codec context
 * @param buf         coefficient buffer of the plane
 * @param plane_index plane being processed
 * @param add         passed on to predict_slice()
 */
2844 static av_always_inline void predict_plane(SnowContext *s, DWTELEM *buf, int plane_index, int add){
2845 const int mb_h= s->b_height << s->block_max_depth;
2847 for(mb_y=0; mb_y<=mb_h; mb_y++)
2848 predict_slice(s, buf, plane_index, add, mb_y);
/**
 * Computes a colour value for the block at (mb_x, mb_y) in one plane.
 *
 * The block is temporarily marked intra with colour 0, the prediction of
 * the surrounding area is accumulated into a scratch buffer with
 * add_yblock(), and the input picture minus that prediction is correlated
 * with the OBMC weights (ab), normalised by the sum of squared weights (aa).
 * NOTE(review): the loop over the neighbouring areas and the restoring of
 * the block from backup are not visible in this excerpt.
 *
 * @param s           codec context
 * @param mb_x        block column
 * @param mb_y        block row
 * @param plane_index plane to evaluate
 * @return the rounded quotient (ab << LOG2_OBMC_MAX) / aa, clipped to 0..255
 */
2851 static int get_dc(SnowContext *s, int mb_x, int mb_y, int plane_index){
2853 Plane *p= &s->plane[plane_index];
2854 const int block_size = MB_SIZE >> s->block_max_depth;
2855 const int block_w = plane_index ? block_size/2 : block_size;
2856 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
2857 const int obmc_stride= plane_index ? block_size : 2*block_size;
2858 const int ref_stride= s->current_picture.linesize[plane_index];
2859 uint8_t *src= s-> input_picture.data[plane_index];
/* Scratch area taken from s->m.obmc_scratchpad, one region per plane. */
2860 DWTELEM *dst= (DWTELEM*)s->m.obmc_scratchpad + plane_index*block_size*block_size*4;
2861 const int b_stride = s->b_width << s->block_max_depth;
2862 const int w= p->width;
2863 const int h= p->height;
2864 int index= mb_x + mb_y*b_stride;
2865 BlockNode *b= &s->block[index];
2866 BlockNode backup= *b;
/* Make the block contribute nothing: intra with colour 0. */
2870 b->type|= BLOCK_INTRA;
2871 b->color[plane_index]= 0;
2872 memset(dst, 0, obmc_stride*obmc_stride*sizeof(DWTELEM));
/* i selects one of the four block-sized quarters around the block
 * (bit 0: right half, bit 1: lower half). */
2875 int mb_x2= mb_x + (i &1) - 1;
2876 int mb_y2= mb_y + (i>>1) - 1;
2877 int x= block_w*mb_x2 + block_w/2;
2878 int y= block_w*mb_y2 + block_w/2;
2880 add_yblock(s, 0, NULL, dst + ((i&1)+(i>>1)*obmc_stride)*block_w, NULL, obmc,
2881 x, y, block_w, block_w, w, h, obmc_stride, ref_stride, obmc_stride, mb_x2, mb_y2, 0, 0, plane_index);
/* Only the part of the quarter that lies inside the plane is visited. */
2883 for(y2= FFMAX(y, 0); y2<FFMIN(h, y+block_w); y2++){
2884 for(x2= FFMAX(x, 0); x2<FFMIN(w, x+block_w); x2++){
2885 int index= x2-(block_w*mb_x - block_w/2) + (y2-(block_w*mb_y - block_w/2))*obmc_stride;
2886 int obmc_v= obmc[index];
/* At the plane borders the weight of the part of the window that falls
 * outside the plane is added to the weight inside. */
2888 if(y<0) obmc_v += obmc[index + block_w*obmc_stride];
2889 if(x<0) obmc_v += obmc[index + block_w];
2890 if(y+block_w>h) obmc_v += obmc[index - block_w*obmc_stride];
2891 if(x+block_w>w) obmc_v += obmc[index - block_w];
2892 //FIXME precalc this or simplify it somehow else
2894 d = -dst[index] + (1<<(FRAC_BITS-1));
2896 ab += (src[x2 + y2*ref_stride] - (d>>FRAC_BITS)) * obmc_v;
2897 aa += obmc_v * obmc_v; //FIXME precalculate this
2903 return clip(((ab<<LOG2_OBMC_MAX) + aa/2)/aa, 0, 255); //FIXME we should not need clipping
/**
 * Estimates the number of bits needed to code the block at (x, y).
 *
 * Intra blocks are charged according to the colour differences against the
 * left neighbour, other blocks according to dmx, dmy and the reference
 * index.
 * NOTE(review): the statements between pred_mv() and the final return are
 * not visible in this excerpt; presumably dmx/dmy become the differences
 * between the prediction and the block's own vector.
 *
 * @param s codec context
 * @param x block column
 * @param y block row
 * @param w width in blocks, used to locate the top-right neighbour
 * @return estimated bit cost
 */
2906 static inline int get_block_bits(SnowContext *s, int x, int y, int w){
2907 const int b_stride = s->b_width << s->block_max_depth;
2908 const int b_height = s->b_height<< s->block_max_depth;
2909 int index= x + y*b_stride;
2910 BlockNode *b = &s->block[index];
2911 BlockNode *left = x ? &s->block[index-1] : &null_block;
2912 BlockNode *top = y ? &s->block[index-b_stride] : &null_block;
2913 BlockNode *tl = y && x ? &s->block[index-b_stride-1] : left;
2914 BlockNode *tr = y && x+w<b_stride ? &s->block[index-b_stride+w] : tl;
2916 // int mx_context= av_log2(2*FFABS(left->mx - top->mx));
2917 // int my_context= av_log2(2*FFABS(left->my - top->my));
2919 if(x<0 || x>=b_stride || y>=b_height)
2926 00001XXXX 15-30 8-15
2928 //FIXME try accurate rate
2929 //FIXME intra and inter predictors if surrounding blocks are not the same type
2930 if(b->type & BLOCK_INTRA){
2931 return 3+2*( av_log2(2*FFABS(left->color[0] - b->color[0]))
2932 + av_log2(2*FFABS(left->color[1] - b->color[1]))
2933 + av_log2(2*FFABS(left->color[2] - b->color[2])));
2935 pred_mv(s, &dmx, &dmy, b->ref, left, top, tr);
2938 return 2*(1 + av_log2(2*FFABS(dmx)) //FIXME kill the 2* can be merged in lambda
2939 + av_log2(2*FFABS(dmy))
2940 + av_log2(2*b->ref));
/**
 * Computes a rate-distortion score for the block at (mb_x, mb_y).
 *
 * The block is predicted over a window of twice the block size with
 * pred_block() and blended with the neighbours' contribution found in
 * s->m.obmc_scratchpad, using the weights in obmc_edged. The distortion
 * is measured against the input picture with the comparison function
 * selected by s->avctx->me_cmp, and a rate estimate from get_block_bits()
 * is added.
 * NOTE(review): several statements (the store of the blended sample, parts
 * of the distortion and rate code) are not visible in this excerpt.
 *
 * @param s           codec context
 * @param mb_x        block column
 * @param mb_y        block row
 * @param plane_index plane to evaluate
 * @param obmc_edged  OBMC weights, indexed with obmc_stride per row
 * @return distortion + rate * penalty_factor
 */
2944 static int get_block_rd(SnowContext *s, int mb_x, int mb_y, int plane_index, const uint8_t *obmc_edged){
2945 Plane *p= &s->plane[plane_index];
2946 const int block_size = MB_SIZE >> s->block_max_depth;
2947 const int block_w = plane_index ? block_size/2 : block_size;
2948 const int obmc_stride= plane_index ? block_size : 2*block_size;
2949 const int ref_stride= s->current_picture.linesize[plane_index];
2950 uint8_t *dst= s->current_picture.data[plane_index];
2951 uint8_t *src= s-> input_picture.data[plane_index];
2952 DWTELEM *pred= (DWTELEM*)s->m.obmc_scratchpad + plane_index*block_size*block_size*4;
/* Variable-length scratch arrays on the stack. */
2953 uint8_t cur[ref_stride*2*MB_SIZE]; //FIXME alignment
2954 uint8_t tmp[ref_stride*(2*MB_SIZE+5)];
2955 const int b_stride = s->b_width << s->block_max_depth;
2956 const int b_height = s->b_height<< s->block_max_depth;
2957 const int w= p->width;
2958 const int h= p->height;
2961 const int penalty_factor= get_penalty_factor(s->lambda, s->lambda2, s->avctx->me_cmp);
/* The window starts half a block before the block position; x0..x1 and
 * y0..y1 are the part of it that lies inside the plane. */
2962 int sx= block_w*mb_x - block_w/2;
2963 int sy= block_w*mb_y - block_w/2;
2964 int x0= FFMAX(0,-sx);
2965 int y0= FFMAX(0,-sy);
2966 int x1= FFMIN(block_w*2, w-sx);
2967 int y1= FFMIN(block_w*2, h-sy);
2970 pred_block(s, cur, tmp, ref_stride, sx, sy, block_w*2, block_w*2, &s->block[mb_x + mb_y*b_stride], plane_index, w, h);
2972 for(y=y0; y<y1; y++){
2973 const uint8_t *obmc1= obmc_edged + y*obmc_stride;
2974 const DWTELEM *pred1 = pred + y*obmc_stride;
2975 uint8_t *cur1 = cur + y*ref_stride;
2976 uint8_t *dst1 = dst + sx + (sy+y)*ref_stride;
2977 for(x=x0; x<x1; x++){
/* Weighted own prediction plus the neighbours' part, reduced to sample
 * precision and clipped to 0..255. */
2978 int v = (cur1[x] * obmc1[x]) << (FRAC_BITS - LOG2_OBMC_MAX);
2979 v = (v + pred1[x]) >> FRAC_BITS;
2980 if(v&(~255)) v= ~(v>>31);
2985 /* copy the regions where obmc[] = (uint8_t)256 */
2986 if(LOG2_OBMC_MAX == 8
2987 && (mb_x == 0 || mb_x == b_stride-1)
2988 && (mb_y == 0 || mb_y == b_height-1)){
2997 for(y=y0; y<y1; y++)
2998 memcpy(dst + sx+x0 + (sy+y)*ref_stride, cur + x0 + y*ref_stride, x1-x0);
3002 /* FIXME rearrange dsputil to fit 32x32 cmp functions */
3003 /* FIXME check alignment of the cmp wavelet vs the encoding wavelet */
3004 /* FIXME cmps overlap but don't cover the wavelet's whole support,
3005 * so improving the score of one block is not strictly guaranteed to
3006 * improve the score of the whole frame, so iterative motion est
3007 * doesn't always converge. */
3008 if(s->avctx->me_cmp == FF_CMP_W97)
3009 distortion = w97_32_c(&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, 32);
3010 else if(s->avctx->me_cmp == FF_CMP_W53)
3011 distortion = w53_32_c(&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, 32);
/* NOTE(review): presumably a loop over i summing four 16x16 comparisons
 * (the loop header is not visible). */
3015 int off = sx+16*(i&1) + (sy+16*(i>>1))*ref_stride;
3016 distortion += s->dsp.me_cmp[0](&s->m, src + off, dst + off, ref_stride, 16);
3021 distortion = s->dsp.me_cmp[0](&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, block_w*2);
/* Rate: bit estimates for this block and nearby blocks. */
3030 rate += get_block_bits(s, mb_x + (i&1) - (i>>1), mb_y + (i>>1), 1);
3032 if(mb_x == b_stride-2)
3033 rate += get_block_bits(s, mb_x + 1, mb_y + 1, 1);
3035 return distortion + rate*penalty_factor;
/**
 * Computes a rate-distortion score for the 2x2 group of blocks whose
 * top-left block is at (mb_x, mb_y).
 *
 * The surrounding areas are predicted into the current picture with
 * add_yblock(); parts outside the plane are copied from the input picture
 * so they do not add distortion. Each area is compared with the input via
 * s->dsp.me_cmp, and bit estimates from get_block_bits() form the rate.
 * NOTE(review): the loop over i (presumably 0..8, a 3x3 neighbourhood) and
 * some conditions are not visible in this excerpt.
 *
 * @param s           codec context
 * @param mb_x        block column of the top-left block
 * @param mb_y        block row of the top-left block
 * @param plane_index plane to evaluate
 * @return distortion + rate * penalty_factor
 */
3038 static int get_4block_rd(SnowContext *s, int mb_x, int mb_y, int plane_index){
3040 Plane *p= &s->plane[plane_index];
3041 const int block_size = MB_SIZE >> s->block_max_depth;
3042 const int block_w = plane_index ? block_size/2 : block_size;
3043 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
3044 const int obmc_stride= plane_index ? block_size : 2*block_size;
3045 const int ref_stride= s->current_picture.linesize[plane_index];
3046 uint8_t *dst= s->current_picture.data[plane_index];
3047 uint8_t *src= s-> input_picture.data[plane_index];
/* Constant buffer passed as the coefficient input of add_yblock() with a
 * dst_stride of 0, so the same elements are read for every row. */
3048 static const DWTELEM zero_dst[4096]; //FIXME
3049 const int b_stride = s->b_width << s->block_max_depth;
3050 const int w= p->width;
3051 const int h= p->height;
3054 const int penalty_factor= get_penalty_factor(s->lambda, s->lambda2, s->avctx->me_cmp);
3057 int mb_x2= mb_x + (i%3) - 1;
3058 int mb_y2= mb_y + (i/3) - 1;
3059 int x= block_w*mb_x2 + block_w/2;
3060 int y= block_w*mb_y2 + block_w/2;
3062 add_yblock(s, 0, NULL, zero_dst, dst, obmc,
3063 x, y, block_w, block_w, w, h, /*dst_stride*/0, ref_stride, obmc_stride, mb_x2, mb_y2, 1, 1, plane_index);
3065 //FIXME find a cleaner/simpler way to skip the outside stuff
/* Rows above and below the plane, then columns left and right of it, are
 * copied from the input picture. */
3066 for(y2= y; y2<0; y2++)
3067 memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, block_w);
3068 for(y2= h; y2<y+block_w; y2++)
3069 memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, block_w);
3071 for(y2= y; y2<y+block_w; y2++)
3072 memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, -x);
3075 for(y2= y; y2<y+block_w; y2++)
3076 memcpy(dst + w + y2*ref_stride, src + w + y2*ref_stride, x+block_w - w);
/* me_cmp[1] is used for block width 8 and me_cmp[0] for width 16. */
3079 assert(block_w== 8 || block_w==16);
3080 distortion += s->dsp.me_cmp[block_w==8](&s->m, src + x + y*ref_stride, dst + x + y*ref_stride, ref_stride, block_w);
3084 BlockNode *b= &s->block[mb_x+mb_y*b_stride];
3085 int merged= same_block(b,b+1) && same_block(b,b+b_stride) && same_block(b,b+b_stride+1);
/* When the four blocks are identical, the four entries of dxy that address
 * them individually are skipped in the loop below. */
3093 rate = get_block_bits(s, mb_x, mb_y, 2);
3094 for(i=merged?4:0; i<9; i++){
3095 static const int dxy[9][2] = {{0,0},{1,0},{0,1},{1,1},{2,0},{2,1},{-1,2},{0,2},{1,2}};
3096 rate += get_block_bits(s, mb_x + dxy[i][0], mb_y + dxy[i][1], 1);
3099 return distortion + rate*penalty_factor;
/**
 * Evaluates a candidate for the block at (mb_x, mb_y): writes the candidate
 * into the block node and computes its score with get_block_rd().
 *
 * NOTE(review): the if/else lines choosing between the intra and the inter
 * setup, the comparison against *best_rd, the restoring from backup and the
 * return statements are not visible in this excerpt.
 *
 * @param s          codec context
 * @param mb_x       block column
 * @param mb_y       block row
 * @param p          candidate values: three colours for the intra setup;
 *                   p[0] and p[1] feed the cache key of the other setup
 * @param intra      presumably selects the intra setup
 * @param obmc_edged OBMC weights passed on to get_block_rd()
 * @param best_rd    best score so far; presumably updated when the
 *                   candidate is better
 * @return not visible in this excerpt
 */
3102 static av_always_inline int check_block(SnowContext *s, int mb_x, int mb_y, int p[3], int intra, const uint8_t *obmc_edged, int *best_rd){
3103 const int b_stride= s->b_width << s->block_max_depth;
3104 BlockNode *block= &s->block[mb_x + mb_y * b_stride];
3105 BlockNode backup= *block;
3106 int rd, index, value;
3108 assert(mb_x>=0 && mb_y>=0);
3109 assert(mb_x<b_stride);
/* Intra setup: the candidate is a colour triple. */
3112 block->color[0] = p[0];
3113 block->color[1] = p[1];
3114 block->color[2] = p[2];
3115 block->type |= BLOCK_INTRA;
/* Other setup: look the candidate up in s->me_cache. The stored value
 * combines the cache generation with p[0], p[1] and block->ref. */
3117 index= (p[0] + 31*p[1]) & (ME_CACHE_SIZE-1);
3118 value= s->me_cache_generation + (p[0]>>10) + (p[1]<<6) + (block->ref<<12);
3119 if(s->me_cache[index] == value)
3121 s->me_cache[index]= value;
3125 block->type &= ~BLOCK_INTRA;
/* The score is computed on plane 0. */
3128 rd= get_block_rd(s, mb_x, mb_y, 0, obmc_edged);
3140 /* special case for int[2] args we discard afterward, fixes compilation prob with gcc 2.95 */
/**
 * Convenience wrapper around check_block() for a non-intra candidate: packs
 * p0 and p1 into a two-element array and calls check_block() with intra set
 * to 0.
 *
 * @param s          codec context
 * @param mb_x       block column
 * @param mb_y       block row
 * @param p0         first candidate value (becomes p[0])
 * @param p1         second candidate value (becomes p[1])
 * @param obmc_edged OBMC weights passed on to check_block()
 * @param best_rd    passed on to check_block()
 * @return the return value of check_block()
 */
3141 static av_always_inline int check_block_inter(SnowContext *s, int mb_x, int mb_y, int p0, int p1, const uint8_t *obmc_edged, int *best_rd){
3142 int p[2] = {p0, p1};
3143 return check_block(s, mb_x, mb_y, p, 0, obmc_edged, best_rd);
/**
 * Evaluates one non-intra candidate for a whole 2x2 group of blocks: the
 * top-left block is copied to its three companions and the group is scored
 * with get_4block_rd().
 *
 * NOTE(review): the assignment of the candidate to the block, the
 * comparison against *best_rd, the condition around the restore and the
 * return statements are not visible in this excerpt.
 *
 * @param s       codec context
 * @param mb_x    block column of the top-left block (asserted even)
 * @param mb_y    block row of the top-left block (asserted even)
 * @param p0      first candidate value, part of the cache key
 * @param p1      second candidate value, part of the cache key
 * @param ref     reference index of the candidate (not used in the visible
 *                lines)
 * @param best_rd best score so far; presumably updated when the candidate
 *                is better
 * @return not visible in this excerpt
 */
3146 static av_always_inline int check_4block_inter(SnowContext *s, int mb_x, int mb_y, int p0, int p1, int ref, int *best_rd){
3147 const int b_stride= s->b_width << s->block_max_depth;
3148 BlockNode *block= &s->block[mb_x + mb_y * b_stride];
/* Saved copies of the four blocks of the group. */
3149 BlockNode backup[4]= {block[0], block[1], block[b_stride], block[b_stride+1]};
3150 int rd, index, value;
3152 assert(mb_x>=0 && mb_y>=0);
3153 assert(mb_x<b_stride);
3154 assert(((mb_x|mb_y)&1) == 0);
/* Look the candidate up in s->me_cache, keyed like in check_block(). */
3156 index= (p0 + 31*p1) & (ME_CACHE_SIZE-1);
3157 value= s->me_cache_generation + (p0>>10) + (p1<<6) + (block->ref<<12);
3158 if(s->me_cache[index] == value)
3160 s->me_cache[index]= value;
3165 block->type &= ~BLOCK_INTRA;
/* Give all four blocks of the group the same parameters. */
3166 block[1]= block[b_stride]= block[b_stride+1]= *block;
/* The score is computed on plane 0. */
3168 rd= get_4block_rd(s, mb_x, mb_y, 0);
/* Put the saved blocks back. */
3175 block[0]= backup[0];
3176 block[1]= backup[1];
3177 block[b_stride]= backup[2];
3178 block[b_stride+1]= backup[3];
3183 static void iterative_me(SnowContext *s){
3184 int pass, mb_x, mb_y;
3185 const int b_width = s->b_width << s->block_max_depth;
3186 const int b_height= s->b_height << s->block_max_depth;
3187 const int b_stride= b_width;
3191 RangeCoder r = s->c;
3192 uint8_t state[sizeof(s->block_state)];
3193 memcpy(state, s->block_state, sizeof(s->block_state));
3194 for(mb_y= 0; mb_y<s->b_height; mb_y++)
3195 for(mb_x= 0; mb_x<s->b_width; mb_x++)
3196 encode_q_branch(s, 0, mb_x, mb_y);
3198 memcpy(s->block_state, state, sizeof(s->block_state));
3201 for(pass=0; pass<25; pass++){
3204 for(mb_y= 0; mb_y<b_height; mb_y++){
3205 for(mb_x= 0; mb_x<b_width; mb_x++){
3206 int dia_change, i, j, ref;
3207 int best_rd= INT_MAX, ref_rd;
3208 BlockNode backup, ref_b;
3209 const int index= mb_x + mb_y * b_stride;
3210 BlockNode *block= &s->block[index];
3211 BlockNode *tb = mb_y ? &s->block[index-b_stride ] : NULL;
3212 BlockNode *lb = mb_x ? &s->block[index -1] : NULL;
3213 BlockNode *rb = mb_x+1<b_width ? &s->block[index +1] : NULL;
3214 BlockNode *bb = mb_y+1<b_height ? &s->block[index+b_stride ] : NULL;
3215 BlockNode *tlb= mb_x && mb_y ? &s->block[index-b_stride-1] : NULL;
3216 BlockNode *trb= mb_x+1<b_width && mb_y ? &s->block[index-b_stride+1] : NULL;
3217 BlockNode *blb= mb_x && mb_y+1<b_height ? &s->block[index+b_stride-1] : NULL;
3218 BlockNode *brb= mb_x+1<b_width && mb_y+1<b_height ? &s->block[index+b_stride+1] : NULL;
3219 const int b_w= (MB_SIZE >> s->block_max_depth);
3220 uint8_t obmc_edged[b_w*2][b_w*2];
3222 if(pass && (block->type & BLOCK_OPT))
3224 block->type |= BLOCK_OPT;
3228 if(!s->me_cache_generation)
3229 memset(s->me_cache, 0, sizeof(s->me_cache));
3230 s->me_cache_generation += 1<<22;
3235 memcpy(obmc_edged, obmc_tab[s->block_max_depth], b_w*b_w*4);
3237 for(y=0; y<b_w*2; y++)
3238 memset(obmc_edged[y], obmc_edged[y][0] + obmc_edged[y][b_w-1], b_w);
3239 if(mb_x==b_stride-1)
3240 for(y=0; y<b_w*2; y++)
3241 memset(obmc_edged[y]+b_w, obmc_edged[y][b_w] + obmc_edged[y][b_w*2-1], b_w);
3243 for(x=0; x<b_w*2; x++)
3244 obmc_edged[0][x] += obmc_edged[b_w-1][x];
3245 for(y=1; y<b_w; y++)
3246 memcpy(obmc_edged[y], obmc_edged[0], b_w*2);
3248 if(mb_y==b_height-1){
3249 for(x=0; x<b_w*2; x++)
3250 obmc_edged[b_w*2-1][x] += obmc_edged[b_w][x];
3251 for(y=b_w; y<b_w*2-1; y++)
3252 memcpy(obmc_edged[y], obmc_edged[b_w*2-1], b_w*2);
3256 //skip stuff outside the picture
3257 if(mb_x==0 || mb_y==0 || mb_x==b_width-1 || mb_y==b_height-1)
3259 uint8_t *src= s-> input_picture.data[0];
3260 uint8_t *dst= s->current_picture.data[0];
3261 const int stride= s->current_picture.linesize[0];
3262 const int block_w= MB_SIZE >> s->block_max_depth;
3263 const int sx= block_w*mb_x - block_w/2;
3264 const int sy= block_w*mb_y - block_w/2;
3265 const int w= s->plane[0].width;
3266 const int h= s->plane[0].height;
3270 memcpy(dst + sx + y*stride, src + sx + y*stride, block_w*2);
3271 for(y=h; y<sy+block_w*2; y++)
3272 memcpy(dst + sx + y*stride, src + sx + y*stride, block_w*2);
3274 for(y=sy; y<sy+block_w*2; y++)
3275 memcpy(dst + sx + y*stride, src + sx + y*stride, -sx);
3277 if(sx+block_w*2 > w){
3278 for(y=sy; y<sy+block_w*2; y++)
3279 memcpy(dst + w + y*stride, src + w + y*stride, sx+block_w*2 - w);
3283 // intra(black) = neighbors' contribution to the current block
3285 color[i]= get_dc(s, mb_x, mb_y, i);
3287 // get previous score (can't be cached due to OBMC)
3288 if(pass > 0 && (block->type&BLOCK_INTRA)){
3289 int color0[3]= {block->color[0], block->color[1], block->color[2]};
3290 check_block(s, mb_x, mb_y, color0, 1, *obmc_edged, &best_rd);
3292 check_block_inter(s, mb_x, mb_y, block->mx, block->my, *obmc_edged, &best_rd);
3296 for(ref=0; ref < s->ref_frames; ref++){
3297 int16_t (*mvr)[2]= &s->ref_mvs[ref][index];
3298 if(s->ref_scores[ref][index] > s->ref_scores[ref_b.ref][index]*3/2) //FIXME tune threshold
3303 check_block_inter(s, mb_x, mb_y, mvr[0][0], mvr[0][1], *obmc_edged, &best_rd);
3304 check_block_inter(s, mb_x, mb_y, 0, 0, *obmc_edged, &best_rd);
3306 check_block_inter(s, mb_x, mb_y, mvr[-b_stride][0], mvr[-b_stride][1], *obmc_edged, &best_rd);
3308 check_block_inter(s, mb_x, mb_y, mvr[-1][0], mvr[-1][1], *obmc_edged, &best_rd);
3310 check_block_inter(s, mb_x, mb_y, mvr[1][0], mvr[1][1], *obmc_edged, &best_rd);
3312 check_block_inter(s, mb_x, mb_y, mvr[b_stride][0], mvr[b_stride][1], *obmc_edged, &best_rd);
3315 //FIXME avoid subpel interpol / round to nearest integer
3318 for(i=0; i<FFMAX(s->avctx->dia_size, 1); i++){
3320 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+4*(i-j), block->my+(4*j), *obmc_edged, &best_rd);
3321 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx-4*(i-j), block->my-(4*j), *obmc_edged, &best_rd);
3322 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+4*(i-j), block->my-(4*j), *obmc_edged, &best_rd);
3323 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx-4*(i-j), block->my+(4*j), *obmc_edged, &best_rd);
3329 static const int square[8][2]= {{+1, 0},{-1, 0},{ 0,+1},{ 0,-1},{+1,+1},{-1,-1},{+1,-1},{-1,+1},};
3332 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+square[i][0], block->my+square[i][1], *obmc_edged, &best_rd);
3334 //FIXME or try the standard 2 pass qpel or similar
3336 mvr[0][0]= block->mx;
3337 mvr[0][1]= block->my;
3338 if(ref_rd > best_rd){
3346 check_block(s, mb_x, mb_y, color, 1, *obmc_edged, &best_rd);
3347 //FIXME RD style color selection
3349 if(!same_block(block, &backup)){
3350 if(tb ) tb ->type &= ~BLOCK_OPT;
3351 if(lb ) lb ->type &= ~BLOCK_OPT;
3352 if(rb ) rb ->type &= ~BLOCK_OPT;
3353 if(bb ) bb ->type &= ~BLOCK_OPT;
3354 if(tlb) tlb->type &= ~BLOCK_OPT;
3355 if(trb) trb->type &= ~BLOCK_OPT;
3356 if(blb) blb->type &= ~BLOCK_OPT;
3357 if(brb) brb->type &= ~BLOCK_OPT;
3362 av_log(NULL, AV_LOG_ERROR, "pass:%d changed:%d\n", pass, change);
3367 if(s->block_max_depth == 1){
3369 for(mb_y= 0; mb_y<b_height; mb_y+=2){
3370 for(mb_x= 0; mb_x<b_width; mb_x+=2){
3372 int best_rd, init_rd;
3373 const int index= mb_x + mb_y * b_stride;
3376 b[0]= &s->block[index];
3378 b[2]= b[0]+b_stride;
3380 if(same_block(b[0], b[1]) &&
3381 same_block(b[0], b[2]) &&
3382 same_block(b[0], b[3]))
3385 if(!s->me_cache_generation)
3386 memset(s->me_cache, 0, sizeof(s->me_cache));
3387 s->me_cache_generation += 1<<22;
3389 init_rd= best_rd= get_4block_rd(s, mb_x, mb_y, 0);
3391 //FIXME more multiref search?
3392 check_4block_inter(s, mb_x, mb_y,
3393 (b[0]->mx + b[1]->mx + b[2]->mx + b[3]->mx + 2) >> 2,
3394 (b[0]->my + b[1]->my + b[2]->my + b[3]->my + 2) >> 2, 0, &best_rd);
3397 if(!(b[i]->type&BLOCK_INTRA))
3398 check_4block_inter(s, mb_x, mb_y, b[i]->mx, b[i]->my, b[i]->ref, &best_rd);
3400 if(init_rd != best_rd)
3404 av_log(NULL, AV_LOG_ERROR, "pass:4mv changed:%d\n", change*4);
// Quantize the coefficients of subband b in place.
//
// s      - codec context; s->qlog and s->spatial_decomposition_count are read
// b      - subband description (level, width, height and per-band qlog are read)
// src    - coefficient array, addressed as src[x + y*stride]
// stride - distance in elements between vertically adjacent coefficients
// bias   - flag: non-zero selects a rounding offset of 0, zero selects an
//          offset of (3*qmul)>>3 (the parameter is overwritten with the offset)
//
// The step size qmul comes from the qexp table, indexed by the clipped sum of
// the global and the per-band qlog. Nothing is touched when s->qlog equals
// LOSSLESS_QLOG.
3408 static void quantize(SnowContext *s, SubBand *b, DWTELEM *src, int stride, int bias){
3409 const int level= b->level;
3410 const int w= b->width;
3411 const int h= b->height;
3412 const int qlog= clip(s->qlog + b->qlog, 0, QROOT*16);
3413 const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
3414 int x,y, thres1, thres2;
3417 if(s->qlog == LOSSLESS_QLOG) return;
// From here on bias holds the rounding offset, no longer the caller flag.
3419 bias= bias ? 0 : (3*qmul)>>3;
3420 thres1= ((qmul - bias)>>QEXPSHIFT) - 1;
// First variant: the quotient is formed without adding the offset.
// NOTE(review): the branch that chooses between the two variants is not
// visible in these lines.
3426 int i= src[x + y*stride];
// One unsigned comparison serves as the range test on the coefficient.
3428 if((unsigned)(i+thres1) > thres2){
3431 i/= qmul; //FIXME optimize
3432 src[x + y*stride]= i;
3436 i/= qmul; //FIXME optimize
3437 src[x + y*stride]= -i;
3440 src[x + y*stride]= 0;
// Second variant: the offset is added before dividing by qmul.
3446 int i= src[x + y*stride];
3448 if((unsigned)(i+thres1) > thres2){
3451 i= (i + bias) / qmul; //FIXME optimize
3452 src[x + y*stride]= i;
3456 i= (i + bias) / qmul; //FIXME optimize
3457 src[x + y*stride]= -i;
3460 src[x + y*stride]= 0;
// Only taken for the band whose level is the last decomposition level; the
// timer call inside is commented out.
3464 if(level+1 == s->spatial_decomposition_count){
3465 // STOP_TIMER("quantize")
// Dequantize rows start_y (inclusive) up to end_y (exclusive) of subband b
// inside the slice buffer sb.
//
// Each row is fetched with slice_buffer_get_line() at
// y*b->stride_line + b->buf_y_offset and then advanced by b->buf_x_offset.
// qmul is derived from the clipped sum of s->qlog and b->qlog through the qexp
// table, and qadd is (s->qbias*qmul)>>QBIAS_SHIFT. Two reconstruction forms
// are visible: (i*qmul + qadd)>>QEXPSHIFT and the mirrored
// -((-i*qmul + qadd)>>QEXPSHIFT). Returns without changes when s->qlog equals
// LOSSLESS_QLOG.
//
// NOTE(review): src and stride appear only in a commented-out line among the
// visible lines.
3469 static void dequantize_slice_buffered(SnowContext *s, slice_buffer * sb, SubBand *b, DWTELEM *src, int stride, int start_y, int end_y){
3470 const int w= b->width;
3471 const int qlog= clip(s->qlog + b->qlog, 0, QROOT*16);
3472 const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
3473 const int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
3477 if(s->qlog == LOSSLESS_QLOG) return;
3479 for(y=start_y; y<end_y; y++){
3480 // DWTELEM * line = slice_buffer_get_line_from_address(sb, src + (y * stride));
3481 DWTELEM * line = slice_buffer_get_line(sb, (y * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
// Mirrored form: the value is negated, scaled, shifted and negated again.
3485 line[x]= -((-i*qmul + qadd)>>(QEXPSHIFT)); //FIXME try different bias
3487 line[x]= (( i*qmul + qadd)>>(QEXPSHIFT));
// The timer is stopped only for bands wider than 200 coefficients.
3491 if(w > 200 /*level+1 == s->spatial_decomposition_count*/){
3492 STOP_TIMER("dquant")
// Dequantize subband b in place in src (addressed as src[x + y*stride]).
//
// qmul is derived from the clipped sum of s->qlog and b->qlog through the qexp
// table, and qadd is (s->qbias*qmul)>>QBIAS_SHIFT. Two reconstruction forms
// are visible: (i*qmul + qadd)>>QEXPSHIFT and the mirrored
// -((-i*qmul + qadd)>>QEXPSHIFT). Returns without changes when s->qlog equals
// LOSSLESS_QLOG.
3496 static void dequantize(SnowContext *s, SubBand *b, DWTELEM *src, int stride){
3497 const int w= b->width;
3498 const int h= b->height;
3499 const int qlog= clip(s->qlog + b->qlog, 0, QROOT*16);
3500 const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
3501 const int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
3505 if(s->qlog == LOSSLESS_QLOG) return;
3509 int i= src[x + y*stride];
// Mirrored form: the value is negated, scaled, shifted and negated again.
3511 src[x + y*stride]= -((-i*qmul + qadd)>>(QEXPSHIFT)); //FIXME try different bias
3513 src[x + y*stride]= (( i*qmul + qadd)>>(QEXPSHIFT));
// The timer is stopped only for bands wider than 200 coefficients.
3517 if(w > 200 /*level+1 == s->spatial_decomposition_count*/){
3518 STOP_TIMER("dquant")
// Subtract a neighbour-based prediction from every coefficient of subband b,
// in place in src (addressed as src[x + y*stride]).
//
// Both loops count downwards, from the last row and column to the first. The
// predictions read the elements at i-1 and in the row above (i-stride), which
// at that point have not been modified yet.
//
// Three predictor shapes are visible:
//   - mid_pred of left, top and top-right
//   - mid_pred of left, top and left + top - top-left
//   - the top neighbour alone
// with the left neighbour as fallback in the first two.
// NOTE(review): the conditions that select between these shapes (presumably
// involving use_median and x) are not visible in these lines, and neither is
// any use of inverse.
3522 static void decorrelate(SnowContext *s, SubBand *b, DWTELEM *src, int stride, int inverse, int use_median){
3523 const int w= b->width;
3524 const int h= b->height;
3527 for(y=h-1; y>=0; y--){
3528 for(x=w-1; x>=0; x--){
3529 int i= x + y*stride;
// The top-right neighbour is only used when a row above exists and x is not
// the last column.
3533 if(y && x+1<w) src[i] -= mid_pred(src[i - 1], src[i - stride], src[i - stride + 1]);
3534 else src[i] -= src[i - 1];
3536 if(y) src[i] -= mid_pred(src[i - 1], src[i - stride], src[i - 1] + src[i - stride] - src[i - 1 - stride]);
3537 else src[i] -= src[i - 1];
3540 if(y) src[i] -= src[i - stride];
// Add the neighbour-based prediction back to rows start_y (inclusive) up to
// end_y (exclusive) of subband b, working on lines of the slice buffer sb.
//
// This uses the same three predictor shapes as decorrelate(), with += instead
// of -=. The current row is fetched with slice_buffer_get_line(); the row
// above is read through prev.
// NOTE(review): the assignment of prev, and the conditions that select the
// predictor shape, are not visible in these lines. src and stride appear only
// in a commented-out line.
3546 static void correlate_slice_buffered(SnowContext *s, slice_buffer * sb, SubBand *b, DWTELEM *src, int stride, int inverse, int use_median, int start_y, int end_y){
3547 const int w= b->width;
// Fetch the row just above the first one to be processed.
3556 line = slice_buffer_get_line(sb, ((start_y - 1) * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
3558 for(y=start_y; y<end_y; y++){
3560 // line = slice_buffer_get_line_from_address(sb, src + (y * stride));
3561 line = slice_buffer_get_line(sb, (y * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
3565 if(y && x+1<w) line[x] += mid_pred(line[x - 1], prev[x], prev[x + 1]);
3566 else line[x] += line[x - 1];
3568 if(y) line[x] += mid_pred(line[x - 1], prev[x], line[x - 1] + prev[x] - prev[x - 1]);
3569 else line[x] += line[x - 1];
3572 if(y) line[x] += prev[x];
3577 // STOP_TIMER("correlate")
// Add the neighbour-based prediction back to every coefficient of subband b,
// in place in src (addressed as src[x + y*stride]).
//
// The predictor expressions are the same as in decorrelate(), applied with +=
// instead of -=.
// NOTE(review): the loop headers and the conditions that select the predictor
// shape are not visible in these lines.
3580 static void correlate(SnowContext *s, SubBand *b, DWTELEM *src, int stride, int inverse, int use_median){
3581 const int w= b->width;
3582 const int h= b->height;
3587 int i= x + y*stride;
// The top-right neighbour is only used when a row above exists and x is not
// the last column.
3591 if(y && x+1<w) src[i] += mid_pred(src[i - 1], src[i - stride], src[i - stride + 1]);
3592 else src[i] += src[i - 1];
3594 if(y) src[i] += mid_pred(src[i - 1], src[i - stride], src[i - 1] + src[i - stride] - src[i - 1 - stride]);
3595 else src[i] += src[i - 1];
3598 if(y) src[i] += src[i - stride];
// Write the frame header to the range coder s->c.
//
// Order of the visible writes:
//   1. the keyframe flag, coded with a private state buffer
//   2. version, always_reset, the decomposition settings, colorspace, the
//      chroma shifts, spatial_scalability and max_ref_frames-1
//   3. the per-band qlog values of planes 0 and 1
//   4. spatial_decomposition_type, qlog, mv_scale, qbias and block_max_depth,
//      each as a difference from the matching s->last_* field
// The s->last_* fields are updated at the end.
// NOTE(review): some lines of this function are not visible here, so any
// conditions guarding groups 2 and 3 are not documented.
3604 static void encode_header(SnowContext *s){
3605 int plane_index, level, orientation;
// The keyframe flag gets its own state buffer, reset to MID_STATE on each call.
3608 memset(kstate, MID_STATE, sizeof(kstate));
3610 put_rac(&s->c, kstate, s->keyframe);
// On keyframes, or when always_reset is set, the reference values used for
// differential coding are reset.
3611 if(s->keyframe || s->always_reset){
3613 s->last_spatial_decomposition_type=
3617 s->last_block_max_depth= 0;
3620 put_symbol(&s->c, s->header_state, s->version, 0);
3621 put_rac(&s->c, s->header_state, s->always_reset);
3622 put_symbol(&s->c, s->header_state, s->temporal_decomposition_type, 0);
3623 put_symbol(&s->c, s->header_state, s->temporal_decomposition_count, 0);
3624 put_symbol(&s->c, s->header_state, s->spatial_decomposition_count, 0);
3625 put_symbol(&s->c, s->header_state, s->colorspace_type, 0);
3626 put_symbol(&s->c, s->header_state, s->chroma_h_shift, 0);
3627 put_symbol(&s->c, s->header_state, s->chroma_v_shift, 0);
3628 put_rac(&s->c, s->header_state, s->spatial_scalability);
3629 // put_rac(&s->c, s->header_state, s->rate_scalability);
3630 put_symbol(&s->c, s->header_state, s->max_ref_frames-1, 0);
// Only planes 0 and 1 are written and orientation 2 is skipped;
// decode_header() fills plane 2 from plane 1 and orientation 2 from
// orientation 1.
3632 for(plane_index=0; plane_index<2; plane_index++){
3633 for(level=0; level<s->spatial_decomposition_count; level++){
3634 for(orientation=level ? 1:0; orientation<4; orientation++){
3635 if(orientation==2) continue;
3636 put_symbol(&s->c, s->header_state, s->plane[plane_index].band[level][orientation].qlog, 1);
// These parameters are sent as differences from the previously sent values.
3641 put_symbol(&s->c, s->header_state, s->spatial_decomposition_type - s->last_spatial_decomposition_type, 1);
3642 put_symbol(&s->c, s->header_state, s->qlog - s->last_qlog , 1);
3643 put_symbol(&s->c, s->header_state, s->mv_scale - s->last_mv_scale, 1);
3644 put_symbol(&s->c, s->header_state, s->qbias - s->last_qbias , 1);
3645 put_symbol(&s->c, s->header_state, s->block_max_depth - s->last_block_max_depth, 1);
// Remember what was sent so that the next header can be coded differentially.
3647 s->last_spatial_decomposition_type= s->spatial_decomposition_type;
3648 s->last_qlog = s->qlog;
3649 s->last_qbias = s->qbias;
3650 s->last_mv_scale = s->mv_scale;
3651 s->last_block_max_depth = s->block_max_depth;
// Read the frame header from the range coder s->c into the context.
//
// The reads mirror the writes of encode_header(): first the keyframe flag
// with a private state buffer, then the stream-level parameters, the per-band
// qlog values, and finally the parameters that are accumulated onto their
// current values.
// An unsupported version, a spatial_decomposition_type above 2 and a
// block_max_depth outside 0..1 are reported through av_log().
// NOTE(review): the return statements are not visible in these lines, so the
// status convention of the int result is not documented here.
3654 static int decode_header(SnowContext *s){
3655 int plane_index, level, orientation;
// The keyframe flag gets its own state buffer, reset to MID_STATE on each call.
3658 memset(kstate, MID_STATE, sizeof(kstate));
3660 s->keyframe= get_rac(&s->c, kstate);
// On keyframes, or when always_reset is set, the accumulated parameters are
// reset before the deltas below are added.
3661 if(s->keyframe || s->always_reset){
3663 s->spatial_decomposition_type=
3667 s->block_max_depth= 0;
3670 s->version= get_symbol(&s->c, s->header_state, 0);
3672 av_log(s->avctx, AV_LOG_ERROR, "version %d not supported", s->version);
3675 s->always_reset= get_rac(&s->c, s->header_state);
3676 s->temporal_decomposition_type= get_symbol(&s->c, s->header_state, 0);
3677 s->temporal_decomposition_count= get_symbol(&s->c, s->header_state, 0);
3678 s->spatial_decomposition_count= get_symbol(&s->c, s->header_state, 0);
3679 s->colorspace_type= get_symbol(&s->c, s->header_state, 0);
3680 s->chroma_h_shift= get_symbol(&s->c, s->header_state, 0);
3681 s->chroma_v_shift= get_symbol(&s->c, s->header_state, 0);
3682 s->spatial_scalability= get_rac(&s->c, s->header_state);
3683 // s->rate_scalability= get_rac(&s->c, s->header_state);
3684 s->max_ref_frames= get_symbol(&s->c, s->header_state, 0)+1;
// Plane 2 copies the qlog of plane 1 and orientation 2 copies that of
// orientation 1; only the remaining values are read from the stream.
3686 for(plane_index=0; plane_index<3; plane_index++){
3687 for(level=0; level<s->spatial_decomposition_count; level++){
3688 for(orientation=level ? 1:0; orientation<4; orientation++){
3690 if (plane_index==2) q= s->plane[1].band[level][orientation].qlog;
3691 else if(orientation==2) q= s->plane[plane_index].band[level][1].qlog;
3692 else q= get_symbol(&s->c, s->header_state, 1);
3693 s->plane[plane_index].band[level][orientation].qlog= q;
// From here on each decoded symbol is added to the current value of its field.
3699 s->spatial_decomposition_type+= get_symbol(&s->c, s->header_state, 1);
3700 if(s->spatial_decomposition_type > 2){
3701 av_log(s->avctx, AV_LOG_ERROR, "spatial_decomposition_type %d not supported", s->spatial_decomposition_type);
3705 s->qlog += get_symbol(&s->c, s->header_state, 1);
3706 s->mv_scale += get_symbol(&s->c, s->header_state, 1);
3707 s->qbias += get_symbol(&s->c, s->header_state, 1);
3708 s->block_max_depth+= get_symbol(&s->c, s->header_state, 1);
// Values outside 0..1 are reported and block_max_depth is forced back to 0.
// The same message is logged for negative values as well.
3709 if(s->block_max_depth > 1 || s->block_max_depth < 0){
3710 av_log(s->avctx, AV_LOG_ERROR, "block_max_depth= %d is too large", s->block_max_depth);
3711 s->block_max_depth= 0;
// Step a running value v through QROOT iterations, multiplying it by
// 2^(1/QROOT) each time.
// NOTE(review): presumably each step is stored into the qexp table read by the
// quantizer functions; the initial value of v and the store are not visible in
// these lines.
3718 static void init_qexp(void){
3722 for(i=0; i<QROOT; i++){
3724 v *= pow(2, 1.0 / QROOT);
// Set up the SnowContext stored in avctx->priv_data.
//
// Visible steps:
//   - dsputil_init() on s->dsp, followed by two macro bodies: one points the
//     qpel and no-rounding qpel table entries at put_h264_qpel_pixels_tab, the
//     other (mcfh) points pixel table entries at mc_block_hpel functions
//   - spatial_decomposition_count is fixed to 5, both chroma shifts to 1, and
//     spatial_decomposition_type is taken from avctx->prediction_method
//   - spatial_dwt_buffer is allocated with width*height DWTELEM entries
//   - mv_scale and block_max_depth are derived from the QPEL and 4MV flags
//   - for each of the 3 planes, every subband gets its buffer pointer, stride,
//     width, height and slice-buffer offsets, plus an x_coeff allocation
//   - scale_mv_ref[i][j] is filled with 256*(i+1)/(j+1)
//   - a buffer for s->mconly_picture is requested through avctx->get_buffer
// NOTE(review): none of the av_mallocz() results, nor the get_buffer() result,
// is checked in the visible lines. The return statement is not visible either.
3728 static int common_init(AVCodecContext *avctx){
3729 SnowContext *s = avctx->priv_data;
3731 int level, orientation, plane_index, dec;
3736 dsputil_init(&s->dsp, avctx);
3739 s->dsp.put_qpel_pixels_tab [0][dy+dx/4]=\
3740 s->dsp.put_no_rnd_qpel_pixels_tab[0][dy+dx/4]=\
3741 s->dsp.put_h264_qpel_pixels_tab[0][dy+dx/4];\
3742 s->dsp.put_qpel_pixels_tab [1][dy+dx/4]=\
3743 s->dsp.put_no_rnd_qpel_pixels_tab[1][dy+dx/4]=\
3744 s->dsp.put_h264_qpel_pixels_tab[1][dy+dx/4];
3763 #define mcfh(dx,dy)\
3764 s->dsp.put_pixels_tab [0][dy/4+dx/8]=\
3765 s->dsp.put_no_rnd_pixels_tab[0][dy/4+dx/8]=\
3766 mc_block_hpel ## dx ## dy ## 16;\
3767 s->dsp.put_pixels_tab [1][dy/4+dx/8]=\
3768 s->dsp.put_no_rnd_pixels_tab[1][dy/4+dx/8]=\
3769 mc_block_hpel ## dx ## dy ## 8;
3779 dec= s->spatial_decomposition_count= 5;
3780 s->spatial_decomposition_type= avctx->prediction_method; //FIXME add decorrelator type r transform_type
3782 s->chroma_h_shift= 1; //FIXME XXX
3783 s->chroma_v_shift= 1;
3785 // dec += FFMAX(s->chroma_h_shift, s->chroma_v_shift);
3787 width= s->avctx->width;
3788 height= s->avctx->height;
// One DWTELEM per pixel of the full-size picture, zero-initialised.
3790 s->spatial_dwt_buffer= av_mallocz(width*height*sizeof(DWTELEM));
3792 s->mv_scale= (s->avctx->flags & CODEC_FLAG_QPEL) ? 2 : 4;
3793 s->block_max_depth= (s->avctx->flags & CODEC_FLAG_4MV) ? 1 : 0;
3795 for(plane_index=0; plane_index<3; plane_index++){
3796 int w= s->avctx->width;
3797 int h= s->avctx->height;
3800 w>>= s->chroma_h_shift;
3801 h>>= s->chroma_v_shift;
3803 s->plane[plane_index].width = w;
3804 s->plane[plane_index].height= h;
3805 //av_log(NULL, AV_LOG_DEBUG, "%d %d\n", w, h);
// Levels are visited from the highest index down to 0.
3806 for(level=s->spatial_decomposition_count-1; level>=0; level--){
3807 for(orientation=level ? 1 : 0; orientation<4; orientation++){
3808 SubBand *b= &s->plane[plane_index].band[level][orientation];
// Every band starts at the beginning of the shared DWT buffer; the visible
// adjustments below move the pointer and offsets for some orientations.
3810 b->buf= s->spatial_dwt_buffer;
3812 b->stride= s->plane[plane_index].width << (s->spatial_decomposition_count - level);
3813 b->width = (w + !(orientation&1))>>1;
3814 b->height= (h + !(orientation>1))>>1;
3816 b->stride_line = 1 << (s->spatial_decomposition_count - level);
3817 b->buf_x_offset = 0;
3818 b->buf_y_offset = 0;
3822 b->buf_x_offset = (w+1)>>1;
3825 b->buf += b->stride>>1;
3826 b->buf_y_offset = b->stride_line >> 1;
3830 b->parent= &s->plane[plane_index].band[level-1][orientation];
3831 b->x_coeff=av_mallocz(((b->width+1) * b->height+1)*sizeof(x_and_coeff));
// Table of 256*(i+1)/(j+1) for every pair of reference indices.
3838 for(i=0; i<MAX_REF_FRAMES; i++)
3839 for(j=0; j<MAX_REF_FRAMES; j++)
3840 scale_mv_ref[i][j] = 256*(i+1)/(j+1);
3844 width= s->width= avctx->width;
3845 height= s->height= avctx->height;
3847 assert(width && height);
3849 s->avctx->get_buffer(s->avctx, &s->mconly_picture);
// Convert a qscale value to a qlog value.
//
// The result is QROOT * log2(qscale / FF_QP2LAMBDA), rounded with rint(),
// plus the constant 61*QROOT/8. The division by FF_QP2LAMBDA is done in float.
3854 static int qscale2qlog(int qscale){
3855 return rint(QROOT*log(qscale / (float)FF_QP2LAMBDA)/log(2))
3856 + 61*QROOT/8; //<64 >60
// Single-pass rate control step: estimate the complexity of the frame from
// the plane 0 coefficients, ask the rate controller for a quality, and move
// s->qlog to the value that matches it.
//
// s    - codec context; s->m receives the complexity estimate, and s->lambda
//        and s->qlog are updated
// pict - frame being coded; pict->quality is overwritten with the result of
//        ff_rate_estimate_qscale()
//
// NOTE(review): the return statements are not visible in these lines.
// encode_frame() compares the result against INT_MIN, so presumably the
// pict->quality < 0 branch yields INT_MIN and the normal path yields delta_qlog.
3859 static int ratecontrol_1pass(SnowContext *s, AVFrame *pict)
3861 /* estimate the frame's complexity as a sum of weighted dwt coefs.
3862 * FIXME we know exact mv bits at this point,
3863 * but ratecontrol isn't set up to include them. */
3864 uint32_t coef_sum= 0;
3865 int level, orientation, delta_qlog;
3867 for(level=0; level<s->spatial_decomposition_count; level++){
3868 for(orientation=level ? 1 : 0; orientation<4; orientation++){
3869 SubBand *b= &s->plane[0].band[level][orientation];
3870 DWTELEM *buf= b->buf;
3871 const int w= b->width;
3872 const int h= b->height;
3873 const int stride= b->stride;
// The weight uses a fixed base of 2*QROOT in place of s->qlog.
3874 const int qlog= clip(2*QROOT + b->qlog, 0, QROOT*16);
3875 const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
3876 const int qdiv= (1<<16)/qmul;
// The band is measured between a decorrelate() call and the correlate() call
// that follows the summation.
3879 decorrelate(s, b, buf, stride, 1, 0);
3882 coef_sum+= abs(buf[x+y*stride]) * qdiv >> 16;
3884 correlate(s, b, buf, stride, 1, 0);
3888 /* ugly, ratecontrol just takes a sqrt again */
3889 coef_sum = (uint64_t)coef_sum * coef_sum >> 16;
3890 assert(coef_sum < INT_MAX);
// For I pictures the estimate goes into mb_var_sum and mc_mb_var_sum is
// cleared; the second pair of assignments does the opposite (presumably the
// else branch, whose keyword line is not visible).
3892 if(pict->pict_type == I_TYPE){
3893 s->m.current_picture.mb_var_sum= coef_sum;
3894 s->m.current_picture.mc_mb_var_sum= 0;
3896 s->m.current_picture.mc_mb_var_sum= coef_sum;
3897 s->m.current_picture.mb_var_sum= 0;
3900 pict->quality= ff_rate_estimate_qscale(&s->m, 1);
3901 if (pict->quality < 0)
3903 s->lambda= pict->quality * 3/2;
// After this pair of statements s->qlog equals qscale2qlog(pict->quality).
3904 delta_qlog= qscale2qlog(pict->quality) - s->qlog;
3905 s->qlog+= delta_qlog;
// Derive a qlog value for every subband of plane p.
//
// For each band the DWT buffer is cleared, a single coefficient near the
// middle of the band is set to 256*256, and the inverse DWT is run. The
// reconstructed samples are then visited, and b->qlog is set from
// log(352256.0/sqrt(error)) taken to the base 2^(1/QROOT), plus 0.5 and
// truncated to int.
// NOTE(review): the accumulation of error from d is not visible in these lines.
// NOTE(review): the memset sizes the buffer with sizeof(int), while
// common_init() allocates it with sizeof(DWTELEM). Confirm the two agree.
3909 static void calculate_vissual_weight(SnowContext *s, Plane *p){
3910 int width = p->width;
3911 int height= p->height;
3912 int level, orientation, x, y;
3914 for(level=0; level<s->spatial_decomposition_count; level++){
3915 for(orientation=level ? 1 : 0; orientation<4; orientation++){
3916 SubBand *b= &p->band[level][orientation];
3917 DWTELEM *buf= b->buf;
// Single non-zero coefficient placed at the centre of this band.
3920 memset(s->spatial_dwt_buffer, 0, sizeof(int)*width*height);
3921 buf[b->width/2 + b->height/2*b->stride]= 256*256;
3922 ff_spatial_idwt(s->spatial_dwt_buffer, width, height, width, s->spatial_decomposition_type, s->spatial_decomposition_count);
3923 for(y=0; y<height; y++){
3924 for(x=0; x<width; x++){
3925 int64_t d= s->spatial_dwt_buffer[x + y*width];
3930 b->qlog= (int)(log(352256.0/sqrt(error)) / log(pow(2.0, 1.0/QROOT))+0.5);
3931 // av_log(NULL, AV_LOG_DEBUG, "%d %d %d\n", level, orientation, b->qlog/*, sqrt(error)*/);
// Encoder initialisation for the SnowContext stored in avctx->priv_data.
//
// Visible steps:
//   - logs an error when strict_std_compliance is above the experimental
//     level, and when the 9/7 wavelet is combined with QSCALE and a
//     global_quality of 0
//   - copies flags and bit_rate into s->m and allocates the motion estimation
//     scratch buffers
//   - clamps avctx->refs into 1..MAX_REF_FRAMES to get max_ref_frames
//   - allocates avctx->stats_out for pass 1 if it is not set yet
//   - initialises rate control when pass 2 is requested or QSCALE is not set
//   - computes the per-band weights of all 3 planes
//   - accepts PIX_FMT_YUV420P and logs an error for other pixel formats
//   - sets both chroma shifts to 1, installs the comparison functions and
//     requests a buffer for s->input_picture
//   - with me_method ME_ITER, allocates ref_mvs and ref_scores per reference
// NOTE(review): the return statements that follow the error messages are not
// visible in these lines, and the allocation results are not checked in the
// visible lines.
3936 static int encode_init(AVCodecContext *avctx)
3938 SnowContext *s = avctx->priv_data;
3941 if(avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL){
3942 av_log(avctx, AV_LOG_ERROR, "this codec is under development, files encoded with it may not be decodable with future versions!!!\n"
3943 "use vstrict=-2 / -strict -2 to use it anyway\n");
3947 if(avctx->prediction_method == DWT_97
3948 && (avctx->flags & CODEC_FLAG_QSCALE)
3949 && avctx->global_quality == 0){
3950 av_log(avctx, AV_LOG_ERROR, "the 9/7 wavelet is incompatible with lossless mode\n");
3960 s->m.flags = avctx->flags;
3961 s->m.bit_rate= avctx->bit_rate;
3963 s->m.me.scratchpad= av_mallocz((avctx->width+64)*2*16*2*sizeof(uint8_t));
3964 s->m.me.map = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t));
3965 s->m.me.score_map = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t));
3966 s->m.obmc_scratchpad= av_mallocz(MB_SIZE*MB_SIZE*12*sizeof(uint32_t));
3967 h263_encode_init(&s->m); //mv_penalty
// Clamp the requested reference count into 1..MAX_REF_FRAMES.
3969 s->max_ref_frames = FFMAX(FFMIN(avctx->refs, MAX_REF_FRAMES), 1);
3971 if(avctx->flags&CODEC_FLAG_PASS1){
3972 if(!avctx->stats_out)
3973 avctx->stats_out = av_mallocz(256);
3975 if((avctx->flags&CODEC_FLAG_PASS2) || !(avctx->flags&CODEC_FLAG_QSCALE)){
3976 if(ff_rate_control_init(&s->m) < 0)
// pass1_rc is set when neither QSCALE nor PASS2 is requested.
3979 s->pass1_rc= !(avctx->flags & (CODEC_FLAG_QSCALE|CODEC_FLAG_PASS2));
3981 for(plane_index=0; plane_index<3; plane_index++){
3982 calculate_vissual_weight(s, &s->plane[plane_index]);
3986 avctx->coded_frame= &s->current_picture;
3987 switch(avctx->pix_fmt){
3988 // case PIX_FMT_YUV444P:
3989 // case PIX_FMT_YUV422P:
3990 case PIX_FMT_YUV420P:
3992 // case PIX_FMT_YUV411P:
3993 // case PIX_FMT_YUV410P:
3994 s->colorspace_type= 0;
3996 /* case PIX_FMT_RGBA32:
4000 av_log(avctx, AV_LOG_ERROR, "format not supported\n");
4003 // avcodec_get_chroma_sub_sample(avctx->pix_fmt, &s->chroma_h_shift, &s->chroma_v_shift);
4004 s->chroma_h_shift= 1;
4005 s->chroma_v_shift= 1;
4007 ff_set_cmp(&s->dsp, s->dsp.me_cmp, s->avctx->me_cmp);
4008 ff_set_cmp(&s->dsp, s->dsp.me_sub_cmp, s->avctx->me_sub_cmp);
4010 s->avctx->get_buffer(s->avctx, &s->input_picture);
4012 if(s->avctx->me_method == ME_ITER){
4014 int size= s->b_width * s->b_height << 2*s->block_max_depth;
4015 for(i=0; i<s->max_ref_frames; i++){
4016 s->ref_mvs[i]= av_mallocz(size*sizeof(int16_t[2]));
4017 s->ref_scores[i]= av_mallocz(size*sizeof(uint32_t));
// Prepare the picture buffers for coding the next frame.
//
// Visible steps:
//   - if the current picture holds data, its three planes are edge-extended
//     with draw_edges() (chroma uses half the size and half the edge width)
//   - the reference list is rotated: the last slot is saved, the other entries
//     move one slot up, the current picture becomes last_picture[0], and the
//     saved slot becomes the new current picture
//   - the reference list is scanned while entries hold data
//   - a new buffer is requested for the current picture; a failure is logged
//   - key_frame of the current picture is set from s->keyframe
// NOTE(review): the body of the scan loop after the key_frame test, and the
// return statements, are not visible in these lines.
4024 static int frame_start(SnowContext *s){
4026 int w= s->avctx->width; //FIXME round up to x16 ?
4027 int h= s->avctx->height;
4029 if(s->current_picture.data[0]){
4030 draw_edges(s->current_picture.data[0], s->current_picture.linesize[0], w , h , EDGE_WIDTH );
4031 draw_edges(s->current_picture.data[1], s->current_picture.linesize[1], w>>1, h>>1, EDGE_WIDTH/2);
4032 draw_edges(s->current_picture.data[2], s->current_picture.linesize[2], w>>1, h>>1, EDGE_WIDTH/2);
// Rotate the reference list by one slot, recycling the last entry as the new
// current picture.
4035 tmp= s->last_picture[s->max_ref_frames-1];
4036 memmove(s->last_picture+1, s->last_picture, (s->max_ref_frames-1)*sizeof(AVFrame));
4037 s->last_picture[0]= s->current_picture;
4038 s->current_picture= tmp;
4044 for(i=0; i<s->max_ref_frames && s->last_picture[i].data[0]; i++)
4045 if(i && s->last_picture[i-1].key_frame)
4050 s->current_picture.reference= 1;
4051 if(s->avctx->get_buffer(s->avctx, &s->current_picture) < 0){
4052 av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed\n");
4056 s->current_picture.key_frame= s->keyframe;
// Encode one frame into buf with the range coder and return the value of
// ff_rac_terminate().
//
// avctx    - codec context; avctx->priv_data is the SnowContext
// buf      - output buffer handed to the range encoder
// buf_size - size of buf
// data     - the input AVFrame (read as pict); its pict_type and quality
//            fields are written during the call
//
// Outline of the visible lines:
//   1. set up the range encoder and copy the input picture into
//      s->input_picture
//   2. choose keyframe or P type, either from the pass 2 rate control entries
//      or from gop_size and frame_number, and derive qlog and lambda from
//      pict->quality
//   3. for P pictures, fill the embedded context s->m for motion estimation
//   4. back up the coder states, then code the blocks
//   5. per plane: load the samples into the DWT buffer, subtract the
//      prediction, transform, quantize and code every subband, then
//      reconstruct through dequantize, the inverse DWT and predict_plane
//   6. optionally accumulate the squared error for PSNR, release the oldest
//      reference, and publish bit statistics to s->m and avctx
// NOTE(review): a number of lines (loop headers, branch keywords, returns) are
// not visible here. The comments below describe only the visible statements.
4061 static int encode_frame(AVCodecContext *avctx, unsigned char *buf, int buf_size, void *data){
4062 SnowContext *s = avctx->priv_data;
4063 RangeCoder * const c= &s->c;
4064 AVFrame *pict = data;
4065 const int width= s->avctx->width;
4066 const int height= s->avctx->height;
4067 int level, orientation, plane_index, i, y;
4068 uint8_t rc_header_bak[sizeof(s->header_state)];
4069 uint8_t rc_block_bak[sizeof(s->block_state)];
4071 ff_init_range_encoder(c, buf, buf_size);
4072 ff_build_rac_states(c, 0.05*(1LL<<32), 256-8);
// Copy the caller picture row by row into s->input_picture.
4076 for(y=0; y<(height>>shift); y++)
4077 memcpy(&s->input_picture.data[i][y * s->input_picture.linesize[i]],
4078 &pict->data[i][y * pict->linesize[i]],
4081 s->new_picture = *pict;
4083 s->m.picture_number= avctx->frame_number;
// Pass 2 takes the picture type from the stored rate control entries;
// otherwise a keyframe is forced when gop_size is 0 or frame_number is a
// multiple of it.
4084 if(avctx->flags&CODEC_FLAG_PASS2){
4086 pict->pict_type= s->m.rc_context.entry[avctx->frame_number].new_pict_type;
4087 s->keyframe= pict->pict_type==FF_I_TYPE;
4088 if(!(avctx->flags&CODEC_FLAG_QSCALE)) {
4089 pict->quality= ff_rate_estimate_qscale(&s->m, 0);
4090 if (pict->quality < 0)
4094 s->keyframe= avctx->gop_size==0 || avctx->frame_number % avctx->gop_size == 0;
4096 pict->pict_type= s->keyframe ? FF_I_TYPE : FF_P_TYPE;
// With single-pass rate control the very first frame starts from a fixed
// quality of 2*FF_QP2LAMBDA.
4099 if(s->pass1_rc && avctx->frame_number == 0)
4100 pict->quality= 2*FF_QP2LAMBDA;
4102 s->qlog= qscale2qlog(pict->quality);
4103 s->lambda = pict->quality * 3/2;
// Lossless mode is selected for a negative qlog, or for a zero quality when
// QSCALE is set.
4105 if(s->qlog < 0 || (!pict->quality && (avctx->flags & CODEC_FLAG_QSCALE))){
4106 s->qlog= LOSSLESS_QLOG;
4108 }//else keep previous frame's qlog until after motion est
4112 s->m.current_picture_ptr= &s->m.current_picture;
// For P pictures the embedded context s->m is filled with the picture
// pointers, strides, dimensions and search settings.
4113 if(pict->pict_type == P_TYPE){
4114 int block_width = (width +15)>>4;
4115 int block_height= (height+15)>>4;
4116 int stride= s->current_picture.linesize[0];
4118 assert(s->current_picture.data[0]);
4119 assert(s->last_picture[0].data[0]);
4121 s->m.avctx= s->avctx;
4122 s->m.current_picture.data[0]= s->current_picture.data[0];
4123 s->m. last_picture.data[0]= s->last_picture[0].data[0];
4124 s->m. new_picture.data[0]= s-> input_picture.data[0];
4125 s->m. last_picture_ptr= &s->m. last_picture;
4127 s->m. last_picture.linesize[0]=
4128 s->m. new_picture.linesize[0]=
4129 s->m.current_picture.linesize[0]= stride;
4130 s->m.uvlinesize= s->current_picture.linesize[1];
4132 s->m.height= height;
4133 s->m.mb_width = block_width;
4134 s->m.mb_height= block_height;
4135 s->m.mb_stride= s->m.mb_width+1;
4136 s->m.b8_stride= 2*s->m.mb_width+1;
4138 s->m.pict_type= pict->pict_type;
4139 s->m.me_method= s->avctx->me_method;
4140 s->m.me.scene_change_score=0;
4141 s->m.flags= s->avctx->flags;
4142 s->m.quarter_sample= (s->avctx->flags & CODEC_FLAG_QPEL)!=0;
4143 s->m.out_format= FMT_H263;
4144 s->m.unrestricted_mv= 1;
4146 s->m.lambda = s->lambda;
4147 s->m.qscale= (s->m.lambda*139 + FF_LAMBDA_SCALE*64) >> (FF_LAMBDA_SHIFT + 7);
4148 s->lambda2= s->m.lambda2= (s->m.lambda*s->m.lambda + FF_LAMBDA_SCALE/2) >> FF_LAMBDA_SHIFT;
4150 s->m.dsp= s->dsp; //move
// Snapshot both coder state arrays; they are copied back further down when
// single-pass rate control restarts the range encoder.
4156 memcpy(rc_header_bak, s->header_state, sizeof(s->header_state));
4157 memcpy(rc_block_bak, s->block_state, sizeof(s->block_state));
4162 s->m.pict_type = pict->pict_type;
4163 s->qbias= pict->pict_type == P_TYPE ? 2 : 0;
// Bytes written so far count as misc bits; the bytes added by encode_blocks()
// count as motion vector bits.
4166 s->m.misc_bits = 8*(s->c.bytestream - s->c.bytestream_start);
4167 encode_blocks(s, 1);
4168 s->m.mv_bits = 8*(s->c.bytestream - s->c.bytestream_start) - s->m.misc_bits;
4170 for(plane_index=0; plane_index<3; plane_index++){
4171 Plane *p= &s->plane[plane_index];
4175 // int bits= put_bits_count(&s->c.pb);
4177 if(!(avctx->flags2 & CODEC_FLAG2_MEMC_ONLY)){
4179 if(pict->data[plane_index]) //FIXME gray hack
4182 s->spatial_dwt_buffer[y*w + x]= pict->data[plane_index][y*pict->linesize[plane_index] + x]<<FRAC_BITS;
4185 predict_plane(s, s->spatial_dwt_buffer, plane_index, 0);
4188 && pict->pict_type == P_TYPE
4189 && !(avctx->flags&CODEC_FLAG_PASS2)
4190 && s->m.me.scene_change_score > s->avctx->scenechange_threshold){
4191 ff_init_range_encoder(c, buf, buf_size);
4192 ff_build_rac_states(c, 0.05*(1LL<<32), 256-8);
4193 pict->pict_type= FF_I_TYPE;
4195 s->current_picture.key_frame=1;
// In lossless mode the samples are rounded back down by FRAC_BITS before the
// transform.
4199 if(s->qlog == LOSSLESS_QLOG){
4202 s->spatial_dwt_buffer[y*w + x]= (s->spatial_dwt_buffer[y*w + x] + (1<<(FRAC_BITS-1))-1)>>FRAC_BITS;
4207 ff_spatial_dwt(s->spatial_dwt_buffer, w, h, w, s->spatial_decomposition_type, s->spatial_decomposition_count);
// Single-pass rate control runs once, on plane 0. Afterwards the range
// encoder is restarted, the saved coder states are restored, and the blocks
// are coded again.
4209 if(s->pass1_rc && plane_index==0){
4210 int delta_qlog = ratecontrol_1pass(s, pict);
4211 if (delta_qlog <= INT_MIN)
4214 //reordering qlog in the bitstream would eliminate this reset
4215 ff_init_range_encoder(c, buf, buf_size);
4216 memcpy(s->header_state, rc_header_bak, sizeof(s->header_state));
4217 memcpy(s->block_state, rc_block_bak, sizeof(s->block_state));
4219 encode_blocks(s, 0);
// Quantize, decorrelate and code every subband; correlate() is called
// afterwards on the same buffer.
4223 for(level=0; level<s->spatial_decomposition_count; level++){
4224 for(orientation=level ? 1 : 0; orientation<4; orientation++){
4225 SubBand *b= &p->band[level][orientation];
4227 quantize(s, b, b->buf, b->stride, s->qbias);
4229 decorrelate(s, b, b->buf, b->stride, pict->pict_type == P_TYPE, 0);
4230 encode_subband(s, b, b->buf, b->parent ? b->parent->buf : NULL, b->stride, orientation);
4231 assert(b->parent==NULL || b->parent->stride == b->stride*2);
4233 correlate(s, b, b->buf, b->stride, 1, 0);
4236 // av_log(NULL, AV_LOG_DEBUG, "plane:%d bits:%d\n", plane_index, put_bits_count(&s->c.pb) - bits);
// Reconstruction: dequantize every subband, run the inverse DWT, and add the
// prediction back.
4238 for(level=0; level<s->spatial_decomposition_count; level++){
4239 for(orientation=level ? 1 : 0; orientation<4; orientation++){
4240 SubBand *b= &p->band[level][orientation];
4242 dequantize(s, b, b->buf, b->stride);
4246 ff_spatial_idwt(s->spatial_dwt_buffer, w, h, w, s->spatial_decomposition_type, s->spatial_decomposition_count);
4247 if(s->qlog == LOSSLESS_QLOG){
4250 s->spatial_dwt_buffer[y*w + x]<<=FRAC_BITS;
4255 predict_plane(s, s->spatial_dwt_buffer, plane_index, 1);
4256 STOP_TIMER("pred-conv")}
// For I pictures the source samples are copied straight into the current
// picture; the other visible path clears the DWT buffer and runs
// predict_plane() alone.
4259 if(pict->pict_type == I_TYPE){
4262 s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x]=
4263 pict->data[plane_index][y*pict->linesize[plane_index] + x];
4267 memset(s->spatial_dwt_buffer, 0, sizeof(DWTELEM)*w*h);
4268 predict_plane(s, s->spatial_dwt_buffer, plane_index, 1);
// With the PSNR flag set, the difference between the reconstruction and the
// source is measured per plane and added to the error counters.
4271 if(s->avctx->flags&CODEC_FLAG_PSNR){
4274 if(pict->data[plane_index]) //FIXME gray hack
4277 int d= s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x] - pict->data[plane_index][y*pict->linesize[plane_index] + x];
4281 s->avctx->error[plane_index] += error;
4282 s->current_picture.error[plane_index] = error;
// Release the oldest reference picture if that slot holds data.
4286 if(s->last_picture[s->max_ref_frames-1].data[0])
4287 avctx->release_buffer(avctx, &s->last_picture[s->max_ref_frames-1]);
// Publish per-frame statistics to the current picture, to s->m and to avctx.
4289 s->current_picture.coded_picture_number = avctx->frame_number;
4290 s->current_picture.pict_type = pict->pict_type;
4291 s->current_picture.quality = pict->quality;
4292 s->m.frame_bits = 8*(s->c.bytestream - s->c.bytestream_start);
4293 s->m.p_tex_bits = s->m.frame_bits - s->m.misc_bits - s->m.mv_bits;
4294 s->m.current_picture.display_picture_number =
4295 s->m.current_picture.coded_picture_number = avctx->frame_number;
4296 s->m.current_picture.quality = pict->quality;
4297 s->m.total_bits += 8*(s->c.bytestream - s->c.bytestream_start);
4299 if (ff_rate_estimate_qscale(&s->m, 0) < 0)
4301 if(avctx->flags&CODEC_FLAG_PASS1)
4302 ff_write_pass1_stats(&s->m);
4303 s->m.last_pict_type = s->m.pict_type;
4304 avctx->frame_bits = s->m.frame_bits;
4305 avctx->mv_bits = s->m.mv_bits;
4306 avctx->misc_bits = s->m.misc_bits;
4307 avctx->p_tex_bits = s->m.p_tex_bits;
4311 return ff_rac_terminate(c);
// Release the resources held by the context.
//
// Frees the DWT buffer, the motion estimation scratch buffers, the block
// array, the per-reference ref_mvs and ref_scores arrays, and the x_coeff
// array of every subband. Reference pictures that still hold data are handed
// back through avctx->release_buffer. Every free goes through av_freep() with
// the address of the owning pointer.
4314 static void common_end(SnowContext *s){
4315 int plane_index, level, orientation, i;
4317 av_freep(&s->spatial_dwt_buffer);
4319 av_freep(&s->m.me.scratchpad);
4320 av_freep(&s->m.me.map);
4321 av_freep(&s->m.me.score_map);
4322 av_freep(&s->m.obmc_scratchpad);
4324 av_freep(&s->block);
// All MAX_REF_FRAMES slots are visited, not only the first max_ref_frames.
4326 for(i=0; i<MAX_REF_FRAMES; i++){
4327 av_freep(&s->ref_mvs[i]);
4328 av_freep(&s->ref_scores[i]);
4329 if(s->last_picture[i].data[0])
4330 s->avctx->release_buffer(s->avctx, &s->last_picture[i]);
4333 for(plane_index=0; plane_index<3; plane_index++){
4334 for(level=s->spatial_decomposition_count-1; level>=0; level--){
4335 for(orientation=level ? 1 : 0; orientation<4; orientation++){
4336 SubBand *b= &s->plane[plane_index].band[level][orientation];
4338 av_freep(&b->x_coeff);
// Encoder teardown: frees avctx->stats_out.
// NOTE(review): the remaining statements of this function, including the
// return, are not visible in these lines. Presumably the shared cleanup in
// common_end() is invoked here as well - confirm.
4344 static int encode_end(AVCodecContext *avctx)
4346 SnowContext *s = avctx->priv_data;
4349 av_free(avctx->stats_out);
// Decoder initialisation: sets the output pixel format to PIX_FMT_YUV420P and
// initialises the slice buffer s->sb.
//
// The slice buffer is created for the height and width of plane 0, backed by
// s->spatial_dwt_buffer. Its second size argument is computed from
// block_size (MB_SIZE >> block_max_depth) and spatial_decomposition_count.
// NOTE(review): the statements between the pix_fmt assignment and the
// block_size computation, and the return, are not visible in these lines.
4354 static int decode_init(AVCodecContext *avctx)
4356 SnowContext *s = avctx->priv_data;
4359 avctx->pix_fmt= PIX_FMT_YUV420P;
4363 block_size = MB_SIZE >> s->block_max_depth;
4364 slice_buffer_init(&s->sb, s->plane[0].height, (block_size) + (s->spatial_decomposition_count * (s->spatial_decomposition_count + 3)) + 1, s->plane[0].width, s->spatial_dwt_buffer);
4369 static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, uint8_t *buf, int buf_size){
4370 SnowContext *s = avctx->priv_data;
4371 RangeCoder * const c= &s->c;
4373 AVFrame *picture = data;
4374 int level, orientation, plane_index;
4376 ff_init_range_decoder(c, buf, buf_size);
4377 ff_build_rac_states(c, 0.05*(1LL<<32), 256-8);
4379 s->current_picture.pict_type= FF_I_TYPE; //FIXME I vs. P
4381 if(!s->block) alloc_blocks(s);
4384 //keyframe flag duplication mess FIXME
4385 if(avctx->debug&FF_DEBUG_PICT_INFO)
4386 av_log(avctx, AV_LOG_ERROR, "keyframe:%d qlog:%d\n", s->keyframe, s->qlog);
4390 for(plane_index=0; plane_index<3; plane_index++){
4391 Plane *p= &s->plane[plane_index];
4395 int decode_state[MAX_DECOMPOSITIONS][4][1]; /* Stored state info for unpack_coeffs. 1 variable per instance. */
4397 if(s->avctx->debug&2048){
4398 memset(s->spatial_dwt_buffer, 0, sizeof(DWTELEM)*w*h);
4399 predict_plane(s, s->spatial_dwt_buffer, plane_index, 1);
4403 int v= s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x];
4404 s->mconly_picture.data[plane_index][y*s->mconly_picture.linesize[plane_index] + x]= v;
4410 for(level=0; level<s->spatial_decomposition_count; level++){
4411 for(orientation=level ? 1 : 0; orientation<4; orientation++){
4412 SubBand *b= &p->band[level][orientation];
4413 unpack_coeffs(s, b, b->parent, orientation);
4416 STOP_TIMER("unpack coeffs");
4420 const int mb_h= s->b_height << s->block_max_depth;
4421 const int block_size = MB_SIZE >> s->block_max_depth;
4422 const int block_w = plane_index ? block_size/2 : block_size;
4424 dwt_compose_t cs[MAX_DECOMPOSITIONS];
4429 ff_spatial_idwt_buffered_init(cs, &s->sb, w, h, 1, s->spatial_decomposition_type, s->spatial_decomposition_count);
4430 for(mb_y=0; mb_y<=mb_h; mb_y++){
4432 int slice_starty = block_w*mb_y;
4433 int slice_h = block_w*(mb_y+1);
4434 if (!(s->keyframe || s->avctx->debug&512)){
4435 slice_starty = FFMAX(0, slice_starty - (block_w >> 1));
4436 slice_h -= (block_w >> 1);
4441 for(level=0; level<s->spatial_decomposition_count; level++){
4442 for(orientation=level ? 1 : 0; orientation<4; orientation++){
4443 SubBand *b= &p->band[level][orientation];
4446 int our_mb_start = mb_y;
4447 int our_mb_end = (mb_y + 1);
4449 start_y = (mb_y ? ((block_w * our_mb_start) >> (s->spatial_decomposition_count - level)) + s->spatial_decomposition_count - level + extra: 0);
4450 end_y = (((block_w * our_mb_end) >> (s->spatial_decomposition_count - level)) + s->spatial_decomposition_count - level + extra);
4451 if (!(s->keyframe || s->avctx->debug&512)){
4452 start_y = FFMAX(0, start_y - (block_w >> (1+s->spatial_decomposition_count - level)));
4453 end_y = FFMAX(0, end_y - (block_w >> (1+s->spatial_decomposition_count - level)));
4455 start_y = FFMIN(b->height, start_y);
4456 end_y = FFMIN(b->height, end_y);
4458 if (start_y != end_y){
4459 if (orientation == 0){
4460 SubBand * correlate_band = &p->band[0][0];
4461 int correlate_end_y = FFMIN(b->height, end_y + 1);
4462 int correlate_start_y = FFMIN(b->height, (start_y ? start_y + 1 : 0));
4463 decode_subband_slice_buffered(s, correlate_band, &s->sb, correlate_start_y, correlate_end_y, decode_state[0][0]);
4464 correlate_slice_buffered(s, &s->sb, correlate_band, correlate_band->buf, correlate_band->stride, 1, 0, correlate_start_y, correlate_end_y);
4465 dequantize_slice_buffered(s, &s->sb, correlate_band, correlate_band->buf, correlate_band->stride, start_y, end_y);
4468 decode_subband_slice_buffered(s, b, &s->sb, start_y, end_y, decode_state[level][orientation]);
4472 STOP_TIMER("decode_subband_slice");
4476 for(; yd<slice_h; yd+=4){
4477 ff_spatial_idwt_buffered_slice(&s->dsp, cs, &s->sb, w, h, 1, s->spatial_decomposition_type, s->spatial_decomposition_count, yd);
4479 STOP_TIMER("idwt slice");}
4482 if(s->qlog == LOSSLESS_QLOG){
4483 for(; yq<slice_h && yq<h; yq++){
4484 DWTELEM * line = slice_buffer_get_line(&s->sb, yq);
4486 line[x] <<= FRAC_BITS;
4491 predict_slice_buffered(s, &s->sb, s->spatial_dwt_buffer, plane_index, 1, mb_y);
4493 y = FFMIN(p->height, slice_starty);
4494 end_y = FFMIN(p->height, slice_h);
4496 slice_buffer_release(&s->sb, y++);
4499 slice_buffer_flush(&s->sb);
4501 STOP_TIMER("idwt + predict_slices")}
4506 if(s->last_picture[s->max_ref_frames-1].data[0])
4507 avctx->release_buffer(avctx, &s->last_picture[s->max_ref_frames-1]);
4509 if(!(s->avctx->debug&2048))
4510 *picture= s->current_picture;
4512 *picture= s->mconly_picture;
4514 *data_size = sizeof(AVFrame);
4516 bytes_read= c->bytestream - c->bytestream_start;
4517 if(bytes_read ==0) av_log(s->avctx, AV_LOG_ERROR, "error at end of frame\n"); //FIXME
/**
 * End-of-decoding cleanup for a codec context.
 *
 * Reads the SnowContext stored in avctx->priv_data and destroys its
 * slice buffer (s->sb).
 *
 * @param avctx codec context whose priv_data holds the SnowContext
 *
 * NOTE(review): the embedded line numbers skip (4522, 4524, 4526), so the
 * braces, any additional cleanup and the return statement are not visible
 * in this excerpt -- check the full file before relying on this summary.
 */
4522 static int decode_end(AVCodecContext *avctx)
/* private decoder state attached to the codec context */
4524 SnowContext *s = avctx->priv_data;
/* tear down the slice buffer owned by the context */
4526 slice_buffer_destroy(&s->sb);
/*
 * Codec descriptor for the Snow decoder.
 * NOTE(review): only part of the positional initializer is visible here
 * (the embedded line numbers skip), so field meanings below are hedged.
 */
4533 AVCodec snow_decoder = {
/* size of SnowContext -- presumably the private-data size field; TODO confirm against the AVCodec layout */
4537 sizeof(SnowContext),
/* literal 0 with CODEC_CAP_DR1 and CODEC_CAP_DRAW_HORIZ_BAND commented out --
 * presumably the capabilities field with neither flag enabled; confirm */
4542 0 /*CODEC_CAP_DR1*/ /*| CODEC_CAP_DRAW_HORIZ_BAND*/,
/* The encoder descriptor below is compiled only when CONFIG_ENCODERS is defined. */
4546 #ifdef CONFIG_ENCODERS
/*
 * Codec descriptor for the Snow encoder.
 * NOTE(review): the remaining initializer entries and the matching #endif
 * are not visible in this excerpt (the embedded line numbers skip).
 */
4547 AVCodec snow_encoder = {
/* size of SnowContext, the same struct snow_decoder uses -- presumably the private-data size field; TODO confirm */
4551 sizeof(SnowContext),
4567 int buffer[2][width*height];
4570 s.spatial_decomposition_count=6;
4571 s.spatial_decomposition_type=1;
4573 printf("testing 5/3 DWT\n");
4574 for(i=0; i<width*height; i++)
4575 buffer[0][i]= buffer[1][i]= random()%54321 - 12345;
4577 ff_spatial_dwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4578 ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4580 for(i=0; i<width*height; i++)
4581 if(buffer[0][i]!= buffer[1][i]) printf("fsck: %d %d %d\n",i, buffer[0][i], buffer[1][i]);
4583 printf("testing 9/7 DWT\n");
4584 s.spatial_decomposition_type=0;
4585 for(i=0; i<width*height; i++)
4586 buffer[0][i]= buffer[1][i]= random()%54321 - 12345;
4588 ff_spatial_dwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4589 ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4591 for(i=0; i<width*height; i++)
4592 if(FFABS(buffer[0][i] - buffer[1][i])>20) printf("fsck: %d %d %d\n",i, buffer[0][i], buffer[1][i]);
4595 printf("testing AC coder\n");
4596 memset(s.header_state, 0, sizeof(s.header_state));
4597 ff_init_range_encoder(&s.c, buffer[0], 256*256);
4598 ff_init_cabac_states(&s.c, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64);
4600 for(i=-256; i<256; i++){
4602 put_symbol(&s.c, s.header_state, i*i*i/3*FFABS(i), 1);
4603 STOP_TIMER("put_symbol")
4605 ff_rac_terminate(&s.c);
4607 memset(s.header_state, 0, sizeof(s.header_state));
4608 ff_init_range_decoder(&s.c, buffer[0], 256*256);
4609 ff_init_cabac_states(&s.c, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64);
4611 for(i=-256; i<256; i++){
4614 j= get_symbol(&s.c, s.header_state, 1);
4615 STOP_TIMER("get_symbol")
4616 if(j!=i*i*i/3*FFABS(i)) printf("fsck: %d != %d\n", i, j);
4620 int level, orientation, x, y;
4621 int64_t errors[8][4];
4624 memset(errors, 0, sizeof(errors));
4625 s.spatial_decomposition_count=3;
4626 s.spatial_decomposition_type=0;
4627 for(level=0; level<s.spatial_decomposition_count; level++){
4628 for(orientation=level ? 1 : 0; orientation<4; orientation++){
4629 int w= width >> (s.spatial_decomposition_count-level);
4630 int h= height >> (s.spatial_decomposition_count-level);
4631 int stride= width << (s.spatial_decomposition_count-level);
4632 DWTELEM *buf= buffer[0];
4635 if(orientation&1) buf+=w;
4636 if(orientation>1) buf+=stride>>1;
4638 memset(buffer[0], 0, sizeof(int)*width*height);
4639 buf[w/2 + h/2*stride]= 256*256;
4640 ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4641 for(y=0; y<height; y++){
4642 for(x=0; x<width; x++){
4643 int64_t d= buffer[0][x + y*width];
4645 if(FFABS(width/2-x)<9 && FFABS(height/2-y)<9 && level==2) printf("%8"PRId64" ", d);
4647 if(FFABS(height/2-y)<9 && level==2) printf("\n");
4649 error= (int)(sqrt(error)+0.5);
4650 errors[level][orientation]= error;
4651 if(g) g=ff_gcd(g, error);
4655 printf("static int const visual_weight[][4]={\n");
4656 for(level=0; level<s.spatial_decomposition_count; level++){
4658 for(orientation=0; orientation<4; orientation++){
4659 printf("%8"PRId64",", errors[level][orientation]/g);
4667 int w= width >> (s.spatial_decomposition_count-level);
4668 int h= height >> (s.spatial_decomposition_count-level);
4669 int stride= width << (s.spatial_decomposition_count-level);
4670 DWTELEM *buf= buffer[0];
4676 memset(buffer[0], 0, sizeof(int)*width*height);
4678 for(y=0; y<height; y++){
4679 for(x=0; x<width; x++){
4680 int tab[4]={0,2,3,1};
4681 buffer[0][x+width*y]= 256*256*tab[(x&1) + 2*(y&1)];
4684 ff_spatial_dwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4688 buf[x + y*stride ]=169;
4689 buf[x + y*stride-w]=64;
4692 ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4694 for(y=0; y<height; y++){
4695 for(x=0; x<width; x++){
4696 int64_t d= buffer[0][x + y*width];
4698 if(FFABS(width/2-x)<9 && FFABS(height/2-y)<9) printf("%8"PRId64" ", d);
4700 if(FFABS(height/2-y)<9) printf("\n");