2 * Copyright (C) 2004 Michael Niedermayer <michaelni@gmx.at>
4 * This file is part of FFmpeg.
6 * FFmpeg is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * FFmpeg is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with FFmpeg; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
25 #include "rangecoder.h"
27 #include "mpegvideo.h"
32 static const int8_t quant3[256]={
33 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
34 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
35 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
36 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
37 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
38 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
39 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
40 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
41 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
42 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
43 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
44 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
45 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
46 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
47 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
48 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 0,
50 static const int8_t quant3b[256]={
51 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
52 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
53 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
54 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
55 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
56 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
57 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
58 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
59 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
60 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
61 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
62 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
63 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
64 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
65 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
66 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
68 static const int8_t quant3bA[256]={
69 0, 0, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
70 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
71 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
72 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
73 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
74 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
75 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
76 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
77 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
78 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
79 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
80 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
81 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
82 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
83 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
84 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
86 static const int8_t quant5[256]={
87 0, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
88 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
89 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
90 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
91 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
92 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
93 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
94 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
95 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
96 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
97 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
98 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
99 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
100 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
101 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
102 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,-1,
104 static const int8_t quant7[256]={
105 0, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
106 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
107 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3,
108 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
109 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
110 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
111 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
112 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
113 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
114 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
115 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
116 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
117 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
118 -3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-2,-2,-2,
119 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
120 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,
122 static const int8_t quant9[256]={
123 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3,
124 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
125 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
126 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
127 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
128 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
129 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
130 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
131 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
132 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
133 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
134 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
135 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
136 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
137 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,
138 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-1,-1,
140 static const int8_t quant11[256]={
141 0, 1, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4,
142 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
143 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
144 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
145 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
146 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
147 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
148 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
149 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
150 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
151 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
152 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
153 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
154 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-4,-4,
155 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
156 -4,-4,-4,-4,-4,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-1,
158 static const int8_t quant13[256]={
159 0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
160 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
161 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
162 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
163 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
164 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
165 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
166 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
167 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
168 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
169 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
170 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
171 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-5,
172 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
173 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
174 -4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,-2,-2,-1,
178 static const uint8_t obmc32[1024]={
179 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
180 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
181 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
182 0, 0, 1, 1, 2, 2, 3, 4, 4, 5, 6, 6, 7, 7, 8, 8, 8, 8, 7, 7, 6, 6, 5, 4, 4, 3, 2, 2, 1, 1, 0, 0,
183 0, 0, 1, 2, 2, 3, 4, 6, 7, 8, 9,10,11,12,12,12,12,12,12,11,10, 9, 8, 7, 6, 4, 3, 2, 2, 1, 0, 0,
184 0, 1, 1, 2, 3, 5, 6, 8,10,11,13,14,15,16,17,18,18,17,16,15,14,13,11,10, 8, 6, 5, 3, 2, 1, 1, 0,
185 0, 1, 1, 3, 4, 6, 8,10,13,15,17,19,20,22,22,23,23,22,22,20,19,17,15,13,10, 8, 6, 4, 3, 1, 1, 0,
186 0, 1, 2, 4, 6, 8,10,13,16,19,21,23,25,27,28,29,29,28,27,25,23,21,19,16,13,10, 8, 6, 4, 2, 1, 0,
187 0, 1, 2, 4, 7,10,13,16,19,22,25,28,31,33,34,35,35,34,33,31,28,25,22,19,16,13,10, 7, 4, 2, 1, 0,
188 0, 1, 3, 5, 8,11,15,19,22,26,30,33,36,38,40,41,41,40,38,36,33,30,26,22,19,15,11, 8, 5, 3, 1, 0,
189 0, 1, 3, 6, 9,12,17,21,25,30,34,38,41,44,45,46,46,45,44,41,38,34,30,25,21,17,12, 9, 6, 3, 1, 0,
190 0, 1, 3, 6,10,14,19,23,28,33,38,42,45,48,51,52,52,51,48,45,42,38,33,28,23,19,14,10, 6, 3, 1, 0,
191 0, 1, 4, 7,11,15,20,25,31,36,41,45,49,52,55,56,56,55,52,49,45,41,36,31,25,20,15,11, 7, 4, 1, 0,
192 0, 2, 4, 7,12,16,22,27,33,38,44,48,52,56,58,60,60,58,56,52,48,44,38,33,27,22,16,12, 7, 4, 2, 0,
193 0, 1, 4, 8,12,17,22,28,34,40,45,51,55,58,61,62,62,61,58,55,51,45,40,34,28,22,17,12, 8, 4, 1, 0,
194 0, 2, 4, 8,12,18,23,29,35,41,46,52,56,60,62,64,64,62,60,56,52,46,41,35,29,23,18,12, 8, 4, 2, 0,
195 0, 2, 4, 8,12,18,23,29,35,41,46,52,56,60,62,64,64,62,60,56,52,46,41,35,29,23,18,12, 8, 4, 2, 0,
196 0, 1, 4, 8,12,17,22,28,34,40,45,51,55,58,61,62,62,61,58,55,51,45,40,34,28,22,17,12, 8, 4, 1, 0,
197 0, 2, 4, 7,12,16,22,27,33,38,44,48,52,56,58,60,60,58,56,52,48,44,38,33,27,22,16,12, 7, 4, 2, 0,
198 0, 1, 4, 7,11,15,20,25,31,36,41,45,49,52,55,56,56,55,52,49,45,41,36,31,25,20,15,11, 7, 4, 1, 0,
199 0, 1, 3, 6,10,14,19,23,28,33,38,42,45,48,51,52,52,51,48,45,42,38,33,28,23,19,14,10, 6, 3, 1, 0,
200 0, 1, 3, 6, 9,12,17,21,25,30,34,38,41,44,45,46,46,45,44,41,38,34,30,25,21,17,12, 9, 6, 3, 1, 0,
201 0, 1, 3, 5, 8,11,15,19,22,26,30,33,36,38,40,41,41,40,38,36,33,30,26,22,19,15,11, 8, 5, 3, 1, 0,
202 0, 1, 2, 4, 7,10,13,16,19,22,25,28,31,33,34,35,35,34,33,31,28,25,22,19,16,13,10, 7, 4, 2, 1, 0,
203 0, 1, 2, 4, 6, 8,10,13,16,19,21,23,25,27,28,29,29,28,27,25,23,21,19,16,13,10, 8, 6, 4, 2, 1, 0,
204 0, 1, 1, 3, 4, 6, 8,10,13,15,17,19,20,22,22,23,23,22,22,20,19,17,15,13,10, 8, 6, 4, 3, 1, 1, 0,
205 0, 1, 1, 2, 3, 5, 6, 8,10,11,13,14,15,16,17,18,18,17,16,15,14,13,11,10, 8, 6, 5, 3, 2, 1, 1, 0,
206 0, 0, 1, 2, 2, 3, 4, 6, 7, 8, 9,10,11,12,12,12,12,12,12,11,10, 9, 8, 7, 6, 4, 3, 2, 2, 1, 0, 0,
207 0, 0, 1, 1, 2, 2, 3, 4, 4, 5, 6, 6, 7, 7, 8, 8, 8, 8, 7, 7, 6, 6, 5, 4, 4, 3, 2, 2, 1, 1, 0, 0,
208 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
209 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
210 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
213 static const uint8_t obmc16[256]={
214 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
215 0, 1, 1, 2, 4, 5, 5, 6, 6, 5, 5, 4, 2, 1, 1, 0,
216 0, 1, 4, 6, 9,11,13,15,15,13,11, 9, 6, 4, 1, 0,
217 0, 2, 6,11,15,20,24,26,26,24,20,15,11, 6, 2, 0,
218 0, 4, 9,15,23,29,34,38,38,34,29,23,15, 9, 4, 0,
219 0, 5,11,20,29,38,45,49,49,45,38,29,20,11, 5, 0,
220 1, 5,13,24,34,45,53,57,57,53,45,34,24,13, 5, 1,
221 1, 6,15,26,38,49,57,62,62,57,49,38,26,15, 6, 1,
222 1, 6,15,26,38,49,57,62,62,57,49,38,26,15, 6, 1,
223 1, 5,13,24,34,45,53,57,57,53,45,34,24,13, 5, 1,
224 0, 5,11,20,29,38,45,49,49,45,38,29,20,11, 5, 0,
225 0, 4, 9,15,23,29,34,38,38,34,29,23,15, 9, 4, 0,
226 0, 2, 6,11,15,20,24,26,26,24,20,15,11, 6, 2, 0,
227 0, 1, 4, 6, 9,11,13,15,15,13,11, 9, 6, 4, 1, 0,
228 0, 1, 1, 2, 4, 5, 5, 6, 6, 5, 5, 4, 2, 1, 1, 0,
229 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
233 static const uint8_t obmc32[1024]={
234 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0,
235 0, 4, 4, 4, 8, 8, 8, 12, 12, 16, 16, 16, 20, 20, 20, 24, 24, 20, 20, 20, 16, 16, 16, 12, 12, 8, 8, 8, 4, 4, 4, 0,
236 0, 4, 8, 8, 12, 12, 16, 20, 20, 24, 28, 28, 32, 32, 36, 40, 40, 36, 32, 32, 28, 28, 24, 20, 20, 16, 12, 12, 8, 8, 4, 0,
237 0, 4, 8, 12, 16, 20, 24, 28, 28, 32, 36, 40, 44, 48, 52, 56, 56, 52, 48, 44, 40, 36, 32, 28, 28, 24, 20, 16, 12, 8, 4, 0,
238 4, 8, 12, 16, 20, 24, 28, 32, 40, 44, 48, 52, 56, 60, 64, 68, 68, 64, 60, 56, 52, 48, 44, 40, 32, 28, 24, 20, 16, 12, 8, 4,
239 4, 8, 12, 20, 24, 32, 36, 40, 48, 52, 56, 64, 68, 76, 80, 84, 84, 80, 76, 68, 64, 56, 52, 48, 40, 36, 32, 24, 20, 12, 8, 4,
240 4, 8, 16, 24, 28, 36, 44, 48, 56, 60, 68, 76, 80, 88, 96,100,100, 96, 88, 80, 76, 68, 60, 56, 48, 44, 36, 28, 24, 16, 8, 4,
241 4, 12, 20, 28, 32, 40, 48, 56, 64, 72, 80, 88, 92,100,108,116,116,108,100, 92, 88, 80, 72, 64, 56, 48, 40, 32, 28, 20, 12, 4,
242 4, 12, 20, 28, 40, 48, 56, 64, 72, 80, 88, 96,108,116,124,132,132,124,116,108, 96, 88, 80, 72, 64, 56, 48, 40, 28, 20, 12, 4,
243 4, 16, 24, 32, 44, 52, 60, 72, 80, 92,100,108,120,128,136,148,148,136,128,120,108,100, 92, 80, 72, 60, 52, 44, 32, 24, 16, 4,
244 4, 16, 28, 36, 48, 56, 68, 80, 88,100,112,120,132,140,152,164,164,152,140,132,120,112,100, 88, 80, 68, 56, 48, 36, 28, 16, 4,
245 4, 16, 28, 40, 52, 64, 76, 88, 96,108,120,132,144,156,168,180,180,168,156,144,132,120,108, 96, 88, 76, 64, 52, 40, 28, 16, 4,
246 8, 20, 32, 44, 56, 68, 80, 92,108,120,132,144,156,168,180,192,192,180,168,156,144,132,120,108, 92, 80, 68, 56, 44, 32, 20, 8,
247 8, 20, 32, 48, 60, 76, 88,100,116,128,140,156,168,184,196,208,208,196,184,168,156,140,128,116,100, 88, 76, 60, 48, 32, 20, 8,
248 8, 20, 36, 52, 64, 80, 96,108,124,136,152,168,180,196,212,224,224,212,196,180,168,152,136,124,108, 96, 80, 64, 52, 36, 20, 8,
249 8, 24, 40, 56, 68, 84,100,116,132,148,164,180,192,208,224,240,240,224,208,192,180,164,148,132,116,100, 84, 68, 56, 40, 24, 8,
250 8, 24, 40, 56, 68, 84,100,116,132,148,164,180,192,208,224,240,240,224,208,192,180,164,148,132,116,100, 84, 68, 56, 40, 24, 8,
251 8, 20, 36, 52, 64, 80, 96,108,124,136,152,168,180,196,212,224,224,212,196,180,168,152,136,124,108, 96, 80, 64, 52, 36, 20, 8,
252 8, 20, 32, 48, 60, 76, 88,100,116,128,140,156,168,184,196,208,208,196,184,168,156,140,128,116,100, 88, 76, 60, 48, 32, 20, 8,
253 8, 20, 32, 44, 56, 68, 80, 92,108,120,132,144,156,168,180,192,192,180,168,156,144,132,120,108, 92, 80, 68, 56, 44, 32, 20, 8,
254 4, 16, 28, 40, 52, 64, 76, 88, 96,108,120,132,144,156,168,180,180,168,156,144,132,120,108, 96, 88, 76, 64, 52, 40, 28, 16, 4,
255 4, 16, 28, 36, 48, 56, 68, 80, 88,100,112,120,132,140,152,164,164,152,140,132,120,112,100, 88, 80, 68, 56, 48, 36, 28, 16, 4,
256 4, 16, 24, 32, 44, 52, 60, 72, 80, 92,100,108,120,128,136,148,148,136,128,120,108,100, 92, 80, 72, 60, 52, 44, 32, 24, 16, 4,
257 4, 12, 20, 28, 40, 48, 56, 64, 72, 80, 88, 96,108,116,124,132,132,124,116,108, 96, 88, 80, 72, 64, 56, 48, 40, 28, 20, 12, 4,
258 4, 12, 20, 28, 32, 40, 48, 56, 64, 72, 80, 88, 92,100,108,116,116,108,100, 92, 88, 80, 72, 64, 56, 48, 40, 32, 28, 20, 12, 4,
259 4, 8, 16, 24, 28, 36, 44, 48, 56, 60, 68, 76, 80, 88, 96,100,100, 96, 88, 80, 76, 68, 60, 56, 48, 44, 36, 28, 24, 16, 8, 4,
260 4, 8, 12, 20, 24, 32, 36, 40, 48, 52, 56, 64, 68, 76, 80, 84, 84, 80, 76, 68, 64, 56, 52, 48, 40, 36, 32, 24, 20, 12, 8, 4,
261 4, 8, 12, 16, 20, 24, 28, 32, 40, 44, 48, 52, 56, 60, 64, 68, 68, 64, 60, 56, 52, 48, 44, 40, 32, 28, 24, 20, 16, 12, 8, 4,
262 0, 4, 8, 12, 16, 20, 24, 28, 28, 32, 36, 40, 44, 48, 52, 56, 56, 52, 48, 44, 40, 36, 32, 28, 28, 24, 20, 16, 12, 8, 4, 0,
263 0, 4, 8, 8, 12, 12, 16, 20, 20, 24, 28, 28, 32, 32, 36, 40, 40, 36, 32, 32, 28, 28, 24, 20, 20, 16, 12, 12, 8, 8, 4, 0,
264 0, 4, 4, 4, 8, 8, 8, 12, 12, 16, 16, 16, 20, 20, 20, 24, 24, 20, 20, 20, 16, 16, 16, 12, 12, 8, 8, 8, 4, 4, 4, 0,
265 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0,
268 static const uint8_t obmc16[256]={
269 0, 4, 4, 8, 8, 12, 12, 16, 16, 12, 12, 8, 8, 4, 4, 0,
270 4, 8, 16, 20, 28, 32, 40, 44, 44, 40, 32, 28, 20, 16, 8, 4,
271 4, 16, 24, 36, 44, 56, 64, 76, 76, 64, 56, 44, 36, 24, 16, 4,
272 8, 20, 36, 48, 64, 76, 92,104,104, 92, 76, 64, 48, 36, 20, 8,
273 8, 28, 44, 64, 80,100,116,136,136,116,100, 80, 64, 44, 28, 8,
274 12, 32, 56, 76,100,120,144,164,164,144,120,100, 76, 56, 32, 12,
275 12, 40, 64, 92,116,144,168,196,196,168,144,116, 92, 64, 40, 12,
276 16, 44, 76,104,136,164,196,224,224,196,164,136,104, 76, 44, 16,
277 16, 44, 76,104,136,164,196,224,224,196,164,136,104, 76, 44, 16,
278 12, 40, 64, 92,116,144,168,196,196,168,144,116, 92, 64, 40, 12,
279 12, 32, 56, 76,100,120,144,164,164,144,120,100, 76, 56, 32, 12,
280 8, 28, 44, 64, 80,100,116,136,136,116,100, 80, 64, 44, 28, 8,
281 8, 20, 36, 48, 64, 76, 92,104,104, 92, 76, 64, 48, 36, 20, 8,
282 4, 16, 24, 36, 44, 56, 64, 76, 76, 64, 56, 44, 36, 24, 16, 4,
283 4, 8, 16, 20, 28, 32, 40, 44, 44, 40, 32, 28, 20, 16, 8, 4,
284 0, 4, 4, 8, 8, 12, 12, 16, 16, 12, 12, 8, 8, 4, 4, 0,
288 static const uint8_t obmc32[1024]={
289 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
290 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
291 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
292 0, 0, 1, 1, 1, 2, 2, 3, 4, 5, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 6, 5, 5, 4, 3, 2, 2, 1, 1, 1, 0, 0,
293 0, 0, 1, 1, 2, 3, 4, 5, 6, 7, 9,10,11,11,12,12,12,12,11,11,10, 9, 7, 6, 5, 4, 3, 2, 1, 1, 0, 0,
294 0, 0, 1, 2, 3, 5, 6, 8, 9,11,12,14,15,16,17,17,17,17,16,15,14,12,11, 9, 8, 6, 5, 3, 2, 1, 0, 0,
295 0, 1, 1, 2, 4, 6, 8,10,12,15,17,19,20,21,22,23,23,22,21,20,19,17,15,12,10, 8, 6, 4, 2, 1, 1, 0,
296 0, 1, 2, 3, 5, 8,10,13,16,19,21,24,26,27,28,29,29,28,27,26,24,21,19,16,13,10, 8, 5, 3, 2, 1, 0,
297 0, 1, 2, 4, 6, 9,12,16,19,23,26,29,31,33,34,35,35,34,33,31,29,26,23,19,16,12, 9, 6, 4, 2, 1, 0,
298 0, 1, 3, 5, 7,11,15,19,23,26,30,34,37,39,40,41,41,40,39,37,34,30,26,23,19,15,11, 7, 5, 3, 1, 0,
299 0, 1, 3, 5, 9,12,17,21,26,30,35,38,42,44,46,47,47,46,44,42,38,35,30,26,21,17,12, 9, 5, 3, 1, 0,
300 0, 1, 3, 6, 9,14,19,24,29,34,38,43,46,49,51,52,52,51,49,46,43,38,34,29,24,19,14, 9, 6, 3, 1, 0,
301 0, 1, 3, 6,11,15,20,26,31,37,42,46,50,53,56,57,57,56,53,50,46,42,37,31,26,20,15,11, 6, 3, 1, 0,
302 0, 1, 3, 7,11,16,21,27,33,39,44,49,53,57,59,60,60,59,57,53,49,44,39,33,27,21,16,11, 7, 3, 1, 0,
303 0, 1, 4, 7,12,17,22,28,34,40,46,51,56,59,61,63,63,61,59,56,51,46,40,34,28,22,17,12, 7, 4, 1, 0,
304 0, 1, 4, 7,12,17,23,29,35,41,47,52,57,60,63,64,64,63,60,57,52,47,41,35,29,23,17,12, 7, 4, 1, 0,
305 0, 1, 4, 7,12,17,23,29,35,41,47,52,57,60,63,64,64,63,60,57,52,47,41,35,29,23,17,12, 7, 4, 1, 0,
306 0, 1, 4, 7,12,17,22,28,34,40,46,51,56,59,61,63,63,61,59,56,51,46,40,34,28,22,17,12, 7, 4, 1, 0,
307 0, 1, 3, 7,11,16,21,27,33,39,44,49,53,57,59,60,60,59,57,53,49,44,39,33,27,21,16,11, 7, 3, 1, 0,
308 0, 1, 3, 6,11,15,20,26,31,37,42,46,50,53,56,57,57,56,53,50,46,42,37,31,26,20,15,11, 6, 3, 1, 0,
309 0, 1, 3, 6, 9,14,19,24,29,34,38,43,46,49,51,52,52,51,49,46,43,38,34,29,24,19,14, 9, 6, 3, 1, 0,
310 0, 1, 3, 5, 9,12,17,21,26,30,35,38,42,44,46,47,47,46,44,42,38,35,30,26,21,17,12, 9, 5, 3, 1, 0,
311 0, 1, 3, 5, 7,11,15,19,23,26,30,34,37,39,40,41,41,40,39,37,34,30,26,23,19,15,11, 7, 5, 3, 1, 0,
312 0, 1, 2, 4, 6, 9,12,16,19,23,26,29,31,33,34,35,35,34,33,31,29,26,23,19,16,12, 9, 6, 4, 2, 1, 0,
313 0, 1, 2, 3, 5, 8,10,13,16,19,21,24,26,27,28,29,29,28,27,26,24,21,19,16,13,10, 8, 5, 3, 2, 1, 0,
314 0, 1, 1, 2, 4, 6, 8,10,12,15,17,19,20,21,22,23,23,22,21,20,19,17,15,12,10, 8, 6, 4, 2, 1, 1, 0,
315 0, 0, 1, 2, 3, 5, 6, 8, 9,11,12,14,15,16,17,17,17,17,16,15,14,12,11, 9, 8, 6, 5, 3, 2, 1, 0, 0,
316 0, 0, 1, 1, 2, 3, 4, 5, 6, 7, 9,10,11,11,12,12,12,12,11,11,10, 9, 7, 6, 5, 4, 3, 2, 1, 1, 0, 0,
317 0, 0, 1, 1, 1, 2, 2, 3, 4, 5, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 6, 5, 5, 4, 3, 2, 2, 1, 1, 1, 0, 0,
318 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
319 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
320 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
323 static const uint8_t obmc16[256]={
324 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
325 0, 0, 1, 2, 3, 4, 5, 5, 5, 5, 4, 3, 2, 1, 0, 0,
326 0, 1, 3, 6, 8,11,13,14,14,13,11, 8, 6, 3, 1, 0,
327 0, 2, 6,10,15,20,24,26,26,24,20,15,10, 6, 2, 0,
328 0, 3, 8,16,23,30,35,38,38,35,30,23,16, 8, 3, 0,
329 1, 4,11,20,30,39,46,49,49,46,39,30,20,11, 4, 1,
330 1, 5,13,24,35,46,54,58,58,54,46,35,24,13, 5, 1,
331 0, 5,14,26,38,49,58,63,63,58,49,38,26,14, 5, 0,
332 0, 5,14,26,38,49,58,63,63,58,49,38,26,14, 5, 0,
333 1, 5,13,24,35,46,54,58,58,54,46,35,24,13, 5, 1,
334 1, 4,11,20,30,39,46,49,49,46,39,30,20,11, 4, 1,
335 0, 3, 8,16,23,30,35,38,38,35,30,23,16, 8, 3, 0,
336 0, 2, 6,10,15,20,24,26,26,24,20,15,10, 6, 2, 0,
337 0, 1, 3, 6, 8,11,13,14,14,13,11, 8, 6, 3, 1, 0,
338 0, 0, 1, 2, 3, 4, 5, 5, 5, 5, 4, 3, 2, 1, 0, 0,
339 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
345 static const uint8_t obmc8[64]={
346 4, 12, 20, 28, 28, 20, 12, 4,
347 12, 36, 60, 84, 84, 60, 36, 12,
348 20, 60,100,140,140,100, 60, 20,
349 28, 84,140,196,196,140, 84, 28,
350 28, 84,140,196,196,140, 84, 28,
351 20, 60,100,140,140,100, 60, 20,
352 12, 36, 60, 84, 84, 60, 36, 12,
353 4, 12, 20, 28, 28, 20, 12, 4,
358 static const uint8_t obmc4[16]={
366 static const uint8_t *obmc_tab[4]={
367 obmc32, obmc16, obmc8, obmc4
370 static int scale_mv_ref[MAX_REF_FRAMES][MAX_REF_FRAMES];
372 typedef struct BlockNode{
378 //#define TYPE_SPLIT 1
379 #define BLOCK_INTRA 1
381 //#define TYPE_NOCOLOR 4
382 uint8_t level; //FIXME merge into type?
385 static const BlockNode null_block= { //FIXME add border maybe
386 .color= {128,128,128},
394 #define LOG2_MB_SIZE 4
395 #define MB_SIZE (1<<LOG2_MB_SIZE)
397 typedef struct x_and_coeff{
402 typedef struct SubBand{
407 int qlog; ///< log(qscale)/log[2^(1/6)]
411 int stride_line; ///< Stride measured in lines, not pixels.
412 x_and_coeff * x_coeff;
413 struct SubBand *parent;
414 uint8_t state[/*7*2*/ 7 + 512][32];
417 typedef struct Plane{
420 SubBand band[MAX_DECOMPOSITIONS][4];
423 typedef struct SnowContext{
424 // MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to make the motion estimation eventually independent of MpegEncContext, so this will be removed then (FIXME/XXX)
426 AVCodecContext *avctx;
430 AVFrame input_picture; ///< new_picture with the internal linesizes
431 AVFrame current_picture;
432 AVFrame last_picture[MAX_REF_FRAMES];
433 AVFrame mconly_picture;
434 // uint8_t q_context[16];
435 uint8_t header_state[32];
436 uint8_t block_state[128 + 32*128];
440 int spatial_decomposition_type;
441 int last_spatial_decomposition_type;
442 int temporal_decomposition_type;
443 int spatial_decomposition_count;
444 int temporal_decomposition_count;
447 int16_t (*ref_mvs[MAX_REF_FRAMES])[2];
448 uint32_t *ref_scores[MAX_REF_FRAMES];
449 DWTELEM *spatial_dwt_buffer;
453 int spatial_scalability;
463 #define QBIAS_SHIFT 3
467 int last_block_max_depth;
468 Plane plane[MAX_PLANES];
470 #define ME_CACHE_SIZE 1024
471 int me_cache[ME_CACHE_SIZE];
472 int me_cache_generation;
475 MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to make the motion estimation eventually independent of MpegEncContext, so this will be removed then (FIXME/XXX)
486 #define slice_buffer_get_line(slice_buf, line_num) ((slice_buf)->line[line_num] ? (slice_buf)->line[line_num] : slice_buffer_load_line((slice_buf), (line_num)))
487 //#define slice_buffer_get_line(slice_buf, line_num) (slice_buffer_load_line((slice_buf), (line_num)))
489 static void iterative_me(SnowContext *s);
491 static void slice_buffer_init(slice_buffer * buf, int line_count, int max_allocated_lines, int line_width, DWTELEM * base_buffer)
495 buf->base_buffer = base_buffer;
496 buf->line_count = line_count;
497 buf->line_width = line_width;
498 buf->data_count = max_allocated_lines;
499 buf->line = (DWTELEM * *) av_mallocz (sizeof(DWTELEM *) * line_count);
500 buf->data_stack = (DWTELEM * *) av_malloc (sizeof(DWTELEM *) * max_allocated_lines);
502 for (i = 0; i < max_allocated_lines; i++)
504 buf->data_stack[i] = (DWTELEM *) av_malloc (sizeof(DWTELEM) * line_width);
507 buf->data_stack_top = max_allocated_lines - 1;
510 static DWTELEM * slice_buffer_load_line(slice_buffer * buf, int line)
515 // av_log(NULL, AV_LOG_DEBUG, "Cache hit: %d\n", line);
517 assert(buf->data_stack_top >= 0);
518 // assert(!buf->line[line]);
520 return buf->line[line];
522 offset = buf->line_width * line;
523 buffer = buf->data_stack[buf->data_stack_top];
524 buf->data_stack_top--;
525 buf->line[line] = buffer;
527 // av_log(NULL, AV_LOG_DEBUG, "slice_buffer_load_line: line: %d remaining: %d\n", line, buf->data_stack_top + 1);
532 static void slice_buffer_release(slice_buffer * buf, int line)
537 assert(line >= 0 && line < buf->line_count);
538 assert(buf->line[line]);
540 offset = buf->line_width * line;
541 buffer = buf->line[line];
542 buf->data_stack_top++;
543 buf->data_stack[buf->data_stack_top] = buffer;
544 buf->line[line] = NULL;
546 // av_log(NULL, AV_LOG_DEBUG, "slice_buffer_release: line: %d remaining: %d\n", line, buf->data_stack_top + 1);
549 static void slice_buffer_flush(slice_buffer * buf)
552 for (i = 0; i < buf->line_count; i++)
556 // av_log(NULL, AV_LOG_DEBUG, "slice_buffer_flush: line: %d \n", i);
557 slice_buffer_release(buf, i);
562 static void slice_buffer_destroy(slice_buffer * buf)
565 slice_buffer_flush(buf);
567 for (i = buf->data_count - 1; i >= 0; i--)
569 assert(buf->data_stack[i]);
570 av_freep(&buf->data_stack[i]);
572 assert(buf->data_stack);
573 av_freep(&buf->data_stack);
575 av_freep(&buf->line);
579 // Avoid a name clash on SGI IRIX
582 #define QEXPSHIFT (7-FRAC_BITS+8) //FIXME try to change this to 0
583 static uint8_t qexp[QROOT];
585 static inline int mirror(int v, int m){
586 while((unsigned)v > (unsigned)m){
593 static inline void put_symbol(RangeCoder *c, uint8_t *state, int v, int is_signed){
597 const int a= FFABS(v);
598 const int e= av_log2(a);
600 const int el= FFMIN(e, 10);
601 put_rac(c, state+0, 0);
604 put_rac(c, state+1+i, 1); //1..10
607 put_rac(c, state+1+9, 1); //1..10
609 put_rac(c, state+1+FFMIN(i,9), 0);
611 for(i=e-1; i>=el; i--){
612 put_rac(c, state+22+9, (a>>i)&1); //22..31
615 put_rac(c, state+22+i, (a>>i)&1); //22..31
619 put_rac(c, state+11 + el, v < 0); //11..21
622 put_rac(c, state+0, 0);
625 put_rac(c, state+1+i, 1); //1..10
627 put_rac(c, state+1+i, 0);
629 for(i=e-1; i>=0; i--){
630 put_rac(c, state+22+i, (a>>i)&1); //22..31
634 put_rac(c, state+11 + e, v < 0); //11..21
637 put_rac(c, state+1+FFMIN(i,9), 1); //1..10
639 put_rac(c, state+1+FFMIN(i,9), 0);
641 for(i=e-1; i>=0; i--){
642 put_rac(c, state+22+FFMIN(i,9), (a>>i)&1); //22..31
646 put_rac(c, state+11 + FFMIN(e,10), v < 0); //11..21
650 put_rac(c, state+0, 1);
654 static inline int get_symbol(RangeCoder *c, uint8_t *state, int is_signed){
655 if(get_rac(c, state+0))
660 while(get_rac(c, state+1 + FFMIN(e,9))){ //1..10
665 for(i=e-1; i>=0; i--){
666 a += a + get_rac(c, state+22 + FFMIN(i,9)); //22..31
669 if(is_signed && get_rac(c, state+11 + FFMIN(e,10))) //11..21
676 static inline void put_symbol2(RangeCoder *c, uint8_t *state, int v, int log2){
678 int r= log2>=0 ? 1<<log2 : 1;
684 put_rac(c, state+4+log2, 1);
689 put_rac(c, state+4+log2, 0);
691 for(i=log2-1; i>=0; i--){
692 put_rac(c, state+31-i, (v>>i)&1);
696 static inline int get_symbol2(RangeCoder *c, uint8_t *state, int log2){
698 int r= log2>=0 ? 1<<log2 : 1;
703 while(get_rac(c, state+4+log2)){
709 for(i=log2-1; i>=0; i--){
710 v+= get_rac(c, state+31-i)<<i;
716 static av_always_inline void lift(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int dst_step, int src_step, int ref_step, int width, int mul, int add, int shift, int highpass, int inverse){
717 const int mirror_left= !highpass;
718 const int mirror_right= (width&1) ^ highpass;
719 const int w= (width>>1) - 1 + (highpass & width);
722 #define LIFT(src, ref, inv) ((src) + ((inv) ? - (ref) : + (ref)))
724 dst[0] = LIFT(src[0], ((mul*2*ref[0]+add)>>shift), inverse);
730 dst[i*dst_step] = LIFT(src[i*src_step], ((mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add)>>shift), inverse);
734 dst[w*dst_step] = LIFT(src[w*src_step], ((mul*2*ref[w*ref_step]+add)>>shift), inverse);
739 static av_always_inline void lift5(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int dst_step, int src_step, int ref_step, int width, int mul, int add, int shift, int highpass, int inverse){
740 const int mirror_left= !highpass;
741 const int mirror_right= (width&1) ^ highpass;
742 const int w= (width>>1) - 1 + (highpass & width);
749 dst[0] = LIFT(src[0], ((r+add)>>shift), inverse);
755 int r= 3*(ref[i*ref_step] + ref[(i+1)*ref_step]);
758 dst[i*dst_step] = LIFT(src[i*src_step], ((r+add)>>shift), inverse);
762 int r= 3*2*ref[w*ref_step];
765 dst[w*dst_step] = LIFT(src[w*src_step], ((r+add)>>shift), inverse);
771 static av_always_inline void liftS(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int dst_step, int src_step, int ref_step, int width, int mul, int add, int shift, int highpass, int inverse){
772 const int mirror_left= !highpass;
773 const int mirror_right= (width&1) ^ highpass;
774 const int w= (width>>1) - 1 + (highpass & width);
778 #define LIFTS(src, ref, inv) ((inv) ? (src) - (((ref) - 4*(src))>>shift): (16*4*(src) + 4*(ref) + 8 + (5<<27))/(5*16) - (1<<23))
780 dst[0] = LIFTS(src[0], mul*2*ref[0]+add, inverse);
786 dst[i*dst_step] = LIFTS(src[i*src_step], mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add, inverse);
790 dst[w*dst_step] = LIFTS(src[w*src_step], mul*2*ref[w*ref_step]+add, inverse);
796 static void inplace_lift(DWTELEM *dst, int width, int *coeffs, int n, int shift, int start, int inverse){
799 for(x=start; x<width; x+=2){
803 int x2= x + 2*i - n + 1;
805 else if(x2>=width) x2= 2*width-x2-2;
806 sum += coeffs[i]*(int64_t)dst[x2];
808 if(inverse) dst[x] -= (sum + (1<<shift)/2)>>shift;
809 else dst[x] += (sum + (1<<shift)/2)>>shift;
813 static void inplace_liftV(DWTELEM *dst, int width, int height, int stride, int *coeffs, int n, int shift, int start, int inverse){
815 for(y=start; y<height; y+=2){
816 for(x=0; x<width; x++){
820 int y2= y + 2*i - n + 1;
822 else if(y2>=height) y2= 2*height-y2-2;
823 sum += coeffs[i]*(int64_t)dst[x + y2*stride];
825 if(inverse) dst[x + y*stride] -= (sum + (1<<shift)/2)>>shift;
826 else dst[x + y*stride] += (sum + (1<<shift)/2)>>shift;
835 #if 0 // more accurate 9/7
838 #define COEFFS1 (int[]){-25987,-25987}
841 #define COEFFS2 (int[]){-27777,-27777}
844 #define COEFFS3 (int[]){28931,28931}
847 #define COEFFS4 (int[]){14533,14533}
851 #define COEFFS1 (int[]){1,-9,-9,1}
854 #define COEFFS2 (int[]){-1,5,5,-1}
867 #define COEFFS1 (int[]){1,1}
870 #define COEFFS2 (int[]){-1,-1}
883 #define COEFFS2 (int[]){-1,-1}
886 #define COEFFS3 (int[]){-1,-1}
889 #define COEFFS4 (int[]){-5,29,29,-5}
894 #define COEFFS1 (int[]){-203,-203}
897 #define COEFFS2 (int[]){-217,-217}
900 #define COEFFS3 (int[]){113,113}
903 #define COEFFS4 (int[]){227,227}
911 #define COEFFS2 (int[]){-1,-1}
914 #define COEFFS3 (int[]){-1,-1}
917 #define COEFFS4 (int[]){3,3}
921 #define COEFFS1 (int[]){1,-9,-9,1}
924 #define COEFFS2 (int[]){1,1}
934 #define COEFFS1 (int[]){1,-9,-9,1}
937 #define COEFFS2 (int[]){-1,5,5,-1}
945 static void horizontal_decomposeX(DWTELEM *b, int width){
947 const int width2= width>>1;
948 const int w2= (width+1)>>1;
951 inplace_lift(b, width, COEFFS1, N1, SHIFT1, LX1, 0);
952 inplace_lift(b, width, COEFFS2, N2, SHIFT2, LX0, 0);
953 inplace_lift(b, width, COEFFS3, N3, SHIFT3, LX1, 0);
954 inplace_lift(b, width, COEFFS4, N4, SHIFT4, LX0, 0);
956 for(x=0; x<width2; x++){
958 temp[x+w2]= b[2*x + 1];
962 memcpy(b, temp, width*sizeof(int));
965 static void horizontal_composeX(DWTELEM *b, int width){
967 const int width2= width>>1;
969 const int w2= (width+1)>>1;
971 memcpy(temp, b, width*sizeof(int));
972 for(x=0; x<width2; x++){
974 b[2*x + 1]= temp[x+w2];
979 inplace_lift(b, width, COEFFS4, N4, SHIFT4, LX0, 1);
980 inplace_lift(b, width, COEFFS3, N3, SHIFT3, LX1, 1);
981 inplace_lift(b, width, COEFFS2, N2, SHIFT2, LX0, 1);
982 inplace_lift(b, width, COEFFS1, N1, SHIFT1, LX1, 1);
985 static void spatial_decomposeX(DWTELEM *buffer, int width, int height, int stride){
988 for(y=0; y<height; y++){
989 for(x=0; x<width; x++){
990 buffer[y*stride + x] *= SCALEX;
994 for(y=0; y<height; y++){
995 horizontal_decomposeX(buffer + y*stride, width);
998 inplace_liftV(buffer, width, height, stride, COEFFS1, N1, SHIFT1, LX1, 0);
999 inplace_liftV(buffer, width, height, stride, COEFFS2, N2, SHIFT2, LX0, 0);
1000 inplace_liftV(buffer, width, height, stride, COEFFS3, N3, SHIFT3, LX1, 0);
1001 inplace_liftV(buffer, width, height, stride, COEFFS4, N4, SHIFT4, LX0, 0);
1004 static void spatial_composeX(DWTELEM *buffer, int width, int height, int stride){
1007 inplace_liftV(buffer, width, height, stride, COEFFS4, N4, SHIFT4, LX0, 1);
1008 inplace_liftV(buffer, width, height, stride, COEFFS3, N3, SHIFT3, LX1, 1);
1009 inplace_liftV(buffer, width, height, stride, COEFFS2, N2, SHIFT2, LX0, 1);
1010 inplace_liftV(buffer, width, height, stride, COEFFS1, N1, SHIFT1, LX1, 1);
1012 for(y=0; y<height; y++){
1013 horizontal_composeX(buffer + y*stride, width);
1016 for(y=0; y<height; y++){
1017 for(x=0; x<width; x++){
1018 buffer[y*stride + x] /= SCALEX;
1023 static void horizontal_decompose53i(DWTELEM *b, int width){
1024 DWTELEM temp[width];
1025 const int width2= width>>1;
1027 const int w2= (width+1)>>1;
1029 for(x=0; x<width2; x++){
1031 temp[x+w2]= b[2*x + 1];
1045 for(x=1; x+1<width2; x+=2){
1049 A2 += (A1 + A3 + 2)>>2;
1053 A1= temp[x+1+width2];
1056 A4 += (A1 + A3 + 2)>>2;
1062 A2 += (A1 + A3 + 2)>>2;
1067 lift(b+w2, temp+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 0);
1068 lift(b , temp , b+w2, 1, 1, 1, width, 1, 2, 2, 0, 0);
1072 static void vertical_decompose53iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1075 for(i=0; i<width; i++){
1076 b1[i] -= (b0[i] + b2[i])>>1;
1080 static void vertical_decompose53iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1083 for(i=0; i<width; i++){
1084 b1[i] += (b0[i] + b2[i] + 2)>>2;
1088 static void spatial_decompose53i(DWTELEM *buffer, int width, int height, int stride){
1090 DWTELEM *b0= buffer + mirror(-2-1, height-1)*stride;
1091 DWTELEM *b1= buffer + mirror(-2 , height-1)*stride;
1093 for(y=-2; y<height; y+=2){
1094 DWTELEM *b2= buffer + mirror(y+1, height-1)*stride;
1095 DWTELEM *b3= buffer + mirror(y+2, height-1)*stride;
1098 if(y+1<(unsigned)height) horizontal_decompose53i(b2, width);
1099 if(y+2<(unsigned)height) horizontal_decompose53i(b3, width);
1100 STOP_TIMER("horizontal_decompose53i")}
1103 if(y+1<(unsigned)height) vertical_decompose53iH0(b1, b2, b3, width);
1104 if(y+0<(unsigned)height) vertical_decompose53iL0(b0, b1, b2, width);
1105 STOP_TIMER("vertical_decompose53i*")}
1112 static void horizontal_decompose97i(DWTELEM *b, int width){
1113 DWTELEM temp[width];
1114 const int w2= (width+1)>>1;
1116 lift (temp+w2, b +1, b , 1, 2, 2, width, -W_AM, W_AO, W_AS, 1, 0);
1117 liftS(temp , b , temp+w2, 1, 2, 1, width, -W_BM, W_BO, W_BS, 0, 0);
1118 lift5(b +w2, temp+w2, temp , 1, 1, 1, width, W_CM, W_CO, W_CS, 1, 0);
1119 lift (b , temp , b +w2, 1, 1, 1, width, W_DM, W_DO, W_DS, 0, 0);
1123 static void vertical_decompose97iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1126 for(i=0; i<width; i++){
1127 b1[i] -= (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
1131 static void vertical_decompose97iH1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1134 for(i=0; i<width; i++){
1136 b1[i] += (W_CM*(b0[i] + b2[i])+W_CO)>>W_CS;
1138 int r= 3*(b0[i] + b2[i]);
1141 b1[i] += (r+W_CO)>>W_CS;
1146 static void vertical_decompose97iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1149 for(i=0; i<width; i++){
1151 b1[i] -= (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS;
1153 b1[i] = (16*4*b1[i] - 4*(b0[i] + b2[i]) + 8*5 + (5<<27)) / (5*16) - (1<<23);
1158 static void vertical_decompose97iL1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1161 for(i=0; i<width; i++){
1162 b1[i] += (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS;
1166 static void spatial_decompose97i(DWTELEM *buffer, int width, int height, int stride){
1168 DWTELEM *b0= buffer + mirror(-4-1, height-1)*stride;
1169 DWTELEM *b1= buffer + mirror(-4 , height-1)*stride;
1170 DWTELEM *b2= buffer + mirror(-4+1, height-1)*stride;
1171 DWTELEM *b3= buffer + mirror(-4+2, height-1)*stride;
1173 for(y=-4; y<height; y+=2){
1174 DWTELEM *b4= buffer + mirror(y+3, height-1)*stride;
1175 DWTELEM *b5= buffer + mirror(y+4, height-1)*stride;
1178 if(y+3<(unsigned)height) horizontal_decompose97i(b4, width);
1179 if(y+4<(unsigned)height) horizontal_decompose97i(b5, width);
1181 STOP_TIMER("horizontal_decompose97i")
1185 if(y+3<(unsigned)height) vertical_decompose97iH0(b3, b4, b5, width);
1186 if(y+2<(unsigned)height) vertical_decompose97iL0(b2, b3, b4, width);
1187 if(y+1<(unsigned)height) vertical_decompose97iH1(b1, b2, b3, width);
1188 if(y+0<(unsigned)height) vertical_decompose97iL1(b0, b1, b2, width);
1191 STOP_TIMER("vertical_decompose97i")
1201 void ff_spatial_dwt(DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
1204 for(level=0; level<decomposition_count; level++){
1206 case DWT_97: spatial_decompose97i(buffer, width>>level, height>>level, stride<<level); break;
1207 case DWT_53: spatial_decompose53i(buffer, width>>level, height>>level, stride<<level); break;
1208 case DWT_X: spatial_decomposeX (buffer, width>>level, height>>level, stride<<level); break;
1213 static void horizontal_compose53i(DWTELEM *b, int width){
1214 DWTELEM temp[width];
1215 const int width2= width>>1;
1216 const int w2= (width+1)>>1;
1228 for(x=1; x+1<width2; x+=2){
1232 A2 += (A1 + A3 + 2)>>2;
1236 A1= temp[x+1+width2];
1239 A4 += (A1 + A3 + 2)>>2;
1245 A2 += (A1 + A3 + 2)>>2;
1249 lift(temp , b , b+w2, 1, 1, 1, width, 1, 2, 2, 0, 1);
1250 lift(temp+w2, b+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 1);
1252 for(x=0; x<width2; x++){
1254 b[2*x + 1]= temp[x+w2];
1260 static void vertical_compose53iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1263 for(i=0; i<width; i++){
1264 b1[i] += (b0[i] + b2[i])>>1;
1268 static void vertical_compose53iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1271 for(i=0; i<width; i++){
1272 b1[i] -= (b0[i] + b2[i] + 2)>>2;
1276 static void spatial_compose53i_buffered_init(dwt_compose_t *cs, slice_buffer * sb, int height, int stride_line){
1277 cs->b0 = slice_buffer_get_line(sb, mirror(-1-1, height-1) * stride_line);
1278 cs->b1 = slice_buffer_get_line(sb, mirror(-1 , height-1) * stride_line);
1282 static void spatial_compose53i_init(dwt_compose_t *cs, DWTELEM *buffer, int height, int stride){
1283 cs->b0 = buffer + mirror(-1-1, height-1)*stride;
1284 cs->b1 = buffer + mirror(-1 , height-1)*stride;
1288 static void spatial_compose53i_dy_buffered(dwt_compose_t *cs, slice_buffer * sb, int width, int height, int stride_line){
1291 DWTELEM *b0= cs->b0;
1292 DWTELEM *b1= cs->b1;
1293 DWTELEM *b2= slice_buffer_get_line(sb, mirror(y+1, height-1) * stride_line);
1294 DWTELEM *b3= slice_buffer_get_line(sb, mirror(y+2, height-1) * stride_line);
1297 if(y+1<(unsigned)height) vertical_compose53iL0(b1, b2, b3, width);
1298 if(y+0<(unsigned)height) vertical_compose53iH0(b0, b1, b2, width);
1299 STOP_TIMER("vertical_compose53i*")}
1302 if(y-1<(unsigned)height) horizontal_compose53i(b0, width);
1303 if(y+0<(unsigned)height) horizontal_compose53i(b1, width);
1304 STOP_TIMER("horizontal_compose53i")}
1311 static void spatial_compose53i_dy(dwt_compose_t *cs, DWTELEM *buffer, int width, int height, int stride){
1313 DWTELEM *b0= cs->b0;
1314 DWTELEM *b1= cs->b1;
1315 DWTELEM *b2= buffer + mirror(y+1, height-1)*stride;
1316 DWTELEM *b3= buffer + mirror(y+2, height-1)*stride;
1319 if(y+1<(unsigned)height) vertical_compose53iL0(b1, b2, b3, width);
1320 if(y+0<(unsigned)height) vertical_compose53iH0(b0, b1, b2, width);
1321 STOP_TIMER("vertical_compose53i*")}
1324 if(y-1<(unsigned)height) horizontal_compose53i(b0, width);
1325 if(y+0<(unsigned)height) horizontal_compose53i(b1, width);
1326 STOP_TIMER("horizontal_compose53i")}
1333 static void spatial_compose53i(DWTELEM *buffer, int width, int height, int stride){
1335 spatial_compose53i_init(&cs, buffer, height, stride);
1336 while(cs.y <= height)
1337 spatial_compose53i_dy(&cs, buffer, width, height, stride);
1341 void ff_snow_horizontal_compose97i(DWTELEM *b, int width){
1342 DWTELEM temp[width];
1343 const int w2= (width+1)>>1;
1345 lift (temp , b , b +w2, 1, 1, 1, width, W_DM, W_DO, W_DS, 0, 1);
1346 lift5(temp+w2, b +w2, temp , 1, 1, 1, width, W_CM, W_CO, W_CS, 1, 1);
1347 liftS(b , temp , temp+w2, 2, 1, 1, width, -W_BM, W_BO, W_BS, 0, 1);
1348 lift (b+1 , temp+w2, b , 2, 1, 2, width, -W_AM, W_AO, W_AS, 1, 1);
1351 static void vertical_compose97iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1354 for(i=0; i<width; i++){
1355 b1[i] += (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
1359 static void vertical_compose97iH1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1362 for(i=0; i<width; i++){
1364 b1[i] -= (W_CM*(b0[i] + b2[i])+W_CO)>>W_CS;
1366 int r= 3*(b0[i] + b2[i]);
1369 b1[i] -= (r+W_CO)>>W_CS;
1374 static void vertical_compose97iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1377 for(i=0; i<width; i++){
1379 b1[i] += (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS;
1381 b1[i] += (W_BM*(b0[i] + b2[i])+4*b1[i]+W_BO)>>W_BS;
1386 static void vertical_compose97iL1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1389 for(i=0; i<width; i++){
1390 b1[i] -= (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS;
1394 void ff_snow_vertical_compose97i(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, DWTELEM *b3, DWTELEM *b4, DWTELEM *b5, int width){
1397 for(i=0; i<width; i++){
1401 b4[i] -= (W_DM*(b3[i] + b5[i])+W_DO)>>W_DS;
1403 b3[i] -= (W_CM*(b2[i] + b4[i])+W_CO)>>W_CS;
1405 r= 3*(b2[i] + b4[i]);
1408 b3[i] -= (r+W_CO)>>W_CS;
1411 b2[i] += (W_BM*(b1[i] + b3[i])+W_BO)>>W_BS;
1413 b2[i] += (W_BM*(b1[i] + b3[i])+4*b2[i]+W_BO)>>W_BS;
1415 b1[i] += (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
1419 static void spatial_compose97i_buffered_init(dwt_compose_t *cs, slice_buffer * sb, int height, int stride_line){
1420 cs->b0 = slice_buffer_get_line(sb, mirror(-3-1, height-1) * stride_line);
1421 cs->b1 = slice_buffer_get_line(sb, mirror(-3 , height-1) * stride_line);
1422 cs->b2 = slice_buffer_get_line(sb, mirror(-3+1, height-1) * stride_line);
1423 cs->b3 = slice_buffer_get_line(sb, mirror(-3+2, height-1) * stride_line);
1427 static void spatial_compose97i_init(dwt_compose_t *cs, DWTELEM *buffer, int height, int stride){
1428 cs->b0 = buffer + mirror(-3-1, height-1)*stride;
1429 cs->b1 = buffer + mirror(-3 , height-1)*stride;
1430 cs->b2 = buffer + mirror(-3+1, height-1)*stride;
1431 cs->b3 = buffer + mirror(-3+2, height-1)*stride;
1435 static void spatial_compose97i_dy_buffered(DSPContext *dsp, dwt_compose_t *cs, slice_buffer * sb, int width, int height, int stride_line){
1438 DWTELEM *b0= cs->b0;
1439 DWTELEM *b1= cs->b1;
1440 DWTELEM *b2= cs->b2;
1441 DWTELEM *b3= cs->b3;
1442 DWTELEM *b4= slice_buffer_get_line(sb, mirror(y + 3, height - 1) * stride_line);
1443 DWTELEM *b5= slice_buffer_get_line(sb, mirror(y + 4, height - 1) * stride_line);
1446 if(y>0 && y+4<height){
1447 dsp->vertical_compose97i(b0, b1, b2, b3, b4, b5, width);
1449 if(y+3<(unsigned)height) vertical_compose97iL1(b3, b4, b5, width);
1450 if(y+2<(unsigned)height) vertical_compose97iH1(b2, b3, b4, width);
1451 if(y+1<(unsigned)height) vertical_compose97iL0(b1, b2, b3, width);
1452 if(y+0<(unsigned)height) vertical_compose97iH0(b0, b1, b2, width);
1455 STOP_TIMER("vertical_compose97i")}}
1458 if(y-1<(unsigned)height) dsp->horizontal_compose97i(b0, width);
1459 if(y+0<(unsigned)height) dsp->horizontal_compose97i(b1, width);
1460 if(width>400 && y+0<(unsigned)height){
1461 STOP_TIMER("horizontal_compose97i")}}
1470 static void spatial_compose97i_dy(dwt_compose_t *cs, DWTELEM *buffer, int width, int height, int stride){
1472 DWTELEM *b0= cs->b0;
1473 DWTELEM *b1= cs->b1;
1474 DWTELEM *b2= cs->b2;
1475 DWTELEM *b3= cs->b3;
1476 DWTELEM *b4= buffer + mirror(y+3, height-1)*stride;
1477 DWTELEM *b5= buffer + mirror(y+4, height-1)*stride;
1480 if(y+3<(unsigned)height) vertical_compose97iL1(b3, b4, b5, width);
1481 if(y+2<(unsigned)height) vertical_compose97iH1(b2, b3, b4, width);
1482 if(y+1<(unsigned)height) vertical_compose97iL0(b1, b2, b3, width);
1483 if(y+0<(unsigned)height) vertical_compose97iH0(b0, b1, b2, width);
1485 STOP_TIMER("vertical_compose97i")}}
1488 if(y-1<(unsigned)height) ff_snow_horizontal_compose97i(b0, width);
1489 if(y+0<(unsigned)height) ff_snow_horizontal_compose97i(b1, width);
1490 if(width>400 && b0 <= b2){
1491 STOP_TIMER("horizontal_compose97i")}}
1500 static void spatial_compose97i(DWTELEM *buffer, int width, int height, int stride){
1502 spatial_compose97i_init(&cs, buffer, height, stride);
1503 while(cs.y <= height)
1504 spatial_compose97i_dy(&cs, buffer, width, height, stride);
1507 static void ff_spatial_idwt_buffered_init(dwt_compose_t *cs, slice_buffer * sb, int width, int height, int stride_line, int type, int decomposition_count){
1509 for(level=decomposition_count-1; level>=0; level--){
1511 case DWT_97: spatial_compose97i_buffered_init(cs+level, sb, height>>level, stride_line<<level); break;
1512 case DWT_53: spatial_compose53i_buffered_init(cs+level, sb, height>>level, stride_line<<level); break;
1513 /* not slicified yet */
1514 case DWT_X: /*spatial_composeX(buffer, width>>level, height>>level, stride<<level); break;*/
1515 av_log(NULL, AV_LOG_ERROR, "spatial_composeX neither buffered nor slicified yet.\n"); break;
1520 static void ff_spatial_idwt_init(dwt_compose_t *cs, DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
1522 for(level=decomposition_count-1; level>=0; level--){
1524 case DWT_97: spatial_compose97i_init(cs+level, buffer, height>>level, stride<<level); break;
1525 case DWT_53: spatial_compose53i_init(cs+level, buffer, height>>level, stride<<level); break;
1526 /* not slicified yet */
1527 case DWT_X: spatial_composeX(buffer, width>>level, height>>level, stride<<level); break;
1532 static void ff_spatial_idwt_slice(dwt_compose_t *cs, DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count, int y){
1533 const int support = type==1 ? 3 : 5;
1537 for(level=decomposition_count-1; level>=0; level--){
1538 while(cs[level].y <= FFMIN((y>>level)+support, height>>level)){
1540 case DWT_97: spatial_compose97i_dy(cs+level, buffer, width>>level, height>>level, stride<<level);
1542 case DWT_53: spatial_compose53i_dy(cs+level, buffer, width>>level, height>>level, stride<<level);
1550 static void ff_spatial_idwt_buffered_slice(DSPContext *dsp, dwt_compose_t *cs, slice_buffer * slice_buf, int width, int height, int stride_line, int type, int decomposition_count, int y){
1551 const int support = type==1 ? 3 : 5;
1555 for(level=decomposition_count-1; level>=0; level--){
1556 while(cs[level].y <= FFMIN((y>>level)+support, height>>level)){
1558 case DWT_97: spatial_compose97i_dy_buffered(dsp, cs+level, slice_buf, width>>level, height>>level, stride_line<<level);
1560 case DWT_53: spatial_compose53i_dy_buffered(cs+level, slice_buf, width>>level, height>>level, stride_line<<level);
1568 static void ff_spatial_idwt(DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
1571 for(level=decomposition_count-1; level>=0; level--)
1572 spatial_composeX (buffer, width>>level, height>>level, stride<<level);
1574 dwt_compose_t cs[MAX_DECOMPOSITIONS];
1576 ff_spatial_idwt_init(cs, buffer, width, height, stride, type, decomposition_count);
1577 for(y=0; y<height; y+=4)
1578 ff_spatial_idwt_slice(cs, buffer, width, height, stride, type, decomposition_count, y);
1582 static int encode_subband_c0run(SnowContext *s, SubBand *b, DWTELEM *src, DWTELEM *parent, int stride, int orientation){
1583 const int w= b->width;
1584 const int h= b->height;
1596 int /*ll=0, */l=0, lt=0, t=0, rt=0;
1597 v= src[x + y*stride];
1600 t= src[x + (y-1)*stride];
1602 lt= src[x - 1 + (y-1)*stride];
1605 rt= src[x + 1 + (y-1)*stride];
1609 l= src[x - 1 + y*stride];
1611 if(orientation==1) ll= src[y + (x-2)*stride];
1612 else ll= src[x - 2 + y*stride];
1618 if(px<b->parent->width && py<b->parent->height)
1619 p= parent[px + py*2*stride];
1621 if(!(/*ll|*/l|lt|t|rt|p)){
1623 runs[run_index++]= run;
1631 max_index= run_index;
1632 runs[run_index++]= run;
1634 run= runs[run_index++];
1636 put_symbol2(&s->c, b->state[30], max_index, 0);
1637 if(run_index <= max_index)
1638 put_symbol2(&s->c, b->state[1], run, 3);
1641 if(s->c.bytestream_end - s->c.bytestream < w*40){
1642 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
1647 int /*ll=0, */l=0, lt=0, t=0, rt=0;
1648 v= src[x + y*stride];
1651 t= src[x + (y-1)*stride];
1653 lt= src[x - 1 + (y-1)*stride];
1656 rt= src[x + 1 + (y-1)*stride];
1660 l= src[x - 1 + y*stride];
1662 if(orientation==1) ll= src[y + (x-2)*stride];
1663 else ll= src[x - 2 + y*stride];
1669 if(px<b->parent->width && py<b->parent->height)
1670 p= parent[px + py*2*stride];
1672 if(/*ll|*/l|lt|t|rt|p){
1673 int context= av_log2(/*FFABS(ll) + */3*FFABS(l) + FFABS(lt) + 2*FFABS(t) + FFABS(rt) + FFABS(p));
1675 put_rac(&s->c, &b->state[0][context], !!v);
1678 run= runs[run_index++];
1680 if(run_index <= max_index)
1681 put_symbol2(&s->c, b->state[1], run, 3);
1689 int context= av_log2(/*FFABS(ll) + */3*FFABS(l) + FFABS(lt) + 2*FFABS(t) + FFABS(rt) + FFABS(p));
1690 int l2= 2*FFABS(l) + (l<0);
1691 int t2= 2*FFABS(t) + (t<0);
1693 put_symbol2(&s->c, b->state[context + 2], FFABS(v)-1, context-4);
1694 put_rac(&s->c, &b->state[0][16 + 1 + 3 + quant3bA[l2&0xFF] + 3*quant3bA[t2&0xFF]], v<0);
1702 static int encode_subband(SnowContext *s, SubBand *b, DWTELEM *src, DWTELEM *parent, int stride, int orientation){
1703 // encode_subband_qtree(s, b, src, parent, stride, orientation);
1704 // encode_subband_z0run(s, b, src, parent, stride, orientation);
1705 return encode_subband_c0run(s, b, src, parent, stride, orientation);
1706 // encode_subband_dzr(s, b, src, parent, stride, orientation);
1709 static inline void unpack_coeffs(SnowContext *s, SubBand *b, SubBand * parent, int orientation){
1710 const int w= b->width;
1711 const int h= b->height;
1716 x_and_coeff *xc= b->x_coeff;
1717 x_and_coeff *prev_xc= NULL;
1718 x_and_coeff *prev2_xc= xc;
1719 x_and_coeff *parent_xc= parent ? parent->x_coeff : NULL;
1720 x_and_coeff *prev_parent_xc= parent_xc;
1722 runs= get_symbol2(&s->c, b->state[30], 0);
1723 if(runs-- > 0) run= get_symbol2(&s->c, b->state[1], 3);
1728 int lt=0, t=0, rt=0;
1730 if(y && prev_xc->x == 0){
1742 if(prev_xc->x == x + 1)
1748 if(x>>1 > parent_xc->x){
1751 if(x>>1 == parent_xc->x){
1752 p= parent_xc->coeff;
1755 if(/*ll|*/l|lt|t|rt|p){
1756 int context= av_log2(/*FFABS(ll) + */3*(l>>1) + (lt>>1) + (t&~1) + (rt>>1) + (p>>1));
1758 v=get_rac(&s->c, &b->state[0][context]);
1760 v= 2*(get_symbol2(&s->c, b->state[context + 2], context-4) + 1);
1761 v+=get_rac(&s->c, &b->state[0][16 + 1 + 3 + quant3bA[l&0xFF] + 3*quant3bA[t&0xFF]]);
1768 if(runs-- > 0) run= get_symbol2(&s->c, b->state[1], 3);
1770 v= 2*(get_symbol2(&s->c, b->state[0 + 2], 0-4) + 1);
1771 v+=get_rac(&s->c, &b->state[0][16 + 1 + 3]);
1780 if(y) max_run= FFMIN(run, prev_xc->x - x - 2);
1781 else max_run= FFMIN(run, w-x-1);
1783 max_run= FFMIN(max_run, 2*parent_xc->x - x - 1);
1789 (xc++)->x= w+1; //end marker
1795 while(parent_xc->x != parent->width+1)
1798 prev_parent_xc= parent_xc;
1800 parent_xc= prev_parent_xc;
1805 (xc++)->x= w+1; //end marker
1809 static inline void decode_subband_slice_buffered(SnowContext *s, SubBand *b, slice_buffer * sb, int start_y, int h, int save_state[1]){
1810 const int w= b->width;
1812 const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
1813 int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
1814 int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
1819 if(b->buf == s->spatial_dwt_buffer || s->qlog == LOSSLESS_QLOG){
1824 /* If we are on the second or later slice, restore our index. */
1826 new_index = save_state[0];
1829 for(y=start_y; y<h; y++){
1832 DWTELEM * line = slice_buffer_get_line(sb, y * b->stride_line + b->buf_y_offset) + b->buf_x_offset;
1833 memset(line, 0, b->width*sizeof(DWTELEM));
1834 v = b->x_coeff[new_index].coeff;
1835 x = b->x_coeff[new_index++].x;
1838 register int t= ( (v>>1)*qmul + qadd)>>QEXPSHIFT;
1839 register int u= -(v&1);
1840 line[x] = (t^u) - u;
1842 v = b->x_coeff[new_index].coeff;
1843 x = b->x_coeff[new_index++].x;
1846 if(w > 200 && start_y != 0/*level+1 == s->spatial_decomposition_count*/){
1847 STOP_TIMER("decode_subband")
1850 /* Save our variables for the next slice. */
1851 save_state[0] = new_index;
1856 static void reset_contexts(SnowContext *s){ //FIXME better initial contexts
1857 int plane_index, level, orientation;
1859 for(plane_index=0; plane_index<3; plane_index++){
1860 for(level=0; level<s->spatial_decomposition_count; level++){
1861 for(orientation=level ? 1:0; orientation<4; orientation++){
1862 memset(s->plane[plane_index].band[level][orientation].state, MID_STATE, sizeof(s->plane[plane_index].band[level][orientation].state));
1866 memset(s->header_state, MID_STATE, sizeof(s->header_state));
1867 memset(s->block_state, MID_STATE, sizeof(s->block_state));
1870 static int alloc_blocks(SnowContext *s){
1871 int w= -((-s->avctx->width )>>LOG2_MB_SIZE);
1872 int h= -((-s->avctx->height)>>LOG2_MB_SIZE);
1877 s->block= av_mallocz(w * h * sizeof(BlockNode) << (s->block_max_depth*2));
1881 static inline void copy_rac_state(RangeCoder *d, RangeCoder *s){
1882 uint8_t *bytestream= d->bytestream;
1883 uint8_t *bytestream_start= d->bytestream_start;
1885 d->bytestream= bytestream;
1886 d->bytestream_start= bytestream_start;
1889 //near copy & paste from dsputil, FIXME
1890 static int pix_sum(uint8_t * pix, int line_size, int w)
1895 for (i = 0; i < w; i++) {
1896 for (j = 0; j < w; j++) {
1900 pix += line_size - w;
1905 //near copy & paste from dsputil, FIXME
1906 static int pix_norm1(uint8_t * pix, int line_size, int w)
1909 uint32_t *sq = ff_squareTbl + 256;
1912 for (i = 0; i < w; i++) {
1913 for (j = 0; j < w; j ++) {
1917 pix += line_size - w;
1922 static inline void set_blocks(SnowContext *s, int level, int x, int y, int l, int cb, int cr, int mx, int my, int ref, int type){
1923 const int w= s->b_width << s->block_max_depth;
1924 const int rem_depth= s->block_max_depth - level;
1925 const int index= (x + y*w) << rem_depth;
1926 const int block_w= 1<<rem_depth;
1939 for(j=0; j<block_w; j++){
1940 for(i=0; i<block_w; i++){
1941 s->block[index + i + j*w]= block;
1946 static inline void init_ref(MotionEstContext *c, uint8_t *src[3], uint8_t *ref[3], uint8_t *ref2[3], int x, int y, int ref_index){
1947 const int offset[3]= {
1949 ((y*c->uvstride + x)>>1),
1950 ((y*c->uvstride + x)>>1),
1954 c->src[0][i]= src [i];
1955 c->ref[0][i]= ref [i] + offset[i];
1960 static inline void pred_mv(SnowContext *s, int *mx, int *my, int ref,
1961 const BlockNode *left, const BlockNode *top, const BlockNode *tr){
1962 if(s->ref_frames == 1){
1963 *mx = mid_pred(left->mx, top->mx, tr->mx);
1964 *my = mid_pred(left->my, top->my, tr->my);
1966 const int *scale = scale_mv_ref[ref];
1967 *mx = mid_pred((left->mx * scale[left->ref] + 128) >>8,
1968 (top ->mx * scale[top ->ref] + 128) >>8,
1969 (tr ->mx * scale[tr ->ref] + 128) >>8);
1970 *my = mid_pred((left->my * scale[left->ref] + 128) >>8,
1971 (top ->my * scale[top ->ref] + 128) >>8,
1972 (tr ->my * scale[tr ->ref] + 128) >>8);
1979 #define P_TOPRIGHT P[3]
1980 #define P_MEDIAN P[4]
1982 #define FLAG_QPEL 1 //must be 1
1984 static int encode_q_branch(SnowContext *s, int level, int x, int y){
1985 uint8_t p_buffer[1024];
1986 uint8_t i_buffer[1024];
1987 uint8_t p_state[sizeof(s->block_state)];
1988 uint8_t i_state[sizeof(s->block_state)];
1990 uint8_t *pbbak= s->c.bytestream;
1991 uint8_t *pbbak_start= s->c.bytestream_start;
1992 int score, score2, iscore, i_len, p_len, block_s, sum;
1993 const int w= s->b_width << s->block_max_depth;
1994 const int h= s->b_height << s->block_max_depth;
1995 const int rem_depth= s->block_max_depth - level;
1996 const int index= (x + y*w) << rem_depth;
1997 const int block_w= 1<<(LOG2_MB_SIZE - level);
1998 int trx= (x+1)<<rem_depth;
1999 int try= (y+1)<<rem_depth;
2000 const BlockNode *left = x ? &s->block[index-1] : &null_block;
2001 const BlockNode *top = y ? &s->block[index-w] : &null_block;
2002 const BlockNode *right = trx<w ? &s->block[index+1] : &null_block;
2003 const BlockNode *bottom= try<h ? &s->block[index+w] : &null_block;
2004 const BlockNode *tl = y && x ? &s->block[index-w-1] : left;
2005 const BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
2006 int pl = left->color[0];
2007 int pcb= left->color[1];
2008 int pcr= left->color[2];
2012 const int stride= s->current_picture.linesize[0];
2013 const int uvstride= s->current_picture.linesize[1];
2014 uint8_t *current_data[3]= { s->input_picture.data[0] + (x + y* stride)*block_w,
2015 s->input_picture.data[1] + (x + y*uvstride)*block_w/2,
2016 s->input_picture.data[2] + (x + y*uvstride)*block_w/2};
2018 int16_t last_mv[3][2];
2019 int qpel= !!(s->avctx->flags & CODEC_FLAG_QPEL); //unused
2020 const int shift= 1+qpel;
2021 MotionEstContext *c= &s->m.me;
2022 int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref);
2023 int mx_context= av_log2(2*FFABS(left->mx - top->mx));
2024 int my_context= av_log2(2*FFABS(left->my - top->my));
2025 int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
2026 int ref, best_ref, ref_score, ref_mx, ref_my;
2028 assert(sizeof(s->block_state) >= 256);
2030 set_blocks(s, level, x, y, pl, pcb, pcr, 0, 0, 0, BLOCK_INTRA);
2034 // clip predictors / edge ?
2036 P_LEFT[0]= left->mx;
2037 P_LEFT[1]= left->my;
2040 P_TOPRIGHT[0]= tr->mx;
2041 P_TOPRIGHT[1]= tr->my;
2043 last_mv[0][0]= s->block[index].mx;
2044 last_mv[0][1]= s->block[index].my;
2045 last_mv[1][0]= right->mx;
2046 last_mv[1][1]= right->my;
2047 last_mv[2][0]= bottom->mx;
2048 last_mv[2][1]= bottom->my;
2055 assert(c-> stride == stride);
2056 assert(c->uvstride == uvstride);
2058 c->penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_cmp);
2059 c->sub_penalty_factor= get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_sub_cmp);
2060 c->mb_penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->mb_cmp);
2061 c->current_mv_penalty= c->mv_penalty[s->m.f_code=1] + MAX_MV;
2063 c->xmin = - x*block_w - 16+2;
2064 c->ymin = - y*block_w - 16+2;
2065 c->xmax = - (x+1)*block_w + (w<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-2;
2066 c->ymax = - (y+1)*block_w + (h<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-2;
2068 if(P_LEFT[0] > (c->xmax<<shift)) P_LEFT[0] = (c->xmax<<shift);
2069 if(P_LEFT[1] > (c->ymax<<shift)) P_LEFT[1] = (c->ymax<<shift);
2070 if(P_TOP[0] > (c->xmax<<shift)) P_TOP[0] = (c->xmax<<shift);
2071 if(P_TOP[1] > (c->ymax<<shift)) P_TOP[1] = (c->ymax<<shift);
2072 if(P_TOPRIGHT[0] < (c->xmin<<shift)) P_TOPRIGHT[0]= (c->xmin<<shift);
2073 if(P_TOPRIGHT[0] > (c->xmax<<shift)) P_TOPRIGHT[0]= (c->xmax<<shift); //due to pmx no clip
2074 if(P_TOPRIGHT[1] > (c->ymax<<shift)) P_TOPRIGHT[1]= (c->ymax<<shift);
2076 P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]);
2077 P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);
2080 c->pred_x= P_LEFT[0];
2081 c->pred_y= P_LEFT[1];
2083 c->pred_x = P_MEDIAN[0];
2084 c->pred_y = P_MEDIAN[1];
2089 for(ref=0; ref<s->ref_frames; ref++){
2090 init_ref(c, current_data, s->last_picture[ref].data, NULL, block_w*x, block_w*y, 0);
2092 ref_score= ff_epzs_motion_search(&s->m, &ref_mx, &ref_my, P, 0, /*ref_index*/ 0, last_mv,
2093 (1<<16)>>shift, level-LOG2_MB_SIZE+4, block_w);
2095 assert(ref_mx >= c->xmin);
2096 assert(ref_mx <= c->xmax);
2097 assert(ref_my >= c->ymin);
2098 assert(ref_my <= c->ymax);
2100 ref_score= c->sub_motion_search(&s->m, &ref_mx, &ref_my, ref_score, 0, 0, level-LOG2_MB_SIZE+4, block_w);
2101 ref_score= ff_get_mb_score(&s->m, ref_mx, ref_my, 0, 0, level-LOG2_MB_SIZE+4, block_w, 0);
2102 ref_score+= 2*av_log2(2*ref)*c->penalty_factor;
2103 if(s->ref_mvs[ref]){
2104 s->ref_mvs[ref][index][0]= ref_mx;
2105 s->ref_mvs[ref][index][1]= ref_my;
2106 s->ref_scores[ref][index]= ref_score;
2108 if(score > ref_score){
2115 //FIXME if mb_cmp != SSE then intra cant be compared currently and mb_penalty vs. lambda2
2119 pc.bytestream_start=
2120 pc.bytestream= p_buffer; //FIXME end/start? and at the other stoo
2121 memcpy(p_state, s->block_state, sizeof(s->block_state));
2123 if(level!=s->block_max_depth)
2124 put_rac(&pc, &p_state[4 + s_context], 1);
2125 put_rac(&pc, &p_state[1 + left->type + top->type], 0);
2126 if(s->ref_frames > 1)
2127 put_symbol(&pc, &p_state[128 + 1024 + 32*ref_context], best_ref, 0);
2128 pred_mv(s, &pmx, &pmy, best_ref, left, top, tr);
2129 put_symbol(&pc, &p_state[128 + 32*(mx_context + 16*!!best_ref)], mx - pmx, 1);
2130 put_symbol(&pc, &p_state[128 + 32*(my_context + 16*!!best_ref)], my - pmy, 1);
2131 p_len= pc.bytestream - pc.bytestream_start;
2132 score += (s->lambda2*(p_len*8
2133 + (pc.outstanding_count - s->c.outstanding_count)*8
2134 + (-av_log2(pc.range) + av_log2(s->c.range))
2135 ))>>FF_LAMBDA_SHIFT;
2137 block_s= block_w*block_w;
2138 sum = pix_sum(current_data[0], stride, block_w);
2139 l= (sum + block_s/2)/block_s;
2140 iscore = pix_norm1(current_data[0], stride, block_w) - 2*l*sum + l*l*block_s;
2142 block_s= block_w*block_w>>2;
2143 sum = pix_sum(current_data[1], uvstride, block_w>>1);
2144 cb= (sum + block_s/2)/block_s;
2145 // iscore += pix_norm1(¤t_mb[1][0], uvstride, block_w>>1) - 2*cb*sum + cb*cb*block_s;
2146 sum = pix_sum(current_data[2], uvstride, block_w>>1);
2147 cr= (sum + block_s/2)/block_s;
2148 // iscore += pix_norm1(¤t_mb[2][0], uvstride, block_w>>1) - 2*cr*sum + cr*cr*block_s;
2151 ic.bytestream_start=
2152 ic.bytestream= i_buffer; //FIXME end/start? and at the other stoo
2153 memcpy(i_state, s->block_state, sizeof(s->block_state));
2154 if(level!=s->block_max_depth)
2155 put_rac(&ic, &i_state[4 + s_context], 1);
2156 put_rac(&ic, &i_state[1 + left->type + top->type], 1);
2157 put_symbol(&ic, &i_state[32], l-pl , 1);
2158 put_symbol(&ic, &i_state[64], cb-pcb, 1);
2159 put_symbol(&ic, &i_state[96], cr-pcr, 1);
2160 i_len= ic.bytestream - ic.bytestream_start;
2161 iscore += (s->lambda2*(i_len*8
2162 + (ic.outstanding_count - s->c.outstanding_count)*8
2163 + (-av_log2(ic.range) + av_log2(s->c.range))
2164 ))>>FF_LAMBDA_SHIFT;
2166 // assert(score==256*256*256*64-1);
2167 assert(iscore < 255*255*256 + s->lambda2*10);
2168 assert(iscore >= 0);
2169 assert(l>=0 && l<=255);
2170 assert(pl>=0 && pl<=255);
2173 int varc= iscore >> 8;
2174 int vard= score >> 8;
2175 if (vard <= 64 || vard < varc)
2176 c->scene_change_score+= ff_sqrt(vard) - ff_sqrt(varc);
2178 c->scene_change_score+= s->m.qscale;
2181 if(level!=s->block_max_depth){
2182 put_rac(&s->c, &s->block_state[4 + s_context], 0);
2183 score2 = encode_q_branch(s, level+1, 2*x+0, 2*y+0);
2184 score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+0);
2185 score2+= encode_q_branch(s, level+1, 2*x+0, 2*y+1);
2186 score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+1);
2187 score2+= s->lambda2>>FF_LAMBDA_SHIFT; //FIXME exact split overhead
2189 if(score2 < score && score2 < iscore)
2194 pred_mv(s, &pmx, &pmy, 0, left, top, tr);
2195 memcpy(pbbak, i_buffer, i_len);
2197 s->c.bytestream_start= pbbak_start;
2198 s->c.bytestream= pbbak + i_len;
2199 set_blocks(s, level, x, y, l, cb, cr, pmx, pmy, 0, BLOCK_INTRA);
2200 memcpy(s->block_state, i_state, sizeof(s->block_state));
2203 memcpy(pbbak, p_buffer, p_len);
2205 s->c.bytestream_start= pbbak_start;
2206 s->c.bytestream= pbbak + p_len;
2207 set_blocks(s, level, x, y, pl, pcb, pcr, mx, my, best_ref, 0);
2208 memcpy(s->block_state, p_state, sizeof(s->block_state));
2213 static av_always_inline int same_block(BlockNode *a, BlockNode *b){
2214 if((a->type&BLOCK_INTRA) && (b->type&BLOCK_INTRA)){
2215 return !((a->color[0] - b->color[0]) | (a->color[1] - b->color[1]) | (a->color[2] - b->color[2]));
2217 return !((a->mx - b->mx) | (a->my - b->my) | (a->ref - b->ref) | ((a->type ^ b->type)&BLOCK_INTRA));
2221 static void encode_q_branch2(SnowContext *s, int level, int x, int y){
2222 const int w= s->b_width << s->block_max_depth;
2223 const int rem_depth= s->block_max_depth - level;
2224 const int index= (x + y*w) << rem_depth;
2225 int trx= (x+1)<<rem_depth;
2226 BlockNode *b= &s->block[index];
2227 const BlockNode *left = x ? &s->block[index-1] : &null_block;
2228 const BlockNode *top = y ? &s->block[index-w] : &null_block;
2229 const BlockNode *tl = y && x ? &s->block[index-w-1] : left;
2230 const BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
2231 int pl = left->color[0];
2232 int pcb= left->color[1];
2233 int pcr= left->color[2];
2235 int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref);
2236 int mx_context= av_log2(2*FFABS(left->mx - top->mx)) + 16*!!b->ref;
2237 int my_context= av_log2(2*FFABS(left->my - top->my)) + 16*!!b->ref;
2238 int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
2241 set_blocks(s, level, x, y, pl, pcb, pcr, 0, 0, 0, BLOCK_INTRA);
2245 if(level!=s->block_max_depth){
2246 if(same_block(b,b+1) && same_block(b,b+w) && same_block(b,b+w+1)){
2247 put_rac(&s->c, &s->block_state[4 + s_context], 1);
2249 put_rac(&s->c, &s->block_state[4 + s_context], 0);
2250 encode_q_branch2(s, level+1, 2*x+0, 2*y+0);
2251 encode_q_branch2(s, level+1, 2*x+1, 2*y+0);
2252 encode_q_branch2(s, level+1, 2*x+0, 2*y+1);
2253 encode_q_branch2(s, level+1, 2*x+1, 2*y+1);
2257 if(b->type & BLOCK_INTRA){
2258 pred_mv(s, &pmx, &pmy, 0, left, top, tr);
2259 put_rac(&s->c, &s->block_state[1 + (left->type&1) + (top->type&1)], 1);
2260 put_symbol(&s->c, &s->block_state[32], b->color[0]-pl , 1);
2261 put_symbol(&s->c, &s->block_state[64], b->color[1]-pcb, 1);
2262 put_symbol(&s->c, &s->block_state[96], b->color[2]-pcr, 1);
2263 set_blocks(s, level, x, y, b->color[0], b->color[1], b->color[2], pmx, pmy, 0, BLOCK_INTRA);
2265 pred_mv(s, &pmx, &pmy, b->ref, left, top, tr);
2266 put_rac(&s->c, &s->block_state[1 + (left->type&1) + (top->type&1)], 0);
2267 if(s->ref_frames > 1)
2268 put_symbol(&s->c, &s->block_state[128 + 1024 + 32*ref_context], b->ref, 0);
2269 put_symbol(&s->c, &s->block_state[128 + 32*mx_context], b->mx - pmx, 1);
2270 put_symbol(&s->c, &s->block_state[128 + 32*my_context], b->my - pmy, 1);
2271 set_blocks(s, level, x, y, pl, pcb, pcr, b->mx, b->my, b->ref, 0);
2275 static void decode_q_branch(SnowContext *s, int level, int x, int y){
2276 const int w= s->b_width << s->block_max_depth;
2277 const int rem_depth= s->block_max_depth - level;
2278 const int index= (x + y*w) << rem_depth;
2279 int trx= (x+1)<<rem_depth;
2280 const BlockNode *left = x ? &s->block[index-1] : &null_block;
2281 const BlockNode *top = y ? &s->block[index-w] : &null_block;
2282 const BlockNode *tl = y && x ? &s->block[index-w-1] : left;
2283 const BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
2284 int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
2287 set_blocks(s, level, x, y, null_block.color[0], null_block.color[1], null_block.color[2], null_block.mx, null_block.my, null_block.ref, BLOCK_INTRA);
2291 if(level==s->block_max_depth || get_rac(&s->c, &s->block_state[4 + s_context])){
2293 int l = left->color[0];
2294 int cb= left->color[1];
2295 int cr= left->color[2];
2297 int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref);
2298 int mx_context= av_log2(2*FFABS(left->mx - top->mx)) + 0*av_log2(2*FFABS(tr->mx - top->mx));
2299 int my_context= av_log2(2*FFABS(left->my - top->my)) + 0*av_log2(2*FFABS(tr->my - top->my));
2301 type= get_rac(&s->c, &s->block_state[1 + left->type + top->type]) ? BLOCK_INTRA : 0;
2304 pred_mv(s, &mx, &my, 0, left, top, tr);
2305 l += get_symbol(&s->c, &s->block_state[32], 1);
2306 cb+= get_symbol(&s->c, &s->block_state[64], 1);
2307 cr+= get_symbol(&s->c, &s->block_state[96], 1);
2309 if(s->ref_frames > 1)
2310 ref= get_symbol(&s->c, &s->block_state[128 + 1024 + 32*ref_context], 0);
2311 pred_mv(s, &mx, &my, ref, left, top, tr);
2312 mx+= get_symbol(&s->c, &s->block_state[128 + 32*(mx_context + 16*!!ref)], 1);
2313 my+= get_symbol(&s->c, &s->block_state[128 + 32*(my_context + 16*!!ref)], 1);
2315 set_blocks(s, level, x, y, l, cb, cr, mx, my, ref, type);
2317 decode_q_branch(s, level+1, 2*x+0, 2*y+0);
2318 decode_q_branch(s, level+1, 2*x+1, 2*y+0);
2319 decode_q_branch(s, level+1, 2*x+0, 2*y+1);
2320 decode_q_branch(s, level+1, 2*x+1, 2*y+1);
2324 static void encode_blocks(SnowContext *s, int search){
2329 if(s->avctx->me_method == ME_ITER && !s->keyframe && search)
2333 if(s->c.bytestream_end - s->c.bytestream < w*MB_SIZE*MB_SIZE*3){ //FIXME nicer limit
2334 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2338 if(s->avctx->me_method == ME_ITER || !search)
2339 encode_q_branch2(s, 0, x, y);
2341 encode_q_branch (s, 0, x, y);
2346 static void decode_blocks(SnowContext *s){
2353 decode_q_branch(s, 0, x, y);
2358 static void mc_block(uint8_t *dst, uint8_t *src, uint8_t *tmp, int stride, int b_w, int b_h, int dx, int dy){
2361 for(y=0; y < b_h+5; y++){
2362 for(x=0; x < b_w; x++){
2369 // int am= 9*(a1+a2) - (a0+a3);
2370 int am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5);
2371 // int am= 18*(a2+a3) - 2*(a1+a4);
2372 // int aL= (-7*a0 + 105*a1 + 35*a2 - 5*a3)>>3;
2373 // int aR= (-7*a3 + 105*a2 + 35*a1 - 5*a0)>>3;
2375 // if(b_w==16) am= 8*(a1+a2);
2377 if(dx<8) am = (32*a2*( 8-dx) + am* dx + 128)>>8;
2378 else am = ( am*(16-dx) + 32*a3*(dx-8) + 128)>>8;
2380 /* FIXME Try increasing tmp buffer to 16 bits and not clipping here. Should give marginally better results. - Robert*/
2381 if(am&(~255)) am= ~(am>>31);
2385 /* if (dx< 4) tmp[x + y*stride]= (16*a1*( 4-dx) + aL* dx + 32)>>6;
2386 else if(dx< 8) tmp[x + y*stride]= ( aL*( 8-dx) + am*(dx- 4) + 32)>>6;
2387 else if(dx<12) tmp[x + y*stride]= ( am*(12-dx) + aR*(dx- 8) + 32)>>6;
2388 else tmp[x + y*stride]= ( aR*(16-dx) + 16*a2*(dx-12) + 32)>>6;*/
2393 tmp -= (b_h+5)*stride;
2395 for(y=0; y < b_h; y++){
2396 for(x=0; x < b_w; x++){
2397 int a0= tmp[x + 0*stride];
2398 int a1= tmp[x + 1*stride];
2399 int a2= tmp[x + 2*stride];
2400 int a3= tmp[x + 3*stride];
2401 int a4= tmp[x + 4*stride];
2402 int a5= tmp[x + 5*stride];
2403 int am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5);
2404 // int am= 18*(a2+a3) - 2*(a1+a4);
2405 /* int aL= (-7*a0 + 105*a1 + 35*a2 - 5*a3)>>3;
2406 int aR= (-7*a3 + 105*a2 + 35*a1 - 5*a0)>>3;*/
2408 // if(b_w==16) am= 8*(a1+a2);
2410 if(dy<8) am = (32*a2*( 8-dy) + am* dy + 128)>>8;
2411 else am = ( am*(16-dy) + 32*a3*(dy-8) + 128)>>8;
2413 if(am&(~255)) am= ~(am>>31);
2416 /* if (dy< 4) tmp[x + y*stride]= (16*a1*( 4-dy) + aL* dy + 32)>>6;
2417 else if(dy< 8) tmp[x + y*stride]= ( aL*( 8-dy) + am*(dy- 4) + 32)>>6;
2418 else if(dy<12) tmp[x + y*stride]= ( am*(12-dy) + aR*(dy- 8) + 32)>>6;
2419 else tmp[x + y*stride]= ( aR*(16-dy) + 16*a2*(dy-12) + 32)>>6;*/
2424 STOP_TIMER("mc_block")
2427 #define mca(dx,dy,b_w)\
2428 static void mc_block_hpel ## dx ## dy ## b_w(uint8_t *dst, uint8_t *src, int stride, int h){\
2429 uint8_t tmp[stride*(b_w+5)];\
2431 mc_block(dst, src-2-2*stride, tmp, stride, b_w, b_w, dx, dy);\
2443 static void pred_block(SnowContext *s, uint8_t *dst, uint8_t *tmp, int stride, int sx, int sy, int b_w, int b_h, BlockNode *block, int plane_index, int w, int h){
2444 if(block->type & BLOCK_INTRA){
2446 const int color = block->color[plane_index];
2447 const int color4= color*0x01010101;
2449 for(y=0; y < b_h; y++){
2450 *(uint32_t*)&dst[0 + y*stride]= color4;
2451 *(uint32_t*)&dst[4 + y*stride]= color4;
2452 *(uint32_t*)&dst[8 + y*stride]= color4;
2453 *(uint32_t*)&dst[12+ y*stride]= color4;
2454 *(uint32_t*)&dst[16+ y*stride]= color4;
2455 *(uint32_t*)&dst[20+ y*stride]= color4;
2456 *(uint32_t*)&dst[24+ y*stride]= color4;
2457 *(uint32_t*)&dst[28+ y*stride]= color4;
2460 for(y=0; y < b_h; y++){
2461 *(uint32_t*)&dst[0 + y*stride]= color4;
2462 *(uint32_t*)&dst[4 + y*stride]= color4;
2463 *(uint32_t*)&dst[8 + y*stride]= color4;
2464 *(uint32_t*)&dst[12+ y*stride]= color4;
2467 for(y=0; y < b_h; y++){
2468 *(uint32_t*)&dst[0 + y*stride]= color4;
2469 *(uint32_t*)&dst[4 + y*stride]= color4;
2472 for(y=0; y < b_h; y++){
2473 *(uint32_t*)&dst[0 + y*stride]= color4;
2476 for(y=0; y < b_h; y++){
2477 for(x=0; x < b_w; x++){
2478 dst[x + y*stride]= color;
2483 uint8_t *src= s->last_picture[block->ref].data[plane_index];
2484 const int scale= plane_index ? s->mv_scale : 2*s->mv_scale;
2485 int mx= block->mx*scale;
2486 int my= block->my*scale;
2487 const int dx= mx&15;
2488 const int dy= my&15;
2489 const int tab_index= 3 - (b_w>>2) + (b_w>>4);
2492 src += sx + sy*stride;
2493 if( (unsigned)sx >= w - b_w - 4
2494 || (unsigned)sy >= h - b_h - 4){
2495 ff_emulated_edge_mc(tmp + MB_SIZE, src, stride, b_w+5, b_h+5, sx, sy, w, h);
2498 // assert(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h);
2499 // assert(!(b_w&(b_w-1)));
2500 assert(b_w>1 && b_h>1);
2501 assert(tab_index>=0 && tab_index<4 || b_w==32);
2502 if((dx&3) || (dy&3) || !(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h) || (b_w&(b_w-1)))
2503 mc_block(dst, src, tmp, stride, b_w, b_h, dx, dy);
2506 for(y=0; y<b_h; y+=16){
2507 s->dsp.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + y*stride, src + 2 + (y+2)*stride,stride);
2508 s->dsp.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + 16 + y*stride, src + 18 + (y+2)*stride,stride);
2511 s->dsp.put_h264_qpel_pixels_tab[tab_index ][dy+(dx>>2)](dst,src + 2 + 2*stride,stride);
2512 else if(b_w==2*b_h){
2513 s->dsp.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst ,src + 2 + 2*stride,stride);
2514 s->dsp.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst+b_h,src + 2 + b_h + 2*stride,stride);
2517 s->dsp.put_h264_qpel_pixels_tab[tab_index ][dy+(dx>>2)](dst ,src + 2 + 2*stride ,stride);
2518 s->dsp.put_h264_qpel_pixels_tab[tab_index ][dy+(dx>>2)](dst+b_w*stride,src + 2 + 2*stride+b_w*stride,stride);
2523 void ff_snow_inner_add_yblock(const uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h,
2524 int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8){
2527 for(y=0; y<b_h; y++){
2528 //FIXME ugly missue of obmc_stride
2529 const uint8_t *obmc1= obmc + y*obmc_stride;
2530 const uint8_t *obmc2= obmc1+ (obmc_stride>>1);
2531 const uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1);
2532 const uint8_t *obmc4= obmc3+ (obmc_stride>>1);
2533 dst = slice_buffer_get_line(sb, src_y + y);
2534 for(x=0; x<b_w; x++){
2535 int v= obmc1[x] * block[3][x + y*src_stride]
2536 +obmc2[x] * block[2][x + y*src_stride]
2537 +obmc3[x] * block[1][x + y*src_stride]
2538 +obmc4[x] * block[0][x + y*src_stride];
2540 v <<= 8 - LOG2_OBMC_MAX;
2542 v += 1<<(7 - FRAC_BITS);
2543 v >>= 8 - FRAC_BITS;
2546 v += dst[x + src_x];
2547 v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS;
2548 if(v&(~255)) v= ~(v>>31);
2549 dst8[x + y*src_stride] = v;
2551 dst[x + src_x] -= v;
2557 //FIXME name clenup (b_w, block_w, b_width stuff)
2558 static av_always_inline void add_yblock(SnowContext *s, int sliced, slice_buffer *sb, DWTELEM *dst, uint8_t *dst8, const uint8_t *obmc, int src_x, int src_y, int b_w, int b_h, int w, int h, int dst_stride, int src_stride, int obmc_stride, int b_x, int b_y, int add, int offset_dst, int plane_index){
2559 const int b_width = s->b_width << s->block_max_depth;
2560 const int b_height= s->b_height << s->block_max_depth;
2561 const int b_stride= b_width;
2562 BlockNode *lt= &s->block[b_x + b_y*b_stride];
2563 BlockNode *rt= lt+1;
2564 BlockNode *lb= lt+b_stride;
2565 BlockNode *rb= lb+1;
2567 int tmp_step= src_stride >= 7*MB_SIZE ? MB_SIZE : MB_SIZE*src_stride;
2568 uint8_t tmp[src_stride*7*MB_SIZE]; //FIXME align
2575 }else if(b_x + 1 >= b_width){
2582 }else if(b_y + 1 >= b_height){
2587 if(src_x<0){ //FIXME merge with prev & always round internal width upto *16
2590 if(!sliced && !offset_dst)
2593 }else if(src_x + b_w > w){
2597 obmc -= src_y*obmc_stride;
2599 if(!sliced && !offset_dst)
2600 dst -= src_y*dst_stride;
2602 }else if(src_y + b_h> h){
2606 if(b_w<=0 || b_h<=0) return;
2608 assert(src_stride > 2*MB_SIZE + 5);
2609 if(!sliced && offset_dst)
2610 dst += src_x + src_y*dst_stride;
2611 dst8+= src_x + src_y*src_stride;
2612 // src += src_x + src_y*src_stride;
2614 ptmp= tmp + 3*tmp_step;
2617 pred_block(s, block[0], tmp, src_stride, src_x, src_y, b_w, b_h, lt, plane_index, w, h);
2619 if(same_block(lt, rt)){
2624 pred_block(s, block[1], tmp, src_stride, src_x, src_y, b_w, b_h, rt, plane_index, w, h);
2627 if(same_block(lt, lb)){
2629 }else if(same_block(rt, lb)){
2634 pred_block(s, block[2], tmp, src_stride, src_x, src_y, b_w, b_h, lb, plane_index, w, h);
2637 if(same_block(lt, rb) ){
2639 }else if(same_block(rt, rb)){
2641 }else if(same_block(lb, rb)){
2645 pred_block(s, block[3], tmp, src_stride, src_x, src_y, b_w, b_h, rb, plane_index, w, h);
2648 for(y=0; y<b_h; y++){
2649 for(x=0; x<b_w; x++){
2650 int v= obmc [x + y*obmc_stride] * block[3][x + y*src_stride] * (256/OBMC_MAX);
2651 if(add) dst[x + y*dst_stride] += v;
2652 else dst[x + y*dst_stride] -= v;
2655 for(y=0; y<b_h; y++){
2656 uint8_t *obmc2= obmc + (obmc_stride>>1);
2657 for(x=0; x<b_w; x++){
2658 int v= obmc2[x + y*obmc_stride] * block[2][x + y*src_stride] * (256/OBMC_MAX);
2659 if(add) dst[x + y*dst_stride] += v;
2660 else dst[x + y*dst_stride] -= v;
2663 for(y=0; y<b_h; y++){
2664 uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
2665 for(x=0; x<b_w; x++){
2666 int v= obmc3[x + y*obmc_stride] * block[1][x + y*src_stride] * (256/OBMC_MAX);
2667 if(add) dst[x + y*dst_stride] += v;
2668 else dst[x + y*dst_stride] -= v;
2671 for(y=0; y<b_h; y++){
2672 uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
2673 uint8_t *obmc4= obmc3+ (obmc_stride>>1);
2674 for(x=0; x<b_w; x++){
2675 int v= obmc4[x + y*obmc_stride] * block[0][x + y*src_stride] * (256/OBMC_MAX);
2676 if(add) dst[x + y*dst_stride] += v;
2677 else dst[x + y*dst_stride] -= v;
2684 s->dsp.inner_add_yblock(obmc, obmc_stride, block, b_w, b_h, src_x,src_y, src_stride, sb, add, dst8);
2685 STOP_TIMER("inner_add_yblock")
2687 for(y=0; y<b_h; y++){
2688 //FIXME ugly missue of obmc_stride
2689 const uint8_t *obmc1= obmc + y*obmc_stride;
2690 const uint8_t *obmc2= obmc1+ (obmc_stride>>1);
2691 const uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1);
2692 const uint8_t *obmc4= obmc3+ (obmc_stride>>1);
2693 for(x=0; x<b_w; x++){
2694 int v= obmc1[x] * block[3][x + y*src_stride]
2695 +obmc2[x] * block[2][x + y*src_stride]
2696 +obmc3[x] * block[1][x + y*src_stride]
2697 +obmc4[x] * block[0][x + y*src_stride];
2699 v <<= 8 - LOG2_OBMC_MAX;
2701 v += 1<<(7 - FRAC_BITS);
2702 v >>= 8 - FRAC_BITS;
2705 v += dst[x + y*dst_stride];
2706 v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS;
2707 if(v&(~255)) v= ~(v>>31);
2708 dst8[x + y*src_stride] = v;
2710 dst[x + y*dst_stride] -= v;
2717 static av_always_inline void predict_slice_buffered(SnowContext *s, slice_buffer * sb, DWTELEM * old_buffer, int plane_index, int add, int mb_y){
2718 Plane *p= &s->plane[plane_index];
2719 const int mb_w= s->b_width << s->block_max_depth;
2720 const int mb_h= s->b_height << s->block_max_depth;
2722 int block_size = MB_SIZE >> s->block_max_depth;
2723 int block_w = plane_index ? block_size/2 : block_size;
2724 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
2725 int obmc_stride= plane_index ? block_size : 2*block_size;
2726 int ref_stride= s->current_picture.linesize[plane_index];
2727 uint8_t *dst8= s->current_picture.data[plane_index];
2732 if(s->keyframe || (s->avctx->debug&512)){
2737 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++)
2739 // DWTELEM * line = slice_buffer_get_line(sb, y);
2740 DWTELEM * line = sb->line[y];
2743 // int v= buf[x + y*w] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
2744 int v= line[x] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
2746 if(v&(~255)) v= ~(v>>31);
2747 dst8[x + y*ref_stride]= v;
2751 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++)
2753 // DWTELEM * line = slice_buffer_get_line(sb, y);
2754 DWTELEM * line = sb->line[y];
2757 line[x] -= 128 << FRAC_BITS;
2758 // buf[x + y*w]-= 128<<FRAC_BITS;
2766 for(mb_x=0; mb_x<=mb_w; mb_x++){
2769 add_yblock(s, 1, sb, old_buffer, dst8, obmc,
2770 block_w*mb_x - block_w/2,
2771 block_w*mb_y - block_w/2,
2774 w, ref_stride, obmc_stride,
2776 add, 0, plane_index);
2778 STOP_TIMER("add_yblock")
2781 STOP_TIMER("predict_slice")
2784 static av_always_inline void predict_slice(SnowContext *s, DWTELEM *buf, int plane_index, int add, int mb_y){
2785 Plane *p= &s->plane[plane_index];
2786 const int mb_w= s->b_width << s->block_max_depth;
2787 const int mb_h= s->b_height << s->block_max_depth;
2789 int block_size = MB_SIZE >> s->block_max_depth;
2790 int block_w = plane_index ? block_size/2 : block_size;
2791 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
2792 const int obmc_stride= plane_index ? block_size : 2*block_size;
2793 int ref_stride= s->current_picture.linesize[plane_index];
2794 uint8_t *dst8= s->current_picture.data[plane_index];
2799 if(s->keyframe || (s->avctx->debug&512)){
2804 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
2806 int v= buf[x + y*w] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
2808 if(v&(~255)) v= ~(v>>31);
2809 dst8[x + y*ref_stride]= v;
2813 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
2815 buf[x + y*w]-= 128<<FRAC_BITS;
2823 for(mb_x=0; mb_x<=mb_w; mb_x++){
2826 add_yblock(s, 0, NULL, buf, dst8, obmc,
2827 block_w*mb_x - block_w/2,
2828 block_w*mb_y - block_w/2,
2831 w, ref_stride, obmc_stride,
2833 add, 1, plane_index);
2835 STOP_TIMER("add_yblock")
2838 STOP_TIMER("predict_slice")
2841 static av_always_inline void predict_plane(SnowContext *s, DWTELEM *buf, int plane_index, int add){
2842 const int mb_h= s->b_height << s->block_max_depth;
2844 for(mb_y=0; mb_y<=mb_h; mb_y++)
2845 predict_slice(s, buf, plane_index, add, mb_y);
2848 static int get_dc(SnowContext *s, int mb_x, int mb_y, int plane_index){
2850 Plane *p= &s->plane[plane_index];
2851 const int block_size = MB_SIZE >> s->block_max_depth;
2852 const int block_w = plane_index ? block_size/2 : block_size;
2853 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
2854 const int obmc_stride= plane_index ? block_size : 2*block_size;
2855 const int ref_stride= s->current_picture.linesize[plane_index];
2856 uint8_t *src= s-> input_picture.data[plane_index];
2857 DWTELEM *dst= (DWTELEM*)s->m.obmc_scratchpad + plane_index*block_size*block_size*4;
2858 const int b_stride = s->b_width << s->block_max_depth;
2859 const int w= p->width;
2860 const int h= p->height;
2861 int index= mb_x + mb_y*b_stride;
2862 BlockNode *b= &s->block[index];
2863 BlockNode backup= *b;
2867 b->type|= BLOCK_INTRA;
2868 b->color[plane_index]= 0;
2869 memset(dst, 0, obmc_stride*obmc_stride*sizeof(DWTELEM));
2872 int mb_x2= mb_x + (i &1) - 1;
2873 int mb_y2= mb_y + (i>>1) - 1;
2874 int x= block_w*mb_x2 + block_w/2;
2875 int y= block_w*mb_y2 + block_w/2;
2877 add_yblock(s, 0, NULL, dst + ((i&1)+(i>>1)*obmc_stride)*block_w, NULL, obmc,
2878 x, y, block_w, block_w, w, h, obmc_stride, ref_stride, obmc_stride, mb_x2, mb_y2, 0, 0, plane_index);
2880 for(y2= FFMAX(y, 0); y2<FFMIN(h, y+block_w); y2++){
2881 for(x2= FFMAX(x, 0); x2<FFMIN(w, x+block_w); x2++){
2882 int index= x2-(block_w*mb_x - block_w/2) + (y2-(block_w*mb_y - block_w/2))*obmc_stride;
2883 int obmc_v= obmc[index];
2885 if(y<0) obmc_v += obmc[index + block_w*obmc_stride];
2886 if(x<0) obmc_v += obmc[index + block_w];
2887 if(y+block_w>h) obmc_v += obmc[index - block_w*obmc_stride];
2888 if(x+block_w>w) obmc_v += obmc[index - block_w];
2889 //FIXME precalc this or simplify it somehow else
2891 d = -dst[index] + (1<<(FRAC_BITS-1));
2893 ab += (src[x2 + y2*ref_stride] - (d>>FRAC_BITS)) * obmc_v;
2894 aa += obmc_v * obmc_v; //FIXME precalclate this
2900 return av_clip(((ab<<LOG2_OBMC_MAX) + aa/2)/aa, 0, 255); //FIXME we shouldnt need cliping
2903 static inline int get_block_bits(SnowContext *s, int x, int y, int w){
2904 const int b_stride = s->b_width << s->block_max_depth;
2905 const int b_height = s->b_height<< s->block_max_depth;
2906 int index= x + y*b_stride;
2907 const BlockNode *b = &s->block[index];
2908 const BlockNode *left = x ? &s->block[index-1] : &null_block;
2909 const BlockNode *top = y ? &s->block[index-b_stride] : &null_block;
2910 const BlockNode *tl = y && x ? &s->block[index-b_stride-1] : left;
2911 const BlockNode *tr = y && x+w<b_stride ? &s->block[index-b_stride+w] : tl;
2913 // int mx_context= av_log2(2*FFABS(left->mx - top->mx));
2914 // int my_context= av_log2(2*FFABS(left->my - top->my));
2916 if(x<0 || x>=b_stride || y>=b_height)
2923 00001XXXX 15-30 8-15
2925 //FIXME try accurate rate
2926 //FIXME intra and inter predictors if surrounding blocks arent the same type
2927 if(b->type & BLOCK_INTRA){
2928 return 3+2*( av_log2(2*FFABS(left->color[0] - b->color[0]))
2929 + av_log2(2*FFABS(left->color[1] - b->color[1]))
2930 + av_log2(2*FFABS(left->color[2] - b->color[2])));
2932 pred_mv(s, &dmx, &dmy, b->ref, left, top, tr);
2935 return 2*(1 + av_log2(2*FFABS(dmx)) //FIXME kill the 2* can be merged in lambda
2936 + av_log2(2*FFABS(dmy))
2937 + av_log2(2*b->ref));
2941 static int get_block_rd(SnowContext *s, int mb_x, int mb_y, int plane_index, const uint8_t *obmc_edged){
2942 Plane *p= &s->plane[plane_index];
2943 const int block_size = MB_SIZE >> s->block_max_depth;
2944 const int block_w = plane_index ? block_size/2 : block_size;
2945 const int obmc_stride= plane_index ? block_size : 2*block_size;
2946 const int ref_stride= s->current_picture.linesize[plane_index];
2947 uint8_t *dst= s->current_picture.data[plane_index];
2948 uint8_t *src= s-> input_picture.data[plane_index];
2949 DWTELEM *pred= (DWTELEM*)s->m.obmc_scratchpad + plane_index*block_size*block_size*4;
2950 uint8_t cur[ref_stride*2*MB_SIZE]; //FIXME alignment
2951 uint8_t tmp[ref_stride*(2*MB_SIZE+5)];
2952 const int b_stride = s->b_width << s->block_max_depth;
2953 const int b_height = s->b_height<< s->block_max_depth;
2954 const int w= p->width;
2955 const int h= p->height;
2958 const int penalty_factor= get_penalty_factor(s->lambda, s->lambda2, s->avctx->me_cmp);
2959 int sx= block_w*mb_x - block_w/2;
2960 int sy= block_w*mb_y - block_w/2;
2961 int x0= FFMAX(0,-sx);
2962 int y0= FFMAX(0,-sy);
2963 int x1= FFMIN(block_w*2, w-sx);
2964 int y1= FFMIN(block_w*2, h-sy);
2967 pred_block(s, cur, tmp, ref_stride, sx, sy, block_w*2, block_w*2, &s->block[mb_x + mb_y*b_stride], plane_index, w, h);
2969 for(y=y0; y<y1; y++){
2970 const uint8_t *obmc1= obmc_edged + y*obmc_stride;
2971 const DWTELEM *pred1 = pred + y*obmc_stride;
2972 uint8_t *cur1 = cur + y*ref_stride;
2973 uint8_t *dst1 = dst + sx + (sy+y)*ref_stride;
2974 for(x=x0; x<x1; x++){
2975 int v = (cur1[x] * obmc1[x]) << (FRAC_BITS - LOG2_OBMC_MAX);
2976 v = (v + pred1[x]) >> FRAC_BITS;
2977 if(v&(~255)) v= ~(v>>31);
2982 /* copy the regions where obmc[] = (uint8_t)256 */
2983 if(LOG2_OBMC_MAX == 8
2984 && (mb_x == 0 || mb_x == b_stride-1)
2985 && (mb_y == 0 || mb_y == b_height-1)){
2994 for(y=y0; y<y1; y++)
2995 memcpy(dst + sx+x0 + (sy+y)*ref_stride, cur + x0 + y*ref_stride, x1-x0);
2999 /* FIXME rearrange dsputil to fit 32x32 cmp functions */
3000 /* FIXME check alignment of the cmp wavelet vs the encoding wavelet */
3001 /* FIXME cmps overlap but don't cover the wavelet's whole support,
3002 * so improving the score of one block is not strictly guaranteed to
3003 * improve the score of the whole frame, so iterative motion est
3004 * doesn't always converge. */
3005 if(s->avctx->me_cmp == FF_CMP_W97)
3006 distortion = w97_32_c(&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, 32);
3007 else if(s->avctx->me_cmp == FF_CMP_W53)
3008 distortion = w53_32_c(&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, 32);
3012 int off = sx+16*(i&1) + (sy+16*(i>>1))*ref_stride;
3013 distortion += s->dsp.me_cmp[0](&s->m, src + off, dst + off, ref_stride, 16);
3018 distortion = s->dsp.me_cmp[0](&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, block_w*2);
3027 rate += get_block_bits(s, mb_x + (i&1) - (i>>1), mb_y + (i>>1), 1);
3029 if(mb_x == b_stride-2)
3030 rate += get_block_bits(s, mb_x + 1, mb_y + 1, 1);
3032 return distortion + rate*penalty_factor;
3035 static int get_4block_rd(SnowContext *s, int mb_x, int mb_y, int plane_index){
3037 Plane *p= &s->plane[plane_index];
3038 const int block_size = MB_SIZE >> s->block_max_depth;
3039 const int block_w = plane_index ? block_size/2 : block_size;
3040 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
3041 const int obmc_stride= plane_index ? block_size : 2*block_size;
3042 const int ref_stride= s->current_picture.linesize[plane_index];
3043 uint8_t *dst= s->current_picture.data[plane_index];
3044 uint8_t *src= s-> input_picture.data[plane_index];
3045 static const DWTELEM zero_dst[4096]; //FIXME
3046 const int b_stride = s->b_width << s->block_max_depth;
3047 const int w= p->width;
3048 const int h= p->height;
3051 const int penalty_factor= get_penalty_factor(s->lambda, s->lambda2, s->avctx->me_cmp);
3054 int mb_x2= mb_x + (i%3) - 1;
3055 int mb_y2= mb_y + (i/3) - 1;
3056 int x= block_w*mb_x2 + block_w/2;
3057 int y= block_w*mb_y2 + block_w/2;
3059 add_yblock(s, 0, NULL, zero_dst, dst, obmc,
3060 x, y, block_w, block_w, w, h, /*dst_stride*/0, ref_stride, obmc_stride, mb_x2, mb_y2, 1, 1, plane_index);
3062 //FIXME find a cleaner/simpler way to skip the outside stuff
3063 for(y2= y; y2<0; y2++)
3064 memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, block_w);
3065 for(y2= h; y2<y+block_w; y2++)
3066 memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, block_w);
3068 for(y2= y; y2<y+block_w; y2++)
3069 memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, -x);
3072 for(y2= y; y2<y+block_w; y2++)
3073 memcpy(dst + w + y2*ref_stride, src + w + y2*ref_stride, x+block_w - w);
3076 assert(block_w== 8 || block_w==16);
3077 distortion += s->dsp.me_cmp[block_w==8](&s->m, src + x + y*ref_stride, dst + x + y*ref_stride, ref_stride, block_w);
3081 BlockNode *b= &s->block[mb_x+mb_y*b_stride];
3082 int merged= same_block(b,b+1) && same_block(b,b+b_stride) && same_block(b,b+b_stride+1);
3090 rate = get_block_bits(s, mb_x, mb_y, 2);
3091 for(i=merged?4:0; i<9; i++){
3092 static const int dxy[9][2] = {{0,0},{1,0},{0,1},{1,1},{2,0},{2,1},{-1,2},{0,2},{1,2}};
3093 rate += get_block_bits(s, mb_x + dxy[i][0], mb_y + dxy[i][1], 1);
3096 return distortion + rate*penalty_factor;
3099 static av_always_inline int check_block(SnowContext *s, int mb_x, int mb_y, int p[3], int intra, const uint8_t *obmc_edged, int *best_rd){
3100 const int b_stride= s->b_width << s->block_max_depth;
3101 BlockNode *block= &s->block[mb_x + mb_y * b_stride];
3102 BlockNode backup= *block;
3103 int rd, index, value;
3105 assert(mb_x>=0 && mb_y>=0);
3106 assert(mb_x<b_stride);
3109 block->color[0] = p[0];
3110 block->color[1] = p[1];
3111 block->color[2] = p[2];
3112 block->type |= BLOCK_INTRA;
3114 index= (p[0] + 31*p[1]) & (ME_CACHE_SIZE-1);
3115 value= s->me_cache_generation + (p[0]>>10) + (p[1]<<6) + (block->ref<<12);
3116 if(s->me_cache[index] == value)
3118 s->me_cache[index]= value;
3122 block->type &= ~BLOCK_INTRA;
3125 rd= get_block_rd(s, mb_x, mb_y, 0, obmc_edged);
3137 /* special case for int[2] args we discard afterward, fixes compilation prob with gcc 2.95 */
3138 static av_always_inline int check_block_inter(SnowContext *s, int mb_x, int mb_y, int p0, int p1, const uint8_t *obmc_edged, int *best_rd){
3139 int p[2] = {p0, p1};
3140 return check_block(s, mb_x, mb_y, p, 0, obmc_edged, best_rd);
3143 static av_always_inline int check_4block_inter(SnowContext *s, int mb_x, int mb_y, int p0, int p1, int ref, int *best_rd){
3144 const int b_stride= s->b_width << s->block_max_depth;
3145 BlockNode *block= &s->block[mb_x + mb_y * b_stride];
3146 BlockNode backup[4]= {block[0], block[1], block[b_stride], block[b_stride+1]};
3147 int rd, index, value;
3149 assert(mb_x>=0 && mb_y>=0);
3150 assert(mb_x<b_stride);
3151 assert(((mb_x|mb_y)&1) == 0);
3153 index= (p0 + 31*p1) & (ME_CACHE_SIZE-1);
3154 value= s->me_cache_generation + (p0>>10) + (p1<<6) + (block->ref<<12);
3155 if(s->me_cache[index] == value)
3157 s->me_cache[index]= value;
3162 block->type &= ~BLOCK_INTRA;
3163 block[1]= block[b_stride]= block[b_stride+1]= *block;
3165 rd= get_4block_rd(s, mb_x, mb_y, 0);
3172 block[0]= backup[0];
3173 block[1]= backup[1];
3174 block[b_stride]= backup[2];
3175 block[b_stride+1]= backup[3];
3180 static void iterative_me(SnowContext *s){
3181 int pass, mb_x, mb_y;
3182 const int b_width = s->b_width << s->block_max_depth;
3183 const int b_height= s->b_height << s->block_max_depth;
3184 const int b_stride= b_width;
3188 RangeCoder r = s->c;
3189 uint8_t state[sizeof(s->block_state)];
3190 memcpy(state, s->block_state, sizeof(s->block_state));
3191 for(mb_y= 0; mb_y<s->b_height; mb_y++)
3192 for(mb_x= 0; mb_x<s->b_width; mb_x++)
3193 encode_q_branch(s, 0, mb_x, mb_y);
3195 memcpy(s->block_state, state, sizeof(s->block_state));
3198 for(pass=0; pass<25; pass++){
3201 for(mb_y= 0; mb_y<b_height; mb_y++){
3202 for(mb_x= 0; mb_x<b_width; mb_x++){
3203 int dia_change, i, j, ref;
3204 int best_rd= INT_MAX, ref_rd;
3205 BlockNode backup, ref_b;
3206 const int index= mb_x + mb_y * b_stride;
3207 BlockNode *block= &s->block[index];
3208 BlockNode *tb = mb_y ? &s->block[index-b_stride ] : NULL;
3209 BlockNode *lb = mb_x ? &s->block[index -1] : NULL;
3210 BlockNode *rb = mb_x+1<b_width ? &s->block[index +1] : NULL;
3211 BlockNode *bb = mb_y+1<b_height ? &s->block[index+b_stride ] : NULL;
3212 BlockNode *tlb= mb_x && mb_y ? &s->block[index-b_stride-1] : NULL;
3213 BlockNode *trb= mb_x+1<b_width && mb_y ? &s->block[index-b_stride+1] : NULL;
3214 BlockNode *blb= mb_x && mb_y+1<b_height ? &s->block[index+b_stride-1] : NULL;
3215 BlockNode *brb= mb_x+1<b_width && mb_y+1<b_height ? &s->block[index+b_stride+1] : NULL;
3216 const int b_w= (MB_SIZE >> s->block_max_depth);
3217 uint8_t obmc_edged[b_w*2][b_w*2];
3219 if(pass && (block->type & BLOCK_OPT))
3221 block->type |= BLOCK_OPT;
3225 if(!s->me_cache_generation)
3226 memset(s->me_cache, 0, sizeof(s->me_cache));
3227 s->me_cache_generation += 1<<22;
3232 memcpy(obmc_edged, obmc_tab[s->block_max_depth], b_w*b_w*4);
3234 for(y=0; y<b_w*2; y++)
3235 memset(obmc_edged[y], obmc_edged[y][0] + obmc_edged[y][b_w-1], b_w);
3236 if(mb_x==b_stride-1)
3237 for(y=0; y<b_w*2; y++)
3238 memset(obmc_edged[y]+b_w, obmc_edged[y][b_w] + obmc_edged[y][b_w*2-1], b_w);
3240 for(x=0; x<b_w*2; x++)
3241 obmc_edged[0][x] += obmc_edged[b_w-1][x];
3242 for(y=1; y<b_w; y++)
3243 memcpy(obmc_edged[y], obmc_edged[0], b_w*2);
3245 if(mb_y==b_height-1){
3246 for(x=0; x<b_w*2; x++)
3247 obmc_edged[b_w*2-1][x] += obmc_edged[b_w][x];
3248 for(y=b_w; y<b_w*2-1; y++)
3249 memcpy(obmc_edged[y], obmc_edged[b_w*2-1], b_w*2);
3253 //skip stuff outside the picture
3254 if(mb_x==0 || mb_y==0 || mb_x==b_width-1 || mb_y==b_height-1)
3256 uint8_t *src= s-> input_picture.data[0];
3257 uint8_t *dst= s->current_picture.data[0];
3258 const int stride= s->current_picture.linesize[0];
3259 const int block_w= MB_SIZE >> s->block_max_depth;
3260 const int sx= block_w*mb_x - block_w/2;
3261 const int sy= block_w*mb_y - block_w/2;
3262 const int w= s->plane[0].width;
3263 const int h= s->plane[0].height;
3267 memcpy(dst + sx + y*stride, src + sx + y*stride, block_w*2);
3268 for(y=h; y<sy+block_w*2; y++)
3269 memcpy(dst + sx + y*stride, src + sx + y*stride, block_w*2);
3271 for(y=sy; y<sy+block_w*2; y++)
3272 memcpy(dst + sx + y*stride, src + sx + y*stride, -sx);
3274 if(sx+block_w*2 > w){
3275 for(y=sy; y<sy+block_w*2; y++)
3276 memcpy(dst + w + y*stride, src + w + y*stride, sx+block_w*2 - w);
3280 // intra(black) = neighbors' contribution to the current block
3282 color[i]= get_dc(s, mb_x, mb_y, i);
3284 // get previous score (cant be cached due to OBMC)
3285 if(pass > 0 && (block->type&BLOCK_INTRA)){
3286 int color0[3]= {block->color[0], block->color[1], block->color[2]};
3287 check_block(s, mb_x, mb_y, color0, 1, *obmc_edged, &best_rd);
3289 check_block_inter(s, mb_x, mb_y, block->mx, block->my, *obmc_edged, &best_rd);
3293 for(ref=0; ref < s->ref_frames; ref++){
3294 int16_t (*mvr)[2]= &s->ref_mvs[ref][index];
3295 if(s->ref_scores[ref][index] > s->ref_scores[ref_b.ref][index]*3/2) //FIXME tune threshold
3300 check_block_inter(s, mb_x, mb_y, mvr[0][0], mvr[0][1], *obmc_edged, &best_rd);
3301 check_block_inter(s, mb_x, mb_y, 0, 0, *obmc_edged, &best_rd);
3303 check_block_inter(s, mb_x, mb_y, mvr[-b_stride][0], mvr[-b_stride][1], *obmc_edged, &best_rd);
3305 check_block_inter(s, mb_x, mb_y, mvr[-1][0], mvr[-1][1], *obmc_edged, &best_rd);
3307 check_block_inter(s, mb_x, mb_y, mvr[1][0], mvr[1][1], *obmc_edged, &best_rd);
3309 check_block_inter(s, mb_x, mb_y, mvr[b_stride][0], mvr[b_stride][1], *obmc_edged, &best_rd);
3312 //FIXME avoid subpel interpol / round to nearest integer
3315 for(i=0; i<FFMAX(s->avctx->dia_size, 1); i++){
3317 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+4*(i-j), block->my+(4*j), *obmc_edged, &best_rd);
3318 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx-4*(i-j), block->my-(4*j), *obmc_edged, &best_rd);
3319 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+4*(i-j), block->my-(4*j), *obmc_edged, &best_rd);
3320 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx-4*(i-j), block->my+(4*j), *obmc_edged, &best_rd);
3326 static const int square[8][2]= {{+1, 0},{-1, 0},{ 0,+1},{ 0,-1},{+1,+1},{-1,-1},{+1,-1},{-1,+1},};
3329 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+square[i][0], block->my+square[i][1], *obmc_edged, &best_rd);
3331 //FIXME or try the standard 2 pass qpel or similar
3333 mvr[0][0]= block->mx;
3334 mvr[0][1]= block->my;
3335 if(ref_rd > best_rd){
3343 check_block(s, mb_x, mb_y, color, 1, *obmc_edged, &best_rd);
3344 //FIXME RD style color selection
3346 if(!same_block(block, &backup)){
3347 if(tb ) tb ->type &= ~BLOCK_OPT;
3348 if(lb ) lb ->type &= ~BLOCK_OPT;
3349 if(rb ) rb ->type &= ~BLOCK_OPT;
3350 if(bb ) bb ->type &= ~BLOCK_OPT;
3351 if(tlb) tlb->type &= ~BLOCK_OPT;
3352 if(trb) trb->type &= ~BLOCK_OPT;
3353 if(blb) blb->type &= ~BLOCK_OPT;
3354 if(brb) brb->type &= ~BLOCK_OPT;
3359 av_log(NULL, AV_LOG_ERROR, "pass:%d changed:%d\n", pass, change);
3364 if(s->block_max_depth == 1){
3366 for(mb_y= 0; mb_y<b_height; mb_y+=2){
3367 for(mb_x= 0; mb_x<b_width; mb_x+=2){
3369 int best_rd, init_rd;
3370 const int index= mb_x + mb_y * b_stride;
3373 b[0]= &s->block[index];
3375 b[2]= b[0]+b_stride;
3377 if(same_block(b[0], b[1]) &&
3378 same_block(b[0], b[2]) &&
3379 same_block(b[0], b[3]))
3382 if(!s->me_cache_generation)
3383 memset(s->me_cache, 0, sizeof(s->me_cache));
3384 s->me_cache_generation += 1<<22;
3386 init_rd= best_rd= get_4block_rd(s, mb_x, mb_y, 0);
3388 //FIXME more multiref search?
3389 check_4block_inter(s, mb_x, mb_y,
3390 (b[0]->mx + b[1]->mx + b[2]->mx + b[3]->mx + 2) >> 2,
3391 (b[0]->my + b[1]->my + b[2]->my + b[3]->my + 2) >> 2, 0, &best_rd);
3394 if(!(b[i]->type&BLOCK_INTRA))
3395 check_4block_inter(s, mb_x, mb_y, b[i]->mx, b[i]->my, b[i]->ref, &best_rd);
3397 if(init_rd != best_rd)
3401 av_log(NULL, AV_LOG_ERROR, "pass:4mv changed:%d\n", change*4);
3405 static void quantize(SnowContext *s, SubBand *b, DWTELEM *src, int stride, int bias){
3406 const int level= b->level;
3407 const int w= b->width;
3408 const int h= b->height;
3409 const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
3410 const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
3411 int x,y, thres1, thres2;
3414 if(s->qlog == LOSSLESS_QLOG) return;
3416 bias= bias ? 0 : (3*qmul)>>3;
3417 thres1= ((qmul - bias)>>QEXPSHIFT) - 1;
3423 int i= src[x + y*stride];
3425 if((unsigned)(i+thres1) > thres2){
3428 i/= qmul; //FIXME optimize
3429 src[x + y*stride]= i;
3433 i/= qmul; //FIXME optimize
3434 src[x + y*stride]= -i;
3437 src[x + y*stride]= 0;
3443 int i= src[x + y*stride];
3445 if((unsigned)(i+thres1) > thres2){
3448 i= (i + bias) / qmul; //FIXME optimize
3449 src[x + y*stride]= i;
3453 i= (i + bias) / qmul; //FIXME optimize
3454 src[x + y*stride]= -i;
3457 src[x + y*stride]= 0;
3461 if(level+1 == s->spatial_decomposition_count){
3462 // STOP_TIMER("quantize")
3466 static void dequantize_slice_buffered(SnowContext *s, slice_buffer * sb, SubBand *b, DWTELEM *src, int stride, int start_y, int end_y){
3467 const int w= b->width;
3468 const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
3469 const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
3470 const int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
3474 if(s->qlog == LOSSLESS_QLOG) return;
3476 for(y=start_y; y<end_y; y++){
3477 // DWTELEM * line = slice_buffer_get_line_from_address(sb, src + (y * stride));
3478 DWTELEM * line = slice_buffer_get_line(sb, (y * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
3482 line[x]= -((-i*qmul + qadd)>>(QEXPSHIFT)); //FIXME try different bias
3484 line[x]= (( i*qmul + qadd)>>(QEXPSHIFT));
3488 if(w > 200 /*level+1 == s->spatial_decomposition_count*/){
3489 STOP_TIMER("dquant")
3493 static void dequantize(SnowContext *s, SubBand *b, DWTELEM *src, int stride){
3494 const int w= b->width;
3495 const int h= b->height;
3496 const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
3497 const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
3498 const int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
3502 if(s->qlog == LOSSLESS_QLOG) return;
3506 int i= src[x + y*stride];
3508 src[x + y*stride]= -((-i*qmul + qadd)>>(QEXPSHIFT)); //FIXME try different bias
3510 src[x + y*stride]= (( i*qmul + qadd)>>(QEXPSHIFT));
3514 if(w > 200 /*level+1 == s->spatial_decomposition_count*/){
3515 STOP_TIMER("dquant")
3519 static void decorrelate(SnowContext *s, SubBand *b, DWTELEM *src, int stride, int inverse, int use_median){
3520 const int w= b->width;
3521 const int h= b->height;
3524 for(y=h-1; y>=0; y--){
3525 for(x=w-1; x>=0; x--){
3526 int i= x + y*stride;
3530 if(y && x+1<w) src[i] -= mid_pred(src[i - 1], src[i - stride], src[i - stride + 1]);
3531 else src[i] -= src[i - 1];
3533 if(y) src[i] -= mid_pred(src[i - 1], src[i - stride], src[i - 1] + src[i - stride] - src[i - 1 - stride]);
3534 else src[i] -= src[i - 1];
3537 if(y) src[i] -= src[i - stride];
3543 static void correlate_slice_buffered(SnowContext *s, slice_buffer * sb, SubBand *b, DWTELEM *src, int stride, int inverse, int use_median, int start_y, int end_y){
3544 const int w= b->width;
3549 DWTELEM * line=0; // silence silly "could be used without having been initialized" warning
3553 line = slice_buffer_get_line(sb, ((start_y - 1) * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
3555 for(y=start_y; y<end_y; y++){
3557 // line = slice_buffer_get_line_from_address(sb, src + (y * stride));
3558 line = slice_buffer_get_line(sb, (y * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
3562 if(y && x+1<w) line[x] += mid_pred(line[x - 1], prev[x], prev[x + 1]);
3563 else line[x] += line[x - 1];
3565 if(y) line[x] += mid_pred(line[x - 1], prev[x], line[x - 1] + prev[x] - prev[x - 1]);
3566 else line[x] += line[x - 1];
3569 if(y) line[x] += prev[x];
3574 // STOP_TIMER("correlate")
3577 static void correlate(SnowContext *s, SubBand *b, DWTELEM *src, int stride, int inverse, int use_median){
3578 const int w= b->width;
3579 const int h= b->height;
3584 int i= x + y*stride;
3588 if(y && x+1<w) src[i] += mid_pred(src[i - 1], src[i - stride], src[i - stride + 1]);
3589 else src[i] += src[i - 1];
3591 if(y) src[i] += mid_pred(src[i - 1], src[i - stride], src[i - 1] + src[i - stride] - src[i - 1 - stride]);
3592 else src[i] += src[i - 1];
3595 if(y) src[i] += src[i - stride];
3601 static void encode_header(SnowContext *s){
3602 int plane_index, level, orientation;
3605 memset(kstate, MID_STATE, sizeof(kstate));
3607 put_rac(&s->c, kstate, s->keyframe);
3608 if(s->keyframe || s->always_reset){
3610 s->last_spatial_decomposition_type=
3614 s->last_block_max_depth= 0;
3617 put_symbol(&s->c, s->header_state, s->version, 0);
3618 put_rac(&s->c, s->header_state, s->always_reset);
3619 put_symbol(&s->c, s->header_state, s->temporal_decomposition_type, 0);
3620 put_symbol(&s->c, s->header_state, s->temporal_decomposition_count, 0);
3621 put_symbol(&s->c, s->header_state, s->spatial_decomposition_count, 0);
3622 put_symbol(&s->c, s->header_state, s->colorspace_type, 0);
3623 put_symbol(&s->c, s->header_state, s->chroma_h_shift, 0);
3624 put_symbol(&s->c, s->header_state, s->chroma_v_shift, 0);
3625 put_rac(&s->c, s->header_state, s->spatial_scalability);
3626 // put_rac(&s->c, s->header_state, s->rate_scalability);
3627 put_symbol(&s->c, s->header_state, s->max_ref_frames-1, 0);
3629 for(plane_index=0; plane_index<2; plane_index++){
3630 for(level=0; level<s->spatial_decomposition_count; level++){
3631 for(orientation=level ? 1:0; orientation<4; orientation++){
3632 if(orientation==2) continue;
3633 put_symbol(&s->c, s->header_state, s->plane[plane_index].band[level][orientation].qlog, 1);
3638 put_symbol(&s->c, s->header_state, s->spatial_decomposition_type - s->last_spatial_decomposition_type, 1);
3639 put_symbol(&s->c, s->header_state, s->qlog - s->last_qlog , 1);
3640 put_symbol(&s->c, s->header_state, s->mv_scale - s->last_mv_scale, 1);
3641 put_symbol(&s->c, s->header_state, s->qbias - s->last_qbias , 1);
3642 put_symbol(&s->c, s->header_state, s->block_max_depth - s->last_block_max_depth, 1);
3644 s->last_spatial_decomposition_type= s->spatial_decomposition_type;
3645 s->last_qlog = s->qlog;
3646 s->last_qbias = s->qbias;
3647 s->last_mv_scale = s->mv_scale;
3648 s->last_block_max_depth = s->block_max_depth;
3651 static int decode_header(SnowContext *s){
3652 int plane_index, level, orientation;
3655 memset(kstate, MID_STATE, sizeof(kstate));
3657 s->keyframe= get_rac(&s->c, kstate);
3658 if(s->keyframe || s->always_reset){
3660 s->spatial_decomposition_type=
3664 s->block_max_depth= 0;
3667 s->version= get_symbol(&s->c, s->header_state, 0);
3669 av_log(s->avctx, AV_LOG_ERROR, "version %d not supported", s->version);
3672 s->always_reset= get_rac(&s->c, s->header_state);
3673 s->temporal_decomposition_type= get_symbol(&s->c, s->header_state, 0);
3674 s->temporal_decomposition_count= get_symbol(&s->c, s->header_state, 0);
3675 s->spatial_decomposition_count= get_symbol(&s->c, s->header_state, 0);
3676 s->colorspace_type= get_symbol(&s->c, s->header_state, 0);
3677 s->chroma_h_shift= get_symbol(&s->c, s->header_state, 0);
3678 s->chroma_v_shift= get_symbol(&s->c, s->header_state, 0);
3679 s->spatial_scalability= get_rac(&s->c, s->header_state);
3680 // s->rate_scalability= get_rac(&s->c, s->header_state);
3681 s->max_ref_frames= get_symbol(&s->c, s->header_state, 0)+1;
3683 for(plane_index=0; plane_index<3; plane_index++){
3684 for(level=0; level<s->spatial_decomposition_count; level++){
3685 for(orientation=level ? 1:0; orientation<4; orientation++){
3687 if (plane_index==2) q= s->plane[1].band[level][orientation].qlog;
3688 else if(orientation==2) q= s->plane[plane_index].band[level][1].qlog;
3689 else q= get_symbol(&s->c, s->header_state, 1);
3690 s->plane[plane_index].band[level][orientation].qlog= q;
3696 s->spatial_decomposition_type+= get_symbol(&s->c, s->header_state, 1);
3697 if(s->spatial_decomposition_type > 2){
3698 av_log(s->avctx, AV_LOG_ERROR, "spatial_decomposition_type %d not supported", s->spatial_decomposition_type);
3702 s->qlog += get_symbol(&s->c, s->header_state, 1);
3703 s->mv_scale += get_symbol(&s->c, s->header_state, 1);
3704 s->qbias += get_symbol(&s->c, s->header_state, 1);
3705 s->block_max_depth+= get_symbol(&s->c, s->header_state, 1);
3706 if(s->block_max_depth > 1 || s->block_max_depth < 0){
3707 av_log(s->avctx, AV_LOG_ERROR, "block_max_depth= %d is too large", s->block_max_depth);
3708 s->block_max_depth= 0;
3715 static void init_qexp(void){
3719 for(i=0; i<QROOT; i++){
3721 v *= pow(2, 1.0 / QROOT);
3725 static int common_init(AVCodecContext *avctx){
3726 SnowContext *s = avctx->priv_data;
3728 int level, orientation, plane_index, dec;
3733 dsputil_init(&s->dsp, avctx);
3736 s->dsp.put_qpel_pixels_tab [0][dy+dx/4]=\
3737 s->dsp.put_no_rnd_qpel_pixels_tab[0][dy+dx/4]=\
3738 s->dsp.put_h264_qpel_pixels_tab[0][dy+dx/4];\
3739 s->dsp.put_qpel_pixels_tab [1][dy+dx/4]=\
3740 s->dsp.put_no_rnd_qpel_pixels_tab[1][dy+dx/4]=\
3741 s->dsp.put_h264_qpel_pixels_tab[1][dy+dx/4];
3760 #define mcfh(dx,dy)\
3761 s->dsp.put_pixels_tab [0][dy/4+dx/8]=\
3762 s->dsp.put_no_rnd_pixels_tab[0][dy/4+dx/8]=\
3763 mc_block_hpel ## dx ## dy ## 16;\
3764 s->dsp.put_pixels_tab [1][dy/4+dx/8]=\
3765 s->dsp.put_no_rnd_pixels_tab[1][dy/4+dx/8]=\
3766 mc_block_hpel ## dx ## dy ## 8;
3776 dec= s->spatial_decomposition_count= 5;
3777 s->spatial_decomposition_type= avctx->prediction_method; //FIXME add decorrelator type r transform_type
3779 s->chroma_h_shift= 1; //FIXME XXX
3780 s->chroma_v_shift= 1;
3782 // dec += FFMAX(s->chroma_h_shift, s->chroma_v_shift);
3784 width= s->avctx->width;
3785 height= s->avctx->height;
3787 s->spatial_dwt_buffer= av_mallocz(width*height*sizeof(DWTELEM));
3789 s->mv_scale= (s->avctx->flags & CODEC_FLAG_QPEL) ? 2 : 4;
3790 s->block_max_depth= (s->avctx->flags & CODEC_FLAG_4MV) ? 1 : 0;
3792 for(plane_index=0; plane_index<3; plane_index++){
3793 int w= s->avctx->width;
3794 int h= s->avctx->height;
3797 w>>= s->chroma_h_shift;
3798 h>>= s->chroma_v_shift;
3800 s->plane[plane_index].width = w;
3801 s->plane[plane_index].height= h;
3802 //av_log(NULL, AV_LOG_DEBUG, "%d %d\n", w, h);
3803 for(level=s->spatial_decomposition_count-1; level>=0; level--){
3804 for(orientation=level ? 1 : 0; orientation<4; orientation++){
3805 SubBand *b= &s->plane[plane_index].band[level][orientation];
3807 b->buf= s->spatial_dwt_buffer;
3809 b->stride= s->plane[plane_index].width << (s->spatial_decomposition_count - level);
3810 b->width = (w + !(orientation&1))>>1;
3811 b->height= (h + !(orientation>1))>>1;
3813 b->stride_line = 1 << (s->spatial_decomposition_count - level);
3814 b->buf_x_offset = 0;
3815 b->buf_y_offset = 0;
3819 b->buf_x_offset = (w+1)>>1;
3822 b->buf += b->stride>>1;
3823 b->buf_y_offset = b->stride_line >> 1;
3827 b->parent= &s->plane[plane_index].band[level-1][orientation];
3828 b->x_coeff=av_mallocz(((b->width+1) * b->height+1)*sizeof(x_and_coeff));
3835 for(i=0; i<MAX_REF_FRAMES; i++)
3836 for(j=0; j<MAX_REF_FRAMES; j++)
3837 scale_mv_ref[i][j] = 256*(i+1)/(j+1);
3841 width= s->width= avctx->width;
3842 height= s->height= avctx->height;
3844 assert(width && height);
3846 s->avctx->get_buffer(s->avctx, &s->mconly_picture);
3851 static int qscale2qlog(int qscale){
3852 return rint(QROOT*log(qscale / (float)FF_QP2LAMBDA)/log(2))
3853 + 61*QROOT/8; //<64 >60
3856 static int ratecontrol_1pass(SnowContext *s, AVFrame *pict)
3858 /* estimate the frame's complexity as a sum of weighted dwt coefs.
3859 * FIXME we know exact mv bits at this point,
3860 * but ratecontrol isn't set up to include them. */
3861 uint32_t coef_sum= 0;
3862 int level, orientation, delta_qlog;
3864 for(level=0; level<s->spatial_decomposition_count; level++){
3865 for(orientation=level ? 1 : 0; orientation<4; orientation++){
3866 SubBand *b= &s->plane[0].band[level][orientation];
3867 DWTELEM *buf= b->buf;
3868 const int w= b->width;
3869 const int h= b->height;
3870 const int stride= b->stride;
3871 const int qlog= av_clip(2*QROOT + b->qlog, 0, QROOT*16);
3872 const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
3873 const int qdiv= (1<<16)/qmul;
3876 decorrelate(s, b, buf, stride, 1, 0);
3879 coef_sum+= abs(buf[x+y*stride]) * qdiv >> 16;
3881 correlate(s, b, buf, stride, 1, 0);
3885 /* ugly, ratecontrol just takes a sqrt again */
3886 coef_sum = (uint64_t)coef_sum * coef_sum >> 16;
3887 assert(coef_sum < INT_MAX);
3889 if(pict->pict_type == I_TYPE){
3890 s->m.current_picture.mb_var_sum= coef_sum;
3891 s->m.current_picture.mc_mb_var_sum= 0;
3893 s->m.current_picture.mc_mb_var_sum= coef_sum;
3894 s->m.current_picture.mb_var_sum= 0;
3897 pict->quality= ff_rate_estimate_qscale(&s->m, 1);
3898 if (pict->quality < 0)
3900 s->lambda= pict->quality * 3/2;
3901 delta_qlog= qscale2qlog(pict->quality) - s->qlog;
3902 s->qlog+= delta_qlog;
3906 static void calculate_vissual_weight(SnowContext *s, Plane *p){
3907 int width = p->width;
3908 int height= p->height;
3909 int level, orientation, x, y;
3911 for(level=0; level<s->spatial_decomposition_count; level++){
3912 for(orientation=level ? 1 : 0; orientation<4; orientation++){
3913 SubBand *b= &p->band[level][orientation];
3914 DWTELEM *buf= b->buf;
3917 memset(s->spatial_dwt_buffer, 0, sizeof(int)*width*height);
3918 buf[b->width/2 + b->height/2*b->stride]= 256*256;
3919 ff_spatial_idwt(s->spatial_dwt_buffer, width, height, width, s->spatial_decomposition_type, s->spatial_decomposition_count);
3920 for(y=0; y<height; y++){
3921 for(x=0; x<width; x++){
3922 int64_t d= s->spatial_dwt_buffer[x + y*width];
3927 b->qlog= (int)(log(352256.0/sqrt(error)) / log(pow(2.0, 1.0/QROOT))+0.5);
3928 // av_log(NULL, AV_LOG_DEBUG, "%d %d %d\n", level, orientation, b->qlog/*, sqrt(error)*/);
3933 static int encode_init(AVCodecContext *avctx)
3935 SnowContext *s = avctx->priv_data;
3938 if(avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL){
3939 av_log(avctx, AV_LOG_ERROR, "this codec is under development, files encoded with it may not be decodable with future versions!!!\n"
3940 "use vstrict=-2 / -strict -2 to use it anyway\n");
3944 if(avctx->prediction_method == DWT_97
3945 && (avctx->flags & CODEC_FLAG_QSCALE)
3946 && avctx->global_quality == 0){
3947 av_log(avctx, AV_LOG_ERROR, "the 9/7 wavelet is incompatible with lossless mode\n");
3957 s->m.flags = avctx->flags;
3958 s->m.bit_rate= avctx->bit_rate;
3960 s->m.me.scratchpad= av_mallocz((avctx->width+64)*2*16*2*sizeof(uint8_t));
3961 s->m.me.map = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t));
3962 s->m.me.score_map = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t));
3963 s->m.obmc_scratchpad= av_mallocz(MB_SIZE*MB_SIZE*12*sizeof(uint32_t));
3964 h263_encode_init(&s->m); //mv_penalty
3966 s->max_ref_frames = FFMAX(FFMIN(avctx->refs, MAX_REF_FRAMES), 1);
3968 if(avctx->flags&CODEC_FLAG_PASS1){
3969 if(!avctx->stats_out)
3970 avctx->stats_out = av_mallocz(256);
3972 if((avctx->flags&CODEC_FLAG_PASS2) || !(avctx->flags&CODEC_FLAG_QSCALE)){
3973 if(ff_rate_control_init(&s->m) < 0)
3976 s->pass1_rc= !(avctx->flags & (CODEC_FLAG_QSCALE|CODEC_FLAG_PASS2));
3978 for(plane_index=0; plane_index<3; plane_index++){
3979 calculate_vissual_weight(s, &s->plane[plane_index]);
3983 avctx->coded_frame= &s->current_picture;
3984 switch(avctx->pix_fmt){
3985 // case PIX_FMT_YUV444P:
3986 // case PIX_FMT_YUV422P:
3987 case PIX_FMT_YUV420P:
3989 // case PIX_FMT_YUV411P:
3990 // case PIX_FMT_YUV410P:
3991 s->colorspace_type= 0;
3993 /* case PIX_FMT_RGB32:
3997 av_log(avctx, AV_LOG_ERROR, "format not supported\n");
4000 // avcodec_get_chroma_sub_sample(avctx->pix_fmt, &s->chroma_h_shift, &s->chroma_v_shift);
4001 s->chroma_h_shift= 1;
4002 s->chroma_v_shift= 1;
4004 ff_set_cmp(&s->dsp, s->dsp.me_cmp, s->avctx->me_cmp);
4005 ff_set_cmp(&s->dsp, s->dsp.me_sub_cmp, s->avctx->me_sub_cmp);
4007 s->avctx->get_buffer(s->avctx, &s->input_picture);
4009 if(s->avctx->me_method == ME_ITER){
4011 int size= s->b_width * s->b_height << 2*s->block_max_depth;
4012 for(i=0; i<s->max_ref_frames; i++){
4013 s->ref_mvs[i]= av_mallocz(size*sizeof(int16_t[2]));
4014 s->ref_scores[i]= av_mallocz(size*sizeof(uint32_t));
4021 static int frame_start(SnowContext *s){
4023 int w= s->avctx->width; //FIXME round up to x16 ?
4024 int h= s->avctx->height;
4026 if(s->current_picture.data[0]){
4027 draw_edges(s->current_picture.data[0], s->current_picture.linesize[0], w , h , EDGE_WIDTH );
4028 draw_edges(s->current_picture.data[1], s->current_picture.linesize[1], w>>1, h>>1, EDGE_WIDTH/2);
4029 draw_edges(s->current_picture.data[2], s->current_picture.linesize[2], w>>1, h>>1, EDGE_WIDTH/2);
4032 tmp= s->last_picture[s->max_ref_frames-1];
4033 memmove(s->last_picture+1, s->last_picture, (s->max_ref_frames-1)*sizeof(AVFrame));
4034 s->last_picture[0]= s->current_picture;
4035 s->current_picture= tmp;
4041 for(i=0; i<s->max_ref_frames && s->last_picture[i].data[0]; i++)
4042 if(i && s->last_picture[i-1].key_frame)
4047 s->current_picture.reference= 1;
4048 if(s->avctx->get_buffer(s->avctx, &s->current_picture) < 0){
4049 av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed\n");
4053 s->current_picture.key_frame= s->keyframe;
4058 static int encode_frame(AVCodecContext *avctx, unsigned char *buf, int buf_size, void *data){
4059 SnowContext *s = avctx->priv_data;
4060 RangeCoder * const c= &s->c;
4061 AVFrame *pict = data;
4062 const int width= s->avctx->width;
4063 const int height= s->avctx->height;
4064 int level, orientation, plane_index, i, y;
4065 uint8_t rc_header_bak[sizeof(s->header_state)];
4066 uint8_t rc_block_bak[sizeof(s->block_state)];
4068 ff_init_range_encoder(c, buf, buf_size);
4069 ff_build_rac_states(c, 0.05*(1LL<<32), 256-8);
4073 for(y=0; y<(height>>shift); y++)
4074 memcpy(&s->input_picture.data[i][y * s->input_picture.linesize[i]],
4075 &pict->data[i][y * pict->linesize[i]],
4078 s->new_picture = *pict;
4080 s->m.picture_number= avctx->frame_number;
4081 if(avctx->flags&CODEC_FLAG_PASS2){
4083 pict->pict_type= s->m.rc_context.entry[avctx->frame_number].new_pict_type;
4084 s->keyframe= pict->pict_type==FF_I_TYPE;
4085 if(!(avctx->flags&CODEC_FLAG_QSCALE)) {
4086 pict->quality= ff_rate_estimate_qscale(&s->m, 0);
4087 if (pict->quality < 0)
4091 s->keyframe= avctx->gop_size==0 || avctx->frame_number % avctx->gop_size == 0;
4093 pict->pict_type= s->keyframe ? FF_I_TYPE : FF_P_TYPE;
4096 if(s->pass1_rc && avctx->frame_number == 0)
4097 pict->quality= 2*FF_QP2LAMBDA;
4099 s->qlog= qscale2qlog(pict->quality);
4100 s->lambda = pict->quality * 3/2;
4102 if(s->qlog < 0 || (!pict->quality && (avctx->flags & CODEC_FLAG_QSCALE))){
4103 s->qlog= LOSSLESS_QLOG;
4105 }//else keep previous frame's qlog until after motion est
4109 s->m.current_picture_ptr= &s->m.current_picture;
4110 if(pict->pict_type == P_TYPE){
4111 int block_width = (width +15)>>4;
4112 int block_height= (height+15)>>4;
4113 int stride= s->current_picture.linesize[0];
4115 assert(s->current_picture.data[0]);
4116 assert(s->last_picture[0].data[0]);
4118 s->m.avctx= s->avctx;
4119 s->m.current_picture.data[0]= s->current_picture.data[0];
4120 s->m. last_picture.data[0]= s->last_picture[0].data[0];
4121 s->m. new_picture.data[0]= s-> input_picture.data[0];
4122 s->m. last_picture_ptr= &s->m. last_picture;
4124 s->m. last_picture.linesize[0]=
4125 s->m. new_picture.linesize[0]=
4126 s->m.current_picture.linesize[0]= stride;
4127 s->m.uvlinesize= s->current_picture.linesize[1];
4129 s->m.height= height;
4130 s->m.mb_width = block_width;
4131 s->m.mb_height= block_height;
4132 s->m.mb_stride= s->m.mb_width+1;
4133 s->m.b8_stride= 2*s->m.mb_width+1;
4135 s->m.pict_type= pict->pict_type;
4136 s->m.me_method= s->avctx->me_method;
4137 s->m.me.scene_change_score=0;
4138 s->m.flags= s->avctx->flags;
4139 s->m.quarter_sample= (s->avctx->flags & CODEC_FLAG_QPEL)!=0;
4140 s->m.out_format= FMT_H263;
4141 s->m.unrestricted_mv= 1;
4143 s->m.lambda = s->lambda;
4144 s->m.qscale= (s->m.lambda*139 + FF_LAMBDA_SCALE*64) >> (FF_LAMBDA_SHIFT + 7);
4145 s->lambda2= s->m.lambda2= (s->m.lambda*s->m.lambda + FF_LAMBDA_SCALE/2) >> FF_LAMBDA_SHIFT;
4147 s->m.dsp= s->dsp; //move
4153 memcpy(rc_header_bak, s->header_state, sizeof(s->header_state));
4154 memcpy(rc_block_bak, s->block_state, sizeof(s->block_state));
4159 s->m.pict_type = pict->pict_type;
4160 s->qbias= pict->pict_type == P_TYPE ? 2 : 0;
4163 s->m.misc_bits = 8*(s->c.bytestream - s->c.bytestream_start);
4164 encode_blocks(s, 1);
4165 s->m.mv_bits = 8*(s->c.bytestream - s->c.bytestream_start) - s->m.misc_bits;
4167 for(plane_index=0; plane_index<3; plane_index++){
4168 Plane *p= &s->plane[plane_index];
4172 // int bits= put_bits_count(&s->c.pb);
4174 if(!(avctx->flags2 & CODEC_FLAG2_MEMC_ONLY)){
4176 if(pict->data[plane_index]) //FIXME gray hack
4179 s->spatial_dwt_buffer[y*w + x]= pict->data[plane_index][y*pict->linesize[plane_index] + x]<<FRAC_BITS;
4182 predict_plane(s, s->spatial_dwt_buffer, plane_index, 0);
4185 && pict->pict_type == P_TYPE
4186 && !(avctx->flags&CODEC_FLAG_PASS2)
4187 && s->m.me.scene_change_score > s->avctx->scenechange_threshold){
4188 ff_init_range_encoder(c, buf, buf_size);
4189 ff_build_rac_states(c, 0.05*(1LL<<32), 256-8);
4190 pict->pict_type= FF_I_TYPE;
4192 s->current_picture.key_frame=1;
4196 if(s->qlog == LOSSLESS_QLOG){
4199 s->spatial_dwt_buffer[y*w + x]= (s->spatial_dwt_buffer[y*w + x] + (1<<(FRAC_BITS-1))-1)>>FRAC_BITS;
4204 ff_spatial_dwt(s->spatial_dwt_buffer, w, h, w, s->spatial_decomposition_type, s->spatial_decomposition_count);
4206 if(s->pass1_rc && plane_index==0){
4207 int delta_qlog = ratecontrol_1pass(s, pict);
4208 if (delta_qlog <= INT_MIN)
4211 //reordering qlog in the bitstream would eliminate this reset
4212 ff_init_range_encoder(c, buf, buf_size);
4213 memcpy(s->header_state, rc_header_bak, sizeof(s->header_state));
4214 memcpy(s->block_state, rc_block_bak, sizeof(s->block_state));
4216 encode_blocks(s, 0);
4220 for(level=0; level<s->spatial_decomposition_count; level++){
4221 for(orientation=level ? 1 : 0; orientation<4; orientation++){
4222 SubBand *b= &p->band[level][orientation];
4224 quantize(s, b, b->buf, b->stride, s->qbias);
4226 decorrelate(s, b, b->buf, b->stride, pict->pict_type == P_TYPE, 0);
4227 encode_subband(s, b, b->buf, b->parent ? b->parent->buf : NULL, b->stride, orientation);
4228 assert(b->parent==NULL || b->parent->stride == b->stride*2);
4230 correlate(s, b, b->buf, b->stride, 1, 0);
4233 // av_log(NULL, AV_LOG_DEBUG, "plane:%d bits:%d\n", plane_index, put_bits_count(&s->c.pb) - bits);
4235 for(level=0; level<s->spatial_decomposition_count; level++){
4236 for(orientation=level ? 1 : 0; orientation<4; orientation++){
4237 SubBand *b= &p->band[level][orientation];
4239 dequantize(s, b, b->buf, b->stride);
4243 ff_spatial_idwt(s->spatial_dwt_buffer, w, h, w, s->spatial_decomposition_type, s->spatial_decomposition_count);
4244 if(s->qlog == LOSSLESS_QLOG){
4247 s->spatial_dwt_buffer[y*w + x]<<=FRAC_BITS;
4252 predict_plane(s, s->spatial_dwt_buffer, plane_index, 1);
4253 STOP_TIMER("pred-conv")}
4256 if(pict->pict_type == I_TYPE){
4259 s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x]=
4260 pict->data[plane_index][y*pict->linesize[plane_index] + x];
4264 memset(s->spatial_dwt_buffer, 0, sizeof(DWTELEM)*w*h);
4265 predict_plane(s, s->spatial_dwt_buffer, plane_index, 1);
4268 if(s->avctx->flags&CODEC_FLAG_PSNR){
4271 if(pict->data[plane_index]) //FIXME gray hack
4274 int d= s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x] - pict->data[plane_index][y*pict->linesize[plane_index] + x];
4278 s->avctx->error[plane_index] += error;
4279 s->current_picture.error[plane_index] = error;
4283 if(s->last_picture[s->max_ref_frames-1].data[0])
4284 avctx->release_buffer(avctx, &s->last_picture[s->max_ref_frames-1]);
4286 s->current_picture.coded_picture_number = avctx->frame_number;
4287 s->current_picture.pict_type = pict->pict_type;
4288 s->current_picture.quality = pict->quality;
4289 s->m.frame_bits = 8*(s->c.bytestream - s->c.bytestream_start);
4290 s->m.p_tex_bits = s->m.frame_bits - s->m.misc_bits - s->m.mv_bits;
4291 s->m.current_picture.display_picture_number =
4292 s->m.current_picture.coded_picture_number = avctx->frame_number;
4293 s->m.current_picture.quality = pict->quality;
4294 s->m.total_bits += 8*(s->c.bytestream - s->c.bytestream_start);
4296 if (ff_rate_estimate_qscale(&s->m, 0) < 0)
4298 if(avctx->flags&CODEC_FLAG_PASS1)
4299 ff_write_pass1_stats(&s->m);
4300 s->m.last_pict_type = s->m.pict_type;
4301 avctx->frame_bits = s->m.frame_bits;
4302 avctx->mv_bits = s->m.mv_bits;
4303 avctx->misc_bits = s->m.misc_bits;
4304 avctx->p_tex_bits = s->m.p_tex_bits;
4308 return ff_rac_terminate(c);
4311 static void common_end(SnowContext *s){
4312 int plane_index, level, orientation, i;
4314 av_freep(&s->spatial_dwt_buffer);
4316 av_freep(&s->m.me.scratchpad);
4317 av_freep(&s->m.me.map);
4318 av_freep(&s->m.me.score_map);
4319 av_freep(&s->m.obmc_scratchpad);
4321 av_freep(&s->block);
4323 for(i=0; i<MAX_REF_FRAMES; i++){
4324 av_freep(&s->ref_mvs[i]);
4325 av_freep(&s->ref_scores[i]);
4326 if(s->last_picture[i].data[0])
4327 s->avctx->release_buffer(s->avctx, &s->last_picture[i]);
4330 for(plane_index=0; plane_index<3; plane_index++){
4331 for(level=s->spatial_decomposition_count-1; level>=0; level--){
4332 for(orientation=level ? 1 : 0; orientation<4; orientation++){
4333 SubBand *b= &s->plane[plane_index].band[level][orientation];
4335 av_freep(&b->x_coeff);
4341 static int encode_end(AVCodecContext *avctx)
4343 SnowContext *s = avctx->priv_data;
4346 av_free(avctx->stats_out);
4351 static int decode_init(AVCodecContext *avctx)
4353 SnowContext *s = avctx->priv_data;
4356 avctx->pix_fmt= PIX_FMT_YUV420P;
4360 block_size = MB_SIZE >> s->block_max_depth;
4361 slice_buffer_init(&s->sb, s->plane[0].height, (block_size) + (s->spatial_decomposition_count * (s->spatial_decomposition_count + 3)) + 1, s->plane[0].width, s->spatial_dwt_buffer);
4366 static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, uint8_t *buf, int buf_size){
4367 SnowContext *s = avctx->priv_data;
4368 RangeCoder * const c= &s->c;
4370 AVFrame *picture = data;
4371 int level, orientation, plane_index;
4373 ff_init_range_decoder(c, buf, buf_size);
4374 ff_build_rac_states(c, 0.05*(1LL<<32), 256-8);
4376 s->current_picture.pict_type= FF_I_TYPE; //FIXME I vs. P
4378 if(!s->block) alloc_blocks(s);
4381 //keyframe flag dupliaction mess FIXME
4382 if(avctx->debug&FF_DEBUG_PICT_INFO)
4383 av_log(avctx, AV_LOG_ERROR, "keyframe:%d qlog:%d\n", s->keyframe, s->qlog);
4387 for(plane_index=0; plane_index<3; plane_index++){
4388 Plane *p= &s->plane[plane_index];
4392 int decode_state[MAX_DECOMPOSITIONS][4][1]; /* Stored state info for unpack_coeffs. 1 variable per instance. */
4394 if(s->avctx->debug&2048){
4395 memset(s->spatial_dwt_buffer, 0, sizeof(DWTELEM)*w*h);
4396 predict_plane(s, s->spatial_dwt_buffer, plane_index, 1);
4400 int v= s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x];
4401 s->mconly_picture.data[plane_index][y*s->mconly_picture.linesize[plane_index] + x]= v;
4407 for(level=0; level<s->spatial_decomposition_count; level++){
4408 for(orientation=level ? 1 : 0; orientation<4; orientation++){
4409 SubBand *b= &p->band[level][orientation];
4410 unpack_coeffs(s, b, b->parent, orientation);
4413 STOP_TIMER("unpack coeffs");
4417 const int mb_h= s->b_height << s->block_max_depth;
4418 const int block_size = MB_SIZE >> s->block_max_depth;
4419 const int block_w = plane_index ? block_size/2 : block_size;
4421 dwt_compose_t cs[MAX_DECOMPOSITIONS];
4426 ff_spatial_idwt_buffered_init(cs, &s->sb, w, h, 1, s->spatial_decomposition_type, s->spatial_decomposition_count);
4427 for(mb_y=0; mb_y<=mb_h; mb_y++){
4429 int slice_starty = block_w*mb_y;
4430 int slice_h = block_w*(mb_y+1);
4431 if (!(s->keyframe || s->avctx->debug&512)){
4432 slice_starty = FFMAX(0, slice_starty - (block_w >> 1));
4433 slice_h -= (block_w >> 1);
4438 for(level=0; level<s->spatial_decomposition_count; level++){
4439 for(orientation=level ? 1 : 0; orientation<4; orientation++){
4440 SubBand *b= &p->band[level][orientation];
4443 int our_mb_start = mb_y;
4444 int our_mb_end = (mb_y + 1);
4446 start_y = (mb_y ? ((block_w * our_mb_start) >> (s->spatial_decomposition_count - level)) + s->spatial_decomposition_count - level + extra: 0);
4447 end_y = (((block_w * our_mb_end) >> (s->spatial_decomposition_count - level)) + s->spatial_decomposition_count - level + extra);
4448 if (!(s->keyframe || s->avctx->debug&512)){
4449 start_y = FFMAX(0, start_y - (block_w >> (1+s->spatial_decomposition_count - level)));
4450 end_y = FFMAX(0, end_y - (block_w >> (1+s->spatial_decomposition_count - level)));
4452 start_y = FFMIN(b->height, start_y);
4453 end_y = FFMIN(b->height, end_y);
4455 if (start_y != end_y){
4456 if (orientation == 0){
4457 SubBand * correlate_band = &p->band[0][0];
4458 int correlate_end_y = FFMIN(b->height, end_y + 1);
4459 int correlate_start_y = FFMIN(b->height, (start_y ? start_y + 1 : 0));
4460 decode_subband_slice_buffered(s, correlate_band, &s->sb, correlate_start_y, correlate_end_y, decode_state[0][0]);
4461 correlate_slice_buffered(s, &s->sb, correlate_band, correlate_band->buf, correlate_band->stride, 1, 0, correlate_start_y, correlate_end_y);
4462 dequantize_slice_buffered(s, &s->sb, correlate_band, correlate_band->buf, correlate_band->stride, start_y, end_y);
4465 decode_subband_slice_buffered(s, b, &s->sb, start_y, end_y, decode_state[level][orientation]);
4469 STOP_TIMER("decode_subband_slice");
4473 for(; yd<slice_h; yd+=4){
4474 ff_spatial_idwt_buffered_slice(&s->dsp, cs, &s->sb, w, h, 1, s->spatial_decomposition_type, s->spatial_decomposition_count, yd);
4476 STOP_TIMER("idwt slice");}
4479 if(s->qlog == LOSSLESS_QLOG){
4480 for(; yq<slice_h && yq<h; yq++){
4481 DWTELEM * line = slice_buffer_get_line(&s->sb, yq);
4483 line[x] <<= FRAC_BITS;
4488 predict_slice_buffered(s, &s->sb, s->spatial_dwt_buffer, plane_index, 1, mb_y);
4490 y = FFMIN(p->height, slice_starty);
4491 end_y = FFMIN(p->height, slice_h);
4493 slice_buffer_release(&s->sb, y++);
4496 slice_buffer_flush(&s->sb);
4498 STOP_TIMER("idwt + predict_slices")}
4503 if(s->last_picture[s->max_ref_frames-1].data[0])
4504 avctx->release_buffer(avctx, &s->last_picture[s->max_ref_frames-1]);
4506 if(!(s->avctx->debug&2048))
4507 *picture= s->current_picture;
4509 *picture= s->mconly_picture;
4511 *data_size = sizeof(AVFrame);
4513 bytes_read= c->bytestream - c->bytestream_start;
4514 if(bytes_read ==0) av_log(s->avctx, AV_LOG_ERROR, "error at end of frame\n"); //FIXME
4519 static int decode_end(AVCodecContext *avctx)
4521 SnowContext *s = avctx->priv_data;
4523 slice_buffer_destroy(&s->sb);
4530 AVCodec snow_decoder = {
4534 sizeof(SnowContext),
4539 0 /*CODEC_CAP_DR1*/ /*| CODEC_CAP_DRAW_HORIZ_BAND*/,
4543 #ifdef CONFIG_ENCODERS
4544 AVCodec snow_encoder = {
4548 sizeof(SnowContext),
4564 int buffer[2][width*height];
4567 s.spatial_decomposition_count=6;
4568 s.spatial_decomposition_type=1;
4570 printf("testing 5/3 DWT\n");
4571 for(i=0; i<width*height; i++)
4572 buffer[0][i]= buffer[1][i]= random()%54321 - 12345;
4574 ff_spatial_dwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4575 ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4577 for(i=0; i<width*height; i++)
4578 if(buffer[0][i]!= buffer[1][i]) printf("fsck: %d %d %d\n",i, buffer[0][i], buffer[1][i]);
4580 printf("testing 9/7 DWT\n");
4581 s.spatial_decomposition_type=0;
4582 for(i=0; i<width*height; i++)
4583 buffer[0][i]= buffer[1][i]= random()%54321 - 12345;
4585 ff_spatial_dwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4586 ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4588 for(i=0; i<width*height; i++)
4589 if(FFABS(buffer[0][i] - buffer[1][i])>20) printf("fsck: %d %d %d\n",i, buffer[0][i], buffer[1][i]);
4592 printf("testing AC coder\n");
4593 memset(s.header_state, 0, sizeof(s.header_state));
4594 ff_init_range_encoder(&s.c, buffer[0], 256*256);
4595 ff_init_cabac_states(&s.c, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64);
4597 for(i=-256; i<256; i++){
4599 put_symbol(&s.c, s.header_state, i*i*i/3*FFABS(i), 1);
4600 STOP_TIMER("put_symbol")
4602 ff_rac_terminate(&s.c);
4604 memset(s.header_state, 0, sizeof(s.header_state));
4605 ff_init_range_decoder(&s.c, buffer[0], 256*256);
4606 ff_init_cabac_states(&s.c, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64);
4608 for(i=-256; i<256; i++){
4611 j= get_symbol(&s.c, s.header_state, 1);
4612 STOP_TIMER("get_symbol")
4613 if(j!=i*i*i/3*FFABS(i)) printf("fsck: %d != %d\n", i, j);
4617 int level, orientation, x, y;
4618 int64_t errors[8][4];
4621 memset(errors, 0, sizeof(errors));
4622 s.spatial_decomposition_count=3;
4623 s.spatial_decomposition_type=0;
4624 for(level=0; level<s.spatial_decomposition_count; level++){
4625 for(orientation=level ? 1 : 0; orientation<4; orientation++){
4626 int w= width >> (s.spatial_decomposition_count-level);
4627 int h= height >> (s.spatial_decomposition_count-level);
4628 int stride= width << (s.spatial_decomposition_count-level);
4629 DWTELEM *buf= buffer[0];
4632 if(orientation&1) buf+=w;
4633 if(orientation>1) buf+=stride>>1;
4635 memset(buffer[0], 0, sizeof(int)*width*height);
4636 buf[w/2 + h/2*stride]= 256*256;
4637 ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4638 for(y=0; y<height; y++){
4639 for(x=0; x<width; x++){
4640 int64_t d= buffer[0][x + y*width];
4642 if(FFABS(width/2-x)<9 && FFABS(height/2-y)<9 && level==2) printf("%8"PRId64" ", d);
4644 if(FFABS(height/2-y)<9 && level==2) printf("\n");
4646 error= (int)(sqrt(error)+0.5);
4647 errors[level][orientation]= error;
4648 if(g) g=ff_gcd(g, error);
4652 printf("static int const visual_weight[][4]={\n");
4653 for(level=0; level<s.spatial_decomposition_count; level++){
4655 for(orientation=0; orientation<4; orientation++){
4656 printf("%8"PRId64",", errors[level][orientation]/g);
4664 int w= width >> (s.spatial_decomposition_count-level);
4665 int h= height >> (s.spatial_decomposition_count-level);
4666 int stride= width << (s.spatial_decomposition_count-level);
4667 DWTELEM *buf= buffer[0];
4673 memset(buffer[0], 0, sizeof(int)*width*height);
4675 for(y=0; y<height; y++){
4676 for(x=0; x<width; x++){
4677 int tab[4]={0,2,3,1};
4678 buffer[0][x+width*y]= 256*256*tab[(x&1) + 2*(y&1)];
4681 ff_spatial_dwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4685 buf[x + y*stride ]=169;
4686 buf[x + y*stride-w]=64;
4689 ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4691 for(y=0; y<height; y++){
4692 for(x=0; x<width; x++){
4693 int64_t d= buffer[0][x + y*width];
4695 if(FFABS(width/2-x)<9 && FFABS(height/2-y)<9) printf("%8"PRId64" ", d);
4697 if(FFABS(height/2-y)<9) printf("\n");