2 * Copyright (C) 2004 Michael Niedermayer <michaelni@gmx.at>
4 * This file is part of FFmpeg.
6 * FFmpeg is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * FFmpeg is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with FFmpeg; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
25 #include "rangecoder.h"
27 #include "mpegvideo.h"
/* quant3: 256-entry quantizer lookup with outputs in {-1, 0, 1}.
 * Entries 0 and 1 map to 0, entries 2..127 to 1, entries 128..254 to -1 and
 * entry 255 to 0. The non-negative half / negative half split is consistent
 * with indexing by the low 8 bits of a signed difference (assumption: the
 * users of this table are not visible here -- confirm). */
32 static const int8_t quant3[256]={
33 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
34 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
35 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
36 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
37 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
38 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
39 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
40 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
41 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
42 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
43 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
44 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
45 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
46 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
47 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
48 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 0,
/* quant3b: 256-entry quantizer lookup with outputs in {-1, 0, 1}.
 * Only entry 0 maps to 0; entries 1..127 map to 1 and entries 128..255
 * map to -1 (i.e. no dead zone around zero apart from index 0 itself). */
50 static const int8_t quant3b[256]={
51 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
52 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
53 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
54 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
55 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
56 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
57 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
58 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
59 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
60 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
61 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
62 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
63 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
64 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
65 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
66 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
/* quant3bA: 256-entry lookup with outputs in {-1, 0, 1}.
 * Entries 0 and 1 are 0; from index 2 onwards even indices hold 1 and odd
 * indices hold -1, so the sign is carried by the lowest index bit rather
 * than by the upper half of the index range. */
68 static const int8_t quant3bA[256]={
69 0, 0, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
70 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
71 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
72 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
73 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
74 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
75 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
76 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
77 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
78 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
79 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
80 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
81 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
82 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
83 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
84 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
/* quant5: 256-entry quantizer lookup with five output levels, -2..2.
 * Entry 0 -> 0, 1..3 -> 1, 4..127 -> 2, 128..252 -> -2, 253..255 -> -1.
 * The layout mirrors around the index wrap-around (255 pairs with 1, etc.),
 * consistent with an 8-bit two's-complement index (assumption -- confirm). */
86 static const int8_t quant5[256]={
87 0, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
88 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
89 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
90 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
91 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
92 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
93 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
94 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
95 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
96 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
97 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
98 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
99 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
100 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
101 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
102 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,-1,
/* quant7: 256-entry quantizer lookup with seven output levels, -3..3.
 * Entries 0..127 hold the non-negative levels and entries 128..255 the
 * negative ones, with the magnitude thresholds mirrored around the index
 * wrap-around (assumption: index is an 8-bit two's-complement difference). */
104 static const int8_t quant7[256]={
105 0, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
106 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
107 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3,
108 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
109 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
110 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
111 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
112 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
113 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
114 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
115 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
116 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
117 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
118 -3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-2,-2,-2,
119 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
120 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,
/* quant9: 256-entry quantizer lookup with nine output levels, -4..4.
 * Entries 0..127 hold the non-negative levels and entries 128..255 the
 * negative ones, with the magnitude thresholds mirrored around the index
 * wrap-around (assumption: index is an 8-bit two's-complement difference). */
122 static const int8_t quant9[256]={
123 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3,
124 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
125 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
126 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
127 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
128 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
129 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
130 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
131 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
132 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
133 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
134 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
135 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
136 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
137 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,
138 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-1,-1,
/* quant11: 256-entry quantizer lookup with eleven output levels, -5..5.
 * Entries 0..127 hold the non-negative levels and entries 128..255 the
 * negative ones, with the magnitude thresholds mirrored around the index
 * wrap-around (assumption: index is an 8-bit two's-complement difference). */
140 static const int8_t quant11[256]={
141 0, 1, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4,
142 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
143 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
144 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
145 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
146 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
147 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
148 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
149 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
150 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
151 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
152 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
153 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
154 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-4,-4,
155 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
156 -4,-4,-4,-4,-4,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-1,
/* quant13: 256-entry quantizer lookup with thirteen output levels, -6..6.
 * Entries 0..127 hold the non-negative levels and entries 128..255 the
 * negative ones, with the magnitude thresholds mirrored around the index
 * wrap-around (assumption: index is an 8-bit two's-complement difference). */
158 static const int8_t quant13[256]={
159 0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
160 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
161 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
162 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
163 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
164 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
165 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
166 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
167 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
168 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
169 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
170 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
171 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-5,
172 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
173 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
174 -4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,-2,-2,-1,
/* obmc32 (first variant in this listing): 32x32 table of weights, stored row
 * by row, symmetric left/right and top/bottom, zero on the outer border and
 * peaking at 64 in the centre.
 * NOTE(review): this listing contains three definitions named obmc32; the
 * preprocessor conditionals that presumably select one of them are not
 * visible here. The name suggests an overlapped-block motion compensation
 * window -- confirm against the users of obmc_tab. */
178 static const uint8_t obmc32[1024]={
179 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
180 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
181 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
182 0, 0, 1, 1, 2, 2, 3, 4, 4, 5, 6, 6, 7, 7, 8, 8, 8, 8, 7, 7, 6, 6, 5, 4, 4, 3, 2, 2, 1, 1, 0, 0,
183 0, 0, 1, 2, 2, 3, 4, 6, 7, 8, 9,10,11,12,12,12,12,12,12,11,10, 9, 8, 7, 6, 4, 3, 2, 2, 1, 0, 0,
184 0, 1, 1, 2, 3, 5, 6, 8,10,11,13,14,15,16,17,18,18,17,16,15,14,13,11,10, 8, 6, 5, 3, 2, 1, 1, 0,
185 0, 1, 1, 3, 4, 6, 8,10,13,15,17,19,20,22,22,23,23,22,22,20,19,17,15,13,10, 8, 6, 4, 3, 1, 1, 0,
186 0, 1, 2, 4, 6, 8,10,13,16,19,21,23,25,27,28,29,29,28,27,25,23,21,19,16,13,10, 8, 6, 4, 2, 1, 0,
187 0, 1, 2, 4, 7,10,13,16,19,22,25,28,31,33,34,35,35,34,33,31,28,25,22,19,16,13,10, 7, 4, 2, 1, 0,
188 0, 1, 3, 5, 8,11,15,19,22,26,30,33,36,38,40,41,41,40,38,36,33,30,26,22,19,15,11, 8, 5, 3, 1, 0,
189 0, 1, 3, 6, 9,12,17,21,25,30,34,38,41,44,45,46,46,45,44,41,38,34,30,25,21,17,12, 9, 6, 3, 1, 0,
190 0, 1, 3, 6,10,14,19,23,28,33,38,42,45,48,51,52,52,51,48,45,42,38,33,28,23,19,14,10, 6, 3, 1, 0,
191 0, 1, 4, 7,11,15,20,25,31,36,41,45,49,52,55,56,56,55,52,49,45,41,36,31,25,20,15,11, 7, 4, 1, 0,
192 0, 2, 4, 7,12,16,22,27,33,38,44,48,52,56,58,60,60,58,56,52,48,44,38,33,27,22,16,12, 7, 4, 2, 0,
193 0, 1, 4, 8,12,17,22,28,34,40,45,51,55,58,61,62,62,61,58,55,51,45,40,34,28,22,17,12, 8, 4, 1, 0,
194 0, 2, 4, 8,12,18,23,29,35,41,46,52,56,60,62,64,64,62,60,56,52,46,41,35,29,23,18,12, 8, 4, 2, 0,
195 0, 2, 4, 8,12,18,23,29,35,41,46,52,56,60,62,64,64,62,60,56,52,46,41,35,29,23,18,12, 8, 4, 2, 0,
196 0, 1, 4, 8,12,17,22,28,34,40,45,51,55,58,61,62,62,61,58,55,51,45,40,34,28,22,17,12, 8, 4, 1, 0,
197 0, 2, 4, 7,12,16,22,27,33,38,44,48,52,56,58,60,60,58,56,52,48,44,38,33,27,22,16,12, 7, 4, 2, 0,
198 0, 1, 4, 7,11,15,20,25,31,36,41,45,49,52,55,56,56,55,52,49,45,41,36,31,25,20,15,11, 7, 4, 1, 0,
199 0, 1, 3, 6,10,14,19,23,28,33,38,42,45,48,51,52,52,51,48,45,42,38,33,28,23,19,14,10, 6, 3, 1, 0,
200 0, 1, 3, 6, 9,12,17,21,25,30,34,38,41,44,45,46,46,45,44,41,38,34,30,25,21,17,12, 9, 6, 3, 1, 0,
201 0, 1, 3, 5, 8,11,15,19,22,26,30,33,36,38,40,41,41,40,38,36,33,30,26,22,19,15,11, 8, 5, 3, 1, 0,
202 0, 1, 2, 4, 7,10,13,16,19,22,25,28,31,33,34,35,35,34,33,31,28,25,22,19,16,13,10, 7, 4, 2, 1, 0,
203 0, 1, 2, 4, 6, 8,10,13,16,19,21,23,25,27,28,29,29,28,27,25,23,21,19,16,13,10, 8, 6, 4, 2, 1, 0,
204 0, 1, 1, 3, 4, 6, 8,10,13,15,17,19,20,22,22,23,23,22,22,20,19,17,15,13,10, 8, 6, 4, 3, 1, 1, 0,
205 0, 1, 1, 2, 3, 5, 6, 8,10,11,13,14,15,16,17,18,18,17,16,15,14,13,11,10, 8, 6, 5, 3, 2, 1, 1, 0,
206 0, 0, 1, 2, 2, 3, 4, 6, 7, 8, 9,10,11,12,12,12,12,12,12,11,10, 9, 8, 7, 6, 4, 3, 2, 2, 1, 0, 0,
207 0, 0, 1, 1, 2, 2, 3, 4, 4, 5, 6, 6, 7, 7, 8, 8, 8, 8, 7, 7, 6, 6, 5, 4, 4, 3, 2, 2, 1, 1, 0, 0,
208 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
209 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
210 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
/* obmc16 (first variant in this listing): 16x16 table of weights, stored row
 * by row, symmetric in both directions and peaking at 62 in the centre.
 * NOTE(review): three definitions named obmc16 appear in this listing; the
 * preprocessor conditionals that presumably select one are not visible. */
213 static const uint8_t obmc16[256]={
214 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
215 0, 1, 1, 2, 4, 5, 5, 6, 6, 5, 5, 4, 2, 1, 1, 0,
216 0, 1, 4, 6, 9,11,13,15,15,13,11, 9, 6, 4, 1, 0,
217 0, 2, 6,11,15,20,24,26,26,24,20,15,11, 6, 2, 0,
218 0, 4, 9,15,23,29,34,38,38,34,29,23,15, 9, 4, 0,
219 0, 5,11,20,29,38,45,49,49,45,38,29,20,11, 5, 0,
220 1, 5,13,24,34,45,53,57,57,53,45,34,24,13, 5, 1,
221 1, 6,15,26,38,49,57,62,62,57,49,38,26,15, 6, 1,
222 1, 6,15,26,38,49,57,62,62,57,49,38,26,15, 6, 1,
223 1, 5,13,24,34,45,53,57,57,53,45,34,24,13, 5, 1,
224 0, 5,11,20,29,38,45,49,49,45,38,29,20,11, 5, 0,
225 0, 4, 9,15,23,29,34,38,38,34,29,23,15, 9, 4, 0,
226 0, 2, 6,11,15,20,24,26,26,24,20,15,11, 6, 2, 0,
227 0, 1, 4, 6, 9,11,13,15,15,13,11, 9, 6, 4, 1, 0,
228 0, 1, 1, 2, 4, 5, 5, 6, 6, 5, 5, 4, 2, 1, 1, 0,
229 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
/* obmc32 (second variant in this listing): 32x32 table of weights, stored
 * row by row, symmetric in both directions. All entries are multiples of 4
 * and the centre peaks at 240, i.e. this variant uses a larger weight scale
 * than the variants peaking at 64.
 * NOTE(review): the preprocessor conditionals that presumably select one of
 * the three obmc32 definitions are not visible in this listing. */
233 static const uint8_t obmc32[1024]={
234 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0,
235 0, 4, 4, 4, 8, 8, 8, 12, 12, 16, 16, 16, 20, 20, 20, 24, 24, 20, 20, 20, 16, 16, 16, 12, 12, 8, 8, 8, 4, 4, 4, 0,
236 0, 4, 8, 8, 12, 12, 16, 20, 20, 24, 28, 28, 32, 32, 36, 40, 40, 36, 32, 32, 28, 28, 24, 20, 20, 16, 12, 12, 8, 8, 4, 0,
237 0, 4, 8, 12, 16, 20, 24, 28, 28, 32, 36, 40, 44, 48, 52, 56, 56, 52, 48, 44, 40, 36, 32, 28, 28, 24, 20, 16, 12, 8, 4, 0,
238 4, 8, 12, 16, 20, 24, 28, 32, 40, 44, 48, 52, 56, 60, 64, 68, 68, 64, 60, 56, 52, 48, 44, 40, 32, 28, 24, 20, 16, 12, 8, 4,
239 4, 8, 12, 20, 24, 32, 36, 40, 48, 52, 56, 64, 68, 76, 80, 84, 84, 80, 76, 68, 64, 56, 52, 48, 40, 36, 32, 24, 20, 12, 8, 4,
240 4, 8, 16, 24, 28, 36, 44, 48, 56, 60, 68, 76, 80, 88, 96,100,100, 96, 88, 80, 76, 68, 60, 56, 48, 44, 36, 28, 24, 16, 8, 4,
241 4, 12, 20, 28, 32, 40, 48, 56, 64, 72, 80, 88, 92,100,108,116,116,108,100, 92, 88, 80, 72, 64, 56, 48, 40, 32, 28, 20, 12, 4,
242 4, 12, 20, 28, 40, 48, 56, 64, 72, 80, 88, 96,108,116,124,132,132,124,116,108, 96, 88, 80, 72, 64, 56, 48, 40, 28, 20, 12, 4,
243 4, 16, 24, 32, 44, 52, 60, 72, 80, 92,100,108,120,128,136,148,148,136,128,120,108,100, 92, 80, 72, 60, 52, 44, 32, 24, 16, 4,
244 4, 16, 28, 36, 48, 56, 68, 80, 88,100,112,120,132,140,152,164,164,152,140,132,120,112,100, 88, 80, 68, 56, 48, 36, 28, 16, 4,
245 4, 16, 28, 40, 52, 64, 76, 88, 96,108,120,132,144,156,168,180,180,168,156,144,132,120,108, 96, 88, 76, 64, 52, 40, 28, 16, 4,
246 8, 20, 32, 44, 56, 68, 80, 92,108,120,132,144,156,168,180,192,192,180,168,156,144,132,120,108, 92, 80, 68, 56, 44, 32, 20, 8,
247 8, 20, 32, 48, 60, 76, 88,100,116,128,140,156,168,184,196,208,208,196,184,168,156,140,128,116,100, 88, 76, 60, 48, 32, 20, 8,
248 8, 20, 36, 52, 64, 80, 96,108,124,136,152,168,180,196,212,224,224,212,196,180,168,152,136,124,108, 96, 80, 64, 52, 36, 20, 8,
249 8, 24, 40, 56, 68, 84,100,116,132,148,164,180,192,208,224,240,240,224,208,192,180,164,148,132,116,100, 84, 68, 56, 40, 24, 8,
250 8, 24, 40, 56, 68, 84,100,116,132,148,164,180,192,208,224,240,240,224,208,192,180,164,148,132,116,100, 84, 68, 56, 40, 24, 8,
251 8, 20, 36, 52, 64, 80, 96,108,124,136,152,168,180,196,212,224,224,212,196,180,168,152,136,124,108, 96, 80, 64, 52, 36, 20, 8,
252 8, 20, 32, 48, 60, 76, 88,100,116,128,140,156,168,184,196,208,208,196,184,168,156,140,128,116,100, 88, 76, 60, 48, 32, 20, 8,
253 8, 20, 32, 44, 56, 68, 80, 92,108,120,132,144,156,168,180,192,192,180,168,156,144,132,120,108, 92, 80, 68, 56, 44, 32, 20, 8,
254 4, 16, 28, 40, 52, 64, 76, 88, 96,108,120,132,144,156,168,180,180,168,156,144,132,120,108, 96, 88, 76, 64, 52, 40, 28, 16, 4,
255 4, 16, 28, 36, 48, 56, 68, 80, 88,100,112,120,132,140,152,164,164,152,140,132,120,112,100, 88, 80, 68, 56, 48, 36, 28, 16, 4,
256 4, 16, 24, 32, 44, 52, 60, 72, 80, 92,100,108,120,128,136,148,148,136,128,120,108,100, 92, 80, 72, 60, 52, 44, 32, 24, 16, 4,
257 4, 12, 20, 28, 40, 48, 56, 64, 72, 80, 88, 96,108,116,124,132,132,124,116,108, 96, 88, 80, 72, 64, 56, 48, 40, 28, 20, 12, 4,
258 4, 12, 20, 28, 32, 40, 48, 56, 64, 72, 80, 88, 92,100,108,116,116,108,100, 92, 88, 80, 72, 64, 56, 48, 40, 32, 28, 20, 12, 4,
259 4, 8, 16, 24, 28, 36, 44, 48, 56, 60, 68, 76, 80, 88, 96,100,100, 96, 88, 80, 76, 68, 60, 56, 48, 44, 36, 28, 24, 16, 8, 4,
260 4, 8, 12, 20, 24, 32, 36, 40, 48, 52, 56, 64, 68, 76, 80, 84, 84, 80, 76, 68, 64, 56, 52, 48, 40, 36, 32, 24, 20, 12, 8, 4,
261 4, 8, 12, 16, 20, 24, 28, 32, 40, 44, 48, 52, 56, 60, 64, 68, 68, 64, 60, 56, 52, 48, 44, 40, 32, 28, 24, 20, 16, 12, 8, 4,
262 0, 4, 8, 12, 16, 20, 24, 28, 28, 32, 36, 40, 44, 48, 52, 56, 56, 52, 48, 44, 40, 36, 32, 28, 28, 24, 20, 16, 12, 8, 4, 0,
263 0, 4, 8, 8, 12, 12, 16, 20, 20, 24, 28, 28, 32, 32, 36, 40, 40, 36, 32, 32, 28, 28, 24, 20, 20, 16, 12, 12, 8, 8, 4, 0,
264 0, 4, 4, 4, 8, 8, 8, 12, 12, 16, 16, 16, 20, 20, 20, 24, 24, 20, 20, 20, 16, 16, 16, 12, 12, 8, 8, 8, 4, 4, 4, 0,
265 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0,
/* obmc16 (second variant in this listing): 16x16 table of weights, stored
 * row by row, symmetric in both directions. All entries are multiples of 4
 * and the centre peaks at 224.
 * NOTE(review): the preprocessor conditionals that presumably select one of
 * the three obmc16 definitions are not visible in this listing. */
268 static const uint8_t obmc16[256]={
269 0, 4, 4, 8, 8, 12, 12, 16, 16, 12, 12, 8, 8, 4, 4, 0,
270 4, 8, 16, 20, 28, 32, 40, 44, 44, 40, 32, 28, 20, 16, 8, 4,
271 4, 16, 24, 36, 44, 56, 64, 76, 76, 64, 56, 44, 36, 24, 16, 4,
272 8, 20, 36, 48, 64, 76, 92,104,104, 92, 76, 64, 48, 36, 20, 8,
273 8, 28, 44, 64, 80,100,116,136,136,116,100, 80, 64, 44, 28, 8,
274 12, 32, 56, 76,100,120,144,164,164,144,120,100, 76, 56, 32, 12,
275 12, 40, 64, 92,116,144,168,196,196,168,144,116, 92, 64, 40, 12,
276 16, 44, 76,104,136,164,196,224,224,196,164,136,104, 76, 44, 16,
277 16, 44, 76,104,136,164,196,224,224,196,164,136,104, 76, 44, 16,
278 12, 40, 64, 92,116,144,168,196,196,168,144,116, 92, 64, 40, 12,
279 12, 32, 56, 76,100,120,144,164,164,144,120,100, 76, 56, 32, 12,
280 8, 28, 44, 64, 80,100,116,136,136,116,100, 80, 64, 44, 28, 8,
281 8, 20, 36, 48, 64, 76, 92,104,104, 92, 76, 64, 48, 36, 20, 8,
282 4, 16, 24, 36, 44, 56, 64, 76, 76, 64, 56, 44, 36, 24, 16, 4,
283 4, 8, 16, 20, 28, 32, 40, 44, 44, 40, 32, 28, 20, 16, 8, 4,
284 0, 4, 4, 8, 8, 12, 12, 16, 16, 12, 12, 8, 8, 4, 4, 0,
/* obmc32 (third variant in this listing): 32x32 table of weights, stored row
 * by row, symmetric in both directions, zero on the first and last row and
 * peaking at 64 in the centre. The individual values differ slightly from
 * the first variant in this listing.
 * NOTE(review): the preprocessor conditionals that presumably select one of
 * the three obmc32 definitions are not visible in this listing. */
288 static const uint8_t obmc32[1024]={
289 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
290 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
291 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
292 0, 0, 1, 1, 1, 2, 2, 3, 4, 5, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 6, 5, 5, 4, 3, 2, 2, 1, 1, 1, 0, 0,
293 0, 0, 1, 1, 2, 3, 4, 5, 6, 7, 9,10,11,11,12,12,12,12,11,11,10, 9, 7, 6, 5, 4, 3, 2, 1, 1, 0, 0,
294 0, 0, 1, 2, 3, 5, 6, 8, 9,11,12,14,15,16,17,17,17,17,16,15,14,12,11, 9, 8, 6, 5, 3, 2, 1, 0, 0,
295 0, 1, 1, 2, 4, 6, 8,10,12,15,17,19,20,21,22,23,23,22,21,20,19,17,15,12,10, 8, 6, 4, 2, 1, 1, 0,
296 0, 1, 2, 3, 5, 8,10,13,16,19,21,24,26,27,28,29,29,28,27,26,24,21,19,16,13,10, 8, 5, 3, 2, 1, 0,
297 0, 1, 2, 4, 6, 9,12,16,19,23,26,29,31,33,34,35,35,34,33,31,29,26,23,19,16,12, 9, 6, 4, 2, 1, 0,
298 0, 1, 3, 5, 7,11,15,19,23,26,30,34,37,39,40,41,41,40,39,37,34,30,26,23,19,15,11, 7, 5, 3, 1, 0,
299 0, 1, 3, 5, 9,12,17,21,26,30,35,38,42,44,46,47,47,46,44,42,38,35,30,26,21,17,12, 9, 5, 3, 1, 0,
300 0, 1, 3, 6, 9,14,19,24,29,34,38,43,46,49,51,52,52,51,49,46,43,38,34,29,24,19,14, 9, 6, 3, 1, 0,
301 0, 1, 3, 6,11,15,20,26,31,37,42,46,50,53,56,57,57,56,53,50,46,42,37,31,26,20,15,11, 6, 3, 1, 0,
302 0, 1, 3, 7,11,16,21,27,33,39,44,49,53,57,59,60,60,59,57,53,49,44,39,33,27,21,16,11, 7, 3, 1, 0,
303 0, 1, 4, 7,12,17,22,28,34,40,46,51,56,59,61,63,63,61,59,56,51,46,40,34,28,22,17,12, 7, 4, 1, 0,
304 0, 1, 4, 7,12,17,23,29,35,41,47,52,57,60,63,64,64,63,60,57,52,47,41,35,29,23,17,12, 7, 4, 1, 0,
305 0, 1, 4, 7,12,17,23,29,35,41,47,52,57,60,63,64,64,63,60,57,52,47,41,35,29,23,17,12, 7, 4, 1, 0,
306 0, 1, 4, 7,12,17,22,28,34,40,46,51,56,59,61,63,63,61,59,56,51,46,40,34,28,22,17,12, 7, 4, 1, 0,
307 0, 1, 3, 7,11,16,21,27,33,39,44,49,53,57,59,60,60,59,57,53,49,44,39,33,27,21,16,11, 7, 3, 1, 0,
308 0, 1, 3, 6,11,15,20,26,31,37,42,46,50,53,56,57,57,56,53,50,46,42,37,31,26,20,15,11, 6, 3, 1, 0,
309 0, 1, 3, 6, 9,14,19,24,29,34,38,43,46,49,51,52,52,51,49,46,43,38,34,29,24,19,14, 9, 6, 3, 1, 0,
310 0, 1, 3, 5, 9,12,17,21,26,30,35,38,42,44,46,47,47,46,44,42,38,35,30,26,21,17,12, 9, 5, 3, 1, 0,
311 0, 1, 3, 5, 7,11,15,19,23,26,30,34,37,39,40,41,41,40,39,37,34,30,26,23,19,15,11, 7, 5, 3, 1, 0,
312 0, 1, 2, 4, 6, 9,12,16,19,23,26,29,31,33,34,35,35,34,33,31,29,26,23,19,16,12, 9, 6, 4, 2, 1, 0,
313 0, 1, 2, 3, 5, 8,10,13,16,19,21,24,26,27,28,29,29,28,27,26,24,21,19,16,13,10, 8, 5, 3, 2, 1, 0,
314 0, 1, 1, 2, 4, 6, 8,10,12,15,17,19,20,21,22,23,23,22,21,20,19,17,15,12,10, 8, 6, 4, 2, 1, 1, 0,
315 0, 0, 1, 2, 3, 5, 6, 8, 9,11,12,14,15,16,17,17,17,17,16,15,14,12,11, 9, 8, 6, 5, 3, 2, 1, 0, 0,
316 0, 0, 1, 1, 2, 3, 4, 5, 6, 7, 9,10,11,11,12,12,12,12,11,11,10, 9, 7, 6, 5, 4, 3, 2, 1, 1, 0, 0,
317 0, 0, 1, 1, 1, 2, 2, 3, 4, 5, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 6, 5, 5, 4, 3, 2, 2, 1, 1, 1, 0, 0,
318 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
319 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
320 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
/* obmc16 (third variant in this listing): 16x16 table of weights, stored row
 * by row, symmetric in both directions and peaking at 63 in the centre.
 * NOTE(review): the preprocessor conditionals that presumably select one of
 * the three obmc16 definitions are not visible in this listing. */
323 static const uint8_t obmc16[256]={
324 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
325 0, 0, 1, 2, 3, 4, 5, 5, 5, 5, 4, 3, 2, 1, 0, 0,
326 0, 1, 3, 6, 8,11,13,14,14,13,11, 8, 6, 3, 1, 0,
327 0, 2, 6,10,15,20,24,26,26,24,20,15,10, 6, 2, 0,
328 0, 3, 8,16,23,30,35,38,38,35,30,23,16, 8, 3, 0,
329 1, 4,11,20,30,39,46,49,49,46,39,30,20,11, 4, 1,
330 1, 5,13,24,35,46,54,58,58,54,46,35,24,13, 5, 1,
331 0, 5,14,26,38,49,58,63,63,58,49,38,26,14, 5, 0,
332 0, 5,14,26,38,49,58,63,63,58,49,38,26,14, 5, 0,
333 1, 5,13,24,35,46,54,58,58,54,46,35,24,13, 5, 1,
334 1, 4,11,20,30,39,46,49,49,46,39,30,20,11, 4, 1,
335 0, 3, 8,16,23,30,35,38,38,35,30,23,16, 8, 3, 0,
336 0, 2, 6,10,15,20,24,26,26,24,20,15,10, 6, 2, 0,
337 0, 1, 3, 6, 8,11,13,14,14,13,11, 8, 6, 3, 1, 0,
338 0, 0, 1, 2, 3, 4, 5, 5, 5, 5, 4, 3, 2, 1, 0, 0,
339 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
/* obmc8: 8x8 table of weights, stored row by row, symmetric in both
 * directions and peaking at 196 in the centre. Every entry is the product of
 * two values from {2, 6, 10, 14} (e.g. 196 = 14*14, 4 = 2*2), i.e. the table
 * is separable. */
345 static const uint8_t obmc8[64]={
346 4, 12, 20, 28, 28, 20, 12, 4,
347 12, 36, 60, 84, 84, 60, 36, 12,
348 20, 60,100,140,140,100, 60, 20,
349 28, 84,140,196,196,140, 84, 28,
350 28, 84,140,196,196,140, 84, 28,
351 20, 60,100,140,140,100, 60, 20,
352 12, 36, 60, 84, 84, 60, 36, 12,
353 4, 12, 20, 28, 28, 20, 12, 4,
358 static const uint8_t obmc4[16]={
366 static const uint8_t *obmc_tab[4]={
367 obmc32, obmc16, obmc8, obmc4
370 static int scale_mv_ref[MAX_REF_FRAMES][MAX_REF_FRAMES];
372 typedef struct BlockNode{
378 //#define TYPE_SPLIT 1
379 #define BLOCK_INTRA 1
381 //#define TYPE_NOCOLOR 4
382 uint8_t level; //FIXME merge into type?
385 static const BlockNode null_block= { //FIXME add border maybe
386 .color= {128,128,128},
394 #define LOG2_MB_SIZE 4
395 #define MB_SIZE (1<<LOG2_MB_SIZE)
396 #define ENCODER_EXTRA_BITS 4
399 typedef struct x_and_coeff{
404 typedef struct SubBand{
409 int qlog; ///< log(qscale)/log[2^(1/6)]
414 int stride_line; ///< Stride measured in lines, not pixels.
415 x_and_coeff * x_coeff;
416 struct SubBand *parent;
417 uint8_t state[/*7*2*/ 7 + 512][32];
420 typedef struct Plane{
423 SubBand band[MAX_DECOMPOSITIONS][4];
426 typedef struct SnowContext{
427 // MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to make the motion estimation eventually independent of MpegEncContext, so this will be removed then (FIXME/XXX)
429 AVCodecContext *avctx;
433 AVFrame input_picture; ///< new_picture with the internal linesizes
434 AVFrame current_picture;
435 AVFrame last_picture[MAX_REF_FRAMES];
436 uint8_t *halfpel_plane[MAX_REF_FRAMES][4][4];
437 AVFrame mconly_picture;
438 // uint8_t q_context[16];
439 uint8_t header_state[32];
440 uint8_t block_state[128 + 32*128];
444 int spatial_decomposition_type;
445 int last_spatial_decomposition_type;
446 int temporal_decomposition_type;
447 int spatial_decomposition_count;
448 int temporal_decomposition_count;
451 int16_t (*ref_mvs[MAX_REF_FRAMES])[2];
452 uint32_t *ref_scores[MAX_REF_FRAMES];
453 DWTELEM *spatial_dwt_buffer;
454 IDWTELEM *spatial_idwt_buffer;
458 int spatial_scalability;
468 #define QBIAS_SHIFT 3
472 int last_block_max_depth;
473 Plane plane[MAX_PLANES];
475 #define ME_CACHE_SIZE 1024
476 int me_cache[ME_CACHE_SIZE];
477 int me_cache_generation;
480 MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to make the motion estimation eventually independent of MpegEncContext, so this will be removed then (FIXME/XXX)
/* Fetch the buffer for line_num: use the pointer cached in
 * (slice_buf)->line[line_num] when it is non-NULL, otherwise fall back to
 * slice_buffer_load_line(). Both macro arguments are evaluated more than
 * once, so they must be free of side effects. The commented-out alternative
 * below always goes through slice_buffer_load_line(). */
491 #define slice_buffer_get_line(slice_buf, line_num) ((slice_buf)->line[line_num] ? (slice_buf)->line[line_num] : slice_buffer_load_line((slice_buf), (line_num)))
492 //#define slice_buffer_get_line(slice_buf, line_num) (slice_buffer_load_line((slice_buf), (line_num)))
494 static void iterative_me(SnowContext *s);
496 static void slice_buffer_init(slice_buffer * buf, int line_count, int max_allocated_lines, int line_width, IDWTELEM * base_buffer)
500 buf->base_buffer = base_buffer;
501 buf->line_count = line_count;
502 buf->line_width = line_width;
503 buf->data_count = max_allocated_lines;
504 buf->line = av_mallocz (sizeof(IDWTELEM *) * line_count);
505 buf->data_stack = av_malloc (sizeof(IDWTELEM *) * max_allocated_lines);
507 for (i = 0; i < max_allocated_lines; i++)
509 buf->data_stack[i] = av_malloc (sizeof(IDWTELEM) * line_width);
512 buf->data_stack_top = max_allocated_lines - 1;
/* Bind a buffer from the free stack to the given line slot.
 * Visible behaviour: asserts that the free stack is not empty, pops the top
 * entry of buf->data_stack and stores it in buf->line[line].
 * NOTE(review): this listing omits some lines of the function (local
 * declarations, the condition guarding the early return below and the final
 * return statement); the early return presumably fires when the slot already
 * holds a buffer -- confirm against the full file. */
515 static IDWTELEM * slice_buffer_load_line(slice_buffer * buf, int line)
520 // av_log(NULL, AV_LOG_DEBUG, "Cache hit: %d\n", line);
522 assert(buf->data_stack_top >= 0);
523 // assert(!buf->line[line]);
525 return buf->line[line];
/* offset is computed here but not used by any line visible in this listing. */
527 offset = buf->line_width * line;
/* Pop the top free buffer and attach it to the line slot. */
528 buffer = buf->data_stack[buf->data_stack_top];
529 buf->data_stack_top--;
530 buf->line[line] = buffer;
532 // av_log(NULL, AV_LOG_DEBUG, "slice_buffer_load_line: line: %d remaining: %d\n", line, buf->data_stack_top + 1);
537 static void slice_buffer_release(slice_buffer * buf, int line)
542 assert(line >= 0 && line < buf->line_count);
543 assert(buf->line[line]);
545 offset = buf->line_width * line;
546 buffer = buf->line[line];
547 buf->data_stack_top++;
548 buf->data_stack[buf->data_stack_top] = buffer;
549 buf->line[line] = NULL;
551 // av_log(NULL, AV_LOG_DEBUG, "slice_buffer_release: line: %d remaining: %d\n", line, buf->data_stack_top + 1);
/* Walk all buf->line_count line slots and hand their buffers back to the
 * free stack via slice_buffer_release().
 * NOTE(review): this listing omits the lines between the loop header and the
 * release call. slice_buffer_release() asserts that the slot is non-NULL, so
 * a guard on buf->line[i] is presumably present there -- confirm. */
554 static void slice_buffer_flush(slice_buffer * buf)
557 for (i = 0; i < buf->line_count; i++)
561 // av_log(NULL, AV_LOG_DEBUG, "slice_buffer_flush: line: %d \n", i);
562 slice_buffer_release(buf, i);
/* Tear down a slice buffer: flush it so that every line buffer is back on the
 * free stack, free each of the buf->data_count stack entries, then free the
 * stack array and the line table. av_freep() is given the address of each
 * pointer.
 * NOTE(review): some lines of this function are missing from the listing. */
567 static void slice_buffer_destroy(slice_buffer * buf)
570 slice_buffer_flush(buf);
/* Free the individual line buffers, last stack entry first. */
572 for (i = buf->data_count - 1; i >= 0; i--)
574 assert(buf->data_stack[i]);
575 av_freep(&buf->data_stack[i]);
577 assert(buf->data_stack);
578 av_freep(&buf->data_stack);
580 av_freep(&buf->line);
584 // Avoid a name clash on SGI IRIX
587 #define QEXPSHIFT (7-FRAC_BITS+8) //FIXME try to change this to 0
588 static uint8_t qexp[QROOT];
/**
 * Fold an index into the range [0, m] by reflecting it at both ends
 * (..., 2, 1, 0, 1, 2, ..., m-1, m, m-1, ...).
 *
 * @param v index to fold; may be negative or larger than m
 * @param m largest valid index
 * @return  the reflected index, in [0, m] for m >= 0
 */
static inline int mirror(int v, int m){
    /* With m == 0 the only valid index is 0. Without this guard the loop
     * below would flip a non-zero v between +v and -v forever. */
    if(m == 0)
        return 0;

    /* The unsigned comparison catches both v < 0 and v > m in one test. */
    while((unsigned)v > (unsigned)m){
        v= -v;              /* reflect at the lower end (index 0) */
        if(v < 0)
            v+= 2*m;        /* v was above m: reflect at the upper end */
    }
    return v;
}
/* Write the integer v to the range coder c as a sequence of adaptively coded
 * binary decisions (put_rac), using contexts taken from the state array.
 * Context layout as given by the inline range comments:
 *   state[0]       zero / non-zero flag
 *   state[1..10]   exponent, coded in unary (a run of 1s ended by a 0)
 *   state[11..21]  sign (only emitted when is_signed -- presumably; the
 *                  guarding condition is not visible here)
 *   state[22..31]  mantissa bits below the leading one, MSB first
 * a is |v| and e is av_log2(a), the position of its leading bit.
 * NOTE(review): this listing shows three alternative encodings of the
 * non-zero case back to back; the branch / preprocessor lines that select
 * among them, and the loop headers around the unary part, are missing. */
598 static inline void put_symbol(RangeCoder *c, uint8_t *state, int v, int is_signed){
602 const int a= FFABS(v);
603 const int e= av_log2(a);
/* First visible alternative: context indices are clamped with el = min(e, 10). */
605 const int el= FFMIN(e, 10);
606 put_rac(c, state+0, 0);
609 put_rac(c, state+1+i, 1); //1..10
612 put_rac(c, state+1+9, 1); //1..10
614 put_rac(c, state+1+FFMIN(i,9), 0);
616 for(i=e-1; i>=el; i--){
617 put_rac(c, state+22+9, (a>>i)&1); //22..31
620 put_rac(c, state+22+i, (a>>i)&1); //22..31
624 put_rac(c, state+11 + el, v < 0); //11..21
/* Second visible alternative: context indices are used without clamping. */
627 put_rac(c, state+0, 0);
630 put_rac(c, state+1+i, 1); //1..10
632 put_rac(c, state+1+i, 0);
634 for(i=e-1; i>=0; i--){
635 put_rac(c, state+22+i, (a>>i)&1); //22..31
639 put_rac(c, state+11 + e, v < 0); //11..21
/* Third visible alternative: every context index is clamped with FFMIN. */
642 put_rac(c, state+1+FFMIN(i,9), 1); //1..10
644 put_rac(c, state+1+FFMIN(i,9), 0);
646 for(i=e-1; i>=0; i--){
647 put_rac(c, state+22+FFMIN(i,9), (a>>i)&1); //22..31
651 put_rac(c, state+11 + FFMIN(e,10), v < 0); //11..21
/* A 1 on state[0] is written on a branch whose condition is not visible here
 * (presumably v == 0 -- confirm). */
655 put_rac(c, state+0, 1);
/* Read an integer from the range coder c using the contexts in state.
 * Visible steps: test the flag on state[0]; count the unary exponent e with
 * contexts state[1 + min(e,9)]; read e mantissa bits MSB first with contexts
 * state[22 + min(i,9)]; when is_signed, read a sign decision on
 * state[11 + min(e,10)].
 * NOTE(review): the statements taken on each branch (including all return
 * statements and the initial value of a) are missing from this listing. */
659 static inline int get_symbol(RangeCoder *c, uint8_t *state, int is_signed){
660 if(get_rac(c, state+0))
665 while(get_rac(c, state+1 + FFMIN(e,9))){ //1..10
670 for(i=e-1; i>=0; i--){
/* Shift the value left by one and append the next decoded bit. */
671 a += a + get_rac(c, state+22 + FFMIN(i,9)); //22..31
674 if(is_signed && get_rac(c, state+11 + FFMIN(e,10))) //11..21
/* Write v to the range coder c with an encoding parameterised by log2.
 * Visible steps: r starts at 2^log2 (or 1 when log2 is negative); continue /
 * stop decisions are coded on context state[4 + log2]; finally the low bits
 * of v are written MSB first on contexts state[31 - i].
 * NOTE(review): the loop around the 1-decisions and the updates of v, r and
 * log2 inside it are missing from this listing. */
681 static inline void put_symbol2(RangeCoder *c, uint8_t *state, int v, int log2){
683 int r= log2>=0 ? 1<<log2 : 1;
689 put_rac(c, state+4+log2, 1);
694 put_rac(c, state+4+log2, 0);
696 for(i=log2-1; i>=0; i--){
697 put_rac(c, state+31-i, (v>>i)&1);
/* Read a value from the range coder c with an encoding parameterised by
 * log2. Visible steps: r starts at 2^log2 (or 1 when log2 is negative);
 * decisions are read on context state[4 + log2] until a 0 is seen; then
 * log2 low bits are read MSB first on contexts state[31 - i] and added to v.
 * NOTE(review): the body of the while loop, the initial value of v and the
 * return statement are missing from this listing. */
701 static inline int get_symbol2(RangeCoder *c, uint8_t *state, int log2){
703 int r= log2>=0 ? 1<<log2 : 1;
708 while(get_rac(c, state+4+log2)){
714 for(i=log2-1; i>=0; i--){
715 v+= get_rac(c, state+31-i)<<i;
/* Generic one-dimensional lifting step on DWTELEM samples.
 * Each output is src plus (or, when inverse is set, minus) a prediction
 * (mul*(ref_a + ref_b) + add) >> shift built from two neighbouring ref
 * samples. At the ends the visible code uses 2*ref[0] and 2*ref[w] instead
 * of a pair of neighbours.
 * dst_step / src_step / ref_step are the element strides of the three
 * arrays; w is the index of the last sample handled.
 * NOTE(review): the conditions on mirror_left / mirror_right, the loop over
 * i and the destination of the interior and right-edge assignments are
 * missing from this listing. */
721 static av_always_inline void
722 lift(DWTELEM *dst, DWTELEM *src, DWTELEM *ref,
723 int dst_step, int src_step, int ref_step,
724 int width, int mul, int add, int shift,
725 int highpass, int inverse){
726 const int mirror_left= !highpass;
727 const int mirror_right= (width&1) ^ highpass;
728 const int w= (width>>1) - 1 + (highpass & width);
/* LIFT adds the prediction, or subtracts it when inv is non-zero. */
731 #define LIFT(src, ref, inv) ((src) + ((inv) ? - (ref) : + (ref)))
733 dst[0] = LIFT(src[0], ((mul*2*ref[0]+add)>>shift), inverse);
740 LIFT(src[i*src_step],
741 ((mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add)>>shift),
747 LIFT(src[w*src_step],
748 ((mul*2*ref[w*ref_step]+add)>>shift),
/* Same lifting step as the DWTELEM version, operating on IDWTELEM samples.
 * Each output is src plus (or, when inverse is set, minus) a prediction
 * (mul*(ref_a + ref_b) + add) >> shift; the visible edge cases use
 * 2*ref[0] and 2*ref[w].
 * NOTE(review): the conditions on mirror_left / mirror_right, the loop over
 * i and the destination of the interior and right-edge assignments are
 * missing from this listing. */
753 static av_always_inline void
754 inv_lift(IDWTELEM *dst, IDWTELEM *src, IDWTELEM *ref,
755 int dst_step, int src_step, int ref_step,
756 int width, int mul, int add, int shift,
757 int highpass, int inverse){
758 const int mirror_left= !highpass;
759 const int mirror_right= (width&1) ^ highpass;
760 const int w= (width>>1) - 1 + (highpass & width);
/* LIFT adds the prediction, or subtracts it when inv is non-zero. */
763 #define LIFT(src, ref, inv) ((src) + ((inv) ? - (ref) : + (ref)))
765 dst[0] = LIFT(src[0], ((mul*2*ref[0]+add)>>shift), inverse);
772 LIFT(src[i*src_step],
773 ((mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add)>>shift),
779 LIFT(src[w*src_step],
780 ((mul*2*ref[w*ref_step]+add)>>shift),
/* Variant of the lifting step on DWTELEM samples in which the prediction
 * also depends on the sample being updated (see LIFTS below). Callers pass
 * the raw term mul*(ref_a + ref_b) + add; the shift / division happens
 * inside LIFTS.
 * NOTE(review): the line that opens the LIFTS conditional, the conditions on
 * mirror_left / mirror_right and the loop over i are missing from this
 * listing, so it cannot be told from here which arm is the inverse one. */
786 static av_always_inline void
787 liftS(DWTELEM *dst, DWTELEM *src, DWTELEM *ref,
788 int dst_step, int src_step, int ref_step,
789 int width, int mul, int add, int shift,
790 int highpass, int inverse){
791 const int mirror_left= !highpass;
792 const int mirror_right= (width&1) ^ highpass;
793 const int w= (width>>1) - 1 + (highpass & width);
/* LIFTS reads add and shift from the enclosing function, not from its
 * own arguments. The (5<<25) / (1<<23) pair offsets the dividend and the
 * quotient by the same amount (5<<25 == 20 * (1<<23)); presumably this
 * keeps the dividend non-negative so the division rounds consistently --
 * confirm. */
797 #define LIFTS(src, ref, inv) \
799 (src) + (((ref) + 4*(src))>>shift): \
800 -((-16*(src) + (ref) + add/4 + 1 + (5<<25))/(5*4) - (1<<23)))
802 dst[0] = LIFTS(src[0], mul*2*ref[0]+add, inverse);
809 LIFTS(src[i*src_step],
810 mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add,
816 LIFTS(src[w*src_step], mul*2*ref[w*ref_step]+add, inverse);
/* Same as the DWTELEM LIFTS-based lifting step, operating on IDWTELEM
 * samples. Callers pass the raw term mul*(ref_a + ref_b) + add; the shift /
 * division happens inside LIFTS.
 * NOTE(review): the line that opens the LIFTS conditional, the conditions on
 * mirror_left / mirror_right and the loop over i are missing from this
 * listing. */
819 static av_always_inline void
820 inv_liftS(IDWTELEM *dst, IDWTELEM *src, IDWTELEM *ref,
821 int dst_step, int src_step, int ref_step,
822 int width, int mul, int add, int shift,
823 int highpass, int inverse){
824 const int mirror_left= !highpass;
825 const int mirror_right= (width&1) ^ highpass;
826 const int w= (width>>1) - 1 + (highpass & width);
/* LIFTS reads add and shift from the enclosing function, not from its
 * own arguments. */
830 #define LIFTS(src, ref, inv) \
832 (src) + (((ref) + 4*(src))>>shift): \
833 -((-16*(src) + (ref) + add/4 + 1 + (5<<25))/(5*4) - (1<<23)))
835 dst[0] = LIFTS(src[0], mul*2*ref[0]+add, inverse);
842 LIFTS(src[i*src_step],
843 mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add,
849 LIFTS(src[w*src_step], mul*2*ref[w*ref_step]+add, inverse);
/* Horizontal 5/3 decomposition of one row b of the given width, in place.
 * Odd-indexed samples are copied to the second half of a temporary buffer
 * (offset w2 = ceil(width/2)); the two lift() calls at the end then write
 * the second half of b (b+w2) from temp+w2 and temp, and the first half of b
 * from temp and b+w2.
 * NOTE(review): this listing also shows fragments of a hand-unrolled
 * variant using A1..A4; the declaration of temp, the copy of the
 * even-indexed samples and the preprocessor lines that select between the
 * two variants are missing. */
854 static void horizontal_decompose53i(DWTELEM *b, int width){
856 const int width2= width>>1;
858 const int w2= (width+1)>>1;
/* De-interleave: odd samples go to the upper half of temp. */
860 for(x=0; x<width2; x++){
862 temp[x+w2]= b[2*x + 1];
876 for(x=1; x+1<width2; x+=2){
880 A2 += (A1 + A3 + 2)>>2;
884 A1= temp[x+1+width2];
887 A4 += (A1 + A3 + 2)>>2;
893 A2 += (A1 + A3 + 2)>>2;
/* lift() form: mul=-1, add=0, shift=1 for the b+w2 half, then
 * mul=1, add=2, shift=2 for the first half. */
898 lift(b+w2, temp+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 0);
899 lift(b , temp , b+w2, 1, 1, 1, width, 1, 2, 2, 0, 0);
903 static void vertical_decompose53iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
906 for(i=0; i<width; i++){
907 b1[i] -= (b0[i] + b2[i])>>1;
911 static void vertical_decompose53iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
914 for(i=0; i<width; i++){
915 b1[i] += (b0[i] + b2[i] + 2)>>2;
/* In-place 2-D 5/3 decomposition of a width x height block whose rows are
 * stride elements apart. Rows are visited two at a time; row pointers are
 * obtained with mirror(), which reflects out-of-range row numbers back into
 * [0, height-1]. For each step the two newly reached rows are decomposed
 * horizontally and then the vertical H0 and L0 steps are applied to the
 * trailing rows.
 * NOTE(review): the declaration of y, the timer start macros and the
 * statements that advance b0/b1 at the end of each iteration are missing
 * from this listing. */
919 static void spatial_decompose53i(DWTELEM *buffer, int width, int height, int stride){
921 DWTELEM *b0= buffer + mirror(-2-1, height-1)*stride;
922 DWTELEM *b1= buffer + mirror(-2 , height-1)*stride;
/* Starting at y = -2 lets the pipeline fill before the first real row. */
924 for(y=-2; y<height; y+=2){
925 DWTELEM *b2= buffer + mirror(y+1, height-1)*stride;
926 DWTELEM *b3= buffer + mirror(y+2, height-1)*stride;
/* Comparing against (unsigned)height converts the left side to unsigned, so
 * negative row numbers compare as huge values and fail the test: each check
 * is effectively 0 <= row < height. */
929 if(y+1<(unsigned)height) horizontal_decompose53i(b2, width);
930 if(y+2<(unsigned)height) horizontal_decompose53i(b3, width);
931 STOP_TIMER("horizontal_decompose53i")}
934 if(y+1<(unsigned)height) vertical_decompose53iH0(b1, b2, b3, width);
935 if(y+0<(unsigned)height) vertical_decompose53iL0(b0, b1, b2, width);
936 STOP_TIMER("vertical_decompose53i*")}
/* Forward horizontal 9/7 pass over one row `b` of `width` samples, done as
 * four lifting calls using the W_A*, W_B*, W_C* and W_D* constants in turn.
 * The first two calls read b with a source step of 2 and write contiguously
 * into temp; the last two read temp and write the two halves of b
 * (split at w2 = ceil(width/2)).
 * NOTE(review): assumes lift()/liftS() share inv_liftS's argument order
 * (dst, src, ref, dst_step, src_step, ref_step, width, mul, add, shift,
 * highpass, inverse) -- confirm. */
943 static void horizontal_decompose97i(DWTELEM *b, int width){
945 const int w2= (width+1)>>1;
947 lift (temp+w2, b +1, b , 1, 2, 2, width, W_AM, W_AO, W_AS, 1, 1);
948 liftS(temp , b , temp+w2, 1, 2, 1, width, W_BM, W_BO, W_BS, 0, 0);
949 lift (b +w2, temp+w2, temp , 1, 1, 1, width, W_CM, W_CO, W_CS, 1, 0);
950 lift (b , temp , b +w2, 1, 1, 1, width, W_DM, W_DO, W_DS, 0, 0);
/* Vertical 9/7 lifting step "A": for each of the `width` columns, subtracts
 * (W_AM*(b0 + b2) + W_AO) >> W_AS from b1. Only b1 is written. */
954 static void vertical_decompose97iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
957 for(i=0; i<width; i++){
958 b1[i] -= (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
/* Vertical 9/7 lifting step "C": for each of the `width` columns, adds
 * (W_CM*(b0 + b2) + W_CO) >> W_CS to b1. Only b1 is written. */
962 static void vertical_decompose97iH1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
965 for(i=0; i<width; i++){
966 b1[i] += (W_CM*(b0[i] + b2[i])+W_CO)>>W_CS;
/* Vertical 9/7 lifting step "B" on row b1, using rows b0 and b2 as reference.
 * NOTE(review): two different update formulas for b1[i] follow; presumably
 * only one is compiled in, selected by a preprocessor conditional -- confirm. */
970 static void vertical_decompose97iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
973 for(i=0; i<width; i++){
/* plain shift form, same shape as the A/C/D steps */
975 b1[i] -= (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS;
/* division form: the numerator is biased by 5<<27 before dividing by 5*16 and
 * 1<<23 (= (5<<27)/(5*16)) is subtracted afterwards, so the bias cancels */
977 b1[i] = (16*4*b1[i] - 4*(b0[i] + b2[i]) + W_BO*5 + (5<<27)) / (5*16) - (1<<23);
/* Vertical 9/7 lifting step "D": for each of the `width` columns, adds
 * (W_DM*(b0 + b2) + W_DO) >> W_DS to b1. Only b1 is written. */
982 static void vertical_decompose97iL1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
985 for(i=0; i<width; i++){
986 b1[i] += (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS;
/* One level of the forward 2-D 9/7 transform on `buffer` (rows `stride` apart).
 * Rows are visited two at a time starting at y=-4 with a six-row window
 * b0..b5; out-of-image row indices are folded back with mirror(). New rows
 * are transformed horizontally, then the four vertical lifting steps run on
 * successively earlier rows of the window. */
990 static void spatial_decompose97i(DWTELEM *buffer, int width, int height, int stride){
/* window rows for y=-4: rows -5..-2, mirrored into the image */
992 DWTELEM *b0= buffer + mirror(-4-1, height-1)*stride;
993 DWTELEM *b1= buffer + mirror(-4 , height-1)*stride;
994 DWTELEM *b2= buffer + mirror(-4+1, height-1)*stride;
995 DWTELEM *b3= buffer + mirror(-4+2, height-1)*stride;
997 for(y=-4; y<height; y+=2){
998 DWTELEM *b4= buffer + mirror(y+3, height-1)*stride;
999 DWTELEM *b5= buffer + mirror(y+4, height-1)*stride;
/* the (unsigned) comparison is a combined 0 <= row < height test */
1002 if(y+3<(unsigned)height) horizontal_decompose97i(b4, width);
1003 if(y+4<(unsigned)height) horizontal_decompose97i(b5, width);
1005 STOP_TIMER("horizontal_decompose97i")
/* each step modifies its middle argument: row y+3, then y+2, y+1 and y */
1009 if(y+3<(unsigned)height) vertical_decompose97iH0(b3, b4, b5, width);
1010 if(y+2<(unsigned)height) vertical_decompose97iL0(b2, b3, b4, width);
1011 if(y+1<(unsigned)height) vertical_decompose97iH1(b1, b2, b3, width);
1012 if(y+0<(unsigned)height) vertical_decompose97iL1(b0, b1, b2, width);
1015 STOP_TIMER("vertical_decompose97i")
/* Multi-level forward wavelet transform of `buffer`.
 * For level = 0 .. decomposition_count-1 the single-level routine matching
 * `type` (DWT_97 or DWT_53) is run with width and height shifted right by
 * `level` and stride shifted left by `level`, i.e. each level works on a grid
 * half as large in both directions whose rows lie twice as far apart. */
1025 void ff_spatial_dwt(DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
1028 for(level=0; level<decomposition_count; level++){
1030 case DWT_97: spatial_decompose97i(buffer, width>>level, height>>level, stride<<level); break;
1031 case DWT_53: spatial_decompose53i(buffer, width>>level, height>>level, stride<<level); break;
/* Inverse horizontal 5/3 pass over one row `b` of `width` samples.
 * The two inv_lift() calls read the halves of b (split at w2 = ceil(width/2))
 * and write into the scratch row temp; the final loop stores temp's upper
 * half back to the odd positions of b.
 * temp is a variable-length array of `width` elements on the stack. */
1036 static void horizontal_compose53i(IDWTELEM *b, int width){
1037 IDWTELEM temp[width];
1038 const int width2= width>>1;
1039 const int w2= (width+1)>>1;
/* this loop advances x by 2 and applies the (A1 + A3 + 2)>>2 update to A2 and A4 */
1051 for(x=1; x+1<width2; x+=2){
1055 A2 += (A1 + A3 + 2)>>2;
1059 A1= temp[x+1+width2];
1062 A4 += (A1 + A3 + 2)>>2;
1068 A2 += (A1 + A3 + 2)>>2;
/* same constants as the forward pass, in the opposite order and with the last flag set to 1 */
1072 inv_lift(temp , b , b+w2, 1, 1, 1, width, 1, 2, 2, 0, 1);
1073 inv_lift(temp+w2, b+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 1);
/* re-interleave: upper half of temp goes to the odd positions of b */
1075 for(x=0; x<width2; x++){
1077 b[2*x + 1]= temp[x+w2];
/* Inverse of vertical_decompose53iH0: for each of the `width` columns, adds
 * (b0 + b2) >> 1 back to b1. Only b1 is written. */
1083 static void vertical_compose53iH0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
1086 for(i=0; i<width; i++){
1087 b1[i] += (b0[i] + b2[i])>>1;
/* Inverse of vertical_decompose53iL0: for each of the `width` columns,
 * subtracts (b0 + b2 + 2) >> 2 from b1. Only b1 is written. */
1091 static void vertical_compose53iL0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
1094 for(i=0; i<width; i++){
1095 b1[i] -= (b0[i] + b2[i] + 2)>>2;
/* Prepares the 5/3 inverse-transform state `cs` for slice-buffer operation:
 * cs->b0 and cs->b1 are set to the slice-buffer lines for rows -2 and -1,
 * folded into the image by mirror() and scaled by stride_line. */
1099 static void spatial_compose53i_buffered_init(dwt_compose_t *cs, slice_buffer * sb, int height, int stride_line){
1100 cs->b0 = slice_buffer_get_line(sb, mirror(-1-1, height-1) * stride_line);
1101 cs->b1 = slice_buffer_get_line(sb, mirror(-1 , height-1) * stride_line);
/* Prepares the 5/3 inverse-transform state `cs` for a plain frame buffer:
 * cs->b0 and cs->b1 point at rows -2 and -1 of `buffer`, folded into the
 * image by mirror(); rows are `stride` elements apart. */
1105 static void spatial_compose53i_init(dwt_compose_t *cs, IDWTELEM *buffer, int height, int stride){
1106 cs->b0 = buffer + mirror(-1-1, height-1)*stride;
1107 cs->b1 = buffer + mirror(-1 , height-1)*stride;
/* One step of the 5/3 inverse transform in slice-buffer mode.
 * Uses the two rows remembered in cs plus two newly fetched rows (y+1, y+2),
 * runs the vertical inverse steps, then the horizontal inverse on the rows
 * that are now vertically complete.
 * NOTE(review): y is presumably the current row position taken from cs->y,
 * and cs is presumably advanced at the end of the step -- confirm. */
1111 static void spatial_compose53i_dy_buffered(dwt_compose_t *cs, slice_buffer * sb, int width, int height, int stride_line){
1114 IDWTELEM *b0= cs->b0;
1115 IDWTELEM *b1= cs->b1;
1116 IDWTELEM *b2= slice_buffer_get_line(sb, mirror(y+1, height-1) * stride_line);
1117 IDWTELEM *b3= slice_buffer_get_line(sb, mirror(y+2, height-1) * stride_line);
/* the (unsigned) comparison is a combined 0 <= row < height test */
1120 if(y+1<(unsigned)height) vertical_compose53iL0(b1, b2, b3, width);
1121 if(y+0<(unsigned)height) vertical_compose53iH0(b0, b1, b2, width);
1122 STOP_TIMER("vertical_compose53i*")}
/* rows y-1 (b0) and y (b1) get their horizontal inverse pass */
1125 if(y-1<(unsigned)height) horizontal_compose53i(b0, width);
1126 if(y+0<(unsigned)height) horizontal_compose53i(b1, width);
1127 STOP_TIMER("horizontal_compose53i")}
/* One step of the 5/3 inverse transform on a plain frame buffer.
 * Same sequence as the slice-buffer variant, but rows are addressed directly
 * as buffer + row*stride.
 * NOTE(review): y is presumably the current row position taken from cs->y,
 * and cs is presumably advanced at the end of the step -- confirm. */
1134 static void spatial_compose53i_dy(dwt_compose_t *cs, IDWTELEM *buffer, int width, int height, int stride){
1136 IDWTELEM *b0= cs->b0;
1137 IDWTELEM *b1= cs->b1;
1138 IDWTELEM *b2= buffer + mirror(y+1, height-1)*stride;
1139 IDWTELEM *b3= buffer + mirror(y+2, height-1)*stride;
/* the (unsigned) comparison is a combined 0 <= row < height test */
1142 if(y+1<(unsigned)height) vertical_compose53iL0(b1, b2, b3, width);
1143 if(y+0<(unsigned)height) vertical_compose53iH0(b0, b1, b2, width);
1144 STOP_TIMER("vertical_compose53i*")}
/* rows y-1 (b0) and y (b1) get their horizontal inverse pass */
1147 if(y-1<(unsigned)height) horizontal_compose53i(b0, width);
1148 if(y+0<(unsigned)height) horizontal_compose53i(b1, width);
1149 STOP_TIMER("horizontal_compose53i")}
/* Full single-level 5/3 inverse transform of `buffer`: initialises a compose
 * state and keeps stepping it until cs.y has passed `height`.
 * NOTE(review): termination relies on spatial_compose53i_dy() advancing
 * cs.y -- confirm. */
1156 static void spatial_compose53i(IDWTELEM *buffer, int width, int height, int stride){
1158 spatial_compose53i_init(&cs, buffer, height, stride);
1159 while(cs.y <= height)
1160 spatial_compose53i_dy(&cs, buffer, width, height, stride);
/* Inverse horizontal 9/7 pass over one row `b` of `width` samples.
 * Applies the four lifting steps in the order D, C, B, A (the reverse of
 * horizontal_decompose97i). The first two calls read the halves of b
 * (split at w2 = ceil(width/2)) into the scratch row temp; the last two write
 * back into b with a destination step of 2, at even positions (b) and odd
 * positions (b+1) respectively.
 * temp is a variable-length array of `width` elements on the stack. */
1164 void ff_snow_horizontal_compose97i(IDWTELEM *b, int width){
1165 IDWTELEM temp[width];
1166 const int w2= (width+1)>>1;
1168 inv_lift (temp , b , b +w2, 1, 1, 1, width, W_DM, W_DO, W_DS, 0, 1);
1169 inv_lift (temp+w2, b +w2, temp , 1, 1, 1, width, W_CM, W_CO, W_CS, 1, 1);
1170 inv_liftS(b , temp , temp+w2, 2, 1, 1, width, W_BM, W_BO, W_BS, 0, 1);
1171 inv_lift (b+1 , temp+w2, b , 2, 1, 2, width, W_AM, W_AO, W_AS, 1, 0);
/* Inverse of vertical_decompose97iH0 (step "A"): for each of the `width`
 * columns, adds (W_AM*(b0 + b2) + W_AO) >> W_AS back to b1. */
1174 static void vertical_compose97iH0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
1177 for(i=0; i<width; i++){
1178 b1[i] += (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
/* Inverse of vertical_decompose97iH1 (step "C"): for each of the `width`
 * columns, subtracts (W_CM*(b0 + b2) + W_CO) >> W_CS from b1. */
1182 static void vertical_compose97iH1(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
1185 for(i=0; i<width; i++){
1186 b1[i] -= (W_CM*(b0[i] + b2[i])+W_CO)>>W_CS;
/* Inverse vertical 9/7 lifting step "B" on row b1, using rows b0 and b2.
 * NOTE(review): two different update formulas for b1[i] follow; presumably
 * only one is compiled in, selected by a preprocessor conditional -- confirm. */
1190 static void vertical_compose97iL0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
1193 for(i=0; i<width; i++){
/* plain form */
1195 b1[i] += (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS;
/* form with an extra 4*b1[i] term inside the shifted sum */
1197 b1[i] += (W_BM*(b0[i] + b2[i])+4*b1[i]+W_BO)>>W_BS;
/* Inverse of vertical_decompose97iL1 (step "D"): for each of the `width`
 * columns, subtracts (W_DM*(b0 + b2) + W_DO) >> W_DS from b1. */
1202 static void vertical_compose97iL1(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, int width){
1205 for(i=0; i<width; i++){
1206 b1[i] -= (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS;
/* Fused vertical 9/7 inverse over a six-row window b0..b5.
 * For each of the `width` columns it performs, in order, the D step on b4,
 * the C step on b3, the B step on b2 and the A step on b1, each using the two
 * neighbouring rows (the same arithmetic as the vertical_compose97i{L1,H1,L0,H0}
 * helpers). Rows b0 and b5 are only read.
 * NOTE(review): two alternative B-step formulas follow; presumably only one
 * is compiled in -- confirm. */
1210 void ff_snow_vertical_compose97i(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2, IDWTELEM *b3, IDWTELEM *b4, IDWTELEM *b5, int width){
1213 for(i=0; i<width; i++){
1214 b4[i] -= (W_DM*(b3[i] + b5[i])+W_DO)>>W_DS;
1215 b3[i] -= (W_CM*(b2[i] + b4[i])+W_CO)>>W_CS;
1217 b2[i] += (W_BM*(b1[i] + b3[i])+W_BO)>>W_BS;
1219 b2[i] += (W_BM*(b1[i] + b3[i])+4*b2[i]+W_BO)>>W_BS;
1221 b1[i] += (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
/* Prepares the 9/7 inverse-transform state `cs` for slice-buffer operation:
 * cs->b0..cs->b3 are set to the slice-buffer lines for rows -4..-1, folded
 * into the image by mirror() and scaled by stride_line. */
1225 static void spatial_compose97i_buffered_init(dwt_compose_t *cs, slice_buffer * sb, int height, int stride_line){
1226 cs->b0 = slice_buffer_get_line(sb, mirror(-3-1, height-1) * stride_line);
1227 cs->b1 = slice_buffer_get_line(sb, mirror(-3 , height-1) * stride_line);
1228 cs->b2 = slice_buffer_get_line(sb, mirror(-3+1, height-1) * stride_line);
1229 cs->b3 = slice_buffer_get_line(sb, mirror(-3+2, height-1) * stride_line);
/* Prepares the 9/7 inverse-transform state `cs` for a plain frame buffer:
 * cs->b0..cs->b3 point at rows -4..-1 of `buffer`, folded into the image by
 * mirror(); rows are `stride` elements apart. */
1233 static void spatial_compose97i_init(dwt_compose_t *cs, IDWTELEM *buffer, int height, int stride){
1234 cs->b0 = buffer + mirror(-3-1, height-1)*stride;
1235 cs->b1 = buffer + mirror(-3 , height-1)*stride;
1236 cs->b2 = buffer + mirror(-3+1, height-1)*stride;
1237 cs->b3 = buffer + mirror(-3+2, height-1)*stride;
/* One step of the 9/7 inverse transform in slice-buffer mode.
 * Combines the four rows remembered in cs with two newly fetched rows
 * (y+3, y+4), runs the vertical inverse steps, then the horizontal inverse
 * via the function pointers in `dsp`.
 * NOTE(review): y is presumably the current row position taken from cs->y,
 * and cs is presumably advanced at the end of the step -- confirm. */
1241 static void spatial_compose97i_dy_buffered(DSPContext *dsp, dwt_compose_t *cs, slice_buffer * sb, int width, int height, int stride_line){
1244 IDWTELEM *b0= cs->b0;
1245 IDWTELEM *b1= cs->b1;
1246 IDWTELEM *b2= cs->b2;
1247 IDWTELEM *b3= cs->b3;
1248 IDWTELEM *b4= slice_buffer_get_line(sb, mirror(y + 3, height - 1) * stride_line);
1249 IDWTELEM *b5= slice_buffer_get_line(sb, mirror(y + 4, height - 1) * stride_line);
/* interior rows: y>0 and y+4<height imply all four row checks below would
 * pass, so the fused six-row routine is used instead */
1252 if(y>0 && y+4<height){
1253 dsp->vertical_compose97i(b0, b1, b2, b3, b4, b5, width);
/* otherwise each step is guarded by its own combined 0 <= row < height test */
1255 if(y+3<(unsigned)height) vertical_compose97iL1(b3, b4, b5, width);
1256 if(y+2<(unsigned)height) vertical_compose97iH1(b2, b3, b4, width);
1257 if(y+1<(unsigned)height) vertical_compose97iL0(b1, b2, b3, width);
1258 if(y+0<(unsigned)height) vertical_compose97iH0(b0, b1, b2, width);
1261 STOP_TIMER("vertical_compose97i")}}
/* rows y-1 (b0) and y (b1) get their horizontal inverse pass */
1264 if(y-1<(unsigned)height) dsp->horizontal_compose97i(b0, width);
1265 if(y+0<(unsigned)height) dsp->horizontal_compose97i(b1, width);
/* the timer is only stopped for wide rows with y inside the image */
1266 if(width>400 && y+0<(unsigned)height){
1267 STOP_TIMER("horizontal_compose97i")}}
/* One step of the 9/7 inverse transform on a plain frame buffer.
 * Same sequence as the slice-buffer variant, but rows are addressed directly
 * as buffer + row*stride and the C implementations are called directly
 * (no fused vertical path is visible here).
 * NOTE(review): y is presumably the current row position taken from cs->y,
 * and cs is presumably advanced at the end of the step -- confirm. */
1276 static void spatial_compose97i_dy(dwt_compose_t *cs, IDWTELEM *buffer, int width, int height, int stride){
1278 IDWTELEM *b0= cs->b0;
1279 IDWTELEM *b1= cs->b1;
1280 IDWTELEM *b2= cs->b2;
1281 IDWTELEM *b3= cs->b3;
1282 IDWTELEM *b4= buffer + mirror(y+3, height-1)*stride;
1283 IDWTELEM *b5= buffer + mirror(y+4, height-1)*stride;
/* each step modifies its middle argument; the (unsigned) comparison is a
 * combined 0 <= row < height test */
1286 if(y+3<(unsigned)height) vertical_compose97iL1(b3, b4, b5, width);
1287 if(y+2<(unsigned)height) vertical_compose97iH1(b2, b3, b4, width);
1288 if(y+1<(unsigned)height) vertical_compose97iL0(b1, b2, b3, width);
1289 if(y+0<(unsigned)height) vertical_compose97iH0(b0, b1, b2, width);
1291 STOP_TIMER("vertical_compose97i")}}
/* rows y-1 (b0) and y (b1) get their horizontal inverse pass */
1294 if(y-1<(unsigned)height) ff_snow_horizontal_compose97i(b0, width);
1295 if(y+0<(unsigned)height) ff_snow_horizontal_compose97i(b1, width);
/* the timer is only stopped for wide rows when b0 does not lie after b2 in memory */
1296 if(width>400 && b0 <= b2){
1297 STOP_TIMER("horizontal_compose97i")}}
/* Full single-level 9/7 inverse transform of `buffer`: initialises a compose
 * state and keeps stepping it until cs.y has passed `height`.
 * NOTE(review): termination relies on spatial_compose97i_dy() advancing
 * cs.y -- confirm. */
1306 static void spatial_compose97i(IDWTELEM *buffer, int width, int height, int stride){
1308 spatial_compose97i_init(&cs, buffer, height, stride);
1309 while(cs.y <= height)
1310 spatial_compose97i_dy(&cs, buffer, width, height, stride);
/* Initialises one compose state per decomposition level (cs[level]) for the
 * slice-buffer inverse transform, from the highest level down to level 0.
 * Height is shifted right and stride_line shifted left by `level`; the init
 * routine is chosen by `type` (DWT_97 or DWT_53). `width` is not used by the
 * visible calls. */
1313 static void ff_spatial_idwt_buffered_init(dwt_compose_t *cs, slice_buffer * sb, int width, int height, int stride_line, int type, int decomposition_count){
1315 for(level=decomposition_count-1; level>=0; level--){
1317 case DWT_97: spatial_compose97i_buffered_init(cs+level, sb, height>>level, stride_line<<level); break;
1318 case DWT_53: spatial_compose53i_buffered_init(cs+level, sb, height>>level, stride_line<<level); break;
/* Initialises one compose state per decomposition level (cs[level]) for the
 * frame-buffer inverse transform, from the highest level down to level 0.
 * Height is shifted right and stride shifted left by `level`; the init
 * routine is chosen by `type` (DWT_97 or DWT_53). `width` is not used by the
 * visible calls. */
1323 static void ff_spatial_idwt_init(dwt_compose_t *cs, IDWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
1325 for(level=decomposition_count-1; level>=0; level--){
1327 case DWT_97: spatial_compose97i_init(cs+level, buffer, height>>level, stride<<level); break;
1328 case DWT_53: spatial_compose53i_init(cs+level, buffer, height>>level, stride<<level); break;
/* Advances the frame-buffer inverse transform far enough to cover output
 * row `y`. Levels are processed from the highest down to 0; each level's
 * compose state is stepped while its cs[level].y is at most
 * min((y>>level) + support, height>>level).
 * NOTE(review): the literal 1 in `type==1` presumably equals DWT_53 (the
 * filter given the smaller support value) -- confirm. */
1333 static void ff_spatial_idwt_slice(dwt_compose_t *cs, IDWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count, int y){
1334 const int support = type==1 ? 3 : 5;
1338 for(level=decomposition_count-1; level>=0; level--){
1339 while(cs[level].y <= FFMIN((y>>level)+support, height>>level)){
1341 case DWT_97: spatial_compose97i_dy(cs+level, buffer, width>>level, height>>level, stride<<level);
1343 case DWT_53: spatial_compose53i_dy(cs+level, buffer, width>>level, height>>level, stride<<level);
/* Slice-buffer counterpart of ff_spatial_idwt_slice(): advances the inverse
 * transform far enough to cover output row `y`. Levels are processed from
 * the highest down to 0; each level's compose state is stepped while its
 * cs[level].y is at most min((y>>level) + support, height>>level). The 9/7
 * path additionally receives `dsp` for its function pointers.
 * NOTE(review): the literal 1 in `type==1` presumably equals DWT_53 -- confirm. */
1350 static void ff_spatial_idwt_buffered_slice(DSPContext *dsp, dwt_compose_t *cs, slice_buffer * slice_buf, int width, int height, int stride_line, int type, int decomposition_count, int y){
1351 const int support = type==1 ? 3 : 5;
1355 for(level=decomposition_count-1; level>=0; level--){
1356 while(cs[level].y <= FFMIN((y>>level)+support, height>>level)){
1358 case DWT_97: spatial_compose97i_dy_buffered(dsp, cs+level, slice_buf, width>>level, height>>level, stride_line<<level);
1360 case DWT_53: spatial_compose53i_dy_buffered(cs+level, slice_buf, width>>level, height>>level, stride_line<<level);
/* Whole-frame inverse wavelet transform of `buffer`: sets up one compose
 * state per level (at most MAX_DECOMPOSITIONS) and then drives
 * ff_spatial_idwt_slice() for y = 0, 4, 8, ... below `height`. */
1367 static void ff_spatial_idwt(IDWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
1368 dwt_compose_t cs[MAX_DECOMPOSITIONS];
1370 ff_spatial_idwt_init(cs, buffer, width, height, stride, type, decomposition_count);
1371 for(y=0; y<height; y+=4)
1372 ff_spatial_idwt_slice(cs, buffer, width, height, stride, type, decomposition_count, y);
/* Range-codes the coefficients of subband `b` (read from `src`, rows
 * `stride` apart) using neighbour context plus zero-run coding.
 * Two passes over the same coefficients are visible:
 *   1. a scan that, wherever all context neighbours (l, lt, t, rt and the
 *      parent coefficient p) are zero, accumulates run lengths into runs[];
 *   2. a scan that writes the symbols: coefficients with a non-zero context
 *      are coded with a context-selected binary state, the others consume
 *      the run lengths collected in pass 1.
 * Logs "encoded frame too large" when fewer than w*40 bytes remain in the
 * output buffer. */
1375 static int encode_subband_c0run(SnowContext *s, SubBand *b, IDWTELEM *src, IDWTELEM *parent, int stride, int orientation){
1376 const int w= b->width;
1377 const int h= b->height;
/* ---- pass 1: gather neighbours and collect zero runs ---- */
1389 int /*ll=0, */l=0, lt=0, t=0, rt=0;
1390 v= src[x + y*stride];
/* t / lt / rt: sample above, above-left, above-right */
1393 t= src[x + (y-1)*stride];
1395 lt= src[x - 1 + (y-1)*stride];
1398 rt= src[x + 1 + (y-1)*stride];
/* l: sample to the left */
1402 l= src[x - 1 + y*stride];
/* NOTE(review): ll is only declared inside a comment above, so these two
 * lines are presumably disabled -- confirm */
1404 if(orientation==1) ll= src[y + (x-2)*stride];
1405 else ll= src[x - 2 + y*stride];
/* p: co-located coefficient in the parent band, if inside its bounds */
1411 if(px<b->parent->width && py<b->parent->height)
1412 p= parent[px + py*2*stride];
/* all-zero context: this position takes part in run coding */
1414 if(!(/*ll|*/l|lt|t|rt|p)){
1416 runs[run_index++]= run;
1424 max_index= run_index;
1425 runs[run_index++]= run;
1427 run= runs[run_index++];
/* number of stored runs goes out first, then the first run length */
1429 put_symbol2(&s->c, b->state[30], max_index, 0);
1430 if(run_index <= max_index)
1431 put_symbol2(&s->c, b->state[1], run, 3);
/* output-space check against w*40 remaining bytes */
1434 if(s->c.bytestream_end - s->c.bytestream < w*40){
1435 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
/* ---- pass 2: same neighbour gathering, now emitting symbols ---- */
1440 int /*ll=0, */l=0, lt=0, t=0, rt=0;
1441 v= src[x + y*stride];
1444 t= src[x + (y-1)*stride];
1446 lt= src[x - 1 + (y-1)*stride];
1449 rt= src[x + 1 + (y-1)*stride];
1453 l= src[x - 1 + y*stride];
1455 if(orientation==1) ll= src[y + (x-2)*stride];
1456 else ll= src[x - 2 + y*stride];
1462 if(px<b->parent->width && py<b->parent->height)
1463 p= parent[px + py*2*stride];
/* non-zero context: code the zero/non-zero flag of v in a state chosen by
 * the log2 of a weighted sum of neighbour magnitudes */
1465 if(/*ll|*/l|lt|t|rt|p){
1466 int context= av_log2(/*FFABS(ll) + */3*FFABS(l) + FFABS(lt) + 2*FFABS(t) + FFABS(rt) + FFABS(p));
1468 put_rac(&s->c, &b->state[0][context], !!v);
/* zero context: fetch the next stored run and emit it while runs remain */
1471 run= runs[run_index++];
1473 if(run_index <= max_index)
1474 put_symbol2(&s->c, b->state[1], run, 3);
/* magnitude (|v|-1) and sign of v; the sign state is picked from the
 * left and top neighbours, each mapped through quant3bA after being
 * packed as 2*|value| + (value<0) */
1482 int context= av_log2(/*FFABS(ll) + */3*FFABS(l) + FFABS(lt) + 2*FFABS(t) + FFABS(rt) + FFABS(p));
1483 int l2= 2*FFABS(l) + (l<0);
1484 int t2= 2*FFABS(t) + (t<0);
1486 put_symbol2(&s->c, b->state[context + 2], FFABS(v)-1, context-4);
1487 put_rac(&s->c, &b->state[0][16 + 1 + 3 + quant3bA[l2&0xFF] + 3*quant3bA[t2&0xFF]], v<0);
/* Encodes one subband by forwarding all arguments to encode_subband_c0run()
 * and returning its result. The other encoder variants named below are
 * commented out. */
1495 static int encode_subband(SnowContext *s, SubBand *b, IDWTELEM *src, IDWTELEM *parent, int stride, int orientation){
1496 // encode_subband_qtree(s, b, src, parent, stride, orientation);
1497 // encode_subband_z0run(s, b, src, parent, stride, orientation);
1498 return encode_subband_c0run(s, b, src, parent, stride, orientation);
1499 // encode_subband_dzr(s, b, src, parent, stride, orientation);
/* Decodes the coefficients of subband `b` from the range coder into the
 * sparse list b->x_coeff (entries with an x position and a coeff value).
 * Context comes from the previously decoded row of the same list (prev_xc)
 * and, if `parent` is non-NULL, from the parent band's list. The list is
 * terminated by an entry whose x is w+1.
 * Decoded values are packed as 2*(magnitude) + sign bit (see the v= lines).
 * NOTE(review): `orientation` is not used by any visible line -- confirm. */
1502 static inline void unpack_coeffs(SnowContext *s, SubBand *b, SubBand * parent, int orientation){
1503 const int w= b->width;
1504 const int h= b->height;
1509 x_and_coeff *xc= b->x_coeff;
1510 x_and_coeff *prev_xc= NULL;
1511 x_and_coeff *prev2_xc= xc;
1512 x_and_coeff *parent_xc= parent ? parent->x_coeff : NULL;
1513 x_and_coeff *prev_parent_xc= parent_xc;
/* number of runs, then (if any) the first run length */
1515 runs= get_symbol2(&s->c, b->state[30], 0);
1516 if(runs-- > 0) run= get_symbol2(&s->c, b->state[1], 3);
1521 int lt=0, t=0, rt=0;
1523 if(y && prev_xc->x == 0){
1535 if(prev_xc->x == x + 1)
/* parent coefficients are addressed at half resolution (x>>1) */
1541 if(x>>1 > parent_xc->x){
1544 if(x>>1 == parent_xc->x){
1545 p= parent_xc->coeff;
/* non-zero context: context-coded zero flag, then magnitude and sign */
1548 if(/*ll|*/l|lt|t|rt|p){
1549 int context= av_log2(/*FFABS(ll) + */3*(l>>1) + (lt>>1) + (t&~1) + (rt>>1) + (p>>1));
1551 v=get_rac(&s->c, &b->state[0][context]);
1553 v= 2*(get_symbol2(&s->c, b->state[context + 2], context-4) + 1);
1554 v+=get_rac(&s->c, &b->state[0][16 + 1 + 3 + quant3bA[l&0xFF] + 3*quant3bA[t&0xFF]]);
/* zero context: next run length, then magnitude and sign with context 0 */
1561 if(runs-- > 0) run= get_symbol2(&s->c, b->state[1], 3);
1563 v= 2*(get_symbol2(&s->c, b->state[0 + 2], 0-4) + 1);
1564 v+=get_rac(&s->c, &b->state[0][16 + 1 + 3]);
/* limit how far a run may be skipped: up to the next entry of the previous
 * row (or the row end on the first row) and up to the next parent entry */
1573 if(y) max_run= FFMIN(run, prev_xc->x - x - 2);
1574 else max_run= FFMIN(run, w-x-1);
1576 max_run= FFMIN(max_run, 2*parent_xc->x - x - 1);
1582 (xc++)->x= w+1; //end marker
/* skip to the parent's end marker (x == parent->width+1) */
1588 while(parent_xc->x != parent->width+1)
1591 prev_parent_xc= parent_xc;
1593 parent_xc= prev_parent_xc;
1598 (xc++)->x= w+1; //end marker
/* Dequantises rows start_y .. h-1 of subband `b` from its sparse coefficient
 * list b->x_coeff into the slice buffer `sb`.
 * save_state[0] carries the read position in b->x_coeff from one slice to
 * the next: it is restored on entry for later slices and stored on exit. */
1602 static inline void decode_subband_slice_buffered(SnowContext *s, SubBand *b, slice_buffer * sb, int start_y, int h, int save_state[1]){
1603 const int w= b->width;
/* quantiser: qlog is clipped to [0, QROOT*16]; qmul is a table value shifted
 * by the upper bits of qlog; qadd is the bias term derived from s->qbias */
1605 const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
1606 int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
1607 int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
1612 if(b->ibuf == s->spatial_idwt_buffer || s->qlog == LOSSLESS_QLOG){
1617 /* If we are on the second or later slice, restore our index. */
1619 new_index = save_state[0];
1622 for(y=start_y; y<h; y++){
/* the destination line is zeroed first, then only the listed coefficients are written */
1625 IDWTELEM * line = slice_buffer_get_line(sb, y * b->stride_line + b->buf_y_offset) + b->buf_x_offset;
1626 memset(line, 0, b->width*sizeof(IDWTELEM));
1627 v = b->x_coeff[new_index].coeff;
1628 x = b->x_coeff[new_index++].x;
/* v>>1 is the magnitude, v&1 the sign bit. u is 0 or -1, so (t^u) - u
 * leaves t unchanged for u==0 and negates it for u==-1 */
1631 register int t= ( (v>>1)*qmul + qadd)>>QEXPSHIFT;
1632 register int u= -(v&1);
1633 line[x] = (t^u) - u;
1635 v = b->x_coeff[new_index].coeff;
1636 x = b->x_coeff[new_index++].x;
1639 if(w > 200 && start_y != 0/*level+1 == s->spatial_decomposition_count*/){
1640 STOP_TIMER("decode_subband")
1643 /* Save our variables for the next slice. */
1644 save_state[0] = new_index;
/* Resets all adaptive coder state to MID_STATE: the per-band state arrays of
 * all three planes for every decomposition level, plus s->header_state and
 * s->block_state. Orientation 0 is only visited at level 0; higher levels
 * start at orientation 1. */
1649 static void reset_contexts(SnowContext *s){ //FIXME better initial contexts
1650 int plane_index, level, orientation;
1652 for(plane_index=0; plane_index<3; plane_index++){
1653 for(level=0; level<s->spatial_decomposition_count; level++){
1654 for(orientation=level ? 1:0; orientation<4; orientation++){
1655 memset(s->plane[plane_index].band[level][orientation].state, MID_STATE, sizeof(s->plane[plane_index].band[level][orientation].state));
1659 memset(s->header_state, MID_STATE, sizeof(s->header_state));
1660 memset(s->block_state, MID_STATE, sizeof(s->block_state));
/* Allocates the zero-initialised block array s->block.
 * w and h are the picture dimensions in units of 1<<LOG2_MB_SIZE pixels;
 * -((-n)>>k) rounds n/2^k upwards (given an arithmetic right shift on
 * negative values). The byte size w*h*sizeof(BlockNode) is shifted left by
 * 2*block_max_depth, i.e. multiplied by 4 for every level of block
 * subdivision.
 * NOTE(review): no check of the av_mallocz() result is visible here -- confirm. */
1663 static int alloc_blocks(SnowContext *s){
1664 int w= -((-s->avctx->width )>>LOG2_MB_SIZE);
1665 int h= -((-s->avctx->height)>>LOG2_MB_SIZE);
1670 s->block= av_mallocz(w * h * sizeof(BlockNode) << (s->block_max_depth*2));
/* Helper operating on two range coders that keeps d's own output pointers:
 * d->bytestream and d->bytestream_start are saved on entry and written back
 * at the end.
 * NOTE(review): the statement between save and restore presumably copies *s
 * into *d -- confirm. */
1674 static inline void copy_rac_state(RangeCoder *d, RangeCoder *s){
1675 uint8_t *bytestream= d->bytestream;
1676 uint8_t *bytestream_start= d->bytestream_start;
1678 d->bytestream= bytestream;
1679 d->bytestream_start= bytestream_start;
1682 //near copy & paste from dsputil, FIXME
/* Walks a w x w square of pixels starting at `pix`, rows `line_size` bytes
 * apart (after each row the pointer is moved on by line_size - w).
 * NOTE(review): presumably returns the sum of the visited pixels, as the
 * name and the callers' mean computation suggest -- confirm. */
1683 static int pix_sum(uint8_t * pix, int line_size, int w)
1688 for (i = 0; i < w; i++) {
1689 for (j = 0; j < w; j++) {
1693 pix += line_size - w;
1698 //near copy & paste from dsputil, FIXME
/* Walks a w x w square of pixels starting at `pix`, rows `line_size` bytes
 * apart, with `sq` pointing 256 entries into ff_squareTbl.
 * NOTE(review): presumably returns the sum of squared pixel values looked
 * up through sq[] -- confirm. */
1699 static int pix_norm1(uint8_t * pix, int line_size, int w)
1702 uint32_t *sq = ff_squareTbl + 256;
1705 for (i = 0; i < w; i++) {
1706 for (j = 0; j < w; j ++) {
1710 pix += line_size - w;
/* Writes one BlockNode value into every finest-level cell covered by the
 * block at (x, y) of tree depth `level`.
 * w is the block-array width at the finest depth; a block at `level` covers
 * block_w x block_w cells with block_w = 1 << (block_max_depth - level).
 * NOTE(review): `block` is presumably assembled from the l/cb/cr/mx/my/ref/
 * type arguments -- confirm. */
1715 static inline void set_blocks(SnowContext *s, int level, int x, int y, int l, int cb, int cr, int mx, int my, int ref, int type){
1716 const int w= s->b_width << s->block_max_depth;
1717 const int rem_depth= s->block_max_depth - level;
1718 const int index= (x + y*w) << rem_depth;
1719 const int block_w= 1<<rem_depth;
1732 for(j=0; j<block_w; j++){
1733 for(i=0; i<block_w; i++){
1734 s->block[index + i + j*w]= block;
/* Points the motion-estimation context `c` at the source and reference
 * planes for a block at pixel position (x, y): c->src[0][i] gets src[i]
 * unchanged and c->ref[0][i] gets ref[i] advanced by a per-plane offset.
 * The two chroma offsets are (y*uvstride + x) >> 1.
 * NOTE(review): ref2 and ref_index are not used by any visible line -- confirm. */
1739 static inline void init_ref(MotionEstContext *c, uint8_t *src[3], uint8_t *ref[3], uint8_t *ref2[3], int x, int y, int ref_index){
1740 const int offset[3]= {
1742 ((y*c->uvstride + x)>>1),
1743 ((y*c->uvstride + x)>>1),
1747 c->src[0][i]= src [i];
1748 c->ref[0][i]= ref [i] + offset[i];
/* Predicts a motion vector (*mx, *my) for a block that references frame
 * `ref`, as the component-wise median of the left, top and top-right
 * neighbours' vectors.
 * With a single reference frame the neighbour vectors are used as they are.
 * Otherwise each neighbour vector is first multiplied by
 * scale_mv_ref[ref][neighbour->ref] and divided by 256 with rounding
 * (+128, >>8). */
1753 static inline void pred_mv(SnowContext *s, int *mx, int *my, int ref,
1754 const BlockNode *left, const BlockNode *top, const BlockNode *tr){
1755 if(s->ref_frames == 1){
1756 *mx = mid_pred(left->mx, top->mx, tr->mx);
1757 *my = mid_pred(left->my, top->my, tr->my);
1759 const int *scale = scale_mv_ref[ref];
1760 *mx = mid_pred((left->mx * scale[left->ref] + 128) >>8,
1761 (top ->mx * scale[top ->ref] + 128) >>8,
1762 (tr ->mx * scale[tr ->ref] + 128) >>8);
1763 *my = mid_pred((left->my * scale[left->ref] + 128) >>8,
1764 (top ->my * scale[top ->ref] + 128) >>8,
1765 (tr ->my * scale[tr ->ref] + 128) >>8);
/* Named slots of the predictor array P used by encode_q_branch(): slot 3
 * holds the top-right neighbour's vector and slot 4 the median predictor. */
1772 #define P_TOPRIGHT P[3]
1773 #define P_MEDIAN P[4]
1775 #define FLAG_QPEL 1 //must be 1
/* Encoder decision for the block at (x, y) of quadtree depth `level`.
 * The visible code
 *   - looks up the neighbouring blocks used for prediction and contexts,
 *   - runs a motion search over every reference frame,
 *   - trial-encodes an inter candidate into p_buffer and an intra (flat
 *     colour) candidate into i_buffer, each with its own copy of
 *     s->block_state, and computes a score for each,
 *   - if the block can still be split, recursively encodes its four
 *     quadrants and sums their scores into score2,
 *   - finally copies the chosen candidate's bytes and coder state back into
 *     the real range coder and records the decision with set_blocks(). */
1777 static int encode_q_branch(SnowContext *s, int level, int x, int y){
/* scratch output and state for the inter (p_) and intra (i_) candidates */
1778 uint8_t p_buffer[1024];
1779 uint8_t i_buffer[1024];
1780 uint8_t p_state[sizeof(s->block_state)];
1781 uint8_t i_state[sizeof(s->block_state)];
/* remember where the real bitstream stood before this block */
1783 uint8_t *pbbak= s->c.bytestream;
1784 uint8_t *pbbak_start= s->c.bytestream_start;
1785 int score, score2, iscore, i_len, p_len, block_s, sum, base_bits;
/* block-array geometry at the finest depth */
1786 const int w= s->b_width << s->block_max_depth;
1787 const int h= s->b_height << s->block_max_depth;
1788 const int rem_depth= s->block_max_depth - level;
1789 const int index= (x + y*w) << rem_depth;
/* block edge length in pixels at this depth */
1790 const int block_w= 1<<(LOG2_MB_SIZE - level);
1791 int trx= (x+1)<<rem_depth;
1792 int try= (y+1)<<rem_depth;
/* neighbours; null_block stands in at the picture edges */
1793 const BlockNode *left = x ? &s->block[index-1] : &null_block;
1794 const BlockNode *top = y ? &s->block[index-w] : &null_block;
1795 const BlockNode *right = trx<w ? &s->block[index+1] : &null_block;
1796 const BlockNode *bottom= try<h ? &s->block[index+w] : &null_block;
1797 const BlockNode *tl = y && x ? &s->block[index-w-1] : left;
1798 const BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
/* colour predictors come from the left neighbour */
1799 int pl = left->color[0];
1800 int pcb= left->color[1];
1801 int pcr= left->color[2];
1805 const int stride= s->current_picture.linesize[0];
1806 const int uvstride= s->current_picture.linesize[1];
/* top-left pixel of this block in the luma plane and both chroma planes */
1807 uint8_t *current_data[3]= { s->input_picture.data[0] + (x + y* stride)*block_w,
1808 s->input_picture.data[1] + (x + y*uvstride)*block_w/2,
1809 s->input_picture.data[2] + (x + y*uvstride)*block_w/2};
1811 int16_t last_mv[3][2];
1812 int qpel= !!(s->avctx->flags & CODEC_FLAG_QPEL); //unused
1813 const int shift= 1+qpel;
1814 MotionEstContext *c= &s->m.me;
/* coder contexts derived from the left/top neighbours */
1815 int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref);
1816 int mx_context= av_log2(2*FFABS(left->mx - top->mx));
1817 int my_context= av_log2(2*FFABS(left->my - top->my));
1818 int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
1819 int ref, best_ref, ref_score, ref_mx, ref_my;
1821 assert(sizeof(s->block_state) >= 256);
1823 set_blocks(s, level, x, y, pl, pcb, pcr, 0, 0, 0, BLOCK_INTRA);
1827 // clip predictors / edge ?
/* motion-vector predictors from the neighbours */
1829 P_LEFT[0]= left->mx;
1830 P_LEFT[1]= left->my;
1833 P_TOPRIGHT[0]= tr->mx;
1834 P_TOPRIGHT[1]= tr->my;
/* vectors of this position, its right and its bottom neighbour, passed to the search */
1836 last_mv[0][0]= s->block[index].mx;
1837 last_mv[0][1]= s->block[index].my;
1838 last_mv[1][0]= right->mx;
1839 last_mv[1][1]= right->my;
1840 last_mv[2][0]= bottom->mx;
1841 last_mv[2][1]= bottom->my;
1848 assert(c-> stride == stride);
1849 assert(c->uvstride == uvstride);
1851 c->penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_cmp);
1852 c->sub_penalty_factor= get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_sub_cmp);
1853 c->mb_penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->mb_cmp);
1854 c->current_mv_penalty= c->mv_penalty[s->m.f_code=1] + MAX_MV;
/* search window relative to this block: the picture area extended by 16-2 pixels on each side */
1856 c->xmin = - x*block_w - 16+2;
1857 c->ymin = - y*block_w - 16+2;
1858 c->xmax = - (x+1)*block_w + (w<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-2;
1859 c->ymax = - (y+1)*block_w + (h<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-2;
/* clamp the predictors to the window (limits scaled by `shift`) */
1861 if(P_LEFT[0] > (c->xmax<<shift)) P_LEFT[0] = (c->xmax<<shift);
1862 if(P_LEFT[1] > (c->ymax<<shift)) P_LEFT[1] = (c->ymax<<shift);
1863 if(P_TOP[0] > (c->xmax<<shift)) P_TOP[0] = (c->xmax<<shift);
1864 if(P_TOP[1] > (c->ymax<<shift)) P_TOP[1] = (c->ymax<<shift);
1865 if(P_TOPRIGHT[0] < (c->xmin<<shift)) P_TOPRIGHT[0]= (c->xmin<<shift);
1866 if(P_TOPRIGHT[0] > (c->xmax<<shift)) P_TOPRIGHT[0]= (c->xmax<<shift); //due to pmx no clip
1867 if(P_TOPRIGHT[1] > (c->ymax<<shift)) P_TOPRIGHT[1]= (c->ymax<<shift);
1869 P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]);
1870 P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);
/* the search predictor is either the left vector or the median */
1873 c->pred_x= P_LEFT[0];
1874 c->pred_y= P_LEFT[1];
1876 c->pred_x = P_MEDIAN[0];
1877 c->pred_y = P_MEDIAN[1];
/* motion search against each reference frame */
1882 for(ref=0; ref<s->ref_frames; ref++){
1883 init_ref(c, current_data, s->last_picture[ref].data, NULL, block_w*x, block_w*y, 0);
1885 ref_score= ff_epzs_motion_search(&s->m, &ref_mx, &ref_my, P, 0, /*ref_index*/ 0, last_mv,
1886 (1<<16)>>shift, level-LOG2_MB_SIZE+4, block_w);
1888 assert(ref_mx >= c->xmin);
1889 assert(ref_mx <= c->xmax);
1890 assert(ref_my >= c->ymin);
1891 assert(ref_my <= c->ymax);
1893 ref_score= c->sub_motion_search(&s->m, &ref_mx, &ref_my, ref_score, 0, 0, level-LOG2_MB_SIZE+4, block_w);
1894 ref_score= ff_get_mb_score(&s->m, ref_mx, ref_my, 0, 0, level-LOG2_MB_SIZE+4, block_w, 0);
/* penalty that grows with the reference index */
1895 ref_score+= 2*av_log2(2*ref)*c->penalty_factor;
/* cache the per-reference result when the cache arrays are allocated */
1896 if(s->ref_mvs[ref]){
1897 s->ref_mvs[ref][index][0]= ref_mx;
1898 s->ref_mvs[ref][index][1]= ref_my;
1899 s->ref_scores[ref][index]= ref_score;
1901 if(score > ref_score){
1908 //FIXME if mb_cmp != SSE then intra cannot be compared currently and mb_penalty vs. lambda2
/* bit position of the real coder before this block; candidate costs are measured against it */
1911 base_bits= get_rac_count(&s->c) - 8*(s->c.bytestream - s->c.bytestream_start);
/* ---- inter candidate, written to p_buffer with state p_state ---- */
1913 pc.bytestream_start=
1914 pc.bytestream= p_buffer; //FIXME end/start? and at the other too
1915 memcpy(p_state, s->block_state, sizeof(s->block_state));
/* "not split" flag, only coded when a split would be possible */
1917 if(level!=s->block_max_depth)
1918 put_rac(&pc, &p_state[4 + s_context], 1);
/* block type flag: 0 = inter */
1919 put_rac(&pc, &p_state[1 + left->type + top->type], 0);
1920 if(s->ref_frames > 1)
1921 put_symbol(&pc, &p_state[128 + 1024 + 32*ref_context], best_ref, 0);
/* the vector is coded as a difference from the prediction */
1922 pred_mv(s, &pmx, &pmy, best_ref, left, top, tr);
1923 put_symbol(&pc, &p_state[128 + 32*(mx_context + 16*!!best_ref)], mx - pmx, 1);
1924 put_symbol(&pc, &p_state[128 + 32*(my_context + 16*!!best_ref)], my - pmy, 1);
1925 p_len= pc.bytestream - pc.bytestream_start;
1926 score += (s->lambda2*(get_rac_count(&pc)-base_bits))>>FF_LAMBDA_SHIFT;
/* ---- intra candidate: one flat colour per plane ---- */
/* l is the rounded mean of the luma block */
1928 block_s= block_w*block_w;
1929 sum = pix_sum(current_data[0], stride, block_w);
1930 l= (sum + block_s/2)/block_s;
/* if pix_norm1 returns the sum of squared pixels, this expands sum((p - l)^2) */
1931 iscore = pix_norm1(current_data[0], stride, block_w) - 2*l*sum + l*l*block_s;
/* chroma means over the half-size blocks; chroma does not contribute to iscore */
1933 block_s= block_w*block_w>>2;
1934 sum = pix_sum(current_data[1], uvstride, block_w>>1);
1935 cb= (sum + block_s/2)/block_s;
1936 // iscore += pix_norm1(&current_mb[1][0], uvstride, block_w>>1) - 2*cb*sum + cb*cb*block_s;
1937 sum = pix_sum(current_data[2], uvstride, block_w>>1);
1938 cr= (sum + block_s/2)/block_s;
1939 // iscore += pix_norm1(&current_mb[2][0], uvstride, block_w>>1) - 2*cr*sum + cr*cr*block_s;
1942 ic.bytestream_start=
1943 ic.bytestream= i_buffer; //FIXME end/start? and at the other too
1944 memcpy(i_state, s->block_state, sizeof(s->block_state));
1945 if(level!=s->block_max_depth)
1946 put_rac(&ic, &i_state[4 + s_context], 1);
/* block type flag: 1 = intra, followed by colour differences from the left neighbour */
1947 put_rac(&ic, &i_state[1 + left->type + top->type], 1);
1948 put_symbol(&ic, &i_state[32], l-pl , 1);
1949 put_symbol(&ic, &i_state[64], cb-pcb, 1);
1950 put_symbol(&ic, &i_state[96], cr-pcr, 1);
1951 i_len= ic.bytestream - ic.bytestream_start;
1952 iscore += (s->lambda2*(get_rac_count(&ic)-base_bits))>>FF_LAMBDA_SHIFT;
1954 // assert(score==256*256*256*64-1);
1955 assert(iscore < 255*255*256 + s->lambda2*10);
1956 assert(iscore >= 0);
1957 assert(l>=0 && l<=255);
1958 assert(pl>=0 && pl<=255);
/* scene-change statistics from the two scores */
1961 int varc= iscore >> 8;
1962 int vard= score >> 8;
1963 if (vard <= 64 || vard < varc)
1964 c->scene_change_score+= ff_sqrt(vard) - ff_sqrt(varc);
1966 c->scene_change_score+= s->m.qscale;
/* ---- split candidate: code "split" (0) and recurse into the four quadrants ---- */
1969 if(level!=s->block_max_depth){
1970 put_rac(&s->c, &s->block_state[4 + s_context], 0);
1971 score2 = encode_q_branch(s, level+1, 2*x+0, 2*y+0);
1972 score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+0);
1973 score2+= encode_q_branch(s, level+1, 2*x+0, 2*y+1);
1974 score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+1);
1975 score2+= s->lambda2>>FF_LAMBDA_SHIFT; //FIXME exact split overhead
1977 if(score2 < score && score2 < iscore)
/* ---- commit intra: replay i_buffer into the real stream, adopt i_state ---- */
1982 pred_mv(s, &pmx, &pmy, 0, left, top, tr);
1983 memcpy(pbbak, i_buffer, i_len);
1985 s->c.bytestream_start= pbbak_start;
1986 s->c.bytestream= pbbak + i_len;
1987 set_blocks(s, level, x, y, l, cb, cr, pmx, pmy, 0, BLOCK_INTRA);
1988 memcpy(s->block_state, i_state, sizeof(s->block_state));
/* ---- commit inter: replay p_buffer into the real stream, adopt p_state ---- */
1991 memcpy(pbbak, p_buffer, p_len);
1993 s->c.bytestream_start= pbbak_start;
1994 s->c.bytestream= pbbak + p_len;
1995 set_blocks(s, level, x, y, pl, pcb, pcr, mx, my, best_ref, 0);
1996 memcpy(s->block_state, p_state, sizeof(s->block_state));
/* Returns non-zero when blocks a and b carry the same coded information.
 * If both are intra, only the three colour components are compared.
 * Otherwise mx, my, ref and the BLOCK_INTRA bit of the type must all match.
 * The differences are OR-ed together so that any mismatch makes the result 0. */
2001 static av_always_inline int same_block(BlockNode *a, BlockNode *b){
2002 if((a->type&BLOCK_INTRA) && (b->type&BLOCK_INTRA)){
2003 return !((a->color[0] - b->color[0]) | (a->color[1] - b->color[1]) | (a->color[2] - b->color[2]));
2005 return !((a->mx - b->mx) | (a->my - b->my) | (a->ref - b->ref) | ((a->type ^ b->type)&BLOCK_INTRA));
/* Writes the block at (x, y) of quadtree depth `level` to the bitstream
 * using the decisions already stored in s->block (type, colour, mx/my/ref);
 * no search is visible in this function.
 * While a split is still possible, a flag is coded: 1 when the block and
 * its right, lower and lower-right cells (b+1, b+w, b+w+1) compare equal
 * under same_block(), otherwise 0 followed by recursion into the four
 * quadrants. The block's own data is then coded as intra (colour
 * differences from the left neighbour) or inter (optional reference index
 * plus vector differences from pred_mv()). */
2009 static void encode_q_branch2(SnowContext *s, int level, int x, int y){
2010 const int w= s->b_width << s->block_max_depth;
2011 const int rem_depth= s->block_max_depth - level;
2012 const int index= (x + y*w) << rem_depth;
2013 int trx= (x+1)<<rem_depth;
2014 BlockNode *b= &s->block[index];
/* neighbours; null_block stands in at the picture edges */
2015 const BlockNode *left = x ? &s->block[index-1] : &null_block;
2016 const BlockNode *top = y ? &s->block[index-w] : &null_block;
2017 const BlockNode *tl = y && x ? &s->block[index-w-1] : left;
2018 const BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
/* colour predictors come from the left neighbour */
2019 int pl = left->color[0];
2020 int pcb= left->color[1];
2021 int pcr= left->color[2];
/* coder contexts; the mv contexts are offset by 16 when this block's ref is non-zero */
2023 int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref);
2024 int mx_context= av_log2(2*FFABS(left->mx - top->mx)) + 16*!!b->ref;
2025 int my_context= av_log2(2*FFABS(left->my - top->my)) + 16*!!b->ref;
2026 int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
2029 set_blocks(s, level, x, y, pl, pcb, pcr, 0, 0, 0, BLOCK_INTRA);
2033 if(level!=s->block_max_depth){
2034 if(same_block(b,b+1) && same_block(b,b+w) && same_block(b,b+w+1)){
2035 put_rac(&s->c, &s->block_state[4 + s_context], 1);
2037 put_rac(&s->c, &s->block_state[4 + s_context], 0);
2038 encode_q_branch2(s, level+1, 2*x+0, 2*y+0);
2039 encode_q_branch2(s, level+1, 2*x+1, 2*y+0);
2040 encode_q_branch2(s, level+1, 2*x+0, 2*y+1);
2041 encode_q_branch2(s, level+1, 2*x+1, 2*y+1);
/* intra: type flag 1, then the three colour differences */
2045 if(b->type & BLOCK_INTRA){
2046 pred_mv(s, &pmx, &pmy, 0, left, top, tr);
2047 put_rac(&s->c, &s->block_state[1 + (left->type&1) + (top->type&1)], 1);
2048 put_symbol(&s->c, &s->block_state[32], b->color[0]-pl , 1);
2049 put_symbol(&s->c, &s->block_state[64], b->color[1]-pcb, 1);
2050 put_symbol(&s->c, &s->block_state[96], b->color[2]-pcr, 1);
2051 set_blocks(s, level, x, y, b->color[0], b->color[1], b->color[2], pmx, pmy, 0, BLOCK_INTRA);
/* inter: type flag 0, optional reference index, then the vector differences */
2053 pred_mv(s, &pmx, &pmy, b->ref, left, top, tr);
2054 put_rac(&s->c, &s->block_state[1 + (left->type&1) + (top->type&1)], 0);
2055 if(s->ref_frames > 1)
2056 put_symbol(&s->c, &s->block_state[128 + 1024 + 32*ref_context], b->ref, 0);
2057 put_symbol(&s->c, &s->block_state[128 + 32*mx_context], b->mx - pmx, 1);
2058 put_symbol(&s->c, &s->block_state[128 + 32*my_context], b->my - pmy, 1);
2059 set_blocks(s, level, x, y, pl, pcb, pcr, b->mx, b->my, b->ref, 0);
/* Decodes the block at (x, y) of quadtree depth `level` from the bitstream.
 * A leaf is reached when level equals block_max_depth or the decoded split
 * flag is 1; its type is read, then either colour differences (intra) or
 * an optional reference index and vector differences (inter), each added to
 * a prediction from the neighbours, and the result is stored with
 * set_blocks(). Otherwise the four quadrants are decoded recursively. */
2063 static void decode_q_branch(SnowContext *s, int level, int x, int y){
2064 const int w= s->b_width << s->block_max_depth;
2065 const int rem_depth= s->block_max_depth - level;
2066 const int index= (x + y*w) << rem_depth;
2067 int trx= (x+1)<<rem_depth;
/* neighbours; null_block stands in at the picture edges */
2068 const BlockNode *left = x ? &s->block[index-1] : &null_block;
2069 const BlockNode *top = y ? &s->block[index-w] : &null_block;
2070 const BlockNode *tl = y && x ? &s->block[index-w-1] : left;
2071 const BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
2072 int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
/* fill the covered cells with null_block's values, marked BLOCK_INTRA */
2075 set_blocks(s, level, x, y, null_block.color[0], null_block.color[1], null_block.color[2], null_block.mx, null_block.my, null_block.ref, BLOCK_INTRA);
/* the split flag is only read when a split would be possible */
2079 if(level==s->block_max_depth || get_rac(&s->c, &s->block_state[4 + s_context])){
/* colour starts from the left neighbour's colour */
2081 int l = left->color[0];
2082 int cb= left->color[1];
2083 int cr= left->color[2];
2085 int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref);
/* the second term of each mv context is multiplied by 0 and contributes nothing */
2086 int mx_context= av_log2(2*FFABS(left->mx - top->mx)) + 0*av_log2(2*FFABS(tr->mx - top->mx));
2087 int my_context= av_log2(2*FFABS(left->my - top->my)) + 0*av_log2(2*FFABS(tr->my - top->my));
2089 type= get_rac(&s->c, &s->block_state[1 + left->type + top->type]) ? BLOCK_INTRA : 0;
/* intra: predicted vector for reference 0 plus the decoded colour differences */
2092 pred_mv(s, &mx, &my, 0, left, top, tr);
2093 l += get_symbol(&s->c, &s->block_state[32], 1);
2094 cb+= get_symbol(&s->c, &s->block_state[64], 1);
2095 cr+= get_symbol(&s->c, &s->block_state[96], 1);
/* inter: optional reference index, then the vector differences are added to the prediction */
2097 if(s->ref_frames > 1)
2098 ref= get_symbol(&s->c, &s->block_state[128 + 1024 + 32*ref_context], 0);
2099 pred_mv(s, &mx, &my, ref, left, top, tr);
2100 mx+= get_symbol(&s->c, &s->block_state[128 + 32*(mx_context + 16*!!ref)], 1);
2101 my+= get_symbol(&s->c, &s->block_state[128 + 32*(my_context + 16*!!ref)], 1);
2103 set_blocks(s, level, x, y, l, cb, cr, mx, my, ref, type);
/* split: decode the four quadrants one level deeper */
2105 decode_q_branch(s, level+1, 2*x+0, 2*y+0);
2106 decode_q_branch(s, level+1, 2*x+1, 2*y+0);
2107 decode_q_branch(s, level+1, 2*x+0, 2*y+1);
2108 decode_q_branch(s, level+1, 2*x+1, 2*y+1);
/* Write the block tree to the range coder by calling encode_q_branch2() or
 * encode_q_branch() at level 0 for block position (x, y).
 * search is tested together with s->avctx->me_method to choose between the
 * two encoders.
 * NOTE(review): the loops supplying x and y, and the statements guarded by
 * the first two conditions, are only partly visible in this listing. */
2112 static void encode_blocks(SnowContext *s, int search){
2117 if(s->avctx->me_method == ME_ITER && !s->keyframe && search)
// Check the space left in the output buffer and log an error when it is too small.
2121 if(s->c.bytestream_end - s->c.bytestream < w*MB_SIZE*MB_SIZE*3){ //FIXME nicer limit
2122 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
// encode_q_branch2() is used for ME_ITER or when search is 0.
2126 if(s->avctx->me_method == ME_ITER || !search)
2127 encode_q_branch2(s, 0, x, y);
2129 encode_q_branch (s, 0, x, y);
/* Decode the block tree by calling decode_q_branch() at level 0 for block
 * position (x, y).
 * NOTE(review): the loops supplying x and y are not visible in this listing;
 * presumably every top-level block is visited. */
2134 static void decode_blocks(SnowContext *s){
2141 decode_q_branch(s, 0, x, y);
/* Sub-pel interpolation of a b_w x b_h block in two passes: a horizontal
 * filter over src, then a vertical filter over the intermediate rows in tmp.
 * dx and dy choose, per direction, the blend between an integer sample and
 * the filtered half-pel value (values below 8 blend with a2, others with a3).
 * NOTE(review): the stores of the filtered values and the pointer increments
 * are not visible in this listing. */
2146 static void mc_block(uint8_t *dst, const uint8_t *src, uint8_t *tmp, int stride, int b_w, int b_h, int dx, int dy){
// Pass 1: horizontal filtering of b_h+HTAPS-1 rows; the rows beyond b_h are
// the ones the vertical taps of pass 2 read.
2149 for(y=0; y < b_h+HTAPS-1; y++){
2150 for(x=0; x < b_w; x++){
2151 int a_2=src[x + HTAPS/2-5];
2152 int a_1=src[x + HTAPS/2-4];
2153 int a0= src[x + HTAPS/2-3];
2154 int a1= src[x + HTAPS/2-2];
2155 int a2= src[x + HTAPS/2-1];
2156 int a3= src[x + HTAPS/2+0];
2157 int a4= src[x + HTAPS/2+1];
2158 int a5= src[x + HTAPS/2+2];
2159 int a6= src[x + HTAPS/2+3];
2160 int a7= src[x + HTAPS/2+4];
2161 // int am= 9*(a1+a2) - (a0+a3);
// Two alternative half-pel kernels; the coefficients of each sum to 32.
// NOTE(review): the preprocessor lines selecting one of them are not visible here.
2163 int am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5);
2165 int am= 21*(a2+a3) - 7*(a1+a4) + 3*(a0+a5) - (a_1+a6);
2167 // int am= 18*(a2+a3) - 2*(a1+a4);
2168 // int aL= (-7*a0 + 105*a1 + 35*a2 - 5*a3)>>3;
2169 // int aR= (-7*a3 + 105*a2 + 35*a1 - 5*a0)>>3;
2171 // if(b_w==16) am= 8*(a1+a2);
// Blend the integer sample (scaled by 32) with the half-pel value; the
// weights add up to 256, which the >>8 (with +128 rounding) removes.
2173 if(dx<8) am = (32*a2*( 8-dx) + am* dx + 128)>>8;
2174 else am = ( am*(16-dx) + 32*a3*(dx-8) + 128)>>8;
2176 /* FIXME Try increasing tmp buffer to 16 bits and not clipping here. Should give marginally better results. - Robert*/
// Saturate: a negative am becomes 0, an am above 255 becomes ~0 (0xFF in its low byte).
2177 if(am&(~255)) am= ~(am>>31);
2181 /* if (dx< 4) tmp[x + y*stride]= (16*a1*( 4-dx) + aL* dx + 32)>>6;
2182 else if(dx< 8) tmp[x + y*stride]= ( aL*( 8-dx) + am*(dx- 4) + 32)>>6;
2183 else if(dx<12) tmp[x + y*stride]= ( am*(12-dx) + aR*(dx- 8) + 32)>>6;
2184 else tmp[x + y*stride]= ( aR*(16-dx) + 16*a2*(dx-12) + 32)>>6;*/
// Move tmp back by the b_h+HTAPS-1 rows covered by pass 1.
2189 tmp -= (b_h+HTAPS-1)*stride;
// Pass 2: vertical filtering; the taps are rows of tmp, stride bytes apart.
2191 for(y=0; y < b_h; y++){
2192 for(x=0; x < b_w; x++){
2193 int a_2=tmp[x + (HTAPS/2-5)*stride];
2194 int a_1=tmp[x + (HTAPS/2-4)*stride];
2195 int a0= tmp[x + (HTAPS/2-3)*stride];
2196 int a1= tmp[x + (HTAPS/2-2)*stride];
2197 int a2= tmp[x + (HTAPS/2-1)*stride];
2198 int a3= tmp[x + (HTAPS/2+0)*stride];
2199 int a4= tmp[x + (HTAPS/2+1)*stride];
2200 int a5= tmp[x + (HTAPS/2+2)*stride];
2201 int a6= tmp[x + (HTAPS/2+3)*stride];
2202 int a7= tmp[x + (HTAPS/2+4)*stride];
2204 int am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5);
2206 int am= 21*(a2+a3) - 7*(a1+a4) + 3*(a0+a5) - (a_1+a6);
2208 // int am= 18*(a2+a3) - 2*(a1+a4);
2209 /* int aL= (-7*a0 + 105*a1 + 35*a2 - 5*a3)>>3;
2210 int aR= (-7*a3 + 105*a2 + 35*a1 - 5*a0)>>3;*/
2212 // if(b_w==16) am= 8*(a1+a2);
// Same blend and saturation as in pass 1, controlled by dy.
2214 if(dy<8) am = (32*a2*( 8-dy) + am* dy + 128)>>8;
2215 else am = ( am*(16-dy) + 32*a3*(dy-8) + 128)>>8;
2217 if(am&(~255)) am= ~(am>>31);
2220 /* if (dy< 4) tmp[x + y*stride]= (16*a1*( 4-dy) + aL* dy + 32)>>6;
2221 else if(dy< 8) tmp[x + y*stride]= ( aL*( 8-dy) + am*(dy- 4) + 32)>>6;
2222 else if(dy<12) tmp[x + y*stride]= ( am*(12-dy) + aR*(dy- 8) + 32)>>6;
2223 else tmp[x + y*stride]= ( aR*(16-dy) + 16*a2*(dy-12) + 32)>>6;*/
2228 STOP_TIMER("mc_block")
/* mca(dx,dy,b_w) defines a function mc_block_hpel<dx><dy><b_w>() with the
 * (dst, src, stride, h) signature. The generated function allocates a
 * temporary array of stride*(b_w+HTAPS-1) bytes on the stack and calls
 * mc_block() for a b_w x b_w block, with src moved back by HTAPS/2-1 columns
 * and HTAPS/2-1 rows. The h argument is not used in the visible lines. */
2231 #define mca(dx,dy,b_w)\
2232 static void mc_block_hpel ## dx ## dy ## b_w(uint8_t *dst, const uint8_t *src, int stride, int h){\
2233 uint8_t tmp[stride*(b_w+HTAPS-1)];\
2235 mc_block(dst, src-(HTAPS/2-1)-(HTAPS/2-1)*stride, tmp, stride, b_w, b_h, dx, dy);\
/* Write the prediction of one block into dst (b_w x b_h bytes, rows stride
 * bytes apart).
 * Intra blocks (block->type & BLOCK_INTRA) are filled with
 * block->color[plane_index]; other blocks are motion compensated from
 * s->last_picture[block->ref] using block->mx / block->my.
 * sx, sy: position of the block in the plane; w, h: plane dimensions;
 * tmp: scratch space used for edge emulation and by mc_block().
 * NOTE(review): the conditions selecting the fill and qpel variants are not
 * visible in this listing. */
2247 static void pred_block(SnowContext *s, uint8_t *dst, uint8_t *tmp, int stride, int sx, int sy, int b_w, int b_h, BlockNode *block, int plane_index, int w, int h){
2248 if(block->type & BLOCK_INTRA){
2250 const int color = block->color[plane_index];
// The colour byte replicated into all four bytes of a 32-bit word.
2251 const int color4= color*0x01010101;
// Unrolled fills writing 32, 16, 8 and 4 bytes per row, followed by a generic per-byte loop.
2253 for(y=0; y < b_h; y++){
2254 *(uint32_t*)&dst[0 + y*stride]= color4;
2255 *(uint32_t*)&dst[4 + y*stride]= color4;
2256 *(uint32_t*)&dst[8 + y*stride]= color4;
2257 *(uint32_t*)&dst[12+ y*stride]= color4;
2258 *(uint32_t*)&dst[16+ y*stride]= color4;
2259 *(uint32_t*)&dst[20+ y*stride]= color4;
2260 *(uint32_t*)&dst[24+ y*stride]= color4;
2261 *(uint32_t*)&dst[28+ y*stride]= color4;
2264 for(y=0; y < b_h; y++){
2265 *(uint32_t*)&dst[0 + y*stride]= color4;
2266 *(uint32_t*)&dst[4 + y*stride]= color4;
2267 *(uint32_t*)&dst[8 + y*stride]= color4;
2268 *(uint32_t*)&dst[12+ y*stride]= color4;
2271 for(y=0; y < b_h; y++){
2272 *(uint32_t*)&dst[0 + y*stride]= color4;
2273 *(uint32_t*)&dst[4 + y*stride]= color4;
2276 for(y=0; y < b_h; y++){
2277 *(uint32_t*)&dst[0 + y*stride]= color4;
2280 for(y=0; y < b_h; y++){
2281 for(x=0; x < b_w; x++){
2282 dst[x + y*stride]= color;
2287 uint8_t *src= s->last_picture[block->ref].data[plane_index];
// Plane 0 uses twice the motion vector scale of the other planes.
2288 const int scale= plane_index ? s->mv_scale : 2*s->mv_scale;
2289 int mx= block->mx*scale;
2290 int my= block->my*scale;
// The low 4 bits are the sub-pel phase, the remaining bits the integer displacement.
2291 const int dx= mx&15;
2292 const int dy= my&15;
// Maps b_w 16, 8, 4, 2 to 0, 1, 2, 3 (b_w 32 gives -3, see the assert below).
2293 const int tab_index= 3 - (b_w>>2) + (b_w>>4);
// Move the source position back by HTAPS/2-1 samples in both directions.
2294 sx += (mx>>4) - (HTAPS/2-1);
2295 sy += (my>>4) - (HTAPS/2-1);
2296 src += sx + sy*stride;
// When the area to be read is not fully inside the plane, build an edge-extended copy in tmp.
2297 if( (unsigned)sx >= w - b_w - (HTAPS-2)
2298 || (unsigned)sy >= h - b_h - (HTAPS-2)){
2299 ff_emulated_edge_mc(tmp + MB_SIZE, src, stride, b_w+HTAPS-1, b_h+HTAPS-1, sx, sy, w, h);
2302 // assert(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h);
2303 // assert(!(b_w&(b_w-1)));
2304 assert(b_w>1 && b_h>1);
2305 assert(tab_index>=0 && tab_index<4 || b_w==32);
// mc_block() handles phases that are not multiples of 4, shapes other than
// 1:1, 1:2 and 2:1, widths that are not a power of two, and HTAPS != 6;
// the remaining cases go through the H.264 qpel functions below.
2306 if((dx&3) || (dy&3) || !(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h) || (b_w&(b_w-1)) || HTAPS != 6)
2307 mc_block(dst, src, tmp, stride, b_w, b_h, dx, dy);
// Two 16-wide calls per 16 rows, at horizontal offsets 0 and 16.
2310 for(y=0; y<b_h; y+=16){
2311 s->dsp.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + y*stride, src + 2 + (y+2)*stride,stride);
2312 s->dsp.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + 16 + y*stride, src + 18 + (y+2)*stride,stride);
2315 s->dsp.put_h264_qpel_pixels_tab[tab_index ][dy+(dx>>2)](dst,src + 2 + 2*stride,stride);
// Block twice as wide as high: two calls side by side, b_h columns apart.
2316 else if(b_w==2*b_h){
2317 s->dsp.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst ,src + 2 + 2*stride,stride);
2318 s->dsp.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst+b_h,src + 2 + b_h + 2*stride,stride);
// Two calls stacked vertically, b_w rows apart.
2321 s->dsp.put_h264_qpel_pixels_tab[tab_index ][dy+(dx>>2)](dst ,src + 2 + 2*stride ,stride);
2322 s->dsp.put_h264_qpel_pixels_tab[tab_index ][dy+(dx>>2)](dst+b_w*stride,src + 2 + 2*stride+b_w*stride,stride);
/* Combine the four predictions block[0..3] with the weights from the obmc
 * table and apply the result to the slice buffer line of each row.
 * For row y the destination is slice_buffer_get_line(sb, src_y + y), indexed
 * from src_x. The visible lines contain two ways of applying the weighted
 * sum v: adding the line value, rounding, shifting down by FRAC_BITS,
 * clipping and storing to dst8, or subtracting v from the line.
 * NOTE(review): the test choosing between them (presumably on add) and the
 * preprocessor lines around the two scaling statements are not visible. */
2327 void ff_snow_inner_add_yblock(const uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h,
2328 int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8){
2331 for(y=0; y<b_h; y++){
2332 //FIXME ugly misuse of obmc_stride
// Four weight rows: obmc1 at row y, obmc2 half a stride to the right,
// obmc3 obmc_stride/2 rows further down, obmc4 both.
2333 const uint8_t *obmc1= obmc + y*obmc_stride;
2334 const uint8_t *obmc2= obmc1+ (obmc_stride>>1);
2335 const uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1);
2336 const uint8_t *obmc4= obmc3+ (obmc_stride>>1);
2337 dst = slice_buffer_get_line(sb, src_y + y);
2338 for(x=0; x<b_w; x++){
2339 int v= obmc1[x] * block[3][x + y*src_stride]
2340 +obmc2[x] * block[2][x + y*src_stride]
2341 +obmc3[x] * block[1][x + y*src_stride]
2342 +obmc4[x] * block[0][x + y*src_stride];
2344 v <<= 8 - LOG2_OBMC_MAX;
2346 v >>= 8 - FRAC_BITS;
2349 v += dst[x + src_x];
2350 v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS;
// Saturate to 0..255 before the store to the 8-bit picture.
2351 if(v&(~255)) v= ~(v>>31);
2352 dst8[x + y*src_stride] = v;
2354 dst[x + src_x] -= v;
//FIXME name cleanup (b_w, block_w, b_width stuff)
/* Apply the overlapped prediction of the b_w x b_h area at (src_x, src_y).
 * The four blocks at (b_x, b_y), (b_x+1, b_y), (b_x, b_y+1) and
 * (b_x+1, b_y+1) are predicted with pred_block() and weighted with the obmc
 * table. With sliced set the result is applied through
 * s->dsp.inner_add_yblock() on the slice buffer sb; otherwise the visible
 * code works on dst (rows dst_stride apart) and dst8 (rows src_stride apart).
 * NOTE(review): many statements of this function (border handling bodies,
 * pointer set-up for block[], the tests on add and the preprocessor
 * conditionals) are not visible in this listing. */
2361 static av_always_inline void add_yblock(SnowContext *s, int sliced, slice_buffer *sb, IDWTELEM *dst, uint8_t *dst8, const uint8_t *obmc, int src_x, int src_y, int b_w, int b_h, int w, int h, int dst_stride, int src_stride, int obmc_stride, int b_x, int b_y, int add, int offset_dst, int plane_index){
2362 const int b_width = s->b_width << s->block_max_depth;
2363 const int b_height= s->b_height << s->block_max_depth;
2364 const int b_stride= b_width;
// The four blocks whose predictions overlap in this area: left/right top, left/right bottom.
2365 BlockNode *lt= &s->block[b_x + b_y*b_stride];
2366 BlockNode *rt= lt+1;
2367 BlockNode *lb= lt+b_stride;
2368 BlockNode *rb= lb+1;
2370 int tmp_step= src_stride >= 7*MB_SIZE ? MB_SIZE : MB_SIZE*src_stride;
2371 uint8_t tmp[src_stride*7*MB_SIZE]; //FIXME align
// Special cases for positions at the right and bottom end of the block array.
2378 }else if(b_x + 1 >= b_width){
2385 }else if(b_y + 1 >= b_height){
// Handling of areas that extend beyond the left, right, top or bottom of the plane.
2390 if(src_x<0){ //FIXME merge with prev & always round internal width up to *16
2393 if(!sliced && !offset_dst)
2396 }else if(src_x + b_w > w){
2400 obmc -= src_y*obmc_stride;
2402 if(!sliced && !offset_dst)
2403 dst -= src_y*dst_stride;
2405 }else if(src_y + b_h> h){
// Nothing to do when the remaining area is empty.
2409 if(b_w<=0 || b_h<=0) return;
2411 assert(src_stride > 2*MB_SIZE + 5);
2412 if(!sliced && offset_dst)
2413 dst += src_x + src_y*dst_stride;
2414 dst8+= src_x + src_y*src_stride;
2415 // src += src_x + src_y*src_stride;
2417 ptmp= tmp + 3*tmp_step;
// Predict each of the four blocks; the same_block() tests compare a block
// with the ones already handled (presumably to reuse an existing prediction).
2420 pred_block(s, block[0], tmp, src_stride, src_x, src_y, b_w, b_h, lt, plane_index, w, h);
2422 if(same_block(lt, rt)){
2427 pred_block(s, block[1], tmp, src_stride, src_x, src_y, b_w, b_h, rt, plane_index, w, h);
2430 if(same_block(lt, lb)){
2432 }else if(same_block(rt, lb)){
2437 pred_block(s, block[2], tmp, src_stride, src_x, src_y, b_w, b_h, lb, plane_index, w, h);
2440 if(same_block(lt, rb) ){
2442 }else if(same_block(rt, rb)){
2444 }else if(same_block(lb, rb)){
2448 pred_block(s, block[3], tmp, src_stride, src_x, src_y, b_w, b_h, rb, plane_index, w, h);
// Variant applying the four weighted predictions one after another, each
// scaled by 256/OBMC_MAX and added to or subtracted from dst.
2451 for(y=0; y<b_h; y++){
2452 for(x=0; x<b_w; x++){
2453 int v= obmc [x + y*obmc_stride] * block[3][x + y*src_stride] * (256/OBMC_MAX);
2454 if(add) dst[x + y*dst_stride] += v;
2455 else dst[x + y*dst_stride] -= v;
2458 for(y=0; y<b_h; y++){
2459 uint8_t *obmc2= obmc + (obmc_stride>>1);
2460 for(x=0; x<b_w; x++){
2461 int v= obmc2[x + y*obmc_stride] * block[2][x + y*src_stride] * (256/OBMC_MAX);
2462 if(add) dst[x + y*dst_stride] += v;
2463 else dst[x + y*dst_stride] -= v;
2466 for(y=0; y<b_h; y++){
2467 uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
2468 for(x=0; x<b_w; x++){
2469 int v= obmc3[x + y*obmc_stride] * block[1][x + y*src_stride] * (256/OBMC_MAX);
2470 if(add) dst[x + y*dst_stride] += v;
2471 else dst[x + y*dst_stride] -= v;
2474 for(y=0; y<b_h; y++){
2475 uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
2476 uint8_t *obmc4= obmc3+ (obmc_stride>>1);
2477 for(x=0; x<b_w; x++){
2478 int v= obmc4[x + y*obmc_stride] * block[0][x + y*src_stride] * (256/OBMC_MAX);
2479 if(add) dst[x + y*dst_stride] += v;
2480 else dst[x + y*dst_stride] -= v;
// Slice buffer path: delegate to the dsp function.
2487 s->dsp.inner_add_yblock(obmc, obmc_stride, block, b_w, b_h, src_x,src_y, src_stride, sb, add, dst8);
2488 STOP_TIMER("inner_add_yblock")
// Path working directly on dst: weighted sum of the four predictions per sample.
2490 for(y=0; y<b_h; y++){
2491 //FIXME ugly misuse of obmc_stride
2492 const uint8_t *obmc1= obmc + y*obmc_stride;
2493 const uint8_t *obmc2= obmc1+ (obmc_stride>>1);
2494 const uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1);
2495 const uint8_t *obmc4= obmc3+ (obmc_stride>>1);
2496 for(x=0; x<b_w; x++){
2497 int v= obmc1[x] * block[3][x + y*src_stride]
2498 +obmc2[x] * block[2][x + y*src_stride]
2499 +obmc3[x] * block[1][x + y*src_stride]
2500 +obmc4[x] * block[0][x + y*src_stride];
2502 v <<= 8 - LOG2_OBMC_MAX;
2504 v >>= 8 - FRAC_BITS;
2507 v += dst[x + y*dst_stride];
2508 v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS;
// Saturate to 0..255 before the store to the 8-bit picture.
2509 if(v&(~255)) v= ~(v>>31);
2510 dst8[x + y*src_stride] = v;
2512 dst[x + y*dst_stride] -= v;
/* Apply the prediction for block row mb_y of one plane on slice buffer lines.
 * For keyframes, or when bit 512 of s->avctx->debug is set, only a constant
 * of 128 (scaled by FRAC_BITS) is involved: the visible lines either add it,
 * clip and store to the 8-bit picture, or subtract it from the lines.
 * Otherwise add_yblock() is called with sliced=1 for mb_x = 0..mb_w
 * inclusive, each area starting half a block before the block position.
 * NOTE(review): the tests on add and some loop headers are not visible in
 * this listing. */
2519 static av_always_inline void predict_slice_buffered(SnowContext *s, slice_buffer * sb, IDWTELEM * old_buffer, int plane_index, int add, int mb_y){
2520 Plane *p= &s->plane[plane_index];
2521 const int mb_w= s->b_width << s->block_max_depth;
2522 const int mb_h= s->b_height << s->block_max_depth;
// Planes other than 0 use half the block width and the next obmc table.
2524 int block_size = MB_SIZE >> s->block_max_depth;
2525 int block_w = plane_index ? block_size/2 : block_size;
2526 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
2527 int obmc_stride= plane_index ? block_size : 2*block_size;
2528 int ref_stride= s->current_picture.linesize[plane_index];
2529 uint8_t *dst8= s->current_picture.data[plane_index];
2534 if(s->keyframe || (s->avctx->debug&512)){
// Rows of this block row, limited to the plane height h.
2539 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++)
2541 // DWTELEM * line = slice_buffer_get_line(sb, y);
2542 IDWTELEM * line = sb->line[y];
2545 // int v= buf[x + y*w] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
2546 int v= line[x] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
2548 if(v&(~255)) v= ~(v>>31);
2549 dst8[x + y*ref_stride]= v;
2553 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++)
2555 // DWTELEM * line = slice_buffer_get_line(sb, y);
2556 IDWTELEM * line = sb->line[y];
2559 line[x] -= 128 << FRAC_BITS;
2560 // buf[x + y*w]-= 128<<FRAC_BITS;
// mb_x runs up to and including mb_w, one step more than the number of blocks per row.
2568 for(mb_x=0; mb_x<=mb_w; mb_x++){
2571 add_yblock(s, 1, sb, old_buffer, dst8, obmc,
2572 block_w*mb_x - block_w/2,
2573 block_w*mb_y - block_w/2,
2576 w, ref_stride, obmc_stride,
2578 add, 0, plane_index);
2580 STOP_TIMER("add_yblock")
2583 STOP_TIMER("predict_slice")
/* Apply the prediction for block row mb_y of one plane on the coefficient
 * buffer buf, whose rows are w elements apart.
 * For keyframes, or when bit 512 of s->avctx->debug is set, only a constant
 * of 128 (scaled by FRAC_BITS) is involved: the visible lines either add it,
 * clip and store to the 8-bit picture, or subtract it from buf.
 * Otherwise add_yblock() is called with sliced=0 and offset_dst=1 for
 * mb_x = 0..mb_w inclusive.
 * NOTE(review): the tests on add and some loop headers are not visible in
 * this listing. */
2586 static av_always_inline void predict_slice(SnowContext *s, IDWTELEM *buf, int plane_index, int add, int mb_y){
2587 Plane *p= &s->plane[plane_index];
2588 const int mb_w= s->b_width << s->block_max_depth;
2589 const int mb_h= s->b_height << s->block_max_depth;
// Planes other than 0 use half the block width and the next obmc table.
2591 int block_size = MB_SIZE >> s->block_max_depth;
2592 int block_w = plane_index ? block_size/2 : block_size;
2593 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
2594 const int obmc_stride= plane_index ? block_size : 2*block_size;
2595 int ref_stride= s->current_picture.linesize[plane_index];
2596 uint8_t *dst8= s->current_picture.data[plane_index];
2601 if(s->keyframe || (s->avctx->debug&512)){
// Rows of this block row, limited to the plane height h.
2606 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
2608 int v= buf[x + y*w] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
2610 if(v&(~255)) v= ~(v>>31);
2611 dst8[x + y*ref_stride]= v;
2615 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
2617 buf[x + y*w]-= 128<<FRAC_BITS;
// mb_x runs up to and including mb_w, one step more than the number of blocks per row.
2625 for(mb_x=0; mb_x<=mb_w; mb_x++){
2628 add_yblock(s, 0, NULL, buf, dst8, obmc,
2629 block_w*mb_x - block_w/2,
2630 block_w*mb_y - block_w/2,
2633 w, ref_stride, obmc_stride,
2635 add, 1, plane_index);
2637 STOP_TIMER("add_yblock")
2640 STOP_TIMER("predict_slice")
/* Run predict_slice() on buf for every block row of the plane.
 * mb_y runs from 0 up to and including mb_h, i.e. one step more than the
 * number of block rows; add is passed through unchanged. */
2643 static av_always_inline void predict_plane(SnowContext *s, IDWTELEM *buf, int plane_index, int add){
2644 const int mb_h= s->b_height << s->block_max_depth;
2646 for(mb_y=0; mb_y<=mb_h; mb_y++)
2647 predict_slice(s, buf, plane_index, add, mb_y);
/* Compute a colour value in 0..255 for block (mb_x, mb_y) of the given plane.
 * The block is temporarily marked intra with colour 0, the prediction of the
 * surrounding area is accumulated into a scratch buffer with add_yblock(),
 * and the returned value is the obmc-weighted ratio ab/aa between the input
 * picture minus that prediction and the block's own weights.
 * NOTE(review): the loop headers and the statement restoring the block from
 * backup are not visible in this listing. */
2650 static int get_dc(SnowContext *s, int mb_x, int mb_y, int plane_index){
2652 Plane *p= &s->plane[plane_index];
2653 const int block_size = MB_SIZE >> s->block_max_depth;
2654 const int block_w = plane_index ? block_size/2 : block_size;
2655 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
2656 const int obmc_stride= plane_index ? block_size : 2*block_size;
2657 const int ref_stride= s->current_picture.linesize[plane_index];
2658 uint8_t *src= s-> input_picture.data[plane_index];
2659 IDWTELEM *dst= (IDWTELEM*)s->m.obmc_scratchpad + plane_index*block_size*block_size*4; //FIXME change to unsigned
2660 const int b_stride = s->b_width << s->block_max_depth;
2661 const int w= p->width;
2662 const int h= p->height;
2663 int index= mb_x + mb_y*b_stride;
2664 BlockNode *b= &s->block[index];
// Copy of the block taken before it is modified below.
2665 BlockNode backup= *b;
// Make the block itself contribute 0 so that dst only receives the neighbours' share.
2669 b->type|= BLOCK_INTRA;
2670 b->color[plane_index]= 0;
2671 memset(dst, 0, obmc_stride*obmc_stride*sizeof(IDWTELEM));
// i addresses the quadrants of the block's window: bit 0 is the column, bit 1 the row.
2674 int mb_x2= mb_x + (i &1) - 1;
2675 int mb_y2= mb_y + (i>>1) - 1;
2676 int x= block_w*mb_x2 + block_w/2;
2677 int y= block_w*mb_y2 + block_w/2;
2679 add_yblock(s, 0, NULL, dst + ((i&1)+(i>>1)*obmc_stride)*block_w, NULL, obmc,
2680 x, y, block_w, block_w, w, h, obmc_stride, ref_stride, obmc_stride, mb_x2, mb_y2, 0, 0, plane_index);
// Only samples inside the plane are visited.
2682 for(y2= FFMAX(y, 0); y2<FFMIN(h, y+block_w); y2++){
2683 for(x2= FFMAX(x, 0); x2<FFMIN(w, x+block_w); x2++){
2684 int index= x2-(block_w*mb_x - block_w/2) + (y2-(block_w*mb_y - block_w/2))*obmc_stride;
2685 int obmc_v= obmc[index];
// For quadrants reaching beyond the plane, add the weight found block_w
// rows or columns away in the obmc table.
2687 if(y<0) obmc_v += obmc[index + block_w*obmc_stride];
2688 if(x<0) obmc_v += obmc[index + block_w];
2689 if(y+block_w>h) obmc_v += obmc[index - block_w*obmc_stride];
2690 if(x+block_w>w) obmc_v += obmc[index - block_w];
2691 //FIXME precalc this or simplify it somehow else
2693 d = -dst[index] + (1<<(FRAC_BITS-1));
2695 ab += (src[x2 + y2*ref_stride] - (d>>FRAC_BITS)) * obmc_v;
2696 aa += obmc_v * obmc_v; //FIXME precalculate this
// Rounded division ab/aa, scaled by LOG2_OBMC_MAX and limited to 0..255.
2702 return av_clip(((ab<<LOG2_OBMC_MAX) + aa/2)/aa, 0, 255); //FIXME we should not need clipping
/* Return an approximate bit cost for block (x, y); w is the width in blocks
 * used to locate the top-right neighbour.
 * Intra blocks: 3 plus twice the sum of av_log2() of the doubled absolute
 * colour differences to the left neighbour.
 * Other blocks: twice (1 plus av_log2() terms for dmx, dmy and b->ref),
 * where dmx/dmy are obtained with pred_mv().
 * NOTE(review): the value returned for positions outside the block array and
 * the statements combining dmx/dmy with the block's vector are not visible
 * in this listing. */
2705 static inline int get_block_bits(SnowContext *s, int x, int y, int w){
2706 const int b_stride = s->b_width << s->block_max_depth;
2707 const int b_height = s->b_height<< s->block_max_depth;
2708 int index= x + y*b_stride;
2709 const BlockNode *b = &s->block[index];
// Neighbours; null_block replaces the left/top one when x or y is 0.
2710 const BlockNode *left = x ? &s->block[index-1] : &null_block;
2711 const BlockNode *top = y ? &s->block[index-b_stride] : &null_block;
2712 const BlockNode *tl = y && x ? &s->block[index-b_stride-1] : left;
2713 const BlockNode *tr = y && x+w<b_stride ? &s->block[index-b_stride+w] : tl;
2715 // int mx_context= av_log2(2*FFABS(left->mx - top->mx));
2716 // int my_context= av_log2(2*FFABS(left->my - top->my));
// Positions left of, right of or below the block array are handled separately.
2718 if(x<0 || x>=b_stride || y>=b_height)
2725 00001XXXX 15-30 8-15
2727 //FIXME try accurate rate
2728 //FIXME intra and inter predictors if surrounding blocks are not the same type
2729 if(b->type & BLOCK_INTRA){
2730 return 3+2*( av_log2(2*FFABS(left->color[0] - b->color[0]))
2731 + av_log2(2*FFABS(left->color[1] - b->color[1]))
2732 + av_log2(2*FFABS(left->color[2] - b->color[2])));
2734 pred_mv(s, &dmx, &dmy, b->ref, left, top, tr);
2737 return 2*(1 + av_log2(2*FFABS(dmx)) //FIXME kill the 2* can be merged in lambda
2738 + av_log2(2*FFABS(dmy))
2739 + av_log2(2*b->ref));
/* Return distortion + rate*penalty_factor for block (mb_x, mb_y) of a plane.
 * The block's own prediction over its 2*block_w square window is produced
 * with pred_block(), weighted with obmc_edged and combined with the values
 * in s->m.obmc_scratchpad; the distortion is then measured between the input
 * picture and the current picture with the comparison selected by
 * s->avctx->me_cmp. The rate is summed from get_block_bits() calls.
 * NOTE(review): the store of the combined value into the current picture and
 * several loop headers are not visible in this listing. */
2743 static int get_block_rd(SnowContext *s, int mb_x, int mb_y, int plane_index, const uint8_t *obmc_edged){
2744 Plane *p= &s->plane[plane_index];
2745 const int block_size = MB_SIZE >> s->block_max_depth;
2746 const int block_w = plane_index ? block_size/2 : block_size;
2747 const int obmc_stride= plane_index ? block_size : 2*block_size;
2748 const int ref_stride= s->current_picture.linesize[plane_index];
2749 uint8_t *dst= s->current_picture.data[plane_index];
2750 uint8_t *src= s-> input_picture.data[plane_index];
2751 IDWTELEM *pred= (IDWTELEM*)s->m.obmc_scratchpad + plane_index*block_size*block_size*4;
2752 uint8_t cur[ref_stride*2*MB_SIZE]; //FIXME alignment
2753 uint8_t tmp[ref_stride*(2*MB_SIZE+HTAPS-1)];
2754 const int b_stride = s->b_width << s->block_max_depth;
2755 const int b_height = s->b_height<< s->block_max_depth;
2756 const int w= p->width;
2757 const int h= p->height;
2760 const int penalty_factor= get_penalty_factor(s->lambda, s->lambda2, s->avctx->me_cmp);
// Top-left corner of the window (half a block before the block position)
// and the part of the window [x0,x1) x [y0,y1) that lies inside the plane.
2761 int sx= block_w*mb_x - block_w/2;
2762 int sy= block_w*mb_y - block_w/2;
2763 int x0= FFMAX(0,-sx);
2764 int y0= FFMAX(0,-sy);
2765 int x1= FFMIN(block_w*2, w-sx);
2766 int y1= FFMIN(block_w*2, h-sy);
2769 pred_block(s, cur, tmp, ref_stride, sx, sy, block_w*2, block_w*2, &s->block[mb_x + mb_y*b_stride], plane_index, w, h);
2771 for(y=y0; y<y1; y++){
2772 const uint8_t *obmc1= obmc_edged + y*obmc_stride;
2773 const IDWTELEM *pred1 = pred + y*obmc_stride;
2774 uint8_t *cur1 = cur + y*ref_stride;
2775 uint8_t *dst1 = dst + sx + (sy+y)*ref_stride;
2776 for(x=x0; x<x1; x++){
// Bring the weighted sample to FRAC_BITS precision, whichever of the two constants is larger.
2777 #if FRAC_BITS >= LOG2_OBMC_MAX
2778 int v = (cur1[x] * obmc1[x]) << (FRAC_BITS - LOG2_OBMC_MAX);
2780 int v = (cur1[x] * obmc1[x] + (1<<(LOG2_OBMC_MAX - FRAC_BITS-1))) >> (LOG2_OBMC_MAX - FRAC_BITS);
2782 v = (v + pred1[x]) >> FRAC_BITS;
2783 if(v&(~255)) v= ~(v>>31);
2788 /* copy the regions where obmc[] = (uint8_t)256 */
// Applies to blocks that are in a corner of the block array.
2789 if(LOG2_OBMC_MAX == 8
2790 && (mb_x == 0 || mb_x == b_stride-1)
2791 && (mb_y == 0 || mb_y == b_height-1)){
2800 for(y=y0; y<y1; y++)
2801 memcpy(dst + sx+x0 + (sy+y)*ref_stride, cur + x0 + y*ref_stride, x1-x0);
2805 /* FIXME rearrange dsputil to fit 32x32 cmp functions */
2806 /* FIXME check alignment of the cmp wavelet vs the encoding wavelet */
2807 /* FIXME cmps overlap but don't cover the wavelet's whole support,
2808 * so improving the score of one block is not strictly guaranteed to
2809 * improve the score of the whole frame, so iterative motion est
2810 * doesn't always converge. */
2811 if(s->avctx->me_cmp == FF_CMP_W97)
2812 distortion = w97_32_c(&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, 32);
2813 else if(s->avctx->me_cmp == FF_CMP_W53)
2814 distortion = w53_32_c(&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, 32);
// Comparison assembled from 16x16 parts at offsets (16*(i&1), 16*(i>>1)).
2818 int off = sx+16*(i&1) + (sy+16*(i>>1))*ref_stride;
2819 distortion += s->dsp.me_cmp[0](&s->m, src + off, dst + off, ref_stride, 16);
2824 distortion = s->dsp.me_cmp[0](&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, block_w*2);
// Rate of the block itself and of nearby blocks, indexed by i.
2833 rate += get_block_bits(s, mb_x + (i&1) - (i>>1), mb_y + (i>>1), 1);
2835 if(mb_x == b_stride-2)
2836 rate += get_block_bits(s, mb_x + 1, mb_y + 1, 1);
2838 return distortion + rate*penalty_factor;
/* Return distortion + rate*penalty_factor for the 2x2 group of blocks whose
 * top-left block is (mb_x, mb_y).
 * For positions (mb_x + i%3 - 1, mb_y + i/3 - 1) the prediction is written
 * into the current picture with add_yblock() and compared against the input
 * picture with s->dsp.me_cmp. The rate is get_block_bits() for the group
 * (width 2) plus the blocks listed in dxy; the first four entries are
 * skipped when the four blocks of the group are identical.
 * NOTE(review): loop headers and the tests around the memcpy loops are not
 * visible in this listing. */
2841 static int get_4block_rd(SnowContext *s, int mb_x, int mb_y, int plane_index){
2843 Plane *p= &s->plane[plane_index];
2844 const int block_size = MB_SIZE >> s->block_max_depth;
2845 const int block_w = plane_index ? block_size/2 : block_size;
2846 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
2847 const int obmc_stride= plane_index ? block_size : 2*block_size;
2848 const int ref_stride= s->current_picture.linesize[plane_index];
2849 uint8_t *dst= s->current_picture.data[plane_index];
2850 uint8_t *src= s-> input_picture.data[plane_index];
// Zero-initialized dummy passed as the IDWTELEM destination of add_yblock() (with dst_stride 0).
2851 static const IDWTELEM zero_dst[4096]; //FIXME
2852 const int b_stride = s->b_width << s->block_max_depth;
2853 const int w= p->width;
2854 const int h= p->height;
2857 const int penalty_factor= get_penalty_factor(s->lambda, s->lambda2, s->avctx->me_cmp);
2860 int mb_x2= mb_x + (i%3) - 1;
2861 int mb_y2= mb_y + (i/3) - 1;
2862 int x= block_w*mb_x2 + block_w/2;
2863 int y= block_w*mb_y2 + block_w/2;
2865 add_yblock(s, 0, NULL, zero_dst, dst, obmc,
2866 x, y, block_w, block_w, w, h, /*dst_stride*/0, ref_stride, obmc_stride, mb_x2, mb_y2, 1, 1, plane_index);
2868 //FIXME find a cleaner/simpler way to skip the outside stuff
// Copy the input picture over the parts of the area above, below, left of
// and right of the plane, so that src and dst are equal there.
2869 for(y2= y; y2<0; y2++)
2870 memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, block_w);
2871 for(y2= h; y2<y+block_w; y2++)
2872 memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, block_w);
2874 for(y2= y; y2<y+block_w; y2++)
2875 memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, -x);
2878 for(y2= y; y2<y+block_w; y2++)
2879 memcpy(dst + w + y2*ref_stride, src + w + y2*ref_stride, x+block_w - w);
// me_cmp[1] is used for block_w 8 and me_cmp[0] for block_w 16.
2882 assert(block_w== 8 || block_w==16);
2883 distortion += s->dsp.me_cmp[block_w==8](&s->m, src + x + y*ref_stride, dst + x + y*ref_stride, ref_stride, block_w);
2887 BlockNode *b= &s->block[mb_x+mb_y*b_stride];
2888 int merged= same_block(b,b+1) && same_block(b,b+b_stride) && same_block(b,b+b_stride+1);
2896 rate = get_block_bits(s, mb_x, mb_y, 2);
2897 for(i=merged?4:0; i<9; i++){
2898 static const int dxy[9][2] = {{0,0},{1,0},{0,1},{1,1},{2,0},{2,1},{-1,2},{0,2},{1,2}};
2899 rate += get_block_bits(s, mb_x + dxy[i][0], mb_y + dxy[i][1], 1);
2902 return distortion + rate*penalty_factor;
/* Evaluate one candidate for block (mb_x, mb_y) with get_block_rd() on plane 0.
 * The visible lines either store p[0..2] as the block's colours and set
 * BLOCK_INTRA, or look the candidate up in s->me_cache (keyed on p[0], p[1],
 * block->ref and the current cache generation) and clear BLOCK_INTRA.
 * NOTE(review): the test on intra, the assignment of the motion vector, the
 * comparison with *best_rd and the use of backup are not visible in this
 * listing. */
2905 static av_always_inline int check_block(SnowContext *s, int mb_x, int mb_y, int p[3], int intra, const uint8_t *obmc_edged, int *best_rd){
2906 const int b_stride= s->b_width << s->block_max_depth;
2907 BlockNode *block= &s->block[mb_x + mb_y * b_stride];
// Copy of the block taken before the candidate is written into it.
2908 BlockNode backup= *block;
2909 int rd, index, value;
2911 assert(mb_x>=0 && mb_y>=0);
2912 assert(mb_x<b_stride);
2915 block->color[0] = p[0];
2916 block->color[1] = p[1];
2917 block->color[2] = p[2];
2918 block->type |= BLOCK_INTRA;
// Cache slot and tag for this candidate; a matching tag means the same
// candidate was already recorded in the current generation.
2920 index= (p[0] + 31*p[1]) & (ME_CACHE_SIZE-1);
2921 value= s->me_cache_generation + (p[0]>>10) + (p[1]<<6) + (block->ref<<12);
2922 if(s->me_cache[index] == value)
2924 s->me_cache[index]= value;
2928 block->type &= ~BLOCK_INTRA;
2931 rd= get_block_rd(s, mb_x, mb_y, 0, obmc_edged);
2943 /* special case for int[2] args we discard afterward, fixes compilation prob with gcc 2.95 */
/* Wrapper around check_block() for a non-intra candidate: p0 and p1 are
 * packed into a local array and passed with intra=0; the result of
 * check_block() is returned unchanged. */
2944 static av_always_inline int check_block_inter(SnowContext *s, int mb_x, int mb_y, int p0, int p1, const uint8_t *obmc_edged, int *best_rd){
2945 int p[2] = {p0, p1};
2946 return check_block(s, mb_x, mb_y, p, 0, obmc_edged, best_rd);
/* Evaluate giving all four blocks of the 2x2 group at (mb_x, mb_y) the same
 * non-intra candidate (p0, p1, ref), using get_4block_rd() on plane 0.
 * mb_x and mb_y must both be even (see the assert). The candidate is looked
 * up in s->me_cache first; the four original blocks are kept in backup[] and
 * can be written back at the end.
 * NOTE(review): the assignment of the candidate to the block, the comparison
 * with *best_rd and the return statements are not visible in this listing. */
2949 static av_always_inline int check_4block_inter(SnowContext *s, int mb_x, int mb_y, int p0, int p1, int ref, int *best_rd){
2950 const int b_stride= s->b_width << s->block_max_depth;
2951 BlockNode *block= &s->block[mb_x + mb_y * b_stride];
2952 BlockNode backup[4]= {block[0], block[1], block[b_stride], block[b_stride+1]};
2953 int rd, index, value;
2955 assert(mb_x>=0 && mb_y>=0);
2956 assert(mb_x<b_stride);
2957 assert(((mb_x|mb_y)&1) == 0);
// Cache slot and tag for this candidate within the current generation.
2959 index= (p0 + 31*p1) & (ME_CACHE_SIZE-1);
2960 value= s->me_cache_generation + (p0>>10) + (p1<<6) + (block->ref<<12);
2961 if(s->me_cache[index] == value)
2963 s->me_cache[index]= value;
2968 block->type &= ~BLOCK_INTRA;
// Copy the first block to the other three blocks of the group.
2969 block[1]= block[b_stride]= block[b_stride+1]= *block;
2971 rd= get_4block_rd(s, mb_x, mb_y, 0);
// Write the four saved blocks back.
2978 block[0]= backup[0];
2979 block[1]= backup[1];
2980 block[b_stride]= backup[2];
2981 block[b_stride+1]= backup[3];
/* Iterative motion estimation over all blocks of the frame.
 * Visible steps:
 *  - encode_q_branch() is run for every top-level block, with s->block_state
 *    saved before and copied back afterwards;
 *  - up to 25 passes over all blocks; for each block an edge-adjusted copy
 *    of the obmc window is built and candidates are tried through
 *    check_block() / check_block_inter(): the block's current setting,
 *    stored per-reference vectors of the block and its neighbours, the zero
 *    vector, diamond and square patterns around the current vector, and an
 *    intra colour obtained from get_dc();
 *  - when a block changed, BLOCK_OPT is cleared on its eight neighbours;
 *  - when s->block_max_depth is 1, 2x2 groups of blocks are tested with
 *    check_4block_inter().
 * NOTE(review): many statements of this function (conditions, loop headers,
 * bookkeeping of change counters) are not visible in this listing. */
2986 static void iterative_me(SnowContext *s){
2987 int pass, mb_x, mb_y;
2988 const int b_width = s->b_width << s->block_max_depth;
2989 const int b_height= s->b_height << s->block_max_depth;
2990 const int b_stride= b_width;
// Keep copies of the range coder and of the block states around the encode_q_branch() calls.
2994 RangeCoder r = s->c;
2995 uint8_t state[sizeof(s->block_state)];
2996 memcpy(state, s->block_state, sizeof(s->block_state));
2997 for(mb_y= 0; mb_y<s->b_height; mb_y++)
2998 for(mb_x= 0; mb_x<s->b_width; mb_x++)
2999 encode_q_branch(s, 0, mb_x, mb_y);
3001 memcpy(s->block_state, state, sizeof(s->block_state));
3004 for(pass=0; pass<25; pass++){
3007 for(mb_y= 0; mb_y<b_height; mb_y++){
3008 for(mb_x= 0; mb_x<b_width; mb_x++){
3009 int dia_change, i, j, ref;
3010 int best_rd= INT_MAX, ref_rd;
3011 BlockNode backup, ref_b;
3012 const int index= mb_x + mb_y * b_stride;
3013 BlockNode *block= &s->block[index];
// The eight neighbours of the block; NULL where the neighbour would lie outside the block array.
3014 BlockNode *tb = mb_y ? &s->block[index-b_stride ] : NULL;
3015 BlockNode *lb = mb_x ? &s->block[index -1] : NULL;
3016 BlockNode *rb = mb_x+1<b_width ? &s->block[index +1] : NULL;
3017 BlockNode *bb = mb_y+1<b_height ? &s->block[index+b_stride ] : NULL;
3018 BlockNode *tlb= mb_x && mb_y ? &s->block[index-b_stride-1] : NULL;
3019 BlockNode *trb= mb_x+1<b_width && mb_y ? &s->block[index-b_stride+1] : NULL;
3020 BlockNode *blb= mb_x && mb_y+1<b_height ? &s->block[index+b_stride-1] : NULL;
3021 BlockNode *brb= mb_x+1<b_width && mb_y+1<b_height ? &s->block[index+b_stride+1] : NULL;
3022 const int b_w= (MB_SIZE >> s->block_max_depth);
3023 uint8_t obmc_edged[b_w*2][b_w*2];
// After the first pass, blocks that still carry BLOCK_OPT are treated specially;
// the flag is set on every block that is processed.
3025 if(pass && (block->type & BLOCK_OPT))
3027 block->type |= BLOCK_OPT;
// Start a new cache generation for this block; the cache is cleared when the counter is 0.
3031 if(!s->me_cache_generation)
3032 memset(s->me_cache, 0, sizeof(s->me_cache));
3033 s->me_cache_generation += 1<<22;
// Build the obmc window for this block; the loops below fold weights of
// window halves together (the visible tests are for the last block column
// and the last block row).
3038 memcpy(obmc_edged, obmc_tab[s->block_max_depth], b_w*b_w*4);
3040 for(y=0; y<b_w*2; y++)
3041 memset(obmc_edged[y], obmc_edged[y][0] + obmc_edged[y][b_w-1], b_w);
3042 if(mb_x==b_stride-1)
3043 for(y=0; y<b_w*2; y++)
3044 memset(obmc_edged[y]+b_w, obmc_edged[y][b_w] + obmc_edged[y][b_w*2-1], b_w);
3046 for(x=0; x<b_w*2; x++)
3047 obmc_edged[0][x] += obmc_edged[b_w-1][x];
3048 for(y=1; y<b_w; y++)
3049 memcpy(obmc_edged[y], obmc_edged[0], b_w*2);
3051 if(mb_y==b_height-1){
3052 for(x=0; x<b_w*2; x++)
3053 obmc_edged[b_w*2-1][x] += obmc_edged[b_w][x];
3054 for(y=b_w; y<b_w*2-1; y++)
3055 memcpy(obmc_edged[y], obmc_edged[b_w*2-1], b_w*2);
3059 //skip stuff outside the picture
// For blocks on the border of the block array, copy the input picture over
// the parts of the block's window that lie outside the plane.
3060 if(mb_x==0 || mb_y==0 || mb_x==b_width-1 || mb_y==b_height-1)
3062 uint8_t *src= s-> input_picture.data[0];
3063 uint8_t *dst= s->current_picture.data[0];
3064 const int stride= s->current_picture.linesize[0];
3065 const int block_w= MB_SIZE >> s->block_max_depth;
3066 const int sx= block_w*mb_x - block_w/2;
3067 const int sy= block_w*mb_y - block_w/2;
3068 const int w= s->plane[0].width;
3069 const int h= s->plane[0].height;
3073 memcpy(dst + sx + y*stride, src + sx + y*stride, block_w*2);
3074 for(y=h; y<sy+block_w*2; y++)
3075 memcpy(dst + sx + y*stride, src + sx + y*stride, block_w*2);
3077 for(y=sy; y<sy+block_w*2; y++)
3078 memcpy(dst + sx + y*stride, src + sx + y*stride, -sx);
3080 if(sx+block_w*2 > w){
3081 for(y=sy; y<sy+block_w*2; y++)
3082 memcpy(dst + w + y*stride, src + w + y*stride, sx+block_w*2 - w);
3086 // intra(black) = neighbors' contribution to the current block
3088 color[i]= get_dc(s, mb_x, mb_y, i);
3090 // get previous score (cannot be cached due to OBMC)
3091 if(pass > 0 && (block->type&BLOCK_INTRA)){
3092 int color0[3]= {block->color[0], block->color[1], block->color[2]};
3093 check_block(s, mb_x, mb_y, color0, 1, *obmc_edged, &best_rd);
3095 check_block_inter(s, mb_x, mb_y, block->mx, block->my, *obmc_edged, &best_rd);
// Search per reference frame; mvr points at this block's entry in the
// stored vectors of reference ref.
3099 for(ref=0; ref < s->ref_frames; ref++){
3100 int16_t (*mvr)[2]= &s->ref_mvs[ref][index];
3101 if(s->ref_scores[ref][index] > s->ref_scores[ref_b.ref][index]*3/2) //FIXME tune threshold
// Candidates: the stored vector of this block, the zero vector, and the
// stored vectors of the blocks above, left, right and below.
3106 check_block_inter(s, mb_x, mb_y, mvr[0][0], mvr[0][1], *obmc_edged, &best_rd);
3107 check_block_inter(s, mb_x, mb_y, 0, 0, *obmc_edged, &best_rd);
3109 check_block_inter(s, mb_x, mb_y, mvr[-b_stride][0], mvr[-b_stride][1], *obmc_edged, &best_rd);
3111 check_block_inter(s, mb_x, mb_y, mvr[-1][0], mvr[-1][1], *obmc_edged, &best_rd);
3113 check_block_inter(s, mb_x, mb_y, mvr[1][0], mvr[1][1], *obmc_edged, &best_rd);
3115 check_block_inter(s, mb_x, mb_y, mvr[b_stride][0], mvr[b_stride][1], *obmc_edged, &best_rd);
3118 //FIXME avoid subpel interpol / round to nearest integer
// Diamond pattern in steps of 4 vector units around the current vector;
// the number of sizes tried is s->avctx->dia_size, at least 1.
3121 for(i=0; i<FFMAX(s->avctx->dia_size, 1); i++){
3123 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+4*(i-j), block->my+(4*j), *obmc_edged, &best_rd);
3124 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx-4*(i-j), block->my-(4*j), *obmc_edged, &best_rd);
3125 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+4*(i-j), block->my-(4*j), *obmc_edged, &best_rd);
3126 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx-4*(i-j), block->my+(4*j), *obmc_edged, &best_rd);
// The eight positions at distance 1 around the current vector.
3132 static const int square[8][2]= {{+1, 0},{-1, 0},{ 0,+1},{ 0,-1},{+1,+1},{-1,-1},{+1,-1},{-1,+1},};
3135 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+square[i][0], block->my+square[i][1], *obmc_edged, &best_rd);
3137 //FIXME or try the standard 2 pass qpel or similar
// Remember the vector found for this reference.
3139 mvr[0][0]= block->mx;
3140 mvr[0][1]= block->my;
3141 if(ref_rd > best_rd){
// Try coding the block as intra with the colours from get_dc().
3149 check_block(s, mb_x, mb_y, color, 1, *obmc_edged, &best_rd);
3150 //FIXME RD style color selection
// The block changed: clear BLOCK_OPT on all existing neighbours.
3152 if(!same_block(block, &backup)){
3153 if(tb ) tb ->type &= ~BLOCK_OPT;
3154 if(lb ) lb ->type &= ~BLOCK_OPT;
3155 if(rb ) rb ->type &= ~BLOCK_OPT;
3156 if(bb ) bb ->type &= ~BLOCK_OPT;
3157 if(tlb) tlb->type &= ~BLOCK_OPT;
3158 if(trb) trb->type &= ~BLOCK_OPT;
3159 if(blb) blb->type &= ~BLOCK_OPT;
3160 if(brb) brb->type &= ~BLOCK_OPT;
3165 av_log(NULL, AV_LOG_ERROR, "pass:%d changed:%d\n", pass, change);
// With one level of block splitting, examine 2x2 groups of blocks.
3170 if(s->block_max_depth == 1){
3172 for(mb_y= 0; mb_y<b_height; mb_y+=2){
3173 for(mb_x= 0; mb_x<b_width; mb_x+=2){
3175 int best_rd, init_rd;
3176 const int index= mb_x + mb_y * b_stride;
3179 b[0]= &s->block[index];
3181 b[2]= b[0]+b_stride;
// Test whether the four blocks of the group are already identical.
3183 if(same_block(b[0], b[1]) &&
3184 same_block(b[0], b[2]) &&
3185 same_block(b[0], b[3]))
3188 if(!s->me_cache_generation)
3189 memset(s->me_cache, 0, sizeof(s->me_cache));
3190 s->me_cache_generation += 1<<22;
3192 init_rd= best_rd= get_4block_rd(s, mb_x, mb_y, 0);
3194 //FIXME more multiref search?
// Candidates for the whole group: the rounded mean of the four vectors
// with ref 0, then the vector and ref of each non-intra block.
3195 check_4block_inter(s, mb_x, mb_y,
3196 (b[0]->mx + b[1]->mx + b[2]->mx + b[3]->mx + 2) >> 2,
3197 (b[0]->my + b[1]->my + b[2]->my + b[3]->my + 2) >> 2, 0, &best_rd);
3200 if(!(b[i]->type&BLOCK_INTRA))
3201 check_4block_inter(s, mb_x, mb_y, b[i]->mx, b[i]->my, b[i]->ref, &best_rd);
3203 if(init_rd != best_rd)
3207 av_log(NULL, AV_LOG_ERROR, "pass:4mv changed:%d\n", change*4);
/*
 * Quantize the coefficients of subband b, reading src and writing dst.
 * The divisor qmul is looked up in qexp[] using the clipped sum of the frame
 * qlog and the band qlog, and ENCODER_EXTRA_BITS is added to its shift.
 * stride is the row pitch used to index both src and dst.
 * bias: a nonzero value selects a rounding offset of 0, zero selects (3*qmul)>>3.
 */
3211 static void quantize(SnowContext *s, SubBand *b, IDWTELEM *dst, DWTELEM *src, int stride, int bias){
3212 const int level= b->level;
3213 const int w= b->width;
3214 const int h= b->height;
3215 const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
3216 const int qmul= qexp[qlog&(QROOT-1)]<<((qlog>>QSHIFT) + ENCODER_EXTRA_BITS);
3217 int x,y, thres1, thres2;
// Lossless mode: coefficients are copied through without any division.
3220 if(s->qlog == LOSSLESS_QLOG){
3223 dst[x + y*stride]= src[x + y*stride];
// From here on bias holds the rounding offset rather than the flag passed in.
3227 bias= bias ? 0 : (3*qmul)>>3;
3228 thres1= ((qmul - bias)>>QEXPSHIFT) - 1;
// Variant that divides by qmul without adding a rounding offset.
3234 int i= src[x + y*stride];
// The unsigned cast lets a single compare act as a range test on i;
// coefficients failing it are presumably the ones stored as 0 below (confirm).
3236 if((unsigned)(i+thres1) > thres2){
3239 i/= qmul; //FIXME optimize
3240 dst[x + y*stride]= i;
3244 i/= qmul; //FIXME optimize
// the quotient is stored negated on this path
3245 dst[x + y*stride]= -i;
3248 dst[x + y*stride]= 0;
// Variant that adds the rounding offset before dividing by qmul.
3254 int i= src[x + y*stride];
3256 if((unsigned)(i+thres1) > thres2){
3259 i= (i + bias) / qmul; //FIXME optimize
3260 dst[x + y*stride]= i;
3264 i= (i + bias) / qmul; //FIXME optimize
3265 dst[x + y*stride]= -i;
3268 dst[x + y*stride]= 0;
// Only relevant for the disabled timer below: true for the last decomposition level.
3272 if(level+1 == s->spatial_decomposition_count){
3273 // STOP_TIMER("quantize")
/*
 * Dequantize rows start_y .. end_y-1 of subband b in place, addressing the
 * rows through the slice buffer sb instead of through src.
 * Each value is multiplied by qmul, offset by qadd and shifted down by
 * QEXPSHIFT. qmul comes from qexp[] for the clipped sum of the frame and band
 * qlog, and qadd is s->qbias scaled by qmul and shifted by QBIAS_SHIFT.
 * Nothing is done when s->qlog equals LOSSLESS_QLOG.
 */
3277 static void dequantize_slice_buffered(SnowContext *s, slice_buffer * sb, SubBand *b, IDWTELEM *src, int stride, int start_y, int end_y){
3278 const int w= b->width;
3279 const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
3280 const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
3281 const int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
3285 if(s->qlog == LOSSLESS_QLOG) return;
3287 for(y=start_y; y<end_y; y++){
3288 // DWTELEM * line = slice_buffer_get_line_from_address(sb, src + (y * stride));
// Row y of the band sits at slice buffer line y*stride_line + buf_y_offset,
// starting buf_x_offset elements into that line.
3289 IDWTELEM * line = slice_buffer_get_line(sb, (y * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
// Negating before and after the shift gives this path the same magnitude
// rounding as the plain form on the next line (presumably used for i<0; confirm).
3293 line[x]= -((-i*qmul + qadd)>>(QEXPSHIFT)); //FIXME try different bias
3295 line[x]= (( i*qmul + qadd)>>(QEXPSHIFT));
// Timer is only stopped for bands wider than 200 coefficients.
3299 if(w > 200 /*level+1 == s->spatial_decomposition_count*/){
3300 STOP_TIMER("dquant")
/*
 * Dequantize all coefficients of subband b in place in src.
 * stride is the row pitch of src.
 * Each value is multiplied by qmul, offset by qadd and shifted down by
 * QEXPSHIFT, using the same qmul and qadd derivation as the slice buffered
 * variant. Nothing is done when s->qlog equals LOSSLESS_QLOG.
 */
3304 static void dequantize(SnowContext *s, SubBand *b, IDWTELEM *src, int stride){
3305 const int w= b->width;
3306 const int h= b->height;
3307 const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
3308 const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
3309 const int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
3313 if(s->qlog == LOSSLESS_QLOG) return;
3317 int i= src[x + y*stride];
// Negate, scale and negate back so that the rounding acts on the magnitude
// (presumably the i<0 path; confirm against the surrounding condition).
3319 src[x + y*stride]= -((-i*qmul + qadd)>>(QEXPSHIFT)); //FIXME try different bias
3321 src[x + y*stride]= (( i*qmul + qadd)>>(QEXPSHIFT));
// Timer is only stopped for bands wider than 200 coefficients.
3325 if(w > 200 /*level+1 == s->spatial_decomposition_count*/){
3326 STOP_TIMER("dquant")
/*
 * Subtract a spatial prediction from every coefficient of subband b, in place.
 * stride is the row pitch of src.
 * The scan runs from the bottom right towards the top left, so the left and
 * upper neighbours read for the prediction still hold their original values.
 * Visible predictors: median of left, top and top-right; median of left, top
 * and left+top-topleft; left only; top only. The choice between them
 * presumably depends on x, y and use_median (confirm).
 */
3330 static void decorrelate(SnowContext *s, SubBand *b, IDWTELEM *src, int stride, int inverse, int use_median){
3331 const int w= b->width;
3332 const int h= b->height;
3335 for(y=h-1; y>=0; y--){
3336 for(x=w-1; x>=0; x--){
3337 int i= x + y*stride;
// median of left, top and top-right; only when a row above and a column to the right exist
3341 if(y && x+1<w) src[i] -= mid_pred(src[i - 1], src[i - stride], src[i - stride + 1]);
3342 else src[i] -= src[i - 1];
// median of left, top and the gradient left+top-topleft
3344 if(y) src[i] -= mid_pred(src[i - 1], src[i - stride], src[i - 1] + src[i - stride] - src[i - 1 - stride]);
3345 else src[i] -= src[i - 1];
// top neighbour only; the first row is left unpredicted on this path
3348 if(y) src[i] -= src[i - stride];
/*
 * Add the spatial prediction back to rows start_y .. end_y-1 of subband b,
 * addressing the rows through the slice buffer sb.
 * The predictors are the same as the ones subtracted by decorrelate(), with
 * prev[] standing for the row above (its assignment is presumably taken from
 * the previous value of line; confirm).
 */
3354 static void correlate_slice_buffered(SnowContext *s, slice_buffer * sb, SubBand *b, IDWTELEM *src, int stride, int inverse, int use_median, int start_y, int end_y){
3355 const int w= b->width;
3360 IDWTELEM * line=0; // silence silly "could be used without having been initialized" warning
// Fetch the row just above the first row to process.
3364 line = slice_buffer_get_line(sb, ((start_y - 1) * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
3366 for(y=start_y; y<end_y; y++){
3368 // line = slice_buffer_get_line_from_address(sb, src + (y * stride));
3369 line = slice_buffer_get_line(sb, (y * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
// median of left, top and top-right
3373 if(y && x+1<w) line[x] += mid_pred(line[x - 1], prev[x], prev[x + 1]);
3374 else line[x] += line[x - 1];
// median of left, top and the gradient left+top-topleft
3376 if(y) line[x] += mid_pred(line[x - 1], prev[x], line[x - 1] + prev[x] - prev[x - 1]);
3377 else line[x] += line[x - 1];
// top neighbour only
3380 if(y) line[x] += prev[x];
3385 // STOP_TIMER("correlate")
/*
 * Add the spatial prediction back to every coefficient of subband b, in
 * place in src. stride is the row pitch of src.
 * Each visible statement is the additive counterpart of the matching
 * subtraction in decorrelate().
 */
3388 static void correlate(SnowContext *s, SubBand *b, IDWTELEM *src, int stride, int inverse, int use_median){
3389 const int w= b->width;
3390 const int h= b->height;
3395 int i= x + y*stride;
// median of left, top and top-right
3399 if(y && x+1<w) src[i] += mid_pred(src[i - 1], src[i - stride], src[i - stride + 1]);
3400 else src[i] += src[i - 1];
// median of left, top and the gradient left+top-topleft
3402 if(y) src[i] += mid_pred(src[i - 1], src[i - stride], src[i - 1] + src[i - stride] - src[i - 1 - stride]);
3403 else src[i] += src[i - 1];
// top neighbour only
3406 if(y) src[i] += src[i - stride];
/*
 * Write the frame header to the range coder s->c.
 * Order on the visible lines: keyframe flag, global stream parameters, band
 * qlog values, then the per frame parameters as differences against the
 * last_* fields, which are updated at the end.
 * The last argument of put_symbol is 0 for the plain values and 1 for the
 * differences and band qlogs (presumably the signed flag; confirm).
 */
3412 static void encode_header(SnowContext *s){
3413 int plane_index, level, orientation;
// The keyframe flag is coded with a freshly initialised local state.
3416 memset(kstate, MID_STATE, sizeof(kstate));
3418 put_rac(&s->c, kstate, s->keyframe);
// On a keyframe, or when always_reset is set, the last_* predictors are reset.
3419 if(s->keyframe || s->always_reset){
3421 s->last_spatial_decomposition_type=
3425 s->last_block_max_depth= 0;
// Global parameters (presumably written for keyframes only; confirm).
3428 put_symbol(&s->c, s->header_state, s->version, 0);
3429 put_rac(&s->c, s->header_state, s->always_reset);
3430 put_symbol(&s->c, s->header_state, s->temporal_decomposition_type, 0);
3431 put_symbol(&s->c, s->header_state, s->temporal_decomposition_count, 0);
3432 put_symbol(&s->c, s->header_state, s->spatial_decomposition_count, 0);
3433 put_symbol(&s->c, s->header_state, s->colorspace_type, 0);
3434 put_symbol(&s->c, s->header_state, s->chroma_h_shift, 0);
3435 put_symbol(&s->c, s->header_state, s->chroma_v_shift, 0);
3436 put_rac(&s->c, s->header_state, s->spatial_scalability);
3437 // put_rac(&s->c, s->header_state, s->rate_scalability);
// stored minus one
3438 put_symbol(&s->c, s->header_state, s->max_ref_frames-1, 0);
// Band qlogs are written for planes 0 and 1 only, and orientation 2 is skipped.
3440 for(plane_index=0; plane_index<2; plane_index++){
3441 for(level=0; level<s->spatial_decomposition_count; level++){
3442 for(orientation=level ? 1:0; orientation<4; orientation++){
3443 if(orientation==2) continue;
3444 put_symbol(&s->c, s->header_state, s->plane[plane_index].band[level][orientation].qlog, 1);
// Per frame parameters, each coded as the difference to the previously coded value.
3449 put_symbol(&s->c, s->header_state, s->spatial_decomposition_type - s->last_spatial_decomposition_type, 1);
3450 put_symbol(&s->c, s->header_state, s->qlog - s->last_qlog , 1);
3451 put_symbol(&s->c, s->header_state, s->mv_scale - s->last_mv_scale, 1);
3452 put_symbol(&s->c, s->header_state, s->qbias - s->last_qbias , 1);
3453 put_symbol(&s->c, s->header_state, s->block_max_depth - s->last_block_max_depth, 1);
// Remember what was coded so the next header can be coded relative to it.
3455 s->last_spatial_decomposition_type= s->spatial_decomposition_type;
3456 s->last_qlog = s->qlog;
3457 s->last_qbias = s->qbias;
3458 s->last_mv_scale = s->mv_scale;
3459 s->last_block_max_depth = s->block_max_depth;
/*
 * Read the frame header from the range coder s->c, in the same symbol order
 * that encode_header() writes.
 * The per frame parameters are read as differences and added onto the values
 * already held in the context. Unsupported values of version,
 * spatial_decomposition_type and block_max_depth are reported through av_log.
 * The return values are presumably 0 on success and negative on error (confirm).
 * NOTE(review): the three error messages below have no trailing newline.
 */
3462 static int decode_header(SnowContext *s){
3463 int plane_index, level, orientation;
// The keyframe flag is decoded with a freshly initialised local state.
3466 memset(kstate, MID_STATE, sizeof(kstate));
3468 s->keyframe= get_rac(&s->c, kstate);
// On a keyframe, or when always_reset is set, the accumulated parameters are reset.
3469 if(s->keyframe || s->always_reset){
3471 s->spatial_decomposition_type=
3475 s->block_max_depth= 0;
// Global parameters (presumably read for keyframes only; confirm).
3478 s->version= get_symbol(&s->c, s->header_state, 0);
3480 av_log(s->avctx, AV_LOG_ERROR, "version %d not supported", s->version);
3483 s->always_reset= get_rac(&s->c, s->header_state);
3484 s->temporal_decomposition_type= get_symbol(&s->c, s->header_state, 0);
3485 s->temporal_decomposition_count= get_symbol(&s->c, s->header_state, 0);
3486 s->spatial_decomposition_count= get_symbol(&s->c, s->header_state, 0);
3487 s->colorspace_type= get_symbol(&s->c, s->header_state, 0);
3488 s->chroma_h_shift= get_symbol(&s->c, s->header_state, 0);
3489 s->chroma_v_shift= get_symbol(&s->c, s->header_state, 0);
3490 s->spatial_scalability= get_rac(&s->c, s->header_state);
3491 // s->rate_scalability= get_rac(&s->c, s->header_state);
// stored minus one in the bitstream
3492 s->max_ref_frames= get_symbol(&s->c, s->header_state, 0)+1;
// Band qlogs: plane 2 reuses the value of plane 1, orientation 2 reuses the
// value of orientation 1, and everything else is read from the bitstream.
3494 for(plane_index=0; plane_index<3; plane_index++){
3495 for(level=0; level<s->spatial_decomposition_count; level++){
3496 for(orientation=level ? 1:0; orientation<4; orientation++){
3498 if (plane_index==2) q= s->plane[1].band[level][orientation].qlog;
3499 else if(orientation==2) q= s->plane[plane_index].band[level][1].qlog;
3500 else q= get_symbol(&s->c, s->header_state, 1);
3501 s->plane[plane_index].band[level][orientation].qlog= q;
// Per frame parameters arrive as differences to the previous values.
3507 s->spatial_decomposition_type+= get_symbol(&s->c, s->header_state, 1);
3508 if(s->spatial_decomposition_type > 1){
3509 av_log(s->avctx, AV_LOG_ERROR, "spatial_decomposition_type %d not supported", s->spatial_decomposition_type);
3513 s->qlog += get_symbol(&s->c, s->header_state, 1);
3514 s->mv_scale += get_symbol(&s->c, s->header_state, 1);
3515 s->qbias += get_symbol(&s->c, s->header_state, 1);
3516 s->block_max_depth+= get_symbol(&s->c, s->header_state, 1);
// Only depths 0 and 1 are accepted; anything else is logged and reset to 0.
3517 if(s->block_max_depth > 1 || s->block_max_depth < 0){
3518 av_log(s->avctx, AV_LOG_ERROR, "block_max_depth= %d is too large", s->block_max_depth);
3519 s->block_max_depth= 0;
/*
 * Step through QROOT entries, multiplying the running value v by the QROOT-th
 * root of 2 each time, so v doubles over one full pass of the loop.
 * Presumably each v is stored into qexp[i] (confirm).
 */
3526 static void init_qexp(void){
3530 for(i=0; i<QROOT; i++){
3532 v *= pow(2, 1.0 / QROOT);
/*
 * Initialisation that the visible code performs on the SnowContext stored in
 * avctx->priv_data: sets up the dsp function tables, fixes the decomposition
 * count and chroma shifts, allocates the two full frame DWT buffers, lays out
 * every subband of every plane inside those buffers, fills scale_mv_ref and
 * requests a buffer for mconly_picture.
 * NOTE(review): the av_mallocz results and the get_buffer result are not
 * checked on the lines visible here.
 */
3536 static int common_init(AVCodecContext *avctx){
3537 SnowContext *s = avctx->priv_data;
3539 int level, orientation, plane_index, dec;
3544 dsputil_init(&s->dsp, avctx);
// Rows 0 and 1 of both qpel tables are pointed at the matching h264 qpel functions.
3547 s->dsp.put_qpel_pixels_tab [0][dy+dx/4]=\
3548 s->dsp.put_no_rnd_qpel_pixels_tab[0][dy+dx/4]=\
3549 s->dsp.put_h264_qpel_pixels_tab[0][dy+dx/4];\
3550 s->dsp.put_qpel_pixels_tab [1][dy+dx/4]=\
3551 s->dsp.put_no_rnd_qpel_pixels_tab[1][dy+dx/4]=\
3552 s->dsp.put_h264_qpel_pixels_tab[1][dy+dx/4];
// mcfh installs mc_block_hpel<dx><dy>16 in row 0 and mc_block_hpel<dx><dy>8
// in row 1 of both put_pixels tables.
3571 #define mcfh(dx,dy)\
3572 s->dsp.put_pixels_tab [0][dy/4+dx/8]=\
3573 s->dsp.put_no_rnd_pixels_tab[0][dy/4+dx/8]=\
3574 mc_block_hpel ## dx ## dy ## 16;\
3575 s->dsp.put_pixels_tab [1][dy/4+dx/8]=\
3576 s->dsp.put_no_rnd_pixels_tab[1][dy/4+dx/8]=\
3577 mc_block_hpel ## dx ## dy ## 8;
// Five decomposition levels and 1 bit chroma shifts in both directions are hard coded here.
3587 dec= s->spatial_decomposition_count= 5;
3588 s->spatial_decomposition_type= avctx->prediction_method; //FIXME add decorrelator type r transform_type
3590 s->chroma_h_shift= 1; //FIXME XXX
3591 s->chroma_v_shift= 1;
3593 // dec += FFMAX(s->chroma_h_shift, s->chroma_v_shift);
3595 width= s->avctx->width;
3596 height= s->avctx->height;
// One full frame buffer per element type; every subband points into these.
3598 s->spatial_idwt_buffer= av_mallocz(width*height*sizeof(IDWTELEM));
3599 s->spatial_dwt_buffer= av_mallocz(width*height*sizeof(DWTELEM)); //FIXME this does not belong here
3601 s->mv_scale= (s->avctx->flags & CODEC_FLAG_QPEL) ? 2 : 4;
3602 s->block_max_depth= (s->avctx->flags & CODEC_FLAG_4MV) ? 1 : 0;
3604 for(plane_index=0; plane_index<3; plane_index++){
3605 int w= s->avctx->width;
3606 int h= s->avctx->height;
// chroma subsampling (presumably applied only when plane_index is nonzero; confirm)
3609 w>>= s->chroma_h_shift;
3610 h>>= s->chroma_v_shift;
3612 s->plane[plane_index].width = w;
3613 s->plane[plane_index].height= h;
3614 //av_log(NULL, AV_LOG_DEBUG, "%d %d\n", w, h);
// Levels are visited from the highest index down to 0.
3615 for(level=s->spatial_decomposition_count-1; level>=0; level--){
3616 for(orientation=level ? 1 : 0; orientation<4; orientation++){
3617 SubBand *b= &s->plane[plane_index].band[level][orientation];
3619 b->buf= s->spatial_dwt_buffer;
// Row pitch is the plane width scaled by 2^(count - level).
3621 b->stride= s->plane[plane_index].width << (s->spatial_decomposition_count - level);
// Halve w and h; orientations 0 and 2 round the width up, 0 and 1 round the height up.
3622 b->width = (w + !(orientation&1))>>1;
3623 b->height= (h + !(orientation>1))>>1;
3625 b->stride_line = 1 << (s->spatial_decomposition_count - level);
3626 b->buf_x_offset = 0;
3627 b->buf_y_offset = 0;
// horizontal offset into the row (presumably for orientations with bit 0 set; confirm)
3631 b->buf_x_offset = (w+1)>>1;
// half a stride further down (presumably for orientations above 1; confirm)
3634 b->buf += b->stride>>1;
3635 b->buf_y_offset = b->stride_line >> 1;
// ibuf is the same element offset inside the IDWTELEM buffer.
3637 b->ibuf= s->spatial_idwt_buffer + (b->buf - s->spatial_dwt_buffer);
// The parent is the band of the same orientation one level lower in index.
3640 b->parent= &s->plane[plane_index].band[level-1][orientation];
3641 b->x_coeff=av_mallocz(((b->width+1) * b->height+1)*sizeof(x_and_coeff));
// scale_mv_ref[i][j] holds the ratio (i+1)/(j+1) scaled by 256.
3648 for(i=0; i<MAX_REF_FRAMES; i++)
3649 for(j=0; j<MAX_REF_FRAMES; j++)
3650 scale_mv_ref[i][j] = 256*(i+1)/(j+1);
3654 width= s->width= avctx->width;
3655 height= s->height= avctx->height;
3657 assert(width && height);
3659 s->avctx->get_buffer(s->avctx, &s->mconly_picture);
/*
 * Convert a quality value into a qlog value:
 * QROOT times the base 2 logarithm of qscale divided by FF_QP2LAMBDA, rounded
 * with rint(), plus the constant 61*QROOT/8.
 * The result is converted to int on return.
 */
3664 static int qscale2qlog(int qscale){
3665 return rint(QROOT*log(qscale / (float)FF_QP2LAMBDA)/log(2))
3666 + 61*QROOT/8; //<64 >60
/*
 * One pass rate control step.
 * Builds a complexity estimate from the subbands of plane 0, hands it to the
 * rate controller through s->m.current_picture, and then sets pict->quality,
 * s->lambda and s->qlog from the estimated qscale.
 * Presumably returns the qlog change, or a sentinel when the estimated quality
 * is negative (confirm; the return statements are not on the visible lines).
 */
3669 static int ratecontrol_1pass(SnowContext *s, AVFrame *pict)
3671 /* estimate the frame's complexity as a sum of weighted dwt coefs.
3672 * FIXME we know exact mv bits at this point,
3673 * but ratecontrol isn't set up to include them. */
3674 uint32_t coef_sum= 0;
3675 int level, orientation, delta_qlog;
3677 for(level=0; level<s->spatial_decomposition_count; level++){
3678 for(orientation=level ? 1 : 0; orientation<4; orientation++){
3679 SubBand *b= &s->plane[0].band[level][orientation];
3680 IDWTELEM *buf= b->ibuf;
3681 const int w= b->width;
3682 const int h= b->height;
3683 const int stride= b->stride;
// The weight uses a fixed base of 2*QROOT in place of the frame qlog.
3684 const int qlog= av_clip(2*QROOT + b->qlog, 0, QROOT*16);
3685 const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
// 16 bit fixed point reciprocal of qmul
3686 const int qdiv= (1<<16)/qmul;
3688 //FIXME this is ugly
// Copy the band from buf into ibuf so that decorrelate can work on the copy.
3691 buf[x+y*stride]= b->buf[x+y*stride];
3693 decorrelate(s, b, buf, stride, 1, 0);
// Accumulate the coefficient magnitude multiplied by the reciprocal, shifted back down.
3696 coef_sum+= abs(buf[x+y*stride]) * qdiv >> 16;
3700 /* ugly, ratecontrol just takes a sqrt again */
3701 coef_sum = (uint64_t)coef_sum * coef_sum >> 16;
3702 assert(coef_sum < INT_MAX);
// I frames report the estimate as mb_var_sum, other frames as mc_mb_var_sum.
3704 if(pict->pict_type == I_TYPE){
3705 s->m.current_picture.mb_var_sum= coef_sum;
3706 s->m.current_picture.mc_mb_var_sum= 0;
3708 s->m.current_picture.mc_mb_var_sum= coef_sum;
3709 s->m.current_picture.mb_var_sum= 0;
3712 pict->quality= ff_rate_estimate_qscale(&s->m, 1);
3713 if (pict->quality < 0)
3715 s->lambda= pict->quality * 3/2;
// After these two lines s->qlog equals qscale2qlog(pict->quality).
3716 delta_qlog= qscale2qlog(pict->quality) - s->qlog;
3717 s->qlog+= delta_qlog;
/*
 * Derive a qlog value for every subband of plane p from the response of the
 * inverse DWT to a single coefficient in that band.
 * For each band the IDWT buffer is cleared, one coefficient near the centre
 * of the band is set to 256*16, the inverse transform is run, and b->qlog is
 * computed from an error value gathered over the output (presumably a sum of
 * squares of d; confirm).
 */
3721 static void calculate_vissual_weight(SnowContext *s, Plane *p){
3722 int width = p->width;
3723 int height= p->height;
3724 int level, orientation, x, y;
3726 for(level=0; level<s->spatial_decomposition_count; level++){
3727 for(orientation=level ? 1 : 0; orientation<4; orientation++){
3728 SubBand *b= &p->band[level][orientation];
3729 IDWTELEM *ibuf= b->ibuf;
3732 memset(s->spatial_idwt_buffer, 0, sizeof(*s->spatial_idwt_buffer)*width*height);
// single test coefficient at the middle of this band
3733 ibuf[b->width/2 + b->height/2*b->stride]= 256*16;
3734 ff_spatial_idwt(s->spatial_idwt_buffer, width, height, width, s->spatial_decomposition_type, s->spatial_decomposition_count);
3735 for(y=0; y<height; y++){
3736 for(x=0; x<width; x++){
3737 int64_t d= s->spatial_idwt_buffer[x + y*width]*16;
// logarithm of 352256/sqrt(error) in base 2^(1/QROOT), rounded by adding 0.5
3742 b->qlog= (int)(log(352256.0/sqrt(error)) / log(pow(2.0, 1.0/QROOT))+0.5);
3743 // av_log(NULL, AV_LOG_DEBUG, "%d %d %d\n", level, orientation, b->qlog/*, sqrt(error)*/);
// Encoder initialisation as far as the visible lines show:
// rejects use unless strict_std_compliance allows experimental codecs,
// rejects the 9/7 wavelet together with a global quality of 0,
// allocates the motion estimation scratch buffers, sets up rate control,
// computes the band weights and accepts only PIX_FMT_YUV420P input.
// NOTE(review): the av_mallocz results are not checked on the visible lines.
3748 static int encode_init(AVCodecContext *avctx)
3750 SnowContext *s = avctx->priv_data;
3753 if(avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL){
3754 av_log(avctx, AV_LOG_ERROR, "this codec is under development, files encoded with it may not be decodable with future versions!!!\n"
3755 "use vstrict=-2 / -strict -2 to use it anyway\n");
// QSCALE mode with global_quality 0 is the lossless setting named in the message.
3759 if(avctx->prediction_method == DWT_97
3760 && (avctx->flags & CODEC_FLAG_QSCALE)
3761 && avctx->global_quality == 0){
3762 av_log(avctx, AV_LOG_ERROR, "the 9/7 wavelet is incompatible with lossless mode\n");
3772 s->m.flags = avctx->flags;
3773 s->m.bit_rate= avctx->bit_rate;
// Scratch memory used through the MpegEncContext s->m.
3775 s->m.me.scratchpad= av_mallocz((avctx->width+64)*2*16*2*sizeof(uint8_t));
3776 s->m.me.map = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t));
3777 s->m.me.score_map = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t));
3778 s->m.obmc_scratchpad= av_mallocz(MB_SIZE*MB_SIZE*12*sizeof(uint32_t));
3779 h263_encode_init(&s->m); //mv_penalty
// avctx->refs is clamped to the range 1 .. MAX_REF_FRAMES
3781 s->max_ref_frames = FFMAX(FFMIN(avctx->refs, MAX_REF_FRAMES), 1);
// First pass: make sure a 256 byte stats_out buffer exists.
3783 if(avctx->flags&CODEC_FLAG_PASS1){
3784 if(!avctx->stats_out)
3785 avctx->stats_out = av_mallocz(256);
// Rate control is initialised for the second pass or whenever QSCALE is not set.
3787 if((avctx->flags&CODEC_FLAG_PASS2) || !(avctx->flags&CODEC_FLAG_QSCALE)){
3788 if(ff_rate_control_init(&s->m) < 0)
// pass1_rc is set when neither QSCALE nor PASS2 is requested.
3791 s->pass1_rc= !(avctx->flags & (CODEC_FLAG_QSCALE|CODEC_FLAG_PASS2));
3793 for(plane_index=0; plane_index<3; plane_index++){
3794 calculate_vissual_weight(s, &s->plane[plane_index]);
3798 avctx->coded_frame= &s->current_picture;
3799 switch(avctx->pix_fmt){
3800 // case PIX_FMT_YUV444P:
3801 // case PIX_FMT_YUV422P:
3802 case PIX_FMT_YUV420P:
3804 // case PIX_FMT_YUV411P:
3805 // case PIX_FMT_YUV410P:
3806 s->colorspace_type= 0;
3808 /* case PIX_FMT_RGB32:
3812 av_log(avctx, AV_LOG_ERROR, "format not supported\n");
3815 // avcodec_get_chroma_sub_sample(avctx->pix_fmt, &s->chroma_h_shift, &s->chroma_v_shift);
3816 s->chroma_h_shift= 1;
3817 s->chroma_v_shift= 1;
3819 ff_set_cmp(&s->dsp, s->dsp.me_cmp, s->avctx->me_cmp);
3820 ff_set_cmp(&s->dsp, s->dsp.me_sub_cmp, s->avctx->me_sub_cmp);
3822 s->avctx->get_buffer(s->avctx, &s->input_picture);
3824 if(s->avctx->me_method == ME_ITER){
3826 int size= s->b_width * s->b_height << 2*s->block_max_depth;
3827 for(i=0; i<s->max_ref_frames; i++){
3828 s->ref_mvs[i]= av_mallocz(size*sizeof(int16_t[2]));
3829 s->ref_scores[i]= av_mallocz(size*sizeof(uint32_t));
/*
 * Build interpolated copies of the planes of frame into halfpel[1..3][p].
 * Each copy is allocated with EDGE_WIDTH rows above and below, and the stored
 * pointer is advanced by EDGE_WIDTH rows plus EDGE_WIDTH bytes.
 * The samples come from a 6 tap filter with weights 1, -5, 20, 20, -5, 1,
 * rounding constant 16 and a right shift by 5, applied along the row for
 * halfpel[1] and along the column for halfpel[2] and halfpel[3].
 * NOTE(review): the av_malloc results are not checked, and no clipping of the
 * filter result is visible on these lines.
 */
3836 static void halfpel_interpol(SnowContext *s, uint8_t *halfpel[4][4], AVFrame *frame){
3839 assert(!(s->avctx->flags & CODEC_FLAG_EMU_EDGE));
3843 int w= s->avctx->width >>is_chroma;
3844 int h= s->avctx->height >>is_chroma;
3845 int ls= frame->linesize[p];
3846 uint8_t *src= frame->data[p];
3848 halfpel[1][p]= (uint8_t*)av_malloc(ls * (h+2*EDGE_WIDTH)) + EDGE_WIDTH*(1+ls);
3849 halfpel[2][p]= (uint8_t*)av_malloc(ls * (h+2*EDGE_WIDTH)) + EDGE_WIDTH*(1+ls);
3850 halfpel[3][p]= (uint8_t*)av_malloc(ls * (h+2*EDGE_WIDTH)) + EDGE_WIDTH*(1+ls);
// filter along the row: neighbours are 1 byte apart
3857 halfpel[1][p][i]= (20*(src[i] + src[i+1]) - 5*(src[i-1] + src[i+2]) + (src[i-2] + src[i+3]) + 16 )>>5;
// filter along the column: neighbours are ls bytes apart
3864 halfpel[2][p][i]= (20*(src[i] + src[i+ls]) - 5*(src[i-ls] + src[i+2*ls]) + (src[i-2*ls] + src[i+3*ls]) + 16 )>>5;
// same column filter; src has presumably been pointed at another buffer by now (confirm)
3872 halfpel[3][p][i]= (20*(src[i] + src[i+ls]) - 5*(src[i-ls] + src[i+2*ls]) + (src[i-2*ls] + src[i+3*ls]) + 16 )>>5;
/*
 * Prepare the picture buffers for coding the next frame.
 * The current picture gets its edges drawn and becomes last_picture[0], the
 * older references move up by one slot, and a new buffer is requested for
 * current_picture. A get_buffer failure is logged.
 */
3880 static int frame_start(SnowContext *s){
3882 int w= s->avctx->width; //FIXME round up to x16 ?
3883 int h= s->avctx->height;
// Draw edges on all three planes; the chroma planes use half the size and half the edge width.
3885 if(s->current_picture.data[0]){
3886 draw_edges(s->current_picture.data[0], s->current_picture.linesize[0], w , h , EDGE_WIDTH );
3887 draw_edges(s->current_picture.data[1], s->current_picture.linesize[1], w>>1, h>>1, EDGE_WIDTH/2);
3888 draw_edges(s->current_picture.data[2], s->current_picture.linesize[2], w>>1, h>>1, EDGE_WIDTH/2);
// Rotate the reference list: the oldest entry is kept in tmp and all others move up by one.
3891 tmp= s->last_picture[s->max_ref_frames-1];
3892 memmove(s->last_picture+1, s->last_picture, (s->max_ref_frames-1)*sizeof(AVFrame));
3893 memmove(s->halfpel_plane+1, s->halfpel_plane, (s->max_ref_frames-1)*sizeof(void*)*4*4);
3894 #ifdef USE_HALFPEL_PLANE
3895 if(s->current_picture.data[0])
3896 halfpel_interpol(s, s->halfpel_plane[0], &s->current_picture);
3898 s->last_picture[0]= s->current_picture;
3899 s->current_picture= tmp;
// Walk the filled reference slots; the walk reacts to a key frame in the slot before i.
3905 for(i=0; i<s->max_ref_frames && s->last_picture[i].data[0]; i++)
3906 if(i && s->last_picture[i-1].key_frame)
3911 s->current_picture.reference= 1;
3912 if(s->avctx->get_buffer(s->avctx, &s->current_picture) < 0){
3913 av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed\n");
3917 s->current_picture.key_frame= s->keyframe;
/*
 * Encode the frame passed in data into buf using the range coder.
 * Visible stages: copy the input picture, choose the picture type and
 * quality, set up s->m for motion estimation on P frames, code the blocks,
 * then for each plane transform, quantize and code the subbands and rebuild
 * the reconstruction, and finally update the rate control statistics.
 * Returns the value of ff_rac_terminate(c).
 */
3922 static int encode_frame(AVCodecContext *avctx, unsigned char *buf, int buf_size, void *data){
3923 SnowContext *s = avctx->priv_data;
3924 RangeCoder * const c= &s->c;
3925 AVFrame *pict = data;
3926 const int width= s->avctx->width;
3927 const int height= s->avctx->height;
3928 int level, orientation, plane_index, i, y;
// Backups of the coder context states, restored if the frame is coded a second time.
3929 uint8_t rc_header_bak[sizeof(s->header_state)];
3930 uint8_t rc_block_bak[sizeof(s->block_state)];
3932 ff_init_range_encoder(c, buf, buf_size);
3933 ff_build_rac_states(c, 0.05*(1LL<<32), 256-8);
// Copy the caller picture row by row into s->input_picture.
3937 for(y=0; y<(height>>shift); y++)
3938 memcpy(&s->input_picture.data[i][y * s->input_picture.linesize[i]],
3939 &pict->data[i][y * pict->linesize[i]],
3942 s->new_picture = *pict;
3944 s->m.picture_number= avctx->frame_number;
// Second pass: the picture type comes from the rate control entry of this frame.
3945 if(avctx->flags&CODEC_FLAG_PASS2){
3947 pict->pict_type= s->m.rc_context.entry[avctx->frame_number].new_pict_type;
3948 s->keyframe= pict->pict_type==FF_I_TYPE;
3949 if(!(avctx->flags&CODEC_FLAG_QSCALE)) {
3950 pict->quality= ff_rate_estimate_qscale(&s->m, 0);
3951 if (pict->quality < 0)
// Otherwise a keyframe is forced every gop_size frames, or always when gop_size is 0.
3955 s->keyframe= avctx->gop_size==0 || avctx->frame_number % avctx->gop_size == 0;
3957 pict->pict_type= s->keyframe ? FF_I_TYPE : FF_P_TYPE;
// The very first frame in one pass rate control starts from a fixed quality.
3960 if(s->pass1_rc && avctx->frame_number == 0)
3961 pict->quality= 2*FF_QP2LAMBDA;
3963 s->qlog= qscale2qlog(pict->quality);
3964 s->lambda = pict->quality * 3/2;
// A negative qlog, or quality 0 together with QSCALE, selects lossless coding.
3966 if(s->qlog < 0 || (!pict->quality && (avctx->flags & CODEC_FLAG_QSCALE))){
3967 s->qlog= LOSSLESS_QLOG;
3969 }//else keep previous frame's qlog until after motion est
3973 s->m.current_picture_ptr= &s->m.current_picture;
// P frames: fill in the MpegEncContext s->m for the motion estimation code.
3974 if(pict->pict_type == P_TYPE){
3975 int block_width = (width +15)>>4;
3976 int block_height= (height+15)>>4;
3977 int stride= s->current_picture.linesize[0];
3979 assert(s->current_picture.data[0]);
3980 assert(s->last_picture[0].data[0]);
3982 s->m.avctx= s->avctx;
3983 s->m.current_picture.data[0]= s->current_picture.data[0];
3984 s->m. last_picture.data[0]= s->last_picture[0].data[0];
3985 s->m. new_picture.data[0]= s-> input_picture.data[0];
3986 s->m. last_picture_ptr= &s->m. last_picture;
3988 s->m. last_picture.linesize[0]=
3989 s->m. new_picture.linesize[0]=
3990 s->m.current_picture.linesize[0]= stride;
3991 s->m.uvlinesize= s->current_picture.linesize[1];
3993 s->m.height= height;
3994 s->m.mb_width = block_width;
3995 s->m.mb_height= block_height;
3996 s->m.mb_stride= s->m.mb_width+1;
3997 s->m.b8_stride= 2*s->m.mb_width+1;
3999 s->m.pict_type= pict->pict_type;
4000 s->m.me_method= s->avctx->me_method;
4001 s->m.me.scene_change_score=0;
4002 s->m.flags= s->avctx->flags;
4003 s->m.quarter_sample= (s->avctx->flags & CODEC_FLAG_QPEL)!=0;
4004 s->m.out_format= FMT_H263;
4005 s->m.unrestricted_mv= 1;
4007 s->m.lambda = s->lambda;
4008 s->m.qscale= (s->m.lambda*139 + FF_LAMBDA_SCALE*64) >> (FF_LAMBDA_SHIFT + 7);
4009 s->lambda2= s->m.lambda2= (s->m.lambda*s->m.lambda + FF_LAMBDA_SCALE/2) >> FF_LAMBDA_SHIFT;
4011 s->m.dsp= s->dsp; //move
// Save the context states before anything is coded with them.
4017 memcpy(rc_header_bak, s->header_state, sizeof(s->header_state));
4018 memcpy(rc_block_bak, s->block_state, sizeof(s->block_state));
4023 s->m.pict_type = pict->pict_type;
4024 s->qbias= pict->pict_type == P_TYPE ? 2 : 0;
// Bit accounting: bytes written so far count as misc bits, the block data as mv bits.
4027 s->m.misc_bits = 8*(s->c.bytestream - s->c.bytestream_start);
4028 encode_blocks(s, 1);
4029 s->m.mv_bits = 8*(s->c.bytestream - s->c.bytestream_start) - s->m.misc_bits;
4031 for(plane_index=0; plane_index<3; plane_index++){
4032 Plane *p= &s->plane[plane_index];
4036 // int bits= put_bits_count(&s->c.pb);
4038 if(!(avctx->flags2 & CODEC_FLAG2_MEMC_ONLY)){
4040 if(pict->data[plane_index]) //FIXME gray hack
// Load the source samples scaled up by FRAC_BITS.
4043 s->spatial_idwt_buffer[y*w + x]= pict->data[plane_index][y*pict->linesize[plane_index] + x]<<FRAC_BITS;
4046 predict_plane(s, s->spatial_idwt_buffer, plane_index, 0);
// Scene change on a P frame outside the second pass: restart the coder and code an I frame.
4049 && pict->pict_type == P_TYPE
4050 && !(avctx->flags&CODEC_FLAG_PASS2)
4051 && s->m.me.scene_change_score > s->avctx->scenechange_threshold){
4052 ff_init_range_encoder(c, buf, buf_size);
4053 ff_build_rac_states(c, 0.05*(1LL<<32), 256-8);
4054 pict->pict_type= FF_I_TYPE;
4056 s->current_picture.key_frame=1;
// Lossless: scale back down by FRAC_BITS with a rounding term; otherwise scale up by ENCODER_EXTRA_BITS.
4060 if(s->qlog == LOSSLESS_QLOG){
4063 s->spatial_dwt_buffer[y*w + x]= (s->spatial_idwt_buffer[y*w + x] + (1<<(FRAC_BITS-1))-1)>>FRAC_BITS;
4069 s->spatial_dwt_buffer[y*w + x]=s->spatial_idwt_buffer[y*w + x]<<ENCODER_EXTRA_BITS;
4074 ff_spatial_dwt(s->spatial_dwt_buffer, w, h, w, s->spatial_decomposition_type, s->spatial_decomposition_count);
// One pass rate control runs once, on plane 0, after its transform.
4076 if(s->pass1_rc && plane_index==0){
4077 int delta_qlog = ratecontrol_1pass(s, pict);
4078 if (delta_qlog <= INT_MIN)
4081 //reordering qlog in the bitstream would eliminate this reset
// Restart the coder, restore the saved states and code the blocks again.
4082 ff_init_range_encoder(c, buf, buf_size);
4083 memcpy(s->header_state, rc_header_bak, sizeof(s->header_state));
4084 memcpy(s->block_state, rc_block_bak, sizeof(s->block_state));
4086 encode_blocks(s, 0);
// Per band: quantize, decorrelate, code, then correlate again to undo the decorrelation.
4090 for(level=0; level<s->spatial_decomposition_count; level++){
4091 for(orientation=level ? 1 : 0; orientation<4; orientation++){
4092 SubBand *b= &p->band[level][orientation];
4094 quantize(s, b, b->ibuf, b->buf, b->stride, s->qbias);
4096 decorrelate(s, b, b->ibuf, b->stride, pict->pict_type == P_TYPE, 0);
4097 encode_subband(s, b, b->ibuf, b->parent ? b->parent->ibuf : NULL, b->stride, orientation);
4098 assert(b->parent==NULL || b->parent->stride == b->stride*2);
4100 correlate(s, b, b->ibuf, b->stride, 1, 0);
4103 // av_log(NULL, AV_LOG_DEBUG, "plane:%d bits:%d\n", plane_index, put_bits_count(&s->c.pb) - bits);
// Reconstruction: dequantize every band and run the inverse transform.
4105 for(level=0; level<s->spatial_decomposition_count; level++){
4106 for(orientation=level ? 1 : 0; orientation<4; orientation++){
4107 SubBand *b= &p->band[level][orientation];
4109 dequantize(s, b, b->ibuf, b->stride);
4113 ff_spatial_idwt(s->spatial_idwt_buffer, w, h, w, s->spatial_decomposition_type, s->spatial_decomposition_count);
4114 if(s->qlog == LOSSLESS_QLOG){
4117 s->spatial_idwt_buffer[y*w + x]<<=FRAC_BITS;
4122 predict_plane(s, s->spatial_idwt_buffer, plane_index, 1);
4123 STOP_TIMER("pred-conv")}
// I frame on this path: the source samples are copied straight into the current picture.
4126 if(pict->pict_type == I_TYPE){
4129 s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x]=
4130 pict->data[plane_index][y*pict->linesize[plane_index] + x];
// Otherwise the buffer is zeroed and predict_plane is run on it.
4134 memset(s->spatial_idwt_buffer, 0, sizeof(IDWTELEM)*w*h);
4135 predict_plane(s, s->spatial_idwt_buffer, plane_index, 1);
// Optional error measurement between the reconstruction and the source.
4138 if(s->avctx->flags&CODEC_FLAG_PSNR){
4141 if(pict->data[plane_index]) //FIXME gray hack
4144 int d= s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x] - pict->data[plane_index][y*pict->linesize[plane_index] + x];
4148 s->avctx->error[plane_index] += error;
4149 s->current_picture.error[plane_index] = error;
// Release the oldest reference picture and free its halfpel planes.
4153 if(s->last_picture[s->max_ref_frames-1].data[0]){
4154 avctx->release_buffer(avctx, &s->last_picture[s->max_ref_frames-1]);
4156 if(s->halfpel_plane[s->max_ref_frames-1][1+i/3][i%3])
// the subtraction undoes the pointer offset applied when the plane was allocated
4157 av_free(s->halfpel_plane[s->max_ref_frames-1][1+i/3][i%3] - EDGE_WIDTH*(1+s->current_picture.linesize[i%3]));
// Publish the frame statistics for the caller and the rate controller.
4160 s->current_picture.coded_picture_number = avctx->frame_number;
4161 s->current_picture.pict_type = pict->pict_type;
4162 s->current_picture.quality = pict->quality;
4163 s->m.frame_bits = 8*(s->c.bytestream - s->c.bytestream_start);
4164 s->m.p_tex_bits = s->m.frame_bits - s->m.misc_bits - s->m.mv_bits;
4165 s->m.current_picture.display_picture_number =
4166 s->m.current_picture.coded_picture_number = avctx->frame_number;
4167 s->m.current_picture.quality = pict->quality;
4168 s->m.total_bits += 8*(s->c.bytestream - s->c.bytestream_start);
4170 if (ff_rate_estimate_qscale(&s->m, 0) < 0)
4172 if(avctx->flags&CODEC_FLAG_PASS1)
4173 ff_write_pass1_stats(&s->m);
4174 s->m.last_pict_type = s->m.pict_type;
4175 avctx->frame_bits = s->m.frame_bits;
4176 avctx->mv_bits = s->m.mv_bits;
4177 avctx->misc_bits = s->m.misc_bits;
4178 avctx->p_tex_bits = s->m.p_tex_bits;
4182 return ff_rac_terminate(c);
/*
 * Free what the context owns: the two DWT buffers, the motion estimation
 * scratch buffers, the block array, the per reference motion vector and score
 * arrays, any reference pictures still held, and the x_coeff array of every
 * subband. av_freep is used for the allocations, which presumably also clears
 * the stored pointers (confirm).
 */
4185 static void common_end(SnowContext *s){
4186 int plane_index, level, orientation, i;
4188 av_freep(&s->spatial_dwt_buffer);
4189 av_freep(&s->spatial_idwt_buffer);
4191 av_freep(&s->m.me.scratchpad);
4192 av_freep(&s->m.me.map);
4193 av_freep(&s->m.me.score_map);
4194 av_freep(&s->m.obmc_scratchpad);
4196 av_freep(&s->block);
// All MAX_REF_FRAMES slots are visited, not only max_ref_frames of them.
4198 for(i=0; i<MAX_REF_FRAMES; i++){
4199 av_freep(&s->ref_mvs[i]);
4200 av_freep(&s->ref_scores[i]);
// Only slots that still hold picture data are released.
4201 if(s->last_picture[i].data[0])
4202 s->avctx->release_buffer(s->avctx, &s->last_picture[i]);
4205 for(plane_index=0; plane_index<3; plane_index++){
4206 for(level=s->spatial_decomposition_count-1; level>=0; level--){
4207 for(orientation=level ? 1 : 0; orientation<4; orientation++){
4208 SubBand *b= &s->plane[plane_index].band[level][orientation];
4210 av_freep(&b->x_coeff);
/*
 * Encoder teardown. The visible lines free avctx->stats_out; the rest of the
 * context is presumably released through common_end() (confirm).
 */
4216 static int encode_end(AVCodecContext *avctx)
4218 SnowContext *s = avctx->priv_data;
4221 av_free(avctx->stats_out);
/*
 * Decoder initialisation. Sets the output pixel format to PIX_FMT_YUV420P and
 * creates the slice buffer from the size of plane 0.
 */
4226 static int decode_init(AVCodecContext *avctx)
4228 SnowContext *s = avctx->priv_data;
4231 avctx->pix_fmt= PIX_FMT_YUV420P;
4235 block_size = MB_SIZE >> s->block_max_depth;
// The third argument grows with the block size and with the decomposition count.
4236 slice_buffer_init(&s->sb, s->plane[0].height, (block_size) + (s->spatial_decomposition_count * (s->spatial_decomposition_count + 3)) + 1, s->plane[0].width, s->spatial_idwt_buffer);
/*
 * Decode one frame from buf into the picture pointed to by data.
 * For every plane the subband coefficients are unpacked first. The plane is
 * then rebuilt in horizontal slices: the band rows needed for a slice are
 * decoded and dequantized, the inverse transform is run on the slice buffer,
 * the prediction is applied and finished lines are released.
 * *data_size is set to sizeof(AVFrame); the return value is presumably the
 * number of bytes consumed (confirm).
 */
4241 static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, uint8_t *buf, int buf_size){
4242 SnowContext *s = avctx->priv_data;
4243 RangeCoder * const c= &s->c;
4245 AVFrame *picture = data;
4246 int level, orientation, plane_index, i;
4248 ff_init_range_decoder(c, buf, buf_size);
4249 ff_build_rac_states(c, 0.05*(1LL<<32), 256-8);
4251 s->current_picture.pict_type= FF_I_TYPE; //FIXME I vs. P
// The block array is allocated lazily on the first frame.
4253 if(!s->block) alloc_blocks(s);
4256 //keyframe flag duplication mess FIXME
4257 if(avctx->debug&FF_DEBUG_PICT_INFO)
4258 av_log(avctx, AV_LOG_ERROR, "keyframe:%d qlog:%d\n", s->keyframe, s->qlog);
4262 for(plane_index=0; plane_index<3; plane_index++){
4263 Plane *p= &s->plane[plane_index];
4267 int decode_state[MAX_DECOMPOSITIONS][4][1]; /* Stored state info for unpack_coeffs. 1 variable per instance. */
// Debug bit 2048: build a prediction only picture and copy it into mconly_picture.
4269 if(s->avctx->debug&2048){
4270 memset(s->spatial_dwt_buffer, 0, sizeof(DWTELEM)*w*h);
4271 predict_plane(s, s->spatial_idwt_buffer, plane_index, 1);
4275 int v= s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x];
4276 s->mconly_picture.data[plane_index][y*s->mconly_picture.linesize[plane_index] + x]= v;
// Unpack the coefficients of every band of this plane.
4282 for(level=0; level<s->spatial_decomposition_count; level++){
4283 for(orientation=level ? 1 : 0; orientation<4; orientation++){
4284 SubBand *b= &p->band[level][orientation];
4285 unpack_coeffs(s, b, b->parent, orientation);
4288 STOP_TIMER("unpack coeffs");
4292 const int mb_h= s->b_height << s->block_max_depth;
4293 const int block_size = MB_SIZE >> s->block_max_depth;
// chroma planes use half the block size
4294 const int block_w = plane_index ? block_size/2 : block_size;
4296 dwt_compose_t cs[MAX_DECOMPOSITIONS];
4301 ff_spatial_idwt_buffered_init(cs, &s->sb, w, h, 1, s->spatial_decomposition_type, s->spatial_decomposition_count);
// One iteration per block row, plus one extra iteration at mb_y == mb_h.
4302 for(mb_y=0; mb_y<=mb_h; mb_y++){
4304 int slice_starty = block_w*mb_y;
4305 int slice_h = block_w*(mb_y+1);
// Unless this is a keyframe or debug bit 512 is set, the slice is moved up by half a block.
4306 if (!(s->keyframe || s->avctx->debug&512)){
4307 slice_starty = FFMAX(0, slice_starty - (block_w >> 1));
4308 slice_h -= (block_w >> 1);
4313 for(level=0; level<s->spatial_decomposition_count; level++){
4314 for(orientation=level ? 1 : 0; orientation<4; orientation++){
4315 SubBand *b= &p->band[level][orientation];
4318 int our_mb_start = mb_y;
4319 int our_mb_end = (mb_y + 1);
// Map the block row range to band rows at this level, with a margin of
// (count - level) + extra rows; the first block row always starts at 0.
4321 start_y = (mb_y ? ((block_w * our_mb_start) >> (s->spatial_decomposition_count - level)) + s->spatial_decomposition_count - level + extra: 0);
4322 end_y = (((block_w * our_mb_end) >> (s->spatial_decomposition_count - level)) + s->spatial_decomposition_count - level + extra);
4323 if (!(s->keyframe || s->avctx->debug&512)){
4324 start_y = FFMAX(0, start_y - (block_w >> (1+s->spatial_decomposition_count - level)));
4325 end_y = FFMAX(0, end_y - (block_w >> (1+s->spatial_decomposition_count - level)));
// Clamp both bounds to the band height.
4327 start_y = FFMIN(b->height, start_y);
4328 end_y = FFMIN(b->height, end_y);
4330 if (start_y != end_y){
// Orientation 0 is handled through band[0][0]: it is decoded and correlated
// over a range shifted one row down, then dequantized over start_y .. end_y.
4331 if (orientation == 0){
4332 SubBand * correlate_band = &p->band[0][0];
4333 int correlate_end_y = FFMIN(b->height, end_y + 1);
4334 int correlate_start_y = FFMIN(b->height, (start_y ? start_y + 1 : 0));
4335 decode_subband_slice_buffered(s, correlate_band, &s->sb, correlate_start_y, correlate_end_y, decode_state[0][0]);
4336 correlate_slice_buffered(s, &s->sb, correlate_band, correlate_band->ibuf, correlate_band->stride, 1, 0, correlate_start_y, correlate_end_y);
4337 dequantize_slice_buffered(s, &s->sb, correlate_band, correlate_band->ibuf, correlate_band->stride, start_y, end_y);
4340 decode_subband_slice_buffered(s, b, &s->sb, start_y, end_y, decode_state[level][orientation]);
4344 STOP_TIMER("decode_subband_slice");
// Run the inverse transform in steps of 4 lines up to the end of this slice.
4348 for(; yd<slice_h; yd+=4){
4349 ff_spatial_idwt_buffered_slice(&s->dsp, cs, &s->sb, w, h, 1, s->spatial_decomposition_type, s->spatial_decomposition_count, yd);
4351 STOP_TIMER("idwt slice");}
// Lossless: scale the reconstructed lines up by FRAC_BITS.
4354 if(s->qlog == LOSSLESS_QLOG){
4355 for(; yq<slice_h && yq<h; yq++){
4356 IDWTELEM * line = slice_buffer_get_line(&s->sb, yq);
4358 line[x] <<= FRAC_BITS;
4363 predict_slice_buffered(s, &s->sb, s->spatial_idwt_buffer, plane_index, 1, mb_y);
// Release the slice buffer lines of this slice, clamped to the plane height.
4365 y = FFMIN(p->height, slice_starty);
4366 end_y = FFMIN(p->height, slice_h);
4368 slice_buffer_release(&s->sb, y++);
4371 slice_buffer_flush(&s->sb);
4373 STOP_TIMER("idwt + predict_slices")}
// Release the oldest reference picture and free its halfpel planes.
4378 if(s->last_picture[s->max_ref_frames-1].data[0]){
4379 avctx->release_buffer(avctx, &s->last_picture[s->max_ref_frames-1]);
4381 if(s->halfpel_plane[s->max_ref_frames-1][1+i/3][i%3])
4382 av_free(s->halfpel_plane[s->max_ref_frames-1][1+i/3][i%3] - EDGE_WIDTH*(1+s->current_picture.linesize[i%3]));
// Output the decoded picture, or mconly_picture when debug bit 2048 is set.
4385 if(!(s->avctx->debug&2048))
4386 *picture= s->current_picture;
4388 *picture= s->mconly_picture;
4390 *data_size = sizeof(AVFrame);
4392 bytes_read= c->bytestream - c->bytestream_start;
4393 if(bytes_read ==0) av_log(s->avctx, AV_LOG_ERROR, "error at end of frame\n"); //FIXME
/**
 * Tear down decoder state for the given codec context.
 *
 * Reads the SnowContext stored in avctx->priv_data and destroys its slice
 * buffer (s->sb).
 *
 * NOTE(review): this excerpt skips lines (the embedded numbering jumps
 * 4398 -> 4400 -> 4402), so the opening brace, any further cleanup and the
 * returned value are not visible here -- check the full file.
 */
4398 static int decode_end(AVCodecContext *avctx)
4400 SnowContext *s = avctx->priv_data;
/* s->sb is the slice buffer handed to the ff_spatial_idwt_buffered_*() calls while decoding */
4402 slice_buffer_destroy(&s->sb);
/*
 * Codec descriptor for the Snow decoder, initialised positionally.
 * NOTE(review): most initializer entries are missing from this excerpt (the
 * embedded numbering jumps 4409 -> 4413 -> 4418); only two are visible.
 */
4409 AVCodec snow_decoder = {
/* size of the per-instance SnowContext -- presumably the private-data-size slot of AVCodec; confirm */
4413 sizeof(SnowContext),
/* evaluates to 0: the CODEC_CAP_DR1 and CODEC_CAP_DRAW_HORIZ_BAND flags are commented out (presumably the capabilities slot; confirm) */
4418 0 /*CODEC_CAP_DR1*/ /*| CODEC_CAP_DRAW_HORIZ_BAND*/,
/*
 * Codec descriptor for the Snow encoder; only compiled when
 * CONFIG_SNOW_ENCODER is defined.
 * NOTE(review): the remaining initializer entries and the matching #endif are
 * not visible in this excerpt.
 */
4422 #ifdef CONFIG_SNOW_ENCODER
4423 AVCodec snow_encoder = {
/* size of the per-instance SnowContext -- presumably the private-data-size slot of AVCodec; confirm */
4427 sizeof(SnowContext),
/*
 * Self-test body: wavelet round trips, a range-coder round trip, and dumps of
 * the inverse-transform response.
 *
 * NOTE(review): this excerpt skips lines (the embedded numbering has gaps), so
 * the enclosing function header, the declarations of width, height, s, i, j,
 * error and g, several loop headers and all closing braces are not visible.
 * Comments below describe only the statements that are shown.
 */
/* two width*height scratch images: buffer[0] is transformed in place, buffer[1] keeps the reference copy */
4444 int buffer[2][width*height];
4447 s.spatial_decomposition_count=6;
4448 s.spatial_decomposition_type=1;
/* Test 1: decomposition type 1 (announced as "5/3"). Forward then inverse transform must reproduce every sample exactly. */
4450 printf("testing 5/3 DWT\n");
4451 for(i=0; i<width*height; i++)
4452 buffer[0][i]= buffer[1][i]= random()%54321 - 12345;
4454 ff_spatial_dwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4455 ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
/* any difference from the reference copy is reported */
4457 for(i=0; i<width*height; i++)
4458 if(buffer[0][i]!= buffer[1][i]) printf("fsck: %d %d %d\n",i, buffer[0][i], buffer[1][i]);
/* Test 2: decomposition type 0 (announced as "9/7"). Same round trip, but an absolute error of up to 20 per sample is tolerated. */
4460 printf("testing 9/7 DWT\n");
4461 s.spatial_decomposition_type=0;
4462 for(i=0; i<width*height; i++)
4463 buffer[0][i]= buffer[1][i]= random()%54321 - 12345;
4465 ff_spatial_dwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4466 ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4468 for(i=0; i<width*height; i++)
4469 if(FFABS(buffer[0][i] - buffer[1][i])>20) printf("fsck: %d %d %d\n",i, buffer[0][i], buffer[1][i]);
/* Test 3: symbol coder round trip. buffer[0] is reused as the coder's byte buffer (256*256 is presumably its size in bytes; confirm). */
4472 printf("testing AC coder\n");
/* adaptive state starts from zero for the encode pass */
4473 memset(s.header_state, 0, sizeof(s.header_state));
4474 ff_init_range_encoder(&s.c, buffer[0], 256*256);
4475 ff_init_cabac_states(&s.c, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64);
/* encode one value per i in [-256, 255]; the value is i*i*i/3*FFABS(i) */
4477 for(i=-256; i<256; i++){
4479 put_symbol(&s.c, s.header_state, i*i*i/3*FFABS(i), 1);
4480 STOP_TIMER("put_symbol")
4482 ff_rac_terminate(&s.c);
/* reset the state exactly as before encoding, then decode from the same buffer */
4484 memset(s.header_state, 0, sizeof(s.header_state));
4485 ff_init_range_decoder(&s.c, buffer[0], 256*256);
4486 ff_init_cabac_states(&s.c, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64);
4488 for(i=-256; i<256; i++){
4491 j= get_symbol(&s.c, s.header_state, 1);
4492 STOP_TIMER("get_symbol")
/* decoded value must equal what was encoded for this i */
4493 if(j!=i*i*i/3*FFABS(i)) printf("fsck: %d != %d\n", i, j);
/* Test 4: per-subband response of the inverse transform, collected into errors[level][orientation]. */
4497 int level, orientation, x, y;
4498 int64_t errors[8][4];
4501 memset(errors, 0, sizeof(errors));
4502 s.spatial_decomposition_count=3;
4503 s.spatial_decomposition_type=0;
/* level 0 has orientations 0..3, higher levels only 1..3 */
4504 for(level=0; level<s.spatial_decomposition_count; level++){
4505 for(orientation=level ? 1 : 0; orientation<4; orientation++){
/* subband dimensions and row stride inside the full-size buffer for this level */
4506 int w= width >> (s.spatial_decomposition_count-level);
4507 int h= height >> (s.spatial_decomposition_count-level);
4508 int stride= width << (s.spatial_decomposition_count-level);
4509 DWTELEM *buf= buffer[0];
/* move buf to the subband origin: odd orientations are offset by w columns, orientations 2 and 3 by half a stride */
4512 if(orientation&1) buf+=w;
4513 if(orientation>1) buf+=stride>>1;
/* all-zero image with a single coefficient of 256*256 at the centre of the subband */
4515 memset(buffer[0], 0, sizeof(int)*width*height);
4516 buf[w/2 + h/2*stride]= 256*256;
4517 ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4518 for(y=0; y<height; y++){
4519 for(x=0; x<width; x++){
4520 int64_t d= buffer[0][x + y*width];
/* NOTE(review): the statement that accumulates d into error is not visible in this excerpt */
/* for level 2 only, print the samples within 8 of the image centre */
4522 if(FFABS(width/2-x)<9 && FFABS(height/2-y)<9 && level==2) printf("%8"PRId64" ", d);
4524 if(FFABS(height/2-y)<9 && level==2) printf("\n");
/* round sqrt(error) to the nearest integer and record it for this subband */
4526 error= (int)(sqrt(error)+0.5);
4527 errors[level][orientation]= error;
/* fold this subband's value into the running gcd g; the branch taken when g is 0 is not visible here */
4528 if(g) g=ff_gcd(g, error);
/* print the collected values divided by g, laid out as a C table named visual_weight */
4532 printf("static int const visual_weight[][4]={\n");
4533 for(level=0; level<s.spatial_decomposition_count; level++){
4535 for(orientation=0; orientation<4; orientation++){
4536 printf("%8"PRId64",", errors[level][orientation]/g);
/* Test 5: transform a fixed pattern, overwrite some coefficients, inverse-transform and dump the centre. */
/* NOTE(review): the assignment of level used below is not visible in this excerpt */
4544 int w= width >> (s.spatial_decomposition_count-level);
4545 int h= height >> (s.spatial_decomposition_count-level);
4546 int stride= width << (s.spatial_decomposition_count-level);
4547 DWTELEM *buf= buffer[0];
4553 memset(buffer[0], 0, sizeof(int)*width*height);
/* fill the image with a 2x2 tiled pattern: tab[] is indexed by the parity of x and y, scaled by 256*256 */
4555 for(y=0; y<height; y++){
4556 for(x=0; x<width; x++){
4557 int tab[4]={0,2,3,1};
4558 buffer[0][x+width*y]= 256*256*tab[(x&1) + 2*(y&1)];
4561 ff_spatial_dwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
/* overwrite coefficients with 169, and 64 at the position w columns to the left; the enclosing loop bounds are not visible here */
4565 buf[x + y*stride ]=169;
4566 buf[x + y*stride-w]=64;
4569 ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
/* print the reconstructed samples within 8 of the image centre, one row per line */
4571 for(y=0; y<height; y++){
4572 for(x=0; x<width; x++){
4573 int64_t d= buffer[0][x + y*width];
4575 if(FFABS(width/2-x)<9 && FFABS(height/2-y)<9) printf("%8"PRId64" ", d);
4577 if(FFABS(height/2-y)<9) printf("\n");