2 * Copyright (C) 2004 Michael Niedermayer <michaelni@gmx.at>
4 * This file is part of FFmpeg.
6 * FFmpeg is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * FFmpeg is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with FFmpeg; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
26 #include "rangecoder.h"
28 #include "mpegvideo.h"
/* 256-entry lookup table producing one of three levels (-1, 0, 1).
 * As listed: indices 0-1 give 0, indices 2-127 give 1, indices 128-254 give -1
 * and index 255 gives 0.
 * NOTE(review): presumably indexed by a difference masked to 8 bits, so the
 * upper half stands for negative inputs -- confirm against the table's users. */
33 static const int8_t quant3[256]={
34 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
35 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
36 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
37 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
38 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
39 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
40 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
41 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
42 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
43 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
44 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
45 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
46 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
47 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
48 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
49 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, 0,
/* 256-entry three-level lookup table (-1, 0, 1) with a narrower zero zone
 * than quant3: only index 0 gives 0, indices 1-127 give 1 and indices
 * 128-255 give -1. */
51 static const int8_t quant3b[256]={
52 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
53 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
54 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
55 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
56 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
57 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
58 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
59 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
60 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
61 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
62 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
63 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
64 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
65 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
66 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
67 -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
/* 256-entry three-level lookup table (-1, 0, 1) with an interleaved layout:
 * indices 0 and 1 give 0; from index 2 onwards even indices give 1 and odd
 * indices give -1.
 * NOTE(review): the alternating layout suggests the sign is carried in the
 * low bit of the index -- confirm against the table's users. */
69 static const int8_t quant3bA[256]={
70 0, 0, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
71 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
72 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
73 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
74 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
75 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
76 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
77 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
78 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
79 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
80 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
81 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
82 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
83 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
84 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
85 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1, 1,-1,
/* 256-entry lookup table producing one of five levels (-2 .. 2).
 * As listed: index 0 gives 0, indices 1-3 give 1, indices 4-127 give 2,
 * indices 128-252 give -2 and indices 253-255 give -1. */
87 static const int8_t quant5[256]={
88 0, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
89 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
90 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
91 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
92 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
93 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
94 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
95 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
96 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
97 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
98 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
99 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
100 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
101 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
102 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
103 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,-1,
/* 256-entry lookup table producing one of seven levels (-3 .. 3).
 * The first 128 entries hold non-negative levels that grow with the index
 * (0, 1, 2, 3); the last 128 entries hold negative levels whose magnitude
 * shrinks towards index 255 (-3, -2, -1). */
105 static const int8_t quant7[256]={
106 0, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
107 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
108 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3,
109 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
110 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
111 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
112 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
113 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
114 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
115 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
116 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
117 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
118 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-3,
119 -3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-2,-2,-2,
120 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,
121 -2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-2,-1,-1,
/* 256-entry lookup table producing one of nine levels (-4 .. 4).
 * The first 128 entries hold non-negative levels that grow with the index;
 * the last 128 entries hold negative levels whose magnitude shrinks towards
 * index 255. */
123 static const int8_t quant9[256]={
124 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3,
125 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
126 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
127 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
128 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
129 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
130 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
131 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
132 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
133 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
134 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
135 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
136 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
137 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
138 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,
139 -3,-3,-3,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-2,-1,-1,
/* 256-entry lookup table producing one of eleven levels (-5 .. 5).
 * The first 128 entries hold non-negative levels that grow with the index;
 * the last 128 entries hold negative levels whose magnitude shrinks towards
 * index 255. */
141 static const int8_t quant11[256]={
142 0, 1, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4,
143 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
144 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
145 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
146 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
147 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
148 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
149 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
150 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
151 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
152 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
153 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
154 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
155 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-4,-4,
156 -4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,-4,
157 -4,-4,-4,-4,-4,-3,-3,-3,-3,-3,-3,-3,-2,-2,-2,-1,
/* 256-entry lookup table producing one of thirteen levels (-6 .. 6).
 * The first 128 entries hold non-negative levels that grow with the index;
 * the last 128 entries hold negative levels whose magnitude shrinks towards
 * index 255. */
159 static const int8_t quant13[256]={
160 0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
161 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
162 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
163 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
164 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
165 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
166 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
167 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
168 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
169 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
170 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
171 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,
172 -6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-6,-5,
173 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
174 -5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,-5,
175 -4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,-2,-2,-1,
/* 32x32 weight window (1024 entries, listed 32 per row). Weights are 0 along
 * the border and rise to a peak of 64 in the centre.
 * NOTE(review): presumably an overlapped-block motion compensation window
 * (going by the name); this is one of three obmc32 definitions visible in
 * this file, presumably selected by preprocessor conditionals that are not
 * visible here -- confirm which variant is active. */
179 static const uint8_t obmc32[1024]={
180 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
181 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
182 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
183 0, 0, 1, 1, 2, 2, 3, 4, 4, 5, 6, 6, 7, 7, 8, 8, 8, 8, 7, 7, 6, 6, 5, 4, 4, 3, 2, 2, 1, 1, 0, 0,
184 0, 0, 1, 2, 2, 3, 4, 6, 7, 8, 9,10,11,12,12,12,12,12,12,11,10, 9, 8, 7, 6, 4, 3, 2, 2, 1, 0, 0,
185 0, 1, 1, 2, 3, 5, 6, 8,10,11,13,14,15,16,17,18,18,17,16,15,14,13,11,10, 8, 6, 5, 3, 2, 1, 1, 0,
186 0, 1, 1, 3, 4, 6, 8,10,13,15,17,19,20,22,22,23,23,22,22,20,19,17,15,13,10, 8, 6, 4, 3, 1, 1, 0,
187 0, 1, 2, 4, 6, 8,10,13,16,19,21,23,25,27,28,29,29,28,27,25,23,21,19,16,13,10, 8, 6, 4, 2, 1, 0,
188 0, 1, 2, 4, 7,10,13,16,19,22,25,28,31,33,34,35,35,34,33,31,28,25,22,19,16,13,10, 7, 4, 2, 1, 0,
189 0, 1, 3, 5, 8,11,15,19,22,26,30,33,36,38,40,41,41,40,38,36,33,30,26,22,19,15,11, 8, 5, 3, 1, 0,
190 0, 1, 3, 6, 9,12,17,21,25,30,34,38,41,44,45,46,46,45,44,41,38,34,30,25,21,17,12, 9, 6, 3, 1, 0,
191 0, 1, 3, 6,10,14,19,23,28,33,38,42,45,48,51,52,52,51,48,45,42,38,33,28,23,19,14,10, 6, 3, 1, 0,
192 0, 1, 4, 7,11,15,20,25,31,36,41,45,49,52,55,56,56,55,52,49,45,41,36,31,25,20,15,11, 7, 4, 1, 0,
193 0, 2, 4, 7,12,16,22,27,33,38,44,48,52,56,58,60,60,58,56,52,48,44,38,33,27,22,16,12, 7, 4, 2, 0,
194 0, 1, 4, 8,12,17,22,28,34,40,45,51,55,58,61,62,62,61,58,55,51,45,40,34,28,22,17,12, 8, 4, 1, 0,
195 0, 2, 4, 8,12,18,23,29,35,41,46,52,56,60,62,64,64,62,60,56,52,46,41,35,29,23,18,12, 8, 4, 2, 0,
196 0, 2, 4, 8,12,18,23,29,35,41,46,52,56,60,62,64,64,62,60,56,52,46,41,35,29,23,18,12, 8, 4, 2, 0,
197 0, 1, 4, 8,12,17,22,28,34,40,45,51,55,58,61,62,62,61,58,55,51,45,40,34,28,22,17,12, 8, 4, 1, 0,
198 0, 2, 4, 7,12,16,22,27,33,38,44,48,52,56,58,60,60,58,56,52,48,44,38,33,27,22,16,12, 7, 4, 2, 0,
199 0, 1, 4, 7,11,15,20,25,31,36,41,45,49,52,55,56,56,55,52,49,45,41,36,31,25,20,15,11, 7, 4, 1, 0,
200 0, 1, 3, 6,10,14,19,23,28,33,38,42,45,48,51,52,52,51,48,45,42,38,33,28,23,19,14,10, 6, 3, 1, 0,
201 0, 1, 3, 6, 9,12,17,21,25,30,34,38,41,44,45,46,46,45,44,41,38,34,30,25,21,17,12, 9, 6, 3, 1, 0,
202 0, 1, 3, 5, 8,11,15,19,22,26,30,33,36,38,40,41,41,40,38,36,33,30,26,22,19,15,11, 8, 5, 3, 1, 0,
203 0, 1, 2, 4, 7,10,13,16,19,22,25,28,31,33,34,35,35,34,33,31,28,25,22,19,16,13,10, 7, 4, 2, 1, 0,
204 0, 1, 2, 4, 6, 8,10,13,16,19,21,23,25,27,28,29,29,28,27,25,23,21,19,16,13,10, 8, 6, 4, 2, 1, 0,
205 0, 1, 1, 3, 4, 6, 8,10,13,15,17,19,20,22,22,23,23,22,22,20,19,17,15,13,10, 8, 6, 4, 3, 1, 1, 0,
206 0, 1, 1, 2, 3, 5, 6, 8,10,11,13,14,15,16,17,18,18,17,16,15,14,13,11,10, 8, 6, 5, 3, 2, 1, 1, 0,
207 0, 0, 1, 2, 2, 3, 4, 6, 7, 8, 9,10,11,12,12,12,12,12,12,11,10, 9, 8, 7, 6, 4, 3, 2, 2, 1, 0, 0,
208 0, 0, 1, 1, 2, 2, 3, 4, 4, 5, 6, 6, 7, 7, 8, 8, 8, 8, 7, 7, 6, 6, 5, 4, 4, 3, 2, 2, 1, 1, 0, 0,
209 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
210 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
211 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
/* 16x16 weight window (256 entries, listed 16 per row). Weights are 0 or 1
 * along the border and rise to a peak of 62 in the centre.
 * NOTE(review): one of three obmc16 definitions visible in this file,
 * presumably selected by preprocessor conditionals that are not visible
 * here -- confirm which variant is active. */
214 static const uint8_t obmc16[256]={
215 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
216 0, 1, 1, 2, 4, 5, 5, 6, 6, 5, 5, 4, 2, 1, 1, 0,
217 0, 1, 4, 6, 9,11,13,15,15,13,11, 9, 6, 4, 1, 0,
218 0, 2, 6,11,15,20,24,26,26,24,20,15,11, 6, 2, 0,
219 0, 4, 9,15,23,29,34,38,38,34,29,23,15, 9, 4, 0,
220 0, 5,11,20,29,38,45,49,49,45,38,29,20,11, 5, 0,
221 1, 5,13,24,34,45,53,57,57,53,45,34,24,13, 5, 1,
222 1, 6,15,26,38,49,57,62,62,57,49,38,26,15, 6, 1,
223 1, 6,15,26,38,49,57,62,62,57,49,38,26,15, 6, 1,
224 1, 5,13,24,34,45,53,57,57,53,45,34,24,13, 5, 1,
225 0, 5,11,20,29,38,45,49,49,45,38,29,20,11, 5, 0,
226 0, 4, 9,15,23,29,34,38,38,34,29,23,15, 9, 4, 0,
227 0, 2, 6,11,15,20,24,26,26,24,20,15,11, 6, 2, 0,
228 0, 1, 4, 6, 9,11,13,15,15,13,11, 9, 6, 4, 1, 0,
229 0, 1, 1, 2, 4, 5, 5, 6, 6, 5, 5, 4, 2, 1, 1, 0,
230 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
/* 32x32 weight window (1024 entries, listed 32 per row). Every listed weight
 * is a multiple of 4; they rise from 0 in the corners to a peak of 240 in
 * the centre.
 * NOTE(review): one of three obmc32 definitions visible in this file,
 * presumably selected by preprocessor conditionals that are not visible
 * here -- confirm which variant is active. */
234 static const uint8_t obmc32[1024]={
235 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0,
236 0, 4, 4, 4, 8, 8, 8, 12, 12, 16, 16, 16, 20, 20, 20, 24, 24, 20, 20, 20, 16, 16, 16, 12, 12, 8, 8, 8, 4, 4, 4, 0,
237 0, 4, 8, 8, 12, 12, 16, 20, 20, 24, 28, 28, 32, 32, 36, 40, 40, 36, 32, 32, 28, 28, 24, 20, 20, 16, 12, 12, 8, 8, 4, 0,
238 0, 4, 8, 12, 16, 20, 24, 28, 28, 32, 36, 40, 44, 48, 52, 56, 56, 52, 48, 44, 40, 36, 32, 28, 28, 24, 20, 16, 12, 8, 4, 0,
239 4, 8, 12, 16, 20, 24, 28, 32, 40, 44, 48, 52, 56, 60, 64, 68, 68, 64, 60, 56, 52, 48, 44, 40, 32, 28, 24, 20, 16, 12, 8, 4,
240 4, 8, 12, 20, 24, 32, 36, 40, 48, 52, 56, 64, 68, 76, 80, 84, 84, 80, 76, 68, 64, 56, 52, 48, 40, 36, 32, 24, 20, 12, 8, 4,
241 4, 8, 16, 24, 28, 36, 44, 48, 56, 60, 68, 76, 80, 88, 96,100,100, 96, 88, 80, 76, 68, 60, 56, 48, 44, 36, 28, 24, 16, 8, 4,
242 4, 12, 20, 28, 32, 40, 48, 56, 64, 72, 80, 88, 92,100,108,116,116,108,100, 92, 88, 80, 72, 64, 56, 48, 40, 32, 28, 20, 12, 4,
243 4, 12, 20, 28, 40, 48, 56, 64, 72, 80, 88, 96,108,116,124,132,132,124,116,108, 96, 88, 80, 72, 64, 56, 48, 40, 28, 20, 12, 4,
244 4, 16, 24, 32, 44, 52, 60, 72, 80, 92,100,108,120,128,136,148,148,136,128,120,108,100, 92, 80, 72, 60, 52, 44, 32, 24, 16, 4,
245 4, 16, 28, 36, 48, 56, 68, 80, 88,100,112,120,132,140,152,164,164,152,140,132,120,112,100, 88, 80, 68, 56, 48, 36, 28, 16, 4,
246 4, 16, 28, 40, 52, 64, 76, 88, 96,108,120,132,144,156,168,180,180,168,156,144,132,120,108, 96, 88, 76, 64, 52, 40, 28, 16, 4,
247 8, 20, 32, 44, 56, 68, 80, 92,108,120,132,144,156,168,180,192,192,180,168,156,144,132,120,108, 92, 80, 68, 56, 44, 32, 20, 8,
248 8, 20, 32, 48, 60, 76, 88,100,116,128,140,156,168,184,196,208,208,196,184,168,156,140,128,116,100, 88, 76, 60, 48, 32, 20, 8,
249 8, 20, 36, 52, 64, 80, 96,108,124,136,152,168,180,196,212,224,224,212,196,180,168,152,136,124,108, 96, 80, 64, 52, 36, 20, 8,
250 8, 24, 40, 56, 68, 84,100,116,132,148,164,180,192,208,224,240,240,224,208,192,180,164,148,132,116,100, 84, 68, 56, 40, 24, 8,
251 8, 24, 40, 56, 68, 84,100,116,132,148,164,180,192,208,224,240,240,224,208,192,180,164,148,132,116,100, 84, 68, 56, 40, 24, 8,
252 8, 20, 36, 52, 64, 80, 96,108,124,136,152,168,180,196,212,224,224,212,196,180,168,152,136,124,108, 96, 80, 64, 52, 36, 20, 8,
253 8, 20, 32, 48, 60, 76, 88,100,116,128,140,156,168,184,196,208,208,196,184,168,156,140,128,116,100, 88, 76, 60, 48, 32, 20, 8,
254 8, 20, 32, 44, 56, 68, 80, 92,108,120,132,144,156,168,180,192,192,180,168,156,144,132,120,108, 92, 80, 68, 56, 44, 32, 20, 8,
255 4, 16, 28, 40, 52, 64, 76, 88, 96,108,120,132,144,156,168,180,180,168,156,144,132,120,108, 96, 88, 76, 64, 52, 40, 28, 16, 4,
256 4, 16, 28, 36, 48, 56, 68, 80, 88,100,112,120,132,140,152,164,164,152,140,132,120,112,100, 88, 80, 68, 56, 48, 36, 28, 16, 4,
257 4, 16, 24, 32, 44, 52, 60, 72, 80, 92,100,108,120,128,136,148,148,136,128,120,108,100, 92, 80, 72, 60, 52, 44, 32, 24, 16, 4,
258 4, 12, 20, 28, 40, 48, 56, 64, 72, 80, 88, 96,108,116,124,132,132,124,116,108, 96, 88, 80, 72, 64, 56, 48, 40, 28, 20, 12, 4,
259 4, 12, 20, 28, 32, 40, 48, 56, 64, 72, 80, 88, 92,100,108,116,116,108,100, 92, 88, 80, 72, 64, 56, 48, 40, 32, 28, 20, 12, 4,
260 4, 8, 16, 24, 28, 36, 44, 48, 56, 60, 68, 76, 80, 88, 96,100,100, 96, 88, 80, 76, 68, 60, 56, 48, 44, 36, 28, 24, 16, 8, 4,
261 4, 8, 12, 20, 24, 32, 36, 40, 48, 52, 56, 64, 68, 76, 80, 84, 84, 80, 76, 68, 64, 56, 52, 48, 40, 36, 32, 24, 20, 12, 8, 4,
262 4, 8, 12, 16, 20, 24, 28, 32, 40, 44, 48, 52, 56, 60, 64, 68, 68, 64, 60, 56, 52, 48, 44, 40, 32, 28, 24, 20, 16, 12, 8, 4,
263 0, 4, 8, 12, 16, 20, 24, 28, 28, 32, 36, 40, 44, 48, 52, 56, 56, 52, 48, 44, 40, 36, 32, 28, 28, 24, 20, 16, 12, 8, 4, 0,
264 0, 4, 8, 8, 12, 12, 16, 20, 20, 24, 28, 28, 32, 32, 36, 40, 40, 36, 32, 32, 28, 28, 24, 20, 20, 16, 12, 12, 8, 8, 4, 0,
265 0, 4, 4, 4, 8, 8, 8, 12, 12, 16, 16, 16, 20, 20, 20, 24, 24, 20, 20, 20, 16, 16, 16, 12, 12, 8, 8, 8, 4, 4, 4, 0,
266 0, 0, 0, 0, 4, 4, 4, 4, 4, 4, 4, 4, 8, 8, 8, 8, 8, 8, 8, 8, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0,
/* 16x16 weight window (256 entries, listed 16 per row). Every listed weight
 * is a multiple of 4; they rise from 0 in the corners to a peak of 224 in
 * the centre.
 * NOTE(review): one of three obmc16 definitions visible in this file,
 * presumably selected by preprocessor conditionals that are not visible
 * here -- confirm which variant is active. */
269 static const uint8_t obmc16[256]={
270 0, 4, 4, 8, 8, 12, 12, 16, 16, 12, 12, 8, 8, 4, 4, 0,
271 4, 8, 16, 20, 28, 32, 40, 44, 44, 40, 32, 28, 20, 16, 8, 4,
272 4, 16, 24, 36, 44, 56, 64, 76, 76, 64, 56, 44, 36, 24, 16, 4,
273 8, 20, 36, 48, 64, 76, 92,104,104, 92, 76, 64, 48, 36, 20, 8,
274 8, 28, 44, 64, 80,100,116,136,136,116,100, 80, 64, 44, 28, 8,
275 12, 32, 56, 76,100,120,144,164,164,144,120,100, 76, 56, 32, 12,
276 12, 40, 64, 92,116,144,168,196,196,168,144,116, 92, 64, 40, 12,
277 16, 44, 76,104,136,164,196,224,224,196,164,136,104, 76, 44, 16,
278 16, 44, 76,104,136,164,196,224,224,196,164,136,104, 76, 44, 16,
279 12, 40, 64, 92,116,144,168,196,196,168,144,116, 92, 64, 40, 12,
280 12, 32, 56, 76,100,120,144,164,164,144,120,100, 76, 56, 32, 12,
281 8, 28, 44, 64, 80,100,116,136,136,116,100, 80, 64, 44, 28, 8,
282 8, 20, 36, 48, 64, 76, 92,104,104, 92, 76, 64, 48, 36, 20, 8,
283 4, 16, 24, 36, 44, 56, 64, 76, 76, 64, 56, 44, 36, 24, 16, 4,
284 4, 8, 16, 20, 28, 32, 40, 44, 44, 40, 32, 28, 20, 16, 8, 4,
285 0, 4, 4, 8, 8, 12, 12, 16, 16, 12, 12, 8, 8, 4, 4, 0,
/* 32x32 weight window (1024 entries, listed 32 per row). Weights are 0 along
 * the border and rise to a peak of 64 in the centre; the individual values
 * differ slightly from the other 0..64 obmc32 variant in this file.
 * NOTE(review): one of three obmc32 definitions visible in this file,
 * presumably selected by preprocessor conditionals that are not visible
 * here -- confirm which variant is active. */
289 static const uint8_t obmc32[1024]={
290 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
291 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
292 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
293 0, 0, 1, 1, 1, 2, 2, 3, 4, 5, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 6, 5, 5, 4, 3, 2, 2, 1, 1, 1, 0, 0,
294 0, 0, 1, 1, 2, 3, 4, 5, 6, 7, 9,10,11,11,12,12,12,12,11,11,10, 9, 7, 6, 5, 4, 3, 2, 1, 1, 0, 0,
295 0, 0, 1, 2, 3, 5, 6, 8, 9,11,12,14,15,16,17,17,17,17,16,15,14,12,11, 9, 8, 6, 5, 3, 2, 1, 0, 0,
296 0, 1, 1, 2, 4, 6, 8,10,12,15,17,19,20,21,22,23,23,22,21,20,19,17,15,12,10, 8, 6, 4, 2, 1, 1, 0,
297 0, 1, 2, 3, 5, 8,10,13,16,19,21,24,26,27,28,29,29,28,27,26,24,21,19,16,13,10, 8, 5, 3, 2, 1, 0,
298 0, 1, 2, 4, 6, 9,12,16,19,23,26,29,31,33,34,35,35,34,33,31,29,26,23,19,16,12, 9, 6, 4, 2, 1, 0,
299 0, 1, 3, 5, 7,11,15,19,23,26,30,34,37,39,40,41,41,40,39,37,34,30,26,23,19,15,11, 7, 5, 3, 1, 0,
300 0, 1, 3, 5, 9,12,17,21,26,30,35,38,42,44,46,47,47,46,44,42,38,35,30,26,21,17,12, 9, 5, 3, 1, 0,
301 0, 1, 3, 6, 9,14,19,24,29,34,38,43,46,49,51,52,52,51,49,46,43,38,34,29,24,19,14, 9, 6, 3, 1, 0,
302 0, 1, 3, 6,11,15,20,26,31,37,42,46,50,53,56,57,57,56,53,50,46,42,37,31,26,20,15,11, 6, 3, 1, 0,
303 0, 1, 3, 7,11,16,21,27,33,39,44,49,53,57,59,60,60,59,57,53,49,44,39,33,27,21,16,11, 7, 3, 1, 0,
304 0, 1, 4, 7,12,17,22,28,34,40,46,51,56,59,61,63,63,61,59,56,51,46,40,34,28,22,17,12, 7, 4, 1, 0,
305 0, 1, 4, 7,12,17,23,29,35,41,47,52,57,60,63,64,64,63,60,57,52,47,41,35,29,23,17,12, 7, 4, 1, 0,
306 0, 1, 4, 7,12,17,23,29,35,41,47,52,57,60,63,64,64,63,60,57,52,47,41,35,29,23,17,12, 7, 4, 1, 0,
307 0, 1, 4, 7,12,17,22,28,34,40,46,51,56,59,61,63,63,61,59,56,51,46,40,34,28,22,17,12, 7, 4, 1, 0,
308 0, 1, 3, 7,11,16,21,27,33,39,44,49,53,57,59,60,60,59,57,53,49,44,39,33,27,21,16,11, 7, 3, 1, 0,
309 0, 1, 3, 6,11,15,20,26,31,37,42,46,50,53,56,57,57,56,53,50,46,42,37,31,26,20,15,11, 6, 3, 1, 0,
310 0, 1, 3, 6, 9,14,19,24,29,34,38,43,46,49,51,52,52,51,49,46,43,38,34,29,24,19,14, 9, 6, 3, 1, 0,
311 0, 1, 3, 5, 9,12,17,21,26,30,35,38,42,44,46,47,47,46,44,42,38,35,30,26,21,17,12, 9, 5, 3, 1, 0,
312 0, 1, 3, 5, 7,11,15,19,23,26,30,34,37,39,40,41,41,40,39,37,34,30,26,23,19,15,11, 7, 5, 3, 1, 0,
313 0, 1, 2, 4, 6, 9,12,16,19,23,26,29,31,33,34,35,35,34,33,31,29,26,23,19,16,12, 9, 6, 4, 2, 1, 0,
314 0, 1, 2, 3, 5, 8,10,13,16,19,21,24,26,27,28,29,29,28,27,26,24,21,19,16,13,10, 8, 5, 3, 2, 1, 0,
315 0, 1, 1, 2, 4, 6, 8,10,12,15,17,19,20,21,22,23,23,22,21,20,19,17,15,12,10, 8, 6, 4, 2, 1, 1, 0,
316 0, 0, 1, 2, 3, 5, 6, 8, 9,11,12,14,15,16,17,17,17,17,16,15,14,12,11, 9, 8, 6, 5, 3, 2, 1, 0, 0,
317 0, 0, 1, 1, 2, 3, 4, 5, 6, 7, 9,10,11,11,12,12,12,12,11,11,10, 9, 7, 6, 5, 4, 3, 2, 1, 1, 0, 0,
318 0, 0, 1, 1, 1, 2, 2, 3, 4, 5, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 6, 5, 5, 4, 3, 2, 2, 1, 1, 1, 0, 0,
319 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0,
320 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
321 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
/* 16x16 weight window (256 entries, listed 16 per row). Weights are 0 or 1
 * along the border and rise to a peak of 63 in the centre.
 * NOTE(review): one of three obmc16 definitions visible in this file,
 * presumably selected by preprocessor conditionals that are not visible
 * here -- confirm which variant is active. */
324 static const uint8_t obmc16[256]={
325 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
326 0, 0, 1, 2, 3, 4, 5, 5, 5, 5, 4, 3, 2, 1, 0, 0,
327 0, 1, 3, 6, 8,11,13,14,14,13,11, 8, 6, 3, 1, 0,
328 0, 2, 6,10,15,20,24,26,26,24,20,15,10, 6, 2, 0,
329 0, 3, 8,16,23,30,35,38,38,35,30,23,16, 8, 3, 0,
330 1, 4,11,20,30,39,46,49,49,46,39,30,20,11, 4, 1,
331 1, 5,13,24,35,46,54,58,58,54,46,35,24,13, 5, 1,
332 0, 5,14,26,38,49,58,63,63,58,49,38,26,14, 5, 0,
333 0, 5,14,26,38,49,58,63,63,58,49,38,26,14, 5, 0,
334 1, 5,13,24,35,46,54,58,58,54,46,35,24,13, 5, 1,
335 1, 4,11,20,30,39,46,49,49,46,39,30,20,11, 4, 1,
336 0, 3, 8,16,23,30,35,38,38,35,30,23,16, 8, 3, 0,
337 0, 2, 6,10,15,20,24,26,26,24,20,15,10, 6, 2, 0,
338 0, 1, 3, 6, 8,11,13,14,14,13,11, 8, 6, 3, 1, 0,
339 0, 0, 1, 2, 3, 4, 5, 5, 5, 5, 4, 3, 2, 1, 0, 0,
340 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
/* 8x8 weight window (64 entries, listed 8 per row). Each listed entry equals
 * 4*a*b, where a and b come from the ramp {1,3,5,7,7,5,3,1} for the row and
 * the column respectively; the values run from 4 in the corners to 196 in
 * the centre. */
346 static const uint8_t obmc8[64]={
347 4, 12, 20, 28, 28, 20, 12, 4,
348 12, 36, 60, 84, 84, 60, 36, 12,
349 20, 60,100,140,140,100, 60, 20,
350 28, 84,140,196,196,140, 84, 28,
351 28, 84,140,196,196,140, 84, 28,
352 20, 60,100,140,140,100, 60, 20,
353 12, 36, 60, 84, 84, 60, 36, 12,
354 4, 12, 20, 28, 28, 20, 12, 4,
/* 4x4 weight window (16 entries).
 * NOTE(review): the initialiser values are not visible in this excerpt. */
359 static const uint8_t obmc4[16]={
/* Window tables collected from largest to smallest: index 0 is the 32x32
 * table, index 3 the 4x4 table. */
367 static const uint8_t *obmc_tab[4]={
368 obmc32, obmc16, obmc8, obmc4
/* File-scope MAX_REF_FRAMES x MAX_REF_FRAMES integer table; it has no
 * initialiser here.
 * NOTE(review): presumably holds motion-vector scale factors between pairs
 * of reference frames (going by the name) -- confirm where it is filled. */
371 static int scale_mv_ref[MAX_REF_FRAMES][MAX_REF_FRAMES];
/* Per-block node.
 * NOTE(review): only part of the definition is visible in this excerpt; the
 * visible parts are the BLOCK_INTRA flag value (1), two disabled TYPE_*
 * defines and the `level` member. The initialiser of null_block shows that
 * a three-element `color` member also exists. */
373 typedef struct BlockNode{
379 //#define TYPE_SPLIT 1
380 #define BLOCK_INTRA 1
382 //#define TYPE_NOCOLOR 4
383 uint8_t level; //FIXME merge into type?
/* Constant BlockNode whose colour components are all 128.
 * NOTE(review): the remaining initialisers are not visible in this excerpt;
 * presumably this is the stand-in for neighbours that do not exist --
 * confirm against the users. */
386 static const BlockNode null_block= { //FIXME add border maybe
387 .color= {128,128,128},
/* Block size constants: MB_SIZE is 1<<LOG2_MB_SIZE, i.e. 16 with the value
 * defined here. */
395 #define LOG2_MB_SIZE 4
396 #define MB_SIZE (1<<LOG2_MB_SIZE)
/* NOTE(review): the members of this struct are not visible in this excerpt;
 * going by the name it pairs an x position with a coefficient -- confirm.
 * SubBand keeps a pointer (x_coeff) to elements of this type. */
398 typedef struct x_and_coeff{
/* Description of one subband.
 * NOTE(review): only some members are visible in this excerpt. Visible are
 * the quantiser log (qlog), a line stride, a pointer to x_and_coeff entries,
 * a link to a parent subband and a table of context-state rows (7 + 512 rows
 * of 32 bytes each). */
403 typedef struct SubBand{
408 int qlog; ///< log(qscale)/log[2^(1/6)]
412 int stride_line; ///< Stride measured in lines, not pixels.
413 x_and_coeff * x_coeff;
414 struct SubBand *parent;
415 uint8_t state[/*7*2*/ 7 + 512][32];
/* One plane: holds four SubBand entries for each of up to
 * MAX_DECOMPOSITIONS decomposition levels.
 * NOTE(review): other members are not visible in this excerpt. */
418 typedef struct Plane{
421 SubBand band[MAX_DECOMPOSITIONS][4];
/* Codec state.
 * NOTE(review): only part of the definition is visible in this excerpt. The
 * visible members cover: the owning AVCodecContext, the input / current /
 * reference / "mconly" frames, context-state arrays for the header and for
 * blocks, the spatial and temporal decomposition settings, per-reference
 * motion-vector and score arrays, the DWT work buffer, the planes, a
 * motion-estimation cache and an embedded MpegEncContext (used for motion
 * estimation only, per its own comment). */
424 typedef struct SnowContext{
425 // MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to make the motion estimation eventually independent of MpegEncContext, so this will be removed then (FIXME/XXX)
427 AVCodecContext *avctx;
431 AVFrame input_picture; ///< new_picture with the internal linesizes
432 AVFrame current_picture;
433 AVFrame last_picture[MAX_REF_FRAMES];
434 AVFrame mconly_picture;
435 // uint8_t q_context[16];
436 uint8_t header_state[32];
437 uint8_t block_state[128 + 32*128];
441 int spatial_decomposition_type;
442 int last_spatial_decomposition_type;
443 int temporal_decomposition_type;
444 int spatial_decomposition_count;
445 int temporal_decomposition_count;
448 int16_t (*ref_mvs[MAX_REF_FRAMES])[2];
449 uint32_t *ref_scores[MAX_REF_FRAMES];
450 DWTELEM *spatial_dwt_buffer;
454 int spatial_scalability;
464 #define QBIAS_SHIFT 3
468 int last_block_max_depth;
469 Plane plane[MAX_PLANES];
471 #define ME_CACHE_SIZE 1024
472 int me_cache[ME_CACHE_SIZE];
473 int me_cache_generation;
476 MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to make the motion estimation eventually independent of MpegEncContext, so this will be removed then (FIXME/XXX)
/* Fetch the buffer for line `line_num`: yields the pointer already stored in
 * slice_buf->line[line_num] when it is non-NULL, otherwise calls
 * slice_buffer_load_line() to attach one.
 * Both macro arguments are expanded more than once, so they must be free of
 * side effects. */
487 #define slice_buffer_get_line(slice_buf, line_num) ((slice_buf)->line[line_num] ? (slice_buf)->line[line_num] : slice_buffer_load_line((slice_buf), (line_num)))
488 //#define slice_buffer_get_line(slice_buf, line_num) (slice_buffer_load_line((slice_buf), (line_num)))
/* Forward declaration; the definition is not part of this excerpt. */
490 static void iterative_me(SnowContext *s);
/**
 * Set up a slice_buffer.
 *
 * Records base_buffer, line_count and line_width in buf, allocates a zeroed
 * array of line_count line pointers (so no line is attached initially) and a
 * stack of max_allocated_lines spare line buffers, each line_width DWTELEMs
 * long. data_stack_top ends up pointing at the last stack entry, i.e. the
 * stack starts out full.
 *
 * NOTE(review): none of the allocation results are checked in the visible
 * statements; a failed allocation would be dereferenced later.
 */
492 static void slice_buffer_init(slice_buffer * buf, int line_count, int max_allocated_lines, int line_width, DWTELEM * base_buffer)
496 buf->base_buffer = base_buffer;
497 buf->line_count = line_count;
498 buf->line_width = line_width;
499 buf->data_count = max_allocated_lines;
500 buf->line = (DWTELEM * *) av_mallocz (sizeof(DWTELEM *) * line_count);
501 buf->data_stack = (DWTELEM * *) av_malloc (sizeof(DWTELEM *) * max_allocated_lines);
503 for (i = 0; i < max_allocated_lines; i++)
505 buf->data_stack[i] = (DWTELEM *) av_malloc (sizeof(DWTELEM) * line_width);
508 buf->data_stack_top = max_allocated_lines - 1;
/**
 * Attach a spare buffer to line `line`.
 *
 * The visible statements pop the top entry off buf->data_stack and store it
 * in buf->line[line]; the assert requires that a spare buffer is available.
 *
 * NOTE(review): parts of the body are missing from this excerpt. The early
 * `return buf->line[line];` is presumably guarded by a check that the line
 * is already attached, and the final return statement is not visible.
 * `offset` is computed here, but its use is not visible either.
 */
511 static DWTELEM * slice_buffer_load_line(slice_buffer * buf, int line)
516 // av_log(NULL, AV_LOG_DEBUG, "Cache hit: %d\n", line);
518 assert(buf->data_stack_top >= 0);
519 // assert(!buf->line[line]);
521 return buf->line[line];
523 offset = buf->line_width * line;
524 buffer = buf->data_stack[buf->data_stack_top];
525 buf->data_stack_top--;
526 buf->line[line] = buffer;
528 // av_log(NULL, AV_LOG_DEBUG, "slice_buffer_load_line: line: %d remaining: %d\n", line, buf->data_stack_top + 1);
/**
 * Detach the buffer of line `line` and return it to the spare stack.
 *
 * The asserts require `line` to be within [0, line_count) and to have a
 * buffer attached. The buffer is pushed onto buf->data_stack and
 * buf->line[line] is reset to NULL.
 *
 * NOTE(review): `offset` is computed here, but no use of it is visible in
 * this excerpt.
 */
533 static void slice_buffer_release(slice_buffer * buf, int line)
538 assert(line >= 0 && line < buf->line_count);
539 assert(buf->line[line]);
541 offset = buf->line_width * line;
542 buffer = buf->line[line];
543 buf->data_stack_top++;
544 buf->data_stack[buf->data_stack_top] = buffer;
545 buf->line[line] = NULL;
547 // av_log(NULL, AV_LOG_DEBUG, "slice_buffer_release: line: %d remaining: %d\n", line, buf->data_stack_top + 1);
/**
 * Walk all line_count line slots of buf and hand them to
 * slice_buffer_release().
 *
 * NOTE(review): the condition guarding the release is not visible in this
 * excerpt; presumably only slots with an attached buffer are released, since
 * slice_buffer_release() asserts that the slot is non-NULL.
 */
550 static void slice_buffer_flush(slice_buffer * buf)
553 for (i = 0; i < buf->line_count; i++)
557 // av_log(NULL, AV_LOG_DEBUG, "slice_buffer_flush: line: %d \n", i);
558 slice_buffer_release(buf, i);
/**
 * Free everything slice_buffer_init() allocated.
 *
 * Flushes first so that attached line buffers are handed back to the stack
 * through slice_buffer_flush(), then frees each of the data_count stack
 * entries, the stack array itself and the line-pointer array. av_freep() is
 * passed the address of each pointer.
 */
563 static void slice_buffer_destroy(slice_buffer * buf)
566 slice_buffer_flush(buf);
568 for (i = buf->data_count - 1; i >= 0; i--)
570 assert(buf->data_stack[i]);
571 av_freep(&buf->data_stack[i]);
573 assert(buf->data_stack);
574 av_freep(&buf->data_stack);
576 av_freep(&buf->line);
/* NOTE(review): the statement the IRIX remark below refers to is not visible
 * in this excerpt. */
580 // Avoid a name clash on SGI IRIX
/* Shift constant derived from FRAC_BITS, and a file-scope table of QROOT
 * bytes that has no initialiser here.
 * NOTE(review): where qexp is filled is not visible in this excerpt. */
583 #define QEXPSHIFT (7-FRAC_BITS+8) //FIXME try to change this to 0
584 static uint8_t qexp[QROOT];
/* Loops for as long as v, compared as unsigned, exceeds m. Because of the
 * unsigned comparison a negative v also enters the loop.
 * NOTE(review): the loop body and the return statement are not visible in
 * this excerpt; presumably v is reflected back into [0, m] -- confirm. */
586 static inline int mirror(int v, int m){
587 while((unsigned)v > (unsigned)m){
/**
 * Write the integer v with the range coder c, using the adaptive context
 * bytes in `state`.
 *
 * Layout of `state` as used by the visible statements (the index ranges
 * repeat the existing end-of-line remarks):
 *  - state[0]       zero flag: 0 is written on the paths that go on to code
 *                   a magnitude, 1 on the remaining path
 *  - state[1..10]   unary code of the exponent e = av_log2(|v|)
 *  - state[11..21]  sign bit (v < 0)
 *  - state[22..31]  the bits of |v| below its top bit, most significant
 *                   first
 *
 * NOTE(review): several alternative bodies are visible one after another.
 * They are presumably separated by preprocessor or if/else lines that are
 * missing from this excerpt, as are the guards on v being non-zero and on
 * is_signed. The variants differ in how they clamp context indices for
 * large exponents (el = FFMIN(e, 10) / FFMIN(i, 9) / unclamped).
 */
594 static inline void put_symbol(RangeCoder *c, uint8_t *state, int v, int is_signed){
598 const int a= FFABS(v);
599 const int e= av_log2(a);
601 const int el= FFMIN(e, 10);
602 put_rac(c, state+0, 0);
605 put_rac(c, state+1+i, 1); //1..10
608 put_rac(c, state+1+9, 1); //1..10
610 put_rac(c, state+1+FFMIN(i,9), 0);
612 for(i=e-1; i>=el; i--){
613 put_rac(c, state+22+9, (a>>i)&1); //22..31
616 put_rac(c, state+22+i, (a>>i)&1); //22..31
620 put_rac(c, state+11 + el, v < 0); //11..21
623 put_rac(c, state+0, 0);
626 put_rac(c, state+1+i, 1); //1..10
628 put_rac(c, state+1+i, 0);
630 for(i=e-1; i>=0; i--){
631 put_rac(c, state+22+i, (a>>i)&1); //22..31
635 put_rac(c, state+11 + e, v < 0); //11..21
638 put_rac(c, state+1+FFMIN(i,9), 1); //1..10
640 put_rac(c, state+1+FFMIN(i,9), 0);
642 for(i=e-1; i>=0; i--){
643 put_rac(c, state+22+FFMIN(i,9), (a>>i)&1); //22..31
647 put_rac(c, state+11 + FFMIN(e,10), v < 0); //11..21
651 put_rac(c, state+0, 1);
/**
 * Read an integer with the range coder c, using the adaptive context bytes
 * in `state`.
 *
 * The visible statements use the same context layout as the FFMIN-clamped
 * variant of put_symbol(): state[0] is tested first, the exponent is read
 * as a unary code from state[1..10], the lower bits are shifted into `a`
 * from state[22..31], and, when is_signed is set, a sign bit is read from
 * state[11..21].
 *
 * NOTE(review): the statements that initialise e and a, and the return
 * statements, are not visible in this excerpt.
 */
655 static inline int get_symbol(RangeCoder *c, uint8_t *state, int is_signed){
656 if(get_rac(c, state+0))
661 while(get_rac(c, state+1 + FFMIN(e,9))){ //1..10
666 for(i=e-1; i>=0; i--){
667 a += a + get_rac(c, state+22 + FFMIN(i,9)); //22..31
670 if(is_signed && get_rac(c, state+11 + FFMIN(e,10))) //11..21
/**
 * Write v with the range coder c, starting from a caller-supplied exponent
 * estimate log2.
 *
 * In the visible statements r starts at 1<<log2 (or 1 when log2 is
 * negative), "continue" (1) and "stop" (0) flags are written with context
 * state[4+log2], and the low log2 bits of v are then written most
 * significant first with contexts state[31-i].
 *
 * NOTE(review): the loop that updates v, r and log2 between the flag writes
 * is not visible in this excerpt.
 */
677 static inline void put_symbol2(RangeCoder *c, uint8_t *state, int v, int log2){
679 int r= log2>=0 ? 1<<log2 : 1;
685 put_rac(c, state+4+log2, 1);
690 put_rac(c, state+4+log2, 0);
692 for(i=log2-1; i>=0; i--){
693 put_rac(c, state+31-i, (v>>i)&1);
/**
 * Read a value written by put_symbol2().
 *
 * The visible statements use the same contexts as put_symbol2(): flags are
 * read with state[4+log2] while they are set, then log2 bits are read with
 * contexts state[31-i] and added into v at bit position i.
 *
 * NOTE(review): the body of the flag loop, the initialisation of v and the
 * return statement are not visible in this excerpt.
 */
697 static inline int get_symbol2(RangeCoder *c, uint8_t *state, int log2){
699 int r= log2>=0 ? 1<<log2 : 1;
704 while(get_rac(c, state+4+log2)){
710 for(i=log2-1; i>=0; i--){
711 v+= get_rac(c, state+31-i)<<i;
/**
 * One lifting step over strided sample arrays.
 *
 * Each output is dst = src + p (or src - p when `inverse` is set), where
 * p = (mul*(ref[i] + ref[i+1]) + add) >> shift. The first and last outputs
 * use 2*ref of a single neighbour instead of the sum of two, i.e. the
 * missing neighbour is mirrored.
 *
 * mirror_left / mirror_right are computed from `highpass` and the parity of
 * `width`; w is the index of the last output handled by the edge statement.
 *
 * NOTE(review): the conditions that apply the edge statements and the loop
 * header are not visible in this excerpt; presumably they test mirror_left
 * and mirror_right -- confirm.
 */
717 static av_always_inline void lift(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int dst_step, int src_step, int ref_step, int width, int mul, int add, int shift, int highpass, int inverse){
718 const int mirror_left= !highpass;
719 const int mirror_right= (width&1) ^ highpass;
720 const int w= (width>>1) - 1 + (highpass & width);
723 #define LIFT(src, ref, inv) ((src) + ((inv) ? - (ref) : + (ref)))
725 dst[0] = LIFT(src[0], ((mul*2*ref[0]+add)>>shift), inverse);
731 dst[i*dst_step] = LIFT(src[i*src_step], ((mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add)>>shift), inverse);
735 dst[w*dst_step] = LIFT(src[w*src_step], ((mul*2*ref[w*ref_step]+add)>>shift), inverse);
/**
 * Lifting step with the same parameter list and edge handling as lift(),
 * but built around an intermediate value r.
 *
 * In the visible statements r starts as 3*(ref[i] + ref[i+1]) in the
 * interior and as 3*2*ref[w] at the right edge, and each output is
 * LIFT(src, (r + add) >> shift, inverse).
 *
 * NOTE(review): the statements that further adjust r, and the computation
 * of r for the left edge, are not visible in this excerpt; `mul` is not
 * used by any visible statement.
 */
740 static av_always_inline void lift5(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int dst_step, int src_step, int ref_step, int width, int mul, int add, int shift, int highpass, int inverse){
741 const int mirror_left= !highpass;
742 const int mirror_right= (width&1) ^ highpass;
743 const int w= (width>>1) - 1 + (highpass & width);
750 dst[0] = LIFT(src[0], ((r+add)>>shift), inverse);
756 int r= 3*(ref[i*ref_step] + ref[(i+1)*ref_step]);
759 dst[i*dst_step] = LIFT(src[i*src_step], ((r+add)>>shift), inverse);
763 int r= 3*2*ref[w*ref_step];
766 dst[w*dst_step] = LIFT(src[w*src_step], ((r+add)>>shift), inverse);
/**
 * Lifting step with the same parameter list and edge handling as lift(),
 * using the LIFTS() update instead of a plain add/subtract.
 *
 * LIFTS(src, ref, inv):
 *  - inverse: src - ((ref - 4*src) >> shift)
 *  - forward: (16*4*src + 4*ref + 8 + (5<<27)) / (5*16) - (1<<23)
 * In the forward form the (5<<27) term added before the division equals
 * (1<<23)*(5*16), which is exactly what the trailing - (1<<23) removes
 * again.
 * NOTE(review): presumably this bias keeps the dividend non-negative so the
 * division rounds the same way for negative inputs -- confirm.
 *
 * The macro reads `shift` from the enclosing function rather than taking it
 * as an argument.
 */
772 static av_always_inline void liftS(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int dst_step, int src_step, int ref_step, int width, int mul, int add, int shift, int highpass, int inverse){
773 const int mirror_left= !highpass;
774 const int mirror_right= (width&1) ^ highpass;
775 const int w= (width>>1) - 1 + (highpass & width);
779 #define LIFTS(src, ref, inv) ((inv) ? (src) - (((ref) - 4*(src))>>shift): (16*4*(src) + 4*(ref) + 8 + (5<<27))/(5*16) - (1<<23))
781 dst[0] = LIFTS(src[0], mul*2*ref[0]+add, inverse);
787 dst[i*dst_step] = LIFTS(src[i*src_step], mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add, inverse);
791 dst[w*dst_step] = LIFTS(src[w*src_step], mul*2*ref[w*ref_step]+add, inverse);
/**
 * Generic in-place lifting step on one row.
 *
 * Every second sample, starting at index `start`, is updated from its
 * neighbours: tap i reads dst[x + 2*i - n + 1] and is weighted by
 * coeffs[i], with the product accumulated in 64 bits. Indices at or beyond
 * `width` are reflected (2*width - x2 - 2). The rounded sum
 * ((sum + (1<<shift)/2) >> shift) is added to dst[x], or subtracted when
 * `inverse` is set.
 *
 * NOTE(review): the tap loop header and the handling of negative indices
 * are not visible in this excerpt; presumably the loop runs over n taps.
 */
797 static void inplace_lift(DWTELEM *dst, int width, int *coeffs, int n, int shift, int start, int inverse){
800 for(x=start; x<width; x+=2){
804 int x2= x + 2*i - n + 1;
806 else if(x2>=width) x2= 2*width-x2-2;
807 sum += coeffs[i]*(int64_t)dst[x2];
809 if(inverse) dst[x] -= (sum + (1<<shift)/2)>>shift;
810 else dst[x] += (sum + (1<<shift)/2)>>shift;
/**
 * Vertical counterpart of inplace_lift().
 *
 * Every second row, starting at row `start`, is updated column by column:
 * tap i reads row y + 2*i - n + 1 of the same column and is weighted by
 * coeffs[i]. Rows at or beyond `height` are reflected (2*height - y2 - 2).
 * The rounded sum is added to dst[x + y*stride], or subtracted when
 * `inverse` is set. `stride` is the distance between rows in elements.
 *
 * NOTE(review): the tap loop header and the handling of negative row
 * indices are not visible in this excerpt.
 */
814 static void inplace_liftV(DWTELEM *dst, int width, int height, int stride, int *coeffs, int n, int shift, int start, int inverse){
816 for(y=start; y<height; y+=2){
817 for(x=0; x<width; x++){
821 int y2= y + 2*i - n + 1;
823 else if(y2>=height) y2= 2*height-y2-2;
824 sum += coeffs[i]*(int64_t)dst[x + y2*stride];
826 if(inverse) dst[x + y*stride] -= (sum + (1<<shift)/2)>>shift;
827 else dst[x + y*stride] += (sum + (1<<shift)/2)>>shift;
/* Tap sets for the generic lifting code (COEFFS1..COEFFS4), written as
 * compound literals that are passed as the `coeffs` argument of
 * inplace_lift() / inplace_liftV().
 * NOTE(review): the same macro names are defined several times below. The
 * alternatives are presumably separated by #if/#elif lines that are missing
 * from this excerpt, as are the matching N1..N4, SHIFT1..SHIFT4, LX0/LX1
 * and SCALEX definitions -- confirm which set is active. */
836 #if 0 // more accurate 9/7
839 #define COEFFS1 (int[]){-25987,-25987}
842 #define COEFFS2 (int[]){-27777,-27777}
845 #define COEFFS3 (int[]){28931,28931}
848 #define COEFFS4 (int[]){14533,14533}
852 #define COEFFS1 (int[]){1,-9,-9,1}
855 #define COEFFS2 (int[]){-1,5,5,-1}
868 #define COEFFS1 (int[]){1,1}
871 #define COEFFS2 (int[]){-1,-1}
884 #define COEFFS2 (int[]){-1,-1}
887 #define COEFFS3 (int[]){-1,-1}
890 #define COEFFS4 (int[]){-5,29,29,-5}
895 #define COEFFS1 (int[]){-203,-203}
898 #define COEFFS2 (int[]){-217,-217}
901 #define COEFFS3 (int[]){113,113}
904 #define COEFFS4 (int[]){227,227}
912 #define COEFFS2 (int[]){-1,-1}
915 #define COEFFS3 (int[]){-1,-1}
918 #define COEFFS4 (int[]){3,3}
922 #define COEFFS1 (int[]){1,-9,-9,1}
925 #define COEFFS2 (int[]){1,1}
935 #define COEFFS1 (int[]){1,-9,-9,1}
938 #define COEFFS2 (int[]){-1,5,5,-1}
/**
 * Forward transform of one row of `width` samples using the generic lifting
 * steps.
 *
 * Four inplace_lift() passes are applied in the order COEFFS1..COEFFS4,
 * alternating between start offsets LX1 and LX0. The row is then
 * de-interleaved through `temp`: the odd-indexed samples go to the upper
 * part starting at w2 = (width+1)/2, and the result is copied back to b.
 *
 * NOTE(review): the declaration of `temp` and the copy of the even-indexed
 * samples are not visible in this excerpt. The final memcpy() sizes the copy
 * with sizeof(int) although the elements are DWTELEM; that is only correct
 * if DWTELEM is int-sized -- confirm.
 */
946 static void horizontal_decomposeX(DWTELEM *b, int width){
948 const int width2= width>>1;
949 const int w2= (width+1)>>1;
952 inplace_lift(b, width, COEFFS1, N1, SHIFT1, LX1, 0);
953 inplace_lift(b, width, COEFFS2, N2, SHIFT2, LX0, 0);
954 inplace_lift(b, width, COEFFS3, N3, SHIFT3, LX1, 0);
955 inplace_lift(b, width, COEFFS4, N4, SHIFT4, LX0, 0);
957 for(x=0; x<width2; x++){
959 temp[x+w2]= b[2*x + 1];
963 memcpy(b, temp, width*sizeof(int));
/*
 * Inverse of horizontal_decomposeX() for one row: copies b into `temp`,
 * re-interleaves (samples stored from offset w2 go back to odd positions),
 * then undoes the four lifting steps in reverse order (COEFFS4 first) with
 * the inverse flag set.
 *
 * NOTE(review): the declaration of `temp` and the restore of even-indexed
 * samples are not visible in this excerpt.
 */
966 static void horizontal_composeX(DWTELEM *b, int width){
968 const int width2= width>>1;
970 const int w2= (width+1)>>1;
/* NOTE(review): the byte count uses sizeof(int) although b holds DWTELEM;
 * correct only while DWTELEM is int -- confirm. */
972 memcpy(temp, b, width*sizeof(int));
973 for(x=0; x<width2; x++){
975 b[2*x + 1]= temp[x+w2];
980 inplace_lift(b, width, COEFFS4, N4, SHIFT4, LX0, 1);
981 inplace_lift(b, width, COEFFS3, N3, SHIFT3, LX1, 1);
982 inplace_lift(b, width, COEFFS2, N2, SHIFT2, LX0, 1);
983 inplace_lift(b, width, COEFFS1, N1, SHIFT1, LX1, 1);
/*
 * Forward 2-D transform of a width x height region for the generic lifting
 * wavelet:
 *   1. multiply every sample by SCALEX,
 *   2. run horizontal_decomposeX() on each row,
 *   3. apply the four lifting steps vertically with inplace_liftV().
 * Rows are `stride` elements apart.
 */
986 static void spatial_decomposeX(DWTELEM *buffer, int width, int height, int stride){
989 for(y=0; y<height; y++){
990 for(x=0; x<width; x++){
991 buffer[y*stride + x] *= SCALEX;
995 for(y=0; y<height; y++){
996 horizontal_decomposeX(buffer + y*stride, width);
999 inplace_liftV(buffer, width, height, stride, COEFFS1, N1, SHIFT1, LX1, 0);
1000 inplace_liftV(buffer, width, height, stride, COEFFS2, N2, SHIFT2, LX0, 0);
1001 inplace_liftV(buffer, width, height, stride, COEFFS3, N3, SHIFT3, LX1, 0);
1002 inplace_liftV(buffer, width, height, stride, COEFFS4, N4, SHIFT4, LX0, 0);
/*
 * Inverse of spatial_decomposeX(): undoes the vertical lifting steps in
 * reverse order (COEFFS4 first, inverse flag set), runs
 * horizontal_composeX() on each row, and finally divides every sample by
 * SCALEX (integer division).
 */
1005 static void spatial_composeX(DWTELEM *buffer, int width, int height, int stride){
1008 inplace_liftV(buffer, width, height, stride, COEFFS4, N4, SHIFT4, LX0, 1);
1009 inplace_liftV(buffer, width, height, stride, COEFFS3, N3, SHIFT3, LX1, 1);
1010 inplace_liftV(buffer, width, height, stride, COEFFS2, N2, SHIFT2, LX0, 1);
1011 inplace_liftV(buffer, width, height, stride, COEFFS1, N1, SHIFT1, LX1, 1);
1013 for(y=0; y<height; y++){
1014 horizontal_composeX(buffer + y*stride, width);
1017 for(y=0; y<height; y++){
1018 for(x=0; x<width; x++){
1019 buffer[y*stride + x] /= SCALEX;
/*
 * Forward horizontal 5/3 transform of one row b of `width` samples.
 * The row is de-interleaved into the variable-length scratch array `temp`
 * (odd-indexed samples stored from offset w2), and the final two lift()
 * calls write the results back into b: the first into b+w2, the second
 * into b.
 *
 * NOTE(review): this excerpt also shows fragments of an open-coded loop
 * using A1..A4; whether that loop and the lift() calls are both compiled
 * in cannot be determined from the visible lines.
 */
1024 static void horizontal_decompose53i(DWTELEM *b, int width){
1025 DWTELEM temp[width];
1026 const int width2= width>>1;
1028 const int w2= (width+1)>>1;
1030 for(x=0; x<width2; x++){
1032 temp[x+w2]= b[2*x + 1];
1046 for(x=1; x+1<width2; x+=2){
1050 A2 += (A1 + A3 + 2)>>2;
1054 A1= temp[x+1+width2];
1057 A4 += (A1 + A3 + 2)>>2;
1063 A2 += (A1 + A3 + 2)>>2;
1068 lift(b+w2, temp+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 0);
1069 lift(b , temp , b+w2, 1, 1, 1, width, 1, 2, 2, 0, 0);
/*
 * Forward vertical 5/3 step on row b1: for each of the `width` columns,
 * subtracts (b0 + b2) >> 1, where b0 and b2 are the two neighbouring rows
 * passed by the caller.
 */
1073 static void vertical_decompose53iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1076 for(i=0; i<width; i++){
1077 b1[i] -= (b0[i] + b2[i])>>1;
/*
 * Forward vertical 5/3 step on row b1: for each of the `width` columns,
 * adds (b0 + b2 + 2) >> 2, i.e. a quarter of the neighbour sum with
 * rounding.
 */
1081 static void vertical_decompose53iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1084 for(i=0; i<width; i++){
1085 b1[i] += (b0[i] + b2[i] + 2)>>2;
/*
 * Forward 2-D 5/3 transform of a width x height region, processed with a
 * sliding window of row pointers (b0..b3) that advances two rows per
 * iteration, starting at y = -2. Row indices go through mirror() so that
 * out-of-range rows map back inside the region.
 *
 * The `expr < (unsigned)height` tests convert the left side to unsigned, so
 * they hold only for 0 <= expr < height; this skips work on rows outside
 * the picture at the start and end of the loop.
 *
 * NOTE(review): the window rotation and the timer start lines that pair
 * with STOP_TIMER are not visible in this excerpt.
 */
1089 static void spatial_decompose53i(DWTELEM *buffer, int width, int height, int stride){
1091 DWTELEM *b0= buffer + mirror(-2-1, height-1)*stride;
1092 DWTELEM *b1= buffer + mirror(-2 , height-1)*stride;
1094 for(y=-2; y<height; y+=2){
1095 DWTELEM *b2= buffer + mirror(y+1, height-1)*stride;
1096 DWTELEM *b3= buffer + mirror(y+2, height-1)*stride;
/* horizontal pass on the two rows entering the window */
1099 if(y+1<(unsigned)height) horizontal_decompose53i(b2, width);
1100 if(y+2<(unsigned)height) horizontal_decompose53i(b3, width);
1101 STOP_TIMER("horizontal_decompose53i")}
/* vertical pass: H0 on row y+1 first, then L0 on row y */
1104 if(y+1<(unsigned)height) vertical_decompose53iH0(b1, b2, b3, width);
1105 if(y+0<(unsigned)height) vertical_decompose53iL0(b0, b1, b2, width);
1106 STOP_TIMER("vertical_decompose53i*")}
/*
 * Forward horizontal 9/7 transform of one row b of `width` samples.
 * Four lifting passes are chained through the scratch array `temp`:
 * the first two read from b and write into temp (offset w2 and offset 0),
 * the last two read from temp and write the final result back into b
 * (offset w2 and offset 0). The passes use the W_A*, W_B*, W_C* and W_D*
 * constants respectively.
 */
1113 static void horizontal_decompose97i(DWTELEM *b, int width){
1114 DWTELEM temp[width];
1115 const int w2= (width+1)>>1;
1117 lift (temp+w2, b +1, b , 1, 2, 2, width, -W_AM, W_AO, W_AS, 1, 0);
1118 liftS(temp , b , temp+w2, 1, 2, 1, width, -W_BM, W_BO, W_BS, 0, 0);
1119 lift5(b +w2, temp+w2, temp , 1, 1, 1, width, W_CM, W_CO, W_CS, 1, 0);
1120 lift (b , temp , b +w2, 1, 1, 1, width, W_DM, W_DO, W_DS, 0, 0);
/*
 * Forward vertical 9/7 step "A" on row b1: for each column, subtracts
 * (W_AM*(b0 + b2) + W_AO) >> W_AS.
 */
1124 static void vertical_decompose97iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1127 for(i=0; i<width; i++){
1128 b1[i] -= (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
/*
 * Forward vertical 9/7 step "C" on row b1: adds a scaled, rounded sum of
 * the neighbouring rows b0 and b2 to each column.
 *
 * NOTE(review): two formulations are visible (a direct W_CM multiply and
 * one starting from r = 3*(b0 + b2)); the conditional selecting between
 * them, and any further adjustment of r, are not visible in this excerpt.
 */
1132 static void vertical_decompose97iH1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1135 for(i=0; i<width; i++){
1137 b1[i] += (W_CM*(b0[i] + b2[i])+W_CO)>>W_CS;
1139 int r= 3*(b0[i] + b2[i]);
1142 b1[i] += (r+W_CO)>>W_CS;
/*
 * Forward vertical 9/7 step "B" on row b1, using neighbouring rows b0 and
 * b2.
 *
 * NOTE(review): two formulations are visible -- a subtractive update with
 * W_BM/W_BO/W_BS, and a closed-form assignment that divides by 5*16 with a
 * (5<<27) / (1<<23) bias pair. The conditional selecting between them is
 * not visible in this excerpt.
 */
1147 static void vertical_decompose97iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1150 for(i=0; i<width; i++){
1152 b1[i] -= (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS;
1154 b1[i] = (16*4*b1[i] - 4*(b0[i] + b2[i]) + 8*5 + (5<<27)) / (5*16) - (1<<23);
/*
 * Forward vertical 9/7 step "D" on row b1: for each column, adds
 * (W_DM*(b0 + b2) + W_DO) >> W_DS.
 */
1159 static void vertical_decompose97iL1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1162 for(i=0; i<width; i++){
1163 b1[i] += (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS;
/*
 * Forward 2-D 9/7 transform of a width x height region, processed with a
 * sliding window of six row pointers (b0..b5) that advances two rows per
 * iteration, starting at y = -4. Row indices go through mirror().
 *
 * Each iteration runs the horizontal transform on the two rows entering
 * the window and then the four vertical steps H0, L0, H1, L1 on
 * successively earlier rows (y+3 down to y). The `< (unsigned)height`
 * tests hold only for 0 <= expr < height.
 *
 * NOTE(review): the window rotation and timer start lines are not visible
 * in this excerpt.
 */
1167 static void spatial_decompose97i(DWTELEM *buffer, int width, int height, int stride){
1169 DWTELEM *b0= buffer + mirror(-4-1, height-1)*stride;
1170 DWTELEM *b1= buffer + mirror(-4 , height-1)*stride;
1171 DWTELEM *b2= buffer + mirror(-4+1, height-1)*stride;
1172 DWTELEM *b3= buffer + mirror(-4+2, height-1)*stride;
1174 for(y=-4; y<height; y+=2){
1175 DWTELEM *b4= buffer + mirror(y+3, height-1)*stride;
1176 DWTELEM *b5= buffer + mirror(y+4, height-1)*stride;
1179 if(y+3<(unsigned)height) horizontal_decompose97i(b4, width);
1180 if(y+4<(unsigned)height) horizontal_decompose97i(b5, width);
1182 STOP_TIMER("horizontal_decompose97i")
1186 if(y+3<(unsigned)height) vertical_decompose97iH0(b3, b4, b5, width);
1187 if(y+2<(unsigned)height) vertical_decompose97iL0(b2, b3, b4, width);
1188 if(y+1<(unsigned)height) vertical_decompose97iH1(b1, b2, b3, width);
1189 if(y+0<(unsigned)height) vertical_decompose97iL1(b0, b1, b2, width);
1192 STOP_TIMER("vertical_decompose97i")
/*
 * Forward multi-level wavelet transform of `buffer`.
 * For level = 0 .. decomposition_count-1 the selected transform is applied
 * to a region of (width>>level) x (height>>level) samples with a row
 * stride of stride<<level, so each level works on every 2^level-th row.
 *
 * NOTE(review): the switch line is not visible; presumably it dispatches
 * on `type` (DWT_97, DWT_53 or DWT_X).
 */
1202 void ff_spatial_dwt(DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
1205 for(level=0; level<decomposition_count; level++){
1207 case DWT_97: spatial_decompose97i(buffer, width>>level, height>>level, stride<<level); break;
1208 case DWT_53: spatial_decompose53i(buffer, width>>level, height>>level, stride<<level); break;
1209 case DWT_X: spatial_decomposeX (buffer, width>>level, height>>level, stride<<level); break;
/*
 * Inverse horizontal 5/3 transform of one row b of `width` samples.
 * The two lift() calls (inverse flag set) write into the scratch array
 * `temp`, and the final loop re-interleaves temp back into b, restoring
 * the samples stored from offset w2 to odd positions.
 *
 * NOTE(review): fragments of an open-coded A1..A4 loop are also visible;
 * whether it is compiled in, and the restore of even-indexed samples, are
 * not determinable from this excerpt.
 */
1214 static void horizontal_compose53i(DWTELEM *b, int width){
1215 DWTELEM temp[width];
1216 const int width2= width>>1;
1217 const int w2= (width+1)>>1;
1229 for(x=1; x+1<width2; x+=2){
1233 A2 += (A1 + A3 + 2)>>2;
1237 A1= temp[x+1+width2];
1240 A4 += (A1 + A3 + 2)>>2;
1246 A2 += (A1 + A3 + 2)>>2;
1250 lift(temp , b , b+w2, 1, 1, 1, width, 1, 2, 2, 0, 1);
1251 lift(temp+w2, b+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 1);
1253 for(x=0; x<width2; x++){
1255 b[2*x + 1]= temp[x+w2];
/*
 * Inverse vertical 5/3 step on row b1: for each column, adds
 * (b0 + b2) >> 1. Exact inverse of the subtraction in
 * vertical_decompose53iH0() when b0 and b2 hold the same values.
 */
1261 static void vertical_compose53iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1264 for(i=0; i<width; i++){
1265 b1[i] += (b0[i] + b2[i])>>1;
/*
 * Inverse vertical 5/3 step on row b1: for each column, subtracts
 * (b0 + b2 + 2) >> 2. Exact inverse of the addition in
 * vertical_decompose53iL0() when b0 and b2 hold the same values.
 */
1269 static void vertical_compose53iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1272 for(i=0; i<width; i++){
1273 b1[i] -= (b0[i] + b2[i] + 2)>>2;
/*
 * Prime the 5/3 inverse-transform state for slice-buffer operation:
 * cs->b0 and cs->b1 are set to the slice-buffer lines for the mirrored row
 * indices -2 and -1, scaled by stride_line.
 *
 * NOTE(review): the initialisation of cs->y is not visible in this
 * excerpt.
 */
1277 static void spatial_compose53i_buffered_init(dwt_compose_t *cs, slice_buffer * sb, int height, int stride_line){
1278 cs->b0 = slice_buffer_get_line(sb, mirror(-1-1, height-1) * stride_line);
1279 cs->b1 = slice_buffer_get_line(sb, mirror(-1 , height-1) * stride_line);
/*
 * Prime the 5/3 inverse-transform state for a flat buffer: cs->b0 and
 * cs->b1 point at the rows for the mirrored indices -2 and -1.
 *
 * NOTE(review): the initialisation of cs->y is not visible in this
 * excerpt.
 */
1283 static void spatial_compose53i_init(dwt_compose_t *cs, DWTELEM *buffer, int height, int stride){
1284 cs->b0 = buffer + mirror(-1-1, height-1)*stride;
1285 cs->b1 = buffer + mirror(-1 , height-1)*stride;
/*
 * Advance the slice-buffered 5/3 inverse transform by one step.
 * Takes the two carried rows from cs (b0, b1), fetches the next two rows
 * from the slice buffer, runs the vertical inverse steps (L0 on row y+1,
 * then H0 on row y) and finally the horizontal inverse on rows y-1 and y.
 * The `< (unsigned)height` tests hold only for 0 <= expr < height.
 *
 * NOTE(review): the read of `y` from cs and the update of cs at the end
 * are not visible in this excerpt.
 */
1289 static void spatial_compose53i_dy_buffered(dwt_compose_t *cs, slice_buffer * sb, int width, int height, int stride_line){
1292 DWTELEM *b0= cs->b0;
1293 DWTELEM *b1= cs->b1;
1294 DWTELEM *b2= slice_buffer_get_line(sb, mirror(y+1, height-1) * stride_line);
1295 DWTELEM *b3= slice_buffer_get_line(sb, mirror(y+2, height-1) * stride_line);
1298 if(y+1<(unsigned)height) vertical_compose53iL0(b1, b2, b3, width);
1299 if(y+0<(unsigned)height) vertical_compose53iH0(b0, b1, b2, width);
1300 STOP_TIMER("vertical_compose53i*")}
1303 if(y-1<(unsigned)height) horizontal_compose53i(b0, width);
1304 if(y+0<(unsigned)height) horizontal_compose53i(b1, width);
1305 STOP_TIMER("horizontal_compose53i")}
/*
 * Advance the flat-buffer 5/3 inverse transform by one step.
 * Same sequence as the slice-buffered variant, but rows are addressed
 * directly as buffer + mirror(row, height-1)*stride: vertical L0 on row
 * y+1, vertical H0 on row y, then horizontal inverse on rows y-1 and y.
 *
 * NOTE(review): the read of `y` from cs and the update of cs at the end
 * are not visible in this excerpt.
 */
1312 static void spatial_compose53i_dy(dwt_compose_t *cs, DWTELEM *buffer, int width, int height, int stride){
1314 DWTELEM *b0= cs->b0;
1315 DWTELEM *b1= cs->b1;
1316 DWTELEM *b2= buffer + mirror(y+1, height-1)*stride;
1317 DWTELEM *b3= buffer + mirror(y+2, height-1)*stride;
1320 if(y+1<(unsigned)height) vertical_compose53iL0(b1, b2, b3, width);
1321 if(y+0<(unsigned)height) vertical_compose53iH0(b0, b1, b2, width);
1322 STOP_TIMER("vertical_compose53i*")}
1325 if(y-1<(unsigned)height) horizontal_compose53i(b0, width);
1326 if(y+0<(unsigned)height) horizontal_compose53i(b1, width);
1327 STOP_TIMER("horizontal_compose53i")}
/*
 * Whole-region 5/3 inverse transform: initialises a local compose state
 * and repeats spatial_compose53i_dy() until cs.y exceeds `height`.
 */
1334 static void spatial_compose53i(DWTELEM *buffer, int width, int height, int stride){
1336 spatial_compose53i_init(&cs, buffer, height, stride);
1337 while(cs.y <= height)
1338 spatial_compose53i_dy(&cs, buffer, width, height, stride);
/*
 * Inverse horizontal 9/7 transform of one row b of `width` samples.
 * Mirrors horizontal_decompose97i() with the passes in reverse order
 * (D, C, B, A) and the inverse flag set: the first two passes write into
 * the scratch array `temp`, the last two write back into b with a
 * destination step of 2, which re-interleaves the two halves.
 */
1342 void ff_snow_horizontal_compose97i(DWTELEM *b, int width){
1343 DWTELEM temp[width];
1344 const int w2= (width+1)>>1;
1346 lift (temp , b , b +w2, 1, 1, 1, width, W_DM, W_DO, W_DS, 0, 1);
1347 lift5(temp+w2, b +w2, temp , 1, 1, 1, width, W_CM, W_CO, W_CS, 1, 1);
1348 liftS(b , temp , temp+w2, 2, 1, 1, width, -W_BM, W_BO, W_BS, 0, 1);
1349 lift (b+1 , temp+w2, b , 2, 1, 2, width, -W_AM, W_AO, W_AS, 1, 1);
/*
 * Inverse vertical 9/7 step "A" on row b1: for each column, adds
 * (W_AM*(b0 + b2) + W_AO) >> W_AS (undoes vertical_decompose97iH0()).
 */
1352 static void vertical_compose97iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1355 for(i=0; i<width; i++){
1356 b1[i] += (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
/*
 * Inverse vertical 9/7 step "C" on row b1: subtracts a scaled, rounded sum
 * of the neighbouring rows b0 and b2 from each column.
 *
 * NOTE(review): two formulations are visible (a direct W_CM multiply and
 * one starting from r = 3*(b0 + b2)); the conditional selecting between
 * them, and any further adjustment of r, are not visible in this excerpt.
 */
1360 static void vertical_compose97iH1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1363 for(i=0; i<width; i++){
1365 b1[i] -= (W_CM*(b0[i] + b2[i])+W_CO)>>W_CS;
1367 int r= 3*(b0[i] + b2[i]);
1370 b1[i] -= (r+W_CO)>>W_CS;
/*
 * Inverse vertical 9/7 step "B" on row b1, using neighbouring rows b0 and
 * b2.
 *
 * NOTE(review): two formulations are visible; the second also feeds
 * 4*b1[i] into the shifted term. The conditional selecting between them is
 * not visible in this excerpt.
 */
1375 static void vertical_compose97iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1378 for(i=0; i<width; i++){
1380 b1[i] += (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS;
1382 b1[i] += (W_BM*(b0[i] + b2[i])+4*b1[i]+W_BO)>>W_BS;
/*
 * Inverse vertical 9/7 step "D" on row b1: for each column, subtracts
 * (W_DM*(b0 + b2) + W_DO) >> W_DS (undoes vertical_decompose97iL1()).
 */
1387 static void vertical_compose97iL1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
1390 for(i=0; i<width; i++){
1391 b1[i] -= (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS;
/*
 * Fused inverse vertical 9/7 pass over six consecutive rows b0..b5.
 * Per column, in this order: step D updates b4 from (b3, b5), step C
 * updates b3 from (b2, b4), step B updates b2 from (b1, b3) and step A
 * updates b1 from (b0, b2). These are the same four updates the separate
 * vertical_compose97iL1/H1/L0/H0 helpers perform.
 *
 * NOTE(review): as in those helpers, two formulations of steps C and B
 * are visible; the conditionals choosing between them are not part of
 * this excerpt.
 */
1395 void ff_snow_vertical_compose97i(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, DWTELEM *b3, DWTELEM *b4, DWTELEM *b5, int width){
1398 for(i=0; i<width; i++){
1402 b4[i] -= (W_DM*(b3[i] + b5[i])+W_DO)>>W_DS;
1404 b3[i] -= (W_CM*(b2[i] + b4[i])+W_CO)>>W_CS;
1406 r= 3*(b2[i] + b4[i]);
1409 b3[i] -= (r+W_CO)>>W_CS;
1412 b2[i] += (W_BM*(b1[i] + b3[i])+W_BO)>>W_BS;
1414 b2[i] += (W_BM*(b1[i] + b3[i])+4*b2[i]+W_BO)>>W_BS;
1416 b1[i] += (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
/*
 * Prime the 9/7 inverse-transform state for slice-buffer operation:
 * cs->b0..cs->b3 are set to the slice-buffer lines for the mirrored row
 * indices -4, -3, -2 and -1, scaled by stride_line.
 *
 * NOTE(review): the initialisation of cs->y is not visible in this
 * excerpt.
 */
1420 static void spatial_compose97i_buffered_init(dwt_compose_t *cs, slice_buffer * sb, int height, int stride_line){
1421 cs->b0 = slice_buffer_get_line(sb, mirror(-3-1, height-1) * stride_line);
1422 cs->b1 = slice_buffer_get_line(sb, mirror(-3 , height-1) * stride_line);
1423 cs->b2 = slice_buffer_get_line(sb, mirror(-3+1, height-1) * stride_line);
1424 cs->b3 = slice_buffer_get_line(sb, mirror(-3+2, height-1) * stride_line);
/*
 * Prime the 9/7 inverse-transform state for a flat buffer: cs->b0..cs->b3
 * point at the rows for the mirrored indices -4, -3, -2 and -1.
 *
 * NOTE(review): the initialisation of cs->y is not visible in this
 * excerpt.
 */
1428 static void spatial_compose97i_init(dwt_compose_t *cs, DWTELEM *buffer, int height, int stride){
1429 cs->b0 = buffer + mirror(-3-1, height-1)*stride;
1430 cs->b1 = buffer + mirror(-3 , height-1)*stride;
1431 cs->b2 = buffer + mirror(-3+1, height-1)*stride;
1432 cs->b3 = buffer + mirror(-3+2, height-1)*stride;
/*
 * Advance the slice-buffered 9/7 inverse transform by one step.
 * Takes the four carried rows from cs, fetches rows y+3 and y+4 from the
 * slice buffer, runs the vertical inverse steps, then the horizontal
 * inverse (through the dsp function pointer) on rows y-1 and y.
 *
 * When the whole six-row window lies inside the picture
 * (y > 0 && y+4 < height) the fused dsp->vertical_compose97i() is used.
 * The four individually guarded steps follow; their `< (unsigned)height`
 * tests hold only for 0 <= expr < height.
 *
 * NOTE(review): the `else` joining the fused and per-step paths, the read
 * of `y` from cs and the update of cs are not visible in this excerpt.
 */
1436 static void spatial_compose97i_dy_buffered(DSPContext *dsp, dwt_compose_t *cs, slice_buffer * sb, int width, int height, int stride_line){
1439 DWTELEM *b0= cs->b0;
1440 DWTELEM *b1= cs->b1;
1441 DWTELEM *b2= cs->b2;
1442 DWTELEM *b3= cs->b3;
1443 DWTELEM *b4= slice_buffer_get_line(sb, mirror(y + 3, height - 1) * stride_line);
1444 DWTELEM *b5= slice_buffer_get_line(sb, mirror(y + 4, height - 1) * stride_line);
1447 if(y>0 && y+4<height){
1448 dsp->vertical_compose97i(b0, b1, b2, b3, b4, b5, width);
1450 if(y+3<(unsigned)height) vertical_compose97iL1(b3, b4, b5, width);
1451 if(y+2<(unsigned)height) vertical_compose97iH1(b2, b3, b4, width);
1452 if(y+1<(unsigned)height) vertical_compose97iL0(b1, b2, b3, width);
1453 if(y+0<(unsigned)height) vertical_compose97iH0(b0, b1, b2, width);
1456 STOP_TIMER("vertical_compose97i")}}
1459 if(y-1<(unsigned)height) dsp->horizontal_compose97i(b0, width);
1460 if(y+0<(unsigned)height) dsp->horizontal_compose97i(b1, width);
/* timing is only reported for wide rows inside the picture */
1461 if(width>400 && y+0<(unsigned)height){
1462 STOP_TIMER("horizontal_compose97i")}}
/*
 * Advance the flat-buffer 9/7 inverse transform by one step.
 * Takes the four carried rows from cs, addresses rows y+3 and y+4 directly
 * in `buffer`, runs the four guarded vertical inverse steps (L1, H1, L0,
 * H0 on rows y+3 down to y) and then the horizontal inverse on rows y-1
 * and y. The `< (unsigned)height` tests hold only for 0 <= expr < height.
 *
 * NOTE(review): the read of `y` from cs and the update of cs are not
 * visible in this excerpt. The timer guard here tests `b0 <= b2`, whereas
 * the slice-buffered variant tests the row index -- confirm that the
 * difference is intended.
 */
1471 static void spatial_compose97i_dy(dwt_compose_t *cs, DWTELEM *buffer, int width, int height, int stride){
1473 DWTELEM *b0= cs->b0;
1474 DWTELEM *b1= cs->b1;
1475 DWTELEM *b2= cs->b2;
1476 DWTELEM *b3= cs->b3;
1477 DWTELEM *b4= buffer + mirror(y+3, height-1)*stride;
1478 DWTELEM *b5= buffer + mirror(y+4, height-1)*stride;
1481 if(y+3<(unsigned)height) vertical_compose97iL1(b3, b4, b5, width);
1482 if(y+2<(unsigned)height) vertical_compose97iH1(b2, b3, b4, width);
1483 if(y+1<(unsigned)height) vertical_compose97iL0(b1, b2, b3, width);
1484 if(y+0<(unsigned)height) vertical_compose97iH0(b0, b1, b2, width);
1486 STOP_TIMER("vertical_compose97i")}}
1489 if(y-1<(unsigned)height) ff_snow_horizontal_compose97i(b0, width);
1490 if(y+0<(unsigned)height) ff_snow_horizontal_compose97i(b1, width);
1491 if(width>400 && b0 <= b2){
1492 STOP_TIMER("horizontal_compose97i")}}
/*
 * Whole-region 9/7 inverse transform: initialises a local compose state
 * and repeats spatial_compose97i_dy() until cs.y exceeds `height`.
 */
1501 static void spatial_compose97i(DWTELEM *buffer, int width, int height, int stride){
1503 spatial_compose97i_init(&cs, buffer, height, stride);
1504 while(cs.y <= height)
1505 spatial_compose97i_dy(&cs, buffer, width, height, stride);
/*
 * Initialise one compose state per decomposition level (cs[level]) for the
 * slice-buffered inverse transform, from the highest level down to 0.
 * Each level uses height>>level rows and a line stride of
 * stride_line<<level.
 *
 * DWT_X has no slice-buffered implementation: that case only logs an
 * error and leaves cs[level] untouched.
 *
 * NOTE(review): the switch line is not visible; presumably it dispatches
 * on `type`. `width` is not used by the visible lines.
 */
1508 static void ff_spatial_idwt_buffered_init(dwt_compose_t *cs, slice_buffer * sb, int width, int height, int stride_line, int type, int decomposition_count){
1510 for(level=decomposition_count-1; level>=0; level--){
1512 case DWT_97: spatial_compose97i_buffered_init(cs+level, sb, height>>level, stride_line<<level); break;
1513 case DWT_53: spatial_compose53i_buffered_init(cs+level, sb, height>>level, stride_line<<level); break;
1514 /* not slicified yet */
1515 case DWT_X: /*spatial_composeX(buffer, width>>level, height>>level, stride<<level); break;*/
1516 av_log(NULL, AV_LOG_ERROR, "spatial_composeX neither buffered nor slicified yet.\n"); break;
/*
 * Initialise one compose state per decomposition level (cs[level]) for the
 * flat-buffer inverse transform, from the highest level down to 0.
 *
 * For DWT_X there is no incremental implementation: that case runs the
 * complete spatial_composeX() for the level right here instead of
 * preparing a state.
 *
 * NOTE(review): the switch line is not visible; presumably it dispatches
 * on `type`.
 */
1521 static void ff_spatial_idwt_init(dwt_compose_t *cs, DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
1523 for(level=decomposition_count-1; level>=0; level--){
1525 case DWT_97: spatial_compose97i_init(cs+level, buffer, height>>level, stride<<level); break;
1526 case DWT_53: spatial_compose53i_init(cs+level, buffer, height>>level, stride<<level); break;
1527 /* not slicified yet */
1528 case DWT_X: spatial_composeX(buffer, width>>level, height>>level, stride<<level); break;
/*
 * Run the flat-buffer inverse transform far enough to cover output row y.
 * Levels are processed from the highest down to 0; each level is stepped
 * until its cs[level].y passes min((y>>level) + support, height>>level).
 *
 * `support` is 3 when type == 1 and 5 otherwise.
 * NOTE(review): presumably 1 is the value of DWT_53 -- confirm; the
 * switch line and any early exit are not visible in this excerpt.
 */
1533 static void ff_spatial_idwt_slice(dwt_compose_t *cs, DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count, int y){
1534 const int support = type==1 ? 3 : 5;
1538 for(level=decomposition_count-1; level>=0; level--){
1539 while(cs[level].y <= FFMIN((y>>level)+support, height>>level)){
1541 case DWT_97: spatial_compose97i_dy(cs+level, buffer, width>>level, height>>level, stride<<level);
1543 case DWT_53: spatial_compose53i_dy(cs+level, buffer, width>>level, height>>level, stride<<level);
/*
 * Slice-buffer counterpart of ff_spatial_idwt_slice(): runs the inverse
 * transform far enough to cover output row y, stepping each level (highest
 * first) until cs[level].y passes
 * min((y>>level) + support, height>>level).
 *
 * `support` is 3 when type == 1 and 5 otherwise.
 * NOTE(review): presumably 1 is the value of DWT_53 -- confirm; the
 * switch line and any early exit are not visible in this excerpt.
 */
1551 static void ff_spatial_idwt_buffered_slice(DSPContext *dsp, dwt_compose_t *cs, slice_buffer * slice_buf, int width, int height, int stride_line, int type, int decomposition_count, int y){
1552 const int support = type==1 ? 3 : 5;
1556 for(level=decomposition_count-1; level>=0; level--){
1557 while(cs[level].y <= FFMIN((y>>level)+support, height>>level)){
1559 case DWT_97: spatial_compose97i_dy_buffered(dsp, cs+level, slice_buf, width>>level, height>>level, stride_line<<level);
1561 case DWT_53: spatial_compose53i_dy_buffered(cs+level, slice_buf, width>>level, height>>level, stride_line<<level);
/*
 * Complete inverse wavelet transform of a flat buffer.
 * Two paths are visible:
 *   - a loop that calls spatial_composeX() per level, highest level first;
 *   - the incremental path: initialise per-level states with
 *     ff_spatial_idwt_init(), then call ff_spatial_idwt_slice() for
 *     y = 0, 4, 8, ... below height.
 *
 * NOTE(review): the condition choosing between the two paths is not
 * visible in this excerpt.
 */
1569 static void ff_spatial_idwt(DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
1572 for(level=decomposition_count-1; level>=0; level--)
1573 spatial_composeX (buffer, width>>level, height>>level, stride<<level);
1575 dwt_compose_t cs[MAX_DECOMPOSITIONS];
1577 ff_spatial_idwt_init(cs, buffer, width, height, stride, type, decomposition_count);
1578 for(y=0; y<height; y+=4)
1579 ff_spatial_idwt_slice(cs, buffer, width, height, stride, type, decomposition_count, y);
/*
 * Range-code the coefficients of one subband, using run lengths where the
 * neighbourhood context is zero.
 *
 * What the visible lines show:
 *   - Pass 1 reads each coefficient v = src[x + y*stride] together with
 *     its left (l), top-left (lt), top (t), top-right (rt) neighbours and
 *     the co-located parent coefficient (p). Where all of l, lt, t, rt, p
 *     are zero, run lengths are collected into runs[].
 *   - The run count (max_index) and the first run are then written.
 *   - If fewer than w*40 bytes remain in the output bytestream, an error
 *     is logged.
 *   - Pass 2 gathers the same neighbourhood again. With a non-zero context
 *     it codes the flag !!v in a context derived from av_log2 of a
 *     weighted sum of neighbour magnitudes; otherwise it consumes the
 *     stored runs. For coefficients handled here, the magnitude minus one
 *     is coded, then the sign, conditioned on the left and top neighbours
 *     through quant3bA.
 *
 * NOTE(review): the loop headers, the border guards around the neighbour
 * reads, the run bookkeeping and the return statements are not visible in
 * this excerpt. The `ll` reads visible below appear to belong to code that
 * is disabled, since `ll` is commented out of the declarations -- confirm.
 */
1583 static int encode_subband_c0run(SnowContext *s, SubBand *b, DWTELEM *src, DWTELEM *parent, int stride, int orientation){
1584 const int w= b->width;
1585 const int h= b->height;
1597 int /*ll=0, */l=0, lt=0, t=0, rt=0;
1598 v= src[x + y*stride];
1601 t= src[x + (y-1)*stride];
1603 lt= src[x - 1 + (y-1)*stride];
1606 rt= src[x + 1 + (y-1)*stride];
1610 l= src[x - 1 + y*stride];
1612 if(orientation==1) ll= src[y + (x-2)*stride];
1613 else ll= src[x - 2 + y*stride];
1619 if(px<b->parent->width && py<b->parent->height)
1620 p= parent[px + py*2*stride];
1622 if(!(/*ll|*/l|lt|t|rt|p)){
1624 runs[run_index++]= run;
1632 max_index= run_index;
1633 runs[run_index++]= run;
1635 run= runs[run_index++];
1637 put_symbol2(&s->c, b->state[30], max_index, 0);
1638 if(run_index <= max_index)
1639 put_symbol2(&s->c, b->state[1], run, 3);
/* output-space check against the remaining bytestream room */
1642 if(s->c.bytestream_end - s->c.bytestream < w*40){
1643 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
1648 int /*ll=0, */l=0, lt=0, t=0, rt=0;
1649 v= src[x + y*stride];
1652 t= src[x + (y-1)*stride];
1654 lt= src[x - 1 + (y-1)*stride];
1657 rt= src[x + 1 + (y-1)*stride];
1661 l= src[x - 1 + y*stride];
1663 if(orientation==1) ll= src[y + (x-2)*stride];
1664 else ll= src[x - 2 + y*stride];
1670 if(px<b->parent->width && py<b->parent->height)
1671 p= parent[px + py*2*stride];
1673 if(/*ll|*/l|lt|t|rt|p){
1674 int context= av_log2(/*FFABS(ll) + */3*FFABS(l) + FFABS(lt) + 2*FFABS(t) + FFABS(rt) + FFABS(p));
1676 put_rac(&s->c, &b->state[0][context], !!v);
1679 run= runs[run_index++];
1681 if(run_index <= max_index)
1682 put_symbol2(&s->c, b->state[1], run, 3);
1690 int context= av_log2(/*FFABS(ll) + */3*FFABS(l) + FFABS(lt) + 2*FFABS(t) + FFABS(rt) + FFABS(p));
/* l2/t2 pack magnitude and sign: 2*|value| + (value < 0) */
1691 int l2= 2*FFABS(l) + (l<0);
1692 int t2= 2*FFABS(t) + (t<0);
1694 put_symbol2(&s->c, b->state[context + 2], FFABS(v)-1, context-4);
1695 put_rac(&s->c, &b->state[0][16 + 1 + 3 + quant3bA[l2&0xFF] + 3*quant3bA[t2&0xFF]], v<0);
/*
 * Encode one subband. Thin dispatcher: forwards all arguments to
 * encode_subband_c0run() and returns its result. The commented-out calls
 * name alternative coders that are not used.
 */
1703 static int encode_subband(SnowContext *s, SubBand *b, DWTELEM *src, DWTELEM *parent, int stride, int orientation){
1704 // encode_subband_qtree(s, b, src, parent, stride, orientation);
1705 // encode_subband_z0run(s, b, src, parent, stride, orientation);
1706 return encode_subband_c0run(s, b, src, parent, stride, orientation);
1707 // encode_subband_dzr(s, b, src, parent, stride, orientation);
/*
 * Decode the coefficients of subband b from the range coder into the
 * sparse list b->x_coeff, whose entries are (x, coeff) pairs. An entry
 * with x == w+1 is written as an end marker (see the two marker lines
 * below).
 *
 * What the visible lines show:
 *   - The run count and first run are read with get_symbol2().
 *   - The previous row's list (prev_xc) and the parent subband's list
 *     (parent_xc) supply the neighbours (lt, t, rt) and the parent value
 *     p; the parent is matched at x>>1.
 *   - With a non-zero context a significance flag is read, then magnitude
 *     and sign; v stores the sign in bit 0 and the magnitude above it
 *     (v = 2*(magnitude) + sign).
 *   - With a zero context, runs are consumed and clipped (max_run) against
 *     the next entry of the previous row, the row end, and the parent
 *     list.
 *
 * NOTE(review): the loop headers, the list-advance logic and most
 * assignments are not visible in this excerpt. `parent` may be NULL (see
 * the parent_xc initialiser); the guards protecting the parent_xc
 * dereferences are not visible here -- confirm against the full file.
 */
1710 static inline void unpack_coeffs(SnowContext *s, SubBand *b, SubBand * parent, int orientation){
1711 const int w= b->width;
1712 const int h= b->height;
1717 x_and_coeff *xc= b->x_coeff;
1718 x_and_coeff *prev_xc= NULL;
1719 x_and_coeff *prev2_xc= xc;
1720 x_and_coeff *parent_xc= parent ? parent->x_coeff : NULL;
1721 x_and_coeff *prev_parent_xc= parent_xc;
1723 runs= get_symbol2(&s->c, b->state[30], 0);
1724 if(runs-- > 0) run= get_symbol2(&s->c, b->state[1], 3);
1729 int lt=0, t=0, rt=0;
1731 if(y && prev_xc->x == 0){
1743 if(prev_xc->x == x + 1)
1749 if(x>>1 > parent_xc->x){
1752 if(x>>1 == parent_xc->x){
1753 p= parent_xc->coeff;
1756 if(/*ll|*/l|lt|t|rt|p){
1757 int context= av_log2(/*FFABS(ll) + */3*(l>>1) + (lt>>1) + (t&~1) + (rt>>1) + (p>>1));
1759 v=get_rac(&s->c, &b->state[0][context]);
1761 v= 2*(get_symbol2(&s->c, b->state[context + 2], context-4) + 1);
1762 v+=get_rac(&s->c, &b->state[0][16 + 1 + 3 + quant3bA[l&0xFF] + 3*quant3bA[t&0xFF]]);
1769 if(runs-- > 0) run= get_symbol2(&s->c, b->state[1], 3);
1771 v= 2*(get_symbol2(&s->c, b->state[0 + 2], 0-4) + 1);
1772 v+=get_rac(&s->c, &b->state[0][16 + 1 + 3]);
1781 if(y) max_run= FFMIN(run, prev_xc->x - x - 2);
1782 else max_run= FFMIN(run, w-x-1);
1784 max_run= FFMIN(max_run, 2*parent_xc->x - x - 1);
1790 (xc++)->x= w+1; //end marker
1796 while(parent_xc->x != parent->width+1)
1799 prev_parent_xc= parent_xc;
1801 parent_xc= prev_parent_xc;
1806 (xc++)->x= w+1; //end marker
/*
 * Dequantise rows start_y .. h-1 of subband b from its sparse coefficient
 * list (b->x_coeff) into the slice buffer.
 *
 * Each row's line is zeroed first; then list entries are consumed and
 * written at line[x]. An entry's `coeff` holds the sign in bit 0 and the
 * magnitude in the bits above it: t is the scaled magnitude
 * ((v>>1)*qmul + qadd) >> QEXPSHIFT, u is 0 or -1, and (t^u) - u negates t
 * when the sign bit is set.
 *
 * save_state[0] carries the list position between calls so that a later
 * slice resumes where the previous one stopped.
 *
 * NOTE(review): the inner loop header that stops at the end of a row, the
 * condition guarding the restore of new_index, and the body of the
 * lossless/special-case branch are not visible in this excerpt.
 */
1810 static inline void decode_subband_slice_buffered(SnowContext *s, SubBand *b, slice_buffer * sb, int start_y, int h, int save_state[1]){
1811 const int w= b->width;
1813 const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
1814 int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
1815 int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
1820 if(b->buf == s->spatial_dwt_buffer || s->qlog == LOSSLESS_QLOG){
1825 /* If we are on the second or later slice, restore our index. */
1827 new_index = save_state[0];
1830 for(y=start_y; y<h; y++){
1833 DWTELEM * line = slice_buffer_get_line(sb, y * b->stride_line + b->buf_y_offset) + b->buf_x_offset;
1834 memset(line, 0, b->width*sizeof(DWTELEM));
1835 v = b->x_coeff[new_index].coeff;
1836 x = b->x_coeff[new_index++].x;
1839 register int t= ( (v>>1)*qmul + qadd)>>QEXPSHIFT;
1840 register int u= -(v&1);
1841 line[x] = (t^u) - u;
1843 v = b->x_coeff[new_index].coeff;
1844 x = b->x_coeff[new_index++].x;
1847 if(w > 200 && start_y != 0/*level+1 == s->spatial_decomposition_count*/){
1848 STOP_TIMER("decode_subband")
1851 /* Save our variables for the next slice. */
1852 save_state[0] = new_index;
/*
 * Reset all adaptive range-coder states to MID_STATE: the per-subband
 * state arrays of the three planes (for every decomposition level and
 * orientation), plus the header and block states.
 * Orientation starts at 0 for level 0 and at 1 for every other level.
 */
1857 static void reset_contexts(SnowContext *s){ //FIXME better initial contexts
1858 int plane_index, level, orientation;
1860 for(plane_index=0; plane_index<3; plane_index++){
1861 for(level=0; level<s->spatial_decomposition_count; level++){
1862 for(orientation=level ? 1:0; orientation<4; orientation++){
1863 memset(s->plane[plane_index].band[level][orientation].state, MID_STATE, sizeof(s->plane[plane_index].band[level][orientation].state));
1867 memset(s->header_state, MID_STATE, sizeof(s->header_state));
1868 memset(s->block_state, MID_STATE, sizeof(s->block_state));
/*
 * Allocate the zero-initialised block array s->block.
 * w and h are the picture dimensions divided by 2^LOG2_MB_SIZE; the
 * -((-n) >> k) form rounds the division up rather than down, assuming >>
 * on a negative int is an arithmetic shift. The array holds w*h BlockNode
 * entries multiplied by 4^block_max_depth.
 *
 * NOTE(review): the stores of the block dimensions into the context, any
 * check of the av_mallocz() result, and the return statement are not
 * visible in this excerpt.
 */
1871 static int alloc_blocks(SnowContext *s){
1872 int w= -((-s->avctx->width )>>LOG2_MB_SIZE);
1873 int h= -((-s->avctx->height)>>LOG2_MB_SIZE);
1878 s->block= av_mallocz(w * h * sizeof(BlockNode) << (s->block_max_depth*2));
/*
 * Copy range-coder state from s to d while keeping d's own output
 * position: d->bytestream and d->bytestream_start are saved first and
 * restored at the end.
 *
 * NOTE(review): the statement performing the copy between the save and
 * the restore is not visible in this excerpt (presumably a struct
 * assignment of *s to *d).
 */
1882 static inline void copy_rac_state(RangeCoder *d, RangeCoder *s){
1883 uint8_t *bytestream= d->bytestream;
1884 uint8_t *bytestream_start= d->bytestream_start;
1886 d->bytestream= bytestream;
1887 d->bytestream_start= bytestream_start;
/*
 * Walks a w x w block of pixels starting at pix, with rows line_size
 * bytes apart (the row advance below skips the line_size - w bytes that
 * remain after a row).
 *
 * NOTE(review): the accumulation and the return statement are not visible
 * in this excerpt; going by the name, the function presumably returns the
 * sum of the pixel values -- confirm.
 */
1890 //near copy & paste from dsputil, FIXME
1891 static int pix_sum(uint8_t * pix, int line_size, int w)
1896 for (i = 0; i < w; i++) {
1897 for (j = 0; j < w; j++) {
1901 pix += line_size - w;
/*
 * Walks a w x w block of pixels starting at pix, with rows line_size
 * bytes apart. `sq` points 256 entries into ff_squareTbl.
 *
 * NOTE(review): the accumulation and the return statement are not visible
 * in this excerpt; presumably the function returns the sum of squared
 * pixel values looked up through sq -- confirm.
 */
1906 //near copy & paste from dsputil, FIXME
1907 static int pix_norm1(uint8_t * pix, int line_size, int w)
1910 uint32_t *sq = ff_squareTbl + 256;
1913 for (i = 0; i < w; i++) {
1914 for (j = 0; j < w; j ++) {
1918 pix += line_size - w;
/*
 * Fill the square of finest-level block entries covered by block (x, y) of
 * quadtree level `level` with one BlockNode value.
 *
 * The block array is addressed at the finest resolution: w is the row
 * pitch (b_width << block_max_depth), the square is 2^rem_depth entries
 * on a side and starts at index (x + y*w) << rem_depth.
 *
 * NOTE(review): the construction of `block` is not visible in this
 * excerpt; presumably it is built from the l, cb, cr, mx, my, ref and type
 * arguments -- confirm.
 */
1923 static inline void set_blocks(SnowContext *s, int level, int x, int y, int l, int cb, int cr, int mx, int my, int ref, int type){
1924 const int w= s->b_width << s->block_max_depth;
1925 const int rem_depth= s->block_max_depth - level;
1926 const int index= (x + y*w) << rem_depth;
1927 const int block_w= 1<<rem_depth;
1940 for(j=0; j<block_w; j++){
1941 for(i=0; i<block_w; i++){
1942 s->block[index + i + j*w]= block;
/*
 * Point the motion-estimation context at the planes to compare:
 * c->src[0][i] is set to src[i] and c->ref[0][i] to ref[i] plus a
 * per-plane offset for position (x, y). The two chroma offsets are
 * (y*uvstride + x) >> 1.
 *
 * NOTE(review): the first (luma) offset entry and the loop header are not
 * visible in this excerpt. `ref2` and `ref_index` are not used by the
 * visible lines.
 */
1947 static inline void init_ref(MotionEstContext *c, uint8_t *src[3], uint8_t *ref[3], uint8_t *ref2[3], int x, int y, int ref_index){
1948 const int offset[3]= {
1950 ((y*c->uvstride + x)>>1),
1951 ((y*c->uvstride + x)>>1),
1955 c->src[0][i]= src [i];
1956 c->ref[0][i]= ref [i] + offset[i];
/*
 * Predict a motion vector (*mx, *my) as the component-wise median of the
 * left, top and top-right neighbours.
 *
 * With a single reference frame the neighbours' vectors are used as they
 * are. Otherwise each neighbour's vector is first rescaled by
 * scale_mv_ref[ref][neighbour->ref], an 8-bit fixed-point factor (the
 * product is rounded with +128 and shifted right by 8).
 */
1961 static inline void pred_mv(SnowContext *s, int *mx, int *my, int ref,
1962 const BlockNode *left, const BlockNode *top, const BlockNode *tr){
1963 if(s->ref_frames == 1){
1964 *mx = mid_pred(left->mx, top->mx, tr->mx);
1965 *my = mid_pred(left->my, top->my, tr->my);
1967 const int *scale = scale_mv_ref[ref];
1968 *mx = mid_pred((left->mx * scale[left->ref] + 128) >>8,
1969 (top ->mx * scale[top ->ref] + 128) >>8,
1970 (tr ->mx * scale[tr ->ref] + 128) >>8);
1971 *my = mid_pred((left->my * scale[left->ref] + 128) >>8,
1972 (top ->my * scale[top ->ref] + 128) >>8,
1973 (tr ->my * scale[tr ->ref] + 128) >>8);
/*
 * Named slots of the predictor array P used by the motion search below.
 * NOTE(review): the definitions for the lower slots (e.g. P_LEFT, P_TOP,
 * which the code below uses) are not visible in this excerpt.
 */
1980 #define P_TOPRIGHT P[3]
1981 #define P_MEDIAN P[4]
1983 #define FLAG_QPEL 1 //must be 1
/*
 * Decide how to code block (x, y) at quadtree level `level` and write the
 * decision to the range coder s->c.
 *
 * What the visible lines show:
 *   - Neighbouring blocks (left, top, right, bottom, top-left, top-right)
 *     are looked up, with null_block standing in at picture borders.
 *   - For every reference frame a motion search (EPZS, sub-pel refinement,
 *     macroblock score) is run and the result optionally cached in
 *     s->ref_mvs / s->ref_scores.
 *   - The inter candidate is trial-encoded into p_buffer/p_state and the
 *     intra candidate (mean luma and chroma) into i_buffer/i_state; each
 *     candidate's bit cost, weighted by lambda2, is added to its score.
 *   - Unless already at the maximum depth, the block is also tried as
 *     four sub-blocks through recursive calls.
 *   - The winning candidate's bytes and coder state are copied back into
 *     s->c and s->block_state, and set_blocks() records the result.
 *
 * NOTE(review): many lines are not visible in this excerpt, including the
 * early-exit paths, the RangeCoder locals (pc, ic), the backup and restore
 * of state around the split attempt, the comparison that picks intra vs.
 * inter, and all return statements. The returned int is added into score2
 * by the recursive calls, so it is presumably the chosen candidate's
 * score -- confirm.
 */
1985 static int encode_q_branch(SnowContext *s, int level, int x, int y){
1986 uint8_t p_buffer[1024];
1987 uint8_t i_buffer[1024];
1988 uint8_t p_state[sizeof(s->block_state)];
1989 uint8_t i_state[sizeof(s->block_state)];
1991 uint8_t *pbbak= s->c.bytestream;
1992 uint8_t *pbbak_start= s->c.bytestream_start;
1993 int score, score2, iscore, i_len, p_len, block_s, sum;
1994 const int w= s->b_width << s->block_max_depth;
1995 const int h= s->b_height << s->block_max_depth;
1996 const int rem_depth= s->block_max_depth - level;
1997 const int index= (x + y*w) << rem_depth;
1998 const int block_w= 1<<(LOG2_MB_SIZE - level);
1999 int trx= (x+1)<<rem_depth;
2000 int try= (y+1)<<rem_depth;
/* neighbours; null_block is used where the neighbour is outside the picture */
2001 const BlockNode *left = x ? &s->block[index-1] : &null_block;
2002 const BlockNode *top = y ? &s->block[index-w] : &null_block;
2003 const BlockNode *right = trx<w ? &s->block[index+1] : &null_block;
2004 const BlockNode *bottom= try<h ? &s->block[index+w] : &null_block;
2005 const BlockNode *tl = y && x ? &s->block[index-w-1] : left;
2006 const BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
/* colour predictors come from the left neighbour */
2007 int pl = left->color[0];
2008 int pcb= left->color[1];
2009 int pcr= left->color[2];
2013 const int stride= s->current_picture.linesize[0];
2014 const int uvstride= s->current_picture.linesize[1];
2015 uint8_t *current_data[3]= { s->input_picture.data[0] + (x + y* stride)*block_w,
2016 s->input_picture.data[1] + (x + y*uvstride)*block_w/2,
2017 s->input_picture.data[2] + (x + y*uvstride)*block_w/2};
2019 int16_t last_mv[3][2];
2020 int qpel= !!(s->avctx->flags & CODEC_FLAG_QPEL); //unused
2021 const int shift= 1+qpel;
2022 MotionEstContext *c= &s->m.me;
/* range-coder context indices derived from the neighbours */
2023 int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref);
2024 int mx_context= av_log2(2*FFABS(left->mx - top->mx));
2025 int my_context= av_log2(2*FFABS(left->my - top->my));
2026 int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
2027 int ref, best_ref, ref_score, ref_mx, ref_my;
2029 assert(sizeof(s->block_state) >= 256);
2031 set_blocks(s, level, x, y, pl, pcb, pcr, 0, 0, 0, BLOCK_INTRA);
2035 // clip predictors / edge ?
2037 P_LEFT[0]= left->mx;
2038 P_LEFT[1]= left->my;
2041 P_TOPRIGHT[0]= tr->mx;
2042 P_TOPRIGHT[1]= tr->my;
2044 last_mv[0][0]= s->block[index].mx;
2045 last_mv[0][1]= s->block[index].my;
2046 last_mv[1][0]= right->mx;
2047 last_mv[1][1]= right->my;
2048 last_mv[2][0]= bottom->mx;
2049 last_mv[2][1]= bottom->my;
2056 assert(c-> stride == stride);
2057 assert(c->uvstride == uvstride);
2059 c->penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_cmp);
2060 c->sub_penalty_factor= get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_sub_cmp);
2061 c->mb_penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->mb_cmp);
2062 c->current_mv_penalty= c->mv_penalty[s->m.f_code=1] + MAX_MV;
/* search window, expressed relative to this block's position */
2064 c->xmin = - x*block_w - 16+2;
2065 c->ymin = - y*block_w - 16+2;
2066 c->xmax = - (x+1)*block_w + (w<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-2;
2067 c->ymax = - (y+1)*block_w + (h<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-2;
/* clip the predictors to the window (limits scaled by `shift`) */
2069 if(P_LEFT[0] > (c->xmax<<shift)) P_LEFT[0] = (c->xmax<<shift);
2070 if(P_LEFT[1] > (c->ymax<<shift)) P_LEFT[1] = (c->ymax<<shift);
2071 if(P_TOP[0] > (c->xmax<<shift)) P_TOP[0] = (c->xmax<<shift);
2072 if(P_TOP[1] > (c->ymax<<shift)) P_TOP[1] = (c->ymax<<shift);
2073 if(P_TOPRIGHT[0] < (c->xmin<<shift)) P_TOPRIGHT[0]= (c->xmin<<shift);
2074 if(P_TOPRIGHT[0] > (c->xmax<<shift)) P_TOPRIGHT[0]= (c->xmax<<shift); //due to pmx no clip
2075 if(P_TOPRIGHT[1] > (c->ymax<<shift)) P_TOPRIGHT[1]= (c->ymax<<shift);
2077 P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]);
2078 P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);
2081 c->pred_x= P_LEFT[0];
2082 c->pred_y= P_LEFT[1];
2084 c->pred_x = P_MEDIAN[0];
2085 c->pred_y = P_MEDIAN[1];
/* motion search against every reference frame */
2090 for(ref=0; ref<s->ref_frames; ref++){
2091 init_ref(c, current_data, s->last_picture[ref].data, NULL, block_w*x, block_w*y, 0);
2093 ref_score= ff_epzs_motion_search(&s->m, &ref_mx, &ref_my, P, 0, /*ref_index*/ 0, last_mv,
2094 (1<<16)>>shift, level-LOG2_MB_SIZE+4, block_w);
2096 assert(ref_mx >= c->xmin);
2097 assert(ref_mx <= c->xmax);
2098 assert(ref_my >= c->ymin);
2099 assert(ref_my <= c->ymax);
2101 ref_score= c->sub_motion_search(&s->m, &ref_mx, &ref_my, ref_score, 0, 0, level-LOG2_MB_SIZE+4, block_w);
2102 ref_score= ff_get_mb_score(&s->m, ref_mx, ref_my, 0, 0, level-LOG2_MB_SIZE+4, block_w, 0);
/* penalise higher reference indices */
2103 ref_score+= 2*av_log2(2*ref)*c->penalty_factor;
2104 if(s->ref_mvs[ref]){
2105 s->ref_mvs[ref][index][0]= ref_mx;
2106 s->ref_mvs[ref][index][1]= ref_my;
2107 s->ref_scores[ref][index]= ref_score;
2109 if(score > ref_score){
2116 //FIXME if mb_cmp != SSE then intra cant be compared currently and mb_penalty vs. lambda2
/* trial-encode the inter candidate into p_buffer using a copy of the block state */
2120 pc.bytestream_start=
2121 pc.bytestream= p_buffer; //FIXME end/start? and at the other stoo
2122 memcpy(p_state, s->block_state, sizeof(s->block_state));
2124 if(level!=s->block_max_depth)
2125 put_rac(&pc, &p_state[4 + s_context], 1);
2126 put_rac(&pc, &p_state[1 + left->type + top->type], 0);
2127 if(s->ref_frames > 1)
2128 put_symbol(&pc, &p_state[128 + 1024 + 32*ref_context], best_ref, 0);
2129 pred_mv(s, &pmx, &pmy, best_ref, left, top, tr);
2130 put_symbol(&pc, &p_state[128 + 32*(mx_context + 16*!!best_ref)], mx - pmx, 1);
2131 put_symbol(&pc, &p_state[128 + 32*(my_context + 16*!!best_ref)], my - pmy, 1);
2132 p_len= pc.bytestream - pc.bytestream_start;
2133 score += (s->lambda2*(p_len*8
2134 + (pc.outstanding_count - s->c.outstanding_count)*8
2135 + (-av_log2(pc.range) + av_log2(s->c.range))
2136 ))>>FF_LAMBDA_SHIFT;
/* intra candidate: l, cb, cr are the rounded block means; iscore is the luma squared error against l */
2138 block_s= block_w*block_w;
2139 sum = pix_sum(current_data[0], stride, block_w);
2140 l= (sum + block_s/2)/block_s;
2141 iscore = pix_norm1(current_data[0], stride, block_w) - 2*l*sum + l*l*block_s;
2143 block_s= block_w*block_w>>2;
2144 sum = pix_sum(current_data[1], uvstride, block_w>>1);
2145 cb= (sum + block_s/2)/block_s;
2146 // iscore += pix_norm1(&current_mb[1][0], uvstride, block_w>>1) - 2*cb*sum + cb*cb*block_s;
2147 sum = pix_sum(current_data[2], uvstride, block_w>>1);
2148 cr= (sum + block_s/2)/block_s;
2149 // iscore += pix_norm1(&current_mb[2][0], uvstride, block_w>>1) - 2*cr*sum + cr*cr*block_s;
/* trial-encode the intra candidate into i_buffer using a copy of the block state */
2152 ic.bytestream_start=
2153 ic.bytestream= i_buffer; //FIXME end/start? and at the other stoo
2154 memcpy(i_state, s->block_state, sizeof(s->block_state));
2155 if(level!=s->block_max_depth)
2156 put_rac(&ic, &i_state[4 + s_context], 1);
2157 put_rac(&ic, &i_state[1 + left->type + top->type], 1);
2158 put_symbol(&ic, &i_state[32], l-pl , 1);
2159 put_symbol(&ic, &i_state[64], cb-pcb, 1);
2160 put_symbol(&ic, &i_state[96], cr-pcr, 1);
2161 i_len= ic.bytestream - ic.bytestream_start;
2162 iscore += (s->lambda2*(i_len*8
2163 + (ic.outstanding_count - s->c.outstanding_count)*8
2164 + (-av_log2(ic.range) + av_log2(s->c.range))
2165 ))>>FF_LAMBDA_SHIFT;
2167 // assert(score==256*256*256*64-1);
2168 assert(iscore < 255*255*256 + s->lambda2*10);
2169 assert(iscore >= 0);
2170 assert(l>=0 && l<=255);
2171 assert(pl>=0 && pl<=255);
/* scene-change statistics from the inter and intra scores */
2174 int varc= iscore >> 8;
2175 int vard= score >> 8;
2176 if (vard <= 64 || vard < varc)
2177 c->scene_change_score+= ff_sqrt(vard) - ff_sqrt(varc);
2179 c->scene_change_score+= s->m.qscale;
/* try splitting into four sub-blocks at the next level */
2182 if(level!=s->block_max_depth){
2183 put_rac(&s->c, &s->block_state[4 + s_context], 0);
2184 score2 = encode_q_branch(s, level+1, 2*x+0, 2*y+0);
2185 score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+0);
2186 score2+= encode_q_branch(s, level+1, 2*x+0, 2*y+1);
2187 score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+1);
2188 score2+= s->lambda2>>FF_LAMBDA_SHIFT; //FIXME exact split overhead
2190 if(score2 < score && score2 < iscore)
/* commit the intra candidate */
2195 pred_mv(s, &pmx, &pmy, 0, left, top, tr);
2196 memcpy(pbbak, i_buffer, i_len);
2198 s->c.bytestream_start= pbbak_start;
2199 s->c.bytestream= pbbak + i_len;
2200 set_blocks(s, level, x, y, l, cb, cr, pmx, pmy, 0, BLOCK_INTRA);
2201 memcpy(s->block_state, i_state, sizeof(s->block_state));
/* commit the inter candidate */
2204 memcpy(pbbak, p_buffer, p_len);
2206 s->c.bytestream_start= pbbak_start;
2207 s->c.bytestream= pbbak + p_len;
2208 set_blocks(s, level, x, y, pl, pcb, pcr, mx, my, best_ref, 0);
2209 memcpy(s->block_state, p_state, sizeof(s->block_state));
/**
 * Reports whether two block nodes describe the same prediction.
 *
 * If both nodes have BLOCK_INTRA set, only the three color[] entries are
 * compared. The second return compares mx, my, ref and the BLOCK_INTRA bit
 * of type. Each comparison ORs the per-field differences together, so the
 * result is 1 only when every difference is zero.
 *
 * NOTE(review): the line between the two returns is not visible in this
 * listing; the second return is presumably the else branch.
 *
 * @param a first block node
 * @param b second block node
 * @return 1 if all compared fields match, 0 otherwise
 */
2214 static av_always_inline int same_block(BlockNode *a, BlockNode *b){
2215 if((a->type&BLOCK_INTRA) && (b->type&BLOCK_INTRA)){
2216 return !((a->color[0] - b->color[0]) | (a->color[1] - b->color[1]) | (a->color[2] - b->color[2]));
2218 return !((a->mx - b->mx) | (a->my - b->my) | (a->ref - b->ref) | ((a->type ^ b->type)&BLOCK_INTRA));
/**
 * Writes the block tree node at (x, y) on the given level to the range coder
 * s->c, using the parameters already stored in s->block.
 *
 * A split flag is coded for every level above s->block_max_depth: 1 when the
 * node's four sub-blocks compare equal with same_block(), 0 followed by a
 * recursion into the four quadrants otherwise. A leaf is coded as a type bit
 * followed by either three color deltas (intra) or an optional reference
 * index and two motion vector deltas (inter).
 *
 * NOTE(review): several short lines (braces, else, a return after the
 * recursion, the pmx/pmy declarations) are not visible in this listing.
 *
 * @param s     codec context
 * @param level current tree depth, 0 is the top level
 * @param x     horizontal block position on this level
 * @param y     vertical block position on this level
 */
2222 static void encode_q_branch2(SnowContext *s, int level, int x, int y){
2223 const int w= s->b_width << s->block_max_depth;
2224 const int rem_depth= s->block_max_depth - level;
2225 const int index= (x + y*w) << rem_depth;
2226 int trx= (x+1)<<rem_depth;
2227 BlockNode *b= &s->block[index];
// neighbours outside the block array fall back to null_block (left/top) or to another neighbour (tl/tr)
2228 const BlockNode *left = x ? &s->block[index-1] : &null_block;
2229 const BlockNode *top = y ? &s->block[index-w] : &null_block;
2230 const BlockNode *tl = y && x ? &s->block[index-w-1] : left;
2231 const BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
// the left neighbour's colors are the predictors for the intra color deltas
2232 int pl = left->color[0];
2233 int pcb= left->color[1];
2234 int pcr= left->color[2];
// context values selecting which range coder states are used below
2236 int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref);
2237 int mx_context= av_log2(2*FFABS(left->mx - top->mx)) + 16*!!b->ref;
2238 int my_context= av_log2(2*FFABS(left->my - top->my)) + 16*!!b->ref;
2239 int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
// NOTE(review): the condition guarding this set_blocks() call is not visible in this listing
2242 set_blocks(s, level, x, y, pl, pcb, pcr, 0, 0, 0, BLOCK_INTRA);
2246 if(level!=s->block_max_depth){
// four identical sub-blocks are coded as one leaf (flag 1), anything else as a split (flag 0)
2247 if(same_block(b,b+1) && same_block(b,b+w) && same_block(b,b+w+1)){
2248 put_rac(&s->c, &s->block_state[4 + s_context], 1);
2250 put_rac(&s->c, &s->block_state[4 + s_context], 0);
2251 encode_q_branch2(s, level+1, 2*x+0, 2*y+0);
2252 encode_q_branch2(s, level+1, 2*x+1, 2*y+0);
2253 encode_q_branch2(s, level+1, 2*x+0, 2*y+1);
2254 encode_q_branch2(s, level+1, 2*x+1, 2*y+1);
2258 if(b->type & BLOCK_INTRA){
// intra leaf: type bit 1, then the color components relative to the left neighbour
2259 pred_mv(s, &pmx, &pmy, 0, left, top, tr);
2260 put_rac(&s->c, &s->block_state[1 + (left->type&1) + (top->type&1)], 1);
2261 put_symbol(&s->c, &s->block_state[32], b->color[0]-pl , 1);
2262 put_symbol(&s->c, &s->block_state[64], b->color[1]-pcb, 1);
2263 put_symbol(&s->c, &s->block_state[96], b->color[2]-pcr, 1);
2264 set_blocks(s, level, x, y, b->color[0], b->color[1], b->color[2], pmx, pmy, 0, BLOCK_INTRA);
// inter leaf: type bit 0, the reference index only when several reference frames exist, then the mv relative to its prediction
2266 pred_mv(s, &pmx, &pmy, b->ref, left, top, tr);
2267 put_rac(&s->c, &s->block_state[1 + (left->type&1) + (top->type&1)], 0);
2268 if(s->ref_frames > 1)
2269 put_symbol(&s->c, &s->block_state[128 + 1024 + 32*ref_context], b->ref, 0);
2270 put_symbol(&s->c, &s->block_state[128 + 32*mx_context], b->mx - pmx, 1);
2271 put_symbol(&s->c, &s->block_state[128 + 32*my_context], b->my - pmy, 1);
2272 set_blocks(s, level, x, y, pl, pcb, pcr, b->mx, b->my, b->ref, 0);
/**
 * Reads the block tree node at (x, y) on the given level from the range coder
 * s->c and stores the result with set_blocks().
 *
 * A node is a leaf when level equals s->block_max_depth or when the decoded
 * split flag is set; a leaf carries a type bit followed by color deltas
 * (intra) or an optional reference index and motion vector deltas (inter).
 * The four recursive calls at the end handle the four quadrants of a node.
 *
 * NOTE(review): several short lines (braces, else, the declarations of
 * type/mx/my/ref, the condition around the first set_blocks() call) are not
 * visible in this listing.
 *
 * @param s     codec context
 * @param level current tree depth, 0 is the top level
 * @param x     horizontal block position on this level
 * @param y     vertical block position on this level
 */
2276 static void decode_q_branch(SnowContext *s, int level, int x, int y){
2277 const int w= s->b_width << s->block_max_depth;
2278 const int rem_depth= s->block_max_depth - level;
2279 const int index= (x + y*w) << rem_depth;
2280 int trx= (x+1)<<rem_depth;
// neighbours outside the block array fall back to null_block (left/top) or to another neighbour (tl/tr)
2281 const BlockNode *left = x ? &s->block[index-1] : &null_block;
2282 const BlockNode *top = y ? &s->block[index-w] : &null_block;
2283 const BlockNode *tl = y && x ? &s->block[index-w-1] : left;
2284 const BlockNode *tr = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
2285 int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
// NOTE(review): the condition guarding this set_blocks() call is not visible in this listing
2288 set_blocks(s, level, x, y, null_block.color[0], null_block.color[1], null_block.color[2], null_block.mx, null_block.my, null_block.ref, BLOCK_INTRA);
// the split flag is only read when the maximum depth has not been reached yet
2292 if(level==s->block_max_depth || get_rac(&s->c, &s->block_state[4 + s_context])){
// the colors start from the left neighbour's values; decoded deltas are added below
2294 int l = left->color[0];
2295 int cb= left->color[1];
2296 int cr= left->color[2];
2298 int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref);
// the second term of each mv context is multiplied by 0 and thus contributes nothing
2299 int mx_context= av_log2(2*FFABS(left->mx - top->mx)) + 0*av_log2(2*FFABS(tr->mx - top->mx));
2300 int my_context= av_log2(2*FFABS(left->my - top->my)) + 0*av_log2(2*FFABS(tr->my - top->my));
2302 type= get_rac(&s->c, &s->block_state[1 + left->type + top->type]) ? BLOCK_INTRA : 0;
// intra: the mv is set to its prediction for reference 0 and three color deltas are read
2305 pred_mv(s, &mx, &my, 0, left, top, tr);
2306 l += get_symbol(&s->c, &s->block_state[32], 1);
2307 cb+= get_symbol(&s->c, &s->block_state[64], 1);
2308 cr+= get_symbol(&s->c, &s->block_state[96], 1);
// inter: the reference index is only coded when several reference frames exist
2310 if(s->ref_frames > 1)
2311 ref= get_symbol(&s->c, &s->block_state[128 + 1024 + 32*ref_context], 0);
2312 pred_mv(s, &mx, &my, ref, left, top, tr);
2313 mx+= get_symbol(&s->c, &s->block_state[128 + 32*(mx_context + 16*!!ref)], 1);
2314 my+= get_symbol(&s->c, &s->block_state[128 + 32*(my_context + 16*!!ref)], 1);
2316 set_blocks(s, level, x, y, l, cb, cr, mx, my, ref, type);
// split node: decode the four quadrants one level deeper
2318 decode_q_branch(s, level+1, 2*x+0, 2*y+0);
2319 decode_q_branch(s, level+1, 2*x+1, 2*y+0);
2320 decode_q_branch(s, level+1, 2*x+0, 2*y+1);
2321 decode_q_branch(s, level+1, 2*x+1, 2*y+1);
/**
 * Writes the block trees of the current frame, starting each tree at level 0.
 *
 * Top-level blocks are written with encode_q_branch2() when me_method is
 * ME_ITER or search is 0, and with encode_q_branch() in the remaining case.
 * Before a block is written, the space left in the output buffer is compared
 * against w*MB_SIZE*MB_SIZE*3 bytes and an error is logged if less is left.
 *
 * NOTE(review): the local declarations, the loops over x/y, the statement
 * run for the ME_ITER/non-keyframe/search case, and whatever follows the
 * error message are not visible in this listing.
 *
 * @param s      codec context
 * @param search nonzero to allow a motion search while encoding
 */
2325 static void encode_blocks(SnowContext *s, int search){
2330 if(s->avctx->me_method == ME_ITER && !s->keyframe && search)
2334 if(s->c.bytestream_end - s->c.bytestream < w*MB_SIZE*MB_SIZE*3){ //FIXME nicer limit
2335 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
2339 if(s->avctx->me_method == ME_ITER || !search)
2340 encode_q_branch2(s, 0, x, y);
2342 encode_q_branch (s, 0, x, y);
/**
 * Decodes block trees by calling decode_q_branch() at level 0.
 *
 * NOTE(review): the declarations and the loops producing x and y are not
 * visible in this listing.
 *
 * @param s codec context
 */
2347 static void decode_blocks(SnowContext *s){
2354 decode_q_branch(s, 0, x, y);
/**
 * Sub-pixel interpolation of a b_w x b_h block in two passes.
 *
 * The first pass walks b_h+5 rows and the second pass walks b_h rows. In
 * both, a 6-tap value 20*(a2+a3) - 5*(a1+a4) + (a0+a5) is blended with a2 or
 * a3 according to the fractional offset (dx in the first pass, dy in the
 * second), rounded, shifted right by 8 and clipped to 0..255. The second pass
 * takes its six taps from six consecutive rows of tmp.
 *
 * NOTE(review): the first-pass loads of a0..a5, the stores of am, the
 * pointer advances and the closing braces are not visible in this listing.
 *
 * @param dst    destination block (its stores are not visible here)
 * @param src    source pixels (its loads are not visible here)
 * @param tmp    scratch buffer read by the second pass
 * @param stride line size used to index tmp
 * @param b_w    block width
 * @param b_h    block height
 * @param dx     horizontal fractional offset, compared against 8 and 16
 * @param dy     vertical fractional offset, compared against 8 and 16
 */
2359 static void mc_block(uint8_t *dst, uint8_t *src, uint8_t *tmp, int stride, int b_w, int b_h, int dx, int dy){
// first pass: b_h+5 rows, weighted by dx
2362 for(y=0; y < b_h+5; y++){
2363 for(x=0; x < b_w; x++){
2370 // int am= 9*(a1+a2) - (a0+a3);
2371 int am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5);
2372 // int am= 18*(a2+a3) - 2*(a1+a4);
2373 // int aL= (-7*a0 + 105*a1 + 35*a2 - 5*a3)>>3;
2374 // int aR= (-7*a3 + 105*a2 + 35*a1 - 5*a0)>>3;
2376 // if(b_w==16) am= 8*(a1+a2);
// blend the 6-tap result with a2 (dx<8) or a3 (dx>=8); the weights of each pair sum to 256 because am and 32*a are both scaled by 32
2378 if(dx<8) am = (32*a2*( 8-dx) + am* dx + 128)>>8;
2379 else am = ( am*(16-dx) + 32*a3*(dx-8) + 128)>>8;
2381 /* FIXME Try increasing tmp buffer to 16 bits and not clipping here. Should give marginally better results. - Robert*/
// clip: values outside 0..255 become 0 when negative and all ones otherwise (assumes an arithmetic right shift of negative ints)
2382 if(am&(~255)) am= ~(am>>31);
2386 /* if (dx< 4) tmp[x + y*stride]= (16*a1*( 4-dx) + aL* dx + 32)>>6;
2387 else if(dx< 8) tmp[x + y*stride]= ( aL*( 8-dx) + am*(dx- 4) + 32)>>6;
2388 else if(dx<12) tmp[x + y*stride]= ( am*(12-dx) + aR*(dx- 8) + 32)>>6;
2389 else tmp[x + y*stride]= ( aR*(16-dx) + 16*a2*(dx-12) + 32)>>6;*/
// move tmp back by the b_h+5 rows of the first pass
2394 tmp -= (b_h+5)*stride;
// second pass: b_h rows, taps taken from six consecutive rows of tmp, weighted by dy
2396 for(y=0; y < b_h; y++){
2397 for(x=0; x < b_w; x++){
2398 int a0= tmp[x + 0*stride];
2399 int a1= tmp[x + 1*stride];
2400 int a2= tmp[x + 2*stride];
2401 int a3= tmp[x + 3*stride];
2402 int a4= tmp[x + 4*stride];
2403 int a5= tmp[x + 5*stride];
2404 int am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5);
2405 // int am= 18*(a2+a3) - 2*(a1+a4);
2406 /* int aL= (-7*a0 + 105*a1 + 35*a2 - 5*a3)>>3;
2407 int aR= (-7*a3 + 105*a2 + 35*a1 - 5*a0)>>3;*/
2409 // if(b_w==16) am= 8*(a1+a2);
2411 if(dy<8) am = (32*a2*( 8-dy) + am* dy + 128)>>8;
2412 else am = ( am*(16-dy) + 32*a3*(dy-8) + 128)>>8;
// same clip as in the first pass
2414 if(am&(~255)) am= ~(am>>31);
2417 /* if (dy< 4) tmp[x + y*stride]= (16*a1*( 4-dy) + aL* dy + 32)>>6;
2418 else if(dy< 8) tmp[x + y*stride]= ( aL*( 8-dy) + am*(dy- 4) + 32)>>6;
2419 else if(dy<12) tmp[x + y*stride]= ( am*(12-dy) + aR*(dy- 8) + 32)>>6;
2420 else tmp[x + y*stride]= ( aR*(16-dy) + 16*a2*(dy-12) + 32)>>6;*/
2425 STOP_TIMER("mc_block")
/*
 * mca(dx, dy, b_w) defines a function mc_block_hpel<dx><dy><b_w>() that
 * allocates a stride*(b_w+5) byte scratch array on the stack and calls
 * mc_block() for a b_w x b_w block with the fixed offsets dx and dy. The
 * source pointer is moved 2 pixels left and 2 rows up before the call.
 * NOTE(review): the parameter h is not used by the visible lines; part of
 * the macro body is not visible in this listing.
 */
2428 #define mca(dx,dy,b_w)\
2429 static void mc_block_hpel ## dx ## dy ## b_w(uint8_t *dst, uint8_t *src, int stride, int h){\
2430 uint8_t tmp[stride*(b_w+5)];\
2432 mc_block(dst, src-2-2*stride, tmp, stride, b_w, b_w, dx, dy);\
/**
 * Produces the prediction of one block into dst.
 *
 * Intra blocks are filled with block->color[plane_index]. Other blocks are
 * motion compensated from s->last_picture[block->ref]: the vector is scaled
 * by s->mv_scale (doubled for plane 0), its low 4 bits select the sub-pixel
 * position, and either mc_block() or the put_h264_qpel_pixels_tab functions
 * do the interpolation.
 *
 * NOTE(review): the conditions selecting the fill variants and the qpel
 * variants, the else lines, the adjustment of sx/sy by the integer part of
 * the vector, and the statement following ff_emulated_edge_mc() are not
 * visible in this listing.
 *
 * @param s           codec context
 * @param dst         destination pixels
 * @param tmp         scratch buffer for mc_block() and edge emulation
 * @param stride      line size of dst and of the reference plane
 * @param sx          horizontal position of the block in the plane
 * @param sy          vertical position of the block in the plane
 * @param b_w         block width
 * @param b_h         block height
 * @param block       block node supplying type, color, mx, my and ref
 * @param plane_index plane to predict
 * @param w           plane width
 * @param h           plane height
 */
2444 static void pred_block(SnowContext *s, uint8_t *dst, uint8_t *tmp, int stride, int sx, int sy, int b_w, int b_h, BlockNode *block, int plane_index, int w, int h){
2445 if(block->type & BLOCK_INTRA){
2447 const int color = block->color[plane_index];
// the color byte replicated into all four bytes of a 32-bit word
2448 const int color4= color*0x01010101;
// fill variant writing 32 bytes per row with 32-bit stores
2450 for(y=0; y < b_h; y++){
2451 *(uint32_t*)&dst[0 + y*stride]= color4;
2452 *(uint32_t*)&dst[4 + y*stride]= color4;
2453 *(uint32_t*)&dst[8 + y*stride]= color4;
2454 *(uint32_t*)&dst[12+ y*stride]= color4;
2455 *(uint32_t*)&dst[16+ y*stride]= color4;
2456 *(uint32_t*)&dst[20+ y*stride]= color4;
2457 *(uint32_t*)&dst[24+ y*stride]= color4;
2458 *(uint32_t*)&dst[28+ y*stride]= color4;
// fill variant writing 16 bytes per row
2461 for(y=0; y < b_h; y++){
2462 *(uint32_t*)&dst[0 + y*stride]= color4;
2463 *(uint32_t*)&dst[4 + y*stride]= color4;
2464 *(uint32_t*)&dst[8 + y*stride]= color4;
2465 *(uint32_t*)&dst[12+ y*stride]= color4;
// fill variant writing 8 bytes per row
2468 for(y=0; y < b_h; y++){
2469 *(uint32_t*)&dst[0 + y*stride]= color4;
2470 *(uint32_t*)&dst[4 + y*stride]= color4;
// fill variant writing 4 bytes per row
2473 for(y=0; y < b_h; y++){
2474 *(uint32_t*)&dst[0 + y*stride]= color4;
// generic fill, one byte at a time, for any b_w
2477 for(y=0; y < b_h; y++){
2478 for(x=0; x < b_w; x++){
2479 dst[x + y*stride]= color;
2484 uint8_t *src= s->last_picture[block->ref].data[plane_index];
2485 const int scale= plane_index ? s->mv_scale : 2*s->mv_scale;
2486 int mx= block->mx*scale;
2487 int my= block->my*scale;
// low 4 bits of the scaled vector: sub-pixel position
2488 const int dx= mx&15;
2489 const int dy= my&15;
// evaluates to 0 for b_w 16, 1 for b_w 8 and 2 for b_w 4
2490 const int tab_index= 3 - (b_w>>2) + (b_w>>4);
2493 src += sx + sy*stride;
// negative sx/sy become large unsigned values and also take this branch (as long as the right-hand side is not negative)
2494 if( (unsigned)sx >= w - b_w - 4
2495 || (unsigned)sy >= h - b_h - 4){
2496 ff_emulated_edge_mc(tmp + MB_SIZE, src, stride, b_w+5, b_h+5, sx, sy, w, h);
2499 // assert(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h);
2500 // assert(!(b_w&(b_w-1)));
2501 assert(b_w>1 && b_h>1);
2502 assert(tab_index>=0 && tab_index<4 || b_w==32);
// mc_block() handles offsets that are not multiples of 4, unsupported aspect ratios and widths that are not a power of two
2503 if((dx&3) || (dy&3) || !(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h) || (b_w&(b_w-1)))
2504 mc_block(dst, src, tmp, stride, b_w, b_h, dx, dy);
// two table-0 calls per 16 rows, 16 columns apart
2507 for(y=0; y<b_h; y+=16){
2508 s->dsp.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + y*stride, src + 2 + (y+2)*stride,stride);
2509 s->dsp.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + 16 + y*stride, src + 18 + (y+2)*stride,stride);
// single call covering the whole block
2512 s->dsp.put_h264_qpel_pixels_tab[tab_index ][dy+(dx>>2)](dst,src + 2 + 2*stride,stride);
// block twice as wide as high: two calls side by side, b_h columns apart
2513 else if(b_w==2*b_h){
2514 s->dsp.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst ,src + 2 + 2*stride,stride);
2515 s->dsp.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst+b_h,src + 2 + b_h + 2*stride,stride);
// remaining case: two calls stacked vertically, b_w rows apart
2518 s->dsp.put_h264_qpel_pixels_tab[tab_index ][dy+(dx>>2)](dst ,src + 2 + 2*stride ,stride);
2519 s->dsp.put_h264_qpel_pixels_tab[tab_index ][dy+(dx>>2)](dst+b_w*stride,src + 2 + 2*stride+b_w*stride,stride);
/**
 * Combines four block predictions with their OBMC weights and applies the
 * result to lines fetched from a slice buffer.
 *
 * For every pixel the weighted sum of block[3], block[2], block[1] and
 * block[0] (weights taken from the four quadrants of obmc) is scaled and then
 * either added to the slice buffer value, rounded, clipped to 0..255 and
 * stored in dst8, or subtracted from the slice buffer value.
 *
 * NOTE(review): the lines selecting between the two outcomes (presumably on
 * add), any preprocessor conditions around the two scaling statements, and
 * the local declarations are not visible in this listing.
 *
 * @param obmc        weight table
 * @param obmc_stride line size of the weight table, also used to locate the quadrants
 * @param block       the four predictions
 * @param b_w         block width
 * @param b_h         block height
 * @param src_x       horizontal position inside the slice buffer line
 * @param src_y       first slice buffer line
 * @param src_stride  line size of the predictions and of dst8
 * @param sb          slice buffer
 * @param add         direction flag, see the note above
 * @param dst8        8-bit output
 */
2524 void ff_snow_inner_add_yblock(const uint8_t *obmc, const int obmc_stride, uint8_t * * block, int b_w, int b_h,
2525 int src_x, int src_y, int src_stride, slice_buffer * sb, int add, uint8_t * dst8){
2528 for(y=0; y<b_h; y++){
2529 //FIXME ugly misuse of obmc_stride
// obmc1..obmc4 point at the four quadrants of the weight table, half a line / half the lines apart
2530 const uint8_t *obmc1= obmc + y*obmc_stride;
2531 const uint8_t *obmc2= obmc1+ (obmc_stride>>1);
2532 const uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1);
2533 const uint8_t *obmc4= obmc3+ (obmc_stride>>1);
2534 dst = slice_buffer_get_line(sb, src_y + y);
2535 for(x=0; x<b_w; x++){
2536 int v= obmc1[x] * block[3][x + y*src_stride]
2537 +obmc2[x] * block[2][x + y*src_stride]
2538 +obmc3[x] * block[1][x + y*src_stride]
2539 +obmc4[x] * block[0][x + y*src_stride];
2541 v <<= 8 - LOG2_OBMC_MAX;
2543 v += 1<<(7 - FRAC_BITS);
2544 v >>= 8 - FRAC_BITS;
// reconstruction: add the buffered value, round, clip to 0..255, store as 8 bit
2547 v += dst[x + src_x];
2548 v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS;
2549 if(v&(~255)) v= ~(v>>31);
2550 dst8[x + y*src_stride] = v;
// other direction: remove the prediction from the buffered value
2552 dst[x + src_x] -= v;
2558 //FIXME name cleanup (b_w, block_w, b_width stuff)
/**
 * Adds or subtracts the OBMC-weighted prediction of one block-sized area.
 *
 * The area is covered by the four block nodes lt, rt, lb and rb around
 * (b_x, b_y). One prediction per distinct node is produced with pred_block()
 * (same_block() is used to detect nodes whose prediction can be shared), the
 * four predictions are weighted with the four quadrants of obmc and the sum
 * is applied to dst / dst8, or handed to s->dsp.inner_add_yblock().
 *
 * NOTE(review): many lines are not visible in this listing, among them the
 * bodies of the border and clipping branches, the assignments to block[],
 * the lines selecting between the per-prediction loops and the combined
 * loop, and the branches on sliced and add.
 *
 * @param s           codec context
 * @param sliced      selects slice buffer handling (tested together with offset_dst below)
 * @param sb          slice buffer passed to inner_add_yblock()
 * @param dst         coefficient buffer
 * @param dst8        8-bit output
 * @param obmc        weight table
 * @param src_x       horizontal pixel position of the area, may be negative
 * @param src_y       vertical pixel position of the area
 * @param b_w         width of the area
 * @param b_h         height of the area
 * @param w           plane width
 * @param h           plane height
 * @param dst_stride  line size of dst
 * @param src_stride  line size of dst8 and of the predictions
 * @param obmc_stride line size of the weight table
 * @param b_x         horizontal block index of lt
 * @param b_y         vertical block index of lt
 * @param add         nonzero to add the prediction, zero to subtract it
 * @param offset_dst  nonzero if dst has to be advanced to (src_x, src_y)
 * @param plane_index plane to process
 */
2559 static av_always_inline void add_yblock(SnowContext *s, int sliced, slice_buffer *sb, DWTELEM *dst, uint8_t *dst8, const uint8_t *obmc, int src_x, int src_y, int b_w, int b_h, int w, int h, int dst_stride, int src_stride, int obmc_stride, int b_x, int b_y, int add, int offset_dst, int plane_index){
2560 const int b_width = s->b_width << s->block_max_depth;
2561 const int b_height= s->b_height << s->block_max_depth;
2562 const int b_stride= b_width;
// the four block nodes whose predictions overlap in this area
2563 BlockNode *lt= &s->block[b_x + b_y*b_stride];
2564 BlockNode *rt= lt+1;
2565 BlockNode *lb= lt+b_stride;
2566 BlockNode *rb= lb+1;
2568 int tmp_step= src_stride >= 7*MB_SIZE ? MB_SIZE : MB_SIZE*src_stride;
2569 uint8_t tmp[src_stride*7*MB_SIZE]; //FIXME align
// border handling for the last block column / row (bodies not visible here)
2576 }else if(b_x + 1 >= b_width){
2583 }else if(b_y + 1 >= b_height){
// clipping of the area against the plane (most of the bodies not visible here)
2588 if(src_x<0){ //FIXME merge with prev & always round internal width upto *16
2591 if(!sliced && !offset_dst)
2594 }else if(src_x + b_w > w){
2598 obmc -= src_y*obmc_stride;
2600 if(!sliced && !offset_dst)
2601 dst -= src_y*dst_stride;
2603 }else if(src_y + b_h> h){
// nothing left after clipping
2607 if(b_w<=0 || b_h<=0) return;
2609 assert(src_stride > 2*MB_SIZE + 5);
2610 if(!sliced && offset_dst)
2611 dst += src_x + src_y*dst_stride;
2612 dst8+= src_x + src_y*src_stride;
2613 // src += src_x + src_y*src_stride;
2615 ptmp= tmp + 3*tmp_step;
// one prediction per node; same_block() finds nodes equal to an earlier one
2618 pred_block(s, block[0], tmp, src_stride, src_x, src_y, b_w, b_h, lt, plane_index, w, h);
2620 if(same_block(lt, rt)){
2625 pred_block(s, block[1], tmp, src_stride, src_x, src_y, b_w, b_h, rt, plane_index, w, h);
2628 if(same_block(lt, lb)){
2630 }else if(same_block(rt, lb)){
2635 pred_block(s, block[2], tmp, src_stride, src_x, src_y, b_w, b_h, lb, plane_index, w, h);
2638 if(same_block(lt, rb) ){
2640 }else if(same_block(rt, rb)){
2642 }else if(same_block(lb, rb)){
2646 pred_block(s, block[3], tmp, src_stride, src_x, src_y, b_w, b_h, rb, plane_index, w, h);
// per-prediction form: four loops, each applying one weight quadrant times one prediction
2649 for(y=0; y<b_h; y++){
2650 for(x=0; x<b_w; x++){
2651 int v= obmc [x + y*obmc_stride] * block[3][x + y*src_stride] * (256/OBMC_MAX);
2652 if(add) dst[x + y*dst_stride] += v;
2653 else dst[x + y*dst_stride] -= v;
2656 for(y=0; y<b_h; y++){
2657 uint8_t *obmc2= obmc + (obmc_stride>>1);
2658 for(x=0; x<b_w; x++){
2659 int v= obmc2[x + y*obmc_stride] * block[2][x + y*src_stride] * (256/OBMC_MAX);
2660 if(add) dst[x + y*dst_stride] += v;
2661 else dst[x + y*dst_stride] -= v;
2664 for(y=0; y<b_h; y++){
2665 uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
2666 for(x=0; x<b_w; x++){
2667 int v= obmc3[x + y*obmc_stride] * block[1][x + y*src_stride] * (256/OBMC_MAX);
2668 if(add) dst[x + y*dst_stride] += v;
2669 else dst[x + y*dst_stride] -= v;
2672 for(y=0; y<b_h; y++){
2673 uint8_t *obmc3= obmc + obmc_stride*(obmc_stride>>1);
2674 uint8_t *obmc4= obmc3+ (obmc_stride>>1);
2675 for(x=0; x<b_w; x++){
2676 int v= obmc4[x + y*obmc_stride] * block[0][x + y*src_stride] * (256/OBMC_MAX);
2677 if(add) dst[x + y*dst_stride] += v;
2678 else dst[x + y*dst_stride] -= v;
// combined form: delegated to the dsp function, or done by the loop below which sums all four products per pixel
2685 s->dsp.inner_add_yblock(obmc, obmc_stride, block, b_w, b_h, src_x,src_y, src_stride, sb, add, dst8);
2686 STOP_TIMER("inner_add_yblock")
2688 for(y=0; y<b_h; y++){
2689 //FIXME ugly misuse of obmc_stride
2690 const uint8_t *obmc1= obmc + y*obmc_stride;
2691 const uint8_t *obmc2= obmc1+ (obmc_stride>>1);
2692 const uint8_t *obmc3= obmc1+ obmc_stride*(obmc_stride>>1);
2693 const uint8_t *obmc4= obmc3+ (obmc_stride>>1);
2694 for(x=0; x<b_w; x++){
2695 int v= obmc1[x] * block[3][x + y*src_stride]
2696 +obmc2[x] * block[2][x + y*src_stride]
2697 +obmc3[x] * block[1][x + y*src_stride]
2698 +obmc4[x] * block[0][x + y*src_stride];
2700 v <<= 8 - LOG2_OBMC_MAX;
2702 v += 1<<(7 - FRAC_BITS);
2703 v >>= 8 - FRAC_BITS;
// reconstruction: add the coefficient, round, clip to 0..255, store as 8 bit
2706 v += dst[x + y*dst_stride];
2707 v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS;
2708 if(v&(~255)) v= ~(v>>31);
2709 dst8[x + y*src_stride] = v;
// other direction: remove the prediction from the coefficient
2711 dst[x + y*dst_stride] -= v;
/**
 * Applies the prediction to one row of blocks (mb_y) of a plane held in a
 * slice buffer.
 *
 * When the frame is a keyframe or debug bit 512 is set, no motion
 * compensation is done: the visible loops either convert the buffered lines
 * to 8 bit with a 128 offset, rounding and clipping, or subtract
 * 128<<FRAC_BITS from them. Otherwise add_yblock() is called in sliced mode
 * for mb_x from 0 to mb_w inclusive, each area shifted by half a block.
 *
 * NOTE(review): the declarations of w/h/x/y/mb_x, the lines selecting
 * between the two keyframe loops (presumably on add), the return after them
 * and several add_yblock() arguments are not visible in this listing.
 *
 * @param s           codec context
 * @param sb          slice buffer holding the lines of the plane
 * @param old_buffer  passed to add_yblock() as its dst argument
 * @param plane_index plane to process
 * @param add         direction flag forwarded to add_yblock()
 * @param mb_y        block row to process
 */
2718 static av_always_inline void predict_slice_buffered(SnowContext *s, slice_buffer * sb, DWTELEM * old_buffer, int plane_index, int add, int mb_y){
2719 Plane *p= &s->plane[plane_index];
2720 const int mb_w= s->b_width << s->block_max_depth;
2721 const int mb_h= s->b_height << s->block_max_depth;
2723 int block_size = MB_SIZE >> s->block_max_depth;
// planes other than 0 use half the block width and the next obmc table
2724 int block_w = plane_index ? block_size/2 : block_size;
2725 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
2726 int obmc_stride= plane_index ? block_size : 2*block_size;
2727 int ref_stride= s->current_picture.linesize[plane_index];
2728 uint8_t *dst8= s->current_picture.data[plane_index];
2733 if(s->keyframe || (s->avctx->debug&512)){
// conversion of the buffered lines of this block row to 8 bit
2738 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++)
2740 // DWTELEM * line = slice_buffer_get_line(sb, y);
2741 DWTELEM * line = sb->line[y];
2744 // int v= buf[x + y*w] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
2745 int v= line[x] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
2747 if(v&(~255)) v= ~(v>>31);
2748 dst8[x + y*ref_stride]= v;
// removal of the 128 offset from the buffered lines of this block row
2752 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++)
2754 // DWTELEM * line = slice_buffer_get_line(sb, y);
2755 DWTELEM * line = sb->line[y];
2758 line[x] -= 128 << FRAC_BITS;
2759 // buf[x + y*w]-= 128<<FRAC_BITS;
// mb_w+1 areas per row, each starting half a block before the block position
2767 for(mb_x=0; mb_x<=mb_w; mb_x++){
2770 add_yblock(s, 1, sb, old_buffer, dst8, obmc,
2771 block_w*mb_x - block_w/2,
2772 block_w*mb_y - block_w/2,
2775 w, ref_stride, obmc_stride,
2777 add, 0, plane_index);
2779 STOP_TIMER("add_yblock")
2782 STOP_TIMER("predict_slice")
/**
 * Applies the prediction to one row of blocks (mb_y) of a plane held in a
 * flat coefficient buffer.
 *
 * When the frame is a keyframe or debug bit 512 is set, no motion
 * compensation is done: the visible loops either convert buf to 8 bit with a
 * 128 offset, rounding and clipping, or subtract 128<<FRAC_BITS from buf.
 * Otherwise add_yblock() is called in non-sliced mode for mb_x from 0 to
 * mb_w inclusive, each area shifted by half a block.
 *
 * NOTE(review): the declarations of w/h/x/y/mb_x, the lines selecting
 * between the two keyframe loops (presumably on add), the return after them
 * and several add_yblock() arguments are not visible in this listing.
 *
 * @param s           codec context
 * @param buf         coefficient buffer, indexed as buf[x + y*w]
 * @param plane_index plane to process
 * @param add         direction flag forwarded to add_yblock()
 * @param mb_y        block row to process
 */
2785 static av_always_inline void predict_slice(SnowContext *s, DWTELEM *buf, int plane_index, int add, int mb_y){
2786 Plane *p= &s->plane[plane_index];
2787 const int mb_w= s->b_width << s->block_max_depth;
2788 const int mb_h= s->b_height << s->block_max_depth;
2790 int block_size = MB_SIZE >> s->block_max_depth;
// planes other than 0 use half the block width and the next obmc table
2791 int block_w = plane_index ? block_size/2 : block_size;
2792 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
2793 const int obmc_stride= plane_index ? block_size : 2*block_size;
2794 int ref_stride= s->current_picture.linesize[plane_index];
2795 uint8_t *dst8= s->current_picture.data[plane_index];
2800 if(s->keyframe || (s->avctx->debug&512)){
// conversion of the rows of this block row to 8 bit
2805 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
2807 int v= buf[x + y*w] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1));
2809 if(v&(~255)) v= ~(v>>31);
2810 dst8[x + y*ref_stride]= v;
// removal of the 128 offset from the rows of this block row
2814 for(y=block_w*mb_y; y<FFMIN(h,block_w*(mb_y+1)); y++){
2816 buf[x + y*w]-= 128<<FRAC_BITS;
// mb_w+1 areas per row, each starting half a block before the block position
2824 for(mb_x=0; mb_x<=mb_w; mb_x++){
2827 add_yblock(s, 0, NULL, buf, dst8, obmc,
2828 block_w*mb_x - block_w/2,
2829 block_w*mb_y - block_w/2,
2832 w, ref_stride, obmc_stride,
2834 add, 1, plane_index);
2836 STOP_TIMER("add_yblock")
2839 STOP_TIMER("predict_slice")
/**
 * Runs predict_slice() for every block row of a plane, mb_y going from 0 to
 * mb_h inclusive.
 *
 * @param s           codec context
 * @param buf         coefficient buffer of the plane
 * @param plane_index plane to process
 * @param add         direction flag forwarded to predict_slice()
 */
2842 static av_always_inline void predict_plane(SnowContext *s, DWTELEM *buf, int plane_index, int add){
2843 const int mb_h= s->b_height << s->block_max_depth;
// one more row than there are blocks (note the <=)
2845 for(mb_y=0; mb_y<=mb_h; mb_y++)
2846 predict_slice(s, buf, plane_index, add, mb_y);
/**
 * Estimates the intra color of block (mb_x, mb_y) for one plane.
 *
 * The block is temporarily turned into an intra block of color 0, so that
 * add_yblock() leaves only the neighbours' contribution in the scratch
 * buffer. The input picture minus that contribution is then correlated with
 * the OBMC weights (sums ab and aa) and the rounded quotient, clipped to
 * 0..255, is returned.
 *
 * NOTE(review): the declarations of i/x2/y2/ab/aa/d, the loop over i, the
 * lines between the computation of d and its use, and any restore of the
 * block from backup are not visible in this listing.
 *
 * @param s           codec context
 * @param mb_x        horizontal block index
 * @param mb_y        vertical block index
 * @param plane_index plane to evaluate
 * @return estimated color in the range 0..255
 */
2849 static int get_dc(SnowContext *s, int mb_x, int mb_y, int plane_index){
2851 Plane *p= &s->plane[plane_index];
2852 const int block_size = MB_SIZE >> s->block_max_depth;
2853 const int block_w = plane_index ? block_size/2 : block_size;
2854 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
2855 const int obmc_stride= plane_index ? block_size : 2*block_size;
2856 const int ref_stride= s->current_picture.linesize[plane_index];
2857 uint8_t *src= s-> input_picture.data[plane_index];
2858 DWTELEM *dst= (DWTELEM*)s->m.obmc_scratchpad + plane_index*block_size*block_size*4;
2859 const int b_stride = s->b_width << s->block_max_depth;
2860 const int w= p->width;
2861 const int h= p->height;
2862 int index= mb_x + mb_y*b_stride;
2863 BlockNode *b= &s->block[index];
2864 BlockNode backup= *b;
// make the block itself contribute nothing for this plane and clear the scratch area
2868 b->type|= BLOCK_INTRA;
2869 b->color[plane_index]= 0;
2870 memset(dst, 0, obmc_stride*obmc_stride*sizeof(DWTELEM));
// i selects one of the four areas overlapping this block
2873 int mb_x2= mb_x + (i &1) - 1;
2874 int mb_y2= mb_y + (i>>1) - 1;
2875 int x= block_w*mb_x2 + block_w/2;
2876 int y= block_w*mb_y2 + block_w/2;
2878 add_yblock(s, 0, NULL, dst + ((i&1)+(i>>1)*obmc_stride)*block_w, NULL, obmc,
2879 x, y, block_w, block_w, w, h, obmc_stride, ref_stride, obmc_stride, mb_x2, mb_y2, 0, 0, plane_index);
// only pixels inside the plane are accumulated
2881 for(y2= FFMAX(y, 0); y2<FFMIN(h, y+block_w); y2++){
2882 for(x2= FFMAX(x, 0); x2<FFMIN(w, x+block_w); x2++){
2883 int index= x2-(block_w*mb_x - block_w/2) + (y2-(block_w*mb_y - block_w/2))*obmc_stride;
2884 int obmc_v= obmc[index];
// at plane borders the weight of the missing neighbouring area is added to this block's weight
2886 if(y<0) obmc_v += obmc[index + block_w*obmc_stride];
2887 if(x<0) obmc_v += obmc[index + block_w];
2888 if(y+block_w>h) obmc_v += obmc[index - block_w*obmc_stride];
2889 if(x+block_w>w) obmc_v += obmc[index - block_w];
2890 //FIXME precalc this or simplify it somehow else
2892 d = -dst[index] + (1<<(FRAC_BITS-1));
2894 ab += (src[x2 + y2*ref_stride] - (d>>FRAC_BITS)) * obmc_v;
2895 aa += obmc_v * obmc_v; //FIXME precalculate this
2901 return av_clip(((ab<<LOG2_OBMC_MAX) + aa/2)/aa, 0, 255); //FIXME we shouldn't need clipping
/**
 * Estimates the number of bits needed to code block (x, y).
 *
 * Intra blocks cost 3 plus twice the summed av_log2(2*|difference|) of the
 * three colors relative to the left neighbour. Other blocks cost twice the
 * sum of 1 and the av_log2() terms of dmx, dmy and the reference index.
 *
 * NOTE(review): the declarations of dmx/dmy, the statement executed for
 * out-of-range positions, the statements between pred_mv() and the final
 * return, and the comment delimiters around the table remnant below are not
 * visible in this listing.
 *
 * @param s codec context
 * @param x horizontal block index
 * @param y vertical block index
 * @param w block width in block units, used to locate the top-right neighbour
 * @return estimated cost in bits
 */
2904 static inline int get_block_bits(SnowContext *s, int x, int y, int w){
2905 const int b_stride = s->b_width << s->block_max_depth;
2906 const int b_height = s->b_height<< s->block_max_depth;
2907 int index= x + y*b_stride;
2908 const BlockNode *b = &s->block[index];
// neighbours outside the block array fall back to null_block (left/top) or to another neighbour (tl/tr)
2909 const BlockNode *left = x ? &s->block[index-1] : &null_block;
2910 const BlockNode *top = y ? &s->block[index-b_stride] : &null_block;
2911 const BlockNode *tl = y && x ? &s->block[index-b_stride-1] : left;
2912 const BlockNode *tr = y && x+w<b_stride ? &s->block[index-b_stride+w] : tl;
2914 // int mx_context= av_log2(2*FFABS(left->mx - top->mx));
2915 // int my_context= av_log2(2*FFABS(left->my - top->my));
// positions outside the block array
2917 if(x<0 || x>=b_stride || y>=b_height)
// NOTE(review): the next line looks like the last row of a commented table whose other rows and delimiters are not visible here
2924 00001XXXX 15-30 8-15
2926 //FIXME try accurate rate
2927 //FIXME intra and inter predictors if surrounding blocks aren't the same type
2928 if(b->type & BLOCK_INTRA){
2929 return 3+2*( av_log2(2*FFABS(left->color[0] - b->color[0]))
2930 + av_log2(2*FFABS(left->color[1] - b->color[1]))
2931 + av_log2(2*FFABS(left->color[2] - b->color[2])));
2933 pred_mv(s, &dmx, &dmy, b->ref, left, top, tr);
2936 return 2*(1 + av_log2(2*FFABS(dmx)) //FIXME kill the 2* can be merged in lambda
2937 + av_log2(2*FFABS(dmy))
2938 + av_log2(2*b->ref));
/**
 * Computes distortion + rate*penalty_factor for block (mb_x, mb_y) with its
 * current parameters.
 *
 * The block is predicted with pred_block() over twice the block width, the
 * prediction is weighted with obmc_edged and combined with the contribution
 * stored in the obmc scratchpad, and the result is compared with the input
 * picture using the configured comparison function. The rate is summed from
 * get_block_bits() of the block and some of its neighbours.
 *
 * NOTE(review): the declarations of x/y/i/distortion/rate, the store of v,
 * the bodies adjusting x0/x1/y0/y1, the loops over i and several conditions
 * are not visible in this listing.
 *
 * @param s           codec context
 * @param mb_x        horizontal block index
 * @param mb_y        vertical block index
 * @param plane_index plane to evaluate
 * @param obmc_edged  weight table, indexed with obmc_stride per row
 * @return distortion + rate*penalty_factor
 */
2942 static int get_block_rd(SnowContext *s, int mb_x, int mb_y, int plane_index, const uint8_t *obmc_edged){
2943 Plane *p= &s->plane[plane_index];
2944 const int block_size = MB_SIZE >> s->block_max_depth;
2945 const int block_w = plane_index ? block_size/2 : block_size;
2946 const int obmc_stride= plane_index ? block_size : 2*block_size;
2947 const int ref_stride= s->current_picture.linesize[plane_index];
2948 uint8_t *dst= s->current_picture.data[plane_index];
2949 uint8_t *src= s-> input_picture.data[plane_index];
2950 DWTELEM *pred= (DWTELEM*)s->m.obmc_scratchpad + plane_index*block_size*block_size*4;
2951 uint8_t cur[ref_stride*2*MB_SIZE]; //FIXME alignment
2952 uint8_t tmp[ref_stride*(2*MB_SIZE+5)];
2953 const int b_stride = s->b_width << s->block_max_depth;
2954 const int b_height = s->b_height<< s->block_max_depth;
2955 const int w= p->width;
2956 const int h= p->height;
2959 const int penalty_factor= get_penalty_factor(s->lambda, s->lambda2, s->avctx->me_cmp);
// the evaluated area starts half a block before the block position and is clipped to the plane
2960 int sx= block_w*mb_x - block_w/2;
2961 int sy= block_w*mb_y - block_w/2;
2962 int x0= FFMAX(0,-sx);
2963 int y0= FFMAX(0,-sy);
2964 int x1= FFMIN(block_w*2, w-sx);
2965 int y1= FFMIN(block_w*2, h-sy);
2968 pred_block(s, cur, tmp, ref_stride, sx, sy, block_w*2, block_w*2, &s->block[mb_x + mb_y*b_stride], plane_index, w, h);
2970 for(y=y0; y<y1; y++){
2971 const uint8_t *obmc1= obmc_edged + y*obmc_stride;
2972 const DWTELEM *pred1 = pred + y*obmc_stride;
2973 uint8_t *cur1 = cur + y*ref_stride;
2974 uint8_t *dst1 = dst + sx + (sy+y)*ref_stride;
2975 for(x=x0; x<x1; x++){
// weighted own prediction plus the scratchpad contribution, scaled back and clipped to 0..255
2976 int v = (cur1[x] * obmc1[x]) << (FRAC_BITS - LOG2_OBMC_MAX);
2977 v = (v + pred1[x]) >> FRAC_BITS;
2978 if(v&(~255)) v= ~(v>>31);
2983 /* copy the regions where obmc[] = (uint8_t)256 */
// only reached for the four corner blocks
2984 if(LOG2_OBMC_MAX == 8
2985 && (mb_x == 0 || mb_x == b_stride-1)
2986 && (mb_y == 0 || mb_y == b_height-1)){
2995 for(y=y0; y<y1; y++)
2996 memcpy(dst + sx+x0 + (sy+y)*ref_stride, cur + x0 + y*ref_stride, x1-x0);
3000 /* FIXME rearrange dsputil to fit 32x32 cmp functions */
3001 /* FIXME check alignment of the cmp wavelet vs the encoding wavelet */
3002 /* FIXME cmps overlap but don't cover the wavelet's whole support,
3003 * so improving the score of one block is not strictly guaranteed to
3004 * improve the score of the whole frame, so iterative motion est
3005 * doesn't always converge. */
3006 if(s->avctx->me_cmp == FF_CMP_W97)
3007 distortion = w97_32_c(&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, 32);
3008 else if(s->avctx->me_cmp == FF_CMP_W53)
3009 distortion = w53_32_c(&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, 32);
// distortion summed over 16x16 parts, i selecting the part
3013 int off = sx+16*(i&1) + (sy+16*(i>>1))*ref_stride;
3014 distortion += s->dsp.me_cmp[0](&s->m, src + off, dst + off, ref_stride, 16);
// single comparison over the whole area
3019 distortion = s->dsp.me_cmp[0](&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, block_w*2);
// rate of this block and of neighbours selected by i
3028 rate += get_block_bits(s, mb_x + (i&1) - (i>>1), mb_y + (i>>1), 1);
3030 if(mb_x == b_stride-2)
3031 rate += get_block_bits(s, mb_x + 1, mb_y + 1, 1);
3033 return distortion + rate*penalty_factor;
/**
 * Computes distortion + rate*penalty_factor for the 2x2 group of blocks
 * whose top-left member is (mb_x, mb_y).
 *
 * For a 3x3 arrangement of areas around the group, add_yblock() rebuilds the
 * picture into s->current_picture, parts of the areas lying outside the
 * plane are copied from the input picture, and the comparison function adds
 * to the distortion. The rate is get_block_bits() of the group plus that of
 * further blocks listed in dxy; the first four entries are skipped when the
 * four blocks of the group are identical.
 *
 * NOTE(review): the declarations of i/x2/y2/distortion/rate, the loop over
 * i, the conditions around the border copies and the lines after the merged
 * computation are not visible in this listing.
 *
 * @param s           codec context
 * @param mb_x        horizontal block index of the top-left block
 * @param mb_y        vertical block index of the top-left block
 * @param plane_index plane to evaluate
 * @return distortion + rate*penalty_factor
 */
3036 static int get_4block_rd(SnowContext *s, int mb_x, int mb_y, int plane_index){
3038 Plane *p= &s->plane[plane_index];
3039 const int block_size = MB_SIZE >> s->block_max_depth;
3040 const int block_w = plane_index ? block_size/2 : block_size;
3041 const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
3042 const int obmc_stride= plane_index ? block_size : 2*block_size;
3043 const int ref_stride= s->current_picture.linesize[plane_index];
3044 uint8_t *dst= s->current_picture.data[plane_index];
3045 uint8_t *src= s-> input_picture.data[plane_index];
// all-zero coefficients passed as the dst argument of add_yblock()
3046 static const DWTELEM zero_dst[4096]; //FIXME
3047 const int b_stride = s->b_width << s->block_max_depth;
3048 const int w= p->width;
3049 const int h= p->height;
3052 const int penalty_factor= get_penalty_factor(s->lambda, s->lambda2, s->avctx->me_cmp);
// i selects one area of a 3x3 arrangement
3055 int mb_x2= mb_x + (i%3) - 1;
3056 int mb_y2= mb_y + (i/3) - 1;
3057 int x= block_w*mb_x2 + block_w/2;
3058 int y= block_w*mb_y2 + block_w/2;
3060 add_yblock(s, 0, NULL, zero_dst, dst, obmc,
3061 x, y, block_w, block_w, w, h, /*dst_stride*/0, ref_stride, obmc_stride, mb_x2, mb_y2, 1, 1, plane_index);
3063 //FIXME find a cleaner/simpler way to skip the outside stuff
// rows above and below the plane are copied from the input
3064 for(y2= y; y2<0; y2++)
3065 memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, block_w);
3066 for(y2= h; y2<y+block_w; y2++)
3067 memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, block_w);
// columns left and right of the plane are copied from the input
3069 for(y2= y; y2<y+block_w; y2++)
3070 memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, -x);
3073 for(y2= y; y2<y+block_w; y2++)
3074 memcpy(dst + w + y2*ref_stride, src + w + y2*ref_stride, x+block_w - w);
3077 assert(block_w== 8 || block_w==16);
// comparison function index 1 for block_w 8, index 0 for block_w 16
3078 distortion += s->dsp.me_cmp[block_w==8](&s->m, src + x + y*ref_stride, dst + x + y*ref_stride, ref_stride, block_w);
3082 BlockNode *b= &s->block[mb_x+mb_y*b_stride];
3083 int merged= same_block(b,b+1) && same_block(b,b+b_stride) && same_block(b,b+b_stride+1);
3091 rate = get_block_bits(s, mb_x, mb_y, 2);
// a merged group skips the first four dxy entries
3092 for(i=merged?4:0; i<9; i++){
3093 static const int dxy[9][2] = {{0,0},{1,0},{0,1},{1,1},{2,0},{2,1},{-1,2},{0,2},{1,2}};
3094 rate += get_block_bits(s, mb_x + dxy[i][0], mb_y + dxy[i][1], 1);
3097 return distortion + rate*penalty_factor;
/**
 * Tries a candidate parameter set for block (mb_x, mb_y) and scores it with
 * get_block_rd() on plane 0.
 *
 * For an intra candidate the three values of p are stored as the block's
 * colors and BLOCK_INTRA is set. For the other case a cache entry derived
 * from p[0], p[1], block->ref and s->me_cache_generation is looked up and
 * stored, and BLOCK_INTRA is cleared.
 *
 * NOTE(review): the branch lines on intra, the statement taken on a cache
 * hit, the assignment of p[0]/p[1] to the block, the comparison against
 * *best_rd, any restore from backup and the return values are not visible
 * in this listing.
 *
 * @param s          codec context
 * @param mb_x       horizontal block index
 * @param mb_y       vertical block index
 * @param p          candidate values: three colors for intra, otherwise p[0] and p[1]
 * @param intra      selects the intra candidate handling
 * @param obmc_edged weight table forwarded to get_block_rd()
 * @param best_rd    best score so far
 */
3100 static av_always_inline int check_block(SnowContext *s, int mb_x, int mb_y, int p[3], int intra, const uint8_t *obmc_edged, int *best_rd){
3101 const int b_stride= s->b_width << s->block_max_depth;
3102 BlockNode *block= &s->block[mb_x + mb_y * b_stride];
// copy of the block as it was before the candidate is applied
3103 BlockNode backup= *block;
3104 int rd, index, value;
3106 assert(mb_x>=0 && mb_y>=0);
3107 assert(mb_x<b_stride);
3110 block->color[0] = p[0];
3111 block->color[1] = p[1];
3112 block->color[2] = p[2];
3113 block->type |= BLOCK_INTRA;
// cache slot from a hash of p[0] and p[1]; the stored value also encodes the generation and the reference
3115 index= (p[0] + 31*p[1]) & (ME_CACHE_SIZE-1);
3116 value= s->me_cache_generation + (p[0]>>10) + (p[1]<<6) + (block->ref<<12);
3117 if(s->me_cache[index] == value)
3119 s->me_cache[index]= value;
3123 block->type &= ~BLOCK_INTRA;
3126 rd= get_block_rd(s, mb_x, mb_y, 0, obmc_edged);
3138 /* special case for int[2] args we discard afterward, fixes compilation prob with gcc 2.95 */
/**
 * Wrapper around check_block() for a non-intra candidate: packs p0 and p1
 * into a local two-element array and passes 0 as the intra flag.
 *
 * @param s          codec context
 * @param mb_x       horizontal block index
 * @param mb_y       vertical block index
 * @param p0         first candidate value, becomes p[0] in check_block()
 * @param p1         second candidate value, becomes p[1] in check_block()
 * @param obmc_edged weight table forwarded to check_block()
 * @param best_rd    best score so far, forwarded to check_block()
 * @return the value returned by check_block()
 */
3139 static av_always_inline int check_block_inter(SnowContext *s, int mb_x, int mb_y, int p0, int p1, const uint8_t *obmc_edged, int *best_rd){
3140 int p[2] = {p0, p1};
3141 return check_block(s, mb_x, mb_y, p, 0, obmc_edged, best_rd);
/**
 * Tries a non-intra candidate for the 2x2 group of blocks whose top-left
 * member is (mb_x, mb_y) and scores it with get_4block_rd() on plane 0.
 *
 * The top-left block is made non-intra and copied to the other three blocks
 * of the group before scoring. The four original blocks are kept in backup
 * and written back by the last four statements.
 *
 * NOTE(review): the statement taken on a cache hit, the assignment of
 * p0/p1/ref to the block, the comparison against *best_rd, the condition
 * under which the backup is restored and the return values are not visible
 * in this listing.
 *
 * @param s       codec context
 * @param mb_x    horizontal block index, asserted to be even
 * @param mb_y    vertical block index, asserted to be even
 * @param p0      first candidate value
 * @param p1      second candidate value
 * @param ref     candidate reference index (its use is not visible here)
 * @param best_rd best score so far
 */
3144 static av_always_inline int check_4block_inter(SnowContext *s, int mb_x, int mb_y, int p0, int p1, int ref, int *best_rd){
3145 const int b_stride= s->b_width << s->block_max_depth;
3146 BlockNode *block= &s->block[mb_x + mb_y * b_stride];
3147 BlockNode backup[4]= {block[0], block[1], block[b_stride], block[b_stride+1]};
3148 int rd, index, value;
3150 assert(mb_x>=0 && mb_y>=0);
3151 assert(mb_x<b_stride);
3152 assert(((mb_x|mb_y)&1) == 0);
// cache slot from a hash of p0 and p1; the stored value also encodes the generation and the reference
3154 index= (p0 + 31*p1) & (ME_CACHE_SIZE-1);
3155 value= s->me_cache_generation + (p0>>10) + (p1<<6) + (block->ref<<12);
3156 if(s->me_cache[index] == value)
3158 s->me_cache[index]= value;
3163 block->type &= ~BLOCK_INTRA;
// all four blocks of the group get the same parameters
3164 block[1]= block[b_stride]= block[b_stride+1]= *block;
3166 rd= get_4block_rd(s, mb_x, mb_y, 0);
// put the four original blocks back
3173 block[0]= backup[0];
3174 block[1]= backup[1];
3175 block[b_stride]= backup[2];
3176 block[b_stride+1]= backup[3];
3181 static void iterative_me(SnowContext *s){
3182 int pass, mb_x, mb_y;
3183 const int b_width = s->b_width << s->block_max_depth;
3184 const int b_height= s->b_height << s->block_max_depth;
3185 const int b_stride= b_width;
3189 RangeCoder r = s->c;
3190 uint8_t state[sizeof(s->block_state)];
3191 memcpy(state, s->block_state, sizeof(s->block_state));
3192 for(mb_y= 0; mb_y<s->b_height; mb_y++)
3193 for(mb_x= 0; mb_x<s->b_width; mb_x++)
3194 encode_q_branch(s, 0, mb_x, mb_y);
3196 memcpy(s->block_state, state, sizeof(s->block_state));
3199 for(pass=0; pass<25; pass++){
3202 for(mb_y= 0; mb_y<b_height; mb_y++){
3203 for(mb_x= 0; mb_x<b_width; mb_x++){
3204 int dia_change, i, j, ref;
3205 int best_rd= INT_MAX, ref_rd;
3206 BlockNode backup, ref_b;
3207 const int index= mb_x + mb_y * b_stride;
3208 BlockNode *block= &s->block[index];
3209 BlockNode *tb = mb_y ? &s->block[index-b_stride ] : NULL;
3210 BlockNode *lb = mb_x ? &s->block[index -1] : NULL;
3211 BlockNode *rb = mb_x+1<b_width ? &s->block[index +1] : NULL;
3212 BlockNode *bb = mb_y+1<b_height ? &s->block[index+b_stride ] : NULL;
3213 BlockNode *tlb= mb_x && mb_y ? &s->block[index-b_stride-1] : NULL;
3214 BlockNode *trb= mb_x+1<b_width && mb_y ? &s->block[index-b_stride+1] : NULL;
3215 BlockNode *blb= mb_x && mb_y+1<b_height ? &s->block[index+b_stride-1] : NULL;
3216 BlockNode *brb= mb_x+1<b_width && mb_y+1<b_height ? &s->block[index+b_stride+1] : NULL;
3217 const int b_w= (MB_SIZE >> s->block_max_depth);
3218 uint8_t obmc_edged[b_w*2][b_w*2];
3220 if(pass && (block->type & BLOCK_OPT))
3222 block->type |= BLOCK_OPT;
3226 if(!s->me_cache_generation)
3227 memset(s->me_cache, 0, sizeof(s->me_cache));
3228 s->me_cache_generation += 1<<22;
3233 memcpy(obmc_edged, obmc_tab[s->block_max_depth], b_w*b_w*4);
3235 for(y=0; y<b_w*2; y++)
3236 memset(obmc_edged[y], obmc_edged[y][0] + obmc_edged[y][b_w-1], b_w);
3237 if(mb_x==b_stride-1)
3238 for(y=0; y<b_w*2; y++)
3239 memset(obmc_edged[y]+b_w, obmc_edged[y][b_w] + obmc_edged[y][b_w*2-1], b_w);
3241 for(x=0; x<b_w*2; x++)
3242 obmc_edged[0][x] += obmc_edged[b_w-1][x];
3243 for(y=1; y<b_w; y++)
3244 memcpy(obmc_edged[y], obmc_edged[0], b_w*2);
3246 if(mb_y==b_height-1){
3247 for(x=0; x<b_w*2; x++)
3248 obmc_edged[b_w*2-1][x] += obmc_edged[b_w][x];
3249 for(y=b_w; y<b_w*2-1; y++)
3250 memcpy(obmc_edged[y], obmc_edged[b_w*2-1], b_w*2);
3254 //skip stuff outside the picture
3255 if(mb_x==0 || mb_y==0 || mb_x==b_width-1 || mb_y==b_height-1)
3257 uint8_t *src= s-> input_picture.data[0];
3258 uint8_t *dst= s->current_picture.data[0];
3259 const int stride= s->current_picture.linesize[0];
3260 const int block_w= MB_SIZE >> s->block_max_depth;
3261 const int sx= block_w*mb_x - block_w/2;
3262 const int sy= block_w*mb_y - block_w/2;
3263 const int w= s->plane[0].width;
3264 const int h= s->plane[0].height;
3268 memcpy(dst + sx + y*stride, src + sx + y*stride, block_w*2);
3269 for(y=h; y<sy+block_w*2; y++)
3270 memcpy(dst + sx + y*stride, src + sx + y*stride, block_w*2);
3272 for(y=sy; y<sy+block_w*2; y++)
3273 memcpy(dst + sx + y*stride, src + sx + y*stride, -sx);
3275 if(sx+block_w*2 > w){
3276 for(y=sy; y<sy+block_w*2; y++)
3277 memcpy(dst + w + y*stride, src + w + y*stride, sx+block_w*2 - w);
3281 // intra(black) = neighbors' contribution to the current block
3283 color[i]= get_dc(s, mb_x, mb_y, i);
3285 // get previous score (can't be cached due to OBMC)
3286 if(pass > 0 && (block->type&BLOCK_INTRA)){
3287 int color0[3]= {block->color[0], block->color[1], block->color[2]};
3288 check_block(s, mb_x, mb_y, color0, 1, *obmc_edged, &best_rd);
3290 check_block_inter(s, mb_x, mb_y, block->mx, block->my, *obmc_edged, &best_rd);
3294 for(ref=0; ref < s->ref_frames; ref++){
3295 int16_t (*mvr)[2]= &s->ref_mvs[ref][index];
3296 if(s->ref_scores[ref][index] > s->ref_scores[ref_b.ref][index]*3/2) //FIXME tune threshold
3301 check_block_inter(s, mb_x, mb_y, mvr[0][0], mvr[0][1], *obmc_edged, &best_rd);
3302 check_block_inter(s, mb_x, mb_y, 0, 0, *obmc_edged, &best_rd);
3304 check_block_inter(s, mb_x, mb_y, mvr[-b_stride][0], mvr[-b_stride][1], *obmc_edged, &best_rd);
3306 check_block_inter(s, mb_x, mb_y, mvr[-1][0], mvr[-1][1], *obmc_edged, &best_rd);
3308 check_block_inter(s, mb_x, mb_y, mvr[1][0], mvr[1][1], *obmc_edged, &best_rd);
3310 check_block_inter(s, mb_x, mb_y, mvr[b_stride][0], mvr[b_stride][1], *obmc_edged, &best_rd);
3313 //FIXME avoid subpel interpol / round to nearest integer
3316 for(i=0; i<FFMAX(s->avctx->dia_size, 1); i++){
3318 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+4*(i-j), block->my+(4*j), *obmc_edged, &best_rd);
3319 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx-4*(i-j), block->my-(4*j), *obmc_edged, &best_rd);
3320 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+4*(i-j), block->my-(4*j), *obmc_edged, &best_rd);
3321 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx-4*(i-j), block->my+(4*j), *obmc_edged, &best_rd);
3327 static const int square[8][2]= {{+1, 0},{-1, 0},{ 0,+1},{ 0,-1},{+1,+1},{-1,-1},{+1,-1},{-1,+1},};
3330 dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+square[i][0], block->my+square[i][1], *obmc_edged, &best_rd);
3332 //FIXME or try the standard 2 pass qpel or similar
3334 mvr[0][0]= block->mx;
3335 mvr[0][1]= block->my;
3336 if(ref_rd > best_rd){
3344 check_block(s, mb_x, mb_y, color, 1, *obmc_edged, &best_rd);
3345 //FIXME RD style color selection
3347 if(!same_block(block, &backup)){
3348 if(tb ) tb ->type &= ~BLOCK_OPT;
3349 if(lb ) lb ->type &= ~BLOCK_OPT;
3350 if(rb ) rb ->type &= ~BLOCK_OPT;
3351 if(bb ) bb ->type &= ~BLOCK_OPT;
3352 if(tlb) tlb->type &= ~BLOCK_OPT;
3353 if(trb) trb->type &= ~BLOCK_OPT;
3354 if(blb) blb->type &= ~BLOCK_OPT;
3355 if(brb) brb->type &= ~BLOCK_OPT;
3360 av_log(NULL, AV_LOG_ERROR, "pass:%d changed:%d\n", pass, change);
3365 if(s->block_max_depth == 1){
3367 for(mb_y= 0; mb_y<b_height; mb_y+=2){
3368 for(mb_x= 0; mb_x<b_width; mb_x+=2){
3370 int best_rd, init_rd;
3371 const int index= mb_x + mb_y * b_stride;
3374 b[0]= &s->block[index];
3376 b[2]= b[0]+b_stride;
3378 if(same_block(b[0], b[1]) &&
3379 same_block(b[0], b[2]) &&
3380 same_block(b[0], b[3]))
3383 if(!s->me_cache_generation)
3384 memset(s->me_cache, 0, sizeof(s->me_cache));
3385 s->me_cache_generation += 1<<22;
3387 init_rd= best_rd= get_4block_rd(s, mb_x, mb_y, 0);
3389 //FIXME more multiref search?
3390 check_4block_inter(s, mb_x, mb_y,
3391 (b[0]->mx + b[1]->mx + b[2]->mx + b[3]->mx + 2) >> 2,
3392 (b[0]->my + b[1]->my + b[2]->my + b[3]->my + 2) >> 2, 0, &best_rd);
3395 if(!(b[i]->type&BLOCK_INTRA))
3396 check_4block_inter(s, mb_x, mb_y, b[i]->mx, b[i]->my, b[i]->ref, &best_rd);
3398 if(init_rd != best_rd)
3402 av_log(NULL, AV_LOG_ERROR, "pass:4mv changed:%d\n", change*4);
// Quantize the coefficients of subband `b` in place.
//   s      - codec context; supplies the global qlog and the decomposition count
//   b      - subband descriptor (level, width, height, per-band qlog)
//   src    - coefficient array, addressed as src[x + y*stride]
//   stride - row pitch of src, in elements
//   bias   - flag: non-zero selects a rounding offset of 0, zero selects 3*qmul/8
// NOTE(review): several lines of this function are not visible in this view
// (loop headers, branch conditions, the assignment of thres2, closing braces);
// the comments below describe only what the visible lines do.
3406 static void quantize(SnowContext *s, SubBand *b, DWTELEM *src, int stride, int bias){
3407 const int level= b->level;
3408 const int w= b->width;
3409 const int h= b->height;
// Effective quantizer index: global + per-band, clipped to [0, QROOT*16].
3410 const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
// Step size: table entry for the low bits of qlog, scaled by a power of two from the high bits.
3411 const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
3412 int x,y, thres1, thres2;
// Lossless mode: leave the coefficients untouched.
3415 if(s->qlog == LOSSLESS_QLOG) return;
// From here on `bias` holds the rounding offset, no longer the caller's flag.
3417 bias= bias ? 0 : (3*qmul)>>3;
3418 thres1= ((qmul - bias)>>QEXPSHIFT) - 1;
// First visible variant: the magnitude is divided by qmul with no offset added.
// Presumably this is the bias==0 path; the selecting condition is not visible here.
3424 int i= src[x + y*stride];
// A single unsigned compare is true when i+thres1 is negative or exceeds thres2,
// i.e. when i lies outside the range that is mapped to zero.
3426 if((unsigned)(i+thres1) > thres2){
3429 i/= qmul; //FIXME optimize
3430 src[x + y*stride]= i;
// Presumably the negative-input branch: the quotient is stored negated.
3434 i/= qmul; //FIXME optimize
3435 src[x + y*stride]= -i;
// Presumably the else of the threshold test: the coefficient is zeroed.
3438 src[x + y*stride]= 0;
// Second visible variant: same structure, but `bias` is added before dividing.
3444 int i= src[x + y*stride];
3446 if((unsigned)(i+thres1) > thres2){
3449 i= (i + bias) / qmul; //FIXME optimize
3450 src[x + y*stride]= i;
3454 i= (i + bias) / qmul; //FIXME optimize
3455 src[x + y*stride]= -i;
3458 src[x + y*stride]= 0;
// Only the last decomposition level reaches the (disabled) timer hook.
3462 if(level+1 == s->spatial_decomposition_count){
3463 // STOP_TIMER("quantize")
// Dequantize rows [start_y, end_y) of subband `b`, operating on lines fetched
// from the slice buffer `sb` rather than on `src`.
//   s, b             - codec context and subband (width, qlog, buffer offsets)
//   sb               - slice buffer that owns the coefficient lines
//   src, stride      - not referenced by any visible active line (only by the
//                      commented-out alternative below)
//   start_y, end_y   - half-open row range within the subband
// NOTE(review): the inner x loop and the declaration/sign test of `i` are not
// visible in this view.
3467 static void dequantize_slice_buffered(SnowContext *s, slice_buffer * sb, SubBand *b, DWTELEM *src, int stride, int start_y, int end_y){
3468 const int w= b->width;
// Same step-size derivation as in quantize(): clipped global + per-band qlog.
3469 const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
3470 const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
// Reconstruction offset derived from the frame's qbias.
3471 const int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
// Lossless mode: nothing to undo.
3475 if(s->qlog == LOSSLESS_QLOG) return;
3477 for(y=start_y; y<end_y; y++){
3478 // DWTELEM * line = slice_buffer_get_line_from_address(sb, src + (y * stride));
// Map the subband row to its slice-buffer line, then step to the band's first column.
3479 DWTELEM * line = slice_buffer_get_line(sb, (y * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
// Two reconstruction forms: the first negates, scales, shifts and negates back;
// the second scales and shifts directly. Presumably selected by the sign of i.
3483 line[x]= -((-i*qmul + qadd)>>(QEXPSHIFT)); //FIXME try different bias
3485 line[x]= (( i*qmul + qadd)>>(QEXPSHIFT));
// Timer hook is only hit for wide bands.
3489 if(w > 200 /*level+1 == s->spatial_decomposition_count*/){
3490 STOP_TIMER("dquant")
// Dequantize all coefficients of subband `b` in place in `src`
// (addressed as src[x + y*stride]).
// NOTE(review): the x/y loop headers and the sign test on `i` are not visible
// in this view.
3494 static void dequantize(SnowContext *s, SubBand *b, DWTELEM *src, int stride){
3495 const int w= b->width;
3496 const int h= b->height;
// Step size from the clipped sum of global and per-band qlog.
3497 const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
3498 const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
// Reconstruction offset derived from the frame's qbias.
3499 const int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
// Lossless mode: coefficients were never quantized.
3503 if(s->qlog == LOSSLESS_QLOG) return;
3507 int i= src[x + y*stride];
// Two reconstruction forms: negate/scale/shift/negate versus plain scale/shift.
// Presumably chosen by the sign of i.
3509 src[x + y*stride]= -((-i*qmul + qadd)>>(QEXPSHIFT)); //FIXME try different bias
3511 src[x + y*stride]= (( i*qmul + qadd)>>(QEXPSHIFT));
// Timer hook is only hit for wide bands.
3515 if(w > 200 /*level+1 == s->spatial_decomposition_count*/){
3516 STOP_TIMER("dquant")
// Replace each coefficient of subband `b` by its difference from a prediction
// built from already-existing neighbours (left, top, top-right / top-left).
// Iterates bottom-to-top and right-to-left, so every prediction reads values
// that this pass has not yet overwritten.
// NOTE(review): the conditions that choose between the three predictor forms
// (presumably x!=0 and use_median) are not visible in this view; `inverse` is
// not referenced by any visible line.
3520 static void decorrelate(SnowContext *s, SubBand *b, DWTELEM *src, int stride, int inverse, int use_median){
3521 const int w= b->width;
3522 const int h= b->height;
3525 for(y=h-1; y>=0; y--){
3526 for(x=w-1; x>=0; x--){
3527 int i= x + y*stride;
// Form 1: median of left, top and top-right when a row above and a column to the right exist;
// otherwise fall back to the left neighbour.
3531 if(y && x+1<w) src[i] -= mid_pred(src[i - 1], src[i - stride], src[i - stride + 1]);
3532 else src[i] -= src[i - 1];
// Form 2: median of left, top and the gradient left+top-topleft; left only on the first row.
3534 if(y) src[i] -= mid_pred(src[i - 1], src[i - stride], src[i - 1] + src[i - stride] - src[i - 1 - stride]);
3535 else src[i] -= src[i - 1];
// Form 3: predict from the sample directly above (presumably the x==0 case, which has no left neighbour).
3538 if(y) src[i] -= src[i - stride];
// Inverse of decorrelate() for rows [start_y, end_y) of subband `b`, working on
// slice-buffer lines: each coefficient has its prediction added back.
// NOTE(review): the x loop, the declaration/update of `prev`, and the conditions
// selecting between the three predictor forms are not visible in this view.
// `prev` is presumably the previously processed line.
3544 static void correlate_slice_buffered(SnowContext *s, slice_buffer * sb, SubBand *b, DWTELEM *src, int stride, int inverse, int use_median, int start_y, int end_y){
3545 const int w= b->width;
3550 DWTELEM * line=0; // silence silly "could be used without having been initialized" warning
// Fetch the row above the first one to process (the guarding condition is not visible).
3554 line = slice_buffer_get_line(sb, ((start_y - 1) * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
3556 for(y=start_y; y<end_y; y++){
3558 // line = slice_buffer_get_line_from_address(sb, src + (y * stride));
// Map the subband row to its slice-buffer line, then step to the band's first column.
3559 line = slice_buffer_get_line(sb, (y * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
// Form 1: median of left, top and top-right; left only when there is no row above or no column to the right.
3563 if(y && x+1<w) line[x] += mid_pred(line[x - 1], prev[x], prev[x + 1]);
3564 else line[x] += line[x - 1];
// Form 2: median of left, top and the gradient left+top-topleft; left only on the first row.
3566 if(y) line[x] += mid_pred(line[x - 1], prev[x], line[x - 1] + prev[x] - prev[x - 1]);
3567 else line[x] += line[x - 1];
// Form 3: add the sample directly above.
3570 if(y) line[x] += prev[x];
3575 // STOP_TIMER("correlate")
// Inverse of decorrelate() on a plain coefficient array: adds the neighbour
// prediction back to every coefficient of subband `b` in `src`.
// NOTE(review): the x/y loop headers and the conditions selecting between the
// three predictor forms are not visible in this view; `inverse` is not
// referenced by any visible line.
3578 static void correlate(SnowContext *s, SubBand *b, DWTELEM *src, int stride, int inverse, int use_median){
3579 const int w= b->width;
3580 const int h= b->height;
3585 int i= x + y*stride;
// Form 1: median of left, top and top-right; left only when there is no row above or no column to the right.
3589 if(y && x+1<w) src[i] += mid_pred(src[i - 1], src[i - stride], src[i - stride + 1]);
3590 else src[i] += src[i - 1];
// Form 2: median of left, top and the gradient left+top-topleft; left only on the first row.
3592 if(y) src[i] += mid_pred(src[i - 1], src[i - stride], src[i - 1] + src[i - stride] - src[i - 1 - stride]);
3593 else src[i] += src[i - 1];
// Form 3: add the sample directly above.
3596 if(y) src[i] += src[i - stride];
// Write the frame header to the range coder s->c.
// Visible layout: a keyframe flag; a set of stream-level fields plus the
// per-band qlog table; then five fields coded as signed differences against the
// s->last_* values, which are updated at the end.
// NOTE(review): the declaration of kstate, the condition guarding the
// stream-level fields (presumably s->keyframe), and closing braces are not
// visible in this view.
3602 static void encode_header(SnowContext *s){
3603 int plane_index, level, orientation;
// The keyframe flag is coded with its own freshly initialised state array.
3606 memset(kstate, MID_STATE, sizeof(kstate));
3608 put_rac(&s->c, kstate, s->keyframe);
// On a keyframe (or when always_reset is set) the delta-coding references are reset;
// only the first and last assignments of this chain are visible.
3609 if(s->keyframe || s->always_reset){
3611 s->last_spatial_decomposition_type=
3615 s->last_block_max_depth= 0;
// Stream-level parameters, coded as unsigned symbols / single bits.
3618 put_symbol(&s->c, s->header_state, s->version, 0);
3619 put_rac(&s->c, s->header_state, s->always_reset);
3620 put_symbol(&s->c, s->header_state, s->temporal_decomposition_type, 0);
3621 put_symbol(&s->c, s->header_state, s->temporal_decomposition_count, 0);
3622 put_symbol(&s->c, s->header_state, s->spatial_decomposition_count, 0);
3623 put_symbol(&s->c, s->header_state, s->colorspace_type, 0);
3624 put_symbol(&s->c, s->header_state, s->chroma_h_shift, 0);
3625 put_symbol(&s->c, s->header_state, s->chroma_v_shift, 0);
3626 put_rac(&s->c, s->header_state, s->spatial_scalability);
3627 // put_rac(&s->c, s->header_state, s->rate_scalability);
// Stored minus one; decode_header adds the 1 back.
3628 put_symbol(&s->c, s->header_state, s->max_ref_frames-1, 0);
// Only planes 0 and 1 and orientations other than 2 are transmitted;
// decode_header fills plane 2 from plane 1 and orientation 2 from orientation 1.
3630 for(plane_index=0; plane_index<2; plane_index++){
3631 for(level=0; level<s->spatial_decomposition_count; level++){
3632 for(orientation=level ? 1:0; orientation<4; orientation++){
3633 if(orientation==2) continue;
3634 put_symbol(&s->c, s->header_state, s->plane[plane_index].band[level][orientation].qlog, 1);
// Per-frame parameters, coded as signed differences from the previous frame's values.
3639 put_symbol(&s->c, s->header_state, s->spatial_decomposition_type - s->last_spatial_decomposition_type, 1);
3640 put_symbol(&s->c, s->header_state, s->qlog - s->last_qlog , 1);
3641 put_symbol(&s->c, s->header_state, s->mv_scale - s->last_mv_scale, 1);
3642 put_symbol(&s->c, s->header_state, s->qbias - s->last_qbias , 1);
3643 put_symbol(&s->c, s->header_state, s->block_max_depth - s->last_block_max_depth, 1);
// Remember what was sent so the next header can be coded differentially.
3645 s->last_spatial_decomposition_type= s->spatial_decomposition_type;
3646 s->last_qlog = s->qlog;
3647 s->last_qbias = s->qbias;
3648 s->last_mv_scale = s->mv_scale;
3649 s->last_block_max_depth = s->block_max_depth;
// Read the frame header from the range coder s->c; counterpart of encode_header.
// Returns int; the return statements themselves are not visible in this view
// (presumably a negative value after each logged error and 0 on success).
// NOTE(review): the declaration of kstate, the conditions guarding the
// stream-level fields and the version check, and closing braces are also not
// visible.
3652 static int decode_header(SnowContext *s){
3653 int plane_index, level, orientation;
// The keyframe flag is decoded with its own freshly initialised state array.
3656 memset(kstate, MID_STATE, sizeof(kstate));
3658 s->keyframe= get_rac(&s->c, kstate);
// On a keyframe (or with always_reset) the differentially coded fields restart from 0;
// only the first and last assignments of this chain are visible.
3659 if(s->keyframe || s->always_reset){
3661 s->spatial_decomposition_type=
3665 s->block_max_depth= 0;
3668 s->version= get_symbol(&s->c, s->header_state, 0);
// NOTE(review): this message has no trailing "\n", unlike most other av_log calls in this file.
3670 av_log(s->avctx, AV_LOG_ERROR, "version %d not supported", s->version);
3673 s->always_reset= get_rac(&s->c, s->header_state);
3674 s->temporal_decomposition_type= get_symbol(&s->c, s->header_state, 0);
3675 s->temporal_decomposition_count= get_symbol(&s->c, s->header_state, 0);
// NOTE(review): no range check on this value is visible; it bounds the band loops below.
3676 s->spatial_decomposition_count= get_symbol(&s->c, s->header_state, 0);
3677 s->colorspace_type= get_symbol(&s->c, s->header_state, 0);
3678 s->chroma_h_shift= get_symbol(&s->c, s->header_state, 0);
3679 s->chroma_v_shift= get_symbol(&s->c, s->header_state, 0);
3680 s->spatial_scalability= get_rac(&s->c, s->header_state);
3681 // s->rate_scalability= get_rac(&s->c, s->header_state);
// Transmitted as count-1.
3682 s->max_ref_frames= get_symbol(&s->c, s->header_state, 0)+1;
// Rebuild the full qlog table: plane 2 copies plane 1, orientation 2 copies
// orientation 1, everything else is read from the bitstream.
3684 for(plane_index=0; plane_index<3; plane_index++){
3685 for(level=0; level<s->spatial_decomposition_count; level++){
3686 for(orientation=level ? 1:0; orientation<4; orientation++){
3688 if (plane_index==2) q= s->plane[1].band[level][orientation].qlog;
3689 else if(orientation==2) q= s->plane[plane_index].band[level][1].qlog;
3690 else q= get_symbol(&s->c, s->header_state, 1);
3691 s->plane[plane_index].band[level][orientation].qlog= q;
// Per-frame parameters arrive as signed deltas and are accumulated onto the running values.
3697 s->spatial_decomposition_type+= get_symbol(&s->c, s->header_state, 1);
3698 if(s->spatial_decomposition_type > 2){
// NOTE(review): no trailing "\n" in this message.
3699 av_log(s->avctx, AV_LOG_ERROR, "spatial_decomposition_type %d not supported", s->spatial_decomposition_type);
3703 s->qlog += get_symbol(&s->c, s->header_state, 1);
3704 s->mv_scale += get_symbol(&s->c, s->header_state, 1);
3705 s->qbias += get_symbol(&s->c, s->header_state, 1);
3706 s->block_max_depth+= get_symbol(&s->c, s->header_state, 1);
// Only depths 0 and 1 are accepted; an invalid value is reset to 0 after logging.
3707 if(s->block_max_depth > 1 || s->block_max_depth < 0){
// NOTE(review): the text says "too large" although the branch also covers negative values; no trailing "\n".
3708 av_log(s->avctx, AV_LOG_ERROR, "block_max_depth= %d is too large", s->block_max_depth);
3709 s->block_max_depth= 0;
// Runs QROOT iterations in which `v` is multiplied by 2^(1/QROOT) each time,
// so v doubles over the whole loop.
// NOTE(review): the declarations of i and v and the statement that consumes v
// are not visible in this view; presumably each value is stored into qexp[i],
// the table indexed by the quantizer code elsewhere in this file — confirm.
3716 static void init_qexp(void){
3720 for(i=0; i<QROOT; i++){
3722 v *= pow(2, 1.0 / QROOT);
// Initialisation shared by encoder and decoder: DSP function tables, default
// decomposition parameters, the DWT work buffer, per-plane subband layout,
// the MV scaling table, and the mconly_picture buffer.
// NOTE(review): several lines are not visible in this view (declarations of
// width/height/i/j, the header of the first macro, the macro invocations, the
// conditions on plane_index/orientation/level, the return statement).
3726 static int common_init(AVCodecContext *avctx){
3727 SnowContext *s = avctx->priv_data;
3729 int level, orientation, plane_index, dec;
// Fill s->dsp, then (below) point both qpel tables at the H.264 qpel functions.
3734 dsputil_init(&s->dsp, avctx);
3737 s->dsp.put_qpel_pixels_tab [0][dy+dx/4]=\
3738 s->dsp.put_no_rnd_qpel_pixels_tab[0][dy+dx/4]=\
3739 s->dsp.put_h264_qpel_pixels_tab[0][dy+dx/4];\
3740 s->dsp.put_qpel_pixels_tab [1][dy+dx/4]=\
3741 s->dsp.put_no_rnd_qpel_pixels_tab[1][dy+dx/4]=\
3742 s->dsp.put_h264_qpel_pixels_tab[1][dy+dx/4];
// mcfh(dx,dy): install the file's mc_block_hpel<dx><dy>16 / ...8 functions as the
// half-pel put_pixels entries for both block sizes.
3761 #define mcfh(dx,dy)\
3762 s->dsp.put_pixels_tab [0][dy/4+dx/8]=\
3763 s->dsp.put_no_rnd_pixels_tab[0][dy/4+dx/8]=\
3764 mc_block_hpel ## dx ## dy ## 16;\
3765 s->dsp.put_pixels_tab [1][dy/4+dx/8]=\
3766 s->dsp.put_no_rnd_pixels_tab[1][dy/4+dx/8]=\
3767 mc_block_hpel ## dx ## dy ## 8;
// Defaults: 5 decomposition levels; wavelet type taken from avctx->prediction_method.
3777 dec= s->spatial_decomposition_count= 5;
3778 s->spatial_decomposition_type= avctx->prediction_method; //FIXME add decorrelator type r transform_type
// Chroma is hard-coded to be subsampled by 2 in both directions.
3780 s->chroma_h_shift= 1; //FIXME XXX
3781 s->chroma_v_shift= 1;
3783 // dec += FFMAX(s->chroma_h_shift, s->chroma_v_shift);
3785 width= s->avctx->width;
3786 height= s->avctx->height;
// One full-frame coefficient buffer, shared by all planes and bands.
// NOTE(review): no NULL check on this allocation is visible.
3788 s->spatial_dwt_buffer= av_mallocz(width*height*sizeof(DWTELEM));
// Flags select the MV precision scale and whether blocks may be split one level.
3790 s->mv_scale= (s->avctx->flags & CODEC_FLAG_QPEL) ? 2 : 4;
3791 s->block_max_depth= (s->avctx->flags & CODEC_FLAG_4MV) ? 1 : 0;
3793 for(plane_index=0; plane_index<3; plane_index++){
3794 int w= s->avctx->width;
3795 int h= s->avctx->height;
// Chroma dimensions (the guarding condition, presumably plane_index != 0, is not visible).
3798 w>>= s->chroma_h_shift;
3799 h>>= s->chroma_v_shift;
3801 s->plane[plane_index].width = w;
3802 s->plane[plane_index].height= h;
3803 //av_log(NULL, AV_LOG_DEBUG, "%d %d\n", w, h);
// Walk the levels from the highest index down to 0, setting up each band's geometry.
3804 for(level=s->spatial_decomposition_count-1; level>=0; level--){
3805 for(orientation=level ? 1 : 0; orientation<4; orientation++){
3806 SubBand *b= &s->plane[plane_index].band[level][orientation];
// Every band addresses the shared buffer with a level-dependent stride.
3808 b->buf= s->spatial_dwt_buffer;
3810 b->stride= s->plane[plane_index].width << (s->spatial_decomposition_count - level);
// Band size is half of the current w/h; orientations with bit 0 clear round the
// width up, orientations 0 and 1 round the height up.
3811 b->width = (w + !(orientation&1))>>1;
3812 b->height= (h + !(orientation>1))>>1;
// Slice-buffer addressing: line step and x/y offsets of this band.
3814 b->stride_line = 1 << (s->spatial_decomposition_count - level);
3815 b->buf_x_offset = 0;
3816 b->buf_y_offset = 0;
// Offsets for bands in the right half / lower half (selecting conditions not visible).
3820 b->buf_x_offset = (w+1)>>1;
3823 b->buf += b->stride>>1;
3824 b->buf_y_offset = b->stride_line >> 1;
// Link to the same orientation one level lower (guarding condition not visible).
3828 b->parent= &s->plane[plane_index].band[level-1][orientation];
// NOTE(review): no NULL check on this allocation is visible.
3829 b->x_coeff=av_mallocz(((b->width+1) * b->height+1)*sizeof(x_and_coeff));
// Ratio table in 8-bit fixed point: scale_mv_ref[i][j] = 256*(i+1)/(j+1).
3836 for(i=0; i<MAX_REF_FRAMES; i++)
3837 for(j=0; j<MAX_REF_FRAMES; j++)
3838 scale_mv_ref[i][j] = 256*(i+1)/(j+1);
3842 width= s->width= avctx->width;
3843 height= s->height= avctx->height;
3845 assert(width && height);
// NOTE(review): the return value of get_buffer() is ignored here, whereas
// frame_start() checks it.
3847 s->avctx->get_buffer(s->avctx, &s->mconly_picture);
// Convert a lambda-scaled quality value into a qlog value:
//   rint(QROOT * log2(qscale / FF_QP2LAMBDA)) + 61*QROOT/8
// so qlog rises by QROOT for every doubling of qscale.
// NOTE(review): qscale <= 0 feeds log() a non-positive argument; encode_frame
// only calls this on a visible path with pict->quality, whose guard is not
// visible in this view — confirm callers never pass 0.
3852 static int qscale2qlog(int qscale){
3853 return rint(QROOT*log(qscale / (float)FF_QP2LAMBDA)/log(2))
3854 + 61*QROOT/8; //<64 >60
// Single-pass rate control: estimate the luma plane's complexity from its DWT
// coefficients, hand it to the generic rate controller, and update
// pict->quality, s->lambda and s->qlog from the result.
// NOTE(review): the return statements are not visible in this view. The caller
// in encode_frame stores the result as delta_qlog and treats values <= INT_MIN
// as failure, so presumably this returns delta_qlog, or INT_MIN when the
// estimated quality is negative — confirm.
3857 static int ratecontrol_1pass(SnowContext *s, AVFrame *pict)
3859 /* estimate the frame's complexity as a sum of weighted dwt coefs.
3860 * FIXME we know exact mv bits at this point,
3861 * but ratecontrol isn't set up to include them. */
3862 uint32_t coef_sum= 0;
3863 int level, orientation, delta_qlog;
// Visit every band of plane 0.
3865 for(level=0; level<s->spatial_decomposition_count; level++){
3866 for(orientation=level ? 1 : 0; orientation<4; orientation++){
3867 SubBand *b= &s->plane[0].band[level][orientation];
3868 DWTELEM *buf= b->buf;
3869 const int w= b->width;
3870 const int h= b->height;
3871 const int stride= b->stride;
// Weight for this band: a fixed base of 2*QROOT plus the band's own qlog (not the frame qlog).
3872 const int qlog= av_clip(2*QROOT + b->qlog, 0, QROOT*16);
3873 const int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
// Reciprocal of the step in 16.16 fixed point, so the loop multiplies instead of dividing.
3874 const int qdiv= (1<<16)/qmul;
// decorrelate() before summing and correlate() after it restores the buffer;
// the condition guarding both calls is not visible.
3877 decorrelate(s, b, buf, stride, 1, 0);
3880 coef_sum+= abs(buf[x+y*stride]) * qdiv >> 16;
3882 correlate(s, b, buf, stride, 1, 0);
3886 /* ugly, ratecontrol just takes a sqrt again */
3887 coef_sum = (uint64_t)coef_sum * coef_sum >> 16;
3888 assert(coef_sum < INT_MAX);
// The estimate goes into mb_var_sum for I frames and mc_mb_var_sum otherwise; the other field is zeroed.
3890 if(pict->pict_type == I_TYPE){
3891 s->m.current_picture.mb_var_sum= coef_sum;
3892 s->m.current_picture.mc_mb_var_sum= 0;
3894 s->m.current_picture.mc_mb_var_sum= coef_sum;
3895 s->m.current_picture.mb_var_sum= 0;
3898 pict->quality= ff_rate_estimate_qscale(&s->m, 1);
// Negative quality signals failure of the estimator (the statement taken here is not visible).
3899 if (pict->quality < 0)
3901 s->lambda= pict->quality * 3/2;
// Move s->qlog to the value implied by the new quality.
3902 delta_qlog= qscale2qlog(pict->quality) - s->qlog;
3903 s->qlog+= delta_qlog;
// Derive a per-band qlog for plane `p` from the spatial response of each band:
// zero the work buffer, set one coefficient near the band's centre to 256*256,
// run the inverse DWT, accumulate an `error` value over the resulting samples,
// and convert it to a qlog on the logarithmic QROOT scale.
// NOTE(review): the declaration of `error` and the statement that accumulates
// it from `d` are not visible in this view (presumably a sum of d*d).
3907 static void calculate_vissual_weight(SnowContext *s, Plane *p){
3908 int width = p->width;
3909 int height= p->height;
3910 int level, orientation, x, y;
3912 for(level=0; level<s->spatial_decomposition_count; level++){
3913 for(orientation=level ? 1 : 0; orientation<4; orientation++){
3914 SubBand *b= &p->band[level][orientation];
3915 DWTELEM *buf= b->buf;
// NOTE(review): uses sizeof(int) although the buffer is allocated and cleared
// with sizeof(DWTELEM) elsewhere in this file; equivalent only if DWTELEM is int.
3918 memset(s->spatial_dwt_buffer, 0, sizeof(int)*width*height);
// Single impulse at the centre of this band.
3919 buf[b->width/2 + b->height/2*b->stride]= 256*256;
3920 ff_spatial_idwt(s->spatial_dwt_buffer, width, height, width, s->spatial_decomposition_type, s->spatial_decomposition_count);
3921 for(y=0; y<height; y++){
3922 for(x=0; x<width; x++){
3923 int64_t d= s->spatial_dwt_buffer[x + y*width];
// log(...)/log(2^(1/QROOT)) is a base-2^(1/QROOT) logarithm; +0.5 rounds to nearest before truncation.
3928 b->qlog= (int)(log(352256.0/sqrt(error)) / log(pow(2.0, 1.0/QROOT))+0.5);
3929 // av_log(NULL, AV_LOG_DEBUG, "%d %d %d\n", level, orientation, b->qlog/*, sqrt(error)*/);
// Encoder initialisation: option checks, motion-estimation scratch buffers,
// rate-control setup, per-band visual weights, pixel-format check, comparison
// functions, the input picture buffer and (for ME_ITER) per-reference MV and
// score arrays.
// NOTE(review): return statements, some declarations and calls (for example
// whatever populates s->avctx, s->b_width and s->b_height before they are used
// below) and closing braces are not visible in this view.
3934 static int encode_init(AVCodecContext *avctx)
3936 SnowContext *s = avctx->priv_data;
// The codec refuses to run unless the user has lowered strictness to "experimental".
3939 if(avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL){
3940 av_log(avctx, AV_LOG_ERROR, "this codec is under development, files encoded with it may not be decodable with future versions!!!\n"
3941 "use vstrict=-2 / -strict -2 to use it anyway\n");
// Fixed quantizer with global_quality 0 means lossless, which is rejected for DWT_97.
3945 if(avctx->prediction_method == DWT_97
3946 && (avctx->flags & CODEC_FLAG_QSCALE)
3947 && avctx->global_quality == 0){
3948 av_log(avctx, AV_LOG_ERROR, "the 9/7 wavelet is incompatible with lossless mode\n");
3958 s->m.flags = avctx->flags;
3959 s->m.bit_rate= avctx->bit_rate;
// Scratch memory for the embedded MpegEncContext's motion estimator.
// NOTE(review): no NULL checks on these allocations are visible.
3961 s->m.me.scratchpad= av_mallocz((avctx->width+64)*2*16*2*sizeof(uint8_t));
3962 s->m.me.map = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t));
3963 s->m.me.score_map = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t));
3964 s->m.obmc_scratchpad= av_mallocz(MB_SIZE*MB_SIZE*12*sizeof(uint32_t));
3965 h263_encode_init(&s->m); //mv_penalty
// Clamp the requested reference count to [1, MAX_REF_FRAMES].
3967 s->max_ref_frames = FFMAX(FFMIN(avctx->refs, MAX_REF_FRAMES), 1);
// First pass of two-pass encoding needs a stats buffer (256 bytes, zeroed).
3969 if(avctx->flags&CODEC_FLAG_PASS1){
3970 if(!avctx->stats_out)
3971 avctx->stats_out = av_mallocz(256);
// Rate control is set up for pass 2 and whenever the quantizer is not fixed.
3973 if((avctx->flags&CODEC_FLAG_PASS2) || !(avctx->flags&CODEC_FLAG_QSCALE)){
3974 if(ff_rate_control_init(&s->m) < 0)
// pass1_rc: true when neither a fixed quantizer nor pass 2 is requested.
3977 s->pass1_rc= !(avctx->flags & (CODEC_FLAG_QSCALE|CODEC_FLAG_PASS2));
3979 for(plane_index=0; plane_index<3; plane_index++){
3980 calculate_vissual_weight(s, &s->plane[plane_index]);
3984 avctx->coded_frame= &s->current_picture;
// Only YUV 4:2:0 is enabled among the visible cases; other formats are logged as unsupported.
3985 switch(avctx->pix_fmt){
3986 // case PIX_FMT_YUV444P:
3987 // case PIX_FMT_YUV422P:
3988 case PIX_FMT_YUV420P:
3990 // case PIX_FMT_YUV411P:
3991 // case PIX_FMT_YUV410P:
3992 s->colorspace_type= 0;
3994 /* case PIX_FMT_RGB32:
3998 av_log(avctx, AV_LOG_ERROR, "format not supported\n");
4001 // avcodec_get_chroma_sub_sample(avctx->pix_fmt, &s->chroma_h_shift, &s->chroma_v_shift);
4002 s->chroma_h_shift= 1;
4003 s->chroma_v_shift= 1;
// Select the comparison functions requested through me_cmp / me_sub_cmp.
4005 ff_set_cmp(&s->dsp, s->dsp.me_cmp, s->avctx->me_cmp);
4006 ff_set_cmp(&s->dsp, s->dsp.me_sub_cmp, s->avctx->me_sub_cmp);
// NOTE(review): the return value of get_buffer() is ignored here.
4008 s->avctx->get_buffer(s->avctx, &s->input_picture);
// Iterative ME keeps, per reference frame, one MV and one score for each smallest block.
4010 if(s->avctx->me_method == ME_ITER){
4012 int size= s->b_width * s->b_height << 2*s->block_max_depth;
4013 for(i=0; i<s->max_ref_frames; i++){
4014 s->ref_mvs[i]= av_mallocz(size*sizeof(int16_t[2]));
4015 s->ref_scores[i]= av_mallocz(size*sizeof(uint32_t));
// Prepare the picture buffers for a new frame: pad the edges of the picture
// just coded, rotate it into the reference list, and obtain a fresh buffer for
// the new current picture.
// NOTE(review): the declarations of tmp and i, the body of the reference-count
// loop, and the return statements are not visible in this view.
4022 static int frame_start(SnowContext *s){
4024 int w= s->avctx->width; //FIXME round up to x16 ?
4025 int h= s->avctx->height;
// Edge padding is skipped when no picture has been coded yet. Chroma planes use
// half the size and half the border.
4027 if(s->current_picture.data[0]){
4028 draw_edges(s->current_picture.data[0], s->current_picture.linesize[0], w , h , EDGE_WIDTH );
4029 draw_edges(s->current_picture.data[1], s->current_picture.linesize[1], w>>1, h>>1, EDGE_WIDTH/2);
4030 draw_edges(s->current_picture.data[2], s->current_picture.linesize[2], w>>1, h>>1, EDGE_WIDTH/2);
// Rotate: the oldest reference is saved, the list shifts by one, the picture just
// coded becomes reference 0, and the saved frame struct is reused as current.
4033 tmp= s->last_picture[s->max_ref_frames-1];
4034 memmove(s->last_picture+1, s->last_picture, (s->max_ref_frames-1)*sizeof(AVFrame));
4035 s->last_picture[0]= s->current_picture;
4036 s->current_picture= tmp;
// Walk the populated references; an entry that follows a keyframe triggers the
// (not visible) statement below — presumably it ends the scan.
4042 for(i=0; i<s->max_ref_frames && s->last_picture[i].data[0]; i++)
4043 if(i && s->last_picture[i-1].key_frame)
4048 s->current_picture.reference= 1;
4049 if(s->avctx->get_buffer(s->avctx, &s->current_picture) < 0){
4050 av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed\n");
4054 s->current_picture.key_frame= s->keyframe;
// Encode one frame.
//   avctx    - codec context (priv_data is the SnowContext)
//   buf      - output buffer for the range-coded bitstream
//   buf_size - size of buf
//   data     - the input AVFrame
// Returns the value of ff_rac_terminate(c) on the visible exit path.
// NOTE(review): this view is missing many lines (loop headers, else branches,
// the calls that presumably start the frame and write the header, early
// returns, closing braces). Comments below describe only the visible lines and
// flag where a guard is not shown.
4059 static int encode_frame(AVCodecContext *avctx, unsigned char *buf, int buf_size, void *data){
4060 SnowContext *s = avctx->priv_data;
4061 RangeCoder * const c= &s->c;
4062 AVFrame *pict = data;
4063 const int width= s->avctx->width;
4064 const int height= s->avctx->height;
4065 int level, orientation, plane_index, i, y;
// Snapshots of the coder's adaptive states, used if the frame has to be re-encoded.
4066 uint8_t rc_header_bak[sizeof(s->header_state)];
4067 uint8_t rc_block_bak[sizeof(s->block_state)];
4069 ff_init_range_encoder(c, buf, buf_size);
4070 ff_build_rac_states(c, 0.05*(1LL<<32), 256-8);
// Copy the caller's picture row by row into s->input_picture (the plane loop,
// the `shift` definition and the row length argument are not visible).
4074 for(y=0; y<(height>>shift); y++)
4075 memcpy(&s->input_picture.data[i][y * s->input_picture.linesize[i]],
4076 &pict->data[i][y * pict->linesize[i]],
4079 s->new_picture = *pict;
4081 s->m.picture_number= avctx->frame_number;
// Pass 2: the picture type comes from the rate-control entries; quality is
// estimated unless a fixed quantizer was requested.
4082 if(avctx->flags&CODEC_FLAG_PASS2){
4084 pict->pict_type= s->m.rc_context.entry[avctx->frame_number].new_pict_type;
4085 s->keyframe= pict->pict_type==FF_I_TYPE;
4086 if(!(avctx->flags&CODEC_FLAG_QSCALE)) {
4087 pict->quality= ff_rate_estimate_qscale(&s->m, 0);
4088 if (pict->quality < 0)
// Otherwise: keyframe every gop_size frames (every frame when gop_size is 0).
4092 s->keyframe= avctx->gop_size==0 || avctx->frame_number % avctx->gop_size == 0;
4094 pict->pict_type= s->keyframe ? FF_I_TYPE : FF_P_TYPE;
// Single-pass rate control has no history on the very first frame; start from a fixed quality.
4097 if(s->pass1_rc && avctx->frame_number == 0)
4098 pict->quality= 2*FF_QP2LAMBDA;
// Derive qlog and lambda from the quality (the guard on this block is not visible).
4100 s->qlog= qscale2qlog(pict->quality);
4101 s->lambda = pict->quality * 3/2;
// Negative qlog, or quality 0 with a fixed quantizer, selects lossless mode.
4103 if(s->qlog < 0 || (!pict->quality && (avctx->flags & CODEC_FLAG_QSCALE))){
4104 s->qlog= LOSSLESS_QLOG;
4106 }//else keep previous frame's qlog until after motion est
// For P frames, populate the embedded MpegEncContext (s->m) with the fields set below.
4110 s->m.current_picture_ptr= &s->m.current_picture;
4111 if(pict->pict_type == P_TYPE){
// Dimensions in 16x16 units, rounded up.
4112 int block_width = (width +15)>>4;
4113 int block_height= (height+15)>>4;
4114 int stride= s->current_picture.linesize[0];
4116 assert(s->current_picture.data[0]);
4117 assert(s->last_picture[0].data[0]);
4119 s->m.avctx= s->avctx;
4120 s->m.current_picture.data[0]= s->current_picture.data[0];
4121 s->m. last_picture.data[0]= s->last_picture[0].data[0];
4122 s->m. new_picture.data[0]= s-> input_picture.data[0];
4123 s->m. last_picture_ptr= &s->m. last_picture;
4125 s->m. last_picture.linesize[0]=
4126 s->m. new_picture.linesize[0]=
4127 s->m.current_picture.linesize[0]= stride;
4128 s->m.uvlinesize= s->current_picture.linesize[1];
4130 s->m.height= height;
4131 s->m.mb_width = block_width;
4132 s->m.mb_height= block_height;
4133 s->m.mb_stride= s->m.mb_width+1;
4134 s->m.b8_stride= 2*s->m.mb_width+1;
4136 s->m.pict_type= pict->pict_type;
4137 s->m.me_method= s->avctx->me_method;
4138 s->m.me.scene_change_score=0;
4139 s->m.flags= s->avctx->flags;
4140 s->m.quarter_sample= (s->avctx->flags & CODEC_FLAG_QPEL)!=0;
4141 s->m.out_format= FMT_H263;
4142 s->m.unrestricted_mv= 1;
// Lambda, the derived qscale and squared lambda for the motion estimator.
4144 s->m.lambda = s->lambda;
4145 s->m.qscale= (s->m.lambda*139 + FF_LAMBDA_SCALE*64) >> (FF_LAMBDA_SHIFT + 7);
4146 s->lambda2= s->m.lambda2= (s->m.lambda*s->m.lambda + FF_LAMBDA_SCALE/2) >> FF_LAMBDA_SHIFT;
4148 s->m.dsp= s->dsp; //move
// Save coder states so the single-pass rate-control path below can restore them.
4154 memcpy(rc_header_bak, s->header_state, sizeof(s->header_state));
4155 memcpy(rc_block_bak, s->block_state, sizeof(s->block_state));
4160 s->m.pict_type = pict->pict_type;
// Quantizer bias: 2 for P frames, 0 otherwise.
4161 s->qbias= pict->pict_type == P_TYPE ? 2 : 0;
// Bit accounting: bytes written so far count as misc bits, bytes added by encode_blocks as MV bits.
4164 s->m.misc_bits = 8*(s->c.bytestream - s->c.bytestream_start);
4165 encode_blocks(s, 1);
4166 s->m.mv_bits = 8*(s->c.bytestream - s->c.bytestream_start) - s->m.misc_bits;
// Per-plane residual coding.
4168 for(plane_index=0; plane_index<3; plane_index++){
4169 Plane *p= &s->plane[plane_index];
4173 // int bits= put_bits_count(&s->c.pb);
4175 if(!(avctx->flags2 & CODEC_FLAG2_MEMC_ONLY)){
// Load the source samples into the work buffer, scaled up by FRAC_BITS.
4177 if(pict->data[plane_index]) //FIXME gray hack
4180 s->spatial_dwt_buffer[y*w + x]= pict->data[plane_index][y*pict->linesize[plane_index] + x]<<FRAC_BITS;
// Last argument 0 here and 1 after reconstruction below; presumably
// 0 subtracts the motion-compensated prediction and 1 adds it back — confirm.
4183 predict_plane(s, s->spatial_dwt_buffer, plane_index, 0);
// Scene change on a P frame (not in pass 2): restart the encoder and switch to an
// I frame. The first term of this condition and the rest of the branch are not visible.
4186 && pict->pict_type == P_TYPE
4187 && !(avctx->flags&CODEC_FLAG_PASS2)
4188 && s->m.me.scene_change_score > s->avctx->scenechange_threshold){
4189 ff_init_range_encoder(c, buf, buf_size);
4190 ff_build_rac_states(c, 0.05*(1LL<<32), 256-8);
4191 pict->pict_type= FF_I_TYPE;
4193 s->current_picture.key_frame=1;
// Lossless: drop the fractional bits again, with rounding, before the transform.
4197 if(s->qlog == LOSSLESS_QLOG){
4200 s->spatial_dwt_buffer[y*w + x]= (s->spatial_dwt_buffer[y*w + x] + (1<<(FRAC_BITS-1))-1)>>FRAC_BITS;
4205 ff_spatial_dwt(s->spatial_dwt_buffer, w, h, w, s->spatial_decomposition_type, s->spatial_decomposition_count);
// Single-pass rate control picks qlog from the luma coefficients; the stream written
// so far is then discarded and the saved coder states restored before re-encoding.
4207 if(s->pass1_rc && plane_index==0){
4208 int delta_qlog = ratecontrol_1pass(s, pict);
4209 if (delta_qlog <= INT_MIN)
4212 //reordering qlog in the bitstream would eliminate this reset
4213 ff_init_range_encoder(c, buf, buf_size);
4214 memcpy(s->header_state, rc_header_bak, sizeof(s->header_state));
4215 memcpy(s->block_state, rc_block_bak, sizeof(s->block_state));
4217 encode_blocks(s, 0);
// Quantize, decorrelate and entropy-code each band; correlate() restores the
// values afterwards (the guards on decorrelate/correlate are not visible).
4221 for(level=0; level<s->spatial_decomposition_count; level++){
4222 for(orientation=level ? 1 : 0; orientation<4; orientation++){
4223 SubBand *b= &p->band[level][orientation];
4225 quantize(s, b, b->buf, b->stride, s->qbias);
4227 decorrelate(s, b, b->buf, b->stride, pict->pict_type == P_TYPE, 0);
4228 encode_subband(s, b, b->buf, b->parent ? b->parent->buf : NULL, b->stride, orientation);
4229 assert(b->parent==NULL || b->parent->stride == b->stride*2);
4231 correlate(s, b, b->buf, b->stride, 1, 0);
4234 // av_log(NULL, AV_LOG_DEBUG, "plane:%d bits:%d\n", plane_index, put_bits_count(&s->c.pb) - bits);
// Local reconstruction: dequantize, inverse DWT, re-add the prediction.
4236 for(level=0; level<s->spatial_decomposition_count; level++){
4237 for(orientation=level ? 1 : 0; orientation<4; orientation++){
4238 SubBand *b= &p->band[level][orientation];
4240 dequantize(s, b, b->buf, b->stride);
4244 ff_spatial_idwt(s->spatial_dwt_buffer, w, h, w, s->spatial_decomposition_type, s->spatial_decomposition_count);
// Lossless: restore the FRAC_BITS scaling removed before the forward transform.
4245 if(s->qlog == LOSSLESS_QLOG){
4248 s->spatial_dwt_buffer[y*w + x]<<=FRAC_BITS;
4253 predict_plane(s, s->spatial_dwt_buffer, plane_index, 1);
4254 STOP_TIMER("pred-conv")}
// Presumably the branch taken when residual coding is disabled (the `else` is not visible):
// I frames copy the source pixels, other frames use prediction on a zeroed residual.
4257 if(pict->pict_type == I_TYPE){
4260 s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x]=
4261 pict->data[plane_index][y*pict->linesize[plane_index] + x];
4265 memset(s->spatial_dwt_buffer, 0, sizeof(DWTELEM)*w*h);
4266 predict_plane(s, s->spatial_dwt_buffer, plane_index, 1);
// Optional PSNR statistics: per-pixel difference between reconstruction and source
// (the accumulation into `error` is not visible).
4269 if(s->avctx->flags&CODEC_FLAG_PSNR){
4272 if(pict->data[plane_index]) //FIXME gray hack
4275 int d= s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x] - pict->data[plane_index][y*pict->linesize[plane_index] + x];
4279 s->avctx->error[plane_index] += error;
4280 s->current_picture.error[plane_index] = error;
// Release the oldest reference slot if it holds a buffer.
4284 if(s->last_picture[s->max_ref_frames-1].data[0])
4285 avctx->release_buffer(avctx, &s->last_picture[s->max_ref_frames-1]);
// Publish per-frame metadata and bit statistics.
4287 s->current_picture.coded_picture_number = avctx->frame_number;
4288 s->current_picture.pict_type = pict->pict_type;
4289 s->current_picture.quality = pict->quality;
4290 s->m.frame_bits = 8*(s->c.bytestream - s->c.bytestream_start);
4291 s->m.p_tex_bits = s->m.frame_bits - s->m.misc_bits - s->m.mv_bits;
4292 s->m.current_picture.display_picture_number =
4293 s->m.current_picture.coded_picture_number = avctx->frame_number;
4294 s->m.current_picture.quality = pict->quality;
4295 s->m.total_bits += 8*(s->c.bytestream - s->c.bytestream_start);
// Rate-control bookkeeping (the guards on these calls are only partly visible).
4297 if (ff_rate_estimate_qscale(&s->m, 0) < 0)
4299 if(avctx->flags&CODEC_FLAG_PASS1)
4300 ff_write_pass1_stats(&s->m);
4301 s->m.last_pict_type = s->m.pict_type;
4302 avctx->frame_bits = s->m.frame_bits;
4303 avctx->mv_bits = s->m.mv_bits;
4304 avctx->misc_bits = s->m.misc_bits;
4305 avctx->p_tex_bits = s->m.p_tex_bits;
// Flush the range coder; its result is this function's return value.
4309 return ff_rac_terminate(c);
// Free everything visible here that common_init()/encode_init() allocate: the
// DWT buffer, motion-estimation scratch memory, the block array, per-reference
// MV/score arrays, any reference pictures still held, and each band's x_coeff.
// av_freep() is given the address of each pointer.
4312 static void common_end(SnowContext *s){
4313 int plane_index, level, orientation, i;
4315 av_freep(&s->spatial_dwt_buffer);
4317 av_freep(&s->m.me.scratchpad);
4318 av_freep(&s->m.me.map);
4319 av_freep(&s->m.me.score_map);
4320 av_freep(&s->m.obmc_scratchpad);
4322 av_freep(&s->block);
// All MAX_REF_FRAMES slots are visited, not just max_ref_frames of them.
4324 for(i=0; i<MAX_REF_FRAMES; i++){
4325 av_freep(&s->ref_mvs[i]);
4326 av_freep(&s->ref_scores[i]);
// Only slots that actually hold a buffer are released.
4327 if(s->last_picture[i].data[0])
4328 s->avctx->release_buffer(s->avctx, &s->last_picture[i]);
// Same band iteration order as in common_init().
4331 for(plane_index=0; plane_index<3; plane_index++){
4332 for(level=s->spatial_decomposition_count-1; level>=0; level--){
4333 for(orientation=level ? 1 : 0; orientation<4; orientation++){
4334 SubBand *b= &s->plane[plane_index].band[level][orientation];
4336 av_freep(&b->x_coeff);
// Encoder teardown. The only visible action frees avctx->stats_out (allocated
// in encode_init for pass-1 encoding).
// NOTE(review): the use of `s` (presumably a common_end(s) call) and the return
// statement are not visible in this view. av_free() leaves avctx->stats_out
// dangling; confirm nothing reads it afterwards.
4342 static int encode_end(AVCodecContext *avctx)
4344 SnowContext *s = avctx->priv_data;
4347 av_free(avctx->stats_out);
// Decoder initialisation: fixes the output pixel format to YUV 4:2:0 and sets
// up the slice buffer used for sliced inverse-DWT decoding.
// NOTE(review): the declaration of block_size, whatever initialises the plane
// and decomposition fields read below (presumably a common_init(avctx) call),
// and the return statement are not visible in this view.
4352 static int decode_init(AVCodecContext *avctx)
4354 SnowContext *s = avctx->priv_data;
4357 avctx->pix_fmt= PIX_FMT_YUV420P;
// Block size at the maximum split depth.
4361 block_size = MB_SIZE >> s->block_max_depth;
// Third argument is a line count computed from the block size and the number of
// decomposition levels: block_size + count*(count+3) + 1.
4362 slice_buffer_init(&s->sb, s->plane[0].height, (block_size) + (s->spatial_decomposition_count * (s->spatial_decomposition_count + 3)) + 1, s->plane[0].width, s->spatial_dwt_buffer);
4367 static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, uint8_t *buf, int buf_size){
4368 SnowContext *s = avctx->priv_data;
4369 RangeCoder * const c= &s->c;
4371 AVFrame *picture = data;
4372 int level, orientation, plane_index;
4374 ff_init_range_decoder(c, buf, buf_size);
4375 ff_build_rac_states(c, 0.05*(1LL<<32), 256-8);
4377 s->current_picture.pict_type= FF_I_TYPE; //FIXME I vs. P
4379 if(!s->block) alloc_blocks(s);
4382 //keyframe flag duplication mess FIXME
4383 if(avctx->debug&FF_DEBUG_PICT_INFO)
4384 av_log(avctx, AV_LOG_ERROR, "keyframe:%d qlog:%d\n", s->keyframe, s->qlog);
4388 for(plane_index=0; plane_index<3; plane_index++){
4389 Plane *p= &s->plane[plane_index];
4393 int decode_state[MAX_DECOMPOSITIONS][4][1]; /* Stored state info for unpack_coeffs. 1 variable per instance. */
4395 if(s->avctx->debug&2048){
4396 memset(s->spatial_dwt_buffer, 0, sizeof(DWTELEM)*w*h);
4397 predict_plane(s, s->spatial_dwt_buffer, plane_index, 1);
4401 int v= s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x];
4402 s->mconly_picture.data[plane_index][y*s->mconly_picture.linesize[plane_index] + x]= v;
4408 for(level=0; level<s->spatial_decomposition_count; level++){
4409 for(orientation=level ? 1 : 0; orientation<4; orientation++){
4410 SubBand *b= &p->band[level][orientation];
4411 unpack_coeffs(s, b, b->parent, orientation);
4414 STOP_TIMER("unpack coeffs");
4418 const int mb_h= s->b_height << s->block_max_depth;
4419 const int block_size = MB_SIZE >> s->block_max_depth;
4420 const int block_w = plane_index ? block_size/2 : block_size;
4422 dwt_compose_t cs[MAX_DECOMPOSITIONS];
4427 ff_spatial_idwt_buffered_init(cs, &s->sb, w, h, 1, s->spatial_decomposition_type, s->spatial_decomposition_count);
4428 for(mb_y=0; mb_y<=mb_h; mb_y++){
4430 int slice_starty = block_w*mb_y;
4431 int slice_h = block_w*(mb_y+1);
4432 if (!(s->keyframe || s->avctx->debug&512)){
4433 slice_starty = FFMAX(0, slice_starty - (block_w >> 1));
4434 slice_h -= (block_w >> 1);
4439 for(level=0; level<s->spatial_decomposition_count; level++){
4440 for(orientation=level ? 1 : 0; orientation<4; orientation++){
4441 SubBand *b= &p->band[level][orientation];
4444 int our_mb_start = mb_y;
4445 int our_mb_end = (mb_y + 1);
4447 start_y = (mb_y ? ((block_w * our_mb_start) >> (s->spatial_decomposition_count - level)) + s->spatial_decomposition_count - level + extra: 0);
4448 end_y = (((block_w * our_mb_end) >> (s->spatial_decomposition_count - level)) + s->spatial_decomposition_count - level + extra);
4449 if (!(s->keyframe || s->avctx->debug&512)){
4450 start_y = FFMAX(0, start_y - (block_w >> (1+s->spatial_decomposition_count - level)));
4451 end_y = FFMAX(0, end_y - (block_w >> (1+s->spatial_decomposition_count - level)));
4453 start_y = FFMIN(b->height, start_y);
4454 end_y = FFMIN(b->height, end_y);
4456 if (start_y != end_y){
4457 if (orientation == 0){
4458 SubBand * correlate_band = &p->band[0][0];
4459 int correlate_end_y = FFMIN(b->height, end_y + 1);
4460 int correlate_start_y = FFMIN(b->height, (start_y ? start_y + 1 : 0));
4461 decode_subband_slice_buffered(s, correlate_band, &s->sb, correlate_start_y, correlate_end_y, decode_state[0][0]);
4462 correlate_slice_buffered(s, &s->sb, correlate_band, correlate_band->buf, correlate_band->stride, 1, 0, correlate_start_y, correlate_end_y);
4463 dequantize_slice_buffered(s, &s->sb, correlate_band, correlate_band->buf, correlate_band->stride, start_y, end_y);
4466 decode_subband_slice_buffered(s, b, &s->sb, start_y, end_y, decode_state[level][orientation]);
4470 STOP_TIMER("decode_subband_slice");
4474 for(; yd<slice_h; yd+=4){
4475 ff_spatial_idwt_buffered_slice(&s->dsp, cs, &s->sb, w, h, 1, s->spatial_decomposition_type, s->spatial_decomposition_count, yd);
4477 STOP_TIMER("idwt slice");}
4480 if(s->qlog == LOSSLESS_QLOG){
4481 for(; yq<slice_h && yq<h; yq++){
4482 DWTELEM * line = slice_buffer_get_line(&s->sb, yq);
4484 line[x] <<= FRAC_BITS;
4489 predict_slice_buffered(s, &s->sb, s->spatial_dwt_buffer, plane_index, 1, mb_y);
4491 y = FFMIN(p->height, slice_starty);
4492 end_y = FFMIN(p->height, slice_h);
4494 slice_buffer_release(&s->sb, y++);
4497 slice_buffer_flush(&s->sb);
4499 STOP_TIMER("idwt + predict_slices")}
4504 if(s->last_picture[s->max_ref_frames-1].data[0])
4505 avctx->release_buffer(avctx, &s->last_picture[s->max_ref_frames-1]);
4507 if(!(s->avctx->debug&2048))
4508 *picture= s->current_picture;
4510 *picture= s->mconly_picture;
4512 *data_size = sizeof(AVFrame);
4514 bytes_read= c->bytestream - c->bytestream_start;
4515 if(bytes_read ==0) av_log(s->avctx, AV_LOG_ERROR, "error at end of frame\n"); //FIXME
/**
 * Decoder teardown: destroys the slice buffer stored in the SnowContext.
 *
 * @param avctx codec context; avctx->priv_data is the SnowContext
 *
 * NOTE(review): the rest of the body (any further cleanup and the return
 * value) is not visible in this excerpt.
 */
4520 static int decode_end(AVCodecContext *avctx)
4522 SnowContext *s = avctx->priv_data;
// s->sb is the slice buffer that decode_frame() decodes into.
4524 slice_buffer_destroy(&s->sb);
/* Codec descriptor for the Snow decoder.
 * NOTE(review): most initializer fields are not visible in this excerpt; the
 * notes below cover only the visible ones and should be confirmed against the
 * AVCodec field order. */
4531 AVCodec snow_decoder = {
// presumably the private-data size: one SnowContext per codec instance
4535 sizeof(SnowContext),
// presumably the capabilities field: no flag is enabled, CODEC_CAP_DR1 and
// CODEC_CAP_DRAW_HORIZ_BAND are both commented out
4540 0 /*CODEC_CAP_DR1*/ /*| CODEC_CAP_DRAW_HORIZ_BAND*/,
// The encoder descriptor is only compiled when CONFIG_ENCODERS is defined.
4544 #ifdef CONFIG_ENCODERS
/* Codec descriptor for the Snow encoder.
 * NOTE(review): most initializer fields and the matching #endif are not
 * visible in this excerpt. */
4545 AVCodec snow_encoder = {
// presumably the private-data size: one SnowContext per codec instance -- confirm
4549 sizeof(SnowContext),
// NOTE(review): this looks like the body of a standalone self-test routine.
// Its header, its end, and the declarations of width, height, s, i, j, error
// and g are not visible in this excerpt; comments describe visible lines only.
//
// buffer[0] is transformed in place, buffer[1] keeps the untouched reference copy.
4565 int buffer[2][width*height];
4568 s.spatial_decomposition_count=6;
4569 s.spatial_decomposition_type=1;
// Test 1: decomposition type 1 (announced as "5/3"). A forward + inverse DWT
// of random data must reproduce the input exactly; every mismatch is printed.
4571 printf("testing 5/3 DWT\n");
4572 for(i=0; i<width*height; i++)
4573 buffer[0][i]= buffer[1][i]= random()%54321 - 12345;
4575 ff_spatial_dwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4576 ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4578 for(i=0; i<width*height; i++)
4579 if(buffer[0][i]!= buffer[1][i]) printf("fsck: %d %d %d\n",i, buffer[0][i], buffer[1][i]);
// Test 2: decomposition type 0 (announced as "9/7"). The round trip only has
// to match within an absolute difference of 20 per sample.
4581 printf("testing 9/7 DWT\n");
4582 s.spatial_decomposition_type=0;
4583 for(i=0; i<width*height; i++)
4584 buffer[0][i]= buffer[1][i]= random()%54321 - 12345;
4586 ff_spatial_dwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4587 ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
4589 for(i=0; i<width*height; i++)
4590 if(FFABS(buffer[0][i] - buffer[1][i])>20) printf("fsck: %d %d %d\n",i, buffer[0][i], buffer[1][i]);
// Test 3: symbol coder round trip. With a zeroed header_state, encode the
// values i*i*i/3*FFABS(i) for i in [-256, 255] into buffer[0] ...
4593 printf("testing AC coder\n");
4594 memset(s.header_state, 0, sizeof(s.header_state));
4595 ff_init_range_encoder(&s.c, buffer[0], 256*256);
4596 ff_init_cabac_states(&s.c, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64);
4598 for(i=-256; i<256; i++){
4600 put_symbol(&s.c, s.header_state, i*i*i/3*FFABS(i), 1);
4601 STOP_TIMER("put_symbol")
4603 ff_rac_terminate(&s.c);
// ... then reset the state, decode from the same buffer and compare each
// decoded symbol with the expected value.
4605 memset(s.header_state, 0, sizeof(s.header_state));
4606 ff_init_range_decoder(&s.c, buffer[0], 256*256);
4607 ff_init_cabac_states(&s.c, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64);
4609 for(i=-256; i<256; i++){
4612 j= get_symbol(&s.c, s.header_state, 1);
4613 STOP_TIMER("get_symbol")
4614 if(j!=i*i*i/3*FFABS(i)) printf("fsck: %d != %d\n", i, j);
// Test 4: per-subband impulse responses (3 levels, decomposition type 0),
// used to print a visual_weight table.
4618 int level, orientation, x, y;
4619 int64_t errors[8][4];
4622 memset(errors, 0, sizeof(errors));
4623 s.spatial_decomposition_count=3;
4624 s.spatial_decomposition_type=0;
4625 for(level=0; level<s.spatial_decomposition_count; level++){
4626 for(orientation=level ? 1 : 0; orientation<4; orientation++){
// Dimensions and row stride of this level's subbands inside buffer[0].
4627 int w= width >> (s.spatial_decomposition_count-level);
4628 int h= height >> (s.spatial_decomposition_count-level);
4629 int stride= width << (s.spatial_decomposition_count-level);
4630 DWTELEM *buf= buffer[0];
// Move buf to the start of the selected subband: odd orientations are offset
// by w, orientations 2 and 3 by half a stride.
4633 if(orientation&1) buf+=w;
4634 if(orientation>1) buf+=stride>>1;
// Place a single coefficient of 256*256 in the middle of the subband and
// inverse-transform the otherwise zero buffer.
4636 memset(buffer[0], 0, sizeof(int)*width*height);
4637 buf[w/2 + h/2*stride]= 256*256;
4638 ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
// Walk the reconstructed image; for level 2 the central 17x17 window is printed.
// NOTE(review): the accumulation of d into error is not visible in this excerpt.
4639 for(y=0; y<height; y++){
4640 for(x=0; x<width; x++){
4641 int64_t d= buffer[0][x + y*width];
4643 if(FFABS(width/2-x)<9 && FFABS(height/2-y)<9 && level==2) printf("%8"PRId64" ", d);
4645 if(FFABS(height/2-y)<9 && level==2) printf("\n");
// Store the rounded square root of error for this subband and fold it into
// the running gcd g.
// NOTE(review): the branch taken when g is 0 is not visible here.
4647 error= (int)(sqrt(error)+0.5);
4648 errors[level][orientation]= error;
4649 if(g) g=ff_gcd(g, error);
// Print the collected values, divided by their common divisor g, as a C table.
4653 printf("static int const visual_weight[][4]={\n");
4654 for(level=0; level<s.spatial_decomposition_count; level++){
4656 for(orientation=0; orientation<4; orientation++){
4657 printf("%8"PRId64",", errors[level][orientation]/g);
// Test 5: transform a synthetic pattern, overwrite some coefficients and
// print the reconstruction.
// NOTE(review): the enclosing scope and the definition of level used here are
// not visible in this excerpt.
4665 int w= width >> (s.spatial_decomposition_count-level);
4666 int h= height >> (s.spatial_decomposition_count-level);
4667 int stride= width << (s.spatial_decomposition_count-level);
4668 DWTELEM *buf= buffer[0];
4674 memset(buffer[0], 0, sizeof(int)*width*height);
// Fill the image with a repeating 2x2 pattern taken from tab[], scaled by 256*256.
4676 for(y=0; y<height; y++){
4677 for(x=0; x<width; x++){
4678 int tab[4]={0,2,3,1};
4679 buffer[0][x+width*y]= 256*256*tab[(x&1) + 2*(y&1)];
4682 ff_spatial_dwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
// Overwrite coefficients with the constants 169 and 64 (the second one w
// elements to the left of the first).
// NOTE(review): the surrounding loop bounds are not visible in this excerpt.
4686 buf[x + y*stride ]=169;
4687 buf[x + y*stride-w]=64;
4690 ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
// Print the central 17x17 window of the reconstructed image, one row per line.
4692 for(y=0; y<height; y++){
4693 for(x=0; x<width; x++){
4694 int64_t d= buffer[0][x + y*width];
4696 if(FFABS(width/2-x)<9 && FFABS(height/2-y)<9) printf("%8"PRId64" ", d);
4698 if(FFABS(height/2-y)<9) printf("\n");