/*
 * Copyright (C) 2004-2010 Michael Niedermayer <michaelni@gmx.at>
 *
 * This file is part of Libav.
 *
 * Libav is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * Libav is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with Libav; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
21 #include "libavutil/attributes.h"
25 void ff_slice_buffer_init(slice_buffer *buf, int line_count,
26 int max_allocated_lines, int line_width,
27 IDWTELEM *base_buffer)
31 buf->base_buffer = base_buffer;
32 buf->line_count = line_count;
33 buf->line_width = line_width;
34 buf->data_count = max_allocated_lines;
35 buf->line = av_mallocz(sizeof(IDWTELEM *) * line_count);
36 buf->data_stack = av_malloc(sizeof(IDWTELEM *) * max_allocated_lines);
38 for (i = 0; i < max_allocated_lines; i++)
39 buf->data_stack[i] = av_malloc(sizeof(IDWTELEM) * line_width);
41 buf->data_stack_top = max_allocated_lines - 1;
44 IDWTELEM *ff_slice_buffer_load_line(slice_buffer *buf, int line)
48 assert(buf->data_stack_top >= 0);
49 // assert(!buf->line[line]);
51 return buf->line[line];
53 buffer = buf->data_stack[buf->data_stack_top];
54 buf->data_stack_top--;
55 buf->line[line] = buffer;
60 void ff_slice_buffer_release(slice_buffer *buf, int line)
64 assert(line >= 0 && line < buf->line_count);
65 assert(buf->line[line]);
67 buffer = buf->line[line];
68 buf->data_stack_top++;
69 buf->data_stack[buf->data_stack_top] = buffer;
70 buf->line[line] = NULL;
73 void ff_slice_buffer_flush(slice_buffer *buf)
76 for (i = 0; i < buf->line_count; i++)
78 ff_slice_buffer_release(buf, i);
81 void ff_slice_buffer_destroy(slice_buffer *buf)
84 ff_slice_buffer_flush(buf);
86 for (i = buf->data_count - 1; i >= 0; i--)
87 av_freep(&buf->data_stack[i]);
88 av_freep(&buf->data_stack);
92 static inline int mirror(int v, int m)
94 while ((unsigned)v > (unsigned)m) {
102 static av_always_inline void lift(DWTELEM *dst, DWTELEM *src, DWTELEM *ref,
103 int dst_step, int src_step, int ref_step,
104 int width, int mul, int add, int shift,
105 int highpass, int inverse)
107 const int mirror_left = !highpass;
108 const int mirror_right = (width & 1) ^ highpass;
109 const int w = (width >> 1) - 1 + (highpass & width);
112 #define LIFT(src, ref, inv) ((src) + ((inv) ? -(ref) : +(ref)))
114 dst[0] = LIFT(src[0], ((mul * 2 * ref[0] + add) >> shift), inverse);
119 for (i = 0; i < w; i++)
120 dst[i * dst_step] = LIFT(src[i * src_step],
121 ((mul * (ref[i * ref_step] +
122 ref[(i + 1) * ref_step]) +
127 dst[w * dst_step] = LIFT(src[w * src_step],
128 ((mul * 2 * ref[w * ref_step] + add) >> shift),
132 static av_always_inline void inv_lift(IDWTELEM *dst, IDWTELEM *src, IDWTELEM *ref,
133 int dst_step, int src_step, int ref_step,
134 int width, int mul, int add, int shift,
135 int highpass, int inverse)
137 const int mirror_left = !highpass;
138 const int mirror_right = (width & 1) ^ highpass;
139 const int w = (width >> 1) - 1 + (highpass & width);
142 #define LIFT(src, ref, inv) ((src) + ((inv) ? -(ref) : +(ref)))
144 dst[0] = LIFT(src[0], ((mul * 2 * ref[0] + add) >> shift), inverse);
149 for (i = 0; i < w; i++)
150 dst[i * dst_step] = LIFT(src[i * src_step],
151 ((mul * (ref[i * ref_step] +
152 ref[(i + 1) * ref_step]) +
157 dst[w * dst_step] = LIFT(src[w * src_step],
158 ((mul * 2 * ref[w * ref_step] + add) >> shift),
164 static av_always_inline void liftS(DWTELEM *dst, DWTELEM *src, DWTELEM *ref,
165 int dst_step, int src_step, int ref_step,
166 int width, int mul, int add, int shift,
167 int highpass, int inverse)
169 const int mirror_left = !highpass;
170 const int mirror_right = (width & 1) ^ highpass;
171 const int w = (width >> 1) - 1 + (highpass & width);
175 #define LIFTS(src, ref, inv) \
176 ((inv) ? (src) + (((ref) + 4 * (src)) >> shift) \
177 : -((-16 * (src) + (ref) + add / \
178 4 + 1 + (5 << 25)) / (5 * 4) - (1 << 23)))
180 dst[0] = LIFTS(src[0], mul * 2 * ref[0] + add, inverse);
185 for (i = 0; i < w; i++)
186 dst[i * dst_step] = LIFTS(src[i * src_step],
187 mul * (ref[i * ref_step] +
188 ref[(i + 1) * ref_step]) + add,
192 dst[w * dst_step] = LIFTS(src[w * src_step],
193 mul * 2 * ref[w * ref_step] + add,
197 static av_always_inline void inv_liftS(IDWTELEM *dst, IDWTELEM *src,
198 IDWTELEM *ref, int dst_step,
199 int src_step, int ref_step,
200 int width, int mul, int add, int shift,
201 int highpass, int inverse)
203 const int mirror_left = !highpass;
204 const int mirror_right = (width & 1) ^ highpass;
205 const int w = (width >> 1) - 1 + (highpass & width);
209 #define LIFTS(src, ref, inv) \
210 ((inv) ? (src) + (((ref) + 4 * (src)) >> shift) \
211 : -((-16 * (src) + (ref) + add / \
212 4 + 1 + (5 << 25)) / (5 * 4) - (1 << 23)))
214 dst[0] = LIFTS(src[0], mul * 2 * ref[0] + add, inverse);
219 for (i = 0; i < w; i++)
220 dst[i * dst_step] = LIFTS(src[i * src_step],
221 mul * (ref[i * ref_step] +
222 ref[(i + 1) * ref_step]) + add,
226 dst[w * dst_step] = LIFTS(src[w * src_step],
227 mul * 2 * ref[w * ref_step] + add, inverse);
231 static void horizontal_decompose53i(DWTELEM *b, int width)
234 const int width2 = width >> 1;
236 const int w2 = (width + 1) >> 1;
238 for (x = 0; x < width2; x++) {
240 temp[x + w2] = b[2 * x + 1];
249 A1 = temp[0 + width2];
250 A1 -= (A2 + A4) >> 1;
254 for (x = 1; x + 1 < width2; x += 2) {
255 A3 = temp[x + width2];
257 A3 -= (A2 + A4) >> 1;
258 A2 += (A1 + A3 + 2) >> 2;
262 A1 = temp[x + 1 + width2];
264 A1 -= (A2 + A4) >> 1;
265 A4 += (A1 + A3 + 2) >> 2;
266 b[x + 1 + width2] = A1;
269 A3 = temp[width - 1];
271 A2 += (A1 + A3 + 2) >> 2;
276 lift(b + w2, temp + w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 0);
277 lift(b, temp, b + w2, 1, 1, 1, width, 1, 2, 2, 0, 0);
281 static void vertical_decompose53iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2,
286 for (i = 0; i < width; i++)
287 b1[i] -= (b0[i] + b2[i]) >> 1;
290 static void vertical_decompose53iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2,
295 for (i = 0; i < width; i++)
296 b1[i] += (b0[i] + b2[i] + 2) >> 2;
299 static void spatial_decompose53i(DWTELEM *buffer, int width, int height,
303 DWTELEM *b0 = buffer + mirror(-2 - 1, height - 1) * stride;
304 DWTELEM *b1 = buffer + mirror(-2, height - 1) * stride;
306 for (y = -2; y < height; y += 2) {
307 DWTELEM *b2 = buffer + mirror(y + 1, height - 1) * stride;
308 DWTELEM *b3 = buffer + mirror(y + 2, height - 1) * stride;
310 if (y + 1 < (unsigned)height)
311 horizontal_decompose53i(b2, width);
312 if (y + 2 < (unsigned)height)
313 horizontal_decompose53i(b3, width);
315 if (y + 1 < (unsigned)height)
316 vertical_decompose53iH0(b1, b2, b3, width);
317 if (y + 0 < (unsigned)height)
318 vertical_decompose53iL0(b0, b1, b2, width);
325 static void horizontal_decompose97i(DWTELEM *b, int width)
328 const int w2 = (width + 1) >> 1;
330 lift(temp + w2, b + 1, b, 1, 2, 2, width, W_AM, W_AO, W_AS, 1, 1);
331 liftS(temp, b, temp + w2, 1, 2, 1, width, W_BM, W_BO, W_BS, 0, 0);
332 lift(b + w2, temp + w2, temp, 1, 1, 1, width, W_CM, W_CO, W_CS, 1, 0);
333 lift(b, temp, b + w2, 1, 1, 1, width, W_DM, W_DO, W_DS, 0, 0);
336 static void vertical_decompose97iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2,
341 for (i = 0; i < width; i++)
342 b1[i] -= (W_AM * (b0[i] + b2[i]) + W_AO) >> W_AS;
345 static void vertical_decompose97iH1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2,
350 for (i = 0; i < width; i++)
351 b1[i] += (W_CM * (b0[i] + b2[i]) + W_CO) >> W_CS;
354 static void vertical_decompose97iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2,
359 for (i = 0; i < width; i++)
361 b1[i] -= (W_BM * (b0[i] + b2[i]) + W_BO) >> W_BS;
363 b1[i] = (16 * 4 * b1[i] - 4 * (b0[i] + b2[i]) + W_BO * 5 + (5 << 27)) /
364 (5 * 16) - (1 << 23);
368 static void vertical_decompose97iL1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2,
373 for (i = 0; i < width; i++)
374 b1[i] += (W_DM * (b0[i] + b2[i]) + W_DO) >> W_DS;
377 static void spatial_decompose97i(DWTELEM *buffer, int width, int height,
381 DWTELEM *b0 = buffer + mirror(-4 - 1, height - 1) * stride;
382 DWTELEM *b1 = buffer + mirror(-4, height - 1) * stride;
383 DWTELEM *b2 = buffer + mirror(-4 + 1, height - 1) * stride;
384 DWTELEM *b3 = buffer + mirror(-4 + 2, height - 1) * stride;
386 for (y = -4; y < height; y += 2) {
387 DWTELEM *b4 = buffer + mirror(y + 3, height - 1) * stride;
388 DWTELEM *b5 = buffer + mirror(y + 4, height - 1) * stride;
390 if (y + 3 < (unsigned)height)
391 horizontal_decompose97i(b4, width);
392 if (y + 4 < (unsigned)height)
393 horizontal_decompose97i(b5, width);
395 if (y + 3 < (unsigned)height)
396 vertical_decompose97iH0(b3, b4, b5, width);
397 if (y + 2 < (unsigned)height)
398 vertical_decompose97iL0(b2, b3, b4, width);
399 if (y + 1 < (unsigned)height)
400 vertical_decompose97iH1(b1, b2, b3, width);
401 if (y + 0 < (unsigned)height)
402 vertical_decompose97iL1(b0, b1, b2, width);
411 void ff_spatial_dwt(DWTELEM *buffer, int width, int height, int stride,
412 int type, int decomposition_count)
416 for (level = 0; level < decomposition_count; level++) {
419 spatial_decompose97i(buffer,
420 width >> level, height >> level,
424 spatial_decompose53i(buffer,
425 width >> level, height >> level,
432 static void horizontal_compose53i(IDWTELEM *b, int width)
434 IDWTELEM temp[width];
435 const int width2 = width >> 1;
436 const int w2 = (width + 1) >> 1;
439 for (x = 0; x < width2; x++) {
441 temp[2 * x + 1] = b[x + w2];
446 b[0] = temp[0] - ((temp[1] + 1) >> 1);
447 for (x = 2; x < width - 1; x += 2) {
448 b[x] = temp[x] - ((temp[x - 1] + temp[x + 1] + 2) >> 2);
449 b[x - 1] = temp[x - 1] + ((b[x - 2] + b[x] + 1) >> 1);
452 b[x] = temp[x] - ((temp[x - 1] + 1) >> 1);
453 b[x - 1] = temp[x - 1] + ((b[x - 2] + b[x] + 1) >> 1);
455 b[x - 1] = temp[x - 1] + b[x - 2];
458 static void vertical_compose53iH0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2,
463 for (i = 0; i < width; i++)
464 b1[i] += (b0[i] + b2[i]) >> 1;
467 static void vertical_compose53iL0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2,
472 for (i = 0; i < width; i++)
473 b1[i] -= (b0[i] + b2[i] + 2) >> 2;
476 static void spatial_compose53i_buffered_init(DWTCompose *cs, slice_buffer *sb,
477 int height, int stride_line)
479 cs->b0 = slice_buffer_get_line(sb,
480 mirror(-1 - 1, height - 1) * stride_line);
481 cs->b1 = slice_buffer_get_line(sb, mirror(-1, height - 1) * stride_line);
485 static void spatial_compose53i_init(DWTCompose *cs, IDWTELEM *buffer,
486 int height, int stride)
488 cs->b0 = buffer + mirror(-1 - 1, height - 1) * stride;
489 cs->b1 = buffer + mirror(-1, height - 1) * stride;
493 static void spatial_compose53i_dy_buffered(DWTCompose *cs, slice_buffer *sb,
494 int width, int height,
499 IDWTELEM *b0 = cs->b0;
500 IDWTELEM *b1 = cs->b1;
501 IDWTELEM *b2 = slice_buffer_get_line(sb,
502 mirror(y + 1, height - 1) *
504 IDWTELEM *b3 = slice_buffer_get_line(sb,
505 mirror(y + 2, height - 1) *
508 if (y + 1 < (unsigned)height && y < (unsigned)height) {
511 for (x = 0; x < width; x++) {
512 b2[x] -= (b1[x] + b3[x] + 2) >> 2;
513 b1[x] += (b0[x] + b2[x]) >> 1;
516 if (y + 1 < (unsigned)height)
517 vertical_compose53iL0(b1, b2, b3, width);
518 if (y + 0 < (unsigned)height)
519 vertical_compose53iH0(b0, b1, b2, width);
522 if (y - 1 < (unsigned)height)
523 horizontal_compose53i(b0, width);
524 if (y + 0 < (unsigned)height)
525 horizontal_compose53i(b1, width);
532 static void spatial_compose53i_dy(DWTCompose *cs, IDWTELEM *buffer, int width,
533 int height, int stride)
536 IDWTELEM *b0 = cs->b0;
537 IDWTELEM *b1 = cs->b1;
538 IDWTELEM *b2 = buffer + mirror(y + 1, height - 1) * stride;
539 IDWTELEM *b3 = buffer + mirror(y + 2, height - 1) * stride;
541 if (y + 1 < (unsigned)height)
542 vertical_compose53iL0(b1, b2, b3, width);
543 if (y + 0 < (unsigned)height)
544 vertical_compose53iH0(b0, b1, b2, width);
546 if (y - 1 < (unsigned)height)
547 horizontal_compose53i(b0, width);
548 if (y + 0 < (unsigned)height)
549 horizontal_compose53i(b1, width);
556 static void av_unused spatial_compose53i(IDWTELEM *buffer, int width,
557 int height, int stride)
560 spatial_compose53i_init(&cs, buffer, height, stride);
561 while (cs.y <= height)
562 spatial_compose53i_dy(&cs, buffer, width, height, stride);
565 void ff_snow_horizontal_compose97i(IDWTELEM *b, int width)
567 IDWTELEM temp[width];
568 const int w2 = (width + 1) >> 1;
570 #if 0 //maybe more understandable but slower
571 inv_lift(temp, b, b + w2, 2, 1, 1, width, W_DM, W_DO, W_DS, 0, 1);
572 inv_lift(temp + 1, b + w2, temp, 2, 1, 2, width, W_CM, W_CO, W_CS, 1, 1);
574 inv_liftS(b, temp, temp + 1, 2, 2, 2, width, W_BM, W_BO, W_BS, 0, 1);
575 inv_lift(b + 1, temp + 1, b, 2, 2, 2, width, W_AM, W_AO, W_AS, 1, 0);
578 temp[0] = b[0] - ((3 * b[w2] + 2) >> 2);
579 for (x = 1; x < (width >> 1); x++) {
580 temp[2 * x] = b[x] - ((3 * (b[x + w2 - 1] + b[x + w2]) + 4) >> 3);
581 temp[2 * x - 1] = b[x + w2 - 1] - temp[2 * x - 2] - temp[2 * x];
584 temp[2 * x] = b[x] - ((3 * b[x + w2 - 1] + 2) >> 2);
585 temp[2 * x - 1] = b[x + w2 - 1] - temp[2 * x - 2] - temp[2 * x];
587 temp[2 * x - 1] = b[x + w2 - 1] - 2 * temp[2 * x - 2];
589 b[0] = temp[0] + ((2 * temp[0] + temp[1] + 4) >> 3);
590 for (x = 2; x < width - 1; x += 2) {
591 b[x] = temp[x] + ((4 * temp[x] + temp[x - 1] + temp[x + 1] + 8) >> 4);
592 b[x - 1] = temp[x - 1] + ((3 * (b[x - 2] + b[x])) >> 1);
595 b[x] = temp[x] + ((2 * temp[x] + temp[x - 1] + 4) >> 3);
596 b[x - 1] = temp[x - 1] + ((3 * (b[x - 2] + b[x])) >> 1);
598 b[x - 1] = temp[x - 1] + 3 * b[x - 2];
602 static void vertical_compose97iH0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2,
607 for (i = 0; i < width; i++)
608 b1[i] += (W_AM * (b0[i] + b2[i]) + W_AO) >> W_AS;
611 static void vertical_compose97iH1(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2,
616 for (i = 0; i < width; i++)
617 b1[i] -= (W_CM * (b0[i] + b2[i]) + W_CO) >> W_CS;
620 static void vertical_compose97iL0(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2,
625 for (i = 0; i < width; i++)
627 b1[i] += (W_BM * (b0[i] + b2[i]) + W_BO) >> W_BS;
629 b1[i] += (W_BM * (b0[i] + b2[i]) + 4 * b1[i] + W_BO) >> W_BS;
633 static void vertical_compose97iL1(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2,
638 for (i = 0; i < width; i++)
639 b1[i] -= (W_DM * (b0[i] + b2[i]) + W_DO) >> W_DS;
642 void ff_snow_vertical_compose97i(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM *b2,
643 IDWTELEM *b3, IDWTELEM *b4, IDWTELEM *b5,
648 for (i = 0; i < width; i++) {
649 b4[i] -= (W_DM * (b3[i] + b5[i]) + W_DO) >> W_DS;
650 b3[i] -= (W_CM * (b2[i] + b4[i]) + W_CO) >> W_CS;
652 b2[i] += (W_BM * (b1[i] + b3[i]) + W_BO) >> W_BS;
654 b2[i] += (W_BM * (b1[i] + b3[i]) + 4 * b2[i] + W_BO) >> W_BS;
656 b1[i] += (W_AM * (b0[i] + b2[i]) + W_AO) >> W_AS;
660 static void spatial_compose97i_buffered_init(DWTCompose *cs, slice_buffer *sb,
661 int height, int stride_line)
663 cs->b0 = slice_buffer_get_line(sb, mirror(-3 - 1, height - 1) * stride_line);
664 cs->b1 = slice_buffer_get_line(sb, mirror(-3, height - 1) * stride_line);
665 cs->b2 = slice_buffer_get_line(sb, mirror(-3 + 1, height - 1) * stride_line);
666 cs->b3 = slice_buffer_get_line(sb, mirror(-3 + 2, height - 1) * stride_line);
670 static void spatial_compose97i_init(DWTCompose *cs, IDWTELEM *buffer, int height,
673 cs->b0 = buffer + mirror(-3 - 1, height - 1) * stride;
674 cs->b1 = buffer + mirror(-3, height - 1) * stride;
675 cs->b2 = buffer + mirror(-3 + 1, height - 1) * stride;
676 cs->b3 = buffer + mirror(-3 + 2, height - 1) * stride;
680 static void spatial_compose97i_dy_buffered(DWTContext *dsp, DWTCompose *cs,
681 slice_buffer *sb, int width,
682 int height, int stride_line)
686 IDWTELEM *b0 = cs->b0;
687 IDWTELEM *b1 = cs->b1;
688 IDWTELEM *b2 = cs->b2;
689 IDWTELEM *b3 = cs->b3;
690 IDWTELEM *b4 = slice_buffer_get_line(sb,
691 mirror(y + 3, height - 1) *
693 IDWTELEM *b5 = slice_buffer_get_line(sb,
694 mirror(y + 4, height - 1) *
697 if (y > 0 && y + 4 < height) {
698 dsp->vertical_compose97i(b0, b1, b2, b3, b4, b5, width);
700 if (y + 3 < (unsigned)height)
701 vertical_compose97iL1(b3, b4, b5, width);
702 if (y + 2 < (unsigned)height)
703 vertical_compose97iH1(b2, b3, b4, width);
704 if (y + 1 < (unsigned)height)
705 vertical_compose97iL0(b1, b2, b3, width);
706 if (y + 0 < (unsigned)height)
707 vertical_compose97iH0(b0, b1, b2, width);
710 if (y - 1 < (unsigned)height)
711 dsp->horizontal_compose97i(b0, width);
712 if (y + 0 < (unsigned)height)
713 dsp->horizontal_compose97i(b1, width);
722 static void spatial_compose97i_dy(DWTCompose *cs, IDWTELEM *buffer, int width,
723 int height, int stride)
726 IDWTELEM *b0 = cs->b0;
727 IDWTELEM *b1 = cs->b1;
728 IDWTELEM *b2 = cs->b2;
729 IDWTELEM *b3 = cs->b3;
730 IDWTELEM *b4 = buffer + mirror(y + 3, height - 1) * stride;
731 IDWTELEM *b5 = buffer + mirror(y + 4, height - 1) * stride;
733 if (y + 3 < (unsigned)height)
734 vertical_compose97iL1(b3, b4, b5, width);
735 if (y + 2 < (unsigned)height)
736 vertical_compose97iH1(b2, b3, b4, width);
737 if (y + 1 < (unsigned)height)
738 vertical_compose97iL0(b1, b2, b3, width);
739 if (y + 0 < (unsigned)height)
740 vertical_compose97iH0(b0, b1, b2, width);
742 if (y - 1 < (unsigned)height)
743 ff_snow_horizontal_compose97i(b0, width);
744 if (y + 0 < (unsigned)height)
745 ff_snow_horizontal_compose97i(b1, width);
754 static void av_unused spatial_compose97i(IDWTELEM *buffer, int width,
755 int height, int stride)
758 spatial_compose97i_init(&cs, buffer, height, stride);
759 while (cs.y <= height)
760 spatial_compose97i_dy(&cs, buffer, width, height, stride);
763 void ff_spatial_idwt_buffered_init(DWTCompose *cs, slice_buffer *sb, int width,
764 int height, int stride_line, int type,
765 int decomposition_count)
768 for (level = decomposition_count - 1; level >= 0; level--) {
771 spatial_compose97i_buffered_init(cs + level, sb, height >> level,
772 stride_line << level);
775 spatial_compose53i_buffered_init(cs + level, sb, height >> level,
776 stride_line << level);
782 void ff_spatial_idwt_buffered_slice(DWTContext *dsp, DWTCompose *cs,
783 slice_buffer *slice_buf, int width,
784 int height, int stride_line, int type,
785 int decomposition_count, int y)
787 const int support = type == 1 ? 3 : 5;
792 for (level = decomposition_count - 1; level >= 0; level--)
793 while (cs[level].y <= FFMIN((y >> level) + support, height >> level)) {
796 spatial_compose97i_dy_buffered(dsp, cs + level, slice_buf,
799 stride_line << level);
802 spatial_compose53i_dy_buffered(cs + level, slice_buf,
805 stride_line << level);
811 static void ff_spatial_idwt_init(DWTCompose *cs, IDWTELEM *buffer, int width,
812 int height, int stride, int type,
813 int decomposition_count)
816 for (level = decomposition_count - 1; level >= 0; level--) {
819 spatial_compose97i_init(cs + level, buffer, height >> level,
823 spatial_compose53i_init(cs + level, buffer, height >> level,
830 static void ff_spatial_idwt_slice(DWTCompose *cs, IDWTELEM *buffer, int width,
831 int height, int stride, int type,
832 int decomposition_count, int y)
834 const int support = type == 1 ? 3 : 5;
839 for (level = decomposition_count - 1; level >= 0; level--)
840 while (cs[level].y <= FFMIN((y >> level) + support, height >> level)) {
843 spatial_compose97i_dy(cs + level, buffer, width >> level,
844 height >> level, stride << level);
847 spatial_compose53i_dy(cs + level, buffer, width >> level,
848 height >> level, stride << level);
854 void ff_spatial_idwt(IDWTELEM *buffer, int width, int height, int stride,
855 int type, int decomposition_count)
857 DWTCompose cs[MAX_DECOMPOSITIONS];
859 ff_spatial_idwt_init(cs, buffer, width, height, stride, type,
860 decomposition_count);
861 for (y = 0; y < height; y += 4)
862 ff_spatial_idwt_slice(cs, buffer, width, height, stride, type,
863 decomposition_count, y);
866 static inline int w_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size,
867 int w, int h, int type)
870 const int dec_count = w == 8 ? 3 : 4;
873 static const int scale[2][2][4][4] = {
876 { 268, 239, 239, 213 },
877 { 0, 224, 224, 152 },
878 { 0, 135, 135, 110 },
880 { // 9/7 16x16 or 32x32 dec=4
881 { 344, 310, 310, 280 },
882 { 0, 320, 320, 228 },
883 { 0, 175, 175, 136 },
884 { 0, 129, 129, 102 },
889 { 275, 245, 245, 218 },
890 { 0, 230, 230, 156 },
891 { 0, 138, 138, 113 },
893 { // 5/3 16x16 or 32x32 dec=4
894 { 352, 317, 317, 286 },
895 { 0, 328, 328, 233 },
896 { 0, 180, 180, 140 },
897 { 0, 132, 132, 105 },
902 for (i = 0; i < h; i++) {
903 for (j = 0; j < w; j += 4) {
904 tmp[32 * i + j + 0] = (pix1[j + 0] - pix2[j + 0]) << 4;
905 tmp[32 * i + j + 1] = (pix1[j + 1] - pix2[j + 1]) << 4;
906 tmp[32 * i + j + 2] = (pix1[j + 2] - pix2[j + 2]) << 4;
907 tmp[32 * i + j + 3] = (pix1[j + 3] - pix2[j + 3]) << 4;
913 ff_spatial_dwt(tmp, w, h, 32, type, dec_count);
917 for (level = 0; level < dec_count; level++)
918 for (ori = level ? 1 : 0; ori < 4; ori++) {
919 int size = w >> (dec_count - level);
920 int sx = (ori & 1) ? size : 0;
921 int stride = 32 << (dec_count - level);
922 int sy = (ori & 2) ? stride >> 1 : 0;
924 for (i = 0; i < size; i++)
925 for (j = 0; j < size; j++) {
926 int v = tmp[sx + sy + i * stride + j] *
927 scale[type][dec_count - 3][level][ori];
/**
 * Comparison-function entry point: forwards to w_c() with a block width
 * of 8 and wavelet type 1 (the variant the w53 names refer to).
 *
 * @return whatever w_c() returns for these arguments
 */
static int w53_8_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    const int block_w = 8;
    const int wavelet = 1;

    return w_c(v, pix1, pix2, line_size, block_w, h, wavelet);
}
/**
 * Comparison-function entry point: forwards to w_c() with a block width
 * of 8 and wavelet type 0 (the variant the w97 names refer to).
 *
 * @return whatever w_c() returns for these arguments
 */
static int w97_8_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    const int block_w = 8;
    const int wavelet = 0;

    return w_c(v, pix1, pix2, line_size, block_w, h, wavelet);
}
/**
 * Comparison-function entry point: forwards to w_c() with a block width
 * of 16 and wavelet type 1 (the variant the w53 names refer to).
 *
 * @return whatever w_c() returns for these arguments
 */
static int w53_16_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    const int block_w = 16;
    const int wavelet = 1;

    return w_c(v, pix1, pix2, line_size, block_w, h, wavelet);
}
/**
 * Comparison-function entry point: forwards to w_c() with a block width
 * of 16 and wavelet type 0 (the variant the w97 names refer to).
 *
 * @return whatever w_c() returns for these arguments
 */
static int w97_16_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    const int block_w = 16;
    const int wavelet = 0;

    return w_c(v, pix1, pix2, line_size, block_w, h, wavelet);
}
/**
 * Comparison-function entry point: forwards to w_c() with a block width
 * of 32 and wavelet type 1 (the variant the w53 names refer to).
 *
 * @return whatever w_c() returns for these arguments
 */
int ff_w53_32_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    const int block_w = 32;
    const int wavelet = 1;

    return w_c(v, pix1, pix2, line_size, block_w, h, wavelet);
}
/**
 * Comparison-function entry point: forwards to w_c() with a block width
 * of 32 and wavelet type 0 (the variant the w97 names refer to).
 *
 * @return whatever w_c() returns for these arguments
 */
int ff_w97_32_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    const int block_w = 32;
    const int wavelet = 0;

    return w_c(v, pix1, pix2, line_size, block_w, h, wavelet);
}
965 void ff_dsputil_init_dwt(DSPContext *c)
967 c->w53[0] = w53_16_c;
969 c->w97[0] = w97_16_c;
973 void ff_dwt_init(DWTContext *c)
975 c->vertical_compose97i = ff_snow_vertical_compose97i;
976 c->horizontal_compose97i = ff_snow_horizontal_compose97i;
977 c->inner_add_yblock = ff_snow_inner_add_yblock;