/*
 * This file is part of MPlayer.
 *
 * MPlayer is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * MPlayer is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with MPlayer; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
 */
23 #include "libavutil/x86/asm.h"
31 static int diff_y_mmx(unsigned char *a, unsigned char *b, int s)
36 "pxor %%mm4, %%mm4 \n\t"
37 "pxor %%mm7, %%mm7 \n\t"
41 "movq (%%"REG_S"), %%mm0 \n\t"
42 "movq (%%"REG_S"), %%mm2 \n\t"
43 "add %%"REG_a", %%"REG_S" \n\t"
44 "movq (%%"REG_D"), %%mm1 \n\t"
45 "add %%"REG_a", %%"REG_D" \n\t"
46 "psubusb %%mm1, %%mm2 \n\t"
47 "psubusb %%mm0, %%mm1 \n\t"
48 "movq %%mm2, %%mm0 \n\t"
49 "movq %%mm1, %%mm3 \n\t"
50 "punpcklbw %%mm7, %%mm0 \n\t"
51 "punpcklbw %%mm7, %%mm1 \n\t"
52 "punpckhbw %%mm7, %%mm2 \n\t"
53 "punpckhbw %%mm7, %%mm3 \n\t"
54 "paddw %%mm0, %%mm4 \n\t"
55 "paddw %%mm1, %%mm4 \n\t"
56 "paddw %%mm2, %%mm4 \n\t"
57 "paddw %%mm3, %%mm4 \n\t"
62 "movq %%mm4, %%mm3 \n\t"
63 "punpcklwd %%mm7, %%mm4 \n\t"
64 "punpckhwd %%mm7, %%mm3 \n\t"
65 "paddd %%mm4, %%mm3 \n\t"
66 "movd %%mm3, %%eax \n\t"
67 "psrlq $32, %%mm3 \n\t"
68 "movd %%mm3, %%edx \n\t"
69 "addl %%edx, %%eax \n\t"
72 : "S" (a), "D" (b), "a" (s)
78 static int licomb_y_mmx(unsigned char *a, unsigned char *b, int s)
83 "pxor %%mm6, %%mm6 \n\t"
84 "pxor %%mm7, %%mm7 \n\t"
85 "sub %%"REG_a", %%"REG_D" \n\t"
89 "movq (%%"REG_D"), %%mm0 \n\t"
90 "movq (%%"REG_D"), %%mm1 \n\t"
91 "punpcklbw %%mm7, %%mm0 \n\t"
92 "movq (%%"REG_D",%%"REG_a"), %%mm2 \n\t"
93 "punpcklbw %%mm7, %%mm1 \n\t"
94 "punpcklbw %%mm7, %%mm2 \n\t"
95 "paddw %%mm0, %%mm0 \n\t"
96 "paddw %%mm2, %%mm1 \n\t"
97 "movq %%mm0, %%mm2 \n\t"
98 "psubusw %%mm1, %%mm0 \n\t"
99 "psubusw %%mm2, %%mm1 \n\t"
100 "paddw %%mm0, %%mm6 \n\t"
101 "paddw %%mm1, %%mm6 \n\t"
103 "movq (%%"REG_S"), %%mm0 \n\t"
104 "movq (%%"REG_D"), %%mm1 \n\t"
105 "punpckhbw %%mm7, %%mm0 \n\t"
106 "movq (%%"REG_D",%%"REG_a"), %%mm2 \n\t"
107 "punpckhbw %%mm7, %%mm1 \n\t"
108 "punpckhbw %%mm7, %%mm2 \n\t"
109 "paddw %%mm0, %%mm0 \n\t"
110 "paddw %%mm2, %%mm1 \n\t"
111 "movq %%mm0, %%mm2 \n\t"
112 "psubusw %%mm1, %%mm0 \n\t"
113 "psubusw %%mm2, %%mm1 \n\t"
114 "paddw %%mm0, %%mm6 \n\t"
115 "paddw %%mm1, %%mm6 \n\t"
117 "movq (%%"REG_D",%%"REG_a"), %%mm0 \n\t"
118 "movq (%%"REG_S"), %%mm1 \n\t"
119 "punpcklbw %%mm7, %%mm0 \n\t"
120 "movq (%%"REG_S",%%"REG_a"), %%mm2 \n\t"
121 "punpcklbw %%mm7, %%mm1 \n\t"
122 "punpcklbw %%mm7, %%mm2 \n\t"
123 "paddw %%mm0, %%mm0 \n\t"
124 "paddw %%mm2, %%mm1 \n\t"
125 "movq %%mm0, %%mm2 \n\t"
126 "psubusw %%mm1, %%mm0 \n\t"
127 "psubusw %%mm2, %%mm1 \n\t"
128 "paddw %%mm0, %%mm6 \n\t"
129 "paddw %%mm1, %%mm6 \n\t"
131 "movq (%%"REG_D",%%"REG_a"), %%mm0 \n\t"
132 "movq (%%"REG_S"), %%mm1 \n\t"
133 "punpckhbw %%mm7, %%mm0 \n\t"
134 "movq (%%"REG_S",%%"REG_a"), %%mm2 \n\t"
135 "punpckhbw %%mm7, %%mm1 \n\t"
136 "punpckhbw %%mm7, %%mm2 \n\t"
137 "paddw %%mm0, %%mm0 \n\t"
138 "paddw %%mm2, %%mm1 \n\t"
139 "movq %%mm0, %%mm2 \n\t"
140 "psubusw %%mm1, %%mm0 \n\t"
141 "psubusw %%mm2, %%mm1 \n\t"
142 "paddw %%mm0, %%mm6 \n\t"
143 "paddw %%mm1, %%mm6 \n\t"
145 "add %%"REG_a", %%"REG_S" \n\t"
146 "add %%"REG_a", %%"REG_D" \n\t"
150 "movq %%mm6, %%mm5 \n\t"
151 "punpcklwd %%mm7, %%mm6 \n\t"
152 "punpckhwd %%mm7, %%mm5 \n\t"
153 "paddd %%mm6, %%mm5 \n\t"
154 "movd %%mm5, %%eax \n\t"
155 "psrlq $32, %%mm5 \n\t"
156 "movd %%mm5, %%edx \n\t"
157 "addl %%edx, %%eax \n\t"
161 : "S" (a), "D" (b), "a" (s)
167 static int var_y_mmx(unsigned char *a, unsigned char *b, int s)
171 "movl $3, %%ecx \n\t"
172 "pxor %%mm4, %%mm4 \n\t"
173 "pxor %%mm7, %%mm7 \n\t"
177 "movq (%%"REG_S"), %%mm0 \n\t"
178 "movq (%%"REG_S"), %%mm2 \n\t"
179 "movq (%%"REG_S",%%"REG_a"), %%mm1 \n\t"
180 "add %%"REG_a", %%"REG_S" \n\t"
181 "psubusb %%mm1, %%mm2 \n\t"
182 "psubusb %%mm0, %%mm1 \n\t"
183 "movq %%mm2, %%mm0 \n\t"
184 "movq %%mm1, %%mm3 \n\t"
185 "punpcklbw %%mm7, %%mm0 \n\t"
186 "punpcklbw %%mm7, %%mm1 \n\t"
187 "punpckhbw %%mm7, %%mm2 \n\t"
188 "punpckhbw %%mm7, %%mm3 \n\t"
189 "paddw %%mm0, %%mm4 \n\t"
190 "paddw %%mm1, %%mm4 \n\t"
191 "paddw %%mm2, %%mm4 \n\t"
192 "paddw %%mm3, %%mm4 \n\t"
197 "movq %%mm4, %%mm3 \n\t"
198 "punpcklwd %%mm7, %%mm4 \n\t"
199 "punpckhwd %%mm7, %%mm3 \n\t"
200 "paddd %%mm4, %%mm3 \n\t"
201 "movd %%mm3, %%eax \n\t"
202 "psrlq $32, %%mm3 \n\t"
203 "movd %%mm3, %%edx \n\t"
204 "addl %%edx, %%eax \n\t"
/*
 * Absolute value of a signed integer expression.
 *
 * Written as a comparison rather than the sign-mask form
 * ((a ^ (a >> 31)) - (a >> 31)): that form hard-codes a 32-bit int and
 * relies on right-shifting a negative value, whose result is
 * implementation-defined in C.  The comparison form is also correct for
 * wider signed types.
 *
 * The argument is evaluated more than once, so it must not have side effects.
 */
#define ABS(a) ((a) < 0 ? -(a) : (a))
217 static int diff_y(unsigned char *a, unsigned char *b, int s)
221 for (j=0; j<8; j++) diff += ABS(a[j]-b[j]);
227 static int licomb_y(unsigned char *a, unsigned char *b, int s)
232 diff += ABS((a[j]<<1) - b[j-s] - b[j])
233 + ABS((b[j]<<1) - a[j] - a[j+s]);
240 static int qpcomb_y(unsigned char *a, unsigned char *b, int s)
245 diff += ABS(a[j] - 3*b[j-s] + 3*a[j+s] - b[j]);
251 static int licomb_y_test(unsigned char *a, unsigned char *b, int s)
253 int c = licomb_y(a,b,s);
254 int m = licomb_y_mmx(a,b,s);
255 if (c != m) printf("%d != %d\n", c, m);
260 static int var_y(unsigned char *a, unsigned char *b, int s)
264 for (j=0; j<8; j++) {
265 var += ABS(a[j]-a[j+s]);
269 return 4*var; /* match comb scaling */
280 static void alloc_buffer(struct pullup_context *c, struct pullup_buffer *b)
283 if (b->planes) return;
284 b->planes = calloc(c->nplanes, sizeof(unsigned char *));
285 for (i = 0; i < c->nplanes; i++) {
286 b->planes[i] = malloc(c->h[i]*c->stride[i]);
287 /* Deal with idiotic 128=0 for chroma: */
288 memset(b->planes[i], c->background[i], c->h[i]*c->stride[i]);
292 struct pullup_buffer *ff_pullup_lock_buffer(struct pullup_buffer *b, int parity)
295 if ((parity+1) & 1) b->lock[0]++;
296 if ((parity+1) & 2) b->lock[1]++;
300 void ff_pullup_release_buffer(struct pullup_buffer *b, int parity)
303 if ((parity+1) & 1) b->lock[0]--;
304 if ((parity+1) & 2) b->lock[1]--;
307 struct pullup_buffer *ff_pullup_get_buffer(struct pullup_context *c, int parity)
311 /* Try first to get the sister buffer for the previous field */
312 if (parity < 2 && c->last && parity != c->last->parity
313 && !c->last->buffer->lock[parity]) {
314 alloc_buffer(c, c->last->buffer);
315 return ff_pullup_lock_buffer(c->last->buffer, parity);
318 /* Prefer a buffer with both fields open */
319 for (i = 0; i < c->nbuffers; i++) {
320 if (c->buffers[i].lock[0]) continue;
321 if (c->buffers[i].lock[1]) continue;
322 alloc_buffer(c, &c->buffers[i]);
323 return ff_pullup_lock_buffer(&c->buffers[i], parity);
326 if (parity == 2) return 0;
328 /* Search for any half-free buffer */
329 for (i = 0; i < c->nbuffers; i++) {
330 if (((parity+1) & 1) && c->buffers[i].lock[0]) continue;
331 if (((parity+1) & 2) && c->buffers[i].lock[1]) continue;
332 alloc_buffer(c, &c->buffers[i]);
333 return ff_pullup_lock_buffer(&c->buffers[i], parity);
344 static void compute_metric(struct pullup_context *c,
345 struct pullup_field *fa, int pa,
346 struct pullup_field *fb, int pb,
347 int (*func)(unsigned char *, unsigned char *, int), int *dest)
349 unsigned char *a, *b;
351 int mp = c->metric_plane;
352 int xstep = c->bpp[mp];
353 int ystep = c->stride[mp]<<3;
354 int s = c->stride[mp]<<1; /* field stride */
355 int w = c->metric_w*xstep;
357 if (!fa->buffer || !fb->buffer) return;
359 /* Shortcut for duplicate fields (e.g. from RFF flag) */
360 if (fa->buffer == fb->buffer && pa == pb) {
361 memset(dest, 0, c->metric_len * sizeof(int));
365 a = fa->buffer->planes[mp] + pa * c->stride[mp] + c->metric_offset;
366 b = fb->buffer->planes[mp] + pb * c->stride[mp] + c->metric_offset;
368 for (y = c->metric_h; y; y--) {
369 for (x = 0; x < w; x += xstep) {
370 *dest++ = func(a + x, b + x, s);
372 a += ystep; b += ystep;
380 static void alloc_metrics(struct pullup_context *c, struct pullup_field *f)
382 f->diffs = calloc(c->metric_len, sizeof(int));
383 f->comb = calloc(c->metric_len, sizeof(int));
384 f->var = calloc(c->metric_len, sizeof(int));
385 /* add more metrics here as needed */
388 static struct pullup_field *make_field_queue(struct pullup_context *c, int len)
390 struct pullup_field *head, *f;
391 f = head = calloc(1, sizeof(struct pullup_field));
393 for (; len > 0; len--) {
394 f->next = calloc(1, sizeof(struct pullup_field));
404 static void check_field_queue(struct pullup_context *c)
406 if (c->head->next == c->first) {
407 struct pullup_field *f = calloc(1, sizeof(struct pullup_field));
416 void ff_pullup_submit_field(struct pullup_context *c, struct pullup_buffer *b, int parity)
418 struct pullup_field *f;
420 /* Grow the circular list if needed */
421 check_field_queue(c);
423 /* Cannot have two fields of same parity in a row; drop the new one */
424 if (c->last && c->last->parity == parity) return;
428 f->buffer = ff_pullup_lock_buffer(b, parity);
433 compute_metric(c, f, parity, f->prev->prev, parity, c->diff, f->diffs);
434 compute_metric(c, parity?f->prev:f, 0, parity?f:f->prev, 1, c->comb, f->comb);
435 compute_metric(c, f, parity, f, -1, c->var, f->var);
437 /* Advance the circular list */
438 if (!c->first) c->first = c->head;
440 c->head = c->head->next;
443 void ff_pullup_flush_fields(struct pullup_context *c)
445 struct pullup_field *f;
447 for (f = c->first; f && f != c->head; f = f->next) {
448 ff_pullup_release_buffer(f->buffer, f->parity);
451 c->first = c->last = 0;
461 #define F_HAVE_BREAKS 1
462 #define F_HAVE_AFFINITY 2
466 #define BREAK_RIGHT 2
471 static int queue_length(struct pullup_field *begin, struct pullup_field *end)
474 struct pullup_field *f;
476 if (!begin || !end) return 0;
477 for (f = begin; f != end; f = f->next) count++;
481 static int find_first_break(struct pullup_field *f, int max)
484 for (i = 0; i < max; i++) {
485 if (f->breaks & BREAK_RIGHT || f->next->breaks & BREAK_LEFT)
492 static void compute_breaks(struct pullup_context *c, struct pullup_field *f0)
495 struct pullup_field *f1 = f0->next;
496 struct pullup_field *f2 = f1->next;
497 struct pullup_field *f3 = f2->next;
498 int l, max_l=0, max_r=0;
499 //struct pullup_field *ff;
500 //for (i=0, ff=c->first; ff != f0; i++, ff=ff->next);
502 if (f0->flags & F_HAVE_BREAKS) return;
503 //printf("\n%d: ", i);
504 f0->flags |= F_HAVE_BREAKS;
506 /* Special case when fields are 100% identical */
507 if (f0->buffer == f2->buffer && f1->buffer != f3->buffer) {
508 f2->breaks |= BREAK_RIGHT;
511 if (f0->buffer != f2->buffer && f1->buffer == f3->buffer) {
512 f1->breaks |= BREAK_LEFT;
516 for (i = 0; i < c->metric_len; i++) {
517 l = f2->diffs[i] - f3->diffs[i];
518 if (l > max_l) max_l = l;
519 if (-l > max_r) max_r = -l;
521 /* Don't get tripped up when differences are mostly quant error */
522 //printf("%d %d\n", max_l, max_r);
523 if (max_l + max_r < 128) return;
524 if (max_l > 4*max_r) f1->breaks |= BREAK_LEFT;
525 if (max_r > 4*max_l) f2->breaks |= BREAK_RIGHT;
528 static void compute_affinity(struct pullup_context *c, struct pullup_field *f)
531 int max_l=0, max_r=0, l;
532 if (f->flags & F_HAVE_AFFINITY) return;
533 f->flags |= F_HAVE_AFFINITY;
534 if (f->buffer == f->next->next->buffer) {
536 f->next->affinity = 0;
537 f->next->next->affinity = -1;
538 f->next->flags |= F_HAVE_AFFINITY;
539 f->next->next->flags |= F_HAVE_AFFINITY;
543 for (i = 0; i < c->metric_len; i++) {
544 int lv = f->prev->var[i];
545 int rv = f->next->var[i];
547 int lc = f->comb[i] - (v+lv) + ABS(v-lv);
548 int rc = f->next->comb[i] - (v+rv) + ABS(v-rv);
552 if (l > max_l) max_l = l;
553 if (-l > max_r) max_r = -l;
555 if (max_l + max_r < 64) return;
556 if (max_r > 6*max_l) f->affinity = -1;
557 else if (max_l > 6*max_r) f->affinity = 1;
559 for (i = 0; i < c->metric_len; i++) {
560 l = f->comb[i] - f->next->comb[i];
561 if (l > max_l) max_l = l;
562 if (-l > max_r) max_r = -l;
564 if (max_l + max_r < 64) return;
565 if (max_r > 2*max_l) f->affinity = -1;
566 else if (max_l > 2*max_r) f->affinity = 1;
570 static void foo(struct pullup_context *c)
572 struct pullup_field *f = c->first;
573 int i, n = queue_length(f, c->last);
574 for (i = 0; i < n-1; i++) {
575 if (i < n-3) compute_breaks(c, f);
576 compute_affinity(c, f);
581 static int decide_frame_length(struct pullup_context *c)
583 struct pullup_field *f0 = c->first;
584 struct pullup_field *f1 = f0->next;
585 struct pullup_field *f2 = f1->next;
588 if (queue_length(c->first, c->last) < 4) return 0;
591 if (f0->affinity == -1) return 1;
593 l = find_first_break(f0, 3);
594 if (l == 1 && c->strict_breaks < 0) l = 0;
598 if (c->strict_breaks < 1 && f0->affinity == 1 && f1->affinity == -1)
602 /* FIXME: strictly speaking, f0->prev is no longer valid... :) */
604 && (f0->prev->breaks & BREAK_RIGHT) && (f2->breaks & BREAK_LEFT)
605 && (f0->affinity != 1 || f1->affinity != -1) )
607 if (f1->affinity == 1) return 1;
610 if (f2->affinity == 1) return 2;
613 /* 9 possibilities covered before switch */
614 if (f1->affinity == 1) return 1; /* covers 6 */
615 else if (f1->affinity == -1) return 2; /* covers 6 */
616 else if (f2->affinity == -1) { /* covers 2 */
617 if (f0->affinity == 1) return 3;
620 else return 2; /* the remaining 6 */
625 static void print_aff_and_breaks(struct pullup_context *c, struct pullup_field *f)
628 struct pullup_field *f0 = f;
629 const char aff_l[] = "+..", aff_r[] = "..+";
630 printf("\naffinity: ");
631 for (i = 0; i < 4; i++) {
632 printf("%c%d%c", aff_l[1+f->affinity], i, aff_r[1+f->affinity]);
636 printf("\nbreaks: ");
637 for (i=0; i<4; i++) {
638 printf("%c%d%c", f->breaks & BREAK_LEFT ? '|' : '.', i, f->breaks & BREAK_RIGHT ? '|' : '.');
648 struct pullup_frame *ff_pullup_get_frame(struct pullup_context *c)
651 struct pullup_frame *fr = c->frame;
652 int n = decide_frame_length(c);
653 int aff = c->first->next->affinity;
656 if (fr->lock) return 0;
659 print_aff_and_breaks(c, c->first);
660 printf("duration: %d \n", n);
665 fr->parity = c->first->parity;
667 for (i = 0; i < n; i++) {
668 /* We cheat and steal the buffer without release+relock */
669 fr->ifields[i] = c->first->buffer;
670 c->first->buffer = 0;
671 c->first = c->first->next;
675 fr->ofields[fr->parity] = fr->ifields[0];
676 fr->ofields[fr->parity^1] = 0;
678 fr->ofields[fr->parity] = fr->ifields[0];
679 fr->ofields[fr->parity^1] = fr->ifields[1];
682 aff = (fr->ifields[0] == fr->ifields[1]) ? -1 : 1;
683 /* else if (c->verbose) printf("forced aff: %d \n", aff); */
684 fr->ofields[fr->parity] = fr->ifields[1+aff];
685 fr->ofields[fr->parity^1] = fr->ifields[1];
687 ff_pullup_lock_buffer(fr->ofields[0], 0);
688 ff_pullup_lock_buffer(fr->ofields[1], 1);
690 if (fr->ofields[0] == fr->ofields[1]) {
691 fr->buffer = fr->ofields[0];
692 ff_pullup_lock_buffer(fr->buffer, 2);
698 static void copy_field(struct pullup_context *c, struct pullup_buffer *dest,
699 struct pullup_buffer *src, int parity)
702 unsigned char *d, *s;
703 for (i = 0; i < c->nplanes; i++) {
704 s = src->planes[i] + parity*c->stride[i];
705 d = dest->planes[i] + parity*c->stride[i];
706 for (j = c->h[i]>>1; j; j--) {
707 memcpy(d, s, c->stride[i]);
708 s += c->stride[i]<<1;
709 d += c->stride[i]<<1;
714 void ff_pullup_pack_frame(struct pullup_context *c, struct pullup_frame *fr)
717 if (fr->buffer) return;
718 if (fr->length < 2) return; /* FIXME: deal with this */
719 for (i = 0; i < 2; i++)
721 if (fr->ofields[i]->lock[i^1]) continue;
722 fr->buffer = fr->ofields[i];
723 ff_pullup_lock_buffer(fr->buffer, 2);
724 copy_field(c, fr->buffer, fr->ofields[i^1], i^1);
727 fr->buffer = ff_pullup_get_buffer(c, 2);
728 copy_field(c, fr->buffer, fr->ofields[0], 0);
729 copy_field(c, fr->buffer, fr->ofields[1], 1);
732 void ff_pullup_release_frame(struct pullup_frame *fr)
735 for (i = 0; i < fr->length; i++)
736 ff_pullup_release_buffer(fr->ifields[i], fr->parity ^ (i&1));
737 ff_pullup_release_buffer(fr->ofields[0], 0);
738 ff_pullup_release_buffer(fr->ofields[1], 1);
739 if (fr->buffer) ff_pullup_release_buffer(fr->buffer, 2);
748 struct pullup_context *ff_pullup_alloc_context(void)
750 struct pullup_context *c;
752 c = calloc(1, sizeof(struct pullup_context));
757 void ff_pullup_preinit_context(struct pullup_context *c)
759 c->bpp = calloc(c->nplanes, sizeof(int));
760 c->w = calloc(c->nplanes, sizeof(int));
761 c->h = calloc(c->nplanes, sizeof(int));
762 c->stride = calloc(c->nplanes, sizeof(int));
763 c->background = calloc(c->nplanes, sizeof(int));
766 void ff_pullup_init_context(struct pullup_context *c)
768 int mp = c->metric_plane;
769 if (c->nbuffers < 10) c->nbuffers = 10;
770 c->buffers = calloc(c->nbuffers, sizeof (struct pullup_buffer));
772 c->metric_w = (c->w[mp] - ((c->junk_left + c->junk_right) << 3)) >> 3;
773 c->metric_h = (c->h[mp] - ((c->junk_top + c->junk_bottom) << 1)) >> 3;
774 c->metric_offset = c->junk_left*c->bpp[mp] + (c->junk_top<<1)*c->stride[mp];
775 c->metric_len = c->metric_w * c->metric_h;
777 c->head = make_field_queue(c, 8);
779 c->frame = calloc(1, sizeof (struct pullup_frame));
780 c->frame->ifields = calloc(3, sizeof (struct pullup_buffer *));
789 if (c->cpu & PULLUP_CPU_MMX) {
790 c->diff = diff_y_mmx;
791 c->comb = licomb_y_mmx;
796 /* c->comb = qpcomb_y; */
799 case PULLUP_FMT_YUY2:
802 case PULLUP_FMT_RGB32:
803 c->diff = diff_rgb32;
809 void ff_pullup_free_context(struct pullup_context *c)
811 struct pullup_field *f;
820 } while (f != c->head);