X-Git-Url: https://git.sesse.net/?a=blobdiff_plain;f=libavcodec%2Fx86%2Flossless_videodsp.asm;h=443fe02951d7ef02c3e1853010e4f0d71e2a9877;hb=536ac72f46b7b5094949b4e6a7e07cc8de86aac9;hp=f06fcdf7cfba0be82bab56f0ffdc019901aac1fe;hpb=4e6f2dbcd3ca0cee45a174ed8e50038810f7177b;p=ffmpeg diff --git a/libavcodec/x86/lossless_videodsp.asm b/libavcodec/x86/lossless_videodsp.asm index f06fcdf7cfb..443fe02951d 100644 --- a/libavcodec/x86/lossless_videodsp.asm +++ b/libavcodec/x86/lossless_videodsp.asm @@ -24,98 +24,198 @@ SECTION_RODATA +cextern pb_15 +pb_zzzzzzzz77777777: times 8 db -1 +pb_7: times 8 db 7 pb_ef: times 8 db 14,15 pb_67: times 8 db 6, 7 +pb_zzzz3333zzzzbbbb: db -1,-1,-1,-1,3,3,3,3,-1,-1,-1,-1,11,11,11,11 +pb_zz11zz55zz99zzdd: db -1,-1,1,1,-1,-1,5,5,-1,-1,9,9,-1,-1,13,13 pb_zzzz2323zzzzabab: db -1,-1,-1,-1, 2, 3, 2, 3,-1,-1,-1,-1,10,11,10,11 pb_zzzzzzzz67676767: db -1,-1,-1,-1,-1,-1,-1,-1, 6, 7, 6, 7, 6, 7, 6, 7 SECTION .text -%macro INT16_LOOP 2 ; %1 = a/u (aligned/unaligned), %2 = add/sub - movd m4, maskd - SPLATW m4, m4 - add wd, wd - test wq, 2*mmsize - 1 - jz %%.tomainloop - push tmpq -%%.wordloop: - sub wq, 2 -%ifidn %2, add - mov tmpw, [srcq+wq] - add tmpw, [dstq+wq] +; void ff_add_median_pred_mmxext(uint8_t *dst, const uint8_t *top, +; const uint8_t *diff, int w, +; int *left, int *left_top) +%macro MEDIAN_PRED 0 +cglobal add_median_pred, 6,6,8, dst, top, diff, w, left, left_top + movu m0, [topq] + mova m2, m0 + movd m4, [left_topq] + LSHIFT m2, 1 + mova m1, m0 + por m4, m2 + movd m3, [leftq] + psubb m0, m4 ; t-tl + add dstq, wq + add topq, wq + add diffq, wq + neg wq + jmp .skip +.loop: + movu m4, [topq+wq] + mova m0, m4 + LSHIFT m4, 1 + por m4, m1 + mova m1, m0 ; t + psubb m0, m4 ; t-tl +.skip: + movu m2, [diffq+wq] +%assign i 0 +%rep mmsize + mova m4, m0 + paddb m4, m3 ; t-tl+l + mova m5, m3 + pmaxub m3, m1 + pminub m5, m1 + pminub m3, m4 + pmaxub m3, m5 ; median + paddb m3, m2 ; +residual +%if i==0 + mova m7, m3 + LSHIFT m7, mmsize-1 %else - mov tmpw, [src1q+wq] - sub tmpw, [src2q+wq] + mova m6, m3 + RSHIFT m7, 1 + LSHIFT m6, mmsize-1 + por m7, m6 %endif - and tmpw, maskw - mov [dstq+wq], tmpw - test wq, 2*mmsize - 1 - jnz %%.wordloop - pop tmpq -%%.tomainloop: -%ifidn %2, add - add srcq, wq -%else - add src1q, wq - add src2q, wq +%if i