const uint8_t *lumDither = c->lumDither8, *chrDither = c->chrDither8;
if (uDest) {
- x86_reg uv_off = c->uv_offx2 >> 1;
+ x86_reg uv_off = c->uv_off_byte >> 1;
dither_8to16(c, chrDither, 0);
YSCALEYUV2YV12X(CHR_MMX_FILTER_OFFSET, uDest, chrDstW, 0)
dither_8to16(c, chrDither, 1);
"punpckhwd %%mm0, %%mm5\n\t"
"punpcklwd %%mm0, %%mm6\n\t"
"punpckhwd %%mm0, %%mm7\n\t"
- "psllw $12, %%mm4\n\t"
- "psllw $12, %%mm5\n\t"
- "psllw $12, %%mm6\n\t"
- "psllw $12, %%mm7\n\t"
- "movq %%mm3, "DITHER32"+0(%1)\n\t"
- "movq %%mm4, "DITHER32"+8(%1)\n\t"
- "movq %%mm4, "DITHER32"+16(%1)\n\t"
- "movq %%mm4, "DITHER32"+24(%1)\n\t"
+ "pslld $12, %%mm4\n\t"
+ "pslld $12, %%mm5\n\t"
+ "pslld $12, %%mm6\n\t"
+ "pslld $12, %%mm7\n\t"
+ "movq %%mm4, "DITHER32"+0(%1)\n\t"
+ "movq %%mm5, "DITHER32"+8(%1)\n\t"
+ "movq %%mm6, "DITHER32"+16(%1)\n\t"
+ "movq %%mm7, "DITHER32"+24(%1)\n\t"
:: "r"(srcDither), "r"(&c->redDither)
);
} else {
"punpckhwd %%mm0, %%mm5\n\t"
"punpcklwd %%mm0, %%mm6\n\t"
"punpckhwd %%mm0, %%mm7\n\t"
- "psllw $12, %%mm4\n\t"
- "psllw $12, %%mm5\n\t"
- "psllw $12, %%mm6\n\t"
- "psllw $12, %%mm7\n\t"
- "movq %%mm3, "DITHER32"+0(%1)\n\t"
- "movq %%mm4, "DITHER32"+8(%1)\n\t"
- "movq %%mm4, "DITHER32"+16(%1)\n\t"
- "movq %%mm4, "DITHER32"+24(%1)\n\t"
+ "pslld $12, %%mm4\n\t"
+ "pslld $12, %%mm5\n\t"
+ "pslld $12, %%mm6\n\t"
+ "pslld $12, %%mm7\n\t"
+ "movq %%mm4, "DITHER32"+0(%1)\n\t"
+ "movq %%mm5, "DITHER32"+8(%1)\n\t"
+ "movq %%mm6, "DITHER32"+16(%1)\n\t"
+ "movq %%mm7, "DITHER32"+24(%1)\n\t"
:: "r"(srcDither), "r"(&c->redDither)
);
}
const uint8_t *lumDither = c->lumDither8, *chrDither = c->chrDither8;
if (uDest) {
- x86_reg uv_off = c->uv_offx2 >> 1;
+ x86_reg uv_off = c->uv_off_byte >> 1;
dither_8to32(c, chrDither, 0);
YSCALEYUV2YV12X_ACCURATE(CHR_MMX_FILTER_OFFSET, uDest, chrDstW, 0)
dither_8to32(c, chrDither, 1);
{
x86_reg dummy=0;
x86_reg dstW_reg = dstW;
- x86_reg uv_off = c->uv_offx2;
+ x86_reg uv_off = c->uv_off_byte;
if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {
YSCALEYUV2PACKEDX_ACCURATE
{
x86_reg dummy=0;
x86_reg dstW_reg = dstW;
- x86_reg uv_off = c->uv_offx2;
+ x86_reg uv_off = c->uv_off_byte;
if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {
YSCALEYUV2PACKEDX
{
x86_reg dummy=0;
x86_reg dstW_reg = dstW;
- x86_reg uv_off = c->uv_offx2;
+ x86_reg uv_off = c->uv_off_byte;
YSCALEYUV2PACKEDX_ACCURATE
YSCALEYUV2RGBX
{
x86_reg dummy=0;
x86_reg dstW_reg = dstW;
- x86_reg uv_off = c->uv_offx2;
+ x86_reg uv_off = c->uv_off_byte;
YSCALEYUV2PACKEDX
YSCALEYUV2RGBX
{
x86_reg dummy=0;
x86_reg dstW_reg = dstW;
- x86_reg uv_off = c->uv_offx2;
+ x86_reg uv_off = c->uv_off_byte;
YSCALEYUV2PACKEDX_ACCURATE
YSCALEYUV2RGBX
{
x86_reg dummy=0;
x86_reg dstW_reg = dstW;
- x86_reg uv_off = c->uv_offx2;
+ x86_reg uv_off = c->uv_off_byte;
YSCALEYUV2PACKEDX
YSCALEYUV2RGBX
{
x86_reg dummy=0;
x86_reg dstW_reg = dstW;
- x86_reg uv_off = c->uv_offx2;
+ x86_reg uv_off = c->uv_off_byte;
YSCALEYUV2PACKEDX_ACCURATE
YSCALEYUV2RGBX
{
x86_reg dummy=0;
x86_reg dstW_reg = dstW;
- x86_reg uv_off = c->uv_offx2;
+ x86_reg uv_off = c->uv_off_byte;
YSCALEYUV2PACKEDX
YSCALEYUV2RGBX
{
x86_reg dummy=0;
x86_reg dstW_reg = dstW;
- x86_reg uv_off = c->uv_offx2;
+ x86_reg uv_off = c->uv_off_byte;
YSCALEYUV2PACKEDX_ACCURATE
/* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
{
x86_reg dummy=0;
x86_reg dstW_reg = dstW;
- x86_reg uv_off = c->uv_offx2;
+ x86_reg uv_off = c->uv_off_byte;
YSCALEYUV2PACKEDX
/* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
"1: \n\t"\
"movq (%2, "#index"), %%mm2 \n\t" /* uvbuf0[eax]*/\
"movq (%3, "#index"), %%mm3 \n\t" /* uvbuf1[eax]*/\
- "add "UV_OFFx2"("#c"), "#index" \n\t" \
+ "add "UV_OFF_PX"("#c"), "#index" \n\t" \
"movq (%2, "#index"), %%mm5 \n\t" /* uvbuf0[eax+2048]*/\
"movq (%3, "#index"), %%mm4 \n\t" /* uvbuf1[eax+2048]*/\
- "sub "UV_OFFx2"("#c"), "#index" \n\t" \
+ "sub "UV_OFF_PX"("#c"), "#index" \n\t" \
"psubw %%mm3, %%mm2 \n\t" /* uvbuf0[eax] - uvbuf1[eax]*/\
"psubw %%mm4, %%mm5 \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048]*/\
"movq "CHR_MMX_FILTER_OFFSET"+8("#c"), %%mm0 \n\t"\
"1: \n\t"\
"movq (%2, "#index"), %%mm2 \n\t" /* uvbuf0[eax]*/\
"movq (%3, "#index"), %%mm3 \n\t" /* uvbuf1[eax]*/\
- "add "UV_OFFx2"("#c"), "#index" \n\t" \
+ "add "UV_OFF_PX"("#c"), "#index" \n\t" \
"movq (%2, "#index"), %%mm5 \n\t" /* uvbuf0[eax+2048]*/\
"movq (%3, "#index"), %%mm4 \n\t" /* uvbuf1[eax+2048]*/\
- "sub "UV_OFFx2"("#c"), "#index" \n\t" \
+ "sub "UV_OFF_PX"("#c"), "#index" \n\t" \
"psubw %%mm3, %%mm2 \n\t" /* uvbuf0[eax] - uvbuf1[eax]*/\
"psubw %%mm4, %%mm5 \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048]*/\
"movq "CHR_MMX_FILTER_OFFSET"+8("#c"), %%mm0 \n\t"\
".p2align 4 \n\t"\
"1: \n\t"\
"movq (%2, "#index"), %%mm3 \n\t" /* uvbuf0[eax]*/\
- "add "UV_OFFx2"("#c"), "#index" \n\t" \
+ "add "UV_OFF_PX"("#c"), "#index" \n\t" \
"movq (%2, "#index"), %%mm4 \n\t" /* uvbuf0[eax+2048]*/\
- "sub "UV_OFFx2"("#c"), "#index" \n\t" \
+ "sub "UV_OFF_PX"("#c"), "#index" \n\t" \
"psraw $4, %%mm3 \n\t" /* uvbuf0[eax] - uvbuf1[eax] >>4*/\
"psraw $4, %%mm4 \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048] >>4*/\
"psubw "U_OFFSET"("#c"), %%mm3 \n\t" /* (U-128)8*/\
"1: \n\t"\
"movq (%2, "#index"), %%mm2 \n\t" /* uvbuf0[eax]*/\
"movq (%3, "#index"), %%mm3 \n\t" /* uvbuf1[eax]*/\
- "add "UV_OFFx2"("#c"), "#index" \n\t" \
+ "add "UV_OFF_PX"("#c"), "#index" \n\t" \
"movq (%2, "#index"), %%mm5 \n\t" /* uvbuf0[eax+2048]*/\
"movq (%3, "#index"), %%mm4 \n\t" /* uvbuf1[eax+2048]*/\
- "sub "UV_OFFx2"("#c"), "#index" \n\t" \
+ "sub "UV_OFF_PX"("#c"), "#index" \n\t" \
"paddw %%mm2, %%mm3 \n\t" /* uvbuf0[eax] + uvbuf1[eax]*/\
"paddw %%mm5, %%mm4 \n\t" /* uvbuf0[eax+2048] + uvbuf1[eax+2048]*/\
"psrlw $5, %%mm3 \n\t" /*FIXME might overflow*/\
".p2align 4 \n\t"\
"1: \n\t"\
"movq (%2, "#index"), %%mm3 \n\t" /* uvbuf0[eax]*/\
- "add "UV_OFFx2"("#c"), "#index" \n\t" \
+ "add "UV_OFF_PX"("#c"), "#index" \n\t" \
"movq (%2, "#index"), %%mm4 \n\t" /* uvbuf0[eax+2048]*/\
- "sub "UV_OFFx2"("#c"), "#index" \n\t" \
+ "sub "UV_OFF_PX"("#c"), "#index" \n\t" \
"psraw $7, %%mm3 \n\t" \
"psraw $7, %%mm4 \n\t" \
"movq (%0, "#index", 2), %%mm1 \n\t" /*buf0[eax]*/\
"1: \n\t"\
"movq (%2, "#index"), %%mm2 \n\t" /* uvbuf0[eax]*/\
"movq (%3, "#index"), %%mm3 \n\t" /* uvbuf1[eax]*/\
- "add "UV_OFFx2"("#c"), "#index" \n\t" \
+ "add "UV_OFF_PX"("#c"), "#index" \n\t" \
"movq (%2, "#index"), %%mm5 \n\t" /* uvbuf0[eax+2048]*/\
"movq (%3, "#index"), %%mm4 \n\t" /* uvbuf1[eax+2048]*/\
- "sub "UV_OFFx2"("#c"), "#index" \n\t" \
+ "sub "UV_OFF_PX"("#c"), "#index" \n\t" \
"paddw %%mm2, %%mm3 \n\t" /* uvbuf0[eax] + uvbuf1[eax]*/\
"paddw %%mm5, %%mm4 \n\t" /* uvbuf0[eax+2048] + uvbuf1[eax+2048]*/\
"psrlw $8, %%mm3 \n\t" \