- intf_DbgMsg("vdec debug: EndThread(%p)\n", p_vdec);
-}
-
-/*****************************************************************************
- * AddBlock : add a block
- *****************************************************************************/
-#ifndef HAVE_MMX
-/* Portable C version. For each of the 8x8 samples, the current picture value
- * and the matching coefficient are summed and the sum is used as an index
- * into p_vdec->pi_crop; the table entry is written back to the picture.
- *  p_vdec  : decoder thread, only used here for its pi_crop table
- *            (presumably a clamping lookup table -- confirm at its definition)
- *  p_block : 64 consecutive coefficients, read row by row
- *  p_data  : first sample of the destination block, updated in place
- *  i_incr  : extra elements to skip after each row of 8 samples */
-static __inline__ void AddBlock( vdec_thread_t * p_vdec, dctelem_t * p_block,
-                                 yuv_data_t * p_data, int i_incr )
-{
-    int i_row, i_col;
-
-    for( i_row = 8; i_row != 0; i_row-- )
-    {
-        for( i_col = 0; i_col < 8; i_col++ )
-        {
-            p_data[i_col] = p_vdec->pi_crop[p_data[i_col] + p_block[i_col]];
-        }
-
-        /* Next row: 8 coefficients further in the block, 8 samples plus the
-         * caller-supplied gap further in the picture. */
-        p_block += 8;
-        p_data += 8 + i_incr;
-    }
-}
-#else
-/* MMX version. Each of the eight stanzas below handles one row of the block:
- *   - load 8 bytes from the picture at (%0),
- *   - widen them to 16-bit words by interleaving with the zeroed mm7
- *     (low four bytes in mm2, high four bytes in mm1),
- *   - add the row's eight 16-bit words from the coefficient block at (%1)
- *     (the asm treats p_block as 64 packed 16-bit words, 16 bytes per row),
- *   - pack back to bytes with unsigned saturation and store the row,
- *   - advance the picture pointer by i_incr + 8 bytes (not after the last
- *     row).
- * p_vdec is not used by this variant.
- *
- * The asm reads p_block and reads/writes the picture rows without listing
- * them as operands, so a "memory" clobber is declared: without it the
- * compiler is allowed to keep pending stores to p_block in registers or move
- * accesses to the picture across the asm once this function is inlined.
- *
- * NOTE(review): "addl" is applied to the pointer operand %0, which only
- * assembles when pointers are 32 bits wide (i386). */
-static __inline__ void AddBlock( vdec_thread_t * p_vdec, dctelem_t * p_block,
-                                 yuv_data_t * p_data, int i_incr )
-{
-    asm __volatile__ (
-            /* mm7 = 0, used as the high byte when widening samples */
-            "pxor       %%mm7,%%mm7\n\t"
-
-            /* row 0 */
-            "movq       (%0),%%mm1\n\t"
-            "movq       %%mm1,%%mm2\n\t"
-            "punpckhbw  %%mm7,%%mm1\n\t"
-            "punpcklbw  %%mm7,%%mm2\n\t"
-            "paddw      (%1),%%mm2\n\t"
-            "paddw      8(%1),%%mm1\n\t"
-            "packuswb   %%mm1,%%mm2\n\t"
-            "movq       %%mm2,(%0)\n\t"
-            "addl       %2,%0\n\t"
-
-            /* row 1 */
-            "movq       (%0),%%mm1\n\t"
-            "movq       %%mm1,%%mm2\n\t"
-            "punpckhbw  %%mm7,%%mm1\n\t"
-            "punpcklbw  %%mm7,%%mm2\n\t"
-            "paddw      16(%1),%%mm2\n\t"
-            "paddw      24(%1),%%mm1\n\t"
-            "packuswb   %%mm1,%%mm2\n\t"
-            "movq       %%mm2,(%0)\n\t"
-            "addl       %2,%0\n\t"
-
-            /* row 2 */
-            "movq       (%0),%%mm1\n\t"
-            "movq       %%mm1,%%mm2\n\t"
-            "punpckhbw  %%mm7,%%mm1\n\t"
-            "punpcklbw  %%mm7,%%mm2\n\t"
-            "paddw      32(%1),%%mm2\n\t"
-            "paddw      40(%1),%%mm1\n\t"
-            "packuswb   %%mm1,%%mm2\n\t"
-            "movq       %%mm2,(%0)\n\t"
-            "addl       %2,%0\n\t"
-
-            /* row 3 */
-            "movq       (%0),%%mm1\n\t"
-            "movq       %%mm1,%%mm2\n\t"
-            "punpckhbw  %%mm7,%%mm1\n\t"
-            "punpcklbw  %%mm7,%%mm2\n\t"
-            "paddw      48(%1),%%mm2\n\t"
-            "paddw      56(%1),%%mm1\n\t"
-            "packuswb   %%mm1,%%mm2\n\t"
-            "movq       %%mm2,(%0)\n\t"
-            "addl       %2,%0\n\t"
-
-            /* row 4 */
-            "movq       (%0),%%mm1\n\t"
-            "movq       %%mm1,%%mm2\n\t"
-            "punpckhbw  %%mm7,%%mm1\n\t"
-            "punpcklbw  %%mm7,%%mm2\n\t"
-            "paddw      64(%1),%%mm2\n\t"
-            "paddw      72(%1),%%mm1\n\t"
-            "packuswb   %%mm1,%%mm2\n\t"
-            "movq       %%mm2,(%0)\n\t"
-            "addl       %2,%0\n\t"
-
-            /* row 5 */
-            "movq       (%0),%%mm1\n\t"
-            "movq       %%mm1,%%mm2\n\t"
-            "punpckhbw  %%mm7,%%mm1\n\t"
-            "punpcklbw  %%mm7,%%mm2\n\t"
-            "paddw      80(%1),%%mm2\n\t"
-            "paddw      88(%1),%%mm1\n\t"
-            "packuswb   %%mm1,%%mm2\n\t"
-            "movq       %%mm2,(%0)\n\t"
-            "addl       %2,%0\n\t"
-
-            /* row 6 */
-            "movq       (%0),%%mm1\n\t"
-            "movq       %%mm1,%%mm2\n\t"
-            "punpckhbw  %%mm7,%%mm1\n\t"
-            "punpcklbw  %%mm7,%%mm2\n\t"
-            "paddw      96(%1),%%mm2\n\t"
-            "paddw      104(%1),%%mm1\n\t"
-            "packuswb   %%mm1,%%mm2\n\t"
-            "movq       %%mm2,(%0)\n\t"
-            "addl       %2,%0\n\t"
-
-            /* row 7 (last row: no pointer advance afterwards) */
-            "movq       (%0),%%mm1\n\t"
-            "movq       %%mm1,%%mm2\n\t"
-            "punpckhbw  %%mm7,%%mm1\n\t"
-            "punpcklbw  %%mm7,%%mm2\n\t"
-            "paddw      112(%1),%%mm2\n\t"
-            "paddw      120(%1),%%mm1\n\t"
-            "packuswb   %%mm1,%%mm2\n\t"
-            "movq       %%mm2,(%0)\n\t"
-
-            /* leave MMX state so that later x87 code works */
-            "emms"
-            : "+r" (p_data)
-            : "r" (p_block), "r" (i_incr+8)
-            : "memory" );
-}
-#endif
-
-
-/*****************************************************************************
- * CopyBlock : copy a block
- *****************************************************************************/
-#ifndef HAVE_MMX
-/* Portable C version. Each of the 64 coefficients is used as an index into
- * p_vdec->pi_crop and the table entry is stored into the picture; the previous
- * picture content is not read.
- *  p_vdec  : decoder thread, only used here for its pi_crop table
- *            (presumably a clamping lookup table -- confirm at its definition)
- *  p_block : 64 consecutive coefficients, read row by row
- *  p_data  : first sample of the destination block
- *  i_incr  : extra elements to skip after each row of 8 samples */
-static __inline__ void CopyBlock( vdec_thread_t * p_vdec, dctelem_t * p_block,
-                                  yuv_data_t * p_data, int i_incr )
-{
-    int i_row, i_col;
-
-    for( i_row = 8; i_row != 0; i_row-- )
-    {
-        for( i_col = 0; i_col < 8; i_col++ )
-        {
-            p_data[i_col] = p_vdec->pi_crop[p_block[i_col]];
-        }
-
-        /* Next row: 8 coefficients further in the block, 8 samples plus the
-         * caller-supplied gap further in the picture. */
-        p_block += 8;
-        p_data += 8 + i_incr;
-    }
-}
-#else
-static __inline__ void CopyBlock( vdec_thread_t * p_vdec, dctelem_t * p_block,
- yuv_data_t * p_data, int i_incr )
-{
- asm __volatile__ (
- "movq (%1),%%mm0\n\t"
- "packuswb 8(%1),%%mm0\n\t"
- "movq %%mm0,(%0)\n\t"
- "addl %2,%0\n\t"
-
- "movq 16(%1),%%mm0\n\t"
- "packuswb 24(%1),%%mm0\n\t"
- "movq %%mm0,(%0)\n\t"
- "addl %2,%0\n\t"
-
- "movq 32(%1),%%mm0\n\t"
- "packuswb 40(%1),%%mm0\n\t"
- "movq %%mm0,(%0)\n\t"
- "addl %2,%0\n\t"
-
- "movq 48(%1),%%mm0\n\t"
- "packuswb 56(%1),%%mm0\n\t"
- "movq %%mm0,(%0)\n\t"
- "addl %2,%0\n\t"
-
- "movq 64(%1),%%mm0\n\t"
- "packuswb 72(%1),%%mm0\n\t"
- "movq %%mm0,(%0)\n\t"
- "addl %2,%0\n\t"
-
- "movq 80(%1),%%mm0\n\t"
- "packuswb 88(%1),%%mm0\n\t"
- "movq %%mm0,(%0)\n\t"
- "addl %2,%0\n\t"
-
- "movq 96(%1),%%mm0\n\t"
- "packuswb 104(%1),%%mm0\n\t"
- "movq %%mm0,(%0)\n\t"
- "addl %2,%0\n\t"
-
- "movq 112(%1),%%mm0\n\t"
- "packuswb 120(%1),%%mm0\n\t"
- "movq %%mm0,(%0)\n\t"
- "emms"
- :"+r" (p_data): "r" (p_block),"r" (i_incr+8));