From fe175c116ab65ff84b7e8b92bf3a99ba58303acc Mon Sep 17 00:00:00 2001 From: Brieuc Jeunhomme Date: Wed, 14 Jun 2000 21:37:36 +0000 Subject: [PATCH] =?utf8?q?r=EF=BF=BDparation=20de=20mes=20betises=20sur=20?= =?utf8?q?la=20yuv=20(d=EF=BF=BDsol=EF=BF=BD=20tm),=20mais=20je=20remets?= =?utf8?q?=20ca=20bientot.?= MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit --- src/video_output/video_yuv.c | 242 +++++------------------------------ 1 file changed, 34 insertions(+), 208 deletions(-) diff --git a/src/video_output/video_yuv.c b/src/video_output/video_yuv.c index 9f19569f8a..7e6f782220 100644 --- a/src/video_output/video_yuv.c +++ b/src/video_output/video_yuv.c @@ -12,15 +12,16 @@ * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. - * + * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111, USA. + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 02111-1307, USA. *****************************************************************************/ /***************************************************************************** @@ -72,26 +73,6 @@ #define V_RED_COEF ((int)(1.596 * (1<> 4) << 7) \ + ((*p_u + dither20[i_real_y]) >> 5) * 9 \ + ((*p_v + dither20[i_real_y]) >> 5) ]; \ - b_jump_uv += *p_offset; \ + b_jump_uv = (b_jump_uv + *p_offset) & 0x1; \ p_y += *p_offset; \ p_u += *p_offset & b_jump_uv; \ p_v += *p_offset++ & b_jump_uv; \ @@ -208,15 +189,15 @@ static void ConvertYUV444RGB32( p_vout_thread_t p_vout, u32 *p_pic, yuv_data (((*p_y + dither11[i_real_y]) >> 4) << 7) \ + ((*p_u + dither21[i_real_y]) >> 5) * 9 \ + ((*p_v + dither21[i_real_y]) >> 5) ]; \ - b_jump_uv += *p_offset; \ + b_jump_uv = (b_jump_uv + *p_offset) & 0x1; \ p_y += *p_offset; \ p_u += *p_offset & b_jump_uv; \ p_v += *p_offset++ & b_jump_uv; \ *p_pic++ = p_lookup[ \ (((*p_y + dither12[i_real_y]) >> 4) << 7) \ + ((*p_u + dither22[i_real_y]) >> 5) * 9 \ - + ((*p_v + dither22[i_real_y]) >> 5) ]; \ - b_jump_uv += *p_offset; \ + + ((*p_v + dither22[i_real_y]) >> 5) ]; \ + b_jump_uv = (b_jump_uv + *p_offset) & 0x1; \ p_y += *p_offset; \ p_u += *p_offset & b_jump_uv; \ p_v += *p_offset++ & b_jump_uv; \ @@ -224,7 +205,7 @@ static void ConvertYUV444RGB32( p_vout_thread_t p_vout, u32 *p_pic, yuv_data (((*p_y + dither13[i_real_y]) >> 4) << 7) \ + ((*p_u + dither23[i_real_y]) >> 5) * 9 \ + ((*p_v + dither23[i_real_y]) >> 5) ]; \ - b_jump_uv += *p_offset; \ + b_jump_uv = (b_jump_uv + *p_offset) & 0x1; \ p_y += *p_offset; \ p_u += *p_offset & b_jump_uv; \ p_v += *p_offset++ & b_jump_uv; \ @@ -312,13 +293,12 @@ static void ConvertYUV444RGB32( p_vout_thread_t p_vout, u32 *p_pic, yuv_data * and 4 Bpp. *****************************************************************************/ #define SCALE_HEIGHT( CHROMA, BPP ) \ - \ /* If line is odd, rewind 4:2:0 U and V samples */ \ - /*if( ((CHROMA == 420) || (CHROMA == 422)) && !(i_y & 0x1) ) */\ - /*{ */\ - /* p_u -= i_chroma_width; */\ - /* p_v -= i_chroma_width; */\ - /*} */\ + if( ((CHROMA == 420) || (CHROMA == 422)) && !(i_y & 0x1) ) \ + { \ + p_u -= i_chroma_width; \ + p_v -= i_chroma_width; \ + } \ \ /* \ * Handle vertical scaling. The current line can be copied or next one \ @@ -327,70 +307,24 @@ static void ConvertYUV444RGB32( p_vout_thread_t p_vout, u32 *p_pic, yuv_data switch( i_vertical_scaling ) \ { \ case -1: /* vertical scaling factor is < 1 */ \ - if( i_y & 0x1 ) \ + while( (i_scale_count -= i_pic_height) >= 0 ) \ { \ - while( (i_scale_count -= i_pic_height) >= 0 ) \ - { \ /* Height reduction: skip next source line */ \ - p_y += i_width; \ - if( (CHROMA == 420) || (CHROMA == 422) ) \ - { \ - if( (i_scale_count -= i_pic_height) >= 0 ) \ - { \ - p_y += i_width; \ - i_y += 2; \ - p_u += i_chroma_width; \ - p_v += i_chroma_width; \ - continue; \ - } \ - else \ - { \ - i_y++; \ - break; \ - } \ - } \ - else if( CHROMA == 444 ) \ - { \ - i_y++; \ - p_u += i_width; \ - p_v += i_width; \ - } \ - } \ - } \ - else \ - { \ - if( CHROMA == 420 || CHROMA == 422 ) \ - { \ - p_u -= i_chroma_width; \ - p_v -= i_chroma_width; \ - } \ - while( (i_scale_count -= i_pic_height) >= 0 ) \ + p_y += i_width; \ + i_y++; \ + if( (CHROMA == 420) || (CHROMA == 422) ) \ { \ - /* Height reduction: skip next source line */ \ - p_y += i_width; \ - if( (CHROMA == 420) || (CHROMA == 422) ) \ + if( i_y & 0x1 ) \ { \ p_u += i_chroma_width; \ p_v += i_chroma_width; \ - if( (i_scale_count -= i_pic_height) >= 0 ) \ - { \ - p_y += i_width; \ - i_y+=2; \ - continue; \ - } \ - else \ - { \ - i_y++; \ - break; \ - } \ - } \ - else if( CHROMA == 444 ) \ - { \ - i_y++; \ - p_u += i_width; \ - p_v += i_width; \ } \ } \ + else if( CHROMA == 444 ) \ + { \ + p_u += i_width; \ + p_v += i_width; \ + } \ } \ i_scale_count += i_height; \ break; \ @@ -398,7 +332,7 @@ static void ConvertYUV444RGB32( p_vout_thread_t p_vout, u32 *p_pic, yuv_data while( (i_scale_count -= i_height) > 0 ) \ { \ /* Height increment: copy previous picture line */ \ - for( i_x = i_pic_width >> 4; i_x--; ) \ + for( i_x = i_pic_width / 16; i_x--; ) \ { \ *(((u64 *) p_pic)++) = *(((u64 *) p_pic_start)++ ); \ *(((u64 *) p_pic)++) = *(((u64 *) p_pic_start)++ ); \ @@ -444,6 +378,7 @@ static void ConvertYUV444RGB32( p_vout_thread_t p_vout, u32 *p_pic, yuv_data * Handle vertical scaling. The current line can be copied or next one \ * can be ignored. \ */ \ + \ switch( i_vertical_scaling ) \ { \ case -1: /* vertical scaling factor is < 1 */ \ @@ -1168,16 +1103,15 @@ static void ConvertYUV420RGB8( p_vout_thread_t p_vout, u8 *p_pic, yuv_data_t *p_ int dither22[4] = { 0x6, 0x16, 0x2, 0x12 }; int dither23[4] = { 0x1e, 0xe, 0x1a, 0xa }; - #if 0 - /* other matrices that can be interesting, either for debugging or for - * various effects */ + /* other matrices that can be interesting, either for debugging or for effects */ +#if 0 int dither[4][4] = { { 0, 8, 2, 10 }, { 12, 4, 14, 16 }, { 3, 11, 1, 9}, {15, 7, 13, 5} }; int dither[4][4] = { { 7, 8, 0, 15 }, { 0, 15, 8, 7 }, { 7, 0, 15, 8 }, { 15, 7, 8, 0 } }; int dither[4][4] = { { 0, 15, 0, 15 }, { 15, 0, 15, 0 }, { 0, 15, 0, 15 }, { 15, 0, 15, 0 } }; int dither[4][4] = { { 15, 15, 0, 0 }, { 15, 15, 0, 0 }, { 0, 0, 15, 15 }, { 0, 0, 15, 15 } }; int dither[4][4] = { { 8, 8, 8, 8 }, { 8, 8, 8, 8 }, { 8, 8, 8, 8 }, { 8, 8, 8, 8 } }; int dither[4][4] = { { 0, 1, 2, 3 }, { 4, 5, 6, 7 }, { 8, 9, 10, 11 }, { 12, 13, 14, 15 } }; - #endif +#endif /* * Initialize some values - i_pic_line_width will store the line skip @@ -1352,15 +1286,11 @@ static void ConvertYUV420RGB16( p_vout_thread_t p_vout, u16 *p_pic, yuv_data_t * int i_vertical_scaling; /* vertical scaling type */ int i_x, i_y; /* horizontal and vertical indexes */ int i_scale_count; /* scale modulo counter */ -#ifndef HAVE_MMX int i_uval, i_vval; /* U and V samples */ int i_red, i_green, i_blue; /* U and V modified samples */ -#endif int i_chroma_width; /* chroma width */ u16 * p_yuv; /* base conversion table */ -#ifndef HAVE_MMX u16 * p_ybase; /* Y dependant conversion table */ -#endif u16 * p_pic_start; /* beginning of the current line for copy */ u16 * p_buffer_start; /* conversion buffer start */ u16 * p_buffer; /* conversion buffer pointer */ @@ -1389,9 +1319,6 @@ static void ConvertYUV420RGB16( p_vout_thread_t p_vout, u16 *p_pic, yuv_data_t * p_pic_start = p_pic; p_buffer = b_horizontal_scaling ? p_buffer_start : p_pic; - -#ifndef HAVE_MMX - /* Do YUV conversion to buffer - YUV picture is always formed of 16 * pixels wide blocks */ for( i_x = i_width / 16; i_x--; ) @@ -1405,112 +1332,11 @@ static void ConvertYUV420RGB16( p_vout_thread_t p_vout, u16 *p_pic, yuv_data_t * CONVERT_YUV_PIXEL(2); CONVERT_Y_PIXEL(2); CONVERT_YUV_PIXEL(2); CONVERT_Y_PIXEL(2); } + + /* Do horizontal and vertical scaling */ SCALE_WIDTH; SCALE_HEIGHT(420, 2); } - -#else - for ( i_x = i_width / 8; i_x--; ) - { - __asm__ ( - "movd (%1), %%mm0 # Load 4 Cb 00 00 00 00 u3 u2 u1 u0\n\t" - "movd (%2), %%mm1 # Load 4 Cr 00 00 00 00 v3 v2 v1 v0\n\t" - "pxor %%mm4, %%mm4 # zero mm4\n\t" - "movq (%0), %%mm6 # Load 8 Y Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y0\n\t" - //"movl $0, (%3) # cache preload for image\n\t" - : : "r" (p_y), "r" (p_u), "r" (p_v), "r" (p_buffer)); - - __asm__ ( - ".align 8 \n\t" - /* Do the multiply part of the conversion for even and odd pixels, - * register usage: - * mm0 -> Cblue, mm1 -> Cred, mm2 -> Cgreen even pixels, - * mm3 -> Cblue, mm4 -> Cred, mm5 -> Cgreen odd pixels, - * mm6 -> Y even, mm7 -> Y odd */ - /* convert the chroma part */ - "punpcklbw %%mm4, %%mm0 # scatter 4 Cb 00 u3 00 u2 00 u1 00 u0\n\t" - "punpcklbw %%mm4, %%mm1 # scatter 4 Cr 00 v3 00 v2 00 v1 00 v0\n\t" - "psubsw mmx_80w, %%mm0 # Cb -= 128\n\t" - "psubsw mmx_80w, %%mm1 # Cr -= 128\n\t" - "psllw $3, %%mm0 # Promote precision\n\t" - "psllw $3, %%mm1 # Promote precision\n\t" - "movq %%mm0, %%mm2 # Copy 4 Cb 00 u3 00 u2 00 u1 00 u0\n\t" - "movq %%mm1, %%mm3 # Copy 4 Cr 00 v3 00 v2 00 v1 00 v0\n\t" - "pmulhw mmx_U_green, %%mm2# Mul Cb with green coeff -> Cb green\n\t" - "pmulhw mmx_V_green, %%mm3# Mul Cr with green coeff -> Cr green\n\t" - "pmulhw mmx_U_blue, %%mm0 # Mul Cb -> Cblue 00 b3 00 b2 00 b1 00 b0\n\t" - "pmulhw mmx_V_red, %%mm1 # Mul Cr -> Cred 00 r3 00 r2 00 r1 00 r0\n\t" - "paddsw %%mm3, %%mm2 # Cb green + Cr green -> Cgreen\n\t" - /* convert the luma part */ - "psubusb mmx_10w, %%mm6 # Y -= 16\n\t" - "movq %%mm6, %%mm7 # Copy 8 Y Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y0\n\t" - "pand mmx_00ffw, %%mm6 # get Y even 00 Y6 00 Y4 00 Y2 00 Y0\n\t" - "psrlw $8, %%mm7 # get Y odd 00 Y7 00 Y5 00 Y3 00 Y1\n\t" - "psllw $3, %%mm6 # Promote precision\n\t" - "psllw $3, %%mm7 # Promote precision\n\t" - "pmulhw mmx_Y_coeff, %%mm6# Mul 4 Y even 00 y6 00 y4 00 y2 00 y0\n\t" - "pmulhw mmx_Y_coeff, %%mm7# Mul 4 Y odd 00 y7 00 y5 00 y3 00 y1\n\t" - /* Do the addition part of the conversion for even and odd pixels, - * register usage: - * mm0 -> Cblue, mm1 -> Cred, mm2 -> Cgreen even pixels, - * mm3 -> Cblue, mm4 -> Cred, mm5 -> Cgreen odd pixels, - * mm6 -> Y even, mm7 -> Y odd */ /* Do horizontal and vertical scaling */ - "movq %%mm0, %%mm3 # Copy Cblue\n\t" - "movq %%mm1, %%mm4 # Copy Cred\n\t" - "movq %%mm2, %%mm5 # Copy Cgreen\n\t" - "paddsw %%mm6, %%mm0 # Y even + Cblue 00 B6 00 B4 00 B2 00 B0\n\t" - "paddsw %%mm7, %%mm3 # Y odd + Cblue 00 B7 00 B5 00 B3 00 B1\n\t" - "paddsw %%mm6, %%mm1 # Y even + Cred 00 R6 00 R4 00 R2 00 R0\n\t" - "paddsw %%mm7, %%mm4 # Y odd + Cred 00 R7 00 R5 00 R3 00 R1\n\t" - "paddsw %%mm6, %%mm2 # Y even + Cgreen 00 G6 00 G4 00 G2 00 G0\n\t" - "paddsw %%mm7, %%mm5 # Y odd + Cgreen 00 G7 00 G5 00 G3 00 G1\n\t" - /* Limit RGB even to 0..255 */ - "packuswb %%mm0, %%mm0 # B6 B4 B2 B0 | B6 B4 B2 B0\n\t" - "packuswb %%mm1, %%mm1 # R6 R4 R2 R0 | R6 R4 R2 R0\n\t" - "packuswb %%mm2, %%mm2 # G6 G4 G2 G0 | G6 G4 G2 G0\n\t" - /* Limit RGB odd to 0..255 */ - "packuswb %%mm3, %%mm3 # B7 B5 B3 B1 | B7 B5 B3 B1\n\t" - "packuswb %%mm4, %%mm4 # R7 R5 R3 R1 | R7 R5 R3 R1\n\t" - "packuswb %%mm5, %%mm5 # G7 G5 G3 G1 | G7 G5 G3 G1\n\t" - /* Interleave RGB even and odd */ - "punpcklbw %%mm3, %%mm0 # B7 B6 B5 B4 B3 B2 B1 B0\n\t" - "punpcklbw %%mm4, %%mm1 # R7 R6 R5 R4 R3 R2 R1 R0\n\t" - "punpcklbw %%mm5, %%mm2 # G7 G6 G5 G4 G3 G2 G1 G0\n\t" - /* mask unneeded bits off */ - "pand mmx_redmask, %%mm0# b7b6b5b4 b3_0_0_0 b7b6b5b4 b3_0_0_0\n\t" - "pand mmx_grnmask, %%mm2# g7g6g5g4 g3g2_0_0 g7g6g5g4 g3g2_0_0\n\t" - "pand mmx_redmask, %%mm1# r7r6r5r4 r3_0_0_0 r7r6r5r4 r3_0_0_0\n\t" - "psrlw mmx_blueshift,%%mm0#0_0_0_b7 b6b5b4b3 0_0_0_b7 b6b5b4b3\n\t" - "pxor %%mm4, %%mm4 # zero mm4\n\t" - "movq %%mm0, %%mm5 # Copy B7-B0\n\t" - "movq %%mm2, %%mm7 # Copy G7-G0\n\t" - /* convert rgb24 plane to rgb16 pack for pixel 0-3 */ - "punpcklbw %%mm4, %%mm2 # 0_0_0_0 0_0_0_0 g7g6g5g4 g3g2_0_0\n\t" - "punpcklbw %%mm1, %%mm0 # r7r6r5r4 r3_0_0_0 0_0_0_b7 b6b5b4b3\n\t" - "psllw mmx_blueshift,%%mm2# 0_0_0_0 0_g7g6g5 g4g3g2_0 0_0_0_0\n\t" - "por %%mm2, %%mm0 # r7r6r5r4 r3g7g6g5 g4g3g2b7 b6b5b4b3\n\t" - "movq 8(%0), %%mm6 # Load 8 Y Y7 Y6 Y5 Y4 Y3 Y2 Y1 Y0\n\t" - "movq %%mm0, (%3) # store pixel 0-3\n\t" - /* convert rgb24 plane to rgb16 pack for pixel 0-3 */ - "punpckhbw %%mm4, %%mm7 # 0_0_0_0 0_0_0_0 g7g6g5g4 g3g2_0_0\n\t" - "punpckhbw %%mm1, %%mm5 # r7r6r5r4 r3_0_0_0 0_0_0_b7 b6b5b4b3\n\t" - "psllw mmx_blueshift,%%mm7# 0_0_0_0 0_g7g6g5 g4g3g2_0 0_0_0_0\n\t" - "movd 4(%1), %%mm0 # Load 4 Cb 00 00 00 00 u3 u2 u1 u0\n\t" - "por %%mm7, %%mm5 # r7r6r5r4 r3g7g6g5 g4g3g2b7 b6b5b4b3\n\t" - "movd 4(%2), %%mm1 # Load 4 Cr 00 00 00 00 v3 v2 v1 v0\n\t" - "movq %%mm5, 8(%3) # store pixel 4-7\n\t" - : : "r" (p_y), "r" (p_u), "r" (p_v), "r" (p_buffer)); - p_y += 8; - p_u += 4; - p_v += 4; - p_buffer += 8; - } - - SCALE_WIDTH; - SCALE_HEIGHT(420, 2); - } - __asm__ ("emms\n\t"); -#endif } /***************************************************************************** -- 2.39.2