video_chroma/swscale.c: Fixup commit eae2f440af1d7e79019adc4da5f871d0592d9f91

[vlc] / modules / video_filter / deinterlace / algo_phosphor.c
diff --git a/modules/video_filter/deinterlace/algo_phosphor.c b/modules/video_filter/deinterlace/algo_phosphor.c

index 0d9aa74080c266c0a39cb8b7611c2f05bda141ce..30f5e4bbc17b684dbc05dd7b67394e6646369d06 100644 (file)
--- a/modules/video_filter/deinterlace/algo_phosphor.c
+++ b/modules/video_filter/deinterlace/algo_phosphor.c
@@ -1,24 +1,24 @@
  /*****************************************************************************
   * algo_phosphor.c : Phosphor algorithm for the VLC deinterlacer
   *****************************************************************************
- * Copyright (C) 2011 the VideoLAN team
+ * Copyright (C) 2011 VLC authors and VideoLAN
   * $Id$
   *
   * Author: Juha Jeronen <juha.jeronen@jyu.fi>
   *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation; either version 2.1 of the License, or
   * (at your option) any later version.
   *
   * This program is distributed in the hope that it will be useful,
   * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
   *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
   *****************************************************************************/
  
  #ifdef HAVE_CONFIG_H
@@ -68,8 +68,9 @@
   * @see RenderPhosphor()
   * @see ComposeFrame()
   */
-static void DarkenField( picture_t *p_dst, const int i_field,
-                                           const int i_strength )
+static void DarkenField( picture_t *p_dst,
+                         const int i_field, const int i_strength,
+                         bool process_chroma )
  {
      assert( p_dst != NULL );
      assert( i_field == 0 || i_field == 1 );
@@ -77,11 +78,6 @@ static void DarkenField( picture_t *p_dst, const int i_field,
  
      /* Bitwise ANDing with this clears the i_strength highest bits
         of each byte */
-#ifdef CAN_COMPILE_MMXEXT
-    unsigned u_cpu = vlc_CPU();
-    uint64_t i_strength_u64 = i_strength; /* for MMX version (needs to know
-                                             number of bits) */
-#endif
      const uint8_t  remove_high_u8 = 0xFF >> i_strength;
      const uint64_t remove_high_u64 = remove_high_u8 *
                                              INT64_C(0x0101010101010101);
@@ -91,7 +87,7 @@ static void DarkenField( picture_t *p_dst, const int i_field,
         For luma, the operation is just a shift + bitwise AND, so we vectorize
         even in the C version.
  
-       There is an MMX version, too, because it performs about twice faster.
+       There is an MMX version too, because it performs about twice faster.
      */
      int i_plane = Y_PLANE;
      uint8_t *p_out, *p_out_end;
@@ -111,29 +107,91 @@ static void DarkenField( picture_t *p_dst, const int i_field,
          uint64_t *po = (uint64_t *)p_out;
          int x = 0;
  
-#ifdef CAN_COMPILE_MMXEXT
-        if( u_cpu & CPU_CAPABILITY_MMXEXT )
+        for( ; x < w8; x += 8, ++po )
+            (*po) = ( ((*po) >> i_strength) & remove_high_u64 );
+
+        /* handle the width remainder */
+        uint8_t *po_temp = (uint8_t *)po;
+        for( ; x < w; ++x, ++po_temp )
+            (*po_temp) = ( ((*po_temp) >> i_strength) & remove_high_u8 );
+    }
+
+    /* Process chroma if the field chromas are independent.
+
+       The origin (black) is at YUV = (0, 128, 128) in the uint8 format.
+       The chroma processing is a bit more complicated than luma,
+       and needs MMX for vectorization.
+    */
+    if( process_chroma )
+    {
+        for( i_plane++ /* luma already handled*/;
+             i_plane < p_dst->i_planes;
+             i_plane++ )
          {
-            movq_m2r( i_strength_u64,  mm1 );
-            movq_m2r( remove_high_u64, mm2 );
-            for( ; x < w8; x += 8 )
+            int w = p_dst->p[i_plane].i_visible_pitch;
+            p_out = p_dst->p[i_plane].p_pixels;
+            p_out_end = p_out + p_dst->p[i_plane].i_pitch
+                              * p_dst->p[i_plane].i_visible_lines;
+
+            /* skip first line for bottom field */
+            if( i_field == 1 )
+                p_out += p_dst->p[i_plane].i_pitch;
+
+            for( ; p_out < p_out_end ; p_out += 2*p_dst->p[i_plane].i_pitch )
              {
-                movq_m2r( (*po), mm0 );
+                /* Handle the width remainder */
+                uint8_t *po = p_out;
+                for( int x = 0; x < w; ++x, ++po )
+                    (*po) = 128 + ( ((*po) - 128) / (1 << i_strength) );
+            } /* for p_out... */
+        } /* for i_plane... */
+    } /* if process_chroma */
+}
+
+#ifdef CAN_COMPILE_MMXEXT
+VLC_MMX
+static void DarkenFieldMMX( picture_t *p_dst,
+                            const int i_field, const int i_strength,
+                            bool process_chroma )
+{
+    assert( p_dst != NULL );
+    assert( i_field == 0 || i_field == 1 );
+    assert( i_strength >= 1 && i_strength <= 3 );
  
-                psrlq_r2r( mm1, mm0 );
-                pand_r2r(  mm2, mm0 );
+    uint64_t i_strength_u64 = i_strength; /* needs to know number of bits */
+    const uint8_t  remove_high_u8 = 0xFF >> i_strength;
+    const uint64_t remove_high_u64 = remove_high_u8 *
+                                            INT64_C(0x0101010101010101);
  
-                movq_r2m( mm0, (*po++) );
-            }
-        }
-        else
+    int i_plane = Y_PLANE;
+    uint8_t *p_out, *p_out_end;
+    int w = p_dst->p[i_plane].i_visible_pitch;
+    p_out = p_dst->p[i_plane].p_pixels;
+    p_out_end = p_out + p_dst->p[i_plane].i_pitch
+                      * p_dst->p[i_plane].i_visible_lines;
+
+    /* skip first line for bottom field */
+    if( i_field == 1 )
+        p_out += p_dst->p[i_plane].i_pitch;
+
+    int wm8 = w % 8;   /* remainder */
+    int w8  = w - wm8; /* part of width that is divisible by 8 */
+    for( ; p_out < p_out_end ; p_out += 2*p_dst->p[i_plane].i_pitch )
+    {
+        uint64_t *po = (uint64_t *)p_out;
+        int x = 0;
+
+        movq_m2r( i_strength_u64,  mm1 );
+        movq_m2r( remove_high_u64, mm2 );
+        for( ; x < w8; x += 8 )
          {
-#endif
-            for( ; x < w8; x += 8, ++po )
-                (*po) = ( ((*po) >> i_strength) & remove_high_u64 );
-#ifdef CAN_COMPILE_MMXEXT
+            movq_m2r( (*po), mm0 );
+
+            psrlq_r2r( mm1, mm0 );
+            pand_r2r(  mm2, mm0 );
+
+            movq_r2m( mm0, (*po++) );
          }
-#endif
  
          /* handle the width remainder */
          uint8_t *po_temp = (uint8_t *)po;
@@ -147,19 +205,16 @@ static void DarkenField( picture_t *p_dst, const int i_field,
         The chroma processing is a bit more complicated than luma,
         and needs MMX for vectorization.
      */
-    if( p_dst->format.i_chroma == VLC_CODEC_I422  ||
-        p_dst->format.i_chroma == VLC_CODEC_J422 )
+    if( process_chroma )
      {
-        for( i_plane = 0 ; i_plane < p_dst->i_planes ; i_plane++ )
+        for( i_plane++ /* luma already handled */;
+             i_plane < p_dst->i_planes;
+             i_plane++ )
          {
-            if( i_plane == Y_PLANE )
-                continue; /* luma already handled */
-
              int w = p_dst->p[i_plane].i_visible_pitch;
-#ifdef CAN_COMPILE_MMXEXT
              int wm8 = w % 8;   /* remainder */
              int w8  = w - wm8; /* part of width that is divisible by 8 */
-#endif
+
              p_out = p_dst->p[i_plane].p_pixels;
              p_out_end = p_out + p_dst->p[i_plane].i_pitch
                                * p_dst->p[i_plane].i_visible_lines;
@@ -172,54 +227,46 @@ static void DarkenField( picture_t *p_dst, const int i_field,
              {
                  int x = 0;
  
-#ifdef CAN_COMPILE_MMXEXT
                  /* See also easy-to-read C version below. */
-                if( u_cpu & CPU_CAPABILITY_MMXEXT )
+                static const mmx_t b128 = { .uq = 0x8080808080808080ULL };
+                movq_m2r( b128, mm5 );
+                movq_m2r( i_strength_u64,  mm6 );
+                movq_m2r( remove_high_u64, mm7 );
+
+                uint64_t *po8 = (uint64_t *)p_out;
+                for( ; x < w8; x += 8 )
                  {
-                    static const mmx_t b128 = { .uq = 0x8080808080808080ULL };
-                    movq_m2r( b128, mm5 );
-                    movq_m2r( i_strength_u64,  mm6 );
-                    movq_m2r( remove_high_u64, mm7 );
-
-                    uint64_t *po = (uint64_t *)p_out;
-                    for( ; x < w8; x += 8 )
-                    {
-                        movq_m2r( (*po), mm0 );
-
-                        movq_r2r( mm5, mm2 ); /* 128 */
-                        movq_r2r( mm0, mm1 ); /* copy of data */
-                        psubusb_r2r( mm2, mm1 ); /* mm1 = max(data - 128, 0) */
-                        psubusb_r2r( mm0, mm2 ); /* mm2 = max(128 - data, 0) */
-
-                        /* >> i_strength */
-                        psrlq_r2r( mm6, mm1 );
-                        psrlq_r2r( mm6, mm2 );
-                        pand_r2r(  mm7, mm1 );
-                        pand_r2r(  mm7, mm2 );
-
-                        /* collect results from pos./neg. parts */
-                        psubb_r2r( mm2, mm1 );
-                        paddb_r2r( mm5, mm1 );
-
-                        movq_r2m( mm1, (*po++) );
-                    }
+                    movq_m2r( (*po8), mm0 );
+
+                    movq_r2r( mm5, mm2 ); /* 128 */
+                    movq_r2r( mm0, mm1 ); /* copy of data */
+                    psubusb_r2r( mm2, mm1 ); /* mm1 = max(data - 128, 0) */
+                    psubusb_r2r( mm0, mm2 ); /* mm2 = max(128 - data, 0) */
+
+                    /* >> i_strength */
+                    psrlq_r2r( mm6, mm1 );
+                    psrlq_r2r( mm6, mm2 );
+                    pand_r2r(  mm7, mm1 );
+                    pand_r2r(  mm7, mm2 );
+
+                    /* collect results from pos./neg. parts */
+                    psubb_r2r( mm2, mm1 );
+                    paddb_r2r( mm5, mm1 );
+
+                    movq_r2m( mm1, (*po8++) );
                  }
-#endif
  
-                /* C version - handle the width remainder
-                   (or everything if no MMX) */
+                /* C version - handle the width remainder */
                  uint8_t *po = p_out;
                  for( ; x < w; ++x, ++po )
                      (*po) = 128 + ( ((*po) - 128) / (1 << i_strength) );
              } /* for p_out... */
          } /* for i_plane... */
-    } /* if b_i422 */
+    } /* if process_chroma */
  
-#ifdef CAN_COMPILE_MMXEXT
-    if( u_cpu & CPU_CAPABILITY_MMXEXT )
-        emms();
-#endif
+    emms();
  }
+#endif
  
  /*****************************************************************************
   * Public functions
@@ -265,9 +312,13 @@ int RenderPhosphor( filter_t *p_filter,
              p_in_top = p_old;
      }
  
-    compose_chroma_t cc = CC_ALTLINE; /* initialize to prevent compiler warning */
-    switch( p_sys->phosphor.i_chroma_for_420 )
+    compose_chroma_t cc = CC_ALTLINE;
+    if( 2 * p_sys->chroma->p[1].h.num == p_sys->chroma->p[1].h.den &&
+        2 * p_sys->chroma->p[2].h.num == p_sys->chroma->p[2].h.den )
      {
+        /* Only 420 like chroma */
+        switch( p_sys->phosphor.i_chroma_for_420 )
+        {
          case PC_BLEND:
              cc = CC_MERGE;
              break;
@@ -287,9 +338,9 @@ int RenderPhosphor( filter_t *p_filter,
              /* The above are the only possibilities, if there are no bugs. */
              assert(0);
              break;
+        }
      }
-
-    ComposeFrame( p_filter, p_dst, p_in_top, p_in_bottom, cc );
+    ComposeFrame( p_filter, p_dst, p_in_top, p_in_bottom, cc, p_filter->fmt_in.video.i_chroma == VLC_CODEC_YV12 );
  
      /* Simulate phosphor light output decay for the old field.
  
@@ -301,7 +352,17 @@ int RenderPhosphor( filter_t *p_filter,
         In most use cases the dimmer is used.
      */
      if( p_sys->phosphor.i_dimmer_strength > 0 )
-        DarkenField( p_dst, !i_field, p_sys->phosphor.i_dimmer_strength );
-
+    {
+#ifdef CAN_COMPILE_MMXEXT
+        if( vlc_CPU_MMXEXT() )
+            DarkenFieldMMX( p_dst, !i_field, p_sys->phosphor.i_dimmer_strength,
+                p_sys->chroma->p[1].h.num == p_sys->chroma->p[1].h.den &&
+                p_sys->chroma->p[2].h.num == p_sys->chroma->p[2].h.den );
+        else
+#endif
+            DarkenField( p_dst, !i_field, p_sys->phosphor.i_dimmer_strength,
+                p_sys->chroma->p[1].h.num == p_sys->chroma->p[1].h.den &&
+                p_sys->chroma->p[2].h.num == p_sys->chroma->p[2].h.den );
+    }
      return VLC_SUCCESS;
  }