]> git.sesse.net Git - x264/commitdiff
arm: Implement x264_deblock_h_chroma_422_neon
authorMartin Storsjö <martin@martin.st>
Tue, 25 Aug 2015 11:38:15 +0000 (14:38 +0300)
committerHenrik Gramner <henrik@gramner.com>
Sun, 11 Oct 2015 16:44:54 +0000 (18:44 +0200)
checkasm timing       Cortex-A7      A8     A9
deblock_h_chroma_422_c       6953    6269   5145
deblock_h_chroma_422_neon    3905    2569   2551

common/arm/deblock-a.S
common/deblock.c

index 446e6780a16cf57194a393ba22b8965d762f2438..a300220f80c0c24a03e2eb99b779e0a390c4b2f5 100644 (file)
@@ -4,6 +4,7 @@
  * Copyright (C) 2009-2015 x264 project
  *
  * Authors: Mans Rullgard <mans@mansr.com>
+ *          Martin Storsjo <martin@martin.st>
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -261,6 +262,7 @@ function x264_deblock_h_chroma_neon
     h264_loop_filter_start
 
     sub             r0,  r0,  #4
+deblock_h_chroma:
     vld1.8          {d18}, [r0], r1
     vld1.8          {d16}, [r0], r1
     vld1.8          {d0},  [r0], r1
@@ -290,6 +292,22 @@ function x264_deblock_h_chroma_neon
     bx              lr
 endfunc
 
+function x264_deblock_h_chroma_422_neon
+    h264_loop_filter_start
+    push            {lr}
+    sub             r0,  r0,  #4
+    add             r1,  r1,  r1
+    bl              deblock_h_chroma
+    ldr             ip,  [sp, #4]
+    ldr             ip,  [ip]
+    vdup.32         d24, ip
+    sub             r0,  r0,  r1, lsl #3
+    add             r0,  r0,  r1, lsr #1
+    sub             r0,  r0,  #2
+    pop             {lr}
+    b               deblock_h_chroma
+endfunc
+
 function x264_deblock_strength_neon
     ldr             ip,  [sp]
     vmov.i8         q8,  #0
index 374e293667968f861a33c40e46c8ffb2e88c4242..83bda62091f2ee9ca64dc9d82d8e48a75f545710 100644 (file)
@@ -739,8 +739,8 @@ void x264_deblock_h_chroma_neon( uint8_t *pix, intptr_t stride, int alpha, int b
 void x264_deblock_strength_neon( uint8_t nnz[X264_SCAN8_SIZE], int8_t ref[2][X264_SCAN8_LUMA_SIZE],
                                  int16_t mv[2][X264_SCAN8_LUMA_SIZE][2], uint8_t bs[2][8][4],
                                  int mvy_limit, int bframe );
-#if ARCH_AARCH64
 void x264_deblock_h_chroma_422_neon( uint8_t *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 );
+#if ARCH_AARCH64
 void x264_deblock_h_chroma_mbaff_neon( uint8_t *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 );
 void x264_deblock_h_chroma_intra_mbaff_neon( uint8_t *pix, intptr_t stride, int alpha, int beta );
 void x264_deblock_h_chroma_intra_neon( uint8_t *pix, intptr_t stride, int alpha, int beta );
@@ -873,11 +873,11 @@ void x264_deblock_init( int cpu, x264_deblock_function_t *pf, int b_mbaff )
         pf->deblock_luma[0] = x264_deblock_h_luma_neon;
         pf->deblock_chroma[1] = x264_deblock_v_chroma_neon;
         pf->deblock_h_chroma_420 = x264_deblock_h_chroma_neon;
+        pf->deblock_h_chroma_422 = x264_deblock_h_chroma_422_neon;
 #if ARCH_AARCH64
         pf->deblock_chroma_420_mbaff = x264_deblock_h_chroma_mbaff_neon;
         pf->deblock_chroma_420_intra_mbaff = x264_deblock_h_chroma_intra_mbaff_neon;
         pf->deblock_h_chroma_420_intra = x264_deblock_h_chroma_intra_neon;
-        pf->deblock_h_chroma_422 = x264_deblock_h_chroma_422_neon;
         pf->deblock_h_chroma_422_intra = x264_deblock_h_chroma_422_intra_neon;
         pf->deblock_chroma_intra[1] = x264_deblock_v_chroma_intra_neon;
         pf->deblock_luma_intra[0] = x264_deblock_h_luma_intra_neon;