git.sesse.net Git - x264/commitdiff
aarch64: x264_deblock_h_chroma_422_neon
author Janne Grunau <janne-x264@jannau.net>
Mon, 13 Oct 2014 15:29:22 +0000 (17:29 +0200)
committer Anton Mitrofanov <BugMaster@narod.ru>
Tue, 16 Dec 2014 17:40:04 +0000 (20:40 +0300)
deblock_h_chroma_422 2.5 times faster

common/aarch64/deblock-a.S
common/deblock.c

index 9618665df71767d697499b42d6726778f284b6ad..f16d42b7d7e3c5de87747b38faace4043bd06cb7 100644 (file)
@@ -247,6 +247,7 @@ function x264_deblock_h_chroma_neon, export=1
     h264_loop_filter_start
 
     sub             x0,  x0,  #4
+deblock_h_chroma:
     ld1             {v18.d}[0], [x0], x1
     ld1             {v16.d}[0], [x0], x1
     ld1             {v0.d}[0],  [x0], x1
@@ -275,6 +276,18 @@ function x264_deblock_h_chroma_neon, export=1
     ret
 endfunc
 
+function x264_deblock_h_chroma_422_neon, export=1   // 422 variant: two passes over the h_chroma filter with a doubled stride
+    add             x5,  x0,  x1                // x5 = pix + stride: first row of the second pass
+    add             x1,  x1,  x1                // stride *= 2, so each pass steps over every other row
+    mov             x7,  x30                    // bl overwrites the link register; keep the caller's return address in x7
+    bl              X(x264_deblock_h_chroma_neon)   // first pass: rows pix, pix + 2*stride, ...
+    ldr             w6,  [x4]                   // reload 4 bytes from tc0; assumes the first pass left x1-x5 and x7 untouched -- TODO confirm against callee
+    mov             x30, x7                     // restore the return address so the ret reached via the branch below goes to our caller
+    sub             x0,  x5,  #4                // same -4 byte offset x264_deblock_h_chroma_neon applies before deblock_h_chroma
+    mov             v24.s[0], w6                // tc0 bytes into lane 0 of v24 -- presumably mirrors h264_loop_filter_start, which is skipped by the branch; verify
+    b               deblock_h_chroma            // second pass: rows pix + stride, pix + 3*stride, ...; branch (not bl) so no extra return is needed
+endfunc
+
 .macro h264_loop_filter_chroma8
     dup             v22.8b,  w2                 // alpha
     uxtl            v24.8h,  v24.8b
index b0b8d2b625426845e8686e9673d912a7f6271681..84201c6b68a6aa6c525ce06f4d179d3591fa925f 100644 (file)
@@ -738,6 +738,7 @@ void x264_deblock_strength_neon( uint8_t nnz[X264_SCAN8_SIZE], int8_t ref[2][X26
                                  int16_t mv[2][X264_SCAN8_LUMA_SIZE][2], uint8_t bs[2][8][4],
                                  int mvy_limit, int bframe );
 #if ARCH_AARCH64
+void x264_deblock_h_chroma_422_neon( uint8_t *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 );
 void x264_deblock_h_chroma_mbaff_neon( uint8_t *pix, intptr_t stride, int alpha, int beta, int8_t *tc0 );
 void x264_deblock_h_chroma_intra_mbaff_neon( uint8_t *pix, intptr_t stride, int alpha, int beta );
 void x264_deblock_h_chroma_intra_neon( uint8_t *pix, intptr_t stride, int alpha, int beta );
@@ -856,6 +857,7 @@ void x264_deblock_init( int cpu, x264_deblock_function_t *pf, int b_mbaff )
         pf->deblock_chroma_420_mbaff = x264_deblock_h_chroma_mbaff_neon;
         pf->deblock_chroma_420_intra_mbaff = x264_deblock_h_chroma_intra_mbaff_neon;
         pf->deblock_h_chroma_420_intra = x264_deblock_h_chroma_intra_neon;
+        pf->deblock_h_chroma_422 = x264_deblock_h_chroma_422_neon;
         pf->deblock_h_chroma_422_intra = x264_deblock_h_chroma_422_intra_neon;
         pf->deblock_chroma_intra[1] = x264_deblock_v_chroma_intra_neon;
 #endif