git.sesse.net Git - ffmpeg/blobdiff - libavcodec/arm/hevcdsp_init_neon.c
Merge commit '899ee03088d55152a48830df0899887f055da1de'
[ffmpeg] / libavcodec / arm / hevcdsp_init_neon.c
index a4628d2a93ff11e423ab14f5dc33cdc29afba7f9..201a088dac05684e49a87d9ba96548fce0e3c03c 100644 (file)
 #include "libavutil/attributes.h"
 #include "libavutil/arm/cpu.h"
 #include "libavcodec/hevcdsp.h"
+#include "libavcodec/avcodec.h"
 #include "hevcdsp_arm.h"
 
+void ff_hevc_sao_band_filter_neon_8_wrapper(uint8_t *_dst, uint8_t *_src,
+                                  ptrdiff_t stride_dst, ptrdiff_t stride_src,
+                                  int16_t *sao_offset_val, int sao_left_class,
+                                  int width, int height); /* forward declaration; the definition appears later in this diff */
+void ff_hevc_sao_edge_filter_neon_8_wrapper(uint8_t *_dst, uint8_t *_src, ptrdiff_t stride_dst, int16_t *sao_offset_val,
+                                  int eo, int width, int height); /* forward declaration; the definition appears later in this diff */
+
 void ff_hevc_v_loop_filter_luma_neon(uint8_t *_pix, ptrdiff_t _stride, int _beta, int *_tc, uint8_t *_no_p, uint8_t *_no_q);
 void ff_hevc_h_loop_filter_luma_neon(uint8_t *_pix, ptrdiff_t _stride, int _beta, int *_tc, uint8_t *_no_p, uint8_t *_no_q);
 void ff_hevc_v_loop_filter_chroma_neon(uint8_t *_pix, ptrdiff_t _stride, int *_tc, uint8_t *_no_p, uint8_t *_no_q);
@@ -142,6 +150,47 @@ QPEL_FUNC_UW(ff_hevc_put_qpel_uw_h3v2_neon_8);
 QPEL_FUNC_UW(ff_hevc_put_qpel_uw_h3v3_neon_8);
 #undef QPEL_FUNC_UW
 
+/* Band-filter routine implemented elsewhere (not visible in this diff); receives the 32-entry table built below. */
+void ff_hevc_sao_band_filter_neon_8(uint8_t *dst, uint8_t *src, ptrdiff_t stride_dst, ptrdiff_t stride_src, int width, int height, int16_t *offset_table);
+
+/* Spread sao_offset_val[1..4] over a zeroed 32-entry table starting at sao_left_class, then call the routine above. */
+void ff_hevc_sao_band_filter_neon_8_wrapper(uint8_t *_dst, uint8_t *_src,
+                                  ptrdiff_t stride_dst, ptrdiff_t stride_src,
+                                  int16_t *sao_offset_val, int sao_left_class,
+                                  int width, int height) {
+    int16_t band_offsets[32] = {0}; /* entries not written below keep offset 0 */
+    int i;
+
+    for (i = 1; i <= 4; i++) { /* sao_offset_val[0] is not read */
+        band_offsets[(sao_left_class + (i - 1)) & 31] = sao_offset_val[i]; /* "& 31" wraps the index into 0..31 */
+    }
+
+    ff_hevc_sao_band_filter_neon_8(_dst, _src, stride_dst, stride_src, width, height, band_offsets);
+}
+
+/* Edge-filter routine implemented elsewhere (not visible in this diff). */
+void ff_hevc_sao_edge_filter_neon_8(uint8_t *dst, uint8_t *src, ptrdiff_t stride_dst, ptrdiff_t stride_src, int width, int height,
+                                    int a_stride, int b_stride, int16_t *sao_offset_val, uint8_t *edge_idx);
+
+/* Derive the a/b offsets for edge class eo from the pos table (x + y * source stride) and call the routine above. */
+void ff_hevc_sao_edge_filter_neon_8_wrapper(uint8_t *_dst, uint8_t *_src, ptrdiff_t stride_dst, int16_t *sao_offset_val,
+                                  int eo, int width, int height) {
+    static uint8_t edge_idx[] = { 1, 2, 0, 3, 4 };
+    /* pos[eo][n] = { x, y } step for offset n (0 -> a_stride, 1 -> b_stride); eo is used unchecked as the row index */
+    static const int8_t pos[4][2][2] = {
+        { { -1,  0 }, {  1, 0 } }, // horizontal
+        { {  0, -1 }, {  0, 1 } }, // vertical
+        { { -1, -1 }, {  1, 1 } }, // 45 degree
+        { {  1, -1 }, { -1, 1 } }, // 135 degree
+    };
+    const ptrdiff_t stride_src = (2*MAX_PB_SIZE + AV_INPUT_BUFFER_PADDING_SIZE); /* source stride is fixed here, not a parameter */
+    const int8_t (*step)[2] = pos[eo];
+    int a_stride = step[0][0] + step[0][1] * stride_src;
+    int b_stride = step[1][0] + step[1][1] * stride_src;
+
+    ff_hevc_sao_edge_filter_neon_8(_dst, _src, stride_dst, stride_src, width, height, a_stride, b_stride, sao_offset_val, edge_idx);
+}
+
 void ff_hevc_put_qpel_neon_wrapper(int16_t *dst, uint8_t *src, ptrdiff_t srcstride,
                                    int height, intptr_t mx, intptr_t my, int width) {
 
@@ -168,6 +217,16 @@ av_cold void ff_hevc_dsp_init_neon(HEVCDSPContext *c, const int bit_depth)
         c->hevc_h_loop_filter_luma     = ff_hevc_h_loop_filter_luma_neon;
         c->hevc_v_loop_filter_chroma   = ff_hevc_v_loop_filter_chroma_neon;
         c->hevc_h_loop_filter_chroma   = ff_hevc_h_loop_filter_chroma_neon;
+        c->sao_band_filter[0]          = ff_hevc_sao_band_filter_neon_8_wrapper;
+        c->sao_band_filter[1]          = ff_hevc_sao_band_filter_neon_8_wrapper;
+        c->sao_band_filter[2]          = ff_hevc_sao_band_filter_neon_8_wrapper;
+        c->sao_band_filter[3]          = ff_hevc_sao_band_filter_neon_8_wrapper;
+        c->sao_band_filter[4]          = ff_hevc_sao_band_filter_neon_8_wrapper;
+        c->sao_edge_filter[0]          = ff_hevc_sao_edge_filter_neon_8_wrapper;
+        c->sao_edge_filter[1]          = ff_hevc_sao_edge_filter_neon_8_wrapper;
+        c->sao_edge_filter[2]          = ff_hevc_sao_edge_filter_neon_8_wrapper;
+        c->sao_edge_filter[3]          = ff_hevc_sao_edge_filter_neon_8_wrapper;
+        c->sao_edge_filter[4]          = ff_hevc_sao_edge_filter_neon_8_wrapper;
         c->add_residual[0]             = ff_hevc_add_residual_4x4_8_neon;
         c->add_residual[1]             = ff_hevc_add_residual_8x8_8_neon;
         c->add_residual[2]             = ff_hevc_add_residual_16x16_8_neon;