git.sesse.net Git - ffmpeg/blobdiff - libavcodec/h264pred.c
xsubdec: Convert to the new bitstream reader
[ffmpeg] / libavcodec / h264pred.c
index b3701ef3b82bb66b0cc402fbada6ece4ec2e355f..7627eb076d7f8e1511bd98deacad383c18cc37f0 100644 (file)
 
 /**
  * @file
- * H.264 / AVC / MPEG4 part10 prediction functions.
+ * H.264 / AVC / MPEG-4 part10 prediction functions.
  * @author Michael Niedermayer <michaelni@gmx.at>
  */
 
+#include "libavutil/attributes.h"
+#include "libavutil/intreadwrite.h"
+#include "avcodec.h"
 #include "h264pred.h"
 
 #define BIT_DEPTH 8
 #include "h264pred_template.c"
 #undef BIT_DEPTH
 
-static void pred4x4_vertical_vp8_c(uint8_t *src, const uint8_t *topright, int stride){
-    const int lt= src[-1-1*stride];
+static void pred4x4_vertical_vp8_c(uint8_t *src, const uint8_t *topright,
+                                   ptrdiff_t stride)
+{
+    const unsigned lt = src[-1-1*stride];
     LOAD_TOP_EDGE
     LOAD_TOP_RIGHT_EDGE
     uint32_t v = PACK_4U8((lt + 2*t0 + t1 + 2) >> 2,
@@ -54,8 +59,10 @@ static void pred4x4_vertical_vp8_c(uint8_t *src, const uint8_t *topright, int st
     AV_WN32A(src+3*stride, v);
 }
 
-static void pred4x4_horizontal_vp8_c(uint8_t *src, const uint8_t *topright, int stride){
-    const int lt= src[-1-1*stride];
+static void pred4x4_horizontal_vp8_c(uint8_t *src, const uint8_t *topright,
+                                     ptrdiff_t stride)
+{
+    const unsigned lt = src[-1-1*stride];
     LOAD_LEFT_EDGE
 
     AV_WN32A(src+0*stride, ((lt + 2*l0 + l1 + 2) >> 2)*0x01010101);
@@ -64,11 +71,11 @@ static void pred4x4_horizontal_vp8_c(uint8_t *src, const uint8_t *topright, int
     AV_WN32A(src+3*stride, ((l2 + 2*l3 + l3 + 2) >> 2)*0x01010101);
 }
 
-static void pred4x4_down_left_svq3_c(uint8_t *src, const uint8_t *topright, int stride){
+static void pred4x4_down_left_svq3_c(uint8_t *src, const uint8_t *topright,
+                                     ptrdiff_t stride)
+{
     LOAD_TOP_EDGE
     LOAD_LEFT_EDGE
-    const av_unused int unu0= t0;
-    const av_unused int unu1= l0;
 
     src[0+0*stride]=(l1 + t1)>>1;
     src[1+0*stride]=
@@ -88,7 +95,9 @@ static void pred4x4_down_left_svq3_c(uint8_t *src, const uint8_t *topright, int
     src[3+3*stride]=(l3 + t3)>>1;
 }
 
-static void pred4x4_down_left_rv40_c(uint8_t *src, const uint8_t *topright, int stride){
+static void pred4x4_down_left_rv40_c(uint8_t *src, const uint8_t *topright,
+                                     ptrdiff_t stride)
+{
     LOAD_TOP_EDGE
     LOAD_TOP_RIGHT_EDGE
     LOAD_LEFT_EDGE
@@ -112,7 +121,10 @@ static void pred4x4_down_left_rv40_c(uint8_t *src, const uint8_t *topright, int
     src[3+3*stride]=(t6 + t7 + 1 + l6 + l7 + 1)>>2;
 }
 
-static void pred4x4_down_left_rv40_nodown_c(uint8_t *src, const uint8_t *topright, int stride){
+static void pred4x4_down_left_rv40_nodown_c(uint8_t *src,
+                                            const uint8_t *topright,
+                                            ptrdiff_t stride)
+{
     LOAD_TOP_EDGE
     LOAD_TOP_RIGHT_EDGE
     LOAD_LEFT_EDGE
@@ -135,8 +147,11 @@ static void pred4x4_down_left_rv40_nodown_c(uint8_t *src, const uint8_t *toprigh
     src[3+3*stride]=(t6 + t7 + 1 + 2*l3 + 1)>>2;
 }
 
-static void pred4x4_vertical_left_rv40(uint8_t *src, const uint8_t *topright, int stride,
-                                       const int l0, const int l1, const int l2, const int l3, const int l4){
+static void pred4x4_vertical_left_rv40(uint8_t *src, const uint8_t *topright,
+                                       ptrdiff_t stride,
+                                       const int l0, const int l1, const int l2,
+                                       const int l3, const int l4)
+{
     LOAD_TOP_EDGE
     LOAD_TOP_RIGHT_EDGE
 
@@ -158,20 +173,27 @@ static void pred4x4_vertical_left_rv40(uint8_t *src, const uint8_t *topright, in
     src[3+3*stride]=(t4 + 2*t5 + t6 + 2)>>2;
 }
 
-static void pred4x4_vertical_left_rv40_c(uint8_t *src, const uint8_t *topright, int stride){
+static void pred4x4_vertical_left_rv40_c(uint8_t *src, const uint8_t *topright,
+                                         ptrdiff_t stride)
+{
     LOAD_LEFT_EDGE
     LOAD_DOWN_LEFT_EDGE
 
     pred4x4_vertical_left_rv40(src, topright, stride, l0, l1, l2, l3, l4);
 }
 
-static void pred4x4_vertical_left_rv40_nodown_c(uint8_t *src, const uint8_t *topright, int stride){
+static void pred4x4_vertical_left_rv40_nodown_c(uint8_t *src,
+                                                const uint8_t *topright,
+                                                ptrdiff_t stride)
+{
     LOAD_LEFT_EDGE
 
     pred4x4_vertical_left_rv40(src, topright, stride, l0, l1, l2, l3, l3);
 }
 
-static void pred4x4_vertical_left_vp8_c(uint8_t *src, const uint8_t *topright, int stride){
+static void pred4x4_vertical_left_vp8_c(uint8_t *src, const uint8_t *topright,
+                                        ptrdiff_t stride)
+{
     LOAD_TOP_EDGE
     LOAD_TOP_RIGHT_EDGE
 
@@ -193,7 +215,9 @@ static void pred4x4_vertical_left_vp8_c(uint8_t *src, const uint8_t *topright, i
     src[3+3*stride]=(t5 + 2*t6 + t7 + 2)>>2;
 }
 
-static void pred4x4_horizontal_up_rv40_c(uint8_t *src, const uint8_t *topright, int stride){
+static void pred4x4_horizontal_up_rv40_c(uint8_t *src, const uint8_t *topright,
+                                         ptrdiff_t stride)
+{
     LOAD_LEFT_EDGE
     LOAD_DOWN_LEFT_EDGE
     LOAD_TOP_EDGE
@@ -217,7 +241,10 @@ static void pred4x4_horizontal_up_rv40_c(uint8_t *src, const uint8_t *topright,
     src[3+3*stride]=(l4 + 2*l5 + l6 + 2)>>2;
 }
 
-static void pred4x4_horizontal_up_rv40_nodown_c(uint8_t *src, const uint8_t *topright, int stride){
+static void pred4x4_horizontal_up_rv40_nodown_c(uint8_t *src,
+                                                const uint8_t *topright,
+                                                ptrdiff_t stride)
+{
     LOAD_LEFT_EDGE
     LOAD_TOP_EDGE
     LOAD_TOP_RIGHT_EDGE
@@ -240,13 +267,15 @@ static void pred4x4_horizontal_up_rv40_nodown_c(uint8_t *src, const uint8_t *top
     src[3+3*stride]=l3;
 }
 
-static void pred4x4_tm_vp8_c(uint8_t *src, const uint8_t *topright, int stride){
-    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP - src[-1-stride];
+static void pred4x4_tm_vp8_c(uint8_t *src, const uint8_t *topright,
+                             ptrdiff_t stride)
+{
+    const uint8_t *cm = ff_crop_tab + MAX_NEG_CROP - src[-1-stride];
     uint8_t *top = src-stride;
     int y;
 
     for (y = 0; y < 4; y++) {
-        uint8_t *cm_in = cm + src[-1];
+        const uint8_t *cm_in = cm + src[-1];
         src[0] = cm_in[top[0]];
         src[1] = cm_in[top[1]];
         src[2] = cm_in[top[2]];
@@ -255,21 +284,24 @@ static void pred4x4_tm_vp8_c(uint8_t *src, const uint8_t *topright, int stride){
     }
 }
 
-static void pred16x16_plane_svq3_c(uint8_t *src, int stride){
+static void pred16x16_plane_svq3_c(uint8_t *src, ptrdiff_t stride)
+{
     pred16x16_plane_compat_8_c(src, stride, 1, 0);
 }
 
-static void pred16x16_plane_rv40_c(uint8_t *src, int stride){
+static void pred16x16_plane_rv40_c(uint8_t *src, ptrdiff_t stride)
+{
     pred16x16_plane_compat_8_c(src, stride, 0, 1);
 }
 
-static void pred16x16_tm_vp8_c(uint8_t *src, int stride){
-    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP - src[-1-stride];
+static void pred16x16_tm_vp8_c(uint8_t *src, ptrdiff_t stride)
+{
+    const uint8_t *cm = ff_crop_tab + MAX_NEG_CROP - src[-1-stride];
     uint8_t *top = src-stride;
     int y;
 
     for (y = 0; y < 16; y++) {
-        uint8_t *cm_in = cm + src[-1];
+        const uint8_t *cm_in = cm + src[-1];
         src[0]  = cm_in[top[0]];
         src[1]  = cm_in[top[1]];
         src[2]  = cm_in[top[2]];
@@ -290,9 +322,10 @@ static void pred16x16_tm_vp8_c(uint8_t *src, int stride){
     }
 }
 
-static void pred8x8_left_dc_rv40_c(uint8_t *src, int stride){
+static void pred8x8_left_dc_rv40_c(uint8_t *src, ptrdiff_t stride)
+{
     int i;
-    int dc0;
+    unsigned dc0;
 
     dc0=0;
     for(i=0;i<8; i++)
@@ -305,9 +338,10 @@ static void pred8x8_left_dc_rv40_c(uint8_t *src, int stride){
     }
 }
 
-static void pred8x8_top_dc_rv40_c(uint8_t *src, int stride){
+static void pred8x8_top_dc_rv40_c(uint8_t *src, ptrdiff_t stride)
+{
     int i;
-    int dc0;
+    unsigned dc0;
 
     dc0=0;
     for(i=0;i<8; i++)
@@ -320,9 +354,10 @@ static void pred8x8_top_dc_rv40_c(uint8_t *src, int stride){
     }
 }
 
-static void pred8x8_dc_rv40_c(uint8_t *src, int stride){
+static void pred8x8_dc_rv40_c(uint8_t *src, ptrdiff_t stride)
+{
     int i;
-    int dc0=0;
+    unsigned dc0 = 0;
 
     for(i=0;i<4; i++){
         dc0+= src[-1+i*stride] + src[i-stride];
@@ -341,13 +376,14 @@ static void pred8x8_dc_rv40_c(uint8_t *src, int stride){
     }
 }
 
-static void pred8x8_tm_vp8_c(uint8_t *src, int stride){
-    uint8_t *cm = ff_cropTbl + MAX_NEG_CROP - src[-1-stride];
+static void pred8x8_tm_vp8_c(uint8_t *src, ptrdiff_t stride)
+{
+    const uint8_t *cm = ff_crop_tab + MAX_NEG_CROP - src[-1-stride];
     uint8_t *top = src-stride;
     int y;
 
     for (y = 0; y < 8; y++) {
-        uint8_t *cm_in = cm + src[-1];
+        const uint8_t *cm_in = cm + src[-1];
         src[0] = cm_in[top[0]];
         src[1] = cm_in[top[1]];
         src[2] = cm_in[top[2]];
@@ -363,9 +399,10 @@ static void pred8x8_tm_vp8_c(uint8_t *src, int stride){
 /**
  * Set the intra prediction function pointers.
  */
-void ff_h264_pred_init(H264PredContext *h, int codec_id, const int bit_depth){
-//    MpegEncContext * const s = &h->s;
-
+av_cold void ff_h264_pred_init(H264PredContext *h, int codec_id,
+                               const int bit_depth,
+                               const int chroma_format_idc)
+{
 #undef FUNC
 #undef FUNCC
 #define FUNC(a, depth) a ## _ ## depth
@@ -373,8 +410,8 @@ void ff_h264_pred_init(H264PredContext *h, int codec_id, const int bit_depth){
 #define FUNCD(a) a ## _c
 
 #define H264_PRED(depth) \
-    if(codec_id != CODEC_ID_RV40){\
-        if(codec_id == CODEC_ID_VP8) {\
+    if(codec_id != AV_CODEC_ID_RV40){\
+        if (codec_id == AV_CODEC_ID_VP7 || codec_id == AV_CODEC_ID_VP8) {\
             h->pred4x4[VERT_PRED       ]= FUNCD(pred4x4_vertical_vp8);\
             h->pred4x4[HOR_PRED        ]= FUNCD(pred4x4_horizontal_vp8);\
         } else {\
@@ -382,22 +419,21 @@ void ff_h264_pred_init(H264PredContext *h, int codec_id, const int bit_depth){
             h->pred4x4[HOR_PRED        ]= FUNCC(pred4x4_horizontal        , depth);\
         }\
         h->pred4x4[DC_PRED             ]= FUNCC(pred4x4_dc                , depth);\
-        if(codec_id == CODEC_ID_SVQ3)\
+        if(codec_id == AV_CODEC_ID_SVQ3)\
             h->pred4x4[DIAG_DOWN_LEFT_PRED ]= FUNCD(pred4x4_down_left_svq3);\
         else\
             h->pred4x4[DIAG_DOWN_LEFT_PRED ]= FUNCC(pred4x4_down_left     , depth);\
         h->pred4x4[DIAG_DOWN_RIGHT_PRED]= FUNCC(pred4x4_down_right        , depth);\
         h->pred4x4[VERT_RIGHT_PRED     ]= FUNCC(pred4x4_vertical_right    , depth);\
         h->pred4x4[HOR_DOWN_PRED       ]= FUNCC(pred4x4_horizontal_down   , depth);\
-        if (codec_id == CODEC_ID_VP8) {\
+        if (codec_id == AV_CODEC_ID_VP7 || codec_id == AV_CODEC_ID_VP8) {\
             h->pred4x4[VERT_LEFT_PRED  ]= FUNCD(pred4x4_vertical_left_vp8);\
         } else\
             h->pred4x4[VERT_LEFT_PRED  ]= FUNCC(pred4x4_vertical_left     , depth);\
         h->pred4x4[HOR_UP_PRED         ]= FUNCC(pred4x4_horizontal_up     , depth);\
-        if(codec_id != CODEC_ID_VP8) {\
+        if (codec_id != AV_CODEC_ID_VP7 && codec_id != AV_CODEC_ID_VP8) {\
             h->pred4x4[LEFT_DC_PRED    ]= FUNCC(pred4x4_left_dc           , depth);\
             h->pred4x4[TOP_DC_PRED     ]= FUNCC(pred4x4_top_dc            , depth);\
-            h->pred4x4[DC_128_PRED     ]= FUNCC(pred4x4_128_dc            , depth);\
         } else {\
             h->pred4x4[TM_VP8_PRED     ]= FUNCD(pred4x4_tm_vp8);\
             h->pred4x4[DC_127_PRED     ]= FUNCC(pred4x4_127_dc            , depth);\
@@ -405,6 +441,8 @@ void ff_h264_pred_init(H264PredContext *h, int codec_id, const int bit_depth){
             h->pred4x4[VERT_VP8_PRED   ]= FUNCC(pred4x4_vertical          , depth);\
             h->pred4x4[HOR_VP8_PRED    ]= FUNCC(pred4x4_horizontal        , depth);\
         }\
+        if (codec_id != AV_CODEC_ID_VP8)\
+            h->pred4x4[DC_128_PRED     ]= FUNCC(pred4x4_128_dc            , depth);\
     }else{\
         h->pred4x4[VERT_PRED           ]= FUNCC(pred4x4_vertical          , depth);\
         h->pred4x4[HOR_PRED            ]= FUNCC(pred4x4_horizontal        , depth);\
@@ -436,42 +474,67 @@ void ff_h264_pred_init(H264PredContext *h, int codec_id, const int bit_depth){
     h->pred8x8l[TOP_DC_PRED         ]= FUNCC(pred8x8l_top_dc              , depth);\
     h->pred8x8l[DC_128_PRED         ]= FUNCC(pred8x8l_128_dc              , depth);\
 \
-    h->pred8x8[VERT_PRED8x8   ]= FUNCC(pred8x8_vertical                   , depth);\
-    h->pred8x8[HOR_PRED8x8    ]= FUNCC(pred8x8_horizontal                 , depth);\
-    if (codec_id != CODEC_ID_VP8) {\
-        h->pred8x8[PLANE_PRED8x8]= FUNCC(pred8x8_plane                    , depth);\
+    if (chroma_format_idc <= 1) {\
+        h->pred8x8[VERT_PRED8x8   ]= FUNCC(pred8x8_vertical               , depth);\
+        h->pred8x8[HOR_PRED8x8    ]= FUNCC(pred8x8_horizontal             , depth);\
+    } else {\
+        h->pred8x8[VERT_PRED8x8   ]= FUNCC(pred8x16_vertical              , depth);\
+        h->pred8x8[HOR_PRED8x8    ]= FUNCC(pred8x16_horizontal            , depth);\
+    }\
+    if (codec_id != AV_CODEC_ID_VP7 && codec_id != AV_CODEC_ID_VP8) {\
+        if (chroma_format_idc <= 1) {\
+            h->pred8x8[PLANE_PRED8x8]= FUNCC(pred8x8_plane                , depth);\
+        } else {\
+            h->pred8x8[PLANE_PRED8x8]= FUNCC(pred8x16_plane               , depth);\
+        }\
     } else\
         h->pred8x8[PLANE_PRED8x8]= FUNCD(pred8x8_tm_vp8);\
-    if(codec_id != CODEC_ID_RV40 && codec_id != CODEC_ID_VP8){\
-        h->pred8x8[DC_PRED8x8     ]= FUNCC(pred8x8_dc                     , depth);\
-        h->pred8x8[LEFT_DC_PRED8x8]= FUNCC(pred8x8_left_dc                , depth);\
-        h->pred8x8[TOP_DC_PRED8x8 ]= FUNCC(pred8x8_top_dc                 , depth);\
-        h->pred8x8[ALZHEIMER_DC_L0T_PRED8x8 ]= FUNC(pred8x8_mad_cow_dc_l0t, depth);\
-        h->pred8x8[ALZHEIMER_DC_0LT_PRED8x8 ]= FUNC(pred8x8_mad_cow_dc_0lt, depth);\
-        h->pred8x8[ALZHEIMER_DC_L00_PRED8x8 ]= FUNC(pred8x8_mad_cow_dc_l00, depth);\
-        h->pred8x8[ALZHEIMER_DC_0L0_PRED8x8 ]= FUNC(pred8x8_mad_cow_dc_0l0, depth);\
+    if (codec_id != AV_CODEC_ID_RV40 && codec_id != AV_CODEC_ID_VP7 && \
+        codec_id != AV_CODEC_ID_VP8) {\
+        if (chroma_format_idc <= 1) {\
+            h->pred8x8[DC_PRED8x8     ]= FUNCC(pred8x8_dc                     , depth);\
+            h->pred8x8[LEFT_DC_PRED8x8]= FUNCC(pred8x8_left_dc                , depth);\
+            h->pred8x8[TOP_DC_PRED8x8 ]= FUNCC(pred8x8_top_dc                 , depth);\
+            h->pred8x8[ALZHEIMER_DC_L0T_PRED8x8 ]= FUNC(pred8x8_mad_cow_dc_l0t, depth);\
+            h->pred8x8[ALZHEIMER_DC_0LT_PRED8x8 ]= FUNC(pred8x8_mad_cow_dc_0lt, depth);\
+            h->pred8x8[ALZHEIMER_DC_L00_PRED8x8 ]= FUNC(pred8x8_mad_cow_dc_l00, depth);\
+            h->pred8x8[ALZHEIMER_DC_0L0_PRED8x8 ]= FUNC(pred8x8_mad_cow_dc_0l0, depth);\
+        } else {\
+            h->pred8x8[DC_PRED8x8     ]= FUNCC(pred8x16_dc                    , depth);\
+            h->pred8x8[LEFT_DC_PRED8x8]= FUNCC(pred8x16_left_dc               , depth);\
+            h->pred8x8[TOP_DC_PRED8x8 ]= FUNCC(pred8x16_top_dc                , depth);\
+            h->pred8x8[ALZHEIMER_DC_L0T_PRED8x8 ]= FUNC(pred8x16_mad_cow_dc_l0t, depth);\
+            h->pred8x8[ALZHEIMER_DC_0LT_PRED8x8 ]= FUNC(pred8x16_mad_cow_dc_0lt, depth);\
+            h->pred8x8[ALZHEIMER_DC_L00_PRED8x8 ]= FUNC(pred8x16_mad_cow_dc_l00, depth);\
+            h->pred8x8[ALZHEIMER_DC_0L0_PRED8x8 ]= FUNC(pred8x16_mad_cow_dc_0l0, depth);\
+        }\
     }else{\
         h->pred8x8[DC_PRED8x8     ]= FUNCD(pred8x8_dc_rv40);\
         h->pred8x8[LEFT_DC_PRED8x8]= FUNCD(pred8x8_left_dc_rv40);\
         h->pred8x8[TOP_DC_PRED8x8 ]= FUNCD(pred8x8_top_dc_rv40);\
-        if (codec_id == CODEC_ID_VP8) {\
+        if (codec_id == AV_CODEC_ID_VP7 || codec_id == AV_CODEC_ID_VP8) {\
             h->pred8x8[DC_127_PRED8x8]= FUNCC(pred8x8_127_dc              , depth);\
             h->pred8x8[DC_129_PRED8x8]= FUNCC(pred8x8_129_dc              , depth);\
         }\
     }\
-    h->pred8x8[DC_128_PRED8x8 ]= FUNCC(pred8x8_128_dc                     , depth);\
+    if (chroma_format_idc <= 1) {\
+        h->pred8x8[DC_128_PRED8x8 ]= FUNCC(pred8x8_128_dc                 , depth);\
+    } else {\
+        h->pred8x8[DC_128_PRED8x8 ]= FUNCC(pred8x16_128_dc                , depth);\
+    }\
 \
     h->pred16x16[DC_PRED8x8     ]= FUNCC(pred16x16_dc                     , depth);\
     h->pred16x16[VERT_PRED8x8   ]= FUNCC(pred16x16_vertical               , depth);\
     h->pred16x16[HOR_PRED8x8    ]= FUNCC(pred16x16_horizontal             , depth);\
     switch(codec_id){\
-    case CODEC_ID_SVQ3:\
+    case AV_CODEC_ID_SVQ3:\
        h->pred16x16[PLANE_PRED8x8  ]= FUNCD(pred16x16_plane_svq3);\
        break;\
-    case CODEC_ID_RV40:\
+    case AV_CODEC_ID_RV40:\
        h->pred16x16[PLANE_PRED8x8  ]= FUNCD(pred16x16_plane_rv40);\
        break;\
-    case CODEC_ID_VP8:\
+    case AV_CODEC_ID_VP7:\
+    case AV_CODEC_ID_VP8:\
        h->pred16x16[PLANE_PRED8x8  ]= FUNCD(pred16x16_tm_vp8);\
        h->pred16x16[DC_127_PRED8x8]= FUNCC(pred16x16_127_dc               , depth);\
        h->pred16x16[DC_129_PRED8x8]= FUNCC(pred16x16_129_dc               , depth);\
@@ -484,13 +547,18 @@ void ff_h264_pred_init(H264PredContext *h, int codec_id, const int bit_depth){
     h->pred16x16[TOP_DC_PRED8x8 ]= FUNCC(pred16x16_top_dc                 , depth);\
     h->pred16x16[DC_128_PRED8x8 ]= FUNCC(pred16x16_128_dc                 , depth);\
 \
-    /* special lossless h/v prediction for h264 */ \
+    /* special lossless h/v prediction for H.264 */ \
     h->pred4x4_add  [VERT_PRED   ]= FUNCC(pred4x4_vertical_add            , depth);\
     h->pred4x4_add  [ HOR_PRED   ]= FUNCC(pred4x4_horizontal_add          , depth);\
     h->pred8x8l_add [VERT_PRED   ]= FUNCC(pred8x8l_vertical_add           , depth);\
     h->pred8x8l_add [ HOR_PRED   ]= FUNCC(pred8x8l_horizontal_add         , depth);\
+    if (chroma_format_idc <= 1) {\
     h->pred8x8_add  [VERT_PRED8x8]= FUNCC(pred8x8_vertical_add            , depth);\
     h->pred8x8_add  [ HOR_PRED8x8]= FUNCC(pred8x8_horizontal_add          , depth);\
+    } else {\
+        h->pred8x8_add  [VERT_PRED8x8]= FUNCC(pred8x16_vertical_add            , depth);\
+        h->pred8x8_add  [ HOR_PRED8x8]= FUNCC(pred8x16_horizontal_add          , depth);\
+    }\
     h->pred16x16_add[VERT_PRED8x8]= FUNCC(pred16x16_vertical_add          , depth);\
     h->pred16x16_add[ HOR_PRED8x8]= FUNCC(pred16x16_horizontal_add        , depth);\
 
@@ -506,6 +574,10 @@ void ff_h264_pred_init(H264PredContext *h, int codec_id, const int bit_depth){
             break;
     }
 
-    if (ARCH_ARM) ff_h264_pred_init_arm(h, codec_id, bit_depth);
-    if (HAVE_MMX) ff_h264_pred_init_x86(h, codec_id, bit_depth);
+    if (ARCH_AARCH64)
+        ff_h264_pred_init_aarch64(h, codec_id, bit_depth, chroma_format_idc);
+    if (ARCH_ARM)
+        ff_h264_pred_init_arm(h, codec_id, bit_depth, chroma_format_idc);
+    if (ARCH_X86)
+        ff_h264_pred_init_x86(h, codec_id, bit_depth, chroma_format_idc);
 }