]> git.sesse.net Git - ffmpeg/blobdiff - libavcodec/cabac.c
rv40: NEON optimised chroma MC
[ffmpeg] / libavcodec / cabac.c
index 94829e90653914355cfaa801bf14e6b1651d164d..57ab395ed73bed62423b3d3b497c0d548c673f65 100644 (file)
@@ -2,33 +2,32 @@
  * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder
  * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
  *
- * This file is part of FFmpeg.
+ * This file is part of Libav.
  *
- * FFmpeg is free software; you can redistribute it and/or
+ * Libav is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
  * version 2.1 of the License, or (at your option) any later version.
  *
- * FFmpeg is distributed in the hope that it will be useful,
+ * Libav is distributed in the hope that it will be useful,
  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * Lesser General Public License for more details.
  *
  * You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
+ * License along with Libav; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- *
  */
 
 /**
- * @file cabac.c
+ * @file
  * Context Adaptive Binary Arithmetic Coder.
  */
 
 #include <string.h>
 
-#include "common.h"
-#include "bitstream.h"
+#include "libavutil/common.h"
+#include "get_bits.h"
 #include "cabac.h"
 
 static const uint8_t lps_range[64][4]= {
@@ -50,7 +49,8 @@ static const uint8_t lps_range[64][4]= {
 {  6,  8,  9, 11}, {  6,  7,  9, 10}, {  6,  7,  8,  9}, {  2,  2,  2,  2},
 };
 
-uint8_t ff_h264_lps_range[2*65][4];
+uint8_t ff_h264_mlps_state[4*64];
+uint8_t ff_h264_lps_range[4*2*64];
 uint8_t ff_h264_lps_state[2*64];
 uint8_t ff_h264_mps_state[2*64];
 
@@ -76,15 +76,27 @@ static const uint8_t lps_state[64]= {
  36,36,37,37,37,38,38,63,
 };
 
-const uint8_t ff_h264_norm_shift[128]= {
- 7,6,5,5,4,4,4,4,3,3,3,3,3,3,3,3,
+const uint8_t ff_h264_norm_shift[512]= {
+ 9,8,7,7,6,6,6,6,5,5,5,5,5,5,5,5,
+ 4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
+ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
+ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
+ 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
+ 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
+ 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
  2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
- 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
- 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 };
 
 /**
@@ -120,7 +132,7 @@ void ff_init_cabac_decoder(CABACContext *c, const uint8_t *buf, int buf_size){
     c->low =  (*c->bytestream++)<<10;
 #endif
     c->low+= ((*c->bytestream++)<<2) + 2;
-    c->range= 0x1FE<<(CABAC_BITS + 1);
+    c->range= 0x1FE;
 }
 
 void ff_init_cabac_states(CABACContext *c){
@@ -128,48 +140,180 @@ void ff_init_cabac_states(CABACContext *c){
 
     for(i=0; i<64; i++){
         for(j=0; j<4; j++){ //FIXME check if this is worth the 1 shift we save
-            ff_h264_lps_range[2*i+0][j+4]=
-            ff_h264_lps_range[2*i+1][j+4]= lps_range[i][j];
+            ff_h264_lps_range[j*2*64+2*i+0]=
+            ff_h264_lps_range[j*2*64+2*i+1]= lps_range[i][j];
         }
 
+        ff_h264_mlps_state[128+2*i+0]=
         ff_h264_mps_state[2*i+0]= 2*mps_state[i]+0;
+        ff_h264_mlps_state[128+2*i+1]=
         ff_h264_mps_state[2*i+1]= 2*mps_state[i]+1;
 
         if( i ){
-#ifdef BRANCHLESS_CABAC_DECODER
-            ff_h264_mps_state[-2*i-1]= 2*lps_state[i]+0; //FIXME yes this is not valid C but iam lazy, cleanup welcome
-            ff_h264_mps_state[-2*i-2]= 2*lps_state[i]+1;
-        }else{
-            ff_h264_mps_state[-2*i-1]= 1;
-            ff_h264_mps_state[-2*i-2]= 0;
-#else
-            ff_h264_lps_state[2*i+0]= 2*lps_state[i]+0;
-            ff_h264_lps_state[2*i+1]= 2*lps_state[i]+1;
+            ff_h264_mlps_state[128-2*i-1]= 2*lps_state[i]+0;
+            ff_h264_mlps_state[128-2*i-2]= 2*lps_state[i]+1;
         }else{
-            ff_h264_lps_state[2*i+0]= 1;
-            ff_h264_lps_state[2*i+1]= 0;
-#endif
+            ff_h264_mlps_state[128-2*i-1]= 1;
+            ff_h264_mlps_state[128-2*i-2]= 0;
         }
     }
 }
 
-#if 0 //selftest
+#ifdef TEST
 #define SIZE 10240
 
+#include "libavutil/lfg.h"
 #include "avcodec.h"
+#include "cabac.h"
+
+static void put_cabac(CABACContext *c, uint8_t * const state, int bit){
+    int RangeLPS= ff_h264_lps_range[2*(c->range&0xC0) + *state];
+
+    if(bit == ((*state)&1)){
+        c->range -= RangeLPS;
+        *state= ff_h264_mps_state[*state];
+    }else{
+        c->low += c->range - RangeLPS;
+        c->range = RangeLPS;
+        *state= ff_h264_lps_state[*state];
+    }
+
+    renorm_cabac_encoder(c);
+
+#ifdef STRICT_LIMITS
+    c->symCount++;
+#endif
+}
+
+/**
+ * @param bit 0 -> write zero bit, !=0 write one bit
+ */
+static void put_cabac_bypass(CABACContext *c, int bit){
+    c->low += c->low;
+
+    if(bit){
+        c->low += c->range;
+    }
+//FIXME optimize
+    if(c->low<0x200){
+        put_cabac_bit(c, 0);
+    }else if(c->low<0x400){
+        c->outstanding_count++;
+        c->low -= 0x200;
+    }else{
+        put_cabac_bit(c, 1);
+        c->low -= 0x400;
+    }
+
+#ifdef STRICT_LIMITS
+    c->symCount++;
+#endif
+}
+
+/**
+ *
+ * @return the number of bytes written
+ */
+static int put_cabac_terminate(CABACContext *c, int bit){
+    c->range -= 2;
+
+    if(!bit){
+        renorm_cabac_encoder(c);
+    }else{
+        c->low += c->range;
+        c->range= 2;
 
-int main(){
+        renorm_cabac_encoder(c);
+
+        assert(c->low <= 0x1FF);
+        put_cabac_bit(c, c->low>>9);
+        put_bits(&c->pb, 2, ((c->low>>7)&3)|1);
+
+        flush_put_bits(&c->pb); //FIXME FIXME FIXME XXX wrong
+    }
+
+#ifdef STRICT_LIMITS
+    c->symCount++;
+#endif
+
+    return (put_bits_count(&c->pb)+7)>>3;
+}
+
+/**
+ * put (truncated) unary binarization.
+ */
+static void put_cabac_u(CABACContext *c, uint8_t * state, int v, int max, int max_index, int truncated){
+    int i;
+
+    assert(v <= max);
+
+    for(i=0; i<v; i++){
+        put_cabac(c, state, 1);
+        if(i < max_index) state++;
+    }
+    if(truncated==0 || v<max)
+        put_cabac(c, state, 0);
+}
+
+/**
+ * put unary exp golomb k-th order binarization.
+ */
+static void put_cabac_ueg(CABACContext *c, uint8_t * state, int v, int max, int is_signed, int k, int max_index){
+    int i;
+
+    if(v==0)
+        put_cabac(c, state, 0);
+    else{
+        const int sign= v < 0;
+
+        if(is_signed) v= FFABS(v);
+
+        if(v<max){
+            for(i=0; i<v; i++){
+                put_cabac(c, state, 1);
+                if(i < max_index) state++;
+            }
+
+            put_cabac(c, state, 0);
+        }else{
+            int m= 1<<k;
+
+            for(i=0; i<max; i++){
+                put_cabac(c, state, 1);
+                if(i < max_index) state++;
+            }
+
+            v -= max;
+            while(v >= m){ //FIXME optimize
+                put_cabac_bypass(c, 1);
+                v-= m;
+                m+= m;
+            }
+            put_cabac_bypass(c, 0);
+            while(m>>=1){
+                put_cabac_bypass(c, v&m);
+            }
+        }
+
+        if(is_signed)
+            put_cabac_bypass(c, sign);
+    }
+}
+
+int main(void){
     CABACContext c;
     uint8_t b[9*SIZE];
     uint8_t r[9*SIZE];
     int i;
     uint8_t state[10]= {0};
+    AVLFG prng;
 
+    av_lfg_init(&prng, 1);
     ff_init_cabac_encoder(&c, b, SIZE);
-    ff_init_cabac_states(&c, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64);
+    ff_init_cabac_states(&c);
 
     for(i=0; i<SIZE; i++){
-        r[i]= random()%7;
+        r[i] = av_lfg_get(&prng) % 7;
     }
 
     for(i=0; i<SIZE; i++){
@@ -236,4 +380,4 @@ STOP_TIMER("get_cabac_ueg")
     return 0;
 }
 
-#endif
+#endif /* TEST */