vc-1: Optimise parser (with special attention to ARM)

[ffmpeg] / libavcodec / vc1_parser.c
diff --git a/libavcodec/vc1_parser.c b/libavcodec/vc1_parser.c

index cc29ce148b1329970ffd1a72dbbb1f068a32e750..4ed14bccbe59d1eff40845ce544e8c36f09bd995 100644 (file)
--- a/libavcodec/vc1_parser.c
+++ b/libavcodec/vc1_parser.c
@@ -30,122 +30,88 @@
  #include "vc1.h"
  #include "get_bits.h"
  
+/** The maximum number of bytes of a sequence, entry point or
+ *  frame header whose values we pay any attention to */
+#define UNESCAPED_THRESHOLD 37
+
+/** The maximum number of bytes of a sequence, entry point or
+ *  frame header which must be valid memory (because they are
+ *  used to update the bitstream cache in skip_bits() calls)
+ */
+#define UNESCAPED_LIMIT 144
+
+typedef enum {
+    NO_MATCH,
+    ONE_ZERO,
+    TWO_ZEROS,
+    ONE
+} VC1ParseSearchState;
+
  typedef struct {
      ParseContext pc;
      VC1Context v;
+    uint8_t prev_start_code;
+    size_t bytes_to_skip;
+    uint8_t unesc_buffer[UNESCAPED_LIMIT];
+    size_t unesc_index;
+    VC1ParseSearchState search_state;
  } VC1ParseContext;
  
-static void vc1_extract_headers(AVCodecParserContext *s, AVCodecContext *avctx,
-                                const uint8_t *buf, int buf_size)
+static void vc1_extract_header(AVCodecParserContext *s, AVCodecContext *avctx,
+                               const uint8_t *buf, int buf_size)
  {
+    /* Parse the header we just finished unescaping */
      VC1ParseContext *vpc = s->priv_data;
      GetBitContext gb;
-    const uint8_t *start, *end, *next;
-    uint8_t *buf2 = av_mallocz(buf_size + FF_INPUT_BUFFER_PADDING_SIZE);
-
+    int ret;
      vpc->v.s.avctx = avctx;
      vpc->v.parse_only = 1;
-    vpc->v.first_pic_header_flag = 1;
-    next = buf;
-    s->repeat_pict = 0;
-
-    for(start = buf, end = buf + buf_size; next < end; start = next){
-        int buf2_size, size;
-        int ret;
-
-        next = find_next_marker(start + 4, end);
-        size = next - start - 4;
-        buf2_size = vc1_unescape_buffer(start + 4, size, buf2);
-        init_get_bits(&gb, buf2, buf2_size * 8);
-        if(size <= 0) continue;
-        switch(AV_RB32(start)){
-        case VC1_CODE_SEQHDR:
-            ff_vc1_decode_sequence_header(avctx, &vpc->v, &gb);
-            break;
-        case VC1_CODE_ENTRYPOINT:
-            ff_vc1_decode_entry_point(avctx, &vpc->v, &gb);
-            break;
-        case VC1_CODE_FRAME:
-            if(vpc->v.profile < PROFILE_ADVANCED)
-                ret = ff_vc1_parse_frame_header    (&vpc->v, &gb);
-            else
-                ret = ff_vc1_parse_frame_header_adv(&vpc->v, &gb);
-
-            if (ret < 0)
-                break;
-
-            /* keep AV_PICTURE_TYPE_BI internal to VC1 */
-            if (vpc->v.s.pict_type == AV_PICTURE_TYPE_BI)
-                s->pict_type = AV_PICTURE_TYPE_B;
-            else
-                s->pict_type = vpc->v.s.pict_type;
-
-            if (avctx->ticks_per_frame > 1){
-                // process pulldown flags
-                s->repeat_pict = 1;
-                // Pulldown flags are only valid when 'broadcast' has been set.
-                // So ticks_per_frame will be 2
-                if (vpc->v.rff){
-                    // repeat field
-                    s->repeat_pict = 2;
-                }else if (vpc->v.rptfrm){
-                    // repeat frames
-                    s->repeat_pict = vpc->v.rptfrm * 2 + 1;
-                }
-            }
-
-            if (vpc->v.broadcast && vpc->v.interlace && !vpc->v.psf)
-                s->field_order = vpc->v.tff ? AV_FIELD_TT : AV_FIELD_BB;
-            else
-                s->field_order = AV_FIELD_PROGRESSIVE;
+    init_get_bits(&gb, buf, buf_size * 8);
+    switch (vpc->prev_start_code) {
+    case VC1_CODE_SEQHDR & 0xFF:
+        ff_vc1_decode_sequence_header(avctx, &vpc->v, &gb);
+        break;
+    case VC1_CODE_ENTRYPOINT & 0xFF:
+        ff_vc1_decode_entry_point(avctx, &vpc->v, &gb);
+        break;
+    case VC1_CODE_FRAME & 0xFF:
+        if(vpc->v.profile < PROFILE_ADVANCED)
+            ret = ff_vc1_parse_frame_header    (&vpc->v, &gb);
+        else
+            ret = ff_vc1_parse_frame_header_adv(&vpc->v, &gb);
  
+        if (ret < 0)
              break;
-        }
-    }
  
-    av_free(buf2);
-}
+        /* keep AV_PICTURE_TYPE_BI internal to VC1 */
+        if (vpc->v.s.pict_type == AV_PICTURE_TYPE_BI)
+            s->pict_type = AV_PICTURE_TYPE_B;
+        else
+            s->pict_type = vpc->v.s.pict_type;
  
-/**
- * Find the end of the current frame in the bitstream.
- * @return the position of the first byte of the next frame, or -1
- */
-static int vc1_find_frame_end(ParseContext *pc, const uint8_t *buf,
-                               int buf_size) {
-    int pic_found, i;
-    uint32_t state;
-
-    pic_found= pc->frame_start_found;
-    state= pc->state;
-
-    i=0;
-    if(!pic_found){
-        for(i=0; i<buf_size; i++){
-            state= (state<<8) | buf[i];
-            if(state == VC1_CODE_FRAME || state == VC1_CODE_FIELD){
-                i++;
-                pic_found=1;
-                break;
+        if (avctx->ticks_per_frame > 1){
+            // process pulldown flags
+            s->repeat_pict = 1;
+            // Pulldown flags are only valid when 'broadcast' has been set.
+            // So ticks_per_frame will be 2
+            if (vpc->v.rff){
+                // repeat field
+                s->repeat_pict = 2;
+            }else if (vpc->v.rptfrm){
+                // repeat frames
+                s->repeat_pict = vpc->v.rptfrm * 2 + 1;
              }
+        }else{
+            s->repeat_pict = 0;
          }
-    }
  
-    if(pic_found){
-        /* EOF considered as end of frame */
-        if (buf_size == 0)
-            return 0;
-        for(; i<buf_size; i++){
-            state= (state<<8) | buf[i];
-            if(IS_MARKER(state) && state != VC1_CODE_FIELD && state != VC1_CODE_SLICE){
-                pc->frame_start_found=0;
-                pc->state=-1;
-                return i-3;
-            }
-        }
+        if (vpc->v.broadcast && vpc->v.interlace && !vpc->v.psf)
+            s->field_order = vpc->v.tff ? AV_FIELD_TT : AV_FIELD_BB;
+        else
+            s->field_order = AV_FIELD_PROGRESSIVE;
+
+        break;
      }
-    pc->frame_start_found= pic_found;
-    pc->state= state;
-    return END_NOT_FOUND;
  }
  
  static int vc1_parse(AVCodecParserContext *s,
@@ -153,22 +119,127 @@ static int vc1_parse(AVCodecParserContext *s,
                             const uint8_t **poutbuf, int *poutbuf_size,
                             const uint8_t *buf, int buf_size)
  {
+    /* Here we do the searching for frame boundaries and headers at
+     * the same time. Only a minimal amount at the start of each
+     * header is unescaped. */
      VC1ParseContext *vpc = s->priv_data;
-    int next;
+    int pic_found = vpc->pc.frame_start_found;
+    uint8_t *unesc_buffer = vpc->unesc_buffer;
+    size_t unesc_index = vpc->unesc_index;
+    VC1ParseSearchState search_state = vpc->search_state;
+    int next = END_NOT_FOUND;
+    int i = vpc->bytes_to_skip;
+
+    if (pic_found && buf_size == 0) {
+        /* EOF considered as end of frame */
+        memset(unesc_buffer + unesc_index, 0, UNESCAPED_THRESHOLD - unesc_index);
+        vc1_extract_header(s, avctx, unesc_buffer, unesc_index);
+        next = 0;
+    }
+    while (i < buf_size) {
+        int start_code_found = 0;
+        uint8_t b;
+        while (i < buf_size && unesc_index < UNESCAPED_THRESHOLD) {
+            b = buf[i++];
+            unesc_buffer[unesc_index++] = b;
+            if (search_state <= ONE_ZERO)
+                search_state = b ? NO_MATCH : search_state + 1;
+            else if (search_state == TWO_ZEROS) {
+                if (b == 1)
+                    search_state = ONE;
+                else if (b > 1) {
+                    if (b == 3)
+                        unesc_index--; // swallow emulation prevention byte
+                    search_state = NO_MATCH;
+                }
+            }
+            else { // search_state == ONE
+                // Header unescaping terminates early due to detection of next start code
+                search_state = NO_MATCH;
+                start_code_found = 1;
+                break;
+            }
+        }
+        if ((s->flags & PARSER_FLAG_COMPLETE_FRAMES) &&
+                unesc_index >= UNESCAPED_THRESHOLD &&
+                vpc->prev_start_code == (VC1_CODE_FRAME & 0xFF))
+        {
+            // No need to keep scanning the rest of the buffer for
+            // start codes if we know it contains a complete frame and
+            // we've already unescaped all we need of the frame header
+            vc1_extract_header(s, avctx, unesc_buffer, unesc_index);
+            break;
+        }
+        if (unesc_index >= UNESCAPED_THRESHOLD && !start_code_found) {
+            while (i < buf_size) {
+                if (search_state == NO_MATCH) {
+                    i += vpc->v.vc1dsp.vc1_find_start_code_candidate(buf + i, buf_size - i);
+                    if (i < buf_size) {
+                        search_state = ONE_ZERO;
+                    }
+                    i++;
+                } else {
+                    b = buf[i++];
+                    if (search_state == ONE_ZERO)
+                        search_state = b ? NO_MATCH : TWO_ZEROS;
+                    else if (search_state == TWO_ZEROS) {
+                        if (b >= 1)
+                            search_state = b == 1 ? ONE : NO_MATCH;
+                    }
+                    else { // search_state == ONE
+                        search_state = NO_MATCH;
+                        start_code_found = 1;
+                        break;
+                    }
+                }
+            }
+        }
+        if (start_code_found) {
+            vc1_extract_header(s, avctx, unesc_buffer, unesc_index);
+
+            vpc->prev_start_code = b;
+            unesc_index = 0;
+
+            if (!(s->flags & PARSER_FLAG_COMPLETE_FRAMES)) {
+                if (!pic_found && (b == (VC1_CODE_FRAME & 0xFF) || b == (VC1_CODE_FIELD & 0xFF))) {
+                    pic_found = 1;
+                }
+                else if (pic_found && b != (VC1_CODE_FIELD & 0xFF) && b != (VC1_CODE_SLICE & 0xFF)) {
+                    next = i - 4;
+                    pic_found = b == (VC1_CODE_FRAME & 0xFF);
+                    break;
+                }
+            }
+        }
+    }
  
-    if(s->flags & PARSER_FLAG_COMPLETE_FRAMES){
-        next= buf_size;
-    }else{
-        next= vc1_find_frame_end(&vpc->pc, buf, buf_size);
+    vpc->pc.frame_start_found = pic_found;
+    vpc->unesc_index = unesc_index;
+    vpc->search_state = search_state;
  
+    if (s->flags & PARSER_FLAG_COMPLETE_FRAMES) {
+        next = buf_size;
+    } else {
          if (ff_combine_frame(&vpc->pc, next, &buf, &buf_size) < 0) {
+            vpc->bytes_to_skip = 0;
              *poutbuf = NULL;
              *poutbuf_size = 0;
              return buf_size;
          }
      }
  
-    vc1_extract_headers(s, avctx, buf, buf_size);
+    vpc->v.first_pic_header_flag = 1;
+
+    /* If we return with a valid pointer to a combined frame buffer
+     * then on the next call then we'll have been unhelpfully rewound
+     * by up to 4 bytes (depending upon whether the start code
+     * overlapped the input buffer, and if so by how much). We don't
+     * want this: it will either cause spurious second detections of
+     * the start code we've already seen, or cause extra bytes to be
+     * inserted at the start of the unescaped buffer. */
+    vpc->bytes_to_skip = 4;
+    if (next < 0)
+        vpc->bytes_to_skip += next;
  
      *poutbuf = buf;
      *poutbuf_size = buf_size;
@@ -199,6 +270,11 @@ static av_cold int vc1_parse_init(AVCodecParserContext *s)
  {
      VC1ParseContext *vpc = s->priv_data;
      vpc->v.s.slice_context_count = 1;
+    vpc->v.first_pic_header_flag = 1;
+    vpc->prev_start_code = 0;
+    vpc->bytes_to_skip = 0;
+    vpc->unesc_index = 0;
+    vpc->search_state = NO_MATCH;
      return ff_vc1_init_common(&vpc->v);
  }