]> git.sesse.net Git - ffmpeg/blobdiff - libavcodec/prosumer.c
aarch64: h264pred: Optimize the inner loop of existing 8 bit functions
[ffmpeg] / libavcodec / prosumer.c
index 0fa01099507601e731b5e9da08f3a84962b1084a..9950519604c53a8f8f618a8527708095d3f8534e 100644 (file)
@@ -38,7 +38,7 @@ typedef struct ProSumerContext {
 
     unsigned stride;
     unsigned size;
-    uint32_t lut[0x10000];
+    uint32_t lut[0x2000];
     uint8_t *initial_line;
     uint8_t *decbuffer;
 } ProSumerContext;
@@ -57,25 +57,25 @@ static int decompress(GetByteContext *gb, int size, PutByteContext *pb, const ui
     b = lut[2 * idx];
 
     while (1) {
-        if (((b & 0xFF00u) != 0x8000u) || (b & 0xFFu)) {
+        if (bytestream2_get_bytes_left_p(pb) <= 0 || bytestream2_get_eof(pb))
+            return 0;
+        if ((b & 0xFF00u) != 0x8000u || (b & 0xFFu)) {
             if ((b & 0xFF00u) != 0x8000u) {
                 bytestream2_put_le16(pb, b);
-            } else if (b & 0xFFu) {
+            } else {
                 idx = 0;
                 for (int i = 0; i < (b & 0xFFu); i++)
                     bytestream2_put_le32(pb, 0);
             }
             c = b >> 16;
             if (c & 0xFF00u) {
-                c = (((c >> 8) & 0xFFu) | (c & 0xFF00)) & 0xF00F;
                 fill = lut[2 * idx + 1];
-                if ((c & 0xFF00u) == 0x1000) {
+                if ((c & 0xF000u) == 0x1000) {
                     bytestream2_put_le16(pb, fill);
-                    c &= 0xFFFF00FFu;
                 } else {
                     bytestream2_put_le32(pb, fill);
-                    c &= 0xFFFF00FFu;
                 }
+                c = (c >> 8) & 0x0Fu;
             }
             while (c) {
                 a <<= 4;
@@ -84,21 +84,20 @@ static int decompress(GetByteContext *gb, int size, PutByteContext *pb, const ui
                     if (bytestream2_get_bytes_left(gb) <= 0) {
                         if (!a)
                             return 0;
-                        cnt = 4;
                     } else {
-                        pos = bytestream2_tell(gb) ^ 2;
-                        bytestream2_seek(gb, pos, SEEK_SET);
+                        pos = bytestream2_tell(gb);
+                        bytestream2_seek(gb, pos ^ 2, SEEK_SET);
                         AV_WN16(&a, bytestream2_peek_le16(gb));
-                        pos = pos ^ 2;
-                        bytestream2_seek(gb, pos, SEEK_SET);
-                        bytestream2_skip(gb, 2);
-                        cnt = 4;
+                        bytestream2_seek(gb, pos + 2, SEEK_SET);
                     }
+                    cnt = 4;
                 }
                 c--;
             }
             idx = a >> 20;
             b = lut[2 * idx];
+            if (!b)
+                return AVERROR_INVALIDDATA;
             continue;
         }
         idx = 2;
@@ -117,12 +116,10 @@ static int decompress(GetByteContext *gb, int size, PutByteContext *pb, const ui
                 }
                 return 0;
             }
-            pos = bytestream2_tell(gb) ^ 2;
-            bytestream2_seek(gb, pos, SEEK_SET);
+            pos = bytestream2_tell(gb);
+            bytestream2_seek(gb, pos ^ 2, SEEK_SET);
             AV_WN16(&a, bytestream2_peek_le16(gb));
-            pos = pos ^ 2;
-            bytestream2_seek(gb, pos, SEEK_SET);
-            bytestream2_skip(gb, 2);
+            bytestream2_seek(gb, pos + 2, SEEK_SET);
             cnt = 4;
             idx--;
         }
@@ -156,11 +153,17 @@ static int decode_frame(AVCodecContext *avctx, void *data,
     if (avpkt->size <= 32)
         return AVERROR_INVALIDDATA;
 
-    memset(s->decbuffer, 0, s->size);
     bytestream2_init(&s->gb, avpkt->data, avpkt->size);
     bytestream2_init_writer(&s->pb, s->decbuffer, s->size);
+    ret = decompress(&s->gb, AV_RL32(avpkt->data + 28) >> 1, &s->pb, s->lut);
+    if (ret < 0)
+        return ret;
+    if (bytestream2_get_bytes_left_p(&s->pb) > s->size * (int64_t)avctx->discard_damaged_percentage / 100)
+        return AVERROR_INVALIDDATA;
+
+    av_assert0(s->size >= bytestream2_get_bytes_left_p(&s->pb));
+    memset(s->decbuffer + bytestream2_tell_p(&s->pb), 0, bytestream2_get_bytes_left_p(&s->pb));
 
-    decompress(&s->gb, AV_RL32(avpkt->data + 28) >> 1, &s->pb, s->lut);
     vertical_predict((uint32_t *)s->decbuffer, 0, (uint32_t *)s->initial_line, s->stride, 1);
     vertical_predict((uint32_t *)s->decbuffer, s->stride, (uint32_t *)s->decbuffer, s->stride, avctx->height - 1);
 
@@ -274,7 +277,6 @@ static const uint16_t table[] = {
     0x8022, 0xDAC, 0x8023, 0xDAD, 0x8024, 0xDAE, 0x8025, 0xDAF, 0x8026, 0xDB0,
     0x8027, 0xDB1, 0x8028, 0xDB2, 0x8029, 0xDB3, 0x802A, 0xDB4, 0x802B, 0xDB5,
     0x802C, 0xDB6, 0x802D, 0xDB7, 0x802E, 0xDB8, 0x802F, 0xDB9, 0x80FF, 0xDBA,
-    0x0001
 };
 
 static void fill_elements(uint32_t idx, uint32_t shift, uint32_t *e0, uint32_t *e1)