hevc: ppc: Add HEVC 4x4 IDCT for PowerPC

[ffmpeg] / libavcodec / exr.c
diff --git a/libavcodec/exr.c b/libavcodec/exr.c

index 37a31cebd8ac23ceb42c8226b0e433e4b120455f..28cee8413426b51a5aa29df62024d07bb96e4e41 100644 (file)
--- a/libavcodec/exr.c
+++ b/libavcodec/exr.c
@@ -27,17 +27,20 @@
   * For more information on the OpenEXR format, visit:
   *  http://openexr.com/
   *
- * exr_flt2uint() and exr_halflt2uint() is credited to  Reimar Döffinger
+ * exr_flt2uint() and exr_halflt2uint() are credited to Reimar Döffinger.
+ * exr_half2float() is credited to Aaftab Munshi, Dan Ginsburg, Dave Shreiner.
   */
  
+#include <float.h>
  #include <zlib.h>
  
  #include "libavutil/imgutils.h"
+#include "libavutil/intfloat.h"
  #include "libavutil/opt.h"
  
  #include "avcodec.h"
+#include "bitstream.h"
  #include "bytestream.h"
-#include "get_bits.h"
  #include "internal.h"
  #include "mathops.h"
  #include "thread.h"
@@ -106,8 +109,74 @@ typedef struct EXRContext {
      EXRThreadData *thread_data;
  
      const char *layer;
+
+    float gamma;
+    uint16_t gamma_table[65536];
  } EXRContext;
  
+/* (127 - 15) << 23: exponent -15 stored using the single precision bias of 127 */
+#define HALF_FLOAT_MIN_BIASED_EXP_AS_SINGLE_FP_EXP 0x38000000
+
+/* (127 + 16) << 23: max exponent value in single precision that will be
+ * converted to Inf or NaN when stored as a half-float */
+#define HALF_FLOAT_MAX_BIASED_EXP_AS_SINGLE_FP_EXP 0x47800000
+
+/* 255 is the max biased exponent value, shifted into the float exponent field */
+#define FLOAT_MAX_BIASED_EXP (0xFF << 23)
+
+#define HALF_FLOAT_MAX_BIASED_EXP (0x1F << 10) /* all five half-float exponent bits set */
+
+/**
+ * Convert a half float (1 sign, 5 exponent, 10 mantissa bits), passed
+ * as a uint16_t, into a single precision float.
+ *
+ * @param hf half float as uint16_t
+ * @return float value (zero, denormals, Inf and NaN are all handled)
+ */
+static union av_intfloat32 exr_half2float(uint16_t hf)
+{
+    unsigned int sign = (unsigned int) (hf >> 15);
+    unsigned int mantissa = (unsigned int) (hf & ((1 << 10) - 1));
+    unsigned int exp = (unsigned int) (hf & HALF_FLOAT_MAX_BIASED_EXP);
+    union av_intfloat32 f;
+
+    if (exp == HALF_FLOAT_MAX_BIASED_EXP) {
+        // we have a half-float NaN or Inf
+        // half-float NaNs will be converted to a single precision NaN
+        // half-float Infs will be converted to a single precision Inf
+        exp = FLOAT_MAX_BIASED_EXP;
+        if (mantissa)
+            mantissa = (1 << 23) - 1;    // set all bits to indicate a NaN
+    } else if (exp == 0x0) {
+        // convert half-float zero/denorm to single precision value
+        if (mantissa) {
+            mantissa <<= 1;
+            exp = HALF_FLOAT_MIN_BIASED_EXP_AS_SINGLE_FP_EXP;
+            // normalize: loop until the leading 1 reaches bit 10
+            while (!(mantissa & (1 << 10))) {
+                // for every leading 0, decrement single precision exponent by 1
+                // and shift half-float mantissa value to the left
+                mantissa <<= 1;
+                exp -= (1 << 23);
+            }
+            // drop the now-implicit leading 1, keeping the 10 fraction bits
+            mantissa &= ((1 << 10) - 1);
+            // shift left to generate single-precision mantissa of 23 bits
+            mantissa <<= 13;
+        }
+    } else {
+        // shift left to generate single-precision mantissa of 23 bits
+        mantissa <<= 13;
+        // generate single precision biased exponent value
+        exp = (exp << 13) + HALF_FLOAT_MIN_BIASED_EXP_AS_SINGLE_FP_EXP;
+    }
+
+    f.i = (sign << 31) | exp | mantissa;
+
+    return f;
+}
+
+
  /**
   * Convert from 32-bit float as uint32_t to uint16_t.
   *
@@ -310,15 +379,16 @@ static void huf_canonical_code_table(uint64_t *hcode)
  static int huf_unpack_enc_table(GetByteContext *gb,
                                  int32_t im, int32_t iM, uint64_t *hcode)
  {
-    GetBitContext gbit;
-
-    init_get_bits8(&gbit, gb->buffer, bytestream2_get_bytes_left(gb));
+    BitstreamContext bc;
+    int ret = bitstream_init8(&bc, gb->buffer, bytestream2_get_bytes_left(gb));
+    if (ret < 0)
+        return ret;
  
      for (; im <= iM; im++) {
-        uint64_t l = hcode[im] = get_bits(&gbit, 6);
+        uint64_t l = hcode[im] = bitstream_read(&bc, 6);
  
          if (l == LONG_ZEROCODE_RUN) {
-            int zerun = get_bits(&gbit, 8) + SHORTEST_LONG_RUN;
+            int zerun = bitstream_read(&bc, 8) + SHORTEST_LONG_RUN;
  
              if (im + zerun > iM + 1)
                  return AVERROR_INVALIDDATA;
@@ -340,7 +410,7 @@ static int huf_unpack_enc_table(GetByteContext *gb,
          }
      }
  
-    bytestream2_skip(gb, (get_bits_count(&gbit) + 7) / 8);
+    bytestream2_skip(gb, (bitstream_tell(&bc) + 7) / 8);
      huf_canonical_code_table(hcode);
  
      return 0;
@@ -389,7 +459,7 @@ static int huf_build_dec_table(const uint64_t *hcode, int im,
          lc += 8;                                                              \
  }
  
-#define get_code(po, rlc, c, lc, gb, out, oe)                                 \
+#define get_code(po, rlc, c, lc, gb, out, oe, outb)                           \
  {                                                                             \
          if (po == rlc) {                                                      \
              if (lc < 8)                                                       \
@@ -398,7 +468,7 @@ static int huf_build_dec_table(const uint64_t *hcode, int im,
                                                                                \
              cs = c >> lc;                                                     \
                                                                                \
-            if (out + cs > oe)                                                \
+            if (out + cs > oe || out == outb)                                 \
                  return AVERROR_INVALIDDATA;                                   \
                                                                                \
              s = out[-1];                                                      \
@@ -431,7 +501,7 @@ static int huf_decode(const uint64_t *hcode, const HufDec *hdecod,
  
              if (pl.len) {
                  lc -= pl.len;
-                get_code(pl.lit, rlc, c, lc, gb, out, oe);
+                get_code(pl.lit, rlc, c, lc, gb, out, oe, outb);
              } else {
                  int j;
  
@@ -448,7 +518,7 @@ static int huf_decode(const uint64_t *hcode, const HufDec *hdecod,
                          if ((hcode[pl.p[j]] >> 6) ==
                              ((c >> (lc - l)) & ((1LL << l) - 1))) {
                              lc -= l;
-                            get_code(pl.p[j], rlc, c, lc, gb, out, oe);
+                            get_code(pl.p[j], rlc, c, lc, gb, out, oe, outb);
                              break;
                          }
                      }
@@ -469,7 +539,7 @@ static int huf_decode(const uint64_t *hcode, const HufDec *hdecod,
  
          if (pl.len) {
              lc -= pl.len;
-            get_code(pl.lit, rlc, c, lc, gb, out, oe);
+            get_code(pl.lit, rlc, c, lc, gb, out, oe, outb);
          } else {
              return AVERROR_INVALIDDATA;
          }
@@ -771,6 +841,7 @@ static int decode_block(AVCodecContext *avctx, void *tdata,
      int axmax = (avctx->width - (s->xmax + 1)) * 2 * s->desc->nb_components;
      int bxmin = s->xmin * 2 * s->desc->nb_components;
      int i, x, buf_size = s->buf_size;
+    float one_gamma = 1.0f / s->gamma;
      int ret;
  
      line_offset = AV_RL64(s->gb.buffer + jobnr * 8);
@@ -851,18 +922,30 @@ static int decode_block(AVCodecContext *avctx, void *tdata,
          if (s->pixel_type == EXR_FLOAT) {
              // 32-bit
              for (x = 0; x < xdelta; x++) {
-                *ptr_x++ = exr_flt2uint(bytestream_get_le32(&r));
-                *ptr_x++ = exr_flt2uint(bytestream_get_le32(&g));
-                *ptr_x++ = exr_flt2uint(bytestream_get_le32(&b));
+                union av_intfloat32 t;
+                t.i = bytestream_get_le32(&r);
+                if (t.f > 0.0f)  /* avoid negative values */
+                    t.f = powf(t.f, one_gamma);
+                *ptr_x++ = exr_flt2uint(t.i);
+
+                t.i = bytestream_get_le32(&g);
+                if (t.f > 0.0f)
+                    t.f = powf(t.f, one_gamma);
+                *ptr_x++ = exr_flt2uint(t.i);
+
+                t.i = bytestream_get_le32(&b);
+                if (t.f > 0.0f)
+                    t.f = powf(t.f, one_gamma);
+                *ptr_x++ = exr_flt2uint(t.i);
                  if (channel_buffer[3])
                      *ptr_x++ = exr_flt2uint(bytestream_get_le32(&a));
              }
          } else {
              // 16-bit
              for (x = 0; x < xdelta; x++) {
-                *ptr_x++ = exr_halflt2uint(bytestream_get_le16(&r));
-                *ptr_x++ = exr_halflt2uint(bytestream_get_le16(&g));
-                *ptr_x++ = exr_halflt2uint(bytestream_get_le16(&b));
+                *ptr_x++ = s->gamma_table[bytestream_get_le16(&r)];
+                *ptr_x++ = s->gamma_table[bytestream_get_le16(&g)];
+                *ptr_x++ = s->gamma_table[bytestream_get_le16(&b)];
                  if (channel_buffer[3])
                      *ptr_x++ = exr_halflt2uint(bytestream_get_le16(&a));
              }
@@ -1009,8 +1092,7 @@ static int decode_header(EXRContext *s)
  
                  current_pixel_type = bytestream2_get_le32(&ch_gb);
                  if (current_pixel_type >= EXR_UNKNOWN) {
-                    avpriv_report_missing_feature(s->avctx,
-                                                  "Pixel type %d.\n",
+                    avpriv_report_missing_feature(s->avctx, "Pixel type %d",
                                                    current_pixel_type);
                      return AVERROR_PATCHWELCOME;
                  }
@@ -1262,6 +1344,9 @@ static int decode_frame(AVCodecContext *avctx, void *data,
  static av_cold int decode_init(AVCodecContext *avctx)
  {
      EXRContext *s = avctx->priv_data;
+    uint32_t i;
+    union av_intfloat32 t;
+    float one_gamma = 1.0f / s->gamma;
  
      s->avctx              = avctx;
      s->xmin               = ~0;
@@ -1280,7 +1365,23 @@ static av_cold int decode_init(AVCodecContext *avctx)
      s->w                  = 0;
      s->h                  = 0;
  
-    // allocate thread data, used for non EXR_RAW compreesion types
+    if (one_gamma > 0.9999f && one_gamma < 1.0001f) {
+        for (i = 0; i < 65536; ++i)
+            s->gamma_table[i] = exr_halflt2uint(i);
+    } else {
+        for (i = 0; i < 65536; ++i) {
+            t = exr_half2float(i);
+            /* If negative value we reuse half value */
+            if (t.f <= 0.0f) {
+                s->gamma_table[i] = exr_halflt2uint(i);
+            } else {
+                t.f = powf(t.f, one_gamma);
+                s->gamma_table[i] = exr_flt2uint(t.i);
+            }
+        }
+    }
+
+    // allocate thread data, used for non EXR_RAW compression types
      s->thread_data = av_mallocz_array(avctx->thread_count, sizeof(EXRThreadData));
      if (!s->thread_data)
          return AVERROR_INVALIDDATA;
@@ -1291,7 +1392,7 @@ static av_cold int decode_init(AVCodecContext *avctx)
  static int decode_init_thread_copy(AVCodecContext *avctx)
  {    EXRContext *s = avctx->priv_data;
  
-    // allocate thread data, used for non EXR_RAW compreesion types
+    // allocate thread data, used for non EXR_RAW compression types
      s->thread_data = av_mallocz_array(avctx->thread_count, sizeof(EXRThreadData));
      if (!s->thread_data)
          return AVERROR_INVALIDDATA;
@@ -1322,6 +1423,8 @@ static av_cold int decode_end(AVCodecContext *avctx)
  static const AVOption options[] = {
      { "layer", "Set the decoding layer", OFFSET(layer),
          AV_OPT_TYPE_STRING, { .str = "" }, 0, 0, VD },
+    { "gamma", "Set the float gamma value when decoding", OFFSET(gamma),
+        AV_OPT_TYPE_FLOAT, { .dbl = 1.0f }, 0.001, FLT_MAX, VD }, /* min > 0: decode code computes 1.0f / gamma */
      { NULL },
  };
  
@@ -1342,7 +1445,7 @@ AVCodec ff_exr_decoder = {
      .init_thread_copy = ONLY_IF_THREADS_ENABLED(decode_init_thread_copy),
      .close            = decode_end,
      .decode           = decode_frame,
-    .capabilities     = CODEC_CAP_DR1 | CODEC_CAP_FRAME_THREADS |
-                        CODEC_CAP_SLICE_THREADS,
+    .capabilities     = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_FRAME_THREADS |
+                        AV_CODEC_CAP_SLICE_THREADS,
      .priv_class       = &exr_class,
  };