};
/* Prototypes for routines defined elsewhere. This change renames them from
 * vp56_* to ff_vp56_*; the parameter lists and return types are unchanged. */
-void vp56_init(AVCodecContext *avctx, int flip, int has_alpha);
-int vp56_free(AVCodecContext *avctx);
-void vp56_init_dequant(VP56Context *s, int quantizer);
-int vp56_decode_frame(AVCodecContext *avctx, void *data, int *data_size,
- AVPacket *avpkt);
+void ff_vp56_init(AVCodecContext *avctx, int flip, int has_alpha);
+int ff_vp56_free(AVCodecContext *avctx);
+void ff_vp56_init_dequant(VP56Context *s, int quantizer);
+int ff_vp56_decode_frame(AVCodecContext *avctx, void *data, int *data_size,
+ AVPacket *avpkt);
/**
* vp56 specific range coder implementation
*
* The inline initializer is replaced by a prototype for the out-of-line
* ff_vp56_init_range_decoder(), and the shift table ff_vp56_norm_shift is
* declared here for use by vp56_rac_renorm().
*/
-static inline void vp56_init_range_decoder(VP56RangeCoder *c,
- const uint8_t *buf, int buf_size)
-{
- c->high = 255;
- c->bits = -8;
- c->buffer = buf;
- c->end = buf + buf_size;
- c->code_word = bytestream_get_be16(&c->buffer);
-}
/* 256-entry table, indexed by c->high in vp56_rac_renorm() to obtain the
 * left-shift count; its contents are defined elsewhere. */
+extern const uint8_t ff_vp56_norm_shift[256];
/* NOTE(review): the removed inline version set high = 255, bits = -8 and
 * preloaded c->code_word with bytestream_get_be16(); the out-of-line
 * definition is not shown here -- confirm its initial state matches the
 * 16-bit refill now done in vp56_rac_renorm(). */
+void ff_vp56_init_range_decoder(VP56RangeCoder *c, const uint8_t *buf, int buf_size);
/**
* Renormalize the range coder and return the updated code word.
*
* The shift count is read from ff_vp56_norm_shift[c->high]; c->high and the
* code word are both shifted left by it and c->bits is advanced by the same
* amount. c->high, c->bits and (on refill) c->buffer are updated in place,
* but the code word is only returned: the caller has to store it back into
* c->code_word.
*
* NOTE(review): the refill is guarded by c->buffer < c->end only, yet it
* consumes two bytes through bytestream_get_be16(). With a single byte left
* this presumably reads one byte beyond c->end unless the input buffer is
* padded -- confirm against the callers' buffer guarantees.
*/
-static av_always_inline void vp56_rac_renorm(VP56RangeCoder *c, unsigned int code_word)
+static av_always_inline unsigned int vp56_rac_renorm(VP56RangeCoder *c)
{
- int shift = ff_h264_norm_shift[c->high] - 1;
+ int shift = ff_vp56_norm_shift[c->high];
int bits = c->bits;
+ unsigned int code_word = c->code_word;
c->high <<= shift;
code_word <<= shift;
bits += shift;
/* refill once the bit counter is non-negative and input remains */
if(bits >= 0 && c->buffer < c->end) {
- code_word |= *c->buffer++ << bits;
- bits -= 8;
+ code_word |= bytestream_get_be16(&c->buffer) << bits; /* two bytes per refill instead of one */
+ bits -= 16;
}
c->bits = bits;
- c->code_word = code_word;
+ return code_word; /* no longer written to c->code_word here */
}
-static inline int vp56_rac_get_prob(VP56RangeCoder *c, uint8_t prob)
+#if ARCH_X86
+#include "x86/vp56_arith.h"
+#endif
+
/* The C version below is compiled only when vp56_rac_get_prob is not already
 * defined as a macro; x86/vp56_arith.h presumably provides such an override
 * on ARCH_X86 builds -- TODO confirm. */
+#ifndef vp56_rac_get_prob
+#define vp56_rac_get_prob vp56_rac_get_prob
/**
* Decode one bit using the probability parameter prob.
*
* The coder is renormalized first via vp56_rac_renorm(). The split point is
* low = 1 + (((c->high - 1) * prob) >> 8), compared against the code word as
* low << 16. Returns 1 when code_word >= (low << 16); c->high then becomes
* c->high - low and low << 16 is subtracted from the code word. Returns 0
* otherwise, with c->high = low. The resulting code word is stored in
* c->code_word.
*/
+static av_always_inline int vp56_rac_get_prob(VP56RangeCoder *c, uint8_t prob)
{
- /* Don't put c->high in a local variable; if we do that, gcc gets
- * the stupids and turns the code below into a branch again. */
- unsigned int code_word = c->code_word;
+ unsigned int code_word = vp56_rac_renorm(c); /* renormalize before decoding, not after */
unsigned int low = 1 + (((c->high - 1) * prob) >> 8);
- unsigned int low_shift = low << 8;
+ unsigned int low_shift = low << 16; /* split now sits at bit 16 (was bit 8) */
int bit = code_word >= low_shift;
- /* Incantation to convince GCC to turn these into conditional moves
- * instead of branches -- faster, as this branch is basically
- * unpredictable. */
c->high = bit ? c->high - low : low;
- code_word = bit ? code_word - low_shift : code_word;
+ c->code_word = bit ? code_word - low_shift : code_word;
- vp56_rac_renorm(c, code_word);
return bit;
}
+#endif
-static inline int vp56_rac_get(VP56RangeCoder *c)
+// branchy variant, to be used where there's a branch based on the bit decoded
// Computes the same split and state updates as the C vp56_rac_get_prob(),
// written with an explicit if/return instead of conditional expressions.
// Note that code_word is held in an unsigned long here although
// vp56_rac_renorm() returns unsigned int, and prob is a plain int.
+static av_always_inline int vp56_rac_get_prob_branchy(VP56RangeCoder *c, int prob)
{
+ unsigned long code_word = vp56_rac_renorm(c);
+ unsigned low = 1 + (((c->high - 1) * prob) >> 8);
+ unsigned low_shift = low << 16;
+
+ if (code_word >= low_shift) {
+ c->high -= low;
+ c->code_word = code_word - low_shift;
+ return 1;
+ }
+
+ c->high = low;
+ c->code_word = code_word;
+ return 0;
+}
+
// Decode one bit with the range split at (c->high + 1) >> 1.
// Renormalization is now done up front through vp56_rac_renorm(); the
// one-bit shift and single-byte refill that used to follow the decision are
// removed, and c->high is no longer doubled here.
+static av_always_inline int vp56_rac_get(VP56RangeCoder *c)
+{
+ unsigned int code_word = vp56_rac_renorm(c);
/* equiprobable */
int low = (c->high + 1) >> 1;
- unsigned int low_shift = low << 8;
- int bit = c->code_word >= low_shift;
+ unsigned int low_shift = low << 16;
+ int bit = code_word >= low_shift;
if (bit) {
- c->high = (c->high - low) << 1;
- c->code_word -= low_shift;
+ c->high -= low;
+ code_word -= low_shift;
} else {
- c->high = low << 1;
+ c->high = low;
}
- /* normalize */
- c->code_word <<= 1;
- if (++c->bits == 0 && c->buffer < c->end) {
- c->bits = -8;
- c->code_word |= *c->buffer++;
- }
+ c->code_word = code_word; /* write back the local copy returned by vp56_rac_renorm() */
return bit;
}
// rounding is different from vp56_rac_get; is vp56_rac_get wrong?
// NOTE(review): with prob = 128 the C vp56_rac_get_prob() splits at
// 1 + (((c->high - 1) * 128) >> 8), which looks equal to vp56_rac_get's
// (c->high + 1) >> 1 for every c->high >= 1 -- confirm whether the remark
// above still holds after this change.
// Decodes one bit by calling vp56_rac_get_prob() with prob fixed at 128.
-static inline int vp8_rac_get(VP56RangeCoder *c)
+static av_always_inline int vp8_rac_get(VP56RangeCoder *c)
{
return vp56_rac_get_prob(c, 128);
}
-static inline int vp56_rac_gets(VP56RangeCoder *c, int bits)
+static av_unused int vp56_rac_gets(VP56RangeCoder *c, int bits)
{
int value = 0;
return value;
}
-static inline int vp8_rac_get_uint(VP56RangeCoder *c, int bits)
+static av_unused int vp8_rac_get_uint(VP56RangeCoder *c, int bits)
{
int value = 0;
}
// fixme: add 1 bit to all the calls to this?
-static inline int vp8_rac_get_sint(VP56RangeCoder *c, int bits)
+static av_unused int vp8_rac_get_sint(VP56RangeCoder *c, int bits)
{
int v;
}
// P(7)
// Reads a value with vp56_rac_gets(c, 7), doubles it, and maps a result of 0
// to 1 (v + !v), so the return value is never zero.
// NOTE(review): the bits parameter is not used; the width passed to
// vp56_rac_gets() is hard-coded to 7 -- confirm this is intended.
-static inline int vp56_rac_gets_nn(VP56RangeCoder *c, int bits)
+static av_unused int vp56_rac_gets_nn(VP56RangeCoder *c, int bits)
{
int v = vp56_rac_gets(c, 7) << 1;
return v + !v; /* !v is 1 only when v == 0 */
}
// Reads a value with vp8_rac_get_uint(c, 7), doubles it, and maps a result of
// 0 to 1 (v + !v), so the return value is never zero.
-static inline int vp8_rac_get_nn(VP56RangeCoder *c)
+static av_unused int vp8_rac_get_nn(VP56RangeCoder *c)
{
int v = vp8_rac_get_uint(c, 7) << 1;
return v + !v; /* !v is 1 only when v == 0 */
}
-static inline int vp56_rac_get_tree(VP56RangeCoder *c,
- const VP56Tree *tree,
- const uint8_t *probs)
+static av_always_inline
+int vp56_rac_get_tree(VP56RangeCoder *c,
+ const VP56Tree *tree,
+ const uint8_t *probs)
{
while (tree->val > 0) {
if (vp56_rac_get_prob(c, probs[tree->prob_idx]))
* on a node other than the root node, needed for coeff decode where this is
* used to save a bit after a 0 token (by disallowing EOB to immediately follow.)
*/
-static inline int vp8_rac_get_tree_with_offset(VP56RangeCoder *c, const int8_t (*tree)[2],
- const uint8_t *probs, int i)
+static av_always_inline
+int vp8_rac_get_tree_with_offset(VP56RangeCoder *c, const int8_t (*tree)[2],
+ const uint8_t *probs, int i)
{
do {
i = tree[i][vp56_rac_get_prob(c, probs[i])];
// how probabilities are associated with decisions is different, I think
// well, the new scheme fits in the old, but this way has one fewer branch per decision
// Calls vp8_rac_get_tree_with_offset() with the start index fixed at 0 and
// returns its result unchanged.
-static inline int vp8_rac_get_tree(VP56RangeCoder *c, const int8_t (*tree)[2],
- const uint8_t *probs)
+static av_always_inline
+int vp8_rac_get_tree(VP56RangeCoder *c, const int8_t (*tree)[2],
+ const uint8_t *probs)
{
return vp8_rac_get_tree_with_offset(c, tree, probs, 0);
}
// DCTextra
-static inline int vp8_rac_get_coeff(VP56RangeCoder *c, const uint8_t *prob)
+static av_always_inline int vp8_rac_get_coeff(VP56RangeCoder *c, const uint8_t *prob)
{
int v = 0;