-#define ROT(x,s) ((x<<s)|(x>>(32-s)))
-#if 0
-static inline void mix(uint8_t state[4][4], uint32_t multbl[4][256]){
- int i;
- for(i=0; i<4; i++)
-#ifdef CONFIG_SMALL
- ((uint32_t *)(state))[i] = multbl[0][state[i][0]] ^ ROT(multbl[0][state[i][1]], 8)
- ^ROT(multbl[0][state[i][2]],16) ^ ROT(multbl[0][state[i][3]],24);
+/**
+ * AddRoundKey writing to a raw byte destination: XOR a 16-byte state block
+ * with a 16-byte round key and store the result at dst.
+ *
+ * The two 64-bit XORs cover the full 128-bit block; AV_WN64 performs
+ * unaligned-safe stores, so dst need not be 8-byte aligned.
+ *
+ * @param dst       destination buffer, at least 16 bytes (may be unaligned)
+ * @param src       source state block
+ * @param round_key round key block XORed into src
+ */
+static inline void addkey_d(uint8_t *dst, const av_aes_block *src,
+                            const av_aes_block *round_key)
+{
+    AV_WN64(dst,     src->u64[0] ^ round_key->u64[0]);
+    AV_WN64(dst + 8, src->u64[1] ^ round_key->u64[1]);
+}
+
+/**
+ * Combined S-box substitution and row-shift step on a 16-byte state.
+ *
+ * Reads bytes from the source block s0[1] through the lookup table 'box'
+ * and writes them into s0[0] at rotated positions: bytes 0/4/8/12 stay in
+ * place, while the other three "rows" are written with their indices
+ * rotated by one, two, or three 4-byte columns — the ShiftRows pattern.
+ *
+ * s1 and s3 alias s0 shifted by -s and +s bytes respectively.
+ * NOTE(review): s appears to select between the encrypt and decrypt shift
+ * directions by re-basing the row-1/row-3 accesses; the caller must ensure
+ * s0[0].u8 -/+ s stays inside valid state storage — confirm against callers.
+ */
+static void subshift(av_aes_block s0[2], int s, const uint8_t *box)
+{
+    av_aes_block *s1 = (av_aes_block *) (s0[0].u8 - s);
+    av_aes_block *s3 = (av_aes_block *) (s0[0].u8 + s);
+
+    /* row 0: substituted in place, no rotation */
+    s0[0].u8[ 0] = box[s0[1].u8[ 0]];
+    s0[0].u8[ 4] = box[s0[1].u8[ 4]];
+    s0[0].u8[ 8] = box[s0[1].u8[ 8]];
+    s0[0].u8[12] = box[s0[1].u8[12]];
+    /* row accessed via s1: rotated by one column (3<-7<-11<-15<-3) */
+    s1[0].u8[ 3] = box[s1[1].u8[ 7]];
+    s1[0].u8[ 7] = box[s1[1].u8[11]];
+    s1[0].u8[11] = box[s1[1].u8[15]];
+    s1[0].u8[15] = box[s1[1].u8[ 3]];
+    /* row 2: rotated by two columns (pairs swap: 2<->10, 6<->14) */
+    s0[0].u8[ 2] = box[s0[1].u8[10]];
+    s0[0].u8[10] = box[s0[1].u8[ 2]];
+    s0[0].u8[ 6] = box[s0[1].u8[14]];
+    s0[0].u8[14] = box[s0[1].u8[ 6]];
+    /* row accessed via s3: rotated by three columns (1<-13<-9<-5<-1) */
+    s3[0].u8[ 1] = box[s3[1].u8[13]];
+    s3[0].u8[13] = box[s3[1].u8[ 9]];
+    s3[0].u8[ 9] = box[s3[1].u8[ 5]];
+    s3[0].u8[ 5] = box[s3[1].u8[ 1]];
+}
+
+static inline int mix_core(uint32_t multbl[][256], int a, int b, int c, int d){
+#if CONFIG_SMALL
+ return multbl[0][a] ^ ROT(multbl[0][b], 8) ^ ROT(multbl[0][c], 16) ^ ROT(multbl[0][d], 24);