temp1 = ((p3_src + p2_src) << 1) + p2_src + temp0;
temp1 = (v8u16) __msa_srari_h((v8i16) temp1, 3);
temp2 = (v8i16) (temp1 - p2_src);
- temp2 = CLIP_SH(temp2, tc_neg, tc_pos);
+ CLIP_SH(temp2, tc_neg, tc_pos);
dst0 = (v16u8) (temp2 + (v8i16) p2_src);
temp1 = temp0 + p2_src;
temp1 = (v8u16) __msa_srari_h((v8i16) temp1, 2);
temp2 = (v8i16) (temp1 - p1_src);
- temp2 = CLIP_SH(temp2, tc_neg, tc_pos);
+ CLIP_SH(temp2, tc_neg, tc_pos);
dst1 = (v16u8) (temp2 + (v8i16) p1_src);
temp1 = (temp0 << 1) + p2_src + q1_src;
temp1 = (v8u16) __msa_srari_h((v8i16) temp1, 3);
temp2 = (v8i16) (temp1 - p0_src);
- temp2 = CLIP_SH(temp2, tc_neg, tc_pos);
+ CLIP_SH(temp2, tc_neg, tc_pos);
dst2 = (v16u8) (temp2 + (v8i16) p0_src);
dst0 = __msa_bmz_v(dst0, (v16u8) p2_src, (v16u8) p_is_pcm_vec);
temp1 = ((q3_src + q2_src) << 1) + q2_src + temp0;
temp1 = (v8u16) __msa_srari_h((v8i16) temp1, 3);
temp2 = (v8i16) (temp1 - q2_src);
- temp2 = CLIP_SH(temp2, tc_neg, tc_pos);
+ CLIP_SH(temp2, tc_neg, tc_pos);
dst5 = (v16u8) (temp2 + (v8i16) q2_src);
temp1 = temp0 + q2_src;
temp1 = (v8u16) __msa_srari_h((v8i16) temp1, 2);
temp2 = (v8i16) (temp1 - q1_src);
- temp2 = CLIP_SH(temp2, tc_neg, tc_pos);
+ CLIP_SH(temp2, tc_neg, tc_pos);
dst4 = (v16u8) (temp2 + (v8i16) q1_src);
temp1 = (temp0 << 1) + p1_src + q2_src;
temp1 = (v8u16) __msa_srari_h((v8i16) temp1, 3);
temp2 = (v8i16) (temp1 - q0_src);
- temp2 = CLIP_SH(temp2, tc_neg, tc_pos);
+ CLIP_SH(temp2, tc_neg, tc_pos);
dst3 = (v16u8) (temp2 + (v8i16) q0_src);
dst3 = __msa_bmz_v(dst3, (v16u8) q0_src, (v16u8) q_is_pcm_vec);
abs_delta0 = __msa_add_a_h(delta0, (v8i16) zero);
abs_delta0 = (v8u16) abs_delta0 < temp1;
- delta0 = CLIP_SH(delta0, tc_neg, tc_pos);
+ CLIP_SH(delta0, tc_neg, tc_pos);
- temp0 = (v8u16) (delta0 + p0_src);
- temp0 = (v8u16) CLIP_SH_0_255(temp0);
- temp0 = (v8u16) __msa_bmz_v((v16u8) temp0, (v16u8) p0_src,
+ temp2 = (v8i16) (delta0 + p0_src);
+ CLIP_SH_0_255(temp2);
+ temp0 = (v8u16) __msa_bmz_v((v16u8) temp2, (v16u8) p0_src,
(v16u8) p_is_pcm_vec);
temp2 = (v8i16) (q0_src - delta0);
- temp2 = CLIP_SH_0_255(temp2);
+ CLIP_SH_0_255(temp2);
temp2 = (v8i16) __msa_bmz_v((v16u8) temp2, (v16u8) q0_src,
(v16u8) q_is_pcm_vec);
delta1 -= (v8i16) p1_src;
delta1 += delta0;
delta1 >>= 1;
- delta1 = CLIP_SH(delta1, tc_neg, tc_pos);
+ CLIP_SH(delta1, tc_neg, tc_pos);
delta1 = (v8i16) p1_src + (v8i16) delta1;
- delta1 = CLIP_SH_0_255(delta1);
+ CLIP_SH_0_255(delta1);
delta1 = (v8i16) __msa_bmnz_v((v16u8) delta1, (v16u8) p1_src,
(v16u8) p_is_pcm_vec);
delta2 = delta2 - (v8i16) q1_src;
delta2 = delta2 - delta0;
delta2 = delta2 >> 1;
- delta2 = CLIP_SH(delta2, tc_neg, tc_pos);
+ CLIP_SH(delta2, tc_neg, tc_pos);
delta2 = (v8i16) q1_src + (v8i16) delta2;
- delta2 = CLIP_SH_0_255(delta2);
+ CLIP_SH_0_255(delta2);
delta2 = (v8i16) __msa_bmnz_v((v16u8) delta2, (v16u8) q1_src,
(v16u8) q_is_pcm_vec);
temp1 = ((p3_src + p2_src) << 1) + p2_src + temp0;
temp1 = (v8u16) __msa_srari_h((v8i16) temp1, 3);
temp2 = (v8i16) (temp1 - p2_src);
- temp2 = CLIP_SH(temp2, tc_neg, tc_pos);
+ CLIP_SH(temp2, tc_neg, tc_pos);
dst0 = (v16u8) (temp2 + (v8i16) p2_src);
temp1 = temp0 + p2_src;
temp1 = (v8u16) __msa_srari_h((v8i16) temp1, 2);
temp2 = (v8i16) (temp1 - p1_src);
- temp2 = CLIP_SH(temp2, tc_neg, tc_pos);
+ CLIP_SH(temp2, tc_neg, tc_pos);
dst1 = (v16u8) (temp2 + (v8i16) p1_src);
temp1 = (temp0 << 1) + p2_src + q1_src;
temp1 = (v8u16) __msa_srari_h((v8i16) temp1, 3);
temp2 = (v8i16) (temp1 - p0_src);
- temp2 = CLIP_SH(temp2, tc_neg, tc_pos);
+ CLIP_SH(temp2, tc_neg, tc_pos);
dst2 = (v16u8) (temp2 + (v8i16) p0_src);
dst0 = __msa_bmz_v(dst0, (v16u8) p2_src, (v16u8) p_is_pcm_vec);
temp1 = ((q3_src + q2_src) << 1) + q2_src + temp0;
temp1 = (v8u16) __msa_srari_h((v8i16) temp1, 3);
temp2 = (v8i16) (temp1 - q2_src);
- temp2 = CLIP_SH(temp2, tc_neg, tc_pos);
+ CLIP_SH(temp2, tc_neg, tc_pos);
dst5 = (v16u8) (temp2 + (v8i16) q2_src);
temp1 = temp0 + q2_src;
temp1 = (v8u16) __msa_srari_h((v8i16) temp1, 2);
temp2 = (v8i16) (temp1 - q1_src);
- temp2 = CLIP_SH(temp2, tc_neg, tc_pos);
+ CLIP_SH(temp2, tc_neg, tc_pos);
dst4 = (v16u8) (temp2 + (v8i16) q1_src);
temp1 = (temp0 << 1) + p1_src + q2_src;
temp1 = (v8u16) __msa_srari_h((v8i16) temp1, 3);
temp2 = (v8i16) (temp1 - q0_src);
- temp2 = CLIP_SH(temp2, tc_neg, tc_pos);
+ CLIP_SH(temp2, tc_neg, tc_pos);
dst3 = (v16u8) (temp2 + (v8i16) q0_src);
dst3 = __msa_bmz_v(dst3, (v16u8) q0_src, (v16u8) q_is_pcm_vec);
abs_delta0 = __msa_add_a_h(delta0, (v8i16) zero);
abs_delta0 = (v8u16) abs_delta0 < temp1;
- delta0 = CLIP_SH(delta0, tc_neg, tc_pos);
+ CLIP_SH(delta0, tc_neg, tc_pos);
- temp0 = (v8u16) (delta0 + p0_src);
- temp0 = (v8u16) CLIP_SH_0_255(temp0);
- temp0 = (v8u16) __msa_bmz_v((v16u8) temp0, (v16u8) p0_src,
+ temp2 = (v8i16) (delta0 + p0_src);
+ CLIP_SH_0_255(temp2);
+ temp0 = (v8u16) __msa_bmz_v((v16u8) temp2, (v16u8) p0_src,
(v16u8) p_is_pcm_vec);
temp2 = (v8i16) (q0_src - delta0);
- temp2 = CLIP_SH_0_255(temp2);
+ CLIP_SH_0_255(temp2);
temp2 = (v8i16) __msa_bmz_v((v16u8) temp2, (v16u8) q0_src,
(v16u8) q_is_pcm_vec);
delta1 -= (v8i16) p1_src;
delta1 += delta0;
delta1 >>= 1;
- delta1 = CLIP_SH(delta1, tc_neg, tc_pos);
+ CLIP_SH(delta1, tc_neg, tc_pos);
delta1 = (v8i16) p1_src + (v8i16) delta1;
- delta1 = CLIP_SH_0_255(delta1);
+ CLIP_SH_0_255(delta1);
delta1 = (v8i16) __msa_bmnz_v((v16u8) delta1, (v16u8) p1_src,
(v16u8) p_is_pcm_vec);
delta2 = delta2 - (v8i16) q1_src;
delta2 = delta2 - delta0;
delta2 = delta2 >> 1;
- delta2 = CLIP_SH(delta2, tc_neg, tc_pos);
+ CLIP_SH(delta2, tc_neg, tc_pos);
delta2 = (v8i16) q1_src + (v8i16) delta2;
- delta2 = CLIP_SH_0_255(delta2);
+ CLIP_SH_0_255(delta2);
delta2 = (v8i16) __msa_bmnz_v((v16u8) delta2, (v16u8) q1_src,
(v16u8) q_is_pcm_vec);
temp1 = ((p3_src + p2_src) << 1) + p2_src + temp0;
temp1 = (v8u16) __msa_srari_h((v8i16) temp1, 3);
temp2 = (v8i16) (temp1 - p2_src);
- temp2 = CLIP_SH(temp2, tc_neg, tc_pos);
+ CLIP_SH(temp2, tc_neg, tc_pos);
dst0 = (v16u8) (temp2 + (v8i16) p2_src);
temp1 = temp0 + p2_src;
temp1 = (v8u16) __msa_srari_h((v8i16) temp1, 2);
temp2 = (v8i16) (temp1 - p1_src);
- temp2 = CLIP_SH(temp2, tc_neg, tc_pos);
+ CLIP_SH(temp2, tc_neg, tc_pos);
dst1 = (v16u8) (temp2 + (v8i16) p1_src);
temp1 = (temp0 << 1) + p2_src + q1_src;
temp1 = (v8u16) __msa_srari_h((v8i16) temp1, 3);
temp2 = (v8i16) (temp1 - p0_src);
- temp2 = CLIP_SH(temp2, tc_neg, tc_pos);
+ CLIP_SH(temp2, tc_neg, tc_pos);
dst2 = (v16u8) (temp2 + (v8i16) p0_src);
dst0 = __msa_bmz_v(dst0, (v16u8) p2_src, (v16u8) p_is_pcm_vec);
temp1 = ((q3_src + q2_src) << 1) + q2_src + temp0;
temp1 = (v8u16) __msa_srari_h((v8i16) temp1, 3);
temp2 = (v8i16) (temp1 - q2_src);
- temp2 = CLIP_SH(temp2, tc_neg, tc_pos);
+ CLIP_SH(temp2, tc_neg, tc_pos);
dst5 = (v16u8) (temp2 + (v8i16) q2_src);
temp1 = temp0 + q2_src;
temp1 = (v8u16) __msa_srari_h((v8i16) temp1, 2);
temp2 = (v8i16) (temp1 - q1_src);
- temp2 = CLIP_SH(temp2, tc_neg, tc_pos);
+ CLIP_SH(temp2, tc_neg, tc_pos);
dst4 = (v16u8) (temp2 + (v8i16) q1_src);
temp1 = (temp0 << 1) + p1_src + q2_src;
temp1 = (v8u16) __msa_srari_h((v8i16) temp1, 3);
temp2 = (v8i16) (temp1 - q0_src);
- temp2 = CLIP_SH(temp2, tc_neg, tc_pos);
+ CLIP_SH(temp2, tc_neg, tc_pos);
dst3 = (v16u8) (temp2 + (v8i16) q0_src);
dst3 = __msa_bmz_v(dst3, (v16u8) q0_src, (v16u8) q_is_pcm_vec);
abs_delta0 = __msa_add_a_h(delta0, (v8i16) zero);
abs_delta0 = (v8u16) abs_delta0 < temp1;
- delta0 = CLIP_SH(delta0, tc_neg, tc_pos);
- temp0 = (v8u16) (delta0 + p0_src);
- temp0 = (v8u16) CLIP_SH_0_255(temp0);
- temp0 = (v8u16) __msa_bmz_v((v16u8) temp0, (v16u8) p0_src,
+ CLIP_SH(delta0, tc_neg, tc_pos);
+ temp2 = (v8i16) (delta0 + p0_src);
+ CLIP_SH_0_255(temp2);
+ temp0 = (v8u16) __msa_bmz_v((v16u8) temp2, (v16u8) p0_src,
(v16u8) p_is_pcm_vec);
temp2 = (v8i16) (q0_src - delta0);
- temp2 = CLIP_SH_0_255(temp2);
+ CLIP_SH_0_255(temp2);
temp2 = (v8i16) __msa_bmz_v((v16u8) temp2, (v16u8) q0_src,
(v16u8) q_is_pcm_vec);
delta1 -= (v8i16) p1_src;
delta1 += delta0;
delta1 >>= 1;
- delta1 = CLIP_SH(delta1, tc_neg, tc_pos);
+ CLIP_SH(delta1, tc_neg, tc_pos);
delta1 = (v8i16) p1_src + (v8i16) delta1;
- delta1 = CLIP_SH_0_255(delta1);
+ CLIP_SH_0_255(delta1);
delta1 = (v8i16) __msa_bmnz_v((v16u8) delta1, (v16u8) p1_src,
(v16u8) p_is_pcm_vec);
delta2 = delta2 - (v8i16) q1_src;
delta2 = delta2 - delta0;
delta2 = delta2 >> 1;
- delta2 = CLIP_SH(delta2, tc_neg, tc_pos);
+ CLIP_SH(delta2, tc_neg, tc_pos);
delta2 = (v8i16) q1_src + (v8i16) delta2;
- delta2 = CLIP_SH_0_255(delta2);
+ CLIP_SH_0_255(delta2);
delta2 = (v8i16) __msa_bmnz_v((v16u8) delta2, (v16u8) q1_src,
(v16u8) q_is_pcm_vec);
temp1 = ((p3_src + p2_src) << 1) + p2_src + temp0;
temp1 = (v8u16) __msa_srari_h((v8i16) temp1, 3);
temp2 = (v8i16) (temp1 - p2_src);
- temp2 = CLIP_SH(temp2, tc_neg, tc_pos);
+ CLIP_SH(temp2, tc_neg, tc_pos);
dst0 = (v16u8) (temp2 + (v8i16) p2_src);
temp1 = temp0 + p2_src;
temp1 = (v8u16) __msa_srari_h((v8i16) temp1, 2);
temp2 = (v8i16) (temp1 - p1_src);
- temp2 = CLIP_SH(temp2, tc_neg, tc_pos);
+ CLIP_SH(temp2, tc_neg, tc_pos);
dst1 = (v16u8) (temp2 + (v8i16) p1_src);
temp1 = (temp0 << 1) + p2_src + q1_src;
temp1 = (v8u16) __msa_srari_h((v8i16) temp1, 3);
temp2 = (v8i16) (temp1 - p0_src);
- temp2 = CLIP_SH(temp2, tc_neg, tc_pos);
+ CLIP_SH(temp2, tc_neg, tc_pos);
dst2 = (v16u8) (temp2 + (v8i16) p0_src);
dst0 = __msa_bmz_v(dst0, (v16u8) p2_src, (v16u8) p_is_pcm_vec);
temp1 = ((q3_src + q2_src) << 1) + q2_src + temp0;
temp1 = (v8u16) __msa_srari_h((v8i16) temp1, 3);
temp2 = (v8i16) (temp1 - q2_src);
- temp2 = CLIP_SH(temp2, tc_neg, tc_pos);
+ CLIP_SH(temp2, tc_neg, tc_pos);
dst5 = (v16u8) (temp2 + (v8i16) q2_src);
temp1 = temp0 + q2_src;
temp1 = (v8u16) __msa_srari_h((v8i16) temp1, 2);
temp2 = (v8i16) (temp1 - q1_src);
- temp2 = CLIP_SH(temp2, tc_neg, tc_pos);
+ CLIP_SH(temp2, tc_neg, tc_pos);
dst4 = (v16u8) (temp2 + (v8i16) q1_src);
temp1 = (temp0 << 1) + p1_src + q2_src;
temp1 = (v8u16) __msa_srari_h((v8i16) temp1, 3);
temp2 = (v8i16) (temp1 - q0_src);
- temp2 = CLIP_SH(temp2, tc_neg, tc_pos);
+ CLIP_SH(temp2, tc_neg, tc_pos);
dst3 = (v16u8) (temp2 + (v8i16) q0_src);
dst3 = __msa_bmz_v(dst3, (v16u8) q0_src, (v16u8) q_is_pcm_vec);
abs_delta0 = __msa_add_a_h(delta0, (v8i16) zero);
abs_delta0 = (v8u16) abs_delta0 < temp1;
- delta0 = CLIP_SH(delta0, tc_neg, tc_pos);
+ CLIP_SH(delta0, tc_neg, tc_pos);
- temp0 = (v8u16) (delta0 + p0_src);
- temp0 = (v8u16) CLIP_SH_0_255(temp0);
- temp0 = (v8u16) __msa_bmz_v((v16u8) temp0, (v16u8) p0_src,
+ temp2 = (v8i16) (delta0 + p0_src);
+ CLIP_SH_0_255(temp2);
+ temp0 = (v8u16) __msa_bmz_v((v16u8) temp2, (v16u8) p0_src,
(v16u8) p_is_pcm_vec);
temp2 = (v8i16) (q0_src - delta0);
- temp2 = CLIP_SH_0_255(temp2);
+ CLIP_SH_0_255(temp2);
temp2 = (v8i16) __msa_bmz_v((v16u8) temp2, (v16u8) q0_src,
(v16u8) q_is_pcm_vec);
delta1 -= (v8i16) p1_src;
delta1 += delta0;
delta1 >>= 1;
- delta1 = CLIP_SH(delta1, tc_neg, tc_pos);
+ CLIP_SH(delta1, tc_neg, tc_pos);
delta1 = (v8i16) p1_src + (v8i16) delta1;
- delta1 = CLIP_SH_0_255(delta1);
+ CLIP_SH_0_255(delta1);
delta1 = (v8i16) __msa_bmnz_v((v16u8) delta1, (v16u8) p1_src,
(v16u8) p_is_pcm_vec);
delta2 = delta2 - (v8i16) q1_src;
delta2 = delta2 - delta0;
delta2 = delta2 >> 1;
- delta2 = CLIP_SH(delta2, tc_neg, tc_pos);
+ CLIP_SH(delta2, tc_neg, tc_pos);
delta2 = (v8i16) q1_src + (v8i16) delta2;
- delta2 = CLIP_SH_0_255(delta2);
+ CLIP_SH_0_255(delta2);
delta2 = (v8i16) __msa_bmnz_v((v16u8) delta2, (v16u8) q1_src,
(v16u8) q_is_pcm_vec);
delta1 = (v8i16) __msa_bmz_v((v16u8) delta1, (v16u8) p1_src,
temp0 <<= 2;
temp0 += temp1;
delta = __msa_srari_h((v8i16) temp0, 3);
- delta = CLIP_SH(delta, tc_neg, tc_pos);
+ CLIP_SH(delta, tc_neg, tc_pos);
temp0 = (v8i16) ((v8i16) p0 + delta);
- temp0 = CLIP_SH_0_255(temp0);
+ CLIP_SH_0_255(temp0);
temp0 = (v8i16) __msa_bmz_v((v16u8) temp0, (v16u8) p0,
(v16u8) p_is_pcm_vec);
temp1 = (v8i16) ((v8i16) q0 - delta);
- temp1 = CLIP_SH_0_255(temp1);
+ CLIP_SH_0_255(temp1);
temp1 = (v8i16) __msa_bmz_v((v16u8) temp1, (v16u8) q0,
(v16u8) q_is_pcm_vec);
temp0 <<= 2;
temp0 += temp1;
delta = __msa_srari_h((v8i16) temp0, 3);
- delta = CLIP_SH(delta, tc_neg, tc_pos);
+ CLIP_SH(delta, tc_neg, tc_pos);
temp0 = (v8i16) ((v8i16) p0 + delta);
- temp0 = CLIP_SH_0_255(temp0);
+ CLIP_SH_0_255(temp0);
temp0 = (v8i16) __msa_bmz_v((v16u8) temp0, (v16u8) p0,
(v16u8) p_is_pcm_vec);
temp1 = (v8i16) ((v8i16) q0 - delta);
- temp1 = CLIP_SH_0_255(temp1);
+ CLIP_SH_0_255(temp1);
temp1 = (v8i16) __msa_bmz_v((v16u8) temp1, (v16u8) q0,
(v16u8) q_is_pcm_vec);
v16u8 cmp_minus10, diff_minus10, diff_minus11;
v16u8 src0, src1, dst0, src_minus10, src_minus11, src_plus10, src_plus11;
v16i8 offset, sao_offset = LD_SB(sao_offset_val);
+ v16i8 zeros = { 0 };
sao_offset = __msa_pckev_b(sao_offset, sao_offset);
src -= 1;
for (height -= 2; height; height -= 2) {
src += (src_stride << 1);
- SLDI_B2_0_UB(src_minus10, src_minus11, src0, src1, 1);
- SLDI_B2_0_UB(src_minus10, src_minus11, src_plus10, src_plus11, 2);
+ SLDI_B2_UB(zeros, src_minus10, zeros, src_minus11, 1, src0, src1);
+ SLDI_B2_UB(zeros, src_minus10, zeros, src_minus11, 2, src_plus10, src_plus11);
PCKEV_D2_UB(src_minus11, src_minus10, src_plus11, src_plus10,
src_minus10, src_plus10);
dst += dst_stride;
}
- SLDI_B2_0_UB(src_minus10, src_minus11, src0, src1, 1);
- SLDI_B2_0_UB(src_minus10, src_minus11, src_plus10, src_plus11, 2);
+ SLDI_B2_UB(zeros, src_minus10, zeros, src_minus11, 1, src0, src1);
+ SLDI_B2_UB(zeros, src_minus10, zeros, src_minus11, 2, src_plus10, src_plus11);
PCKEV_D2_UB(src_minus11, src_minus10, src_plus11, src_plus10, src_minus10,
src_plus10);
dst_ptr = dst + v_cnt;
LD_UB4(src_minus1, src_stride, src10, src11, src12, src13);
- SLDI_B2_SB(src10, src11, src_minus10, src_minus11, src_zero0,
- src_zero1, 1);
- SLDI_B2_SB(src12, src13, src_minus12, src_minus13, src_zero2,
- src_zero3, 1);
- SLDI_B2_SB(src10, src11, src_minus10, src_minus11, src_plus10,
- src_plus11, 2);
- SLDI_B2_SB(src12, src13, src_minus12, src_minus13, src_plus12,
- src_plus13, 2);
+ SLDI_B4_SB(src10, src_minus10, src11, src_minus11,
+ src12, src_minus12, src13, src_minus13, 1,
+ src_zero0, src_zero1, src_zero2, src_zero3);
+ SLDI_B4_SB(src10, src_minus10, src11, src_minus11,
+ src12, src_minus12, src13, src_minus13, 2,
+ src_plus10, src_plus11, src_plus12, src_plus13);
cmp_minus10 = ((v16u8) src_zero0 == src_minus10);
cmp_plus10 = ((v16u8) src_zero0 == (v16u8) src_plus10);
v16u8 src_minus11, src10, src11;
v16i8 src_plus0, src_zero0, src_plus1, src_zero1, dst0;
v8i16 offset_mask0, offset_mask1;
+ v16i8 zeros = { 0 };
sao_offset = __msa_pckev_b(sao_offset, sao_offset);
for (height -= 2; height; height -= 2) {
src_orig += (src_stride << 1);
- SLDI_B2_0_SB(src_minus11, src10, src_zero0, src_zero1, 1);
- SLDI_B2_0_SB(src10, src11, src_plus0, src_plus1, 2);
+ SLDI_B2_SB(zeros, src_minus11, zeros, src10, 1, src_zero0, src_zero1);
+ SLDI_B2_SB(zeros, src10, zeros, src11, 2, src_plus0, src_plus1);
ILVR_B2_UB(src_plus0, src_minus10, src_plus1, src_minus11, src_minus10,
src_minus11);
dst += dst_stride;
}
- SLDI_B2_0_SB(src_minus11, src10, src_zero0, src_zero1, 1);
- SLDI_B2_0_SB(src10, src11, src_plus0, src_plus1, 2);
+ SLDI_B2_SB(zeros, src_minus11, zeros, src10, 1, src_zero0, src_zero1);
+ SLDI_B2_SB(zeros, src10, zeros, src11, 2, src_plus0, src_plus1);
ILVR_B2_UB(src_plus0, src_minus10, src_plus1, src_minus11, src_minus10,
src_minus11);
v16u8 src_minus10, src10, src_minus11, src11;
v16i8 src_zero0, src_plus10, src_zero1, src_plus11, dst0;
v8i16 offset_mask0, offset_mask1;
+ v16i8 zeros = { 0 };
sao_offset = __msa_pckev_b(sao_offset, sao_offset);
src_orig = src - 1;
for (height -= 2; height; height -= 2) {
src_orig += (src_stride << 1);
- SLDI_B2_0_SB(src_minus11, src10, src_zero0, src_zero1, 1);
- SLDI_B2_0_SB(src10, src11, src_plus10, src_plus11, 2);
+ SLDI_B2_SB(zeros, src_minus11, zeros, src10, 1, src_zero0, src_zero1);
+ SLDI_B2_SB(zeros, src10, zeros, src11, 2, src_plus10, src_plus11);
ILVR_B2_UB(src_plus10, src_minus10, src_plus11, src_minus11,
src_minus10, src_minus11);
dst += dst_stride;
}
- SLDI_B2_0_SB(src_minus11, src10, src_zero0, src_zero1, 1);
- SLDI_B2_0_SB(src10, src11, src_plus10, src_plus11, 2);
+ SLDI_B2_SB(zeros, src_minus11, zeros, src10, 1, src_zero0, src_zero1);
+ SLDI_B2_SB(zeros, src10, zeros, src11, 2, src_plus10, src_plus11);
ILVR_B2_UB(src_plus10, src_minus10, src_plus11, src_minus11, src_minus10,
src_minus11);
ILVR_B2_SB(src_zero0, src_zero0, src_zero1, src_zero1, src_zero0,
src_plus13 = LD_UB(src + 1 + v_cnt + (src_stride << 2));
src_orig += 16;
- SLDI_B2_SB(src10, src11, src_minus11, src_minus12, src_zero0,
- src_zero1, 1);
- SLDI_B2_SB(src12, src13, src_minus13, src_minus14, src_zero2,
- src_zero3, 1);
- SLDI_B2_SB(src11, src12, src_minus12, src_minus13, src_plus10,
- src_plus11, 2);
+ SLDI_B4_SB(src10, src_minus11, src11, src_minus12,
+ src12, src_minus13, src13, src_minus14, 1,
+ src_zero0, src_zero1, src_zero2, src_zero3);
+ SLDI_B2_SB(src11, src_minus12, src12, src_minus13, 2, src_plus10,
+ src_plus11);
src_plus12 = __msa_sldi_b((v16i8) src13, (v16i8) src_minus14, 2);
v16u8 cmp_minus10, diff_minus10, cmp_minus11, diff_minus11;
v16u8 src_minus10, src10, src_minus11, src11;
v8i16 offset_mask0, offset_mask1;
+ v16i8 zeros = { 0 };
sao_offset = __msa_pckev_b(sao_offset, sao_offset);
src_orig = src - 1;
for (height -= 2; height; height -= 2) {
src_orig += (src_stride << 1);
- SLDI_B2_0_SB(src_minus11, src10, src_zero0, src_zero1, 1);
- SLDI_B2_0_UB(src_minus10, src_minus11, src_minus10, src_minus11, 2);
+ SLDI_B2_SB(zeros, src_minus11, zeros, src10, 1, src_zero0, src_zero1);
+ SLDI_B2_UB(zeros, src_minus10, zeros, src_minus11, 2, src_minus10, src_minus11);
ILVR_B2_UB(src10, src_minus10, src11, src_minus11, src_minus10,
src_minus11);
dst += dst_stride;
}
- SLDI_B2_0_SB(src_minus11, src10, src_zero0, src_zero1, 1);
- SLDI_B2_0_UB(src_minus10, src_minus11, src_minus10, src_minus11, 2);
+ SLDI_B2_SB(zeros, src_minus11, zeros, src10, 1, src_zero0, src_zero1);
+ SLDI_B2_UB(zeros, src_minus10, zeros, src_minus11, 2, src_minus10, src_minus11);
ILVR_B2_UB(src10, src_minus10, src11, src_minus11, src_minus10,
src_minus11);
v16u8 src_minus10, src10, src_minus11, src11;
v16i8 src_zero0, src_zero1, dst0;
v8i16 offset_mask0, offset_mask1;
+ v16i8 zeros = { 0 };
sao_offset = __msa_pckev_b(sao_offset, sao_offset);
src_orig = src - 1;
for (height -= 2; height; height -= 2) {
src_orig += (src_stride << 1);
- SLDI_B2_0_SB(src_minus11, src10, src_zero0, src_zero1, 1);
- SLDI_B2_0_UB(src_minus10, src_minus11, src_minus10, src_minus11, 2);
+ SLDI_B2_SB(zeros, src_minus11, zeros, src10, 1, src_zero0, src_zero1);
+ SLDI_B2_UB(zeros, src_minus10, zeros, src_minus11, 2, src_minus10, src_minus11);
ILVR_B2_UB(src10, src_minus10, src11, src_minus11, src_minus10,
src_minus11);
ILVR_B2_SB(src_zero0, src_zero0, src_zero1, src_zero1, src_zero0,
dst += dst_stride;
}
- SLDI_B2_0_SB(src_minus11, src10, src_zero0, src_zero1, 1);
- SLDI_B2_0_UB(src_minus10, src_minus11, src_minus10, src_minus11, 2);
+ SLDI_B2_SB(zeros, src_minus11, zeros, src10, 1, src_zero0, src_zero1);
+ SLDI_B2_UB(zeros, src_minus10, zeros, src_minus11, 2, src_minus10, src_minus11);
ILVR_B2_UB(src10, src_minus10, src11, src_minus11, src_minus10,
src_minus11);
ILVR_B2_SB(src_zero0, src_zero0, src_zero1, src_zero1, src_zero0,