- t0 = vec_ld(0, dst);
-#if 0 //FIXME: there is still something wrong
- if (step == 2) {
- int y;
- vector float d0, d1, s3, t2;
- vector unsigned int sel =
- vec_mergeh(vec_splat_u32(-1), vec_splat_u32(0));
- t1 = vec_ld(16, dst);
- for (i=0,y=0; i<len-3; i+=4,y+=8) {
-
- s0 = vec_ld(0,src0+i);
- s1 = vec_ld(0,src1+i);
- s2 = vec_ld(0,src2+i);
-
-// t0 = vec_ld(0, dst+y); //[x x x|a]
-// t1 = vec_ld(16, dst+y); //[b c d|e]
- t2 = vec_ld(31, dst+y); //[f g h|x]
-
- d = vec_madd(s0,s1,s2); // [A B C D]
-
- // [A A B B]
-
- // [C C D D]
-
- d0 = vec_perm(t0, t1, mask); // [a b c d]
-
- d0 = vec_sel(vec_mergeh(d, d), d0, sel); // [A b B d]
-
- edges = vec_perm(t1, t0, mask);
-
- t0 = vec_perm(edges, d0, align); // [x x x|A]
-
- t1 = vec_perm(d0, edges, align); // [b B d|e]
-
- vec_stl(t0, 0, dst+y);
-
- d1 = vec_perm(t1, t2, mask); // [e f g h]
-
- d1 = vec_sel(vec_mergel(d, d), d1, sel); // [C f D h]
-
- edges = vec_perm(t2, t1, mask);
+ for (i=0; i<len-3; i+=4) {
+ t0 = vec_ld(0, dst+i);
+ t1 = vec_ld(15, dst+i);
+ s0 = vec_ld(0, src0+i);
+ s1 = vec_ld(0, src1+i);
+ s2 = vec_ld(0, src2+i);
+ edges = vec_perm(t1 ,t0, mask);
+ d = vec_madd(s0,s1,s2);
+ t1 = vec_perm(d, edges, align);
+ t0 = vec_perm(edges, d, align);
+ vec_st(t1, 15, dst+i);
+ vec_st(t0, 0, dst+i);
+ }
+}