X-Git-Url: https://git.sesse.net/?a=blobdiff_plain;f=sor.frag;h=ef431d3346d96be4e0270ddd6ad4c9ebd1664a63;hb=40043846b1a598b9819a03bcf0c6bda4a43aa7f9;hp=e1f86bbbfea8c3f6f019857369fe220eb69e5e4c;hpb=6911c97816e4dde727ed3aa0fa0631c4047d2bd6;p=nageru

diff --git a/sor.frag b/sor.frag
index e1f86bb..ef431d3 100644
--- a/sor.frag
+++ b/sor.frag
@@ -45,8 +45,9 @@ void main()
 	// just immediately throws away half of the warp, but it helps convergence
 	// a _lot_ (rough testing indicates that five iterations of SOR is as good
 	// as ~50 iterations of Jacobi). We could probably do better by reorganizing
-	// the data into two-values-per-pixel, so-called “twinning buffering”,
-	// but it makes for rather annoying code in the rest of the pipeline.
+	// the data into two-values-per-pixel, so-called “twinned buffering”;
+	// seemingly, it helps Haswell by ~15% on the SOR code, but does not help
+	// GTX 950 at all (at least not at 720p). Presumably the latter is already bandwidth-bound.
 	int color = int(round(element_sum_idx)) & 1;
 	if (color != phase) discard;