#include <math.h>
#include <stdio.h>
#include <algorithm>
+#include <Eigen/Sparse>
+#include <Eigen/SparseQR>
+#include <Eigen/OrderingMethods>
#include "effect_chain.h"
#include "effect_util.h"
#include "resample_effect.h"
#include "util.h"
+using namespace Eigen;
using namespace std;
namespace movit {
return num_samples_saved;
}
+// Rescale the weights of vals[0..num) in place so that they add up to one.
+// The rescaling is deliberately run two times in a row; the second round
+// sometimes helps a tiny little bit when there are many samples
+// (presumably by absorbing rounding from the conversion back to T).
+template<class T>
+void normalize_sum(Tap<T>* vals, unsigned num)
+{
+	Tap<T> *const end = vals + num;
+	for (int round = 0; round < 2; ++round) {
+		// Accumulate in double precision, whatever the storage type T is.
+		double total = 0.0;
+		for (const Tap<T> *tap = vals; tap != end; ++tap) {
+			total += to_fp64(tap->weight);
+		}
+		// Divide every weight by the total and store it back as T.
+		for (Tap<T> *tap = vals; tap != end; ++tap) {
+			tap->weight = from_fp64<T>(to_fp64(tap->weight) / total);
+		}
+	}
+}
+
// Make use of the bilinear filtering in the GPU to reduce the number of samples
// we need to make. This is a bit more complex than BlurEffect since we cannot combine
// two neighboring samples if their weights have differing signs, so we first need to
src_samples,
src_samples - src_bilinear_samples);
assert(int(src_samples) - int(num_samples_saved) == src_bilinear_samples);
-
- // Normalize so that the sum becomes one. Note that we do it twice;
- // this sometimes helps a tiny little bit when we have many samples.
- for (int normalize_pass = 0; normalize_pass < 2; ++normalize_pass) {
- double sum = 0.0;
- for (int i = 0; i < src_bilinear_samples; ++i) {
- sum += to_fp64(bilinear_weights_ptr[i].weight);
- }
- for (int i = 0; i < src_bilinear_samples; ++i) {
- bilinear_weights_ptr[i].weight = from_fp64<DestFloat>(
- to_fp64(bilinear_weights_ptr[i].weight) / sum);
- }
- }
+ normalize_sum(bilinear_weights_ptr, src_bilinear_samples);
}
return src_bilinear_samples;
}
// Now make use of the bilinear filtering in the GPU to reduce the number of samples
// we need to make. Try fp16 first; if it's not accurate enough, we go to fp32.
+ // Our tolerance level for total error is a bit higher than the one for individual
+ // samples, since one would assume overall errors in the shape don't matter as much.
+ const float max_error = 2.0f / (255.0f * 255.0f);
Tap<fp16_int_t> *bilinear_weights_fp16;
src_bilinear_samples = combine_many_samples(weights, src_size, src_samples, dst_samples, &bilinear_weights_fp16);
Tap<float> *bilinear_weights_fp32 = NULL;
bilinear_weights_fp16 + y * src_bilinear_samples, src_bilinear_samples,
src_size);
max_sum_sq_error_fp16 = std::max(max_sum_sq_error_fp16, sum_sq_error_fp16);
+ if (max_sum_sq_error_fp16 > max_error) {
+ break;
+ }
}
- // Our tolerance level for total error is a bit higher than the one for invididual
- // samples, since one would assume overall errors in the shape don't matter as much.
- if (max_sum_sq_error_fp16 > 2.0f / (255.0f * 255.0f)) {
+ if (max_sum_sq_error_fp16 > max_error) {
fallback_to_fp32 = true;
src_bilinear_samples = combine_many_samples(weights, src_size, src_samples, dst_samples, &bilinear_weights_fp32);
}