1 // This file is part of BOINC.
2 // http://boinc.berkeley.edu
3 // Copyright (C) 2008 University of California
5 // BOINC is free software; you can redistribute it and/or modify it
6 // under the terms of the GNU Lesser General Public License
7 // as published by the Free Software Foundation,
8 // either version 3 of the License, or (at your option) any later version.
10 // BOINC is distributed in the hope that it will be useful,
11 // but WITHOUT ANY WARRANTY; without even the implied warranty of
12 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
13 // See the GNU Lesser General Public License for more details.
15 // You should have received a copy of the GNU Lesser General Public License
16 // along with BOINC. If not, see <http://www.gnu.org/licenses/>.
18 // Code to facilitate writing validators.
19 // Can be used as the basis for a validator that accepts everything
20 // (see sample_trivial_validator.C),
21 // or that requires strict equality (see sample_bitwise_validator.C)
22 // or that uses fuzzy comparison.
27 #include "error_numbers.h"
32 #include "sched_util.h"
33 #include "sched_config.h"
34 #include "sched_msgs.h"
35 #include "validator.h"
36 #include "validate_util.h"
// Parse the children of a <file_ref> element from xp into this FILE_INFO.
// Recognized children: <file_name> (stored in name), and the booleans
// <optional> and <no_validate> (stored in the members of the same names).
// On reaching </file_ref>, returns 0 if `found` is set, else ERR_XML_PARSE.
// NOTE(review): this listing omits some lines of the function (the embedded
// line numbers skip), including the place where `found` is set -- presumably
// after a successful file_name parse; confirm against the full file.
41 int FILE_INFO::parse(XML_PARSER& xp) {
43 bool is_tag, found=false;
// Keep reading tokens for as long as xp.get() returns zero.
46 while (!xp.get(tag, sizeof(tag), is_tag)) {
// Tokens that are not tags are ignored.
47 if (!is_tag) continue;
48 if (!strcmp(tag, "/file_ref")) {
// End of element: succeed only if `found` has been set.
49 return found?0:ERR_XML_PARSE;
51 if (xp.parse_string(tag, "file_name", name)) {
55 if (xp.parse_bool(tag, "optional", optional)) continue;
56 if (xp.parse_bool(tag, "no_validate", no_validate)) continue;
// Scan result.xml_doc_in for a <file_ref> element and parse it into fi.
// A nonzero return from fi.parse() is returned to the caller unchanged.
// NOTE(review): several lines are missing from this listing (declarations of
// mf/xp/is_tag, the head of the call that takes the argument list below, and
// the function's tail); the comments here cover only the visible statements.
61 int get_output_file_info(RESULT& result, FILE_INFO& fi) {
62 char tag[256], path[1024];
// Parse from the in-memory XML document attached to the result.
66 mf.init_buf_read(result.xml_doc_in);
68 while (!xp.get(tag, sizeof(tag), is_tag)) {
69 if (!is_tag) continue;
70 if (!strcmp(tag, "file_ref")) {
71 int retval = fi.parse(xp);
72 if (retval) return retval;
// Arguments of a call whose first line is not shown: the parsed file name,
// the configured upload directory and fanout, and the local `path` buffer
// (presumably an output buffer for the file's path -- TODO confirm).
74 fi.name.c_str(), config.upload_dir, config.uldl_dir_fanout, path
// Vector counterpart of get_output_file_info(): scan result.xml_doc_in and
// parse each <file_ref> element encountered.
// A nonzero return from FILE_INFO::parse() aborts the scan and is returned.
// NOTE(review): the declaration of `fi`, the head of the call that takes the
// argument list below, and the code that appends to `fis` are not visible in
// this listing; presumably each parsed entry is pushed onto fis -- confirm.
83 int get_output_file_infos(RESULT& result, vector<FILE_INFO>& fis) {
84 char tag[256], path[1024];
88 mf.init_buf_read(result.xml_doc_in);
91 while (!xp.get(tag, sizeof(tag), is_tag)) {
92 if (!is_tag) continue;
93 if (!strcmp(tag, "file_ref")) {
95 int retval = fi.parse(xp);
96 if (retval) return retval;
// Same argument list as in get_output_file_info(): file name, upload
// directory, directory fanout, and the local `path` buffer.
98 fi.name.c_str(), config.upload_dir, config.uldl_dir_fanout, path
// Convenience wrapper around get_output_file_info() that yields a path string.
// Any nonzero code from get_output_file_info() is returned unchanged.
// NOTE(review): the lines that declare `fi` and assign `path` are not shown
// in this listing; presumably path is taken from fi.path -- confirm.
107 int get_output_file_path(RESULT& result, string& path) {
109 int retval = get_output_file_info(result, fi);
110 if (retval) return retval;
// Collect the path of every output file described in the result.
// Calls get_output_file_infos() and appends each entry's `path` member to
// `paths`; a nonzero code from get_output_file_infos() is returned unchanged.
// NOTE(review): whether `paths` is cleared first cannot be seen here (one
// line before the loop is missing from this listing).
115 int get_output_file_paths(RESULT& result, vector<string>& paths) {
116 vector<FILE_INFO> fis;
117 int retval = get_output_file_infos(result, fis);
118 if (retval) return retval;
// Copy out just the path of each parsed file description.
120 for (unsigned int i=0; i<fis.size(); i++) {
121 paths.push_back(fis[i].path);
// Member parse routine for a <file_ref> element (the enclosing struct's
// header is not visible in this listing). Fills the fixed-size character
// members file_name and open_name from the children of the same names.
// NOTE(review): the statement executed on </file_ref> is on a missing line
// (presumably a success return); the visible ERR_XML_PARSE return is
// presumably reached when input ends before the closing tag -- confirm.
129 int parse(XML_PARSER& xp) {
// Start from empty strings so a missing child leaves the field empty.
133 strcpy(file_name, "");
134 strcpy(open_name, "");
135 while (!xp.get(tag, sizeof(tag), is_tag)) {
136 if (!is_tag) continue;
137 if (!strcmp(tag, "/file_ref")) {
// parse_str is given the destination size, so copies are bounded.
140 if (xp.parse_str(tag, "file_name", file_name, sizeof(file_name))) continue;
141 if (xp.parse_str(tag, "open_name", open_name, sizeof(open_name))) continue;
143 return ERR_XML_PARSE;
147 // given a path returned by the above, get the corresponding logical name
// The physical name is the part of `path` after its last '/'. Each <file_ref>
// in result.xml_doc_in is parsed and its file_name compared with that
// physical name.
// Returns ERR_NOT_FOUND if `path` contains no '/'.
// NOTE(review): the declarations of tag/is_tag/mf/xp/fr/retval and the body
// of the match branch (where `name` is presumably assigned from the matching
// file_ref and 0 returned) are on lines missing from this listing.
149 int get_logical_name(RESULT& result, string& path, string& name) {
150 char phys_name[1024];
156 mf.init_buf_read(result.xml_doc_in);
// NOTE(review): unbounded copy -- a path of 1024 or more characters would
// overflow phys_name.
159 strcpy(phys_name, path.c_str());
160 char* p = strrchr(phys_name, '/');
161 if (!p) return ERR_NOT_FOUND;
// NOTE(review): p points inside phys_name, so source and destination
// overlap here; strcpy's behavior is undefined for overlapping objects.
162 strcpy(phys_name, p+1);
164 while (!xp.get(tag, sizeof(tag), is_tag)) {
165 if (!is_tag) continue;
// The enclosing <result> tag itself carries no information needed here.
166 if (!strcmp(tag, "result")) continue;
167 if (!strcmp(tag, "file_ref")) {
169 retval = fr.parse(xp);
// A file_ref that fails to parse is skipped rather than aborting the scan.
170 if (retval) continue;
171 if (!strcmp(phys_name, fr.file_name)) {
// Presumably reached only for tags not handled above -- confirm.
177 xp.skip_unexpected(tag, false, 0);
179 return ERR_XML_PARSE;
// Smallest claimed credit treated as nonzero: median_mean_credit() skips
// claims below this value, and the credit functions in this file return it
// on some of their paths.
182 #define CREDIT_EPSILON .001
184 // If we have N correct results with nonzero claimed credit,
185 // compute a canonical credit as follows:
186 // - if N==0 (all claimed credits are infinitesimal), return CREDIT_EPSILON
187 // - if N==1, return that credit
188 // - if N==2, return min
189 // - if N>2, toss out min and max, return average of rest
// Compute a canonical credit from the valid results of a workunit.
// Pass 1 looks only at results whose validate_state is VALIDATE_STATE_VALID
// and whose claimed_credit is at least CREDIT_EPSILON, tracking the lowest
// and highest claimed credit among them.
// Pass 2 sums claimed_credit over valid results other than indices ilow and
// ihigh and returns that sum divided by (nvalid-2).
// NOTE(review): the lines that update ilow/ihigh/nvalid and that select
// between the return statements are missing from this listing; the comments
// here cover only the visible statements.
// NOTE(review): the low/high comparisons are strict, so if every counted
// claim is equal, ilow and ihigh presumably stay on the same index; pass 2
// would then skip one result while still dividing by nvalid-2 -- confirm.
191 double median_mean_credit(WORKUNIT& /*wu*/, vector<RESULT>& results) {
192 int ilow=-1, ihigh=-1;
193 double credit_low = 0, credit_high = 0;
197 for (i=0; i<results.size(); i++) {
198 RESULT& result = results[i];
199 if (result.validate_state != VALIDATE_STATE_VALID) continue;
// Claims below the epsilon threshold are treated as zero and not counted.
200 if (result.claimed_credit < CREDIT_EPSILON) continue;
203 credit_low = credit_high = result.claimed_credit;
205 if (result.claimed_credit < credit_low) {
207 credit_low = result.claimed_credit;
209 if (result.claimed_credit > credit_high) {
211 credit_high = result.claimed_credit;
219 return CREDIT_EPSILON;
225 for (i=0; i<results.size(); i++) {
// Drop the extreme low and high claims from the average.
226 if (i == (unsigned int) ilow) continue;
227 if (i == (unsigned int) ihigh) continue;
228 RESULT& result = results[i];
// NOTE(review): no CREDIT_EPSILON filter is visible in this pass, so valid
// results with a sub-epsilon claim are added to the sum here although pass 1
// skipped them -- confirm this is intended.
229 if (result.validate_state != VALIDATE_STATE_VALID) continue;
231 sum += result.claimed_credit;
233 return sum/(nvalid-2);
// Obtain a credit value from the workunit's XML document.
// Reads the "xml_doc" field into dbwu.xml_doc, then looks for a <credit>
// element with parse_double(), storing the parsed number in x.
// NOTE(review): the declarations of dbwu/x/retval, the handling of retval,
// and the success branch (presumably credit = x; return 0) are on lines
// missing from this listing; ERR_XML_PARSE is presumably returned when no
// <credit> element is found -- confirm.
237 int get_credit_from_wu(WORKUNIT& wu, vector<RESULT>&, double& credit) {
243 retval = dbwu.get_field_str("xml_doc", dbwu.xml_doc, sizeof(dbwu.xml_doc));
245 if (parse_double(dbwu.xml_doc, "<credit>", x)) {
250 return ERR_XML_PARSE;
253 // This function should be called from the validator whenever credit
254 // is granted to a host. Its purpose is to track the average credit
255 // per cpu time for that host.
257 // It updates an exponentially-decaying estimate of credit_per_cpu_sec
258 // Note that this does NOT decay with time, but instead decays with
259 // total credits earned. If a host stops earning credits, then this
260 // quantity stops decaying. So credit_per_cpu_sec must NOT be
261 // periodically decayed using the update_stats utility or similar
264 // The intended purpose is for cross-project credit comparisons on
265 // BOINC statistics pages, for hosts attached to multiple projects.
266 // One day people will write PhD theses on how to normalize credit
267 // values to equalize them across projects. I hope this will be done
268 // according to "Allen's principle": "Credits granted by a project
269 // should be normalized so that, averaged across all hosts attached to
270 // multiple projects, projects grant equal credit per cpu second."
271 // This principle ensures that (on average) participants will choose
272 // projects based on merit, not based on credits. It also ensures
273 // that (on average) host machines migrate to the projects for which
274 // they are best suited.
276 // For cross-project comparison the value of credit_per_cpu_sec should
277 // be exported in the statistics file host_id.gz, which is written by
278 // the code in db_dump.C.
280 // Algorithm: credits_per_cpu_second should be updated each time that
281 // a host is granted credit, according to:
283 // CREDIT_AVERAGE_CONST = 500 [see Note 5]
284 // MAX_CREDIT_PER_CPU_SEC = 0.1 [see Note 6]
286 // e = tanh(granted_credit/CREDIT_AVERAGE_CONST)
287 // if (e < 0) then e = 0
288 // if (e > 1) then e = 1
289 // if (credit_per_cpu_sec <= 0) then e = 1
290 // if (cpu_time <= 0) then e = 0 [see Note 4]
291 // if (granted_credit <= 0) then e = 0 [see Note 3]
293 // rate = granted_credit/cpu_time
294 // if (rate < 0) rate = 0
295 // if (rate > MAX_CREDIT_PER_CPU_SEC) rate = MAX_CREDIT_PER_CPU_SEC
297 // credit_per_cpu_sec = e * rate + (1 - e) * credit_per_cpu_sec
299 // Note 0: all quantities above should be treated as real numbers
300 // Note 1: cpu_time is measured in seconds
301 // Note 2: When a host is created, the initial value of
302 // credit_per_cpu_sec, should be zero.
303 // Note 3: If a host has done invalid work (granted_credit==0) we have
304 // chosen not to include it. One might argue that the
305 // boundary case granted_credit==0 should be treated the same
306 // as granted_credit>0. However the goal here is not to
307 // identify cpus whose host machines sometimes produce
308 // rubbish. It is to get a measure of how effectively the cpu
309 // runs the application code.
310 // Note 4: e==0 means 'DO NOT include the first term on the rhs of the
311 // equation defining credit_per_cpu_sec' which is equivalent
312 // to 'DO NOT update credit_per_cpu_sec'.
313 // Note 5: CREDIT_AVERAGE_CONST determines the exponential decay
314 // credit used in averaging credit_per_cpu_sec. It may be
315 // changed at any time, even if the project database has
316 // already been populated with non-zero values of
317 // credit_per_cpu_sec.
318 // Note 6: Typical VERY FAST cpus have credit_per_cpu_sec of around
319 // 0.02. This is a safety mechanism designed to prevent
320 // trouble if a client or host has reported absurd values (due
321 // to a bug in client or server software or by cheating). In
322 // five years when cpus are five times faster, please increase
323 // the value of MAX_CREDIT_PER_CPU_SEC. You may also want to increase the value of
324 // CREDIT_AVERAGE_CONST.
326 // Nonzero return value: host exceeded the max allowed
// Update a host's running estimate of credit earned per CPU second.
// The weight e = tanh(granted_credit/credit_average_const) blends the rate of
// this job (granted_credit/cpu_time, clamped to [0, max_credit_per_cpu_sec])
// into the previous estimate: new = e*rate + (1-e)*old.
// NOTE(review): the declaration of `retval` and the final return are on
// lines missing from this listing; presumably retval is set nonzero when the
// rate is clamped to the maximum -- confirm.
329 int update_credit_per_cpu_sec(
330 double granted_credit, // credit granted for this work
331 double cpu_time, // cpu time (seconds) used for this work
332 double& credit_per_cpu_sec // (average) credit per cpu second
336 // Either of these values may be freely changed in the future.
337 // When CPUs get much faster one must increase the 'sanity-check'
338 // value of max_credit_per_cpu_sec. At that time it would also
339 // make sense to proportionally increase the credit_average_const.
341 const double credit_average_const = 500;
// NOTE(review): the algorithm comment preceding this function lists
// MAX_CREDIT_PER_CPU_SEC = 0.1; the value actually used is 0.07.
342 const double max_credit_per_cpu_sec = 0.07;
344 double e = tanh(granted_credit/credit_average_const);
// No update at all when the weight is non-positive or either input is zero.
// NOTE(review): the algorithm comment says "<= 0" for cpu_time and
// granted_credit; this tests "== 0.0". A negative cpu_time with positive
// credit gets past this test and its negative rate is clamped to 0 below.
345 if (e <= 0.0 || cpu_time == 0.0 || granted_credit == 0.0) return retval;
// With no previous estimate (exactly zero), take this job's rate as is.
346 if (e > 1.0 || credit_per_cpu_sec == 0.0) e = 1.0;
348 double rate = granted_credit/cpu_time;
349 if (rate < 0.0) rate = 0.0;
350 if (rate > max_credit_per_cpu_sec) {
351 rate = max_credit_per_cpu_sec;
355 credit_per_cpu_sec = e * rate + (1.0 - e) * credit_per_cpu_sec;
// Compute granted credit by discarding claims outside a band around the mean.
// Visible steps:
//   1. Sum claimed_credit over valid results and form the average.
//   2. Compute the standard deviation of the claims (divides by nvalid).
//   3. Build an acceptance band:
//        low  = min(avg - sd, 0.85*avg) - 2.5, but never below 1
//        high = max(avg + sd, 1.15*avg) + 5
//   4. Re-sum the claims lying strictly inside the band and log the rest.
//   5. Set grant_credit from median_mean_credit() or from credit/nvalid, and
//      log how it compares with the median_mean_credit() value.
//   6. For any valid result claiming more than
//        max(grant + 1.5*sd, 1.65*grant) + 20
//      set that result's granted_credit to half of grant_credit and log it.
// NOTE(review): many lines are missing from this listing (declarations, the
// nvalid counting, resets between passes, the conditions that choose between
// the two grant_credit assignments, the final return). Those parts are not
// described here; confirm against the full file.
360 double stddev_credit(WORKUNIT& wu, std::vector<RESULT>& results) {
361 double credit_low_bound = 0, credit_high_bound = 0;
362 double penalize_credit_high_bound = 0;
363 double credit_avg = 0;
// Pass 1: total claimed credit over valid results.
371 for (i=0; i<results.size(); i++) {
372 RESULT& result = results[i];
373 if (result.validate_state != VALIDATE_STATE_VALID) continue;
374 credit = credit + result.claimed_credit;
379 return CREDIT_EPSILON;
382 credit_avg = credit/nvalid;
385 //calculate stddev difference
// Pass 2: accumulate squared deviations from the average.
386 for (i=0; i<results.size(); i++) {
387 RESULT& result = results[i];
388 if (result.validate_state != VALIDATE_STATE_VALID) continue;
389 std_dev = pow(credit_avg - result.claimed_credit,2) + std_dev;
393 std_dev = std_dev/ (double) nvalid;
394 std_dev = sqrt(std_dev);
// Lower bound: at most 85% of the average, less a margin of 2.5, floor of 1.
396 credit_low_bound = credit_avg-std_dev;
397 if (credit_low_bound > credit_avg*.85) {
398 credit_low_bound = credit_avg*.85;
400 credit_low_bound = credit_low_bound - 2.5;
401 if (credit_low_bound < 1) credit_low_bound = 1;
// Upper bound: at least 115% of the average, plus a margin of 5.
403 credit_high_bound = credit_avg+std_dev;
404 if (credit_high_bound < credit_avg*1.15) {
405 credit_high_bound = credit_avg*1.15;
407 credit_high_bound = credit_high_bound + 5;
// Pass 3: keep only claims strictly inside (low, high).
412 for (i=0; i<results.size(); i++) {
413 RESULT& result = results[i];
414 if (result.validate_state != VALIDATE_STATE_VALID) continue;
415 if (result.claimed_credit < credit_high_bound && result.claimed_credit > credit_low_bound) {
416 credit = credit + result.claimed_credit;
// Presumably the else branch: report the claim that was left out.
419 log_messages.printf(MSG_NORMAL,
420 "[RESULT#%d %s] CREDIT_CALC_SD Discarding invalid credit %.1lf, avg %.1lf, low %.1lf, high %.1lf \n",
421 result.id, result.name, result.claimed_credit,
422 credit_avg, credit_low_bound, credit_high_bound
// Two alternative assignments; the condition selecting between them is on
// lines not shown (presumably the fallback is used when no claim survived).
430 grant_credit = median_mean_credit(wu, results);
434 grant_credit = credit/nvalid;
// `old` is the median_mean_credit() value, used for the comparison logs below.
435 old = median_mean_credit(wu, results);
439 if (old > grant_credit) {
440 log_messages.printf(MSG_DEBUG,
441 "CREDIT_CALC_VAL New Method grant: %.1lf Old Method grant: %.1lf Less awarded\n",
// NOTE(review): exact equality test on doubles.
444 } else if (old == grant_credit) {
445 log_messages.printf(MSG_DEBUG,
446 "CREDIT_CALC_VAL New Method grant: %.1lf Old Method grant: %.1lf Same awarded\n",
450 log_messages.printf(MSG_DEBUG,
451 "CREDIT_CALC_VAL New Method grant: %.1lf Old Method grant: %.1lf More awarded\n",
456 // penalize hosts that are claiming too much
// Penalty threshold: at least 165% of the grant, plus a margin of 20.
457 penalize_credit_high_bound = grant_credit+1.5*std_dev;
458 if (penalize_credit_high_bound < grant_credit*1.65) {
459 penalize_credit_high_bound = grant_credit*1.65;
461 penalize_credit_high_bound = penalize_credit_high_bound + 20;
463 for (i=0; i<results.size(); i++) {
464 RESULT& result = results[i];
465 if (result.validate_state != VALIDATE_STATE_VALID) continue;
466 if (result.claimed_credit > penalize_credit_high_bound) {
// Over-claiming results are granted half of the computed credit.
467 result.granted_credit = grant_credit * 0.5;
468 log_messages.printf(MSG_NORMAL,
469 "[RESULT#%d %s] CREDIT_CALC_PENALTY Penalizing host for too high credit %.1lf, grant %.1lf, penalize %.1lf, stddev %.1lf, avg %.1lf, low %.1lf, high %.1lf \n",
470 result.id, result.name, result.claimed_credit, grant_credit,
471 penalize_credit_high_bound, std_dev, credit_avg,
472 credit_low_bound, credit_high_bound
// Credit calculation for workunits with a small number of valid results.
// Visible steps:
//   - Average the claimed credit of the valid results, remembering the last
//     claim seen.
//   - With more than two valid results, delegate to stddev_credit().
//   - Otherwise, if the last claim is within 15% of the average, return the
//     average.
//   - Otherwise log runtime discrepancies, then grant the claim of the result
//     whose claim is closest to its host's historical credit
//     (cpu_time * host.credit_per_cpu_sec), considering only results with
//     cpu_time > 30; if none qualifies the average is granted.
// NOTE(review): declarations, the nvalid counting, the condition guarding the
// CREDIT_EPSILON return, one branch condition in the runtime loop, and the
// final return are on lines missing from this listing.
480 double two_credit(WORKUNIT& wu, std::vector<RESULT>& results) {
483 double credit_avg = 0;
484 double last_credit = 0;
489 for (i=0; i<results.size(); i++) {
490 RESULT& result = results[i];
491 if (result.validate_state != VALIDATE_STATE_VALID) continue;
492 credit = credit + result.claimed_credit;
493 last_credit = result.claimed_credit;
498 return CREDIT_EPSILON;
501 credit_avg = credit/nvalid;
503 // If more than 2 valid results, compute via stddev method
504 if (nvalid > 2) return stddev_credit(wu, results);
505 log_messages.printf(MSG_DEBUG,
506 "[WORKUNIT#%d %s] Only 2 results \n",wu.id, wu.name
509 // If only 2, then check to see if range is reasonable
// With two claims both are equally far from the average, so testing the
// last one is enough.
510 if (fabs(last_credit - credit_avg) < 0.15*credit_avg) return credit_avg;
511 log_messages.printf(MSG_DEBUG,
512 "[WORKUNIT#%d %s] Average is more than 15 percent from each value \n",
516 // log data on large variance in runtime
517 float cpu_time = 0.0;
518 for (i=0; i<results.size(); i++) {
519 RESULT& result = results[i];
520 if (result.validate_state != VALIDATE_STATE_VALID) continue;
// Results with less than 30 units of cpu_time are left out of the comparison.
521 if (result.cpu_time < 30) continue;
// Presumably records the first qualifying runtime as the reference
// (the guarding condition is on a line not shown).
523 cpu_time = result.cpu_time*1.0;
// Flag a runtime ratio greater than 2 or less than 0.5.
525 if (cpu_time/result.cpu_time > 2 || cpu_time/result.cpu_time < 0.5) {
526 log_messages.printf(MSG_DEBUG,
527 "[WORKUNIT#%d %s] Large difference in runtime \n",
535 //find result with smallest deviation from historical credit and award that value
// A negative deviation means "no candidate chosen yet".
537 double deviation = -1;
538 grant_credit = credit_avg; // default award in case nobody matches the cases
539 for (i=0; i<results.size(); i++) {
540 RESULT& result = results[i];
541 if (result.validate_state != VALIDATE_STATE_VALID) continue;
// NOTE(review): the return value of lookup_id() is not checked; if the
// lookup fails, host.credit_per_cpu_sec may hold a stale or default value.
542 host.lookup_id(result.hostid);
543 log_messages.printf(MSG_DEBUG,
544 "[RESULT#%d %s] Claimed Credit = %.2lf Historical Credit = %.2lf \n",
545 result.id, result.name, result.claimed_credit,
546 result.cpu_time*host.credit_per_cpu_sec
// Take this result if it is the first candidate or closer to its host's
// historical credit than the best so far, and it ran for more than 30.
548 if ((deviation < 0 || deviation > fabs(result.claimed_credit - result.cpu_time*host.credit_per_cpu_sec)) && result.cpu_time > 30) {
549 deviation = fabs(result.claimed_credit - result.cpu_time*host.credit_per_cpu_sec);
550 log_messages.printf(MSG_NORMAL,
551 "[RESULT#%d %s] Credit deviation = %.2lf \n",
552 result.id, result.name, deviation
554 grant_credit = result.claimed_credit;
557 log_messages.printf(MSG_DEBUG,
558 "[WORKUNIT#%d %s] Credit granted = %.2lf \n",
559 wu.id, wu.name, grant_credit
// Revision-control identification string ($Id$ keyword) for this file.
564 const char *BOINC_RCSID_07049e8a0e = "$Id: validate_util.cpp 16069 2008-09-26 18:20:24Z davea $";