@@ -198,140 +198,225 @@ struct RunResults {
198
198
bool file_report_aggregates_only = false ;
199
199
};
200
200
201
- RunResults RunBenchmark (
202
- const benchmark::internal::Benchmark::Instance& b,
203
- std::vector<BenchmarkReporter::Run>* complexity_reports) {
201
+ class BenchmarkRunner {
204
202
RunResults run_results;
205
203
206
- const bool has_explicit_iteration_count = b.iterations != 0 ;
207
- size_t iters = has_explicit_iteration_count ? b.iterations : 1 ;
208
- std::unique_ptr<internal::ThreadManager> manager;
209
- std::vector<std::thread> pool (b.threads - 1 );
210
- const int repeats =
211
- b.repetitions != 0 ? b.repetitions : FLAGS_benchmark_repetitions;
212
- if (repeats != 1 ) {
213
- run_results.display_report_aggregates_only =
214
- (FLAGS_benchmark_report_aggregates_only ||
215
- FLAGS_benchmark_display_aggregates_only);
216
- run_results.file_report_aggregates_only =
217
- FLAGS_benchmark_report_aggregates_only;
218
- if (b.aggregation_report_mode != internal::ARM_Unspecified) {
219
- run_results.display_report_aggregates_only =
220
- (b.aggregation_report_mode &
221
- internal::ARM_DisplayReportAggregatesOnly);
222
- run_results.file_report_aggregates_only =
223
- (b.aggregation_report_mode & internal::ARM_FileReportAggregatesOnly);
204
+ const benchmark::internal::Benchmark::Instance& b;
205
+ std::vector<BenchmarkReporter::Run>& complexity_reports;
206
+
207
+ const double min_time;
208
+ const int repeats;
209
+ const bool has_explicit_iteration_count;
210
+
211
+ std::vector<std::thread> pool;
212
+
213
+ size_t iters; // preserved between repetitions!
214
+ // So only the first repetition has to find/calculate it,
215
+ // the other repetitions will just use that precomputed iteration count.
216
+
217
+ struct IterationResults {
218
+ internal::ThreadManager::Result results;
219
+ size_t iters;
220
+ double seconds;
221
+ };
222
+ IterationResults doNIterations () {
223
+ VLOG (2 ) << " Running " << b.name << " for " << iters << " \n " ;
224
+
225
+ std::unique_ptr<internal::ThreadManager> manager;
226
+ manager.reset (new internal::ThreadManager (b.threads ));
227
+
228
+ // Run all but one thread in separate threads
229
+ for (std::size_t ti = 0 ; ti < pool.size (); ++ti) {
230
+ pool[ti] = std::thread (&RunInThread, &b, iters, static_cast <int >(ti + 1 ),
231
+ manager.get ());
224
232
}
233
+ // And run one thread here directly.
234
+ // (If we were asked to run just one thread, we don't create new threads.)
235
+ // Yes, we need to do this here *after* we start the separate threads.
236
+ RunInThread (&b, iters, 0 , manager.get ());
237
+
238
+ // The main thread has finished. Now let's wait for the other threads.
239
+ manager->WaitForAllThreads ();
240
+ for (std::thread& thread : pool) thread.join ();
241
+
242
+ IterationResults i;
243
+ // Acquire the measurements/counters from the manager, UNDER THE LOCK!
244
+ {
245
+ MutexLock l (manager->GetBenchmarkMutex ());
246
+ i.results = manager->results ;
247
+ }
248
+
249
+ // And get rid of the manager.
250
+ manager.reset ();
251
+
252
+ // Adjust real/manual time stats since they were reported per thread.
253
+ i.results .real_time_used /= b.threads ;
254
+ i.results .manual_time_used /= b.threads ;
255
+
256
+ VLOG (2 ) << " Ran in " << i.results .cpu_time_used << " /"
257
+ << i.results .real_time_used << " \n " ;
258
+
259
+ // So for how long were we running?
260
+ i.iters = iters;
261
+ // Base decisions off of real time if requested by this benchmark.
262
+ i.seconds = i.results .cpu_time_used ;
263
+ if (b.use_manual_time ) {
264
+ i.seconds = i.results .manual_time_used ;
265
+ } else if (b.use_real_time ) {
266
+ i.seconds = i.results .real_time_used ;
267
+ }
268
+
269
+ return i;
270
+ }
271
+
272
+ size_t predictNumItersNeeded (const IterationResults& i) const {
273
+ // See how much iterations should be increased by.
274
+ // Note: Avoid division by zero with max(seconds, 1ns).
275
+ double multiplier = min_time * 1.4 / std::max (i.seconds , 1e-9 );
276
+ // If our last run was at least 10% of FLAGS_benchmark_min_time then we
277
+ // use the multiplier directly.
278
+ // Otherwise we use at most 10 times expansion.
279
+ // NOTE: When the last run was at least 10% of the min time the max
280
+ // expansion should be 14x.
281
+ bool is_significant = (i.seconds / min_time) > 0.1 ;
282
+ multiplier = is_significant ? multiplier : std::min (10.0 , multiplier);
283
+ if (multiplier <= 1.0 ) multiplier = 2.0 ;
284
+
285
+ // So what seems to be the sufficiently-large iteration count? Round up.
286
+ const size_t max_next_iters =
287
+ 0.5 + std::max (multiplier * i.iters , i.iters + 1.0 );
288
+ // But we do have *some* sanity limits though..
289
+ const size_t next_iters = std::min (max_next_iters, kMaxIterations );
290
+
291
+ VLOG (3 ) << " Next iters: " << next_iters << " , " << multiplier << " \n " ;
292
+ return next_iters; // round up before conversion to integer.
293
+ }
294
+
295
+ bool shouldReportIterationResults (const IterationResults& i) const {
296
+ // Determine if this run should be reported;
297
+ // Either it has run for a sufficient amount of time
298
+ // or because an error was reported.
299
+ return i.results .has_error_ ||
300
+ i.iters >= kMaxIterations || // Too many iterations already.
301
+ i.seconds >= min_time || // The elapsed time is large enough.
302
+ // CPU time is specified but the elapsed real time greatly exceeds
303
+ // the minimum time.
304
+ // Note that user provided timers are except from this sanity check.
305
+ ((i.results .real_time_used >= 5 * min_time) && !b.use_manual_time );
225
306
}
226
- for (int repetition_num = 0 ; repetition_num < repeats; repetition_num++) {
307
+
10000
308
+ void doOneRepetition (bool not_in_the_first_repetition) {
309
+ IterationResults i;
310
+
311
+ // We *may* be gradually increasing the length (iteration count)
312
+ // of the benchmark until we decide the results are significant.
313
+ // And once we do, we report those last results and exit.
314
+ // Please do note that the if there are repetitions, the iteration count
315
+ // is *only* calculated for the *first* repetition, and other repetitions
316
+ // simply use that precomputed iteration count.
227
317
for (;;) {
228
- // Try benchmark
229
- VLOG (2 ) << " Running " << b.name << " for " << iters << " \n " ;
318
+ i = doNIterations ();
319
+
320
+ // Do we consider the results to be significant?
321
+ // If we are doing repetitions, and the first repetition was already done,
322
+ // it has calculated the correct iteration time, so we have run that very
323
+ // iteration count just now. No need to calculate anything. Just report.
324
+ // Else, the normal rules apply.
325
+ const bool results_are_significant = not_in_the_first_repetition ||
326
+ has_explicit_iteration_count ||
327
+ shouldReportIterationResults (i);
328
+
329
+ if (results_are_significant) break ; // Good, let's report them!
330
+
331
+ // Nope, bad iteration. Let's re-estimate the hopefully-sufficient
332
+ // iteration count, and run the benchmark again...
333
+
334
+ iters = predictNumItersNeeded (i);
335
+ assert (iters > i.iters &&
336
+ " if we did more iterations than we want to do the next time, "
337
+ " then we should have accepted the current iteration run." );
338
+ }
230
339
231
- manager.reset (new internal::ThreadManager (b.threads ));
232
- for (std::size_t ti = 0 ; ti < pool.size (); ++ti) {
233
- pool[ti] = std::thread (&RunInThread, &b, iters,
234
- static_cast <int >(ti + 1 ), manager.get ());
235
- }
236
- RunInThread (&b, iters, 0 , manager.get ());
340
+ // Oh, one last thing, we need to also produce the 'memory measurements'..
341
+ MemoryManager::Result memory_result;
342
+ size_t memory_iterations = 0 ;
343
+ if (memory_manager != nullptr ) {
344
+ // Only run a few iterations to reduce the impact of one-time
345
+ // allocations in benchmarks that are not properly managed.
346
+ memory_iterations = std::min<size_t >(16 , iters);
347
+ memory_manager->Start ();
348
+ std::unique_ptr<internal::ThreadManager> manager;
349
+ manager.reset (new internal::ThreadManager (1 ));
350
+ RunInThread (&b, memory_iterations, 0 , manager.get ());
237
351
manager->WaitForAllThreads ();
238
- for (std::thread& thread : pool) thread.join ();
239
- internal::ThreadManager::Result results;
240
- {
241
- MutexLock l (manager->GetBenchmarkMutex ());
242
- results = manager->results ;
243
- }
244
352
manager.reset ();
245
- // Adjust real/manual time stats since they were reported per thread.
246
- results.real_time_used /= b.threads ;
247
- results.manual_time_used /= b.threads ;
248
-
249
- VLOG (2 ) << " Ran in " << results.cpu_time_used << " /"
250
- << results.real_time_used << " \n " ;
251
-
252
- // Base decisions off of real time if requested by this benchmark.
253
- double seconds = results.cpu_time_used ;
254
- if (b.use_manual_time ) {
255
- seconds = results.manual_time_used ;
256
- } else if (b.use_real_time ) {
257
- seconds = results.real_time_used ;
258
- }
259
353
260
- const double min_time =
261
- !IsZero (b.min_time ) ? b.min_time : FLAGS_benchmark_min_time;
262
-
263
- // clang-format off
264
- // turn off clang-format since it mangles prettiness here
265
- // Determine if this run should be reported; Either it has
266
- // run for a sufficient amount of time or because an error was reported.
267
- const bool should_report = repetition_num > 0
268
- || has_explicit_iteration_count // An exact iteration count was requested
269
- || results.has_error_
270
- || iters >= kMaxIterations // No chance to try again, we hit the limit.
271
- || seconds >= min_time // the elapsed time is large enough
272
- // CPU time is specified but the elapsed real time greatly exceeds the
273
- // minimum time. Note that user provided timers are except from this
274
- // sanity check.
275
- || ((results.real_time_used >= 5 * min_time) && !b.use_manual_time );
276
- // clang-format on
277
-
278
- if (should_report) {
279
- MemoryManager::Result memory_result;
280
- size_t memory_iterations = 0 ;
281
- if (memory_manager != nullptr ) {
282
- // Only run a few iterations to reduce the impact of one-time
283
- // allocations in benchmarks that are not properly managed.
284
- memory_iterations = std::min<size_t >(16 , iters);
285
- memory_manager->Start ();
286
- manager.reset (new internal::ThreadManager (1 ));
287
- RunInThread (&b, memory_iterations, 0 , manager.get ());
288
- manager->WaitForAllThreads ();
289
- manager.reset ();
290
-
291
- memory_manager->Stop (&memory_result);
292
- }
293
-
294
- BenchmarkReporter::Run report = CreateRunReport (
295
- b, results, memory_iterations, memory_result, seconds);
296
- if (!report.error_occurred && b.complexity != oNone)
297
- complexity_reports->push_back (report);
298
- run_results.non_aggregates .push_back (report);
299
- break ;
300
- }
354
+ memory_manager->Stop (&memory_result);
355
+ }
356
+
357
+ // Ok, now actualy report.
358
+ BenchmarkReporter::Run report = CreateRunReport (
359
+ b, i.results , memory_iterations, memory_result, i.seconds );
360
+
361
+ if (!report.error_occurred && b.complexity != oNone)
362
+ complexity_reports.push_back (report);
363
+
364
+ run_results.non_aggregates .push_back (report);
365
+ }
301
366
302
- // See how much iterations should be increased by
303
- // Note: Avoid division by zero with max(seconds, 1ns).
304
- double multiplier = min_time * 1.4 / std::max (seconds, 1e-9 );
305
- // If our last run was at least 10% of FLAGS_benchmark_min_time then we
306
- // use the multiplier directly. Otherwise we use at most 10 times
307
- // expansion.
308
- // NOTE: When the last run was at least 10% of the min time the max
309
- // expansion should be 14x.
310
- bool is_significant = (seconds / min_time) > 0.1 ;
311
- multiplier = is_significant ? multiplier : std::min (10.0 , multiplier);
312
- if (multiplier <= 1.0 ) multiplier = 2.0 ;
313
- double next_iters = std::max (multiplier * iters, iters + 1.0 );
314
- if (next_iters > kMaxIterations ) {
315
- next_iters = kMaxIterations ;
367
+ public:
368
+ BenchmarkRunner (const benchmark::internal::Benchmark::Instance& b_,
369
+ std::vector<BenchmarkReporter::Run>* complexity_reports_)
370
+ : b(b_),
371
+ complexity_reports (*complexity_reports_),
372
+ min_time(!IsZero(b.min_time) ? b.min_time : FLAGS_benchmark_min_time),
373
+ repeats(b.repetitions != 0 ? b.repetitions
374
+ : FLAGS_benchmark_repetitions),
375
+ has_explicit_iteration_count(b.iterations != 0 ),
376
+ pool(b.threads - 1 ),
377
+ iters(has_explicit_iteration_count ? b.iterations : 1 ) {
378
+ if (repeats != 1 ) {
379
+ run_results.display_report_aggregates_only =
380
+ (FLAGS_benchmark_report_aggregates_only ||
381
+ FLAGS_benchmark_display_aggregates_only);
382
+ run_results.file_report_aggregates_only =
383
+ FLAGS_benchmark_report_aggregates_only;
384
+ if (b.aggregation_report_mode != internal::ARM_Unspecified) {
385
+ run_results.display_report_aggregates_only =
386
+ (b.aggregation_report_mode &
387
+ internal::ARM_DisplayReportAggregatesOnly);
388
+ run_results.file_report_aggregates_only =
389
+ (b.aggregation_report_mode &
390
+ internal::ARM_FileReportAggregatesOnly);
316
391
}
317
- VLOG (3 ) << " Next iters: " << next_iters << " , " << multiplier << " \n " ;
318
- iters = static_cast <int >(next_iters + 0.5 );
319
392
}
320
- }
321
393
322
- // Calculate additional statistics
323
- run_results.aggregates_only = ComputeStats (run_results.non_aggregates );
394
+ for (int repetition_num = 0 ; repetition_num < repeats; repetition_num++) {
395
+ const bool not_in_the_first_repetition = repetition_num != 0 ;
396
+ doOneRepetition (not_in_the_first_repetition);
397
+ }
398
+
399
+ // Calculate additional statistics
400
+ run_results.aggregates_only = ComputeStats (run_results.non_aggregates );
324
401
325
- // Maybe calculate complexity report
326
- if ((b.complexity != oNone) && b.last_benchmark_instance ) {
327
- auto additional_run_stats = ComputeBigO (*complexity_reports);
328
- run_results.aggregates_only .insert (run_results.aggregates_only .end (),
329
- additional_run_stats.begin (),
330
- additional_run_stats.end ());
331
- complexity_reports->clear ();
402
+ // Maybe calculate complexity report
403
+ if ((b.complexity != oNone) && b.last_benchmark_instance ) {
404
+ auto additional_run_stats = ComputeBigO (complexity_reports);
405
+ run_results.aggregates_only .insert (run_results.aggregates_only .end (),
406
+ additional_run_stats.begin (),
407
+ additional_run_stats.end ());
408
+ complexity_reports.clear ();
409
+ }
332
410
}
333
411
334
- return run_results;
412
+ RunResults getResults () { return run_results; }
413
+ };
414
+
415
+ RunResults RunBenchmark (
416
+ const benchmark::internal::Benchmark::Instance& b,
417
+ std::vector<BenchmarkReporter::Run>* complexity_reports) {
418
+ BenchmarkRunner r (b, complexity_reports);
419
+ return r.getResults ();
335
420
}
336
421
337
422
} // namespace
0 commit comments