|
14 | 14 |
|
15 | 15 | #include "benchmark/benchmark.h"
|
16 | 16 | #include "benchmark_api_internal.h"
|
| 17 | +#include "benchmark_runner.h" |
17 | 18 | #include "internal_macros.h"
|
18 | 19 |
|
19 | 20 | #ifndef BENCHMARK_OS_WINDOWS
|
@@ -113,226 +114,11 @@ DEFINE_int32(v, 0, "The level of verbose logging to output");
|
113 | 114 |
|
114 | 115 | namespace benchmark {
|
115 | 116 |
|
116 |
| -namespace { |
117 |
| -static const size_t kMaxIterations = 1000000000; |
118 |
| - |
119 |
| -static MemoryManager* memory_manager = nullptr; |
120 |
| -} // end namespace |
121 |
| - |
122 | 117 | namespace internal {
|
123 | 118 |
|
| 119 | +// FIXME: wouldn't LTO mess this up? |
124 | 120 | void UseCharPointer(char const volatile*) {}
|
125 | 121 |
|
126 |
| -namespace { |
127 |
| - |
128 |
| -BenchmarkReporter::Run CreateRunReport( |
129 |
| - const benchmark::internal::BenchmarkInstance& b, |
130 |
| - const internal::ThreadManager::Result& results, size_t memory_iterations, |
131 |
| - const MemoryManager::Result& memory_result, double seconds) { |
132 |
| - // Create report about this benchmark run. |
133 |
| - BenchmarkReporter::Run report; |
134 |
| - |
135 |
| - report.run_name = b.name; |
136 |
| - report.error_occurred = results.has_error_; |
137 |
| - report.error_message = results.error_message_; |
138 |
| - report.report_label = results.report_label_; |
139 |
| - // This is the total iterations across all threads. |
140 |
| - report.iterations = results.iterations; |
141 |
| - report.time_unit = b.time_unit; |
142 |
| - |
143 |
| - if (!report.error_occurred) { |
144 |
| - if (b.use_manual_time) { |
145 |
| - report.real_accumulated_time = results.manual_time_used; |
146 |
| - } else { |
147 |
| - report.real_accumulated_time = results.real_time_used; |
148 |
| - } |
149 |
| - report.cpu_accumulated_time = results.cpu_time_used; |
150 |
| - report.complexity_n = results.complexity_n; |
151 |
| - report.complexity = b.complexity; |
152 |
| - report.complexity_lambda = b.complexity_lambda; |
153 |
| - report.statistics = b.statistics; |
154 |
| - report.counters = results.counters; |
155 |
| - |
156 |
| - if (memory_iterations > 0) { |
157 |
| - report.has_memory_result = true; |
158 |
| - report.allocs_per_iter = |
159 |
| - memory_iterations ? static_cast<double>(memory_result.num_allocs) / |
160 |
| - memory_iterations |
161 |
| - : 0; |
162 |
| - report.max_bytes_used = memory_result.max_bytes_used; |
163 |
| - } |
164 |
| - |
165 |
| - internal::Finish(&report.counters, results.iterations, seconds, b.threads); |
166 |
| - } |
167 |
| - return report; |
168 |
| -} |
169 |
| - |
170 |
| -// Execute one thread of benchmark b for the specified number of iterations. |
171 |
| -// Adds the stats collected for the thread into *total. |
172 |
| -void RunInThread(const BenchmarkInstance* b, size_t iters, int thread_id, |
173 |
| - ThreadManager* manager) { |
174 |
| - internal::ThreadTimer timer; |
175 |
| - State st = b->Run(iters, thread_id, &timer, manager); |
176 |
| - CHECK(st.iterations() >= st.max_iterations) |
177 |
| - << "Benchmark returned before State::KeepRunning() returned false!"; |
178 |
| - { |
179 |
| - MutexLock l(manager->GetBenchmarkMutex()); |
180 |
| - internal::ThreadManager::Result& results = manager->results; |
181 |
| - results.iterations += st.iterations(); |
182 |
| - results.cpu_time_used += timer.cpu_time_used(); |
183 |
| - results.real_time_used += timer.real_time_used(); |
184 |
| - results.manual_time_used += timer.manual_time_used(); |
185 |
| - results.complexity_n += st.complexity_length_n(); |
186 |
| - internal::Increment(&results.counters, st.counters); |
187 |
| - } |
188 |
| - manager->NotifyThreadComplete(); |
189 |
| -} |
190 |
| - |
191 |
| -struct RunResults { |
192 |
| - std::vector<BenchmarkReporter::Run> non_aggregates; |
193 |
| - std::vector<BenchmarkReporter::Run> aggregates_only; |
194 |
| - |
195 |
| - bool display_report_aggregates_only = false; |
196 |
| - bool file_report_aggregates_only = false; |
197 |
| -}; |
198 |
| - |
199 |
| -RunResults RunBenchmark( |
200 |
| - const benchmark::internal::BenchmarkInstance& b, |
201 |
| - std::vector<BenchmarkReporter::Run>* complexity_reports) { |
202 |
| - RunResults run_results; |
203 |
| - |
204 |
| - const bool has_explicit_iteration_count = b.iterations != 0; |
205 |
| - size_t iters = has_explicit_iteration_count ? b.iterations : 1; |
206 |
| - std::unique_ptr<internal::ThreadManager> manager; |
207 |
| - std::vector<std::thread> pool(b.threads - 1); |
208 |
| - const int repeats = |
209 |
| - b.repetitions != 0 ? b.repetitions : FLAGS_benchmark_repetitions; |
210 |
| - if (repeats != 1) { |
211 |
| - run_results.display_report_aggregates_only = |
212 |
| - (FLAGS_benchmark_report_aggregates_only || |
213 |
| - FLAGS_benchmark_display_aggregates_only); |
214 |
| - run_results.file_report_aggregates_only = |
215 |
| - FLAGS_benchmark_report_aggregates_only; |
216 |
| - if (b.aggregation_report_mode != internal::ARM_Unspecified) { |
217 |
| - run_results.display_report_aggregates_only = |
218 |
| - (b.aggregation_report_mode & |
219 |
| - internal::ARM_DisplayReportAggregatesOnly); |
220 |
| - run_results.file_report_aggregates_only = |
221 |
| - (b.aggregation_report_mode & internal::ARM_FileReportAggregatesOnly); |
222 |
| - } |
223 |
| - } |
224 |
| - for (int repetition_num = 0; repetition_num < repeats; repetition_num++) { |
225 |
| - for (;;) { |
226 |
| - // Try benchmark |
227 |
| - VLOG(2) << "Running " << b.name << " for " << iters << "\n"; |
228 |
| - |
229 |
| - manager.reset(new internal::ThreadManager(b.threads)); |
230 |
| - for (std::size_t ti = 0; ti < pool.size(); ++ti) { |
231 |
| - pool[ti] = std::thread(&RunInThread, &b, iters, |
232 |
| - static_cast<int>(ti + 1), manager.get()); |
233 |
| - } |
234 |
| - RunInThread(&b, iters, 0, manager.get()); |
235 |
| - manager->WaitForAllThreads(); |
236 |
| - for (std::thread& thread : pool) thread.join(); |
237 |
| - internal::ThreadManager::Result results; |
238 |
| - { |
239 |
| - MutexLock l(manager->GetBenchmarkMutex()); |
240 |
| - results = manager->results; |
241 |
| - } |
242 |
| - manager.reset(); |
243 |
| - // Adjust real/manual time stats since they were reported per thread. |
244 |
| - results.real_time_used /= b.threads; |
245 |
| - results.manual_time_used /= b.threads; |
246 |
| - |
247 |
| - VLOG(2) << "Ran in " << results.cpu_time_used << "/" |
248 |
| - << results.real_time_used << "\n"; |
249 |
| - |
250 |
| - // Base decisions off of real time if requested by this benchmark. |
251 |
| - double seconds = results.cpu_time_used; |
252 |
| - if (b.use_manual_time) { |
253 |
| - seconds = results.manual_time_used; |
254 |
| - } else if (b.use_real_time) { |
255 |
| - seconds = results.real_time_used; |
256 |
| - } |
257 |
| - |
258 |
| - const double min_time = |
259 |
| - !IsZero(b.min_time) ? b.min_time : FLAGS_benchmark_min_time; |
260 |
| - |
261 |
| - // clang-format off |
262 |
| - // turn off clang-format since it mangles prettiness here |
263 |
| - // Determine if this run should be reported; Either it has |
264 |
| - // run for a sufficient amount of time or because an error was reported. |
265 |
| - const bool should_report = repetition_num > 0 |
266 |
| - || has_explicit_iteration_count // An exact iteration count was requested |
267 |
| - || results.has_error_ |
268 |
| - || iters >= kMaxIterations // No chance to try again, we hit the limit. |
269 |
| - || seconds >= min_time // the elapsed time is large enough |
270 |
| - // CPU time is specified but the elapsed real time greatly exceeds the |
271 |
| - // minimum time. Note that user provided timers are exempt from this |
272 |
| - // sanity check. |
273 |
| - || ((results.real_time_used >= 5 * min_time) && !b.use_manual_time); |
274 |
| - // clang-format on |
275 |
| - |
276 |
| - if (should_report) { |
277 |
| - MemoryManager::Result memory_result; |
278 |
| - size_t memory_iterations = 0; |
279 |
| - if (memory_manager != nullptr) { |
280 |
| - // Only run a few iterations to reduce the impact of one-time |
281 |
| - // allocations in benchmarks that are not properly managed. |
282 |
| - memory_iterations = std::min<size_t>(16, iters); |
283 |
| - memory_manager->Start(); |
284 |
| - manager.reset(new internal::ThreadManager(1)); |
285 |
| - RunInThread(&b, memory_iterations, 0, manager.get()); |
286 |
| - manager->WaitForAllThreads(); |
287 |
| - manager.reset(); |
288 |
| - |
289 |
| - memory_manager->Stop(&memory_result); |
290 |
| - } |
291 |
| - |
292 |
| - BenchmarkReporter::Run report = CreateRunReport( |
293 |
| - b, results, memory_iterations, memory_result, seconds); |
294 |
| - if (!report.error_occurred && b.complexity != oNone) |
295 |
| - complexity_reports->push_back(report); |
296 |
| - run_results.non_aggregates.push_back(report); |
297 |
| - break; |
298 |
| - } |
299 |
| - |
300 |
| - // See how many iterations should be increased by |
301 |
| - // Note: Avoid division by zero with max(seconds, 1ns). |
302 |
| - double multiplier = min_time * 1.4 / std::max(seconds, 1e-9); |
303 |
| - // If our last run was at least 10% of FLAGS_benchmark_min_time then we |
304 |
| - // use the multiplier directly. Otherwise we use at most 10 times |
305 |
| - // expansion. |
306 |
| - // NOTE: When the last run was at least 10% of the min time the max |
307 |
| - // expansion should be 14x. |
308 |
| - bool is_significant = (seconds / min_time) > 0.1; |
309 |
| - multiplier = is_significant ? multiplier : std::min(10.0, multiplier); |
310 |
| - if (multiplier <= 1.0) multiplier = 2.0; |
311 |
| - double next_iters = std::max(multiplier * iters, iters + 1.0); |
312 |
| - if (next_iters > kMaxIterations) { |
313 |
| - next_iters = kMaxIterations; |
314 |
| - } |
315 |
| - VLOG(3) << "Next iters: " << next_iters << ", " << multiplier << "\n"; |
316 |
| - iters = static_cast<int>(next_iters + 0.5); |
317 |
| - } |
318 |
| - } |
319 |
| - |
320 |
| - // Calculate additional statistics |
321 |
| - run_results.aggregates_only = ComputeStats(run_results.non_aggregates); |
322 |
| - |
323 |
| - // Maybe calculate complexity report |
324 |
| - if ((b.complexity != oNone) && b.last_benchmark_instance) { |
325 |
| - auto additional_run_stats = ComputeBigO(*complexity_reports); |
326 |
| - run_results.aggregates_only.insert(run_results.aggregates_only.end(), |
327 |
| - additional_run_stats.begin(), |
328 |
| - additional_run_stats.end()); |
329 |
| - complexity_reports->clear(); |
330 |
| - } |
331 |
| - |
332 |
| - return run_results; |
333 |
| -} |
334 |
| - |
335 |
| -} // namespace |
336 | 122 | } // namespace internal
|
337 | 123 |
|
338 | 124 | State::State(size_t max_iters, const std::vector<int64_t>& ranges, int thread_i,
|
@@ -610,7 +396,9 @@ size_t RunSpecifiedBenchmarks(BenchmarkReporter* display_reporter,
|
610 | 396 | return benchmarks.size();
|
611 | 397 | }
|
612 | 398 |
|
613 |
| -void RegisterMemoryManager(MemoryManager* manager) { memory_manager = manager; } |
| 399 | +void RegisterMemoryManager(MemoryManager* manager) { |
| 400 | + internal::memory_manager = manager; |
| 401 | +} |
614 | 402 |
|
615 | 403 | namespace internal {
|
616 | 404 |
|
|
0 commit comments