Benchmark.h 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579
  1. /*
  2. * Copyright 2012-present Facebook, Inc.
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #pragma once
  17. #include <folly/Portability.h>
  18. #include <folly/Preprocessor.h> // for FB_ANONYMOUS_VARIABLE
  19. #include <folly/ScopeGuard.h>
  20. #include <folly/Traits.h>
  21. #include <folly/functional/Invoke.h>
  22. #include <folly/portability/GFlags.h>
  23. #include <cassert>
  24. #include <chrono>
  25. #include <functional>
  26. #include <limits>
  27. #include <type_traits>
  28. #include <boost/function_types/function_arity.hpp>
  29. #include <glog/logging.h>
  30. DECLARE_bool(benchmark);
  31. namespace folly {
  32. /**
  33. * Runs all benchmarks defined. Usually put in main().
  34. */
  35. void runBenchmarks();
  36. /**
  37. * Runs all benchmarks defined if and only if the --benchmark flag has
  38. * been passed to the program. Usually put in main().
  39. */
  40. inline bool runBenchmarksOnFlag() {
  41. if (FLAGS_benchmark) {
  42. runBenchmarks();
  43. }
  44. return FLAGS_benchmark;
  45. }
  46. namespace detail {
  47. using TimeIterPair =
  48. std::pair<std::chrono::high_resolution_clock::duration, unsigned int>;
  49. using BenchmarkFun = std::function<detail::TimeIterPair(unsigned int)>;
  50. struct BenchmarkRegistration {
  51. std::string file;
  52. std::string name;
  53. BenchmarkFun func;
  54. };
  55. struct BenchmarkResult {
  56. std::string file;
  57. std::string name;
  58. double timeInNs;
  59. };
  60. /**
  61. * Adds a benchmark wrapped in a std::function. Only used
  62. * internally. Pass by value is intentional.
  63. */
  64. void addBenchmarkImpl(
  65. const char* file,
  66. const char* name,
  67. std::function<TimeIterPair(unsigned int)>);
  68. } // namespace detail
  69. /**
  70. * Supporting type for BENCHMARK_SUSPEND defined below.
  71. */
  72. struct BenchmarkSuspender {
  73. using Clock = std::chrono::high_resolution_clock;
  74. using TimePoint = Clock::time_point;
  75. using Duration = Clock::duration;
  76. BenchmarkSuspender() {
  77. start = Clock::now();
  78. }
  79. BenchmarkSuspender(const BenchmarkSuspender&) = delete;
  80. BenchmarkSuspender(BenchmarkSuspender&& rhs) noexcept {
  81. start = rhs.start;
  82. rhs.start = {};
  83. }
  84. BenchmarkSuspender& operator=(const BenchmarkSuspender&) = delete;
  85. BenchmarkSuspender& operator=(BenchmarkSuspender&& rhs) {
  86. if (start != TimePoint{}) {
  87. tally();
  88. }
  89. start = rhs.start;
  90. rhs.start = {};
  91. return *this;
  92. }
  93. ~BenchmarkSuspender() {
  94. if (start != TimePoint{}) {
  95. tally();
  96. }
  97. }
  98. void dismiss() {
  99. assert(start != TimePoint{});
  100. tally();
  101. start = {};
  102. }
  103. void rehire() {
  104. assert(start == TimePoint{});
  105. start = Clock::now();
  106. }
  107. template <class F>
  108. auto dismissing(F f) -> invoke_result_t<F> {
  109. SCOPE_EXIT {
  110. rehire();
  111. };
  112. dismiss();
  113. return f();
  114. }
  115. /**
  116. * This is for use inside of if-conditions, used in BENCHMARK macros.
  117. * If-conditions bypass the explicit on operator bool.
  118. */
  119. explicit operator bool() const {
  120. return false;
  121. }
  122. /**
  123. * Accumulates time spent outside benchmark.
  124. */
  125. static Duration timeSpent;
  126. private:
  127. void tally() {
  128. auto end = Clock::now();
  129. timeSpent += end - start;
  130. start = end;
  131. }
  132. TimePoint start;
  133. };
  134. /**
  135. * Adds a benchmark. Usually not called directly but instead through
  136. * the macro BENCHMARK defined below. The lambda function involved
  137. * must take exactly one parameter of type unsigned, and the benchmark
  138. * uses it with counter semantics (iteration occurs inside the
  139. * function).
  140. */
  141. template <typename Lambda>
  142. typename std::enable_if<
  143. boost::function_types::function_arity<
  144. decltype(&Lambda::operator())>::value == 2>::type
  145. addBenchmark(const char* file, const char* name, Lambda&& lambda) {
  146. auto execute = [=](unsigned int times) {
  147. BenchmarkSuspender::timeSpent = {};
  148. unsigned int niter;
  149. // CORE MEASUREMENT STARTS
  150. auto start = std::chrono::high_resolution_clock::now();
  151. niter = lambda(times);
  152. auto end = std::chrono::high_resolution_clock::now();
  153. // CORE MEASUREMENT ENDS
  154. return detail::TimeIterPair(
  155. (end - start) - BenchmarkSuspender::timeSpent, niter);
  156. };
  157. detail::addBenchmarkImpl(
  158. file, name, std::function<detail::TimeIterPair(unsigned int)>(execute));
  159. }
  160. /**
  161. * Adds a benchmark. Usually not called directly but instead through
  162. * the macro BENCHMARK defined below. The lambda function involved
  163. * must take zero parameters, and the benchmark calls it repeatedly
  164. * (iteration occurs outside the function).
  165. */
  166. template <typename Lambda>
  167. typename std::enable_if<
  168. boost::function_types::function_arity<
  169. decltype(&Lambda::operator())>::value == 1>::type
  170. addBenchmark(const char* file, const char* name, Lambda&& lambda) {
  171. addBenchmark(file, name, [=](unsigned int times) {
  172. unsigned int niter = 0;
  173. while (times-- > 0) {
  174. niter += lambda();
  175. }
  176. return niter;
  177. });
  178. }
  179. /**
  180. * Call doNotOptimizeAway(var) to ensure that var will be computed even
  181. * post-optimization. Use it for variables that are computed during
  182. * benchmarking but otherwise are useless. The compiler tends to do a
  183. * good job at eliminating unused variables, and this function fools it
  184. * into thinking var is in fact needed.
  185. *
  186. * Call makeUnpredictable(var) when you don't want the optimizer to use
  187. * its knowledge of var to shape the following code. This is useful
  188. * when constant propagation or power reduction is possible during your
  189. * benchmark but not in real use cases.
  190. */
  191. #ifdef _MSC_VER
  192. #pragma optimize("", off)
  193. inline void doNotOptimizeDependencySink(const void*) {}
  194. #pragma optimize("", on)
  195. template <class T>
  196. void doNotOptimizeAway(const T& datum) {
  197. doNotOptimizeDependencySink(&datum);
  198. }
  199. template <typename T>
  200. void makeUnpredictable(T& datum) {
  201. doNotOptimizeDependencySink(&datum);
  202. }
  203. #else
  204. namespace detail {
  205. template <typename T>
  206. struct DoNotOptimizeAwayNeedsIndirect {
  207. using Decayed = typename std::decay<T>::type;
  208. // First two constraints ensure it can be an "r" operand.
  209. // std::is_pointer check is because callers seem to expect that
  210. // doNotOptimizeAway(&x) is equivalent to doNotOptimizeAway(x).
  211. constexpr static bool value = !folly::is_trivially_copyable<Decayed>::value ||
  212. sizeof(Decayed) > sizeof(long) || std::is_pointer<Decayed>::value;
  213. };
  214. } // namespace detail
  215. template <typename T>
  216. auto doNotOptimizeAway(const T& datum) -> typename std::enable_if<
  217. !detail::DoNotOptimizeAwayNeedsIndirect<T>::value>::type {
  218. // The "r" constraint forces the compiler to make datum available
  219. // in a register to the asm block, which means that it must have
  220. // computed/loaded it. We use this path for things that are <=
  221. // sizeof(long) (they have to fit), trivial (otherwise the compiler
  222. // doesn't want to put them in a register), and not a pointer (because
  223. // doNotOptimizeAway(&foo) would otherwise be a foot gun that didn't
  224. // necessarily compute foo).
  225. //
  226. // An earlier version of this method had a more permissive input operand
  227. // constraint, but that caused unnecessary variation between clang and
  228. // gcc benchmarks.
  229. asm volatile("" ::"r"(datum));
  230. }
  231. template <typename T>
  232. auto doNotOptimizeAway(const T& datum) -> typename std::enable_if<
  233. detail::DoNotOptimizeAwayNeedsIndirect<T>::value>::type {
  234. // This version of doNotOptimizeAway tells the compiler that the asm
  235. // block will read datum from memory, and that in addition it might read
  236. // or write from any memory location. If the memory clobber could be
  237. // separated into input and output that would be preferrable.
  238. asm volatile("" ::"m"(datum) : "memory");
  239. }
  240. template <typename T>
  241. auto makeUnpredictable(T& datum) -> typename std::enable_if<
  242. !detail::DoNotOptimizeAwayNeedsIndirect<T>::value>::type {
  243. asm volatile("" : "+r"(datum));
  244. }
  245. template <typename T>
  246. auto makeUnpredictable(T& datum) -> typename std::enable_if<
  247. detail::DoNotOptimizeAwayNeedsIndirect<T>::value>::type {
  248. asm volatile("" ::"m"(datum) : "memory");
  249. }
  250. #endif
  251. struct dynamic;
  252. void benchmarkResultsToDynamic(
  253. const std::vector<detail::BenchmarkResult>& data,
  254. dynamic&);
  255. void benchmarkResultsFromDynamic(
  256. const dynamic&,
  257. std::vector<detail::BenchmarkResult>&);
  258. void printResultComparison(
  259. const std::vector<detail::BenchmarkResult>& base,
  260. const std::vector<detail::BenchmarkResult>& test);
  261. } // namespace folly
  262. /**
  263. * Introduces a benchmark function. Used internally, see BENCHMARK and
  264. * friends below.
  265. */
  266. #define BENCHMARK_IMPL(funName, stringName, rv, paramType, paramName) \
  267. static void funName(paramType); \
  268. static bool FB_ANONYMOUS_VARIABLE(follyBenchmarkUnused) = \
  269. (::folly::addBenchmark( \
  270. __FILE__, \
  271. stringName, \
  272. [](paramType paramName) -> unsigned { \
  273. funName(paramName); \
  274. return rv; \
  275. }), \
  276. true); \
  277. static void funName(paramType paramName)
  278. /**
  279. * Introduces a benchmark function with support for returning the actual
  280. * number of iterations. Used internally, see BENCHMARK_MULTI and friends
  281. * below.
  282. */
  283. #define BENCHMARK_MULTI_IMPL(funName, stringName, paramType, paramName) \
  284. static unsigned funName(paramType); \
  285. static bool FB_ANONYMOUS_VARIABLE(follyBenchmarkUnused) = \
  286. (::folly::addBenchmark( \
  287. __FILE__, \
  288. stringName, \
  289. [](paramType paramName) { return funName(paramName); }), \
  290. true); \
  291. static unsigned funName(paramType paramName)
  292. /**
  293. * Introduces a benchmark function. Use with either one or two arguments.
  294. * The first is the name of the benchmark. Use something descriptive, such
  295. * as insertVectorBegin. The second argument may be missing, or could be a
  296. * symbolic counter. The counter dictates how many internal iteration the
  297. * benchmark does. Example:
  298. *
  299. * BENCHMARK(vectorPushBack) {
  300. * vector<int> v;
  301. * v.push_back(42);
  302. * }
  303. *
  304. * BENCHMARK(insertVectorBegin, n) {
  305. * vector<int> v;
  306. * FOR_EACH_RANGE (i, 0, n) {
  307. * v.insert(v.begin(), 42);
  308. * }
  309. * }
  310. */
  311. #define BENCHMARK(name, ...) \
  312. BENCHMARK_IMPL( \
  313. name, \
  314. FB_STRINGIZE(name), \
  315. FB_ARG_2_OR_1(1, ##__VA_ARGS__), \
  316. FB_ONE_OR_NONE(unsigned, ##__VA_ARGS__), \
  317. __VA_ARGS__)
  318. /**
  319. * Like BENCHMARK above, but allows the user to return the actual
  320. * number of iterations executed in the function body. This can be
  321. * useful if the benchmark function doesn't know upfront how many
  322. * iterations it's going to run or if it runs through a certain
  323. * number of test cases, e.g.:
  324. *
  325. * BENCHMARK_MULTI(benchmarkSomething) {
  326. * std::vector<int> testCases { 0, 1, 1, 2, 3, 5 };
  327. * for (int c : testCases) {
  328. * doSomething(c);
  329. * }
  330. * return testCases.size();
  331. * }
  332. */
  333. #define BENCHMARK_MULTI(name, ...) \
  334. BENCHMARK_MULTI_IMPL( \
  335. name, \
  336. FB_STRINGIZE(name), \
  337. FB_ONE_OR_NONE(unsigned, ##__VA_ARGS__), \
  338. __VA_ARGS__)
  339. /**
  340. * Defines a benchmark that passes a parameter to another one. This is
  341. * common for benchmarks that need a "problem size" in addition to
  342. * "number of iterations". Consider:
  343. *
  344. * void pushBack(uint32_t n, size_t initialSize) {
  345. * vector<int> v;
  346. * BENCHMARK_SUSPEND {
  347. * v.resize(initialSize);
  348. * }
  349. * FOR_EACH_RANGE (i, 0, n) {
  350. * v.push_back(i);
  351. * }
  352. * }
  353. * BENCHMARK_PARAM(pushBack, 0)
  354. * BENCHMARK_PARAM(pushBack, 1000)
  355. * BENCHMARK_PARAM(pushBack, 1000000)
  356. *
  357. * The benchmark above estimates the speed of push_back at different
  358. * initial sizes of the vector. The framework will pass 0, 1000, and
  359. * 1000000 for initialSize, and the iteration count for n.
  360. */
  361. #define BENCHMARK_PARAM(name, param) BENCHMARK_NAMED_PARAM(name, param, param)
  362. /**
  363. * Same as BENCHMARK_PARAM, but allows one to return the actual number of
  364. * iterations that have been run.
  365. */
  366. #define BENCHMARK_PARAM_MULTI(name, param) \
  367. BENCHMARK_NAMED_PARAM_MULTI(name, param, param)
  368. /*
  369. * Like BENCHMARK_PARAM(), but allows a custom name to be specified for each
  370. * parameter, rather than using the parameter value.
  371. *
  372. * Useful when the parameter value is not a valid token for string pasting,
  373. * of when you want to specify multiple parameter arguments.
  374. *
  375. * For example:
  376. *
  377. * void addValue(uint32_t n, int64_t bucketSize, int64_t min, int64_t max) {
  378. * Histogram<int64_t> hist(bucketSize, min, max);
  379. * int64_t num = min;
  380. * FOR_EACH_RANGE (i, 0, n) {
  381. * hist.addValue(num);
  382. * ++num;
  383. * if (num > max) { num = min; }
  384. * }
  385. * }
  386. *
  387. * BENCHMARK_NAMED_PARAM(addValue, 0_to_100, 1, 0, 100)
  388. * BENCHMARK_NAMED_PARAM(addValue, 0_to_1000, 10, 0, 1000)
  389. * BENCHMARK_NAMED_PARAM(addValue, 5k_to_20k, 250, 5000, 20000)
  390. */
  391. #define BENCHMARK_NAMED_PARAM(name, param_name, ...) \
  392. BENCHMARK_IMPL( \
  393. FB_CONCATENATE(name, FB_CONCATENATE(_, param_name)), \
  394. FB_STRINGIZE(name) "(" FB_STRINGIZE(param_name) ")", \
  395. iters, \
  396. unsigned, \
  397. iters) { \
  398. name(iters, ##__VA_ARGS__); \
  399. }
  400. /**
  401. * Same as BENCHMARK_NAMED_PARAM, but allows one to return the actual number
  402. * of iterations that have been run.
  403. */
  404. #define BENCHMARK_NAMED_PARAM_MULTI(name, param_name, ...) \
  405. BENCHMARK_MULTI_IMPL( \
  406. FB_CONCATENATE(name, FB_CONCATENATE(_, param_name)), \
  407. FB_STRINGIZE(name) "(" FB_STRINGIZE(param_name) ")", \
  408. unsigned, \
  409. iters) { \
  410. return name(iters, ##__VA_ARGS__); \
  411. }
  412. /**
  413. * Just like BENCHMARK, but prints the time relative to a
  414. * baseline. The baseline is the most recent BENCHMARK() seen in
  415. * the current scope. Example:
  416. *
  417. * // This is the baseline
  418. * BENCHMARK(insertVectorBegin, n) {
  419. * vector<int> v;
  420. * FOR_EACH_RANGE (i, 0, n) {
  421. * v.insert(v.begin(), 42);
  422. * }
  423. * }
  424. *
  425. * BENCHMARK_RELATIVE(insertListBegin, n) {
  426. * list<int> s;
  427. * FOR_EACH_RANGE (i, 0, n) {
  428. * s.insert(s.begin(), 42);
  429. * }
  430. * }
  431. *
  432. * Any number of relative benchmark can be associated with a
  433. * baseline. Another BENCHMARK() occurrence effectively establishes a
  434. * new baseline.
  435. */
  436. #define BENCHMARK_RELATIVE(name, ...) \
  437. BENCHMARK_IMPL( \
  438. name, \
  439. "%" FB_STRINGIZE(name), \
  440. FB_ARG_2_OR_1(1, ##__VA_ARGS__), \
  441. FB_ONE_OR_NONE(unsigned, ##__VA_ARGS__), \
  442. __VA_ARGS__)
  443. /**
  444. * Same as BENCHMARK_RELATIVE, but allows one to return the actual number
  445. * of iterations that have been run.
  446. */
  447. #define BENCHMARK_RELATIVE_MULTI(name, ...) \
  448. BENCHMARK_MULTI_IMPL( \
  449. name, \
  450. "%" FB_STRINGIZE(name), \
  451. FB_ONE_OR_NONE(unsigned, ##__VA_ARGS__), \
  452. __VA_ARGS__)
  453. /**
  454. * A combination of BENCHMARK_RELATIVE and BENCHMARK_PARAM.
  455. */
  456. #define BENCHMARK_RELATIVE_PARAM(name, param) \
  457. BENCHMARK_RELATIVE_NAMED_PARAM(name, param, param)
  458. /**
  459. * Same as BENCHMARK_RELATIVE_PARAM, but allows one to return the actual
  460. * number of iterations that have been run.
  461. */
  462. #define BENCHMARK_RELATIVE_PARAM_MULTI(name, param) \
  463. BENCHMARK_RELATIVE_NAMED_PARAM_MULTI(name, param, param)
  464. /**
  465. * A combination of BENCHMARK_RELATIVE and BENCHMARK_NAMED_PARAM.
  466. */
  467. #define BENCHMARK_RELATIVE_NAMED_PARAM(name, param_name, ...) \
  468. BENCHMARK_IMPL( \
  469. FB_CONCATENATE(name, FB_CONCATENATE(_, param_name)), \
  470. "%" FB_STRINGIZE(name) "(" FB_STRINGIZE(param_name) ")", \
  471. iters, \
  472. unsigned, \
  473. iters) { \
  474. name(iters, ##__VA_ARGS__); \
  475. }
  476. /**
  477. * Same as BENCHMARK_RELATIVE_NAMED_PARAM, but allows one to return the
  478. * actual number of iterations that have been run.
  479. */
  480. #define BENCHMARK_RELATIVE_NAMED_PARAM_MULTI(name, param_name, ...) \
  481. BENCHMARK_MULTI_IMPL( \
  482. FB_CONCATENATE(name, FB_CONCATENATE(_, param_name)), \
  483. "%" FB_STRINGIZE(name) "(" FB_STRINGIZE(param_name) ")", \
  484. unsigned, \
  485. iters) { \
  486. return name(iters, ##__VA_ARGS__); \
  487. }
  488. /**
  489. * Draws a line of dashes.
  490. */
  491. #define BENCHMARK_DRAW_LINE() \
  492. static bool FB_ANONYMOUS_VARIABLE(follyBenchmarkUnused) = \
  493. (::folly::addBenchmark(__FILE__, "-", []() -> unsigned { return 0; }), \
  494. true)
  495. /**
  496. * Allows execution of code that doesn't count torward the benchmark's
  497. * time budget. Example:
  498. *
  499. * BENCHMARK_START_GROUP(insertVectorBegin, n) {
  500. * vector<int> v;
  501. * BENCHMARK_SUSPEND {
  502. * v.reserve(n);
  503. * }
  504. * FOR_EACH_RANGE (i, 0, n) {
  505. * v.insert(v.begin(), 42);
  506. * }
  507. * }
  508. */
  509. #define BENCHMARK_SUSPEND \
  510. if (auto FB_ANONYMOUS_VARIABLE(BENCHMARK_SUSPEND) = \
  511. ::folly::BenchmarkSuspender()) { \
  512. } else