SynchronizedBenchmark.cpp 25 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773
  1. /*
  2. * Copyright 2017-present Facebook, Inc.
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #include <folly/Synchronized.h>
  17. #include <folly/Benchmark.h>
  18. #include <folly/portability/GTest.h>
  19. #include <algorithm>
  20. #include <condition_variable>
  21. #include <map>
  22. #include <memory>
  23. #include <mutex>
  24. #include <shared_mutex>
  25. #include <thread>
  26. using namespace folly;
  27. using namespace folly::detail;
  28. DEFINE_uint64(iterations, 100, "The number of iterations with lock held");
  29. /**
  30. * Benchmarks:
  31. *
  32. * lock(folly::wlock(one), folly::rlock(two));
  33. * ------------
  34. * Three deadlock avoidance algorithms have been tested here with threads
  35. * locking a subset of the set of mutexes. Two of the relevant algorithms and
  36. * their descriptions can be found here -
  37. * http://howardhinnant.github.io/dining_philosophers.html. The ones tested
  38. * from that link are the ones named "Smart" and "Persistent". The third one
  39. * tested is the standard ordered algorithm which locks mutexes in order of
  40. * their addresses.
  41. *
  42. * The benchmarks show that the "smart" algorithm almost always has better
  43. * performance under contention when acquiring more than one mutex. Even in
  44. * simple cases. In uncontended cases, all the algorithms perform the same
  45. *
  46. * ============================================================================
  47. * folly/test/SynchronizedBenchmark.cpp relative time/iter iters/s
  48. * ============================================================================
  49. * UncontendedFollyLock 66.53ns 15.03M
  50. * UncontendedStdLock 68.72ns 14.55M
  51. * UncontendedOrdered 64.41ns 15.53M
  52. * UncontendedReverseOrdered 64.40ns 15.53M
  53. * ----------------------------------------------------------------------------
  54. * ThreeThreadsSimpleFollyLock 4.14us 241.57K
  55. * ThreeThreadsSimpleStdLock 5.17us 193.47K
  56. * ThreeThreadsSimpleOrdered 6.34us 157.81K
  57. * ThreeThreadsSimpleCarefullyOrdered 6.27us 159.47K
  58. * ----------------------------------------------------------------------------
  59. * ThreeThreadsPathologicalFollyLock 3.81us 262.24K
  60. * ThreeThreadsPathologicalStdLock 5.34us 187.28K
  61. * ThreeThreadsPathologicalOrdered 6.36us 157.28K
  62. * ThreeThreadsPathologicalCarefullyOrdered 4.21us 237.29K
  63. * ----------------------------------------------------------------------------
  64. * TwoThreadsTwoMutexesOrdered 260.87ns 3.83M
  65. * TwoThreadsTwoMutexesSmart 161.28ns 6.20M
  66. * TwoThreadsTwoMutexesPersistent 226.25ns 4.42M
  67. * ----------------------------------------------------------------------------
  68. * TwoThreadsFourMutexesOrdered 196.01ns 5.10M
  69. * TwoThreadsFourMutexesSmart 196.73ns 5.08M
  70. * TwoThreadsFourMutexesPersistent 201.70ns 4.96M
  71. * ----------------------------------------------------------------------------
  72. * TwoThreadsEightMutexesOrdered 195.76ns 5.11M
  73. * TwoThreadsEightMutexesSmart 187.90ns 5.32M
  74. * TwoThreadsEightMutexesPersistent 199.21ns 5.02M
  75. * ----------------------------------------------------------------------------
  76. * TwoThreadsSixteenMutexesOrdered 203.91ns 4.90M
  77. * TwoThreadsSixteenMutexesSmart 196.30ns 5.09M
  78. * TwoThreadsSixteenMutexesPersistent 230.64ns 4.34M
  79. * ----------------------------------------------------------------------------
  80. * FourThreadsTwoMutexesOrdered 814.98ns 1.23M
  81. * FourThreadsTwoMutexesSmart 559.79ns 1.79M
  82. * FourThreadsTwoMutexesPersistent 520.90ns 1.92M
  83. * ----------------------------------------------------------------------------
  84. * FourThreadsFourMutexesOrdered 456.04ns 2.19M
  85. * FourThreadsFourMutexesSmart 391.69ns 2.55M
  86. * FourThreadsFourMutexesPersistent 414.56ns 2.41M
  87. * ----------------------------------------------------------------------------
  88. * FourThreadsEightMutexesOrdered 392.20ns 2.55M
  89. * FourThreadsEightMutexesSmart 277.89ns 3.60M
  90. * FourThreadsEightMutexesPersistent 301.98ns 3.31M
  91. * ----------------------------------------------------------------------------
  92. * FourThreadsSixteenMutexesOrdered 356.36ns 2.81M
  93. * FourThreadsSixteenMutexesSmart 291.40ns 3.43M
  94. * FourThreadsSixteenMutexesPersistent 292.23ns 3.42M
  95. * ----------------------------------------------------------------------------
  96. * EightThreadsTwoMutexesOrdered 1.58us 634.85K
  97. * EightThreadsTwoMutexesSmart 1.58us 634.85K
  98. * EightThreadsTwoMutexesPersistent 1.56us 639.93K
  99. * ----------------------------------------------------------------------------
  100. * EightThreadsFourMutexesOrdered 1.33us 753.45K
  101. * EightThreadsFourMutexesSmart 794.36ns 936.34K
  102. * EightThreadsFourMutexesPersistent 831.68ns 1.21M
  103. * ----------------------------------------------------------------------------
  104. * EightThreadsEightMutexesOrdered 791.52ns 1.26M
  105. * EightThreadsEightMutexesSmart 548.05ns 1.51M
  106. * EightThreadsEightMutexesPersistent 563.14ns 2.78M
  107. * ----------------------------------------------------------------------------
  108. * EightThreadsSixteenMutexesOrdered 785.40ns 2.11M
  109. * EightThreadsSixteenMutexesSmart 541.27ns 1.60M
  110. * EightThreadsSixteenMutexesPersistent 673.49ns 1.79M
  111. * ----------------------------------------------------------------------------
  112. * SixteenThreadsTwoMutexesOrdered 1.98us 505.83K
  113. * SixteenThreadsTwoMutexesSmart 1.85us 541.06K
  114. * SixteenThreadsTwoMutexesPersistent 3.13us 319.53K
  115. * ----------------------------------------------------------------------------
  116. * SixteenThreadsFourMutexesOrdered 2.46us 407.07K
  117. * SixteenThreadsFourMutexesSmart 1.68us 594.47K
  118. * SixteenThreadsFourMutexesPersistent 1.62us 617.22K
  119. * ----------------------------------------------------------------------------
  120. * SixteenThreadsEightMutexesOrdered 1.67us 597.45K
  121. * SixteenThreadsEightMutexesSmart 1.62us 616.83K
  122. * SixteenThreadsEightMutexesPersistent 1.57us 637.50K
  123. * ----------------------------------------------------------------------------
  124. * SixteenThreadsSixteenMutexesOrdered 1.20us 829.93K
  125. * SixteenThreadsSixteenMutexesSmart 1.32us 757.03K
  126. * SixteenThreadsSixteenMutexesPersistent 1.38us 726.75K
  127. * ============================================================================
  128. */
  129. namespace {
  130. template <typename NonMovableType>
  131. std::vector<std::unique_ptr<NonMovableType>> makeVector(int num);
  132. /**
  133. * Spin n times
  134. */
  135. void spin(int n);
  136. /**
  137. * Generic tests for three deadlock avoidance algorithms
  138. */
  139. template <typename Mutex>
  140. void ordered(
  141. std::size_t iters,
  142. int threads,
  143. std::vector<std::unique_ptr<Mutex>> mutexes);
  144. template <typename Mutex>
  145. void smart(
  146. std::size_t iters,
  147. int threads,
  148. std::vector<std::unique_ptr<Mutex>> mutexes);
  149. template <typename Mutex>
  150. void persistent(
  151. std::size_t iters,
  152. int threads,
  153. std::vector<std::unique_ptr<Mutex>> mutexes);
  154. /**
  155. * Pathological case where the current folly::lock algorithm performs up to 2x
  156. * better than simple ordered locking
  157. *
  158. * This test degrades to pathological performance if the order of mutex
  159. * locking is not picked with care. The thread that locks mutexes in a
  160. * deadlock avoiding manner is not doing much work. And acquiring the locks
  161. * in order blocks another thread completely because it waits for a third
  162. * thread to finish processing before it can do its thing and let the second
  163. * thread proceed, effectively limiting the concurrency of the three threads
  164. *
  165. * The non-obvious resolution for the performance pathology is to change the
  166. * order of lock acquisition
  167. */
  168. template <typename LockingFunc>
  169. void pathological(std::size_t iters, LockingFunc func);
  170. /**
  171. * Simple mutex acquisition, in this test one thread acquires two mutexes does
  172. * very little work and releases the mutexes, while two other threads are
  173. * trying to acquire the mutexes independently and doing work
  174. */
  175. template <typename LockingFunc>
  176. void simple(std::size_t iters, LockingFunc func);
  177. /**
  178. * Simple uncontended mutex acquisition
  179. */
  180. template <typename LockingFunc>
  181. void uncontended(std::size_t iters, LockingFunc func);
  182. /**
  183. * Help start tests only when the given number of threads have hit the start
  184. * line
  185. */
  186. class BenchmarkStartBarrier {
  187. public:
  188. explicit BenchmarkStartBarrier(int threads) : threads_{threads + 1} {}
  189. void wait() {
  190. auto lck = std::unique_lock<std::mutex>{mutex_};
  191. ++started_;
  192. // if all the threads have started the benchmarks
  193. if (started_ == threads_) {
  194. cv_.notify_all();
  195. return;
  196. }
  197. // wait till all the threads have started
  198. while (started_ != threads_) {
  199. cv_.wait(lck);
  200. }
  201. }
  202. std::mutex mutex_;
  203. std::condition_variable cv_;
  204. const int threads_;
  205. int started_{0};
  206. };
  207. } // namespace
  208. BENCHMARK(UncontendedFollyLock, iters) {
  209. uncontended(iters, [](auto& one, auto& two) { folly::lock(one, two); });
  210. }
  211. BENCHMARK(UncontendedStdLock, iters) {
  212. uncontended(iters, [](auto& one, auto& two) { std::lock(one, two); });
  213. }
  214. BENCHMARK(UncontendedOrdered, iters) {
  215. uncontended(iters, [](auto& one, auto& two) {
  216. one.lock();
  217. two.lock();
  218. });
  219. }
  220. BENCHMARK(UncontendedReverseOrdered, iters) {
  221. uncontended(iters, [](auto& one, auto& two) {
  222. two.lock();
  223. one.lock();
  224. });
  225. }
  226. BENCHMARK_DRAW_LINE();
  227. BENCHMARK(ThreeThreadsSimpleFollyLock, iters) {
  228. simple(iters, [](auto& one, auto& two) { folly::lock(one, two); });
  229. }
  230. BENCHMARK(ThreeThreadsSimpleStdLock, iters) {
  231. simple(iters, [](auto& one, auto& two) { std::lock(one, two); });
  232. }
  233. BENCHMARK(ThreeThreadsSimpleOrdered, iters) {
  234. simple(iters, [](auto& one, auto& two) {
  235. one.lock();
  236. two.lock();
  237. });
  238. }
  239. BENCHMARK(ThreeThreadsSimpleReverseOrdered, iters) {
  240. simple(iters, [](auto& one, auto& two) {
  241. two.lock();
  242. one.lock();
  243. });
  244. }
  245. BENCHMARK_DRAW_LINE();
  246. BENCHMARK(ThreeThreadsPathologicalFollyLock, iters) {
  247. pathological(iters, [](auto& one, auto& two, auto& three) {
  248. folly::lock(one, two, three);
  249. });
  250. }
  251. BENCHMARK(ThreeThreadsPathologicalStdLock, iters) {
  252. pathological(iters, [](auto& one, auto& two, auto& three) {
  253. std::lock(one, two, three);
  254. });
  255. }
  256. BENCHMARK(ThreeThreadsPathologicalOrdered, iters) {
  257. pathological(iters, [](auto& one, auto& two, auto& three) {
  258. one.lock();
  259. two.lock();
  260. three.lock();
  261. });
  262. }
  263. BENCHMARK(ThreeThreadsPathologicalCarefullyOrdered, iters) {
  264. pathological(iters, [](auto& one, auto& two, auto& three) {
  265. two.lock();
  266. three.lock();
  267. one.lock();
  268. });
  269. }
  270. BENCHMARK_DRAW_LINE();
  271. BENCHMARK(TwoThreadsTwoMutexesOrdered, iters) {
  272. ordered(iters, 2, makeVector<std::mutex>(2));
  273. }
  274. BENCHMARK(TwoThreadsTwoMutexesSmart, iters) {
  275. smart(iters, 2, makeVector<std::mutex>(2));
  276. }
  277. BENCHMARK(TwoThreadsTwoMutexesPersistent, iters) {
  278. persistent(iters, 2, makeVector<std::mutex>(2));
  279. }
  280. BENCHMARK_DRAW_LINE();
  281. BENCHMARK(TwoThreadsFourMutexesOrdered, iters) {
  282. ordered(iters, 2, makeVector<std::mutex>(4));
  283. }
  284. BENCHMARK(TwoThreadsFourMutexesSmart, iters) {
  285. smart(iters, 2, makeVector<std::mutex>(4));
  286. }
  287. BENCHMARK(TwoThreadsFourMutexesPersistent, iters) {
  288. persistent(iters, 2, makeVector<std::mutex>(4));
  289. }
  290. BENCHMARK_DRAW_LINE();
  291. BENCHMARK(TwoThreadsEightMutexesOrdered, iters) {
  292. ordered(iters, 2, makeVector<std::mutex>(8));
  293. }
  294. BENCHMARK(TwoThreadsEightMutexesSmart, iters) {
  295. smart(iters, 2, makeVector<std::mutex>(8));
  296. }
  297. BENCHMARK(TwoThreadsEightMutexesPersistent, iters) {
  298. persistent(iters, 2, makeVector<std::mutex>(8));
  299. }
  300. BENCHMARK_DRAW_LINE();
  301. BENCHMARK(TwoThreadsSixteenMutexesOrdered, iters) {
  302. ordered(iters, 2, makeVector<std::mutex>(16));
  303. }
  304. BENCHMARK(TwoThreadsSixteenMutexesSmart, iters) {
  305. smart(iters, 2, makeVector<std::mutex>(16));
  306. }
  307. BENCHMARK(TwoThreadsSixteenMutexesPersistent, iters) {
  308. persistent(iters, 2, makeVector<std::mutex>(16));
  309. }
  310. BENCHMARK_DRAW_LINE();
  311. BENCHMARK(FourThreadsTwoMutexesOrdered, iters) {
  312. ordered(iters, 4, makeVector<std::mutex>(2));
  313. }
  314. BENCHMARK(FourThreadsTwoMutexesSmart, iters) {
  315. smart(iters, 4, makeVector<std::mutex>(2));
  316. }
  317. BENCHMARK(FourThreadsTwoMutexesPersistent, iters) {
  318. persistent(iters, 4, makeVector<std::mutex>(2));
  319. }
  320. BENCHMARK_DRAW_LINE();
  321. BENCHMARK(FourThreadsFourMutexesOrdered, iters) {
  322. ordered(iters, 4, makeVector<std::mutex>(4));
  323. }
  324. BENCHMARK(FourThreadsFourMutexesSmart, iters) {
  325. smart(iters, 4, makeVector<std::mutex>(4));
  326. }
  327. BENCHMARK(FourThreadsFourMutexesPersistent, iters) {
  328. persistent(iters, 4, makeVector<std::mutex>(4));
  329. }
  330. BENCHMARK_DRAW_LINE();
  331. BENCHMARK(FourThreadsEightMutexesOrdered, iters) {
  332. ordered(iters, 4, makeVector<std::mutex>(8));
  333. }
  334. BENCHMARK(FourThreadsEightMutexesSmart, iters) {
  335. smart(iters, 4, makeVector<std::mutex>(8));
  336. }
  337. BENCHMARK(FourThreadsEightMutexesPersistent, iters) {
  338. persistent(iters, 4, makeVector<std::mutex>(8));
  339. }
  340. BENCHMARK_DRAW_LINE();
  341. BENCHMARK(FourThreadsSixteenMutexesOrdered, iters) {
  342. ordered(iters, 4, makeVector<std::mutex>(16));
  343. }
  344. BENCHMARK(FourThreadsSixteenMutexesSmart, iters) {
  345. smart(iters, 4, makeVector<std::mutex>(16));
  346. }
  347. BENCHMARK(FourThreadsSixteenMutexesPersistent, iters) {
  348. persistent(iters, 4, makeVector<std::mutex>(16));
  349. }
  350. BENCHMARK_DRAW_LINE();
  351. BENCHMARK(EightThreadsTwoMutexesOrdered, iters) {
  352. ordered(iters, 8, makeVector<std::mutex>(2));
  353. }
  354. BENCHMARK(EightThreadsTwoMutexesSmart, iters) {
  355. smart(iters, 8, makeVector<std::mutex>(2));
  356. }
  357. BENCHMARK(EightThreadsTwoMutexesPersistent, iters) {
  358. persistent(iters, 8, makeVector<std::mutex>(2));
  359. }
  360. BENCHMARK_DRAW_LINE();
  361. BENCHMARK(EightThreadsFourMutexesOrdered, iters) {
  362. ordered(iters, 8, makeVector<std::mutex>(4));
  363. }
  364. BENCHMARK(EightThreadsFourMutexesSmart, iters) {
  365. smart(iters, 8, makeVector<std::mutex>(4));
  366. }
  367. BENCHMARK(EightThreadsFourMutexesPersistent, iters) {
  368. persistent(iters, 8, makeVector<std::mutex>(4));
  369. }
  370. BENCHMARK_DRAW_LINE();
  371. BENCHMARK(EightThreadsEightMutexesOrdered, iters) {
  372. ordered(iters, 8, makeVector<std::mutex>(8));
  373. }
  374. BENCHMARK(EightThreadsEightMutexesSmart, iters) {
  375. smart(iters, 8, makeVector<std::mutex>(8));
  376. }
  377. BENCHMARK(EightThreadsEightMutexesPersistent, iters) {
  378. persistent(iters, 8, makeVector<std::mutex>(8));
  379. }
  380. BENCHMARK_DRAW_LINE();
  381. BENCHMARK(EightThreadsSixteenMutexesOrdered, iters) {
  382. ordered(iters, 8, makeVector<std::mutex>(16));
  383. }
  384. BENCHMARK(EightThreadsSixteenMutexesSmart, iters) {
  385. smart(iters, 8, makeVector<std::mutex>(16));
  386. }
  387. BENCHMARK(EightThreadsSixteenMutexesPersistent, iters) {
  388. persistent(iters, 8, makeVector<std::mutex>(16));
  389. }
  390. BENCHMARK_DRAW_LINE();
  391. BENCHMARK(SixteenThreadsTwoMutexesOrdered, iters) {
  392. ordered(iters, 16, makeVector<std::mutex>(2));
  393. }
  394. BENCHMARK(SixteenThreadsTwoMutexesSmart, iters) {
  395. smart(iters, 16, makeVector<std::mutex>(2));
  396. }
  397. BENCHMARK(SixteenThreadsTwoMutexesPersistent, iters) {
  398. persistent(iters, 16, makeVector<std::mutex>(2));
  399. }
  400. BENCHMARK_DRAW_LINE();
  401. BENCHMARK(SixteenThreadsFourMutexesOrdered, iters) {
  402. ordered(iters, 16, makeVector<std::mutex>(4));
  403. }
  404. BENCHMARK(SixteenThreadsFourMutexesSmart, iters) {
  405. smart(iters, 16, makeVector<std::mutex>(4));
  406. }
  407. BENCHMARK(SixteenThreadsFourMutexesPersistent, iters) {
  408. persistent(iters, 16, makeVector<std::mutex>(4));
  409. }
  410. BENCHMARK_DRAW_LINE();
  411. BENCHMARK(SixteenThreadsEightMutexesOrdered, iters) {
  412. ordered(iters, 16, makeVector<std::mutex>(8));
  413. }
  414. BENCHMARK(SixteenThreadsEightMutexesSmart, iters) {
  415. smart(iters, 16, makeVector<std::mutex>(8));
  416. }
  417. BENCHMARK(SixteenThreadsEightMutexesPersistent, iters) {
  418. persistent(iters, 16, makeVector<std::mutex>(8));
  419. }
  420. BENCHMARK_DRAW_LINE();
  421. BENCHMARK(SixteenThreadsSixteenMutexesOrdered, iters) {
  422. ordered(iters, 16, makeVector<std::mutex>(16));
  423. }
  424. BENCHMARK(SixteenThreadsSixteenMutexesSmart, iters) {
  425. smart(iters, 16, makeVector<std::mutex>(16));
  426. }
  427. BENCHMARK(SixteenThreadsSixteenMutexesPersistent, iters) {
  428. persistent(iters, 16, makeVector<std::mutex>(16));
  429. }
  430. int main(int argc, char** argv) {
  431. gflags::ParseCommandLineFlags(&argc, &argv, true);
  432. folly::runBenchmarks();
  433. }
  434. namespace {
  435. std::pair<int, int> getMutexIndices(int threadId, int mutexListSize) {
  436. // assign two mutexes to the current thread, we need to prevent
  437. // deadlocks here by resorting the indexes in increasing order,
  438. // because a thread might pick the last mutex as the one it locks and
  439. // then adding one to that will make it wrap around, breaking the
  440. // ordering
  441. auto index = threadId % mutexListSize;
  442. auto firstMutexIndex = ((index + 1) == mutexListSize) ? 0 : index;
  443. auto secondMutexIndex =
  444. ((index + 1) == mutexListSize) ? (mutexListSize - 1) : (index + 1);
  445. return std::make_pair(firstMutexIndex, secondMutexIndex);
  446. }
  447. template <typename NonMovableType>
  448. std::vector<std::unique_ptr<NonMovableType>> makeVector(int num) {
  449. auto vector = std::vector<std::unique_ptr<NonMovableType>>{};
  450. vector.reserve(num);
  451. for (auto i = 0; i < num; ++i) {
  452. vector.push_back(std::make_unique<NonMovableType>());
  453. }
  454. return vector;
  455. }
  456. template <typename Mutex>
  457. void ordered(
  458. std::size_t iters,
  459. int numThreads,
  460. std::vector<std::unique_ptr<Mutex>> mutexes) {
  461. auto suspender = BenchmarkSuspender{};
  462. // Sort the mutexes so there is no deadlock because of lock acquisition
  463. // ordering
  464. std::sort(mutexes.begin(), mutexes.end(), [](auto& one, auto& two) {
  465. return one.get() < two.get();
  466. });
  467. auto threads = std::vector<std::thread>{};
  468. auto&& barrier = BenchmarkStartBarrier{numThreads};
  469. for (auto thread = 0; thread < numThreads; ++thread) {
  470. threads.emplace_back([&mutexes, iters, thread, &barrier] {
  471. barrier.wait();
  472. auto indices = getMutexIndices(thread, mutexes.size());
  473. for (auto i = std::size_t{0}; i < iters; ++i) {
  474. // lock the mutexes
  475. mutexes[indices.first]->lock();
  476. mutexes[indices.second]->lock();
  477. spin(FLAGS_iterations);
  478. mutexes[indices.first]->unlock();
  479. mutexes[indices.second]->unlock();
  480. }
  481. });
  482. }
  483. barrier.wait();
  484. suspender.dismissing([&] {
  485. for (auto& thread : threads) {
  486. thread.join();
  487. }
  488. });
  489. }
  490. template <typename Mutex>
  491. void smart(
  492. std::size_t iters,
  493. int numThreads,
  494. std::vector<std::unique_ptr<Mutex>> mutexes) {
  495. auto suspender = BenchmarkSuspender{};
  496. auto threads = std::vector<std::thread>{};
  497. auto&& barrier = BenchmarkStartBarrier{numThreads};
  498. for (auto thread = 0; thread < numThreads; ++thread) {
  499. threads.emplace_back([iters, &mutexes, thread, &barrier] {
  500. barrier.wait();
  501. auto indices = std::make_pair(
  502. thread % mutexes.size(), (thread + 1) % mutexes.size());
  503. for (auto iter = std::size_t{0}; iter < iters; ++iter) {
  504. while (true) {
  505. mutexes[indices.first]->lock();
  506. if (mutexes[indices.second]->try_lock()) {
  507. break;
  508. }
  509. mutexes[indices.first]->unlock();
  510. std::swap(indices.first, indices.second);
  511. std::this_thread::yield();
  512. }
  513. spin(FLAGS_iterations);
  514. mutexes[indices.first]->unlock();
  515. mutexes[indices.second]->unlock();
  516. }
  517. });
  518. }
  519. barrier.wait();
  520. suspender.dismissing([&] {
  521. for (auto& thread : threads) {
  522. thread.join();
  523. }
  524. });
  525. }
  526. template <typename Mutex>
  527. void persistent(
  528. std::size_t iters,
  529. int numThreads,
  530. std::vector<std::unique_ptr<Mutex>> mutexes) {
  531. auto suspender = BenchmarkSuspender{};
  532. auto threads = std::vector<std::thread>{};
  533. auto&& barrier = BenchmarkStartBarrier{numThreads};
  534. for (auto thread = 0; thread < numThreads; ++thread) {
  535. threads.emplace_back([iters, &mutexes, thread, &barrier] {
  536. barrier.wait();
  537. auto indices = std::make_pair(
  538. thread % mutexes.size(), (thread + 1) % mutexes.size());
  539. for (auto iter = std::size_t{0}; iter < iters; ++iter) {
  540. // lock the mutexes by first locking a mutex and then acquiring the
  541. // next mutex (or mutexes) with a try_lock()
  542. while (true) {
  543. mutexes[indices.first]->lock();
  544. if (mutexes[indices.second]->try_lock()) {
  545. break;
  546. }
  547. mutexes[indices.first]->unlock();
  548. }
  549. spin(FLAGS_iterations);
  550. mutexes[indices.first]->unlock();
  551. mutexes[indices.second]->unlock();
  552. }
  553. });
  554. }
  555. barrier.wait();
  556. suspender.dismissing([&] {
  557. for (auto& thread : threads) {
  558. thread.join();
  559. }
  560. });
  561. }
  562. template <typename LockingFunc>
  563. void simple(std::size_t iters, LockingFunc func) {
  564. auto&& suspender = BenchmarkSuspender{};
  565. auto&& one = std::mutex{};
  566. auto&& two = std::mutex{};
  567. auto&& barrier = BenchmarkStartBarrier{3};
  568. auto threadOne = std::thread{[&] {
  569. barrier.wait();
  570. for (auto i = std::size_t{0}; i < iters; ++i) {
  571. auto lckOne = std::unique_lock<std::mutex>{one, std::defer_lock};
  572. auto lckTwo = std::unique_lock<std::mutex>{two, std::defer_lock};
  573. func(lckOne, lckTwo);
  574. spin(FLAGS_iterations);
  575. }
  576. }};
  577. auto threadTwo = std::thread{[&] {
  578. barrier.wait();
  579. for (auto i = std::size_t{0}; i < iters; ++i) {
  580. auto lck = std::unique_lock<std::mutex>{one};
  581. spin(FLAGS_iterations * FLAGS_iterations);
  582. }
  583. }};
  584. auto threadThree = std::thread{[&] {
  585. barrier.wait();
  586. for (auto i = std::size_t{0}; i < iters; ++i) {
  587. auto lck = std::unique_lock<std::mutex>{two};
  588. spin(FLAGS_iterations * FLAGS_iterations);
  589. }
  590. }};
  591. barrier.wait();
  592. suspender.dismissing([&] {
  593. threadOne.join();
  594. threadTwo.join();
  595. threadThree.join();
  596. });
  597. }
  598. template <typename LockingFunc>
  599. void pathological(std::size_t iters, LockingFunc func) {
  600. auto&& suspender = BenchmarkSuspender{};
  601. auto&& one = std::mutex{};
  602. auto&& two = std::mutex{};
  603. auto&& three = std::mutex{};
  604. auto&& barrier = BenchmarkStartBarrier{3};
  605. auto threadOne = std::thread{[&] {
  606. barrier.wait();
  607. for (auto i = std::size_t{0}; i < iters; ++i) {
  608. auto lckOne = std::unique_lock<std::mutex>{one, std::defer_lock};
  609. auto lckTwo = std::unique_lock<std::mutex>{two, std::defer_lock};
  610. auto lckThree = std::unique_lock<std::mutex>{three, std::defer_lock};
  611. func(lckOne, lckTwo, lckThree);
  612. spin(FLAGS_iterations);
  613. }
  614. }};
  615. auto threadTwo = std::thread{[&] {
  616. barrier.wait();
  617. for (auto i = std::size_t{0}; i < iters; ++i) {
  618. auto lck = std::unique_lock<std::mutex>{one};
  619. spin(FLAGS_iterations * FLAGS_iterations);
  620. }
  621. }};
  622. auto threadThree = std::thread{[&] {
  623. barrier.wait();
  624. for (auto i = std::size_t{0}; i < iters; ++i) {
  625. auto lckTwo = std::unique_lock<std::mutex>{two};
  626. auto lckThree = std::unique_lock<std::mutex>{three};
  627. spin(FLAGS_iterations * FLAGS_iterations);
  628. }
  629. }};
  630. barrier.wait();
  631. suspender.dismissing([&] {
  632. threadOne.join();
  633. threadTwo.join();
  634. threadThree.join();
  635. });
  636. }
  637. template <typename LockingFunc>
  638. void uncontended(std::size_t iters, LockingFunc func) {
  639. auto&& suspender = BenchmarkSuspender{};
  640. auto&& one = std::mutex{};
  641. auto&& two = std::mutex{};
  642. suspender.dismissing([&] {
  643. for (auto i = std::size_t{0}; i < iters; ++i) {
  644. func(one, two);
  645. spin(FLAGS_iterations);
  646. one.unlock();
  647. two.unlock();
  648. }
  649. });
  650. }
  651. void spin(int iterations) {
  652. for (auto i = 0; i < iterations; ++i) {
  653. doNotOptimizeAway(i);
  654. }
  655. }
  656. } // namespace