ParkingLotBenchmark.cpp 6.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238
  1. /*
  2. * Copyright 2018-present Facebook, Inc.
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #include <thread>
  17. #include <folly/synchronization/ParkingLot.h>
  18. #include <folly/Benchmark.h>
  19. #include <folly/detail/Futex.h>
  20. #include <folly/synchronization/Baton.h>
  21. #include <folly/synchronization/test/Barrier.h>
  22. DEFINE_uint64(threads, 32, "Number of threads for benchmark");
  23. using namespace folly;
  24. using namespace folly::test;
  25. ParkingLot<> lot;
  26. BENCHMARK(FutexNoWaitersWake, iters) {
  27. BenchmarkSuspender susp;
  28. folly::detail::Futex<> fu;
  29. Barrier b(FLAGS_threads + 1);
  30. std::vector<std::thread> threads{FLAGS_threads};
  31. for (auto& t : threads) {
  32. t = std::thread([&]() {
  33. b.wait();
  34. for (auto i = 0u; i < iters; i++) {
  35. detail::futexWake(&fu, 1);
  36. }
  37. });
  38. }
  39. susp.dismiss();
  40. b.wait();
  41. for (auto& t : threads) {
  42. t.join();
  43. }
  44. }
  45. BENCHMARK_RELATIVE(ParkingLotNoWaitersWake, iters) {
  46. BenchmarkSuspender susp;
  47. Barrier b(FLAGS_threads + 1);
  48. std::vector<std::thread> threads{FLAGS_threads};
  49. for (auto& t : threads) {
  50. t = std::thread([&]() {
  51. b.wait();
  52. for (auto i = 0u; i < iters; i++) {
  53. lot.unpark(&lot, [](Unit) { return UnparkControl::RetainContinue; });
  54. }
  55. });
  56. }
  57. susp.dismiss();
  58. b.wait();
  59. for (auto& t : threads) {
  60. t.join();
  61. }
  62. }
  63. BENCHMARK(FutexWakeOne, iters) {
  64. BenchmarkSuspender susp;
  65. folly::detail::Futex<> fu;
  66. Barrier b(FLAGS_threads + 1);
  67. std::vector<std::thread> threads{FLAGS_threads};
  68. for (auto& t : threads) {
  69. t = std::thread([&]() {
  70. b.wait();
  71. while (true) {
  72. detail::futexWait(&fu, 0);
  73. if (fu.load(std::memory_order_relaxed)) {
  74. return;
  75. }
  76. }
  77. });
  78. }
  79. susp.dismiss();
  80. b.wait();
  81. for (auto i = 0u; i < iters; i++) {
  82. detail::futexWake(&fu, 1);
  83. }
  84. fu.store(1);
  85. detail::futexWake(&fu, threads.size());
  86. for (auto& t : threads) {
  87. t.join();
  88. }
  89. }
  90. BENCHMARK_RELATIVE(ParkingLotWakeOne, iters) {
  91. BenchmarkSuspender susp;
  92. std::atomic<bool> done{false};
  93. Barrier b(FLAGS_threads + 1);
  94. std::vector<std::thread> threads{FLAGS_threads};
  95. for (auto& t : threads) {
  96. t = std::thread([&]() {
  97. b.wait();
  98. while (true) {
  99. Unit f;
  100. lot.park(
  101. &done,
  102. f,
  103. [&] { return done.load(std::memory_order_relaxed) == 0; },
  104. [] {});
  105. if (done.load(std::memory_order_relaxed)) {
  106. return;
  107. }
  108. }
  109. });
  110. }
  111. susp.dismiss();
  112. b.wait();
  113. for (auto i = 0u; i < iters; i++) {
  114. lot.unpark(&done, [](Unit) { return UnparkControl::RemoveBreak; });
  115. }
  116. done = true;
  117. lot.unpark(&done, [](Unit) { return UnparkControl::RemoveContinue; });
  118. for (auto& t : threads) {
  119. t.join();
  120. }
  121. }
  122. BENCHMARK(FutexWakeAll, iters) {
  123. BenchmarkSuspender susp;
  124. Barrier b(FLAGS_threads + 1);
  125. folly::detail::Futex<> fu;
  126. std::atomic<bool> done{false};
  127. std::vector<std::thread> threads{FLAGS_threads};
  128. for (auto& t : threads) {
  129. t = std::thread([&]() {
  130. b.wait();
  131. while (true) {
  132. detail::futexWait(&fu, 0);
  133. if (done.load(std::memory_order_relaxed)) {
  134. return;
  135. }
  136. }
  137. });
  138. }
  139. susp.dismiss();
  140. b.wait();
  141. for (auto i = 0u; i < iters; i++) {
  142. detail::futexWake(&fu, threads.size());
  143. }
  144. fu.store(1);
  145. done = true;
  146. detail::futexWake(&fu, threads.size());
  147. for (auto& t : threads) {
  148. t.join();
  149. }
  150. }
  151. BENCHMARK_RELATIVE(ParkingLotWakeAll, iters) {
  152. BenchmarkSuspender susp;
  153. Barrier b(FLAGS_threads + 1);
  154. std::atomic<bool> done{false};
  155. std::vector<std::thread> threads{FLAGS_threads};
  156. for (auto& t : threads) {
  157. t = std::thread([&]() {
  158. b.wait();
  159. while (true) {
  160. Unit f;
  161. lot.park(
  162. &done,
  163. f,
  164. [&] { return done.load(std::memory_order_relaxed) == 0; },
  165. [] {});
  166. if (done.load(std::memory_order_relaxed)) {
  167. return;
  168. }
  169. }
  170. });
  171. }
  172. susp.dismiss();
  173. b.wait();
  174. for (auto i = 0u; i < iters; i++) {
  175. lot.unpark(&done, [](Unit) { return UnparkControl::RemoveContinue; });
  176. }
  177. done = true;
  178. lot.unpark(&done, [](Unit) { return UnparkControl::RemoveContinue; });
  179. for (auto& t : threads) {
  180. t.join();
  181. }
  182. }
  183. int main(int argc, char** argv) {
  184. gflags::ParseCommandLineFlags(&argc, &argv, true);
  185. folly::runBenchmarks();
  186. }
  /*
  ./buck-out/gen/folly/synchronization/test/parking_lot_test --benchmark
  --bm_min_iters=10000 --threads=4
  ============================================================================
  folly/synchronization/test/ParkingLotBenchmark.cpp relative  time/iter  iters/s
  ============================================================================
  FutexNoWaitersWake                                          163.43ns    6.12M
  ParkingLotNoWaitersWake                           29.64%    551.43ns    1.81M
  FutexWakeOne                                                156.78ns    6.38M
  ParkingLotWakeOne                                 37.49%    418.21ns    2.39M
  FutexWakeAll                                                  1.82us  549.52K
  ParkingLotWakeAll                                449.63%    404.73ns    2.47M
  ============================================================================

  ./buck-out/gen/folly/synchronization/test/parking_lot_test --benchmark
  --bm_min_iters=10000 --threads=32
  ============================================================================
  folly/synchronization/test/ParkingLotBenchmark.cpp relative  time/iter  iters/s
  ============================================================================
  FutexNoWaitersWake                                          379.59ns    2.63M
  ParkingLotNoWaitersWake                            7.94%      4.78us  209.08K
  FutexWakeOne                                                163.59ns    6.11M
  ParkingLotWakeOne                                  6.41%      2.55us  392.07K
  FutexWakeAll                                                 12.46us   80.27K
  ParkingLotWakeAll                                784.76%      1.59us  629.92K
  ============================================================================
  */