123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773 |
- /*
- * Copyright 2017-present Facebook, Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
- #include <folly/Synchronized.h>
- #include <folly/Benchmark.h>
- #include <folly/portability/GTest.h>
- #include <algorithm>
- #include <condition_variable>
- #include <map>
- #include <memory>
- #include <mutex>
- #include <shared_mutex>
- #include <thread>
- using namespace folly;
- using namespace folly::detail;
- DEFINE_uint64(iterations, 100, "The number of iterations with lock held");
- /**
- * Benchmarks:
- *
- * lock(folly::wlock(one), folly::rlock(two));
- * ------------
- * Three deadlock avoidance algorithms have been tested here with threads
- * locking a subset of the set of mutexes. Two of the relevant algorithms and
- * their descriptions can be found here -
- * http://howardhinnant.github.io/dining_philosophers.html. The ones tested
- * from that link are the ones named "Smart" and "Persistent". The third one
- * tested is the standard ordered algorithm which locks mutexes in order of
- * their addresses.
- *
- * The benchmarks show that the "smart" algorithm almost always has better
- * performance under contention when acquiring more than one mutex. Even in
- * simple cases. In uncontended cases, all the algorithms perform the same
- *
- * ============================================================================
- * folly/test/SynchronizedBenchmark.cpp relative time/iter iters/s
- * ============================================================================
- * UncontendedFollyLock 66.53ns 15.03M
- * UncontendedStdLock 68.72ns 14.55M
- * UncontendedOrdered 64.41ns 15.53M
- * UncontendedReverseOrdered 64.40ns 15.53M
- * ----------------------------------------------------------------------------
- * ThreeThreadsSimpleFollyLock 4.14us 241.57K
- * ThreeThreadsSimpleStdLock 5.17us 193.47K
- * ThreeThreadsSimpleOrdered 6.34us 157.81K
- * ThreeThreadsSimpleCarefullyOrdered 6.27us 159.47K
- * ----------------------------------------------------------------------------
- * ThreeThreadsPathologicalFollyLock 3.81us 262.24K
- * ThreeThreadsPathologicalStdLock 5.34us 187.28K
- * ThreeThreadsPathologicalOrdered 6.36us 157.28K
- * ThreeThreadsPathologicalCarefullyOrdered 4.21us 237.29K
- * ----------------------------------------------------------------------------
- * TwoThreadsTwoMutexesOrdered 260.87ns 3.83M
- * TwoThreadsTwoMutexesSmart 161.28ns 6.20M
- * TwoThreadsTwoMutexesPersistent 226.25ns 4.42M
- * ----------------------------------------------------------------------------
- * TwoThreadsFourMutexesOrdered 196.01ns 5.10M
- * TwoThreadsFourMutexesSmart 196.73ns 5.08M
- * TwoThreadsFourMutexesPersistent 201.70ns 4.96M
- * ----------------------------------------------------------------------------
- * TwoThreadsEightMutexesOrdered 195.76ns 5.11M
- * TwoThreadsEightMutexesSmart 187.90ns 5.32M
- * TwoThreadsEightMutexesPersistent 199.21ns 5.02M
- * ----------------------------------------------------------------------------
- * TwoThreadsSixteenMutexesOrdered 203.91ns 4.90M
- * TwoThreadsSixteenMutexesSmart 196.30ns 5.09M
- * TwoThreadsSixteenMutexesPersistent 230.64ns 4.34M
- * ----------------------------------------------------------------------------
- * FourThreadsTwoMutexesOrdered 814.98ns 1.23M
- * FourThreadsTwoMutexesSmart 559.79ns 1.79M
- * FourThreadsTwoMutexesPersistent 520.90ns 1.92M
- * ----------------------------------------------------------------------------
- * FourThreadsFourMutexesOrdered 456.04ns 2.19M
- * FourThreadsFourMutexesSmart 391.69ns 2.55M
- * FourThreadsFourMutexesPersistent 414.56ns 2.41M
- * ----------------------------------------------------------------------------
- * FourThreadsEightMutexesOrdered 392.20ns 2.55M
- * FourThreadsEightMutexesSmart 277.89ns 3.60M
- * FourThreadsEightMutexesPersistent 301.98ns 3.31M
- * ----------------------------------------------------------------------------
- * FourThreadsSixteenMutexesOrdered 356.36ns 2.81M
- * FourThreadsSixteenMutexesSmart 291.40ns 3.43M
- * FourThreadsSixteenMutexesPersistent 292.23ns 3.42M
- * ----------------------------------------------------------------------------
- * EightThreadsTwoMutexesOrdered 1.58us 634.85K
- * EightThreadsTwoMutexesSmart 1.58us 634.85K
- * EightThreadsTwoMutexesPersistent 1.56us 639.93K
- * ----------------------------------------------------------------------------
- * EightThreadsFourMutexesOrdered 1.33us 753.45K
- * EightThreadsFourMutexesSmart 794.36ns 936.34K
- * EightThreadsFourMutexesPersistent 831.68ns 1.21M
- * ----------------------------------------------------------------------------
- * EightThreadsEightMutexesOrdered 791.52ns 1.26M
- * EightThreadsEightMutexesSmart 548.05ns 1.51M
- * EightThreadsEightMutexesPersistent 563.14ns 2.78M
- * ----------------------------------------------------------------------------
- * EightThreadsSixteenMutexesOrdered 785.40ns 2.11M
- * EightThreadsSixteenMutexesSmart 541.27ns 1.60M
- * EightThreadsSixteenMutexesPersistent 673.49ns 1.79M
- * ----------------------------------------------------------------------------
- * SixteenThreadsTwoMutexesOrdered 1.98us 505.83K
- * SixteenThreadsTwoMutexesSmart 1.85us 541.06K
- * SixteenThreadsTwoMutexesPersistent 3.13us 319.53K
- * ----------------------------------------------------------------------------
- * SixteenThreadsFourMutexesOrdered 2.46us 407.07K
- * SixteenThreadsFourMutexesSmart 1.68us 594.47K
- * SixteenThreadsFourMutexesPersistent 1.62us 617.22K
- * ----------------------------------------------------------------------------
- * SixteenThreadsEightMutexesOrdered 1.67us 597.45K
- * SixteenThreadsEightMutexesSmart 1.62us 616.83K
- * SixteenThreadsEightMutexesPersistent 1.57us 637.50K
- * ----------------------------------------------------------------------------
- * SixteenThreadsSixteenMutexesOrdered 1.20us 829.93K
- * SixteenThreadsSixteenMutexesSmart 1.32us 757.03K
- * SixteenThreadsSixteenMutexesPersistent 1.38us 726.75K
- * ============================================================================
- */
- namespace {
- template <typename NonMovableType>
- std::vector<std::unique_ptr<NonMovableType>> makeVector(int num);
- /**
- * Spin n times
- */
- void spin(int n);
- /**
- * Generic tests for three deadlock avoidance algorithms
- */
- template <typename Mutex>
- void ordered(
- std::size_t iters,
- int threads,
- std::vector<std::unique_ptr<Mutex>> mutexes);
- template <typename Mutex>
- void smart(
- std::size_t iters,
- int threads,
- std::vector<std::unique_ptr<Mutex>> mutexes);
- template <typename Mutex>
- void persistent(
- std::size_t iters,
- int threads,
- std::vector<std::unique_ptr<Mutex>> mutexes);
- /**
- * Pathological case where the current folly::lock algorithm performs up to 2x
- * better than simple ordered locking
- *
- * This test degrades to pathological performance if the order of mutex
- * locking is not picked with care. The thread that locks mutexes in a
- * deadlock avoiding manner is not doing much work. And acquiring the locks
- * in order blocks another thread completely because it waits for a third
- * thread to finish processing before it can do its thing and let the second
- * thread proceed, effectively limiting the concurrency of the three threads
- *
- * The non-obvious resolution for the performance pathology is to change the
- * order of lock acquisition
- */
- template <typename LockingFunc>
- void pathological(std::size_t iters, LockingFunc func);
- /**
- * Simple mutex acquisition, in this test one thread acquires two mutexes does
- * very little work and releases the mutexes, while two other threads are
- * trying to acquire the mutexes independently and doing work
- */
- template <typename LockingFunc>
- void simple(std::size_t iters, LockingFunc func);
- /**
- * Simple uncontended mutex acquisition
- */
- template <typename LockingFunc>
- void uncontended(std::size_t iters, LockingFunc func);
- /**
- * Help start tests only when the given number of threads have hit the start
- * line
- */
- class BenchmarkStartBarrier {
- public:
- explicit BenchmarkStartBarrier(int threads) : threads_{threads + 1} {}
- void wait() {
- auto lck = std::unique_lock<std::mutex>{mutex_};
- ++started_;
- // if all the threads have started the benchmarks
- if (started_ == threads_) {
- cv_.notify_all();
- return;
- }
- // wait till all the threads have started
- while (started_ != threads_) {
- cv_.wait(lck);
- }
- }
- std::mutex mutex_;
- std::condition_variable cv_;
- const int threads_;
- int started_{0};
- };
- } // namespace
- BENCHMARK(UncontendedFollyLock, iters) {
- uncontended(iters, [](auto& one, auto& two) { folly::lock(one, two); });
- }
- BENCHMARK(UncontendedStdLock, iters) {
- uncontended(iters, [](auto& one, auto& two) { std::lock(one, two); });
- }
- BENCHMARK(UncontendedOrdered, iters) {
- uncontended(iters, [](auto& one, auto& two) {
- one.lock();
- two.lock();
- });
- }
- BENCHMARK(UncontendedReverseOrdered, iters) {
- uncontended(iters, [](auto& one, auto& two) {
- two.lock();
- one.lock();
- });
- }
- BENCHMARK_DRAW_LINE();
- BENCHMARK(ThreeThreadsSimpleFollyLock, iters) {
- simple(iters, [](auto& one, auto& two) { folly::lock(one, two); });
- }
- BENCHMARK(ThreeThreadsSimpleStdLock, iters) {
- simple(iters, [](auto& one, auto& two) { std::lock(one, two); });
- }
- BENCHMARK(ThreeThreadsSimpleOrdered, iters) {
- simple(iters, [](auto& one, auto& two) {
- one.lock();
- two.lock();
- });
- }
- BENCHMARK(ThreeThreadsSimpleReverseOrdered, iters) {
- simple(iters, [](auto& one, auto& two) {
- two.lock();
- one.lock();
- });
- }
- BENCHMARK_DRAW_LINE();
- BENCHMARK(ThreeThreadsPathologicalFollyLock, iters) {
- pathological(iters, [](auto& one, auto& two, auto& three) {
- folly::lock(one, two, three);
- });
- }
- BENCHMARK(ThreeThreadsPathologicalStdLock, iters) {
- pathological(iters, [](auto& one, auto& two, auto& three) {
- std::lock(one, two, three);
- });
- }
- BENCHMARK(ThreeThreadsPathologicalOrdered, iters) {
- pathological(iters, [](auto& one, auto& two, auto& three) {
- one.lock();
- two.lock();
- three.lock();
- });
- }
- BENCHMARK(ThreeThreadsPathologicalCarefullyOrdered, iters) {
- pathological(iters, [](auto& one, auto& two, auto& three) {
- two.lock();
- three.lock();
- one.lock();
- });
- }
- BENCHMARK_DRAW_LINE();
- BENCHMARK(TwoThreadsTwoMutexesOrdered, iters) {
- ordered(iters, 2, makeVector<std::mutex>(2));
- }
- BENCHMARK(TwoThreadsTwoMutexesSmart, iters) {
- smart(iters, 2, makeVector<std::mutex>(2));
- }
- BENCHMARK(TwoThreadsTwoMutexesPersistent, iters) {
- persistent(iters, 2, makeVector<std::mutex>(2));
- }
- BENCHMARK_DRAW_LINE();
- BENCHMARK(TwoThreadsFourMutexesOrdered, iters) {
- ordered(iters, 2, makeVector<std::mutex>(4));
- }
- BENCHMARK(TwoThreadsFourMutexesSmart, iters) {
- smart(iters, 2, makeVector<std::mutex>(4));
- }
- BENCHMARK(TwoThreadsFourMutexesPersistent, iters) {
- persistent(iters, 2, makeVector<std::mutex>(4));
- }
- BENCHMARK_DRAW_LINE();
- BENCHMARK(TwoThreadsEightMutexesOrdered, iters) {
- ordered(iters, 2, makeVector<std::mutex>(8));
- }
- BENCHMARK(TwoThreadsEightMutexesSmart, iters) {
- smart(iters, 2, makeVector<std::mutex>(8));
- }
- BENCHMARK(TwoThreadsEightMutexesPersistent, iters) {
- persistent(iters, 2, makeVector<std::mutex>(8));
- }
- BENCHMARK_DRAW_LINE();
- BENCHMARK(TwoThreadsSixteenMutexesOrdered, iters) {
- ordered(iters, 2, makeVector<std::mutex>(16));
- }
- BENCHMARK(TwoThreadsSixteenMutexesSmart, iters) {
- smart(iters, 2, makeVector<std::mutex>(16));
- }
- BENCHMARK(TwoThreadsSixteenMutexesPersistent, iters) {
- persistent(iters, 2, makeVector<std::mutex>(16));
- }
- BENCHMARK_DRAW_LINE();
- BENCHMARK(FourThreadsTwoMutexesOrdered, iters) {
- ordered(iters, 4, makeVector<std::mutex>(2));
- }
- BENCHMARK(FourThreadsTwoMutexesSmart, iters) {
- smart(iters, 4, makeVector<std::mutex>(2));
- }
- BENCHMARK(FourThreadsTwoMutexesPersistent, iters) {
- persistent(iters, 4, makeVector<std::mutex>(2));
- }
- BENCHMARK_DRAW_LINE();
- BENCHMARK(FourThreadsFourMutexesOrdered, iters) {
- ordered(iters, 4, makeVector<std::mutex>(4));
- }
- BENCHMARK(FourThreadsFourMutexesSmart, iters) {
- smart(iters, 4, makeVector<std::mutex>(4));
- }
- BENCHMARK(FourThreadsFourMutexesPersistent, iters) {
- persistent(iters, 4, makeVector<std::mutex>(4));
- }
- BENCHMARK_DRAW_LINE();
- BENCHMARK(FourThreadsEightMutexesOrdered, iters) {
- ordered(iters, 4, makeVector<std::mutex>(8));
- }
- BENCHMARK(FourThreadsEightMutexesSmart, iters) {
- smart(iters, 4, makeVector<std::mutex>(8));
- }
- BENCHMARK(FourThreadsEightMutexesPersistent, iters) {
- persistent(iters, 4, makeVector<std::mutex>(8));
- }
- BENCHMARK_DRAW_LINE();
- BENCHMARK(FourThreadsSixteenMutexesOrdered, iters) {
- ordered(iters, 4, makeVector<std::mutex>(16));
- }
- BENCHMARK(FourThreadsSixteenMutexesSmart, iters) {
- smart(iters, 4, makeVector<std::mutex>(16));
- }
- BENCHMARK(FourThreadsSixteenMutexesPersistent, iters) {
- persistent(iters, 4, makeVector<std::mutex>(16));
- }
- BENCHMARK_DRAW_LINE();
- BENCHMARK(EightThreadsTwoMutexesOrdered, iters) {
- ordered(iters, 8, makeVector<std::mutex>(2));
- }
- BENCHMARK(EightThreadsTwoMutexesSmart, iters) {
- smart(iters, 8, makeVector<std::mutex>(2));
- }
- BENCHMARK(EightThreadsTwoMutexesPersistent, iters) {
- persistent(iters, 8, makeVector<std::mutex>(2));
- }
- BENCHMARK_DRAW_LINE();
- BENCHMARK(EightThreadsFourMutexesOrdered, iters) {
- ordered(iters, 8, makeVector<std::mutex>(4));
- }
- BENCHMARK(EightThreadsFourMutexesSmart, iters) {
- smart(iters, 8, makeVector<std::mutex>(4));
- }
- BENCHMARK(EightThreadsFourMutexesPersistent, iters) {
- persistent(iters, 8, makeVector<std::mutex>(4));
- }
- BENCHMARK_DRAW_LINE();
- BENCHMARK(EightThreadsEightMutexesOrdered, iters) {
- ordered(iters, 8, makeVector<std::mutex>(8));
- }
- BENCHMARK(EightThreadsEightMutexesSmart, iters) {
- smart(iters, 8, makeVector<std::mutex>(8));
- }
- BENCHMARK(EightThreadsEightMutexesPersistent, iters) {
- persistent(iters, 8, makeVector<std::mutex>(8));
- }
- BENCHMARK_DRAW_LINE();
- BENCHMARK(EightThreadsSixteenMutexesOrdered, iters) {
- ordered(iters, 8, makeVector<std::mutex>(16));
- }
- BENCHMARK(EightThreadsSixteenMutexesSmart, iters) {
- smart(iters, 8, makeVector<std::mutex>(16));
- }
- BENCHMARK(EightThreadsSixteenMutexesPersistent, iters) {
- persistent(iters, 8, makeVector<std::mutex>(16));
- }
- BENCHMARK_DRAW_LINE();
- BENCHMARK(SixteenThreadsTwoMutexesOrdered, iters) {
- ordered(iters, 16, makeVector<std::mutex>(2));
- }
- BENCHMARK(SixteenThreadsTwoMutexesSmart, iters) {
- smart(iters, 16, makeVector<std::mutex>(2));
- }
- BENCHMARK(SixteenThreadsTwoMutexesPersistent, iters) {
- persistent(iters, 16, makeVector<std::mutex>(2));
- }
- BENCHMARK_DRAW_LINE();
- BENCHMARK(SixteenThreadsFourMutexesOrdered, iters) {
- ordered(iters, 16, makeVector<std::mutex>(4));
- }
- BENCHMARK(SixteenThreadsFourMutexesSmart, iters) {
- smart(iters, 16, makeVector<std::mutex>(4));
- }
- BENCHMARK(SixteenThreadsFourMutexesPersistent, iters) {
- persistent(iters, 16, makeVector<std::mutex>(4));
- }
- BENCHMARK_DRAW_LINE();
- BENCHMARK(SixteenThreadsEightMutexesOrdered, iters) {
- ordered(iters, 16, makeVector<std::mutex>(8));
- }
- BENCHMARK(SixteenThreadsEightMutexesSmart, iters) {
- smart(iters, 16, makeVector<std::mutex>(8));
- }
- BENCHMARK(SixteenThreadsEightMutexesPersistent, iters) {
- persistent(iters, 16, makeVector<std::mutex>(8));
- }
- BENCHMARK_DRAW_LINE();
- BENCHMARK(SixteenThreadsSixteenMutexesOrdered, iters) {
- ordered(iters, 16, makeVector<std::mutex>(16));
- }
- BENCHMARK(SixteenThreadsSixteenMutexesSmart, iters) {
- smart(iters, 16, makeVector<std::mutex>(16));
- }
- BENCHMARK(SixteenThreadsSixteenMutexesPersistent, iters) {
- persistent(iters, 16, makeVector<std::mutex>(16));
- }
- int main(int argc, char** argv) {
- gflags::ParseCommandLineFlags(&argc, &argv, true);
- folly::runBenchmarks();
- }
- namespace {
- std::pair<int, int> getMutexIndices(int threadId, int mutexListSize) {
- // assign two mutexes to the current thread, we need to prevent
- // deadlocks here by resorting the indexes in increasing order,
- // because a thread might pick the last mutex as the one it locks and
- // then adding one to that will make it wrap around, breaking the
- // ordering
- auto index = threadId % mutexListSize;
- auto firstMutexIndex = ((index + 1) == mutexListSize) ? 0 : index;
- auto secondMutexIndex =
- ((index + 1) == mutexListSize) ? (mutexListSize - 1) : (index + 1);
- return std::make_pair(firstMutexIndex, secondMutexIndex);
- }
- template <typename NonMovableType>
- std::vector<std::unique_ptr<NonMovableType>> makeVector(int num) {
- auto vector = std::vector<std::unique_ptr<NonMovableType>>{};
- vector.reserve(num);
- for (auto i = 0; i < num; ++i) {
- vector.push_back(std::make_unique<NonMovableType>());
- }
- return vector;
- }
- template <typename Mutex>
- void ordered(
- std::size_t iters,
- int numThreads,
- std::vector<std::unique_ptr<Mutex>> mutexes) {
- auto suspender = BenchmarkSuspender{};
- // Sort the mutexes so there is no deadlock because of lock acquisition
- // ordering
- std::sort(mutexes.begin(), mutexes.end(), [](auto& one, auto& two) {
- return one.get() < two.get();
- });
- auto threads = std::vector<std::thread>{};
- auto&& barrier = BenchmarkStartBarrier{numThreads};
- for (auto thread = 0; thread < numThreads; ++thread) {
- threads.emplace_back([&mutexes, iters, thread, &barrier] {
- barrier.wait();
- auto indices = getMutexIndices(thread, mutexes.size());
- for (auto i = std::size_t{0}; i < iters; ++i) {
- // lock the mutexes
- mutexes[indices.first]->lock();
- mutexes[indices.second]->lock();
- spin(FLAGS_iterations);
- mutexes[indices.first]->unlock();
- mutexes[indices.second]->unlock();
- }
- });
- }
- barrier.wait();
- suspender.dismissing([&] {
- for (auto& thread : threads) {
- thread.join();
- }
- });
- }
- template <typename Mutex>
- void smart(
- std::size_t iters,
- int numThreads,
- std::vector<std::unique_ptr<Mutex>> mutexes) {
- auto suspender = BenchmarkSuspender{};
- auto threads = std::vector<std::thread>{};
- auto&& barrier = BenchmarkStartBarrier{numThreads};
- for (auto thread = 0; thread < numThreads; ++thread) {
- threads.emplace_back([iters, &mutexes, thread, &barrier] {
- barrier.wait();
- auto indices = std::make_pair(
- thread % mutexes.size(), (thread + 1) % mutexes.size());
- for (auto iter = std::size_t{0}; iter < iters; ++iter) {
- while (true) {
- mutexes[indices.first]->lock();
- if (mutexes[indices.second]->try_lock()) {
- break;
- }
- mutexes[indices.first]->unlock();
- std::swap(indices.first, indices.second);
- std::this_thread::yield();
- }
- spin(FLAGS_iterations);
- mutexes[indices.first]->unlock();
- mutexes[indices.second]->unlock();
- }
- });
- }
- barrier.wait();
- suspender.dismissing([&] {
- for (auto& thread : threads) {
- thread.join();
- }
- });
- }
- template <typename Mutex>
- void persistent(
- std::size_t iters,
- int numThreads,
- std::vector<std::unique_ptr<Mutex>> mutexes) {
- auto suspender = BenchmarkSuspender{};
- auto threads = std::vector<std::thread>{};
- auto&& barrier = BenchmarkStartBarrier{numThreads};
- for (auto thread = 0; thread < numThreads; ++thread) {
- threads.emplace_back([iters, &mutexes, thread, &barrier] {
- barrier.wait();
- auto indices = std::make_pair(
- thread % mutexes.size(), (thread + 1) % mutexes.size());
- for (auto iter = std::size_t{0}; iter < iters; ++iter) {
- // lock the mutexes by first locking a mutex and then acquiring the
- // next mutex (or mutexes) with a try_lock()
- while (true) {
- mutexes[indices.first]->lock();
- if (mutexes[indices.second]->try_lock()) {
- break;
- }
- mutexes[indices.first]->unlock();
- }
- spin(FLAGS_iterations);
- mutexes[indices.first]->unlock();
- mutexes[indices.second]->unlock();
- }
- });
- }
- barrier.wait();
- suspender.dismissing([&] {
- for (auto& thread : threads) {
- thread.join();
- }
- });
- }
- template <typename LockingFunc>
- void simple(std::size_t iters, LockingFunc func) {
- auto&& suspender = BenchmarkSuspender{};
- auto&& one = std::mutex{};
- auto&& two = std::mutex{};
- auto&& barrier = BenchmarkStartBarrier{3};
- auto threadOne = std::thread{[&] {
- barrier.wait();
- for (auto i = std::size_t{0}; i < iters; ++i) {
- auto lckOne = std::unique_lock<std::mutex>{one, std::defer_lock};
- auto lckTwo = std::unique_lock<std::mutex>{two, std::defer_lock};
- func(lckOne, lckTwo);
- spin(FLAGS_iterations);
- }
- }};
- auto threadTwo = std::thread{[&] {
- barrier.wait();
- for (auto i = std::size_t{0}; i < iters; ++i) {
- auto lck = std::unique_lock<std::mutex>{one};
- spin(FLAGS_iterations * FLAGS_iterations);
- }
- }};
- auto threadThree = std::thread{[&] {
- barrier.wait();
- for (auto i = std::size_t{0}; i < iters; ++i) {
- auto lck = std::unique_lock<std::mutex>{two};
- spin(FLAGS_iterations * FLAGS_iterations);
- }
- }};
- barrier.wait();
- suspender.dismissing([&] {
- threadOne.join();
- threadTwo.join();
- threadThree.join();
- });
- }
- template <typename LockingFunc>
- void pathological(std::size_t iters, LockingFunc func) {
- auto&& suspender = BenchmarkSuspender{};
- auto&& one = std::mutex{};
- auto&& two = std::mutex{};
- auto&& three = std::mutex{};
- auto&& barrier = BenchmarkStartBarrier{3};
- auto threadOne = std::thread{[&] {
- barrier.wait();
- for (auto i = std::size_t{0}; i < iters; ++i) {
- auto lckOne = std::unique_lock<std::mutex>{one, std::defer_lock};
- auto lckTwo = std::unique_lock<std::mutex>{two, std::defer_lock};
- auto lckThree = std::unique_lock<std::mutex>{three, std::defer_lock};
- func(lckOne, lckTwo, lckThree);
- spin(FLAGS_iterations);
- }
- }};
- auto threadTwo = std::thread{[&] {
- barrier.wait();
- for (auto i = std::size_t{0}; i < iters; ++i) {
- auto lck = std::unique_lock<std::mutex>{one};
- spin(FLAGS_iterations * FLAGS_iterations);
- }
- }};
- auto threadThree = std::thread{[&] {
- barrier.wait();
- for (auto i = std::size_t{0}; i < iters; ++i) {
- auto lckTwo = std::unique_lock<std::mutex>{two};
- auto lckThree = std::unique_lock<std::mutex>{three};
- spin(FLAGS_iterations * FLAGS_iterations);
- }
- }};
- barrier.wait();
- suspender.dismissing([&] {
- threadOne.join();
- threadTwo.join();
- threadThree.join();
- });
- }
- template <typename LockingFunc>
- void uncontended(std::size_t iters, LockingFunc func) {
- auto&& suspender = BenchmarkSuspender{};
- auto&& one = std::mutex{};
- auto&& two = std::mutex{};
- suspender.dismissing([&] {
- for (auto i = std::size_t{0}; i < iters; ++i) {
- func(one, two);
- spin(FLAGS_iterations);
- one.unlock();
- two.unlock();
- }
- });
- }
- void spin(int iterations) {
- for (auto i = 0; i < iterations; ++i) {
- doNotOptimizeAway(i);
- }
- }
- } // namespace
|