/*
 * Copyright 2013-present Facebook, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#include <folly/AtomicUnorderedMap.h>

#include <memory>
#include <thread>
#include <unordered_map>

#include <folly/Benchmark.h>
#include <folly/portability/GFlags.h>
#include <folly/portability/GTest.h>
#include <folly/portability/Semaphore.h>
#include <folly/test/DeterministicSchedule.h>

using namespace folly;
using namespace folly::test;
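// non_atomic mirrors the std::atomic<T> interface but performs plain,
// unsynchronized operations. Instantiating the map with it (see the
// fast_map benchmarks below) measures the cost of the atomic operations
// in the uncontended single-threaded case.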
template <class T>
struct non_atomic {
  T value;

  non_atomic() = default;
  non_atomic(const non_atomic&) = delete;
  constexpr /* implicit */ non_atomic(T desired) : value(desired) {}

  T operator+=(T arg) {
    value += arg;
    return load();
  }

  T load(std::memory_order /* order */ = std::memory_order_seq_cst) const {
    return value;
  }

  /* implicit */ operator T() const {
    return load();
  }

  void store(
      T desired,
      std::memory_order /* order */ = std::memory_order_seq_cst) {
    value = desired;
  }

  T exchange(
      T desired,
      std::memory_order /* order */ = std::memory_order_seq_cst) {
    T old = load();
    store(desired);
    return old;
  }

  bool compare_exchange_weak(
      T& expected,
      T desired,
      std::memory_order /* success */ = std::memory_order_seq_cst,
      std::memory_order /* failure */ = std::memory_order_seq_cst) {
    if (value == expected) {
      value = desired;
      return true;
    }
    expected = value;
    return false;
  }

  bool compare_exchange_strong(
      T& expected,
      T desired,
      std::memory_order /* success */ = std::memory_order_seq_cst,
      std::memory_order /* failure */ = std::memory_order_seq_cst) {
    if (value == expected) {
      value = desired;
      return true;
    }
    expected = value;
    return false;
  }

  bool is_lock_free() const {
    return true;
  }
};
template <
    typename Key,
    typename Value,
    typename IndexType,
    template <typename> class Atom = std::atomic,
    typename Allocator = std::allocator<char>>
using UIM = AtomicUnorderedInsertMap<
    Key,
    Value,
    std::hash<Key>,
    std::equal_to<Key>,
    (boost::has_trivial_destructor<Key>::value &&
     boost::has_trivial_destructor<Value>::value),
    Atom,
    IndexType,
    Allocator>;
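// UIM pins the hash, key-equality, and skip-destruction parameters of
// AtomicUnorderedInsertMap so the tests below only need to vary the
// key/value types, the index type, the atom, and the allocator.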
namespace {
template <typename T>
struct AtomicUnorderedInsertMapTest : public ::testing::Test {};
} // namespace

// uint16_t doesn't make sense for most platforms, but we might as well
// test it
using IndexTypesToTest = ::testing::Types<uint16_t, uint32_t, uint64_t>;
TYPED_TEST_CASE(AtomicUnorderedInsertMapTest, IndexTypesToTest);
TYPED_TEST(AtomicUnorderedInsertMapTest, basic) {
  UIM<std::string,
      std::string,
      TypeParam,
      std::atomic,
      folly::detail::MMapAlloc>
      m(100);

  m.emplace("abc", "ABC");
  EXPECT_TRUE(m.find("abc") != m.cend());
  EXPECT_EQ(m.find("abc")->first, "abc");
  EXPECT_EQ(m.find("abc")->second, "ABC");
  EXPECT_TRUE(m.find("def") == m.cend());
  auto iter = m.cbegin();
  EXPECT_TRUE(iter != m.cend());
  EXPECT_TRUE(iter == m.find("abc"));
  auto a = iter;
  EXPECT_TRUE(a == iter);
  auto b = iter;
  ++iter;
  EXPECT_TRUE(iter == m.cend());
  EXPECT_TRUE(a == b);
  EXPECT_TRUE(a != iter);
  a++;
  EXPECT_TRUE(a == iter);
  EXPECT_TRUE(a != b);
}
TEST(AtomicUnorderedInsertMap, load_factor) {
  AtomicUnorderedInsertMap<int, bool> m(5000, 0.5f);

  // we should be able to put in much more than 5000 things because of
  // our load factor request
  for (int i = 0; i < 10000; ++i) {
    m.emplace(i, true);
  }
}
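// The map allocates all of its slots up front and never rehashes, so
// inserting past the requested capacity makes emplace throw std::bad_alloc.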
TEST(AtomicUnorderedInsertMap, capacity_exceeded) {
  AtomicUnorderedInsertMap<int, bool> m(5000, 1.0f);

  EXPECT_THROW(
      {
        for (int i = 0; i < 6000; ++i) {
          m.emplace(i, false);
        }
      },
      std::bad_alloc);
}
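// MutableAtom wraps the mapped value in an Atom so it can be mutated in
// place through ->second.data after insertion; MutableData is its plain,
// non-atomic counterpart for single-threaded use.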
TYPED_TEST(AtomicUnorderedInsertMapTest, value_mutation) {
  UIM<int, MutableAtom<int>, TypeParam> m(100);

  for (int i = 0; i < 50; ++i) {
    m.emplace(i, i);
  }

  m.find(1)->second.data++;
}

TEST(UnorderedInsertMap, value_mutation) {
  UIM<int, MutableData<int>, uint32_t, non_atomic> m(100);

  for (int i = 0; i < 50; ++i) {
    m.emplace(i, i);
  }

  m.find(1)->second.data++;
  EXPECT_EQ(m.find(1)->second.data, 2);
}
// This test is too expensive to run automatically. On my dev server it
// takes about 10 minutes for dbg build, 2 for opt.
TEST(AtomicUnorderedInsertMap, DISABLED_mega_map) {
  size_t capacity = 2000000000;
  AtomicUnorderedInsertMap64<size_t, size_t> big(capacity);
  for (size_t i = 0; i < capacity * 2; i += 2) {
    big.emplace(i, i * 10);
  }
  for (size_t i = 0; i < capacity * 3; i += capacity / 1000 + 1) {
    auto iter = big.find(i);
    if ((i & 1) == 0 && i < capacity * 2) {
      EXPECT_EQ(iter->second, i * 10);
    } else {
      EXPECT_TRUE(iter == big.cend());
    }
  }
}
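// Pure lookup-hit benchmark: the map is prepopulated with every key that
// the timed loop will probe, so each find() is a successful lookup.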
BENCHMARK(lookup_int_int_hit, iters) {
  std::unique_ptr<AtomicUnorderedInsertMap<int, size_t>> ptr = {};

  size_t capacity = 100000;

  BENCHMARK_SUSPEND {
    ptr = std::make_unique<AtomicUnorderedInsertMap<int, size_t>>(capacity);
    for (size_t i = 0; i < capacity; ++i) {
      auto k = 3 * ((5641 * i) % capacity);
      ptr->emplace(k, k + 1);
      EXPECT_EQ(ptr->find(k)->second, k + 1);
    }
  }

  for (size_t i = 0; i < iters; ++i) {
    size_t k = 3 * (((i * 7919) ^ (i * 4001)) % capacity);
    auto iter = ptr->find(k);
    if (iter == ptr->cend() || iter->second != k + 1) {
      auto jter = ptr->find(k);
      EXPECT_TRUE(iter == jter);
    }
    EXPECT_EQ(iter->second, k + 1);
  }

  BENCHMARK_SUSPEND {
    ptr.reset(nullptr);
  }
}
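// XOR of the two halves is a cheap hash; it is adequate here because the
// second element of every key produced by contendedRW below is random.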
struct PairHash {
  size_t operator()(const std::pair<uint64_t, uint64_t>& pr) const {
    return pr.first ^ pr.second;
  }
};
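// Shared-map stress benchmark: numThreads threads interleave finds and
// emplaces, targeting readsPerWrite reads per write and falling back to
// pure reads once a thread has written its share (capacity / numThreads)
// of the map.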
void contendedRW(
    size_t itersPerThread,
    size_t capacity,
    size_t numThreads,
    size_t readsPerWrite) {
  typedef std::pair<uint64_t, uint64_t> Key;
  typedef AtomicUnorderedInsertMap<Key, MutableAtom<uint32_t>, PairHash> Map;

  std::unique_ptr<Map> ptr = {};
  std::atomic<bool> go{false};
  std::vector<std::thread> threads;

  BENCHMARK_SUSPEND {
    ptr = std::make_unique<Map>(capacity);
    while (threads.size() < numThreads) {
      threads.emplace_back([&]() {
        while (!go) {
          std::this_thread::yield();
        }

        size_t reads = 0;
        size_t writes = 0;
        while (reads + writes < itersPerThread) {
          auto r = Random::rand32();
          Key key(reads + writes, r);
          if (reads < writes * readsPerWrite ||
              writes >= capacity / numThreads) {
            // read needed
            ++reads;
            auto iter = ptr->find(key);
            EXPECT_TRUE(
                iter == ptr->cend() ||
                iter->second.data.load(std::memory_order_acquire) >=
                    key.first);
          } else {
            ++writes;
            try {
              auto pr = ptr->emplace(key, key.first);
              if (!pr.second) {
                pr.first->second.data++;
              }
            } catch (std::bad_alloc&) {
              LOG(INFO) << "bad alloc";
            }
          }
        }
      });
    }
  }

  go = true;

  for (auto& thr : threads) {
    thr.join();
  }

  BENCHMARK_SUSPEND {
    ptr.reset(nullptr);
  }
}
// clang-format off
// sudo nice -n -20 ~/fbcode/_bin/common/concurrency/experimental/atomic_unordered_map --benchmark --bm_min_iters=1000000
//
// without MAP_HUGETLB (default)
//
// ============================================================================
// common/concurrency/experimental/AtomicUnorderedMapTest.cpprelative  time/iter
//   iters/s
// ============================================================================
// lookup_int_int_hit                                          20.05ns   49.89M
// contendedRW(small_32thr_99pct)                              70.36ns   14.21M
// contendedRW(large_32thr_99pct)                             164.23ns    6.09M
// contendedRW(large_32thr_99_9pct)                           158.81ns    6.30M
// ============================================================================
//
// with MAP_HUGETLB hacked in
// ============================================================================
// lookup_int_int_hit                                          19.67ns   50.84M
// contendedRW(small_32thr_99pct)                              62.46ns   16.01M
// contendedRW(large_32thr_99pct)                             119.41ns    8.37M
// contendedRW(large_32thr_99_9pct)                           111.23ns    8.99M
// ============================================================================
// clang-format on
BENCHMARK_NAMED_PARAM(contendedRW, small_32thr_99pct, 100000, 32, 99)
BENCHMARK_NAMED_PARAM(contendedRW, large_32thr_99pct, 100000000, 32, 99)
BENCHMARK_NAMED_PARAM(contendedRW, large_32thr_99_9pct, 100000000, 32, 999)

BENCHMARK_DRAW_LINE();
// clang-format off
// sudo nice -n -20 ~/fbcode/_build/opt/site_integrity/quasar/experimental/atomic_unordered_map_test --benchmark --bm_min_iters=10000
// Single threaded benchmarks to test how much better we are than
// std::unordered_map and what is the cost of using atomic operations
// in the uncontended use case
// ============================================================================
// std_map                                                      1.20ms   832.58
// atomic_fast_map                                            511.35us    1.96K
// fast_map                                                   196.28us    5.09K
// ============================================================================
// clang-format on
BENCHMARK(std_map) {
  std::unordered_map<long, long> m;
  m.reserve(10000);
  for (int i = 0; i < 10000; ++i) {
    m.emplace(i, i);
  }
  for (int i = 0; i < 10000; ++i) {
    auto a = m.find(i);
    folly::doNotOptimizeAway(&*a);
  }
}

BENCHMARK(atomic_fast_map) {
  UIM<long, long, uint32_t, std::atomic> m(10000);
  for (int i = 0; i < 10000; ++i) {
    m.emplace(i, i);
  }
  for (int i = 0; i < 10000; ++i) {
    auto a = m.find(i);
    folly::doNotOptimizeAway(&*a);
  }
}

BENCHMARK(fast_map) {
  UIM<long, long, uint32_t, non_atomic> m(10000);
  for (int i = 0; i < 10000; ++i) {
    m.emplace(i, i);
  }
  for (int i = 0; i < 10000; ++i) {
    auto a = m.find(i);
    folly::doNotOptimizeAway(&*a);
  }
}

BENCHMARK(atomic_fast_map_64) {
  UIM<long, long, uint64_t, std::atomic> m(10000);
  for (int i = 0; i < 10000; ++i) {
    m.emplace(i, i);
  }
  for (int i = 0; i < 10000; ++i) {
    auto a = m.find(i);
    folly::doNotOptimizeAway(&*a);
  }
}

BENCHMARK(fast_map_64) {
  UIM<long, long, uint64_t, non_atomic> m(10000);
  for (int i = 0; i < 10000; ++i) {
    m.emplace(i, i);
  }
  for (int i = 0; i < 10000; ++i) {
    auto a = m.find(i);
    folly::doNotOptimizeAway(&*a);
  }
}
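// Tests always run; benchmarks run afterwards only if the --benchmark flag
// was passed (folly::runBenchmarksOnFlag is a no-op otherwise).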
int main(int argc, char** argv) {
  testing::InitGoogleTest(&argc, argv);
  gflags::ParseCommandLineFlags(&argc, &argv, true);
  int rv = RUN_ALL_TESTS();
  folly::runBenchmarksOnFlag();
  return rv;
}