/*
 * Copyright 2013-present Facebook, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#pragma once

#include <atomic>
#include <cassert>
#include <cstdint>
#include <cstring>
#include <functional>
#include <limits>
#include <stdexcept>
#include <system_error>
#include <type_traits>

#include <boost/type_traits/has_trivial_destructor.hpp>

#include <folly/Conv.h>
#include <folly/Likely.h>
#include <folly/Random.h>
#include <folly/detail/AtomicUnorderedMapUtils.h>
#include <folly/lang/Bits.h>
#include <folly/portability/SysMman.h>
#include <folly/portability/Unistd.h>

namespace folly {

/// You're probably reading this because you are looking for an
/// AtomicUnorderedMap<K,V> that is fully general, highly concurrent (for
/// reads, writes, and iteration), and makes no performance compromises.
/// We haven't figured that one out yet. What you will find here is a
/// hash table implementation that sacrifices generality so that it can
/// give you all of the other things.
///
/// LIMITATIONS:
///
/// * Insert only (*) - the only write operation supported directly by
///   AtomicUnorderedInsertMap is findOrConstruct. There is a (*) because
///   values aren't moved, so you can roll your own concurrency control for
///   in-place updates of values (see MutableData and MutableAtom below),
///   but the hash table itself doesn't help you.
///
/// * No resizing - you must specify the capacity up front, and once
///   the hash map gets full you won't be able to insert. Insert
///   performance will degrade once the load factor is high. Insert is
///   O(1/(1-actual_load_factor)). Note that this is a pretty strong
///   limitation, because you can't remove existing keys.
///
/// * 2^30 maximum default capacity - by default AtomicUnorderedInsertMap
///   uses uint32_t internal indexes (and steals 2 bits), limiting you
///   to about a billion entries. If you need more you can fill in all
///   of the template params so that you change IndexType to uint64_t, or
///   you can use AtomicUnorderedInsertMap64. 64-bit indexes will increase
///   the space overhead of the map, of course.
///
/// WHAT YOU GET IN EXCHANGE:
///
/// * Arbitrary key and value types - any K and V that can be used in a
///   std::unordered_map can be used here. In fact, the key and value
///   types don't even have to be copyable or moveable!
///
/// * Keys and values in the map won't be moved - it is safe to keep
///   pointers or references to the keys and values in the map, because
///   they are never moved or destroyed (until the map itself is destroyed).
///
/// * Iterators are never invalidated - writes don't invalidate iterators,
///   so you can scan and insert in parallel.
///
/// * Fast wait-free reads - reads are usually only a single cache miss,
///   even when the hash table is very large. Wait-freedom means that
///   you won't see latency outliers even in the face of concurrent writes.
///
/// * Lock-free insert - writes proceed in parallel. If a thread in the
///   middle of a write is unlucky and gets suspended, it doesn't block
///   anybody else.
///
/// COMMENTS ON INSERT-ONLY
///
/// This map provides wait-free linearizable reads and lock-free
/// linearizable inserts. Inserted values won't be moved, but no
/// concurrency control is provided for safely updating them. To remind
/// you of that fact they are only provided in const form. This is the
/// only simple safe thing to do while preserving something like the normal
/// std::map iteration form, which requires that iteration be exposed
/// via std::pair (and prevents encapsulation of access to the value).
///
/// There are a couple of reasonable policies for doing in-place
/// concurrency control on the values. I am hoping that the policy can
/// be injected via the value type or an extra template param, to keep
/// the core AtomicUnorderedInsertMap insert-only:
///
/// CONST: this is the currently implemented strategy, which is simple,
/// performant, and not that expressive. You can always put in a value
/// with a mutable field (see MutableAtom below), but that doesn't look
/// as pretty as it should.
///
/// ATOMIC: for integers and integer-size trivially copyable structs
/// (via an adapter like tao/queues/AtomicStruct) the value can be a
/// std::atomic and read and written atomically.
///
/// SEQ-LOCK: attach a counter incremented before and after write.
/// Writers serialize by using CAS to make an even->odd transition,
/// then odd->even after the write. Readers grab the value with memcpy,
/// checking sequence value before and after. Readers retry until they
/// see an even sequence number that doesn't change. This works for
/// larger structs, but still requires memcpy to be equivalent to copy
/// assignment, and it is no longer lock-free. It scales very well,
/// because the readers are still invisible (no cache line writes).
///
/// LOCK: folly's SharedMutex would be a good choice here.
///
/// MEMORY ALLOCATION
///
/// Underlying memory is allocated as a big anonymous mmap chunk, which
/// might be cheaper than calloc() and is certainly not more expensive
/// for large maps. If the SkipKeyValueDeletion template param is true
/// then deletion of the map consists of unmapping the backing memory,
/// which is much faster than destructing all of the keys and values.
/// Feel free to override SkipKeyValueDeletion if the trait-based default
/// (boost::has_trivial_destructor) isn't recognizing the triviality of
/// your destructors.
template <
    typename Key,
    typename Value,
    typename Hash = std::hash<Key>,
    typename KeyEqual = std::equal_to<Key>,
    bool SkipKeyValueDeletion =
        (boost::has_trivial_destructor<Key>::value &&
         boost::has_trivial_destructor<Value>::value),
    template <typename> class Atom = std::atomic,
    typename IndexType = uint32_t,
    typename Allocator = folly::detail::MMapAlloc>
struct AtomicUnorderedInsertMap {
  typedef Key key_type;
  typedef Value mapped_type;
  typedef std::pair<Key, Value> value_type;
  typedef std::size_t size_type;
  typedef std::ptrdiff_t difference_type;
  typedef Hash hasher;
  typedef KeyEqual key_equal;
  typedef const value_type& const_reference;

  typedef struct ConstIterator {
    ConstIterator(const AtomicUnorderedInsertMap& owner, IndexType slot)
        : owner_(owner), slot_(slot) {}

    ConstIterator(const ConstIterator&) = default;
    ConstIterator& operator=(const ConstIterator&) = default;

    const value_type& operator*() const {
      return owner_.slots_[slot_].keyValue();
    }

    const value_type* operator->() const {
      return &owner_.slots_[slot_].keyValue();
    }

    // pre-increment
    const ConstIterator& operator++() {
      while (slot_ > 0) {
        --slot_;
        if (owner_.slots_[slot_].state() == LINKED) {
          break;
        }
      }
      return *this;
    }

    // post-increment
    ConstIterator operator++(int /* dummy */) {
      auto prev = *this;
      ++*this;
      return prev;
    }

    bool operator==(const ConstIterator& rhs) const {
      return slot_ == rhs.slot_;
    }
    bool operator!=(const ConstIterator& rhs) const {
      return !(*this == rhs);
    }

   private:
    const AtomicUnorderedInsertMap& owner_;
    IndexType slot_;
  } const_iterator;

  friend ConstIterator;

  /// Constructs a map that will support the insertion of maxSize key-value
  /// pairs without exceeding the max load factor. Load factors of greater
  /// than 1 are not supported, and once the actual load factor of the
  /// map approaches 1 the insert performance will suffer. The capacity
  /// is limited to 2^30 (about a billion) for the default IndexType,
  /// beyond which we will throw invalid_argument.
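  ///
  /// A minimal construction sketch (the element count here is illustrative):
  ///
  ///   AtomicUnorderedInsertMap<int64_t, int64_t> m(1 << 20); // room for ~1M inserts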
  explicit AtomicUnorderedInsertMap(
      size_t maxSize,
      float maxLoadFactor = 0.8f,
      const Allocator& alloc = Allocator())
      : allocator_(alloc) {
    size_t capacity = size_t(maxSize / std::min(1.0f, maxLoadFactor) + 128);
    size_t avail = size_t{1} << (8 * sizeof(IndexType) - 2);
    if (capacity > avail && maxSize < avail) {
      // we'll do our best
      capacity = avail;
    }
    if (capacity < maxSize || capacity > avail) {
      throw std::invalid_argument(
          "AtomicUnorderedInsertMap capacity must fit in IndexType with 2 bits "
          "left over");
    }

    numSlots_ = capacity;
    slotMask_ = folly::nextPowTwo(capacity * 4) - 1;
    mmapRequested_ = sizeof(Slot) * capacity;
    slots_ = reinterpret_cast<Slot*>(allocator_.allocate(mmapRequested_));
    zeroFillSlots();
    // mark the zero-th slot as in-use but not valid, since that happens
    // to be our nil value
    slots_[0].stateUpdate(EMPTY, CONSTRUCTING);
  }

  ~AtomicUnorderedInsertMap() {
    if (!SkipKeyValueDeletion) {
      for (size_t i = 1; i < numSlots_; ++i) {
        slots_[i].~Slot();
      }
    }
    allocator_.deallocate(reinterpret_cast<char*>(slots_), mmapRequested_);
  }

  /// Searches for the key, returning (iter,false) if it is found.
  /// If it is not found calls the functor Func with a void* argument
  /// that is raw storage suitable for placement construction of a Value
  /// (see raw_value_type), then returns (iter,true). May call Func and
  /// then return (iter,false) if there are other concurrent writes, in
  /// which case the newly constructed value will be immediately destroyed.
  ///
  /// This function does not block other readers or writers. If there
  /// are other concurrent writes, many parallel calls to func may happen
  /// and only the first one to complete will win. The values constructed
  /// by the other calls to func will be destroyed.
  ///
  /// Usage:
  ///
  ///   AtomicUnorderedInsertMap<std::string, std::string> memo(10);
  ///
  ///   auto value = memo.findOrConstruct(key, [=](void* raw) {
  ///     new (raw) std::string(computation(key));
  ///   }).first->second;
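  ///
  /// The bool element of the returned pair reports whether this call did the
  /// insertion. A sketch of checking it, reusing the illustrative names from
  /// the example above:
  ///
  ///   auto result = memo.findOrConstruct(key, [&](void* raw) {
  ///     new (raw) std::string(computation(key));
  ///   });
  ///   if (result.second) {
  ///     // this thread's functor constructed the winning value
  ///   }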
  template <typename Func>
  std::pair<const_iterator, bool> findOrConstruct(const Key& key, Func&& func) {
    auto const slot = keyToSlotIdx(key);
    auto prev = slots_[slot].headAndState_.load(std::memory_order_acquire);

    auto existing = find(key, slot);
    if (existing != 0) {
      return std::make_pair(ConstIterator(*this, existing), false);
    }

    auto idx = allocateNear(slot);
    new (&slots_[idx].keyValue().first) Key(key);
    func(static_cast<void*>(&slots_[idx].keyValue().second));

    while (true) {
      slots_[idx].next_ = prev >> 2;

      // we can merge the head update and the CONSTRUCTING -> LINKED update
      // into a single CAS if slot == idx (which should happen often)
      auto after = idx << 2;
      if (slot == idx) {
        after += LINKED;
      } else {
        after += (prev & 3);
      }

      if (slots_[slot].headAndState_.compare_exchange_strong(prev, after)) {
        // success
        if (idx != slot) {
          slots_[idx].stateUpdate(CONSTRUCTING, LINKED);
        }
        return std::make_pair(ConstIterator(*this, idx), true);
      }
      // compare_exchange_strong updates its first arg on failure, so
      // there is no need to reread prev

      existing = find(key, slot);
      if (existing != 0) {
        // our allocated key and value are no longer needed
        slots_[idx].keyValue().first.~Key();
        slots_[idx].keyValue().second.~Value();
        slots_[idx].stateUpdate(CONSTRUCTING, EMPTY);

        return std::make_pair(ConstIterator(*this, existing), false);
      }
    }
  }

  /// This isn't really emplace, but it is what we need to test.
  /// Eventually we can duplicate all of the std::pair constructor
  /// forms, including a recursive tuple forwarding template
  /// (see http://functionalcpp.wordpress.com/2013/08/28/tuple-forwarding/).
  template <class K, class V>
  std::pair<const_iterator, bool> emplace(const K& key, V&& value) {
    return findOrConstruct(
        key, [&](void* raw) { new (raw) Value(std::forward<V>(value)); });
  }
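
  // A usage sketch for emplace (the map name, key, and value are illustrative):
  //
  //   AtomicUnorderedInsertMap<int, std::string> m(100);
  //   auto ins = m.emplace(42, std::string("answer"));
  //   bool didInsert = ins.second;               // false if 42 was already present
  //   const std::string& v = ins.first->second;  // values are only exposed as const
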
  const_iterator find(const Key& key) const {
    return ConstIterator(*this, find(key, keyToSlotIdx(key)));
  }

  const_iterator cbegin() const {
    IndexType slot = numSlots_ - 1;
    while (slot > 0 && slots_[slot].state() != LINKED) {
      --slot;
    }
    return ConstIterator(*this, slot);
  }

  const_iterator cend() const {
    return ConstIterator(*this, 0);
  }
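
  // An iteration sketch (m and process are illustrative names); writers never
  // invalidate these iterators, so scanning alongside concurrent inserts is safe:
  //
  //   for (auto it = m.cbegin(); it != m.cend(); ++it) {
  //     process(it->first, it->second);
  //   }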

 private:
  enum : IndexType {
    kMaxAllocationTries = 1000, // after this we throw
  };

  enum BucketState : IndexType {
    EMPTY = 0,
    CONSTRUCTING = 1,
    LINKED = 2,
  };

  /// Lock-free insertion is easiest by prepending to collision chains.
  /// A large chaining hash table takes two cache misses instead of
  /// one, however. Our solution is to colocate the bucket storage and
  /// the head storage, so that even though we are traversing chains we
  /// are likely to stay within the same cache line. Just make sure to
  /// traverse head before looking at any keys. This strategy gives us
  /// 32 bit pointers and fast iteration.
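  ///
  /// As a sketch of the layout with the default IndexType (uint32_t), each
  /// slot's headAndState_ packs:
  ///
  ///   bits 31..2  index of the first bucket in this slot's collision chain
  ///   bits 1..0   BucketState of this slot (EMPTY / CONSTRUCTING / LINKED)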
  struct Slot {
    /// The bottom two bits are the BucketState, the rest is the index
    /// of the first bucket for the chain whose keys map to this slot.
    /// When things are going well the head usually links to this slot,
    /// but that doesn't always have to happen.
    Atom<IndexType> headAndState_;

    /// The next bucket in the chain
    IndexType next_;

    /// Key and Value
    typename std::aligned_storage<sizeof(value_type), alignof(value_type)>::type
        raw_;

    ~Slot() {
      auto s = state();
      assert(s == EMPTY || s == LINKED);
      if (s == LINKED) {
        keyValue().first.~Key();
        keyValue().second.~Value();
      }
    }

    BucketState state() const {
      return BucketState(headAndState_.load(std::memory_order_acquire) & 3);
    }

    void stateUpdate(BucketState before, BucketState after) {
      assert(state() == before);
      headAndState_ += (after - before);
    }

    value_type& keyValue() {
      assert(state() != EMPTY);
      return *static_cast<value_type*>(static_cast<void*>(&raw_));
    }

    const value_type& keyValue() const {
      assert(state() != EMPTY);
      return *static_cast<const value_type*>(static_cast<const void*>(&raw_));
    }
  };

  // We manually manage the slot memory so we can bypass initialization
  // (by getting a zero-filled mmap chunk) and optionally destruction of
  // the slots

  size_t mmapRequested_;
  size_t numSlots_;

  /// tricky, see keyToSlotIdx
  size_t slotMask_;

  Allocator allocator_;
  Slot* slots_;

  IndexType keyToSlotIdx(const Key& key) const {
    size_t h = hasher()(key);
    h &= slotMask_;
    while (h >= numSlots_) {
      h -= numSlots_;
    }
    return h;
  }

  IndexType find(const Key& key, IndexType slot) const {
    KeyEqual ke = {};
    auto hs = slots_[slot].headAndState_.load(std::memory_order_acquire);
    for (slot = hs >> 2; slot != 0; slot = slots_[slot].next_) {
      if (ke(key, slots_[slot].keyValue().first)) {
        return slot;
      }
    }
    return 0;
  }

  /// Allocates a slot and returns its index. Tries to put it near
  /// slots_[start].
  IndexType allocateNear(IndexType start) {
    for (IndexType tries = 0; tries < kMaxAllocationTries; ++tries) {
      auto slot = allocationAttempt(start, tries);
      auto prev = slots_[slot].headAndState_.load(std::memory_order_acquire);
      if ((prev & 3) == EMPTY &&
          slots_[slot].headAndState_.compare_exchange_strong(
              prev, prev + CONSTRUCTING - EMPTY)) {
        return slot;
      }
    }
    throw std::bad_alloc();
  }

  /// Returns the slot we should attempt to allocate after tries failed
  /// tries, starting from the specified slot. This is pulled out so we
  /// can specialize it differently during deterministic testing
  IndexType allocationAttempt(IndexType start, IndexType tries) const {
    if (LIKELY(tries < 8 && start + tries < numSlots_)) {
      return IndexType(start + tries);
    } else {
      IndexType rv;
      if (sizeof(IndexType) <= 4) {
        rv = IndexType(folly::Random::rand32(numSlots_));
      } else {
        rv = IndexType(folly::Random::rand64(numSlots_));
      }
      assert(rv < numSlots_);
      return rv;
    }
  }

  void zeroFillSlots() {
    using folly::detail::GivesZeroFilledMemory;
    if (!GivesZeroFilledMemory<Allocator>::value) {
      memset(slots_, 0, mmapRequested_);
    }
  }
};

/// AtomicUnorderedInsertMap64 is just a type alias that makes it easier
/// to select a 64 bit slot index type. Use this if you need a capacity
/// bigger than 2^30 (about a billion). This increases memory overheads,
/// obviously.
template <
    typename Key,
    typename Value,
    typename Hash = std::hash<Key>,
    typename KeyEqual = std::equal_to<Key>,
    bool SkipKeyValueDeletion =
        (boost::has_trivial_destructor<Key>::value &&
         boost::has_trivial_destructor<Value>::value),
    template <typename> class Atom = std::atomic,
    typename Allocator = folly::detail::MMapAlloc>
using AtomicUnorderedInsertMap64 = AtomicUnorderedInsertMap<
    Key,
    Value,
    Hash,
    KeyEqual,
    SkipKeyValueDeletion,
    Atom,
    uint64_t,
    Allocator>;
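
// A sketch of selecting the 64-bit variant (the requested size is illustrative;
// anything above 2^30 entries needs the wider index type):
//
//   AtomicUnorderedInsertMap64<uint64_t, uint64_t> big(size_t{2} << 30);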

/// MutableAtom is a tiny wrapper that gives you the option of atomically
/// updating values inserted into an AtomicUnorderedInsertMap<K,
/// MutableAtom<V>>. This relies on AtomicUnorderedInsertMap's guarantee
/// that it doesn't move values.
template <typename T, template <typename> class Atom = std::atomic>
struct MutableAtom {
  mutable Atom<T> data;

  explicit MutableAtom(const T& init) : data(init) {}
};
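
// A sketch of in-place atomic updates via MutableAtom (the map name and key
// are illustrative):
//
//   AtomicUnorderedInsertMap<std::string, MutableAtom<int>> counters(1000);
//   auto r = counters.emplace("hits", 0); // constructs MutableAtom<int>{0} in place
//   r.first->second.data.fetch_add(1);    // atomic update; the value is never moved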

/// MutableData is a tiny wrapper that gives you the option of using an
/// external concurrency control mechanism to update values inserted
/// into an AtomicUnorderedInsertMap.
template <typename T>
struct MutableData {
  mutable T data;
  explicit MutableData(const T& init) : data(init) {}
};
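
// A sketch of externally synchronized updates via MutableData (the mutex and
// names are illustrative; <mutex> and <vector> would need to be included):
//
//   AtomicUnorderedInsertMap<int, MutableData<std::vector<int>>> m(100);
//   std::mutex dataLock; // external concurrency control shared by all values
//   auto it = m.emplace(1, std::vector<int>{}).first;
//   {
//     std::lock_guard<std::mutex> g(dataLock);
//     it->second.data.push_back(7);
//   }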

} // namespace folly