ThreadLocal.h 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471
  1. /*
  2. * Copyright 2011-present Facebook, Inc.
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. /**
  17. * Improved thread local storage for non-trivial types (similar speed as
  18. * pthread_getspecific but only consumes a single pthread_key_t, and 4x faster
  19. * than boost::thread_specific_ptr).
  20. *
  21. * Also includes an accessor interface to walk all the thread local child
  22. * objects of a parent. accessAllThreads() initializes an accessor which holds
  23. * a global lock *that blocks all creation and destruction of ThreadLocal
  24. * objects with the same Tag* and can be used as an iterable container.
  25. * accessAllThreads() can race with destruction of thread-local elements. We
  26. * provide a strict mode which is dangerous because it requires the access lock
  27. * to be held while destroying thread-local elements which could cause
  28. * deadlocks. We gate this mode behind the AccessModeStrict template parameter.
  29. *
  30. * Intended use is for frequent write, infrequent read data access patterns such
  31. * as counters.
  32. *
  33. * There are two classes here - ThreadLocal and ThreadLocalPtr. ThreadLocalPtr
  34. * has semantics similar to boost::thread_specific_ptr. ThreadLocal is a thin
  35. * wrapper around ThreadLocalPtr that manages allocation automatically.
  36. *
  37. * @author Spencer Ahrens (sahrens)
  38. */
  39. #pragma once
  40. #include <iterator>
  41. #include <type_traits>
  42. #include <utility>
  43. #include <folly/Likely.h>
  44. #include <folly/Portability.h>
  45. #include <folly/ScopeGuard.h>
  46. #include <folly/SharedMutex.h>
  47. #include <folly/detail/ThreadLocalDetail.h>
  48. namespace folly {
  49. template <class T, class Tag, class AccessMode>
  50. class ThreadLocalPtr;
  51. template <class T, class Tag = void, class AccessMode = void>
  52. class ThreadLocal {
  53. public:
  54. constexpr ThreadLocal() : constructor_([]() { return new T(); }) {}
  55. template <
  56. typename F,
  57. _t<std::enable_if<is_invocable_r<T*, F>::value, int>> = 0>
  58. explicit ThreadLocal(F&& constructor)
  59. : constructor_(std::forward<F>(constructor)) {}
  60. FOLLY_ALWAYS_INLINE FOLLY_ATTR_VISIBILITY_HIDDEN T* get() const {
  61. auto const ptr = tlp_.get();
  62. return FOLLY_LIKELY(!!ptr) ? ptr : makeTlp();
  63. }
  64. T* operator->() const {
  65. return get();
  66. }
  67. T& operator*() const {
  68. return *get();
  69. }
  70. void reset(T* newPtr = nullptr) {
  71. tlp_.reset(newPtr);
  72. }
  73. typedef typename ThreadLocalPtr<T, Tag, AccessMode>::Accessor Accessor;
  74. Accessor accessAllThreads() const {
  75. return tlp_.accessAllThreads();
  76. }
  77. // movable
  78. ThreadLocal(ThreadLocal&&) = default;
  79. ThreadLocal& operator=(ThreadLocal&&) = default;
  80. private:
  81. // non-copyable
  82. ThreadLocal(const ThreadLocal&) = delete;
  83. ThreadLocal& operator=(const ThreadLocal&) = delete;
  84. FOLLY_NOINLINE T* makeTlp() const {
  85. auto const ptr = constructor_();
  86. tlp_.reset(ptr);
  87. return ptr;
  88. }
  89. mutable ThreadLocalPtr<T, Tag, AccessMode> tlp_;
  90. std::function<T*()> constructor_;
  91. };
  92. /*
  93. * The idea here is that __thread is faster than pthread_getspecific, so we
  94. * keep a __thread array of pointers to objects (ThreadEntry::elements) where
  95. * each array has an index for each unique instance of the ThreadLocalPtr
  96. * object. Each ThreadLocalPtr object has a unique id that is an index into
  97. * these arrays so we can fetch the correct object from thread local storage
  98. * very efficiently.
  99. *
  100. * In order to prevent unbounded growth of the id space and thus huge
  101. * ThreadEntry::elements arrays, for example due to continuous creation and
  102. * destruction of ThreadLocalPtr objects, we keep a set of all active
  103. * instances. When an instance is destroyed we remove it from the active
  104. * set and insert the id into freeIds_ for reuse. These operations require a
  105. * global mutex, but only happen at construction and destruction time.
  106. *
  107. * We use a single global pthread_key_t per Tag to manage object destruction and
  108. * memory cleanup upon thread exit because there is a finite number of
  109. * pthread_key_t's available per machine.
  110. *
  111. * NOTE: Apple platforms don't support the same semantics for __thread that
  112. * Linux does (and it's only supported at all on i386). For these, use
  113. * pthread_setspecific()/pthread_getspecific() for the per-thread
  114. * storage. Windows (MSVC and GCC) does support the same semantics
  115. * with __declspec(thread)
  116. */
  117. template <class T, class Tag = void, class AccessMode = void>
  118. class ThreadLocalPtr {
  119. private:
  120. typedef threadlocal_detail::StaticMeta<Tag, AccessMode> StaticMeta;
  121. public:
  122. constexpr ThreadLocalPtr() : id_() {}
  123. ThreadLocalPtr(ThreadLocalPtr&& other) noexcept : id_(std::move(other.id_)) {}
  124. ThreadLocalPtr& operator=(ThreadLocalPtr&& other) {
  125. assert(this != &other);
  126. destroy();
  127. id_ = std::move(other.id_);
  128. return *this;
  129. }
  130. ~ThreadLocalPtr() {
  131. destroy();
  132. }
  133. T* get() const {
  134. threadlocal_detail::ElementWrapper& w = StaticMeta::get(&id_);
  135. return static_cast<T*>(w.ptr);
  136. }
  137. T* operator->() const {
  138. return get();
  139. }
  140. T& operator*() const {
  141. return *get();
  142. }
  143. T* release() {
  144. threadlocal_detail::ElementWrapper& w = StaticMeta::get(&id_);
  145. return static_cast<T*>(w.release());
  146. }
  147. void reset(T* newPtr = nullptr) {
  148. auto guard = makeGuard([&] { delete newPtr; });
  149. threadlocal_detail::ElementWrapper* w = &StaticMeta::get(&id_);
  150. w->dispose(TLPDestructionMode::THIS_THREAD);
  151. // need to get a new ptr since the
  152. // ThreadEntry::elements array can be reallocated
  153. w = &StaticMeta::get(&id_);
  154. w->cleanup();
  155. guard.dismiss();
  156. w->set(newPtr);
  157. }
  158. explicit operator bool() const {
  159. return get() != nullptr;
  160. }
  161. /**
  162. * reset() that transfers ownership from a smart pointer
  163. */
  164. template <
  165. typename SourceT,
  166. typename Deleter,
  167. typename = typename std::enable_if<
  168. std::is_convertible<SourceT*, T*>::value>::type>
  169. void reset(std::unique_ptr<SourceT, Deleter> source) {
  170. auto deleter = [delegate = source.get_deleter()](
  171. T* ptr, TLPDestructionMode) { delegate(ptr); };
  172. reset(source.release(), deleter);
  173. }
  174. /**
  175. * reset() that transfers ownership from a smart pointer with the default
  176. * deleter
  177. */
  178. template <
  179. typename SourceT,
  180. typename = typename std::enable_if<
  181. std::is_convertible<SourceT*, T*>::value>::type>
  182. void reset(std::unique_ptr<SourceT> source) {
  183. reset(source.release());
  184. }
  185. /**
  186. * reset() with a custom deleter:
  187. * deleter(T* ptr, TLPDestructionMode mode)
  188. * "mode" is ALL_THREADS if we're destructing this ThreadLocalPtr (and thus
  189. * deleting pointers for all threads), and THIS_THREAD if we're only deleting
  190. * the member for one thread (because of thread exit or reset()).
  191. * Invoking the deleter must not throw.
  192. */
  193. template <class Deleter>
  194. void reset(T* newPtr, const Deleter& deleter) {
  195. auto guard = makeGuard([&] {
  196. if (newPtr) {
  197. deleter(newPtr, TLPDestructionMode::THIS_THREAD);
  198. }
  199. });
  200. threadlocal_detail::ElementWrapper* w = &StaticMeta::get(&id_);
  201. w->dispose(TLPDestructionMode::THIS_THREAD);
  202. // need to get a new ptr since the
  203. // ThreadEntry::elements array can be reallocated
  204. w = &StaticMeta::get(&id_);
  205. w->cleanup();
  206. guard.dismiss();
  207. w->set(newPtr, deleter);
  208. }
  209. // Holds a global lock for iteration through all thread local child objects.
  210. // Can be used as an iterable container.
  211. // Use accessAllThreads() to obtain one.
  212. class Accessor {
  213. friend class ThreadLocalPtr<T, Tag, AccessMode>;
  214. threadlocal_detail::StaticMetaBase& meta_;
  215. SharedMutex* accessAllThreadsLock_;
  216. std::mutex* lock_;
  217. uint32_t id_;
  218. public:
  219. class Iterator;
  220. friend class Iterator;
  221. // The iterators obtained from Accessor are bidirectional iterators.
  222. class Iterator {
  223. friend class Accessor;
  224. const Accessor* accessor_;
  225. threadlocal_detail::ThreadEntryNode* e_;
  226. void increment() {
  227. e_ = e_->getNext();
  228. incrementToValid();
  229. }
  230. void decrement() {
  231. e_ = e_->getPrev();
  232. decrementToValid();
  233. }
  234. const T& dereference() const {
  235. return *static_cast<T*>(
  236. e_->getThreadEntry()->elements[accessor_->id_].ptr);
  237. }
  238. T& dereference() {
  239. return *static_cast<T*>(
  240. e_->getThreadEntry()->elements[accessor_->id_].ptr);
  241. }
  242. bool equal(const Iterator& other) const {
  243. return (accessor_->id_ == other.accessor_->id_ && e_ == other.e_);
  244. }
  245. explicit Iterator(const Accessor* accessor)
  246. : accessor_(accessor),
  247. e_(&accessor_->meta_.head_.elements[accessor_->id_].node) {}
  248. // we just need to check the ptr since it can be set to nullptr
  249. // even if the entry is part of the list
  250. bool valid() const {
  251. return (e_->getThreadEntry()->elements[accessor_->id_].ptr);
  252. }
  253. void incrementToValid() {
  254. for (; e_ != &accessor_->meta_.head_.elements[accessor_->id_].node &&
  255. !valid();
  256. e_ = e_->getNext()) {
  257. }
  258. }
  259. void decrementToValid() {
  260. for (; e_ != &accessor_->meta_.head_.elements[accessor_->id_].node &&
  261. !valid();
  262. e_ = e_->getPrev()) {
  263. }
  264. }
  265. public:
  266. using difference_type = ssize_t;
  267. using value_type = T;
  268. using reference = T const&;
  269. using pointer = T const*;
  270. using iterator_category = std::bidirectional_iterator_tag;
  271. Iterator& operator++() {
  272. increment();
  273. return *this;
  274. }
  275. Iterator& operator++(int) {
  276. Iterator copy(*this);
  277. increment();
  278. return copy;
  279. }
  280. Iterator& operator--() {
  281. decrement();
  282. return *this;
  283. }
  284. Iterator& operator--(int) {
  285. Iterator copy(*this);
  286. decrement();
  287. return copy;
  288. }
  289. T& operator*() {
  290. return dereference();
  291. }
  292. T const& operator*() const {
  293. return dereference();
  294. }
  295. T* operator->() {
  296. return &dereference();
  297. }
  298. T const* operator->() const {
  299. return &dereference();
  300. }
  301. bool operator==(Iterator const& rhs) const {
  302. return equal(rhs);
  303. }
  304. bool operator!=(Iterator const& rhs) const {
  305. return !equal(rhs);
  306. }
  307. };
  308. ~Accessor() {
  309. release();
  310. }
  311. Iterator begin() const {
  312. return ++Iterator(this);
  313. }
  314. Iterator end() const {
  315. return Iterator(this);
  316. }
  317. Accessor(const Accessor&) = delete;
  318. Accessor& operator=(const Accessor&) = delete;
  319. Accessor(Accessor&& other) noexcept
  320. : meta_(other.meta_),
  321. accessAllThreadsLock_(other.accessAllThreadsLock_),
  322. lock_(other.lock_),
  323. id_(other.id_) {
  324. other.id_ = 0;
  325. other.accessAllThreadsLock_ = nullptr;
  326. other.lock_ = nullptr;
  327. }
  328. Accessor& operator=(Accessor&& other) noexcept {
  329. // Each Tag has its own unique meta, and accessors with different Tags
  330. // have different types. So either *this is empty, or this and other
  331. // have the same tag. But if they have the same tag, they have the same
  332. // meta (and lock), so they'd both hold the lock at the same time,
  333. // which is impossible, which leaves only one possible scenario --
  334. // *this is empty. Assert it.
  335. assert(&meta_ == &other.meta_);
  336. assert(lock_ == nullptr);
  337. using std::swap;
  338. swap(accessAllThreadsLock_, other.accessAllThreadsLock_);
  339. swap(lock_, other.lock_);
  340. swap(id_, other.id_);
  341. }
  342. Accessor()
  343. : meta_(threadlocal_detail::StaticMeta<Tag, AccessMode>::instance()),
  344. accessAllThreadsLock_(nullptr),
  345. lock_(nullptr),
  346. id_(0) {}
  347. private:
  348. explicit Accessor(uint32_t id)
  349. : meta_(threadlocal_detail::StaticMeta<Tag, AccessMode>::instance()),
  350. accessAllThreadsLock_(&meta_.accessAllThreadsLock_),
  351. lock_(&meta_.lock_) {
  352. accessAllThreadsLock_->lock();
  353. lock_->lock();
  354. id_ = id;
  355. }
  356. void release() {
  357. if (lock_) {
  358. lock_->unlock();
  359. DCHECK(accessAllThreadsLock_ != nullptr);
  360. accessAllThreadsLock_->unlock();
  361. id_ = 0;
  362. lock_ = nullptr;
  363. accessAllThreadsLock_ = nullptr;
  364. }
  365. }
  366. };
  367. // accessor allows a client to iterate through all thread local child
  368. // elements of this ThreadLocal instance. Holds a global lock for each <Tag>
  369. Accessor accessAllThreads() const {
  370. static_assert(
  371. !std::is_same<Tag, void>::value,
  372. "Must use a unique Tag to use the accessAllThreads feature");
  373. return Accessor(id_.getOrAllocate(StaticMeta::instance()));
  374. }
  375. private:
  376. void destroy() {
  377. StaticMeta::instance().destroy(&id_);
  378. }
  379. // non-copyable
  380. ThreadLocalPtr(const ThreadLocalPtr&) = delete;
  381. ThreadLocalPtr& operator=(const ThreadLocalPtr&) = delete;
  382. mutable typename StaticMeta::EntryID id_;
  383. };
  384. namespace threadlocal_detail {
  385. template <typename>
  386. struct static_meta_of;
  387. template <typename T, typename Tag, typename AccessMode>
  388. struct static_meta_of<ThreadLocalPtr<T, Tag, AccessMode>> {
  389. using type = StaticMeta<Tag, AccessMode>;
  390. };
  391. template <typename T, typename Tag, typename AccessMode>
  392. struct static_meta_of<ThreadLocal<T, Tag, AccessMode>> {
  393. using type = StaticMeta<Tag, AccessMode>;
  394. };
  395. } // namespace threadlocal_detail
  396. } // namespace folly