ThreadLocalDetail.h

/*
 * Copyright 2011-present Facebook, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#pragma once

#include <limits.h>

#include <atomic>
#include <cassert>
#include <cstdint>
#include <functional>
#include <limits>
#include <mutex>
#include <stdexcept>
#include <string>
#include <type_traits>
#include <vector>

#include <glog/logging.h>

#include <folly/Exception.h>
#include <folly/Function.h>
#include <folly/Likely.h>
#include <folly/Portability.h>
#include <folly/ScopeGuard.h>
#include <folly/SharedMutex.h>
#include <folly/container/Foreach.h>
#include <folly/detail/AtFork.h>
#include <folly/detail/StaticSingletonManager.h>
#include <folly/memory/Malloc.h>
#include <folly/portability/PThread.h>
#include <folly/synchronization/MicroSpinLock.h>

// In general, emutls cleanup is not guaranteed to play nice with the way
// StaticMeta mixes direct pthread calls and the use of __thread. This has
// caused problems on multiple platforms, so don't use __thread there.
//
// XXX: Ideally we would instead determine at runtime whether emutls is in
// use, as it is possible to configure glibc on Linux to use emutls
// regardless.
#if !FOLLY_MOBILE && !defined(__APPLE__) && !defined(_MSC_VER)
#define FOLLY_TLD_USE_FOLLY_TLS 1
#else
#undef FOLLY_TLD_USE_FOLLY_TLS
#endif
namespace folly {

enum class TLPDestructionMode { THIS_THREAD, ALL_THREADS };
struct AccessModeStrict {};

namespace threadlocal_detail {

constexpr uint32_t kEntryIDInvalid = std::numeric_limits<uint32_t>::max();

struct ThreadEntry;
/* This represents a node in a doubly linked list where all the nodes
 * are part of an ElementWrapper struct that has the same id.
 * We cannot store prev and next as ThreadEntryNode pointers, since
 * ThreadEntry::elements can be reallocated and those pointers would become
 * stale. Instead we keep the id plus a pointer to the owning ThreadEntry
 * struct for the parent, the prev, and the next node.
 * We traverse and update the list only while holding
 * StaticMetaBase::lock_.
 */
struct ThreadEntryNode {
  uint32_t id;
  ThreadEntry* parent;
  ThreadEntry* prev;
  ThreadEntry* next;

  void initIfZero(bool locked);

  void init(ThreadEntry* entry, uint32_t newId) {
    id = newId;
    parent = prev = next = entry;
  }

  void initZero(ThreadEntry* entry, uint32_t newId) {
    id = newId;
    parent = entry;
    prev = next = nullptr;
  }

  // if the list this node is part of is empty
  FOLLY_ALWAYS_INLINE bool empty() const {
    return (next == parent);
  }

  FOLLY_ALWAYS_INLINE bool zero() const {
    return (!prev);
  }

  FOLLY_ALWAYS_INLINE ThreadEntry* getThreadEntry() {
    return parent;
  }

  FOLLY_ALWAYS_INLINE ThreadEntryNode* getPrev();
  FOLLY_ALWAYS_INLINE ThreadEntryNode* getNext();

  void push_back(ThreadEntry* head);
  void eraseZero();
};
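
// Illustrative sketch (not part of this header's API; `meta` and `id` are
// assumed to be a StaticMetaBase instance and an allocated element id): code
// that already holds StaticMetaBase::lock_ could walk the per-id ring
// starting from the node stored in meta.head_.elements[id]:
//
//   ThreadEntryNode* headNode = &meta.head_.elements[id].node;
//   for (ThreadEntryNode* n = headNode->getNext(); n != headNode;
//        n = n->getNext()) {
//     ThreadEntry* e = n->getThreadEntry(); // every thread holding this id
//   }
//
// getNext() resolves prev/next through the owning ThreadEntry and the id, so
// the traversal stays valid even if a ThreadEntry::elements array has been
// reallocated in the meantime.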
/**
 * POD wrapper around an element (a void*) and an associated deleter.
 * This must be POD, as we memset() it to 0 and memcpy() it around.
 */
struct ElementWrapper {
  using DeleterFunType = void(void*, TLPDestructionMode);

  bool dispose(TLPDestructionMode mode) {
    if (ptr == nullptr) {
      return false;
    }

    DCHECK(deleter1 != nullptr);
    ownsDeleter ? (*deleter2)(ptr, mode) : (*deleter1)(ptr, mode);
    return true;
  }

  void* release() {
    auto retPtr = ptr;

    if (ptr != nullptr) {
      cleanup();
    }

    return retPtr;
  }

  template <class Ptr>
  void set(Ptr p) {
    auto guard = makeGuard([&] { delete p; });
    DCHECK(ptr == nullptr);
    DCHECK(deleter1 == nullptr);
    if (p) {
      node.initIfZero(true /*locked*/);
      ptr = p;
      deleter1 = [](void* pt, TLPDestructionMode) {
        delete static_cast<Ptr>(pt);
      };
      ownsDeleter = false;
      guard.dismiss();
    }
  }

  template <class Ptr, class Deleter>
  void set(Ptr p, const Deleter& d) {
    auto guard = makeGuard([&] {
      if (p) {
        d(p, TLPDestructionMode::THIS_THREAD);
      }
    });
    DCHECK(ptr == nullptr);
    DCHECK(deleter2 == nullptr);
    if (p) {
      node.initIfZero(true /*locked*/);
      ptr = p;
      auto d2 = d; // gcc-4.8 doesn't decay types correctly in lambda captures
      deleter2 = new std::function<DeleterFunType>(
          [d2](void* pt, TLPDestructionMode mode) {
            d2(static_cast<Ptr>(pt), mode);
          });
      ownsDeleter = true;
      guard.dismiss();
    }
  }

  void cleanup() {
    if (ownsDeleter) {
      delete deleter2;
    }
    ptr = nullptr;
    deleter1 = nullptr;
    ownsDeleter = false;
  }

  void* ptr;
  union {
    DeleterFunType* deleter1;
    std::function<DeleterFunType>* deleter2;
  };
  bool ownsDeleter;
  ThreadEntryNode node;
};
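
// Illustrative sketch (`Widget`, `meta`, and `entryId` are hypothetical): an
// ElementWrapper normally lives inside ThreadEntry::elements, where it is
// zero-filled (a valid empty state, which is why the struct must stay POD)
// and its node is initialized to point at the owning ThreadEntry. After
// that, the typical lifecycle driven by the ThreadLocal machinery is roughly:
//
//   ElementWrapper& w = meta.getElement(&entryId); // zero-initialized slot
//   w.set(new Widget());                           // install ptr + default deleter
//   w.dispose(TLPDestructionMode::THIS_THREAD);    // run the deleter
//   w.cleanup();                                   // back to the empty state
//
// dispose() only runs the deleter; callers are expected to follow it with
// cleanup() (or use release(), which hands back the pointer without
// deleting it).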
struct StaticMetaBase;
struct ThreadEntryList;

/**
 * Per-thread entry. Each thread using a StaticMeta object has one.
 * This is written from the owning thread only (under the lock), read
 * from the owning thread (no lock necessary), and read from other threads
 * (under the lock).
 * The elementsCapacity of StaticMetaBase::head_ can be read from any thread
 * during reallocate (no lock).
 */
struct ThreadEntry {
  ElementWrapper* elements{nullptr};
  std::atomic<size_t> elementsCapacity{0};
  ThreadEntry* next{nullptr};
  ThreadEntry* prev{nullptr};
  ThreadEntryList* list{nullptr};
  ThreadEntry* listNext{nullptr};
  StaticMetaBase* meta{nullptr};
  bool removed_{false};

  size_t getElementsCapacity() const noexcept {
    return elementsCapacity.load(std::memory_order_relaxed);
  }

  void setElementsCapacity(size_t capacity) noexcept {
    elementsCapacity.store(capacity, std::memory_order_relaxed);
  }
};

struct ThreadEntryList {
  ThreadEntry* head{nullptr};
  size_t count{0};
};
struct PthreadKeyUnregisterTester;

FOLLY_ALWAYS_INLINE ThreadEntryNode* ThreadEntryNode::getPrev() {
  return &prev->elements[id].node;
}

FOLLY_ALWAYS_INLINE ThreadEntryNode* ThreadEntryNode::getNext() {
  return &next->elements[id].node;
}
/**
 * We want to disable the onThreadExit call at the end of shutdown; we don't
 * care about leaking memory at that point.
 *
 * Otherwise, if ThreadLocal is used in a shared library, onThreadExit may be
 * called after dlclose().
 *
 * This class has one single static instance; however, since it's so widely
 * used, directly or indirectly, by so many classes, we need to take care to
 * avoid problems stemming from the Static Initialization/Destruction Order
 * Fiascos. Therefore this class needs to be constexpr-constructible, so as
 * to avoid the need for this to participate in init/destruction order.
 */
class PthreadKeyUnregister {
 public:
  static constexpr size_t kMaxKeys = 1UL << 16;

  ~PthreadKeyUnregister() {
    // If static constructor priorities are not supported then
    // ~PthreadKeyUnregister logic is not safe.
#if !defined(__APPLE__) && !defined(_MSC_VER)
    MSLGuard lg(lock_);
    while (size_) {
      pthread_key_delete(keys_[--size_]);
    }
#endif
  }

  static void registerKey(pthread_key_t key) {
    instance_.registerKeyImpl(key);
  }

 private:
  /**
   * Only one global instance should exist, hence this is private.
   * See also the important note at the top of this class about `constexpr`
   * usage.
   */
  constexpr PthreadKeyUnregister() : lock_(), size_(0), keys_() {}
  friend struct folly::threadlocal_detail::PthreadKeyUnregisterTester;

  void registerKeyImpl(pthread_key_t key) {
    MSLGuard lg(lock_);
    if (size_ == kMaxKeys) {
      throw std::logic_error("pthread_key limit has already been reached");
    }
    keys_[size_++] = key;
  }

  MicroSpinLock lock_;
  size_t size_;
  pthread_key_t keys_[kMaxKeys];

  static PthreadKeyUnregister instance_;
};
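
// Illustrative sketch (not taken from this file): a caller that creates a
// pthread key and wants it deleted at shutdown registers it here, which is
// essentially what the StaticMetaBase constructor does:
//
//   pthread_key_t key;
//   int ret = pthread_key_create(&key, /*destructor=*/nullptr);
//   folly::checkPosixError(ret, "pthread_key_create failed");
//   PthreadKeyUnregister::registerKey(key); // deleted in ~PthreadKeyUnregister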
struct StaticMetaBase {
  // Represents an ID of a thread local object. Initially set to the maximum
  // uint32_t. This representation allows us to avoid a branch in accessing
  // TLS data: the fast path only tests capacity > id, and since an
  // unallocated id is the maximum value, that test always fails and falls
  // through to the slow path. It also lets us keep a constexpr constructor
  // and avoid SIOF.
  class EntryID {
   public:
    std::atomic<uint32_t> value;

    constexpr EntryID() : value(kEntryIDInvalid) {}

    EntryID(EntryID&& other) noexcept : value(other.value.load()) {
      other.value = kEntryIDInvalid;
    }

    EntryID& operator=(EntryID&& other) {
      assert(this != &other);
      value = other.value.load();
      other.value = kEntryIDInvalid;
      return *this;
    }

    EntryID(const EntryID& other) = delete;
    EntryID& operator=(const EntryID& other) = delete;

    uint32_t getOrInvalid() {
      // It's OK for this to be relaxed, even though we're effectively doing
      // double checked locking in using this value. We only care about the
      // uniqueness of IDs, getOrAllocate does not modify any other memory
      // this thread will use.
      return value.load(std::memory_order_relaxed);
    }

    uint32_t getOrAllocate(StaticMetaBase& meta) {
      uint32_t id = getOrInvalid();
      if (id != kEntryIDInvalid) {
        return id;
      }
      // The lock inside allocate ensures that a single value is allocated
      return meta.allocate(this);
    }
  };
  StaticMetaBase(ThreadEntry* (*threadEntry)(), bool strict);

  void push_back(ThreadEntry* t) {
    t->next = &head_;
    t->prev = head_.prev;
    head_.prev->next = t;
    head_.prev = t;
  }

  void erase(ThreadEntry* t) {
    t->next->prev = t->prev;
    t->prev->next = t->next;
    t->next = t->prev = t;
  }

  FOLLY_EXPORT static ThreadEntryList* getThreadEntryList();

  static void onThreadExit(void* ptr);

  // returns the elementsCapacity for the
  // current thread ThreadEntry struct
  uint32_t elementsCapacity() const;

  uint32_t allocate(EntryID* ent);

  void destroy(EntryID* ent);

  /**
   * Reserve enough space in the ThreadEntry::elements for the item
   * @id to fit in.
   */
  void reserve(EntryID* id);

  ElementWrapper& getElement(EntryID* ent);

  // reserve an id in the head_ ThreadEntry->elements
  // array if not already there
  void reserveHeadUnlocked(uint32_t id);

  // push back an entry in the doubly linked list
  // that corresponds to idx id
  void pushBackLocked(ThreadEntry* t, uint32_t id);
  void pushBackUnlocked(ThreadEntry* t, uint32_t id);
  // static helper method to reallocate the ThreadEntry::elements;
  // returns != nullptr if the ThreadEntry::elements was reallocated,
  // nullptr if the ThreadEntry::elements was just extended,
  // and throws std::bad_alloc if memory cannot be allocated
  static ElementWrapper*
  reallocate(ThreadEntry* threadEntry, uint32_t idval, size_t& newCapacity);
  uint32_t nextId_;
  std::vector<uint32_t> freeIds_;
  std::mutex lock_;
  SharedMutex accessAllThreadsLock_;
  pthread_key_t pthreadKey_;
  ThreadEntry head_;
  ThreadEntry* (*threadEntry_)();
  bool strict_;

 protected:
  ~StaticMetaBase() {}
};
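
// Illustrative sketch (not part of this header; `meta` is assumed to be a
// StaticMetaBase instance): with lock_ held, all live ThreadEntry objects
// belonging to this instance can be visited by walking the circular
// intrusive list anchored at head_, which is essentially how the
// all-threads accessors in ThreadLocal iterate:
//
//   std::lock_guard<std::mutex> g(meta.lock_);
//   for (ThreadEntry* e = meta.head_.next; e != &meta.head_; e = e->next) {
//     // e->elements[id].ptr may be inspected here
//   }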
// Held in a singleton to track our global instances.
// We have one of these per "Tag", by default one for the whole system
// (Tag=void).
//
// Creating and destroying ThreadLocalPtr objects, as well as thread exit
// for threads that use ThreadLocalPtr objects, all collide on a lock inside
// StaticMeta; you can specify multiple Tag types to break up that
// contention.
template <class Tag, class AccessMode>
struct StaticMeta final : StaticMetaBase {
  StaticMeta()
      : StaticMetaBase(
            &StaticMeta::getThreadEntrySlow,
            std::is_same<AccessMode, AccessModeStrict>::value) {
    detail::AtFork::registerHandler(
        this,
        /*prepare*/ &StaticMeta::preFork,
        /*parent*/ &StaticMeta::onForkParent,
        /*child*/ &StaticMeta::onForkChild);
  }

  ~StaticMeta() = delete;

  static StaticMeta<Tag, AccessMode>& instance() {
    // Leak it on exit, there's only one per process and we don't have to
    // worry about synchronization with exiting threads.
    /* library-local */ static auto instance =
        detail::createGlobal<StaticMeta<Tag, AccessMode>, void>();
    return *instance;
  }

  FOLLY_EXPORT FOLLY_ALWAYS_INLINE static ElementWrapper& get(EntryID* ent) {
    // Eliminate as many branches and as much extra code as possible in the
    // cached fast path, leaving only one branch here and one indirection
    // below.
    uint32_t id = ent->getOrInvalid();
#ifdef FOLLY_TLD_USE_FOLLY_TLS
    static FOLLY_TLS ThreadEntry* threadEntry{};
    static FOLLY_TLS size_t capacity{};
#else
    ThreadEntry* threadEntry{};
    size_t capacity{};
#endif
    if (FOLLY_UNLIKELY(capacity <= id)) {
      getSlowReserveAndCache(ent, id, threadEntry, capacity);
    }
    return threadEntry->elements[id];
  }
  FOLLY_NOINLINE static void getSlowReserveAndCache(
      EntryID* ent,
      uint32_t& id,
      ThreadEntry*& threadEntry,
      size_t& capacity) {
    auto& inst = instance();
    threadEntry = inst.threadEntry_();
    if (FOLLY_UNLIKELY(threadEntry->getElementsCapacity() <= id)) {
      inst.reserve(ent);
      id = ent->getOrInvalid();
    }
    capacity = threadEntry->getElementsCapacity();
    assert(capacity > id);
  }
  FOLLY_EXPORT FOLLY_NOINLINE static ThreadEntry* getThreadEntrySlow() {
    auto& meta = instance();
    auto key = meta.pthreadKey_;
    ThreadEntry* threadEntry =
        static_cast<ThreadEntry*>(pthread_getspecific(key));
    if (!threadEntry) {
      ThreadEntryList* threadEntryList = StaticMeta::getThreadEntryList();
#ifdef FOLLY_TLD_USE_FOLLY_TLS
      static FOLLY_TLS ThreadEntry threadEntrySingleton;
      threadEntry = &threadEntrySingleton;
#else
      threadEntry = new ThreadEntry();
#endif
      // if the ThreadEntry already exists
      // but pthread_getspecific returns NULL
      // do not add the same entry twice to the list
      // since this would create a loop in the list
      if (!threadEntry->list) {
        threadEntry->list = threadEntryList;
        threadEntry->listNext = threadEntryList->head;
        threadEntryList->head = threadEntry;
      }

      // if we're adding a thread entry
      // we need to increment the list count
      // even if the entry is reused
      threadEntryList->count++;

      threadEntry->meta = &meta;
      int ret = pthread_setspecific(key, threadEntry);
      checkPosixError(ret, "pthread_setspecific failed");
    }
    return threadEntry;
  }
  static bool preFork() {
    return instance().lock_.try_lock(); // Make sure it's created
  }

  static void onForkParent() {
    instance().lock_.unlock();
  }

  static void onForkChild() {
    // only the current thread survives
    auto& head = instance().head_;
    // init the head list
    head.next = head.prev = &head;
    // init the circular lists
    auto elementsCapacity = head.getElementsCapacity();
    for (size_t i = 0u; i < elementsCapacity; ++i) {
      head.elements[i].node.init(&head, static_cast<uint32_t>(i));
    }
    // init the thread entry
    ThreadEntry* threadEntry = instance().threadEntry_();
    elementsCapacity = threadEntry->getElementsCapacity();
    for (size_t i = 0u; i < elementsCapacity; ++i) {
      if (!threadEntry->elements[i].node.zero()) {
        threadEntry->elements[i].node.initZero(
            threadEntry, static_cast<uint32_t>(i));
        threadEntry->elements[i].node.initIfZero(false /*locked*/);
      }
    }

    // If this thread was in the list before the fork, add it back.
    if (elementsCapacity != 0) {
      instance().push_back(threadEntry);
    }
    instance().lock_.unlock();
  }
};

} // namespace threadlocal_detail
} // namespace folly
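
// Illustrative note (the declaration below uses the user-facing
// folly::ThreadLocal, not anything declared in this detail header): the Tag
// template parameter exists so that unrelated users can get their own
// StaticMeta instance and stop contending on the shared lock. A hypothetical
// MyLibraryTag would look like:
//
//   struct MyLibraryTag {};
//   folly::ThreadLocal<int, MyLibraryTag> perThreadCounter;
//
// The tagged declaration resolves to a separate StaticMeta (and therefore a
// separate lock_ and pthread key) from the default Tag=void instance.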