ThreadLocalDetail.cpp

/*
 * Copyright 2015-present Facebook, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include <folly/detail/ThreadLocalDetail.h>
#include <folly/synchronization/CallOnce.h>

#include <list>
#include <mutex>

constexpr auto kSmallGrowthFactor = 1.1;
constexpr auto kBigGrowthFactor = 1.7;

namespace folly {
namespace threadlocal_detail {

void ThreadEntryNode::initIfZero(bool locked) {
  if (UNLIKELY(!next)) {
    if (LIKELY(locked)) {
      parent->meta->pushBackLocked(parent, id);
    } else {
      parent->meta->pushBackUnlocked(parent, id);
    }
  }
}

void ThreadEntryNode::push_back(ThreadEntry* head) {
  // get the head prev and next nodes
  ThreadEntryNode* hnode = &head->elements[id].node;

  // splice the current node in just before head
  next = head;
  prev = hnode->prev;

  // point the old tail (hprev) and the head at this node's parent entry
  ThreadEntryNode* hprev = &hnode->prev->elements[id].node;
  hprev->next = parent;
  hnode->prev = parent;
}

void ThreadEntryNode::eraseZero() {
  if (LIKELY(prev != nullptr)) {
    // get the prev and next nodes
    ThreadEntryNode* nprev = &prev->elements[id].node;
    ThreadEntryNode* nnext = &next->elements[id].node;

    // update the prev and next
    nnext->prev = prev;
    nprev->next = next;

    // set the prev and next to nullptr
    next = prev = nullptr;
  }
}

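// Illustrative sketch (not part of the folly sources): for a given slot id,
// the three functions above maintain a circular doubly-linked list of
// ThreadEntry objects, threaded through each thread's elements array with
// StaticMetaBase::head_ as the sentinel. With two live threads A and B
// registered for slot id:
//
//   head_.elements[id].node : prev = &B,     next = &A
//   A.elements[id].node     : prev = &head_, next = &B
//   B.elements[id].node     : prev = &A,     next = &head_
//
// push_back() splices an entry in just before the sentinel; eraseZero()
// unlinks it and nulls its own prev/next so that a later initIfZero() can
// lazily re-register it.
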
StaticMetaBase::StaticMetaBase(ThreadEntry* (*threadEntry)(), bool strict)
    : nextId_(1), threadEntry_(threadEntry), strict_(strict) {
  head_.next = head_.prev = &head_;
  int ret = pthread_key_create(&pthreadKey_, &onThreadExit);
  checkPosixError(ret, "pthread_key_create failed");
  PthreadKeyUnregister::registerKey(pthreadKey_);
}

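// A minimal standalone sketch (hypothetical example, not folly code) of the
// POSIX contract the constructor above relies on: the destructor passed to
// pthread_key_create runs at thread exit, once per thread whose slot still
// holds a non-null value -- which is how onThreadExit gets invoked.
//
//   #include <pthread.h>
//   #include <cstdio>
//
//   static pthread_key_t key;
//
//   static void onExit(void* p) {
//     std::printf("thread exiting, slot = %p\n", p);
//   }
//
//   static void* worker(void*) {
//     pthread_setspecific(key, reinterpret_cast<void*>(0x1));
//     return nullptr; // onExit(0x1) fires during thread teardown
//   }
//
//   int main() {
//     pthread_key_create(&key, &onExit);
//     pthread_t t;
//     pthread_create(&t, nullptr, &worker, nullptr);
//     pthread_join(t, nullptr);
//   }
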
ThreadEntryList* StaticMetaBase::getThreadEntryList() {
#ifdef FOLLY_TLD_USE_FOLLY_TLS
  static FOLLY_TLS ThreadEntryList threadEntryListSingleton;
  return &threadEntryListSingleton;
#else
  class PthreadKey {
   public:
    PthreadKey() {
      int ret = pthread_key_create(&pthreadKey_, nullptr);
      checkPosixError(ret, "pthread_key_create failed");
      PthreadKeyUnregister::registerKey(pthreadKey_);
    }

    FOLLY_ALWAYS_INLINE pthread_key_t get() const {
      return pthreadKey_;
    }

   private:
    pthread_key_t pthreadKey_;
  };

  static auto instance = detail::createGlobal<PthreadKey, void>();

  ThreadEntryList* threadEntryList =
      static_cast<ThreadEntryList*>(pthread_getspecific(instance->get()));
  if (UNLIKELY(!threadEntryList)) {
    threadEntryList = new ThreadEntryList();
    int ret = pthread_setspecific(instance->get(), threadEntryList);
    checkPosixError(ret, "pthread_setspecific failed");
  }

  return threadEntryList;
#endif
}

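// Note: both branches above yield a per-thread singleton; the #else path is
// roughly equivalent (sketch) to
//
//   static thread_local ThreadEntryList threadEntryListSingleton;
//
// but works without compiler TLS support by keying a lazily heap-allocated
// list off one process-wide pthread key. That key's destructor is nullptr
// on purpose: the list's lifetime is managed explicitly in onThreadExit
// (below), not by the key.
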
void StaticMetaBase::onThreadExit(void* ptr) {
  auto threadEntry = static_cast<ThreadEntry*>(ptr);

  {
    auto& meta = *threadEntry->meta;

    // Make sure this ThreadEntry is available if ThreadLocal A is accessed in
    // ThreadLocal B destructor.
    pthread_setspecific(meta.pthreadKey_, threadEntry);
    SharedMutex::ReadHolder rlock(nullptr);
    if (meta.strict_) {
      rlock = SharedMutex::ReadHolder(meta.accessAllThreadsLock_);
    }
    {
      std::lock_guard<std::mutex> g(meta.lock_);
      // mark it as removed
      threadEntry->removed_ = true;
      meta.erase(&(*threadEntry));
      auto elementsCapacity = threadEntry->getElementsCapacity();
      for (size_t i = 0u; i < elementsCapacity; ++i) {
        threadEntry->elements[i].node.eraseZero();
      }
      // No need to hold the lock any longer; the ThreadEntry is private to
      // this thread now that it's been removed from meta.
    }

    // NOTE: User-provided deleter / object dtor itself may be using
    // ThreadLocal with the same Tag, so dispose() calls below may (re)create
    // some of the elements or even increase elementsCapacity, thus multiple
    // cleanup rounds may be required.
    for (bool shouldRun = true; shouldRun;) {
      shouldRun = false;
      auto elementsCapacity = threadEntry->getElementsCapacity();
      FOR_EACH_RANGE (i, 0, elementsCapacity) {
        if (threadEntry->elements[i].dispose(TLPDestructionMode::THIS_THREAD)) {
          threadEntry->elements[i].cleanup();
          shouldRun = true;
        }
      }
    }
    pthread_setspecific(meta.pthreadKey_, nullptr);
  }

  auto threadEntryList = threadEntry->list;
  DCHECK_GT(threadEntryList->count, 0u);

  --threadEntryList->count;

  if (threadEntryList->count) {
    return;
  }

  // dispose all the elements
  for (bool shouldRunOuter = true; shouldRunOuter;) {
    shouldRunOuter = false;
    auto tmp = threadEntryList->head;
    while (tmp) {
      auto& meta = *tmp->meta;
      pthread_setspecific(meta.pthreadKey_, tmp);
      SharedMutex::ReadHolder rlock(nullptr);
      if (meta.strict_) {
        rlock = SharedMutex::ReadHolder(meta.accessAllThreadsLock_);
      }
      for (bool shouldRunInner = true; shouldRunInner;) {
        shouldRunInner = false;
        auto elementsCapacity = tmp->getElementsCapacity();
        FOR_EACH_RANGE (i, 0, elementsCapacity) {
          if (tmp->elements[i].dispose(TLPDestructionMode::THIS_THREAD)) {
            tmp->elements[i].cleanup();
            shouldRunInner = true;
            shouldRunOuter = true;
          }
        }
      }
      pthread_setspecific(meta.pthreadKey_, nullptr);
      tmp = tmp->listNext;
    }
  }

  // free the entry list
  auto head = threadEntryList->head;
  threadEntryList->head = nullptr;
  while (head) {
    auto tmp = head;
    head = head->listNext;
    if (tmp->elements) {
      free(tmp->elements);
      tmp->elements = nullptr;
      tmp->setElementsCapacity(0);
    }

#ifndef FOLLY_TLD_USE_FOLLY_TLS
    delete tmp;
#endif
  }

#ifndef FOLLY_TLD_USE_FOLLY_TLS
  delete threadEntryList;
#endif
}

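// Illustrative sketch (user code, not folly internals) of why the dispose
// loops above must re-run until a round does no work: a ThreadLocal
// destructor may itself touch another ThreadLocal, re-creating an element
// that was already cleaned up.
//
//   folly::ThreadLocal<Logger> logger;   // Logger is a hypothetical type
//
//   struct Session {
//     ~Session() { logger->log("bye"); } // may (re)create logger's
//   };                                   // per-thread instance
//
//   folly::ThreadLocal<Session> session;
//
// If Session's element is disposed first, the logger element can come back
// to life during that round, so a second round is needed to dispose it.
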
uint32_t StaticMetaBase::elementsCapacity() const {
  ThreadEntry* threadEntry = (*threadEntry_)();

  return FOLLY_LIKELY(!!threadEntry) ? threadEntry->getElementsCapacity() : 0;
}

uint32_t StaticMetaBase::allocate(EntryID* ent) {
  uint32_t id;
  auto& meta = *this;
  std::lock_guard<std::mutex> g(meta.lock_);

  id = ent->value.load();
  if (id != kEntryIDInvalid) {
    return id;
  }

  if (!meta.freeIds_.empty()) {
    id = meta.freeIds_.back();
    meta.freeIds_.pop_back();
  } else {
    id = meta.nextId_++;
  }

  uint32_t old_id = ent->value.exchange(id);
  DCHECK_EQ(old_id, kEntryIDInvalid);

  reserveHeadUnlocked(id);

  return id;
}

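// Worked example (hypothetical sequence): ids are recycled through
// freeIds_, so churning ThreadLocal<> instances doesn't grow the id space
// (and hence per-thread element arrays) without bound.
//
//   allocate(a) -> id 1   (nextId_ becomes 2)
//   allocate(b) -> id 2   (nextId_ becomes 3)
//   destroy(a)  -> freeIds_ = {1}
//   allocate(c) -> id 1   (reused; nextId_ stays 3)
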
void StaticMetaBase::destroy(EntryID* ent) {
  try {
    auto& meta = *this;

    // Elements in other threads that use this id.
    std::vector<ElementWrapper> elements;

    {
      SharedMutex::WriteHolder wlock(nullptr);
      if (meta.strict_) {
        /*
         * In strict mode, the logic guarantees per-thread instances are
         * destroyed by the moment ThreadLocal<> dtor returns.
         * In order to achieve that, we should wait until concurrent
         * onThreadExit() calls (that might acquire ownership over per-thread
         * instances in order to destroy them) are finished.
         */
        wlock = SharedMutex::WriteHolder(meta.accessAllThreadsLock_);
      }

      {
        std::lock_guard<std::mutex> g(meta.lock_);
        uint32_t id = ent->value.exchange(kEntryIDInvalid);
        if (id == kEntryIDInvalid) {
          return;
        }

        auto& node = meta.head_.elements[id].node;
        while (!node.empty()) {
          auto* next = node.getNext();
          next->eraseZero();

          ThreadEntry* e = next->parent;
          auto elementsCapacity = e->getElementsCapacity();
          if (id < elementsCapacity && e->elements[id].ptr) {
            elements.push_back(e->elements[id]);

            /*
             * Writing another thread's ThreadEntry from here is fine;
             * the only other potential reader is the owning thread --
             * from onThreadExit (which grabs the lock, so is properly
             * synchronized with us) or from get(), which also grabs
             * the lock if it needs to resize the elements vector.
             *
             * We can't conflict with reads for a get(id), because
             * it's illegal to call get on a thread local that's
             * destructing.
             */
            e->elements[id].ptr = nullptr;
            e->elements[id].deleter1 = nullptr;
            e->elements[id].ownsDeleter = false;
          }
        }
        meta.freeIds_.push_back(id);
      }
    }
    // Delete elements outside the locks.
    for (ElementWrapper& elem : elements) {
      if (elem.dispose(TLPDestructionMode::ALL_THREADS)) {
        elem.cleanup();
      }
    }
  } catch (...) { // Just in case we get a lock error or something anyway...
    LOG(WARNING) << "Destructor discarding an exception that was thrown.";
  }
}

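// Note on strict mode: the accessAllThreadsLock_ write acquisition above is
// what backs the guarantee for strict-mode thread locals (sketch, assuming
// folly's AccessModeStrict tag parameter):
//
//   folly::ThreadLocalPtr<Widget, MyTag, folly::AccessModeStrict> tl;
//
// Taking the lock exclusively makes destroy() wait out any in-flight
// onThreadExit() readers, so no per-thread instance survives the dtor.
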
ElementWrapper* StaticMetaBase::reallocate(
    ThreadEntry* threadEntry,
    uint32_t idval,
    size_t& newCapacity) {
  size_t prevCapacity = threadEntry->getElementsCapacity();

  // Growth factor < 2, see folly/docs/FBVector.md; + 5 to prevent
  // very slow start.
  auto smallCapacity = static_cast<size_t>((idval + 5) * kSmallGrowthFactor);
  auto bigCapacity = static_cast<size_t>((idval + 5) * kBigGrowthFactor);

  newCapacity =
      (threadEntry->meta &&
       (bigCapacity <= threadEntry->meta->head_.getElementsCapacity()))
      ? bigCapacity
      : smallCapacity;

  assert(newCapacity > prevCapacity);
  ElementWrapper* reallocated = nullptr;

  // Need to grow. Note that we can't call realloc, as elements is
  // still linked in meta, so another thread might access invalid memory
  // after realloc succeeds. We'll copy by hand and update our ThreadEntry
  // under the lock.
  if (usingJEMalloc()) {
    bool success = false;
    size_t newByteSize = nallocx(newCapacity * sizeof(ElementWrapper), 0);

    // Try to grow in place.
    //
    // Note that xallocx(MALLOCX_ZERO) will only zero newly allocated memory,
    // even if a previous allocation allocated more than we requested.
    // This is fine; we always use MALLOCX_ZERO with jemalloc and we
    // always expand our allocation to the real size.
    if (prevCapacity * sizeof(ElementWrapper) >= jemallocMinInPlaceExpandable) {
      success =
          (xallocx(threadEntry->elements, newByteSize, 0, MALLOCX_ZERO) ==
           newByteSize);
    }

    // In-place growth failed.
    if (!success) {
      success =
          ((reallocated = static_cast<ElementWrapper*>(
                mallocx(newByteSize, MALLOCX_ZERO))) != nullptr);
    }

    if (success) {
      // Expand to real size
      assert(newByteSize / sizeof(ElementWrapper) >= newCapacity);
      newCapacity = newByteSize / sizeof(ElementWrapper);
    } else {
      throw std::bad_alloc();
    }
  } else { // no jemalloc
    // calloc() is simpler than malloc() followed by memset(), and
    // potentially faster when dealing with a lot of memory, as it can get
    // already-zeroed pages from the kernel.
    reallocated = static_cast<ElementWrapper*>(
        calloc(newCapacity, sizeof(ElementWrapper)));
    if (!reallocated) {
      throw std::bad_alloc();
    }
  }

  return reallocated;
}

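// Worked example (hypothetical numbers): for idval = 20,
//
//   smallCapacity = static_cast<size_t>((20 + 5) * 1.1) == 27
//   bigCapacity   = static_cast<size_t>((20 + 5) * 1.7) == 42
//
// The big factor is only used when the meta's head_ array is already at
// least bigCapacity elements, i.e. some thread has seen that many ids, so
// the larger up-front allocation is likely to be used.
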
/**
 * Reserve enough space in the ThreadEntry::elements for the item
 * @id to fit in.
 */
void StaticMetaBase::reserve(EntryID* id) {
  auto& meta = *this;
  ThreadEntry* threadEntry = (*threadEntry_)();
  size_t prevCapacity = threadEntry->getElementsCapacity();

  uint32_t idval = id->getOrAllocate(meta);
  if (prevCapacity > idval) {
    return;
  }

  size_t newCapacity;
  ElementWrapper* reallocated = reallocate(threadEntry, idval, newCapacity);

  // Success, update the entry
  {
    std::lock_guard<std::mutex> g(meta.lock_);

    if (prevCapacity == 0) {
      meta.push_back(threadEntry);
    }

    if (reallocated) {
      /*
       * Note: we need to hold the meta lock when copying data out of
       * the old vector, because some other thread might be
       * destructing a ThreadLocal and writing to the elements vector
       * of this thread.
       */
      if (prevCapacity != 0) {
        memcpy(
            reallocated,
            threadEntry->elements,
            sizeof(*reallocated) * prevCapacity);
      }
      std::swap(reallocated, threadEntry->elements);
    }

    for (size_t i = prevCapacity; i < newCapacity; i++) {
      threadEntry->elements[i].node.initZero(threadEntry, i);
    }

    threadEntry->setElementsCapacity(newCapacity);
  }

  free(reallocated);
}

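// Note: after the std::swap above, `reallocated` holds the *old* elements
// array (or nullptr if jemalloc grew the buffer in place), so the final
// free() releases the stale buffer, and does so outside the lock.
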
void StaticMetaBase::reserveHeadUnlocked(uint32_t id) {
  if (head_.getElementsCapacity() <= id) {
    size_t prevCapacity = head_.getElementsCapacity();
    size_t newCapacity;
    ElementWrapper* reallocated = reallocate(&head_, id, newCapacity);

    if (reallocated) {
      if (prevCapacity != 0) {
        memcpy(
            reallocated, head_.elements, sizeof(*reallocated) * prevCapacity);
      }
      std::swap(reallocated, head_.elements);
    }

    for (size_t i = prevCapacity; i < newCapacity; i++) {
      head_.elements[i].node.init(&head_, i);
    }

    head_.setElementsCapacity(newCapacity);
    free(reallocated);
  }
}

void StaticMetaBase::pushBackLocked(ThreadEntry* t, uint32_t id) {
  if (LIKELY(!t->removed_)) {
    std::lock_guard<std::mutex> g(lock_);
    auto* node = &t->elements[id].node;
    node->push_back(&head_);
  }
}

void StaticMetaBase::pushBackUnlocked(ThreadEntry* t, uint32_t id) {
  if (LIKELY(!t->removed_)) {
    auto* node = &t->elements[id].node;
    node->push_back(&head_);
  }
}

FOLLY_STATIC_CTOR_PRIORITY_MAX
PthreadKeyUnregister PthreadKeyUnregister::instance_;

} // namespace threadlocal_detail
} // namespace folly