MemoryIdler.cpp

/*
 * Copyright 2014-present Facebook, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include <folly/detail/MemoryIdler.h>

#include <folly/GLog.h>
#include <folly/Portability.h>
#include <folly/ScopeGuard.h>
#include <folly/concurrency/CacheLocality.h>
#include <folly/memory/MallctlHelper.h>
#include <folly/memory/Malloc.h>
#include <folly/portability/PThread.h>
#include <folly/portability/SysMman.h>
#include <folly/portability/Unistd.h>
#include <folly/synchronization/CallOnce.h>

#include <cassert>
#include <cerrno>
#include <limits.h>
#include <stdio.h>
#include <string.h>
#include <utility>

namespace folly {
namespace detail {

AtomicStruct<std::chrono::steady_clock::duration>
    MemoryIdler::defaultIdleTimeout(std::chrono::seconds(5));
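
// Returns this thread's jemalloc caches to the allocator: flushes the
// thread-local tcache and, when jemalloc is running with its default
// many-arenas configuration, purges the dirty pages of the arena this
// thread is assigned to.  No-op when jemalloc isn't in use or the
// mallctl* symbols didn't weak-link.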
void MemoryIdler::flushLocalMallocCaches() {
  if (!usingJEMalloc()) {
    return;
  }
  if (!mallctl || !mallctlnametomib || !mallctlbymib) {
    FB_LOG_EVERY_MS(ERROR, 10000) << "mallctl* weak link failed";
    return;
  }

  try {
    // Not using mallctlCall as this will fail if tcache is disabled.
    mallctl("thread.tcache.flush", nullptr, nullptr, nullptr, 0);

    // By default jemalloc has 4 arenas per cpu, and then assigns each
    // thread to one of those arenas.  This means that in any service
    // that doesn't perform a lot of context switching, the chances that
    // another thread will be using the current thread's arena (and hence
    // doing the appropriate dirty-page purging) are low.  Some well-tuned
    // configurations (such as that used by hhvm) use fewer arenas and
    // then pin threads to avoid contended access.  In that case, purging
    // the arenas is counter-productive.  We use the heuristic that if
    // narenas <= 2 * num_cpus then we shouldn't do anything here, which
    // detects when narenas has been reduced from the default.
    unsigned narenas;
    unsigned arenaForCurrent;
    size_t mib[3];
    size_t miblen = 3;

    mallctlRead("opt.narenas", &narenas);
    mallctlRead("thread.arena", &arenaForCurrent);
    if (narenas > 2 * CacheLocality::system().numCpus &&
        mallctlnametomib("arena.0.purge", mib, &miblen) == 0) {
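      // "arena.0.purge" resolves to a MIB whose second component is the
      // arena index, so overwrite it to target the arena this thread is
      // actually bound to before issuing the purge.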
      mib[1] = static_cast<size_t>(arenaForCurrent);
      mallctlbymib(mib, miblen, nullptr, nullptr, nullptr, 0);
    }
  } catch (const std::runtime_error& ex) {
    FB_LOG_EVERY_MS(WARNING, 10000) << ex.what();
  }
}
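
// Illustrative only (not part of this file): a caller that parks a thread
// would typically invoke the MemoryIdler hooks once the thread has been
// idle for longer than defaultIdleTimeout, along the lines of
//
//   if (idleTime > MemoryIdler::defaultIdleTimeout.load()) {
//     MemoryIdler::flushLocalMallocCaches();
//     MemoryIdler::unmapUnusedStack(MemoryIdler::kDefaultStackToRetain);
//   }
//
// kDefaultStackToRetain is assumed here to be the retain constant declared
// in MemoryIdler.h; adapt the sketch to the interface you actually use.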

// Stack madvise isn't Linux or glibc specific, but the system calls
// and arithmetic (and bug compatibility) are not portable.  The set of
// platforms could be increased if it was useful.
#if (FOLLY_X64 || FOLLY_PPC64) && defined(_GNU_SOURCE) && \
    defined(__linux__) && !FOLLY_MOBILE && !FOLLY_SANITIZE_ADDRESS

static FOLLY_TLS uintptr_t tls_stackLimit;
static FOLLY_TLS size_t tls_stackSize;

static size_t pageSize() {
  static const size_t s_pageSize = sysconf(_SC_PAGESIZE);
  return s_pageSize;
}
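
// Fills tls_stackLimit / tls_stackSize for the calling thread from its
// pthread attributes.  tls_stackSize == 0 means "not fetched yet"; the
// error paths store the sentinel value 1, which makes unmapUnusedStack()
// below bail out rather than guess at the stack boundaries.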
static void fetchStackLimits() {
  int err;
  pthread_attr_t attr;
  if ((err = pthread_getattr_np(pthread_self(), &attr))) {
    // some restricted environments can't access /proc
    static folly::once_flag flag;
    folly::call_once(flag, [err]() {
      LOG(WARNING) << "pthread_getattr_np failed errno=" << err;
    });
    tls_stackSize = 1;
    return;
  }
  SCOPE_EXIT {
    pthread_attr_destroy(&attr);
  };

  void* addr;
  size_t rawSize;
  if ((err = pthread_attr_getstack(&attr, &addr, &rawSize))) {
    // unexpected, but it is better to continue in prod than do nothing
    FB_LOG_EVERY_MS(ERROR, 10000) << "pthread_attr_getstack error " << err;
    assert(false);
    tls_stackSize = 1;
    return;
  }

  if (rawSize >= (1ULL << 32)) {
    // Avoid unmapping huge swaths of memory if there is an insane
    // stack size.  The boundary of sanity is somewhat arbitrary: 4GB.
    //
    // If we went into /proc to find the actual contiguous mapped pages
    // before unmapping we wouldn't care about the stack size at all,
    // but our current strategy is to unmap the entire range that might
    // be used for the stack even if it hasn't been fully faulted-in.
    //
    // A very large stack size is a bug (hence the assert), but we can
    // carry on if we are in prod.
    FB_LOG_EVERY_MS(ERROR, 10000)
        << "pthread_attr_getstack returned insane stack size " << rawSize;
    assert(false);
    tls_stackSize = 1;
    return;
  }
  assert(addr != nullptr);
  assert(rawSize >= PTHREAD_STACK_MIN);

  // glibc subtracts the guard page from the stack size, even though the
  // pthread docs seem to imply the opposite
  size_t guardSize;
  if (pthread_attr_getguardsize(&attr, &guardSize) != 0) {
    guardSize = 0;
  }
  assert(rawSize > guardSize);

  // the stack grows downwards, so the guard page adds to the base addr
  tls_stackLimit = reinterpret_cast<uintptr_t>(addr) + guardSize;
  tls_stackSize = rawSize - guardSize;

  assert((tls_stackLimit & (pageSize() - 1)) == 0);
}
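
// Approximates the current stack pointer by taking the address of a stack
// local.  FOLLY_NOINLINE keeps this in its own frame, so the result does
// not depend on how the caller's frame happens to be laid out.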
FOLLY_NOINLINE static uintptr_t getStackPtr() {
  char marker;
  auto rv = reinterpret_cast<uintptr_t>(&marker);
  return rv;
}
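
// Tells the kernel (MADV_DONTNEED) that the pages between the bottom of
// this thread's stack and roughly `retain` bytes below the current stack
// pointer are no longer needed, letting it reclaim the physical memory
// behind a deep call stack once the thread has gone idle.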
void MemoryIdler::unmapUnusedStack(size_t retain) {
  if (tls_stackSize == 0) {
    fetchStackLimits();
  }
  if (tls_stackSize <= std::max(static_cast<size_t>(1), retain)) {
    // covers both missing stack info, and impossibly large retain
    return;
  }

  auto sp = getStackPtr();
  assert(sp >= tls_stackLimit);
  assert(sp - tls_stackLimit < tls_stackSize);

  auto end = (sp - retain) & ~(pageSize() - 1);
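  // Example (illustrative numbers, 4 KiB pages): with sp == 0x7f1234042a10
  // and retain == 1024, sp - retain == 0x7f1234042610, and masking with
  // ~(pageSize() - 1) rounds down to the page boundary 0x7f1234042000.
  // Everything from tls_stackLimit up to (but not including) `end` is then
  // a whole number of pages that the live part of the stack cannot be using.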
  if (end <= tls_stackLimit) {
    // no pages are eligible for unmapping
    return;
  }

  size_t len = end - tls_stackLimit;
  assert((len & (pageSize() - 1)) == 0);
  if (madvise((void*)tls_stackLimit, len, MADV_DONTNEED) != 0) {
    // It is likely that the stack vma hasn't been fully grown.  In this
    // case madvise will apply dontneed to the present vmas, then return
    // errno of ENOMEM.
    // If thread stack pages are backed by locked or huge pages, madvise will
    // fail with EINVAL.  (EINVAL may also be returned if the address or
    // length are bad.)  Warn in debug mode, since MemoryIdler may not
    // function as expected.
    // We can also get an EAGAIN, theoretically.
    PLOG_IF(WARNING, kIsDebug && errno == EINVAL) << "madvise failed";
    assert(errno == EAGAIN || errno == ENOMEM || errno == EINVAL);
  }
}

#else

void MemoryIdler::unmapUnusedStack(size_t /* retain */) {}

#endif

} // namespace detail
} // namespace folly