MemoryIdler.h 7.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209
  1. /*
  2. * Copyright 2014-present Facebook, Inc.
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #pragma once
  #include <algorithm>
  #include <atomic>
  #include <chrono>
  #include <cstddef>
  #include <cstdint>
  #include <limits>

  #include <folly/detail/Futex.h>
  #include <folly/hash/Hash.h>
  #include <folly/synchronization/AtomicStruct.h>
  #include <folly/system/ThreadId.h>
  24. namespace folly {
  25. namespace detail {
  26. /// MemoryIdler provides helper routines that allow routines to return
  27. /// some assigned memory resources back to the system. The intended
  28. /// use is that when a thread is waiting for a long time (perhaps it
  29. /// is in a LIFO thread pool and hasn't been needed for a long time)
  30. /// it should release its thread-local malloc caches (both jemalloc and
  31. /// tcmalloc use these for better performance) and unmap the stack pages
  32. /// that contain no useful data.
  33. struct MemoryIdler {
  34. /// Returns memory from thread-local allocation pools to the global
  35. /// pool, if we know how to for the current malloc implementation.
  36. /// jemalloc is supported.
  37. static void flushLocalMallocCaches();
  38. enum {
  39. /// This value is a tradeoff between reclaiming memory and triggering
  40. /// a page fault immediately on wakeup. Note that the actual unit
  41. /// of idling for the stack is pages, so the actual stack that
  42. /// will be available on wakeup without a page fault is between
  43. /// kDefaultStackToRetain and kDefaultStackToRetain + PageSize -
  44. /// 1 bytes.
  45. kDefaultStackToRetain = 1024,
  46. };
  47. /// Uses madvise to discard the portion of the thread's stack that
  48. /// currently doesn't hold any data, trying to ensure that no page
  49. /// faults will occur during the next retain bytes of stack allocation
  50. static void unmapUnusedStack(size_t retain = kDefaultStackToRetain);
  51. /// The system-wide default for the amount of time a blocking
  52. /// thread should wait before reclaiming idle memory. Set this to
  53. /// Duration::max() to never wait. The default value is 5 seconds.
  54. /// Endpoints using this idle timeout might randomly wait longer to
  55. /// avoid synchronizing their flushes.
  56. static AtomicStruct<std::chrono::steady_clock::duration> defaultIdleTimeout;
  57. /// Selects a timeout pseudo-randomly chosen to be between
  58. /// idleTimeout and idleTimeout * (1 + timeoutVariationFraction), to
  59. /// smooth out the behavior in a bursty system
  60. template <typename IdleTime = std::chrono::steady_clock::duration>
  61. static IdleTime getVariationTimeout(
  62. IdleTime const& idleTimeout =
  63. defaultIdleTimeout.load(std::memory_order_acquire),
  64. float timeoutVariationFrac = 0.5) {
  65. if (idleTimeout <= IdleTime::zero() || timeoutVariationFrac <= 0) {
  66. return idleTimeout;
  67. }
  68. // hash the pthread_t and the time to get the adjustment
  69. // Standard hash func isn't very good, so bit mix the result
  70. uint64_t h = folly::hash::twang_mix64(folly::hash::hash_combine(
  71. getCurrentThreadID(),
  72. std::chrono::system_clock::now().time_since_epoch().count()));
  73. // multiplying the duration by a floating point doesn't work, grr
  74. auto extraFrac = timeoutVariationFrac /
  75. static_cast<float>(std::numeric_limits<uint64_t>::max()) * h;
  76. auto tics = uint64_t(idleTimeout.count() * (1 + extraFrac));
  77. return IdleTime(tics);
  78. }
  79. /// Equivalent to fut.futexWait(expected, waitMask), but calls
  80. /// flushLocalMallocCaches() and unmapUnusedStack(stackToRetain)
  81. /// after idleTimeout has passed (if it has passed). Internally uses
  82. /// fut.futexWait and fut.futexWaitUntil. The actual timeout will be
  83. /// pseudo-randomly chosen to be between idleTimeout and idleTimeout *
  84. /// (1 + timeoutVariationFraction), to smooth out the behavior in a
  85. /// system with bursty requests. The default is to wait up to 50%
  86. /// extra, so on average 25% extra.
  87. template <
  88. typename Futex,
  89. typename IdleTime = std::chrono::steady_clock::duration>
  90. static FutexResult futexWait(
  91. Futex& fut,
  92. uint32_t expected,
  93. uint32_t waitMask = -1,
  94. IdleTime const& idleTimeout =
  95. defaultIdleTimeout.load(std::memory_order_acquire),
  96. size_t stackToRetain = kDefaultStackToRetain,
  97. float timeoutVariationFrac = 0.5) {
  98. FutexResult pre;
  99. if (futexWaitPreIdle(
  100. pre,
  101. fut,
  102. expected,
  103. std::chrono::steady_clock::time_point::max(),
  104. waitMask,
  105. idleTimeout,
  106. stackToRetain,
  107. timeoutVariationFrac)) {
  108. return pre;
  109. }
  110. using folly::detail::futexWait;
  111. return futexWait(&fut, expected, waitMask);
  112. }
  113. /// Equivalent to fut.futexWaitUntil(expected, deadline, waitMask), but
  114. /// calls flushLocalMallocCaches() and unmapUnusedStack(stackToRetain)
  115. /// after idleTimeout has passed (if it has passed). Internally uses
  116. /// fut.futexWaitUntil. The actual timeout will be pseudo-randomly
  117. /// chosen to be between idleTimeout and idleTimeout *
  118. /// (1 + timeoutVariationFraction), to smooth out the behavior in a
  119. /// system with bursty requests. The default is to wait up to 50%
  120. /// extra, so on average 25% extra.
  121. template <
  122. typename Futex,
  123. typename Deadline,
  124. typename IdleTime = std::chrono::steady_clock::duration>
  125. static FutexResult futexWaitUntil(
  126. Futex& fut,
  127. uint32_t expected,
  128. Deadline const& deadline,
  129. uint32_t waitMask = -1,
  130. IdleTime const& idleTimeout =
  131. defaultIdleTimeout.load(std::memory_order_acquire),
  132. size_t stackToRetain = kDefaultStackToRetain,
  133. float timeoutVariationFrac = 0.5) {
  134. FutexResult pre;
  135. if (futexWaitPreIdle(
  136. pre,
  137. fut,
  138. expected,
  139. deadline,
  140. waitMask,
  141. idleTimeout,
  142. stackToRetain,
  143. timeoutVariationFrac)) {
  144. return pre;
  145. }
  146. using folly::detail::futexWaitUntil;
  147. return futexWaitUntil(&fut, expected, deadline, waitMask);
  148. }
  149. private:
  150. template <typename Futex, typename Deadline, typename IdleTime>
  151. static bool futexWaitPreIdle(
  152. FutexResult& _ret,
  153. Futex& fut,
  154. uint32_t expected,
  155. Deadline const& deadline,
  156. uint32_t waitMask,
  157. IdleTime idleTimeout,
  158. size_t stackToRetain,
  159. float timeoutVariationFrac) {
  160. // idleTimeout < 0 means no flush behavior
  161. if (idleTimeout < IdleTime::zero()) {
  162. return false;
  163. }
  164. // idleTimeout == 0 means flush immediately, without variation
  165. // idleTimeout > 0 means flush after delay, with variation
  166. if (idleTimeout > IdleTime::zero()) {
  167. idleTimeout = std::max(
  168. IdleTime::zero(),
  169. getVariationTimeout(idleTimeout, timeoutVariationFrac));
  170. }
  171. if (idleTimeout > IdleTime::zero()) {
  172. auto idleDeadline = Deadline::clock::now() + idleTimeout;
  173. if (idleDeadline < deadline) {
  174. using folly::detail::futexWaitUntil;
  175. auto rv = futexWaitUntil(&fut, expected, idleDeadline, waitMask);
  176. if (rv != FutexResult::TIMEDOUT) {
  177. // finished before timeout hit, no flush
  178. _ret = rv;
  179. return true;
  180. }
  181. }
  182. }
  183. // flush, then wait
  184. flushLocalMallocCaches();
  185. unmapUnusedStack(stackToRetain);
  186. return false;
  187. }
  188. };
  189. } // namespace detail
  190. } // namespace folly