// ThreadCachedInt.h
/*
 * Copyright 2011-present Facebook, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
/**
 * Higher performance (up to 10x) atomic increment using thread caching.
 *
 * @author Spencer Ahrens (sahrens)
 */
  21. #pragma once
  22. #include <atomic>
  23. #include <boost/noncopyable.hpp>
  24. #include <folly/Likely.h>
  25. #include <folly/ThreadLocal.h>
  26. namespace folly {
  27. // Note that readFull requires holding a lock and iterating through all of the
  28. // thread local objects with the same Tag, so if you have a lot of
  29. // ThreadCachedInt's you should considering breaking up the Tag space even
  30. // further.
  31. template <class IntT, class Tag = IntT>
  32. class ThreadCachedInt : boost::noncopyable {
  33. struct IntCache;
  34. public:
  35. explicit ThreadCachedInt(IntT initialVal = 0, uint32_t cacheSize = 1000)
  36. : target_(initialVal), cacheSize_(cacheSize) {}
  37. void increment(IntT inc) {
  38. auto cache = cache_.get();
  39. if (UNLIKELY(cache == nullptr)) {
  40. cache = new IntCache(*this);
  41. cache_.reset(cache);
  42. }
  43. cache->increment(inc);
  44. }
  45. // Quickly grabs the current value which may not include some cached
  46. // increments.
  47. IntT readFast() const {
  48. return target_.load(std::memory_order_relaxed);
  49. }
  50. // Reads the current value plus all the cached increments. Requires grabbing
  51. // a lock, so this is significantly slower than readFast().
  52. IntT readFull() const {
  53. // This could race with thread destruction and so the access lock should be
  54. // acquired before reading the current value
  55. const auto accessor = cache_.accessAllThreads();
  56. IntT ret = readFast();
  57. for (const auto& cache : accessor) {
  58. if (!cache.reset_.load(std::memory_order_acquire)) {
  59. ret += cache.val_.load(std::memory_order_relaxed);
  60. }
  61. }
  62. return ret;
  63. }
  64. // Quickly reads and resets current value (doesn't reset cached increments).
  65. IntT readFastAndReset() {
  66. return target_.exchange(0, std::memory_order_release);
  67. }
  68. // This function is designed for accumulating into another counter, where you
  69. // only want to count each increment once. It can still get the count a
  70. // little off, however, but it should be much better than calling readFull()
  71. // and set(0) sequentially.
  72. IntT readFullAndReset() {
  73. // This could race with thread destruction and so the access lock should be
  74. // acquired before reading the current value
  75. auto accessor = cache_.accessAllThreads();
  76. IntT ret = readFastAndReset();
  77. for (auto& cache : accessor) {
  78. if (!cache.reset_.load(std::memory_order_acquire)) {
  79. ret += cache.val_.load(std::memory_order_relaxed);
  80. cache.reset_.store(true, std::memory_order_release);
  81. }
  82. }
  83. return ret;
  84. }
  85. void setCacheSize(uint32_t newSize) {
  86. cacheSize_.store(newSize, std::memory_order_release);
  87. }
  88. uint32_t getCacheSize() const {
  89. return cacheSize_.load();
  90. }
  91. ThreadCachedInt& operator+=(IntT inc) {
  92. increment(inc);
  93. return *this;
  94. }
  95. ThreadCachedInt& operator-=(IntT inc) {
  96. increment(-inc);
  97. return *this;
  98. }
  99. // pre-increment (we don't support post-increment)
  100. ThreadCachedInt& operator++() {
  101. increment(1);
  102. return *this;
  103. }
  104. ThreadCachedInt& operator--() {
  105. increment(IntT(-1));
  106. return *this;
  107. }
  108. // Thread-safe set function.
  109. // This is a best effort implementation. In some edge cases, there could be
  110. // data loss (missing counts)
  111. void set(IntT newVal) {
  112. for (auto& cache : cache_.accessAllThreads()) {
  113. cache.reset_.store(true, std::memory_order_release);
  114. }
  115. target_.store(newVal, std::memory_order_release);
  116. }
  117. private:
  118. std::atomic<IntT> target_;
  119. std::atomic<uint32_t> cacheSize_;
  120. ThreadLocalPtr<IntCache, Tag, AccessModeStrict>
  121. cache_; // Must be last for dtor ordering
  122. // This should only ever be modified by one thread
  123. struct IntCache {
  124. ThreadCachedInt* parent_;
  125. mutable std::atomic<IntT> val_;
  126. mutable uint32_t numUpdates_;
  127. std::atomic<bool> reset_;
  128. explicit IntCache(ThreadCachedInt& parent)
  129. : parent_(&parent), val_(0), numUpdates_(0), reset_(false) {}
  130. void increment(IntT inc) {
  131. if (LIKELY(!reset_.load(std::memory_order_acquire))) {
  132. // This thread is the only writer to val_, so it's fine do do
  133. // a relaxed load and do the addition non-atomically.
  134. val_.store(
  135. val_.load(std::memory_order_relaxed) + inc,
  136. std::memory_order_release);
  137. } else {
  138. val_.store(inc, std::memory_order_relaxed);
  139. reset_.store(false, std::memory_order_release);
  140. }
  141. ++numUpdates_;
  142. if (UNLIKELY(
  143. numUpdates_ >
  144. parent_->cacheSize_.load(std::memory_order_acquire))) {
  145. flush();
  146. }
  147. }
  148. void flush() const {
  149. parent_->target_.fetch_add(val_, std::memory_order_release);
  150. val_.store(0, std::memory_order_release);
  151. numUpdates_ = 0;
  152. }
  153. ~IntCache() {
  154. flush();
  155. }
  156. };
  157. };
  158. } // namespace folly