Histogram.h 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507
  1. /*
  2. * Copyright 2013-present Facebook, Inc.
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #pragma once
  17. #include <cstddef>
  18. #include <cstdint>
  19. #include <limits>
  20. #include <ostream>
  21. #include <stdexcept>
  22. #include <string>
  23. #include <vector>
  24. #include <folly/CPortability.h>
  25. #include <folly/Traits.h>
  26. #include <folly/stats/detail/Bucket.h>
  27. namespace folly {
  28. namespace detail {
  29. /*
  30. * A helper class to manage a set of histogram buckets.
  31. */
  32. template <typename T, typename BucketT>
  33. class HistogramBuckets {
  34. public:
  35. typedef T ValueType;
  36. typedef BucketT BucketType;
  37. /*
  38. * Create a set of histogram buckets.
  39. *
  40. * One bucket will be created for each bucketSize interval of values within
  41. * the specified range. Additionally, one bucket will be created to track
  42. * all values that fall below the specified minimum, and one bucket will be
  43. * created for all values above the specified maximum.
  44. *
  45. * If (max - min) is not a multiple of bucketSize, the last bucket will cover
  46. * a smaller range of values than the other buckets.
  47. *
  48. * (max - min) must be larger than or equal to bucketSize.
  49. */
  50. HistogramBuckets(
  51. ValueType bucketSize,
  52. ValueType min,
  53. ValueType max,
  54. const BucketType& defaultBucket);
  55. /* Returns the bucket size of each bucket in the histogram. */
  56. ValueType getBucketSize() const {
  57. return bucketSize_;
  58. }
  59. /* Returns the min value at which bucketing begins. */
  60. ValueType getMin() const {
  61. return min_;
  62. }
  63. /* Returns the max value at which bucketing ends. */
  64. ValueType getMax() const {
  65. return max_;
  66. }
  67. /*
  68. * Returns the number of buckets.
  69. *
  70. * This includes the total number of buckets for the [min, max) range,
  71. * plus 2 extra buckets, one for handling values less than min, and one for
  72. * values greater than max.
  73. */
  74. size_t getNumBuckets() const {
  75. return buckets_.size();
  76. }
  77. /* Returns the bucket index into which the given value would fall. */
  78. size_t getBucketIdx(ValueType value) const;
  79. /* Returns the bucket for the specified value */
  80. BucketType& getByValue(ValueType value) {
  81. return buckets_[getBucketIdx(value)];
  82. }
  83. /* Returns the bucket for the specified value */
  84. const BucketType& getByValue(ValueType value) const {
  85. return buckets_[getBucketIdx(value)];
  86. }
  87. /*
  88. * Returns the bucket at the specified index.
  89. *
  90. * Note that index 0 is the bucket for all values less than the specified
  91. * minimum. Index 1 is the first bucket in the specified bucket range.
  92. */
  93. BucketType& getByIndex(size_t idx) {
  94. return buckets_[idx];
  95. }
  96. /* Returns the bucket at the specified index. */
  97. const BucketType& getByIndex(size_t idx) const {
  98. return buckets_[idx];
  99. }
  100. /*
  101. * Returns the minimum threshold for the bucket at the given index.
  102. *
  103. * The bucket at the specified index will store values in the range
  104. * [bucketMin, bucketMin + bucketSize), or [bucketMin, max), if the overall
  105. * max is smaller than bucketMin + bucketSize.
  106. */
  107. ValueType getBucketMin(size_t idx) const {
  108. if (idx == 0) {
  109. return std::numeric_limits<ValueType>::min();
  110. }
  111. if (idx == buckets_.size() - 1) {
  112. return max_;
  113. }
  114. return ValueType(min_ + ((idx - 1) * bucketSize_));
  115. }
  116. /*
  117. * Returns the maximum threshold for the bucket at the given index.
  118. *
  119. * The bucket at the specified index will store values in the range
  120. * [bucketMin, bucketMin + bucketSize), or [bucketMin, max), if the overall
  121. * max is smaller than bucketMin + bucketSize.
  122. */
  123. ValueType getBucketMax(size_t idx) const {
  124. if (idx == buckets_.size() - 1) {
  125. return std::numeric_limits<ValueType>::max();
  126. }
  127. return ValueType(min_ + (idx * bucketSize_));
  128. }
  129. /**
  130. * Computes the total number of values stored across all buckets.
  131. *
  132. * Runs in O(numBuckets)
  133. *
  134. * @param countFn A function that takes a const BucketType&, and returns the
  135. * number of values in that bucket
  136. * @return Returns the total number of values stored across all buckets
  137. */
  138. template <typename CountFn>
  139. uint64_t computeTotalCount(CountFn countFromBucket) const;
  140. /**
  141. * Determine which bucket the specified percentile falls into.
  142. *
  143. * Looks for the bucket that contains the Nth percentile data point.
  144. *
  145. * @param pct The desired percentile to find, as a value from 0.0 to 1.0.
  146. * @param countFn A function that takes a const BucketType&, and returns the
  147. * number of values in that bucket.
  148. * @param lowPct The lowest percentile stored in the selected bucket will be
  149. * returned via this parameter.
  150. * @param highPct The highest percentile stored in the selected bucket will
  151. * be returned via this parameter.
  152. *
  153. * @return Returns the index of the bucket that contains the Nth percentile
  154. * data point.
  155. */
  156. template <typename CountFn>
  157. size_t getPercentileBucketIdx(
  158. double pct,
  159. CountFn countFromBucket,
  160. double* lowPct = nullptr,
  161. double* highPct = nullptr) const;
  162. /**
  163. * Estimate the value at the specified percentile.
  164. *
  165. * @param pct The desired percentile to find, as a value from 0.0 to 1.0.
  166. * @param countFn A function that takes a const BucketType&, and returns the
  167. * number of values in that bucket.
  168. * @param avgFn A function that takes a const BucketType&, and returns the
  169. * average of all the values in that bucket.
  170. *
  171. * @return Returns an estimate for N, where N is the number where exactly pct
  172. * percentage of the data points in the histogram are less than N.
  173. */
  174. template <typename CountFn, typename AvgFn>
  175. ValueType getPercentileEstimate(
  176. double pct,
  177. CountFn countFromBucket,
  178. AvgFn avgFromBucket) const;
  179. /*
  180. * Iterator access to the buckets.
  181. *
  182. * Note that the first bucket is for all values less than min, and the last
  183. * bucket is for all values greater than max. The buckets tracking values in
  184. * the [min, max) actually start at the second bucket.
  185. */
  186. typename std::vector<BucketType>::const_iterator begin() const {
  187. return buckets_.begin();
  188. }
  189. typename std::vector<BucketType>::iterator begin() {
  190. return buckets_.begin();
  191. }
  192. typename std::vector<BucketType>::const_iterator end() const {
  193. return buckets_.end();
  194. }
  195. typename std::vector<BucketType>::iterator end() {
  196. return buckets_.end();
  197. }
  198. private:
  199. ValueType bucketSize_;
  200. ValueType min_;
  201. ValueType max_;
  202. std::vector<BucketType> buckets_;
  203. };
  204. } // namespace detail
  205. /*
  206. * A basic histogram class.
  207. *
  208. * Groups data points into equally-sized buckets, and stores the overall sum of
  209. * the data points in each bucket, as well as the number of data points in the
  210. * bucket.
  211. *
  212. * The caller must specify the minimum and maximum data points to expect ahead
  213. * of time, as well as the bucket width.
  214. */
  215. template <typename T>
  216. class Histogram {
  217. public:
  218. typedef T ValueType;
  219. typedef detail::Bucket<T> Bucket;
  220. Histogram(ValueType bucketSize, ValueType min, ValueType max)
  221. : buckets_(bucketSize, min, max, Bucket()) {}
  222. /* Add a data point to the histogram */
  223. void addValue(ValueType value) {
  224. Bucket& bucket = buckets_.getByValue(value);
  225. // NOTE: Overflow is handled elsewhere and tests check this
  226. // behavior (see HistogramTest.cpp TestOverflow* tests).
  227. // TODO: It would be nice to handle overflow here and redesign this class.
  228. auto const addend = to_unsigned(value);
  229. bucket.sum = static_cast<ValueType>(to_unsigned(bucket.sum) + addend);
  230. bucket.count += 1;
  231. }
  232. /* Add multiple same data points to the histogram */
  233. void addRepeatedValue(ValueType value, uint64_t nSamples) {
  234. Bucket& bucket = buckets_.getByValue(value);
  235. // NOTE: Overflow is handled elsewhere and tests check this
  236. // behavior (see HistogramTest.cpp TestOverflow* tests).
  237. // TODO: It would be nice to handle overflow here and redesign this class.
  238. auto const addend = to_unsigned(value) * nSamples;
  239. bucket.sum = static_cast<ValueType>(to_unsigned(bucket.sum) + addend);
  240. bucket.count += nSamples;
  241. }
  242. /*
  243. * Remove a data point to the histogram
  244. *
  245. * Note that this method does not actually verify that this exact data point
  246. * had previously been added to the histogram; it merely subtracts the
  247. * requested value from the appropriate bucket's sum.
  248. */
  249. void removeValue(ValueType value) {
  250. Bucket& bucket = buckets_.getByValue(value);
  251. // NOTE: Overflow is handled elsewhere and tests check this
  252. // behavior (see HistogramTest.cpp TestOverflow* tests).
  253. // TODO: It would be nice to handle overflow here and redesign this class.
  254. if (bucket.count > 0) {
  255. auto const subtrahend = to_unsigned(value);
  256. bucket.sum = static_cast<ValueType>(to_unsigned(bucket.sum) - subtrahend);
  257. bucket.count -= 1;
  258. } else {
  259. bucket.sum = ValueType();
  260. bucket.count = 0;
  261. }
  262. }
  263. /* Remove multiple same data points from the histogram */
  264. void removeRepeatedValue(ValueType value, uint64_t nSamples) {
  265. Bucket& bucket = buckets_.getByValue(value);
  266. // TODO: It would be nice to handle overflow here.
  267. if (bucket.count >= nSamples) {
  268. bucket.sum -= value * nSamples;
  269. bucket.count -= nSamples;
  270. } else {
  271. bucket.sum = ValueType();
  272. bucket.count = 0;
  273. }
  274. }
  275. /* Remove all data points from the histogram */
  276. void clear() {
  277. for (size_t i = 0; i < buckets_.getNumBuckets(); i++) {
  278. buckets_.getByIndex(i).clear();
  279. }
  280. }
  281. /* Subtract another histogram data from the histogram */
  282. void subtract(const Histogram& hist) {
  283. // the two histogram bucket definitions must match to support
  284. // subtract.
  285. if (getBucketSize() != hist.getBucketSize() || getMin() != hist.getMin() ||
  286. getMax() != hist.getMax() || getNumBuckets() != hist.getNumBuckets()) {
  287. throw std::invalid_argument("Cannot subtract input histogram.");
  288. }
  289. for (size_t i = 0; i < buckets_.getNumBuckets(); i++) {
  290. buckets_.getByIndex(i) -= hist.buckets_.getByIndex(i);
  291. }
  292. }
  293. /* Merge two histogram data together */
  294. void merge(const Histogram& hist) {
  295. // the two histogram bucket definitions must match to support
  296. // a merge.
  297. if (getBucketSize() != hist.getBucketSize() || getMin() != hist.getMin() ||
  298. getMax() != hist.getMax() || getNumBuckets() != hist.getNumBuckets()) {
  299. throw std::invalid_argument("Cannot merge from input histogram.");
  300. }
  301. for (size_t i = 0; i < buckets_.getNumBuckets(); i++) {
  302. buckets_.getByIndex(i) += hist.buckets_.getByIndex(i);
  303. }
  304. }
  305. /* Copy bucket values from another histogram */
  306. void copy(const Histogram& hist) {
  307. // the two histogram bucket definition must match
  308. if (getBucketSize() != hist.getBucketSize() || getMin() != hist.getMin() ||
  309. getMax() != hist.getMax() || getNumBuckets() != hist.getNumBuckets()) {
  310. throw std::invalid_argument("Cannot copy from input histogram.");
  311. }
  312. for (size_t i = 0; i < buckets_.getNumBuckets(); i++) {
  313. buckets_.getByIndex(i) = hist.buckets_.getByIndex(i);
  314. }
  315. }
  316. /* Returns the bucket size of each bucket in the histogram. */
  317. ValueType getBucketSize() const {
  318. return buckets_.getBucketSize();
  319. }
  320. /* Returns the min value at which bucketing begins. */
  321. ValueType getMin() const {
  322. return buckets_.getMin();
  323. }
  324. /* Returns the max value at which bucketing ends. */
  325. ValueType getMax() const {
  326. return buckets_.getMax();
  327. }
  328. /* Returns the number of buckets */
  329. size_t getNumBuckets() const {
  330. return buckets_.getNumBuckets();
  331. }
  332. /* Returns the specified bucket (for reading only!) */
  333. const Bucket& getBucketByIndex(size_t idx) const {
  334. return buckets_.getByIndex(idx);
  335. }
  336. /*
  337. * Returns the minimum threshold for the bucket at the given index.
  338. *
  339. * The bucket at the specified index will store values in the range
  340. * [bucketMin, bucketMin + bucketSize), or [bucketMin, max), if the overall
  341. * max is smaller than bucketMin + bucketSize.
  342. */
  343. ValueType getBucketMin(size_t idx) const {
  344. return buckets_.getBucketMin(idx);
  345. }
  346. /*
  347. * Returns the maximum threshold for the bucket at the given index.
  348. *
  349. * The bucket at the specified index will store values in the range
  350. * [bucketMin, bucketMin + bucketSize), or [bucketMin, max), if the overall
  351. * max is smaller than bucketMin + bucketSize.
  352. */
  353. ValueType getBucketMax(size_t idx) const {
  354. return buckets_.getBucketMax(idx);
  355. }
  356. /**
  357. * Computes the total number of values stored across all buckets.
  358. *
  359. * Runs in O(numBuckets)
  360. */
  361. uint64_t computeTotalCount() const {
  362. CountFromBucket countFn;
  363. return buckets_.computeTotalCount(countFn);
  364. }
  365. /*
  366. * Get the bucket that the specified percentile falls into
  367. *
  368. * The lowest and highest percentile data points in returned bucket will be
  369. * returned in the lowPct and highPct arguments, if they are not nullptr.
  370. */
  371. size_t getPercentileBucketIdx(
  372. double pct,
  373. double* lowPct = nullptr,
  374. double* highPct = nullptr) const {
  375. // We unfortunately can't use lambdas here yet;
  376. // Some users of this code are still built with gcc-4.4.
  377. CountFromBucket countFn;
  378. return buckets_.getPercentileBucketIdx(pct, countFn, lowPct, highPct);
  379. }
  380. /**
  381. * Estimate the value at the specified percentile.
  382. *
  383. * @param pct The desired percentile to find, as a value from 0.0 to 1.0.
  384. *
  385. * @return Returns an estimate for N, where N is the number where exactly pct
  386. * percentage of the data points in the histogram are less than N.
  387. */
  388. ValueType getPercentileEstimate(double pct) const {
  389. CountFromBucket countFn;
  390. AvgFromBucket avgFn;
  391. return buckets_.getPercentileEstimate(pct, countFn, avgFn);
  392. }
  393. /*
  394. * Get a human-readable string describing the histogram contents
  395. */
  396. std::string debugString() const;
  397. /*
  398. * Write the histogram contents in tab-separated values (TSV) format.
  399. * Format is "min max count sum".
  400. */
  401. void toTSV(std::ostream& out, bool skipEmptyBuckets = true) const;
  402. struct CountFromBucket {
  403. uint64_t operator()(const Bucket& bucket) const {
  404. return bucket.count;
  405. }
  406. };
  407. struct AvgFromBucket {
  408. ValueType operator()(const Bucket& bucket) const {
  409. if (bucket.count == 0) {
  410. return ValueType(0);
  411. }
  412. // Cast bucket.count to a signed integer type. This ensures that we
  413. // perform division properly here: If bucket.sum is a signed integer
  414. // type but we divide by an unsigned number, unsigned division will be
  415. // performed and bucket.sum will be converted to unsigned first.
  416. // If bucket.sum is unsigned, the code will still do unsigned division
  417. // correctly.
  418. //
  419. // The only downside is if bucket.count is large enough to be negative
  420. // when treated as signed. That should be extremely unlikely, though.
  421. return bucket.sum / static_cast<int64_t>(bucket.count);
  422. }
  423. };
  424. private:
  425. template <
  426. typename S,
  427. typename = _t<std::enable_if<std::is_integral<S>::value>>>
  428. static constexpr _t<std::make_unsigned<S>> to_unsigned(S s) {
  429. return static_cast<_t<std::make_unsigned<S>>>(s);
  430. }
  431. template <
  432. typename S,
  433. typename = _t<std::enable_if<!std::is_integral<S>::value>>>
  434. static constexpr S to_unsigned(S s) {
  435. return s;
  436. }
  437. detail::HistogramBuckets<ValueType, Bucket> buckets_;
  438. };
  439. } // namespace folly
  440. // MSVC 2017 Update 3/4 has an issue with explicitly instantiating templated
  441. // functions with default arguments inside templated classes when compiled
  442. // with /permissive- (the default for the CMake build), so we directly include
  443. // the -defs as if it were -inl, and don't provide the explicit instantiations.
  444. // https://developercommunity.visualstudio.com/content/problem/81223/incorrect-error-c5037-with-permissive.html
  445. #if defined(_MSC_VER) && _MSC_FULL_VER >= 191125506 && \
  446. _MSC_FULL_VER <= 191125547
  447. #define FOLLY_MSVC_USE_WORKAROUND_FOR_C5037 1
  448. #else
  449. #define FOLLY_MSVC_USE_WORKAROUND_FOR_C5037 0
  450. #endif
  451. #if FOLLY_MSVC_USE_WORKAROUND_FOR_C5037
  452. #include <folly/stats/Histogram-defs.h>
  453. #endif