TimeseriesHistogram.h 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398
  1. /*
  2. * Copyright 2013-present Facebook, Inc.
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #pragma once
  17. #include <folly/stats/Histogram.h>
  18. #include <folly/stats/MultiLevelTimeSeries.h>
  19. #include <string>
  20. namespace folly {
  21. /*
  22. * TimeseriesHistogram tracks data distributions as they change over time.
  23. *
  24. * Specifically, it is a bucketed histogram with different value ranges assigned
  25. * to each bucket. Within each bucket is a MultiLevelTimeSeries from
  26. * 'folly/stats/MultiLevelTimeSeries.h'. This means that each bucket contains a
  27. * different set of data for different historical time periods, and one can
  28. * query data distributions over different trailing time windows.
  29. *
  30. * For example, this can answer questions: "What is the data distribution over
  31. * the last minute? Over the last 10 minutes? Since I last cleared this
  32. * histogram?"
  33. *
  34. * The class can also estimate percentiles and answer questions like: "What was
  35. * the 99th percentile data value over the last 10 minutes?"
  36. *
  37. * Note: that depending on the size of your buckets and the smoothness
  38. * of your data distribution, the estimate may be way off from the actual
  39. * value. In particular, if the given percentile falls outside of the bucket
  40. * range (i.e. your buckets range in 0 - 100,000 but the 99th percentile is
  41. * around 115,000) this estimate may be very wrong.
  42. *
  43. * The memory usage for a typical histogram is roughly 3k * (# of buckets). All
  44. * insertion operations are amortized O(1), and all queries are O(# of buckets).
  45. */
  46. template <
  47. class T,
  48. class CT = LegacyStatsClock<std::chrono::seconds>,
  49. class C = folly::MultiLevelTimeSeries<T, CT>>
  50. class TimeseriesHistogram {
  51. private:
  52. // NOTE: T must be equivalent to _signed_ numeric type for our math.
  53. static_assert(std::numeric_limits<T>::is_signed, "");
  54. public:
  55. // Values to be inserted into container
  56. using ValueType = T;
  57. // The container type we use internally for each bucket
  58. using ContainerType = C;
  59. // Clock, duration, and time point types
  60. using Clock = CT;
  61. using Duration = typename Clock::duration;
  62. using TimePoint = typename Clock::time_point;
  63. /*
  64. * Create a TimeSeries histogram and initialize the bucketing and levels.
  65. *
  66. * The buckets are created by chopping the range [min, max) into pieces
  67. * of size bucketSize, with the last bucket being potentially shorter. Two
  68. * additional buckets are always created -- the "under" bucket for the range
  69. * (-inf, min) and the "over" bucket for the range [max, +inf).
  70. *
  71. * @param bucketSize the width of each bucket
  72. * @param min the smallest value for the bucket range.
  73. * @param max the largest value for the bucket range
  74. * @param defaultContainer a pre-initialized timeseries with the desired
  75. * number of levels and their durations.
  76. */
  77. TimeseriesHistogram(
  78. ValueType bucketSize,
  79. ValueType min,
  80. ValueType max,
  81. const ContainerType& defaultContainer);
  82. /* Return the bucket size of each bucket in the histogram. */
  83. ValueType getBucketSize() const {
  84. return buckets_.getBucketSize();
  85. }
  86. /* Return the min value at which bucketing begins. */
  87. ValueType getMin() const {
  88. return buckets_.getMin();
  89. }
  90. /* Return the max value at which bucketing ends. */
  91. ValueType getMax() const {
  92. return buckets_.getMax();
  93. }
  94. /* Return the number of levels of the Timeseries object in each bucket */
  95. size_t getNumLevels() const {
  96. return buckets_.getByIndex(0).numLevels();
  97. }
  98. /* Return the number of buckets */
  99. size_t getNumBuckets() const {
  100. return buckets_.getNumBuckets();
  101. }
  102. /*
  103. * Return the threshold of the bucket for the given index in range
  104. * [0..numBuckets). The bucket will have range [thresh, thresh + bucketSize)
  105. * or [thresh, max), whichever is shorter.
  106. */
  107. ValueType getBucketMin(size_t bucketIdx) const {
  108. return buckets_.getBucketMin(bucketIdx);
  109. }
  110. /* Return the actual timeseries in the given bucket (for reading only!) */
  111. const ContainerType& getBucket(size_t bucketIdx) const {
  112. return buckets_.getByIndex(bucketIdx);
  113. }
  114. /* Total count of values at the given timeseries level (all buckets). */
  115. uint64_t count(size_t level) const {
  116. uint64_t total = 0;
  117. for (size_t b = 0; b < buckets_.getNumBuckets(); ++b) {
  118. total += buckets_.getByIndex(b).count(level);
  119. }
  120. return total;
  121. }
  122. /* Total count of values added during the given interval (all buckets). */
  123. uint64_t count(TimePoint start, TimePoint end) const {
  124. uint64_t total = 0;
  125. for (size_t b = 0; b < buckets_.getNumBuckets(); ++b) {
  126. total += buckets_.getByIndex(b).count(start, end);
  127. }
  128. return total;
  129. }
  130. /* Total sum of values at the given timeseries level (all buckets). */
  131. ValueType sum(size_t level) const {
  132. ValueType total = ValueType();
  133. for (size_t b = 0; b < buckets_.getNumBuckets(); ++b) {
  134. total += buckets_.getByIndex(b).sum(level);
  135. }
  136. return total;
  137. }
  138. /* Total sum of values added during the given interval (all buckets). */
  139. ValueType sum(TimePoint start, TimePoint end) const {
  140. ValueType total = ValueType();
  141. for (size_t b = 0; b < buckets_.getNumBuckets(); ++b) {
  142. total += buckets_.getByIndex(b).sum(start, end);
  143. }
  144. return total;
  145. }
  146. /* Average of values at the given timeseries level (all buckets). */
  147. template <typename ReturnType = double>
  148. ReturnType avg(size_t level) const {
  149. auto total = ValueType();
  150. uint64_t nsamples = 0;
  151. computeAvgData(&total, &nsamples, level);
  152. return folly::detail::avgHelper<ReturnType>(total, nsamples);
  153. }
  154. /* Average of values added during the given interval (all buckets). */
  155. template <typename ReturnType = double>
  156. ReturnType avg(TimePoint start, TimePoint end) const {
  157. auto total = ValueType();
  158. uint64_t nsamples = 0;
  159. computeAvgData(&total, &nsamples, start, end);
  160. return folly::detail::avgHelper<ReturnType>(total, nsamples);
  161. }
  162. /*
  163. * Rate at the given timeseries level (all buckets).
  164. * This is the sum of all values divided by the time interval (in seconds).
  165. */
  166. template <typename ReturnType = double>
  167. ReturnType rate(size_t level) const {
  168. auto total = ValueType();
  169. Duration elapsed(0);
  170. computeRateData(&total, &elapsed, level);
  171. return folly::detail::rateHelper<ReturnType, Duration, Duration>(
  172. total, elapsed);
  173. }
  174. /*
  175. * Rate for the given interval (all buckets).
  176. * This is the sum of all values divided by the time interval (in seconds).
  177. */
  178. template <typename ReturnType = double>
  179. ReturnType rate(TimePoint start, TimePoint end) const {
  180. auto total = ValueType();
  181. Duration elapsed(0);
  182. computeRateData(&total, &elapsed, start, end);
  183. return folly::detail::rateHelper<ReturnType, Duration, Duration>(
  184. total, elapsed);
  185. }
  186. /*
  187. * Update every underlying timeseries object with the given timestamp. You
  188. * must call this directly before querying to ensure that the data in all
  189. * buckets is decayed properly.
  190. */
  191. void update(TimePoint now);
  192. /* clear all the data from the histogram. */
  193. void clear();
  194. /* Add a value into the histogram with timestamp 'now' */
  195. void addValue(TimePoint now, const ValueType& value);
  196. /* Add a value the given number of times with timestamp 'now' */
  197. void addValue(TimePoint now, const ValueType& value, uint64_t times);
  198. /*
  199. * Add all of the values from the specified histogram.
  200. *
  201. * All of the values will be added to the current time-slot.
  202. *
  203. * One use of this is for thread-local caching of frequently updated
  204. * histogram data. For example, each thread can store a thread-local
  205. * Histogram that is updated frequently, and only add it to the global
  206. * TimeseriesHistogram once a second.
  207. */
  208. void addValues(TimePoint now, const folly::Histogram<ValueType>& values);
  209. /*
  210. * Return an estimate of the value at the given percentile in the histogram
  211. * in the given timeseries level. The percentile is estimated as follows:
  212. *
  213. * - We retrieve a count of the values in each bucket (at the given level)
  214. * - We determine via the counts which bucket the given percentile falls in.
  215. * - We assume the average value in the bucket is also its median
  216. * - We then linearly interpolate within the bucket, by assuming that the
  217. * distribution is uniform in the two value ranges [left, median) and
  218. * [median, right) where [left, right) is the bucket value range.
  219. *
  220. * Caveats:
  221. * - If the histogram is empty, this always returns ValueType(), usually 0.
  222. * - For the 'under' and 'over' special buckets, their range is unbounded
  223. * on one side. In order for the interpolation to work, we assume that
  224. * the average value in the bucket is equidistant from the two edges of
  225. * the bucket. In other words, we assume that the distance between the
  226. * average and the known bound is equal to the distance between the average
  227. * and the unknown bound.
  228. */
  229. ValueType getPercentileEstimate(double pct, size_t level) const;
  230. /*
  231. * Return an estimate of the value at the given percentile in the histogram
  232. * in the given historical interval. Please see the documentation for
  233. * getPercentileEstimate(double pct, size_t level) for the explanation of the
  234. * estimation algorithm.
  235. */
  236. ValueType getPercentileEstimate(double pct, TimePoint start, TimePoint end)
  237. const;
  238. /*
  239. * Return the bucket index that the given percentile falls into (in the
  240. * given timeseries level). This index can then be used to retrieve either
  241. * the bucket threshold, or other data from inside the bucket.
  242. */
  243. size_t getPercentileBucketIdx(double pct, size_t level) const;
  244. /*
  245. * Return the bucket index that the given percentile falls into (in the
  246. * given historical interval). This index can then be used to retrieve either
  247. * the bucket threshold, or other data from inside the bucket.
  248. */
  249. size_t getPercentileBucketIdx(double pct, TimePoint start, TimePoint end)
  250. const;
  251. /* Get the bucket threshold for the bucket containing the given pct. */
  252. ValueType getPercentileBucketMin(double pct, size_t level) const {
  253. return getBucketMin(getPercentileBucketIdx(pct, level));
  254. }
  255. /* Get the bucket threshold for the bucket containing the given pct. */
  256. ValueType getPercentileBucketMin(double pct, TimePoint start, TimePoint end)
  257. const {
  258. return getBucketMin(getPercentileBucketIdx(pct, start, end));
  259. }
  260. /*
  261. * Print out serialized data from all buckets at the given level.
  262. * Format is: BUCKET [',' BUCKET ...]
  263. * Where: BUCKET == bucketMin ':' count ':' avg
  264. */
  265. std::string getString(size_t level) const;
  266. /*
  267. * Print out serialized data for all buckets in the historical interval.
  268. * For format, please see getString(size_t level).
  269. */
  270. std::string getString(TimePoint start, TimePoint end) const;
  271. /*
  272. * Legacy APIs that accept a Duration parameters rather than TimePoint.
  273. *
  274. * These treat the Duration as relative to the clock epoch.
  275. * Prefer using the correct TimePoint-based APIs instead. These APIs will
  276. * eventually be deprecated and removed.
  277. */
  278. void update(Duration now) {
  279. update(TimePoint(now));
  280. }
  281. void addValue(Duration now, const ValueType& value) {
  282. addValue(TimePoint(now), value);
  283. }
  284. void addValue(Duration now, const ValueType& value, uint64_t times) {
  285. addValue(TimePoint(now), value, times);
  286. }
  287. void addValues(Duration now, const folly::Histogram<ValueType>& values) {
  288. addValues(TimePoint(now), values);
  289. }
  290. private:
  291. typedef ContainerType Bucket;
  292. struct CountFromLevel {
  293. explicit CountFromLevel(size_t level) : level_(level) {}
  294. uint64_t operator()(const ContainerType& bucket) const {
  295. return bucket.count(level_);
  296. }
  297. private:
  298. size_t level_;
  299. };
  300. struct CountFromInterval {
  301. explicit CountFromInterval(TimePoint start, TimePoint end)
  302. : start_(start), end_(end) {}
  303. uint64_t operator()(const ContainerType& bucket) const {
  304. return bucket.count(start_, end_);
  305. }
  306. private:
  307. TimePoint start_;
  308. TimePoint end_;
  309. };
  310. struct AvgFromLevel {
  311. explicit AvgFromLevel(size_t level) : level_(level) {}
  312. ValueType operator()(const ContainerType& bucket) const {
  313. return bucket.template avg<ValueType>(level_);
  314. }
  315. private:
  316. size_t level_;
  317. };
  318. template <typename ReturnType>
  319. struct AvgFromInterval {
  320. explicit AvgFromInterval(TimePoint start, TimePoint end)
  321. : start_(start), end_(end) {}
  322. ReturnType operator()(const ContainerType& bucket) const {
  323. return bucket.template avg<ReturnType>(start_, end_);
  324. }
  325. private:
  326. TimePoint start_;
  327. TimePoint end_;
  328. };
  329. /*
  330. * Special logic for the case of only one unique value registered
  331. * (this can happen when clients don't pick good bucket ranges or have
  332. * other bugs). It's a lot easier for clients to track down these issues
  333. * if they are getting the correct value.
  334. */
  335. void maybeHandleSingleUniqueValue(const ValueType& value);
  336. void computeAvgData(ValueType* total, uint64_t* nsamples, size_t level) const;
  337. void computeAvgData(
  338. ValueType* total,
  339. uint64_t* nsamples,
  340. TimePoint start,
  341. TimePoint end) const;
  342. void computeRateData(ValueType* total, Duration* elapsed, size_t level) const;
  343. void computeRateData(
  344. ValueType* total,
  345. Duration* elapsed,
  346. TimePoint start,
  347. TimePoint end) const;
  348. folly::detail::HistogramBuckets<ValueType, ContainerType> buckets_;
  349. bool haveNotSeenValue_;
  350. bool singleUniqueValue_;
  351. ValueType firstValue_;
  352. };
  353. } // namespace folly