MultiLevelTimeSeries.h 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440
  1. /*
  2. * Copyright 2013-present Facebook, Inc.
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #pragma once
  17. #include <chrono>
  18. #include <stdexcept>
  19. #include <string>
  20. #include <vector>
  21. #include <folly/String.h>
  22. #include <folly/stats/BucketedTimeSeries.h>
  23. #include <glog/logging.h>
  24. namespace folly {
  25. /*
  26. * This class represents a timeseries which keeps several levels of data
  27. * granularity (similar in principle to the loads reported by the UNIX
  28. * 'uptime' command). It uses several instances (one per level) of
  29. * BucketedTimeSeries as the underlying storage.
  30. *
  31. * This can easily be used to track sums (and thus rates or averages) over
  32. * several predetermined time periods, as well as all-time sums. For example,
  33. * you would use to it to track query rate or response speed over the last
  34. * 5, 15, 30, and 60 minutes.
  35. *
  36. * The MultiLevelTimeSeries takes a list of level durations as an input; the
  37. * durations must be strictly increasing. Furthermore a special level can be
  38. * provided with a duration of '0' -- this will be an "all-time" level. If
  39. * an all-time level is provided, it MUST be the last level present.
  40. *
  41. * The class assumes that time advances forward -- you can't retroactively add
  42. * values for events in the past -- the 'now' argument is provided for better
  43. * efficiency and ease of unittesting.
  44. *
  45. * The class is not thread-safe -- use your own synchronization!
  46. */
  47. template <typename VT, typename CT = LegacyStatsClock<std::chrono::seconds>>
  48. class MultiLevelTimeSeries {
  49. public:
  50. using ValueType = VT;
  51. using Clock = CT;
  52. using Duration = typename Clock::duration;
  53. using TimePoint = typename Clock::time_point;
  54. using Level = folly::BucketedTimeSeries<ValueType, Clock>;
  55. /*
  56. * Create a new MultiLevelTimeSeries.
  57. *
  58. * This creates a new MultiLevelTimeSeries that tracks time series data at the
  59. * specified time durations (level). The time series data tracked at each
  60. * level is then further divided by numBuckets for memory efficiency.
  61. *
  62. * The durations must be strictly increasing. Furthermore a special level can
  63. * be provided with a duration of '0' -- this will be an "all-time" level. If
  64. * an all-time level is provided, it MUST be the last level present.
  65. */
  66. MultiLevelTimeSeries(
  67. size_t numBuckets,
  68. size_t numLevels,
  69. const Duration levelDurations[]);
  70. MultiLevelTimeSeries(
  71. size_t numBuckets,
  72. std::initializer_list<Duration> durations);
  73. /*
  74. * Return the number of buckets used to track time series at each level.
  75. */
  76. size_t numBuckets() const {
  77. // The constructor ensures that levels_ has at least one item
  78. return levels_[0].numBuckets();
  79. }
  80. /*
  81. * Return the number of levels tracked by MultiLevelTimeSeries.
  82. */
  83. size_t numLevels() const {
  84. return levels_.size();
  85. }
  86. /*
  87. * Get the BucketedTimeSeries backing the specified level.
  88. *
  89. * Note: you should generally call update() or flush() before accessing the
  90. * data. Otherwise you may be reading stale data if update() or flush() has
  91. * not been called recently.
  92. */
  93. const Level& getLevel(size_t level) const {
  94. CHECK_LT(level, levels_.size());
  95. return levels_[level];
  96. }
  97. /*
  98. * Get the highest granularity level that is still large enough to contain
  99. * data going back to the specified start time.
  100. *
  101. * Note: you should generally call update() or flush() before accessing the
  102. * data. Otherwise you may be reading stale data if update() or flush() has
  103. * not been called recently.
  104. */
  105. const Level& getLevel(TimePoint start) const {
  106. for (const auto& level : levels_) {
  107. if (level.isAllTime()) {
  108. return level;
  109. }
  110. // Note that we use duration() here rather than elapsed().
  111. // If duration is large enough to contain the start time then this level
  112. // is good enough, even if elapsed() indicates that no data was recorded
  113. // before the specified start time.
  114. if (level.getLatestTime() - level.duration() <= start) {
  115. return level;
  116. }
  117. }
  118. // We should always have an all-time level, so this is never reached.
  119. LOG(FATAL) << "No level of timeseries covers internval"
  120. << " from " << start.time_since_epoch().count() << " to now";
  121. return levels_.back();
  122. }
  123. /*
  124. * Get the BucketedTimeSeries backing the specified level.
  125. *
  126. * Note: you should generally call update() or flush() before accessing the
  127. * data. Otherwise you may be reading stale data if update() or flush() has
  128. * not been called recently.
  129. */
  130. const Level& getLevelByDuration(Duration duration) const {
  131. // since the number of levels is expected to be small (less than 5 in most
  132. // cases), a simple linear scan would be efficient and is intentionally
  133. // chosen here over other alternatives for lookup.
  134. for (const auto& level : levels_) {
  135. if (level.duration() == duration) {
  136. return level;
  137. }
  138. }
  139. throw std::out_of_range(folly::to<std::string>(
  140. "No level of duration ", duration.count(), " found"));
  141. }
  142. /*
  143. * Return the sum of all the data points currently tracked at this level.
  144. *
  145. * Note: you should generally call update() or flush() before accessing the
  146. * data. Otherwise you may be reading stale data if update() or flush() has
  147. * not been called recently.
  148. */
  149. ValueType sum(size_t level) const {
  150. return getLevel(level).sum();
  151. }
  152. /*
  153. * Return the average (sum / count) of all the data points currently tracked
  154. * at this level.
  155. *
  156. * The return type may be specified to control whether floating-point or
  157. * integer division should be performed.
  158. *
  159. * Note: you should generally call update() or flush() before accessing the
  160. * data. Otherwise you may be reading stale data if update() or flush() has
  161. * not been called recently.
  162. */
  163. template <typename ReturnType = double>
  164. ReturnType avg(size_t level) const {
  165. return getLevel(level).template avg<ReturnType>();
  166. }
  167. /*
  168. * Return the rate (sum divided by elaspsed time) of the all data points
  169. * currently tracked at this level.
  170. *
  171. * Note: you should generally call update() or flush() before accessing the
  172. * data. Otherwise you may be reading stale data if update() or flush() has
  173. * not been called recently.
  174. */
  175. template <typename ReturnType = double, typename Interval = Duration>
  176. ReturnType rate(size_t level) const {
  177. return getLevel(level).template rate<ReturnType, Interval>();
  178. }
  179. /*
  180. * Return the number of data points currently tracked at this level.
  181. *
  182. * Note: you should generally call update() or flush() before accessing the
  183. * data. Otherwise you may be reading stale data if update() or flush() has
  184. * not been called recently.
  185. */
  186. uint64_t count(size_t level) const {
  187. return getLevel(level).count();
  188. }
  189. /*
  190. * Return the count divided by the elapsed time tracked at this level.
  191. *
  192. * Note: you should generally call update() or flush() before accessing the
  193. * data. Otherwise you may be reading stale data if update() or flush() has
  194. * not been called recently.
  195. */
  196. template <typename ReturnType = double, typename Interval = Duration>
  197. ReturnType countRate(size_t level) const {
  198. return getLevel(level).template countRate<ReturnType, Interval>();
  199. }
  200. /*
  201. * Return the sum of all the data points currently tracked at this level.
  202. *
  203. * This method is identical to sum(size_t level) above but takes in the
  204. * duration that the user is interested in querying as the parameter.
  205. *
  206. * Note: you should generally call update() or flush() before accessing the
  207. * data. Otherwise you may be reading stale data if update() or flush() has
  208. * not been called recently.
  209. */
  210. ValueType sum(Duration duration) const {
  211. return getLevelByDuration(duration).sum();
  212. }
  213. /*
  214. * Return the average (sum / count) of all the data points currently tracked
  215. * at this level.
  216. *
  217. * This method is identical to avg(size_t level) above but takes in the
  218. * duration that the user is interested in querying as the parameter.
  219. *
  220. * Note: you should generally call update() or flush() before accessing the
  221. * data. Otherwise you may be reading stale data if update() or flush() has
  222. * not been called recently.
  223. */
  224. template <typename ReturnType = double>
  225. ReturnType avg(Duration duration) const {
  226. return getLevelByDuration(duration).template avg<ReturnType>();
  227. }
  228. /*
  229. * Return the rate (sum divided by elaspsed time) of the all data points
  230. * currently tracked at this level.
  231. *
  232. * This method is identical to rate(size_t level) above but takes in the
  233. * duration that the user is interested in querying as the parameter.
  234. *
  235. * Note: you should generally call update() or flush() before accessing the
  236. * data. Otherwise you may be reading stale data if update() or flush() has
  237. * not been called recently.
  238. */
  239. template <typename ReturnType = double, typename Interval = Duration>
  240. ReturnType rate(Duration duration) const {
  241. return getLevelByDuration(duration).template rate<ReturnType, Interval>();
  242. }
  243. /*
  244. * Return the number of data points currently tracked at this level.
  245. *
  246. * This method is identical to count(size_t level) above but takes in the
  247. * duration that the user is interested in querying as the parameter.
  248. *
  249. * Note: you should generally call update() or flush() before accessing the
  250. * data. Otherwise you may be reading stale data if update() or flush() has
  251. * not been called recently.
  252. */
  253. uint64_t count(Duration duration) const {
  254. return getLevelByDuration(duration).count();
  255. }
  256. /*
  257. * Return the count divided by the elapsed time tracked at this level.
  258. *
  259. * This method is identical to countRate(size_t level) above but takes in the
  260. * duration that the user is interested in querying as the parameter.
  261. *
  262. * Note: you should generally call update() or flush() before accessing the
  263. * data. Otherwise you may be reading stale data if update() or flush() has
  264. * not been called recently.
  265. */
  266. template <typename ReturnType = double, typename Interval = Duration>
  267. ReturnType countRate(Duration duration) const {
  268. return getLevelByDuration(duration)
  269. .template countRate<ReturnType, Interval>();
  270. }
  271. /*
  272. * Estimate the sum of the data points that occurred in the specified time
  273. * period at this level.
  274. *
  275. * The range queried is [start, end).
  276. * That is, start is inclusive, and end is exclusive.
  277. *
  278. * Note that data outside of the timeseries duration will no longer be
  279. * available for use in the estimation. Specifying a start time earlier than
  280. * getEarliestTime() will not have much effect, since only data points after
  281. * that point in time will be counted.
  282. *
  283. * Note that the value returned is an estimate, and may not be precise.
  284. *
  285. * Note: you should generally call update() or flush() before accessing the
  286. * data. Otherwise you may be reading stale data if update() or flush() has
  287. * not been called recently.
  288. */
  289. ValueType sum(TimePoint start, TimePoint end) const {
  290. return getLevel(start).sum(start, end);
  291. }
  292. /*
  293. * Estimate the average value during the specified time period.
  294. *
  295. * The same caveats documented in the sum(TimePoint start, TimePoint end)
  296. * comments apply here as well.
  297. *
  298. * Note: you should generally call update() or flush() before accessing the
  299. * data. Otherwise you may be reading stale data if update() or flush() has
  300. * not been called recently.
  301. */
  302. template <typename ReturnType = double>
  303. ReturnType avg(TimePoint start, TimePoint end) const {
  304. return getLevel(start).template avg<ReturnType>(start, end);
  305. }
  306. /*
  307. * Estimate the rate during the specified time period.
  308. *
  309. * The same caveats documented in the sum(TimePoint start, TimePoint end)
  310. * comments apply here as well.
  311. *
  312. * Note: you should generally call update() or flush() before accessing the
  313. * data. Otherwise you may be reading stale data if update() or flush() has
  314. * not been called recently.
  315. */
  316. template <typename ReturnType = double>
  317. ReturnType rate(TimePoint start, TimePoint end) const {
  318. return getLevel(start).template rate<ReturnType>(start, end);
  319. }
  320. /*
  321. * Estimate the count during the specified time period.
  322. *
  323. * The same caveats documented in the sum(TimePoint start, TimePoint end)
  324. * comments apply here as well.
  325. *
  326. * Note: you should generally call update() or flush() before accessing the
  327. * data. Otherwise you may be reading stale data if update() or flush() has
  328. * not been called recently.
  329. */
  330. uint64_t count(TimePoint start, TimePoint end) const {
  331. return getLevel(start).count(start, end);
  332. }
  333. /*
  334. * Adds the value 'val' at time 'now' to all levels.
  335. *
  336. * Data points added at the same time point is cached internally here and not
  337. * propagated to the underlying levels until either flush() is called or when
  338. * update from a different time comes.
  339. *
  340. * This function expects time to always move forwards: it cannot be used to
  341. * add historical data points that have occurred in the past. If now is
  342. * older than the another timestamp that has already been passed to
  343. * addValue() or update(), now will be ignored and the latest timestamp will
  344. * be used.
  345. */
  346. void addValue(TimePoint now, const ValueType& val);
  347. /*
  348. * Adds the value 'val' at time 'now' to all levels.
  349. */
  350. void addValue(TimePoint now, const ValueType& val, uint64_t times);
  351. /*
  352. * Adds the value 'total' at time 'now' to all levels as the sum of
  353. * 'nsamples' samples.
  354. */
  355. void
  356. addValueAggregated(TimePoint now, const ValueType& total, uint64_t nsamples);
  357. /*
  358. * Update all the levels to the specified time, doing all the necessary
  359. * work to rotate the buckets and remove any stale data points.
  360. *
  361. * When reading data from the timeseries, you should make sure to manually
  362. * call update() before accessing the data. Otherwise you may be reading
  363. * stale data if update() has not been called recently.
  364. */
  365. void update(TimePoint now);
  366. /*
  367. * Reset all the timeseries to an empty state as if no data points have ever
  368. * been added to it.
  369. */
  370. void clear();
  371. /*
  372. * Flush all cached updates.
  373. */
  374. void flush();
  375. /*
  376. * Legacy APIs that accept a Duration parameters rather than TimePoint.
  377. *
  378. * These treat the Duration as relative to the clock epoch.
  379. * Prefer using the correct TimePoint-based APIs instead. These APIs will
  380. * eventually be deprecated and removed.
  381. */
  382. void update(Duration now) {
  383. update(TimePoint(now));
  384. }
  385. void addValue(Duration now, const ValueType& value) {
  386. addValue(TimePoint(now), value);
  387. }
  388. void addValue(Duration now, const ValueType& value, uint64_t times) {
  389. addValue(TimePoint(now), value, times);
  390. }
  391. void
  392. addValueAggregated(Duration now, const ValueType& total, uint64_t nsamples) {
  393. addValueAggregated(TimePoint(now), total, nsamples);
  394. }
  395. private:
  396. std::vector<Level> levels_;
  397. // Updates within the same time interval are cached
  398. // They are flushed out when updates from a different time comes,
  399. // or flush() is called.
  400. TimePoint cachedTime_;
  401. ValueType cachedSum_;
  402. uint64_t cachedCount_;
  403. };
  404. } // namespace folly