/* * Copyright 2013-present Facebook, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #pragma once #include #include #include namespace folly { namespace detail { template HistogramBuckets::HistogramBuckets( ValueType bucketSize, ValueType min, ValueType max, const BucketType& defaultBucket) : bucketSize_(bucketSize), min_(min), max_(max) { CHECK_GT(bucketSize_, ValueType(0)); CHECK_LT(min_, max_); // Deliberately make this a signed type, because we're about // to compare it against max-min, which is nominally signed, too. int64_t numBuckets = int64_t((max - min) / bucketSize); // Round up if the bucket size does not fit evenly if (numBuckets * bucketSize < max - min) { ++numBuckets; } // Add 2 for the extra 'below min' and 'above max' buckets numBuckets += 2; buckets_.assign(size_t(numBuckets), defaultBucket); } template size_t HistogramBuckets::getBucketIdx(ValueType value) const { if (value < min_) { return 0; } else if (value >= max_) { return buckets_.size() - 1; } else { // the 1 is the below_min bucket return size_t(((value - min_) / bucketSize_) + 1); } } template template uint64_t HistogramBuckets::computeTotalCount( CountFn countFromBucket) const { uint64_t count = 0; for (size_t n = 0; n < buckets_.size(); ++n) { count += countFromBucket(const_cast(buckets_[n])); } return count; } template template size_t HistogramBuckets::getPercentileBucketIdx( double pct, CountFn countFromBucket, double* lowPct, double* highPct) const { CHECK_GE(pct, 0.0); CHECK_LE(pct, 1.0); auto numBuckets = buckets_.size(); // Compute the counts in each bucket std::vector counts(numBuckets); uint64_t totalCount = 0; for (size_t n = 0; n < numBuckets; ++n) { uint64_t bucketCount = countFromBucket(const_cast(buckets_[n])); counts[n] = bucketCount; totalCount += bucketCount; } // If there are no elements, just return the lowest bucket. // Note that we return bucket 1, which is the first bucket in the // histogram range; bucket 0 is for all values below min_. if (totalCount == 0) { // Set lowPct and highPct both to 0. // getPercentileEstimate() will recognize this to mean that the histogram // is empty. if (lowPct) { *lowPct = 0.0; } if (highPct) { *highPct = 0.0; } return 1; } // Loop through all the buckets, keeping track of each bucket's // percentile range: [0,10], [10,17], [17,45], etc. When we find a range // that includes our desired percentile, we return that bucket index. double prevPct = 0.0; double curPct = 0.0; uint64_t curCount = 0; size_t idx; for (idx = 0; idx < numBuckets; ++idx) { if (counts[idx] == 0) { // skip empty buckets continue; } prevPct = curPct; curCount += counts[idx]; curPct = static_cast(curCount) / totalCount; if (pct <= curPct) { // This is the desired bucket break; } } if (lowPct) { *lowPct = prevPct; } if (highPct) { *highPct = curPct; } return idx; } template template T HistogramBuckets::getPercentileEstimate( double pct, CountFn countFromBucket, AvgFn avgFromBucket) const { // Find the bucket where this percentile falls double lowPct; double highPct; size_t bucketIdx = getPercentileBucketIdx(pct, countFromBucket, &lowPct, &highPct); if (lowPct == 0.0 && highPct == 0.0) { // Invalid range -- the buckets must all be empty // Return the default value for ValueType. return ValueType(); } if (lowPct == highPct) { // Unlikely to have exact equality, // but just return the bucket average in this case. // We handle this here to avoid division by 0 below. return avgFromBucket(buckets_[bucketIdx]); } CHECK_GE(pct, lowPct); CHECK_LE(pct, highPct); CHECK_LT(lowPct, highPct); // Compute information about this bucket ValueType avg = avgFromBucket(buckets_[bucketIdx]); ValueType low; ValueType high; if (bucketIdx == 0) { if (avg > min_) { // This normally shouldn't happen. This bucket is only supposed to track // values less than min_. Most likely this means that integer overflow // occurred, and the code in avgFromBucket() returned a huge value // instead of a small one. Just return the minimum possible value for // now. // // (Note that if the counter keeps being decremented, eventually it will // wrap and become small enough that we won't detect this any more, and // we will return bogus information.) LOG(ERROR) << "invalid average value in histogram minimum bucket: " << avg << " > " << min_ << ": possible integer overflow?"; return getBucketMin(bucketIdx); } // For the below-min bucket, just assume the lowest value ever seen is // twice as far away from min_ as avg. high = min_; low = high - (2 * (high - avg)); // Adjust low in case it wrapped if (low > avg) { low = std::numeric_limits::min(); } } else if (bucketIdx == buckets_.size() - 1) { if (avg < max_) { // Most likely this means integer overflow occurred. See the comments // above in the minimum case. LOG(ERROR) << "invalid average value in histogram maximum bucket: " << avg << " < " << max_ << ": possible integer overflow?"; return getBucketMax(bucketIdx); } // Similarly for the above-max bucket, assume the highest value ever seen // is twice as far away from max_ as avg. low = max_; high = low + (2 * (avg - low)); // Adjust high in case it wrapped if (high < avg) { high = std::numeric_limits::max(); } } else { low = getBucketMin(bucketIdx); high = getBucketMax(bucketIdx); if (avg < low || avg > high) { // Most likely this means an integer overflow occurred. // See the comments above. Return the midpoint between low and high // as a best guess, since avg is meaningless. LOG(ERROR) << "invalid average value in histogram bucket: " << avg << " not in range [" << low << ", " << high << "]: possible integer overflow?"; return (low + high) / 2; } } // Since we know the average value in this bucket, we can do slightly better // than just assuming the data points in this bucket are uniformly // distributed between low and high. // // Assume that the median value in this bucket is the same as the average // value. double medianPct = (lowPct + highPct) / 2.0; if (pct < medianPct) { // Assume that the data points lower than the median of this bucket // are uniformly distributed between low and avg double pctThroughSection = (pct - lowPct) / (medianPct - lowPct); return T(low + ((avg - low) * pctThroughSection)); } else { // Assume that the data points greater than the median of this bucket // are uniformly distributed between avg and high double pctThroughSection = (pct - medianPct) / (highPct - medianPct); return T(avg + ((high - avg) * pctThroughSection)); } } } // namespace detail template std::string Histogram::debugString() const { std::string ret = folly::to( "num buckets: ", buckets_.getNumBuckets(), ", bucketSize: ", buckets_.getBucketSize(), ", min: ", buckets_.getMin(), ", max: ", buckets_.getMax(), "\n"); for (size_t i = 0; i < buckets_.getNumBuckets(); ++i) { folly::toAppend( " ", buckets_.getBucketMin(i), ": ", buckets_.getByIndex(i).count, "\n", &ret); } return ret; } template void Histogram::toTSV(std::ostream& out, bool skipEmptyBuckets) const { for (size_t i = 0; i < buckets_.getNumBuckets(); ++i) { // Do not output empty buckets in order to reduce data file size. if (skipEmptyBuckets && getBucketByIndex(i).count == 0) { continue; } const auto& bucket = getBucketByIndex(i); out << getBucketMin(i) << '\t' << getBucketMax(i) << '\t' << bucket.count << '\t' << bucket.sum << '\n'; } } } // namespace folly