// BitsBenchmark.cpp (4.3 KB)
  /*
   * Copyright 2014-present Facebook, Inc.
   *
   * Licensed under the Apache License, Version 2.0 (the "License");
   * you may not use this file except in compliance with the License.
   * You may obtain a copy of the License at
   *
   *   http://www.apache.org/licenses/LICENSE-2.0
   *
   * Unless required by applicable law or agreed to in writing, software
   * distributed under the License is distributed on an "AS IS" BASIS,
   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   * See the License for the specific language governing permissions and
   * limitations under the License.
   */
  #include <atomic>
  #include <cstddef>
  #include <cstdint>
  #include <memory>
  #include <random>
  #include <type_traits>
  #include <vector>

  #include <glog/logging.h>

  #include <folly/Benchmark.h>
  #include <folly/experimental/Bits.h>
  // Seed source; benchmarkSet() seeds a fresh std::mt19937 from it on each run.
  std::random_device rd;

  // Number of bytes that the benchmarks' bit offsets range over
  // (offsets are taken modulo kBufferSize * 8).
  constexpr size_t kBufferSize = size_t(1) << 10;

  // Scratch storage shared by every benchmark in this file. It is allocated
  // 16 bytes larger than kBufferSize; presumably that slack keeps accesses that
  // start near the last offset inside the allocation.
  std::vector<uint8_t> buffer(kBufferSize + 16);
  25. template <class T>
  26. void benchmarkSet(size_t n, T) {
  27. size_t size = sizeof(T) * 6.9; // use 6.9 bits/byte
  28. const size_t k = 16;
  29. T values[k];
  30. BENCHMARK_SUSPEND {
  31. std::mt19937 gen(rd());
  32. T max, min;
  33. if (std::is_signed<T>::value) {
  34. max = (T(1) << (size - 1)) - 1;
  35. min = -(T(1) << (size - 1));
  36. } else {
  37. max = (T(1) << size) - 1;
  38. min = 0;
  39. }
  40. CHECK_LE(folly::findLastSet(max), size);
  41. CHECK_LE(folly::findLastSet(-min), size);
  42. std::uniform_int_distribution<T> dis(min, max);
  43. for (size_t i = 0; i < k; ++i) {
  44. values[i] = dis(gen);
  45. }
  46. }
  47. for (size_t i = 0; i < n; ++i) {
  48. size_t bit = (i * 2973) % (kBufferSize * 8);
  49. size_t drop = i % size;
  50. folly::Bits<T>::set(
  51. reinterpret_cast<T*>(buffer.data()),
  52. bit,
  53. size - drop,
  54. values[i % k] >> drop);
  55. }
  56. folly::doNotOptimizeAway(
  57. folly::Bits<T>::test(reinterpret_cast<T*>(buffer.data()), 512));
  58. }
  59. BENCHMARK_NAMED_PARAM(benchmarkSet, u16, uint16_t())
  60. BENCHMARK_RELATIVE_NAMED_PARAM(benchmarkSet, i16, int16_t())
  61. BENCHMARK_NAMED_PARAM(benchmarkSet, u32, uint32_t())
  62. BENCHMARK_RELATIVE_NAMED_PARAM(benchmarkSet, i32, int32_t())
  63. BENCHMARK_NAMED_PARAM(benchmarkSet, u64, uint64_t())
  64. BENCHMARK_RELATIVE_NAMED_PARAM(benchmarkSet, i64, int64_t())
  65. BENCHMARK_DRAW_LINE();
  // Zero-initialised counter whose value main() returns as the process exit
  // status. Nothing in the visible code writes to it.
  std::atomic<int64_t> sum{0};
  67. template <class T>
  68. void benchmarkGet(size_t n, T x) {
  69. size_t size = sizeof(T) * 6.9; // use 6.9 bits/byte
  70. for (size_t i = 0; i < n; ++i) {
  71. size_t bit = (i * 2973) % (kBufferSize * 8);
  72. size_t drop = i % size;
  73. x += folly::Bits<T>::get(
  74. reinterpret_cast<T*>(buffer.data()), bit, size - drop);
  75. }
  76. folly::doNotOptimizeAway(x);
  77. }
  78. BENCHMARK_NAMED_PARAM(benchmarkGet, u16, uint16_t(0))
  79. BENCHMARK_RELATIVE_NAMED_PARAM(benchmarkGet, i16, int16_t(0))
  80. BENCHMARK_NAMED_PARAM(benchmarkGet, u32, uint32_t(0))
  81. BENCHMARK_RELATIVE_NAMED_PARAM(benchmarkGet, i32, int32_t(0))
  82. BENCHMARK_NAMED_PARAM(benchmarkGet, u64, uint64_t(0))
  83. BENCHMARK_RELATIVE_NAMED_PARAM(benchmarkGet, i64, int64_t(0))
  #if 0
  ============================================================================
  folly/experimental/test/BitsBenchmark.cpp       relative  time/iter  iters/s
  ============================================================================
  benchmarkSet(u16)                                            8.58ns  116.59M
  benchmarkSet(i16)                                 88.42%     9.70ns  103.08M
  benchmarkSet(u32)                                            8.37ns  119.45M
  benchmarkSet(i32)                                 88.23%     9.49ns  105.39M
  benchmarkSet(u64)                                            9.23ns  108.34M
  benchmarkSet(i64)                                 82.77%    11.15ns   89.68M
  ----------------------------------------------------------------------------
  benchmarkGet(u16)                                            6.32ns  158.13M
  benchmarkGet(i16)                                 80.40%     7.87ns  127.14M
  benchmarkGet(u32)                                            6.34ns  157.65M
  benchmarkGet(i32)                                 84.61%     7.50ns  133.39M
  benchmarkGet(u64)                                            7.32ns  136.58M
  benchmarkGet(i64)                                 85.78%     8.53ns  117.16M
  ============================================================================
  #endif
  103. int main(int argc, char* argv[]) {
  104. gflags::ParseCommandLineFlags(&argc, &argv, true);
  105. folly::runBenchmarks();
  106. return sum.load();
  107. }