FingerprintBenchmark.cpp 4.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149
  1. /*
  2. * Copyright 2015-present Facebook, Inc.
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #include <random>
  17. #include <folly/Benchmark.h>
  18. #include <folly/Fingerprint.h>
  19. #include <folly/Format.h>
  20. #include <folly/detail/SlowFingerprint.h>
  21. using namespace std;
  22. using namespace folly;
  23. using folly::detail::SlowFingerprint;
  24. namespace {
  25. constexpr int kMaxIds = 64 << 10; // 64Ki
  26. constexpr int kMaxTerms = 64 << 10;
  27. // Globals are generally bad, but this is a benchmark, so there.
  28. uint64_t ids[kMaxIds];
  29. std::string terms[kMaxTerms];
  30. void initialize() {
  31. std::mt19937 rng;
  32. for (int i = 0; i < kMaxIds; i++) {
  33. ids[i] = (((uint64_t)rng()) << 32) | rng();
  34. }
  35. // Use randomly generated words. These numbers are out of my hat and
  36. // probably wrong.
  37. // word length = uniformly distributed between 1 and 10
  38. // charset = 0x20 - 0x7f
  39. std::uniform_int_distribution<size_t> term_len(1, 10);
  40. std::uniform_int_distribution<uint16_t> term_char(0x20, 0x7f);
  41. for (int i = 0; i < kMaxTerms; i++) {
  42. std::string& term = terms[i];
  43. int len = term_len(rng);
  44. term.reserve(len);
  45. for (int j = 0; j < len; j++) {
  46. term.append(1, (char)term_char(rng));
  47. }
  48. }
  49. }
  50. template <class FP>
  51. void fingerprintIds(int num_iterations, int num_ids) {
  52. for (int iter = 0; iter < num_iterations; iter++) {
  53. FP fp;
  54. for (int i = 0; i < num_ids; i++) {
  55. fp.update64(ids[i]);
  56. }
  57. // GOTCHA: if we don't actually call write(), compiler optimizes
  58. // away the inner loop!
  59. uint64_t out;
  60. fp.write(&out);
  61. VLOG(1) << out;
  62. }
  63. }
  64. template <class FP>
  65. void fingerprintTerms(int num_iterations, int num_terms) {
  66. for (int iter = 0; iter < num_iterations; iter++) {
  67. FP fp;
  68. for (int i = 0; i < num_terms; i++) {
  69. fp.update(terms[i]);
  70. }
  71. // GOTCHA: if we don't actually call write(), compiler optimizes
  72. // away the inner loop!
  73. uint64_t out;
  74. fp.write(&out);
  75. VLOG(1) << out;
  76. }
  77. }
  78. void fastFingerprintIds64(int num_iterations, int num_ids) {
  79. fingerprintIds<Fingerprint<64>>(num_iterations, num_ids);
  80. }
  81. void slowFingerprintIds64(int num_iterations, int num_ids) {
  82. fingerprintIds<SlowFingerprint<64>>(num_iterations, num_ids);
  83. }
  84. void fastFingerprintIds96(int num_iterations, int num_ids) {
  85. fingerprintIds<Fingerprint<96>>(num_iterations, num_ids);
  86. }
  87. void fastFingerprintIds128(int num_iterations, int num_ids) {
  88. fingerprintIds<Fingerprint<128>>(num_iterations, num_ids);
  89. }
  90. void fastFingerprintTerms64(int num_iterations, int num_ids) {
  91. fingerprintTerms<Fingerprint<64>>(num_iterations, num_ids);
  92. }
  93. void slowFingerprintTerms64(int num_iterations, int num_ids) {
  94. fingerprintTerms<SlowFingerprint<64>>(num_iterations, num_ids);
  95. }
  96. void fastFingerprintTerms96(int num_iterations, int num_ids) {
  97. fingerprintTerms<Fingerprint<96>>(num_iterations, num_ids);
  98. }
  99. void fastFingerprintTerms128(int num_iterations, int num_ids) {
  100. fingerprintTerms<Fingerprint<128>>(num_iterations, num_ids);
  101. }
  102. } // namespace
  103. // Only benchmark one size of slowFingerprint; it's significantly slower
  104. // than fastFingeprint (as you can see for 64 bits) and it just slows down
  105. // the benchmark without providing any useful data.
  106. int main(int argc, char** argv) {
  107. gflags::ParseCommandLineFlags(&argc, &argv, true);
  108. #define BM(name, min, max) \
  109. for (size_t i = min; i <= max; i *= 2) { \
  110. addBenchmark( \
  111. __FILE__, sformat("{}_{}", #name, i).c_str(), [=](int iters) { \
  112. name(iters, i); \
  113. return iters; \
  114. }); \
  115. }
  116. BM(fastFingerprintIds64, 1, kMaxIds)
  117. BM(slowFingerprintIds64, 1, kMaxIds)
  118. BM(fastFingerprintIds96, 1, kMaxIds)
  119. BM(fastFingerprintIds128, 1, kMaxIds)
  120. BM(fastFingerprintTerms64, 1, kMaxTerms)
  121. BM(slowFingerprintTerms64, 1, kMaxTerms)
  122. BM(fastFingerprintTerms96, 1, kMaxTerms)
  123. BM(fastFingerprintTerms128, 1, kMaxTerms)
  124. #undef BM
  125. initialize();
  126. runBenchmarks();
  127. return 0;
  128. }