Select64.h 3.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107
  1. /*
  2. * Copyright 2015-present Facebook, Inc.
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #pragma once
  17. #include <array>
  18. #include <glog/logging.h>
  19. #include <folly/Portability.h>
  20. #include <folly/experimental/Instructions.h>
  21. namespace folly {
  22. namespace detail {
  23. // kSelectInByte
  24. //
  25. // Described in:
  26. // http://dsiutils.di.unimi.it/docs/it/unimi/dsi/bits/Fast.html#selectInByte
  27. //
  28. // A precomputed tabled containing the positions of the set bits in the binary
  29. // representations of all 8-bit unsigned integers.
  30. //
  31. // For i: [0, 256) ranging over all 8-bit unsigned integers and for j: [0, 8)
  32. // ranging over all 0-based bit positions in an 8-bit unsigned integer, the
  33. // table entry kSelectInByte[i][j] is the 0-based bit position of the j-th set
  34. // bit in the binary representation of i, or 8 if it has fewer than j set bits.
  35. //
  36. // Example: i: 17 (b00010001), j: [0, 8)
  37. // kSelectInByte[b00010001][0] = 0
  38. // kSelectInByte[b00010001][1] = 4
  39. // kSelectInByte[b00010001][2] = 8
  40. // ...
  41. // kSelectInByte[b00010001][7] = 8
  42. extern std::array<std::array<std::uint8_t, 256>, 8> const kSelectInByte;
  43. } // namespace detail
  44. /**
  45. * Returns the position of the k-th 1 in the 64-bit word x.
  46. * k is 0-based, so k=0 returns the position of the first 1.
  47. *
  48. * Uses the broadword selection algorithm by Vigna [1], improved by Gog
  49. * and Petri [2] and Vigna [3].
  50. *
  51. * [1] Sebastiano Vigna. Broadword Implementation of Rank/Select
  52. * Queries. WEA, 2008
  53. *
  54. * [2] Simon Gog, Matthias Petri. Optimized succinct data structures
  55. * for massive data. Softw. Pract. Exper., 2014
  56. *
  57. * [3] Sebastiano Vigna. MG4J 5.2.1. http://mg4j.di.unimi.it/
  58. */
  59. template <class Instructions>
  60. inline uint64_t select64(uint64_t x, uint64_t k) {
  61. DCHECK_LT(k, Instructions::popcount(x));
  62. constexpr uint64_t kOnesStep4 = 0x1111111111111111ULL;
  63. constexpr uint64_t kOnesStep8 = 0x0101010101010101ULL;
  64. constexpr uint64_t kMSBsStep8 = 0x80ULL * kOnesStep8;
  65. auto s = x;
  66. s = s - ((s & 0xA * kOnesStep4) >> 1);
  67. s = (s & 0x3 * kOnesStep4) + ((s >> 2) & 0x3 * kOnesStep4);
  68. s = (s + (s >> 4)) & 0xF * kOnesStep8;
  69. uint64_t byteSums = s * kOnesStep8;
  70. uint64_t kStep8 = k * kOnesStep8;
  71. uint64_t geqKStep8 = (((kStep8 | kMSBsStep8) - byteSums) & kMSBsStep8);
  72. uint64_t place = Instructions::popcount(geqKStep8) * 8;
  73. uint64_t byteRank = k - (((byteSums << 8) >> place) & uint64_t(0xFF));
  74. return place + detail::kSelectInByte[byteRank][((x >> place) & 0xFF)];
  75. }
  76. template <>
  77. FOLLY_ALWAYS_INLINE uint64_t
  78. select64<compression::instructions::Haswell>(uint64_t x, uint64_t k) {
  79. #if defined(__GNUC__) || defined(__clang__)
  80. // GCC and Clang won't inline the intrinsics.
  81. uint64_t result = uint64_t(1) << k;
  82. asm("pdep %1, %0, %0\n\t"
  83. "tzcnt %0, %0"
  84. : "+r"(result)
  85. : "r"(x));
  86. return result;
  87. #else
  88. return _tzcnt_u64(_pdep_u64(1ULL << k, x));
  89. #endif
  90. }
  91. } // namespace folly