F14Table.h

  1. /*
  2. * Copyright 2017-present Facebook, Inc.
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #pragma once
  17. #include <cstddef>
  18. #include <cstdint>
  19. #include <cstring>
  20. #include <array>
  21. #include <iterator>
  22. #include <limits>
  23. #include <memory>
  24. #include <new>
  25. #include <type_traits>
  26. #include <utility>
  27. #include <vector>
  28. #include <folly/Bits.h>
  29. #include <folly/ConstexprMath.h>
  30. #include <folly/Likely.h>
  31. #include <folly/Portability.h>
  32. #include <folly/ScopeGuard.h>
  33. #include <folly/Traits.h>
  34. #include <folly/functional/ApplyTuple.h>
  35. #include <folly/functional/Invoke.h>
  36. #include <folly/lang/Align.h>
  37. #include <folly/lang/Assume.h>
  38. #include <folly/lang/Exception.h>
  39. #include <folly/lang/Launder.h>
  40. #include <folly/lang/SafeAssert.h>
  41. #include <folly/portability/Builtins.h>
  42. #include <folly/container/detail/F14Defaults.h>
  43. #include <folly/container/detail/F14IntrinsicsAvailability.h>
  44. #if FOLLY_ASAN_ENABLED && defined(FOLLY_TLS)
  45. #define FOLLY_F14_TLS_IF_ASAN FOLLY_TLS
  46. #else
  47. #define FOLLY_F14_TLS_IF_ASAN
  48. #endif
  49. #if FOLLY_F14_VECTOR_INTRINSICS_AVAILABLE
  50. #if FOLLY_F14_CRC_INTRINSIC_AVAILABLE
  51. #if FOLLY_NEON
  52. #include <arm_acle.h> // __crc32cd
  53. #else
  54. #include <nmmintrin.h> // _mm_crc32_u64
  55. #endif
  56. #else
  57. #ifdef _WIN32
  58. #include <intrin.h> // _mul128 in fallback bit mixer
  59. #endif
  60. #endif
  61. #if FOLLY_NEON
  62. #include <arm_neon.h> // uint8x16_t intrinsics
  63. #else // SSE2
  64. #include <immintrin.h> // __m128i intrinsics
  65. #include <xmmintrin.h> // _mm_prefetch
  66. #endif
  67. #endif
  68. namespace folly {
  69. struct F14TableStats {
  70. char const* policy;
  71. std::size_t size{0};
  72. std::size_t valueSize{0};
  73. std::size_t bucketCount{0};
  74. std::size_t chunkCount{0};
  75. std::vector<std::size_t> chunkOccupancyHisto;
  76. std::vector<std::size_t> chunkOutboundOverflowHisto;
  77. std::vector<std::size_t> chunkHostedOverflowHisto;
  78. std::vector<std::size_t> keyProbeLengthHisto;
  79. std::vector<std::size_t> missProbeLengthHisto;
  80. std::size_t totalBytes{0};
  81. std::size_t overheadBytes{0};
  82. private:
  83. template <typename T>
  84. static auto computeHelper(T const* m) -> decltype(m->computeStats()) {
  85. return m->computeStats();
  86. }
  87. static F14TableStats computeHelper(...) {
  88. return {};
  89. }
  90. public:
  91. template <typename T>
  92. static F14TableStats compute(T const& m) {
  93. return computeHelper(&m);
  94. }
  95. };
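// Usage sketch (illustrative, not part of the original header): compute()
// dispatches through the computeHelper overloads above, so any table type
// that exposes a computeStats() method is introspected, and everything else
// yields a default-constructed stats object. The container below is only an
// example of a possible caller:
//
//   folly::F14ValueMap<int, int> m;
//   m.insert({1, 2});
//   auto stats = folly::F14TableStats::compute(m);
//   // stats.size, stats.chunkCount, stats.overheadBytes, ...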
  96. namespace f14 {
  97. namespace detail {
  98. template <F14IntrinsicsMode>
  99. struct F14LinkCheck {};
  100. template <>
  101. struct F14LinkCheck<getF14IntrinsicsMode()> {
  102. // The purpose of this method is to trigger a link failure if
  103. // compilation flags vary across compilation units. The definition
  104. // is in F14Table.cpp, so only one of F14LinkCheck<None>::check,
  105. // F14LinkCheck<Simd>::check, or F14LinkCheck<SimdAndCrc>::check will
  106. // be available at link time.
  107. //
  108. // To cause a link failure the function must be invoked in code that
  109. // is not optimized away, so we call it on a couple of cold paths
  110. // (exception handling paths in copy construction and rehash). LTO may
  111. // remove it entirely, but that's fine.
  112. static void check() noexcept;
  113. };
  114. #if defined(_LIBCPP_VERSION)
  115. template <typename K, typename V, typename H>
  116. struct StdNodeReplica {
  117. void* next;
  118. std::size_t hash;
  119. V value;
  120. };
  121. #else
  122. template <typename H>
  123. struct StdIsFastHash : std::true_type {};
  124. template <>
  125. struct StdIsFastHash<std::hash<long double>> : std::false_type {};
  126. template <typename... Args>
  127. struct StdIsFastHash<std::hash<std::basic_string<Args...>>> : std::false_type {
  128. };
  129. // TODO: add specialization for std::basic_string_view
  130. // Mimics the internal node of STL unordered containers, to estimate its size
  131. template <typename K, typename V, typename H, typename Enable = void>
  132. struct StdNodeReplica {
  133. void* next;
  134. V value;
  135. };
  136. template <typename K, typename V, typename H>
  137. struct StdNodeReplica<
  138. K,
  139. V,
  140. H,
  141. std::enable_if_t<
  142. !StdIsFastHash<H>::value || !is_nothrow_invocable<H, K>::value>> {
  143. void* next;
  144. V value;
  145. std::size_t hash;
  146. };
  147. #endif
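// Sizing sketch (illustrative assumption, not from the original header):
// StdNodeReplica exists only for accounting, e.g. to approximate how many
// bytes a std::unordered_map node for the same key/value would occupy:
//
//   using K = int;
//   using V = std::pair<K const, double>;
//   constexpr auto approxStdNodeBytes =
//       sizeof(StdNodeReplica<K, V, std::hash<K>>);
//
// With a fast, noexcept hasher the replica omits the cached hash field, so
// the estimate is just a next pointer plus the value; otherwise it also
// includes a std::size_t hash.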
  148. } // namespace detail
  149. } // namespace f14
  150. #if FOLLY_F14_VECTOR_INTRINSICS_AVAILABLE
  151. namespace f14 {
  152. namespace detail {
  153. template <typename Policy>
  154. class F14Table;
  155. } // namespace detail
  156. } // namespace f14
  157. class F14HashToken final {
  158. public:
  159. F14HashToken() = default;
  160. private:
  161. using HashPair = std::pair<std::size_t, std::size_t>;
  162. explicit F14HashToken(HashPair hp) : hp_(hp) {}
  163. explicit operator HashPair() const {
  164. return hp_;
  165. }
  166. HashPair hp_;
  167. template <typename Policy>
  168. friend class f14::detail::F14Table;
  169. };
  170. namespace f14 {
  171. namespace detail {
  172. //// Defaults should be selected using void
  173. template <typename Arg, typename Default>
  174. using VoidDefault =
  175. std::conditional_t<std::is_same<Arg, Default>::value, void, Arg>;
  176. template <typename Arg, typename Default>
  177. using Defaulted =
  178. typename std::conditional_t<std::is_same<Arg, void>::value, Default, Arg>;
  179. template <
  180. typename TableKey,
  181. typename Hasher,
  182. typename KeyEqual,
  183. typename ArgKey,
  184. typename Void = void>
  185. struct EligibleForHeterogeneousFind : std::false_type {};
  186. template <
  187. typename TableKey,
  188. typename Hasher,
  189. typename KeyEqual,
  190. typename ArgKey>
  191. struct EligibleForHeterogeneousFind<
  192. TableKey,
  193. Hasher,
  194. KeyEqual,
  195. ArgKey,
  196. void_t<typename Hasher::is_transparent, typename KeyEqual::is_transparent>>
  197. : std::true_type {};
  198. template <
  199. typename TableKey,
  200. typename Hasher,
  201. typename KeyEqual,
  202. typename ArgKey>
  203. using EligibleForHeterogeneousInsert = Conjunction<
  204. EligibleForHeterogeneousFind<TableKey, Hasher, KeyEqual, ArgKey>,
  205. std::is_constructible<TableKey, ArgKey>>;
  206. template <
  207. typename TableKey,
  208. typename Hasher,
  209. typename KeyEqual,
  210. typename KeyArg0OrBool,
  211. typename... KeyArgs>
  212. using KeyTypeForEmplaceHelper = std::conditional_t<
  213. sizeof...(KeyArgs) == 1 &&
  214. (std::is_same<remove_cvref_t<KeyArg0OrBool>, TableKey>::value ||
  215. EligibleForHeterogeneousFind<
  216. TableKey,
  217. Hasher,
  218. KeyEqual,
  219. KeyArg0OrBool>::value),
  220. KeyArg0OrBool&&,
  221. TableKey>;
  222. template <
  223. typename TableKey,
  224. typename Hasher,
  225. typename KeyEqual,
  226. typename... KeyArgs>
  227. using KeyTypeForEmplace = KeyTypeForEmplaceHelper<
  228. TableKey,
  229. Hasher,
  230. KeyEqual,
  231. std::tuple_element_t<0, std::tuple<KeyArgs..., bool>>,
  232. KeyArgs...>;
  233. ////////////////
  234. template <typename T>
  235. FOLLY_ALWAYS_INLINE static void prefetchAddr(T const* ptr) {
  236. #ifndef _WIN32
  237. __builtin_prefetch(static_cast<void const*>(ptr));
  238. #elif FOLLY_NEON
  239. __prefetch(static_cast<void const*>(ptr));
  240. #else
  241. _mm_prefetch(
  242. static_cast<char const*>(static_cast<void const*>(ptr)), _MM_HINT_T0);
  243. #endif
  244. }
  245. template <typename T>
  246. FOLLY_ALWAYS_INLINE static unsigned findFirstSetNonZero(T mask) {
  247. assume(mask != 0);
  248. if (sizeof(mask) == sizeof(unsigned)) {
  249. return __builtin_ctz(static_cast<unsigned>(mask));
  250. } else {
  251. return __builtin_ctzll(mask);
  252. }
  253. }
  254. #if FOLLY_NEON
  255. using TagVector = uint8x16_t;
  256. using MaskType = uint64_t;
  257. constexpr unsigned kMaskSpacing = 4;
  258. #else // SSE2
  259. using TagVector = __m128i;
  260. using MaskType = uint32_t;
  261. constexpr unsigned kMaskSpacing = 1;
  262. #endif
  263. // We could use unaligned loads to relax this requirement, but that
  264. // would both incur a performance penalty and require a bulkier packed
  265. // ItemIter format
  266. constexpr std::size_t kRequiredVectorAlignment =
  267. constexpr_max(std::size_t{16}, alignof(max_align_t));
  268. using EmptyTagVectorType = std::aligned_storage_t<
  269. sizeof(TagVector) + kRequiredVectorAlignment,
  270. alignof(max_align_t)>;
  271. extern EmptyTagVectorType kEmptyTagVector;
  272. extern FOLLY_F14_TLS_IF_ASAN std::size_t asanPendingSafeInserts;
  273. extern FOLLY_F14_TLS_IF_ASAN std::size_t asanRehashState;
  274. template <unsigned BitCount>
  275. struct FullMask {
  276. static constexpr MaskType value =
  277. (FullMask<BitCount - 1>::value << kMaskSpacing) + 1;
  278. };
  279. template <>
  280. struct FullMask<1> : std::integral_constant<MaskType, 1> {};
  281. #if FOLLY_ARM
  282. // Mask iteration is different for ARM because that is the only platform
  283. // for which the mask is bigger than a register.
  284. // Iterates a mask, optimized for the case that only a few bits are set
  285. class SparseMaskIter {
  286. static_assert(kMaskSpacing == 4, "");
  287. uint32_t interleavedMask_;
  288. public:
  289. explicit SparseMaskIter(MaskType mask)
  290. : interleavedMask_{static_cast<uint32_t>(((mask >> 32) << 2) | mask)} {}
  291. bool hasNext() {
  292. return interleavedMask_ != 0;
  293. }
  294. unsigned next() {
  295. FOLLY_SAFE_DCHECK(hasNext(), "");
  296. unsigned i = findFirstSetNonZero(interleavedMask_);
  297. interleavedMask_ &= (interleavedMask_ - 1);
  298. return ((i >> 2) | (i << 2)) & 0xf;
  299. }
  300. };
  301. // Iterates a mask, optimized for the case that most bits are set
  302. class DenseMaskIter {
  303. static_assert(kMaskSpacing == 4, "");
  304. std::size_t count_;
  305. unsigned index_;
  306. uint8_t const* tags_;
  307. public:
  308. explicit DenseMaskIter(uint8_t const* tags, MaskType mask) {
  309. if (mask == 0) {
  310. count_ = 0;
  311. } else {
  312. count_ = popcount(static_cast<uint32_t>(((mask >> 32) << 2) | mask));
  313. if (LIKELY((mask & 1) != 0)) {
  314. index_ = 0;
  315. } else {
  316. index_ = findFirstSetNonZero(mask) / kMaskSpacing;
  317. }
  318. tags_ = tags;
  319. }
  320. }
  321. bool hasNext() {
  322. return count_ > 0;
  323. }
  324. unsigned next() {
  325. auto rv = index_;
  326. --count_;
  327. if (count_ > 0) {
  328. do {
  329. ++index_;
  330. } while ((tags_[index_] & 0x80) == 0);
  331. }
  332. FOLLY_SAFE_DCHECK(index_ < 16, "");
  333. return rv;
  334. }
  335. };
  336. #else
  337. // Iterates a mask, optimized for the case that only a few bits are set
  338. class SparseMaskIter {
  339. MaskType mask_;
  340. public:
  341. explicit SparseMaskIter(MaskType mask) : mask_{mask} {}
  342. bool hasNext() {
  343. return mask_ != 0;
  344. }
  345. unsigned next() {
  346. FOLLY_SAFE_DCHECK(hasNext(), "");
  347. unsigned i = findFirstSetNonZero(mask_);
  348. mask_ &= (mask_ - 1);
  349. return i / kMaskSpacing;
  350. }
  351. };
  352. // Iterates a mask, optimized for the case that most bits are set
  353. class DenseMaskIter {
  354. MaskType mask_;
  355. unsigned index_{0};
  356. public:
  357. explicit DenseMaskIter(uint8_t const*, MaskType mask) : mask_{mask} {}
  358. bool hasNext() {
  359. return mask_ != 0;
  360. }
  361. unsigned next() {
  362. FOLLY_SAFE_DCHECK(hasNext(), "");
  363. if (LIKELY((mask_ & 1) != 0)) {
  364. mask_ >>= kMaskSpacing;
  365. return index_++;
  366. } else {
  367. unsigned s = findFirstSetNonZero(mask_);
  368. unsigned rv = index_ + (s / kMaskSpacing);
  369. mask_ >>= (s + kMaskSpacing);
  370. index_ = rv + 1;
  371. return rv;
  372. }
  373. }
  374. };
  375. #endif
  376. // Iterates a mask, returning pairs of [begin,end) index covering blocks
  377. // of set bits
  378. class MaskRangeIter {
  379. MaskType mask_;
  380. unsigned shift_{0};
  381. public:
  382. explicit MaskRangeIter(MaskType mask) {
  383. // If kMaskSpacing is > 1 then there will be empty bits even for
  384. // contiguous ranges. Fill them in.
  385. mask_ = mask * ((1 << kMaskSpacing) - 1);
  386. }
  387. bool hasNext() {
  388. return mask_ != 0;
  389. }
  390. std::pair<unsigned, unsigned> next() {
  391. FOLLY_SAFE_DCHECK(hasNext(), "");
  392. auto s = shift_;
  393. unsigned b = findFirstSetNonZero(mask_);
  394. unsigned e = findFirstSetNonZero(~(mask_ | (mask_ - 1)));
  395. mask_ >>= e;
  396. shift_ = s + e;
  397. return std::make_pair((s + b) / kMaskSpacing, (s + e) / kMaskSpacing);
  398. }
  399. };
  400. // Holds the result of an index query that has an optional result,
  401. // interpreting a mask of 0 to be the empty answer and the index of the
  402. // last set bit to be the non-empty answer
  403. class LastOccupiedInMask {
  404. MaskType mask_;
  405. public:
  406. explicit LastOccupiedInMask(MaskType mask) : mask_{mask} {}
  407. bool hasIndex() const {
  408. return mask_ != 0;
  409. }
  410. unsigned index() const {
  411. assume(mask_ != 0);
  412. return (findLastSet(mask_) - 1) / kMaskSpacing;
  413. }
  414. };
  415. // Holds the result of an index query that has an optional result,
  416. // interpreting a mask of 0 to be the empty answer and the index of the
  417. // first set bit to be the non-empty answer
  418. class FirstEmptyInMask {
  419. MaskType mask_;
  420. public:
  421. explicit FirstEmptyInMask(MaskType mask) : mask_{mask} {}
  422. bool hasIndex() const {
  423. return mask_ != 0;
  424. }
  425. unsigned index() const {
  426. FOLLY_SAFE_DCHECK(mask_ != 0, "");
  427. return findFirstSetNonZero(mask_) / kMaskSpacing;
  428. }
  429. };
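// Iteration sketch (illustrative): with the SSE2 layout (kMaskSpacing == 1)
// a chunk whose slots 0 and 2 are occupied produces the mask 0b101. The
// helpers above then behave like this:
//
//   SparseMaskIter sparse{0b101};
//   sparse.next();                    // 0
//   sparse.next();                    // 2, then hasNext() is false
//
//   LastOccupiedInMask last{0b101};
//   last.index();                     // 2
//
//   FirstEmptyInMask empty{0b101 ^ FullMask<14>::value};
//   empty.index();                    // 1, the first unoccupied slot
//
// On NEON each slot occupies 4 mask bits (kMaskSpacing == 4), so the same
// logic applies with the bit positions scaled by 4.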
  430. template <typename ItemType>
  431. struct alignas(kRequiredVectorAlignment) F14Chunk {
  432. using Item = ItemType;
  433. // For our 16 byte vector alignment (and assuming alignof(Item) >=
  434. // 4) kCapacity of 14 is the most space efficient. Slightly smaller
  435. // or larger capacities can help with cache alignment in a couple of
  436. // cases without wasting too much space, but once the items are larger,
  437. // we're unlikely to get much benefit anyway. The only case we
  438. // optimize is using kCapacity of 12 for 4 byte items, which makes the
  439. // chunk take exactly 1 cache line, and adding 16 bytes of padding for
  440. // 16 byte items so that a chunk takes exactly 4 cache lines.
  441. static constexpr unsigned kCapacity = sizeof(Item) == 4 ? 12 : 14;
  442. static constexpr unsigned kDesiredCapacity = kCapacity - 2;
  443. static constexpr unsigned kAllocatedCapacity =
  444. kCapacity + (sizeof(Item) == 16 ? 1 : 0);
  445. static constexpr MaskType kFullMask = FullMask<kCapacity>::value;
  446. // Non-empty tags have their top bit set. tags_ array might be bigger
  447. // than kCapacity to keep alignment of first item.
  448. std::array<uint8_t, 14> tags_;
  449. // Bits 0..3 record the actual capacity of the chunk if this is chunk
  450. // zero, or hold 0000 for other chunks. Bits 4-7 are a 4-bit counter
  451. // of the number of values in this chunk that were placed because they
  452. // overflowed their desired chunk (hostedOverflowCount).
  453. uint8_t control_;
  454. // The number of values that would have been placed into this chunk if
  455. // there had been space, including values that also overflowed previous
  456. // full chunks. This value saturates; once it becomes 255 it neither
  457. // increases nor decreases.
  458. uint8_t outboundOverflowCount_;
  459. std::array<
  460. std::aligned_storage_t<sizeof(Item), alignof(Item)>,
  461. kAllocatedCapacity>
  462. rawItems_;
  463. static F14Chunk* emptyInstance() {
  464. auto raw = reinterpret_cast<char*>(&kEmptyTagVector);
  465. if (kRequiredVectorAlignment > alignof(max_align_t)) {
  466. auto delta = kRequiredVectorAlignment -
  467. (reinterpret_cast<uintptr_t>(raw) % kRequiredVectorAlignment);
  468. raw += delta;
  469. }
  470. auto rv = reinterpret_cast<F14Chunk*>(raw);
  471. FOLLY_SAFE_DCHECK(
  472. (reinterpret_cast<uintptr_t>(rv) % kRequiredVectorAlignment) == 0, "");
  473. return rv;
  474. }
  475. void clear() {
  476. // tags_ = {}; control_ = 0; outboundOverflowCount_ = 0;
  477. // gcc < 6 doesn't exploit chunk alignment to generate the optimal
  478. // SSE clear from memset. This is very hot code, so it is worth
  479. // handling that case specially.
  480. #if FOLLY_SSE >= 2 && __GNUC__ <= 5 && !__clang__
  481. // this doesn't violate strict aliasing rules because __m128i is
  482. // tagged as __may_alias__
  483. auto* v = static_cast<__m128i*>(static_cast<void*>(&tags_[0]));
  484. _mm_store_si128(v, _mm_setzero_si128());
  485. #else
  486. std::memset(&tags_[0], '\0', 16);
  487. #endif
  488. }
  489. void copyOverflowInfoFrom(F14Chunk const& rhs) {
  490. FOLLY_SAFE_DCHECK(hostedOverflowCount() == 0, "");
  491. control_ += static_cast<uint8_t>(rhs.control_ & 0xf0);
  492. outboundOverflowCount_ = rhs.outboundOverflowCount_;
  493. }
  494. unsigned hostedOverflowCount() const {
  495. return control_ >> 4;
  496. }
  497. static constexpr uint8_t kIncrHostedOverflowCount = 0x10;
  498. static constexpr uint8_t kDecrHostedOverflowCount =
  499. static_cast<uint8_t>(-0x10);
  500. void adjustHostedOverflowCount(uint8_t op) {
  501. control_ += op;
  502. }
  503. bool eof() const {
  504. return (control_ & 0xf) != 0;
  505. }
  506. std::size_t chunk0Capacity() const {
  507. return control_ & 0xf;
  508. }
  509. void markEof(std::size_t c0c) {
  510. FOLLY_SAFE_DCHECK(
  511. this != emptyInstance() && control_ == 0 && c0c > 0 && c0c <= 0xf &&
  512. c0c <= kCapacity,
  513. "");
  514. control_ = static_cast<uint8_t>(c0c);
  515. }
  516. unsigned outboundOverflowCount() const {
  517. return outboundOverflowCount_;
  518. }
  519. void incrOutboundOverflowCount() {
  520. if (outboundOverflowCount_ != 255) {
  521. ++outboundOverflowCount_;
  522. }
  523. }
  524. void decrOutboundOverflowCount() {
  525. if (outboundOverflowCount_ != 255) {
  526. --outboundOverflowCount_;
  527. }
  528. }
  529. std::size_t tag(std::size_t index) const {
  530. return tags_[index];
  531. }
  532. void setTag(std::size_t index, std::size_t tag) {
  533. FOLLY_SAFE_DCHECK(
  534. this != emptyInstance() && tag >= 0x80 && tag <= 0xff, "");
  535. tags_[index] = static_cast<uint8_t>(tag);
  536. }
  537. void clearTag(std::size_t index) {
  538. tags_[index] = 0;
  539. }
  540. #if FOLLY_NEON
  541. ////////
  542. // Tag filtering using NEON intrinsics
  543. SparseMaskIter tagMatchIter(std::size_t needle) const {
  544. FOLLY_SAFE_DCHECK(needle >= 0x80 && needle < 0x100, "");
  545. uint8x16_t tagV = vld1q_u8(&tags_[0]);
  546. auto needleV = vdupq_n_u8(static_cast<uint8_t>(needle));
  547. auto eqV = vceqq_u8(tagV, needleV);
  548. // get info from every byte into the bottom half of every uint16_t
  549. // by shifting right 4, then round to get it into a 64-bit vector
  550. uint8x8_t maskV = vshrn_n_u16(vreinterpretq_u16_u8(eqV), 4);
  551. uint64_t mask = vget_lane_u64(vreinterpret_u64_u8(maskV), 0) & kFullMask;
  552. return SparseMaskIter(mask);
  553. }
  554. MaskType occupiedMask() const {
  555. uint8x16_t tagV = vld1q_u8(&tags_[0]);
  556. // signed shift extends top bit to all bits
  557. auto occupiedV =
  558. vreinterpretq_u8_s8(vshrq_n_s8(vreinterpretq_s8_u8(tagV), 7));
  559. uint8x8_t maskV = vshrn_n_u16(vreinterpretq_u16_u8(occupiedV), 4);
  560. return vget_lane_u64(vreinterpret_u64_u8(maskV), 0) & kFullMask;
  561. }
  562. #else
  563. ////////
  564. // Tag filtering using SSE2 intrinsics
  565. TagVector const* tagVector() const {
  566. return static_cast<TagVector const*>(static_cast<void const*>(&tags_[0]));
  567. }
  568. SparseMaskIter tagMatchIter(std::size_t needle) const {
  569. FOLLY_SAFE_DCHECK(needle >= 0x80 && needle < 0x100, "");
  570. auto tagV = _mm_load_si128(tagVector());
  571. // TRICKY! It may seem strange to have a std::size_t needle and narrow
  572. // it at the last moment, rather than making HashPair::second be a
  573. // uint8_t, but the latter choice sometimes leads to a performance
  574. // problem.
  575. //
  576. // On architectures with SSE2 but not AVX2, _mm_set1_epi8 expands
  577. // to multiple instructions. One of those is a MOVD of either 4 or
  578. // 8 byte width. Only the bottom byte of that move actually affects
  579. // the result, but if a 1-byte needle has been spilled then this will
  580. // be a 4 byte load. GCC 5.5 has been observed to reload needle
  581. // (or perhaps fuse a reload and part of a previous static_cast)
  582. // using a MOVZX with a 1 byte load in parallel with the MOVD.
  583. // This combination causes a failure of store-to-load forwarding,
  584. // which has a big performance penalty (60 nanoseconds per find on
  585. // a microbenchmark). Keeping needle >= 4 bytes avoids the problem
  586. // and also happens to result in slightly more compact assembly.
  587. auto needleV = _mm_set1_epi8(static_cast<uint8_t>(needle));
  588. auto eqV = _mm_cmpeq_epi8(tagV, needleV);
  589. auto mask = _mm_movemask_epi8(eqV) & kFullMask;
  590. return SparseMaskIter{mask};
  591. }
  592. MaskType occupiedMask() const {
  593. auto tagV = _mm_load_si128(tagVector());
  594. return _mm_movemask_epi8(tagV) & kFullMask;
  595. }
  596. #endif
  597. DenseMaskIter occupiedIter() const {
  598. return DenseMaskIter{&tags_[0], occupiedMask()};
  599. }
  600. MaskRangeIter occupiedRangeIter() const {
  601. return MaskRangeIter{occupiedMask()};
  602. }
  603. LastOccupiedInMask lastOccupied() const {
  604. return LastOccupiedInMask{occupiedMask()};
  605. }
  606. FirstEmptyInMask firstEmpty() const {
  607. return FirstEmptyInMask{occupiedMask() ^ kFullMask};
  608. }
  609. bool occupied(std::size_t index) const {
  610. FOLLY_SAFE_DCHECK(tags_[index] == 0 || (tags_[index] & 0x80) != 0, "");
  611. return tags_[index] != 0;
  612. }
  613. Item* itemAddr(std::size_t i) const {
  614. return static_cast<Item*>(
  615. const_cast<void*>(static_cast<void const*>(&rawItems_[i])));
  616. }
  617. Item& item(std::size_t i) {
  618. FOLLY_SAFE_DCHECK(this->occupied(i), "");
  619. return *launder(itemAddr(i));
  620. }
  621. Item const& citem(std::size_t i) const {
  622. FOLLY_SAFE_DCHECK(this->occupied(i), "");
  623. return *launder(itemAddr(i));
  624. }
  625. static F14Chunk& owner(Item& item, std::size_t index) {
  626. auto rawAddr =
  627. static_cast<uint8_t*>(static_cast<void*>(std::addressof(item))) -
  628. offsetof(F14Chunk, rawItems_) - index * sizeof(Item);
  629. auto chunkAddr = static_cast<F14Chunk*>(static_cast<void*>(rawAddr));
  630. FOLLY_SAFE_DCHECK(std::addressof(item) == chunkAddr->itemAddr(index), "");
  631. return *chunkAddr;
  632. }
  633. };
  634. ////////////////
  635. // PackedChunkItemPtr points to an Item in an F14Chunk, allowing both the
  636. // Item& and its index to be recovered. It sorts by the address of the
  637. // item, and it only works for items that are in a properly-aligned chunk.
  638. // generic form, not actually packed
  639. template <typename Ptr>
  640. class PackedChunkItemPtr {
  641. public:
  642. PackedChunkItemPtr(Ptr p, std::size_t i) noexcept : ptr_{p}, index_{i} {
  643. FOLLY_SAFE_DCHECK(ptr_ != nullptr || index_ == 0, "");
  644. }
  645. Ptr ptr() const {
  646. return ptr_;
  647. }
  648. std::size_t index() const {
  649. return index_;
  650. }
  651. bool operator<(PackedChunkItemPtr const& rhs) const {
  652. FOLLY_SAFE_DCHECK(ptr_ != rhs.ptr_ || index_ == rhs.index_, "");
  653. return ptr_ < rhs.ptr_;
  654. }
  655. bool operator==(PackedChunkItemPtr const& rhs) const {
  656. FOLLY_SAFE_DCHECK(ptr_ != rhs.ptr_ || index_ == rhs.index_, "");
  657. return ptr_ == rhs.ptr_;
  658. }
  659. bool operator!=(PackedChunkItemPtr const& rhs) const {
  660. return !(*this == rhs);
  661. }
  662. private:
  663. Ptr ptr_;
  664. std::size_t index_;
  665. };
  666. // Bare pointer form, packed into a uintptr_t. Uses only bits wasted by
  667. // alignment, so it works on 32-bit and 64-bit platforms
  668. template <typename T>
  669. class PackedChunkItemPtr<T*> {
  670. static_assert((alignof(F14Chunk<T>) % 16) == 0, "");
  671. // Chunks are 16-byte aligned, so we can maintain a packed pointer to a
  672. // chunk item by packing the 4-bit item index into the least significant
  673. // bits of a pointer to the chunk itself. This makes ItemIter::pack
  674. // more expensive, however, since it has to compute the chunk address.
  675. //
  676. // Chunk items have varying alignment constraints, so it would seem
  677. // that we can't do a similar trick while using only bit masking
  678. // operations on the Item* itself. It turns out, however, that if
  679. // sizeof(Item) is not a multiple of 16 then we can recover a portion
  680. // of the index bits from the knowledge that the Item-s are stored in
  681. // an array that is itself 16-byte aligned.
  682. //
  683. // If kAlignBits is the number of trailing zero bits in sizeof(Item)
  684. // (up to 4), then we can borrow those bits to store kAlignBits of the
  685. // index directly. We can recover (4 - kAlignBits) bits of the index
  686. // from the item pointer itself, by defining/observing that
  687. //
  688. // A = kAlignBits (A <= 4)
  689. //
  690. // S = (sizeof(Item) % 16) >> A (shifted-away bits are all zero)
  691. //
  692. // R = (itemPtr % 16) >> A (shifted-away bits are all zero)
  693. //
  694. // M = 16 >> A
  695. //
  696. // itemPtr % 16 = (index * sizeof(Item)) % 16
  697. //
  698. // (R * 2^A) % 16 = (index * (sizeof(Item) % 16)) % 16
  699. //
  700. // (R * 2^A) % 16 = (index * 2^A * S) % 16
  701. //
  702. // R % M = (index * S) % M
  703. //
  704. // S is relatively prime with M, so a multiplicative inverse is easy
  705. // to compute
  706. //
  707. // Sinv = S^(M - 1) % M
  708. //
  709. // (R * Sinv) % M = index % M
  710. //
  711. // This lets us recover the bottom bits of the index. When sizeof(T)
  712. // is 8-byte aligned kSizeInverse will always be 1. When sizeof(T)
  713. // is 4-byte aligned kSizeInverse will be either 1 or 3.
  714. // returns pow(x, y) % m
  715. static constexpr uintptr_t powerMod(uintptr_t x, uintptr_t y, uintptr_t m) {
  716. return y == 0 ? 1 : (x * powerMod(x, y - 1, m)) % m;
  717. }
  718. static constexpr uintptr_t kIndexBits = 4;
  719. static constexpr uintptr_t kIndexMask = (uintptr_t{1} << kIndexBits) - 1;
  720. static constexpr uintptr_t kAlignBits = constexpr_min(
  721. uintptr_t{4},
  722. constexpr_find_first_set(uintptr_t{sizeof(T)}) - 1);
  723. static constexpr uintptr_t kAlignMask = (uintptr_t{1} << kAlignBits) - 1;
  724. static constexpr uintptr_t kModulus = uintptr_t{1}
  725. << (kIndexBits - kAlignBits);
  726. static constexpr uintptr_t kSizeInverse =
  727. powerMod(sizeof(T) >> kAlignBits, kModulus - 1, kModulus);
  728. public:
  729. PackedChunkItemPtr(T* p, std::size_t i) noexcept {
  730. uintptr_t encoded = i >> (kIndexBits - kAlignBits);
  731. assume((encoded & ~kAlignMask) == 0);
  732. raw_ = reinterpret_cast<uintptr_t>(p) | encoded;
  733. FOLLY_SAFE_DCHECK(p == ptr(), "");
  734. FOLLY_SAFE_DCHECK(i == index(), "");
  735. }
  736. T* ptr() const {
  737. return reinterpret_cast<T*>(raw_ & ~kAlignMask);
  738. }
  739. std::size_t index() const {
  740. auto encoded = (raw_ & kAlignMask) << (kIndexBits - kAlignBits);
  741. auto deduced =
  742. ((raw_ >> kAlignBits) * kSizeInverse) & (kIndexMask >> kAlignBits);
  743. return encoded | deduced;
  744. }
  745. bool operator<(PackedChunkItemPtr const& rhs) const {
  746. return raw_ < rhs.raw_;
  747. }
  748. bool operator==(PackedChunkItemPtr const& rhs) const {
  749. return raw_ == rhs.raw_;
  750. }
  751. bool operator!=(PackedChunkItemPtr const& rhs) const {
  752. return !(*this == rhs);
  753. }
  754. private:
  755. uintptr_t raw_;
  756. };
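// Worked example (illustrative) of the index recovery described above, for
// a hypothetical Item with sizeof(Item) == 12 stored in a 16-byte-aligned
// chunk array:
//
//   kAlignBits = 2, kAlignMask = 0b11, kModulus = 4
//   S = (12 % 16) >> 2 = 3,  kSizeInverse = 3^3 % 4 = 3
//
// Packing index i == 6 stores encoded = 6 >> 2 = 1 in the low pointer bits.
// The item sits at offset 6 * 12 = 72 from the 16-byte-aligned array, so
// itemPtr % 16 == 8 and R = 8 >> 2 = 2.  Then
//
//   deduced = (R * kSizeInverse) % kModulus = (2 * 3) % 4 = 2 = 6 % 4
//   index() = (encoded << 2) | deduced = 4 | 2 = 6
//
// recovering the original index without storing all four of its bits.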
  757. template <typename ChunkPtr>
  758. class F14ItemIter {
  759. private:
  760. using Chunk = typename std::pointer_traits<ChunkPtr>::element_type;
  761. public:
  762. using Item = typename Chunk::Item;
  763. using ItemPtr = typename std::pointer_traits<ChunkPtr>::template rebind<Item>;
  764. using ItemConstPtr =
  765. typename std::pointer_traits<ChunkPtr>::template rebind<Item const>;
  766. using Packed = PackedChunkItemPtr<ItemPtr>;
  767. //// PUBLIC
  768. F14ItemIter() noexcept : itemPtr_{nullptr}, index_{0} {}
  769. // default copy and move constructors and assignment operators are correct
  770. explicit F14ItemIter(Packed const& packed)
  771. : itemPtr_{packed.ptr()}, index_{packed.index()} {}
  772. F14ItemIter(ChunkPtr chunk, std::size_t index)
  773. : itemPtr_{std::pointer_traits<ItemPtr>::pointer_to(chunk->item(index))},
  774. index_{index} {
  775. FOLLY_SAFE_DCHECK(index < Chunk::kCapacity, "");
  776. assume(
  777. std::pointer_traits<ItemPtr>::pointer_to(chunk->item(index)) !=
  778. nullptr);
  779. assume(itemPtr_ != nullptr);
  780. }
  781. FOLLY_ALWAYS_INLINE void advanceImpl(bool checkEof, bool likelyDead) {
  782. auto c = chunk();
  783. // common case is packed entries
  784. while (index_ > 0) {
  785. --index_;
  786. --itemPtr_;
  787. if (LIKELY(c->occupied(index_))) {
  788. return;
  789. }
  790. }
  791. // It's fairly common for an iterator to be advanced and then become
  792. // dead, for example in the return value from erase(iter) or in
  793. // the last step of a loop. We'd like to make sure that the entire
  794. // advance() method can be eliminated by the compiler's dead code
  795. // elimination pass. To do that it must eliminate the loops, which
  796. // requires it to prove that they have no side effects. It's easy
  797. // to show that there are no escaping stores, but at the moment
  798. // compilers also consider an infinite loop to be a side effect.
  799. // (There are parts of the standard that would allow them to treat
  800. // this as undefined behavior, but at the moment they don't exploit
  801. // those clauses.)
  802. //
  803. // The following loop should really be a while loop, which would
  804. // save a register, some instructions, and a conditional branch,
  805. // but by writing it as a for loop the compiler can prove to itself
  806. // that it will eventually terminate. (No matter that even if the
  807. // loop executed in a single cycle it would take about 200 years to
  808. // run all 2^64 iterations.)
  809. //
  810. // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=82776 has the bug we
  811. // filed about the issue. The loop below stands in for "while (true)".
  812. for (std::size_t i = 1; !likelyDead || i != 0; ++i) {
  813. if (checkEof) {
  814. // exhausted the current chunk
  815. if (UNLIKELY(c->eof())) {
  816. FOLLY_SAFE_DCHECK(index_ == 0, "");
  817. itemPtr_ = nullptr;
  818. return;
  819. }
  820. } else {
  821. FOLLY_SAFE_DCHECK(!c->eof(), "");
  822. }
  823. --c;
  824. auto last = c->lastOccupied();
  825. if (checkEof && !likelyDead) {
  826. prefetchAddr(&*c - 1);
  827. }
  828. if (LIKELY(last.hasIndex())) {
  829. index_ = last.index();
  830. itemPtr_ = std::pointer_traits<ItemPtr>::pointer_to(c->item(index_));
  831. return;
  832. }
  833. }
  834. }
  835. void precheckedAdvance() {
  836. advanceImpl(false, false);
  837. }
  838. FOLLY_ALWAYS_INLINE void advance() {
  839. advanceImpl(true, false);
  840. }
  841. FOLLY_ALWAYS_INLINE void advanceLikelyDead() {
  842. advanceImpl(true, true);
  843. }
  844. ChunkPtr chunk() const {
  845. return std::pointer_traits<ChunkPtr>::pointer_to(
  846. Chunk::owner(*itemPtr_, index_));
  847. }
  848. std::size_t index() const {
  849. return index_;
  850. }
  851. Item* itemAddr() const {
  852. return std::addressof(*itemPtr_);
  853. }
  854. Item& item() const {
  855. return *itemPtr_;
  856. }
  857. Item const& citem() const {
  858. return *itemPtr_;
  859. }
  860. bool atEnd() const {
  861. return itemPtr_ == nullptr;
  862. }
  863. Packed pack() const {
  864. return Packed{itemPtr_, static_cast<uint8_t>(index_)};
  865. }
  866. bool operator==(F14ItemIter const& rhs) const {
  867. // this form makes iter == end() into a single null check after inlining
  868. // and constant propagation
  869. return itemPtr_ == rhs.itemPtr_;
  870. }
  871. bool operator!=(F14ItemIter const& rhs) const {
  872. return !(*this == rhs);
  873. }
  874. private:
  875. ItemPtr itemPtr_;
  876. std::size_t index_;
  877. };
  878. ////////////////
  879. template <typename SizeType, typename ItemIter, bool EnablePackedItemIter>
  880. struct SizeAndPackedBegin {
  881. SizeType size_{0};
  882. private:
  883. typename ItemIter::Packed packedBegin_{ItemIter{}.pack()};
  884. public:
  885. typename ItemIter::Packed& packedBegin() {
  886. return packedBegin_;
  887. }
  888. typename ItemIter::Packed const& packedBegin() const {
  889. return packedBegin_;
  890. }
  891. };
  892. template <typename SizeType, typename ItemIter>
  893. struct SizeAndPackedBegin<SizeType, ItemIter, false> {
  894. SizeType size_{0};
  895. [[noreturn]] typename ItemIter::Packed& packedBegin() {
  896. assume_unreachable();
  897. }
  898. [[noreturn]] typename ItemIter::Packed const& packedBegin() const {
  899. assume_unreachable();
  900. }
  901. };
  902. template <typename Policy>
  903. class F14Table : public Policy {
  904. public:
  905. using Item = typename Policy::Item;
  906. using value_type = typename Policy::Value;
  907. using allocator_type = typename Policy::Alloc;
  908. private:
  909. using Alloc = typename Policy::Alloc;
  910. using AllocTraits = typename Policy::AllocTraits;
  911. using Hasher = typename Policy::Hasher;
  912. using InternalSizeType = typename Policy::InternalSizeType;
  913. using KeyEqual = typename Policy::KeyEqual;
  914. using Policy::kAllocIsAlwaysEqual;
  915. using Policy::kDefaultConstructIsNoexcept;
  916. using Policy::kEnableItemIteration;
  917. using Policy::kSwapIsNoexcept;
  918. using Policy::destroyItemOnClear;
  919. using Policy::isAvalanchingHasher;
  920. using Policy::prefetchBeforeCopy;
  921. using Policy::prefetchBeforeDestroy;
  922. using Policy::prefetchBeforeRehash;
  923. using ByteAlloc = typename AllocTraits::template rebind_alloc<uint8_t>;
  924. using BytePtr = typename std::allocator_traits<ByteAlloc>::pointer;
  925. using Chunk = F14Chunk<Item>;
  926. using ChunkPtr =
  927. typename std::pointer_traits<BytePtr>::template rebind<Chunk>;
  928. using HashPair = typename F14HashToken::HashPair;
  929. public:
  930. using ItemIter = F14ItemIter<ChunkPtr>;
  931. private:
  932. //////// begin fields
  933. ChunkPtr chunks_{Chunk::emptyInstance()};
  934. InternalSizeType chunkMask_{0};
  935. SizeAndPackedBegin<InternalSizeType, ItemIter, kEnableItemIteration>
  936. sizeAndPackedBegin_;
  937. //////// end fields
  938. void swapContents(F14Table& rhs) noexcept {
  939. using std::swap;
  940. swap(chunks_, rhs.chunks_);
  941. swap(chunkMask_, rhs.chunkMask_);
  942. swap(sizeAndPackedBegin_.size_, rhs.sizeAndPackedBegin_.size_);
  943. if (kEnableItemIteration) {
  944. swap(
  945. sizeAndPackedBegin_.packedBegin(),
  946. rhs.sizeAndPackedBegin_.packedBegin());
  947. }
  948. }
  949. public:
  950. F14Table(
  951. std::size_t initialCapacity,
  952. Hasher const& hasher,
  953. KeyEqual const& keyEqual,
  954. Alloc const& alloc)
  955. : Policy{hasher, keyEqual, alloc} {
  956. if (initialCapacity > 0) {
  957. reserve(initialCapacity);
  958. }
  959. }
  960. F14Table(F14Table const& rhs) : Policy{rhs} {
  961. buildFromF14Table(rhs);
  962. }
  963. F14Table(F14Table const& rhs, Alloc const& alloc) : Policy{rhs, alloc} {
  964. buildFromF14Table(rhs);
  965. }
  966. F14Table(F14Table&& rhs) noexcept(
  967. std::is_nothrow_move_constructible<Hasher>::value&&
  968. std::is_nothrow_move_constructible<KeyEqual>::value&&
  969. std::is_nothrow_move_constructible<Alloc>::value)
  970. : Policy{std::move(rhs)} {
  971. swapContents(rhs);
  972. }
  973. F14Table(F14Table&& rhs, Alloc const& alloc) noexcept(kAllocIsAlwaysEqual)
  974. : Policy{std::move(rhs), alloc} {
  975. if (kAllocIsAlwaysEqual || this->alloc() == rhs.alloc()) {
  976. // move storage (common case)
  977. swapContents(rhs);
  978. } else {
  979. // new storage because allocators unequal, move values (rare case)
  980. buildFromF14Table(std::move(rhs));
  981. }
  982. }
  983. F14Table& operator=(F14Table const& rhs) {
  984. if (this != &rhs) {
  985. reset();
  986. static_cast<Policy&>(*this) = rhs;
  987. buildFromF14Table(rhs);
  988. }
  989. return *this;
  990. }
  991. F14Table& operator=(F14Table&& rhs) noexcept(
  992. std::is_nothrow_move_assignable<Hasher>::value&&
  993. std::is_nothrow_move_assignable<KeyEqual>::value &&
  994. (kAllocIsAlwaysEqual ||
  995. (AllocTraits::propagate_on_container_move_assignment::value &&
  996. std::is_nothrow_move_assignable<Alloc>::value))) {
  997. if (this != &rhs) {
  998. reset();
  999. static_cast<Policy&>(*this) = std::move(rhs);
  1000. if (AllocTraits::propagate_on_container_move_assignment::value ||
  1001. kAllocIsAlwaysEqual || this->alloc() == rhs.alloc()) {
  1002. // move storage (common case)
  1003. swapContents(rhs);
  1004. } else {
  1005. // new storage because allocators unequal, move values (rare case)
  1006. buildFromF14Table(std::move(rhs));
  1007. }
  1008. }
  1009. return *this;
  1010. }
  1011. ~F14Table() {
  1012. reset();
  1013. }
  1014. void swap(F14Table& rhs) noexcept(kSwapIsNoexcept) {
  1015. // If propagate_on_container_swap is false and allocators are
  1016. // not equal, the only way to accomplish a swap would be to do
  1017. // dynamic allocation and then move (or swap) each contained value.
  1018. // AllocatorAwareContainer-s are not supposed to attempt this, but
  1019. // rather are supposed to have undefined behavior in that case.
  1020. FOLLY_SAFE_CHECK(
  1021. AllocTraits::propagate_on_container_swap::value ||
  1022. kAllocIsAlwaysEqual || this->alloc() == rhs.alloc(),
  1023. "swap is undefined for unequal non-propagating allocators");
  1024. this->swapPolicy(rhs);
  1025. swapContents(rhs);
  1026. }
  1027. private:
  1028. //////// hash helpers
  1029. // Hash values are used to compute the desired position, which is the
  1030. // chunk index at which we would like to place a value (if there is no
  1031. // overflow), and the tag, which is an additional 8 bits of entropy.
  1032. //
  1033. // The standard's definition of hash function quality only refers to
  1034. // the probability of collisions of the entire hash value, not to the
  1035. // probability of collisions of the results of shifting or masking the
  1036. // hash value. Some hash functions, however, provide this stronger
  1037. // guarantee (not quite the same as the definition of avalanching,
  1038. // but similar).
  1039. //
  1040. // If the user-supplied hasher is an avalanching one (each bit of the
  1041. // hash value has a 50% chance of being the same for differing hash
  1042. // inputs), then we can just take 1 byte of the hash value for the tag
  1043. // and the rest for the desired position. Avalanching hashers also
  1044. // let us map hash value to array index position with just a bitmask
  1045. // without risking clumping. (Many hash tables just accept the risk
  1046. // and do it regardless.)
  1047. //
  1048. // std::hash<std::string> avalanches in all implementations we've
  1049. // examined: libstdc++-v3 uses MurmurHash2, and libc++ uses CityHash
  1050. // or MurmurHash2. The other std::hash specializations, however, do not
  1051. // have this property. std::hash for integral and pointer values is the
  1052. // identity function on libstdc++-v3 and libc++, in particular. In our
  1053. // experience it is also fairly common for user-defined specializations
  1054. // of std::hash to combine fields in an ad-hoc way that does not evenly
  1055. // distribute entropy among the bits of the result (a + 37 * b, for
  1056. // example, where a and b are integer fields).
  1057. //
  1058. // For hash functions we don't trust to avalanche, we repair things by
  1059. // applying a bit mixer to the user-supplied hash.
  1060. #if FOLLY_X64 || FOLLY_AARCH64
  1061. // 64-bit
  1062. static HashPair splitHash(std::size_t hash) {
  1063. static_assert(sizeof(std::size_t) == sizeof(uint64_t), "");
  1064. std::size_t tag;
  1065. if (!isAvalanchingHasher()) {
  1066. #if FOLLY_F14_CRC_INTRINSIC_AVAILABLE
  1067. #if FOLLY_SSE
  1068. // SSE4.2 CRC
  1069. std::size_t c = _mm_crc32_u64(0, hash);
  1070. tag = (c >> 24) | 0x80;
  1071. hash += c;
  1072. #else
  1073. // CRC is optional on armv8 (-march=armv8-a+crc), standard on armv8.1
  1074. std::size_t c = __crc32cd(0, hash);
  1075. tag = (c >> 24) | 0x80;
  1076. hash += c;
  1077. #endif
  1078. #else
  1079. // The mixer below is not fully avalanching for all 64 bits of
  1080. // output, but looks quite good for bits 18..63 and puts plenty
  1081. // of entropy even lower when considering multiple bits together
  1082. // (like the tag). Importantly, when under register pressure it
  1083. // uses fewer registers, instructions, and immediate constants
  1084. // than the alternatives, resulting in compact code that is more
  1085. // easily inlinable. In one instantiation a modified Murmur mixer
  1086. // was 48 bytes of assembly (even after using the same multiplicand
  1087. // for both steps) and this one was 27 bytes, for example.
  1088. auto const kMul = 0xc4ceb9fe1a85ec53ULL;
  1089. #ifdef _WIN32
  1090. __int64 signedHi;
  1091. __int64 signedLo = _mul128(
  1092. static_cast<__int64>(hash), static_cast<__int64>(kMul), &signedHi);
  1093. auto hi = static_cast<uint64_t>(signedHi);
  1094. auto lo = static_cast<uint64_t>(signedLo);
  1095. #else
  1096. auto hi = static_cast<uint64_t>(
  1097. (static_cast<unsigned __int128>(hash) * kMul) >> 64);
  1098. auto lo = hash * kMul;
  1099. #endif
  1100. hash = hi ^ lo;
  1101. hash *= kMul;
  1102. tag = ((hash >> 15) & 0x7f) | 0x80;
  1103. hash >>= 22;
  1104. #endif
  1105. } else {
  1106. // we don't trust the top bit
  1107. tag = (hash >> 56) | 0x80;
  1108. }
  1109. return std::make_pair(hash, tag);
  1110. }
  1111. #else
  1112. // 32-bit
  1113. static HashPair splitHash(std::size_t hash) {
  1114. static_assert(sizeof(std::size_t) == sizeof(uint32_t), "");
  1115. uint8_t tag;
  1116. if (!isAvalanchingHasher()) {
  1117. #if FOLLY_F14_CRC_INTRINSIC_AVAILABLE
  1118. #if FOLLY_SSE
  1119. // SSE4.2 CRC
  1120. auto c = _mm_crc32_u32(0, hash);
  1121. tag = static_cast<uint8_t>(~(c >> 25));
  1122. hash += c;
  1123. #else
  1124. auto c = __crc32cw(0, hash);
  1125. tag = static_cast<uint8_t>(~(c >> 25));
  1126. hash += c;
  1127. #endif
  1128. #else
  1129. // finalizer for 32-bit murmur2
  1130. hash ^= hash >> 13;
  1131. hash *= 0x5bd1e995;
  1132. hash ^= hash >> 15;
  1133. tag = static_cast<uint8_t>(~(hash >> 25));
  1134. #endif
  1135. } else {
  1136. // we don't trust the top bit
  1137. tag = (hash >> 24) | 0x80;
  1138. }
  1139. return std::make_pair(hash, tag);
  1140. }
  1141. #endif
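// Worked example (illustrative): with an avalanching hasher on a 64-bit
// platform, hash = 0x12f3456789abcdef splits into
//
//   tag      = (hash >> 56) | 0x80 = 0x12 | 0x80 = 0x92
//   HashPair = {0x12f3456789abcdef, 0x92}
//
// hp.first later selects the home chunk via hp.first & chunkMask_, and
// hp.second is the byte written into the chunk's tags_ array, so a lookup
// can reject non-matching slots with a single SIMD tag comparison before
// touching any keys. Non-avalanching hashers instead take the CRC or
// multiply-based mixing paths above rather than using the raw bits.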
  1142. //////// memory management helpers
  1143. static std::size_t chunkAllocSize(
  1144. std::size_t chunkCount,
  1145. std::size_t maxSizeWithoutRehash) {
  1146. if (chunkCount == 1) {
  1147. FOLLY_SAFE_DCHECK((maxSizeWithoutRehash % 2) == 0, "");
  1148. static_assert(offsetof(Chunk, rawItems_) == 16, "");
  1149. return 16 + sizeof(Item) * maxSizeWithoutRehash;
  1150. } else {
  1151. return sizeof(Chunk) * chunkCount;
  1152. }
  1153. }
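// Sizing sketch (illustrative): for 8-byte items, sizeof(Chunk) is the
// 16-byte header (14 tags + control_ + outboundOverflowCount_) plus
// 14 * 8 = 112 bytes of item storage, i.e. 128 bytes per chunk. So
//
//   chunkAllocSize(1, 2)  == 16 + 8 * 2  == 32   // partial first chunk
//   chunkAllocSize(4, _)  == 4 * 128     == 512  // full chunks
//
// The single-chunk case allocates only enough item slots for the requested
// capacity, which is why chunk zero records its own capacity in control_.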
  1154. ChunkPtr initializeChunks(
  1155. BytePtr raw,
  1156. std::size_t chunkCount,
  1157. std::size_t maxSizeWithoutRehash) {
  1158. static_assert(std::is_trivial<Chunk>::value, "F14Chunk should be POD");
  1159. auto chunks = static_cast<Chunk*>(static_cast<void*>(&*raw));
  1160. for (std::size_t i = 0; i < chunkCount; ++i) {
  1161. chunks[i].clear();
  1162. }
  1163. chunks[0].markEof(chunkCount == 1 ? maxSizeWithoutRehash : 1);
  1164. return std::pointer_traits<ChunkPtr>::pointer_to(*chunks);
  1165. }
  1166. public:
  1167. ItemIter begin() const noexcept {
  1168. FOLLY_SAFE_DCHECK(kEnableItemIteration, "");
  1169. return ItemIter{sizeAndPackedBegin_.packedBegin()};
  1170. }
  1171. ItemIter end() const noexcept {
  1172. return ItemIter{};
  1173. }
  1174. bool empty() const noexcept {
  1175. return size() == 0;
  1176. }
  1177. InternalSizeType size() const noexcept {
  1178. return sizeAndPackedBegin_.size_;
  1179. }
  1180. std::size_t max_size() const noexcept {
  1181. auto& a = this->alloc();
  1182. return std::min<std::size_t>(
  1183. (std::numeric_limits<InternalSizeType>::max)(),
  1184. AllocTraits::max_size(a));
  1185. }
  1186. std::size_t bucket_count() const noexcept {
  1187. // bucket_count is just a synthetic construct for the outside world
  1188. // so that size, bucket_count, load_factor, and max_load_factor are
  1189. // all self-consistent. The only one of those that is real is size().
  1190. if (chunkMask_ != 0) {
  1191. return (chunkMask_ + 1) * Chunk::kDesiredCapacity;
  1192. } else {
  1193. return chunks_->chunk0Capacity();
  1194. }
  1195. }
  1196. std::size_t max_bucket_count() const noexcept {
  1197. return max_size();
  1198. }
  1199. float load_factor() const noexcept {
  1200. return empty()
  1201. ? 0.0f
  1202. : static_cast<float>(size()) / static_cast<float>(bucket_count());
  1203. }
  1204. float max_load_factor() const noexcept {
  1205. return 1.0f;
  1206. }
  1207. void max_load_factor(float) noexcept {
  1208. // Probing hash tables can't run load factors >= 1 (unlike chaining
  1209. // tables). In addition, we have measured that there is little or
  1210. // no performance advantage to running a smaller load factor (cache
  1211. // locality losses outweigh the small reduction in probe lengths,
  1212. // often making it slower). Therefore, we've decided to just fix
  1213. // max_load_factor at 1.0f regardless of what the user requests.
  1214. // This has an additional advantage that we don't have to store it.
  1215. // Taking alignment into consideration this makes every F14 table
  1216. // 8 bytes smaller, and is part of the reason an empty F14NodeMap
  1217. // is almost half the size of an empty std::unordered_map (32 vs
  1218. // 56 bytes).
  1219. //
  1220. // I don't have a strong opinion on whether we should remove this
  1221. // method or leave a stub, let ngbronson or xshi know if you have a
  1222. // compelling argument either way.
  1223. }
  1224. private:
  1225. // Our probe strategy is to advance through additional chunks with
  1226. // a stride that is key-specific. This is called double hashing,
  1227. // and is a well known and high quality probing strategy. So long as
  1228. // the stride and the chunk count are relatively prime, we will visit
  1229. // every chunk once and then return to the original chunk, letting us
  1230. // detect and end the cycle. The chunk count is a power of two, so
  1231. // we can satisfy the relatively prime part by choosing an odd stride.
  1232. // We've already computed a high quality secondary hash value for the
  1233. // tag, so we just use it for the second probe hash as well.
  1234. //
  // At the maximum load factor of 12/14, expected probe length for a
  // find hit is 1.041, with 99% of keys found in the first three chunks.
  // Expected probe length for a find miss (or insert) is 1.275, with a
  // p99 probe length of 4 (fewer than 1% of failing finds look at 5 or
  // more chunks).
  //
  // This code is structured so you can try various ways of encoding
  // the current probe state. For example, at the moment the probe's
  // state is the position in the cycle and the resulting chunk index is
  // computed from that inside probeCurrentIndex. We could also make the
  // probe state the chunk index, and then increment it by hp.second *
  // 2 + 1 in probeAdvance. Wrapping can be applied early or late as
  // well. This particular code seems to be easier for the optimizer
  // to understand.
  //
  // We could also implement probing strategies that resulted in the same
  // tour for every key initially assigned to a chunk (linear probing or
  // quadratic), but that results in longer probe lengths. In particular,
  // the cache locality wins of linear probing are not worth the increase
  // in probe lengths (extra work and less branch predictability) in
  // our experiments.
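  //
  // As a concrete illustration (assuming 8 chunks, i.e. chunkMask_ == 7):
  // a key whose secondary hash is 5 gets stride 2 * 5 + 1 == 11, which is
  // 3 mod 8, so a probe starting at chunk 2 visits chunks 2, 5, 0, 3, 6,
  // 1, 4, 7 and only then wraps back to chunk 2, touching every chunk
  // exactly once per cycle as required.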
  std::size_t probeDelta(HashPair hp) const {
    return 2 * hp.second + 1;
  }

  template <typename K>
  FOLLY_ALWAYS_INLINE ItemIter findImpl(HashPair hp, K const& key) const {
    std::size_t index = hp.first;
    std::size_t step = probeDelta(hp);
    for (std::size_t tries = 0; tries <= chunkMask_; ++tries) {
      ChunkPtr chunk = chunks_ + (index & chunkMask_);
      if (sizeof(Chunk) > 64) {
        prefetchAddr(chunk->itemAddr(8));
      }
      auto hits = chunk->tagMatchIter(hp.second);
      while (hits.hasNext()) {
        auto i = hits.next();
        if (LIKELY(this->keyMatchesItem(key, chunk->item(i)))) {
          // Tag match and key match were both successful. The chance
          // of a false tag match is 1/128 for each key in the chunk
          // (with a proper hash function).
          return ItemIter{chunk, i};
        }
      }
      if (LIKELY(chunk->outboundOverflowCount() == 0)) {
        // No keys that wanted to be placed in this chunk were denied
        // entry, so our search is over. This is the common case.
        break;
      }
      index += step;
    }
    // Loop exit because tries is exhausted is rare, but possible.
    // That means that for every chunk there is currently a key present
    // in the map that visited that chunk on its probe search but ended
    // up somewhere else, and we have searched every chunk.
    return ItemIter{};
  }

 public:
  // Prehashing splits the work of find(key) into two calls, enabling you
  // to manually implement loop pipelining for hot bulk lookups. prehash
  // computes the hash and prefetches the first computed memory location,
  // and the two-arg find(F14HashToken,K) performs the rest of the search.
  template <typename K>
  F14HashToken prehash(K const& key) const {
    FOLLY_SAFE_DCHECK(chunks_ != nullptr, "");
    auto hp = splitHash(this->computeKeyHash(key));
    ChunkPtr firstChunk = chunks_ + (hp.first & chunkMask_);
    prefetchAddr(firstChunk);
    return F14HashToken(std::move(hp));
  }

  template <typename K>
  FOLLY_ALWAYS_INLINE ItemIter find(K const& key) const {
    auto hp = splitHash(this->computeKeyHash(key));
    return findImpl(hp, key);
  }

  template <typename K>
  FOLLY_ALWAYS_INLINE ItemIter
  find(F14HashToken const& token, K const& key) const {
    FOLLY_SAFE_DCHECK(
        splitHash(this->computeKeyHash(key)) == static_cast<HashPair>(token),
        "");
    return findImpl(static_cast<HashPair>(token), key);
  }
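
  // Illustrative bulk-lookup pattern (hypothetical caller code; `table` and
  // `keys` are stand-ins, not part of this class): computing tokens for a
  // whole batch up front lets the prefetch issued by prehash overlap with
  // hashing the later keys before the two-arg find consumes each token.
  //
  //   std::vector<F14HashToken> tokens;
  //   tokens.reserve(keys.size());
  //   for (auto const& k : keys) {
  //     tokens.push_back(table.prehash(k));
  //   }
  //   for (std::size_t i = 0; i < keys.size(); ++i) {
  //     auto pos = table.find(tokens[i], keys[i]);
  //     // ... use pos ...
  //   }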
 private:
  void adjustSizeAndBeginAfterInsert(ItemIter iter) {
    if (kEnableItemIteration) {
      // packedBegin is the max of all valid ItemIter::pack()
      auto packed = iter.pack();
      if (sizeAndPackedBegin_.packedBegin() < packed) {
        sizeAndPackedBegin_.packedBegin() = packed;
      }
    }
    ++sizeAndPackedBegin_.size_;
  }

  // Ignores hp if iter.chunk()->hostedOverflowCount() == 0
  void eraseBlank(ItemIter iter, HashPair hp) {
    iter.chunk()->clearTag(iter.index());
    if (iter.chunk()->hostedOverflowCount() != 0) {
      // clean up
      std::size_t index = hp.first;
      std::size_t delta = probeDelta(hp);
      uint8_t hostedOp = 0;
      while (true) {
        ChunkPtr chunk = chunks_ + (index & chunkMask_);
        if (chunk == iter.chunk()) {
          chunk->adjustHostedOverflowCount(hostedOp);
          break;
        }
        chunk->decrOutboundOverflowCount();
        hostedOp = Chunk::kDecrHostedOverflowCount;
        index += delta;
      }
    }
  }

  void adjustSizeAndBeginBeforeErase(ItemIter iter) {
    --sizeAndPackedBegin_.size_;
    if (kEnableItemIteration) {
      if (iter.pack() == sizeAndPackedBegin_.packedBegin()) {
        if (size() == 0) {
          iter = ItemIter{};
        } else {
          iter.precheckedAdvance();
        }
        sizeAndPackedBegin_.packedBegin() = iter.pack();
      }
    }
  }

  template <typename... Args>
  void insertAtBlank(ItemIter pos, HashPair hp, Args&&... args) {
    try {
      auto dst = pos.itemAddr();
      this->constructValueAtItem(size(), dst, std::forward<Args>(args)...);
    } catch (...) {
      eraseBlank(pos, hp);
      throw;
    }
    adjustSizeAndBeginAfterInsert(pos);
  }
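
  // Claims a slot for hp's tag while rebuilding or rehashing, following the
  // same double-hashing probe sequence as findImpl. fullness[i] counts the
  // items already placed in chunk i, and the outbound/hosted overflow
  // counts are bumped for each full chunk that has to be skipped.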
  ItemIter allocateTag(uint8_t* fullness, HashPair hp) {
    ChunkPtr chunk;
    std::size_t index = hp.first;
    std::size_t delta = probeDelta(hp);
    uint8_t hostedOp = 0;
    while (true) {
      index &= chunkMask_;
      chunk = chunks_ + index;
      if (LIKELY(fullness[index] < Chunk::kCapacity)) {
        break;
      }
      chunk->incrOutboundOverflowCount();
      hostedOp = Chunk::kIncrHostedOverflowCount;
      index += delta;
    }
    unsigned itemIndex = fullness[index]++;
    FOLLY_SAFE_DCHECK(!chunk->occupied(itemIndex), "");
    chunk->setTag(itemIndex, hp.second);
    chunk->adjustHostedOverflowCount(hostedOp);
    return ItemIter{chunk, itemIndex};
  }

  ChunkPtr lastOccupiedChunk() const {
    FOLLY_SAFE_DCHECK(size() > 0, "");
    if (kEnableItemIteration) {
      return begin().chunk();
    } else {
      return chunks_ + chunkMask_;
    }
  }

  template <typename T>
  void directBuildFrom(T&& src) {
    FOLLY_SAFE_DCHECK(src.size() > 0 && chunkMask_ == src.chunkMask_, "");

    // We use std::forward<T> to allow portions of src to be moved out by
    // either beforeBuild or afterBuild, but we are just relying on good
    // behavior of our Policy superclass to ensure that any particular
    // field of this is a donor at most once.
    auto undoState =
        this->beforeBuild(src.size(), bucket_count(), std::forward<T>(src));
    bool success = false;
    SCOPE_EXIT {
      this->afterBuild(
          undoState, success, src.size(), bucket_count(), std::forward<T>(src));
    };

    // Copy can fail part-way through if a Value copy constructor throws.
    // Failing afterBuild is limited in its cleanup power in this case,
    // because it can't enumerate the items that were actually copied.
    // Fortunately we can divide the situation into cases where all of
    // the state is owned by the table itself (F14Node and F14Value),
    // for which clearImpl() can do partial cleanup, and cases where all
    // of the values are owned by the policy (F14Vector), in which case
    // partial failure should not occur. Sorry for the subtle invariants
    // in the Policy API.
    if (is_trivially_copyable<Item>::value && !this->destroyItemOnClear() &&
        bucket_count() == src.bucket_count()) {
      // most happy path
      auto n = chunkAllocSize(chunkMask_ + 1, bucket_count());
      std::memcpy(&chunks_[0], &src.chunks_[0], n);
      sizeAndPackedBegin_.size_ = src.size();
      if (kEnableItemIteration) {
        auto srcBegin = src.begin();
        sizeAndPackedBegin_.packedBegin() =
            ItemIter{chunks_ + (srcBegin.chunk() - src.chunks_),
                     srcBegin.index()}
                .pack();
      }
    } else {
      std::size_t maxChunkIndex = src.lastOccupiedChunk() - src.chunks_;

      // happy path, no rehash but pack items toward bottom of chunk and
      // use copy constructor
      auto srcChunk = &src.chunks_[maxChunkIndex];
      Chunk* dstChunk = &chunks_[maxChunkIndex];
      do {
        dstChunk->copyOverflowInfoFrom(*srcChunk);

        auto iter = srcChunk->occupiedIter();
        if (prefetchBeforeCopy()) {
          for (auto piter = iter; piter.hasNext();) {
            this->prefetchValue(srcChunk->citem(piter.next()));
          }
        }

        std::size_t dstI = 0;
        for (; iter.hasNext(); ++dstI) {
          auto srcI = iter.next();
          auto&& srcArg =
              std::forward<T>(src).buildArgForItem(srcChunk->item(srcI));
          auto dst = dstChunk->itemAddr(dstI);
          this->constructValueAtItem(
              0, dst, std::forward<decltype(srcArg)>(srcArg));
          dstChunk->setTag(dstI, srcChunk->tag(srcI));
          ++sizeAndPackedBegin_.size_;
        }

        --srcChunk;
        --dstChunk;
      } while (size() != src.size());

      // reset doesn't care about packedBegin, so we don't fix it until the end
      if (kEnableItemIteration) {
        sizeAndPackedBegin_.packedBegin() =
            ItemIter{chunks_ + maxChunkIndex,
                     chunks_[maxChunkIndex].lastOccupied().index()}
                .pack();
      }
    }

    success = true;
  }

  template <typename T>
  void rehashBuildFrom(T&& src) {
    FOLLY_SAFE_DCHECK(src.chunkMask_ > chunkMask_, "");

    // 1 byte per chunk means < 1 bit per value temporary overhead
    std::array<uint8_t, 256> stackBuf;
    uint8_t* fullness;
    auto cc = chunkMask_ + 1;
    if (cc <= stackBuf.size()) {
      fullness = stackBuf.data();
    } else {
      ByteAlloc a{this->alloc()};
      fullness = &*std::allocator_traits<ByteAlloc>::allocate(a, cc);
    }
    SCOPE_EXIT {
      if (cc > stackBuf.size()) {
        ByteAlloc a{this->alloc()};
        std::allocator_traits<ByteAlloc>::deallocate(
            a,
            std::pointer_traits<typename std::allocator_traits<
                ByteAlloc>::pointer>::pointer_to(*fullness),
            cc);
      }
    };
    std::memset(fullness, '\0', cc);

    // We use std::forward<T> to allow portions of src to be moved out by
    // either beforeBuild or afterBuild, but we are just relying on good
    // behavior of our Policy superclass to ensure that any particular
    // field of this is a donor at most once.
    //
    // Exception safety requires beforeBuild to happen after all of the
    // allocate() calls.
    auto undoState =
        this->beforeBuild(src.size(), bucket_count(), std::forward<T>(src));
    bool success = false;
    SCOPE_EXIT {
      this->afterBuild(
          undoState, success, src.size(), bucket_count(), std::forward<T>(src));
    };

    // The current table is at a valid state at all points for policies
    // in which non-trivial values are owned by the main table (F14Node
    // and F14Value), so reset() will clean things up properly if we
    // fail partway through. For the case that the policy manages value
    // lifecycle (F14Vector) then nothing after beforeBuild can throw and
    // we don't have to worry about partial failure.
    std::size_t srcChunkIndex = src.lastOccupiedChunk() - src.chunks_;
    while (true) {
      auto srcChunk = &src.chunks_[srcChunkIndex];
      auto iter = srcChunk->occupiedIter();
      if (prefetchBeforeRehash()) {
        for (auto piter = iter; piter.hasNext();) {
          this->prefetchValue(srcChunk->item(piter.next()));
        }
      }
      if (srcChunk->hostedOverflowCount() == 0) {
        // all items are in their preferred chunk (no probing), so we
        // don't need to compute any hash values
        while (iter.hasNext()) {
          auto i = iter.next();
          auto& srcItem = srcChunk->item(i);
          auto&& srcArg = std::forward<T>(src).buildArgForItem(srcItem);
          HashPair hp{srcChunkIndex, srcChunk->tag(i)};
          insertAtBlank(
              allocateTag(fullness, hp),
              hp,
              std::forward<decltype(srcArg)>(srcArg));
        }
      } else {
        // any chunk's items might be in here
        while (iter.hasNext()) {
          auto i = iter.next();
          auto& srcItem = srcChunk->item(i);
          auto&& srcArg = std::forward<T>(src).buildArgForItem(srcItem);
          auto const& srcKey = src.keyForValue(srcArg);
          auto hp = splitHash(this->computeKeyHash(srcKey));
          FOLLY_SAFE_DCHECK(hp.second == srcChunk->tag(i), "");
          insertAtBlank(
              allocateTag(fullness, hp),
              hp,
              std::forward<decltype(srcArg)>(srcArg));
        }
      }
      if (srcChunkIndex == 0) {
        break;
      }
      --srcChunkIndex;
    }

    success = true;
  }

  template <typename T>
  FOLLY_NOINLINE void buildFromF14Table(T&& src) {
    FOLLY_SAFE_DCHECK(size() == 0, "");
    if (src.size() == 0) {
      return;
    }

    reserveForInsert(src.size());
    try {
      if (chunkMask_ == src.chunkMask_) {
        directBuildFrom(std::forward<T>(src));
      } else {
        rehashBuildFrom(std::forward<T>(src));
      }
    } catch (...) {
      reset();
      F14LinkCheck<getF14IntrinsicsMode()>::check();
      throw;
    }
  }

  FOLLY_NOINLINE void reserveImpl(
      std::size_t capacity,
      std::size_t origChunkCount,
      std::size_t origMaxSizeWithoutRehash) {
    FOLLY_SAFE_DCHECK(capacity >= size(), "");

    // compute new size
    std::size_t const kInitialCapacity = 2;
    std::size_t const kHalfChunkCapacity =
        (Chunk::kDesiredCapacity / 2) & ~std::size_t{1};
    std::size_t newMaxSizeWithoutRehash;
    std::size_t newChunkCount;
    if (capacity <= kHalfChunkCapacity) {
      newChunkCount = 1;
      newMaxSizeWithoutRehash =
          (capacity < kInitialCapacity) ? kInitialCapacity : kHalfChunkCapacity;
    } else {
      newChunkCount = nextPowTwo((capacity - 1) / Chunk::kDesiredCapacity + 1);
      newMaxSizeWithoutRehash = newChunkCount * Chunk::kDesiredCapacity;
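      // Worked example (illustrative, and assuming Chunk::kDesiredCapacity
      // is 12, matching the 12/14 max load factor mentioned earlier):
      // reserving capacity 100 gives nextPowTwo((100 - 1) / 12 + 1) ==
      // nextPowTwo(9) == 16 chunks, so newMaxSizeWithoutRehash == 192.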
      constexpr std::size_t kMaxChunksWithoutCapacityOverflow =
          (std::numeric_limits<std::size_t>::max)() / Chunk::kDesiredCapacity;
      if (newChunkCount > kMaxChunksWithoutCapacityOverflow ||
          newMaxSizeWithoutRehash > max_size()) {
        throw_exception<std::bad_alloc>();
      }
    }

    if (origMaxSizeWithoutRehash != newMaxSizeWithoutRehash) {
      rehashImpl(
          origChunkCount,
          origMaxSizeWithoutRehash,
          newChunkCount,
          newMaxSizeWithoutRehash);
    }
  }
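
  // Allocates and initializes a new chunk array, then moves every item
  // into it: when both the old and the new table consist of a single
  // chunk the tags can simply be copied, otherwise each item's hash is
  // recomputed to find its new chunk. The SCOPE_EXIT below reverts
  // chunks_ and chunkMask_ on failure and passes the appropriate raw
  // allocation to afterRehash.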
  void rehashImpl(
      std::size_t origChunkCount,
      std::size_t origMaxSizeWithoutRehash,
      std::size_t newChunkCount,
      std::size_t newMaxSizeWithoutRehash) {
    auto origChunks = chunks_;

    BytePtr rawAllocation;
    auto undoState = this->beforeRehash(
        size(),
        origMaxSizeWithoutRehash,
        newMaxSizeWithoutRehash,
        chunkAllocSize(newChunkCount, newMaxSizeWithoutRehash),
        rawAllocation);
    chunks_ =
        initializeChunks(rawAllocation, newChunkCount, newMaxSizeWithoutRehash);

    FOLLY_SAFE_DCHECK(
        newChunkCount < std::numeric_limits<InternalSizeType>::max(), "");
    chunkMask_ = static_cast<InternalSizeType>(newChunkCount - 1);

    bool success = false;
    SCOPE_EXIT {
      // this SCOPE_EXIT reverts chunks_ and chunkMask_ if necessary
      BytePtr finishedRawAllocation = nullptr;
      std::size_t finishedAllocSize = 0;
      if (LIKELY(success)) {
        if (origMaxSizeWithoutRehash > 0) {
          finishedRawAllocation = std::pointer_traits<BytePtr>::pointer_to(
              *static_cast<uint8_t*>(static_cast<void*>(&*origChunks)));
          finishedAllocSize =
              chunkAllocSize(origChunkCount, origMaxSizeWithoutRehash);
        }
      } else {
        finishedRawAllocation = rawAllocation;
        finishedAllocSize =
            chunkAllocSize(newChunkCount, newMaxSizeWithoutRehash);
        chunks_ = origChunks;
        FOLLY_SAFE_DCHECK(
            origChunkCount < std::numeric_limits<InternalSizeType>::max(), "");
        chunkMask_ = static_cast<InternalSizeType>(origChunkCount - 1);
        F14LinkCheck<getF14IntrinsicsMode()>::check();
      }

      this->afterRehash(
          std::move(undoState),
          success,
          size(),
          origMaxSizeWithoutRehash,
          newMaxSizeWithoutRehash,
          finishedRawAllocation,
          finishedAllocSize);
    };

    if (size() == 0) {
      // nothing to do
    } else if (origChunkCount == 1 && newChunkCount == 1) {
      // no mask, no chunk scan, no hash computation, no probing
      auto srcChunk = origChunks;
      auto dstChunk = chunks_;
      std::size_t srcI = 0;
      std::size_t dstI = 0;
      while (dstI < size()) {
        if (LIKELY(srcChunk->occupied(srcI))) {
          dstChunk->setTag(dstI, srcChunk->tag(srcI));
          this->moveItemDuringRehash(
              dstChunk->itemAddr(dstI), srcChunk->item(srcI));
          ++dstI;
        }
        ++srcI;
      }
      if (kEnableItemIteration) {
        sizeAndPackedBegin_.packedBegin() = ItemIter{dstChunk, dstI - 1}.pack();
      }
    } else {
      // 1 byte per chunk means < 1 bit per value temporary overhead
      std::array<uint8_t, 256> stackBuf;
      uint8_t* fullness;
      if (newChunkCount <= stackBuf.size()) {
        fullness = stackBuf.data();
      } else {
        ByteAlloc a{this->alloc()};
        // may throw
        fullness =
            &*std::allocator_traits<ByteAlloc>::allocate(a, newChunkCount);
      }
      std::memset(fullness, '\0', newChunkCount);
      SCOPE_EXIT {
        if (newChunkCount > stackBuf.size()) {
          ByteAlloc a{this->alloc()};
          std::allocator_traits<ByteAlloc>::deallocate(
              a,
              std::pointer_traits<typename std::allocator_traits<
                  ByteAlloc>::pointer>::pointer_to(*fullness),
              newChunkCount);
        }
      };

      auto srcChunk = origChunks + origChunkCount - 1;
      std::size_t remaining = size();
      while (remaining > 0) {
        auto iter = srcChunk->occupiedIter();
        if (prefetchBeforeRehash()) {
          for (auto piter = iter; piter.hasNext();) {
            this->prefetchValue(srcChunk->item(piter.next()));
          }
        }
        while (iter.hasNext()) {
          --remaining;
          auto srcI = iter.next();
          Item& srcItem = srcChunk->item(srcI);
          auto hp = splitHash(
              this->computeItemHash(const_cast<Item const&>(srcItem)));
          FOLLY_SAFE_DCHECK(hp.second == srcChunk->tag(srcI), "");
          auto dstIter = allocateTag(fullness, hp);
          this->moveItemDuringRehash(dstIter.itemAddr(), srcItem);
        }
        --srcChunk;
      }

      if (kEnableItemIteration) {
        // this code replaces size() separate invocations of
        // adjustSizeAndBeginAfterInsert
        std::size_t i = chunkMask_;
        while (fullness[i] == 0) {
          --i;
        }
        sizeAndPackedBegin_.packedBegin() =
            ItemIter{chunks_ + i, std::size_t{fullness[i]} - 1}.pack();
      }
    }

    success = true;
  }

  void asanOnReserve(std::size_t capacity) {
    if (kIsSanitizeAddress && capacity > size()) {
      asanPendingSafeInserts += capacity - size();
    }
  }
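
  // Under ASAN we occasionally force a spurious rehash on insert (see
  // asanOnInsert below) to flush out reference-stability bugs. Inserts
  // that were covered by an explicit reserve() or rehash() are exempt;
  // those are tracked in asanPendingSafeInserts by asanOnReserve above.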
  bool asanShouldAddExtraRehash() {
    if (!kIsSanitizeAddress) {
      return false;
    } else if (asanPendingSafeInserts > 0) {
      --asanPendingSafeInserts;
      return false;
    } else if (size() <= 1) {
      return size() > 0;
    } else {
      constexpr std::size_t kBigPrime = 4294967291U;
      auto s = (asanRehashState += kBigPrime);
      return (s % size()) == 0;
    }
  }

  void asanExtraRehash() {
    auto cc = chunkMask_ + 1;
    auto bc = bucket_count();
    rehashImpl(cc, bc, cc, bc);
  }

  void asanOnInsert() {
    // When running under ASAN, we add a spurious rehash with 1/size()
    // probability before every insert. This means that finding reference
    // stability problems for F14Value and F14Vector is much more likely.
    // The most common pattern that causes this is
    //
    //   auto& ref = map[k1]; map[k2] = foo(ref);
    //
    // One way to fix this is to call map.reserve(N) before such a
    // sequence, where N is the number of keys that might be inserted
    // within the section that retains references.
    if (asanShouldAddExtraRehash()) {
      asanExtraRehash();
    }
  }

 public:
  // user has no control over max_load_factor
  void rehash(std::size_t capacity) {
    reserve(capacity);
  }

  void reserve(std::size_t capacity) {
    // We want to support the pattern
    //   map.reserve(2); auto& r1 = map[k1]; auto& r2 = map[k2];
    asanOnReserve(capacity);
    reserveImpl(
        std::max<std::size_t>(capacity, size()),
        chunkMask_ + 1,
        bucket_count());
  }

  // Makes sure there is room to insert `incoming` more items without
  // exceeding the current max size without rehash, rehashing now if
  // necessary.
  void reserveForInsert(size_t incoming = 1) {
    auto capacity = size() + incoming;
    auto bc = bucket_count();
    if (capacity - 1 >= bc) {
      reserveImpl(capacity, chunkMask_ + 1, bc);
    }
  }

  // Returns (pos, true) if a value was constructed, or (pos, false) if the
  // key was already present. key is only used during the search; all
  // constructor args for an inserted value come from args... key won't be
  // accessed after args are touched.
  template <typename K, typename... Args>
  std::pair<ItemIter, bool> tryEmplaceValue(K const& key, Args&&... args) {
    const auto hp = splitHash(this->computeKeyHash(key));

    if (size() > 0) {
      auto existing = findImpl(hp, key);
      if (!existing.atEnd()) {
        return std::make_pair(existing, false);
      }
    }

    asanOnInsert();

    reserveForInsert();

    std::size_t index = hp.first;
    ChunkPtr chunk = chunks_ + (index & chunkMask_);
    auto firstEmpty = chunk->firstEmpty();

    if (!firstEmpty.hasIndex()) {
      std::size_t delta = probeDelta(hp);
      do {
        chunk->incrOutboundOverflowCount();
        index += delta;
        chunk = chunks_ + (index & chunkMask_);
        firstEmpty = chunk->firstEmpty();
      } while (!firstEmpty.hasIndex());
      chunk->adjustHostedOverflowCount(Chunk::kIncrHostedOverflowCount);
    }
    std::size_t itemIndex = firstEmpty.index();

    FOLLY_SAFE_DCHECK(!chunk->occupied(itemIndex), "");
    chunk->setTag(itemIndex, hp.second);
    ItemIter iter{chunk, itemIndex};

    // insertAtBlank will clear the tag if the constructor throws
    insertAtBlank(iter, hp, std::forward<Args>(args)...);
    return std::make_pair(iter, true);
  }

 private:
  template <bool Reset>
  void clearImpl() noexcept {
    if (chunks_ == Chunk::emptyInstance()) {
      FOLLY_SAFE_DCHECK(empty() && bucket_count() == 0, "");
      return;
    }

    // turn clear into reset if the table is >= 16 chunks so that
    // we don't get too low a load factor
    bool willReset = Reset || chunkMask_ + 1 >= 16;

    auto origSize = size();
    auto origCapacity = bucket_count();
    if (willReset) {
      this->beforeReset(origSize, origCapacity);
    } else {
      this->beforeClear(origSize, origCapacity);
    }

    if (!empty()) {
      if (destroyItemOnClear()) {
        for (std::size_t ci = 0; ci <= chunkMask_; ++ci) {
          ChunkPtr chunk = chunks_ + ci;
          auto iter = chunk->occupiedIter();
          if (prefetchBeforeDestroy()) {
            for (auto piter = iter; piter.hasNext();) {
              this->prefetchValue(chunk->item(piter.next()));
            }
          }
          while (iter.hasNext()) {
            this->destroyItem(chunk->item(iter.next()));
          }
        }
      }
      if (!willReset) {
        // It's okay to do this in a separate loop because we only do it
        // when the chunk count is small. That avoids a branch when we
        // are promoting a clear to a reset for a large table.
        auto c0c = chunks_[0].chunk0Capacity();
        for (std::size_t ci = 0; ci <= chunkMask_; ++ci) {
          chunks_[ci].clear();
        }
        chunks_[0].markEof(c0c);
      }
      if (kEnableItemIteration) {
        sizeAndPackedBegin_.packedBegin() = ItemIter{}.pack();
      }
      sizeAndPackedBegin_.size_ = 0;
    }

    if (willReset) {
      BytePtr rawAllocation = std::pointer_traits<BytePtr>::pointer_to(
          *static_cast<uint8_t*>(static_cast<void*>(&*chunks_)));
      std::size_t rawSize = chunkAllocSize(chunkMask_ + 1, bucket_count());

      chunks_ = Chunk::emptyInstance();
      chunkMask_ = 0;

      this->afterReset(origSize, origCapacity, rawAllocation, rawSize);
    } else {
      this->afterClear(origSize, origCapacity);
    }
  }

  void eraseImpl(ItemIter pos, HashPair hp) {
    this->destroyItem(pos.item());
    adjustSizeAndBeginBeforeErase(pos);
    eraseBlank(pos, hp);
  }

 public:
  // The item needs to still be hashable during this call. If you want
  // to intercept the value before it is destroyed (to extract it, for
  // example), use eraseIterInto(pos, beforeDestroy).
  void eraseIter(ItemIter pos) {
    eraseIterInto(pos, [](value_type&&) {});
  }

  // The item needs to still be hashable during this call. If you want
  // to intercept the value before it is destroyed (to extract it, for
  // example), do so in the beforeDestroy callback.
  template <typename BeforeDestroy>
  void eraseIterInto(ItemIter pos, BeforeDestroy&& beforeDestroy) {
    HashPair hp{};
    if (pos.chunk()->hostedOverflowCount() != 0) {
      hp = splitHash(this->computeItemHash(pos.citem()));
    }
    beforeDestroy(this->valueAtItemForExtract(pos.item()));
    eraseImpl(pos, hp);
  }

  template <typename K>
  std::size_t eraseKey(K const& key) {
    return eraseKeyInto(key, [](value_type&&) {});
  }

  template <typename K, typename BeforeDestroy>
  std::size_t eraseKeyInto(K const& key, BeforeDestroy&& beforeDestroy) {
    if (UNLIKELY(size() == 0)) {
      return 0;
    }
    auto hp = splitHash(this->computeKeyHash(key));
    auto iter = findImpl(hp, key);
    if (!iter.atEnd()) {
      beforeDestroy(this->valueAtItemForExtract(iter.item()));
      eraseImpl(iter, hp);
      return 1;
    } else {
      return 0;
    }
  }
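
  // Illustrative use of eraseKeyInto (hypothetical caller code; `table`,
  // `key`, and `consume` are stand-ins, not part of this class): hand the
  // value off to the caller just before it is destroyed.
  //
  //   table.eraseKeyInto(
  //       key, [&](value_type&& v) { consume(std::move(v)); });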
  void clear() noexcept {
    if (kIsSanitizeAddress) {
      // force recycling of heap memory
      auto bc = bucket_count();
      reset();
      try {
        reserveImpl(bc, 0, 0);
      } catch (std::bad_alloc const&) {
        // ASAN mode only, keep going
      }
    } else {
      clearImpl<false>();
    }
  }

  // Like clear(), but always frees all dynamic storage allocated
  // by the table.
  void reset() noexcept {
    clearImpl<true>();
  }

  // Get memory footprint, not including sizeof(*this).
  std::size_t getAllocatedMemorySize() const {
    std::size_t sum = 0;
    visitAllocationClasses(
        [&sum](std::size_t bytes, std::size_t n) { sum += bytes * n; });
    return sum;
  }

  // Enumerates classes of allocated memory blocks currently owned
  // by this table, calling visitor(allocationSize, allocationCount).
  // This can be used to get a more accurate indication of memory footprint
  // than getAllocatedMemorySize() if you have some way of computing the
  // internal fragmentation of the allocator, such as JEMalloc's nallocx.
  // The visitor might be called twice with the same allocationSize. The
  // visitor's computation should produce the same result for visitor(8,
  // 2) as for two calls to visitor(8, 1), for example. The visitor may
  // be called with a zero allocationCount.
  template <typename V>
  void visitAllocationClasses(V&& visitor) const {
    auto bc = bucket_count();
    this->visitPolicyAllocationClasses(
        (bc == 0 ? 0 : chunkAllocSize(chunkMask_ + 1, bc)),
        size(),
        bc,
        visitor);
  }

  // visitor should take an Item const&
  template <typename V>
  void visitItems(V&& visitor) const {
    if (empty()) {
      return;
    }
    std::size_t maxChunkIndex = lastOccupiedChunk() - chunks_;
    auto chunk = &chunks_[0];
    for (std::size_t i = 0; i <= maxChunkIndex; ++i, ++chunk) {
      auto iter = chunk->occupiedIter();
      if (prefetchBeforeCopy()) {
        for (auto piter = iter; piter.hasNext();) {
          this->prefetchValue(chunk->citem(piter.next()));
        }
      }
      while (iter.hasNext()) {
        visitor(chunk->citem(iter.next()));
      }
    }
  }

  // visitor should take two Item const*
  template <typename V>
  void visitContiguousItemRanges(V&& visitor) const {
    if (empty()) {
      return;
    }
    std::size_t maxChunkIndex = lastOccupiedChunk() - chunks_;
    auto chunk = &chunks_[0];
    for (std::size_t i = 0; i <= maxChunkIndex; ++i, ++chunk) {
      for (auto iter = chunk->occupiedRangeIter(); iter.hasNext();) {
        auto be = iter.next();
        FOLLY_SAFE_DCHECK(
            chunk->occupied(be.first) && chunk->occupied(be.second - 1), "");
        Item const* b = chunk->itemAddr(be.first);
        visitor(b, b + (be.second - be.first));
      }
    }
  }
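
  // Illustrative visitor (hypothetical caller code; doSomethingWithCount
  // is a stand-in): count the items in each contiguous occupied range.
  //
  //   table.visitContiguousItemRanges([](Item const* b, Item const* e) {
  //     doSomethingWithCount(e - b);
  //   });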
 private:
  static std::size_t& histoAt(
      std::vector<std::size_t>& histo,
      std::size_t index) {
    if (histo.size() <= index) {
      histo.resize(index + 1);
    }
    return histo.at(index);
  }

 public:
  // Expensive
  F14TableStats computeStats() const {
    F14TableStats stats;

    if (kIsDebug && kEnableItemIteration) {
      // validate iteration
      std::size_t n = 0;
      ItemIter prev;
      for (auto iter = begin(); iter != end(); iter.advance()) {
        FOLLY_SAFE_DCHECK(n == 0 || iter.pack() < prev.pack(), "");
        ++n;
        prev = iter;
      }
      FOLLY_SAFE_DCHECK(n == size(), "");
    }

    FOLLY_SAFE_DCHECK(
        (chunks_ == Chunk::emptyInstance()) == (bucket_count() == 0), "");

    std::size_t n1 = 0;
    std::size_t n2 = 0;
    auto cc = bucket_count() == 0 ? 0 : chunkMask_ + 1;
    for (std::size_t ci = 0; ci < cc; ++ci) {
      ChunkPtr chunk = chunks_ + ci;
      FOLLY_SAFE_DCHECK(chunk->eof() == (ci == 0), "");

      auto iter = chunk->occupiedIter();

      std::size_t chunkOccupied = 0;
      for (auto piter = iter; piter.hasNext(); piter.next()) {
        ++chunkOccupied;
      }
      n1 += chunkOccupied;

      histoAt(stats.chunkOccupancyHisto, chunkOccupied)++;
      histoAt(
          stats.chunkOutboundOverflowHisto, chunk->outboundOverflowCount())++;
      histoAt(stats.chunkHostedOverflowHisto, chunk->hostedOverflowCount())++;

      while (iter.hasNext()) {
        auto ii = iter.next();
        ++n2;

        {
          auto& item = chunk->citem(ii);
          auto hp = splitHash(this->computeItemHash(item));
          FOLLY_SAFE_DCHECK(chunk->tag(ii) == hp.second, "");

          std::size_t dist = 1;
          std::size_t index = hp.first;
          std::size_t delta = probeDelta(hp);
          while ((index & chunkMask_) != ci) {
            index += delta;
            ++dist;
          }

          histoAt(stats.keyProbeLengthHisto, dist)++;
        }

        // misses could have any tag, so we do the dumb but accurate
        // thing and just try them all
        for (std::size_t ti = 0; ti < 256; ++ti) {
          uint8_t tag = static_cast<uint8_t>(ti == 0 ? 1 : 0);
          HashPair hp{ci, tag};
          std::size_t dist = 1;
          std::size_t index = hp.first;
          std::size_t delta = probeDelta(hp);
          for (std::size_t tries = 0; tries <= chunkMask_ &&
               chunks_[index & chunkMask_].outboundOverflowCount() != 0;
               ++tries) {
            index += delta;
            ++dist;
          }

          histoAt(stats.missProbeLengthHisto, dist)++;
        }
      }
    }

    FOLLY_SAFE_DCHECK(n1 == size(), "");
    FOLLY_SAFE_DCHECK(n2 == size(), "");

#if FOLLY_HAS_RTTI
    stats.policy = typeid(Policy).name();
#endif
    stats.size = size();
    stats.valueSize = sizeof(value_type);
    stats.bucketCount = bucket_count();
    stats.chunkCount = cc;

    stats.totalBytes = sizeof(*this) + getAllocatedMemorySize();
    stats.overheadBytes = stats.totalBytes - size() * sizeof(value_type);

    return stats;
  }
};

} // namespace detail
} // namespace f14

#endif // FOLLY_F14_VECTOR_INTRINSICS_AVAILABLE

} // namespace folly