- /*
- * Copyright 2017-present Facebook, Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
- #pragma once
- #include <cstddef>
- #include <cstdint>
- #include <cstring>
- #include <array>
- #include <iterator>
- #include <limits>
- #include <memory>
- #include <new>
- #include <type_traits>
- #include <utility>
- #include <vector>
- #include <folly/Bits.h>
- #include <folly/ConstexprMath.h>
- #include <folly/Likely.h>
- #include <folly/Portability.h>
- #include <folly/ScopeGuard.h>
- #include <folly/Traits.h>
- #include <folly/functional/ApplyTuple.h>
- #include <folly/functional/Invoke.h>
- #include <folly/lang/Align.h>
- #include <folly/lang/Assume.h>
- #include <folly/lang/Exception.h>
- #include <folly/lang/Launder.h>
- #include <folly/lang/SafeAssert.h>
- #include <folly/portability/Builtins.h>
- #include <folly/container/detail/F14Defaults.h>
- #include <folly/container/detail/F14IntrinsicsAvailability.h>
- #if FOLLY_ASAN_ENABLED && defined(FOLLY_TLS)
- #define FOLLY_F14_TLS_IF_ASAN FOLLY_TLS
- #else
- #define FOLLY_F14_TLS_IF_ASAN
- #endif
- #if FOLLY_F14_VECTOR_INTRINSICS_AVAILABLE
- #if FOLLY_F14_CRC_INTRINSIC_AVAILABLE
- #if FOLLY_NEON
- #include <arm_acle.h> // __crc32cd
- #else
- #include <nmmintrin.h> // _mm_crc32_u64
- #endif
- #else
- #ifdef _WIN32
- #include <intrin.h> // _mul128 in fallback bit mixer
- #endif
- #endif
- #if FOLLY_NEON
- #include <arm_neon.h> // uint8x16_t intrinsics
- #else // SSE2
- #include <immintrin.h> // __m128i intrinsics
- #include <xmmintrin.h> // _mm_prefetch
- #endif
- #endif
- namespace folly {
- struct F14TableStats {
- char const* policy;
- std::size_t size{0};
- std::size_t valueSize{0};
- std::size_t bucketCount{0};
- std::size_t chunkCount{0};
- std::vector<std::size_t> chunkOccupancyHisto;
- std::vector<std::size_t> chunkOutboundOverflowHisto;
- std::vector<std::size_t> chunkHostedOverflowHisto;
- std::vector<std::size_t> keyProbeLengthHisto;
- std::vector<std::size_t> missProbeLengthHisto;
- std::size_t totalBytes{0};
- std::size_t overheadBytes{0};
- private:
- template <typename T>
- static auto computeHelper(T const* m) -> decltype(m->computeStats()) {
- return m->computeStats();
- }
- static F14TableStats computeHelper(...) {
- return {};
- }
- public:
- template <typename T>
- static F14TableStats compute(T const& m) {
- return computeHelper(&m);
- }
- };
- namespace f14 {
- namespace detail {
- template <F14IntrinsicsMode>
- struct F14LinkCheck {};
- template <>
- struct F14LinkCheck<getF14IntrinsicsMode()> {
- // The purpose of this method is to trigger a link failure if
- // compilation flags vary across compilation units. The definition
- // is in F14Table.cpp, so only one of F14LinkCheck<None>::check,
- // F14LinkCheck<Simd>::check, or F14LinkCheck<SimdAndCrc>::check will
- // be available at link time.
- //
- // To cause a link failure the function must be invoked in code that
- // is not optimized away, so we call it on a couple of cold paths
- // (exception handling paths in copy construction and rehash). LTO may
- // remove it entirely, but that's fine.
- static void check() noexcept;
- };
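- // For illustration, the same idiom in miniature (names here are
- // hypothetical, not part of folly): declare a template whose member is
- // defined, in a single .cpp, only for the configuration that file was
- // built with, so that TUs built with a different configuration fail to
- // link rather than silently mixing incompatible layouts:
- //
- //   // check.h
- //   template <int ConfigId>
- //   struct LinkCheck {
- //     static void check() noexcept;
- //   };
- //
- //   // check.cpp, built with constexpr int kCurrentConfig
- //   template <>
- //   void LinkCheck<kCurrentConfig>::check() noexcept {}
- //
- //   // callers invoke LinkCheck<kCurrentConfig>::check() on a cold path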
- #if defined(_LIBCPP_VERSION)
- template <typename K, typename V, typename H>
- struct StdNodeReplica {
- void* next;
- std::size_t hash;
- V value;
- };
- #else
- template <typename H>
- struct StdIsFastHash : std::true_type {};
- template <>
- struct StdIsFastHash<std::hash<long double>> : std::false_type {};
- template <typename... Args>
- struct StdIsFastHash<std::hash<std::basic_string<Args...>>> : std::false_type {
- };
- // TODO: add specialization for std::basic_string_view
- // mimics the internal node of STL unordered containers so we can estimate their size
- template <typename K, typename V, typename H, typename Enable = void>
- struct StdNodeReplica {
- void* next;
- V value;
- };
- template <typename K, typename V, typename H>
- struct StdNodeReplica<
- K,
- V,
- H,
- std::enable_if_t<
- !StdIsFastHash<H>::value || !is_nothrow_invocable<H, K>::value>> {
- void* next;
- V value;
- std::size_t hash;
- };
- #endif
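- // To make the estimate concrete: under this model the per-node heap
- // overhead of std::unordered_map<int, int> on an LP64 platform would be
- // sizeof(StdNodeReplica<int, std::pair<int const, int>, std::hash<int>>),
- // i.e. 16 bytes (next pointer plus value) when std::hash<int> is fast
- // and nothrow, and 24 bytes when a hash field must also be cached.
- // This is a rough sketch; real standard libraries may differ.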
- } // namespace detail
- } // namespace f14
- #if FOLLY_F14_VECTOR_INTRINSICS_AVAILABLE
- namespace f14 {
- namespace detail {
- template <typename Policy>
- class F14Table;
- } // namespace detail
- } // namespace f14
- class F14HashToken final {
- public:
- F14HashToken() = default;
- private:
- using HashPair = std::pair<std::size_t, std::size_t>;
- explicit F14HashToken(HashPair hp) : hp_(hp) {}
- explicit operator HashPair() const {
- return hp_;
- }
- HashPair hp_;
- template <typename Policy>
- friend class f14::detail::F14Table;
- };
- namespace f14 {
- namespace detail {
- //// Defaults should be selected using void
- template <typename Arg, typename Default>
- using VoidDefault =
- std::conditional_t<std::is_same<Arg, Default>::value, void, Arg>;
- template <typename Arg, typename Default>
- using Defaulted =
- typename std::conditional_t<std::is_same<Arg, void>::value, Default, Arg>;
- template <
- typename TableKey,
- typename Hasher,
- typename KeyEqual,
- typename ArgKey,
- typename Void = void>
- struct EligibleForHeterogeneousFind : std::false_type {};
- template <
- typename TableKey,
- typename Hasher,
- typename KeyEqual,
- typename ArgKey>
- struct EligibleForHeterogeneousFind<
- TableKey,
- Hasher,
- KeyEqual,
- ArgKey,
- void_t<typename Hasher::is_transparent, typename KeyEqual::is_transparent>>
- : std::true_type {};
- template <
- typename TableKey,
- typename Hasher,
- typename KeyEqual,
- typename ArgKey>
- using EligibleForHeterogeneousInsert = Conjunction<
- EligibleForHeterogeneousFind<TableKey, Hasher, KeyEqual, ArgKey>,
- std::is_constructible<TableKey, ArgKey>>;
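- // For illustration, a transparent hasher/key_equal pair that satisfies
- // these traits (hypothetical helpers, assuming C++17 std::string_view):
- //
- //   struct TransparentHash {
- //     using is_transparent = void;
- //     std::size_t operator()(std::string_view s) const {
- //       return std::hash<std::string_view>{}(s);
- //     }
- //   };
- //   struct TransparentEqual {
- //     using is_transparent = void;
- //     bool operator()(std::string_view a, std::string_view b) const {
- //       return a == b;
- //     }
- //   };
- //   static_assert(
- //       EligibleForHeterogeneousFind<
- //           std::string, TransparentHash, TransparentEqual,
- //           std::string_view>::value, "");
- //
- // With such functors a table keyed on std::string can find() by
- // std::string_view without constructing a temporary key, and since
- // std::string is constructible from std::string_view the insert
- // variant is satisfied as well.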
- template <
- typename TableKey,
- typename Hasher,
- typename KeyEqual,
- typename KeyArg0OrBool,
- typename... KeyArgs>
- using KeyTypeForEmplaceHelper = std::conditional_t<
- sizeof...(KeyArgs) == 1 &&
- (std::is_same<remove_cvref_t<KeyArg0OrBool>, TableKey>::value ||
- EligibleForHeterogeneousFind<
- TableKey,
- Hasher,
- KeyEqual,
- KeyArg0OrBool>::value),
- KeyArg0OrBool&&,
- TableKey>;
- template <
- typename TableKey,
- typename Hasher,
- typename KeyEqual,
- typename... KeyArgs>
- using KeyTypeForEmplace = KeyTypeForEmplaceHelper<
- TableKey,
- Hasher,
- KeyEqual,
- std::tuple_element_t<0, std::tuple<KeyArgs..., bool>>,
- KeyArgs...>;
- ////////////////
- template <typename T>
- FOLLY_ALWAYS_INLINE static void prefetchAddr(T const* ptr) {
- #ifndef _WIN32
- __builtin_prefetch(static_cast<void const*>(ptr));
- #elif FOLLY_NEON
- __prefetch(static_cast<void const*>(ptr));
- #else
- _mm_prefetch(
- static_cast<char const*>(static_cast<void const*>(ptr)), _MM_HINT_T0);
- #endif
- }
- template <typename T>
- FOLLY_ALWAYS_INLINE static unsigned findFirstSetNonZero(T mask) {
- assume(mask != 0);
- if (sizeof(mask) == sizeof(unsigned)) {
- return __builtin_ctz(static_cast<unsigned>(mask));
- } else {
- return __builtin_ctzll(mask);
- }
- }
- #if FOLLY_NEON
- using TagVector = uint8x16_t;
- using MaskType = uint64_t;
- constexpr unsigned kMaskSpacing = 4;
- #else // SSE2
- using TagVector = __m128i;
- using MaskType = uint32_t;
- constexpr unsigned kMaskSpacing = 1;
- #endif
- // We could use unaligned loads to relax this requirement, but that
- // would both incur a performance penalty and require a bulkier packed
- // ItemIter format
- constexpr std::size_t kRequiredVectorAlignment =
- constexpr_max(std::size_t{16}, alignof(max_align_t));
- using EmptyTagVectorType = std::aligned_storage_t<
- sizeof(TagVector) + kRequiredVectorAlignment,
- alignof(max_align_t)>;
- extern EmptyTagVectorType kEmptyTagVector;
- extern FOLLY_F14_TLS_IF_ASAN std::size_t asanPendingSafeInserts;
- extern FOLLY_F14_TLS_IF_ASAN std::size_t asanRehashState;
- template <unsigned BitCount>
- struct FullMask {
- static constexpr MaskType value =
- (FullMask<BitCount - 1>::value << kMaskSpacing) + 1;
- };
- template <>
- struct FullMask<1> : std::integral_constant<MaskType, 1> {};
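- // For example, with kMaskSpacing == 1 (SSE2) FullMask<14>::value is
- // 0x3fff, fourteen adjacent bits; with kMaskSpacing == 4 (NEON) it is
- // 0x11111111111111, one bit in each of fourteen nibbles, matching how
- // each platform's vector code extracts its mask.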
- #if FOLLY_ARM
- // Mask iteration is different for ARM because that is the only platform
- // for which the mask is bigger than a register.
- // Iterates a mask, optimized for the case that only a few bits are set
- class SparseMaskIter {
- static_assert(kMaskSpacing == 4, "");
- uint32_t interleavedMask_;
- public:
- explicit SparseMaskIter(MaskType mask)
- : interleavedMask_{static_cast<uint32_t>(((mask >> 32) << 2) | mask)} {}
- bool hasNext() {
- return interleavedMask_ != 0;
- }
- unsigned next() {
- FOLLY_SAFE_DCHECK(hasNext(), "");
- unsigned i = findFirstSetNonZero(interleavedMask_);
- interleavedMask_ &= (interleavedMask_ - 1);
- return ((i >> 2) | (i << 2)) & 0xf;
- }
- };
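- // To see that next() recovers lane indexes from the interleaved form:
- // lane j of the low half occupies bit 4*j, and lane j of the high half
- // (overall lane 8+j) lands on bit 4*j+2 after the (mask >> 32) << 2
- // fold above. For i == 4*j, ((i >> 2) | (i << 2)) & 0xf yields j; for
- // i == 4*j+2 it yields 8+j.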
- // Iterates a mask, optimized for the case that most bits are set
- class DenseMaskIter {
- static_assert(kMaskSpacing == 4, "");
- std::size_t count_;
- unsigned index_;
- uint8_t const* tags_;
- public:
- explicit DenseMaskIter(uint8_t const* tags, MaskType mask) {
- if (mask == 0) {
- count_ = 0;
- } else {
- count_ = popcount(static_cast<uint32_t>(((mask >> 32) << 2) | mask));
- if (LIKELY((mask & 1) != 0)) {
- index_ = 0;
- } else {
- index_ = findFirstSetNonZero(mask) / kMaskSpacing;
- }
- tags_ = tags;
- }
- }
- bool hasNext() {
- return count_ > 0;
- }
- unsigned next() {
- auto rv = index_;
- --count_;
- if (count_ > 0) {
- do {
- ++index_;
- } while ((tags_[index_] & 0x80) == 0);
- }
- FOLLY_SAFE_DCHECK(index_ < 16, "");
- return rv;
- }
- };
- #else
- // Iterates a mask, optimized for the case that only a few bits are set
- class SparseMaskIter {
- MaskType mask_;
- public:
- explicit SparseMaskIter(MaskType mask) : mask_{mask} {}
- bool hasNext() {
- return mask_ != 0;
- }
- unsigned next() {
- FOLLY_SAFE_DCHECK(hasNext(), "");
- unsigned i = findFirstSetNonZero(mask_);
- mask_ &= (mask_ - 1);
- return i / kMaskSpacing;
- }
- };
- // Iterates a mask, optimized for the case that most bits are set
- class DenseMaskIter {
- MaskType mask_;
- unsigned index_{0};
- public:
- explicit DenseMaskIter(uint8_t const*, MaskType mask) : mask_{mask} {}
- bool hasNext() {
- return mask_ != 0;
- }
- unsigned next() {
- FOLLY_SAFE_DCHECK(hasNext(), "");
- if (LIKELY((mask_ & 1) != 0)) {
- mask_ >>= kMaskSpacing;
- return index_++;
- } else {
- unsigned s = findFirstSetNonZero(mask_);
- unsigned rv = index_ + (s / kMaskSpacing);
- mask_ >>= (s + kMaskSpacing);
- index_ = rv + 1;
- return rv;
- }
- }
- };
- #endif
- // Iterates a mask, returning pairs of [begin,end) index covering blocks
- // of set bits
- class MaskRangeIter {
- MaskType mask_;
- unsigned shift_{0};
- public:
- explicit MaskRangeIter(MaskType mask) {
- // If kMaskSpacing is > 1 then there will be empty bits even for
- // contiguous ranges. Fill them in.
- mask_ = mask * ((1 << kMaskSpacing) - 1);
- }
- bool hasNext() {
- return mask_ != 0;
- }
- std::pair<unsigned, unsigned> next() {
- FOLLY_SAFE_DCHECK(hasNext(), "");
- auto s = shift_;
- unsigned b = findFirstSetNonZero(mask_);
- unsigned e = findFirstSetNonZero(~(mask_ | (mask_ - 1)));
- mask_ >>= e;
- shift_ = s + e;
- return std::make_pair((s + b) / kMaskSpacing, (s + e) / kMaskSpacing);
- }
- };
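- // A worked example with kMaskSpacing == 1: for mask 0b0110 the first
- // next() computes b == 1 and e == 3 (the index of the first zero above
- // the run, found via ~(mask_ | (mask_ - 1))), shifts mask_ right by 3,
- // and returns the half-open range [1, 3); shift_ advances to 3 so a
- // later block of set bits is reported at its original position.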
- // Holds the result of an index query that has an optional result,
- // interpreting a mask of 0 to be the empty answer and the index of the
- // last set bit to be the non-empty answer
- class LastOccupiedInMask {
- MaskType mask_;
- public:
- explicit LastOccupiedInMask(MaskType mask) : mask_{mask} {}
- bool hasIndex() const {
- return mask_ != 0;
- }
- unsigned index() const {
- assume(mask_ != 0);
- return (findLastSet(mask_) - 1) / kMaskSpacing;
- }
- };
- // Holds the result of an index query that has an optional result,
- // interpreting a mask of 0 to be the empty answer and the index of the
- // first set bit to be the non-empty answer
- class FirstEmptyInMask {
- MaskType mask_;
- public:
- explicit FirstEmptyInMask(MaskType mask) : mask_{mask} {}
- bool hasIndex() const {
- return mask_ != 0;
- }
- unsigned index() const {
- FOLLY_SAFE_DCHECK(mask_ != 0, "");
- return findFirstSetNonZero(mask_) / kMaskSpacing;
- }
- };
- template <typename ItemType>
- struct alignas(kRequiredVectorAlignment) F14Chunk {
- using Item = ItemType;
- // For our 16 byte vector alignment (and assuming alignof(Item) >=
- // 4), a kCapacity of 14 is the most space efficient. Slightly smaller
- // or larger capacities can help with cache alignment in a couple of
- // cases without wasting too much space, but once the items are larger
- // we're unlikely to get much benefit anyway. The only cases we
- // optimize are using kCapacity of 12 for 4 byte items, which makes the
- // chunk take exactly 1 cache line, and adding 16 bytes of padding for
- // 16 byte items so that a chunk takes exactly 4 cache lines.
- static constexpr unsigned kCapacity = sizeof(Item) == 4 ? 12 : 14;
- static constexpr unsigned kDesiredCapacity = kCapacity - 2;
- static constexpr unsigned kAllocatedCapacity =
- kCapacity + (sizeof(Item) == 16 ? 1 : 0);
- static constexpr MaskType kFullMask = FullMask<kCapacity>::value;
- // Non-empty tags have their top bit set. tags_ array might be bigger
- // than kCapacity to keep alignment of first item.
- std::array<uint8_t, 14> tags_;
- // Bits 0..3 record the actual capacity of the chunk if this is chunk
- // zero, or hold 0000 for other chunks. Bits 4..7 are a 4-bit counter
- // of the number of values in this chunk that were placed because they
- // overflowed their desired chunk (hostedOverflowCount).
- uint8_t control_;
- // The number of values that would have been placed into this chunk if
- // there had been space, including values that also overflowed previous
- // full chunks. This value saturates; once it becomes 255 it no longer
- // increases nor decreases.
- uint8_t outboundOverflowCount_;
- std::array<
- std::aligned_storage_t<sizeof(Item), alignof(Item)>,
- kAllocatedCapacity>
- rawItems_;
- static F14Chunk* emptyInstance() {
- auto raw = reinterpret_cast<char*>(&kEmptyTagVector);
- if (kRequiredVectorAlignment > alignof(max_align_t)) {
- auto delta = kRequiredVectorAlignment -
- (reinterpret_cast<uintptr_t>(raw) % kRequiredVectorAlignment);
- raw += delta;
- }
- auto rv = reinterpret_cast<F14Chunk*>(raw);
- FOLLY_SAFE_DCHECK(
- (reinterpret_cast<uintptr_t>(rv) % kRequiredVectorAlignment) == 0, "");
- return rv;
- }
- void clear() {
- // tags_ = {}; control_ = 0; outboundOverflowCount_ = 0;
- // gcc < 6 doesn't exploit chunk alignment to generate the optimal
- // SSE clear from memset. This is very hot code, so it is worth
- // handling that case specially.
- #if FOLLY_SSE >= 2 && __GNUC__ <= 5 && !__clang__
- // this doesn't violate strict aliasing rules because __m128i is
- // tagged as __may_alias__
- auto* v = static_cast<__m128i*>(static_cast<void*>(&tags_[0]));
- _mm_store_si128(v, _mm_setzero_si128());
- #else
- std::memset(&tags_[0], '\0', 16);
- #endif
- }
- void copyOverflowInfoFrom(F14Chunk const& rhs) {
- FOLLY_SAFE_DCHECK(hostedOverflowCount() == 0, "");
- control_ += static_cast<uint8_t>(rhs.control_ & 0xf0);
- outboundOverflowCount_ = rhs.outboundOverflowCount_;
- }
- unsigned hostedOverflowCount() const {
- return control_ >> 4;
- }
- static constexpr uint8_t kIncrHostedOverflowCount = 0x10;
- static constexpr uint8_t kDecrHostedOverflowCount =
- static_cast<uint8_t>(-0x10);
- void adjustHostedOverflowCount(uint8_t op) {
- control_ += op;
- }
- bool eof() const {
- return (control_ & 0xf) != 0;
- }
- std::size_t chunk0Capacity() const {
- return control_ & 0xf;
- }
- void markEof(std::size_t c0c) {
- FOLLY_SAFE_DCHECK(
- this != emptyInstance() && control_ == 0 && c0c > 0 && c0c <= 0xf &&
- c0c <= kCapacity,
- "");
- control_ = static_cast<uint8_t>(c0c);
- }
- unsigned outboundOverflowCount() const {
- return outboundOverflowCount_;
- }
- void incrOutboundOverflowCount() {
- if (outboundOverflowCount_ != 255) {
- ++outboundOverflowCount_;
- }
- }
- void decrOutboundOverflowCount() {
- if (outboundOverflowCount_ != 255) {
- --outboundOverflowCount_;
- }
- }
- std::size_t tag(std::size_t index) const {
- return tags_[index];
- }
- void setTag(std::size_t index, std::size_t tag) {
- FOLLY_SAFE_DCHECK(
- this != emptyInstance() && tag >= 0x80 && tag <= 0xff, "");
- tags_[index] = static_cast<uint8_t>(tag);
- }
- void clearTag(std::size_t index) {
- tags_[index] = 0;
- }
- #if FOLLY_NEON
- ////////
- // Tag filtering using NEON intrinsics
- SparseMaskIter tagMatchIter(std::size_t needle) const {
- FOLLY_SAFE_DCHECK(needle >= 0x80 && needle < 0x100, "");
- uint8x16_t tagV = vld1q_u8(&tags_[0]);
- auto needleV = vdupq_n_u8(static_cast<uint8_t>(needle));
- auto eqV = vceqq_u8(tagV, needleV);
- // get info from every byte into the bottom half of every uint16_t
- // by shifting right 4, then round to get it into a 64-bit vector
- uint8x8_t maskV = vshrn_n_u16(vreinterpretq_u16_u8(eqV), 4);
- uint64_t mask = vget_lane_u64(vreinterpret_u64_u8(maskV), 0) & kFullMask;
- return SparseMaskIter(mask);
- }
- MaskType occupiedMask() const {
- uint8x16_t tagV = vld1q_u8(&tags_[0]);
- // signed shift extends top bit to all bits
- auto occupiedV =
- vreinterpretq_u8_s8(vshrq_n_s8(vreinterpretq_s8_u8(tagV), 7));
- uint8x8_t maskV = vshrn_n_u16(vreinterpretq_u16_u8(occupiedV), 4);
- return vget_lane_u64(vreinterpret_u64_u8(maskV), 0) & kFullMask;
- }
- #else
- ////////
- // Tag filtering using SSE2 intrinsics
- TagVector const* tagVector() const {
- return static_cast<TagVector const*>(static_cast<void const*>(&tags_[0]));
- }
- SparseMaskIter tagMatchIter(std::size_t needle) const {
- FOLLY_SAFE_DCHECK(needle >= 0x80 && needle < 0x100, "");
- auto tagV = _mm_load_si128(tagVector());
- // TRICKY! It may seem strange to have a std::size_t needle and narrow
- // it at the last moment, rather than making HashPair::second be a
- // uint8_t, but the latter choice sometimes leads to a performance
- // problem.
- //
- // On architectures with SSE2 but not AVX2, _mm_set1_epi8 expands
- // to multiple instructions. One of those is a MOVD of either 4 or
- // 8 byte width. Only the bottom byte of that move actually affects
- // the result, but if a 1-byte needle has been spilled then this will
- // be a 4 byte load. GCC 5.5 has been observed to reload needle
- // (or perhaps fuse a reload and part of a previous static_cast)
- // using a MOVZX with a 1 byte load in parallel with the MOVD.
- // This combination causes a failure of store-to-load forwarding,
- // which has a big performance penalty (60 nanoseconds per find on
- // a microbenchmark). Keeping needle >= 4 bytes avoids the problem
- // and also happens to result in slightly more compact assembly.
- auto needleV = _mm_set1_epi8(static_cast<uint8_t>(needle));
- auto eqV = _mm_cmpeq_epi8(tagV, needleV);
- auto mask = _mm_movemask_epi8(eqV) & kFullMask;
- return SparseMaskIter{mask};
- }
- MaskType occupiedMask() const {
- auto tagV = _mm_load_si128(tagVector());
- return _mm_movemask_epi8(tagV) & kFullMask;
- }
- #endif
- DenseMaskIter occupiedIter() const {
- return DenseMaskIter{&tags_[0], occupiedMask()};
- }
- MaskRangeIter occupiedRangeIter() const {
- return MaskRangeIter{occupiedMask()};
- }
- LastOccupiedInMask lastOccupied() const {
- return LastOccupiedInMask{occupiedMask()};
- }
- FirstEmptyInMask firstEmpty() const {
- return FirstEmptyInMask{occupiedMask() ^ kFullMask};
- }
- bool occupied(std::size_t index) const {
- FOLLY_SAFE_DCHECK(tags_[index] == 0 || (tags_[index] & 0x80) != 0, "");
- return tags_[index] != 0;
- }
- Item* itemAddr(std::size_t i) const {
- return static_cast<Item*>(
- const_cast<void*>(static_cast<void const*>(&rawItems_[i])));
- }
- Item& item(std::size_t i) {
- FOLLY_SAFE_DCHECK(this->occupied(i), "");
- return *launder(itemAddr(i));
- }
- Item const& citem(std::size_t i) const {
- FOLLY_SAFE_DCHECK(this->occupied(i), "");
- return *launder(itemAddr(i));
- }
- static F14Chunk& owner(Item& item, std::size_t index) {
- auto rawAddr =
- static_cast<uint8_t*>(static_cast<void*>(std::addressof(item))) -
- offsetof(F14Chunk, rawItems_) - index * sizeof(Item);
- auto chunkAddr = static_cast<F14Chunk*>(static_cast<void*>(rawAddr));
- FOLLY_SAFE_DCHECK(std::addressof(item) == chunkAddr->itemAddr(index), "");
- return *chunkAddr;
- }
- };
- ////////////////
- // PackedChunkItemPtr points to an Item in an F14Chunk, allowing both the
- // Item& and its index to be recovered. It sorts by the address of the
- // item, and it only works for items that are in a properly-aligned chunk.
- // generic form, not actually packed
- template <typename Ptr>
- class PackedChunkItemPtr {
- public:
- PackedChunkItemPtr(Ptr p, std::size_t i) noexcept : ptr_{p}, index_{i} {
- FOLLY_SAFE_DCHECK(ptr_ != nullptr || index_ == 0, "");
- }
- Ptr ptr() const {
- return ptr_;
- }
- std::size_t index() const {
- return index_;
- }
- bool operator<(PackedChunkItemPtr const& rhs) const {
- FOLLY_SAFE_DCHECK(ptr_ != rhs.ptr_ || index_ == rhs.index_, "");
- return ptr_ < rhs.ptr_;
- }
- bool operator==(PackedChunkItemPtr const& rhs) const {
- FOLLY_SAFE_DCHECK(ptr_ != rhs.ptr_ || index_ == rhs.index_, "");
- return ptr_ == rhs.ptr_;
- }
- bool operator!=(PackedChunkItemPtr const& rhs) const {
- return !(*this == rhs);
- }
- private:
- Ptr ptr_;
- std::size_t index_;
- };
- // Bare pointer form, packed into a uintptr_t. Uses only bits wasted by
- // alignment, so it works on 32-bit and 64-bit platforms
- template <typename T>
- class PackedChunkItemPtr<T*> {
- static_assert((alignof(F14Chunk<T>) % 16) == 0, "");
- // Chunks are 16-byte aligned, so we can maintain a packed pointer to a
- // chunk item by packing the 4-bit item index into the least significant
- // bits of a pointer to the chunk itself. This makes ItemIter::pack
- // more expensive, however, since it has to compute the chunk address.
- //
- // Chunk items have varying alignment constraints, so it would seem
- // that we can't do a similar trick using only bit masking
- // operations on the Item* itself. It happens, however, that if
- // sizeof(Item) is not a multiple of 16 then we can recover a portion
- // of the index bits from the knowledge that the Item-s are stored in
- // an array that is itself 16-byte aligned.
- //
- // If kAlignBits is the number of trailing zero bits in sizeof(Item)
- // (up to 4), then we can borrow those bits to store kAlignBits of the
- // index directly. We can recover (4 - kAlignBits) bits of the index
- // from the item pointer itself, by defining/observing that
- //
- // A = kAlignBits (A <= 4)
- //
- // S = (sizeof(Item) % 16) >> A (shifted-away bits are all zero)
- //
- // R = (itemPtr % 16) >> A (shifted-away bits are all zero)
- //
- // M = 16 >> A
- //
- // itemPtr % 16 = (index * sizeof(Item)) % 16
- //
- // (R * 2^A) % 16 = (index * (sizeof(Item) % 16)) % 16
- //
- // (R * 2^A) % 16 = (index * 2^A * S) % 16
- //
- // R % M = (index * S) % M
- //
- // S is relatively prime with M, so a multiplicative inverse is easy
- // to compute
- //
- // Sinv = S^(M - 1) % M
- //
- // (R * Sinv) % M = index % M
- //
- // This lets us recover the bottom bits of the index. When sizeof(T)
- // is a multiple of 8, kSizeInverse will always be 1. When sizeof(T)
- // is a multiple of 4 but not 8, kSizeInverse will be either 1 or 3.
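- // A worked example: for sizeof(Item) == 12 we get A == 2,
- // S == (12 % 16) >> 2 == 3, M == 16 >> 2 == 4, and
- // Sinv == 3^3 % 4 == 3. Packing index 5 stores encoded == 5 >> 2 == 1
- // in the pointer's low bits; unpacking sees itemPtr % 16 ==
- // (5 * 12) % 16 == 12, so R == 3, deduced == (3 * 3) % 4 == 1, and
- // index() returns (1 << 2) | 1 == 5 as required.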
- // returns pow(x, y) % m
- static constexpr uintptr_t powerMod(uintptr_t x, uintptr_t y, uintptr_t m) {
- return y == 0 ? 1 : (x * powerMod(x, y - 1, m)) % m;
- }
- static constexpr uintptr_t kIndexBits = 4;
- static constexpr uintptr_t kIndexMask = (uintptr_t{1} << kIndexBits) - 1;
- static constexpr uintptr_t kAlignBits = constexpr_min(
- uintptr_t{4},
- constexpr_find_first_set(uintptr_t{sizeof(T)}) - 1);
- static constexpr uintptr_t kAlignMask = (uintptr_t{1} << kAlignBits) - 1;
- static constexpr uintptr_t kModulus = uintptr_t{1}
- << (kIndexBits - kAlignBits);
- static constexpr uintptr_t kSizeInverse =
- powerMod(sizeof(T) >> kAlignBits, kModulus - 1, kModulus);
- public:
- PackedChunkItemPtr(T* p, std::size_t i) noexcept {
- uintptr_t encoded = i >> (kIndexBits - kAlignBits);
- assume((encoded & ~kAlignMask) == 0);
- raw_ = reinterpret_cast<uintptr_t>(p) | encoded;
- FOLLY_SAFE_DCHECK(p == ptr(), "");
- FOLLY_SAFE_DCHECK(i == index(), "");
- }
- T* ptr() const {
- return reinterpret_cast<T*>(raw_ & ~kAlignMask);
- }
- std::size_t index() const {
- auto encoded = (raw_ & kAlignMask) << (kIndexBits - kAlignBits);
- auto deduced =
- ((raw_ >> kAlignBits) * kSizeInverse) & (kIndexMask >> kAlignBits);
- return encoded | deduced;
- }
- bool operator<(PackedChunkItemPtr const& rhs) const {
- return raw_ < rhs.raw_;
- }
- bool operator==(PackedChunkItemPtr const& rhs) const {
- return raw_ == rhs.raw_;
- }
- bool operator!=(PackedChunkItemPtr const& rhs) const {
- return !(*this == rhs);
- }
- private:
- uintptr_t raw_;
- };
- template <typename ChunkPtr>
- class F14ItemIter {
- private:
- using Chunk = typename std::pointer_traits<ChunkPtr>::element_type;
- public:
- using Item = typename Chunk::Item;
- using ItemPtr = typename std::pointer_traits<ChunkPtr>::template rebind<Item>;
- using ItemConstPtr =
- typename std::pointer_traits<ChunkPtr>::template rebind<Item const>;
- using Packed = PackedChunkItemPtr<ItemPtr>;
- //// PUBLIC
- F14ItemIter() noexcept : itemPtr_{nullptr}, index_{0} {}
- // default copy and move constructors and assignment operators are correct
- explicit F14ItemIter(Packed const& packed)
- : itemPtr_{packed.ptr()}, index_{packed.index()} {}
- F14ItemIter(ChunkPtr chunk, std::size_t index)
- : itemPtr_{std::pointer_traits<ItemPtr>::pointer_to(chunk->item(index))},
- index_{index} {
- FOLLY_SAFE_DCHECK(index < Chunk::kCapacity, "");
- assume(
- std::pointer_traits<ItemPtr>::pointer_to(chunk->item(index)) !=
- nullptr);
- assume(itemPtr_ != nullptr);
- }
- FOLLY_ALWAYS_INLINE void advanceImpl(bool checkEof, bool likelyDead) {
- auto c = chunk();
- // common case is packed entries
- while (index_ > 0) {
- --index_;
- --itemPtr_;
- if (LIKELY(c->occupied(index_))) {
- return;
- }
- }
- // It's fairly common for an iterator to be advanced and then become
- // dead, for example in the return value from erase(iter) or in
- // the last step of a loop. We'd like to make sure that the entire
- // advance() method can be eliminated by the compiler's dead code
- // elimination pass. To do that it must eliminate the loops, which
- // requires it to prove that they have no side effects. It's easy
- // to show that there are no escaping stores, but at the moment
- // compilers also consider an infinite loop to be a side effect.
- // (There are parts of the standard that would allow them to treat
- // this as undefined behavior, but at the moment they don't exploit
- // those clauses.)
- //
- // The following loop should really be a while loop, which would
- // save a register, some instructions, and a conditional branch,
- // but by writing it as a for loop the compiler can prove to itself
- // that it will eventually terminate. (No matter that even if the
- // loop executed in a single cycle it would take about 200 years to
- // run all 2^64 iterations.)
- //
- // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=82776 has the bug we
- // filed about the issue; until it is resolved the for loop below
- // stands in for the simpler `while (true) {`.
- for (std::size_t i = 1; !likelyDead || i != 0; ++i) {
- if (checkEof) {
- // exhausted the current chunk
- if (UNLIKELY(c->eof())) {
- FOLLY_SAFE_DCHECK(index_ == 0, "");
- itemPtr_ = nullptr;
- return;
- }
- } else {
- FOLLY_SAFE_DCHECK(!c->eof(), "");
- }
- --c;
- auto last = c->lastOccupied();
- if (checkEof && !likelyDead) {
- prefetchAddr(&*c - 1);
- }
- if (LIKELY(last.hasIndex())) {
- index_ = last.index();
- itemPtr_ = std::pointer_traits<ItemPtr>::pointer_to(c->item(index_));
- return;
- }
- }
- }
- void precheckedAdvance() {
- advanceImpl(false, false);
- }
- FOLLY_ALWAYS_INLINE void advance() {
- advanceImpl(true, false);
- }
- FOLLY_ALWAYS_INLINE void advanceLikelyDead() {
- advanceImpl(true, true);
- }
- ChunkPtr chunk() const {
- return std::pointer_traits<ChunkPtr>::pointer_to(
- Chunk::owner(*itemPtr_, index_));
- }
- std::size_t index() const {
- return index_;
- }
- Item* itemAddr() const {
- return std::addressof(*itemPtr_);
- }
- Item& item() const {
- return *itemPtr_;
- }
- Item const& citem() const {
- return *itemPtr_;
- }
- bool atEnd() const {
- return itemPtr_ == nullptr;
- }
- Packed pack() const {
- return Packed{itemPtr_, static_cast<uint8_t>(index_)};
- }
- bool operator==(F14ItemIter const& rhs) const {
- // this form makes iter == end() into a single null check after inlining
- // and constant propagation
- return itemPtr_ == rhs.itemPtr_;
- }
- bool operator!=(F14ItemIter const& rhs) const {
- return !(*this == rhs);
- }
- private:
- ItemPtr itemPtr_;
- std::size_t index_;
- };
- ////////////////
- template <typename SizeType, typename ItemIter, bool EnablePackedItemIter>
- struct SizeAndPackedBegin {
- SizeType size_{0};
- private:
- typename ItemIter::Packed packedBegin_{ItemIter{}.pack()};
- public:
- typename ItemIter::Packed& packedBegin() {
- return packedBegin_;
- }
- typename ItemIter::Packed const& packedBegin() const {
- return packedBegin_;
- }
- };
- template <typename SizeType, typename ItemIter>
- struct SizeAndPackedBegin<SizeType, ItemIter, false> {
- SizeType size_{0};
- [[noreturn]] typename ItemIter::Packed& packedBegin() {
- assume_unreachable();
- }
- [[noreturn]] typename ItemIter::Packed const& packedBegin() const {
- assume_unreachable();
- }
- };
- template <typename Policy>
- class F14Table : public Policy {
- public:
- using Item = typename Policy::Item;
- using value_type = typename Policy::Value;
- using allocator_type = typename Policy::Alloc;
- private:
- using Alloc = typename Policy::Alloc;
- using AllocTraits = typename Policy::AllocTraits;
- using Hasher = typename Policy::Hasher;
- using InternalSizeType = typename Policy::InternalSizeType;
- using KeyEqual = typename Policy::KeyEqual;
- using Policy::kAllocIsAlwaysEqual;
- using Policy::kDefaultConstructIsNoexcept;
- using Policy::kEnableItemIteration;
- using Policy::kSwapIsNoexcept;
- using Policy::destroyItemOnClear;
- using Policy::isAvalanchingHasher;
- using Policy::prefetchBeforeCopy;
- using Policy::prefetchBeforeDestroy;
- using Policy::prefetchBeforeRehash;
- using ByteAlloc = typename AllocTraits::template rebind_alloc<uint8_t>;
- using BytePtr = typename std::allocator_traits<ByteAlloc>::pointer;
- using Chunk = F14Chunk<Item>;
- using ChunkPtr =
- typename std::pointer_traits<BytePtr>::template rebind<Chunk>;
- using HashPair = typename F14HashToken::HashPair;
- public:
- using ItemIter = F14ItemIter<ChunkPtr>;
- private:
- //////// begin fields
- ChunkPtr chunks_{Chunk::emptyInstance()};
- InternalSizeType chunkMask_{0};
- SizeAndPackedBegin<InternalSizeType, ItemIter, kEnableItemIteration>
- sizeAndPackedBegin_;
- //////// end fields
- void swapContents(F14Table& rhs) noexcept {
- using std::swap;
- swap(chunks_, rhs.chunks_);
- swap(chunkMask_, rhs.chunkMask_);
- swap(sizeAndPackedBegin_.size_, rhs.sizeAndPackedBegin_.size_);
- if (kEnableItemIteration) {
- swap(
- sizeAndPackedBegin_.packedBegin(),
- rhs.sizeAndPackedBegin_.packedBegin());
- }
- }
- public:
- F14Table(
- std::size_t initialCapacity,
- Hasher const& hasher,
- KeyEqual const& keyEqual,
- Alloc const& alloc)
- : Policy{hasher, keyEqual, alloc} {
- if (initialCapacity > 0) {
- reserve(initialCapacity);
- }
- }
- F14Table(F14Table const& rhs) : Policy{rhs} {
- buildFromF14Table(rhs);
- }
- F14Table(F14Table const& rhs, Alloc const& alloc) : Policy{rhs, alloc} {
- buildFromF14Table(rhs);
- }
- F14Table(F14Table&& rhs) noexcept(
- std::is_nothrow_move_constructible<Hasher>::value&&
- std::is_nothrow_move_constructible<KeyEqual>::value&&
- std::is_nothrow_move_constructible<Alloc>::value)
- : Policy{std::move(rhs)} {
- swapContents(rhs);
- }
- F14Table(F14Table&& rhs, Alloc const& alloc) noexcept(kAllocIsAlwaysEqual)
- : Policy{std::move(rhs), alloc} {
- if (kAllocIsAlwaysEqual || this->alloc() == rhs.alloc()) {
- // move storage (common case)
- swapContents(rhs);
- } else {
- // new storage because allocators unequal, move values (rare case)
- buildFromF14Table(std::move(rhs));
- }
- }
- F14Table& operator=(F14Table const& rhs) {
- if (this != &rhs) {
- reset();
- static_cast<Policy&>(*this) = rhs;
- buildFromF14Table(rhs);
- }
- return *this;
- }
- F14Table& operator=(F14Table&& rhs) noexcept(
- std::is_nothrow_move_assignable<Hasher>::value&&
- std::is_nothrow_move_assignable<KeyEqual>::value &&
- (kAllocIsAlwaysEqual ||
- (AllocTraits::propagate_on_container_move_assignment::value &&
- std::is_nothrow_move_assignable<Alloc>::value))) {
- if (this != &rhs) {
- reset();
- static_cast<Policy&>(*this) = std::move(rhs);
- if (AllocTraits::propagate_on_container_move_assignment::value ||
- kAllocIsAlwaysEqual || this->alloc() == rhs.alloc()) {
- // move storage (common case)
- swapContents(rhs);
- } else {
- // new storage because allocators unequal, move values (rare case)
- buildFromF14Table(std::move(rhs));
- }
- }
- return *this;
- }
- ~F14Table() {
- reset();
- }
- void swap(F14Table& rhs) noexcept(kSwapIsNoexcept) {
- // If propagate_on_container_swap is false and allocators are
- // not equal, the only way to accomplish a swap would be to do
- // dynamic allocation and then move (or swap) each contained value.
- // AllocatorAwareContainer-s are not supposed to attempt this, but
- // rather are supposed to have undefined behavior in that case.
- FOLLY_SAFE_CHECK(
- AllocTraits::propagate_on_container_swap::value ||
- kAllocIsAlwaysEqual || this->alloc() == rhs.alloc(),
- "swap is undefined for unequal non-propagating allocators");
- this->swapPolicy(rhs);
- swapContents(rhs);
- }
- private:
- //////// hash helpers
- // Hash values are used to compute the desired position, which is the
- // chunk index at which we would like to place a value (if there is no
- // overflow), and the tag, which is an additional 8 bits of entropy.
- //
- // The standard's definition of hash function quality only refers to
- // the probability of collisions of the entire hash value, not to the
- // probability of collisions of the results of shifting or masking the
- // hash value. Some hash functions, however, provide this stronger
- // guarantee (not quite the same as the definition of avalanching,
- // but similar).
- //
- // If the user-supplied hasher is an avalanching one (each bit of the
- // hash value has a 50% chance of being the same for differing hash
- // inputs), then we can just take 1 byte of the hash value for the tag
- // and the rest for the desired position. Avalanching hashers also
- // let us map hash value to array index position with just a bitmask
- // without risking clumping. (Many hash tables just accept the risk
- // and do it regardless.)
- //
- // std::hash<std::string> avalanches in all implementations we've
- // examined: libstdc++-v3 uses MurmurHash2, and libc++ uses CityHash
- // or MurmurHash2. The other std::hash specializations, however, do not
- // have this property. std::hash for integral and pointer values is the
- // identity function on libstdc++-v3 and libc++, in particular. In our
- // experience it is also fairly common for user-defined specializations
- // of std::hash to combine fields in an ad-hoc way that does not evenly
- // distribute entropy among the bits of the result (a + 37 * b, for
- // example, where a and b are integer fields).
- //
- // For hash functions we don't trust to avalanche, we repair things by
- // applying a bit mixer to the user-supplied hash.
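- // As a concrete example of the avalanching path on a 64-bit platform:
- // for hash == 0x2b0123456789cdef, splitHash below returns the pair
- // {hash, 0xab}. The tag is the top byte with its high bit forced on
- // (0x2b | 0x80 == 0xab, so non-empty tags are always >= 0x80), and the
- // unmodified hash is later reduced with `hash & chunkMask_` to pick
- // the starting chunk.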
- #if FOLLY_X64 || FOLLY_AARCH64
- // 64-bit
- static HashPair splitHash(std::size_t hash) {
- static_assert(sizeof(std::size_t) == sizeof(uint64_t), "");
- std::size_t tag;
- if (!isAvalanchingHasher()) {
- #if FOLLY_F14_CRC_INTRINSIC_AVAILABLE
- #if FOLLY_SSE
- // SSE4.2 CRC
- std::size_t c = _mm_crc32_u64(0, hash);
- tag = (c >> 24) | 0x80;
- hash += c;
- #else
- // CRC is optional on armv8 (-march=armv8-a+crc), standard on armv8.1
- std::size_t c = __crc32cd(0, hash);
- tag = (c >> 24) | 0x80;
- hash += c;
- #endif
- #else
- // The mixer below is not fully avalanching for all 64 bits of
- // output, but looks quite good for bits 18..63 and puts plenty
- // of entropy even lower when considering multiple bits together
- // (like the tag). Importantly, when under register pressure it
- // uses fewer registers, instructions, and immediate constants
- // than the alternatives, resulting in compact code that is more
- // easily inlinable. In one instantiation a modified Murmur mixer
- // was 48 bytes of assembly (even after using the same multiplicand
- // for both steps) and this one was 27 bytes, for example.
- auto const kMul = 0xc4ceb9fe1a85ec53ULL;
- #ifdef _WIN32
- __int64 signedHi;
- __int64 signedLo = _mul128(
- static_cast<__int64>(hash), static_cast<__int64>(kMul), &signedHi);
- auto hi = static_cast<uint64_t>(signedHi);
- auto lo = static_cast<uint64_t>(signedLo);
- #else
- auto hi = static_cast<uint64_t>(
- (static_cast<unsigned __int128>(hash) * kMul) >> 64);
- auto lo = hash * kMul;
- #endif
- hash = hi ^ lo;
- hash *= kMul;
- tag = ((hash >> 15) & 0x7f) | 0x80;
- hash >>= 22;
- #endif
- } else {
- // we don't trust the top bit
- tag = (hash >> 56) | 0x80;
- }
- return std::make_pair(hash, tag);
- }
- #else
- // 32-bit
- static HashPair splitHash(std::size_t hash) {
- static_assert(sizeof(std::size_t) == sizeof(uint32_t), "");
- uint8_t tag;
- if (!isAvalanchingHasher()) {
- #if FOLLY_F14_CRC_INTRINSIC_AVAILABLE
- #if FOLLY_SSE
- // SSE4.2 CRC
- auto c = _mm_crc32_u32(0, hash);
- tag = static_cast<uint8_t>(~(c >> 25));
- hash += c;
- #else
- auto c = __crc32cw(0, hash);
- tag = static_cast<uint8_t>(~(c >> 25));
- hash += c;
- #endif
- #else
- // finalizer for 32-bit murmur2
- hash ^= hash >> 13;
- hash *= 0x5bd1e995;
- hash ^= hash >> 15;
- tag = static_cast<uint8_t>(~(hash >> 25));
- #endif
- } else {
- // we don't trust the top bit
- tag = (hash >> 24) | 0x80;
- }
- return std::make_pair(hash, tag);
- }
- #endif
- //////// memory management helpers
- static std::size_t chunkAllocSize(
- std::size_t chunkCount,
- std::size_t maxSizeWithoutRehash) {
- if (chunkCount == 1) {
- FOLLY_SAFE_DCHECK((maxSizeWithoutRehash % 2) == 0, "");
- static_assert(offsetof(Chunk, rawItems_) == 16, "");
- return 16 + sizeof(Item) * maxSizeWithoutRehash;
- } else {
- return sizeof(Chunk) * chunkCount;
- }
- }
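- // For instance, a single-chunk table of 4-byte items with
- // maxSizeWithoutRehash == 2 allocates only 16 + 2 * 4 == 24 bytes (the
- // 16-byte tag/control prefix plus two item slots), while any
- // multi-chunk table allocates the full chunkCount * sizeof(Chunk).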
- ChunkPtr initializeChunks(
- BytePtr raw,
- std::size_t chunkCount,
- std::size_t maxSizeWithoutRehash) {
- static_assert(std::is_trivial<Chunk>::value, "F14Chunk should be POD");
- auto chunks = static_cast<Chunk*>(static_cast<void*>(&*raw));
- for (std::size_t i = 0; i < chunkCount; ++i) {
- chunks[i].clear();
- }
- chunks[0].markEof(chunkCount == 1 ? maxSizeWithoutRehash : 1);
- return std::pointer_traits<ChunkPtr>::pointer_to(*chunks);
- }
- public:
- ItemIter begin() const noexcept {
- FOLLY_SAFE_DCHECK(kEnableItemIteration, "");
- return ItemIter{sizeAndPackedBegin_.packedBegin()};
- }
- ItemIter end() const noexcept {
- return ItemIter{};
- }
- bool empty() const noexcept {
- return size() == 0;
- }
- InternalSizeType size() const noexcept {
- return sizeAndPackedBegin_.size_;
- }
- std::size_t max_size() const noexcept {
- auto& a = this->alloc();
- return std::min<std::size_t>(
- (std::numeric_limits<InternalSizeType>::max)(),
- AllocTraits::max_size(a));
- }
- std::size_t bucket_count() const noexcept {
- // bucket_count is just a synthetic construct for the outside world
- // so that size, bucket_count, load_factor, and max_load_factor are
- // all self-consistent. The only one of those that is real is size().
- if (chunkMask_ != 0) {
- return (chunkMask_ + 1) * Chunk::kDesiredCapacity;
- } else {
- return chunks_->chunk0Capacity();
- }
- }
- std::size_t max_bucket_count() const noexcept {
- return max_size();
- }
- float load_factor() const noexcept {
- return empty()
- ? 0.0f
- : static_cast<float>(size()) / static_cast<float>(bucket_count());
- }
- float max_load_factor() const noexcept {
- return 1.0f;
- }
- void max_load_factor(float) noexcept {
- // Probing hash tables can't run load factors >= 1 (unlike chaining
- // tables). In addition, we have measured that there is little or
- // no performance advantage to running a smaller load factor (cache
- // locality losses outweigh the small reduction in probe lengths,
- // often making it slower). Therefore, we've decided to just fix
- // max_load_factor at 1.0f regardless of what the user requests.
- // This has an additional advantage that we don't have to store it.
- // Taking alignment into consideration this makes every F14 table
- // 8 bytes smaller, and is part of the reason an empty F14NodeMap
- // is almost half the size of an empty std::unordered_map (32 vs
- // 56 bytes).
- //
- // I don't have a strong opinion on whether we should remove this
- // method or leave a stub, let ngbronson or xshi know if you have a
- // compelling argument either way.
- }
- private:
- // Our probe strategy is to advance through additional chunks with
- // a stride that is key-specific. This is called double hashing,
- // and is a well known and high quality probing strategy. So long as
- // the stride and the chunk count are relatively prime, we will visit
- // every chunk once and then return to the original chunk, letting us
- // detect and end the cycle. The chunk count is a power of two, so
- // we can satisfy the relatively prime part by choosing an odd stride.
- // We've already computed a high quality secondary hash value for the
- // tag, so we just use it for the second probe hash as well.
- //
- // At the maximum load factor of 12/14, expected probe length for a
- // find hit is 1.041, with 99% of keys found in the first three chunks.
- // Expected probe length for a find miss (or insert) is 1.275, with a
- // p99 probe length of 4 (fewer than 1% of failing find look at 5 or
- // more chunks).
- //
- // This code is structured so you can try various ways of encoding
- // the current probe state. For example, at the moment the probe's
- // state is the position in the cycle and the resulting chunk index is
- // computed from that inside probeCurrentIndex. We could also make the
- // probe state the chunk index, and then increment it by hp.second *
- // 2 + 1 in probeAdvance. Wrapping can be applied early or late as
- // well. This particular code seems to be easier for the optimizer
- // to understand.
- //
- // We could also implement probing strategies that resulted in the same
- // tour for every key initially assigned to a chunk (linear probing or
- // quadratic), but that results in longer probe lengths. In particular,
- // the cache locality wins of linear probing are not worth the increase
- // in probe lengths (extra work and less branch predictability) in
- // our experiments.
- std::size_t probeDelta(HashPair hp) const {
- return 2 * hp.second + 1;
- }
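- // As a concrete instance of the relative-primality argument: with 8
- // chunks (chunkMask_ == 7) and hp.second == 0x93, the stride is
- // 2 * 0x93 + 1 == 0x127. Because the stride is odd and the chunk count
- // is a power of two, the indexes (hp.first + k * 0x127) & 7 for
- // k == 0..7 visit all 8 chunks exactly once before cycling.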
- template <typename K>
- FOLLY_ALWAYS_INLINE ItemIter findImpl(HashPair hp, K const& key) const {
- std::size_t index = hp.first;
- std::size_t step = probeDelta(hp);
- for (std::size_t tries = 0; tries <= chunkMask_; ++tries) {
- ChunkPtr chunk = chunks_ + (index & chunkMask_);
- if (sizeof(Chunk) > 64) {
- prefetchAddr(chunk->itemAddr(8));
- }
- auto hits = chunk->tagMatchIter(hp.second);
- while (hits.hasNext()) {
- auto i = hits.next();
- if (LIKELY(this->keyMatchesItem(key, chunk->item(i)))) {
- // Tag match and key match were both successful. The chance
- // of a false tag match is 1/128 for each key in the chunk
- // (with a proper hash function).
- return ItemIter{chunk, i};
- }
- }
- if (LIKELY(chunk->outboundOverflowCount() == 0)) {
- // No keys that wanted to be placed in this chunk were denied
- // entry, so our search is over. This is the common case.
- break;
- }
- index += step;
- }
- // Exiting the loop because tries is exhausted is rare, but possible.
- // That means that for every chunk there is currently a key present
- // in the map that visited that chunk on its probe search but ended
- // up somewhere else, and we have searched every chunk.
- return ItemIter{};
- }
- public:
- // Prehashing splits the work of find(key) into two calls, enabling you
- // to manually implement loop pipelining for hot bulk lookups. prehash
- // computes the hash and prefetches the first computed memory location,
- // and the two-arg find(F14HashToken,K) performs the rest of the search.
- template <typename K>
- F14HashToken prehash(K const& key) const {
- FOLLY_SAFE_DCHECK(chunks_ != nullptr, "");
- auto hp = splitHash(this->computeKeyHash(key));
- ChunkPtr firstChunk = chunks_ + (hp.first & chunkMask_);
- prefetchAddr(firstChunk);
- return F14HashToken(std::move(hp));
- }
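- // A sketch of the intended bulk-lookup pattern (table, keys, and
- // useResult are hypothetical). Issuing every prehash first lets the
- // prefetches overlap with the hashing of the remaining keys:
- //
- //   std::array<F14HashToken, kBatch> tokens;
- //   for (std::size_t i = 0; i < kBatch; ++i) {
- //     tokens[i] = table.prehash(keys[i]);
- //   }
- //   for (std::size_t i = 0; i < kBatch; ++i) {
- //     auto iter = table.find(tokens[i], keys[i]);
- //     useResult(iter);
- //   }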
- template <typename K>
- FOLLY_ALWAYS_INLINE ItemIter find(K const& key) const {
- auto hp = splitHash(this->computeKeyHash(key));
- return findImpl(hp, key);
- }
- template <typename K>
- FOLLY_ALWAYS_INLINE ItemIter
- find(F14HashToken const& token, K const& key) const {
- FOLLY_SAFE_DCHECK(
- splitHash(this->computeKeyHash(key)) == static_cast<HashPair>(token),
- "");
- return findImpl(static_cast<HashPair>(token), key);
- }
- private:
- void adjustSizeAndBeginAfterInsert(ItemIter iter) {
- if (kEnableItemIteration) {
- // packedBegin is the max of all valid ItemIter::pack()
- auto packed = iter.pack();
- if (sizeAndPackedBegin_.packedBegin() < packed) {
- sizeAndPackedBegin_.packedBegin() = packed;
- }
- }
- ++sizeAndPackedBegin_.size_;
- }
- // Ignores hp if pos.chunk()->hostedOverflowCount() == 0
- void eraseBlank(ItemIter iter, HashPair hp) {
- iter.chunk()->clearTag(iter.index());
- if (iter.chunk()->hostedOverflowCount() != 0) {
- // clean up
- std::size_t index = hp.first;
- std::size_t delta = probeDelta(hp);
- uint8_t hostedOp = 0;
- while (true) {
- ChunkPtr chunk = chunks_ + (index & chunkMask_);
- if (chunk == iter.chunk()) {
- chunk->adjustHostedOverflowCount(hostedOp);
- break;
- }
- chunk->decrOutboundOverflowCount();
- hostedOp = Chunk::kDecrHostedOverflowCount;
- index += delta;
- }
- }
- }
- void adjustSizeAndBeginBeforeErase(ItemIter iter) {
- --sizeAndPackedBegin_.size_;
- if (kEnableItemIteration) {
- if (iter.pack() == sizeAndPackedBegin_.packedBegin()) {
- if (size() == 0) {
- iter = ItemIter{};
- } else {
- iter.precheckedAdvance();
- }
- sizeAndPackedBegin_.packedBegin() = iter.pack();
- }
- }
- }
- template <typename... Args>
- void insertAtBlank(ItemIter pos, HashPair hp, Args&&... args) {
- try {
- auto dst = pos.itemAddr();
- this->constructValueAtItem(size(), dst, std::forward<Args>(args)...);
- } catch (...) {
- eraseBlank(pos, hp);
- throw;
- }
- adjustSizeAndBeginAfterInsert(pos);
- }
- ItemIter allocateTag(uint8_t* fullness, HashPair hp) {
- ChunkPtr chunk;
- std::size_t index = hp.first;
- std::size_t delta = probeDelta(hp);
- uint8_t hostedOp = 0;
- while (true) {
- index &= chunkMask_;
- chunk = chunks_ + index;
- if (LIKELY(fullness[index] < Chunk::kCapacity)) {
- break;
- }
- chunk->incrOutboundOverflowCount();
- hostedOp = Chunk::kIncrHostedOverflowCount;
- index += delta;
- }
- unsigned itemIndex = fullness[index]++;
- FOLLY_SAFE_DCHECK(!chunk->occupied(itemIndex), "");
- chunk->setTag(itemIndex, hp.second);
- chunk->adjustHostedOverflowCount(hostedOp);
- return ItemIter{chunk, itemIndex};
- }
- ChunkPtr lastOccupiedChunk() const {
- FOLLY_SAFE_DCHECK(size() > 0, "");
- if (kEnableItemIteration) {
- return begin().chunk();
- } else {
- return chunks_ + chunkMask_;
- }
- }
- template <typename T>
- void directBuildFrom(T&& src) {
- FOLLY_SAFE_DCHECK(src.size() > 0 && chunkMask_ == src.chunkMask_, "");
- // We use std::forward<T> to allow portions of src to be moved out by
- // either beforeBuild or afterBuild, but we are just relying on good
- // behavior of our Policy superclass to ensure that any particular
- // field of src is a donor at most once.
- auto undoState =
- this->beforeBuild(src.size(), bucket_count(), std::forward<T>(src));
- bool success = false;
- SCOPE_EXIT {
- this->afterBuild(
- undoState, success, src.size(), bucket_count(), std::forward<T>(src));
- };
- // Copy can fail part-way through if a Value copy constructor throws.
- // Failing afterBuild is limited in its cleanup power in this case,
- // because it can't enumerate the items that were actually copied.
- // Fortunately we can divide the situation into cases where all of
- // the state is owned by the table itself (F14Node and F14Value),
- // for which clearImpl() can do partial cleanup, and cases where all
- // of the values are owned by the policy (F14Vector), in which case
- // partial failure should not occur. Sorry for the subtle invariants
- // in the Policy API.
- if (is_trivially_copyable<Item>::value && !this->destroyItemOnClear() &&
- bucket_count() == src.bucket_count()) {
- // most happy path
- auto n = chunkAllocSize(chunkMask_ + 1, bucket_count());
- std::memcpy(&chunks_[0], &src.chunks_[0], n);
- sizeAndPackedBegin_.size_ = src.size();
- if (kEnableItemIteration) {
- auto srcBegin = src.begin();
- sizeAndPackedBegin_.packedBegin() =
- ItemIter{chunks_ + (srcBegin.chunk() - src.chunks_),
- srcBegin.index()}
- .pack();
- }
- } else {
- std::size_t maxChunkIndex = src.lastOccupiedChunk() - src.chunks_;
- // happy path, no rehash but pack items toward bottom of chunk and
- // use copy constructor
- auto srcChunk = &src.chunks_[maxChunkIndex];
- Chunk* dstChunk = &chunks_[maxChunkIndex];
- do {
- dstChunk->copyOverflowInfoFrom(*srcChunk);
- auto iter = srcChunk->occupiedIter();
- if (prefetchBeforeCopy()) {
- for (auto piter = iter; piter.hasNext();) {
- this->prefetchValue(srcChunk->citem(piter.next()));
- }
- }
- std::size_t dstI = 0;
- for (; iter.hasNext(); ++dstI) {
- auto srcI = iter.next();
- auto&& srcArg =
- std::forward<T>(src).buildArgForItem(srcChunk->item(srcI));
- auto dst = dstChunk->itemAddr(dstI);
- this->constructValueAtItem(
- 0, dst, std::forward<decltype(srcArg)>(srcArg));
- dstChunk->setTag(dstI, srcChunk->tag(srcI));
- ++sizeAndPackedBegin_.size_;
- }
- --srcChunk;
- --dstChunk;
- } while (size() != src.size());
- // reset doesn't care about packedBegin, so we don't fix it until the end
- if (kEnableItemIteration) {
- sizeAndPackedBegin_.packedBegin() =
- ItemIter{chunks_ + maxChunkIndex,
- chunks_[maxChunkIndex].lastOccupied().index()}
- .pack();
- }
- }
- success = true;
- }
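- // Builds this empty table from a src with strictly more chunks. Items
- // are re-inserted via allocateTag; when a source chunk hosted no
- // overflow, its chunk index and stored tag already determine the probe
- // start, so the key hash doesn't need to be recomputed.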
- template <typename T>
- void rehashBuildFrom(T&& src) {
- FOLLY_SAFE_DCHECK(src.chunkMask_ > chunkMask_, "");
- // 1 byte per chunk means < 1 bit per value temporary overhead
- std::array<uint8_t, 256> stackBuf;
- uint8_t* fullness;
- auto cc = chunkMask_ + 1;
- if (cc <= stackBuf.size()) {
- fullness = stackBuf.data();
- } else {
- ByteAlloc a{this->alloc()};
- fullness = &*std::allocator_traits<ByteAlloc>::allocate(a, cc);
- }
- SCOPE_EXIT {
- if (cc > stackBuf.size()) {
- ByteAlloc a{this->alloc()};
- std::allocator_traits<ByteAlloc>::deallocate(
- a,
- std::pointer_traits<typename std::allocator_traits<
- ByteAlloc>::pointer>::pointer_to(*fullness),
- cc);
- }
- };
- std::memset(fullness, '\0', cc);
- // We use std::forward<T> to allow portions of src to be moved out by
- // either beforeBuild or afterBuild, but we are just relying on good
- // behavior of our Policy superclass to ensure that any particular
- // field of src is a donor at most once.
- // Exception safety requires beforeBuild to happen after all of the
- // allocate() calls.
- auto undoState =
- this->beforeBuild(src.size(), bucket_count(), std::forward<T>(src));
- bool success = false;
- SCOPE_EXIT {
- this->afterBuild(
- undoState, success, src.size(), bucket_count(), std::forward<T>(src));
- };
- // The current table is at a valid state at all points for policies
- // in which non-trivial values are owned by the main table (F14Node
- // and F14Value), so reset() will clean things up properly if we
- // fail partway through. For the case that the policy manages value
- // lifecycle (F14Vector) then nothing after beforeBuild can throw and
- // we don't have to worry about partial failure.
- std::size_t srcChunkIndex = src.lastOccupiedChunk() - src.chunks_;
- while (true) {
- auto srcChunk = &src.chunks_[srcChunkIndex];
- auto iter = srcChunk->occupiedIter();
- if (prefetchBeforeRehash()) {
- for (auto piter = iter; piter.hasNext();) {
- this->prefetchValue(srcChunk->item(piter.next()));
- }
- }
- if (srcChunk->hostedOverflowCount() == 0) {
- // all items are in their preferred chunk (no probing), so we
- // don't need to compute any hash values
- while (iter.hasNext()) {
- auto i = iter.next();
- auto& srcItem = srcChunk->item(i);
- auto&& srcArg = std::forward<T>(src).buildArgForItem(srcItem);
- HashPair hp{srcChunkIndex, srcChunk->tag(i)};
- insertAtBlank(
- allocateTag(fullness, hp),
- hp,
- std::forward<decltype(srcArg)>(srcArg));
- }
- } else {
- // any chunk's items might be in here
- while (iter.hasNext()) {
- auto i = iter.next();
- auto& srcItem = srcChunk->item(i);
- auto&& srcArg = std::forward<T>(src).buildArgForItem(srcItem);
- auto const& srcKey = src.keyForValue(srcArg);
- auto hp = splitHash(this->computeKeyHash(srcKey));
- FOLLY_SAFE_DCHECK(hp.second == srcChunk->tag(i), "");
- insertAtBlank(
- allocateTag(fullness, hp),
- hp,
- std::forward<decltype(srcArg)>(srcArg));
- }
- }
- if (srcChunkIndex == 0) {
- break;
- }
- --srcChunkIndex;
- }
- success = true;
- }
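- // Entry point for building an empty table from another F14 table:
- // reserves space, then uses directBuildFrom when the chunk geometry
- // matches and rehashBuildFrom otherwise. Any failure resets this
- // table to empty before rethrowing.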
- template <typename T>
- FOLLY_NOINLINE void buildFromF14Table(T&& src) {
- FOLLY_SAFE_DCHECK(size() == 0, "");
- if (src.size() == 0) {
- return;
- }
- reserveForInsert(src.size());
- try {
- if (chunkMask_ == src.chunkMask_) {
- directBuildFrom(std::forward<T>(src));
- } else {
- rehashBuildFrom(std::forward<T>(src));
- }
- } catch (...) {
- reset();
- F14LinkCheck<getF14IntrinsicsMode()>::check();
- throw;
- }
- }
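- // Computes the chunk count and rehash threshold implied by capacity
- // and rehashes if the threshold changes. Small tables stay in a single
- // (possibly partial) chunk; larger ones round up to a power-of-two
- // chunk count. For example, if Chunk::kDesiredCapacity were 12, then
- // reserveImpl(100, ...) would pick nextPowTwo((100 - 1) / 12 + 1) == 16
- // chunks and a max size of 16 * 12 == 192. (Illustrative numbers; the
- // real constant is platform-dependent.)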
- FOLLY_NOINLINE void reserveImpl(
- std::size_t capacity,
- std::size_t origChunkCount,
- std::size_t origMaxSizeWithoutRehash) {
- FOLLY_SAFE_DCHECK(capacity >= size(), "");
- // compute new size
- std::size_t const kInitialCapacity = 2;
- std::size_t const kHalfChunkCapacity =
- (Chunk::kDesiredCapacity / 2) & ~std::size_t{1};
- std::size_t newMaxSizeWithoutRehash;
- std::size_t newChunkCount;
- if (capacity <= kHalfChunkCapacity) {
- newChunkCount = 1;
- newMaxSizeWithoutRehash =
- (capacity < kInitialCapacity) ? kInitialCapacity : kHalfChunkCapacity;
- } else {
- newChunkCount = nextPowTwo((capacity - 1) / Chunk::kDesiredCapacity + 1);
- newMaxSizeWithoutRehash = newChunkCount * Chunk::kDesiredCapacity;
- constexpr std::size_t kMaxChunksWithoutCapacityOverflow =
- (std::numeric_limits<std::size_t>::max)() / Chunk::kDesiredCapacity;
- if (newChunkCount > kMaxChunksWithoutCapacityOverflow ||
- newMaxSizeWithoutRehash > max_size()) {
- throw_exception<std::bad_alloc>();
- }
- }
- if (origMaxSizeWithoutRehash != newMaxSizeWithoutRehash) {
- rehashImpl(
- origChunkCount,
- origMaxSizeWithoutRehash,
- newChunkCount,
- newMaxSizeWithoutRehash);
- }
- }
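- // Allocates a new chunk array and moves every item into it. The
- // SCOPE_EXIT below releases the old allocation on success; on failure
- // it restores chunks_ and chunkMask_ and releases the new allocation.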
- void rehashImpl(
- std::size_t origChunkCount,
- std::size_t origMaxSizeWithoutRehash,
- std::size_t newChunkCount,
- std::size_t newMaxSizeWithoutRehash) {
- auto origChunks = chunks_;
- BytePtr rawAllocation;
- auto undoState = this->beforeRehash(
- size(),
- origMaxSizeWithoutRehash,
- newMaxSizeWithoutRehash,
- chunkAllocSize(newChunkCount, newMaxSizeWithoutRehash),
- rawAllocation);
- chunks_ =
- initializeChunks(rawAllocation, newChunkCount, newMaxSizeWithoutRehash);
- FOLLY_SAFE_DCHECK(
- newChunkCount < std::numeric_limits<InternalSizeType>::max(), "");
- chunkMask_ = static_cast<InternalSizeType>(newChunkCount - 1);
- bool success = false;
- SCOPE_EXIT {
- // this SCOPE_EXIT reverts chunks_ and chunkMask_ if necessary
- BytePtr finishedRawAllocation = nullptr;
- std::size_t finishedAllocSize = 0;
- if (LIKELY(success)) {
- if (origMaxSizeWithoutRehash > 0) {
- finishedRawAllocation = std::pointer_traits<BytePtr>::pointer_to(
- *static_cast<uint8_t*>(static_cast<void*>(&*origChunks)));
- finishedAllocSize =
- chunkAllocSize(origChunkCount, origMaxSizeWithoutRehash);
- }
- } else {
- finishedRawAllocation = rawAllocation;
- finishedAllocSize =
- chunkAllocSize(newChunkCount, newMaxSizeWithoutRehash);
- chunks_ = origChunks;
- FOLLY_SAFE_DCHECK(
- origChunkCount < std::numeric_limits<InternalSizeType>::max(), "");
- chunkMask_ = static_cast<InternalSizeType>(origChunkCount - 1);
- F14LinkCheck<getF14IntrinsicsMode()>::check();
- }
- this->afterRehash(
- std::move(undoState),
- success,
- size(),
- origMaxSizeWithoutRehash,
- newMaxSizeWithoutRehash,
- finishedRawAllocation,
- finishedAllocSize);
- };
- if (size() == 0) {
- // nothing to do
- } else if (origChunkCount == 1 && newChunkCount == 1) {
- // no mask, no chunk scan, no hash computation, no probing
- auto srcChunk = origChunks;
- auto dstChunk = chunks_;
- std::size_t srcI = 0;
- std::size_t dstI = 0;
- while (dstI < size()) {
- if (LIKELY(srcChunk->occupied(srcI))) {
- dstChunk->setTag(dstI, srcChunk->tag(srcI));
- this->moveItemDuringRehash(
- dstChunk->itemAddr(dstI), srcChunk->item(srcI));
- ++dstI;
- }
- ++srcI;
- }
- if (kEnableItemIteration) {
- sizeAndPackedBegin_.packedBegin() = ItemIter{dstChunk, dstI - 1}.pack();
- }
- } else {
- // 1 byte per chunk means < 1 bit per value temporary overhead
- std::array<uint8_t, 256> stackBuf;
- uint8_t* fullness;
- if (newChunkCount <= stackBuf.size()) {
- fullness = stackBuf.data();
- } else {
- ByteAlloc a{this->alloc()};
- // may throw
- fullness =
- &*std::allocator_traits<ByteAlloc>::allocate(a, newChunkCount);
- }
- std::memset(fullness, '\0', newChunkCount);
- SCOPE_EXIT {
- if (newChunkCount > stackBuf.size()) {
- ByteAlloc a{this->alloc()};
- std::allocator_traits<ByteAlloc>::deallocate(
- a,
- std::pointer_traits<typename std::allocator_traits<
- ByteAlloc>::pointer>::pointer_to(*fullness),
- newChunkCount);
- }
- };
- auto srcChunk = origChunks + origChunkCount - 1;
- std::size_t remaining = size();
- while (remaining > 0) {
- auto iter = srcChunk->occupiedIter();
- if (prefetchBeforeRehash()) {
- for (auto piter = iter; piter.hasNext();) {
- this->prefetchValue(srcChunk->item(piter.next()));
- }
- }
- while (iter.hasNext()) {
- --remaining;
- auto srcI = iter.next();
- Item& srcItem = srcChunk->item(srcI);
- auto hp = splitHash(
- this->computeItemHash(const_cast<Item const&>(srcItem)));
- FOLLY_SAFE_DCHECK(hp.second == srcChunk->tag(srcI), "");
- auto dstIter = allocateTag(fullness, hp);
- this->moveItemDuringRehash(dstIter.itemAddr(), srcItem);
- }
- --srcChunk;
- }
- if (kEnableItemIteration) {
- // this code replaces size() separate invocations of adjustSizeAndBeginAfterInsert
- std::size_t i = chunkMask_;
- while (fullness[i] == 0) {
- --i;
- }
- sizeAndPackedBegin_.packedBegin() =
- ItemIter{chunks_ + i, std::size_t{fullness[i]} - 1}.pack();
- }
- }
- success = true;
- }
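- // ASAN-only instrumentation: reserve() banks capacity - size() "safe"
- // inserts; once the bank is empty, inserts occasionally trigger a
- // spurious same-size rehash so that code relying on reference
- // stability it wasn't promised fails quickly under sanitizers.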
- void asanOnReserve(std::size_t capacity) {
- if (kIsSanitizeAddress && capacity > size()) {
- asanPendingSafeInserts += capacity - size();
- }
- }
- bool asanShouldAddExtraRehash() {
- if (!kIsSanitizeAddress) {
- return false;
- } else if (asanPendingSafeInserts > 0) {
- --asanPendingSafeInserts;
- return false;
- } else if (size() <= 1) {
- return size() > 0;
- } else {
- constexpr std::size_t kBigPrime = 4294967291U;
- auto s = (asanRehashState += kBigPrime);
- return (s % size()) == 0;
- }
- }
- void asanExtraRehash() {
- auto cc = chunkMask_ + 1;
- auto bc = bucket_count();
- rehashImpl(cc, bc, cc, bc);
- }
- void asanOnInsert() {
- // When running under ASAN, we add a spurious rehash with 1/size()
- // probability before every insert. This means that finding reference
- // stability problems for F14Value and F14Vector is much more likely.
- // The most common pattern that causes this is
- //
- // auto& ref = map[k1]; map[k2] = foo(ref);
- //
- // One way to fix this is to call map.reserve(N) before such a
- // sequence, where N is the number of keys that might be inserted
- // within the section that retains references.
- if (asanShouldAddExtraRehash()) {
- asanExtraRehash();
- }
- }
- public:
- // rehash(n) is equivalent to reserve(n) because the user has no
- // control over max_load_factor
- void rehash(std::size_t capacity) {
- reserve(capacity);
- }
- void reserve(std::size_t capacity) {
- // We want to support the pattern
- // map.reserve(2); auto& r1 = map[k1]; auto& r2 = map[k2];
- asanOnReserve(capacity);
- reserveImpl(
- std::max<std::size_t>(capacity, size()),
- chunkMask_ + 1,
- bucket_count());
- }
- // Rehashes if necessary to make room for size() + incoming elements
- void reserveForInsert(size_t incoming = 1) {
- auto capacity = size() + incoming;
- auto bc = bucket_count();
- if (capacity - 1 >= bc) {
- reserveImpl(capacity, chunkMask_ + 1, bc);
- }
- }
- // Returns (pos, true) if a value was constructed, or (pos, false) if the
- // key was already present. key is only used during the search; all
- // constructor args for an inserted value come from args... key won't be
- // accessed after args are touched.
- template <typename K, typename... Args>
- std::pair<ItemIter, bool> tryEmplaceValue(K const& key, Args&&... args) {
- const auto hp = splitHash(this->computeKeyHash(key));
- if (size() > 0) {
- auto existing = findImpl(hp, key);
- if (!existing.atEnd()) {
- return std::make_pair(existing, false);
- }
- }
- asanOnInsert();
- reserveForInsert();
- std::size_t index = hp.first;
- ChunkPtr chunk = chunks_ + (index & chunkMask_);
- auto firstEmpty = chunk->firstEmpty();
- if (!firstEmpty.hasIndex()) {
- std::size_t delta = probeDelta(hp);
- do {
- chunk->incrOutboundOverflowCount();
- index += delta;
- chunk = chunks_ + (index & chunkMask_);
- firstEmpty = chunk->firstEmpty();
- } while (!firstEmpty.hasIndex());
- chunk->adjustHostedOverflowCount(Chunk::kIncrHostedOverflowCount);
- }
- std::size_t itemIndex = firstEmpty.index();
- FOLLY_SAFE_DCHECK(!chunk->occupied(itemIndex), "");
- chunk->setTag(itemIndex, hp.second);
- ItemIter iter{chunk, itemIndex};
- // insertAtBlank will clear the tag if the constructor throws
- insertAtBlank(iter, hp, std::forward<Args>(args)...);
- return std::make_pair(iter, true);
- }
- private:
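- // Shared implementation of clear() and reset(). Reset == true (or a
- // table of 16+ chunks, to avoid a persistently low load factor) frees
- // the chunk allocation; otherwise items are destroyed and the chunk
- // metadata is wiped in place.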
- template <bool Reset>
- void clearImpl() noexcept {
- if (chunks_ == Chunk::emptyInstance()) {
- FOLLY_SAFE_DCHECK(empty() && bucket_count() == 0, "");
- return;
- }
- // turn clear into reset if the table is >= 16 chunks so that
- // we don't get too low a load factor
- bool willReset = Reset || chunkMask_ + 1 >= 16;
- auto origSize = size();
- auto origCapacity = bucket_count();
- if (willReset) {
- this->beforeReset(origSize, origCapacity);
- } else {
- this->beforeClear(origSize, origCapacity);
- }
- if (!empty()) {
- if (destroyItemOnClear()) {
- for (std::size_t ci = 0; ci <= chunkMask_; ++ci) {
- ChunkPtr chunk = chunks_ + ci;
- auto iter = chunk->occupiedIter();
- if (prefetchBeforeDestroy()) {
- for (auto piter = iter; piter.hasNext();) {
- this->prefetchValue(chunk->item(piter.next()));
- }
- }
- while (iter.hasNext()) {
- this->destroyItem(chunk->item(iter.next()));
- }
- }
- }
- if (!willReset) {
- // It's okay to do this in a separate loop because we only do it
- // when the chunk count is small. That avoids a branch when we
- // are promoting a clear to a reset for a large table.
- auto c0c = chunks_[0].chunk0Capacity();
- for (std::size_t ci = 0; ci <= chunkMask_; ++ci) {
- chunks_[ci].clear();
- }
- chunks_[0].markEof(c0c);
- }
- if (kEnableItemIteration) {
- sizeAndPackedBegin_.packedBegin() = ItemIter{}.pack();
- }
- sizeAndPackedBegin_.size_ = 0;
- }
- if (willReset) {
- BytePtr rawAllocation = std::pointer_traits<BytePtr>::pointer_to(
- *static_cast<uint8_t*>(static_cast<void*>(&*chunks_)));
- std::size_t rawSize = chunkAllocSize(chunkMask_ + 1, bucket_count());
- chunks_ = Chunk::emptyInstance();
- chunkMask_ = 0;
- this->afterReset(origSize, origCapacity, rawAllocation, rawSize);
- } else {
- this->afterClear(origSize, origCapacity);
- }
- }
- void eraseImpl(ItemIter pos, HashPair hp) {
- this->destroyItem(pos.item());
- adjustSizeAndBeginBeforeErase(pos);
- eraseBlank(pos, hp);
- }
- public:
- // The item still needs to be hashable during this call. If you want
- // to intercept the value before it is destroyed (to extract it, for
- // example), use eraseIterInto(pos, beforeDestroy).
- void eraseIter(ItemIter pos) {
- eraseIterInto(pos, [](value_type&&) {});
- }
- // The item still needs to be hashable during this call. If you want
- // to intercept the value before it is destroyed (to extract it, for
- // example), do so in the beforeDestroy callback.
- template <typename BeforeDestroy>
- void eraseIterInto(ItemIter pos, BeforeDestroy&& beforeDestroy) {
- HashPair hp{};
- if (pos.chunk()->hostedOverflowCount() != 0) {
- hp = splitHash(this->computeItemHash(pos.citem()));
- }
- beforeDestroy(this->valueAtItemForExtract(pos.item()));
- eraseImpl(pos, hp);
- }
- template <typename K>
- std::size_t eraseKey(K const& key) {
- return eraseKeyInto(key, [](value_type&&) {});
- }
- template <typename K, typename BeforeDestroy>
- std::size_t eraseKeyInto(K const& key, BeforeDestroy&& beforeDestroy) {
- if (UNLIKELY(size() == 0)) {
- return 0;
- }
- auto hp = splitHash(this->computeKeyHash(key));
- auto iter = findImpl(hp, key);
- if (!iter.atEnd()) {
- beforeDestroy(this->valueAtItemForExtract(iter.item()));
- eraseImpl(iter, hp);
- return 1;
- } else {
- return 0;
- }
- }
- void clear() noexcept {
- if (kIsSanitizeAddress) {
- // force recycling of heap memory
- auto bc = bucket_count();
- reset();
- try {
- reserveImpl(bc, 0, 0);
- } catch (std::bad_alloc const&) {
- // ASAN mode only, keep going
- }
- } else {
- clearImpl<false>();
- }
- }
- // Like clear(), but always frees all dynamic storage allocated
- // by the table.
- void reset() noexcept {
- clearImpl<true>();
- }
- // Get memory footprint, not including sizeof(*this).
- std::size_t getAllocatedMemorySize() const {
- std::size_t sum = 0;
- visitAllocationClasses(
- [&sum](std::size_t bytes, std::size_t n) { sum += bytes * n; });
- return sum;
- }
- // Enumerates classes of allocated memory blocks currently owned
- // by this table, calling visitor(allocationSize, allocationCount).
- // This can be used to get a more accurate indication of memory footprint
- // than getAllocatedMemorySize() if you have some way of computing the
- // internal fragmentation of the allocator, such as JEMalloc's nallocx.
- // The visitor might be called twice with the same allocationSize. The
- // visitor's computation should produce the same result for visitor(8,
- // 2) as for two calls to visitor(8, 1), for example. The visitor may
- // be called with a zero allocationCount.
- template <typename V>
- void visitAllocationClasses(V&& visitor) const {
- auto bc = bucket_count();
- this->visitPolicyAllocationClasses(
- (bc == 0 ? 0 : chunkAllocSize(chunkMask_ + 1, bc)),
- size(),
- bc,
- visitor);
- }
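- // For example, with jemalloc available one might fold in the
- // allocator's size-class rounding like this (sketch only; nallocx is
- // jemalloc-specific):
- //
- //   std::size_t footprint = 0;
- //   table.visitAllocationClasses([&](std::size_t bytes, std::size_t n) {
- //     if (bytes > 0 && n > 0) {
- //       footprint += nallocx(bytes, 0) * n;
- //     }
- //   });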
- // visitor should take an Item const&
- template <typename V>
- void visitItems(V&& visitor) const {
- if (empty()) {
- return;
- }
- std::size_t maxChunkIndex = lastOccupiedChunk() - chunks_;
- auto chunk = &chunks_[0];
- for (std::size_t i = 0; i <= maxChunkIndex; ++i, ++chunk) {
- auto iter = chunk->occupiedIter();
- if (prefetchBeforeCopy()) {
- for (auto piter = iter; piter.hasNext();) {
- this->prefetchValue(chunk->citem(piter.next()));
- }
- }
- while (iter.hasNext()) {
- visitor(chunk->citem(iter.next()));
- }
- }
- }
- // visitor should take two Item const*
- template <typename V>
- void visitContiguousItemRanges(V&& visitor) const {
- if (empty()) {
- return;
- }
- std::size_t maxChunkIndex = lastOccupiedChunk() - chunks_;
- auto chunk = &chunks_[0];
- for (std::size_t i = 0; i <= maxChunkIndex; ++i, ++chunk) {
- for (auto iter = chunk->occupiedRangeIter(); iter.hasNext();) {
- auto be = iter.next();
- FOLLY_SAFE_DCHECK(
- chunk->occupied(be.first) && chunk->occupied(be.second - 1), "");
- Item const* b = chunk->itemAddr(be.first);
- visitor(b, b + (be.second - be.first));
- }
- }
- }
- private:
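- // Returns histo[index], growing the vector first if needed. Used to
- // build the variable-length histograms in computeStats().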
- static std::size_t& histoAt(
- std::vector<std::size_t>& histo,
- std::size_t index) {
- if (histo.size() <= index) {
- histo.resize(index + 1);
- }
- return histo.at(index);
- }
- public:
- // Expensive
- F14TableStats computeStats() const {
- F14TableStats stats;
- if (kIsDebug && kEnableItemIteration) {
- // validate iteration
- std::size_t n = 0;
- ItemIter prev;
- for (auto iter = begin(); iter != end(); iter.advance()) {
- FOLLY_SAFE_DCHECK(n == 0 || iter.pack() < prev.pack(), "");
- ++n;
- prev = iter;
- }
- FOLLY_SAFE_DCHECK(n == size(), "");
- }
- FOLLY_SAFE_DCHECK(
- (chunks_ == Chunk::emptyInstance()) == (bucket_count() == 0), "");
- std::size_t n1 = 0;
- std::size_t n2 = 0;
- auto cc = bucket_count() == 0 ? 0 : chunkMask_ + 1;
- for (std::size_t ci = 0; ci < cc; ++ci) {
- ChunkPtr chunk = chunks_ + ci;
- FOLLY_SAFE_DCHECK(chunk->eof() == (ci == 0), "");
- auto iter = chunk->occupiedIter();
- std::size_t chunkOccupied = 0;
- for (auto piter = iter; piter.hasNext(); piter.next()) {
- ++chunkOccupied;
- }
- n1 += chunkOccupied;
- histoAt(stats.chunkOccupancyHisto, chunkOccupied)++;
- histoAt(
- stats.chunkOutboundOverflowHisto, chunk->outboundOverflowCount())++;
- histoAt(stats.chunkHostedOverflowHisto, chunk->hostedOverflowCount())++;
- while (iter.hasNext()) {
- auto ii = iter.next();
- ++n2;
- {
- auto& item = chunk->citem(ii);
- auto hp = splitHash(this->computeItemHash(item));
- FOLLY_SAFE_DCHECK(chunk->tag(ii) == hp.second, "");
- std::size_t dist = 1;
- std::size_t index = hp.first;
- std::size_t delta = probeDelta(hp);
- while ((index & chunkMask_) != ci) {
- index += delta;
- ++dist;
- }
- histoAt(stats.keyProbeLengthHisto, dist)++;
- }
- // misses could have any tag, so we do the dumb but accurate
- // thing and just try them all
- for (std::size_t ti = 0; ti < 256; ++ti) {
- uint8_t tag = static_cast<uint8_t>(ti);
- HashPair hp{ci, tag};
- std::size_t dist = 1;
- std::size_t index = hp.first;
- std::size_t delta = probeDelta(hp);
- for (std::size_t tries = 0; tries <= chunkMask_ &&
- chunks_[index & chunkMask_].outboundOverflowCount() != 0;
- ++tries) {
- index += delta;
- ++dist;
- }
- histoAt(stats.missProbeLengthHisto, dist)++;
- }
- }
- }
- FOLLY_SAFE_DCHECK(n1 == size(), "");
- FOLLY_SAFE_DCHECK(n2 == size(), "");
- #if FOLLY_HAS_RTTI
- stats.policy = typeid(Policy).name();
- #endif
- stats.size = size();
- stats.valueSize = sizeof(value_type);
- stats.bucketCount = bucket_count();
- stats.chunkCount = cc;
- stats.totalBytes = sizeof(*this) + getAllocatedMemorySize();
- stats.overheadBytes = stats.totalBytes - size() * sizeof(value_type);
- return stats;
- }
- };
- } // namespace detail
- } // namespace f14
- #endif // FOLLY_F14_VECTOR_INTRINSICS_AVAILABLE
- } // namespace folly