123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195 |
- /*
- * Copyright 2015-present Facebook, Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
- #pragma once
- #include <glog/logging.h>
- #ifdef _MSC_VER
- #include <immintrin.h>
- #endif
- #include <folly/CpuId.h>
- #include <folly/Portability.h>
- #include <folly/lang/Assume.h>
- #include <folly/portability/Builtins.h>
- namespace folly {
- namespace compression {
- namespace instructions {
- // NOTE: It's recommended to compile EF coding with -msse4.2, starting
- // with Nehalem, Intel CPUs support POPCNT instruction and gcc will emit
- // it for __builtin_popcountll intrinsic.
- // But we provide an alternative way for the client code: it can switch to
- // the appropriate version of EliasFanoReader<> at runtime (client should
- // implement this switching logic itself) by specifying instruction set to
- // use explicitly.
- struct Default {
- static bool supported(const folly::CpuId& /* cpuId */ = {}) {
- return true;
- }
- static FOLLY_ALWAYS_INLINE uint64_t popcount(uint64_t value) {
- return uint64_t(__builtin_popcountll(value));
- }
- static FOLLY_ALWAYS_INLINE int ctz(uint64_t value) {
- DCHECK_GT(value, 0u);
- return __builtin_ctzll(value);
- }
- static FOLLY_ALWAYS_INLINE int clz(uint64_t value) {
- DCHECK_GT(value, 0u);
- return __builtin_clzll(value);
- }
- static FOLLY_ALWAYS_INLINE uint64_t blsr(uint64_t value) {
- return value & (value - 1);
- }
- // Extract `length` bits starting from `start` from value. Only bits [0:63]
- // will be extracted. All higher order bits in the
- // result will be zeroed. If no bits are extracted, return 0.
- static FOLLY_ALWAYS_INLINE uint64_t
- bextr(uint64_t value, uint32_t start, uint32_t length) {
- if (start > 63) {
- return 0ULL;
- }
- if (start + length > 64) {
- length = 64 - start;
- }
- return (value >> start) &
- ((length == 64) ? (~0ULL) : ((1ULL << length) - 1ULL));
- }
- // Clear high bits starting at position index.
- static FOLLY_ALWAYS_INLINE uint64_t bzhi(uint64_t value, uint32_t index) {
- if (index > 63) {
- return 0;
- }
- return value & ((uint64_t(1) << index) - 1);
- }
- };
- struct Nehalem : public Default {
- static bool supported(const folly::CpuId& cpuId = {}) {
- return cpuId.popcnt();
- }
- static FOLLY_ALWAYS_INLINE uint64_t popcount(uint64_t value) {
- // POPCNT is supported starting with Intel Nehalem, AMD K10.
- #if defined(__GNUC__) || defined(__clang__)
- // GCC and Clang won't inline the intrinsics.
- uint64_t result;
- asm("popcntq %1, %0" : "=r"(result) : "r"(value));
- return result;
- #else
- return uint64_t(_mm_popcnt_u64(value));
- #endif
- }
- };
- struct Haswell : public Nehalem {
- static bool supported(const folly::CpuId& cpuId = {}) {
- return Nehalem::supported(cpuId) && cpuId.bmi1() && cpuId.bmi2();
- }
- static FOLLY_ALWAYS_INLINE uint64_t blsr(uint64_t value) {
- // BMI1 is supported starting with Intel Haswell, AMD Piledriver.
- // BLSR combines two instructions into one and reduces register pressure.
- #if defined(__GNUC__) || defined(__clang__)
- // GCC and Clang won't inline the intrinsics.
- uint64_t result;
- asm("blsrq %1, %0" : "=r"(result) : "r"(value));
- return result;
- #else
- return _blsr_u64(value);
- #endif
- }
- static FOLLY_ALWAYS_INLINE uint64_t
- bextr(uint64_t value, uint32_t start, uint32_t length) {
- #if defined(__GNUC__) || defined(__clang__)
- // GCC and Clang won't inline the intrinsics.
- // Encode parameters in `pattern` where `pattern[0:7]` is `start` and
- // `pattern[8:15]` is `length`.
- // Ref: Intel Advanced Vector Extensions Programming Reference
- uint64_t pattern = start & 0xFF;
- pattern = pattern | ((length & 0xFF) << 8);
- uint64_t result;
- asm("bextrq %2, %1, %0" : "=r"(result) : "r"(value), "r"(pattern));
- return result;
- #else
- return _bextr_u64(value, start, length);
- #endif
- }
- static FOLLY_ALWAYS_INLINE uint64_t bzhi(uint64_t value, uint32_t index) {
- #if defined(__GNUC__) || defined(__clang__)
- // GCC and Clang won't inline the intrinsics.
- const uint64_t index64 = index;
- uint64_t result;
- asm("bzhiq %2, %1, %0" : "=r"(result) : "r"(value), "r"(index64));
- return result;
- #else
- return _bzhi_u64(value, index);
- #endif
- }
- };
- enum class Type {
- DEFAULT,
- NEHALEM,
- HASWELL,
- };
- inline Type detect() {
- const static Type type = [] {
- if (instructions::Haswell::supported()) {
- VLOG(2) << "Will use folly::compression::instructions::Haswell";
- return Type::HASWELL;
- } else if (instructions::Nehalem::supported()) {
- VLOG(2) << "Will use folly::compression::instructions::Nehalem";
- return Type::NEHALEM;
- } else {
- VLOG(2) << "Will use folly::compression::instructions::Default";
- return Type::DEFAULT;
- }
- }();
- return type;
- }
- template <class F>
- auto dispatch(Type type, F&& f) -> decltype(f(std::declval<Default>())) {
- switch (type) {
- case Type::HASWELL:
- return f(Haswell());
- case Type::NEHALEM:
- return f(Nehalem());
- case Type::DEFAULT:
- return f(Default());
- }
- assume_unreachable();
- }
- template <class F>
- auto dispatch(F&& f) -> decltype(f(std::declval<Default>())) {
- return dispatch(detect(), std::forward<F>(f));
- }
- } // namespace instructions
- } // namespace compression
- } // namespace folly
|