Varint.h 5.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230
  1. /*
  2. * Copyright 2013-present Facebook, Inc.
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #pragma once
  17. #include <type_traits>
  18. #include <folly/Conv.h>
  19. #include <folly/Expected.h>
  20. #include <folly/Likely.h>
  21. #include <folly/Portability.h>
  22. #include <folly/Range.h>
  23. namespace folly {
  24. /**
  25. * Variable-length integer encoding, using a little-endian, base-128
  26. * representation.
  27. *
  28. * The MSb is set on all bytes except the last.
  29. *
  30. * Details:
  31. * https://developers.google.com/protocol-buffers/docs/encoding#varints
  32. *
  33. * If you want to encode multiple values, GroupVarint (in GroupVarint.h)
  34. * is faster and likely smaller.
  35. */
  36. /**
  37. * Maximum length (in bytes) of the varint encoding of a 32-bit value.
  38. */
  39. constexpr size_t kMaxVarintLength32 = 5;
  40. /**
  41. * Maximum length (in bytes) of the varint encoding of a 64-bit value.
  42. */
  43. constexpr size_t kMaxVarintLength64 = 10;
  44. /**
  45. * Encode val as a varint into buf, returning the number of bytes written.
  46. * No bounds checking is performed: buf must have enough space for the
  47. * encoding (kMaxVarintLength64 bytes suffice for any 64-bit value,
  48. * kMaxVarintLength32 bytes for any value that fits in 32 bits).
  49. */
  50. size_t encodeVarint(uint64_t val, uint8_t* buf);
  51. /**
  52. * Determine the number of bytes needed to represent "val" as a varint.
  53. * The result is at least 1 (zero takes one byte); 32-bit values need at
  54. * most 5 bytes and 64-bit values need at most 10 bytes.
  55. */
  56. int encodeVarintSize(uint64_t val);
  57. /**
  58. * Decode a value from the given buffer and advance data past the decoded bytes.
  59. * Throws std::invalid_argument if the encoding has too few or too many bytes.
  60. */
  61. template <class T>
  62. uint64_t decodeVarint(Range<T*>& data);
  /** Reasons for which tryDecodeVarint() reports failure. */
  63. enum class DecodeVarintError {
  64. TooManyBytes = 0, // ten bytes were read and all had the continuation (MSb) bit set
  65. TooFewBytes = 1, // the input ended before a byte without the continuation bit
  66. };
  67. /**
  68. * A variant of decodeVarint() that returns a DecodeVarintError instead of
  69. * throwing, leaving data unchanged on error. Useful when the buffer may hold only
  70. * part of a serialized varint, e.g., one that straddles a network packet boundary.
  71. */
  72. template <class T>
  73. Expected<uint64_t, DecodeVarintError> tryDecodeVarint(Range<T*>& data);
  74. /**
  75. * ZigZag encoding that maps signed integers with a small absolute value
  76. * to unsigned integers with a small (positive) values. Without this,
  77. * encoding negative values using Varint would use up 9 or 10 bytes.
  78. *
  79. * if x >= 0, encodeZigZag(x) == 2*x
  80. * if x < 0, encodeZigZag(x) == -2*x + 1
  81. */
  82. inline uint64_t encodeZigZag(int64_t val) {
  83. // Bit-twiddling magic stolen from the Google protocol buffer document;
  84. // val >> 63 is an arithmetic shift because val is signed
  85. auto uval = static_cast<uint64_t>(val);
  86. return static_cast<uint64_t>((uval << 1) ^ (val >> 63));
  87. }
  88. inline int64_t decodeZigZag(uint64_t val) {
  89. return static_cast<int64_t>((val >> 1) ^ -(val & 1));
  90. }
  91. // Implementation below
  92. inline size_t encodeVarint(uint64_t val, uint8_t* buf) {
  93. uint8_t* p = buf;
  94. while (val >= 128) {
  95. *p++ = 0x80 | (val & 0x7f);
  96. val >>= 7;
  97. }
  98. *p++ = uint8_t(val);
  99. return size_t(p - buf);
  100. }
  101. inline int encodeVarintSize(uint64_t val) {
  102. if (folly::kIsArchAmd64) {
  103. // __builtin_clzll is undefined for 0
  104. int highBit = 64 - __builtin_clzll(val | 1);
  105. return (highBit + 6) / 7;
  106. } else {
  107. int s = 1;
  108. while (val >= 128) {
  109. ++s;
  110. val >>= 7;
  111. }
  112. return s;
  113. }
  114. }
  115. template <class T>
  116. inline uint64_t decodeVarint(Range<T*>& data) {
  117. auto expected = tryDecodeVarint(data);
  118. if (!expected) {
  119. throw std::invalid_argument(
  120. expected.error() == DecodeVarintError::TooManyBytes
  121. ? "Invalid varint value: too many bytes."
  122. : "Invalid varint value: too few bytes.");
  123. }
  124. return *expected;
  125. }
  126. template <class T>
  127. inline Expected<uint64_t, DecodeVarintError> tryDecodeVarint(Range<T*>& data) {
  128. static_assert(
  129. std::is_same<typename std::remove_cv<T>::type, char>::value ||
  130. std::is_same<typename std::remove_cv<T>::type, unsigned char>::value,
  131. "Only character ranges are supported");
  132. const int8_t* begin = reinterpret_cast<const int8_t*>(data.begin());
  133. const int8_t* end = reinterpret_cast<const int8_t*>(data.end());
  134. const int8_t* p = begin;
  135. uint64_t val = 0;
  136. // end is always greater than or equal to begin, so this subtraction is safe
  137. if (LIKELY(size_t(end - begin) >= kMaxVarintLength64)) { // fast path
  138. int64_t b;
  139. do {
  140. b = *p++;
  141. val = (b & 0x7f);
  142. if (b >= 0) {
  143. break;
  144. }
  145. b = *p++;
  146. val |= (b & 0x7f) << 7;
  147. if (b >= 0) {
  148. break;
  149. }
  150. b = *p++;
  151. val |= (b & 0x7f) << 14;
  152. if (b >= 0) {
  153. break;
  154. }
  155. b = *p++;
  156. val |= (b & 0x7f) << 21;
  157. if (b >= 0) {
  158. break;
  159. }
  160. b = *p++;
  161. val |= (b & 0x7f) << 28;
  162. if (b >= 0) {
  163. break;
  164. }
  165. b = *p++;
  166. val |= (b & 0x7f) << 35;
  167. if (b >= 0) {
  168. break;
  169. }
  170. b = *p++;
  171. val |= (b & 0x7f) << 42;
  172. if (b >= 0) {
  173. break;
  174. }
  175. b = *p++;
  176. val |= (b & 0x7f) << 49;
  177. if (b >= 0) {
  178. break;
  179. }
  180. b = *p++;
  181. val |= (b & 0x7f) << 56;
  182. if (b >= 0) {
  183. break;
  184. }
  185. b = *p++;
  186. val |= (b & 0x01) << 63;
  187. if (b >= 0) {
  188. break;
  189. }
  190. return makeUnexpected(DecodeVarintError::TooManyBytes);
  191. } while (false);
  192. } else {
  193. int shift = 0;
  194. while (p != end && *p < 0) {
  195. val |= static_cast<uint64_t>(*p++ & 0x7f) << shift;
  196. shift += 7;
  197. }
  198. if (p == end) {
  199. return makeUnexpected(DecodeVarintError::TooFewBytes);
  200. }
  201. val |= static_cast<uint64_t>(*p++) << shift;
  202. }
  203. data.uncheckedAdvance(p - begin);
  204. return val;
  205. }
  206. } // namespace folly