UninitializedMemoryHacks.h 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336
  1. /*
  2. * Copyright 2017-present Facebook, Inc.
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #pragma once
  17. #include <string>
  18. #include <type_traits>
  19. #include <vector>
  20. namespace {
  21. // This struct is different in every translation unit. We use template
  22. // instantiations to define inline freestanding methods. Since the
  23. // methods are inline it is fine to define them in multiple translation
  24. // units, but the instantiation itself would be an ODR violation if it is
  25. // present in the program more than once. By tagging the instantiations
  26. // with this struct, we avoid ODR problems for the instantiation while
  27. // allowing the resulting methods to be inline-able. If you think that
  28. // seems hacky keep reading...
  29. struct FollyMemoryDetailTranslationUnitTag {};
  30. } // namespace
  31. namespace folly {
  32. namespace detail {
  33. void unsafeStringSetLargerSize(std::string& s, std::size_t n);
  34. template <typename T>
  35. void unsafeVectorSetLargerSize(std::vector<T>& v, std::size_t n);
  36. } // namespace detail
  37. /*
  38. * This file provides helper functions resizeWithoutInitialization()
  39. * that can resize std::string or std::vector without constructing or
  40. * initializing new elements.
  41. *
  42. * IMPORTANT: These functions can be unsafe if used improperly. If you
  43. * don't write to an element with index >= oldSize and < newSize, reading
  44. * the element can expose arbitrary memory contents to the world, including
  45. * the contents of old strings. If you're lucky you'll get a segfault,
  46. * because the kernel is only required to fault in new pages on write
  47. * access. MSAN should be able to catch problems in the common case that
  48. * the string or vector wasn't previously shrunk.
  49. *
  50. * Pay extra attention to your failure paths. For example, if you try
  51. * to read directly into a caller-provided string, make sure to clear
  52. * the string when you get an I/O error.
  53. *
  54. * You should only use this if you have profiling data from production
  55. * that shows that this is not a premature optimization. This code is
  56. * designed for retroactively optimizing code where touching every element
  57. * twice (or touching never-used elements once) shows up in profiling,
  58. * and where restructuring the code to use fixed-length arrays or IOBuf-s
  59. * would be difficult.
  60. *
  61. * NOTE: Just because .resize() shows up in your profile (probably
  62. * via one of the intrinsic memset implementations) doesn't mean that
  63. * these functions will make your program faster. A lot of the cost
  64. * of memset comes from cache misses, so avoiding the memset can mean
  65. * that the cache miss cost just gets pushed to the following code.
  66. * resizeWithoutInitialization can be a win when the contents are bigger
  67. * than a cache level, because the second access isn't free in that case.
  68. * It can be a win when the memory is already cached, so touching it
  69. * doesn't help later code. It can also be a win if the final length
  70. * of the string or vector isn't actually known, so the suffix will be
  71. * chopped off with a second call to .resize().
  72. */
  73. /**
  74. * Like calling s.resize(n), but when growing the string does not
  75. * initialize new elements. It is undefined behavior to read from
  76. * any element added to the string by this method unless it has been
  77. * written to by an operation that follows this call.
  78. *
  79. * IMPORTANT: Read the warning at the top of this header file.
  80. */
  81. inline void resizeWithoutInitialization(std::string& s, std::size_t n) {
  82. if (n <= s.size()) {
  83. s.resize(n);
  84. } else {
  85. // careful not to call reserve unless necessary, as it causes
  86. // shrink_to_fit on many platforms
  87. if (n > s.capacity()) {
  88. s.reserve(n);
  89. }
  90. detail::unsafeStringSetLargerSize(s, n);
  91. }
  92. }
  93. /**
  94. * Like calling v.resize(n), but when growing the vector does not construct
  95. * or initialize new elements. It is undefined behavior to read from any
  96. * element added to the vector by this method unless it has been written
  97. * to by an operation that follows this call.
  98. *
  99. * Use the FOLLY_DECLARE_VECTOR_RESIZE_WITHOUT_INIT(T) macro to
  100. * declare (and inline define) the internals required to call
  101. * resizeWithoutInitialization for a std::vector<T>. This must
  102. * be done exactly once in each translation unit that wants to call
  103. * resizeWithoutInitialization(std::vector<T>&,size_t). char and unsigned
  104. * char are provided by default. If you don't do this you will get linker
  105. * errors about folly::detail::unsafeVectorSetLargerSize. Requiring that
  106. * T be trivially_destructible is only an approximation of the property
  107. * required of T. In fact what is required is that any random sequence of
  108. * bytes may be safely reinterpreted as a T and passed to T's destructor.
  109. *
  110. * std::vector<bool> has specialized internals and is not supported.
  111. *
  112. * IMPORTANT: Read the warning at the top of this header file.
  113. */
  114. template <
  115. typename T,
  116. typename = typename std::enable_if<
  117. std::is_trivially_destructible<T>::value &&
  118. !std::is_same<T, bool>::value>::type>
  119. void resizeWithoutInitialization(std::vector<T>& v, std::size_t n) {
  120. if (n <= v.size()) {
  121. v.resize(n);
  122. } else {
  123. if (n > v.capacity()) {
  124. v.reserve(n);
  125. }
  126. detail::unsafeVectorSetLargerSize(v, n);
  127. }
  128. }
  129. namespace detail {
  130. #if defined(_LIBCPP_STRING)
  131. // libc++
  132. } // namespace detail
  133. } // namespace folly
  134. template void std::string::__set_size(std::size_t);
  135. namespace folly {
  136. namespace detail {
  137. template <typename Tag, typename T, typename A, A Ptr__set_size>
  138. struct MakeUnsafeStringSetLargerSize {
  139. friend void unsafeStringSetLargerSize(
  140. std::basic_string<T>& s,
  141. std::size_t n) {
  142. // s.__set_size(n);
  143. (s.*Ptr__set_size)(n);
  144. (&s[0])[n] = '\0';
  145. }
  146. };
  147. template struct MakeUnsafeStringSetLargerSize<
  148. FollyMemoryDetailTranslationUnitTag,
  149. char,
  150. void (std::string::*)(std::size_t),
  151. &std::string::__set_size>;
  152. #elif defined(_GLIBCXX_USE_FB)
  153. // FBString
  154. template <typename Tag, typename T, typename A, A Ptrstore_>
  155. struct MakeUnsafeStringSetLargerSize {
  156. friend void unsafeStringSetLargerSize(
  157. std::basic_string<T>& s,
  158. std::size_t n) {
  159. // s.store_.expandNoinit(n - s.size(), false);
  160. (s.*Ptrstore_).expandNoinit(n - s.size(), false);
  161. }
  162. };
  163. template struct MakeUnsafeStringSetLargerSize<
  164. FollyMemoryDetailTranslationUnitTag,
  165. char,
  166. std::fbstring_core<char>(std::string::*),
  167. &std::string::store_>;
  168. #elif defined(_GLIBCXX_STRING) && _GLIBCXX_USE_CXX11_ABI
  169. // libstdc++ new implementation with SSO
  170. } // namespace detail
  171. } // namespace folly
  172. template void std::string::_M_set_length(std::size_t);
  173. namespace folly {
  174. namespace detail {
  175. template <typename Tag, typename T, typename A, A Ptr_M_set_length>
  176. struct MakeUnsafeStringSetLargerSize {
  177. friend void unsafeStringSetLargerSize(
  178. std::basic_string<T>& s,
  179. std::size_t n) {
  180. // s._M_set_length(n);
  181. (s.*Ptr_M_set_length)(n);
  182. }
  183. };
  184. template struct MakeUnsafeStringSetLargerSize<
  185. FollyMemoryDetailTranslationUnitTag,
  186. char,
  187. void (std::string::*)(std::size_t),
  188. &std::string::_M_set_length>;
  189. #elif defined(_GLIBCXX_STRING)
  190. // libstdc++ old implementation
  191. } // namespace detail
  192. } // namespace folly
  193. template std::string::_Rep* std::string::_M_rep() const;
  194. template void std::string::_Rep::_M_set_length_and_sharable(std::size_t);
  195. namespace folly {
  196. namespace detail {
  197. template <
  198. typename Tag,
  199. typename T,
  200. typename A,
  201. A Ptr_M_rep,
  202. typename B,
  203. B Ptr_M_set_length_and_sharable>
  204. struct MakeUnsafeStringSetLargerSize {
  205. friend void unsafeStringSetLargerSize(
  206. std::basic_string<T>& s,
  207. std::size_t n) {
  208. // s._M_rep()->_M_set_length_and_sharable(n);
  209. auto rep = (s.*Ptr_M_rep)();
  210. (rep->*Ptr_M_set_length_and_sharable)(n);
  211. }
  212. };
  213. template struct MakeUnsafeStringSetLargerSize<
  214. FollyMemoryDetailTranslationUnitTag,
  215. char,
  216. std::string::_Rep* (std::string::*)() const,
  217. &std::string::_M_rep,
  218. void (std::string::_Rep::*)(std::size_t),
  219. &std::string::_Rep::_M_set_length_and_sharable>;
  220. #elif defined(_MSC_VER)
  221. // MSVC
  222. inline void unsafeStringSetLargerSize(std::string& s, std::size_t n) {
  223. s._Eos(n);
  224. }
  225. #else
  226. #warning "No implementation for resizeWithoutInitialization of std::string"
  227. #endif
  228. // This machinery bridges template expansion and macro expansion
  229. #define FOLLY_DECLARE_VECTOR_RESIZE_WITHOUT_INIT_IMPL(TYPE) \
  230. namespace folly { \
  231. namespace detail { \
  232. void unsafeVectorSetLargerSizeImpl(std::vector<TYPE>& v, std::size_t); \
  233. template <> \
  234. inline void unsafeVectorSetLargerSize<TYPE>( \
  235. std::vector<TYPE> & v, \
  236. std::size_t n) { \
  237. unsafeVectorSetLargerSizeImpl(v, n); \
  238. } \
  239. } \
  240. }
  241. #if defined(_LIBCPP_VECTOR)
  242. // libc++
  243. template <typename Tag, typename T, typename A, A Ptr__end_>
  244. struct MakeUnsafeVectorSetLargerSize {
  245. friend void unsafeVectorSetLargerSizeImpl(std::vector<T>& v, std::size_t n) {
  246. // v.__end_ += (n - v.size());
  247. using Base = std::__vector_base<T, std::allocator<T>>;
  248. static_assert(
  249. std::is_standard_layout<std::vector<T>>::value &&
  250. sizeof(std::vector<T>) == sizeof(Base),
  251. "reinterpret_cast safety conditions not met");
  252. reinterpret_cast<Base&>(v).*Ptr__end_ += (n - v.size());
  253. }
  254. };
  255. #define FOLLY_DECLARE_VECTOR_RESIZE_WITHOUT_INIT(TYPE) \
  256. template struct folly::detail::MakeUnsafeVectorSetLargerSize< \
  257. FollyMemoryDetailTranslationUnitTag, \
  258. TYPE, \
  259. TYPE*(std::__vector_base<TYPE, std::allocator<TYPE>>::*), \
  260. &std::vector<TYPE>::__end_>; \
  261. FOLLY_DECLARE_VECTOR_RESIZE_WITHOUT_INIT_IMPL(TYPE)
  262. #elif defined(_GLIBCXX_VECTOR)
  263. // libstdc++
  264. template <
  265. typename Tag,
  266. typename T,
  267. typename A,
  268. A Ptr_M_impl,
  269. typename B,
  270. B Ptr_M_finish>
  271. struct MakeUnsafeVectorSetLargerSize : std::vector<T> {
  272. friend void unsafeVectorSetLargerSizeImpl(std::vector<T>& v, std::size_t n) {
  273. // v._M_impl._M_finish += (n - v.size());
  274. (v.*Ptr_M_impl).*Ptr_M_finish += (n - v.size());
  275. }
  276. };
  277. #define FOLLY_DECLARE_VECTOR_RESIZE_WITHOUT_INIT(TYPE) \
  278. template struct folly::detail::MakeUnsafeVectorSetLargerSize< \
  279. FollyMemoryDetailTranslationUnitTag, \
  280. TYPE, \
  281. std::vector<TYPE>::_Vector_impl( \
  282. std::_Vector_base<TYPE, std::allocator<TYPE>>::*), \
  283. &std::vector<TYPE>::_M_impl, \
  284. TYPE*(std::vector<TYPE>::_Vector_impl::*), \
  285. &std::vector<TYPE>::_Vector_impl::_M_finish>; \
  286. FOLLY_DECLARE_VECTOR_RESIZE_WITHOUT_INIT_IMPL(TYPE)
  287. #elif defined(_MSC_VER)
  288. // MSVC
  289. #define FOLLY_DECLARE_VECTOR_RESIZE_WITHOUT_INIT(TYPE) \
  290. extern inline void unsafeVectorSetLargerSizeImpl( \
  291. std::vector<TYPE>& v, std::size_t n) { \
  292. v._Mylast() += (n - v.size()); \
  293. } \
  294. FOLLY_DECLARE_VECTOR_RESIZE_WITHOUT_INIT_IMPL(TYPE)
  295. #else
  296. #warning "No implementation for resizeWithoutInitialization of std::vector"
  297. #endif
  298. } // namespace detail
  299. } // namespace folly
  300. #if defined(FOLLY_DECLARE_VECTOR_RESIZE_WITHOUT_INIT)
  301. FOLLY_DECLARE_VECTOR_RESIZE_WITHOUT_INIT(char)
  302. FOLLY_DECLARE_VECTOR_RESIZE_WITHOUT_INIT(unsigned char)
  303. #endif