String.cpp 21 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759
  1. /*
  2. * Copyright 2012-present Facebook, Inc.
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #include <folly/String.h>
  17. #include <cctype>
  18. #include <cerrno>
  19. #include <cstdarg>
  20. #include <cstring>
  21. #include <iterator>
  22. #include <sstream>
  23. #include <stdexcept>
  24. #include <glog/logging.h>
  25. #include <folly/Portability.h>
  26. #include <folly/ScopeGuard.h>
  27. #include <folly/container/Array.h>
  28. namespace folly {
  29. static_assert(IsConvertible<float>::value, "");
  30. static_assert(IsConvertible<int>::value, "");
  31. static_assert(IsConvertible<bool>::value, "");
  32. static_assert(IsConvertible<int>::value, "");
  33. static_assert(!IsConvertible<std::vector<int>>::value, "");
  34. namespace detail {
  // Generator for one entry of the C-escape classification table.
  //
  // For a byte value `index` the result is:
  //   - the character to emit after a backslash ('"', '\\', '?', 'n', 'r',
  //     't') when the byte has a dedicated escape,
  //   - 'O' when the byte is outside the printable ASCII range 32..126 and
  //     has no dedicated escape (octal),
  //   - 'P' for every other byte (printable).
  struct string_table_c_escape_make_item {
    constexpr char operator()(std::size_t index) const {
      // Kept as a single return expression so the functor stays usable where
      // only C++11-style constexpr bodies are accepted.
      // clang-format off
      return
          // Control characters that have a mnemonic escape.
          index == '\n' ? 'n' :
          index == '\r' ? 'r' :
          index == '\t' ? 't' :
          // Everything else outside printable ASCII.
          (index < 32 || index > 126) ? 'O' :
          // Printable characters that still need a backslash; they escape to
          // themselves.
          (index == '"' || index == '\\' || index == '?')
              ? static_cast<char>(index) :
          'P';
      // clang-format on
    }
  };
  // Generator for one entry of the C-unescape table, indexed by the byte that
  // follows a backslash.
  //
  // The result is:
  //   - the decoded character for the simple escapes (\' \? \\ \" \a \b \f
  //     \n \r \t \v),
  //   - 'O' for the digits '0'..'7' (start of an octal escape),
  //   - 'X' for 'x' (start of a hex escape),
  //   - 'I' for every other byte (invalid escape).
  struct string_table_c_unescape_make_item {
    constexpr char operator()(std::size_t index) const {
      // All conditions below are mutually exclusive, so their order does not
      // affect the result.
      // clang-format off
      return
          // Escapes whose decoded value is the character itself.
          (index == '\'' || index == '?' || index == '\\' || index == '"')
              ? static_cast<char>(index) :
          // Mnemonic control-character escapes.
          index == 'n' ? '\n' :
          index == 't' ? '\t' :
          index == 'r' ? '\r' :
          index == 'a' ? '\a' :
          index == 'b' ? '\b' :
          index == 'f' ? '\f' :
          index == 'v' ? '\v' :
          // Numeric escape introducers.
          index == 'x' ? 'X' :
          (index >= '0' && index <= '7') ? 'O' :
          'I';
      // clang-format on
    }
  };
  // Generator for one entry of the hex-digit table: maps a byte to its
  // numeric value when it is an ASCII hex digit (either case), and to the
  // out-of-range marker 16 otherwise.
  struct string_table_hex_make_item {
    constexpr unsigned char operator()(std::size_t index) const {
      // The three digit ranges are disjoint, so the order of the tests is
      // irrelevant to the result.
      // clang-format off
      return static_cast<unsigned char>(
          (index >= 'A' && index <= 'F') ? index - 'A' + 10 :
          (index >= 'a' && index <= 'f') ? index - 'a' + 10 :
          (index >= '0' && index <= '9') ? index - '0' :
          16);
      // clang-format on
    }
  };
  // Generator for one entry of the URI-escape classification table.
  //
  // Class codes produced:
  //   0 = passthrough (ASCII letters, digits, and - _ . ~)
  //   1 = unused
  //   2 = safe in path ('/')
  //   3 = space (replace with '+' in query)
  //   4 = always percent-encode (everything else)
  struct string_table_uri_escape_make_item {
    constexpr unsigned char operator()(std::size_t index) const {
      // The character classes are disjoint, so they may be tested in any
      // order.
      // clang-format off
      return
          index == ' ' ? 3 :
          index == '/' ? 2 :
          (index == '-' || index == '_' || index == '.' || index == '~') ? 0 :
          (index >= 'a' && index <= 'z') ? 0 :
          (index >= 'A' && index <= 'Z') ? 0 :
          (index >= '0' && index <= '9') ? 0 :
          4;
      // clang-format on
    }
  };
  104. FOLLY_STORAGE_CONSTEXPR decltype(cEscapeTable) cEscapeTable =
  105. make_array_with<256>(string_table_c_escape_make_item{});
  106. FOLLY_STORAGE_CONSTEXPR decltype(cUnescapeTable) cUnescapeTable =
  107. make_array_with<256>(string_table_c_unescape_make_item{});
  108. FOLLY_STORAGE_CONSTEXPR decltype(hexTable) hexTable =
  109. make_array_with<256>(string_table_hex_make_item{});
  110. FOLLY_STORAGE_CONSTEXPR decltype(uriEscapeTable) uriEscapeTable =
  111. make_array_with<256>(string_table_uri_escape_make_item{});
  112. } // namespace detail
  // True for the "uncommon" whitespace characters handled by the trim
  // helpers: newline, tab and carriage return. A plain space is deliberately
  // not included; callers test for it separately.
  static inline bool is_oddspace(char c) {
    switch (c) {
      case '\n':
      case '\t':
      case '\r':
        return true;
      default:
        return false;
    }
  }
  116. StringPiece ltrimWhitespace(StringPiece sp) {
  117. // Spaces other than ' ' characters are less common but should be
  118. // checked. This configuration where we loop on the ' '
  119. // separately from oddspaces was empirically fastest.
  120. while (true) {
  121. while (!sp.empty() && sp.front() == ' ') {
  122. sp.pop_front();
  123. }
  124. if (!sp.empty() && is_oddspace(sp.front())) {
  125. sp.pop_front();
  126. continue;
  127. }
  128. return sp;
  129. }
  130. }
  131. StringPiece rtrimWhitespace(StringPiece sp) {
  132. // Spaces other than ' ' characters are less common but should be
  133. // checked. This configuration where we loop on the ' '
  134. // separately from oddspaces was empirically fastest.
  135. while (true) {
  136. while (!sp.empty() && sp.back() == ' ') {
  137. sp.pop_back();
  138. }
  139. if (!sp.empty() && is_oddspace(sp.back())) {
  140. sp.pop_back();
  141. continue;
  142. }
  143. return sp;
  144. }
  145. }
  146. namespace {
  // Formats into `buf` (capacity `bufsize`) with vsnprintf and returns
  // vsnprintf's result unchanged.
  //
  // The formatting is done on a private copy of `args`, so the caller's
  // va_list is left untouched and can be passed to this function again.
  int stringAppendfImplHelper(
      char* buf,
      size_t bufsize,
      const char* format,
      va_list args) {
    va_list scratch;
    va_copy(scratch, args);
    const int written = vsnprintf(buf, bufsize, format, scratch);
    va_end(scratch);
    return written;
  }
  158. void stringAppendfImpl(std::string& output, const char* format, va_list args) {
  159. // Very simple; first, try to avoid an allocation by using an inline
  160. // buffer. If that fails to hold the output string, allocate one on
  161. // the heap, use it instead.
  162. //
  163. // It is hard to guess the proper size of this buffer; some
  164. // heuristics could be based on the number of format characters, or
  165. // static analysis of a codebase. Or, we can just pick a number
  166. // that seems big enough for simple cases (say, one line of text on
  167. // a terminal) without being large enough to be concerning as a
  168. // stack variable.
  169. std::array<char, 128> inline_buffer;
  170. int bytes_used = stringAppendfImplHelper(
  171. inline_buffer.data(), inline_buffer.size(), format, args);
  172. if (bytes_used < 0) {
  173. throw std::runtime_error(to<std::string>(
  174. "Invalid format string; snprintf returned negative "
  175. "with format string: ",
  176. format));
  177. }
  178. if (static_cast<size_t>(bytes_used) < inline_buffer.size()) {
  179. output.append(inline_buffer.data(), size_t(bytes_used));
  180. return;
  181. }
  182. // Couldn't fit. Heap allocate a buffer, oh well.
  183. std::unique_ptr<char[]> heap_buffer(new char[size_t(bytes_used + 1)]);
  184. int final_bytes_used = stringAppendfImplHelper(
  185. heap_buffer.get(), size_t(bytes_used + 1), format, args);
  186. // The second call can take fewer bytes if, for example, we were printing a
  187. // string buffer with null-terminating char using a width specifier -
  188. // vsnprintf("%.*s", buf.size(), buf)
  189. CHECK(bytes_used >= final_bytes_used);
  190. // We don't keep the trailing '\0' in our output string
  191. output.append(heap_buffer.get(), size_t(final_bytes_used));
  192. }
  193. } // namespace
  194. std::string stringPrintf(const char* format, ...) {
  195. va_list ap;
  196. va_start(ap, format);
  197. SCOPE_EXIT {
  198. va_end(ap);
  199. };
  200. return stringVPrintf(format, ap);
  201. }
  202. std::string stringVPrintf(const char* format, va_list ap) {
  203. std::string ret;
  204. stringAppendfImpl(ret, format, ap);
  205. return ret;
  206. }
  207. // Basic declarations; allow for parameters of strings and string
  208. // pieces to be specified.
  209. std::string& stringAppendf(std::string* output, const char* format, ...) {
  210. va_list ap;
  211. va_start(ap, format);
  212. SCOPE_EXIT {
  213. va_end(ap);
  214. };
  215. return stringVAppendf(output, format, ap);
  216. }
  217. std::string&
  218. stringVAppendf(std::string* output, const char* format, va_list ap) {
  219. stringAppendfImpl(*output, format, ap);
  220. return *output;
  221. }
  222. void stringPrintf(std::string* output, const char* format, ...) {
  223. va_list ap;
  224. va_start(ap, format);
  225. SCOPE_EXIT {
  226. va_end(ap);
  227. };
  228. return stringVPrintf(output, format, ap);
  229. }
  230. void stringVPrintf(std::string* output, const char* format, va_list ap) {
  231. output->clear();
  232. stringAppendfImpl(*output, format, ap);
  233. }
  234. namespace {
  235. struct PrettySuffix {
  236. const char* suffix;
  237. double val;
  238. };
  239. const PrettySuffix kPrettyTimeSuffixes[] = {
  240. {"s ", 1e0L},
  241. {"ms", 1e-3L},
  242. {"us", 1e-6L},
  243. {"ns", 1e-9L},
  244. {"ps", 1e-12L},
  245. {"s ", 0},
  246. {nullptr, 0},
  247. };
  248. const PrettySuffix kPrettyTimeHmsSuffixes[] = {
  249. {"h ", 60L * 60L},
  250. {"m ", 60L},
  251. {"s ", 1e0L},
  252. {"ms", 1e-3L},
  253. {"us", 1e-6L},
  254. {"ns", 1e-9L},
  255. {"ps", 1e-12L},
  256. {"s ", 0},
  257. {nullptr, 0},
  258. };
  259. const PrettySuffix kPrettyBytesMetricSuffixes[] = {
  260. {"EB", 1e18L},
  261. {"PB", 1e15L},
  262. {"TB", 1e12L},
  263. {"GB", 1e9L},
  264. {"MB", 1e6L},
  265. {"kB", 1e3L},
  266. {"B ", 0L},
  267. {nullptr, 0},
  268. };
  269. const PrettySuffix kPrettyBytesBinarySuffixes[] = {
  270. {"EB", int64_t(1) << 60},
  271. {"PB", int64_t(1) << 50},
  272. {"TB", int64_t(1) << 40},
  273. {"GB", int64_t(1) << 30},
  274. {"MB", int64_t(1) << 20},
  275. {"kB", int64_t(1) << 10},
  276. {"B ", 0L},
  277. {nullptr, 0},
  278. };
  279. const PrettySuffix kPrettyBytesBinaryIECSuffixes[] = {
  280. {"EiB", int64_t(1) << 60},
  281. {"PiB", int64_t(1) << 50},
  282. {"TiB", int64_t(1) << 40},
  283. {"GiB", int64_t(1) << 30},
  284. {"MiB", int64_t(1) << 20},
  285. {"KiB", int64_t(1) << 10},
  286. {"B ", 0L},
  287. {nullptr, 0},
  288. };
  289. const PrettySuffix kPrettyUnitsMetricSuffixes[] = {
  290. {"qntl", 1e18L},
  291. {"qdrl", 1e15L},
  292. {"tril", 1e12L},
  293. {"bil", 1e9L},
  294. {"M", 1e6L},
  295. {"k", 1e3L},
  296. {" ", 0},
  297. {nullptr, 0},
  298. };
  299. const PrettySuffix kPrettyUnitsBinarySuffixes[] = {
  300. {"E", int64_t(1) << 60},
  301. {"P", int64_t(1) << 50},
  302. {"T", int64_t(1) << 40},
  303. {"G", int64_t(1) << 30},
  304. {"M", int64_t(1) << 20},
  305. {"k", int64_t(1) << 10},
  306. {" ", 0},
  307. {nullptr, 0},
  308. };
  309. const PrettySuffix kPrettyUnitsBinaryIECSuffixes[] = {
  310. {"Ei", int64_t(1) << 60},
  311. {"Pi", int64_t(1) << 50},
  312. {"Ti", int64_t(1) << 40},
  313. {"Gi", int64_t(1) << 30},
  314. {"Mi", int64_t(1) << 20},
  315. {"Ki", int64_t(1) << 10},
  316. {" ", 0},
  317. {nullptr, 0},
  318. };
  319. const PrettySuffix kPrettySISuffixes[] = {
  320. {"Y", 1e24L}, {"Z", 1e21L}, {"E", 1e18L}, {"P", 1e15L}, {"T", 1e12L},
  321. {"G", 1e9L}, {"M", 1e6L}, {"k", 1e3L}, {"h", 1e2L}, {"da", 1e1L},
  322. {"d", 1e-1L}, {"c", 1e-2L}, {"m", 1e-3L}, {"u", 1e-6L}, {"n", 1e-9L},
  323. {"p", 1e-12L}, {"f", 1e-15L}, {"a", 1e-18L}, {"z", 1e-21L}, {"y", 1e-24L},
  324. {" ", 0}, {nullptr, 0},
  325. };
  326. const PrettySuffix* const kPrettySuffixes[PRETTY_NUM_TYPES] = {
  327. kPrettyTimeSuffixes,
  328. kPrettyTimeHmsSuffixes,
  329. kPrettyBytesMetricSuffixes,
  330. kPrettyBytesBinarySuffixes,
  331. kPrettyBytesBinaryIECSuffixes,
  332. kPrettyUnitsMetricSuffixes,
  333. kPrettyUnitsBinarySuffixes,
  334. kPrettyUnitsBinaryIECSuffixes,
  335. kPrettySISuffixes,
  336. };
  337. } // namespace
  338. std::string prettyPrint(double val, PrettyType type, bool addSpace) {
  339. char buf[100];
  340. // pick the suffixes to use
  341. assert(type >= 0);
  342. assert(type < PRETTY_NUM_TYPES);
  343. const PrettySuffix* suffixes = kPrettySuffixes[type];
  344. // find the first suffix we're bigger than -- then use it
  345. double abs_val = fabs(val);
  346. for (int i = 0; suffixes[i].suffix; ++i) {
  347. if (abs_val >= suffixes[i].val) {
  348. snprintf(
  349. buf,
  350. sizeof buf,
  351. "%.4g%s%s",
  352. (suffixes[i].val ? (val / suffixes[i].val) : val),
  353. (addSpace ? " " : ""),
  354. suffixes[i].suffix);
  355. return std::string(buf);
  356. }
  357. }
  358. // no suffix, we've got a tiny value -- just print it in sci-notation
  359. snprintf(buf, sizeof buf, "%.4g", val);
  360. return std::string(buf);
  361. }
  362. // TODO:
  363. // 1) Benchmark & optimize
  364. double prettyToDouble(
  365. folly::StringPiece* const prettyString,
  366. const PrettyType type) {
  367. double value = folly::to<double>(prettyString);
  368. while (prettyString->size() > 0 && std::isspace(prettyString->front())) {
  369. prettyString->advance(1); // Skipping spaces between number and suffix
  370. }
  371. const PrettySuffix* suffixes = kPrettySuffixes[type];
  372. int longestPrefixLen = -1;
  373. int bestPrefixId = -1;
  374. for (int j = 0; suffixes[j].suffix; ++j) {
  375. if (suffixes[j].suffix[0] == ' ') { // Checking for " " -> number rule.
  376. if (longestPrefixLen == -1) {
  377. longestPrefixLen = 0; // No characters to skip
  378. bestPrefixId = j;
  379. }
  380. } else if (prettyString->startsWith(suffixes[j].suffix)) {
  381. int suffixLen = int(strlen(suffixes[j].suffix));
  382. // We are looking for a longest suffix matching prefix of the string
  383. // after numeric value. We need this in case suffixes have common prefix.
  384. if (suffixLen > longestPrefixLen) {
  385. longestPrefixLen = suffixLen;
  386. bestPrefixId = j;
  387. }
  388. }
  389. }
  390. if (bestPrefixId == -1) { // No valid suffix rule found
  391. throw std::invalid_argument(folly::to<std::string>(
  392. "Unable to parse suffix \"", *prettyString, "\""));
  393. }
  394. prettyString->advance(size_t(longestPrefixLen));
  395. return suffixes[bestPrefixId].val ? value * suffixes[bestPrefixId].val
  396. : value;
  397. }
  398. double prettyToDouble(folly::StringPiece prettyString, const PrettyType type) {
  399. double result = prettyToDouble(&prettyString, type);
  400. detail::enforceWhitespace(prettyString);
  401. return result;
  402. }
  403. std::string hexDump(const void* ptr, size_t size) {
  404. std::ostringstream os;
  405. hexDump(ptr, size, std::ostream_iterator<StringPiece>(os, "\n"));
  406. return os.str();
  407. }
  408. fbstring errnoStr(int err) {
  409. int savedErrno = errno;
  410. // Ensure that we reset errno upon exit.
  411. auto guard(makeGuard([&] { errno = savedErrno; }));
  412. char buf[1024];
  413. buf[0] = '\0';
  414. fbstring result;
  415. // https://developer.apple.com/library/mac/documentation/Darwin/Reference/ManPages/man3/strerror_r.3.html
  416. // http://www.kernel.org/doc/man-pages/online/pages/man3/strerror.3.html
  417. #if defined(_WIN32) && (defined(__MINGW32__) || defined(_MSC_VER))
  418. // mingw64 has no strerror_r, but Windows has strerror_s, which C11 added
  419. // as well. So maybe we should use this across all platforms (together
  420. // with strerrorlen_s). Note strerror_r and _s have swapped args.
  421. int r = strerror_s(buf, sizeof(buf), err);
  422. if (r != 0) {
  423. result = to<fbstring>(
  424. "Unknown error ", err, " (strerror_r failed with error ", errno, ")");
  425. } else {
  426. result.assign(buf);
  427. }
  428. #elif FOLLY_HAVE_XSI_STRERROR_R || defined(__APPLE__)
  429. // Using XSI-compatible strerror_r
  430. int r = strerror_r(err, buf, sizeof(buf));
  431. // OSX/FreeBSD use EINVAL and Linux uses -1 so just check for non-zero
  432. if (r != 0) {
  433. result = to<fbstring>(
  434. "Unknown error ", err, " (strerror_r failed with error ", errno, ")");
  435. } else {
  436. result.assign(buf);
  437. }
  438. #else
  439. // Using GNU strerror_r
  440. result.assign(strerror_r(err, buf, sizeof(buf)));
  441. #endif
  442. return result;
  443. }
  444. namespace {
  // Branch-free ASCII tolower for a single character, based on the
  // input-rotating trick described at
  // http://www.azillionmonkeys.com/qed/asmexample.html
  //
  // Idea: an uppercase ASCII letter becomes lowercase by adding 0x20. The
  // arithmetic below manufactures a byte whose top bit is set exactly when
  // `c` is in 'A'..'Z', then moves that bit into the 0x20 position and adds
  // it to `c`.
  void toLowerAscii8(char& c) {
    // Drop the top bit of the input for now; it is accounted for further
    // down. Ranges of `low7`: below 'A' -> 0..64, 'A'..'Z' -> 65..90,
    // above 'Z' -> 91..127.
    const uint8_t low7 = uint8_t(c & 0x7f);

    // Add 0x25 (37) and wrap to 7 bits:
    //   below 'A' -> 37..101, 'A'..'Z' -> 102..127, above 'Z' -> 0..36.
    const uint8_t wrapped = uint8_t((low7 + 0x25) & 0x7f);

    // Add 0x1a (26); the largest possible value is 153, so nothing overflows:
    //   below 'A' -> 63..127, 'A'..'Z' -> 128..153, above 'Z' -> 26..62.
    // Only the uppercase letters now have the top bit set.
    const uint8_t flagged = uint8_t(wrapped + 0x1a);

    // Inputs in 128+'A'..128+'Z' would be false positives because their top
    // bit was ignored above. And-ing with ~c clears the flag whenever the
    // top bit of `c` is set. The flag is then shifted two places right into
    // the 0x20 position and everything else is masked off.
    const uint8_t caseBit = uint8_t(((flagged & ~c) >> 2) & 0x20);

    // caseBit is 0x20 for 'A'..'Z' and 0x00 for everything else.
    c += char(caseBit);
  }
  // Lowercases four packed ASCII characters at once.
  //
  // This is the toLowerAscii8() algorithm applied to every byte of a
  // uint32_t in parallel. That works without SIMD instructions because none
  // of the additions in that algorithm can carry out of its own byte.
  void toLowerAscii32(uint32_t& c) {
    constexpr uint32_t kLow7Bits = uint32_t(0x7f7f7f7fL);
    constexpr uint32_t kAdd37 = uint32_t(0x25252525L);
    constexpr uint32_t kAdd26 = uint32_t(0x1a1a1a1aL);
    constexpr uint32_t kCaseBits = uint32_t(0x20202020L);

    // After these steps the top bit of a byte is set only for 'A'..'Z'.
    uint32_t lanes = ((c & kLow7Bits) + kAdd37) & kLow7Bits;
    lanes += kAdd26;

    // Clear false positives from bytes >= 0x80, then move each flag into the
    // 0x20 position. The shift leaks bits across byte boundaries, but only
    // into positions that the final mask discards.
    lanes = ((lanes & ~c) >> 2) & kCaseBits;

    c += lanes;
  }
  // Lowercases eight packed ASCII characters at once; the 64-bit counterpart
  // of toLowerAscii32(), using the same per-byte arithmetic.
  void toLowerAscii64(uint64_t& c) {
    constexpr uint64_t kLow7Bits = uint64_t(0x7f7f7f7f7f7f7f7fL);
    constexpr uint64_t kAdd37 = uint64_t(0x2525252525252525L);
    constexpr uint64_t kAdd26 = uint64_t(0x1a1a1a1a1a1a1a1aL);
    constexpr uint64_t kCaseBits = uint64_t(0x2020202020202020L);

    // After these steps the top bit of a byte is set only for 'A'..'Z'.
    uint64_t lanes = ((c & kLow7Bits) + kAdd37) & kLow7Bits;
    lanes += kAdd26;

    // Clear false positives from bytes >= 0x80, then move each flag into the
    // 0x20 position and discard every other bit.
    lanes = ((lanes & ~c) >> 2) & kCaseBits;

    c += lanes;
  }
  522. } // namespace
  523. void toLowerAscii(char* str, size_t length) {
  524. static const size_t kAlignMask64 = 7;
  525. static const size_t kAlignMask32 = 3;
  526. // Convert a character at a time until we reach an address that
  527. // is at least 32-bit aligned
  528. size_t n = (size_t)str;
  529. n &= kAlignMask32;
  530. n = std::min(n, length);
  531. size_t offset = 0;
  532. if (n != 0) {
  533. n = std::min(4 - n, length);
  534. do {
  535. toLowerAscii8(str[offset]);
  536. offset++;
  537. } while (offset < n);
  538. }
  539. n = (size_t)(str + offset);
  540. n &= kAlignMask64;
  541. if ((n != 0) && (offset + 4 <= length)) {
  542. // The next address is 32-bit aligned but not 64-bit aligned.
  543. // Convert the next 4 bytes in order to get to the 64-bit aligned
  544. // part of the input.
  545. toLowerAscii32(*(uint32_t*)(str + offset));
  546. offset += 4;
  547. }
  548. // Convert 8 characters at a time
  549. while (offset + 8 <= length) {
  550. toLowerAscii64(*(uint64_t*)(str + offset));
  551. offset += 8;
  552. }
  553. // Convert 4 characters at a time
  554. while (offset + 4 <= length) {
  555. toLowerAscii32(*(uint32_t*)(str + offset));
  556. offset += 4;
  557. }
  558. // Convert any characters remaining after the last 4-byte aligned group
  559. while (offset < length) {
  560. toLowerAscii8(str[offset]);
  561. offset++;
  562. }
  563. }
  564. namespace detail {
  565. size_t
  566. hexDumpLine(const void* ptr, size_t offset, size_t size, std::string& line) {
  567. static char hexValues[] = "0123456789abcdef";
  568. // Line layout:
  569. // 8: address
  570. // 1: space
  571. // (1+2)*16: hex bytes, each preceded by a space
  572. // 1: space separating the two halves
  573. // 3: " |"
  574. // 16: characters
  575. // 1: "|"
  576. // Total: 78
  577. line.clear();
  578. line.reserve(78);
  579. const uint8_t* p = reinterpret_cast<const uint8_t*>(ptr) + offset;
  580. size_t n = std::min(size - offset, size_t(16));
  581. line.push_back(hexValues[(offset >> 28) & 0xf]);
  582. line.push_back(hexValues[(offset >> 24) & 0xf]);
  583. line.push_back(hexValues[(offset >> 20) & 0xf]);
  584. line.push_back(hexValues[(offset >> 16) & 0xf]);
  585. line.push_back(hexValues[(offset >> 12) & 0xf]);
  586. line.push_back(hexValues[(offset >> 8) & 0xf]);
  587. line.push_back(hexValues[(offset >> 4) & 0xf]);
  588. line.push_back(hexValues[offset & 0xf]);
  589. line.push_back(' ');
  590. for (size_t i = 0; i < n; i++) {
  591. if (i == 8) {
  592. line.push_back(' ');
  593. }
  594. line.push_back(' ');
  595. line.push_back(hexValues[(p[i] >> 4) & 0xf]);
  596. line.push_back(hexValues[p[i] & 0xf]);
  597. }
  598. // 3 spaces for each byte we're not printing, one separating the halves
  599. // if necessary
  600. line.append(3 * (16 - n) + (n <= 8), ' ');
  601. line.append(" |");
  602. for (size_t i = 0; i < n; i++) {
  603. char c = (p[i] >= 32 && p[i] <= 126 ? static_cast<char>(p[i]) : '.');
  604. line.push_back(c);
  605. }
  606. line.append(16 - n, ' ');
  607. line.push_back('|');
  608. DCHECK_EQ(line.size(), 78u);
  609. return n;
  610. }
  611. } // namespace detail
  612. std::string stripLeftMargin(std::string s) {
  613. std::vector<StringPiece> pieces;
  614. split("\n", s, pieces);
  615. auto piecer = range(pieces);
  616. auto piece = (piecer.end() - 1);
  617. auto needle = std::find_if(piece->begin(), piece->end(), [](char c) {
  618. return c != ' ' && c != '\t';
  619. });
  620. if (needle == piece->end()) {
  621. (piecer.end() - 1)->clear();
  622. }
  623. piece = piecer.begin();
  624. needle = std::find_if(piece->begin(), piece->end(), [](char c) {
  625. return c != ' ' && c != '\t';
  626. });
  627. if (needle == piece->end()) {
  628. piecer.erase(piecer.begin(), piecer.begin() + 1);
  629. }
  630. const auto sentinel = std::numeric_limits<size_t>::max();
  631. auto indent = sentinel;
  632. size_t max_length = 0;
  633. for (piece = piecer.begin(); piece != piecer.end(); piece++) {
  634. needle = std::find_if(piece->begin(), piece->end(), [](char c) {
  635. return c != ' ' && c != '\t';
  636. });
  637. if (needle != piece->end()) {
  638. indent = std::min<size_t>(indent, size_t(needle - piece->begin()));
  639. } else {
  640. max_length = std::max<size_t>(piece->size(), max_length);
  641. }
  642. }
  643. indent = indent == sentinel ? max_length : indent;
  644. for (piece = piecer.begin(); piece != piecer.end(); piece++) {
  645. if (piece->size() < indent) {
  646. piece->clear();
  647. } else {
  648. piece->erase(piece->begin(), piece->begin() + indent);
  649. }
  650. }
  651. return join("\n", piecer);
  652. }
  653. } // namespace folly
  // Cleanup of the DMGL_* demangler option macros. It only takes effect when
  // FOLLY_DEFINED_DMGL is defined, which presumably marks that this
  // translation unit (rather than a system header) defined them; nothing
  // visible in this file defines it, so confirm whether the block is still
  // needed.
  #if defined(FOLLY_DEFINED_DMGL)
  #  undef FOLLY_DEFINED_DMGL
  #  undef DMGL_NO_OPTS
  #  undef DMGL_PARAMS
  #  undef DMGL_ANSI
  #  undef DMGL_JAVA
  #  undef DMGL_VERBOSE
  #  undef DMGL_TYPES
  #  undef DMGL_RET_POSTFIX
  #endif // FOLLY_DEFINED_DMGL