StringBenchmark.cpp 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352
  1. /*
  2. * Copyright 2014-present Facebook, Inc.
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #include <atomic>
  17. #include <glog/logging.h>
  18. #include <folly/Benchmark.h>
  19. #include <folly/String.h>
  20. #include <folly/container/Foreach.h>
  21. #include <folly/gen/Base.h>
  22. #include <folly/gen/String.h>
  23. using namespace folly;
  24. using namespace folly::gen;
  25. using std::pair;
  26. using std::set;
  27. using std::tuple;
  28. using std::vector;
  29. namespace {
  30. static std::atomic<int> testSize(1000);
  31. static vector<fbstring> testStrVector =
  32. seq(1, testSize.load()) | eachTo<fbstring>() | as<vector>();
  33. static auto testFileContent = from(testStrVector) | unsplit('\n');
  34. const char* const kLine = "The quick brown fox jumped over the lazy dog.\n";
  35. const size_t kLineCount = 10000;
  36. std::string bigLines;
  37. const size_t kSmallLineSize = 17;
  38. std::vector<std::string> smallLines;
  39. void initStringResplitterBenchmark() {
  40. bigLines.reserve(kLineCount * strlen(kLine));
  41. for (size_t i = 0; i < kLineCount; ++i) {
  42. bigLines += kLine;
  43. }
  44. size_t remaining = bigLines.size();
  45. size_t pos = 0;
  46. while (remaining) {
  47. size_t n = std::min(kSmallLineSize, remaining);
  48. smallLines.push_back(bigLines.substr(pos, n));
  49. pos += n;
  50. remaining -= n;
  51. }
  52. }
  53. size_t len(folly::StringPiece s) {
  54. return s.size();
  55. }
  56. } // namespace
  57. BENCHMARK(StringResplitter_Big, iters) {
  58. size_t s = 0;
  59. while (iters--) {
  60. s += from({bigLines}) | resplit('\n') | map(&len) | sum;
  61. }
  62. folly::doNotOptimizeAway(s);
  63. }
  64. BENCHMARK_RELATIVE(StringResplitter_Small, iters) {
  65. size_t s = 0;
  66. while (iters--) {
  67. s += from(smallLines) | resplit('\n') | map(&len) | sum;
  68. }
  69. folly::doNotOptimizeAway(s);
  70. }
  71. BENCHMARK_DRAW_LINE();
  72. BENCHMARK(StringSplit_Old, iters) {
  73. size_t s = 0;
  74. std::string line(kLine);
  75. while (iters--) {
  76. std::vector<StringPiece> parts;
  77. split(' ', line, parts);
  78. s += parts.size();
  79. }
  80. folly::doNotOptimizeAway(s);
  81. }
  82. BENCHMARK_RELATIVE(StringSplit_Gen_Vector, iters) {
  83. size_t s = 0;
  84. StringPiece line(kLine);
  85. while (iters--) {
  86. s += (split(line, ' ') | as<vector>()).size();
  87. }
  88. folly::doNotOptimizeAway(s);
  89. }
  90. BENCHMARK_DRAW_LINE();
  91. BENCHMARK(StringSplit_Old_ReuseVector, iters) {
  92. size_t s = 0;
  93. std::string line(kLine);
  94. std::vector<StringPiece> parts;
  95. while (iters--) {
  96. parts.clear();
  97. split(' ', line, parts);
  98. s += parts.size();
  99. }
  100. folly::doNotOptimizeAway(s);
  101. }
  102. BENCHMARK_RELATIVE(StringSplit_Gen_ReuseVector, iters) {
  103. size_t s = 0;
  104. StringPiece line(kLine);
  105. std::vector<StringPiece> parts;
  106. while (iters--) {
  107. parts.clear();
  108. split(line, ' ') | appendTo(parts);
  109. s += parts.size();
  110. }
  111. folly::doNotOptimizeAway(s);
  112. }
  113. BENCHMARK_RELATIVE(StringSplit_Gen, iters) {
  114. size_t s = 0;
  115. StringPiece line(kLine);
  116. while (iters--) {
  117. s += split(line, ' ') | count;
  118. }
  119. folly::doNotOptimizeAway(s);
  120. }
  121. BENCHMARK_RELATIVE(StringSplit_Gen_Take, iters) {
  122. size_t s = 0;
  123. StringPiece line(kLine);
  124. while (iters--) {
  125. s += split(line, ' ') | take(10) | count;
  126. }
  127. folly::doNotOptimizeAway(s);
  128. }
  129. BENCHMARK_DRAW_LINE();
  130. BENCHMARK(StringUnsplit_Old, iters) {
  131. size_t s = 0;
  132. while (iters--) {
  133. fbstring joined;
  134. join(',', testStrVector, joined);
  135. s += joined.size();
  136. }
  137. folly::doNotOptimizeAway(s);
  138. }
  139. BENCHMARK_RELATIVE(StringUnsplit_Old_ReusedBuffer, iters) {
  140. size_t s = 0;
  141. fbstring joined;
  142. while (iters--) {
  143. joined.clear();
  144. join(',', testStrVector, joined);
  145. s += joined.size();
  146. }
  147. folly::doNotOptimizeAway(s);
  148. }
  149. BENCHMARK_RELATIVE(StringUnsplit_Gen, iters) {
  150. size_t s = 0;
  151. while (iters--) {
  152. fbstring joined = from(testStrVector) | unsplit(',');
  153. s += joined.size();
  154. }
  155. folly::doNotOptimizeAway(s);
  156. }
  157. BENCHMARK_RELATIVE(StringUnsplit_Gen_ReusedBuffer, iters) {
  158. size_t s = 0;
  159. fbstring buffer;
  160. while (iters--) {
  161. buffer.clear();
  162. from(testStrVector) | unsplit(',', &buffer);
  163. s += buffer.size();
  164. }
  165. folly::doNotOptimizeAway(s);
  166. }
  167. BENCHMARK_DRAW_LINE();
  168. void StringUnsplit_Gen(size_t iters, size_t joinSize) {
  169. std::vector<fbstring> v;
  170. BENCHMARK_SUSPEND {
  171. FOR_EACH_RANGE (i, 0, joinSize) { v.push_back(to<fbstring>(rand())); }
  172. }
  173. size_t s = 0;
  174. fbstring buffer;
  175. while (iters--) {
  176. buffer.clear();
  177. from(v) | unsplit(',', &buffer);
  178. s += buffer.size();
  179. }
  180. folly::doNotOptimizeAway(s);
  181. }
  182. BENCHMARK_PARAM(StringUnsplit_Gen, 1000)
  183. BENCHMARK_RELATIVE_PARAM(StringUnsplit_Gen, 2000)
  184. BENCHMARK_RELATIVE_PARAM(StringUnsplit_Gen, 4000)
  185. BENCHMARK_RELATIVE_PARAM(StringUnsplit_Gen, 8000)
  186. BENCHMARK_DRAW_LINE();
  187. void Lines_Gen(size_t iters, int joinSize) {
  188. size_t s = 0;
  189. StringPiece content = testFileContent;
  190. for (size_t i = 0; i < iters; ++i) {
  191. s += lines(content.subpiece(0, joinSize)) | take(100) | count;
  192. }
  193. folly::doNotOptimizeAway(s);
  194. }
  195. BENCHMARK_PARAM(Lines_Gen, 1e3)
  196. BENCHMARK_RELATIVE_PARAM(Lines_Gen, 2e3)
  197. BENCHMARK_RELATIVE_PARAM(Lines_Gen, 3e3)
  198. BENCHMARK_DRAW_LINE();
  199. // clang-format off
  200. fbstring records = seq<size_t>(1, 1000)
  201. | mapped([](size_t i) {
  202. return folly::to<fbstring>(i, ' ', i * i, ' ', i * i * i);
  203. })
  204. | unsplit('\n');
  205. // clang-format o
  206. BENCHMARK(Records_EachToTuple, iters) {
  207. size_t s = 0;
  208. for (size_t i = 0; i < iters; i += 1000) {
  209. // clang-format off
  210. s += split(records, '\n')
  211. | eachToTuple<int, size_t, StringPiece>(' ')
  212. | get<1>()
  213. | sum;
  214. // clang-format on
  215. }
  216. folly::doNotOptimizeAway(s);
  217. }
  218. BENCHMARK_RELATIVE(Records_VectorStringPieceReused, iters) {
  219. size_t s = 0;
  220. std::vector<StringPiece> fields;
  221. for (size_t i = 0; i < iters; i += 1000) {
  222. // clang-format off
  223. s += split(records, '\n')
  224. | mapped([&](StringPiece line) {
  225. fields.clear();
  226. folly::split(' ', line, fields);
  227. CHECK(fields.size() == 3);
  228. return std::make_tuple(
  229. folly::to<int>(fields[0]),
  230. folly::to<size_t>(fields[1]),
  231. StringPiece(fields[2]));
  232. })
  233. | get<1>()
  234. | sum;
  235. // clang-format on
  236. }
  237. folly::doNotOptimizeAway(s);
  238. }
  239. BENCHMARK_RELATIVE(Records_VectorStringPiece, iters) {
  240. size_t s = 0;
  241. for (size_t i = 0; i < iters; i += 1000) {
  242. // clang-format off
  243. s += split(records, '\n')
  244. | mapped([](StringPiece line) {
  245. std::vector<StringPiece> fields;
  246. folly::split(' ', line, fields);
  247. CHECK(fields.size() == 3);
  248. return std::make_tuple(
  249. folly::to<int>(fields[0]),
  250. folly::to<size_t>(fields[1]),
  251. StringPiece(fields[2]));
  252. })
  253. | get<1>()
  254. | sum;
  255. // clang-format on
  256. }
  257. folly::doNotOptimizeAway(s);
  258. }
  259. BENCHMARK_RELATIVE(Records_VectorString, iters) {
  260. size_t s = 0;
  261. for (size_t i = 0; i < iters; i += 1000) {
  262. // clang-format off
  263. s += split(records, '\n')
  264. | mapped([](StringPiece line) {
  265. std::vector<std::string> fields;
  266. folly::split(' ', line, fields);
  267. CHECK(fields.size() == 3);
  268. return std::make_tuple(
  269. folly::to<int>(fields[0]),
  270. folly::to<size_t>(fields[1]),
  271. StringPiece(fields[2]));
  272. })
  273. | get<1>()
  274. | sum;
  275. // clang-format on
  276. }
  277. folly::doNotOptimizeAway(s);
  278. }
  279. // Results from an Intel(R) Xeon(R) CPU E5-2660 0 @ 2.20GHz
  280. // ============================================================================
  281. // folly/gen/test/StringBenchmark.cpp relative time/iter iters/s
  282. // ============================================================================
  283. // StringResplitter_Big 108.58us 9.21K
  284. // StringResplitter_Small 10.60% 1.02ms 976.48
  285. // ----------------------------------------------------------------------------
  286. // StringSplit_Old 357.82ns 2.79M
  287. // StringSplit_Gen_Vector 105.10% 340.46ns 2.94M
  288. // ----------------------------------------------------------------------------
  289. // StringSplit_Old_ReuseVector 96.45ns 10.37M
  290. // StringSplit_Gen_ReuseVector 124.01% 77.78ns 12.86M
  291. // StringSplit_Gen 140.10% 68.85ns 14.52M
  292. // StringSplit_Gen_Take 122.97% 78.44ns 12.75M
  293. // ----------------------------------------------------------------------------
  294. // StringUnsplit_Old 42.99us 23.26K
  295. // StringUnsplit_Old_ReusedBuffer 100.48% 42.79us 23.37K
  296. // StringUnsplit_Gen 96.37% 44.61us 22.42K
  297. // StringUnsplit_Gen_ReusedBuffer 116.96% 36.76us 27.20K
  298. // ----------------------------------------------------------------------------
  299. // StringUnsplit_Gen(1000) 44.71us 22.37K
  300. // StringUnsplit_Gen(2000) 49.28% 90.72us 11.02K
  301. // StringUnsplit_Gen(4000) 24.05% 185.91us 5.38K
  302. // StringUnsplit_Gen(8000) 12.23% 365.42us 2.74K
  303. // ----------------------------------------------------------------------------
  304. // Records_EachToTuple 101.43us 9.86K
  305. // Records_VectorStringPieceReused 93.72% 108.22us 9.24K
  306. // Records_VectorStringPiece 37.14% 273.11us 3.66K
  307. // Records_VectorString 16.70% 607.47us 1.65K
  308. // ============================================================================
  309. int main(int argc, char* argv[]) {
  310. gflags::ParseCommandLineFlags(&argc, &argv, true);
  311. initStringResplitterBenchmark();
  312. runBenchmarks();
  313. return 0;
  314. }