StringTest.cpp 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387
  1. /*
  2. * Copyright 2014-present Facebook, Inc.
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #include <iosfwd>
  17. #include <map>
  18. #include <vector>
  19. #include <folly/functional/ApplyTuple.h>
  20. #include <folly/gen/String.h>
  21. #include <folly/portability/GTest.h>
  22. using namespace folly::gen;
  23. using namespace folly;
  24. using std::make_tuple;
  25. using std::ostream;
  26. using std::pair;
  27. using std::string;
  28. using std::tuple;
  29. using std::unique_ptr;
  30. using std::vector;
  31. using vec = vector<string>;
  32. static auto collect = eachTo<std::string>() | as<vector>();
  33. TEST(StringGen, EmptySplit) {
  34. {
  35. auto input = "";
  36. auto expected = vec{};
  37. EXPECT_EQ(expected, split(input, ',') | collect);
  38. }
  39. // The last delimiter is eaten, just like std::getline
  40. {
  41. auto input = ",";
  42. auto expected = vec{""};
  43. EXPECT_EQ(expected, split(input, ',') | collect);
  44. }
  45. {
  46. auto input = ",,";
  47. auto expected = vec{"", ""};
  48. EXPECT_EQ(expected, split(input, ',') | collect);
  49. }
  50. {
  51. auto input = ",,";
  52. auto expected = vec{""};
  53. EXPECT_EQ(expected, split(input, ',') | take(1) | collect);
  54. }
  55. }
  56. TEST(StringGen, Split) {
  57. {
  58. auto input = "hello,, world, goodbye, meow";
  59. auto expected = vec{"hello", "", " world", " goodbye", " meow"};
  60. EXPECT_EQ(expected, split(input, ',') | collect);
  61. }
  62. {
  63. auto input = "hello,, world, goodbye, meow";
  64. auto expected = vec{"hello", "", " world"};
  65. EXPECT_EQ(expected, split(input, ',') | take(3) | collect);
  66. }
  67. {
  68. auto input = "hello,, world, goodbye, meow";
  69. auto expected = vec{"hello", "", " world", " goodbye", " meow"};
  70. EXPECT_EQ(expected, split(input, ",") | take(5) | collect);
  71. }
  72. {
  73. auto input = "hello,, world, goodbye, meow";
  74. auto expected = vec{"hello,", "world", "goodbye", "meow"};
  75. EXPECT_EQ(expected, split(input, ", ") | collect);
  76. }
  77. }
  78. TEST(StringGen, SplitByNewLine) {
  79. {
  80. auto input = "hello\n\n world\r\n goodbye\r me\n\row";
  81. auto expected = vec{"hello", "", " world", " goodbye", " me", "", "ow"};
  82. EXPECT_EQ(expected, lines(input) | collect);
  83. }
  84. }
  85. TEST(StringGen, EmptyResplit) {
  86. {
  87. auto input = vec{""};
  88. auto expected = vec{};
  89. EXPECT_EQ(expected, from(input) | resplit(',') | collect);
  90. }
  91. // The last delimiter is eaten, just like std::getline
  92. {
  93. auto input = vec{","};
  94. auto expected = vec{""};
  95. EXPECT_EQ(expected, from(input) | resplit(',') | collect);
  96. }
  97. {
  98. auto input = vec{",,"};
  99. auto expected = vec{"", ""};
  100. EXPECT_EQ(expected, from(input) | resplit(',') | collect);
  101. }
  102. }
  103. TEST(StringGen, Resplit) {
  104. {
  105. auto input = vec{"hello,, world, goodbye, meow"};
  106. auto expected = vec{"hello", "", " world", " goodbye", " meow"};
  107. EXPECT_EQ(expected, from(input) | resplit(',') | collect);
  108. }
  109. {
  110. auto input = vec{"hel", "lo,", ", world", ", goodbye, m", "eow"};
  111. auto expected = vec{"hello", "", " world", " goodbye", " meow"};
  112. EXPECT_EQ(expected, from(input) | resplit(',') | collect);
  113. }
  114. }
  115. TEST(StringGen, ResplitKeepDelimiter) {
  116. {
  117. auto input = vec{"hello,, world, goodbye, meow"};
  118. auto expected = vec{"hello,", ",", " world,", " goodbye,", " meow"};
  119. EXPECT_EQ(expected, from(input) | resplit(',', true) | collect);
  120. }
  121. {
  122. auto input = vec{"hel", "lo,", ", world", ", goodbye, m", "eow"};
  123. auto expected = vec{"hello,", ",", " world,", " goodbye,", " meow"};
  124. EXPECT_EQ(expected, from(input) | resplit(',', true) | collect);
  125. }
  126. }
  127. TEST(StringGen, EachToTuple) {
  128. {
  129. auto lines = "2:1.414:yo 3:1.732:hi";
  130. // clang-format off
  131. auto actual
  132. = split(lines, ' ')
  133. | eachToTuple<int, double, std::string>(':')
  134. | as<vector>();
  135. // clang-format on
  136. vector<tuple<int, double, std::string>> expected{
  137. make_tuple(2, 1.414, "yo"),
  138. make_tuple(3, 1.732, "hi"),
  139. };
  140. EXPECT_EQ(expected, actual);
  141. }
  142. {
  143. auto lines = "2 3";
  144. // clang-format off
  145. auto actual
  146. = split(lines, ' ')
  147. | eachToTuple<int>(',')
  148. | as<vector>();
  149. // clang-format on
  150. vector<tuple<int>> expected{
  151. make_tuple(2),
  152. make_tuple(3),
  153. };
  154. EXPECT_EQ(expected, actual);
  155. }
  156. {
  157. // StringPiece target
  158. auto lines = "1:cat 2:dog";
  159. // clang-format off
  160. auto actual
  161. = split(lines, ' ')
  162. | eachToTuple<int, StringPiece>(':')
  163. | as<vector>();
  164. // clang-format on
  165. vector<tuple<int, StringPiece>> expected{
  166. make_tuple(1, "cat"),
  167. make_tuple(2, "dog"),
  168. };
  169. EXPECT_EQ(expected, actual);
  170. }
  171. {
  172. // Empty field
  173. auto lines = "2:tjackson:4 3::5";
  174. // clang-format off
  175. auto actual
  176. = split(lines, ' ')
  177. | eachToTuple<int, fbstring, int>(':')
  178. | as<vector>();
  179. // clang-format on
  180. vector<tuple<int, fbstring, int>> expected{
  181. make_tuple(2, "tjackson", 4),
  182. make_tuple(3, "", 5),
  183. };
  184. EXPECT_EQ(expected, actual);
  185. }
  186. {
  187. // Excess fields
  188. auto lines = "1:2 3:4:5";
  189. // clang-format off
  190. EXPECT_THROW(
  191. (split(lines, ' ')
  192. | eachToTuple<int, int>(':')
  193. | as<vector>()),
  194. std::runtime_error);
  195. // clang-format on
  196. }
  197. {
  198. // Missing fields
  199. auto lines = "1:2:3 4:5";
  200. // clang-format off
  201. EXPECT_THROW(
  202. (split(lines, ' ')
  203. | eachToTuple<int, int, int>(':')
  204. | as<vector>()),
  205. std::runtime_error);
  206. // clang-format on
  207. }
  208. }
  209. TEST(StringGen, EachToPair) {
  210. {
  211. // char delimiters
  212. auto lines = "2:1.414 3:1.732";
  213. // clang-format off
  214. auto actual
  215. = split(lines, ' ')
  216. | eachToPair<int, double>(':')
  217. | as<std::map<int, double>>();
  218. // clang-format on
  219. std::map<int, double> expected{
  220. {3, 1.732},
  221. {2, 1.414},
  222. };
  223. EXPECT_EQ(expected, actual);
  224. }
  225. {
  226. // string delimiters
  227. auto lines = "ab=>cd ef=>gh";
  228. // clang-format off
  229. auto actual
  230. = split(lines, ' ')
  231. | eachToPair<string, string>("=>")
  232. | as<std::map<string, string>>();
  233. // clang-format on
  234. std::map<string, string> expected{
  235. {"ab", "cd"},
  236. {"ef", "gh"},
  237. };
  238. EXPECT_EQ(expected, actual);
  239. }
  240. }
  241. void checkResplitMaxLength(
  242. vector<string> ins,
  243. char delim,
  244. uint64_t maxLength,
  245. vector<string> outs) {
  246. vector<std::string> pieces;
  247. auto splitter = streamSplitter(
  248. delim,
  249. [&pieces](StringPiece s) {
  250. pieces.push_back(string(s.begin(), s.end()));
  251. return true;
  252. },
  253. maxLength);
  254. for (const auto& in : ins) {
  255. splitter(in);
  256. }
  257. splitter.flush();
  258. EXPECT_EQ(outs.size(), pieces.size());
  259. for (size_t i = 0; i < outs.size(); ++i) {
  260. EXPECT_EQ(outs[i], pieces[i]);
  261. }
  262. // Also check the concatenated input against the same output
  263. if (ins.size() > 1) {
  264. checkResplitMaxLength({folly::join("", ins)}, delim, maxLength, outs);
  265. }
  266. }
  267. TEST(StringGen, ResplitMaxLength) {
  268. // clang-format off
  269. checkResplitMaxLength(
  270. {"hel", "lo,", ", world", ", goodbye, m", "ew"}, ',', 5,
  271. {"hello", ",", ",", " worl", "d,", " good", "bye,", " mew"});
  272. // " meow" cannot be "end of stream", since it's maxLength long
  273. checkResplitMaxLength(
  274. {"hel", "lo,", ", world", ", goodbye, m", "eow"}, ',', 5,
  275. {"hello", ",", ",", " worl", "d,", " good", "bye,", " meow", ""});
  276. checkResplitMaxLength(
  277. {"||", "", "", "", "|a|b", "cdefghijklmn", "|opqrst",
  278. "uvwx|y|||", "z", "0123456789", "|", ""}, '|', 2,
  279. {"|", "|", "|", "a|", "bc", "de", "fg", "hi", "jk", "lm", "n|", "op",
  280. "qr", "st", "uv", "wx", "|", "y|", "|", "|", "z0", "12", "34", "56",
  281. "78", "9|", ""});
  282. // clang-format on
  283. }
  // Calls `fn` once for each sample string, in a fixed order. The samples
  // include an empty string, a lone space and a leading delimiter.
  template <typename F>
  void runUnsplitSuite(F fn) {
    static constexpr const char* kSamples[] = {
        "hello, world",
        "hello,world,goodbye",
        " ",
        "",
        ", ",
        ", a, b,c",
    };
    for (const char* sample : kSamples) {
      fn(sample);
    }
  }
  293. TEST(StringGen, Unsplit) {
  294. auto basicFn = [](StringPiece s) {
  295. EXPECT_EQ(split(s, ',') | unsplit(','), s);
  296. };
  297. auto existingBuffer = [](StringPiece s) {
  298. folly::fbstring buffer("asdf");
  299. split(s, ',') | unsplit(',', &buffer);
  300. auto expected = folly::to<folly::fbstring>("asdf", s.empty() ? "" : ",", s);
  301. EXPECT_EQ(expected, buffer);
  302. };
  303. auto emptyBuffer = [](StringPiece s) {
  304. std::string buffer;
  305. split(s, ',') | unsplit(',', &buffer);
  306. EXPECT_EQ(s, buffer);
  307. };
  308. auto stringDelim = [](StringPiece s) {
  309. EXPECT_EQ(s, split(s, ',') | unsplit(","));
  310. std::string buffer;
  311. split(s, ',') | unsplit(",", &buffer);
  312. EXPECT_EQ(buffer, s);
  313. };
  314. runUnsplitSuite(basicFn);
  315. runUnsplitSuite(existingBuffer);
  316. runUnsplitSuite(emptyBuffer);
  317. runUnsplitSuite(stringDelim);
  318. EXPECT_EQ("1, 2, 3", seq(1, 3) | unsplit(", "));
  319. }
  320. TEST(StringGen, Batch) {
  321. std::vector<std::string> chunks{
  322. "on", "e\nt", "w", "o", "\nthr", "ee\nfo", "ur\n"};
  323. std::vector<std::string> lines{"one", "two", "three", "four"};
  324. EXPECT_EQ(4, from(chunks) | resplit('\n') | count);
  325. EXPECT_EQ(4, from(chunks) | resplit('\n') | batch(2) | rconcat | count);
  326. EXPECT_EQ(4, from(chunks) | resplit('\n') | batch(3) | rconcat | count);
  327. // clang-format off
  328. EXPECT_EQ(
  329. lines,
  330. from(chunks)
  331. | resplit('\n')
  332. | eachTo<std::string>()
  333. | batch(3)
  334. | rconcat
  335. | as<vector>());
  336. // clang-format on
  337. }
  338. TEST(StringGen, UncurryTuple) {
  339. folly::StringPiece file = "1\t2\t3\n1\t4\t9";
  340. auto rows = split(file, '\n') | eachToTuple<int, int, int>('\t');
  341. auto productSum =
  342. rows | map(uncurry([](int x, int y, int z) { return x * y * z; })) | sum;
  343. EXPECT_EQ(42, productSum);
  344. }
  345. TEST(StringGen, UncurryPair) {
  346. folly::StringPiece file = "2\t3\n4\t9";
  347. auto rows = split(file, '\n') | eachToPair<int, int>('\t');
  348. auto productSum =
  349. rows | map(uncurry([](int x, int y) { return x * y; })) | sum;
  350. EXPECT_EQ(42, productSum);
  351. }