CodingTestUtils.h 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435
  1. /*
  2. * Copyright 2013-present Facebook, Inc.
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #pragma once
  17. #include <algorithm>
  18. #include <fstream>
  19. #include <limits>
  20. #include <random>
  21. #include <string>
  22. #include <unordered_set>
  23. #include <vector>
  24. #include <glog/logging.h>
  25. #include <folly/Benchmark.h>
  26. #include <folly/Likely.h>
  27. #include <folly/Optional.h>
  28. #include <folly/experimental/Instructions.h>
  29. #include <folly/portability/GTest.h>
  30. namespace folly {
  31. namespace compression {
  32. template <class URNG>
  33. std::vector<uint32_t> generateRandomList(size_t n, uint32_t maxId, URNG&& g) {
  34. CHECK_LT(n, 2 * maxId);
  35. std::uniform_int_distribution<> uid(1, maxId);
  36. std::unordered_set<uint32_t> dataset;
  37. while (dataset.size() < n) {
  38. uint32_t value = uid(g);
  39. if (dataset.count(value) == 0) {
  40. dataset.insert(value);
  41. }
  42. }
  43. std::vector<uint32_t> ids(dataset.begin(), dataset.end());
  44. std::sort(ids.begin(), ids.end());
  45. return ids;
  46. }
  47. inline std::vector<uint32_t> generateRandomList(size_t n, uint32_t maxId) {
  48. std::mt19937 gen;
  49. return generateRandomList(n, maxId, gen);
  50. }
  51. inline std::vector<uint32_t>
  52. generateSeqList(uint32_t minId, uint32_t maxId, uint32_t step = 1) {
  53. CHECK_LE(minId, maxId);
  54. CHECK_GT(step, 0);
  55. std::vector<uint32_t> ids;
  56. ids.reserve((maxId - minId) / step + 1);
  57. for (uint32_t i = minId; i <= maxId; i += step) {
  58. ids.push_back(i);
  59. }
  60. return ids;
  61. }
  62. inline std::vector<uint32_t> loadList(const std::string& filename) {
  63. std::ifstream fin(filename);
  64. std::vector<uint32_t> result;
  65. uint32_t id;
  66. while (fin >> id) {
  67. result.push_back(id);
  68. }
  69. return result;
  70. }
  71. // Test previousValue only if Reader has it.
  72. template <class... Args>
  73. void maybeTestPreviousValue(Args&&...) {}
  74. // Make all the arguments template because if the types are not exact,
  75. // the above overload will be picked (for example i could be size_t or
  76. // ssize_t).
  77. template <class Vector, class Reader, class Index>
  78. auto maybeTestPreviousValue(const Vector& data, Reader& reader, Index i)
  79. -> decltype(reader.previousValue(), void()) {
  80. if (i != 0) {
  81. EXPECT_EQ(reader.previousValue(), data[i - 1]);
  82. }
  83. }
  84. // Test previous only if Reader has it.
  85. template <class... Args>
  86. void maybeTestPrevious(Args&&...) {}
  87. // Make all the arguments template because if the types are not exact,
  88. // the above overload will be picked (for example i could be size_t or
  89. // ssize_t).
  90. template <class Vector, class Reader, class Index>
  91. auto maybeTestPrevious(const Vector& data, Reader& reader, Index i)
  92. -> decltype(reader.previous(), void()) {
  93. auto r = reader.previous();
  94. if (i != 0) {
  95. EXPECT_TRUE(r);
  96. EXPECT_EQ(reader.value(), data[i - 1]);
  97. } else {
  98. EXPECT_FALSE(r);
  99. }
  100. reader.next();
  101. EXPECT_EQ(reader.value(), data[i]);
  102. }
  103. template <class Reader, class List>
  104. void testNext(const std::vector<uint32_t>& data, const List& list) {
  105. Reader reader(list);
  106. EXPECT_FALSE(reader.valid());
  107. for (size_t i = 0; i < data.size(); ++i) {
  108. EXPECT_TRUE(reader.next());
  109. EXPECT_TRUE(reader.valid());
  110. EXPECT_EQ(reader.value(), data[i]);
  111. EXPECT_EQ(reader.position(), i);
  112. maybeTestPreviousValue(data, reader, i);
  113. maybeTestPrevious(data, reader, i);
  114. }
  115. EXPECT_FALSE(reader.next());
  116. EXPECT_FALSE(reader.valid());
  117. EXPECT_EQ(reader.position(), reader.size());
  118. }
  119. template <class Reader, class List>
  120. void testSkip(
  121. const std::vector<uint32_t>& data,
  122. const List& list,
  123. size_t skipStep) {
  124. CHECK_GT(skipStep, 0);
  125. Reader reader(list);
  126. for (size_t i = skipStep - 1; i < data.size(); i += skipStep) {
  127. EXPECT_TRUE(reader.skip(skipStep));
  128. EXPECT_TRUE(reader.valid());
  129. EXPECT_EQ(reader.value(), data[i]);
  130. EXPECT_EQ(reader.position(), i);
  131. maybeTestPreviousValue(data, reader, i);
  132. maybeTestPrevious(data, reader, i);
  133. }
  134. EXPECT_FALSE(reader.skip(skipStep));
  135. EXPECT_FALSE(reader.valid());
  136. EXPECT_EQ(reader.position(), reader.size());
  137. EXPECT_FALSE(reader.next());
  138. }
  139. template <class Reader, class List>
  140. void testSkip(const std::vector<uint32_t>& data, const List& list) {
  141. for (size_t skipStep = 1; skipStep < 25; ++skipStep) {
  142. testSkip<Reader, List>(data, list, skipStep);
  143. }
  144. for (size_t skipStep = 25; skipStep <= 500; skipStep += 25) {
  145. testSkip<Reader, List>(data, list, skipStep);
  146. }
  147. }
  148. template <class Reader, class List>
  149. void testSkipTo(
  150. const std::vector<uint32_t>& data,
  151. const List& list,
  152. size_t skipToStep) {
  153. CHECK_GT(skipToStep, 0);
  154. Reader reader(list);
  155. const uint32_t delta = std::max<uint32_t>(1, data.back() / skipToStep);
  156. uint32_t value = delta;
  157. auto it = data.begin();
  158. while (true) {
  159. it = std::lower_bound(it, data.end(), value);
  160. if (it == data.end()) {
  161. EXPECT_FALSE(reader.skipTo(value));
  162. break;
  163. }
  164. EXPECT_TRUE(reader.skipTo(value));
  165. EXPECT_TRUE(reader.valid());
  166. EXPECT_EQ(reader.value(), *it);
  167. EXPECT_EQ(reader.position(), std::distance(data.begin(), it));
  168. value = reader.value() + delta;
  169. maybeTestPreviousValue(data, reader, std::distance(data.begin(), it));
  170. maybeTestPrevious(data, reader, std::distance(data.begin(), it));
  171. }
  172. EXPECT_FALSE(reader.valid());
  173. EXPECT_EQ(reader.position(), reader.size());
  174. EXPECT_FALSE(reader.next());
  175. }
  176. template <class Reader, class List>
  177. void testSkipTo(const std::vector<uint32_t>& data, const List& list) {
  178. for (size_t steps = 10; steps < 100; steps += 10) {
  179. testSkipTo<Reader, List>(data, list, steps);
  180. }
  181. for (size_t steps = 100; steps <= 1000; steps += 100) {
  182. testSkipTo<Reader, List>(data, list, steps);
  183. }
  184. testSkipTo<Reader, List>(data, list, std::numeric_limits<size_t>::max());
  185. {
  186. // Skip to the first element.
  187. Reader reader(list);
  188. EXPECT_TRUE(reader.skipTo(data[0]));
  189. EXPECT_EQ(reader.value(), data[0]);
  190. EXPECT_EQ(reader.position(), 0);
  191. }
  192. {
  193. // Skip past the last element.
  194. Reader reader(list);
  195. EXPECT_FALSE(reader.skipTo(data.back() + 1));
  196. EXPECT_FALSE(reader.valid());
  197. EXPECT_EQ(reader.position(), reader.size());
  198. EXPECT_FALSE(reader.next());
  199. }
  200. {
  201. // Skip to maximum integer.
  202. Reader reader(list);
  203. using ValueType = typename Reader::ValueType;
  204. EXPECT_FALSE(reader.skipTo(std::numeric_limits<ValueType>::max()));
  205. EXPECT_FALSE(reader.valid());
  206. EXPECT_EQ(reader.position(), reader.size());
  207. EXPECT_FALSE(reader.next());
  208. }
  209. }
  210. template <class Reader, class List>
  211. void testJump(const std::vector<uint32_t>& data, const List& list) {
  212. std::mt19937 gen;
  213. std::vector<size_t> is(data.size());
  214. for (size_t i = 0; i < data.size(); ++i) {
  215. is[i] = i;
  216. }
  217. std::shuffle(is.begin(), is.end(), gen);
  218. if (Reader::EncoderType::forwardQuantum == 0) {
  219. is.resize(std::min<size_t>(is.size(), 100));
  220. }
  221. Reader reader(list);
  222. for (auto i : is) {
  223. EXPECT_TRUE(reader.jump(i));
  224. EXPECT_EQ(reader.value(), data[i]);
  225. EXPECT_EQ(reader.position(), i);
  226. maybeTestPreviousValue(data, reader, i);
  227. maybeTestPrevious(data, reader, i);
  228. }
  229. EXPECT_FALSE(reader.jump(data.size()));
  230. EXPECT_FALSE(reader.valid());
  231. EXPECT_EQ(reader.position(), reader.size());
  232. }
  233. template <class Reader, class List>
  234. void testJumpTo(const std::vector<uint32_t>& data, const List& list) {
  235. CHECK(!data.empty());
  236. Reader reader(list);
  237. std::mt19937 gen;
  238. std::uniform_int_distribution<> values(0, data.back());
  239. const size_t iters = Reader::EncoderType::skipQuantum == 0 ? 100 : 10000;
  240. for (size_t i = 0; i < iters; ++i) {
  241. const uint32_t value = values(gen);
  242. auto it = std::lower_bound(data.begin(), data.end(), value);
  243. CHECK(it != data.end());
  244. EXPECT_TRUE(reader.jumpTo(value));
  245. EXPECT_EQ(reader.value(), *it);
  246. EXPECT_EQ(reader.position(), std::distance(data.begin(), it));
  247. }
  248. EXPECT_TRUE(reader.jumpTo(0));
  249. EXPECT_EQ(reader.value(), data[0]);
  250. EXPECT_EQ(reader.position(), 0);
  251. EXPECT_TRUE(reader.jumpTo(data.back()));
  252. EXPECT_EQ(reader.value(), data.back());
  253. EXPECT_EQ(reader.position(), reader.size() - 1);
  254. EXPECT_FALSE(reader.jumpTo(data.back() + 1));
  255. EXPECT_FALSE(reader.valid());
  256. EXPECT_EQ(reader.position(), reader.size());
  257. }
  258. template <class Reader, class Encoder>
  259. void testEmpty() {
  260. const typename Encoder::ValueType* const data = nullptr;
  261. auto list = Encoder::encode(data, data);
  262. {
  263. Reader reader(list);
  264. EXPECT_FALSE(reader.next());
  265. EXPECT_EQ(reader.size(), 0);
  266. }
  267. {
  268. Reader reader(list);
  269. EXPECT_FALSE(reader.skip(1));
  270. EXPECT_FALSE(reader.skip(10));
  271. EXPECT_FALSE(reader.jump(0));
  272. EXPECT_FALSE(reader.jump(10));
  273. }
  274. {
  275. Reader reader(list);
  276. EXPECT_FALSE(reader.skipTo(1));
  277. EXPECT_FALSE(reader.jumpTo(1));
  278. }
  279. }
  280. template <class Reader, class Encoder>
  281. void testAll(const std::vector<uint32_t>& data) {
  282. auto list = Encoder::encode(data.begin(), data.end());
  283. testNext<Reader>(data, list);
  284. testSkip<Reader>(data, list);
  285. testSkipTo<Reader>(data, list);
  286. testJump<Reader>(data, list);
  287. testJumpTo<Reader>(data, list);
  288. list.free();
  289. }
  290. template <class Reader, class List>
  291. void bmNext(const List& list, const std::vector<uint32_t>& data, size_t iters) {
  292. if (data.empty()) {
  293. return;
  294. }
  295. Reader reader(list);
  296. for (size_t i = 0; i < iters; ++i) {
  297. if (LIKELY(reader.next())) {
  298. folly::doNotOptimizeAway(reader.value());
  299. } else {
  300. reader.reset();
  301. }
  302. }
  303. }
  304. template <class Reader, class List>
  305. void bmSkip(
  306. const List& list,
  307. const std::vector<uint32_t>& /* data */,
  308. size_t logAvgSkip,
  309. size_t iters) {
  310. size_t avg = (size_t(1) << logAvgSkip);
  311. size_t base = avg - (avg >> 2);
  312. size_t mask = (avg > 1) ? (avg >> 1) - 1 : 0;
  313. Reader reader(list);
  314. for (size_t i = 0; i < iters; ++i) {
  315. size_t skip = base + (i & mask);
  316. if (LIKELY(reader.skip(skip))) {
  317. folly::doNotOptimizeAway(reader.value());
  318. } else {
  319. reader.reset();
  320. }
  321. }
  322. }
  323. template <class Reader, class List>
  324. void bmSkipTo(
  325. const List& list,
  326. const std::vector<uint32_t>& data,
  327. size_t logAvgSkip,
  328. size_t iters) {
  329. size_t avg = (size_t(1) << logAvgSkip);
  330. size_t base = avg - (avg >> 2);
  331. size_t mask = (avg > 1) ? (avg >> 1) - 1 : 0;
  332. Reader reader(list);
  333. for (size_t i = 0, j = -1; i < iters; ++i) {
  334. size_t skip = base + (i & mask);
  335. j += skip;
  336. if (j >= data.size()) {
  337. reader.reset();
  338. j = -1;
  339. }
  340. reader.skipTo(data[j]);
  341. folly::doNotOptimizeAway(reader.value());
  342. }
  343. }
  344. template <class Reader, class List>
  345. void bmJump(
  346. const List& list,
  347. const std::vector<uint32_t>& data,
  348. const std::vector<size_t>& order,
  349. size_t iters) {
  350. CHECK(!data.empty());
  351. CHECK_EQ(data.size(), order.size());
  352. Reader reader(list);
  353. for (size_t i = 0, j = 0; i < iters; ++i, ++j) {
  354. if (j == order.size()) {
  355. j = 0;
  356. }
  357. reader.jump(order[j]);
  358. folly::doNotOptimizeAway(reader.value());
  359. }
  360. }
  361. template <class Reader, class List>
  362. void bmJumpTo(
  363. const List& list,
  364. const std::vector<uint32_t>& data,
  365. const std::vector<size_t>& order,
  366. size_t iters) {
  367. CHECK(!data.empty());
  368. CHECK_EQ(data.size(), order.size());
  369. Reader reader(list);
  370. for (size_t i = 0, j = 0; i < iters; ++i, ++j) {
  371. if (j == order.size()) {
  372. j = 0;
  373. }
  374. reader.jumpTo(data[order[j]]);
  375. folly::doNotOptimizeAway(reader.value());
  376. }
  377. }
  378. folly::Optional<instructions::Type> instructionsOverride();
  379. template <class F>
  380. auto dispatchInstructions(F&& f)
  381. -> decltype(f(std::declval<instructions::Default>())) {
  382. if (auto type = instructionsOverride()) {
  383. return instructions::dispatch(*type, std::forward<F>(f));
  384. } else {
  385. return instructions::dispatch(std::forward<F>(f));
  386. }
  387. }
  388. } // namespace compression
  389. } // namespace folly