Zlib.cpp 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420
  1. /*
  2. * Copyright 2017-present Facebook, Inc.
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #include <folly/compression/Zlib.h>
  17. #if FOLLY_HAVE_LIBZ
  18. #include <folly/Conv.h>
  19. #include <folly/Optional.h>
  20. #include <folly/Range.h>
  21. #include <folly/ScopeGuard.h>
  22. #include <folly/compression/Compression.h>
  23. #include <folly/compression/Utils.h>
  24. #include <folly/io/Cursor.h>
  25. using folly::io::compression::detail::dataStartsWithLE;
  26. using folly::io::compression::detail::prefixToStringLE;
  27. namespace folly {
  28. namespace io {
  29. namespace zlib {
  30. namespace {
  31. bool isValidStrategy(int strategy) {
  32. std::array<int, 5> strategies{{
  33. Z_DEFAULT_STRATEGY,
  34. Z_FILTERED,
  35. Z_HUFFMAN_ONLY,
  36. Z_RLE,
  37. Z_FIXED,
  38. }};
  39. return std::any_of(strategies.begin(), strategies.end(), [&](int i) {
  40. return i == strategy;
  41. });
  42. }
  43. int getWindowBits(Options::Format format, int windowSize) {
  44. switch (format) {
  45. case Options::Format::ZLIB:
  46. return windowSize;
  47. case Options::Format::GZIP:
  48. return windowSize + 16;
  49. case Options::Format::RAW:
  50. return -windowSize;
  51. case Options::Format::AUTO:
  52. return windowSize + 32;
  53. default:
  54. return windowSize;
  55. }
  56. }
  57. CodecType getCodecType(Options options) {
  58. if (options.windowSize == 15 && options.format == Options::Format::ZLIB) {
  59. return CodecType::ZLIB;
  60. } else if (
  61. options.windowSize == 15 && options.format == Options::Format::GZIP) {
  62. return CodecType::GZIP;
  63. } else {
  64. return CodecType::USER_DEFINED;
  65. }
  66. }
  67. class ZlibStreamCodec final : public StreamCodec {
  68. public:
  69. static std::unique_ptr<Codec> createCodec(Options options, int level);
  70. static std::unique_ptr<StreamCodec> createStream(Options options, int level);
  71. explicit ZlibStreamCodec(Options options, int level);
  72. ~ZlibStreamCodec() override;
  73. std::vector<std::string> validPrefixes() const override;
  74. bool canUncompress(const IOBuf* data, Optional<uint64_t> uncompressedLength)
  75. const override;
  76. private:
  77. uint64_t doMaxCompressedLength(uint64_t uncompressedLength) const override;
  78. void doResetStream() override;
  79. bool doCompressStream(
  80. ByteRange& input,
  81. MutableByteRange& output,
  82. StreamCodec::FlushOp flush) override;
  83. bool doUncompressStream(
  84. ByteRange& input,
  85. MutableByteRange& output,
  86. StreamCodec::FlushOp flush) override;
  87. void resetDeflateStream();
  88. void resetInflateStream();
  89. Options options_;
  90. Optional<z_stream> deflateStream_{};
  91. Optional<z_stream> inflateStream_{};
  92. int level_;
  93. bool needReset_{true};
  94. };
  95. static constexpr uint16_t kGZIPMagicLE = 0x8B1F;
  96. std::vector<std::string> ZlibStreamCodec::validPrefixes() const {
  97. if (type() == CodecType::ZLIB) {
  98. // Zlib streams start with a 2 byte header.
  99. //
  100. // 0 1
  101. // +---+---+
  102. // |CMF|FLG|
  103. // +---+---+
  104. //
  105. // We won't restrict the values of any sub-fields except as described below.
  106. //
  107. // The lowest 4 bits of CMF is the compression method (CM).
  108. // CM == 0x8 is the deflate compression method, which is currently the only
  109. // supported compression method, so any valid prefix must have CM == 0x8.
  110. //
  111. // The lowest 5 bits of FLG is FCHECK.
  112. // FCHECK must be such that the two header bytes are a multiple of 31 when
  113. // interpreted as a big endian 16-bit number.
  114. std::vector<std::string> result;
  115. // 16 values for the first byte, 8 values for the second byte.
  116. // There are also 4 combinations where both 0x00 and 0x1F work as FCHECK.
  117. result.reserve(132);
  118. // Select all values for the CMF byte that use the deflate algorithm 0x8.
  119. for (uint32_t first = 0x0800; first <= 0xF800; first += 0x1000) {
  120. // Select all values for the FLG, but leave FCHECK as 0 since it's fixed.
  121. for (uint32_t second = 0x00; second <= 0xE0; second += 0x20) {
  122. uint16_t prefix = first | second;
  123. // Compute FCHECK.
  124. prefix += 31 - (prefix % 31);
  125. result.push_back(prefixToStringLE(Endian::big(prefix)));
  126. // zlib won't produce this, but it is a valid prefix.
  127. if ((prefix & 0x1F) == 31) {
  128. prefix -= 31;
  129. result.push_back(prefixToStringLE(Endian::big(prefix)));
  130. }
  131. }
  132. }
  133. return result;
  134. } else if (type() == CodecType::GZIP) {
  135. // The gzip frame starts with 2 magic bytes.
  136. return {prefixToStringLE(kGZIPMagicLE)};
  137. } else {
  138. return {};
  139. }
  140. }
  141. bool ZlibStreamCodec::canUncompress(const IOBuf* data, Optional<uint64_t>)
  142. const {
  143. if (type() == CodecType::ZLIB) {
  144. uint16_t value;
  145. Cursor cursor{data};
  146. if (!cursor.tryReadBE(value)) {
  147. return false;
  148. }
  149. // zlib compressed if using deflate and is a multiple of 31.
  150. return (value & 0x0F00) == 0x0800 && value % 31 == 0;
  151. } else if (type() == CodecType::GZIP) {
  152. return dataStartsWithLE(data, kGZIPMagicLE);
  153. } else {
  154. return false;
  155. }
  156. }
  157. uint64_t ZlibStreamCodec::doMaxCompressedLength(
  158. uint64_t uncompressedLength) const {
  159. // When passed a nullptr, deflateBound() adds 6 bytes for a zlib wrapper. A
  160. // gzip wrapper is 18 bytes, so we add the 12 byte difference.
  161. return deflateBound(nullptr, uncompressedLength) +
  162. (options_.format == Options::Format::GZIP ? 12 : 0);
  163. }
  164. std::unique_ptr<Codec> ZlibStreamCodec::createCodec(
  165. Options options,
  166. int level) {
  167. return std::make_unique<ZlibStreamCodec>(options, level);
  168. }
  169. std::unique_ptr<StreamCodec> ZlibStreamCodec::createStream(
  170. Options options,
  171. int level) {
  172. return std::make_unique<ZlibStreamCodec>(options, level);
  173. }
  174. static bool inBounds(int value, int low, int high) {
  175. return (value >= low) && (value <= high);
  176. }
  177. static int zlibConvertLevel(int level) {
  178. switch (level) {
  179. case COMPRESSION_LEVEL_FASTEST:
  180. return 1;
  181. case COMPRESSION_LEVEL_DEFAULT:
  182. return 6;
  183. case COMPRESSION_LEVEL_BEST:
  184. return 9;
  185. }
  186. if (!inBounds(level, 0, 9)) {
  187. throw std::invalid_argument(
  188. to<std::string>("ZlibStreamCodec: invalid level: ", level));
  189. }
  190. return level;
  191. }
  192. ZlibStreamCodec::ZlibStreamCodec(Options options, int level)
  193. : StreamCodec(
  194. getCodecType(options),
  195. zlibConvertLevel(level),
  196. getCodecType(options) == CodecType::GZIP ? "gzip" : "zlib"),
  197. level_(zlibConvertLevel(level)) {
  198. options_ = options;
  199. // Although zlib allows a windowSize of 8..15, a value of 8 is not
  200. // properly supported and is treated as a value of 9. This means data deflated
  201. // with windowSize==8 can not be re-inflated with windowSize==8. windowSize==8
  202. // is also not supported for gzip and raw deflation.
  203. // Hence, the codec supports only 9..15.
  204. if (!inBounds(options_.windowSize, 9, 15)) {
  205. throw std::invalid_argument(to<std::string>(
  206. "ZlibStreamCodec: invalid windowSize option: ", options.windowSize));
  207. }
  208. if (!inBounds(options_.memLevel, 1, 9)) {
  209. throw std::invalid_argument(to<std::string>(
  210. "ZlibStreamCodec: invalid memLevel option: ", options.memLevel));
  211. }
  212. if (!isValidStrategy(options_.strategy)) {
  213. throw std::invalid_argument(to<std::string>(
  214. "ZlibStreamCodec: invalid strategy: ", options.strategy));
  215. }
  216. }
  217. ZlibStreamCodec::~ZlibStreamCodec() {
  218. if (deflateStream_) {
  219. deflateEnd(deflateStream_.get_pointer());
  220. deflateStream_.clear();
  221. }
  222. if (inflateStream_) {
  223. inflateEnd(inflateStream_.get_pointer());
  224. inflateStream_.clear();
  225. }
  226. }
  227. void ZlibStreamCodec::doResetStream() {
  228. needReset_ = true;
  229. }
  230. void ZlibStreamCodec::resetDeflateStream() {
  231. if (deflateStream_) {
  232. int const rc = deflateReset(deflateStream_.get_pointer());
  233. if (rc != Z_OK) {
  234. deflateStream_.clear();
  235. throw std::runtime_error(
  236. to<std::string>("ZlibStreamCodec: deflateReset error: ", rc));
  237. }
  238. return;
  239. }
  240. deflateStream_ = z_stream{};
  241. // The automatic header detection format is only for inflation.
  242. // Use zlib for deflation if the format is auto.
  243. int const windowBits = getWindowBits(
  244. options_.format == Options::Format::AUTO ? Options::Format::ZLIB
  245. : options_.format,
  246. options_.windowSize);
  247. int const rc = deflateInit2(
  248. deflateStream_.get_pointer(),
  249. level_,
  250. Z_DEFLATED,
  251. windowBits,
  252. options_.memLevel,
  253. options_.strategy);
  254. if (rc != Z_OK) {
  255. deflateStream_.clear();
  256. throw std::runtime_error(
  257. to<std::string>("ZlibStreamCodec: deflateInit error: ", rc));
  258. }
  259. }
  260. void ZlibStreamCodec::resetInflateStream() {
  261. if (inflateStream_) {
  262. int const rc = inflateReset(inflateStream_.get_pointer());
  263. if (rc != Z_OK) {
  264. inflateStream_.clear();
  265. throw std::runtime_error(
  266. to<std::string>("ZlibStreamCodec: inflateReset error: ", rc));
  267. }
  268. return;
  269. }
  270. inflateStream_ = z_stream{};
  271. int const rc = inflateInit2(
  272. inflateStream_.get_pointer(),
  273. getWindowBits(options_.format, options_.windowSize));
  274. if (rc != Z_OK) {
  275. inflateStream_.clear();
  276. throw std::runtime_error(
  277. to<std::string>("ZlibStreamCodec: inflateInit error: ", rc));
  278. }
  279. }
  280. static int zlibTranslateFlush(StreamCodec::FlushOp flush) {
  281. switch (flush) {
  282. case StreamCodec::FlushOp::NONE:
  283. return Z_NO_FLUSH;
  284. case StreamCodec::FlushOp::FLUSH:
  285. return Z_SYNC_FLUSH;
  286. case StreamCodec::FlushOp::END:
  287. return Z_FINISH;
  288. default:
  289. throw std::invalid_argument("ZlibStreamCodec: Invalid flush");
  290. }
  291. }
  292. static int zlibThrowOnError(int rc) {
  293. switch (rc) {
  294. case Z_OK:
  295. case Z_BUF_ERROR:
  296. case Z_STREAM_END:
  297. return rc;
  298. default:
  299. throw std::runtime_error(to<std::string>("ZlibStreamCodec: error: ", rc));
  300. }
  301. }
  302. bool ZlibStreamCodec::doCompressStream(
  303. ByteRange& input,
  304. MutableByteRange& output,
  305. StreamCodec::FlushOp flush) {
  306. if (needReset_) {
  307. resetDeflateStream();
  308. needReset_ = false;
  309. }
  310. DCHECK(deflateStream_.hasValue());
  311. // zlib will return Z_STREAM_ERROR if output.data() is null.
  312. if (output.data() == nullptr) {
  313. return false;
  314. }
  315. deflateStream_->next_in = const_cast<uint8_t*>(input.data());
  316. deflateStream_->avail_in = input.size();
  317. deflateStream_->next_out = output.data();
  318. deflateStream_->avail_out = output.size();
  319. SCOPE_EXIT {
  320. input.uncheckedAdvance(input.size() - deflateStream_->avail_in);
  321. output.uncheckedAdvance(output.size() - deflateStream_->avail_out);
  322. };
  323. int const rc = zlibThrowOnError(
  324. deflate(deflateStream_.get_pointer(), zlibTranslateFlush(flush)));
  325. switch (flush) {
  326. case StreamCodec::FlushOp::NONE:
  327. return false;
  328. case StreamCodec::FlushOp::FLUSH:
  329. return deflateStream_->avail_in == 0 && deflateStream_->avail_out != 0;
  330. case StreamCodec::FlushOp::END:
  331. return rc == Z_STREAM_END;
  332. default:
  333. throw std::invalid_argument("ZlibStreamCodec: Invalid flush");
  334. }
  335. }
  336. bool ZlibStreamCodec::doUncompressStream(
  337. ByteRange& input,
  338. MutableByteRange& output,
  339. StreamCodec::FlushOp flush) {
  340. if (needReset_) {
  341. resetInflateStream();
  342. needReset_ = false;
  343. }
  344. DCHECK(inflateStream_.hasValue());
  345. // zlib will return Z_STREAM_ERROR if output.data() is null.
  346. if (output.data() == nullptr) {
  347. return false;
  348. }
  349. inflateStream_->next_in = const_cast<uint8_t*>(input.data());
  350. inflateStream_->avail_in = input.size();
  351. inflateStream_->next_out = output.data();
  352. inflateStream_->avail_out = output.size();
  353. SCOPE_EXIT {
  354. input.advance(input.size() - inflateStream_->avail_in);
  355. output.advance(output.size() - inflateStream_->avail_out);
  356. };
  357. int const rc = zlibThrowOnError(
  358. inflate(inflateStream_.get_pointer(), zlibTranslateFlush(flush)));
  359. return rc == Z_STREAM_END;
  360. }
  361. } // namespace
  362. Options defaultGzipOptions() {
  363. return Options(Options::Format::GZIP);
  364. }
  365. Options defaultZlibOptions() {
  366. return Options(Options::Format::ZLIB);
  367. }
  368. std::unique_ptr<Codec> getCodec(Options options, int level) {
  369. return ZlibStreamCodec::createCodec(options, level);
  370. }
  371. std::unique_ptr<StreamCodec> getStreamCodec(Options options, int level) {
  372. return ZlibStreamCodec::createStream(options, level);
  373. }
  374. } // namespace zlib
  375. } // namespace io
  376. } // namespace folly
  377. #endif // FOLLY_HAVE_LIBZ