123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180 |
- /*
- * Copyright 2013-present Facebook, Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
- /**
- * RecordIO: self-synchronizing stream of variable length records
- *
- * RecordIO gives you the ability to write a stream of variable length records
- * and read them later even in the face of data corruption -- randomly inserted
- * or deleted chunks of the file, or modified data. When reading, you may lose
- * corrupted records, but the stream will resynchronize automatically.
- */
- #pragma once
- #define FOLLY_IO_RECORDIO_H_
- #include <atomic>
- #include <memory>
- #include <mutex>
- #include <folly/File.h>
- #include <folly/Range.h>
- #include <folly/io/IOBuf.h>
- #include <folly/system/MemoryMapping.h>
- namespace folly {
- /**
- * Class to write a stream of RecordIO records to a file.
- *
- * RecordIOWriter is thread-safe
- */
- class RecordIOWriter {
- public:
- /**
- * Create a RecordIOWriter around a file; will append to the end of
- * file if it exists.
- *
- * Each file must have a non-zero file id, which is embedded in all
- * record headers. Readers will only return records with the requested
- * file id (or, if the reader is created with fileId=0 in the constructor,
- * the reader will return all records). File ids are only used to allow
- * resynchronization if you store RecordIO records (with headers) inside
- * other RecordIO records (for example, if a record consists of a fragment
- * from another RecordIO file). If you're not planning to do that,
- * the defaults are fine.
- */
- explicit RecordIOWriter(File file, uint32_t fileId = 1);
- /**
- * Write a record. We will use at most headerSize() bytes of headroom,
- * you might want to arrange that before copying your data into it.
- */
- void write(std::unique_ptr<IOBuf> buf);
- /**
- * Return the position in the file where the next byte will be written.
- * Conservative, as stuff can be written at any time from another thread.
- */
- off_t filePos() const {
- return filePos_;
- }
- private:
- File file_;
- uint32_t fileId_;
- std::unique_lock<File> writeLock_;
- std::atomic<off_t> filePos_;
- };
- /**
- * Class to read from a RecordIO file. Will skip invalid records.
- */
- class RecordIOReader {
- public:
- class Iterator;
- /**
- * RecordIOReader is iterable, returning pairs of ByteRange (record content)
- * and position in file where the record (including header) begins.
- * Note that the position includes the header, that is, it can be passed back
- * to seek().
- */
- typedef Iterator iterator;
- typedef Iterator const_iterator;
- typedef std::pair<ByteRange, off_t> value_type;
- typedef value_type& reference;
- typedef const value_type& const_reference;
- /**
- * A record reader with a fileId of 0 will return all records.
- * A record reader with a non-zero fileId will only return records where
- * the fileId matches.
- */
- explicit RecordIOReader(File file, uint32_t fileId = 0);
- Iterator cbegin() const;
- Iterator begin() const;
- Iterator cend() const;
- Iterator end() const;
- /**
- * Create an iterator to the first valid record after pos.
- */
- Iterator seek(off_t pos) const;
- private:
- MemoryMapping map_;
- uint32_t fileId_;
- };
- namespace recordio_helpers {
- // We're exposing the guts of the RecordIO implementation for two reasons:
- // 1. It makes unit testing easier, and
- // 2. It allows you to build different RecordIO readers / writers that use
- // different storage systems underneath (not standard files)
- /**
- * Header size.
- */
- constexpr size_t headerSize(); // defined in RecordIO-inl.h
- /**
- * Write a header in the buffer. We will prepend the header to the front
- * of the chain. Do not write the buffer if empty (we don't allow empty
- * records). Returns the total length, including header (0 if empty)
- * (same as buf->computeChainDataLength(), but likely faster)
- *
- * The fileId should be unique per stream and allows you to have RecordIO
- * headers stored inside the data (for example, have an entire RecordIO
- * file stored as a record inside another RecordIO file). The fileId may
- * not be 0.
- */
- size_t prependHeader(std::unique_ptr<IOBuf>& buf, uint32_t fileId = 1);
- /**
- * Search for the first valid record that begins in searchRange (which must be
- * a subrange of wholeRange). Returns the record data (not the header) if
- * found, ByteRange() otherwise.
- *
- * The fileId may be 0, in which case we'll return the first valid record for
- * *any* fileId, or non-zero, in which case we'll only look for records with
- * the requested fileId.
- */
- struct RecordInfo {
- uint32_t fileId;
- ByteRange record;
- };
- RecordInfo
- findRecord(ByteRange searchRange, ByteRange wholeRange, uint32_t fileId);
- /**
- * Search for the first valid record in range.
- */
- RecordInfo findRecord(ByteRange range, uint32_t fileId);
- /**
- * Check if there is a valid record at the beginning of range. Returns the
- * record data (not the header) if the record is valid, ByteRange() otherwise.
- */
- RecordInfo validateRecord(ByteRange range, uint32_t fileId);
- } // namespace recordio_helpers
- } // namespace folly
- #include <folly/io/RecordIO-inl.h>
|