123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955 |
- /*
- * Copyright 2012-present Facebook, Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
- /**
- * Subprocess library, modeled after Python's subprocess module
- * (http://docs.python.org/2/library/subprocess.html)
- *
- * This library defines one class (Subprocess) which represents a child
- * process. Subprocess has two constructors: one that takes a vector<string>
- * and executes the given executable without using the shell, and one
- * that takes a string and executes the given command using the shell.
- * Subprocess allows you to redirect the child's standard input, standard
- * output, and standard error to/from child descriptors in the parent,
- * or to create communication pipes between the child and the parent.
- *
- * The simplest example is a thread-safe [1] version of the system() library
- * function:
- * Subprocess(cmd).wait();
- * which executes the command using the default shell and waits for it
- * to complete, returning the exit status.
- *
- * A thread-safe [1] version of popen() (type="r", to read from the child):
- * Subprocess proc(cmd, Subprocess::Options().pipeStdout());
- * // read from proc.stdoutFd()
- * proc.wait();
- *
- * A thread-safe [1] version of popen() (type="w", to write to the child):
- * Subprocess proc(cmd, Subprocess::Options().pipeStdin());
- * // write to proc.stdinFd()
- * proc.wait();
- *
- * If you want to redirect both stdin and stdout to pipes, you can, but note
- * that you're subject to a variety of deadlocks. You'll want to use
- * nonblocking I/O, like the callback version of communicate().
- *
- * The string or IOBuf-based variants of communicate() are the simplest way
- * to communicate with a child via its standard input, standard output, and
- * standard error. They buffer everything in memory, so they are not great
- * for large amounts of data (or long-running processes), but they are much
- * simpler than the callback version.
- *
- * == A note on thread-safety ==
- *
- * [1] "thread-safe" refers ONLY to the fact that Subprocess is very careful
- * to fork in a way that does not cause grief in multithreaded programs.
- *
- * Caveat: If your system does not have the atomic pipe2 system call, it is
- * not safe to concurrently call Subprocess from different threads.
- * Therefore, it is best to have a single thread be responsible for spawning
- * subprocesses.
- *
- * A particular instances of Subprocess is emphatically **not** thread-safe.
- * If you need to simultaneously communicate via the pipes, and interact
- * with the Subprocess state, your best bet is to:
- * - takeOwnershipOfPipes() to separate the pipe I/O from the subprocess.
- * - Only interact with the Subprocess from one thread at a time.
- *
- * The current implementation of communicate() cannot be safely interrupted.
- * To do so correctly, one would need to use EventFD, or open a dedicated
- * pipe to be messaged from a different thread -- in particular, kill() will
- * not do, since a descendant may keep the pipes open indefinitely.
- *
- * So, once you call communicate(), you must wait for it to return, and not
- * touch the pipes from other threads. closeParentFd() is emphatically
- * unsafe to call concurrently, and even sendSignal() is not a good idea.
- * You can perhaps give the Subprocess's PID to a different thread before
- * starting communicate(), and use that PID to send a signal without
- * accessing the Subprocess object. In that case, you will need a mutex
- * that ensures you don't wait() before you sent said signal. In a
- * nutshell, don't do this.
- *
- * In fact, signals are inherently concurrency-unsafe on Unix: if you signal
- * a PID, while another thread is in waitpid(), the signal may fire either
- * before or after the process is reaped. This means that your signal can,
- * in pathological circumstances, be delivered to the wrong process (ouch!).
- * To avoid this, you should only use non-blocking waits (i.e. poll()), and
- * make sure to serialize your signals (i.e. kill()) with the waits --
- * either wait & signal from the same thread, or use a mutex.
- */
- #pragma once
- #include <signal.h>
- #include <sys/types.h>
- #if __APPLE__
- #include <sys/wait.h>
- #else
- #include <wait.h>
- #endif
- #include <exception>
- #include <string>
- #include <vector>
- #include <boost/container/flat_map.hpp>
- #include <boost/operators.hpp>
- #include <folly/Exception.h>
- #include <folly/File.h>
- #include <folly/FileUtil.h>
- #include <folly/Function.h>
- #include <folly/MapUtil.h>
- #include <folly/Optional.h>
- #include <folly/Portability.h>
- #include <folly/Range.h>
- #include <folly/gen/String.h>
- #include <folly/io/IOBufQueue.h>
- #include <folly/portability/SysResource.h>
- namespace folly {
- /**
- * Class to wrap a process return code.
- */
- class Subprocess;
- class ProcessReturnCode {
- public:
- enum State {
- // Subprocess starts in the constructor, so this state designates only
- // default-initialized or moved-out ProcessReturnCodes.
- NOT_STARTED,
- RUNNING,
- EXITED,
- KILLED,
- };
- static ProcessReturnCode makeNotStarted() {
- return ProcessReturnCode(RV_NOT_STARTED);
- }
- static ProcessReturnCode makeRunning() {
- return ProcessReturnCode(RV_RUNNING);
- }
- static ProcessReturnCode make(int status);
- // Default-initialized for convenience. Subprocess::returnCode() will
- // never produce this value.
- ProcessReturnCode() : rawStatus_(RV_NOT_STARTED) {}
- // Trivially copyable
- ProcessReturnCode(const ProcessReturnCode& p) = default;
- ProcessReturnCode& operator=(const ProcessReturnCode& p) = default;
- // Non-default move: In order for Subprocess to be movable, the "moved
- // out" state must not be "running", or ~Subprocess() will abort.
- ProcessReturnCode(ProcessReturnCode&& p) noexcept;
- ProcessReturnCode& operator=(ProcessReturnCode&& p) noexcept;
- /**
- * Process state. One of:
- * NOT_STARTED: process hasn't been started successfully
- * RUNNING: process is currently running
- * EXITED: process exited (successfully or not)
- * KILLED: process was killed by a signal.
- */
- State state() const;
- /**
- * Helper wrappers around state().
- */
- bool notStarted() const {
- return state() == NOT_STARTED;
- }
- bool running() const {
- return state() == RUNNING;
- }
- bool exited() const {
- return state() == EXITED;
- }
- bool killed() const {
- return state() == KILLED;
- }
- /**
- * Exit status. Only valid if state() == EXITED; throws otherwise.
- */
- int exitStatus() const;
- /**
- * Signal that caused the process's termination. Only valid if
- * state() == KILLED; throws otherwise.
- */
- int killSignal() const;
- /**
- * Was a core file generated? Only valid if state() == KILLED; throws
- * otherwise.
- */
- bool coreDumped() const;
- /**
- * String representation; one of
- * "not started"
- * "running"
- * "exited with status <status>"
- * "killed by signal <signal>"
- * "killed by signal <signal> (core dumped)"
- */
- std::string str() const;
- /**
- * Helper function to enforce a precondition based on this.
- * Throws std::logic_error if in an unexpected state.
- */
- void enforce(State state) const;
- private:
- explicit ProcessReturnCode(int rv) : rawStatus_(rv) {}
- static constexpr int RV_NOT_STARTED = -2;
- static constexpr int RV_RUNNING = -1;
- int rawStatus_;
- };
- /**
- * Base exception thrown by the Subprocess methods.
- */
- class FOLLY_EXPORT SubprocessError : public std::runtime_error {
- public:
- using std::runtime_error::runtime_error;
- };
- /**
- * Exception thrown by *Checked methods of Subprocess.
- */
- class FOLLY_EXPORT CalledProcessError : public SubprocessError {
- public:
- explicit CalledProcessError(ProcessReturnCode rc);
- ~CalledProcessError() throw() override = default;
- ProcessReturnCode returnCode() const {
- return returnCode_;
- }
- private:
- ProcessReturnCode returnCode_;
- };
- /**
- * Exception thrown if the subprocess cannot be started.
- */
- class FOLLY_EXPORT SubprocessSpawnError : public SubprocessError {
- public:
- SubprocessSpawnError(const char* executable, int errCode, int errnoValue);
- ~SubprocessSpawnError() throw() override = default;
- int errnoValue() const {
- return errnoValue_;
- }
- private:
- int errnoValue_;
- };
- /**
- * Subprocess.
- */
- class Subprocess {
- public:
- static const int CLOSE = -1;
- static const int PIPE = -2;
- static const int PIPE_IN = -3;
- static const int PIPE_OUT = -4;
- /**
- * See Subprocess::Options::dangerousPostForkPreExecCallback() for usage.
- * Every derived class should include the following warning:
- *
- * DANGER: This class runs after fork in a child processes. Be fast, the
- * parent thread is waiting, but remember that other parent threads are
- * running and may mutate your state. Avoid mutating any data belonging to
- * the parent. Avoid interacting with non-POD data that originated in the
- * parent. Avoid any libraries that may internally reference non-POD data.
- * Especially beware parent mutexes -- for example, glog's LOG() uses one.
- */
- struct DangerousPostForkPreExecCallback {
- virtual ~DangerousPostForkPreExecCallback() {}
- // This must return 0 on success, or an `errno` error code.
- virtual int operator()() = 0;
- };
- /**
- * Class representing various options: file descriptor behavior, and
- * whether to use $PATH for searching for the executable,
- *
- * By default, we don't use $PATH, file descriptors are closed if
- * the close-on-exec flag is set (fcntl FD_CLOEXEC) and inherited
- * otherwise.
- */
- class Options {
- friend class Subprocess;
- public:
- Options() {} // E.g. https://gcc.gnu.org/bugzilla/show_bug.cgi?id=58328
- /**
- * Change action for file descriptor fd.
- *
- * "action" may be another file descriptor number (dup2()ed before the
- * child execs), or one of CLOSE, PIPE_IN, and PIPE_OUT.
- *
- * CLOSE: close the file descriptor in the child
- * PIPE_IN: open a pipe *from* the child
- * PIPE_OUT: open a pipe *to* the child
- *
- * PIPE is a shortcut; same as PIPE_IN for stdin (fd 0), same as
- * PIPE_OUT for stdout (fd 1) or stderr (fd 2), and an error for
- * other file descriptors.
- */
- Options& fd(int fd, int action);
- /**
- * Shortcut to change the action for standard input.
- */
- Options& stdinFd(int action) {
- return fd(STDIN_FILENO, action);
- }
- /**
- * Shortcut to change the action for standard output.
- */
- Options& stdoutFd(int action) {
- return fd(STDOUT_FILENO, action);
- }
- /**
- * Shortcut to change the action for standard error.
- * Note that stderr(1) will redirect the standard error to the same
- * file descriptor as standard output; the equivalent of bash's "2>&1"
- */
- Options& stderrFd(int action) {
- return fd(STDERR_FILENO, action);
- }
- Options& pipeStdin() {
- return fd(STDIN_FILENO, PIPE_IN);
- }
- Options& pipeStdout() {
- return fd(STDOUT_FILENO, PIPE_OUT);
- }
- Options& pipeStderr() {
- return fd(STDERR_FILENO, PIPE_OUT);
- }
- /**
- * Close all other fds (other than standard input, output, error,
- * and file descriptors explicitly specified with fd()).
- *
- * This is potentially slow; it's generally a better idea to
- * set the close-on-exec flag on all file descriptors that shouldn't
- * be inherited by the child.
- *
- * Even with this option set, standard input, output, and error are
- * not closed; use stdin(CLOSE), stdout(CLOSE), stderr(CLOSE) if you
- * desire this.
- */
- Options& closeOtherFds() {
- closeOtherFds_ = true;
- return *this;
- }
- /**
- * Use the search path ($PATH) when searching for the executable.
- */
- Options& usePath() {
- usePath_ = true;
- return *this;
- }
- /**
- * Change the child's working directory, after the vfork.
- */
- Options& chdir(const std::string& dir) {
- childDir_ = dir;
- return *this;
- }
- #if __linux__
- /**
- * Child will receive a signal when the parent exits.
- */
- Options& parentDeathSignal(int sig) {
- parentDeathSignal_ = sig;
- return *this;
- }
- #endif
- /**
- * Child will be made a process group leader when it starts. Upside: one
- * can reliably kill all its non-daemonizing descendants. Downside: the
- * child will not receive Ctrl-C etc during interactive use.
- */
- Options& processGroupLeader() {
- processGroupLeader_ = true;
- return *this;
- }
- /**
- * *** READ THIS WHOLE DOCBLOCK BEFORE USING ***
- *
- * Run this callback in the child after the fork, just before the
- * exec(), and after the child's state has been completely set up:
- * - signal handlers have been reset to default handling and unblocked
- * - the working directory was set
- * - closed any file descriptors specified via Options()
- * - set child process flags (see code)
- *
- * This is EXTREMELY DANGEROUS. For example, this innocuous-looking code
- * can cause a fraction of your Subprocess launches to hang forever:
- *
- * LOG(INFO) << "Hello from the child";
- *
- * The reason is that glog has an internal mutex. If your fork() happens
- * when the parent has the mutex locked, the child will wait forever.
- *
- * == GUIDELINES ==
- *
- * - Be quick -- the parent thread is blocked until you exit.
- * - Remember that other parent threads are running, and may mutate your
- * state.
- * - Avoid mutating any data belonging to the parent.
- * - Avoid interacting with non-POD data that came from the parent.
- * - Avoid any libraries that may internally reference non-POD state.
- * - Especially beware parent mutexes, e.g. LOG() uses a global mutex.
- * - Avoid invoking the parent's destructors (you can accidentally
- * delete files, terminate network connections, etc).
- * - Read http://ewontfix.com/7/
- */
- Options& dangerousPostForkPreExecCallback(
- DangerousPostForkPreExecCallback* cob) {
- dangerousPostForkPreExecCallback_ = cob;
- return *this;
- }
- #if __linux__
- /**
- * This is an experimental feature, it is best you don't use it at this
- * point of time.
- * Although folly would support cloning with custom flags in some form, this
- * API might change in the near future. So use the following assuming it is
- * experimental. (Apr 11, 2017)
- *
- * This unlocks Subprocess to support clone flags, many of them need
- * CAP_SYS_ADMIN permissions. It might also require you to go through the
- * implementation to understand what happens before, between and after the
- * fork-and-exec.
- *
- * `man 2 clone` would be a starting point for knowing about the available
- * flags.
- */
- using clone_flags_t = uint64_t;
- Options& useCloneWithFlags(clone_flags_t cloneFlags) noexcept {
- cloneFlags_ = cloneFlags;
- return *this;
- }
- #endif
- private:
- typedef boost::container::flat_map<int, int> FdMap;
- FdMap fdActions_;
- bool closeOtherFds_{false};
- bool usePath_{false};
- std::string childDir_; // "" keeps the parent's working directory
- #if __linux__
- int parentDeathSignal_{0};
- #endif
- bool processGroupLeader_{false};
- DangerousPostForkPreExecCallback* dangerousPostForkPreExecCallback_{
- nullptr};
- #if __linux__
- // none means `vfork()` instead of a custom `clone()`
- // Optional<> is used because value of '0' means do clone without any flags.
- Optional<clone_flags_t> cloneFlags_;
- #endif
- };
- // Non-copiable, but movable
- Subprocess(const Subprocess&) = delete;
- Subprocess& operator=(const Subprocess&) = delete;
- Subprocess(Subprocess&&) = default;
- Subprocess& operator=(Subprocess&&) = default;
- /**
- * Create an uninitialized subprocess.
- *
- * In this state it can only be destroyed, or assigned to using the move
- * assignment operator.
- */
- Subprocess();
- /**
- * Create a subprocess from the given arguments. argv[0] must be listed.
- * If not-null, executable must be the actual executable
- * being used (otherwise it's the same as argv[0]).
- *
- * If env is not-null, it must contain name=value strings to be used
- * as the child's environment; otherwise, we inherit the environment
- * from the parent. env must be null if options.usePath is set.
- */
- explicit Subprocess(
- const std::vector<std::string>& argv,
- const Options& options = Options(),
- const char* executable = nullptr,
- const std::vector<std::string>* env = nullptr);
- ~Subprocess();
- /**
- * Create a subprocess run as a shell command (as shell -c 'command')
- *
- * The shell to use is taken from the environment variable $SHELL,
- * or /bin/sh if $SHELL is unset.
- */
- // clang-format off
- [[deprecated(
- "Prefer not running in a shell or use `shellify`.")]]
- explicit Subprocess(
- const std::string& cmd,
- const Options& options = Options(),
- const std::vector<std::string>* env = nullptr);
- // clang-format on
- ////
- //// The methods below only manipulate the process state, and do not
- //// affect its communication pipes.
- ////
- /**
- * Return the child's pid, or -1 if the child wasn't successfully spawned
- * or has already been wait()ed upon.
- */
- pid_t pid() const;
- /**
- * Return the child's status (as per wait()) if the process has already
- * been waited on, -1 if the process is still running, or -2 if the
- * process hasn't been successfully started. NOTE that this does not call
- * waitpid() or Subprocess::poll(), but simply returns the status stored
- * in the Subprocess object.
- */
- ProcessReturnCode returnCode() const {
- return returnCode_;
- }
- /**
- * Poll the child's status and return it. Return the exit status if the
- * subprocess had quit, or RUNNING otherwise. Throws an std::logic_error
- * if called on a Subprocess whose status is no longer RUNNING. No other
- * exceptions are possible. Aborts on egregious violations of contract,
- * e.g. if you wait for the underlying process without going through this
- * Subprocess instance.
- */
- ProcessReturnCode poll(struct rusage* ru = nullptr);
- /**
- * Poll the child's status. If the process is still running, return false.
- * Otherwise, return true if the process exited with status 0 (success),
- * or throw CalledProcessError if the process exited with a non-zero status.
- */
- bool pollChecked();
- /**
- * Wait for the process to terminate and return its status. Like poll(),
- * the only exception this can throw is std::logic_error if you call this
- * on a Subprocess whose status is RUNNING. Aborts on egregious
- * violations of contract, like an out-of-band waitpid(p.pid(), 0, 0).
- */
- ProcessReturnCode wait();
- /**
- * Wait for the process to terminate, throw if unsuccessful.
- */
- void waitChecked();
- /**
- * Send a signal to the child. Shortcuts for the commonly used Unix
- * signals are below.
- */
- void sendSignal(int signal);
- void terminate() {
- sendSignal(SIGTERM);
- }
- void kill() {
- sendSignal(SIGKILL);
- }
- ////
- //// The methods below only affect the process's communication pipes, but
- //// not its return code or state (they do not poll() or wait()).
- ////
- /**
- * Communicate with the child until all pipes to/from the child are closed.
- *
- * The input buffer is written to the process' stdin pipe, and data is read
- * from the stdout and stderr pipes. Non-blocking I/O is performed on all
- * pipes simultaneously to avoid deadlocks.
- *
- * The stdin pipe will be closed after the full input buffer has been written.
- * An error will be thrown if a non-empty input buffer is supplied but stdin
- * was not configured as a pipe.
- *
- * Returns a pair of buffers containing the data read from stdout and stderr.
- * If stdout or stderr is not a pipe, an empty IOBuf queue will be returned
- * for the respective buffer.
- *
- * Note that communicate() and communicateIOBuf() both return when all
- * pipes to/from the child are closed; the child might stay alive after
- * that, so you must still wait().
- *
- * communicateIOBuf() uses IOBufQueue for buffering (which has the
- * advantage that it won't try to allocate all data at once), but it does
- * store the subprocess's entire output in memory before returning.
- *
- * communicate() uses strings for simplicity.
- */
- std::pair<IOBufQueue, IOBufQueue> communicateIOBuf(
- IOBufQueue input = IOBufQueue());
- std::pair<std::string, std::string> communicate(
- StringPiece input = StringPiece());
- /**
- * Communicate with the child until all pipes to/from the child are closed.
- *
- * == Semantics ==
- *
- * readCallback(pfd, cfd) will be called whenever there's data available
- * on any pipe *from* the child (PIPE_OUT). pfd is the file descriptor
- * in the parent (that you use to read from); cfd is the file descriptor
- * in the child (used for identifying the stream; 1 = child's standard
- * output, 2 = child's standard error, etc)
- *
- * writeCallback(pfd, cfd) will be called whenever a pipe *to* the child is
- * writable (PIPE_IN). pfd is the file descriptor in the parent (that you
- * use to write to); cfd is the file descriptor in the child (used for
- * identifying the stream; 0 = child's standard input, etc)
- *
- * The read and write callbacks must read from / write to pfd and return
- * false during normal operation. Return true to tell communicate() to
- * close the pipe. For readCallback, this might send SIGPIPE to the
- * child, or make its writes fail with EPIPE, so you should generally
- * avoid returning true unless you've reached end-of-file.
- *
- * communicate() returns when all pipes to/from the child are closed; the
- * child might stay alive after that, so you must still wait().
- * Conversely, the child may quit long before its pipes are closed, since
- * its descendants can keep them alive forever.
- *
- * Most users won't need to use this callback version; the simpler version
- * of communicate (which buffers data in memory) will probably work fine.
- *
- * == Things you must get correct ==
- *
- * 1) You MUST consume all data passed to readCallback (or return true to
- * close the pipe). Similarly, you MUST write to a writable pipe (or
- * return true to close the pipe). To do otherwise is an error that can
- * result in a deadlock. You must do this even for pipes you are not
- * interested in.
- *
- * 2) pfd is nonblocking, so be prepared for read() / write() to return -1
- * and set errno to EAGAIN (in which case you should return false). Use
- * readNoInt() from FileUtil.h to handle interrupted reads for you.
- *
- * 3) Your callbacks MUST NOT call any of the Subprocess methods that
- * manipulate the pipe FDs. Check the docblocks, but, for example,
- * neither closeParentFd (return true instead) nor takeOwnershipOfPipes
- * are safe. Stick to reading/writing from pfd, as appropriate.
- *
- * == Good to know ==
- *
- * 1) See ReadLinesCallback for an easy way to consume the child's output
- * streams line-by-line (or tokenized by another delimiter).
- *
- * 2) "Wait until the descendants close the pipes" is usually the behavior
- * you want, since the descendants may have something to say even if the
- * immediate child is dead. If you need to be able to force-close all
- * parent FDs, communicate() will NOT work for you. Do it your own way by
- * using takeOwnershipOfPipes().
- *
- * Why not? You can return "true" from your callbacks to sever active
- * pipes, but inactive ones can remain open indefinitely. It is
- * impossible to safely close inactive pipes while another thread is
- * blocked in communicate(). This is BY DESIGN. Racing communicate()'s
- * read/write callbacks can result in wrong I/O and data corruption. This
- * class would need internal synchronization and timeouts, a poor and
- * expensive implementation choice, in order to make closeParentFd()
- * thread-safe.
- */
- using FdCallback = folly::Function<bool(int, int)>;
- void communicate(FdCallback readCallback, FdCallback writeCallback);
- /**
- * A readCallback for Subprocess::communicate() that helps you consume
- * lines (or other delimited pieces) from your subprocess's file
- * descriptors. Use the readLinesCallback() helper to get template
- * deduction. For example:
- *
- * subprocess.communicate(
- * Subprocess::readLinesCallback(
- * [](int fd, folly::StringPiece s) {
- * std::cout << fd << " said: " << s;
- * return false; // Keep reading from the child
- * }
- * ),
- * [](int pdf, int cfd){ return true; } // Don't write to the child
- * );
- *
- * If a file line exceeds maxLineLength, your callback will get some
- * initial chunks of maxLineLength with no trailing delimiters. The final
- * chunk of a line is delimiter-terminated iff the delimiter was present
- * in the input. In particular, the last line in a file always lacks a
- * delimiter -- so if a file ends on a delimiter, the final line is empty.
- *
- * Like a regular communicate() callback, your fdLineCb() normally returns
- * false. It may return true to tell Subprocess to close the underlying
- * file descriptor. The child process may then receive SIGPIPE or get
- * EPIPE errors on writes.
- */
- template <class Callback>
- class ReadLinesCallback {
- private:
- // Binds an FD to the client-provided FD+line callback
- struct StreamSplitterCallback {
- StreamSplitterCallback(Callback& cb, int fd) : cb_(cb), fd_(fd) {}
- // The return value semantics are inverted vs StreamSplitter
- bool operator()(StringPiece s) {
- return !cb_(fd_, s);
- }
- Callback& cb_;
- int fd_;
- };
- typedef gen::StreamSplitter<StreamSplitterCallback> LineSplitter;
- public:
- explicit ReadLinesCallback(
- Callback&& fdLineCb,
- uint64_t maxLineLength = 0, // No line length limit by default
- char delimiter = '\n',
- uint64_t bufSize = 1024)
- : fdLineCb_(std::forward<Callback>(fdLineCb)),
- maxLineLength_(maxLineLength),
- delimiter_(delimiter),
- bufSize_(bufSize) {}
- bool operator()(int pfd, int cfd) {
- // Make a splitter for this cfd if it doesn't already exist
- auto it = fdToSplitter_.find(cfd);
- auto& splitter = (it != fdToSplitter_.end())
- ? it->second
- : fdToSplitter_
- .emplace(
- cfd,
- LineSplitter(
- delimiter_,
- StreamSplitterCallback(fdLineCb_, cfd),
- maxLineLength_))
- .first->second;
- // Read as much as we can from this FD
- char buf[bufSize_];
- while (true) {
- ssize_t ret = readNoInt(pfd, buf, bufSize_);
- if (ret == -1 && errno == EAGAIN) { // No more data for now
- return false;
- }
- checkUnixError(ret, "read");
- if (ret == 0) { // Reached end-of-file
- splitter.flush(); // Ignore return since the file is over anyway
- return true;
- }
- if (!splitter(StringPiece(buf, ret))) {
- return true; // The callback told us to stop
- }
- }
- }
- private:
- Callback fdLineCb_;
- const uint64_t maxLineLength_;
- const char delimiter_;
- const uint64_t bufSize_;
- // We lazily make splitters for all cfds that get used.
- std::unordered_map<int, LineSplitter> fdToSplitter_;
- };
- // Helper to enable template deduction
- template <class Callback>
- static auto readLinesCallback(
- Callback&& fdLineCb,
- uint64_t maxLineLength = 0, // No line length limit by default
- char delimiter = '\n',
- uint64_t bufSize = 1024)
- -> ReadLinesCallback<typename std::decay<Callback>::type> {
- return ReadLinesCallback<typename std::decay<Callback>::type>(
- std::forward<Callback>(fdLineCb), maxLineLength, delimiter, bufSize);
- }
- /**
- * communicate() callbacks can use this to temporarily enable/disable
- * notifications (callbacks) for a pipe to/from the child. By default,
- * all are enabled. Useful for "chatty" communication -- you want to
- * disable write callbacks until you receive the expected message.
- *
- * Disabling a pipe does not free you from the requirement to consume all
- * incoming data. Failing to do so will easily create deadlock bugs.
- *
- * Throws if the childFd is not known.
- */
- void enableNotifications(int childFd, bool enabled);
- /**
- * Are notifications for one pipe to/from child enabled? Throws if the
- * childFd is not known.
- */
- bool notificationsEnabled(int childFd) const;
- ////
- //// The following methods are meant for the cases when communicate() is
- //// not suitable. You should not need them when you call communicate(),
- //// and, in fact, it is INHERENTLY UNSAFE to use closeParentFd() or
- //// takeOwnershipOfPipes() from a communicate() callback.
- ////
- /**
- * Close the parent file descriptor given a file descriptor in the child.
- * DO NOT USE from communicate() callbacks; make them return true instead.
- */
- void closeParentFd(int childFd);
- /**
- * Set all pipes from / to child to be non-blocking. communicate() does
- * this for you.
- */
- void setAllNonBlocking();
- /**
- * Get parent file descriptor corresponding to the given file descriptor
- * in the child. Throws if childFd isn't a pipe (PIPE_IN / PIPE_OUT).
- * Do not close() the returned file descriptor; use closeParentFd, above.
- */
- int parentFd(int childFd) const {
- return pipes_[findByChildFd(childFd)].pipe.fd();
- }
- int stdinFd() const {
- return parentFd(0);
- }
- int stdoutFd() const {
- return parentFd(1);
- }
- int stderrFd() const {
- return parentFd(2);
- }
- /**
- * The child's pipes are logically separate from the process metadata
- * (they may even be kept alive by the child's descendants). This call
- * lets you manage the pipes' lifetime separetely from the lifetime of the
- * child process.
- *
- * After this call, the Subprocess instance will have no knowledge of
- * these pipes, and the caller assumes responsibility for managing their
- * lifetimes. Pro-tip: prefer to explicitly close() the pipes, since
- * folly::File would otherwise silently suppress I/O errors.
- *
- * No, you may NOT call this from a communicate() callback.
- */
- struct ChildPipe {
- ChildPipe(int fd, folly::File&& ppe) : childFd(fd), pipe(std::move(ppe)) {}
- int childFd;
- folly::File pipe; // Owns the parent FD
- };
- std::vector<ChildPipe> takeOwnershipOfPipes();
- private:
- // spawn() sets up a pipe to read errors from the child,
- // then calls spawnInternal() to do the bulk of the work. Once
- // spawnInternal() returns it reads the error pipe to see if the child
- // encountered any errors.
- void spawn(
- std::unique_ptr<const char*[]> argv,
- const char* executable,
- const Options& options,
- const std::vector<std::string>* env);
- void spawnInternal(
- std::unique_ptr<const char*[]> argv,
- const char* executable,
- Options& options,
- const std::vector<std::string>* env,
- int errFd);
- // Actions to run in child.
- // Note that this runs after vfork(), so tread lightly.
- // Returns 0 on success, or an errno value on failure.
- int prepareChild(
- const Options& options,
- const sigset_t* sigmask,
- const char* childDir) const;
- int runChild(
- const char* executable,
- char** argv,
- char** env,
- const Options& options) const;
- /**
- * Read from the error pipe, and throw SubprocessSpawnError if the child
- * failed before calling exec().
- */
- void readChildErrorPipe(int pfd, const char* executable);
- // Returns an index into pipes_. Throws std::invalid_argument if not found.
- size_t findByChildFd(const int childFd) const;
- pid_t pid_{-1};
- ProcessReturnCode returnCode_;
- /**
- * Represents a pipe between this process, and the child process (or its
- * descendant). To interact with these pipes, you can use communicate(),
- * or use parentFd() and related methods, or separate them from the
- * Subprocess instance entirely via takeOwnershipOfPipes().
- */
- struct Pipe : private boost::totally_ordered<Pipe> {
- folly::File pipe; // Our end of the pipe, wrapped in a File to auto-close.
- int childFd = -1; // Identifies the pipe: what FD is this in the child?
- int direction = PIPE_IN; // one of PIPE_IN / PIPE_OUT
- bool enabled = true; // Are notifications enabled in communicate()?
- bool operator<(const Pipe& other) const {
- return childFd < other.childFd;
- }
- bool operator==(const Pipe& other) const {
- return childFd == other.childFd;
- }
- };
- // Populated at process start according to fdActions, empty after
- // takeOwnershipOfPipes(). Sorted by childFd. Can only have elements
- // erased, but not inserted, after being populated.
- //
- // The number of pipes between parent and child is assumed to be small,
- // so we're happy with a vector here, even if it means linear erase.
- std::vector<Pipe> pipes_;
- };
- } // namespace folly
|