Subprocess.h 33 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955
  1. /*
  2. * Copyright 2012-present Facebook, Inc.
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. /**
  17. * Subprocess library, modeled after Python's subprocess module
  18. * (http://docs.python.org/2/library/subprocess.html)
  19. *
  20. * This library defines one class (Subprocess) which represents a child
  21. * process. Subprocess has two constructors: one that takes a vector<string>
  22. * and executes the given executable without using the shell, and one
  23. * that takes a string and executes the given command using the shell.
  24. * Subprocess allows you to redirect the child's standard input, standard
  25. * output, and standard error to/from child descriptors in the parent,
  26. * or to create communication pipes between the child and the parent.
  27. *
  28. * The simplest example is a thread-safe [1] version of the system() library
  29. * function:
  30. * Subprocess(cmd).wait();
  31. * which executes the command using the default shell and waits for it
  32. * to complete, returning the exit status.
  33. *
  34. * A thread-safe [1] version of popen() (type="r", to read from the child):
  35. * Subprocess proc(cmd, Subprocess::Options().pipeStdout());
  36. * // read from proc.stdoutFd()
  37. * proc.wait();
  38. *
  39. * A thread-safe [1] version of popen() (type="w", to write to the child):
  40. * Subprocess proc(cmd, Subprocess::Options().pipeStdin());
  41. * // write to proc.stdinFd()
  42. * proc.wait();
  43. *
  44. * If you want to redirect both stdin and stdout to pipes, you can, but note
  45. * that you're subject to a variety of deadlocks. You'll want to use
  46. * nonblocking I/O, like the callback version of communicate().
  47. *
  48. * The string or IOBuf-based variants of communicate() are the simplest way
  49. * to communicate with a child via its standard input, standard output, and
  50. * standard error. They buffer everything in memory, so they are not great
  51. * for large amounts of data (or long-running processes), but they are much
  52. * simpler than the callback version.
  53. *
  54. * == A note on thread-safety ==
  55. *
  56. * [1] "thread-safe" refers ONLY to the fact that Subprocess is very careful
  57. * to fork in a way that does not cause grief in multithreaded programs.
  58. *
  59. * Caveat: If your system does not have the atomic pipe2 system call, it is
  60. * not safe to concurrently call Subprocess from different threads.
  61. * Therefore, it is best to have a single thread be responsible for spawning
  62. * subprocesses.
  63. *
  64. * A particular instance of Subprocess is emphatically **not** thread-safe.
  65. * If you need to simultaneously communicate via the pipes, and interact
  66. * with the Subprocess state, your best bet is to:
  67. * - takeOwnershipOfPipes() to separate the pipe I/O from the subprocess.
  68. * - Only interact with the Subprocess from one thread at a time.
  69. *
  70. * The current implementation of communicate() cannot be safely interrupted.
  71. * To do so correctly, one would need to use EventFD, or open a dedicated
  72. * pipe to be messaged from a different thread -- in particular, kill() will
  73. * not do, since a descendant may keep the pipes open indefinitely.
  74. *
  75. * So, once you call communicate(), you must wait for it to return, and not
  76. * touch the pipes from other threads. closeParentFd() is emphatically
  77. * unsafe to call concurrently, and even sendSignal() is not a good idea.
  78. * You can perhaps give the Subprocess's PID to a different thread before
  79. * starting communicate(), and use that PID to send a signal without
  80. * accessing the Subprocess object. In that case, you will need a mutex
  81. * that ensures you don't wait() before you sent said signal. In a
  82. * nutshell, don't do this.
  83. *
  84. * In fact, signals are inherently concurrency-unsafe on Unix: if you signal
  85. * a PID, while another thread is in waitpid(), the signal may fire either
  86. * before or after the process is reaped. This means that your signal can,
  87. * in pathological circumstances, be delivered to the wrong process (ouch!).
  88. * To avoid this, you should only use non-blocking waits (i.e. poll()), and
  89. * make sure to serialize your signals (i.e. kill()) with the waits --
  90. * either wait & signal from the same thread, or use a mutex.
  91. */
  92. #pragma once
  93. #include <signal.h>
  94. #include <sys/types.h>
  95. #if __APPLE__
  96. #include <sys/wait.h>
  97. #else
  98. #include <wait.h>
  99. #endif
  100. #include <exception>
  101. #include <string>
  102. #include <vector>
  103. #include <boost/container/flat_map.hpp>
  104. #include <boost/operators.hpp>
  105. #include <folly/Exception.h>
  106. #include <folly/File.h>
  107. #include <folly/FileUtil.h>
  108. #include <folly/Function.h>
  109. #include <folly/MapUtil.h>
  110. #include <folly/Optional.h>
  111. #include <folly/Portability.h>
  112. #include <folly/Range.h>
  113. #include <folly/gen/String.h>
  114. #include <folly/io/IOBufQueue.h>
  115. #include <folly/portability/SysResource.h>
  116. namespace folly {
  117. /**
  118. * Class to wrap a process return code.
  119. */
  120. class Subprocess;
  121. class ProcessReturnCode {
  122. public:
  123. enum State {
  124. // Subprocess starts in the constructor, so this state designates only
  125. // default-initialized or moved-out ProcessReturnCodes.
  126. NOT_STARTED,
  127. RUNNING,
  128. EXITED,
  129. KILLED,
  130. };
  131. static ProcessReturnCode makeNotStarted() {
  132. return ProcessReturnCode(RV_NOT_STARTED);
  133. }
  134. static ProcessReturnCode makeRunning() {
  135. return ProcessReturnCode(RV_RUNNING);
  136. }
  137. static ProcessReturnCode make(int status);
  138. // Default-initialized for convenience. Subprocess::returnCode() will
  139. // never produce this value.
  140. ProcessReturnCode() : rawStatus_(RV_NOT_STARTED) {}
  141. // Trivially copyable
  142. ProcessReturnCode(const ProcessReturnCode& p) = default;
  143. ProcessReturnCode& operator=(const ProcessReturnCode& p) = default;
  144. // Non-default move: In order for Subprocess to be movable, the "moved
  145. // out" state must not be "running", or ~Subprocess() will abort.
  146. ProcessReturnCode(ProcessReturnCode&& p) noexcept;
  147. ProcessReturnCode& operator=(ProcessReturnCode&& p) noexcept;
  148. /**
  149. * Process state. One of:
  150. * NOT_STARTED: process hasn't been started successfully
  151. * RUNNING: process is currently running
  152. * EXITED: process exited (successfully or not)
  153. * KILLED: process was killed by a signal.
  154. */
  155. State state() const;
  156. /**
  157. * Helper wrappers around state().
  158. */
  159. bool notStarted() const {
  160. return state() == NOT_STARTED;
  161. }
  162. bool running() const {
  163. return state() == RUNNING;
  164. }
  165. bool exited() const {
  166. return state() == EXITED;
  167. }
  168. bool killed() const {
  169. return state() == KILLED;
  170. }
  171. /**
  172. * Exit status. Only valid if state() == EXITED; throws otherwise.
  173. */
  174. int exitStatus() const;
  175. /**
  176. * Signal that caused the process's termination. Only valid if
  177. * state() == KILLED; throws otherwise.
  178. */
  179. int killSignal() const;
  180. /**
  181. * Was a core file generated? Only valid if state() == KILLED; throws
  182. * otherwise.
  183. */
  184. bool coreDumped() const;
  185. /**
  186. * String representation; one of
  187. * "not started"
  188. * "running"
  189. * "exited with status <status>"
  190. * "killed by signal <signal>"
  191. * "killed by signal <signal> (core dumped)"
  192. */
  193. std::string str() const;
  194. /**
  195. * Helper function to enforce a precondition based on this.
  196. * Throws std::logic_error if in an unexpected state.
  197. */
  198. void enforce(State state) const;
  199. private:
  200. explicit ProcessReturnCode(int rv) : rawStatus_(rv) {}
  201. static constexpr int RV_NOT_STARTED = -2;
  202. static constexpr int RV_RUNNING = -1;
  203. int rawStatus_;
  204. };
  205. /**
  206. * Base exception thrown by the Subprocess methods.
  207. */
  208. class FOLLY_EXPORT SubprocessError : public std::runtime_error {
  209. public:
  210. using std::runtime_error::runtime_error;
  211. };
  212. /**
  213. * Exception thrown by *Checked methods of Subprocess.
  214. */
  215. class FOLLY_EXPORT CalledProcessError : public SubprocessError {
  216. public:
  217. explicit CalledProcessError(ProcessReturnCode rc);
  218. ~CalledProcessError() throw() override = default;
  219. ProcessReturnCode returnCode() const {
  220. return returnCode_;
  221. }
  222. private:
  223. ProcessReturnCode returnCode_;
  224. };
  225. /**
  226. * Exception thrown if the subprocess cannot be started.
  227. */
  228. class FOLLY_EXPORT SubprocessSpawnError : public SubprocessError {
  229. public:
  230. SubprocessSpawnError(const char* executable, int errCode, int errnoValue);
  231. ~SubprocessSpawnError() throw() override = default;
  232. int errnoValue() const {
  233. return errnoValue_;
  234. }
  235. private:
  236. int errnoValue_;
  237. };
  238. /**
  239. * Subprocess.
  240. */
  241. class Subprocess {
  242. public:
  // Special "action" values accepted by Options::fd() in place of a real
  // file descriptor number. All are negative.
  static const int CLOSE = -1; // close the file descriptor in the child
  static const int PIPE = -2; // PIPE_IN for stdin, PIPE_OUT for stdout/stderr
  static const int PIPE_IN = -3; // what pipeStdin() requests
  static const int PIPE_OUT = -4; // what pipeStdout()/pipeStderr() request
  247. /**
  248. * See Subprocess::Options::dangerousPostForkPreExecCallback() for usage.
  249. * Every derived class should include the following warning:
  250. *
  251. * DANGER: This class runs after fork in a child process. Be fast, the
  252. * parent thread is waiting, but remember that other parent threads are
  253. * running and may mutate your state. Avoid mutating any data belonging to
  254. * the parent. Avoid interacting with non-POD data that originated in the
  255. * parent. Avoid any libraries that may internally reference non-POD data.
  256. * Especially beware parent mutexes -- for example, glog's LOG() uses one.
  257. */
  struct DangerousPostForkPreExecCallback {
    // Virtual, so implementations can be destroyed through this interface.
    virtual ~DangerousPostForkPreExecCallback() = default;

    // Implementations must return 0 on success, or an `errno` error code.
    virtual int operator()() = 0;
  };
  263. /**
  264. * Class representing various options: file descriptor behavior, and
  265. * whether to use $PATH for searching for the executable.
  266. *
  267. * By default, we don't use $PATH, file descriptors are closed if
  268. * the close-on-exec flag is set (fcntl FD_CLOEXEC) and inherited
  269. * otherwise.
  270. */
  271. class Options {
  272. friend class Subprocess;
  273. public:
  274. Options() {} // E.g. https://gcc.gnu.org/bugzilla/show_bug.cgi?id=58328
  275. /**
  276. * Change action for file descriptor fd.
  277. *
  278. * "action" may be another file descriptor number (dup2()ed before the
  279. * child execs), or one of CLOSE, PIPE_IN, and PIPE_OUT.
  280. *
  281. * CLOSE: close the file descriptor in the child
  282. * PIPE_IN: open a pipe *from* the child
  283. * PIPE_OUT: open a pipe *to* the child
  284. *
  285. * PIPE is a shortcut; same as PIPE_IN for stdin (fd 0), same as
  286. * PIPE_OUT for stdout (fd 1) or stderr (fd 2), and an error for
  287. * other file descriptors.
  288. */
  289. Options& fd(int fd, int action);
  290. /**
  291. * Shortcut to change the action for standard input.
  292. */
  293. Options& stdinFd(int action) {
  294. return fd(STDIN_FILENO, action);
  295. }
  296. /**
  297. * Shortcut to change the action for standard output.
  298. */
  299. Options& stdoutFd(int action) {
  300. return fd(STDOUT_FILENO, action);
  301. }
  302. /**
  303. * Shortcut to change the action for standard error.
  304. * Note that stderr(1) will redirect the standard error to the same
  305. * file descriptor as standard output; the equivalent of bash's "2>&1"
  306. */
  307. Options& stderrFd(int action) {
  308. return fd(STDERR_FILENO, action);
  309. }
  310. Options& pipeStdin() {
  311. return fd(STDIN_FILENO, PIPE_IN);
  312. }
  313. Options& pipeStdout() {
  314. return fd(STDOUT_FILENO, PIPE_OUT);
  315. }
  316. Options& pipeStderr() {
  317. return fd(STDERR_FILENO, PIPE_OUT);
  318. }
  319. /**
  320. * Close all other fds (other than standard input, output, error,
  321. * and file descriptors explicitly specified with fd()).
  322. *
  323. * This is potentially slow; it's generally a better idea to
  324. * set the close-on-exec flag on all file descriptors that shouldn't
  325. * be inherited by the child.
  326. *
  327. * Even with this option set, standard input, output, and error are
  328. * not closed; use stdin(CLOSE), stdout(CLOSE), stderr(CLOSE) if you
  329. * desire this.
  330. */
  331. Options& closeOtherFds() {
  332. closeOtherFds_ = true;
  333. return *this;
  334. }
  335. /**
  336. * Use the search path ($PATH) when searching for the executable.
  337. */
  338. Options& usePath() {
  339. usePath_ = true;
  340. return *this;
  341. }
  342. /**
  343. * Change the child's working directory, after the vfork.
  344. */
  345. Options& chdir(const std::string& dir) {
  346. childDir_ = dir;
  347. return *this;
  348. }
  349. #if __linux__
  350. /**
  351. * Child will receive a signal when the parent exits.
  352. */
  353. Options& parentDeathSignal(int sig) {
  354. parentDeathSignal_ = sig;
  355. return *this;
  356. }
  357. #endif
  358. /**
  359. * Child will be made a process group leader when it starts. Upside: one
  360. * can reliably kill all its non-daemonizing descendants. Downside: the
  361. * child will not receive Ctrl-C etc during interactive use.
  362. */
  363. Options& processGroupLeader() {
  364. processGroupLeader_ = true;
  365. return *this;
  366. }
  367. /**
  368. * *** READ THIS WHOLE DOCBLOCK BEFORE USING ***
  369. *
  370. * Run this callback in the child after the fork, just before the
  371. * exec(), and after the child's state has been completely set up:
  372. * - signal handlers have been reset to default handling and unblocked
  373. * - the working directory was set
  374. * - closed any file descriptors specified via Options()
  375. * - set child process flags (see code)
  376. *
  377. * This is EXTREMELY DANGEROUS. For example, this innocuous-looking code
  378. * can cause a fraction of your Subprocess launches to hang forever:
  379. *
  380. * LOG(INFO) << "Hello from the child";
  381. *
  382. * The reason is that glog has an internal mutex. If your fork() happens
  383. * when the parent has the mutex locked, the child will wait forever.
  384. *
  385. * == GUIDELINES ==
  386. *
  387. * - Be quick -- the parent thread is blocked until you exit.
  388. * - Remember that other parent threads are running, and may mutate your
  389. * state.
  390. * - Avoid mutating any data belonging to the parent.
  391. * - Avoid interacting with non-POD data that came from the parent.
  392. * - Avoid any libraries that may internally reference non-POD state.
  393. * - Especially beware parent mutexes, e.g. LOG() uses a global mutex.
  394. * - Avoid invoking the parent's destructors (you can accidentally
  395. * delete files, terminate network connections, etc).
  396. * - Read http://ewontfix.com/7/
  397. */
  398. Options& dangerousPostForkPreExecCallback(
  399. DangerousPostForkPreExecCallback* cob) {
  400. dangerousPostForkPreExecCallback_ = cob;
  401. return *this;
  402. }
  403. #if __linux__
  404. /**
  405. * This is an experimental feature, it is best you don't use it at this
  406. * point of time.
  407. * Although folly would support cloning with custom flags in some form, this
  408. * API might change in the near future. So use the following assuming it is
  409. * experimental. (Apr 11, 2017)
  410. *
  411. * This unlocks Subprocess to support clone flags, many of them need
  412. * CAP_SYS_ADMIN permissions. It might also require you to go through the
  413. * implementation to understand what happens before, between and after the
  414. * fork-and-exec.
  415. *
  416. * `man 2 clone` would be a starting point for knowing about the available
  417. * flags.
  418. */
  419. using clone_flags_t = uint64_t;
  420. Options& useCloneWithFlags(clone_flags_t cloneFlags) noexcept {
  421. cloneFlags_ = cloneFlags;
  422. return *this;
  423. }
  424. #endif
  425. private:
  426. typedef boost::container::flat_map<int, int> FdMap;
  427. FdMap fdActions_;
  428. bool closeOtherFds_{false};
  429. bool usePath_{false};
  430. std::string childDir_; // "" keeps the parent's working directory
  431. #if __linux__
  432. int parentDeathSignal_{0};
  433. #endif
  434. bool processGroupLeader_{false};
  435. DangerousPostForkPreExecCallback* dangerousPostForkPreExecCallback_{
  436. nullptr};
  437. #if __linux__
  438. // none means `vfork()` instead of a custom `clone()`
  439. // Optional<> is used because value of '0' means do clone without any flags.
  440. Optional<clone_flags_t> cloneFlags_;
  441. #endif
  442. };
  443. // Non-copiable, but movable
  444. Subprocess(const Subprocess&) = delete;
  445. Subprocess& operator=(const Subprocess&) = delete;
  446. Subprocess(Subprocess&&) = default;
  447. Subprocess& operator=(Subprocess&&) = default;
  448. /**
  449. * Create an uninitialized subprocess.
  450. *
  451. * In this state it can only be destroyed, or assigned to using the move
  452. * assignment operator.
  453. */
  454. Subprocess();
  455. /**
  456. * Create a subprocess from the given arguments. argv[0] must be listed.
  457. * If not-null, executable must be the actual executable
  458. * being used (otherwise it's the same as argv[0]).
  459. *
  460. * If env is not-null, it must contain name=value strings to be used
  461. * as the child's environment; otherwise, we inherit the environment
  462. * from the parent. env must be null if options.usePath is set.
  463. */
  464. explicit Subprocess(
  465. const std::vector<std::string>& argv,
  466. const Options& options = Options(),
  467. const char* executable = nullptr,
  468. const std::vector<std::string>* env = nullptr);
  469. ~Subprocess();
  470. /**
  471. * Create a subprocess run as a shell command (as shell -c 'command')
  472. *
  473. * The shell to use is taken from the environment variable $SHELL,
  474. * or /bin/sh if $SHELL is unset.
  475. */
  476. // clang-format off
  477. [[deprecated(
  478. "Prefer not running in a shell or use `shellify`.")]]
  479. explicit Subprocess(
  480. const std::string& cmd,
  481. const Options& options = Options(),
  482. const std::vector<std::string>* env = nullptr);
  483. // clang-format on
  484. ////
  485. //// The methods below only manipulate the process state, and do not
  486. //// affect its communication pipes.
  487. ////
  488. /**
  489. * Return the child's pid, or -1 if the child wasn't successfully spawned
  490. * or has already been wait()ed upon.
  491. */
  492. pid_t pid() const;
  493. /**
  494. * Return the child's status as a ProcessReturnCode: EXITED or KILLED if the
  495. * process has already been waited on, RUNNING if it is still running, or
  496. * NOT_STARTED if it hasn't been successfully started. NOTE that this does not call
  497. * waitpid() or Subprocess::poll(), but simply returns the status stored
  498. * in the Subprocess object.
  499. */
  500. ProcessReturnCode returnCode() const {
  501. return returnCode_;
  502. }
  503. /**
  504. * Poll the child's status and return it. Return the exit status if the
  505. * subprocess had quit, or RUNNING otherwise. Throws an std::logic_error
  506. * if called on a Subprocess whose status is no longer RUNNING. No other
  507. * exceptions are possible. Aborts on egregious violations of contract,
  508. * e.g. if you wait for the underlying process without going through this
  509. * Subprocess instance.
  510. */
  511. ProcessReturnCode poll(struct rusage* ru = nullptr);
  512. /**
  513. * Poll the child's status. If the process is still running, return false.
  514. * Otherwise, return true if the process exited with status 0 (success),
  515. * or throw CalledProcessError if the process exited with a non-zero status.
  516. */
  517. bool pollChecked();
  518. /**
  519. * Wait for the process to terminate and return its status. Like poll(),
  520. * the only exception this can throw is std::logic_error if you call this
  521. * on a Subprocess whose status is not RUNNING. Aborts on egregious
  522. * violations of contract, like an out-of-band waitpid(p.pid(), 0, 0).
  523. */
  524. ProcessReturnCode wait();
  525. /**
  526. * Wait for the process to terminate, throw if unsuccessful.
  527. */
  528. void waitChecked();
  529. /**
  530. * Send a signal to the child. Shortcuts for the commonly used Unix
  531. * signals are below.
  532. */
  533. void sendSignal(int signal);
  534. void terminate() {
  535. sendSignal(SIGTERM);
  536. }
  537. void kill() {
  538. sendSignal(SIGKILL);
  539. }
  540. ////
  541. //// The methods below only affect the process's communication pipes, but
  542. //// not its return code or state (they do not poll() or wait()).
  543. ////
  544. /**
  545. * Communicate with the child until all pipes to/from the child are closed.
  546. *
  547. * The input buffer is written to the process' stdin pipe, and data is read
  548. * from the stdout and stderr pipes. Non-blocking I/O is performed on all
  549. * pipes simultaneously to avoid deadlocks.
  550. *
  551. * The stdin pipe will be closed after the full input buffer has been written.
  552. * An error will be thrown if a non-empty input buffer is supplied but stdin
  553. * was not configured as a pipe.
  554. *
  555. * Returns a pair of buffers containing the data read from stdout and stderr.
  556. * If stdout or stderr is not a pipe, an empty IOBuf queue will be returned
  557. * for the respective buffer.
  558. *
  559. * Note that communicate() and communicateIOBuf() both return when all
  560. * pipes to/from the child are closed; the child might stay alive after
  561. * that, so you must still wait().
  562. *
  563. * communicateIOBuf() uses IOBufQueue for buffering (which has the
  564. * advantage that it won't try to allocate all data at once), but it does
  565. * store the subprocess's entire output in memory before returning.
  566. *
  567. * communicate() uses strings for simplicity.
  568. */
  569. std::pair<IOBufQueue, IOBufQueue> communicateIOBuf(
  570. IOBufQueue input = IOBufQueue());
  571. std::pair<std::string, std::string> communicate(
  572. StringPiece input = StringPiece());
  573. /**
  574. * Communicate with the child until all pipes to/from the child are closed.
  575. *
  576. * == Semantics ==
  577. *
  578. * readCallback(pfd, cfd) will be called whenever there's data available
  579. * on any pipe *from* the child (PIPE_OUT). pfd is the file descriptor
  580. * in the parent (that you use to read from); cfd is the file descriptor
  581. * in the child (used for identifying the stream; 1 = child's standard
  582. * output, 2 = child's standard error, etc)
  583. *
  584. * writeCallback(pfd, cfd) will be called whenever a pipe *to* the child is
  585. * writable (PIPE_IN). pfd is the file descriptor in the parent (that you
  586. * use to write to); cfd is the file descriptor in the child (used for
  587. * identifying the stream; 0 = child's standard input, etc)
  588. *
  589. * The read and write callbacks must read from / write to pfd and return
  590. * false during normal operation. Return true to tell communicate() to
  591. * close the pipe. For readCallback, this might send SIGPIPE to the
  592. * child, or make its writes fail with EPIPE, so you should generally
  593. * avoid returning true unless you've reached end-of-file.
  594. *
  595. * communicate() returns when all pipes to/from the child are closed; the
  596. * child might stay alive after that, so you must still wait().
  597. * Conversely, the child may quit long before its pipes are closed, since
  598. * its descendants can keep them alive forever.
  599. *
  600. * Most users won't need to use this callback version; the simpler version
  601. * of communicate (which buffers data in memory) will probably work fine.
  602. *
  603. * == Things you must get correct ==
  604. *
  605. * 1) You MUST consume all data passed to readCallback (or return true to
  606. * close the pipe). Similarly, you MUST write to a writable pipe (or
  607. * return true to close the pipe). To do otherwise is an error that can
  608. * result in a deadlock. You must do this even for pipes you are not
  609. * interested in.
  610. *
  611. * 2) pfd is nonblocking, so be prepared for read() / write() to return -1
  612. * and set errno to EAGAIN (in which case you should return false). Use
  613. * readNoInt() from FileUtil.h to handle interrupted reads for you.
  614. *
  615. * 3) Your callbacks MUST NOT call any of the Subprocess methods that
  616. * manipulate the pipe FDs. Check the docblocks, but, for example,
  617. * neither closeParentFd (return true instead) nor takeOwnershipOfPipes
  618. * are safe. Stick to reading/writing from pfd, as appropriate.
  619. *
  620. * == Good to know ==
  621. *
  622. * 1) See ReadLinesCallback for an easy way to consume the child's output
  623. * streams line-by-line (or tokenized by another delimiter).
  624. *
  625. * 2) "Wait until the descendants close the pipes" is usually the behavior
  626. * you want, since the descendants may have something to say even if the
  627. * immediate child is dead. If you need to be able to force-close all
  628. * parent FDs, communicate() will NOT work for you. Do it your own way by
  629. * using takeOwnershipOfPipes().
  630. *
  631. * Why not? You can return "true" from your callbacks to sever active
  632. * pipes, but inactive ones can remain open indefinitely. It is
  633. * impossible to safely close inactive pipes while another thread is
  634. * blocked in communicate(). This is BY DESIGN. Racing communicate()'s
  635. * read/write callbacks can result in wrong I/O and data corruption. This
  636. * class would need internal synchronization and timeouts, a poor and
  637. * expensive implementation choice, in order to make closeParentFd()
  638. * thread-safe.
  639. */
  640. using FdCallback = folly::Function<bool(int, int)>;
  641. void communicate(FdCallback readCallback, FdCallback writeCallback);
  642. /**
  643. * A readCallback for Subprocess::communicate() that helps you consume
  644. * lines (or other delimited pieces) from your subprocess's file
  645. * descriptors. Use the readLinesCallback() helper to get template
  646. * deduction. For example:
  647. *
  648. * subprocess.communicate(
  649. * Subprocess::readLinesCallback(
  650. * [](int fd, folly::StringPiece s) {
  651. * std::cout << fd << " said: " << s;
  652. * return false; // Keep reading from the child
  653. * }
  654. * ),
  655. * [](int pfd, int cfd){ return true; } // Don't write to the child
  656. * );
  657. *
  658. * If a file line exceeds maxLineLength, your callback will get some
  659. * initial chunks of maxLineLength with no trailing delimiters. The final
  660. * chunk of a line is delimiter-terminated iff the delimiter was present
  661. * in the input. In particular, the last line in a file always lacks a
  662. * delimiter -- so if a file ends on a delimiter, the final line is empty.
  663. *
  664. * Like a regular communicate() callback, your fdLineCb() normally returns
  665. * false. It may return true to tell Subprocess to close the underlying
  666. * file descriptor. The child process may then receive SIGPIPE or get
  667. * EPIPE errors on writes.
  668. */
  669. template <class Callback>
  670. class ReadLinesCallback {
  671. private:
  672. // Binds an FD to the client-provided FD+line callback
  673. struct StreamSplitterCallback {
  674. StreamSplitterCallback(Callback& cb, int fd) : cb_(cb), fd_(fd) {}
  675. // The return value semantics are inverted vs StreamSplitter
  676. bool operator()(StringPiece s) {
  677. return !cb_(fd_, s);
  678. }
  679. Callback& cb_;
  680. int fd_;
  681. };
  682. typedef gen::StreamSplitter<StreamSplitterCallback> LineSplitter;
  683. public:
  684. explicit ReadLinesCallback(
  685. Callback&& fdLineCb,
  686. uint64_t maxLineLength = 0, // No line length limit by default
  687. char delimiter = '\n',
  688. uint64_t bufSize = 1024)
  689. : fdLineCb_(std::forward<Callback>(fdLineCb)),
  690. maxLineLength_(maxLineLength),
  691. delimiter_(delimiter),
  692. bufSize_(bufSize) {}
  693. bool operator()(int pfd, int cfd) {
  694. // Make a splitter for this cfd if it doesn't already exist
  695. auto it = fdToSplitter_.find(cfd);
  696. auto& splitter = (it != fdToSplitter_.end())
  697. ? it->second
  698. : fdToSplitter_
  699. .emplace(
  700. cfd,
  701. LineSplitter(
  702. delimiter_,
  703. StreamSplitterCallback(fdLineCb_, cfd),
  704. maxLineLength_))
  705. .first->second;
  706. // Read as much as we can from this FD
  707. char buf[bufSize_];
  708. while (true) {
  709. ssize_t ret = readNoInt(pfd, buf, bufSize_);
  710. if (ret == -1 && errno == EAGAIN) { // No more data for now
  711. return false;
  712. }
  713. checkUnixError(ret, "read");
  714. if (ret == 0) { // Reached end-of-file
  715. splitter.flush(); // Ignore return since the file is over anyway
  716. return true;
  717. }
  718. if (!splitter(StringPiece(buf, ret))) {
  719. return true; // The callback told us to stop
  720. }
  721. }
  722. }
  723. private:
  724. Callback fdLineCb_;
  725. const uint64_t maxLineLength_;
  726. const char delimiter_;
  727. const uint64_t bufSize_;
  728. // We lazily make splitters for all cfds that get used.
  729. std::unordered_map<int, LineSplitter> fdToSplitter_;
  730. };
  731. // Helper to enable template deduction
  732. template <class Callback>
  733. static auto readLinesCallback(
  734. Callback&& fdLineCb,
  735. uint64_t maxLineLength = 0, // No line length limit by default
  736. char delimiter = '\n',
  737. uint64_t bufSize = 1024)
  738. -> ReadLinesCallback<typename std::decay<Callback>::type> {
  739. return ReadLinesCallback<typename std::decay<Callback>::type>(
  740. std::forward<Callback>(fdLineCb), maxLineLength, delimiter, bufSize);
  741. }
  /**
   * communicate() callbacks can use this to temporarily enable/disable
   * notifications (callbacks) for a pipe to/from the child.  By default,
   * all are enabled.  Useful for "chatty" communication -- you want to
   * disable write callbacks until you receive the expected message.
   *
   * The pipe is selected by childFd, i.e. by its file descriptor number in
   * the child; `enabled` is the new notification state for that pipe.
   *
   * Disabling a pipe does not free you from the requirement to consume all
   * incoming data.  Failing to do so will easily create deadlock bugs.
   *
   * Throws if the childFd is not known.
   */
  void enableNotifications(int childFd, bool enabled);
  /**
   * Are notifications for one pipe to/from child enabled?  The pipe is
   * selected by its file descriptor number in the child.  Pipes start out
   * enabled; see enableNotifications().  Throws if the childFd is not known.
   */
  bool notificationsEnabled(int childFd) const;
  759. ////
  760. //// The following methods are meant for the cases when communicate() is
  761. //// not suitable. You should not need them when you call communicate(),
  762. //// and, in fact, it is INHERENTLY UNSAFE to use closeParentFd() or
  763. //// takeOwnershipOfPipes() from a communicate() callback.
  764. ////
  /**
   * Close the parent file descriptor given a file descriptor in the child.
   * DO NOT USE from communicate() callbacks; make them return true instead.
   *
   * NOTE(review): presumably throws for an unknown childFd, like the other
   * childFd-based lookups in this class -- confirm against the definition.
   */
  void closeParentFd(int childFd);
  /**
   * Set all pipes from / to child to be non-blocking.  communicate() does
   * this for you, so this is only of interest when you drive the pipes
   * yourself instead of going through communicate().
   */
  void setAllNonBlocking();
  775. /**
  776. * Get parent file descriptor corresponding to the given file descriptor
  777. * in the child. Throws if childFd isn't a pipe (PIPE_IN / PIPE_OUT).
  778. * Do not close() the returned file descriptor; use closeParentFd, above.
  779. */
  780. int parentFd(int childFd) const {
  781. return pipes_[findByChildFd(childFd)].pipe.fd();
  782. }
  // Parent's end of the pipe that is FD 0 (stdin) in the child.  Shorthand
  // for parentFd(0), so it throws under the same conditions.
  int stdinFd() const {
    return parentFd(0);
  }
  // Parent's end of the pipe that is FD 1 (stdout) in the child.  Shorthand
  // for parentFd(1), so it throws under the same conditions.
  int stdoutFd() const {
    return parentFd(1);
  }
  // Parent's end of the pipe that is FD 2 (stderr) in the child.  Shorthand
  // for parentFd(2), so it throws under the same conditions.
  int stderrFd() const {
    return parentFd(2);
  }
  792. /**
  793. * The child's pipes are logically separate from the process metadata
  794. * (they may even be kept alive by the child's descendants). This call
  795. * lets you manage the pipes' lifetime separately from the lifetime of the
  796. * child process.
  797. *
  798. * After this call, the Subprocess instance will have no knowledge of
  799. * these pipes, and the caller assumes responsibility for managing their
  800. * lifetimes. Pro-tip: prefer to explicitly close() the pipes, since
  801. * folly::File would otherwise silently suppress I/O errors.
  802. *
  803. * No, you may NOT call this from a communicate() callback.
  804. */
  805. struct ChildPipe {
  806. ChildPipe(int fd, folly::File&& ppe) : childFd(fd), pipe(std::move(ppe)) {}
  807. int childFd;
  808. folly::File pipe; // Owns the parent FD
  809. };
  // Hands the pipes over to the caller as ChildPipe entries; pipes_ is empty
  // afterwards.  Must not be called from a communicate() callback.
  std::vector<ChildPipe> takeOwnershipOfPipes();
  811. private:
  // spawn() sets up a pipe to read errors from the child,
  // then calls spawnInternal() to do the bulk of the work.  Once
  // spawnInternal() returns it reads the error pipe to see if the child
  // encountered any errors.
  //
  // Takes ownership of the argv array.
  // NOTE(review): env is a pointer, so it is presumably optional (nullptr
  // meaning "use the parent's environment") -- confirm in the definition.
  void spawn(
      std::unique_ptr<const char*[]> argv,
      const char* executable,
      const Options& options,
      const std::vector<std::string>* env);
  // Does the bulk of the work of starting the child on behalf of spawn().
  // Unlike spawn(), this takes Options by non-const reference.
  // NOTE(review): errFd is presumably the child's end of the error pipe
  // that spawn() creates -- confirm in the definition.
  void spawnInternal(
      std::unique_ptr<const char*[]> argv,
      const char* executable,
      Options& options,
      const std::vector<std::string>* env,
      int errFd);
  // Actions to run in child.
  // Note that this runs after vfork(), so tread lightly.
  // Returns 0 on success, or an errno value on failure.
  //
  // NOTE(review): sigmask and childDir are presumably the signal mask to
  // install and the working directory to switch to in the child -- confirm
  // in the definition.
  int prepareChild(
      const Options& options,
      const sigset_t* sigmask,
      const char* childDir) const;
  // NOTE(review): presumably the final child-side step that exec()s
  // `executable` with the given argv / env, and, like prepareChild(),
  // returns an errno value on failure -- confirm in the definition.
  int runChild(
      const char* executable,
      char** argv,
      char** env,
      const Options& options) const;
  /**
   * Read from the error pipe, and throw SubprocessSpawnError if the child
   * failed before calling exec().
   *
   * NOTE(review): pfd is presumably the parent's read end of the error pipe
   * created by spawn(), and executable is presumably used to describe the
   * failure -- confirm in the definition.
   */
  void readChildErrorPipe(int pfd, const char* executable);
  // Returns an index into pipes_.  Throws std::invalid_argument if not found.
  // childFd is the pipe's descriptor number in the child; parentFd() uses
  // the returned index to reach the matching Pipe.
  size_t findByChildFd(const int childFd) const;
  // Process ID of the child; holds -1 until it is assigned.
  // NOTE(review): presumably set while spawning -- confirm in the .cpp.
  pid_t pid_{-1};
  // NOTE(review): presumably tracks the child's state / exit status; see
  // ProcessReturnCode for the exact semantics.
  ProcessReturnCode returnCode_;
  848. /**
  849. * Represents a pipe between this process, and the child process (or its
  850. * descendant). To interact with these pipes, you can use communicate(),
  851. * or use parentFd() and related methods, or separate them from the
  852. * Subprocess instance entirely via takeOwnershipOfPipes().
  853. */
  854. struct Pipe : private boost::totally_ordered<Pipe> {
  855. folly::File pipe; // Our end of the pipe, wrapped in a File to auto-close.
  856. int childFd = -1; // Identifies the pipe: what FD is this in the child?
  857. int direction = PIPE_IN; // one of PIPE_IN / PIPE_OUT
  858. bool enabled = true; // Are notifications enabled in communicate()?
  859. bool operator<(const Pipe& other) const {
  860. return childFd < other.childFd;
  861. }
  862. bool operator==(const Pipe& other) const {
  863. return childFd == other.childFd;
  864. }
  865. };
  // Populated at process start according to fdActions, empty after
  // takeOwnershipOfPipes().  Sorted by childFd.  Can only have elements
  // erased, but not inserted, after being populated.
  //
  // The number of pipes between parent and child is assumed to be small,
  // so we're happy with a vector here, even if it means linear erase.
  //
  // Indices into this vector are what findByChildFd() returns.
  std::vector<Pipe> pipes_;
  873. };
  874. } // namespace folly