SignalHandler.cpp 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508
  1. /*
  2. * Copyright 2013-present Facebook, Inc.
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. // This is heavily inspired by the signal handler from google-glog
  17. #include <folly/experimental/symbolizer/SignalHandler.h>
  18. #include <signal.h>
  19. #include <sys/types.h>
  20. #include <algorithm>
  21. #include <atomic>
  22. #include <cerrno>
  23. #include <ctime>
  24. #include <mutex>
  25. #include <vector>
  26. #include <glog/logging.h>
  27. #include <folly/Conv.h>
  28. #include <folly/ScopeGuard.h>
  29. #include <folly/experimental/symbolizer/ElfCache.h>
  30. #include <folly/experimental/symbolizer/Symbolizer.h>
  31. #include <folly/portability/SysSyscall.h>
  32. #include <folly/portability/Unistd.h>
  33. namespace folly {
  34. namespace symbolizer {
  35. namespace {
  36. /**
  37. * Fatal signal handler registry.
  38. */
  39. class FatalSignalCallbackRegistry {
  40. public:
  41. FatalSignalCallbackRegistry();
  42. void add(SignalCallback func);
  43. void markInstalled();
  44. void run();
  45. private:
  46. std::atomic<bool> installed_;
  47. std::mutex mutex_;
  48. std::vector<SignalCallback> handlers_;
  49. };
  50. FatalSignalCallbackRegistry::FatalSignalCallbackRegistry()
  51. : installed_(false) {}
  52. void FatalSignalCallbackRegistry::add(SignalCallback func) {
  53. std::lock_guard<std::mutex> lock(mutex_);
  54. CHECK(!installed_) << "FatalSignalCallbackRegistry::add may not be used "
  55. "after installing the signal handlers.";
  56. handlers_.push_back(func);
  57. }
  58. void FatalSignalCallbackRegistry::markInstalled() {
  59. std::lock_guard<std::mutex> lock(mutex_);
  60. CHECK(!installed_.exchange(true))
  61. << "FatalSignalCallbackRegistry::markInstalled must be called "
  62. << "at most once";
  63. }
  64. void FatalSignalCallbackRegistry::run() {
  65. if (!installed_) {
  66. return;
  67. }
  68. for (auto& fn : handlers_) {
  69. fn();
  70. }
  71. }
  72. // Leak it so we don't have to worry about destruction order
  73. FatalSignalCallbackRegistry* gFatalSignalCallbackRegistry =
  74. new FatalSignalCallbackRegistry;
  // Table of the signals handled by this file. The list is terminated by a
  // sentinel entry whose `name` is nullptr; loops over the table stop there.
  struct {
    // Signal number (SIGxxx).
    int number;
    // Printable signal name; nullptr marks the end of the table.
    const char* name;
    // Disposition that was in effect before our handler was installed.
    struct sigaction oldAction;
  } kFatalSignals[] = {
      {SIGSEGV, "SIGSEGV", {}},
      {SIGILL, "SIGILL", {}},
      {SIGFPE, "SIGFPE", {}},
      {SIGABRT, "SIGABRT", {}},
      {SIGBUS, "SIGBUS", {}},
      {SIGTERM, "SIGTERM", {}},
      {SIGQUIT, "SIGQUIT", {}},
      // Sentinel.
      {0, nullptr, {}},
  };
  89. void callPreviousSignalHandler(int signum) {
  90. // Restore disposition to old disposition, then kill ourselves with the same
  91. // signal. The signal will be blocked until we return from our handler,
  92. // then it will invoke the default handler and abort.
  93. for (auto p = kFatalSignals; p->name; ++p) {
  94. if (p->number == signum) {
  95. sigaction(signum, &p->oldAction, nullptr);
  96. raise(signum);
  97. return;
  98. }
  99. }
  100. // Not one of the signals we know about. Oh well. Reset to default.
  101. struct sigaction sa;
  102. memset(&sa, 0, sizeof(sa));
  103. sa.sa_handler = SIG_DFL;
  104. sigaction(signum, &sa, nullptr);
  105. raise(signum);
  106. }
  107. // Note: not thread-safe, but that's okay, as we only let one thread
  108. // in our signal handler at a time.
  109. //
  110. // Leak it so we don't have to worry about destruction order
  111. //
  112. // Initialized by installFatalSignalHandler
  113. SafeStackTracePrinter* gStackTracePrinter;
  114. void printDec(uint64_t val) {
  115. char buf[20];
  116. uint32_t n = uint64ToBufferUnsafe(val, buf);
  117. gStackTracePrinter->print(StringPiece(buf, n));
  118. }
  119. const char kHexChars[] = "0123456789abcdef";
  120. void printHex(uint64_t val) {
  121. // TODO(tudorb): Add this to folly/Conv.h
  122. char buf[2 + 2 * sizeof(uint64_t)]; // "0x" prefix, 2 digits for each byte
  123. char* end = buf + sizeof(buf);
  124. char* p = end;
  125. do {
  126. *--p = kHexChars[val & 0x0f];
  127. val >>= 4;
  128. } while (val != 0);
  129. *--p = 'x';
  130. *--p = '0';
  131. gStackTracePrinter->print(StringPiece(p, end));
  132. }
  133. void print(StringPiece sp) {
  134. gStackTracePrinter->print(sp);
  135. }
  136. void flush() {
  137. gStackTracePrinter->flush();
  138. }
  139. void dumpTimeInfo() {
  140. SCOPE_EXIT {
  141. flush();
  142. };
  143. time_t now = time(nullptr);
  144. print("*** Aborted at ");
  145. printDec(now);
  146. print(" (Unix time, try 'date -d @");
  147. printDec(now);
  148. print("') ***\n");
  149. }
  // Maps a SIGILL si_code to a human-readable description, or nullptr when
  // the code is not one of the values listed below.
  const char* sigill_reason(int si_code) {
    static constexpr struct {
      int code;
      const char* description;
    } kReasons[] = {
        {ILL_ILLOPC, "illegal opcode"},
        {ILL_ILLOPN, "illegal operand"},
        {ILL_ILLADR, "illegal addressing mode"},
        {ILL_ILLTRP, "illegal trap"},
        {ILL_PRVOPC, "privileged opcode"},
        {ILL_PRVREG, "privileged register"},
        {ILL_COPROC, "coprocessor error"},
        {ILL_BADSTK, "internal stack error"},
    };
    for (const auto& reason : kReasons) {
      if (reason.code == si_code) {
        return reason.description;
      }
    }
    return nullptr;
  }
  // Maps a SIGFPE si_code to a human-readable description, or nullptr when
  // the code is not one of the values listed below.
  const char* sigfpe_reason(int si_code) {
    static constexpr struct {
      int code;
      const char* description;
    } kReasons[] = {
        {FPE_INTDIV, "integer divide by zero"},
        {FPE_INTOVF, "integer overflow"},
        {FPE_FLTDIV, "floating-point divide by zero"},
        {FPE_FLTOVF, "floating-point overflow"},
        {FPE_FLTUND, "floating-point underflow"},
        {FPE_FLTRES, "floating-point inexact result"},
        {FPE_FLTINV, "floating-point invalid operation"},
        {FPE_FLTSUB, "subscript out of range"},
    };
    for (const auto& reason : kReasons) {
      if (reason.code == si_code) {
        return reason.description;
      }
    }
    return nullptr;
  }
  // Maps a SIGSEGV si_code to a human-readable description, or nullptr for
  // any code other than SEGV_MAPERR / SEGV_ACCERR.
  const char* sigsegv_reason(int si_code) {
    if (si_code == SEGV_MAPERR) {
      return "address not mapped to object";
    }
    if (si_code == SEGV_ACCERR) {
      return "invalid permissions for mapped object";
    }
    return nullptr;
  }
  // Maps a SIGBUS si_code to a human-readable description, or nullptr for
  // any code not handled below.
  const char* sigbus_reason(int si_code) {
    if (si_code == BUS_ADRALN) {
      return "invalid address alignment";
    }
    if (si_code == BUS_ADRERR) {
      return "nonexistent physical address";
    }
    if (si_code == BUS_OBJERR) {
      return "object-specific hardware error";
    }
    // MCEERR_AR and MCEERR_AO: in sigaction(2) but not in headers.
    return nullptr;
  }
  // Maps a SIGTRAP si_code to a human-readable description, or nullptr for
  // any code not handled below.
  const char* sigtrap_reason(int si_code) {
    if (si_code == TRAP_BRKPT) {
      return "process breakpoint";
    }
    if (si_code == TRAP_TRACE) {
      return "process trace trap";
    }
    // TRAP_BRANCH and TRAP_HWBKPT: in sigaction(2) but not in headers.
    return nullptr;
  }
  // Maps a SIGCHLD si_code to a human-readable description, or nullptr when
  // the code is not one of the values listed below.
  const char* sigchld_reason(int si_code) {
    static constexpr struct {
      int code;
      const char* description;
    } kReasons[] = {
        {CLD_EXITED, "child has exited"},
        {CLD_KILLED, "child was killed"},
        {CLD_DUMPED, "child terminated abnormally"},
        {CLD_TRAPPED, "traced child has trapped"},
        {CLD_STOPPED, "child has stopped"},
        {CLD_CONTINUED, "stopped child has continued"},
    };
    for (const auto& reason : kReasons) {
      if (reason.code == si_code) {
        return reason.description;
      }
    }
    return nullptr;
  }
  // Maps a SIGIO si_code to a human-readable description, or nullptr when
  // the code is not one of the values listed below.
  const char* sigio_reason(int si_code) {
    static constexpr struct {
      int code;
      const char* description;
    } kReasons[] = {
        {POLL_IN, "data input available"},
        {POLL_OUT, "output buffers available"},
        {POLL_MSG, "input message available"},
        {POLL_ERR, "I/O error"},
        {POLL_PRI, "high priority input available"},
        {POLL_HUP, "device disconnected"},
    };
    for (const auto& reason : kReasons) {
      if (reason.code == si_code) {
        return reason.description;
      }
    }
    return nullptr;
  }
  264. const char* signal_reason(int signum, int si_code) {
  265. switch (signum) {
  266. case SIGILL:
  267. return sigill_reason(si_code);
  268. case SIGFPE:
  269. return sigfpe_reason(si_code);
  270. case SIGSEGV:
  271. return sigsegv_reason(si_code);
  272. case SIGBUS:
  273. return sigbus_reason(si_code);
  274. case SIGTRAP:
  275. return sigtrap_reason(si_code);
  276. case SIGCHLD:
  277. return sigchld_reason(si_code);
  278. case SIGIO:
  279. return sigio_reason(si_code); // aka SIGPOLL
  280. default:
  281. return nullptr;
  282. }
  283. }
  284. void dumpSignalInfo(int signum, siginfo_t* siginfo) {
  285. SCOPE_EXIT {
  286. flush();
  287. };
  288. // Get the signal name, if possible.
  289. const char* name = nullptr;
  290. for (auto p = kFatalSignals; p->name; ++p) {
  291. if (p->number == signum) {
  292. name = p->name;
  293. break;
  294. }
  295. }
  296. print("*** Signal ");
  297. printDec(signum);
  298. if (name) {
  299. print(" (");
  300. print(name);
  301. print(")");
  302. }
  303. print(" (");
  304. printHex(reinterpret_cast<uint64_t>(siginfo->si_addr));
  305. print(") received by PID ");
  306. printDec(getpid());
  307. print(" (pthread TID ");
  308. printHex((uint64_t)pthread_self());
  309. print(") (linux TID ");
  310. printDec(syscall(__NR_gettid));
  311. // Kernel-sourced signals don't give us useful info for pid/uid.
  312. if (siginfo->si_code != SI_KERNEL) {
  313. print(") (maybe from PID ");
  314. printDec(siginfo->si_pid);
  315. print(", UID ");
  316. printDec(siginfo->si_uid);
  317. }
  318. auto reason = signal_reason(signum, siginfo->si_code);
  319. if (reason != nullptr) {
  320. print(") (code: ");
  321. print(reason);
  322. }
  323. print("), stack trace: ***\n");
  324. }
  // Sentinel meaning "no thread is inside the signal handler". On Linux,
  // pthread_t is a pointer, so 0 is an invalid value and can play that role.
  //
  // POSIX defines PTHREAD_NULL for this purpose, but that's not available.
  constexpr pthread_t kInvalidThreadId = 0;

  // Thread currently running the fatal signal handler, or kInvalidThreadId.
  std::atomic<pthread_t> gSignalThread{kInvalidThreadId};

  // Becomes true the first time the handler is re-entered on the thread that
  // already owns it.
  std::atomic<bool> gInRecursiveSignalHandler{false};
  332. // Here be dragons.
  333. void innerSignalHandler(int signum, siginfo_t* info, void* /* uctx */) {
  334. // First, let's only let one thread in here at a time.
  335. pthread_t myId = pthread_self();
  336. pthread_t prevSignalThread = kInvalidThreadId;
  337. while (!gSignalThread.compare_exchange_strong(prevSignalThread, myId)) {
  338. if (pthread_equal(prevSignalThread, myId)) {
  339. // First time here. Try to dump the stack trace without symbolization.
  340. // If we still fail, well, we're mightily screwed, so we do nothing the
  341. // next time around.
  342. if (!gInRecursiveSignalHandler.exchange(true)) {
  343. print("Entered fatal signal handler recursively. We're in trouble.\n");
  344. gStackTracePrinter->printStackTrace(false); // no symbolization
  345. }
  346. return;
  347. }
  348. // Wait a while, try again.
  349. timespec ts;
  350. ts.tv_sec = 0;
  351. ts.tv_nsec = 100L * 1000 * 1000; // 100ms
  352. nanosleep(&ts, nullptr);
  353. prevSignalThread = kInvalidThreadId;
  354. }
  355. dumpTimeInfo();
  356. dumpSignalInfo(signum, info);
  357. gStackTracePrinter->printStackTrace(true); // with symbolization
  358. // Run user callbacks
  359. gFatalSignalCallbackRegistry->run();
  360. }
  361. void signalHandler(int signum, siginfo_t* info, void* uctx) {
  362. int savedErrno = errno;
  363. SCOPE_EXIT {
  364. flush();
  365. errno = savedErrno;
  366. };
  367. innerSignalHandler(signum, info, uctx);
  368. gSignalThread = kInvalidThreadId;
  369. // Kill ourselves with the previous handler.
  370. callPreviousSignalHandler(signum);
  371. }
  372. } // namespace
  373. void addFatalSignalCallback(SignalCallback cb) {
  374. gFatalSignalCallbackRegistry->add(cb);
  375. }
  376. void installFatalSignalCallbacks() {
  377. gFatalSignalCallbackRegistry->markInstalled();
  378. }
  379. namespace {
  // True once installFatalSignalHandler has run; makes later calls no-ops.
  // Initialized explicitly, like the other atomics in this file, rather than
  // relying on zero-initialization of static storage.
  std::atomic<bool> gAlreadyInstalled{false};
  // Small sigaltstack size threshold.
  // 8931 is known to cause the signal handler to stack overflow during
  // symbolization even for a simple one-liner "kill(getpid(), SIGTERM)".
  const size_t kSmallSigAltStackSize = 8931;

  // Returns true when an alternate signal stack is enabled for the calling
  // thread and its size is at most kSmallSigAltStackSize. Returns false when
  // the query fails or the alternate stack is disabled.
  bool isSmallSigAltStackEnabled() {
    stack_t current;
    const bool queried = sigaltstack(nullptr, &current) == 0;
    if (!queried) {
      return false;
    }
    const bool enabled = (current.ss_flags & SS_DISABLE) == 0;
    return enabled && current.ss_size <= kSmallSigAltStackSize;
  }
  395. } // namespace
  396. void installFatalSignalHandler() {
  397. if (gAlreadyInstalled.exchange(true)) {
  398. // Already done.
  399. return;
  400. }
  401. // If a small sigaltstack is enabled (ex. Rust stdlib might use sigaltstack
  402. // to set a small stack), the default SafeStackTracePrinter would likely
  403. // stack overflow. Replace it with the unsafe self-allocate printer.
  404. bool useUnsafePrinter = isSmallSigAltStackEnabled();
  405. if (useUnsafePrinter) {
  406. gStackTracePrinter = new UnsafeSelfAllocateStackTracePrinter();
  407. } else {
  408. gStackTracePrinter = new SafeStackTracePrinter();
  409. }
  410. struct sigaction sa;
  411. memset(&sa, 0, sizeof(sa));
  412. if (useUnsafePrinter) {
  413. // The signal handler is not async-signal-safe. Block all signals to
  414. // make it safer. But it's still unsafe.
  415. sigfillset(&sa.sa_mask);
  416. } else {
  417. sigemptyset(&sa.sa_mask);
  418. }
  419. // By default signal handlers are run on the signaled thread's stack.
  420. // In case of stack overflow running the SIGSEGV signal handler on
  421. // the same stack leads to another SIGSEGV and crashes the program.
  422. // Use SA_ONSTACK, so alternate stack is used (only if configured via
  423. // sigaltstack).
  424. // Golang also requires SA_ONSTACK. See:
  425. // https://golang.org/pkg/os/signal/#hdr-Go_programs_that_use_cgo_or_SWIG
  426. sa.sa_flags |= SA_SIGINFO | SA_ONSTACK;
  427. sa.sa_sigaction = &signalHandler;
  428. for (auto p = kFatalSignals; p->name; ++p) {
  429. CHECK_ERR(sigaction(p->number, &sa, &p->oldAction));
  430. }
  431. }
  432. } // namespace symbolizer
  433. } // namespace folly