TimeUtil.cpp 8.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289
  1. /*
  2. * Copyright 2014-present Facebook, Inc.
  3. *
  4. * Licensed under the Apache License, Version 2.0 (the "License");
  5. * you may not use this file except in compliance with the License.
  6. * You may obtain a copy of the License at
  7. *
  8. * http://www.apache.org/licenses/LICENSE-2.0
  9. *
  10. * Unless required by applicable law or agreed to in writing, software
  11. * distributed under the License is distributed on an "AS IS" BASIS,
  12. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13. * See the License for the specific language governing permissions and
  14. * limitations under the License.
  15. */
  16. #ifndef __STDC_FORMAT_MACROS
  17. #define __STDC_FORMAT_MACROS
  18. #endif
  19. #include <folly/io/async/test/TimeUtil.h>
  20. #include <errno.h>
  21. #include <fcntl.h>
  22. #include <sys/stat.h>
  23. #include <sys/types.h>
  24. #ifdef __linux__
  25. #include <sys/utsname.h>
  26. #endif
  27. #include <chrono>
  28. #include <ostream>
  29. #include <stdexcept>
  30. #include <folly/Conv.h>
  31. #include <folly/ScopeGuard.h>
  32. #include <folly/String.h>
  33. #include <folly/portability/Unistd.h>
  34. #include <folly/system/ThreadId.h>
  35. #include <glog/logging.h>
  36. using std::string;
  37. using namespace std::chrono;
  38. namespace folly {
  39. static int getLinuxVersion(StringPiece release) {
  40. auto dot1 = release.find('.');
  41. if (dot1 == StringPiece::npos) {
  42. throw std::invalid_argument("could not find first dot");
  43. }
  44. auto v1 = folly::to<int>(release.subpiece(0, dot1));
  45. auto dot2 = release.find('.', dot1 + 1);
  46. if (dot2 == StringPiece::npos) {
  47. throw std::invalid_argument("could not find second dot");
  48. }
  49. auto v2 = folly::to<int>(release.subpiece(dot1 + 1, dot2 - (dot1 + 1)));
  50. int dash = release.find('-', dot2 + 1);
  51. auto v3 = folly::to<int>(release.subpiece(dot2 + 1, dash - (dot2 + 1)));
  52. return ((v1 * 1000 + v2) * 1000) + v3;
  53. }
  54. #ifdef __linux__
  55. /**
  56. * Determine the time units used in /proc/<pid>/schedstat
  57. *
  58. * Returns the number of nanoseconds per time unit,
  59. * or -1 if we cannot determine the units.
  60. */
  61. static int64_t determineSchedstatUnits() {
  62. struct utsname unameInfo;
  63. if (uname(&unameInfo) != 0) {
  64. LOG(ERROR) << "unable to determine jiffies/second: uname failed: %s"
  65. << errnoStr(errno);
  66. return -1;
  67. }
  68. // In Linux version 2.6.23 and later, time time values are always
  69. // reported in nanoseconds.
  70. //
  71. // This change appears to have been made in commit 425e0968a25f, which
  72. // moved some of the sched stats code to a new file. Despite the commit
  73. // message claiming "no code changes are caused by this patch", it changed
  74. // the task.sched_info.cpu_time and task.sched_info.run_delay counters to be
  75. // computed using sched_clock() rather than jiffies.
  76. int linuxVersion;
  77. try {
  78. linuxVersion = getLinuxVersion(unameInfo.release);
  79. } catch (const std::exception&) {
  80. LOG(ERROR) << "unable to determine jiffies/second: failed to parse "
  81. << "kernel release string \"" << unameInfo.release << "\"";
  82. return -1;
  83. }
  84. if (linuxVersion >= 2006023) {
  85. // The units are nanoseconds
  86. return 1;
  87. }
  88. // In Linux versions prior to 2.6.23, the time values are reported in
  89. // jiffies. This is somewhat unfortunate, as the number of jiffies per
  90. // second is configurable. We have to determine the units being used.
  91. //
  92. // It seems like the only real way to figure out the CONFIG_HZ value used by
  93. // this kernel is to look it up in the config file.
  94. //
  95. // Look in /boot/config-<kernel_release>
  96. char configPath[256];
  97. snprintf(
  98. configPath, sizeof(configPath), "/boot/config-%s", unameInfo.release);
  99. FILE* f = fopen(configPath, "r");
  100. if (f == nullptr) {
  101. LOG(ERROR) << "unable to determine jiffies/second: "
  102. "cannot open kernel config file %s"
  103. << configPath;
  104. return -1;
  105. }
  106. SCOPE_EXIT {
  107. fclose(f);
  108. };
  109. int64_t hz = -1;
  110. char buf[1024];
  111. while (fgets(buf, sizeof(buf), f) != nullptr) {
  112. if (strcmp(buf, "CONFIG_NO_HZ=y\n") == 0) {
  113. LOG(ERROR) << "unable to determine jiffies/second: tickless kernel";
  114. return -1;
  115. } else if (strcmp(buf, "CONFIG_HZ=1000\n") == 0) {
  116. hz = 1000;
  117. } else if (strcmp(buf, "CONFIG_HZ=300\n") == 0) {
  118. hz = 300;
  119. } else if (strcmp(buf, "CONFIG_HZ=250\n") == 0) {
  120. hz = 250;
  121. } else if (strcmp(buf, "CONFIG_HZ=100\n") == 0) {
  122. hz = 100;
  123. }
  124. }
  125. if (hz == -1) {
  126. LOG(ERROR) << "unable to determine jiffies/second: no CONFIG_HZ setting "
  127. "found in %s"
  128. << configPath;
  129. return -1;
  130. }
  131. return hz;
  132. }
  133. #endif
  134. /**
  135. * Determine how long this process has spent waiting to get scheduled on the
  136. * CPU.
  137. *
  138. * Returns the number of nanoseconds spent waiting, or -1 if the amount of
  139. * time cannot be determined.
  140. */
  141. static nanoseconds getSchedTimeWaiting(pid_t tid) {
  142. #ifndef __linux__
  143. (void)tid;
  144. return nanoseconds(0);
  145. #else
  146. static int64_t timeUnits = determineSchedstatUnits();
  147. if (timeUnits < 0) {
  148. // We couldn't figure out how many jiffies there are in a second.
  149. // Don't bother reading the schedstat info if we can't interpret it.
  150. return nanoseconds(0);
  151. }
  152. int fd = -1;
  153. try {
  154. char schedstatFile[256];
  155. snprintf(schedstatFile, sizeof(schedstatFile), "/proc/%d/schedstat", tid);
  156. fd = open(schedstatFile, O_RDONLY);
  157. if (fd < 0) {
  158. throw std::runtime_error(
  159. folly::to<string>("failed to open process schedstat file", errno));
  160. }
  161. char buf[512];
  162. ssize_t bytesReadRet = read(fd, buf, sizeof(buf) - 1);
  163. if (bytesReadRet <= 0) {
  164. throw std::runtime_error(
  165. folly::to<string>("failed to read process schedstat file", errno));
  166. }
  167. size_t bytesRead = size_t(bytesReadRet);
  168. if (buf[bytesRead - 1] != '\n') {
  169. throw std::runtime_error("expected newline at end of schedstat data");
  170. }
  171. assert(bytesRead < sizeof(buf));
  172. buf[bytesRead] = '\0';
  173. uint64_t activeJiffies = 0;
  174. uint64_t waitingJiffies = 0;
  175. uint64_t numTasks = 0;
  176. int rc = sscanf(
  177. buf,
  178. "%" PRIu64 " %" PRIu64 " %" PRIu64 "\n",
  179. &activeJiffies,
  180. &waitingJiffies,
  181. &numTasks);
  182. if (rc != 3) {
  183. throw std::runtime_error("failed to parse schedstat data");
  184. }
  185. close(fd);
  186. return nanoseconds(waitingJiffies * timeUnits);
  187. } catch (const std::runtime_error& e) {
  188. if (fd >= 0) {
  189. close(fd);
  190. }
  191. LOG(ERROR) << "error determining process wait time: %s" << e.what();
  192. return nanoseconds(0);
  193. }
  194. #endif
  195. }
  196. void TimePoint::reset() {
  197. // Remember the current time
  198. timeStart_ = steady_clock::now();
  199. // Remember how long this process has spent waiting to be scheduled
  200. tid_ = getOSThreadID();
  201. timeWaiting_ = getSchedTimeWaiting(tid_);
  202. // In case it took a while to read the schedstat info,
  203. // also record the time after the schedstat check
  204. timeEnd_ = steady_clock::now();
  205. }
  206. std::ostream& operator<<(std::ostream& os, const TimePoint& timePoint) {
  207. os << "TimePoint(" << timePoint.getTimeStart().time_since_epoch().count()
  208. << ", " << timePoint.getTimeEnd().time_since_epoch().count() << ", "
  209. << timePoint.getTimeWaiting().count() << ")";
  210. return os;
  211. }
  212. bool checkTimeout(
  213. const TimePoint& start,
  214. const TimePoint& end,
  215. nanoseconds expected,
  216. bool allowSmaller,
  217. nanoseconds tolerance) {
  218. auto elapsedTime = end.getTimeStart() - start.getTimeEnd();
  219. if (!allowSmaller) {
  220. // Timeouts should never fire before the time was up.
  221. // Allow 1ms of wiggle room for rounding errors.
  222. if (elapsedTime < (expected - milliseconds(1))) {
  223. return false;
  224. }
  225. }
  226. // Check that the event fired within a reasonable time of the timout.
  227. //
  228. // If the system is under heavy load, our process may have had to wait for a
  229. // while to be run. The time spent waiting for the processor shouldn't
  230. // count against us, so exclude this time from the check.
  231. nanoseconds timeExcluded;
  232. if (end.getTid() != start.getTid()) {
  233. // We can only correctly compute the amount of time waiting to be scheduled
  234. // if both TimePoints were set in the same thread.
  235. timeExcluded = nanoseconds(0);
  236. } else {
  237. timeExcluded = end.getTimeWaiting() - start.getTimeWaiting();
  238. assert(end.getTimeWaiting() >= start.getTimeWaiting());
  239. // Add a tolerance here due to precision issues on linux, see below note.
  240. assert((elapsedTime + tolerance) >= timeExcluded);
  241. }
  242. nanoseconds effectiveElapsedTime(0);
  243. if (elapsedTime > timeExcluded) {
  244. effectiveElapsedTime = elapsedTime - timeExcluded;
  245. }
  246. // On x86 Linux, sleep calls generally have precision only to the nearest
  247. // millisecond. The tolerance parameter lets users allow a few ms of slop.
  248. auto overrun = effectiveElapsedTime - expected;
  249. if (overrun > tolerance) {
  250. return false;
  251. }
  252. return true;
  253. }
  254. } // namespace folly