deadlock.py 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452
  1. #!/usr/bin/env python3
  2. from collections import defaultdict
  3. from enum import Enum
  4. import gdb
  5. import re
  6. class DiGraph(object):
  7. '''
  8. Adapted from networkx: http://networkx.github.io/
  9. Represents a directed graph. Edges can store (key, value) attributes.
  10. '''
  11. def __init__(self):
  12. # Map of node -> set of nodes
  13. self.adjacency_map = {}
  14. # Map of (node1, node2) -> map string -> arbitrary attribute
  15. # This will not be copied in subgraph()
  16. self.attributes_map = {}
  17. def neighbors(self, node):
  18. return self.adjacency_map.get(node, set())
  19. def edges(self):
  20. edges = []
  21. for node, neighbors in self.adjacency_map.items():
  22. for neighbor in neighbors:
  23. edges.append((node, neighbor))
  24. return edges
  25. def nodes(self):
  26. return self.adjacency_map.keys()
  27. def attributes(self, node1, node2):
  28. return self.attributes_map[(node1, node2)]
  29. def add_edge(self, node1, node2, **kwargs):
  30. if node1 not in self.adjacency_map:
  31. self.adjacency_map[node1] = set()
  32. if node2 not in self.adjacency_map:
  33. self.adjacency_map[node2] = set()
  34. self.adjacency_map[node1].add(node2)
  35. self.attributes_map[(node1, node2)] = kwargs
  36. def remove_node(self, node):
  37. self.adjacency_map.pop(node, None)
  38. for _, neighbors in self.adjacency_map.items():
  39. neighbors.discard(node)
  40. def subgraph(self, nodes):
  41. graph = DiGraph()
  42. for node in nodes:
  43. for neighbor in self.neighbors(node):
  44. if neighbor in nodes:
  45. graph.add_edge(node, neighbor)
  46. return graph
  47. def node_link_data(self):
  48. '''
  49. Returns the graph as a dictionary in a format that can be
  50. serialized.
  51. '''
  52. data = {
  53. 'directed': True,
  54. 'multigraph': False,
  55. 'graph': {},
  56. 'links': [],
  57. 'nodes': [],
  58. }
  59. # Do one pass to build a map of node -> position in nodes
  60. node_to_number = {}
  61. for node in self.adjacency_map.keys():
  62. node_to_number[node] = len(data['nodes'])
  63. data['nodes'].append({'id': node})
  64. # Do another pass to build the link information
  65. for node, neighbors in self.adjacency_map.items():
  66. for neighbor in neighbors:
  67. link = self.attributes_map[(node, neighbor)].copy()
  68. link['source'] = node_to_number[node]
  69. link['target'] = node_to_number[neighbor]
  70. data['links'].append(link)
  71. return data
  72. def strongly_connected_components(G): # noqa: C901
  73. '''
  74. Adapted from networkx: http://networkx.github.io/
  75. Parameters
  76. ----------
  77. G : DiGraph
  78. Returns
  79. -------
  80. comp : generator of sets
  81. A generator of sets of nodes, one for each strongly connected
  82. component of G.
  83. '''
  84. preorder = {}
  85. lowlink = {}
  86. scc_found = {}
  87. scc_queue = []
  88. i = 0 # Preorder counter
  89. for source in G.nodes():
  90. if source not in scc_found:
  91. queue = [source]
  92. while queue:
  93. v = queue[-1]
  94. if v not in preorder:
  95. i = i + 1
  96. preorder[v] = i
  97. done = 1
  98. v_nbrs = G.neighbors(v)
  99. for w in v_nbrs:
  100. if w not in preorder:
  101. queue.append(w)
  102. done = 0
  103. break
  104. if done == 1:
  105. lowlink[v] = preorder[v]
  106. for w in v_nbrs:
  107. if w not in scc_found:
  108. if preorder[w] > preorder[v]:
  109. lowlink[v] = min([lowlink[v], lowlink[w]])
  110. else:
  111. lowlink[v] = min([lowlink[v], preorder[w]])
  112. queue.pop()
  113. if lowlink[v] == preorder[v]:
  114. scc_found[v] = True
  115. scc = {v}
  116. while (
  117. scc_queue and preorder[scc_queue[-1]] > preorder[v]
  118. ):
  119. k = scc_queue.pop()
  120. scc_found[k] = True
  121. scc.add(k)
  122. yield scc
  123. else:
  124. scc_queue.append(v)
  125. def simple_cycles(G): # noqa: C901
  126. '''
  127. Adapted from networkx: http://networkx.github.io/
  128. Parameters
  129. ----------
  130. G : DiGraph
  131. Returns
  132. -------
  133. cycle_generator: generator
  134. A generator that produces elementary cycles of the graph.
  135. Each cycle is represented by a list of nodes along the cycle.
  136. '''
  137. def _unblock(thisnode, blocked, B):
  138. stack = set([thisnode])
  139. while stack:
  140. node = stack.pop()
  141. if node in blocked:
  142. blocked.remove(node)
  143. stack.update(B[node])
  144. B[node].clear()
  145. # Johnson's algorithm requires some ordering of the nodes.
  146. # We assign the arbitrary ordering given by the strongly connected comps
  147. # There is no need to track the ordering as each node removed as processed.
  148. # save the actual graph so we can mutate it here
  149. # We only take the edges because we do not want to
  150. # copy edge and node attributes here.
  151. subG = G.subgraph(G.nodes())
  152. sccs = list(strongly_connected_components(subG))
  153. while sccs:
  154. scc = sccs.pop()
  155. # order of scc determines ordering of nodes
  156. startnode = scc.pop()
  157. # Processing node runs 'circuit' routine from recursive version
  158. path = [startnode]
  159. blocked = set() # vertex: blocked from search?
  160. closed = set() # nodes involved in a cycle
  161. blocked.add(startnode)
  162. B = defaultdict(set) # graph portions that yield no elementary circuit
  163. stack = [(startnode, list(subG.neighbors(startnode)))]
  164. while stack:
  165. thisnode, nbrs = stack[-1]
  166. if nbrs:
  167. nextnode = nbrs.pop()
  168. if nextnode == startnode:
  169. yield path[:]
  170. closed.update(path)
  171. elif nextnode not in blocked:
  172. path.append(nextnode)
  173. stack.append((nextnode, list(subG.neighbors(nextnode))))
  174. closed.discard(nextnode)
  175. blocked.add(nextnode)
  176. continue
  177. # done with nextnode... look for more neighbors
  178. if not nbrs: # no more nbrs
  179. if thisnode in closed:
  180. _unblock(thisnode, blocked, B)
  181. else:
  182. for nbr in subG.neighbors(thisnode):
  183. if thisnode not in B[nbr]:
  184. B[nbr].add(thisnode)
  185. stack.pop()
  186. path.pop()
  187. # done processing this node
  188. subG.remove_node(startnode)
  189. H = subG.subgraph(scc) # make smaller to avoid work in SCC routine
  190. sccs.extend(list(strongly_connected_components(H)))
  191. def find_cycle(graph):
  192. '''
  193. Looks for a cycle in the graph. If found, returns the first cycle.
  194. If nodes a1, a2, ..., an are in a cycle, then this returns:
  195. [(a1,a2), (a2,a3), ... (an-1,an), (an, a1)]
  196. Otherwise returns an empty list.
  197. '''
  198. cycles = list(simple_cycles(graph))
  199. if cycles:
  200. nodes = cycles[0]
  201. nodes.append(nodes[0])
  202. edges = []
  203. prev = nodes[0]
  204. for node in nodes[1:]:
  205. edges.append((prev, node))
  206. prev = node
  207. return edges
  208. else:
  209. return []
  210. def get_stacktrace(thread_id):
  211. '''
  212. Returns the stack trace for the thread id as a list of strings.
  213. '''
  214. gdb.execute('thread %d' % thread_id, from_tty=False, to_string=True)
  215. output = gdb.execute('bt', from_tty=False, to_string=True)
  216. stacktrace_lines = output.strip().split('\n')
  217. return stacktrace_lines
  218. def is_thread_blocked_with_frame(
  219. thread_id, top_line, expected_top_line, expected_frame
  220. ):
  221. '''
  222. Returns True if we found expected_top_line in top_line, and
  223. we found the expected_frame in the thread's stack trace.
  224. '''
  225. if expected_top_line not in top_line:
  226. return False
  227. stacktrace_lines = get_stacktrace(thread_id)
  228. return any(expected_frame in line for line in stacktrace_lines)
  229. class MutexType(Enum):
  230. '''Types of mutexes that we can detect deadlocks.'''
  231. PTHREAD_MUTEX_T = 'pthread_mutex_t'
  232. PTHREAD_RWLOCK_T = 'pthread_rwlock_t'
  233. @staticmethod
  234. def get_mutex_type(thread_id, top_line):
  235. '''
  236. Returns the probable mutex type, based on the first line
  237. of the thread's stack. Returns None if not found.
  238. '''
  239. if is_thread_blocked_with_frame(
  240. thread_id, top_line, '__lll_lock_wait', 'pthread_mutex'
  241. ):
  242. return MutexType.PTHREAD_MUTEX_T
  243. if is_thread_blocked_with_frame(
  244. thread_id, top_line, 'futex_wait', 'pthread_rwlock'
  245. ):
  246. return MutexType.PTHREAD_RWLOCK_T
  247. return None
  248. @staticmethod
  249. def get_mutex_owner_and_address_func_for_type(mutex_type):
  250. '''
  251. Returns a function to resolve the mutex owner and address for
  252. the given type. The returned function f has the following
  253. signature:
  254. f: args: (map of thread lwp -> thread id), blocked thread lwp
  255. returns: (lwp of thread owning mutex, mutex address)
  256. or (None, None) if not found.
  257. Returns None if there is no function for this mutex_type.
  258. '''
  259. if mutex_type == MutexType.PTHREAD_MUTEX_T:
  260. return get_pthread_mutex_t_owner_and_address
  261. if mutex_type == MutexType.PTHREAD_RWLOCK_T:
  262. return get_pthread_rwlock_t_owner_and_address
  263. return None
  264. def print_cycle(graph, lwp_to_thread_id, cycle):
  265. '''Prints the threads and mutexes involved in the deadlock.'''
  266. for (m, n) in cycle:
  267. print(
  268. 'Thread %d (LWP %d) is waiting on %s (0x%016x) held by '
  269. 'Thread %d (LWP %d)' % (
  270. lwp_to_thread_id[m], m,
  271. graph.attributes(m, n)['mutex_type'].value,
  272. graph.attributes(m, n)['mutex'], lwp_to_thread_id[n], n
  273. )
  274. )
  275. def get_thread_info():
  276. '''
  277. Returns a pair of:
  278. - map of LWP -> thread ID
  279. - map of blocked threads LWP -> potential mutex type
  280. '''
  281. # LWP -> thread ID
  282. lwp_to_thread_id = {}
  283. # LWP -> potential mutex type it is blocked on
  284. blocked_threads = {}
  285. output = gdb.execute('info threads', from_tty=False, to_string=True)
  286. lines = output.strip().split('\n')[1:]
  287. regex = re.compile(r'[\s\*]*(\d+).*Thread.*\(LWP (\d+)\).*')
  288. for line in lines:
  289. try:
  290. thread_id = int(regex.match(line).group(1))
  291. thread_lwp = int(regex.match(line).group(2))
  292. lwp_to_thread_id[thread_lwp] = thread_id
  293. mutex_type = MutexType.get_mutex_type(thread_id, line)
  294. if mutex_type:
  295. blocked_threads[thread_lwp] = mutex_type
  296. except Exception:
  297. continue
  298. return (lwp_to_thread_id, blocked_threads)
  299. def get_pthread_mutex_t_owner_and_address(lwp_to_thread_id, thread_lwp):
  300. '''
  301. Finds the thread holding the mutex that this thread is blocked on.
  302. Returns a pair of (lwp of thread owning mutex, mutex address),
  303. or (None, None) if not found.
  304. '''
  305. # Go up the stack to the pthread_mutex_lock frame
  306. gdb.execute(
  307. 'thread %d' % lwp_to_thread_id[thread_lwp],
  308. from_tty=False,
  309. to_string=True
  310. )
  311. gdb.execute('frame 1', from_tty=False, to_string=True)
  312. # Get the owner of the mutex by inspecting the internal
  313. # fields of the mutex.
  314. try:
  315. mutex_info = gdb.parse_and_eval('mutex').dereference()
  316. mutex_owner_lwp = int(mutex_info['__data']['__owner'])
  317. return (mutex_owner_lwp, int(mutex_info.address))
  318. except gdb.error:
  319. return (None, None)
  320. def get_pthread_rwlock_t_owner_and_address(lwp_to_thread_id, thread_lwp):
  321. '''
  322. If the thread is waiting on a write-locked pthread_rwlock_t, this will
  323. return the pair of:
  324. (lwp of thread that is write-owning the mutex, mutex address)
  325. or (None, None) if not found, or if the mutex is read-locked.
  326. '''
  327. # Go up the stack to the pthread_rwlock_{rd|wr}lock frame
  328. gdb.execute(
  329. 'thread %d' % lwp_to_thread_id[thread_lwp],
  330. from_tty=False,
  331. to_string=True
  332. )
  333. gdb.execute('frame 2', from_tty=False, to_string=True)
  334. # Get the owner of the mutex by inspecting the internal
  335. # fields of the mutex.
  336. try:
  337. rwlock_info = gdb.parse_and_eval('rwlock').dereference()
  338. rwlock_owner_lwp = int(rwlock_info['__data']['__writer'])
  339. # We can only track the owner if it is currently write-locked.
  340. # If it is not write-locked or if it is currently read-locked,
  341. # possibly by multiple threads, we cannot find the owner.
  342. if rwlock_owner_lwp != 0:
  343. return (rwlock_owner_lwp, int(rwlock_info.address))
  344. else:
  345. return (None, None)
  346. except gdb.error:
  347. return (None, None)
  348. class Deadlock(gdb.Command):
  349. '''Detects deadlocks'''
  350. def __init__(self):
  351. super(Deadlock, self).__init__('deadlock', gdb.COMMAND_NONE)
  352. def invoke(self, arg, from_tty):
  353. '''Prints the threads and mutexes in a deadlock, if it exists.'''
  354. lwp_to_thread_id, blocked_threads = get_thread_info()
  355. # Nodes represent threads. Edge (A,B) exists if thread A
  356. # is waiting on a mutex held by thread B.
  357. graph = DiGraph()
  358. # Go through all the blocked threads and see which threads
  359. # they are blocked on, and build the thread wait graph.
  360. for thread_lwp, mutex_type in blocked_threads.items():
  361. get_owner_and_address_func = \
  362. MutexType.get_mutex_owner_and_address_func_for_type(mutex_type)
  363. if not get_owner_and_address_func:
  364. continue
  365. mutex_owner_lwp, mutex_address = get_owner_and_address_func(
  366. lwp_to_thread_id, thread_lwp
  367. )
  368. if mutex_owner_lwp and mutex_address:
  369. graph.add_edge(
  370. thread_lwp,
  371. mutex_owner_lwp,
  372. mutex=mutex_address,
  373. mutex_type=mutex_type
  374. )
  375. # A deadlock exists if there is a cycle in the graph.
  376. cycle = find_cycle(graph)
  377. if cycle:
  378. print('Found deadlock!')
  379. print_cycle(graph, lwp_to_thread_id, cycle)
  380. else:
  381. print(
  382. 'No deadlock detected. '
  383. 'Do you have debug symbols installed?'
  384. )
  385. def load():
  386. # instantiate the Deadlock command
  387. Deadlock()
  388. print('Type "deadlock" to detect deadlocks.')
  389. def info():
  390. return 'Detect deadlocks'
  391. if __name__ == '__main__':
  392. load()