Skip to content

Commit 4fd7359

Browse files
committed
runq: add anomaly checking for runqueues.
Issues like missing IPIs or irq disablement for long, can create obervable anomalies on scheduler side. For example a scheduler IPI, missed/absent due to hypervisor issue, can cause migration/X threads on VMs to get stuck for ever, causing softlockup. Having a knowledge of these anomalies can help in locating the actual problem. For example following snippet shows that migration/23 thread was stuck in runq's wake_list. cpu: 23 has following tasks in its runq wake_list: task pid: 273301, comm: ora_ipc0_csadsd task pid: 281335, comm: ora_lms0_ccawsd task pid: 391691, comm: ora_ipc0_webprd task pid: 390722, comm: ora_ipc0_dppd1 task pid: 394144, comm: ora_lmd1_audprd task pid: 394450, comm: ora_lms1_rbsspr task pid: 393235, comm: ora_lmd0_etudpr task pid: 24469, comm: cvfwd task pid: 357613, comm: tnslsnr task pid: 351700, comm: ocssd.bin task pid: 394519, comm: ora_dia0_wspd1 task pid: 394307, comm: ora_lms1_wmsprd task pid: 394773, comm: ora_lms0_ccadmp task pid: 351141, comm: ocssd.bin task pid: 394690, comm: ora_lms0_wspd1 task pid: 351774, comm: ocssd.bin task pid: 351678, comm: ocssd.bin task pid: 351692, comm: ocssd.bin task pid: 351683, comm: ocssd.bin task pid: 351680, comm: ocssd.bin task pid: 351686, comm: ocssd.bin task pid: 351681, comm: ocssd.bin task pid: 351688, comm: ocssd.bin task pid: 150, comm: migration/23 This will block migration/X threads running on other CPUs. The helpers and corelens module added here can detect such issues. Signed-off-by: Imran Khan <imran.f.khan@oracle.com>
1 parent f67aa34 commit 4fd7359

File tree

1 file changed

+313
-0
lines changed

1 file changed

+313
-0
lines changed

drgn_tools/runq.py

Lines changed: 313 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,45 @@
11
# Copyright (c) 2023, Oracle and/or its affiliates.
22
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
33
import argparse
4+
from typing import Iterator
5+
from typing import Set
46

57
from drgn import container_of
68
from drgn import Object
79
from drgn import Program
10+
from drgn import sizeof
811
from drgn.helpers.common import escape_ascii_string
12+
from drgn.helpers.linux.bitops import for_each_set_bit
913
from drgn.helpers.linux.cpumask import for_each_online_cpu
1014
from drgn.helpers.linux.list import list_for_each_entry
15+
from drgn.helpers.linux.llist import llist_empty
16+
from drgn.helpers.linux.llist import llist_for_each_entry
1117
from drgn.helpers.linux.percpu import per_cpu
18+
from drgn.helpers.linux.rbtree import rbtree_inorder_for_each_entry
1219

1320
from drgn_tools.corelens import CorelensModule
1421
from drgn_tools.task import task_lastrun2now
1522
from drgn_tools.util import timestamp_str
1623

24+
EXPLAIN_WAKELIST = """
25+
[TASKS ON WAKE LIST]
26+
A runq's wakelist temporarily holds tasks that are about to be woken
27+
up on that CPU. If this list has multiple tasks, it usually means that
28+
this CPU has missed multiple scheduler IPIs. This can imply issues
29+
like IRQs being disabled for too long, IRQ delivery issues between hypervisor
30+
and VM, or some other issue.
31+
"""
32+
EXPLAIN_IDLE_RUNQ = """
33+
[IDLE RUN QUEUE]
34+
An idle runq:
35+
* should have current pid as 0.
36+
* should have nr_running as 0.
37+
* ideally should not have any tasks.
38+
this is not necessarily an error because
39+
scheduler may just be bringing it out of idle.
40+
"""
41+
42+
1743
# List runqueus per cpu
1844

1945

@@ -97,6 +123,293 @@ def run_queue(prog: Program) -> None:
97123
print()
98124

99125

126+
def for_each_task_on_cfs_rq(prog: Program, cpu: int) -> Iterator[Object]:
127+
"""
128+
Iterate through all tasks, on a CPU's CFS runq.
129+
130+
:param prog: drgn program
131+
:param cpu: cpu number
132+
:return: Iterator of ``struct task_struct *`` objects
133+
"""
134+
runq = per_cpu(prog["runqueues"], cpu)
135+
cfs_rq = runq.cfs
136+
rb_root = cfs_rq.tasks_timeline.rb_root
137+
for se in rbtree_inorder_for_each_entry(
138+
"struct sched_entity", rb_root, "run_node"
139+
):
140+
task = container_of(se, "struct task_struct", "se")
141+
yield task
142+
143+
144+
def num_tasks_on_cfs_rq(prog: Program, cpu: int) -> int:
145+
"""
146+
Get number of tasks, on a CPU's CFS runq.
147+
148+
:param cpu: cpu number
149+
:return: number of tasks
150+
"""
151+
return len(list(for_each_task_on_cfs_rq(prog, cpu)))
152+
153+
154+
def for_each_task_on_rt_rq(prog: Program, cpu: int) -> Iterator[Object]:
155+
"""
156+
Iterate through all tasks, on a CPU's RT runq.
157+
158+
:param prog: drgn program
159+
:param cpu: cpu number
160+
:return: Iterator of ``struct task_struct *`` objects
161+
"""
162+
runq = per_cpu(prog["runqueues"], cpu)
163+
rt_rq = runq.rt
164+
active = rt_rq.active
165+
queues = active.queue
166+
for prio in for_each_set_bit(active.bitmap, sizeof(active.bitmap) * 8):
167+
head = queues[prio]
168+
if prio > 99:
169+
break
170+
171+
for se in list_for_each_entry(
172+
"struct sched_rt_entity", head.address_of_(), "run_list"
173+
):
174+
task = container_of(se, "struct task_struct", "rt")
175+
yield task
176+
177+
178+
def num_tasks_on_rt_rq(prog: Program, cpu: int) -> int:
179+
"""
180+
Get number of tasks, on a CPU's RT runq.
181+
182+
:param cpu: cpu number
183+
:return: number of tasks
184+
"""
185+
return len(list(for_each_task_on_rt_rq(prog, cpu)))
186+
187+
188+
def for_each_task_on_dl_rq(prog: Program, cpu: int) -> Iterator[Object]:
189+
"""
190+
Iterate through all tasks, on a CPU's DL runq.
191+
192+
:param prog: drgn program
193+
:param cpu: cpu number
194+
:return: Iterator of ``struct task_struct *`` objects
195+
"""
196+
runq = per_cpu(prog["runqueues"], cpu)
197+
dl_rq = runq.dl
198+
rb_root = dl_rq.root.rb_root
199+
for dl in rbtree_inorder_for_each_entry(
200+
"struct sched_dl_entity", rb_root, "rb_node"
201+
):
202+
task = container_of(dl, "struct task_struct", "dl")
203+
yield task
204+
205+
206+
def num_tasks_on_dl_rq(prog: Program, cpu: int) -> int:
207+
"""
208+
Get number of tasks, on a CPU's DL runq.
209+
210+
:param prog: drgn program
211+
:param cpu: cpu number
212+
:return: number of tasks
213+
"""
214+
return len(list(for_each_task_on_dl_rq(prog, cpu)))
215+
216+
217+
def for_each_task_on_rq(prog: Program, cpu: int) -> Iterator[Object]:
218+
"""
219+
Iterate through all tasks, on a CPU's runq.
220+
221+
:param prog: drgn program
222+
:param cpu: cpu number
223+
:return: Iterator of ``struct task_struct *`` objects
224+
"""
225+
226+
yield from for_each_task_on_rt_rq(prog, cpu)
227+
yield from for_each_task_on_cfs_rq(prog, cpu)
228+
yield from for_each_task_on_dl_rq(prog, cpu)
229+
230+
231+
def for_each_task_on_rq_wake_list(prog: Program, cpu: int) -> Iterator[Object]:
232+
"""
233+
Iterate through all tasks, on a CPU's wake_list.
234+
235+
A CPU's wake_list contains tasks, that are in the process of being woken
236+
up and have not yet landed on a CPU runq after wakeup.
237+
Tasks should not reside here for long.
238+
239+
:param prog: drgn program
240+
:param cpu: cpu number
241+
:return: Iterator of ``struct task_struct *`` objects
242+
"""
243+
244+
runq = per_cpu(prog["runqueues"], cpu)
245+
for task in llist_for_each_entry(
246+
"struct task_struct", runq.wake_list.first, "wake_entry"
247+
):
248+
yield task
249+
250+
251+
def check_idle_runq(prog: Program, cpu: int, explanations: Set[str]) -> None:
252+
"""
253+
Check an idle runq.
254+
255+
:param prog: drgn program
256+
:param cpu: cpu number
257+
"""
258+
runq = per_cpu(prog["runqueues"], cpu)
259+
if runq.curr.pid.value_() != 0:
260+
return
261+
262+
explanations.add(EXPLAIN_IDLE_RUNQ)
263+
if runq.nr_running.value_() != 0:
264+
print(f"Idle cpu: {cpu} has non-zero nr_running")
265+
266+
nr_cfs_task = num_tasks_on_cfs_rq(prog, cpu)
267+
nr_rt_task = num_tasks_on_rt_rq(prog, cpu)
268+
nr_dl_task = num_tasks_on_dl_rq(prog, cpu)
269+
270+
if nr_cfs_task != 0:
271+
print(f"Idle cpu: {cpu} has {nr_cfs_task} tasks on CFS runq")
272+
273+
if nr_rt_task != 0:
274+
print(f"Idle cpu: {cpu} has {nr_rt_task} tasks on RT runq")
275+
276+
if nr_dl_task != 0:
277+
print(f"Idle cpu: {cpu} has {nr_dl_task} tasks on DL runq")
278+
print("See IDLE RUN QUEUE below")
279+
280+
281+
def check_runq_wakelist(
282+
prog: Program, cpu: int, explanations: Set[str]
283+
) -> None:
284+
"""
285+
Check runq's wakelist.
286+
287+
:param prog: drgn program
288+
:param cpu: cpu number
289+
"""
290+
runq = per_cpu(prog["runqueues"], cpu)
291+
if llist_empty(runq.wake_list):
292+
return
293+
294+
print("\n")
295+
explanations.add(EXPLAIN_WAKELIST)
296+
print(f"cpu: {cpu} has following tasks in its runq wake_list:")
297+
for task in for_each_task_on_rq_wake_list(prog, cpu):
298+
print(
299+
f"task pid: {task.pid.value_()}, comm: {task.comm.string_().decode()}"
300+
)
301+
print("See TASKS ON WAKE LIST below")
302+
303+
304+
def dump_rt_runq_wait_summary(
305+
prog: Program, cpu: int, qduration_thresh_ms: int = 1000
306+
):
307+
"""
308+
Iterate through all tasks, on a CPU's runq and list tasks that have been queued
309+
for greater than specified threshold in ms (default 1000 ms).
310+
311+
:param prog: drgn Program
312+
:param cpu: cpu number
313+
:param qduration_thresh_ms: threshold for wait duration on runq
314+
"""
315+
runq = per_cpu(prog["runqueues"], cpu)
316+
for task in for_each_task_on_rt_rq(prog, cpu):
317+
try:
318+
if (
319+
task.sched_info.last_queued.value_() > 0
320+
and task.sched_info.last_queued.value_()
321+
> task.sched_info.last_arrival.value_()
322+
):
323+
qduration = (
324+
runq.clock.value_() - task.sched_info.last_queued.value_()
325+
)
326+
print(
327+
f"cpu: {cpu} pid: {task.pid.value_()} prio: {task.prio.value_()} queued for {qduration} nsecs"
328+
)
329+
except AttributeError:
330+
pass
331+
332+
333+
def dump_cfs_runq_wait_summary(
334+
prog: Program, cpu: int, qduration_thresh_ms: int = 1000
335+
):
336+
"""
337+
Iterate through all tasks, on a CPU's runq and list tasks that have been queued
338+
for greater than specified threshold in ms (default 1000 ms).
339+
340+
:param prog: drgn Program
341+
:param cpu: cpu number
342+
:param qduration_thresh_ms: threshold for wait duration on runq
343+
"""
344+
runq = per_cpu(prog["runqueues"], cpu)
345+
for task in for_each_task_on_cfs_rq(prog, cpu):
346+
try:
347+
if (
348+
task.sched_info.last_queued.value_() > 0
349+
and task.sched_info.last_queued.value_()
350+
> task.sched_info.last_arrival.value_()
351+
):
352+
qduration = (
353+
runq.clock.value_() - task.sched_info.last_queued.value_()
354+
)
355+
print(
356+
f"cpu: {cpu} pid: {task.pid.value_()} prio: {task.prio.value_()} queued for {qduration} nsecs"
357+
)
358+
except AttributeError:
359+
pass
360+
361+
362+
def dump_runq_wait_summary(
363+
prog: Program, cpu: int, qduration_thresh_ms: int = 1000
364+
):
365+
"""
366+
Iterate through all tasks, on a CPU's runq and list tasks that have been queued
367+
for greater than specified threshold in ms (default 1000 ms).
368+
369+
:param prog: drgn Program
370+
:param cpu: cpu number
371+
:param qduration_thresh_ms: threshold for wait duration on runq
372+
"""
373+
dump_cfs_runq_wait_summary(prog, cpu, qduration_thresh_ms)
374+
dump_rt_runq_wait_summary(prog, cpu, qduration_thresh_ms)
375+
376+
377+
def run_queue_check(prog: Program) -> None:
378+
"""
379+
Check and report runqueue anomalies.
380+
381+
:param prog: drgn program
382+
"""
383+
384+
explanations: Set[str] = set()
385+
for cpu in for_each_online_cpu(prog):
386+
check_idle_runq(prog, cpu, explanations)
387+
check_runq_wakelist(prog, cpu, explanations)
388+
dump_runq_wait_summary(prog, cpu)
389+
print("\n")
390+
391+
if not explanations:
392+
return
393+
print("Note: found some possible run queue issues. Explanations below:")
394+
for explanation in explanations:
395+
print(explanation)
396+
397+
398+
class RunQueueCheck(CorelensModule):
399+
"""
400+
Check and report runqueue anomalies.
401+
The content of this report does not necessarily mean an
402+
error condition or a problem. But it mentions things that
403+
don'y happen usually, so should not be ignored.
404+
"""
405+
406+
name = "runqcheck"
407+
live_ok = False
408+
409+
def run(self, prog: Program, args: argparse.Namespace) -> None:
410+
run_queue_check(prog)
411+
412+
100413
class RunQueue(CorelensModule):
101414
"""
102415
List process that are in RT and CFS queue.

0 commit comments

Comments
 (0)