sched.h 22 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902
  1. /* SPDX-License-Identifier: GPL-2.0 */
  2. #undef TRACE_SYSTEM
  3. #define TRACE_SYSTEM sched
  4. #if !defined(_TRACE_SCHED_H) || defined(TRACE_HEADER_MULTI_READ)
  5. #define _TRACE_SCHED_H
  6. #include <linux/kthread.h>
  7. #include <linux/sched/numa_balancing.h>
  8. #include <linux/tracepoint.h>
  9. #include <linux/binfmts.h>
  10. /*
  11. * Tracepoint for calling kthread_stop, performed to end a kthread:
  12. */
  13. TRACE_EVENT(sched_kthread_stop,
  14. TP_PROTO(struct task_struct *t),
  15. TP_ARGS(t),
  16. TP_STRUCT__entry(
  17. __string( comm, t->comm )
  18. __field( pid_t, pid )
  19. ),
  20. TP_fast_assign(
  21. __assign_str(comm);
  22. __entry->pid = t->pid;
  23. ),
  24. TP_printk("comm=%s pid=%d", __get_str(comm), __entry->pid)
  25. );
  26. /*
  27. * Tracepoint for the return value of the kthread stopping:
  28. */
  29. TRACE_EVENT(sched_kthread_stop_ret,
  30. TP_PROTO(int ret),
  31. TP_ARGS(ret),
  32. TP_STRUCT__entry(
  33. __field( int, ret )
  34. ),
  35. TP_fast_assign(
  36. __entry->ret = ret;
  37. ),
  38. TP_printk("ret=%d", __entry->ret)
  39. );
  40. /**
  41. * sched_kthread_work_queue_work - called when a work gets queued
  42. * @worker: pointer to the kthread_worker
  43. * @work: pointer to struct kthread_work
  44. *
  45. * This event occurs when a work is queued immediately or once a
  46. * delayed work is actually queued (ie: once the delay has been
  47. * reached).
  48. */
  49. TRACE_EVENT(sched_kthread_work_queue_work,
  50. TP_PROTO(struct kthread_worker *worker,
  51. struct kthread_work *work),
  52. TP_ARGS(worker, work),
  53. TP_STRUCT__entry(
  54. __field( void *, work )
  55. __field( void *, function)
  56. __field( void *, worker)
  57. ),
  58. TP_fast_assign(
  59. __entry->work = work;
  60. __entry->function = work->func;
  61. __entry->worker = worker;
  62. ),
  63. TP_printk("work struct=%p function=%ps worker=%p",
  64. __entry->work, __entry->function, __entry->worker)
  65. );
  66. /**
  67. * sched_kthread_work_execute_start - called immediately before the work callback
  68. * @work: pointer to struct kthread_work
  69. *
  70. * Allows to track kthread work execution.
  71. */
  72. TRACE_EVENT(sched_kthread_work_execute_start,
  73. TP_PROTO(struct kthread_work *work),
  74. TP_ARGS(work),
  75. TP_STRUCT__entry(
  76. __field( void *, work )
  77. __field( void *, function)
  78. ),
  79. TP_fast_assign(
  80. __entry->work = work;
  81. __entry->function = work->func;
  82. ),
  83. TP_printk("work struct %p: function %ps", __entry->work, __entry->function)
  84. );
  85. /**
  86. * sched_kthread_work_execute_end - called immediately after the work callback
  87. * @work: pointer to struct work_struct
  88. * @function: pointer to worker function
  89. *
  90. * Allows to track workqueue execution.
  91. */
  92. TRACE_EVENT(sched_kthread_work_execute_end,
  93. TP_PROTO(struct kthread_work *work, kthread_work_func_t function),
  94. TP_ARGS(work, function),
  95. TP_STRUCT__entry(
  96. __field( void *, work )
  97. __field( void *, function)
  98. ),
  99. TP_fast_assign(
  100. __entry->work = work;
  101. __entry->function = function;
  102. ),
  103. TP_printk("work struct %p: function %ps", __entry->work, __entry->function)
  104. );
  105. /*
  106. * Tracepoint for waking up a task:
  107. */
  108. DECLARE_EVENT_CLASS(sched_wakeup_template,
  109. TP_PROTO(struct task_struct *p),
  110. TP_ARGS(__perf_task(p)),
  111. TP_STRUCT__entry(
  112. __array( char, comm, TASK_COMM_LEN )
  113. __field( pid_t, pid )
  114. __field( int, prio )
  115. __field( int, target_cpu )
  116. ),
  117. TP_fast_assign(
  118. memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
  119. __entry->pid = p->pid;
  120. __entry->prio = p->prio; /* XXX SCHED_DEADLINE */
  121. __entry->target_cpu = task_cpu(p);
  122. ),
  123. TP_printk("comm=%s pid=%d prio=%d target_cpu=%03d",
  124. __entry->comm, __entry->pid, __entry->prio,
  125. __entry->target_cpu)
  126. );
  127. /*
  128. * Tracepoint called when waking a task; this tracepoint is guaranteed to be
  129. * called from the waking context.
  130. */
  131. DEFINE_EVENT(sched_wakeup_template, sched_waking,
  132. TP_PROTO(struct task_struct *p),
  133. TP_ARGS(p));
  134. /*
  135. * Tracepoint called when the task is actually woken; p->state == TASK_RUNNING.
  136. * It is not always called from the waking context.
  137. */
  138. DEFINE_EVENT(sched_wakeup_template, sched_wakeup,
  139. TP_PROTO(struct task_struct *p),
  140. TP_ARGS(p));
  141. /*
  142. * Tracepoint for waking up a new task:
  143. */
  144. DEFINE_EVENT(sched_wakeup_template, sched_wakeup_new,
  145. TP_PROTO(struct task_struct *p),
  146. TP_ARGS(p));
  147. #ifdef CREATE_TRACE_POINTS
  148. static inline long __trace_sched_switch_state(bool preempt,
  149. unsigned int prev_state,
  150. struct task_struct *p)
  151. {
  152. unsigned int state;
  153. BUG_ON(p != current);
  154. /*
  155. * Preemption ignores task state, therefore preempted tasks are always
  156. * RUNNING (we will not have dequeued if state != RUNNING).
  157. */
  158. if (preempt)
  159. return TASK_REPORT_MAX;
  160. /*
  161. * task_state_index() uses fls() and returns a value from 0-8 range.
  162. * Decrement it by 1 (except TASK_RUNNING state i.e 0) before using
  163. * it for left shift operation to get the correct task->state
  164. * mapping.
  165. */
  166. state = __task_state_index(prev_state, p->exit_state);
  167. return state ? (1 << (state - 1)) : state;
  168. }
  169. #endif /* CREATE_TRACE_POINTS */
  170. /*
  171. * Tracepoint for task switches, performed by the scheduler:
  172. */
  173. TRACE_EVENT(sched_switch,
  174. TP_PROTO(bool preempt,
  175. struct task_struct *prev,
  176. struct task_struct *next,
  177. unsigned int prev_state),
  178. TP_ARGS(preempt, prev, next, prev_state),
  179. TP_STRUCT__entry(
  180. __array( char, prev_comm, TASK_COMM_LEN )
  181. __field( pid_t, prev_pid )
  182. __field( int, prev_prio )
  183. __field( long, prev_state )
  184. __array( char, next_comm, TASK_COMM_LEN )
  185. __field( pid_t, next_pid )
  186. __field( int, next_prio )
  187. ),
  188. TP_fast_assign(
  189. memcpy(__entry->prev_comm, prev->comm, TASK_COMM_LEN);
  190. __entry->prev_pid = prev->pid;
  191. __entry->prev_prio = prev->prio;
  192. __entry->prev_state = __trace_sched_switch_state(preempt, prev_state, prev);
  193. memcpy(__entry->next_comm, next->comm, TASK_COMM_LEN);
  194. __entry->next_pid = next->pid;
  195. __entry->next_prio = next->prio;
  196. /* XXX SCHED_DEADLINE */
  197. ),
  198. TP_printk("prev_comm=%s prev_pid=%d prev_prio=%d prev_state=%s%s ==> next_comm=%s next_pid=%d next_prio=%d",
  199. __entry->prev_comm, __entry->prev_pid, __entry->prev_prio,
  200. (__entry->prev_state & (TASK_REPORT_MAX - 1)) ?
  201. __print_flags(__entry->prev_state & (TASK_REPORT_MAX - 1), "|",
  202. { TASK_INTERRUPTIBLE, "S" },
  203. { TASK_UNINTERRUPTIBLE, "D" },
  204. { __TASK_STOPPED, "T" },
  205. { __TASK_TRACED, "t" },
  206. { EXIT_DEAD, "X" },
  207. { EXIT_ZOMBIE, "Z" },
  208. { TASK_PARKED, "P" },
  209. { TASK_DEAD, "I" }) :
  210. "R",
  211. __entry->prev_state & TASK_REPORT_MAX ? "+" : "",
  212. __entry->next_comm, __entry->next_pid, __entry->next_prio)
  213. );
  214. /*
  215. * Tracepoint for a task being migrated:
  216. */
  217. TRACE_EVENT(sched_migrate_task,
  218. TP_PROTO(struct task_struct *p, int dest_cpu),
  219. TP_ARGS(p, dest_cpu),
  220. TP_STRUCT__entry(
  221. __string( comm, p->comm )
  222. __field( pid_t, pid )
  223. __field( int, prio )
  224. __field( int, orig_cpu )
  225. __field( int, dest_cpu )
  226. ),
  227. TP_fast_assign(
  228. __assign_str(comm);
  229. __entry->pid = p->pid;
  230. __entry->prio = p->prio; /* XXX SCHED_DEADLINE */
  231. __entry->orig_cpu = task_cpu(p);
  232. __entry->dest_cpu = dest_cpu;
  233. ),
  234. TP_printk("comm=%s pid=%d prio=%d orig_cpu=%d dest_cpu=%d",
  235. __get_str(comm), __entry->pid, __entry->prio,
  236. __entry->orig_cpu, __entry->dest_cpu)
  237. );
  238. DECLARE_EVENT_CLASS(sched_process_template,
  239. TP_PROTO(struct task_struct *p),
  240. TP_ARGS(p),
  241. TP_STRUCT__entry(
  242. __string( comm, p->comm )
  243. __field( pid_t, pid )
  244. __field( int, prio )
  245. ),
  246. TP_fast_assign(
  247. __assign_str(comm);
  248. __entry->pid = p->pid;
  249. __entry->prio = p->prio; /* XXX SCHED_DEADLINE */
  250. ),
  251. TP_printk("comm=%s pid=%d prio=%d",
  252. __get_str(comm), __entry->pid, __entry->prio)
  253. );
  254. /*
  255. * Tracepoint for freeing a task:
  256. */
  257. DEFINE_EVENT(sched_process_template, sched_process_free,
  258. TP_PROTO(struct task_struct *p),
  259. TP_ARGS(p));
  260. /*
  261. * Tracepoint for a task exiting.
  262. * Note, it's a superset of sched_process_template and should be kept
  263. * compatible as much as possible. sched_process_exits has an extra
  264. * `group_dead` argument, so sched_process_template can't be used,
  265. * unfortunately, just like sched_migrate_task above.
  266. */
  267. TRACE_EVENT(sched_process_exit,
  268. TP_PROTO(struct task_struct *p, bool group_dead),
  269. TP_ARGS(p, group_dead),
  270. TP_STRUCT__entry(
  271. __array( char, comm, TASK_COMM_LEN )
  272. __field( pid_t, pid )
  273. __field( int, prio )
  274. __field( bool, group_dead )
  275. ),
  276. TP_fast_assign(
  277. memcpy(__entry->comm, p->comm, TASK_COMM_LEN);
  278. __entry->pid = p->pid;
  279. __entry->prio = p->prio; /* XXX SCHED_DEADLINE */
  280. __entry->group_dead = group_dead;
  281. ),
  282. TP_printk("comm=%s pid=%d prio=%d group_dead=%s",
  283. __entry->comm, __entry->pid, __entry->prio,
  284. __entry->group_dead ? "true" : "false"
  285. )
  286. );
  287. /*
  288. * Tracepoint for waiting on task to unschedule:
  289. */
  290. DEFINE_EVENT(sched_process_template, sched_wait_task,
  291. TP_PROTO(struct task_struct *p),
  292. TP_ARGS(p));
  293. /*
  294. * Tracepoint for a waiting task:
  295. */
  296. TRACE_EVENT(sched_process_wait,
  297. TP_PROTO(struct pid *pid),
  298. TP_ARGS(pid),
  299. TP_STRUCT__entry(
  300. __string( comm, current->comm )
  301. __field( pid_t, pid )
  302. __field( int, prio )
  303. ),
  304. TP_fast_assign(
  305. __assign_str(comm);
  306. __entry->pid = pid_nr(pid);
  307. __entry->prio = current->prio; /* XXX SCHED_DEADLINE */
  308. ),
  309. TP_printk("comm=%s pid=%d prio=%d",
  310. __get_str(comm), __entry->pid, __entry->prio)
  311. );
  312. /*
  313. * Tracepoint for kernel_clone:
  314. */
  315. TRACE_EVENT(sched_process_fork,
  316. TP_PROTO(struct task_struct *parent, struct task_struct *child),
  317. TP_ARGS(parent, child),
  318. TP_STRUCT__entry(
  319. __string( parent_comm, parent->comm )
  320. __field( pid_t, parent_pid )
  321. __string( child_comm, child->comm )
  322. __field( pid_t, child_pid )
  323. ),
  324. TP_fast_assign(
  325. __assign_str(parent_comm);
  326. __entry->parent_pid = parent->pid;
  327. __assign_str(child_comm);
  328. __entry->child_pid = child->pid;
  329. ),
  330. TP_printk("comm=%s pid=%d child_comm=%s child_pid=%d",
  331. __get_str(parent_comm), __entry->parent_pid,
  332. __get_str(child_comm), __entry->child_pid)
  333. );
  334. /*
  335. * Tracepoint for exec:
  336. */
  337. TRACE_EVENT(sched_process_exec,
  338. TP_PROTO(struct task_struct *p, pid_t old_pid,
  339. struct linux_binprm *bprm),
  340. TP_ARGS(p, old_pid, bprm),
  341. TP_STRUCT__entry(
  342. __string( filename, bprm->filename )
  343. __field( pid_t, pid )
  344. __field( pid_t, old_pid )
  345. ),
  346. TP_fast_assign(
  347. __assign_str(filename);
  348. __entry->pid = p->pid;
  349. __entry->old_pid = old_pid;
  350. ),
  351. TP_printk("filename=%s pid=%d old_pid=%d", __get_str(filename),
  352. __entry->pid, __entry->old_pid)
  353. );
  354. /**
  355. * sched_prepare_exec - called before setting up new exec
  356. * @task: pointer to the current task
  357. * @bprm: pointer to linux_binprm used for new exec
  358. *
  359. * Called before flushing the old exec, where @task is still unchanged, but at
  360. * the point of no return during switching to the new exec. At the point it is
  361. * called the exec will either succeed, or on failure terminate the task. Also
  362. * see the "sched_process_exec" tracepoint, which is called right after @task
  363. * has successfully switched to the new exec.
  364. */
  365. TRACE_EVENT(sched_prepare_exec,
  366. TP_PROTO(struct task_struct *task, struct linux_binprm *bprm),
  367. TP_ARGS(task, bprm),
  368. TP_STRUCT__entry(
  369. __string( interp, bprm->interp )
  370. __string( filename, bprm->filename )
  371. __field( pid_t, pid )
  372. __string( comm, task->comm )
  373. ),
  374. TP_fast_assign(
  375. __assign_str(interp);
  376. __assign_str(filename);
  377. __entry->pid = task->pid;
  378. __assign_str(comm);
  379. ),
  380. TP_printk("interp=%s filename=%s pid=%d comm=%s",
  381. __get_str(interp), __get_str(filename),
  382. __entry->pid, __get_str(comm))
  383. );
  384. #ifdef CONFIG_SCHEDSTATS
  385. #define DEFINE_EVENT_SCHEDSTAT DEFINE_EVENT
  386. #define DECLARE_EVENT_CLASS_SCHEDSTAT DECLARE_EVENT_CLASS
  387. #else
  388. #define DEFINE_EVENT_SCHEDSTAT DEFINE_EVENT_NOP
  389. #define DECLARE_EVENT_CLASS_SCHEDSTAT DECLARE_EVENT_CLASS_NOP
  390. #endif
  391. /*
  392. * XXX the below sched_stat tracepoints only apply to SCHED_OTHER/BATCH/IDLE
  393. * adding sched_stat support to SCHED_FIFO/RR would be welcome.
  394. */
  395. DECLARE_EVENT_CLASS_SCHEDSTAT(sched_stat_template,
  396. TP_PROTO(struct task_struct *tsk, u64 delay),
  397. TP_ARGS(__perf_task(tsk), __perf_count(delay)),
  398. TP_STRUCT__entry(
  399. __string( comm, tsk->comm )
  400. __field( pid_t, pid )
  401. __field( u64, delay )
  402. ),
  403. TP_fast_assign(
  404. __assign_str(comm);
  405. __entry->pid = tsk->pid;
  406. __entry->delay = delay;
  407. ),
  408. TP_printk("comm=%s pid=%d delay=%Lu [ns]",
  409. __get_str(comm), __entry->pid,
  410. (unsigned long long)__entry->delay)
  411. );
  412. /*
  413. * Tracepoint for accounting wait time (time the task is runnable
  414. * but not actually running due to scheduler contention).
  415. */
  416. DEFINE_EVENT_SCHEDSTAT(sched_stat_template, sched_stat_wait,
  417. TP_PROTO(struct task_struct *tsk, u64 delay),
  418. TP_ARGS(tsk, delay));
  419. /*
  420. * Tracepoint for accounting sleep time (time the task is not runnable,
  421. * including iowait, see below).
  422. */
  423. DEFINE_EVENT_SCHEDSTAT(sched_stat_template, sched_stat_sleep,
  424. TP_PROTO(struct task_struct *tsk, u64 delay),
  425. TP_ARGS(tsk, delay));
  426. /*
  427. * Tracepoint for accounting iowait time (time the task is not runnable
  428. * due to waiting on IO to complete).
  429. */
  430. DEFINE_EVENT_SCHEDSTAT(sched_stat_template, sched_stat_iowait,
  431. TP_PROTO(struct task_struct *tsk, u64 delay),
  432. TP_ARGS(tsk, delay));
  433. /*
  434. * Tracepoint for accounting blocked time (time the task is in uninterruptible).
  435. */
  436. DEFINE_EVENT_SCHEDSTAT(sched_stat_template, sched_stat_blocked,
  437. TP_PROTO(struct task_struct *tsk, u64 delay),
  438. TP_ARGS(tsk, delay));
  439. /*
  440. * Tracepoint for accounting runtime (time the task is executing
  441. * on a CPU).
  442. */
  443. DECLARE_EVENT_CLASS(sched_stat_runtime,
  444. TP_PROTO(struct task_struct *tsk, u64 runtime),
  445. TP_ARGS(tsk, __perf_count(runtime)),
  446. TP_STRUCT__entry(
  447. __string( comm, tsk->comm )
  448. __field( pid_t, pid )
  449. __field( u64, runtime )
  450. ),
  451. TP_fast_assign(
  452. __assign_str(comm);
  453. __entry->pid = tsk->pid;
  454. __entry->runtime = runtime;
  455. ),
  456. TP_printk("comm=%s pid=%d runtime=%Lu [ns]",
  457. __get_str(comm), __entry->pid,
  458. (unsigned long long)__entry->runtime)
  459. );
  460. DEFINE_EVENT(sched_stat_runtime, sched_stat_runtime,
  461. TP_PROTO(struct task_struct *tsk, u64 runtime),
  462. TP_ARGS(tsk, runtime));
  463. /*
  464. * Tracepoint for showing priority inheritance modifying a tasks
  465. * priority.
  466. */
  467. TRACE_EVENT(sched_pi_setprio,
  468. TP_PROTO(struct task_struct *tsk, struct task_struct *pi_task),
  469. TP_ARGS(tsk, pi_task),
  470. TP_STRUCT__entry(
  471. __string( comm, tsk->comm )
  472. __field( pid_t, pid )
  473. __field( int, oldprio )
  474. __field( int, newprio )
  475. ),
  476. TP_fast_assign(
  477. __assign_str(comm);
  478. __entry->pid = tsk->pid;
  479. __entry->oldprio = tsk->prio;
  480. __entry->newprio = pi_task ?
  481. min(tsk->normal_prio, pi_task->prio) :
  482. tsk->normal_prio;
  483. /* XXX SCHED_DEADLINE bits missing */
  484. ),
  485. TP_printk("comm=%s pid=%d oldprio=%d newprio=%d",
  486. __get_str(comm), __entry->pid,
  487. __entry->oldprio, __entry->newprio)
  488. );
  489. #ifdef CONFIG_DETECT_HUNG_TASK
  490. TRACE_EVENT(sched_process_hang,
  491. TP_PROTO(struct task_struct *tsk),
  492. TP_ARGS(tsk),
  493. TP_STRUCT__entry(
  494. __string( comm, tsk->comm )
  495. __field( pid_t, pid )
  496. ),
  497. TP_fast_assign(
  498. __assign_str(comm);
  499. __entry->pid = tsk->pid;
  500. ),
  501. TP_printk("comm=%s pid=%d", __get_str(comm), __entry->pid)
  502. );
  503. #endif /* CONFIG_DETECT_HUNG_TASK */
  504. #ifdef CONFIG_NUMA_BALANCING
  505. /*
  506. * Tracks migration of tasks from one runqueue to another. Can be used to
  507. * detect if automatic NUMA balancing is bouncing between nodes.
  508. */
  509. TRACE_EVENT(sched_move_numa,
  510. TP_PROTO(struct task_struct *tsk, int src_cpu, int dst_cpu),
  511. TP_ARGS(tsk, src_cpu, dst_cpu),
  512. TP_STRUCT__entry(
  513. __field( pid_t, pid )
  514. __field( pid_t, tgid )
  515. __field( pid_t, ngid )
  516. __field( int, src_cpu )
  517. __field( int, src_nid )
  518. __field( int, dst_cpu )
  519. __field( int, dst_nid )
  520. ),
  521. TP_fast_assign(
  522. __entry->pid = task_pid_nr(tsk);
  523. __entry->tgid = task_tgid_nr(tsk);
  524. __entry->ngid = task_numa_group_id(tsk);
  525. __entry->src_cpu = src_cpu;
  526. __entry->src_nid = cpu_to_node(src_cpu);
  527. __entry->dst_cpu = dst_cpu;
  528. __entry->dst_nid = cpu_to_node(dst_cpu);
  529. ),
  530. TP_printk("pid=%d tgid=%d ngid=%d src_cpu=%d src_nid=%d dst_cpu=%d dst_nid=%d",
  531. __entry->pid, __entry->tgid, __entry->ngid,
  532. __entry->src_cpu, __entry->src_nid,
  533. __entry->dst_cpu, __entry->dst_nid)
  534. );
  535. DECLARE_EVENT_CLASS(sched_numa_pair_template,
  536. TP_PROTO(struct task_struct *src_tsk, int src_cpu,
  537. struct task_struct *dst_tsk, int dst_cpu),
  538. TP_ARGS(src_tsk, src_cpu, dst_tsk, dst_cpu),
  539. TP_STRUCT__entry(
  540. __field( pid_t, src_pid )
  541. __field( pid_t, src_tgid )
  542. __field( pid_t, src_ngid )
  543. __field( int, src_cpu )
  544. __field( int, src_nid )
  545. __field( pid_t, dst_pid )
  546. __field( pid_t, dst_tgid )
  547. __field( pid_t, dst_ngid )
  548. __field( int, dst_cpu )
  549. __field( int, dst_nid )
  550. ),
  551. TP_fast_assign(
  552. __entry->src_pid = task_pid_nr(src_tsk);
  553. __entry->src_tgid = task_tgid_nr(src_tsk);
  554. __entry->src_ngid = task_numa_group_id(src_tsk);
  555. __entry->src_cpu = src_cpu;
  556. __entry->src_nid = cpu_to_node(src_cpu);
  557. __entry->dst_pid = dst_tsk ? task_pid_nr(dst_tsk) : 0;
  558. __entry->dst_tgid = dst_tsk ? task_tgid_nr(dst_tsk) : 0;
  559. __entry->dst_ngid = dst_tsk ? task_numa_group_id(dst_tsk) : 0;
  560. __entry->dst_cpu = dst_cpu;
  561. __entry->dst_nid = dst_cpu >= 0 ? cpu_to_node(dst_cpu) : -1;
  562. ),
  563. TP_printk("src_pid=%d src_tgid=%d src_ngid=%d src_cpu=%d src_nid=%d dst_pid=%d dst_tgid=%d dst_ngid=%d dst_cpu=%d dst_nid=%d",
  564. __entry->src_pid, __entry->src_tgid, __entry->src_ngid,
  565. __entry->src_cpu, __entry->src_nid,
  566. __entry->dst_pid, __entry->dst_tgid, __entry->dst_ngid,
  567. __entry->dst_cpu, __entry->dst_nid)
  568. );
  569. DEFINE_EVENT(sched_numa_pair_template, sched_stick_numa,
  570. TP_PROTO(struct task_struct *src_tsk, int src_cpu,
  571. struct task_struct *dst_tsk, int dst_cpu),
  572. TP_ARGS(src_tsk, src_cpu, dst_tsk, dst_cpu)
  573. );
  574. DEFINE_EVENT(sched_numa_pair_template, sched_swap_numa,
  575. TP_PROTO(struct task_struct *src_tsk, int src_cpu,
  576. struct task_struct *dst_tsk, int dst_cpu),
  577. TP_ARGS(src_tsk, src_cpu, dst_tsk, dst_cpu)
  578. );
  579. #define NUMAB_SKIP_REASON \
  580. EM( NUMAB_SKIP_UNSUITABLE, "unsuitable" ) \
  581. EM( NUMAB_SKIP_SHARED_RO, "shared_ro" ) \
  582. EM( NUMAB_SKIP_INACCESSIBLE, "inaccessible" ) \
  583. EM( NUMAB_SKIP_SCAN_DELAY, "scan_delay" ) \
  584. EM( NUMAB_SKIP_PID_INACTIVE, "pid_inactive" ) \
  585. EM( NUMAB_SKIP_IGNORE_PID, "ignore_pid_inactive" ) \
  586. EMe(NUMAB_SKIP_SEQ_COMPLETED, "seq_completed" )
  587. /* Redefine for export. */
  588. #undef EM
  589. #undef EMe
  590. #define EM(a, b) TRACE_DEFINE_ENUM(a);
  591. #define EMe(a, b) TRACE_DEFINE_ENUM(a);
  592. NUMAB_SKIP_REASON
  593. /* Redefine for symbolic printing. */
  594. #undef EM
  595. #undef EMe
  596. #define EM(a, b) { a, b },
  597. #define EMe(a, b) { a, b }
  598. TRACE_EVENT(sched_skip_vma_numa,
  599. TP_PROTO(struct mm_struct *mm, struct vm_area_struct *vma,
  600. enum numa_vmaskip_reason reason),
  601. TP_ARGS(mm, vma, reason),
  602. TP_STRUCT__entry(
  603. __field(unsigned long, numa_scan_offset)
  604. __field(unsigned long, vm_start)
  605. __field(unsigned long, vm_end)
  606. __field(enum numa_vmaskip_reason, reason)
  607. ),
  608. TP_fast_assign(
  609. __entry->numa_scan_offset = mm->numa_scan_offset;
  610. __entry->vm_start = vma->vm_start;
  611. __entry->vm_end = vma->vm_end;
  612. __entry->reason = reason;
  613. ),
  614. TP_printk("numa_scan_offset=%lX vm_start=%lX vm_end=%lX reason=%s",
  615. __entry->numa_scan_offset,
  616. __entry->vm_start,
  617. __entry->vm_end,
  618. __print_symbolic(__entry->reason, NUMAB_SKIP_REASON))
  619. );
  620. TRACE_EVENT(sched_skip_cpuset_numa,
  621. TP_PROTO(struct task_struct *tsk, nodemask_t *mem_allowed_ptr),
  622. TP_ARGS(tsk, mem_allowed_ptr),
  623. TP_STRUCT__entry(
  624. __array( char, comm, TASK_COMM_LEN )
  625. __field( pid_t, pid )
  626. __field( pid_t, tgid )
  627. __field( pid_t, ngid )
  628. __array( unsigned long, mem_allowed, BITS_TO_LONGS(MAX_NUMNODES))
  629. ),
  630. TP_fast_assign(
  631. memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN);
  632. __entry->pid = task_pid_nr(tsk);
  633. __entry->tgid = task_tgid_nr(tsk);
  634. __entry->ngid = task_numa_group_id(tsk);
  635. BUILD_BUG_ON(sizeof(nodemask_t) != \
  636. BITS_TO_LONGS(MAX_NUMNODES) * sizeof(long));
  637. memcpy(__entry->mem_allowed, mem_allowed_ptr->bits,
  638. sizeof(__entry->mem_allowed));
  639. ),
  640. TP_printk("comm=%s pid=%d tgid=%d ngid=%d mem_nodes_allowed=%*pbl",
  641. __entry->comm,
  642. __entry->pid,
  643. __entry->tgid,
  644. __entry->ngid,
  645. MAX_NUMNODES, __entry->mem_allowed)
  646. );
  647. #endif /* CONFIG_NUMA_BALANCING */
  648. /*
  649. * Tracepoint for waking a polling cpu without an IPI.
  650. */
  651. TRACE_EVENT(sched_wake_idle_without_ipi,
  652. TP_PROTO(int cpu),
  653. TP_ARGS(cpu),
  654. TP_STRUCT__entry(
  655. __field( int, cpu )
  656. ),
  657. TP_fast_assign(
  658. __entry->cpu = cpu;
  659. ),
  660. TP_printk("cpu=%d", __entry->cpu)
  661. );
  662. /*
  663. * Following tracepoints are not exported in tracefs and provide hooking
  664. * mechanisms only for testing and debugging purposes.
  665. */
  666. DECLARE_TRACE(pelt_cfs,
  667. TP_PROTO(struct cfs_rq *cfs_rq),
  668. TP_ARGS(cfs_rq));
  669. DECLARE_TRACE(pelt_rt,
  670. TP_PROTO(struct rq *rq),
  671. TP_ARGS(rq));
  672. DECLARE_TRACE(pelt_dl,
  673. TP_PROTO(struct rq *rq),
  674. TP_ARGS(rq));
  675. DECLARE_TRACE(pelt_hw,
  676. TP_PROTO(struct rq *rq),
  677. TP_ARGS(rq));
  678. DECLARE_TRACE(pelt_irq,
  679. TP_PROTO(struct rq *rq),
  680. TP_ARGS(rq));
  681. DECLARE_TRACE(pelt_se,
  682. TP_PROTO(struct sched_entity *se),
  683. TP_ARGS(se));
  684. DECLARE_TRACE(sched_cpu_capacity,
  685. TP_PROTO(struct rq *rq),
  686. TP_ARGS(rq));
  687. DECLARE_TRACE(sched_overutilized,
  688. TP_PROTO(struct root_domain *rd, bool overutilized),
  689. TP_ARGS(rd, overutilized));
  690. DECLARE_TRACE(sched_util_est_cfs,
  691. TP_PROTO(struct cfs_rq *cfs_rq),
  692. TP_ARGS(cfs_rq));
  693. DECLARE_TRACE(sched_util_est_se,
  694. TP_PROTO(struct sched_entity *se),
  695. TP_ARGS(se));
  696. DECLARE_TRACE(sched_update_nr_running,
  697. TP_PROTO(struct rq *rq, int change),
  698. TP_ARGS(rq, change));
  699. DECLARE_TRACE(sched_compute_energy,
  700. TP_PROTO(struct task_struct *p, int dst_cpu, unsigned long energy,
  701. unsigned long max_util, unsigned long busy_time),
  702. TP_ARGS(p, dst_cpu, energy, max_util, busy_time));
  703. DECLARE_TRACE(sched_entry,
  704. TP_PROTO(bool preempt),
  705. TP_ARGS(preempt));
  706. DECLARE_TRACE(sched_exit,
  707. TP_PROTO(bool is_switch),
  708. TP_ARGS(is_switch));
  709. DECLARE_TRACE_CONDITION(sched_set_state,
  710. TP_PROTO(struct task_struct *tsk, int state),
  711. TP_ARGS(tsk, state),
  712. TP_CONDITION(!!(tsk->__state) != !!state));
  713. DECLARE_TRACE(sched_set_need_resched,
  714. TP_PROTO(struct task_struct *tsk, int cpu, int tif),
  715. TP_ARGS(tsk, cpu, tif));
  716. #endif /* _TRACE_SCHED_H */
  717. /* This part must be outside protection */
  718. #include <trace/define_trace.h>