numa.bpf.c 2.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100
  1. // SPDX-License-Identifier: GPL-2.0
  2. /*
  3. * A scheduler that validates the behavior of the NUMA-aware
  4. * functionalities.
  5. *
  6. * The scheduler creates a separate DSQ for each NUMA node, ensuring tasks
  7. * are exclusively processed by CPUs within their respective nodes. Idle
  8. * CPUs are selected only within the same node, so task migration can only
  9. * occur between CPUs belonging to the same node.
  10. *
  11. * Copyright (c) 2025 Andrea Righi <arighi@nvidia.com>
  12. */
  13. #include <scx/common.bpf.h>
  14. char _license[] SEC("license") = "GPL";
  15. UEI_DEFINE(uei);
  16. const volatile unsigned int __COMPAT_SCX_PICK_IDLE_IN_NODE;
  17. static bool is_cpu_idle(s32 cpu, int node)
  18. {
  19. const struct cpumask *idle_cpumask;
  20. bool idle;
  21. idle_cpumask = __COMPAT_scx_bpf_get_idle_cpumask_node(node);
  22. idle = bpf_cpumask_test_cpu(cpu, idle_cpumask);
  23. scx_bpf_put_cpumask(idle_cpumask);
  24. return idle;
  25. }
  26. s32 BPF_STRUCT_OPS(numa_select_cpu,
  27. struct task_struct *p, s32 prev_cpu, u64 wake_flags)
  28. {
  29. int node = __COMPAT_scx_bpf_cpu_node(scx_bpf_task_cpu(p));
  30. s32 cpu;
  31. /*
  32. * We could just use __COMPAT_scx_bpf_pick_any_cpu_node() here,
  33. * since it already tries to pick an idle CPU within the node
  34. * first, but let's use both functions for better testing coverage.
  35. */
  36. cpu = __COMPAT_scx_bpf_pick_idle_cpu_node(p->cpus_ptr, node,
  37. __COMPAT_SCX_PICK_IDLE_IN_NODE);
  38. if (cpu < 0)
  39. cpu = __COMPAT_scx_bpf_pick_any_cpu_node(p->cpus_ptr, node,
  40. __COMPAT_SCX_PICK_IDLE_IN_NODE);
  41. if (is_cpu_idle(cpu, node))
  42. scx_bpf_error("CPU %d should be marked as busy", cpu);
  43. if (__COMPAT_scx_bpf_cpu_node(cpu) != node)
  44. scx_bpf_error("CPU %d should be in node %d", cpu, node);
  45. return cpu;
  46. }
  47. void BPF_STRUCT_OPS(numa_enqueue, struct task_struct *p, u64 enq_flags)
  48. {
  49. int node = __COMPAT_scx_bpf_cpu_node(scx_bpf_task_cpu(p));
  50. scx_bpf_dsq_insert(p, node, SCX_SLICE_DFL, enq_flags);
  51. }
  52. void BPF_STRUCT_OPS(numa_dispatch, s32 cpu, struct task_struct *prev)
  53. {
  54. int node = __COMPAT_scx_bpf_cpu_node(cpu);
  55. scx_bpf_dsq_move_to_local(node);
  56. }
  57. s32 BPF_STRUCT_OPS_SLEEPABLE(numa_init)
  58. {
  59. int node, err;
  60. bpf_for(node, 0, __COMPAT_scx_bpf_nr_node_ids()) {
  61. err = scx_bpf_create_dsq(node, node);
  62. if (err)
  63. return err;
  64. }
  65. return 0;
  66. }
  67. void BPF_STRUCT_OPS(numa_exit, struct scx_exit_info *ei)
  68. {
  69. UEI_RECORD(uei, ei);
  70. }
  71. SEC(".struct_ops.link")
  72. struct sched_ext_ops numa_ops = {
  73. .select_cpu = (void *)numa_select_cpu,
  74. .enqueue = (void *)numa_enqueue,
  75. .dispatch = (void *)numa_dispatch,
  76. .init = (void *)numa_init,
  77. .exit = (void *)numa_exit,
  78. .name = "numa",
  79. };