| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159 |
- /* SPDX-License-Identifier: GPL-2.0 */
- /*
- * A simple scheduler.
- *
- * By default, it operates as a simple global weighted vtime scheduler and can
- * be switched to FIFO scheduling. It also demonstrates the following niceties.
- *
- * - Statistics tracking how many tasks are queued to local and global dsq's.
- * - Termination notification for userspace.
- *
- * While very simple, this scheduler should work reasonably well on CPUs with a
- * uniform L3 cache topology. While preemption is not implemented, the fact that
- * the scheduling queue is shared across all CPUs means that whatever is at the
- * front of the queue is likely to be executed fairly quickly given enough
- * number of CPUs. The FIFO scheduling mode may be beneficial to some workloads
- * but comes with the usual problems with FIFO scheduling where saturating
- * threads can easily drown out interactive ones.
- *
- * Copyright (c) 2022 Meta Platforms, Inc. and affiliates.
- * Copyright (c) 2022 Tejun Heo <tj@kernel.org>
- * Copyright (c) 2022 David Vernet <dvernet@meta.com>
- */
- #include <scx/common.bpf.h>
- char _license[] SEC("license") = "GPL";
- const volatile bool fifo_sched;
- static u64 vtime_now;
- UEI_DEFINE(uei);
- /*
- * Built-in DSQs such as SCX_DSQ_GLOBAL cannot be used as priority queues
- * (meaning, cannot be dispatched to with scx_bpf_dsq_insert_vtime()). We
- * therefore create a separate DSQ with ID 0 that we dispatch to and consume
- * from. If scx_simple only supported global FIFO scheduling, then we could just
- * use SCX_DSQ_GLOBAL.
- */
- #define SHARED_DSQ 0
- struct {
- __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
- __uint(key_size, sizeof(u32));
- __uint(value_size, sizeof(u64));
- __uint(max_entries, 2); /* [local, global] */
- } stats SEC(".maps");
- static void stat_inc(u32 idx)
- {
- u64 *cnt_p = bpf_map_lookup_elem(&stats, &idx);
- if (cnt_p)
- (*cnt_p)++;
- }
- s32 BPF_STRUCT_OPS(simple_select_cpu, struct task_struct *p, s32 prev_cpu, u64 wake_flags)
- {
- bool is_idle = false;
- s32 cpu;
- cpu = scx_bpf_select_cpu_dfl(p, prev_cpu, wake_flags, &is_idle);
- if (is_idle) {
- stat_inc(0); /* count local queueing */
- scx_bpf_dsq_insert(p, SCX_DSQ_LOCAL, SCX_SLICE_DFL, 0);
- }
- return cpu;
- }
- void BPF_STRUCT_OPS(simple_enqueue, struct task_struct *p, u64 enq_flags)
- {
- stat_inc(1); /* count global queueing */
- if (fifo_sched) {
- scx_bpf_dsq_insert(p, SHARED_DSQ, SCX_SLICE_DFL, enq_flags);
- } else {
- u64 vtime = p->scx.dsq_vtime;
- /*
- * Limit the amount of budget that an idling task can accumulate
- * to one slice.
- */
- if (time_before(vtime, vtime_now - SCX_SLICE_DFL))
- vtime = vtime_now - SCX_SLICE_DFL;
- scx_bpf_dsq_insert_vtime(p, SHARED_DSQ, SCX_SLICE_DFL, vtime,
- enq_flags);
- }
- }
- void BPF_STRUCT_OPS(simple_dispatch, s32 cpu, struct task_struct *prev)
- {
- scx_bpf_dsq_move_to_local(SHARED_DSQ);
- }
- void BPF_STRUCT_OPS(simple_running, struct task_struct *p)
- {
- if (fifo_sched)
- return;
- /*
- * Global vtime always progresses forward as tasks start executing. The
- * test and update can be performed concurrently from multiple CPUs and
- * thus racy. Any error should be contained and temporary. Let's just
- * live with it.
- */
- if (time_before(vtime_now, p->scx.dsq_vtime))
- vtime_now = p->scx.dsq_vtime;
- }
- void BPF_STRUCT_OPS(simple_stopping, struct task_struct *p, bool runnable)
- {
- if (fifo_sched)
- return;
- /*
- * Scale the execution time by the inverse of the weight and charge.
- *
- * Note that the default yield implementation yields by setting
- * @p->scx.slice to zero and the following would treat the yielding task
- * as if it has consumed all its slice. If this penalizes yielding tasks
- * too much, determine the execution time by taking explicit timestamps
- * instead of depending on @p->scx.slice.
- */
- p->scx.dsq_vtime += (SCX_SLICE_DFL - p->scx.slice) * 100 / p->scx.weight;
- }
- void BPF_STRUCT_OPS(simple_enable, struct task_struct *p)
- {
- p->scx.dsq_vtime = vtime_now;
- }
- s32 BPF_STRUCT_OPS_SLEEPABLE(simple_init)
- {
- int ret;
- ret = scx_bpf_create_dsq(SHARED_DSQ, -1);
- if (ret) {
- scx_bpf_error("failed to create DSQ %d (%d)", SHARED_DSQ, ret);
- return ret;
- }
- return 0;
- }
- void BPF_STRUCT_OPS(simple_exit, struct scx_exit_info *ei)
- {
- UEI_RECORD(uei, ei);
- }
- SCX_OPS_DEFINE(simple_ops,
- .select_cpu = (void *)simple_select_cpu,
- .enqueue = (void *)simple_enqueue,
- .dispatch = (void *)simple_dispatch,
- .running = (void *)simple_running,
- .stopping = (void *)simple_stopping,
- .enable = (void *)simple_enable,
- .init = (void *)simple_init,
- .exit = (void *)simple_exit,
- .name = "simple");
|