scx_pair.bpf.c 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610
  1. /* SPDX-License-Identifier: GPL-2.0 */
  2. /*
  3. * A demo sched_ext core-scheduler which always makes every sibling CPU pair
  4. * execute from the same CPU cgroup.
  5. *
  6. * This scheduler is a minimal implementation and would need some form of
  7. * priority handling both inside each cgroup and across the cgroups to be
  8. * practically useful.
  9. *
  10. * Each CPU in the system is paired with exactly one other CPU, according to a
  11. * "stride" value that can be specified when the BPF scheduler program is first
  12. * loaded. Throughout the runtime of the scheduler, these CPU pairs guarantee
  13. * that they will only ever schedule tasks that belong to the same CPU cgroup.
  14. *
  15. * Scheduler Initialization
  16. * ------------------------
  17. *
  18. * The scheduler BPF program is first initialized from user space, before it is
  19. * enabled. During this initialization process, each CPU on the system is
  20. * assigned several values that are constant throughout its runtime:
  21. *
  22. * 1. *Pair CPU*: The CPU that it synchronizes with when making scheduling
  23. * decisions. Paired CPUs always schedule tasks from the same
  24. * CPU cgroup, and synchronize with each other to guarantee
  25. * that this constraint is not violated.
  26. * 2. *Pair ID*: Each CPU pair is assigned a Pair ID, which is used to access
  27. * a struct pair_ctx object that is shared between the pair.
  28. * 3. *In-pair-index*: An index, 0 or 1, that is assigned to each core in the
  29. * pair. Each struct pair_ctx has an active_mask field,
  30. * which is a bitmap used to indicate whether each core
  31. * in the pair currently has an actively running task.
  32. * This index specifies which entry in the bitmap corresponds
  33. * to each CPU in the pair.
  34. *
  35. * During this initialization, the CPUs are paired according to a "stride" that
  36. * may be specified when invoking the user space program that initializes and
  37. * loads the scheduler. By default, the stride is 1/2 the total number of CPUs.
  38. *
  39. * Tasks and cgroups
  40. * -----------------
  41. *
  42. * Every cgroup in the system is registered with the scheduler using the
  43. * pair_cgroup_init() callback, and every task in the system is associated with
  44. * exactly one cgroup. At a high level, the idea with the pair scheduler is to
  45. * always schedule tasks from the same cgroup within a given CPU pair. When a
  46. * task is enqueued (i.e. passed to the pair_enqueue() callback function), its
  47. * cgroup ID is read from its task struct, and then a corresponding queue map
  48. * is used to FIFO-enqueue the task for that cgroup.
  49. *
  50. * If you look through the implementation of the scheduler, you'll notice that
  51. * there is quite a bit of complexity involved with looking up the per-cgroup
  52. * FIFO queue that we enqueue tasks in. For example, there is a cgrp_q_idx_hash
  53. * BPF hash map that is used to map a cgroup ID to a globally unique ID that's
  54. * allocated in the BPF program. This is done because we use separate maps to
  55. * store the FIFO queue of tasks, and the length of that map, per cgroup. This
  56. * complexity is only present because of current deficiencies in BPF that will
  57. * soon be addressed. The main point to keep in mind is that newly enqueued
  58. * tasks are added to their cgroup's FIFO queue.
  59. *
  60. * Dispatching tasks
  61. * -----------------
  62. *
  63. * This section will describe how enqueued tasks are dispatched and scheduled.
  64. * Tasks are dispatched in pair_dispatch(), and at a high level the workflow is
  65. * as follows:
  66. *
  67. * 1. Fetch the struct pair_ctx for the current CPU. As mentioned above, this is
  68. * the structure that's used to synchronize amongst the two pair CPUs in their
  69. * scheduling decisions. After any of the following events have occurred:
  70. *
  71. * - The cgroup's slice run has expired, or
  72. * - The cgroup becomes empty, or
  73. * - Either CPU in the pair is preempted by a higher priority scheduling class
  74. *
  75. * The cgroup transitions to the draining state and stops executing new tasks
  76. * from the cgroup.
  77. *
  78. * 2. If the pair is still executing a task, mark the pair_ctx as draining, and
  79. * wait for the pair CPU to be preempted.
  80. *
  81. * 3. Otherwise, if the pair CPU is not running a task, we can move onto
  82. * scheduling new tasks. Pop the next cgroup id from the top_q queue.
  83. *
  84. * 4. Pop a task from that cgroup's FIFO task queue, and begin executing it.
  85. *
  86. * Note again that this scheduling behavior is simple, but the implementation
  87. * is complex mostly because this it hits several BPF shortcomings and has to
  88. * work around in often awkward ways. Most of the shortcomings are expected to
  89. * be resolved in the near future which should allow greatly simplifying this
  90. * scheduler.
  91. *
  92. * Dealing with preemption
  93. * -----------------------
  94. *
  95. * SCX is the lowest priority sched_class, and could be preempted by them at
  96. * any time. To address this, the scheduler implements pair_cpu_release() and
  97. * pair_cpu_acquire() callbacks which are invoked by the core scheduler when
  98. * the scheduler loses and gains control of the CPU respectively.
  99. *
  100. * In pair_cpu_release(), we mark the pair_ctx as having been preempted, and
  101. * then invoke:
  102. *
  103. * scx_bpf_kick_cpu(pair_cpu, SCX_KICK_PREEMPT | SCX_KICK_WAIT);
  104. *
  105. * This preempts the pair CPU, and waits until it has re-entered the scheduler
  106. * before returning. This is necessary to ensure that the higher priority
  107. * sched_class that preempted our scheduler does not schedule a task
  108. * concurrently with our pair CPU.
  109. *
  110. * When the CPU is re-acquired in pair_cpu_acquire(), we unmark the preemption
  111. * in the pair_ctx, and send another resched IPI to the pair CPU to re-enable
  112. * pair scheduling.
  113. *
  114. * Copyright (c) 2022 Meta Platforms, Inc. and affiliates.
  115. * Copyright (c) 2022 Tejun Heo <tj@kernel.org>
  116. * Copyright (c) 2022 David Vernet <dvernet@meta.com>
  117. */
  118. #include <scx/common.bpf.h>
  119. #include "scx_pair.h"
  120. char _license[] SEC("license") = "GPL";
  121. /* !0 for veristat, set during init */
  122. const volatile u32 nr_cpu_ids = 1;
  123. /* a pair of CPUs stay on a cgroup for this duration */
  124. const volatile u32 pair_batch_dur_ns;
  125. /* cpu ID -> pair cpu ID */
  126. const volatile s32 RESIZABLE_ARRAY(rodata, pair_cpu);
  127. /* cpu ID -> pair_id */
  128. const volatile u32 RESIZABLE_ARRAY(rodata, pair_id);
  129. /* CPU ID -> CPU # in the pair (0 or 1) */
  130. const volatile u32 RESIZABLE_ARRAY(rodata, in_pair_idx);
  131. struct pair_ctx {
  132. struct bpf_spin_lock lock;
  133. /* the cgroup the pair is currently executing */
  134. u64 cgid;
  135. /* the pair started executing the current cgroup at */
  136. u64 started_at;
  137. /* whether the current cgroup is draining */
  138. bool draining;
  139. /* the CPUs that are currently active on the cgroup */
  140. u32 active_mask;
  141. /*
  142. * the CPUs that are currently preempted and running tasks in a
  143. * different scheduler.
  144. */
  145. u32 preempted_mask;
  146. };
  147. struct {
  148. __uint(type, BPF_MAP_TYPE_ARRAY);
  149. __type(key, u32);
  150. __type(value, struct pair_ctx);
  151. } pair_ctx SEC(".maps");
  152. /* queue of cgrp_q's possibly with tasks on them */
  153. struct {
  154. __uint(type, BPF_MAP_TYPE_QUEUE);
  155. /*
  156. * Because it's difficult to build strong synchronization encompassing
  157. * multiple non-trivial operations in BPF, this queue is managed in an
  158. * opportunistic way so that we guarantee that a cgroup w/ active tasks
  159. * is always on it but possibly multiple times. Once we have more robust
  160. * synchronization constructs and e.g. linked list, we should be able to
  161. * do this in a prettier way but for now just size it big enough.
  162. */
  163. __uint(max_entries, 4 * MAX_CGRPS);
  164. __type(value, u64);
  165. } top_q SEC(".maps");
  166. /* per-cgroup q which FIFOs the tasks from the cgroup */
  167. struct cgrp_q {
  168. __uint(type, BPF_MAP_TYPE_QUEUE);
  169. __uint(max_entries, MAX_QUEUED);
  170. __type(value, u32);
  171. };
  172. /*
  173. * Ideally, we want to allocate cgrp_q and cgrq_q_len in the cgroup local
  174. * storage; however, a cgroup local storage can only be accessed from the BPF
  175. * progs attached to the cgroup. For now, work around by allocating array of
  176. * cgrp_q's and then allocating per-cgroup indices.
  177. *
  178. * Another caveat: It's difficult to populate a large array of maps statically
  179. * or from BPF. Initialize it from userland.
  180. */
  181. struct {
  182. __uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS);
  183. __uint(max_entries, MAX_CGRPS);
  184. __type(key, s32);
  185. __array(values, struct cgrp_q);
  186. } cgrp_q_arr SEC(".maps");
  187. static u64 cgrp_q_len[MAX_CGRPS];
  188. /*
  189. * This and cgrp_q_idx_hash combine into a poor man's IDR. This likely would be
  190. * useful to have as a map type.
  191. */
  192. static u32 cgrp_q_idx_cursor;
  193. static u64 cgrp_q_idx_busy[MAX_CGRPS];
  194. /*
  195. * All added up, the following is what we do:
  196. *
  197. * 1. When a cgroup is enabled, RR cgroup_q_idx_busy array doing cmpxchg looking
  198. * for a free ID. If not found, fail cgroup creation with -EBUSY.
  199. *
  200. * 2. Hash the cgroup ID to the allocated cgrp_q_idx in the following
  201. * cgrp_q_idx_hash.
  202. *
  203. * 3. Whenever a cgrp_q needs to be accessed, first look up the cgrp_q_idx from
  204. * cgrp_q_idx_hash and then access the corresponding entry in cgrp_q_arr.
  205. *
  206. * This is sadly complicated for something pretty simple. Hopefully, we should
  207. * be able to simplify in the future.
  208. */
  209. struct {
  210. __uint(type, BPF_MAP_TYPE_HASH);
  211. __uint(max_entries, MAX_CGRPS);
  212. __uint(key_size, sizeof(u64)); /* cgrp ID */
  213. __uint(value_size, sizeof(s32)); /* cgrp_q idx */
  214. } cgrp_q_idx_hash SEC(".maps");
  215. /* statistics */
  216. u64 nr_total, nr_dispatched, nr_missing, nr_kicks, nr_preemptions;
  217. u64 nr_exps, nr_exp_waits, nr_exp_empty;
  218. u64 nr_cgrp_next, nr_cgrp_coll, nr_cgrp_empty;
  219. UEI_DEFINE(uei);
  220. void BPF_STRUCT_OPS(pair_enqueue, struct task_struct *p, u64 enq_flags)
  221. {
  222. struct cgroup *cgrp;
  223. struct cgrp_q *cgq;
  224. s32 pid = p->pid;
  225. u64 cgid;
  226. u32 *q_idx;
  227. u64 *cgq_len;
  228. __sync_fetch_and_add(&nr_total, 1);
  229. cgrp = scx_bpf_task_cgroup(p);
  230. cgid = cgrp->kn->id;
  231. bpf_cgroup_release(cgrp);
  232. /* find the cgroup's q and push @p into it */
  233. q_idx = bpf_map_lookup_elem(&cgrp_q_idx_hash, &cgid);
  234. if (!q_idx) {
  235. scx_bpf_error("failed to lookup q_idx for cgroup[%llu]", cgid);
  236. return;
  237. }
  238. cgq = bpf_map_lookup_elem(&cgrp_q_arr, q_idx);
  239. if (!cgq) {
  240. scx_bpf_error("failed to lookup q_arr for cgroup[%llu] q_idx[%u]",
  241. cgid, *q_idx);
  242. return;
  243. }
  244. if (bpf_map_push_elem(cgq, &pid, 0)) {
  245. scx_bpf_error("cgroup[%llu] queue overflow", cgid);
  246. return;
  247. }
  248. /* bump q len, if going 0 -> 1, queue cgroup into the top_q */
  249. cgq_len = MEMBER_VPTR(cgrp_q_len, [*q_idx]);
  250. if (!cgq_len) {
  251. scx_bpf_error("MEMBER_VTPR malfunction");
  252. return;
  253. }
  254. if (!__sync_fetch_and_add(cgq_len, 1) &&
  255. bpf_map_push_elem(&top_q, &cgid, 0)) {
  256. scx_bpf_error("top_q overflow");
  257. return;
  258. }
  259. }
  260. static int lookup_pairc_and_mask(s32 cpu, struct pair_ctx **pairc, u32 *mask)
  261. {
  262. u32 *vptr;
  263. vptr = (u32 *)ARRAY_ELEM_PTR(pair_id, cpu, nr_cpu_ids);
  264. if (!vptr)
  265. return -EINVAL;
  266. *pairc = bpf_map_lookup_elem(&pair_ctx, vptr);
  267. if (!(*pairc))
  268. return -EINVAL;
  269. vptr = (u32 *)ARRAY_ELEM_PTR(in_pair_idx, cpu, nr_cpu_ids);
  270. if (!vptr)
  271. return -EINVAL;
  272. *mask = 1U << *vptr;
  273. return 0;
  274. }
  275. __attribute__((noinline))
  276. static int try_dispatch(s32 cpu)
  277. {
  278. struct pair_ctx *pairc;
  279. struct bpf_map *cgq_map;
  280. struct task_struct *p;
  281. u64 now = scx_bpf_now();
  282. bool kick_pair = false;
  283. bool expired, pair_preempted;
  284. u32 *vptr, in_pair_mask;
  285. s32 pid, q_idx;
  286. u64 cgid;
  287. int ret;
  288. ret = lookup_pairc_and_mask(cpu, &pairc, &in_pair_mask);
  289. if (ret) {
  290. scx_bpf_error("failed to lookup pairc and in_pair_mask for cpu[%d]",
  291. cpu);
  292. return -ENOENT;
  293. }
  294. bpf_spin_lock(&pairc->lock);
  295. pairc->active_mask &= ~in_pair_mask;
  296. expired = time_before(pairc->started_at + pair_batch_dur_ns, now);
  297. if (expired || pairc->draining) {
  298. u64 new_cgid = 0;
  299. __sync_fetch_and_add(&nr_exps, 1);
  300. /*
  301. * We're done with the current cgid. An obvious optimization
  302. * would be not draining if the next cgroup is the current one.
  303. * For now, be dumb and always expire.
  304. */
  305. pairc->draining = true;
  306. pair_preempted = pairc->preempted_mask;
  307. if (pairc->active_mask || pair_preempted) {
  308. /*
  309. * The other CPU is still active, or is no longer under
  310. * our control due to e.g. being preempted by a higher
  311. * priority sched_class. We want to wait until this
  312. * cgroup expires, or until control of our pair CPU has
  313. * been returned to us.
  314. *
  315. * If the pair controls its CPU, and the time already
  316. * expired, kick. When the other CPU arrives at
  317. * dispatch and clears its active mask, it'll push the
  318. * pair to the next cgroup and kick this CPU.
  319. */
  320. __sync_fetch_and_add(&nr_exp_waits, 1);
  321. bpf_spin_unlock(&pairc->lock);
  322. if (expired && !pair_preempted)
  323. kick_pair = true;
  324. goto out_maybe_kick;
  325. }
  326. bpf_spin_unlock(&pairc->lock);
  327. /*
  328. * Pick the next cgroup. It'd be easier / cleaner to not drop
  329. * pairc->lock and use stronger synchronization here especially
  330. * given that we'll be switching cgroups significantly less
  331. * frequently than tasks. Unfortunately, bpf_spin_lock can't
  332. * really protect anything non-trivial. Let's do opportunistic
  333. * operations instead.
  334. */
  335. bpf_repeat(BPF_MAX_LOOPS) {
  336. u32 *q_idx;
  337. u64 *cgq_len;
  338. if (bpf_map_pop_elem(&top_q, &new_cgid)) {
  339. /* no active cgroup, go idle */
  340. __sync_fetch_and_add(&nr_exp_empty, 1);
  341. return 0;
  342. }
  343. q_idx = bpf_map_lookup_elem(&cgrp_q_idx_hash, &new_cgid);
  344. if (!q_idx)
  345. continue;
  346. /*
  347. * This is the only place where empty cgroups are taken
  348. * off the top_q.
  349. */
  350. cgq_len = MEMBER_VPTR(cgrp_q_len, [*q_idx]);
  351. if (!cgq_len || !*cgq_len)
  352. continue;
  353. /*
  354. * If it has any tasks, requeue as we may race and not
  355. * execute it.
  356. */
  357. bpf_map_push_elem(&top_q, &new_cgid, 0);
  358. break;
  359. }
  360. bpf_spin_lock(&pairc->lock);
  361. /*
  362. * The other CPU may already have started on a new cgroup while
  363. * we dropped the lock. Make sure that we're still draining and
  364. * start on the new cgroup.
  365. */
  366. if (pairc->draining && !pairc->active_mask) {
  367. __sync_fetch_and_add(&nr_cgrp_next, 1);
  368. pairc->cgid = new_cgid;
  369. pairc->started_at = now;
  370. pairc->draining = false;
  371. kick_pair = true;
  372. } else {
  373. __sync_fetch_and_add(&nr_cgrp_coll, 1);
  374. }
  375. }
  376. cgid = pairc->cgid;
  377. pairc->active_mask |= in_pair_mask;
  378. bpf_spin_unlock(&pairc->lock);
  379. /* again, it'd be better to do all these with the lock held, oh well */
  380. vptr = bpf_map_lookup_elem(&cgrp_q_idx_hash, &cgid);
  381. if (!vptr) {
  382. scx_bpf_error("failed to lookup q_idx for cgroup[%llu]", cgid);
  383. return -ENOENT;
  384. }
  385. q_idx = *vptr;
  386. /* claim one task from cgrp_q w/ q_idx */
  387. bpf_repeat(BPF_MAX_LOOPS) {
  388. u64 *cgq_len, len;
  389. cgq_len = MEMBER_VPTR(cgrp_q_len, [q_idx]);
  390. if (!cgq_len || !(len = *(volatile u64 *)cgq_len)) {
  391. /* the cgroup must be empty, expire and repeat */
  392. __sync_fetch_and_add(&nr_cgrp_empty, 1);
  393. bpf_spin_lock(&pairc->lock);
  394. pairc->draining = true;
  395. pairc->active_mask &= ~in_pair_mask;
  396. bpf_spin_unlock(&pairc->lock);
  397. return -EAGAIN;
  398. }
  399. if (__sync_val_compare_and_swap(cgq_len, len, len - 1) != len)
  400. continue;
  401. break;
  402. }
  403. cgq_map = bpf_map_lookup_elem(&cgrp_q_arr, &q_idx);
  404. if (!cgq_map) {
  405. scx_bpf_error("failed to lookup cgq_map for cgroup[%llu] q_idx[%d]",
  406. cgid, q_idx);
  407. return -ENOENT;
  408. }
  409. if (bpf_map_pop_elem(cgq_map, &pid)) {
  410. scx_bpf_error("cgq_map is empty for cgroup[%llu] q_idx[%d]",
  411. cgid, q_idx);
  412. return -ENOENT;
  413. }
  414. p = bpf_task_from_pid(pid);
  415. if (p) {
  416. __sync_fetch_and_add(&nr_dispatched, 1);
  417. scx_bpf_dsq_insert(p, SCX_DSQ_GLOBAL, SCX_SLICE_DFL, 0);
  418. bpf_task_release(p);
  419. } else {
  420. /* we don't handle dequeues, retry on lost tasks */
  421. __sync_fetch_and_add(&nr_missing, 1);
  422. return -EAGAIN;
  423. }
  424. out_maybe_kick:
  425. if (kick_pair) {
  426. s32 *pair = (s32 *)ARRAY_ELEM_PTR(pair_cpu, cpu, nr_cpu_ids);
  427. if (pair) {
  428. __sync_fetch_and_add(&nr_kicks, 1);
  429. scx_bpf_kick_cpu(*pair, SCX_KICK_PREEMPT);
  430. }
  431. }
  432. return 0;
  433. }
  434. void BPF_STRUCT_OPS(pair_dispatch, s32 cpu, struct task_struct *prev)
  435. {
  436. bpf_repeat(BPF_MAX_LOOPS) {
  437. if (try_dispatch(cpu) != -EAGAIN)
  438. break;
  439. }
  440. }
  441. void BPF_STRUCT_OPS(pair_cpu_acquire, s32 cpu, struct scx_cpu_acquire_args *args)
  442. {
  443. int ret;
  444. u32 in_pair_mask;
  445. struct pair_ctx *pairc;
  446. bool kick_pair;
  447. ret = lookup_pairc_and_mask(cpu, &pairc, &in_pair_mask);
  448. if (ret)
  449. return;
  450. bpf_spin_lock(&pairc->lock);
  451. pairc->preempted_mask &= ~in_pair_mask;
  452. /* Kick the pair CPU, unless it was also preempted. */
  453. kick_pair = !pairc->preempted_mask;
  454. bpf_spin_unlock(&pairc->lock);
  455. if (kick_pair) {
  456. s32 *pair = (s32 *)ARRAY_ELEM_PTR(pair_cpu, cpu, nr_cpu_ids);
  457. if (pair) {
  458. __sync_fetch_and_add(&nr_kicks, 1);
  459. scx_bpf_kick_cpu(*pair, SCX_KICK_PREEMPT);
  460. }
  461. }
  462. }
  463. void BPF_STRUCT_OPS(pair_cpu_release, s32 cpu, struct scx_cpu_release_args *args)
  464. {
  465. int ret;
  466. u32 in_pair_mask;
  467. struct pair_ctx *pairc;
  468. bool kick_pair;
  469. ret = lookup_pairc_and_mask(cpu, &pairc, &in_pair_mask);
  470. if (ret)
  471. return;
  472. bpf_spin_lock(&pairc->lock);
  473. pairc->preempted_mask |= in_pair_mask;
  474. pairc->active_mask &= ~in_pair_mask;
  475. /* Kick the pair CPU if it's still running. */
  476. kick_pair = pairc->active_mask;
  477. pairc->draining = true;
  478. bpf_spin_unlock(&pairc->lock);
  479. if (kick_pair) {
  480. s32 *pair = (s32 *)ARRAY_ELEM_PTR(pair_cpu, cpu, nr_cpu_ids);
  481. if (pair) {
  482. __sync_fetch_and_add(&nr_kicks, 1);
  483. scx_bpf_kick_cpu(*pair, SCX_KICK_PREEMPT | SCX_KICK_WAIT);
  484. }
  485. }
  486. __sync_fetch_and_add(&nr_preemptions, 1);
  487. }
  488. s32 BPF_STRUCT_OPS(pair_cgroup_init, struct cgroup *cgrp)
  489. {
  490. u64 cgid = cgrp->kn->id;
  491. s32 i, q_idx;
  492. bpf_for(i, 0, MAX_CGRPS) {
  493. q_idx = __sync_fetch_and_add(&cgrp_q_idx_cursor, 1) % MAX_CGRPS;
  494. if (!__sync_val_compare_and_swap(&cgrp_q_idx_busy[q_idx], 0, 1))
  495. break;
  496. }
  497. if (i == MAX_CGRPS)
  498. return -EBUSY;
  499. if (bpf_map_update_elem(&cgrp_q_idx_hash, &cgid, &q_idx, BPF_ANY)) {
  500. u64 *busy = MEMBER_VPTR(cgrp_q_idx_busy, [q_idx]);
  501. if (busy)
  502. *busy = 0;
  503. return -EBUSY;
  504. }
  505. return 0;
  506. }
  507. void BPF_STRUCT_OPS(pair_cgroup_exit, struct cgroup *cgrp)
  508. {
  509. u64 cgid = cgrp->kn->id;
  510. s32 *q_idx;
  511. q_idx = bpf_map_lookup_elem(&cgrp_q_idx_hash, &cgid);
  512. if (q_idx) {
  513. u64 *busy = MEMBER_VPTR(cgrp_q_idx_busy, [*q_idx]);
  514. if (busy)
  515. *busy = 0;
  516. bpf_map_delete_elem(&cgrp_q_idx_hash, &cgid);
  517. }
  518. }
  519. void BPF_STRUCT_OPS(pair_exit, struct scx_exit_info *ei)
  520. {
  521. UEI_RECORD(uei, ei);
  522. }
  523. SCX_OPS_DEFINE(pair_ops,
  524. .enqueue = (void *)pair_enqueue,
  525. .dispatch = (void *)pair_dispatch,
  526. .cpu_acquire = (void *)pair_cpu_acquire,
  527. .cpu_release = (void *)pair_cpu_release,
  528. .cgroup_init = (void *)pair_cgroup_init,
  529. .cgroup_exit = (void *)pair_cgroup_exit,
  530. .exit = (void *)pair_exit,
  531. .name = "pair");