scx_flatcg.c 5.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243
  1. /* SPDX-License-Identifier: GPL-2.0 */
  2. /*
  3. * Copyright (c) 2023 Meta Platforms, Inc. and affiliates.
  4. * Copyright (c) 2023 Tejun Heo <tj@kernel.org>
  5. * Copyright (c) 2023 David Vernet <dvernet@meta.com>
  6. */
  7. #include <stdio.h>
  8. #include <signal.h>
  9. #include <assert.h>
  10. #include <unistd.h>
  11. #include <libgen.h>
  12. #include <limits.h>
  13. #include <inttypes.h>
  14. #include <fcntl.h>
  15. #include <time.h>
  16. #include <bpf/bpf.h>
  17. #include <scx/common.h>
  18. #include "scx_flatcg.h"
  19. #include "scx_flatcg.bpf.skel.h"
  20. #ifndef FILEID_KERNFS
  21. #define FILEID_KERNFS 0xfe
  22. #endif
  /*
   * Usage text printed by main() for -h and for unrecognized options. It is a
   * printf-style format with exactly one %s, which main() fills with the
   * basename of argv[0].
   *
   * Every option accepted by main()'s getopt() string ("s:i:dfvh") is listed
   * here so that the help output and the parser stay in sync.
   */
  const char help_fmt[] =
  "A flattened cgroup hierarchy sched_ext scheduler.\n"
  "\n"
  "See the top-level comment in .bpf.c for more details.\n"
  "\n"
  "Usage: %s [-s SLICE_US] [-i INTERVAL] [-d] [-f] [-v] [-h]\n"
  "\n"
  " -s SLICE_US Override slice duration\n"
  " -i INTERVAL Report interval\n"
  " -d Set the dump_cgrps flag (only echoed in the startup line)\n"
  " -f Use FIFO scheduling instead of weighted vtime scheduling\n"
  " -v Print libbpf debug messages\n"
  " -h Display this help and exit\n";
  /* Set by -v; when false, libbpf debug-level messages are not printed. */
  static bool verbose;

  /*
   * Set to 1 by the SIGINT/SIGTERM handler and polled by the main loop.
   * volatile sig_atomic_t is the only object type the C standard guarantees
   * can be safely written from an asynchronous signal handler.
   */
  static volatile sig_atomic_t exit_req;
  37. static int libbpf_print_fn(enum libbpf_print_level level, const char *format, va_list args)
  38. {
  39. if (level == LIBBPF_DEBUG && !verbose)
  40. return 0;
  41. return vfprintf(stderr, format, args);
  42. }
  43. static void sigint_handler(int dummy)
  44. {
  45. exit_req = 1;
  46. }
  47. static float read_cpu_util(__u64 *last_sum, __u64 *last_idle)
  48. {
  49. FILE *fp;
  50. char buf[4096];
  51. char *line, *cur = NULL, *tok;
  52. __u64 sum = 0, idle = 0;
  53. __u64 delta_sum, delta_idle;
  54. int idx;
  55. fp = fopen("/proc/stat", "r");
  56. if (!fp) {
  57. perror("fopen(\"/proc/stat\")");
  58. return 0.0;
  59. }
  60. if (!fgets(buf, sizeof(buf), fp)) {
  61. perror("fgets(\"/proc/stat\")");
  62. fclose(fp);
  63. return 0.0;
  64. }
  65. fclose(fp);
  66. line = buf;
  67. for (idx = 0; (tok = strtok_r(line, " \n", &cur)); idx++) {
  68. char *endp = NULL;
  69. __u64 v;
  70. if (idx == 0) {
  71. line = NULL;
  72. continue;
  73. }
  74. v = strtoull(tok, &endp, 0);
  75. if (!endp || *endp != '\0') {
  76. fprintf(stderr, "failed to parse %dth field of /proc/stat (\"%s\")\n",
  77. idx, tok);
  78. continue;
  79. }
  80. sum += v;
  81. if (idx == 4)
  82. idle = v;
  83. }
  84. delta_sum = sum - *last_sum;
  85. delta_idle = idle - *last_idle;
  86. *last_sum = sum;
  87. *last_idle = idle;
  88. return delta_sum ? (float)(delta_sum - delta_idle) / delta_sum : 0.0;
  89. }
  90. static void fcg_read_stats(struct scx_flatcg *skel, __u64 *stats)
  91. {
  92. __u64 *cnts;
  93. __u32 idx;
  94. cnts = calloc(skel->rodata->nr_cpus, sizeof(__u64));
  95. if (!cnts)
  96. return;
  97. memset(stats, 0, sizeof(stats[0]) * FCG_NR_STATS);
  98. for (idx = 0; idx < FCG_NR_STATS; idx++) {
  99. int ret, cpu;
  100. ret = bpf_map_lookup_elem(bpf_map__fd(skel->maps.stats),
  101. &idx, cnts);
  102. if (ret < 0)
  103. continue;
  104. for (cpu = 0; cpu < skel->rodata->nr_cpus; cpu++)
  105. stats[idx] += cnts[cpu];
  106. }
  107. free(cnts);
  108. }
  109. int main(int argc, char **argv)
  110. {
  111. struct scx_flatcg *skel;
  112. struct bpf_link *link;
  113. struct timespec intv_ts = { .tv_sec = 2, .tv_nsec = 0 };
  114. bool dump_cgrps = false;
  115. __u64 last_cpu_sum = 0, last_cpu_idle = 0;
  116. __u64 last_stats[FCG_NR_STATS] = {};
  117. unsigned long seq = 0;
  118. __s32 opt;
  119. __u64 ecode;
  120. libbpf_set_print(libbpf_print_fn);
  121. signal(SIGINT, sigint_handler);
  122. signal(SIGTERM, sigint_handler);
  123. restart:
  124. optind = 1;
  125. skel = SCX_OPS_OPEN(flatcg_ops, scx_flatcg);
  126. skel->rodata->nr_cpus = libbpf_num_possible_cpus();
  127. assert(skel->rodata->nr_cpus > 0);
  128. skel->rodata->cgrp_slice_ns = __COMPAT_ENUM_OR_ZERO("scx_public_consts", "SCX_SLICE_DFL");
  129. while ((opt = getopt(argc, argv, "s:i:dfvh")) != -1) {
  130. double v;
  131. switch (opt) {
  132. case 's':
  133. v = strtod(optarg, NULL);
  134. skel->rodata->cgrp_slice_ns = v * 1000;
  135. break;
  136. case 'i':
  137. v = strtod(optarg, NULL);
  138. intv_ts.tv_sec = v;
  139. intv_ts.tv_nsec = (v - (float)intv_ts.tv_sec) * 1000000000;
  140. break;
  141. case 'd':
  142. dump_cgrps = true;
  143. break;
  144. case 'f':
  145. skel->rodata->fifo_sched = true;
  146. break;
  147. case 'v':
  148. verbose = true;
  149. break;
  150. case 'h':
  151. default:
  152. fprintf(stderr, help_fmt, basename(argv[0]));
  153. return opt != 'h';
  154. }
  155. }
  156. printf("slice=%.1lfms intv=%.1lfs dump_cgrps=%d",
  157. (double)skel->rodata->cgrp_slice_ns / 1000000.0,
  158. (double)intv_ts.tv_sec + (double)intv_ts.tv_nsec / 1000000000.0,
  159. dump_cgrps);
  160. SCX_OPS_LOAD(skel, flatcg_ops, scx_flatcg, uei);
  161. link = SCX_OPS_ATTACH(skel, flatcg_ops, scx_flatcg);
  162. while (!exit_req && !UEI_EXITED(skel, uei)) {
  163. __u64 acc_stats[FCG_NR_STATS];
  164. __u64 stats[FCG_NR_STATS];
  165. float cpu_util;
  166. int i;
  167. cpu_util = read_cpu_util(&last_cpu_sum, &last_cpu_idle);
  168. fcg_read_stats(skel, acc_stats);
  169. for (i = 0; i < FCG_NR_STATS; i++)
  170. stats[i] = acc_stats[i] - last_stats[i];
  171. memcpy(last_stats, acc_stats, sizeof(acc_stats));
  172. printf("\n[SEQ %6lu cpu=%5.1lf hweight_gen=%" PRIu64 "]\n",
  173. seq++, cpu_util * 100.0, skel->data->hweight_gen);
  174. printf(" act:%6llu deact:%6llu global:%6llu local:%6llu\n",
  175. stats[FCG_STAT_ACT],
  176. stats[FCG_STAT_DEACT],
  177. stats[FCG_STAT_GLOBAL],
  178. stats[FCG_STAT_LOCAL]);
  179. printf("HWT cache:%6llu update:%6llu skip:%6llu race:%6llu\n",
  180. stats[FCG_STAT_HWT_CACHE],
  181. stats[FCG_STAT_HWT_UPDATES],
  182. stats[FCG_STAT_HWT_SKIP],
  183. stats[FCG_STAT_HWT_RACE]);
  184. printf("ENQ skip:%6llu race:%6llu\n",
  185. stats[FCG_STAT_ENQ_SKIP],
  186. stats[FCG_STAT_ENQ_RACE]);
  187. printf("CNS keep:%6llu expire:%6llu empty:%6llu gone:%6llu\n",
  188. stats[FCG_STAT_CNS_KEEP],
  189. stats[FCG_STAT_CNS_EXPIRE],
  190. stats[FCG_STAT_CNS_EMPTY],
  191. stats[FCG_STAT_CNS_GONE]);
  192. printf("PNC next:%6llu empty:%6llu nocgrp:%6llu gone:%6llu race:%6llu fail:%6llu\n",
  193. stats[FCG_STAT_PNC_NEXT],
  194. stats[FCG_STAT_PNC_EMPTY],
  195. stats[FCG_STAT_PNC_NO_CGRP],
  196. stats[FCG_STAT_PNC_GONE],
  197. stats[FCG_STAT_PNC_RACE],
  198. stats[FCG_STAT_PNC_FAIL]);
  199. printf("BAD remove:%6llu\n",
  200. acc_stats[FCG_STAT_BAD_REMOVAL]);
  201. fflush(stdout);
  202. nanosleep(&intv_ts, NULL);
  203. }
  204. bpf_link__destroy(link);
  205. ecode = UEI_REPORT(skel, uei);
  206. scx_flatcg__destroy(skel);
  207. if (UEI_ECODE_RESTART(ecode))
  208. goto restart;
  209. return 0;
  210. }