delaytop.c 31 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145
  // SPDX-License-Identifier: GPL-2.0
  /*
   * delaytop.c - system-wide delay monitoring tool.
   *
   * This tool provides real-time monitoring and statistics of
   * system, container, and task-level delays, including CPU,
   * memory, IO, and IRQ. It supports an interactive (top-like)
   * mode and can output delay information for the whole system,
   * specific containers (cgroups), or individual tasks (PIDs).
   *
   * Key features:
   * - Collects per-task delay accounting statistics via taskstats.
   * - Collects system-wide PSI information.
   * - Supports sorting and filtering.
   * - Supports both interactive (screen refresh) and one-shot (--once) output.
   *
   * Copyright (C) Fan Yu, ZTE Corp. 2025
   * Copyright (C) Wang Yaxin, ZTE Corp. 2025
   *
   * Compile with
   * gcc -I/usr/src/linux/include delaytop.c -o delaytop
   */
  23. #include <stdio.h>
  24. #include <stdlib.h>
  25. #include <string.h>
  26. #include <errno.h>
  27. #include <unistd.h>
  28. #include <fcntl.h>
  29. #include <getopt.h>
  30. #include <signal.h>
  31. #include <time.h>
  32. #include <dirent.h>
  33. #include <ctype.h>
  34. #include <stdbool.h>
  35. #include <sys/types.h>
  36. #include <sys/stat.h>
  37. #include <sys/socket.h>
  38. #include <sys/select.h>
  39. #include <termios.h>
  40. #include <limits.h>
  41. #include <linux/genetlink.h>
  42. #include <linux/taskstats.h>
  43. #include <linux/cgroupstats.h>
  44. #include <stddef.h>
  /* Locations of the PSI (Pressure Stall Information) files in procfs */
  #define PSI_PATH "/proc/pressure"
  #define PSI_CPU_PATH "/proc/pressure/cpu"
  #define PSI_MEMORY_PATH "/proc/pressure/memory"
  #define PSI_IO_PATH "/proc/pressure/io"
  #define PSI_IRQ_PATH "/proc/pressure/irq"

  /*
   * Netlink attribute / generic netlink helpers.
   * Every macro argument is parenthesized so that compound expressions
   * expand correctly, and pointer arithmetic is done on char * rather
   * than relying on the GNU void * arithmetic extension.
   */
  #define NLA_NEXT(na) ((struct nlattr *)((char *)(na) + NLA_ALIGN((na)->nla_len)))
  #define NLA_DATA(na) ((void *)((char *)(na) + NLA_HDRLEN))
  #define NLA_PAYLOAD(len) ((len) - NLA_HDRLEN)
  #define GENLMSG_DATA(glh) ((void *)((char *)NLMSG_DATA(glh) + GENL_HDRLEN))
  #define GENLMSG_PAYLOAD(glh) (NLMSG_PAYLOAD(glh, 0) - GENL_HDRLEN)

  /* Fixed sizes used throughout the tool */
  #define TASK_COMM_LEN 16
  #define MAX_MSG_SIZE 1024
  #define MAX_TASKS 1000
  #define MAX_BUF_LEN 256

  /*
   * Copy one field from the local "struct taskstats stats" into
   * tasks[task_count]; both names must be in scope at the call site.
   */
  #define SET_TASK_STAT(task_count, field) \
      (tasks[(task_count)].field = stats.field)

  /* fprintf() wrapper that evaluates to 1 on success and 0 on failure */
  #define BOOL_FPRINT(stream, fmt, ...) \
  ({ \
      int bool_fprint_ret_ = fprintf(stream, fmt, ##__VA_ARGS__); \
      bool_fprint_ret_ >= 0; \
  })

  /* Average delay in ms for the <field>_delay_total / <field>_count pair */
  #define TASK_AVG(task, field) average_ms((task).field##_delay_total, (task).field##_count)

  /* Output formats */
  #define PSI_LINE_FORMAT "%-12s %6.1f%%/%6.1f%%/%6.1f%%/%8llu(ms)\n"
  #define DELAY_FMT_DEFAULT "%8.2f %8.2f %8.2f %8.2f\n"
  #define DELAY_FMT_MEMVERBOSE "%8.2f %8.2f %8.2f %8.2f %8.2f %8.2f\n"

  /*
   * Build one struct field_desc initializer: the field name, its
   * interactive command character, the offsets of the matching
   * <name>_delay_total and <name>_count members, and the display modes
   * in which the field is available.
   */
  #define SORT_FIELD(name, cmd, modes) \
      {#name, #cmd, \
       offsetof(struct task_info, name##_delay_total), \
       offsetof(struct task_info, name##_count), \
       modes}

  /* Table terminator: a NULL name marks the end of the field list */
  #define END_FIELD {NULL, NULL, 0, 0, 0}

  /* Display mode types */
  #define MODE_TYPE_ALL (0xFFFFFFFF)
  #define MODE_DEFAULT (1 << 0)
  #define MODE_MEMVERBOSE (1 << 1)
  /*
   * PSI statistics structure.
   *
   * One group per resource and per stall type ("some" / "full"):
   * the three averaged values (avg10, avg60, avg300) followed by the
   * total value, as read from the files under /proc/pressure.
   */
  struct psi_stats {
      /* CPU, "some" line */
      double cpu_some_avg10;
      double cpu_some_avg60;
      double cpu_some_avg300;
      unsigned long long cpu_some_total;
      /* CPU, "full" line */
      double cpu_full_avg10;
      double cpu_full_avg60;
      double cpu_full_avg300;
      unsigned long long cpu_full_total;
      /* Memory, "some" line */
      double memory_some_avg10;
      double memory_some_avg60;
      double memory_some_avg300;
      unsigned long long memory_some_total;
      /* Memory, "full" line */
      double memory_full_avg10;
      double memory_full_avg60;
      double memory_full_avg300;
      unsigned long long memory_full_total;
      /* IO, "some" line */
      double io_some_avg10;
      double io_some_avg60;
      double io_some_avg300;
      unsigned long long io_some_total;
      /* IO, "full" line */
      double io_full_avg10;
      double io_full_avg60;
      double io_full_avg300;
      unsigned long long io_full_total;
      /* IRQ, "full" line only */
      double irq_full_avg10;
      double irq_full_avg60;
      double irq_full_avg300;
      unsigned long long irq_full_total;
  };
  96. /* Task delay information structure */
  97. struct task_info {
  98. int pid;
  99. int tgid;
  100. char command[TASK_COMM_LEN];
  101. unsigned long long cpu_count;
  102. unsigned long long cpu_delay_total;
  103. unsigned long long blkio_count;
  104. unsigned long long blkio_delay_total;
  105. unsigned long long swapin_count;
  106. unsigned long long swapin_delay_total;
  107. unsigned long long freepages_count;
  108. unsigned long long freepages_delay_total;
  109. unsigned long long thrashing_count;
  110. unsigned long long thrashing_delay_total;
  111. unsigned long long compact_count;
  112. unsigned long long compact_delay_total;
  113. unsigned long long wpcopy_count;
  114. unsigned long long wpcopy_delay_total;
  115. unsigned long long irq_count;
  116. unsigned long long irq_delay_total;
  117. unsigned long long mem_count;
  118. unsigned long long mem_delay_total;
  119. };
  /* Container statistics structure: per-state process counts of a cgroup */
  struct container_stats {
      /* Number of sleeping processes */
      int nr_sleeping;
      /* Number of running processes */
      int nr_running;
      /* Number of stopped processes */
      int nr_stopped;
      /* Number of uninterruptible processes */
      int nr_uninterruptible;
      /* Number of processes in IO wait */
      int nr_io_wait;
  };
  /*
   * Delay field structure: describes one sortable delay category and
   * where its data lives inside struct task_info.
   */
  struct field_desc {
      /* Field name for cmdline argument */
      const char *name;
      /* Interactive command */
      const char *cmd_char;
      /* Offset of total delay in task_info */
      unsigned long total_offset;
      /* Offset of count in task_info */
      unsigned long count_offset;
      /* Supported display modes */
      size_t supported_modes;
  };
  /* Program settings structure, filled in from the command line */
  struct config {
      /* Update interval in seconds */
      int delay;
      /* Number of iterations, 0 == infinite */
      int iterations;
      /* Maximum number of processes to show */
      int max_processes;
      /* Output once and exit */
      int output_one_time;
      /* Monitor specific PID */
      int monitor_pid;
      /* Path to container cgroup */
      char *container_path;
      /* Current sort field */
      const struct field_desc *sort_field;
      /* Current display mode */
      size_t display_mode;
  };
  147. /* Global variables */
  148. static struct config cfg;
  149. static struct psi_stats psi;
  150. static struct task_info tasks[MAX_TASKS];
  151. static int task_count;
  152. static int running = 1;
  153. static struct container_stats container_stats;
  154. static const struct field_desc sort_fields[] = {
  155. SORT_FIELD(cpu, c, MODE_DEFAULT),
  156. SORT_FIELD(blkio, i, MODE_DEFAULT),
  157. SORT_FIELD(irq, q, MODE_DEFAULT),
  158. SORT_FIELD(mem, m, MODE_DEFAULT | MODE_MEMVERBOSE),
  159. SORT_FIELD(swapin, s, MODE_MEMVERBOSE),
  160. SORT_FIELD(freepages, r, MODE_MEMVERBOSE),
  161. SORT_FIELD(thrashing, t, MODE_MEMVERBOSE),
  162. SORT_FIELD(compact, p, MODE_MEMVERBOSE),
  163. SORT_FIELD(wpcopy, w, MODE_MEMVERBOSE),
  164. END_FIELD
  165. };
  166. static int sort_selected;
  167. /* Netlink socket variables */
  168. static int nl_sd = -1;
  169. static int family_id;
  /* Terminal attributes saved by enable_raw_mode() for later restoration */
  static struct termios orig_termios;

  /*
   * Set terminal to non-canonical mode for q-to-quit: save the current
   * stdin attributes, then turn off line buffering and input echo.
   */
  static void enable_raw_mode(void)
  {
      struct termios raw_attrs;

      tcgetattr(STDIN_FILENO, &orig_termios);

      raw_attrs = orig_termios;
      raw_attrs.c_lflag &= ~ICANON;
      raw_attrs.c_lflag &= ~ECHO;

      tcsetattr(STDIN_FILENO, TCSAFLUSH, &raw_attrs);
  }
  180. static void disable_raw_mode(void)
  181. {
  182. tcsetattr(STDIN_FILENO, TCSAFLUSH, &orig_termios);
  183. }
  184. /* Find field descriptor by command line */
  185. static const struct field_desc *get_field_by_cmd_char(char ch)
  186. {
  187. const struct field_desc *field;
  188. for (field = sort_fields; field->name != NULL; field++) {
  189. if (field->cmd_char[0] == ch)
  190. return field;
  191. }
  192. return NULL;
  193. }
  194. /* Find field descriptor by name with string comparison */
  195. static const struct field_desc *get_field_by_name(const char *name)
  196. {
  197. const struct field_desc *field;
  198. size_t field_len;
  199. for (field = sort_fields; field->name != NULL; field++) {
  200. field_len = strlen(field->name);
  201. if (field_len != strlen(name))
  202. continue;
  203. if (strncmp(field->name, name, field_len) == 0)
  204. return field;
  205. }
  206. return NULL;
  207. }
  208. /* Find display name for a field descriptor */
  209. static const char *get_name_by_field(const struct field_desc *field)
  210. {
  211. return field ? field->name : "UNKNOWN";
  212. }
  213. /* Generate string of available field names */
  214. static void display_available_fields(size_t mode)
  215. {
  216. const struct field_desc *field;
  217. char buf[MAX_BUF_LEN];
  218. buf[0] = '\0';
  219. for (field = sort_fields; field->name != NULL; field++) {
  220. if (!(field->supported_modes & mode))
  221. continue;
  222. strncat(buf, "|", MAX_BUF_LEN - strlen(buf) - 1);
  223. strncat(buf, field->name, MAX_BUF_LEN - strlen(buf) - 1);
  224. buf[MAX_BUF_LEN - 1] = '\0';
  225. }
  226. fprintf(stderr, "Available fields: %s\n", buf);
  227. }
  /* Print the command line help text to stdout and exit with status 0 */
  static void usage(void)
  {
      static const char help_text[] =
          "Usage: delaytop [Options]\n"
          "Options:\n"
          " -h, --help Show this help message and exit\n"
          " -d, --delay=SECONDS Set refresh interval (default: 2 seconds, min: 1)\n"
          " -n, --iterations=COUNT Set number of updates (default: 0 = infinite)\n"
          " -P, --processes=NUMBER Set maximum number of processes to show (default: 20, max: 1000)\n"
          " -o, --once Display once and exit\n"
          " -p, --pid=PID Monitor only the specified PID\n"
          " -C, --container=PATH Monitor the container at specified cgroup path\n"
          " -s, --sort=FIELD Sort by delay field (default: cpu)\n"
          " -M, --memverbose Display memory detailed information\n";

      /* The text contains no conversion specifiers, so fputs() prints it verbatim */
      fputs(help_text, stdout);
      exit(0);
  }
  244. /* Parse command line arguments and set configuration */
  245. static void parse_args(int argc, char **argv)
  246. {
  247. int c;
  248. const struct field_desc *field;
  249. struct option long_options[] = {
  250. {"help", no_argument, 0, 'h'},
  251. {"delay", required_argument, 0, 'd'},
  252. {"iterations", required_argument, 0, 'n'},
  253. {"pid", required_argument, 0, 'p'},
  254. {"once", no_argument, 0, 'o'},
  255. {"processes", required_argument, 0, 'P'},
  256. {"sort", required_argument, 0, 's'},
  257. {"container", required_argument, 0, 'C'},
  258. {"memverbose", no_argument, 0, 'M'},
  259. {0, 0, 0, 0}
  260. };
  261. /* Set defaults */
  262. cfg.delay = 2;
  263. cfg.iterations = 0;
  264. cfg.max_processes = 20;
  265. cfg.sort_field = &sort_fields[0]; /* Default sorted by CPU delay */
  266. cfg.output_one_time = 0;
  267. cfg.monitor_pid = 0; /* 0 means monitor all PIDs */
  268. cfg.container_path = NULL;
  269. cfg.display_mode = MODE_DEFAULT;
  270. while (1) {
  271. int option_index = 0;
  272. c = getopt_long(argc, argv, "hd:n:p:oP:C:s:M", long_options, &option_index);
  273. if (c == -1)
  274. break;
  275. switch (c) {
  276. case 'h':
  277. usage();
  278. break;
  279. case 'd':
  280. cfg.delay = atoi(optarg);
  281. if (cfg.delay < 1) {
  282. fprintf(stderr, "Error: delay must be >= 1.\n");
  283. exit(1);
  284. }
  285. break;
  286. case 'n':
  287. cfg.iterations = atoi(optarg);
  288. if (cfg.iterations < 0) {
  289. fprintf(stderr, "Error: iterations must be >= 0.\n");
  290. exit(1);
  291. }
  292. break;
  293. case 'p':
  294. cfg.monitor_pid = atoi(optarg);
  295. if (cfg.monitor_pid < 1) {
  296. fprintf(stderr, "Error: pid must be >= 1.\n");
  297. exit(1);
  298. }
  299. break;
  300. case 'o':
  301. cfg.output_one_time = 1;
  302. break;
  303. case 'P':
  304. cfg.max_processes = atoi(optarg);
  305. if (cfg.max_processes < 1) {
  306. fprintf(stderr, "Error: processes must be >= 1.\n");
  307. exit(1);
  308. }
  309. if (cfg.max_processes > MAX_TASKS) {
  310. fprintf(stderr, "Warning: processes capped to %d.\n",
  311. MAX_TASKS);
  312. cfg.max_processes = MAX_TASKS;
  313. }
  314. break;
  315. case 'C':
  316. cfg.container_path = strdup(optarg);
  317. break;
  318. case 's':
  319. if (strlen(optarg) == 0) {
  320. fprintf(stderr, "Error: empty sort field\n");
  321. exit(1);
  322. }
  323. field = get_field_by_name(optarg);
  324. /* Show available fields if invalid option provided */
  325. if (!field) {
  326. fprintf(stderr, "Error: invalid sort field '%s'\n", optarg);
  327. display_available_fields(MODE_TYPE_ALL);
  328. exit(1);
  329. }
  330. cfg.sort_field = field;
  331. break;
  332. case 'M':
  333. cfg.display_mode = MODE_MEMVERBOSE;
  334. cfg.sort_field = get_field_by_name("mem");
  335. break;
  336. default:
  337. fprintf(stderr, "Try 'delaytop --help' for more information.\n");
  338. exit(1);
  339. }
  340. }
  341. }
  342. /* Calculate average delay in milliseconds for overall memory */
  343. static void set_mem_delay_total(struct task_info *t)
  344. {
  345. t->mem_delay_total = t->swapin_delay_total +
  346. t->freepages_delay_total +
  347. t->thrashing_delay_total +
  348. t->compact_delay_total +
  349. t->wpcopy_delay_total;
  350. }
  351. static void set_mem_count(struct task_info *t)
  352. {
  353. t->mem_count = t->swapin_count +
  354. t->freepages_count +
  355. t->thrashing_count +
  356. t->compact_count +
  357. t->wpcopy_count;
  358. }
  /*
   * Create a raw generic-netlink socket and bind it to a zeroed
   * AF_NETLINK address. Returns the socket descriptor, or -1 on failure.
   */
  static int create_nl_socket(void)
  {
      struct sockaddr_nl local;
      int sock;

      sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_GENERIC);
      if (sock >= 0) {
          memset(&local, 0, sizeof(local));
          local.nl_family = AF_NETLINK;

          if (bind(sock, (struct sockaddr *) &local, sizeof(local)) >= 0)
              return sock;

          fprintf(stderr, "Failed to bind socket when create nl_socket\n");
          close(sock);
      }

      return -1;
  }
  376. /* Send a command via netlink */
  377. static int send_cmd(int sd, __u16 nlmsg_type, __u32 nlmsg_pid,
  378. __u8 genl_cmd, __u16 nla_type,
  379. void *nla_data, int nla_len)
  380. {
  381. struct sockaddr_nl nladdr;
  382. struct nlattr *na;
  383. int r, buflen;
  384. char *buf;
  385. struct {
  386. struct nlmsghdr n;
  387. struct genlmsghdr g;
  388. char buf[MAX_MSG_SIZE];
  389. } msg;
  390. msg.n.nlmsg_len = NLMSG_LENGTH(GENL_HDRLEN);
  391. msg.n.nlmsg_type = nlmsg_type;
  392. msg.n.nlmsg_flags = NLM_F_REQUEST;
  393. msg.n.nlmsg_seq = 0;
  394. msg.n.nlmsg_pid = nlmsg_pid;
  395. msg.g.cmd = genl_cmd;
  396. msg.g.version = 0x1;
  397. na = (struct nlattr *) GENLMSG_DATA(&msg);
  398. na->nla_type = nla_type;
  399. na->nla_len = nla_len + NLA_HDRLEN;
  400. memcpy(NLA_DATA(na), nla_data, nla_len);
  401. msg.n.nlmsg_len += NLMSG_ALIGN(na->nla_len);
  402. buf = (char *) &msg;
  403. buflen = msg.n.nlmsg_len;
  404. memset(&nladdr, 0, sizeof(nladdr));
  405. nladdr.nl_family = AF_NETLINK;
  406. while ((r = sendto(sd, buf, buflen, 0, (struct sockaddr *) &nladdr,
  407. sizeof(nladdr))) < buflen) {
  408. if (r > 0) {
  409. buf += r;
  410. buflen -= r;
  411. } else if (errno != EAGAIN)
  412. return -1;
  413. }
  414. return 0;
  415. }
  /*
   * Get family ID for taskstats via netlink.
   *
   * Sends CTRL_CMD_GETFAMILY for TASKSTATS_GENL_NAME on @sd and looks for
   * the CTRL_ATTR_FAMILY_ID attribute in the reply.
   *
   * Returns the family id, or 0 if the request fails, the reply is
   * invalid, or the reply carries no family id attribute.
   */
  static int get_family_id(int sd)
  {
      struct {
          struct nlmsghdr n;
          struct genlmsghdr g;
          char buf[256];
      } ans;
      char name[] = TASKSTATS_GENL_NAME;
      struct nlattr *na;
      __u16 id;
      int remaining;
      int rep_len;
      int rc;

      rc = send_cmd(sd, GENL_ID_CTRL, getpid(), CTRL_CMD_GETFAMILY,
                    CTRL_ATTR_FAMILY_NAME, (void *)name, sizeof(name));
      if (rc < 0) {
          fprintf(stderr, "Failed to send cmd for family id\n");
          return 0;
      }

      /*
       * Check the receive length before looking at any header field:
       * after a failed or short recv() the buffer holds no valid header.
       */
      rep_len = recv(sd, &ans, sizeof(ans), 0);
      if (rep_len < 0 || !NLMSG_OK((&ans.n), rep_len) ||
          ans.n.nlmsg_type == NLMSG_ERROR) {
          fprintf(stderr, "Failed to receive response for family id\n");
          return 0;
      }

      /* Walk the attributes, never stepping outside the received payload */
      remaining = GENLMSG_PAYLOAD(&ans.n);
      na = (struct nlattr *) GENLMSG_DATA(&ans);
      while (remaining >= NLA_HDRLEN) {
          if (na->nla_len < NLA_HDRLEN || na->nla_len > remaining)
              break; /* Malformed attribute */

          if (na->nla_type == CTRL_ATTR_FAMILY_ID &&
              na->nla_len >= NLA_HDRLEN + (int)sizeof(id)) {
              memcpy(&id, NLA_DATA(na), sizeof(id));
              return id;
          }

          remaining -= NLA_ALIGN(na->nla_len);
          na = NLA_NEXT(na);
      }

      return 0;
  }
  449. static int read_psi_stats(void)
  450. {
  451. FILE *fp;
  452. char line[256];
  453. int ret = 0;
  454. int error_count = 0;
  455. /* Check if PSI path exists */
  456. if (access(PSI_PATH, F_OK) != 0) {
  457. fprintf(stderr, "Error: PSI interface not found at %s\n", PSI_PATH);
  458. fprintf(stderr, "Please ensure your kernel supports PSI (Pressure Stall Information)\n");
  459. return -1;
  460. }
  461. /* Zero all fields */
  462. memset(&psi, 0, sizeof(psi));
  463. /* CPU pressure */
  464. fp = fopen(PSI_CPU_PATH, "r");
  465. if (fp) {
  466. while (fgets(line, sizeof(line), fp)) {
  467. if (strncmp(line, "some", 4) == 0) {
  468. ret = sscanf(line, "some avg10=%lf avg60=%lf avg300=%lf total=%llu",
  469. &psi.cpu_some_avg10, &psi.cpu_some_avg60,
  470. &psi.cpu_some_avg300, &psi.cpu_some_total);
  471. if (ret != 4) {
  472. fprintf(stderr, "Failed to parse CPU some PSI data\n");
  473. error_count++;
  474. }
  475. } else if (strncmp(line, "full", 4) == 0) {
  476. ret = sscanf(line, "full avg10=%lf avg60=%lf avg300=%lf total=%llu",
  477. &psi.cpu_full_avg10, &psi.cpu_full_avg60,
  478. &psi.cpu_full_avg300, &psi.cpu_full_total);
  479. if (ret != 4) {
  480. fprintf(stderr, "Failed to parse CPU full PSI data\n");
  481. error_count++;
  482. }
  483. }
  484. }
  485. fclose(fp);
  486. } else {
  487. fprintf(stderr, "Warning: Failed to open %s\n", PSI_CPU_PATH);
  488. error_count++;
  489. }
  490. /* Memory pressure */
  491. fp = fopen(PSI_MEMORY_PATH, "r");
  492. if (fp) {
  493. while (fgets(line, sizeof(line), fp)) {
  494. if (strncmp(line, "some", 4) == 0) {
  495. ret = sscanf(line, "some avg10=%lf avg60=%lf avg300=%lf total=%llu",
  496. &psi.memory_some_avg10, &psi.memory_some_avg60,
  497. &psi.memory_some_avg300, &psi.memory_some_total);
  498. if (ret != 4) {
  499. fprintf(stderr, "Failed to parse Memory some PSI data\n");
  500. error_count++;
  501. }
  502. } else if (strncmp(line, "full", 4) == 0) {
  503. ret = sscanf(line, "full avg10=%lf avg60=%lf avg300=%lf total=%llu",
  504. &psi.memory_full_avg10, &psi.memory_full_avg60,
  505. &psi.memory_full_avg300, &psi.memory_full_total);
  506. if (ret != 4) {
  507. fprintf(stderr, "Failed to parse Memory full PSI data\n");
  508. error_count++;
  509. }
  510. }
  511. }
  512. fclose(fp);
  513. } else {
  514. fprintf(stderr, "Warning: Failed to open %s\n", PSI_MEMORY_PATH);
  515. error_count++;
  516. }
  517. /* IO pressure */
  518. fp = fopen(PSI_IO_PATH, "r");
  519. if (fp) {
  520. while (fgets(line, sizeof(line), fp)) {
  521. if (strncmp(line, "some", 4) == 0) {
  522. ret = sscanf(line, "some avg10=%lf avg60=%lf avg300=%lf total=%llu",
  523. &psi.io_some_avg10, &psi.io_some_avg60,
  524. &psi.io_some_avg300, &psi.io_some_total);
  525. if (ret != 4) {
  526. fprintf(stderr, "Failed to parse IO some PSI data\n");
  527. error_count++;
  528. }
  529. } else if (strncmp(line, "full", 4) == 0) {
  530. ret = sscanf(line, "full avg10=%lf avg60=%lf avg300=%lf total=%llu",
  531. &psi.io_full_avg10, &psi.io_full_avg60,
  532. &psi.io_full_avg300, &psi.io_full_total);
  533. if (ret != 4) {
  534. fprintf(stderr, "Failed to parse IO full PSI data\n");
  535. error_count++;
  536. }
  537. }
  538. }
  539. fclose(fp);
  540. } else {
  541. fprintf(stderr, "Warning: Failed to open %s\n", PSI_IO_PATH);
  542. error_count++;
  543. }
  544. /* IRQ pressure (only full) */
  545. fp = fopen(PSI_IRQ_PATH, "r");
  546. if (fp) {
  547. while (fgets(line, sizeof(line), fp)) {
  548. if (strncmp(line, "full", 4) == 0) {
  549. ret = sscanf(line, "full avg10=%lf avg60=%lf avg300=%lf total=%llu",
  550. &psi.irq_full_avg10, &psi.irq_full_avg60,
  551. &psi.irq_full_avg300, &psi.irq_full_total);
  552. if (ret != 4) {
  553. fprintf(stderr, "Failed to parse IRQ full PSI data\n");
  554. error_count++;
  555. }
  556. }
  557. }
  558. fclose(fp);
  559. } else {
  560. fprintf(stderr, "Warning: Failed to open %s\n", PSI_IRQ_PATH);
  561. error_count++;
  562. }
  563. /* Return error count: 0 means success, >0 means warnings, -1 means fatal error */
  564. if (error_count > 0) {
  565. fprintf(stderr, "PSI stats reading completed with %d warnings\n", error_count);
  566. return error_count;
  567. }
  568. return 0;
  569. }
  /*
   * Read the command name of a process from /proc/<pid>/comm.
   *
   * @pid:      process id
   * @comm_buf: buffer receiving the name, without the trailing newline
   * @buf_size: size of @comm_buf
   *
   * Returns 0 on success, -1 if the file cannot be opened or read.
   */
  static int read_comm(int pid, char *comm_buf, size_t buf_size)
  {
      char proc_path[64];
      FILE *comm_file;
      int status;

      snprintf(proc_path, sizeof(proc_path), "/proc/%d/comm", pid);

      comm_file = fopen(proc_path, "r");
      if (comm_file == NULL) {
          fprintf(stderr, "Failed to open comm file /proc/%d/comm\n", pid);
          return -1;
      }

      status = -1;
      if (fgets(comm_buf, buf_size, comm_file) != NULL) {
          /* fgets() stops at the first newline, so it can only be the last character */
          comm_buf[strcspn(comm_buf, "\n")] = '\0';
          status = 0;
      }

      fclose(comm_file);
      return status;
  }
  591. static void fetch_and_fill_task_info(int pid, const char *comm)
  592. {
  593. struct {
  594. struct nlmsghdr n;
  595. struct genlmsghdr g;
  596. char buf[MAX_MSG_SIZE];
  597. } resp;
  598. struct taskstats stats;
  599. struct nlattr *nested;
  600. struct nlattr *na;
  601. int nested_len;
  602. int nl_len;
  603. int rc;
  604. /* Send request for task stats */
  605. if (send_cmd(nl_sd, family_id, getpid(), TASKSTATS_CMD_GET,
  606. TASKSTATS_CMD_ATTR_PID, &pid, sizeof(pid)) < 0) {
  607. fprintf(stderr, "Failed to send request for task stats\n");
  608. return;
  609. }
  610. /* Receive response */
  611. rc = recv(nl_sd, &resp, sizeof(resp), 0);
  612. if (rc < 0 || resp.n.nlmsg_type == NLMSG_ERROR) {
  613. fprintf(stderr, "Failed to receive response for task stats\n");
  614. return;
  615. }
  616. /* Parse response */
  617. nl_len = GENLMSG_PAYLOAD(&resp.n);
  618. na = (struct nlattr *) GENLMSG_DATA(&resp);
  619. while (nl_len > 0) {
  620. if (na->nla_type == TASKSTATS_TYPE_AGGR_PID) {
  621. nested = (struct nlattr *) NLA_DATA(na);
  622. nested_len = NLA_PAYLOAD(na->nla_len);
  623. while (nested_len > 0) {
  624. if (nested->nla_type == TASKSTATS_TYPE_STATS) {
  625. memcpy(&stats, NLA_DATA(nested), sizeof(stats));
  626. if (task_count < MAX_TASKS) {
  627. tasks[task_count].pid = pid;
  628. tasks[task_count].tgid = pid;
  629. strncpy(tasks[task_count].command, comm,
  630. TASK_COMM_LEN - 1);
  631. tasks[task_count].command[TASK_COMM_LEN - 1] = '\0';
  632. SET_TASK_STAT(task_count, cpu_count);
  633. SET_TASK_STAT(task_count, cpu_delay_total);
  634. SET_TASK_STAT(task_count, blkio_count);
  635. SET_TASK_STAT(task_count, blkio_delay_total);
  636. SET_TASK_STAT(task_count, swapin_count);
  637. SET_TASK_STAT(task_count, swapin_delay_total);
  638. SET_TASK_STAT(task_count, freepages_count);
  639. SET_TASK_STAT(task_count, freepages_delay_total);
  640. SET_TASK_STAT(task_count, thrashing_count);
  641. SET_TASK_STAT(task_count, thrashing_delay_total);
  642. SET_TASK_STAT(task_count, compact_count);
  643. SET_TASK_STAT(task_count, compact_delay_total);
  644. SET_TASK_STAT(task_count, wpcopy_count);
  645. SET_TASK_STAT(task_count, wpcopy_delay_total);
  646. SET_TASK_STAT(task_count, irq_count);
  647. SET_TASK_STAT(task_count, irq_delay_total);
  648. set_mem_count(&tasks[task_count]);
  649. set_mem_delay_total(&tasks[task_count]);
  650. task_count++;
  651. }
  652. break;
  653. }
  654. nested_len -= NLA_ALIGN(nested->nla_len);
  655. nested = NLA_NEXT(nested);
  656. }
  657. }
  658. nl_len -= NLA_ALIGN(na->nla_len);
  659. na = NLA_NEXT(na);
  660. }
  661. return;
  662. }
  663. static void get_task_delays(void)
  664. {
  665. char comm[TASK_COMM_LEN];
  666. struct dirent *entry;
  667. DIR *dir;
  668. int pid;
  669. task_count = 0;
  670. if (cfg.monitor_pid > 0) {
  671. if (read_comm(cfg.monitor_pid, comm, sizeof(comm)) == 0)
  672. fetch_and_fill_task_info(cfg.monitor_pid, comm);
  673. return;
  674. }
  675. dir = opendir("/proc");
  676. if (!dir) {
  677. fprintf(stderr, "Error opening /proc directory\n");
  678. return;
  679. }
  680. while ((entry = readdir(dir)) != NULL && task_count < MAX_TASKS) {
  681. if (!isdigit(entry->d_name[0]))
  682. continue;
  683. pid = atoi(entry->d_name);
  684. if (pid == 0)
  685. continue;
  686. if (read_comm(pid, comm, sizeof(comm)) != 0)
  687. continue;
  688. fetch_and_fill_task_info(pid, comm);
  689. }
  690. closedir(dir);
  691. }
  /*
   * Calculate average delay in milliseconds.
   *
   * @total: accumulated delay; divided by 1000000 to obtain milliseconds
   * @count: number of delay events
   *
   * Returns @total / 1000000 / @count, or 0 when @count is zero.
   */
  static double average_ms(unsigned long long total, unsigned long long count)
  {
      return count ? (double)total / 1000000.0 / count : 0;
  }
  699. /* Comparison function for sorting tasks */
  700. static int compare_tasks(const void *a, const void *b)
  701. {
  702. const struct task_info *t1 = (const struct task_info *)a;
  703. const struct task_info *t2 = (const struct task_info *)b;
  704. unsigned long long total1;
  705. unsigned long long total2;
  706. unsigned long count1;
  707. unsigned long count2;
  708. double avg1, avg2;
  709. total1 = *(unsigned long long *)((char *)t1 + cfg.sort_field->total_offset);
  710. total2 = *(unsigned long long *)((char *)t2 + cfg.sort_field->total_offset);
  711. count1 = *(unsigned long *)((char *)t1 + cfg.sort_field->count_offset);
  712. count2 = *(unsigned long *)((char *)t2 + cfg.sort_field->count_offset);
  713. avg1 = average_ms(total1, count1);
  714. avg2 = average_ms(total2, count2);
  715. if (avg1 != avg2)
  716. return avg2 > avg1 ? 1 : -1;
  717. return 0;
  718. }
  719. /* Sort tasks by selected field */
  720. static void sort_tasks(void)
  721. {
  722. if (task_count > 0)
  723. qsort(tasks, task_count, sizeof(struct task_info), compare_tasks);
  724. }
  725. /* Get container statistics via cgroupstats */
  726. static void get_container_stats(void)
  727. {
  728. int rc, cfd;
  729. struct {
  730. struct nlmsghdr n;
  731. struct genlmsghdr g;
  732. char buf[MAX_MSG_SIZE];
  733. } req, resp;
  734. struct nlattr *na;
  735. int nl_len;
  736. struct cgroupstats stats;
  737. /* Check if container path is set */
  738. if (!cfg.container_path)
  739. return;
  740. /* Open container cgroup */
  741. cfd = open(cfg.container_path, O_RDONLY);
  742. if (cfd < 0) {
  743. fprintf(stderr, "Error opening container path: %s\n", cfg.container_path);
  744. return;
  745. }
  746. /* Send request for container stats */
  747. if (send_cmd(nl_sd, family_id, getpid(), CGROUPSTATS_CMD_GET,
  748. CGROUPSTATS_CMD_ATTR_FD, &cfd, sizeof(__u32)) < 0) {
  749. fprintf(stderr, "Failed to send request for container stats\n");
  750. close(cfd);
  751. return;
  752. }
  753. /* Receive response */
  754. rc = recv(nl_sd, &resp, sizeof(resp), 0);
  755. if (rc < 0 || resp.n.nlmsg_type == NLMSG_ERROR) {
  756. fprintf(stderr, "Failed to receive response for container stats\n");
  757. close(cfd);
  758. return;
  759. }
  760. /* Parse response */
  761. nl_len = GENLMSG_PAYLOAD(&resp.n);
  762. na = (struct nlattr *) GENLMSG_DATA(&resp);
  763. while (nl_len > 0) {
  764. if (na->nla_type == CGROUPSTATS_TYPE_CGROUP_STATS) {
  765. /* Get the cgroupstats structure */
  766. memcpy(&stats, NLA_DATA(na), sizeof(stats));
  767. /* Fill container stats */
  768. container_stats.nr_sleeping = stats.nr_sleeping;
  769. container_stats.nr_running = stats.nr_running;
  770. container_stats.nr_stopped = stats.nr_stopped;
  771. container_stats.nr_uninterruptible = stats.nr_uninterruptible;
  772. container_stats.nr_io_wait = stats.nr_io_wait;
  773. break;
  774. }
  775. nl_len -= NLA_ALIGN(na->nla_len);
  776. na = (struct nlattr *) ((char *) na + NLA_ALIGN(na->nla_len));
  777. }
  778. close(cfd);
  779. }
  780. /* Display results to stdout or log file */
  781. static void display_results(int psi_ret)
  782. {
  783. time_t now = time(NULL);
  784. struct tm *tm_now = localtime(&now);
  785. FILE *out = stdout;
  786. char timestamp[32];
  787. bool suc = true;
  788. int i, count;
  789. /* Clear terminal screen */
  790. suc &= BOOL_FPRINT(out, "\033[H\033[J");
  791. /* PSI output (one-line, no cat style) */
  792. suc &= BOOL_FPRINT(out, "System Pressure Information: (avg10/avg60vg300/total)\n");
  793. if (psi_ret) {
  794. suc &= BOOL_FPRINT(out, " PSI not found: check if psi=1 enabled in cmdline\n");
  795. } else {
  796. suc &= BOOL_FPRINT(out, PSI_LINE_FORMAT,
  797. "CPU some:",
  798. psi.cpu_some_avg10,
  799. psi.cpu_some_avg60,
  800. psi.cpu_some_avg300,
  801. psi.cpu_some_total / 1000);
  802. suc &= BOOL_FPRINT(out, PSI_LINE_FORMAT,
  803. "CPU full:",
  804. psi.cpu_full_avg10,
  805. psi.cpu_full_avg60,
  806. psi.cpu_full_avg300,
  807. psi.cpu_full_total / 1000);
  808. suc &= BOOL_FPRINT(out, PSI_LINE_FORMAT,
  809. "Memory full:",
  810. psi.memory_full_avg10,
  811. psi.memory_full_avg60,
  812. psi.memory_full_avg300,
  813. psi.memory_full_total / 1000);
  814. suc &= BOOL_FPRINT(out, PSI_LINE_FORMAT,
  815. "Memory some:",
  816. psi.memory_some_avg10,
  817. psi.memory_some_avg60,
  818. psi.memory_some_avg300,
  819. psi.memory_some_total / 1000);
  820. suc &= BOOL_FPRINT(out, PSI_LINE_FORMAT,
  821. "IO full:",
  822. psi.io_full_avg10,
  823. psi.io_full_avg60,
  824. psi.io_full_avg300,
  825. psi.io_full_total / 1000);
  826. suc &= BOOL_FPRINT(out, PSI_LINE_FORMAT,
  827. "IO some:",
  828. psi.io_some_avg10,
  829. psi.io_some_avg60,
  830. psi.io_some_avg300,
  831. psi.io_some_total / 1000);
  832. suc &= BOOL_FPRINT(out, PSI_LINE_FORMAT,
  833. "IRQ full:",
  834. psi.irq_full_avg10,
  835. psi.irq_full_avg60,
  836. psi.irq_full_avg300,
  837. psi.irq_full_total / 1000);
  838. }
  839. if (cfg.container_path) {
  840. suc &= BOOL_FPRINT(out, "Container Information (%s):\n", cfg.container_path);
  841. suc &= BOOL_FPRINT(out, "Processes: running=%d, sleeping=%d, ",
  842. container_stats.nr_running, container_stats.nr_sleeping);
  843. suc &= BOOL_FPRINT(out, "stopped=%d, uninterruptible=%d, io_wait=%d\n\n",
  844. container_stats.nr_stopped, container_stats.nr_uninterruptible,
  845. container_stats.nr_io_wait);
  846. }
  847. /* Interacive command */
  848. suc &= BOOL_FPRINT(out, "[o]sort [M]memverbose [q]quit\n");
  849. if (sort_selected) {
  850. if (cfg.display_mode == MODE_MEMVERBOSE)
  851. suc &= BOOL_FPRINT(out,
  852. "sort selection: [m]MEM [r]RCL [t]THR [p]CMP [w]WP\n");
  853. else
  854. suc &= BOOL_FPRINT(out,
  855. "sort selection: [c]CPU [i]IO [m]MEM [q]IRQ\n");
  856. }
  857. /* Task delay output */
  858. suc &= BOOL_FPRINT(out, "Top %d processes (sorted by %s delay):\n",
  859. cfg.max_processes, get_name_by_field(cfg.sort_field));
  860. suc &= BOOL_FPRINT(out, "%8s %8s %-17s", "PID", "TGID", "COMMAND");
  861. if (cfg.display_mode == MODE_MEMVERBOSE) {
  862. suc &= BOOL_FPRINT(out, "%8s %8s %8s %8s %8s %8s\n",
  863. "MEM(ms)", "SWAP(ms)", "RCL(ms)",
  864. "THR(ms)", "CMP(ms)", "WP(ms)");
  865. suc &= BOOL_FPRINT(out, "-----------------------");
  866. suc &= BOOL_FPRINT(out, "-----------------------");
  867. suc &= BOOL_FPRINT(out, "-----------------------");
  868. suc &= BOOL_FPRINT(out, "---------------------\n");
  869. } else {
  870. suc &= BOOL_FPRINT(out, "%8s %8s %8s %8s\n",
  871. "CPU(ms)", "IO(ms)", "IRQ(ms)", "MEM(ms)");
  872. suc &= BOOL_FPRINT(out, "-----------------------");
  873. suc &= BOOL_FPRINT(out, "-----------------------");
  874. suc &= BOOL_FPRINT(out, "--------------------------\n");
  875. }
  876. count = task_count < cfg.max_processes ? task_count : cfg.max_processes;
  877. for (i = 0; i < count; i++) {
  878. suc &= BOOL_FPRINT(out, "%8d %8d %-15s",
  879. tasks[i].pid, tasks[i].tgid, tasks[i].command);
  880. if (cfg.display_mode == MODE_MEMVERBOSE) {
  881. suc &= BOOL_FPRINT(out, DELAY_FMT_MEMVERBOSE,
  882. TASK_AVG(tasks[i], mem),
  883. TASK_AVG(tasks[i], swapin),
  884. TASK_AVG(tasks[i], freepages),
  885. TASK_AVG(tasks[i], thrashing),
  886. TASK_AVG(tasks[i], compact),
  887. TASK_AVG(tasks[i], wpcopy));
  888. } else {
  889. suc &= BOOL_FPRINT(out, DELAY_FMT_DEFAULT,
  890. TASK_AVG(tasks[i], cpu),
  891. TASK_AVG(tasks[i], blkio),
  892. TASK_AVG(tasks[i], irq),
  893. TASK_AVG(tasks[i], mem));
  894. }
  895. }
  896. suc &= BOOL_FPRINT(out, "\n");
  897. if (!suc)
  898. perror("Error writing to output");
  899. }
  900. /* Check for keyboard input with timeout based on cfg.delay */
  901. static char check_for_keypress(void)
  902. {
  903. struct timeval tv = {cfg.delay, 0};
  904. fd_set readfds;
  905. char ch = 0;
  906. FD_ZERO(&readfds);
  907. FD_SET(STDIN_FILENO, &readfds);
  908. int r = select(STDIN_FILENO + 1, &readfds, NULL, NULL, &tv);
  909. if (r > 0 && FD_ISSET(STDIN_FILENO, &readfds)) {
  910. read(STDIN_FILENO, &ch, 1);
  911. return ch;
  912. }
  913. return 0;
  914. }
  915. #define MAX_MODE_SIZE 2
  916. static void toggle_display_mode(void)
  917. {
  918. static const size_t modes[MAX_MODE_SIZE] = {MODE_DEFAULT, MODE_MEMVERBOSE};
  919. static size_t cur_index;
  920. cur_index = (cur_index + 1) % MAX_MODE_SIZE;
  921. cfg.display_mode = modes[cur_index];
  922. }
  923. /* Handle keyboard input: sorting selection, mode toggle, or quit */
  924. static void handle_keypress(char ch, int *running)
  925. {
  926. const struct field_desc *field;
  927. /* Change sort field */
  928. if (sort_selected) {
  929. field = get_field_by_cmd_char(ch);
  930. if (field && (field->supported_modes & cfg.display_mode))
  931. cfg.sort_field = field;
  932. sort_selected = 0;
  933. /* Handle mode changes or quit */
  934. } else {
  935. switch (ch) {
  936. case 'o':
  937. sort_selected = 1;
  938. break;
  939. case 'M':
  940. toggle_display_mode();
  941. for (field = sort_fields; field->name != NULL; field++) {
  942. if (field->supported_modes & cfg.display_mode) {
  943. cfg.sort_field = field;
  944. break;
  945. }
  946. }
  947. break;
  948. case 'q':
  949. case 'Q':
  950. *running = 0;
  951. break;
  952. default:
  953. break;
  954. }
  955. }
  956. }
  957. /* Main function */
  958. int main(int argc, char **argv)
  959. {
  960. const struct field_desc *field;
  961. int iterations = 0;
  962. int psi_ret = 0;
  963. char keypress;
  964. /* Parse command line arguments */
  965. parse_args(argc, argv);
  966. /* Setup netlink socket */
  967. nl_sd = create_nl_socket();
  968. if (nl_sd < 0) {
  969. fprintf(stderr, "Error creating netlink socket\n");
  970. exit(1);
  971. }
  972. /* Get family ID for taskstats via netlink */
  973. family_id = get_family_id(nl_sd);
  974. if (!family_id) {
  975. fprintf(stderr, "Error getting taskstats family ID\n");
  976. close(nl_sd);
  977. exit(1);
  978. }
  979. /* Set terminal to non-canonical mode for interaction */
  980. enable_raw_mode();
  981. /* Main loop */
  982. while (running) {
  983. /* Auto-switch sort field when not matching display mode */
  984. if (!(cfg.sort_field->supported_modes & cfg.display_mode)) {
  985. for (field = sort_fields; field->name != NULL; field++) {
  986. if (field->supported_modes & cfg.display_mode) {
  987. cfg.sort_field = field;
  988. printf("Auto-switched sort field to: %s\n", field->name);
  989. break;
  990. }
  991. }
  992. }
  993. /* Read PSI statistics */
  994. psi_ret = read_psi_stats();
  995. /* Get container stats if container path provided */
  996. if (cfg.container_path)
  997. get_container_stats();
  998. /* Get task delays */
  999. get_task_delays();
  1000. /* Sort tasks */
  1001. sort_tasks();
  1002. /* Display results to stdout or log file */
  1003. display_results(psi_ret);
  1004. /* Check for iterations */
  1005. if (cfg.iterations > 0 && ++iterations >= cfg.iterations)
  1006. break;
  1007. /* Exit if output_one_time is set */
  1008. if (cfg.output_one_time)
  1009. break;
  1010. /* Keypress for interactive usage */
  1011. keypress = check_for_keypress();
  1012. if (keypress)
  1013. handle_keypress(keypress, &running);
  1014. }
  1015. /* Restore terminal mode */
  1016. disable_raw_mode();
  1017. /* Cleanup */
  1018. close(nl_sd);
  1019. if (cfg.container_path)
  1020. free(cfg.container_path);
  1021. return 0;
  1022. }