cpuset-internal.h 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348
  1. /* SPDX-License-Identifier: GPL-2.0-or-later */
  2. #ifndef __CPUSET_INTERNAL_H
  3. #define __CPUSET_INTERNAL_H
  4. #include <linux/cgroup.h>
  5. #include <linux/cpu.h>
  6. #include <linux/cpumask.h>
  7. #include <linux/cpuset.h>
  8. #include <linux/spinlock.h>
  9. #include <linux/union_find.h>
  10. #include <linux/sched/isolation.h>
  11. /* See "Frequency meter" comments, below. */
  12. struct fmeter {
  13. int cnt; /* unprocessed events count */
  14. int val; /* most recent output value */
  15. time64_t time; /* clock (secs) when val computed */
  16. spinlock_t lock; /* guards read or write of above */
  17. };
  18. /*
  19. * Invalid partition error code
  20. */
  21. enum prs_errcode {
  22. PERR_NONE = 0,
  23. PERR_INVCPUS,
  24. PERR_INVPARENT,
  25. PERR_NOTPART,
  26. PERR_NOTEXCL,
  27. PERR_NOCPUS,
  28. PERR_HOTPLUG,
  29. PERR_CPUSEMPTY,
  30. PERR_HKEEPING,
  31. PERR_ACCESS,
  32. PERR_REMOTE,
  33. };
  34. /* bits in struct cpuset flags field */
  35. typedef enum {
  36. CS_CPU_EXCLUSIVE,
  37. CS_MEM_EXCLUSIVE,
  38. CS_MEM_HARDWALL,
  39. CS_MEMORY_MIGRATE,
  40. CS_SCHED_LOAD_BALANCE,
  41. CS_SPREAD_PAGE,
  42. CS_SPREAD_SLAB,
  43. } cpuset_flagbits_t;
  44. /* The various types of files and directories in a cpuset file system */
  45. typedef enum {
  46. FILE_MEMORY_MIGRATE,
  47. FILE_CPULIST,
  48. FILE_MEMLIST,
  49. FILE_EFFECTIVE_CPULIST,
  50. FILE_EFFECTIVE_MEMLIST,
  51. FILE_SUBPARTS_CPULIST,
  52. FILE_EXCLUSIVE_CPULIST,
  53. FILE_EFFECTIVE_XCPULIST,
  54. FILE_ISOLATED_CPULIST,
  55. FILE_CPU_EXCLUSIVE,
  56. FILE_MEM_EXCLUSIVE,
  57. FILE_MEM_HARDWALL,
  58. FILE_SCHED_LOAD_BALANCE,
  59. FILE_PARTITION_ROOT,
  60. FILE_SCHED_RELAX_DOMAIN_LEVEL,
  61. FILE_MEMORY_PRESSURE_ENABLED,
  62. FILE_MEMORY_PRESSURE,
  63. FILE_SPREAD_PAGE,
  64. FILE_SPREAD_SLAB,
  65. } cpuset_filetype_t;
  66. struct cpuset {
  67. struct cgroup_subsys_state css;
  68. unsigned long flags; /* "unsigned long" so bitops work */
  69. /*
  70. * On default hierarchy:
  71. *
  72. * The user-configured masks can only be changed by writing to
  73. * cpuset.cpus and cpuset.mems, and won't be limited by the
  74. * parent masks.
  75. *
  76. * The effective masks is the real masks that apply to the tasks
  77. * in the cpuset. They may be changed if the configured masks are
  78. * changed or hotplug happens.
  79. *
  80. * effective_mask == configured_mask & parent's effective_mask,
  81. * and if it ends up empty, it will inherit the parent's mask.
  82. *
  83. *
  84. * On legacy hierarchy:
  85. *
  86. * The user-configured masks are always the same with effective masks.
  87. */
  88. /* user-configured CPUs and Memory Nodes allow to tasks */
  89. cpumask_var_t cpus_allowed;
  90. nodemask_t mems_allowed;
  91. /* effective CPUs and Memory Nodes allow to tasks */
  92. cpumask_var_t effective_cpus;
  93. nodemask_t effective_mems;
  94. /*
  95. * Exclusive CPUs dedicated to current cgroup (default hierarchy only)
  96. *
  97. * The effective_cpus of a valid partition root comes solely from its
  98. * effective_xcpus and some of the effective_xcpus may be distributed
  99. * to sub-partitions below & hence excluded from its effective_cpus.
  100. * For a valid partition root, its effective_cpus have no relationship
  101. * with cpus_allowed unless its exclusive_cpus isn't set.
  102. *
  103. * This value will only be set if either exclusive_cpus is set or
  104. * when this cpuset becomes a local partition root.
  105. */
  106. cpumask_var_t effective_xcpus;
  107. /*
  108. * Exclusive CPUs as requested by the user (default hierarchy only)
  109. *
  110. * Its value is independent of cpus_allowed and designates the set of
  111. * CPUs that can be granted to the current cpuset or its children when
  112. * it becomes a valid partition root. The effective set of exclusive
  113. * CPUs granted (effective_xcpus) depends on whether those exclusive
  114. * CPUs are passed down by its ancestors and not yet taken up by
  115. * another sibling partition root along the way.
  116. *
  117. * If its value isn't set, it defaults to cpus_allowed.
  118. */
  119. cpumask_var_t exclusive_cpus;
  120. /*
  121. * This is old Memory Nodes tasks took on.
  122. *
  123. * - top_cpuset.old_mems_allowed is initialized to mems_allowed.
  124. * - A new cpuset's old_mems_allowed is initialized when some
  125. * task is moved into it.
  126. * - old_mems_allowed is used in cpuset_migrate_mm() when we change
  127. * cpuset.mems_allowed and have tasks' nodemask updated, and
  128. * then old_mems_allowed is updated to mems_allowed.
  129. */
  130. nodemask_t old_mems_allowed;
  131. /*
  132. * Tasks are being attached to this cpuset. Used to prevent
  133. * zeroing cpus/mems_allowed between ->can_attach() and ->attach().
  134. */
  135. int attach_in_progress;
  136. /* partition root state */
  137. int partition_root_state;
  138. /*
  139. * Whether cpuset is a remote partition.
  140. * It used to be a list anchoring all remote partitions — we can switch back
  141. * to a list if we need to iterate over the remote partitions.
  142. */
  143. bool remote_partition;
  144. /*
  145. * number of SCHED_DEADLINE tasks attached to this cpuset, so that we
  146. * know when to rebuild associated root domain bandwidth information.
  147. */
  148. int nr_deadline_tasks;
  149. int nr_migrate_dl_tasks;
  150. u64 sum_migrate_dl_bw;
  151. /* Invalid partition error code, not lock protected */
  152. enum prs_errcode prs_err;
  153. /* Handle for cpuset.cpus.partition */
  154. struct cgroup_file partition_file;
  155. #ifdef CONFIG_CPUSETS_V1
  156. struct fmeter fmeter; /* memory_pressure filter */
  157. /* for custom sched domain */
  158. int relax_domain_level;
  159. /* Used to merge intersecting subsets for generate_sched_domains */
  160. struct uf_node node;
  161. #endif
  162. };
  163. extern struct cpuset top_cpuset;
  164. static inline struct cpuset *css_cs(struct cgroup_subsys_state *css)
  165. {
  166. return css ? container_of(css, struct cpuset, css) : NULL;
  167. }
  168. /* Retrieve the cpuset for a task */
  169. static inline struct cpuset *task_cs(struct task_struct *task)
  170. {
  171. return css_cs(task_css(task, cpuset_cgrp_id));
  172. }
  173. static inline struct cpuset *parent_cs(struct cpuset *cs)
  174. {
  175. return css_cs(cs->css.parent);
  176. }
  177. /* convenient tests for these bits */
  178. static inline bool is_cpuset_online(struct cpuset *cs)
  179. {
  180. return css_is_online(&cs->css) && !css_is_dying(&cs->css);
  181. }
  182. static inline int is_cpu_exclusive(const struct cpuset *cs)
  183. {
  184. return test_bit(CS_CPU_EXCLUSIVE, &cs->flags);
  185. }
  186. static inline int is_mem_exclusive(const struct cpuset *cs)
  187. {
  188. return test_bit(CS_MEM_EXCLUSIVE, &cs->flags);
  189. }
  190. static inline int is_mem_hardwall(const struct cpuset *cs)
  191. {
  192. return test_bit(CS_MEM_HARDWALL, &cs->flags);
  193. }
  194. static inline int is_sched_load_balance(const struct cpuset *cs)
  195. {
  196. return test_bit(CS_SCHED_LOAD_BALANCE, &cs->flags);
  197. }
  198. static inline int is_memory_migrate(const struct cpuset *cs)
  199. {
  200. return test_bit(CS_MEMORY_MIGRATE, &cs->flags);
  201. }
  202. static inline int is_spread_page(const struct cpuset *cs)
  203. {
  204. return test_bit(CS_SPREAD_PAGE, &cs->flags);
  205. }
  206. static inline int is_spread_slab(const struct cpuset *cs)
  207. {
  208. return test_bit(CS_SPREAD_SLAB, &cs->flags);
  209. }
  210. /*
  211. * Helper routine for generate_sched_domains().
  212. * Do cpusets a, b have overlapping effective cpus_allowed masks?
  213. */
  214. static inline int cpusets_overlap(struct cpuset *a, struct cpuset *b)
  215. {
  216. return cpumask_intersects(a->effective_cpus, b->effective_cpus);
  217. }
  218. static inline int nr_cpusets(void)
  219. {
  220. /* jump label reference count + the top-level cpuset */
  221. return static_key_count(&cpusets_enabled_key.key) + 1;
  222. }
  223. static inline bool cpuset_is_populated(struct cpuset *cs)
  224. {
  225. lockdep_assert_cpuset_lock_held();
  226. /* Cpusets in the process of attaching should be considered as populated */
  227. return cgroup_is_populated(cs->css.cgroup) ||
  228. cs->attach_in_progress;
  229. }
  230. /**
  231. * cpuset_for_each_child - traverse online children of a cpuset
  232. * @child_cs: loop cursor pointing to the current child
  233. * @pos_css: used for iteration
  234. * @parent_cs: target cpuset to walk children of
  235. *
  236. * Walk @child_cs through the online children of @parent_cs. Must be used
  237. * with RCU read locked.
  238. */
  239. #define cpuset_for_each_child(child_cs, pos_css, parent_cs) \
  240. css_for_each_child((pos_css), &(parent_cs)->css) \
  241. if (is_cpuset_online(((child_cs) = css_cs((pos_css)))))
  242. /**
  243. * cpuset_for_each_descendant_pre - pre-order walk of a cpuset's descendants
  244. * @des_cs: loop cursor pointing to the current descendant
  245. * @pos_css: used for iteration
  246. * @root_cs: target cpuset to walk ancestor of
  247. *
  248. * Walk @des_cs through the online descendants of @root_cs. Must be used
  249. * with RCU read locked. The caller may modify @pos_css by calling
  250. * css_rightmost_descendant() to skip subtree. @root_cs is included in the
  251. * iteration and the first node to be visited.
  252. */
  253. #define cpuset_for_each_descendant_pre(des_cs, pos_css, root_cs) \
  254. css_for_each_descendant_pre((pos_css), &(root_cs)->css) \
  255. if (is_cpuset_online(((des_cs) = css_cs((pos_css)))))
  256. void rebuild_sched_domains_locked(void);
  257. void cpuset_callback_lock_irq(void);
  258. void cpuset_callback_unlock_irq(void);
  259. void cpuset_update_tasks_cpumask(struct cpuset *cs, struct cpumask *new_cpus);
  260. void cpuset_update_tasks_nodemask(struct cpuset *cs);
  261. int cpuset_update_flag(cpuset_flagbits_t bit, struct cpuset *cs, int turning_on);
  262. ssize_t cpuset_write_resmask(struct kernfs_open_file *of,
  263. char *buf, size_t nbytes, loff_t off);
  264. int cpuset_common_seq_show(struct seq_file *sf, void *v);
  265. void cpuset_full_lock(void);
  266. void cpuset_full_unlock(void);
  267. /*
  268. * cpuset-v1.c
  269. */
  270. #ifdef CONFIG_CPUSETS_V1
  271. extern struct cftype cpuset1_files[];
  272. void cpuset1_update_task_spread_flags(struct cpuset *cs,
  273. struct task_struct *tsk);
  274. void cpuset1_update_tasks_flags(struct cpuset *cs);
  275. void cpuset1_hotplug_update_tasks(struct cpuset *cs,
  276. struct cpumask *new_cpus, nodemask_t *new_mems,
  277. bool cpus_updated, bool mems_updated);
  278. int cpuset1_validate_change(struct cpuset *cur, struct cpuset *trial);
  279. bool cpuset1_cpus_excl_conflict(struct cpuset *cs1, struct cpuset *cs2);
  280. void cpuset1_init(struct cpuset *cs);
  281. void cpuset1_online_css(struct cgroup_subsys_state *css);
  282. int cpuset1_generate_sched_domains(cpumask_var_t **domains,
  283. struct sched_domain_attr **attributes);
  284. #else
  285. static inline void cpuset1_update_task_spread_flags(struct cpuset *cs,
  286. struct task_struct *tsk) {}
  287. static inline void cpuset1_update_tasks_flags(struct cpuset *cs) {}
  288. static inline void cpuset1_hotplug_update_tasks(struct cpuset *cs,
  289. struct cpumask *new_cpus, nodemask_t *new_mems,
  290. bool cpus_updated, bool mems_updated) {}
  291. static inline int cpuset1_validate_change(struct cpuset *cur,
  292. struct cpuset *trial) { return 0; }
  293. static inline bool cpuset1_cpus_excl_conflict(struct cpuset *cs1,
  294. struct cpuset *cs2) { return false; }
  295. static inline void cpuset1_init(struct cpuset *cs) {}
  296. static inline void cpuset1_online_css(struct cgroup_subsys_state *css) {}
  297. static inline int cpuset1_generate_sched_domains(cpumask_var_t **domains,
  298. struct sched_domain_attr **attributes) { return 0; };
  299. #endif /* CONFIG_CPUSETS_V1 */
  300. #endif /* __CPUSET_INTERNAL_H */