monitor.c 51 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
7177817791780178117821783178417851786178717881789179017911792179317941795179617971798179918001801180218031804180518061807180818091810181118121813181418151816181718181819182018211822182318241825182618271828182918301831183218331834183518361837183818391840184118421843184418451846184718481849185018511852185318541855185618571858185918601861186218631864186518661867186818691870187118721873187418751876187718781879188018811882188318841885188618871888188918901891189218931894189518961897189818991900190119021903
  1. // SPDX-License-Identifier: GPL-2.0-only
  2. /*
  3. * Resource Director Technology(RDT)
  4. * - Monitoring code
  5. *
  6. * Copyright (C) 2017 Intel Corporation
  7. *
  8. * Author:
  9. * Vikas Shivappa <vikas.shivappa@intel.com>
  10. *
  11. * This replaces the cqm.c based on perf but we reuse a lot of
  12. * code and datastructures originally from Peter Zijlstra and Matt Fleming.
  13. *
  14. * More information about RDT can be found in the Intel (R) x86 Architecture
  15. * Software Developer Manual June 2016, volume 3, section 17.17.
  16. */
  17. #define pr_fmt(fmt) "resctrl: " fmt
  18. #include <linux/cpu.h>
  19. #include <linux/resctrl.h>
  20. #include <linux/sizes.h>
  21. #include <linux/slab.h>
  22. #include "internal.h"
  23. #define CREATE_TRACE_POINTS
  24. #include "monitor_trace.h"
  25. /**
  26. * struct rmid_entry - dirty tracking for all RMID.
  27. * @closid: The CLOSID for this entry.
  28. * @rmid: The RMID for this entry.
  29. * @busy: The number of domains with cached data using this RMID.
  30. * @list: Member of the rmid_free_lru list when busy == 0.
  31. *
  32. * Depending on the architecture the correct monitor is accessed using
  33. * both @closid and @rmid, or @rmid only.
  34. *
  35. * Take the rdtgroup_mutex when accessing.
  36. */
  37. struct rmid_entry {
  38. u32 closid;
  39. u32 rmid;
  40. int busy;
  41. struct list_head list;
  42. };
  43. /*
  44. * @rmid_free_lru - A least recently used list of free RMIDs
  45. * These RMIDs are guaranteed to have an occupancy less than the
  46. * threshold occupancy
  47. */
  48. static LIST_HEAD(rmid_free_lru);
  49. /*
  50. * @closid_num_dirty_rmid The number of dirty RMID each CLOSID has.
  51. * Only allocated when CONFIG_RESCTRL_RMID_DEPENDS_ON_CLOSID is defined.
  52. * Indexed by CLOSID. Protected by rdtgroup_mutex.
  53. */
  54. static u32 *closid_num_dirty_rmid;
  55. /*
  56. * @rmid_limbo_count - count of currently unused but (potentially)
  57. * dirty RMIDs.
  58. * This counts RMIDs that no one is currently using but that
  59. * may have a occupancy value > resctrl_rmid_realloc_threshold. User can
  60. * change the threshold occupancy value.
  61. */
  62. static unsigned int rmid_limbo_count;
  63. /*
  64. * @rmid_entry - The entry in the limbo and free lists.
  65. */
  66. static struct rmid_entry *rmid_ptrs;
  67. /*
  68. * This is the threshold cache occupancy in bytes at which we will consider an
  69. * RMID available for re-allocation.
  70. */
  71. unsigned int resctrl_rmid_realloc_threshold;
  72. /*
  73. * This is the maximum value for the reallocation threshold, in bytes.
  74. */
  75. unsigned int resctrl_rmid_realloc_limit;
  76. /*
  77. * x86 and arm64 differ in their handling of monitoring.
  78. * x86's RMID are independent numbers, there is only one source of traffic
  79. * with an RMID value of '1'.
  80. * arm64's PMG extends the PARTID/CLOSID space, there are multiple sources of
  81. * traffic with a PMG value of '1', one for each CLOSID, meaning the RMID
  82. * value is no longer unique.
  83. * To account for this, resctrl uses an index. On x86 this is just the RMID,
  84. * on arm64 it encodes the CLOSID and RMID. This gives a unique number.
  85. *
  86. * The domain's rmid_busy_llc and rmid_ptrs[] are sized by index. The arch code
  87. * must accept an attempt to read every index.
  88. */
  89. static inline struct rmid_entry *__rmid_entry(u32 idx)
  90. {
  91. struct rmid_entry *entry;
  92. u32 closid, rmid;
  93. entry = &rmid_ptrs[idx];
  94. resctrl_arch_rmid_idx_decode(idx, &closid, &rmid);
  95. WARN_ON_ONCE(entry->closid != closid);
  96. WARN_ON_ONCE(entry->rmid != rmid);
  97. return entry;
  98. }
  99. static void limbo_release_entry(struct rmid_entry *entry)
  100. {
  101. lockdep_assert_held(&rdtgroup_mutex);
  102. rmid_limbo_count--;
  103. list_add_tail(&entry->list, &rmid_free_lru);
  104. if (IS_ENABLED(CONFIG_RESCTRL_RMID_DEPENDS_ON_CLOSID))
  105. closid_num_dirty_rmid[entry->closid]--;
  106. }
  107. /*
  108. * Check the RMIDs that are marked as busy for this domain. If the
  109. * reported LLC occupancy is below the threshold clear the busy bit and
  110. * decrement the count. If the busy count gets to zero on an RMID, we
  111. * free the RMID
  112. */
  113. void __check_limbo(struct rdt_l3_mon_domain *d, bool force_free)
  114. {
  115. struct rdt_resource *r = resctrl_arch_get_resource(RDT_RESOURCE_L3);
  116. u32 idx_limit = resctrl_arch_system_num_rmid_idx();
  117. struct rmid_entry *entry;
  118. u32 idx, cur_idx = 1;
  119. void *arch_mon_ctx;
  120. void *arch_priv;
  121. bool rmid_dirty;
  122. u64 val = 0;
  123. arch_priv = mon_event_all[QOS_L3_OCCUP_EVENT_ID].arch_priv;
  124. arch_mon_ctx = resctrl_arch_mon_ctx_alloc(r, QOS_L3_OCCUP_EVENT_ID);
  125. if (IS_ERR(arch_mon_ctx)) {
  126. pr_warn_ratelimited("Failed to allocate monitor context: %ld",
  127. PTR_ERR(arch_mon_ctx));
  128. return;
  129. }
  130. /*
  131. * Skip RMID 0 and start from RMID 1 and check all the RMIDs that
  132. * are marked as busy for occupancy < threshold. If the occupancy
  133. * is less than the threshold decrement the busy counter of the
  134. * RMID and move it to the free list when the counter reaches 0.
  135. */
  136. for (;;) {
  137. idx = find_next_bit(d->rmid_busy_llc, idx_limit, cur_idx);
  138. if (idx >= idx_limit)
  139. break;
  140. entry = __rmid_entry(idx);
  141. if (resctrl_arch_rmid_read(r, &d->hdr, entry->closid, entry->rmid,
  142. QOS_L3_OCCUP_EVENT_ID, arch_priv, &val,
  143. arch_mon_ctx)) {
  144. rmid_dirty = true;
  145. } else {
  146. rmid_dirty = (val >= resctrl_rmid_realloc_threshold);
  147. /*
  148. * x86's CLOSID and RMID are independent numbers, so the entry's
  149. * CLOSID is an empty CLOSID (X86_RESCTRL_EMPTY_CLOSID). On Arm the
  150. * RMID (PMG) extends the CLOSID (PARTID) space with bits that aren't
  151. * used to select the configuration. It is thus necessary to track both
  152. * CLOSID and RMID because there may be dependencies between them
  153. * on some architectures.
  154. */
  155. trace_mon_llc_occupancy_limbo(entry->closid, entry->rmid, d->hdr.id, val);
  156. }
  157. if (force_free || !rmid_dirty) {
  158. clear_bit(idx, d->rmid_busy_llc);
  159. if (!--entry->busy)
  160. limbo_release_entry(entry);
  161. }
  162. cur_idx = idx + 1;
  163. }
  164. resctrl_arch_mon_ctx_free(r, QOS_L3_OCCUP_EVENT_ID, arch_mon_ctx);
  165. }
  166. bool has_busy_rmid(struct rdt_l3_mon_domain *d)
  167. {
  168. u32 idx_limit = resctrl_arch_system_num_rmid_idx();
  169. return find_first_bit(d->rmid_busy_llc, idx_limit) != idx_limit;
  170. }
  171. static struct rmid_entry *resctrl_find_free_rmid(u32 closid)
  172. {
  173. struct rmid_entry *itr;
  174. u32 itr_idx, cmp_idx;
  175. if (list_empty(&rmid_free_lru))
  176. return rmid_limbo_count ? ERR_PTR(-EBUSY) : ERR_PTR(-ENOSPC);
  177. list_for_each_entry(itr, &rmid_free_lru, list) {
  178. /*
  179. * Get the index of this free RMID, and the index it would need
  180. * to be if it were used with this CLOSID.
  181. * If the CLOSID is irrelevant on this architecture, the two
  182. * index values are always the same on every entry and thus the
  183. * very first entry will be returned.
  184. */
  185. itr_idx = resctrl_arch_rmid_idx_encode(itr->closid, itr->rmid);
  186. cmp_idx = resctrl_arch_rmid_idx_encode(closid, itr->rmid);
  187. if (itr_idx == cmp_idx)
  188. return itr;
  189. }
  190. return ERR_PTR(-ENOSPC);
  191. }
  192. /**
  193. * resctrl_find_cleanest_closid() - Find a CLOSID where all the associated
  194. * RMID are clean, or the CLOSID that has
  195. * the most clean RMID.
  196. *
  197. * MPAM's equivalent of RMID are per-CLOSID, meaning a freshly allocated CLOSID
  198. * may not be able to allocate clean RMID. To avoid this the allocator will
  199. * choose the CLOSID with the most clean RMID.
  200. *
  201. * When the CLOSID and RMID are independent numbers, the first free CLOSID will
  202. * be returned.
  203. */
  204. int resctrl_find_cleanest_closid(void)
  205. {
  206. u32 cleanest_closid = ~0;
  207. int i = 0;
  208. lockdep_assert_held(&rdtgroup_mutex);
  209. if (!IS_ENABLED(CONFIG_RESCTRL_RMID_DEPENDS_ON_CLOSID))
  210. return -EIO;
  211. for (i = 0; i < closids_supported(); i++) {
  212. int num_dirty;
  213. if (closid_allocated(i))
  214. continue;
  215. num_dirty = closid_num_dirty_rmid[i];
  216. if (num_dirty == 0)
  217. return i;
  218. if (cleanest_closid == ~0)
  219. cleanest_closid = i;
  220. if (num_dirty < closid_num_dirty_rmid[cleanest_closid])
  221. cleanest_closid = i;
  222. }
  223. if (cleanest_closid == ~0)
  224. return -ENOSPC;
  225. return cleanest_closid;
  226. }
  227. /*
  228. * For MPAM the RMID value is not unique, and has to be considered with
  229. * the CLOSID. The (CLOSID, RMID) pair is allocated on all domains, which
  230. * allows all domains to be managed by a single free list.
  231. * Each domain also has a rmid_busy_llc to reduce the work of the limbo handler.
  232. */
  233. int alloc_rmid(u32 closid)
  234. {
  235. struct rmid_entry *entry;
  236. lockdep_assert_held(&rdtgroup_mutex);
  237. entry = resctrl_find_free_rmid(closid);
  238. if (IS_ERR(entry))
  239. return PTR_ERR(entry);
  240. list_del(&entry->list);
  241. return entry->rmid;
  242. }
  243. static void add_rmid_to_limbo(struct rmid_entry *entry)
  244. {
  245. struct rdt_resource *r = resctrl_arch_get_resource(RDT_RESOURCE_L3);
  246. struct rdt_l3_mon_domain *d;
  247. u32 idx;
  248. lockdep_assert_held(&rdtgroup_mutex);
  249. /* Walking r->domains, ensure it can't race with cpuhp */
  250. lockdep_assert_cpus_held();
  251. idx = resctrl_arch_rmid_idx_encode(entry->closid, entry->rmid);
  252. entry->busy = 0;
  253. list_for_each_entry(d, &r->mon_domains, hdr.list) {
  254. /*
  255. * For the first limbo RMID in the domain,
  256. * setup up the limbo worker.
  257. */
  258. if (!has_busy_rmid(d))
  259. cqm_setup_limbo_handler(d, CQM_LIMBOCHECK_INTERVAL,
  260. RESCTRL_PICK_ANY_CPU);
  261. set_bit(idx, d->rmid_busy_llc);
  262. entry->busy++;
  263. }
  264. rmid_limbo_count++;
  265. if (IS_ENABLED(CONFIG_RESCTRL_RMID_DEPENDS_ON_CLOSID))
  266. closid_num_dirty_rmid[entry->closid]++;
  267. }
  268. void free_rmid(u32 closid, u32 rmid)
  269. {
  270. u32 idx = resctrl_arch_rmid_idx_encode(closid, rmid);
  271. struct rmid_entry *entry;
  272. lockdep_assert_held(&rdtgroup_mutex);
  273. /*
  274. * Do not allow the default rmid to be free'd. Comparing by index
  275. * allows architectures that ignore the closid parameter to avoid an
  276. * unnecessary check.
  277. */
  278. if (!resctrl_arch_mon_capable() ||
  279. idx == resctrl_arch_rmid_idx_encode(RESCTRL_RESERVED_CLOSID,
  280. RESCTRL_RESERVED_RMID))
  281. return;
  282. entry = __rmid_entry(idx);
  283. if (resctrl_is_mon_event_enabled(QOS_L3_OCCUP_EVENT_ID))
  284. add_rmid_to_limbo(entry);
  285. else
  286. list_add_tail(&entry->list, &rmid_free_lru);
  287. }
  288. static struct mbm_state *get_mbm_state(struct rdt_l3_mon_domain *d, u32 closid,
  289. u32 rmid, enum resctrl_event_id evtid)
  290. {
  291. u32 idx = resctrl_arch_rmid_idx_encode(closid, rmid);
  292. struct mbm_state *state;
  293. if (!resctrl_is_mbm_event(evtid))
  294. return NULL;
  295. state = d->mbm_states[MBM_STATE_IDX(evtid)];
  296. return state ? &state[idx] : NULL;
  297. }
  298. /*
  299. * mbm_cntr_get() - Return the counter ID for the matching @evtid and @rdtgrp.
  300. *
  301. * Return:
  302. * Valid counter ID on success, or -ENOENT on failure.
  303. */
  304. static int mbm_cntr_get(struct rdt_resource *r, struct rdt_l3_mon_domain *d,
  305. struct rdtgroup *rdtgrp, enum resctrl_event_id evtid)
  306. {
  307. int cntr_id;
  308. if (!r->mon.mbm_cntr_assignable)
  309. return -ENOENT;
  310. if (!resctrl_is_mbm_event(evtid))
  311. return -ENOENT;
  312. for (cntr_id = 0; cntr_id < r->mon.num_mbm_cntrs; cntr_id++) {
  313. if (d->cntr_cfg[cntr_id].rdtgrp == rdtgrp &&
  314. d->cntr_cfg[cntr_id].evtid == evtid)
  315. return cntr_id;
  316. }
  317. return -ENOENT;
  318. }
  319. /*
  320. * mbm_cntr_alloc() - Initialize and return a new counter ID in the domain @d.
  321. * Caller must ensure that the specified event is not assigned already.
  322. *
  323. * Return:
  324. * Valid counter ID on success, or -ENOSPC on failure.
  325. */
  326. static int mbm_cntr_alloc(struct rdt_resource *r, struct rdt_l3_mon_domain *d,
  327. struct rdtgroup *rdtgrp, enum resctrl_event_id evtid)
  328. {
  329. int cntr_id;
  330. for (cntr_id = 0; cntr_id < r->mon.num_mbm_cntrs; cntr_id++) {
  331. if (!d->cntr_cfg[cntr_id].rdtgrp) {
  332. d->cntr_cfg[cntr_id].rdtgrp = rdtgrp;
  333. d->cntr_cfg[cntr_id].evtid = evtid;
  334. return cntr_id;
  335. }
  336. }
  337. return -ENOSPC;
  338. }
  339. /*
  340. * mbm_cntr_free() - Clear the counter ID configuration details in the domain @d.
  341. */
  342. static void mbm_cntr_free(struct rdt_l3_mon_domain *d, int cntr_id)
  343. {
  344. memset(&d->cntr_cfg[cntr_id], 0, sizeof(*d->cntr_cfg));
  345. }
  346. static int __l3_mon_event_count(struct rdtgroup *rdtgrp, struct rmid_read *rr)
  347. {
  348. int cpu = smp_processor_id();
  349. u32 closid = rdtgrp->closid;
  350. u32 rmid = rdtgrp->mon.rmid;
  351. struct rdt_l3_mon_domain *d;
  352. int cntr_id = -ENOENT;
  353. struct mbm_state *m;
  354. u64 tval = 0;
  355. if (!domain_header_is_valid(rr->hdr, RESCTRL_MON_DOMAIN, RDT_RESOURCE_L3)) {
  356. rr->err = -EIO;
  357. return -EINVAL;
  358. }
  359. d = container_of(rr->hdr, struct rdt_l3_mon_domain, hdr);
  360. if (rr->is_mbm_cntr) {
  361. cntr_id = mbm_cntr_get(rr->r, d, rdtgrp, rr->evt->evtid);
  362. if (cntr_id < 0) {
  363. rr->err = -ENOENT;
  364. return -EINVAL;
  365. }
  366. }
  367. if (rr->first) {
  368. if (rr->is_mbm_cntr)
  369. resctrl_arch_reset_cntr(rr->r, d, closid, rmid, cntr_id, rr->evt->evtid);
  370. else
  371. resctrl_arch_reset_rmid(rr->r, d, closid, rmid, rr->evt->evtid);
  372. m = get_mbm_state(d, closid, rmid, rr->evt->evtid);
  373. if (m)
  374. memset(m, 0, sizeof(struct mbm_state));
  375. return 0;
  376. }
  377. /* Reading a single domain, must be on a CPU in that domain. */
  378. if (!cpumask_test_cpu(cpu, &d->hdr.cpu_mask))
  379. return -EINVAL;
  380. if (rr->is_mbm_cntr)
  381. rr->err = resctrl_arch_cntr_read(rr->r, d, closid, rmid, cntr_id,
  382. rr->evt->evtid, &tval);
  383. else
  384. rr->err = resctrl_arch_rmid_read(rr->r, rr->hdr, closid, rmid,
  385. rr->evt->evtid, rr->evt->arch_priv,
  386. &tval, rr->arch_mon_ctx);
  387. if (rr->err)
  388. return rr->err;
  389. rr->val += tval;
  390. return 0;
  391. }
  392. static int __l3_mon_event_count_sum(struct rdtgroup *rdtgrp, struct rmid_read *rr)
  393. {
  394. int cpu = smp_processor_id();
  395. u32 closid = rdtgrp->closid;
  396. u32 rmid = rdtgrp->mon.rmid;
  397. struct rdt_l3_mon_domain *d;
  398. u64 tval = 0;
  399. int err, ret;
  400. /*
  401. * Summing across domains is only done for systems that implement
  402. * Sub-NUMA Cluster. There is no overlap with systems that support
  403. * assignable counters.
  404. */
  405. if (rr->is_mbm_cntr) {
  406. pr_warn_once("Summing domains using assignable counters is not supported\n");
  407. rr->err = -EINVAL;
  408. return -EINVAL;
  409. }
  410. /* Summing domains that share a cache, must be on a CPU for that cache. */
  411. if (!cpumask_test_cpu(cpu, &rr->ci->shared_cpu_map))
  412. return -EINVAL;
  413. /*
  414. * Legacy files must report the sum of an event across all
  415. * domains that share the same L3 cache instance.
  416. * Report success if a read from any domain succeeds, -EINVAL
  417. * (translated to "Unavailable" for user space) if reading from
  418. * all domains fail for any reason.
  419. */
  420. ret = -EINVAL;
  421. list_for_each_entry(d, &rr->r->mon_domains, hdr.list) {
  422. if (d->ci_id != rr->ci->id)
  423. continue;
  424. err = resctrl_arch_rmid_read(rr->r, &d->hdr, closid, rmid,
  425. rr->evt->evtid, rr->evt->arch_priv,
  426. &tval, rr->arch_mon_ctx);
  427. if (!err) {
  428. rr->val += tval;
  429. ret = 0;
  430. }
  431. }
  432. if (ret)
  433. rr->err = ret;
  434. return ret;
  435. }
  436. static int __mon_event_count(struct rdtgroup *rdtgrp, struct rmid_read *rr)
  437. {
  438. switch (rr->r->rid) {
  439. case RDT_RESOURCE_L3:
  440. WARN_ON_ONCE(rr->evt->any_cpu);
  441. if (rr->hdr)
  442. return __l3_mon_event_count(rdtgrp, rr);
  443. else
  444. return __l3_mon_event_count_sum(rdtgrp, rr);
  445. case RDT_RESOURCE_PERF_PKG: {
  446. u64 tval = 0;
  447. rr->err = resctrl_arch_rmid_read(rr->r, rr->hdr, rdtgrp->closid,
  448. rdtgrp->mon.rmid, rr->evt->evtid,
  449. rr->evt->arch_priv,
  450. &tval, rr->arch_mon_ctx);
  451. if (rr->err)
  452. return rr->err;
  453. rr->val += tval;
  454. return 0;
  455. }
  456. default:
  457. rr->err = -EINVAL;
  458. return -EINVAL;
  459. }
  460. }
  461. /*
  462. * mbm_bw_count() - Update bw count from values previously read by
  463. * __mon_event_count().
  464. * @rdtgrp: resctrl group associated with the CLOSID and RMID to identify
  465. * the cached mbm_state.
  466. * @rr: The struct rmid_read populated by __mon_event_count().
  467. *
  468. * Supporting function to calculate the memory bandwidth
  469. * and delta bandwidth in MBps. The chunks value previously read by
  470. * __mon_event_count() is compared with the chunks value from the previous
  471. * invocation. This must be called once per second to maintain values in MBps.
  472. */
  473. static void mbm_bw_count(struct rdtgroup *rdtgrp, struct rmid_read *rr)
  474. {
  475. u64 cur_bw, bytes, cur_bytes;
  476. u32 closid = rdtgrp->closid;
  477. u32 rmid = rdtgrp->mon.rmid;
  478. struct rdt_l3_mon_domain *d;
  479. struct mbm_state *m;
  480. if (!domain_header_is_valid(rr->hdr, RESCTRL_MON_DOMAIN, RDT_RESOURCE_L3))
  481. return;
  482. d = container_of(rr->hdr, struct rdt_l3_mon_domain, hdr);
  483. m = get_mbm_state(d, closid, rmid, rr->evt->evtid);
  484. if (WARN_ON_ONCE(!m))
  485. return;
  486. cur_bytes = rr->val;
  487. bytes = cur_bytes - m->prev_bw_bytes;
  488. m->prev_bw_bytes = cur_bytes;
  489. cur_bw = bytes / SZ_1M;
  490. m->prev_bw = cur_bw;
  491. }
  492. /*
  493. * This is scheduled by mon_event_read() to read the CQM/MBM counters
  494. * on a domain.
  495. */
  496. void mon_event_count(void *info)
  497. {
  498. struct rdtgroup *rdtgrp, *entry;
  499. struct rmid_read *rr = info;
  500. struct list_head *head;
  501. int ret;
  502. rdtgrp = rr->rgrp;
  503. ret = __mon_event_count(rdtgrp, rr);
  504. /*
  505. * For Ctrl groups read data from child monitor groups and
  506. * add them together. Count events which are read successfully.
  507. * Discard the rmid_read's reporting errors.
  508. */
  509. head = &rdtgrp->mon.crdtgrp_list;
  510. if (rdtgrp->type == RDTCTRL_GROUP) {
  511. list_for_each_entry(entry, head, mon.crdtgrp_list) {
  512. if (__mon_event_count(entry, rr) == 0)
  513. ret = 0;
  514. }
  515. }
  516. /*
  517. * __mon_event_count() calls for newly created monitor groups may
  518. * report -EINVAL/Unavailable if the monitor hasn't seen any traffic.
  519. * Discard error if any of the monitor event reads succeeded.
  520. */
  521. if (ret == 0)
  522. rr->err = 0;
  523. }
  524. static struct rdt_ctrl_domain *get_ctrl_domain_from_cpu(int cpu,
  525. struct rdt_resource *r)
  526. {
  527. struct rdt_ctrl_domain *d;
  528. lockdep_assert_cpus_held();
  529. list_for_each_entry(d, &r->ctrl_domains, hdr.list) {
  530. /* Find the domain that contains this CPU */
  531. if (cpumask_test_cpu(cpu, &d->hdr.cpu_mask))
  532. return d;
  533. }
  534. return NULL;
  535. }
  536. /*
  537. * Feedback loop for MBA software controller (mba_sc)
  538. *
  539. * mba_sc is a feedback loop where we periodically read MBM counters and
  540. * adjust the bandwidth percentage values via the IA32_MBA_THRTL_MSRs so
  541. * that:
  542. *
  543. * current bandwidth(cur_bw) < user specified bandwidth(user_bw)
  544. *
  545. * This uses the MBM counters to measure the bandwidth and MBA throttle
  546. * MSRs to control the bandwidth for a particular rdtgrp. It builds on the
  547. * fact that resctrl rdtgroups have both monitoring and control.
  548. *
  549. * The frequency of the checks is 1s and we just tag along the MBM overflow
  550. * timer. Having 1s interval makes the calculation of bandwidth simpler.
  551. *
  552. * Although MBA's goal is to restrict the bandwidth to a maximum, there may
  553. * be a need to increase the bandwidth to avoid unnecessarily restricting
  554. * the L2 <-> L3 traffic.
  555. *
  556. * Since MBA controls the L2 external bandwidth where as MBM measures the
  557. * L3 external bandwidth the following sequence could lead to such a
  558. * situation.
  559. *
  560. * Consider an rdtgroup which had high L3 <-> memory traffic in initial
  561. * phases -> mba_sc kicks in and reduced bandwidth percentage values -> but
  562. * after some time rdtgroup has mostly L2 <-> L3 traffic.
  563. *
  564. * In this case we may restrict the rdtgroup's L2 <-> L3 traffic as its
  565. * throttle MSRs already have low percentage values. To avoid
  566. * unnecessarily restricting such rdtgroups, we also increase the bandwidth.
  567. */
  568. static void update_mba_bw(struct rdtgroup *rgrp, struct rdt_l3_mon_domain *dom_mbm)
  569. {
  570. u32 closid, rmid, cur_msr_val, new_msr_val;
  571. struct mbm_state *pmbm_data, *cmbm_data;
  572. struct rdt_ctrl_domain *dom_mba;
  573. enum resctrl_event_id evt_id;
  574. struct rdt_resource *r_mba;
  575. struct list_head *head;
  576. struct rdtgroup *entry;
  577. u32 cur_bw, user_bw;
  578. r_mba = resctrl_arch_get_resource(RDT_RESOURCE_MBA);
  579. evt_id = rgrp->mba_mbps_event;
  580. closid = rgrp->closid;
  581. rmid = rgrp->mon.rmid;
  582. pmbm_data = get_mbm_state(dom_mbm, closid, rmid, evt_id);
  583. if (WARN_ON_ONCE(!pmbm_data))
  584. return;
  585. dom_mba = get_ctrl_domain_from_cpu(smp_processor_id(), r_mba);
  586. if (!dom_mba) {
  587. pr_warn_once("Failure to get domain for MBA update\n");
  588. return;
  589. }
  590. cur_bw = pmbm_data->prev_bw;
  591. user_bw = dom_mba->mbps_val[closid];
  592. /* MBA resource doesn't support CDP */
  593. cur_msr_val = resctrl_arch_get_config(r_mba, dom_mba, closid, CDP_NONE);
  594. /*
  595. * For Ctrl groups read data from child monitor groups.
  596. */
  597. head = &rgrp->mon.crdtgrp_list;
  598. list_for_each_entry(entry, head, mon.crdtgrp_list) {
  599. cmbm_data = get_mbm_state(dom_mbm, entry->closid, entry->mon.rmid, evt_id);
  600. if (WARN_ON_ONCE(!cmbm_data))
  601. return;
  602. cur_bw += cmbm_data->prev_bw;
  603. }
  604. /*
  605. * Scale up/down the bandwidth linearly for the ctrl group. The
  606. * bandwidth step is the bandwidth granularity specified by the
  607. * hardware.
  608. * Always increase throttling if current bandwidth is above the
  609. * target set by user.
  610. * But avoid thrashing up and down on every poll by checking
  611. * whether a decrease in throttling is likely to push the group
  612. * back over target. E.g. if currently throttling to 30% of bandwidth
  613. * on a system with 10% granularity steps, check whether moving to
  614. * 40% would go past the limit by multiplying current bandwidth by
  615. * "(30 + 10) / 30".
  616. */
  617. if (cur_msr_val > r_mba->membw.min_bw && user_bw < cur_bw) {
  618. new_msr_val = cur_msr_val - r_mba->membw.bw_gran;
  619. } else if (cur_msr_val < MAX_MBA_BW &&
  620. (user_bw > (cur_bw * (cur_msr_val + r_mba->membw.min_bw) / cur_msr_val))) {
  621. new_msr_val = cur_msr_val + r_mba->membw.bw_gran;
  622. } else {
  623. return;
  624. }
  625. resctrl_arch_update_one(r_mba, dom_mba, closid, CDP_NONE, new_msr_val);
  626. }
  627. static void mbm_update_one_event(struct rdt_resource *r, struct rdt_l3_mon_domain *d,
  628. struct rdtgroup *rdtgrp, enum resctrl_event_id evtid)
  629. {
  630. struct rmid_read rr = {0};
  631. rr.r = r;
  632. rr.hdr = &d->hdr;
  633. rr.evt = &mon_event_all[evtid];
  634. if (resctrl_arch_mbm_cntr_assign_enabled(r)) {
  635. rr.is_mbm_cntr = true;
  636. } else {
  637. rr.arch_mon_ctx = resctrl_arch_mon_ctx_alloc(rr.r, evtid);
  638. if (IS_ERR(rr.arch_mon_ctx)) {
  639. pr_warn_ratelimited("Failed to allocate monitor context: %ld",
  640. PTR_ERR(rr.arch_mon_ctx));
  641. return;
  642. }
  643. }
  644. __mon_event_count(rdtgrp, &rr);
  645. /*
  646. * If the software controller is enabled, compute the
  647. * bandwidth for this event id.
  648. */
  649. if (is_mba_sc(NULL))
  650. mbm_bw_count(rdtgrp, &rr);
  651. if (rr.arch_mon_ctx)
  652. resctrl_arch_mon_ctx_free(rr.r, evtid, rr.arch_mon_ctx);
  653. }
  654. static void mbm_update(struct rdt_resource *r, struct rdt_l3_mon_domain *d,
  655. struct rdtgroup *rdtgrp)
  656. {
  657. /*
  658. * This is protected from concurrent reads from user as both
  659. * the user and overflow handler hold the global mutex.
  660. */
  661. if (resctrl_is_mon_event_enabled(QOS_L3_MBM_TOTAL_EVENT_ID))
  662. mbm_update_one_event(r, d, rdtgrp, QOS_L3_MBM_TOTAL_EVENT_ID);
  663. if (resctrl_is_mon_event_enabled(QOS_L3_MBM_LOCAL_EVENT_ID))
  664. mbm_update_one_event(r, d, rdtgrp, QOS_L3_MBM_LOCAL_EVENT_ID);
  665. }
  666. /*
  667. * Handler to scan the limbo list and move the RMIDs
  668. * to free list whose occupancy < threshold_occupancy.
  669. */
  670. void cqm_handle_limbo(struct work_struct *work)
  671. {
  672. unsigned long delay = msecs_to_jiffies(CQM_LIMBOCHECK_INTERVAL);
  673. struct rdt_l3_mon_domain *d;
  674. cpus_read_lock();
  675. mutex_lock(&rdtgroup_mutex);
  676. d = container_of(work, struct rdt_l3_mon_domain, cqm_limbo.work);
  677. __check_limbo(d, false);
  678. if (has_busy_rmid(d)) {
  679. d->cqm_work_cpu = cpumask_any_housekeeping(&d->hdr.cpu_mask,
  680. RESCTRL_PICK_ANY_CPU);
  681. schedule_delayed_work_on(d->cqm_work_cpu, &d->cqm_limbo,
  682. delay);
  683. }
  684. mutex_unlock(&rdtgroup_mutex);
  685. cpus_read_unlock();
  686. }
  687. /**
  688. * cqm_setup_limbo_handler() - Schedule the limbo handler to run for this
  689. * domain.
  690. * @dom: The domain the limbo handler should run for.
  691. * @delay_ms: How far in the future the handler should run.
  692. * @exclude_cpu: Which CPU the handler should not run on,
  693. * RESCTRL_PICK_ANY_CPU to pick any CPU.
  694. */
  695. void cqm_setup_limbo_handler(struct rdt_l3_mon_domain *dom, unsigned long delay_ms,
  696. int exclude_cpu)
  697. {
  698. unsigned long delay = msecs_to_jiffies(delay_ms);
  699. int cpu;
  700. cpu = cpumask_any_housekeeping(&dom->hdr.cpu_mask, exclude_cpu);
  701. dom->cqm_work_cpu = cpu;
  702. if (cpu < nr_cpu_ids)
  703. schedule_delayed_work_on(cpu, &dom->cqm_limbo, delay);
  704. }
  705. void mbm_handle_overflow(struct work_struct *work)
  706. {
  707. unsigned long delay = msecs_to_jiffies(MBM_OVERFLOW_INTERVAL);
  708. struct rdtgroup *prgrp, *crgrp;
  709. struct rdt_l3_mon_domain *d;
  710. struct list_head *head;
  711. struct rdt_resource *r;
  712. cpus_read_lock();
  713. mutex_lock(&rdtgroup_mutex);
  714. /*
  715. * If the filesystem has been unmounted this work no longer needs to
  716. * run.
  717. */
  718. if (!resctrl_mounted || !resctrl_arch_mon_capable())
  719. goto out_unlock;
  720. r = resctrl_arch_get_resource(RDT_RESOURCE_L3);
  721. d = container_of(work, struct rdt_l3_mon_domain, mbm_over.work);
  722. list_for_each_entry(prgrp, &rdt_all_groups, rdtgroup_list) {
  723. mbm_update(r, d, prgrp);
  724. head = &prgrp->mon.crdtgrp_list;
  725. list_for_each_entry(crgrp, head, mon.crdtgrp_list)
  726. mbm_update(r, d, crgrp);
  727. if (is_mba_sc(NULL))
  728. update_mba_bw(prgrp, d);
  729. }
  730. /*
  731. * Re-check for housekeeping CPUs. This allows the overflow handler to
  732. * move off a nohz_full CPU quickly.
  733. */
  734. d->mbm_work_cpu = cpumask_any_housekeeping(&d->hdr.cpu_mask,
  735. RESCTRL_PICK_ANY_CPU);
  736. schedule_delayed_work_on(d->mbm_work_cpu, &d->mbm_over, delay);
  737. out_unlock:
  738. mutex_unlock(&rdtgroup_mutex);
  739. cpus_read_unlock();
  740. }
  741. /**
  742. * mbm_setup_overflow_handler() - Schedule the overflow handler to run for this
  743. * domain.
  744. * @dom: The domain the overflow handler should run for.
  745. * @delay_ms: How far in the future the handler should run.
  746. * @exclude_cpu: Which CPU the handler should not run on,
  747. * RESCTRL_PICK_ANY_CPU to pick any CPU.
  748. */
  749. void mbm_setup_overflow_handler(struct rdt_l3_mon_domain *dom, unsigned long delay_ms,
  750. int exclude_cpu)
  751. {
  752. unsigned long delay = msecs_to_jiffies(delay_ms);
  753. int cpu;
  754. /*
  755. * When a domain comes online there is no guarantee the filesystem is
  756. * mounted. If not, there is no need to catch counter overflow.
  757. */
  758. if (!resctrl_mounted || !resctrl_arch_mon_capable())
  759. return;
  760. cpu = cpumask_any_housekeeping(&dom->hdr.cpu_mask, exclude_cpu);
  761. dom->mbm_work_cpu = cpu;
  762. if (cpu < nr_cpu_ids)
  763. schedule_delayed_work_on(cpu, &dom->mbm_over, delay);
  764. }
  765. int setup_rmid_lru_list(void)
  766. {
  767. struct rmid_entry *entry = NULL;
  768. u32 idx_limit;
  769. u32 idx;
  770. int i;
  771. if (!resctrl_arch_mon_capable())
  772. return 0;
  773. /*
  774. * Called on every mount, but the number of RMIDs cannot change
  775. * after the first mount, so keep using the same set of rmid_ptrs[]
  776. * until resctrl_exit(). Note that the limbo handler continues to
  777. * access rmid_ptrs[] after resctrl is unmounted.
  778. */
  779. if (rmid_ptrs)
  780. return 0;
  781. idx_limit = resctrl_arch_system_num_rmid_idx();
  782. rmid_ptrs = kzalloc_objs(struct rmid_entry, idx_limit);
  783. if (!rmid_ptrs)
  784. return -ENOMEM;
  785. for (i = 0; i < idx_limit; i++) {
  786. entry = &rmid_ptrs[i];
  787. INIT_LIST_HEAD(&entry->list);
  788. resctrl_arch_rmid_idx_decode(i, &entry->closid, &entry->rmid);
  789. list_add_tail(&entry->list, &rmid_free_lru);
  790. }
  791. /*
  792. * RESCTRL_RESERVED_CLOSID and RESCTRL_RESERVED_RMID are special and
  793. * are always allocated. These are used for the rdtgroup_default
  794. * control group, which was setup earlier in rdtgroup_setup_default().
  795. */
  796. idx = resctrl_arch_rmid_idx_encode(RESCTRL_RESERVED_CLOSID,
  797. RESCTRL_RESERVED_RMID);
  798. entry = __rmid_entry(idx);
  799. list_del(&entry->list);
  800. return 0;
  801. }
  802. void free_rmid_lru_list(void)
  803. {
  804. if (!resctrl_arch_mon_capable())
  805. return;
  806. mutex_lock(&rdtgroup_mutex);
  807. kfree(rmid_ptrs);
  808. rmid_ptrs = NULL;
  809. mutex_unlock(&rdtgroup_mutex);
  810. }
  /* Build one designated entry of mon_event_all[], indexed by the event id. */
  #define MON_EVENT(_id, _label, _rid, _is_fp)		\
  	[_id] = {					\
  		.evtid			= _id,		\
  		.name			= _label,	\
  		.rid			= _rid,		\
  		.is_floating_point	= _is_fp,	\
  	}
  818. /*
  819. * All available events. Architecture code marks the ones that
  820. * are supported by a system using resctrl_enable_mon_event()
  821. * to set .enabled.
  822. */
  823. struct mon_evt mon_event_all[QOS_NUM_EVENTS] = {
  824. MON_EVENT(QOS_L3_OCCUP_EVENT_ID, "llc_occupancy", RDT_RESOURCE_L3, false),
  825. MON_EVENT(QOS_L3_MBM_TOTAL_EVENT_ID, "mbm_total_bytes", RDT_RESOURCE_L3, false),
  826. MON_EVENT(QOS_L3_MBM_LOCAL_EVENT_ID, "mbm_local_bytes", RDT_RESOURCE_L3, false),
  827. MON_EVENT(PMT_EVENT_ENERGY, "core_energy", RDT_RESOURCE_PERF_PKG, true),
  828. MON_EVENT(PMT_EVENT_ACTIVITY, "activity", RDT_RESOURCE_PERF_PKG, true),
  829. MON_EVENT(PMT_EVENT_STALLS_LLC_HIT, "stalls_llc_hit", RDT_RESOURCE_PERF_PKG, false),
  830. MON_EVENT(PMT_EVENT_C1_RES, "c1_res", RDT_RESOURCE_PERF_PKG, false),
  831. MON_EVENT(PMT_EVENT_UNHALTED_CORE_CYCLES, "unhalted_core_cycles", RDT_RESOURCE_PERF_PKG, false),
  832. MON_EVENT(PMT_EVENT_STALLS_LLC_MISS, "stalls_llc_miss", RDT_RESOURCE_PERF_PKG, false),
  833. MON_EVENT(PMT_EVENT_AUTO_C6_RES, "c6_res", RDT_RESOURCE_PERF_PKG, false),
  834. MON_EVENT(PMT_EVENT_UNHALTED_REF_CYCLES, "unhalted_ref_cycles", RDT_RESOURCE_PERF_PKG, false),
  835. MON_EVENT(PMT_EVENT_UOPS_RETIRED, "uops_retired", RDT_RESOURCE_PERF_PKG, false),
  836. };
  837. bool resctrl_enable_mon_event(enum resctrl_event_id eventid, bool any_cpu,
  838. unsigned int binary_bits, void *arch_priv)
  839. {
  840. if (WARN_ON_ONCE(eventid < QOS_FIRST_EVENT || eventid >= QOS_NUM_EVENTS ||
  841. binary_bits > MAX_BINARY_BITS))
  842. return false;
  843. if (mon_event_all[eventid].enabled) {
  844. pr_warn("Duplicate enable for event %d\n", eventid);
  845. return false;
  846. }
  847. if (binary_bits && !mon_event_all[eventid].is_floating_point) {
  848. pr_warn("Event %d may not be floating point\n", eventid);
  849. return false;
  850. }
  851. mon_event_all[eventid].any_cpu = any_cpu;
  852. mon_event_all[eventid].binary_bits = binary_bits;
  853. mon_event_all[eventid].arch_priv = arch_priv;
  854. mon_event_all[eventid].enabled = true;
  855. return true;
  856. }
  857. bool resctrl_is_mon_event_enabled(enum resctrl_event_id eventid)
  858. {
  859. return eventid >= QOS_FIRST_EVENT && eventid < QOS_NUM_EVENTS &&
  860. mon_event_all[eventid].enabled;
  861. }
  862. u32 resctrl_get_mon_evt_cfg(enum resctrl_event_id evtid)
  863. {
  864. return mon_event_all[evtid].evt_cfg;
  865. }
  866. /**
  867. * struct mbm_transaction - Memory transaction an MBM event can be configured with.
  868. * @name: Name of memory transaction (read, write ...).
  869. * @val: The bit (eg. READS_TO_LOCAL_MEM or READS_TO_REMOTE_MEM) used to
  870. * represent the memory transaction within an event's configuration.
  871. */
  872. struct mbm_transaction {
  873. char name[32];
  874. u32 val;
  875. };
  876. /* Decoded values for each type of memory transaction. */
  877. static struct mbm_transaction mbm_transactions[NUM_MBM_TRANSACTIONS] = {
  878. {"local_reads", READS_TO_LOCAL_MEM},
  879. {"remote_reads", READS_TO_REMOTE_MEM},
  880. {"local_non_temporal_writes", NON_TEMP_WRITE_TO_LOCAL_MEM},
  881. {"remote_non_temporal_writes", NON_TEMP_WRITE_TO_REMOTE_MEM},
  882. {"local_reads_slow_memory", READS_TO_LOCAL_S_MEM},
  883. {"remote_reads_slow_memory", READS_TO_REMOTE_S_MEM},
  884. {"dirty_victim_writes_all", DIRTY_VICTIMS_TO_ALL_MEM},
  885. };
  886. int event_filter_show(struct kernfs_open_file *of, struct seq_file *seq, void *v)
  887. {
  888. struct mon_evt *mevt = rdt_kn_parent_priv(of->kn);
  889. struct rdt_resource *r;
  890. bool sep = false;
  891. int ret = 0, i;
  892. mutex_lock(&rdtgroup_mutex);
  893. rdt_last_cmd_clear();
  894. r = resctrl_arch_get_resource(mevt->rid);
  895. if (!resctrl_arch_mbm_cntr_assign_enabled(r)) {
  896. rdt_last_cmd_puts("mbm_event counter assignment mode is not enabled\n");
  897. ret = -EINVAL;
  898. goto out_unlock;
  899. }
  900. for (i = 0; i < NUM_MBM_TRANSACTIONS; i++) {
  901. if (mevt->evt_cfg & mbm_transactions[i].val) {
  902. if (sep)
  903. seq_putc(seq, ',');
  904. seq_printf(seq, "%s", mbm_transactions[i].name);
  905. sep = true;
  906. }
  907. }
  908. seq_putc(seq, '\n');
  909. out_unlock:
  910. mutex_unlock(&rdtgroup_mutex);
  911. return ret;
  912. }
  913. int resctrl_mbm_assign_on_mkdir_show(struct kernfs_open_file *of, struct seq_file *s,
  914. void *v)
  915. {
  916. struct rdt_resource *r = rdt_kn_parent_priv(of->kn);
  917. int ret = 0;
  918. mutex_lock(&rdtgroup_mutex);
  919. rdt_last_cmd_clear();
  920. if (!resctrl_arch_mbm_cntr_assign_enabled(r)) {
  921. rdt_last_cmd_puts("mbm_event counter assignment mode is not enabled\n");
  922. ret = -EINVAL;
  923. goto out_unlock;
  924. }
  925. seq_printf(s, "%u\n", r->mon.mbm_assign_on_mkdir);
  926. out_unlock:
  927. mutex_unlock(&rdtgroup_mutex);
  928. return ret;
  929. }
  930. ssize_t resctrl_mbm_assign_on_mkdir_write(struct kernfs_open_file *of, char *buf,
  931. size_t nbytes, loff_t off)
  932. {
  933. struct rdt_resource *r = rdt_kn_parent_priv(of->kn);
  934. bool value;
  935. int ret;
  936. ret = kstrtobool(buf, &value);
  937. if (ret)
  938. return ret;
  939. mutex_lock(&rdtgroup_mutex);
  940. rdt_last_cmd_clear();
  941. if (!resctrl_arch_mbm_cntr_assign_enabled(r)) {
  942. rdt_last_cmd_puts("mbm_event counter assignment mode is not enabled\n");
  943. ret = -EINVAL;
  944. goto out_unlock;
  945. }
  946. r->mon.mbm_assign_on_mkdir = value;
  947. out_unlock:
  948. mutex_unlock(&rdtgroup_mutex);
  949. return ret ?: nbytes;
  950. }
  951. /*
  952. * mbm_cntr_free_all() - Clear all the counter ID configuration details in the
  953. * domain @d. Called when mbm_assign_mode is changed.
  954. */
  955. static void mbm_cntr_free_all(struct rdt_resource *r, struct rdt_l3_mon_domain *d)
  956. {
  957. memset(d->cntr_cfg, 0, sizeof(*d->cntr_cfg) * r->mon.num_mbm_cntrs);
  958. }
  959. /*
  960. * resctrl_reset_rmid_all() - Reset all non-architecture states for all the
  961. * supported RMIDs.
  962. */
  963. static void resctrl_reset_rmid_all(struct rdt_resource *r, struct rdt_l3_mon_domain *d)
  964. {
  965. u32 idx_limit = resctrl_arch_system_num_rmid_idx();
  966. enum resctrl_event_id evt;
  967. int idx;
  968. for_each_mbm_event_id(evt) {
  969. if (!resctrl_is_mon_event_enabled(evt))
  970. continue;
  971. idx = MBM_STATE_IDX(evt);
  972. memset(d->mbm_states[idx], 0, sizeof(*d->mbm_states[0]) * idx_limit);
  973. }
  974. }
  975. /*
  976. * rdtgroup_assign_cntr() - Assign/unassign the counter ID for the event, RMID
  977. * pair in the domain.
  978. *
  979. * Assign the counter if @assign is true else unassign the counter. Reset the
  980. * associated non-architectural state.
  981. */
  982. static void rdtgroup_assign_cntr(struct rdt_resource *r, struct rdt_l3_mon_domain *d,
  983. enum resctrl_event_id evtid, u32 rmid, u32 closid,
  984. u32 cntr_id, bool assign)
  985. {
  986. struct mbm_state *m;
  987. resctrl_arch_config_cntr(r, d, evtid, rmid, closid, cntr_id, assign);
  988. m = get_mbm_state(d, closid, rmid, evtid);
  989. if (m)
  990. memset(m, 0, sizeof(*m));
  991. }
  992. /*
  993. * rdtgroup_alloc_assign_cntr() - Allocate a counter ID and assign it to the event
  994. * pointed to by @mevt and the resctrl group @rdtgrp within the domain @d.
  995. *
  996. * Return:
  997. * 0 on success, < 0 on failure.
  998. */
  999. static int rdtgroup_alloc_assign_cntr(struct rdt_resource *r, struct rdt_l3_mon_domain *d,
  1000. struct rdtgroup *rdtgrp, struct mon_evt *mevt)
  1001. {
  1002. int cntr_id;
  1003. /* No action required if the counter is assigned already. */
  1004. cntr_id = mbm_cntr_get(r, d, rdtgrp, mevt->evtid);
  1005. if (cntr_id >= 0)
  1006. return 0;
  1007. cntr_id = mbm_cntr_alloc(r, d, rdtgrp, mevt->evtid);
  1008. if (cntr_id < 0) {
  1009. rdt_last_cmd_printf("Failed to allocate counter for %s in domain %d\n",
  1010. mevt->name, d->hdr.id);
  1011. return cntr_id;
  1012. }
  1013. rdtgroup_assign_cntr(r, d, mevt->evtid, rdtgrp->mon.rmid, rdtgrp->closid, cntr_id, true);
  1014. return 0;
  1015. }
  1016. /*
  1017. * rdtgroup_assign_cntr_event() - Assign a hardware counter for the event in
  1018. * @mevt to the resctrl group @rdtgrp. Assign counters to all domains if @d is
  1019. * NULL; otherwise, assign the counter to the specified domain @d.
  1020. *
  1021. * If all counters in a domain are already in use, rdtgroup_alloc_assign_cntr()
  1022. * will fail. The assignment process will abort at the first failure encountered
  1023. * during domain traversal, which may result in the event being only partially
  1024. * assigned.
  1025. *
  1026. * Return:
  1027. * 0 on success, < 0 on failure.
  1028. */
  1029. static int rdtgroup_assign_cntr_event(struct rdt_l3_mon_domain *d, struct rdtgroup *rdtgrp,
  1030. struct mon_evt *mevt)
  1031. {
  1032. struct rdt_resource *r = resctrl_arch_get_resource(mevt->rid);
  1033. int ret = 0;
  1034. if (!d) {
  1035. list_for_each_entry(d, &r->mon_domains, hdr.list) {
  1036. ret = rdtgroup_alloc_assign_cntr(r, d, rdtgrp, mevt);
  1037. if (ret)
  1038. return ret;
  1039. }
  1040. } else {
  1041. ret = rdtgroup_alloc_assign_cntr(r, d, rdtgrp, mevt);
  1042. }
  1043. return ret;
  1044. }
  1045. /*
  1046. * rdtgroup_assign_cntrs() - Assign counters to MBM events. Called when
  1047. * a new group is created.
  1048. *
  1049. * Each group can accommodate two counters per domain: one for the total
  1050. * event and one for the local event. Assignments may fail due to the limited
  1051. * number of counters. However, it is not necessary to fail the group creation
  1052. * and thus no failure is returned. Users have the option to modify the
  1053. * counter assignments after the group has been created.
  1054. */
  1055. void rdtgroup_assign_cntrs(struct rdtgroup *rdtgrp)
  1056. {
  1057. struct rdt_resource *r = resctrl_arch_get_resource(RDT_RESOURCE_L3);
  1058. if (!r->mon_capable || !resctrl_arch_mbm_cntr_assign_enabled(r) ||
  1059. !r->mon.mbm_assign_on_mkdir)
  1060. return;
  1061. if (resctrl_is_mon_event_enabled(QOS_L3_MBM_TOTAL_EVENT_ID))
  1062. rdtgroup_assign_cntr_event(NULL, rdtgrp,
  1063. &mon_event_all[QOS_L3_MBM_TOTAL_EVENT_ID]);
  1064. if (resctrl_is_mon_event_enabled(QOS_L3_MBM_LOCAL_EVENT_ID))
  1065. rdtgroup_assign_cntr_event(NULL, rdtgrp,
  1066. &mon_event_all[QOS_L3_MBM_LOCAL_EVENT_ID]);
  1067. }
  1068. /*
  1069. * rdtgroup_free_unassign_cntr() - Unassign and reset the counter ID configuration
  1070. * for the event pointed to by @mevt within the domain @d and resctrl group @rdtgrp.
  1071. */
  1072. static void rdtgroup_free_unassign_cntr(struct rdt_resource *r, struct rdt_l3_mon_domain *d,
  1073. struct rdtgroup *rdtgrp, struct mon_evt *mevt)
  1074. {
  1075. int cntr_id;
  1076. cntr_id = mbm_cntr_get(r, d, rdtgrp, mevt->evtid);
  1077. /* If there is no cntr_id assigned, nothing to do */
  1078. if (cntr_id < 0)
  1079. return;
  1080. rdtgroup_assign_cntr(r, d, mevt->evtid, rdtgrp->mon.rmid, rdtgrp->closid, cntr_id, false);
  1081. mbm_cntr_free(d, cntr_id);
  1082. }
  1083. /*
  1084. * rdtgroup_unassign_cntr_event() - Unassign a hardware counter associated with
  1085. * the event structure @mevt from the domain @d and the group @rdtgrp. Unassign
  1086. * the counters from all the domains if @d is NULL else unassign from @d.
  1087. */
  1088. static void rdtgroup_unassign_cntr_event(struct rdt_l3_mon_domain *d, struct rdtgroup *rdtgrp,
  1089. struct mon_evt *mevt)
  1090. {
  1091. struct rdt_resource *r = resctrl_arch_get_resource(mevt->rid);
  1092. if (!d) {
  1093. list_for_each_entry(d, &r->mon_domains, hdr.list)
  1094. rdtgroup_free_unassign_cntr(r, d, rdtgrp, mevt);
  1095. } else {
  1096. rdtgroup_free_unassign_cntr(r, d, rdtgrp, mevt);
  1097. }
  1098. }
  1099. /*
  1100. * rdtgroup_unassign_cntrs() - Unassign the counters associated with MBM events.
  1101. * Called when a group is deleted.
  1102. */
  1103. void rdtgroup_unassign_cntrs(struct rdtgroup *rdtgrp)
  1104. {
  1105. struct rdt_resource *r = resctrl_arch_get_resource(RDT_RESOURCE_L3);
  1106. if (!r->mon_capable || !resctrl_arch_mbm_cntr_assign_enabled(r))
  1107. return;
  1108. if (resctrl_is_mon_event_enabled(QOS_L3_MBM_TOTAL_EVENT_ID))
  1109. rdtgroup_unassign_cntr_event(NULL, rdtgrp,
  1110. &mon_event_all[QOS_L3_MBM_TOTAL_EVENT_ID]);
  1111. if (resctrl_is_mon_event_enabled(QOS_L3_MBM_LOCAL_EVENT_ID))
  1112. rdtgroup_unassign_cntr_event(NULL, rdtgrp,
  1113. &mon_event_all[QOS_L3_MBM_LOCAL_EVENT_ID]);
  1114. }
  1115. static int resctrl_parse_mem_transactions(char *tok, u32 *val)
  1116. {
  1117. u32 temp_val = 0;
  1118. char *evt_str;
  1119. bool found;
  1120. int i;
  1121. next_config:
  1122. if (!tok || tok[0] == '\0') {
  1123. *val = temp_val;
  1124. return 0;
  1125. }
  1126. /* Start processing the strings for each memory transaction type */
  1127. evt_str = strim(strsep(&tok, ","));
  1128. found = false;
  1129. for (i = 0; i < NUM_MBM_TRANSACTIONS; i++) {
  1130. if (!strcmp(mbm_transactions[i].name, evt_str)) {
  1131. temp_val |= mbm_transactions[i].val;
  1132. found = true;
  1133. break;
  1134. }
  1135. }
  1136. if (!found) {
  1137. rdt_last_cmd_printf("Invalid memory transaction type %s\n", evt_str);
  1138. return -EINVAL;
  1139. }
  1140. goto next_config;
  1141. }
  1142. /*
  1143. * rdtgroup_update_cntr_event - Update the counter assignments for the event
  1144. * in a group.
  1145. * @r: Resource to which update needs to be done.
  1146. * @rdtgrp: Resctrl group.
  1147. * @evtid: MBM monitor event.
  1148. */
  1149. static void rdtgroup_update_cntr_event(struct rdt_resource *r, struct rdtgroup *rdtgrp,
  1150. enum resctrl_event_id evtid)
  1151. {
  1152. struct rdt_l3_mon_domain *d;
  1153. int cntr_id;
  1154. list_for_each_entry(d, &r->mon_domains, hdr.list) {
  1155. cntr_id = mbm_cntr_get(r, d, rdtgrp, evtid);
  1156. if (cntr_id >= 0)
  1157. rdtgroup_assign_cntr(r, d, evtid, rdtgrp->mon.rmid,
  1158. rdtgrp->closid, cntr_id, true);
  1159. }
  1160. }
  1161. /*
  1162. * resctrl_update_cntr_allrdtgrp - Update the counter assignments for the event
  1163. * for all the groups.
  1164. * @mevt MBM Monitor event.
  1165. */
  1166. static void resctrl_update_cntr_allrdtgrp(struct mon_evt *mevt)
  1167. {
  1168. struct rdt_resource *r = resctrl_arch_get_resource(mevt->rid);
  1169. struct rdtgroup *prgrp, *crgrp;
  1170. /*
  1171. * Find all the groups where the event is assigned and update the
  1172. * configuration of existing assignments.
  1173. */
  1174. list_for_each_entry(prgrp, &rdt_all_groups, rdtgroup_list) {
  1175. rdtgroup_update_cntr_event(r, prgrp, mevt->evtid);
  1176. list_for_each_entry(crgrp, &prgrp->mon.crdtgrp_list, mon.crdtgrp_list)
  1177. rdtgroup_update_cntr_event(r, crgrp, mevt->evtid);
  1178. }
  1179. }
  1180. ssize_t event_filter_write(struct kernfs_open_file *of, char *buf, size_t nbytes,
  1181. loff_t off)
  1182. {
  1183. struct mon_evt *mevt = rdt_kn_parent_priv(of->kn);
  1184. struct rdt_resource *r;
  1185. u32 evt_cfg = 0;
  1186. int ret = 0;
  1187. /* Valid input requires a trailing newline */
  1188. if (nbytes == 0 || buf[nbytes - 1] != '\n')
  1189. return -EINVAL;
  1190. buf[nbytes - 1] = '\0';
  1191. cpus_read_lock();
  1192. mutex_lock(&rdtgroup_mutex);
  1193. rdt_last_cmd_clear();
  1194. r = resctrl_arch_get_resource(mevt->rid);
  1195. if (!resctrl_arch_mbm_cntr_assign_enabled(r)) {
  1196. rdt_last_cmd_puts("mbm_event counter assignment mode is not enabled\n");
  1197. ret = -EINVAL;
  1198. goto out_unlock;
  1199. }
  1200. ret = resctrl_parse_mem_transactions(buf, &evt_cfg);
  1201. if (!ret && mevt->evt_cfg != evt_cfg) {
  1202. mevt->evt_cfg = evt_cfg;
  1203. resctrl_update_cntr_allrdtgrp(mevt);
  1204. }
  1205. out_unlock:
  1206. mutex_unlock(&rdtgroup_mutex);
  1207. cpus_read_unlock();
  1208. return ret ?: nbytes;
  1209. }
  1210. int resctrl_mbm_assign_mode_show(struct kernfs_open_file *of,
  1211. struct seq_file *s, void *v)
  1212. {
  1213. struct rdt_resource *r = rdt_kn_parent_priv(of->kn);
  1214. bool enabled;
  1215. mutex_lock(&rdtgroup_mutex);
  1216. enabled = resctrl_arch_mbm_cntr_assign_enabled(r);
  1217. if (r->mon.mbm_cntr_assignable) {
  1218. if (enabled)
  1219. seq_puts(s, "[mbm_event]\n");
  1220. else
  1221. seq_puts(s, "[default]\n");
  1222. if (!IS_ENABLED(CONFIG_RESCTRL_ASSIGN_FIXED)) {
  1223. if (enabled)
  1224. seq_puts(s, "default\n");
  1225. else
  1226. seq_puts(s, "mbm_event\n");
  1227. }
  1228. } else {
  1229. seq_puts(s, "[default]\n");
  1230. }
  1231. mutex_unlock(&rdtgroup_mutex);
  1232. return 0;
  1233. }
  1234. ssize_t resctrl_mbm_assign_mode_write(struct kernfs_open_file *of, char *buf,
  1235. size_t nbytes, loff_t off)
  1236. {
  1237. struct rdt_resource *r = rdt_kn_parent_priv(of->kn);
  1238. struct rdt_l3_mon_domain *d;
  1239. int ret = 0;
  1240. bool enable;
  1241. /* Valid input requires a trailing newline */
  1242. if (nbytes == 0 || buf[nbytes - 1] != '\n')
  1243. return -EINVAL;
  1244. buf[nbytes - 1] = '\0';
  1245. cpus_read_lock();
  1246. mutex_lock(&rdtgroup_mutex);
  1247. rdt_last_cmd_clear();
  1248. if (!strcmp(buf, "default")) {
  1249. enable = 0;
  1250. } else if (!strcmp(buf, "mbm_event")) {
  1251. if (r->mon.mbm_cntr_assignable) {
  1252. enable = 1;
  1253. } else {
  1254. ret = -EINVAL;
  1255. rdt_last_cmd_puts("mbm_event mode is not supported\n");
  1256. goto out_unlock;
  1257. }
  1258. } else {
  1259. ret = -EINVAL;
  1260. rdt_last_cmd_puts("Unsupported assign mode\n");
  1261. goto out_unlock;
  1262. }
  1263. if (enable != resctrl_arch_mbm_cntr_assign_enabled(r)) {
  1264. ret = resctrl_arch_mbm_cntr_assign_set(r, enable);
  1265. if (ret)
  1266. goto out_unlock;
  1267. /* Update the visibility of BMEC related files */
  1268. resctrl_bmec_files_show(r, NULL, !enable);
  1269. /*
  1270. * Initialize the default memory transaction values for
  1271. * total and local events.
  1272. */
  1273. if (resctrl_is_mon_event_enabled(QOS_L3_MBM_TOTAL_EVENT_ID))
  1274. mon_event_all[QOS_L3_MBM_TOTAL_EVENT_ID].evt_cfg = r->mon.mbm_cfg_mask;
  1275. if (resctrl_is_mon_event_enabled(QOS_L3_MBM_LOCAL_EVENT_ID))
  1276. mon_event_all[QOS_L3_MBM_LOCAL_EVENT_ID].evt_cfg = r->mon.mbm_cfg_mask &
  1277. (READS_TO_LOCAL_MEM |
  1278. READS_TO_LOCAL_S_MEM |
  1279. NON_TEMP_WRITE_TO_LOCAL_MEM);
  1280. /* Enable auto assignment when switching to "mbm_event" mode */
  1281. if (enable)
  1282. r->mon.mbm_assign_on_mkdir = true;
  1283. /*
  1284. * Reset all the non-achitectural RMID state and assignable counters.
  1285. */
  1286. list_for_each_entry(d, &r->mon_domains, hdr.list) {
  1287. mbm_cntr_free_all(r, d);
  1288. resctrl_reset_rmid_all(r, d);
  1289. }
  1290. }
  1291. out_unlock:
  1292. mutex_unlock(&rdtgroup_mutex);
  1293. cpus_read_unlock();
  1294. return ret ?: nbytes;
  1295. }
  1296. int resctrl_num_mbm_cntrs_show(struct kernfs_open_file *of,
  1297. struct seq_file *s, void *v)
  1298. {
  1299. struct rdt_resource *r = rdt_kn_parent_priv(of->kn);
  1300. struct rdt_l3_mon_domain *dom;
  1301. bool sep = false;
  1302. cpus_read_lock();
  1303. mutex_lock(&rdtgroup_mutex);
  1304. list_for_each_entry(dom, &r->mon_domains, hdr.list) {
  1305. if (sep)
  1306. seq_putc(s, ';');
  1307. seq_printf(s, "%d=%d", dom->hdr.id, r->mon.num_mbm_cntrs);
  1308. sep = true;
  1309. }
  1310. seq_putc(s, '\n');
  1311. mutex_unlock(&rdtgroup_mutex);
  1312. cpus_read_unlock();
  1313. return 0;
  1314. }
  1315. int resctrl_available_mbm_cntrs_show(struct kernfs_open_file *of,
  1316. struct seq_file *s, void *v)
  1317. {
  1318. struct rdt_resource *r = rdt_kn_parent_priv(of->kn);
  1319. struct rdt_l3_mon_domain *dom;
  1320. bool sep = false;
  1321. u32 cntrs, i;
  1322. int ret = 0;
  1323. cpus_read_lock();
  1324. mutex_lock(&rdtgroup_mutex);
  1325. rdt_last_cmd_clear();
  1326. if (!resctrl_arch_mbm_cntr_assign_enabled(r)) {
  1327. rdt_last_cmd_puts("mbm_event counter assignment mode is not enabled\n");
  1328. ret = -EINVAL;
  1329. goto out_unlock;
  1330. }
  1331. list_for_each_entry(dom, &r->mon_domains, hdr.list) {
  1332. if (sep)
  1333. seq_putc(s, ';');
  1334. cntrs = 0;
  1335. for (i = 0; i < r->mon.num_mbm_cntrs; i++) {
  1336. if (!dom->cntr_cfg[i].rdtgrp)
  1337. cntrs++;
  1338. }
  1339. seq_printf(s, "%d=%u", dom->hdr.id, cntrs);
  1340. sep = true;
  1341. }
  1342. seq_putc(s, '\n');
  1343. out_unlock:
  1344. mutex_unlock(&rdtgroup_mutex);
  1345. cpus_read_unlock();
  1346. return ret;
  1347. }
  1348. int mbm_L3_assignments_show(struct kernfs_open_file *of, struct seq_file *s, void *v)
  1349. {
  1350. struct rdt_resource *r = resctrl_arch_get_resource(RDT_RESOURCE_L3);
  1351. struct rdt_l3_mon_domain *d;
  1352. struct rdtgroup *rdtgrp;
  1353. struct mon_evt *mevt;
  1354. int ret = 0;
  1355. bool sep;
  1356. rdtgrp = rdtgroup_kn_lock_live(of->kn);
  1357. if (!rdtgrp) {
  1358. ret = -ENOENT;
  1359. goto out_unlock;
  1360. }
  1361. rdt_last_cmd_clear();
  1362. if (!resctrl_arch_mbm_cntr_assign_enabled(r)) {
  1363. rdt_last_cmd_puts("mbm_event counter assignment mode is not enabled\n");
  1364. ret = -EINVAL;
  1365. goto out_unlock;
  1366. }
  1367. for_each_mon_event(mevt) {
  1368. if (mevt->rid != r->rid || !mevt->enabled || !resctrl_is_mbm_event(mevt->evtid))
  1369. continue;
  1370. sep = false;
  1371. seq_printf(s, "%s:", mevt->name);
  1372. list_for_each_entry(d, &r->mon_domains, hdr.list) {
  1373. if (sep)
  1374. seq_putc(s, ';');
  1375. if (mbm_cntr_get(r, d, rdtgrp, mevt->evtid) < 0)
  1376. seq_printf(s, "%d=_", d->hdr.id);
  1377. else
  1378. seq_printf(s, "%d=e", d->hdr.id);
  1379. sep = true;
  1380. }
  1381. seq_putc(s, '\n');
  1382. }
  1383. out_unlock:
  1384. rdtgroup_kn_unlock(of->kn);
  1385. return ret;
  1386. }
  1387. /*
  1388. * mbm_get_mon_event_by_name() - Return the mon_evt entry for the matching
  1389. * event name.
  1390. */
  1391. static struct mon_evt *mbm_get_mon_event_by_name(struct rdt_resource *r, char *name)
  1392. {
  1393. struct mon_evt *mevt;
  1394. for_each_mon_event(mevt) {
  1395. if (mevt->rid == r->rid && mevt->enabled &&
  1396. resctrl_is_mbm_event(mevt->evtid) &&
  1397. !strcmp(mevt->name, name))
  1398. return mevt;
  1399. }
  1400. return NULL;
  1401. }
  1402. static int rdtgroup_modify_assign_state(char *assign, struct rdt_l3_mon_domain *d,
  1403. struct rdtgroup *rdtgrp, struct mon_evt *mevt)
  1404. {
  1405. int ret = 0;
  1406. if (!assign || strlen(assign) != 1)
  1407. return -EINVAL;
  1408. switch (*assign) {
  1409. case 'e':
  1410. ret = rdtgroup_assign_cntr_event(d, rdtgrp, mevt);
  1411. break;
  1412. case '_':
  1413. rdtgroup_unassign_cntr_event(d, rdtgrp, mevt);
  1414. break;
  1415. default:
  1416. ret = -EINVAL;
  1417. break;
  1418. }
  1419. return ret;
  1420. }
  1421. static int resctrl_parse_mbm_assignment(struct rdt_resource *r, struct rdtgroup *rdtgrp,
  1422. char *event, char *tok)
  1423. {
  1424. struct rdt_l3_mon_domain *d;
  1425. unsigned long dom_id = 0;
  1426. char *dom_str, *id_str;
  1427. struct mon_evt *mevt;
  1428. int ret;
  1429. mevt = mbm_get_mon_event_by_name(r, event);
  1430. if (!mevt) {
  1431. rdt_last_cmd_printf("Invalid event %s\n", event);
  1432. return -ENOENT;
  1433. }
  1434. next:
  1435. if (!tok || tok[0] == '\0')
  1436. return 0;
  1437. /* Start processing the strings for each domain */
  1438. dom_str = strim(strsep(&tok, ";"));
  1439. id_str = strsep(&dom_str, "=");
  1440. /* Check for domain id '*' which means all domains */
  1441. if (id_str && *id_str == '*') {
  1442. ret = rdtgroup_modify_assign_state(dom_str, NULL, rdtgrp, mevt);
  1443. if (ret)
  1444. rdt_last_cmd_printf("Assign operation '%s:*=%s' failed\n",
  1445. event, dom_str);
  1446. return ret;
  1447. } else if (!id_str || kstrtoul(id_str, 10, &dom_id)) {
  1448. rdt_last_cmd_puts("Missing domain id\n");
  1449. return -EINVAL;
  1450. }
  1451. /* Verify if the dom_id is valid */
  1452. list_for_each_entry(d, &r->mon_domains, hdr.list) {
  1453. if (d->hdr.id == dom_id) {
  1454. ret = rdtgroup_modify_assign_state(dom_str, d, rdtgrp, mevt);
  1455. if (ret) {
  1456. rdt_last_cmd_printf("Assign operation '%s:%ld=%s' failed\n",
  1457. event, dom_id, dom_str);
  1458. return ret;
  1459. }
  1460. goto next;
  1461. }
  1462. }
  1463. rdt_last_cmd_printf("Invalid domain id %ld\n", dom_id);
  1464. return -EINVAL;
  1465. }
  1466. ssize_t mbm_L3_assignments_write(struct kernfs_open_file *of, char *buf,
  1467. size_t nbytes, loff_t off)
  1468. {
  1469. struct rdt_resource *r = resctrl_arch_get_resource(RDT_RESOURCE_L3);
  1470. struct rdtgroup *rdtgrp;
  1471. char *token, *event;
  1472. int ret = 0;
  1473. /* Valid input requires a trailing newline */
  1474. if (nbytes == 0 || buf[nbytes - 1] != '\n')
  1475. return -EINVAL;
  1476. buf[nbytes - 1] = '\0';
  1477. rdtgrp = rdtgroup_kn_lock_live(of->kn);
  1478. if (!rdtgrp) {
  1479. rdtgroup_kn_unlock(of->kn);
  1480. return -ENOENT;
  1481. }
  1482. rdt_last_cmd_clear();
  1483. if (!resctrl_arch_mbm_cntr_assign_enabled(r)) {
  1484. rdt_last_cmd_puts("mbm_event mode is not enabled\n");
  1485. rdtgroup_kn_unlock(of->kn);
  1486. return -EINVAL;
  1487. }
  1488. while ((token = strsep(&buf, "\n")) != NULL) {
  1489. /*
  1490. * The write command follows the following format:
  1491. * "<Event>:<Domain ID>=<Assignment state>"
  1492. * Extract the event name first.
  1493. */
  1494. event = strsep(&token, ":");
  1495. ret = resctrl_parse_mbm_assignment(r, rdtgrp, event, token);
  1496. if (ret)
  1497. break;
  1498. }
  1499. rdtgroup_kn_unlock(of->kn);
  1500. return ret ?: nbytes;
  1501. }
  1502. static int closid_num_dirty_rmid_alloc(struct rdt_resource *r)
  1503. {
  1504. if (IS_ENABLED(CONFIG_RESCTRL_RMID_DEPENDS_ON_CLOSID)) {
  1505. u32 num_closid = resctrl_arch_get_num_closid(r);
  1506. u32 *tmp;
  1507. /* For ARM memory ordering access to closid_num_dirty_rmid */
  1508. mutex_lock(&rdtgroup_mutex);
  1509. /*
  1510. * If the architecture hasn't provided a sanitised value here,
  1511. * this may result in larger arrays than necessary. Resctrl will
  1512. * use a smaller system wide value based on the resources in
  1513. * use.
  1514. */
  1515. tmp = kcalloc(num_closid, sizeof(*tmp), GFP_KERNEL);
  1516. if (!tmp) {
  1517. mutex_unlock(&rdtgroup_mutex);
  1518. return -ENOMEM;
  1519. }
  1520. closid_num_dirty_rmid = tmp;
  1521. mutex_unlock(&rdtgroup_mutex);
  1522. }
  1523. return 0;
  1524. }
  1525. static void closid_num_dirty_rmid_free(void)
  1526. {
  1527. if (IS_ENABLED(CONFIG_RESCTRL_RMID_DEPENDS_ON_CLOSID)) {
  1528. mutex_lock(&rdtgroup_mutex);
  1529. kfree(closid_num_dirty_rmid);
  1530. closid_num_dirty_rmid = NULL;
  1531. mutex_unlock(&rdtgroup_mutex);
  1532. }
  1533. }
  1534. /**
  1535. * resctrl_l3_mon_resource_init() - Initialise global monitoring structures.
  1536. *
  1537. * Allocate and initialise global monitor resources that do not belong to a
  1538. * specific domain. i.e. the closid_num_dirty_rmid[] used to find the CLOSID
  1539. * with the cleanest set of RMIDs.
  1540. * Called once during boot after the struct rdt_resource's have been configured
  1541. * but before the filesystem is mounted.
  1542. * Resctrl's cpuhp callbacks may be called before this point to bring a domain
  1543. * online.
  1544. *
  1545. * Return: 0 for success, or -ENOMEM.
  1546. */
  1547. int resctrl_l3_mon_resource_init(void)
  1548. {
  1549. struct rdt_resource *r = resctrl_arch_get_resource(RDT_RESOURCE_L3);
  1550. int ret;
  1551. if (!r->mon_capable)
  1552. return 0;
  1553. ret = closid_num_dirty_rmid_alloc(r);
  1554. if (ret)
  1555. return ret;
  1556. if (resctrl_arch_is_evt_configurable(QOS_L3_MBM_TOTAL_EVENT_ID)) {
  1557. mon_event_all[QOS_L3_MBM_TOTAL_EVENT_ID].configurable = true;
  1558. resctrl_file_fflags_init("mbm_total_bytes_config",
  1559. RFTYPE_MON_INFO | RFTYPE_RES_CACHE);
  1560. }
  1561. if (resctrl_arch_is_evt_configurable(QOS_L3_MBM_LOCAL_EVENT_ID)) {
  1562. mon_event_all[QOS_L3_MBM_LOCAL_EVENT_ID].configurable = true;
  1563. resctrl_file_fflags_init("mbm_local_bytes_config",
  1564. RFTYPE_MON_INFO | RFTYPE_RES_CACHE);
  1565. }
  1566. if (resctrl_is_mon_event_enabled(QOS_L3_MBM_LOCAL_EVENT_ID))
  1567. mba_mbps_default_event = QOS_L3_MBM_LOCAL_EVENT_ID;
  1568. else if (resctrl_is_mon_event_enabled(QOS_L3_MBM_TOTAL_EVENT_ID))
  1569. mba_mbps_default_event = QOS_L3_MBM_TOTAL_EVENT_ID;
  1570. if (r->mon.mbm_cntr_assignable) {
  1571. if (resctrl_is_mon_event_enabled(QOS_L3_MBM_TOTAL_EVENT_ID))
  1572. mon_event_all[QOS_L3_MBM_TOTAL_EVENT_ID].evt_cfg = r->mon.mbm_cfg_mask;
  1573. if (resctrl_is_mon_event_enabled(QOS_L3_MBM_LOCAL_EVENT_ID))
  1574. mon_event_all[QOS_L3_MBM_LOCAL_EVENT_ID].evt_cfg = r->mon.mbm_cfg_mask &
  1575. (READS_TO_LOCAL_MEM |
  1576. READS_TO_LOCAL_S_MEM |
  1577. NON_TEMP_WRITE_TO_LOCAL_MEM);
  1578. r->mon.mbm_assign_on_mkdir = true;
  1579. resctrl_file_fflags_init("num_mbm_cntrs",
  1580. RFTYPE_MON_INFO | RFTYPE_RES_CACHE);
  1581. resctrl_file_fflags_init("available_mbm_cntrs",
  1582. RFTYPE_MON_INFO | RFTYPE_RES_CACHE);
  1583. resctrl_file_fflags_init("event_filter", RFTYPE_ASSIGN_CONFIG);
  1584. resctrl_file_fflags_init("mbm_assign_on_mkdir", RFTYPE_MON_INFO |
  1585. RFTYPE_RES_CACHE);
  1586. resctrl_file_fflags_init("mbm_L3_assignments", RFTYPE_MON_BASE);
  1587. }
  1588. return 0;
  1589. }
  1590. void resctrl_l3_mon_resource_exit(void)
  1591. {
  1592. struct rdt_resource *r = resctrl_arch_get_resource(RDT_RESOURCE_L3);
  1593. if (!r->mon_capable)
  1594. return;
  1595. closid_num_dirty_rmid_free();
  1596. }