| 1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483148414851486148714881489149014911492149314941495149614971498149915001501150215031504150515061507150815091510151115121513151415151516151715181519152015211522152315241525152615271528152915301531153215331534153515361537153815391540154115421543154415451546154715481549155015511552155315541555155615571558155915601561156215631564156515661567156815691570157115721573157415751576157715781579158015811582158315841585158615871588158915901591159215931594159515961597159815991600160116021603160416051606160716081609161016111612161316141615161616171618161916201621162216231624162516261627162816291630163116321633163416351636163716381639164016411642164316441645164616471648164916501651165216531654165516561657165816591660166116621663166416651666166716681669167016711672167316741675167616771678167916801681168216831684168516861687168816891690169116921693169416951696169716981699170017011702170317041705170617071708170917101711171217131714171517161717171817191720172117221723172417251726172717281729173017311732173317341735173617371738173917401741174217431744174517461747174817491750175117521753175417551756175717581759176017611762176317641765176617671768176917701771177217731774177517761777177817791780178117821783178417851786178717881789179017911792179317941795179617971798179918001801180218031804180518061807180818091810181118121813181418151816181718181819182018211822182318241825182618271828182918301831183218331834183518361837183818391840184118421843184418451846184718481849185018511852185318541855185618571858185918601861186218631864186518661867186818691870187118721873187418751876187718781879188018811882188318841885188618871888188918901891189218931894189518961897189818991900190119021903 |
- // SPDX-License-Identifier: GPL-2.0-only
- /*
- * Resource Director Technology(RDT)
- * - Monitoring code
- *
- * Copyright (C) 2017 Intel Corporation
- *
- * Author:
- * Vikas Shivappa <vikas.shivappa@intel.com>
- *
- * This replaces the cqm.c based on perf but we reuse a lot of
- * code and datastructures originally from Peter Zijlstra and Matt Fleming.
- *
- * More information about RDT be found in the Intel (R) x86 Architecture
- * Software Developer Manual June 2016, volume 3, section 17.17.
- */
- #define pr_fmt(fmt) "resctrl: " fmt
- #include <linux/cpu.h>
- #include <linux/resctrl.h>
- #include <linux/sizes.h>
- #include <linux/slab.h>
- #include "internal.h"
- #define CREATE_TRACE_POINTS
- #include "monitor_trace.h"
- /**
- * struct rmid_entry - dirty tracking for all RMID.
- * @closid: The CLOSID for this entry.
- * @rmid: The RMID for this entry.
- * @busy: The number of domains with cached data using this RMID.
- * @list: Member of the rmid_free_lru list when busy == 0.
- *
- * Depending on the architecture the correct monitor is accessed using
- * both @closid and @rmid, or @rmid only.
- *
- * Take the rdtgroup_mutex when accessing.
- */
- struct rmid_entry {
- u32 closid;
- u32 rmid;
- int busy;
- struct list_head list;
- };
- /*
- * @rmid_free_lru - A least recently used list of free RMIDs
- * These RMIDs are guaranteed to have an occupancy less than the
- * threshold occupancy
- */
- static LIST_HEAD(rmid_free_lru);
- /*
- * @closid_num_dirty_rmid The number of dirty RMID each CLOSID has.
- * Only allocated when CONFIG_RESCTRL_RMID_DEPENDS_ON_CLOSID is defined.
- * Indexed by CLOSID. Protected by rdtgroup_mutex.
- */
- static u32 *closid_num_dirty_rmid;
- /*
- * @rmid_limbo_count - count of currently unused but (potentially)
- * dirty RMIDs.
- * This counts RMIDs that no one is currently using but that
- * may have a occupancy value > resctrl_rmid_realloc_threshold. User can
- * change the threshold occupancy value.
- */
- static unsigned int rmid_limbo_count;
- /*
- * @rmid_entry - The entry in the limbo and free lists.
- */
- static struct rmid_entry *rmid_ptrs;
- /*
- * This is the threshold cache occupancy in bytes at which we will consider an
- * RMID available for re-allocation.
- */
- unsigned int resctrl_rmid_realloc_threshold;
- /*
- * This is the maximum value for the reallocation threshold, in bytes.
- */
- unsigned int resctrl_rmid_realloc_limit;
- /*
- * x86 and arm64 differ in their handling of monitoring.
- * x86's RMID are independent numbers, there is only one source of traffic
- * with an RMID value of '1'.
- * arm64's PMG extends the PARTID/CLOSID space, there are multiple sources of
- * traffic with a PMG value of '1', one for each CLOSID, meaning the RMID
- * value is no longer unique.
- * To account for this, resctrl uses an index. On x86 this is just the RMID,
- * on arm64 it encodes the CLOSID and RMID. This gives a unique number.
- *
- * The domain's rmid_busy_llc and rmid_ptrs[] are sized by index. The arch code
- * must accept an attempt to read every index.
- */
- static inline struct rmid_entry *__rmid_entry(u32 idx)
- {
- struct rmid_entry *entry;
- u32 closid, rmid;
- entry = &rmid_ptrs[idx];
- resctrl_arch_rmid_idx_decode(idx, &closid, &rmid);
- WARN_ON_ONCE(entry->closid != closid);
- WARN_ON_ONCE(entry->rmid != rmid);
- return entry;
- }
- static void limbo_release_entry(struct rmid_entry *entry)
- {
- lockdep_assert_held(&rdtgroup_mutex);
- rmid_limbo_count--;
- list_add_tail(&entry->list, &rmid_free_lru);
- if (IS_ENABLED(CONFIG_RESCTRL_RMID_DEPENDS_ON_CLOSID))
- closid_num_dirty_rmid[entry->closid]--;
- }
- /*
- * Check the RMIDs that are marked as busy for this domain. If the
- * reported LLC occupancy is below the threshold clear the busy bit and
- * decrement the count. If the busy count gets to zero on an RMID, we
- * free the RMID
- */
- void __check_limbo(struct rdt_l3_mon_domain *d, bool force_free)
- {
- struct rdt_resource *r = resctrl_arch_get_resource(RDT_RESOURCE_L3);
- u32 idx_limit = resctrl_arch_system_num_rmid_idx();
- struct rmid_entry *entry;
- u32 idx, cur_idx = 1;
- void *arch_mon_ctx;
- void *arch_priv;
- bool rmid_dirty;
- u64 val = 0;
- arch_priv = mon_event_all[QOS_L3_OCCUP_EVENT_ID].arch_priv;
- arch_mon_ctx = resctrl_arch_mon_ctx_alloc(r, QOS_L3_OCCUP_EVENT_ID);
- if (IS_ERR(arch_mon_ctx)) {
- pr_warn_ratelimited("Failed to allocate monitor context: %ld",
- PTR_ERR(arch_mon_ctx));
- return;
- }
- /*
- * Skip RMID 0 and start from RMID 1 and check all the RMIDs that
- * are marked as busy for occupancy < threshold. If the occupancy
- * is less than the threshold decrement the busy counter of the
- * RMID and move it to the free list when the counter reaches 0.
- */
- for (;;) {
- idx = find_next_bit(d->rmid_busy_llc, idx_limit, cur_idx);
- if (idx >= idx_limit)
- break;
- entry = __rmid_entry(idx);
- if (resctrl_arch_rmid_read(r, &d->hdr, entry->closid, entry->rmid,
- QOS_L3_OCCUP_EVENT_ID, arch_priv, &val,
- arch_mon_ctx)) {
- rmid_dirty = true;
- } else {
- rmid_dirty = (val >= resctrl_rmid_realloc_threshold);
- /*
- * x86's CLOSID and RMID are independent numbers, so the entry's
- * CLOSID is an empty CLOSID (X86_RESCTRL_EMPTY_CLOSID). On Arm the
- * RMID (PMG) extends the CLOSID (PARTID) space with bits that aren't
- * used to select the configuration. It is thus necessary to track both
- * CLOSID and RMID because there may be dependencies between them
- * on some architectures.
- */
- trace_mon_llc_occupancy_limbo(entry->closid, entry->rmid, d->hdr.id, val);
- }
- if (force_free || !rmid_dirty) {
- clear_bit(idx, d->rmid_busy_llc);
- if (!--entry->busy)
- limbo_release_entry(entry);
- }
- cur_idx = idx + 1;
- }
- resctrl_arch_mon_ctx_free(r, QOS_L3_OCCUP_EVENT_ID, arch_mon_ctx);
- }
- bool has_busy_rmid(struct rdt_l3_mon_domain *d)
- {
- u32 idx_limit = resctrl_arch_system_num_rmid_idx();
- return find_first_bit(d->rmid_busy_llc, idx_limit) != idx_limit;
- }
- static struct rmid_entry *resctrl_find_free_rmid(u32 closid)
- {
- struct rmid_entry *itr;
- u32 itr_idx, cmp_idx;
- if (list_empty(&rmid_free_lru))
- return rmid_limbo_count ? ERR_PTR(-EBUSY) : ERR_PTR(-ENOSPC);
- list_for_each_entry(itr, &rmid_free_lru, list) {
- /*
- * Get the index of this free RMID, and the index it would need
- * to be if it were used with this CLOSID.
- * If the CLOSID is irrelevant on this architecture, the two
- * index values are always the same on every entry and thus the
- * very first entry will be returned.
- */
- itr_idx = resctrl_arch_rmid_idx_encode(itr->closid, itr->rmid);
- cmp_idx = resctrl_arch_rmid_idx_encode(closid, itr->rmid);
- if (itr_idx == cmp_idx)
- return itr;
- }
- return ERR_PTR(-ENOSPC);
- }
- /**
- * resctrl_find_cleanest_closid() - Find a CLOSID where all the associated
- * RMID are clean, or the CLOSID that has
- * the most clean RMID.
- *
- * MPAM's equivalent of RMID are per-CLOSID, meaning a freshly allocated CLOSID
- * may not be able to allocate clean RMID. To avoid this the allocator will
- * choose the CLOSID with the most clean RMID.
- *
- * When the CLOSID and RMID are independent numbers, the first free CLOSID will
- * be returned.
- */
- int resctrl_find_cleanest_closid(void)
- {
- u32 cleanest_closid = ~0;
- int i = 0;
- lockdep_assert_held(&rdtgroup_mutex);
- if (!IS_ENABLED(CONFIG_RESCTRL_RMID_DEPENDS_ON_CLOSID))
- return -EIO;
- for (i = 0; i < closids_supported(); i++) {
- int num_dirty;
- if (closid_allocated(i))
- continue;
- num_dirty = closid_num_dirty_rmid[i];
- if (num_dirty == 0)
- return i;
- if (cleanest_closid == ~0)
- cleanest_closid = i;
- if (num_dirty < closid_num_dirty_rmid[cleanest_closid])
- cleanest_closid = i;
- }
- if (cleanest_closid == ~0)
- return -ENOSPC;
- return cleanest_closid;
- }
- /*
- * For MPAM the RMID value is not unique, and has to be considered with
- * the CLOSID. The (CLOSID, RMID) pair is allocated on all domains, which
- * allows all domains to be managed by a single free list.
- * Each domain also has a rmid_busy_llc to reduce the work of the limbo handler.
- */
- int alloc_rmid(u32 closid)
- {
- struct rmid_entry *entry;
- lockdep_assert_held(&rdtgroup_mutex);
- entry = resctrl_find_free_rmid(closid);
- if (IS_ERR(entry))
- return PTR_ERR(entry);
- list_del(&entry->list);
- return entry->rmid;
- }
- static void add_rmid_to_limbo(struct rmid_entry *entry)
- {
- struct rdt_resource *r = resctrl_arch_get_resource(RDT_RESOURCE_L3);
- struct rdt_l3_mon_domain *d;
- u32 idx;
- lockdep_assert_held(&rdtgroup_mutex);
- /* Walking r->domains, ensure it can't race with cpuhp */
- lockdep_assert_cpus_held();
- idx = resctrl_arch_rmid_idx_encode(entry->closid, entry->rmid);
- entry->busy = 0;
- list_for_each_entry(d, &r->mon_domains, hdr.list) {
- /*
- * For the first limbo RMID in the domain,
- * setup up the limbo worker.
- */
- if (!has_busy_rmid(d))
- cqm_setup_limbo_handler(d, CQM_LIMBOCHECK_INTERVAL,
- RESCTRL_PICK_ANY_CPU);
- set_bit(idx, d->rmid_busy_llc);
- entry->busy++;
- }
- rmid_limbo_count++;
- if (IS_ENABLED(CONFIG_RESCTRL_RMID_DEPENDS_ON_CLOSID))
- closid_num_dirty_rmid[entry->closid]++;
- }
- void free_rmid(u32 closid, u32 rmid)
- {
- u32 idx = resctrl_arch_rmid_idx_encode(closid, rmid);
- struct rmid_entry *entry;
- lockdep_assert_held(&rdtgroup_mutex);
- /*
- * Do not allow the default rmid to be free'd. Comparing by index
- * allows architectures that ignore the closid parameter to avoid an
- * unnecessary check.
- */
- if (!resctrl_arch_mon_capable() ||
- idx == resctrl_arch_rmid_idx_encode(RESCTRL_RESERVED_CLOSID,
- RESCTRL_RESERVED_RMID))
- return;
- entry = __rmid_entry(idx);
- if (resctrl_is_mon_event_enabled(QOS_L3_OCCUP_EVENT_ID))
- add_rmid_to_limbo(entry);
- else
- list_add_tail(&entry->list, &rmid_free_lru);
- }
- static struct mbm_state *get_mbm_state(struct rdt_l3_mon_domain *d, u32 closid,
- u32 rmid, enum resctrl_event_id evtid)
- {
- u32 idx = resctrl_arch_rmid_idx_encode(closid, rmid);
- struct mbm_state *state;
- if (!resctrl_is_mbm_event(evtid))
- return NULL;
- state = d->mbm_states[MBM_STATE_IDX(evtid)];
- return state ? &state[idx] : NULL;
- }
- /*
- * mbm_cntr_get() - Return the counter ID for the matching @evtid and @rdtgrp.
- *
- * Return:
- * Valid counter ID on success, or -ENOENT on failure.
- */
- static int mbm_cntr_get(struct rdt_resource *r, struct rdt_l3_mon_domain *d,
- struct rdtgroup *rdtgrp, enum resctrl_event_id evtid)
- {
- int cntr_id;
- if (!r->mon.mbm_cntr_assignable)
- return -ENOENT;
- if (!resctrl_is_mbm_event(evtid))
- return -ENOENT;
- for (cntr_id = 0; cntr_id < r->mon.num_mbm_cntrs; cntr_id++) {
- if (d->cntr_cfg[cntr_id].rdtgrp == rdtgrp &&
- d->cntr_cfg[cntr_id].evtid == evtid)
- return cntr_id;
- }
- return -ENOENT;
- }
- /*
- * mbm_cntr_alloc() - Initialize and return a new counter ID in the domain @d.
- * Caller must ensure that the specified event is not assigned already.
- *
- * Return:
- * Valid counter ID on success, or -ENOSPC on failure.
- */
- static int mbm_cntr_alloc(struct rdt_resource *r, struct rdt_l3_mon_domain *d,
- struct rdtgroup *rdtgrp, enum resctrl_event_id evtid)
- {
- int cntr_id;
- for (cntr_id = 0; cntr_id < r->mon.num_mbm_cntrs; cntr_id++) {
- if (!d->cntr_cfg[cntr_id].rdtgrp) {
- d->cntr_cfg[cntr_id].rdtgrp = rdtgrp;
- d->cntr_cfg[cntr_id].evtid = evtid;
- return cntr_id;
- }
- }
- return -ENOSPC;
- }
- /*
- * mbm_cntr_free() - Clear the counter ID configuration details in the domain @d.
- */
- static void mbm_cntr_free(struct rdt_l3_mon_domain *d, int cntr_id)
- {
- memset(&d->cntr_cfg[cntr_id], 0, sizeof(*d->cntr_cfg));
- }
- static int __l3_mon_event_count(struct rdtgroup *rdtgrp, struct rmid_read *rr)
- {
- int cpu = smp_processor_id();
- u32 closid = rdtgrp->closid;
- u32 rmid = rdtgrp->mon.rmid;
- struct rdt_l3_mon_domain *d;
- int cntr_id = -ENOENT;
- struct mbm_state *m;
- u64 tval = 0;
- if (!domain_header_is_valid(rr->hdr, RESCTRL_MON_DOMAIN, RDT_RESOURCE_L3)) {
- rr->err = -EIO;
- return -EINVAL;
- }
- d = container_of(rr->hdr, struct rdt_l3_mon_domain, hdr);
- if (rr->is_mbm_cntr) {
- cntr_id = mbm_cntr_get(rr->r, d, rdtgrp, rr->evt->evtid);
- if (cntr_id < 0) {
- rr->err = -ENOENT;
- return -EINVAL;
- }
- }
- if (rr->first) {
- if (rr->is_mbm_cntr)
- resctrl_arch_reset_cntr(rr->r, d, closid, rmid, cntr_id, rr->evt->evtid);
- else
- resctrl_arch_reset_rmid(rr->r, d, closid, rmid, rr->evt->evtid);
- m = get_mbm_state(d, closid, rmid, rr->evt->evtid);
- if (m)
- memset(m, 0, sizeof(struct mbm_state));
- return 0;
- }
- /* Reading a single domain, must be on a CPU in that domain. */
- if (!cpumask_test_cpu(cpu, &d->hdr.cpu_mask))
- return -EINVAL;
- if (rr->is_mbm_cntr)
- rr->err = resctrl_arch_cntr_read(rr->r, d, closid, rmid, cntr_id,
- rr->evt->evtid, &tval);
- else
- rr->err = resctrl_arch_rmid_read(rr->r, rr->hdr, closid, rmid,
- rr->evt->evtid, rr->evt->arch_priv,
- &tval, rr->arch_mon_ctx);
- if (rr->err)
- return rr->err;
- rr->val += tval;
- return 0;
- }
- static int __l3_mon_event_count_sum(struct rdtgroup *rdtgrp, struct rmid_read *rr)
- {
- int cpu = smp_processor_id();
- u32 closid = rdtgrp->closid;
- u32 rmid = rdtgrp->mon.rmid;
- struct rdt_l3_mon_domain *d;
- u64 tval = 0;
- int err, ret;
- /*
- * Summing across domains is only done for systems that implement
- * Sub-NUMA Cluster. There is no overlap with systems that support
- * assignable counters.
- */
- if (rr->is_mbm_cntr) {
- pr_warn_once("Summing domains using assignable counters is not supported\n");
- rr->err = -EINVAL;
- return -EINVAL;
- }
- /* Summing domains that share a cache, must be on a CPU for that cache. */
- if (!cpumask_test_cpu(cpu, &rr->ci->shared_cpu_map))
- return -EINVAL;
- /*
- * Legacy files must report the sum of an event across all
- * domains that share the same L3 cache instance.
- * Report success if a read from any domain succeeds, -EINVAL
- * (translated to "Unavailable" for user space) if reading from
- * all domains fail for any reason.
- */
- ret = -EINVAL;
- list_for_each_entry(d, &rr->r->mon_domains, hdr.list) {
- if (d->ci_id != rr->ci->id)
- continue;
- err = resctrl_arch_rmid_read(rr->r, &d->hdr, closid, rmid,
- rr->evt->evtid, rr->evt->arch_priv,
- &tval, rr->arch_mon_ctx);
- if (!err) {
- rr->val += tval;
- ret = 0;
- }
- }
- if (ret)
- rr->err = ret;
- return ret;
- }
- static int __mon_event_count(struct rdtgroup *rdtgrp, struct rmid_read *rr)
- {
- switch (rr->r->rid) {
- case RDT_RESOURCE_L3:
- WARN_ON_ONCE(rr->evt->any_cpu);
- if (rr->hdr)
- return __l3_mon_event_count(rdtgrp, rr);
- else
- return __l3_mon_event_count_sum(rdtgrp, rr);
- case RDT_RESOURCE_PERF_PKG: {
- u64 tval = 0;
- rr->err = resctrl_arch_rmid_read(rr->r, rr->hdr, rdtgrp->closid,
- rdtgrp->mon.rmid, rr->evt->evtid,
- rr->evt->arch_priv,
- &tval, rr->arch_mon_ctx);
- if (rr->err)
- return rr->err;
- rr->val += tval;
- return 0;
- }
- default:
- rr->err = -EINVAL;
- return -EINVAL;
- }
- }
- /*
- * mbm_bw_count() - Update bw count from values previously read by
- * __mon_event_count().
- * @rdtgrp: resctrl group associated with the CLOSID and RMID to identify
- * the cached mbm_state.
- * @rr: The struct rmid_read populated by __mon_event_count().
- *
- * Supporting function to calculate the memory bandwidth
- * and delta bandwidth in MBps. The chunks value previously read by
- * __mon_event_count() is compared with the chunks value from the previous
- * invocation. This must be called once per second to maintain values in MBps.
- */
- static void mbm_bw_count(struct rdtgroup *rdtgrp, struct rmid_read *rr)
- {
- u64 cur_bw, bytes, cur_bytes;
- u32 closid = rdtgrp->closid;
- u32 rmid = rdtgrp->mon.rmid;
- struct rdt_l3_mon_domain *d;
- struct mbm_state *m;
- if (!domain_header_is_valid(rr->hdr, RESCTRL_MON_DOMAIN, RDT_RESOURCE_L3))
- return;
- d = container_of(rr->hdr, struct rdt_l3_mon_domain, hdr);
- m = get_mbm_state(d, closid, rmid, rr->evt->evtid);
- if (WARN_ON_ONCE(!m))
- return;
- cur_bytes = rr->val;
- bytes = cur_bytes - m->prev_bw_bytes;
- m->prev_bw_bytes = cur_bytes;
- cur_bw = bytes / SZ_1M;
- m->prev_bw = cur_bw;
- }
- /*
- * This is scheduled by mon_event_read() to read the CQM/MBM counters
- * on a domain.
- */
- void mon_event_count(void *info)
- {
- struct rdtgroup *rdtgrp, *entry;
- struct rmid_read *rr = info;
- struct list_head *head;
- int ret;
- rdtgrp = rr->rgrp;
- ret = __mon_event_count(rdtgrp, rr);
- /*
- * For Ctrl groups read data from child monitor groups and
- * add them together. Count events which are read successfully.
- * Discard the rmid_read's reporting errors.
- */
- head = &rdtgrp->mon.crdtgrp_list;
- if (rdtgrp->type == RDTCTRL_GROUP) {
- list_for_each_entry(entry, head, mon.crdtgrp_list) {
- if (__mon_event_count(entry, rr) == 0)
- ret = 0;
- }
- }
- /*
- * __mon_event_count() calls for newly created monitor groups may
- * report -EINVAL/Unavailable if the monitor hasn't seen any traffic.
- * Discard error if any of the monitor event reads succeeded.
- */
- if (ret == 0)
- rr->err = 0;
- }
- static struct rdt_ctrl_domain *get_ctrl_domain_from_cpu(int cpu,
- struct rdt_resource *r)
- {
- struct rdt_ctrl_domain *d;
- lockdep_assert_cpus_held();
- list_for_each_entry(d, &r->ctrl_domains, hdr.list) {
- /* Find the domain that contains this CPU */
- if (cpumask_test_cpu(cpu, &d->hdr.cpu_mask))
- return d;
- }
- return NULL;
- }
- /*
- * Feedback loop for MBA software controller (mba_sc)
- *
- * mba_sc is a feedback loop where we periodically read MBM counters and
- * adjust the bandwidth percentage values via the IA32_MBA_THRTL_MSRs so
- * that:
- *
- * current bandwidth(cur_bw) < user specified bandwidth(user_bw)
- *
- * This uses the MBM counters to measure the bandwidth and MBA throttle
- * MSRs to control the bandwidth for a particular rdtgrp. It builds on the
- * fact that resctrl rdtgroups have both monitoring and control.
- *
- * The frequency of the checks is 1s and we just tag along the MBM overflow
- * timer. Having 1s interval makes the calculation of bandwidth simpler.
- *
- * Although MBA's goal is to restrict the bandwidth to a maximum, there may
- * be a need to increase the bandwidth to avoid unnecessarily restricting
- * the L2 <-> L3 traffic.
- *
- * Since MBA controls the L2 external bandwidth where as MBM measures the
- * L3 external bandwidth the following sequence could lead to such a
- * situation.
- *
- * Consider an rdtgroup which had high L3 <-> memory traffic in initial
- * phases -> mba_sc kicks in and reduced bandwidth percentage values -> but
- * after some time rdtgroup has mostly L2 <-> L3 traffic.
- *
- * In this case we may restrict the rdtgroup's L2 <-> L3 traffic as its
- * throttle MSRs already have low percentage values. To avoid
- * unnecessarily restricting such rdtgroups, we also increase the bandwidth.
- */
- static void update_mba_bw(struct rdtgroup *rgrp, struct rdt_l3_mon_domain *dom_mbm)
- {
- u32 closid, rmid, cur_msr_val, new_msr_val;
- struct mbm_state *pmbm_data, *cmbm_data;
- struct rdt_ctrl_domain *dom_mba;
- enum resctrl_event_id evt_id;
- struct rdt_resource *r_mba;
- struct list_head *head;
- struct rdtgroup *entry;
- u32 cur_bw, user_bw;
- r_mba = resctrl_arch_get_resource(RDT_RESOURCE_MBA);
- evt_id = rgrp->mba_mbps_event;
- closid = rgrp->closid;
- rmid = rgrp->mon.rmid;
- pmbm_data = get_mbm_state(dom_mbm, closid, rmid, evt_id);
- if (WARN_ON_ONCE(!pmbm_data))
- return;
- dom_mba = get_ctrl_domain_from_cpu(smp_processor_id(), r_mba);
- if (!dom_mba) {
- pr_warn_once("Failure to get domain for MBA update\n");
- return;
- }
- cur_bw = pmbm_data->prev_bw;
- user_bw = dom_mba->mbps_val[closid];
- /* MBA resource doesn't support CDP */
- cur_msr_val = resctrl_arch_get_config(r_mba, dom_mba, closid, CDP_NONE);
- /*
- * For Ctrl groups read data from child monitor groups.
- */
- head = &rgrp->mon.crdtgrp_list;
- list_for_each_entry(entry, head, mon.crdtgrp_list) {
- cmbm_data = get_mbm_state(dom_mbm, entry->closid, entry->mon.rmid, evt_id);
- if (WARN_ON_ONCE(!cmbm_data))
- return;
- cur_bw += cmbm_data->prev_bw;
- }
- /*
- * Scale up/down the bandwidth linearly for the ctrl group. The
- * bandwidth step is the bandwidth granularity specified by the
- * hardware.
- * Always increase throttling if current bandwidth is above the
- * target set by user.
- * But avoid thrashing up and down on every poll by checking
- * whether a decrease in throttling is likely to push the group
- * back over target. E.g. if currently throttling to 30% of bandwidth
- * on a system with 10% granularity steps, check whether moving to
- * 40% would go past the limit by multiplying current bandwidth by
- * "(30 + 10) / 30".
- */
- if (cur_msr_val > r_mba->membw.min_bw && user_bw < cur_bw) {
- new_msr_val = cur_msr_val - r_mba->membw.bw_gran;
- } else if (cur_msr_val < MAX_MBA_BW &&
- (user_bw > (cur_bw * (cur_msr_val + r_mba->membw.min_bw) / cur_msr_val))) {
- new_msr_val = cur_msr_val + r_mba->membw.bw_gran;
- } else {
- return;
- }
- resctrl_arch_update_one(r_mba, dom_mba, closid, CDP_NONE, new_msr_val);
- }
- static void mbm_update_one_event(struct rdt_resource *r, struct rdt_l3_mon_domain *d,
- struct rdtgroup *rdtgrp, enum resctrl_event_id evtid)
- {
- struct rmid_read rr = {0};
- rr.r = r;
- rr.hdr = &d->hdr;
- rr.evt = &mon_event_all[evtid];
- if (resctrl_arch_mbm_cntr_assign_enabled(r)) {
- rr.is_mbm_cntr = true;
- } else {
- rr.arch_mon_ctx = resctrl_arch_mon_ctx_alloc(rr.r, evtid);
- if (IS_ERR(rr.arch_mon_ctx)) {
- pr_warn_ratelimited("Failed to allocate monitor context: %ld",
- PTR_ERR(rr.arch_mon_ctx));
- return;
- }
- }
- __mon_event_count(rdtgrp, &rr);
- /*
- * If the software controller is enabled, compute the
- * bandwidth for this event id.
- */
- if (is_mba_sc(NULL))
- mbm_bw_count(rdtgrp, &rr);
- if (rr.arch_mon_ctx)
- resctrl_arch_mon_ctx_free(rr.r, evtid, rr.arch_mon_ctx);
- }
- static void mbm_update(struct rdt_resource *r, struct rdt_l3_mon_domain *d,
- struct rdtgroup *rdtgrp)
- {
- /*
- * This is protected from concurrent reads from user as both
- * the user and overflow handler hold the global mutex.
- */
- if (resctrl_is_mon_event_enabled(QOS_L3_MBM_TOTAL_EVENT_ID))
- mbm_update_one_event(r, d, rdtgrp, QOS_L3_MBM_TOTAL_EVENT_ID);
- if (resctrl_is_mon_event_enabled(QOS_L3_MBM_LOCAL_EVENT_ID))
- mbm_update_one_event(r, d, rdtgrp, QOS_L3_MBM_LOCAL_EVENT_ID);
- }
- /*
- * Handler to scan the limbo list and move the RMIDs
- * to free list whose occupancy < threshold_occupancy.
- */
- void cqm_handle_limbo(struct work_struct *work)
- {
- unsigned long delay = msecs_to_jiffies(CQM_LIMBOCHECK_INTERVAL);
- struct rdt_l3_mon_domain *d;
- cpus_read_lock();
- mutex_lock(&rdtgroup_mutex);
- d = container_of(work, struct rdt_l3_mon_domain, cqm_limbo.work);
- __check_limbo(d, false);
- if (has_busy_rmid(d)) {
- d->cqm_work_cpu = cpumask_any_housekeeping(&d->hdr.cpu_mask,
- RESCTRL_PICK_ANY_CPU);
- schedule_delayed_work_on(d->cqm_work_cpu, &d->cqm_limbo,
- delay);
- }
- mutex_unlock(&rdtgroup_mutex);
- cpus_read_unlock();
- }
- /**
- * cqm_setup_limbo_handler() - Schedule the limbo handler to run for this
- * domain.
- * @dom: The domain the limbo handler should run for.
- * @delay_ms: How far in the future the handler should run.
- * @exclude_cpu: Which CPU the handler should not run on,
- * RESCTRL_PICK_ANY_CPU to pick any CPU.
- */
- void cqm_setup_limbo_handler(struct rdt_l3_mon_domain *dom, unsigned long delay_ms,
- int exclude_cpu)
- {
- unsigned long delay = msecs_to_jiffies(delay_ms);
- int cpu;
- cpu = cpumask_any_housekeeping(&dom->hdr.cpu_mask, exclude_cpu);
- dom->cqm_work_cpu = cpu;
- if (cpu < nr_cpu_ids)
- schedule_delayed_work_on(cpu, &dom->cqm_limbo, delay);
- }
- void mbm_handle_overflow(struct work_struct *work)
- {
- unsigned long delay = msecs_to_jiffies(MBM_OVERFLOW_INTERVAL);
- struct rdtgroup *prgrp, *crgrp;
- struct rdt_l3_mon_domain *d;
- struct list_head *head;
- struct rdt_resource *r;
- cpus_read_lock();
- mutex_lock(&rdtgroup_mutex);
- /*
- * If the filesystem has been unmounted this work no longer needs to
- * run.
- */
- if (!resctrl_mounted || !resctrl_arch_mon_capable())
- goto out_unlock;
- r = resctrl_arch_get_resource(RDT_RESOURCE_L3);
- d = container_of(work, struct rdt_l3_mon_domain, mbm_over.work);
- list_for_each_entry(prgrp, &rdt_all_groups, rdtgroup_list) {
- mbm_update(r, d, prgrp);
- head = &prgrp->mon.crdtgrp_list;
- list_for_each_entry(crgrp, head, mon.crdtgrp_list)
- mbm_update(r, d, crgrp);
- if (is_mba_sc(NULL))
- update_mba_bw(prgrp, d);
- }
- /*
- * Re-check for housekeeping CPUs. This allows the overflow handler to
- * move off a nohz_full CPU quickly.
- */
- d->mbm_work_cpu = cpumask_any_housekeeping(&d->hdr.cpu_mask,
- RESCTRL_PICK_ANY_CPU);
- schedule_delayed_work_on(d->mbm_work_cpu, &d->mbm_over, delay);
- out_unlock:
- mutex_unlock(&rdtgroup_mutex);
- cpus_read_unlock();
- }
- /**
- * mbm_setup_overflow_handler() - Schedule the overflow handler to run for this
- * domain.
- * @dom: The domain the overflow handler should run for.
- * @delay_ms: How far in the future the handler should run.
- * @exclude_cpu: Which CPU the handler should not run on,
- * RESCTRL_PICK_ANY_CPU to pick any CPU.
- */
- void mbm_setup_overflow_handler(struct rdt_l3_mon_domain *dom, unsigned long delay_ms,
- int exclude_cpu)
- {
- unsigned long delay = msecs_to_jiffies(delay_ms);
- int cpu;
- /*
- * When a domain comes online there is no guarantee the filesystem is
- * mounted. If not, there is no need to catch counter overflow.
- */
- if (!resctrl_mounted || !resctrl_arch_mon_capable())
- return;
- cpu = cpumask_any_housekeeping(&dom->hdr.cpu_mask, exclude_cpu);
- dom->mbm_work_cpu = cpu;
- if (cpu < nr_cpu_ids)
- schedule_delayed_work_on(cpu, &dom->mbm_over, delay);
- }
- int setup_rmid_lru_list(void)
- {
- struct rmid_entry *entry = NULL;
- u32 idx_limit;
- u32 idx;
- int i;
- if (!resctrl_arch_mon_capable())
- return 0;
- /*
- * Called on every mount, but the number of RMIDs cannot change
- * after the first mount, so keep using the same set of rmid_ptrs[]
- * until resctrl_exit(). Note that the limbo handler continues to
- * access rmid_ptrs[] after resctrl is unmounted.
- */
- if (rmid_ptrs)
- return 0;
- idx_limit = resctrl_arch_system_num_rmid_idx();
- rmid_ptrs = kzalloc_objs(struct rmid_entry, idx_limit);
- if (!rmid_ptrs)
- return -ENOMEM;
- for (i = 0; i < idx_limit; i++) {
- entry = &rmid_ptrs[i];
- INIT_LIST_HEAD(&entry->list);
- resctrl_arch_rmid_idx_decode(i, &entry->closid, &entry->rmid);
- list_add_tail(&entry->list, &rmid_free_lru);
- }
- /*
- * RESCTRL_RESERVED_CLOSID and RESCTRL_RESERVED_RMID are special and
- * are always allocated. These are used for the rdtgroup_default
- * control group, which was setup earlier in rdtgroup_setup_default().
- */
- idx = resctrl_arch_rmid_idx_encode(RESCTRL_RESERVED_CLOSID,
- RESCTRL_RESERVED_RMID);
- entry = __rmid_entry(idx);
- list_del(&entry->list);
- return 0;
- }
- void free_rmid_lru_list(void)
- {
- if (!resctrl_arch_mon_capable())
- return;
- mutex_lock(&rdtgroup_mutex);
- kfree(rmid_ptrs);
- rmid_ptrs = NULL;
- mutex_unlock(&rdtgroup_mutex);
- }
- #define MON_EVENT(_eventid, _name, _res, _fp) \
- [_eventid] = { \
- .name = _name, \
- .evtid = _eventid, \
- .rid = _res, \
- .is_floating_point = _fp, \
- }
- /*
- * All available events. Architecture code marks the ones that
- * are supported by a system using resctrl_enable_mon_event()
- * to set .enabled.
- */
- struct mon_evt mon_event_all[QOS_NUM_EVENTS] = {
- MON_EVENT(QOS_L3_OCCUP_EVENT_ID, "llc_occupancy", RDT_RESOURCE_L3, false),
- MON_EVENT(QOS_L3_MBM_TOTAL_EVENT_ID, "mbm_total_bytes", RDT_RESOURCE_L3, false),
- MON_EVENT(QOS_L3_MBM_LOCAL_EVENT_ID, "mbm_local_bytes", RDT_RESOURCE_L3, false),
- MON_EVENT(PMT_EVENT_ENERGY, "core_energy", RDT_RESOURCE_PERF_PKG, true),
- MON_EVENT(PMT_EVENT_ACTIVITY, "activity", RDT_RESOURCE_PERF_PKG, true),
- MON_EVENT(PMT_EVENT_STALLS_LLC_HIT, "stalls_llc_hit", RDT_RESOURCE_PERF_PKG, false),
- MON_EVENT(PMT_EVENT_C1_RES, "c1_res", RDT_RESOURCE_PERF_PKG, false),
- MON_EVENT(PMT_EVENT_UNHALTED_CORE_CYCLES, "unhalted_core_cycles", RDT_RESOURCE_PERF_PKG, false),
- MON_EVENT(PMT_EVENT_STALLS_LLC_MISS, "stalls_llc_miss", RDT_RESOURCE_PERF_PKG, false),
- MON_EVENT(PMT_EVENT_AUTO_C6_RES, "c6_res", RDT_RESOURCE_PERF_PKG, false),
- MON_EVENT(PMT_EVENT_UNHALTED_REF_CYCLES, "unhalted_ref_cycles", RDT_RESOURCE_PERF_PKG, false),
- MON_EVENT(PMT_EVENT_UOPS_RETIRED, "uops_retired", RDT_RESOURCE_PERF_PKG, false),
- };
- bool resctrl_enable_mon_event(enum resctrl_event_id eventid, bool any_cpu,
- unsigned int binary_bits, void *arch_priv)
- {
- if (WARN_ON_ONCE(eventid < QOS_FIRST_EVENT || eventid >= QOS_NUM_EVENTS ||
- binary_bits > MAX_BINARY_BITS))
- return false;
- if (mon_event_all[eventid].enabled) {
- pr_warn("Duplicate enable for event %d\n", eventid);
- return false;
- }
- if (binary_bits && !mon_event_all[eventid].is_floating_point) {
- pr_warn("Event %d may not be floating point\n", eventid);
- return false;
- }
- mon_event_all[eventid].any_cpu = any_cpu;
- mon_event_all[eventid].binary_bits = binary_bits;
- mon_event_all[eventid].arch_priv = arch_priv;
- mon_event_all[eventid].enabled = true;
- return true;
- }
- bool resctrl_is_mon_event_enabled(enum resctrl_event_id eventid)
- {
- return eventid >= QOS_FIRST_EVENT && eventid < QOS_NUM_EVENTS &&
- mon_event_all[eventid].enabled;
- }
- u32 resctrl_get_mon_evt_cfg(enum resctrl_event_id evtid)
- {
- return mon_event_all[evtid].evt_cfg;
- }
- /**
- * struct mbm_transaction - Memory transaction an MBM event can be configured with.
- * @name: Name of memory transaction (read, write ...).
- * @val: The bit (eg. READS_TO_LOCAL_MEM or READS_TO_REMOTE_MEM) used to
- * represent the memory transaction within an event's configuration.
- */
- struct mbm_transaction {
- char name[32];
- u32 val;
- };
- /* Decoded values for each type of memory transaction. */
- static struct mbm_transaction mbm_transactions[NUM_MBM_TRANSACTIONS] = {
- {"local_reads", READS_TO_LOCAL_MEM},
- {"remote_reads", READS_TO_REMOTE_MEM},
- {"local_non_temporal_writes", NON_TEMP_WRITE_TO_LOCAL_MEM},
- {"remote_non_temporal_writes", NON_TEMP_WRITE_TO_REMOTE_MEM},
- {"local_reads_slow_memory", READS_TO_LOCAL_S_MEM},
- {"remote_reads_slow_memory", READS_TO_REMOTE_S_MEM},
- {"dirty_victim_writes_all", DIRTY_VICTIMS_TO_ALL_MEM},
- };
- int event_filter_show(struct kernfs_open_file *of, struct seq_file *seq, void *v)
- {
- struct mon_evt *mevt = rdt_kn_parent_priv(of->kn);
- struct rdt_resource *r;
- bool sep = false;
- int ret = 0, i;
- mutex_lock(&rdtgroup_mutex);
- rdt_last_cmd_clear();
- r = resctrl_arch_get_resource(mevt->rid);
- if (!resctrl_arch_mbm_cntr_assign_enabled(r)) {
- rdt_last_cmd_puts("mbm_event counter assignment mode is not enabled\n");
- ret = -EINVAL;
- goto out_unlock;
- }
- for (i = 0; i < NUM_MBM_TRANSACTIONS; i++) {
- if (mevt->evt_cfg & mbm_transactions[i].val) {
- if (sep)
- seq_putc(seq, ',');
- seq_printf(seq, "%s", mbm_transactions[i].name);
- sep = true;
- }
- }
- seq_putc(seq, '\n');
- out_unlock:
- mutex_unlock(&rdtgroup_mutex);
- return ret;
- }
- int resctrl_mbm_assign_on_mkdir_show(struct kernfs_open_file *of, struct seq_file *s,
- void *v)
- {
- struct rdt_resource *r = rdt_kn_parent_priv(of->kn);
- int ret = 0;
- mutex_lock(&rdtgroup_mutex);
- rdt_last_cmd_clear();
- if (!resctrl_arch_mbm_cntr_assign_enabled(r)) {
- rdt_last_cmd_puts("mbm_event counter assignment mode is not enabled\n");
- ret = -EINVAL;
- goto out_unlock;
- }
- seq_printf(s, "%u\n", r->mon.mbm_assign_on_mkdir);
- out_unlock:
- mutex_unlock(&rdtgroup_mutex);
- return ret;
- }
- ssize_t resctrl_mbm_assign_on_mkdir_write(struct kernfs_open_file *of, char *buf,
- size_t nbytes, loff_t off)
- {
- struct rdt_resource *r = rdt_kn_parent_priv(of->kn);
- bool value;
- int ret;
- ret = kstrtobool(buf, &value);
- if (ret)
- return ret;
- mutex_lock(&rdtgroup_mutex);
- rdt_last_cmd_clear();
- if (!resctrl_arch_mbm_cntr_assign_enabled(r)) {
- rdt_last_cmd_puts("mbm_event counter assignment mode is not enabled\n");
- ret = -EINVAL;
- goto out_unlock;
- }
- r->mon.mbm_assign_on_mkdir = value;
- out_unlock:
- mutex_unlock(&rdtgroup_mutex);
- return ret ?: nbytes;
- }
- /*
- * mbm_cntr_free_all() - Clear all the counter ID configuration details in the
- * domain @d. Called when mbm_assign_mode is changed.
- */
- static void mbm_cntr_free_all(struct rdt_resource *r, struct rdt_l3_mon_domain *d)
- {
- memset(d->cntr_cfg, 0, sizeof(*d->cntr_cfg) * r->mon.num_mbm_cntrs);
- }
- /*
- * resctrl_reset_rmid_all() - Reset all non-architecture states for all the
- * supported RMIDs.
- */
- static void resctrl_reset_rmid_all(struct rdt_resource *r, struct rdt_l3_mon_domain *d)
- {
- u32 idx_limit = resctrl_arch_system_num_rmid_idx();
- enum resctrl_event_id evt;
- int idx;
- for_each_mbm_event_id(evt) {
- if (!resctrl_is_mon_event_enabled(evt))
- continue;
- idx = MBM_STATE_IDX(evt);
- memset(d->mbm_states[idx], 0, sizeof(*d->mbm_states[0]) * idx_limit);
- }
- }
- /*
- * rdtgroup_assign_cntr() - Assign/unassign the counter ID for the event, RMID
- * pair in the domain.
- *
- * Assign the counter if @assign is true else unassign the counter. Reset the
- * associated non-architectural state.
- */
- static void rdtgroup_assign_cntr(struct rdt_resource *r, struct rdt_l3_mon_domain *d,
- enum resctrl_event_id evtid, u32 rmid, u32 closid,
- u32 cntr_id, bool assign)
- {
- struct mbm_state *m;
- resctrl_arch_config_cntr(r, d, evtid, rmid, closid, cntr_id, assign);
- m = get_mbm_state(d, closid, rmid, evtid);
- if (m)
- memset(m, 0, sizeof(*m));
- }
- /*
- * rdtgroup_alloc_assign_cntr() - Allocate a counter ID and assign it to the event
- * pointed to by @mevt and the resctrl group @rdtgrp within the domain @d.
- *
- * Return:
- * 0 on success, < 0 on failure.
- */
- static int rdtgroup_alloc_assign_cntr(struct rdt_resource *r, struct rdt_l3_mon_domain *d,
- struct rdtgroup *rdtgrp, struct mon_evt *mevt)
- {
- int cntr_id;
- /* No action required if the counter is assigned already. */
- cntr_id = mbm_cntr_get(r, d, rdtgrp, mevt->evtid);
- if (cntr_id >= 0)
- return 0;
- cntr_id = mbm_cntr_alloc(r, d, rdtgrp, mevt->evtid);
- if (cntr_id < 0) {
- rdt_last_cmd_printf("Failed to allocate counter for %s in domain %d\n",
- mevt->name, d->hdr.id);
- return cntr_id;
- }
- rdtgroup_assign_cntr(r, d, mevt->evtid, rdtgrp->mon.rmid, rdtgrp->closid, cntr_id, true);
- return 0;
- }
- /*
- * rdtgroup_assign_cntr_event() - Assign a hardware counter for the event in
- * @mevt to the resctrl group @rdtgrp. Assign counters to all domains if @d is
- * NULL; otherwise, assign the counter to the specified domain @d.
- *
- * If all counters in a domain are already in use, rdtgroup_alloc_assign_cntr()
- * will fail. The assignment process will abort at the first failure encountered
- * during domain traversal, which may result in the event being only partially
- * assigned.
- *
- * Return:
- * 0 on success, < 0 on failure.
- */
- static int rdtgroup_assign_cntr_event(struct rdt_l3_mon_domain *d, struct rdtgroup *rdtgrp,
- struct mon_evt *mevt)
- {
- struct rdt_resource *r = resctrl_arch_get_resource(mevt->rid);
- int ret = 0;
- if (!d) {
- list_for_each_entry(d, &r->mon_domains, hdr.list) {
- ret = rdtgroup_alloc_assign_cntr(r, d, rdtgrp, mevt);
- if (ret)
- return ret;
- }
- } else {
- ret = rdtgroup_alloc_assign_cntr(r, d, rdtgrp, mevt);
- }
- return ret;
- }
- /*
- * rdtgroup_assign_cntrs() - Assign counters to MBM events. Called when
- * a new group is created.
- *
- * Each group can accommodate two counters per domain: one for the total
- * event and one for the local event. Assignments may fail due to the limited
- * number of counters. However, it is not necessary to fail the group creation
- * and thus no failure is returned. Users have the option to modify the
- * counter assignments after the group has been created.
- */
- void rdtgroup_assign_cntrs(struct rdtgroup *rdtgrp)
- {
- struct rdt_resource *r = resctrl_arch_get_resource(RDT_RESOURCE_L3);
- if (!r->mon_capable || !resctrl_arch_mbm_cntr_assign_enabled(r) ||
- !r->mon.mbm_assign_on_mkdir)
- return;
- if (resctrl_is_mon_event_enabled(QOS_L3_MBM_TOTAL_EVENT_ID))
- rdtgroup_assign_cntr_event(NULL, rdtgrp,
- &mon_event_all[QOS_L3_MBM_TOTAL_EVENT_ID]);
- if (resctrl_is_mon_event_enabled(QOS_L3_MBM_LOCAL_EVENT_ID))
- rdtgroup_assign_cntr_event(NULL, rdtgrp,
- &mon_event_all[QOS_L3_MBM_LOCAL_EVENT_ID]);
- }
- /*
- * rdtgroup_free_unassign_cntr() - Unassign and reset the counter ID configuration
- * for the event pointed to by @mevt within the domain @d and resctrl group @rdtgrp.
- */
- static void rdtgroup_free_unassign_cntr(struct rdt_resource *r, struct rdt_l3_mon_domain *d,
- struct rdtgroup *rdtgrp, struct mon_evt *mevt)
- {
- int cntr_id;
- cntr_id = mbm_cntr_get(r, d, rdtgrp, mevt->evtid);
- /* If there is no cntr_id assigned, nothing to do */
- if (cntr_id < 0)
- return;
- rdtgroup_assign_cntr(r, d, mevt->evtid, rdtgrp->mon.rmid, rdtgrp->closid, cntr_id, false);
- mbm_cntr_free(d, cntr_id);
- }
- /*
- * rdtgroup_unassign_cntr_event() - Unassign a hardware counter associated with
- * the event structure @mevt from the domain @d and the group @rdtgrp. Unassign
- * the counters from all the domains if @d is NULL else unassign from @d.
- */
- static void rdtgroup_unassign_cntr_event(struct rdt_l3_mon_domain *d, struct rdtgroup *rdtgrp,
- struct mon_evt *mevt)
- {
- struct rdt_resource *r = resctrl_arch_get_resource(mevt->rid);
- if (!d) {
- list_for_each_entry(d, &r->mon_domains, hdr.list)
- rdtgroup_free_unassign_cntr(r, d, rdtgrp, mevt);
- } else {
- rdtgroup_free_unassign_cntr(r, d, rdtgrp, mevt);
- }
- }
- /*
- * rdtgroup_unassign_cntrs() - Unassign the counters associated with MBM events.
- * Called when a group is deleted.
- */
- void rdtgroup_unassign_cntrs(struct rdtgroup *rdtgrp)
- {
- struct rdt_resource *r = resctrl_arch_get_resource(RDT_RESOURCE_L3);
- if (!r->mon_capable || !resctrl_arch_mbm_cntr_assign_enabled(r))
- return;
- if (resctrl_is_mon_event_enabled(QOS_L3_MBM_TOTAL_EVENT_ID))
- rdtgroup_unassign_cntr_event(NULL, rdtgrp,
- &mon_event_all[QOS_L3_MBM_TOTAL_EVENT_ID]);
- if (resctrl_is_mon_event_enabled(QOS_L3_MBM_LOCAL_EVENT_ID))
- rdtgroup_unassign_cntr_event(NULL, rdtgrp,
- &mon_event_all[QOS_L3_MBM_LOCAL_EVENT_ID]);
- }
- static int resctrl_parse_mem_transactions(char *tok, u32 *val)
- {
- u32 temp_val = 0;
- char *evt_str;
- bool found;
- int i;
- next_config:
- if (!tok || tok[0] == '\0') {
- *val = temp_val;
- return 0;
- }
- /* Start processing the strings for each memory transaction type */
- evt_str = strim(strsep(&tok, ","));
- found = false;
- for (i = 0; i < NUM_MBM_TRANSACTIONS; i++) {
- if (!strcmp(mbm_transactions[i].name, evt_str)) {
- temp_val |= mbm_transactions[i].val;
- found = true;
- break;
- }
- }
- if (!found) {
- rdt_last_cmd_printf("Invalid memory transaction type %s\n", evt_str);
- return -EINVAL;
- }
- goto next_config;
- }
- /*
- * rdtgroup_update_cntr_event - Update the counter assignments for the event
- * in a group.
- * @r: Resource to which update needs to be done.
- * @rdtgrp: Resctrl group.
- * @evtid: MBM monitor event.
- */
- static void rdtgroup_update_cntr_event(struct rdt_resource *r, struct rdtgroup *rdtgrp,
- enum resctrl_event_id evtid)
- {
- struct rdt_l3_mon_domain *d;
- int cntr_id;
- list_for_each_entry(d, &r->mon_domains, hdr.list) {
- cntr_id = mbm_cntr_get(r, d, rdtgrp, evtid);
- if (cntr_id >= 0)
- rdtgroup_assign_cntr(r, d, evtid, rdtgrp->mon.rmid,
- rdtgrp->closid, cntr_id, true);
- }
- }
- /*
- * resctrl_update_cntr_allrdtgrp - Update the counter assignments for the event
- * for all the groups.
- * @mevt MBM Monitor event.
- */
- static void resctrl_update_cntr_allrdtgrp(struct mon_evt *mevt)
- {
- struct rdt_resource *r = resctrl_arch_get_resource(mevt->rid);
- struct rdtgroup *prgrp, *crgrp;
- /*
- * Find all the groups where the event is assigned and update the
- * configuration of existing assignments.
- */
- list_for_each_entry(prgrp, &rdt_all_groups, rdtgroup_list) {
- rdtgroup_update_cntr_event(r, prgrp, mevt->evtid);
- list_for_each_entry(crgrp, &prgrp->mon.crdtgrp_list, mon.crdtgrp_list)
- rdtgroup_update_cntr_event(r, crgrp, mevt->evtid);
- }
- }
- ssize_t event_filter_write(struct kernfs_open_file *of, char *buf, size_t nbytes,
- loff_t off)
- {
- struct mon_evt *mevt = rdt_kn_parent_priv(of->kn);
- struct rdt_resource *r;
- u32 evt_cfg = 0;
- int ret = 0;
- /* Valid input requires a trailing newline */
- if (nbytes == 0 || buf[nbytes - 1] != '\n')
- return -EINVAL;
- buf[nbytes - 1] = '\0';
- cpus_read_lock();
- mutex_lock(&rdtgroup_mutex);
- rdt_last_cmd_clear();
- r = resctrl_arch_get_resource(mevt->rid);
- if (!resctrl_arch_mbm_cntr_assign_enabled(r)) {
- rdt_last_cmd_puts("mbm_event counter assignment mode is not enabled\n");
- ret = -EINVAL;
- goto out_unlock;
- }
- ret = resctrl_parse_mem_transactions(buf, &evt_cfg);
- if (!ret && mevt->evt_cfg != evt_cfg) {
- mevt->evt_cfg = evt_cfg;
- resctrl_update_cntr_allrdtgrp(mevt);
- }
- out_unlock:
- mutex_unlock(&rdtgroup_mutex);
- cpus_read_unlock();
- return ret ?: nbytes;
- }
- int resctrl_mbm_assign_mode_show(struct kernfs_open_file *of,
- struct seq_file *s, void *v)
- {
- struct rdt_resource *r = rdt_kn_parent_priv(of->kn);
- bool enabled;
- mutex_lock(&rdtgroup_mutex);
- enabled = resctrl_arch_mbm_cntr_assign_enabled(r);
- if (r->mon.mbm_cntr_assignable) {
- if (enabled)
- seq_puts(s, "[mbm_event]\n");
- else
- seq_puts(s, "[default]\n");
- if (!IS_ENABLED(CONFIG_RESCTRL_ASSIGN_FIXED)) {
- if (enabled)
- seq_puts(s, "default\n");
- else
- seq_puts(s, "mbm_event\n");
- }
- } else {
- seq_puts(s, "[default]\n");
- }
- mutex_unlock(&rdtgroup_mutex);
- return 0;
- }
- ssize_t resctrl_mbm_assign_mode_write(struct kernfs_open_file *of, char *buf,
- size_t nbytes, loff_t off)
- {
- struct rdt_resource *r = rdt_kn_parent_priv(of->kn);
- struct rdt_l3_mon_domain *d;
- int ret = 0;
- bool enable;
- /* Valid input requires a trailing newline */
- if (nbytes == 0 || buf[nbytes - 1] != '\n')
- return -EINVAL;
- buf[nbytes - 1] = '\0';
- cpus_read_lock();
- mutex_lock(&rdtgroup_mutex);
- rdt_last_cmd_clear();
- if (!strcmp(buf, "default")) {
- enable = 0;
- } else if (!strcmp(buf, "mbm_event")) {
- if (r->mon.mbm_cntr_assignable) {
- enable = 1;
- } else {
- ret = -EINVAL;
- rdt_last_cmd_puts("mbm_event mode is not supported\n");
- goto out_unlock;
- }
- } else {
- ret = -EINVAL;
- rdt_last_cmd_puts("Unsupported assign mode\n");
- goto out_unlock;
- }
- if (enable != resctrl_arch_mbm_cntr_assign_enabled(r)) {
- ret = resctrl_arch_mbm_cntr_assign_set(r, enable);
- if (ret)
- goto out_unlock;
- /* Update the visibility of BMEC related files */
- resctrl_bmec_files_show(r, NULL, !enable);
- /*
- * Initialize the default memory transaction values for
- * total and local events.
- */
- if (resctrl_is_mon_event_enabled(QOS_L3_MBM_TOTAL_EVENT_ID))
- mon_event_all[QOS_L3_MBM_TOTAL_EVENT_ID].evt_cfg = r->mon.mbm_cfg_mask;
- if (resctrl_is_mon_event_enabled(QOS_L3_MBM_LOCAL_EVENT_ID))
- mon_event_all[QOS_L3_MBM_LOCAL_EVENT_ID].evt_cfg = r->mon.mbm_cfg_mask &
- (READS_TO_LOCAL_MEM |
- READS_TO_LOCAL_S_MEM |
- NON_TEMP_WRITE_TO_LOCAL_MEM);
- /* Enable auto assignment when switching to "mbm_event" mode */
- if (enable)
- r->mon.mbm_assign_on_mkdir = true;
- /*
- * Reset all the non-achitectural RMID state and assignable counters.
- */
- list_for_each_entry(d, &r->mon_domains, hdr.list) {
- mbm_cntr_free_all(r, d);
- resctrl_reset_rmid_all(r, d);
- }
- }
- out_unlock:
- mutex_unlock(&rdtgroup_mutex);
- cpus_read_unlock();
- return ret ?: nbytes;
- }
- int resctrl_num_mbm_cntrs_show(struct kernfs_open_file *of,
- struct seq_file *s, void *v)
- {
- struct rdt_resource *r = rdt_kn_parent_priv(of->kn);
- struct rdt_l3_mon_domain *dom;
- bool sep = false;
- cpus_read_lock();
- mutex_lock(&rdtgroup_mutex);
- list_for_each_entry(dom, &r->mon_domains, hdr.list) {
- if (sep)
- seq_putc(s, ';');
- seq_printf(s, "%d=%d", dom->hdr.id, r->mon.num_mbm_cntrs);
- sep = true;
- }
- seq_putc(s, '\n');
- mutex_unlock(&rdtgroup_mutex);
- cpus_read_unlock();
- return 0;
- }
- int resctrl_available_mbm_cntrs_show(struct kernfs_open_file *of,
- struct seq_file *s, void *v)
- {
- struct rdt_resource *r = rdt_kn_parent_priv(of->kn);
- struct rdt_l3_mon_domain *dom;
- bool sep = false;
- u32 cntrs, i;
- int ret = 0;
- cpus_read_lock();
- mutex_lock(&rdtgroup_mutex);
- rdt_last_cmd_clear();
- if (!resctrl_arch_mbm_cntr_assign_enabled(r)) {
- rdt_last_cmd_puts("mbm_event counter assignment mode is not enabled\n");
- ret = -EINVAL;
- goto out_unlock;
- }
- list_for_each_entry(dom, &r->mon_domains, hdr.list) {
- if (sep)
- seq_putc(s, ';');
- cntrs = 0;
- for (i = 0; i < r->mon.num_mbm_cntrs; i++) {
- if (!dom->cntr_cfg[i].rdtgrp)
- cntrs++;
- }
- seq_printf(s, "%d=%u", dom->hdr.id, cntrs);
- sep = true;
- }
- seq_putc(s, '\n');
- out_unlock:
- mutex_unlock(&rdtgroup_mutex);
- cpus_read_unlock();
- return ret;
- }
- int mbm_L3_assignments_show(struct kernfs_open_file *of, struct seq_file *s, void *v)
- {
- struct rdt_resource *r = resctrl_arch_get_resource(RDT_RESOURCE_L3);
- struct rdt_l3_mon_domain *d;
- struct rdtgroup *rdtgrp;
- struct mon_evt *mevt;
- int ret = 0;
- bool sep;
- rdtgrp = rdtgroup_kn_lock_live(of->kn);
- if (!rdtgrp) {
- ret = -ENOENT;
- goto out_unlock;
- }
- rdt_last_cmd_clear();
- if (!resctrl_arch_mbm_cntr_assign_enabled(r)) {
- rdt_last_cmd_puts("mbm_event counter assignment mode is not enabled\n");
- ret = -EINVAL;
- goto out_unlock;
- }
- for_each_mon_event(mevt) {
- if (mevt->rid != r->rid || !mevt->enabled || !resctrl_is_mbm_event(mevt->evtid))
- continue;
- sep = false;
- seq_printf(s, "%s:", mevt->name);
- list_for_each_entry(d, &r->mon_domains, hdr.list) {
- if (sep)
- seq_putc(s, ';');
- if (mbm_cntr_get(r, d, rdtgrp, mevt->evtid) < 0)
- seq_printf(s, "%d=_", d->hdr.id);
- else
- seq_printf(s, "%d=e", d->hdr.id);
- sep = true;
- }
- seq_putc(s, '\n');
- }
- out_unlock:
- rdtgroup_kn_unlock(of->kn);
- return ret;
- }
- /*
- * mbm_get_mon_event_by_name() - Return the mon_evt entry for the matching
- * event name.
- */
- static struct mon_evt *mbm_get_mon_event_by_name(struct rdt_resource *r, char *name)
- {
- struct mon_evt *mevt;
- for_each_mon_event(mevt) {
- if (mevt->rid == r->rid && mevt->enabled &&
- resctrl_is_mbm_event(mevt->evtid) &&
- !strcmp(mevt->name, name))
- return mevt;
- }
- return NULL;
- }
- static int rdtgroup_modify_assign_state(char *assign, struct rdt_l3_mon_domain *d,
- struct rdtgroup *rdtgrp, struct mon_evt *mevt)
- {
- int ret = 0;
- if (!assign || strlen(assign) != 1)
- return -EINVAL;
- switch (*assign) {
- case 'e':
- ret = rdtgroup_assign_cntr_event(d, rdtgrp, mevt);
- break;
- case '_':
- rdtgroup_unassign_cntr_event(d, rdtgrp, mevt);
- break;
- default:
- ret = -EINVAL;
- break;
- }
- return ret;
- }
- static int resctrl_parse_mbm_assignment(struct rdt_resource *r, struct rdtgroup *rdtgrp,
- char *event, char *tok)
- {
- struct rdt_l3_mon_domain *d;
- unsigned long dom_id = 0;
- char *dom_str, *id_str;
- struct mon_evt *mevt;
- int ret;
- mevt = mbm_get_mon_event_by_name(r, event);
- if (!mevt) {
- rdt_last_cmd_printf("Invalid event %s\n", event);
- return -ENOENT;
- }
- next:
- if (!tok || tok[0] == '\0')
- return 0;
- /* Start processing the strings for each domain */
- dom_str = strim(strsep(&tok, ";"));
- id_str = strsep(&dom_str, "=");
- /* Check for domain id '*' which means all domains */
- if (id_str && *id_str == '*') {
- ret = rdtgroup_modify_assign_state(dom_str, NULL, rdtgrp, mevt);
- if (ret)
- rdt_last_cmd_printf("Assign operation '%s:*=%s' failed\n",
- event, dom_str);
- return ret;
- } else if (!id_str || kstrtoul(id_str, 10, &dom_id)) {
- rdt_last_cmd_puts("Missing domain id\n");
- return -EINVAL;
- }
- /* Verify if the dom_id is valid */
- list_for_each_entry(d, &r->mon_domains, hdr.list) {
- if (d->hdr.id == dom_id) {
- ret = rdtgroup_modify_assign_state(dom_str, d, rdtgrp, mevt);
- if (ret) {
- rdt_last_cmd_printf("Assign operation '%s:%ld=%s' failed\n",
- event, dom_id, dom_str);
- return ret;
- }
- goto next;
- }
- }
- rdt_last_cmd_printf("Invalid domain id %ld\n", dom_id);
- return -EINVAL;
- }
- ssize_t mbm_L3_assignments_write(struct kernfs_open_file *of, char *buf,
- size_t nbytes, loff_t off)
- {
- struct rdt_resource *r = resctrl_arch_get_resource(RDT_RESOURCE_L3);
- struct rdtgroup *rdtgrp;
- char *token, *event;
- int ret = 0;
- /* Valid input requires a trailing newline */
- if (nbytes == 0 || buf[nbytes - 1] != '\n')
- return -EINVAL;
- buf[nbytes - 1] = '\0';
- rdtgrp = rdtgroup_kn_lock_live(of->kn);
- if (!rdtgrp) {
- rdtgroup_kn_unlock(of->kn);
- return -ENOENT;
- }
- rdt_last_cmd_clear();
- if (!resctrl_arch_mbm_cntr_assign_enabled(r)) {
- rdt_last_cmd_puts("mbm_event mode is not enabled\n");
- rdtgroup_kn_unlock(of->kn);
- return -EINVAL;
- }
- while ((token = strsep(&buf, "\n")) != NULL) {
- /*
- * The write command follows the following format:
- * "<Event>:<Domain ID>=<Assignment state>"
- * Extract the event name first.
- */
- event = strsep(&token, ":");
- ret = resctrl_parse_mbm_assignment(r, rdtgrp, event, token);
- if (ret)
- break;
- }
- rdtgroup_kn_unlock(of->kn);
- return ret ?: nbytes;
- }
- static int closid_num_dirty_rmid_alloc(struct rdt_resource *r)
- {
- if (IS_ENABLED(CONFIG_RESCTRL_RMID_DEPENDS_ON_CLOSID)) {
- u32 num_closid = resctrl_arch_get_num_closid(r);
- u32 *tmp;
- /* For ARM memory ordering access to closid_num_dirty_rmid */
- mutex_lock(&rdtgroup_mutex);
- /*
- * If the architecture hasn't provided a sanitised value here,
- * this may result in larger arrays than necessary. Resctrl will
- * use a smaller system wide value based on the resources in
- * use.
- */
- tmp = kcalloc(num_closid, sizeof(*tmp), GFP_KERNEL);
- if (!tmp) {
- mutex_unlock(&rdtgroup_mutex);
- return -ENOMEM;
- }
- closid_num_dirty_rmid = tmp;
- mutex_unlock(&rdtgroup_mutex);
- }
- return 0;
- }
- static void closid_num_dirty_rmid_free(void)
- {
- if (IS_ENABLED(CONFIG_RESCTRL_RMID_DEPENDS_ON_CLOSID)) {
- mutex_lock(&rdtgroup_mutex);
- kfree(closid_num_dirty_rmid);
- closid_num_dirty_rmid = NULL;
- mutex_unlock(&rdtgroup_mutex);
- }
- }
- /**
- * resctrl_l3_mon_resource_init() - Initialise global monitoring structures.
- *
- * Allocate and initialise global monitor resources that do not belong to a
- * specific domain. i.e. the closid_num_dirty_rmid[] used to find the CLOSID
- * with the cleanest set of RMIDs.
- * Called once during boot after the struct rdt_resource's have been configured
- * but before the filesystem is mounted.
- * Resctrl's cpuhp callbacks may be called before this point to bring a domain
- * online.
- *
- * Return: 0 for success, or -ENOMEM.
- */
- int resctrl_l3_mon_resource_init(void)
- {
- struct rdt_resource *r = resctrl_arch_get_resource(RDT_RESOURCE_L3);
- int ret;
- if (!r->mon_capable)
- return 0;
- ret = closid_num_dirty_rmid_alloc(r);
- if (ret)
- return ret;
- if (resctrl_arch_is_evt_configurable(QOS_L3_MBM_TOTAL_EVENT_ID)) {
- mon_event_all[QOS_L3_MBM_TOTAL_EVENT_ID].configurable = true;
- resctrl_file_fflags_init("mbm_total_bytes_config",
- RFTYPE_MON_INFO | RFTYPE_RES_CACHE);
- }
- if (resctrl_arch_is_evt_configurable(QOS_L3_MBM_LOCAL_EVENT_ID)) {
- mon_event_all[QOS_L3_MBM_LOCAL_EVENT_ID].configurable = true;
- resctrl_file_fflags_init("mbm_local_bytes_config",
- RFTYPE_MON_INFO | RFTYPE_RES_CACHE);
- }
- if (resctrl_is_mon_event_enabled(QOS_L3_MBM_LOCAL_EVENT_ID))
- mba_mbps_default_event = QOS_L3_MBM_LOCAL_EVENT_ID;
- else if (resctrl_is_mon_event_enabled(QOS_L3_MBM_TOTAL_EVENT_ID))
- mba_mbps_default_event = QOS_L3_MBM_TOTAL_EVENT_ID;
- if (r->mon.mbm_cntr_assignable) {
- if (resctrl_is_mon_event_enabled(QOS_L3_MBM_TOTAL_EVENT_ID))
- mon_event_all[QOS_L3_MBM_TOTAL_EVENT_ID].evt_cfg = r->mon.mbm_cfg_mask;
- if (resctrl_is_mon_event_enabled(QOS_L3_MBM_LOCAL_EVENT_ID))
- mon_event_all[QOS_L3_MBM_LOCAL_EVENT_ID].evt_cfg = r->mon.mbm_cfg_mask &
- (READS_TO_LOCAL_MEM |
- READS_TO_LOCAL_S_MEM |
- NON_TEMP_WRITE_TO_LOCAL_MEM);
- r->mon.mbm_assign_on_mkdir = true;
- resctrl_file_fflags_init("num_mbm_cntrs",
- RFTYPE_MON_INFO | RFTYPE_RES_CACHE);
- resctrl_file_fflags_init("available_mbm_cntrs",
- RFTYPE_MON_INFO | RFTYPE_RES_CACHE);
- resctrl_file_fflags_init("event_filter", RFTYPE_ASSIGN_CONFIG);
- resctrl_file_fflags_init("mbm_assign_on_mkdir", RFTYPE_MON_INFO |
- RFTYPE_RES_CACHE);
- resctrl_file_fflags_init("mbm_L3_assignments", RFTYPE_MON_BASE);
- }
- return 0;
- }
- void resctrl_l3_mon_resource_exit(void)
- {
- struct rdt_resource *r = resctrl_arch_get_resource(RDT_RESOURCE_L3);
- if (!r->mon_capable)
- return;
- closid_num_dirty_rmid_free();
- }
|