rdtgroup.c 114 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
72778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174317531763177317831793180318131823183318431853186318731883189319031913192319331943195319631973198319932003201320232033204320532063207320832093210321132123213321432153216321732183219322032213222322332243225322632273228322932303231323232333234323532363237323832393240324132423243324432453246324732483249325032513252325332543255325632573258325932603261326232633264326532663267326832693270327132723273327432753276327
73278327932803281328232833284328532863287328832893290329132923293329432953296329732983299330033013302330333043305330633073308330933103311331233133314331533163317331833193320332133223323332433253326332733283329333033313332333333343335333633373338333933403341334233433344334533463347334833493350335133523353335433553356335733583359336033613362336333643365336633673368336933703371337233733374337533763377337833793380338133823383338433853386338733883389339033913392339333943395339633973398339934003401340234033404340534063407340834093410341134123413341434153416341734183419342034213422342334243425342634273428342934303431343234333434343534363437343834393440344134423443344434453446344734483449345034513452345334543455345634573458345934603461346234633464346534663467346834693470347134723473347434753476347734783479348034813482348334843485348634873488348934903491349234933494349534963497349834993500350135023503350435053506350735083509351035113512351335143515351635173518351935203521352235233524352535263527352835293530353135323533353435353536353735383539354035413542354335443545354635473548354935503551355235533554355535563557355835593560356135623563356435653566356735683569357035713572357335743575357635773578357935803581358235833584358535863587358835893590359135923593359435953596359735983599360036013602360336043605360636073608360936103611361236133614361536163617361836193620362136223623362436253626362736283629363036313632363336343635363636373638363936403641364236433644364536463647364836493650365136523653365436553656365736583659366036613662366336643665366636673668366936703671367236733674367536763677367836793680368136823683368436853686368736883689369036913692369336943695369636973698369937003701370237033704370537063707370837093710371137123713371437153716371737183719372037213722372337243725372637273728372937303731373237333734373537363737373837393740374137423743374437453746374737483749375037513752375337543755375637573758375937603761376237633764376537663767376837693770377137723773377437753776377
73778377937803781378237833784378537863787378837893790379137923793379437953796379737983799380038013802380338043805380638073808380938103811381238133814381538163817381838193820382138223823382438253826382738283829383038313832383338343835383638373838383938403841384238433844384538463847384838493850385138523853385438553856385738583859386038613862386338643865386638673868386938703871387238733874387538763877387838793880388138823883388438853886388738883889389038913892389338943895389638973898389939003901390239033904390539063907390839093910391139123913391439153916391739183919392039213922392339243925392639273928392939303931393239333934393539363937393839393940394139423943394439453946394739483949395039513952395339543955395639573958395939603961396239633964396539663967396839693970397139723973397439753976397739783979398039813982398339843985398639873988398939903991399239933994399539963997399839994000400140024003400440054006400740084009401040114012401340144015401640174018401940204021402240234024402540264027402840294030403140324033403440354036403740384039404040414042404340444045404640474048404940504051405240534054405540564057405840594060406140624063406440654066406740684069407040714072407340744075407640774078407940804081408240834084408540864087408840894090409140924093409440954096409740984099410041014102410341044105410641074108410941104111411241134114411541164117411841194120412141224123412441254126412741284129413041314132413341344135413641374138413941404141414241434144414541464147414841494150415141524153415441554156415741584159416041614162416341644165416641674168416941704171417241734174417541764177417841794180418141824183418441854186418741884189419041914192419341944195419641974198419942004201420242034204420542064207420842094210421142124213421442154216421742184219422042214222422342244225422642274228422942304231423242334234423542364237423842394240424142424243424442454246424742484249425042514252425342544255425642574258425942604261426242634264426542664267426842694270427142724273427442754276427
742784279428042814282428342844285428642874288428942904291429242934294429542964297429842994300430143024303430443054306430743084309431043114312431343144315431643174318431943204321432243234324432543264327432843294330433143324333433443354336433743384339434043414342434343444345434643474348434943504351435243534354435543564357435843594360436143624363436443654366436743684369437043714372437343744375437643774378437943804381438243834384438543864387438843894390439143924393439443954396439743984399440044014402440344044405440644074408440944104411441244134414441544164417441844194420442144224423442444254426442744284429443044314432443344344435443644374438443944404441444244434444444544464447444844494450445144524453445444554456445744584459446044614462446344644465446644674468446944704471447244734474447544764477447844794480448144824483448444854486448744884489449044914492449344944495449644974498449945004501450245034504450545064507450845094510451145124513451445154516451745184519452045214522452345244525452645274528452945304531453245334534453545364537453845394540454145424543454445454546454745484549455045514552455345544555455645574558455945604561456245634564456545664567456845694570457145724573457445754576457745784579458045814582458345844585458645874588458945904591459245934594459545964597459845994600460146024603460446054606460746084609461046114612461346144615461646174618461946204621462246234624462546264627462846294630463146324633463446354636463746384639464046414642464346444645464646474648464946504651465246534654465546564657465846594660466146624663
  1. // SPDX-License-Identifier: GPL-2.0-only
  2. /*
  3. * User interface for Resource Allocation in Resource Director Technology(RDT)
  4. *
  5. * Copyright (C) 2016 Intel Corporation
  6. *
  7. * Author: Fenghua Yu <fenghua.yu@intel.com>
  8. *
  9. * More information about RDT can be found in the Intel (R) x86 Architecture
  10. * Software Developer Manual.
  11. */
  12. #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
  13. #include <linux/cpu.h>
  14. #include <linux/debugfs.h>
  15. #include <linux/fs.h>
  16. #include <linux/fs_parser.h>
  17. #include <linux/sysfs.h>
  18. #include <linux/kernfs.h>
  19. #include <linux/once.h>
  20. #include <linux/resctrl.h>
  21. #include <linux/seq_buf.h>
  22. #include <linux/seq_file.h>
  23. #include <linux/sched/task.h>
  24. #include <linux/slab.h>
  25. #include <linux/user_namespace.h>
  26. #include <uapi/linux/magic.h>
  27. #include "internal.h"
  28. /* Mutex to protect rdtgroup access. */
  29. DEFINE_MUTEX(rdtgroup_mutex);
  30. static struct kernfs_root *rdt_root;
  31. struct rdtgroup rdtgroup_default;
  32. LIST_HEAD(rdt_all_groups);
  33. /* list of entries for the schemata file */
  34. LIST_HEAD(resctrl_schema_all);
  35. /*
  36. * List of struct mon_data containing private data of event files for use by
  37. * rdtgroup_mondata_show(). Protected by rdtgroup_mutex.
  38. */
  39. static LIST_HEAD(mon_data_kn_priv_list);
  40. /* The filesystem can only be mounted once. */
  41. bool resctrl_mounted;
  42. /* Kernel fs node for "info" directory under root */
  43. static struct kernfs_node *kn_info;
  44. /* Kernel fs node for "mon_groups" directory under root */
  45. static struct kernfs_node *kn_mongrp;
  46. /* Kernel fs node for "mon_data" directory under root */
  47. static struct kernfs_node *kn_mondata;
  48. /*
  49. * Used to store the max resource name width to display the schemata names in
  50. * a tabular format.
  51. */
  52. int max_name_width;
  53. static struct seq_buf last_cmd_status;
  54. static char last_cmd_status_buf[512];
  55. static int rdtgroup_setup_root(struct rdt_fs_context *ctx);
  56. static void rdtgroup_destroy_root(void);
  57. struct dentry *debugfs_resctrl;
  58. /*
  59. * Memory bandwidth monitoring event to use for the default CTRL_MON group
  60. * and each new CTRL_MON group created by the user. Only relevant when
  61. * the filesystem is mounted with the "mba_MBps" option so it does not
  62. * matter that it remains uninitialized on systems that do not support
  63. * the "mba_MBps" option.
  64. */
  65. enum resctrl_event_id mba_mbps_default_event;
  66. static bool resctrl_debug;
  67. void rdt_last_cmd_clear(void)
  68. {
  69. lockdep_assert_held(&rdtgroup_mutex);
  70. seq_buf_clear(&last_cmd_status);
  71. }
  72. void rdt_last_cmd_puts(const char *s)
  73. {
  74. lockdep_assert_held(&rdtgroup_mutex);
  75. seq_buf_puts(&last_cmd_status, s);
  76. }
  77. void rdt_last_cmd_printf(const char *fmt, ...)
  78. {
  79. va_list ap;
  80. va_start(ap, fmt);
  81. lockdep_assert_held(&rdtgroup_mutex);
  82. seq_buf_vprintf(&last_cmd_status, fmt, ap);
  83. va_end(ap);
  84. }
  85. void rdt_staged_configs_clear(void)
  86. {
  87. struct rdt_ctrl_domain *dom;
  88. struct rdt_resource *r;
  89. lockdep_assert_held(&rdtgroup_mutex);
  90. for_each_alloc_capable_rdt_resource(r) {
  91. list_for_each_entry(dom, &r->ctrl_domains, hdr.list)
  92. memset(dom->staged_config, 0, sizeof(dom->staged_config));
  93. }
  94. }
  95. static bool resctrl_is_mbm_enabled(void)
  96. {
  97. return (resctrl_is_mon_event_enabled(QOS_L3_MBM_TOTAL_EVENT_ID) ||
  98. resctrl_is_mon_event_enabled(QOS_L3_MBM_LOCAL_EVENT_ID));
  99. }
  /*
   * Trivial CLOSID allocator built on the BITMAP APIs: closid_free_map has a
   * bit set for every CLOSID that is currently free.
   *
   * A single global CLOSID space shared by all resources is a trade-off:
   *  + Assigning a task to a resource group is just a matter of setting
   *    current's closid.
   *  + The context switch code needs no extra memory references to decide
   *    which CLOSID to load into the PQR_ASSOC MSR.
   *  - Some options for configuring resource groups across multi-socket
   *    systems are given up.
   *  - The choices for configuring each resource become progressively more
   *    limited as the number of resources grows.
   */
  static unsigned long *closid_free_map;

  /* Number of bits (CLOSIDs) tracked by closid_free_map. */
  static int closid_free_map_len;
  117. int closids_supported(void)
  118. {
  119. return closid_free_map_len;
  120. }
  121. static int closid_init(void)
  122. {
  123. struct resctrl_schema *s;
  124. u32 rdt_min_closid = ~0;
  125. /* Monitor only platforms still call closid_init() */
  126. if (list_empty(&resctrl_schema_all))
  127. return 0;
  128. /* Compute rdt_min_closid across all resources */
  129. list_for_each_entry(s, &resctrl_schema_all, list)
  130. rdt_min_closid = min(rdt_min_closid, s->num_closid);
  131. closid_free_map = bitmap_alloc(rdt_min_closid, GFP_KERNEL);
  132. if (!closid_free_map)
  133. return -ENOMEM;
  134. bitmap_fill(closid_free_map, rdt_min_closid);
  135. /* RESCTRL_RESERVED_CLOSID is always reserved for the default group */
  136. __clear_bit(RESCTRL_RESERVED_CLOSID, closid_free_map);
  137. closid_free_map_len = rdt_min_closid;
  138. return 0;
  139. }
  140. static void closid_exit(void)
  141. {
  142. bitmap_free(closid_free_map);
  143. closid_free_map = NULL;
  144. }
  145. static int closid_alloc(void)
  146. {
  147. int cleanest_closid;
  148. u32 closid;
  149. lockdep_assert_held(&rdtgroup_mutex);
  150. if (IS_ENABLED(CONFIG_RESCTRL_RMID_DEPENDS_ON_CLOSID) &&
  151. resctrl_is_mon_event_enabled(QOS_L3_OCCUP_EVENT_ID)) {
  152. cleanest_closid = resctrl_find_cleanest_closid();
  153. if (cleanest_closid < 0)
  154. return cleanest_closid;
  155. closid = cleanest_closid;
  156. } else {
  157. closid = find_first_bit(closid_free_map, closid_free_map_len);
  158. if (closid == closid_free_map_len)
  159. return -ENOSPC;
  160. }
  161. __clear_bit(closid, closid_free_map);
  162. return closid;
  163. }
  164. void closid_free(int closid)
  165. {
  166. lockdep_assert_held(&rdtgroup_mutex);
  167. __set_bit(closid, closid_free_map);
  168. }
  169. /**
  170. * closid_allocated - test if provided closid is in use
  171. * @closid: closid to be tested
  172. *
  173. * Return: true if @closid is currently associated with a resource group,
  174. * false if @closid is free
  175. */
  176. bool closid_allocated(unsigned int closid)
  177. {
  178. lockdep_assert_held(&rdtgroup_mutex);
  179. return !test_bit(closid, closid_free_map);
  180. }
  181. bool closid_alloc_fixed(u32 closid)
  182. {
  183. return __test_and_clear_bit(closid, closid_free_map);
  184. }
  185. /**
  186. * rdtgroup_mode_by_closid - Return mode of resource group with closid
  187. * @closid: closid if the resource group
  188. *
  189. * Each resource group is associated with a @closid. Here the mode
  190. * of a resource group can be queried by searching for it using its closid.
  191. *
  192. * Return: mode as &enum rdtgrp_mode of resource group with closid @closid
  193. */
  194. enum rdtgrp_mode rdtgroup_mode_by_closid(int closid)
  195. {
  196. struct rdtgroup *rdtgrp;
  197. list_for_each_entry(rdtgrp, &rdt_all_groups, rdtgroup_list) {
  198. if (rdtgrp->closid == closid)
  199. return rdtgrp->mode;
  200. }
  201. return RDT_NUM_MODES;
  202. }
  203. static const char * const rdt_mode_str[] = {
  204. [RDT_MODE_SHAREABLE] = "shareable",
  205. [RDT_MODE_EXCLUSIVE] = "exclusive",
  206. [RDT_MODE_PSEUDO_LOCKSETUP] = "pseudo-locksetup",
  207. [RDT_MODE_PSEUDO_LOCKED] = "pseudo-locked",
  208. };
  209. /**
  210. * rdtgroup_mode_str - Return the string representation of mode
  211. * @mode: the resource group mode as &enum rdtgroup_mode
  212. *
  213. * Return: string representation of valid mode, "unknown" otherwise
  214. */
  215. static const char *rdtgroup_mode_str(enum rdtgrp_mode mode)
  216. {
  217. if (mode < RDT_MODE_SHAREABLE || mode >= RDT_NUM_MODES)
  218. return "unknown";
  219. return rdt_mode_str[mode];
  220. }
  221. /* set uid and gid of rdtgroup dirs and files to that of the creator */
  222. static int rdtgroup_kn_set_ugid(struct kernfs_node *kn)
  223. {
  224. struct iattr iattr = { .ia_valid = ATTR_UID | ATTR_GID,
  225. .ia_uid = current_fsuid(),
  226. .ia_gid = current_fsgid(), };
  227. if (uid_eq(iattr.ia_uid, GLOBAL_ROOT_UID) &&
  228. gid_eq(iattr.ia_gid, GLOBAL_ROOT_GID))
  229. return 0;
  230. return kernfs_setattr(kn, &iattr);
  231. }
  232. static int rdtgroup_add_file(struct kernfs_node *parent_kn, struct rftype *rft)
  233. {
  234. struct kernfs_node *kn;
  235. int ret;
  236. kn = __kernfs_create_file(parent_kn, rft->name, rft->mode,
  237. GLOBAL_ROOT_UID, GLOBAL_ROOT_GID,
  238. 0, rft->kf_ops, rft, NULL, NULL);
  239. if (IS_ERR(kn))
  240. return PTR_ERR(kn);
  241. ret = rdtgroup_kn_set_ugid(kn);
  242. if (ret) {
  243. kernfs_remove(kn);
  244. return ret;
  245. }
  246. return 0;
  247. }
  248. static int rdtgroup_seqfile_show(struct seq_file *m, void *arg)
  249. {
  250. struct kernfs_open_file *of = m->private;
  251. struct rftype *rft = of->kn->priv;
  252. if (rft->seq_show)
  253. return rft->seq_show(of, m, arg);
  254. return 0;
  255. }
  256. static ssize_t rdtgroup_file_write(struct kernfs_open_file *of, char *buf,
  257. size_t nbytes, loff_t off)
  258. {
  259. struct rftype *rft = of->kn->priv;
  260. if (rft->write)
  261. return rft->write(of, buf, nbytes, off);
  262. return -EINVAL;
  263. }
  264. static const struct kernfs_ops rdtgroup_kf_single_ops = {
  265. .atomic_write_len = PAGE_SIZE,
  266. .write = rdtgroup_file_write,
  267. .seq_show = rdtgroup_seqfile_show,
  268. };
  269. static const struct kernfs_ops kf_mondata_ops = {
  270. .atomic_write_len = PAGE_SIZE,
  271. .seq_show = rdtgroup_mondata_show,
  272. };
  273. static bool is_cpu_list(struct kernfs_open_file *of)
  274. {
  275. struct rftype *rft = of->kn->priv;
  276. return rft->flags & RFTYPE_FLAGS_CPUS_LIST;
  277. }
  278. static int rdtgroup_cpus_show(struct kernfs_open_file *of,
  279. struct seq_file *s, void *v)
  280. {
  281. struct rdtgroup *rdtgrp;
  282. struct cpumask *mask;
  283. int ret = 0;
  284. rdtgrp = rdtgroup_kn_lock_live(of->kn);
  285. if (rdtgrp) {
  286. if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED) {
  287. if (!rdtgrp->plr->d) {
  288. rdt_last_cmd_clear();
  289. rdt_last_cmd_puts("Cache domain offline\n");
  290. ret = -ENODEV;
  291. } else {
  292. mask = &rdtgrp->plr->d->hdr.cpu_mask;
  293. seq_printf(s, is_cpu_list(of) ?
  294. "%*pbl\n" : "%*pb\n",
  295. cpumask_pr_args(mask));
  296. }
  297. } else {
  298. seq_printf(s, is_cpu_list(of) ? "%*pbl\n" : "%*pb\n",
  299. cpumask_pr_args(&rdtgrp->cpu_mask));
  300. }
  301. } else {
  302. ret = -ENOENT;
  303. }
  304. rdtgroup_kn_unlock(of->kn);
  305. return ret;
  306. }
  307. /*
  308. * Update the PGR_ASSOC MSR on all cpus in @cpu_mask,
  309. *
  310. * Per task closids/rmids must have been set up before calling this function.
  311. * @r may be NULL.
  312. */
  313. static void
  314. update_closid_rmid(const struct cpumask *cpu_mask, struct rdtgroup *r)
  315. {
  316. struct resctrl_cpu_defaults defaults, *p = NULL;
  317. if (r) {
  318. defaults.closid = r->closid;
  319. defaults.rmid = r->mon.rmid;
  320. p = &defaults;
  321. }
  322. on_each_cpu_mask(cpu_mask, resctrl_arch_sync_cpu_closid_rmid, p, 1);
  323. }
  324. static int cpus_mon_write(struct rdtgroup *rdtgrp, cpumask_var_t newmask,
  325. cpumask_var_t tmpmask)
  326. {
  327. struct rdtgroup *prgrp = rdtgrp->mon.parent, *crgrp;
  328. struct list_head *head;
  329. /* Check whether cpus belong to parent ctrl group */
  330. cpumask_andnot(tmpmask, newmask, &prgrp->cpu_mask);
  331. if (!cpumask_empty(tmpmask)) {
  332. rdt_last_cmd_puts("Can only add CPUs to mongroup that belong to parent\n");
  333. return -EINVAL;
  334. }
  335. /* Check whether cpus are dropped from this group */
  336. cpumask_andnot(tmpmask, &rdtgrp->cpu_mask, newmask);
  337. if (!cpumask_empty(tmpmask)) {
  338. /* Give any dropped cpus to parent rdtgroup */
  339. cpumask_or(&prgrp->cpu_mask, &prgrp->cpu_mask, tmpmask);
  340. update_closid_rmid(tmpmask, prgrp);
  341. }
  342. /*
  343. * If we added cpus, remove them from previous group that owned them
  344. * and update per-cpu rmid
  345. */
  346. cpumask_andnot(tmpmask, newmask, &rdtgrp->cpu_mask);
  347. if (!cpumask_empty(tmpmask)) {
  348. head = &prgrp->mon.crdtgrp_list;
  349. list_for_each_entry(crgrp, head, mon.crdtgrp_list) {
  350. if (crgrp == rdtgrp)
  351. continue;
  352. cpumask_andnot(&crgrp->cpu_mask, &crgrp->cpu_mask,
  353. tmpmask);
  354. }
  355. update_closid_rmid(tmpmask, rdtgrp);
  356. }
  357. /* Done pushing/pulling - update this group with new mask */
  358. cpumask_copy(&rdtgrp->cpu_mask, newmask);
  359. return 0;
  360. }
  361. static void cpumask_rdtgrp_clear(struct rdtgroup *r, struct cpumask *m)
  362. {
  363. struct rdtgroup *crgrp;
  364. cpumask_andnot(&r->cpu_mask, &r->cpu_mask, m);
  365. /* update the child mon group masks as well*/
  366. list_for_each_entry(crgrp, &r->mon.crdtgrp_list, mon.crdtgrp_list)
  367. cpumask_and(&crgrp->cpu_mask, &r->cpu_mask, &crgrp->cpu_mask);
  368. }
  /*
   * cpus_ctrl_write - apply a new CPU mask to a control group
   * @rdtgrp:   group whose cpu_mask is replaced by @newmask
   * @newmask:  requested CPU mask
   * @tmpmask:  caller-provided scratch mask (clobbered)
   * @tmpmask1: second caller-provided scratch mask (clobbered)
   *
   * CPUs that @rdtgrp loses are handed to rdtgroup_default. CPUs that
   * @rdtgrp gains are removed from every other group on rdt_all_groups
   * (cpumask_rdtgrp_clear() also trims those groups' child masks). After
   * the new mask is installed, the masks of all child monitor groups of
   * @rdtgrp are cleared.
   *
   * Return: 0 on success, -EINVAL if CPUs would be dropped from the
   * default group.
   */
  static int cpus_ctrl_write(struct rdtgroup *rdtgrp, cpumask_var_t newmask,
  			   cpumask_var_t tmpmask, cpumask_var_t tmpmask1)
  {
  	struct rdtgroup *r, *crgrp;
  	struct list_head *head;

  	/* Check whether cpus are dropped from this group */
  	cpumask_andnot(tmpmask, &rdtgrp->cpu_mask, newmask);
  	if (!cpumask_empty(tmpmask)) {
  		/* Can't drop from default group */
  		if (rdtgrp == &rdtgroup_default) {
  			rdt_last_cmd_puts("Can't drop CPUs from default group\n");
  			return -EINVAL;
  		}

  		/* Give any dropped cpus to rdtgroup_default */
  		cpumask_or(&rdtgroup_default.cpu_mask,
  			   &rdtgroup_default.cpu_mask, tmpmask);
  		update_closid_rmid(tmpmask, &rdtgroup_default);
  	}

  	/*
  	 * If we added cpus, remove them from previous group and
  	 * the prev group's child groups that owned them
  	 * and update per-cpu closid/rmid.
  	 */
  	cpumask_andnot(tmpmask, newmask, &rdtgrp->cpu_mask);
  	if (!cpumask_empty(tmpmask)) {
  		list_for_each_entry(r, &rdt_all_groups, rdtgroup_list) {
  			if (r == rdtgrp)
  				continue;
  			/* tmpmask1 = the newly added CPUs that group r currently owns */
  			cpumask_and(tmpmask1, &r->cpu_mask, tmpmask);
  			if (!cpumask_empty(tmpmask1))
  				cpumask_rdtgrp_clear(r, tmpmask1);
  		}
  		update_closid_rmid(tmpmask, rdtgrp);
  	}

  	/* Done pushing/pulling - update this group with new mask */
  	cpumask_copy(&rdtgrp->cpu_mask, newmask);

  	/*
  	 * Clear child mon group masks since there is a new parent mask
  	 * now and update the rmid for the cpus the child lost.
  	 */
  	head = &rdtgrp->mon.crdtgrp_list;
  	list_for_each_entry(crgrp, head, mon.crdtgrp_list) {
  		/* Only CPUs that are both in the new parent mask and the child mask */
  		cpumask_and(tmpmask, &rdtgrp->cpu_mask, &crgrp->cpu_mask);
  		update_closid_rmid(tmpmask, rdtgrp);
  		cpumask_clear(&crgrp->cpu_mask);
  	}

  	return 0;
  }
  417. static ssize_t rdtgroup_cpus_write(struct kernfs_open_file *of,
  418. char *buf, size_t nbytes, loff_t off)
  419. {
  420. cpumask_var_t tmpmask, newmask, tmpmask1;
  421. struct rdtgroup *rdtgrp;
  422. int ret;
  423. if (!buf)
  424. return -EINVAL;
  425. if (!zalloc_cpumask_var(&tmpmask, GFP_KERNEL))
  426. return -ENOMEM;
  427. if (!zalloc_cpumask_var(&newmask, GFP_KERNEL)) {
  428. free_cpumask_var(tmpmask);
  429. return -ENOMEM;
  430. }
  431. if (!zalloc_cpumask_var(&tmpmask1, GFP_KERNEL)) {
  432. free_cpumask_var(tmpmask);
  433. free_cpumask_var(newmask);
  434. return -ENOMEM;
  435. }
  436. rdtgrp = rdtgroup_kn_lock_live(of->kn);
  437. if (!rdtgrp) {
  438. ret = -ENOENT;
  439. goto unlock;
  440. }
  441. rdt_last_cmd_clear();
  442. if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED ||
  443. rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) {
  444. ret = -EINVAL;
  445. rdt_last_cmd_puts("Pseudo-locking in progress\n");
  446. goto unlock;
  447. }
  448. if (is_cpu_list(of))
  449. ret = cpulist_parse(buf, newmask);
  450. else
  451. ret = cpumask_parse(buf, newmask);
  452. if (ret) {
  453. rdt_last_cmd_puts("Bad CPU list/mask\n");
  454. goto unlock;
  455. }
  456. /* check that user didn't specify any offline cpus */
  457. cpumask_andnot(tmpmask, newmask, cpu_online_mask);
  458. if (!cpumask_empty(tmpmask)) {
  459. ret = -EINVAL;
  460. rdt_last_cmd_puts("Can only assign online CPUs\n");
  461. goto unlock;
  462. }
  463. if (rdtgrp->type == RDTCTRL_GROUP)
  464. ret = cpus_ctrl_write(rdtgrp, newmask, tmpmask, tmpmask1);
  465. else if (rdtgrp->type == RDTMON_GROUP)
  466. ret = cpus_mon_write(rdtgrp, newmask, tmpmask);
  467. else
  468. ret = -EINVAL;
  469. unlock:
  470. rdtgroup_kn_unlock(of->kn);
  471. free_cpumask_var(tmpmask);
  472. free_cpumask_var(newmask);
  473. free_cpumask_var(tmpmask1);
  474. return ret ?: nbytes;
  475. }
  476. /**
  477. * rdtgroup_remove - the helper to remove resource group safely
  478. * @rdtgrp: resource group to remove
  479. *
  480. * On resource group creation via a mkdir, an extra kernfs_node reference is
  481. * taken to ensure that the rdtgroup structure remains accessible for the
  482. * rdtgroup_kn_unlock() calls where it is removed.
  483. *
  484. * Drop the extra reference here, then free the rdtgroup structure.
  485. *
  486. * Return: void
  487. */
  488. static void rdtgroup_remove(struct rdtgroup *rdtgrp)
  489. {
  490. kernfs_put(rdtgrp->kn);
  491. kfree(rdtgrp);
  492. }
  493. static void _update_task_closid_rmid(void *task)
  494. {
  495. /*
  496. * If the task is still current on this CPU, update PQR_ASSOC MSR.
  497. * Otherwise, the MSR is updated when the task is scheduled in.
  498. */
  499. if (task == current)
  500. resctrl_arch_sched_in(task);
  501. }
  502. static void update_task_closid_rmid(struct task_struct *t)
  503. {
  504. if (IS_ENABLED(CONFIG_SMP) && task_curr(t))
  505. smp_call_function_single(task_cpu(t), _update_task_closid_rmid, t, 1);
  506. else
  507. _update_task_closid_rmid(t);
  508. }
  509. static bool task_in_rdtgroup(struct task_struct *tsk, struct rdtgroup *rdtgrp)
  510. {
  511. u32 closid, rmid = rdtgrp->mon.rmid;
  512. if (rdtgrp->type == RDTCTRL_GROUP)
  513. closid = rdtgrp->closid;
  514. else if (rdtgrp->type == RDTMON_GROUP)
  515. closid = rdtgrp->mon.parent->closid;
  516. else
  517. return false;
  518. return resctrl_arch_match_closid(tsk, closid) &&
  519. resctrl_arch_match_rmid(tsk, closid, rmid);
  520. }
  /*
   * __rdtgroup_move_task - assign @tsk to resource group @rdtgrp
   * @tsk:    task to move
   * @rdtgrp: destination group
   *
   * Does nothing if @tsk is already in @rdtgrp. A task may only be moved
   * into a monitor group whose parent control group's closid it already
   * has. Otherwise the task's closid/rmid are rewritten and, if the task
   * is currently running, the update is pushed to it.
   *
   * Return: 0 on success (or when no move was needed), -EINVAL when the
   * task belongs to a different control group than the target monitor
   * group's parent.
   */
  static int __rdtgroup_move_task(struct task_struct *tsk,
  				struct rdtgroup *rdtgrp)
  {
  	/* If the task is already in rdtgrp, no need to move the task. */
  	if (task_in_rdtgroup(tsk, rdtgrp))
  		return 0;

  	/*
  	 * Set the task's closid/rmid before the PQR_ASSOC MSR can be
  	 * updated by them.
  	 *
  	 * For ctrl_mon groups, move both closid and rmid.
  	 * For monitor groups, can move the tasks only from
  	 * their parent CTRL group.
  	 */
  	if (rdtgrp->type == RDTMON_GROUP &&
  	    !resctrl_arch_match_closid(tsk, rdtgrp->mon.parent->closid)) {
  		rdt_last_cmd_puts("Can't move task to different control group\n");
  		return -EINVAL;
  	}

  	/* A monitor group takes its closid from the parent control group. */
  	if (rdtgrp->type == RDTMON_GROUP)
  		resctrl_arch_set_closid_rmid(tsk, rdtgrp->mon.parent->closid,
  					     rdtgrp->mon.rmid);
  	else
  		resctrl_arch_set_closid_rmid(tsk, rdtgrp->closid,
  					     rdtgrp->mon.rmid);

  	/*
  	 * Ensure the task's closid and rmid are written before determining if
  	 * the task is current that will decide if it will be interrupted.
  	 * This pairs with the full barrier between the rq->curr update and
  	 * resctrl_arch_sched_in() during context switch.
  	 */
  	smp_mb();

  	/*
  	 * By now, the task's closid and rmid are set. If the task is current
  	 * on a CPU, the PQR_ASSOC MSR needs to be updated to make the resource
  	 * group go into effect. If the task is not current, the MSR will be
  	 * updated when the task is scheduled in.
  	 */
  	update_task_closid_rmid(tsk);

  	return 0;
  }
  562. static bool is_closid_match(struct task_struct *t, struct rdtgroup *r)
  563. {
  564. return (resctrl_arch_alloc_capable() && (r->type == RDTCTRL_GROUP) &&
  565. resctrl_arch_match_closid(t, r->closid));
  566. }
  567. static bool is_rmid_match(struct task_struct *t, struct rdtgroup *r)
  568. {
  569. return (resctrl_arch_mon_capable() && (r->type == RDTMON_GROUP) &&
  570. resctrl_arch_match_rmid(t, r->mon.parent->closid,
  571. r->mon.rmid));
  572. }
  573. /**
  574. * rdtgroup_tasks_assigned - Test if tasks have been assigned to resource group
  575. * @r: Resource group
  576. *
  577. * Return: 1 if tasks have been assigned to @r, 0 otherwise
  578. */
  579. int rdtgroup_tasks_assigned(struct rdtgroup *r)
  580. {
  581. struct task_struct *p, *t;
  582. int ret = 0;
  583. lockdep_assert_held(&rdtgroup_mutex);
  584. rcu_read_lock();
  585. for_each_process_thread(p, t) {
  586. if (is_closid_match(t, r) || is_rmid_match(t, r)) {
  587. ret = 1;
  588. break;
  589. }
  590. }
  591. rcu_read_unlock();
  592. return ret;
  593. }
  594. static int rdtgroup_task_write_permission(struct task_struct *task,
  595. struct kernfs_open_file *of)
  596. {
  597. const struct cred *tcred = get_task_cred(task);
  598. const struct cred *cred = current_cred();
  599. int ret = 0;
  600. /*
  601. * Even if we're attaching all tasks in the thread group, we only
  602. * need to check permissions on one of them.
  603. */
  604. if (!uid_eq(cred->euid, GLOBAL_ROOT_UID) &&
  605. !uid_eq(cred->euid, tcred->uid) &&
  606. !uid_eq(cred->euid, tcred->suid)) {
  607. rdt_last_cmd_printf("No permission to move task %d\n", task->pid);
  608. ret = -EPERM;
  609. }
  610. put_cred(tcred);
  611. return ret;
  612. }
  613. static int rdtgroup_move_task(pid_t pid, struct rdtgroup *rdtgrp,
  614. struct kernfs_open_file *of)
  615. {
  616. struct task_struct *tsk;
  617. int ret;
  618. rcu_read_lock();
  619. if (pid) {
  620. tsk = find_task_by_vpid(pid);
  621. if (!tsk) {
  622. rcu_read_unlock();
  623. rdt_last_cmd_printf("No task %d\n", pid);
  624. return -ESRCH;
  625. }
  626. } else {
  627. tsk = current;
  628. }
  629. get_task_struct(tsk);
  630. rcu_read_unlock();
  631. ret = rdtgroup_task_write_permission(tsk, of);
  632. if (!ret)
  633. ret = __rdtgroup_move_task(tsk, rdtgrp);
  634. put_task_struct(tsk);
  635. return ret;
  636. }
  637. static ssize_t rdtgroup_tasks_write(struct kernfs_open_file *of,
  638. char *buf, size_t nbytes, loff_t off)
  639. {
  640. struct rdtgroup *rdtgrp;
  641. char *pid_str;
  642. int ret = 0;
  643. pid_t pid;
  644. rdtgrp = rdtgroup_kn_lock_live(of->kn);
  645. if (!rdtgrp) {
  646. rdtgroup_kn_unlock(of->kn);
  647. return -ENOENT;
  648. }
  649. rdt_last_cmd_clear();
  650. if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED ||
  651. rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) {
  652. ret = -EINVAL;
  653. rdt_last_cmd_puts("Pseudo-locking in progress\n");
  654. goto unlock;
  655. }
  656. while (buf && buf[0] != '\0' && buf[0] != '\n') {
  657. pid_str = strim(strsep(&buf, ","));
  658. if (kstrtoint(pid_str, 0, &pid)) {
  659. rdt_last_cmd_printf("Task list parsing error pid %s\n", pid_str);
  660. ret = -EINVAL;
  661. break;
  662. }
  663. if (pid < 0) {
  664. rdt_last_cmd_printf("Invalid pid %d\n", pid);
  665. ret = -EINVAL;
  666. break;
  667. }
  668. ret = rdtgroup_move_task(pid, rdtgrp, of);
  669. if (ret) {
  670. rdt_last_cmd_printf("Error while processing task %d\n", pid);
  671. break;
  672. }
  673. }
  674. unlock:
  675. rdtgroup_kn_unlock(of->kn);
  676. return ret ?: nbytes;
  677. }
  678. static void show_rdt_tasks(struct rdtgroup *r, struct seq_file *s)
  679. {
  680. struct task_struct *p, *t;
  681. pid_t pid;
  682. rcu_read_lock();
  683. for_each_process_thread(p, t) {
  684. if (is_closid_match(t, r) || is_rmid_match(t, r)) {
  685. pid = task_pid_vnr(t);
  686. if (pid)
  687. seq_printf(s, "%d\n", pid);
  688. }
  689. }
  690. rcu_read_unlock();
  691. }
  692. static int rdtgroup_tasks_show(struct kernfs_open_file *of,
  693. struct seq_file *s, void *v)
  694. {
  695. struct rdtgroup *rdtgrp;
  696. int ret = 0;
  697. rdtgrp = rdtgroup_kn_lock_live(of->kn);
  698. if (rdtgrp)
  699. show_rdt_tasks(rdtgrp, s);
  700. else
  701. ret = -ENOENT;
  702. rdtgroup_kn_unlock(of->kn);
  703. return ret;
  704. }
  705. static int rdtgroup_closid_show(struct kernfs_open_file *of,
  706. struct seq_file *s, void *v)
  707. {
  708. struct rdtgroup *rdtgrp;
  709. int ret = 0;
  710. rdtgrp = rdtgroup_kn_lock_live(of->kn);
  711. if (rdtgrp)
  712. seq_printf(s, "%u\n", rdtgrp->closid);
  713. else
  714. ret = -ENOENT;
  715. rdtgroup_kn_unlock(of->kn);
  716. return ret;
  717. }
  718. static int rdtgroup_rmid_show(struct kernfs_open_file *of,
  719. struct seq_file *s, void *v)
  720. {
  721. struct rdtgroup *rdtgrp;
  722. int ret = 0;
  723. rdtgrp = rdtgroup_kn_lock_live(of->kn);
  724. if (rdtgrp)
  725. seq_printf(s, "%u\n", rdtgrp->mon.rmid);
  726. else
  727. ret = -ENOENT;
  728. rdtgroup_kn_unlock(of->kn);
  729. return ret;
  730. }
  #ifdef CONFIG_PROC_CPU_RESCTRL
  /*
   * A task belongs to exactly one resctrl control group and to at most one
   * monitor group of that control group. The output therefore takes one of
   * these forms:
   *
   * 1)   res:
   *      mon:
   *
   *    resctrl is not available.
   *
   * 2)   res:/
   *      mon:
   *
   *    The task is in the root control group and in no monitor group.
   *
   * 3)   res:/
   *      mon:mon0
   *
   *    The task is in the root control group and in monitor group mon0.
   *
   * 4)   res:group0
   *      mon:
   *
   *    The task is in control group group0 and in no monitor group.
   *
   * 5)   res:group0
   *      mon:mon1
   *
   *    The task is in control group group0 and in monitor group mon1.
   */
  int proc_resctrl_show(struct seq_file *s, struct pid_namespace *ns,
  		      struct pid *pid, struct task_struct *tsk)
  {
  	struct rdtgroup *ctrl_grp, *mon_grp;
  	/* Failing to find the task's control group is reported as an error. */
  	int ret = -ENOENT;

  	mutex_lock(&rdtgroup_mutex);

  	/* Nothing to report while resctrl is not mounted. */
  	if (!resctrl_mounted) {
  		seq_puts(s, "res:\nmon:\n");
  		ret = 0;
  		goto unlock;
  	}

  	list_for_each_entry(ctrl_grp, &rdt_all_groups, rdtgroup_list) {
  		/* Only shareable and exclusive groups carry task information. */
  		if (ctrl_grp->mode != RDT_MODE_SHAREABLE &&
  		    ctrl_grp->mode != RDT_MODE_EXCLUSIVE)
  			continue;

  		if (!resctrl_arch_match_closid(tsk, ctrl_grp->closid))
  			continue;

  		seq_printf(s, "res:%s%s\n",
  			   (ctrl_grp == &rdtgroup_default) ? "/" : "",
  			   rdt_kn_name(ctrl_grp->kn));

  		seq_puts(s, "mon:");
  		list_for_each_entry(mon_grp, &ctrl_grp->mon.crdtgrp_list,
  				    mon.crdtgrp_list) {
  			if (resctrl_arch_match_rmid(tsk,
  						    mon_grp->mon.parent->closid,
  						    mon_grp->mon.rmid)) {
  				seq_printf(s, "%s", rdt_kn_name(mon_grp->kn));
  				break;
  			}
  		}
  		seq_putc(s, '\n');

  		ret = 0;
  		break;
  	}

  unlock:
  	mutex_unlock(&rdtgroup_mutex);

  	return ret;
  }
  #endif
  809. static int rdt_last_cmd_status_show(struct kernfs_open_file *of,
  810. struct seq_file *seq, void *v)
  811. {
  812. int len;
  813. mutex_lock(&rdtgroup_mutex);
  814. len = seq_buf_used(&last_cmd_status);
  815. if (len)
  816. seq_printf(seq, "%.*s", len, last_cmd_status_buf);
  817. else
  818. seq_puts(seq, "ok\n");
  819. mutex_unlock(&rdtgroup_mutex);
  820. return 0;
  821. }
  822. void *rdt_kn_parent_priv(struct kernfs_node *kn)
  823. {
  824. /*
  825. * The parent pointer is only valid within RCU section since it can be
  826. * replaced.
  827. */
  828. guard(rcu)();
  829. return rcu_dereference(kn->__parent)->priv;
  830. }
  831. static int rdt_num_closids_show(struct kernfs_open_file *of,
  832. struct seq_file *seq, void *v)
  833. {
  834. struct resctrl_schema *s = rdt_kn_parent_priv(of->kn);
  835. seq_printf(seq, "%u\n", s->num_closid);
  836. return 0;
  837. }
  838. static int rdt_default_ctrl_show(struct kernfs_open_file *of,
  839. struct seq_file *seq, void *v)
  840. {
  841. struct resctrl_schema *s = rdt_kn_parent_priv(of->kn);
  842. struct rdt_resource *r = s->res;
  843. seq_printf(seq, "%x\n", resctrl_get_default_ctrl(r));
  844. return 0;
  845. }
  846. static int rdt_min_cbm_bits_show(struct kernfs_open_file *of,
  847. struct seq_file *seq, void *v)
  848. {
  849. struct resctrl_schema *s = rdt_kn_parent_priv(of->kn);
  850. struct rdt_resource *r = s->res;
  851. seq_printf(seq, "%u\n", r->cache.min_cbm_bits);
  852. return 0;
  853. }
  854. static int rdt_shareable_bits_show(struct kernfs_open_file *of,
  855. struct seq_file *seq, void *v)
  856. {
  857. struct resctrl_schema *s = rdt_kn_parent_priv(of->kn);
  858. struct rdt_resource *r = s->res;
  859. seq_printf(seq, "%x\n", r->cache.shareable_bits);
  860. return 0;
  861. }
  862. /*
  863. * rdt_bit_usage_show - Display current usage of resources
  864. *
  865. * A domain is a shared resource that can now be allocated differently. Here
  866. * we display the current regions of the domain as an annotated bitmask.
  867. * For each domain of this resource its allocation bitmask
  868. * is annotated as below to indicate the current usage of the corresponding bit:
  869. * 0 - currently unused
  870. * X - currently available for sharing and used by software and hardware
  871. * H - currently used by hardware only but available for software use
  872. * S - currently used and shareable by software only
  873. * E - currently used exclusively by one resource group
  874. * P - currently pseudo-locked by one resource group
  875. */
  876. static int rdt_bit_usage_show(struct kernfs_open_file *of,
  877. struct seq_file *seq, void *v)
  878. {
  879. struct resctrl_schema *s = rdt_kn_parent_priv(of->kn);
  880. /*
  881. * Use unsigned long even though only 32 bits are used to ensure
  882. * test_bit() is used safely.
  883. */
  884. unsigned long sw_shareable = 0, hw_shareable = 0;
  885. unsigned long exclusive = 0, pseudo_locked = 0;
  886. struct rdt_resource *r = s->res;
  887. struct rdt_ctrl_domain *dom;
  888. int i, hwb, swb, excl, psl;
  889. enum rdtgrp_mode mode;
  890. bool sep = false;
  891. u32 ctrl_val;
  892. cpus_read_lock();
  893. mutex_lock(&rdtgroup_mutex);
  894. list_for_each_entry(dom, &r->ctrl_domains, hdr.list) {
  895. if (sep)
  896. seq_putc(seq, ';');
  897. hw_shareable = r->cache.shareable_bits;
  898. sw_shareable = 0;
  899. exclusive = 0;
  900. seq_printf(seq, "%d=", dom->hdr.id);
  901. for (i = 0; i < closids_supported(); i++) {
  902. if (!closid_allocated(i) ||
  903. (resctrl_arch_get_io_alloc_enabled(r) &&
  904. i == resctrl_io_alloc_closid(r)))
  905. continue;
  906. ctrl_val = resctrl_arch_get_config(r, dom, i,
  907. s->conf_type);
  908. mode = rdtgroup_mode_by_closid(i);
  909. switch (mode) {
  910. case RDT_MODE_SHAREABLE:
  911. sw_shareable |= ctrl_val;
  912. break;
  913. case RDT_MODE_EXCLUSIVE:
  914. exclusive |= ctrl_val;
  915. break;
  916. case RDT_MODE_PSEUDO_LOCKSETUP:
  917. /*
  918. * RDT_MODE_PSEUDO_LOCKSETUP is possible
  919. * here but not included since the CBM
  920. * associated with this CLOSID in this mode
  921. * is not initialized and no task or cpu can be
  922. * assigned this CLOSID.
  923. */
  924. break;
  925. case RDT_MODE_PSEUDO_LOCKED:
  926. case RDT_NUM_MODES:
  927. WARN(1,
  928. "invalid mode for closid %d\n", i);
  929. break;
  930. }
  931. }
  932. /*
  933. * When the "io_alloc" feature is enabled, a portion of the cache
  934. * is configured for shared use between hardware and software.
  935. * Also, when CDP is enabled the CBMs of CDP_CODE and CDP_DATA
  936. * resources are kept in sync. So, the CBMs for "io_alloc" can
  937. * be accessed through either resource.
  938. */
  939. if (resctrl_arch_get_io_alloc_enabled(r)) {
  940. ctrl_val = resctrl_arch_get_config(r, dom,
  941. resctrl_io_alloc_closid(r),
  942. s->conf_type);
  943. hw_shareable |= ctrl_val;
  944. }
  945. for (i = r->cache.cbm_len - 1; i >= 0; i--) {
  946. pseudo_locked = dom->plr ? dom->plr->cbm : 0;
  947. hwb = test_bit(i, &hw_shareable);
  948. swb = test_bit(i, &sw_shareable);
  949. excl = test_bit(i, &exclusive);
  950. psl = test_bit(i, &pseudo_locked);
  951. if (hwb && swb)
  952. seq_putc(seq, 'X');
  953. else if (hwb && !swb)
  954. seq_putc(seq, 'H');
  955. else if (!hwb && swb)
  956. seq_putc(seq, 'S');
  957. else if (excl)
  958. seq_putc(seq, 'E');
  959. else if (psl)
  960. seq_putc(seq, 'P');
  961. else /* Unused bits remain */
  962. seq_putc(seq, '0');
  963. }
  964. sep = true;
  965. }
  966. seq_putc(seq, '\n');
  967. mutex_unlock(&rdtgroup_mutex);
  968. cpus_read_unlock();
  969. return 0;
  970. }
  971. static int rdt_min_bw_show(struct kernfs_open_file *of,
  972. struct seq_file *seq, void *v)
  973. {
  974. struct resctrl_schema *s = rdt_kn_parent_priv(of->kn);
  975. struct rdt_resource *r = s->res;
  976. seq_printf(seq, "%u\n", r->membw.min_bw);
  977. return 0;
  978. }
  979. static int rdt_num_rmids_show(struct kernfs_open_file *of,
  980. struct seq_file *seq, void *v)
  981. {
  982. struct rdt_resource *r = rdt_kn_parent_priv(of->kn);
  983. seq_printf(seq, "%u\n", r->mon.num_rmid);
  984. return 0;
  985. }
  986. static int rdt_mon_features_show(struct kernfs_open_file *of,
  987. struct seq_file *seq, void *v)
  988. {
  989. struct rdt_resource *r = rdt_kn_parent_priv(of->kn);
  990. struct mon_evt *mevt;
  991. for_each_mon_event(mevt) {
  992. if (mevt->rid != r->rid || !mevt->enabled)
  993. continue;
  994. seq_printf(seq, "%s\n", mevt->name);
  995. if (mevt->configurable &&
  996. !resctrl_arch_mbm_cntr_assign_enabled(r))
  997. seq_printf(seq, "%s_config\n", mevt->name);
  998. }
  999. return 0;
  1000. }
  1001. static int rdt_bw_gran_show(struct kernfs_open_file *of,
  1002. struct seq_file *seq, void *v)
  1003. {
  1004. struct resctrl_schema *s = rdt_kn_parent_priv(of->kn);
  1005. struct rdt_resource *r = s->res;
  1006. seq_printf(seq, "%u\n", r->membw.bw_gran);
  1007. return 0;
  1008. }
  1009. static int rdt_delay_linear_show(struct kernfs_open_file *of,
  1010. struct seq_file *seq, void *v)
  1011. {
  1012. struct resctrl_schema *s = rdt_kn_parent_priv(of->kn);
  1013. struct rdt_resource *r = s->res;
  1014. seq_printf(seq, "%u\n", r->membw.delay_linear);
  1015. return 0;
  1016. }
  1017. static int max_threshold_occ_show(struct kernfs_open_file *of,
  1018. struct seq_file *seq, void *v)
  1019. {
  1020. seq_printf(seq, "%u\n", resctrl_rmid_realloc_threshold);
  1021. return 0;
  1022. }
  1023. static int rdt_thread_throttle_mode_show(struct kernfs_open_file *of,
  1024. struct seq_file *seq, void *v)
  1025. {
  1026. struct resctrl_schema *s = rdt_kn_parent_priv(of->kn);
  1027. struct rdt_resource *r = s->res;
  1028. switch (r->membw.throttle_mode) {
  1029. case THREAD_THROTTLE_PER_THREAD:
  1030. seq_puts(seq, "per-thread\n");
  1031. return 0;
  1032. case THREAD_THROTTLE_MAX:
  1033. seq_puts(seq, "max\n");
  1034. return 0;
  1035. case THREAD_THROTTLE_UNDEFINED:
  1036. seq_puts(seq, "undefined\n");
  1037. return 0;
  1038. }
  1039. WARN_ON_ONCE(1);
  1040. return 0;
  1041. }
  1042. static ssize_t max_threshold_occ_write(struct kernfs_open_file *of,
  1043. char *buf, size_t nbytes, loff_t off)
  1044. {
  1045. unsigned int bytes;
  1046. int ret;
  1047. ret = kstrtouint(buf, 0, &bytes);
  1048. if (ret)
  1049. return ret;
  1050. if (bytes > resctrl_rmid_realloc_limit)
  1051. return -EINVAL;
  1052. resctrl_rmid_realloc_threshold = resctrl_arch_round_mon_val(bytes);
  1053. return nbytes;
  1054. }
  1055. /*
  1056. * rdtgroup_mode_show - Display mode of this resource group
  1057. */
  1058. static int rdtgroup_mode_show(struct kernfs_open_file *of,
  1059. struct seq_file *s, void *v)
  1060. {
  1061. struct rdtgroup *rdtgrp;
  1062. rdtgrp = rdtgroup_kn_lock_live(of->kn);
  1063. if (!rdtgrp) {
  1064. rdtgroup_kn_unlock(of->kn);
  1065. return -ENOENT;
  1066. }
  1067. seq_printf(s, "%s\n", rdtgroup_mode_str(rdtgrp->mode));
  1068. rdtgroup_kn_unlock(of->kn);
  1069. return 0;
  1070. }
  1071. enum resctrl_conf_type resctrl_peer_type(enum resctrl_conf_type my_type)
  1072. {
  1073. switch (my_type) {
  1074. case CDP_CODE:
  1075. return CDP_DATA;
  1076. case CDP_DATA:
  1077. return CDP_CODE;
  1078. default:
  1079. case CDP_NONE:
  1080. return CDP_NONE;
  1081. }
  1082. }
  1083. static int rdt_has_sparse_bitmasks_show(struct kernfs_open_file *of,
  1084. struct seq_file *seq, void *v)
  1085. {
  1086. struct resctrl_schema *s = rdt_kn_parent_priv(of->kn);
  1087. struct rdt_resource *r = s->res;
  1088. seq_printf(seq, "%u\n", r->cache.arch_has_sparse_bitmasks);
  1089. return 0;
  1090. }
  1091. /**
  1092. * __rdtgroup_cbm_overlaps - Does CBM for intended closid overlap with other
  1093. * @r: Resource to which domain instance @d belongs.
  1094. * @d: The domain instance for which @closid is being tested.
  1095. * @cbm: Capacity bitmask being tested.
  1096. * @closid: Intended closid for @cbm.
  1097. * @type: CDP type of @r.
  1098. * @exclusive: Only check if overlaps with exclusive resource groups
  1099. *
  1100. * Checks if provided @cbm intended to be used for @closid on domain
  1101. * @d overlaps with any other closids or other hardware usage associated
  1102. * with this domain. If @exclusive is true then only overlaps with
  1103. * resource groups in exclusive mode will be considered. If @exclusive
  1104. * is false then overlaps with any resource group or hardware entities
  1105. * will be considered.
  1106. *
  1107. * @cbm is unsigned long, even if only 32 bits are used, to make the
  1108. * bitmap functions work correctly.
  1109. *
  1110. * Return: false if CBM does not overlap, true if it does.
  1111. */
  1112. static bool __rdtgroup_cbm_overlaps(struct rdt_resource *r, struct rdt_ctrl_domain *d,
  1113. unsigned long cbm, int closid,
  1114. enum resctrl_conf_type type, bool exclusive)
  1115. {
  1116. enum rdtgrp_mode mode;
  1117. unsigned long ctrl_b;
  1118. int i;
  1119. /* Check for any overlap with regions used by hardware directly */
  1120. if (!exclusive) {
  1121. ctrl_b = r->cache.shareable_bits;
  1122. if (bitmap_intersects(&cbm, &ctrl_b, r->cache.cbm_len))
  1123. return true;
  1124. }
  1125. /* Check for overlap with other resource groups */
  1126. for (i = 0; i < closids_supported(); i++) {
  1127. ctrl_b = resctrl_arch_get_config(r, d, i, type);
  1128. mode = rdtgroup_mode_by_closid(i);
  1129. if (closid_allocated(i) && i != closid &&
  1130. mode != RDT_MODE_PSEUDO_LOCKSETUP) {
  1131. if (bitmap_intersects(&cbm, &ctrl_b, r->cache.cbm_len)) {
  1132. if (exclusive) {
  1133. if (mode == RDT_MODE_EXCLUSIVE)
  1134. return true;
  1135. continue;
  1136. }
  1137. return true;
  1138. }
  1139. }
  1140. }
  1141. return false;
  1142. }
  1143. /**
  1144. * rdtgroup_cbm_overlaps - Does CBM overlap with other use of hardware
  1145. * @s: Schema for the resource to which domain instance @d belongs.
  1146. * @d: The domain instance for which @closid is being tested.
  1147. * @cbm: Capacity bitmask being tested.
  1148. * @closid: Intended closid for @cbm.
  1149. * @exclusive: Only check if overlaps with exclusive resource groups
  1150. *
  1151. * Resources that can be allocated using a CBM can use the CBM to control
  1152. * the overlap of these allocations. rdtgroup_cmb_overlaps() is the test
  1153. * for overlap. Overlap test is not limited to the specific resource for
  1154. * which the CBM is intended though - when dealing with CDP resources that
  1155. * share the underlying hardware the overlap check should be performed on
  1156. * the CDP resource sharing the hardware also.
  1157. *
  1158. * Refer to description of __rdtgroup_cbm_overlaps() for the details of the
  1159. * overlap test.
  1160. *
  1161. * Return: true if CBM overlap detected, false if there is no overlap
  1162. */
  1163. bool rdtgroup_cbm_overlaps(struct resctrl_schema *s, struct rdt_ctrl_domain *d,
  1164. unsigned long cbm, int closid, bool exclusive)
  1165. {
  1166. enum resctrl_conf_type peer_type = resctrl_peer_type(s->conf_type);
  1167. struct rdt_resource *r = s->res;
  1168. if (__rdtgroup_cbm_overlaps(r, d, cbm, closid, s->conf_type,
  1169. exclusive))
  1170. return true;
  1171. if (!resctrl_arch_get_cdp_enabled(r->rid))
  1172. return false;
  1173. return __rdtgroup_cbm_overlaps(r, d, cbm, closid, peer_type, exclusive);
  1174. }
  1175. /**
  1176. * rdtgroup_mode_test_exclusive - Test if this resource group can be exclusive
  1177. * @rdtgrp: Resource group identified through its closid.
  1178. *
  1179. * An exclusive resource group implies that there should be no sharing of
  1180. * its allocated resources. At the time this group is considered to be
  1181. * exclusive this test can determine if its current schemata supports this
  1182. * setting by testing for overlap with all other resource groups.
  1183. *
  1184. * Return: true if resource group can be exclusive, false if there is overlap
  1185. * with allocations of other resource groups and thus this resource group
  1186. * cannot be exclusive.
  1187. */
  1188. static bool rdtgroup_mode_test_exclusive(struct rdtgroup *rdtgrp)
  1189. {
  1190. int closid = rdtgrp->closid;
  1191. struct rdt_ctrl_domain *d;
  1192. struct resctrl_schema *s;
  1193. struct rdt_resource *r;
  1194. bool has_cache = false;
  1195. u32 ctrl;
  1196. /* Walking r->domains, ensure it can't race with cpuhp */
  1197. lockdep_assert_cpus_held();
  1198. list_for_each_entry(s, &resctrl_schema_all, list) {
  1199. r = s->res;
  1200. if (r->rid == RDT_RESOURCE_MBA || r->rid == RDT_RESOURCE_SMBA)
  1201. continue;
  1202. has_cache = true;
  1203. list_for_each_entry(d, &r->ctrl_domains, hdr.list) {
  1204. ctrl = resctrl_arch_get_config(r, d, closid,
  1205. s->conf_type);
  1206. if (rdtgroup_cbm_overlaps(s, d, ctrl, closid, false)) {
  1207. rdt_last_cmd_puts("Schemata overlaps\n");
  1208. return false;
  1209. }
  1210. }
  1211. }
  1212. if (!has_cache) {
  1213. rdt_last_cmd_puts("Cannot be exclusive without CAT/CDP\n");
  1214. return false;
  1215. }
  1216. return true;
  1217. }
  /*
   * rdtgroup_mode_write - Modify the resource group's mode
   *
   * @buf must hold one mode name followed by a newline: "shareable",
   * "exclusive" or "pseudo-locksetup". Writing the mode the group already
   * has is accepted as a no-op. A group that is already pseudo-locked
   * cannot be changed.
   *
   * Return: @nbytes on success, -EINVAL for malformed, unknown or
   * disallowed requests, -ENOENT if the group is no longer live, or the
   * error returned by the locksetup enter/exit helpers.
   */
  static ssize_t rdtgroup_mode_write(struct kernfs_open_file *of,
  				   char *buf, size_t nbytes, loff_t off)
  {
  	struct rdtgroup *rdtgrp;
  	enum rdtgrp_mode mode;
  	int ret = 0;

  	/* Valid input requires a trailing newline */
  	if (nbytes == 0 || buf[nbytes - 1] != '\n')
  		return -EINVAL;
  	/* Strip the newline so the strcmp() calls below see the bare name. */
  	buf[nbytes - 1] = '\0';

  	rdtgrp = rdtgroup_kn_lock_live(of->kn);
  	if (!rdtgrp) {
  		rdtgroup_kn_unlock(of->kn);
  		return -ENOENT;
  	}

  	rdt_last_cmd_clear();

  	mode = rdtgrp->mode;

  	/* Requesting the current mode succeeds without doing anything. */
  	if ((!strcmp(buf, "shareable") && mode == RDT_MODE_SHAREABLE) ||
  	    (!strcmp(buf, "exclusive") && mode == RDT_MODE_EXCLUSIVE) ||
  	    (!strcmp(buf, "pseudo-locksetup") &&
  	     mode == RDT_MODE_PSEUDO_LOCKSETUP) ||
  	    (!strcmp(buf, "pseudo-locked") && mode == RDT_MODE_PSEUDO_LOCKED))
  		goto out;

  	if (mode == RDT_MODE_PSEUDO_LOCKED) {
  		rdt_last_cmd_puts("Cannot change pseudo-locked group\n");
  		ret = -EINVAL;
  		goto out;
  	}

  	if (!strcmp(buf, "shareable")) {
  		/* Leaving locksetup mode needs the exit helper to succeed first. */
  		if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) {
  			ret = rdtgroup_locksetup_exit(rdtgrp);
  			if (ret)
  				goto out;
  		}
  		rdtgrp->mode = RDT_MODE_SHAREABLE;
  	} else if (!strcmp(buf, "exclusive")) {
  		/* The overlap test runs before any locksetup state is torn down. */
  		if (!rdtgroup_mode_test_exclusive(rdtgrp)) {
  			ret = -EINVAL;
  			goto out;
  		}
  		if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) {
  			ret = rdtgroup_locksetup_exit(rdtgrp);
  			if (ret)
  				goto out;
  		}
  		rdtgrp->mode = RDT_MODE_EXCLUSIVE;
  	} else if (IS_ENABLED(CONFIG_RESCTRL_FS_PSEUDO_LOCK) &&
  		   !strcmp(buf, "pseudo-locksetup")) {
  		ret = rdtgroup_locksetup_enter(rdtgrp);
  		if (ret)
  			goto out;
  		rdtgrp->mode = RDT_MODE_PSEUDO_LOCKSETUP;
  	} else {
  		rdt_last_cmd_puts("Unknown or unsupported mode\n");
  		ret = -EINVAL;
  	}

  out:
  	rdtgroup_kn_unlock(of->kn);
  	return ret ?: nbytes;
  }
  1281. /**
  1282. * rdtgroup_cbm_to_size - Translate CBM to size in bytes
  1283. * @r: RDT resource to which @d belongs.
  1284. * @d: RDT domain instance.
  1285. * @cbm: bitmask for which the size should be computed.
  1286. *
  1287. * The bitmask provided associated with the RDT domain instance @d will be
  1288. * translated into how many bytes it represents. The size in bytes is
  1289. * computed by first dividing the total cache size by the CBM length to
  1290. * determine how many bytes each bit in the bitmask represents. The result
  1291. * is multiplied with the number of bits set in the bitmask.
  1292. *
  1293. * @cbm is unsigned long, even if only 32 bits are used to make the
  1294. * bitmap functions work correctly.
  1295. */
  1296. unsigned int rdtgroup_cbm_to_size(struct rdt_resource *r,
  1297. struct rdt_ctrl_domain *d, unsigned long cbm)
  1298. {
  1299. unsigned int size = 0;
  1300. struct cacheinfo *ci;
  1301. int num_b;
  1302. if (WARN_ON_ONCE(r->ctrl_scope != RESCTRL_L2_CACHE && r->ctrl_scope != RESCTRL_L3_CACHE))
  1303. return size;
  1304. num_b = bitmap_weight(&cbm, r->cache.cbm_len);
  1305. ci = get_cpu_cacheinfo_level(cpumask_any(&d->hdr.cpu_mask), r->ctrl_scope);
  1306. if (ci)
  1307. size = ci->size / r->cache.cbm_len * num_b;
  1308. return size;
  1309. }
  1310. bool is_mba_sc(struct rdt_resource *r)
  1311. {
  1312. if (!r)
  1313. r = resctrl_arch_get_resource(RDT_RESOURCE_MBA);
  1314. /*
  1315. * The software controller support is only applicable to MBA resource.
  1316. * Make sure to check for resource type.
  1317. */
  1318. if (r->rid != RDT_RESOURCE_MBA)
  1319. return false;
  1320. return r->membw.mba_sc;
  1321. }
  1322. /*
  1323. * rdtgroup_size_show - Display size in bytes of allocated regions
  1324. *
  1325. * The "size" file mirrors the layout of the "schemata" file, printing the
  1326. * size in bytes of each region instead of the capacity bitmask.
  1327. */
  1328. static int rdtgroup_size_show(struct kernfs_open_file *of,
  1329. struct seq_file *s, void *v)
  1330. {
  1331. struct resctrl_schema *schema;
  1332. enum resctrl_conf_type type;
  1333. struct rdt_ctrl_domain *d;
  1334. struct rdtgroup *rdtgrp;
  1335. struct rdt_resource *r;
  1336. unsigned int size;
  1337. int ret = 0;
  1338. u32 closid;
  1339. bool sep;
  1340. u32 ctrl;
  1341. rdtgrp = rdtgroup_kn_lock_live(of->kn);
  1342. if (!rdtgrp) {
  1343. rdtgroup_kn_unlock(of->kn);
  1344. return -ENOENT;
  1345. }
  1346. if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED) {
  1347. if (!rdtgrp->plr->d) {
  1348. rdt_last_cmd_clear();
  1349. rdt_last_cmd_puts("Cache domain offline\n");
  1350. ret = -ENODEV;
  1351. } else {
  1352. seq_printf(s, "%*s:", max_name_width,
  1353. rdtgrp->plr->s->name);
  1354. size = rdtgroup_cbm_to_size(rdtgrp->plr->s->res,
  1355. rdtgrp->plr->d,
  1356. rdtgrp->plr->cbm);
  1357. seq_printf(s, "%d=%u\n", rdtgrp->plr->d->hdr.id, size);
  1358. }
  1359. goto out;
  1360. }
  1361. closid = rdtgrp->closid;
  1362. list_for_each_entry(schema, &resctrl_schema_all, list) {
  1363. r = schema->res;
  1364. type = schema->conf_type;
  1365. sep = false;
  1366. seq_printf(s, "%*s:", max_name_width, schema->name);
  1367. list_for_each_entry(d, &r->ctrl_domains, hdr.list) {
  1368. if (sep)
  1369. seq_putc(s, ';');
  1370. if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP) {
  1371. size = 0;
  1372. } else {
  1373. if (is_mba_sc(r))
  1374. ctrl = d->mbps_val[closid];
  1375. else
  1376. ctrl = resctrl_arch_get_config(r, d,
  1377. closid,
  1378. type);
  1379. if (r->rid == RDT_RESOURCE_MBA ||
  1380. r->rid == RDT_RESOURCE_SMBA)
  1381. size = ctrl;
  1382. else
  1383. size = rdtgroup_cbm_to_size(r, d, ctrl);
  1384. }
  1385. seq_printf(s, "%d=%u", d->hdr.id, size);
  1386. sep = true;
  1387. }
  1388. seq_putc(s, '\n');
  1389. }
  1390. out:
  1391. rdtgroup_kn_unlock(of->kn);
  1392. return ret;
  1393. }
  1394. static void mondata_config_read(struct resctrl_mon_config_info *mon_info)
  1395. {
  1396. smp_call_function_any(&mon_info->d->hdr.cpu_mask,
  1397. resctrl_arch_mon_event_config_read, mon_info, 1);
  1398. }
  1399. static int mbm_config_show(struct seq_file *s, struct rdt_resource *r, u32 evtid)
  1400. {
  1401. struct resctrl_mon_config_info mon_info;
  1402. struct rdt_l3_mon_domain *dom;
  1403. bool sep = false;
  1404. cpus_read_lock();
  1405. mutex_lock(&rdtgroup_mutex);
  1406. list_for_each_entry(dom, &r->mon_domains, hdr.list) {
  1407. if (sep)
  1408. seq_puts(s, ";");
  1409. memset(&mon_info, 0, sizeof(struct resctrl_mon_config_info));
  1410. mon_info.r = r;
  1411. mon_info.d = dom;
  1412. mon_info.evtid = evtid;
  1413. mondata_config_read(&mon_info);
  1414. seq_printf(s, "%d=0x%02x", dom->hdr.id, mon_info.mon_config);
  1415. sep = true;
  1416. }
  1417. seq_puts(s, "\n");
  1418. mutex_unlock(&rdtgroup_mutex);
  1419. cpus_read_unlock();
  1420. return 0;
  1421. }
  1422. static int mbm_total_bytes_config_show(struct kernfs_open_file *of,
  1423. struct seq_file *seq, void *v)
  1424. {
  1425. struct rdt_resource *r = rdt_kn_parent_priv(of->kn);
  1426. mbm_config_show(seq, r, QOS_L3_MBM_TOTAL_EVENT_ID);
  1427. return 0;
  1428. }
  1429. static int mbm_local_bytes_config_show(struct kernfs_open_file *of,
  1430. struct seq_file *seq, void *v)
  1431. {
  1432. struct rdt_resource *r = rdt_kn_parent_priv(of->kn);
  1433. mbm_config_show(seq, r, QOS_L3_MBM_LOCAL_EVENT_ID);
  1434. return 0;
  1435. }
  1436. static void mbm_config_write_domain(struct rdt_resource *r,
  1437. struct rdt_l3_mon_domain *d, u32 evtid, u32 val)
  1438. {
  1439. struct resctrl_mon_config_info mon_info = {0};
  1440. /*
  1441. * Read the current config value first. If both are the same then
  1442. * no need to write it again.
  1443. */
  1444. mon_info.r = r;
  1445. mon_info.d = d;
  1446. mon_info.evtid = evtid;
  1447. mondata_config_read(&mon_info);
  1448. if (mon_info.mon_config == val)
  1449. return;
  1450. mon_info.mon_config = val;
  1451. /*
  1452. * Update MSR_IA32_EVT_CFG_BASE MSR on one of the CPUs in the
  1453. * domain. The MSRs offset from MSR MSR_IA32_EVT_CFG_BASE
  1454. * are scoped at the domain level. Writing any of these MSRs
  1455. * on one CPU is observed by all the CPUs in the domain.
  1456. */
  1457. smp_call_function_any(&d->hdr.cpu_mask, resctrl_arch_mon_event_config_write,
  1458. &mon_info, 1);
  1459. /*
  1460. * When an Event Configuration is changed, the bandwidth counters
  1461. * for all RMIDs and Events will be cleared by the hardware. The
  1462. * hardware also sets MSR_IA32_QM_CTR.Unavailable (bit 62) for
  1463. * every RMID on the next read to any event for every RMID.
  1464. * Subsequent reads will have MSR_IA32_QM_CTR.Unavailable (bit 62)
  1465. * cleared while it is tracked by the hardware. Clear the
  1466. * mbm_local and mbm_total counts for all the RMIDs.
  1467. */
  1468. resctrl_arch_reset_rmid_all(r, d);
  1469. }
  1470. static int mon_config_write(struct rdt_resource *r, char *tok, u32 evtid)
  1471. {
  1472. char *dom_str = NULL, *id_str;
  1473. struct rdt_l3_mon_domain *d;
  1474. unsigned long dom_id, val;
  1475. /* Walking r->domains, ensure it can't race with cpuhp */
  1476. lockdep_assert_cpus_held();
  1477. next:
  1478. if (!tok || tok[0] == '\0')
  1479. return 0;
  1480. /* Start processing the strings for each domain */
  1481. dom_str = strim(strsep(&tok, ";"));
  1482. id_str = strsep(&dom_str, "=");
  1483. if (!id_str || kstrtoul(id_str, 10, &dom_id)) {
  1484. rdt_last_cmd_puts("Missing '=' or non-numeric domain id\n");
  1485. return -EINVAL;
  1486. }
  1487. if (!dom_str || kstrtoul(dom_str, 16, &val)) {
  1488. rdt_last_cmd_puts("Non-numeric event configuration value\n");
  1489. return -EINVAL;
  1490. }
  1491. /* Value from user cannot be more than the supported set of events */
  1492. if ((val & r->mon.mbm_cfg_mask) != val) {
  1493. rdt_last_cmd_printf("Invalid event configuration: max valid mask is 0x%02x\n",
  1494. r->mon.mbm_cfg_mask);
  1495. return -EINVAL;
  1496. }
  1497. list_for_each_entry(d, &r->mon_domains, hdr.list) {
  1498. if (d->hdr.id == dom_id) {
  1499. mbm_config_write_domain(r, d, evtid, val);
  1500. goto next;
  1501. }
  1502. }
  1503. return -EINVAL;
  1504. }
  1505. static ssize_t mbm_total_bytes_config_write(struct kernfs_open_file *of,
  1506. char *buf, size_t nbytes,
  1507. loff_t off)
  1508. {
  1509. struct rdt_resource *r = rdt_kn_parent_priv(of->kn);
  1510. int ret;
  1511. /* Valid input requires a trailing newline */
  1512. if (nbytes == 0 || buf[nbytes - 1] != '\n')
  1513. return -EINVAL;
  1514. cpus_read_lock();
  1515. mutex_lock(&rdtgroup_mutex);
  1516. rdt_last_cmd_clear();
  1517. buf[nbytes - 1] = '\0';
  1518. ret = mon_config_write(r, buf, QOS_L3_MBM_TOTAL_EVENT_ID);
  1519. mutex_unlock(&rdtgroup_mutex);
  1520. cpus_read_unlock();
  1521. return ret ?: nbytes;
  1522. }
  1523. static ssize_t mbm_local_bytes_config_write(struct kernfs_open_file *of,
  1524. char *buf, size_t nbytes,
  1525. loff_t off)
  1526. {
  1527. struct rdt_resource *r = rdt_kn_parent_priv(of->kn);
  1528. int ret;
  1529. /* Valid input requires a trailing newline */
  1530. if (nbytes == 0 || buf[nbytes - 1] != '\n')
  1531. return -EINVAL;
  1532. cpus_read_lock();
  1533. mutex_lock(&rdtgroup_mutex);
  1534. rdt_last_cmd_clear();
  1535. buf[nbytes - 1] = '\0';
  1536. ret = mon_config_write(r, buf, QOS_L3_MBM_LOCAL_EVENT_ID);
  1537. mutex_unlock(&rdtgroup_mutex);
  1538. cpus_read_unlock();
  1539. return ret ?: nbytes;
  1540. }
  1541. /*
  1542. * resctrl_bmec_files_show() — Controls the visibility of BMEC-related resctrl
  1543. * files. When @show is true, the files are displayed; when false, the files
  1544. * are hidden.
  1545. * Don't treat kernfs_find_and_get failure as an error, since this function may
  1546. * be called regardless of whether BMEC is supported or the event is enabled.
  1547. */
  1548. void resctrl_bmec_files_show(struct rdt_resource *r, struct kernfs_node *l3_mon_kn,
  1549. bool show)
  1550. {
  1551. struct kernfs_node *kn_config, *mon_kn = NULL;
  1552. char name[32];
  1553. if (!l3_mon_kn) {
  1554. sprintf(name, "%s_MON", r->name);
  1555. mon_kn = kernfs_find_and_get(kn_info, name);
  1556. if (!mon_kn)
  1557. return;
  1558. l3_mon_kn = mon_kn;
  1559. }
  1560. kn_config = kernfs_find_and_get(l3_mon_kn, "mbm_total_bytes_config");
  1561. if (kn_config) {
  1562. kernfs_show(kn_config, show);
  1563. kernfs_put(kn_config);
  1564. }
  1565. kn_config = kernfs_find_and_get(l3_mon_kn, "mbm_local_bytes_config");
  1566. if (kn_config) {
  1567. kernfs_show(kn_config, show);
  1568. kernfs_put(kn_config);
  1569. }
  1570. /* Release the reference only if it was acquired */
  1571. if (mon_kn)
  1572. kernfs_put(mon_kn);
  1573. }
  1574. const char *rdtgroup_name_by_closid(u32 closid)
  1575. {
  1576. struct rdtgroup *rdtgrp;
  1577. list_for_each_entry(rdtgrp, &rdt_all_groups, rdtgroup_list) {
  1578. if (rdtgrp->closid == closid)
  1579. return rdt_kn_name(rdtgrp->kn);
  1580. }
  1581. return NULL;
  1582. }
  1583. /* rdtgroup information files for one cache resource. */
  1584. static struct rftype res_common_files[] = {
  1585. {
  1586. .name = "last_cmd_status",
  1587. .mode = 0444,
  1588. .kf_ops = &rdtgroup_kf_single_ops,
  1589. .seq_show = rdt_last_cmd_status_show,
  1590. .fflags = RFTYPE_TOP_INFO,
  1591. },
  1592. {
  1593. .name = "mbm_assign_on_mkdir",
  1594. .mode = 0644,
  1595. .kf_ops = &rdtgroup_kf_single_ops,
  1596. .seq_show = resctrl_mbm_assign_on_mkdir_show,
  1597. .write = resctrl_mbm_assign_on_mkdir_write,
  1598. },
  1599. {
  1600. .name = "num_closids",
  1601. .mode = 0444,
  1602. .kf_ops = &rdtgroup_kf_single_ops,
  1603. .seq_show = rdt_num_closids_show,
  1604. .fflags = RFTYPE_CTRL_INFO,
  1605. },
  1606. {
  1607. .name = "mon_features",
  1608. .mode = 0444,
  1609. .kf_ops = &rdtgroup_kf_single_ops,
  1610. .seq_show = rdt_mon_features_show,
  1611. .fflags = RFTYPE_MON_INFO,
  1612. },
  1613. {
  1614. .name = "available_mbm_cntrs",
  1615. .mode = 0444,
  1616. .kf_ops = &rdtgroup_kf_single_ops,
  1617. .seq_show = resctrl_available_mbm_cntrs_show,
  1618. },
  1619. {
  1620. .name = "num_rmids",
  1621. .mode = 0444,
  1622. .kf_ops = &rdtgroup_kf_single_ops,
  1623. .seq_show = rdt_num_rmids_show,
  1624. .fflags = RFTYPE_MON_INFO,
  1625. },
  1626. {
  1627. .name = "cbm_mask",
  1628. .mode = 0444,
  1629. .kf_ops = &rdtgroup_kf_single_ops,
  1630. .seq_show = rdt_default_ctrl_show,
  1631. .fflags = RFTYPE_CTRL_INFO | RFTYPE_RES_CACHE,
  1632. },
  1633. {
  1634. .name = "num_mbm_cntrs",
  1635. .mode = 0444,
  1636. .kf_ops = &rdtgroup_kf_single_ops,
  1637. .seq_show = resctrl_num_mbm_cntrs_show,
  1638. },
  1639. {
  1640. .name = "min_cbm_bits",
  1641. .mode = 0444,
  1642. .kf_ops = &rdtgroup_kf_single_ops,
  1643. .seq_show = rdt_min_cbm_bits_show,
  1644. .fflags = RFTYPE_CTRL_INFO | RFTYPE_RES_CACHE,
  1645. },
  1646. {
  1647. .name = "shareable_bits",
  1648. .mode = 0444,
  1649. .kf_ops = &rdtgroup_kf_single_ops,
  1650. .seq_show = rdt_shareable_bits_show,
  1651. .fflags = RFTYPE_CTRL_INFO | RFTYPE_RES_CACHE,
  1652. },
  1653. {
  1654. .name = "bit_usage",
  1655. .mode = 0444,
  1656. .kf_ops = &rdtgroup_kf_single_ops,
  1657. .seq_show = rdt_bit_usage_show,
  1658. .fflags = RFTYPE_CTRL_INFO | RFTYPE_RES_CACHE,
  1659. },
  1660. {
  1661. .name = "min_bandwidth",
  1662. .mode = 0444,
  1663. .kf_ops = &rdtgroup_kf_single_ops,
  1664. .seq_show = rdt_min_bw_show,
  1665. .fflags = RFTYPE_CTRL_INFO | RFTYPE_RES_MB,
  1666. },
  1667. {
  1668. .name = "bandwidth_gran",
  1669. .mode = 0444,
  1670. .kf_ops = &rdtgroup_kf_single_ops,
  1671. .seq_show = rdt_bw_gran_show,
  1672. .fflags = RFTYPE_CTRL_INFO | RFTYPE_RES_MB,
  1673. },
  1674. {
  1675. .name = "delay_linear",
  1676. .mode = 0444,
  1677. .kf_ops = &rdtgroup_kf_single_ops,
  1678. .seq_show = rdt_delay_linear_show,
  1679. .fflags = RFTYPE_CTRL_INFO | RFTYPE_RES_MB,
  1680. },
  1681. /*
  1682. * Platform specific which (if any) capabilities are provided by
  1683. * thread_throttle_mode. Defer "fflags" initialization to platform
  1684. * discovery.
  1685. */
  1686. {
  1687. .name = "thread_throttle_mode",
  1688. .mode = 0444,
  1689. .kf_ops = &rdtgroup_kf_single_ops,
  1690. .seq_show = rdt_thread_throttle_mode_show,
  1691. },
  1692. {
  1693. .name = "io_alloc",
  1694. .mode = 0644,
  1695. .kf_ops = &rdtgroup_kf_single_ops,
  1696. .seq_show = resctrl_io_alloc_show,
  1697. .write = resctrl_io_alloc_write,
  1698. },
  1699. {
  1700. .name = "io_alloc_cbm",
  1701. .mode = 0644,
  1702. .kf_ops = &rdtgroup_kf_single_ops,
  1703. .seq_show = resctrl_io_alloc_cbm_show,
  1704. .write = resctrl_io_alloc_cbm_write,
  1705. },
  1706. {
  1707. .name = "max_threshold_occupancy",
  1708. .mode = 0644,
  1709. .kf_ops = &rdtgroup_kf_single_ops,
  1710. .write = max_threshold_occ_write,
  1711. .seq_show = max_threshold_occ_show,
  1712. .fflags = RFTYPE_MON_INFO | RFTYPE_RES_CACHE,
  1713. },
  1714. {
  1715. .name = "mbm_total_bytes_config",
  1716. .mode = 0644,
  1717. .kf_ops = &rdtgroup_kf_single_ops,
  1718. .seq_show = mbm_total_bytes_config_show,
  1719. .write = mbm_total_bytes_config_write,
  1720. },
  1721. {
  1722. .name = "mbm_local_bytes_config",
  1723. .mode = 0644,
  1724. .kf_ops = &rdtgroup_kf_single_ops,
  1725. .seq_show = mbm_local_bytes_config_show,
  1726. .write = mbm_local_bytes_config_write,
  1727. },
  1728. {
  1729. .name = "event_filter",
  1730. .mode = 0644,
  1731. .kf_ops = &rdtgroup_kf_single_ops,
  1732. .seq_show = event_filter_show,
  1733. .write = event_filter_write,
  1734. },
  1735. {
  1736. .name = "mbm_L3_assignments",
  1737. .mode = 0644,
  1738. .kf_ops = &rdtgroup_kf_single_ops,
  1739. .seq_show = mbm_L3_assignments_show,
  1740. .write = mbm_L3_assignments_write,
  1741. },
  1742. {
  1743. .name = "mbm_assign_mode",
  1744. .mode = 0644,
  1745. .kf_ops = &rdtgroup_kf_single_ops,
  1746. .seq_show = resctrl_mbm_assign_mode_show,
  1747. .write = resctrl_mbm_assign_mode_write,
  1748. .fflags = RFTYPE_MON_INFO | RFTYPE_RES_CACHE,
  1749. },
  1750. {
  1751. .name = "cpus",
  1752. .mode = 0644,
  1753. .kf_ops = &rdtgroup_kf_single_ops,
  1754. .write = rdtgroup_cpus_write,
  1755. .seq_show = rdtgroup_cpus_show,
  1756. .fflags = RFTYPE_BASE,
  1757. },
  1758. {
  1759. .name = "cpus_list",
  1760. .mode = 0644,
  1761. .kf_ops = &rdtgroup_kf_single_ops,
  1762. .write = rdtgroup_cpus_write,
  1763. .seq_show = rdtgroup_cpus_show,
  1764. .flags = RFTYPE_FLAGS_CPUS_LIST,
  1765. .fflags = RFTYPE_BASE,
  1766. },
  1767. {
  1768. .name = "tasks",
  1769. .mode = 0644,
  1770. .kf_ops = &rdtgroup_kf_single_ops,
  1771. .write = rdtgroup_tasks_write,
  1772. .seq_show = rdtgroup_tasks_show,
  1773. .fflags = RFTYPE_BASE,
  1774. },
  1775. {
  1776. .name = "mon_hw_id",
  1777. .mode = 0444,
  1778. .kf_ops = &rdtgroup_kf_single_ops,
  1779. .seq_show = rdtgroup_rmid_show,
  1780. .fflags = RFTYPE_MON_BASE | RFTYPE_DEBUG,
  1781. },
  1782. {
  1783. .name = "schemata",
  1784. .mode = 0644,
  1785. .kf_ops = &rdtgroup_kf_single_ops,
  1786. .write = rdtgroup_schemata_write,
  1787. .seq_show = rdtgroup_schemata_show,
  1788. .fflags = RFTYPE_CTRL_BASE,
  1789. },
  1790. {
  1791. .name = "mba_MBps_event",
  1792. .mode = 0644,
  1793. .kf_ops = &rdtgroup_kf_single_ops,
  1794. .write = rdtgroup_mba_mbps_event_write,
  1795. .seq_show = rdtgroup_mba_mbps_event_show,
  1796. },
  1797. {
  1798. .name = "mode",
  1799. .mode = 0644,
  1800. .kf_ops = &rdtgroup_kf_single_ops,
  1801. .write = rdtgroup_mode_write,
  1802. .seq_show = rdtgroup_mode_show,
  1803. .fflags = RFTYPE_CTRL_BASE,
  1804. },
  1805. {
  1806. .name = "size",
  1807. .mode = 0444,
  1808. .kf_ops = &rdtgroup_kf_single_ops,
  1809. .seq_show = rdtgroup_size_show,
  1810. .fflags = RFTYPE_CTRL_BASE,
  1811. },
  1812. {
  1813. .name = "sparse_masks",
  1814. .mode = 0444,
  1815. .kf_ops = &rdtgroup_kf_single_ops,
  1816. .seq_show = rdt_has_sparse_bitmasks_show,
  1817. .fflags = RFTYPE_CTRL_INFO | RFTYPE_RES_CACHE,
  1818. },
  1819. {
  1820. .name = "ctrl_hw_id",
  1821. .mode = 0444,
  1822. .kf_ops = &rdtgroup_kf_single_ops,
  1823. .seq_show = rdtgroup_closid_show,
  1824. .fflags = RFTYPE_CTRL_BASE | RFTYPE_DEBUG,
  1825. },
  1826. };
  1827. static int rdtgroup_add_files(struct kernfs_node *kn, unsigned long fflags)
  1828. {
  1829. struct rftype *rfts, *rft;
  1830. int ret, len;
  1831. rfts = res_common_files;
  1832. len = ARRAY_SIZE(res_common_files);
  1833. lockdep_assert_held(&rdtgroup_mutex);
  1834. if (resctrl_debug)
  1835. fflags |= RFTYPE_DEBUG;
  1836. for (rft = rfts; rft < rfts + len; rft++) {
  1837. if (rft->fflags && ((fflags & rft->fflags) == rft->fflags)) {
  1838. ret = rdtgroup_add_file(kn, rft);
  1839. if (ret)
  1840. goto error;
  1841. }
  1842. }
  1843. return 0;
  1844. error:
  1845. pr_warn("Failed to add %s, err=%d\n", rft->name, ret);
  1846. while (--rft >= rfts) {
  1847. if ((fflags & rft->fflags) == rft->fflags)
  1848. kernfs_remove_by_name(kn, rft->name);
  1849. }
  1850. return ret;
  1851. }
  1852. static struct rftype *rdtgroup_get_rftype_by_name(const char *name)
  1853. {
  1854. struct rftype *rfts, *rft;
  1855. int len;
  1856. rfts = res_common_files;
  1857. len = ARRAY_SIZE(res_common_files);
  1858. for (rft = rfts; rft < rfts + len; rft++) {
  1859. if (!strcmp(rft->name, name))
  1860. return rft;
  1861. }
  1862. return NULL;
  1863. }
  1864. static void thread_throttle_mode_init(void)
  1865. {
  1866. enum membw_throttle_mode throttle_mode = THREAD_THROTTLE_UNDEFINED;
  1867. struct rdt_resource *r_mba, *r_smba;
  1868. r_mba = resctrl_arch_get_resource(RDT_RESOURCE_MBA);
  1869. if (r_mba->alloc_capable &&
  1870. r_mba->membw.throttle_mode != THREAD_THROTTLE_UNDEFINED)
  1871. throttle_mode = r_mba->membw.throttle_mode;
  1872. r_smba = resctrl_arch_get_resource(RDT_RESOURCE_SMBA);
  1873. if (r_smba->alloc_capable &&
  1874. r_smba->membw.throttle_mode != THREAD_THROTTLE_UNDEFINED)
  1875. throttle_mode = r_smba->membw.throttle_mode;
  1876. if (throttle_mode == THREAD_THROTTLE_UNDEFINED)
  1877. return;
  1878. resctrl_file_fflags_init("thread_throttle_mode",
  1879. RFTYPE_CTRL_INFO | RFTYPE_RES_MB);
  1880. }
  1881. /*
  1882. * The resctrl file "io_alloc" is added using L3 resource. However, it results
  1883. * in this file being visible for *all* cache resources (eg. L2 cache),
  1884. * whether it supports "io_alloc" or not.
  1885. */
  1886. static void io_alloc_init(void)
  1887. {
  1888. struct rdt_resource *r = resctrl_arch_get_resource(RDT_RESOURCE_L3);
  1889. if (r->cache.io_alloc_capable) {
  1890. resctrl_file_fflags_init("io_alloc", RFTYPE_CTRL_INFO |
  1891. RFTYPE_RES_CACHE);
  1892. resctrl_file_fflags_init("io_alloc_cbm",
  1893. RFTYPE_CTRL_INFO | RFTYPE_RES_CACHE);
  1894. }
  1895. }
  1896. void resctrl_file_fflags_init(const char *config, unsigned long fflags)
  1897. {
  1898. struct rftype *rft;
  1899. rft = rdtgroup_get_rftype_by_name(config);
  1900. if (rft)
  1901. rft->fflags = fflags;
  1902. }
  1903. /**
  1904. * rdtgroup_kn_mode_restrict - Restrict user access to named resctrl file
  1905. * @r: The resource group with which the file is associated.
  1906. * @name: Name of the file
  1907. *
  1908. * The permissions of named resctrl file, directory, or link are modified
  1909. * to not allow read, write, or execute by any user.
  1910. *
  1911. * WARNING: This function is intended to communicate to the user that the
  1912. * resctrl file has been locked down - that it is not relevant to the
  1913. * particular state the system finds itself in. It should not be relied
  1914. * on to protect from user access because after the file's permissions
  1915. * are restricted the user can still change the permissions using chmod
  1916. * from the command line.
  1917. *
  1918. * Return: 0 on success, <0 on failure.
  1919. */
  1920. int rdtgroup_kn_mode_restrict(struct rdtgroup *r, const char *name)
  1921. {
  1922. struct iattr iattr = {.ia_valid = ATTR_MODE,};
  1923. struct kernfs_node *kn;
  1924. int ret = 0;
  1925. kn = kernfs_find_and_get_ns(r->kn, name, NULL);
  1926. if (!kn)
  1927. return -ENOENT;
  1928. switch (kernfs_type(kn)) {
  1929. case KERNFS_DIR:
  1930. iattr.ia_mode = S_IFDIR;
  1931. break;
  1932. case KERNFS_FILE:
  1933. iattr.ia_mode = S_IFREG;
  1934. break;
  1935. case KERNFS_LINK:
  1936. iattr.ia_mode = S_IFLNK;
  1937. break;
  1938. }
  1939. ret = kernfs_setattr(kn, &iattr);
  1940. kernfs_put(kn);
  1941. return ret;
  1942. }
  1943. /**
  1944. * rdtgroup_kn_mode_restore - Restore user access to named resctrl file
  1945. * @r: The resource group with which the file is associated.
  1946. * @name: Name of the file
  1947. * @mask: Mask of permissions that should be restored
  1948. *
  1949. * Restore the permissions of the named file. If @name is a directory the
  1950. * permissions of its parent will be used.
  1951. *
  1952. * Return: 0 on success, <0 on failure.
  1953. */
  1954. int rdtgroup_kn_mode_restore(struct rdtgroup *r, const char *name,
  1955. umode_t mask)
  1956. {
  1957. struct iattr iattr = {.ia_valid = ATTR_MODE,};
  1958. struct kernfs_node *kn, *parent;
  1959. struct rftype *rfts, *rft;
  1960. int ret, len;
  1961. rfts = res_common_files;
  1962. len = ARRAY_SIZE(res_common_files);
  1963. for (rft = rfts; rft < rfts + len; rft++) {
  1964. if (!strcmp(rft->name, name))
  1965. iattr.ia_mode = rft->mode & mask;
  1966. }
  1967. kn = kernfs_find_and_get_ns(r->kn, name, NULL);
  1968. if (!kn)
  1969. return -ENOENT;
  1970. switch (kernfs_type(kn)) {
  1971. case KERNFS_DIR:
  1972. parent = kernfs_get_parent(kn);
  1973. if (parent) {
  1974. iattr.ia_mode |= parent->mode;
  1975. kernfs_put(parent);
  1976. }
  1977. iattr.ia_mode |= S_IFDIR;
  1978. break;
  1979. case KERNFS_FILE:
  1980. iattr.ia_mode |= S_IFREG;
  1981. break;
  1982. case KERNFS_LINK:
  1983. iattr.ia_mode |= S_IFLNK;
  1984. break;
  1985. }
  1986. ret = kernfs_setattr(kn, &iattr);
  1987. kernfs_put(kn);
  1988. return ret;
  1989. }
  1990. static int resctrl_mkdir_event_configs(struct rdt_resource *r, struct kernfs_node *l3_mon_kn)
  1991. {
  1992. struct kernfs_node *kn_subdir, *kn_subdir2;
  1993. struct mon_evt *mevt;
  1994. int ret;
  1995. kn_subdir = kernfs_create_dir(l3_mon_kn, "event_configs", l3_mon_kn->mode, NULL);
  1996. if (IS_ERR(kn_subdir))
  1997. return PTR_ERR(kn_subdir);
  1998. ret = rdtgroup_kn_set_ugid(kn_subdir);
  1999. if (ret)
  2000. return ret;
  2001. for_each_mon_event(mevt) {
  2002. if (mevt->rid != r->rid || !mevt->enabled || !resctrl_is_mbm_event(mevt->evtid))
  2003. continue;
  2004. kn_subdir2 = kernfs_create_dir(kn_subdir, mevt->name, kn_subdir->mode, mevt);
  2005. if (IS_ERR(kn_subdir2)) {
  2006. ret = PTR_ERR(kn_subdir2);
  2007. goto out;
  2008. }
  2009. ret = rdtgroup_kn_set_ugid(kn_subdir2);
  2010. if (ret)
  2011. goto out;
  2012. ret = rdtgroup_add_files(kn_subdir2, RFTYPE_ASSIGN_CONFIG);
  2013. if (ret)
  2014. break;
  2015. }
  2016. out:
  2017. return ret;
  2018. }
  2019. static int rdtgroup_mkdir_info_resdir(void *priv, char *name,
  2020. unsigned long fflags)
  2021. {
  2022. struct kernfs_node *kn_subdir;
  2023. struct rdt_resource *r;
  2024. int ret;
  2025. kn_subdir = kernfs_create_dir(kn_info, name,
  2026. kn_info->mode, priv);
  2027. if (IS_ERR(kn_subdir))
  2028. return PTR_ERR(kn_subdir);
  2029. ret = rdtgroup_kn_set_ugid(kn_subdir);
  2030. if (ret)
  2031. return ret;
  2032. ret = rdtgroup_add_files(kn_subdir, fflags);
  2033. if (ret)
  2034. return ret;
  2035. if ((fflags & RFTYPE_MON_INFO) == RFTYPE_MON_INFO) {
  2036. r = priv;
  2037. if (r->mon.mbm_cntr_assignable) {
  2038. ret = resctrl_mkdir_event_configs(r, kn_subdir);
  2039. if (ret)
  2040. return ret;
  2041. /*
  2042. * Hide BMEC related files if mbm_event mode
  2043. * is enabled.
  2044. */
  2045. if (resctrl_arch_mbm_cntr_assign_enabled(r))
  2046. resctrl_bmec_files_show(r, kn_subdir, false);
  2047. }
  2048. }
  2049. kernfs_activate(kn_subdir);
  2050. return ret;
  2051. }
  2052. static unsigned long fflags_from_resource(struct rdt_resource *r)
  2053. {
  2054. switch (r->rid) {
  2055. case RDT_RESOURCE_L3:
  2056. case RDT_RESOURCE_L2:
  2057. return RFTYPE_RES_CACHE;
  2058. case RDT_RESOURCE_MBA:
  2059. case RDT_RESOURCE_SMBA:
  2060. return RFTYPE_RES_MB;
  2061. case RDT_RESOURCE_PERF_PKG:
  2062. return RFTYPE_RES_PERF_PKG;
  2063. }
  2064. return WARN_ON_ONCE(1);
  2065. }
  2066. static int rdtgroup_create_info_dir(struct kernfs_node *parent_kn)
  2067. {
  2068. struct resctrl_schema *s;
  2069. struct rdt_resource *r;
  2070. unsigned long fflags;
  2071. char name[32];
  2072. int ret;
  2073. /* create the directory */
  2074. kn_info = kernfs_create_dir(parent_kn, "info", parent_kn->mode, NULL);
  2075. if (IS_ERR(kn_info))
  2076. return PTR_ERR(kn_info);
  2077. ret = rdtgroup_add_files(kn_info, RFTYPE_TOP_INFO);
  2078. if (ret)
  2079. goto out_destroy;
  2080. /* loop over enabled controls, these are all alloc_capable */
  2081. list_for_each_entry(s, &resctrl_schema_all, list) {
  2082. r = s->res;
  2083. fflags = fflags_from_resource(r) | RFTYPE_CTRL_INFO;
  2084. ret = rdtgroup_mkdir_info_resdir(s, s->name, fflags);
  2085. if (ret)
  2086. goto out_destroy;
  2087. }
  2088. for_each_mon_capable_rdt_resource(r) {
  2089. fflags = fflags_from_resource(r) | RFTYPE_MON_INFO;
  2090. sprintf(name, "%s_MON", r->name);
  2091. ret = rdtgroup_mkdir_info_resdir(r, name, fflags);
  2092. if (ret)
  2093. goto out_destroy;
  2094. }
  2095. ret = rdtgroup_kn_set_ugid(kn_info);
  2096. if (ret)
  2097. goto out_destroy;
  2098. kernfs_activate(kn_info);
  2099. return 0;
  2100. out_destroy:
  2101. kernfs_remove(kn_info);
  2102. return ret;
  2103. }
  2104. static int
  2105. mongroup_create_dir(struct kernfs_node *parent_kn, struct rdtgroup *prgrp,
  2106. char *name, struct kernfs_node **dest_kn)
  2107. {
  2108. struct kernfs_node *kn;
  2109. int ret;
  2110. /* create the directory */
  2111. kn = kernfs_create_dir(parent_kn, name, parent_kn->mode, prgrp);
  2112. if (IS_ERR(kn))
  2113. return PTR_ERR(kn);
  2114. if (dest_kn)
  2115. *dest_kn = kn;
  2116. ret = rdtgroup_kn_set_ugid(kn);
  2117. if (ret)
  2118. goto out_destroy;
  2119. kernfs_activate(kn);
  2120. return 0;
  2121. out_destroy:
  2122. kernfs_remove(kn);
  2123. return ret;
  2124. }
  2125. static inline bool is_mba_linear(void)
  2126. {
  2127. return resctrl_arch_get_resource(RDT_RESOURCE_MBA)->membw.delay_linear;
  2128. }
  2129. static int mba_sc_domain_allocate(struct rdt_resource *r, struct rdt_ctrl_domain *d)
  2130. {
  2131. u32 num_closid = resctrl_arch_get_num_closid(r);
  2132. int cpu = cpumask_any(&d->hdr.cpu_mask);
  2133. int i;
  2134. d->mbps_val = kcalloc_node(num_closid, sizeof(*d->mbps_val),
  2135. GFP_KERNEL, cpu_to_node(cpu));
  2136. if (!d->mbps_val)
  2137. return -ENOMEM;
  2138. for (i = 0; i < num_closid; i++)
  2139. d->mbps_val[i] = MBA_MAX_MBPS;
  2140. return 0;
  2141. }
  2142. static void mba_sc_domain_destroy(struct rdt_resource *r,
  2143. struct rdt_ctrl_domain *d)
  2144. {
  2145. kfree(d->mbps_val);
  2146. d->mbps_val = NULL;
  2147. }
  2148. /*
  2149. * MBA software controller is supported only if
  2150. * MBM is supported and MBA is in linear scale,
  2151. * and the MBM monitor scope is the same as MBA
  2152. * control scope.
  2153. */
  2154. static bool supports_mba_mbps(void)
  2155. {
  2156. struct rdt_resource *rmbm = resctrl_arch_get_resource(RDT_RESOURCE_L3);
  2157. struct rdt_resource *r = resctrl_arch_get_resource(RDT_RESOURCE_MBA);
  2158. return (resctrl_is_mbm_enabled() &&
  2159. r->alloc_capable && is_mba_linear() &&
  2160. r->ctrl_scope == rmbm->mon_scope);
  2161. }
  2162. /*
  2163. * Enable or disable the MBA software controller
  2164. * which helps user specify bandwidth in MBps.
  2165. */
  2166. static int set_mba_sc(bool mba_sc)
  2167. {
  2168. struct rdt_resource *r = resctrl_arch_get_resource(RDT_RESOURCE_MBA);
  2169. u32 num_closid = resctrl_arch_get_num_closid(r);
  2170. struct rdt_ctrl_domain *d;
  2171. unsigned long fflags;
  2172. int i;
  2173. if (!supports_mba_mbps() || mba_sc == is_mba_sc(r))
  2174. return -EINVAL;
  2175. r->membw.mba_sc = mba_sc;
  2176. rdtgroup_default.mba_mbps_event = mba_mbps_default_event;
  2177. list_for_each_entry(d, &r->ctrl_domains, hdr.list) {
  2178. for (i = 0; i < num_closid; i++)
  2179. d->mbps_val[i] = MBA_MAX_MBPS;
  2180. }
  2181. fflags = mba_sc ? RFTYPE_CTRL_BASE | RFTYPE_MON_BASE : 0;
  2182. resctrl_file_fflags_init("mba_MBps_event", fflags);
  2183. return 0;
  2184. }
  2185. /*
  2186. * We don't allow rdtgroup directories to be created anywhere
  2187. * except the root directory. Thus when looking for the rdtgroup
  2188. * structure for a kernfs node we are either looking at a directory,
  2189. * in which case the rdtgroup structure is pointed at by the "priv"
  2190. * field, otherwise we have a file, and need only look to the parent
  2191. * to find the rdtgroup.
  2192. */
  2193. static struct rdtgroup *kernfs_to_rdtgroup(struct kernfs_node *kn)
  2194. {
  2195. if (kernfs_type(kn) == KERNFS_DIR) {
  2196. /*
  2197. * All the resource directories use "kn->priv"
  2198. * to point to the "struct rdtgroup" for the
  2199. * resource. "info" and its subdirectories don't
  2200. * have rdtgroup structures, so return NULL here.
  2201. */
  2202. if (kn == kn_info ||
  2203. rcu_access_pointer(kn->__parent) == kn_info)
  2204. return NULL;
  2205. else
  2206. return kn->priv;
  2207. } else {
  2208. return rdt_kn_parent_priv(kn);
  2209. }
  2210. }
  2211. static void rdtgroup_kn_get(struct rdtgroup *rdtgrp, struct kernfs_node *kn)
  2212. {
  2213. atomic_inc(&rdtgrp->waitcount);
  2214. kernfs_break_active_protection(kn);
  2215. }
  2216. static void rdtgroup_kn_put(struct rdtgroup *rdtgrp, struct kernfs_node *kn)
  2217. {
  2218. if (atomic_dec_and_test(&rdtgrp->waitcount) &&
  2219. (rdtgrp->flags & RDT_DELETED)) {
  2220. if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP ||
  2221. rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED)
  2222. rdtgroup_pseudo_lock_remove(rdtgrp);
  2223. kernfs_unbreak_active_protection(kn);
  2224. rdtgroup_remove(rdtgrp);
  2225. } else {
  2226. kernfs_unbreak_active_protection(kn);
  2227. }
  2228. }
  2229. struct rdtgroup *rdtgroup_kn_lock_live(struct kernfs_node *kn)
  2230. {
  2231. struct rdtgroup *rdtgrp = kernfs_to_rdtgroup(kn);
  2232. if (!rdtgrp)
  2233. return NULL;
  2234. rdtgroup_kn_get(rdtgrp, kn);
  2235. cpus_read_lock();
  2236. mutex_lock(&rdtgroup_mutex);
  2237. /* Was this group deleted while we waited? */
  2238. if (rdtgrp->flags & RDT_DELETED)
  2239. return NULL;
  2240. return rdtgrp;
  2241. }
  2242. void rdtgroup_kn_unlock(struct kernfs_node *kn)
  2243. {
  2244. struct rdtgroup *rdtgrp = kernfs_to_rdtgroup(kn);
  2245. if (!rdtgrp)
  2246. return;
  2247. mutex_unlock(&rdtgroup_mutex);
  2248. cpus_read_unlock();
  2249. rdtgroup_kn_put(rdtgrp, kn);
  2250. }
  /* Defined further down; rdt_get_tree() needs it before the definition. */
  static int mkdir_mondata_all(struct kernfs_node *parent_kn, struct rdtgroup *prgrp,
                               struct kernfs_node **mon_data_kn);
  2254. static void rdt_disable_ctx(void)
  2255. {
  2256. resctrl_arch_set_cdp_enabled(RDT_RESOURCE_L3, false);
  2257. resctrl_arch_set_cdp_enabled(RDT_RESOURCE_L2, false);
  2258. set_mba_sc(false);
  2259. resctrl_debug = false;
  2260. }
  2261. static int rdt_enable_ctx(struct rdt_fs_context *ctx)
  2262. {
  2263. int ret = 0;
  2264. if (ctx->enable_cdpl2) {
  2265. ret = resctrl_arch_set_cdp_enabled(RDT_RESOURCE_L2, true);
  2266. if (ret)
  2267. goto out_done;
  2268. }
  2269. if (ctx->enable_cdpl3) {
  2270. ret = resctrl_arch_set_cdp_enabled(RDT_RESOURCE_L3, true);
  2271. if (ret)
  2272. goto out_cdpl2;
  2273. }
  2274. if (ctx->enable_mba_mbps) {
  2275. ret = set_mba_sc(true);
  2276. if (ret)
  2277. goto out_cdpl3;
  2278. }
  2279. if (ctx->enable_debug)
  2280. resctrl_debug = true;
  2281. return 0;
  2282. out_cdpl3:
  2283. resctrl_arch_set_cdp_enabled(RDT_RESOURCE_L3, false);
  2284. out_cdpl2:
  2285. resctrl_arch_set_cdp_enabled(RDT_RESOURCE_L2, false);
  2286. out_done:
  2287. return ret;
  2288. }
  2289. static int schemata_list_add(struct rdt_resource *r, enum resctrl_conf_type type)
  2290. {
  2291. struct resctrl_schema *s;
  2292. const char *suffix = "";
  2293. int ret, cl;
  2294. s = kzalloc_obj(*s);
  2295. if (!s)
  2296. return -ENOMEM;
  2297. s->res = r;
  2298. s->num_closid = resctrl_arch_get_num_closid(r);
  2299. if (resctrl_arch_get_cdp_enabled(r->rid))
  2300. s->num_closid /= 2;
  2301. s->conf_type = type;
  2302. switch (type) {
  2303. case CDP_CODE:
  2304. suffix = "CODE";
  2305. break;
  2306. case CDP_DATA:
  2307. suffix = "DATA";
  2308. break;
  2309. case CDP_NONE:
  2310. suffix = "";
  2311. break;
  2312. }
  2313. ret = snprintf(s->name, sizeof(s->name), "%s%s", r->name, suffix);
  2314. if (ret >= sizeof(s->name)) {
  2315. kfree(s);
  2316. return -EINVAL;
  2317. }
  2318. cl = strlen(s->name);
  2319. /*
  2320. * If CDP is supported by this resource, but not enabled,
  2321. * include the suffix. This ensures the tabular format of the
  2322. * schemata file does not change between mounts of the filesystem.
  2323. */
  2324. if (r->cdp_capable && !resctrl_arch_get_cdp_enabled(r->rid))
  2325. cl += 4;
  2326. if (cl > max_name_width)
  2327. max_name_width = cl;
  2328. switch (r->schema_fmt) {
  2329. case RESCTRL_SCHEMA_BITMAP:
  2330. s->fmt_str = "%d=%x";
  2331. break;
  2332. case RESCTRL_SCHEMA_RANGE:
  2333. s->fmt_str = "%d=%u";
  2334. break;
  2335. }
  2336. if (WARN_ON_ONCE(!s->fmt_str)) {
  2337. kfree(s);
  2338. return -EINVAL;
  2339. }
  2340. INIT_LIST_HEAD(&s->list);
  2341. list_add(&s->list, &resctrl_schema_all);
  2342. return 0;
  2343. }
  2344. static int schemata_list_create(void)
  2345. {
  2346. struct rdt_resource *r;
  2347. int ret = 0;
  2348. for_each_alloc_capable_rdt_resource(r) {
  2349. if (resctrl_arch_get_cdp_enabled(r->rid)) {
  2350. ret = schemata_list_add(r, CDP_CODE);
  2351. if (ret)
  2352. break;
  2353. ret = schemata_list_add(r, CDP_DATA);
  2354. } else {
  2355. ret = schemata_list_add(r, CDP_NONE);
  2356. }
  2357. if (ret)
  2358. break;
  2359. }
  2360. return ret;
  2361. }
  2362. static void schemata_list_destroy(void)
  2363. {
  2364. struct resctrl_schema *s, *tmp;
  2365. list_for_each_entry_safe(s, tmp, &resctrl_schema_all, list) {
  2366. list_del(&s->list);
  2367. kfree(s);
  2368. }
  2369. }
  2370. static int rdt_get_tree(struct fs_context *fc)
  2371. {
  2372. struct rdt_fs_context *ctx = rdt_fc2context(fc);
  2373. unsigned long flags = RFTYPE_CTRL_BASE;
  2374. struct rdt_l3_mon_domain *dom;
  2375. struct rdt_resource *r;
  2376. int ret;
  2377. DO_ONCE_SLEEPABLE(resctrl_arch_pre_mount);
  2378. cpus_read_lock();
  2379. mutex_lock(&rdtgroup_mutex);
  2380. /*
  2381. * resctrl file system can only be mounted once.
  2382. */
  2383. if (resctrl_mounted) {
  2384. ret = -EBUSY;
  2385. goto out;
  2386. }
  2387. ret = setup_rmid_lru_list();
  2388. if (ret)
  2389. goto out;
  2390. ret = rdtgroup_setup_root(ctx);
  2391. if (ret)
  2392. goto out;
  2393. ret = rdt_enable_ctx(ctx);
  2394. if (ret)
  2395. goto out_root;
  2396. ret = schemata_list_create();
  2397. if (ret)
  2398. goto out_schemata_free;
  2399. ret = closid_init();
  2400. if (ret)
  2401. goto out_schemata_free;
  2402. if (resctrl_arch_mon_capable())
  2403. flags |= RFTYPE_MON;
  2404. ret = rdtgroup_add_files(rdtgroup_default.kn, flags);
  2405. if (ret)
  2406. goto out_closid_exit;
  2407. kernfs_activate(rdtgroup_default.kn);
  2408. ret = rdtgroup_create_info_dir(rdtgroup_default.kn);
  2409. if (ret < 0)
  2410. goto out_closid_exit;
  2411. if (resctrl_arch_mon_capable()) {
  2412. ret = mongroup_create_dir(rdtgroup_default.kn,
  2413. &rdtgroup_default, "mon_groups",
  2414. &kn_mongrp);
  2415. if (ret < 0)
  2416. goto out_info;
  2417. rdtgroup_assign_cntrs(&rdtgroup_default);
  2418. ret = mkdir_mondata_all(rdtgroup_default.kn,
  2419. &rdtgroup_default, &kn_mondata);
  2420. if (ret < 0)
  2421. goto out_mongrp;
  2422. rdtgroup_default.mon.mon_data_kn = kn_mondata;
  2423. }
  2424. ret = rdt_pseudo_lock_init();
  2425. if (ret)
  2426. goto out_mondata;
  2427. ret = kernfs_get_tree(fc);
  2428. if (ret < 0)
  2429. goto out_psl;
  2430. if (resctrl_arch_alloc_capable())
  2431. resctrl_arch_enable_alloc();
  2432. if (resctrl_arch_mon_capable())
  2433. resctrl_arch_enable_mon();
  2434. if (resctrl_arch_alloc_capable() || resctrl_arch_mon_capable())
  2435. resctrl_mounted = true;
  2436. if (resctrl_is_mbm_enabled()) {
  2437. r = resctrl_arch_get_resource(RDT_RESOURCE_L3);
  2438. list_for_each_entry(dom, &r->mon_domains, hdr.list)
  2439. mbm_setup_overflow_handler(dom, MBM_OVERFLOW_INTERVAL,
  2440. RESCTRL_PICK_ANY_CPU);
  2441. }
  2442. goto out;
  2443. out_psl:
  2444. rdt_pseudo_lock_release();
  2445. out_mondata:
  2446. if (resctrl_arch_mon_capable())
  2447. kernfs_remove(kn_mondata);
  2448. out_mongrp:
  2449. if (resctrl_arch_mon_capable()) {
  2450. rdtgroup_unassign_cntrs(&rdtgroup_default);
  2451. kernfs_remove(kn_mongrp);
  2452. }
  2453. out_info:
  2454. kernfs_remove(kn_info);
  2455. out_closid_exit:
  2456. closid_exit();
  2457. out_schemata_free:
  2458. schemata_list_destroy();
  2459. rdt_disable_ctx();
  2460. out_root:
  2461. rdtgroup_destroy_root();
  2462. out:
  2463. rdt_last_cmd_clear();
  2464. mutex_unlock(&rdtgroup_mutex);
  2465. cpus_read_unlock();
  2466. return ret;
  2467. }
  /* Identifiers for the mount options listed in rdt_fs_parameters[]. */
  enum rdt_param {
      Opt_cdp = 0,        /* "cdp" */
      Opt_cdpl2,          /* "cdpl2" */
      Opt_mba_mbps,       /* "mba_MBps" */
      Opt_debug,          /* "debug" */
      nr__rdt_params      /* number of options above; keep last */
  };
  2475. static const struct fs_parameter_spec rdt_fs_parameters[] = {
  2476. fsparam_flag("cdp", Opt_cdp),
  2477. fsparam_flag("cdpl2", Opt_cdpl2),
  2478. fsparam_flag("mba_MBps", Opt_mba_mbps),
  2479. fsparam_flag("debug", Opt_debug),
  2480. {}
  2481. };
  2482. static int rdt_parse_param(struct fs_context *fc, struct fs_parameter *param)
  2483. {
  2484. struct rdt_fs_context *ctx = rdt_fc2context(fc);
  2485. struct fs_parse_result result;
  2486. const char *msg;
  2487. int opt;
  2488. opt = fs_parse(fc, rdt_fs_parameters, param, &result);
  2489. if (opt < 0)
  2490. return opt;
  2491. switch (opt) {
  2492. case Opt_cdp:
  2493. ctx->enable_cdpl3 = true;
  2494. return 0;
  2495. case Opt_cdpl2:
  2496. ctx->enable_cdpl2 = true;
  2497. return 0;
  2498. case Opt_mba_mbps:
  2499. msg = "mba_MBps requires MBM and linear scale MBA at L3 scope";
  2500. if (!supports_mba_mbps())
  2501. return invalfc(fc, msg);
  2502. ctx->enable_mba_mbps = true;
  2503. return 0;
  2504. case Opt_debug:
  2505. ctx->enable_debug = true;
  2506. return 0;
  2507. }
  2508. return -EINVAL;
  2509. }
  /*
   * fs_context ->free() handler: release the kernfs part of the context,
   * then the resctrl context allocated in rdt_init_fs_context().
   */
  static void rdt_fs_context_free(struct fs_context *fc)
  {
      struct rdt_fs_context *rdt_ctx = rdt_fc2context(fc);

      kernfs_free_fs_context(fc);
      kfree(rdt_ctx);
  }
  2516. static const struct fs_context_operations rdt_fs_context_ops = {
  2517. .free = rdt_fs_context_free,
  2518. .parse_param = rdt_parse_param,
  2519. .get_tree = rdt_get_tree,
  2520. };
  2521. static int rdt_init_fs_context(struct fs_context *fc)
  2522. {
  2523. struct rdt_fs_context *ctx;
  2524. ctx = kzalloc_obj(*ctx);
  2525. if (!ctx)
  2526. return -ENOMEM;
  2527. ctx->kfc.magic = RDTGROUP_SUPER_MAGIC;
  2528. fc->fs_private = &ctx->kfc;
  2529. fc->ops = &rdt_fs_context_ops;
  2530. put_user_ns(fc->user_ns);
  2531. fc->user_ns = get_user_ns(&init_user_ns);
  2532. fc->global = true;
  2533. return 0;
  2534. }
  2535. /*
  2536. * Move tasks from one to the other group. If @from is NULL, then all tasks
  2537. * in the systems are moved unconditionally (used for teardown).
  2538. *
  2539. * If @mask is not NULL the cpus on which moved tasks are running are set
  2540. * in that mask so the update smp function call is restricted to affected
  2541. * cpus.
  2542. */
  2543. static void rdt_move_group_tasks(struct rdtgroup *from, struct rdtgroup *to,
  2544. struct cpumask *mask)
  2545. {
  2546. struct task_struct *p, *t;
  2547. read_lock(&tasklist_lock);
  2548. for_each_process_thread(p, t) {
  2549. if (!from || is_closid_match(t, from) ||
  2550. is_rmid_match(t, from)) {
  2551. resctrl_arch_set_closid_rmid(t, to->closid,
  2552. to->mon.rmid);
  2553. /*
  2554. * Order the closid/rmid stores above before the loads
  2555. * in task_curr(). This pairs with the full barrier
  2556. * between the rq->curr update and
  2557. * resctrl_arch_sched_in() during context switch.
  2558. */
  2559. smp_mb();
  2560. /*
  2561. * If the task is on a CPU, set the CPU in the mask.
  2562. * The detection is inaccurate as tasks might move or
  2563. * schedule before the smp function call takes place.
  2564. * In such a case the function call is pointless, but
  2565. * there is no other side effect.
  2566. */
  2567. if (IS_ENABLED(CONFIG_SMP) && mask && task_curr(t))
  2568. cpumask_set_cpu(task_cpu(t), mask);
  2569. }
  2570. }
  2571. read_unlock(&tasklist_lock);
  2572. }
  2573. static void free_all_child_rdtgrp(struct rdtgroup *rdtgrp)
  2574. {
  2575. struct rdtgroup *sentry, *stmp;
  2576. struct list_head *head;
  2577. head = &rdtgrp->mon.crdtgrp_list;
  2578. list_for_each_entry_safe(sentry, stmp, head, mon.crdtgrp_list) {
  2579. rdtgroup_unassign_cntrs(sentry);
  2580. free_rmid(sentry->closid, sentry->mon.rmid);
  2581. list_del(&sentry->mon.crdtgrp_list);
  2582. if (atomic_read(&sentry->waitcount) != 0)
  2583. sentry->flags = RDT_DELETED;
  2584. else
  2585. rdtgroup_remove(sentry);
  2586. }
  2587. }
  2588. /*
  2589. * Forcibly remove all of subdirectories under root.
  2590. */
  2591. static void rmdir_all_sub(void)
  2592. {
  2593. struct rdtgroup *rdtgrp, *tmp;
  2594. /* Move all tasks to the default resource group */
  2595. rdt_move_group_tasks(NULL, &rdtgroup_default, NULL);
  2596. list_for_each_entry_safe(rdtgrp, tmp, &rdt_all_groups, rdtgroup_list) {
  2597. /* Free any child rmids */
  2598. free_all_child_rdtgrp(rdtgrp);
  2599. /* Remove each rdtgroup other than root */
  2600. if (rdtgrp == &rdtgroup_default)
  2601. continue;
  2602. if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP ||
  2603. rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED)
  2604. rdtgroup_pseudo_lock_remove(rdtgrp);
  2605. /*
  2606. * Give any CPUs back to the default group. We cannot copy
  2607. * cpu_online_mask because a CPU might have executed the
  2608. * offline callback already, but is still marked online.
  2609. */
  2610. cpumask_or(&rdtgroup_default.cpu_mask,
  2611. &rdtgroup_default.cpu_mask, &rdtgrp->cpu_mask);
  2612. rdtgroup_unassign_cntrs(rdtgrp);
  2613. free_rmid(rdtgrp->closid, rdtgrp->mon.rmid);
  2614. kernfs_remove(rdtgrp->kn);
  2615. list_del(&rdtgrp->rdtgroup_list);
  2616. if (atomic_read(&rdtgrp->waitcount) != 0)
  2617. rdtgrp->flags = RDT_DELETED;
  2618. else
  2619. rdtgroup_remove(rdtgrp);
  2620. }
  2621. /* Notify online CPUs to update per cpu storage and PQR_ASSOC MSR */
  2622. update_closid_rmid(cpu_online_mask, &rdtgroup_default);
  2623. kernfs_remove(kn_info);
  2624. kernfs_remove(kn_mongrp);
  2625. kernfs_remove(kn_mondata);
  2626. }
  2627. /**
  2628. * mon_get_kn_priv() - Get the mon_data priv data for this event.
  2629. *
  2630. * The same values are used across the mon_data directories of all control and
  2631. * monitor groups for the same event in the same domain. Keep a list of
  2632. * allocated structures and re-use an existing one with the same values for
  2633. * @rid, @domid, etc.
  2634. *
  2635. * @rid: The resource id for the event file being created.
  2636. * @domid: The domain id for the event file being created.
  2637. * @mevt: The type of event file being created.
  2638. * @do_sum: Whether SNC summing monitors are being created. Only set
  2639. * when @rid == RDT_RESOURCE_L3.
  2640. */
  2641. static struct mon_data *mon_get_kn_priv(enum resctrl_res_level rid, int domid,
  2642. struct mon_evt *mevt,
  2643. bool do_sum)
  2644. {
  2645. struct mon_data *priv;
  2646. lockdep_assert_held(&rdtgroup_mutex);
  2647. list_for_each_entry(priv, &mon_data_kn_priv_list, list) {
  2648. if (priv->rid == rid && priv->domid == domid &&
  2649. priv->sum == do_sum && priv->evt == mevt)
  2650. return priv;
  2651. }
  2652. priv = kzalloc_obj(*priv);
  2653. if (!priv)
  2654. return NULL;
  2655. priv->rid = rid;
  2656. priv->domid = domid;
  2657. priv->sum = do_sum;
  2658. priv->evt = mevt;
  2659. list_add_tail(&priv->list, &mon_data_kn_priv_list);
  2660. return priv;
  2661. }
  2662. /**
  2663. * mon_put_kn_priv() - Free all allocated mon_data structures.
  2664. *
  2665. * Called when resctrl file system is unmounted.
  2666. */
  2667. static void mon_put_kn_priv(void)
  2668. {
  2669. struct mon_data *priv, *tmp;
  2670. lockdep_assert_held(&rdtgroup_mutex);
  2671. list_for_each_entry_safe(priv, tmp, &mon_data_kn_priv_list, list) {
  2672. list_del(&priv->list);
  2673. kfree(priv);
  2674. }
  2675. }
  2676. static void resctrl_fs_teardown(void)
  2677. {
  2678. lockdep_assert_held(&rdtgroup_mutex);
  2679. /* Cleared by rdtgroup_destroy_root() */
  2680. if (!rdtgroup_default.kn)
  2681. return;
  2682. rmdir_all_sub();
  2683. rdtgroup_unassign_cntrs(&rdtgroup_default);
  2684. mon_put_kn_priv();
  2685. rdt_pseudo_lock_release();
  2686. rdtgroup_default.mode = RDT_MODE_SHAREABLE;
  2687. closid_exit();
  2688. schemata_list_destroy();
  2689. rdtgroup_destroy_root();
  2690. }
  2691. static void rdt_kill_sb(struct super_block *sb)
  2692. {
  2693. struct rdt_resource *r;
  2694. cpus_read_lock();
  2695. mutex_lock(&rdtgroup_mutex);
  2696. rdt_disable_ctx();
  2697. /* Put everything back to default values. */
  2698. for_each_alloc_capable_rdt_resource(r)
  2699. resctrl_arch_reset_all_ctrls(r);
  2700. resctrl_fs_teardown();
  2701. if (resctrl_arch_alloc_capable())
  2702. resctrl_arch_disable_alloc();
  2703. if (resctrl_arch_mon_capable())
  2704. resctrl_arch_disable_mon();
  2705. resctrl_mounted = false;
  2706. kernfs_kill_sb(sb);
  2707. mutex_unlock(&rdtgroup_mutex);
  2708. cpus_read_unlock();
  2709. }
  2710. static struct file_system_type rdt_fs_type = {
  2711. .name = "resctrl",
  2712. .init_fs_context = rdt_init_fs_context,
  2713. .parameters = rdt_fs_parameters,
  2714. .kill_sb = rdt_kill_sb,
  2715. };
  2716. static int mon_addfile(struct kernfs_node *parent_kn, const char *name,
  2717. void *priv)
  2718. {
  2719. struct kernfs_node *kn;
  2720. int ret = 0;
  2721. kn = __kernfs_create_file(parent_kn, name, 0444,
  2722. GLOBAL_ROOT_UID, GLOBAL_ROOT_GID, 0,
  2723. &kf_mondata_ops, priv, NULL, NULL);
  2724. if (IS_ERR(kn))
  2725. return PTR_ERR(kn);
  2726. ret = rdtgroup_kn_set_ugid(kn);
  2727. if (ret) {
  2728. kernfs_remove(kn);
  2729. return ret;
  2730. }
  2731. return ret;
  2732. }
  2733. static void mon_rmdir_one_subdir(struct kernfs_node *pkn, char *name, char *subname)
  2734. {
  2735. struct kernfs_node *kn;
  2736. kn = kernfs_find_and_get(pkn, name);
  2737. if (!kn)
  2738. return;
  2739. kernfs_put(kn);
  2740. if (kn->dir.subdirs <= 1)
  2741. kernfs_remove(kn);
  2742. else
  2743. kernfs_remove_by_name(kn, subname);
  2744. }
  2745. /*
  2746. * Remove files and directories for one SNC node. If it is the last node
  2747. * sharing an L3 cache, then remove the upper level directory containing
  2748. * the "sum" files too.
  2749. */
  2750. static void rmdir_mondata_subdir_allrdtgrp_snc(struct rdt_resource *r,
  2751. struct rdt_domain_hdr *hdr)
  2752. {
  2753. struct rdtgroup *prgrp, *crgrp;
  2754. struct rdt_l3_mon_domain *d;
  2755. char subname[32];
  2756. char name[32];
  2757. if (!domain_header_is_valid(hdr, RESCTRL_MON_DOMAIN, RDT_RESOURCE_L3))
  2758. return;
  2759. d = container_of(hdr, struct rdt_l3_mon_domain, hdr);
  2760. sprintf(name, "mon_%s_%02d", r->name, d->ci_id);
  2761. sprintf(subname, "mon_sub_%s_%02d", r->name, hdr->id);
  2762. list_for_each_entry(prgrp, &rdt_all_groups, rdtgroup_list) {
  2763. mon_rmdir_one_subdir(prgrp->mon.mon_data_kn, name, subname);
  2764. list_for_each_entry(crgrp, &prgrp->mon.crdtgrp_list, mon.crdtgrp_list)
  2765. mon_rmdir_one_subdir(crgrp->mon.mon_data_kn, name, subname);
  2766. }
  2767. }
  2768. /*
  2769. * Remove all subdirectories of mon_data of ctrl_mon groups
  2770. * and monitor groups for the given domain.
  2771. */
  2772. static void rmdir_mondata_subdir_allrdtgrp(struct rdt_resource *r,
  2773. struct rdt_domain_hdr *hdr)
  2774. {
  2775. struct rdtgroup *prgrp, *crgrp;
  2776. char name[32];
  2777. if (r->rid == RDT_RESOURCE_L3 && r->mon_scope == RESCTRL_L3_NODE) {
  2778. rmdir_mondata_subdir_allrdtgrp_snc(r, hdr);
  2779. return;
  2780. }
  2781. sprintf(name, "mon_%s_%02d", r->name, hdr->id);
  2782. list_for_each_entry(prgrp, &rdt_all_groups, rdtgroup_list) {
  2783. kernfs_remove_by_name(prgrp->mon.mon_data_kn, name);
  2784. list_for_each_entry(crgrp, &prgrp->mon.crdtgrp_list, mon.crdtgrp_list)
  2785. kernfs_remove_by_name(crgrp->mon.mon_data_kn, name);
  2786. }
  2787. }
  2788. /*
  2789. * Create a directory for a domain and populate it with monitor files. Create
  2790. * summing monitors when @hdr is NULL. No need to initialize summing monitors.
  2791. */
  2792. static struct kernfs_node *_mkdir_mondata_subdir(struct kernfs_node *parent_kn, char *name,
  2793. struct rdt_domain_hdr *hdr,
  2794. struct rdt_resource *r,
  2795. struct rdtgroup *prgrp, int domid)
  2796. {
  2797. struct rmid_read rr = {0};
  2798. struct kernfs_node *kn;
  2799. struct mon_data *priv;
  2800. struct mon_evt *mevt;
  2801. int ret;
  2802. kn = kernfs_create_dir(parent_kn, name, parent_kn->mode, prgrp);
  2803. if (IS_ERR(kn))
  2804. return kn;
  2805. ret = rdtgroup_kn_set_ugid(kn);
  2806. if (ret)
  2807. goto out_destroy;
  2808. for_each_mon_event(mevt) {
  2809. if (mevt->rid != r->rid || !mevt->enabled)
  2810. continue;
  2811. priv = mon_get_kn_priv(r->rid, domid, mevt, !hdr);
  2812. if (WARN_ON_ONCE(!priv)) {
  2813. ret = -EINVAL;
  2814. goto out_destroy;
  2815. }
  2816. ret = mon_addfile(kn, mevt->name, priv);
  2817. if (ret)
  2818. goto out_destroy;
  2819. if (hdr && resctrl_is_mbm_event(mevt->evtid))
  2820. mon_event_read(&rr, r, hdr, prgrp, &hdr->cpu_mask, mevt, true);
  2821. }
  2822. return kn;
  2823. out_destroy:
  2824. kernfs_remove(kn);
  2825. return ERR_PTR(ret);
  2826. }
  2827. static int mkdir_mondata_subdir_snc(struct kernfs_node *parent_kn,
  2828. struct rdt_domain_hdr *hdr,
  2829. struct rdt_resource *r, struct rdtgroup *prgrp)
  2830. {
  2831. struct kernfs_node *ckn, *kn;
  2832. struct rdt_l3_mon_domain *d;
  2833. char name[32];
  2834. if (!domain_header_is_valid(hdr, RESCTRL_MON_DOMAIN, RDT_RESOURCE_L3))
  2835. return -EINVAL;
  2836. d = container_of(hdr, struct rdt_l3_mon_domain, hdr);
  2837. sprintf(name, "mon_%s_%02d", r->name, d->ci_id);
  2838. kn = kernfs_find_and_get(parent_kn, name);
  2839. if (kn) {
  2840. /*
  2841. * rdtgroup_mutex will prevent this directory from being
  2842. * removed. No need to keep this hold.
  2843. */
  2844. kernfs_put(kn);
  2845. } else {
  2846. kn = _mkdir_mondata_subdir(parent_kn, name, NULL, r, prgrp, d->ci_id);
  2847. if (IS_ERR(kn))
  2848. return PTR_ERR(kn);
  2849. }
  2850. sprintf(name, "mon_sub_%s_%02d", r->name, hdr->id);
  2851. ckn = _mkdir_mondata_subdir(kn, name, hdr, r, prgrp, hdr->id);
  2852. if (IS_ERR(ckn)) {
  2853. kernfs_remove(kn);
  2854. return PTR_ERR(ckn);
  2855. }
  2856. kernfs_activate(kn);
  2857. return 0;
  2858. }
  2859. static int mkdir_mondata_subdir(struct kernfs_node *parent_kn,
  2860. struct rdt_domain_hdr *hdr,
  2861. struct rdt_resource *r, struct rdtgroup *prgrp)
  2862. {
  2863. struct kernfs_node *kn;
  2864. char name[32];
  2865. lockdep_assert_held(&rdtgroup_mutex);
  2866. if (r->rid == RDT_RESOURCE_L3 && r->mon_scope == RESCTRL_L3_NODE)
  2867. return mkdir_mondata_subdir_snc(parent_kn, hdr, r, prgrp);
  2868. sprintf(name, "mon_%s_%02d", r->name, hdr->id);
  2869. kn = _mkdir_mondata_subdir(parent_kn, name, hdr, r, prgrp, hdr->id);
  2870. if (IS_ERR(kn))
  2871. return PTR_ERR(kn);
  2872. kernfs_activate(kn);
  2873. return 0;
  2874. }
  2875. /*
  2876. * Add all subdirectories of mon_data for "ctrl_mon" groups
  2877. * and "monitor" groups with given domain id.
  2878. */
  2879. static void mkdir_mondata_subdir_allrdtgrp(struct rdt_resource *r,
  2880. struct rdt_domain_hdr *hdr)
  2881. {
  2882. struct kernfs_node *parent_kn;
  2883. struct rdtgroup *prgrp, *crgrp;
  2884. struct list_head *head;
  2885. list_for_each_entry(prgrp, &rdt_all_groups, rdtgroup_list) {
  2886. parent_kn = prgrp->mon.mon_data_kn;
  2887. mkdir_mondata_subdir(parent_kn, hdr, r, prgrp);
  2888. head = &prgrp->mon.crdtgrp_list;
  2889. list_for_each_entry(crgrp, head, mon.crdtgrp_list) {
  2890. parent_kn = crgrp->mon.mon_data_kn;
  2891. mkdir_mondata_subdir(parent_kn, hdr, r, crgrp);
  2892. }
  2893. }
  2894. }
  2895. static int mkdir_mondata_subdir_alldom(struct kernfs_node *parent_kn,
  2896. struct rdt_resource *r,
  2897. struct rdtgroup *prgrp)
  2898. {
  2899. struct rdt_domain_hdr *hdr;
  2900. int ret;
  2901. /* Walking r->domains, ensure it can't race with cpuhp */
  2902. lockdep_assert_cpus_held();
  2903. list_for_each_entry(hdr, &r->mon_domains, list) {
  2904. ret = mkdir_mondata_subdir(parent_kn, hdr, r, prgrp);
  2905. if (ret)
  2906. return ret;
  2907. }
  2908. return 0;
  2909. }
  2910. /*
  2911. * This creates a directory mon_data which contains the monitored data.
  2912. *
  2913. * mon_data has one directory for each domain which are named
  2914. * in the format mon_<domain_name>_<domain_id>. For ex: A mon_data
  2915. * with L3 domain looks as below:
  2916. * ./mon_data:
  2917. * mon_L3_00
  2918. * mon_L3_01
  2919. * mon_L3_02
  2920. * ...
  2921. *
  2922. * Each domain directory has one file per event:
  2923. * ./mon_L3_00/:
  2924. * llc_occupancy
  2925. *
  2926. */
  2927. static int mkdir_mondata_all(struct kernfs_node *parent_kn,
  2928. struct rdtgroup *prgrp,
  2929. struct kernfs_node **dest_kn)
  2930. {
  2931. struct rdt_resource *r;
  2932. struct kernfs_node *kn;
  2933. int ret;
  2934. /*
  2935. * Create the mon_data directory first.
  2936. */
  2937. ret = mongroup_create_dir(parent_kn, prgrp, "mon_data", &kn);
  2938. if (ret)
  2939. return ret;
  2940. if (dest_kn)
  2941. *dest_kn = kn;
  2942. /*
  2943. * Create the subdirectories for each domain. Note that all events
  2944. * in a domain like L3 are grouped into a resource whose domain is L3
  2945. */
  2946. for_each_mon_capable_rdt_resource(r) {
  2947. ret = mkdir_mondata_subdir_alldom(kn, r, prgrp);
  2948. if (ret)
  2949. goto out_destroy;
  2950. }
  2951. return 0;
  2952. out_destroy:
  2953. kernfs_remove(kn);
  2954. return ret;
  2955. }
  2956. /**
  2957. * cbm_ensure_valid - Enforce validity on provided CBM
  2958. * @_val: Candidate CBM
  2959. * @r: RDT resource to which the CBM belongs
  2960. *
  2961. * The provided CBM represents all cache portions available for use. This
  2962. * may be represented by a bitmap that does not consist of contiguous ones
  2963. * and thus be an invalid CBM.
  2964. * Here the provided CBM is forced to be a valid CBM by only considering
  2965. * the first set of contiguous bits as valid and clearing all bits.
  2966. * The intention here is to provide a valid default CBM with which a new
  2967. * resource group is initialized. The user can follow this with a
  2968. * modification to the CBM if the default does not satisfy the
  2969. * requirements.
  2970. */
  2971. static u32 cbm_ensure_valid(u32 _val, struct rdt_resource *r)
  2972. {
  2973. unsigned int cbm_len = r->cache.cbm_len;
  2974. unsigned long first_bit, zero_bit;
  2975. unsigned long val;
  2976. if (!_val || r->cache.arch_has_sparse_bitmasks)
  2977. return _val;
  2978. val = _val;
  2979. first_bit = find_first_bit(&val, cbm_len);
  2980. zero_bit = find_next_zero_bit(&val, cbm_len, first_bit);
  2981. /* Clear any remaining bits to ensure contiguous region */
  2982. bitmap_clear(&val, zero_bit, cbm_len - zero_bit);
  2983. return (u32)val;
  2984. }
  2985. /*
  2986. * Initialize cache resources per RDT domain
  2987. *
  2988. * Set the RDT domain up to start off with all usable allocations. That is,
  2989. * all shareable and unused bits. All-zero CBM is invalid.
  2990. */
  2991. static int __init_one_rdt_domain(struct rdt_ctrl_domain *d, struct resctrl_schema *s,
  2992. u32 closid)
  2993. {
  2994. enum resctrl_conf_type peer_type = resctrl_peer_type(s->conf_type);
  2995. enum resctrl_conf_type t = s->conf_type;
  2996. struct resctrl_staged_config *cfg;
  2997. struct rdt_resource *r = s->res;
  2998. u32 used_b = 0, unused_b = 0;
  2999. unsigned long tmp_cbm;
  3000. enum rdtgrp_mode mode;
  3001. u32 peer_ctl, ctrl_val;
  3002. int i;
  3003. cfg = &d->staged_config[t];
  3004. cfg->have_new_ctrl = false;
  3005. cfg->new_ctrl = r->cache.shareable_bits;
  3006. used_b = r->cache.shareable_bits;
  3007. for (i = 0; i < closids_supported(); i++) {
  3008. if (closid_allocated(i) && i != closid) {
  3009. mode = rdtgroup_mode_by_closid(i);
  3010. if (mode == RDT_MODE_PSEUDO_LOCKSETUP)
  3011. /*
  3012. * ctrl values for locksetup aren't relevant
  3013. * until the schemata is written, and the mode
  3014. * becomes RDT_MODE_PSEUDO_LOCKED.
  3015. */
  3016. continue;
  3017. /*
  3018. * If CDP is active include peer domain's
  3019. * usage to ensure there is no overlap
  3020. * with an exclusive group.
  3021. */
  3022. if (resctrl_arch_get_cdp_enabled(r->rid))
  3023. peer_ctl = resctrl_arch_get_config(r, d, i,
  3024. peer_type);
  3025. else
  3026. peer_ctl = 0;
  3027. ctrl_val = resctrl_arch_get_config(r, d, i,
  3028. s->conf_type);
  3029. used_b |= ctrl_val | peer_ctl;
  3030. if (mode == RDT_MODE_SHAREABLE)
  3031. cfg->new_ctrl |= ctrl_val | peer_ctl;
  3032. }
  3033. }
  3034. if (d->plr && d->plr->cbm > 0)
  3035. used_b |= d->plr->cbm;
  3036. unused_b = used_b ^ (BIT_MASK(r->cache.cbm_len) - 1);
  3037. unused_b &= BIT_MASK(r->cache.cbm_len) - 1;
  3038. cfg->new_ctrl |= unused_b;
  3039. /*
  3040. * Force the initial CBM to be valid, user can
  3041. * modify the CBM based on system availability.
  3042. */
  3043. cfg->new_ctrl = cbm_ensure_valid(cfg->new_ctrl, r);
  3044. /*
  3045. * Assign the u32 CBM to an unsigned long to ensure that
  3046. * bitmap_weight() does not access out-of-bound memory.
  3047. */
  3048. tmp_cbm = cfg->new_ctrl;
  3049. if (bitmap_weight(&tmp_cbm, r->cache.cbm_len) < r->cache.min_cbm_bits) {
  3050. rdt_last_cmd_printf("No space on %s:%d\n", s->name, d->hdr.id);
  3051. return -ENOSPC;
  3052. }
  3053. cfg->have_new_ctrl = true;
  3054. return 0;
  3055. }
  3056. /*
  3057. * Initialize cache resources with default values.
  3058. *
  3059. * A new RDT group is being created on an allocation capable (CAT)
  3060. * supporting system. Set this group up to start off with all usable
  3061. * allocations.
  3062. *
  3063. * If there are no more shareable bits available on any domain then
  3064. * the entire allocation will fail.
  3065. */
  3066. int rdtgroup_init_cat(struct resctrl_schema *s, u32 closid)
  3067. {
  3068. struct rdt_ctrl_domain *d;
  3069. int ret;
  3070. list_for_each_entry(d, &s->res->ctrl_domains, hdr.list) {
  3071. ret = __init_one_rdt_domain(d, s, closid);
  3072. if (ret < 0)
  3073. return ret;
  3074. }
  3075. return 0;
  3076. }
  3077. /* Initialize MBA resource with default values. */
  3078. static void rdtgroup_init_mba(struct rdt_resource *r, u32 closid)
  3079. {
  3080. struct resctrl_staged_config *cfg;
  3081. struct rdt_ctrl_domain *d;
  3082. list_for_each_entry(d, &r->ctrl_domains, hdr.list) {
  3083. if (is_mba_sc(r)) {
  3084. d->mbps_val[closid] = MBA_MAX_MBPS;
  3085. continue;
  3086. }
  3087. cfg = &d->staged_config[CDP_NONE];
  3088. cfg->new_ctrl = resctrl_get_default_ctrl(r);
  3089. cfg->have_new_ctrl = true;
  3090. }
  3091. }
  3092. /* Initialize the RDT group's allocations. */
  3093. static int rdtgroup_init_alloc(struct rdtgroup *rdtgrp)
  3094. {
  3095. struct resctrl_schema *s;
  3096. struct rdt_resource *r;
  3097. int ret = 0;
  3098. rdt_staged_configs_clear();
  3099. list_for_each_entry(s, &resctrl_schema_all, list) {
  3100. r = s->res;
  3101. if (r->rid == RDT_RESOURCE_MBA ||
  3102. r->rid == RDT_RESOURCE_SMBA) {
  3103. rdtgroup_init_mba(r, rdtgrp->closid);
  3104. if (is_mba_sc(r))
  3105. continue;
  3106. } else {
  3107. ret = rdtgroup_init_cat(s, rdtgrp->closid);
  3108. if (ret < 0)
  3109. goto out;
  3110. }
  3111. ret = resctrl_arch_update_domains(r, rdtgrp->closid);
  3112. if (ret < 0) {
  3113. rdt_last_cmd_puts("Failed to initialize allocations\n");
  3114. goto out;
  3115. }
  3116. }
  3117. rdtgrp->mode = RDT_MODE_SHAREABLE;
  3118. out:
  3119. rdt_staged_configs_clear();
  3120. return ret;
  3121. }
  3122. static int mkdir_rdt_prepare_rmid_alloc(struct rdtgroup *rdtgrp)
  3123. {
  3124. int ret;
  3125. if (!resctrl_arch_mon_capable())
  3126. return 0;
  3127. ret = alloc_rmid(rdtgrp->closid);
  3128. if (ret < 0) {
  3129. rdt_last_cmd_puts("Out of RMIDs\n");
  3130. return ret;
  3131. }
  3132. rdtgrp->mon.rmid = ret;
  3133. rdtgroup_assign_cntrs(rdtgrp);
  3134. ret = mkdir_mondata_all(rdtgrp->kn, rdtgrp, &rdtgrp->mon.mon_data_kn);
  3135. if (ret) {
  3136. rdt_last_cmd_puts("kernfs subdir error\n");
  3137. rdtgroup_unassign_cntrs(rdtgrp);
  3138. free_rmid(rdtgrp->closid, rdtgrp->mon.rmid);
  3139. return ret;
  3140. }
  3141. return 0;
  3142. }
  3143. static void mkdir_rdt_prepare_rmid_free(struct rdtgroup *rgrp)
  3144. {
  3145. if (resctrl_arch_mon_capable()) {
  3146. rdtgroup_unassign_cntrs(rgrp);
  3147. free_rmid(rgrp->closid, rgrp->mon.rmid);
  3148. }
  3149. }
  3150. /*
  3151. * We allow creating mon groups only with in a directory called "mon_groups"
  3152. * which is present in every ctrl_mon group. Check if this is a valid
  3153. * "mon_groups" directory.
  3154. *
  3155. * 1. The directory should be named "mon_groups".
  3156. * 2. The mon group itself should "not" be named "mon_groups".
  3157. * This makes sure "mon_groups" directory always has a ctrl_mon group
  3158. * as parent.
  3159. */
  3160. static bool is_mon_groups(struct kernfs_node *kn, const char *name)
  3161. {
  3162. return (!strcmp(rdt_kn_name(kn), "mon_groups") &&
  3163. strcmp(name, "mon_groups"));
  3164. }
  3165. static int mkdir_rdt_prepare(struct kernfs_node *parent_kn,
  3166. const char *name, umode_t mode,
  3167. enum rdt_group_type rtype, struct rdtgroup **r)
  3168. {
  3169. struct rdtgroup *prdtgrp, *rdtgrp;
  3170. unsigned long files = 0;
  3171. struct kernfs_node *kn;
  3172. int ret;
  3173. prdtgrp = rdtgroup_kn_lock_live(parent_kn);
  3174. if (!prdtgrp) {
  3175. ret = -ENODEV;
  3176. goto out_unlock;
  3177. }
  3178. rdt_last_cmd_clear();
  3179. /*
  3180. * Check that the parent directory for a monitor group is a "mon_groups"
  3181. * directory.
  3182. */
  3183. if (rtype == RDTMON_GROUP && !is_mon_groups(parent_kn, name)) {
  3184. ret = -EPERM;
  3185. goto out_unlock;
  3186. }
  3187. if (rtype == RDTMON_GROUP &&
  3188. (prdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP ||
  3189. prdtgrp->mode == RDT_MODE_PSEUDO_LOCKED)) {
  3190. ret = -EINVAL;
  3191. rdt_last_cmd_puts("Pseudo-locking in progress\n");
  3192. goto out_unlock;
  3193. }
  3194. /* allocate the rdtgroup. */
  3195. rdtgrp = kzalloc_obj(*rdtgrp);
  3196. if (!rdtgrp) {
  3197. ret = -ENOSPC;
  3198. rdt_last_cmd_puts("Kernel out of memory\n");
  3199. goto out_unlock;
  3200. }
  3201. *r = rdtgrp;
  3202. rdtgrp->mon.parent = prdtgrp;
  3203. rdtgrp->type = rtype;
  3204. INIT_LIST_HEAD(&rdtgrp->mon.crdtgrp_list);
  3205. /* kernfs creates the directory for rdtgrp */
  3206. kn = kernfs_create_dir(parent_kn, name, mode, rdtgrp);
  3207. if (IS_ERR(kn)) {
  3208. ret = PTR_ERR(kn);
  3209. rdt_last_cmd_puts("kernfs create error\n");
  3210. goto out_free_rgrp;
  3211. }
  3212. rdtgrp->kn = kn;
  3213. /*
  3214. * kernfs_remove() will drop the reference count on "kn" which
  3215. * will free it. But we still need it to stick around for the
  3216. * rdtgroup_kn_unlock(kn) call. Take one extra reference here,
  3217. * which will be dropped by kernfs_put() in rdtgroup_remove().
  3218. */
  3219. kernfs_get(kn);
  3220. ret = rdtgroup_kn_set_ugid(kn);
  3221. if (ret) {
  3222. rdt_last_cmd_puts("kernfs perm error\n");
  3223. goto out_destroy;
  3224. }
  3225. if (rtype == RDTCTRL_GROUP) {
  3226. files = RFTYPE_BASE | RFTYPE_CTRL;
  3227. if (resctrl_arch_mon_capable())
  3228. files |= RFTYPE_MON;
  3229. } else {
  3230. files = RFTYPE_BASE | RFTYPE_MON;
  3231. }
  3232. ret = rdtgroup_add_files(kn, files);
  3233. if (ret) {
  3234. rdt_last_cmd_puts("kernfs fill error\n");
  3235. goto out_destroy;
  3236. }
  3237. /*
  3238. * The caller unlocks the parent_kn upon success.
  3239. */
  3240. return 0;
  3241. out_destroy:
  3242. kernfs_put(rdtgrp->kn);
  3243. kernfs_remove(rdtgrp->kn);
  3244. out_free_rgrp:
  3245. kfree(rdtgrp);
  3246. out_unlock:
  3247. rdtgroup_kn_unlock(parent_kn);
  3248. return ret;
  3249. }
  3250. static void mkdir_rdt_prepare_clean(struct rdtgroup *rgrp)
  3251. {
  3252. kernfs_remove(rgrp->kn);
  3253. rdtgroup_remove(rgrp);
  3254. }
  3255. /*
  3256. * Create a monitor group under "mon_groups" directory of a control
  3257. * and monitor group(ctrl_mon). This is a resource group
  3258. * to monitor a subset of tasks and cpus in its parent ctrl_mon group.
  3259. */
  3260. static int rdtgroup_mkdir_mon(struct kernfs_node *parent_kn,
  3261. const char *name, umode_t mode)
  3262. {
  3263. struct rdtgroup *rdtgrp, *prgrp;
  3264. int ret;
  3265. ret = mkdir_rdt_prepare(parent_kn, name, mode, RDTMON_GROUP, &rdtgrp);
  3266. if (ret)
  3267. return ret;
  3268. prgrp = rdtgrp->mon.parent;
  3269. rdtgrp->closid = prgrp->closid;
  3270. ret = mkdir_rdt_prepare_rmid_alloc(rdtgrp);
  3271. if (ret) {
  3272. mkdir_rdt_prepare_clean(rdtgrp);
  3273. goto out_unlock;
  3274. }
  3275. kernfs_activate(rdtgrp->kn);
  3276. /*
  3277. * Add the rdtgrp to the list of rdtgrps the parent
  3278. * ctrl_mon group has to track.
  3279. */
  3280. list_add_tail(&rdtgrp->mon.crdtgrp_list, &prgrp->mon.crdtgrp_list);
  3281. out_unlock:
  3282. rdtgroup_kn_unlock(parent_kn);
  3283. return ret;
  3284. }
  3285. /*
  3286. * These are rdtgroups created under the root directory. Can be used
  3287. * to allocate and monitor resources.
  3288. */
  3289. static int rdtgroup_mkdir_ctrl_mon(struct kernfs_node *parent_kn,
  3290. const char *name, umode_t mode)
  3291. {
  3292. struct rdtgroup *rdtgrp;
  3293. struct kernfs_node *kn;
  3294. u32 closid;
  3295. int ret;
  3296. ret = mkdir_rdt_prepare(parent_kn, name, mode, RDTCTRL_GROUP, &rdtgrp);
  3297. if (ret)
  3298. return ret;
  3299. kn = rdtgrp->kn;
  3300. ret = closid_alloc();
  3301. if (ret < 0) {
  3302. rdt_last_cmd_puts("Out of CLOSIDs\n");
  3303. goto out_common_fail;
  3304. }
  3305. closid = ret;
  3306. ret = 0;
  3307. rdtgrp->closid = closid;
  3308. ret = mkdir_rdt_prepare_rmid_alloc(rdtgrp);
  3309. if (ret)
  3310. goto out_closid_free;
  3311. kernfs_activate(rdtgrp->kn);
  3312. ret = rdtgroup_init_alloc(rdtgrp);
  3313. if (ret < 0)
  3314. goto out_rmid_free;
  3315. list_add(&rdtgrp->rdtgroup_list, &rdt_all_groups);
  3316. if (resctrl_arch_mon_capable()) {
  3317. /*
  3318. * Create an empty mon_groups directory to hold the subset
  3319. * of tasks and cpus to monitor.
  3320. */
  3321. ret = mongroup_create_dir(kn, rdtgrp, "mon_groups", NULL);
  3322. if (ret) {
  3323. rdt_last_cmd_puts("kernfs subdir error\n");
  3324. goto out_del_list;
  3325. }
  3326. if (is_mba_sc(NULL))
  3327. rdtgrp->mba_mbps_event = mba_mbps_default_event;
  3328. }
  3329. goto out_unlock;
  3330. out_del_list:
  3331. list_del(&rdtgrp->rdtgroup_list);
  3332. out_rmid_free:
  3333. mkdir_rdt_prepare_rmid_free(rdtgrp);
  3334. out_closid_free:
  3335. closid_free(closid);
  3336. out_common_fail:
  3337. mkdir_rdt_prepare_clean(rdtgrp);
  3338. out_unlock:
  3339. rdtgroup_kn_unlock(parent_kn);
  3340. return ret;
  3341. }
  3342. static int rdtgroup_mkdir(struct kernfs_node *parent_kn, const char *name,
  3343. umode_t mode)
  3344. {
  3345. /* Do not accept '\n' to avoid unparsable situation. */
  3346. if (strchr(name, '\n'))
  3347. return -EINVAL;
  3348. /*
  3349. * If the parent directory is the root directory and RDT
  3350. * allocation is supported, add a control and monitoring
  3351. * subdirectory
  3352. */
  3353. if (resctrl_arch_alloc_capable() && parent_kn == rdtgroup_default.kn)
  3354. return rdtgroup_mkdir_ctrl_mon(parent_kn, name, mode);
  3355. /* Else, attempt to add a monitoring subdirectory. */
  3356. if (resctrl_arch_mon_capable())
  3357. return rdtgroup_mkdir_mon(parent_kn, name, mode);
  3358. return -EPERM;
  3359. }
  3360. static int rdtgroup_rmdir_mon(struct rdtgroup *rdtgrp, cpumask_var_t tmpmask)
  3361. {
  3362. struct rdtgroup *prdtgrp = rdtgrp->mon.parent;
  3363. u32 closid, rmid;
  3364. int cpu;
  3365. /* Give any tasks back to the parent group */
  3366. rdt_move_group_tasks(rdtgrp, prdtgrp, tmpmask);
  3367. /*
  3368. * Update per cpu closid/rmid of the moved CPUs first.
  3369. * Note: the closid will not change, but the arch code still needs it.
  3370. */
  3371. closid = prdtgrp->closid;
  3372. rmid = prdtgrp->mon.rmid;
  3373. for_each_cpu(cpu, &rdtgrp->cpu_mask)
  3374. resctrl_arch_set_cpu_default_closid_rmid(cpu, closid, rmid);
  3375. /*
  3376. * Update the MSR on moved CPUs and CPUs which have moved
  3377. * task running on them.
  3378. */
  3379. cpumask_or(tmpmask, tmpmask, &rdtgrp->cpu_mask);
  3380. update_closid_rmid(tmpmask, NULL);
  3381. rdtgrp->flags = RDT_DELETED;
  3382. rdtgroup_unassign_cntrs(rdtgrp);
  3383. free_rmid(rdtgrp->closid, rdtgrp->mon.rmid);
  3384. /*
  3385. * Remove the rdtgrp from the parent ctrl_mon group's list
  3386. */
  3387. WARN_ON(list_empty(&prdtgrp->mon.crdtgrp_list));
  3388. list_del(&rdtgrp->mon.crdtgrp_list);
  3389. kernfs_remove(rdtgrp->kn);
  3390. return 0;
  3391. }
  3392. static int rdtgroup_ctrl_remove(struct rdtgroup *rdtgrp)
  3393. {
  3394. rdtgrp->flags = RDT_DELETED;
  3395. list_del(&rdtgrp->rdtgroup_list);
  3396. kernfs_remove(rdtgrp->kn);
  3397. return 0;
  3398. }
  3399. static int rdtgroup_rmdir_ctrl(struct rdtgroup *rdtgrp, cpumask_var_t tmpmask)
  3400. {
  3401. u32 closid, rmid;
  3402. int cpu;
  3403. /* Give any tasks back to the default group */
  3404. rdt_move_group_tasks(rdtgrp, &rdtgroup_default, tmpmask);
  3405. /* Give any CPUs back to the default group */
  3406. cpumask_or(&rdtgroup_default.cpu_mask,
  3407. &rdtgroup_default.cpu_mask, &rdtgrp->cpu_mask);
  3408. /* Update per cpu closid and rmid of the moved CPUs first */
  3409. closid = rdtgroup_default.closid;
  3410. rmid = rdtgroup_default.mon.rmid;
  3411. for_each_cpu(cpu, &rdtgrp->cpu_mask)
  3412. resctrl_arch_set_cpu_default_closid_rmid(cpu, closid, rmid);
  3413. /*
  3414. * Update the MSR on moved CPUs and CPUs which have moved
  3415. * task running on them.
  3416. */
  3417. cpumask_or(tmpmask, tmpmask, &rdtgrp->cpu_mask);
  3418. update_closid_rmid(tmpmask, NULL);
  3419. rdtgroup_unassign_cntrs(rdtgrp);
  3420. free_rmid(rdtgrp->closid, rdtgrp->mon.rmid);
  3421. closid_free(rdtgrp->closid);
  3422. rdtgroup_ctrl_remove(rdtgrp);
  3423. /*
  3424. * Free all the child monitor group rmids.
  3425. */
  3426. free_all_child_rdtgrp(rdtgrp);
  3427. return 0;
  3428. }
  3429. static struct kernfs_node *rdt_kn_parent(struct kernfs_node *kn)
  3430. {
  3431. /*
  3432. * Valid within the RCU section it was obtained or while rdtgroup_mutex
  3433. * is held.
  3434. */
  3435. return rcu_dereference_check(kn->__parent, lockdep_is_held(&rdtgroup_mutex));
  3436. }
  3437. static int rdtgroup_rmdir(struct kernfs_node *kn)
  3438. {
  3439. struct kernfs_node *parent_kn;
  3440. struct rdtgroup *rdtgrp;
  3441. cpumask_var_t tmpmask;
  3442. int ret = 0;
  3443. if (!zalloc_cpumask_var(&tmpmask, GFP_KERNEL))
  3444. return -ENOMEM;
  3445. rdtgrp = rdtgroup_kn_lock_live(kn);
  3446. if (!rdtgrp) {
  3447. ret = -EPERM;
  3448. goto out;
  3449. }
  3450. parent_kn = rdt_kn_parent(kn);
  3451. /*
  3452. * If the rdtgroup is a ctrl_mon group and parent directory
  3453. * is the root directory, remove the ctrl_mon group.
  3454. *
  3455. * If the rdtgroup is a mon group and parent directory
  3456. * is a valid "mon_groups" directory, remove the mon group.
  3457. */
  3458. if (rdtgrp->type == RDTCTRL_GROUP && parent_kn == rdtgroup_default.kn &&
  3459. rdtgrp != &rdtgroup_default) {
  3460. if (rdtgrp->mode == RDT_MODE_PSEUDO_LOCKSETUP ||
  3461. rdtgrp->mode == RDT_MODE_PSEUDO_LOCKED) {
  3462. ret = rdtgroup_ctrl_remove(rdtgrp);
  3463. } else {
  3464. ret = rdtgroup_rmdir_ctrl(rdtgrp, tmpmask);
  3465. }
  3466. } else if (rdtgrp->type == RDTMON_GROUP &&
  3467. is_mon_groups(parent_kn, rdt_kn_name(kn))) {
  3468. ret = rdtgroup_rmdir_mon(rdtgrp, tmpmask);
  3469. } else {
  3470. ret = -EPERM;
  3471. }
  3472. out:
  3473. rdtgroup_kn_unlock(kn);
  3474. free_cpumask_var(tmpmask);
  3475. return ret;
  3476. }
  3477. /**
  3478. * mongrp_reparent() - replace parent CTRL_MON group of a MON group
  3479. * @rdtgrp: the MON group whose parent should be replaced
  3480. * @new_prdtgrp: replacement parent CTRL_MON group for @rdtgrp
  3481. * @cpus: cpumask provided by the caller for use during this call
  3482. *
  3483. * Replaces the parent CTRL_MON group for a MON group, resulting in all member
  3484. * tasks' CLOSID immediately changing to that of the new parent group.
  3485. * Monitoring data for the group is unaffected by this operation.
  3486. */
  3487. static void mongrp_reparent(struct rdtgroup *rdtgrp,
  3488. struct rdtgroup *new_prdtgrp,
  3489. cpumask_var_t cpus)
  3490. {
  3491. struct rdtgroup *prdtgrp = rdtgrp->mon.parent;
  3492. WARN_ON(rdtgrp->type != RDTMON_GROUP);
  3493. WARN_ON(new_prdtgrp->type != RDTCTRL_GROUP);
  3494. /* Nothing to do when simply renaming a MON group. */
  3495. if (prdtgrp == new_prdtgrp)
  3496. return;
  3497. WARN_ON(list_empty(&prdtgrp->mon.crdtgrp_list));
  3498. list_move_tail(&rdtgrp->mon.crdtgrp_list,
  3499. &new_prdtgrp->mon.crdtgrp_list);
  3500. rdtgrp->mon.parent = new_prdtgrp;
  3501. rdtgrp->closid = new_prdtgrp->closid;
  3502. /* Propagate updated closid to all tasks in this group. */
  3503. rdt_move_group_tasks(rdtgrp, rdtgrp, cpus);
  3504. update_closid_rmid(cpus, NULL);
  3505. }
  3506. static int rdtgroup_rename(struct kernfs_node *kn,
  3507. struct kernfs_node *new_parent, const char *new_name)
  3508. {
  3509. struct kernfs_node *kn_parent;
  3510. struct rdtgroup *new_prdtgrp;
  3511. struct rdtgroup *rdtgrp;
  3512. cpumask_var_t tmpmask;
  3513. int ret;
  3514. rdtgrp = kernfs_to_rdtgroup(kn);
  3515. new_prdtgrp = kernfs_to_rdtgroup(new_parent);
  3516. if (!rdtgrp || !new_prdtgrp)
  3517. return -ENOENT;
  3518. /* Release both kernfs active_refs before obtaining rdtgroup mutex. */
  3519. rdtgroup_kn_get(rdtgrp, kn);
  3520. rdtgroup_kn_get(new_prdtgrp, new_parent);
  3521. mutex_lock(&rdtgroup_mutex);
  3522. rdt_last_cmd_clear();
  3523. /*
  3524. * Don't allow kernfs_to_rdtgroup() to return a parent rdtgroup if
  3525. * either kernfs_node is a file.
  3526. */
  3527. if (kernfs_type(kn) != KERNFS_DIR ||
  3528. kernfs_type(new_parent) != KERNFS_DIR) {
  3529. rdt_last_cmd_puts("Source and destination must be directories");
  3530. ret = -EPERM;
  3531. goto out;
  3532. }
  3533. if ((rdtgrp->flags & RDT_DELETED) || (new_prdtgrp->flags & RDT_DELETED)) {
  3534. ret = -ENOENT;
  3535. goto out;
  3536. }
  3537. kn_parent = rdt_kn_parent(kn);
  3538. if (rdtgrp->type != RDTMON_GROUP || !kn_parent ||
  3539. !is_mon_groups(kn_parent, rdt_kn_name(kn))) {
  3540. rdt_last_cmd_puts("Source must be a MON group\n");
  3541. ret = -EPERM;
  3542. goto out;
  3543. }
  3544. if (!is_mon_groups(new_parent, new_name)) {
  3545. rdt_last_cmd_puts("Destination must be a mon_groups subdirectory\n");
  3546. ret = -EPERM;
  3547. goto out;
  3548. }
  3549. /*
  3550. * If the MON group is monitoring CPUs, the CPUs must be assigned to the
  3551. * current parent CTRL_MON group and therefore cannot be assigned to
  3552. * the new parent, making the move illegal.
  3553. */
  3554. if (!cpumask_empty(&rdtgrp->cpu_mask) &&
  3555. rdtgrp->mon.parent != new_prdtgrp) {
  3556. rdt_last_cmd_puts("Cannot move a MON group that monitors CPUs\n");
  3557. ret = -EPERM;
  3558. goto out;
  3559. }
  3560. /*
  3561. * Allocate the cpumask for use in mongrp_reparent() to avoid the
  3562. * possibility of failing to allocate it after kernfs_rename() has
  3563. * succeeded.
  3564. */
  3565. if (!zalloc_cpumask_var(&tmpmask, GFP_KERNEL)) {
  3566. ret = -ENOMEM;
  3567. goto out;
  3568. }
  3569. /*
  3570. * Perform all input validation and allocations needed to ensure
  3571. * mongrp_reparent() will succeed before calling kernfs_rename(),
  3572. * otherwise it would be necessary to revert this call if
  3573. * mongrp_reparent() failed.
  3574. */
  3575. ret = kernfs_rename(kn, new_parent, new_name);
  3576. if (!ret)
  3577. mongrp_reparent(rdtgrp, new_prdtgrp, tmpmask);
  3578. free_cpumask_var(tmpmask);
  3579. out:
  3580. mutex_unlock(&rdtgroup_mutex);
  3581. rdtgroup_kn_put(rdtgrp, kn);
  3582. rdtgroup_kn_put(new_prdtgrp, new_parent);
  3583. return ret;
  3584. }
  3585. static int rdtgroup_show_options(struct seq_file *seq, struct kernfs_root *kf)
  3586. {
  3587. if (resctrl_arch_get_cdp_enabled(RDT_RESOURCE_L3))
  3588. seq_puts(seq, ",cdp");
  3589. if (resctrl_arch_get_cdp_enabled(RDT_RESOURCE_L2))
  3590. seq_puts(seq, ",cdpl2");
  3591. if (is_mba_sc(resctrl_arch_get_resource(RDT_RESOURCE_MBA)))
  3592. seq_puts(seq, ",mba_MBps");
  3593. if (resctrl_debug)
  3594. seq_puts(seq, ",debug");
  3595. return 0;
  3596. }
  3597. static struct kernfs_syscall_ops rdtgroup_kf_syscall_ops = {
  3598. .mkdir = rdtgroup_mkdir,
  3599. .rmdir = rdtgroup_rmdir,
  3600. .rename = rdtgroup_rename,
  3601. .show_options = rdtgroup_show_options,
  3602. };
  3603. static int rdtgroup_setup_root(struct rdt_fs_context *ctx)
  3604. {
  3605. rdt_root = kernfs_create_root(&rdtgroup_kf_syscall_ops,
  3606. KERNFS_ROOT_CREATE_DEACTIVATED |
  3607. KERNFS_ROOT_EXTRA_OPEN_PERM_CHECK,
  3608. &rdtgroup_default);
  3609. if (IS_ERR(rdt_root))
  3610. return PTR_ERR(rdt_root);
  3611. ctx->kfc.root = rdt_root;
  3612. rdtgroup_default.kn = kernfs_root_to_node(rdt_root);
  3613. return 0;
  3614. }
  3615. static void rdtgroup_destroy_root(void)
  3616. {
  3617. lockdep_assert_held(&rdtgroup_mutex);
  3618. kernfs_destroy_root(rdt_root);
  3619. rdtgroup_default.kn = NULL;
  3620. }
  3621. static void rdtgroup_setup_default(void)
  3622. {
  3623. mutex_lock(&rdtgroup_mutex);
  3624. rdtgroup_default.closid = RESCTRL_RESERVED_CLOSID;
  3625. rdtgroup_default.mon.rmid = RESCTRL_RESERVED_RMID;
  3626. rdtgroup_default.type = RDTCTRL_GROUP;
  3627. INIT_LIST_HEAD(&rdtgroup_default.mon.crdtgrp_list);
  3628. list_add(&rdtgroup_default.rdtgroup_list, &rdt_all_groups);
  3629. mutex_unlock(&rdtgroup_mutex);
  3630. }
  3631. static void domain_destroy_l3_mon_state(struct rdt_l3_mon_domain *d)
  3632. {
  3633. int idx;
  3634. kfree(d->cntr_cfg);
  3635. bitmap_free(d->rmid_busy_llc);
  3636. for_each_mbm_idx(idx) {
  3637. kfree(d->mbm_states[idx]);
  3638. d->mbm_states[idx] = NULL;
  3639. }
  3640. }
  3641. void resctrl_offline_ctrl_domain(struct rdt_resource *r, struct rdt_ctrl_domain *d)
  3642. {
  3643. mutex_lock(&rdtgroup_mutex);
  3644. if (supports_mba_mbps() && r->rid == RDT_RESOURCE_MBA)
  3645. mba_sc_domain_destroy(r, d);
  3646. mutex_unlock(&rdtgroup_mutex);
  3647. }
  3648. void resctrl_offline_mon_domain(struct rdt_resource *r, struct rdt_domain_hdr *hdr)
  3649. {
  3650. struct rdt_l3_mon_domain *d;
  3651. mutex_lock(&rdtgroup_mutex);
  3652. /*
  3653. * If resctrl is mounted, remove all the
  3654. * per domain monitor data directories.
  3655. */
  3656. if (resctrl_mounted && resctrl_arch_mon_capable())
  3657. rmdir_mondata_subdir_allrdtgrp(r, hdr);
  3658. if (r->rid != RDT_RESOURCE_L3)
  3659. goto out_unlock;
  3660. if (!domain_header_is_valid(hdr, RESCTRL_MON_DOMAIN, RDT_RESOURCE_L3))
  3661. goto out_unlock;
  3662. d = container_of(hdr, struct rdt_l3_mon_domain, hdr);
  3663. if (resctrl_is_mbm_enabled())
  3664. cancel_delayed_work(&d->mbm_over);
  3665. if (resctrl_is_mon_event_enabled(QOS_L3_OCCUP_EVENT_ID) && has_busy_rmid(d)) {
  3666. /*
  3667. * When a package is going down, forcefully
  3668. * decrement rmid->ebusy. There is no way to know
  3669. * that the L3 was flushed and hence may lead to
  3670. * incorrect counts in rare scenarios, but leaving
  3671. * the RMID as busy creates RMID leaks if the
  3672. * package never comes back.
  3673. */
  3674. __check_limbo(d, true);
  3675. cancel_delayed_work(&d->cqm_limbo);
  3676. }
  3677. domain_destroy_l3_mon_state(d);
  3678. out_unlock:
  3679. mutex_unlock(&rdtgroup_mutex);
  3680. }
  3681. /**
  3682. * domain_setup_l3_mon_state() - Initialise domain monitoring structures.
  3683. * @r: The resource for the newly online domain.
  3684. * @d: The newly online domain.
  3685. *
  3686. * Allocate monitor resources that belong to this domain.
  3687. * Called when the first CPU of a domain comes online, regardless of whether
  3688. * the filesystem is mounted.
  3689. * During boot this may be called before global allocations have been made by
  3690. * resctrl_l3_mon_resource_init().
  3691. *
  3692. * Called during CPU online that may run as soon as CPU online callbacks
  3693. * are set up during resctrl initialization. The number of supported RMIDs
  3694. * may be reduced if additional mon_capable resources are enumerated
  3695. * at mount time. This means the rdt_l3_mon_domain::mbm_states[] and
  3696. * rdt_l3_mon_domain::rmid_busy_llc allocations may be larger than needed.
  3697. *
  3698. * Return: 0 for success, or -ENOMEM.
  3699. */
  3700. static int domain_setup_l3_mon_state(struct rdt_resource *r, struct rdt_l3_mon_domain *d)
  3701. {
  3702. u32 idx_limit = resctrl_arch_system_num_rmid_idx();
  3703. size_t tsize = sizeof(*d->mbm_states[0]);
  3704. enum resctrl_event_id eventid;
  3705. int idx;
  3706. if (resctrl_is_mon_event_enabled(QOS_L3_OCCUP_EVENT_ID)) {
  3707. d->rmid_busy_llc = bitmap_zalloc(idx_limit, GFP_KERNEL);
  3708. if (!d->rmid_busy_llc)
  3709. return -ENOMEM;
  3710. }
  3711. for_each_mbm_event_id(eventid) {
  3712. if (!resctrl_is_mon_event_enabled(eventid))
  3713. continue;
  3714. idx = MBM_STATE_IDX(eventid);
  3715. d->mbm_states[idx] = kcalloc(idx_limit, tsize, GFP_KERNEL);
  3716. if (!d->mbm_states[idx])
  3717. goto cleanup;
  3718. }
  3719. if (resctrl_is_mbm_enabled() && r->mon.mbm_cntr_assignable) {
  3720. tsize = sizeof(*d->cntr_cfg);
  3721. d->cntr_cfg = kcalloc(r->mon.num_mbm_cntrs, tsize, GFP_KERNEL);
  3722. if (!d->cntr_cfg)
  3723. goto cleanup;
  3724. }
  3725. return 0;
  3726. cleanup:
  3727. bitmap_free(d->rmid_busy_llc);
  3728. for_each_mbm_idx(idx) {
  3729. kfree(d->mbm_states[idx]);
  3730. d->mbm_states[idx] = NULL;
  3731. }
  3732. return -ENOMEM;
  3733. }
  3734. int resctrl_online_ctrl_domain(struct rdt_resource *r, struct rdt_ctrl_domain *d)
  3735. {
  3736. int err = 0;
  3737. mutex_lock(&rdtgroup_mutex);
  3738. if (supports_mba_mbps() && r->rid == RDT_RESOURCE_MBA) {
  3739. /* RDT_RESOURCE_MBA is never mon_capable */
  3740. err = mba_sc_domain_allocate(r, d);
  3741. }
  3742. mutex_unlock(&rdtgroup_mutex);
  3743. return err;
  3744. }
  3745. int resctrl_online_mon_domain(struct rdt_resource *r, struct rdt_domain_hdr *hdr)
  3746. {
  3747. struct rdt_l3_mon_domain *d;
  3748. int err = -EINVAL;
  3749. mutex_lock(&rdtgroup_mutex);
  3750. if (r->rid != RDT_RESOURCE_L3)
  3751. goto mkdir;
  3752. if (!domain_header_is_valid(hdr, RESCTRL_MON_DOMAIN, RDT_RESOURCE_L3))
  3753. goto out_unlock;
  3754. d = container_of(hdr, struct rdt_l3_mon_domain, hdr);
  3755. err = domain_setup_l3_mon_state(r, d);
  3756. if (err)
  3757. goto out_unlock;
  3758. if (resctrl_is_mbm_enabled()) {
  3759. INIT_DELAYED_WORK(&d->mbm_over, mbm_handle_overflow);
  3760. mbm_setup_overflow_handler(d, MBM_OVERFLOW_INTERVAL,
  3761. RESCTRL_PICK_ANY_CPU);
  3762. }
  3763. if (resctrl_is_mon_event_enabled(QOS_L3_OCCUP_EVENT_ID))
  3764. INIT_DELAYED_WORK(&d->cqm_limbo, cqm_handle_limbo);
  3765. mkdir:
  3766. err = 0;
  3767. /*
  3768. * If the filesystem is not mounted then only the default resource group
  3769. * exists. Creation of its directories is deferred until mount time
  3770. * by rdt_get_tree() calling mkdir_mondata_all().
  3771. * If resctrl is mounted, add per domain monitor data directories.
  3772. */
  3773. if (resctrl_mounted && resctrl_arch_mon_capable())
  3774. mkdir_mondata_subdir_allrdtgrp(r, hdr);
  3775. out_unlock:
  3776. mutex_unlock(&rdtgroup_mutex);
  3777. return err;
  3778. }
  3779. void resctrl_online_cpu(unsigned int cpu)
  3780. {
  3781. mutex_lock(&rdtgroup_mutex);
  3782. /* The CPU is set in default rdtgroup after online. */
  3783. cpumask_set_cpu(cpu, &rdtgroup_default.cpu_mask);
  3784. mutex_unlock(&rdtgroup_mutex);
  3785. }
  3786. static void clear_childcpus(struct rdtgroup *r, unsigned int cpu)
  3787. {
  3788. struct rdtgroup *cr;
  3789. list_for_each_entry(cr, &r->mon.crdtgrp_list, mon.crdtgrp_list) {
  3790. if (cpumask_test_and_clear_cpu(cpu, &cr->cpu_mask))
  3791. break;
  3792. }
  3793. }
  3794. static struct rdt_l3_mon_domain *get_mon_domain_from_cpu(int cpu,
  3795. struct rdt_resource *r)
  3796. {
  3797. struct rdt_l3_mon_domain *d;
  3798. lockdep_assert_cpus_held();
  3799. list_for_each_entry(d, &r->mon_domains, hdr.list) {
  3800. /* Find the domain that contains this CPU */
  3801. if (cpumask_test_cpu(cpu, &d->hdr.cpu_mask))
  3802. return d;
  3803. }
  3804. return NULL;
  3805. }
  3806. void resctrl_offline_cpu(unsigned int cpu)
  3807. {
  3808. struct rdt_resource *l3 = resctrl_arch_get_resource(RDT_RESOURCE_L3);
  3809. struct rdt_l3_mon_domain *d;
  3810. struct rdtgroup *rdtgrp;
  3811. mutex_lock(&rdtgroup_mutex);
  3812. list_for_each_entry(rdtgrp, &rdt_all_groups, rdtgroup_list) {
  3813. if (cpumask_test_and_clear_cpu(cpu, &rdtgrp->cpu_mask)) {
  3814. clear_childcpus(rdtgrp, cpu);
  3815. break;
  3816. }
  3817. }
  3818. if (!l3->mon_capable)
  3819. goto out_unlock;
  3820. d = get_mon_domain_from_cpu(cpu, l3);
  3821. if (d) {
  3822. if (resctrl_is_mbm_enabled() && cpu == d->mbm_work_cpu) {
  3823. cancel_delayed_work(&d->mbm_over);
  3824. mbm_setup_overflow_handler(d, 0, cpu);
  3825. }
  3826. if (resctrl_is_mon_event_enabled(QOS_L3_OCCUP_EVENT_ID) &&
  3827. cpu == d->cqm_work_cpu && has_busy_rmid(d)) {
  3828. cancel_delayed_work(&d->cqm_limbo);
  3829. cqm_setup_limbo_handler(d, 0, cpu);
  3830. }
  3831. }
  3832. out_unlock:
  3833. mutex_unlock(&rdtgroup_mutex);
  3834. }
  3835. /*
  3836. * resctrl_init - resctrl filesystem initialization
  3837. *
  3838. * Setup resctrl file system including set up root, create mount point,
  3839. * register resctrl filesystem, and initialize files under root directory.
  3840. *
  3841. * Return: 0 on success or -errno
  3842. */
  3843. int resctrl_init(void)
  3844. {
  3845. int ret = 0;
  3846. seq_buf_init(&last_cmd_status, last_cmd_status_buf,
  3847. sizeof(last_cmd_status_buf));
  3848. rdtgroup_setup_default();
  3849. thread_throttle_mode_init();
  3850. io_alloc_init();
  3851. ret = resctrl_l3_mon_resource_init();
  3852. if (ret)
  3853. return ret;
  3854. ret = sysfs_create_mount_point(fs_kobj, "resctrl");
  3855. if (ret) {
  3856. resctrl_l3_mon_resource_exit();
  3857. return ret;
  3858. }
  3859. ret = register_filesystem(&rdt_fs_type);
  3860. if (ret)
  3861. goto cleanup_mountpoint;
  3862. /*
  3863. * Adding the resctrl debugfs directory here may not be ideal since
  3864. * it would let the resctrl debugfs directory appear on the debugfs
  3865. * filesystem before the resctrl filesystem is mounted.
  3866. * It may also be ok since that would enable debugging of RDT before
  3867. * resctrl is mounted.
  3868. * The reason why the debugfs directory is created here and not in
  3869. * rdt_get_tree() is because rdt_get_tree() takes rdtgroup_mutex and
  3870. * during the debugfs directory creation also &sb->s_type->i_mutex_key
  3871. * (the lockdep class of inode->i_rwsem). Other filesystem
  3872. * interactions (eg. SyS_getdents) have the lock ordering:
  3873. * &sb->s_type->i_mutex_key --> &mm->mmap_lock
  3874. * During mmap(), called with &mm->mmap_lock, the rdtgroup_mutex
  3875. * is taken, thus creating dependency:
  3876. * &mm->mmap_lock --> rdtgroup_mutex for the latter that can cause
  3877. * issues considering the other two lock dependencies.
  3878. * By creating the debugfs directory here we avoid a dependency
  3879. * that may cause deadlock (even though file operations cannot
  3880. * occur until the filesystem is mounted, but I do not know how to
  3881. * tell lockdep that).
  3882. */
  3883. debugfs_resctrl = debugfs_create_dir("resctrl", NULL);
  3884. return 0;
  3885. cleanup_mountpoint:
  3886. sysfs_remove_mount_point(fs_kobj, "resctrl");
  3887. resctrl_l3_mon_resource_exit();
  3888. return ret;
  3889. }
  3890. static bool resctrl_online_domains_exist(void)
  3891. {
  3892. struct rdt_resource *r;
  3893. /*
  3894. * Only walk capable resources to allow resctrl_arch_get_resource()
  3895. * to return dummy 'not capable' resources.
  3896. */
  3897. for_each_alloc_capable_rdt_resource(r) {
  3898. if (!list_empty(&r->ctrl_domains))
  3899. return true;
  3900. }
  3901. for_each_mon_capable_rdt_resource(r) {
  3902. if (!list_empty(&r->mon_domains))
  3903. return true;
  3904. }
  3905. return false;
  3906. }
  3907. /**
  3908. * resctrl_exit() - Remove the resctrl filesystem and free resources.
  3909. *
  3910. * Called by the architecture code in response to a fatal error.
  3911. * Removes resctrl files and structures from kernfs to prevent further
  3912. * configuration.
  3913. *
  3914. * When called by the architecture code, all CPUs and resctrl domains must be
  3915. * offline. This ensures the limbo and overflow handlers are not scheduled to
  3916. * run, meaning the data structures they access can be freed by
  3917. * resctrl_l3_mon_resource_exit().
  3918. *
  3919. * After resctrl_exit() returns, the architecture code should return an
  3920. * error from all resctrl_arch_ functions that can do this.
  3921. * resctrl_arch_get_resource() must continue to return struct rdt_resources
  3922. * with the correct rid field to ensure the filesystem can be unmounted.
  3923. */
  3924. void resctrl_exit(void)
  3925. {
  3926. cpus_read_lock();
  3927. WARN_ON_ONCE(resctrl_online_domains_exist());
  3928. mutex_lock(&rdtgroup_mutex);
  3929. resctrl_fs_teardown();
  3930. mutex_unlock(&rdtgroup_mutex);
  3931. cpus_read_unlock();
  3932. debugfs_remove_recursive(debugfs_resctrl);
  3933. debugfs_resctrl = NULL;
  3934. unregister_filesystem(&rdt_fs_type);
  3935. /*
  3936. * Do not remove the sysfs mount point added by resctrl_init() so that
  3937. * it can be used to umount resctrl.
  3938. */
  3939. resctrl_l3_mon_resource_exit();
  3940. free_rmid_lru_list();
  3941. }