intel_rapl_common.c 66 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
7227822792280228122822283228422852286228722882289229022912292229322942295229622972298229923002301230223032304230523062307230823092310231123122313231423152316231723182319232023212322232323242325232623272328232923302331233223332334233523362337233823392340234123422343234423452346234723482349235023512352235323542355235623572358235923602361236223632364236523662367236823692370237123722373
  1. // SPDX-License-Identifier: GPL-2.0-only
  2. /*
  3. * Common code for Intel Running Average Power Limit (RAPL) support.
  4. * Copyright (c) 2019, Intel Corporation.
  5. */
  6. #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
  7. #include <linux/bitmap.h>
  8. #include <linux/cleanup.h>
  9. #include <linux/cpu.h>
  10. #include <linux/delay.h>
  11. #include <linux/device.h>
  12. #include <linux/intel_rapl.h>
  13. #include <linux/kernel.h>
  14. #include <linux/list.h>
  15. #include <linux/log2.h>
  16. #include <linux/module.h>
  17. #include <linux/nospec.h>
  18. #include <linux/perf_event.h>
  19. #include <linux/platform_device.h>
  20. #include <linux/powercap.h>
  21. #include <linux/processor.h>
  22. #include <linux/slab.h>
  23. #include <linux/suspend.h>
  24. #include <linux/sysfs.h>
  25. #include <linux/types.h>
  26. #include <asm/cpu_device_id.h>
  27. #include <asm/intel-family.h>
  28. #include <asm/iosf_mbi.h>
  29. #include <asm/msr.h>
  30. /* bitmasks for RAPL MSRs, used by primitive access functions */
  31. #define ENERGY_STATUS_MASK 0xffffffff
  32. #define POWER_LIMIT1_MASK 0x7FFF
  33. #define POWER_LIMIT1_ENABLE BIT(15)
  34. #define POWER_LIMIT1_CLAMP BIT(16)
  35. #define POWER_LIMIT2_MASK (0x7FFFULL<<32)
  36. #define POWER_LIMIT2_ENABLE BIT_ULL(47)
  37. #define POWER_LIMIT2_CLAMP BIT_ULL(48)
  38. #define POWER_HIGH_LOCK BIT_ULL(63)
  39. #define POWER_LOW_LOCK BIT(31)
  40. #define POWER_LIMIT4_MASK 0x1FFF
  41. #define TIME_WINDOW1_MASK (0x7FULL<<17)
  42. #define TIME_WINDOW2_MASK (0x7FULL<<49)
  43. #define POWER_UNIT_OFFSET 0
  44. #define POWER_UNIT_MASK 0x0F
  45. #define ENERGY_UNIT_OFFSET 0x08
  46. #define ENERGY_UNIT_MASK 0x1F00
  47. #define TIME_UNIT_OFFSET 0x10
  48. #define TIME_UNIT_MASK 0xF0000
  49. #define POWER_INFO_MAX_MASK (0x7fffULL<<32)
  50. #define POWER_INFO_MIN_MASK (0x7fffULL<<16)
  51. #define POWER_INFO_MAX_TIME_WIN_MASK (0x3fULL<<48)
  52. #define POWER_INFO_THERMAL_SPEC_MASK 0x7fff
  53. #define PERF_STATUS_THROTTLE_TIME_MASK 0xffffffff
  54. #define PP_POLICY_MASK 0x1F
  55. /*
  56. * SPR has different layout for Psys Domain PowerLimit registers.
  57. * There are 17 bits of PL1 and PL2 instead of 15 bits.
  58. * The Enable bits and TimeWindow bits are also shifted as a result.
  59. */
  60. #define PSYS_POWER_LIMIT1_MASK 0x1FFFF
  61. #define PSYS_POWER_LIMIT1_ENABLE BIT(17)
  62. #define PSYS_POWER_LIMIT2_MASK (0x1FFFFULL<<32)
  63. #define PSYS_POWER_LIMIT2_ENABLE BIT_ULL(49)
  64. #define PSYS_TIME_WINDOW1_MASK (0x7FULL<<19)
  65. #define PSYS_TIME_WINDOW2_MASK (0x7FULL<<51)
  66. /* bitmasks for RAPL TPMI, used by primitive access functions */
  67. #define TPMI_POWER_LIMIT_MASK 0x3FFFF
  68. #define TPMI_POWER_LIMIT_ENABLE BIT_ULL(62)
  69. #define TPMI_TIME_WINDOW_MASK (0x7FULL<<18)
  70. #define TPMI_INFO_SPEC_MASK 0x3FFFF
  71. #define TPMI_INFO_MIN_MASK (0x3FFFFULL << 18)
  72. #define TPMI_INFO_MAX_MASK (0x3FFFFULL << 36)
  73. #define TPMI_INFO_MAX_TIME_WIN_MASK (0x7FULL << 54)
  74. /* Non HW constants */
  75. #define RAPL_PRIMITIVE_DERIVED BIT(1) /* not from raw data */
  76. #define RAPL_PRIMITIVE_DUMMY BIT(2)
  77. #define TIME_WINDOW_MAX_MSEC 40000
  78. #define TIME_WINDOW_MIN_MSEC 250
  79. #define ENERGY_UNIT_SCALE 1000 /* scale from driver unit to powercap unit */
  /* Unit class of a primitive; selects the conversion in rapl_unit_xlate(). */
  enum unit_type {
  	ARBITRARY_UNIT,	/* value is passed through without translation */
  	POWER_UNIT,	/* scaled by the domain's power_unit */
  	ENERGY_UNIT,	/* scaled by the domain's energy_unit and ENERGY_UNIT_SCALE */
  	TIME_UNIT,	/* converted by the compute_time_window() callback */
  };
  86. /* per domain data, some are optional */
  87. #define NR_RAW_PRIMITIVES (NR_RAPL_PRIMITIVES - 2)
  88. #define DOMAIN_STATE_INACTIVE BIT(0)
  89. #define DOMAIN_STATE_POWER_LIMIT_SET BIT(1)
  90. static const char *pl_names[NR_POWER_LIMITS] = {
  91. [POWER_LIMIT1] = "long_term",
  92. [POWER_LIMIT2] = "short_term",
  93. [POWER_LIMIT4] = "peak_power",
  94. };
  /*
   * Generic per-power-limit attributes. get_pl_prim() maps a (power limit,
   * attribute) pair onto the concrete RAPL primitive, or -EINVAL when the
   * combination does not exist.
   */
  enum pl_prims {
  	PL_ENABLE,	/* enable bit of the limit */
  	PL_CLAMP,	/* clamp bit (not mapped for the TPMI interface) */
  	PL_LIMIT,	/* the power limit value itself */
  	PL_TIME_WINDOW,	/* time window of the limit */
  	PL_MAX_POWER,	/* maximum power reported for the limit */
  	PL_LOCK,	/* lock bit of the limit */
  };
  103. static bool is_pl_valid(struct rapl_domain *rd, int pl)
  104. {
  105. if (pl < POWER_LIMIT1 || pl > POWER_LIMIT4)
  106. return false;
  107. return rd->rpl[pl].name ? true : false;
  108. }
  109. static int get_pl_lock_prim(struct rapl_domain *rd, int pl)
  110. {
  111. if (rd->rp->priv->type == RAPL_IF_TPMI) {
  112. if (pl == POWER_LIMIT1)
  113. return PL1_LOCK;
  114. if (pl == POWER_LIMIT2)
  115. return PL2_LOCK;
  116. if (pl == POWER_LIMIT4)
  117. return PL4_LOCK;
  118. }
  119. /* MSR/MMIO Interface doesn't have Lock bit for PL4 */
  120. if (pl == POWER_LIMIT4)
  121. return -EINVAL;
  122. /*
  123. * Power Limit register that supports two power limits has a different
  124. * bit position for the Lock bit.
  125. */
  126. if (rd->rp->priv->limits[rd->id] & BIT(POWER_LIMIT2))
  127. return FW_HIGH_LOCK;
  128. return FW_LOCK;
  129. }
  130. static int get_pl_prim(struct rapl_domain *rd, int pl, enum pl_prims prim)
  131. {
  132. switch (pl) {
  133. case POWER_LIMIT1:
  134. if (prim == PL_ENABLE)
  135. return PL1_ENABLE;
  136. if (prim == PL_CLAMP && rd->rp->priv->type != RAPL_IF_TPMI)
  137. return PL1_CLAMP;
  138. if (prim == PL_LIMIT)
  139. return POWER_LIMIT1;
  140. if (prim == PL_TIME_WINDOW)
  141. return TIME_WINDOW1;
  142. if (prim == PL_MAX_POWER)
  143. return THERMAL_SPEC_POWER;
  144. if (prim == PL_LOCK)
  145. return get_pl_lock_prim(rd, pl);
  146. return -EINVAL;
  147. case POWER_LIMIT2:
  148. if (prim == PL_ENABLE)
  149. return PL2_ENABLE;
  150. if (prim == PL_CLAMP && rd->rp->priv->type != RAPL_IF_TPMI)
  151. return PL2_CLAMP;
  152. if (prim == PL_LIMIT)
  153. return POWER_LIMIT2;
  154. if (prim == PL_TIME_WINDOW)
  155. return TIME_WINDOW2;
  156. if (prim == PL_MAX_POWER)
  157. return MAX_POWER;
  158. if (prim == PL_LOCK)
  159. return get_pl_lock_prim(rd, pl);
  160. return -EINVAL;
  161. case POWER_LIMIT4:
  162. if (prim == PL_LIMIT)
  163. return POWER_LIMIT4;
  164. if (prim == PL_ENABLE)
  165. return PL4_ENABLE;
  166. /* PL4 would be around two times PL2, use same prim as PL2. */
  167. if (prim == PL_MAX_POWER)
  168. return MAX_POWER;
  169. if (prim == PL_LOCK)
  170. return get_pl_lock_prim(rd, pl);
  171. return -EINVAL;
  172. default:
  173. return -EINVAL;
  174. }
  175. }
  /* Map a powercap zone pointer back to the rapl_domain that embeds it. */
  #define power_zone_to_rapl_domain(_zone)			\
  	container_of(_zone, struct rapl_domain, power_zone)
  178. struct rapl_defaults {
  179. u8 floor_freq_reg_addr;
  180. int (*check_unit)(struct rapl_domain *rd);
  181. void (*set_floor_freq)(struct rapl_domain *rd, bool mode);
  182. u64 (*compute_time_window)(struct rapl_domain *rd, u64 val,
  183. bool to_raw);
  184. unsigned int dram_domain_energy_unit;
  185. unsigned int psys_domain_energy_unit;
  186. bool spr_psys_bits;
  187. };
  188. static struct rapl_defaults *defaults_msr;
  189. static const struct rapl_defaults defaults_tpmi;
  190. static struct rapl_defaults *get_defaults(struct rapl_package *rp)
  191. {
  192. return rp->priv->defaults;
  193. }
  194. /* Sideband MBI registers */
  195. #define IOSF_CPU_POWER_BUDGET_CTL_BYT (0x2)
  196. #define IOSF_CPU_POWER_BUDGET_CTL_TNG (0xdf)
  197. #define PACKAGE_PLN_INT_SAVED BIT(0)
  198. #define MAX_PRIM_NAME (32)
  199. /* per domain data. used to describe individual knobs such that access function
  200. * can be consolidated into one instead of many inline functions.
  201. */
  202. struct rapl_primitive_info {
  203. const char *name;
  204. u64 mask;
  205. int shift;
  206. enum rapl_domain_reg_id id;
  207. enum unit_type unit;
  208. u32 flag;
  209. };
  /*
   * Initializer for one struct rapl_primitive_info entry:
   * p = primitive (stringified into .name), m = mask, s = shift,
   * i = register id, u = unit type, f = flags.
   */
  #define PRIMITIVE_INFO_INIT(p, m, s, i, u, f)	\
  	{					\
  		.name = #p,			\
  		.mask = (m),			\
  		.shift = (s),			\
  		.id = (i),			\
  		.unit = (u),			\
  		.flag = (f)			\
  	}
  218. static void rapl_init_domains(struct rapl_package *rp);
  219. static int rapl_read_data_raw(struct rapl_domain *rd,
  220. enum rapl_primitives prim,
  221. bool xlate, u64 *data,
  222. bool pmu_ctx);
  223. static int rapl_write_data_raw(struct rapl_domain *rd,
  224. enum rapl_primitives prim,
  225. unsigned long long value);
  226. static int rapl_read_pl_data(struct rapl_domain *rd, int pl,
  227. enum pl_prims pl_prim,
  228. bool xlate, u64 *data);
  229. static int rapl_write_pl_data(struct rapl_domain *rd, int pl,
  230. enum pl_prims pl_prim,
  231. unsigned long long value);
  232. static u64 rapl_unit_xlate(struct rapl_domain *rd,
  233. enum unit_type type, u64 value, int to_raw);
  234. static void package_power_limit_irq_save(struct rapl_package *rp);
  235. static LIST_HEAD(rapl_packages); /* guarded by CPU hotplug lock */
  /* Zone names; rapl_init_domains() indexes this table by domain type. */
  static const char *const rapl_domain_names[] = {
  	"package",	/* index 0 */
  	"core",		/* index 1 */
  	"uncore",	/* index 2 */
  	"dram",		/* index 3 */
  	"psys",		/* index 4 */
  };
  243. static int get_energy_counter(struct powercap_zone *power_zone,
  244. u64 *energy_raw)
  245. {
  246. struct rapl_domain *rd;
  247. u64 energy_now;
  248. /* prevent CPU hotplug, make sure the RAPL domain does not go
  249. * away while reading the counter.
  250. */
  251. cpus_read_lock();
  252. rd = power_zone_to_rapl_domain(power_zone);
  253. if (!rapl_read_data_raw(rd, ENERGY_COUNTER, true, &energy_now, false)) {
  254. *energy_raw = energy_now;
  255. cpus_read_unlock();
  256. return 0;
  257. }
  258. cpus_read_unlock();
  259. return -EIO;
  260. }
  261. static int get_max_energy_counter(struct powercap_zone *pcd_dev, u64 *energy)
  262. {
  263. struct rapl_domain *rd = power_zone_to_rapl_domain(pcd_dev);
  264. *energy = rapl_unit_xlate(rd, ENERGY_UNIT, ENERGY_STATUS_MASK, 0);
  265. return 0;
  266. }
  267. static int release_zone(struct powercap_zone *power_zone)
  268. {
  269. struct rapl_domain *rd = power_zone_to_rapl_domain(power_zone);
  270. struct rapl_package *rp = rd->rp;
  271. /* package zone is the last zone of a package, we can free
  272. * memory here since all children has been unregistered.
  273. */
  274. if (rd->id == RAPL_DOMAIN_PACKAGE) {
  275. kfree(rd);
  276. rp->domains = NULL;
  277. }
  278. return 0;
  279. }
  280. static int find_nr_power_limit(struct rapl_domain *rd)
  281. {
  282. int i, nr_pl = 0;
  283. for (i = 0; i < NR_POWER_LIMITS; i++) {
  284. if (is_pl_valid(rd, i))
  285. nr_pl++;
  286. }
  287. return nr_pl;
  288. }
  289. static int set_domain_enable(struct powercap_zone *power_zone, bool mode)
  290. {
  291. struct rapl_domain *rd = power_zone_to_rapl_domain(power_zone);
  292. struct rapl_defaults *defaults = get_defaults(rd->rp);
  293. u64 val;
  294. int ret;
  295. cpus_read_lock();
  296. ret = rapl_write_pl_data(rd, POWER_LIMIT1, PL_ENABLE, mode);
  297. if (ret)
  298. goto end;
  299. ret = rapl_read_pl_data(rd, POWER_LIMIT1, PL_ENABLE, false, &val);
  300. if (ret)
  301. goto end;
  302. if (mode != val) {
  303. pr_debug("%s cannot be %s\n", power_zone->name,
  304. str_enabled_disabled(mode));
  305. goto end;
  306. }
  307. if (defaults->set_floor_freq)
  308. defaults->set_floor_freq(rd, mode);
  309. end:
  310. cpus_read_unlock();
  311. return ret;
  312. }
  313. static int get_domain_enable(struct powercap_zone *power_zone, bool *mode)
  314. {
  315. struct rapl_domain *rd = power_zone_to_rapl_domain(power_zone);
  316. u64 val;
  317. int ret;
  318. if (rd->rpl[POWER_LIMIT1].locked) {
  319. *mode = false;
  320. return 0;
  321. }
  322. cpus_read_lock();
  323. ret = rapl_read_pl_data(rd, POWER_LIMIT1, PL_ENABLE, true, &val);
  324. if (!ret)
  325. *mode = val;
  326. cpus_read_unlock();
  327. return ret;
  328. }
  329. /* per RAPL domain ops, in the order of rapl_domain_type */
  330. static const struct powercap_zone_ops zone_ops[] = {
  331. /* RAPL_DOMAIN_PACKAGE */
  332. {
  333. .get_energy_uj = get_energy_counter,
  334. .get_max_energy_range_uj = get_max_energy_counter,
  335. .release = release_zone,
  336. .set_enable = set_domain_enable,
  337. .get_enable = get_domain_enable,
  338. },
  339. /* RAPL_DOMAIN_PP0 */
  340. {
  341. .get_energy_uj = get_energy_counter,
  342. .get_max_energy_range_uj = get_max_energy_counter,
  343. .release = release_zone,
  344. .set_enable = set_domain_enable,
  345. .get_enable = get_domain_enable,
  346. },
  347. /* RAPL_DOMAIN_PP1 */
  348. {
  349. .get_energy_uj = get_energy_counter,
  350. .get_max_energy_range_uj = get_max_energy_counter,
  351. .release = release_zone,
  352. .set_enable = set_domain_enable,
  353. .get_enable = get_domain_enable,
  354. },
  355. /* RAPL_DOMAIN_DRAM */
  356. {
  357. .get_energy_uj = get_energy_counter,
  358. .get_max_energy_range_uj = get_max_energy_counter,
  359. .release = release_zone,
  360. .set_enable = set_domain_enable,
  361. .get_enable = get_domain_enable,
  362. },
  363. /* RAPL_DOMAIN_PLATFORM */
  364. {
  365. .get_energy_uj = get_energy_counter,
  366. .get_max_energy_range_uj = get_max_energy_counter,
  367. .release = release_zone,
  368. .set_enable = set_domain_enable,
  369. .get_enable = get_domain_enable,
  370. },
  371. };
  372. /*
  373. * Constraint index used by powercap can be different than power limit (PL)
  374. * index in that some PLs maybe missing due to non-existent MSRs. So we
  375. * need to convert here by finding the valid PLs only (name populated).
  376. */
  377. static int contraint_to_pl(struct rapl_domain *rd, int cid)
  378. {
  379. int i, j;
  380. for (i = POWER_LIMIT1, j = 0; i < NR_POWER_LIMITS; i++) {
  381. if (is_pl_valid(rd, i) && j++ == cid) {
  382. pr_debug("%s: index %d\n", __func__, i);
  383. return i;
  384. }
  385. }
  386. pr_err("Cannot find matching power limit for constraint %d\n", cid);
  387. return -EINVAL;
  388. }
  389. static int set_power_limit(struct powercap_zone *power_zone, int cid,
  390. u64 power_limit)
  391. {
  392. struct rapl_domain *rd;
  393. struct rapl_package *rp;
  394. int ret = 0;
  395. int id;
  396. cpus_read_lock();
  397. rd = power_zone_to_rapl_domain(power_zone);
  398. id = contraint_to_pl(rd, cid);
  399. rp = rd->rp;
  400. ret = rapl_write_pl_data(rd, id, PL_LIMIT, power_limit);
  401. if (!ret)
  402. package_power_limit_irq_save(rp);
  403. cpus_read_unlock();
  404. return ret;
  405. }
  406. static int get_current_power_limit(struct powercap_zone *power_zone, int cid,
  407. u64 *data)
  408. {
  409. struct rapl_domain *rd;
  410. u64 val;
  411. int ret = 0;
  412. int id;
  413. cpus_read_lock();
  414. rd = power_zone_to_rapl_domain(power_zone);
  415. id = contraint_to_pl(rd, cid);
  416. ret = rapl_read_pl_data(rd, id, PL_LIMIT, true, &val);
  417. if (!ret)
  418. *data = val;
  419. cpus_read_unlock();
  420. return ret;
  421. }
  422. static int set_time_window(struct powercap_zone *power_zone, int cid,
  423. u64 window)
  424. {
  425. struct rapl_domain *rd;
  426. int ret = 0;
  427. int id;
  428. cpus_read_lock();
  429. rd = power_zone_to_rapl_domain(power_zone);
  430. id = contraint_to_pl(rd, cid);
  431. ret = rapl_write_pl_data(rd, id, PL_TIME_WINDOW, window);
  432. cpus_read_unlock();
  433. return ret;
  434. }
  435. static int get_time_window(struct powercap_zone *power_zone, int cid,
  436. u64 *data)
  437. {
  438. struct rapl_domain *rd;
  439. u64 val;
  440. int ret = 0;
  441. int id;
  442. cpus_read_lock();
  443. rd = power_zone_to_rapl_domain(power_zone);
  444. id = contraint_to_pl(rd, cid);
  445. ret = rapl_read_pl_data(rd, id, PL_TIME_WINDOW, true, &val);
  446. if (!ret)
  447. *data = val;
  448. cpus_read_unlock();
  449. return ret;
  450. }
  451. static const char *get_constraint_name(struct powercap_zone *power_zone,
  452. int cid)
  453. {
  454. struct rapl_domain *rd;
  455. int id;
  456. rd = power_zone_to_rapl_domain(power_zone);
  457. id = contraint_to_pl(rd, cid);
  458. if (id >= 0)
  459. return rd->rpl[id].name;
  460. return NULL;
  461. }
  462. static int get_max_power(struct powercap_zone *power_zone, int cid, u64 *data)
  463. {
  464. struct rapl_domain *rd;
  465. u64 val;
  466. int ret = 0;
  467. int id;
  468. cpus_read_lock();
  469. rd = power_zone_to_rapl_domain(power_zone);
  470. id = contraint_to_pl(rd, cid);
  471. ret = rapl_read_pl_data(rd, id, PL_MAX_POWER, true, &val);
  472. if (!ret)
  473. *data = val;
  474. /* As a generalization rule, PL4 would be around two times PL2. */
  475. if (id == POWER_LIMIT4)
  476. *data = *data * 2;
  477. cpus_read_unlock();
  478. return ret;
  479. }
  480. static const struct powercap_zone_constraint_ops constraint_ops = {
  481. .set_power_limit_uw = set_power_limit,
  482. .get_power_limit_uw = get_current_power_limit,
  483. .set_time_window_us = set_time_window,
  484. .get_time_window_us = get_time_window,
  485. .get_max_power_uw = get_max_power,
  486. .get_name = get_constraint_name,
  487. };
  488. /* Return the id used for read_raw/write_raw callback */
  489. static int get_rid(struct rapl_package *rp)
  490. {
  491. return rp->lead_cpu >= 0 ? rp->lead_cpu : rp->id;
  492. }
  493. /* called after domain detection and package level data are set */
  494. static void rapl_init_domains(struct rapl_package *rp)
  495. {
  496. enum rapl_domain_type i;
  497. enum rapl_domain_reg_id j;
  498. struct rapl_domain *rd = rp->domains;
  499. for (i = 0; i < RAPL_DOMAIN_MAX; i++) {
  500. unsigned int mask = rp->domain_map & (1 << i);
  501. int t;
  502. if (!mask)
  503. continue;
  504. rd->rp = rp;
  505. if (i == RAPL_DOMAIN_PLATFORM && rp->id > 0) {
  506. snprintf(rd->name, RAPL_DOMAIN_NAME_LENGTH, "psys-%d",
  507. rp->lead_cpu >= 0 ? topology_physical_package_id(rp->lead_cpu) :
  508. rp->id);
  509. } else {
  510. snprintf(rd->name, RAPL_DOMAIN_NAME_LENGTH, "%s",
  511. rapl_domain_names[i]);
  512. }
  513. rd->id = i;
  514. /* PL1 is supported by default */
  515. rp->priv->limits[i] |= BIT(POWER_LIMIT1);
  516. for (t = POWER_LIMIT1; t < NR_POWER_LIMITS; t++) {
  517. if (rp->priv->limits[i] & BIT(t))
  518. rd->rpl[t].name = pl_names[t];
  519. }
  520. for (j = 0; j < RAPL_DOMAIN_REG_MAX; j++)
  521. rd->regs[j] = rp->priv->regs[i][j];
  522. rd++;
  523. }
  524. }
  525. static u64 rapl_unit_xlate(struct rapl_domain *rd, enum unit_type type,
  526. u64 value, int to_raw)
  527. {
  528. u64 units = 1;
  529. struct rapl_defaults *defaults = get_defaults(rd->rp);
  530. u64 scale = 1;
  531. switch (type) {
  532. case POWER_UNIT:
  533. units = rd->power_unit;
  534. break;
  535. case ENERGY_UNIT:
  536. scale = ENERGY_UNIT_SCALE;
  537. units = rd->energy_unit;
  538. break;
  539. case TIME_UNIT:
  540. return defaults->compute_time_window(rd, value, to_raw);
  541. case ARBITRARY_UNIT:
  542. default:
  543. return value;
  544. }
  545. if (to_raw)
  546. return div64_u64(value, units) * scale;
  547. value *= units;
  548. return div64_u64(value, scale);
  549. }
  550. /* RAPL primitives for MSR and MMIO I/F */
  551. static struct rapl_primitive_info rpi_msr[NR_RAPL_PRIMITIVES] = {
  552. /* name, mask, shift, msr index, unit divisor */
  553. [POWER_LIMIT1] = PRIMITIVE_INFO_INIT(POWER_LIMIT1, POWER_LIMIT1_MASK, 0,
  554. RAPL_DOMAIN_REG_LIMIT, POWER_UNIT, 0),
  555. [POWER_LIMIT2] = PRIMITIVE_INFO_INIT(POWER_LIMIT2, POWER_LIMIT2_MASK, 32,
  556. RAPL_DOMAIN_REG_LIMIT, POWER_UNIT, 0),
  557. [POWER_LIMIT4] = PRIMITIVE_INFO_INIT(POWER_LIMIT4, POWER_LIMIT4_MASK, 0,
  558. RAPL_DOMAIN_REG_PL4, POWER_UNIT, 0),
  559. [ENERGY_COUNTER] = PRIMITIVE_INFO_INIT(ENERGY_COUNTER, ENERGY_STATUS_MASK, 0,
  560. RAPL_DOMAIN_REG_STATUS, ENERGY_UNIT, 0),
  561. [FW_LOCK] = PRIMITIVE_INFO_INIT(FW_LOCK, POWER_LOW_LOCK, 31,
  562. RAPL_DOMAIN_REG_LIMIT, ARBITRARY_UNIT, 0),
  563. [FW_HIGH_LOCK] = PRIMITIVE_INFO_INIT(FW_LOCK, POWER_HIGH_LOCK, 63,
  564. RAPL_DOMAIN_REG_LIMIT, ARBITRARY_UNIT, 0),
  565. [PL1_ENABLE] = PRIMITIVE_INFO_INIT(PL1_ENABLE, POWER_LIMIT1_ENABLE, 15,
  566. RAPL_DOMAIN_REG_LIMIT, ARBITRARY_UNIT, 0),
  567. [PL1_CLAMP] = PRIMITIVE_INFO_INIT(PL1_CLAMP, POWER_LIMIT1_CLAMP, 16,
  568. RAPL_DOMAIN_REG_LIMIT, ARBITRARY_UNIT, 0),
  569. [PL2_ENABLE] = PRIMITIVE_INFO_INIT(PL2_ENABLE, POWER_LIMIT2_ENABLE, 47,
  570. RAPL_DOMAIN_REG_LIMIT, ARBITRARY_UNIT, 0),
  571. [PL2_CLAMP] = PRIMITIVE_INFO_INIT(PL2_CLAMP, POWER_LIMIT2_CLAMP, 48,
  572. RAPL_DOMAIN_REG_LIMIT, ARBITRARY_UNIT, 0),
  573. [TIME_WINDOW1] = PRIMITIVE_INFO_INIT(TIME_WINDOW1, TIME_WINDOW1_MASK, 17,
  574. RAPL_DOMAIN_REG_LIMIT, TIME_UNIT, 0),
  575. [TIME_WINDOW2] = PRIMITIVE_INFO_INIT(TIME_WINDOW2, TIME_WINDOW2_MASK, 49,
  576. RAPL_DOMAIN_REG_LIMIT, TIME_UNIT, 0),
  577. [THERMAL_SPEC_POWER] = PRIMITIVE_INFO_INIT(THERMAL_SPEC_POWER, POWER_INFO_THERMAL_SPEC_MASK,
  578. 0, RAPL_DOMAIN_REG_INFO, POWER_UNIT, 0),
  579. [MAX_POWER] = PRIMITIVE_INFO_INIT(MAX_POWER, POWER_INFO_MAX_MASK, 32,
  580. RAPL_DOMAIN_REG_INFO, POWER_UNIT, 0),
  581. [MIN_POWER] = PRIMITIVE_INFO_INIT(MIN_POWER, POWER_INFO_MIN_MASK, 16,
  582. RAPL_DOMAIN_REG_INFO, POWER_UNIT, 0),
  583. [MAX_TIME_WINDOW] = PRIMITIVE_INFO_INIT(MAX_TIME_WINDOW, POWER_INFO_MAX_TIME_WIN_MASK, 48,
  584. RAPL_DOMAIN_REG_INFO, TIME_UNIT, 0),
  585. [THROTTLED_TIME] = PRIMITIVE_INFO_INIT(THROTTLED_TIME, PERF_STATUS_THROTTLE_TIME_MASK, 0,
  586. RAPL_DOMAIN_REG_PERF, TIME_UNIT, 0),
  587. [PRIORITY_LEVEL] = PRIMITIVE_INFO_INIT(PRIORITY_LEVEL, PP_POLICY_MASK, 0,
  588. RAPL_DOMAIN_REG_POLICY, ARBITRARY_UNIT, 0),
  589. [PSYS_POWER_LIMIT1] = PRIMITIVE_INFO_INIT(PSYS_POWER_LIMIT1, PSYS_POWER_LIMIT1_MASK, 0,
  590. RAPL_DOMAIN_REG_LIMIT, POWER_UNIT, 0),
  591. [PSYS_POWER_LIMIT2] = PRIMITIVE_INFO_INIT(PSYS_POWER_LIMIT2, PSYS_POWER_LIMIT2_MASK, 32,
  592. RAPL_DOMAIN_REG_LIMIT, POWER_UNIT, 0),
  593. [PSYS_PL1_ENABLE] = PRIMITIVE_INFO_INIT(PSYS_PL1_ENABLE, PSYS_POWER_LIMIT1_ENABLE, 17,
  594. RAPL_DOMAIN_REG_LIMIT, ARBITRARY_UNIT, 0),
  595. [PSYS_PL2_ENABLE] = PRIMITIVE_INFO_INIT(PSYS_PL2_ENABLE, PSYS_POWER_LIMIT2_ENABLE, 49,
  596. RAPL_DOMAIN_REG_LIMIT, ARBITRARY_UNIT, 0),
  597. [PSYS_TIME_WINDOW1] = PRIMITIVE_INFO_INIT(PSYS_TIME_WINDOW1, PSYS_TIME_WINDOW1_MASK, 19,
  598. RAPL_DOMAIN_REG_LIMIT, TIME_UNIT, 0),
  599. [PSYS_TIME_WINDOW2] = PRIMITIVE_INFO_INIT(PSYS_TIME_WINDOW2, PSYS_TIME_WINDOW2_MASK, 51,
  600. RAPL_DOMAIN_REG_LIMIT, TIME_UNIT, 0),
  601. /* non-hardware */
  602. [AVERAGE_POWER] = PRIMITIVE_INFO_INIT(AVERAGE_POWER, 0, 0, 0, POWER_UNIT,
  603. RAPL_PRIMITIVE_DERIVED),
  604. };
  605. /* RAPL primitives for TPMI I/F */
  606. static struct rapl_primitive_info rpi_tpmi[NR_RAPL_PRIMITIVES] = {
  607. /* name, mask, shift, msr index, unit divisor */
  608. [POWER_LIMIT1] = PRIMITIVE_INFO_INIT(POWER_LIMIT1, TPMI_POWER_LIMIT_MASK, 0,
  609. RAPL_DOMAIN_REG_LIMIT, POWER_UNIT, 0),
  610. [POWER_LIMIT2] = PRIMITIVE_INFO_INIT(POWER_LIMIT2, TPMI_POWER_LIMIT_MASK, 0,
  611. RAPL_DOMAIN_REG_PL2, POWER_UNIT, 0),
  612. [POWER_LIMIT4] = PRIMITIVE_INFO_INIT(POWER_LIMIT4, TPMI_POWER_LIMIT_MASK, 0,
  613. RAPL_DOMAIN_REG_PL4, POWER_UNIT, 0),
  614. [ENERGY_COUNTER] = PRIMITIVE_INFO_INIT(ENERGY_COUNTER, ENERGY_STATUS_MASK, 0,
  615. RAPL_DOMAIN_REG_STATUS, ENERGY_UNIT, 0),
  616. [PL1_LOCK] = PRIMITIVE_INFO_INIT(PL1_LOCK, POWER_HIGH_LOCK, 63,
  617. RAPL_DOMAIN_REG_LIMIT, ARBITRARY_UNIT, 0),
  618. [PL2_LOCK] = PRIMITIVE_INFO_INIT(PL2_LOCK, POWER_HIGH_LOCK, 63,
  619. RAPL_DOMAIN_REG_PL2, ARBITRARY_UNIT, 0),
  620. [PL4_LOCK] = PRIMITIVE_INFO_INIT(PL4_LOCK, POWER_HIGH_LOCK, 63,
  621. RAPL_DOMAIN_REG_PL4, ARBITRARY_UNIT, 0),
  622. [PL1_ENABLE] = PRIMITIVE_INFO_INIT(PL1_ENABLE, TPMI_POWER_LIMIT_ENABLE, 62,
  623. RAPL_DOMAIN_REG_LIMIT, ARBITRARY_UNIT, 0),
  624. [PL2_ENABLE] = PRIMITIVE_INFO_INIT(PL2_ENABLE, TPMI_POWER_LIMIT_ENABLE, 62,
  625. RAPL_DOMAIN_REG_PL2, ARBITRARY_UNIT, 0),
  626. [PL4_ENABLE] = PRIMITIVE_INFO_INIT(PL4_ENABLE, TPMI_POWER_LIMIT_ENABLE, 62,
  627. RAPL_DOMAIN_REG_PL4, ARBITRARY_UNIT, 0),
  628. [TIME_WINDOW1] = PRIMITIVE_INFO_INIT(TIME_WINDOW1, TPMI_TIME_WINDOW_MASK, 18,
  629. RAPL_DOMAIN_REG_LIMIT, TIME_UNIT, 0),
  630. [TIME_WINDOW2] = PRIMITIVE_INFO_INIT(TIME_WINDOW2, TPMI_TIME_WINDOW_MASK, 18,
  631. RAPL_DOMAIN_REG_PL2, TIME_UNIT, 0),
  632. [THERMAL_SPEC_POWER] = PRIMITIVE_INFO_INIT(THERMAL_SPEC_POWER, TPMI_INFO_SPEC_MASK, 0,
  633. RAPL_DOMAIN_REG_INFO, POWER_UNIT, 0),
  634. [MAX_POWER] = PRIMITIVE_INFO_INIT(MAX_POWER, TPMI_INFO_MAX_MASK, 36,
  635. RAPL_DOMAIN_REG_INFO, POWER_UNIT, 0),
  636. [MIN_POWER] = PRIMITIVE_INFO_INIT(MIN_POWER, TPMI_INFO_MIN_MASK, 18,
  637. RAPL_DOMAIN_REG_INFO, POWER_UNIT, 0),
  638. [MAX_TIME_WINDOW] = PRIMITIVE_INFO_INIT(MAX_TIME_WINDOW, TPMI_INFO_MAX_TIME_WIN_MASK, 54,
  639. RAPL_DOMAIN_REG_INFO, TIME_UNIT, 0),
  640. [THROTTLED_TIME] = PRIMITIVE_INFO_INIT(THROTTLED_TIME, PERF_STATUS_THROTTLE_TIME_MASK, 0,
  641. RAPL_DOMAIN_REG_PERF, TIME_UNIT, 0),
  642. /* non-hardware */
  643. [AVERAGE_POWER] = PRIMITIVE_INFO_INIT(AVERAGE_POWER, 0, 0, 0,
  644. POWER_UNIT, RAPL_PRIMITIVE_DERIVED),
  645. };
  646. static struct rapl_primitive_info *get_rpi(struct rapl_package *rp, int prim)
  647. {
  648. struct rapl_primitive_info *rpi = rp->priv->rpi;
  649. if (prim < 0 || prim >= NR_RAPL_PRIMITIVES || !rpi)
  650. return NULL;
  651. return &rpi[prim];
  652. }
  653. static int rapl_config(struct rapl_package *rp)
  654. {
  655. switch (rp->priv->type) {
  656. /* MMIO I/F shares the same register layout as MSR registers */
  657. case RAPL_IF_MMIO:
  658. case RAPL_IF_MSR:
  659. rp->priv->defaults = (void *)defaults_msr;
  660. rp->priv->rpi = (void *)rpi_msr;
  661. break;
  662. case RAPL_IF_TPMI:
  663. rp->priv->defaults = (void *)&defaults_tpmi;
  664. rp->priv->rpi = (void *)rpi_tpmi;
  665. break;
  666. default:
  667. return -EINVAL;
  668. }
  669. /* defaults_msr can be NULL on unsupported platforms */
  670. if (!rp->priv->defaults || !rp->priv->rpi)
  671. return -ENODEV;
  672. return 0;
  673. }
  674. static enum rapl_primitives
  675. prim_fixups(struct rapl_domain *rd, enum rapl_primitives prim)
  676. {
  677. struct rapl_defaults *defaults = get_defaults(rd->rp);
  678. if (!defaults->spr_psys_bits)
  679. return prim;
  680. if (rd->id != RAPL_DOMAIN_PLATFORM)
  681. return prim;
  682. switch (prim) {
  683. case POWER_LIMIT1:
  684. return PSYS_POWER_LIMIT1;
  685. case POWER_LIMIT2:
  686. return PSYS_POWER_LIMIT2;
  687. case PL1_ENABLE:
  688. return PSYS_PL1_ENABLE;
  689. case PL2_ENABLE:
  690. return PSYS_PL2_ENABLE;
  691. case TIME_WINDOW1:
  692. return PSYS_TIME_WINDOW1;
  693. case TIME_WINDOW2:
  694. return PSYS_TIME_WINDOW2;
  695. default:
  696. return prim;
  697. }
  698. }
  699. /* Read primitive data based on its related struct rapl_primitive_info.
  700. * if xlate flag is set, return translated data based on data units, i.e.
  701. * time, energy, and power.
  702. * RAPL MSRs are non-architectual and are laid out not consistently across
  703. * domains. Here we use primitive info to allow writing consolidated access
  704. * functions.
  705. * For a given primitive, it is processed by MSR mask and shift. Unit conversion
  706. * is pre-assigned based on RAPL unit MSRs read at init time.
  707. * 63-------------------------- 31--------------------------- 0
  708. * | xxxxx (mask) |
  709. * | |<- shift ----------------|
  710. * 63-------------------------- 31--------------------------- 0
  711. */
  712. static int rapl_read_data_raw(struct rapl_domain *rd,
  713. enum rapl_primitives prim, bool xlate, u64 *data,
  714. bool pmu_ctx)
  715. {
  716. u64 value;
  717. enum rapl_primitives prim_fixed = prim_fixups(rd, prim);
  718. struct rapl_primitive_info *rpi = get_rpi(rd->rp, prim_fixed);
  719. struct reg_action ra;
  720. if (!rpi || !rpi->name || rpi->flag & RAPL_PRIMITIVE_DUMMY)
  721. return -EINVAL;
  722. ra.reg = rd->regs[rpi->id];
  723. if (!ra.reg.val)
  724. return -EINVAL;
  725. /* non-hardware data are collected by the polling thread */
  726. if (rpi->flag & RAPL_PRIMITIVE_DERIVED) {
  727. *data = rd->rdd.primitives[prim];
  728. return 0;
  729. }
  730. ra.mask = rpi->mask;
  731. if (rd->rp->priv->read_raw(get_rid(rd->rp), &ra, pmu_ctx)) {
  732. pr_debug("failed to read reg 0x%llx for %s:%s\n", ra.reg.val, rd->rp->name, rd->name);
  733. return -EIO;
  734. }
  735. value = ra.value >> rpi->shift;
  736. if (xlate)
  737. *data = rapl_unit_xlate(rd, rpi->unit, value, 0);
  738. else
  739. *data = value;
  740. return 0;
  741. }
  742. /* Similar use of primitive info in the read counterpart */
  743. static int rapl_write_data_raw(struct rapl_domain *rd,
  744. enum rapl_primitives prim,
  745. unsigned long long value)
  746. {
  747. enum rapl_primitives prim_fixed = prim_fixups(rd, prim);
  748. struct rapl_primitive_info *rpi = get_rpi(rd->rp, prim_fixed);
  749. u64 bits;
  750. struct reg_action ra;
  751. int ret;
  752. if (!rpi || !rpi->name || rpi->flag & RAPL_PRIMITIVE_DUMMY)
  753. return -EINVAL;
  754. bits = rapl_unit_xlate(rd, rpi->unit, value, 1);
  755. bits <<= rpi->shift;
  756. bits &= rpi->mask;
  757. memset(&ra, 0, sizeof(ra));
  758. ra.reg = rd->regs[rpi->id];
  759. ra.mask = rpi->mask;
  760. ra.value = bits;
  761. ret = rd->rp->priv->write_raw(get_rid(rd->rp), &ra);
  762. return ret;
  763. }
  764. static int rapl_read_pl_data(struct rapl_domain *rd, int pl,
  765. enum pl_prims pl_prim, bool xlate, u64 *data)
  766. {
  767. enum rapl_primitives prim = get_pl_prim(rd, pl, pl_prim);
  768. if (!is_pl_valid(rd, pl))
  769. return -EINVAL;
  770. return rapl_read_data_raw(rd, prim, xlate, data, false);
  771. }
  772. static int rapl_write_pl_data(struct rapl_domain *rd, int pl,
  773. enum pl_prims pl_prim,
  774. unsigned long long value)
  775. {
  776. enum rapl_primitives prim = get_pl_prim(rd, pl, pl_prim);
  777. if (!is_pl_valid(rd, pl))
  778. return -EINVAL;
  779. if (rd->rpl[pl].locked) {
  780. pr_debug("%s:%s:%s locked by BIOS\n", rd->rp->name, rd->name, pl_names[pl]);
  781. return -EACCES;
  782. }
  783. return rapl_write_data_raw(rd, prim, value);
  784. }
  785. /*
  786. * Raw RAPL data stored in MSRs are in certain scales. We need to
  787. * convert them into standard units based on the units reported in
  788. * the RAPL unit MSRs. This is specific to CPUs as the method to
  789. * calculate units differ on different CPUs.
  790. * We convert the units to below format based on CPUs.
  791. * i.e.
  792. * energy unit: picoJoules : Represented in picoJoules by default
  793. * power unit : microWatts : Represented in milliWatts by default
  794. * time unit : microseconds: Represented in seconds by default
  795. */
  796. static int rapl_check_unit_core(struct rapl_domain *rd)
  797. {
  798. struct reg_action ra;
  799. u32 value;
  800. ra.reg = rd->regs[RAPL_DOMAIN_REG_UNIT];
  801. ra.mask = ~0;
  802. if (rd->rp->priv->read_raw(get_rid(rd->rp), &ra, false)) {
  803. pr_err("Failed to read power unit REG 0x%llx on %s:%s, exit.\n",
  804. ra.reg.val, rd->rp->name, rd->name);
  805. return -ENODEV;
  806. }
  807. value = (ra.value & ENERGY_UNIT_MASK) >> ENERGY_UNIT_OFFSET;
  808. rd->energy_unit = ENERGY_UNIT_SCALE * 1000000 / (1 << value);
  809. value = (ra.value & POWER_UNIT_MASK) >> POWER_UNIT_OFFSET;
  810. rd->power_unit = 1000000 / (1 << value);
  811. value = (ra.value & TIME_UNIT_MASK) >> TIME_UNIT_OFFSET;
  812. rd->time_unit = 1000000 / (1 << value);
  813. pr_debug("Core CPU %s:%s energy=%dpJ, time=%dus, power=%duW\n",
  814. rd->rp->name, rd->name, rd->energy_unit, rd->time_unit, rd->power_unit);
  815. return 0;
  816. }
  817. static int rapl_check_unit_atom(struct rapl_domain *rd)
  818. {
  819. struct reg_action ra;
  820. u32 value;
  821. ra.reg = rd->regs[RAPL_DOMAIN_REG_UNIT];
  822. ra.mask = ~0;
  823. if (rd->rp->priv->read_raw(get_rid(rd->rp), &ra, false)) {
  824. pr_err("Failed to read power unit REG 0x%llx on %s:%s, exit.\n",
  825. ra.reg.val, rd->rp->name, rd->name);
  826. return -ENODEV;
  827. }
  828. value = (ra.value & ENERGY_UNIT_MASK) >> ENERGY_UNIT_OFFSET;
  829. rd->energy_unit = ENERGY_UNIT_SCALE * 1 << value;
  830. value = (ra.value & POWER_UNIT_MASK) >> POWER_UNIT_OFFSET;
  831. rd->power_unit = (1 << value) * 1000;
  832. value = (ra.value & TIME_UNIT_MASK) >> TIME_UNIT_OFFSET;
  833. rd->time_unit = 1000000 / (1 << value);
  834. pr_debug("Atom %s:%s energy=%dpJ, time=%dus, power=%duW\n",
  835. rd->rp->name, rd->name, rd->energy_unit, rd->time_unit, rd->power_unit);
  836. return 0;
  837. }
  838. static void power_limit_irq_save_cpu(void *info)
  839. {
  840. u32 l, h = 0;
  841. struct rapl_package *rp = (struct rapl_package *)info;
  842. /* save the state of PLN irq mask bit before disabling it */
  843. rdmsr_safe(MSR_IA32_PACKAGE_THERM_INTERRUPT, &l, &h);
  844. if (!(rp->power_limit_irq & PACKAGE_PLN_INT_SAVED)) {
  845. rp->power_limit_irq = l & PACKAGE_THERM_INT_PLN_ENABLE;
  846. rp->power_limit_irq |= PACKAGE_PLN_INT_SAVED;
  847. }
  848. l &= ~PACKAGE_THERM_INT_PLN_ENABLE;
  849. wrmsr_safe(MSR_IA32_PACKAGE_THERM_INTERRUPT, l, h);
  850. }
  851. /* REVISIT:
  852. * When package power limit is set artificially low by RAPL, LVT
  853. * thermal interrupt for package power limit should be ignored
  854. * since we are not really exceeding the real limit. The intention
  855. * is to avoid excessive interrupts while we are trying to save power.
  856. * A useful feature might be routing the package_power_limit interrupt
  857. * to userspace via eventfd. once we have a usecase, this is simple
  858. * to do by adding an atomic notifier.
  859. */
  860. static void package_power_limit_irq_save(struct rapl_package *rp)
  861. {
  862. if (rp->lead_cpu < 0)
  863. return;
  864. if (!boot_cpu_has(X86_FEATURE_PTS) || !boot_cpu_has(X86_FEATURE_PLN))
  865. return;
  866. smp_call_function_single(rp->lead_cpu, power_limit_irq_save_cpu, rp, 1);
  867. }
  868. /*
  869. * Restore per package power limit interrupt enable state. Called from cpu
  870. * hotplug code on package removal.
  871. */
  872. static void package_power_limit_irq_restore(struct rapl_package *rp)
  873. {
  874. u32 l, h;
  875. if (rp->lead_cpu < 0)
  876. return;
  877. if (!boot_cpu_has(X86_FEATURE_PTS) || !boot_cpu_has(X86_FEATURE_PLN))
  878. return;
  879. /* irq enable state not saved, nothing to restore */
  880. if (!(rp->power_limit_irq & PACKAGE_PLN_INT_SAVED))
  881. return;
  882. rdmsr_safe(MSR_IA32_PACKAGE_THERM_INTERRUPT, &l, &h);
  883. if (rp->power_limit_irq & PACKAGE_THERM_INT_PLN_ENABLE)
  884. l |= PACKAGE_THERM_INT_PLN_ENABLE;
  885. else
  886. l &= ~PACKAGE_THERM_INT_PLN_ENABLE;
  887. wrmsr_safe(MSR_IA32_PACKAGE_THERM_INTERRUPT, l, h);
  888. }
  889. static void set_floor_freq_default(struct rapl_domain *rd, bool mode)
  890. {
  891. int i;
  892. /* always enable clamp such that p-state can go below OS requested
  893. * range. power capping priority over guranteed frequency.
  894. */
  895. rapl_write_pl_data(rd, POWER_LIMIT1, PL_CLAMP, mode);
  896. for (i = POWER_LIMIT2; i < NR_POWER_LIMITS; i++) {
  897. rapl_write_pl_data(rd, i, PL_ENABLE, mode);
  898. rapl_write_pl_data(rd, i, PL_CLAMP, mode);
  899. }
  900. }
  901. static void set_floor_freq_atom(struct rapl_domain *rd, bool enable)
  902. {
  903. static u32 power_ctrl_orig_val;
  904. struct rapl_defaults *defaults = get_defaults(rd->rp);
  905. u32 mdata;
  906. if (!defaults->floor_freq_reg_addr) {
  907. pr_err("Invalid floor frequency config register\n");
  908. return;
  909. }
  910. if (!power_ctrl_orig_val)
  911. iosf_mbi_read(BT_MBI_UNIT_PMC, MBI_CR_READ,
  912. defaults->floor_freq_reg_addr,
  913. &power_ctrl_orig_val);
  914. mdata = power_ctrl_orig_val;
  915. if (enable) {
  916. mdata &= ~(0x7f << 8);
  917. mdata |= 1 << 8;
  918. }
  919. iosf_mbi_write(BT_MBI_UNIT_PMC, MBI_CR_WRITE,
  920. defaults->floor_freq_reg_addr, mdata);
  921. }
  922. static u64 rapl_compute_time_window_core(struct rapl_domain *rd, u64 value,
  923. bool to_raw)
  924. {
  925. u64 f, y; /* fraction and exp. used for time unit */
  926. /*
  927. * Special processing based on 2^Y*(1+F/4), refer
  928. * to Intel Software Developer's manual Vol.3B: CH 14.9.3.
  929. */
  930. if (!to_raw) {
  931. f = (value & 0x60) >> 5;
  932. y = value & 0x1f;
  933. value = (1 << y) * (4 + f) * rd->time_unit / 4;
  934. } else {
  935. if (value < rd->time_unit)
  936. return 0;
  937. do_div(value, rd->time_unit);
  938. y = ilog2(value);
  939. /*
  940. * The target hardware field is 7 bits wide, so return all ones
  941. * if the exponent is too large.
  942. */
  943. if (y > 0x1f)
  944. return 0x7f;
  945. f = div64_u64(4 * (value - (1ULL << y)), 1ULL << y);
  946. value = (y & 0x1f) | ((f & 0x3) << 5);
  947. }
  948. return value;
  949. }
  950. static u64 rapl_compute_time_window_atom(struct rapl_domain *rd, u64 value,
  951. bool to_raw)
  952. {
  953. /*
  954. * Atom time unit encoding is straight forward val * time_unit,
  955. * where time_unit is default to 1 sec. Never 0.
  956. */
  957. if (!to_raw)
  958. return (value) ? value * rd->time_unit : rd->time_unit;
  959. value = div64_u64(value, rd->time_unit);
  960. return value;
  961. }
  962. /* TPMI Unit register has different layout */
  963. #define TPMI_POWER_UNIT_OFFSET POWER_UNIT_OFFSET
  964. #define TPMI_POWER_UNIT_MASK POWER_UNIT_MASK
  965. #define TPMI_ENERGY_UNIT_OFFSET 0x06
  966. #define TPMI_ENERGY_UNIT_MASK 0x7C0
  967. #define TPMI_TIME_UNIT_OFFSET 0x0C
  968. #define TPMI_TIME_UNIT_MASK 0xF000
  969. static int rapl_check_unit_tpmi(struct rapl_domain *rd)
  970. {
  971. struct reg_action ra;
  972. u32 value;
  973. ra.reg = rd->regs[RAPL_DOMAIN_REG_UNIT];
  974. ra.mask = ~0;
  975. if (rd->rp->priv->read_raw(get_rid(rd->rp), &ra, false)) {
  976. pr_err("Failed to read power unit REG 0x%llx on %s:%s, exit.\n",
  977. ra.reg.val, rd->rp->name, rd->name);
  978. return -ENODEV;
  979. }
  980. value = (ra.value & TPMI_ENERGY_UNIT_MASK) >> TPMI_ENERGY_UNIT_OFFSET;
  981. rd->energy_unit = ENERGY_UNIT_SCALE * 1000000 / (1 << value);
  982. value = (ra.value & TPMI_POWER_UNIT_MASK) >> TPMI_POWER_UNIT_OFFSET;
  983. rd->power_unit = 1000000 / (1 << value);
  984. value = (ra.value & TPMI_TIME_UNIT_MASK) >> TPMI_TIME_UNIT_OFFSET;
  985. rd->time_unit = 1000000 / (1 << value);
  986. pr_debug("Core CPU %s:%s energy=%dpJ, time=%dus, power=%duW\n",
  987. rd->rp->name, rd->name, rd->energy_unit, rd->time_unit, rd->power_unit);
  988. return 0;
  989. }
  990. static const struct rapl_defaults defaults_tpmi = {
  991. .check_unit = rapl_check_unit_tpmi,
  992. /* Reuse existing logic, ignore the PL_CLAMP failures and enable all Power Limits */
  993. .set_floor_freq = set_floor_freq_default,
  994. .compute_time_window = rapl_compute_time_window_core,
  995. };
  996. static const struct rapl_defaults rapl_defaults_core = {
  997. .floor_freq_reg_addr = 0,
  998. .check_unit = rapl_check_unit_core,
  999. .set_floor_freq = set_floor_freq_default,
  1000. .compute_time_window = rapl_compute_time_window_core,
  1001. };
  1002. static const struct rapl_defaults rapl_defaults_hsw_server = {
  1003. .check_unit = rapl_check_unit_core,
  1004. .set_floor_freq = set_floor_freq_default,
  1005. .compute_time_window = rapl_compute_time_window_core,
  1006. .dram_domain_energy_unit = 15300,
  1007. };
  1008. static const struct rapl_defaults rapl_defaults_spr_server = {
  1009. .check_unit = rapl_check_unit_core,
  1010. .set_floor_freq = set_floor_freq_default,
  1011. .compute_time_window = rapl_compute_time_window_core,
  1012. .psys_domain_energy_unit = 1000000000,
  1013. .spr_psys_bits = true,
  1014. };
  1015. static const struct rapl_defaults rapl_defaults_byt = {
  1016. .floor_freq_reg_addr = IOSF_CPU_POWER_BUDGET_CTL_BYT,
  1017. .check_unit = rapl_check_unit_atom,
  1018. .set_floor_freq = set_floor_freq_atom,
  1019. .compute_time_window = rapl_compute_time_window_atom,
  1020. };
  1021. static const struct rapl_defaults rapl_defaults_tng = {
  1022. .floor_freq_reg_addr = IOSF_CPU_POWER_BUDGET_CTL_TNG,
  1023. .check_unit = rapl_check_unit_atom,
  1024. .set_floor_freq = set_floor_freq_atom,
  1025. .compute_time_window = rapl_compute_time_window_atom,
  1026. };
  1027. static const struct rapl_defaults rapl_defaults_ann = {
  1028. .floor_freq_reg_addr = 0,
  1029. .check_unit = rapl_check_unit_atom,
  1030. .set_floor_freq = NULL,
  1031. .compute_time_window = rapl_compute_time_window_atom,
  1032. };
  1033. static const struct rapl_defaults rapl_defaults_cht = {
  1034. .floor_freq_reg_addr = 0,
  1035. .check_unit = rapl_check_unit_atom,
  1036. .set_floor_freq = NULL,
  1037. .compute_time_window = rapl_compute_time_window_atom,
  1038. };
  1039. static const struct rapl_defaults rapl_defaults_amd = {
  1040. .check_unit = rapl_check_unit_core,
  1041. };
  1042. static const struct x86_cpu_id rapl_ids[] __initconst = {
  1043. X86_MATCH_VFM(INTEL_SANDYBRIDGE, &rapl_defaults_core),
  1044. X86_MATCH_VFM(INTEL_SANDYBRIDGE_X, &rapl_defaults_core),
  1045. X86_MATCH_VFM(INTEL_IVYBRIDGE, &rapl_defaults_core),
  1046. X86_MATCH_VFM(INTEL_IVYBRIDGE_X, &rapl_defaults_core),
  1047. X86_MATCH_VFM(INTEL_HASWELL, &rapl_defaults_core),
  1048. X86_MATCH_VFM(INTEL_HASWELL_L, &rapl_defaults_core),
  1049. X86_MATCH_VFM(INTEL_HASWELL_G, &rapl_defaults_core),
  1050. X86_MATCH_VFM(INTEL_HASWELL_X, &rapl_defaults_hsw_server),
  1051. X86_MATCH_VFM(INTEL_BROADWELL, &rapl_defaults_core),
  1052. X86_MATCH_VFM(INTEL_BROADWELL_G, &rapl_defaults_core),
  1053. X86_MATCH_VFM(INTEL_BROADWELL_D, &rapl_defaults_core),
  1054. X86_MATCH_VFM(INTEL_BROADWELL_X, &rapl_defaults_hsw_server),
  1055. X86_MATCH_VFM(INTEL_SKYLAKE, &rapl_defaults_core),
  1056. X86_MATCH_VFM(INTEL_SKYLAKE_L, &rapl_defaults_core),
  1057. X86_MATCH_VFM(INTEL_SKYLAKE_X, &rapl_defaults_hsw_server),
  1058. X86_MATCH_VFM(INTEL_KABYLAKE_L, &rapl_defaults_core),
  1059. X86_MATCH_VFM(INTEL_KABYLAKE, &rapl_defaults_core),
  1060. X86_MATCH_VFM(INTEL_CANNONLAKE_L, &rapl_defaults_core),
  1061. X86_MATCH_VFM(INTEL_ICELAKE_L, &rapl_defaults_core),
  1062. X86_MATCH_VFM(INTEL_ICELAKE, &rapl_defaults_core),
  1063. X86_MATCH_VFM(INTEL_ICELAKE_NNPI, &rapl_defaults_core),
  1064. X86_MATCH_VFM(INTEL_ICELAKE_X, &rapl_defaults_hsw_server),
  1065. X86_MATCH_VFM(INTEL_ICELAKE_D, &rapl_defaults_hsw_server),
  1066. X86_MATCH_VFM(INTEL_COMETLAKE_L, &rapl_defaults_core),
  1067. X86_MATCH_VFM(INTEL_COMETLAKE, &rapl_defaults_core),
  1068. X86_MATCH_VFM(INTEL_TIGERLAKE_L, &rapl_defaults_core),
  1069. X86_MATCH_VFM(INTEL_TIGERLAKE, &rapl_defaults_core),
  1070. X86_MATCH_VFM(INTEL_ROCKETLAKE, &rapl_defaults_core),
  1071. X86_MATCH_VFM(INTEL_ALDERLAKE, &rapl_defaults_core),
  1072. X86_MATCH_VFM(INTEL_ALDERLAKE_L, &rapl_defaults_core),
  1073. X86_MATCH_VFM(INTEL_ATOM_GRACEMONT, &rapl_defaults_core),
  1074. X86_MATCH_VFM(INTEL_RAPTORLAKE, &rapl_defaults_core),
  1075. X86_MATCH_VFM(INTEL_RAPTORLAKE_P, &rapl_defaults_core),
  1076. X86_MATCH_VFM(INTEL_RAPTORLAKE_S, &rapl_defaults_core),
  1077. X86_MATCH_VFM(INTEL_BARTLETTLAKE, &rapl_defaults_core),
  1078. X86_MATCH_VFM(INTEL_METEORLAKE, &rapl_defaults_core),
  1079. X86_MATCH_VFM(INTEL_METEORLAKE_L, &rapl_defaults_core),
  1080. X86_MATCH_VFM(INTEL_SAPPHIRERAPIDS_X, &rapl_defaults_spr_server),
  1081. X86_MATCH_VFM(INTEL_EMERALDRAPIDS_X, &rapl_defaults_spr_server),
  1082. X86_MATCH_VFM(INTEL_LUNARLAKE_M, &rapl_defaults_core),
  1083. X86_MATCH_VFM(INTEL_PANTHERLAKE_L, &rapl_defaults_core),
  1084. X86_MATCH_VFM(INTEL_WILDCATLAKE_L, &rapl_defaults_core),
  1085. X86_MATCH_VFM(INTEL_NOVALAKE, &rapl_defaults_core),
  1086. X86_MATCH_VFM(INTEL_NOVALAKE_L, &rapl_defaults_core),
  1087. X86_MATCH_VFM(INTEL_ARROWLAKE_H, &rapl_defaults_core),
  1088. X86_MATCH_VFM(INTEL_ARROWLAKE, &rapl_defaults_core),
  1089. X86_MATCH_VFM(INTEL_ARROWLAKE_U, &rapl_defaults_core),
  1090. X86_MATCH_VFM(INTEL_LAKEFIELD, &rapl_defaults_core),
  1091. X86_MATCH_VFM(INTEL_ATOM_SILVERMONT, &rapl_defaults_byt),
  1092. X86_MATCH_VFM(INTEL_ATOM_AIRMONT, &rapl_defaults_cht),
  1093. X86_MATCH_VFM(INTEL_ATOM_SILVERMONT_MID, &rapl_defaults_tng),
  1094. X86_MATCH_VFM(INTEL_ATOM_SILVERMONT_MID2,&rapl_defaults_ann),
  1095. X86_MATCH_VFM(INTEL_ATOM_GOLDMONT, &rapl_defaults_core),
  1096. X86_MATCH_VFM(INTEL_ATOM_GOLDMONT_PLUS, &rapl_defaults_core),
  1097. X86_MATCH_VFM(INTEL_ATOM_GOLDMONT_D, &rapl_defaults_core),
  1098. X86_MATCH_VFM(INTEL_ATOM_TREMONT, &rapl_defaults_core),
  1099. X86_MATCH_VFM(INTEL_ATOM_TREMONT_D, &rapl_defaults_core),
  1100. X86_MATCH_VFM(INTEL_ATOM_TREMONT_L, &rapl_defaults_core),
  1101. X86_MATCH_VFM(INTEL_XEON_PHI_KNL, &rapl_defaults_hsw_server),
  1102. X86_MATCH_VFM(INTEL_XEON_PHI_KNM, &rapl_defaults_hsw_server),
  1103. X86_MATCH_VENDOR_FAM(AMD, 0x17, &rapl_defaults_amd),
  1104. X86_MATCH_VENDOR_FAM(AMD, 0x19, &rapl_defaults_amd),
  1105. X86_MATCH_VENDOR_FAM(AMD, 0x1A, &rapl_defaults_amd),
  1106. X86_MATCH_VENDOR_FAM(HYGON, 0x18, &rapl_defaults_amd),
  1107. {}
  1108. };
  1109. MODULE_DEVICE_TABLE(x86cpu, rapl_ids);
  1110. /* Read once for all raw primitive data for domains */
  1111. static void rapl_update_domain_data(struct rapl_package *rp)
  1112. {
  1113. int dmn, prim;
  1114. u64 val;
  1115. for (dmn = 0; dmn < rp->nr_domains; dmn++) {
  1116. pr_debug("update %s domain %s data\n", rp->name,
  1117. rp->domains[dmn].name);
  1118. /* exclude non-raw primitives */
  1119. for (prim = 0; prim < NR_RAW_PRIMITIVES; prim++) {
  1120. struct rapl_primitive_info *rpi = get_rpi(rp, prim);
  1121. if (!rapl_read_data_raw(&rp->domains[dmn], prim,
  1122. rpi->unit, &val, false))
  1123. rp->domains[dmn].rdd.primitives[prim] = val;
  1124. }
  1125. }
  1126. }
  1127. static int rapl_package_register_powercap(struct rapl_package *rp)
  1128. {
  1129. struct rapl_domain *rd;
  1130. struct powercap_zone *power_zone = NULL;
  1131. int nr_pl, ret;
  1132. /* Update the domain data of the new package */
  1133. rapl_update_domain_data(rp);
  1134. /* first we register package domain as the parent zone */
  1135. for (rd = rp->domains; rd < rp->domains + rp->nr_domains; rd++) {
  1136. if (rd->id == RAPL_DOMAIN_PACKAGE) {
  1137. nr_pl = find_nr_power_limit(rd);
  1138. pr_debug("register package domain %s\n", rp->name);
  1139. power_zone = powercap_register_zone(&rd->power_zone,
  1140. rp->priv->control_type, rp->name,
  1141. NULL, &zone_ops[rd->id], nr_pl,
  1142. &constraint_ops);
  1143. if (IS_ERR(power_zone)) {
  1144. pr_debug("failed to register power zone %s\n",
  1145. rp->name);
  1146. return PTR_ERR(power_zone);
  1147. }
  1148. /* track parent zone in per package/socket data */
  1149. rp->power_zone = power_zone;
  1150. /* done, only one package domain per socket */
  1151. break;
  1152. }
  1153. }
  1154. if (!power_zone) {
  1155. pr_err("no package domain found, unknown topology!\n");
  1156. return -ENODEV;
  1157. }
  1158. /* now register domains as children of the socket/package */
  1159. for (rd = rp->domains; rd < rp->domains + rp->nr_domains; rd++) {
  1160. struct powercap_zone *parent = rp->power_zone;
  1161. if (rd->id == RAPL_DOMAIN_PACKAGE)
  1162. continue;
  1163. if (rd->id == RAPL_DOMAIN_PLATFORM)
  1164. parent = NULL;
  1165. /* number of power limits per domain varies */
  1166. nr_pl = find_nr_power_limit(rd);
  1167. power_zone = powercap_register_zone(&rd->power_zone,
  1168. rp->priv->control_type,
  1169. rd->name, parent,
  1170. &zone_ops[rd->id], nr_pl,
  1171. &constraint_ops);
  1172. if (IS_ERR(power_zone)) {
  1173. pr_debug("failed to register power_zone, %s:%s\n",
  1174. rp->name, rd->name);
  1175. ret = PTR_ERR(power_zone);
  1176. goto err_cleanup;
  1177. }
  1178. }
  1179. return 0;
  1180. err_cleanup:
  1181. /*
  1182. * Clean up previously initialized domains within the package if we
  1183. * failed after the first domain setup.
  1184. */
  1185. while (--rd >= rp->domains) {
  1186. pr_debug("unregister %s domain %s\n", rp->name, rd->name);
  1187. powercap_unregister_zone(rp->priv->control_type,
  1188. &rd->power_zone);
  1189. }
  1190. return ret;
  1191. }
  1192. static int rapl_check_domain(int domain, struct rapl_package *rp)
  1193. {
  1194. struct reg_action ra;
  1195. switch (domain) {
  1196. case RAPL_DOMAIN_PACKAGE:
  1197. case RAPL_DOMAIN_PP0:
  1198. case RAPL_DOMAIN_PP1:
  1199. case RAPL_DOMAIN_DRAM:
  1200. case RAPL_DOMAIN_PLATFORM:
  1201. ra.reg = rp->priv->regs[domain][RAPL_DOMAIN_REG_STATUS];
  1202. break;
  1203. default:
  1204. pr_err("invalid domain id %d\n", domain);
  1205. return -EINVAL;
  1206. }
  1207. /* make sure domain counters are available and contains non-zero
  1208. * values, otherwise skip it.
  1209. */
  1210. ra.mask = ENERGY_STATUS_MASK;
  1211. if (rp->priv->read_raw(get_rid(rp), &ra, false) || !ra.value)
  1212. return -ENODEV;
  1213. return 0;
  1214. }
  1215. /*
  1216. * Get per domain energy/power/time unit.
  1217. * RAPL Interfaces without per domain unit register will use the package
  1218. * scope unit register to set per domain units.
  1219. */
  1220. static int rapl_get_domain_unit(struct rapl_domain *rd)
  1221. {
  1222. struct rapl_defaults *defaults = get_defaults(rd->rp);
  1223. int ret;
  1224. if (!rd->regs[RAPL_DOMAIN_REG_UNIT].val) {
  1225. if (!rd->rp->priv->reg_unit.val) {
  1226. pr_err("No valid Unit register found\n");
  1227. return -ENODEV;
  1228. }
  1229. rd->regs[RAPL_DOMAIN_REG_UNIT] = rd->rp->priv->reg_unit;
  1230. }
  1231. if (!defaults->check_unit) {
  1232. pr_err("missing .check_unit() callback\n");
  1233. return -ENODEV;
  1234. }
  1235. ret = defaults->check_unit(rd);
  1236. if (ret)
  1237. return ret;
  1238. if (rd->id == RAPL_DOMAIN_DRAM && defaults->dram_domain_energy_unit)
  1239. rd->energy_unit = defaults->dram_domain_energy_unit;
  1240. if (rd->id == RAPL_DOMAIN_PLATFORM && defaults->psys_domain_energy_unit)
  1241. rd->energy_unit = defaults->psys_domain_energy_unit;
  1242. return 0;
  1243. }
  1244. /*
  1245. * Check if power limits are available. Two cases when they are not available:
  1246. * 1. Locked by BIOS, in this case we still provide read-only access so that
  1247. * users can see what limit is set by the BIOS.
  1248. * 2. Some CPUs make some domains monitoring only which means PLx MSRs may not
  1249. * exist at all. In this case, we do not show the constraints in powercap.
  1250. *
  1251. * Called after domains are detected and initialized.
  1252. */
  1253. static void rapl_detect_powerlimit(struct rapl_domain *rd)
  1254. {
  1255. u64 val64;
  1256. int i;
  1257. for (i = POWER_LIMIT1; i < NR_POWER_LIMITS; i++) {
  1258. if (!rapl_read_pl_data(rd, i, PL_LOCK, false, &val64)) {
  1259. if (val64) {
  1260. rd->rpl[i].locked = true;
  1261. pr_info("%s:%s:%s locked by BIOS\n",
  1262. rd->rp->name, rd->name, pl_names[i]);
  1263. }
  1264. }
  1265. if (rapl_read_pl_data(rd, i, PL_LIMIT, false, &val64))
  1266. rd->rpl[i].name = NULL;
  1267. }
  1268. }
  1269. /* Detect active and valid domains for the given CPU, caller must
  1270. * ensure the CPU belongs to the targeted package and CPU hotlug is disabled.
  1271. */
  1272. static int rapl_detect_domains(struct rapl_package *rp)
  1273. {
  1274. struct rapl_domain *rd;
  1275. int i;
  1276. for (i = 0; i < RAPL_DOMAIN_MAX; i++) {
  1277. /* use physical package id to read counters */
  1278. if (!rapl_check_domain(i, rp)) {
  1279. rp->domain_map |= 1 << i;
  1280. pr_info("Found RAPL domain %s\n", rapl_domain_names[i]);
  1281. }
  1282. }
  1283. rp->nr_domains = bitmap_weight(&rp->domain_map, RAPL_DOMAIN_MAX);
  1284. if (!rp->nr_domains) {
  1285. pr_debug("no valid rapl domains found in %s\n", rp->name);
  1286. return -ENODEV;
  1287. }
  1288. pr_debug("found %d domains on %s\n", rp->nr_domains, rp->name);
  1289. rp->domains = kzalloc_objs(struct rapl_domain, rp->nr_domains);
  1290. if (!rp->domains)
  1291. return -ENOMEM;
  1292. rapl_init_domains(rp);
  1293. for (rd = rp->domains; rd < rp->domains + rp->nr_domains; rd++) {
  1294. rapl_get_domain_unit(rd);
  1295. rapl_detect_powerlimit(rd);
  1296. }
  1297. return 0;
  1298. }
  1299. #ifdef CONFIG_PERF_EVENTS
/*
 * Support for RAPL PMU
 *
 * Register a PMU if any of the registered RAPL Packages have the requirement
 * of exposing its energy counters via Perf PMU.
 *
 * PMU Name:
 *	power
 *
 * Events:
 *	Name		Event id	RAPL Domain
 *	energy_cores	0x01		RAPL_DOMAIN_PP0
 *	energy_pkg	0x02		RAPL_DOMAIN_PACKAGE
 *	energy_ram	0x03		RAPL_DOMAIN_DRAM
 *	energy_gpu	0x04		RAPL_DOMAIN_PP1
 *	energy_psys	0x05		RAPL_DOMAIN_PLATFORM
 *
 * Unit:
 *	Joules
 *
 * Scale:
 *	2.3283064365386962890625e-10
 *	The same RAPL domain in different RAPL Packages may have different
 *	energy units. Use 2.3283064365386962890625e-10 (2^-32) Joules as
 *	the fixed unit for all energy counters, and convert each hardware
 *	counter increase to N times of PMU event counter increases.
 *
 * This is fully compatible with the current MSR RAPL PMU. This means that
 * userspace programs like turbostat can use the same code to handle RAPL Perf
 * PMU, no matter what RAPL Interface driver (MSR/TPMI, etc) is running
 * underlying on the platform.
 *
 * Note that RAPL Packages can be probed/removed dynamically, and the events
 * supported by each TPMI RAPL device can be different. Thus the RAPL PMU
 * support is done on demand, which means
 * 1. PMU is registered only if it is needed by a RAPL Package. PMU events for
 *    unsupported counters are not exposed.
 * 2. PMU is unregistered and registered when a new RAPL Package is probed and
 *    supports new counters that are not supported by current PMU.
 * 3. PMU is unregistered when all registered RAPL Packages don't need PMU.
 */
  1341. struct rapl_pmu {
  1342. struct pmu pmu; /* Perf PMU structure */
  1343. u64 timer_ms; /* Maximum expiration time to avoid counter overflow */
  1344. unsigned long domain_map; /* Events supported by current registered PMU */
  1345. bool registered; /* Whether the PMU has been registered or not */
  1346. };
  1347. static struct rapl_pmu rapl_pmu;
  1348. /* PMU helpers */
  1349. static void set_pmu_cpumask(struct rapl_package *rp, cpumask_var_t mask)
  1350. {
  1351. int cpu;
  1352. if (!rp->has_pmu)
  1353. return;
  1354. /* Only TPMI & MSR RAPL are supported for now */
  1355. if (rp->priv->type != RAPL_IF_TPMI && rp->priv->type != RAPL_IF_MSR)
  1356. return;
  1357. /* TPMI/MSR RAPL uses any CPU in the package for PMU */
  1358. for_each_online_cpu(cpu)
  1359. if (topology_physical_package_id(cpu) == rp->id)
  1360. cpumask_set_cpu(cpu, mask);
  1361. }
  1362. static bool is_rp_pmu_cpu(struct rapl_package *rp, int cpu)
  1363. {
  1364. if (!rp->has_pmu)
  1365. return false;
  1366. /* Only TPMI & MSR RAPL are supported for now */
  1367. if (rp->priv->type != RAPL_IF_TPMI && rp->priv->type != RAPL_IF_MSR)
  1368. return false;
  1369. /* TPMI/MSR RAPL uses any CPU in the package for PMU */
  1370. return topology_physical_package_id(cpu) == rp->id;
  1371. }
  1372. static struct rapl_package_pmu_data *event_to_pmu_data(struct perf_event *event)
  1373. {
  1374. struct rapl_package *rp = event->pmu_private;
  1375. return &rp->pmu_data;
  1376. }
  1377. /* PMU event callbacks */
  1378. static u64 event_read_counter(struct perf_event *event)
  1379. {
  1380. struct rapl_package *rp = event->pmu_private;
  1381. u64 val;
  1382. int ret;
  1383. /* Return 0 for unsupported events */
  1384. if (event->hw.idx < 0)
  1385. return 0;
  1386. ret = rapl_read_data_raw(&rp->domains[event->hw.idx], ENERGY_COUNTER, false, &val, true);
  1387. /* Return 0 for failed read */
  1388. if (ret)
  1389. return 0;
  1390. return val;
  1391. }
  1392. static void __rapl_pmu_event_start(struct perf_event *event)
  1393. {
  1394. struct rapl_package_pmu_data *data = event_to_pmu_data(event);
  1395. if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED)))
  1396. return;
  1397. event->hw.state = 0;
  1398. list_add_tail(&event->active_entry, &data->active_list);
  1399. local64_set(&event->hw.prev_count, event_read_counter(event));
  1400. if (++data->n_active == 1)
  1401. hrtimer_start(&data->hrtimer, data->timer_interval,
  1402. HRTIMER_MODE_REL_PINNED);
  1403. }
  1404. static void rapl_pmu_event_start(struct perf_event *event, int mode)
  1405. {
  1406. struct rapl_package_pmu_data *data = event_to_pmu_data(event);
  1407. unsigned long flags;
  1408. raw_spin_lock_irqsave(&data->lock, flags);
  1409. __rapl_pmu_event_start(event);
  1410. raw_spin_unlock_irqrestore(&data->lock, flags);
  1411. }
  1412. static u64 rapl_event_update(struct perf_event *event)
  1413. {
  1414. struct hw_perf_event *hwc = &event->hw;
  1415. struct rapl_package_pmu_data *data = event_to_pmu_data(event);
  1416. u64 prev_raw_count, new_raw_count;
  1417. s64 delta, sdelta;
  1418. /*
  1419. * Follow the generic code to drain hwc->prev_count.
  1420. * The loop is not expected to run for multiple times.
  1421. */
  1422. prev_raw_count = local64_read(&hwc->prev_count);
  1423. do {
  1424. new_raw_count = event_read_counter(event);
  1425. } while (!local64_try_cmpxchg(&hwc->prev_count,
  1426. &prev_raw_count, new_raw_count));
  1427. /*
  1428. * Now we have the new raw value and have updated the prev
  1429. * timestamp already. We can now calculate the elapsed delta
  1430. * (event-)time and add that to the generic event.
  1431. */
  1432. delta = new_raw_count - prev_raw_count;
  1433. /*
  1434. * Scale delta to smallest unit (2^-32)
  1435. * users must then scale back: count * 1/(1e9*2^32) to get Joules
  1436. * or use ldexp(count, -32).
  1437. * Watts = Joules/Time delta
  1438. */
  1439. sdelta = delta * data->scale[event->hw.flags];
  1440. local64_add(sdelta, &event->count);
  1441. return new_raw_count;
  1442. }
  1443. static void rapl_pmu_event_stop(struct perf_event *event, int mode)
  1444. {
  1445. struct rapl_package_pmu_data *data = event_to_pmu_data(event);
  1446. struct hw_perf_event *hwc = &event->hw;
  1447. unsigned long flags;
  1448. raw_spin_lock_irqsave(&data->lock, flags);
  1449. /* Mark event as deactivated and stopped */
  1450. if (!(hwc->state & PERF_HES_STOPPED)) {
  1451. WARN_ON_ONCE(data->n_active <= 0);
  1452. if (--data->n_active == 0)
  1453. hrtimer_cancel(&data->hrtimer);
  1454. list_del(&event->active_entry);
  1455. WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED);
  1456. hwc->state |= PERF_HES_STOPPED;
  1457. }
  1458. /* Check if update of sw counter is necessary */
  1459. if ((mode & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) {
  1460. /*
  1461. * Drain the remaining delta count out of a event
  1462. * that we are disabling:
  1463. */
  1464. rapl_event_update(event);
  1465. hwc->state |= PERF_HES_UPTODATE;
  1466. }
  1467. raw_spin_unlock_irqrestore(&data->lock, flags);
  1468. }
  1469. static int rapl_pmu_event_add(struct perf_event *event, int mode)
  1470. {
  1471. struct rapl_package_pmu_data *data = event_to_pmu_data(event);
  1472. struct hw_perf_event *hwc = &event->hw;
  1473. unsigned long flags;
  1474. raw_spin_lock_irqsave(&data->lock, flags);
  1475. hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
  1476. if (mode & PERF_EF_START)
  1477. __rapl_pmu_event_start(event);
  1478. raw_spin_unlock_irqrestore(&data->lock, flags);
  1479. return 0;
  1480. }
  1481. static void rapl_pmu_event_del(struct perf_event *event, int flags)
  1482. {
  1483. rapl_pmu_event_stop(event, PERF_EF_UPDATE);
  1484. }
  1485. /* RAPL PMU event ids, same as shown in sysfs */
  1486. enum perf_rapl_events {
  1487. PERF_RAPL_PP0 = 1, /* all cores */
  1488. PERF_RAPL_PKG, /* entire package */
  1489. PERF_RAPL_RAM, /* DRAM */
  1490. PERF_RAPL_PP1, /* gpu */
  1491. PERF_RAPL_PSYS, /* psys */
  1492. PERF_RAPL_MAX
  1493. };
  1494. #define RAPL_EVENT_MASK GENMASK(7, 0)
  1495. static const int event_to_domain[PERF_RAPL_MAX] = {
  1496. [PERF_RAPL_PP0] = RAPL_DOMAIN_PP0,
  1497. [PERF_RAPL_PKG] = RAPL_DOMAIN_PACKAGE,
  1498. [PERF_RAPL_RAM] = RAPL_DOMAIN_DRAM,
  1499. [PERF_RAPL_PP1] = RAPL_DOMAIN_PP1,
  1500. [PERF_RAPL_PSYS] = RAPL_DOMAIN_PLATFORM,
  1501. };
  1502. static int rapl_pmu_event_init(struct perf_event *event)
  1503. {
  1504. struct rapl_package *pos, *rp = NULL;
  1505. u64 cfg = event->attr.config & RAPL_EVENT_MASK;
  1506. int domain, idx;
  1507. /* Only look at RAPL events */
  1508. if (event->attr.type != event->pmu->type)
  1509. return -ENOENT;
  1510. /* Check for supported events only */
  1511. if (!cfg || cfg >= PERF_RAPL_MAX)
  1512. return -EINVAL;
  1513. if (event->cpu < 0)
  1514. return -EINVAL;
  1515. /* Find out which Package the event belongs to */
  1516. list_for_each_entry(pos, &rapl_packages, plist) {
  1517. if (is_rp_pmu_cpu(pos, event->cpu)) {
  1518. rp = pos;
  1519. break;
  1520. }
  1521. }
  1522. if (!rp)
  1523. return -ENODEV;
  1524. /* Find out which RAPL Domain the event belongs to */
  1525. domain = event_to_domain[cfg];
  1526. event->event_caps |= PERF_EV_CAP_READ_ACTIVE_PKG;
  1527. event->pmu_private = rp; /* Which package */
  1528. event->hw.flags = domain; /* Which domain */
  1529. event->hw.idx = -1;
  1530. /* Find out the index in rp->domains[] to get domain pointer */
  1531. for (idx = 0; idx < rp->nr_domains; idx++) {
  1532. if (rp->domains[idx].id == domain) {
  1533. event->hw.idx = idx;
  1534. break;
  1535. }
  1536. }
  1537. return 0;
  1538. }
  1539. static void rapl_pmu_event_read(struct perf_event *event)
  1540. {
  1541. rapl_event_update(event);
  1542. }
  1543. static enum hrtimer_restart rapl_hrtimer_handle(struct hrtimer *hrtimer)
  1544. {
  1545. struct rapl_package_pmu_data *data =
  1546. container_of(hrtimer, struct rapl_package_pmu_data, hrtimer);
  1547. struct perf_event *event;
  1548. unsigned long flags;
  1549. if (!data->n_active)
  1550. return HRTIMER_NORESTART;
  1551. raw_spin_lock_irqsave(&data->lock, flags);
  1552. list_for_each_entry(event, &data->active_list, active_entry)
  1553. rapl_event_update(event);
  1554. raw_spin_unlock_irqrestore(&data->lock, flags);
  1555. hrtimer_forward_now(hrtimer, data->timer_interval);
  1556. return HRTIMER_RESTART;
  1557. }
  1558. /* PMU sysfs attributes */
  1559. /*
  1560. * There are no default events, but we need to create "events" group (with
  1561. * empty attrs) before updating it with detected events.
  1562. */
  1563. static struct attribute *attrs_empty[] = {
  1564. NULL,
  1565. };
  1566. static struct attribute_group pmu_events_group = {
  1567. .name = "events",
  1568. .attrs = attrs_empty,
  1569. };
  1570. static ssize_t cpumask_show(struct device *dev,
  1571. struct device_attribute *attr, char *buf)
  1572. {
  1573. struct rapl_package *rp;
  1574. cpumask_var_t cpu_mask;
  1575. int ret;
  1576. if (!alloc_cpumask_var(&cpu_mask, GFP_KERNEL))
  1577. return -ENOMEM;
  1578. cpus_read_lock();
  1579. cpumask_clear(cpu_mask);
  1580. /* Choose a cpu for each RAPL Package */
  1581. list_for_each_entry(rp, &rapl_packages, plist) {
  1582. set_pmu_cpumask(rp, cpu_mask);
  1583. }
  1584. cpus_read_unlock();
  1585. ret = cpumap_print_to_pagebuf(true, buf, cpu_mask);
  1586. free_cpumask_var(cpu_mask);
  1587. return ret;
  1588. }
  1589. static DEVICE_ATTR_RO(cpumask);
  1590. static struct attribute *pmu_cpumask_attrs[] = {
  1591. &dev_attr_cpumask.attr,
  1592. NULL
  1593. };
  1594. static struct attribute_group pmu_cpumask_group = {
  1595. .attrs = pmu_cpumask_attrs,
  1596. };
  1597. PMU_FORMAT_ATTR(event, "config:0-7");
  1598. static struct attribute *pmu_format_attr[] = {
  1599. &format_attr_event.attr,
  1600. NULL
  1601. };
  1602. static struct attribute_group pmu_format_group = {
  1603. .name = "format",
  1604. .attrs = pmu_format_attr,
  1605. };
  1606. static const struct attribute_group *pmu_attr_groups[] = {
  1607. &pmu_events_group,
  1608. &pmu_cpumask_group,
  1609. &pmu_format_group,
  1610. NULL
  1611. };
  1612. #define RAPL_EVENT_ATTR_STR(_name, v, str) \
  1613. static struct perf_pmu_events_attr event_attr_##v = { \
  1614. .attr = __ATTR(_name, 0444, perf_event_sysfs_show, NULL), \
  1615. .event_str = str, \
  1616. }
  1617. RAPL_EVENT_ATTR_STR(energy-cores, rapl_cores, "event=0x01");
  1618. RAPL_EVENT_ATTR_STR(energy-pkg, rapl_pkg, "event=0x02");
  1619. RAPL_EVENT_ATTR_STR(energy-ram, rapl_ram, "event=0x03");
  1620. RAPL_EVENT_ATTR_STR(energy-gpu, rapl_gpu, "event=0x04");
  1621. RAPL_EVENT_ATTR_STR(energy-psys, rapl_psys, "event=0x05");
  1622. RAPL_EVENT_ATTR_STR(energy-cores.unit, rapl_unit_cores, "Joules");
  1623. RAPL_EVENT_ATTR_STR(energy-pkg.unit, rapl_unit_pkg, "Joules");
  1624. RAPL_EVENT_ATTR_STR(energy-ram.unit, rapl_unit_ram, "Joules");
  1625. RAPL_EVENT_ATTR_STR(energy-gpu.unit, rapl_unit_gpu, "Joules");
  1626. RAPL_EVENT_ATTR_STR(energy-psys.unit, rapl_unit_psys, "Joules");
  1627. RAPL_EVENT_ATTR_STR(energy-cores.scale, rapl_scale_cores, "2.3283064365386962890625e-10");
  1628. RAPL_EVENT_ATTR_STR(energy-pkg.scale, rapl_scale_pkg, "2.3283064365386962890625e-10");
  1629. RAPL_EVENT_ATTR_STR(energy-ram.scale, rapl_scale_ram, "2.3283064365386962890625e-10");
  1630. RAPL_EVENT_ATTR_STR(energy-gpu.scale, rapl_scale_gpu, "2.3283064365386962890625e-10");
  1631. RAPL_EVENT_ATTR_STR(energy-psys.scale, rapl_scale_psys, "2.3283064365386962890625e-10");
  1632. #define RAPL_EVENT_GROUP(_name, domain) \
  1633. static struct attribute *pmu_attr_##_name[] = { \
  1634. &event_attr_rapl_##_name.attr.attr, \
  1635. &event_attr_rapl_unit_##_name.attr.attr, \
  1636. &event_attr_rapl_scale_##_name.attr.attr, \
  1637. NULL \
  1638. }; \
  1639. static umode_t is_visible_##_name(struct kobject *kobj, struct attribute *attr, int event) \
  1640. { \
  1641. return rapl_pmu.domain_map & BIT(domain) ? attr->mode : 0; \
  1642. } \
  1643. static struct attribute_group pmu_group_##_name = { \
  1644. .name = "events", \
  1645. .attrs = pmu_attr_##_name, \
  1646. .is_visible = is_visible_##_name, \
  1647. }
  1648. RAPL_EVENT_GROUP(cores, RAPL_DOMAIN_PP0);
  1649. RAPL_EVENT_GROUP(pkg, RAPL_DOMAIN_PACKAGE);
  1650. RAPL_EVENT_GROUP(ram, RAPL_DOMAIN_DRAM);
  1651. RAPL_EVENT_GROUP(gpu, RAPL_DOMAIN_PP1);
  1652. RAPL_EVENT_GROUP(psys, RAPL_DOMAIN_PLATFORM);
  1653. static const struct attribute_group *pmu_attr_update[] = {
  1654. &pmu_group_cores,
  1655. &pmu_group_pkg,
  1656. &pmu_group_ram,
  1657. &pmu_group_gpu,
  1658. &pmu_group_psys,
  1659. NULL
  1660. };
  1661. static int rapl_pmu_update(struct rapl_package *rp)
  1662. {
  1663. int ret = 0;
  1664. /* Return if PMU already covers all events supported by current RAPL Package */
  1665. if (rapl_pmu.registered && !(rp->domain_map & (~rapl_pmu.domain_map)))
  1666. goto end;
  1667. /* Unregister previous registered PMU */
  1668. if (rapl_pmu.registered)
  1669. perf_pmu_unregister(&rapl_pmu.pmu);
  1670. rapl_pmu.registered = false;
  1671. rapl_pmu.domain_map |= rp->domain_map;
  1672. memset(&rapl_pmu.pmu, 0, sizeof(struct pmu));
  1673. rapl_pmu.pmu.attr_groups = pmu_attr_groups;
  1674. rapl_pmu.pmu.attr_update = pmu_attr_update;
  1675. rapl_pmu.pmu.task_ctx_nr = perf_invalid_context;
  1676. rapl_pmu.pmu.event_init = rapl_pmu_event_init;
  1677. rapl_pmu.pmu.add = rapl_pmu_event_add;
  1678. rapl_pmu.pmu.del = rapl_pmu_event_del;
  1679. rapl_pmu.pmu.start = rapl_pmu_event_start;
  1680. rapl_pmu.pmu.stop = rapl_pmu_event_stop;
  1681. rapl_pmu.pmu.read = rapl_pmu_event_read;
  1682. rapl_pmu.pmu.module = THIS_MODULE;
  1683. rapl_pmu.pmu.capabilities = PERF_PMU_CAP_NO_EXCLUDE | PERF_PMU_CAP_NO_INTERRUPT;
  1684. ret = perf_pmu_register(&rapl_pmu.pmu, "power", -1);
  1685. if (ret) {
  1686. pr_info("Failed to register PMU\n");
  1687. return ret;
  1688. }
  1689. rapl_pmu.registered = true;
  1690. end:
  1691. rp->has_pmu = true;
  1692. return ret;
  1693. }
  1694. int rapl_package_add_pmu_locked(struct rapl_package *rp)
  1695. {
  1696. struct rapl_package_pmu_data *data = &rp->pmu_data;
  1697. int idx;
  1698. if (rp->has_pmu)
  1699. return -EEXIST;
  1700. for (idx = 0; idx < rp->nr_domains; idx++) {
  1701. struct rapl_domain *rd = &rp->domains[idx];
  1702. int domain = rd->id;
  1703. u64 val;
  1704. if (!test_bit(domain, &rp->domain_map))
  1705. continue;
  1706. /*
  1707. * The RAPL PMU granularity is 2^-32 Joules
  1708. * data->scale[]: times of 2^-32 Joules for each ENERGY COUNTER increase
  1709. */
  1710. val = rd->energy_unit * (1ULL << 32);
  1711. do_div(val, ENERGY_UNIT_SCALE * 1000000);
  1712. data->scale[domain] = val;
  1713. if (!rapl_pmu.timer_ms) {
  1714. struct rapl_primitive_info *rpi = get_rpi(rp, ENERGY_COUNTER);
  1715. /*
  1716. * Calculate the timer rate:
  1717. * Use reference of 200W for scaling the timeout to avoid counter
  1718. * overflows.
  1719. *
  1720. * max_count = rpi->mask >> rpi->shift + 1
  1721. * max_energy_pj = max_count * rd->energy_unit
  1722. * max_time_sec = (max_energy_pj / 1000000000) / 200w
  1723. *
  1724. * rapl_pmu.timer_ms = max_time_sec * 1000 / 2
  1725. */
  1726. val = (rpi->mask >> rpi->shift) + 1;
  1727. val *= rd->energy_unit;
  1728. do_div(val, 1000000 * 200 * 2);
  1729. rapl_pmu.timer_ms = val;
  1730. pr_debug("%llu ms overflow timer\n", rapl_pmu.timer_ms);
  1731. }
  1732. pr_debug("Domain %s: hw unit %lld * 2^-32 Joules\n", rd->name, data->scale[domain]);
  1733. }
  1734. /* Initialize per package PMU data */
  1735. raw_spin_lock_init(&data->lock);
  1736. INIT_LIST_HEAD(&data->active_list);
  1737. data->timer_interval = ms_to_ktime(rapl_pmu.timer_ms);
  1738. hrtimer_setup(&data->hrtimer, rapl_hrtimer_handle, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
  1739. return rapl_pmu_update(rp);
  1740. }
  1741. EXPORT_SYMBOL_GPL(rapl_package_add_pmu_locked);
  1742. int rapl_package_add_pmu(struct rapl_package *rp)
  1743. {
  1744. guard(cpus_read_lock)();
  1745. return rapl_package_add_pmu_locked(rp);
  1746. }
  1747. EXPORT_SYMBOL_GPL(rapl_package_add_pmu);
  1748. void rapl_package_remove_pmu_locked(struct rapl_package *rp)
  1749. {
  1750. struct rapl_package *pos;
  1751. if (!rp->has_pmu)
  1752. return;
  1753. list_for_each_entry(pos, &rapl_packages, plist) {
  1754. /* PMU is still needed */
  1755. if (pos->has_pmu && pos != rp)
  1756. return;
  1757. }
  1758. perf_pmu_unregister(&rapl_pmu.pmu);
  1759. memset(&rapl_pmu, 0, sizeof(struct rapl_pmu));
  1760. }
  1761. EXPORT_SYMBOL_GPL(rapl_package_remove_pmu_locked);
  1762. void rapl_package_remove_pmu(struct rapl_package *rp)
  1763. {
  1764. guard(cpus_read_lock)();
  1765. rapl_package_remove_pmu_locked(rp);
  1766. }
  1767. EXPORT_SYMBOL_GPL(rapl_package_remove_pmu);
  1768. #endif
  1769. /* called from CPU hotplug notifier, hotplug lock held */
  1770. void rapl_remove_package_cpuslocked(struct rapl_package *rp)
  1771. {
  1772. struct rapl_domain *rd, *rd_package = NULL;
  1773. package_power_limit_irq_restore(rp);
  1774. for (rd = rp->domains; rd < rp->domains + rp->nr_domains; rd++) {
  1775. int i;
  1776. for (i = POWER_LIMIT1; i < NR_POWER_LIMITS; i++) {
  1777. rapl_write_pl_data(rd, i, PL_ENABLE, 0);
  1778. rapl_write_pl_data(rd, i, PL_CLAMP, 0);
  1779. }
  1780. if (rd->id == RAPL_DOMAIN_PACKAGE) {
  1781. rd_package = rd;
  1782. continue;
  1783. }
  1784. pr_debug("remove package, undo power limit on %s: %s\n",
  1785. rp->name, rd->name);
  1786. powercap_unregister_zone(rp->priv->control_type,
  1787. &rd->power_zone);
  1788. }
  1789. /* do parent zone last */
  1790. powercap_unregister_zone(rp->priv->control_type,
  1791. &rd_package->power_zone);
  1792. list_del(&rp->plist);
  1793. kfree(rp);
  1794. }
  1795. EXPORT_SYMBOL_GPL(rapl_remove_package_cpuslocked);
  1796. void rapl_remove_package(struct rapl_package *rp)
  1797. {
  1798. guard(cpus_read_lock)();
  1799. rapl_remove_package_cpuslocked(rp);
  1800. }
  1801. EXPORT_SYMBOL_GPL(rapl_remove_package);
  1802. /*
  1803. * RAPL Package energy counter scope:
  1804. * 1. AMD/HYGON platforms use per-PKG package energy counter
  1805. * 2. For Intel platforms
  1806. * 2.1 CLX-AP platform has per-DIE package energy counter
  1807. * 2.2 Other platforms that uses MSR RAPL are single die systems so the
  1808. * package energy counter can be considered as per-PKG/per-DIE,
  1809. * here it is considered as per-DIE.
  1810. * 2.3 New platforms that use TPMI RAPL doesn't care about the
  1811. * scope because they are not MSR/CPU based.
  1812. */
  1813. #define rapl_msrs_are_pkg_scope() \
  1814. (boot_cpu_data.x86_vendor == X86_VENDOR_AMD || \
  1815. boot_cpu_data.x86_vendor == X86_VENDOR_HYGON)
  1816. /* caller to ensure CPU hotplug lock is held */
  1817. struct rapl_package *rapl_find_package_domain_cpuslocked(int id, struct rapl_if_priv *priv,
  1818. bool id_is_cpu)
  1819. {
  1820. struct rapl_package *rp;
  1821. int uid;
  1822. if (id_is_cpu) {
  1823. uid = rapl_msrs_are_pkg_scope() ?
  1824. topology_physical_package_id(id) : topology_logical_die_id(id);
  1825. if (uid < 0) {
  1826. pr_err("topology_logical_(package/die)_id() returned a negative value");
  1827. return NULL;
  1828. }
  1829. }
  1830. else
  1831. uid = id;
  1832. list_for_each_entry(rp, &rapl_packages, plist) {
  1833. if (rp->id == uid
  1834. && rp->priv->control_type == priv->control_type)
  1835. return rp;
  1836. }
  1837. return NULL;
  1838. }
  1839. EXPORT_SYMBOL_GPL(rapl_find_package_domain_cpuslocked);
  1840. struct rapl_package *rapl_find_package_domain(int id, struct rapl_if_priv *priv, bool id_is_cpu)
  1841. {
  1842. guard(cpus_read_lock)();
  1843. return rapl_find_package_domain_cpuslocked(id, priv, id_is_cpu);
  1844. }
  1845. EXPORT_SYMBOL_GPL(rapl_find_package_domain);
  1846. /* called from CPU hotplug notifier, hotplug lock held */
  1847. struct rapl_package *rapl_add_package_cpuslocked(int id, struct rapl_if_priv *priv, bool id_is_cpu)
  1848. {
  1849. struct rapl_package *rp;
  1850. int ret;
  1851. rp = kzalloc_obj(struct rapl_package);
  1852. if (!rp)
  1853. return ERR_PTR(-ENOMEM);
  1854. if (id_is_cpu) {
  1855. rp->id = rapl_msrs_are_pkg_scope() ?
  1856. topology_physical_package_id(id) : topology_logical_die_id(id);
  1857. if ((int)(rp->id) < 0) {
  1858. pr_err("topology_logical_(package/die)_id() returned a negative value");
  1859. return ERR_PTR(-EINVAL);
  1860. }
  1861. rp->lead_cpu = id;
  1862. if (!rapl_msrs_are_pkg_scope() && topology_max_dies_per_package() > 1)
  1863. snprintf(rp->name, PACKAGE_DOMAIN_NAME_LENGTH, "package-%d-die-%d",
  1864. topology_physical_package_id(id), topology_die_id(id));
  1865. else
  1866. snprintf(rp->name, PACKAGE_DOMAIN_NAME_LENGTH, "package-%d",
  1867. topology_physical_package_id(id));
  1868. } else {
  1869. rp->id = id;
  1870. rp->lead_cpu = -1;
  1871. snprintf(rp->name, PACKAGE_DOMAIN_NAME_LENGTH, "package-%d", id);
  1872. }
  1873. rp->priv = priv;
  1874. ret = rapl_config(rp);
  1875. if (ret)
  1876. goto err_free_package;
  1877. /* check if the package contains valid domains */
  1878. if (rapl_detect_domains(rp)) {
  1879. ret = -ENODEV;
  1880. goto err_free_package;
  1881. }
  1882. ret = rapl_package_register_powercap(rp);
  1883. if (!ret) {
  1884. INIT_LIST_HEAD(&rp->plist);
  1885. list_add(&rp->plist, &rapl_packages);
  1886. return rp;
  1887. }
  1888. err_free_package:
  1889. kfree(rp->domains);
  1890. kfree(rp);
  1891. return ERR_PTR(ret);
  1892. }
  1893. EXPORT_SYMBOL_GPL(rapl_add_package_cpuslocked);
  1894. struct rapl_package *rapl_add_package(int id, struct rapl_if_priv *priv, bool id_is_cpu)
  1895. {
  1896. guard(cpus_read_lock)();
  1897. return rapl_add_package_cpuslocked(id, priv, id_is_cpu);
  1898. }
  1899. EXPORT_SYMBOL_GPL(rapl_add_package);
  1900. static void power_limit_state_save(void)
  1901. {
  1902. struct rapl_package *rp;
  1903. struct rapl_domain *rd;
  1904. int ret, i;
  1905. cpus_read_lock();
  1906. list_for_each_entry(rp, &rapl_packages, plist) {
  1907. if (!rp->power_zone)
  1908. continue;
  1909. rd = power_zone_to_rapl_domain(rp->power_zone);
  1910. for (i = POWER_LIMIT1; i < NR_POWER_LIMITS; i++) {
  1911. ret = rapl_read_pl_data(rd, i, PL_LIMIT, true,
  1912. &rd->rpl[i].last_power_limit);
  1913. if (ret)
  1914. rd->rpl[i].last_power_limit = 0;
  1915. }
  1916. }
  1917. cpus_read_unlock();
  1918. }
  1919. static void power_limit_state_restore(void)
  1920. {
  1921. struct rapl_package *rp;
  1922. struct rapl_domain *rd;
  1923. int i;
  1924. cpus_read_lock();
  1925. list_for_each_entry(rp, &rapl_packages, plist) {
  1926. if (!rp->power_zone)
  1927. continue;
  1928. rd = power_zone_to_rapl_domain(rp->power_zone);
  1929. for (i = POWER_LIMIT1; i < NR_POWER_LIMITS; i++)
  1930. if (rd->rpl[i].last_power_limit)
  1931. rapl_write_pl_data(rd, i, PL_LIMIT,
  1932. rd->rpl[i].last_power_limit);
  1933. }
  1934. cpus_read_unlock();
  1935. }
  1936. static int rapl_pm_callback(struct notifier_block *nb,
  1937. unsigned long mode, void *_unused)
  1938. {
  1939. switch (mode) {
  1940. case PM_SUSPEND_PREPARE:
  1941. power_limit_state_save();
  1942. break;
  1943. case PM_POST_SUSPEND:
  1944. power_limit_state_restore();
  1945. break;
  1946. }
  1947. return NOTIFY_OK;
  1948. }
  1949. static struct notifier_block rapl_pm_notifier = {
  1950. .notifier_call = rapl_pm_callback,
  1951. };
  1952. static struct platform_device *rapl_msr_platdev;
  1953. static int __init rapl_init(void)
  1954. {
  1955. const struct x86_cpu_id *id;
  1956. int ret;
  1957. id = x86_match_cpu(rapl_ids);
  1958. if (id) {
  1959. defaults_msr = (struct rapl_defaults *)id->driver_data;
  1960. rapl_msr_platdev = platform_device_alloc("intel_rapl_msr", 0);
  1961. if (!rapl_msr_platdev)
  1962. return -ENOMEM;
  1963. ret = platform_device_add(rapl_msr_platdev);
  1964. if (ret) {
  1965. platform_device_put(rapl_msr_platdev);
  1966. return ret;
  1967. }
  1968. }
  1969. ret = register_pm_notifier(&rapl_pm_notifier);
  1970. if (ret && rapl_msr_platdev) {
  1971. platform_device_del(rapl_msr_platdev);
  1972. platform_device_put(rapl_msr_platdev);
  1973. }
  1974. return ret;
  1975. }
  1976. static void __exit rapl_exit(void)
  1977. {
  1978. platform_device_unregister(rapl_msr_platdev);
  1979. unregister_pm_notifier(&rapl_pm_notifier);
  1980. }
  1981. fs_initcall(rapl_init);
  1982. module_exit(rapl_exit);
  1983. MODULE_DESCRIPTION("Intel Runtime Average Power Limit (RAPL) common code");
  1984. MODULE_AUTHOR("Jacob Pan <jacob.jun.pan@intel.com>");
  1985. MODULE_LICENSE("GPL v2");