therm_throt.c 23 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816
  1. // SPDX-License-Identifier: GPL-2.0-only
  2. /*
  3. * Thermal throttle event support code (such as syslog messaging and rate
  4. * limiting) that was factored out from x86_64 (mce_intel.c) and i386 (p4.c).
  5. *
  6. * This allows consistent reporting of CPU thermal throttle events.
  7. *
  8. * Maintains a counter in /sys that keeps track of the number of thermal
  9. * events, such that the user knows how bad the thermal problem might be
  10. * (since the logging to syslog is rate limited).
  11. *
  12. * Author: Dmitriy Zavin (dmitriyz@google.com)
  13. *
  14. * Credits: Adapted from Zwane Mwaikambo's original code in mce_intel.c.
  15. * Inspired by Ross Biro's and Al Borchers' counter code.
  16. */
  17. #include <linux/interrupt.h>
  18. #include <linux/notifier.h>
  19. #include <linux/jiffies.h>
  20. #include <linux/kernel.h>
  21. #include <linux/percpu.h>
  22. #include <linux/export.h>
  23. #include <linux/types.h>
  24. #include <linux/init.h>
  25. #include <linux/smp.h>
  26. #include <linux/sysfs.h>
  27. #include <linux/cpu.h>
  28. #include <asm/processor.h>
  29. #include <asm/thermal.h>
  30. #include <asm/traps.h>
  31. #include <asm/apic.h>
  32. #include <asm/irq.h>
  33. #include <asm/msr.h>
  34. #include "intel_hfi.h"
  35. #include "thermal_interrupt.h"
  36. /* How long to wait between reporting thermal events */
  37. #define CHECK_INTERVAL (300 * HZ)
  38. #define THERMAL_THROTTLING_EVENT 0
  39. #define POWER_LIMIT_EVENT 1
  40. /**
  41. * struct _thermal_state - Represent the current thermal event state
  42. * @next_check: Stores the next timestamp, when it is allowed
  43. * to log the next warning message.
  44. * @last_interrupt_time: Stores the timestamp for the last threshold
  45. * high event.
  46. * @therm_work: Delayed workqueue structure
  47. * @count: Stores the current running count for thermal
  48. * or power threshold interrupts.
  49. * @last_count: Stores the previous running count for thermal
  50. * or power threshold interrupts.
  51. * @max_time_ms: This shows the maximum amount of time CPU was
  52. * in throttled state for a single thermal
  53. * threshold high to low state.
  54. * @total_time_ms: This is a cumulative time during which CPU was
  55. * in the throttled state.
  56. * @rate_control_active: Set when a throttling message is logged.
  57. * This is used for the purpose of rate-control.
  58. * @new_event: Stores the last high/low status of the
  59. * THERM_STATUS_PROCHOT or
  60. * THERM_STATUS_POWER_LIMIT.
  61. * @level: Stores whether this _thermal_state instance is
  62. * for a CORE level or for PACKAGE level.
  63. * @sample_index: Index for storing the next sample in the buffer
  64. * temp_samples[].
  65. * @sample_count: Total number of samples collected in the buffer
  66. * temp_samples[].
  67. * @average: The last moving average of temperature samples
  68. * @baseline_temp: Temperature at which thermal threshold high
  69. * interrupt was generated.
  70. * @temp_samples: Storage for temperature samples to calculate
  71. * moving average.
  72. *
  73. * This structure is used to represent data related to thermal state for a CPU.
  74. * There is a separate storage for core and package level for each CPU.
  75. */
  76. struct _thermal_state {
  77. u64 next_check;
  78. u64 last_interrupt_time;
  79. struct delayed_work therm_work;
  80. unsigned long count;
  81. unsigned long last_count;
  82. unsigned long max_time_ms;
  83. unsigned long total_time_ms;
  84. bool rate_control_active;
  85. bool new_event;
  86. u8 level;
  87. u8 sample_index;
  88. u8 sample_count;
  89. u8 average;
  90. u8 baseline_temp;
  91. u8 temp_samples[3];
  92. };
  93. struct thermal_state {
  94. struct _thermal_state core_throttle;
  95. struct _thermal_state core_power_limit;
  96. struct _thermal_state package_throttle;
  97. struct _thermal_state package_power_limit;
  98. struct _thermal_state core_thresh0;
  99. struct _thermal_state core_thresh1;
  100. struct _thermal_state pkg_thresh0;
  101. struct _thermal_state pkg_thresh1;
  102. };
  103. /* Callback to handle core threshold interrupts */
  104. int (*platform_thermal_notify)(__u64 msr_val);
  105. EXPORT_SYMBOL(platform_thermal_notify);
  106. /* Callback to handle core package threshold_interrupts */
  107. int (*platform_thermal_package_notify)(__u64 msr_val);
  108. EXPORT_SYMBOL_GPL(platform_thermal_package_notify);
  109. /* Callback support of rate control, return true, if
  110. * callback has rate control */
  111. bool (*platform_thermal_package_rate_control)(void);
  112. EXPORT_SYMBOL_GPL(platform_thermal_package_rate_control);
  113. static DEFINE_PER_CPU(struct thermal_state, thermal_state);
  114. static atomic_t therm_throt_en = ATOMIC_INIT(0);
  115. static u32 lvtthmr_init __read_mostly;
  116. #ifdef CONFIG_SYSFS
  117. #define define_therm_throt_device_one_ro(_name) \
  118. static DEVICE_ATTR(_name, 0444, \
  119. therm_throt_device_show_##_name, \
  120. NULL) \
  121. #define define_therm_throt_device_show_func(event, name) \
  122. \
  123. static ssize_t therm_throt_device_show_##event##_##name( \
  124. struct device *dev, \
  125. struct device_attribute *attr, \
  126. char *buf) \
  127. { \
  128. unsigned int cpu = dev->id; \
  129. ssize_t ret; \
  130. \
  131. preempt_disable(); /* CPU hotplug */ \
  132. if (cpu_online(cpu)) { \
  133. ret = sysfs_emit(buf, "%lu\n", \
  134. per_cpu(thermal_state, cpu).event.name); \
  135. } else \
  136. ret = 0; \
  137. preempt_enable(); \
  138. \
  139. return ret; \
  140. }
  141. define_therm_throt_device_show_func(core_throttle, count);
  142. define_therm_throt_device_one_ro(core_throttle_count);
  143. define_therm_throt_device_show_func(core_power_limit, count);
  144. define_therm_throt_device_one_ro(core_power_limit_count);
  145. define_therm_throt_device_show_func(package_throttle, count);
  146. define_therm_throt_device_one_ro(package_throttle_count);
  147. define_therm_throt_device_show_func(package_power_limit, count);
  148. define_therm_throt_device_one_ro(package_power_limit_count);
  149. define_therm_throt_device_show_func(core_throttle, max_time_ms);
  150. define_therm_throt_device_one_ro(core_throttle_max_time_ms);
  151. define_therm_throt_device_show_func(package_throttle, max_time_ms);
  152. define_therm_throt_device_one_ro(package_throttle_max_time_ms);
  153. define_therm_throt_device_show_func(core_throttle, total_time_ms);
  154. define_therm_throt_device_one_ro(core_throttle_total_time_ms);
  155. define_therm_throt_device_show_func(package_throttle, total_time_ms);
  156. define_therm_throt_device_one_ro(package_throttle_total_time_ms);
  157. static struct attribute *thermal_throttle_attrs[] = {
  158. &dev_attr_core_throttle_count.attr,
  159. &dev_attr_core_throttle_max_time_ms.attr,
  160. &dev_attr_core_throttle_total_time_ms.attr,
  161. NULL
  162. };
  163. static const struct attribute_group thermal_attr_group = {
  164. .attrs = thermal_throttle_attrs,
  165. .name = "thermal_throttle"
  166. };
  167. #endif /* CONFIG_SYSFS */
  168. #define THERM_THROT_POLL_INTERVAL HZ
  169. #define THERM_STATUS_PROCHOT_LOG BIT(1)
  170. static u64 therm_intr_core_clear_mask;
  171. static u64 therm_intr_pkg_clear_mask;
  172. static void thermal_intr_init_core_clear_mask(void)
  173. {
  174. if (therm_intr_core_clear_mask)
  175. return;
  176. /*
  177. * Reference: Intel SDM Volume 4
  178. * "Table 2-2. IA-32 Architectural MSRs", MSR 0x19C
  179. * IA32_THERM_STATUS.
  180. */
  181. /*
  182. * Bit 1, 3, 5: CPUID.01H:EDX[22] = 1. This driver will not
  183. * enable interrupts, when 0 as it checks for X86_FEATURE_ACPI.
  184. */
  185. therm_intr_core_clear_mask = (BIT(1) | BIT(3) | BIT(5));
  186. /*
  187. * Bit 7 and 9: Thermal Threshold #1 and #2 log
  188. * If CPUID.01H:ECX[8] = 1
  189. */
  190. if (boot_cpu_has(X86_FEATURE_TM2))
  191. therm_intr_core_clear_mask |= (BIT(7) | BIT(9));
  192. /* Bit 11: Power Limitation log (R/WC0) If CPUID.06H:EAX[4] = 1 */
  193. if (boot_cpu_has(X86_FEATURE_PLN))
  194. therm_intr_core_clear_mask |= BIT(11);
  195. /*
  196. * Bit 13: Current Limit log (R/WC0) If CPUID.06H:EAX[7] = 1
  197. * Bit 15: Cross Domain Limit log (R/WC0) If CPUID.06H:EAX[7] = 1
  198. */
  199. if (boot_cpu_has(X86_FEATURE_HWP))
  200. therm_intr_core_clear_mask |= (BIT(13) | BIT(15));
  201. }
  202. static void thermal_intr_init_pkg_clear_mask(void)
  203. {
  204. if (therm_intr_pkg_clear_mask)
  205. return;
  206. /*
  207. * Reference: Intel SDM Volume 4
  208. * "Table 2-2. IA-32 Architectural MSRs", MSR 0x1B1
  209. * IA32_PACKAGE_THERM_STATUS.
  210. */
  211. /* All bits except BIT 26 depend on CPUID.06H: EAX[6] = 1 */
  212. if (boot_cpu_has(X86_FEATURE_PTS))
  213. therm_intr_pkg_clear_mask = (BIT(1) | BIT(3) | BIT(5) | BIT(7) | BIT(9) | BIT(11));
  214. /*
  215. * Intel SDM Volume 2A: Thermal and Power Management Leaf
  216. * Bit 26: CPUID.06H: EAX[19] = 1
  217. */
  218. if (boot_cpu_has(X86_FEATURE_HFI))
  219. therm_intr_pkg_clear_mask |= BIT(26);
  220. }
  221. /*
  222. * Clear the bits in package thermal status register for bit = 1
  223. * in bitmask
  224. */
  225. void thermal_clear_package_intr_status(int level, u64 bit_mask)
  226. {
  227. u64 msr_val;
  228. int msr;
  229. if (level == CORE_LEVEL) {
  230. msr = MSR_IA32_THERM_STATUS;
  231. msr_val = therm_intr_core_clear_mask;
  232. } else {
  233. msr = MSR_IA32_PACKAGE_THERM_STATUS;
  234. msr_val = therm_intr_pkg_clear_mask;
  235. }
  236. msr_val &= ~bit_mask;
  237. wrmsrq(msr, msr_val);
  238. }
  239. EXPORT_SYMBOL_GPL(thermal_clear_package_intr_status);
  240. static void get_therm_status(int level, bool *proc_hot, u8 *temp)
  241. {
  242. int msr;
  243. u64 msr_val;
  244. if (level == CORE_LEVEL)
  245. msr = MSR_IA32_THERM_STATUS;
  246. else
  247. msr = MSR_IA32_PACKAGE_THERM_STATUS;
  248. rdmsrq(msr, msr_val);
  249. if (msr_val & THERM_STATUS_PROCHOT_LOG)
  250. *proc_hot = true;
  251. else
  252. *proc_hot = false;
  253. *temp = (msr_val >> 16) & 0x7F;
  254. }
  255. static void __maybe_unused throttle_active_work(struct work_struct *work)
  256. {
  257. struct _thermal_state *state = container_of(to_delayed_work(work),
  258. struct _thermal_state, therm_work);
  259. unsigned int i, avg, this_cpu = smp_processor_id();
  260. u64 now = get_jiffies_64();
  261. bool hot;
  262. u8 temp;
  263. get_therm_status(state->level, &hot, &temp);
  264. /* temperature value is offset from the max so lesser means hotter */
  265. if (!hot && temp > state->baseline_temp) {
  266. if (state->rate_control_active)
  267. pr_info("CPU%d: %s temperature/speed normal (total events = %lu)\n",
  268. this_cpu,
  269. state->level == CORE_LEVEL ? "Core" : "Package",
  270. state->count);
  271. state->rate_control_active = false;
  272. return;
  273. }
  274. if (time_before64(now, state->next_check) &&
  275. state->rate_control_active)
  276. goto re_arm;
  277. state->next_check = now + CHECK_INTERVAL;
  278. if (state->count != state->last_count) {
  279. /* There was one new thermal interrupt */
  280. state->last_count = state->count;
  281. state->average = 0;
  282. state->sample_count = 0;
  283. state->sample_index = 0;
  284. }
  285. state->temp_samples[state->sample_index] = temp;
  286. state->sample_count++;
  287. state->sample_index = (state->sample_index + 1) % ARRAY_SIZE(state->temp_samples);
  288. if (state->sample_count < ARRAY_SIZE(state->temp_samples))
  289. goto re_arm;
  290. avg = 0;
  291. for (i = 0; i < ARRAY_SIZE(state->temp_samples); ++i)
  292. avg += state->temp_samples[i];
  293. avg /= ARRAY_SIZE(state->temp_samples);
  294. if (state->average > avg) {
  295. pr_warn("CPU%d: %s temperature is above threshold, cpu clock is throttled (total events = %lu)\n",
  296. this_cpu,
  297. state->level == CORE_LEVEL ? "Core" : "Package",
  298. state->count);
  299. state->rate_control_active = true;
  300. }
  301. state->average = avg;
  302. re_arm:
  303. thermal_clear_package_intr_status(state->level, THERM_STATUS_PROCHOT_LOG);
  304. schedule_delayed_work_on(this_cpu, &state->therm_work, THERM_THROT_POLL_INTERVAL);
  305. }
  306. /***
  307. * therm_throt_process - Process thermal throttling event from interrupt
  308. * @curr: Whether the condition is current or not (boolean), since the
  309. * thermal interrupt normally gets called both when the thermal
  310. * event begins and once the event has ended.
  311. *
  312. * This function is called by the thermal interrupt after the
  313. * IRQ has been acknowledged.
  314. *
  315. * It will take care of rate limiting and printing messages to the syslog.
  316. */
  317. static void therm_throt_process(bool new_event, int event, int level)
  318. {
  319. struct _thermal_state *state;
  320. unsigned int this_cpu = smp_processor_id();
  321. bool old_event;
  322. u64 now;
  323. struct thermal_state *pstate = &per_cpu(thermal_state, this_cpu);
  324. now = get_jiffies_64();
  325. if (level == CORE_LEVEL) {
  326. if (event == THERMAL_THROTTLING_EVENT)
  327. state = &pstate->core_throttle;
  328. else if (event == POWER_LIMIT_EVENT)
  329. state = &pstate->core_power_limit;
  330. else
  331. return;
  332. } else if (level == PACKAGE_LEVEL) {
  333. if (event == THERMAL_THROTTLING_EVENT)
  334. state = &pstate->package_throttle;
  335. else if (event == POWER_LIMIT_EVENT)
  336. state = &pstate->package_power_limit;
  337. else
  338. return;
  339. } else
  340. return;
  341. old_event = state->new_event;
  342. state->new_event = new_event;
  343. if (new_event)
  344. state->count++;
  345. if (event != THERMAL_THROTTLING_EVENT)
  346. return;
  347. if (new_event && !state->last_interrupt_time) {
  348. bool hot;
  349. u8 temp;
  350. get_therm_status(state->level, &hot, &temp);
  351. /*
  352. * Ignore short temperature spike as the system is not close
  353. * to PROCHOT. 10C offset is large enough to ignore. It is
  354. * already dropped from the high threshold temperature.
  355. */
  356. if (temp > 10)
  357. return;
  358. state->baseline_temp = temp;
  359. state->last_interrupt_time = now;
  360. schedule_delayed_work_on(this_cpu, &state->therm_work, THERM_THROT_POLL_INTERVAL);
  361. } else if (old_event && state->last_interrupt_time) {
  362. unsigned long throttle_time;
  363. throttle_time = jiffies_delta_to_msecs(now - state->last_interrupt_time);
  364. if (throttle_time > state->max_time_ms)
  365. state->max_time_ms = throttle_time;
  366. state->total_time_ms += throttle_time;
  367. state->last_interrupt_time = 0;
  368. }
  369. }
  370. static int thresh_event_valid(int level, int event)
  371. {
  372. struct _thermal_state *state;
  373. unsigned int this_cpu = smp_processor_id();
  374. struct thermal_state *pstate = &per_cpu(thermal_state, this_cpu);
  375. u64 now = get_jiffies_64();
  376. if (level == PACKAGE_LEVEL)
  377. state = (event == 0) ? &pstate->pkg_thresh0 :
  378. &pstate->pkg_thresh1;
  379. else
  380. state = (event == 0) ? &pstate->core_thresh0 :
  381. &pstate->core_thresh1;
  382. if (time_before64(now, state->next_check))
  383. return 0;
  384. state->next_check = now + CHECK_INTERVAL;
  385. return 1;
  386. }
  387. static bool int_pln_enable;
  388. static int __init int_pln_enable_setup(char *s)
  389. {
  390. int_pln_enable = true;
  391. return 1;
  392. }
  393. __setup("int_pln_enable", int_pln_enable_setup);
  394. #ifdef CONFIG_SYSFS
  395. /* Add/Remove thermal_throttle interface for CPU device: */
  396. static int thermal_throttle_add_dev(struct device *dev, unsigned int cpu)
  397. {
  398. int err;
  399. struct cpuinfo_x86 *c = &cpu_data(cpu);
  400. err = sysfs_create_group(&dev->kobj, &thermal_attr_group);
  401. if (err)
  402. return err;
  403. if (cpu_has(c, X86_FEATURE_PLN) && int_pln_enable) {
  404. err = sysfs_add_file_to_group(&dev->kobj,
  405. &dev_attr_core_power_limit_count.attr,
  406. thermal_attr_group.name);
  407. if (err)
  408. goto del_group;
  409. }
  410. if (cpu_has(c, X86_FEATURE_PTS)) {
  411. err = sysfs_add_file_to_group(&dev->kobj,
  412. &dev_attr_package_throttle_count.attr,
  413. thermal_attr_group.name);
  414. if (err)
  415. goto del_group;
  416. err = sysfs_add_file_to_group(&dev->kobj,
  417. &dev_attr_package_throttle_max_time_ms.attr,
  418. thermal_attr_group.name);
  419. if (err)
  420. goto del_group;
  421. err = sysfs_add_file_to_group(&dev->kobj,
  422. &dev_attr_package_throttle_total_time_ms.attr,
  423. thermal_attr_group.name);
  424. if (err)
  425. goto del_group;
  426. if (cpu_has(c, X86_FEATURE_PLN) && int_pln_enable) {
  427. err = sysfs_add_file_to_group(&dev->kobj,
  428. &dev_attr_package_power_limit_count.attr,
  429. thermal_attr_group.name);
  430. if (err)
  431. goto del_group;
  432. }
  433. }
  434. return 0;
  435. del_group:
  436. sysfs_remove_group(&dev->kobj, &thermal_attr_group);
  437. return err;
  438. }
  439. static void thermal_throttle_remove_dev(struct device *dev)
  440. {
  441. sysfs_remove_group(&dev->kobj, &thermal_attr_group);
  442. }
  443. /* Get notified when a cpu comes on/off. Be hotplug friendly. */
  444. static int thermal_throttle_online(unsigned int cpu)
  445. {
  446. struct thermal_state *state = &per_cpu(thermal_state, cpu);
  447. struct device *dev = get_cpu_device(cpu);
  448. u32 l;
  449. state->package_throttle.level = PACKAGE_LEVEL;
  450. state->core_throttle.level = CORE_LEVEL;
  451. INIT_DELAYED_WORK(&state->package_throttle.therm_work, throttle_active_work);
  452. INIT_DELAYED_WORK(&state->core_throttle.therm_work, throttle_active_work);
  453. /*
  454. * The first CPU coming online will enable the HFI. Usually this causes
  455. * hardware to issue an HFI thermal interrupt. Such interrupt will reach
  456. * the CPU once we enable the thermal vector in the local APIC.
  457. */
  458. intel_hfi_online(cpu);
  459. /* Unmask the thermal vector after the above workqueues are initialized. */
  460. l = apic_read(APIC_LVTTHMR);
  461. apic_write(APIC_LVTTHMR, l & ~APIC_LVT_MASKED);
  462. return thermal_throttle_add_dev(dev, cpu);
  463. }
  464. static int thermal_throttle_offline(unsigned int cpu)
  465. {
  466. struct thermal_state *state = &per_cpu(thermal_state, cpu);
  467. struct device *dev = get_cpu_device(cpu);
  468. u32 l;
  469. /* Mask the thermal vector before draining evtl. pending work */
  470. l = apic_read(APIC_LVTTHMR);
  471. apic_write(APIC_LVTTHMR, l | APIC_LVT_MASKED);
  472. intel_hfi_offline(cpu);
  473. cancel_delayed_work_sync(&state->package_throttle.therm_work);
  474. cancel_delayed_work_sync(&state->core_throttle.therm_work);
  475. state->package_throttle.rate_control_active = false;
  476. state->core_throttle.rate_control_active = false;
  477. thermal_throttle_remove_dev(dev);
  478. return 0;
  479. }
  480. static __init int thermal_throttle_init_device(void)
  481. {
  482. int ret;
  483. if (!atomic_read(&therm_throt_en))
  484. return 0;
  485. intel_hfi_init();
  486. ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "x86/therm:online",
  487. thermal_throttle_online,
  488. thermal_throttle_offline);
  489. return ret < 0 ? ret : 0;
  490. }
  491. device_initcall(thermal_throttle_init_device);
  492. #endif /* CONFIG_SYSFS */
  493. static void notify_package_thresholds(__u64 msr_val)
  494. {
  495. bool notify_thres_0 = false;
  496. bool notify_thres_1 = false;
  497. if (!platform_thermal_package_notify)
  498. return;
  499. /* lower threshold check */
  500. if (msr_val & THERM_LOG_THRESHOLD0)
  501. notify_thres_0 = true;
  502. /* higher threshold check */
  503. if (msr_val & THERM_LOG_THRESHOLD1)
  504. notify_thres_1 = true;
  505. if (!notify_thres_0 && !notify_thres_1)
  506. return;
  507. if (platform_thermal_package_rate_control &&
  508. platform_thermal_package_rate_control()) {
  509. /* Rate control is implemented in callback */
  510. platform_thermal_package_notify(msr_val);
  511. return;
  512. }
  513. /* lower threshold reached */
  514. if (notify_thres_0 && thresh_event_valid(PACKAGE_LEVEL, 0))
  515. platform_thermal_package_notify(msr_val);
  516. /* higher threshold reached */
  517. if (notify_thres_1 && thresh_event_valid(PACKAGE_LEVEL, 1))
  518. platform_thermal_package_notify(msr_val);
  519. }
  520. static void notify_thresholds(__u64 msr_val)
  521. {
  522. /* check whether the interrupt handler is defined;
  523. * otherwise simply return
  524. */
  525. if (!platform_thermal_notify)
  526. return;
  527. /* lower threshold reached */
  528. if ((msr_val & THERM_LOG_THRESHOLD0) &&
  529. thresh_event_valid(CORE_LEVEL, 0))
  530. platform_thermal_notify(msr_val);
  531. /* higher threshold reached */
  532. if ((msr_val & THERM_LOG_THRESHOLD1) &&
  533. thresh_event_valid(CORE_LEVEL, 1))
  534. platform_thermal_notify(msr_val);
  535. }
  536. void __weak notify_hwp_interrupt(void)
  537. {
  538. wrmsrq_safe(MSR_HWP_STATUS, 0);
  539. }
  540. /* Thermal transition interrupt handler */
  541. void intel_thermal_interrupt(void)
  542. {
  543. __u64 msr_val;
  544. if (static_cpu_has(X86_FEATURE_HWP))
  545. notify_hwp_interrupt();
  546. rdmsrq(MSR_IA32_THERM_STATUS, msr_val);
  547. /* Check for violation of core thermal thresholds*/
  548. notify_thresholds(msr_val);
  549. therm_throt_process(msr_val & THERM_STATUS_PROCHOT,
  550. THERMAL_THROTTLING_EVENT,
  551. CORE_LEVEL);
  552. if (this_cpu_has(X86_FEATURE_PLN) && int_pln_enable)
  553. therm_throt_process(msr_val & THERM_STATUS_POWER_LIMIT,
  554. POWER_LIMIT_EVENT,
  555. CORE_LEVEL);
  556. if (this_cpu_has(X86_FEATURE_PTS)) {
  557. rdmsrq(MSR_IA32_PACKAGE_THERM_STATUS, msr_val);
  558. /* check violations of package thermal thresholds */
  559. notify_package_thresholds(msr_val);
  560. therm_throt_process(msr_val & PACKAGE_THERM_STATUS_PROCHOT,
  561. THERMAL_THROTTLING_EVENT,
  562. PACKAGE_LEVEL);
  563. if (this_cpu_has(X86_FEATURE_PLN) && int_pln_enable)
  564. therm_throt_process(msr_val &
  565. PACKAGE_THERM_STATUS_POWER_LIMIT,
  566. POWER_LIMIT_EVENT,
  567. PACKAGE_LEVEL);
  568. if (this_cpu_has(X86_FEATURE_HFI))
  569. intel_hfi_process_event(msr_val &
  570. PACKAGE_THERM_STATUS_HFI_UPDATED);
  571. }
  572. }
  573. /* Thermal monitoring depends on APIC, ACPI and clock modulation */
  574. static int intel_thermal_supported(struct cpuinfo_x86 *c)
  575. {
  576. if (!boot_cpu_has(X86_FEATURE_APIC))
  577. return 0;
  578. if (!cpu_has(c, X86_FEATURE_ACPI) || !cpu_has(c, X86_FEATURE_ACC))
  579. return 0;
  580. return 1;
  581. }
  582. bool x86_thermal_enabled(void)
  583. {
  584. return atomic_read(&therm_throt_en);
  585. }
  586. void __init therm_lvt_init(void)
  587. {
  588. /*
  589. * This function is only called on boot CPU. Save the init thermal
  590. * LVT value on BSP and use that value to restore APs' thermal LVT
  591. * entry BIOS programmed later
  592. */
  593. if (intel_thermal_supported(&boot_cpu_data))
  594. lvtthmr_init = apic_read(APIC_LVTTHMR);
  595. }
  596. void intel_init_thermal(struct cpuinfo_x86 *c)
  597. {
  598. unsigned int cpu = smp_processor_id();
  599. int tm2 = 0;
  600. u32 l, h;
  601. if (!intel_thermal_supported(c))
  602. return;
  603. /*
  604. * First check if its enabled already, in which case there might
  605. * be some SMM goo which handles it, so we can't even put a handler
  606. * since it might be delivered via SMI already:
  607. */
  608. rdmsr(MSR_IA32_MISC_ENABLE, l, h);
  609. h = lvtthmr_init;
  610. /*
  611. * The initial value of thermal LVT entries on all APs always reads
  612. * 0x10000 because APs are woken up by BSP issuing INIT-SIPI-SIPI
  613. * sequence to them and LVT registers are reset to 0s except for
  614. * the mask bits which are set to 1s when APs receive INIT IPI.
  615. * If BIOS takes over the thermal interrupt and sets its interrupt
  616. * delivery mode to SMI (not fixed), it restores the value that the
  617. * BIOS has programmed on AP based on BSP's info we saved since BIOS
  618. * is always setting the same value for all threads/cores.
  619. */
  620. if ((h & APIC_DM_FIXED_MASK) != APIC_DM_FIXED)
  621. apic_write(APIC_LVTTHMR, lvtthmr_init);
  622. if ((l & MSR_IA32_MISC_ENABLE_TM1) && (h & APIC_DM_SMI)) {
  623. if (system_state == SYSTEM_BOOTING)
  624. pr_debug("CPU%d: Thermal monitoring handled by SMI\n", cpu);
  625. return;
  626. }
  627. /* early Pentium M models use different method for enabling TM2 */
  628. if (cpu_has(c, X86_FEATURE_TM2)) {
  629. if (c->x86 == 6 && (c->x86_model == 9 || c->x86_model == 13)) {
  630. rdmsr(MSR_THERM2_CTL, l, h);
  631. if (l & MSR_THERM2_CTL_TM_SELECT)
  632. tm2 = 1;
  633. } else if (l & MSR_IA32_MISC_ENABLE_TM2)
  634. tm2 = 1;
  635. }
  636. /* We'll mask the thermal vector in the lapic till we're ready: */
  637. h = THERMAL_APIC_VECTOR | APIC_DM_FIXED | APIC_LVT_MASKED;
  638. apic_write(APIC_LVTTHMR, h);
  639. thermal_intr_init_core_clear_mask();
  640. thermal_intr_init_pkg_clear_mask();
  641. rdmsr(MSR_IA32_THERM_INTERRUPT, l, h);
  642. if (cpu_has(c, X86_FEATURE_PLN) && !int_pln_enable)
  643. wrmsr(MSR_IA32_THERM_INTERRUPT,
  644. (l | (THERM_INT_LOW_ENABLE
  645. | THERM_INT_HIGH_ENABLE)) & ~THERM_INT_PLN_ENABLE, h);
  646. else if (cpu_has(c, X86_FEATURE_PLN) && int_pln_enable)
  647. wrmsr(MSR_IA32_THERM_INTERRUPT,
  648. l | (THERM_INT_LOW_ENABLE
  649. | THERM_INT_HIGH_ENABLE | THERM_INT_PLN_ENABLE), h);
  650. else
  651. wrmsr(MSR_IA32_THERM_INTERRUPT,
  652. l | (THERM_INT_LOW_ENABLE | THERM_INT_HIGH_ENABLE), h);
  653. if (cpu_has(c, X86_FEATURE_PTS)) {
  654. rdmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT, l, h);
  655. if (cpu_has(c, X86_FEATURE_PLN) && !int_pln_enable)
  656. wrmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT,
  657. (l | (PACKAGE_THERM_INT_LOW_ENABLE
  658. | PACKAGE_THERM_INT_HIGH_ENABLE))
  659. & ~PACKAGE_THERM_INT_PLN_ENABLE, h);
  660. else if (cpu_has(c, X86_FEATURE_PLN) && int_pln_enable)
  661. wrmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT,
  662. l | (PACKAGE_THERM_INT_LOW_ENABLE
  663. | PACKAGE_THERM_INT_HIGH_ENABLE
  664. | PACKAGE_THERM_INT_PLN_ENABLE), h);
  665. else
  666. wrmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT,
  667. l | (PACKAGE_THERM_INT_LOW_ENABLE
  668. | PACKAGE_THERM_INT_HIGH_ENABLE), h);
  669. if (cpu_has(c, X86_FEATURE_HFI)) {
  670. rdmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT, l, h);
  671. wrmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT,
  672. l | PACKAGE_THERM_INT_HFI_ENABLE, h);
  673. }
  674. }
  675. rdmsr(MSR_IA32_MISC_ENABLE, l, h);
  676. wrmsr(MSR_IA32_MISC_ENABLE, l | MSR_IA32_MISC_ENABLE_TM1, h);
  677. pr_info_once("CPU0: Thermal monitoring enabled (%s)\n",
  678. tm2 ? "TM2" : "TM1");
  679. /* enable thermal throttle processing */
  680. atomic_set(&therm_throt_en, 1);
  681. }