virtual-cpufreq.c 9.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332
  1. // SPDX-License-Identifier: GPL-2.0-only
  2. /*
  3. * Copyright (C) 2024 Google LLC
  4. */
  5. #include <linux/arch_topology.h>
  6. #include <linux/cpufreq.h>
  7. #include <linux/init.h>
  8. #include <linux/sched.h>
  9. #include <linux/kernel.h>
  10. #include <linux/module.h>
  11. #include <linux/of_address.h>
  12. #include <linux/of_platform.h>
  13. #include <linux/platform_device.h>
  14. #include <linux/slab.h>
/*
 * CPU0..CPUn
 * +-------------+-------------------------------+--------+-------+
 * | Register    | Description                   | Offset | Len   |
 * +-------------+-------------------------------+--------+-------+
 * | cur_perf    | read this register to get     | 0x0    | 0x4   |
 * |             | the current perf (integer val |        |       |
 * |             | representing perf relative to |        |       |
 * |             | max performance)              |        |       |
 * |             | that vCPU is running at       |        |       |
 * +-------------+-------------------------------+--------+-------+
 * | set_perf    | write to this register to set | 0x4    | 0x4   |
 * |             | perf value of the vCPU        |        |       |
 * +-------------+-------------------------------+--------+-------+
 * | perftbl_len | number of entries in perf     | 0x8    | 0x4   |
 * |             | table. A single entry in the  |        |       |
 * |             | perf table denotes no table   |        |       |
 * |             | and the entry contains        |        |       |
 * |             | the maximum perf value        |        |       |
 * |             | that this vCPU supports.      |        |       |
 * |             | The guest can request any     |        |       |
 * |             | value between 1 and max perf  |        |       |
 * |             | when perftbls are not used.   |        |       |
 * +-------------+-------------------------------+--------+-------+
 * | perftbl_sel | write to this register to     | 0xc    | 0x4   |
 * |             | select perf table entry to    |        |       |
 * |             | read from                     |        |       |
 * +-------------+-------------------------------+--------+-------+
 * | perftbl_rd  | read this register to get     | 0x10   | 0x4   |
 * |             | perf value of the selected    |        |       |
 * |             | entry based on perftbl_sel    |        |       |
 * +-------------+-------------------------------+--------+-------+
 * | perf_domain | performance domain number     | 0x14   | 0x4   |
 * |             | that this vCPU belongs to.    |        |       |
 * |             | vCPUs sharing the same perf   |        |       |
 * |             | domain number are part of the |        |       |
 * |             | same performance domain.      |        |       |
 * +-------------+-------------------------------+--------+-------+
 */
  54. #define REG_CUR_PERF_STATE_OFFSET 0x0
  55. #define REG_SET_PERF_STATE_OFFSET 0x4
  56. #define REG_PERFTBL_LEN_OFFSET 0x8
  57. #define REG_PERFTBL_SEL_OFFSET 0xc
  58. #define REG_PERFTBL_RD_OFFSET 0x10
  59. #define REG_PERF_DOMAIN_OFFSET 0x14
  60. #define PER_CPU_OFFSET 0x1000
  61. #define PERFTBL_MAX_ENTRIES 64U
  62. static void __iomem *base;
  63. static DEFINE_PER_CPU(u32, perftbl_num_entries);
  64. static void virt_scale_freq_tick(void)
  65. {
  66. int cpu = smp_processor_id();
  67. u32 max_freq = (u32)cpufreq_get_hw_max_freq(cpu);
  68. u64 cur_freq;
  69. unsigned long scale;
  70. cur_freq = (u64)readl_relaxed(base + cpu * PER_CPU_OFFSET
  71. + REG_CUR_PERF_STATE_OFFSET);
  72. cur_freq <<= SCHED_CAPACITY_SHIFT;
  73. scale = (unsigned long)div_u64(cur_freq, max_freq);
  74. scale = min(scale, SCHED_CAPACITY_SCALE);
  75. this_cpu_write(arch_freq_scale, scale);
  76. }
  77. static struct scale_freq_data virt_sfd = {
  78. .source = SCALE_FREQ_SOURCE_VIRT,
  79. .set_freq_scale = virt_scale_freq_tick,
  80. };
  81. static unsigned int virt_cpufreq_set_perf(struct cpufreq_policy *policy,
  82. unsigned int target_freq)
  83. {
  84. writel_relaxed(target_freq,
  85. base + policy->cpu * PER_CPU_OFFSET + REG_SET_PERF_STATE_OFFSET);
  86. return 0;
  87. }
  88. static unsigned int virt_cpufreq_fast_switch(struct cpufreq_policy *policy,
  89. unsigned int target_freq)
  90. {
  91. virt_cpufreq_set_perf(policy, target_freq);
  92. return target_freq;
  93. }
  94. static u32 virt_cpufreq_get_perftbl_entry(int cpu, u32 idx)
  95. {
  96. writel_relaxed(idx, base + cpu * PER_CPU_OFFSET +
  97. REG_PERFTBL_SEL_OFFSET);
  98. return readl_relaxed(base + cpu * PER_CPU_OFFSET +
  99. REG_PERFTBL_RD_OFFSET);
  100. }
  101. static int virt_cpufreq_target(struct cpufreq_policy *policy,
  102. unsigned int target_freq,
  103. unsigned int relation)
  104. {
  105. struct cpufreq_freqs freqs;
  106. int ret = 0;
  107. freqs.old = policy->cur;
  108. freqs.new = target_freq;
  109. cpufreq_freq_transition_begin(policy, &freqs);
  110. ret = virt_cpufreq_set_perf(policy, target_freq);
  111. cpufreq_freq_transition_end(policy, &freqs, ret != 0);
  112. return ret;
  113. }
  114. static int virt_cpufreq_get_sharing_cpus(struct cpufreq_policy *policy)
  115. {
  116. u32 cur_perf_domain, perf_domain;
  117. struct device *cpu_dev;
  118. int cpu;
  119. cur_perf_domain = readl_relaxed(base + policy->cpu *
  120. PER_CPU_OFFSET + REG_PERF_DOMAIN_OFFSET);
  121. for_each_present_cpu(cpu) {
  122. cpu_dev = get_cpu_device(cpu);
  123. if (!cpu_dev)
  124. continue;
  125. perf_domain = readl_relaxed(base + cpu *
  126. PER_CPU_OFFSET + REG_PERF_DOMAIN_OFFSET);
  127. if (perf_domain == cur_perf_domain)
  128. cpumask_set_cpu(cpu, policy->cpus);
  129. }
  130. return 0;
  131. }
  132. static int virt_cpufreq_get_freq_info(struct cpufreq_policy *policy)
  133. {
  134. struct cpufreq_frequency_table *table;
  135. u32 num_perftbl_entries, idx;
  136. num_perftbl_entries = per_cpu(perftbl_num_entries, policy->cpu);
  137. if (num_perftbl_entries == 1) {
  138. policy->cpuinfo.min_freq = 1;
  139. policy->cpuinfo.max_freq = virt_cpufreq_get_perftbl_entry(policy->cpu, 0);
  140. policy->min = policy->cpuinfo.min_freq;
  141. policy->max = policy->cpuinfo.max_freq;
  142. policy->cur = policy->max;
  143. return 0;
  144. }
  145. table = kzalloc_objs(*table, num_perftbl_entries + 1);
  146. if (!table)
  147. return -ENOMEM;
  148. for (idx = 0; idx < num_perftbl_entries; idx++)
  149. table[idx].frequency = virt_cpufreq_get_perftbl_entry(policy->cpu, idx);
  150. table[idx].frequency = CPUFREQ_TABLE_END;
  151. policy->freq_table = table;
  152. return 0;
  153. }
  154. static int virt_cpufreq_cpu_init(struct cpufreq_policy *policy)
  155. {
  156. struct device *cpu_dev;
  157. int ret;
  158. cpu_dev = get_cpu_device(policy->cpu);
  159. if (!cpu_dev)
  160. return -ENODEV;
  161. ret = virt_cpufreq_get_freq_info(policy);
  162. if (ret) {
  163. dev_warn(cpu_dev, "failed to get cpufreq info\n");
  164. return ret;
  165. }
  166. ret = virt_cpufreq_get_sharing_cpus(policy);
  167. if (ret) {
  168. dev_warn(cpu_dev, "failed to get sharing cpumask\n");
  169. return ret;
  170. }
  171. /*
  172. * To simplify and improve latency of handling frequency requests on
  173. * the host side, this ensures that the vCPU thread triggering the MMIO
  174. * abort is the same thread whose performance constraints (Ex. uclamp
  175. * settings) need to be updated. This simplifies the VMM (Virtual
  176. * Machine Manager) having to find the correct vCPU thread and/or
  177. * facing permission issues when configuring other threads.
  178. */
  179. policy->dvfs_possible_from_any_cpu = false;
  180. policy->fast_switch_possible = true;
  181. /*
  182. * Using the default SCALE_FREQ_SOURCE_CPUFREQ is insufficient since
  183. * the actual physical CPU frequency may not match requested frequency
  184. * from the vCPU thread due to frequency update latencies or other
  185. * inputs to the physical CPU frequency selection. This additional FIE
  186. * source allows for more accurate freq_scale updates and only takes
  187. * effect if another FIE source such as AMUs have not been registered.
  188. */
  189. topology_set_scale_freq_source(&virt_sfd, policy->cpus);
  190. return 0;
  191. }
  192. static void virt_cpufreq_cpu_exit(struct cpufreq_policy *policy)
  193. {
  194. topology_clear_scale_freq_source(SCALE_FREQ_SOURCE_VIRT, policy->related_cpus);
  195. kfree(policy->freq_table);
  196. }
  197. static int virt_cpufreq_online(struct cpufreq_policy *policy)
  198. {
  199. /* Nothing to restore. */
  200. return 0;
  201. }
  202. static int virt_cpufreq_offline(struct cpufreq_policy *policy)
  203. {
  204. /* Dummy offline() to avoid exit() being called and freeing resources. */
  205. return 0;
  206. }
  207. static int virt_cpufreq_verify_policy(struct cpufreq_policy_data *policy)
  208. {
  209. if (policy->freq_table)
  210. return cpufreq_frequency_table_verify(policy);
  211. cpufreq_verify_within_cpu_limits(policy);
  212. return 0;
  213. }
  214. static struct cpufreq_driver cpufreq_virt_driver = {
  215. .name = "virt-cpufreq",
  216. .init = virt_cpufreq_cpu_init,
  217. .exit = virt_cpufreq_cpu_exit,
  218. .online = virt_cpufreq_online,
  219. .offline = virt_cpufreq_offline,
  220. .verify = virt_cpufreq_verify_policy,
  221. .target = virt_cpufreq_target,
  222. .fast_switch = virt_cpufreq_fast_switch,
  223. };
  224. static int virt_cpufreq_driver_probe(struct platform_device *pdev)
  225. {
  226. u32 num_perftbl_entries;
  227. int ret, cpu;
  228. base = devm_platform_ioremap_resource(pdev, 0);
  229. if (IS_ERR(base))
  230. return PTR_ERR(base);
  231. for_each_possible_cpu(cpu) {
  232. num_perftbl_entries = readl_relaxed(base + cpu * PER_CPU_OFFSET +
  233. REG_PERFTBL_LEN_OFFSET);
  234. if (!num_perftbl_entries || num_perftbl_entries > PERFTBL_MAX_ENTRIES)
  235. return -ENODEV;
  236. per_cpu(perftbl_num_entries, cpu) = num_perftbl_entries;
  237. }
  238. ret = cpufreq_register_driver(&cpufreq_virt_driver);
  239. if (ret) {
  240. dev_err(&pdev->dev, "Virtual CPUFreq driver failed to register: %d\n", ret);
  241. return ret;
  242. }
  243. dev_dbg(&pdev->dev, "Virtual CPUFreq driver initialized\n");
  244. return 0;
  245. }
  246. static void virt_cpufreq_driver_remove(struct platform_device *pdev)
  247. {
  248. cpufreq_unregister_driver(&cpufreq_virt_driver);
  249. }
  250. static const struct of_device_id virt_cpufreq_match[] = {
  251. { .compatible = "qemu,virtual-cpufreq", .data = NULL},
  252. {}
  253. };
  254. MODULE_DEVICE_TABLE(of, virt_cpufreq_match);
  255. static struct platform_driver virt_cpufreq_driver = {
  256. .probe = virt_cpufreq_driver_probe,
  257. .remove = virt_cpufreq_driver_remove,
  258. .driver = {
  259. .name = "virt-cpufreq",
  260. .of_match_table = virt_cpufreq_match,
  261. },
  262. };
  263. static int __init virt_cpufreq_init(void)
  264. {
  265. return platform_driver_register(&virt_cpufreq_driver);
  266. }
  267. postcore_initcall(virt_cpufreq_init);
  268. static void __exit virt_cpufreq_exit(void)
  269. {
  270. platform_driver_unregister(&virt_cpufreq_driver);
  271. }
  272. module_exit(virt_cpufreq_exit);
  273. MODULE_DESCRIPTION("Virtual cpufreq driver");
  274. MODULE_LICENSE("GPL");