marvell_pem_pmu.c 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425
  1. // SPDX-License-Identifier: GPL-2.0
  2. /*
  3. * Marvell PEM(PCIe RC) Performance Monitor Driver
  4. *
  5. * Copyright (C) 2024 Marvell.
  6. */
  7. #include <linux/acpi.h>
  8. #include <linux/init.h>
  9. #include <linux/io.h>
  10. #include <linux/module.h>
  11. #include <linux/perf_event.h>
  12. #include <linux/platform_device.h>
  /*
   * Event identifiers exposed by this PMU.
   *
   * Each event maps to a free-running 64-bit counter that has no event
   * control but can be reset. The numeric value of an enumerator is the
   * value user space passes in attr.config and is also the index into
   * eventid_to_offset_table[].
   */
  enum pem_events {
      /* Inbound TLP counts */
      IB_TLP_NPR,
      IB_TLP_PR,
      IB_TLP_CPL,
      /* Inbound TLP dword counts */
      IB_TLP_DWORDS_NPR,
      IB_TLP_DWORDS_PR,
      IB_TLP_DWORDS_CPL,
      /* Other inbound counters */
      IB_INFLIGHT,
      IB_READS,
      IB_REQ_NO_RO_NCB,
      IB_REQ_NO_RO_EBUS,
      /* Outbound TLP counts */
      OB_TLP_NPR,
      OB_TLP_PR,
      OB_TLP_CPL,
      /* Outbound TLP dword counts */
      OB_TLP_DWORDS_NPR,
      OB_TLP_DWORDS_PR,
      OB_TLP_DWORDS_CPL,
      /* Other outbound counters */
      OB_INFLIGHT,
      OB_READS,
      OB_MERGES_NPR,
      OB_MERGES_PR,
      OB_MERGES_CPL,
      /* ATS counters */
      ATS_TRANS,
      ATS_TRANS_LATENCY,
      ATS_PRI,
      ATS_PRI_LATENCY,
      ATS_INV,
      ATS_INV_LATENCY,
      /* Number of valid event ids; must stay last */
      PEM_EVENTIDS_MAX
  };
  47. static u64 eventid_to_offset_table[] = {
  48. [IB_TLP_NPR] = 0x0,
  49. [IB_TLP_PR] = 0x8,
  50. [IB_TLP_CPL] = 0x10,
  51. [IB_TLP_DWORDS_NPR] = 0x100,
  52. [IB_TLP_DWORDS_PR] = 0x108,
  53. [IB_TLP_DWORDS_CPL] = 0x110,
  54. [IB_INFLIGHT] = 0x200,
  55. [IB_READS] = 0x300,
  56. [IB_REQ_NO_RO_NCB] = 0x400,
  57. [IB_REQ_NO_RO_EBUS] = 0x408,
  58. [OB_TLP_NPR] = 0x500,
  59. [OB_TLP_PR] = 0x508,
  60. [OB_TLP_CPL] = 0x510,
  61. [OB_TLP_DWORDS_NPR] = 0x600,
  62. [OB_TLP_DWORDS_PR] = 0x608,
  63. [OB_TLP_DWORDS_CPL] = 0x610,
  64. [OB_INFLIGHT] = 0x700,
  65. [OB_READS] = 0x800,
  66. [OB_MERGES_NPR] = 0x900,
  67. [OB_MERGES_PR] = 0x908,
  68. [OB_MERGES_CPL] = 0x910,
  69. [ATS_TRANS] = 0x2D18,
  70. [ATS_TRANS_LATENCY] = 0x2D20,
  71. [ATS_PRI] = 0x2D28,
  72. [ATS_PRI_LATENCY] = 0x2D30,
  73. [ATS_INV] = 0x2D38,
  74. [ATS_INV_LATENCY] = 0x2D40,
  75. };
  76. struct pem_pmu {
  77. struct pmu pmu;
  78. void __iomem *base;
  79. unsigned int cpu;
  80. struct device *dev;
  81. struct hlist_node node;
  82. };
  83. #define to_pem_pmu(p) container_of(p, struct pem_pmu, pmu)
  84. static int eventid_to_offset(int eventid)
  85. {
  86. return eventid_to_offset_table[eventid];
  87. }
  88. /* Events */
  89. static ssize_t pem_pmu_event_show(struct device *dev,
  90. struct device_attribute *attr,
  91. char *page)
  92. {
  93. struct perf_pmu_events_attr *pmu_attr;
  94. pmu_attr = container_of(attr, struct perf_pmu_events_attr, attr);
  95. return sysfs_emit(page, "event=0x%02llx\n", pmu_attr->id);
  96. }
  /*
   * Build an anonymous, read-only perf_pmu_events_attr named _name that
   * carries event id _id, and yield a pointer to its struct attribute so
   * it can be placed directly in an attribute array.
   */
  #define PEM_EVENT_ATTR(_name, _id)                                      \
      (&((struct perf_pmu_events_attr[]) {                                \
          { .attr = __ATTR(_name, 0444, pem_pmu_event_show, NULL),        \
            .id = _id, }                                                  \
      })[0].attr.attr)
  102. static struct attribute *pem_perf_events_attrs[] = {
  103. PEM_EVENT_ATTR(ib_tlp_npr, IB_TLP_NPR),
  104. PEM_EVENT_ATTR(ib_tlp_pr, IB_TLP_PR),
  105. PEM_EVENT_ATTR(ib_tlp_cpl_partid, IB_TLP_CPL),
  106. PEM_EVENT_ATTR(ib_tlp_dwords_npr, IB_TLP_DWORDS_NPR),
  107. PEM_EVENT_ATTR(ib_tlp_dwords_pr, IB_TLP_DWORDS_PR),
  108. PEM_EVENT_ATTR(ib_tlp_dwords_cpl_partid, IB_TLP_DWORDS_CPL),
  109. PEM_EVENT_ATTR(ib_inflight, IB_INFLIGHT),
  110. PEM_EVENT_ATTR(ib_reads, IB_READS),
  111. PEM_EVENT_ATTR(ib_req_no_ro_ncb, IB_REQ_NO_RO_NCB),
  112. PEM_EVENT_ATTR(ib_req_no_ro_ebus, IB_REQ_NO_RO_EBUS),
  113. PEM_EVENT_ATTR(ob_tlp_npr_partid, OB_TLP_NPR),
  114. PEM_EVENT_ATTR(ob_tlp_pr_partid, OB_TLP_PR),
  115. PEM_EVENT_ATTR(ob_tlp_cpl_partid, OB_TLP_CPL),
  116. PEM_EVENT_ATTR(ob_tlp_dwords_npr_partid, OB_TLP_DWORDS_NPR),
  117. PEM_EVENT_ATTR(ob_tlp_dwords_pr_partid, OB_TLP_DWORDS_PR),
  118. PEM_EVENT_ATTR(ob_tlp_dwords_cpl_partid, OB_TLP_DWORDS_CPL),
  119. PEM_EVENT_ATTR(ob_inflight_partid, OB_INFLIGHT),
  120. PEM_EVENT_ATTR(ob_reads_partid, OB_READS),
  121. PEM_EVENT_ATTR(ob_merges_npr_partid, OB_MERGES_NPR),
  122. PEM_EVENT_ATTR(ob_merges_pr_partid, OB_MERGES_PR),
  123. PEM_EVENT_ATTR(ob_merges_cpl_partid, OB_MERGES_CPL),
  124. PEM_EVENT_ATTR(ats_trans, ATS_TRANS),
  125. PEM_EVENT_ATTR(ats_trans_latency, ATS_TRANS_LATENCY),
  126. PEM_EVENT_ATTR(ats_pri, ATS_PRI),
  127. PEM_EVENT_ATTR(ats_pri_latency, ATS_PRI_LATENCY),
  128. PEM_EVENT_ATTR(ats_inv, ATS_INV),
  129. PEM_EVENT_ATTR(ats_inv_latency, ATS_INV_LATENCY),
  130. NULL
  131. };
  132. static struct attribute_group pem_perf_events_attr_group = {
  133. .name = "events",
  134. .attrs = pem_perf_events_attrs,
  135. };
  136. PMU_FORMAT_ATTR(event, "config:0-5");
  137. static struct attribute *pem_perf_format_attrs[] = {
  138. &format_attr_event.attr,
  139. NULL
  140. };
  141. static struct attribute_group pem_perf_format_attr_group = {
  142. .name = "format",
  143. .attrs = pem_perf_format_attrs,
  144. };
  145. /* cpumask */
  146. static ssize_t pem_perf_cpumask_show(struct device *dev,
  147. struct device_attribute *attr,
  148. char *buf)
  149. {
  150. struct pem_pmu *pmu = dev_get_drvdata(dev);
  151. return cpumap_print_to_pagebuf(true, buf, cpumask_of(pmu->cpu));
  152. }
  153. static struct device_attribute pem_perf_cpumask_attr =
  154. __ATTR(cpumask, 0444, pem_perf_cpumask_show, NULL);
  155. static struct attribute *pem_perf_cpumask_attrs[] = {
  156. &pem_perf_cpumask_attr.attr,
  157. NULL
  158. };
  159. static struct attribute_group pem_perf_cpumask_attr_group = {
  160. .attrs = pem_perf_cpumask_attrs,
  161. };
  162. static const struct attribute_group *pem_perf_attr_groups[] = {
  163. &pem_perf_events_attr_group,
  164. &pem_perf_cpumask_attr_group,
  165. &pem_perf_format_attr_group,
  166. NULL
  167. };
  168. static int pem_perf_event_init(struct perf_event *event)
  169. {
  170. struct pem_pmu *pmu = to_pem_pmu(event->pmu);
  171. struct hw_perf_event *hwc = &event->hw;
  172. struct perf_event *sibling;
  173. if (event->attr.type != event->pmu->type)
  174. return -ENOENT;
  175. if (event->attr.config >= PEM_EVENTIDS_MAX)
  176. return -EINVAL;
  177. if (is_sampling_event(event) ||
  178. event->attach_state & PERF_ATTACH_TASK) {
  179. return -EOPNOTSUPP;
  180. }
  181. if (event->cpu < 0)
  182. return -EOPNOTSUPP;
  183. /* We must NOT create groups containing mixed PMUs */
  184. if (event->group_leader->pmu != event->pmu &&
  185. !is_software_event(event->group_leader))
  186. return -EINVAL;
  187. for_each_sibling_event(sibling, event->group_leader) {
  188. if (sibling->pmu != event->pmu &&
  189. !is_software_event(sibling))
  190. return -EINVAL;
  191. }
  192. /*
  193. * Set ownership of event to one CPU, same event can not be observed
  194. * on multiple cpus at same time.
  195. */
  196. event->cpu = pmu->cpu;
  197. hwc->idx = -1;
  198. return 0;
  199. }
  200. static u64 pem_perf_read_counter(struct pem_pmu *pmu,
  201. struct perf_event *event, int eventid)
  202. {
  203. return readq_relaxed(pmu->base + eventid_to_offset(eventid));
  204. }
  205. static void pem_perf_event_update(struct perf_event *event)
  206. {
  207. struct pem_pmu *pmu = to_pem_pmu(event->pmu);
  208. struct hw_perf_event *hwc = &event->hw;
  209. u64 prev_count, new_count;
  210. do {
  211. prev_count = local64_read(&hwc->prev_count);
  212. new_count = pem_perf_read_counter(pmu, event, hwc->idx);
  213. } while (local64_xchg(&hwc->prev_count, new_count) != prev_count);
  214. local64_add((new_count - prev_count), &event->count);
  215. }
  216. static void pem_perf_event_start(struct perf_event *event, int flags)
  217. {
  218. struct pem_pmu *pmu = to_pem_pmu(event->pmu);
  219. struct hw_perf_event *hwc = &event->hw;
  220. int eventid = hwc->idx;
  221. /*
  222. * All counters are free-running and associated with
  223. * a fixed event to track in Hardware
  224. */
  225. local64_set(&hwc->prev_count,
  226. pem_perf_read_counter(pmu, event, eventid));
  227. hwc->state = 0;
  228. }
  229. static int pem_perf_event_add(struct perf_event *event, int flags)
  230. {
  231. struct hw_perf_event *hwc = &event->hw;
  232. hwc->idx = event->attr.config;
  233. if (WARN_ON_ONCE(hwc->idx >= PEM_EVENTIDS_MAX))
  234. return -EINVAL;
  235. hwc->state |= PERF_HES_STOPPED;
  236. if (flags & PERF_EF_START)
  237. pem_perf_event_start(event, flags);
  238. return 0;
  239. }
  240. static void pem_perf_event_stop(struct perf_event *event, int flags)
  241. {
  242. struct hw_perf_event *hwc = &event->hw;
  243. if (flags & PERF_EF_UPDATE)
  244. pem_perf_event_update(event);
  245. hwc->state |= PERF_HES_STOPPED;
  246. }
  247. static void pem_perf_event_del(struct perf_event *event, int flags)
  248. {
  249. struct hw_perf_event *hwc = &event->hw;
  250. pem_perf_event_stop(event, PERF_EF_UPDATE);
  251. hwc->idx = -1;
  252. }
  253. static int pem_pmu_offline_cpu(unsigned int cpu, struct hlist_node *node)
  254. {
  255. struct pem_pmu *pmu = hlist_entry_safe(node, struct pem_pmu, node);
  256. unsigned int target;
  257. if (cpu != pmu->cpu)
  258. return 0;
  259. target = cpumask_any_but(cpu_online_mask, cpu);
  260. if (target >= nr_cpu_ids)
  261. return 0;
  262. perf_pmu_migrate_context(&pmu->pmu, cpu, target);
  263. pmu->cpu = target;
  264. return 0;
  265. }
  266. static int pem_perf_probe(struct platform_device *pdev)
  267. {
  268. struct pem_pmu *pem_pmu;
  269. struct resource *res;
  270. void __iomem *base;
  271. char *name;
  272. int ret;
  273. pem_pmu = devm_kzalloc(&pdev->dev, sizeof(*pem_pmu), GFP_KERNEL);
  274. if (!pem_pmu)
  275. return -ENOMEM;
  276. pem_pmu->dev = &pdev->dev;
  277. platform_set_drvdata(pdev, pem_pmu);
  278. base = devm_platform_get_and_ioremap_resource(pdev, 0, &res);
  279. if (IS_ERR(base))
  280. return PTR_ERR(base);
  281. pem_pmu->base = base;
  282. pem_pmu->pmu = (struct pmu) {
  283. .module = THIS_MODULE,
  284. .capabilities = PERF_PMU_CAP_NO_EXCLUDE,
  285. .task_ctx_nr = perf_invalid_context,
  286. .attr_groups = pem_perf_attr_groups,
  287. .event_init = pem_perf_event_init,
  288. .add = pem_perf_event_add,
  289. .del = pem_perf_event_del,
  290. .start = pem_perf_event_start,
  291. .stop = pem_perf_event_stop,
  292. .read = pem_perf_event_update,
  293. };
  294. /* Choose this cpu to collect perf data */
  295. pem_pmu->cpu = raw_smp_processor_id();
  296. name = devm_kasprintf(pem_pmu->dev, GFP_KERNEL, "mrvl_pcie_rc_pmu_%llx",
  297. res->start);
  298. if (!name)
  299. return -ENOMEM;
  300. cpuhp_state_add_instance_nocalls(CPUHP_AP_PERF_ARM_MRVL_PEM_ONLINE,
  301. &pem_pmu->node);
  302. ret = perf_pmu_register(&pem_pmu->pmu, name, -1);
  303. if (ret)
  304. goto error;
  305. return 0;
  306. error:
  307. cpuhp_state_remove_instance_nocalls(CPUHP_AP_PERF_ARM_MRVL_PEM_ONLINE,
  308. &pem_pmu->node);
  309. return ret;
  310. }
  311. static void pem_perf_remove(struct platform_device *pdev)
  312. {
  313. struct pem_pmu *pem_pmu = platform_get_drvdata(pdev);
  314. cpuhp_state_remove_instance_nocalls(CPUHP_AP_PERF_ARM_MRVL_PEM_ONLINE,
  315. &pem_pmu->node);
  316. perf_pmu_unregister(&pem_pmu->pmu);
  317. }
  #ifdef CONFIG_ACPI
  /* ACPI hardware IDs this driver binds to. */
  static const struct acpi_device_id pem_pmu_acpi_match[] = {
      { "MRVL000E", 0 },
      { }
  };
  MODULE_DEVICE_TABLE(acpi, pem_pmu_acpi_match);
  #endif
  325. static struct platform_driver pem_pmu_driver = {
  326. .driver = {
  327. .name = "pem-pmu",
  328. .acpi_match_table = ACPI_PTR(pem_pmu_acpi_match),
  329. .suppress_bind_attrs = true,
  330. },
  331. .probe = pem_perf_probe,
  332. .remove = pem_perf_remove,
  333. };
  334. static int __init pem_pmu_init(void)
  335. {
  336. int ret;
  337. ret = cpuhp_setup_state_multi(CPUHP_AP_PERF_ARM_MRVL_PEM_ONLINE,
  338. "perf/marvell/pem:online", NULL,
  339. pem_pmu_offline_cpu);
  340. if (ret)
  341. return ret;
  342. ret = platform_driver_register(&pem_pmu_driver);
  343. if (ret)
  344. cpuhp_remove_multi_state(CPUHP_AP_PERF_ARM_MRVL_PEM_ONLINE);
  345. return ret;
  346. }
  347. static void __exit pem_pmu_exit(void)
  348. {
  349. platform_driver_unregister(&pem_pmu_driver);
  350. cpuhp_remove_multi_state(CPUHP_AP_PERF_ARM_MRVL_PEM_ONLINE);
  351. }
  352. module_init(pem_pmu_init);
  353. module_exit(pem_pmu_exit);
  354. MODULE_DESCRIPTION("Marvell PEM Perf driver");
  355. MODULE_AUTHOR("Gowthami Thiagarajan <gthiagarajan@marvell.com>");
  356. MODULE_LICENSE("GPL");