a72_edac.c 5.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225
  1. // SPDX-License-Identifier: GPL-2.0
  2. /*
  3. * Cortex A72 EDAC L1 and L2 cache error detection
  4. *
  5. * Copyright (c) 2020 Pengutronix, Sascha Hauer <s.hauer@pengutronix.de>
  6. * Copyright (c) 2025 Microsoft Corporation, <vijayb@linux.microsoft.com>
  7. *
  8. * Based on Code from:
  9. * Copyright (c) 2018, NXP Semiconductor
  10. * Author: York Sun <york.sun@nxp.com>
  11. */
  12. #include <linux/module.h>
  13. #include <linux/of.h>
  14. #include <linux/bitfield.h>
  15. #include <asm/smp_plat.h>
  16. #include "edac_module.h"
  /* Name shared by the platform device and the platform driver. */
  #define DRVNAME			"a72-edac"

  /* Encodings of the two memory error syndrome system registers. */
  #define SYS_CPUMERRSR_EL1		sys_reg(3, 1, 15, 2, 2)
  #define SYS_L2MERRSR_EL1		sys_reg(3, 1, 15, 2, 3)

  /* CPUMERRSR_EL1 fields tested or extracted by this driver. */
  #define CPUMERRSR_EL1_FATAL		BIT(63)
  #define CPUMERRSR_EL1_VALID		BIT(31)
  #define CPUMERRSR_EL1_RAMID		GENMASK(30, 24)

  /* L2MERRSR_EL1 fields tested or extracted by this driver. */
  #define L2MERRSR_EL1_FATAL		BIT(63)
  #define L2MERRSR_EL1_VALID		BIT(31)
  #define L2MERRSR_EL1_CPUID_WAY	GENMASK(21, 18)

  /* CPUMERRSR_EL1 RAMID values that report_errors() translates to a name. */
  #define L1_I_TAG_RAM			0x00
  #define L1_I_DATA_RAM			0x01
  #define L1_D_TAG_RAM			0x08
  #define L1_D_DATA_RAM			0x09
  #define TLB_RAM			0x18

  /* Size of the on-stack buffer used to format one EDAC message. */
  #define MESSAGE_SIZE			64
  32. struct mem_err_synd_reg {
  33. u64 cpu_mesr;
  34. u64 l2_mesr;
  35. };
  36. static struct cpumask compat_mask;
  37. static void report_errors(struct edac_device_ctl_info *edac_ctl, int cpu,
  38. struct mem_err_synd_reg *mesr)
  39. {
  40. u64 cpu_mesr = mesr->cpu_mesr;
  41. u64 l2_mesr = mesr->l2_mesr;
  42. char msg[MESSAGE_SIZE];
  43. if (cpu_mesr & CPUMERRSR_EL1_VALID) {
  44. const char *str;
  45. bool fatal = cpu_mesr & CPUMERRSR_EL1_FATAL;
  46. switch (FIELD_GET(CPUMERRSR_EL1_RAMID, cpu_mesr)) {
  47. case L1_I_TAG_RAM:
  48. str = "L1-I Tag RAM";
  49. break;
  50. case L1_I_DATA_RAM:
  51. str = "L1-I Data RAM";
  52. break;
  53. case L1_D_TAG_RAM:
  54. str = "L1-D Tag RAM";
  55. break;
  56. case L1_D_DATA_RAM:
  57. str = "L1-D Data RAM";
  58. break;
  59. case TLB_RAM:
  60. str = "TLB RAM";
  61. break;
  62. default:
  63. str = "Unspecified";
  64. break;
  65. }
  66. snprintf(msg, MESSAGE_SIZE, "%s %s error(s) on CPU %d",
  67. str, fatal ? "fatal" : "correctable", cpu);
  68. if (fatal)
  69. edac_device_handle_ue(edac_ctl, cpu, 0, msg);
  70. else
  71. edac_device_handle_ce(edac_ctl, cpu, 0, msg);
  72. }
  73. if (l2_mesr & L2MERRSR_EL1_VALID) {
  74. bool fatal = l2_mesr & L2MERRSR_EL1_FATAL;
  75. snprintf(msg, MESSAGE_SIZE, "L2 %s error(s) on CPU %d CPUID/WAY 0x%lx",
  76. fatal ? "fatal" : "correctable", cpu,
  77. FIELD_GET(L2MERRSR_EL1_CPUID_WAY, l2_mesr));
  78. if (fatal)
  79. edac_device_handle_ue(edac_ctl, cpu, 1, msg);
  80. else
  81. edac_device_handle_ce(edac_ctl, cpu, 1, msg);
  82. }
  83. }
  84. static void read_errors(void *data)
  85. {
  86. struct mem_err_synd_reg *mesr = data;
  87. mesr->cpu_mesr = read_sysreg_s(SYS_CPUMERRSR_EL1);
  88. if (mesr->cpu_mesr & CPUMERRSR_EL1_VALID) {
  89. write_sysreg_s(0, SYS_CPUMERRSR_EL1);
  90. isb();
  91. }
  92. mesr->l2_mesr = read_sysreg_s(SYS_L2MERRSR_EL1);
  93. if (mesr->l2_mesr & L2MERRSR_EL1_VALID) {
  94. write_sysreg_s(0, SYS_L2MERRSR_EL1);
  95. isb();
  96. }
  97. }
  98. static void a72_edac_check(struct edac_device_ctl_info *edac_ctl)
  99. {
  100. struct mem_err_synd_reg mesr;
  101. int cpu;
  102. cpus_read_lock();
  103. for_each_cpu_and(cpu, cpu_online_mask, &compat_mask) {
  104. smp_call_function_single(cpu, read_errors, &mesr, true);
  105. report_errors(edac_ctl, cpu, &mesr);
  106. }
  107. cpus_read_unlock();
  108. }
  109. static int a72_edac_probe(struct platform_device *pdev)
  110. {
  111. struct edac_device_ctl_info *edac_ctl;
  112. struct device *dev = &pdev->dev;
  113. int rc;
  114. edac_ctl = edac_device_alloc_ctl_info(0, "cpu",
  115. num_possible_cpus(), "L", 2, 1,
  116. edac_device_alloc_index());
  117. if (!edac_ctl)
  118. return -ENOMEM;
  119. edac_ctl->edac_check = a72_edac_check;
  120. edac_ctl->dev = dev;
  121. edac_ctl->mod_name = dev_name(dev);
  122. edac_ctl->dev_name = dev_name(dev);
  123. edac_ctl->ctl_name = DRVNAME;
  124. dev_set_drvdata(dev, edac_ctl);
  125. rc = edac_device_add_device(edac_ctl);
  126. if (rc)
  127. goto out_dev;
  128. return 0;
  129. out_dev:
  130. edac_device_free_ctl_info(edac_ctl);
  131. return rc;
  132. }
  133. static void a72_edac_remove(struct platform_device *pdev)
  134. {
  135. struct edac_device_ctl_info *edac_ctl = dev_get_drvdata(&pdev->dev);
  136. edac_device_del_device(edac_ctl->dev);
  137. edac_device_free_ctl_info(edac_ctl);
  138. }
  139. static const struct of_device_id cortex_arm64_edac_of_match[] = {
  140. { .compatible = "arm,cortex-a72" },
  141. {}
  142. };
  143. MODULE_DEVICE_TABLE(of, cortex_arm64_edac_of_match);
  144. static struct platform_driver a72_edac_driver = {
  145. .probe = a72_edac_probe,
  146. .remove = a72_edac_remove,
  147. .driver = {
  148. .name = DRVNAME,
  149. },
  150. };
  /*
   * Platform device created by a72_edac_driver_init(). Stays NULL when no CPU
   * was selected for monitoring.
   */
  static struct platform_device *a72_pdev;
  152. static int __init a72_edac_driver_init(void)
  153. {
  154. int cpu;
  155. for_each_possible_cpu(cpu) {
  156. struct device_node *np __free(device_node) = of_cpu_device_node_get(cpu);
  157. if (np) {
  158. if (of_match_node(cortex_arm64_edac_of_match, np) &&
  159. of_property_read_bool(np, "edac-enabled")) {
  160. cpumask_set_cpu(cpu, &compat_mask);
  161. }
  162. } else {
  163. pr_warn("failed to find device node for CPU %d\n", cpu);
  164. }
  165. }
  166. if (cpumask_empty(&compat_mask))
  167. return 0;
  168. a72_pdev = platform_device_register_simple(DRVNAME, -1, NULL, 0);
  169. if (IS_ERR(a72_pdev)) {
  170. pr_err("failed to register A72 EDAC device\n");
  171. return PTR_ERR(a72_pdev);
  172. }
  173. return platform_driver_register(&a72_edac_driver);
  174. }
  175. static void __exit a72_edac_driver_exit(void)
  176. {
  177. platform_device_unregister(a72_pdev);
  178. platform_driver_unregister(&a72_edac_driver);
  179. }
  180. module_init(a72_edac_driver_init);
  181. module_exit(a72_edac_driver_exit);
  182. MODULE_LICENSE("GPL");
  183. MODULE_AUTHOR("Sascha Hauer <s.hauer@pengutronix.de>");
  184. MODULE_DESCRIPTION("Cortex A72 L1 and L2 cache EDAC driver");