imh_base.c 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602
  1. // SPDX-License-Identifier: GPL-2.0
  2. /*
  3. * Driver for Intel(R) servers with Integrated Memory/IO Hub-based memory controller.
  4. * Copyright (c) 2025, Intel Corporation.
  5. */
  6. #include <linux/kernel.h>
  7. #include <linux/io.h>
  8. #include <asm/cpu_device_id.h>
  9. #include <asm/intel-family.h>
  10. #include <asm/mce.h>
  11. #include <asm/cpu.h>
  12. #include "edac_module.h"
  13. #include "skx_common.h"
  14. #define IMH_REVISION "v0.0.1"
  15. #define EDAC_MOD_STR "imh_edac"
  16. /* Debug macros */
  17. #define imh_printk(level, fmt, arg...) \
  18. edac_printk(level, "imh", fmt, ##arg)
  19. /* Configuration Agent(Ubox) */
  20. #define MMIO_BASE_H(reg) (((u64)GET_BITFIELD(reg, 0, 29)) << 23)
  21. #define SOCKET_ID(reg) GET_BITFIELD(reg, 0, 3)
  22. /* PUNIT */
  23. #define DDR_IMC_BITMAP(reg) GET_BITFIELD(reg, 23, 30)
  24. /* Memory Controller */
  25. #define ECC_ENABLED(reg) GET_BITFIELD(reg, 2, 2)
  26. #define DIMM_POPULATED(reg) GET_BITFIELD(reg, 15, 15)
  27. /* System Cache Agent(SCA) */
  28. #define TOLM(reg) (((u64)GET_BITFIELD(reg, 16, 31)) << 16)
  29. #define TOHM(reg) (((u64)GET_BITFIELD(reg, 16, 51)) << 16)
  30. /* Home Agent (HA) */
  31. #define NMCACHING(reg) GET_BITFIELD(reg, 8, 8)
  32. /**
  33. * struct local_reg - A register as described in the local package view.
  34. *
  35. * @pkg: (input) The package where the register is located.
  36. * @pbase: (input) The IP MMIO base physical address in the local package view.
  37. * @size: (input) The IP MMIO size.
  38. * @offset: (input) The register offset from the IP MMIO base @pbase.
  39. * @width: (input) The register width in byte.
  40. * @vbase: (internal) The IP MMIO base virtual address.
  41. * @val: (output) The register value.
  42. */
  43. struct local_reg {
  44. int pkg;
  45. u64 pbase;
  46. u32 size;
  47. u32 offset;
  48. u8 width;
  49. void __iomem *vbase;
  50. u64 val;
  51. };
  52. #define DEFINE_LOCAL_REG(name, cfg, package, north, ip_name, ip_idx, reg_name) \
  53. struct local_reg name = { \
  54. .pkg = package, \
  55. .pbase = (north ? (cfg)->mmio_base_l_north : \
  56. (cfg)->mmio_base_l_south) + \
  57. (cfg)->ip_name##_base + \
  58. (cfg)->ip_name##_size * (ip_idx), \
  59. .size = (cfg)->ip_name##_size, \
  60. .offset = (cfg)->ip_name##_reg_##reg_name##_offset, \
  61. .width = (cfg)->ip_name##_reg_##reg_name##_width, \
  62. }
  63. static u64 readx(void __iomem *addr, u8 width)
  64. {
  65. switch (width) {
  66. case 1:
  67. return readb(addr);
  68. case 2:
  69. return readw(addr);
  70. case 4:
  71. return readl(addr);
  72. case 8:
  73. return readq(addr);
  74. default:
  75. imh_printk(KERN_ERR, "Invalid reg 0x%p width %d\n", addr, width);
  76. return 0;
  77. }
  78. }
  79. static void __read_local_reg(void *reg)
  80. {
  81. struct local_reg *r = (struct local_reg *)reg;
  82. r->val = readx(r->vbase + r->offset, r->width);
  83. }
  84. /* Read a local-view register. */
  85. static bool read_local_reg(struct local_reg *reg)
  86. {
  87. int cpu;
  88. /* Get the target CPU in the package @reg->pkg. */
  89. for_each_online_cpu(cpu) {
  90. if (reg->pkg == topology_physical_package_id(cpu))
  91. break;
  92. }
  93. if (cpu >= nr_cpu_ids)
  94. return false;
  95. reg->vbase = ioremap(reg->pbase, reg->size);
  96. if (!reg->vbase) {
  97. imh_printk(KERN_ERR, "Failed to ioremap 0x%llx\n", reg->pbase);
  98. return false;
  99. }
  100. /* Get the target CPU to read the register. */
  101. smp_call_function_single(cpu, __read_local_reg, reg, 1);
  102. iounmap(reg->vbase);
  103. return true;
  104. }
  105. /* Get the bitmap of memory controller instances in package @pkg. */
  106. static u32 get_imc_bitmap(struct res_config *cfg, int pkg, bool north)
  107. {
  108. DEFINE_LOCAL_REG(reg, cfg, pkg, north, pcu, 0, capid3);
  109. if (!read_local_reg(&reg))
  110. return 0;
  111. edac_dbg(2, "Pkg%d %s mc instances bitmap 0x%llx (reg 0x%llx)\n",
  112. pkg, north ? "north" : "south",
  113. DDR_IMC_BITMAP(reg.val), reg.val);
  114. return DDR_IMC_BITMAP(reg.val);
  115. }
  116. static void imc_release(struct device *dev)
  117. {
  118. edac_dbg(2, "imc device %s released\n", dev_name(dev));
  119. kfree(dev);
  120. }
  121. static int __get_ddr_munits(struct res_config *cfg, struct skx_dev *d,
  122. bool north, int lmc)
  123. {
  124. unsigned long size = cfg->ddr_chan_mmio_sz * cfg->ddr_chan_num;
  125. unsigned long bitmap = get_imc_bitmap(cfg, d->pkg, north);
  126. void __iomem *mbase;
  127. struct device *dev;
  128. int i, rc, pmc;
  129. u64 base;
  130. for_each_set_bit(i, &bitmap, sizeof(bitmap) * 8) {
  131. base = north ? d->mmio_base_h_north : d->mmio_base_h_south;
  132. base += cfg->ddr_imc_base + size * i;
  133. edac_dbg(2, "Pkg%d mc%d mmio base 0x%llx size 0x%lx\n",
  134. d->pkg, lmc, base, size);
  135. /* Set up the imc MMIO. */
  136. mbase = ioremap(base, size);
  137. if (!mbase) {
  138. imh_printk(KERN_ERR, "Failed to ioremap 0x%llx\n", base);
  139. return -ENOMEM;
  140. }
  141. d->imc[lmc].mbase = mbase;
  142. d->imc[lmc].lmc = lmc;
  143. /* Create the imc device instance. */
  144. dev = kzalloc_obj(*dev);
  145. if (!dev)
  146. return -ENOMEM;
  147. dev->release = imc_release;
  148. device_initialize(dev);
  149. rc = dev_set_name(dev, "0x%llx", base);
  150. if (rc) {
  151. imh_printk(KERN_ERR, "Failed to set dev name\n");
  152. put_device(dev);
  153. return rc;
  154. }
  155. d->imc[lmc].dev = dev;
  156. /* Set up the imc index mapping. */
  157. pmc = north ? i : 8 + i;
  158. skx_set_mc_mapping(d, pmc, lmc);
  159. lmc++;
  160. }
  161. return lmc;
  162. }
  163. static bool get_ddr_munits(struct res_config *cfg, struct skx_dev *d)
  164. {
  165. int lmc = __get_ddr_munits(cfg, d, true, 0);
  166. if (lmc < 0)
  167. return false;
  168. lmc = __get_ddr_munits(cfg, d, false, lmc);
  169. if (lmc <= 0)
  170. return false;
  171. return true;
  172. }
  173. static bool get_socket_id(struct res_config *cfg, struct skx_dev *d)
  174. {
  175. DEFINE_LOCAL_REG(reg, cfg, d->pkg, true, ubox, 0, socket_id);
  176. u8 src_id;
  177. int i;
  178. if (!read_local_reg(&reg))
  179. return false;
  180. src_id = SOCKET_ID(reg.val);
  181. edac_dbg(2, "socket id 0x%x (reg 0x%llx)\n", src_id, reg.val);
  182. for (i = 0; i < cfg->ddr_imc_num; i++)
  183. d->imc[i].src_id = src_id;
  184. return true;
  185. }
  186. /* Get TOLM (Top Of Low Memory) and TOHM (Top Of High Memory) parameters. */
  187. static bool imh_get_tolm_tohm(struct res_config *cfg, u64 *tolm, u64 *tohm)
  188. {
  189. DEFINE_LOCAL_REG(reg, cfg, 0, true, sca, 0, tolm);
  190. if (!read_local_reg(&reg))
  191. return false;
  192. *tolm = TOLM(reg.val);
  193. edac_dbg(2, "tolm 0x%llx (reg 0x%llx)\n", *tolm, reg.val);
  194. DEFINE_LOCAL_REG(reg2, cfg, 0, true, sca, 0, tohm);
  195. if (!read_local_reg(&reg2))
  196. return false;
  197. *tohm = TOHM(reg2.val);
  198. edac_dbg(2, "tohm 0x%llx (reg 0x%llx)\n", *tohm, reg2.val);
  199. return true;
  200. }
  201. /* Get the system-view MMIO_BASE_H for {north,south}-IMH. */
  202. static int imh_get_all_mmio_base_h(struct res_config *cfg, struct list_head *edac_list)
  203. {
  204. int i, n = topology_max_packages(), imc_num = cfg->ddr_imc_num + cfg->hbm_imc_num;
  205. struct skx_dev *d;
  206. for (i = 0; i < n; i++) {
  207. d = kzalloc_flex(*d, imc, imc_num);
  208. if (!d)
  209. return -ENOMEM;
  210. DEFINE_LOCAL_REG(reg, cfg, i, true, ubox, 0, mmio_base);
  211. /* Get MMIO_BASE_H for the north-IMH. */
  212. if (!read_local_reg(&reg) || !reg.val) {
  213. kfree(d);
  214. imh_printk(KERN_ERR, "Pkg%d has no north mmio_base_h\n", i);
  215. return -ENODEV;
  216. }
  217. d->mmio_base_h_north = MMIO_BASE_H(reg.val);
  218. edac_dbg(2, "Pkg%d north mmio_base_h 0x%llx (reg 0x%llx)\n",
  219. i, d->mmio_base_h_north, reg.val);
  220. /* Get MMIO_BASE_H for the south-IMH (optional). */
  221. DEFINE_LOCAL_REG(reg2, cfg, i, false, ubox, 0, mmio_base);
  222. if (read_local_reg(&reg2)) {
  223. d->mmio_base_h_south = MMIO_BASE_H(reg2.val);
  224. edac_dbg(2, "Pkg%d south mmio_base_h 0x%llx (reg 0x%llx)\n",
  225. i, d->mmio_base_h_south, reg2.val);
  226. }
  227. d->pkg = i;
  228. d->num_imc = imc_num;
  229. skx_init_mc_mapping(d);
  230. list_add_tail(&d->list, edac_list);
  231. }
  232. return 0;
  233. }
  234. /* Get the number of per-package memory controllers. */
  235. static int imh_get_imc_num(struct res_config *cfg)
  236. {
  237. int imc_num = hweight32(get_imc_bitmap(cfg, 0, true)) +
  238. hweight32(get_imc_bitmap(cfg, 0, false));
  239. if (!imc_num) {
  240. imh_printk(KERN_ERR, "Invalid mc number\n");
  241. return -ENODEV;
  242. }
  243. if (cfg->ddr_imc_num != imc_num) {
  244. /*
  245. * Update the configuration data to reflect the number of
  246. * present DDR memory controllers.
  247. */
  248. cfg->ddr_imc_num = imc_num;
  249. edac_dbg(2, "Set ddr mc number %d\n", imc_num);
  250. }
  251. return 0;
  252. }
  253. /* Get all memory controllers' parameters. */
  254. static int imh_get_munits(struct res_config *cfg, struct list_head *edac_list)
  255. {
  256. struct skx_imc *imc;
  257. struct skx_dev *d;
  258. u8 mc = 0;
  259. int i;
  260. list_for_each_entry(d, edac_list, list) {
  261. if (!get_ddr_munits(cfg, d)) {
  262. imh_printk(KERN_ERR, "No mc found\n");
  263. return -ENODEV;
  264. }
  265. if (!get_socket_id(cfg, d)) {
  266. imh_printk(KERN_ERR, "Failed to get socket id\n");
  267. return -ENODEV;
  268. }
  269. for (i = 0; i < cfg->ddr_imc_num; i++) {
  270. imc = &d->imc[i];
  271. if (!imc->mbase)
  272. continue;
  273. imc->chan_mmio_sz = cfg->ddr_chan_mmio_sz;
  274. imc->num_channels = cfg->ddr_chan_num;
  275. imc->num_dimms = cfg->ddr_dimm_num;
  276. imc->mc = mc++;
  277. }
  278. }
  279. return 0;
  280. }
  281. static bool check_2lm_enabled(struct res_config *cfg, struct skx_dev *d, int ha_idx)
  282. {
  283. DEFINE_LOCAL_REG(reg, cfg, d->pkg, true, ha, ha_idx, mode);
  284. if (!read_local_reg(&reg))
  285. return false;
  286. if (!NMCACHING(reg.val))
  287. return false;
  288. edac_dbg(2, "2-level memory configuration (reg 0x%llx, ha idx %d)\n", reg.val, ha_idx);
  289. return true;
  290. }
  291. /* Check whether the system has a 2-level memory configuration. */
  292. static bool imh_2lm_enabled(struct res_config *cfg, struct list_head *head)
  293. {
  294. struct skx_dev *d;
  295. int i;
  296. list_for_each_entry(d, head, list) {
  297. for (i = 0; i < cfg->ddr_imc_num; i++)
  298. if (check_2lm_enabled(cfg, d, i))
  299. return true;
  300. }
  301. return false;
  302. }
  303. /* Helpers to read memory controller registers */
  304. static u64 read_imc_reg(struct skx_imc *imc, int chan, u32 offset, u8 width)
  305. {
  306. return readx(imc->mbase + imc->chan_mmio_sz * chan + offset, width);
  307. }
  308. static u32 read_imc_mcmtr(struct res_config *cfg, struct skx_imc *imc, int chan)
  309. {
  310. return (u32)read_imc_reg(imc, chan, cfg->ddr_reg_mcmtr_offset, cfg->ddr_reg_mcmtr_width);
  311. }
  312. static u32 read_imc_dimmmtr(struct res_config *cfg, struct skx_imc *imc, int chan, int dimm)
  313. {
  314. return (u32)read_imc_reg(imc, chan, cfg->ddr_reg_dimmmtr_offset +
  315. cfg->ddr_reg_dimmmtr_width * dimm,
  316. cfg->ddr_reg_dimmmtr_width);
  317. }
  318. static bool ecc_enabled(u32 mcmtr)
  319. {
  320. return (bool)ECC_ENABLED(mcmtr);
  321. }
  322. static bool dimm_populated(u32 dimmmtr)
  323. {
  324. return (bool)DIMM_POPULATED(dimmmtr);
  325. }
  326. /* Get each DIMM's configurations of the memory controller @mci. */
  327. static int imh_get_dimm_config(struct mem_ctl_info *mci, struct res_config *cfg)
  328. {
  329. struct skx_pvt *pvt = mci->pvt_info;
  330. struct skx_imc *imc = pvt->imc;
  331. struct dimm_info *dimm;
  332. u32 mcmtr, dimmmtr;
  333. int i, j, ndimms;
  334. for (i = 0; i < imc->num_channels; i++) {
  335. if (!imc->mbase)
  336. continue;
  337. mcmtr = read_imc_mcmtr(cfg, imc, i);
  338. for (ndimms = 0, j = 0; j < imc->num_dimms; j++) {
  339. dimmmtr = read_imc_dimmmtr(cfg, imc, i, j);
  340. edac_dbg(1, "mcmtr 0x%x dimmmtr 0x%x (mc%d ch%d dimm%d)\n",
  341. mcmtr, dimmmtr, imc->mc, i, j);
  342. if (!dimm_populated(dimmmtr))
  343. continue;
  344. dimm = edac_get_dimm(mci, i, j, 0);
  345. ndimms += skx_get_dimm_info(dimmmtr, 0, 0, dimm,
  346. imc, i, j, cfg);
  347. }
  348. if (ndimms && !ecc_enabled(mcmtr)) {
  349. imh_printk(KERN_ERR, "ECC is disabled on mc%d ch%d\n",
  350. imc->mc, i);
  351. return -ENODEV;
  352. }
  353. }
  354. return 0;
  355. }
  356. /* Register all memory controllers to the EDAC core. */
  357. static int imh_register_mci(struct res_config *cfg, struct list_head *edac_list)
  358. {
  359. struct skx_imc *imc;
  360. struct skx_dev *d;
  361. int i, rc;
  362. list_for_each_entry(d, edac_list, list) {
  363. for (i = 0; i < cfg->ddr_imc_num; i++) {
  364. imc = &d->imc[i];
  365. if (!imc->mbase)
  366. continue;
  367. rc = skx_register_mci(imc, imc->dev,
  368. dev_name(imc->dev),
  369. "Intel IMH-based Socket",
  370. EDAC_MOD_STR,
  371. imh_get_dimm_config, cfg);
  372. if (rc)
  373. return rc;
  374. }
  375. }
  376. return 0;
  377. }
  378. static struct res_config dmr_cfg = {
  379. .type = DMR,
  380. .support_ddr5 = true,
  381. .mmio_base_l_north = 0xf6800000,
  382. .mmio_base_l_south = 0xf6000000,
  383. .ddr_chan_num = 1,
  384. .ddr_dimm_num = 2,
  385. .ddr_imc_base = 0x39b000,
  386. .ddr_chan_mmio_sz = 0x8000,
  387. .ddr_reg_mcmtr_offset = 0x360,
  388. .ddr_reg_mcmtr_width = 4,
  389. .ddr_reg_dimmmtr_offset = 0x370,
  390. .ddr_reg_dimmmtr_width = 4,
  391. .ubox_base = 0x0,
  392. .ubox_size = 0x2000,
  393. .ubox_reg_mmio_base_offset = 0x580,
  394. .ubox_reg_mmio_base_width = 4,
  395. .ubox_reg_socket_id_offset = 0x1080,
  396. .ubox_reg_socket_id_width = 4,
  397. .pcu_base = 0x3000,
  398. .pcu_size = 0x10000,
  399. .pcu_reg_capid3_offset = 0x290,
  400. .pcu_reg_capid3_width = 4,
  401. .sca_base = 0x24c000,
  402. .sca_size = 0x2500,
  403. .sca_reg_tolm_offset = 0x2100,
  404. .sca_reg_tolm_width = 8,
  405. .sca_reg_tohm_offset = 0x2108,
  406. .sca_reg_tohm_width = 8,
  407. .ha_base = 0x3eb000,
  408. .ha_size = 0x1000,
  409. .ha_reg_mode_offset = 0x4a0,
  410. .ha_reg_mode_width = 4,
  411. };
  412. static const struct x86_cpu_id imh_cpuids[] = {
  413. X86_MATCH_VFM(INTEL_DIAMONDRAPIDS_X, &dmr_cfg),
  414. {}
  415. };
  416. MODULE_DEVICE_TABLE(x86cpu, imh_cpuids);
  417. static struct notifier_block imh_mce_dec = {
  418. .notifier_call = skx_mce_check_error,
  419. .priority = MCE_PRIO_EDAC,
  420. };
  421. static int __init imh_init(void)
  422. {
  423. const struct x86_cpu_id *id;
  424. struct list_head *edac_list;
  425. struct res_config *cfg;
  426. const char *owner;
  427. u64 tolm, tohm;
  428. int rc;
  429. edac_dbg(2, "\n");
  430. if (ghes_get_devices())
  431. return -EBUSY;
  432. owner = edac_get_owner();
  433. if (owner && strncmp(owner, EDAC_MOD_STR, sizeof(EDAC_MOD_STR)))
  434. return -EBUSY;
  435. if (cpu_feature_enabled(X86_FEATURE_HYPERVISOR))
  436. return -ENODEV;
  437. id = x86_match_cpu(imh_cpuids);
  438. if (!id)
  439. return -ENODEV;
  440. cfg = (struct res_config *)id->driver_data;
  441. skx_set_res_cfg(cfg);
  442. if (!imh_get_tolm_tohm(cfg, &tolm, &tohm))
  443. return -ENODEV;
  444. skx_set_hi_lo(tolm, tohm);
  445. rc = imh_get_imc_num(cfg);
  446. if (rc < 0)
  447. goto fail;
  448. edac_list = skx_get_edac_list();
  449. rc = imh_get_all_mmio_base_h(cfg, edac_list);
  450. if (rc)
  451. goto fail;
  452. rc = imh_get_munits(cfg, edac_list);
  453. if (rc)
  454. goto fail;
  455. skx_set_mem_cfg(imh_2lm_enabled(cfg, edac_list));
  456. rc = imh_register_mci(cfg, edac_list);
  457. if (rc)
  458. goto fail;
  459. rc = skx_adxl_get();
  460. if (rc)
  461. goto fail;
  462. opstate_init();
  463. mce_register_decode_chain(&imh_mce_dec);
  464. skx_setup_debug("imh_test");
  465. imh_printk(KERN_INFO, "%s\n", IMH_REVISION);
  466. return 0;
  467. fail:
  468. skx_remove();
  469. return rc;
  470. }
  471. static void __exit imh_exit(void)
  472. {
  473. edac_dbg(2, "\n");
  474. skx_teardown_debug();
  475. mce_unregister_decode_chain(&imh_mce_dec);
  476. skx_adxl_put();
  477. skx_remove();
  478. }
  479. module_init(imh_init);
  480. module_exit(imh_exit);
  481. MODULE_LICENSE("GPL");
  482. MODULE_AUTHOR("Qiuxu Zhuo");
  483. MODULE_DESCRIPTION("MC Driver for Intel servers using IMH-based memory controller");