pci.c 31 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145
  1. // SPDX-License-Identifier: GPL-2.0-only
  2. /* Copyright(c) 2020 Intel Corporation. All rights reserved. */
  3. #include <linux/unaligned.h>
  4. #include <linux/io-64-nonatomic-lo-hi.h>
  5. #include <linux/moduleparam.h>
  6. #include <linux/module.h>
  7. #include <linux/delay.h>
  8. #include <linux/sizes.h>
  9. #include <linux/mutex.h>
  10. #include <linux/list.h>
  11. #include <linux/pci.h>
  12. #include <linux/aer.h>
  13. #include <linux/io.h>
  14. #include <cxl/mailbox.h>
  15. #include "cxlmem.h"
  16. #include "cxlpci.h"
  17. #include "cxl.h"
  18. #include "pmu.h"
  19. /**
  20. * DOC: cxl pci
  21. *
  22. * This implements the PCI exclusive functionality for a CXL device as it is
  23. * defined by the Compute Express Link specification. CXL devices may surface
  24. * certain functionality even if it isn't CXL enabled. While this driver is
  25. * focused around the PCI specific aspects of a CXL device, it binds to the
  26. * specific CXL memory device class code, and therefore the implementation of
  27. * cxl_pci is focused around CXL memory devices.
  28. *
  29. * The driver has several responsibilities, mainly:
  30. * - Create the memX device and register on the CXL bus.
  31. * - Enumerate device's register interface and map them.
  32. * - Registers nvdimm bridge device with cxl_core.
  33. * - Registers a CXL mailbox with cxl_core.
  34. */
/*
 * cxl_doorbell_busy() - test the mailbox doorbell bit
 * @cxlds: device state whose regs.mbox mapping is read
 *
 * Performs a 32-bit read of the mailbox control register and evaluates to a
 * non-zero value while CXLDEV_MBOX_CTRL_DOORBELL is set in it.
 */
#define cxl_doorbell_busy(cxlds)                                                \
	(readl((cxlds)->regs.mbox + CXLDEV_MBOX_CTRL_OFFSET) &                  \
	 CXLDEV_MBOX_CTRL_DOORBELL)
/* CXL 2.0 - 8.2.8.4 */
/*
 * NOTE: despite the _MS suffix this expands to a jiffies count (2 * HZ), not
 * a millisecond count; users in this file add it to a jiffies timestamp.
 */
#define CXL_MAILBOX_TIMEOUT_MS (2 * HZ)
/*
 * CXL 2.0 ECN "Add Mailbox Ready Time" defines a capability field to
 * dictate how long to wait for the mailbox to become ready. The new
 * field allows the device to tell software the amount of time to wait
 * before mailbox ready. This field per the spec theoretically allows
 * for up to 255 seconds. 255 seconds is unreasonably long, it's longer
 * than the maximum SATA port link recovery wait. Default to 60 seconds
 * until someone builds a CXL device that needs more time in practice.
 *
 * The value is in seconds and is exposed as a module parameter with
 * mode 0644.
 */
static unsigned short mbox_ready_timeout = 60;
module_param(mbox_ready_timeout, ushort, 0644);
MODULE_PARM_DESC(mbox_ready_timeout, "seconds to wait for mailbox ready");
  52. static int cxl_pci_mbox_wait_for_doorbell(struct cxl_dev_state *cxlds)
  53. {
  54. const unsigned long start = jiffies;
  55. unsigned long end = start;
  56. while (cxl_doorbell_busy(cxlds)) {
  57. end = jiffies;
  58. if (time_after(end, start + CXL_MAILBOX_TIMEOUT_MS)) {
  59. /* Check again in case preempted before timeout test */
  60. if (!cxl_doorbell_busy(cxlds))
  61. break;
  62. return -ETIMEDOUT;
  63. }
  64. cpu_relax();
  65. }
  66. dev_dbg(cxlds->dev, "Doorbell wait took %dms",
  67. jiffies_to_msecs(end) - jiffies_to_msecs(start));
  68. return 0;
  69. }
  70. #define cxl_err(dev, status, msg) \
  71. dev_err_ratelimited(dev, msg ", device state %s%s\n", \
  72. status & CXLMDEV_DEV_FATAL ? " fatal" : "", \
  73. status & CXLMDEV_FW_HALT ? " firmware-halt" : "")
  74. #define cxl_cmd_err(dev, cmd, status, msg) \
  75. dev_err_ratelimited(dev, msg " (opcode: %#x), device state %s%s\n", \
  76. (cmd)->opcode, \
  77. status & CXLMDEV_DEV_FATAL ? " fatal" : "", \
  78. status & CXLMDEV_FW_HALT ? " firmware-halt" : "")
/*
 * Threaded irq dev_id's must be globally unique. cxl_dev_id provides a unique
 * wrapper object for each irq within the same cxlds.
 *
 * One instance is allocated per requested irq by cxl_request_irq() and handed
 * to the handler as its cookie; handlers recover the device state through
 * @cxlds.
 */
struct cxl_dev_id {
	/* device state the irq belongs to */
	struct cxl_dev_state *cxlds;
};
  86. static int cxl_request_irq(struct cxl_dev_state *cxlds, int irq,
  87. irq_handler_t thread_fn)
  88. {
  89. struct device *dev = cxlds->dev;
  90. struct cxl_dev_id *dev_id;
  91. dev_id = devm_kzalloc(dev, sizeof(*dev_id), GFP_KERNEL);
  92. if (!dev_id)
  93. return -ENOMEM;
  94. dev_id->cxlds = cxlds;
  95. return devm_request_threaded_irq(dev, irq, NULL, thread_fn,
  96. IRQF_SHARED | IRQF_ONESHOT, NULL,
  97. dev_id);
  98. }
  99. static bool cxl_mbox_background_complete(struct cxl_dev_state *cxlds)
  100. {
  101. u64 reg;
  102. reg = readq(cxlds->regs.mbox + CXLDEV_MBOX_BG_CMD_STATUS_OFFSET);
  103. return FIELD_GET(CXLDEV_MBOX_BG_CMD_COMMAND_PCT_MASK, reg) == 100;
  104. }
  105. static irqreturn_t cxl_pci_mbox_irq(int irq, void *id)
  106. {
  107. u64 reg;
  108. u16 opcode;
  109. struct cxl_dev_id *dev_id = id;
  110. struct cxl_dev_state *cxlds = dev_id->cxlds;
  111. struct cxl_mailbox *cxl_mbox = &cxlds->cxl_mbox;
  112. struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlds);
  113. if (!cxl_mbox_background_complete(cxlds))
  114. return IRQ_NONE;
  115. reg = readq(cxlds->regs.mbox + CXLDEV_MBOX_BG_CMD_STATUS_OFFSET);
  116. opcode = FIELD_GET(CXLDEV_MBOX_BG_CMD_COMMAND_OPCODE_MASK, reg);
  117. if (opcode == CXL_MBOX_OP_SANITIZE) {
  118. mutex_lock(&cxl_mbox->mbox_mutex);
  119. if (mds->security.sanitize_node)
  120. mod_delayed_work(system_percpu_wq, &mds->security.poll_dwork, 0);
  121. mutex_unlock(&cxl_mbox->mbox_mutex);
  122. } else {
  123. /* short-circuit the wait in __cxl_pci_mbox_send_cmd() */
  124. rcuwait_wake_up(&cxl_mbox->mbox_wait);
  125. }
  126. return IRQ_HANDLED;
  127. }
  128. /*
  129. * Sanitization operation polling mode.
  130. */
  131. static void cxl_mbox_sanitize_work(struct work_struct *work)
  132. {
  133. struct cxl_memdev_state *mds =
  134. container_of(work, typeof(*mds), security.poll_dwork.work);
  135. struct cxl_dev_state *cxlds = &mds->cxlds;
  136. struct cxl_mailbox *cxl_mbox = &cxlds->cxl_mbox;
  137. mutex_lock(&cxl_mbox->mbox_mutex);
  138. if (cxl_mbox_background_complete(cxlds)) {
  139. mds->security.poll_tmo_secs = 0;
  140. if (mds->security.sanitize_node)
  141. sysfs_notify_dirent(mds->security.sanitize_node);
  142. mds->security.sanitize_active = false;
  143. dev_dbg(cxlds->dev, "Sanitization operation ended\n");
  144. } else {
  145. int timeout = mds->security.poll_tmo_secs + 10;
  146. mds->security.poll_tmo_secs = min(15 * 60, timeout);
  147. schedule_delayed_work(&mds->security.poll_dwork, timeout * HZ);
  148. }
  149. mutex_unlock(&cxl_mbox->mbox_mutex);
  150. }
/**
 * __cxl_pci_mbox_send_cmd() - Execute a mailbox command
 * @cxl_mbox: CXL mailbox context
 * @mbox_cmd: Command to send to the memory device.
 *
 * Context: Any context. Expects mbox_mutex to be held.
 *          NOTE(review): the background-command path below sleeps in
 *          rcuwait_wait_event_timeout(); confirm "any context" is intended.
 * Return:
 *  * 0 when the command completed (successfully or not). Caller should check
 *    the return code in @mbox_cmd to make sure it succeeded.
 *  * -EBUSY if the doorbell is already set on entry, if a sanitize poll is
 *    still pending and the opcode is not Get Health Info, or if a sanitize
 *    command is submitted while one is already marked active.
 *  * -EINVAL if @mbox_cmd has a non-zero size_in but no payload_in buffer.
 *  * -ETIMEDOUT if the doorbell did not clear in time or a background
 *    operation did not complete within poll_count polling intervals.
 *
 * On a successful command @mbox_cmd->size_out is updated to the number of
 * bytes actually copied into payload_out (0 when nothing was copied).
 *
 * This is a generic form of the CXL mailbox send command thus only using the
 * registers defined by the mailbox capability ID - CXL 2.0 8.2.8.4. Memory
 * devices, and perhaps other types of CXL devices may have further information
 * available upon error conditions. Driver facilities wishing to send mailbox
 * commands should use the wrapper command.
 *
 * The CXL spec allows for up to two mailboxes. The intention is for the primary
 * mailbox to be OS controlled and the secondary mailbox to be used by system
 * firmware. This allows the OS and firmware to communicate with the device and
 * not need to coordinate with each other. The driver only uses the primary
 * mailbox.
 */
static int __cxl_pci_mbox_send_cmd(struct cxl_mailbox *cxl_mbox,
				   struct cxl_mbox_cmd *mbox_cmd)
{
	struct cxl_dev_state *cxlds = mbox_to_cxlds(cxl_mbox);
	struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlds);
	void __iomem *payload = cxlds->regs.mbox + CXLDEV_MBOX_PAYLOAD_OFFSET;
	struct device *dev = cxlds->dev;
	u64 cmd_reg, status_reg;
	size_t out_len;
	int rc;

	lockdep_assert_held(&cxl_mbox->mbox_mutex);

	/*
	 * Here are the steps from 8.2.8.4 of the CXL 2.0 spec.
	 *   1. Caller reads MB Control Register to verify doorbell is clear
	 *   2. Caller writes Command Register
	 *   3. Caller writes Command Payload Registers if input payload is non-empty
	 *   4. Caller writes MB Control Register to set doorbell
	 *   5. Caller either polls for doorbell to be clear or waits for interrupt if configured
	 *   6. Caller reads MB Status Register to fetch Return code
	 *   7. If command successful, Caller reads Command Register to get Payload Length
	 *   8. If output payload is non-empty, host reads Command Payload Registers
	 *
	 * Hardware is free to do whatever it wants before the doorbell is rung,
	 * and isn't allowed to change anything after it clears the doorbell. As
	 * such, steps 2 and 3 can happen in any order, and steps 6, 7, 8 can
	 * also happen in any order (though some orders might not make sense).
	 */

	/* #1 */
	if (cxl_doorbell_busy(cxlds)) {
		u64 md_status =
			readq(cxlds->regs.memdev + CXLMDEV_STATUS_OFFSET);

		cxl_cmd_err(cxlds->dev, mbox_cmd, md_status,
			    "mailbox queue busy");
		return -EBUSY;
	}

	/*
	 * With sanitize polling, hardware might be done and the poller still
	 * not be in sync. Ensure no new command comes in until so. Keep the
	 * hardware semantics and only allow device health status.
	 */
	if (mds->security.poll_tmo_secs > 0) {
		if (mbox_cmd->opcode != CXL_MBOX_OP_GET_HEALTH_INFO)
			return -EBUSY;
	}

	cmd_reg = FIELD_PREP(CXLDEV_MBOX_CMD_COMMAND_OPCODE_MASK,
			     mbox_cmd->opcode);
	if (mbox_cmd->size_in) {
		if (WARN_ON(!mbox_cmd->payload_in))
			return -EINVAL;

		cmd_reg |= FIELD_PREP(CXLDEV_MBOX_CMD_PAYLOAD_LENGTH_MASK,
				      mbox_cmd->size_in);
		/*
		 * NOTE(review): size_in is not bounded against the mailbox
		 * payload size here; presumably callers validate it.
		 */
		memcpy_toio(payload, mbox_cmd->payload_in, mbox_cmd->size_in);
	}

	/* #2, #3 */
	writeq(cmd_reg, cxlds->regs.mbox + CXLDEV_MBOX_CMD_OFFSET);

	/* #4 */
	dev_dbg(dev, "Sending command: 0x%04x\n", mbox_cmd->opcode);
	writel(CXLDEV_MBOX_CTRL_DOORBELL,
	       cxlds->regs.mbox + CXLDEV_MBOX_CTRL_OFFSET);

	/* #5 */
	rc = cxl_pci_mbox_wait_for_doorbell(cxlds);
	if (rc == -ETIMEDOUT) {
		u64 md_status = readq(cxlds->regs.memdev + CXLMDEV_STATUS_OFFSET);

		cxl_cmd_err(cxlds->dev, mbox_cmd, md_status, "mailbox timeout");
		return rc;
	}

	/* #6 */
	status_reg = readq(cxlds->regs.mbox + CXLDEV_MBOX_STATUS_OFFSET);
	mbox_cmd->return_code =
		FIELD_GET(CXLDEV_MBOX_STATUS_RET_CODE_MASK, status_reg);

	/*
	 * Handle the background command in a synchronous manner.
	 *
	 * All other mailbox commands will serialize/queue on the mbox_mutex,
	 * which we currently hold. Furthermore this also guarantees that
	 * cxl_mbox_background_complete() checks are safe amongst each other,
	 * in that no new bg operation can occur in between.
	 *
	 * Background operations are timesliced in accordance with the nature
	 * of the command. In the event of timeout, the mailbox state is
	 * indeterminate until the next successful command submission and the
	 * driver can get back in sync with the hardware state.
	 */
	if (mbox_cmd->return_code == CXL_MBOX_CMD_RC_BACKGROUND) {
		u64 bg_status_reg;
		int i, timeout;

		/*
		 * Sanitization is a special case which monopolizes the device
		 * and cannot be timesliced. Handle asynchronously instead,
		 * and allow userspace to poll(2) for completion.
		 */
		if (mbox_cmd->opcode == CXL_MBOX_OP_SANITIZE) {
			/*
			 * NOTE(review): this -EBUSY is returned after the
			 * doorbell was already rung for this command.
			 */
			if (mds->security.sanitize_active)
				return -EBUSY;

			/* give first timeout a second */
			timeout = 1;
			mds->security.poll_tmo_secs = timeout;
			mds->security.sanitize_active = true;
			schedule_delayed_work(&mds->security.poll_dwork,
					      timeout * HZ);
			dev_dbg(dev, "Sanitization operation started\n");
			/*
			 * return_code still holds CXL_MBOX_CMD_RC_BACKGROUND
			 * here; the jump bypasses the return_code check below.
			 */
			goto success;
		}

		dev_dbg(dev, "Mailbox background operation (0x%04x) started\n",
			mbox_cmd->opcode);

		/* wait up to poll_count intervals of poll_interval_ms each */
		timeout = mbox_cmd->poll_interval_ms;
		for (i = 0; i < mbox_cmd->poll_count; i++) {
			if (rcuwait_wait_event_timeout(&cxl_mbox->mbox_wait,
						       cxl_mbox_background_complete(cxlds),
						       TASK_UNINTERRUPTIBLE,
						       msecs_to_jiffies(timeout)) > 0)
				break;
		}

		if (!cxl_mbox_background_complete(cxlds)) {
			dev_err(dev, "timeout waiting for background (%d ms)\n",
				timeout * mbox_cmd->poll_count);
			return -ETIMEDOUT;
		}

		/* the final return code comes from the background status */
		bg_status_reg = readq(cxlds->regs.mbox +
				      CXLDEV_MBOX_BG_CMD_STATUS_OFFSET);
		mbox_cmd->return_code =
			FIELD_GET(CXLDEV_MBOX_BG_CMD_COMMAND_RC_MASK,
				  bg_status_reg);
		dev_dbg(dev,
			"Mailbox background operation (0x%04x) completed\n",
			mbox_cmd->opcode);
	}

	if (mbox_cmd->return_code != CXL_MBOX_CMD_RC_SUCCESS) {
		dev_dbg(dev, "Mailbox operation had an error: %s\n",
			cxl_mbox_cmd_rc2str(mbox_cmd));
		return 0; /* completed but caller must check return_code */
	}

success:
	/* #7 */
	cmd_reg = readq(cxlds->regs.mbox + CXLDEV_MBOX_CMD_OFFSET);
	out_len = FIELD_GET(CXLDEV_MBOX_CMD_PAYLOAD_LENGTH_MASK, cmd_reg);

	/* #8 */
	if (out_len && mbox_cmd->payload_out) {
		/*
		 * Sanitize the copy. If hardware misbehaves, out_len per the
		 * spec can actually be greater than the max allowed size (21
		 * bits available but spec defined 1M max). The caller also may
		 * have requested less data than the hardware supplied even
		 * within spec.
		 */
		size_t n;

		n = min3(mbox_cmd->size_out, cxl_mbox->payload_size, out_len);
		memcpy_fromio(mbox_cmd->payload_out, payload, n);
		mbox_cmd->size_out = n;
	} else {
		mbox_cmd->size_out = 0;
	}

	return 0;
}
  327. static int cxl_pci_mbox_send(struct cxl_mailbox *cxl_mbox,
  328. struct cxl_mbox_cmd *cmd)
  329. {
  330. int rc;
  331. mutex_lock(&cxl_mbox->mbox_mutex);
  332. rc = __cxl_pci_mbox_send_cmd(cxl_mbox, cmd);
  333. mutex_unlock(&cxl_mbox->mbox_mutex);
  334. return rc;
  335. }
  336. static int cxl_pci_setup_mailbox(struct cxl_memdev_state *mds, bool irq_avail)
  337. {
  338. struct cxl_dev_state *cxlds = &mds->cxlds;
  339. struct cxl_mailbox *cxl_mbox = &cxlds->cxl_mbox;
  340. const int cap = readl(cxlds->regs.mbox + CXLDEV_MBOX_CAPS_OFFSET);
  341. struct device *dev = cxlds->dev;
  342. unsigned long timeout;
  343. int irq, msgnum;
  344. u64 md_status;
  345. u32 ctrl;
  346. timeout = jiffies + mbox_ready_timeout * HZ;
  347. do {
  348. md_status = readq(cxlds->regs.memdev + CXLMDEV_STATUS_OFFSET);
  349. if (md_status & CXLMDEV_MBOX_IF_READY)
  350. break;
  351. if (msleep_interruptible(100))
  352. break;
  353. } while (!time_after(jiffies, timeout));
  354. if (!(md_status & CXLMDEV_MBOX_IF_READY)) {
  355. cxl_err(dev, md_status, "timeout awaiting mailbox ready");
  356. return -ETIMEDOUT;
  357. }
  358. /*
  359. * A command may be in flight from a previous driver instance,
  360. * think kexec, do one doorbell wait so that
  361. * __cxl_pci_mbox_send_cmd() can assume that it is the only
  362. * source for future doorbell busy events.
  363. */
  364. if (cxl_pci_mbox_wait_for_doorbell(cxlds) != 0) {
  365. cxl_err(dev, md_status, "timeout awaiting mailbox idle");
  366. return -ETIMEDOUT;
  367. }
  368. cxl_mbox->mbox_send = cxl_pci_mbox_send;
  369. cxl_mbox->payload_size =
  370. 1 << FIELD_GET(CXLDEV_MBOX_CAP_PAYLOAD_SIZE_MASK, cap);
  371. /*
  372. * CXL 2.0 8.2.8.4.3 Mailbox Capabilities Register
  373. *
  374. * If the size is too small, mandatory commands will not work and so
  375. * there's no point in going forward. If the size is too large, there's
  376. * no harm is soft limiting it.
  377. */
  378. cxl_mbox->payload_size = min_t(size_t, cxl_mbox->payload_size, SZ_1M);
  379. if (cxl_mbox->payload_size < 256) {
  380. dev_err(dev, "Mailbox is too small (%zub)",
  381. cxl_mbox->payload_size);
  382. return -ENXIO;
  383. }
  384. dev_dbg(dev, "Mailbox payload sized %zu", cxl_mbox->payload_size);
  385. INIT_DELAYED_WORK(&mds->security.poll_dwork, cxl_mbox_sanitize_work);
  386. /* background command interrupts are optional */
  387. if (!(cap & CXLDEV_MBOX_CAP_BG_CMD_IRQ) || !irq_avail)
  388. return 0;
  389. msgnum = FIELD_GET(CXLDEV_MBOX_CAP_IRQ_MSGNUM_MASK, cap);
  390. irq = pci_irq_vector(to_pci_dev(cxlds->dev), msgnum);
  391. if (irq < 0)
  392. return 0;
  393. if (cxl_request_irq(cxlds, irq, cxl_pci_mbox_irq))
  394. return 0;
  395. dev_dbg(cxlds->dev, "Mailbox interrupts enabled\n");
  396. /* enable background command mbox irq support */
  397. ctrl = readl(cxlds->regs.mbox + CXLDEV_MBOX_CTRL_OFFSET);
  398. ctrl |= CXLDEV_MBOX_CTRL_BG_CMD_IRQ;
  399. writel(ctrl, cxlds->regs.mbox + CXLDEV_MBOX_CTRL_OFFSET);
  400. return 0;
  401. }
  402. /*
  403. * Assume that any RCIEP that emits the CXL memory expander class code
  404. * is an RCD
  405. */
  406. static bool is_cxl_restricted(struct pci_dev *pdev)
  407. {
  408. return pci_pcie_type(pdev) == PCI_EXP_TYPE_RC_END;
  409. }
  410. static int cxl_rcrb_get_comp_regs(struct pci_dev *pdev,
  411. struct cxl_register_map *map,
  412. struct cxl_dport *dport)
  413. {
  414. resource_size_t component_reg_phys;
  415. *map = (struct cxl_register_map) {
  416. .host = &pdev->dev,
  417. .resource = CXL_RESOURCE_NONE,
  418. };
  419. struct cxl_port *port __free(put_cxl_port) =
  420. cxl_pci_find_port(pdev, &dport);
  421. if (!port)
  422. return -EPROBE_DEFER;
  423. component_reg_phys = cxl_rcd_component_reg_phys(&pdev->dev, dport);
  424. if (component_reg_phys == CXL_RESOURCE_NONE)
  425. return -ENXIO;
  426. map->resource = component_reg_phys;
  427. map->reg_type = CXL_REGLOC_RBI_COMPONENT;
  428. map->max_size = CXL_COMPONENT_REG_BLOCK_SIZE;
  429. return 0;
  430. }
  431. static int cxl_pci_setup_regs(struct pci_dev *pdev, enum cxl_regloc_type type,
  432. struct cxl_register_map *map)
  433. {
  434. int rc;
  435. rc = cxl_find_regblock(pdev, type, map);
  436. /*
  437. * If the Register Locator DVSEC does not exist, check if it
  438. * is an RCH and try to extract the Component Registers from
  439. * an RCRB.
  440. */
  441. if (rc && type == CXL_REGLOC_RBI_COMPONENT && is_cxl_restricted(pdev)) {
  442. struct cxl_dport *dport;
  443. struct cxl_port *port __free(put_cxl_port) =
  444. cxl_pci_find_port(pdev, &dport);
  445. if (!port)
  446. return -EPROBE_DEFER;
  447. rc = cxl_rcrb_get_comp_regs(pdev, map, dport);
  448. if (rc)
  449. return rc;
  450. rc = cxl_dport_map_rcd_linkcap(pdev, dport);
  451. if (rc)
  452. return rc;
  453. } else if (rc) {
  454. return rc;
  455. }
  456. return cxl_setup_regs(map);
  457. }
/*
 * devm action callback: release the event buffer with kvfree(). Registered
 * by cxl_mem_alloc_event_buf().
 */
static void free_event_buf(void *buf)
{
	kvfree(buf);
}
  462. /*
  463. * There is a single buffer for reading event logs from the mailbox. All logs
  464. * share this buffer protected by the mds->event_log_lock.
  465. */
  466. static int cxl_mem_alloc_event_buf(struct cxl_memdev_state *mds)
  467. {
  468. struct cxl_mailbox *cxl_mbox = &mds->cxlds.cxl_mbox;
  469. struct cxl_get_event_payload *buf;
  470. buf = kvmalloc(cxl_mbox->payload_size, GFP_KERNEL);
  471. if (!buf)
  472. return -ENOMEM;
  473. mds->event.buf = buf;
  474. return devm_add_action_or_reset(mds->cxlds.dev, free_event_buf, buf);
  475. }
  476. static bool cxl_alloc_irq_vectors(struct pci_dev *pdev)
  477. {
  478. int nvecs;
  479. /*
  480. * Per CXL 3.0 3.1.1 CXL.io Endpoint a function on a CXL device must
  481. * not generate INTx messages if that function participates in
  482. * CXL.cache or CXL.mem.
  483. *
  484. * Additionally pci_alloc_irq_vectors() handles calling
  485. * pci_free_irq_vectors() automatically despite not being called
  486. * pcim_*. See pci_setup_msi_context().
  487. */
  488. nvecs = pci_alloc_irq_vectors(pdev, 1, CXL_PCI_DEFAULT_MAX_VECTORS,
  489. PCI_IRQ_MSIX | PCI_IRQ_MSI);
  490. if (nvecs < 1) {
  491. dev_dbg(&pdev->dev, "Failed to alloc irq vectors: %d\n", nvecs);
  492. return false;
  493. }
  494. return true;
  495. }
  496. static irqreturn_t cxl_event_thread(int irq, void *id)
  497. {
  498. struct cxl_dev_id *dev_id = id;
  499. struct cxl_dev_state *cxlds = dev_id->cxlds;
  500. struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlds);
  501. u32 status;
  502. do {
  503. /*
  504. * CXL 3.0 8.2.8.3.1: The lower 32 bits are the status;
  505. * ignore the reserved upper 32 bits
  506. */
  507. status = readl(cxlds->regs.status + CXLDEV_DEV_EVENT_STATUS_OFFSET);
  508. /* Ignore logs unknown to the driver */
  509. status &= CXLDEV_EVENT_STATUS_ALL;
  510. if (!status)
  511. break;
  512. cxl_mem_get_event_records(mds, status);
  513. cond_resched();
  514. } while (status);
  515. return IRQ_HANDLED;
  516. }
  517. static int cxl_event_req_irq(struct cxl_dev_state *cxlds, u8 setting)
  518. {
  519. struct pci_dev *pdev = to_pci_dev(cxlds->dev);
  520. int irq;
  521. if (FIELD_GET(CXLDEV_EVENT_INT_MODE_MASK, setting) != CXL_INT_MSI_MSIX)
  522. return -ENXIO;
  523. irq = pci_irq_vector(pdev,
  524. FIELD_GET(CXLDEV_EVENT_INT_MSGNUM_MASK, setting));
  525. if (irq < 0)
  526. return irq;
  527. return cxl_request_irq(cxlds, irq, cxl_event_thread);
  528. }
  529. static int cxl_event_get_int_policy(struct cxl_memdev_state *mds,
  530. struct cxl_event_interrupt_policy *policy)
  531. {
  532. struct cxl_mailbox *cxl_mbox = &mds->cxlds.cxl_mbox;
  533. struct cxl_mbox_cmd mbox_cmd = {
  534. .opcode = CXL_MBOX_OP_GET_EVT_INT_POLICY,
  535. .payload_out = policy,
  536. .size_out = sizeof(*policy),
  537. };
  538. int rc;
  539. rc = cxl_internal_send_cmd(cxl_mbox, &mbox_cmd);
  540. if (rc < 0)
  541. dev_err(mds->cxlds.dev,
  542. "Failed to get event interrupt policy : %d", rc);
  543. return rc;
  544. }
  545. static int cxl_event_config_msgnums(struct cxl_memdev_state *mds,
  546. struct cxl_event_interrupt_policy *policy)
  547. {
  548. struct cxl_mailbox *cxl_mbox = &mds->cxlds.cxl_mbox;
  549. struct cxl_mbox_cmd mbox_cmd;
  550. int rc;
  551. *policy = (struct cxl_event_interrupt_policy) {
  552. .info_settings = CXL_INT_MSI_MSIX,
  553. .warn_settings = CXL_INT_MSI_MSIX,
  554. .failure_settings = CXL_INT_MSI_MSIX,
  555. .fatal_settings = CXL_INT_MSI_MSIX,
  556. };
  557. mbox_cmd = (struct cxl_mbox_cmd) {
  558. .opcode = CXL_MBOX_OP_SET_EVT_INT_POLICY,
  559. .payload_in = policy,
  560. .size_in = sizeof(*policy),
  561. };
  562. rc = cxl_internal_send_cmd(cxl_mbox, &mbox_cmd);
  563. if (rc < 0) {
  564. dev_err(mds->cxlds.dev, "Failed to set event interrupt policy : %d",
  565. rc);
  566. return rc;
  567. }
  568. /* Retrieve final interrupt settings */
  569. return cxl_event_get_int_policy(mds, policy);
  570. }
  571. static int cxl_event_irqsetup(struct cxl_memdev_state *mds)
  572. {
  573. struct cxl_dev_state *cxlds = &mds->cxlds;
  574. struct cxl_event_interrupt_policy policy;
  575. int rc;
  576. rc = cxl_event_config_msgnums(mds, &policy);
  577. if (rc)
  578. return rc;
  579. rc = cxl_event_req_irq(cxlds, policy.info_settings);
  580. if (rc) {
  581. dev_err(cxlds->dev, "Failed to get interrupt for event Info log\n");
  582. return rc;
  583. }
  584. rc = cxl_event_req_irq(cxlds, policy.warn_settings);
  585. if (rc) {
  586. dev_err(cxlds->dev, "Failed to get interrupt for event Warn log\n");
  587. return rc;
  588. }
  589. rc = cxl_event_req_irq(cxlds, policy.failure_settings);
  590. if (rc) {
  591. dev_err(cxlds->dev, "Failed to get interrupt for event Failure log\n");
  592. return rc;
  593. }
  594. rc = cxl_event_req_irq(cxlds, policy.fatal_settings);
  595. if (rc) {
  596. dev_err(cxlds->dev, "Failed to get interrupt for event Fatal log\n");
  597. return rc;
  598. }
  599. return 0;
  600. }
  601. static bool cxl_event_int_is_fw(u8 setting)
  602. {
  603. u8 mode = FIELD_GET(CXLDEV_EVENT_INT_MODE_MASK, setting);
  604. return mode == CXL_INT_FW;
  605. }
  606. static int cxl_event_config(struct pci_host_bridge *host_bridge,
  607. struct cxl_memdev_state *mds, bool irq_avail)
  608. {
  609. struct cxl_event_interrupt_policy policy;
  610. int rc;
  611. /*
  612. * When BIOS maintains CXL error reporting control, it will process
  613. * event records. Only one agent can do so.
  614. */
  615. if (!host_bridge->native_cxl_error)
  616. return 0;
  617. if (!irq_avail) {
  618. dev_info(mds->cxlds.dev, "No interrupt support, disable event processing.\n");
  619. return 0;
  620. }
  621. rc = cxl_event_get_int_policy(mds, &policy);
  622. if (rc)
  623. return rc;
  624. if (cxl_event_int_is_fw(policy.info_settings) ||
  625. cxl_event_int_is_fw(policy.warn_settings) ||
  626. cxl_event_int_is_fw(policy.failure_settings) ||
  627. cxl_event_int_is_fw(policy.fatal_settings)) {
  628. dev_err(mds->cxlds.dev,
  629. "FW still in control of Event Logs despite _OSC settings\n");
  630. return -EBUSY;
  631. }
  632. rc = cxl_mem_alloc_event_buf(mds);
  633. if (rc)
  634. return rc;
  635. rc = cxl_event_irqsetup(mds);
  636. if (rc)
  637. return rc;
  638. cxl_mem_get_event_records(mds, CXLDEV_EVENT_STATUS_ALL);
  639. return 0;
  640. }
  641. static int cxl_pci_type3_init_mailbox(struct cxl_dev_state *cxlds)
  642. {
  643. int rc;
  644. /*
  645. * Fail the init if there's no mailbox. For a type3 this is out of spec.
  646. */
  647. if (!cxlds->reg_map.device_map.mbox.valid)
  648. return -ENODEV;
  649. rc = cxl_mailbox_init(&cxlds->cxl_mbox, cxlds->dev);
  650. if (rc)
  651. return rc;
  652. return 0;
  653. }
  654. static ssize_t rcd_pcie_cap_emit(struct device *dev, u16 offset, char *buf, size_t width)
  655. {
  656. struct cxl_dev_state *cxlds = dev_get_drvdata(dev);
  657. struct cxl_memdev *cxlmd = cxlds->cxlmd;
  658. struct device *root_dev;
  659. struct cxl_dport *dport;
  660. struct cxl_port *root __free(put_cxl_port) =
  661. cxl_mem_find_port(cxlmd, &dport);
  662. if (!root)
  663. return -ENXIO;
  664. root_dev = root->uport_dev;
  665. if (!root_dev)
  666. return -ENXIO;
  667. if (!dport->regs.rcd_pcie_cap)
  668. return -ENXIO;
  669. guard(device)(root_dev);
  670. if (!root_dev->driver)
  671. return -ENXIO;
  672. switch (width) {
  673. case 2:
  674. return sysfs_emit(buf, "%#x\n",
  675. readw(dport->regs.rcd_pcie_cap + offset));
  676. case 4:
  677. return sysfs_emit(buf, "%#x\n",
  678. readl(dport->regs.rcd_pcie_cap + offset));
  679. default:
  680. return -EINVAL;
  681. }
  682. }
/*
 * sysfs show handler for the read-only "rcd_link_cap" attribute: emits the
 * 32-bit register at offset PCI_EXP_LNKCAP via rcd_pcie_cap_emit().
 */
static ssize_t rcd_link_cap_show(struct device *dev,
				 struct device_attribute *attr, char *buf)
{
	return rcd_pcie_cap_emit(dev, PCI_EXP_LNKCAP, buf, sizeof(u32));
}
static DEVICE_ATTR_RO(rcd_link_cap);
/*
 * sysfs show handler for the read-only "rcd_link_ctrl" attribute: emits the
 * 16-bit register at offset PCI_EXP_LNKCTL via rcd_pcie_cap_emit().
 */
static ssize_t rcd_link_ctrl_show(struct device *dev,
				  struct device_attribute *attr, char *buf)
{
	return rcd_pcie_cap_emit(dev, PCI_EXP_LNKCTL, buf, sizeof(u16));
}
static DEVICE_ATTR_RO(rcd_link_ctrl);
/*
 * sysfs show handler for the read-only "rcd_link_status" attribute: emits the
 * 16-bit register at offset PCI_EXP_LNKSTA via rcd_pcie_cap_emit().
 */
static ssize_t rcd_link_status_show(struct device *dev,
				    struct device_attribute *attr, char *buf)
{
	return rcd_pcie_cap_emit(dev, PCI_EXP_LNKSTA, buf, sizeof(u16));
}
static DEVICE_ATTR_RO(rcd_link_status);
/* NULL-terminated list of the RCD link attributes defined in this file. */
static struct attribute *cxl_rcd_attrs[] = {
	&dev_attr_rcd_link_cap.attr,
	&dev_attr_rcd_link_ctrl.attr,
	&dev_attr_rcd_link_status.attr,
	NULL
};
  707. static umode_t cxl_rcd_visible(struct kobject *kobj, struct attribute *a, int n)
  708. {
  709. struct device *dev = kobj_to_dev(kobj);
  710. struct pci_dev *pdev = to_pci_dev(dev);
  711. if (is_cxl_restricted(pdev))
  712. return a->mode;
  713. return 0;
  714. }
/*
 * Attribute group bundling the RCD link attributes; visibility of each
 * attribute is decided by cxl_rcd_visible().
 */
static struct attribute_group cxl_rcd_group = {
	.attrs = cxl_rcd_attrs,
	.is_visible = cxl_rcd_visible,
};
/* wraps cxl_rcd_group into a groups array via __ATTRIBUTE_GROUPS() */
__ATTRIBUTE_GROUPS(cxl_rcd);
  720. static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
  721. {
  722. struct pci_host_bridge *host_bridge = pci_find_host_bridge(pdev->bus);
  723. struct cxl_dpa_info range_info = { 0 };
  724. struct cxl_memdev_state *mds;
  725. struct cxl_dev_state *cxlds;
  726. struct cxl_register_map map;
  727. struct cxl_memdev *cxlmd;
  728. int rc, pmu_count;
  729. unsigned int i;
  730. bool irq_avail;
  731. rc = pcim_enable_device(pdev);
  732. if (rc)
  733. return rc;
  734. pci_set_master(pdev);
  735. mds = cxl_memdev_state_create(&pdev->dev);
  736. if (IS_ERR(mds))
  737. return PTR_ERR(mds);
  738. cxlds = &mds->cxlds;
  739. pci_set_drvdata(pdev, cxlds);
  740. cxlds->rcd = is_cxl_restricted(pdev);
  741. cxlds->serial = pci_get_dsn(pdev);
  742. cxlds->cxl_dvsec = pci_find_dvsec_capability(
  743. pdev, PCI_VENDOR_ID_CXL, PCI_DVSEC_CXL_DEVICE);
  744. if (!cxlds->cxl_dvsec)
  745. dev_warn(&pdev->dev,
  746. "Device DVSEC not present, skip CXL.mem init\n");
  747. rc = cxl_pci_setup_regs(pdev, CXL_REGLOC_RBI_MEMDEV, &map);
  748. if (rc)
  749. return rc;
  750. rc = cxl_map_device_regs(&map, &cxlds->regs);
  751. if (rc)
  752. return rc;
  753. /*
  754. * If the component registers can't be found, the cxl_pci driver may
  755. * still be useful for management functions so don't return an error.
  756. */
  757. rc = cxl_pci_setup_regs(pdev, CXL_REGLOC_RBI_COMPONENT,
  758. &cxlds->reg_map);
  759. if (rc)
  760. dev_warn(&pdev->dev, "No component registers (%d)\n", rc);
  761. else if (!cxlds->reg_map.component_map.ras.valid)
  762. dev_dbg(&pdev->dev, "RAS registers not found\n");
  763. rc = cxl_pci_type3_init_mailbox(cxlds);
  764. if (rc)
  765. return rc;
  766. rc = cxl_await_media_ready(cxlds);
  767. if (rc == 0)
  768. cxlds->media_ready = true;
  769. else
  770. dev_warn(&pdev->dev, "Media not active (%d)\n", rc);
  771. irq_avail = cxl_alloc_irq_vectors(pdev);
  772. rc = cxl_pci_setup_mailbox(mds, irq_avail);
  773. if (rc)
  774. return rc;
  775. rc = cxl_enumerate_cmds(mds);
  776. if (rc)
  777. return rc;
  778. rc = cxl_set_timestamp(mds);
  779. if (rc)
  780. return rc;
  781. rc = cxl_poison_state_init(mds);
  782. if (rc)
  783. return rc;
  784. rc = cxl_dev_state_identify(mds);
  785. if (rc)
  786. return rc;
  787. rc = cxl_mem_dpa_fetch(mds, &range_info);
  788. if (rc)
  789. return rc;
  790. rc = cxl_dpa_setup(cxlds, &range_info);
  791. if (rc)
  792. return rc;
  793. rc = devm_cxl_setup_features(cxlds);
  794. if (rc)
  795. dev_dbg(&pdev->dev, "No CXL Features discovered\n");
  796. cxlmd = devm_cxl_add_memdev(cxlds, NULL);
  797. if (IS_ERR(cxlmd))
  798. return PTR_ERR(cxlmd);
  799. rc = devm_cxl_setup_fw_upload(&pdev->dev, mds);
  800. if (rc)
  801. return rc;
  802. rc = devm_cxl_sanitize_setup_notifier(&pdev->dev, cxlmd);
  803. if (rc)
  804. return rc;
  805. rc = devm_cxl_setup_fwctl(&pdev->dev, cxlmd);
  806. if (rc)
  807. dev_dbg(&pdev->dev, "No CXL FWCTL setup\n");
  808. pmu_count = cxl_count_regblock(pdev, CXL_REGLOC_RBI_PMU);
  809. if (pmu_count < 0)
  810. return pmu_count;
  811. for (i = 0; i < pmu_count; i++) {
  812. struct cxl_pmu_regs pmu_regs;
  813. rc = cxl_find_regblock_instance(pdev, CXL_REGLOC_RBI_PMU, &map, i);
  814. if (rc) {
  815. dev_dbg(&pdev->dev, "Could not find PMU regblock\n");
  816. break;
  817. }
  818. rc = cxl_map_pmu_regs(&map, &pmu_regs);
  819. if (rc) {
  820. dev_dbg(&pdev->dev, "Could not map PMU regs\n");
  821. break;
  822. }
  823. rc = devm_cxl_pmu_add(cxlds->dev, &pmu_regs, cxlmd->id, i, CXL_PMU_MEMDEV);
  824. if (rc) {
  825. dev_dbg(&pdev->dev, "Could not add PMU instance\n");
  826. break;
  827. }
  828. }
  829. rc = cxl_event_config(host_bridge, mds, irq_avail);
  830. if (rc)
  831. return rc;
  832. pci_save_state(pdev);
  833. return rc;
  834. }
  835. static const struct pci_device_id cxl_mem_pci_tbl[] = {
  836. /* PCI class code for CXL.mem Type-3 Devices */
  837. { PCI_DEVICE_CLASS((PCI_CLASS_MEMORY_CXL << 8 | CXL_MEMORY_PROGIF), ~0)},
  838. { /* terminate list */ },
  839. };
  840. MODULE_DEVICE_TABLE(pci, cxl_mem_pci_tbl);
  841. static pci_ers_result_t cxl_slot_reset(struct pci_dev *pdev)
  842. {
  843. struct cxl_dev_state *cxlds = pci_get_drvdata(pdev);
  844. struct cxl_memdev *cxlmd = cxlds->cxlmd;
  845. struct device *dev = &cxlmd->dev;
  846. dev_info(&pdev->dev, "%s: restart CXL.mem after slot reset\n",
  847. dev_name(dev));
  848. pci_restore_state(pdev);
  849. if (device_attach(dev) <= 0)
  850. return PCI_ERS_RESULT_DISCONNECT;
  851. return PCI_ERS_RESULT_RECOVERED;
  852. }
  853. static void cxl_error_resume(struct pci_dev *pdev)
  854. {
  855. struct cxl_dev_state *cxlds = pci_get_drvdata(pdev);
  856. struct cxl_memdev *cxlmd = cxlds->cxlmd;
  857. struct device *dev = &cxlmd->dev;
  858. dev_info(&pdev->dev, "%s: error resume %s\n", dev_name(dev),
  859. dev->driver ? "successful" : "failed");
  860. }
  861. static void cxl_reset_done(struct pci_dev *pdev)
  862. {
  863. struct cxl_dev_state *cxlds = pci_get_drvdata(pdev);
  864. struct cxl_memdev *cxlmd = cxlds->cxlmd;
  865. struct device *dev = &pdev->dev;
  866. /*
  867. * FLR does not expect to touch the HDM decoders and related
  868. * registers. SBR, however, will wipe all device configurations.
  869. * Issue a warning if there was an active decoder before the reset
  870. * that no longer exists.
  871. */
  872. guard(device)(&cxlmd->dev);
  873. if (cxlmd->endpoint &&
  874. cxl_endpoint_decoder_reset_detected(cxlmd->endpoint)) {
  875. dev_crit(dev, "SBR happened without memory regions removal.\n");
  876. dev_crit(dev, "System may be unstable if regions hosted system memory.\n");
  877. add_taint(TAINT_USER, LOCKDEP_STILL_OK);
  878. }
  879. }
  880. static const struct pci_error_handlers cxl_error_handlers = {
  881. .error_detected = cxl_error_detected,
  882. .slot_reset = cxl_slot_reset,
  883. .resume = cxl_error_resume,
  884. .cor_error_detected = cxl_cor_error_detected,
  885. .reset_done = cxl_reset_done,
  886. };
  887. static struct pci_driver cxl_pci_driver = {
  888. .name = KBUILD_MODNAME,
  889. .id_table = cxl_mem_pci_tbl,
  890. .probe = cxl_pci_probe,
  891. .err_handler = &cxl_error_handlers,
  892. .dev_groups = cxl_rcd_groups,
  893. .driver = {
  894. .probe_type = PROBE_PREFER_ASYNCHRONOUS,
  895. },
  896. };
  897. #define CXL_EVENT_HDR_FLAGS_REC_SEVERITY GENMASK(1, 0)
  898. static void cxl_handle_cper_event(enum cxl_event_type ev_type,
  899. struct cxl_cper_event_rec *rec)
  900. {
  901. struct cper_cxl_event_devid *device_id = &rec->hdr.device_id;
  902. struct pci_dev *pdev __free(pci_dev_put) = NULL;
  903. enum cxl_event_log_type log_type;
  904. struct cxl_dev_state *cxlds;
  905. unsigned int devfn;
  906. u32 hdr_flags;
  907. pr_debug("CPER event %d for device %u:%u:%u.%u\n", ev_type,
  908. device_id->segment_num, device_id->bus_num,
  909. device_id->device_num, device_id->func_num);
  910. devfn = PCI_DEVFN(device_id->device_num, device_id->func_num);
  911. pdev = pci_get_domain_bus_and_slot(device_id->segment_num,
  912. device_id->bus_num, devfn);
  913. if (!pdev)
  914. return;
  915. guard(device)(&pdev->dev);
  916. if (pdev->driver != &cxl_pci_driver)
  917. return;
  918. cxlds = pci_get_drvdata(pdev);
  919. if (!cxlds)
  920. return;
  921. /* Fabricate a log type */
  922. hdr_flags = get_unaligned_le24(rec->event.generic.hdr.flags);
  923. log_type = FIELD_GET(CXL_EVENT_HDR_FLAGS_REC_SEVERITY, hdr_flags);
  924. cxl_event_trace_record(cxlds->cxlmd, log_type, ev_type,
  925. &uuid_null, &rec->event);
  926. }
  927. static void cxl_cper_work_fn(struct work_struct *work)
  928. {
  929. struct cxl_cper_work_data wd;
  930. while (cxl_cper_kfifo_get(&wd))
  931. cxl_handle_cper_event(wd.event_type, &wd.rec);
  932. }
  933. static DECLARE_WORK(cxl_cper_work, cxl_cper_work_fn);
  934. static int __init cxl_pci_driver_init(void)
  935. {
  936. int rc;
  937. rc = pci_register_driver(&cxl_pci_driver);
  938. if (rc)
  939. return rc;
  940. rc = cxl_cper_register_work(&cxl_cper_work);
  941. if (rc)
  942. pci_unregister_driver(&cxl_pci_driver);
  943. return rc;
  944. }
  945. static void __exit cxl_pci_driver_exit(void)
  946. {
  947. cxl_cper_unregister_work(&cxl_cper_work);
  948. cancel_work_sync(&cxl_cper_work);
  949. pci_unregister_driver(&cxl_pci_driver);
  950. }
  951. module_init(cxl_pci_driver_init);
  952. module_exit(cxl_pci_driver_exit);
  953. MODULE_DESCRIPTION("CXL: PCI manageability");
  954. MODULE_LICENSE("GPL v2");
  955. MODULE_IMPORT_NS("CXL");