iommu.c 48 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
7127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403140414051406140714081409141014111412141314141415141614171418141914201421142214231424142514261427142814291430143114321433143414351436143714381439144014411442144314441445144614471448144914501451145214531454145514561457145814591460146114621463146414651466146714681469147014711472147314741475147614771478147914801481148214831484148514861487148814891490149114921493149414951496149714981499150015011502150315041505150615071508150915101511151215131514151515161517151815191520152115221523152415251526152715281529153015311532153315341535153615371538153915401541154215431544154515461547154815491550155115521553155415551556155715581559156015611562156315641565156615671568156915701571157215731574157515761577157815791580158115821583158415851586158715881589159015911592159315941595159615971598159916001601160216031604160516061607160816091610161116121613161416151616161716181619162016211622162316241625162616271628162916301631163216331634163516361637163816391640164116421643164416451646164716481649165016511652165316541655165616571658165916601661166216631664166516661667166816691670167116721673167416751676167716781679168016811682
  1. // SPDX-License-Identifier: GPL-2.0-only
  2. /*
  3. * IOMMU API for RISC-V IOMMU implementations.
  4. *
  5. * Copyright © 2022-2024 Rivos Inc.
  6. * Copyright © 2023 FORTH-ICS/CARV
  7. *
  8. * Authors
  9. * Tomasz Jeznach <tjeznach@rivosinc.com>
  10. * Nick Kossifidis <mick@ics.forth.gr>
  11. */
  12. #define pr_fmt(fmt) "riscv-iommu: " fmt
  13. #include <linux/acpi.h>
  14. #include <linux/acpi_rimt.h>
  15. #include <linux/compiler.h>
  16. #include <linux/crash_dump.h>
  17. #include <linux/init.h>
  18. #include <linux/iommu.h>
  19. #include <linux/iopoll.h>
  20. #include <linux/kernel.h>
  21. #include <linux/pci.h>
  22. #include "../iommu-pages.h"
  23. #include "iommu-bits.h"
  24. #include "iommu.h"
/*
 * Timeouts in [us], passed as the timeout argument of the register and
 * queue polling helpers used throughout this file.
 */
#define RISCV_IOMMU_QCSR_TIMEOUT 150000
#define RISCV_IOMMU_QUEUE_TIMEOUT 150000
#define RISCV_IOMMU_DDTP_TIMEOUT 10000000
#define RISCV_IOMMU_IOTINVAL_TIMEOUT 90000000

/*
 * Number of entries per CMD/FLT queue, should be <= INT_MAX.
 * RISCV_IOMMU_QUEUE_INIT() turns these into the default index mask
 * (count - 1).
 */
#define RISCV_IOMMU_DEF_CQ_COUNT 8192
#define RISCV_IOMMU_DEF_FQ_COUNT 4096

/*
 * RISC-V IOMMU PPN <> PHYS address conversions, PHYS <=> PPN[53:10].
 * phys_to_ppn() shifts the address right by 2 and keeps the 44-bit field at
 * bits [53:10]; ppn_to_phys() shifts left by 2 and keeps bits [55:12].
 * All other bits of the input are discarded by the mask.
 */
#define phys_to_ppn(pa) (((pa) >> 2) & (((1ULL << 44) - 1) << 10))
#define ppn_to_phys(pn) (((pn) << 2) & (((1ULL << 44) - 1) << 12))

/* Look up the struct riscv_iommu_device that serves @dev. */
#define dev_to_iommu(dev) \
	iommu_get_iommu_dev(dev, struct riscv_iommu_device, iommu)

/* IOMMU PSCID allocation namespace. */
static DEFINE_IDA(riscv_iommu_pscids);
/* Largest PSCID value: 20 bits, all ones. */
#define RISCV_IOMMU_MAX_PSCID (BIT(20) - 1)
/*
 * Device resource-managed allocations.
 *
 * One record per page allocation made through riscv_iommu_get_pages();
 * the devres release callback frees @addr with iommu_free_pages().
 */
struct riscv_iommu_devres {
	void *addr;	/* kernel virtual address of the tracked allocation */
};
  45. static void riscv_iommu_devres_pages_release(struct device *dev, void *res)
  46. {
  47. struct riscv_iommu_devres *devres = res;
  48. iommu_free_pages(devres->addr);
  49. }
  50. static int riscv_iommu_devres_pages_match(struct device *dev, void *res, void *p)
  51. {
  52. struct riscv_iommu_devres *devres = res;
  53. struct riscv_iommu_devres *target = p;
  54. return devres->addr == target->addr;
  55. }
  56. static void *riscv_iommu_get_pages(struct riscv_iommu_device *iommu,
  57. unsigned int size)
  58. {
  59. struct riscv_iommu_devres *devres;
  60. void *addr;
  61. addr = iommu_alloc_pages_node_sz(dev_to_node(iommu->dev),
  62. GFP_KERNEL_ACCOUNT, size);
  63. if (unlikely(!addr))
  64. return NULL;
  65. devres = devres_alloc(riscv_iommu_devres_pages_release,
  66. sizeof(struct riscv_iommu_devres), GFP_KERNEL);
  67. if (unlikely(!devres)) {
  68. iommu_free_pages(addr);
  69. return NULL;
  70. }
  71. devres->addr = addr;
  72. devres_add(iommu->dev, devres);
  73. return addr;
  74. }
  75. static void riscv_iommu_free_pages(struct riscv_iommu_device *iommu, void *addr)
  76. {
  77. struct riscv_iommu_devres devres = { .addr = addr };
  78. devres_release(iommu->dev, riscv_iommu_devres_pages_release,
  79. riscv_iommu_devres_pages_match, &devres);
  80. }
/*
 * Hardware queue allocation and management.
 */

/*
 * Setup queue base, control registers and default queue length.
 *
 * @q    - pointer to the struct riscv_iommu_queue to initialize
 * @name - queue name token (e.g. CQ, FQ) pasted into the interrupt id,
 *         register offsets and default-count macro names
 *
 * A non-zero mask already stored in the queue is kept; otherwise the mask
 * defaults to RISCV_IOMMU_DEF_<name>_COUNT - 1.
 */
#define RISCV_IOMMU_QUEUE_INIT(q, name) do { \
	struct riscv_iommu_queue *_q = q; \
	_q->qid = RISCV_IOMMU_INTR_ ## name; \
	_q->qbr = RISCV_IOMMU_REG_ ## name ## B; \
	_q->qcr = RISCV_IOMMU_REG_ ## name ## CSR; \
	_q->mask = _q->mask ?: (RISCV_IOMMU_DEF_ ## name ## _COUNT) - 1;\
} while (0)

/*
 * Note: offsets are the same for all queues, so the head/tail register
 * offsets are derived from the queue base register offset using the
 * command queue register distances.
 */
#define Q_HEAD(q) ((q)->qbr + (RISCV_IOMMU_REG_CQH - RISCV_IOMMU_REG_CQB))
#define Q_TAIL(q) ((q)->qbr + (RISCV_IOMMU_REG_CQT - RISCV_IOMMU_REG_CQB))
/* Wrap a free-running index into a ring buffer slot number. */
#define Q_ITEM(q, index) ((q)->mask & (index))
/* Interrupt pending bit in IPSR that belongs to this queue. */
#define Q_IPSR(q) BIT((q)->qid)
  97. /*
  98. * Discover queue ring buffer hardware configuration, allocate in-memory
  99. * ring buffer or use fixed I/O memory location, configure queue base register.
  100. * Must be called before hardware queue is enabled.
  101. *
  102. * @queue - data structure, configured with RISCV_IOMMU_QUEUE_INIT()
  103. * @entry_size - queue single element size in bytes.
  104. */
  105. static int riscv_iommu_queue_alloc(struct riscv_iommu_device *iommu,
  106. struct riscv_iommu_queue *queue,
  107. size_t entry_size)
  108. {
  109. unsigned int logsz;
  110. u64 qb, rb;
  111. /*
  112. * Use WARL base register property to discover maximum allowed
  113. * number of entries and optional fixed IO address for queue location.
  114. */
  115. riscv_iommu_writeq(iommu, queue->qbr, RISCV_IOMMU_QUEUE_LOG2SZ_FIELD);
  116. qb = riscv_iommu_readq(iommu, queue->qbr);
  117. /*
  118. * Calculate and verify hardware supported queue length, as reported
  119. * by the field LOG2SZ, where max queue length is equal to 2^(LOG2SZ + 1).
  120. * Update queue size based on hardware supported value.
  121. */
  122. logsz = ilog2(queue->mask);
  123. if (logsz > FIELD_GET(RISCV_IOMMU_QUEUE_LOG2SZ_FIELD, qb))
  124. logsz = FIELD_GET(RISCV_IOMMU_QUEUE_LOG2SZ_FIELD, qb);
  125. /*
  126. * Use WARL base register property to discover an optional fixed IO
  127. * address for queue ring buffer location. Otherwise allocate contiguous
  128. * system memory.
  129. */
  130. if (FIELD_GET(RISCV_IOMMU_PPN_FIELD, qb)) {
  131. const size_t queue_size = entry_size << (logsz + 1);
  132. queue->phys = pfn_to_phys(FIELD_GET(RISCV_IOMMU_PPN_FIELD, qb));
  133. queue->base = devm_ioremap(iommu->dev, queue->phys, queue_size);
  134. } else {
  135. do {
  136. const size_t queue_size = entry_size << (logsz + 1);
  137. queue->base = riscv_iommu_get_pages(
  138. iommu, max(queue_size, SZ_4K));
  139. queue->phys = __pa(queue->base);
  140. } while (!queue->base && logsz-- > 0);
  141. }
  142. if (!queue->base)
  143. return -ENOMEM;
  144. qb = phys_to_ppn(queue->phys) |
  145. FIELD_PREP(RISCV_IOMMU_QUEUE_LOG2SZ_FIELD, logsz);
  146. /* Update base register and read back to verify hw accepted our write */
  147. riscv_iommu_writeq(iommu, queue->qbr, qb);
  148. rb = riscv_iommu_readq(iommu, queue->qbr);
  149. if (rb != qb) {
  150. dev_err(iommu->dev, "queue #%u allocation failed\n", queue->qid);
  151. return -ENODEV;
  152. }
  153. /* Update actual queue mask */
  154. queue->mask = (2U << logsz) - 1;
  155. dev_dbg(iommu->dev, "queue #%u allocated 2^%u entries",
  156. queue->qid, logsz + 1);
  157. return 0;
  158. }
  159. /* Check interrupt queue status, IPSR */
  160. static irqreturn_t riscv_iommu_queue_ipsr(int irq, void *data)
  161. {
  162. struct riscv_iommu_queue *queue = (struct riscv_iommu_queue *)data;
  163. if (riscv_iommu_readl(queue->iommu, RISCV_IOMMU_REG_IPSR) & Q_IPSR(queue))
  164. return IRQ_WAKE_THREAD;
  165. return IRQ_NONE;
  166. }
  167. static int riscv_iommu_queue_vec(struct riscv_iommu_device *iommu, int n)
  168. {
  169. /* Reuse ICVEC.CIV mask for all interrupt vectors mapping. */
  170. return (iommu->icvec >> (n * 4)) & RISCV_IOMMU_ICVEC_CIV;
  171. }
  172. /*
  173. * Enable queue processing in the hardware, register interrupt handler.
  174. *
  175. * @queue - data structure, already allocated with riscv_iommu_queue_alloc()
  176. * @irq_handler - threaded interrupt handler.
  177. */
  178. static int riscv_iommu_queue_enable(struct riscv_iommu_device *iommu,
  179. struct riscv_iommu_queue *queue,
  180. irq_handler_t irq_handler)
  181. {
  182. const unsigned int irq = iommu->irqs[riscv_iommu_queue_vec(iommu, queue->qid)];
  183. u32 csr;
  184. int rc;
  185. if (queue->iommu)
  186. return -EBUSY;
  187. /* Polling not implemented */
  188. if (!irq)
  189. return -ENODEV;
  190. queue->iommu = iommu;
  191. rc = request_threaded_irq(irq, riscv_iommu_queue_ipsr, irq_handler,
  192. IRQF_ONESHOT | IRQF_SHARED,
  193. dev_name(iommu->dev), queue);
  194. if (rc) {
  195. queue->iommu = NULL;
  196. return rc;
  197. }
  198. /* Empty queue before enabling it */
  199. if (queue->qid == RISCV_IOMMU_INTR_CQ)
  200. riscv_iommu_writel(queue->iommu, Q_TAIL(queue), 0);
  201. else
  202. riscv_iommu_writel(queue->iommu, Q_HEAD(queue), 0);
  203. /*
  204. * Enable queue with interrupts, clear any memory fault if any.
  205. * Wait for the hardware to acknowledge request and activate queue
  206. * processing.
  207. * Note: All CSR bitfields are in the same offsets for all queues.
  208. */
  209. riscv_iommu_writel(iommu, queue->qcr,
  210. RISCV_IOMMU_QUEUE_ENABLE |
  211. RISCV_IOMMU_QUEUE_INTR_ENABLE |
  212. RISCV_IOMMU_QUEUE_MEM_FAULT);
  213. riscv_iommu_readl_timeout(iommu, queue->qcr,
  214. csr, !(csr & RISCV_IOMMU_QUEUE_BUSY),
  215. 10, RISCV_IOMMU_QCSR_TIMEOUT);
  216. if (RISCV_IOMMU_QUEUE_ACTIVE != (csr & (RISCV_IOMMU_QUEUE_ACTIVE |
  217. RISCV_IOMMU_QUEUE_BUSY |
  218. RISCV_IOMMU_QUEUE_MEM_FAULT))) {
  219. /* Best effort to stop and disable failing hardware queue. */
  220. riscv_iommu_writel(iommu, queue->qcr, 0);
  221. free_irq(irq, queue);
  222. queue->iommu = NULL;
  223. dev_err(iommu->dev, "queue #%u failed to start\n", queue->qid);
  224. return -EBUSY;
  225. }
  226. /* Clear any pending interrupt flag. */
  227. riscv_iommu_writel(iommu, RISCV_IOMMU_REG_IPSR, Q_IPSR(queue));
  228. return 0;
  229. }
  230. /*
  231. * Disable queue. Wait for the hardware to acknowledge request and
  232. * stop processing enqueued requests. Report errors but continue.
  233. */
  234. static void riscv_iommu_queue_disable(struct riscv_iommu_queue *queue)
  235. {
  236. struct riscv_iommu_device *iommu = queue->iommu;
  237. u32 csr;
  238. if (!iommu)
  239. return;
  240. free_irq(iommu->irqs[riscv_iommu_queue_vec(iommu, queue->qid)], queue);
  241. riscv_iommu_writel(iommu, queue->qcr, 0);
  242. riscv_iommu_readl_timeout(iommu, queue->qcr,
  243. csr, !(csr & RISCV_IOMMU_QUEUE_BUSY),
  244. 10, RISCV_IOMMU_QCSR_TIMEOUT);
  245. if (csr & (RISCV_IOMMU_QUEUE_ACTIVE | RISCV_IOMMU_QUEUE_BUSY))
  246. dev_err(iommu->dev, "fail to disable hardware queue #%u, csr 0x%x\n",
  247. queue->qid, csr);
  248. queue->iommu = NULL;
  249. }
  250. /*
  251. * Returns number of available valid queue entries and the first item index.
  252. * Update shadow producer index if necessary.
  253. */
  254. static int riscv_iommu_queue_consume(struct riscv_iommu_queue *queue,
  255. unsigned int *index)
  256. {
  257. unsigned int head = atomic_read(&queue->head);
  258. unsigned int tail = atomic_read(&queue->tail);
  259. unsigned int last = Q_ITEM(queue, tail);
  260. int available = (int)(tail - head);
  261. *index = head;
  262. if (available > 0)
  263. return available;
  264. /* read hardware producer index, check reserved register bits are not set. */
  265. if (riscv_iommu_readl_timeout(queue->iommu, Q_TAIL(queue),
  266. tail, (tail & ~queue->mask) == 0,
  267. 0, RISCV_IOMMU_QUEUE_TIMEOUT)) {
  268. dev_err_once(queue->iommu->dev,
  269. "Hardware error: queue access timeout\n");
  270. return 0;
  271. }
  272. if (tail == last)
  273. return 0;
  274. /* update shadow producer index */
  275. return (int)(atomic_add_return((tail - last) & queue->mask, &queue->tail) - head);
  276. }
  277. /*
  278. * Release processed queue entries, should match riscv_iommu_queue_consume() calls.
  279. */
  280. static void riscv_iommu_queue_release(struct riscv_iommu_queue *queue, int count)
  281. {
  282. const unsigned int head = atomic_add_return(count, &queue->head);
  283. riscv_iommu_writel(queue->iommu, Q_HEAD(queue), Q_ITEM(queue, head));
  284. }
  285. /* Return actual consumer index based on hardware reported queue head index. */
  286. static unsigned int riscv_iommu_queue_cons(struct riscv_iommu_queue *queue)
  287. {
  288. const unsigned int cons = atomic_read(&queue->head);
  289. const unsigned int last = Q_ITEM(queue, cons);
  290. unsigned int head;
  291. if (riscv_iommu_readl_timeout(queue->iommu, Q_HEAD(queue), head,
  292. !(head & ~queue->mask),
  293. 0, RISCV_IOMMU_QUEUE_TIMEOUT))
  294. return cons;
  295. return cons + ((head - last) & queue->mask);
  296. }
  297. /* Wait for submitted item to be processed. */
  298. static int riscv_iommu_queue_wait(struct riscv_iommu_queue *queue,
  299. unsigned int index,
  300. unsigned int timeout_us)
  301. {
  302. unsigned int cons = atomic_read(&queue->head);
  303. /* Already processed by the consumer */
  304. if ((int)(cons - index) > 0)
  305. return 0;
  306. /* Monitor consumer index */
  307. return readx_poll_timeout(riscv_iommu_queue_cons, queue, cons,
  308. (int)(cons - index) > 0, 0, timeout_us);
  309. }
/*
 * Enqueue an entry into the queue ring buffer and ring the tail doorbell.
 * This function does not wait for the entry to be processed; callers that
 * need completion use riscv_iommu_queue_wait() with the returned index.
 *
 * @queue      - destination queue
 * @entry      - entry to copy into the ring buffer
 * @entry_size - size of @entry in bytes
 *
 * Returns the producer index reserved for the entry. The same index is
 * returned on the error path, where the entry was not submitted.
 *
 * Error handling for IOMMU hardware not responding in reasonable time
 * will be added as separate patch series along with other RAS features.
 * For now, only report hardware failure and continue.
 *
 * NOTE(review): on the error path the reserved slot is not handed back and
 * the shadow tail is not advanced, so later submitters waiting in step 4
 * would presumably time out as well - confirm.
 */
static unsigned int riscv_iommu_queue_send(struct riscv_iommu_queue *queue,
					   void *entry, size_t entry_size)
{
	unsigned int prod;
	unsigned int head;
	unsigned int tail;
	unsigned long flags;

	/* Do not preempt submission flow. */
	local_irq_save(flags);

	/* 1. Allocate some space in the queue */
	prod = atomic_inc_return(&queue->prod) - 1;
	head = atomic_read(&queue->head);

	/* 2. Wait for space availability. */
	if ((prod - head) > queue->mask) {
		/* Over-subscribed: wait for the shadow head to advance. */
		if (readx_poll_timeout(atomic_read, &queue->head,
				       head, (prod - head) < queue->mask,
				       0, RISCV_IOMMU_QUEUE_TIMEOUT))
			goto err_busy;
	} else if ((prod - head) == queue->mask) {
		const unsigned int last = Q_ITEM(queue, head);

		/*
		 * Ring is exactly full: poll the hardware head register until
		 * it moves, then advance the shadow head by the same distance.
		 */
		if (riscv_iommu_readl_timeout(queue->iommu, Q_HEAD(queue), head,
					      !(head & ~queue->mask) && head != last,
					      0, RISCV_IOMMU_QUEUE_TIMEOUT))
			goto err_busy;
		atomic_add((head - last) & queue->mask, &queue->head);
	}

	/* 3. Store entry in the ring buffer */
	memcpy(queue->base + Q_ITEM(queue, prod) * entry_size, entry, entry_size);

	/* 4. Wait for all previous entries to be ready */
	if (readx_poll_timeout(atomic_read, &queue->tail, tail, prod == tail,
			       0, RISCV_IOMMU_QUEUE_TIMEOUT))
		goto err_busy;

	/*
	 * 5. Make sure the ring buffer update (whether in normal or I/O memory) is
	 * completed and visible before signaling the tail doorbell to fetch
	 * the next command. 'fence ow, ow'
	 */
	dma_wmb();
	riscv_iommu_writel(queue->iommu, Q_TAIL(queue), Q_ITEM(queue, prod + 1));

	/*
	 * 6. Make sure the doorbell write to the device has finished before updating
	 * the shadow tail index in normal memory. 'fence o, w'
	 */
	mmiowb();
	atomic_inc(&queue->tail);

	/* 7. Complete submission and restore local interrupts */
	local_irq_restore(flags);

	return prod;

err_busy:
	local_irq_restore(flags);
	dev_err_once(queue->iommu->dev, "Hardware error: command enqueue failed\n");

	return prod;
}
  369. /*
  370. * IOMMU Command queue chapter 3.1
  371. */
  372. /* Command queue interrupt handler thread function */
  373. static irqreturn_t riscv_iommu_cmdq_process(int irq, void *data)
  374. {
  375. const struct riscv_iommu_queue *queue = (struct riscv_iommu_queue *)data;
  376. unsigned int ctrl;
  377. /* Clear MF/CQ errors, complete error recovery to be implemented. */
  378. ctrl = riscv_iommu_readl(queue->iommu, queue->qcr);
  379. if (ctrl & (RISCV_IOMMU_CQCSR_CQMF | RISCV_IOMMU_CQCSR_CMD_TO |
  380. RISCV_IOMMU_CQCSR_CMD_ILL | RISCV_IOMMU_CQCSR_FENCE_W_IP)) {
  381. riscv_iommu_writel(queue->iommu, queue->qcr, ctrl);
  382. dev_warn(queue->iommu->dev,
  383. "Queue #%u error; fault:%d timeout:%d illegal:%d fence_w_ip:%d\n",
  384. queue->qid,
  385. !!(ctrl & RISCV_IOMMU_CQCSR_CQMF),
  386. !!(ctrl & RISCV_IOMMU_CQCSR_CMD_TO),
  387. !!(ctrl & RISCV_IOMMU_CQCSR_CMD_ILL),
  388. !!(ctrl & RISCV_IOMMU_CQCSR_FENCE_W_IP));
  389. }
  390. /* Placeholder for command queue interrupt notifiers */
  391. /* Clear command interrupt pending. */
  392. riscv_iommu_writel(queue->iommu, RISCV_IOMMU_REG_IPSR, Q_IPSR(queue));
  393. return IRQ_HANDLED;
  394. }
  395. /* Send command to the IOMMU command queue */
  396. static void riscv_iommu_cmd_send(struct riscv_iommu_device *iommu,
  397. struct riscv_iommu_command *cmd)
  398. {
  399. riscv_iommu_queue_send(&iommu->cmdq, cmd, sizeof(*cmd));
  400. }
  401. /* Send IOFENCE.C command and wait for all scheduled commands to complete. */
  402. static void riscv_iommu_cmd_sync(struct riscv_iommu_device *iommu,
  403. unsigned int timeout_us)
  404. {
  405. struct riscv_iommu_command cmd;
  406. unsigned int prod;
  407. riscv_iommu_cmd_iofence(&cmd);
  408. prod = riscv_iommu_queue_send(&iommu->cmdq, &cmd, sizeof(cmd));
  409. if (!timeout_us)
  410. return;
  411. if (riscv_iommu_queue_wait(&iommu->cmdq, prod, timeout_us))
  412. dev_err_once(iommu->dev,
  413. "Hardware error: command execution timeout\n");
  414. }
  415. /*
  416. * IOMMU Fault/Event queue chapter 3.2
  417. */
  418. static void riscv_iommu_fault(struct riscv_iommu_device *iommu,
  419. struct riscv_iommu_fq_record *event)
  420. {
  421. unsigned int err = FIELD_GET(RISCV_IOMMU_FQ_HDR_CAUSE, event->hdr);
  422. unsigned int devid = FIELD_GET(RISCV_IOMMU_FQ_HDR_DID, event->hdr);
  423. /* Placeholder for future fault handling implementation, report only. */
  424. if (err)
  425. dev_warn_ratelimited(iommu->dev,
  426. "Fault %d devid: 0x%x iotval: %llx iotval2: %llx\n",
  427. err, devid, event->iotval, event->iotval2);
  428. }
  429. /* Fault queue interrupt handler thread function */
  430. static irqreturn_t riscv_iommu_fltq_process(int irq, void *data)
  431. {
  432. struct riscv_iommu_queue *queue = (struct riscv_iommu_queue *)data;
  433. struct riscv_iommu_device *iommu = queue->iommu;
  434. struct riscv_iommu_fq_record *events;
  435. unsigned int ctrl, idx;
  436. int cnt, len;
  437. events = (struct riscv_iommu_fq_record *)queue->base;
  438. /* Clear fault interrupt pending and process all received fault events. */
  439. riscv_iommu_writel(iommu, RISCV_IOMMU_REG_IPSR, Q_IPSR(queue));
  440. do {
  441. cnt = riscv_iommu_queue_consume(queue, &idx);
  442. for (len = 0; len < cnt; idx++, len++)
  443. riscv_iommu_fault(iommu, &events[Q_ITEM(queue, idx)]);
  444. riscv_iommu_queue_release(queue, cnt);
  445. } while (cnt > 0);
  446. /* Clear MF/OF errors, complete error recovery to be implemented. */
  447. ctrl = riscv_iommu_readl(iommu, queue->qcr);
  448. if (ctrl & (RISCV_IOMMU_FQCSR_FQMF | RISCV_IOMMU_FQCSR_FQOF)) {
  449. riscv_iommu_writel(iommu, queue->qcr, ctrl);
  450. dev_warn(iommu->dev,
  451. "Queue #%u error; memory fault:%d overflow:%d\n",
  452. queue->qid,
  453. !!(ctrl & RISCV_IOMMU_FQCSR_FQMF),
  454. !!(ctrl & RISCV_IOMMU_FQCSR_FQOF));
  455. }
  456. return IRQ_HANDLED;
  457. }
/*
 * Lookup and initialize device context info structure.
 *
 * @iommu - IOMMU device whose device directory table is walked
 * @devid - device id to look up
 *
 * Walks the device directory table from iommu->ddt_root, allocating and
 * linking missing non-leaf levels on the way. Returns a pointer to the
 * device context slot for @devid, or NULL when the configured directory
 * mode is not 1/2/3-level, @devid does not fit the configured mode, or a
 * directory page could not be allocated.
 */
static struct riscv_iommu_dc *riscv_iommu_get_dc(struct riscv_iommu_device *iommu,
						 unsigned int devid)
{
	/* Base format is used when the MSI_FLAT capability is not reported. */
	const bool base_format = !(iommu->caps & RISCV_IOMMU_CAPABILITIES_MSI_FLAT);
	unsigned int depth;
	unsigned long ddt, old, new;
	void *ptr;
	u8 ddi_bits[3] = { 0 };
	u64 *ddtp = NULL;

	/* Make sure the mode is valid */
	if (iommu->ddt_mode < RISCV_IOMMU_DDTP_IOMMU_MODE_1LVL ||
	    iommu->ddt_mode > RISCV_IOMMU_DDTP_IOMMU_MODE_3LVL)
		return NULL;

	/*
	 * Device id partitioning for base format:
	 * DDI[0]: bits 0 - 6 (1st level) (7 bits)
	 * DDI[1]: bits 7 - 15 (2nd level) (9 bits)
	 * DDI[2]: bits 16 - 23 (3rd level) (8 bits)
	 *
	 * For extended format:
	 * DDI[0]: bits 0 - 5 (1st level) (6 bits)
	 * DDI[1]: bits 6 - 14 (2nd level) (9 bits)
	 * DDI[2]: bits 15 - 23 (3rd level) (9 bits)
	 *
	 * ddi_bits[n] holds the cumulative number of device id bits covered
	 * by levels 0..n, i.e. the bit position where level n + 1 starts.
	 */
	if (base_format) {
		ddi_bits[0] = 7;
		ddi_bits[1] = 7 + 9;
		ddi_bits[2] = 7 + 9 + 8;
	} else {
		ddi_bits[0] = 6;
		ddi_bits[1] = 6 + 9;
		ddi_bits[2] = 6 + 9 + 9;
	}

	/* Make sure device id is within range */
	depth = iommu->ddt_mode - RISCV_IOMMU_DDTP_IOMMU_MODE_1LVL;
	if (devid >= (1 << ddi_bits[depth]))
		return NULL;

	/* Get to the level of the non-leaf node that holds the device context */
	for (ddtp = iommu->ddt_root; depth-- > 0;) {
		/* depth was already decremented: split is the low bit of this level. */
		const int split = ddi_bits[depth];
		/*
		 * Each non-leaf node is 64bits wide and on each level
		 * nodes are indexed by DDI[depth].
		 */
		ddtp += (devid >> split) & 0x1FF;

		/*
		 * Check if this node has been populated and if not
		 * allocate a new level and populate it.
		 */
		do {
			ddt = READ_ONCE(*(unsigned long *)ddtp);
			if (ddt & RISCV_IOMMU_DDTE_V) {
				/* Valid entry: descend into the page it points to. */
				ddtp = __va(ppn_to_phys(ddt));
				break;
			}

			ptr = riscv_iommu_get_pages(iommu, SZ_4K);
			if (!ptr)
				return NULL;

			/* Publish the new page only if the entry is still unchanged. */
			new = phys_to_ppn(__pa(ptr)) | RISCV_IOMMU_DDTE_V;
			old = cmpxchg_relaxed((unsigned long *)ddtp, ddt, new);

			if (old == ddt) {
				ddtp = (u64 *)ptr;
				break;
			}

			/* Race setting DDT detected, re-read and retry. */
			riscv_iommu_free_pages(iommu, ptr);
		} while (1);
	}

	/*
	 * Grab the node that matches DDI[depth], note that when using base
	 * format the device context is 4 * 64bits, and the extended format
	 * is 8 * 64bits, hence the (3 - base_format) below.
	 */
	ddtp += (devid & ((64 << base_format) - 1)) << (3 - base_format);

	return (struct riscv_iommu_dc *)ddtp;
}
  535. /*
  536. * This is best effort IOMMU translation shutdown flow.
  537. * Disable IOMMU without waiting for hardware response.
  538. */
  539. void riscv_iommu_disable(struct riscv_iommu_device *iommu)
  540. {
  541. riscv_iommu_writeq(iommu, RISCV_IOMMU_REG_DDTP,
  542. FIELD_PREP(RISCV_IOMMU_DDTP_IOMMU_MODE,
  543. RISCV_IOMMU_DDTP_IOMMU_MODE_BARE));
  544. riscv_iommu_writel(iommu, RISCV_IOMMU_REG_CQCSR, 0);
  545. riscv_iommu_writel(iommu, RISCV_IOMMU_REG_FQCSR, 0);
  546. riscv_iommu_writel(iommu, RISCV_IOMMU_REG_PQCSR, 0);
  547. }
/*
 * Read the DDTP register, polling every 10us for up to
 * RISCV_IOMMU_DDTP_TIMEOUT microseconds until the BUSY bit clears.
 * Evaluates to the last value read; the poll result is discarded, so
 * callers must test RISCV_IOMMU_DDTP_BUSY in the returned value to detect
 * a timeout.
 */
#define riscv_iommu_read_ddtp(iommu) ({ \
	u64 ddtp; \
	riscv_iommu_readq_timeout((iommu), RISCV_IOMMU_REG_DDTP, ddtp, \
				  !(ddtp & RISCV_IOMMU_DDTP_BUSY), 10, \
				  RISCV_IOMMU_DDTP_TIMEOUT); \
	ddtp; })
  554. static int riscv_iommu_iodir_alloc(struct riscv_iommu_device *iommu)
  555. {
  556. u64 ddtp;
  557. unsigned int mode;
  558. ddtp = riscv_iommu_read_ddtp(iommu);
  559. if (ddtp & RISCV_IOMMU_DDTP_BUSY)
  560. return -EBUSY;
  561. /*
  562. * It is optional for the hardware to report a fixed address for device
  563. * directory root page when DDT.MODE is OFF or BARE.
  564. */
  565. mode = FIELD_GET(RISCV_IOMMU_DDTP_IOMMU_MODE, ddtp);
  566. if (mode == RISCV_IOMMU_DDTP_IOMMU_MODE_BARE ||
  567. mode == RISCV_IOMMU_DDTP_IOMMU_MODE_OFF) {
  568. /* Use WARL to discover hardware fixed DDT PPN */
  569. riscv_iommu_writeq(iommu, RISCV_IOMMU_REG_DDTP,
  570. FIELD_PREP(RISCV_IOMMU_DDTP_IOMMU_MODE, mode));
  571. ddtp = riscv_iommu_read_ddtp(iommu);
  572. if (ddtp & RISCV_IOMMU_DDTP_BUSY)
  573. return -EBUSY;
  574. iommu->ddt_phys = ppn_to_phys(ddtp);
  575. if (iommu->ddt_phys)
  576. iommu->ddt_root = devm_ioremap(iommu->dev,
  577. iommu->ddt_phys, PAGE_SIZE);
  578. if (iommu->ddt_root)
  579. memset(iommu->ddt_root, 0, PAGE_SIZE);
  580. }
  581. if (!iommu->ddt_root) {
  582. iommu->ddt_root = riscv_iommu_get_pages(iommu, SZ_4K);
  583. iommu->ddt_phys = __pa(iommu->ddt_root);
  584. }
  585. if (!iommu->ddt_root)
  586. return -ENOMEM;
  587. return 0;
  588. }
  589. /*
  590. * Discover supported DDT modes starting from requested value,
  591. * configure DDTP register with accepted mode and root DDT address.
  592. * Accepted iommu->ddt_mode is updated on success.
  593. */
  594. static int riscv_iommu_iodir_set_mode(struct riscv_iommu_device *iommu,
  595. unsigned int ddtp_mode)
  596. {
  597. struct device *dev = iommu->dev;
  598. u64 ddtp, rq_ddtp;
  599. unsigned int mode, rq_mode = ddtp_mode;
  600. struct riscv_iommu_command cmd;
  601. ddtp = riscv_iommu_read_ddtp(iommu);
  602. if (ddtp & RISCV_IOMMU_DDTP_BUSY)
  603. return -EBUSY;
  604. /* Disallow state transition from xLVL to xLVL. */
  605. mode = FIELD_GET(RISCV_IOMMU_DDTP_IOMMU_MODE, ddtp);
  606. if (mode != RISCV_IOMMU_DDTP_IOMMU_MODE_BARE &&
  607. mode != RISCV_IOMMU_DDTP_IOMMU_MODE_OFF &&
  608. rq_mode != RISCV_IOMMU_DDTP_IOMMU_MODE_BARE &&
  609. rq_mode != RISCV_IOMMU_DDTP_IOMMU_MODE_OFF)
  610. return -EINVAL;
  611. do {
  612. rq_ddtp = FIELD_PREP(RISCV_IOMMU_DDTP_IOMMU_MODE, rq_mode);
  613. if (rq_mode > RISCV_IOMMU_DDTP_IOMMU_MODE_BARE)
  614. rq_ddtp |= phys_to_ppn(iommu->ddt_phys);
  615. riscv_iommu_writeq(iommu, RISCV_IOMMU_REG_DDTP, rq_ddtp);
  616. ddtp = riscv_iommu_read_ddtp(iommu);
  617. if (ddtp & RISCV_IOMMU_DDTP_BUSY) {
  618. dev_err(dev, "timeout when setting ddtp (ddt mode: %u, read: %llx)\n",
  619. rq_mode, ddtp);
  620. return -EBUSY;
  621. }
  622. /* Verify IOMMU hardware accepts new DDTP config. */
  623. mode = FIELD_GET(RISCV_IOMMU_DDTP_IOMMU_MODE, ddtp);
  624. if (rq_mode == mode)
  625. break;
  626. /* Hardware mandatory DDTP mode has not been accepted. */
  627. if (rq_mode < RISCV_IOMMU_DDTP_IOMMU_MODE_1LVL && rq_ddtp != ddtp) {
  628. dev_err(dev, "DDTP update failed hw: %llx vs %llx\n",
  629. ddtp, rq_ddtp);
  630. return -EINVAL;
  631. }
  632. /*
  633. * Mode field is WARL, an IOMMU may support a subset of
  634. * directory table levels in which case if we tried to set
  635. * an unsupported number of levels we'll readback either
  636. * a valid xLVL or off/bare. If we got off/bare, try again
  637. * with a smaller xLVL.
  638. */
  639. if (mode < RISCV_IOMMU_DDTP_IOMMU_MODE_1LVL &&
  640. rq_mode > RISCV_IOMMU_DDTP_IOMMU_MODE_1LVL) {
  641. dev_dbg(dev, "DDTP hw mode %u vs %u\n", mode, rq_mode);
  642. rq_mode--;
  643. continue;
  644. }
  645. /*
  646. * We tried all supported modes and IOMMU hardware failed to
  647. * accept new settings, something went very wrong since off/bare
  648. * and at least one xLVL must be supported.
  649. */
  650. dev_err(dev, "DDTP hw mode %u, failed to set %u\n",
  651. mode, ddtp_mode);
  652. return -EINVAL;
  653. } while (1);
  654. iommu->ddt_mode = mode;
  655. if (mode != ddtp_mode)
  656. dev_dbg(dev, "DDTP hw mode %u, requested %u\n", mode, ddtp_mode);
  657. /* Invalidate device context cache */
  658. riscv_iommu_cmd_iodir_inval_ddt(&cmd);
  659. riscv_iommu_cmd_send(iommu, &cmd);
  660. /* Invalidate address translation cache */
  661. riscv_iommu_cmd_inval_vma(&cmd);
  662. riscv_iommu_cmd_send(iommu, &cmd);
  663. /* IOFENCE.C */
  664. riscv_iommu_cmd_sync(iommu, RISCV_IOMMU_IOTINVAL_TIMEOUT);
  665. return 0;
  666. }
/*
 * This struct contains protection domain specific IOMMU driver data.
 *
 * One instance backs every paging domain created by
 * riscv_iommu_alloc_paging_domain(); the generic iommu_domain is embedded so
 * that iommu_domain_to_riscv() can recover the container.
 */
struct riscv_iommu_domain {
	/* Generic IOMMU core domain, embedded (see iommu_domain_to_riscv()). */
	struct iommu_domain domain;
	/* List of riscv_iommu_bond entries, one per attached device. */
	struct list_head bonds;
	spinlock_t lock; /* protect bonds list updates. */
	/* PSCID allocated from riscv_iommu_pscids, used to tag invalidations. */
	int pscid;
	/* Set when the IOMMU advertises RISCV_IOMMU_CAPABILITIES_AMO_HWAD. */
	bool amo_enabled;
	/* NUMA node used when allocating page table pages. */
	int numa_node;
	/* One of RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV39/SV48/SV57. */
	unsigned int pgd_mode;
	/* Root page of the IOVA translation tree. */
	unsigned long *pgd_root;
};
/* Map a generic iommu_domain pointer to the riscv_iommu_domain embedding it. */
#define iommu_domain_to_riscv(iommu_domain) \
	container_of(iommu_domain, struct riscv_iommu_domain, domain)
/* Private IOMMU data for managed devices, dev_iommu_priv_* */
struct riscv_iommu_info {
	/*
	 * Paging domain the device is currently attached to; set to NULL when
	 * the device is attached to the blocking or identity domain.
	 */
	struct riscv_iommu_domain *domain;
};
/*
 * Linkage between an iommu_domain and attached devices.
 *
 * A protection domain requiring IOATC and DevATC translation cache
 * invalidations should be linked to its attached devices using a
 * riscv_iommu_bond structure. Devices should be linked to the domain before
 * first use and unlinked after the translations from the referenced
 * protection domain can no longer be used.
 * Blocking and identity domains are not tracked here, as the IOMMU hardware
 * does not cache negative and/or identity (BARE mode) translations, and DevATC
 * is disabled for those protection domains.
 *
 * The device pointer and IOMMU data remain stable in the bond struct after
 * _probe_device() where it's attached to the managed IOMMU, up to the
 * completion of the _release_device() call. The release of the bond structure
 * is synchronized with the device release.
 */
struct riscv_iommu_bond {
	/* Entry in riscv_iommu_domain::bonds (RCU-protected list). */
	struct list_head list;
	/* Callback head used by kfree_rcu() when the bond is unlinked. */
	struct rcu_head rcu;
	/* Device attached to the domain through this bond. */
	struct device *dev;
};
  705. static int riscv_iommu_bond_link(struct riscv_iommu_domain *domain,
  706. struct device *dev)
  707. {
  708. struct riscv_iommu_device *iommu = dev_to_iommu(dev);
  709. struct riscv_iommu_bond *bond;
  710. struct list_head *bonds;
  711. bond = kzalloc_obj(*bond);
  712. if (!bond)
  713. return -ENOMEM;
  714. bond->dev = dev;
  715. /*
  716. * List of devices attached to the domain is arranged based on
  717. * managed IOMMU device.
  718. */
  719. spin_lock(&domain->lock);
  720. list_for_each(bonds, &domain->bonds)
  721. if (dev_to_iommu(list_entry(bonds, struct riscv_iommu_bond, list)->dev) == iommu)
  722. break;
  723. list_add_rcu(&bond->list, bonds);
  724. spin_unlock(&domain->lock);
  725. /* Synchronize with riscv_iommu_iotlb_inval() sequence. See comment below. */
  726. smp_mb();
  727. return 0;
  728. }
  729. static void riscv_iommu_bond_unlink(struct riscv_iommu_domain *domain,
  730. struct device *dev)
  731. {
  732. struct riscv_iommu_device *iommu = dev_to_iommu(dev);
  733. struct riscv_iommu_bond *bond, *found = NULL;
  734. struct riscv_iommu_command cmd;
  735. int count = 0;
  736. if (!domain)
  737. return;
  738. spin_lock(&domain->lock);
  739. list_for_each_entry(bond, &domain->bonds, list) {
  740. if (found && count)
  741. break;
  742. else if (bond->dev == dev)
  743. found = bond;
  744. else if (dev_to_iommu(bond->dev) == iommu)
  745. count++;
  746. }
  747. if (found)
  748. list_del_rcu(&found->list);
  749. spin_unlock(&domain->lock);
  750. kfree_rcu(found, rcu);
  751. /*
  752. * If this was the last bond between this domain and the IOMMU
  753. * invalidate all cached entries for domain's PSCID.
  754. */
  755. if (!count) {
  756. riscv_iommu_cmd_inval_vma(&cmd);
  757. riscv_iommu_cmd_inval_set_pscid(&cmd, domain->pscid);
  758. riscv_iommu_cmd_send(iommu, &cmd);
  759. riscv_iommu_cmd_sync(iommu, RISCV_IOMMU_IOTINVAL_TIMEOUT);
  760. }
  761. }
/*
 * Send IOTLB.INVAL for the whole address space for ranges of 2MB or larger;
 * smaller ranges are invalidated page by page.
 * This limit will be replaced with range invalidations, if supported by the
 * hardware, once an update of the RISC-V IOMMU architecture specification
 * covering range invalidations is available.
 */
#define RISCV_IOMMU_IOTLB_INVAL_LIMIT (2 << 20)
  769. static void riscv_iommu_iotlb_inval(struct riscv_iommu_domain *domain,
  770. unsigned long start, unsigned long end)
  771. {
  772. struct riscv_iommu_bond *bond;
  773. struct riscv_iommu_device *iommu, *prev;
  774. struct riscv_iommu_command cmd;
  775. unsigned long len = end - start + 1;
  776. unsigned long iova;
  777. /*
  778. * For each IOMMU linked with this protection domain (via bonds->dev),
  779. * an IOTLB invaliation command will be submitted and executed.
  780. *
  781. * Possbile race with domain attach flow is handled by sequencing
  782. * bond creation - riscv_iommu_bond_link(), and device directory
  783. * update - riscv_iommu_iodir_update().
  784. *
  785. * PTE Update / IOTLB Inval Device attach & directory update
  786. * -------------------------- --------------------------
  787. * update page table entries add dev to the bond list
  788. * FENCE RW,RW FENCE RW,RW
  789. * For all IOMMUs: (can be empty) Update FSC/PSCID
  790. * FENCE IOW,IOW FENCE IOW,IOW
  791. * IOTLB.INVAL IODIR.INVAL
  792. * IOFENCE.C
  793. *
  794. * If bond list is not updated with new device, directory context will
  795. * be configured with already valid page table content. If an IOMMU is
  796. * linked to the protection domain it will receive invalidation
  797. * requests for updated page table entries.
  798. */
  799. smp_mb();
  800. rcu_read_lock();
  801. prev = NULL;
  802. list_for_each_entry_rcu(bond, &domain->bonds, list) {
  803. iommu = dev_to_iommu(bond->dev);
  804. /*
  805. * IOTLB invalidation request can be safely omitted if already sent
  806. * to the IOMMU for the same PSCID, and with domain->bonds list
  807. * arranged based on the device's IOMMU, it's sufficient to check
  808. * last device the invalidation was sent to.
  809. */
  810. if (iommu == prev)
  811. continue;
  812. riscv_iommu_cmd_inval_vma(&cmd);
  813. riscv_iommu_cmd_inval_set_pscid(&cmd, domain->pscid);
  814. if (len && len < RISCV_IOMMU_IOTLB_INVAL_LIMIT) {
  815. for (iova = start; iova < end; iova += PAGE_SIZE) {
  816. riscv_iommu_cmd_inval_set_addr(&cmd, iova);
  817. riscv_iommu_cmd_send(iommu, &cmd);
  818. }
  819. } else {
  820. riscv_iommu_cmd_send(iommu, &cmd);
  821. }
  822. prev = iommu;
  823. }
  824. prev = NULL;
  825. list_for_each_entry_rcu(bond, &domain->bonds, list) {
  826. iommu = dev_to_iommu(bond->dev);
  827. if (iommu == prev)
  828. continue;
  829. riscv_iommu_cmd_sync(iommu, RISCV_IOMMU_IOTINVAL_TIMEOUT);
  830. prev = iommu;
  831. }
  832. rcu_read_unlock();
  833. }
/* FSC value passed to riscv_iommu_iodir_update() by the blocking and identity domains. */
#define RISCV_IOMMU_FSC_BARE 0
/*
 * Update IODIR for the device.
 *
 * @iommu: IOMMU managing the device directory.
 * @dev:   device whose fwspec IDs select the device contexts to update.
 * @fsc:   new first-stage context value written to each device context.
 * @ta:    new translation attributes; only the PSCID field is stored in
 *         dc->ta, and its valid bit is merged into dc->tc.
 *
 * During the execution of riscv_iommu_probe_device(), IODIR entries are
 * allocated for the device's identifiers. Device context invalidation
 * becomes necessary only if one of the updated entries was previously
 * marked as valid, given that invalid device context entries are not
 * cached by the IOMMU hardware.
 * In this implementation, updating a valid device context while the
 * device is not quiesced might be disruptive, potentially causing
 * interim translation faults.
 */
static void riscv_iommu_iodir_update(struct riscv_iommu_device *iommu,
				     struct device *dev, u64 fsc, u64 ta)
{
	struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
	struct riscv_iommu_dc *dc;
	struct riscv_iommu_command cmd;
	bool sync_required = false;
	u64 tc;
	int i;

	/*
	 * Step 1: mark every currently valid device context invalid and
	 * queue an invalidation for it, so the context is not in use while
	 * its fields are rewritten below.
	 */
	for (i = 0; i < fwspec->num_ids; i++) {
		dc = riscv_iommu_get_dc(iommu, fwspec->ids[i]);
		tc = READ_ONCE(dc->tc);
		if (!(tc & RISCV_IOMMU_DC_TC_V))
			continue;

		WRITE_ONCE(dc->tc, tc & ~RISCV_IOMMU_DC_TC_V);

		/* Invalidate device context cached values */
		riscv_iommu_cmd_iodir_inval_ddt(&cmd);
		riscv_iommu_cmd_iodir_set_did(&cmd, fwspec->ids[i]);
		riscv_iommu_cmd_send(iommu, &cmd);
		sync_required = true;
	}

	/* Wait only if at least one invalidation was actually queued. */
	if (sync_required)
		riscv_iommu_cmd_sync(iommu, RISCV_IOMMU_IOTINVAL_TIMEOUT);

	/*
	 * Step 2: program the new context.
	 *
	 * For device context with DC_TC_PDTV = 0, translation attributes valid bit
	 * is stored as DC_TC_V bit (both sharing the same location at BIT(0)).
	 */
	for (i = 0; i < fwspec->num_ids; i++) {
		dc = riscv_iommu_get_dc(iommu, fwspec->ids[i]);
		tc = READ_ONCE(dc->tc);
		tc |= ta & RISCV_IOMMU_DC_TC_V;

		WRITE_ONCE(dc->fsc, fsc);
		WRITE_ONCE(dc->ta, ta & RISCV_IOMMU_PC_TA_PSCID);
		/* Update device context, write TC.V as the last step. */
		dma_wmb();
		WRITE_ONCE(dc->tc, tc);

		/* Invalidate device context after update */
		riscv_iommu_cmd_iodir_inval_ddt(&cmd);
		riscv_iommu_cmd_iodir_set_did(&cmd, fwspec->ids[i]);
		riscv_iommu_cmd_send(iommu, &cmd);
	}

	riscv_iommu_cmd_sync(iommu, RISCV_IOMMU_IOTINVAL_TIMEOUT);
}
  890. /*
  891. * IOVA page translation tree management.
  892. */
  893. static void riscv_iommu_iotlb_flush_all(struct iommu_domain *iommu_domain)
  894. {
  895. struct riscv_iommu_domain *domain = iommu_domain_to_riscv(iommu_domain);
  896. riscv_iommu_iotlb_inval(domain, 0, ULONG_MAX);
  897. }
  898. static void riscv_iommu_iotlb_sync(struct iommu_domain *iommu_domain,
  899. struct iommu_iotlb_gather *gather)
  900. {
  901. struct riscv_iommu_domain *domain = iommu_domain_to_riscv(iommu_domain);
  902. riscv_iommu_iotlb_inval(domain, gather->start, gather->end);
  903. }
/* Number of IOVA bits resolved by one page table level (entries per page, log2). */
#define PT_SHIFT (PAGE_SHIFT - ilog2(sizeof(pte_t)))

/* Non-zero when the entry has _PAGE_PRESENT or _PAGE_PROT_NONE set. */
#define _io_pte_present(pte) ((pte) & (_PAGE_PRESENT | _PAGE_PROT_NONE))
/* Non-zero when the entry carries any of the _PAGE_LEAF bits. */
#define _io_pte_leaf(pte) ((pte) & _PAGE_LEAF)
/* True when the entry is completely empty. */
#define _io_pte_none(pte) ((pte) == 0)
/* Build an entry from page frame number @pn and protection/type bits @prot. */
#define _io_pte_entry(pn, prot) ((_PAGE_PFN_MASK & ((pn) << _PAGE_PFN_SHIFT)) | (prot))
  909. static void riscv_iommu_pte_free(struct riscv_iommu_domain *domain,
  910. unsigned long pte,
  911. struct iommu_pages_list *freelist)
  912. {
  913. unsigned long *ptr;
  914. int i;
  915. if (!_io_pte_present(pte) || _io_pte_leaf(pte))
  916. return;
  917. ptr = (unsigned long *)pfn_to_virt(__page_val_to_pfn(pte));
  918. /* Recursively free all sub page table pages */
  919. for (i = 0; i < PTRS_PER_PTE; i++) {
  920. pte = READ_ONCE(ptr[i]);
  921. if (!_io_pte_none(pte) && cmpxchg_relaxed(ptr + i, pte, 0) == pte)
  922. riscv_iommu_pte_free(domain, pte, freelist);
  923. }
  924. if (freelist)
  925. iommu_pages_list_add(freelist, ptr);
  926. else
  927. iommu_free_pages(ptr);
  928. }
/*
 * Walk the page table of @domain from the root down to the level whose entry
 * size equals @pgsize and return a pointer to the entry covering @iova,
 * allocating missing intermediate tables with @gfp on the way.
 *
 * New tables are installed with cmpxchg, so concurrent callers may race for
 * the same slot; the loser frees its page and re-reads the entry.
 *
 * Returns NULL if a table allocation fails, if a leaf entry is found above
 * the requested level, or if no level matches @pgsize.
 */
static unsigned long *riscv_iommu_pte_alloc(struct riscv_iommu_domain *domain,
					    unsigned long iova, size_t pgsize,
					    gfp_t gfp)
{
	unsigned long *ptr = domain->pgd_root;
	unsigned long pte, old;
	/* Index of the top level: 2 for Sv39, one more per larger mode. */
	int level = domain->pgd_mode - RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV39 + 2;
	void *addr;

	do {
		const int shift = PAGE_SHIFT + PT_SHIFT * level;

		/* Select the entry of the current table that covers @iova. */
		ptr += ((iova >> shift) & (PTRS_PER_PTE - 1));
		/*
		 * Note: returned entry might be a non-leaf if there was
		 * existing mapping with smaller granularity. Up to the caller
		 * to replace and invalidate.
		 */
		if (((size_t)1 << shift) == pgsize)
			return ptr;
pte_retry:
		pte = READ_ONCE(*ptr);
		/*
		 * This is very likely incorrect as we should not be adding
		 * new mapping with smaller granularity on top
		 * of existing 2M/1G mapping. Fail.
		 */
		if (_io_pte_present(pte) && _io_pte_leaf(pte))
			return NULL;
		/*
		 * Non-leaf entry is missing, allocate and try to add to the
		 * page table. This might race with other mappings, retry.
		 */
		if (_io_pte_none(pte)) {
			addr = iommu_alloc_pages_node_sz(domain->numa_node, gfp,
							 SZ_4K);
			if (!addr)
				return NULL;
			old = pte;
			pte = _io_pte_entry(virt_to_pfn(addr), _PAGE_TABLE);
			/* Lost the race: drop our page and re-read the slot. */
			if (cmpxchg_relaxed(ptr, old, pte) != old) {
				iommu_free_pages(addr);
				goto pte_retry;
			}
		}
		/* Descend into the next-level table referenced by the entry. */
		ptr = (unsigned long *)pfn_to_virt(__page_val_to_pfn(pte));
	} while (level-- > 0);

	return NULL;
}
  976. static unsigned long *riscv_iommu_pte_fetch(struct riscv_iommu_domain *domain,
  977. unsigned long iova, size_t *pte_pgsize)
  978. {
  979. unsigned long *ptr = domain->pgd_root;
  980. unsigned long pte;
  981. int level = domain->pgd_mode - RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV39 + 2;
  982. do {
  983. const int shift = PAGE_SHIFT + PT_SHIFT * level;
  984. ptr += ((iova >> shift) & (PTRS_PER_PTE - 1));
  985. pte = READ_ONCE(*ptr);
  986. if (_io_pte_present(pte) && _io_pte_leaf(pte)) {
  987. *pte_pgsize = (size_t)1 << shift;
  988. return ptr;
  989. }
  990. if (_io_pte_none(pte))
  991. return NULL;
  992. ptr = (unsigned long *)pfn_to_virt(__page_val_to_pfn(pte));
  993. } while (level-- > 0);
  994. return NULL;
  995. }
  996. static int riscv_iommu_map_pages(struct iommu_domain *iommu_domain,
  997. unsigned long iova, phys_addr_t phys,
  998. size_t pgsize, size_t pgcount, int prot,
  999. gfp_t gfp, size_t *mapped)
  1000. {
  1001. struct riscv_iommu_domain *domain = iommu_domain_to_riscv(iommu_domain);
  1002. size_t size = 0;
  1003. unsigned long *ptr;
  1004. unsigned long pte, old, pte_prot;
  1005. int rc = 0;
  1006. struct iommu_pages_list freelist = IOMMU_PAGES_LIST_INIT(freelist);
  1007. if (!(prot & IOMMU_WRITE))
  1008. pte_prot = _PAGE_BASE | _PAGE_READ;
  1009. else if (domain->amo_enabled)
  1010. pte_prot = _PAGE_BASE | _PAGE_READ | _PAGE_WRITE;
  1011. else
  1012. pte_prot = _PAGE_BASE | _PAGE_READ | _PAGE_WRITE | _PAGE_DIRTY;
  1013. while (pgcount) {
  1014. ptr = riscv_iommu_pte_alloc(domain, iova, pgsize, gfp);
  1015. if (!ptr) {
  1016. rc = -ENOMEM;
  1017. break;
  1018. }
  1019. old = READ_ONCE(*ptr);
  1020. pte = _io_pte_entry(phys_to_pfn(phys), pte_prot);
  1021. if (cmpxchg_relaxed(ptr, old, pte) != old)
  1022. continue;
  1023. riscv_iommu_pte_free(domain, old, &freelist);
  1024. size += pgsize;
  1025. iova += pgsize;
  1026. phys += pgsize;
  1027. --pgcount;
  1028. }
  1029. *mapped = size;
  1030. if (!iommu_pages_list_empty(&freelist)) {
  1031. /*
  1032. * In 1.0 spec version, the smallest scope we can use to
  1033. * invalidate all levels of page table (i.e. leaf and non-leaf)
  1034. * is an invalidate-all-PSCID IOTINVAL.VMA with AV=0.
  1035. * This will be updated with hardware support for
  1036. * capability.NL (non-leaf) IOTINVAL command.
  1037. */
  1038. riscv_iommu_iotlb_inval(domain, 0, ULONG_MAX);
  1039. iommu_put_pages_list(&freelist);
  1040. }
  1041. return rc;
  1042. }
  1043. static size_t riscv_iommu_unmap_pages(struct iommu_domain *iommu_domain,
  1044. unsigned long iova, size_t pgsize,
  1045. size_t pgcount,
  1046. struct iommu_iotlb_gather *gather)
  1047. {
  1048. struct riscv_iommu_domain *domain = iommu_domain_to_riscv(iommu_domain);
  1049. size_t size = pgcount << __ffs(pgsize);
  1050. unsigned long *ptr, old;
  1051. size_t unmapped = 0;
  1052. size_t pte_size;
  1053. while (unmapped < size) {
  1054. ptr = riscv_iommu_pte_fetch(domain, iova, &pte_size);
  1055. if (!ptr)
  1056. return unmapped;
  1057. /* partial unmap is not allowed, fail. */
  1058. if (iova & (pte_size - 1))
  1059. return unmapped;
  1060. old = READ_ONCE(*ptr);
  1061. if (cmpxchg_relaxed(ptr, old, 0) != old)
  1062. continue;
  1063. iommu_iotlb_gather_add_page(&domain->domain, gather, iova,
  1064. pte_size);
  1065. iova += pte_size;
  1066. unmapped += pte_size;
  1067. }
  1068. return unmapped;
  1069. }
  1070. static phys_addr_t riscv_iommu_iova_to_phys(struct iommu_domain *iommu_domain,
  1071. dma_addr_t iova)
  1072. {
  1073. struct riscv_iommu_domain *domain = iommu_domain_to_riscv(iommu_domain);
  1074. size_t pte_size;
  1075. unsigned long *ptr;
  1076. ptr = riscv_iommu_pte_fetch(domain, iova, &pte_size);
  1077. if (!ptr)
  1078. return 0;
  1079. return pfn_to_phys(__page_val_to_pfn(*ptr)) | (iova & (pte_size - 1));
  1080. }
  1081. static void riscv_iommu_free_paging_domain(struct iommu_domain *iommu_domain)
  1082. {
  1083. struct riscv_iommu_domain *domain = iommu_domain_to_riscv(iommu_domain);
  1084. const unsigned long pfn = virt_to_pfn(domain->pgd_root);
  1085. WARN_ON(!list_empty(&domain->bonds));
  1086. if ((int)domain->pscid > 0)
  1087. ida_free(&riscv_iommu_pscids, domain->pscid);
  1088. riscv_iommu_pte_free(domain, _io_pte_entry(pfn, _PAGE_TABLE), NULL);
  1089. kfree(domain);
  1090. }
  1091. static bool riscv_iommu_pt_supported(struct riscv_iommu_device *iommu, int pgd_mode)
  1092. {
  1093. switch (pgd_mode) {
  1094. case RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV39:
  1095. return iommu->caps & RISCV_IOMMU_CAPABILITIES_SV39;
  1096. case RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV48:
  1097. return iommu->caps & RISCV_IOMMU_CAPABILITIES_SV48;
  1098. case RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV57:
  1099. return iommu->caps & RISCV_IOMMU_CAPABILITIES_SV57;
  1100. }
  1101. return false;
  1102. }
  1103. static int riscv_iommu_attach_paging_domain(struct iommu_domain *iommu_domain,
  1104. struct device *dev,
  1105. struct iommu_domain *old)
  1106. {
  1107. struct riscv_iommu_domain *domain = iommu_domain_to_riscv(iommu_domain);
  1108. struct riscv_iommu_device *iommu = dev_to_iommu(dev);
  1109. struct riscv_iommu_info *info = dev_iommu_priv_get(dev);
  1110. u64 fsc, ta;
  1111. if (!riscv_iommu_pt_supported(iommu, domain->pgd_mode))
  1112. return -ENODEV;
  1113. fsc = FIELD_PREP(RISCV_IOMMU_PC_FSC_MODE, domain->pgd_mode) |
  1114. FIELD_PREP(RISCV_IOMMU_PC_FSC_PPN, virt_to_pfn(domain->pgd_root));
  1115. ta = FIELD_PREP(RISCV_IOMMU_PC_TA_PSCID, domain->pscid) |
  1116. RISCV_IOMMU_PC_TA_V;
  1117. if (riscv_iommu_bond_link(domain, dev))
  1118. return -ENOMEM;
  1119. riscv_iommu_iodir_update(iommu, dev, fsc, ta);
  1120. riscv_iommu_bond_unlink(info->domain, dev);
  1121. info->domain = domain;
  1122. return 0;
  1123. }
/* Domain operations installed on every paging domain by riscv_iommu_alloc_paging_domain(). */
static const struct iommu_domain_ops riscv_iommu_paging_domain_ops = {
	.attach_dev = riscv_iommu_attach_paging_domain,
	.free = riscv_iommu_free_paging_domain,
	.map_pages = riscv_iommu_map_pages,
	.unmap_pages = riscv_iommu_unmap_pages,
	.iova_to_phys = riscv_iommu_iova_to_phys,
	.iotlb_sync = riscv_iommu_iotlb_sync,
	.flush_iotlb_all = riscv_iommu_iotlb_flush_all,
};
  1133. static struct iommu_domain *riscv_iommu_alloc_paging_domain(struct device *dev)
  1134. {
  1135. struct riscv_iommu_domain *domain;
  1136. struct riscv_iommu_device *iommu;
  1137. unsigned int pgd_mode;
  1138. dma_addr_t va_mask;
  1139. int va_bits;
  1140. iommu = dev_to_iommu(dev);
  1141. if (iommu->caps & RISCV_IOMMU_CAPABILITIES_SV57) {
  1142. pgd_mode = RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV57;
  1143. va_bits = 57;
  1144. } else if (iommu->caps & RISCV_IOMMU_CAPABILITIES_SV48) {
  1145. pgd_mode = RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV48;
  1146. va_bits = 48;
  1147. } else if (iommu->caps & RISCV_IOMMU_CAPABILITIES_SV39) {
  1148. pgd_mode = RISCV_IOMMU_DC_FSC_IOSATP_MODE_SV39;
  1149. va_bits = 39;
  1150. } else {
  1151. dev_err(dev, "cannot find supported page table mode\n");
  1152. return ERR_PTR(-ENODEV);
  1153. }
  1154. domain = kzalloc_obj(*domain);
  1155. if (!domain)
  1156. return ERR_PTR(-ENOMEM);
  1157. INIT_LIST_HEAD_RCU(&domain->bonds);
  1158. spin_lock_init(&domain->lock);
  1159. domain->numa_node = dev_to_node(iommu->dev);
  1160. domain->amo_enabled = !!(iommu->caps & RISCV_IOMMU_CAPABILITIES_AMO_HWAD);
  1161. domain->pgd_mode = pgd_mode;
  1162. domain->pgd_root = iommu_alloc_pages_node_sz(domain->numa_node,
  1163. GFP_KERNEL_ACCOUNT, SZ_4K);
  1164. if (!domain->pgd_root) {
  1165. kfree(domain);
  1166. return ERR_PTR(-ENOMEM);
  1167. }
  1168. domain->pscid = ida_alloc_range(&riscv_iommu_pscids, 1,
  1169. RISCV_IOMMU_MAX_PSCID, GFP_KERNEL);
  1170. if (domain->pscid < 0) {
  1171. iommu_free_pages(domain->pgd_root);
  1172. kfree(domain);
  1173. return ERR_PTR(-ENOMEM);
  1174. }
  1175. /*
  1176. * Note: RISC-V Privilege spec mandates that virtual addresses
  1177. * need to be sign-extended, so if (VA_BITS - 1) is set, all
  1178. * bits >= VA_BITS need to also be set or else we'll get a
  1179. * page fault. However the code that creates the mappings
  1180. * above us (e.g. iommu_dma_alloc_iova()) won't do that for us
  1181. * for now, so we'll end up with invalid virtual addresses
  1182. * to map. As a workaround until we get this sorted out
  1183. * limit the available virtual addresses to VA_BITS - 1.
  1184. */
  1185. va_mask = DMA_BIT_MASK(va_bits - 1);
  1186. domain->domain.geometry.aperture_start = 0;
  1187. domain->domain.geometry.aperture_end = va_mask;
  1188. domain->domain.geometry.force_aperture = true;
  1189. domain->domain.pgsize_bitmap = va_mask & (SZ_4K | SZ_2M | SZ_1G | SZ_512G);
  1190. domain->domain.ops = &riscv_iommu_paging_domain_ops;
  1191. return &domain->domain;
  1192. }
  1193. static int riscv_iommu_attach_blocking_domain(struct iommu_domain *iommu_domain,
  1194. struct device *dev,
  1195. struct iommu_domain *old)
  1196. {
  1197. struct riscv_iommu_device *iommu = dev_to_iommu(dev);
  1198. struct riscv_iommu_info *info = dev_iommu_priv_get(dev);
  1199. /* Make device context invalid, translation requests will fault w/ #258 */
  1200. riscv_iommu_iodir_update(iommu, dev, RISCV_IOMMU_FSC_BARE, 0);
  1201. riscv_iommu_bond_unlink(info->domain, dev);
  1202. info->domain = NULL;
  1203. return 0;
  1204. }
/* Static blocking domain; also used as the release domain in riscv_iommu_ops. */
static struct iommu_domain riscv_iommu_blocking_domain = {
	.type = IOMMU_DOMAIN_BLOCKED,
	.ops = &(const struct iommu_domain_ops) {
		.attach_dev = riscv_iommu_attach_blocking_domain,
	}
};
  1211. static int riscv_iommu_attach_identity_domain(struct iommu_domain *iommu_domain,
  1212. struct device *dev,
  1213. struct iommu_domain *old)
  1214. {
  1215. struct riscv_iommu_device *iommu = dev_to_iommu(dev);
  1216. struct riscv_iommu_info *info = dev_iommu_priv_get(dev);
  1217. riscv_iommu_iodir_update(iommu, dev, RISCV_IOMMU_FSC_BARE, RISCV_IOMMU_PC_TA_V);
  1218. riscv_iommu_bond_unlink(info->domain, dev);
  1219. info->domain = NULL;
  1220. return 0;
  1221. }
/* Static identity domain exposed through riscv_iommu_ops.identity_domain. */
static struct iommu_domain riscv_iommu_identity_domain = {
	.type = IOMMU_DOMAIN_IDENTITY,
	.ops = &(const struct iommu_domain_ops) {
		.attach_dev = riscv_iommu_attach_identity_domain,
	}
};
  1228. static struct iommu_group *riscv_iommu_device_group(struct device *dev)
  1229. {
  1230. if (dev_is_pci(dev))
  1231. return pci_device_group(dev);
  1232. return generic_device_group(dev);
  1233. }
/*
 * Device tree translation hook: record the first cell of the phandle
 * arguments as a single device ID in the device's IOMMU fwspec.
 * Returns the result of iommu_fwspec_add_ids().
 */
static int riscv_iommu_of_xlate(struct device *dev, const struct of_phandle_args *args)
{
	return iommu_fwspec_add_ids(dev, args->args, 1);
}
  1238. static struct iommu_device *riscv_iommu_probe_device(struct device *dev)
  1239. {
  1240. struct iommu_fwspec *fwspec = dev_iommu_fwspec_get(dev);
  1241. struct riscv_iommu_device *iommu;
  1242. struct riscv_iommu_info *info;
  1243. struct riscv_iommu_dc *dc;
  1244. u64 tc;
  1245. int i;
  1246. if (!fwspec || !fwspec->iommu_fwnode->dev || !fwspec->num_ids)
  1247. return ERR_PTR(-ENODEV);
  1248. iommu = dev_get_drvdata(fwspec->iommu_fwnode->dev);
  1249. if (!iommu)
  1250. return ERR_PTR(-ENODEV);
  1251. /*
  1252. * IOMMU hardware operating in fail-over BARE mode will provide
  1253. * identity translation for all connected devices anyway...
  1254. */
  1255. if (iommu->ddt_mode <= RISCV_IOMMU_DDTP_IOMMU_MODE_BARE)
  1256. return ERR_PTR(-ENODEV);
  1257. info = kzalloc_obj(*info);
  1258. if (!info)
  1259. return ERR_PTR(-ENOMEM);
  1260. /*
  1261. * Allocate and pre-configure device context entries in
  1262. * the device directory. Do not mark the context valid yet.
  1263. */
  1264. tc = 0;
  1265. if (iommu->caps & RISCV_IOMMU_CAPABILITIES_AMO_HWAD)
  1266. tc |= RISCV_IOMMU_DC_TC_SADE;
  1267. for (i = 0; i < fwspec->num_ids; i++) {
  1268. dc = riscv_iommu_get_dc(iommu, fwspec->ids[i]);
  1269. if (!dc) {
  1270. kfree(info);
  1271. return ERR_PTR(-ENODEV);
  1272. }
  1273. if (READ_ONCE(dc->tc) & RISCV_IOMMU_DC_TC_V)
  1274. dev_warn(dev, "already attached to IOMMU device directory\n");
  1275. WRITE_ONCE(dc->tc, tc);
  1276. }
  1277. dev_iommu_priv_set(dev, info);
  1278. return &iommu->iommu;
  1279. }
/*
 * Release the per-device private data set up by riscv_iommu_probe_device().
 * The structure is freed through the RCU-deferred kfree helper rather than
 * a direct kfree().
 */
static void riscv_iommu_release_device(struct device *dev)
{
	struct riscv_iommu_info *info = dev_iommu_priv_get(dev);

	kfree_rcu_mightsleep(info);
}
/* IOMMU core operations registered by riscv_iommu_init(). */
static const struct iommu_ops riscv_iommu_ops = {
	.of_xlate = riscv_iommu_of_xlate,
	.identity_domain = &riscv_iommu_identity_domain,
	.blocked_domain = &riscv_iommu_blocking_domain,
	/* Devices are moved to the blocking domain on release. */
	.release_domain = &riscv_iommu_blocking_domain,
	.domain_alloc_paging = riscv_iommu_alloc_paging_domain,
	.device_group = riscv_iommu_device_group,
	.probe_device = riscv_iommu_probe_device,
	.release_device = riscv_iommu_release_device,
};
/*
 * Validate and normalize the hardware state before the driver takes over.
 *
 * Checks, in order: the IOMMU is idle and off/bare (or gets disabled when
 * running as a kdump kernel), in-memory data structure endianness matches
 * the CPU, and the interrupt vector assignment is accepted by the hardware.
 *
 * Returns 0 on success, -EBUSY if the IOMMU is busy or unexpectedly enabled,
 * or -EINVAL if endianness or interrupt vectors cannot be configured.
 */
static int riscv_iommu_init_check(struct riscv_iommu_device *iommu)
{
	u64 ddtp;

	/*
	 * Make sure the IOMMU is switched off or in pass-through mode during
	 * regular boot flow and disable translation when we boot into a kexec
	 * kernel and the previous kernel left them enabled.
	 */
	ddtp = riscv_iommu_readq(iommu, RISCV_IOMMU_REG_DDTP);
	if (ddtp & RISCV_IOMMU_DDTP_BUSY)
		return -EBUSY;

	if (FIELD_GET(RISCV_IOMMU_DDTP_IOMMU_MODE, ddtp) >
	     RISCV_IOMMU_DDTP_IOMMU_MODE_BARE) {
		if (!is_kdump_kernel())
			return -EBUSY;
		riscv_iommu_disable(iommu);
	}

	/* Configure accesses to in-memory data structures for CPU-native byte order. */
	if (IS_ENABLED(CONFIG_CPU_BIG_ENDIAN) !=
	    !!(iommu->fctl & RISCV_IOMMU_FCTL_BE)) {
		/* Endianness can only be switched if the capability is advertised. */
		if (!(iommu->caps & RISCV_IOMMU_CAPABILITIES_END))
			return -EINVAL;
		riscv_iommu_writel(iommu, RISCV_IOMMU_REG_FCTL,
				   iommu->fctl ^ RISCV_IOMMU_FCTL_BE);
		/* Read back to confirm the hardware accepted the new setting. */
		iommu->fctl = riscv_iommu_readl(iommu, RISCV_IOMMU_REG_FCTL);
		if (IS_ENABLED(CONFIG_CPU_BIG_ENDIAN) !=
		    !!(iommu->fctl & RISCV_IOMMU_FCTL_BE))
			return -EINVAL;
	}

	/*
	 * Distribute interrupt vectors, always use first vector for CIV.
	 * At least one interrupt is required. Read back and verify.
	 */
	if (!iommu->irqs_count)
		return -EINVAL;

	/* The modulo folds FIV/PIV/PMIV onto the vectors that actually exist. */
	iommu->icvec = FIELD_PREP(RISCV_IOMMU_ICVEC_FIV, 1 % iommu->irqs_count) |
		       FIELD_PREP(RISCV_IOMMU_ICVEC_PIV, 2 % iommu->irqs_count) |
		       FIELD_PREP(RISCV_IOMMU_ICVEC_PMIV, 3 % iommu->irqs_count);
	riscv_iommu_writeq(iommu, RISCV_IOMMU_REG_ICVEC, iommu->icvec);
	iommu->icvec = riscv_iommu_readq(iommu, RISCV_IOMMU_REG_ICVEC);
	/* Reject the setup if any vector read back is out of range. */
	if (max3(FIELD_GET(RISCV_IOMMU_ICVEC_CIV, iommu->icvec),
		 FIELD_GET(RISCV_IOMMU_ICVEC_FIV, iommu->icvec),
		 max(FIELD_GET(RISCV_IOMMU_ICVEC_PIV, iommu->icvec),
		     FIELD_GET(RISCV_IOMMU_ICVEC_PMIV, iommu->icvec))) >= iommu->irqs_count)
		return -EINVAL;

	return 0;
}
/*
 * Tear down an IOMMU instance set up by riscv_iommu_init(): unregister from
 * the IOMMU core and sysfs, switch the device directory mode to OFF, then
 * disable the command and fault queues.
 */
void riscv_iommu_remove(struct riscv_iommu_device *iommu)
{
	iommu_device_unregister(&iommu->iommu);
	iommu_device_sysfs_remove(&iommu->iommu);
	riscv_iommu_iodir_set_mode(iommu, RISCV_IOMMU_DDTP_IOMMU_MODE_OFF);
	riscv_iommu_queue_disable(&iommu->cmdq);
	riscv_iommu_queue_disable(&iommu->fltq);
}
/*
 * Bring up an IOMMU instance.
 *
 * Validates the hardware state, allocates the device directory and the
 * command/fault queues, enables both queues, requests the
 * RISCV_IOMMU_DDTP_IOMMU_MODE_MAX directory mode, and finally registers the
 * device with sysfs, RIMT (only when ACPI is enabled) and the IOMMU core.
 *
 * Returns 0 on success or a negative errno. Failures after the command queue
 * has been enabled unwind the completed steps in reverse order through the
 * labels at the end of the function.
 */
int riscv_iommu_init(struct riscv_iommu_device *iommu)
{
	int rc;

	RISCV_IOMMU_QUEUE_INIT(&iommu->cmdq, CQ);
	RISCV_IOMMU_QUEUE_INIT(&iommu->fltq, FQ);

	rc = riscv_iommu_init_check(iommu);
	if (rc)
		return dev_err_probe(iommu->dev, rc, "unexpected device state\n");

	/*
	 * NOTE(review): the allocation failures below return without explicit
	 * cleanup - presumably these allocations are device-managed; confirm
	 * against riscv_iommu_iodir_alloc()/riscv_iommu_queue_alloc().
	 */
	rc = riscv_iommu_iodir_alloc(iommu);
	if (rc)
		return rc;

	rc = riscv_iommu_queue_alloc(iommu, &iommu->cmdq,
				     sizeof(struct riscv_iommu_command));
	if (rc)
		return rc;

	rc = riscv_iommu_queue_alloc(iommu, &iommu->fltq,
				     sizeof(struct riscv_iommu_fq_record));
	if (rc)
		return rc;

	rc = riscv_iommu_queue_enable(iommu, &iommu->cmdq, riscv_iommu_cmdq_process);
	if (rc)
		return rc;

	/* From here on the command queue is live and must be disabled on error. */
	rc = riscv_iommu_queue_enable(iommu, &iommu->fltq, riscv_iommu_fltq_process);
	if (rc)
		goto err_queue_disable;

	rc = riscv_iommu_iodir_set_mode(iommu, RISCV_IOMMU_DDTP_IOMMU_MODE_MAX);
	if (rc)
		goto err_queue_disable;

	rc = iommu_device_sysfs_add(&iommu->iommu, NULL, NULL, "riscv-iommu@%s",
				    dev_name(iommu->dev));
	if (rc) {
		dev_err_probe(iommu->dev, rc, "cannot register sysfs interface\n");
		goto err_iodir_off;
	}

	if (!acpi_disabled) {
		rc = rimt_iommu_register(iommu->dev);
		if (rc) {
			dev_err_probe(iommu->dev, rc, "cannot register iommu with RIMT\n");
			goto err_remove_sysfs;
		}
	}

	rc = iommu_device_register(&iommu->iommu, &riscv_iommu_ops, iommu->dev);
	if (rc) {
		dev_err_probe(iommu->dev, rc, "cannot register iommu interface\n");
		goto err_remove_sysfs;
	}

	return 0;

	/* Error unwinding: each label undoes one step and falls through. */
err_remove_sysfs:
	iommu_device_sysfs_remove(&iommu->iommu);
err_iodir_off:
	riscv_iommu_iodir_set_mode(iommu, RISCV_IOMMU_DDTP_IOMMU_MODE_OFF);
err_queue_disable:
	riscv_iommu_queue_disable(&iommu->fltq);
	riscv_iommu_queue_disable(&iommu->cmdq);
	return rc;
}