s390-iommu.c 28 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197
  1. // SPDX-License-Identifier: GPL-2.0
  2. /*
  3. * IOMMU API for s390 PCI devices
  4. *
  5. * Copyright IBM Corp. 2015
  6. * Author(s): Gerald Schaefer <gerald.schaefer@de.ibm.com>
  7. */
  8. #include <linux/pci.h>
  9. #include <linux/iommu.h>
  10. #include <linux/iommu-helper.h>
  11. #include <linux/sizes.h>
  12. #include <linux/rculist.h>
  13. #include <linux/rcupdate.h>
  14. #include <asm/pci_dma.h>
  15. #include "dma-iommu.h"
  16. static const struct iommu_ops s390_iommu_ops, s390_iommu_rtr_ops;
  17. static struct kmem_cache *dma_region_table_cache;
  18. static struct kmem_cache *dma_page_table_cache;
  19. static u64 s390_iommu_aperture;
  20. static u32 s390_iommu_aperture_factor = 1;
  21. struct s390_domain {
  22. struct iommu_domain domain;
  23. struct list_head devices;
  24. struct zpci_iommu_ctrs ctrs;
  25. unsigned long *dma_table;
  26. spinlock_t list_lock;
  27. struct rcu_head rcu;
  28. u8 origin_type;
  29. };
  30. static struct iommu_domain blocking_domain;
  31. static inline unsigned int calc_rfx(dma_addr_t ptr)
  32. {
  33. return ((unsigned long)ptr >> ZPCI_RF_SHIFT) & ZPCI_INDEX_MASK;
  34. }
  35. static inline unsigned int calc_rsx(dma_addr_t ptr)
  36. {
  37. return ((unsigned long)ptr >> ZPCI_RS_SHIFT) & ZPCI_INDEX_MASK;
  38. }
  39. static inline unsigned int calc_rtx(dma_addr_t ptr)
  40. {
  41. return ((unsigned long)ptr >> ZPCI_RT_SHIFT) & ZPCI_INDEX_MASK;
  42. }
  43. static inline unsigned int calc_sx(dma_addr_t ptr)
  44. {
  45. return ((unsigned long)ptr >> ZPCI_ST_SHIFT) & ZPCI_INDEX_MASK;
  46. }
  47. static inline unsigned int calc_px(dma_addr_t ptr)
  48. {
  49. return ((unsigned long)ptr >> PAGE_SHIFT) & ZPCI_PT_MASK;
  50. }
  51. static inline void set_pt_pfaa(unsigned long *entry, phys_addr_t pfaa)
  52. {
  53. *entry &= ZPCI_PTE_FLAG_MASK;
  54. *entry |= (pfaa & ZPCI_PTE_ADDR_MASK);
  55. }
  56. static inline void set_rf_rso(unsigned long *entry, phys_addr_t rso)
  57. {
  58. *entry &= ZPCI_RTE_FLAG_MASK;
  59. *entry |= (rso & ZPCI_RTE_ADDR_MASK);
  60. *entry |= ZPCI_TABLE_TYPE_RFX;
  61. }
  62. static inline void set_rs_rto(unsigned long *entry, phys_addr_t rto)
  63. {
  64. *entry &= ZPCI_RTE_FLAG_MASK;
  65. *entry |= (rto & ZPCI_RTE_ADDR_MASK);
  66. *entry |= ZPCI_TABLE_TYPE_RSX;
  67. }
  68. static inline void set_rt_sto(unsigned long *entry, phys_addr_t sto)
  69. {
  70. *entry &= ZPCI_RTE_FLAG_MASK;
  71. *entry |= (sto & ZPCI_RTE_ADDR_MASK);
  72. *entry |= ZPCI_TABLE_TYPE_RTX;
  73. }
  74. static inline void set_st_pto(unsigned long *entry, phys_addr_t pto)
  75. {
  76. *entry &= ZPCI_STE_FLAG_MASK;
  77. *entry |= (pto & ZPCI_STE_ADDR_MASK);
  78. *entry |= ZPCI_TABLE_TYPE_SX;
  79. }
  80. static inline void validate_rf_entry(unsigned long *entry)
  81. {
  82. *entry &= ~ZPCI_TABLE_VALID_MASK;
  83. *entry &= ~ZPCI_TABLE_OFFSET_MASK;
  84. *entry |= ZPCI_TABLE_VALID;
  85. *entry |= ZPCI_TABLE_LEN_RFX;
  86. }
  87. static inline void validate_rs_entry(unsigned long *entry)
  88. {
  89. *entry &= ~ZPCI_TABLE_VALID_MASK;
  90. *entry &= ~ZPCI_TABLE_OFFSET_MASK;
  91. *entry |= ZPCI_TABLE_VALID;
  92. *entry |= ZPCI_TABLE_LEN_RSX;
  93. }
  94. static inline void validate_rt_entry(unsigned long *entry)
  95. {
  96. *entry &= ~ZPCI_TABLE_VALID_MASK;
  97. *entry &= ~ZPCI_TABLE_OFFSET_MASK;
  98. *entry |= ZPCI_TABLE_VALID;
  99. *entry |= ZPCI_TABLE_LEN_RTX;
  100. }
  101. static inline void validate_st_entry(unsigned long *entry)
  102. {
  103. *entry &= ~ZPCI_TABLE_VALID_MASK;
  104. *entry |= ZPCI_TABLE_VALID;
  105. }
  106. static inline void invalidate_pt_entry(unsigned long *entry)
  107. {
  108. WARN_ON_ONCE((*entry & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_INVALID);
  109. *entry &= ~ZPCI_PTE_VALID_MASK;
  110. *entry |= ZPCI_PTE_INVALID;
  111. }
  112. static inline void validate_pt_entry(unsigned long *entry)
  113. {
  114. WARN_ON_ONCE((*entry & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_VALID);
  115. *entry &= ~ZPCI_PTE_VALID_MASK;
  116. *entry |= ZPCI_PTE_VALID;
  117. }
  118. static inline void entry_set_protected(unsigned long *entry)
  119. {
  120. *entry &= ~ZPCI_TABLE_PROT_MASK;
  121. *entry |= ZPCI_TABLE_PROTECTED;
  122. }
  123. static inline void entry_clr_protected(unsigned long *entry)
  124. {
  125. *entry &= ~ZPCI_TABLE_PROT_MASK;
  126. *entry |= ZPCI_TABLE_UNPROTECTED;
  127. }
  128. static inline int reg_entry_isvalid(unsigned long entry)
  129. {
  130. return (entry & ZPCI_TABLE_VALID_MASK) == ZPCI_TABLE_VALID;
  131. }
  132. static inline int pt_entry_isvalid(unsigned long entry)
  133. {
  134. return (entry & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_VALID;
  135. }
  136. static inline unsigned long *get_rf_rso(unsigned long entry)
  137. {
  138. if ((entry & ZPCI_TABLE_TYPE_MASK) == ZPCI_TABLE_TYPE_RFX)
  139. return phys_to_virt(entry & ZPCI_RTE_ADDR_MASK);
  140. else
  141. return NULL;
  142. }
  143. static inline unsigned long *get_rs_rto(unsigned long entry)
  144. {
  145. if ((entry & ZPCI_TABLE_TYPE_MASK) == ZPCI_TABLE_TYPE_RSX)
  146. return phys_to_virt(entry & ZPCI_RTE_ADDR_MASK);
  147. else
  148. return NULL;
  149. }
  150. static inline unsigned long *get_rt_sto(unsigned long entry)
  151. {
  152. if ((entry & ZPCI_TABLE_TYPE_MASK) == ZPCI_TABLE_TYPE_RTX)
  153. return phys_to_virt(entry & ZPCI_RTE_ADDR_MASK);
  154. else
  155. return NULL;
  156. }
  157. static inline unsigned long *get_st_pto(unsigned long entry)
  158. {
  159. if ((entry & ZPCI_TABLE_TYPE_MASK) == ZPCI_TABLE_TYPE_SX)
  160. return phys_to_virt(entry & ZPCI_STE_ADDR_MASK);
  161. else
  162. return NULL;
  163. }
  164. static int __init dma_alloc_cpu_table_caches(void)
  165. {
  166. dma_region_table_cache = kmem_cache_create("PCI_DMA_region_tables",
  167. ZPCI_TABLE_SIZE,
  168. ZPCI_TABLE_ALIGN,
  169. 0, NULL);
  170. if (!dma_region_table_cache)
  171. return -ENOMEM;
  172. dma_page_table_cache = kmem_cache_create("PCI_DMA_page_tables",
  173. ZPCI_PT_SIZE,
  174. ZPCI_PT_ALIGN,
  175. 0, NULL);
  176. if (!dma_page_table_cache) {
  177. kmem_cache_destroy(dma_region_table_cache);
  178. return -ENOMEM;
  179. }
  180. return 0;
  181. }
  182. static unsigned long *dma_alloc_cpu_table(gfp_t gfp)
  183. {
  184. unsigned long *table, *entry;
  185. table = kmem_cache_alloc(dma_region_table_cache, gfp);
  186. if (!table)
  187. return NULL;
  188. for (entry = table; entry < table + ZPCI_TABLE_ENTRIES; entry++)
  189. *entry = ZPCI_TABLE_INVALID;
  190. return table;
  191. }
  192. static void dma_free_cpu_table(void *table)
  193. {
  194. kmem_cache_free(dma_region_table_cache, table);
  195. }
  196. static void dma_free_page_table(void *table)
  197. {
  198. kmem_cache_free(dma_page_table_cache, table);
  199. }
  200. static void dma_free_seg_table(unsigned long entry)
  201. {
  202. unsigned long *sto = get_rt_sto(entry);
  203. int sx;
  204. for (sx = 0; sx < ZPCI_TABLE_ENTRIES; sx++)
  205. if (reg_entry_isvalid(sto[sx]))
  206. dma_free_page_table(get_st_pto(sto[sx]));
  207. dma_free_cpu_table(sto);
  208. }
  209. static void dma_free_rt_table(unsigned long entry)
  210. {
  211. unsigned long *rto = get_rs_rto(entry);
  212. int rtx;
  213. for (rtx = 0; rtx < ZPCI_TABLE_ENTRIES; rtx++)
  214. if (reg_entry_isvalid(rto[rtx]))
  215. dma_free_seg_table(rto[rtx]);
  216. dma_free_cpu_table(rto);
  217. }
  218. static void dma_free_rs_table(unsigned long entry)
  219. {
  220. unsigned long *rso = get_rf_rso(entry);
  221. int rsx;
  222. for (rsx = 0; rsx < ZPCI_TABLE_ENTRIES; rsx++)
  223. if (reg_entry_isvalid(rso[rsx]))
  224. dma_free_rt_table(rso[rsx]);
  225. dma_free_cpu_table(rso);
  226. }
  227. static void dma_cleanup_tables(struct s390_domain *domain)
  228. {
  229. int rtx, rsx, rfx;
  230. if (!domain->dma_table)
  231. return;
  232. switch (domain->origin_type) {
  233. case ZPCI_TABLE_TYPE_RFX:
  234. for (rfx = 0; rfx < ZPCI_TABLE_ENTRIES; rfx++)
  235. if (reg_entry_isvalid(domain->dma_table[rfx]))
  236. dma_free_rs_table(domain->dma_table[rfx]);
  237. break;
  238. case ZPCI_TABLE_TYPE_RSX:
  239. for (rsx = 0; rsx < ZPCI_TABLE_ENTRIES; rsx++)
  240. if (reg_entry_isvalid(domain->dma_table[rsx]))
  241. dma_free_rt_table(domain->dma_table[rsx]);
  242. break;
  243. case ZPCI_TABLE_TYPE_RTX:
  244. for (rtx = 0; rtx < ZPCI_TABLE_ENTRIES; rtx++)
  245. if (reg_entry_isvalid(domain->dma_table[rtx]))
  246. dma_free_seg_table(domain->dma_table[rtx]);
  247. break;
  248. default:
  249. WARN_ONCE(1, "Invalid IOMMU table (%x)\n", domain->origin_type);
  250. return;
  251. }
  252. dma_free_cpu_table(domain->dma_table);
  253. }
  254. static unsigned long *dma_alloc_page_table(gfp_t gfp)
  255. {
  256. unsigned long *table, *entry;
  257. table = kmem_cache_alloc(dma_page_table_cache, gfp);
  258. if (!table)
  259. return NULL;
  260. for (entry = table; entry < table + ZPCI_PT_ENTRIES; entry++)
  261. *entry = ZPCI_PTE_INVALID;
  262. return table;
  263. }
  264. static unsigned long *dma_walk_rs_table(unsigned long *rso,
  265. dma_addr_t dma_addr, gfp_t gfp)
  266. {
  267. unsigned int rsx = calc_rsx(dma_addr);
  268. unsigned long old_rse, rse;
  269. unsigned long *rsep, *rto;
  270. rsep = &rso[rsx];
  271. rse = READ_ONCE(*rsep);
  272. if (reg_entry_isvalid(rse)) {
  273. rto = get_rs_rto(rse);
  274. } else {
  275. rto = dma_alloc_cpu_table(gfp);
  276. if (!rto)
  277. return NULL;
  278. set_rs_rto(&rse, virt_to_phys(rto));
  279. validate_rs_entry(&rse);
  280. entry_clr_protected(&rse);
  281. old_rse = cmpxchg(rsep, ZPCI_TABLE_INVALID, rse);
  282. if (old_rse != ZPCI_TABLE_INVALID) {
  283. /* Somone else was faster, use theirs */
  284. dma_free_cpu_table(rto);
  285. rto = get_rs_rto(old_rse);
  286. }
  287. }
  288. return rto;
  289. }
  290. static unsigned long *dma_walk_rf_table(unsigned long *rfo,
  291. dma_addr_t dma_addr, gfp_t gfp)
  292. {
  293. unsigned int rfx = calc_rfx(dma_addr);
  294. unsigned long old_rfe, rfe;
  295. unsigned long *rfep, *rso;
  296. rfep = &rfo[rfx];
  297. rfe = READ_ONCE(*rfep);
  298. if (reg_entry_isvalid(rfe)) {
  299. rso = get_rf_rso(rfe);
  300. } else {
  301. rso = dma_alloc_cpu_table(gfp);
  302. if (!rso)
  303. return NULL;
  304. set_rf_rso(&rfe, virt_to_phys(rso));
  305. validate_rf_entry(&rfe);
  306. entry_clr_protected(&rfe);
  307. old_rfe = cmpxchg(rfep, ZPCI_TABLE_INVALID, rfe);
  308. if (old_rfe != ZPCI_TABLE_INVALID) {
  309. /* Somone else was faster, use theirs */
  310. dma_free_cpu_table(rso);
  311. rso = get_rf_rso(old_rfe);
  312. }
  313. }
  314. if (!rso)
  315. return NULL;
  316. return dma_walk_rs_table(rso, dma_addr, gfp);
  317. }
  318. static unsigned long *dma_get_seg_table_origin(unsigned long *rtep, gfp_t gfp)
  319. {
  320. unsigned long old_rte, rte;
  321. unsigned long *sto;
  322. rte = READ_ONCE(*rtep);
  323. if (reg_entry_isvalid(rte)) {
  324. sto = get_rt_sto(rte);
  325. } else {
  326. sto = dma_alloc_cpu_table(gfp);
  327. if (!sto)
  328. return NULL;
  329. set_rt_sto(&rte, virt_to_phys(sto));
  330. validate_rt_entry(&rte);
  331. entry_clr_protected(&rte);
  332. old_rte = cmpxchg(rtep, ZPCI_TABLE_INVALID, rte);
  333. if (old_rte != ZPCI_TABLE_INVALID) {
  334. /* Somone else was faster, use theirs */
  335. dma_free_cpu_table(sto);
  336. sto = get_rt_sto(old_rte);
  337. }
  338. }
  339. return sto;
  340. }
  341. static unsigned long *dma_get_page_table_origin(unsigned long *step, gfp_t gfp)
  342. {
  343. unsigned long old_ste, ste;
  344. unsigned long *pto;
  345. ste = READ_ONCE(*step);
  346. if (reg_entry_isvalid(ste)) {
  347. pto = get_st_pto(ste);
  348. } else {
  349. pto = dma_alloc_page_table(gfp);
  350. if (!pto)
  351. return NULL;
  352. set_st_pto(&ste, virt_to_phys(pto));
  353. validate_st_entry(&ste);
  354. entry_clr_protected(&ste);
  355. old_ste = cmpxchg(step, ZPCI_TABLE_INVALID, ste);
  356. if (old_ste != ZPCI_TABLE_INVALID) {
  357. /* Somone else was faster, use theirs */
  358. dma_free_page_table(pto);
  359. pto = get_st_pto(old_ste);
  360. }
  361. }
  362. return pto;
  363. }
  364. static unsigned long *dma_walk_region_tables(struct s390_domain *domain,
  365. dma_addr_t dma_addr, gfp_t gfp)
  366. {
  367. switch (domain->origin_type) {
  368. case ZPCI_TABLE_TYPE_RFX:
  369. return dma_walk_rf_table(domain->dma_table, dma_addr, gfp);
  370. case ZPCI_TABLE_TYPE_RSX:
  371. return dma_walk_rs_table(domain->dma_table, dma_addr, gfp);
  372. case ZPCI_TABLE_TYPE_RTX:
  373. return domain->dma_table;
  374. default:
  375. return NULL;
  376. }
  377. }
  378. static unsigned long *dma_walk_cpu_trans(struct s390_domain *domain,
  379. dma_addr_t dma_addr, gfp_t gfp)
  380. {
  381. unsigned long *rto, *sto, *pto;
  382. unsigned int rtx, sx, px;
  383. rto = dma_walk_region_tables(domain, dma_addr, gfp);
  384. if (!rto)
  385. return NULL;
  386. rtx = calc_rtx(dma_addr);
  387. sto = dma_get_seg_table_origin(&rto[rtx], gfp);
  388. if (!sto)
  389. return NULL;
  390. sx = calc_sx(dma_addr);
  391. pto = dma_get_page_table_origin(&sto[sx], gfp);
  392. if (!pto)
  393. return NULL;
  394. px = calc_px(dma_addr);
  395. return &pto[px];
  396. }
  397. static void dma_update_cpu_trans(unsigned long *ptep, phys_addr_t page_addr, int flags)
  398. {
  399. unsigned long pte;
  400. pte = READ_ONCE(*ptep);
  401. if (flags & ZPCI_PTE_INVALID) {
  402. invalidate_pt_entry(&pte);
  403. } else {
  404. set_pt_pfaa(&pte, page_addr);
  405. validate_pt_entry(&pte);
  406. }
  407. if (flags & ZPCI_TABLE_PROTECTED)
  408. entry_set_protected(&pte);
  409. else
  410. entry_clr_protected(&pte);
  411. xchg(ptep, pte);
  412. }
  413. static struct s390_domain *to_s390_domain(struct iommu_domain *dom)
  414. {
  415. return container_of(dom, struct s390_domain, domain);
  416. }
  417. static bool s390_iommu_capable(struct device *dev, enum iommu_cap cap)
  418. {
  419. struct zpci_dev *zdev = to_zpci_dev(dev);
  420. switch (cap) {
  421. case IOMMU_CAP_CACHE_COHERENCY:
  422. return true;
  423. case IOMMU_CAP_DEFERRED_FLUSH:
  424. return zdev->pft != PCI_FUNC_TYPE_ISM;
  425. default:
  426. return false;
  427. }
  428. }
  429. static inline u64 max_tbl_size(struct s390_domain *domain)
  430. {
  431. switch (domain->origin_type) {
  432. case ZPCI_TABLE_TYPE_RTX:
  433. return ZPCI_TABLE_SIZE_RT - 1;
  434. case ZPCI_TABLE_TYPE_RSX:
  435. return ZPCI_TABLE_SIZE_RS - 1;
  436. case ZPCI_TABLE_TYPE_RFX:
  437. return U64_MAX;
  438. default:
  439. return 0;
  440. }
  441. }
  442. static struct iommu_domain *s390_domain_alloc_paging(struct device *dev)
  443. {
  444. struct zpci_dev *zdev = to_zpci_dev(dev);
  445. struct s390_domain *s390_domain;
  446. u64 aperture_size;
  447. s390_domain = kzalloc_obj(*s390_domain);
  448. if (!s390_domain)
  449. return NULL;
  450. s390_domain->dma_table = dma_alloc_cpu_table(GFP_KERNEL);
  451. if (!s390_domain->dma_table) {
  452. kfree(s390_domain);
  453. return NULL;
  454. }
  455. aperture_size = min(s390_iommu_aperture,
  456. zdev->end_dma - zdev->start_dma + 1);
  457. if (aperture_size <= (ZPCI_TABLE_SIZE_RT - zdev->start_dma)) {
  458. s390_domain->origin_type = ZPCI_TABLE_TYPE_RTX;
  459. } else if (aperture_size <= (ZPCI_TABLE_SIZE_RS - zdev->start_dma) &&
  460. (zdev->dtsm & ZPCI_IOTA_DT_RS)) {
  461. s390_domain->origin_type = ZPCI_TABLE_TYPE_RSX;
  462. } else if (zdev->dtsm & ZPCI_IOTA_DT_RF) {
  463. s390_domain->origin_type = ZPCI_TABLE_TYPE_RFX;
  464. } else {
  465. /* Assume RTX available */
  466. s390_domain->origin_type = ZPCI_TABLE_TYPE_RTX;
  467. aperture_size = ZPCI_TABLE_SIZE_RT - zdev->start_dma;
  468. }
  469. zdev->end_dma = zdev->start_dma + aperture_size - 1;
  470. s390_domain->domain.pgsize_bitmap = SZ_4K;
  471. s390_domain->domain.geometry.force_aperture = true;
  472. s390_domain->domain.geometry.aperture_start = 0;
  473. s390_domain->domain.geometry.aperture_end = max_tbl_size(s390_domain);
  474. spin_lock_init(&s390_domain->list_lock);
  475. INIT_LIST_HEAD_RCU(&s390_domain->devices);
  476. return &s390_domain->domain;
  477. }
  478. static void s390_iommu_rcu_free_domain(struct rcu_head *head)
  479. {
  480. struct s390_domain *s390_domain = container_of(head, struct s390_domain, rcu);
  481. dma_cleanup_tables(s390_domain);
  482. kfree(s390_domain);
  483. }
  484. static void s390_domain_free(struct iommu_domain *domain)
  485. {
  486. struct s390_domain *s390_domain = to_s390_domain(domain);
  487. rcu_read_lock();
  488. WARN_ON(!list_empty(&s390_domain->devices));
  489. rcu_read_unlock();
  490. call_rcu(&s390_domain->rcu, s390_iommu_rcu_free_domain);
  491. }
  492. static void zdev_s390_domain_update(struct zpci_dev *zdev,
  493. struct iommu_domain *domain)
  494. {
  495. unsigned long flags;
  496. spin_lock_irqsave(&zdev->dom_lock, flags);
  497. zdev->s390_domain = domain;
  498. spin_unlock_irqrestore(&zdev->dom_lock, flags);
  499. }
  500. static u64 get_iota_region_flag(struct s390_domain *domain)
  501. {
  502. switch (domain->origin_type) {
  503. case ZPCI_TABLE_TYPE_RTX:
  504. return ZPCI_IOTA_RTTO_FLAG;
  505. case ZPCI_TABLE_TYPE_RSX:
  506. return ZPCI_IOTA_RSTO_FLAG;
  507. case ZPCI_TABLE_TYPE_RFX:
  508. return ZPCI_IOTA_RFTO_FLAG;
  509. default:
  510. WARN_ONCE(1, "Invalid IOMMU table (%x)\n", domain->origin_type);
  511. return 0;
  512. }
  513. }
  514. static bool reg_ioat_propagate_error(int cc, u8 status)
  515. {
  516. /*
  517. * If the device is in the error state the reset routine
  518. * will register the IOAT of the newly set domain on re-enable
  519. */
  520. if (cc == ZPCI_CC_ERR && status == ZPCI_PCI_ST_FUNC_NOT_AVAIL)
  521. return false;
  522. /*
  523. * If the device was removed treat registration as success
  524. * and let the subsequent error event trigger tear down.
  525. */
  526. if (cc == ZPCI_CC_INVAL_HANDLE)
  527. return false;
  528. return cc != ZPCI_CC_OK;
  529. }
  530. static int s390_iommu_domain_reg_ioat(struct zpci_dev *zdev,
  531. struct iommu_domain *domain, u8 *status)
  532. {
  533. struct s390_domain *s390_domain;
  534. int rc = 0;
  535. u64 iota;
  536. switch (domain->type) {
  537. case IOMMU_DOMAIN_IDENTITY:
  538. rc = zpci_register_ioat(zdev, 0, zdev->start_dma,
  539. zdev->end_dma, 0, status);
  540. break;
  541. case IOMMU_DOMAIN_BLOCKED:
  542. /* Nothing to do in this case */
  543. break;
  544. default:
  545. s390_domain = to_s390_domain(domain);
  546. iota = virt_to_phys(s390_domain->dma_table) |
  547. get_iota_region_flag(s390_domain);
  548. rc = zpci_register_ioat(zdev, 0, zdev->start_dma,
  549. zdev->end_dma, iota, status);
  550. }
  551. return rc;
  552. }
  553. int zpci_iommu_register_ioat(struct zpci_dev *zdev, u8 *status)
  554. {
  555. unsigned long flags;
  556. int rc;
  557. spin_lock_irqsave(&zdev->dom_lock, flags);
  558. rc = s390_iommu_domain_reg_ioat(zdev, zdev->s390_domain, status);
  559. spin_unlock_irqrestore(&zdev->dom_lock, flags);
  560. return rc;
  561. }
  562. static int blocking_domain_attach_device(struct iommu_domain *domain,
  563. struct device *dev,
  564. struct iommu_domain *old)
  565. {
  566. struct zpci_dev *zdev = to_zpci_dev(dev);
  567. struct s390_domain *s390_domain;
  568. unsigned long flags;
  569. if (zdev->s390_domain->type == IOMMU_DOMAIN_BLOCKED)
  570. return 0;
  571. s390_domain = to_s390_domain(zdev->s390_domain);
  572. if (zdev->dma_table) {
  573. spin_lock_irqsave(&s390_domain->list_lock, flags);
  574. list_del_rcu(&zdev->iommu_list);
  575. spin_unlock_irqrestore(&s390_domain->list_lock, flags);
  576. }
  577. zpci_unregister_ioat(zdev, 0);
  578. zdev->dma_table = NULL;
  579. zdev_s390_domain_update(zdev, domain);
  580. return 0;
  581. }
  582. static int s390_iommu_attach_device(struct iommu_domain *domain,
  583. struct device *dev,
  584. struct iommu_domain *old)
  585. {
  586. struct s390_domain *s390_domain = to_s390_domain(domain);
  587. struct zpci_dev *zdev = to_zpci_dev(dev);
  588. unsigned long flags;
  589. u8 status;
  590. int cc;
  591. if (!zdev)
  592. return -ENODEV;
  593. if (WARN_ON(domain->geometry.aperture_start > zdev->end_dma ||
  594. domain->geometry.aperture_end < zdev->start_dma))
  595. return -EINVAL;
  596. blocking_domain_attach_device(&blocking_domain, dev, old);
  597. /* If we fail now DMA remains blocked via blocking domain */
  598. cc = s390_iommu_domain_reg_ioat(zdev, domain, &status);
  599. if (reg_ioat_propagate_error(cc, status))
  600. return -EIO;
  601. zdev->dma_table = s390_domain->dma_table;
  602. zdev_s390_domain_update(zdev, domain);
  603. spin_lock_irqsave(&s390_domain->list_lock, flags);
  604. list_add_rcu(&zdev->iommu_list, &s390_domain->devices);
  605. spin_unlock_irqrestore(&s390_domain->list_lock, flags);
  606. return 0;
  607. }
  608. static void s390_iommu_get_resv_regions(struct device *dev,
  609. struct list_head *list)
  610. {
  611. struct zpci_dev *zdev = to_zpci_dev(dev);
  612. struct iommu_resv_region *region;
  613. u64 max_size, end_resv;
  614. unsigned long flags;
  615. if (zdev->start_dma) {
  616. region = iommu_alloc_resv_region(0, zdev->start_dma, 0,
  617. IOMMU_RESV_RESERVED, GFP_KERNEL);
  618. if (!region)
  619. return;
  620. list_add_tail(&region->list, list);
  621. }
  622. spin_lock_irqsave(&zdev->dom_lock, flags);
  623. if (zdev->s390_domain->type == IOMMU_DOMAIN_BLOCKED ||
  624. zdev->s390_domain->type == IOMMU_DOMAIN_IDENTITY) {
  625. spin_unlock_irqrestore(&zdev->dom_lock, flags);
  626. return;
  627. }
  628. max_size = max_tbl_size(to_s390_domain(zdev->s390_domain));
  629. spin_unlock_irqrestore(&zdev->dom_lock, flags);
  630. if (zdev->end_dma < max_size) {
  631. end_resv = max_size - zdev->end_dma;
  632. region = iommu_alloc_resv_region(zdev->end_dma + 1, end_resv,
  633. 0, IOMMU_RESV_RESERVED,
  634. GFP_KERNEL);
  635. if (!region)
  636. return;
  637. list_add_tail(&region->list, list);
  638. }
  639. }
  640. static struct iommu_device *s390_iommu_probe_device(struct device *dev)
  641. {
  642. struct zpci_dev *zdev;
  643. if (!dev_is_pci(dev))
  644. return ERR_PTR(-ENODEV);
  645. zdev = to_zpci_dev(dev);
  646. if (zdev->start_dma > zdev->end_dma)
  647. return ERR_PTR(-EINVAL);
  648. if (zdev->tlb_refresh)
  649. dev->iommu->shadow_on_flush = 1;
  650. /* Start with DMA blocked */
  651. spin_lock_init(&zdev->dom_lock);
  652. zdev_s390_domain_update(zdev, &blocking_domain);
  653. return &zdev->iommu_dev;
  654. }
  655. static int zpci_refresh_all(struct zpci_dev *zdev)
  656. {
  657. return zpci_refresh_trans((u64)zdev->fh << 32, zdev->start_dma,
  658. zdev->end_dma - zdev->start_dma + 1);
  659. }
  660. static void s390_iommu_flush_iotlb_all(struct iommu_domain *domain)
  661. {
  662. struct s390_domain *s390_domain = to_s390_domain(domain);
  663. struct zpci_dev *zdev;
  664. rcu_read_lock();
  665. list_for_each_entry_rcu(zdev, &s390_domain->devices, iommu_list) {
  666. atomic64_inc(&s390_domain->ctrs.global_rpcits);
  667. zpci_refresh_all(zdev);
  668. }
  669. rcu_read_unlock();
  670. }
  671. static void s390_iommu_iotlb_sync(struct iommu_domain *domain,
  672. struct iommu_iotlb_gather *gather)
  673. {
  674. struct s390_domain *s390_domain = to_s390_domain(domain);
  675. size_t size = gather->end - gather->start + 1;
  676. struct zpci_dev *zdev;
  677. /* If gather was never added to there is nothing to flush */
  678. if (!gather->end)
  679. return;
  680. rcu_read_lock();
  681. list_for_each_entry_rcu(zdev, &s390_domain->devices, iommu_list) {
  682. atomic64_inc(&s390_domain->ctrs.sync_rpcits);
  683. zpci_refresh_trans((u64)zdev->fh << 32, gather->start,
  684. size);
  685. }
  686. rcu_read_unlock();
  687. }
  688. static int s390_iommu_iotlb_sync_map(struct iommu_domain *domain,
  689. unsigned long iova, size_t size)
  690. {
  691. struct s390_domain *s390_domain = to_s390_domain(domain);
  692. struct zpci_dev *zdev;
  693. int ret = 0;
  694. rcu_read_lock();
  695. list_for_each_entry_rcu(zdev, &s390_domain->devices, iommu_list) {
  696. if (!zdev->tlb_refresh)
  697. continue;
  698. atomic64_inc(&s390_domain->ctrs.sync_map_rpcits);
  699. ret = zpci_refresh_trans((u64)zdev->fh << 32,
  700. iova, size);
  701. /*
  702. * let the hypervisor discover invalidated entries
  703. * allowing it to free IOVAs and unpin pages
  704. */
  705. if (ret == -ENOMEM) {
  706. ret = zpci_refresh_all(zdev);
  707. if (ret)
  708. break;
  709. }
  710. }
  711. rcu_read_unlock();
  712. return ret;
  713. }
  714. static int s390_iommu_validate_trans(struct s390_domain *s390_domain,
  715. phys_addr_t pa, dma_addr_t dma_addr,
  716. unsigned long nr_pages, int flags,
  717. gfp_t gfp)
  718. {
  719. phys_addr_t page_addr = pa & PAGE_MASK;
  720. unsigned long *entry;
  721. unsigned long i;
  722. int rc;
  723. for (i = 0; i < nr_pages; i++) {
  724. entry = dma_walk_cpu_trans(s390_domain, dma_addr, gfp);
  725. if (unlikely(!entry)) {
  726. rc = -ENOMEM;
  727. goto undo_cpu_trans;
  728. }
  729. dma_update_cpu_trans(entry, page_addr, flags);
  730. page_addr += PAGE_SIZE;
  731. dma_addr += PAGE_SIZE;
  732. }
  733. return 0;
  734. undo_cpu_trans:
  735. while (i-- > 0) {
  736. dma_addr -= PAGE_SIZE;
  737. entry = dma_walk_cpu_trans(s390_domain, dma_addr, gfp);
  738. if (!entry)
  739. break;
  740. dma_update_cpu_trans(entry, 0, ZPCI_PTE_INVALID);
  741. }
  742. return rc;
  743. }
  744. static int s390_iommu_invalidate_trans(struct s390_domain *s390_domain,
  745. dma_addr_t dma_addr, unsigned long nr_pages)
  746. {
  747. unsigned long *entry;
  748. unsigned long i;
  749. int rc = 0;
  750. for (i = 0; i < nr_pages; i++) {
  751. entry = dma_walk_cpu_trans(s390_domain, dma_addr, GFP_ATOMIC);
  752. if (unlikely(!entry)) {
  753. rc = -EINVAL;
  754. break;
  755. }
  756. dma_update_cpu_trans(entry, 0, ZPCI_PTE_INVALID);
  757. dma_addr += PAGE_SIZE;
  758. }
  759. return rc;
  760. }
  761. static int s390_iommu_map_pages(struct iommu_domain *domain,
  762. unsigned long iova, phys_addr_t paddr,
  763. size_t pgsize, size_t pgcount,
  764. int prot, gfp_t gfp, size_t *mapped)
  765. {
  766. struct s390_domain *s390_domain = to_s390_domain(domain);
  767. size_t size = pgcount << __ffs(pgsize);
  768. int flags = ZPCI_PTE_VALID, rc = 0;
  769. if (pgsize != SZ_4K)
  770. return -EINVAL;
  771. if (iova < s390_domain->domain.geometry.aperture_start ||
  772. (iova + size - 1) > s390_domain->domain.geometry.aperture_end)
  773. return -EINVAL;
  774. if (!IS_ALIGNED(iova | paddr, pgsize))
  775. return -EINVAL;
  776. if (!(prot & IOMMU_WRITE))
  777. flags |= ZPCI_TABLE_PROTECTED;
  778. rc = s390_iommu_validate_trans(s390_domain, paddr, iova,
  779. pgcount, flags, gfp);
  780. if (!rc) {
  781. *mapped = size;
  782. atomic64_add(pgcount, &s390_domain->ctrs.mapped_pages);
  783. }
  784. return rc;
  785. }
  786. static unsigned long *get_rso_from_iova(struct s390_domain *domain,
  787. dma_addr_t iova)
  788. {
  789. unsigned long *rfo;
  790. unsigned long rfe;
  791. unsigned int rfx;
  792. switch (domain->origin_type) {
  793. case ZPCI_TABLE_TYPE_RFX:
  794. rfo = domain->dma_table;
  795. rfx = calc_rfx(iova);
  796. rfe = READ_ONCE(rfo[rfx]);
  797. if (!reg_entry_isvalid(rfe))
  798. return NULL;
  799. return get_rf_rso(rfe);
  800. case ZPCI_TABLE_TYPE_RSX:
  801. return domain->dma_table;
  802. default:
  803. return NULL;
  804. }
  805. }
  806. static unsigned long *get_rto_from_iova(struct s390_domain *domain,
  807. dma_addr_t iova)
  808. {
  809. unsigned long *rso;
  810. unsigned long rse;
  811. unsigned int rsx;
  812. switch (domain->origin_type) {
  813. case ZPCI_TABLE_TYPE_RFX:
  814. case ZPCI_TABLE_TYPE_RSX:
  815. rso = get_rso_from_iova(domain, iova);
  816. rsx = calc_rsx(iova);
  817. rse = READ_ONCE(rso[rsx]);
  818. if (!reg_entry_isvalid(rse))
  819. return NULL;
  820. return get_rs_rto(rse);
  821. case ZPCI_TABLE_TYPE_RTX:
  822. return domain->dma_table;
  823. default:
  824. return NULL;
  825. }
  826. }
  827. static phys_addr_t s390_iommu_iova_to_phys(struct iommu_domain *domain,
  828. dma_addr_t iova)
  829. {
  830. struct s390_domain *s390_domain = to_s390_domain(domain);
  831. unsigned long *rto, *sto, *pto;
  832. unsigned long ste, pte, rte;
  833. unsigned int rtx, sx, px;
  834. phys_addr_t phys = 0;
  835. if (iova < domain->geometry.aperture_start ||
  836. iova > domain->geometry.aperture_end)
  837. return 0;
  838. rto = get_rto_from_iova(s390_domain, iova);
  839. if (!rto)
  840. return 0;
  841. rtx = calc_rtx(iova);
  842. sx = calc_sx(iova);
  843. px = calc_px(iova);
  844. rte = READ_ONCE(rto[rtx]);
  845. if (reg_entry_isvalid(rte)) {
  846. sto = get_rt_sto(rte);
  847. ste = READ_ONCE(sto[sx]);
  848. if (reg_entry_isvalid(ste)) {
  849. pto = get_st_pto(ste);
  850. pte = READ_ONCE(pto[px]);
  851. if (pt_entry_isvalid(pte))
  852. phys = pte & ZPCI_PTE_ADDR_MASK;
  853. }
  854. }
  855. return phys;
  856. }
  857. static size_t s390_iommu_unmap_pages(struct iommu_domain *domain,
  858. unsigned long iova,
  859. size_t pgsize, size_t pgcount,
  860. struct iommu_iotlb_gather *gather)
  861. {
  862. struct s390_domain *s390_domain = to_s390_domain(domain);
  863. size_t size = pgcount << __ffs(pgsize);
  864. int rc;
  865. if (WARN_ON(iova < s390_domain->domain.geometry.aperture_start ||
  866. (iova + size - 1) > s390_domain->domain.geometry.aperture_end))
  867. return 0;
  868. rc = s390_iommu_invalidate_trans(s390_domain, iova, pgcount);
  869. if (rc)
  870. return 0;
  871. iommu_iotlb_gather_add_range(gather, iova, size);
  872. atomic64_add(pgcount, &s390_domain->ctrs.unmapped_pages);
  873. return size;
  874. }
  875. struct zpci_iommu_ctrs *zpci_get_iommu_ctrs(struct zpci_dev *zdev)
  876. {
  877. struct s390_domain *s390_domain;
  878. lockdep_assert_held(&zdev->dom_lock);
  879. if (zdev->s390_domain->type == IOMMU_DOMAIN_BLOCKED ||
  880. zdev->s390_domain->type == IOMMU_DOMAIN_IDENTITY)
  881. return NULL;
  882. s390_domain = to_s390_domain(zdev->s390_domain);
  883. return &s390_domain->ctrs;
  884. }
  885. int zpci_init_iommu(struct zpci_dev *zdev)
  886. {
  887. int rc = 0;
  888. rc = iommu_device_sysfs_add(&zdev->iommu_dev, NULL, NULL,
  889. "s390-iommu.%08x", zdev->fid);
  890. if (rc)
  891. goto out_err;
  892. if (zdev->rtr_avail) {
  893. rc = iommu_device_register(&zdev->iommu_dev,
  894. &s390_iommu_rtr_ops, NULL);
  895. } else {
  896. rc = iommu_device_register(&zdev->iommu_dev, &s390_iommu_ops,
  897. NULL);
  898. }
  899. if (rc)
  900. goto out_sysfs;
  901. return 0;
  902. out_sysfs:
  903. iommu_device_sysfs_remove(&zdev->iommu_dev);
  904. out_err:
  905. return rc;
  906. }
  907. void zpci_destroy_iommu(struct zpci_dev *zdev)
  908. {
  909. iommu_device_unregister(&zdev->iommu_dev);
  910. iommu_device_sysfs_remove(&zdev->iommu_dev);
  911. }
  912. static int __init s390_iommu_setup(char *str)
  913. {
  914. if (!strcmp(str, "strict")) {
  915. pr_warn("s390_iommu=strict deprecated; use iommu.strict=1 instead\n");
  916. iommu_set_dma_strict();
  917. }
  918. return 1;
  919. }
  920. __setup("s390_iommu=", s390_iommu_setup);
  921. static int __init s390_iommu_aperture_setup(char *str)
  922. {
  923. if (kstrtou32(str, 10, &s390_iommu_aperture_factor))
  924. s390_iommu_aperture_factor = 1;
  925. return 1;
  926. }
  927. __setup("s390_iommu_aperture=", s390_iommu_aperture_setup);
  928. static int __init s390_iommu_init(void)
  929. {
  930. int rc;
  931. iommu_dma_forcedac = true;
  932. s390_iommu_aperture = (u64)virt_to_phys(high_memory);
  933. if (!s390_iommu_aperture_factor)
  934. s390_iommu_aperture = ULONG_MAX;
  935. else
  936. s390_iommu_aperture *= s390_iommu_aperture_factor;
  937. rc = dma_alloc_cpu_table_caches();
  938. if (rc)
  939. return rc;
  940. return rc;
  941. }
  942. subsys_initcall(s390_iommu_init);
  943. static int s390_attach_dev_identity(struct iommu_domain *domain,
  944. struct device *dev,
  945. struct iommu_domain *old)
  946. {
  947. struct zpci_dev *zdev = to_zpci_dev(dev);
  948. u8 status;
  949. int cc;
  950. blocking_domain_attach_device(&blocking_domain, dev, old);
  951. /* If we fail now DMA remains blocked via blocking domain */
  952. cc = s390_iommu_domain_reg_ioat(zdev, domain, &status);
  953. if (reg_ioat_propagate_error(cc, status))
  954. return -EIO;
  955. zdev_s390_domain_update(zdev, domain);
  956. return 0;
  957. }
/* Domain ops for the identity domain: attach is the only operation provided. */
static const struct iommu_domain_ops s390_identity_ops = {
	.attach_dev = s390_attach_dev_identity,
};
/* Statically allocated IOMMU_DOMAIN_IDENTITY domain using s390_identity_ops. */
static struct iommu_domain s390_identity_domain = {
	.type = IOMMU_DOMAIN_IDENTITY,
	.ops = &s390_identity_ops,
};
/*
 * Statically allocated IOMMU_DOMAIN_BLOCKED domain.  Its ops are an anonymous
 * compound literal whose only operation is blocking_domain_attach_device().
 */
static struct iommu_domain blocking_domain = {
	.type = IOMMU_DOMAIN_BLOCKED,
	.ops = &(const struct iommu_domain_ops) {
		.attach_dev = blocking_domain_attach_device,
	}
};
/*
 * Designated-initializer fragment shared by the iommu_ops tables below.
 * Both the blocked and the release domain point at blocking_domain, and
 * default_domain_ops is an anonymous compound literal carrying the
 * attach/map/unmap/flush/sync/iova_to_phys/free callbacks of this driver.
 * The expansion has no trailing comma after default_domain_ops, so it must
 * be the last initializer in the table that uses it.
 */
#define S390_IOMMU_COMMON_OPS() \
	.blocked_domain = &blocking_domain, \
	.release_domain = &blocking_domain, \
	.capable = s390_iommu_capable, \
	.domain_alloc_paging = s390_domain_alloc_paging, \
	.probe_device = s390_iommu_probe_device, \
	.device_group = generic_device_group, \
	.get_resv_regions = s390_iommu_get_resv_regions, \
	.default_domain_ops = &(const struct iommu_domain_ops) { \
		.attach_dev = s390_iommu_attach_device, \
		.map_pages = s390_iommu_map_pages, \
		.unmap_pages = s390_iommu_unmap_pages, \
		.flush_iotlb_all = s390_iommu_flush_iotlb_all, \
		.iotlb_sync = s390_iommu_iotlb_sync, \
		.iotlb_sync_map = s390_iommu_iotlb_sync_map, \
		.iova_to_phys = s390_iommu_iova_to_phys, \
		.free = s390_domain_free, \
	}
/*
 * IOMMU ops registered by zpci_init_iommu() when zdev->rtr_avail is not set:
 * the common ops only, with no identity domain.
 */
static const struct iommu_ops s390_iommu_ops = {
	S390_IOMMU_COMMON_OPS()
};
/*
 * IOMMU ops registered by zpci_init_iommu() when zdev->rtr_avail is set:
 * the common ops plus s390_identity_domain as the identity domain.
 */
static const struct iommu_ops s390_iommu_rtr_ops = {
	.identity_domain = &s390_identity_domain,
	S390_IOMMU_COMMON_OPS()
};