vmd.c 30 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154
  1. // SPDX-License-Identifier: GPL-2.0
  2. /*
  3. * Volume Management Device driver
  4. * Copyright (c) 2015, Intel Corporation.
  5. */
  6. #include <linux/device.h>
  7. #include <linux/interrupt.h>
  8. #include <linux/irq.h>
  9. #include <linux/irqchip/irq-msi-lib.h>
  10. #include <linux/kernel.h>
  11. #include <linux/module.h>
  12. #include <linux/msi.h>
  13. #include <linux/pci.h>
  14. #include <linux/pci-acpi.h>
  15. #include <linux/pci-ecam.h>
  16. #include <linux/srcu.h>
  17. #include <linux/rculist.h>
  18. #include <linux/rcupdate.h>
  19. #include <xen/xen.h>
  20. #include <asm/irqdomain.h>
  21. #define VMD_CFGBAR 0
  22. #define VMD_MEMBAR1 2
  23. #define VMD_MEMBAR2 4
  24. #define PCI_REG_VMCAP 0x40
  25. #define BUS_RESTRICT_CAP(vmcap) (vmcap & 0x1)
  26. #define PCI_REG_VMCONFIG 0x44
  27. #define BUS_RESTRICT_CFG(vmcfg) ((vmcfg >> 8) & 0x3)
  28. #define VMCONFIG_MSI_REMAP 0x2
  29. #define PCI_REG_VMLOCK 0x70
  30. #define MB2_SHADOW_EN(vmlock) (vmlock & 0x2)
  31. #define MB2_SHADOW_OFFSET 0x2000
  32. #define MB2_SHADOW_SIZE 16
  33. enum vmd_features {
  34. /*
  35. * Device may contain registers which hint the physical location of the
  36. * membars, in order to allow proper address translation during
  37. * resource assignment to enable guest virtualization
  38. */
  39. VMD_FEAT_HAS_MEMBAR_SHADOW = (1 << 0),
  40. /*
  41. * Device may provide root port configuration information which limits
  42. * bus numbering
  43. */
  44. VMD_FEAT_HAS_BUS_RESTRICTIONS = (1 << 1),
  45. /*
  46. * Device contains physical location shadow registers in
  47. * vendor-specific capability space
  48. */
  49. VMD_FEAT_HAS_MEMBAR_SHADOW_VSCAP = (1 << 2),
  50. /*
  51. * Device may use MSI-X vector 0 for software triggering and will not
  52. * be used for MSI remapping
  53. */
  54. VMD_FEAT_OFFSET_FIRST_VECTOR = (1 << 3),
  55. /*
  56. * Device can bypass remapping MSI-X transactions into its MSI-X table,
  57. * avoiding the requirement of a VMD MSI domain for child device
  58. * interrupt handling.
  59. */
  60. VMD_FEAT_CAN_BYPASS_MSI_REMAP = (1 << 4),
  61. /*
  62. * Enable ASPM on the PCIE root ports and set the default LTR of the
  63. * storage devices on platforms where these values are not configured by
  64. * BIOS. This is needed for laptops, which require these settings for
  65. * proper power management of the SoC.
  66. */
  67. VMD_FEAT_BIOS_PM_QUIRK = (1 << 5),
  68. };
  69. #define VMD_BIOS_PM_QUIRK_LTR 0x1003 /* 3145728 ns */
  70. #define VMD_FEATS_CLIENT (VMD_FEAT_HAS_MEMBAR_SHADOW_VSCAP | \
  71. VMD_FEAT_HAS_BUS_RESTRICTIONS | \
  72. VMD_FEAT_OFFSET_FIRST_VECTOR | \
  73. VMD_FEAT_BIOS_PM_QUIRK)
  74. static DEFINE_IDA(vmd_instance_ida);
  75. /*
  76. * Lock for manipulating VMD IRQ lists.
  77. */
  78. static DEFINE_RAW_SPINLOCK(list_lock);
/**
 * struct vmd_irq - private data to map driver IRQ to the VMD shared vector
 * @node:	list item linking this IRQ into the parent's @irq_list while
 *		it is enabled.
 * @irq:	back pointer to the parent VMD vector (struct vmd_irq_list)
 *		this IRQ is demultiplexed from.
 * @enabled:	true if driver enabled IRQ, i.e. @node is currently linked
 *		into the parent list.
 * @virq:	the virtual IRQ value provided to the requesting driver.
 *
 * Every MSI/MSI-X IRQ requested for a device in a VMD domain will be mapped to
 * a VMD IRQ using this structure.
 */
struct vmd_irq {
	struct list_head	node;
	struct vmd_irq_list	*irq;
	bool			enabled;
	unsigned int		virq;
};
/**
 * struct vmd_irq_list - list of driver requested IRQs mapping to a VMD vector
 * @irq_list:	the list of irq's the VMD one demuxes to.
 * @srcu:	SRCU struct for local synchronization; the VMD interrupt
 *		handler walks @irq_list inside an SRCU read-side section and
 *		vmd_msi_free() waits on it before freeing a child.
 * @count:	number of child IRQs assigned to this vector; used to track
 *		sharing when picking the least loaded vector.
 * @virq:	The underlying VMD Linux interrupt number
 */
struct vmd_irq_list {
	struct list_head	irq_list;
	struct srcu_struct	srcu;
	unsigned int		count;
	unsigned int		virq;
};
/**
 * struct vmd_dev - per-instance state of a VMD endpoint
 * @dev:	the VMD PCI device itself.
 * @cfg_lock:	held around every child config space access
 *		(vmd_pci_read()/vmd_pci_write()).
 * @cfgbar:	base of the mapping used to reach child config space; offsets
 *		into it are ECAM encoded (see vmd_cfg_addr()).
 * @msix_count:	number of MSI-X vectors obtained in vmd_alloc_irqs().
 * @irqs:	array of @msix_count per-vector child IRQ lists.
 * @sysdata:	pci_sysdata handed to the VMD root bus; vmd_from_bus() maps it
 *		back to this structure.
 * @resources:	[0] bus number range, [1] MEMBAR1 window, [2] MEMBAR2 window.
 * @irq_domain:	parent MSI domain for child devices, NULL when none was
 *		created.
 * @bus:	root bus created for the VMD domain.
 * @busn_start:	first bus number of the VMD domain.
 * @first_vec:	index of the first VMD vector used for child interrupts.
 *		NOTE(review): assigned outside the code visible here.
 * @name:	name passed to devm_request_irq() for the VMD vectors.
 * @instance:	presumably the ID taken from vmd_instance_ida - TODO confirm,
 *		not referenced in the code visible here.
 */
struct vmd_dev {
	struct pci_dev		*dev;

	raw_spinlock_t		cfg_lock;
	void __iomem		*cfgbar;

	int msix_count;
	struct vmd_irq_list	*irqs;

	struct pci_sysdata	sysdata;
	struct resource		resources[3];
	struct irq_domain	*irq_domain;
	struct pci_bus		*bus;
	u8			busn_start;
	u8			first_vec;
	char			*name;
	int			instance;
};
  124. static inline struct vmd_dev *vmd_from_bus(struct pci_bus *bus)
  125. {
  126. return container_of(bus->sysdata, struct vmd_dev, sysdata);
  127. }
  128. static inline unsigned int index_from_irqs(struct vmd_dev *vmd,
  129. struct vmd_irq_list *irqs)
  130. {
  131. return irqs - vmd->irqs;
  132. }
  133. /*
  134. * Drivers managing a device in a VMD domain allocate their own IRQs as before,
  135. * but the MSI entry for the hardware it's driving will be programmed with a
  136. * destination ID for the VMD MSI-X table. The VMD muxes interrupts in its
  137. * domain into one of its own, and the VMD driver de-muxes these for the
  138. * handlers sharing that VMD IRQ. The vmd irq_domain provides the operations
  139. * and irq_chip to set this up.
  140. */
  141. static void vmd_compose_msi_msg(struct irq_data *data, struct msi_msg *msg)
  142. {
  143. struct vmd_irq *vmdirq = data->chip_data;
  144. struct vmd_irq_list *irq = vmdirq->irq;
  145. struct vmd_dev *vmd = irq_data_get_irq_handler_data(data);
  146. memset(msg, 0, sizeof(*msg));
  147. msg->address_hi = X86_MSI_BASE_ADDRESS_HIGH;
  148. msg->arch_addr_lo.base_address = X86_MSI_BASE_ADDRESS_LOW;
  149. msg->arch_addr_lo.destid_0_7 = index_from_irqs(vmd, irq);
  150. }
  151. static void vmd_irq_enable(struct irq_data *data)
  152. {
  153. struct vmd_irq *vmdirq = data->chip_data;
  154. scoped_guard(raw_spinlock_irqsave, &list_lock) {
  155. WARN_ON(vmdirq->enabled);
  156. list_add_tail_rcu(&vmdirq->node, &vmdirq->irq->irq_list);
  157. vmdirq->enabled = true;
  158. }
  159. }
  160. static void vmd_pci_msi_enable(struct irq_data *data)
  161. {
  162. vmd_irq_enable(data->parent_data);
  163. data->chip->irq_unmask(data);
  164. }
  165. static unsigned int vmd_pci_msi_startup(struct irq_data *data)
  166. {
  167. vmd_pci_msi_enable(data);
  168. return 0;
  169. }
  170. static void vmd_irq_disable(struct irq_data *data)
  171. {
  172. struct vmd_irq *vmdirq = data->chip_data;
  173. scoped_guard(raw_spinlock_irqsave, &list_lock) {
  174. if (vmdirq->enabled) {
  175. list_del_rcu(&vmdirq->node);
  176. vmdirq->enabled = false;
  177. }
  178. }
  179. }
  180. static void vmd_pci_msi_disable(struct irq_data *data)
  181. {
  182. data->chip->irq_mask(data);
  183. vmd_irq_disable(data->parent_data);
  184. }
  185. static void vmd_pci_msi_shutdown(struct irq_data *data)
  186. {
  187. vmd_pci_msi_disable(data);
  188. }
  189. static struct irq_chip vmd_msi_controller = {
  190. .name = "VMD-MSI",
  191. .irq_compose_msi_msg = vmd_compose_msi_msg,
  192. };
  193. /*
  194. * XXX: We can be even smarter selecting the best IRQ once we solve the
  195. * affinity problem.
  196. */
  197. static struct vmd_irq_list *vmd_next_irq(struct vmd_dev *vmd, struct msi_desc *desc)
  198. {
  199. int i, best;
  200. if (vmd->msix_count == 1 + vmd->first_vec)
  201. return &vmd->irqs[vmd->first_vec];
  202. /*
  203. * White list for fast-interrupt handlers. All others will share the
  204. * "slow" interrupt vector.
  205. */
  206. switch (msi_desc_to_pci_dev(desc)->class) {
  207. case PCI_CLASS_STORAGE_EXPRESS:
  208. break;
  209. default:
  210. return &vmd->irqs[vmd->first_vec];
  211. }
  212. scoped_guard(raw_spinlock_irq, &list_lock) {
  213. best = vmd->first_vec + 1;
  214. for (i = best; i < vmd->msix_count; i++)
  215. if (vmd->irqs[i].count < vmd->irqs[best].count)
  216. best = i;
  217. vmd->irqs[best].count++;
  218. }
  219. return &vmd->irqs[best];
  220. }
  221. static void vmd_msi_free(struct irq_domain *domain, unsigned int virq,
  222. unsigned int nr_irqs);
  223. static int vmd_msi_alloc(struct irq_domain *domain, unsigned int virq,
  224. unsigned int nr_irqs, void *arg)
  225. {
  226. struct msi_desc *desc = ((msi_alloc_info_t *)arg)->desc;
  227. struct vmd_dev *vmd = domain->host_data;
  228. struct vmd_irq *vmdirq;
  229. for (int i = 0; i < nr_irqs; ++i) {
  230. vmdirq = kzalloc_obj(*vmdirq);
  231. if (!vmdirq) {
  232. vmd_msi_free(domain, virq, i);
  233. return -ENOMEM;
  234. }
  235. INIT_LIST_HEAD(&vmdirq->node);
  236. vmdirq->irq = vmd_next_irq(vmd, desc);
  237. vmdirq->virq = virq + i;
  238. irq_domain_set_info(domain, virq + i, vmdirq->irq->virq,
  239. &vmd_msi_controller, vmdirq,
  240. handle_untracked_irq, vmd, NULL);
  241. }
  242. return 0;
  243. }
  244. static void vmd_msi_free(struct irq_domain *domain, unsigned int virq,
  245. unsigned int nr_irqs)
  246. {
  247. struct irq_data *irq_data;
  248. struct vmd_irq *vmdirq;
  249. for (int i = 0; i < nr_irqs; ++i) {
  250. irq_data = irq_domain_get_irq_data(domain, virq + i);
  251. vmdirq = irq_data->chip_data;
  252. synchronize_srcu(&vmdirq->irq->srcu);
  253. /* XXX: Potential optimization to rebalance */
  254. scoped_guard(raw_spinlock_irq, &list_lock)
  255. vmdirq->irq->count--;
  256. kfree(vmdirq);
  257. }
  258. }
  259. static const struct irq_domain_ops vmd_msi_domain_ops = {
  260. .alloc = vmd_msi_alloc,
  261. .free = vmd_msi_free,
  262. };
  263. static bool vmd_init_dev_msi_info(struct device *dev, struct irq_domain *domain,
  264. struct irq_domain *real_parent,
  265. struct msi_domain_info *info)
  266. {
  267. if (!msi_lib_init_dev_msi_info(dev, domain, real_parent, info))
  268. return false;
  269. info->chip->irq_startup = vmd_pci_msi_startup;
  270. info->chip->irq_shutdown = vmd_pci_msi_shutdown;
  271. info->chip->irq_enable = vmd_pci_msi_enable;
  272. info->chip->irq_disable = vmd_pci_msi_disable;
  273. return true;
  274. }
  275. #define VMD_MSI_FLAGS_SUPPORTED (MSI_GENERIC_FLAGS_MASK | MSI_FLAG_PCI_MSIX)
  276. #define VMD_MSI_FLAGS_REQUIRED (MSI_FLAG_USE_DEF_DOM_OPS | MSI_FLAG_NO_AFFINITY)
  277. static const struct msi_parent_ops vmd_msi_parent_ops = {
  278. .supported_flags = VMD_MSI_FLAGS_SUPPORTED,
  279. .required_flags = VMD_MSI_FLAGS_REQUIRED,
  280. .bus_select_token = DOMAIN_BUS_VMD_MSI,
  281. .bus_select_mask = MATCH_PCI_MSI,
  282. .prefix = "VMD-",
  283. .init_dev_msi_info = vmd_init_dev_msi_info,
  284. };
  285. static int vmd_create_irq_domain(struct vmd_dev *vmd)
  286. {
  287. struct irq_domain_info info = {
  288. .size = vmd->msix_count,
  289. .ops = &vmd_msi_domain_ops,
  290. .host_data = vmd,
  291. };
  292. info.fwnode = irq_domain_alloc_named_id_fwnode("VMD-MSI",
  293. vmd->sysdata.domain);
  294. if (!info.fwnode)
  295. return -ENODEV;
  296. vmd->irq_domain = msi_create_parent_irq_domain(&info,
  297. &vmd_msi_parent_ops);
  298. if (!vmd->irq_domain) {
  299. irq_domain_free_fwnode(info.fwnode);
  300. return -ENODEV;
  301. }
  302. return 0;
  303. }
  304. static void vmd_set_msi_remapping(struct vmd_dev *vmd, bool enable)
  305. {
  306. u16 reg;
  307. pci_read_config_word(vmd->dev, PCI_REG_VMCONFIG, &reg);
  308. reg = enable ? (reg & ~VMCONFIG_MSI_REMAP) :
  309. (reg | VMCONFIG_MSI_REMAP);
  310. pci_write_config_word(vmd->dev, PCI_REG_VMCONFIG, reg);
  311. }
  312. static void vmd_remove_irq_domain(struct vmd_dev *vmd)
  313. {
  314. /*
  315. * Some production BIOS won't enable remapping between soft reboots.
  316. * Ensure remapping is restored before unloading the driver.
  317. */
  318. if (!vmd->msix_count)
  319. vmd_set_msi_remapping(vmd, true);
  320. if (vmd->irq_domain) {
  321. struct fwnode_handle *fn = vmd->irq_domain->fwnode;
  322. irq_domain_remove(vmd->irq_domain);
  323. irq_domain_free_fwnode(fn);
  324. }
  325. }
  326. static void __iomem *vmd_cfg_addr(struct vmd_dev *vmd, struct pci_bus *bus,
  327. unsigned int devfn, int reg, int len)
  328. {
  329. unsigned int busnr_ecam = bus->number - vmd->busn_start;
  330. u32 offset = PCIE_ECAM_OFFSET(busnr_ecam, devfn, reg);
  331. if (offset + len >= resource_size(&vmd->dev->resource[VMD_CFGBAR]))
  332. return NULL;
  333. return vmd->cfgbar + offset;
  334. }
  335. /*
  336. * CPU may deadlock if config space is not serialized on some versions of this
  337. * hardware, so all config space access is done under a spinlock.
  338. */
  339. static int vmd_pci_read(struct pci_bus *bus, unsigned int devfn, int reg,
  340. int len, u32 *value)
  341. {
  342. struct vmd_dev *vmd = vmd_from_bus(bus);
  343. void __iomem *addr = vmd_cfg_addr(vmd, bus, devfn, reg, len);
  344. if (!addr)
  345. return -EFAULT;
  346. guard(raw_spinlock_irqsave)(&vmd->cfg_lock);
  347. switch (len) {
  348. case 1:
  349. *value = readb(addr);
  350. return 0;
  351. case 2:
  352. *value = readw(addr);
  353. return 0;
  354. case 4:
  355. *value = readl(addr);
  356. return 0;
  357. default:
  358. return -EINVAL;
  359. }
  360. }
  361. /*
  362. * VMD h/w converts non-posted config writes to posted memory writes. The
  363. * read-back in this function forces the completion so it returns only after
  364. * the config space was written, as expected.
  365. */
  366. static int vmd_pci_write(struct pci_bus *bus, unsigned int devfn, int reg,
  367. int len, u32 value)
  368. {
  369. struct vmd_dev *vmd = vmd_from_bus(bus);
  370. void __iomem *addr = vmd_cfg_addr(vmd, bus, devfn, reg, len);
  371. if (!addr)
  372. return -EFAULT;
  373. guard(raw_spinlock_irqsave)(&vmd->cfg_lock);
  374. switch (len) {
  375. case 1:
  376. writeb(value, addr);
  377. readb(addr);
  378. return 0;
  379. case 2:
  380. writew(value, addr);
  381. readw(addr);
  382. return 0;
  383. case 4:
  384. writel(value, addr);
  385. readl(addr);
  386. return 0;
  387. default:
  388. return -EINVAL;
  389. }
  390. }
  391. static struct pci_ops vmd_ops = {
  392. .read = vmd_pci_read,
  393. .write = vmd_pci_write,
  394. };
  395. #ifdef CONFIG_ACPI
  396. static struct acpi_device *vmd_acpi_find_companion(struct pci_dev *pci_dev)
  397. {
  398. struct pci_host_bridge *bridge;
  399. u32 busnr, addr;
  400. if (pci_dev->bus->ops != &vmd_ops)
  401. return NULL;
  402. bridge = pci_find_host_bridge(pci_dev->bus);
  403. busnr = pci_dev->bus->number - bridge->bus->number;
  404. /*
  405. * The address computation below is only applicable to relative bus
  406. * numbers below 32.
  407. */
  408. if (busnr > 31)
  409. return NULL;
  410. addr = (busnr << 24) | ((u32)pci_dev->devfn << 16) | 0x8000FFFFU;
  411. dev_dbg(&pci_dev->dev, "Looking for ACPI companion (address 0x%x)\n",
  412. addr);
  413. return acpi_find_child_device(ACPI_COMPANION(bridge->dev.parent), addr,
  414. false);
  415. }
  416. static bool hook_installed;
  417. static void vmd_acpi_begin(void)
  418. {
  419. if (pci_acpi_set_companion_lookup_hook(vmd_acpi_find_companion))
  420. return;
  421. hook_installed = true;
  422. }
  423. static void vmd_acpi_end(void)
  424. {
  425. if (!hook_installed)
  426. return;
  427. pci_acpi_clear_companion_lookup_hook();
  428. hook_installed = false;
  429. }
  430. #else
  431. static inline void vmd_acpi_begin(void) { }
  432. static inline void vmd_acpi_end(void) { }
  433. #endif /* CONFIG_ACPI */
  434. static void vmd_domain_reset(struct vmd_dev *vmd)
  435. {
  436. u16 bus, max_buses = resource_size(&vmd->resources[0]);
  437. u8 dev, functions, fn, hdr_type;
  438. char __iomem *base;
  439. for (bus = 0; bus < max_buses; bus++) {
  440. for (dev = 0; dev < 32; dev++) {
  441. base = vmd->cfgbar + PCIE_ECAM_OFFSET(bus,
  442. PCI_DEVFN(dev, 0), 0);
  443. hdr_type = readb(base + PCI_HEADER_TYPE);
  444. functions = (hdr_type & PCI_HEADER_TYPE_MFD) ? 8 : 1;
  445. for (fn = 0; fn < functions; fn++) {
  446. base = vmd->cfgbar + PCIE_ECAM_OFFSET(bus,
  447. PCI_DEVFN(dev, fn), 0);
  448. hdr_type = readb(base + PCI_HEADER_TYPE) &
  449. PCI_HEADER_TYPE_MASK;
  450. if (hdr_type != PCI_HEADER_TYPE_BRIDGE ||
  451. (readw(base + PCI_CLASS_DEVICE) !=
  452. PCI_CLASS_BRIDGE_PCI))
  453. continue;
  454. /*
  455. * Temporarily disable the I/O range before updating
  456. * PCI_IO_BASE.
  457. */
  458. writel(0x0000ffff, base + PCI_IO_BASE_UPPER16);
  459. /* Update lower 16 bits of I/O base/limit */
  460. writew(0x00f0, base + PCI_IO_BASE);
  461. /* Update upper 16 bits of I/O base/limit */
  462. writel(0, base + PCI_IO_BASE_UPPER16);
  463. /* MMIO Base/Limit */
  464. writel(0x0000fff0, base + PCI_MEMORY_BASE);
  465. /* Prefetchable MMIO Base/Limit */
  466. writel(0, base + PCI_PREF_LIMIT_UPPER32);
  467. writel(0x0000fff0, base + PCI_PREF_MEMORY_BASE);
  468. writel(0xffffffff, base + PCI_PREF_BASE_UPPER32);
  469. }
  470. }
  471. }
  472. }
  473. static void vmd_attach_resources(struct vmd_dev *vmd)
  474. {
  475. vmd->dev->resource[VMD_MEMBAR1].child = &vmd->resources[1];
  476. vmd->dev->resource[VMD_MEMBAR2].child = &vmd->resources[2];
  477. }
  478. static void vmd_detach_resources(struct vmd_dev *vmd)
  479. {
  480. vmd->dev->resource[VMD_MEMBAR1].child = NULL;
  481. vmd->dev->resource[VMD_MEMBAR2].child = NULL;
  482. }
  483. static int vmd_get_phys_offsets(struct vmd_dev *vmd, bool native_hint,
  484. resource_size_t *offset1,
  485. resource_size_t *offset2)
  486. {
  487. struct pci_dev *dev = vmd->dev;
  488. u64 phys1, phys2;
  489. if (native_hint) {
  490. u32 vmlock;
  491. int ret;
  492. ret = pci_read_config_dword(dev, PCI_REG_VMLOCK, &vmlock);
  493. if (ret || PCI_POSSIBLE_ERROR(vmlock))
  494. return -ENODEV;
  495. if (MB2_SHADOW_EN(vmlock)) {
  496. void __iomem *membar2;
  497. membar2 = pci_iomap(dev, VMD_MEMBAR2, 0);
  498. if (!membar2)
  499. return -ENOMEM;
  500. phys1 = readq(membar2 + MB2_SHADOW_OFFSET);
  501. phys2 = readq(membar2 + MB2_SHADOW_OFFSET + 8);
  502. pci_iounmap(dev, membar2);
  503. } else
  504. return 0;
  505. } else {
  506. /* Hypervisor-Emulated Vendor-Specific Capability */
  507. int pos = pci_find_capability(dev, PCI_CAP_ID_VNDR);
  508. u32 reg, regu;
  509. pci_read_config_dword(dev, pos + 4, &reg);
  510. /* "SHDW" */
  511. if (pos && reg == 0x53484457) {
  512. pci_read_config_dword(dev, pos + 8, &reg);
  513. pci_read_config_dword(dev, pos + 12, &regu);
  514. phys1 = (u64) regu << 32 | reg;
  515. pci_read_config_dword(dev, pos + 16, &reg);
  516. pci_read_config_dword(dev, pos + 20, &regu);
  517. phys2 = (u64) regu << 32 | reg;
  518. } else
  519. return 0;
  520. }
  521. *offset1 = dev->resource[VMD_MEMBAR1].start -
  522. (phys1 & PCI_BASE_ADDRESS_MEM_MASK);
  523. *offset2 = dev->resource[VMD_MEMBAR2].start -
  524. (phys2 & PCI_BASE_ADDRESS_MEM_MASK);
  525. return 0;
  526. }
  527. static int vmd_get_bus_number_start(struct vmd_dev *vmd)
  528. {
  529. struct pci_dev *dev = vmd->dev;
  530. u16 reg;
  531. pci_read_config_word(dev, PCI_REG_VMCAP, &reg);
  532. if (BUS_RESTRICT_CAP(reg)) {
  533. pci_read_config_word(dev, PCI_REG_VMCONFIG, &reg);
  534. switch (BUS_RESTRICT_CFG(reg)) {
  535. case 0:
  536. vmd->busn_start = 0;
  537. break;
  538. case 1:
  539. vmd->busn_start = 128;
  540. break;
  541. case 2:
  542. vmd->busn_start = 224;
  543. break;
  544. default:
  545. pci_err(dev, "Unknown Bus Offset Setting (%d)\n",
  546. BUS_RESTRICT_CFG(reg));
  547. return -ENODEV;
  548. }
  549. }
  550. return 0;
  551. }
  552. static irqreturn_t vmd_irq(int irq, void *data)
  553. {
  554. struct vmd_irq_list *irqs = data;
  555. struct vmd_irq *vmdirq;
  556. int idx;
  557. idx = srcu_read_lock(&irqs->srcu);
  558. list_for_each_entry_rcu(vmdirq, &irqs->irq_list, node)
  559. generic_handle_irq(vmdirq->virq);
  560. srcu_read_unlock(&irqs->srcu, idx);
  561. return IRQ_HANDLED;
  562. }
  563. static int vmd_alloc_irqs(struct vmd_dev *vmd)
  564. {
  565. struct pci_dev *dev = vmd->dev;
  566. int i, err;
  567. vmd->msix_count = pci_msix_vec_count(dev);
  568. if (vmd->msix_count < 0)
  569. return -ENODEV;
  570. vmd->msix_count = pci_alloc_irq_vectors(dev, vmd->first_vec + 1,
  571. vmd->msix_count, PCI_IRQ_MSIX);
  572. if (vmd->msix_count < 0)
  573. return vmd->msix_count;
  574. vmd->irqs = devm_kcalloc(&dev->dev, vmd->msix_count, sizeof(*vmd->irqs),
  575. GFP_KERNEL);
  576. if (!vmd->irqs)
  577. return -ENOMEM;
  578. for (i = 0; i < vmd->msix_count; i++) {
  579. err = init_srcu_struct(&vmd->irqs[i].srcu);
  580. if (err)
  581. return err;
  582. INIT_LIST_HEAD(&vmd->irqs[i].irq_list);
  583. vmd->irqs[i].virq = pci_irq_vector(dev, i);
  584. err = devm_request_irq(&dev->dev, vmd->irqs[i].virq,
  585. vmd_irq, IRQF_NO_THREAD,
  586. vmd->name, &vmd->irqs[i]);
  587. if (err)
  588. return err;
  589. }
  590. return 0;
  591. }
  592. /*
  593. * Since VMD is an aperture to regular PCIe root ports, only allow it to
  594. * control features that the OS is allowed to control on the physical PCI bus.
  595. */
  596. static void vmd_copy_host_bridge_flags(struct pci_host_bridge *root_bridge,
  597. struct pci_host_bridge *vmd_bridge)
  598. {
  599. vmd_bridge->native_pcie_hotplug = root_bridge->native_pcie_hotplug;
  600. vmd_bridge->native_shpc_hotplug = root_bridge->native_shpc_hotplug;
  601. vmd_bridge->native_aer = root_bridge->native_aer;
  602. vmd_bridge->native_pme = root_bridge->native_pme;
  603. vmd_bridge->native_ltr = root_bridge->native_ltr;
  604. vmd_bridge->native_dpc = root_bridge->native_dpc;
  605. }
  606. /*
  607. * Enable ASPM and LTR settings on devices that aren't configured by BIOS.
  608. */
  609. static int vmd_pm_enable_quirk(struct pci_dev *pdev, void *userdata)
  610. {
  611. unsigned long features = *(unsigned long *)userdata;
  612. u16 ltr = VMD_BIOS_PM_QUIRK_LTR;
  613. u32 ltr_reg;
  614. int pos;
  615. if (!(features & VMD_FEAT_BIOS_PM_QUIRK))
  616. return 0;
  617. pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_LTR);
  618. if (!pos)
  619. goto out_state_change;
  620. /*
  621. * Skip if the max snoop LTR is non-zero, indicating BIOS has set it
  622. * so the LTR quirk is not needed.
  623. */
  624. pci_read_config_dword(pdev, pos + PCI_LTR_MAX_SNOOP_LAT, &ltr_reg);
  625. if (!!(ltr_reg & (PCI_LTR_VALUE_MASK | PCI_LTR_SCALE_MASK)))
  626. goto out_state_change;
  627. /*
  628. * Set the default values to the maximum required by the platform to
  629. * allow the deepest power management savings. Write as a DWORD where
  630. * the lower word is the max snoop latency and the upper word is the
  631. * max non-snoop latency.
  632. */
  633. ltr_reg = (ltr << 16) | ltr;
  634. pci_write_config_dword(pdev, pos + PCI_LTR_MAX_SNOOP_LAT, ltr_reg);
  635. pci_info(pdev, "VMD: Default LTR value set by driver\n");
  636. out_state_change:
  637. /*
  638. * Ensure devices are in D0 before enabling PCI-PM L1 PM Substates, per
  639. * PCIe r6.0, sec 5.5.4.
  640. */
  641. pci_set_power_state_locked(pdev, PCI_D0);
  642. pci_enable_link_state_locked(pdev, PCIE_LINK_STATE_ALL);
  643. return 0;
  644. }
  645. static int vmd_enable_domain(struct vmd_dev *vmd, unsigned long features)
  646. {
  647. struct pci_sysdata *sd = &vmd->sysdata;
  648. struct resource *res;
  649. u32 upper_bits;
  650. unsigned long flags;
  651. LIST_HEAD(resources);
  652. resource_size_t offset[2] = {0};
  653. resource_size_t membar2_offset = 0x2000;
  654. struct pci_bus *child;
  655. struct pci_dev *dev;
  656. int ret;
  657. /*
  658. * Shadow registers may exist in certain VMD device ids which allow
  659. * guests to correctly assign host physical addresses to the root ports
  660. * and child devices. These registers will either return the host value
  661. * or 0, depending on an enable bit in the VMD device.
  662. */
  663. if (features & VMD_FEAT_HAS_MEMBAR_SHADOW) {
  664. membar2_offset = MB2_SHADOW_OFFSET + MB2_SHADOW_SIZE;
  665. ret = vmd_get_phys_offsets(vmd, true, &offset[0], &offset[1]);
  666. if (ret)
  667. return ret;
  668. } else if (features & VMD_FEAT_HAS_MEMBAR_SHADOW_VSCAP) {
  669. ret = vmd_get_phys_offsets(vmd, false, &offset[0], &offset[1]);
  670. if (ret)
  671. return ret;
  672. }
  673. /*
  674. * Certain VMD devices may have a root port configuration option which
  675. * limits the bus range to between 0-127, 128-255, or 224-255
  676. */
  677. if (features & VMD_FEAT_HAS_BUS_RESTRICTIONS) {
  678. ret = vmd_get_bus_number_start(vmd);
  679. if (ret)
  680. return ret;
  681. }
  682. res = &vmd->dev->resource[VMD_CFGBAR];
  683. vmd->resources[0] = (struct resource) {
  684. .name = "VMD CFGBAR",
  685. .start = vmd->busn_start,
  686. .end = vmd->busn_start + (resource_size(res) >> 20) - 1,
  687. .flags = IORESOURCE_BUS | IORESOURCE_PCI_FIXED,
  688. };
  689. /*
  690. * If the window is below 4GB, clear IORESOURCE_MEM_64 so we can
  691. * put 32-bit resources in the window.
  692. *
  693. * There's no hardware reason why a 64-bit window *couldn't*
  694. * contain a 32-bit resource, but pbus_size_mem() computes the
  695. * bridge window size assuming a 64-bit window will contain no
  696. * 32-bit resources. __pci_assign_resource() enforces that
  697. * artificial restriction to make sure everything will fit.
  698. *
  699. * The only way we could use a 64-bit non-prefetchable MEMBAR is
  700. * if its address is <4GB so that we can convert it to a 32-bit
  701. * resource. To be visible to the host OS, all VMD endpoints must
  702. * be initially configured by platform BIOS, which includes setting
  703. * up these resources. We can assume the device is configured
  704. * according to the platform needs.
  705. */
  706. res = &vmd->dev->resource[VMD_MEMBAR1];
  707. upper_bits = upper_32_bits(res->end);
  708. flags = res->flags & ~IORESOURCE_SIZEALIGN;
  709. if (!upper_bits)
  710. flags &= ~IORESOURCE_MEM_64;
  711. vmd->resources[1] = (struct resource) {
  712. .name = "VMD MEMBAR1",
  713. .start = res->start,
  714. .end = res->end,
  715. .flags = flags,
  716. .parent = res,
  717. };
  718. res = &vmd->dev->resource[VMD_MEMBAR2];
  719. upper_bits = upper_32_bits(res->end);
  720. flags = res->flags & ~IORESOURCE_SIZEALIGN;
  721. if (!upper_bits)
  722. flags &= ~IORESOURCE_MEM_64;
  723. vmd->resources[2] = (struct resource) {
  724. .name = "VMD MEMBAR2",
  725. .start = res->start + membar2_offset,
  726. .end = res->end,
  727. .flags = flags,
  728. .parent = res,
  729. };
  730. /*
  731. * Currently MSI remapping must be enabled in guest passthrough mode
  732. * due to some missing interrupt remapping plumbing. This is probably
  733. * acceptable because the guest is usually CPU-limited and MSI
  734. * remapping doesn't become a performance bottleneck.
  735. */
  736. if (!(features & VMD_FEAT_CAN_BYPASS_MSI_REMAP) ||
  737. offset[0] || offset[1]) {
  738. ret = vmd_alloc_irqs(vmd);
  739. if (ret)
  740. return ret;
  741. vmd_set_msi_remapping(vmd, true);
  742. ret = vmd_create_irq_domain(vmd);
  743. if (ret)
  744. return ret;
  745. } else {
  746. vmd_set_msi_remapping(vmd, false);
  747. }
  748. pci_add_resource(&resources, &vmd->resources[0]);
  749. pci_add_resource_offset(&resources, &vmd->resources[1], offset[0]);
  750. pci_add_resource_offset(&resources, &vmd->resources[2], offset[1]);
  751. sd->vmd_dev = vmd->dev;
  752. /*
  753. * Emulated domains start at 0x10000 to not clash with ACPI _SEG
  754. * domains. Per ACPI r6.0, sec 6.5.6, _SEG returns an integer, of
  755. * which the lower 16 bits are the PCI Segment Group (domain) number.
  756. * Other bits are currently reserved.
  757. */
  758. sd->domain = pci_bus_find_emul_domain_nr(0, 0x10000, INT_MAX);
  759. if (sd->domain < 0)
  760. return sd->domain;
  761. sd->node = pcibus_to_node(vmd->dev->bus);
  762. vmd->bus = pci_create_root_bus(&vmd->dev->dev, vmd->busn_start,
  763. &vmd_ops, sd, &resources);
  764. if (!vmd->bus) {
  765. pci_bus_release_emul_domain_nr(sd->domain);
  766. pci_free_resource_list(&resources);
  767. vmd_remove_irq_domain(vmd);
  768. return -ENODEV;
  769. }
  770. vmd_copy_host_bridge_flags(pci_find_host_bridge(vmd->dev->bus),
  771. to_pci_host_bridge(vmd->bus->bridge));
  772. vmd_attach_resources(vmd);
  773. if (vmd->irq_domain)
  774. dev_set_msi_domain(&vmd->bus->dev, vmd->irq_domain);
  775. else
  776. dev_set_msi_domain(&vmd->bus->dev,
  777. dev_get_msi_domain(&vmd->dev->dev));
  778. WARN(sysfs_create_link(&vmd->dev->dev.kobj, &vmd->bus->dev.kobj,
  779. "domain"), "Can't create symlink to domain\n");
  780. vmd_acpi_begin();
  781. pci_scan_child_bus(vmd->bus);
  782. vmd_domain_reset(vmd);
  783. /* When Intel VMD is enabled, the OS does not discover the Root Ports
  784. * owned by Intel VMD within the MMCFG space. pci_reset_bus() applies
  785. * a reset to the parent of the PCI device supplied as argument. This
  786. * is why we pass a child device, so the reset can be triggered at
  787. * the Intel bridge level and propagated to all the children in the
  788. * hierarchy.
  789. */
  790. list_for_each_entry(child, &vmd->bus->children, node) {
  791. if (!list_empty(&child->devices)) {
  792. dev = list_first_entry(&child->devices,
  793. struct pci_dev, bus_list);
  794. ret = pci_reset_bus(dev);
  795. if (ret)
  796. pci_warn(dev, "can't reset device: %d\n", ret);
  797. break;
  798. }
  799. }
  800. pci_assign_unassigned_bus_resources(vmd->bus);
  801. pci_walk_bus(vmd->bus, vmd_pm_enable_quirk, &features);
  802. /*
  803. * VMD root buses are virtual and don't return true on pci_is_pcie()
  804. * and will fail pcie_bus_configure_settings() early. It can instead be
  805. * run on each of the real root ports.
  806. */
  807. list_for_each_entry(child, &vmd->bus->children, node)
  808. pcie_bus_configure_settings(child);
  809. pci_bus_add_devices(vmd->bus);
  810. vmd_acpi_end();
  811. return 0;
  812. }
  813. static int vmd_probe(struct pci_dev *dev, const struct pci_device_id *id)
  814. {
  815. unsigned long features = (unsigned long) id->driver_data;
  816. struct vmd_dev *vmd;
  817. int err;
  818. if (xen_domain()) {
  819. /*
  820. * Xen doesn't have knowledge about devices in the VMD bus
  821. * because the config space of devices behind the VMD bridge is
  822. * not known to Xen, and hence Xen cannot discover or configure
  823. * them in any way.
  824. *
  825. * Bypass of MSI remapping won't work in that case as direct
  826. * write by Linux to the MSI entries won't result in functional
  827. * interrupts, as Xen is the entity that manages the host
  828. * interrupt controller and must configure interrupts. However
  829. * multiplexing of interrupts by the VMD bridge will work under
  830. * Xen, so force the usage of that mode which must always be
  831. * supported by VMD bridges.
  832. */
  833. features &= ~VMD_FEAT_CAN_BYPASS_MSI_REMAP;
  834. }
  835. if (resource_size(&dev->resource[VMD_CFGBAR]) < (1 << 20))
  836. return -ENOMEM;
  837. vmd = devm_kzalloc(&dev->dev, sizeof(*vmd), GFP_KERNEL);
  838. if (!vmd)
  839. return -ENOMEM;
  840. vmd->dev = dev;
  841. vmd->sysdata.domain = PCI_DOMAIN_NR_NOT_SET;
  842. vmd->instance = ida_alloc(&vmd_instance_ida, GFP_KERNEL);
  843. if (vmd->instance < 0)
  844. return vmd->instance;
  845. vmd->name = devm_kasprintf(&dev->dev, GFP_KERNEL, "vmd%d",
  846. vmd->instance);
  847. if (!vmd->name) {
  848. err = -ENOMEM;
  849. goto out_release_instance;
  850. }
  851. err = pcim_enable_device(dev);
  852. if (err < 0)
  853. goto out_release_instance;
  854. vmd->cfgbar = pcim_iomap(dev, VMD_CFGBAR, 0);
  855. if (!vmd->cfgbar) {
  856. err = -ENOMEM;
  857. goto out_release_instance;
  858. }
  859. pci_set_master(dev);
  860. if (dma_set_mask_and_coherent(&dev->dev, DMA_BIT_MASK(64)) &&
  861. dma_set_mask_and_coherent(&dev->dev, DMA_BIT_MASK(32))) {
  862. err = -ENODEV;
  863. goto out_release_instance;
  864. }
  865. if (features & VMD_FEAT_OFFSET_FIRST_VECTOR)
  866. vmd->first_vec = 1;
  867. raw_spin_lock_init(&vmd->cfg_lock);
  868. pci_set_drvdata(dev, vmd);
  869. err = vmd_enable_domain(vmd, features);
  870. if (err)
  871. goto out_release_instance;
  872. dev_info(&vmd->dev->dev, "Bound to PCI domain %04x\n",
  873. vmd->sysdata.domain);
  874. return 0;
  875. out_release_instance:
  876. ida_free(&vmd_instance_ida, vmd->instance);
  877. return err;
  878. }
  879. static void vmd_cleanup_srcu(struct vmd_dev *vmd)
  880. {
  881. int i;
  882. for (i = 0; i < vmd->msix_count; i++)
  883. cleanup_srcu_struct(&vmd->irqs[i].srcu);
  884. }
  885. static void vmd_remove(struct pci_dev *dev)
  886. {
  887. struct vmd_dev *vmd = pci_get_drvdata(dev);
  888. pci_stop_root_bus(vmd->bus);
  889. sysfs_remove_link(&vmd->dev->dev.kobj, "domain");
  890. pci_remove_root_bus(vmd->bus);
  891. vmd_cleanup_srcu(vmd);
  892. vmd_detach_resources(vmd);
  893. vmd_remove_irq_domain(vmd);
  894. ida_free(&vmd_instance_ida, vmd->instance);
  895. pci_bus_release_emul_domain_nr(vmd->sysdata.domain);
  896. }
  897. static void vmd_shutdown(struct pci_dev *dev)
  898. {
  899. struct vmd_dev *vmd = pci_get_drvdata(dev);
  900. vmd_remove_irq_domain(vmd);
  901. }
  902. #ifdef CONFIG_PM_SLEEP
  903. static int vmd_suspend(struct device *dev)
  904. {
  905. struct pci_dev *pdev = to_pci_dev(dev);
  906. struct vmd_dev *vmd = pci_get_drvdata(pdev);
  907. int i;
  908. for (i = 0; i < vmd->msix_count; i++)
  909. devm_free_irq(dev, vmd->irqs[i].virq, &vmd->irqs[i]);
  910. return 0;
  911. }
  912. static int vmd_resume(struct device *dev)
  913. {
  914. struct pci_dev *pdev = to_pci_dev(dev);
  915. struct vmd_dev *vmd = pci_get_drvdata(pdev);
  916. int err, i;
  917. vmd_set_msi_remapping(vmd, !!vmd->irq_domain);
  918. for (i = 0; i < vmd->msix_count; i++) {
  919. err = devm_request_irq(dev, vmd->irqs[i].virq,
  920. vmd_irq, IRQF_NO_THREAD,
  921. vmd->name, &vmd->irqs[i]);
  922. if (err)
  923. return err;
  924. }
  925. return 0;
  926. }
  927. #endif
/*
 * Declares vmd_dev_pm_ops (referenced from vmd_drv.driver.pm) with
 * vmd_suspend() and vmd_resume() as its suspend and resume callbacks.
 */
static SIMPLE_DEV_PM_OPS(vmd_dev_pm_ops, vmd_suspend, vmd_resume);
  929. static const struct pci_device_id vmd_ids[] = {
  930. {PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_VMD_201D),
  931. .driver_data = VMD_FEAT_HAS_MEMBAR_SHADOW_VSCAP,},
  932. {PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_VMD_28C0),
  933. .driver_data = VMD_FEAT_HAS_MEMBAR_SHADOW |
  934. VMD_FEAT_HAS_BUS_RESTRICTIONS |
  935. VMD_FEAT_CAN_BYPASS_MSI_REMAP,},
  936. {PCI_VDEVICE(INTEL, 0x467f),
  937. .driver_data = VMD_FEATS_CLIENT,},
  938. {PCI_VDEVICE(INTEL, 0x4c3d),
  939. .driver_data = VMD_FEATS_CLIENT,},
  940. {PCI_VDEVICE(INTEL, 0xa77f),
  941. .driver_data = VMD_FEATS_CLIENT,},
  942. {PCI_VDEVICE(INTEL, 0x7d0b),
  943. .driver_data = VMD_FEATS_CLIENT,},
  944. {PCI_VDEVICE(INTEL, 0xad0b),
  945. .driver_data = VMD_FEATS_CLIENT,},
  946. {PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_VMD_9A0B),
  947. .driver_data = VMD_FEATS_CLIENT,},
  948. {PCI_VDEVICE(INTEL, 0xb60b),
  949. .driver_data = VMD_FEATS_CLIENT,},
  950. {PCI_VDEVICE(INTEL, 0xb06f),
  951. .driver_data = VMD_FEATS_CLIENT,},
  952. {PCI_VDEVICE(INTEL, 0xb07f),
  953. .driver_data = VMD_FEATS_CLIENT,},
  954. {0,}
  955. };
  956. MODULE_DEVICE_TABLE(pci, vmd_ids);
  957. static struct pci_driver vmd_drv = {
  958. .name = "vmd",
  959. .id_table = vmd_ids,
  960. .probe = vmd_probe,
  961. .remove = vmd_remove,
  962. .shutdown = vmd_shutdown,
  963. .driver = {
  964. .pm = &vmd_dev_pm_ops,
  965. },
  966. };
  967. module_pci_driver(vmd_drv);
/* Module metadata: author, description, license and driver version. */
MODULE_AUTHOR("Intel Corporation");
MODULE_DESCRIPTION("Volume Management Device driver");
MODULE_LICENSE("GPL v2");
MODULE_VERSION("0.6");