vgic-v3-nested.c 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407
  1. // SPDX-License-Identifier: GPL-2.0-only
  2. #include <linux/cpu.h>
  3. #include <linux/kvm.h>
  4. #include <linux/kvm_host.h>
  5. #include <linux/interrupt.h>
  6. #include <linux/io.h>
  7. #include <linux/uaccess.h>
  8. #include <kvm/arm_vgic.h>
  9. #include <asm/kvm_arm.h>
  10. #include <asm/kvm_emulate.h>
  11. #include <asm/kvm_nested.h>
  12. #include "vgic.h"
  /*
   * Turn an index into a system register identifier by offsetting from the
   * first register of the LR, AP0R and AP1R banks respectively.
   */
  #define ICH_LRN(n)      (ICH_LR0_EL2 + (n))
  #define ICH_AP0RN(n)    (ICH_AP0R0_EL2 + (n))
  #define ICH_AP1RN(n)    (ICH_AP1R0_EL2 + (n))
  16. struct mi_state {
  17. u16 eisr;
  18. u16 elrsr;
  19. bool pend;
  20. };
  21. /*
  22. * The shadow registers loaded to the hardware when running a L2 guest
  23. * with the virtual IMO/FMO bits set.
  24. */
  25. struct shadow_if {
  26. struct vgic_v3_cpu_if cpuif;
  27. unsigned long lr_map;
  28. };
  29. static DEFINE_PER_CPU(struct shadow_if, shadow_if);
  30. static int lr_map_idx_to_shadow_idx(struct shadow_if *shadow_if, int idx)
  31. {
  32. return hweight16(shadow_if->lr_map & (BIT(idx) - 1));
  33. }
  34. /*
  35. * Nesting GICv3 support
  36. *
  37. * On a non-nesting VM (only running at EL0/EL1), the host hypervisor
  38. * completely controls the interrupts injected via the list registers.
  39. * Consequently, most of the state that is modified by the guest (by ACK-ing
  40. * and EOI-ing interrupts) is synced by KVM on each entry/exit, so that we
  41. * keep a semi-consistent view of the interrupts.
  42. *
  43. * This still applies for a NV guest, but only while "InHost" (either
  44. * running at EL2, or at EL0 with HCR_EL2.{E2H,TGE}=={1,1}).
  45. *
  46. * When running a L2 guest ("not InHost"), things are radically different,
  47. * as the L1 guest is in charge of provisioning the interrupts via its own
  48. * view of the ICH_LR*_EL2 registers, which conveniently live in the VNCR
  49. * page. This means that the flow described above does not apply (there is no
  50. * state to rebuild in the L0 hypervisor), and that most things happen on L2
  51. * load/put:
  52. *
  53. * - on L2 load: move the in-memory L1 vGIC configuration into a shadow,
  54. * per-CPU data structure that is used to populate the actual LRs. This is
  55. * an extra copy that we could avoid, but life is short. In the process,
  56. * we remap any interrupt that has the HW bit set to the mapped interrupt
  57. * on the host, should the host consider it a HW one. This allows the HW
  58. * deactivation to take its course, such as for the timer.
  59. *
  60. * - on L2 put: perform the inverse transformation, so that the result of L2
  61. * running becomes visible to L1 in the VNCR-accessible registers.
  62. *
  63. * - there is nothing to do on L2 entry apart from enabling the vgic, as
  64. * everything will have happened on load. However, this is the point where
  65. * we detect an interrupt targeting L1 and prepare the grand
  66. * switcheroo.
  67. *
  68. * - on L2 exit: resync the LRs and VMCR, emulate the HW bit, and deactivate
  69. * the corresponding L1 interrupt. The L0 active state will be cleared by
  70. * the HW if the L1 interrupt was itself backed by a HW interrupt.
  71. *
  72. * Maintenance Interrupt (MI) management:
  73. *
  74. * Since the L2 guest runs the vgic in its full glory, MIs get delivered and
  75. * used as a handover point between L2 and L1.
  76. *
  77. * - on delivery of a MI to L0 while L2 is running: make the L1 MI pending,
  78. * and let it rip. This will initiate a vcpu_put() on L2, and allow L1 to
  79. * run and process the MI.
  80. *
  81. * - L1 MI is a fully virtual interrupt, not linked to the host's MI. Its
  82. * state must be computed at each entry/exit of the guest, much like we do
  83. * it for the PMU interrupt.
  84. *
  85. * - because most of the ICH_*_EL2 registers live in the VNCR page, the
  86. * quality of emulation is poor: L1 can setup the vgic so that an MI would
  87. * immediately fire, and not observe anything until the next exit.
  88. * Similarly, a pending MI is not immediately disabled by clearing
  89. * ICH_HCR_EL2.En. Trying to read ICH_MISR_EL2 would do the trick, for
  90. * example.
  91. *
  92. * System register emulation:
  93. *
  94. * We get two classes of registers:
  95. *
  96. * - those backed by memory (LRs, APRs, HCR, VMCR): L1 can freely access
  97. * them, and L0 doesn't see a thing.
  98. *
  99. * - those that always trap (ELRSR, EISR, MISR): these are status registers
  100. * that are built on the fly based on the in-memory state.
  101. *
  102. * Only L1 can access the ICH_*_EL2 registers. A non-NV L2 obviously cannot,
  103. * and a NV L2 would either access the VNCR page provided by L1 (memory
  104. * based registers), or see the access redirected to L1 (registers that
  105. * trap) thanks to NV being set by L1.
  106. */
  107. bool vgic_state_is_nested(struct kvm_vcpu *vcpu)
  108. {
  109. u64 xmo;
  110. if (is_nested_ctxt(vcpu)) {
  111. xmo = __vcpu_sys_reg(vcpu, HCR_EL2) & (HCR_IMO | HCR_FMO);
  112. WARN_ONCE(xmo && xmo != (HCR_IMO | HCR_FMO),
  113. "Separate virtual IRQ/FIQ settings not supported\n");
  114. return !!xmo;
  115. }
  116. return false;
  117. }
  118. static struct shadow_if *get_shadow_if(void)
  119. {
  120. return this_cpu_ptr(&shadow_if);
  121. }
  122. static bool lr_triggers_eoi(u64 lr)
  123. {
  124. return !(lr & (ICH_LR_STATE | ICH_LR_HW)) && (lr & ICH_LR_EOI);
  125. }
  126. static void vgic_compute_mi_state(struct kvm_vcpu *vcpu, struct mi_state *mi_state)
  127. {
  128. u16 eisr = 0, elrsr = 0;
  129. bool pend = false;
  130. for (int i = 0; i < kvm_vgic_global_state.nr_lr; i++) {
  131. u64 lr = __vcpu_sys_reg(vcpu, ICH_LRN(i));
  132. if (lr_triggers_eoi(lr))
  133. eisr |= BIT(i);
  134. if (!(lr & ICH_LR_STATE))
  135. elrsr |= BIT(i);
  136. pend |= (lr & ICH_LR_PENDING_BIT);
  137. }
  138. mi_state->eisr = eisr;
  139. mi_state->elrsr = elrsr;
  140. mi_state->pend = pend;
  141. }
  142. u16 vgic_v3_get_eisr(struct kvm_vcpu *vcpu)
  143. {
  144. struct mi_state mi_state;
  145. vgic_compute_mi_state(vcpu, &mi_state);
  146. return mi_state.eisr;
  147. }
  148. u16 vgic_v3_get_elrsr(struct kvm_vcpu *vcpu)
  149. {
  150. struct mi_state mi_state;
  151. vgic_compute_mi_state(vcpu, &mi_state);
  152. return mi_state.elrsr;
  153. }
  154. u64 vgic_v3_get_misr(struct kvm_vcpu *vcpu)
  155. {
  156. struct mi_state mi_state;
  157. u64 reg = 0, hcr, vmcr;
  158. hcr = __vcpu_sys_reg(vcpu, ICH_HCR_EL2);
  159. vmcr = __vcpu_sys_reg(vcpu, ICH_VMCR_EL2);
  160. vgic_compute_mi_state(vcpu, &mi_state);
  161. if (mi_state.eisr)
  162. reg |= ICH_MISR_EL2_EOI;
  163. if (__vcpu_sys_reg(vcpu, ICH_HCR_EL2) & ICH_HCR_EL2_UIE) {
  164. int used_lrs = kvm_vgic_global_state.nr_lr;
  165. used_lrs -= hweight16(mi_state.elrsr);
  166. reg |= (used_lrs <= 1) ? ICH_MISR_EL2_U : 0;
  167. }
  168. if ((hcr & ICH_HCR_EL2_LRENPIE) && FIELD_GET(ICH_HCR_EL2_EOIcount_MASK, hcr))
  169. reg |= ICH_MISR_EL2_LRENP;
  170. if ((hcr & ICH_HCR_EL2_NPIE) && !mi_state.pend)
  171. reg |= ICH_MISR_EL2_NP;
  172. if ((hcr & ICH_HCR_EL2_VGrp0EIE) && (vmcr & ICH_VMCR_EL2_VENG0_MASK))
  173. reg |= ICH_MISR_EL2_VGrp0E;
  174. if ((hcr & ICH_HCR_EL2_VGrp0DIE) && !(vmcr & ICH_VMCR_EL2_VENG0_MASK))
  175. reg |= ICH_MISR_EL2_VGrp0D;
  176. if ((hcr & ICH_HCR_EL2_VGrp1EIE) && (vmcr & ICH_VMCR_EL2_VENG1_MASK))
  177. reg |= ICH_MISR_EL2_VGrp1E;
  178. if ((hcr & ICH_HCR_EL2_VGrp1DIE) && !(vmcr & ICH_VMCR_EL2_VENG1_MASK))
  179. reg |= ICH_MISR_EL2_VGrp1D;
  180. return reg;
  181. }
  182. static u64 translate_lr_pintid(struct kvm_vcpu *vcpu, u64 lr)
  183. {
  184. struct vgic_irq *irq;
  185. if (!(lr & ICH_LR_HW))
  186. return lr;
  187. /* We have the HW bit set, check for validity of pINTID */
  188. irq = vgic_get_vcpu_irq(vcpu, FIELD_GET(ICH_LR_PHYS_ID_MASK, lr));
  189. /* If there was no real mapping, nuke the HW bit */
  190. if (!irq || !irq->hw || irq->intid > VGIC_MAX_SPI)
  191. lr &= ~ICH_LR_HW;
  192. /* Translate the virtual mapping to the real one, even if invalid */
  193. if (irq) {
  194. lr &= ~ICH_LR_PHYS_ID_MASK;
  195. lr |= FIELD_PREP(ICH_LR_PHYS_ID_MASK, (u64)irq->hwintid);
  196. vgic_put_irq(vcpu->kvm, irq);
  197. }
  198. return lr;
  199. }
  200. /*
  201. * For LRs which have HW bit set such as timer interrupts, we modify them to
  202. * have the host hardware interrupt number instead of the virtual one programmed
  203. * by the guest hypervisor.
  204. */
  205. static void vgic_v3_create_shadow_lr(struct kvm_vcpu *vcpu,
  206. struct vgic_v3_cpu_if *s_cpu_if)
  207. {
  208. struct shadow_if *shadow_if;
  209. shadow_if = container_of(s_cpu_if, struct shadow_if, cpuif);
  210. shadow_if->lr_map = 0;
  211. for (int i = 0; i < kvm_vgic_global_state.nr_lr; i++) {
  212. u64 lr = __vcpu_sys_reg(vcpu, ICH_LRN(i));
  213. if (!(lr & ICH_LR_STATE))
  214. continue;
  215. lr = translate_lr_pintid(vcpu, lr);
  216. s_cpu_if->vgic_lr[hweight16(shadow_if->lr_map)] = lr;
  217. shadow_if->lr_map |= BIT(i);
  218. }
  219. s_cpu_if->used_lrs = hweight16(shadow_if->lr_map);
  220. }
  221. void vgic_v3_flush_nested(struct kvm_vcpu *vcpu)
  222. {
  223. u64 val = __vcpu_sys_reg(vcpu, ICH_HCR_EL2);
  224. write_sysreg_s(val | vgic_ich_hcr_trap_bits(), SYS_ICH_HCR_EL2);
  225. }
  226. void vgic_v3_sync_nested(struct kvm_vcpu *vcpu)
  227. {
  228. struct shadow_if *shadow_if = get_shadow_if();
  229. int i;
  230. for_each_set_bit(i, &shadow_if->lr_map, kvm_vgic_global_state.nr_lr) {
  231. u64 val, host_lr, lr;
  232. host_lr = __gic_v3_get_lr(lr_map_idx_to_shadow_idx(shadow_if, i));
  233. /* Propagate the new LR state */
  234. lr = __vcpu_sys_reg(vcpu, ICH_LRN(i));
  235. val = lr & ~ICH_LR_STATE;
  236. val |= host_lr & ICH_LR_STATE;
  237. __vcpu_assign_sys_reg(vcpu, ICH_LRN(i), val);
  238. /*
  239. * Deactivation of a HW interrupt: the LR must have the HW
  240. * bit set, have been in a non-invalid state before the run,
  241. * and now be in an invalid state. If any of that doesn't
  242. * hold, we're done with this LR.
  243. */
  244. if (!((lr & ICH_LR_HW) && (lr & ICH_LR_STATE) &&
  245. !(host_lr & ICH_LR_STATE)))
  246. continue;
  247. /*
  248. * If we had a HW lr programmed by the guest hypervisor, we
  249. * need to emulate the HW effect between the guest hypervisor
  250. * and the nested guest.
  251. */
  252. vgic_v3_deactivate(vcpu, FIELD_GET(ICH_LR_PHYS_ID_MASK, lr));
  253. }
  254. /* We need these to be synchronised to generate the MI */
  255. __vcpu_assign_sys_reg(vcpu, ICH_VMCR_EL2, read_sysreg_s(SYS_ICH_VMCR_EL2));
  256. __vcpu_rmw_sys_reg(vcpu, ICH_HCR_EL2, &=, ~ICH_HCR_EL2_EOIcount);
  257. __vcpu_rmw_sys_reg(vcpu, ICH_HCR_EL2, |=, read_sysreg_s(SYS_ICH_HCR_EL2) & ICH_HCR_EL2_EOIcount);
  258. write_sysreg_s(0, SYS_ICH_HCR_EL2);
  259. isb();
  260. vgic_v3_nested_update_mi(vcpu);
  261. }
  262. static void vgic_v3_create_shadow_state(struct kvm_vcpu *vcpu,
  263. struct vgic_v3_cpu_if *s_cpu_if)
  264. {
  265. struct vgic_v3_cpu_if *host_if = &vcpu->arch.vgic_cpu.vgic_v3;
  266. int i;
  267. s_cpu_if->vgic_hcr = __vcpu_sys_reg(vcpu, ICH_HCR_EL2);
  268. s_cpu_if->vgic_vmcr = __vcpu_sys_reg(vcpu, ICH_VMCR_EL2);
  269. s_cpu_if->vgic_sre = host_if->vgic_sre;
  270. for (i = 0; i < 4; i++) {
  271. s_cpu_if->vgic_ap0r[i] = __vcpu_sys_reg(vcpu, ICH_AP0RN(i));
  272. s_cpu_if->vgic_ap1r[i] = __vcpu_sys_reg(vcpu, ICH_AP1RN(i));
  273. }
  274. vgic_v3_create_shadow_lr(vcpu, s_cpu_if);
  275. }
  276. void vgic_v3_load_nested(struct kvm_vcpu *vcpu)
  277. {
  278. struct shadow_if *shadow_if = get_shadow_if();
  279. struct vgic_v3_cpu_if *cpu_if = &shadow_if->cpuif;
  280. BUG_ON(!vgic_state_is_nested(vcpu));
  281. vgic_v3_create_shadow_state(vcpu, cpu_if);
  282. __vgic_v3_restore_vmcr_aprs(cpu_if);
  283. __vgic_v3_activate_traps(cpu_if);
  284. for (int i = 0; i < cpu_if->used_lrs; i++)
  285. __gic_v3_set_lr(cpu_if->vgic_lr[i], i);
  286. /*
  287. * Propagate the number of used LRs for the benefit of the HYP
  288. * GICv3 emulation code. Yes, this is a pretty sorry hack.
  289. */
  290. vcpu->arch.vgic_cpu.vgic_v3.used_lrs = cpu_if->used_lrs;
  291. }
  292. void vgic_v3_put_nested(struct kvm_vcpu *vcpu)
  293. {
  294. struct shadow_if *shadow_if = get_shadow_if();
  295. struct vgic_v3_cpu_if *s_cpu_if = &shadow_if->cpuif;
  296. int i;
  297. __vgic_v3_save_aprs(s_cpu_if);
  298. for (i = 0; i < 4; i++) {
  299. __vcpu_assign_sys_reg(vcpu, ICH_AP0RN(i), s_cpu_if->vgic_ap0r[i]);
  300. __vcpu_assign_sys_reg(vcpu, ICH_AP1RN(i), s_cpu_if->vgic_ap1r[i]);
  301. }
  302. for (i = 0; i < s_cpu_if->used_lrs; i++)
  303. __gic_v3_set_lr(0, i);
  304. __vgic_v3_deactivate_traps(s_cpu_if);
  305. vcpu->arch.vgic_cpu.vgic_v3.used_lrs = 0;
  306. }
  307. /*
  308. * If we exit a L2 VM with a pending maintenance interrupt from the GIC,
  309. * then we need to forward this to L1 so that it can re-sync the appropriate
  310. * LRs and sample level triggered interrupts again.
  311. */
  312. void vgic_v3_handle_nested_maint_irq(struct kvm_vcpu *vcpu)
  313. {
  314. bool state = read_sysreg_s(SYS_ICH_MISR_EL2);
  315. /* This will force a switch back to L1 if the level is high */
  316. kvm_vgic_inject_irq(vcpu->kvm, vcpu,
  317. vcpu->kvm->arch.vgic.mi_intid, state, vcpu);
  318. sysreg_clear_set_s(SYS_ICH_HCR_EL2, ICH_HCR_EL2_En, 0);
  319. }
  320. void vgic_v3_nested_update_mi(struct kvm_vcpu *vcpu)
  321. {
  322. bool level;
  323. level = (__vcpu_sys_reg(vcpu, ICH_HCR_EL2) & ICH_HCR_EL2_En) && vgic_v3_get_misr(vcpu);
  324. kvm_vgic_inject_irq(vcpu->kvm, vcpu,
  325. vcpu->kvm->arch.vgic.mi_intid, level, vcpu);
  326. }