arch_timer.c 43 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483148414851486148714881489149014911492149314941495149614971498149915001501150215031504150515061507150815091510151115121513151415151516151715181519152015211522152315241525152615271528152915301531153215331534153515361537153815391540154115421543154415451546154715481549155015511552155315541555155615571558155915601561156215631564156515661567156815691570157115721573157415751576157715781579158015811582158315841585158615871588158915901591159215931594159515961597159815991600160116021603160416051606160716081609161016111612161316141615161616171618161916201621162216231624162516261627162816291630163116321633163416351636163716381639164016411642164316441645164616471648164916501651165216531654165516561657165816591660166116621663166416651666166716681669167016711672167316741675167616771678167916801681168216831684168516861687168816891690169116921693169416951696169716981699170017011702170317041705170617071708170917101711171217131714171517161717171817191720
  1. // SPDX-License-Identifier: GPL-2.0-only
  2. /*
  3. * Copyright (C) 2012 ARM Ltd.
  4. * Author: Marc Zyngier <marc.zyngier@arm.com>
  5. */
  6. #include <linux/cpu.h>
  7. #include <linux/kvm.h>
  8. #include <linux/kvm_host.h>
  9. #include <linux/interrupt.h>
  10. #include <linux/irq.h>
  11. #include <linux/irqdomain.h>
  12. #include <linux/uaccess.h>
  13. #include <clocksource/arm_arch_timer.h>
  14. #include <asm/arch_timer.h>
  15. #include <asm/kvm_emulate.h>
  16. #include <asm/kvm_hyp.h>
  17. #include <asm/kvm_nested.h>
  18. #include <kvm/arm_vgic.h>
  19. #include <kvm/arm_arch_timer.h>
  20. #include "trace.h"
  21. static struct timecounter *timecounter;
  22. static unsigned int host_vtimer_irq;
  23. static unsigned int host_ptimer_irq;
  24. static u32 host_vtimer_irq_flags;
  25. static u32 host_ptimer_irq_flags;
  26. static DEFINE_STATIC_KEY_FALSE(has_gic_active_state);
  27. DEFINE_STATIC_KEY_FALSE(broken_cntvoff_key);
  28. static const u8 default_ppi[] = {
  29. [TIMER_PTIMER] = 30,
  30. [TIMER_VTIMER] = 27,
  31. [TIMER_HPTIMER] = 26,
  32. [TIMER_HVTIMER] = 28,
  33. };
  34. static bool kvm_timer_irq_can_fire(struct arch_timer_context *timer_ctx);
  35. static void kvm_timer_update_irq(struct kvm_vcpu *vcpu, bool new_level,
  36. struct arch_timer_context *timer_ctx);
  37. static bool kvm_timer_should_fire(struct arch_timer_context *timer_ctx);
  38. static void kvm_arm_timer_write(struct kvm_vcpu *vcpu,
  39. struct arch_timer_context *timer,
  40. enum kvm_arch_timer_regs treg,
  41. u64 val);
  42. static u64 kvm_arm_timer_read(struct kvm_vcpu *vcpu,
  43. struct arch_timer_context *timer,
  44. enum kvm_arch_timer_regs treg);
  45. static bool kvm_arch_timer_get_input_level(int vintid);
  46. static struct irq_ops arch_timer_irq_ops = {
  47. .get_input_level = kvm_arch_timer_get_input_level,
  48. };
  49. static int nr_timers(struct kvm_vcpu *vcpu)
  50. {
  51. if (!vcpu_has_nv(vcpu))
  52. return NR_KVM_EL0_TIMERS;
  53. return NR_KVM_TIMERS;
  54. }
  55. u32 timer_get_ctl(struct arch_timer_context *ctxt)
  56. {
  57. struct kvm_vcpu *vcpu = timer_context_to_vcpu(ctxt);
  58. switch(arch_timer_ctx_index(ctxt)) {
  59. case TIMER_VTIMER:
  60. return __vcpu_sys_reg(vcpu, CNTV_CTL_EL0);
  61. case TIMER_PTIMER:
  62. return __vcpu_sys_reg(vcpu, CNTP_CTL_EL0);
  63. case TIMER_HVTIMER:
  64. return __vcpu_sys_reg(vcpu, CNTHV_CTL_EL2);
  65. case TIMER_HPTIMER:
  66. return __vcpu_sys_reg(vcpu, CNTHP_CTL_EL2);
  67. default:
  68. WARN_ON(1);
  69. return 0;
  70. }
  71. }
  72. u64 timer_get_cval(struct arch_timer_context *ctxt)
  73. {
  74. struct kvm_vcpu *vcpu = timer_context_to_vcpu(ctxt);
  75. switch(arch_timer_ctx_index(ctxt)) {
  76. case TIMER_VTIMER:
  77. return __vcpu_sys_reg(vcpu, CNTV_CVAL_EL0);
  78. case TIMER_PTIMER:
  79. return __vcpu_sys_reg(vcpu, CNTP_CVAL_EL0);
  80. case TIMER_HVTIMER:
  81. return __vcpu_sys_reg(vcpu, CNTHV_CVAL_EL2);
  82. case TIMER_HPTIMER:
  83. return __vcpu_sys_reg(vcpu, CNTHP_CVAL_EL2);
  84. default:
  85. WARN_ON(1);
  86. return 0;
  87. }
  88. }
  89. static void timer_set_ctl(struct arch_timer_context *ctxt, u32 ctl)
  90. {
  91. struct kvm_vcpu *vcpu = timer_context_to_vcpu(ctxt);
  92. switch(arch_timer_ctx_index(ctxt)) {
  93. case TIMER_VTIMER:
  94. __vcpu_assign_sys_reg(vcpu, CNTV_CTL_EL0, ctl);
  95. break;
  96. case TIMER_PTIMER:
  97. __vcpu_assign_sys_reg(vcpu, CNTP_CTL_EL0, ctl);
  98. break;
  99. case TIMER_HVTIMER:
  100. __vcpu_assign_sys_reg(vcpu, CNTHV_CTL_EL2, ctl);
  101. break;
  102. case TIMER_HPTIMER:
  103. __vcpu_assign_sys_reg(vcpu, CNTHP_CTL_EL2, ctl);
  104. break;
  105. default:
  106. WARN_ON(1);
  107. }
  108. }
  109. static void timer_set_cval(struct arch_timer_context *ctxt, u64 cval)
  110. {
  111. struct kvm_vcpu *vcpu = timer_context_to_vcpu(ctxt);
  112. switch(arch_timer_ctx_index(ctxt)) {
  113. case TIMER_VTIMER:
  114. __vcpu_assign_sys_reg(vcpu, CNTV_CVAL_EL0, cval);
  115. break;
  116. case TIMER_PTIMER:
  117. __vcpu_assign_sys_reg(vcpu, CNTP_CVAL_EL0, cval);
  118. break;
  119. case TIMER_HVTIMER:
  120. __vcpu_assign_sys_reg(vcpu, CNTHV_CVAL_EL2, cval);
  121. break;
  122. case TIMER_HPTIMER:
  123. __vcpu_assign_sys_reg(vcpu, CNTHP_CVAL_EL2, cval);
  124. break;
  125. default:
  126. WARN_ON(1);
  127. }
  128. }
  129. u64 kvm_phys_timer_read(void)
  130. {
  131. return timecounter->cc->read(timecounter->cc);
  132. }
  133. void get_timer_map(struct kvm_vcpu *vcpu, struct timer_map *map)
  134. {
  135. if (vcpu_has_nv(vcpu)) {
  136. if (is_hyp_ctxt(vcpu)) {
  137. map->direct_vtimer = vcpu_hvtimer(vcpu);
  138. map->direct_ptimer = vcpu_hptimer(vcpu);
  139. map->emul_vtimer = vcpu_vtimer(vcpu);
  140. map->emul_ptimer = vcpu_ptimer(vcpu);
  141. } else {
  142. map->direct_vtimer = vcpu_vtimer(vcpu);
  143. map->direct_ptimer = vcpu_ptimer(vcpu);
  144. map->emul_vtimer = vcpu_hvtimer(vcpu);
  145. map->emul_ptimer = vcpu_hptimer(vcpu);
  146. }
  147. } else if (has_vhe()) {
  148. map->direct_vtimer = vcpu_vtimer(vcpu);
  149. map->direct_ptimer = vcpu_ptimer(vcpu);
  150. map->emul_vtimer = NULL;
  151. map->emul_ptimer = NULL;
  152. } else {
  153. map->direct_vtimer = vcpu_vtimer(vcpu);
  154. map->direct_ptimer = NULL;
  155. map->emul_vtimer = NULL;
  156. map->emul_ptimer = vcpu_ptimer(vcpu);
  157. }
  158. trace_kvm_get_timer_map(vcpu->vcpu_id, map);
  159. }
  160. static inline bool userspace_irqchip(struct kvm *kvm)
  161. {
  162. return unlikely(!irqchip_in_kernel(kvm));
  163. }
  164. static void soft_timer_start(struct hrtimer *hrt, u64 ns)
  165. {
  166. hrtimer_start(hrt, ktime_add_ns(ktime_get(), ns),
  167. HRTIMER_MODE_ABS_HARD);
  168. }
  169. static void soft_timer_cancel(struct hrtimer *hrt)
  170. {
  171. hrtimer_cancel(hrt);
  172. }
  173. static irqreturn_t kvm_arch_timer_handler(int irq, void *dev_id)
  174. {
  175. struct kvm_vcpu *vcpu = *(struct kvm_vcpu **)dev_id;
  176. struct arch_timer_context *ctx;
  177. struct timer_map map;
  178. /*
  179. * We may see a timer interrupt after vcpu_put() has been called which
  180. * sets the CPU's vcpu pointer to NULL, because even though the timer
  181. * has been disabled in timer_save_state(), the hardware interrupt
  182. * signal may not have been retired from the interrupt controller yet.
  183. */
  184. if (!vcpu)
  185. return IRQ_HANDLED;
  186. get_timer_map(vcpu, &map);
  187. if (irq == host_vtimer_irq)
  188. ctx = map.direct_vtimer;
  189. else
  190. ctx = map.direct_ptimer;
  191. if (kvm_timer_should_fire(ctx))
  192. kvm_timer_update_irq(vcpu, true, ctx);
  193. if (userspace_irqchip(vcpu->kvm) &&
  194. !static_branch_unlikely(&has_gic_active_state))
  195. disable_percpu_irq(host_vtimer_irq);
  196. return IRQ_HANDLED;
  197. }
  198. static u64 kvm_counter_compute_delta(struct arch_timer_context *timer_ctx,
  199. u64 val)
  200. {
  201. u64 now = kvm_phys_timer_read() - timer_get_offset(timer_ctx);
  202. if (now < val) {
  203. u64 ns;
  204. ns = cyclecounter_cyc2ns(timecounter->cc,
  205. val - now,
  206. timecounter->mask,
  207. &timer_ctx->ns_frac);
  208. return ns;
  209. }
  210. return 0;
  211. }
  212. static u64 kvm_timer_compute_delta(struct arch_timer_context *timer_ctx)
  213. {
  214. return kvm_counter_compute_delta(timer_ctx, timer_get_cval(timer_ctx));
  215. }
  216. static bool kvm_timer_irq_can_fire(struct arch_timer_context *timer_ctx)
  217. {
  218. WARN_ON(timer_ctx && timer_ctx->loaded);
  219. return timer_ctx &&
  220. ((timer_get_ctl(timer_ctx) &
  221. (ARCH_TIMER_CTRL_IT_MASK | ARCH_TIMER_CTRL_ENABLE)) == ARCH_TIMER_CTRL_ENABLE);
  222. }
  223. static bool vcpu_has_wfit_active(struct kvm_vcpu *vcpu)
  224. {
  225. return (cpus_have_final_cap(ARM64_HAS_WFXT) &&
  226. vcpu_get_flag(vcpu, IN_WFIT));
  227. }
  228. static u64 wfit_delay_ns(struct kvm_vcpu *vcpu)
  229. {
  230. u64 val = vcpu_get_reg(vcpu, kvm_vcpu_sys_get_rt(vcpu));
  231. struct arch_timer_context *ctx;
  232. ctx = is_hyp_ctxt(vcpu) ? vcpu_hvtimer(vcpu) : vcpu_vtimer(vcpu);
  233. return kvm_counter_compute_delta(ctx, val);
  234. }
  235. /*
  236. * Returns the earliest expiration time in ns among guest timers.
  237. * Note that it will return 0 if none of timers can fire.
  238. */
  239. static u64 kvm_timer_earliest_exp(struct kvm_vcpu *vcpu)
  240. {
  241. u64 min_delta = ULLONG_MAX;
  242. int i;
  243. for (i = 0; i < nr_timers(vcpu); i++) {
  244. struct arch_timer_context *ctx = &vcpu->arch.timer_cpu.timers[i];
  245. WARN(ctx->loaded, "timer %d loaded\n", i);
  246. if (kvm_timer_irq_can_fire(ctx))
  247. min_delta = min(min_delta, kvm_timer_compute_delta(ctx));
  248. }
  249. if (vcpu_has_wfit_active(vcpu))
  250. min_delta = min(min_delta, wfit_delay_ns(vcpu));
  251. /* If none of timers can fire, then return 0 */
  252. if (min_delta == ULLONG_MAX)
  253. return 0;
  254. return min_delta;
  255. }
  256. static enum hrtimer_restart kvm_bg_timer_expire(struct hrtimer *hrt)
  257. {
  258. struct arch_timer_cpu *timer;
  259. struct kvm_vcpu *vcpu;
  260. u64 ns;
  261. timer = container_of(hrt, struct arch_timer_cpu, bg_timer);
  262. vcpu = container_of(timer, struct kvm_vcpu, arch.timer_cpu);
  263. /*
  264. * Check that the timer has really expired from the guest's
  265. * PoV (NTP on the host may have forced it to expire
  266. * early). If we should have slept longer, restart it.
  267. */
  268. ns = kvm_timer_earliest_exp(vcpu);
  269. if (unlikely(ns)) {
  270. hrtimer_forward_now(hrt, ns_to_ktime(ns));
  271. return HRTIMER_RESTART;
  272. }
  273. kvm_vcpu_wake_up(vcpu);
  274. return HRTIMER_NORESTART;
  275. }
  276. static enum hrtimer_restart kvm_hrtimer_expire(struct hrtimer *hrt)
  277. {
  278. struct arch_timer_context *ctx;
  279. struct kvm_vcpu *vcpu;
  280. u64 ns;
  281. ctx = container_of(hrt, struct arch_timer_context, hrtimer);
  282. vcpu = timer_context_to_vcpu(ctx);
  283. trace_kvm_timer_hrtimer_expire(ctx);
  284. /*
  285. * Check that the timer has really expired from the guest's
  286. * PoV (NTP on the host may have forced it to expire
  287. * early). If not ready, schedule for a later time.
  288. */
  289. ns = kvm_timer_compute_delta(ctx);
  290. if (unlikely(ns)) {
  291. hrtimer_forward_now(hrt, ns_to_ktime(ns));
  292. return HRTIMER_RESTART;
  293. }
  294. kvm_timer_update_irq(vcpu, true, ctx);
  295. return HRTIMER_NORESTART;
  296. }
  297. static bool kvm_timer_should_fire(struct arch_timer_context *timer_ctx)
  298. {
  299. enum kvm_arch_timers index;
  300. u64 cval, now;
  301. if (!timer_ctx)
  302. return false;
  303. index = arch_timer_ctx_index(timer_ctx);
  304. if (timer_ctx->loaded) {
  305. u32 cnt_ctl = 0;
  306. switch (index) {
  307. case TIMER_VTIMER:
  308. case TIMER_HVTIMER:
  309. cnt_ctl = read_sysreg_el0(SYS_CNTV_CTL);
  310. break;
  311. case TIMER_PTIMER:
  312. case TIMER_HPTIMER:
  313. cnt_ctl = read_sysreg_el0(SYS_CNTP_CTL);
  314. break;
  315. case NR_KVM_TIMERS:
  316. /* GCC is braindead */
  317. cnt_ctl = 0;
  318. break;
  319. }
  320. return (cnt_ctl & ARCH_TIMER_CTRL_ENABLE) &&
  321. (cnt_ctl & ARCH_TIMER_CTRL_IT_STAT) &&
  322. !(cnt_ctl & ARCH_TIMER_CTRL_IT_MASK);
  323. }
  324. if (!kvm_timer_irq_can_fire(timer_ctx))
  325. return false;
  326. cval = timer_get_cval(timer_ctx);
  327. now = kvm_phys_timer_read() - timer_get_offset(timer_ctx);
  328. return cval <= now;
  329. }
  330. int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
  331. {
  332. return vcpu_has_wfit_active(vcpu) && wfit_delay_ns(vcpu) == 0;
  333. }
  334. /*
  335. * Reflect the timer output level into the kvm_run structure
  336. */
  337. void kvm_timer_update_run(struct kvm_vcpu *vcpu)
  338. {
  339. struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
  340. struct arch_timer_context *ptimer = vcpu_ptimer(vcpu);
  341. struct kvm_sync_regs *regs = &vcpu->run->s.regs;
  342. /* Populate the device bitmap with the timer states */
  343. regs->device_irq_level &= ~(KVM_ARM_DEV_EL1_VTIMER |
  344. KVM_ARM_DEV_EL1_PTIMER);
  345. if (kvm_timer_should_fire(vtimer))
  346. regs->device_irq_level |= KVM_ARM_DEV_EL1_VTIMER;
  347. if (kvm_timer_should_fire(ptimer))
  348. regs->device_irq_level |= KVM_ARM_DEV_EL1_PTIMER;
  349. }
  350. static void kvm_timer_update_status(struct arch_timer_context *ctx, bool level)
  351. {
  352. /*
  353. * Paper over NV2 brokenness by publishing the interrupt status
  354. * bit. This still results in a poor quality of emulation (guest
  355. * writes will have no effect until the next exit).
  356. *
  357. * But hey, it's fast, right?
  358. */
  359. struct kvm_vcpu *vcpu = timer_context_to_vcpu(ctx);
  360. if (is_hyp_ctxt(vcpu) &&
  361. (ctx == vcpu_vtimer(vcpu) || ctx == vcpu_ptimer(vcpu))) {
  362. unsigned long val = timer_get_ctl(ctx);
  363. __assign_bit(__ffs(ARCH_TIMER_CTRL_IT_STAT), &val, level);
  364. timer_set_ctl(ctx, val);
  365. }
  366. }
  367. static void kvm_timer_update_irq(struct kvm_vcpu *vcpu, bool new_level,
  368. struct arch_timer_context *timer_ctx)
  369. {
  370. kvm_timer_update_status(timer_ctx, new_level);
  371. timer_ctx->irq.level = new_level;
  372. trace_kvm_timer_update_irq(vcpu->vcpu_id, timer_irq(timer_ctx),
  373. timer_ctx->irq.level);
  374. if (userspace_irqchip(vcpu->kvm))
  375. return;
  376. kvm_vgic_inject_irq(vcpu->kvm, vcpu,
  377. timer_irq(timer_ctx),
  378. timer_ctx->irq.level,
  379. timer_ctx);
  380. }
  381. /* Only called for a fully emulated timer */
  382. static void timer_emulate(struct arch_timer_context *ctx)
  383. {
  384. bool should_fire = kvm_timer_should_fire(ctx);
  385. trace_kvm_timer_emulate(ctx, should_fire);
  386. if (should_fire != ctx->irq.level)
  387. kvm_timer_update_irq(timer_context_to_vcpu(ctx), should_fire, ctx);
  388. kvm_timer_update_status(ctx, should_fire);
  389. /*
  390. * If the timer can fire now, we don't need to have a soft timer
  391. * scheduled for the future. If the timer cannot fire at all,
  392. * then we also don't need a soft timer.
  393. */
  394. if (should_fire || !kvm_timer_irq_can_fire(ctx))
  395. return;
  396. soft_timer_start(&ctx->hrtimer, kvm_timer_compute_delta(ctx));
  397. }
  398. static void set_cntvoff(u64 cntvoff)
  399. {
  400. kvm_call_hyp(__kvm_timer_set_cntvoff, cntvoff);
  401. }
  402. static void set_cntpoff(u64 cntpoff)
  403. {
  404. if (has_cntpoff())
  405. write_sysreg_s(cntpoff, SYS_CNTPOFF_EL2);
  406. }
  407. static void timer_save_state(struct arch_timer_context *ctx)
  408. {
  409. struct arch_timer_cpu *timer = vcpu_timer(timer_context_to_vcpu(ctx));
  410. enum kvm_arch_timers index = arch_timer_ctx_index(ctx);
  411. unsigned long flags;
  412. if (!timer->enabled)
  413. return;
  414. local_irq_save(flags);
  415. if (!ctx->loaded)
  416. goto out;
  417. switch (index) {
  418. u64 cval;
  419. case TIMER_VTIMER:
  420. case TIMER_HVTIMER:
  421. timer_set_ctl(ctx, read_sysreg_el0(SYS_CNTV_CTL));
  422. cval = read_sysreg_el0(SYS_CNTV_CVAL);
  423. if (has_broken_cntvoff())
  424. cval -= timer_get_offset(ctx);
  425. timer_set_cval(ctx, cval);
  426. /* Disable the timer */
  427. write_sysreg_el0(0, SYS_CNTV_CTL);
  428. isb();
  429. /*
  430. * The kernel may decide to run userspace after
  431. * calling vcpu_put, so we reset cntvoff to 0 to
  432. * ensure a consistent read between user accesses to
  433. * the virtual counter and kernel access to the
  434. * physical counter of non-VHE case.
  435. *
  436. * For VHE, the virtual counter uses a fixed virtual
  437. * offset of zero, so no need to zero CNTVOFF_EL2
  438. * register, but this is actually useful when switching
  439. * between EL1/vEL2 with NV.
  440. *
  441. * Do it unconditionally, as this is either unavoidable
  442. * or dirt cheap.
  443. */
  444. set_cntvoff(0);
  445. break;
  446. case TIMER_PTIMER:
  447. case TIMER_HPTIMER:
  448. timer_set_ctl(ctx, read_sysreg_el0(SYS_CNTP_CTL));
  449. cval = read_sysreg_el0(SYS_CNTP_CVAL);
  450. cval -= timer_get_offset(ctx);
  451. timer_set_cval(ctx, cval);
  452. /* Disable the timer */
  453. write_sysreg_el0(0, SYS_CNTP_CTL);
  454. isb();
  455. set_cntpoff(0);
  456. break;
  457. case NR_KVM_TIMERS:
  458. BUG();
  459. }
  460. trace_kvm_timer_save_state(ctx);
  461. ctx->loaded = false;
  462. out:
  463. local_irq_restore(flags);
  464. }
  465. /*
  466. * Schedule the background timer before calling kvm_vcpu_halt, so that this
  467. * thread is removed from its waitqueue and made runnable when there's a timer
  468. * interrupt to handle.
  469. */
  470. static void kvm_timer_blocking(struct kvm_vcpu *vcpu)
  471. {
  472. struct arch_timer_cpu *timer = vcpu_timer(vcpu);
  473. struct timer_map map;
  474. get_timer_map(vcpu, &map);
  475. /*
  476. * If no timers are capable of raising interrupts (disabled or
  477. * masked), then there's no more work for us to do.
  478. */
  479. if (!kvm_timer_irq_can_fire(map.direct_vtimer) &&
  480. !kvm_timer_irq_can_fire(map.direct_ptimer) &&
  481. !kvm_timer_irq_can_fire(map.emul_vtimer) &&
  482. !kvm_timer_irq_can_fire(map.emul_ptimer) &&
  483. !vcpu_has_wfit_active(vcpu))
  484. return;
  485. /*
  486. * At least one guest time will expire. Schedule a background timer.
  487. * Set the earliest expiration time among the guest timers.
  488. */
  489. soft_timer_start(&timer->bg_timer, kvm_timer_earliest_exp(vcpu));
  490. }
  491. static void kvm_timer_unblocking(struct kvm_vcpu *vcpu)
  492. {
  493. struct arch_timer_cpu *timer = vcpu_timer(vcpu);
  494. soft_timer_cancel(&timer->bg_timer);
  495. }
  496. static void timer_restore_state(struct arch_timer_context *ctx)
  497. {
  498. struct arch_timer_cpu *timer = vcpu_timer(timer_context_to_vcpu(ctx));
  499. enum kvm_arch_timers index = arch_timer_ctx_index(ctx);
  500. unsigned long flags;
  501. if (!timer->enabled)
  502. return;
  503. local_irq_save(flags);
  504. if (ctx->loaded)
  505. goto out;
  506. switch (index) {
  507. u64 cval, offset;
  508. case TIMER_VTIMER:
  509. case TIMER_HVTIMER:
  510. cval = timer_get_cval(ctx);
  511. offset = timer_get_offset(ctx);
  512. if (has_broken_cntvoff()) {
  513. set_cntvoff(0);
  514. cval += offset;
  515. } else {
  516. set_cntvoff(offset);
  517. }
  518. write_sysreg_el0(cval, SYS_CNTV_CVAL);
  519. isb();
  520. write_sysreg_el0(timer_get_ctl(ctx), SYS_CNTV_CTL);
  521. break;
  522. case TIMER_PTIMER:
  523. case TIMER_HPTIMER:
  524. cval = timer_get_cval(ctx);
  525. offset = timer_get_offset(ctx);
  526. set_cntpoff(offset);
  527. cval += offset;
  528. write_sysreg_el0(cval, SYS_CNTP_CVAL);
  529. isb();
  530. write_sysreg_el0(timer_get_ctl(ctx), SYS_CNTP_CTL);
  531. break;
  532. case NR_KVM_TIMERS:
  533. BUG();
  534. }
  535. trace_kvm_timer_restore_state(ctx);
  536. ctx->loaded = true;
  537. out:
  538. local_irq_restore(flags);
  539. }
  540. static inline void set_timer_irq_phys_active(struct arch_timer_context *ctx, bool active)
  541. {
  542. int r;
  543. r = irq_set_irqchip_state(ctx->host_timer_irq, IRQCHIP_STATE_ACTIVE, active);
  544. WARN_ON(r);
  545. }
  546. static void kvm_timer_vcpu_load_gic(struct arch_timer_context *ctx)
  547. {
  548. struct kvm_vcpu *vcpu = timer_context_to_vcpu(ctx);
  549. bool phys_active = false;
  550. /*
  551. * Update the timer output so that it is likely to match the
  552. * state we're about to restore. If the timer expires between
  553. * this point and the register restoration, we'll take the
  554. * interrupt anyway.
  555. */
  556. kvm_timer_update_irq(vcpu, kvm_timer_should_fire(ctx), ctx);
  557. if (irqchip_in_kernel(vcpu->kvm))
  558. phys_active = kvm_vgic_map_is_active(vcpu, timer_irq(ctx));
  559. phys_active |= ctx->irq.level;
  560. set_timer_irq_phys_active(ctx, phys_active);
  561. }
  562. static void kvm_timer_vcpu_load_nogic(struct kvm_vcpu *vcpu)
  563. {
  564. struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
  565. /*
  566. * Update the timer output so that it is likely to match the
  567. * state we're about to restore. If the timer expires between
  568. * this point and the register restoration, we'll take the
  569. * interrupt anyway.
  570. */
  571. kvm_timer_update_irq(vcpu, kvm_timer_should_fire(vtimer), vtimer);
  572. /*
  573. * When using a userspace irqchip with the architected timers and a
  574. * host interrupt controller that doesn't support an active state, we
  575. * must still prevent continuously exiting from the guest, and
  576. * therefore mask the physical interrupt by disabling it on the host
  577. * interrupt controller when the virtual level is high, such that the
  578. * guest can make forward progress. Once we detect the output level
  579. * being de-asserted, we unmask the interrupt again so that we exit
  580. * from the guest when the timer fires.
  581. */
  582. if (vtimer->irq.level)
  583. disable_percpu_irq(host_vtimer_irq);
  584. else
  585. enable_percpu_irq(host_vtimer_irq, host_vtimer_irq_flags);
  586. }
  587. /* If _pred is true, set bit in _set, otherwise set it in _clr */
  588. #define assign_clear_set_bit(_pred, _bit, _clr, _set) \
  589. do { \
  590. if (_pred) \
  591. (_set) |= (_bit); \
  592. else \
  593. (_clr) |= (_bit); \
  594. } while (0)
  595. static void kvm_timer_vcpu_load_nested_switch(struct kvm_vcpu *vcpu,
  596. struct timer_map *map)
  597. {
  598. int hw, ret;
  599. if (!irqchip_in_kernel(vcpu->kvm))
  600. return;
  601. /*
  602. * We only ever unmap the vtimer irq on a VHE system that runs nested
  603. * virtualization, in which case we have both a valid emul_vtimer,
  604. * emul_ptimer, direct_vtimer, and direct_ptimer.
  605. *
  606. * Since this is called from kvm_timer_vcpu_load(), a change between
  607. * vEL2 and vEL1/0 will have just happened, and the timer_map will
  608. * represent this, and therefore we switch the emul/direct mappings
  609. * below.
  610. */
  611. hw = kvm_vgic_get_map(vcpu, timer_irq(map->direct_vtimer));
  612. if (hw < 0) {
  613. kvm_vgic_unmap_phys_irq(vcpu, timer_irq(map->emul_vtimer));
  614. kvm_vgic_unmap_phys_irq(vcpu, timer_irq(map->emul_ptimer));
  615. ret = kvm_vgic_map_phys_irq(vcpu,
  616. map->direct_vtimer->host_timer_irq,
  617. timer_irq(map->direct_vtimer),
  618. &arch_timer_irq_ops);
  619. WARN_ON_ONCE(ret);
  620. ret = kvm_vgic_map_phys_irq(vcpu,
  621. map->direct_ptimer->host_timer_irq,
  622. timer_irq(map->direct_ptimer),
  623. &arch_timer_irq_ops);
  624. WARN_ON_ONCE(ret);
  625. }
  626. }
  627. static void timer_set_traps(struct kvm_vcpu *vcpu, struct timer_map *map)
  628. {
  629. bool tvt, tpt, tvc, tpc, tvt02, tpt02;
  630. u64 clr, set;
  631. /*
  632. * No trapping gets configured here with nVHE. See
  633. * __timer_enable_traps(), which is where the stuff happens.
  634. */
  635. if (!has_vhe())
  636. return;
  637. /*
  638. * Our default policy is not to trap anything. As we progress
  639. * within this function, reality kicks in and we start adding
  640. * traps based on emulation requirements.
  641. */
  642. tvt = tpt = tvc = tpc = false;
  643. tvt02 = tpt02 = false;
  644. /*
  645. * NV2 badly breaks the timer semantics by redirecting accesses to
  646. * the EL1 timer state to memory, so let's call ECV to the rescue if
  647. * available: we trap all CNT{P,V}_{CTL,CVAL,TVAL}_EL0 accesses.
  648. *
  649. * The treatment slightly varies depending whether we run a nVHE or
  650. * VHE guest: nVHE will use the _EL0 registers directly, while VHE
  651. * will use the _EL02 accessors. This translates in different trap
  652. * bits.
  653. *
  654. * None of the trapping is required when running in non-HYP context,
  655. * unless required by the L1 hypervisor settings once we advertise
  656. * ECV+NV in the guest, or that we need trapping for other reasons.
  657. */
  658. if (cpus_have_final_cap(ARM64_HAS_ECV) && is_hyp_ctxt(vcpu)) {
  659. if (vcpu_el2_e2h_is_set(vcpu))
  660. tvt02 = tpt02 = true;
  661. else
  662. tvt = tpt = true;
  663. }
  664. /*
  665. * We have two possibility to deal with a physical offset:
  666. *
  667. * - Either we have CNTPOFF (yay!) or the offset is 0:
  668. * we let the guest freely access the HW
  669. *
  670. * - or neither of these condition apply:
  671. * we trap accesses to the HW, but still use it
  672. * after correcting the physical offset
  673. */
  674. if (!has_cntpoff() && timer_get_offset(map->direct_ptimer))
  675. tpt = tpc = true;
  676. /*
  677. * For the poor sods that could not correctly subtract one value
  678. * from another, trap the full virtual timer and counter.
  679. */
  680. if (has_broken_cntvoff() && timer_get_offset(map->direct_vtimer))
  681. tvt = tvc = true;
  682. /*
  683. * Apply the enable bits that the guest hypervisor has requested for
  684. * its own guest. We can only add traps that wouldn't have been set
  685. * above.
  686. * Implementation choices: we do not support NV when E2H=0 in the
  687. * guest, and we don't support configuration where E2H is writable
  688. * by the guest (either FEAT_VHE or FEAT_E2H0 is implemented, but
  689. * not both). This simplifies the handling of the EL1NV* bits.
  690. */
  691. if (is_nested_ctxt(vcpu)) {
  692. u64 val = __vcpu_sys_reg(vcpu, CNTHCTL_EL2);
  693. /* Use the VHE format for mental sanity */
  694. if (!vcpu_el2_e2h_is_set(vcpu))
  695. val = (val & (CNTHCTL_EL1PCEN | CNTHCTL_EL1PCTEN)) << 10;
  696. tpt |= !(val & (CNTHCTL_EL1PCEN << 10));
  697. tpc |= !(val & (CNTHCTL_EL1PCTEN << 10));
  698. tpt02 |= (val & CNTHCTL_EL1NVPCT);
  699. tvt02 |= (val & CNTHCTL_EL1NVVCT);
  700. }
  701. /*
  702. * Now that we have collected our requirements, compute the
  703. * trap and enable bits.
  704. */
  705. set = 0;
  706. clr = 0;
  707. assign_clear_set_bit(tpt, CNTHCTL_EL1PCEN << 10, set, clr);
  708. assign_clear_set_bit(tpc, CNTHCTL_EL1PCTEN << 10, set, clr);
  709. assign_clear_set_bit(tvt, CNTHCTL_EL1TVT, clr, set);
  710. assign_clear_set_bit(tvc, CNTHCTL_EL1TVCT, clr, set);
  711. assign_clear_set_bit(tvt02, CNTHCTL_EL1NVVCT, clr, set);
  712. assign_clear_set_bit(tpt02, CNTHCTL_EL1NVPCT, clr, set);
  713. /* This only happens on VHE, so use the CNTHCTL_EL2 accessor. */
  714. sysreg_clear_set(cnthctl_el2, clr, set);
  715. }
  716. void kvm_timer_vcpu_load(struct kvm_vcpu *vcpu)
  717. {
  718. struct arch_timer_cpu *timer = vcpu_timer(vcpu);
  719. struct timer_map map;
  720. if (unlikely(!timer->enabled))
  721. return;
  722. get_timer_map(vcpu, &map);
  723. if (static_branch_likely(&has_gic_active_state)) {
  724. if (vcpu_has_nv(vcpu))
  725. kvm_timer_vcpu_load_nested_switch(vcpu, &map);
  726. kvm_timer_vcpu_load_gic(map.direct_vtimer);
  727. if (map.direct_ptimer)
  728. kvm_timer_vcpu_load_gic(map.direct_ptimer);
  729. } else {
  730. kvm_timer_vcpu_load_nogic(vcpu);
  731. }
  732. kvm_timer_unblocking(vcpu);
  733. timer_restore_state(map.direct_vtimer);
  734. if (map.direct_ptimer)
  735. timer_restore_state(map.direct_ptimer);
  736. if (map.emul_vtimer)
  737. timer_emulate(map.emul_vtimer);
  738. if (map.emul_ptimer)
  739. timer_emulate(map.emul_ptimer);
  740. timer_set_traps(vcpu, &map);
  741. }
  742. bool kvm_timer_should_notify_user(struct kvm_vcpu *vcpu)
  743. {
  744. struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
  745. struct arch_timer_context *ptimer = vcpu_ptimer(vcpu);
  746. struct kvm_sync_regs *sregs = &vcpu->run->s.regs;
  747. bool vlevel, plevel;
  748. if (likely(irqchip_in_kernel(vcpu->kvm)))
  749. return false;
  750. vlevel = sregs->device_irq_level & KVM_ARM_DEV_EL1_VTIMER;
  751. plevel = sregs->device_irq_level & KVM_ARM_DEV_EL1_PTIMER;
  752. return kvm_timer_should_fire(vtimer) != vlevel ||
  753. kvm_timer_should_fire(ptimer) != plevel;
  754. }
  755. void kvm_timer_vcpu_put(struct kvm_vcpu *vcpu)
  756. {
  757. struct arch_timer_cpu *timer = vcpu_timer(vcpu);
  758. struct timer_map map;
  759. if (unlikely(!timer->enabled))
  760. return;
  761. get_timer_map(vcpu, &map);
  762. timer_save_state(map.direct_vtimer);
  763. if (map.direct_ptimer)
  764. timer_save_state(map.direct_ptimer);
  765. /*
  766. * Cancel soft timer emulation, because the only case where we
  767. * need it after a vcpu_put is in the context of a sleeping VCPU, and
  768. * in that case we already factor in the deadline for the physical
  769. * timer when scheduling the bg_timer.
  770. *
  771. * In any case, we re-schedule the hrtimer for the physical timer when
  772. * coming back to the VCPU thread in kvm_timer_vcpu_load().
  773. */
  774. if (map.emul_vtimer)
  775. soft_timer_cancel(&map.emul_vtimer->hrtimer);
  776. if (map.emul_ptimer)
  777. soft_timer_cancel(&map.emul_ptimer->hrtimer);
  778. if (kvm_vcpu_is_blocking(vcpu))
  779. kvm_timer_blocking(vcpu);
  780. }
  781. void kvm_timer_sync_nested(struct kvm_vcpu *vcpu)
  782. {
  783. /*
  784. * When NV2 is on, guest hypervisors have their EL1 timer register
  785. * accesses redirected to the VNCR page. Any guest action taken on
  786. * the timer is postponed until the next exit, leading to a very
  787. * poor quality of emulation.
  788. *
  789. * This is an unmitigated disaster, only papered over by FEAT_ECV,
  790. * which allows trapping of the timer registers even with NV2.
  791. * Still, this is still worse than FEAT_NV on its own. Meh.
  792. */
  793. if (!cpus_have_final_cap(ARM64_HAS_ECV)) {
  794. /*
  795. * For a VHE guest hypervisor, the EL2 state is directly
  796. * stored in the host EL1 timers, while the emulated EL1
  797. * state is stored in the VNCR page. The latter could have
  798. * been updated behind our back, and we must reset the
  799. * emulation of the timers.
  800. *
  801. * A non-VHE guest hypervisor doesn't have any direct access
  802. * to its timers: the EL2 registers trap despite being
  803. * notionally direct (we use the EL1 HW, as for VHE), while
  804. * the EL1 registers access memory.
  805. *
  806. * In both cases, process the emulated timers on each guest
  807. * exit. Boo.
  808. */
  809. struct timer_map map;
  810. get_timer_map(vcpu, &map);
  811. soft_timer_cancel(&map.emul_vtimer->hrtimer);
  812. soft_timer_cancel(&map.emul_ptimer->hrtimer);
  813. timer_emulate(map.emul_vtimer);
  814. timer_emulate(map.emul_ptimer);
  815. }
  816. }
  817. /*
  818. * With a userspace irqchip we have to check if the guest de-asserted the
  819. * timer and if so, unmask the timer irq signal on the host interrupt
  820. * controller to ensure that we see future timer signals.
  821. */
  822. static void unmask_vtimer_irq_user(struct kvm_vcpu *vcpu)
  823. {
  824. struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
  825. if (!kvm_timer_should_fire(vtimer)) {
  826. kvm_timer_update_irq(vcpu, false, vtimer);
  827. if (static_branch_likely(&has_gic_active_state))
  828. set_timer_irq_phys_active(vtimer, false);
  829. else
  830. enable_percpu_irq(host_vtimer_irq, host_vtimer_irq_flags);
  831. }
  832. }
  833. void kvm_timer_sync_user(struct kvm_vcpu *vcpu)
  834. {
  835. struct arch_timer_cpu *timer = vcpu_timer(vcpu);
  836. if (unlikely(!timer->enabled))
  837. return;
  838. if (unlikely(!irqchip_in_kernel(vcpu->kvm)))
  839. unmask_vtimer_irq_user(vcpu);
  840. }
  841. void kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu)
  842. {
  843. struct arch_timer_cpu *timer = vcpu_timer(vcpu);
  844. struct timer_map map;
  845. get_timer_map(vcpu, &map);
  846. /*
  847. * The bits in CNTV_CTL are architecturally reset to UNKNOWN for ARMv8
  848. * and to 0 for ARMv7. We provide an implementation that always
  849. * resets the timer to be disabled and unmasked and is compliant with
  850. * the ARMv7 architecture.
  851. */
  852. for (int i = 0; i < nr_timers(vcpu); i++)
  853. timer_set_ctl(vcpu_get_timer(vcpu, i), 0);
  854. /*
  855. * A vcpu running at EL2 is in charge of the offset applied to
  856. * the virtual timer, so use the physical VM offset, and point
  857. * the vcpu offset to CNTVOFF_EL2.
  858. */
  859. if (vcpu_has_nv(vcpu)) {
  860. struct arch_timer_offset *offs = &vcpu_vtimer(vcpu)->offset;
  861. offs->vcpu_offset = __ctxt_sys_reg(&vcpu->arch.ctxt, CNTVOFF_EL2);
  862. offs->vm_offset = &vcpu->kvm->arch.timer_data.poffset;
  863. }
  864. if (timer->enabled) {
  865. for (int i = 0; i < nr_timers(vcpu); i++)
  866. kvm_timer_update_irq(vcpu, false,
  867. vcpu_get_timer(vcpu, i));
  868. if (irqchip_in_kernel(vcpu->kvm)) {
  869. kvm_vgic_reset_mapped_irq(vcpu, timer_irq(map.direct_vtimer));
  870. if (map.direct_ptimer)
  871. kvm_vgic_reset_mapped_irq(vcpu, timer_irq(map.direct_ptimer));
  872. }
  873. }
  874. if (map.emul_vtimer)
  875. soft_timer_cancel(&map.emul_vtimer->hrtimer);
  876. if (map.emul_ptimer)
  877. soft_timer_cancel(&map.emul_ptimer->hrtimer);
  878. }
  879. static void timer_context_init(struct kvm_vcpu *vcpu, int timerid)
  880. {
  881. struct arch_timer_context *ctxt = vcpu_get_timer(vcpu, timerid);
  882. struct kvm *kvm = vcpu->kvm;
  883. ctxt->timer_id = timerid;
  884. if (!kvm_vm_is_protected(vcpu->kvm)) {
  885. if (timerid == TIMER_VTIMER)
  886. ctxt->offset.vm_offset = &kvm->arch.timer_data.voffset;
  887. else
  888. ctxt->offset.vm_offset = &kvm->arch.timer_data.poffset;
  889. } else {
  890. ctxt->offset.vm_offset = NULL;
  891. }
  892. hrtimer_setup(&ctxt->hrtimer, kvm_hrtimer_expire, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_HARD);
  893. switch (timerid) {
  894. case TIMER_PTIMER:
  895. case TIMER_HPTIMER:
  896. ctxt->host_timer_irq = host_ptimer_irq;
  897. break;
  898. case TIMER_VTIMER:
  899. case TIMER_HVTIMER:
  900. ctxt->host_timer_irq = host_vtimer_irq;
  901. break;
  902. }
  903. }
  904. void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu)
  905. {
  906. struct arch_timer_cpu *timer = vcpu_timer(vcpu);
  907. for (int i = 0; i < NR_KVM_TIMERS; i++)
  908. timer_context_init(vcpu, i);
  909. /* Synchronize offsets across timers of a VM if not already provided */
  910. if (!vcpu_is_protected(vcpu) &&
  911. !test_bit(KVM_ARCH_FLAG_VM_COUNTER_OFFSET, &vcpu->kvm->arch.flags)) {
  912. timer_set_offset(vcpu_vtimer(vcpu), kvm_phys_timer_read());
  913. timer_set_offset(vcpu_ptimer(vcpu), 0);
  914. }
  915. hrtimer_setup(&timer->bg_timer, kvm_bg_timer_expire, CLOCK_MONOTONIC,
  916. HRTIMER_MODE_ABS_HARD);
  917. }
  918. void kvm_timer_init_vm(struct kvm *kvm)
  919. {
  920. for (int i = 0; i < NR_KVM_TIMERS; i++)
  921. kvm->arch.timer_data.ppi[i] = default_ppi[i];
  922. }
  923. void kvm_timer_cpu_up(void)
  924. {
  925. enable_percpu_irq(host_vtimer_irq, host_vtimer_irq_flags);
  926. if (host_ptimer_irq)
  927. enable_percpu_irq(host_ptimer_irq, host_ptimer_irq_flags);
  928. }
  929. void kvm_timer_cpu_down(void)
  930. {
  931. disable_percpu_irq(host_vtimer_irq);
  932. if (host_ptimer_irq)
  933. disable_percpu_irq(host_ptimer_irq);
  934. }
  935. static u64 read_timer_ctl(struct arch_timer_context *timer)
  936. {
  937. /*
  938. * Set ISTATUS bit if it's expired.
  939. * Note that according to ARMv8 ARM Issue A.k, ISTATUS bit is
  940. * UNKNOWN when ENABLE bit is 0, so we chose to set ISTATUS bit
  941. * regardless of ENABLE bit for our implementation convenience.
  942. */
  943. u32 ctl = timer_get_ctl(timer);
  944. if (!kvm_timer_compute_delta(timer))
  945. ctl |= ARCH_TIMER_CTRL_IT_STAT;
  946. return ctl;
  947. }
  948. static u64 kvm_arm_timer_read(struct kvm_vcpu *vcpu,
  949. struct arch_timer_context *timer,
  950. enum kvm_arch_timer_regs treg)
  951. {
  952. u64 val;
  953. switch (treg) {
  954. case TIMER_REG_TVAL:
  955. val = timer_get_cval(timer) - kvm_phys_timer_read() + timer_get_offset(timer);
  956. val = lower_32_bits(val);
  957. break;
  958. case TIMER_REG_CTL:
  959. val = read_timer_ctl(timer);
  960. break;
  961. case TIMER_REG_CVAL:
  962. val = timer_get_cval(timer);
  963. break;
  964. case TIMER_REG_CNT:
  965. val = kvm_phys_timer_read() - timer_get_offset(timer);
  966. break;
  967. case TIMER_REG_VOFF:
  968. val = *timer->offset.vcpu_offset;
  969. break;
  970. default:
  971. BUG();
  972. }
  973. return val;
  974. }
  975. u64 kvm_arm_timer_read_sysreg(struct kvm_vcpu *vcpu,
  976. enum kvm_arch_timers tmr,
  977. enum kvm_arch_timer_regs treg)
  978. {
  979. struct arch_timer_context *timer;
  980. struct timer_map map;
  981. u64 val;
  982. get_timer_map(vcpu, &map);
  983. timer = vcpu_get_timer(vcpu, tmr);
  984. if (timer == map.emul_vtimer || timer == map.emul_ptimer)
  985. return kvm_arm_timer_read(vcpu, timer, treg);
  986. preempt_disable();
  987. timer_save_state(timer);
  988. val = kvm_arm_timer_read(vcpu, timer, treg);
  989. timer_restore_state(timer);
  990. preempt_enable();
  991. return val;
  992. }
  993. static void kvm_arm_timer_write(struct kvm_vcpu *vcpu,
  994. struct arch_timer_context *timer,
  995. enum kvm_arch_timer_regs treg,
  996. u64 val)
  997. {
  998. switch (treg) {
  999. case TIMER_REG_TVAL:
  1000. timer_set_cval(timer, kvm_phys_timer_read() - timer_get_offset(timer) + (s32)val);
  1001. break;
  1002. case TIMER_REG_CTL:
  1003. timer_set_ctl(timer, val & ~ARCH_TIMER_CTRL_IT_STAT);
  1004. break;
  1005. case TIMER_REG_CVAL:
  1006. timer_set_cval(timer, val);
  1007. break;
  1008. case TIMER_REG_VOFF:
  1009. *timer->offset.vcpu_offset = val;
  1010. break;
  1011. default:
  1012. BUG();
  1013. }
  1014. }
  1015. void kvm_arm_timer_write_sysreg(struct kvm_vcpu *vcpu,
  1016. enum kvm_arch_timers tmr,
  1017. enum kvm_arch_timer_regs treg,
  1018. u64 val)
  1019. {
  1020. struct arch_timer_context *timer;
  1021. struct timer_map map;
  1022. get_timer_map(vcpu, &map);
  1023. timer = vcpu_get_timer(vcpu, tmr);
  1024. if (timer == map.emul_vtimer || timer == map.emul_ptimer) {
  1025. soft_timer_cancel(&timer->hrtimer);
  1026. kvm_arm_timer_write(vcpu, timer, treg, val);
  1027. timer_emulate(timer);
  1028. } else {
  1029. preempt_disable();
  1030. timer_save_state(timer);
  1031. kvm_arm_timer_write(vcpu, timer, treg, val);
  1032. timer_restore_state(timer);
  1033. preempt_enable();
  1034. }
  1035. }
  1036. static int timer_irq_set_vcpu_affinity(struct irq_data *d, void *vcpu)
  1037. {
  1038. if (vcpu)
  1039. irqd_set_forwarded_to_vcpu(d);
  1040. else
  1041. irqd_clr_forwarded_to_vcpu(d);
  1042. return 0;
  1043. }
  1044. static int timer_irq_set_irqchip_state(struct irq_data *d,
  1045. enum irqchip_irq_state which, bool val)
  1046. {
  1047. if (which != IRQCHIP_STATE_ACTIVE || !irqd_is_forwarded_to_vcpu(d))
  1048. return irq_chip_set_parent_state(d, which, val);
  1049. if (val)
  1050. irq_chip_mask_parent(d);
  1051. else
  1052. irq_chip_unmask_parent(d);
  1053. return 0;
  1054. }
  1055. static void timer_irq_eoi(struct irq_data *d)
  1056. {
  1057. if (!irqd_is_forwarded_to_vcpu(d))
  1058. irq_chip_eoi_parent(d);
  1059. }
  1060. static void timer_irq_ack(struct irq_data *d)
  1061. {
  1062. d = d->parent_data;
  1063. if (d->chip->irq_ack)
  1064. d->chip->irq_ack(d);
  1065. }
  1066. static struct irq_chip timer_chip = {
  1067. .name = "KVM",
  1068. .irq_ack = timer_irq_ack,
  1069. .irq_mask = irq_chip_mask_parent,
  1070. .irq_unmask = irq_chip_unmask_parent,
  1071. .irq_eoi = timer_irq_eoi,
  1072. .irq_set_type = irq_chip_set_type_parent,
  1073. .irq_set_vcpu_affinity = timer_irq_set_vcpu_affinity,
  1074. .irq_set_irqchip_state = timer_irq_set_irqchip_state,
  1075. };
  1076. static int timer_irq_domain_alloc(struct irq_domain *domain, unsigned int virq,
  1077. unsigned int nr_irqs, void *arg)
  1078. {
  1079. irq_hw_number_t hwirq = (uintptr_t)arg;
  1080. return irq_domain_set_hwirq_and_chip(domain, virq, hwirq,
  1081. &timer_chip, NULL);
  1082. }
  1083. static void timer_irq_domain_free(struct irq_domain *domain, unsigned int virq,
  1084. unsigned int nr_irqs)
  1085. {
  1086. }
  1087. static const struct irq_domain_ops timer_domain_ops = {
  1088. .alloc = timer_irq_domain_alloc,
  1089. .free = timer_irq_domain_free,
  1090. };
  1091. static void kvm_irq_fixup_flags(unsigned int virq, u32 *flags)
  1092. {
  1093. *flags = irq_get_trigger_type(virq);
  1094. if (*flags != IRQF_TRIGGER_HIGH && *flags != IRQF_TRIGGER_LOW) {
  1095. kvm_err("Invalid trigger for timer IRQ%d, assuming level low\n",
  1096. virq);
  1097. *flags = IRQF_TRIGGER_LOW;
  1098. }
  1099. }
  1100. static int kvm_irq_init(struct arch_timer_kvm_info *info)
  1101. {
  1102. struct irq_domain *domain = NULL;
  1103. if (info->virtual_irq <= 0) {
  1104. kvm_err("kvm_arch_timer: invalid virtual timer IRQ: %d\n",
  1105. info->virtual_irq);
  1106. return -ENODEV;
  1107. }
  1108. host_vtimer_irq = info->virtual_irq;
  1109. kvm_irq_fixup_flags(host_vtimer_irq, &host_vtimer_irq_flags);
  1110. if (kvm_vgic_global_state.no_hw_deactivation) {
  1111. struct fwnode_handle *fwnode;
  1112. struct irq_data *data;
  1113. fwnode = irq_domain_alloc_named_fwnode("kvm-timer");
  1114. if (!fwnode)
  1115. return -ENOMEM;
  1116. /* Assume both vtimer and ptimer in the same parent */
  1117. data = irq_get_irq_data(host_vtimer_irq);
  1118. domain = irq_domain_create_hierarchy(data->domain, 0,
  1119. NR_KVM_TIMERS, fwnode,
  1120. &timer_domain_ops, NULL);
  1121. if (!domain) {
  1122. irq_domain_free_fwnode(fwnode);
  1123. return -ENOMEM;
  1124. }
  1125. arch_timer_irq_ops.flags |= VGIC_IRQ_SW_RESAMPLE;
  1126. WARN_ON(irq_domain_push_irq(domain, host_vtimer_irq,
  1127. (void *)TIMER_VTIMER));
  1128. }
  1129. if (info->physical_irq > 0) {
  1130. host_ptimer_irq = info->physical_irq;
  1131. kvm_irq_fixup_flags(host_ptimer_irq, &host_ptimer_irq_flags);
  1132. if (domain)
  1133. WARN_ON(irq_domain_push_irq(domain, host_ptimer_irq,
  1134. (void *)TIMER_PTIMER));
  1135. }
  1136. return 0;
  1137. }
  1138. static void kvm_timer_handle_errata(void)
  1139. {
  1140. u64 mmfr0, mmfr1, mmfr4;
  1141. /*
  1142. * CNTVOFF_EL2 is broken on some implementations. For those, we trap
  1143. * all virtual timer/counter accesses, requiring FEAT_ECV.
  1144. *
  1145. * However, a hypervisor supporting nesting is likely to mitigate the
  1146. * erratum at L0, and not require other levels to mitigate it (which
  1147. * would otherwise be a terrible performance sink due to trap
  1148. * amplification).
  1149. *
  1150. * Given that the affected HW implements both FEAT_VHE and FEAT_E2H0,
  1151. * and that NV is likely not to (because of limitations of the
  1152. * architecture), only enable the workaround when FEAT_VHE and
  1153. * FEAT_E2H0 are both detected. Time will tell if this actually holds.
  1154. */
  1155. mmfr0 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1);
  1156. mmfr1 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR1_EL1);
  1157. mmfr4 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR4_EL1);
  1158. if (SYS_FIELD_GET(ID_AA64MMFR1_EL1, VH, mmfr1) &&
  1159. !SYS_FIELD_GET(ID_AA64MMFR4_EL1, E2H0, mmfr4) &&
  1160. SYS_FIELD_GET(ID_AA64MMFR0_EL1, ECV, mmfr0) &&
  1161. (has_vhe() || has_hvhe()) &&
  1162. cpus_have_final_cap(ARM64_WORKAROUND_QCOM_ORYON_CNTVOFF)) {
  1163. static_branch_enable(&broken_cntvoff_key);
  1164. kvm_info("Broken CNTVOFF_EL2, trapping virtual timer\n");
  1165. }
  1166. }
  1167. int __init kvm_timer_hyp_init(bool has_gic)
  1168. {
  1169. struct arch_timer_kvm_info *info;
  1170. int err;
  1171. info = arch_timer_get_kvm_info();
  1172. timecounter = &info->timecounter;
  1173. if (!timecounter->cc) {
  1174. kvm_err("kvm_arch_timer: uninitialized timecounter\n");
  1175. return -ENODEV;
  1176. }
  1177. err = kvm_irq_init(info);
  1178. if (err)
  1179. return err;
  1180. /* First, do the virtual EL1 timer irq */
  1181. err = request_percpu_irq(host_vtimer_irq, kvm_arch_timer_handler,
  1182. "kvm guest vtimer", kvm_get_running_vcpus());
  1183. if (err) {
  1184. kvm_err("kvm_arch_timer: can't request vtimer interrupt %d (%d)\n",
  1185. host_vtimer_irq, err);
  1186. return err;
  1187. }
  1188. if (has_gic) {
  1189. err = irq_set_vcpu_affinity(host_vtimer_irq,
  1190. kvm_get_running_vcpus());
  1191. if (err) {
  1192. kvm_err("kvm_arch_timer: error setting vcpu affinity\n");
  1193. goto out_free_vtimer_irq;
  1194. }
  1195. static_branch_enable(&has_gic_active_state);
  1196. }
  1197. kvm_debug("virtual timer IRQ%d\n", host_vtimer_irq);
  1198. /* Now let's do the physical EL1 timer irq */
  1199. if (info->physical_irq > 0) {
  1200. err = request_percpu_irq(host_ptimer_irq, kvm_arch_timer_handler,
  1201. "kvm guest ptimer", kvm_get_running_vcpus());
  1202. if (err) {
  1203. kvm_err("kvm_arch_timer: can't request ptimer interrupt %d (%d)\n",
  1204. host_ptimer_irq, err);
  1205. goto out_free_vtimer_irq;
  1206. }
  1207. if (has_gic) {
  1208. err = irq_set_vcpu_affinity(host_ptimer_irq,
  1209. kvm_get_running_vcpus());
  1210. if (err) {
  1211. kvm_err("kvm_arch_timer: error setting vcpu affinity\n");
  1212. goto out_free_ptimer_irq;
  1213. }
  1214. }
  1215. kvm_debug("physical timer IRQ%d\n", host_ptimer_irq);
  1216. } else if (has_vhe()) {
  1217. kvm_err("kvm_arch_timer: invalid physical timer IRQ: %d\n",
  1218. info->physical_irq);
  1219. err = -ENODEV;
  1220. goto out_free_vtimer_irq;
  1221. }
  1222. kvm_timer_handle_errata();
  1223. return 0;
  1224. out_free_ptimer_irq:
  1225. if (info->physical_irq > 0)
  1226. free_percpu_irq(host_ptimer_irq, kvm_get_running_vcpus());
  1227. out_free_vtimer_irq:
  1228. free_percpu_irq(host_vtimer_irq, kvm_get_running_vcpus());
  1229. return err;
  1230. }
  1231. void kvm_timer_vcpu_terminate(struct kvm_vcpu *vcpu)
  1232. {
  1233. struct arch_timer_cpu *timer = vcpu_timer(vcpu);
  1234. soft_timer_cancel(&timer->bg_timer);
  1235. }
  1236. static bool timer_irqs_are_valid(struct kvm_vcpu *vcpu)
  1237. {
  1238. u32 ppis = 0;
  1239. bool valid;
  1240. mutex_lock(&vcpu->kvm->arch.config_lock);
  1241. for (int i = 0; i < nr_timers(vcpu); i++) {
  1242. struct arch_timer_context *ctx;
  1243. int irq;
  1244. ctx = vcpu_get_timer(vcpu, i);
  1245. irq = timer_irq(ctx);
  1246. if (kvm_vgic_set_owner(vcpu, irq, ctx))
  1247. break;
  1248. /*
  1249. * We know by construction that we only have PPIs, so
  1250. * all values are less than 32.
  1251. */
  1252. ppis |= BIT(irq);
  1253. }
  1254. valid = hweight32(ppis) == nr_timers(vcpu);
  1255. if (valid)
  1256. set_bit(KVM_ARCH_FLAG_TIMER_PPIS_IMMUTABLE, &vcpu->kvm->arch.flags);
  1257. mutex_unlock(&vcpu->kvm->arch.config_lock);
  1258. return valid;
  1259. }
  1260. static bool kvm_arch_timer_get_input_level(int vintid)
  1261. {
  1262. struct kvm_vcpu *vcpu = kvm_get_running_vcpu();
  1263. if (WARN(!vcpu, "No vcpu context!\n"))
  1264. return false;
  1265. for (int i = 0; i < nr_timers(vcpu); i++) {
  1266. struct arch_timer_context *ctx;
  1267. ctx = vcpu_get_timer(vcpu, i);
  1268. if (timer_irq(ctx) == vintid)
  1269. return kvm_timer_should_fire(ctx);
  1270. }
  1271. /* A timer IRQ has fired, but no matching timer was found? */
  1272. WARN_RATELIMIT(1, "timer INTID%d unknown\n", vintid);
  1273. return false;
  1274. }
  1275. int kvm_timer_enable(struct kvm_vcpu *vcpu)
  1276. {
  1277. struct arch_timer_cpu *timer = vcpu_timer(vcpu);
  1278. struct timer_map map;
  1279. int ret;
  1280. if (timer->enabled)
  1281. return 0;
  1282. /* Without a VGIC we do not map virtual IRQs to physical IRQs */
  1283. if (!irqchip_in_kernel(vcpu->kvm))
  1284. goto no_vgic;
  1285. /*
  1286. * At this stage, we have the guarantee that the vgic is both
  1287. * available and initialized.
  1288. */
  1289. if (!timer_irqs_are_valid(vcpu)) {
  1290. kvm_debug("incorrectly configured timer irqs\n");
  1291. return -EINVAL;
  1292. }
  1293. get_timer_map(vcpu, &map);
  1294. ret = kvm_vgic_map_phys_irq(vcpu,
  1295. map.direct_vtimer->host_timer_irq,
  1296. timer_irq(map.direct_vtimer),
  1297. &arch_timer_irq_ops);
  1298. if (ret)
  1299. return ret;
  1300. if (map.direct_ptimer) {
  1301. ret = kvm_vgic_map_phys_irq(vcpu,
  1302. map.direct_ptimer->host_timer_irq,
  1303. timer_irq(map.direct_ptimer),
  1304. &arch_timer_irq_ops);
  1305. }
  1306. if (ret)
  1307. return ret;
  1308. no_vgic:
  1309. timer->enabled = 1;
  1310. return 0;
  1311. }
  1312. /* If we have CNTPOFF, permanently set ECV to enable it */
  1313. void kvm_timer_init_vhe(void)
  1314. {
  1315. if (cpus_have_final_cap(ARM64_HAS_ECV_CNTPOFF))
  1316. sysreg_clear_set(cnthctl_el2, 0, CNTHCTL_ECV);
  1317. }
  1318. int kvm_arm_timer_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
  1319. {
  1320. int __user *uaddr = (int __user *)(long)attr->addr;
  1321. int irq, idx, ret = 0;
  1322. if (!irqchip_in_kernel(vcpu->kvm))
  1323. return -EINVAL;
  1324. if (get_user(irq, uaddr))
  1325. return -EFAULT;
  1326. if (!(irq_is_ppi(irq)))
  1327. return -EINVAL;
  1328. mutex_lock(&vcpu->kvm->arch.config_lock);
  1329. if (test_bit(KVM_ARCH_FLAG_TIMER_PPIS_IMMUTABLE,
  1330. &vcpu->kvm->arch.flags)) {
  1331. ret = -EBUSY;
  1332. goto out;
  1333. }
  1334. switch (attr->attr) {
  1335. case KVM_ARM_VCPU_TIMER_IRQ_VTIMER:
  1336. idx = TIMER_VTIMER;
  1337. break;
  1338. case KVM_ARM_VCPU_TIMER_IRQ_PTIMER:
  1339. idx = TIMER_PTIMER;
  1340. break;
  1341. case KVM_ARM_VCPU_TIMER_IRQ_HVTIMER:
  1342. idx = TIMER_HVTIMER;
  1343. break;
  1344. case KVM_ARM_VCPU_TIMER_IRQ_HPTIMER:
  1345. idx = TIMER_HPTIMER;
  1346. break;
  1347. default:
  1348. ret = -ENXIO;
  1349. goto out;
  1350. }
  1351. /*
  1352. * We cannot validate the IRQ unicity before we run, so take it at
  1353. * face value. The verdict will be given on first vcpu run, for each
  1354. * vcpu. Yes this is late. Blame it on the stupid API.
  1355. */
  1356. vcpu->kvm->arch.timer_data.ppi[idx] = irq;
  1357. out:
  1358. mutex_unlock(&vcpu->kvm->arch.config_lock);
  1359. return ret;
  1360. }
  1361. int kvm_arm_timer_get_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
  1362. {
  1363. int __user *uaddr = (int __user *)(long)attr->addr;
  1364. struct arch_timer_context *timer;
  1365. int irq;
  1366. switch (attr->attr) {
  1367. case KVM_ARM_VCPU_TIMER_IRQ_VTIMER:
  1368. timer = vcpu_vtimer(vcpu);
  1369. break;
  1370. case KVM_ARM_VCPU_TIMER_IRQ_PTIMER:
  1371. timer = vcpu_ptimer(vcpu);
  1372. break;
  1373. case KVM_ARM_VCPU_TIMER_IRQ_HVTIMER:
  1374. timer = vcpu_hvtimer(vcpu);
  1375. break;
  1376. case KVM_ARM_VCPU_TIMER_IRQ_HPTIMER:
  1377. timer = vcpu_hptimer(vcpu);
  1378. break;
  1379. default:
  1380. return -ENXIO;
  1381. }
  1382. irq = timer_irq(timer);
  1383. return put_user(irq, uaddr);
  1384. }
  1385. int kvm_arm_timer_has_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr)
  1386. {
  1387. switch (attr->attr) {
  1388. case KVM_ARM_VCPU_TIMER_IRQ_VTIMER:
  1389. case KVM_ARM_VCPU_TIMER_IRQ_PTIMER:
  1390. case KVM_ARM_VCPU_TIMER_IRQ_HVTIMER:
  1391. case KVM_ARM_VCPU_TIMER_IRQ_HPTIMER:
  1392. return 0;
  1393. }
  1394. return -ENXIO;
  1395. }
  1396. int kvm_vm_ioctl_set_counter_offset(struct kvm *kvm,
  1397. struct kvm_arm_counter_offset *offset)
  1398. {
  1399. int ret = 0;
  1400. if (offset->reserved)
  1401. return -EINVAL;
  1402. if (kvm_vm_is_protected(kvm))
  1403. return -EINVAL;
  1404. mutex_lock(&kvm->lock);
  1405. if (!kvm_trylock_all_vcpus(kvm)) {
  1406. set_bit(KVM_ARCH_FLAG_VM_COUNTER_OFFSET, &kvm->arch.flags);
  1407. /*
  1408. * If userspace decides to set the offset using this
  1409. * API rather than merely restoring the counter
  1410. * values, the offset applies to both the virtual and
  1411. * physical views.
  1412. */
  1413. kvm->arch.timer_data.voffset = offset->counter_offset;
  1414. kvm->arch.timer_data.poffset = offset->counter_offset;
  1415. kvm_unlock_all_vcpus(kvm);
  1416. } else {
  1417. ret = -EBUSY;
  1418. }
  1419. mutex_unlock(&kvm->lock);
  1420. return ret;
  1421. }