ptp_vmclock.c 20 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796
  1. // SPDX-License-Identifier: GPL-2.0-or-later
  2. /*
  3. * Virtual PTP 1588 clock for use with LM-safe VMclock device.
  4. *
  5. * Copyright © 2024 Amazon.com, Inc. or its affiliates.
  6. */
  7. #include "linux/poll.h"
  8. #include "linux/types.h"
  9. #include "linux/wait.h"
  10. #include <linux/acpi.h>
  11. #include <linux/device.h>
  12. #include <linux/err.h>
  13. #include <linux/file.h>
  14. #include <linux/fs.h>
  15. #include <linux/init.h>
  16. #include <linux/io.h>
  17. #include <linux/interrupt.h>
  18. #include <linux/kernel.h>
  19. #include <linux/miscdevice.h>
  20. #include <linux/mm.h>
  21. #include <linux/module.h>
  22. #include <linux/of.h>
  23. #include <linux/platform_device.h>
  24. #include <linux/slab.h>
  25. #include <uapi/linux/vmclock-abi.h>
  26. #include <linux/ptp_clock_kernel.h>
  27. #ifdef CONFIG_X86
  28. #include <asm/pvclock.h>
  29. #include <asm/kvmclock.h>
  30. #endif
  31. #ifdef CONFIG_KVM_GUEST
  32. #define SUPPORT_KVMCLOCK
  33. #endif
  34. static DEFINE_IDA(vmclock_ida);
  35. ACPI_MODULE_NAME("vmclock");
  36. struct vmclock_state {
  37. struct resource res;
  38. struct vmclock_abi *clk;
  39. struct miscdevice miscdev;
  40. wait_queue_head_t disrupt_wait;
  41. struct ptp_clock_info ptp_clock_info;
  42. struct ptp_clock *ptp_clock;
  43. enum clocksource_ids cs_id, sys_cs_id;
  44. int index;
  45. char *name;
  46. };
  47. #define VMCLOCK_MAX_WAIT ms_to_ktime(100)
  48. /* Require at least the flags field to be present. All else can be optional. */
  49. #define VMCLOCK_MIN_SIZE offsetof(struct vmclock_abi, pad)
  50. #define VMCLOCK_FIELD_PRESENT(_c, _f) \
  51. (le32_to_cpu((_c)->size) >= (offsetof(struct vmclock_abi, _f) + \
  52. sizeof((_c)->_f)))
  53. /*
  54. * Multiply a 64-bit count by a 64-bit tick 'period' in units of seconds >> 64
  55. * and add the fractional second part of the reference time.
  56. *
  57. * The result is a 128-bit value, the top 64 bits of which are seconds, and
  58. * the low 64 bits are (seconds >> 64).
  59. */
  60. static uint64_t mul_u64_u64_shr_add_u64(uint64_t *res_hi, uint64_t delta,
  61. uint64_t period, uint8_t shift,
  62. uint64_t frac_sec)
  63. {
  64. unsigned __int128 res = (unsigned __int128)delta * period;
  65. res >>= shift;
  66. res += frac_sec;
  67. *res_hi = res >> 64;
  68. return (uint64_t)res;
  69. }
  70. static bool tai_adjust(struct vmclock_abi *clk, uint64_t *sec)
  71. {
  72. if (clk->time_type == VMCLOCK_TIME_TAI)
  73. return true;
  74. if (clk->time_type == VMCLOCK_TIME_UTC &&
  75. (le64_to_cpu(clk->flags) & VMCLOCK_FLAG_TAI_OFFSET_VALID)) {
  76. if (sec)
  77. *sec -= (int16_t)le16_to_cpu(clk->tai_offset_sec);
  78. return true;
  79. }
  80. return false;
  81. }
  82. static int vmclock_get_crosststamp(struct vmclock_state *st,
  83. struct ptp_system_timestamp *sts,
  84. struct system_counterval_t *system_counter,
  85. struct timespec64 *tspec)
  86. {
  87. ktime_t deadline = ktime_add(ktime_get(), VMCLOCK_MAX_WAIT);
  88. struct system_time_snapshot systime_snapshot;
  89. uint64_t cycle, delta, seq, frac_sec;
  90. #ifdef CONFIG_X86
  91. /*
  92. * We'd expect the hypervisor to know this and to report the clock
  93. * status as VMCLOCK_STATUS_UNRELIABLE. But be paranoid.
  94. */
  95. if (check_tsc_unstable())
  96. return -EINVAL;
  97. #endif
  98. while (1) {
  99. seq = le32_to_cpu(st->clk->seq_count) & ~1ULL;
  100. /*
  101. * This pairs with a write barrier in the hypervisor
  102. * which populates this structure.
  103. */
  104. virt_rmb();
  105. if (st->clk->clock_status == VMCLOCK_STATUS_UNRELIABLE)
  106. return -EINVAL;
  107. /*
  108. * When invoked for gettimex64(), fill in the pre/post system
  109. * times. The simple case is when system time is based on the
  110. * same counter as st->cs_id, in which case all three times
  111. * will be derived from the *same* counter value.
  112. *
  113. * If the system isn't using the same counter, then the value
  114. * from ktime_get_snapshot() will still be used as pre_ts, and
  115. * ptp_read_system_postts() is called to populate postts after
  116. * calling get_cycles().
  117. *
  118. * The conversion to timespec64 happens further down, outside
  119. * the seq_count loop.
  120. */
  121. if (sts) {
  122. ktime_get_snapshot(&systime_snapshot);
  123. if (systime_snapshot.cs_id == st->cs_id) {
  124. cycle = systime_snapshot.cycles;
  125. } else {
  126. cycle = get_cycles();
  127. ptp_read_system_postts(sts);
  128. }
  129. } else {
  130. cycle = get_cycles();
  131. }
  132. delta = cycle - le64_to_cpu(st->clk->counter_value);
  133. frac_sec = mul_u64_u64_shr_add_u64(&tspec->tv_sec, delta,
  134. le64_to_cpu(st->clk->counter_period_frac_sec),
  135. st->clk->counter_period_shift,
  136. le64_to_cpu(st->clk->time_frac_sec));
  137. tspec->tv_nsec = mul_u64_u64_shr(frac_sec, NSEC_PER_SEC, 64);
  138. tspec->tv_sec += le64_to_cpu(st->clk->time_sec);
  139. if (!tai_adjust(st->clk, &tspec->tv_sec))
  140. return -EINVAL;
  141. /*
  142. * This pairs with a write barrier in the hypervisor
  143. * which populates this structure.
  144. */
  145. virt_rmb();
  146. if (seq == le32_to_cpu(st->clk->seq_count))
  147. break;
  148. if (ktime_after(ktime_get(), deadline))
  149. return -ETIMEDOUT;
  150. }
  151. if (system_counter) {
  152. system_counter->cycles = cycle;
  153. system_counter->cs_id = st->cs_id;
  154. }
  155. if (sts) {
  156. sts->pre_ts = ktime_to_timespec64(systime_snapshot.real);
  157. if (systime_snapshot.cs_id == st->cs_id)
  158. sts->post_ts = sts->pre_ts;
  159. }
  160. return 0;
  161. }
  162. #ifdef SUPPORT_KVMCLOCK
  163. /*
  164. * In the case where the system is using the KVM clock for timekeeping, convert
  165. * the TSC value into a KVM clock time in order to return a paired reading that
  166. * get_device_system_crosststamp() can cope with.
  167. */
  168. static int vmclock_get_crosststamp_kvmclock(struct vmclock_state *st,
  169. struct ptp_system_timestamp *sts,
  170. struct system_counterval_t *system_counter,
  171. struct timespec64 *tspec)
  172. {
  173. struct pvclock_vcpu_time_info *pvti = this_cpu_pvti();
  174. unsigned int pvti_ver;
  175. int ret;
  176. preempt_disable_notrace();
  177. do {
  178. pvti_ver = pvclock_read_begin(pvti);
  179. ret = vmclock_get_crosststamp(st, sts, system_counter, tspec);
  180. if (ret)
  181. break;
  182. system_counter->cycles = __pvclock_read_cycles(pvti,
  183. system_counter->cycles);
  184. system_counter->cs_id = CSID_X86_KVM_CLK;
  185. /*
  186. * This retry should never really happen; if the TSC is
  187. * stable and reliable enough across vCPUS that it is sane
  188. * for the hypervisor to expose a VMCLOCK device which uses
  189. * it as the reference counter, then the KVM clock sohuld be
  190. * in 'master clock mode' and basically never changed. But
  191. * the KVM clock is a fickle and often broken thing, so do
  192. * it "properly" just in case.
  193. */
  194. } while (pvclock_read_retry(pvti, pvti_ver));
  195. preempt_enable_notrace();
  196. return ret;
  197. }
  198. #endif
  199. static int ptp_vmclock_get_time_fn(ktime_t *device_time,
  200. struct system_counterval_t *system_counter,
  201. void *ctx)
  202. {
  203. struct vmclock_state *st = ctx;
  204. struct timespec64 tspec;
  205. int ret;
  206. #ifdef SUPPORT_KVMCLOCK
  207. if (READ_ONCE(st->sys_cs_id) == CSID_X86_KVM_CLK)
  208. ret = vmclock_get_crosststamp_kvmclock(st, NULL, system_counter,
  209. &tspec);
  210. else
  211. #endif
  212. ret = vmclock_get_crosststamp(st, NULL, system_counter, &tspec);
  213. if (!ret)
  214. *device_time = timespec64_to_ktime(tspec);
  215. return ret;
  216. }
  217. static int ptp_vmclock_getcrosststamp(struct ptp_clock_info *ptp,
  218. struct system_device_crosststamp *xtstamp)
  219. {
  220. struct vmclock_state *st = container_of(ptp, struct vmclock_state,
  221. ptp_clock_info);
  222. int ret = get_device_system_crosststamp(ptp_vmclock_get_time_fn, st,
  223. NULL, xtstamp);
  224. #ifdef SUPPORT_KVMCLOCK
  225. /*
  226. * On x86, the KVM clock may be used for the system time. We can
  227. * actually convert a TSC reading to that, and return a paired
  228. * timestamp that get_device_system_crosststamp() *can* handle.
  229. */
  230. if (ret == -ENODEV) {
  231. struct system_time_snapshot systime_snapshot;
  232. ktime_get_snapshot(&systime_snapshot);
  233. if (systime_snapshot.cs_id == CSID_X86_TSC ||
  234. systime_snapshot.cs_id == CSID_X86_KVM_CLK) {
  235. WRITE_ONCE(st->sys_cs_id, systime_snapshot.cs_id);
  236. ret = get_device_system_crosststamp(ptp_vmclock_get_time_fn,
  237. st, NULL, xtstamp);
  238. }
  239. }
  240. #endif
  241. return ret;
  242. }
  243. /*
  244. * PTP clock operations
  245. */
  246. static int ptp_vmclock_adjfine(struct ptp_clock_info *ptp, long delta)
  247. {
  248. return -EOPNOTSUPP;
  249. }
  250. static int ptp_vmclock_adjtime(struct ptp_clock_info *ptp, s64 delta)
  251. {
  252. return -EOPNOTSUPP;
  253. }
  254. static int ptp_vmclock_settime(struct ptp_clock_info *ptp,
  255. const struct timespec64 *ts)
  256. {
  257. return -EOPNOTSUPP;
  258. }
  259. static int ptp_vmclock_gettimex(struct ptp_clock_info *ptp, struct timespec64 *ts,
  260. struct ptp_system_timestamp *sts)
  261. {
  262. struct vmclock_state *st = container_of(ptp, struct vmclock_state,
  263. ptp_clock_info);
  264. return vmclock_get_crosststamp(st, sts, NULL, ts);
  265. }
  266. static int ptp_vmclock_enable(struct ptp_clock_info *ptp,
  267. struct ptp_clock_request *rq, int on)
  268. {
  269. return -EOPNOTSUPP;
  270. }
  271. static const struct ptp_clock_info ptp_vmclock_info = {
  272. .owner = THIS_MODULE,
  273. .max_adj = 0,
  274. .n_ext_ts = 0,
  275. .n_pins = 0,
  276. .pps = 0,
  277. .adjfine = ptp_vmclock_adjfine,
  278. .adjtime = ptp_vmclock_adjtime,
  279. .gettimex64 = ptp_vmclock_gettimex,
  280. .settime64 = ptp_vmclock_settime,
  281. .enable = ptp_vmclock_enable,
  282. .getcrosststamp = ptp_vmclock_getcrosststamp,
  283. };
  284. static struct ptp_clock *vmclock_ptp_register(struct device *dev,
  285. struct vmclock_state *st)
  286. {
  287. enum clocksource_ids cs_id;
  288. if (IS_ENABLED(CONFIG_ARM64) &&
  289. st->clk->counter_id == VMCLOCK_COUNTER_ARM_VCNT) {
  290. /* Can we check it's the virtual counter? */
  291. cs_id = CSID_ARM_ARCH_COUNTER;
  292. } else if (IS_ENABLED(CONFIG_X86) &&
  293. st->clk->counter_id == VMCLOCK_COUNTER_X86_TSC) {
  294. cs_id = CSID_X86_TSC;
  295. } else {
  296. return NULL;
  297. }
  298. /* Accept TAI directly, or UTC with valid offset for conversion to TAI */
  299. if (!tai_adjust(st->clk, NULL)) {
  300. dev_info(dev, "vmclock does not provide unambiguous time\n");
  301. return NULL;
  302. }
  303. st->sys_cs_id = cs_id;
  304. st->cs_id = cs_id;
  305. st->ptp_clock_info = ptp_vmclock_info;
  306. strscpy(st->ptp_clock_info.name, st->name);
  307. return ptp_clock_register(&st->ptp_clock_info, dev);
  308. }
  309. struct vmclock_file_state {
  310. struct vmclock_state *st;
  311. atomic_t seq;
  312. };
  313. static int vmclock_miscdev_mmap(struct file *fp, struct vm_area_struct *vma)
  314. {
  315. struct vmclock_file_state *fst = fp->private_data;
  316. struct vmclock_state *st = fst->st;
  317. if ((vma->vm_flags & (VM_READ|VM_WRITE)) != VM_READ)
  318. return -EROFS;
  319. if (vma->vm_end - vma->vm_start != PAGE_SIZE || vma->vm_pgoff)
  320. return -EINVAL;
  321. if (io_remap_pfn_range(vma, vma->vm_start,
  322. st->res.start >> PAGE_SHIFT, PAGE_SIZE,
  323. vma->vm_page_prot))
  324. return -EAGAIN;
  325. return 0;
  326. }
  327. static ssize_t vmclock_miscdev_read(struct file *fp, char __user *buf,
  328. size_t count, loff_t *ppos)
  329. {
  330. ktime_t deadline = ktime_add(ktime_get(), VMCLOCK_MAX_WAIT);
  331. struct vmclock_file_state *fst = fp->private_data;
  332. struct vmclock_state *st = fst->st;
  333. uint32_t seq, old_seq;
  334. size_t max_count;
  335. if (*ppos >= PAGE_SIZE)
  336. return 0;
  337. max_count = PAGE_SIZE - *ppos;
  338. if (count > max_count)
  339. count = max_count;
  340. old_seq = atomic_read(&fst->seq);
  341. while (1) {
  342. seq = le32_to_cpu(st->clk->seq_count) & ~1U;
  343. /* Pairs with hypervisor wmb */
  344. virt_rmb();
  345. if (copy_to_user(buf, ((char *)st->clk) + *ppos, count))
  346. return -EFAULT;
  347. /* Pairs with hypervisor wmb */
  348. virt_rmb();
  349. if (seq == le32_to_cpu(st->clk->seq_count)) {
  350. /*
  351. * Either we updated fst->seq to seq (the latest version we observed)
  352. * or someone else did (old_seq == seq), so we can break.
  353. */
  354. if (atomic_try_cmpxchg(&fst->seq, &old_seq, seq) ||
  355. old_seq == seq) {
  356. break;
  357. }
  358. }
  359. if (ktime_after(ktime_get(), deadline))
  360. return -ETIMEDOUT;
  361. }
  362. *ppos += count;
  363. return count;
  364. }
  365. static __poll_t vmclock_miscdev_poll(struct file *fp, poll_table *wait)
  366. {
  367. struct vmclock_file_state *fst = fp->private_data;
  368. struct vmclock_state *st = fst->st;
  369. uint32_t seq;
  370. /*
  371. * Hypervisor will not send us any notifications, so fail immediately
  372. * to avoid having caller sleeping for ever.
  373. */
  374. if (!(le64_to_cpu(st->clk->flags) & VMCLOCK_FLAG_NOTIFICATION_PRESENT))
  375. return POLLHUP;
  376. poll_wait(fp, &st->disrupt_wait, wait);
  377. seq = le32_to_cpu(st->clk->seq_count);
  378. if (atomic_read(&fst->seq) != seq)
  379. return POLLIN | POLLRDNORM;
  380. return 0;
  381. }
  382. static int vmclock_miscdev_open(struct inode *inode, struct file *fp)
  383. {
  384. struct vmclock_state *st = container_of(fp->private_data,
  385. struct vmclock_state, miscdev);
  386. struct vmclock_file_state *fst = kzalloc_obj(*fst);
  387. if (!fst)
  388. return -ENOMEM;
  389. fst->st = st;
  390. atomic_set(&fst->seq, 0);
  391. fp->private_data = fst;
  392. return 0;
  393. }
  394. static int vmclock_miscdev_release(struct inode *inode, struct file *fp)
  395. {
  396. kfree(fp->private_data);
  397. return 0;
  398. }
  399. static const struct file_operations vmclock_miscdev_fops = {
  400. .owner = THIS_MODULE,
  401. .open = vmclock_miscdev_open,
  402. .release = vmclock_miscdev_release,
  403. .mmap = vmclock_miscdev_mmap,
  404. .read = vmclock_miscdev_read,
  405. .poll = vmclock_miscdev_poll,
  406. };
  407. /* module operations */
  408. #if IS_ENABLED(CONFIG_ACPI)
  409. static acpi_status vmclock_acpi_resources(struct acpi_resource *ares, void *data)
  410. {
  411. struct vmclock_state *st = data;
  412. struct resource_win win;
  413. struct resource *res = &win.res;
  414. if (ares->type == ACPI_RESOURCE_TYPE_END_TAG)
  415. return AE_OK;
  416. /* There can be only one */
  417. if (resource_type(&st->res) == IORESOURCE_MEM)
  418. return AE_ERROR;
  419. if (acpi_dev_resource_memory(ares, res) ||
  420. acpi_dev_resource_address_space(ares, &win)) {
  421. if (resource_type(res) != IORESOURCE_MEM ||
  422. resource_size(res) < sizeof(st->clk))
  423. return AE_ERROR;
  424. st->res = *res;
  425. return AE_OK;
  426. }
  427. return AE_ERROR;
  428. }
  429. static void
  430. vmclock_acpi_notification_handler(acpi_handle __always_unused handle,
  431. u32 __always_unused event, void *dev)
  432. {
  433. struct device *device = dev;
  434. struct vmclock_state *st = device->driver_data;
  435. wake_up_interruptible(&st->disrupt_wait);
  436. }
  437. static int vmclock_setup_acpi_notification(struct device *dev)
  438. {
  439. struct acpi_device *adev = ACPI_COMPANION(dev);
  440. acpi_status status;
  441. /*
  442. * This should never happen as this function is only called when
  443. * has_acpi_companion(dev) is true, but the logic is sufficiently
  444. * complex that Coverity can't see the tautology.
  445. */
  446. if (!adev)
  447. return -ENODEV;
  448. status = acpi_install_notify_handler(adev->handle, ACPI_DEVICE_NOTIFY,
  449. vmclock_acpi_notification_handler,
  450. dev);
  451. if (ACPI_FAILURE(status)) {
  452. dev_err(dev, "failed to install notification handler");
  453. return -ENODEV;
  454. }
  455. return 0;
  456. }
  457. static int vmclock_probe_acpi(struct device *dev, struct vmclock_state *st)
  458. {
  459. struct acpi_device *adev = ACPI_COMPANION(dev);
  460. acpi_status status;
  461. /*
  462. * This should never happen as this function is only called when
  463. * has_acpi_companion(dev) is true, but the logic is sufficiently
  464. * complex that Coverity can't see the tautology.
  465. */
  466. if (!adev)
  467. return -ENODEV;
  468. status = acpi_walk_resources(adev->handle, METHOD_NAME__CRS,
  469. vmclock_acpi_resources, st);
  470. if (ACPI_FAILURE(status) || resource_type(&st->res) != IORESOURCE_MEM) {
  471. dev_err(dev, "failed to get resources\n");
  472. return -ENODEV;
  473. }
  474. return 0;
  475. }
  476. #endif /* CONFIG_ACPI */
  477. static irqreturn_t vmclock_of_irq_handler(int __always_unused irq, void *_st)
  478. {
  479. struct vmclock_state *st = _st;
  480. wake_up_interruptible(&st->disrupt_wait);
  481. return IRQ_HANDLED;
  482. }
  483. static int vmclock_probe_dt(struct device *dev, struct vmclock_state *st)
  484. {
  485. struct platform_device *pdev = to_platform_device(dev);
  486. struct resource *res;
  487. res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
  488. if (!res)
  489. return -ENODEV;
  490. st->res = *res;
  491. return 0;
  492. }
  493. static int vmclock_setup_of_notification(struct device *dev)
  494. {
  495. struct platform_device *pdev = to_platform_device(dev);
  496. int irq;
  497. irq = platform_get_irq(pdev, 0);
  498. if (irq < 0)
  499. return irq;
  500. return devm_request_irq(dev, irq, vmclock_of_irq_handler, IRQF_SHARED,
  501. "vmclock", dev->driver_data);
  502. }
  503. static int vmclock_setup_notification(struct device *dev,
  504. struct vmclock_state *st)
  505. {
  506. /* The device does not support notifications. Nothing else to do */
  507. if (!(le64_to_cpu(st->clk->flags) & VMCLOCK_FLAG_NOTIFICATION_PRESENT))
  508. return 0;
  509. #if IS_ENABLED(CONFIG_ACPI)
  510. if (has_acpi_companion(dev))
  511. return vmclock_setup_acpi_notification(dev);
  512. #endif
  513. return vmclock_setup_of_notification(dev);
  514. }
  515. static void vmclock_remove(void *data)
  516. {
  517. struct device *dev = data;
  518. struct vmclock_state *st = dev->driver_data;
  519. if (!st) {
  520. dev_err(dev, "%s called with NULL driver_data", __func__);
  521. return;
  522. }
  523. #if IS_ENABLED(CONFIG_ACPI)
  524. if (has_acpi_companion(dev))
  525. acpi_remove_notify_handler(ACPI_COMPANION(dev)->handle,
  526. ACPI_DEVICE_NOTIFY,
  527. vmclock_acpi_notification_handler);
  528. #endif
  529. if (st->ptp_clock)
  530. ptp_clock_unregister(st->ptp_clock);
  531. if (st->miscdev.minor != MISC_DYNAMIC_MINOR)
  532. misc_deregister(&st->miscdev);
  533. dev->driver_data = NULL;
  534. }
  535. static void vmclock_put_idx(void *data)
  536. {
  537. struct vmclock_state *st = data;
  538. ida_free(&vmclock_ida, st->index);
  539. }
  540. static int vmclock_probe(struct platform_device *pdev)
  541. {
  542. struct device *dev = &pdev->dev;
  543. struct vmclock_state *st;
  544. int ret;
  545. st = devm_kzalloc(dev, sizeof(*st), GFP_KERNEL);
  546. if (!st)
  547. return -ENOMEM;
  548. #if IS_ENABLED(CONFIG_ACPI)
  549. if (has_acpi_companion(dev))
  550. ret = vmclock_probe_acpi(dev, st);
  551. else
  552. #endif
  553. ret = vmclock_probe_dt(dev, st);
  554. if (ret) {
  555. dev_info(dev, "Failed to obtain physical address: %d\n", ret);
  556. return ret;
  557. }
  558. if (resource_size(&st->res) < VMCLOCK_MIN_SIZE) {
  559. dev_info(dev, "Region too small (0x%llx)\n",
  560. resource_size(&st->res));
  561. return -EINVAL;
  562. }
  563. st->clk = devm_memremap(dev, st->res.start, resource_size(&st->res),
  564. MEMREMAP_WB | MEMREMAP_DEC);
  565. if (IS_ERR(st->clk)) {
  566. ret = PTR_ERR(st->clk);
  567. dev_info(dev, "failed to map shared memory\n");
  568. st->clk = NULL;
  569. return ret;
  570. }
  571. if (le32_to_cpu(st->clk->magic) != VMCLOCK_MAGIC ||
  572. le32_to_cpu(st->clk->size) > resource_size(&st->res) ||
  573. le16_to_cpu(st->clk->version) != 1) {
  574. dev_info(dev, "vmclock magic fields invalid\n");
  575. return -EINVAL;
  576. }
  577. ret = ida_alloc(&vmclock_ida, GFP_KERNEL);
  578. if (ret < 0)
  579. return ret;
  580. st->index = ret;
  581. ret = devm_add_action_or_reset(&pdev->dev, vmclock_put_idx, st);
  582. if (ret)
  583. return ret;
  584. st->name = devm_kasprintf(&pdev->dev, GFP_KERNEL, "vmclock%d", st->index);
  585. if (!st->name)
  586. return -ENOMEM;
  587. st->miscdev.minor = MISC_DYNAMIC_MINOR;
  588. init_waitqueue_head(&st->disrupt_wait);
  589. dev->driver_data = st;
  590. ret = devm_add_action_or_reset(&pdev->dev, vmclock_remove, dev);
  591. if (ret)
  592. return ret;
  593. ret = vmclock_setup_notification(dev, st);
  594. if (ret)
  595. return ret;
  596. /*
  597. * If the structure is big enough, it can be mapped to userspace.
  598. * Theoretically a guest OS even using larger pages could still
  599. * use 4KiB PTEs to map smaller MMIO regions like this, but let's
  600. * cross that bridge if/when we come to it.
  601. */
  602. if (le32_to_cpu(st->clk->size) >= PAGE_SIZE) {
  603. st->miscdev.fops = &vmclock_miscdev_fops;
  604. st->miscdev.name = st->name;
  605. ret = misc_register(&st->miscdev);
  606. if (ret)
  607. return ret;
  608. }
  609. /* If there is valid clock information, register a PTP clock */
  610. if (VMCLOCK_FIELD_PRESENT(st->clk, time_frac_sec)) {
  611. /* Can return a silent NULL, or an error. */
  612. st->ptp_clock = vmclock_ptp_register(dev, st);
  613. if (IS_ERR(st->ptp_clock)) {
  614. ret = PTR_ERR(st->ptp_clock);
  615. st->ptp_clock = NULL;
  616. return ret;
  617. }
  618. }
  619. if (!st->miscdev.minor && !st->ptp_clock) {
  620. /* Neither miscdev nor PTP registered */
  621. dev_info(dev, "vmclock: Neither miscdev nor PTP available; not registering\n");
  622. return -ENODEV;
  623. }
  624. dev_info(dev, "%s: registered %s%s%s\n", st->name,
  625. st->miscdev.minor ? "miscdev" : "",
  626. (st->miscdev.minor && st->ptp_clock) ? ", " : "",
  627. st->ptp_clock ? "PTP" : "");
  628. return 0;
  629. }
  630. static const struct acpi_device_id vmclock_acpi_ids[] = {
  631. { "AMZNC10C", 0 },
  632. { "VMCLOCK", 0 },
  633. {}
  634. };
  635. MODULE_DEVICE_TABLE(acpi, vmclock_acpi_ids);
  636. static const struct of_device_id vmclock_of_ids[] = {
  637. { .compatible = "amazon,vmclock", },
  638. { },
  639. };
  640. MODULE_DEVICE_TABLE(of, vmclock_of_ids);
  641. static struct platform_driver vmclock_platform_driver = {
  642. .probe = vmclock_probe,
  643. .driver = {
  644. .name = "vmclock",
  645. .acpi_match_table = vmclock_acpi_ids,
  646. .of_match_table = vmclock_of_ids,
  647. },
  648. };
  649. module_platform_driver(vmclock_platform_driver)
  650. MODULE_AUTHOR("David Woodhouse <dwmw2@infradead.org>");
  651. MODULE_DESCRIPTION("PTP clock using VMCLOCK");
  652. MODULE_LICENSE("GPL");