rseq_test.c 9.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326
  1. // SPDX-License-Identifier: GPL-2.0-only
  2. /*
  3. * Include rseq.c without _GNU_SOURCE defined, before including any headers, so
  4. * that rseq.c is compiled with its configuration, not KVM selftests' config.
  5. */
  6. #undef _GNU_SOURCE
  7. #include "../rseq/rseq.c"
  8. #define _GNU_SOURCE
  9. #include <errno.h>
  10. #include <fcntl.h>
  11. #include <pthread.h>
  12. #include <sched.h>
  13. #include <stdio.h>
  14. #include <stdlib.h>
  15. #include <string.h>
  16. #include <signal.h>
  17. #include <syscall.h>
  18. #include <sys/ioctl.h>
  19. #include <sys/sysinfo.h>
  20. #include <asm/barrier.h>
  21. #include <linux/atomic.h>
  22. #include <linux/rseq.h>
  23. #include <linux/unistd.h>
  24. #include "kvm_util.h"
  25. #include "processor.h"
  26. #include "test_util.h"
  27. #include "ucall_common.h"
  28. /*
  29. * Any bug related to task migration is likely to be timing-dependent; perform
  30. * a large number of migrations to reduce the odds of a false negative.
  31. */
  32. #define NR_TASK_MIGRATIONS 100000
  33. static pthread_t migration_thread;
  34. static cpu_set_t possible_mask;
  35. static int min_cpu, max_cpu;
  36. static bool done;
  37. static atomic_t seq_cnt;
  /*
   * Guest payload: loop forever, handing control back to the host with
   * GUEST_SYNC(0) on every pass.
   */
  static void guest_code(void)
  {
      while (1) {
          GUEST_SYNC(0);
      }
  }
  43. static int next_cpu(int cpu)
  44. {
  45. /*
  46. * Advance to the next CPU, skipping those that weren't in the original
  47. * affinity set. Sadly, there is no CPU_SET_FOR_EACH, and cpu_set_t's
  48. * data storage is considered as opaque. Note, if this task is pinned
  49. * to a small set of discontigous CPUs, e.g. 2 and 1023, this loop will
  50. * burn a lot cycles and the test will take longer than normal to
  51. * complete.
  52. */
  53. do {
  54. cpu++;
  55. if (cpu > max_cpu) {
  56. cpu = min_cpu;
  57. TEST_ASSERT(CPU_ISSET(cpu, &possible_mask),
  58. "Min CPU = %d must always be usable", cpu);
  59. break;
  60. }
  61. } while (!CPU_ISSET(cpu, &possible_mask));
  62. return cpu;
  63. }
  64. static void *migration_worker(void *__rseq_tid)
  65. {
  66. pid_t rseq_tid = (pid_t)(unsigned long)__rseq_tid;
  67. cpu_set_t allowed_mask;
  68. int r, i, cpu;
  69. CPU_ZERO(&allowed_mask);
  70. for (i = 0, cpu = min_cpu; i < NR_TASK_MIGRATIONS; i++, cpu = next_cpu(cpu)) {
  71. CPU_SET(cpu, &allowed_mask);
  72. /*
  73. * Bump the sequence count twice to allow the reader to detect
  74. * that a migration may have occurred in between rseq and sched
  75. * CPU ID reads. An odd sequence count indicates a migration
  76. * is in-progress, while a completely different count indicates
  77. * a migration occurred since the count was last read.
  78. */
  79. atomic_inc(&seq_cnt);
  80. /*
  81. * Ensure the odd count is visible while getcpu() isn't
  82. * stable, i.e. while changing affinity is in-progress.
  83. */
  84. smp_wmb();
  85. r = sched_setaffinity(rseq_tid, sizeof(allowed_mask), &allowed_mask);
  86. TEST_ASSERT(!r, "sched_setaffinity failed, errno = %d (%s)",
  87. errno, strerror(errno));
  88. smp_wmb();
  89. atomic_inc(&seq_cnt);
  90. CPU_CLR(cpu, &allowed_mask);
  91. /*
  92. * Wait 1-10us before proceeding to the next iteration and more
  93. * specifically, before bumping seq_cnt again. A delay is
  94. * needed on three fronts:
  95. *
  96. * 1. To allow sched_setaffinity() to prompt migration before
  97. * ioctl(KVM_RUN) enters the guest so that TIF_NOTIFY_RESUME
  98. * (or TIF_NEED_RESCHED, which indirectly leads to handling
  99. * NOTIFY_RESUME) is handled in KVM context.
  100. *
  101. * If NOTIFY_RESUME/NEED_RESCHED is set after KVM enters
  102. * the guest, the guest will trigger a IO/MMIO exit all the
  103. * way to userspace and the TIF flags will be handled by
  104. * the generic "exit to userspace" logic, not by KVM. The
  105. * exit to userspace is necessary to give the test a chance
  106. * to check the rseq CPU ID (see #2).
  107. *
  108. * Alternatively, guest_code() could include an instruction
  109. * to trigger an exit that is handled by KVM, but any such
  110. * exit requires architecture specific code.
  111. *
  112. * 2. To let ioctl(KVM_RUN) make its way back to the test
  113. * before the next round of migration. The test's check on
  114. * the rseq CPU ID must wait for migration to complete in
  115. * order to avoid false positive, thus any kernel rseq bug
  116. * will be missed if the next migration starts before the
  117. * check completes.
  118. *
  119. * 3. To ensure the read-side makes efficient forward progress,
  120. * e.g. if getcpu() involves a syscall. Stalling the read-side
  121. * means the test will spend more time waiting for getcpu()
  122. * to stabilize and less time trying to hit the timing-dependent
  123. * bug.
  124. *
  125. * Because any bug in this area is likely to be timing-dependent,
  126. * run with a range of delays at 1us intervals from 1us to 10us
  127. * as a best effort to avoid tuning the test to the point where
  128. * it can hit _only_ the original bug and not detect future
  129. * regressions.
  130. *
  131. * The original bug can reproduce with a delay up to ~500us on
  132. * x86-64, but starts to require more iterations to reproduce
  133. * as the delay creeps above ~10us, and the average runtime of
  134. * each iteration obviously increases as well. Cap the delay
  135. * at 10us to keep test runtime reasonable while minimizing
  136. * potential coverage loss.
  137. *
  138. * The lower bound for reproducing the bug is likely below 1us,
  139. * e.g. failures occur on x86-64 with nanosleep(0), but at that
  140. * point the overhead of the syscall likely dominates the delay.
  141. * Use usleep() for simplicity and to avoid unnecessary kernel
  142. * dependencies.
  143. */
  144. usleep((i % 10) + 1);
  145. }
  146. done = true;
  147. return NULL;
  148. }
  149. static void calc_min_max_cpu(void)
  150. {
  151. int i, cnt, nproc;
  152. TEST_REQUIRE(CPU_COUNT(&possible_mask) >= 2);
  153. /*
  154. * CPU_SET doesn't provide a FOR_EACH helper, get the min/max CPU that
  155. * this task is affined to in order to reduce the time spent querying
  156. * unusable CPUs, e.g. if this task is pinned to a small percentage of
  157. * total CPUs.
  158. */
  159. nproc = get_nprocs_conf();
  160. min_cpu = -1;
  161. max_cpu = -1;
  162. cnt = 0;
  163. for (i = 0; i < nproc; i++) {
  164. if (!CPU_ISSET(i, &possible_mask))
  165. continue;
  166. if (min_cpu == -1)
  167. min_cpu = i;
  168. max_cpu = i;
  169. cnt++;
  170. }
  171. __TEST_REQUIRE(cnt >= 2,
  172. "Only one usable CPU, task migration not possible");
  173. }
  /*
   * Print the usage text for program @name to stdout, framed by blank lines,
   * then terminate the process with exit status 0.  Never returns.
   */
  static void help(const char *name)
  {
      putchar('\n');
      printf("usage: %s [-h] [-u] [-l latency]\n", name);
      fputs(" -u: Don't sanity check the number of successful KVM_RUNs\n", stdout);
      fputs(" -l: Set /dev/cpu_dma_latency to suppress deep sleep states\n", stdout);
      putchar('\n');

      exit(0);
  }
  183. int main(int argc, char *argv[])
  184. {
  185. int r, i, snapshot, opt, fd = -1, latency = -1;
  186. bool skip_sanity_check = false;
  187. struct kvm_vm *vm;
  188. struct kvm_vcpu *vcpu;
  189. u32 cpu, rseq_cpu;
  190. while ((opt = getopt(argc, argv, "hl:u")) != -1) {
  191. switch (opt) {
  192. case 'u':
  193. skip_sanity_check = true;
  194. break;
  195. case 'l':
  196. latency = atoi_paranoid(optarg);
  197. break;
  198. case 'h':
  199. default:
  200. help(argv[0]);
  201. break;
  202. }
  203. }
  204. r = sched_getaffinity(0, sizeof(possible_mask), &possible_mask);
  205. TEST_ASSERT(!r, "sched_getaffinity failed, errno = %d (%s)", errno,
  206. strerror(errno));
  207. calc_min_max_cpu();
  208. r = rseq_register_current_thread();
  209. TEST_ASSERT(!r, "rseq_register_current_thread failed, errno = %d (%s)",
  210. errno, strerror(errno));
  211. /*
  212. * Create and run a dummy VM that immediately exits to userspace via
  213. * GUEST_SYNC, while concurrently migrating the process by setting its
  214. * CPU affinity.
  215. */
  216. vm = vm_create_with_one_vcpu(&vcpu, guest_code);
  217. pthread_create(&migration_thread, NULL, migration_worker,
  218. (void *)(unsigned long)syscall(SYS_gettid));
  219. if (latency >= 0) {
  220. /*
  221. * Writes to cpu_dma_latency persist only while the file is
  222. * open, i.e. it allows userspace to provide guaranteed latency
  223. * while running a workload. Keep the file open until the test
  224. * completes, otherwise writing cpu_dma_latency is meaningless.
  225. */
  226. fd = open("/dev/cpu_dma_latency", O_RDWR);
  227. TEST_ASSERT(fd >= 0, __KVM_SYSCALL_ERROR("open() /dev/cpu_dma_latency", fd));
  228. r = write(fd, &latency, 4);
  229. TEST_ASSERT(r >= 1, "Error setting /dev/cpu_dma_latency");
  230. }
  231. for (i = 0; !done; i++) {
  232. vcpu_run(vcpu);
  233. TEST_ASSERT(get_ucall(vcpu, NULL) == UCALL_SYNC,
  234. "Guest failed?");
  235. /*
  236. * Verify rseq's CPU matches sched's CPU. Ensure migration
  237. * doesn't occur between getcpu() and reading the rseq cpu_id
  238. * by rereading both if the sequence count changes, or if the
  239. * count is odd (migration in-progress).
  240. */
  241. do {
  242. /*
  243. * Drop bit 0 to force a mismatch if the count is odd,
  244. * i.e. if a migration is in-progress.
  245. */
  246. snapshot = atomic_read(&seq_cnt) & ~1;
  247. /*
  248. * Ensure calling getcpu() and reading rseq.cpu_id complete
  249. * in a single "no migration" window, i.e. are not reordered
  250. * across the seq_cnt reads.
  251. */
  252. smp_rmb();
  253. r = sys_getcpu(&cpu, NULL);
  254. TEST_ASSERT(!r, "getcpu failed, errno = %d (%s)",
  255. errno, strerror(errno));
  256. rseq_cpu = rseq_current_cpu_raw();
  257. smp_rmb();
  258. } while (snapshot != atomic_read(&seq_cnt));
  259. TEST_ASSERT(rseq_cpu == cpu,
  260. "rseq CPU = %d, sched CPU = %d", rseq_cpu, cpu);
  261. }
  262. if (fd > 0)
  263. close(fd);
  264. /*
  265. * Sanity check that the test was able to enter the guest a reasonable
  266. * number of times, e.g. didn't get stalled too often/long waiting for
  267. * getcpu() to stabilize. A 2:1 migration:KVM_RUN ratio is a fairly
  268. * conservative ratio on x86-64, which can do _more_ KVM_RUNs than
  269. * migrations given the 1us+ delay in the migration task.
  270. *
  271. * Another reason why it may have small migration:KVM_RUN ratio is that,
  272. * on systems with large low power mode wakeup latency, it may happen
  273. * quite often that the scheduler is not able to wake up the target CPU
  274. * before the vCPU thread is scheduled to another CPU.
  275. */
  276. TEST_ASSERT(skip_sanity_check || i > (NR_TASK_MIGRATIONS / 2),
  277. "Only performed %d KVM_RUNs, task stalled too much?\n\n"
  278. " Try disabling deep sleep states to reduce CPU wakeup latency,\n"
  279. " e.g. via cpuidle.off=1 or via -l <latency>, or run with -u to\n"
  280. " disable this sanity check.", i);
  281. pthread_join(migration_thread, NULL);
  282. kvm_vm_free(vm);
  283. rseq_unregister_current_thread();
  284. return 0;
  285. }