kvm_page_table_test.c 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477
  1. // SPDX-License-Identifier: GPL-2.0
  2. /*
  3. * KVM page table test
  4. *
  5. * Copyright (C) 2021, Huawei, Inc.
  6. *
  7. * Make sure that THP has been enabled or enough HUGETLB pages with specific
  8. * page size have been pre-allocated on your system, if you are planning to
  9. * use hugepages to back the guest memory for testing.
  10. */
  11. #include <stdio.h>
  12. #include <stdlib.h>
  13. #include <time.h>
  14. #include <pthread.h>
  15. #include <semaphore.h>
  16. #include "test_util.h"
  17. #include "kvm_util.h"
  18. #include "processor.h"
  19. #include "guest_modes.h"
  20. #include "ucall_common.h"
  /* Index of the memory slot added for the test memory region */
  #define TEST_MEM_SLOT_INDEX 1

  /*
   * Default size (1GB) of the memory for testing.
   *
   * Spelled as a 64-bit unsigned constant because it initializes a uint64_t
   * size: an int-typed shift would overflow as soon as the default is raised
   * to 2GB or more.
   */
  #define DEFAULT_TEST_MEM_SIZE (1ULL << 30)

  /* Default guest test virtual memory offset */
  #define DEFAULT_GUEST_TEST_MEM 0xc0000000
  /* Different guest memory accessing stages */
  enum test_stage {
      KVM_BEFORE_MAPPINGS,
      KVM_CREATE_MAPPINGS,
      KVM_UPDATE_MAPPINGS,
      KVM_ADJUST_MAPPINGS,
      NUM_TEST_STAGES,
  };

  /*
   * Printable name of each test stage, indexed by enum test_stage.
   *
   * Designated initializers tie every name to its enumerator, so reordering
   * the enum cannot silently mislabel a stage.
   */
  static const char * const test_stage_string[] = {
      [KVM_BEFORE_MAPPINGS] = "KVM_BEFORE_MAPPINGS",
      [KVM_CREATE_MAPPINGS] = "KVM_CREATE_MAPPINGS",
      [KVM_UPDATE_MAPPINGS] = "KVM_UPDATE_MAPPINGS",
      [KVM_ADJUST_MAPPINGS] = "KVM_ADJUST_MAPPINGS",
  };

  /* Fail the build if a stage is added to the enum without a name here. */
  _Static_assert(sizeof(test_stage_string) / sizeof(test_stage_string[0]) ==
                 NUM_TEST_STAGES,
                 "test_stage_string must name every test stage");
  40. struct test_args {
  41. struct kvm_vm *vm;
  42. uint64_t guest_test_virt_mem;
  43. uint64_t host_page_size;
  44. uint64_t host_num_pages;
  45. uint64_t large_page_size;
  46. uint64_t large_num_pages;
  47. uint64_t host_pages_per_lpage;
  48. enum vm_mem_backing_src_type src_type;
  49. struct kvm_vcpu *vcpus[KVM_MAX_VCPUS];
  50. };
  51. /*
  52. * Guest variables. Use addr_gva2hva() if these variables need
  53. * to be changed in host.
  54. */
  55. static enum test_stage guest_test_stage;
  56. /* Host variables */
  57. static uint32_t nr_vcpus = 1;
  58. static struct test_args test_args;
  59. static enum test_stage *current_stage;
  60. static bool host_quit;
  61. /* Whether the test stage is updated, or completed */
  62. static sem_t test_stage_updated;
  63. static sem_t test_stage_completed;
  64. /*
  65. * Guest physical memory offset of the testing memory slot.
  66. * This will be set to the topmost valid physical address minus
  67. * the test memory size.
  68. */
  69. static uint64_t guest_test_phys_mem;
  70. /*
  71. * Guest virtual memory offset of the testing memory slot.
  72. * Must not conflict with identity mapped test code.
  73. */
  74. static uint64_t guest_test_virt_mem = DEFAULT_GUEST_TEST_MEM;
  75. static void guest_code(bool do_write)
  76. {
  77. struct test_args *p = &test_args;
  78. enum test_stage *current_stage = &guest_test_stage;
  79. uint64_t addr;
  80. int i, j;
  81. while (true) {
  82. addr = p->guest_test_virt_mem;
  83. switch (READ_ONCE(*current_stage)) {
  84. /*
  85. * All vCPU threads will be started in this stage,
  86. * where guest code of each vCPU will do nothing.
  87. */
  88. case KVM_BEFORE_MAPPINGS:
  89. break;
  90. /*
  91. * Before dirty logging, vCPUs concurrently access the first
  92. * 8 bytes of each page (host page/large page) within the same
  93. * memory region with different accessing types (read/write).
  94. * Then KVM will create normal page mappings or huge block
  95. * mappings for them.
  96. */
  97. case KVM_CREATE_MAPPINGS:
  98. for (i = 0; i < p->large_num_pages; i++) {
  99. if (do_write)
  100. *(uint64_t *)addr = 0x0123456789ABCDEF;
  101. else
  102. READ_ONCE(*(uint64_t *)addr);
  103. addr += p->large_page_size;
  104. }
  105. break;
  106. /*
  107. * During dirty logging, KVM will only update attributes of the
  108. * normal page mappings from RO to RW if memory backing src type
  109. * is anonymous. In other cases, KVM will split the huge block
  110. * mappings into normal page mappings if memory backing src type
  111. * is THP or HUGETLB.
  112. */
  113. case KVM_UPDATE_MAPPINGS:
  114. if (p->src_type == VM_MEM_SRC_ANONYMOUS) {
  115. for (i = 0; i < p->host_num_pages; i++) {
  116. *(uint64_t *)addr = 0x0123456789ABCDEF;
  117. addr += p->host_page_size;
  118. }
  119. break;
  120. }
  121. for (i = 0; i < p->large_num_pages; i++) {
  122. /*
  123. * Write to the first host page in each large
  124. * page region, and triger break of large pages.
  125. */
  126. *(uint64_t *)addr = 0x0123456789ABCDEF;
  127. /*
  128. * Access the middle host pages in each large
  129. * page region. Since dirty logging is enabled,
  130. * this will create new mappings at the smallest
  131. * granularity.
  132. */
  133. addr += p->large_page_size / 2;
  134. for (j = 0; j < p->host_pages_per_lpage / 2; j++) {
  135. READ_ONCE(*(uint64_t *)addr);
  136. addr += p->host_page_size;
  137. }
  138. }
  139. break;
  140. /*
  141. * After dirty logging is stopped, vCPUs concurrently read
  142. * from every single host page. Then KVM will coalesce the
  143. * split page mappings back to block mappings. And a TLB
  144. * conflict abort could occur here if TLB entries of the
  145. * page mappings are not fully invalidated.
  146. */
  147. case KVM_ADJUST_MAPPINGS:
  148. for (i = 0; i < p->host_num_pages; i++) {
  149. READ_ONCE(*(uint64_t *)addr);
  150. addr += p->host_page_size;
  151. }
  152. break;
  153. default:
  154. GUEST_ASSERT(0);
  155. }
  156. GUEST_SYNC(1);
  157. }
  158. }
  159. static void *vcpu_worker(void *data)
  160. {
  161. struct kvm_vcpu *vcpu = data;
  162. bool do_write = !(vcpu->id % 2);
  163. struct timespec start;
  164. struct timespec ts_diff;
  165. enum test_stage stage;
  166. int ret;
  167. vcpu_args_set(vcpu, 1, do_write);
  168. while (!READ_ONCE(host_quit)) {
  169. ret = sem_wait(&test_stage_updated);
  170. TEST_ASSERT(ret == 0, "Error in sem_wait");
  171. if (READ_ONCE(host_quit))
  172. return NULL;
  173. clock_gettime(CLOCK_MONOTONIC, &start);
  174. ret = _vcpu_run(vcpu);
  175. ts_diff = timespec_elapsed(start);
  176. TEST_ASSERT(ret == 0, "vcpu_run failed: %d", ret);
  177. TEST_ASSERT(get_ucall(vcpu, NULL) == UCALL_SYNC,
  178. "Invalid guest sync status: exit_reason=%s",
  179. exit_reason_str(vcpu->run->exit_reason));
  180. pr_debug("Got sync event from vCPU %d\n", vcpu->id);
  181. stage = READ_ONCE(*current_stage);
  182. /*
  183. * Here we can know the execution time of every
  184. * single vcpu running in different test stages.
  185. */
  186. pr_debug("vCPU %d has completed stage %s\n"
  187. "execution time is: %ld.%.9lds\n\n",
  188. vcpu->id, test_stage_string[stage],
  189. ts_diff.tv_sec, ts_diff.tv_nsec);
  190. ret = sem_post(&test_stage_completed);
  191. TEST_ASSERT(ret == 0, "Error in sem_post");
  192. }
  193. return NULL;
  194. }
  195. struct test_params {
  196. uint64_t phys_offset;
  197. uint64_t test_mem_size;
  198. enum vm_mem_backing_src_type src_type;
  199. };
  200. static struct kvm_vm *pre_init_before_test(enum vm_guest_mode mode, void *arg)
  201. {
  202. int ret;
  203. struct test_params *p = arg;
  204. enum vm_mem_backing_src_type src_type = p->src_type;
  205. uint64_t large_page_size = get_backing_src_pagesz(src_type);
  206. uint64_t guest_page_size = vm_guest_mode_params[mode].page_size;
  207. uint64_t host_page_size = getpagesize();
  208. uint64_t test_mem_size = p->test_mem_size;
  209. uint64_t guest_num_pages;
  210. uint64_t alignment;
  211. void *host_test_mem;
  212. struct kvm_vm *vm;
  213. /* Align up the test memory size */
  214. alignment = max(large_page_size, guest_page_size);
  215. test_mem_size = (test_mem_size + alignment - 1) & ~(alignment - 1);
  216. /* Create a VM with enough guest pages */
  217. guest_num_pages = test_mem_size / guest_page_size;
  218. vm = __vm_create_with_vcpus(VM_SHAPE(mode), nr_vcpus, guest_num_pages,
  219. guest_code, test_args.vcpus);
  220. /* Align down GPA of the testing memslot */
  221. if (!p->phys_offset)
  222. guest_test_phys_mem = (vm->max_gfn - guest_num_pages) *
  223. guest_page_size;
  224. else
  225. guest_test_phys_mem = p->phys_offset;
  226. #ifdef __s390x__
  227. alignment = max(0x100000UL, alignment);
  228. #endif
  229. guest_test_phys_mem = align_down(guest_test_phys_mem, alignment);
  230. /* Set up the shared data structure test_args */
  231. test_args.vm = vm;
  232. test_args.guest_test_virt_mem = guest_test_virt_mem;
  233. test_args.host_page_size = host_page_size;
  234. test_args.host_num_pages = test_mem_size / host_page_size;
  235. test_args.large_page_size = large_page_size;
  236. test_args.large_num_pages = test_mem_size / large_page_size;
  237. test_args.host_pages_per_lpage = large_page_size / host_page_size;
  238. test_args.src_type = src_type;
  239. /* Add an extra memory slot with specified backing src type */
  240. vm_userspace_mem_region_add(vm, src_type, guest_test_phys_mem,
  241. TEST_MEM_SLOT_INDEX, guest_num_pages, 0);
  242. /* Do mapping(GVA->GPA) for the testing memory slot */
  243. virt_map(vm, guest_test_virt_mem, guest_test_phys_mem, guest_num_pages);
  244. /* Cache the HVA pointer of the region */
  245. host_test_mem = addr_gpa2hva(vm, (vm_paddr_t)guest_test_phys_mem);
  246. /* Export shared structure test_args to guest */
  247. sync_global_to_guest(vm, test_args);
  248. ret = sem_init(&test_stage_updated, 0, 0);
  249. TEST_ASSERT(ret == 0, "Error in sem_init");
  250. ret = sem_init(&test_stage_completed, 0, 0);
  251. TEST_ASSERT(ret == 0, "Error in sem_init");
  252. current_stage = addr_gva2hva(vm, (vm_vaddr_t)(&guest_test_stage));
  253. *current_stage = NUM_TEST_STAGES;
  254. pr_info("Testing guest mode: %s\n", vm_guest_mode_string(mode));
  255. pr_info("Testing memory backing src type: %s\n",
  256. vm_mem_backing_src_alias(src_type)->name);
  257. pr_info("Testing memory backing src granularity: 0x%lx\n",
  258. large_page_size);
  259. pr_info("Testing memory size(aligned): 0x%lx\n", test_mem_size);
  260. pr_info("Guest physical test memory offset: 0x%lx\n",
  261. guest_test_phys_mem);
  262. pr_info("Host virtual test memory offset: 0x%lx\n",
  263. (uint64_t)host_test_mem);
  264. pr_info("Number of testing vCPUs: %d\n", nr_vcpus);
  265. return vm;
  266. }
  267. static void vcpus_complete_new_stage(enum test_stage stage)
  268. {
  269. int ret;
  270. int vcpus;
  271. /* Wake up all the vcpus to run new test stage */
  272. for (vcpus = 0; vcpus < nr_vcpus; vcpus++) {
  273. ret = sem_post(&test_stage_updated);
  274. TEST_ASSERT(ret == 0, "Error in sem_post");
  275. }
  276. pr_debug("All vcpus have been notified to continue\n");
  277. /* Wait for all the vcpus to complete new test stage */
  278. for (vcpus = 0; vcpus < nr_vcpus; vcpus++) {
  279. ret = sem_wait(&test_stage_completed);
  280. TEST_ASSERT(ret == 0, "Error in sem_wait");
  281. pr_debug("%d vcpus have completed stage %s\n",
  282. vcpus + 1, test_stage_string[stage]);
  283. }
  284. pr_debug("All vcpus have completed stage %s\n",
  285. test_stage_string[stage]);
  286. }
  287. static void run_test(enum vm_guest_mode mode, void *arg)
  288. {
  289. pthread_t *vcpu_threads;
  290. struct kvm_vm *vm;
  291. struct timespec start;
  292. struct timespec ts_diff;
  293. int ret, i;
  294. /* Create VM with vCPUs and make some pre-initialization */
  295. vm = pre_init_before_test(mode, arg);
  296. vcpu_threads = malloc(nr_vcpus * sizeof(*vcpu_threads));
  297. TEST_ASSERT(vcpu_threads, "Memory allocation failed");
  298. host_quit = false;
  299. *current_stage = KVM_BEFORE_MAPPINGS;
  300. for (i = 0; i < nr_vcpus; i++)
  301. pthread_create(&vcpu_threads[i], NULL, vcpu_worker,
  302. test_args.vcpus[i]);
  303. vcpus_complete_new_stage(*current_stage);
  304. pr_info("Started all vCPUs successfully\n");
  305. /* Test the stage of KVM creating mappings */
  306. *current_stage = KVM_CREATE_MAPPINGS;
  307. clock_gettime(CLOCK_MONOTONIC, &start);
  308. vcpus_complete_new_stage(*current_stage);
  309. ts_diff = timespec_elapsed(start);
  310. pr_info("KVM_CREATE_MAPPINGS: total execution time: %ld.%.9lds\n\n",
  311. ts_diff.tv_sec, ts_diff.tv_nsec);
  312. /* Test the stage of KVM updating mappings */
  313. vm_mem_region_set_flags(vm, TEST_MEM_SLOT_INDEX,
  314. KVM_MEM_LOG_DIRTY_PAGES);
  315. *current_stage = KVM_UPDATE_MAPPINGS;
  316. clock_gettime(CLOCK_MONOTONIC, &start);
  317. vcpus_complete_new_stage(*current_stage);
  318. ts_diff = timespec_elapsed(start);
  319. pr_info("KVM_UPDATE_MAPPINGS: total execution time: %ld.%.9lds\n\n",
  320. ts_diff.tv_sec, ts_diff.tv_nsec);
  321. /* Test the stage of KVM adjusting mappings */
  322. vm_mem_region_set_flags(vm, TEST_MEM_SLOT_INDEX, 0);
  323. *current_stage = KVM_ADJUST_MAPPINGS;
  324. clock_gettime(CLOCK_MONOTONIC, &start);
  325. vcpus_complete_new_stage(*current_stage);
  326. ts_diff = timespec_elapsed(start);
  327. pr_info("KVM_ADJUST_MAPPINGS: total execution time: %ld.%.9lds\n\n",
  328. ts_diff.tv_sec, ts_diff.tv_nsec);
  329. /* Tell the vcpu thread to quit */
  330. host_quit = true;
  331. for (i = 0; i < nr_vcpus; i++) {
  332. ret = sem_post(&test_stage_updated);
  333. TEST_ASSERT(ret == 0, "Error in sem_post");
  334. }
  335. for (i = 0; i < nr_vcpus; i++)
  336. pthread_join(vcpu_threads[i], NULL);
  337. ret = sem_destroy(&test_stage_updated);
  338. TEST_ASSERT(ret == 0, "Error in sem_destroy");
  339. ret = sem_destroy(&test_stage_completed);
  340. TEST_ASSERT(ret == 0, "Error in sem_destroy");
  341. free(vcpu_threads);
  342. kvm_vm_free(vm);
  343. }
  /*
   * Print the command-line usage text to stdout.
   *
   * @name: program name shown in the usage line.
   */
  static void help(char *name)
  {
      /* Usage line, set off by blank lines */
      puts("");
      printf("usage: %s [-h] [-p offset] [-m mode] "
             "[-b mem-size] [-v vcpus] [-s mem-type]\n", name);
      puts("");

      /* Per-option descriptions, in the order of the usage line */
      printf(" -p: specify guest physical test memory offset\n"
             " Warning: a low offset can conflict with the loaded test code.\n");

      guest_modes_help();

      printf(" -b: specify size of the memory region for testing. e.g. 10M or 3G.\n"
             " (default: 1G)\n"
             " -v: specify the number of vCPUs to run\n"
             " (default: 1)\n");

      backing_src_help("-s");

      puts("");
  }
  360. int main(int argc, char *argv[])
  361. {
  362. int max_vcpus = kvm_check_cap(KVM_CAP_MAX_VCPUS);
  363. struct test_params p = {
  364. .test_mem_size = DEFAULT_TEST_MEM_SIZE,
  365. .src_type = DEFAULT_VM_MEM_SRC,
  366. };
  367. int opt;
  368. guest_modes_append_default();
  369. while ((opt = getopt(argc, argv, "hp:m:b:v:s:")) != -1) {
  370. switch (opt) {
  371. case 'p':
  372. p.phys_offset = strtoull(optarg, NULL, 0);
  373. break;
  374. case 'm':
  375. guest_modes_cmdline(optarg);
  376. break;
  377. case 'b':
  378. p.test_mem_size = parse_size(optarg);
  379. break;
  380. case 'v':
  381. nr_vcpus = atoi_positive("Number of vCPUs", optarg);
  382. TEST_ASSERT(nr_vcpus <= max_vcpus,
  383. "Invalid number of vcpus, must be between 1 and %d", max_vcpus);
  384. break;
  385. case 's':
  386. p.src_type = parse_backing_src_type(optarg);
  387. break;
  388. case 'h':
  389. default:
  390. help(argv[0]);
  391. exit(0);
  392. }
  393. }
  394. for_each_guest_mode(run_test, &p);
  395. return 0;
  396. }