memslot_perf_test.c 30 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146
  1. // SPDX-License-Identifier: GPL-2.0
  2. /*
  3. * A memslot-related performance benchmark.
  4. *
  5. * Copyright (C) 2021 Oracle and/or its affiliates.
  6. *
  7. * Basic guest setup / host vCPU thread code lifted from set_memory_region_test.
  8. */
  9. #include <pthread.h>
  10. #include <sched.h>
  11. #include <semaphore.h>
  12. #include <stdatomic.h>
  13. #include <stdbool.h>
  14. #include <stdint.h>
  15. #include <stdio.h>
  16. #include <stdlib.h>
  17. #include <string.h>
  18. #include <sys/mman.h>
  19. #include <time.h>
  20. #include <unistd.h>
  21. #include <linux/compiler.h>
  22. #include <linux/sizes.h>
  23. #include <test_util.h>
  24. #include <kvm_util.h>
  25. #include <processor.h>
  26. #include <ucall_common.h>
  27. #define MEM_EXTRA_SIZE SZ_64K
  28. #define MEM_SIZE (SZ_512M + MEM_EXTRA_SIZE)
  29. #define MEM_GPA SZ_256M
  30. #define MEM_AUX_GPA MEM_GPA
  31. #define MEM_SYNC_GPA MEM_AUX_GPA
  32. #define MEM_TEST_GPA (MEM_AUX_GPA + MEM_EXTRA_SIZE)
  33. #define MEM_TEST_SIZE (MEM_SIZE - MEM_EXTRA_SIZE)
  34. /*
  35. * 32 MiB is max size that gets well over 100 iterations on 509 slots.
  36. * Considering that each slot needs to have at least one page up to
  37. * 8194 slots in use can then be tested (although with slightly
  38. * limited resolution).
  39. */
  40. #define MEM_SIZE_MAP (SZ_32M + MEM_EXTRA_SIZE)
  41. #define MEM_TEST_MAP_SIZE (MEM_SIZE_MAP - MEM_EXTRA_SIZE)
  42. /*
  43. * 128 MiB is min size that fills 32k slots with at least one page in each
  44. * while at the same time gets 100+ iterations in such test
  45. *
  46. * 2 MiB chunk size like a typical huge page
  47. */
  48. #define MEM_TEST_UNMAP_SIZE SZ_128M
  49. #define MEM_TEST_UNMAP_CHUNK_SIZE SZ_2M
  50. /*
  51. * For the move active test the middle of the test area is placed on
  52. * a memslot boundary: half lies in the memslot being moved, half in
  53. * other memslot(s).
  54. *
  55. * We have different number of memory slots, excluding the reserved
  56. * memory slot 0, on various architectures and configurations. The
  57. * memory size in this test is calculated by picking the maximal
  58. * last memory slot's memory size, with alignment to the largest
  59. * supported page size (64KB). In this way, the selected memory
  60. * size for this test is compatible with test_memslot_move_prepare().
  61. *
  62. * architecture slots memory-per-slot memory-on-last-slot
  63. * --------------------------------------------------------------
  64. * x86-4KB 32763 16KB 160KB
  65. * arm64-4KB 32766 16KB 112KB
  66. * arm64-16KB 32766 16KB 112KB
  67. * arm64-64KB 8192 64KB 128KB
  68. */
  69. #define MEM_TEST_MOVE_SIZE (3 * SZ_64K)
  70. #define MEM_TEST_MOVE_GPA_DEST (MEM_GPA + MEM_SIZE)
  71. static_assert(MEM_TEST_MOVE_SIZE <= MEM_TEST_SIZE,
  72. "invalid move test region size");
  73. #define MEM_TEST_VAL_1 0x1122334455667788
  74. #define MEM_TEST_VAL_2 0x99AABBCCDDEEFF00
  75. struct vm_data {
  76. struct kvm_vm *vm;
  77. struct kvm_vcpu *vcpu;
  78. pthread_t vcpu_thread;
  79. uint32_t nslots;
  80. uint64_t npages;
  81. uint64_t pages_per_slot;
  82. void **hva_slots;
  83. bool mmio_ok;
  84. uint64_t mmio_gpa_min;
  85. uint64_t mmio_gpa_max;
  86. };
  87. struct sync_area {
  88. uint32_t guest_page_size;
  89. atomic_bool start_flag;
  90. atomic_bool exit_flag;
  91. atomic_bool sync_flag;
  92. void *move_area_ptr;
  93. };
  94. /*
  95. * Technically, we need also for the atomic bool to be address-free, which
  96. * is recommended, but not strictly required, by C11 for lockless
  97. * implementations.
  98. * However, in practice both GCC and Clang fulfill this requirement on
  99. * all KVM-supported platforms.
  100. */
  101. static_assert(ATOMIC_BOOL_LOCK_FREE == 2, "atomic bool is not lockless");
  102. static sem_t vcpu_ready;
  103. static bool map_unmap_verify;
  104. #ifdef __x86_64__
  105. static bool disable_slot_zap_quirk;
  106. #endif
  107. static bool verbose;
  108. #define pr_info_v(...) \
  109. do { \
  110. if (verbose) \
  111. pr_info(__VA_ARGS__); \
  112. } while (0)
  113. static void check_mmio_access(struct vm_data *data, struct kvm_run *run)
  114. {
  115. TEST_ASSERT(data->mmio_ok, "Unexpected mmio exit");
  116. TEST_ASSERT(run->mmio.is_write, "Unexpected mmio read");
  117. TEST_ASSERT(run->mmio.len == 8,
  118. "Unexpected exit mmio size = %u", run->mmio.len);
  119. TEST_ASSERT(run->mmio.phys_addr >= data->mmio_gpa_min &&
  120. run->mmio.phys_addr <= data->mmio_gpa_max,
  121. "Unexpected exit mmio address = 0x%llx",
  122. run->mmio.phys_addr);
  123. }
  124. static void *vcpu_worker(void *__data)
  125. {
  126. struct vm_data *data = __data;
  127. struct kvm_vcpu *vcpu = data->vcpu;
  128. struct kvm_run *run = vcpu->run;
  129. struct ucall uc;
  130. while (1) {
  131. vcpu_run(vcpu);
  132. switch (get_ucall(vcpu, &uc)) {
  133. case UCALL_SYNC:
  134. TEST_ASSERT(uc.args[1] == 0,
  135. "Unexpected sync ucall, got %lx",
  136. (ulong)uc.args[1]);
  137. sem_post(&vcpu_ready);
  138. continue;
  139. case UCALL_NONE:
  140. if (run->exit_reason == KVM_EXIT_MMIO)
  141. check_mmio_access(data, run);
  142. else
  143. goto done;
  144. break;
  145. case UCALL_ABORT:
  146. REPORT_GUEST_ASSERT(uc);
  147. break;
  148. case UCALL_DONE:
  149. goto done;
  150. default:
  151. TEST_FAIL("Unknown ucall %lu", uc.cmd);
  152. }
  153. }
  154. done:
  155. return NULL;
  156. }
  157. static void wait_for_vcpu(void)
  158. {
  159. struct timespec ts;
  160. TEST_ASSERT(!clock_gettime(CLOCK_REALTIME, &ts),
  161. "clock_gettime() failed: %d", errno);
  162. ts.tv_sec += 2;
  163. TEST_ASSERT(!sem_timedwait(&vcpu_ready, &ts),
  164. "sem_timedwait() failed: %d", errno);
  165. }
  166. static void *vm_gpa2hva(struct vm_data *data, uint64_t gpa, uint64_t *rempages)
  167. {
  168. uint64_t gpage, pgoffs;
  169. uint32_t slot, slotoffs;
  170. void *base;
  171. uint32_t guest_page_size = data->vm->page_size;
  172. TEST_ASSERT(gpa >= MEM_GPA, "Too low gpa to translate");
  173. TEST_ASSERT(gpa < MEM_GPA + data->npages * guest_page_size,
  174. "Too high gpa to translate");
  175. gpa -= MEM_GPA;
  176. gpage = gpa / guest_page_size;
  177. pgoffs = gpa % guest_page_size;
  178. slot = min(gpage / data->pages_per_slot, (uint64_t)data->nslots - 1);
  179. slotoffs = gpage - (slot * data->pages_per_slot);
  180. if (rempages) {
  181. uint64_t slotpages;
  182. if (slot == data->nslots - 1)
  183. slotpages = data->npages - slot * data->pages_per_slot;
  184. else
  185. slotpages = data->pages_per_slot;
  186. TEST_ASSERT(!pgoffs,
  187. "Asking for remaining pages in slot but gpa not page aligned");
  188. *rempages = slotpages - slotoffs;
  189. }
  190. base = data->hva_slots[slot];
  191. return (uint8_t *)base + slotoffs * guest_page_size + pgoffs;
  192. }
  193. static uint64_t vm_slot2gpa(struct vm_data *data, uint32_t slot)
  194. {
  195. uint32_t guest_page_size = data->vm->page_size;
  196. TEST_ASSERT(slot < data->nslots, "Too high slot number");
  197. return MEM_GPA + slot * data->pages_per_slot * guest_page_size;
  198. }
  199. static struct vm_data *alloc_vm(void)
  200. {
  201. struct vm_data *data;
  202. data = malloc(sizeof(*data));
  203. TEST_ASSERT(data, "malloc(vmdata) failed");
  204. data->vm = NULL;
  205. data->vcpu = NULL;
  206. data->hva_slots = NULL;
  207. return data;
  208. }
  209. static bool check_slot_pages(uint32_t host_page_size, uint32_t guest_page_size,
  210. uint64_t pages_per_slot, uint64_t rempages)
  211. {
  212. if (!pages_per_slot)
  213. return false;
  214. if ((pages_per_slot * guest_page_size) % host_page_size)
  215. return false;
  216. if ((rempages * guest_page_size) % host_page_size)
  217. return false;
  218. return true;
  219. }
  220. static uint64_t get_max_slots(struct vm_data *data, uint32_t host_page_size)
  221. {
  222. uint32_t guest_page_size = data->vm->page_size;
  223. uint64_t mempages, pages_per_slot, rempages;
  224. uint64_t slots;
  225. mempages = data->npages;
  226. slots = data->nslots;
  227. while (--slots > 1) {
  228. pages_per_slot = mempages / slots;
  229. if (!pages_per_slot)
  230. continue;
  231. rempages = mempages % pages_per_slot;
  232. if (check_slot_pages(host_page_size, guest_page_size,
  233. pages_per_slot, rempages))
  234. return slots + 1; /* slot 0 is reserved */
  235. }
  236. return 0;
  237. }
  238. static bool prepare_vm(struct vm_data *data, int nslots, uint64_t *maxslots,
  239. void *guest_code, uint64_t mem_size,
  240. struct timespec *slot_runtime)
  241. {
  242. uint64_t mempages, rempages;
  243. uint64_t guest_addr;
  244. uint32_t slot, host_page_size, guest_page_size;
  245. struct timespec tstart;
  246. struct sync_area *sync;
  247. host_page_size = getpagesize();
  248. guest_page_size = vm_guest_mode_params[VM_MODE_DEFAULT].page_size;
  249. mempages = mem_size / guest_page_size;
  250. data->vm = __vm_create_with_one_vcpu(&data->vcpu, mempages, guest_code);
  251. TEST_ASSERT(data->vm->page_size == guest_page_size, "Invalid VM page size");
  252. data->npages = mempages;
  253. TEST_ASSERT(data->npages > 1, "Can't test without any memory");
  254. data->nslots = nslots;
  255. data->pages_per_slot = data->npages / data->nslots;
  256. rempages = data->npages % data->nslots;
  257. if (!check_slot_pages(host_page_size, guest_page_size,
  258. data->pages_per_slot, rempages)) {
  259. *maxslots = get_max_slots(data, host_page_size);
  260. return false;
  261. }
  262. data->hva_slots = malloc(sizeof(*data->hva_slots) * data->nslots);
  263. TEST_ASSERT(data->hva_slots, "malloc() fail");
  264. pr_info_v("Adding slots 1..%i, each slot with %"PRIu64" pages + %"PRIu64" extra pages last\n",
  265. data->nslots, data->pages_per_slot, rempages);
  266. clock_gettime(CLOCK_MONOTONIC, &tstart);
  267. for (slot = 1, guest_addr = MEM_GPA; slot <= data->nslots; slot++) {
  268. uint64_t npages;
  269. npages = data->pages_per_slot;
  270. if (slot == data->nslots)
  271. npages += rempages;
  272. vm_userspace_mem_region_add(data->vm, VM_MEM_SRC_ANONYMOUS,
  273. guest_addr, slot, npages,
  274. 0);
  275. guest_addr += npages * guest_page_size;
  276. }
  277. *slot_runtime = timespec_elapsed(tstart);
  278. for (slot = 1, guest_addr = MEM_GPA; slot <= data->nslots; slot++) {
  279. uint64_t npages;
  280. uint64_t gpa;
  281. npages = data->pages_per_slot;
  282. if (slot == data->nslots)
  283. npages += rempages;
  284. gpa = vm_phy_pages_alloc(data->vm, npages, guest_addr, slot);
  285. TEST_ASSERT(gpa == guest_addr,
  286. "vm_phy_pages_alloc() failed");
  287. data->hva_slots[slot - 1] = addr_gpa2hva(data->vm, guest_addr);
  288. memset(data->hva_slots[slot - 1], 0, npages * guest_page_size);
  289. guest_addr += npages * guest_page_size;
  290. }
  291. virt_map(data->vm, MEM_GPA, MEM_GPA, data->npages);
  292. sync = (typeof(sync))vm_gpa2hva(data, MEM_SYNC_GPA, NULL);
  293. sync->guest_page_size = data->vm->page_size;
  294. atomic_init(&sync->start_flag, false);
  295. atomic_init(&sync->exit_flag, false);
  296. atomic_init(&sync->sync_flag, false);
  297. data->mmio_ok = false;
  298. return true;
  299. }
  300. static void launch_vm(struct vm_data *data)
  301. {
  302. pr_info_v("Launching the test VM\n");
  303. pthread_create(&data->vcpu_thread, NULL, vcpu_worker, data);
  304. /* Ensure the guest thread is spun up. */
  305. wait_for_vcpu();
  306. }
  307. static void free_vm(struct vm_data *data)
  308. {
  309. kvm_vm_free(data->vm);
  310. free(data->hva_slots);
  311. free(data);
  312. }
  313. static void wait_guest_exit(struct vm_data *data)
  314. {
  315. pthread_join(data->vcpu_thread, NULL);
  316. }
  317. static void let_guest_run(struct sync_area *sync)
  318. {
  319. atomic_store_explicit(&sync->start_flag, true, memory_order_release);
  320. }
  321. static void guest_spin_until_start(void)
  322. {
  323. struct sync_area *sync = (typeof(sync))MEM_SYNC_GPA;
  324. while (!atomic_load_explicit(&sync->start_flag, memory_order_acquire))
  325. ;
  326. }
  327. static void make_guest_exit(struct sync_area *sync)
  328. {
  329. atomic_store_explicit(&sync->exit_flag, true, memory_order_release);
  330. }
  331. static bool _guest_should_exit(void)
  332. {
  333. struct sync_area *sync = (typeof(sync))MEM_SYNC_GPA;
  334. return atomic_load_explicit(&sync->exit_flag, memory_order_acquire);
  335. }
  336. #define guest_should_exit() unlikely(_guest_should_exit())
  337. /*
  338. * noinline so we can easily see how much time the host spends waiting
  339. * for the guest.
  340. * For the same reason use alarm() instead of polling clock_gettime()
  341. * to implement a wait timeout.
  342. */
  343. static noinline void host_perform_sync(struct sync_area *sync)
  344. {
  345. alarm(10);
  346. atomic_store_explicit(&sync->sync_flag, true, memory_order_release);
  347. while (atomic_load_explicit(&sync->sync_flag, memory_order_acquire))
  348. ;
  349. alarm(0);
  350. }
  351. static bool guest_perform_sync(void)
  352. {
  353. struct sync_area *sync = (typeof(sync))MEM_SYNC_GPA;
  354. bool expected;
  355. do {
  356. if (guest_should_exit())
  357. return false;
  358. expected = true;
  359. } while (!atomic_compare_exchange_weak_explicit(&sync->sync_flag,
  360. &expected, false,
  361. memory_order_acq_rel,
  362. memory_order_relaxed));
  363. return true;
  364. }
  365. static void guest_code_test_memslot_move(void)
  366. {
  367. struct sync_area *sync = (typeof(sync))MEM_SYNC_GPA;
  368. uint32_t page_size = (typeof(page_size))READ_ONCE(sync->guest_page_size);
  369. uintptr_t base = (typeof(base))READ_ONCE(sync->move_area_ptr);
  370. GUEST_SYNC(0);
  371. guest_spin_until_start();
  372. while (!guest_should_exit()) {
  373. uintptr_t ptr;
  374. for (ptr = base; ptr < base + MEM_TEST_MOVE_SIZE;
  375. ptr += page_size)
  376. *(uint64_t *)ptr = MEM_TEST_VAL_1;
  377. /*
  378. * No host sync here since the MMIO exits are so expensive
  379. * that the host would spend most of its time waiting for
  380. * the guest and so instead of measuring memslot move
  381. * performance we would measure the performance and
  382. * likelihood of MMIO exits
  383. */
  384. }
  385. GUEST_DONE();
  386. }
  387. static void guest_code_test_memslot_map(void)
  388. {
  389. struct sync_area *sync = (typeof(sync))MEM_SYNC_GPA;
  390. uint32_t page_size = (typeof(page_size))READ_ONCE(sync->guest_page_size);
  391. GUEST_SYNC(0);
  392. guest_spin_until_start();
  393. while (1) {
  394. uintptr_t ptr;
  395. for (ptr = MEM_TEST_GPA;
  396. ptr < MEM_TEST_GPA + MEM_TEST_MAP_SIZE / 2;
  397. ptr += page_size)
  398. *(uint64_t *)ptr = MEM_TEST_VAL_1;
  399. if (!guest_perform_sync())
  400. break;
  401. for (ptr = MEM_TEST_GPA + MEM_TEST_MAP_SIZE / 2;
  402. ptr < MEM_TEST_GPA + MEM_TEST_MAP_SIZE;
  403. ptr += page_size)
  404. *(uint64_t *)ptr = MEM_TEST_VAL_2;
  405. if (!guest_perform_sync())
  406. break;
  407. }
  408. GUEST_DONE();
  409. }
  410. static void guest_code_test_memslot_unmap(void)
  411. {
  412. struct sync_area *sync = (typeof(sync))MEM_SYNC_GPA;
  413. GUEST_SYNC(0);
  414. guest_spin_until_start();
  415. while (1) {
  416. uintptr_t ptr = MEM_TEST_GPA;
  417. /*
  418. * We can afford to access (map) just a small number of pages
  419. * per host sync as otherwise the host will spend
  420. * a significant amount of its time waiting for the guest
  421. * (instead of doing unmap operations), so this will
  422. * effectively turn this test into a map performance test.
  423. *
  424. * Just access a single page to be on the safe side.
  425. */
  426. *(uint64_t *)ptr = MEM_TEST_VAL_1;
  427. if (!guest_perform_sync())
  428. break;
  429. ptr += MEM_TEST_UNMAP_SIZE / 2;
  430. *(uint64_t *)ptr = MEM_TEST_VAL_2;
  431. if (!guest_perform_sync())
  432. break;
  433. }
  434. GUEST_DONE();
  435. }
  436. static void guest_code_test_memslot_rw(void)
  437. {
  438. struct sync_area *sync = (typeof(sync))MEM_SYNC_GPA;
  439. uint32_t page_size = (typeof(page_size))READ_ONCE(sync->guest_page_size);
  440. GUEST_SYNC(0);
  441. guest_spin_until_start();
  442. while (1) {
  443. uintptr_t ptr;
  444. for (ptr = MEM_TEST_GPA;
  445. ptr < MEM_TEST_GPA + MEM_TEST_SIZE; ptr += page_size)
  446. *(uint64_t *)ptr = MEM_TEST_VAL_1;
  447. if (!guest_perform_sync())
  448. break;
  449. for (ptr = MEM_TEST_GPA + page_size / 2;
  450. ptr < MEM_TEST_GPA + MEM_TEST_SIZE; ptr += page_size) {
  451. uint64_t val = *(uint64_t *)ptr;
  452. GUEST_ASSERT_EQ(val, MEM_TEST_VAL_2);
  453. *(uint64_t *)ptr = 0;
  454. }
  455. if (!guest_perform_sync())
  456. break;
  457. }
  458. GUEST_DONE();
  459. }
  460. static bool test_memslot_move_prepare(struct vm_data *data,
  461. struct sync_area *sync,
  462. uint64_t *maxslots, bool isactive)
  463. {
  464. uint32_t guest_page_size = data->vm->page_size;
  465. uint64_t movesrcgpa, movetestgpa;
  466. #ifdef __x86_64__
  467. if (disable_slot_zap_quirk)
  468. vm_enable_cap(data->vm, KVM_CAP_DISABLE_QUIRKS2, KVM_X86_QUIRK_SLOT_ZAP_ALL);
  469. #endif
  470. movesrcgpa = vm_slot2gpa(data, data->nslots - 1);
  471. if (isactive) {
  472. uint64_t lastpages;
  473. vm_gpa2hva(data, movesrcgpa, &lastpages);
  474. if (lastpages * guest_page_size < MEM_TEST_MOVE_SIZE / 2) {
  475. *maxslots = 0;
  476. return false;
  477. }
  478. }
  479. movetestgpa = movesrcgpa - (MEM_TEST_MOVE_SIZE / (isactive ? 2 : 1));
  480. sync->move_area_ptr = (void *)movetestgpa;
  481. if (isactive) {
  482. data->mmio_ok = true;
  483. data->mmio_gpa_min = movesrcgpa;
  484. data->mmio_gpa_max = movesrcgpa + MEM_TEST_MOVE_SIZE / 2 - 1;
  485. }
  486. return true;
  487. }
  488. static bool test_memslot_move_prepare_active(struct vm_data *data,
  489. struct sync_area *sync,
  490. uint64_t *maxslots)
  491. {
  492. return test_memslot_move_prepare(data, sync, maxslots, true);
  493. }
  494. static bool test_memslot_move_prepare_inactive(struct vm_data *data,
  495. struct sync_area *sync,
  496. uint64_t *maxslots)
  497. {
  498. return test_memslot_move_prepare(data, sync, maxslots, false);
  499. }
  500. static void test_memslot_move_loop(struct vm_data *data, struct sync_area *sync)
  501. {
  502. uint64_t movesrcgpa;
  503. movesrcgpa = vm_slot2gpa(data, data->nslots - 1);
  504. vm_mem_region_move(data->vm, data->nslots - 1 + 1,
  505. MEM_TEST_MOVE_GPA_DEST);
  506. vm_mem_region_move(data->vm, data->nslots - 1 + 1, movesrcgpa);
  507. }
  508. static void test_memslot_do_unmap(struct vm_data *data,
  509. uint64_t offsp, uint64_t count)
  510. {
  511. uint64_t gpa, ctr;
  512. uint32_t guest_page_size = data->vm->page_size;
  513. for (gpa = MEM_TEST_GPA + offsp * guest_page_size, ctr = 0; ctr < count; ) {
  514. uint64_t npages;
  515. void *hva;
  516. int ret;
  517. hva = vm_gpa2hva(data, gpa, &npages);
  518. TEST_ASSERT(npages, "Empty memory slot at gptr 0x%"PRIx64, gpa);
  519. npages = min(npages, count - ctr);
  520. ret = madvise(hva, npages * guest_page_size, MADV_DONTNEED);
  521. TEST_ASSERT(!ret,
  522. "madvise(%p, MADV_DONTNEED) on VM memory should not fail for gptr 0x%"PRIx64,
  523. hva, gpa);
  524. ctr += npages;
  525. gpa += npages * guest_page_size;
  526. }
  527. TEST_ASSERT(ctr == count,
  528. "madvise(MADV_DONTNEED) should exactly cover all of the requested area");
  529. }
  530. static void test_memslot_map_unmap_check(struct vm_data *data,
  531. uint64_t offsp, uint64_t valexp)
  532. {
  533. uint64_t gpa;
  534. uint64_t *val;
  535. uint32_t guest_page_size = data->vm->page_size;
  536. if (!map_unmap_verify)
  537. return;
  538. gpa = MEM_TEST_GPA + offsp * guest_page_size;
  539. val = (typeof(val))vm_gpa2hva(data, gpa, NULL);
  540. TEST_ASSERT(*val == valexp,
  541. "Guest written values should read back correctly before unmap (%"PRIu64" vs %"PRIu64" @ %"PRIx64")",
  542. *val, valexp, gpa);
  543. *val = 0;
  544. }
  545. static void test_memslot_map_loop(struct vm_data *data, struct sync_area *sync)
  546. {
  547. uint32_t guest_page_size = data->vm->page_size;
  548. uint64_t guest_pages = MEM_TEST_MAP_SIZE / guest_page_size;
  549. /*
  550. * Unmap the second half of the test area while guest writes to (maps)
  551. * the first half.
  552. */
  553. test_memslot_do_unmap(data, guest_pages / 2, guest_pages / 2);
  554. /*
  555. * Wait for the guest to finish writing the first half of the test
  556. * area, verify the written value on the first and the last page of
  557. * this area and then unmap it.
  558. * Meanwhile, the guest is writing to (mapping) the second half of
  559. * the test area.
  560. */
  561. host_perform_sync(sync);
  562. test_memslot_map_unmap_check(data, 0, MEM_TEST_VAL_1);
  563. test_memslot_map_unmap_check(data, guest_pages / 2 - 1, MEM_TEST_VAL_1);
  564. test_memslot_do_unmap(data, 0, guest_pages / 2);
  565. /*
  566. * Wait for the guest to finish writing the second half of the test
  567. * area and verify the written value on the first and the last page
  568. * of this area.
  569. * The area will be unmapped at the beginning of the next loop
  570. * iteration.
  571. * Meanwhile, the guest is writing to (mapping) the first half of
  572. * the test area.
  573. */
  574. host_perform_sync(sync);
  575. test_memslot_map_unmap_check(data, guest_pages / 2, MEM_TEST_VAL_2);
  576. test_memslot_map_unmap_check(data, guest_pages - 1, MEM_TEST_VAL_2);
  577. }
  578. static void test_memslot_unmap_loop_common(struct vm_data *data,
  579. struct sync_area *sync,
  580. uint64_t chunk)
  581. {
  582. uint32_t guest_page_size = data->vm->page_size;
  583. uint64_t guest_pages = MEM_TEST_UNMAP_SIZE / guest_page_size;
  584. uint64_t ctr;
  585. /*
  586. * Wait for the guest to finish mapping page(s) in the first half
  587. * of the test area, verify the written value and then perform unmap
  588. * of this area.
  589. * Meanwhile, the guest is writing to (mapping) page(s) in the second
  590. * half of the test area.
  591. */
  592. host_perform_sync(sync);
  593. test_memslot_map_unmap_check(data, 0, MEM_TEST_VAL_1);
  594. for (ctr = 0; ctr < guest_pages / 2; ctr += chunk)
  595. test_memslot_do_unmap(data, ctr, chunk);
  596. /* Likewise, but for the opposite host / guest areas */
  597. host_perform_sync(sync);
  598. test_memslot_map_unmap_check(data, guest_pages / 2, MEM_TEST_VAL_2);
  599. for (ctr = guest_pages / 2; ctr < guest_pages; ctr += chunk)
  600. test_memslot_do_unmap(data, ctr, chunk);
  601. }
  602. static void test_memslot_unmap_loop(struct vm_data *data,
  603. struct sync_area *sync)
  604. {
  605. uint32_t host_page_size = getpagesize();
  606. uint32_t guest_page_size = data->vm->page_size;
  607. uint64_t guest_chunk_pages = guest_page_size >= host_page_size ?
  608. 1 : host_page_size / guest_page_size;
  609. test_memslot_unmap_loop_common(data, sync, guest_chunk_pages);
  610. }
  611. static void test_memslot_unmap_loop_chunked(struct vm_data *data,
  612. struct sync_area *sync)
  613. {
  614. uint32_t guest_page_size = data->vm->page_size;
  615. uint64_t guest_chunk_pages = MEM_TEST_UNMAP_CHUNK_SIZE / guest_page_size;
  616. test_memslot_unmap_loop_common(data, sync, guest_chunk_pages);
  617. }
  618. static void test_memslot_rw_loop(struct vm_data *data, struct sync_area *sync)
  619. {
  620. uint64_t gptr;
  621. uint32_t guest_page_size = data->vm->page_size;
  622. for (gptr = MEM_TEST_GPA + guest_page_size / 2;
  623. gptr < MEM_TEST_GPA + MEM_TEST_SIZE; gptr += guest_page_size)
  624. *(uint64_t *)vm_gpa2hva(data, gptr, NULL) = MEM_TEST_VAL_2;
  625. host_perform_sync(sync);
  626. for (gptr = MEM_TEST_GPA;
  627. gptr < MEM_TEST_GPA + MEM_TEST_SIZE; gptr += guest_page_size) {
  628. uint64_t *vptr = (typeof(vptr))vm_gpa2hva(data, gptr, NULL);
  629. uint64_t val = *vptr;
  630. TEST_ASSERT(val == MEM_TEST_VAL_1,
  631. "Guest written values should read back correctly (is %"PRIu64" @ %"PRIx64")",
  632. val, gptr);
  633. *vptr = 0;
  634. }
  635. host_perform_sync(sync);
  636. }
  637. struct test_data {
  638. const char *name;
  639. uint64_t mem_size;
  640. void (*guest_code)(void);
  641. bool (*prepare)(struct vm_data *data, struct sync_area *sync,
  642. uint64_t *maxslots);
  643. void (*loop)(struct vm_data *data, struct sync_area *sync);
  644. };
  645. static bool test_execute(int nslots, uint64_t *maxslots,
  646. unsigned int maxtime,
  647. const struct test_data *tdata,
  648. uint64_t *nloops,
  649. struct timespec *slot_runtime,
  650. struct timespec *guest_runtime)
  651. {
  652. uint64_t mem_size = tdata->mem_size ? : MEM_SIZE;
  653. struct vm_data *data;
  654. struct sync_area *sync;
  655. struct timespec tstart;
  656. bool ret = true;
  657. data = alloc_vm();
  658. if (!prepare_vm(data, nslots, maxslots, tdata->guest_code,
  659. mem_size, slot_runtime)) {
  660. ret = false;
  661. goto exit_free;
  662. }
  663. sync = (typeof(sync))vm_gpa2hva(data, MEM_SYNC_GPA, NULL);
  664. if (tdata->prepare &&
  665. !tdata->prepare(data, sync, maxslots)) {
  666. ret = false;
  667. goto exit_free;
  668. }
  669. launch_vm(data);
  670. clock_gettime(CLOCK_MONOTONIC, &tstart);
  671. let_guest_run(sync);
  672. while (1) {
  673. *guest_runtime = timespec_elapsed(tstart);
  674. if (guest_runtime->tv_sec >= maxtime)
  675. break;
  676. tdata->loop(data, sync);
  677. (*nloops)++;
  678. }
  679. make_guest_exit(sync);
  680. wait_guest_exit(data);
  681. exit_free:
  682. free_vm(data);
  683. return ret;
  684. }
  685. static const struct test_data tests[] = {
  686. {
  687. .name = "map",
  688. .mem_size = MEM_SIZE_MAP,
  689. .guest_code = guest_code_test_memslot_map,
  690. .loop = test_memslot_map_loop,
  691. },
  692. {
  693. .name = "unmap",
  694. .mem_size = MEM_TEST_UNMAP_SIZE + MEM_EXTRA_SIZE,
  695. .guest_code = guest_code_test_memslot_unmap,
  696. .loop = test_memslot_unmap_loop,
  697. },
  698. {
  699. .name = "unmap chunked",
  700. .mem_size = MEM_TEST_UNMAP_SIZE + MEM_EXTRA_SIZE,
  701. .guest_code = guest_code_test_memslot_unmap,
  702. .loop = test_memslot_unmap_loop_chunked,
  703. },
  704. {
  705. .name = "move active area",
  706. .guest_code = guest_code_test_memslot_move,
  707. .prepare = test_memslot_move_prepare_active,
  708. .loop = test_memslot_move_loop,
  709. },
  710. {
  711. .name = "move inactive area",
  712. .guest_code = guest_code_test_memslot_move,
  713. .prepare = test_memslot_move_prepare_inactive,
  714. .loop = test_memslot_move_loop,
  715. },
  716. {
  717. .name = "RW",
  718. .guest_code = guest_code_test_memslot_rw,
  719. .loop = test_memslot_rw_loop
  720. },
  721. };
  722. #define NTESTS ARRAY_SIZE(tests)
  723. struct test_args {
  724. int tfirst;
  725. int tlast;
  726. int nslots;
  727. int seconds;
  728. int runs;
  729. };
  730. static void help(char *name, struct test_args *targs)
  731. {
  732. int ctr;
  733. pr_info("usage: %s [-h] [-v] [-d] [-s slots] [-f first_test] [-e last_test] [-l test_length] [-r run_count]\n",
  734. name);
  735. pr_info(" -h: print this help screen.\n");
  736. pr_info(" -v: enable verbose mode (not for benchmarking).\n");
  737. pr_info(" -d: enable extra debug checks.\n");
  738. pr_info(" -q: Disable memslot zap quirk during memslot move.\n");
  739. pr_info(" -s: specify memslot count cap (-1 means no cap; currently: %i)\n",
  740. targs->nslots);
  741. pr_info(" -f: specify the first test to run (currently: %i; max %zu)\n",
  742. targs->tfirst, NTESTS - 1);
  743. pr_info(" -e: specify the last test to run (currently: %i; max %zu)\n",
  744. targs->tlast, NTESTS - 1);
  745. pr_info(" -l: specify the test length in seconds (currently: %i)\n",
  746. targs->seconds);
  747. pr_info(" -r: specify the number of runs per test (currently: %i)\n",
  748. targs->runs);
  749. pr_info("\nAvailable tests:\n");
  750. for (ctr = 0; ctr < NTESTS; ctr++)
  751. pr_info("%d: %s\n", ctr, tests[ctr].name);
  752. }
  753. static bool check_memory_sizes(void)
  754. {
  755. uint32_t host_page_size = getpagesize();
  756. uint32_t guest_page_size = vm_guest_mode_params[VM_MODE_DEFAULT].page_size;
  757. if (host_page_size > SZ_64K || guest_page_size > SZ_64K) {
  758. pr_info("Unsupported page size on host (0x%x) or guest (0x%x)\n",
  759. host_page_size, guest_page_size);
  760. return false;
  761. }
  762. if (MEM_SIZE % guest_page_size ||
  763. MEM_TEST_SIZE % guest_page_size) {
  764. pr_info("invalid MEM_SIZE or MEM_TEST_SIZE\n");
  765. return false;
  766. }
  767. if (MEM_SIZE_MAP % guest_page_size ||
  768. MEM_TEST_MAP_SIZE % guest_page_size ||
  769. (MEM_TEST_MAP_SIZE / guest_page_size) <= 2 ||
  770. (MEM_TEST_MAP_SIZE / guest_page_size) % 2) {
  771. pr_info("invalid MEM_SIZE_MAP or MEM_TEST_MAP_SIZE\n");
  772. return false;
  773. }
  774. if (MEM_TEST_UNMAP_SIZE > MEM_TEST_SIZE ||
  775. MEM_TEST_UNMAP_SIZE % guest_page_size ||
  776. (MEM_TEST_UNMAP_SIZE / guest_page_size) %
  777. (2 * MEM_TEST_UNMAP_CHUNK_SIZE / guest_page_size)) {
  778. pr_info("invalid MEM_TEST_UNMAP_SIZE or MEM_TEST_UNMAP_CHUNK_SIZE\n");
  779. return false;
  780. }
  781. return true;
  782. }
  783. static bool parse_args(int argc, char *argv[],
  784. struct test_args *targs)
  785. {
  786. uint32_t max_mem_slots;
  787. int opt;
  788. while ((opt = getopt(argc, argv, "hvdqs:f:e:l:r:")) != -1) {
  789. switch (opt) {
  790. case 'h':
  791. default:
  792. help(argv[0], targs);
  793. return false;
  794. case 'v':
  795. verbose = true;
  796. break;
  797. case 'd':
  798. map_unmap_verify = true;
  799. break;
  800. #ifdef __x86_64__
  801. case 'q':
  802. disable_slot_zap_quirk = true;
  803. TEST_REQUIRE(kvm_check_cap(KVM_CAP_DISABLE_QUIRKS2) &
  804. KVM_X86_QUIRK_SLOT_ZAP_ALL);
  805. break;
  806. #endif
  807. case 's':
  808. targs->nslots = atoi_paranoid(optarg);
  809. if (targs->nslots <= 1 && targs->nslots != -1) {
  810. pr_info("Slot count cap must be larger than 1 or -1 for no cap\n");
  811. return false;
  812. }
  813. break;
  814. case 'f':
  815. targs->tfirst = atoi_non_negative("First test", optarg);
  816. break;
  817. case 'e':
  818. targs->tlast = atoi_non_negative("Last test", optarg);
  819. if (targs->tlast >= NTESTS) {
  820. pr_info("Last test to run has to be non-negative and less than %zu\n",
  821. NTESTS);
  822. return false;
  823. }
  824. break;
  825. case 'l':
  826. targs->seconds = atoi_non_negative("Test length", optarg);
  827. break;
  828. case 'r':
  829. targs->runs = atoi_positive("Runs per test", optarg);
  830. break;
  831. }
  832. }
  833. if (optind < argc) {
  834. help(argv[0], targs);
  835. return false;
  836. }
  837. if (targs->tfirst > targs->tlast) {
  838. pr_info("First test to run cannot be greater than the last test to run\n");
  839. return false;
  840. }
  841. max_mem_slots = kvm_check_cap(KVM_CAP_NR_MEMSLOTS);
  842. if (max_mem_slots <= 1) {
  843. pr_info("KVM_CAP_NR_MEMSLOTS should be greater than 1\n");
  844. return false;
  845. }
  846. /* Memory slot 0 is reserved */
  847. if (targs->nslots == -1)
  848. targs->nslots = max_mem_slots - 1;
  849. else
  850. targs->nslots = min_t(int, targs->nslots, max_mem_slots) - 1;
  851. pr_info_v("Allowed Number of memory slots: %"PRIu32"\n",
  852. targs->nslots + 1);
  853. return true;
  854. }
  /* Measurements collected from a single run of one test. */
  struct test_result {
      struct timespec slot_runtime;   /* time spent on slot setup */
      struct timespec guest_runtime;  /* time spent on all iterations */
      struct timespec iter_runtime;   /* guest_runtime divided by nloops */
      int64_t slottimens;             /* slot_runtime in nanoseconds */
      int64_t runtimens;              /* iter_runtime in nanoseconds */
      uint64_t nloops;                /* number of iterations done */
  };
  860. static bool test_loop(const struct test_data *data,
  861. const struct test_args *targs,
  862. struct test_result *rbestslottime,
  863. struct test_result *rbestruntime)
  864. {
  865. uint64_t maxslots;
  866. struct test_result result = {};
  867. if (!test_execute(targs->nslots, &maxslots, targs->seconds, data,
  868. &result.nloops,
  869. &result.slot_runtime, &result.guest_runtime)) {
  870. if (maxslots)
  871. pr_info("Memslot count too high for this test, decrease the cap (max is %"PRIu64")\n",
  872. maxslots);
  873. else
  874. pr_info("Memslot count may be too high for this test, try adjusting the cap\n");
  875. return false;
  876. }
  877. pr_info("Test took %ld.%.9lds for slot setup + %ld.%.9lds all iterations\n",
  878. result.slot_runtime.tv_sec, result.slot_runtime.tv_nsec,
  879. result.guest_runtime.tv_sec, result.guest_runtime.tv_nsec);
  880. if (!result.nloops) {
  881. pr_info("No full loops done - too short test time or system too loaded?\n");
  882. return true;
  883. }
  884. result.iter_runtime = timespec_div(result.guest_runtime,
  885. result.nloops);
  886. pr_info("Done %"PRIu64" iterations, avg %ld.%.9lds each\n",
  887. result.nloops,
  888. result.iter_runtime.tv_sec,
  889. result.iter_runtime.tv_nsec);
  890. result.slottimens = timespec_to_ns(result.slot_runtime);
  891. result.runtimens = timespec_to_ns(result.iter_runtime);
  892. /*
  893. * Only rank the slot setup time for tests using the whole test memory
  894. * area so they are comparable
  895. */
  896. if (!data->mem_size &&
  897. (!rbestslottime->slottimens ||
  898. result.slottimens < rbestslottime->slottimens))
  899. *rbestslottime = result;
  900. if (!rbestruntime->runtimens ||
  901. result.runtimens < rbestruntime->runtimens)
  902. *rbestruntime = result;
  903. return true;
  904. }
  905. int main(int argc, char *argv[])
  906. {
  907. struct test_args targs = {
  908. .tfirst = 0,
  909. .tlast = NTESTS - 1,
  910. .nslots = -1,
  911. .seconds = 5,
  912. .runs = 1,
  913. };
  914. struct test_result rbestslottime = {};
  915. int tctr;
  916. if (!check_memory_sizes())
  917. return -1;
  918. if (!parse_args(argc, argv, &targs))
  919. return -1;
  920. for (tctr = targs.tfirst; tctr <= targs.tlast; tctr++) {
  921. const struct test_data *data = &tests[tctr];
  922. unsigned int runctr;
  923. struct test_result rbestruntime = {};
  924. if (tctr > targs.tfirst)
  925. pr_info("\n");
  926. pr_info("Testing %s performance with %i runs, %d seconds each\n",
  927. data->name, targs.runs, targs.seconds);
  928. for (runctr = 0; runctr < targs.runs; runctr++)
  929. if (!test_loop(data, &targs,
  930. &rbestslottime, &rbestruntime))
  931. break;
  932. if (rbestruntime.runtimens)
  933. pr_info("Best runtime result was %ld.%.9lds per iteration (with %"PRIu64" iterations)\n",
  934. rbestruntime.iter_runtime.tv_sec,
  935. rbestruntime.iter_runtime.tv_nsec,
  936. rbestruntime.nloops);
  937. }
  938. if (rbestslottime.slottimens)
  939. pr_info("Best slot setup time for the whole test area was %ld.%.9lds\n",
  940. rbestslottime.slot_runtime.tv_sec,
  941. rbestslottime.slot_runtime.tv_nsec);
  942. return 0;
  943. }