guest_memfd_test.c 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492
  1. // SPDX-License-Identifier: GPL-2.0
  2. /*
  3. * Copyright Intel Corporation, 2023
  4. *
  5. * Author: Chao Peng <chao.p.peng@linux.intel.com>
  6. */
  7. #include <stdlib.h>
  8. #include <string.h>
  9. #include <unistd.h>
  10. #include <errno.h>
  11. #include <stdio.h>
  12. #include <fcntl.h>
  13. #include <linux/bitmap.h>
  14. #include <linux/falloc.h>
  15. #include <linux/sizes.h>
  16. #include <sys/mman.h>
  17. #include <sys/types.h>
  18. #include <sys/stat.h>
  19. #include "kvm_util.h"
  20. #include "numaif.h"
  21. #include "test_util.h"
  22. #include "ucall_common.h"
/* Host page size in bytes; assigned once in main() from getpagesize(). */
static size_t page_size;
  24. static void test_file_read_write(int fd, size_t total_size)
  25. {
  26. char buf[64];
  27. TEST_ASSERT(read(fd, buf, sizeof(buf)) < 0,
  28. "read on a guest_mem fd should fail");
  29. TEST_ASSERT(write(fd, buf, sizeof(buf)) < 0,
  30. "write on a guest_mem fd should fail");
  31. TEST_ASSERT(pread(fd, buf, sizeof(buf), 0) < 0,
  32. "pread on a guest_mem fd should fail");
  33. TEST_ASSERT(pwrite(fd, buf, sizeof(buf), 0) < 0,
  34. "pwrite on a guest_mem fd should fail");
  35. }
  36. static void test_mmap_cow(int fd, size_t size)
  37. {
  38. void *mem;
  39. mem = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
  40. TEST_ASSERT(mem == MAP_FAILED, "Copy-on-write not allowed by guest_memfd.");
  41. }
  42. static void test_mmap_supported(int fd, size_t total_size)
  43. {
  44. const char val = 0xaa;
  45. char *mem;
  46. size_t i;
  47. int ret;
  48. mem = kvm_mmap(total_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd);
  49. memset(mem, val, total_size);
  50. for (i = 0; i < total_size; i++)
  51. TEST_ASSERT_EQ(READ_ONCE(mem[i]), val);
  52. ret = fallocate(fd, FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE, 0,
  53. page_size);
  54. TEST_ASSERT(!ret, "fallocate the first page should succeed.");
  55. for (i = 0; i < page_size; i++)
  56. TEST_ASSERT_EQ(READ_ONCE(mem[i]), 0x00);
  57. for (; i < total_size; i++)
  58. TEST_ASSERT_EQ(READ_ONCE(mem[i]), val);
  59. memset(mem, val, page_size);
  60. for (i = 0; i < total_size; i++)
  61. TEST_ASSERT_EQ(READ_ONCE(mem[i]), val);
  62. kvm_munmap(mem, total_size);
  63. }
  64. static void test_mbind(int fd, size_t total_size)
  65. {
  66. const unsigned long nodemask_0 = 1; /* nid: 0 */
  67. unsigned long nodemask = 0;
  68. unsigned long maxnode = BITS_PER_TYPE(nodemask);
  69. int policy;
  70. char *mem;
  71. int ret;
  72. if (!is_multi_numa_node_system())
  73. return;
  74. mem = kvm_mmap(total_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd);
  75. /* Test MPOL_INTERLEAVE policy */
  76. kvm_mbind(mem, page_size * 2, MPOL_INTERLEAVE, &nodemask_0, maxnode, 0);
  77. kvm_get_mempolicy(&policy, &nodemask, maxnode, mem, MPOL_F_ADDR);
  78. TEST_ASSERT(policy == MPOL_INTERLEAVE && nodemask == nodemask_0,
  79. "Wanted MPOL_INTERLEAVE (%u) and nodemask 0x%lx, got %u and 0x%lx",
  80. MPOL_INTERLEAVE, nodemask_0, policy, nodemask);
  81. /* Test basic MPOL_BIND policy */
  82. kvm_mbind(mem + page_size * 2, page_size * 2, MPOL_BIND, &nodemask_0, maxnode, 0);
  83. kvm_get_mempolicy(&policy, &nodemask, maxnode, mem + page_size * 2, MPOL_F_ADDR);
  84. TEST_ASSERT(policy == MPOL_BIND && nodemask == nodemask_0,
  85. "Wanted MPOL_BIND (%u) and nodemask 0x%lx, got %u and 0x%lx",
  86. MPOL_BIND, nodemask_0, policy, nodemask);
  87. /* Test MPOL_DEFAULT policy */
  88. kvm_mbind(mem, total_size, MPOL_DEFAULT, NULL, 0, 0);
  89. kvm_get_mempolicy(&policy, &nodemask, maxnode, mem, MPOL_F_ADDR);
  90. TEST_ASSERT(policy == MPOL_DEFAULT && !nodemask,
  91. "Wanted MPOL_DEFAULT (%u) and nodemask 0x0, got %u and 0x%lx",
  92. MPOL_DEFAULT, policy, nodemask);
  93. /* Test with invalid policy */
  94. ret = mbind(mem, page_size, 999, &nodemask_0, maxnode, 0);
  95. TEST_ASSERT(ret == -1 && errno == EINVAL,
  96. "mbind with invalid policy should fail with EINVAL");
  97. kvm_munmap(mem, total_size);
  98. }
  99. static void test_numa_allocation(int fd, size_t total_size)
  100. {
  101. unsigned long node0_mask = 1; /* Node 0 */
  102. unsigned long node1_mask = 2; /* Node 1 */
  103. unsigned long maxnode = 8;
  104. void *pages[4];
  105. int status[4];
  106. char *mem;
  107. int i;
  108. if (!is_multi_numa_node_system())
  109. return;
  110. mem = kvm_mmap(total_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd);
  111. for (i = 0; i < 4; i++)
  112. pages[i] = (char *)mem + page_size * i;
  113. /* Set NUMA policy after allocation */
  114. memset(mem, 0xaa, page_size);
  115. kvm_mbind(pages[0], page_size, MPOL_BIND, &node0_mask, maxnode, 0);
  116. kvm_fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, 0, page_size);
  117. /* Set NUMA policy before allocation */
  118. kvm_mbind(pages[0], page_size * 2, MPOL_BIND, &node1_mask, maxnode, 0);
  119. kvm_mbind(pages[2], page_size * 2, MPOL_BIND, &node0_mask, maxnode, 0);
  120. memset(mem, 0xaa, total_size);
  121. /* Validate if pages are allocated on specified NUMA nodes */
  122. kvm_move_pages(0, 4, pages, NULL, status, 0);
  123. TEST_ASSERT(status[0] == 1, "Expected page 0 on node 1, got it on node %d", status[0]);
  124. TEST_ASSERT(status[1] == 1, "Expected page 1 on node 1, got it on node %d", status[1]);
  125. TEST_ASSERT(status[2] == 0, "Expected page 2 on node 0, got it on node %d", status[2]);
  126. TEST_ASSERT(status[3] == 0, "Expected page 3 on node 0, got it on node %d", status[3]);
  127. /* Punch hole for all pages */
  128. kvm_fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, 0, total_size);
  129. /* Change NUMA policy nodes and reallocate */
  130. kvm_mbind(pages[0], page_size * 2, MPOL_BIND, &node0_mask, maxnode, 0);
  131. kvm_mbind(pages[2], page_size * 2, MPOL_BIND, &node1_mask, maxnode, 0);
  132. memset(mem, 0xaa, total_size);
  133. kvm_move_pages(0, 4, pages, NULL, status, 0);
  134. TEST_ASSERT(status[0] == 0, "Expected page 0 on node 0, got it on node %d", status[0]);
  135. TEST_ASSERT(status[1] == 0, "Expected page 1 on node 0, got it on node %d", status[1]);
  136. TEST_ASSERT(status[2] == 1, "Expected page 2 on node 1, got it on node %d", status[2]);
  137. TEST_ASSERT(status[3] == 1, "Expected page 3 on node 1, got it on node %d", status[3]);
  138. kvm_munmap(mem, total_size);
  139. }
  140. static void test_fault_sigbus(int fd, size_t accessible_size, size_t map_size)
  141. {
  142. const char val = 0xaa;
  143. char *mem;
  144. size_t i;
  145. mem = kvm_mmap(map_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd);
  146. TEST_EXPECT_SIGBUS(memset(mem, val, map_size));
  147. TEST_EXPECT_SIGBUS((void)READ_ONCE(mem[accessible_size]));
  148. for (i = 0; i < accessible_size; i++)
  149. TEST_ASSERT_EQ(READ_ONCE(mem[i]), val);
  150. kvm_munmap(mem, map_size);
  151. }
  152. static void test_fault_overflow(int fd, size_t total_size)
  153. {
  154. test_fault_sigbus(fd, total_size, total_size * 4);
  155. }
  156. static void test_fault_private(int fd, size_t total_size)
  157. {
  158. test_fault_sigbus(fd, 0, total_size);
  159. }
  160. static void test_mmap_not_supported(int fd, size_t total_size)
  161. {
  162. char *mem;
  163. mem = mmap(NULL, page_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
  164. TEST_ASSERT_EQ(mem, MAP_FAILED);
  165. mem = mmap(NULL, total_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
  166. TEST_ASSERT_EQ(mem, MAP_FAILED);
  167. }
  168. static void test_file_size(int fd, size_t total_size)
  169. {
  170. struct stat sb;
  171. int ret;
  172. ret = fstat(fd, &sb);
  173. TEST_ASSERT(!ret, "fstat should succeed");
  174. TEST_ASSERT_EQ(sb.st_size, total_size);
  175. TEST_ASSERT_EQ(sb.st_blksize, page_size);
  176. }
  177. static void test_fallocate(int fd, size_t total_size)
  178. {
  179. int ret;
  180. ret = fallocate(fd, FALLOC_FL_KEEP_SIZE, 0, total_size);
  181. TEST_ASSERT(!ret, "fallocate with aligned offset and size should succeed");
  182. ret = fallocate(fd, FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE,
  183. page_size - 1, page_size);
  184. TEST_ASSERT(ret, "fallocate with unaligned offset should fail");
  185. ret = fallocate(fd, FALLOC_FL_KEEP_SIZE, total_size, page_size);
  186. TEST_ASSERT(ret, "fallocate beginning at total_size should fail");
  187. ret = fallocate(fd, FALLOC_FL_KEEP_SIZE, total_size + page_size, page_size);
  188. TEST_ASSERT(ret, "fallocate beginning after total_size should fail");
  189. ret = fallocate(fd, FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE,
  190. total_size, page_size);
  191. TEST_ASSERT(!ret, "fallocate(PUNCH_HOLE) at total_size should succeed");
  192. ret = fallocate(fd, FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE,
  193. total_size + page_size, page_size);
  194. TEST_ASSERT(!ret, "fallocate(PUNCH_HOLE) after total_size should succeed");
  195. ret = fallocate(fd, FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE,
  196. page_size, page_size - 1);
  197. TEST_ASSERT(ret, "fallocate with unaligned size should fail");
  198. ret = fallocate(fd, FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE,
  199. page_size, page_size);
  200. TEST_ASSERT(!ret, "fallocate(PUNCH_HOLE) with aligned offset and size should succeed");
  201. ret = fallocate(fd, FALLOC_FL_KEEP_SIZE, page_size, page_size);
  202. TEST_ASSERT(!ret, "fallocate to restore punched hole should succeed");
  203. }
  204. static void test_invalid_punch_hole(int fd, size_t total_size)
  205. {
  206. struct {
  207. off_t offset;
  208. off_t len;
  209. } testcases[] = {
  210. {0, 1},
  211. {0, page_size - 1},
  212. {0, page_size + 1},
  213. {1, 1},
  214. {1, page_size - 1},
  215. {1, page_size},
  216. {1, page_size + 1},
  217. {page_size, 1},
  218. {page_size, page_size - 1},
  219. {page_size, page_size + 1},
  220. };
  221. int ret, i;
  222. for (i = 0; i < ARRAY_SIZE(testcases); i++) {
  223. ret = fallocate(fd, FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE,
  224. testcases[i].offset, testcases[i].len);
  225. TEST_ASSERT(ret == -1 && errno == EINVAL,
  226. "PUNCH_HOLE with !PAGE_SIZE offset (%lx) and/or length (%lx) should fail",
  227. testcases[i].offset, testcases[i].len);
  228. }
  229. }
  230. static void test_create_guest_memfd_invalid_sizes(struct kvm_vm *vm,
  231. uint64_t guest_memfd_flags)
  232. {
  233. size_t size;
  234. int fd;
  235. for (size = 1; size < page_size; size++) {
  236. fd = __vm_create_guest_memfd(vm, size, guest_memfd_flags);
  237. TEST_ASSERT(fd < 0 && errno == EINVAL,
  238. "guest_memfd() with non-page-aligned page size '0x%lx' should fail with EINVAL",
  239. size);
  240. }
  241. }
  242. static void test_create_guest_memfd_multiple(struct kvm_vm *vm)
  243. {
  244. int fd1, fd2, ret;
  245. struct stat st1, st2;
  246. fd1 = __vm_create_guest_memfd(vm, page_size, 0);
  247. TEST_ASSERT(fd1 != -1, "memfd creation should succeed");
  248. ret = fstat(fd1, &st1);
  249. TEST_ASSERT(ret != -1, "memfd fstat should succeed");
  250. TEST_ASSERT(st1.st_size == page_size, "memfd st_size should match requested size");
  251. fd2 = __vm_create_guest_memfd(vm, page_size * 2, 0);
  252. TEST_ASSERT(fd2 != -1, "memfd creation should succeed");
  253. ret = fstat(fd2, &st2);
  254. TEST_ASSERT(ret != -1, "memfd fstat should succeed");
  255. TEST_ASSERT(st2.st_size == page_size * 2, "second memfd st_size should match requested size");
  256. ret = fstat(fd1, &st1);
  257. TEST_ASSERT(ret != -1, "memfd fstat should succeed");
  258. TEST_ASSERT(st1.st_size == page_size, "first memfd st_size should still match requested size");
  259. TEST_ASSERT(st1.st_ino != st2.st_ino, "different memfd should have different inode numbers");
  260. close(fd2);
  261. close(fd1);
  262. }
  263. static void test_guest_memfd_flags(struct kvm_vm *vm)
  264. {
  265. uint64_t valid_flags = vm_check_cap(vm, KVM_CAP_GUEST_MEMFD_FLAGS);
  266. uint64_t flag;
  267. int fd;
  268. for (flag = BIT(0); flag; flag <<= 1) {
  269. fd = __vm_create_guest_memfd(vm, page_size, flag);
  270. if (flag & valid_flags) {
  271. TEST_ASSERT(fd >= 0,
  272. "guest_memfd() with flag '0x%lx' should succeed",
  273. flag);
  274. close(fd);
  275. } else {
  276. TEST_ASSERT(fd < 0 && errno == EINVAL,
  277. "guest_memfd() with flag '0x%lx' should fail with EINVAL",
  278. flag);
  279. }
  280. }
  281. }
  282. #define gmem_test(__test, __vm, __flags) \
  283. do { \
  284. int fd = vm_create_guest_memfd(__vm, page_size * 4, __flags); \
  285. \
  286. test_##__test(fd, page_size * 4); \
  287. close(fd); \
  288. } while (0)
  289. static void __test_guest_memfd(struct kvm_vm *vm, uint64_t flags)
  290. {
  291. test_create_guest_memfd_multiple(vm);
  292. test_create_guest_memfd_invalid_sizes(vm, flags);
  293. gmem_test(file_read_write, vm, flags);
  294. if (flags & GUEST_MEMFD_FLAG_MMAP) {
  295. if (flags & GUEST_MEMFD_FLAG_INIT_SHARED) {
  296. gmem_test(mmap_supported, vm, flags);
  297. gmem_test(fault_overflow, vm, flags);
  298. gmem_test(numa_allocation, vm, flags);
  299. } else {
  300. gmem_test(fault_private, vm, flags);
  301. }
  302. gmem_test(mmap_cow, vm, flags);
  303. gmem_test(mbind, vm, flags);
  304. } else {
  305. gmem_test(mmap_not_supported, vm, flags);
  306. }
  307. gmem_test(file_size, vm, flags);
  308. gmem_test(fallocate, vm, flags);
  309. gmem_test(invalid_punch_hole, vm, flags);
  310. }
  311. static void test_guest_memfd(unsigned long vm_type)
  312. {
  313. struct kvm_vm *vm = vm_create_barebones_type(vm_type);
  314. uint64_t flags;
  315. test_guest_memfd_flags(vm);
  316. __test_guest_memfd(vm, 0);
  317. flags = vm_check_cap(vm, KVM_CAP_GUEST_MEMFD_FLAGS);
  318. if (flags & GUEST_MEMFD_FLAG_MMAP)
  319. __test_guest_memfd(vm, GUEST_MEMFD_FLAG_MMAP);
  320. /* MMAP should always be supported if INIT_SHARED is supported. */
  321. if (flags & GUEST_MEMFD_FLAG_INIT_SHARED)
  322. __test_guest_memfd(vm, GUEST_MEMFD_FLAG_MMAP |
  323. GUEST_MEMFD_FLAG_INIT_SHARED);
  324. kvm_vm_free(vm);
  325. }
  326. static void guest_code(uint8_t *mem, uint64_t size)
  327. {
  328. size_t i;
  329. for (i = 0; i < size; i++)
  330. __GUEST_ASSERT(mem[i] == 0xaa,
  331. "Guest expected 0xaa at offset %lu, got 0x%x", i, mem[i]);
  332. memset(mem, 0xff, size);
  333. GUEST_DONE();
  334. }
  335. static void test_guest_memfd_guest(void)
  336. {
  337. /*
  338. * Skip the first 4gb and slot0. slot0 maps <1gb and is used to back
  339. * the guest's code, stack, and page tables, and low memory contains
  340. * the PCI hole and other MMIO regions that need to be avoided.
  341. */
  342. const uint64_t gpa = SZ_4G;
  343. const int slot = 1;
  344. struct kvm_vcpu *vcpu;
  345. struct kvm_vm *vm;
  346. uint8_t *mem;
  347. size_t size;
  348. int fd, i;
  349. if (!kvm_check_cap(KVM_CAP_GUEST_MEMFD_FLAGS))
  350. return;
  351. vm = __vm_create_shape_with_one_vcpu(VM_SHAPE_DEFAULT, &vcpu, 1, guest_code);
  352. TEST_ASSERT(vm_check_cap(vm, KVM_CAP_GUEST_MEMFD_FLAGS) & GUEST_MEMFD_FLAG_MMAP,
  353. "Default VM type should support MMAP, supported flags = 0x%x",
  354. vm_check_cap(vm, KVM_CAP_GUEST_MEMFD_FLAGS));
  355. TEST_ASSERT(vm_check_cap(vm, KVM_CAP_GUEST_MEMFD_FLAGS) & GUEST_MEMFD_FLAG_INIT_SHARED,
  356. "Default VM type should support INIT_SHARED, supported flags = 0x%x",
  357. vm_check_cap(vm, KVM_CAP_GUEST_MEMFD_FLAGS));
  358. size = vm->page_size;
  359. fd = vm_create_guest_memfd(vm, size, GUEST_MEMFD_FLAG_MMAP |
  360. GUEST_MEMFD_FLAG_INIT_SHARED);
  361. vm_set_user_memory_region2(vm, slot, KVM_MEM_GUEST_MEMFD, gpa, size, NULL, fd, 0);
  362. mem = kvm_mmap(size, PROT_READ | PROT_WRITE, MAP_SHARED, fd);
  363. memset(mem, 0xaa, size);
  364. kvm_munmap(mem, size);
  365. virt_pg_map(vm, gpa, gpa);
  366. vcpu_args_set(vcpu, 2, gpa, size);
  367. vcpu_run(vcpu);
  368. TEST_ASSERT_EQ(get_ucall(vcpu, NULL), UCALL_DONE);
  369. mem = kvm_mmap(size, PROT_READ | PROT_WRITE, MAP_SHARED, fd);
  370. for (i = 0; i < size; i++)
  371. TEST_ASSERT_EQ(mem[i], 0xff);
  372. close(fd);
  373. kvm_vm_free(vm);
  374. }
  375. int main(int argc, char *argv[])
  376. {
  377. unsigned long vm_types, vm_type;
  378. TEST_REQUIRE(kvm_has_cap(KVM_CAP_GUEST_MEMFD));
  379. page_size = getpagesize();
  380. /*
  381. * Not all architectures support KVM_CAP_VM_TYPES. However, those that
  382. * support guest_memfd have that support for the default VM type.
  383. */
  384. vm_types = kvm_check_cap(KVM_CAP_VM_TYPES);
  385. if (!vm_types)
  386. vm_types = BIT(VM_TYPE_DEFAULT);
  387. for_each_set_bit(vm_type, &vm_types, BITS_PER_TYPE(vm_types))
  388. test_guest_memfd(vm_type);
  389. test_guest_memfd_guest();
  390. }