mount-notify_test_ns.c 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555
  1. // SPDX-License-Identifier: GPL-2.0-or-later
  2. // Copyright (c) 2025 Miklos Szeredi <miklos@szeredi.hu>
  3. #define _GNU_SOURCE
  4. // Needed for linux/fanotify.h
  5. typedef struct {
  6. int val[2];
  7. } __kernel_fsid_t;
  8. #define __kernel_fsid_t __kernel_fsid_t
  9. #include <fcntl.h>
  10. #include <sched.h>
  11. #include <stdio.h>
  12. #include <string.h>
  13. #include <sys/stat.h>
  14. #include <sys/mount.h>
  15. #include <unistd.h>
  16. #include <sys/syscall.h>
  17. #include <sys/fanotify.h>
  18. #include "kselftest_harness.h"
  19. #include "../statmount/statmount.h"
  20. #include "../utils.h"
  21. static const char root_mntpoint_templ[] = "/tmp/mount-notify_test_root.XXXXXX";
  22. static const int mark_types[] = {
  23. FAN_MARK_FILESYSTEM,
  24. FAN_MARK_MOUNT,
  25. FAN_MARK_INODE
  26. };
  27. static const int mark_cmds[] = {
  28. FAN_MARK_ADD,
  29. FAN_MARK_REMOVE,
  30. FAN_MARK_FLUSH
  31. };
  32. #define NUM_FAN_FDS ARRAY_SIZE(mark_cmds)
  33. FIXTURE(fanotify) {
  34. int fan_fd[NUM_FAN_FDS];
  35. char buf[256];
  36. unsigned int rem;
  37. void *next;
  38. char root_mntpoint[sizeof(root_mntpoint_templ)];
  39. int orig_root;
  40. int orig_ns_fd;
  41. int ns_fd;
  42. uint64_t root_id;
  43. };
  44. FIXTURE_SETUP(fanotify)
  45. {
  46. int i, ret;
  47. self->orig_ns_fd = open("/proc/self/ns/mnt", O_RDONLY);
  48. ASSERT_GE(self->orig_ns_fd, 0);
  49. ret = setup_userns();
  50. ASSERT_EQ(ret, 0);
  51. self->ns_fd = open("/proc/self/ns/mnt", O_RDONLY);
  52. ASSERT_GE(self->ns_fd, 0);
  53. strcpy(self->root_mntpoint, root_mntpoint_templ);
  54. ASSERT_NE(mkdtemp(self->root_mntpoint), NULL);
  55. self->orig_root = open("/", O_PATH | O_CLOEXEC);
  56. ASSERT_GE(self->orig_root, 0);
  57. ASSERT_EQ(mount("tmpfs", self->root_mntpoint, "tmpfs", 0, NULL), 0);
  58. ASSERT_EQ(chroot(self->root_mntpoint), 0);
  59. ASSERT_EQ(chdir("/"), 0);
  60. ASSERT_EQ(mkdir("a", 0700), 0);
  61. ASSERT_EQ(mkdir("b", 0700), 0);
  62. self->root_id = get_unique_mnt_id("/");
  63. ASSERT_NE(self->root_id, 0);
  64. for (i = 0; i < NUM_FAN_FDS; i++) {
  65. int fan_fd = fanotify_init(FAN_REPORT_FID, 0);
  66. // Verify that watching tmpfs mounted inside userns is allowed
  67. ret = fanotify_mark(fan_fd, FAN_MARK_ADD | mark_types[i],
  68. FAN_OPEN, AT_FDCWD, "/");
  69. ASSERT_EQ(ret, 0);
  70. // ...but watching entire orig root filesystem is not allowed
  71. ret = fanotify_mark(fan_fd, FAN_MARK_ADD | FAN_MARK_FILESYSTEM,
  72. FAN_OPEN, self->orig_root, ".");
  73. ASSERT_NE(ret, 0);
  74. close(fan_fd);
  75. self->fan_fd[i] = fanotify_init(FAN_REPORT_MNT | FAN_NONBLOCK,
  76. 0);
  77. ASSERT_GE(self->fan_fd[i], 0);
  78. // Verify that watching mntns where group was created is allowed
  79. ret = fanotify_mark(self->fan_fd[i], FAN_MARK_ADD |
  80. FAN_MARK_MNTNS,
  81. FAN_MNT_ATTACH | FAN_MNT_DETACH,
  82. self->ns_fd, NULL);
  83. ASSERT_EQ(ret, 0);
  84. // ...but watching orig mntns is not allowed
  85. ret = fanotify_mark(self->fan_fd[i], FAN_MARK_ADD |
  86. FAN_MARK_MNTNS,
  87. FAN_MNT_ATTACH | FAN_MNT_DETACH,
  88. self->orig_ns_fd, NULL);
  89. ASSERT_NE(ret, 0);
  90. // On fd[0] we do an extra ADD that changes nothing.
  91. // On fd[1]/fd[2] we REMOVE/FLUSH which removes the mark.
  92. ret = fanotify_mark(self->fan_fd[i], mark_cmds[i] |
  93. FAN_MARK_MNTNS,
  94. FAN_MNT_ATTACH | FAN_MNT_DETACH,
  95. self->ns_fd, NULL);
  96. ASSERT_EQ(ret, 0);
  97. }
  98. self->rem = 0;
  99. }
  100. FIXTURE_TEARDOWN(fanotify)
  101. {
  102. int i;
  103. ASSERT_EQ(self->rem, 0);
  104. for (i = 0; i < NUM_FAN_FDS; i++)
  105. close(self->fan_fd[i]);
  106. ASSERT_EQ(fchdir(self->orig_root), 0);
  107. ASSERT_EQ(chroot("."), 0);
  108. EXPECT_EQ(umount2(self->root_mntpoint, MNT_DETACH), 0);
  109. EXPECT_EQ(chdir(self->root_mntpoint), 0);
  110. EXPECT_EQ(chdir("/"), 0);
  111. EXPECT_EQ(rmdir(self->root_mntpoint), 0);
  112. }
  113. static uint64_t expect_notify(struct __test_metadata *const _metadata,
  114. FIXTURE_DATA(fanotify) *self,
  115. uint64_t *mask)
  116. {
  117. struct fanotify_event_metadata *meta;
  118. struct fanotify_event_info_mnt *mnt;
  119. unsigned int thislen;
  120. if (!self->rem) {
  121. ssize_t len;
  122. int i;
  123. for (i = NUM_FAN_FDS - 1; i >= 0; i--) {
  124. len = read(self->fan_fd[i], self->buf,
  125. sizeof(self->buf));
  126. if (i > 0) {
  127. // Groups 1,2 should get EAGAIN
  128. ASSERT_EQ(len, -1);
  129. ASSERT_EQ(errno, EAGAIN);
  130. } else {
  131. // Group 0 should get events
  132. ASSERT_GT(len, 0);
  133. }
  134. }
  135. self->rem = len;
  136. self->next = (void *) self->buf;
  137. }
  138. meta = self->next;
  139. ASSERT_TRUE(FAN_EVENT_OK(meta, self->rem));
  140. thislen = meta->event_len;
  141. self->rem -= thislen;
  142. self->next += thislen;
  143. *mask = meta->mask;
  144. thislen -= sizeof(*meta);
  145. mnt = ((void *) meta) + meta->event_len - thislen;
  146. ASSERT_EQ(thislen, sizeof(*mnt));
  147. return mnt->mnt_id;
  148. }
  149. static void expect_notify_n(struct __test_metadata *const _metadata,
  150. FIXTURE_DATA(fanotify) *self,
  151. unsigned int n, uint64_t mask[], uint64_t mnts[])
  152. {
  153. unsigned int i;
  154. for (i = 0; i < n; i++)
  155. mnts[i] = expect_notify(_metadata, self, &mask[i]);
  156. }
  157. static uint64_t expect_notify_mask(struct __test_metadata *const _metadata,
  158. FIXTURE_DATA(fanotify) *self,
  159. uint64_t expect_mask)
  160. {
  161. uint64_t mntid, mask;
  162. mntid = expect_notify(_metadata, self, &mask);
  163. ASSERT_EQ(expect_mask, mask);
  164. return mntid;
  165. }
  166. static void expect_notify_mask_n(struct __test_metadata *const _metadata,
  167. FIXTURE_DATA(fanotify) *self,
  168. uint64_t mask, unsigned int n, uint64_t mnts[])
  169. {
  170. unsigned int i;
  171. for (i = 0; i < n; i++)
  172. mnts[i] = expect_notify_mask(_metadata, self, mask);
  173. }
  174. static void verify_mount_ids(struct __test_metadata *const _metadata,
  175. const uint64_t list1[], const uint64_t list2[],
  176. size_t num)
  177. {
  178. unsigned int i, j;
  179. // Check that neither list has any duplicates
  180. for (i = 0; i < num; i++) {
  181. for (j = 0; j < num; j++) {
  182. if (i != j) {
  183. ASSERT_NE(list1[i], list1[j]);
  184. ASSERT_NE(list2[i], list2[j]);
  185. }
  186. }
  187. }
  188. // Check that all list1 memebers can be found in list2. Together with
  189. // the above it means that the list1 and list2 represent the same sets.
  190. for (i = 0; i < num; i++) {
  191. for (j = 0; j < num; j++) {
  192. if (list1[i] == list2[j])
  193. break;
  194. }
  195. ASSERT_NE(j, num);
  196. }
  197. }
  198. static void check_mounted(struct __test_metadata *const _metadata,
  199. const uint64_t mnts[], size_t num)
  200. {
  201. ssize_t ret;
  202. uint64_t *list;
  203. list = malloc((num + 1) * sizeof(list[0]));
  204. ASSERT_NE(list, NULL);
  205. ret = listmount(LSMT_ROOT, 0, 0, list, num + 1, 0);
  206. ASSERT_EQ(ret, num);
  207. verify_mount_ids(_metadata, mnts, list, num);
  208. free(list);
  209. }
  210. static void setup_mount_tree(struct __test_metadata *const _metadata,
  211. int log2_num)
  212. {
  213. int ret, i;
  214. ret = mount("", "/", NULL, MS_SHARED, NULL);
  215. ASSERT_EQ(ret, 0);
  216. for (i = 0; i < log2_num; i++) {
  217. ret = mount("/", "/", NULL, MS_BIND, NULL);
  218. ASSERT_EQ(ret, 0);
  219. }
  220. }
  221. TEST_F(fanotify, bind)
  222. {
  223. int ret;
  224. uint64_t mnts[2] = { self->root_id };
  225. ret = mount("/", "/", NULL, MS_BIND, NULL);
  226. ASSERT_EQ(ret, 0);
  227. mnts[1] = expect_notify_mask(_metadata, self, FAN_MNT_ATTACH);
  228. ASSERT_NE(mnts[0], mnts[1]);
  229. check_mounted(_metadata, mnts, 2);
  230. // Cleanup
  231. uint64_t detach_id;
  232. ret = umount("/");
  233. ASSERT_EQ(ret, 0);
  234. detach_id = expect_notify_mask(_metadata, self, FAN_MNT_DETACH);
  235. ASSERT_EQ(detach_id, mnts[1]);
  236. check_mounted(_metadata, mnts, 1);
  237. }
  238. TEST_F(fanotify, move)
  239. {
  240. int ret;
  241. uint64_t mnts[2] = { self->root_id };
  242. uint64_t move_id;
  243. ret = mount("/", "/a", NULL, MS_BIND, NULL);
  244. ASSERT_EQ(ret, 0);
  245. mnts[1] = expect_notify_mask(_metadata, self, FAN_MNT_ATTACH);
  246. ASSERT_NE(mnts[0], mnts[1]);
  247. check_mounted(_metadata, mnts, 2);
  248. ret = move_mount(AT_FDCWD, "/a", AT_FDCWD, "/b", 0);
  249. ASSERT_EQ(ret, 0);
  250. move_id = expect_notify_mask(_metadata, self, FAN_MNT_ATTACH | FAN_MNT_DETACH);
  251. ASSERT_EQ(move_id, mnts[1]);
  252. // Cleanup
  253. ret = umount("/b");
  254. ASSERT_EQ(ret, 0);
  255. check_mounted(_metadata, mnts, 1);
  256. }
  257. TEST_F(fanotify, propagate)
  258. {
  259. const unsigned int log2_num = 4;
  260. const unsigned int num = (1 << log2_num);
  261. uint64_t mnts[num];
  262. setup_mount_tree(_metadata, log2_num);
  263. expect_notify_mask_n(_metadata, self, FAN_MNT_ATTACH, num - 1, mnts + 1);
  264. mnts[0] = self->root_id;
  265. check_mounted(_metadata, mnts, num);
  266. // Cleanup
  267. int ret;
  268. uint64_t mnts2[num];
  269. ret = umount2("/", MNT_DETACH);
  270. ASSERT_EQ(ret, 0);
  271. ret = mount("", "/", NULL, MS_PRIVATE, NULL);
  272. ASSERT_EQ(ret, 0);
  273. mnts2[0] = self->root_id;
  274. expect_notify_mask_n(_metadata, self, FAN_MNT_DETACH, num - 1, mnts2 + 1);
  275. verify_mount_ids(_metadata, mnts, mnts2, num);
  276. check_mounted(_metadata, mnts, 1);
  277. }
  278. TEST_F(fanotify, fsmount)
  279. {
  280. int ret, fs, mnt;
  281. uint64_t mnts[2] = { self->root_id };
  282. fs = fsopen("tmpfs", 0);
  283. ASSERT_GE(fs, 0);
  284. ret = fsconfig(fs, FSCONFIG_CMD_CREATE, 0, 0, 0);
  285. ASSERT_EQ(ret, 0);
  286. mnt = fsmount(fs, 0, 0);
  287. ASSERT_GE(mnt, 0);
  288. close(fs);
  289. ret = move_mount(mnt, "", AT_FDCWD, "/a", MOVE_MOUNT_F_EMPTY_PATH);
  290. ASSERT_EQ(ret, 0);
  291. close(mnt);
  292. mnts[1] = expect_notify_mask(_metadata, self, FAN_MNT_ATTACH);
  293. ASSERT_NE(mnts[0], mnts[1]);
  294. check_mounted(_metadata, mnts, 2);
  295. // Cleanup
  296. uint64_t detach_id;
  297. ret = umount("/a");
  298. ASSERT_EQ(ret, 0);
  299. detach_id = expect_notify_mask(_metadata, self, FAN_MNT_DETACH);
  300. ASSERT_EQ(detach_id, mnts[1]);
  301. check_mounted(_metadata, mnts, 1);
  302. }
  303. TEST_F(fanotify, reparent)
  304. {
  305. uint64_t mnts[6] = { self->root_id };
  306. uint64_t dmnts[3];
  307. uint64_t masks[3];
  308. unsigned int i;
  309. int ret;
  310. // Create setup with a[1] -> b[2] propagation
  311. ret = mount("/", "/a", NULL, MS_BIND, NULL);
  312. ASSERT_EQ(ret, 0);
  313. ret = mount("", "/a", NULL, MS_SHARED, NULL);
  314. ASSERT_EQ(ret, 0);
  315. ret = mount("/a", "/b", NULL, MS_BIND, NULL);
  316. ASSERT_EQ(ret, 0);
  317. ret = mount("", "/b", NULL, MS_SLAVE, NULL);
  318. ASSERT_EQ(ret, 0);
  319. expect_notify_mask_n(_metadata, self, FAN_MNT_ATTACH, 2, mnts + 1);
  320. check_mounted(_metadata, mnts, 3);
  321. // Mount on a[3], which is propagated to b[4]
  322. ret = mount("/", "/a", NULL, MS_BIND, NULL);
  323. ASSERT_EQ(ret, 0);
  324. expect_notify_mask_n(_metadata, self, FAN_MNT_ATTACH, 2, mnts + 3);
  325. check_mounted(_metadata, mnts, 5);
  326. // Mount on b[5], not propagated
  327. ret = mount("/", "/b", NULL, MS_BIND, NULL);
  328. ASSERT_EQ(ret, 0);
  329. mnts[5] = expect_notify_mask(_metadata, self, FAN_MNT_ATTACH);
  330. check_mounted(_metadata, mnts, 6);
  331. // Umount a[3], which is propagated to b[4], but not b[5]
  332. // This will result in b[5] "falling" on b[2]
  333. ret = umount("/a");
  334. ASSERT_EQ(ret, 0);
  335. expect_notify_n(_metadata, self, 3, masks, dmnts);
  336. verify_mount_ids(_metadata, mnts + 3, dmnts, 3);
  337. for (i = 0; i < 3; i++) {
  338. if (dmnts[i] == mnts[5]) {
  339. ASSERT_EQ(masks[i], FAN_MNT_ATTACH | FAN_MNT_DETACH);
  340. } else {
  341. ASSERT_EQ(masks[i], FAN_MNT_DETACH);
  342. }
  343. }
  344. mnts[3] = mnts[5];
  345. check_mounted(_metadata, mnts, 4);
  346. // Cleanup
  347. ret = umount("/b");
  348. ASSERT_EQ(ret, 0);
  349. ret = umount("/a");
  350. ASSERT_EQ(ret, 0);
  351. ret = umount("/b");
  352. ASSERT_EQ(ret, 0);
  353. expect_notify_mask_n(_metadata, self, FAN_MNT_DETACH, 3, dmnts);
  354. verify_mount_ids(_metadata, mnts + 1, dmnts, 3);
  355. check_mounted(_metadata, mnts, 1);
  356. }
  357. TEST_F(fanotify, rmdir)
  358. {
  359. uint64_t mnts[3] = { self->root_id };
  360. int ret;
  361. ret = mount("/", "/a", NULL, MS_BIND, NULL);
  362. ASSERT_EQ(ret, 0);
  363. ret = mount("/", "/a/b", NULL, MS_BIND, NULL);
  364. ASSERT_EQ(ret, 0);
  365. expect_notify_mask_n(_metadata, self, FAN_MNT_ATTACH, 2, mnts + 1);
  366. check_mounted(_metadata, mnts, 3);
  367. ret = chdir("/a");
  368. ASSERT_EQ(ret, 0);
  369. ret = fork();
  370. ASSERT_GE(ret, 0);
  371. if (ret == 0) {
  372. chdir("/");
  373. unshare(CLONE_NEWNS);
  374. mount("", "/", NULL, MS_REC|MS_PRIVATE, NULL);
  375. umount2("/a", MNT_DETACH);
  376. // This triggers a detach in the other namespace
  377. rmdir("/a");
  378. exit(0);
  379. }
  380. wait(NULL);
  381. expect_notify_mask_n(_metadata, self, FAN_MNT_DETACH, 2, mnts + 1);
  382. check_mounted(_metadata, mnts, 1);
  383. // Cleanup
  384. ret = chdir("/");
  385. ASSERT_EQ(ret, 0);
  386. }
  387. TEST_F(fanotify, pivot_root)
  388. {
  389. uint64_t mnts[3] = { self->root_id };
  390. uint64_t mnts2[3];
  391. int ret;
  392. ret = mount("tmpfs", "/a", "tmpfs", 0, NULL);
  393. ASSERT_EQ(ret, 0);
  394. mnts[2] = expect_notify_mask(_metadata, self, FAN_MNT_ATTACH);
  395. ret = mkdir("/a/new", 0700);
  396. ASSERT_EQ(ret, 0);
  397. ret = mkdir("/a/old", 0700);
  398. ASSERT_EQ(ret, 0);
  399. ret = mount("/a", "/a/new", NULL, MS_BIND, NULL);
  400. ASSERT_EQ(ret, 0);
  401. mnts[1] = expect_notify_mask(_metadata, self, FAN_MNT_ATTACH);
  402. check_mounted(_metadata, mnts, 3);
  403. ret = syscall(SYS_pivot_root, "/a/new", "/a/new/old");
  404. ASSERT_EQ(ret, 0);
  405. expect_notify_mask_n(_metadata, self, FAN_MNT_ATTACH | FAN_MNT_DETACH, 2, mnts2);
  406. verify_mount_ids(_metadata, mnts, mnts2, 2);
  407. check_mounted(_metadata, mnts, 3);
  408. // Cleanup
  409. ret = syscall(SYS_pivot_root, "/old", "/old/a/new");
  410. ASSERT_EQ(ret, 0);
  411. ret = umount("/a/new");
  412. ASSERT_EQ(ret, 0);
  413. ret = umount("/a");
  414. ASSERT_EQ(ret, 0);
  415. check_mounted(_metadata, mnts, 1);
  416. }
// kselftest harness macro (kselftest_harness.h); supplies the program entry
// point that runs the tests registered above.
TEST_HARNESS_MAIN