pnode.c 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687
  1. // SPDX-License-Identifier: GPL-2.0-only
  2. /*
  3. * linux/fs/pnode.c
  4. *
  5. * (C) Copyright IBM Corporation 2005.
  6. * Author : Ram Pai (linuxram@us.ibm.com)
  7. */
  8. #include <linux/mnt_namespace.h>
  9. #include <linux/mount.h>
  10. #include <linux/fs.h>
  11. #include <linux/nsproxy.h>
  12. #include <uapi/linux/mount.h>
  13. #include "internal.h"
  14. #include "pnode.h"
  15. /* return the next shared peer mount of @p */
  16. static inline struct mount *next_peer(struct mount *p)
  17. {
  18. return list_entry(p->mnt_share.next, struct mount, mnt_share);
  19. }
  20. static inline struct mount *first_slave(struct mount *p)
  21. {
  22. return hlist_entry(p->mnt_slave_list.first, struct mount, mnt_slave);
  23. }
  24. static inline struct mount *next_slave(struct mount *p)
  25. {
  26. return hlist_entry(p->mnt_slave.next, struct mount, mnt_slave);
  27. }
  28. /* locks: namespace_shared && is_mounted(mnt) */
  29. static struct mount *get_peer_under_root(struct mount *mnt,
  30. struct mnt_namespace *ns,
  31. const struct path *root)
  32. {
  33. struct mount *m = mnt;
  34. do {
  35. /* Check the namespace first for optimization */
  36. if (m->mnt_ns == ns && is_path_reachable(m, m->mnt.mnt_root, root))
  37. return m;
  38. m = next_peer(m);
  39. } while (m != mnt);
  40. return NULL;
  41. }
  42. /*
  43. * Get ID of closest dominating peer group having a representative
  44. * under the given root.
  45. *
  46. * locks: namespace_shared
  47. */
  48. int get_dominating_id(struct mount *mnt, const struct path *root)
  49. {
  50. struct mount *m;
  51. for (m = mnt->mnt_master; m != NULL; m = m->mnt_master) {
  52. struct mount *d = get_peer_under_root(m, mnt->mnt_ns, root);
  53. if (d)
  54. return d->mnt_group_id;
  55. }
  56. return 0;
  57. }
  58. static inline bool will_be_unmounted(struct mount *m)
  59. {
  60. return m->mnt.mnt_flags & MNT_UMOUNT;
  61. }
  62. static void transfer_propagation(struct mount *mnt, struct mount *to)
  63. {
  64. struct hlist_node *p = NULL, *n;
  65. struct mount *m;
  66. hlist_for_each_entry_safe(m, n, &mnt->mnt_slave_list, mnt_slave) {
  67. m->mnt_master = to;
  68. if (!to)
  69. hlist_del_init(&m->mnt_slave);
  70. else
  71. p = &m->mnt_slave;
  72. }
  73. if (p)
  74. hlist_splice_init(&mnt->mnt_slave_list, p, &to->mnt_slave_list);
  75. }
  76. /*
  77. * EXCL[namespace_sem]
  78. */
  79. void change_mnt_propagation(struct mount *mnt, int type)
  80. {
  81. struct mount *m = mnt->mnt_master;
  82. if (type == MS_SHARED) {
  83. set_mnt_shared(mnt);
  84. return;
  85. }
  86. if (IS_MNT_SHARED(mnt)) {
  87. if (list_empty(&mnt->mnt_share)) {
  88. mnt_release_group_id(mnt);
  89. } else {
  90. m = next_peer(mnt);
  91. list_del_init(&mnt->mnt_share);
  92. mnt->mnt_group_id = 0;
  93. }
  94. CLEAR_MNT_SHARED(mnt);
  95. transfer_propagation(mnt, m);
  96. }
  97. hlist_del_init(&mnt->mnt_slave);
  98. if (type == MS_SLAVE) {
  99. mnt->mnt_master = m;
  100. if (m)
  101. hlist_add_head(&mnt->mnt_slave, &m->mnt_slave_list);
  102. } else {
  103. mnt->mnt_master = NULL;
  104. if (type == MS_UNBINDABLE)
  105. mnt->mnt_t_flags |= T_UNBINDABLE;
  106. else
  107. mnt->mnt_t_flags &= ~T_UNBINDABLE;
  108. }
  109. }
  110. static struct mount *trace_transfers(struct mount *m)
  111. {
  112. while (1) {
  113. struct mount *next = next_peer(m);
  114. if (next != m) {
  115. list_del_init(&m->mnt_share);
  116. m->mnt_group_id = 0;
  117. m->mnt_master = next;
  118. } else {
  119. if (IS_MNT_SHARED(m))
  120. mnt_release_group_id(m);
  121. next = m->mnt_master;
  122. }
  123. hlist_del_init(&m->mnt_slave);
  124. CLEAR_MNT_SHARED(m);
  125. SET_MNT_MARK(m);
  126. if (!next || !will_be_unmounted(next))
  127. return next;
  128. if (IS_MNT_MARKED(next))
  129. return next->mnt_master;
  130. m = next;
  131. }
  132. }
  133. static void set_destinations(struct mount *m, struct mount *master)
  134. {
  135. struct mount *next;
  136. while ((next = m->mnt_master) != master) {
  137. m->mnt_master = master;
  138. m = next;
  139. }
  140. }
  141. void bulk_make_private(struct list_head *set)
  142. {
  143. struct mount *m;
  144. list_for_each_entry(m, set, mnt_list)
  145. if (!IS_MNT_MARKED(m))
  146. set_destinations(m, trace_transfers(m));
  147. list_for_each_entry(m, set, mnt_list) {
  148. transfer_propagation(m, m->mnt_master);
  149. m->mnt_master = NULL;
  150. CLEAR_MNT_MARK(m);
  151. }
  152. }
  153. static struct mount *__propagation_next(struct mount *m,
  154. struct mount *origin)
  155. {
  156. while (1) {
  157. struct mount *master = m->mnt_master;
  158. if (master == origin->mnt_master) {
  159. struct mount *next = next_peer(m);
  160. return (next == origin) ? NULL : next;
  161. } else if (m->mnt_slave.next)
  162. return next_slave(m);
  163. /* back at master */
  164. m = master;
  165. }
  166. }
  167. /*
  168. * get the next mount in the propagation tree.
  169. * @m: the mount seen last
  170. * @origin: the original mount from where the tree walk initiated
  171. *
  172. * Note that peer groups form contiguous segments of slave lists.
  173. * We rely on that in get_source() to be able to find out if
  174. * vfsmount found while iterating with propagation_next() is
  175. * a peer of one we'd found earlier.
  176. */
  177. static struct mount *propagation_next(struct mount *m,
  178. struct mount *origin)
  179. {
  180. /* are there any slaves of this mount? */
  181. if (!IS_MNT_NEW(m) && !hlist_empty(&m->mnt_slave_list))
  182. return first_slave(m);
  183. return __propagation_next(m, origin);
  184. }
  185. static struct mount *skip_propagation_subtree(struct mount *m,
  186. struct mount *origin)
  187. {
  188. /*
  189. * Advance m past everything that gets propagation from it.
  190. */
  191. struct mount *p = __propagation_next(m, origin);
  192. while (p && peers(m, p))
  193. p = __propagation_next(p, origin);
  194. return p;
  195. }
  196. static struct mount *next_group(struct mount *m, struct mount *origin)
  197. {
  198. while (1) {
  199. while (1) {
  200. struct mount *next;
  201. if (!IS_MNT_NEW(m) && !hlist_empty(&m->mnt_slave_list))
  202. return first_slave(m);
  203. next = next_peer(m);
  204. if (m->mnt_group_id == origin->mnt_group_id) {
  205. if (next == origin)
  206. return NULL;
  207. } else if (m->mnt_slave.next != &next->mnt_slave)
  208. break;
  209. m = next;
  210. }
  211. /* m is the last peer */
  212. while (1) {
  213. struct mount *master = m->mnt_master;
  214. if (m->mnt_slave.next)
  215. return next_slave(m);
  216. m = next_peer(master);
  217. if (master->mnt_group_id == origin->mnt_group_id)
  218. break;
  219. if (master->mnt_slave.next == &m->mnt_slave)
  220. break;
  221. m = master;
  222. }
  223. if (m == origin)
  224. return NULL;
  225. }
  226. }
  227. static bool need_secondary(struct mount *m, struct mountpoint *dest_mp)
  228. {
  229. /* skip ones added by this propagate_mnt() */
  230. if (IS_MNT_NEW(m))
  231. return false;
  232. /* skip if mountpoint isn't visible in m */
  233. if (!is_subdir(dest_mp->m_dentry, m->mnt.mnt_root))
  234. return false;
  235. /* skip if m is in the anon_ns */
  236. if (is_anon_ns(m->mnt_ns))
  237. return false;
  238. return true;
  239. }
  240. static struct mount *find_master(struct mount *m,
  241. struct mount *last_copy,
  242. struct mount *original)
  243. {
  244. struct mount *p;
  245. // ascend until there's a copy for something with the same master
  246. for (;;) {
  247. p = m->mnt_master;
  248. if (!p || IS_MNT_MARKED(p))
  249. break;
  250. m = p;
  251. }
  252. while (!peers(last_copy, original)) {
  253. struct mount *parent = last_copy->mnt_parent;
  254. if (parent->mnt_master == p) {
  255. if (!peers(parent, m))
  256. last_copy = last_copy->mnt_master;
  257. break;
  258. }
  259. last_copy = last_copy->mnt_master;
  260. }
  261. return last_copy;
  262. }
  263. /**
  264. * propagate_mnt() - create secondary copies for tree attachment
  265. * @dest_mnt: destination mount.
  266. * @dest_mp: destination mountpoint.
  267. * @source_mnt: source mount.
  268. * @tree_list: list of secondaries to be attached.
  269. *
  270. * Create secondary copies for attaching a tree with root @source_mnt
  271. * at mount @dest_mnt with mountpoint @dest_mp. Link all new mounts
  272. * into a propagation graph. Set mountpoints for all secondaries,
  273. * link their roots into @tree_list via ->mnt_hash.
  274. */
  275. int propagate_mnt(struct mount *dest_mnt, struct mountpoint *dest_mp,
  276. struct mount *source_mnt, struct hlist_head *tree_list)
  277. {
  278. struct mount *m, *n, *copy, *this;
  279. int err = 0, type;
  280. if (dest_mnt->mnt_master)
  281. SET_MNT_MARK(dest_mnt->mnt_master);
  282. /* iterate over peer groups, depth first */
  283. for (m = dest_mnt; m && !err; m = next_group(m, dest_mnt)) {
  284. if (m == dest_mnt) { // have one for dest_mnt itself
  285. copy = source_mnt;
  286. type = CL_MAKE_SHARED;
  287. n = next_peer(m);
  288. if (n == m)
  289. continue;
  290. } else {
  291. type = CL_SLAVE;
  292. /* beginning of peer group among the slaves? */
  293. if (IS_MNT_SHARED(m))
  294. type |= CL_MAKE_SHARED;
  295. n = m;
  296. }
  297. do {
  298. if (!need_secondary(n, dest_mp))
  299. continue;
  300. if (type & CL_SLAVE) // first in this peer group
  301. copy = find_master(n, copy, source_mnt);
  302. this = copy_tree(copy, copy->mnt.mnt_root, type);
  303. if (IS_ERR(this)) {
  304. err = PTR_ERR(this);
  305. break;
  306. }
  307. scoped_guard(mount_locked_reader)
  308. mnt_set_mountpoint(n, dest_mp, this);
  309. if (n->mnt_master)
  310. SET_MNT_MARK(n->mnt_master);
  311. copy = this;
  312. hlist_add_head(&this->mnt_hash, tree_list);
  313. err = count_mounts(n->mnt_ns, this);
  314. if (err)
  315. break;
  316. type = CL_MAKE_SHARED;
  317. } while ((n = next_peer(n)) != m);
  318. }
  319. hlist_for_each_entry(n, tree_list, mnt_hash) {
  320. m = n->mnt_parent;
  321. if (m->mnt_master)
  322. CLEAR_MNT_MARK(m->mnt_master);
  323. }
  324. if (dest_mnt->mnt_master)
  325. CLEAR_MNT_MARK(dest_mnt->mnt_master);
  326. return err;
  327. }
  328. /*
  329. * return true if the refcount is greater than count
  330. */
  331. static inline int do_refcount_check(struct mount *mnt, int count)
  332. {
  333. return mnt_get_count(mnt) > count;
  334. }
  335. /**
  336. * propagation_would_overmount - check whether propagation from @from
  337. * would overmount @to
  338. * @from: shared mount
  339. * @to: mount to check
  340. * @mp: future mountpoint of @to on @from
  341. *
  342. * If @from propagates mounts to @to, @from and @to must either be peers
  343. * or one of the masters in the hierarchy of masters of @to must be a
  344. * peer of @from.
  345. *
  346. * If the root of the @to mount is equal to the future mountpoint @mp of
  347. * the @to mount on @from then @to will be overmounted by whatever is
  348. * propagated to it.
  349. *
  350. * Context: This function expects namespace_lock() to be held and that
  351. * @mp is stable.
  352. * Return: If @from overmounts @to, true is returned, false if not.
  353. */
  354. bool propagation_would_overmount(const struct mount *from,
  355. const struct mount *to,
  356. const struct mountpoint *mp)
  357. {
  358. if (!IS_MNT_SHARED(from))
  359. return false;
  360. if (to->mnt.mnt_root != mp->m_dentry)
  361. return false;
  362. for (const struct mount *m = to; m; m = m->mnt_master) {
  363. if (peers(from, m))
  364. return true;
  365. }
  366. return false;
  367. }
  368. /*
  369. * check if the mount 'mnt' can be unmounted successfully.
  370. * @mnt: the mount to be checked for unmount
  371. * NOTE: unmounting 'mnt' would naturally propagate to all
  372. * other mounts its parent propagates to.
  373. * Check if any of these mounts that **do not have submounts**
  374. * have more references than 'refcnt'. If so return busy.
  375. *
  376. * vfsmount lock must be held for write
  377. */
  378. int propagate_mount_busy(struct mount *mnt, int refcnt)
  379. {
  380. struct mount *parent = mnt->mnt_parent;
  381. /*
  382. * quickly check if the current mount can be unmounted.
  383. * If not, we don't have to go checking for all other
  384. * mounts
  385. */
  386. if (!list_empty(&mnt->mnt_mounts) || do_refcount_check(mnt, refcnt))
  387. return 1;
  388. if (mnt == parent)
  389. return 0;
  390. for (struct mount *m = propagation_next(parent, parent); m;
  391. m = propagation_next(m, parent)) {
  392. struct list_head *head;
  393. struct mount *child = __lookup_mnt(&m->mnt, mnt->mnt_mountpoint);
  394. if (!child)
  395. continue;
  396. head = &child->mnt_mounts;
  397. if (!list_empty(head)) {
  398. /*
  399. * a mount that covers child completely wouldn't prevent
  400. * it being pulled out; any other would.
  401. */
  402. if (!list_is_singular(head) || !child->overmount)
  403. continue;
  404. }
  405. if (do_refcount_check(child, 1))
  406. return 1;
  407. }
  408. return 0;
  409. }
  410. /*
  411. * Clear MNT_LOCKED when it can be shown to be safe.
  412. *
  413. * mount_lock lock must be held for write
  414. */
  415. void propagate_mount_unlock(struct mount *mnt)
  416. {
  417. struct mount *parent = mnt->mnt_parent;
  418. struct mount *m, *child;
  419. BUG_ON(parent == mnt);
  420. for (m = propagation_next(parent, parent); m;
  421. m = propagation_next(m, parent)) {
  422. child = __lookup_mnt(&m->mnt, mnt->mnt_mountpoint);
  423. if (child)
  424. child->mnt.mnt_flags &= ~MNT_LOCKED;
  425. }
  426. }
  427. static inline bool is_candidate(struct mount *m)
  428. {
  429. return m->mnt_t_flags & T_UMOUNT_CANDIDATE;
  430. }
  431. static void umount_one(struct mount *m, struct list_head *to_umount)
  432. {
  433. m->mnt.mnt_flags |= MNT_UMOUNT;
  434. list_del_init(&m->mnt_child);
  435. move_from_ns(m);
  436. list_add_tail(&m->mnt_list, to_umount);
  437. }
  438. static void remove_from_candidate_list(struct mount *m)
  439. {
  440. m->mnt_t_flags &= ~(T_MARKED | T_UMOUNT_CANDIDATE);
  441. list_del_init(&m->mnt_list);
  442. }
  443. static void gather_candidates(struct list_head *set,
  444. struct list_head *candidates)
  445. {
  446. struct mount *m, *p, *q;
  447. list_for_each_entry(m, set, mnt_list) {
  448. if (is_candidate(m))
  449. continue;
  450. m->mnt_t_flags |= T_UMOUNT_CANDIDATE;
  451. p = m->mnt_parent;
  452. q = propagation_next(p, p);
  453. while (q) {
  454. struct mount *child = __lookup_mnt(&q->mnt,
  455. m->mnt_mountpoint);
  456. if (child) {
  457. /*
  458. * We might've already run into this one. That
  459. * must've happened on earlier iteration of the
  460. * outer loop; in that case we can skip those
  461. * parents that get propagation from q - there
  462. * will be nothing new on those as well.
  463. */
  464. if (is_candidate(child)) {
  465. q = skip_propagation_subtree(q, p);
  466. continue;
  467. }
  468. child->mnt_t_flags |= T_UMOUNT_CANDIDATE;
  469. if (!will_be_unmounted(child))
  470. list_add(&child->mnt_list, candidates);
  471. }
  472. q = propagation_next(q, p);
  473. }
  474. }
  475. list_for_each_entry(m, set, mnt_list)
  476. m->mnt_t_flags &= ~T_UMOUNT_CANDIDATE;
  477. }
  478. /*
  479. * We know that some child of @m can't be unmounted. In all places where the
  480. * chain of descent of @m has child not overmounting the root of parent,
  481. * the parent can't be unmounted either.
  482. */
  483. static void trim_ancestors(struct mount *m)
  484. {
  485. struct mount *p;
  486. for (p = m->mnt_parent; is_candidate(p); m = p, p = p->mnt_parent) {
  487. if (IS_MNT_MARKED(m)) // all candidates beneath are overmounts
  488. return;
  489. SET_MNT_MARK(m);
  490. if (m != p->overmount)
  491. p->mnt_t_flags &= ~T_UMOUNT_CANDIDATE;
  492. }
  493. }
  494. /*
  495. * Find and exclude all umount candidates forbidden by @m
  496. * (see Documentation/filesystems/propagate_umount.txt)
  497. * If we can immediately tell that @m is OK to unmount (unlocked
  498. * and all children are already committed to unmounting) commit
  499. * to unmounting it.
  500. * Only @m itself might be taken from the candidates list;
  501. * anything found by trim_ancestors() is marked non-candidate
  502. * and left on the list.
  503. */
  504. static void trim_one(struct mount *m, struct list_head *to_umount)
  505. {
  506. bool remove_this = false, found = false, umount_this = false;
  507. struct mount *n;
  508. if (!is_candidate(m)) { // trim_ancestors() left it on list
  509. remove_from_candidate_list(m);
  510. return;
  511. }
  512. list_for_each_entry(n, &m->mnt_mounts, mnt_child) {
  513. if (!is_candidate(n)) {
  514. found = true;
  515. if (n != m->overmount) {
  516. remove_this = true;
  517. break;
  518. }
  519. }
  520. }
  521. if (found) {
  522. trim_ancestors(m);
  523. } else if (!IS_MNT_LOCKED(m) && list_empty(&m->mnt_mounts)) {
  524. remove_this = true;
  525. umount_this = true;
  526. }
  527. if (remove_this) {
  528. remove_from_candidate_list(m);
  529. if (umount_this)
  530. umount_one(m, to_umount);
  531. }
  532. }
  533. static void handle_locked(struct mount *m, struct list_head *to_umount)
  534. {
  535. struct mount *cutoff = m, *p;
  536. if (!is_candidate(m)) { // trim_ancestors() left it on list
  537. remove_from_candidate_list(m);
  538. return;
  539. }
  540. for (p = m; is_candidate(p); p = p->mnt_parent) {
  541. remove_from_candidate_list(p);
  542. if (!IS_MNT_LOCKED(p))
  543. cutoff = p->mnt_parent;
  544. }
  545. if (will_be_unmounted(p))
  546. cutoff = p;
  547. while (m != cutoff) {
  548. umount_one(m, to_umount);
  549. m = m->mnt_parent;
  550. }
  551. }
  552. /*
  553. * @m is not to going away, and it overmounts the top of a stack of mounts
  554. * that are going away. We know that all of those are fully overmounted
  555. * by the one above (@m being the topmost of the chain), so @m can be slid
  556. * in place where the bottom of the stack is attached.
  557. *
  558. * NOTE: here we temporarily violate a constraint - two mounts end up with
  559. * the same parent and mountpoint; that will be remedied as soon as we
  560. * return from propagate_umount() - its caller (umount_tree()) will detach
  561. * the stack from the parent it (and now @m) is attached to. umount_tree()
  562. * might choose to keep unmounted pieces stuck to each other, but it always
  563. * detaches them from the mounts that remain in the tree.
  564. */
  565. static void reparent(struct mount *m)
  566. {
  567. struct mount *p = m;
  568. struct mountpoint *mp;
  569. do {
  570. mp = p->mnt_mp;
  571. p = p->mnt_parent;
  572. } while (will_be_unmounted(p));
  573. mnt_change_mountpoint(p, mp, m);
  574. mnt_notify_add(m);
  575. }
  576. /**
  577. * propagate_umount - apply propagation rules to the set of mounts for umount()
  578. * @set: the list of mounts to be unmounted.
  579. *
  580. * Collect all mounts that receive propagation from the mount in @set and have
  581. * no obstacles to being unmounted. Add these additional mounts to the set.
  582. *
  583. * See Documentation/filesystems/propagate_umount.txt if you do anything in
  584. * this area.
  585. *
  586. * Locks held:
  587. * mount_lock (write_seqlock), namespace_sem (exclusive).
  588. */
  589. void propagate_umount(struct list_head *set)
  590. {
  591. struct mount *m, *p;
  592. LIST_HEAD(to_umount); // committed to unmounting
  593. LIST_HEAD(candidates); // undecided umount candidates
  594. // collect all candidates
  595. gather_candidates(set, &candidates);
  596. // reduce the set until it's non-shifting
  597. list_for_each_entry_safe(m, p, &candidates, mnt_list)
  598. trim_one(m, &to_umount);
  599. // ... and non-revealing
  600. while (!list_empty(&candidates)) {
  601. m = list_first_entry(&candidates,struct mount, mnt_list);
  602. handle_locked(m, &to_umount);
  603. }
  604. // now to_umount consists of all acceptable candidates
  605. // deal with reparenting of surviving overmounts on those
  606. list_for_each_entry(m, &to_umount, mnt_list) {
  607. struct mount *over = m->overmount;
  608. if (over && !will_be_unmounted(over))
  609. reparent(over);
  610. }
  611. // and fold them into the set
  612. list_splice_tail_init(&to_umount, set);
  613. }