readdir.c 30 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327
  1. // SPDX-License-Identifier: GPL-2.0-only
  2. /*
  3. *
  4. * Copyright (C) 2011 Novell Inc.
  5. */
  6. #include <linux/fs.h>
  7. #include <linux/slab.h>
  8. #include <linux/namei.h>
  9. #include <linux/file.h>
  10. #include <linux/filelock.h>
  11. #include <linux/xattr.h>
  12. #include <linux/rbtree.h>
  13. #include <linux/security.h>
  14. #include <linux/cred.h>
  15. #include <linux/ratelimit.h>
  16. #include <linux/overflow.h>
  17. #include "overlayfs.h"
  18. struct ovl_cache_entry {
  19. unsigned int len;
  20. unsigned int type;
  21. u64 real_ino;
  22. u64 ino;
  23. struct list_head l_node;
  24. struct rb_node node;
  25. struct ovl_cache_entry *next_maybe_whiteout;
  26. bool is_upper;
  27. bool is_whiteout;
  28. bool check_xwhiteout;
  29. const char *c_name;
  30. int c_len;
  31. char name[];
  32. };
  33. struct ovl_dir_cache {
  34. long refcount;
  35. u64 version;
  36. struct list_head entries;
  37. struct rb_root root;
  38. };
  39. struct ovl_readdir_data {
  40. struct dir_context ctx;
  41. struct dentry *dentry;
  42. bool is_lowest;
  43. struct rb_root *root;
  44. struct list_head *list;
  45. struct list_head middle;
  46. struct ovl_cache_entry *first_maybe_whiteout;
  47. struct unicode_map *map;
  48. int count;
  49. int err;
  50. bool is_upper;
  51. bool d_type_supported;
  52. bool in_xwhiteouts_dir;
  53. };
  54. struct ovl_dir_file {
  55. bool is_real;
  56. bool is_upper;
  57. struct ovl_dir_cache *cache;
  58. struct list_head *cursor;
  59. struct file *realfile;
  60. struct file *upperfile;
  61. };
  62. static struct ovl_cache_entry *ovl_cache_entry_from_node(struct rb_node *n)
  63. {
  64. return rb_entry(n, struct ovl_cache_entry, node);
  65. }
  66. static int ovl_casefold(struct ovl_readdir_data *rdd, const char *str, int len,
  67. char **dst)
  68. {
  69. const struct qstr qstr = { .name = str, .len = len };
  70. char *cf_name;
  71. int cf_len;
  72. if (!IS_ENABLED(CONFIG_UNICODE) || !rdd->map ||
  73. name_is_dot_dotdot(str, len))
  74. return 0;
  75. cf_name = kmalloc(NAME_MAX, GFP_KERNEL);
  76. if (!cf_name) {
  77. rdd->err = -ENOMEM;
  78. return -ENOMEM;
  79. }
  80. cf_len = utf8_casefold(rdd->map, &qstr, cf_name, NAME_MAX);
  81. if (cf_len > 0)
  82. *dst = cf_name;
  83. else
  84. kfree(cf_name);
  85. return cf_len;
  86. }
  87. static bool ovl_cache_entry_find_link(const char *name, int len,
  88. struct rb_node ***link,
  89. struct rb_node **parent)
  90. {
  91. bool found = false;
  92. struct rb_node **newp = *link;
  93. while (!found && *newp) {
  94. int cmp;
  95. struct ovl_cache_entry *tmp;
  96. *parent = *newp;
  97. tmp = ovl_cache_entry_from_node(*newp);
  98. cmp = strncmp(name, tmp->c_name, len);
  99. if (cmp > 0)
  100. newp = &tmp->node.rb_right;
  101. else if (cmp < 0 || len < tmp->c_len)
  102. newp = &tmp->node.rb_left;
  103. else
  104. found = true;
  105. }
  106. *link = newp;
  107. return found;
  108. }
  109. static struct ovl_cache_entry *ovl_cache_entry_find(struct rb_root *root,
  110. const char *name, int len)
  111. {
  112. struct rb_node *node = root->rb_node;
  113. int cmp;
  114. while (node) {
  115. struct ovl_cache_entry *p = ovl_cache_entry_from_node(node);
  116. cmp = strncmp(name, p->c_name, len);
  117. if (cmp > 0)
  118. node = p->node.rb_right;
  119. else if (cmp < 0 || len < p->c_len)
  120. node = p->node.rb_left;
  121. else
  122. return p;
  123. }
  124. return NULL;
  125. }
  126. static bool ovl_calc_d_ino(struct ovl_readdir_data *rdd,
  127. struct ovl_cache_entry *p)
  128. {
  129. /* Don't care if not doing ovl_iter() */
  130. if (!rdd->dentry)
  131. return false;
  132. /* Always recalc d_ino when remapping lower inode numbers */
  133. if (ovl_xino_bits(OVL_FS(rdd->dentry->d_sb)))
  134. return true;
  135. /* Always recalc d_ino for parent */
  136. if (name_is_dotdot(p->name, p->len))
  137. return true;
  138. /* If this is lower, then native d_ino will do */
  139. if (!rdd->is_upper)
  140. return false;
  141. /*
  142. * Recalc d_ino for '.' and for all entries if dir is impure (contains
  143. * copied up entries)
  144. */
  145. if (name_is_dot(p->name, p->len) ||
  146. ovl_test_flag(OVL_IMPURE, d_inode(rdd->dentry)))
  147. return true;
  148. return false;
  149. }
  150. static struct ovl_cache_entry *ovl_cache_entry_new(struct ovl_readdir_data *rdd,
  151. const char *name, int len,
  152. const char *c_name, int c_len,
  153. u64 ino, unsigned int d_type)
  154. {
  155. struct ovl_cache_entry *p;
  156. p = kmalloc_flex(*p, name, len + 1);
  157. if (!p)
  158. return NULL;
  159. memcpy(p->name, name, len);
  160. p->name[len] = '\0';
  161. p->len = len;
  162. p->type = d_type;
  163. p->real_ino = ino;
  164. p->ino = ino;
  165. /* Defer setting d_ino for upper entry to ovl_iterate() */
  166. if (ovl_calc_d_ino(rdd, p))
  167. p->ino = 0;
  168. p->is_upper = rdd->is_upper;
  169. p->is_whiteout = false;
  170. /* Defer check for overlay.whiteout to ovl_iterate() */
  171. p->check_xwhiteout = rdd->in_xwhiteouts_dir && d_type == DT_REG;
  172. if (c_name && c_name != name) {
  173. p->c_name = c_name;
  174. p->c_len = c_len;
  175. } else {
  176. p->c_name = p->name;
  177. p->c_len = len;
  178. }
  179. if (d_type == DT_CHR) {
  180. p->next_maybe_whiteout = rdd->first_maybe_whiteout;
  181. rdd->first_maybe_whiteout = p;
  182. }
  183. return p;
  184. }
  185. /* Return 0 for found, 1 for added, <0 for error */
  186. static int ovl_cache_entry_add_rb(struct ovl_readdir_data *rdd,
  187. const char *name, int len,
  188. const char *c_name, int c_len,
  189. u64 ino,
  190. unsigned int d_type)
  191. {
  192. struct rb_node **newp = &rdd->root->rb_node;
  193. struct rb_node *parent = NULL;
  194. struct ovl_cache_entry *p;
  195. if (ovl_cache_entry_find_link(c_name, c_len, &newp, &parent))
  196. return 0;
  197. p = ovl_cache_entry_new(rdd, name, len, c_name, c_len, ino, d_type);
  198. if (p == NULL) {
  199. rdd->err = -ENOMEM;
  200. return -ENOMEM;
  201. }
  202. list_add_tail(&p->l_node, rdd->list);
  203. rb_link_node(&p->node, parent, newp);
  204. rb_insert_color(&p->node, rdd->root);
  205. return 1;
  206. }
  207. /* Return 0 for found, 1 for added, <0 for error */
  208. static int ovl_fill_lowest(struct ovl_readdir_data *rdd,
  209. const char *name, int namelen,
  210. const char *c_name, int c_len,
  211. loff_t offset, u64 ino, unsigned int d_type)
  212. {
  213. struct ovl_cache_entry *p;
  214. p = ovl_cache_entry_find(rdd->root, c_name, c_len);
  215. if (p) {
  216. list_move_tail(&p->l_node, &rdd->middle);
  217. return 0;
  218. } else {
  219. p = ovl_cache_entry_new(rdd, name, namelen, c_name, c_len,
  220. ino, d_type);
  221. if (p == NULL)
  222. rdd->err = -ENOMEM;
  223. else
  224. list_add_tail(&p->l_node, &rdd->middle);
  225. }
  226. return rdd->err ?: 1;
  227. }
  228. static void ovl_cache_entry_free(struct ovl_cache_entry *p)
  229. {
  230. if (p->c_name != p->name)
  231. kfree(p->c_name);
  232. kfree(p);
  233. }
  234. void ovl_cache_free(struct list_head *list)
  235. {
  236. struct ovl_cache_entry *p;
  237. struct ovl_cache_entry *n;
  238. list_for_each_entry_safe(p, n, list, l_node)
  239. ovl_cache_entry_free(p);
  240. INIT_LIST_HEAD(list);
  241. }
  242. void ovl_dir_cache_free(struct inode *inode)
  243. {
  244. struct ovl_dir_cache *cache = ovl_dir_cache(inode);
  245. if (cache) {
  246. ovl_cache_free(&cache->entries);
  247. kfree(cache);
  248. }
  249. }
  250. static void ovl_cache_put(struct ovl_dir_file *od, struct inode *inode)
  251. {
  252. struct ovl_dir_cache *cache = od->cache;
  253. WARN_ON(cache->refcount <= 0);
  254. cache->refcount--;
  255. if (!cache->refcount) {
  256. if (ovl_dir_cache(inode) == cache)
  257. ovl_set_dir_cache(inode, NULL);
  258. ovl_cache_free(&cache->entries);
  259. kfree(cache);
  260. }
  261. }
  262. static bool ovl_fill_merge(struct dir_context *ctx, const char *name,
  263. int namelen, loff_t offset, u64 ino,
  264. unsigned int d_type)
  265. {
  266. struct ovl_readdir_data *rdd =
  267. container_of(ctx, struct ovl_readdir_data, ctx);
  268. struct ovl_fs *ofs = OVL_FS(rdd->dentry->d_sb);
  269. const char *c_name = NULL;
  270. char *cf_name = NULL;
  271. int c_len = 0, ret;
  272. if (ofs->casefold)
  273. c_len = ovl_casefold(rdd, name, namelen, &cf_name);
  274. if (rdd->err)
  275. return false;
  276. if (c_len <= 0) {
  277. c_name = name;
  278. c_len = namelen;
  279. } else {
  280. c_name = cf_name;
  281. }
  282. rdd->count++;
  283. if (!rdd->is_lowest)
  284. ret = ovl_cache_entry_add_rb(rdd, name, namelen, c_name, c_len, ino, d_type);
  285. else
  286. ret = ovl_fill_lowest(rdd, name, namelen, c_name, c_len, offset, ino, d_type);
  287. /*
  288. * If ret == 1, that means that c_name is being used as part of struct
  289. * ovl_cache_entry and will be freed at ovl_cache_free(). Otherwise,
  290. * c_name was found in the rb-tree so we can free it here.
  291. */
  292. if (ret != 1 && c_name != name)
  293. kfree(c_name);
  294. return ret >= 0;
  295. }
  296. static int ovl_check_whiteouts(const struct path *path, struct ovl_readdir_data *rdd)
  297. {
  298. struct dentry *dentry, *dir = path->dentry;
  299. while (rdd->first_maybe_whiteout) {
  300. struct ovl_cache_entry *p =
  301. rdd->first_maybe_whiteout;
  302. rdd->first_maybe_whiteout = p->next_maybe_whiteout;
  303. dentry = lookup_one_positive_killable(mnt_idmap(path->mnt),
  304. &QSTR_LEN(p->name, p->len),
  305. dir);
  306. if (!IS_ERR(dentry)) {
  307. p->is_whiteout = ovl_is_whiteout(dentry);
  308. dput(dentry);
  309. } else if (PTR_ERR(dentry) == -EINTR) {
  310. return -EINTR;
  311. }
  312. }
  313. return 0;
  314. }
  315. static inline int ovl_dir_read(const struct path *realpath,
  316. struct ovl_readdir_data *rdd)
  317. {
  318. struct file *realfile;
  319. int err;
  320. realfile = ovl_path_open(realpath, O_RDONLY | O_LARGEFILE);
  321. if (IS_ERR(realfile))
  322. return PTR_ERR(realfile);
  323. rdd->first_maybe_whiteout = NULL;
  324. rdd->ctx.pos = 0;
  325. do {
  326. rdd->count = 0;
  327. rdd->err = 0;
  328. err = iterate_dir(realfile, &rdd->ctx);
  329. if (err >= 0)
  330. err = rdd->err;
  331. } while (!err && rdd->count);
  332. if (!err && rdd->first_maybe_whiteout && rdd->dentry)
  333. err = ovl_check_whiteouts(realpath, rdd);
  334. fput(realfile);
  335. return err;
  336. }
  337. static void ovl_dir_reset(struct file *file)
  338. {
  339. struct ovl_dir_file *od = file->private_data;
  340. struct ovl_dir_cache *cache = od->cache;
  341. struct inode *inode = file_inode(file);
  342. bool is_real;
  343. if (cache && ovl_inode_version_get(inode) != cache->version) {
  344. ovl_cache_put(od, inode);
  345. od->cache = NULL;
  346. od->cursor = NULL;
  347. }
  348. is_real = ovl_dir_is_real(inode);
  349. if (od->is_real != is_real) {
  350. /* is_real can only become false when dir is copied up */
  351. if (WARN_ON(is_real))
  352. return;
  353. od->is_real = false;
  354. }
  355. }
  356. static int ovl_dir_read_merged(struct dentry *dentry, struct list_head *list,
  357. struct rb_root *root)
  358. {
  359. int err;
  360. struct path realpath;
  361. struct ovl_readdir_data rdd = {
  362. .ctx.actor = ovl_fill_merge,
  363. .ctx.count = INT_MAX,
  364. .dentry = dentry,
  365. .list = list,
  366. .root = root,
  367. .is_lowest = false,
  368. .map = NULL,
  369. };
  370. int idx, next;
  371. const struct ovl_layer *layer;
  372. struct ovl_fs *ofs = OVL_FS(dentry->d_sb);
  373. for (idx = 0; idx != -1; idx = next) {
  374. next = ovl_path_next(idx, dentry, &realpath, &layer);
  375. if (ofs->casefold)
  376. rdd.map = sb_encoding(realpath.dentry->d_sb);
  377. rdd.is_upper = ovl_dentry_upper(dentry) == realpath.dentry;
  378. rdd.in_xwhiteouts_dir = layer->has_xwhiteouts &&
  379. ovl_dentry_has_xwhiteouts(dentry);
  380. if (next != -1) {
  381. err = ovl_dir_read(&realpath, &rdd);
  382. if (err)
  383. break;
  384. } else {
  385. /*
  386. * Insert lowest layer entries before upper ones, this
  387. * allows offsets to be reasonably constant
  388. */
  389. list_add(&rdd.middle, rdd.list);
  390. rdd.is_lowest = true;
  391. err = ovl_dir_read(&realpath, &rdd);
  392. list_del(&rdd.middle);
  393. }
  394. }
  395. return err;
  396. }
  397. static void ovl_seek_cursor(struct ovl_dir_file *od, loff_t pos)
  398. {
  399. struct list_head *p;
  400. loff_t off = 0;
  401. list_for_each(p, &od->cache->entries) {
  402. if (off >= pos)
  403. break;
  404. off++;
  405. }
  406. /* Cursor is safe since the cache is stable */
  407. od->cursor = p;
  408. }
  409. static struct ovl_dir_cache *ovl_cache_get(struct dentry *dentry)
  410. {
  411. int res;
  412. struct ovl_dir_cache *cache;
  413. struct inode *inode = d_inode(dentry);
  414. cache = ovl_dir_cache(inode);
  415. if (cache && ovl_inode_version_get(inode) == cache->version) {
  416. WARN_ON(!cache->refcount);
  417. cache->refcount++;
  418. return cache;
  419. }
  420. ovl_set_dir_cache(d_inode(dentry), NULL);
  421. cache = kzalloc_obj(struct ovl_dir_cache);
  422. if (!cache)
  423. return ERR_PTR(-ENOMEM);
  424. cache->refcount = 1;
  425. INIT_LIST_HEAD(&cache->entries);
  426. cache->root = RB_ROOT;
  427. res = ovl_dir_read_merged(dentry, &cache->entries, &cache->root);
  428. if (res) {
  429. ovl_cache_free(&cache->entries);
  430. kfree(cache);
  431. return ERR_PTR(res);
  432. }
  433. cache->version = ovl_inode_version_get(inode);
  434. ovl_set_dir_cache(inode, cache);
  435. return cache;
  436. }
  437. /* Map inode number to lower fs unique range */
  438. static u64 ovl_remap_lower_ino(u64 ino, int xinobits, int fsid,
  439. const char *name, int namelen, bool warn)
  440. {
  441. unsigned int xinoshift = 64 - xinobits;
  442. if (unlikely(ino >> xinoshift)) {
  443. if (warn) {
  444. pr_warn_ratelimited("d_ino too big (%.*s, ino=%llu, xinobits=%d)\n",
  445. namelen, name, ino, xinobits);
  446. }
  447. return ino;
  448. }
  449. /*
  450. * The lowest xinobit is reserved for mapping the non-peresistent inode
  451. * numbers range, but this range is only exposed via st_ino, not here.
  452. */
  453. return ino | ((u64)fsid) << (xinoshift + 1);
  454. }
  455. /*
  456. * Set d_ino for upper entries if needed. Non-upper entries should always report
  457. * the uppermost real inode ino and should not call this function.
  458. *
  459. * When not all layer are on same fs, report real ino also for upper.
  460. *
  461. * When all layers are on the same fs, and upper has a reference to
  462. * copy up origin, call vfs_getattr() on the overlay entry to make
  463. * sure that d_ino will be consistent with st_ino from stat(2).
  464. *
  465. * Also checks the overlay.whiteout xattr by doing a full lookup which will return
  466. * negative in this case.
  467. */
  468. static int ovl_cache_update(const struct path *path, struct ovl_cache_entry *p, bool update_ino)
  469. {
  470. struct dentry *dir = path->dentry;
  471. struct ovl_fs *ofs = OVL_FS(dir->d_sb);
  472. struct dentry *this = NULL;
  473. enum ovl_path_type type;
  474. u64 ino = p->real_ino;
  475. int xinobits = ovl_xino_bits(ofs);
  476. int err = 0;
  477. if (!ovl_same_dev(ofs) && !p->check_xwhiteout)
  478. goto out;
  479. if (name_is_dot_dotdot(p->name, p->len)) {
  480. if (p->len == 1) {
  481. this = dget(dir);
  482. goto get;
  483. }
  484. if (p->len == 2) {
  485. /* we shall not be moved */
  486. this = dget(dir->d_parent);
  487. goto get;
  488. }
  489. }
  490. /* This checks also for xwhiteouts */
  491. this = lookup_one(mnt_idmap(path->mnt), &QSTR_LEN(p->name, p->len), dir);
  492. if (IS_ERR_OR_NULL(this) || !this->d_inode) {
  493. /* Mark a stale entry */
  494. p->is_whiteout = true;
  495. if (IS_ERR(this)) {
  496. err = PTR_ERR(this);
  497. this = NULL;
  498. goto fail;
  499. }
  500. goto out;
  501. }
  502. get:
  503. if (!ovl_same_dev(ofs) || !update_ino)
  504. goto out;
  505. type = ovl_path_type(this);
  506. if (OVL_TYPE_ORIGIN(type)) {
  507. struct kstat stat;
  508. struct path statpath = *path;
  509. statpath.dentry = this;
  510. err = vfs_getattr(&statpath, &stat, STATX_INO, 0);
  511. if (err)
  512. goto fail;
  513. /*
  514. * Directory inode is always on overlay st_dev.
  515. * Non-dir with ovl_same_dev() could be on pseudo st_dev in case
  516. * of xino bits overflow.
  517. */
  518. WARN_ON_ONCE(S_ISDIR(stat.mode) &&
  519. dir->d_sb->s_dev != stat.dev);
  520. ino = stat.ino;
  521. } else if (xinobits && !OVL_TYPE_UPPER(type)) {
  522. ino = ovl_remap_lower_ino(ino, xinobits,
  523. ovl_layer_lower(this)->fsid,
  524. p->name, p->len,
  525. ovl_xino_warn(ofs));
  526. }
  527. out:
  528. p->ino = ino;
  529. dput(this);
  530. return err;
  531. fail:
  532. pr_warn_ratelimited("failed to look up (%s) for ino (%i)\n",
  533. p->name, err);
  534. goto out;
  535. }
  536. static bool ovl_fill_plain(struct dir_context *ctx, const char *name,
  537. int namelen, loff_t offset, u64 ino,
  538. unsigned int d_type)
  539. {
  540. struct ovl_cache_entry *p;
  541. struct ovl_readdir_data *rdd =
  542. container_of(ctx, struct ovl_readdir_data, ctx);
  543. rdd->count++;
  544. p = ovl_cache_entry_new(rdd, name, namelen, NULL, 0, ino, d_type);
  545. if (p == NULL) {
  546. rdd->err = -ENOMEM;
  547. return false;
  548. }
  549. list_add_tail(&p->l_node, rdd->list);
  550. return true;
  551. }
  552. static int ovl_dir_read_impure(const struct path *path, struct list_head *list,
  553. struct rb_root *root)
  554. {
  555. int err;
  556. struct path realpath;
  557. struct ovl_cache_entry *p, *n;
  558. struct ovl_readdir_data rdd = {
  559. .ctx.actor = ovl_fill_plain,
  560. .ctx.count = INT_MAX,
  561. .list = list,
  562. .root = root,
  563. };
  564. INIT_LIST_HEAD(list);
  565. *root = RB_ROOT;
  566. ovl_path_upper(path->dentry, &realpath);
  567. err = ovl_dir_read(&realpath, &rdd);
  568. if (err)
  569. return err;
  570. list_for_each_entry_safe(p, n, list, l_node) {
  571. if (!name_is_dot_dotdot(p->name, p->len)) {
  572. err = ovl_cache_update(path, p, true);
  573. if (err)
  574. return err;
  575. }
  576. if (p->ino == p->real_ino) {
  577. list_del(&p->l_node);
  578. ovl_cache_entry_free(p);
  579. } else {
  580. struct rb_node **newp = &root->rb_node;
  581. struct rb_node *parent = NULL;
  582. if (WARN_ON(ovl_cache_entry_find_link(p->name, p->len,
  583. &newp, &parent)))
  584. return -EIO;
  585. rb_link_node(&p->node, parent, newp);
  586. rb_insert_color(&p->node, root);
  587. }
  588. }
  589. return 0;
  590. }
  591. static struct ovl_dir_cache *ovl_cache_get_impure(const struct path *path)
  592. {
  593. int res;
  594. struct dentry *dentry = path->dentry;
  595. struct inode *inode = d_inode(dentry);
  596. struct ovl_fs *ofs = OVL_FS(dentry->d_sb);
  597. struct ovl_dir_cache *cache;
  598. cache = ovl_dir_cache(inode);
  599. if (cache && ovl_inode_version_get(inode) == cache->version)
  600. return cache;
  601. /* Impure cache is not refcounted, free it here */
  602. ovl_dir_cache_free(inode);
  603. ovl_set_dir_cache(inode, NULL);
  604. cache = kzalloc_obj(struct ovl_dir_cache);
  605. if (!cache)
  606. return ERR_PTR(-ENOMEM);
  607. res = ovl_dir_read_impure(path, &cache->entries, &cache->root);
  608. if (res) {
  609. ovl_cache_free(&cache->entries);
  610. kfree(cache);
  611. return ERR_PTR(res);
  612. }
  613. if (list_empty(&cache->entries)) {
  614. /*
  615. * A good opportunity to get rid of an unneeded "impure" flag.
  616. * Removing the "impure" xattr is best effort.
  617. */
  618. if (!ovl_want_write(dentry)) {
  619. ovl_removexattr(ofs, ovl_dentry_upper(dentry),
  620. OVL_XATTR_IMPURE);
  621. ovl_drop_write(dentry);
  622. }
  623. ovl_clear_flag(OVL_IMPURE, inode);
  624. kfree(cache);
  625. return NULL;
  626. }
  627. cache->version = ovl_inode_version_get(inode);
  628. ovl_set_dir_cache(inode, cache);
  629. return cache;
  630. }
  631. struct ovl_readdir_translate {
  632. struct dir_context *orig_ctx;
  633. struct ovl_dir_cache *cache;
  634. struct dir_context ctx;
  635. u64 parent_ino;
  636. int fsid;
  637. int xinobits;
  638. bool xinowarn;
  639. };
  640. static bool ovl_fill_real(struct dir_context *ctx, const char *name,
  641. int namelen, loff_t offset, u64 ino,
  642. unsigned int d_type)
  643. {
  644. struct ovl_readdir_translate *rdt =
  645. container_of(ctx, struct ovl_readdir_translate, ctx);
  646. struct dir_context *orig_ctx = rdt->orig_ctx;
  647. bool res;
  648. if (rdt->parent_ino && name_is_dotdot(name, namelen)) {
  649. ino = rdt->parent_ino;
  650. } else if (rdt->cache) {
  651. struct ovl_cache_entry *p;
  652. p = ovl_cache_entry_find(&rdt->cache->root, name, namelen);
  653. if (p)
  654. ino = p->ino;
  655. } else if (rdt->xinobits) {
  656. ino = ovl_remap_lower_ino(ino, rdt->xinobits, rdt->fsid,
  657. name, namelen, rdt->xinowarn);
  658. }
  659. res = orig_ctx->actor(orig_ctx, name, namelen, offset, ino, d_type);
  660. ctx->count = orig_ctx->count;
  661. return res;
  662. }
  663. static bool ovl_is_impure_dir(struct file *file)
  664. {
  665. struct ovl_dir_file *od = file->private_data;
  666. struct inode *dir = file_inode(file);
  667. /*
  668. * Only upper dir can be impure, but if we are in the middle of
  669. * iterating a lower real dir, dir could be copied up and marked
  670. * impure. We only want the impure cache if we started iterating
  671. * a real upper dir to begin with.
  672. */
  673. return od->is_upper && ovl_test_flag(OVL_IMPURE, dir);
  674. }
  675. static int ovl_iterate_real(struct file *file, struct dir_context *ctx)
  676. {
  677. int err;
  678. struct ovl_dir_file *od = file->private_data;
  679. struct dentry *dir = file->f_path.dentry;
  680. struct ovl_fs *ofs = OVL_FS(dir->d_sb);
  681. const struct ovl_layer *lower_layer = ovl_layer_lower(dir);
  682. struct ovl_readdir_translate rdt = {
  683. .ctx.actor = ovl_fill_real,
  684. .ctx.count = ctx->count,
  685. .orig_ctx = ctx,
  686. .xinobits = ovl_xino_bits(ofs),
  687. .xinowarn = ovl_xino_warn(ofs),
  688. };
  689. if (rdt.xinobits && lower_layer)
  690. rdt.fsid = lower_layer->fsid;
  691. if (OVL_TYPE_MERGE(ovl_path_type(dir->d_parent))) {
  692. struct kstat stat;
  693. struct path statpath = file->f_path;
  694. statpath.dentry = dir->d_parent;
  695. err = vfs_getattr(&statpath, &stat, STATX_INO, 0);
  696. if (err)
  697. return err;
  698. WARN_ON_ONCE(dir->d_sb->s_dev != stat.dev);
  699. rdt.parent_ino = stat.ino;
  700. }
  701. if (ovl_is_impure_dir(file)) {
  702. rdt.cache = ovl_cache_get_impure(&file->f_path);
  703. if (IS_ERR(rdt.cache))
  704. return PTR_ERR(rdt.cache);
  705. }
  706. err = iterate_dir(od->realfile, &rdt.ctx);
  707. ctx->pos = rdt.ctx.pos;
  708. return err;
  709. }
  710. static int ovl_iterate_merged(struct file *file, struct dir_context *ctx)
  711. {
  712. struct ovl_dir_file *od = file->private_data;
  713. struct dentry *dentry = file->f_path.dentry;
  714. struct ovl_cache_entry *p;
  715. int err = 0;
  716. if (!od->cache) {
  717. struct ovl_dir_cache *cache;
  718. cache = ovl_cache_get(dentry);
  719. err = PTR_ERR(cache);
  720. if (IS_ERR(cache))
  721. return err;
  722. od->cache = cache;
  723. ovl_seek_cursor(od, ctx->pos);
  724. }
  725. while (od->cursor != &od->cache->entries) {
  726. p = list_entry(od->cursor, struct ovl_cache_entry, l_node);
  727. if (!p->is_whiteout) {
  728. if (!p->ino || p->check_xwhiteout) {
  729. err = ovl_cache_update(&file->f_path, p, !p->ino);
  730. if (err)
  731. return err;
  732. }
  733. }
  734. /* ovl_cache_update() sets is_whiteout on stale entry */
  735. if (!p->is_whiteout) {
  736. if (!dir_emit(ctx, p->name, p->len, p->ino, p->type))
  737. break;
  738. }
  739. od->cursor = p->l_node.next;
  740. ctx->pos++;
  741. }
  742. return err;
  743. }
  744. static bool ovl_need_adjust_d_ino(struct file *file)
  745. {
  746. struct dentry *dentry = file->f_path.dentry;
  747. struct ovl_fs *ofs = OVL_FS(dentry->d_sb);
  748. /* If parent is merge, then need to adjust d_ino for '..' */
  749. if (ovl_xino_bits(ofs))
  750. return true;
  751. /* Can't do consistent inode numbering */
  752. if (!ovl_same_fs(ofs))
  753. return false;
  754. /* If dir is impure then need to adjust d_ino for copied up entries */
  755. if (ovl_is_impure_dir(file) ||
  756. OVL_TYPE_MERGE(ovl_path_type(dentry->d_parent)))
  757. return true;
  758. /* Pure: no need to adjust d_ino */
  759. return false;
  760. }
  761. static int ovl_iterate(struct file *file, struct dir_context *ctx)
  762. {
  763. struct ovl_dir_file *od = file->private_data;
  764. if (!ctx->pos)
  765. ovl_dir_reset(file);
  766. with_ovl_creds(file_dentry(file)->d_sb) {
  767. if (!od->is_real)
  768. return ovl_iterate_merged(file, ctx);
  769. if (ovl_need_adjust_d_ino(file))
  770. return ovl_iterate_real(file, ctx);
  771. return iterate_dir(od->realfile, ctx);
  772. }
  773. }
  774. static loff_t ovl_dir_llseek(struct file *file, loff_t offset, int origin)
  775. {
  776. loff_t res;
  777. struct ovl_dir_file *od = file->private_data;
  778. inode_lock(file_inode(file));
  779. if (!file->f_pos)
  780. ovl_dir_reset(file);
  781. if (od->is_real) {
  782. res = vfs_llseek(od->realfile, offset, origin);
  783. file->f_pos = od->realfile->f_pos;
  784. } else {
  785. res = -EINVAL;
  786. switch (origin) {
  787. case SEEK_CUR:
  788. offset += file->f_pos;
  789. break;
  790. case SEEK_SET:
  791. break;
  792. default:
  793. goto out_unlock;
  794. }
  795. if (offset < 0)
  796. goto out_unlock;
  797. if (offset != file->f_pos) {
  798. file->f_pos = offset;
  799. if (od->cache)
  800. ovl_seek_cursor(od, offset);
  801. }
  802. res = offset;
  803. }
  804. out_unlock:
  805. inode_unlock(file_inode(file));
  806. return res;
  807. }
  808. static struct file *ovl_dir_open_realfile(const struct file *file,
  809. const struct path *realpath)
  810. {
  811. with_ovl_creds(file_inode(file)->i_sb)
  812. return ovl_path_open(realpath, O_RDONLY | (file->f_flags & O_LARGEFILE));
  813. }
  814. /*
  815. * Like ovl_real_fdget(), returns upperfile if dir was copied up since open.
  816. * Unlike ovl_real_fdget(), this caches upperfile in file->private_data.
  817. *
  818. * TODO: use same abstract type for file->private_data of dir and file so
  819. * upperfile could also be cached for files as well.
  820. */
  821. struct file *ovl_dir_real_file(const struct file *file, bool want_upper)
  822. {
  823. struct ovl_dir_file *od = file->private_data;
  824. struct dentry *dentry = file->f_path.dentry;
  825. struct file *old, *realfile = od->realfile;
  826. if (!OVL_TYPE_UPPER(ovl_path_type(dentry)))
  827. return want_upper ? NULL : realfile;
  828. /*
  829. * Need to check if we started out being a lower dir, but got copied up
  830. */
  831. if (!od->is_upper) {
  832. realfile = READ_ONCE(od->upperfile);
  833. if (!realfile) {
  834. struct path upperpath;
  835. ovl_path_upper(dentry, &upperpath);
  836. realfile = ovl_dir_open_realfile(file, &upperpath);
  837. if (IS_ERR(realfile))
  838. return realfile;
  839. old = cmpxchg_release(&od->upperfile, NULL, realfile);
  840. if (old) {
  841. fput(realfile);
  842. realfile = old;
  843. }
  844. }
  845. }
  846. return realfile;
  847. }
  848. static int ovl_dir_fsync(struct file *file, loff_t start, loff_t end,
  849. int datasync)
  850. {
  851. struct file *realfile;
  852. int err;
  853. err = ovl_sync_status(OVL_FS(file_inode(file)->i_sb));
  854. if (err <= 0)
  855. return err;
  856. realfile = ovl_dir_real_file(file, true);
  857. err = PTR_ERR_OR_ZERO(realfile);
  858. /* Nothing to sync for lower */
  859. if (!realfile || err)
  860. return err;
  861. return vfs_fsync_range(realfile, start, end, datasync);
  862. }
  863. static int ovl_dir_release(struct inode *inode, struct file *file)
  864. {
  865. struct ovl_dir_file *od = file->private_data;
  866. if (od->cache) {
  867. inode_lock(inode);
  868. ovl_cache_put(od, inode);
  869. inode_unlock(inode);
  870. }
  871. fput(od->realfile);
  872. if (od->upperfile)
  873. fput(od->upperfile);
  874. kfree(od);
  875. return 0;
  876. }
  877. static int ovl_dir_open(struct inode *inode, struct file *file)
  878. {
  879. struct path realpath;
  880. struct file *realfile;
  881. struct ovl_dir_file *od;
  882. enum ovl_path_type type;
  883. od = kzalloc(sizeof(struct ovl_dir_file), GFP_KERNEL);
  884. if (!od)
  885. return -ENOMEM;
  886. type = ovl_path_real(file->f_path.dentry, &realpath);
  887. realfile = ovl_dir_open_realfile(file, &realpath);
  888. if (IS_ERR(realfile)) {
  889. kfree(od);
  890. return PTR_ERR(realfile);
  891. }
  892. od->realfile = realfile;
  893. od->is_real = ovl_dir_is_real(inode);
  894. od->is_upper = OVL_TYPE_UPPER(type);
  895. file->private_data = od;
  896. return 0;
  897. }
  898. WRAP_DIR_ITER(ovl_iterate) // FIXME!
  899. const struct file_operations ovl_dir_operations = {
  900. .read = generic_read_dir,
  901. .open = ovl_dir_open,
  902. .iterate_shared = shared_ovl_iterate,
  903. .llseek = ovl_dir_llseek,
  904. .fsync = ovl_dir_fsync,
  905. .release = ovl_dir_release,
  906. .setlease = generic_setlease,
  907. };
  908. int ovl_check_empty_dir(struct dentry *dentry, struct list_head *list)
  909. {
  910. int err;
  911. struct ovl_cache_entry *p, *n;
  912. struct rb_root root = RB_ROOT;
  913. with_ovl_creds(dentry->d_sb)
  914. err = ovl_dir_read_merged(dentry, list, &root);
  915. if (err)
  916. return err;
  917. err = 0;
  918. list_for_each_entry_safe(p, n, list, l_node) {
  919. /*
  920. * Select whiteouts in upperdir, they should
  921. * be cleared when deleting this directory.
  922. */
  923. if (p->is_whiteout) {
  924. if (p->is_upper)
  925. continue;
  926. goto del_entry;
  927. }
  928. if (name_is_dot_dotdot(p->name, p->len))
  929. goto del_entry;
  930. err = -ENOTEMPTY;
  931. break;
  932. del_entry:
  933. list_del(&p->l_node);
  934. ovl_cache_entry_free(p);
  935. }
  936. return err;
  937. }
  938. void ovl_cleanup_whiteouts(struct ovl_fs *ofs, struct dentry *upper,
  939. struct list_head *list)
  940. {
  941. struct ovl_cache_entry *p;
  942. list_for_each_entry(p, list, l_node) {
  943. struct dentry *dentry;
  944. if (WARN_ON(!p->is_whiteout || !p->is_upper))
  945. continue;
  946. dentry = ovl_lookup_upper_unlocked(ofs, p->name, upper, p->len);
  947. if (IS_ERR(dentry)) {
  948. pr_err("lookup '%s/%.*s' failed (%i)\n",
  949. upper->d_name.name, p->len, p->name,
  950. (int) PTR_ERR(dentry));
  951. continue;
  952. }
  953. if (dentry->d_inode)
  954. ovl_cleanup(ofs, upper, dentry);
  955. dput(dentry);
  956. }
  957. }
  958. static bool ovl_check_d_type(struct dir_context *ctx, const char *name,
  959. int namelen, loff_t offset, u64 ino,
  960. unsigned int d_type)
  961. {
  962. struct ovl_readdir_data *rdd =
  963. container_of(ctx, struct ovl_readdir_data, ctx);
  964. /* Even if d_type is not supported, DT_DIR is returned for . and .. */
  965. if (name_is_dot_dotdot(name, namelen))
  966. return true;
  967. if (d_type != DT_UNKNOWN)
  968. rdd->d_type_supported = true;
  969. return true;
  970. }
  971. /*
  972. * Returns 1 if d_type is supported, 0 not supported/unknown. Negative values
  973. * if error is encountered.
  974. */
  975. int ovl_check_d_type_supported(const struct path *realpath)
  976. {
  977. int err;
  978. struct ovl_readdir_data rdd = {
  979. .ctx.actor = ovl_check_d_type,
  980. .ctx.count = INT_MAX,
  981. .d_type_supported = false,
  982. };
  983. err = ovl_dir_read(realpath, &rdd);
  984. if (err)
  985. return err;
  986. return rdd.d_type_supported;
  987. }
  988. #define OVL_INCOMPATDIR_NAME "incompat"
  989. static int ovl_workdir_cleanup_recurse(struct ovl_fs *ofs, const struct path *path,
  990. int level)
  991. {
  992. int err;
  993. LIST_HEAD(list);
  994. struct ovl_cache_entry *p;
  995. struct ovl_readdir_data rdd = {
  996. .ctx.actor = ovl_fill_plain,
  997. .ctx.count = INT_MAX,
  998. .list = &list,
  999. };
  1000. bool incompat = false;
  1001. /*
  1002. * The "work/incompat" directory is treated specially - if it is not
  1003. * empty, instead of printing a generic error and mounting read-only,
  1004. * we will error about incompat features and fail the mount.
  1005. *
  1006. * When called from ovl_indexdir_cleanup(), path->dentry->d_name.name
  1007. * starts with '#'.
  1008. */
  1009. if (level == 2 &&
  1010. !strcmp(path->dentry->d_name.name, OVL_INCOMPATDIR_NAME))
  1011. incompat = true;
  1012. err = ovl_dir_read(path, &rdd);
  1013. if (err)
  1014. goto out;
  1015. list_for_each_entry(p, &list, l_node) {
  1016. struct dentry *dentry;
  1017. if (name_is_dot_dotdot(p->name, p->len)) {
  1018. continue;
  1019. } else if (incompat) {
  1020. pr_err("overlay with incompat feature '%s' cannot be mounted\n",
  1021. p->name);
  1022. err = -EINVAL;
  1023. break;
  1024. }
  1025. dentry = ovl_lookup_upper_unlocked(ofs, p->name, path->dentry, p->len);
  1026. if (IS_ERR(dentry))
  1027. continue;
  1028. if (dentry->d_inode)
  1029. err = ovl_workdir_cleanup(ofs, path->dentry, path->mnt,
  1030. dentry, level);
  1031. dput(dentry);
  1032. if (err)
  1033. break;
  1034. }
  1035. out:
  1036. ovl_cache_free(&list);
  1037. return err;
  1038. }
  1039. int ovl_workdir_cleanup(struct ovl_fs *ofs, struct dentry *parent,
  1040. struct vfsmount *mnt, struct dentry *dentry, int level)
  1041. {
  1042. int err;
  1043. if (!d_is_dir(dentry) || level > 1)
  1044. return ovl_cleanup(ofs, parent, dentry);
  1045. dentry = start_removing_dentry(parent, dentry);
  1046. if (IS_ERR(dentry))
  1047. return PTR_ERR(dentry);
  1048. err = ovl_do_rmdir(ofs, parent->d_inode, dentry);
  1049. end_removing(dentry);
  1050. if (err) {
  1051. struct path path = { .mnt = mnt, .dentry = dentry };
  1052. err = ovl_workdir_cleanup_recurse(ofs, &path, level + 1);
  1053. if (!err)
  1054. err = ovl_cleanup(ofs, parent, dentry);
  1055. }
  1056. return err;
  1057. }
  1058. int ovl_indexdir_cleanup(struct ovl_fs *ofs)
  1059. {
  1060. int err;
  1061. struct dentry *indexdir = ofs->workdir;
  1062. struct dentry *index = NULL;
  1063. struct path path = { .mnt = ovl_upper_mnt(ofs), .dentry = indexdir };
  1064. LIST_HEAD(list);
  1065. struct ovl_cache_entry *p;
  1066. struct ovl_readdir_data rdd = {
  1067. .ctx.actor = ovl_fill_plain,
  1068. .ctx.count = INT_MAX,
  1069. .list = &list,
  1070. };
  1071. err = ovl_dir_read(&path, &rdd);
  1072. if (err)
  1073. goto out;
  1074. list_for_each_entry(p, &list, l_node) {
  1075. if (name_is_dot_dotdot(p->name, p->len))
  1076. continue;
  1077. index = ovl_lookup_upper_unlocked(ofs, p->name, indexdir, p->len);
  1078. if (IS_ERR(index)) {
  1079. err = PTR_ERR(index);
  1080. index = NULL;
  1081. break;
  1082. }
  1083. /* Cleanup leftover from index create/cleanup attempt */
  1084. if (index->d_name.name[0] == '#') {
  1085. err = ovl_workdir_cleanup(ofs, indexdir, path.mnt, index, 1);
  1086. if (err)
  1087. break;
  1088. goto next;
  1089. }
  1090. err = ovl_verify_index(ofs, index);
  1091. if (!err) {
  1092. goto next;
  1093. } else if (err == -ESTALE) {
  1094. /* Cleanup stale index entries */
  1095. err = ovl_cleanup(ofs, indexdir, index);
  1096. } else if (err != -ENOENT) {
  1097. /*
  1098. * Abort mount to avoid corrupting the index if
  1099. * an incompatible index entry was found or on out
  1100. * of memory.
  1101. */
  1102. break;
  1103. } else if (ofs->config.nfs_export) {
  1104. /*
  1105. * Whiteout orphan index to block future open by
  1106. * handle after overlay nlink dropped to zero.
  1107. */
  1108. err = ovl_cleanup_and_whiteout(ofs, indexdir, index);
  1109. } else {
  1110. /* Cleanup orphan index entries */
  1111. err = ovl_cleanup(ofs, indexdir, index);
  1112. }
  1113. if (err)
  1114. break;
  1115. next:
  1116. dput(index);
  1117. index = NULL;
  1118. }
  1119. dput(index);
  1120. out:
  1121. ovl_cache_free(&list);
  1122. if (err)
  1123. pr_err("failed index dir cleanup (%i)\n", err);
  1124. return err;
  1125. }