file.c 27 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092
  1. // SPDX-License-Identifier: GPL-2.0-only
  2. /*
  3. * fs/kernfs/file.c - kernfs file implementation
  4. *
  5. * Copyright (c) 2001-3 Patrick Mochel
  6. * Copyright (c) 2007 SUSE Linux Products GmbH
  7. * Copyright (c) 2007, 2013 Tejun Heo <tj@kernel.org>
  8. */
  9. #include <linux/fs.h>
  10. #include <linux/seq_file.h>
  11. #include <linux/slab.h>
  12. #include <linux/poll.h>
  13. #include <linux/pagemap.h>
  14. #include <linux/sched/mm.h>
  15. #include <linux/fsnotify.h>
  16. #include <linux/uio.h>
  17. #include "kernfs-internal.h"
  18. struct kernfs_open_node {
  19. struct rcu_head rcu_head;
  20. atomic_t event;
  21. wait_queue_head_t poll;
  22. struct list_head files; /* goes through kernfs_open_file.list */
  23. unsigned int nr_mmapped;
  24. unsigned int nr_to_release;
  25. };
  26. /*
  27. * kernfs_notify() may be called from any context and bounces notifications
  28. * through a work item. To minimize space overhead in kernfs_node, the
  29. * pending queue is implemented as a singly linked list of kernfs_nodes.
  30. * The list is terminated with the self pointer so that whether a
  31. * kernfs_node is on the list or not can be determined by testing the next
  32. * pointer for %NULL.
  33. */
  34. #define KERNFS_NOTIFY_EOL ((void *)&kernfs_notify_list)
  35. static DEFINE_SPINLOCK(kernfs_notify_lock);
  36. static struct kernfs_node *kernfs_notify_list = KERNFS_NOTIFY_EOL;
  37. static inline struct mutex *kernfs_open_file_mutex_ptr(struct kernfs_node *kn)
  38. {
  39. int idx = hash_ptr(kn, NR_KERNFS_LOCK_BITS);
  40. return &kernfs_locks->open_file_mutex[idx];
  41. }
  42. static inline struct mutex *kernfs_open_file_mutex_lock(struct kernfs_node *kn)
  43. {
  44. struct mutex *lock;
  45. lock = kernfs_open_file_mutex_ptr(kn);
  46. mutex_lock(lock);
  47. return lock;
  48. }
  49. /**
  50. * of_on - Get the kernfs_open_node of the specified kernfs_open_file
  51. * @of: target kernfs_open_file
  52. *
  53. * Return: the kernfs_open_node of the kernfs_open_file
  54. */
  55. static struct kernfs_open_node *of_on(struct kernfs_open_file *of)
  56. {
  57. return rcu_dereference_protected(of->kn->attr.open,
  58. !list_empty(&of->list));
  59. }
  60. /* Get active reference to kernfs node for an open file */
  61. static struct kernfs_open_file *kernfs_get_active_of(struct kernfs_open_file *of)
  62. {
  63. /* Skip if file was already released */
  64. if (unlikely(of->released))
  65. return NULL;
  66. if (!kernfs_get_active(of->kn))
  67. return NULL;
  68. return of;
  69. }
  70. static void kernfs_put_active_of(struct kernfs_open_file *of)
  71. {
  72. return kernfs_put_active(of->kn);
  73. }
  74. /**
  75. * kernfs_deref_open_node_locked - Get kernfs_open_node corresponding to @kn
  76. *
  77. * @kn: target kernfs_node.
  78. *
  79. * Fetch and return ->attr.open of @kn when caller holds the
  80. * kernfs_open_file_mutex_ptr(kn).
  81. *
  82. * Update of ->attr.open happens under kernfs_open_file_mutex_ptr(kn). So when
  83. * the caller guarantees that this mutex is being held, other updaters can't
  84. * change ->attr.open and this means that we can safely deref ->attr.open
  85. * outside RCU read-side critical section.
  86. *
  87. * The caller needs to make sure that kernfs_open_file_mutex is held.
  88. *
  89. * Return: @kn->attr.open when kernfs_open_file_mutex is held.
  90. */
  91. static struct kernfs_open_node *
  92. kernfs_deref_open_node_locked(struct kernfs_node *kn)
  93. {
  94. return rcu_dereference_protected(kn->attr.open,
  95. lockdep_is_held(kernfs_open_file_mutex_ptr(kn)));
  96. }
  97. static struct kernfs_open_file *kernfs_of(struct file *file)
  98. {
  99. return ((struct seq_file *)file->private_data)->private;
  100. }
  101. /*
  102. * Determine the kernfs_ops for the given kernfs_node. This function must
  103. * be called while holding an active reference.
  104. */
  105. static const struct kernfs_ops *kernfs_ops(struct kernfs_node *kn)
  106. {
  107. if (kn->flags & KERNFS_LOCKDEP)
  108. lockdep_assert_held(kn);
  109. return kn->attr.ops;
  110. }
  111. /*
  112. * As kernfs_seq_stop() is also called after kernfs_seq_start() or
  113. * kernfs_seq_next() failure, it needs to distinguish whether it's stopping
  114. * a seq_file iteration which is fully initialized with an active reference
  115. * or an aborted kernfs_seq_start() due to get_active failure. The
  116. * position pointer is the only context for each seq_file iteration and
  117. * thus the stop condition should be encoded in it. As the return value is
  118. * directly visible to userland, ERR_PTR(-ENODEV) is the only acceptable
  119. * choice to indicate get_active failure.
  120. *
  121. * Unfortunately, this is complicated due to the optional custom seq_file
  122. * operations which may return ERR_PTR(-ENODEV) too. kernfs_seq_stop()
  123. * can't distinguish whether ERR_PTR(-ENODEV) is from get_active failure or
  124. * custom seq_file operations and thus can't decide whether put_active
  125. * should be performed or not only on ERR_PTR(-ENODEV).
  126. *
  127. * This is worked around by factoring out the custom seq_stop() and
  128. * put_active part into kernfs_seq_stop_active(), skipping it from
  129. * kernfs_seq_stop() if ERR_PTR(-ENODEV) while invoking it directly after
  130. * custom seq_file operations fail with ERR_PTR(-ENODEV) - this ensures
  131. * that kernfs_seq_stop_active() is skipped only after get_active failure.
  132. */
  133. static void kernfs_seq_stop_active(struct seq_file *sf, void *v)
  134. {
  135. struct kernfs_open_file *of = sf->private;
  136. const struct kernfs_ops *ops = kernfs_ops(of->kn);
  137. if (ops->seq_stop)
  138. ops->seq_stop(sf, v);
  139. kernfs_put_active_of(of);
  140. }
  141. static void *kernfs_seq_start(struct seq_file *sf, loff_t *ppos)
  142. {
  143. struct kernfs_open_file *of = sf->private;
  144. const struct kernfs_ops *ops;
  145. /*
  146. * @of->mutex nests outside active ref and is primarily to ensure that
  147. * the ops aren't called concurrently for the same open file.
  148. */
  149. mutex_lock(&of->mutex);
  150. if (!kernfs_get_active_of(of))
  151. return ERR_PTR(-ENODEV);
  152. ops = kernfs_ops(of->kn);
  153. if (ops->seq_start) {
  154. void *next = ops->seq_start(sf, ppos);
  155. /* see the comment above kernfs_seq_stop_active() */
  156. if (next == ERR_PTR(-ENODEV))
  157. kernfs_seq_stop_active(sf, next);
  158. return next;
  159. }
  160. return single_start(sf, ppos);
  161. }
  162. static void *kernfs_seq_next(struct seq_file *sf, void *v, loff_t *ppos)
  163. {
  164. struct kernfs_open_file *of = sf->private;
  165. const struct kernfs_ops *ops = kernfs_ops(of->kn);
  166. if (ops->seq_next) {
  167. void *next = ops->seq_next(sf, v, ppos);
  168. /* see the comment above kernfs_seq_stop_active() */
  169. if (next == ERR_PTR(-ENODEV))
  170. kernfs_seq_stop_active(sf, next);
  171. return next;
  172. } else {
  173. /*
  174. * The same behavior and code as single_open(), always
  175. * terminate after the initial read.
  176. */
  177. ++*ppos;
  178. return NULL;
  179. }
  180. }
  181. static void kernfs_seq_stop(struct seq_file *sf, void *v)
  182. {
  183. struct kernfs_open_file *of = sf->private;
  184. if (v != ERR_PTR(-ENODEV))
  185. kernfs_seq_stop_active(sf, v);
  186. mutex_unlock(&of->mutex);
  187. }
  188. static int kernfs_seq_show(struct seq_file *sf, void *v)
  189. {
  190. struct kernfs_open_file *of = sf->private;
  191. of->event = atomic_read(&of_on(of)->event);
  192. return of->kn->attr.ops->seq_show(sf, v);
  193. }
  194. static const struct seq_operations kernfs_seq_ops = {
  195. .start = kernfs_seq_start,
  196. .next = kernfs_seq_next,
  197. .stop = kernfs_seq_stop,
  198. .show = kernfs_seq_show,
  199. };
  200. /*
  201. * As reading a bin file can have side-effects, the exact offset and bytes
  202. * specified in read(2) call should be passed to the read callback making
  203. * it difficult to use seq_file. Implement simplistic custom buffering for
  204. * bin files.
  205. */
  206. static ssize_t kernfs_file_read_iter(struct kiocb *iocb, struct iov_iter *iter)
  207. {
  208. struct kernfs_open_file *of = kernfs_of(iocb->ki_filp);
  209. ssize_t len = min_t(size_t, iov_iter_count(iter), PAGE_SIZE);
  210. const struct kernfs_ops *ops;
  211. char *buf;
  212. buf = of->prealloc_buf;
  213. if (buf)
  214. mutex_lock(&of->prealloc_mutex);
  215. else
  216. buf = kmalloc(len, GFP_KERNEL);
  217. if (!buf)
  218. return -ENOMEM;
  219. /*
  220. * @of->mutex nests outside active ref and is used both to ensure that
  221. * the ops aren't called concurrently for the same open file.
  222. */
  223. mutex_lock(&of->mutex);
  224. if (!kernfs_get_active_of(of)) {
  225. len = -ENODEV;
  226. mutex_unlock(&of->mutex);
  227. goto out_free;
  228. }
  229. of->event = atomic_read(&of_on(of)->event);
  230. ops = kernfs_ops(of->kn);
  231. if (ops->read)
  232. len = ops->read(of, buf, len, iocb->ki_pos);
  233. else
  234. len = -EINVAL;
  235. kernfs_put_active_of(of);
  236. mutex_unlock(&of->mutex);
  237. if (len < 0)
  238. goto out_free;
  239. if (copy_to_iter(buf, len, iter) != len) {
  240. len = -EFAULT;
  241. goto out_free;
  242. }
  243. iocb->ki_pos += len;
  244. out_free:
  245. if (buf == of->prealloc_buf)
  246. mutex_unlock(&of->prealloc_mutex);
  247. else
  248. kfree(buf);
  249. return len;
  250. }
  251. static ssize_t kernfs_fop_read_iter(struct kiocb *iocb, struct iov_iter *iter)
  252. {
  253. if (kernfs_of(iocb->ki_filp)->kn->flags & KERNFS_HAS_SEQ_SHOW)
  254. return seq_read_iter(iocb, iter);
  255. return kernfs_file_read_iter(iocb, iter);
  256. }
  257. /*
  258. * Copy data in from userland and pass it to the matching kernfs write
  259. * operation.
  260. *
  261. * There is no easy way for us to know if userspace is only doing a partial
  262. * write, so we don't support them. We expect the entire buffer to come on
  263. * the first write. Hint: if you're writing a value, first read the file,
  264. * modify only the value you're changing, then write entire buffer
  265. * back.
  266. */
  267. static ssize_t kernfs_fop_write_iter(struct kiocb *iocb, struct iov_iter *iter)
  268. {
  269. struct kernfs_open_file *of = kernfs_of(iocb->ki_filp);
  270. ssize_t len = iov_iter_count(iter);
  271. const struct kernfs_ops *ops;
  272. char *buf;
  273. if (of->atomic_write_len) {
  274. if (len > of->atomic_write_len)
  275. return -E2BIG;
  276. } else {
  277. len = min_t(size_t, len, PAGE_SIZE);
  278. }
  279. buf = of->prealloc_buf;
  280. if (buf)
  281. mutex_lock(&of->prealloc_mutex);
  282. else
  283. buf = kmalloc(len + 1, GFP_KERNEL);
  284. if (!buf)
  285. return -ENOMEM;
  286. if (copy_from_iter(buf, len, iter) != len) {
  287. len = -EFAULT;
  288. goto out_free;
  289. }
  290. buf[len] = '\0'; /* guarantee string termination */
  291. /*
  292. * @of->mutex nests outside active ref and is used both to ensure that
  293. * the ops aren't called concurrently for the same open file.
  294. */
  295. mutex_lock(&of->mutex);
  296. if (!kernfs_get_active_of(of)) {
  297. mutex_unlock(&of->mutex);
  298. len = -ENODEV;
  299. goto out_free;
  300. }
  301. ops = kernfs_ops(of->kn);
  302. if (ops->write)
  303. len = ops->write(of, buf, len, iocb->ki_pos);
  304. else
  305. len = -EINVAL;
  306. kernfs_put_active_of(of);
  307. mutex_unlock(&of->mutex);
  308. if (len > 0)
  309. iocb->ki_pos += len;
  310. out_free:
  311. if (buf == of->prealloc_buf)
  312. mutex_unlock(&of->prealloc_mutex);
  313. else
  314. kfree(buf);
  315. return len;
  316. }
  317. static void kernfs_vma_open(struct vm_area_struct *vma)
  318. {
  319. struct file *file = vma->vm_file;
  320. struct kernfs_open_file *of = kernfs_of(file);
  321. if (!of->vm_ops)
  322. return;
  323. if (!kernfs_get_active_of(of))
  324. return;
  325. if (of->vm_ops->open)
  326. of->vm_ops->open(vma);
  327. kernfs_put_active_of(of);
  328. }
  329. static vm_fault_t kernfs_vma_fault(struct vm_fault *vmf)
  330. {
  331. struct file *file = vmf->vma->vm_file;
  332. struct kernfs_open_file *of = kernfs_of(file);
  333. vm_fault_t ret;
  334. if (!of->vm_ops)
  335. return VM_FAULT_SIGBUS;
  336. if (!kernfs_get_active_of(of))
  337. return VM_FAULT_SIGBUS;
  338. ret = VM_FAULT_SIGBUS;
  339. if (of->vm_ops->fault)
  340. ret = of->vm_ops->fault(vmf);
  341. kernfs_put_active_of(of);
  342. return ret;
  343. }
  344. static vm_fault_t kernfs_vma_page_mkwrite(struct vm_fault *vmf)
  345. {
  346. struct file *file = vmf->vma->vm_file;
  347. struct kernfs_open_file *of = kernfs_of(file);
  348. vm_fault_t ret;
  349. if (!of->vm_ops)
  350. return VM_FAULT_SIGBUS;
  351. if (!kernfs_get_active_of(of))
  352. return VM_FAULT_SIGBUS;
  353. ret = 0;
  354. if (of->vm_ops->page_mkwrite)
  355. ret = of->vm_ops->page_mkwrite(vmf);
  356. else
  357. file_update_time(file);
  358. kernfs_put_active_of(of);
  359. return ret;
  360. }
  361. static int kernfs_vma_access(struct vm_area_struct *vma, unsigned long addr,
  362. void *buf, int len, int write)
  363. {
  364. struct file *file = vma->vm_file;
  365. struct kernfs_open_file *of = kernfs_of(file);
  366. int ret;
  367. if (!of->vm_ops)
  368. return -EINVAL;
  369. if (!kernfs_get_active_of(of))
  370. return -EINVAL;
  371. ret = -EINVAL;
  372. if (of->vm_ops->access)
  373. ret = of->vm_ops->access(vma, addr, buf, len, write);
  374. kernfs_put_active_of(of);
  375. return ret;
  376. }
  377. static const struct vm_operations_struct kernfs_vm_ops = {
  378. .open = kernfs_vma_open,
  379. .fault = kernfs_vma_fault,
  380. .page_mkwrite = kernfs_vma_page_mkwrite,
  381. .access = kernfs_vma_access,
  382. };
  383. static int kernfs_fop_mmap(struct file *file, struct vm_area_struct *vma)
  384. {
  385. struct kernfs_open_file *of = kernfs_of(file);
  386. const struct kernfs_ops *ops;
  387. int rc;
  388. /*
  389. * mmap path and of->mutex are prone to triggering spurious lockdep
  390. * warnings and we don't want to add spurious locking dependency
  391. * between the two. Check whether mmap is actually implemented
  392. * without grabbing @of->mutex by testing HAS_MMAP flag. See the
  393. * comment in kernfs_fop_open() for more details.
  394. */
  395. if (!(of->kn->flags & KERNFS_HAS_MMAP))
  396. return -ENODEV;
  397. mutex_lock(&of->mutex);
  398. rc = -ENODEV;
  399. if (!kernfs_get_active_of(of))
  400. goto out_unlock;
  401. ops = kernfs_ops(of->kn);
  402. rc = ops->mmap(of, vma);
  403. if (rc)
  404. goto out_put;
  405. /*
  406. * PowerPC's pci_mmap of legacy_mem uses shmem_zero_setup()
  407. * to satisfy versions of X which crash if the mmap fails: that
  408. * substitutes a new vm_file, and we don't then want bin_vm_ops.
  409. */
  410. if (vma->vm_file != file)
  411. goto out_put;
  412. rc = -EINVAL;
  413. if (of->mmapped && of->vm_ops != vma->vm_ops)
  414. goto out_put;
  415. /*
  416. * It is not possible to successfully wrap close.
  417. * So error if someone is trying to use close.
  418. */
  419. if (vma->vm_ops && vma->vm_ops->close)
  420. goto out_put;
  421. rc = 0;
  422. if (!of->mmapped) {
  423. of->mmapped = true;
  424. of_on(of)->nr_mmapped++;
  425. of->vm_ops = vma->vm_ops;
  426. }
  427. vma->vm_ops = &kernfs_vm_ops;
  428. out_put:
  429. kernfs_put_active_of(of);
  430. out_unlock:
  431. mutex_unlock(&of->mutex);
  432. return rc;
  433. }
  434. /**
  435. * kernfs_get_open_node - get or create kernfs_open_node
  436. * @kn: target kernfs_node
  437. * @of: kernfs_open_file for this instance of open
  438. *
  439. * If @kn->attr.open exists, increment its reference count; otherwise,
  440. * create one. @of is chained to the files list.
  441. *
  442. * Locking:
  443. * Kernel thread context (may sleep).
  444. *
  445. * Return:
  446. * %0 on success, -errno on failure.
  447. */
  448. static int kernfs_get_open_node(struct kernfs_node *kn,
  449. struct kernfs_open_file *of)
  450. {
  451. struct kernfs_open_node *on;
  452. struct mutex *mutex;
  453. mutex = kernfs_open_file_mutex_lock(kn);
  454. on = kernfs_deref_open_node_locked(kn);
  455. if (!on) {
  456. /* not there, initialize a new one */
  457. on = kzalloc_obj(*on);
  458. if (!on) {
  459. mutex_unlock(mutex);
  460. return -ENOMEM;
  461. }
  462. atomic_set(&on->event, 1);
  463. init_waitqueue_head(&on->poll);
  464. INIT_LIST_HEAD(&on->files);
  465. rcu_assign_pointer(kn->attr.open, on);
  466. }
  467. list_add_tail(&of->list, &on->files);
  468. if (kn->flags & KERNFS_HAS_RELEASE)
  469. on->nr_to_release++;
  470. mutex_unlock(mutex);
  471. return 0;
  472. }
  473. /**
  474. * kernfs_unlink_open_file - Unlink @of from @kn.
  475. *
  476. * @kn: target kernfs_node
  477. * @of: associated kernfs_open_file
  478. * @open_failed: ->open() failed, cancel ->release()
  479. *
  480. * Unlink @of from list of @kn's associated open files. If list of
  481. * associated open files becomes empty, disassociate and free
  482. * kernfs_open_node.
  483. *
  484. * LOCKING:
  485. * None.
  486. */
  487. static void kernfs_unlink_open_file(struct kernfs_node *kn,
  488. struct kernfs_open_file *of,
  489. bool open_failed)
  490. {
  491. struct kernfs_open_node *on;
  492. struct mutex *mutex;
  493. mutex = kernfs_open_file_mutex_lock(kn);
  494. on = kernfs_deref_open_node_locked(kn);
  495. if (!on) {
  496. mutex_unlock(mutex);
  497. return;
  498. }
  499. if (of) {
  500. if (kn->flags & KERNFS_HAS_RELEASE) {
  501. WARN_ON_ONCE(of->released == open_failed);
  502. if (open_failed)
  503. on->nr_to_release--;
  504. }
  505. if (of->mmapped)
  506. on->nr_mmapped--;
  507. list_del(&of->list);
  508. }
  509. if (list_empty(&on->files)) {
  510. rcu_assign_pointer(kn->attr.open, NULL);
  511. kfree_rcu(on, rcu_head);
  512. }
  513. mutex_unlock(mutex);
  514. }
  515. static int kernfs_fop_open(struct inode *inode, struct file *file)
  516. {
  517. struct kernfs_node *kn = inode->i_private;
  518. struct kernfs_root *root = kernfs_root(kn);
  519. const struct kernfs_ops *ops;
  520. struct kernfs_open_file *of;
  521. bool has_read, has_write, has_mmap;
  522. int error = -EACCES;
  523. if (!kernfs_get_active(kn))
  524. return -ENODEV;
  525. ops = kernfs_ops(kn);
  526. has_read = ops->seq_show || ops->read || ops->mmap;
  527. has_write = ops->write || ops->mmap;
  528. has_mmap = ops->mmap;
  529. /* see the flag definition for details */
  530. if (root->flags & KERNFS_ROOT_EXTRA_OPEN_PERM_CHECK) {
  531. if ((file->f_mode & FMODE_WRITE) &&
  532. (!(inode->i_mode & S_IWUGO) || !has_write))
  533. goto err_out;
  534. if ((file->f_mode & FMODE_READ) &&
  535. (!(inode->i_mode & S_IRUGO) || !has_read))
  536. goto err_out;
  537. }
  538. /* allocate a kernfs_open_file for the file */
  539. error = -ENOMEM;
  540. of = kzalloc_obj(struct kernfs_open_file);
  541. if (!of)
  542. goto err_out;
  543. /*
  544. * The following is done to give a different lockdep key to
  545. * @of->mutex for files which implement mmap. This is a rather
  546. * crude way to avoid false positive lockdep warning around
  547. * mm->mmap_lock - mmap nests @of->mutex under mm->mmap_lock and
  548. * reading /sys/block/sda/trace/act_mask grabs sr_mutex, under
  549. * which mm->mmap_lock nests, while holding @of->mutex. As each
  550. * open file has a separate mutex, it's okay as long as those don't
  551. * happen on the same file. At this point, we can't easily give
  552. * each file a separate locking class. Let's differentiate on
  553. * whether the file has mmap or not for now.
  554. *
  555. * For similar reasons, writable and readonly files are given different
  556. * lockdep key, because the writable file /sys/power/resume may call vfs
  557. * lookup helpers for arbitrary paths and readonly files can be read by
  558. * overlayfs from vfs helpers when sysfs is a lower layer of overalyfs.
  559. *
  560. * All three cases look the same. They're supposed to
  561. * look that way and give @of->mutex different static lockdep keys.
  562. */
  563. if (has_mmap)
  564. mutex_init(&of->mutex);
  565. else if (file->f_mode & FMODE_WRITE)
  566. mutex_init(&of->mutex);
  567. else
  568. mutex_init(&of->mutex);
  569. of->kn = kn;
  570. of->file = file;
  571. /*
  572. * Write path needs to atomic_write_len outside active reference.
  573. * Cache it in open_file. See kernfs_fop_write_iter() for details.
  574. */
  575. of->atomic_write_len = ops->atomic_write_len;
  576. error = -EINVAL;
  577. /*
  578. * ->seq_show is incompatible with ->prealloc,
  579. * as seq_read does its own allocation.
  580. * ->read must be used instead.
  581. */
  582. if (ops->prealloc && ops->seq_show)
  583. goto err_free;
  584. if (ops->prealloc) {
  585. int len = of->atomic_write_len ?: PAGE_SIZE;
  586. of->prealloc_buf = kmalloc(len + 1, GFP_KERNEL);
  587. error = -ENOMEM;
  588. if (!of->prealloc_buf)
  589. goto err_free;
  590. mutex_init(&of->prealloc_mutex);
  591. }
  592. /*
  593. * Always instantiate seq_file even if read access doesn't use
  594. * seq_file or is not requested. This unifies private data access
  595. * and readable regular files are the vast majority anyway.
  596. */
  597. if (ops->seq_show)
  598. error = seq_open(file, &kernfs_seq_ops);
  599. else
  600. error = seq_open(file, NULL);
  601. if (error)
  602. goto err_free;
  603. of->seq_file = file->private_data;
  604. of->seq_file->private = of;
  605. /* seq_file clears PWRITE unconditionally, restore it if WRITE */
  606. if (file->f_mode & FMODE_WRITE)
  607. file->f_mode |= FMODE_PWRITE;
  608. /* make sure we have open node struct */
  609. error = kernfs_get_open_node(kn, of);
  610. if (error)
  611. goto err_seq_release;
  612. if (ops->open) {
  613. /* nobody has access to @of yet, skip @of->mutex */
  614. error = ops->open(of);
  615. if (error)
  616. goto err_put_node;
  617. }
  618. /* open succeeded, put active references */
  619. kernfs_put_active(kn);
  620. return 0;
  621. err_put_node:
  622. kernfs_unlink_open_file(kn, of, true);
  623. err_seq_release:
  624. seq_release(inode, file);
  625. err_free:
  626. kfree(of->prealloc_buf);
  627. kfree(of);
  628. err_out:
  629. kernfs_put_active(kn);
  630. return error;
  631. }
  632. /* used from release/drain to ensure that ->release() is called exactly once */
  633. static void kernfs_release_file(struct kernfs_node *kn,
  634. struct kernfs_open_file *of)
  635. {
  636. /*
  637. * @of is guaranteed to have no other file operations in flight and
  638. * we just want to synchronize release and drain paths.
  639. * @kernfs_open_file_mutex_ptr(kn) is enough. @of->mutex can't be used
  640. * here because drain path may be called from places which can
  641. * cause circular dependency.
  642. */
  643. lockdep_assert_held(kernfs_open_file_mutex_ptr(kn));
  644. if (!of->released) {
  645. /*
  646. * A file is never detached without being released and we
  647. * need to be able to release files which are deactivated
  648. * and being drained. Don't use kernfs_ops().
  649. */
  650. kn->attr.ops->release(of);
  651. of->released = true;
  652. of_on(of)->nr_to_release--;
  653. }
  654. }
  655. static int kernfs_fop_release(struct inode *inode, struct file *filp)
  656. {
  657. struct kernfs_node *kn = inode->i_private;
  658. struct kernfs_open_file *of = kernfs_of(filp);
  659. if (kn->flags & KERNFS_HAS_RELEASE) {
  660. struct mutex *mutex;
  661. mutex = kernfs_open_file_mutex_lock(kn);
  662. kernfs_release_file(kn, of);
  663. mutex_unlock(mutex);
  664. }
  665. kernfs_unlink_open_file(kn, of, false);
  666. seq_release(inode, filp);
  667. kfree(of->prealloc_buf);
  668. kfree(of);
  669. return 0;
  670. }
  671. bool kernfs_should_drain_open_files(struct kernfs_node *kn)
  672. {
  673. struct kernfs_open_node *on;
  674. bool ret;
  675. /*
  676. * @kn being deactivated guarantees that @kn->attr.open can't change
  677. * beneath us making the lockless test below safe.
  678. * Callers post kernfs_unbreak_active_protection may be counted in
  679. * kn->active by now, do not WARN_ON because of them.
  680. */
  681. rcu_read_lock();
  682. on = rcu_dereference(kn->attr.open);
  683. ret = on && (on->nr_mmapped || on->nr_to_release);
  684. rcu_read_unlock();
  685. return ret;
  686. }
  687. void kernfs_drain_open_files(struct kernfs_node *kn)
  688. {
  689. struct kernfs_open_node *on;
  690. struct kernfs_open_file *of;
  691. struct mutex *mutex;
  692. mutex = kernfs_open_file_mutex_lock(kn);
  693. on = kernfs_deref_open_node_locked(kn);
  694. if (!on) {
  695. mutex_unlock(mutex);
  696. return;
  697. }
  698. list_for_each_entry(of, &on->files, list) {
  699. struct inode *inode = file_inode(of->file);
  700. if (of->mmapped) {
  701. unmap_mapping_range(inode->i_mapping, 0, 0, 1);
  702. of->mmapped = false;
  703. on->nr_mmapped--;
  704. }
  705. if (kn->flags & KERNFS_HAS_RELEASE)
  706. kernfs_release_file(kn, of);
  707. }
  708. WARN_ON_ONCE(on->nr_mmapped || on->nr_to_release);
  709. mutex_unlock(mutex);
  710. }
  711. /*
  712. * Kernfs attribute files are pollable. The idea is that you read
  713. * the content and then you use 'poll' or 'select' to wait for
  714. * the content to change. When the content changes (assuming the
  715. * manager for the kobject supports notification), poll will
  716. * return EPOLLERR|EPOLLPRI, and select will return the fd whether
  717. * it is waiting for read, write, or exceptions.
  718. * Once poll/select indicates that the value has changed, you
  719. * need to close and re-open the file, or seek to 0 and read again.
  720. * Reminder: this only works for attributes which actively support
  721. * it, and it is not possible to test an attribute from userspace
  722. * to see if it supports poll (Neither 'poll' nor 'select' return
  723. * an appropriate error code). When in doubt, set a suitable timeout value.
  724. */
  725. __poll_t kernfs_generic_poll(struct kernfs_open_file *of, poll_table *wait)
  726. {
  727. struct kernfs_open_node *on = of_on(of);
  728. poll_wait(of->file, &on->poll, wait);
  729. if (of->event != atomic_read(&on->event))
  730. return DEFAULT_POLLMASK|EPOLLERR|EPOLLPRI;
  731. return DEFAULT_POLLMASK;
  732. }
  733. static __poll_t kernfs_fop_poll(struct file *filp, poll_table *wait)
  734. {
  735. struct kernfs_open_file *of = kernfs_of(filp);
  736. struct kernfs_node *kn = kernfs_dentry_node(filp->f_path.dentry);
  737. __poll_t ret;
  738. if (!kernfs_get_active_of(of))
  739. return DEFAULT_POLLMASK|EPOLLERR|EPOLLPRI;
  740. if (kn->attr.ops->poll)
  741. ret = kn->attr.ops->poll(of, wait);
  742. else
  743. ret = kernfs_generic_poll(of, wait);
  744. kernfs_put_active_of(of);
  745. return ret;
  746. }
  747. static loff_t kernfs_fop_llseek(struct file *file, loff_t offset, int whence)
  748. {
  749. struct kernfs_open_file *of = kernfs_of(file);
  750. const struct kernfs_ops *ops;
  751. loff_t ret;
  752. /*
  753. * @of->mutex nests outside active ref and is primarily to ensure that
  754. * the ops aren't called concurrently for the same open file.
  755. */
  756. mutex_lock(&of->mutex);
  757. if (!kernfs_get_active_of(of)) {
  758. mutex_unlock(&of->mutex);
  759. return -ENODEV;
  760. }
  761. ops = kernfs_ops(of->kn);
  762. if (ops->llseek)
  763. ret = ops->llseek(of, offset, whence);
  764. else
  765. ret = generic_file_llseek(file, offset, whence);
  766. kernfs_put_active_of(of);
  767. mutex_unlock(&of->mutex);
  768. return ret;
  769. }
  770. static void kernfs_notify_workfn(struct work_struct *work)
  771. {
  772. struct kernfs_node *kn;
  773. struct kernfs_super_info *info;
  774. struct kernfs_root *root;
  775. repeat:
  776. /* pop one off the notify_list */
  777. spin_lock_irq(&kernfs_notify_lock);
  778. kn = kernfs_notify_list;
  779. if (kn == KERNFS_NOTIFY_EOL) {
  780. spin_unlock_irq(&kernfs_notify_lock);
  781. return;
  782. }
  783. kernfs_notify_list = kn->attr.notify_next;
  784. kn->attr.notify_next = NULL;
  785. spin_unlock_irq(&kernfs_notify_lock);
  786. root = kernfs_root(kn);
  787. /* kick fsnotify */
  788. down_read(&root->kernfs_supers_rwsem);
  789. down_read(&root->kernfs_rwsem);
  790. list_for_each_entry(info, &kernfs_root(kn)->supers, node) {
  791. struct kernfs_node *parent;
  792. struct inode *p_inode = NULL;
  793. const char *kn_name;
  794. struct inode *inode;
  795. struct qstr name;
  796. /*
  797. * We want fsnotify_modify() on @kn but as the
  798. * modifications aren't originating from userland don't
  799. * have the matching @file available. Look up the inodes
  800. * and generate the events manually.
  801. */
  802. inode = ilookup(info->sb, kernfs_ino(kn));
  803. if (!inode)
  804. continue;
  805. kn_name = kernfs_rcu_name(kn);
  806. name = QSTR(kn_name);
  807. parent = kernfs_get_parent(kn);
  808. if (parent) {
  809. p_inode = ilookup(info->sb, kernfs_ino(parent));
  810. if (p_inode) {
  811. fsnotify(FS_MODIFY | FS_EVENT_ON_CHILD,
  812. inode, FSNOTIFY_EVENT_INODE,
  813. p_inode, &name, inode, 0);
  814. iput(p_inode);
  815. }
  816. kernfs_put(parent);
  817. }
  818. if (!p_inode)
  819. fsnotify_inode(inode, FS_MODIFY);
  820. iput(inode);
  821. }
  822. up_read(&root->kernfs_rwsem);
  823. up_read(&root->kernfs_supers_rwsem);
  824. kernfs_put(kn);
  825. goto repeat;
  826. }
  827. /**
  828. * kernfs_notify - notify a kernfs file
  829. * @kn: file to notify
  830. *
  831. * Notify @kn such that poll(2) on @kn wakes up. Maybe be called from any
  832. * context.
  833. */
  834. void kernfs_notify(struct kernfs_node *kn)
  835. {
  836. static DECLARE_WORK(kernfs_notify_work, kernfs_notify_workfn);
  837. unsigned long flags;
  838. struct kernfs_open_node *on;
  839. if (WARN_ON(kernfs_type(kn) != KERNFS_FILE))
  840. return;
  841. /* kick poll immediately */
  842. rcu_read_lock();
  843. on = rcu_dereference(kn->attr.open);
  844. if (on) {
  845. atomic_inc(&on->event);
  846. wake_up_interruptible(&on->poll);
  847. }
  848. rcu_read_unlock();
  849. /* schedule work to kick fsnotify */
  850. spin_lock_irqsave(&kernfs_notify_lock, flags);
  851. if (!kn->attr.notify_next) {
  852. kernfs_get(kn);
  853. kn->attr.notify_next = kernfs_notify_list;
  854. kernfs_notify_list = kn;
  855. schedule_work(&kernfs_notify_work);
  856. }
  857. spin_unlock_irqrestore(&kernfs_notify_lock, flags);
  858. }
  859. EXPORT_SYMBOL_GPL(kernfs_notify);
  860. const struct file_operations kernfs_file_fops = {
  861. .read_iter = kernfs_fop_read_iter,
  862. .write_iter = kernfs_fop_write_iter,
  863. .llseek = kernfs_fop_llseek,
  864. .mmap = kernfs_fop_mmap,
  865. .open = kernfs_fop_open,
  866. .release = kernfs_fop_release,
  867. .poll = kernfs_fop_poll,
  868. .fsync = noop_fsync,
  869. .splice_read = copy_splice_read,
  870. .splice_write = iter_file_splice_write,
  871. };
  872. /**
  873. * __kernfs_create_file - kernfs internal function to create a file
  874. * @parent: directory to create the file in
  875. * @name: name of the file
  876. * @mode: mode of the file
  877. * @uid: uid of the file
  878. * @gid: gid of the file
  879. * @size: size of the file
  880. * @ops: kernfs operations for the file
  881. * @priv: private data for the file
  882. * @ns: optional namespace tag of the file
  883. * @key: lockdep key for the file's active_ref, %NULL to disable lockdep
  884. *
  885. * Return: the created node on success, ERR_PTR() value on error.
  886. */
  887. struct kernfs_node *__kernfs_create_file(struct kernfs_node *parent,
  888. const char *name,
  889. umode_t mode, kuid_t uid, kgid_t gid,
  890. loff_t size,
  891. const struct kernfs_ops *ops,
  892. void *priv, const struct ns_common *ns,
  893. struct lock_class_key *key)
  894. {
  895. struct kernfs_node *kn;
  896. unsigned flags;
  897. int rc;
  898. flags = KERNFS_FILE;
  899. kn = kernfs_new_node(parent, name, (mode & S_IALLUGO) | S_IFREG,
  900. uid, gid, flags);
  901. if (!kn)
  902. return ERR_PTR(-ENOMEM);
  903. kn->attr.ops = ops;
  904. kn->attr.size = size;
  905. kn->ns = ns;
  906. kn->priv = priv;
  907. #ifdef CONFIG_DEBUG_LOCK_ALLOC
  908. if (key) {
  909. lockdep_init_map(&kn->dep_map, "kn->active", key, 0);
  910. kn->flags |= KERNFS_LOCKDEP;
  911. }
  912. #endif
  913. /*
  914. * kn->attr.ops is accessible only while holding active ref. We
  915. * need to know whether some ops are implemented outside active
  916. * ref. Cache their existence in flags.
  917. */
  918. if (ops->seq_show)
  919. kn->flags |= KERNFS_HAS_SEQ_SHOW;
  920. if (ops->mmap)
  921. kn->flags |= KERNFS_HAS_MMAP;
  922. if (ops->release)
  923. kn->flags |= KERNFS_HAS_RELEASE;
  924. rc = kernfs_add_one(kn);
  925. if (rc) {
  926. kernfs_put(kn);
  927. return ERR_PTR(rc);
  928. }
  929. return kn;
  930. }