fserror.c 5.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194
  1. // SPDX-License-Identifier: GPL-2.0-or-later
  2. /*
  3. * Copyright (c) 2025 Oracle. All Rights Reserved.
  4. * Author: Darrick J. Wong <djwong@kernel.org>
  5. */
  6. #include <linux/fs.h>
  7. #include <linux/fsnotify.h>
  8. #include <linux/mempool.h>
  9. #include <linux/fserror.h>
  10. #define FSERROR_DEFAULT_EVENT_POOL_SIZE (32)
  11. static struct mempool fserror_events_pool;
  12. void fserror_mount(struct super_block *sb)
  13. {
  14. /*
  15. * The pending error counter is biased by 1 so that we don't wake_var
  16. * until we're actually trying to unmount.
  17. */
  18. refcount_set(&sb->s_pending_errors, 1);
  19. }
  20. void fserror_unmount(struct super_block *sb)
  21. {
  22. /*
  23. * If we don't drop the pending error count to zero, then wait for it
  24. * to drop below 1, which means that the pending errors cleared and
  25. * hopefully we didn't saturate with 1 billion+ concurrent events.
  26. */
  27. if (!refcount_dec_and_test(&sb->s_pending_errors))
  28. wait_var_event(&sb->s_pending_errors,
  29. refcount_read(&sb->s_pending_errors) < 1);
  30. }
  31. static inline void fserror_pending_dec(struct super_block *sb)
  32. {
  33. if (refcount_dec_and_test(&sb->s_pending_errors))
  34. wake_up_var(&sb->s_pending_errors);
  35. }
  36. static inline void fserror_free_event(struct fserror_event *event)
  37. {
  38. fserror_pending_dec(event->sb);
  39. mempool_free(event, &fserror_events_pool);
  40. }
  41. static void fserror_worker(struct work_struct *work)
  42. {
  43. struct fserror_event *event =
  44. container_of(work, struct fserror_event, work);
  45. struct super_block *sb = event->sb;
  46. if (sb->s_flags & SB_ACTIVE) {
  47. struct fs_error_report report = {
  48. /* send positive error number to userspace */
  49. .error = -event->error,
  50. .inode = event->inode,
  51. .sb = event->sb,
  52. };
  53. if (sb->s_op->report_error)
  54. sb->s_op->report_error(event);
  55. fsnotify(FS_ERROR, &report, FSNOTIFY_EVENT_ERROR, NULL, NULL,
  56. NULL, 0);
  57. }
  58. iput(event->inode);
  59. fserror_free_event(event);
  60. }
  61. static inline struct fserror_event *fserror_alloc_event(struct super_block *sb,
  62. gfp_t gfp_flags)
  63. {
  64. struct fserror_event *event = NULL;
  65. /*
  66. * If pending_errors already reached zero or is no longer active,
  67. * the superblock is being deactivated so there's no point in
  68. * continuing.
  69. *
  70. * The order of the check of s_pending_errors and SB_ACTIVE are
  71. * mandated by order of accesses in generic_shutdown_super and
  72. * fserror_unmount. Barriers are implicitly provided by the refcount
  73. * manipulations in this function and fserror_unmount.
  74. */
  75. if (!refcount_inc_not_zero(&sb->s_pending_errors))
  76. return NULL;
  77. if (!(sb->s_flags & SB_ACTIVE))
  78. goto out_pending;
  79. event = mempool_alloc(&fserror_events_pool, gfp_flags);
  80. if (!event)
  81. goto out_pending;
  82. /* mempool_alloc doesn't support GFP_ZERO */
  83. memset(event, 0, sizeof(*event));
  84. event->sb = sb;
  85. INIT_WORK(&event->work, fserror_worker);
  86. return event;
  87. out_pending:
  88. fserror_pending_dec(sb);
  89. return NULL;
  90. }
  91. /**
  92. * fserror_report - report a filesystem error of some kind
  93. *
  94. * @sb: superblock of the filesystem
  95. * @inode: inode within that filesystem, if applicable
  96. * @type: type of error encountered
  97. * @pos: start of inode range affected, if applicable
  98. * @len: length of inode range affected, if applicable
  99. * @error: error number encountered, must be negative
  100. * @gfp: memory allocation flags for conveying the event to a worker,
  101. * since this function can be called from atomic contexts
  102. *
  103. * Report details of a filesystem error to the super_operations::report_error
  104. * callback if present; and to fsnotify for distribution to userspace. @sb,
  105. * @gfp, @type, and @error must all be specified. For file I/O errors, the
  106. * @inode, @pos, and @len fields must also be specified. For file metadata
  107. * errors, @inode must be specified. If @inode is not NULL, then @inode->i_sb
  108. * must point to @sb.
  109. *
  110. * Reporting work is deferred to a workqueue to ensure that ->report_error is
  111. * called from process context without any locks held. An active reference to
  112. * the inode is maintained until event handling is complete, and unmount will
  113. * wait for queued events to drain.
  114. */
  115. void fserror_report(struct super_block *sb, struct inode *inode,
  116. enum fserror_type type, loff_t pos, u64 len, int error,
  117. gfp_t gfp)
  118. {
  119. struct fserror_event *event;
  120. /* sb and inode must be from the same filesystem */
  121. WARN_ON_ONCE(inode && inode->i_sb != sb);
  122. /* error number must be negative */
  123. WARN_ON_ONCE(error >= 0);
  124. event = fserror_alloc_event(sb, gfp);
  125. if (!event)
  126. goto lost;
  127. event->type = type;
  128. event->pos = pos;
  129. event->len = len;
  130. event->error = error;
  131. /*
  132. * Can't iput from non-sleeping context, so grabbing another reference
  133. * to the inode must be the last thing before submitting the event.
  134. */
  135. if (inode) {
  136. event->inode = igrab(inode);
  137. if (!event->inode)
  138. goto lost_event;
  139. }
  140. /*
  141. * Use schedule_work here even if we're already in process context so
  142. * that fsnotify and super_operations::report_error implementations are
  143. * guaranteed to run in process context without any locks held. Since
  144. * errors are supposed to be rare, the overhead shouldn't kill us any
  145. * more than the failing device will.
  146. */
  147. schedule_work(&event->work);
  148. return;
  149. lost_event:
  150. fserror_free_event(event);
  151. lost:
  152. if (inode)
  153. pr_err_ratelimited(
  154. "%s: lost file I/O error report for ino %lu type %u pos 0x%llx len 0x%llx error %d",
  155. sb->s_id, inode->i_ino, type, pos, len, error);
  156. else
  157. pr_err_ratelimited(
  158. "%s: lost filesystem error report for type %u error %d",
  159. sb->s_id, type, error);
  160. }
  161. EXPORT_SYMBOL_GPL(fserror_report);
  162. static int __init fserror_init(void)
  163. {
  164. return mempool_init_kmalloc_pool(&fserror_events_pool,
  165. FSERROR_DEFAULT_EVENT_POOL_SIZE,
  166. sizeof(struct fserror_event));
  167. }
  168. fs_initcall(fserror_init);