io.c 5.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213
  1. // SPDX-License-Identifier: GPL-2.0
  2. /*
  3. * Copyright (c) 2016 Trond Myklebust
  4. * Copyright (c) 2019 Jeff Layton
  5. *
  6. * I/O and data path helper functionality.
  7. *
  8. * Heavily borrowed from equivalent code in fs/nfs/io.c
  9. */
  10. #include <linux/ceph/ceph_debug.h>
  11. #include <linux/types.h>
  12. #include <linux/kernel.h>
  13. #include <linux/rwsem.h>
  14. #include <linux/fs.h>
  15. #include "super.h"
  16. #include "io.h"
  17. /* Call with exclusively locked inode->i_rwsem */
  18. static void ceph_block_o_direct(struct ceph_inode_info *ci, struct inode *inode)
  19. {
  20. bool is_odirect;
  21. lockdep_assert_held_write(&inode->i_rwsem);
  22. spin_lock(&ci->i_ceph_lock);
  23. /* ensure that bit state is consistent */
  24. smp_mb__before_atomic();
  25. is_odirect = READ_ONCE(ci->i_ceph_flags) & CEPH_I_ODIRECT;
  26. if (is_odirect) {
  27. clear_bit(CEPH_I_ODIRECT_BIT, &ci->i_ceph_flags);
  28. /* ensure modified bit is visible */
  29. smp_mb__after_atomic();
  30. }
  31. spin_unlock(&ci->i_ceph_lock);
  32. if (is_odirect)
  33. inode_dio_wait(inode);
  34. }
  35. /**
  36. * ceph_start_io_read - declare the file is being used for buffered reads
  37. * @inode: file inode
  38. *
  39. * Declare that a buffered read operation is about to start, and ensure
  40. * that we block all direct I/O.
  41. * On exit, the function ensures that the CEPH_I_ODIRECT flag is unset,
  42. * and holds a shared lock on inode->i_rwsem to ensure that the flag
  43. * cannot be changed.
  44. * In practice, this means that buffered read operations are allowed to
  45. * execute in parallel, thanks to the shared lock, whereas direct I/O
  46. * operations need to wait to grab an exclusive lock in order to set
  47. * CEPH_I_ODIRECT.
  48. * Note that buffered writes and truncates both take a write lock on
  49. * inode->i_rwsem, meaning that those are serialised w.r.t. the reads.
  50. */
  51. int ceph_start_io_read(struct inode *inode)
  52. {
  53. struct ceph_inode_info *ci = ceph_inode(inode);
  54. bool is_odirect;
  55. int err;
  56. /* Be an optimist! */
  57. err = down_read_killable(&inode->i_rwsem);
  58. if (err)
  59. return err;
  60. spin_lock(&ci->i_ceph_lock);
  61. /* ensure that bit state is consistent */
  62. smp_mb__before_atomic();
  63. is_odirect = READ_ONCE(ci->i_ceph_flags) & CEPH_I_ODIRECT;
  64. spin_unlock(&ci->i_ceph_lock);
  65. if (!is_odirect)
  66. return 0;
  67. up_read(&inode->i_rwsem);
  68. /* Slow path.... */
  69. err = down_write_killable(&inode->i_rwsem);
  70. if (err)
  71. return err;
  72. ceph_block_o_direct(ci, inode);
  73. downgrade_write(&inode->i_rwsem);
  74. return 0;
  75. }
  76. /**
  77. * ceph_end_io_read - declare that the buffered read operation is done
  78. * @inode: file inode
  79. *
  80. * Declare that a buffered read operation is done, and release the shared
  81. * lock on inode->i_rwsem.
  82. */
  83. void
  84. ceph_end_io_read(struct inode *inode)
  85. {
  86. up_read(&inode->i_rwsem);
  87. }
  88. /**
  89. * ceph_start_io_write - declare the file is being used for buffered writes
  90. * @inode: file inode
  91. *
  92. * Declare that a buffered write operation is about to start, and ensure
  93. * that we block all direct I/O.
  94. */
  95. int ceph_start_io_write(struct inode *inode)
  96. {
  97. int err = down_write_killable(&inode->i_rwsem);
  98. if (!err)
  99. ceph_block_o_direct(ceph_inode(inode), inode);
  100. return err;
  101. }
  102. /**
  103. * ceph_end_io_write - declare that the buffered write operation is done
  104. * @inode: file inode
  105. *
  106. * Declare that a buffered write operation is done, and release the
  107. * lock on inode->i_rwsem.
  108. */
  109. void
  110. ceph_end_io_write(struct inode *inode)
  111. {
  112. up_write(&inode->i_rwsem);
  113. }
  114. /* Call with exclusively locked inode->i_rwsem */
  115. static void ceph_block_buffered(struct ceph_inode_info *ci, struct inode *inode)
  116. {
  117. bool is_odirect;
  118. lockdep_assert_held_write(&inode->i_rwsem);
  119. spin_lock(&ci->i_ceph_lock);
  120. /* ensure that bit state is consistent */
  121. smp_mb__before_atomic();
  122. is_odirect = READ_ONCE(ci->i_ceph_flags) & CEPH_I_ODIRECT;
  123. if (!is_odirect) {
  124. set_bit(CEPH_I_ODIRECT_BIT, &ci->i_ceph_flags);
  125. /* ensure modified bit is visible */
  126. smp_mb__after_atomic();
  127. }
  128. spin_unlock(&ci->i_ceph_lock);
  129. if (!is_odirect) {
  130. /* FIXME: unmap_mapping_range? */
  131. filemap_write_and_wait(inode->i_mapping);
  132. }
  133. }
  134. /**
  135. * ceph_start_io_direct - declare the file is being used for direct i/o
  136. * @inode: file inode
  137. *
  138. * Declare that a direct I/O operation is about to start, and ensure
  139. * that we block all buffered I/O.
  140. * On exit, the function ensures that the CEPH_I_ODIRECT flag is set,
  141. * and holds a shared lock on inode->i_rwsem to ensure that the flag
  142. * cannot be changed.
  143. * In practice, this means that direct I/O operations are allowed to
  144. * execute in parallel, thanks to the shared lock, whereas buffered I/O
  145. * operations need to wait to grab an exclusive lock in order to clear
  146. * CEPH_I_ODIRECT.
  147. * Note that buffered writes and truncates both take a write lock on
  148. * inode->i_rwsem, meaning that those are serialised w.r.t. O_DIRECT.
  149. */
  150. int ceph_start_io_direct(struct inode *inode)
  151. {
  152. struct ceph_inode_info *ci = ceph_inode(inode);
  153. bool is_odirect;
  154. int err;
  155. /* Be an optimist! */
  156. err = down_read_killable(&inode->i_rwsem);
  157. if (err)
  158. return err;
  159. spin_lock(&ci->i_ceph_lock);
  160. /* ensure that bit state is consistent */
  161. smp_mb__before_atomic();
  162. is_odirect = READ_ONCE(ci->i_ceph_flags) & CEPH_I_ODIRECT;
  163. spin_unlock(&ci->i_ceph_lock);
  164. if (is_odirect)
  165. return 0;
  166. up_read(&inode->i_rwsem);
  167. /* Slow path.... */
  168. err = down_write_killable(&inode->i_rwsem);
  169. if (err)
  170. return err;
  171. ceph_block_buffered(ci, inode);
  172. downgrade_write(&inode->i_rwsem);
  173. return 0;
  174. }
  175. /**
  176. * ceph_end_io_direct - declare that the direct i/o operation is done
  177. * @inode: file inode
  178. *
  179. * Declare that a direct I/O operation is done, and release the shared
  180. * lock on inode->i_rwsem.
  181. */
  182. void
  183. ceph_end_io_direct(struct inode *inode)
  184. {
  185. up_read(&inode->i_rwsem);
  186. }