locking.c 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383
  1. // SPDX-License-Identifier: GPL-2.0
  2. /*
  3. * Copyright (C) 2008 Oracle. All rights reserved.
  4. */
  5. #include <linux/sched.h>
  6. #include <linux/pagemap.h>
  7. #include <linux/spinlock.h>
  8. #include <linux/page-flags.h>
  9. #include <asm/bug.h>
  10. #include <trace/events/btrfs.h>
  11. #include "ctree.h"
  12. #include "extent_io.h"
  13. #include "locking.h"
  14. /*
  15. * Lockdep class keys for extent_buffer->lock's in this root. For a given
  16. * eb, the lockdep key is determined by the btrfs_root it belongs to and
  17. * the level the eb occupies in the tree.
  18. *
  19. * Different roots are used for different purposes and may nest inside each
  20. * other and they require separate keysets. As lockdep keys should be
  21. * static, assign keysets according to the purpose of the root as indicated
  22. * by btrfs_root->root_key.objectid. This ensures that all special purpose
  23. * roots have separate keysets.
  24. *
  25. * Lock-nesting across peer nodes is always done with the immediate parent
  26. * node locked thus preventing deadlock. As lockdep doesn't know this, use
  27. * subclass to avoid triggering lockdep warning in such cases.
  28. *
  29. * The key is set by the readpage_end_io_hook after the buffer has passed
  30. * csum validation but before the pages are unlocked. It is also set by
  31. * btrfs_init_new_buffer on freshly allocated blocks.
  32. *
  33. * We also add a check to make sure the highest level of the tree is the
  34. * same as our lockdep setup here. If BTRFS_MAX_LEVEL changes, this code
  35. * needs update as well.
  36. */
  37. #ifdef CONFIG_DEBUG_LOCK_ALLOC
  38. #if BTRFS_MAX_LEVEL != 8
  39. #error
  40. #endif
  41. #define DEFINE_LEVEL(stem, level) \
  42. .names[level] = "btrfs-" stem "-0" #level,
  43. #define DEFINE_NAME(stem) \
  44. DEFINE_LEVEL(stem, 0) \
  45. DEFINE_LEVEL(stem, 1) \
  46. DEFINE_LEVEL(stem, 2) \
  47. DEFINE_LEVEL(stem, 3) \
  48. DEFINE_LEVEL(stem, 4) \
  49. DEFINE_LEVEL(stem, 5) \
  50. DEFINE_LEVEL(stem, 6) \
  51. DEFINE_LEVEL(stem, 7)
  52. static struct btrfs_lockdep_keyset {
  53. u64 id; /* root objectid */
  54. /* Longest entry: btrfs-block-group-00 */
  55. char names[BTRFS_MAX_LEVEL][24];
  56. struct lock_class_key keys[BTRFS_MAX_LEVEL];
  57. } btrfs_lockdep_keysets[] = {
  58. { .id = BTRFS_ROOT_TREE_OBJECTID, DEFINE_NAME("root") },
  59. { .id = BTRFS_EXTENT_TREE_OBJECTID, DEFINE_NAME("extent") },
  60. { .id = BTRFS_CHUNK_TREE_OBJECTID, DEFINE_NAME("chunk") },
  61. { .id = BTRFS_DEV_TREE_OBJECTID, DEFINE_NAME("dev") },
  62. { .id = BTRFS_CSUM_TREE_OBJECTID, DEFINE_NAME("csum") },
  63. { .id = BTRFS_QUOTA_TREE_OBJECTID, DEFINE_NAME("quota") },
  64. { .id = BTRFS_TREE_LOG_OBJECTID, DEFINE_NAME("log") },
  65. { .id = BTRFS_TREE_RELOC_OBJECTID, DEFINE_NAME("treloc") },
  66. { .id = BTRFS_DATA_RELOC_TREE_OBJECTID, DEFINE_NAME("dreloc") },
  67. { .id = BTRFS_UUID_TREE_OBJECTID, DEFINE_NAME("uuid") },
  68. { .id = BTRFS_FREE_SPACE_TREE_OBJECTID, DEFINE_NAME("free-space") },
  69. { .id = BTRFS_BLOCK_GROUP_TREE_OBJECTID, DEFINE_NAME("block-group") },
  70. { .id = BTRFS_RAID_STRIPE_TREE_OBJECTID, DEFINE_NAME("raid-stripe") },
  71. { .id = BTRFS_REMAP_TREE_OBJECTID, DEFINE_NAME("remap") },
  72. { .id = 0, DEFINE_NAME("tree") },
  73. };
  74. #undef DEFINE_LEVEL
  75. #undef DEFINE_NAME
  76. void btrfs_set_buffer_lockdep_class(u64 objectid, struct extent_buffer *eb, int level)
  77. {
  78. struct btrfs_lockdep_keyset *ks;
  79. ASSERT(level < ARRAY_SIZE(ks->keys));
  80. /* Find the matching keyset, id 0 is the default entry */
  81. for (ks = btrfs_lockdep_keysets; ks->id; ks++)
  82. if (ks->id == objectid)
  83. break;
  84. lockdep_set_class_and_name(&eb->lock, &ks->keys[level], ks->names[level]);
  85. }
  86. void btrfs_maybe_reset_lockdep_class(struct btrfs_root *root, struct extent_buffer *eb)
  87. {
  88. if (test_bit(BTRFS_ROOT_RESET_LOCKDEP_CLASS, &root->state))
  89. btrfs_set_buffer_lockdep_class(btrfs_root_id(root),
  90. eb, btrfs_header_level(eb));
  91. }
  92. #endif
  93. #ifdef CONFIG_BTRFS_DEBUG
  94. static void btrfs_set_eb_lock_owner(struct extent_buffer *eb, pid_t owner)
  95. {
  96. eb->lock_owner = owner;
  97. }
  98. #else
  99. static void btrfs_set_eb_lock_owner(struct extent_buffer *eb, pid_t owner) { }
  100. #endif
  101. /*
  102. * Extent buffer locking
  103. * =====================
  104. *
  105. * We use a rw_semaphore for tree locking, and the semantics are exactly the
  106. * same:
  107. *
  108. * - reader/writer exclusion
  109. * - writer/writer exclusion
  110. * - reader/reader sharing
  111. * - try-lock semantics for readers and writers
  112. *
  113. * The rwsem implementation does opportunistic spinning which reduces number of
  114. * times the locking task needs to sleep.
  115. */
  116. /*
  117. * btrfs_tree_read_lock_nested - lock extent buffer for read
  118. * @eb: the eb to be locked
  119. * @nest: the nesting level to be used for lockdep
  120. *
  121. * This takes the read lock on the extent buffer, using the specified nesting
  122. * level for lockdep purposes.
  123. */
  124. void btrfs_tree_read_lock_nested(struct extent_buffer *eb, enum btrfs_lock_nesting nest)
  125. {
  126. u64 start_ns = 0;
  127. if (trace_btrfs_tree_read_lock_enabled())
  128. start_ns = ktime_get_ns();
  129. down_read_nested(&eb->lock, nest);
  130. trace_btrfs_tree_read_lock(eb, start_ns);
  131. }
  132. /*
  133. * Try-lock for read.
  134. *
  135. * Return true if the rwlock has been taken, false otherwise
  136. */
  137. bool btrfs_try_tree_read_lock(struct extent_buffer *eb)
  138. {
  139. if (down_read_trylock(&eb->lock)) {
  140. trace_btrfs_try_tree_read_lock(eb);
  141. return true;
  142. }
  143. return false;
  144. }
  145. /*
  146. * Release read lock.
  147. */
  148. void btrfs_tree_read_unlock(struct extent_buffer *eb)
  149. {
  150. trace_btrfs_tree_read_unlock(eb);
  151. up_read(&eb->lock);
  152. }
  153. /*
  154. * Lock eb for write.
  155. *
  156. * @eb: the eb to lock
  157. * @nest: the nesting to use for the lock
  158. *
  159. * Returns with the eb->lock write locked.
  160. */
  161. void btrfs_tree_lock_nested(struct extent_buffer *eb, enum btrfs_lock_nesting nest)
  162. __acquires(&eb->lock)
  163. {
  164. u64 start_ns = 0;
  165. if (trace_btrfs_tree_lock_enabled())
  166. start_ns = ktime_get_ns();
  167. down_write_nested(&eb->lock, nest);
  168. btrfs_set_eb_lock_owner(eb, current->pid);
  169. trace_btrfs_tree_lock(eb, start_ns);
  170. }
  171. /*
  172. * Release the write lock.
  173. */
  174. void btrfs_tree_unlock(struct extent_buffer *eb)
  175. {
  176. trace_btrfs_tree_unlock(eb);
  177. btrfs_set_eb_lock_owner(eb, 0);
  178. up_write(&eb->lock);
  179. }
  180. /*
  181. * This releases any locks held in the path starting at level and going all the
  182. * way up to the root.
  183. *
  184. * btrfs_search_slot will keep the lock held on higher nodes in a few corner
  185. * cases, such as COW of the block at slot zero in the node. This ignores
  186. * those rules, and it should only be called when there are no more updates to
  187. * be done higher up in the tree.
  188. */
  189. void btrfs_unlock_up_safe(struct btrfs_path *path, int level)
  190. {
  191. int i;
  192. if (path->keep_locks)
  193. return;
  194. for (i = level; i < BTRFS_MAX_LEVEL; i++) {
  195. if (!path->nodes[i])
  196. continue;
  197. if (!path->locks[i])
  198. continue;
  199. btrfs_tree_unlock_rw(path->nodes[i], path->locks[i]);
  200. path->locks[i] = 0;
  201. }
  202. }
  203. /*
  204. * Loop around taking references on and locking the root node of the tree until
  205. * we end up with a lock on the root node.
  206. *
  207. * Return: root extent buffer with write lock held
  208. */
  209. struct extent_buffer *btrfs_lock_root_node(struct btrfs_root *root)
  210. {
  211. struct extent_buffer *eb;
  212. while (1) {
  213. eb = btrfs_root_node(root);
  214. btrfs_maybe_reset_lockdep_class(root, eb);
  215. btrfs_tree_lock(eb);
  216. if (eb == root->node)
  217. break;
  218. btrfs_tree_unlock(eb);
  219. free_extent_buffer(eb);
  220. }
  221. return eb;
  222. }
  223. /*
  224. * Loop around taking references on and locking the root node of the tree until
  225. * we end up with a lock on the root node.
  226. *
  227. * Return: root extent buffer with read lock held
  228. */
  229. struct extent_buffer *btrfs_read_lock_root_node(struct btrfs_root *root)
  230. {
  231. struct extent_buffer *eb;
  232. while (1) {
  233. eb = btrfs_root_node(root);
  234. btrfs_maybe_reset_lockdep_class(root, eb);
  235. btrfs_tree_read_lock(eb);
  236. if (eb == root->node)
  237. break;
  238. btrfs_tree_read_unlock(eb);
  239. free_extent_buffer(eb);
  240. }
  241. return eb;
  242. }
  243. /*
  244. * Loop around taking references on and locking the root node of the tree in
  245. * nowait mode until we end up with a lock on the root node or returning to
  246. * avoid blocking.
  247. *
  248. * Return: root extent buffer with read lock held or -EAGAIN.
  249. */
  250. struct extent_buffer *btrfs_try_read_lock_root_node(struct btrfs_root *root)
  251. {
  252. struct extent_buffer *eb;
  253. while (1) {
  254. eb = btrfs_root_node(root);
  255. if (!btrfs_try_tree_read_lock(eb)) {
  256. free_extent_buffer(eb);
  257. return ERR_PTR(-EAGAIN);
  258. }
  259. if (eb == root->node)
  260. break;
  261. btrfs_tree_read_unlock(eb);
  262. free_extent_buffer(eb);
  263. }
  264. return eb;
  265. }
  266. /*
  267. * DREW locks
  268. * ==========
  269. *
  270. * DREW stands for double-reader-writer-exclusion lock. It's used in situation
  271. * where you want to provide A-B exclusion but not AA or BB.
  272. *
  273. * Currently implementation gives more priority to reader. If a reader and a
  274. * writer both race to acquire their respective sides of the lock the writer
  275. * would yield its lock as soon as it detects a concurrent reader. Additionally
  276. * if there are pending readers no new writers would be allowed to come in and
  277. * acquire the lock.
  278. */
  279. void btrfs_drew_lock_init(struct btrfs_drew_lock *lock)
  280. {
  281. atomic_set(&lock->readers, 0);
  282. atomic_set(&lock->writers, 0);
  283. init_waitqueue_head(&lock->pending_readers);
  284. init_waitqueue_head(&lock->pending_writers);
  285. }
  286. /* Return true if acquisition is successful, false otherwise */
  287. bool btrfs_drew_try_write_lock(struct btrfs_drew_lock *lock)
  288. {
  289. if (atomic_read(&lock->readers))
  290. return false;
  291. atomic_inc(&lock->writers);
  292. /* Ensure writers count is updated before we check for pending readers */
  293. smp_mb__after_atomic();
  294. if (atomic_read(&lock->readers)) {
  295. btrfs_drew_write_unlock(lock);
  296. return false;
  297. }
  298. return true;
  299. }
  300. void btrfs_drew_write_lock(struct btrfs_drew_lock *lock)
  301. {
  302. while (true) {
  303. if (btrfs_drew_try_write_lock(lock))
  304. return;
  305. wait_event(lock->pending_writers, !atomic_read(&lock->readers));
  306. }
  307. }
  308. void btrfs_drew_write_unlock(struct btrfs_drew_lock *lock)
  309. {
  310. /*
  311. * atomic_dec_and_test() implies a full barrier, so woken up readers are
  312. * guaranteed to see the decrement.
  313. */
  314. if (atomic_dec_and_test(&lock->writers))
  315. wake_up(&lock->pending_readers);
  316. }
  317. void btrfs_drew_read_lock(struct btrfs_drew_lock *lock)
  318. {
  319. atomic_inc(&lock->readers);
  320. /*
  321. * Ensure the pending reader count is perceived BEFORE this reader
  322. * goes to sleep in case of active writers. This guarantees new writers
  323. * won't be allowed and that the current reader will be woken up when
  324. * the last active writer finishes its jobs.
  325. */
  326. smp_mb__after_atomic();
  327. wait_event(lock->pending_readers, atomic_read(&lock->writers) == 0);
  328. }
  329. void btrfs_drew_read_unlock(struct btrfs_drew_lock *lock)
  330. {
  331. /*
  332. * atomic_dec_and_test implies a full barrier, so woken up writers
  333. * are guaranteed to see the decrement
  334. */
  335. if (atomic_dec_and_test(&lock->readers))
  336. wake_up(&lock->pending_writers);
  337. }