| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383 |
- // SPDX-License-Identifier: GPL-2.0
- /*
- * Copyright (C) 2008 Oracle. All rights reserved.
- */
- #include <linux/sched.h>
- #include <linux/pagemap.h>
- #include <linux/spinlock.h>
- #include <linux/page-flags.h>
- #include <asm/bug.h>
- #include <trace/events/btrfs.h>
- #include "ctree.h"
- #include "extent_io.h"
- #include "locking.h"
- /*
- * Lockdep class keys for extent_buffer->lock's in this root. For a given
- * eb, the lockdep key is determined by the btrfs_root it belongs to and
- * the level the eb occupies in the tree.
- *
- * Different roots are used for different purposes and may nest inside each
- * other and they require separate keysets. As lockdep keys should be
- * static, assign keysets according to the purpose of the root as indicated
- * by btrfs_root->root_key.objectid. This ensures that all special purpose
- * roots have separate keysets.
- *
- * Lock-nesting across peer nodes is always done with the immediate parent
- * node locked thus preventing deadlock. As lockdep doesn't know this, use
- * subclass to avoid triggering lockdep warning in such cases.
- *
- * The key is set by the readpage_end_io_hook after the buffer has passed
- * csum validation but before the pages are unlocked. It is also set by
- * btrfs_init_new_buffer on freshly allocated blocks.
- *
- * We also add a check to make sure the highest level of the tree is the
- * same as our lockdep setup here. If BTRFS_MAX_LEVEL changes, this code
- * needs update as well.
- */
- #ifdef CONFIG_DEBUG_LOCK_ALLOC
- #if BTRFS_MAX_LEVEL != 8
- #error
- #endif
- #define DEFINE_LEVEL(stem, level) \
- .names[level] = "btrfs-" stem "-0" #level,
- #define DEFINE_NAME(stem) \
- DEFINE_LEVEL(stem, 0) \
- DEFINE_LEVEL(stem, 1) \
- DEFINE_LEVEL(stem, 2) \
- DEFINE_LEVEL(stem, 3) \
- DEFINE_LEVEL(stem, 4) \
- DEFINE_LEVEL(stem, 5) \
- DEFINE_LEVEL(stem, 6) \
- DEFINE_LEVEL(stem, 7)
- static struct btrfs_lockdep_keyset {
- u64 id; /* root objectid */
- /* Longest entry: btrfs-block-group-00 */
- char names[BTRFS_MAX_LEVEL][24];
- struct lock_class_key keys[BTRFS_MAX_LEVEL];
- } btrfs_lockdep_keysets[] = {
- { .id = BTRFS_ROOT_TREE_OBJECTID, DEFINE_NAME("root") },
- { .id = BTRFS_EXTENT_TREE_OBJECTID, DEFINE_NAME("extent") },
- { .id = BTRFS_CHUNK_TREE_OBJECTID, DEFINE_NAME("chunk") },
- { .id = BTRFS_DEV_TREE_OBJECTID, DEFINE_NAME("dev") },
- { .id = BTRFS_CSUM_TREE_OBJECTID, DEFINE_NAME("csum") },
- { .id = BTRFS_QUOTA_TREE_OBJECTID, DEFINE_NAME("quota") },
- { .id = BTRFS_TREE_LOG_OBJECTID, DEFINE_NAME("log") },
- { .id = BTRFS_TREE_RELOC_OBJECTID, DEFINE_NAME("treloc") },
- { .id = BTRFS_DATA_RELOC_TREE_OBJECTID, DEFINE_NAME("dreloc") },
- { .id = BTRFS_UUID_TREE_OBJECTID, DEFINE_NAME("uuid") },
- { .id = BTRFS_FREE_SPACE_TREE_OBJECTID, DEFINE_NAME("free-space") },
- { .id = BTRFS_BLOCK_GROUP_TREE_OBJECTID, DEFINE_NAME("block-group") },
- { .id = BTRFS_RAID_STRIPE_TREE_OBJECTID, DEFINE_NAME("raid-stripe") },
- { .id = BTRFS_REMAP_TREE_OBJECTID, DEFINE_NAME("remap") },
- { .id = 0, DEFINE_NAME("tree") },
- };
- #undef DEFINE_LEVEL
- #undef DEFINE_NAME
- void btrfs_set_buffer_lockdep_class(u64 objectid, struct extent_buffer *eb, int level)
- {
- struct btrfs_lockdep_keyset *ks;
- ASSERT(level < ARRAY_SIZE(ks->keys));
- /* Find the matching keyset, id 0 is the default entry */
- for (ks = btrfs_lockdep_keysets; ks->id; ks++)
- if (ks->id == objectid)
- break;
- lockdep_set_class_and_name(&eb->lock, &ks->keys[level], ks->names[level]);
- }
- void btrfs_maybe_reset_lockdep_class(struct btrfs_root *root, struct extent_buffer *eb)
- {
- if (test_bit(BTRFS_ROOT_RESET_LOCKDEP_CLASS, &root->state))
- btrfs_set_buffer_lockdep_class(btrfs_root_id(root),
- eb, btrfs_header_level(eb));
- }
- #endif
- #ifdef CONFIG_BTRFS_DEBUG
- static void btrfs_set_eb_lock_owner(struct extent_buffer *eb, pid_t owner)
- {
- eb->lock_owner = owner;
- }
- #else
- static void btrfs_set_eb_lock_owner(struct extent_buffer *eb, pid_t owner) { }
- #endif
- /*
- * Extent buffer locking
- * =====================
- *
- * We use a rw_semaphore for tree locking, and the semantics are exactly the
- * same:
- *
- * - reader/writer exclusion
- * - writer/writer exclusion
- * - reader/reader sharing
- * - try-lock semantics for readers and writers
- *
- * The rwsem implementation does opportunistic spinning which reduces number of
- * times the locking task needs to sleep.
- */
- /*
- * btrfs_tree_read_lock_nested - lock extent buffer for read
- * @eb: the eb to be locked
- * @nest: the nesting level to be used for lockdep
- *
- * This takes the read lock on the extent buffer, using the specified nesting
- * level for lockdep purposes.
- */
- void btrfs_tree_read_lock_nested(struct extent_buffer *eb, enum btrfs_lock_nesting nest)
- {
- u64 start_ns = 0;
- if (trace_btrfs_tree_read_lock_enabled())
- start_ns = ktime_get_ns();
- down_read_nested(&eb->lock, nest);
- trace_btrfs_tree_read_lock(eb, start_ns);
- }
- /*
- * Try-lock for read.
- *
- * Return true if the rwlock has been taken, false otherwise
- */
- bool btrfs_try_tree_read_lock(struct extent_buffer *eb)
- {
- if (down_read_trylock(&eb->lock)) {
- trace_btrfs_try_tree_read_lock(eb);
- return true;
- }
- return false;
- }
- /*
- * Release read lock.
- */
- void btrfs_tree_read_unlock(struct extent_buffer *eb)
- {
- trace_btrfs_tree_read_unlock(eb);
- up_read(&eb->lock);
- }
- /*
- * Lock eb for write.
- *
- * @eb: the eb to lock
- * @nest: the nesting to use for the lock
- *
- * Returns with the eb->lock write locked.
- */
- void btrfs_tree_lock_nested(struct extent_buffer *eb, enum btrfs_lock_nesting nest)
- __acquires(&eb->lock)
- {
- u64 start_ns = 0;
- if (trace_btrfs_tree_lock_enabled())
- start_ns = ktime_get_ns();
- down_write_nested(&eb->lock, nest);
- btrfs_set_eb_lock_owner(eb, current->pid);
- trace_btrfs_tree_lock(eb, start_ns);
- }
- /*
- * Release the write lock.
- */
- void btrfs_tree_unlock(struct extent_buffer *eb)
- {
- trace_btrfs_tree_unlock(eb);
- btrfs_set_eb_lock_owner(eb, 0);
- up_write(&eb->lock);
- }
- /*
- * This releases any locks held in the path starting at level and going all the
- * way up to the root.
- *
- * btrfs_search_slot will keep the lock held on higher nodes in a few corner
- * cases, such as COW of the block at slot zero in the node. This ignores
- * those rules, and it should only be called when there are no more updates to
- * be done higher up in the tree.
- */
- void btrfs_unlock_up_safe(struct btrfs_path *path, int level)
- {
- int i;
- if (path->keep_locks)
- return;
- for (i = level; i < BTRFS_MAX_LEVEL; i++) {
- if (!path->nodes[i])
- continue;
- if (!path->locks[i])
- continue;
- btrfs_tree_unlock_rw(path->nodes[i], path->locks[i]);
- path->locks[i] = 0;
- }
- }
- /*
- * Loop around taking references on and locking the root node of the tree until
- * we end up with a lock on the root node.
- *
- * Return: root extent buffer with write lock held
- */
- struct extent_buffer *btrfs_lock_root_node(struct btrfs_root *root)
- {
- struct extent_buffer *eb;
- while (1) {
- eb = btrfs_root_node(root);
- btrfs_maybe_reset_lockdep_class(root, eb);
- btrfs_tree_lock(eb);
- if (eb == root->node)
- break;
- btrfs_tree_unlock(eb);
- free_extent_buffer(eb);
- }
- return eb;
- }
- /*
- * Loop around taking references on and locking the root node of the tree until
- * we end up with a lock on the root node.
- *
- * Return: root extent buffer with read lock held
- */
- struct extent_buffer *btrfs_read_lock_root_node(struct btrfs_root *root)
- {
- struct extent_buffer *eb;
- while (1) {
- eb = btrfs_root_node(root);
- btrfs_maybe_reset_lockdep_class(root, eb);
- btrfs_tree_read_lock(eb);
- if (eb == root->node)
- break;
- btrfs_tree_read_unlock(eb);
- free_extent_buffer(eb);
- }
- return eb;
- }
- /*
- * Loop around taking references on and locking the root node of the tree in
- * nowait mode until we end up with a lock on the root node or returning to
- * avoid blocking.
- *
- * Return: root extent buffer with read lock held or -EAGAIN.
- */
- struct extent_buffer *btrfs_try_read_lock_root_node(struct btrfs_root *root)
- {
- struct extent_buffer *eb;
- while (1) {
- eb = btrfs_root_node(root);
- if (!btrfs_try_tree_read_lock(eb)) {
- free_extent_buffer(eb);
- return ERR_PTR(-EAGAIN);
- }
- if (eb == root->node)
- break;
- btrfs_tree_read_unlock(eb);
- free_extent_buffer(eb);
- }
- return eb;
- }
- /*
- * DREW locks
- * ==========
- *
- * DREW stands for double-reader-writer-exclusion lock. It's used in situation
- * where you want to provide A-B exclusion but not AA or BB.
- *
- * Currently implementation gives more priority to reader. If a reader and a
- * writer both race to acquire their respective sides of the lock the writer
- * would yield its lock as soon as it detects a concurrent reader. Additionally
- * if there are pending readers no new writers would be allowed to come in and
- * acquire the lock.
- */
- void btrfs_drew_lock_init(struct btrfs_drew_lock *lock)
- {
- atomic_set(&lock->readers, 0);
- atomic_set(&lock->writers, 0);
- init_waitqueue_head(&lock->pending_readers);
- init_waitqueue_head(&lock->pending_writers);
- }
- /* Return true if acquisition is successful, false otherwise */
- bool btrfs_drew_try_write_lock(struct btrfs_drew_lock *lock)
- {
- if (atomic_read(&lock->readers))
- return false;
- atomic_inc(&lock->writers);
- /* Ensure writers count is updated before we check for pending readers */
- smp_mb__after_atomic();
- if (atomic_read(&lock->readers)) {
- btrfs_drew_write_unlock(lock);
- return false;
- }
- return true;
- }
- void btrfs_drew_write_lock(struct btrfs_drew_lock *lock)
- {
- while (true) {
- if (btrfs_drew_try_write_lock(lock))
- return;
- wait_event(lock->pending_writers, !atomic_read(&lock->readers));
- }
- }
- void btrfs_drew_write_unlock(struct btrfs_drew_lock *lock)
- {
- /*
- * atomic_dec_and_test() implies a full barrier, so woken up readers are
- * guaranteed to see the decrement.
- */
- if (atomic_dec_and_test(&lock->writers))
- wake_up(&lock->pending_readers);
- }
- void btrfs_drew_read_lock(struct btrfs_drew_lock *lock)
- {
- atomic_inc(&lock->readers);
- /*
- * Ensure the pending reader count is perceived BEFORE this reader
- * goes to sleep in case of active writers. This guarantees new writers
- * won't be allowed and that the current reader will be woken up when
- * the last active writer finishes its jobs.
- */
- smp_mb__after_atomic();
- wait_event(lock->pending_readers, atomic_read(&lock->writers) == 0);
- }
- void btrfs_drew_read_unlock(struct btrfs_drew_lock *lock)
- {
- /*
- * atomic_dec_and_test implies a full barrier, so woken up writers
- * are guaranteed to see the decrement
- */
- if (atomic_dec_and_test(&lock->readers))
- wake_up(&lock->pending_writers);
- }
|