| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291 |
- /* SPDX-License-Identifier: GPL-2.0 */
- /*
- * Copyright (C) 2012 Fusion-io All rights reserved.
- * Copyright (C) 2012 Intel Corp. All rights reserved.
- */
- #ifndef BTRFS_RAID56_H
- #define BTRFS_RAID56_H
- #include <linux/types.h>
- #include <linux/list.h>
- #include <linux/spinlock.h>
- #include <linux/bio.h>
- #include <linux/refcount.h>
- #include <linux/workqueue.h>
- #include "volumes.h"
- struct page;
- struct btrfs_fs_info;
- enum btrfs_rbio_ops { /* Kind of operation an rbio performs; stored in btrfs_raid_bio::operation. */
- BTRFS_RBIO_WRITE,
- BTRFS_RBIO_READ_REBUILD,
- BTRFS_RBIO_PARITY_SCRUB,
- };
- /*
- * Overview of btrfs_raid_bio.
- *
- * One btrfs_raid_bio represents a full stripe of RAID56, including both data
- * and P/Q stripes. For now, each data and P/Q stripe is of a fixed length (64K).
- *
- * One btrfs_raid_bio can have one or more bios from higher layer, covering
- * part or all of the data stripes.
- *
- * [PAGES FROM HIGHER LAYER BIOS]
- * Higher layer bios are in the btrfs_raid_bio::bio_list.
- *
- * Pages from the bio_list are represented like the following:
- *
- * bio_list: |<- Bio 1 ->| |<- Bio 2 ->| ...
- * bio_paddrs: [0] [1] [2] [3] [4] [5] ...
- *
- * If there is a bio covering a sector (one btrfs fs block), the corresponding
- * pointer in btrfs_raid_bio::bio_paddrs[] will point to the physical address
- * (with the offset inside the page) of the corresponding bio.
- *
- * If there is no bio covering a sector, then btrfs_raid_bio::bio_paddrs[i] will
- * be INVALID_PADDR.
- *
- * The length of each entry in bio_paddrs[] is a step (aka, min(sectorsize, PAGE_SIZE)).
- *
- * [PAGES FOR INTERNAL USAGES]
- * Pages not covered by any bio or belonging to P/Q stripes are stored in
- * btrfs_raid_bio::stripe_pages[] and stripe_paddrs[], like the following:
- *
- * stripe_pages: |<- Page 0 ->|<- Page 1 ->| ...
- * stripe_paddrs: [0] [1] [2] [3] [4] ...
- *
- * stripe_pages[] array stores all the pages covering the full stripe, including
- * data and P/Q pages.
- * stripe_pages[0] is the first page of the first data stripe.
- * stripe_pages[BTRFS_STRIPE_LEN / PAGE_SIZE] is the first page of the second
- * data stripe.
- *
- * Some pointers inside stripe_pages[] can be NULL, e.g. for a full stripe write
- * (the bio covers all data stripes) there is no need to allocate pages for
- * data stripes (can grab from bio_paddrs[]).
- *
- * If the corresponding page of stripe_paddrs[i] is not allocated, the value of
- * stripe_paddrs[i] will be INVALID_PADDR.
- *
- * The length of each entry in stripe_paddrs[] is a step.
- *
- * [LOCATING A SECTOR]
- * To locate a sector for IO, we need the following info:
- *
- * - stripe_nr
- * Starts from 0 (representing the first data stripe), ends at
- * @nr_data (RAID5, P stripe) or @nr_data + 1 (RAID6, Q stripe).
- *
- * - sector_nr
- * Starts from 0 (representing the first sector of the stripe), ends
- * at BTRFS_STRIPE_LEN / sectorsize - 1.
- *
- * - step_nr
- * A step is min(sectorsize, PAGE_SIZE).
- *
- * Starts from 0 (representing the first step of the sector), ends
- * at @sector_nsteps - 1.
- *
- * Most call sites do not need to care about this parameter.
- * It exists for bs > ps support and is only needed for vertical stripe
- * related work (e.g. RMW/recover).
- *
- * - from which array
- * Whether grabbing from stripe_paddrs[] (aka, internal pages) or from the
- * bio_paddrs[] (aka, from the higher layer bios).
- *
- * For IO, a physical address is returned, so that we can extract the page and
- * the offset inside the page for IO.
- * The special value INVALID_PADDR means the physical address is invalid,
- * normally because there is no page allocated for the specified sector.
- */
- struct btrfs_raid_bio {
- struct btrfs_io_context *bioc;
- /*
- * While we're doing RMW on a stripe we put it into a hash table so we
- * can lock the stripe and merge more rbios into it.
- */
- struct list_head hash_list;
- /* LRU list for the stripe cache */
- struct list_head stripe_cache;
- /* For scheduling work in the helper threads */
- struct work_struct work;
- /*
- * bio_list and bio_list_lock are used to add more bios into the stripe
- * in hopes of avoiding the full RMW
- */
- struct bio_list bio_list;
- spinlock_t bio_list_lock;
- /*
- * Also protected by the bio_list_lock, the plug list is used by the
- * plugging code to collect partial bios while plugged. The stripe
- * locking code also uses it to hand off the stripe lock to the next
- * pending IO.
- */
- struct list_head plug_list;
- /* Flags that tell us if it is safe to merge with this bio. */
- unsigned long flags;
- /*
- * Which operation this rbio performs, one of enum btrfs_rbio_ops.
- *
- * Among others, this records whether we're doing a parity rebuild for
- * a read from higher up, which is handled differently from a parity
- * rebuild as part of RMW.
- */
- enum btrfs_rbio_ops operation;
- /* How many pages there are for the full stripe including P/Q */
- u16 nr_pages;
- /* How many sectors there are for the full stripe including P/Q */
- u16 nr_sectors;
- /* Number of data stripes (no p/q) */
- u8 nr_data;
- /* Number of all stripes (including P/Q) */
- u8 real_stripes;
- /* How many pages there are for each stripe */
- u8 stripe_npages;
- /* How many sectors there are for each stripe */
- u8 stripe_nsectors;
- /*
- * How many steps there are for one sector.
- *
- * For bs > ps cases, it's sectorsize / PAGE_SIZE.
- * For bs <= ps cases, it's always 1.
- */
- u8 sector_nsteps;
- /* Stripe number that we're scrubbing */
- u8 scrubp;
- /*
- * Size of all the bios in the bio_list. This helps us decide if the
- * rbio maps to a full stripe or not.
- */
- int bio_list_bytes;
- refcount_t refs;
- atomic_t stripes_pending;
- wait_queue_head_t io_wait;
- /* Bitmap to record which horizontal stripe has data */
- unsigned long dbitmap;
- /*
- * Bitmap for finish_*() calls, with stripe_nsectors-many bits in use.
- *
- * NOTE(review): this is a single inline unsigned long rather than a
- * separate allocation, so it presumably relies on stripe_nsectors
- * fitting in BITS_PER_LONG -- confirm against the allocation code.
- */
- unsigned long finish_pbitmap;
- /*
- * The members below are pointers to arrays. We allocate the rbio big
- * enough to hold them and setup their locations when the rbio is
- * allocated.
- *
- * NOTE(review): this comment originally spoke of "two arrays of
- * pointers"; more than two arrays follow now, so confirm against the
- * allocation code which of them live inside the rbio allocation.
- */
- /*
- * Pointers to pages that we allocated for reading/writing stripes
- * directly from the disk (including P/Q).
- */
- struct page **stripe_pages;
- /*
- * Physical addresses of the sectors in the bio_list, for faster lookup.
- * Each entry covers one step and is INVALID_PADDR if no bio covers it.
- */
- phys_addr_t *bio_paddrs;
- /*
- * Physical addresses of the sectors in the stripe_pages[].
- * Each entry covers one step and is INVALID_PADDR if the corresponding
- * page is not allocated.
- */
- phys_addr_t *stripe_paddrs;
- /*
- * Each set bit means the corresponding sector of the internal stripe
- * pages is uptodate.
- *
- * NOTE(review): the previous wording referred to stripe_sectors[],
- * which is not a member of this struct; presumably the sectors tracked
- * through stripe_paddrs[] are meant -- confirm.
- */
- unsigned long *stripe_uptodate_bitmap;
- /* Allocated with real_stripes-many pointers for finish_*() calls */
- void **finish_pointers;
- /*
- * The bitmap recording where IO errors happened.
- * Each bit is corresponding to one sector, no matter whether that
- * sector comes from the higher layer bios or from the internal pages.
- *
- * NOTE(review): the previous wording referred to bio_sectors[] and
- * stripe_sectors[], which are not members of this struct; presumably
- * bio_paddrs[] and stripe_paddrs[] are meant -- confirm.
- */
- unsigned long *error_bitmap;
- /*
- * Checksum buffer if the rbio is for data. The buffer should cover
- * all data sectors (excluding P/Q sectors).
- */
- u8 *csum_buf;
- /*
- * Each bit represents if the corresponding sector has data csum found.
- * Should only cover data sectors (excluding P/Q sectors).
- */
- unsigned long *csum_bitmap;
- };
- /*
- * For trace event usage only. Records useful debug info for each bio submitted
- * by RAID56 to each physical device.
- *
- * No matter signed or not, (-1) is always the one indicating we can not grab
- * the proper stripe number.
- */
- struct raid56_bio_trace_info {
- u64 devid;
- /* The offset inside the stripe. (<= STRIPE_LEN) */
- u32 offset;
- /*
- * Stripe number.
- * 0 is the first data stripe, and nr_data for P stripe,
- * nr_data + 1 for Q stripe.
- * >= real_stripes for the case where the proper stripe number could not
- * be grabbed (presumably the (-1) described above, i.e. 0xff in this u8).
- */
- u8 stripe_nr;
- };
- static inline int nr_data_stripes(const struct btrfs_chunk_map *map)
- { /* Data stripe count: map->num_stripes minus the parity stripe count for map->type. */
- return map->num_stripes - btrfs_nr_parity_stripes(map->type);
- }
- static inline int nr_bioc_data_stripes(const struct btrfs_io_context *bioc)
- { /* Data stripe count: bioc->num_stripes minus the parity stripe count for bioc->map_type. */
- return bioc->num_stripes - btrfs_nr_parity_stripes(bioc->map_type);
- }
- #define RAID5_P_STRIPE ((u64)-2) /* Sentinel u64 value standing for the P stripe. */
- #define RAID6_Q_STRIPE ((u64)-1) /* Sentinel u64 value standing for the Q stripe. */
- #define is_parity_stripe(x) (((x) == RAID5_P_STRIPE) || \
- ((x) == RAID6_Q_STRIPE)) /* True for either sentinel; may evaluate x twice, so avoid side effects in x. */
- struct btrfs_device; /* Forward declaration; only used through a pointer below. */
- void raid56_parity_recover(struct bio *bio, struct btrfs_io_context *bioc,
- int mirror_num);
- void raid56_parity_write(struct bio *bio, struct btrfs_io_context *bioc);
- struct btrfs_raid_bio *raid56_parity_alloc_scrub_rbio(struct bio *bio,
- struct btrfs_io_context *bioc,
- struct btrfs_device *scrub_dev,
- unsigned long *dbitmap, int stripe_nsectors);
- void raid56_parity_submit_scrub_rbio(struct btrfs_raid_bio *rbio); /* NOTE(review): presumably takes the rbio returned by raid56_parity_alloc_scrub_rbio() -- confirm. */
- void raid56_parity_cache_data_folios(struct btrfs_raid_bio *rbio,
- struct folio **data_folios, u64 data_logical);
- int btrfs_alloc_stripe_hash_table(struct btrfs_fs_info *info); /* NOTE(review): presumably paired with btrfs_free_stripe_hash_table() -- confirm. */
- void btrfs_free_stripe_hash_table(struct btrfs_fs_info *info);
- #endif
|