raid56.h 8.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291
  1. /* SPDX-License-Identifier: GPL-2.0 */
  2. /*
  3. * Copyright (C) 2012 Fusion-io All rights reserved.
  4. * Copyright (C) 2012 Intel Corp. All rights reserved.
  5. */
  6. #ifndef BTRFS_RAID56_H
  7. #define BTRFS_RAID56_H
  8. #include <linux/types.h>
  9. #include <linux/list.h>
  10. #include <linux/spinlock.h>
  11. #include <linux/bio.h>
  12. #include <linux/refcount.h>
  13. #include <linux/workqueue.h>
  14. #include "volumes.h"
  15. struct page;
  16. struct btrfs_fs_info;
  /*
   * Kind of work a btrfs_raid_bio carries out; stored in
   * btrfs_raid_bio::operation.
   *
   * Values are consecutive, starting from 0.
   */
  enum btrfs_rbio_ops {
      /* Write request. */
      BTRFS_RBIO_WRITE = 0,
      /* Parity rebuild on behalf of a read from higher up. */
      BTRFS_RBIO_READ_REBUILD = 1,
      /* Parity scrub. */
      BTRFS_RBIO_PARITY_SCRUB = 2,
  };
  22. /*
  23. * Overview of btrfs_raid_bio.
  24. *
  25. * One btrfs_raid_bio represents a full stripe of RAID56, including both data
  26. * and P/Q stripes. For now, each data and P/Q stripe is of a fixed length (64K).
  27. *
  28. * One btrfs_raid_bio can have one or more bios from higher layer, covering
  29. * part or all of the data stripes.
  30. *
  31. * [PAGES FROM HIGHER LAYER BIOS]
  32. * Higher layer bios are in the btrfs_raid_bio::bio_list.
  33. *
  34. * Pages from the bio_list are represented like the following:
  35. *
  36. * bio_list: |<- Bio 1 ->| |<- Bio 2 ->| ...
  37. * bio_paddrs: [0] [1] [2] [3] [4] [5] ...
  38. *
  39. * If there is a bio covering a sector (one btrfs fs block), the corresponding
  40. * entry in btrfs_raid_bio::bio_paddrs[] holds the physical address (including
  41. * the offset inside the page) of that sector's data in the bio.
  42. *
  43. * If there is no bio covering a sector, then btrfs_raid_bio::bio_paddrs[i] will
  44. * be INVALID_PADDR.
  45. *
  46. * The length of each entry in bio_paddrs[] is a step (aka, min(sectorsize, PAGE_SIZE)).
  47. *
  48. * [PAGES FOR INTERNAL USAGES]
  49. * Pages not covered by any bio or belonging to P/Q stripes are stored in
  50. * btrfs_raid_bio::stripe_pages[] and stripe_paddrs[], like the following:
  51. *
  52. * stripe_pages: |<- Page 0 ->|<- Page 1 ->| ...
  53. * stripe_paddrs: [0] [1] [2] [3] [4] ...
  54. *
  55. * stripe_pages[] array stores all the pages covering the full stripe, including
  56. * data and P/Q pages.
  57. * stripe_pages[0] is the first page of the first data stripe.
  58. * stripe_pages[BTRFS_STRIPE_LEN / PAGE_SIZE] is the first page of the second
  59. * data stripe.
  60. *
  61. * Some pointers inside stripe_pages[] can be NULL, e.g. for a full stripe write
  62. * (the bio covers all data stripes) there is no need to allocate pages for
  63. * data stripes (can grab from bio_paddrs[]).
  64. *
  65. * If the corresponding page of stripe_paddrs[i] is not allocated, the value of
  66. * stripe_paddrs[i] will be INVALID_PADDR.
  67. *
  68. * The length of each entry in stripe_paddrs[] is a step.
  69. *
  70. * [LOCATING A SECTOR]
  71. * To locate a sector for IO, we need the following info:
  72. *
  73. * - stripe_nr
  74. * Starts from 0 (representing the first data stripe), ends at
  75. * @nr_data (RAID5, P stripe) or @nr_data + 1 (RAID6, Q stripe).
  76. *
  77. * - sector_nr
  78. * Starts from 0 (representing the first sector of the stripe), ends
  79. * at BTRFS_STRIPE_LEN / sectorsize - 1.
  80. *
  81. * - step_nr
  82. * A step is min(sectorsize, PAGE_SIZE).
  83. *
  84. * Starts from 0 (representing the first step of the sector), ends
  85. * at @sector_nsteps - 1.
  86. *
  87. * Most call sites do not need to bother with this parameter.
  88. * It exists for bs > ps support and only matters for vertical stripe
  89. * related work (e.g. RMW/recover).
  90. *
  91. * - from which array
  92. * Whether grabbing from stripe_paddrs[] (aka, internal pages) or from the
  93. * bio_paddrs[] (aka, from the higher layer bios).
  94. *
  95. * For IO, a physical address is returned, so that we can extract the page and
  96. * the offset inside the page for IO.
  97. * The special value INVALID_PADDR indicates that the physical address is
  98. * invalid, normally meaning there is no page allocated for the specified sector.
  99. */
  100. struct btrfs_raid_bio {
  101. struct btrfs_io_context *bioc;
  102. /*
  103. * While we're doing RMW on a stripe we put it into a hash table so we
  104. * can lock the stripe and merge more rbios into it.
  105. */
  106. struct list_head hash_list;
  107. /* LRU list for the stripe cache */
  108. struct list_head stripe_cache;
  109. /* For scheduling work in the helper threads */
  110. struct work_struct work;
  111. /*
  112. * bio_list and bio_list_lock are used to add more bios into the stripe
  113. * in hopes of avoiding the full RMW
  114. */
  115. struct bio_list bio_list;
  116. spinlock_t bio_list_lock;
  117. /*
  118. * Also protected by the bio_list_lock, the plug list is used by the
  119. * plugging code to collect partial bios while plugged. The stripe
  120. * locking code also uses it to hand off the stripe lock to the next
  121. * pending IO.
  122. */
  123. struct list_head plug_list;
  124. /* Flags that tell us if it is safe to merge with this bio. */
  125. unsigned long flags;
  126. /*
  127. * Set if we're doing a parity rebuild for a read from higher up, which
  128. * is handled differently from a parity rebuild as part of RMW.
  129. */
  130. enum btrfs_rbio_ops operation;
  131. /* How many pages there are for the full stripe including P/Q */
  132. u16 nr_pages;
  133. /* How many sectors there are for the full stripe including P/Q */
  134. u16 nr_sectors;
  135. /* Number of data stripes (no p/q) */
  136. u8 nr_data;
  137. /* Number of all stripes (including P/Q) */
  138. u8 real_stripes;
  139. /* How many pages there are for each stripe */
  140. u8 stripe_npages;
  141. /* How many sectors there are for each stripe */
  142. u8 stripe_nsectors;
  143. /*
  144. * How many steps there are for one sector.
  145. *
  146. * For bs > ps cases, it's sectorsize / PAGE_SIZE.
  147. * For bs <= ps cases, it's always 1.
  148. */
  149. u8 sector_nsteps;
  150. /* Stripe number that we're scrubbing */
  151. u8 scrubp;
  152. /*
  153. * Size of all the bios in the bio_list. This helps us decide if the
  154. * rbio maps to a full stripe or not.
  155. */
  156. int bio_list_bytes;
  157. refcount_t refs;
  158. atomic_t stripes_pending;
  159. wait_queue_head_t io_wait;
  160. /* Bitmap to record which horizontal stripe has data */
  161. unsigned long dbitmap;
  162. /* Allocated with stripe_nsectors-many bits for finish_*() calls */
  163. unsigned long finish_pbitmap;
  164. /*
  165. * These are two arrays of pointers. We allocate the rbio big enough
  166. * to hold them both and setup their locations when the rbio is
  167. * allocated.
  168. */
  169. /*
  170. * Pointers to pages that we allocated for reading/writing stripes
  171. * directly from the disk (including P/Q).
  172. */
  173. struct page **stripe_pages;
  174. /* Pointers to the sectors in the bio_list, for faster lookup */
  175. phys_addr_t *bio_paddrs;
  176. /* Pointers to the sectors in the stripe_pages[]. */
  177. phys_addr_t *stripe_paddrs;
  178. /* Each set bit means the corresponding sector in stripe_sectors[] is uptodate. */
  179. unsigned long *stripe_uptodate_bitmap;
  180. /* Allocated with real_stripes-many pointers for finish_*() calls */
  181. void **finish_pointers;
  182. /*
  183. * The bitmap recording where IO errors happened.
  184. * Each bit is corresponding to one sector in either bio_sectors[] or
  185. * stripe_sectors[] array.
  186. */
  187. unsigned long *error_bitmap;
  188. /*
  189. * Checksum buffer if the rbio is for data. The buffer should cover
  190. * all data sectors (excluding P/Q sectors).
  191. */
  192. u8 *csum_buf;
  193. /*
  194. * Each bit represents if the corresponding sector has data csum found.
  195. * Should only cover data sectors (excluding P/Q sectors).
  196. */
  197. unsigned long *csum_bitmap;
  198. };
  199. /*
  200. * For trace event usage only. Records useful debug info for each bio submitted
  201. * by RAID56 to each physical device.
  202. *
  203. * No matter signed or not, (-1) is always the one indicating we can not grab
  204. * the proper stripe number.
  205. */
  206. struct raid56_bio_trace_info {
  207. u64 devid;
  208. /* The offset inside the stripe. (<= STRIPE_LEN) */
  209. u32 offset;
  210. /*
  211. * Stripe number.
  212. * 0 is the first data stripe, and nr_data for P stripe,
  213. * nr_data + 1 for Q stripe.
  214. * >= real_stripes for
  215. */
  216. u8 stripe_nr;
  217. };
  218. static inline int nr_data_stripes(const struct btrfs_chunk_map *map)
  219. {
  220. return map->num_stripes - btrfs_nr_parity_stripes(map->type);
  221. }
  222. static inline int nr_bioc_data_stripes(const struct btrfs_io_context *bioc)
  223. {
  224. return bioc->num_stripes - btrfs_nr_parity_stripes(bioc->map_type);
  225. }
  /*
   * Sentinel values identifying the P and Q parity stripes. They are the two
   * largest u64 values: RAID5_P_STRIPE is U64_MAX - 1 and RAID6_Q_STRIPE is
   * U64_MAX.
   */
  #define RAID5_P_STRIPE ((u64)-2)
  #define RAID6_Q_STRIPE ((u64)-1)

  /*
   * Evaluates to 1 if @x equals RAID5_P_STRIPE or RAID6_Q_STRIPE, 0 otherwise.
   *
   * Because the two sentinels are adjacent at the very top of the u64 range,
   * one ">=" against the smaller sentinel is equivalent to testing for
   * equality with each of them. Unlike a pair of "==" tests it evaluates @x
   * exactly once, so an argument with side effects is handled correctly, and
   * the expansion stays a plain expression usable wherever the old one was.
   *
   * This must be revisited if the sentinels ever stop being the two largest
   * u64 values.
   */
  #define is_parity_stripe(x) ((x) >= RAID5_P_STRIPE)
  230. struct btrfs_device;
  231. void raid56_parity_recover(struct bio *bio, struct btrfs_io_context *bioc,
  232. int mirror_num);
  233. void raid56_parity_write(struct bio *bio, struct btrfs_io_context *bioc);
  234. struct btrfs_raid_bio *raid56_parity_alloc_scrub_rbio(struct bio *bio,
  235. struct btrfs_io_context *bioc,
  236. struct btrfs_device *scrub_dev,
  237. unsigned long *dbitmap, int stripe_nsectors);
  238. void raid56_parity_submit_scrub_rbio(struct btrfs_raid_bio *rbio);
  239. void raid56_parity_cache_data_folios(struct btrfs_raid_bio *rbio,
  240. struct folio **data_folios, u64 data_logical);
  241. int btrfs_alloc_stripe_hash_table(struct btrfs_fs_info *info);
  242. void btrfs_free_stripe_hash_table(struct btrfs_fs_info *info);
  243. #endif