volumes.h 28 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907
  1. /* SPDX-License-Identifier: GPL-2.0 */
  2. /*
  3. * Copyright (C) 2007 Oracle. All rights reserved.
  4. */
  5. #ifndef BTRFS_VOLUMES_H
  6. #define BTRFS_VOLUMES_H
  7. #include <linux/blk_types.h>
  8. #include <linux/blkdev.h>
  9. #include <linux/sizes.h>
  10. #include <linux/atomic.h>
  11. #include <linux/sort.h>
  12. #include <linux/list.h>
  13. #include <linux/mutex.h>
  14. #include <linux/log2.h>
  15. #include <linux/kobject.h>
  16. #include <linux/refcount.h>
  17. #include <linux/completion.h>
  18. #include <linux/rbtree.h>
  19. #include <uapi/linux/btrfs.h>
  20. #include <uapi/linux/btrfs_tree.h>
  21. #include "messages.h"
  22. #include "extent-io-tree.h"
  /* Forward declarations of structure types referenced in this header. */
  struct block_device;
  struct bdev_handle;
  struct btrfs_fs_info;
  struct btrfs_block_group;
  struct btrfs_trans_handle;
  struct btrfs_transaction;
  struct btrfs_zoned_device_info;
  struct btrfs_space_info;
  31. #define BTRFS_MAX_DATA_CHUNK_SIZE (10ULL * SZ_1G)
  32. /*
  33. * Arbitrary maximum size of one discard request to limit potentially long time
  34. * spent in blkdev_issue_discard().
  35. */
  36. #define BTRFS_MAX_DISCARD_CHUNK_SIZE (SZ_1G)
  37. extern struct mutex uuid_mutex;
  38. #define BTRFS_STRIPE_LEN SZ_64K
  39. #define BTRFS_STRIPE_LEN_SHIFT (16)
  40. #define BTRFS_STRIPE_LEN_MASK (BTRFS_STRIPE_LEN - 1)
  41. static_assert(ilog2(BTRFS_STRIPE_LEN) == BTRFS_STRIPE_LEN_SHIFT);
  /*
   * Used by sanity check for btrfs_raid_types.
   *
   * Evaluates to the 1-based position of the lowest set bit of @n (count of
   * trailing zero bits plus one). @n must be non-zero, the underlying builtin
   * is undefined for 0.
   */
  #define const_ffs(n) (__builtin_ctzll(n) + 1)
  44. /*
  45. * The conversion from BTRFS_BLOCK_GROUP_* bits to btrfs_raid_type requires
  46. * RAID0 always to be the lowest profile bit.
  47. * Although it's part of on-disk format and should never change, do extra
  48. * compile-time sanity checks.
  49. */
  50. static_assert(const_ffs(BTRFS_BLOCK_GROUP_RAID0) <
  51. const_ffs(BTRFS_BLOCK_GROUP_PROFILE_MASK & ~BTRFS_BLOCK_GROUP_RAID0));
  /*
   * Map a single block group profile bit to its btrfs_raid_types index.
   *
   * The profile is shifted so that the RAID0 bit lands on bit 1, which makes
   * RAID0 map to index 1 and leaves index 0 for SINGLE.
   *
   * ilog2() can handle both constants and variables.
   */
  #define BTRFS_BG_FLAG_TO_INDEX(profile)					\
  	ilog2((profile) >> (ilog2(BTRFS_BLOCK_GROUP_RAID0) - 1))
  55. enum btrfs_raid_types {
  56. /* SINGLE is the special one as it doesn't have on-disk bit. */
  57. BTRFS_RAID_SINGLE = 0,
  58. BTRFS_RAID_RAID0 = BTRFS_BG_FLAG_TO_INDEX(BTRFS_BLOCK_GROUP_RAID0),
  59. BTRFS_RAID_RAID1 = BTRFS_BG_FLAG_TO_INDEX(BTRFS_BLOCK_GROUP_RAID1),
  60. BTRFS_RAID_DUP = BTRFS_BG_FLAG_TO_INDEX(BTRFS_BLOCK_GROUP_DUP),
  61. BTRFS_RAID_RAID10 = BTRFS_BG_FLAG_TO_INDEX(BTRFS_BLOCK_GROUP_RAID10),
  62. BTRFS_RAID_RAID5 = BTRFS_BG_FLAG_TO_INDEX(BTRFS_BLOCK_GROUP_RAID5),
  63. BTRFS_RAID_RAID6 = BTRFS_BG_FLAG_TO_INDEX(BTRFS_BLOCK_GROUP_RAID6),
  64. BTRFS_RAID_RAID1C3 = BTRFS_BG_FLAG_TO_INDEX(BTRFS_BLOCK_GROUP_RAID1C3),
  65. BTRFS_RAID_RAID1C4 = BTRFS_BG_FLAG_TO_INDEX(BTRFS_BLOCK_GROUP_RAID1C4),
  66. BTRFS_NR_RAID_TYPES
  67. };
  68. static_assert(BTRFS_RAID_RAID0 == 1);
  69. static_assert(BTRFS_RAID_RAID1 == 2);
  70. static_assert(BTRFS_RAID_DUP == 3);
  71. static_assert(BTRFS_RAID_RAID10 == 4);
  72. static_assert(BTRFS_RAID_RAID5 == 5);
  73. static_assert(BTRFS_RAID_RAID6 == 6);
  74. static_assert(BTRFS_RAID_RAID1C3 == 7);
  75. static_assert(BTRFS_RAID_RAID1C4 == 8);
  /*
   * Use sequence counter to get consistent device stat data on
   * 32-bit processors.
   *
   * On other configurations btrfs_device_data_ordered_init() expands to an
   * empty statement.
   */
  #if BITS_PER_LONG==32 && defined(CONFIG_SMP)
  #include <linux/seqlock.h>
  #define __BTRFS_NEED_DEVICE_DATA_ORDERED
  /* The argument is parenthesized so that any pointer expression can be passed. */
  #define btrfs_device_data_ordered_init(device)	\
  	seqcount_init(&(device)->data_seqcount)
  #else
  #define btrfs_device_data_ordered_init(device) do { } while (0)
  #endif
  /* Bit numbers for btrfs_device::dev_state. */
  #define BTRFS_DEV_STATE_WRITEABLE	(0)
  #define BTRFS_DEV_STATE_IN_FS_METADATA	(1)
  #define BTRFS_DEV_STATE_MISSING		(2)
  #define BTRFS_DEV_STATE_REPLACE_TGT	(3)
  #define BTRFS_DEV_STATE_FLUSH_SENT	(4)
  #define BTRFS_DEV_STATE_NO_READA	(5)
  #define BTRFS_DEV_STATE_FLUSH_FAILED	(6)
  /* Set when the device item is found in chunk tree, used to catch unexpected registered device. */
  #define BTRFS_DEV_STATE_ITEM_FOUND	(7)

  /* Special value encoding failure to write primary super block. */
  #define BTRFS_SUPER_PRIMARY_WRITE_ERROR	(INT_MAX / 2)
  99. struct btrfs_fs_devices;
  100. struct btrfs_device {
  101. struct list_head dev_list; /* device_list_mutex */
  102. struct list_head dev_alloc_list; /* chunk mutex */
  103. struct list_head post_commit_list; /* chunk mutex */
  104. struct btrfs_fs_devices *fs_devices;
  105. struct btrfs_fs_info *fs_info;
  106. /* Device path or NULL if missing. */
  107. const char __rcu *name;
  108. u64 generation;
  109. struct file *bdev_file;
  110. struct block_device *bdev;
  111. struct btrfs_zoned_device_info *zone_info;
  112. unsigned long dev_state;
  113. #ifdef __BTRFS_NEED_DEVICE_DATA_ORDERED
  114. seqcount_t data_seqcount;
  115. #endif
  116. /* the internal btrfs device id */
  117. u64 devid;
  118. /* size of the device in memory */
  119. u64 total_bytes;
  120. /* size of the device on disk */
  121. u64 disk_total_bytes;
  122. /* bytes used */
  123. u64 bytes_used;
  124. /* optimal io alignment for this device */
  125. u32 io_align;
  126. /* optimal io width for this device */
  127. u32 io_width;
  128. /* type and info about this device */
  129. u64 type;
  130. /*
  131. * Counter of super block write errors, values larger than
  132. * BTRFS_SUPER_PRIMARY_WRITE_ERROR encode primary super block write failure.
  133. */
  134. atomic_t sb_write_errors;
  135. /* minimal io size for this device */
  136. u32 sector_size;
  137. /* physical drive uuid (or lvm uuid) */
  138. u8 uuid[BTRFS_UUID_SIZE];
  139. /*
  140. * size of the device on the current transaction
  141. *
  142. * This variant is update when committing the transaction,
  143. * and protected by chunk mutex
  144. */
  145. u64 commit_total_bytes;
  146. /* bytes used on the current transaction */
  147. u64 commit_bytes_used;
  148. /* Bio used for flushing device barriers */
  149. struct bio flush_bio;
  150. struct completion flush_wait;
  151. /* per-device scrub information */
  152. struct scrub_ctx *scrub_ctx;
  153. /* disk I/O failure stats. For detailed description refer to
  154. * enum btrfs_dev_stat_values in ioctl.h */
  155. int dev_stats_valid;
  156. /* Counter to record the change of device stats */
  157. atomic_t dev_stats_ccnt;
  158. atomic_t dev_stat_values[BTRFS_DEV_STAT_VALUES_MAX];
  159. /*
  160. * Device's major-minor number. Must be set even if the device is not
  161. * opened (bdev == NULL), unless the device is missing.
  162. */
  163. dev_t devt;
  164. struct extent_io_tree alloc_state;
  165. struct completion kobj_unregister;
  166. /* For sysfs/FSID/devinfo/devid/ */
  167. struct kobject devid_kobj;
  168. /* Bandwidth limit for scrub, in bytes */
  169. u64 scrub_speed_max;
  170. };
  171. /*
  172. * Block group or device which contains an active swapfile. Used for preventing
  173. * unsafe operations while a swapfile is active.
  174. *
  175. * These are sorted on (ptr, inode) (note that a block group or device can
  176. * contain more than one swapfile). We compare the pointer values because we
  177. * don't actually care what the object is, we just need a quick check whether
  178. * the object exists in the rbtree.
  179. */
  180. struct btrfs_swapfile_pin {
  181. struct rb_node node;
  182. void *ptr;
  183. struct inode *inode;
  184. /*
  185. * If true, ptr points to a struct btrfs_block_group. Otherwise, ptr
  186. * points to a struct btrfs_device.
  187. */
  188. bool is_block_group;
  189. /*
  190. * Only used when 'is_block_group' is true and it is the number of
  191. * extents used by a swapfile for this block group ('ptr' field).
  192. */
  193. int bg_extent_count;
  194. };
  195. /*
  196. * If we read those variants at the context of their own lock, we needn't
  197. * use the following helpers, reading them directly is safe.
  198. */
  199. #if BITS_PER_LONG==32 && defined(CONFIG_SMP)
  200. #define BTRFS_DEVICE_GETSET_FUNCS(name) \
  201. static inline u64 \
  202. btrfs_device_get_##name(const struct btrfs_device *dev) \
  203. { \
  204. u64 size; \
  205. unsigned int seq; \
  206. \
  207. do { \
  208. seq = read_seqcount_begin(&dev->data_seqcount); \
  209. size = dev->name; \
  210. } while (read_seqcount_retry(&dev->data_seqcount, seq)); \
  211. return size; \
  212. } \
  213. \
  214. static inline void \
  215. btrfs_device_set_##name(struct btrfs_device *dev, u64 size) \
  216. { \
  217. preempt_disable(); \
  218. write_seqcount_begin(&dev->data_seqcount); \
  219. dev->name = size; \
  220. write_seqcount_end(&dev->data_seqcount); \
  221. preempt_enable(); \
  222. }
  223. #elif BITS_PER_LONG==32 && defined(CONFIG_PREEMPTION)
  224. #define BTRFS_DEVICE_GETSET_FUNCS(name) \
  225. static inline u64 \
  226. btrfs_device_get_##name(const struct btrfs_device *dev) \
  227. { \
  228. u64 size; \
  229. \
  230. preempt_disable(); \
  231. size = dev->name; \
  232. preempt_enable(); \
  233. return size; \
  234. } \
  235. \
  236. static inline void \
  237. btrfs_device_set_##name(struct btrfs_device *dev, u64 size) \
  238. { \
  239. preempt_disable(); \
  240. dev->name = size; \
  241. preempt_enable(); \
  242. }
  243. #else
  244. #define BTRFS_DEVICE_GETSET_FUNCS(name) \
  245. static inline u64 \
  246. btrfs_device_get_##name(const struct btrfs_device *dev) \
  247. { \
  248. return dev->name; \
  249. } \
  250. \
  251. static inline void \
  252. btrfs_device_set_##name(struct btrfs_device *dev, u64 size) \
  253. { \
  254. dev->name = size; \
  255. }
  256. #endif
  257. BTRFS_DEVICE_GETSET_FUNCS(total_bytes);
  258. BTRFS_DEVICE_GETSET_FUNCS(disk_total_bytes);
  259. BTRFS_DEVICE_GETSET_FUNCS(bytes_used);
  /* Chunk allocation policy, stored in btrfs_fs_devices::chunk_alloc_policy. */
  enum btrfs_chunk_allocation_policy {
  	BTRFS_CHUNK_ALLOC_REGULAR,
  	BTRFS_CHUNK_ALLOC_ZONED,
  };
  /* NOTE(review): presumably the default for rr_min_contig_read, confirm against the user. */
  #define BTRFS_DEFAULT_RR_MIN_CONTIG_READ	(SZ_256K)

  /* Keep in sync with raid_attr table, current maximum is RAID1C4. */
  #define BTRFS_RAID1_MAX_MIRRORS			(4)
  /*
   * Read policies for mirrored block group profiles, read picks the stripe based
   * on these policies.
   */
  enum btrfs_read_policy {
  	/* Use process PID to choose the stripe */
  	BTRFS_READ_POLICY_PID,
  #ifdef CONFIG_BTRFS_EXPERIMENTAL
  	/* Balancing RAID1 reads across all striped devices (round-robin). */
  	BTRFS_READ_POLICY_RR,
  	/* Read from a specific device. */
  	BTRFS_READ_POLICY_DEVID,
  #endif
  	/* Number of policies available in this build configuration. */
  	BTRFS_NR_READ_POLICY,
  };
  282. struct btrfs_fs_devices {
  283. u8 fsid[BTRFS_FSID_SIZE]; /* FS specific uuid */
  284. /*
  285. * UUID written into the btree blocks:
  286. *
  287. * - If metadata_uuid != fsid then super block must have
  288. * BTRFS_FEATURE_INCOMPAT_METADATA_UUID flag set.
  289. *
  290. * - Following shall be true at all times:
  291. * - metadata_uuid == btrfs_header::fsid
  292. * - metadata_uuid == btrfs_dev_item::fsid
  293. *
  294. * - Relations between fsid and metadata_uuid in sb and fs_devices:
  295. * - Normal:
  296. * fs_devices->fsid == fs_devices->metadata_uuid == sb->fsid
  297. * sb->metadata_uuid == 0
  298. *
  299. * - When the BTRFS_FEATURE_INCOMPAT_METADATA_UUID flag is set:
  300. * fs_devices->fsid == sb->fsid
  301. * fs_devices->metadata_uuid == sb->metadata_uuid
  302. *
  303. * - When in-memory fs_devices->temp_fsid is true
  304. * fs_devices->fsid = random
  305. * fs_devices->metadata_uuid == sb->fsid
  306. */
  307. u8 metadata_uuid[BTRFS_FSID_SIZE];
  308. struct list_head fs_list;
  309. /*
  310. * Number of devices under this fsid including missing and
  311. * replace-target device and excludes seed devices.
  312. */
  313. u64 num_devices;
  314. /*
  315. * The number of devices that successfully opened, including
  316. * replace-target, excludes seed devices.
  317. */
  318. u64 open_devices;
  319. /* The number of devices that are under the chunk allocation list. */
  320. u64 rw_devices;
  321. /* Count of missing devices under this fsid excluding seed device. */
  322. u64 missing_devices;
  323. u64 total_rw_bytes;
  324. /*
  325. * Count of devices from btrfs_super_block::num_devices for this fsid,
  326. * which includes the seed device, excludes the transient replace-target
  327. * device.
  328. */
  329. u64 total_devices;
  330. /* Highest generation number of seen devices */
  331. u64 latest_generation;
  332. /*
  333. * The mount device or a device with highest generation after removal
  334. * or replace.
  335. */
  336. struct btrfs_device *latest_dev;
  337. /*
  338. * All of the devices in the filesystem, protected by a mutex so we can
  339. * safely walk it to write out the super blocks without worrying about
  340. * adding/removing by the multi-device code. Scrubbing super block can
  341. * kick off supers writing by holding this mutex lock.
  342. */
  343. struct mutex device_list_mutex;
  344. /* List of all devices, protected by device_list_mutex */
  345. struct list_head devices;
  346. /* Devices which can satisfy space allocation. Protected by * chunk_mutex. */
  347. struct list_head alloc_list;
  348. struct list_head seed_list;
  349. /* Count fs-devices opened. */
  350. int opened;
  351. /*
  352. * Counter of the processes that are holding this fs_devices but not
  353. * yet opened.
  354. * This is for mounting handling, as we can only open the fs_devices
  355. * after a super block is created. But we cannot take uuid_mutex
  356. * during sget_fc(), thus we have to hold the fs_devices (meaning it
  357. * cannot be released) until a super block is returned.
  358. */
  359. int holding;
  360. /* Set when we find or add a device that doesn't have the nonrot flag set. */
  361. bool rotating;
  362. /* Devices support TRIM/discard commands. */
  363. bool discardable;
  364. /* The filesystem is a seed filesystem. */
  365. bool seeding;
  366. /* The mount needs to use a randomly generated fsid. */
  367. bool temp_fsid;
  368. /* Enable/disable the filesystem stats tracking. */
  369. bool collect_fs_stats;
  370. struct btrfs_fs_info *fs_info;
  371. /* sysfs kobjects */
  372. struct kobject fsid_kobj;
  373. struct kobject *devices_kobj;
  374. struct kobject *devinfo_kobj;
  375. struct completion kobj_unregister;
  376. enum btrfs_chunk_allocation_policy chunk_alloc_policy;
  377. /* Policy used to read the mirrored stripes. */
  378. enum btrfs_read_policy read_policy;
  379. #ifdef CONFIG_BTRFS_EXPERIMENTAL
  380. /*
  381. * Minimum contiguous reads before switching to next device, the unit
  382. * is one block/sectorsize.
  383. */
  384. u32 rr_min_contig_read;
  385. /* Device to be used for reading in case of RAID1. */
  386. u64 read_devid;
  387. #endif
  388. };
  /*
   * Number of stripes that fit into one maximum-sized item: the space left
   * after struct btrfs_chunk divided by the stripe size, plus one.
   * NOTE(review): the "+ 1" presumably accounts for a stripe embedded in
   * struct btrfs_chunk - confirm against its definition.
   */
  #define BTRFS_MAX_DEVS(info) ((BTRFS_MAX_ITEM_SIZE(info)	\
  			- sizeof(struct btrfs_chunk))		\
  			/ sizeof(struct btrfs_stripe) + 1)

  /* Same computation for the system chunk array, with two keys and two chunk headers subtracted. */
  #define BTRFS_MAX_DEVS_SYS_CHUNK ((BTRFS_SYSTEM_CHUNK_ARRAY_SIZE	\
  				- 2 * sizeof(struct btrfs_disk_key)	\
  				- 2 * sizeof(struct btrfs_chunk))	\
  				/ sizeof(struct btrfs_stripe) + 1)
  396. struct btrfs_io_stripe {
  397. struct btrfs_device *dev;
  398. /* Block mapping. */
  399. u64 physical;
  400. bool rst_search_commit_root;
  401. /* For the endio handler. */
  402. struct btrfs_io_context *bioc;
  403. };
  404. struct btrfs_discard_stripe {
  405. struct btrfs_device *dev;
  406. u64 physical;
  407. u64 length;
  408. };
  409. /*
  410. * Context for IO submission for device stripe.
  411. *
  412. * - Track the unfinished mirrors for mirror based profiles
  413. * Mirror based profiles are SINGLE/DUP/RAID1/RAID10.
  414. *
  415. * - Contain the logical -> physical mapping info
  416. * Used by submit_stripe_bio() for mapping logical bio
  417. * into physical device address.
  418. *
  419. * - Contain device replace info
  420. * Used by handle_ops_on_dev_replace() to copy logical bios
  421. * into the new device.
  422. *
  423. * - Contain RAID56 full stripe logical bytenrs
  424. */
  425. struct btrfs_io_context {
  426. refcount_t refs;
  427. struct btrfs_fs_info *fs_info;
  428. /* Taken from struct btrfs_chunk_map::type. */
  429. u64 map_type;
  430. struct bio *orig_bio;
  431. atomic_t error;
  432. u16 max_errors;
  433. bool use_rst;
  434. u64 logical;
  435. u64 size;
  436. /* Raid stripe tree ordered entry. */
  437. struct list_head rst_ordered_entry;
  438. /*
  439. * The total number of stripes, including the extra duplicated
  440. * stripe for replace.
  441. */
  442. u16 num_stripes;
  443. /*
  444. * The mirror_num of this bioc.
  445. *
  446. * This is for reads which use 0 as mirror_num, thus we should return a
  447. * valid mirror_num (>0) for the reader.
  448. */
  449. u16 mirror_num;
  450. /*
  451. * The following two members are for dev-replace case only.
  452. *
  453. * @replace_nr_stripes: Number of duplicated stripes which need to be
  454. * written to replace target.
  455. * Should be <= 2 (2 for DUP, otherwise <= 1).
  456. * @replace_stripe_src: The array indicates where the duplicated stripes
  457. * are from.
  458. *
  459. * The @replace_stripe_src[] array is mostly for RAID56 cases.
  460. * As non-RAID56 stripes share the same contents of the mapped range,
  461. * thus no need to bother where the duplicated ones are from.
  462. *
  463. * But for RAID56 case, all stripes contain different contents, thus
  464. * we need a way to know the mapping.
  465. *
  466. * There is an example for the two members, using a RAID5 write:
  467. *
  468. * num_stripes: 4 (3 + 1 duplicated write)
  469. * stripes[0]: dev = devid 1, physical = X
  470. * stripes[1]: dev = devid 2, physical = Y
  471. * stripes[2]: dev = devid 3, physical = Z
  472. * stripes[3]: dev = devid 0, physical = Y
  473. *
  474. * replace_nr_stripes = 1
  475. * replace_stripe_src = 1 <- Means stripes[1] is involved in replace.
  476. * The duplicated stripe index would be
  477. * (@num_stripes - 1).
  478. *
  479. * Note, that we can still have cases replace_nr_stripes = 2 for DUP.
  480. * In that case, all stripes share the same content, thus we don't
  481. * need to bother @replace_stripe_src value at all.
  482. */
  483. u16 replace_nr_stripes;
  484. s16 replace_stripe_src;
  485. /*
  486. * Logical bytenr of the full stripe start, only for RAID56 cases.
  487. *
  488. * When this value is set to other than (u64)-1, the stripes[] should
  489. * follow this pattern:
  490. *
  491. * (real_stripes = num_stripes - replace_nr_stripes)
  492. * (data_stripes = (is_raid6) ? (real_stripes - 2) : (real_stripes - 1))
  493. *
  494. * stripes[0]: The first data stripe
  495. * stripes[1]: The second data stripe
  496. * ...
  497. * stripes[data_stripes - 1]: The last data stripe
  498. * stripes[data_stripes]: The P stripe
  499. * stripes[data_stripes + 1]: The Q stripe (only for RAID6).
  500. */
  501. u64 full_stripe_logical;
  502. struct btrfs_io_stripe stripes[];
  503. };
  504. struct btrfs_device_info {
  505. struct btrfs_device *dev;
  506. u64 dev_offset;
  507. u64 max_avail;
  508. u64 total_avail;
  509. };
  510. struct btrfs_raid_attr {
  511. u8 sub_stripes; /* sub_stripes info for map */
  512. u8 dev_stripes; /* stripes per dev */
  513. u8 devs_max; /* max devs to use */
  514. u8 devs_min; /* min devs needed */
  515. u8 tolerated_failures; /* max tolerated fail devs */
  516. u8 devs_increment; /* ndevs has to be a multiple of this */
  517. u8 ncopies; /* how many copies to data has */
  518. u8 nparity; /* number of stripes worth of bytes to store
  519. * parity information */
  520. u8 mindev_error; /* error code if min devs requisite is unmet */
  521. const char raid_name[8]; /* name of the raid */
  522. u64 bg_flag; /* block group flag of the raid */
  523. };
  524. extern const struct btrfs_raid_attr btrfs_raid_array[BTRFS_NR_RAID_TYPES];
  525. struct btrfs_chunk_map {
  526. struct rb_node rb_node;
  527. /* For mount time dev extent verification. */
  528. int verified_stripes;
  529. refcount_t refs;
  530. u64 start;
  531. u64 chunk_len;
  532. u64 stripe_size;
  533. u64 type;
  534. int io_align;
  535. int io_width;
  536. int num_stripes;
  537. int sub_stripes;
  538. struct btrfs_io_stripe stripes[];
  539. };
  540. #define btrfs_chunk_map_size(n) (sizeof(struct btrfs_chunk_map) + \
  541. (sizeof(struct btrfs_io_stripe) * (n)))
  542. static inline void btrfs_free_chunk_map(struct btrfs_chunk_map *map)
  543. {
  544. if (map && refcount_dec_and_test(&map->refs)) {
  545. ASSERT(RB_EMPTY_NODE(&map->rb_node));
  546. kfree(map);
  547. }
  548. }
  549. DEFINE_FREE(btrfs_free_chunk_map, struct btrfs_chunk_map *, btrfs_free_chunk_map(_T))
  550. struct btrfs_balance_control {
  551. struct btrfs_balance_args data;
  552. struct btrfs_balance_args meta;
  553. struct btrfs_balance_args sys;
  554. u64 flags;
  555. struct btrfs_balance_progress stat;
  556. };
  557. /*
  558. * Search for a given device by the set parameters
  559. */
  560. struct btrfs_dev_lookup_args {
  561. u64 devid;
  562. u8 *uuid;
  563. u8 *fsid;
  564. /*
  565. * If devt is specified, all other members will be ignored as it is
  566. * enough to uniquely locate a device.
  567. */
  568. dev_t devt;
  569. bool missing;
  570. };
  571. /* We have to initialize to -1 because BTRFS_DEV_REPLACE_DEVID is 0 */
  572. #define BTRFS_DEV_LOOKUP_ARGS_INIT { .devid = (u64)-1 }
  573. #define BTRFS_DEV_LOOKUP_ARGS(name) \
  574. struct btrfs_dev_lookup_args name = BTRFS_DEV_LOOKUP_ARGS_INIT
  /* Kind of mapping operation, passed to btrfs_map_block(). */
  enum btrfs_map_op {
  	BTRFS_MAP_READ,
  	BTRFS_MAP_WRITE,
  	BTRFS_MAP_GET_READ_MIRRORS,
  };
  580. static inline enum btrfs_map_op btrfs_op(const struct bio *bio)
  581. {
  582. switch (bio_op(bio)) {
  583. case REQ_OP_WRITE:
  584. case REQ_OP_ZONE_APPEND:
  585. return BTRFS_MAP_WRITE;
  586. default:
  587. WARN_ON_ONCE(1);
  588. fallthrough;
  589. case REQ_OP_READ:
  590. return BTRFS_MAP_READ;
  591. }
  592. }
  593. static inline unsigned long btrfs_chunk_item_size(int num_stripes)
  594. {
  595. ASSERT(num_stripes);
  596. return sizeof(struct btrfs_chunk) +
  597. sizeof(struct btrfs_stripe) * (num_stripes - 1);
  598. }
  599. /*
  600. * Do the type safe conversion from stripe_nr to offset inside the chunk.
  601. *
  602. * @stripe_nr is u32, with left shift it can overflow u32 for chunks larger
  603. * than 4G. This does the proper type cast to avoid overflow.
  604. */
  605. static inline u64 btrfs_stripe_nr_to_offset(u32 stripe_nr)
  606. {
  607. return (u64)stripe_nr << BTRFS_STRIPE_LEN_SHIFT;
  608. }
  609. void btrfs_get_bioc(struct btrfs_io_context *bioc);
  610. void btrfs_put_bioc(struct btrfs_io_context *bioc);
  611. int btrfs_map_block(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
  612. u64 logical, u64 *length,
  613. struct btrfs_io_context **bioc_ret,
  614. struct btrfs_io_stripe *smap, int *mirror_num_ret);
  615. int btrfs_map_repair_block(struct btrfs_fs_info *fs_info,
  616. struct btrfs_io_stripe *smap, u64 logical,
  617. u32 length, int mirror_num);
  618. struct btrfs_discard_stripe *btrfs_map_discard(struct btrfs_fs_info *fs_info,
  619. u64 logical, u64 *length_ret,
  620. u32 *num_stripes, bool do_remap);
  621. int btrfs_read_sys_array(struct btrfs_fs_info *fs_info);
  622. int btrfs_read_chunk_tree(struct btrfs_fs_info *fs_info);
  623. struct btrfs_block_group *btrfs_create_chunk(struct btrfs_trans_handle *trans,
  624. struct btrfs_space_info *space_info,
  625. u64 type);
  626. void btrfs_mapping_tree_free(struct btrfs_fs_info *fs_info);
  627. int btrfs_open_devices(struct btrfs_fs_devices *fs_devices,
  628. blk_mode_t flags, void *holder);
  629. struct btrfs_device *btrfs_scan_one_device(const char *path, bool mount_arg_dev);
  630. int btrfs_forget_devices(dev_t devt);
  631. void btrfs_close_devices(struct btrfs_fs_devices *fs_devices);
  632. void btrfs_free_extra_devids(struct btrfs_fs_devices *fs_devices);
  633. void btrfs_assign_next_active_device(struct btrfs_device *device,
  634. struct btrfs_device *this_dev);
  635. struct btrfs_device *btrfs_find_device_by_devspec(struct btrfs_fs_info *fs_info,
  636. u64 devid,
  637. const char *devpath);
  638. int btrfs_get_dev_args_from_path(struct btrfs_fs_info *fs_info,
  639. struct btrfs_dev_lookup_args *args,
  640. const char *path);
  641. struct btrfs_device *btrfs_alloc_device(struct btrfs_fs_info *fs_info,
  642. const u64 *devid, const u8 *uuid,
  643. const char *path);
  644. void btrfs_put_dev_args_from_path(struct btrfs_dev_lookup_args *args);
  645. int btrfs_rm_device(struct btrfs_fs_info *fs_info,
  646. struct btrfs_dev_lookup_args *args,
  647. struct file **bdev_file);
  648. void __exit btrfs_cleanup_fs_uuids(void);
  649. int btrfs_num_copies(struct btrfs_fs_info *fs_info, u64 logical, u64 len);
  650. int btrfs_grow_device(struct btrfs_trans_handle *trans,
  651. struct btrfs_device *device, u64 new_size);
  652. struct btrfs_device *btrfs_find_device(const struct btrfs_fs_devices *fs_devices,
  653. const struct btrfs_dev_lookup_args *args);
  654. int btrfs_shrink_device(struct btrfs_device *device, u64 new_size);
  655. int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *path);
  656. int btrfs_balance(struct btrfs_fs_info *fs_info,
  657. struct btrfs_balance_control *bctl,
  658. struct btrfs_ioctl_balance_args *bargs);
  659. void btrfs_describe_block_groups(u64 flags, char *buf, u32 size_buf);
  660. int btrfs_resume_balance_async(struct btrfs_fs_info *fs_info);
  661. int btrfs_recover_balance(struct btrfs_fs_info *fs_info);
  662. int btrfs_pause_balance(struct btrfs_fs_info *fs_info);
  663. int btrfs_relocate_chunk(struct btrfs_fs_info *fs_info, u64 chunk_offset,
  664. bool verbose);
  665. int btrfs_cancel_balance(struct btrfs_fs_info *fs_info);
  666. bool btrfs_chunk_writeable(struct btrfs_fs_info *fs_info, u64 chunk_offset);
  667. void btrfs_dev_stat_inc_and_print(struct btrfs_device *dev, int index);
  668. int btrfs_get_dev_stats(struct btrfs_fs_info *fs_info,
  669. struct btrfs_ioctl_get_dev_stats *stats);
  670. int btrfs_init_devices_late(struct btrfs_fs_info *fs_info);
  671. int btrfs_init_dev_stats(struct btrfs_fs_info *fs_info);
  672. int btrfs_run_dev_stats(struct btrfs_trans_handle *trans);
  673. void btrfs_rm_dev_replace_remove_srcdev(struct btrfs_device *srcdev);
  674. void btrfs_rm_dev_replace_free_srcdev(struct btrfs_device *srcdev);
  675. void btrfs_destroy_dev_replace_tgtdev(struct btrfs_device *tgtdev);
  676. unsigned long btrfs_full_stripe_len(struct btrfs_fs_info *fs_info,
  677. u64 logical);
  678. u64 btrfs_calc_stripe_length(const struct btrfs_chunk_map *map);
  679. int btrfs_nr_parity_stripes(u64 type);
  680. int btrfs_chunk_alloc_add_chunk_item(struct btrfs_trans_handle *trans,
  681. struct btrfs_block_group *bg);
  682. int btrfs_remove_dev_extents(struct btrfs_trans_handle *trans, struct btrfs_chunk_map *map);
  683. int btrfs_remove_chunk(struct btrfs_trans_handle *trans, u64 chunk_offset);
  684. #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
  685. struct btrfs_chunk_map *btrfs_alloc_chunk_map(int num_stripes, gfp_t gfp);
  686. int btrfs_add_chunk_map(struct btrfs_fs_info *fs_info, struct btrfs_chunk_map *map);
  687. #endif
  688. struct btrfs_chunk_map *btrfs_find_chunk_map(struct btrfs_fs_info *fs_info,
  689. u64 logical, u64 length);
  690. struct btrfs_chunk_map *btrfs_find_chunk_map_nolock(struct btrfs_fs_info *fs_info,
  691. u64 logical, u64 length);
  692. struct btrfs_chunk_map *btrfs_get_chunk_map(struct btrfs_fs_info *fs_info,
  693. u64 logical, u64 length);
  694. void btrfs_remove_chunk_map(struct btrfs_fs_info *fs_info, struct btrfs_chunk_map *map);
  695. struct btrfs_super_block *btrfs_read_disk_super(struct block_device *bdev,
  696. int copy_num, bool drop_cache);
  697. void btrfs_release_disk_super(struct btrfs_super_block *super);
  698. static inline void btrfs_dev_stat_inc(struct btrfs_device *dev,
  699. int index)
  700. {
  701. atomic_inc(dev->dev_stat_values + index);
  702. /*
  703. * This memory barrier orders stores updating statistics before stores
  704. * updating dev_stats_ccnt.
  705. *
  706. * It pairs with smp_rmb() in btrfs_run_dev_stats().
  707. */
  708. smp_mb__before_atomic();
  709. atomic_inc(&dev->dev_stats_ccnt);
  710. }
  711. static inline int btrfs_dev_stat_read(struct btrfs_device *dev,
  712. int index)
  713. {
  714. return atomic_read(dev->dev_stat_values + index);
  715. }
  716. static inline int btrfs_dev_stat_read_and_reset(struct btrfs_device *dev,
  717. int index)
  718. {
  719. int ret;
  720. ret = atomic_xchg(dev->dev_stat_values + index, 0);
  721. /*
  722. * atomic_xchg implies a full memory barriers as per atomic_t.txt:
  723. * - RMW operations that have a return value are fully ordered;
  724. *
  725. * This implicit memory barriers is paired with the smp_rmb in
  726. * btrfs_run_dev_stats
  727. */
  728. atomic_inc(&dev->dev_stats_ccnt);
  729. return ret;
  730. }
  731. static inline void btrfs_dev_stat_set(struct btrfs_device *dev,
  732. int index, unsigned long val)
  733. {
  734. atomic_set(dev->dev_stat_values + index, val);
  735. /*
  736. * This memory barrier orders stores updating statistics before stores
  737. * updating dev_stats_ccnt.
  738. *
  739. * It pairs with smp_rmb() in btrfs_run_dev_stats().
  740. */
  741. smp_mb__before_atomic();
  742. atomic_inc(&dev->dev_stats_ccnt);
  743. }
  744. static inline const char *btrfs_dev_name(const struct btrfs_device *device)
  745. {
  746. if (!device || test_bit(BTRFS_DEV_STATE_MISSING, &device->dev_state))
  747. return "<missing disk>";
  748. else
  749. return rcu_dereference(device->name);
  750. }
  751. static inline void btrfs_warn_unknown_chunk_allocation(enum btrfs_chunk_allocation_policy pol)
  752. {
  753. WARN_ONCE(1, "unknown allocation policy %d, fallback to regular", pol);
  754. }
  755. static inline void btrfs_fs_devices_inc_holding(struct btrfs_fs_devices *fs_devices)
  756. {
  757. lockdep_assert_held(&uuid_mutex);
  758. ASSERT(fs_devices->holding >= 0);
  759. fs_devices->holding++;
  760. }
  761. static inline void btrfs_fs_devices_dec_holding(struct btrfs_fs_devices *fs_devices)
  762. {
  763. lockdep_assert_held(&uuid_mutex);
  764. ASSERT(fs_devices->holding > 0);
  765. fs_devices->holding--;
  766. }
  767. void btrfs_commit_device_sizes(struct btrfs_transaction *trans);
  768. struct list_head * __attribute_const__ btrfs_get_fs_uuids(void);
  769. bool btrfs_check_rw_degradable(struct btrfs_fs_info *fs_info,
  770. struct btrfs_device *failing_dev);
  771. void btrfs_scratch_superblocks(struct btrfs_fs_info *fs_info, struct btrfs_device *device);
  772. enum btrfs_raid_types __attribute_const__ btrfs_bg_flags_to_raid_index(u64 flags);
  773. int btrfs_bg_type_to_factor(u64 flags);
  774. const char *btrfs_bg_type_to_raid_name(u64 flags);
  775. int btrfs_verify_dev_extents(struct btrfs_fs_info *fs_info);
  776. bool btrfs_verify_dev_items(const struct btrfs_fs_info *fs_info);
  777. bool btrfs_repair_one_zone(struct btrfs_fs_info *fs_info, u64 logical);
  778. bool btrfs_pinned_by_swapfile(struct btrfs_fs_info *fs_info, void *ptr);
  779. const u8 *btrfs_sb_fsid_ptr(const struct btrfs_super_block *sb);
  780. int btrfs_update_device(struct btrfs_trans_handle *trans, struct btrfs_device *device);
  781. void btrfs_chunk_map_device_clear_bits(struct btrfs_chunk_map *map, unsigned int bits);
  782. bool btrfs_first_pending_extent(struct btrfs_device *device, u64 start, u64 len,
  783. u64 *pending_start, u64 *pending_end);
  784. bool btrfs_find_hole_in_pending_extents(struct btrfs_device *device,
  785. u64 *start, u64 *len, u64 min_hole_size);
  786. #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
  787. struct btrfs_io_context *alloc_btrfs_io_context(struct btrfs_fs_info *fs_info,
  788. u64 logical, u16 total_stripes);
  789. #endif
  790. #endif